indeedcrawler 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/indeedcrawler.rb +11 -2
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 88deb83542fefafa92618b959e8082df2e44428f
4
- data.tar.gz: e642f0b41dc9ae3f490fc86ccbd7fa6109ba5553
3
+ metadata.gz: 28e95f336784e596793f415a0d38731549202f5e
4
+ data.tar.gz: 49a3ed636cecb8b80f3dd106c35dead0bc4e8b4e
5
5
  SHA512:
6
- metadata.gz: 365d10c6116b4492922d481f311fb2b0195276a53aa941155f181d1cbd43b72ea6686e895df3d0787ebcfedc7cb5f452ee5ecc10a762f098034d106f8f2c8564
7
- data.tar.gz: 1e0466b9a75dd028e8256e6f4417e77802ca0c655698100837371039b4cda54c381302c33b9e5ce3c68571f4f999d8375ce0084ba1ec5abe786c1f4b3a88e975
6
+ metadata.gz: cc031047358453560be6e77a87879caa6fdf8d5643720515980cebbc05041b77742c2c3acc32072d25479a3fe97f01498b199ca75756e53176141893dc5e9c4b
7
+ data.tar.gz: 52a5257da3815de5de404cfc7d58ce5055ebbb6f4c89f1c5419b518797c55280357ddbc2f6f76328b2a8200b38e40c1b86ebc64787cf82c174bbd4c89194d7c8
data/lib/indeedcrawler.rb CHANGED
@@ -76,8 +76,8 @@ class IndeedCrawler
76
76
  i = IndeedParser.new(resume, link, {time_scraped: Time.now})
77
77
  results = JSON.parse(i.get_results_by_job)
78
78
  report_results(results, link)
79
- rescue
80
-
79
+ rescue => e
80
+ report_status("Error in parsing " + link+": "+e.to_s)
81
81
  end
82
82
  end
83
83
  end
@@ -107,6 +107,14 @@ class IndeedCrawler
107
107
  Curl::PostField.content('results', JSON.pretty_generate(results)))
108
108
  end
109
109
 
110
+ # Report Harvester status message
111
+ def report_status(status_msg)
112
+ curl_url = @cm_url+"/update_status"
113
+ c = Curl::Easy.http_post(curl_url,
114
+ Curl::PostField.content('selector_id', @selector_id),
115
+ Curl::PostField.content('status_message', status_msg))
116
+ end
117
+
110
118
  # Get the JSON of results
111
119
  def get_json
112
120
  return JSON.pretty_generate(@output)
@@ -129,5 +137,6 @@ class IndeedCrawler
129
137
 
130
138
  # Close browsers when done and return results
131
139
  @requests.close_all_browsers
140
+ report_status("Finished collecting data for selector "+@search_query.to_s+" "+@location.to_s)
132
141
  end
133
142
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: indeedcrawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-07 00:00:00.000000000 Z
11
+ date: 2016-10-30 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Crawls Indeed resumes
14
14
  email: shidash@transparencytoolkit.org