indeedcrawler 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/indeedcrawler.rb +11 -2
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 88deb83542fefafa92618b959e8082df2e44428f
4
- data.tar.gz: e642f0b41dc9ae3f490fc86ccbd7fa6109ba5553
3
+ metadata.gz: 28e95f336784e596793f415a0d38731549202f5e
4
+ data.tar.gz: 49a3ed636cecb8b80f3dd106c35dead0bc4e8b4e
5
5
  SHA512:
6
- metadata.gz: 365d10c6116b4492922d481f311fb2b0195276a53aa941155f181d1cbd43b72ea6686e895df3d0787ebcfedc7cb5f452ee5ecc10a762f098034d106f8f2c8564
7
- data.tar.gz: 1e0466b9a75dd028e8256e6f4417e77802ca0c655698100837371039b4cda54c381302c33b9e5ce3c68571f4f999d8375ce0084ba1ec5abe786c1f4b3a88e975
6
+ metadata.gz: cc031047358453560be6e77a87879caa6fdf8d5643720515980cebbc05041b77742c2c3acc32072d25479a3fe97f01498b199ca75756e53176141893dc5e9c4b
7
+ data.tar.gz: 52a5257da3815de5de404cfc7d58ce5055ebbb6f4c89f1c5419b518797c55280357ddbc2f6f76328b2a8200b38e40c1b86ebc64787cf82c174bbd4c89194d7c8
data/lib/indeedcrawler.rb CHANGED
@@ -76,8 +76,8 @@ class IndeedCrawler
76
76
  i = IndeedParser.new(resume, link, {time_scraped: Time.now})
77
77
  results = JSON.parse(i.get_results_by_job)
78
78
  report_results(results, link)
79
- rescue
80
-
79
+ rescue => e
80
+ report_status("Error in parsing " + link+": "+e.to_s)
81
81
  end
82
82
  end
83
83
  end
@@ -107,6 +107,14 @@ class IndeedCrawler
107
107
  Curl::PostField.content('results', JSON.pretty_generate(results)))
108
108
  end
109
109
 
110
+ # Report Harvester status message
111
+ def report_status(status_msg)
112
+ curl_url = @cm_url+"/update_status"
113
+ c = Curl::Easy.http_post(curl_url,
114
+ Curl::PostField.content('selector_id', @selector_id),
115
+ Curl::PostField.content('status_message', status_msg))
116
+ end
117
+
110
118
  # Get the JSON of results
111
119
  def get_json
112
120
  return JSON.pretty_generate(@output)
@@ -129,5 +137,6 @@ class IndeedCrawler
129
137
 
130
138
  # Close browsers when done and return results
131
139
  @requests.close_all_browsers
140
+ report_status("Finished collecting data for selector "+@search_query.to_s+" "+@location.to_s)
132
141
  end
133
142
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: indeedcrawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-07 00:00:00.000000000 Z
11
+ date: 2016-10-30 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Crawls Indeed resumes
14
14
  email: shidash@transparencytoolkit.org