indeedcrawler 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/indeedcrawler.rb +11 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 28e95f336784e596793f415a0d38731549202f5e
|
4
|
+
data.tar.gz: 49a3ed636cecb8b80f3dd106c35dead0bc4e8b4e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc031047358453560be6e77a87879caa6fdf8d5643720515980cebbc05041b77742c2c3acc32072d25479a3fe97f01498b199ca75756e53176141893dc5e9c4b
|
7
|
+
data.tar.gz: 52a5257da3815de5de404cfc7d58ce5055ebbb6f4c89f1c5419b518797c55280357ddbc2f6f76328b2a8200b38e40c1b86ebc64787cf82c174bbd4c89194d7c8
|
data/lib/indeedcrawler.rb
CHANGED
@@ -76,8 +76,8 @@ class IndeedCrawler
|
|
76
76
|
i = IndeedParser.new(resume, link, {time_scraped: Time.now})
|
77
77
|
results = JSON.parse(i.get_results_by_job)
|
78
78
|
report_results(results, link)
|
79
|
-
rescue
|
80
|
-
|
79
|
+
rescue => e
|
80
|
+
report_status("Error in parsing " + link+": "+e.to_s)
|
81
81
|
end
|
82
82
|
end
|
83
83
|
end
|
@@ -107,6 +107,14 @@ class IndeedCrawler
|
|
107
107
|
Curl::PostField.content('results', JSON.pretty_generate(results)))
|
108
108
|
end
|
109
109
|
|
110
|
+
# Report Harvester status message
|
111
|
+
def report_status(status_msg)
|
112
|
+
curl_url = @cm_url+"/update_status"
|
113
|
+
c = Curl::Easy.http_post(curl_url,
|
114
|
+
Curl::PostField.content('selector_id', @selector_id),
|
115
|
+
Curl::PostField.content('status_message', status_msg))
|
116
|
+
end
|
117
|
+
|
110
118
|
# Get the JSON of results
|
111
119
|
def get_json
|
112
120
|
return JSON.pretty_generate(@output)
|
@@ -129,5 +137,6 @@ class IndeedCrawler
|
|
129
137
|
|
130
138
|
# Close browsers when done and return results
|
131
139
|
@requests.close_all_browsers
|
140
|
+
report_status("Finished collecting data for selector "+@search_query.to_s+" "+@location.to_s)
|
132
141
|
end
|
133
142
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: indeedcrawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- M. C. McGrath
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-10-
|
11
|
+
date: 2016-10-30 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Crawls Indeed resumes
|
14
14
|
email: shidash@transparencytoolkit.org
|