indeedcrawler 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/indeedcrawler.rb +11 -2
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 28e95f336784e596793f415a0d38731549202f5e
|
|
4
|
+
data.tar.gz: 49a3ed636cecb8b80f3dd106c35dead0bc4e8b4e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: cc031047358453560be6e77a87879caa6fdf8d5643720515980cebbc05041b77742c2c3acc32072d25479a3fe97f01498b199ca75756e53176141893dc5e9c4b
|
|
7
|
+
data.tar.gz: 52a5257da3815de5de404cfc7d58ce5055ebbb6f4c89f1c5419b518797c55280357ddbc2f6f76328b2a8200b38e40c1b86ebc64787cf82c174bbd4c89194d7c8
|
data/lib/indeedcrawler.rb
CHANGED
|
@@ -76,8 +76,8 @@ class IndeedCrawler
|
|
|
76
76
|
i = IndeedParser.new(resume, link, {time_scraped: Time.now})
|
|
77
77
|
results = JSON.parse(i.get_results_by_job)
|
|
78
78
|
report_results(results, link)
|
|
79
|
-
rescue
|
|
80
|
-
|
|
79
|
+
rescue => e
|
|
80
|
+
report_status("Error in parsing " + link+": "+e.to_s)
|
|
81
81
|
end
|
|
82
82
|
end
|
|
83
83
|
end
|
|
@@ -107,6 +107,14 @@ class IndeedCrawler
|
|
|
107
107
|
Curl::PostField.content('results', JSON.pretty_generate(results)))
|
|
108
108
|
end
|
|
109
109
|
|
|
110
|
+
# Report Harvester status message
|
|
111
|
+
def report_status(status_msg)
|
|
112
|
+
curl_url = @cm_url+"/update_status"
|
|
113
|
+
c = Curl::Easy.http_post(curl_url,
|
|
114
|
+
Curl::PostField.content('selector_id', @selector_id),
|
|
115
|
+
Curl::PostField.content('status_message', status_msg))
|
|
116
|
+
end
|
|
117
|
+
|
|
110
118
|
# Get the JSON of results
|
|
111
119
|
def get_json
|
|
112
120
|
return JSON.pretty_generate(@output)
|
|
@@ -129,5 +137,6 @@ class IndeedCrawler
|
|
|
129
137
|
|
|
130
138
|
# Close browsers when done and return results
|
|
131
139
|
@requests.close_all_browsers
|
|
140
|
+
report_status("Finished collecting data for selector "+@search_query.to_s+" "+@location.to_s)
|
|
132
141
|
end
|
|
133
142
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: indeedcrawler
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.4
|
|
4
|
+
version: 0.0.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- M. C. McGrath
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2016-10-
|
|
11
|
+
date: 2016-10-30 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: Crawls Indeed resumes
|
|
14
14
|
email: shidash@transparencytoolkit.org
|