remote_job_scraper 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/remote_job_scraper/version.rb +1 -1
- data/lib/sites/jobs_rails42.rb +6 -8
- data/lib/sites/remote_ok.rb +1 -2
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b880f59b8ef9959438e8025736371ee13b88ec7c
|
4
|
+
data.tar.gz: a7e1aff1067ea8bdc33d79962cfe749c76c111b6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 92d3ba352d3f42be0f4840cb95fb46521eda24fcc669697d4f1cb1882bc15435fbbc55e096adab99e11e61a20e9596231789fd735e719f4af576e3912ff1b6bf
|
7
|
+
data.tar.gz: 980d533d890bd8ebc3ba2da1e69d9a951c98ce8290aea2a2b136919281d1e5bf1c11c4a8e0110e5cc595e75360ef86c4a9cd0a1cbafddc261791b020a6346568
|
data/README.md
CHANGED
data/lib/sites/jobs_rails42.rb
CHANGED
@@ -12,19 +12,18 @@ module Sites
|
|
12
12
|
JOB_ITEM_SELECTOR = 'li.job-offers__item a'.freeze
|
13
13
|
STORE_DIR = 'data/jobs_rails42'.freeze
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
def initialize(job_type: :programming)
|
15
|
+
def initialize(job_type: :programming, total_pages: 4)
|
18
16
|
@job_type = job_type
|
19
17
|
@url = build_url
|
20
18
|
@doc = nil
|
21
19
|
@current_time = Time.new
|
22
20
|
@timestamp = @current_time.strftime("%Y%m%d%H%M%S")
|
21
|
+
@total_pages = total_pages
|
23
22
|
@count = get_count
|
24
23
|
end
|
25
24
|
|
26
25
|
def collect_jobs
|
27
|
-
(1
|
26
|
+
(1..@total_pages).to_a.each do |page|
|
28
27
|
current_page = "#{@url}?page=#{page}"
|
29
28
|
doc = Nokogiri::HTML(open_page(current_page))
|
30
29
|
process_page(doc, current_page, page)
|
@@ -45,20 +44,19 @@ module Sites
|
|
45
44
|
offer_text = job_page.css('.job-offer__description').to_s
|
46
45
|
|
47
46
|
location = Support::OfferParser.get_location(offer_text)
|
48
|
-
region = nil
|
49
47
|
keywords = Support::OfferParser.get_keywords(offer_text)
|
50
48
|
|
51
|
-
csv << [job_url, location,
|
49
|
+
csv << [job_url, location, keywords]
|
52
50
|
end
|
53
51
|
end
|
54
52
|
|
55
|
-
puts "[Done] Collected #{@count} job offers from #{url}. Data stores in: #{filepath}." if page ==
|
53
|
+
puts "[Done] Collected #{@count} job offers from #{url}. Data stores in: #{filepath}." if page == @total_pages
|
56
54
|
end
|
57
55
|
|
58
56
|
private
|
59
57
|
|
60
58
|
def get_count
|
61
|
-
25 *
|
59
|
+
25 * @total_pages
|
62
60
|
end
|
63
61
|
end
|
64
62
|
end
|
data/lib/sites/remote_ok.rb
CHANGED
@@ -24,10 +24,9 @@ module Sites
|
|
24
24
|
offer_text = job_page.css('td.heading').to_s
|
25
25
|
|
26
26
|
location = Support::OfferParser.get_location(offer_text)
|
27
|
-
region = nil
|
28
27
|
keywords = Support::OfferParser.get_keywords(offer_text)
|
29
28
|
|
30
|
-
csv << [job_url, location,
|
29
|
+
csv << [job_url, location, keywords]
|
31
30
|
end
|
32
31
|
end
|
33
32
|
|