indeed_scraper2022 0.2.0 → 0.2.1

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 07f5323381b5751c470454f6f4c3ba6dced6f1424e054b85360a49d814d662ba
4
- data.tar.gz: 3d25353b9f8a0543944cac82ef6dc91adf7d3e83444f3c6ef469f15cbba8a3d8
3
+ metadata.gz: 5e33dfd54667ecc9f8b7985aa07af403be8d95729ce68e8a40d8c985d57bd4e1
4
+ data.tar.gz: a2c041ec8103b6afac3a422e7b73bc82c89fd7f8d955240439a29ec0347c8a5f
5
5
  SHA512:
6
- metadata.gz: 5e13ae04b46bfa3eb15aab8d0aff388d8caec591c413493db591c37da099d2bcd5ba340a72137d4aa7d374652b68bc1d037b86fe4cc2ed2ae5b0a56c5202f00b
7
- data.tar.gz: ca14ae99251aabbcaee08a3bb6f240742ed1fab0f438496dc742ef39a10abb13e310b2d6a93bc472f5e1b3e45cfd8956d6a62f803b1d3a152054cf4e1ae35402
6
+ metadata.gz: 8e640cb8262a057bb588b501ee1122a59e6e239e2a5988dd0566ffffb814a2fef763c36fdeae1ba5dc4e6f819ca145374058bd62373ce776df1e393057a49fc0
7
+ data.tar.gz: 0a6bfe0ef2b685d5711a95704cee3fa67d58eb7c9d0f149c872f9c23b0cc489382ab1327b101f7accf982f7ea1f1a6d56dc20ae528ef1fd4d6105c9ef93067da
checksums.yaml.gz.sig CHANGED
Binary file
@@ -14,13 +14,17 @@ end
14
14
 
15
15
  class IndeedScraper2022
16
16
 
17
+ attr_reader :browser
18
+
17
19
  def initialize(url='https://uk.indeed.com/?r=us', q: '', location: '',
18
20
  headless: true, cookies: nil, debug: false)
19
21
 
20
22
  @debug = debug
21
23
  @url_base, @q, @location = url, q, location
22
24
  @headless, @cookies = headless, cookies
23
- @results = search(q: @q, location: @location)
25
+
26
+ fw = FerrumWizard.new( headless: @headless, cookies: @cookies, debug: @debug)
27
+ @browser = fw.browser
24
28
 
25
29
  end
26
30
 
@@ -32,35 +36,49 @@ class IndeedScraper2022
32
36
 
33
37
  def search(q: @q, location: @location, start: nil)
34
38
 
35
- fw = FerrumWizard.new( headless: @headless, cookies: @cookies, debug: @debug)
36
-
37
39
  url = @url_base
38
40
  url += 'start=' + start if start
39
41
 
40
- browser = fw.browser
41
- browser.goto(url)
42
+ @browser.goto(url)
43
+ #@browser.network.wait_for_idle
44
+ puts 'sleeping for 4 seconds' if @debug
45
+ sleep 4
42
46
 
43
47
  if q.length > 1 then
44
- input = browser.at_xpath("//input[@name='q']")
45
- input.focus.type(q)
48
+
49
+ input = @browser.at_xpath("//input[@name='q']")
50
+
51
+ # select any existing text and overwrite it
52
+ input.focus.type(:home); sleep 0.2
53
+ input.focus.type(:shift, :end); sleep 0.2
54
+ input.focus.type(q); sleep 0.2
46
55
  end
47
56
 
48
57
  if location.length > 1 then
49
- input2 = browser.at_xpath("//input[@name='l']")
50
- input2.focus.type(location)
58
+
59
+ input2 = @browser.at_xpath("//input[@name='l']")
60
+
61
+ # select any existing text and overwrite it
62
+ input2.focus.type(:home); sleep 0.2
63
+ input2.focus.type(:shift, :end); sleep 0.2
64
+ input2.focus.type(location); sleep 0.2
65
+
51
66
  end
52
67
 
53
- button = browser.at_xpath("//button[@type='submit']")
68
+ button = @browser.at_xpath("//button[@type='submit']")
54
69
  button.click
70
+ #@browser.network.wait_for_idle
71
+ puts 'sleeping for 2 seconds' if @debug
72
+ sleep 2
55
73
 
56
- doc2 = Nokogiri::XML(browser.body)
74
+ doc2 = Nokogiri::XML(@browser.body)
57
75
 
58
76
  a2 = doc2.xpath "//a[div/div/div/div/table/tbody/tr/td/div]"
59
77
  puts 'a2: ' + a2.length.inspect if @debug
60
78
 
61
79
  @a2 = a2.map {|x| Rexle.new x.to_s }
62
80
 
63
- @a2.map do |doc|
81
+ @results = @a2.map do |doc|
64
82
 
65
83
  div = doc.element("a[@class='desktop']/div[@class='slider" \
66
84
  "_container']/div[@class='slider_list']/div[@class='sl" \
@@ -202,7 +220,9 @@ class IS22Plus < IndeedScraper2022
202
220
 
203
221
  def archive()
204
222
 
205
- 1.upto(15).each do |n|
223
+ return unless @results
224
+
225
+ 1.upto(@results.length).each do |n|
206
226
  page(n)
207
227
  end
208
228
 
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: indeed_scraper2022
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
metadata.gz.sig CHANGED
Binary file