indeed_scraper2022 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 07f5323381b5751c470454f6f4c3ba6dced6f1424e054b85360a49d814d662ba
4
- data.tar.gz: 3d25353b9f8a0543944cac82ef6dc91adf7d3e83444f3c6ef469f15cbba8a3d8
3
+ metadata.gz: 5e33dfd54667ecc9f8b7985aa07af403be8d95729ce68e8a40d8c985d57bd4e1
4
+ data.tar.gz: a2c041ec8103b6afac3a422e7b73bc82c89fd7f8d955240439a29ec0347c8a5f
5
5
  SHA512:
6
- metadata.gz: 5e13ae04b46bfa3eb15aab8d0aff388d8caec591c413493db591c37da099d2bcd5ba340a72137d4aa7d374652b68bc1d037b86fe4cc2ed2ae5b0a56c5202f00b
7
- data.tar.gz: ca14ae99251aabbcaee08a3bb6f240742ed1fab0f438496dc742ef39a10abb13e310b2d6a93bc472f5e1b3e45cfd8956d6a62f803b1d3a152054cf4e1ae35402
6
+ metadata.gz: 8e640cb8262a057bb588b501ee1122a59e6e239e2a5988dd0566ffffb814a2fef763c36fdeae1ba5dc4e6f819ca145374058bd62373ce776df1e393057a49fc0
7
+ data.tar.gz: 0a6bfe0ef2b685d5711a95704cee3fa67d58eb7c9d0f149c872f9c23b0cc489382ab1327b101f7accf982f7ea1f1a6d56dc20ae528ef1fd4d6105c9ef93067da
checksums.yaml.gz.sig CHANGED
Binary file
@@ -14,13 +14,17 @@ end
14
14
 
15
15
  class IndeedScraper2022
16
16
 
17
+ attr_reader :browser
18
+
17
19
  def initialize(url='https://uk.indeed.com/?r=us', q: '', location: '',
18
20
  headless: true, cookies: nil, debug: false)
19
21
 
20
22
  @debug = debug
21
23
  @url_base, @q, @location = url, q, location
22
24
  @headless, @cookies = headless, cookies
23
- @results = search(q: @q, location: @location)
25
+
26
+ fw = FerrumWizard.new( headless: @headless, cookies: @cookies, debug: @debug)
27
+ @browser = fw.browser
24
28
 
25
29
  end
26
30
 
@@ -32,35 +36,49 @@ class IndeedScraper2022
32
36
 
33
37
  def search(q: @q, location: @location, start: nil)
34
38
 
35
- fw = FerrumWizard.new( headless: @headless, cookies: @cookies, debug: @debug)
36
-
37
39
  url = @url_base
38
40
  url += 'start=' + start if start
39
41
 
40
- browser = fw.browser
41
- browser.goto(url)
42
+ @browser.goto(url)
43
+ #@browser.network.wait_for_idle
44
+ puts 'sleeping for 4 seconds' if @debug
45
+ sleep 4
42
46
 
43
47
  if q.length > 1 then
44
- input = browser.at_xpath("//input[@name='q']")
45
- input.focus.type(q)
48
+
49
+ input = @browser.at_xpath("//input[@name='q']")
50
+
51
+ # select any existing text and overwrite it
52
+ input.focus.type(:home); sleep 0.2
53
+ input.focus.type(:shift, :end); sleep 0.2
54
+ input.focus.type(q); sleep 0.2
46
55
  end
47
56
 
48
57
  if location.length > 1 then
49
- input2 = browser.at_xpath("//input[@name='l']")
50
- input2.focus.type(location)
58
+
59
+ input2 = @browser.at_xpath("//input[@name='l']")
60
+
61
+ # select any existing text and overwrite it
62
+ input2.focus.type(:home); sleep 0.2
63
+ input2.focus.type(:shift, :end); sleep 0.2
64
+ input2.focus.type(location); sleep 0.2
65
+
51
66
  end
52
67
 
53
- button = browser.at_xpath("//button[@type='submit']")
68
+ button = @browser.at_xpath("//button[@type='submit']")
54
69
  button.click
70
+ #@browser.network.wait_for_idle
71
+ puts 'sleeping for 2 seconds' if @debug
72
+ sleep 2
55
73
 
56
- doc2 = Nokogiri::XML(browser.body)
74
+ doc2 = Nokogiri::XML(@browser.body)
57
75
 
58
76
  a2 = doc2.xpath "//a[div/div/div/div/table/tbody/tr/td/div]"
59
77
  puts 'a2: ' + a2.length.inspect if @debug
60
78
 
61
79
  @a2 = a2.map {|x| Rexle.new x.to_s }
62
80
 
63
- @a2.map do |doc|
81
+ @results = @a2.map do |doc|
64
82
 
65
83
  div = doc.element("a[@class='desktop']/div[@class='slider" \
66
84
  "_container']/div[@class='slider_list']/div[@class='sl" \
@@ -202,7 +220,9 @@ class IS22Plus < IndeedScraper2022
202
220
 
203
221
  def archive()
204
222
 
205
- 1.upto(15).each do |n|
223
+ return unless @results
224
+
225
+ 1.upto(@results.length).each do |n|
206
226
  page(n)
207
227
  end
208
228
 
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: indeed_scraper2022
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
metadata.gz.sig CHANGED
Binary file