ferrumwizard 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2940cdfbf2aff12f2f0f70d219b46483f1561a3bf3a900299837644e1752a66f
4
- data.tar.gz: f747b47b9ebddb10046b559de95952b48c39dc3375d617f2d246c9bcb4cc697c
3
+ metadata.gz: 4b012bc3e8cb9b91311c91eecef6b816cae7bc3d179dc037ac0011373e1d54d2
4
+ data.tar.gz: f0421333194ed8b204ea87beab02314a44f978bbafa28809622b89d1db26bd4f
5
5
  SHA512:
6
- metadata.gz: 5e9bdf0508b48921682828926525b914b276269380c8871ea0ce70263878948fc27aab93d1ab1bf9c53b906b9cbebd2b3d19548b3595405b238b3c5935428174
7
- data.tar.gz: f6137500af8d41c75f077219ada94581cd0a5fc8313315a7fa92c0a883ec055613578b42e37831810d83d44215dae09f64ff31b29c6569e8aac88ca8362a9f61
6
+ metadata.gz: b96dd21840cbed0d1253f4795ddf4a4cf36250d5d3736b918c70c7c48ef63d4e8ba31592395cce6de691049a076f41186337a92643e3e1748e122f58edc51905
7
+ data.tar.gz: 39d7c5cc96e6900db1bbfa668ba9b972d49f09147d0fc5b079c34fd40e7235bb256a1713c1bf5725a16c0087dca959d5df8b12b7f10e093891478bf0a33f53f2
checksums.yaml.gz.sig CHANGED
Binary file
data/lib/ferrumwizard.rb CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  # file: ferrumwizard.rb
4
4
 
5
+ require 'yaml'
5
6
  require 'rexle'
6
7
  require 'ferrum'
7
8
 
@@ -10,67 +11,197 @@ class FerrumWizard
10
11
 
11
12
  attr_reader :browser, :links, :radio, :buttons, :js_methods
12
13
 
13
- def initialize(url, headless: true, debug: false)
14
+ def initialize(url=nil, headless: true, timeout: 10, debug: false)
14
15
 
15
16
  @url, @debug = url, debug
16
- @browser = Ferrum::Browser.new headless: headless
17
- sleep 2
17
+ @browser = Ferrum::Browser.new headless: headless, timeout: timeout
18
+ sleep 3
19
+
20
+ if url
21
+ @browser.goto(@url)
22
+ @browser.network.wait_for_idle
23
+ sleep 4
24
+ end
18
25
  end
19
-
26
+
20
27
  def inspect()
21
28
  "#<FerrumWizard>"
22
29
  end
23
-
30
+
31
+ # Intended to load all the cookies for a user to login automatically
32
+ #
33
+ # Follow these steps to load the cookies file:
34
+ #
35
+ # 1. launch the Ferrum browser
36
+ # fw = FerrumWizard.new( headless: false, debug: false)
37
+ #
38
+ # 2. load the cookies before you visit the website
39
+ # fw.load_cookies('/tmp/indeed2.txt')
40
+ #
41
+ # 3. visit the website
42
+ # url='https://somewebsite.com'
43
+ # fw.browser.goto(url)
44
+ #
45
+ def load_cookies(filepath)
46
+
47
+ rawcookies = YAML.load(File.read(filepath))
48
+
49
+ rawcookies.each do |h|
50
+
51
+ if @debug then
52
+ puts 'name: ' + h['name']
53
+ puts 'h: ' + h.inspect
54
+ sleep 0.7
55
+ end
56
+
57
+ browser.cookies.set(name: h['name'], value: h['value'],
58
+ domain: h['domain'], expires: h['expires'],
59
+ httponly: h['httpOnly'])
60
+ end
61
+
62
+ end
63
+
24
64
  def login(usernamex=nil, passwordx=nil, username: usernamex, password: passwordx)
25
-
26
- puts 'username: ' + username.inspect if @debug
27
65
 
28
- b = @browser
29
- b.goto(@url)
30
- @browser.network.wait_for_idle
31
- sleep 3
66
+ puts 'username: ' + username.inspect if @debug
32
67
 
33
68
  # search for the username input box
34
- e_username = b.at_xpath('//input[@type="email"]')
69
+ e_username = @browser.at_xpath('//input[@type="email"]')
35
70
  puts 'e_username: ' + e_username.inspect if @debug
36
71
  sleep 1
37
72
  # search for the password input box
38
- e_password = b.at_xpath('//input[@type="password"]')
73
+ found = @browser.at_xpath('//input[@type="password"]')
74
+
75
+ e_password = if found then
76
+ found
77
+ else
78
+ @browser.xpath('//input').find {|x| x.property(:id) =~ /password/i}
79
+ end
80
+
39
81
  sleep 1
40
-
82
+
41
83
  if username and e_username then
42
84
  puts 'entering the username' if @debug
43
- e_username.focus.type(username)
85
+ e_username.focus.type(username)
44
86
  sleep 1
45
87
  end
46
-
88
+
47
89
  e_password.focus.type(password, :Enter) if e_password
48
- @browser.network.wait_for_idle
49
-
50
- sleep 4
51
90
 
52
- scan_page()
53
-
54
- end
55
-
91
+ after_login()
92
+
93
+ end
94
+
95
+ # login2 is used for websites where the user is presented with the username
96
+ # input box on the first page and the password input box on the next page.
97
+ #
98
+ def login2(usernamex=nil, passwordx=nil, username: usernamex, password: passwordx)
99
+
100
+ puts 'username: ' + username.inspect if @debug
101
+
102
+ # search for the username input box
103
+ e_username = @browser.at_xpath('//input[@type="email"]')
104
+ puts 'e_username: ' + e_username.inspect if @debug
105
+ sleep 1
106
+ # search for the password input box
107
+
108
+ if username and e_username then
109
+ puts 'entering the username' if @debug
110
+ e_username.focus.type(username, :Enter)
111
+ sleep 2
112
+ end
113
+
114
+ e_password = @browser.at_xpath('//input[@type="password"]')
115
+ sleep 1
116
+
117
+ e_password.focus.type(password, :Enter) if e_password
118
+
119
+ after_login()
120
+
121
+
122
+ end
123
+
56
124
  def quit
57
125
  @browser.quit
58
126
  end
59
-
127
+
60
128
  def scan_page()
61
-
62
- @doc = Rexle.new @browser.body
129
+
130
+ @doc = Rexle.new @browser.body
63
131
  fetch_links()
64
- scan_form_elements()
132
+ scan_form_elements()
65
133
  scan_js_links()
134
+ @browser.mouse.scroll_to(0, 800)
66
135
  self
67
136
  end
68
137
 
138
+ # Saves all cookies for a given website into a YAML file
139
+ # see also load_cookies()
140
+ #
141
+ # To use this method follow these steps:
142
+ #
143
+ # 1. launch the web browser through Ferrum
144
+ # fw = FerrumWizard.new(url, headless: false, debug: false)
145
+ #
146
+ # 2. go to the browser and login using your credentials
147
+ # fw.save_cookies(filepath)
148
+ #
149
+ # 3. exit the IRB session
150
+ #
151
+ def save_cookies(filepath=Tempfile.new('ferrum').path)
152
+
153
+ rawcookies = @browser.cookies.all.keys.map do |key|
154
+
155
+ if @debug then
156
+ puts 'key: ' + key.inspect
157
+ sleep 0.5
158
+ end
159
+
160
+ s = @browser.cookies[key].inspect
161
+ a = s.scan(/"([^"]+)"=\>/)
162
+ s2 = s[/(?<=@attributes=).*(?=>)/]
163
+ eval(s2)
164
+
165
+ end
166
+
167
+ File.write filepath, rawcookies.to_yaml
168
+
169
+ end
170
+
171
+ def submit(h)
172
+
173
+ e = nil
174
+
175
+ h.each do |key, value|
176
+ e = @browser.xpath('//input').find {|x| x.attribute('name') == key.to_s}
177
+ e.focus.type(value)
178
+ end
179
+
180
+ e.focus.type('', :Enter)
181
+
182
+ sleep 4
183
+ scan_page()
184
+
185
+ end
186
+
69
187
  def to_rb()
70
188
  end
71
-
189
+
72
190
  private
73
-
191
+
192
+ def after_login()
193
+
194
+ @browser.network.wait_for_idle
195
+ sleep 4
196
+ scan_page()
197
+
198
+ @browser.base_url = File.dirname(@browser.url)
199
+ @browser.mouse.scroll_to(0, 800)
200
+ self
201
+
202
+ end
203
+
204
+
74
205
  def fetch_buttons()
75
206
 
76
207
  a2 = @browser.xpath('//input[@type="button"]')
@@ -86,24 +217,24 @@ class FerrumWizard
86
217
  buttons = @buttons
87
218
 
88
219
  names.each do |name|
89
-
220
+
90
221
  define_singleton_method name.to_sym do
91
222
  buttons[name].click
92
223
  @browser.network.wait_for_idle
93
224
  sleep = 1
94
225
  self
95
226
  end
96
-
227
+
97
228
  end
98
229
 
99
230
  end
100
-
231
+
101
232
  def fetch_links()
102
-
103
- all_links = @doc.root.xpath('//a')
104
-
233
+
234
+ all_links = @doc.root.xpath('//a[@href]')
235
+
105
236
  all_links.each do |x|
106
-
237
+
107
238
  if x.plaintext.empty? then
108
239
  x.text = x.attributes[:href].sub(/\.\w+$/,'')[/([^\/]+)$/].split(/[_]|(?=[A-Z])/).join(' ')
109
240
  else
@@ -111,90 +242,94 @@ class FerrumWizard
111
242
  end
112
243
 
113
244
  end
114
-
245
+
115
246
  valid_links = all_links.reject do |x|
116
-
247
+
117
248
  puts 'x: ' + x.inspect if @debug
118
249
  r = (x.attributes[:target] == '_blank')
119
250
 
120
251
  puts 'r: ' + r.inspect if @debug
121
252
  r
122
-
253
+
123
254
  end
255
+
124
256
  indices = valid_links.map {|x| all_links.index x}
125
257
 
126
- active_links = @browser.xpath('//a')
258
+ active_links = @browser.xpath('//a[@href]')
127
259
  valid_active_links = indices.map {|n| active_links[n]}
128
-
129
260
 
130
- @links = valid_active_links.flat_map.with_index do |x, i|
261
+
262
+ @links = valid_active_links.flat_map.with_index do |x, i|
131
263
 
132
264
  a = valid_links[i].text.split(/\W+/).map {|label| [label, x]}
133
265
  a << [valid_links[i].text, x]
134
-
266
+
135
267
  puts 'a: ' + a.inspect if @debug
136
- a + a.map {|x, obj| [x.downcase, obj]}
268
+ a + a.map {|x2, obj| [x2.downcase, obj]}
269
+
137
270
  end.to_h
138
-
271
+
139
272
  names = @links.keys.map(&:downcase).uniq.select {|x| x =~ /^[\w ]+$/}
140
273
  links = @links
141
-
274
+
142
275
  names.each do |name|
143
-
276
+
144
277
  define_singleton_method name.gsub(/ +/,'_').to_sym do
278
+
145
279
  links[name].click
280
+ @browser.network.wait_for_idle
281
+
146
282
  sleep 1
147
283
  scan_page()
148
284
  self
285
+
149
286
  end
150
-
287
+
151
288
  end
152
-
289
+
153
290
  end
154
291
 
155
292
  def scan_form_elements()
156
-
293
+
157
294
  # find radio buttons
158
-
159
- #a = doc.root.xpath('//input[@type="radio"]')
295
+
160
296
  a = @browser.xpath('//input[@type="radio"]')
161
- #h = a.group_by {|x| x.attributes[:name]}
162
297
  h = a.group_by {|x| x.attribute('name')}
163
298
  @radio = h.values
164
299
  define_singleton_method(:on) { @radio[0][0].click; self }
165
300
  define_singleton_method(:off) { @radio[0][1].click; self }
166
-
301
+
167
302
  fetch_buttons()
168
-
303
+
169
304
  end
170
-
305
+
171
306
  def scan_js_links()
172
-
307
+
173
308
  @js_methods = {}
174
309
  b = @browser
175
-
310
+
176
311
  b.xpath('//a').select {|x| x.attribute('href') =~ /^javascript/}.each do |e|
177
-
312
+
178
313
 
179
314
  s = e.attribute('href')[/(?<=^javascript:)[^\(]+/]
180
- puts 's: ' + s.inspect
315
+ puts 's: ' + s.inspect if @debug
181
316
  a = s.split(/\W+|(?=[A-Z])/).map {|label| [label, s]}
182
317
  a << [s, s]
183
318
  a << [s.split(/\W+|(?=[A-Z])/).join('_'), s]
184
319
  a << [s.split(/\W+|(?=[A-Z])/).join('_').downcase, s]
185
320
  #@js_methods[s] = a
186
321
 
187
- a.concat a.map {|x, name| [x.downcase, name] }
322
+ a.concat a.map {|x, name| [x.downcase, name] }
188
323
 
189
- puts 'a: ' + a.inspect
324
+ puts 'a: ' + a.inspect if @debug
190
325
 
191
326
  a.uniq.select {|x, _| x =~ /^[a-z0-9_]+$/}.each do |x, name|
192
-
327
+
193
328
  if @debug then
194
329
  puts 'x: ' + x.inspect
195
330
  puts 'name: ' + name.inspect
196
331
  end
197
-
332
+
198
333
  define_singleton_method(x.to_sym) do |*args|
199
334
  #args = raw_args.map {|x| x[/^[0-9]+$/] ? x.to_i : x}
200
335
  js_method = "%s(%s)" % [name, args.map(&:inspect).join(', ')]
@@ -203,9 +338,9 @@ class FerrumWizard
203
338
  sleep 4
204
339
  self.scan_page()
205
340
  end
206
-
341
+
207
342
  end
208
-
343
+
209
344
  end
210
345
  end
211
346
 
@@ -214,7 +349,7 @@ class FerrumWizard
214
349
  puts 'method_missing: ' + method_name.inspect if @debug
215
350
  node = @browser.at_css '.' + method_name.to_s
216
351
  node.text if node
217
-
218
- end
219
-
352
+
353
+ end
354
+
220
355
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ferrumwizard
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -11,31 +11,31 @@ cert_chain:
11
11
  - |
12
12
  -----BEGIN CERTIFICATE-----
13
13
  MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
- YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjAwNzMwMTYxOTE5WhcN
15
- MjEwNzMwMTYxOTE5WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
- cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDhNEIG
17
- /Ab4nneih/AQFMcYk76JCiy26Xcy5uxd9ib7Emkj/9sZo6nxuSBaH03+Ixv3jgJs
18
- TxZyaIKRsESFFmupYmKsyatCGGaBEsDb210ZBm313rP2Pk2fGrUtON0CjwJljWxR
19
- 8pHuglEXrGN/XhVicy7sZLJ2nVnvRtyiKi92XmY0S9LaCkWlOx2f3D7yiazkmHh5
20
- 59nHiGNlZ/SOzFrRMdBvkWZYHgqUEBv0KxEuMqW65U4HdlQImcwqu8XOWH9kutof
21
- yyisv03kPqMvrOC8ptG/TieKYK0JuY23gS9MrVxkrf0gX3IQLY21JWG9t9uRImc/
22
- kHC+EJ2rI8HQqcq/v6dndJb6MhYEhj7R5XsZqlfsLFo21FFBAyaPrqPRUstnW5U0
23
- /tCpcuFyZJeRPqQ8LSlRGDuB/TdmV9dF+P5aGS32k9Okf9L6E6x3OGV29eMHSdDt
24
- LOOB8l0EJbNXzpvYW+htziU8TbuzRQU8K7uTeAfpMUg4auPxdVyQpJcQWXcCAwEA
25
- AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQU2+nN7PCw
26
- js3NFmK8b17Ji/t+dvwwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwMzMwMTI1ODU1WhcN
15
+ MjMwMzMwMTI1ODU1WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDROL32
17
+ 3LQKGcDR6x6XFa1US/Vq98DVnMeZHCSKdf471I4gJIOA7sQnrQTB6IZKTxb94Wjr
18
+ OSeGzlJpVq6pa7ltxvb9T7YQVVRrXYMC+u0gD9ukolnkpV/4Rh2/IIMxSNKncoZB
19
+ LKPseizGKlli4gs134gAu3wuWdCC7/UWPG/XyocdJC8tLtf/zi4JuRJTojKqYLOp
20
+ KsP9jHPmGVr81cW8HePmhQ/+LiYlKDE4Fwj4yl16XqhF7/5YOz9e5LOHsMUEord4
21
+ JscQ3GnhMfEXGJpwqCwNEpM3xAwcHp2DDdrwtT36ujSfnTJ3UpUIQUKVehA2i9rm
22
+ uDcDTr1PATGcOMPpExvLZu3a9uC81mj9z+axH5mWQ7jZ92sze79oAQTsMiMyBavJ
23
+ djSpnVBo71PFk8QekgIVVBIzG0iN5zoNUrSthvL/xUWXM6ea015HEDCCIEL417ID
24
+ humVWZyzKf7ITCdZWcxTgTgFfuPMctcICT5u7va+FrycYpdtt8kXvtD3VnkCAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUDipvmNU2
26
+ WydgAK8QPGb0vhhoGl0wJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
27
  c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
- BgkqhkiG9w0BAQsFAAOCAYEAlaYpTQ2vLuKU/nJl1inw9iE9XCwnTmIhmA9lnu1q
29
- QKKCd7Z2PwtkahbDvMVQ347DQZQAanuZmtTPFMc4FDA530qJtwoYk03FTQXBh12M
30
- d4C27VP9BOrUQcxkqtnTo+4Z60taszXqyPsPYU+Fd8AZUPeS5TOYG52OXTQ+q+pO
31
- vNxkRP9oEka81ZrN1y3r3YaFHATZzf4pJo0HupZvMsQwa33/vA+xxxpDeTuWytNN
32
- O0mYbo8Em2LnPnE8ehOnniDGXIIaDO9B1Qbbr0GhNCIWq3JIcbI2IBCKFWA6HyNF
33
- yCdr7ZqPrnxXlhhnTPLFkzR/0+XxpbrdW4zb6uQqX92/tiUqP9uKf5dBEVoCWax/
34
- IWPJE5JXx2iMvE9cWe4bFCUi7cZT7HsL6jkdUWxeTvsfc7XMbE8eWtHHiG6NjeFJ
35
- 7e24hNRMt3t/JE9ogEO4JzFUH2vq2zzR5X9JQqEclWfwHi4cf8bZFJ7spjZQPjSZ
36
- Ok3rs0A+kW4ixAj1rDYuoyG/
28
+ BgkqhkiG9w0BAQsFAAOCAYEAHqRvm6iqjJ+bpEzSSgVmOMOMcgIoN6px1LMVAOmY
29
+ BJpF5F0fJr99thc1EYZJoRTwEcXJYhCTqKg+3xhNKpCzk2qHsaLKYEygPeBpyJOg
30
+ LyfHLrj98QLPYyFzqhWsqZAAAGC9WSF/kBJazpuotU2ec/Xw/e3NPopedV/Zvuhs
31
+ +/OKZWwRez/hg97ckaCYAp/7OdrVhJvR87MaQnN52Uk8OQbuPSyUNQUJ044HWHtu
32
+ lEJjsDetEFhNB69j3wAIMjMEZao29/dZhALbUDp+9ewK7uYbrX9Bo68NX+H+XcCZ
33
+ VFrdrjkyJUOHwSmvjYXN1V0Yz8kVVFU7E+Q4RHL8yAwBv+ynd927HtZVjs+455Pc
34
+ z+9gNpBQVr6LLXLJgJF2pTaIoYhgG6pcoMQHGVoxWdKzvOcl0h1epeJSp/aynX/r
35
+ FK+cyrQNA9DLJYJuz6uO7Z+gXZWjwAO38LUwF01w49asSv/5ZH2HH/EauX5xWpe+
36
+ ry6lYQlb8j50Iys5elAy1p0i
37
37
  -----END CERTIFICATE-----
38
- date: 2020-08-01 00:00:00.000000000 Z
38
+ date: 2022-03-30 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: rexle
@@ -46,7 +46,7 @@ dependencies:
46
46
  version: '1.5'
47
47
  - - ">="
48
48
  - !ruby/object:Gem::Version
49
- version: 1.5.7
49
+ version: 1.5.14
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
@@ -56,29 +56,29 @@ dependencies:
56
56
  version: '1.5'
57
57
  - - ">="
58
58
  - !ruby/object:Gem::Version
59
- version: 1.5.7
59
+ version: 1.5.14
60
60
  - !ruby/object:Gem::Dependency
61
61
  name: ferrum
62
62
  requirement: !ruby/object:Gem::Requirement
63
63
  requirements:
64
- - - ">="
65
- - !ruby/object:Gem::Version
66
- version: 0.9.0
67
64
  - - "~>"
68
65
  - !ruby/object:Gem::Version
69
- version: '0.9'
66
+ version: '0.11'
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0.11'
70
70
  type: :runtime
71
71
  prerelease: false
72
72
  version_requirements: !ruby/object:Gem::Requirement
73
73
  requirements:
74
- - - ">="
75
- - !ruby/object:Gem::Version
76
- version: 0.9.0
77
74
  - - "~>"
78
75
  - !ruby/object:Gem::Version
79
- version: '0.9'
76
+ version: '0.11'
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0.11'
80
80
  description:
81
- email: james@jamesrobertson.eu
81
+ email: digital.robertson@gmail.com
82
82
  executables: []
83
83
  extensions: []
84
84
  extra_rdoc_files: []
@@ -103,7 +103,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
103
103
  - !ruby/object:Gem::Version
104
104
  version: '0'
105
105
  requirements: []
106
- rubygems_version: 3.0.3
106
+ rubygems_version: 3.2.22
107
107
  signing_key:
108
108
  specification_version: 4
109
109
  summary: Makes web scraping easier using the Ferrum gem.
metadata.gz.sig CHANGED
Binary file