ferrumwizard 0.1.4 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 96cb226b3acf38c7c4b4f8c82665175025effc7d2cbe25d0ae6026e92109c899
4
- data.tar.gz: e31dbbc763e88c553881660d7579117781934e92c2eae10f32fff329da670e09
3
+ metadata.gz: 3b11fa25652b962c446a8022e3d9cfce067110ac2c6c6509995533449942ad70
4
+ data.tar.gz: e487524c0b613f6a1aea2872faccc520106cd57bbfbee5f9725504467c219e51
5
5
  SHA512:
6
- metadata.gz: 0d6f8bd47a33d4d8d3224cc231a993a36c47fa297c91e7b83a3888094de658cf129cc289b001935b442fd96f93103bfe5e214551b5d365de18971330d0e380e2
7
- data.tar.gz: 7fce37cf15f5a493b0261b36eb843ad8087a19b040c14176a0488b0ab0c84220b5e70c12fdb5a962443608092e148b7ecff2832311d7395209115e5cfec5e268
6
+ metadata.gz: fd67b1f378008cfb07e86fec087a1ce3344f5df905bde152d447b731d9936d3e26240932cf68a1a837acf10338ffbe22f689af378939e19cb2f42a47d112f7ab
7
+ data.tar.gz: a098e1b6ae39a3c5dc0dcfc35250a14993036ec4907e28cc9e3027d0ab3c259043086f5d177424abca3517cb58b59649a15487d68d93098abc6305d4459e3fee
checksums.yaml.gz.sig CHANGED
Binary file
data/lib/ferrumwizard.rb CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  # file: ferrumwizard.rb
4
4
 
5
+ require 'yaml'
5
6
  require 'rexle'
6
7
  require 'ferrum'
7
8
 
@@ -10,25 +11,66 @@ class FerrumWizard
10
11
 
11
12
  attr_reader :browser, :links, :radio, :buttons, :js_methods
12
13
 
13
- def initialize(url, headless: true, timeout: 10, debug: false)
14
+ def initialize(url=nil, headless: true, timeout: 10, cookies: nil,
15
+ debug: false)
14
16
 
15
17
  @url, @debug = url, debug
16
18
  @browser = Ferrum::Browser.new headless: headless, timeout: timeout
17
- sleep 3
18
-
19
- if url
19
+
20
+ loadx(cookies) if cookies
21
+
22
+ if url then
23
+
24
+ sleep 3
25
+
20
26
  @browser.goto(@url)
21
27
  @browser.network.wait_for_idle
22
- sleep 4
28
+ sleep 4
29
+
23
30
  end
24
31
  end
25
-
32
+
26
33
  def inspect()
27
34
  "#<FerrumWizard>"
28
35
  end
29
-
36
+
37
+ # Intended to load all the cookies so that a user can log in automatically
38
+ #
39
+ # Follow these steps to load the cookies file:
40
+ #
41
+ # 1. launch the Ferrum browser
42
+ # fw = FerrumWizard.new( headless: false, debug: false)
43
+ #
44
+ # 2. load the cookies before you visit the website
45
+ # fw.load_cookies('/tmp/indeed2.txt')
46
+ #
47
+ # 3. visit the website
48
+ # url='https://somewebsite.com'
49
+ # fw.browser.goto(url)
50
+ #
51
+ def load_cookies(filepath)
52
+
53
+ rawcookies = YAML.load(File.read(filepath))
54
+
55
+ rawcookies.each do |h|
56
+
57
+ if @debug then
58
+ puts 'name: ' + h['name']
59
+ puts 'h: ' + h.inspect
60
+ sleep 0.7
61
+ end
62
+
63
+ browser.cookies.set(name: h['name'], value: h['value'],
64
+ domain: h['domain'], expires: h['expires'],
65
+ httponly: h['httpOnly'])
66
+ end
67
+
68
+ end
69
+
70
+ alias loadx load_cookies
71
+
30
72
  def login(usernamex=nil, passwordx=nil, username: usernamex, password: passwordx)
31
-
73
+
32
74
  puts 'username: ' + username.inspect if @debug
33
75
 
34
76
  # search for the username input box
@@ -36,23 +78,33 @@ class FerrumWizard
36
78
  puts 'e_username: ' + e_username.inspect if @debug
37
79
  sleep 1
38
80
  # search for the password input box
39
- e_password = @browser.at_xpath('//input[@type="password"]')
81
+ found = @browser.at_xpath('//input[@type="password"]')
82
+
83
+ e_password = if found then
84
+ found
85
+ else
86
+ @browser.xpath('//input').find {|x| x.property(:id) =~ /password/i}
87
+ end
88
+
40
89
  sleep 1
41
-
90
+
42
91
  if username and e_username then
43
92
  puts 'entering the username' if @debug
44
- e_username.focus.type(username)
93
+ e_username.focus.type(username)
45
94
  sleep 1
46
95
  end
47
-
96
+
48
97
  e_password.focus.type(password, :Enter) if e_password
49
-
98
+
50
99
  after_login()
51
-
52
- end
53
-
100
+
101
+ end
102
+
103
+ # login2 is used for websites where the user is presented with the username
104
+ # input box on the first page and the password input box on the next page.
105
+ #
54
106
  def login2(usernamex=nil, passwordx=nil, username: usernamex, password: passwordx)
55
-
107
+
56
108
  puts 'username: ' + username.inspect if @debug
57
109
 
58
110
  # search for the username input box
@@ -60,70 +112,104 @@ class FerrumWizard
60
112
  puts 'e_username: ' + e_username.inspect if @debug
61
113
  sleep 1
62
114
  # search for the password input box
63
-
115
+
64
116
  if username and e_username then
65
117
  puts 'entering the username' if @debug
66
- e_username.focus.type(username, :Enter)
118
+ e_username.focus.type(username, :Enter)
67
119
  sleep 2
68
120
  end
69
121
 
70
- e_password = b.at_xpath('//input[@type="password"]')
122
+ e_password = @browser.at_xpath('//input[@type="password"]')
71
123
  sleep 1
72
-
124
+
73
125
  e_password.focus.type(password, :Enter) if e_password
74
-
126
+
75
127
  after_login()
76
128
 
77
-
78
- end
79
-
129
+
130
+ end
131
+
80
132
  def quit
81
133
  @browser.quit
82
134
  end
83
-
135
+
84
136
  def scan_page()
85
-
86
- @doc = Rexle.new @browser.body
137
+
138
+ @doc = Rexle.new @browser.body
87
139
  fetch_links()
88
- scan_form_elements()
140
+ scan_form_elements()
89
141
  scan_js_links()
90
- self
142
+ @browser.mouse.scroll_to(0, 800)
143
+ self
144
+ end
145
+
146
+ # Saves all cookies for a given website into a YAML file
147
+ # see also load_cookies()
148
+ #
149
+ # To use this method follow these steps:
150
+ #
151
+ # 1. launch the web browser through Ferrum
152
+ # fw = FerrumWizard.new(url, headless: false, debug: false)
153
+ #
154
+ # 2. go to the browser and log in using your credentials, then save the cookies
155
+ # fw.save_cookies(filepath)
156
+ #
157
+ # 3. exit the IRB session
158
+ #
159
+ def save_cookies(filepath=Tempfile.new('ferrum').path)
160
+
161
+ rawcookies = @browser.cookies.all.keys.map do |key|
162
+
163
+ if @debug then
164
+ puts 'key: ' + key.inspect
165
+ sleep 0.5
166
+ end
167
+
168
+ s = @browser.cookies[key].inspect
169
+ a = s.scan(/"([^"]+)"=\>/)
170
+ s2 = s[/(?<=@attributes=).*(?=>)/]
171
+ eval(s2)
172
+
173
+ end
174
+
175
+ File.write filepath, rawcookies.to_yaml
176
+
91
177
  end
92
-
178
+
93
179
  def submit(h)
94
180
 
95
181
  e = nil
96
-
182
+
97
183
  h.each do |key, value|
98
- e = @browser.xpath('//input').find {|x| x.attribute('name') == key.to_s}
99
- e.focus.type(value)
184
+ e = @browser.xpath('//input').find {|x| x.attribute('name') == key.to_s}
185
+ e.focus.type(value)
100
186
  end
101
-
102
- e.focus.type('', :Enter)
103
-
104
- sleep 4
187
+
188
+ e.focus.type('', :Enter)
189
+
190
+ sleep 4
105
191
  scan_page()
106
-
192
+
107
193
  end
108
194
 
109
195
  def to_rb()
110
196
  end
111
-
197
+
112
198
  private
113
-
199
+
114
200
  def after_login()
115
-
116
- @browser.network.wait_for_idle
201
+
202
+ @browser.network.wait_for_idle
117
203
  sleep 4
118
204
  scan_page()
119
-
205
+
120
206
  @browser.base_url = File.dirname(@browser.url)
121
207
  @browser.mouse.scroll_to(0, 800)
122
208
  self
123
-
209
+
124
210
  end
125
-
126
-
211
+
212
+
127
213
  def fetch_buttons()
128
214
 
129
215
  a2 = @browser.xpath('//input[@type="button"]')
@@ -139,24 +225,24 @@ class FerrumWizard
139
225
  buttons = @buttons
140
226
 
141
227
  names.each do |name|
142
-
228
+
143
229
  define_singleton_method name.to_sym do
144
230
  buttons[name].click
145
231
  @browser.network.wait_for_idle
146
232
  sleep = 1
147
233
  self
148
234
  end
149
-
235
+
150
236
  end
151
237
 
152
238
  end
153
-
239
+
154
240
  def fetch_links()
155
-
241
+
156
242
  all_links = @doc.root.xpath('//a[@href]')
157
-
243
+
158
244
  all_links.each do |x|
159
-
245
+
160
246
  if x.plaintext.empty? then
161
247
  x.text = x.attributes[:href].sub(/\.\w+$/,'')[/([^\/]+)$/].split(/[_]|(?=[A-Z])/).join(' ')
162
248
  else
@@ -164,74 +250,74 @@ class FerrumWizard
164
250
  end
165
251
 
166
252
  end
167
-
253
+
168
254
  valid_links = all_links.reject do |x|
169
-
255
+
170
256
  puts 'x: ' + x.inspect if @debug
171
257
  r = (x.attributes[:target] == '_blank')
172
258
 
173
259
  puts 'r: ' + r.inspect if @debug
174
260
  r
175
-
261
+
176
262
  end
177
-
263
+
178
264
  indices = valid_links.map {|x| all_links.index x}
179
265
 
180
266
  active_links = @browser.xpath('//a[@href]')
181
267
  valid_active_links = indices.map {|n| active_links[n]}
182
-
183
268
 
184
- @links = valid_active_links.flat_map.with_index do |x, i|
269
+
270
+ @links = valid_active_links.flat_map.with_index do |x, i|
185
271
 
186
272
  a = valid_links[i].text.split(/\W+/).map {|label| [label, x]}
187
273
  a << [valid_links[i].text, x]
188
-
274
+
189
275
  puts 'a: ' + a.inspect if @debug
190
276
  a + a.map {|x2, obj| [x2.downcase, obj]}
191
-
277
+
192
278
  end.to_h
193
-
279
+
194
280
  names = @links.keys.map(&:downcase).uniq.select {|x| x =~ /^[\w ]+$/}
195
281
  links = @links
196
-
282
+
197
283
  names.each do |name|
198
-
284
+
199
285
  define_singleton_method name.gsub(/ +/,'_').to_sym do
200
-
286
+
201
287
  links[name].click
202
288
  @browser.network.wait_for_idle
203
-
289
+
204
290
  sleep 1
205
291
  scan_page()
206
292
  self
207
-
293
+
208
294
  end
209
-
295
+
210
296
  end
211
-
297
+
212
298
  end
213
299
 
214
300
  def scan_form_elements()
215
-
301
+
216
302
  # find radio buttons
217
-
303
+
218
304
  a = @browser.xpath('//input[@type="radio"]')
219
305
  h = a.group_by {|x| x.attribute('name')}
220
306
  @radio = h.values
221
307
  define_singleton_method(:on) { @radio[0][0].click; self }
222
308
  define_singleton_method(:off) { @radio[0][1].click; self }
223
-
309
+
224
310
  fetch_buttons()
225
-
311
+
226
312
  end
227
-
313
+
228
314
  def scan_js_links()
229
-
315
+
230
316
  @js_methods = {}
231
317
  b = @browser
232
-
318
+
233
319
  b.xpath('//a').select {|x| x.attribute('href') =~ /^javascript/}.each do |e|
234
-
320
+
235
321
 
236
322
  s = e.attribute('href')[/(?<=^javascript:)[^\(]+/]
237
323
  puts 's: ' + s.inspect if @debug
@@ -241,17 +327,17 @@ class FerrumWizard
241
327
  a << [s.split(/\W+|(?=[A-Z])/).join('_').downcase, s]
242
328
  #@js_methods[s] = a
243
329
 
244
- a.concat a.map {|x, name| [x.downcase, name] }
330
+ a.concat a.map {|x, name| [x.downcase, name] }
245
331
 
246
332
  puts 'a: ' + a.inspect if @debug
247
333
 
248
334
  a.uniq.select {|x, _| x =~ /^[a-z0-9_]+$/}.each do |x, name|
249
-
335
+
250
336
  if @debug then
251
337
  puts 'x: ' + x.inspect
252
338
  puts 'name: ' + name.inspect
253
339
  end
254
-
340
+
255
341
  define_singleton_method(x.to_sym) do |*args|
256
342
  #args = raw_args.map {|x| x[/^[0-9]+$/] ? x.to_i : x}
257
343
  js_method = "%s(%s)" % [name, args.map(&:inspect).join(', ')]
@@ -260,9 +346,9 @@ class FerrumWizard
260
346
  sleep 4
261
347
  self.scan_page()
262
348
  end
263
-
349
+
264
350
  end
265
-
351
+
266
352
  end
267
353
  end
268
354
 
@@ -271,7 +357,7 @@ class FerrumWizard
271
357
  puts 'method_missing: ' + method_name.inspect if @debug
272
358
  node = @browser.at_css '.' + method_name.to_s
273
359
  node.text if node
274
-
275
- end
276
-
360
+
361
+ end
362
+
277
363
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ferrumwizard
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -11,31 +11,31 @@ cert_chain:
11
11
  - |
12
12
  -----BEGIN CERTIFICATE-----
13
13
  MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
- YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjAwNzMwMTYxOTE5WhcN
15
- MjEwNzMwMTYxOTE5WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
- cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDhNEIG
17
- /Ab4nneih/AQFMcYk76JCiy26Xcy5uxd9ib7Emkj/9sZo6nxuSBaH03+Ixv3jgJs
18
- TxZyaIKRsESFFmupYmKsyatCGGaBEsDb210ZBm313rP2Pk2fGrUtON0CjwJljWxR
19
- 8pHuglEXrGN/XhVicy7sZLJ2nVnvRtyiKi92XmY0S9LaCkWlOx2f3D7yiazkmHh5
20
- 59nHiGNlZ/SOzFrRMdBvkWZYHgqUEBv0KxEuMqW65U4HdlQImcwqu8XOWH9kutof
21
- yyisv03kPqMvrOC8ptG/TieKYK0JuY23gS9MrVxkrf0gX3IQLY21JWG9t9uRImc/
22
- kHC+EJ2rI8HQqcq/v6dndJb6MhYEhj7R5XsZqlfsLFo21FFBAyaPrqPRUstnW5U0
23
- /tCpcuFyZJeRPqQ8LSlRGDuB/TdmV9dF+P5aGS32k9Okf9L6E6x3OGV29eMHSdDt
24
- LOOB8l0EJbNXzpvYW+htziU8TbuzRQU8K7uTeAfpMUg4auPxdVyQpJcQWXcCAwEA
25
- AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQU2+nN7PCw
26
- js3NFmK8b17Ji/t+dvwwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwMzMwMTI1ODU1WhcN
15
+ MjMwMzMwMTI1ODU1WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDROL32
17
+ 3LQKGcDR6x6XFa1US/Vq98DVnMeZHCSKdf471I4gJIOA7sQnrQTB6IZKTxb94Wjr
18
+ OSeGzlJpVq6pa7ltxvb9T7YQVVRrXYMC+u0gD9ukolnkpV/4Rh2/IIMxSNKncoZB
19
+ LKPseizGKlli4gs134gAu3wuWdCC7/UWPG/XyocdJC8tLtf/zi4JuRJTojKqYLOp
20
+ KsP9jHPmGVr81cW8HePmhQ/+LiYlKDE4Fwj4yl16XqhF7/5YOz9e5LOHsMUEord4
21
+ JscQ3GnhMfEXGJpwqCwNEpM3xAwcHp2DDdrwtT36ujSfnTJ3UpUIQUKVehA2i9rm
22
+ uDcDTr1PATGcOMPpExvLZu3a9uC81mj9z+axH5mWQ7jZ92sze79oAQTsMiMyBavJ
23
+ djSpnVBo71PFk8QekgIVVBIzG0iN5zoNUrSthvL/xUWXM6ea015HEDCCIEL417ID
24
+ humVWZyzKf7ITCdZWcxTgTgFfuPMctcICT5u7va+FrycYpdtt8kXvtD3VnkCAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUDipvmNU2
26
+ WydgAK8QPGb0vhhoGl0wJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
27
  c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
- BgkqhkiG9w0BAQsFAAOCAYEAlaYpTQ2vLuKU/nJl1inw9iE9XCwnTmIhmA9lnu1q
29
- QKKCd7Z2PwtkahbDvMVQ347DQZQAanuZmtTPFMc4FDA530qJtwoYk03FTQXBh12M
30
- d4C27VP9BOrUQcxkqtnTo+4Z60taszXqyPsPYU+Fd8AZUPeS5TOYG52OXTQ+q+pO
31
- vNxkRP9oEka81ZrN1y3r3YaFHATZzf4pJo0HupZvMsQwa33/vA+xxxpDeTuWytNN
32
- O0mYbo8Em2LnPnE8ehOnniDGXIIaDO9B1Qbbr0GhNCIWq3JIcbI2IBCKFWA6HyNF
33
- yCdr7ZqPrnxXlhhnTPLFkzR/0+XxpbrdW4zb6uQqX92/tiUqP9uKf5dBEVoCWax/
34
- IWPJE5JXx2iMvE9cWe4bFCUi7cZT7HsL6jkdUWxeTvsfc7XMbE8eWtHHiG6NjeFJ
35
- 7e24hNRMt3t/JE9ogEO4JzFUH2vq2zzR5X9JQqEclWfwHi4cf8bZFJ7spjZQPjSZ
36
- Ok3rs0A+kW4ixAj1rDYuoyG/
28
+ BgkqhkiG9w0BAQsFAAOCAYEAHqRvm6iqjJ+bpEzSSgVmOMOMcgIoN6px1LMVAOmY
29
+ BJpF5F0fJr99thc1EYZJoRTwEcXJYhCTqKg+3xhNKpCzk2qHsaLKYEygPeBpyJOg
30
+ LyfHLrj98QLPYyFzqhWsqZAAAGC9WSF/kBJazpuotU2ec/Xw/e3NPopedV/Zvuhs
31
+ +/OKZWwRez/hg97ckaCYAp/7OdrVhJvR87MaQnN52Uk8OQbuPSyUNQUJ044HWHtu
32
+ lEJjsDetEFhNB69j3wAIMjMEZao29/dZhALbUDp+9ewK7uYbrX9Bo68NX+H+XcCZ
33
+ VFrdrjkyJUOHwSmvjYXN1V0Yz8kVVFU7E+Q4RHL8yAwBv+ynd927HtZVjs+455Pc
34
+ z+9gNpBQVr6LLXLJgJF2pTaIoYhgG6pcoMQHGVoxWdKzvOcl0h1epeJSp/aynX/r
35
+ FK+cyrQNA9DLJYJuz6uO7Z+gXZWjwAO38LUwF01w49asSv/5ZH2HH/EauX5xWpe+
36
+ ry6lYQlb8j50Iys5elAy1p0i
37
37
  -----END CERTIFICATE-----
38
- date: 2020-08-01 00:00:00.000000000 Z
38
+ date: 2022-03-30 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: rexle
@@ -46,7 +46,7 @@ dependencies:
46
46
  version: '1.5'
47
47
  - - ">="
48
48
  - !ruby/object:Gem::Version
49
- version: 1.5.7
49
+ version: 1.5.14
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
@@ -56,29 +56,29 @@ dependencies:
56
56
  version: '1.5'
57
57
  - - ">="
58
58
  - !ruby/object:Gem::Version
59
- version: 1.5.7
59
+ version: 1.5.14
60
60
  - !ruby/object:Gem::Dependency
61
61
  name: ferrum
62
62
  requirement: !ruby/object:Gem::Requirement
63
63
  requirements:
64
- - - ">="
65
- - !ruby/object:Gem::Version
66
- version: 0.9.0
67
64
  - - "~>"
68
65
  - !ruby/object:Gem::Version
69
- version: '0.9'
66
+ version: '0.11'
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0.11'
70
70
  type: :runtime
71
71
  prerelease: false
72
72
  version_requirements: !ruby/object:Gem::Requirement
73
73
  requirements:
74
- - - ">="
75
- - !ruby/object:Gem::Version
76
- version: 0.9.0
77
74
  - - "~>"
78
75
  - !ruby/object:Gem::Version
79
- version: '0.9'
76
+ version: '0.11'
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0.11'
80
80
  description:
81
- email: james@jamesrobertson.eu
81
+ email: digital.robertson@gmail.com
82
82
  executables: []
83
83
  extensions: []
84
84
  extra_rdoc_files: []
@@ -103,7 +103,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
103
103
  - !ruby/object:Gem::Version
104
104
  version: '0'
105
105
  requirements: []
106
- rubygems_version: 3.0.3
106
+ rubygems_version: 3.2.22
107
107
  signing_key:
108
108
  specification_version: 4
109
109
  summary: Makes web scraping easier using the Ferrum gem.
metadata.gz.sig CHANGED
Binary file