ferrumwizard 0.1.4 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 96cb226b3acf38c7c4b4f8c82665175025effc7d2cbe25d0ae6026e92109c899
4
- data.tar.gz: e31dbbc763e88c553881660d7579117781934e92c2eae10f32fff329da670e09
3
+ metadata.gz: 3b11fa25652b962c446a8022e3d9cfce067110ac2c6c6509995533449942ad70
4
+ data.tar.gz: e487524c0b613f6a1aea2872faccc520106cd57bbfbee5f9725504467c219e51
5
5
  SHA512:
6
- metadata.gz: 0d6f8bd47a33d4d8d3224cc231a993a36c47fa297c91e7b83a3888094de658cf129cc289b001935b442fd96f93103bfe5e214551b5d365de18971330d0e380e2
7
- data.tar.gz: 7fce37cf15f5a493b0261b36eb843ad8087a19b040c14176a0488b0ab0c84220b5e70c12fdb5a962443608092e148b7ecff2832311d7395209115e5cfec5e268
6
+ metadata.gz: fd67b1f378008cfb07e86fec087a1ce3344f5df905bde152d447b731d9936d3e26240932cf68a1a837acf10338ffbe22f689af378939e19cb2f42a47d112f7ab
7
+ data.tar.gz: a098e1b6ae39a3c5dc0dcfc35250a14993036ec4907e28cc9e3027d0ab3c259043086f5d177424abca3517cb58b59649a15487d68d93098abc6305d4459e3fee
checksums.yaml.gz.sig CHANGED
Binary file
data/lib/ferrumwizard.rb CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  # file: ferrumwizard.rb
4
4
 
5
+ require 'yaml'
5
6
  require 'rexle'
6
7
  require 'ferrum'
7
8
 
@@ -10,25 +11,66 @@ class FerrumWizard
10
11
 
11
12
  attr_reader :browser, :links, :radio, :buttons, :js_methods
12
13
 
13
- def initialize(url, headless: true, timeout: 10, debug: false)
14
+ def initialize(url=nil, headless: true, timeout: 10, cookies: nil,
15
+ debug: false)
14
16
 
15
17
  @url, @debug = url, debug
16
18
  @browser = Ferrum::Browser.new headless: headless, timeout: timeout
17
- sleep 3
18
-
19
- if url
19
+
20
+ loadx(cookies) if cookies
21
+
22
+ if url then
23
+
24
+ sleep 3
25
+
20
26
  @browser.goto(@url)
21
27
  @browser.network.wait_for_idle
22
- sleep 4
28
+ sleep 4
29
+
23
30
  end
24
31
  end
25
-
32
+
26
33
  def inspect()
27
34
  "#<FerrumWizard>"
28
35
  end
29
-
36
+
37
+ # Intended to load all the cookies for a user to login automatically
38
+ #
39
+ # Follow these steps to load the cookies file:
40
+ #
41
+ # 1. launch the Ferrum browser
42
+ # fw = FerrumWizard.new( headless: false, debug: false)
43
+ #
44
+ # 2. load the cookies before you visit the website
45
+ # fw.load_cookies('/tmp/indeed2.txt')
46
+ #
47
+ # 3. visit the website
48
+ # url='https://somewebsite.com'
49
+ # fw.browser.goto(url)
50
+ #
51
+ def load_cookies(filepath)
52
+
53
+ rawcookies = YAML.load(File.read(filepath))
54
+
55
+ rawcookies.each do |h|
56
+
57
+ if @debug then
58
+ puts 'name: ' + h['name']
59
+ puts 'h: ' + h.inspect
60
+ sleep 0.7
61
+ end
62
+
63
+ browser.cookies.set(name: h['name'], value: h['value'],
64
+ domain: h['domain'], expires: h['expires'],
65
+ httponly: h['httpOnly'])
66
+ end
67
+
68
+ end
69
+
70
+ alias loadx load_cookies
71
+
30
72
  def login(usernamex=nil, passwordx=nil, username: usernamex, password: passwordx)
31
-
73
+
32
74
  puts 'username: ' + username.inspect if @debug
33
75
 
34
76
  # search for the username input box
@@ -36,23 +78,33 @@ class FerrumWizard
36
78
  puts 'e_username: ' + e_username.inspect if @debug
37
79
  sleep 1
38
80
  # search for the password input box
39
- e_password = @browser.at_xpath('//input[@type="password"]')
81
+ found = @browser.at_xpath('//input[@type="password"]')
82
+
83
+ e_password = if found then
84
+ found
85
+ else
86
+ @browser.xpath('//input').find {|x| x.property(:id) =~ /password/i}
87
+ end
88
+
40
89
  sleep 1
41
-
90
+
42
91
  if username and e_username then
43
92
  puts 'entering the username' if @debug
44
- e_username.focus.type(username)
93
+ e_username.focus.type(username)
45
94
  sleep 1
46
95
  end
47
-
96
+
48
97
  e_password.focus.type(password, :Enter) if e_password
49
-
98
+
50
99
  after_login()
51
-
52
- end
53
-
100
+
101
+ end
102
+
103
+ # login2 is used for websites where the user is presented with the username
104
+ # input box on the first page and the password input box on the next page.
105
+ #
54
106
  def login2(usernamex=nil, passwordx=nil, username: usernamex, password: passwordx)
55
-
107
+
56
108
  puts 'username: ' + username.inspect if @debug
57
109
 
58
110
  # search for the username input box
@@ -60,70 +112,104 @@ class FerrumWizard
60
112
  puts 'e_username: ' + e_username.inspect if @debug
61
113
  sleep 1
62
114
  # search for the password input box
63
-
115
+
64
116
  if username and e_username then
65
117
  puts 'entering the username' if @debug
66
- e_username.focus.type(username, :Enter)
118
+ e_username.focus.type(username, :Enter)
67
119
  sleep 2
68
120
  end
69
121
 
70
- e_password = b.at_xpath('//input[@type="password"]')
122
+ e_password = @browser.at_xpath('//input[@type="password"]')
71
123
  sleep 1
72
-
124
+
73
125
  e_password.focus.type(password, :Enter) if e_password
74
-
126
+
75
127
  after_login()
76
128
 
77
-
78
- end
79
-
129
+
130
+ end
131
+
80
132
  def quit
81
133
  @browser.quit
82
134
  end
83
-
135
+
84
136
  def scan_page()
85
-
86
- @doc = Rexle.new @browser.body
137
+
138
+ @doc = Rexle.new @browser.body
87
139
  fetch_links()
88
- scan_form_elements()
140
+ scan_form_elements()
89
141
  scan_js_links()
90
- self
142
+ @browser.mouse.scroll_to(0, 800)
143
+ self
144
+ end
145
+
146
+ # Saves all cookies for a given website into a YAML file
147
+ # see also load_cookies()
148
+ #
149
+ # To use this method follow these steps:
150
+ #
151
+ # 1. launch the web browser through Ferrum
152
+ # fw = FerrumWizard.new(url, headless: false, debug: false)
153
+ #
154
+ # 2. go to the browser and login using your credentials
155
+ # fw.save_cookies(filepath)
156
+ #
157
+ # 3. exit the IRB session
158
+ #
159
+ def save_cookies(filepath=Tempfile.new('ferrum').path)
160
+
161
+ rawcookies = @browser.cookies.all.keys.map do |key|
162
+
163
+ if @debug then
164
+ puts 'key: ' + key.inspect
165
+ sleep 0.5
166
+ end
167
+
168
+ s = @browser.cookies[key].inspect
169
+ a = s.scan(/"([^"]+)"=\>/)
170
+ s2 = s[/(?<=@attributes=).*(?=>)/]
171
+ eval(s2)
172
+
173
+ end
174
+
175
+ File.write filepath, rawcookies.to_yaml
176
+
91
177
  end
92
-
178
+
93
179
  def submit(h)
94
180
 
95
181
  e = nil
96
-
182
+
97
183
  h.each do |key, value|
98
- e = @browser.xpath('//input').find {|x| x.attribute('name') == key.to_s}
99
- e.focus.type(value)
184
+ e = @browser.xpath('//input').find {|x| x.attribute('name') == key.to_s}
185
+ e.focus.type(value)
100
186
  end
101
-
102
- e.focus.type('', :Enter)
103
-
104
- sleep 4
187
+
188
+ e.focus.type('', :Enter)
189
+
190
+ sleep 4
105
191
  scan_page()
106
-
192
+
107
193
  end
108
194
 
109
195
  def to_rb()
110
196
  end
111
-
197
+
112
198
  private
113
-
199
+
114
200
  def after_login()
115
-
116
- @browser.network.wait_for_idle
201
+
202
+ @browser.network.wait_for_idle
117
203
  sleep 4
118
204
  scan_page()
119
-
205
+
120
206
  @browser.base_url = File.dirname(@browser.url)
121
207
  @browser.mouse.scroll_to(0, 800)
122
208
  self
123
-
209
+
124
210
  end
125
-
126
-
211
+
212
+
127
213
  def fetch_buttons()
128
214
 
129
215
  a2 = @browser.xpath('//input[@type="button"]')
@@ -139,24 +225,24 @@ class FerrumWizard
139
225
  buttons = @buttons
140
226
 
141
227
  names.each do |name|
142
-
228
+
143
229
  define_singleton_method name.to_sym do
144
230
  buttons[name].click
145
231
  @browser.network.wait_for_idle
146
232
  sleep = 1
147
233
  self
148
234
  end
149
-
235
+
150
236
  end
151
237
 
152
238
  end
153
-
239
+
154
240
  def fetch_links()
155
-
241
+
156
242
  all_links = @doc.root.xpath('//a[@href]')
157
-
243
+
158
244
  all_links.each do |x|
159
-
245
+
160
246
  if x.plaintext.empty? then
161
247
  x.text = x.attributes[:href].sub(/\.\w+$/,'')[/([^\/]+)$/].split(/[_]|(?=[A-Z])/).join(' ')
162
248
  else
@@ -164,74 +250,74 @@ class FerrumWizard
164
250
  end
165
251
 
166
252
  end
167
-
253
+
168
254
  valid_links = all_links.reject do |x|
169
-
255
+
170
256
  puts 'x: ' + x.inspect if @debug
171
257
  r = (x.attributes[:target] == '_blank')
172
258
 
173
259
  puts 'r: ' + r.inspect if @debug
174
260
  r
175
-
261
+
176
262
  end
177
-
263
+
178
264
  indices = valid_links.map {|x| all_links.index x}
179
265
 
180
266
  active_links = @browser.xpath('//a[@href]')
181
267
  valid_active_links = indices.map {|n| active_links[n]}
182
-
183
268
 
184
- @links = valid_active_links.flat_map.with_index do |x, i|
269
+
270
+ @links = valid_active_links.flat_map.with_index do |x, i|
185
271
 
186
272
  a = valid_links[i].text.split(/\W+/).map {|label| [label, x]}
187
273
  a << [valid_links[i].text, x]
188
-
274
+
189
275
  puts 'a: ' + a.inspect if @debug
190
276
  a + a.map {|x2, obj| [x2.downcase, obj]}
191
-
277
+
192
278
  end.to_h
193
-
279
+
194
280
  names = @links.keys.map(&:downcase).uniq.select {|x| x =~ /^[\w ]+$/}
195
281
  links = @links
196
-
282
+
197
283
  names.each do |name|
198
-
284
+
199
285
  define_singleton_method name.gsub(/ +/,'_').to_sym do
200
-
286
+
201
287
  links[name].click
202
288
  @browser.network.wait_for_idle
203
-
289
+
204
290
  sleep 1
205
291
  scan_page()
206
292
  self
207
-
293
+
208
294
  end
209
-
295
+
210
296
  end
211
-
297
+
212
298
  end
213
299
 
214
300
  def scan_form_elements()
215
-
301
+
216
302
  # find radio buttons
217
-
303
+
218
304
  a = @browser.xpath('//input[@type="radio"]')
219
305
  h = a.group_by {|x| x.attribute('name')}
220
306
  @radio = h.values
221
307
  define_singleton_method(:on) { @radio[0][0].click; self }
222
308
  define_singleton_method(:off) { @radio[0][1].click; self }
223
-
309
+
224
310
  fetch_buttons()
225
-
311
+
226
312
  end
227
-
313
+
228
314
  def scan_js_links()
229
-
315
+
230
316
  @js_methods = {}
231
317
  b = @browser
232
-
318
+
233
319
  b.xpath('//a').select {|x| x.attribute('href') =~ /^javascript/}.each do |e|
234
-
320
+
235
321
 
236
322
  s = e.attribute('href')[/(?<=^javascript:)[^\(]+/]
237
323
  puts 's: ' + s.inspect if @debug
@@ -241,17 +327,17 @@ class FerrumWizard
241
327
  a << [s.split(/\W+|(?=[A-Z])/).join('_').downcase, s]
242
328
  #@js_methods[s] = a
243
329
 
244
- a.concat a.map {|x, name| [x.downcase, name] }
330
+ a.concat a.map {|x, name| [x.downcase, name] }
245
331
 
246
332
  puts 'a: ' + a.inspect if @debug
247
333
 
248
334
  a.uniq.select {|x, _| x =~ /^[a-z0-9_]+$/}.each do |x, name|
249
-
335
+
250
336
  if @debug then
251
337
  puts 'x: ' + x.inspect
252
338
  puts 'name: ' + name.inspect
253
339
  end
254
-
340
+
255
341
  define_singleton_method(x.to_sym) do |*args|
256
342
  #args = raw_args.map {|x| x[/^[0-9]+$/] ? x.to_i : x}
257
343
  js_method = "%s(%s)" % [name, args.map(&:inspect).join(', ')]
@@ -260,9 +346,9 @@ class FerrumWizard
260
346
  sleep 4
261
347
  self.scan_page()
262
348
  end
263
-
349
+
264
350
  end
265
-
351
+
266
352
  end
267
353
  end
268
354
 
@@ -271,7 +357,7 @@ class FerrumWizard
271
357
  puts 'method_missing: ' + method_name.inspect if @debug
272
358
  node = @browser.at_css '.' + method_name.to_s
273
359
  node.text if node
274
-
275
- end
276
-
360
+
361
+ end
362
+
277
363
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ferrumwizard
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -11,31 +11,31 @@ cert_chain:
11
11
  - |
12
12
  -----BEGIN CERTIFICATE-----
13
13
  MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
- YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjAwNzMwMTYxOTE5WhcN
15
- MjEwNzMwMTYxOTE5WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
- cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDhNEIG
17
- /Ab4nneih/AQFMcYk76JCiy26Xcy5uxd9ib7Emkj/9sZo6nxuSBaH03+Ixv3jgJs
18
- TxZyaIKRsESFFmupYmKsyatCGGaBEsDb210ZBm313rP2Pk2fGrUtON0CjwJljWxR
19
- 8pHuglEXrGN/XhVicy7sZLJ2nVnvRtyiKi92XmY0S9LaCkWlOx2f3D7yiazkmHh5
20
- 59nHiGNlZ/SOzFrRMdBvkWZYHgqUEBv0KxEuMqW65U4HdlQImcwqu8XOWH9kutof
21
- yyisv03kPqMvrOC8ptG/TieKYK0JuY23gS9MrVxkrf0gX3IQLY21JWG9t9uRImc/
22
- kHC+EJ2rI8HQqcq/v6dndJb6MhYEhj7R5XsZqlfsLFo21FFBAyaPrqPRUstnW5U0
23
- /tCpcuFyZJeRPqQ8LSlRGDuB/TdmV9dF+P5aGS32k9Okf9L6E6x3OGV29eMHSdDt
24
- LOOB8l0EJbNXzpvYW+htziU8TbuzRQU8K7uTeAfpMUg4auPxdVyQpJcQWXcCAwEA
25
- AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQU2+nN7PCw
26
- js3NFmK8b17Ji/t+dvwwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwMzMwMTI1ODU1WhcN
15
+ MjMwMzMwMTI1ODU1WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDROL32
17
+ 3LQKGcDR6x6XFa1US/Vq98DVnMeZHCSKdf471I4gJIOA7sQnrQTB6IZKTxb94Wjr
18
+ OSeGzlJpVq6pa7ltxvb9T7YQVVRrXYMC+u0gD9ukolnkpV/4Rh2/IIMxSNKncoZB
19
+ LKPseizGKlli4gs134gAu3wuWdCC7/UWPG/XyocdJC8tLtf/zi4JuRJTojKqYLOp
20
+ KsP9jHPmGVr81cW8HePmhQ/+LiYlKDE4Fwj4yl16XqhF7/5YOz9e5LOHsMUEord4
21
+ JscQ3GnhMfEXGJpwqCwNEpM3xAwcHp2DDdrwtT36ujSfnTJ3UpUIQUKVehA2i9rm
22
+ uDcDTr1PATGcOMPpExvLZu3a9uC81mj9z+axH5mWQ7jZ92sze79oAQTsMiMyBavJ
23
+ djSpnVBo71PFk8QekgIVVBIzG0iN5zoNUrSthvL/xUWXM6ea015HEDCCIEL417ID
24
+ humVWZyzKf7ITCdZWcxTgTgFfuPMctcICT5u7va+FrycYpdtt8kXvtD3VnkCAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUDipvmNU2
26
+ WydgAK8QPGb0vhhoGl0wJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
27
  c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
- BgkqhkiG9w0BAQsFAAOCAYEAlaYpTQ2vLuKU/nJl1inw9iE9XCwnTmIhmA9lnu1q
29
- QKKCd7Z2PwtkahbDvMVQ347DQZQAanuZmtTPFMc4FDA530qJtwoYk03FTQXBh12M
30
- d4C27VP9BOrUQcxkqtnTo+4Z60taszXqyPsPYU+Fd8AZUPeS5TOYG52OXTQ+q+pO
31
- vNxkRP9oEka81ZrN1y3r3YaFHATZzf4pJo0HupZvMsQwa33/vA+xxxpDeTuWytNN
32
- O0mYbo8Em2LnPnE8ehOnniDGXIIaDO9B1Qbbr0GhNCIWq3JIcbI2IBCKFWA6HyNF
33
- yCdr7ZqPrnxXlhhnTPLFkzR/0+XxpbrdW4zb6uQqX92/tiUqP9uKf5dBEVoCWax/
34
- IWPJE5JXx2iMvE9cWe4bFCUi7cZT7HsL6jkdUWxeTvsfc7XMbE8eWtHHiG6NjeFJ
35
- 7e24hNRMt3t/JE9ogEO4JzFUH2vq2zzR5X9JQqEclWfwHi4cf8bZFJ7spjZQPjSZ
36
- Ok3rs0A+kW4ixAj1rDYuoyG/
28
+ BgkqhkiG9w0BAQsFAAOCAYEAHqRvm6iqjJ+bpEzSSgVmOMOMcgIoN6px1LMVAOmY
29
+ BJpF5F0fJr99thc1EYZJoRTwEcXJYhCTqKg+3xhNKpCzk2qHsaLKYEygPeBpyJOg
30
+ LyfHLrj98QLPYyFzqhWsqZAAAGC9WSF/kBJazpuotU2ec/Xw/e3NPopedV/Zvuhs
31
+ +/OKZWwRez/hg97ckaCYAp/7OdrVhJvR87MaQnN52Uk8OQbuPSyUNQUJ044HWHtu
32
+ lEJjsDetEFhNB69j3wAIMjMEZao29/dZhALbUDp+9ewK7uYbrX9Bo68NX+H+XcCZ
33
+ VFrdrjkyJUOHwSmvjYXN1V0Yz8kVVFU7E+Q4RHL8yAwBv+ynd927HtZVjs+455Pc
34
+ z+9gNpBQVr6LLXLJgJF2pTaIoYhgG6pcoMQHGVoxWdKzvOcl0h1epeJSp/aynX/r
35
+ FK+cyrQNA9DLJYJuz6uO7Z+gXZWjwAO38LUwF01w49asSv/5ZH2HH/EauX5xWpe+
36
+ ry6lYQlb8j50Iys5elAy1p0i
37
37
  -----END CERTIFICATE-----
38
- date: 2020-08-01 00:00:00.000000000 Z
38
+ date: 2022-03-30 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: rexle
@@ -46,7 +46,7 @@ dependencies:
46
46
  version: '1.5'
47
47
  - - ">="
48
48
  - !ruby/object:Gem::Version
49
- version: 1.5.7
49
+ version: 1.5.14
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
@@ -56,29 +56,29 @@ dependencies:
56
56
  version: '1.5'
57
57
  - - ">="
58
58
  - !ruby/object:Gem::Version
59
- version: 1.5.7
59
+ version: 1.5.14
60
60
  - !ruby/object:Gem::Dependency
61
61
  name: ferrum
62
62
  requirement: !ruby/object:Gem::Requirement
63
63
  requirements:
64
- - - ">="
65
- - !ruby/object:Gem::Version
66
- version: 0.9.0
67
64
  - - "~>"
68
65
  - !ruby/object:Gem::Version
69
- version: '0.9'
66
+ version: '0.11'
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0.11'
70
70
  type: :runtime
71
71
  prerelease: false
72
72
  version_requirements: !ruby/object:Gem::Requirement
73
73
  requirements:
74
- - - ">="
75
- - !ruby/object:Gem::Version
76
- version: 0.9.0
77
74
  - - "~>"
78
75
  - !ruby/object:Gem::Version
79
- version: '0.9'
76
+ version: '0.11'
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0.11'
80
80
  description:
81
- email: james@jamesrobertson.eu
81
+ email: digital.robertson@gmail.com
82
82
  executables: []
83
83
  extensions: []
84
84
  extra_rdoc_files: []
@@ -103,7 +103,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
103
103
  - !ruby/object:Gem::Version
104
104
  version: '0'
105
105
  requirements: []
106
- rubygems_version: 3.0.3
106
+ rubygems_version: 3.2.22
107
107
  signing_key:
108
108
  specification_version: 4
109
109
  summary: Makes web scraping easier using the Ferrum gem.
metadata.gz.sig CHANGED
Binary file