formscraper_helper 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/formscraper_helper.rb +66 -14
- data.tar.gz.sig +0 -0
- metadata +24 -4
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7fc2eba27c47cbfae52b020e76bf1771953bf4944ae5da17216bd513eff74067
|
4
|
+
data.tar.gz: 5200c30d48828c8d29dd3794e26d3779237ddc9da8da5afda854bc15968c33c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 40d8f123a3000ff0072ba5d3bf71ccb0b983363c6298281edc94326af842c81688c67f25624662e5c02d4383b602392be9379ada75136a4205decebe1d843619
|
7
|
+
data.tar.gz: 516f582b35e1da445631d7f8f1daf1a22a86e62b4c5c4b4861d71f16a5918222701bb935d0e65547b1c4d12baf3a170993b761a0bc071ba85f78086893134668
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/lib/formscraper_helper.rb
CHANGED
@@ -5,6 +5,7 @@
|
|
5
5
|
require 'ferrumwizard'
|
6
6
|
require 'nokorexi'
|
7
7
|
require 'clipboard'
|
8
|
+
require 'fdg22'
|
8
9
|
|
9
10
|
|
10
11
|
class FormScraperHelper
|
@@ -27,28 +28,44 @@ class FormScraperHelper
|
|
27
28
|
doc = Nokorexi.new(body).to_doc
|
28
29
|
|
29
30
|
#a = doc.root.xpath('//input|//select')
|
30
|
-
a = doc.root.xpath('//*').select
|
31
|
+
a = doc.root.xpath('//*').select do |x|
|
32
|
+
x.name == 'input' or x.name == 'select' or \
|
33
|
+
(x.name == 'button' and x.attributes[:type] == 'submit')
|
34
|
+
end
|
31
35
|
a.reject! do |x|
|
32
36
|
x.attributes[:type] == 'hidden' or x.attributes[:style] =~ /display:none/
|
33
37
|
end
|
34
38
|
|
35
|
-
|
39
|
+
a2 = a.map do |x|
|
36
40
|
|
37
41
|
key = x.attributes[:name]
|
38
|
-
|
42
|
+
name = x.name
|
39
43
|
|
40
44
|
h = {}
|
41
|
-
h[:type] = x.attributes[:type] ||
|
42
|
-
|
45
|
+
h[:type] = x.attributes[:type] || name
|
46
|
+
|
47
|
+
if key then
|
48
|
+
h[:xpath] = "//%s[@name=\"%s\"]" % [name, key]
|
49
|
+
else
|
50
|
+
h[:xpath] = "//%s[@type=\"%s\"]" % [name, h[:type]]
|
51
|
+
end
|
52
|
+
|
43
53
|
h[:title] = x.attributes[:title]
|
44
54
|
|
45
|
-
if
|
55
|
+
if name == 'select' then
|
46
56
|
h[:options] = x.xpath('option').map {|x| x.text.to_s}
|
47
57
|
end
|
48
58
|
|
49
|
-
[key, h]
|
59
|
+
[key || h[:type], h]
|
50
60
|
|
51
|
-
end
|
61
|
+
end
|
62
|
+
|
63
|
+
# ensure submit appears at the end
|
64
|
+
submit = a2.assoc 'submit'
|
65
|
+
a2.delete submit
|
66
|
+
a2 << submit
|
67
|
+
|
68
|
+
@h = a2.to_h
|
52
69
|
|
53
70
|
end
|
54
71
|
|
@@ -63,16 +80,13 @@ require 'yaml'
|
|
63
80
|
require 'ferrum'
|
64
81
|
require 'nokorexi'
|
65
82
|
|
66
|
-
browser = Ferrum::Browser.new headless: false
|
67
83
|
url = '#{@url}'
|
68
|
-
browser.
|
69
|
-
sleep 2
|
70
|
-
|
84
|
+
browser = FerrumWizard.new(url, headless: false)
|
71
85
|
doc = Nokorexi.new(browser.body).to_doc
|
72
86
|
|
73
87
|
# load the YAML document containing the inputs
|
74
88
|
#filepath = ''
|
75
|
-
filepath = '/tmp/
|
89
|
+
filepath = '/tmp/data.yaml'
|
76
90
|
h = YAML.load(File.read(filepath))
|
77
91
|
EOF
|
78
92
|
|
@@ -106,7 +120,16 @@ EOF
|
|
106
120
|
s += "sleep 0.5\n\n"
|
107
121
|
|
108
122
|
elsif h[:type] == 'checkbox'
|
109
|
-
|
123
|
+
|
124
|
+
s += "r.focus.click\n"
|
125
|
+
s += "sleep 0.5\n\n"
|
126
|
+
|
127
|
+
elsif h[:type] == 'submit'
|
128
|
+
|
129
|
+
s += "r.focus.click\n"
|
130
|
+
s += "sleep 4\n"
|
131
|
+
s += "browser.save_cookies('/tmp/cookies.yaml')\n"
|
132
|
+
|
110
133
|
end
|
111
134
|
|
112
135
|
end
|
@@ -199,3 +222,32 @@ EOF
|
|
199
222
|
|
200
223
|
end
|
201
224
|
|
225
|
+
# Rewrites the values of a flat "key: value" YAML document using values
# supplied by a lookup object, returning the updated YAML text.
class FormDataTool

  # fd:: any object responding to +lookup(key)+. A truthy result replaces the
  #      value stored in the YAML file; nil/false keeps the existing value.
  #      When fd is nil every existing value is kept.
  def initialize(fd: nil)
    @fd = fd
  end

  # Reads the YAML file at +yml+, resolves a value for every top-level key
  # (the lookup result, falling back to the value already in the file) and
  # rewrites the first matching "key: ..." line as "key: 'value'".
  #
  # Returns the modified YAML text as a String; the file itself is not
  # written. Raises whatever File.read / YAML.load raise for an unreadable
  # or malformed file.
  def regen(yml='/tmp/data.yaml')

    s = File.read(yml)
    # NOTE(review): YAML.load is only appropriate for trusted local files.
    # An empty document loads as false, so fall back to an empty hash.
    h = YAML.load(s) || {}

    h2 = h.map do |key, value|
      [key, (@fd&.lookup(key) || value)]
    end.to_h

    h2.each do |key, value|

      puts 'scanning key: ' + key.inspect

      # Inside a single-quoted YAML scalar a quote is escaped by doubling it.
      quoted = value.to_s.gsub("'", "''")

      # Anchor at the start of a line (allowing indentation and the leading
      # colon of a symbol key) so that e.g. "name" cannot match inside
      # "username", and escape the key so characters such as "." or "[" are
      # matched literally.
      pattern = /^([ \t]*:?)#{Regexp.escape(key.to_s)}: [^\n]+/

      # Block form: the replacement text is inserted verbatim, so backslashes
      # in the value are not interpreted as backreferences.
      s.sub!(pattern) { "#{Regexp.last_match(1)}#{key}: '#{quoted}'" }

    end

    s

  end

end
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: formscraper_helper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Robertson
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
35
35
|
hBw+4Vg30COBUGSGYs46Cy3vhis61poJJeWm/pLTMOH4lcl/Jz5fR//QP9ovEu3k
|
36
36
|
3v0q89HVKLBtQzj+Dii/vHeI
|
37
37
|
-----END CERTIFICATE-----
|
38
|
-
date: 2022-05-
|
38
|
+
date: 2022-05-29 00:00:00.000000000 Z
|
39
39
|
dependencies:
|
40
40
|
- !ruby/object:Gem::Dependency
|
41
41
|
name: ferrumwizard
|
@@ -46,7 +46,7 @@ dependencies:
|
|
46
46
|
version: '0.3'
|
47
47
|
- - ">="
|
48
48
|
- !ruby/object:Gem::Version
|
49
|
-
version: 0.3.
|
49
|
+
version: 0.3.3
|
50
50
|
type: :runtime
|
51
51
|
prerelease: false
|
52
52
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -56,7 +56,7 @@ dependencies:
|
|
56
56
|
version: '0.3'
|
57
57
|
- - ">="
|
58
58
|
- !ruby/object:Gem::Version
|
59
|
-
version: 0.3.
|
59
|
+
version: 0.3.3
|
60
60
|
- !ruby/object:Gem::Dependency
|
61
61
|
name: nokorexi
|
62
62
|
requirement: !ruby/object:Gem::Requirement
|
@@ -77,6 +77,26 @@ dependencies:
|
|
77
77
|
- - ">="
|
78
78
|
- !ruby/object:Gem::Version
|
79
79
|
version: 0.7.0
|
80
|
+
- !ruby/object:Gem::Dependency
|
81
|
+
name: fdg22
|
82
|
+
requirement: !ruby/object:Gem::Requirement
|
83
|
+
requirements:
|
84
|
+
- - "~>"
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
version: '0.1'
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 0.1.0
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0.1'
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: 0.1.0
|
80
100
|
description:
|
81
101
|
email: digital.robertson@gmail.com
|
82
102
|
executables: []
|
metadata.gz.sig
CHANGED
Binary file
|