formscraper_helper 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 402c3ab5b633cce3e4852deba197a388a30d267e81595829cbc05deb6509f07e
4
- data.tar.gz: c3867d152abfa6d910efd7f24b119c413c8ca8368229290583704bba1018108d
3
+ metadata.gz: cb75f7060b87ea5800e4aa4ddc1b8b9cc54e5f77b7b4e7037031f3f4a714ba36
4
+ data.tar.gz: 39a21217d1c7f39f3e6e29cac3671481117a187aa49d1ea76c2802a608667608
5
5
  SHA512:
6
- metadata.gz: a3c5c2217823d8734d7e273a07c166012191f0196a3c039cbb7bb55a0ee2ab3593295b9e985c049ebcfdddaa768696091fa49352cb8af3c7782ac56a25d86dd2
7
- data.tar.gz: 15b49ce36dd5517099e04436153f3e63c1f9138a383ddb22033e4896eb9b0203c32bcca28ec18ab1883bc452a175182e21700cbca4cd812cdb618ad0b571abcd
6
+ metadata.gz: 489e4acefdc092393f0530656e894b2c5e896e530e9ed76a73ff751be1591777c23212b28bc0d01b429e2d79333d0a24373729b1d998c4754d959b79d47ed990
7
+ data.tar.gz: 3dbdb3bb6abf2579d58621babf7dbd8b1f0320542503bbf3f858122f5d7e3bd8d17f0876ea1e9fbdc32efc3d72f11d35ee429e33ac1489dc2d57fb5c9f879a4a
checksums.yaml.gz.sig CHANGED
Binary file
@@ -2,7 +2,7 @@
2
2
 
3
3
  # file: formscraper_helper.rb
4
4
 
5
- require 'ferrum'
5
+ require 'ferrumwizard'
6
6
  require 'nokorexi'
7
7
  require 'clipboard'
8
8
 
@@ -13,14 +13,42 @@ class FormScraperHelper
13
13
 
14
14
  # note: fd corresponds to FakeDataGenerator22 which is optional
15
15
  #
16
- def initialize(url, headless: false, clipb: true, fd: nil, debug: false)
16
+ def initialize(url=nil, browser: nil, headless: false, clipb: true,
17
+ fd: nil, debug: false)
17
18
 
18
19
  @url, @clipb, @fd, @debug = url, clipb, fd, debug
19
- @browser = Ferrum::Browser.new headless: headless
20
- @browser.goto(url)
21
20
 
22
- sleep 2
23
- scrape()
21
+ @browser = browser ? browser : FerrumWizard.new(url, headless: headless)
22
+
23
+ end
24
+
25
+ def scrape(body=@browser.body)
26
+ puts 'body: ' + body.inspect if @debug
27
+ doc = Nokorexi.new(body).to_doc
28
+
29
+ #a = doc.root.xpath('//input|//select')
30
+ a = doc.root.xpath('//*').select {|x| x.name == 'input' or x.name == 'select'}
31
+ a.reject! do |x|
32
+ x.attributes[:type] == 'hidden' or x.attributes[:style] =~ /display:none/
33
+ end
34
+
35
+ @h = a.map do |x|
36
+
37
+ key = x.attributes[:name]
38
+ type = x.name
39
+
40
+ h = {}
41
+ h[:type] = x.attributes[:type] || type
42
+ h[:xpath] = "//%s[@name=\"%s\"]" % [type, key]
43
+ h[:title] = x.attributes[:title]
44
+
45
+ if type == 'select' then
46
+ h[:options] = x.xpath('option').map {|x| x.text.to_s}
47
+ end
48
+
49
+ [key, h]
50
+
51
+ end.to_h
24
52
 
25
53
  end
26
54
 
@@ -113,7 +141,7 @@ EOF
113
141
 
114
142
  found = @fd.lookup var1
115
143
  val = found.is_a?(String) ? found : 'xxx'
116
- s += var1 + ": #{val}\n"
144
+ s += var1 + ": '#{val}'\n"
117
145
  else
118
146
  s += var1 + ": xxx\n"
119
147
  end
@@ -125,7 +153,7 @@ EOF
125
153
  s += s2
126
154
  s += "# options: #{h[:options].join(', ')}\n"
127
155
  val = h[:options][1..-1].sample
128
- s += "#{var1}: #{val}\n"
156
+ s += "#{var1}: '#{val}'\n"
129
157
 
130
158
  elsif h[:type] == 'checkbox'
131
159
 
@@ -143,7 +171,6 @@ EOF
143
171
  private
144
172
 
145
173
  # returns var1 using arguments rawtitle or key
146
- # note: argument s is passed by reference
147
174
  #
148
175
  def format_var1(rawtitle, key)
149
176
 
@@ -170,36 +197,5 @@ EOF
170
197
 
171
198
  end
172
199
 
173
- def scrape()
174
-
175
- doc = Nokorexi.new(@browser.body).to_doc
176
-
177
- #a = doc.root.xpath('//input|//select')
178
- a = doc.root.xpath('//*').select {|x| x.name == 'input' or x.name == 'select'}
179
- a.reject! do |x|
180
- x.attributes[:type] == 'hidden' or x.attributes[:style] =~ /display:none/
181
- end
182
-
183
- @h = a.map do |x|
184
-
185
- key = x.attributes[:name]
186
- type = x.name
187
-
188
- h = {}
189
- h[:type] = x.attributes[:type] || type
190
- h[:xpath] = "//%s[@name=\"%s\"]" % [type, key]
191
- h[:title] = x.attributes[:title]
192
-
193
- if type == 'select' then
194
- h[:options] = x.xpath('option').map {|x| x.text.to_s}
195
- end
196
-
197
- [key, h]
198
-
199
- end.to_h
200
-
201
- end
202
-
203
-
204
200
  end
205
201
 
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: formscraper_helper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -38,25 +38,25 @@ cert_chain:
38
38
  date: 2022-05-28 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
- name: ferrum
41
+ name: ferrumwizard
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '0.11'
46
+ version: '0.3'
47
47
  - - ">="
48
48
  - !ruby/object:Gem::Version
49
- version: '0.11'
49
+ version: 0.3.2
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - - "~>"
55
55
  - !ruby/object:Gem::Version
56
- version: '0.11'
56
+ version: '0.3'
57
57
  - - ">="
58
58
  - !ruby/object:Gem::Version
59
- version: '0.11'
59
+ version: 0.3.2
60
60
  - !ruby/object:Gem::Dependency
61
61
  name: nokorexi
62
62
  requirement: !ruby/object:Gem::Requirement
metadata.gz.sig CHANGED
Binary file