formscraper_helper 0.2.0 → 0.3.0

This diff shows the changes in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 402c3ab5b633cce3e4852deba197a388a30d267e81595829cbc05deb6509f07e
4
- data.tar.gz: c3867d152abfa6d910efd7f24b119c413c8ca8368229290583704bba1018108d
3
+ metadata.gz: cb75f7060b87ea5800e4aa4ddc1b8b9cc54e5f77b7b4e7037031f3f4a714ba36
4
+ data.tar.gz: 39a21217d1c7f39f3e6e29cac3671481117a187aa49d1ea76c2802a608667608
5
5
  SHA512:
6
- metadata.gz: a3c5c2217823d8734d7e273a07c166012191f0196a3c039cbb7bb55a0ee2ab3593295b9e985c049ebcfdddaa768696091fa49352cb8af3c7782ac56a25d86dd2
7
- data.tar.gz: 15b49ce36dd5517099e04436153f3e63c1f9138a383ddb22033e4896eb9b0203c32bcca28ec18ab1883bc452a175182e21700cbca4cd812cdb618ad0b571abcd
6
+ metadata.gz: 489e4acefdc092393f0530656e894b2c5e896e530e9ed76a73ff751be1591777c23212b28bc0d01b429e2d79333d0a24373729b1d998c4754d959b79d47ed990
7
+ data.tar.gz: 3dbdb3bb6abf2579d58621babf7dbd8b1f0320542503bbf3f858122f5d7e3bd8d17f0876ea1e9fbdc32efc3d72f11d35ee429e33ac1489dc2d57fb5c9f879a4a
checksums.yaml.gz.sig CHANGED
Binary file
@@ -2,7 +2,7 @@
2
2
 
3
3
  # file: formscraper_helper.rb
4
4
 
5
- require 'ferrum'
5
+ require 'ferrumwizard'
6
6
  require 'nokorexi'
7
7
  require 'clipboard'
8
8
 
@@ -13,14 +13,42 @@ class FormScraperHelper
13
13
 
14
14
  # note: fd corresponds to FakeDataGenerator22 which is optional
15
15
  #
16
- def initialize(url, headless: false, clipb: true, fd: nil, debug: false)
16
+ def initialize(url=nil, browser: nil, headless: false, clipb: true,
17
+ fd: nil, debug: false)
17
18
 
18
19
  @url, @clipb, @fd, @debug = url, clipb, fd, debug
19
- @browser = Ferrum::Browser.new headless: headless
20
- @browser.goto(url)
21
20
 
22
- sleep 2
23
- scrape()
21
+ @browser = browser ? browser : FerrumWizard.new(url, headless: headless)
22
+
23
+ end
24
+
25
+ def scrape(body=@browser.body)
26
+ puts 'body: ' + body.inspect if @debug
27
+ doc = Nokorexi.new(body).to_doc
28
+
29
+ #a = doc.root.xpath('//input|//select')
30
+ a = doc.root.xpath('//*').select {|x| x.name == 'input' or x.name == 'select'}
31
+ a.reject! do |x|
32
+ x.attributes[:type] == 'hidden' or x.attributes[:style] =~ /display:none/
33
+ end
34
+
35
+ @h = a.map do |x|
36
+
37
+ key = x.attributes[:name]
38
+ type = x.name
39
+
40
+ h = {}
41
+ h[:type] = x.attributes[:type] || type
42
+ h[:xpath] = "//%s[@name=\"%s\"]" % [type, key]
43
+ h[:title] = x.attributes[:title]
44
+
45
+ if type == 'select' then
46
+ h[:options] = x.xpath('option').map {|x| x.text.to_s}
47
+ end
48
+
49
+ [key, h]
50
+
51
+ end.to_h
24
52
 
25
53
  end
26
54
 
@@ -113,7 +141,7 @@ EOF
113
141
 
114
142
  found = @fd.lookup var1
115
143
  val = found.is_a?(String) ? found : 'xxx'
116
- s += var1 + ": #{val}\n"
144
+ s += var1 + ": '#{val}'\n"
117
145
  else
118
146
  s += var1 + ": xxx\n"
119
147
  end
@@ -125,7 +153,7 @@ EOF
125
153
  s += s2
126
154
  s += "# options: #{h[:options].join(', ')}\n"
127
155
  val = h[:options][1..-1].sample
128
- s += "#{var1}: #{val}\n"
156
+ s += "#{var1}: '#{val}'\n"
129
157
 
130
158
  elsif h[:type] == 'checkbox'
131
159
 
@@ -143,7 +171,6 @@ EOF
143
171
  private
144
172
 
145
173
  # returns var1 using arguments rawtitle or key
146
- # note: argument s is passed by reference
147
174
  #
148
175
  def format_var1(rawtitle, key)
149
176
 
@@ -170,36 +197,5 @@ EOF
170
197
 
171
198
  end
172
199
 
173
- def scrape()
174
-
175
- doc = Nokorexi.new(@browser.body).to_doc
176
-
177
- #a = doc.root.xpath('//input|//select')
178
- a = doc.root.xpath('//*').select {|x| x.name == 'input' or x.name == 'select'}
179
- a.reject! do |x|
180
- x.attributes[:type] == 'hidden' or x.attributes[:style] =~ /display:none/
181
- end
182
-
183
- @h = a.map do |x|
184
-
185
- key = x.attributes[:name]
186
- type = x.name
187
-
188
- h = {}
189
- h[:type] = x.attributes[:type] || type
190
- h[:xpath] = "//%s[@name=\"%s\"]" % [type, key]
191
- h[:title] = x.attributes[:title]
192
-
193
- if type == 'select' then
194
- h[:options] = x.xpath('option').map {|x| x.text.to_s}
195
- end
196
-
197
- [key, h]
198
-
199
- end.to_h
200
-
201
- end
202
-
203
-
204
200
  end
205
201
 
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: formscraper_helper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -38,25 +38,25 @@ cert_chain:
38
38
  date: 2022-05-28 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
- name: ferrum
41
+ name: ferrumwizard
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '0.11'
46
+ version: '0.3'
47
47
  - - ">="
48
48
  - !ruby/object:Gem::Version
49
- version: '0.11'
49
+ version: 0.3.2
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - - "~>"
55
55
  - !ruby/object:Gem::Version
56
- version: '0.11'
56
+ version: '0.3'
57
57
  - - ">="
58
58
  - !ruby/object:Gem::Version
59
- version: '0.11'
59
+ version: 0.3.2
60
60
  - !ruby/object:Gem::Dependency
61
61
  name: nokorexi
62
62
  requirement: !ruby/object:Gem::Requirement
metadata.gz.sig CHANGED
Binary file