formscraper_helper 0.1.0 → 0.2.0

This diff shows the changes between publicly available versions of a package as released to one of the supported registries. It reflects the package contents as they appear in the public registry and is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5b4f9be47b4c46d2e2823161ac508b4748afab4f2f6a03e2617bb91ae6ababe9
4
- data.tar.gz: bea40e151206763162b372bb2163010f65ce021fbd795b82757780eba32ccde2
3
+ metadata.gz: 402c3ab5b633cce3e4852deba197a388a30d267e81595829cbc05deb6509f07e
4
+ data.tar.gz: c3867d152abfa6d910efd7f24b119c413c8ca8368229290583704bba1018108d
5
5
  SHA512:
6
- metadata.gz: 8c7809564deb15b0b6a1ce109a676ceed9f77211dcf30fe14894a638ad75609421de74cc6863064d7c4fdb428b99fdffbb9ae777598bd3c32c99e0ca542f4860
7
- data.tar.gz: a61eaf497175a3bb4aac7eed0e3e54038640697a90484c05adc11fa8342f1c668f94255488d1a6a6e932c8a42e0143b821af1bd98f4256605e86da92a9c4f218
6
+ metadata.gz: a3c5c2217823d8734d7e273a07c166012191f0196a3c039cbb7bb55a0ee2ab3593295b9e985c049ebcfdddaa768696091fa49352cb8af3c7782ac56a25d86dd2
7
+ data.tar.gz: 15b49ce36dd5517099e04436153f3e63c1f9138a383ddb22033e4896eb9b0203c32bcca28ec18ab1883bc452a175182e21700cbca4cd812cdb618ad0b571abcd
checksums.yaml.gz.sig CHANGED
Binary file
@@ -4,17 +4,21 @@
4
4
 
5
5
  require 'ferrum'
6
6
  require 'nokorexi'
7
+ require 'clipboard'
7
8
 
8
9
 
9
10
  class FormScraperHelper
10
11
 
11
12
  attr_reader :browser
12
13
 
13
- def initialize(url, headless: false, debug: false)
14
+ # note: fd corresponds to FakeDataGenerator22 which is optional
15
+ #
16
+ def initialize(url, headless: false, clipb: true, fd: nil, debug: false)
14
17
 
15
- @url, @debug = url, debug
18
+ @url, @clipb, @fd, @debug = url, clipb, fd, debug
16
19
  @browser = Ferrum::Browser.new headless: headless
17
20
  @browser.goto(url)
21
+
18
22
  sleep 2
19
23
  scrape()
20
24
 
@@ -40,6 +44,7 @@ doc = Nokorexi.new(browser.body).to_doc
40
44
 
41
45
  # load the YAML document containing the inputs
42
46
  #filepath = ''
47
+ filepath = '/tmp/tmp.yaml'
43
48
  h = YAML.load(File.read(filepath))
44
49
  EOF
45
50
 
@@ -51,29 +56,26 @@ EOF
51
56
 
52
57
  if h[:type] == 'text' or h[:type] == 'password' then
53
58
 
54
- var1 = if h[:title].length > 1 then
55
- h[:title].downcase.gsub(/ +/,'_')
56
- else
57
- key.downcase
58
- end
59
+ var1, s2 = format_var1(h[:title], key)
60
+ s += s2
59
61
  s += var1 + " = h['#{var1}']\n"
60
- s += "r.focus.type #{var1}\n\n"
62
+ s += "r.focus.type #{var1}\n"
63
+ s += "sleep 0.5\n\n"
61
64
 
62
65
  elsif h[:type] == 'select'
63
66
 
64
- var1 = if h[:title].length > 1 then
65
- h[:title].downcase.gsub(/ +/,'_').gsub(/\W/,'')
66
- else
67
- key.downcase
68
- end
67
+ var1, s2 = format_var1(h[:title], key)
68
+ s += s2
69
69
 
70
70
  s += "# options: #{h[:options].join(', ')}\n"
71
71
  s += "#{var1} = h['#{var1}']\n"
72
- s += 'r = titles.grep /#{' + var1 + '}/i' + "\n"
73
- s += "n = titles.index(r.first) + 1\n"
72
+ s += 'titles = %w(' + h[:options].join(' ') + ')' + "\n"
73
+ s += 'found = titles.grep /#{' + var1 + '}/i' + "\n"
74
+ s += "n = titles.index(found.first) + 1\n"
74
75
  s += "r.focus\n"
75
76
  s += "n.times { r.type(:down); sleep 1}\n"
76
- s += "r.click\n\n"
77
+ s += "r.click\n"
78
+ s += "sleep 0.5\n\n"
77
79
 
78
80
  elsif h[:type] == 'checkbox'
79
81
  s += "r.focus.click\n\n"
@@ -81,6 +83,9 @@ EOF
81
83
 
82
84
  end
83
85
 
86
+ Clipboard.copy s if @clipb
87
+ puts 'generated code copied to clipboard'
88
+
84
89
  return s
85
90
 
86
91
  end
@@ -97,22 +102,30 @@ EOF
97
102
 
98
103
  if h[:type] == 'text' or h[:type] == 'password' then
99
104
 
100
- var1 = if h[:title].length > 1 then
101
- h[:title].downcase.gsub(/ +/,'_')
102
- else
103
- key.downcase
105
+ var1, s2 = format_var1(h[:title], key)
106
+
107
+ s += s2
108
+
109
+ if h[:type] == 'password' then
110
+ @pwd ||= @fd ? @fd.password : 'xxx'
111
+ s += var1 + ": #{@pwd}\n"
112
+ elsif @fd
113
+
114
+ found = @fd.lookup var1
115
+ val = found.is_a?(String) ? found : 'xxx'
116
+ s += var1 + ": #{val}\n"
117
+ else
118
+ s += var1 + ": xxx\n"
104
119
  end
105
- s += var1 + ": xxx\n"
106
120
 
107
121
  elsif h[:type] == 'select'
108
122
 
109
- var1 = if h[:title].length > 1 then
110
- h[:title].downcase.gsub(/ +/,'_').gsub(/\W/,'')
111
- else
112
- key.downcase
113
- end
123
+ var1, s2 = format_var1(h[:title], key)
114
124
 
115
- s += "#{var1}: xxx\n"
125
+ s += s2
126
+ s += "# options: #{h[:options].join(', ')}\n"
127
+ val = h[:options][1..-1].sample
128
+ s += "#{var1}: #{val}\n"
116
129
 
117
130
  elsif h[:type] == 'checkbox'
118
131
 
@@ -120,20 +133,49 @@ EOF
120
133
 
121
134
  end
122
135
 
136
+ Clipboard.copy s if @clipb
137
+ puts 'generated YAML copied to clipboard'
138
+
123
139
  return s
124
140
 
125
141
  end
126
142
 
127
143
  private
128
144
 
129
- def scrape()
145
+ # returns var1 using arguments rawtitle or key
146
+ # note: returns [var1, s], where s is a "# title" comment line built from rawtitle or key
147
+ #
148
+ def format_var1(rawtitle, key)
149
+
150
+ var1 = if rawtitle.length > 1 then
151
+
152
+ s = "\n# " + rawtitle + "\n"
153
+ title = rawtitle.scan(/[A-Z][^A-Z]+/).join(' ').gsub(/[^\w ]/,'')
154
+ words = title.downcase.scan(/\w+/)
155
+
156
+ if words.count > 2 then
157
+ words.take(5).map {|x| x[0]}.join
158
+ else
159
+ title.downcase.gsub(/ +/,'_')
160
+ end
161
+
162
+ else
163
+ newtitle = key.scan(/[A-Z][^A-Z]+/).join(' ')
164
+ s = "\n# " + newtitle + "\n"
165
+ newtitle.gsub(/[^\w ]/,'').downcase\
166
+ .gsub(/ +/,'_')
167
+ end
168
+
169
+ [var1, s]
170
+
171
+ end
172
+
173
+ def scrape()
130
174
 
131
175
  doc = Nokorexi.new(@browser.body).to_doc
132
176
 
133
177
  #a = doc.root.xpath('//input|//select')
134
- a = doc.root.xpath('//*').select do |x|
135
- x.name == 'input' or x.name == 'select'
136
- end
178
+ a = doc.root.xpath('//*').select {|x| x.name == 'input' or x.name == 'select'}
137
179
  a.reject! do |x|
138
180
  x.attributes[:type] == 'hidden' or x.attributes[:style] =~ /display:none/
139
181
  end
@@ -160,3 +202,4 @@ EOF
160
202
 
161
203
 
162
204
  end
205
+
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: formscraper_helper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,7 +35,7 @@ cert_chain:
35
35
  hBw+4Vg30COBUGSGYs46Cy3vhis61poJJeWm/pLTMOH4lcl/Jz5fR//QP9ovEu3k
36
36
  3v0q89HVKLBtQzj+Dii/vHeI
37
37
  -----END CERTIFICATE-----
38
- date: 2022-05-25 00:00:00.000000000 Z
38
+ date: 2022-05-28 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: ferrum
@@ -106,5 +106,5 @@ requirements: []
106
106
  rubygems_version: 3.2.22
107
107
  signing_key:
108
108
  specification_version: 4
109
- summary: Attempts to scrape the inputs required to complate a 1 page online form.
109
+ summary: Attempts to scrape the inputs required to complete a 1 page online form.
110
110
  test_files: []
metadata.gz.sig CHANGED
Binary file