formscraper_helper 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5b4f9be47b4c46d2e2823161ac508b4748afab4f2f6a03e2617bb91ae6ababe9
4
- data.tar.gz: bea40e151206763162b372bb2163010f65ce021fbd795b82757780eba32ccde2
3
+ metadata.gz: 402c3ab5b633cce3e4852deba197a388a30d267e81595829cbc05deb6509f07e
4
+ data.tar.gz: c3867d152abfa6d910efd7f24b119c413c8ca8368229290583704bba1018108d
5
5
  SHA512:
6
- metadata.gz: 8c7809564deb15b0b6a1ce109a676ceed9f77211dcf30fe14894a638ad75609421de74cc6863064d7c4fdb428b99fdffbb9ae777598bd3c32c99e0ca542f4860
7
- data.tar.gz: a61eaf497175a3bb4aac7eed0e3e54038640697a90484c05adc11fa8342f1c668f94255488d1a6a6e932c8a42e0143b821af1bd98f4256605e86da92a9c4f218
6
+ metadata.gz: a3c5c2217823d8734d7e273a07c166012191f0196a3c039cbb7bb55a0ee2ab3593295b9e985c049ebcfdddaa768696091fa49352cb8af3c7782ac56a25d86dd2
7
+ data.tar.gz: 15b49ce36dd5517099e04436153f3e63c1f9138a383ddb22033e4896eb9b0203c32bcca28ec18ab1883bc452a175182e21700cbca4cd812cdb618ad0b571abcd
checksums.yaml.gz.sig CHANGED
Binary file
@@ -4,17 +4,21 @@
4
4
 
5
5
  require 'ferrum'
6
6
  require 'nokorexi'
7
+ require 'clipboard'
7
8
 
8
9
 
9
10
  class FormScraperHelper
10
11
 
11
12
  attr_reader :browser
12
13
 
13
- def initialize(url, headless: false, debug: false)
14
+ # note: fd corresponds to FakeDataGenerator22 which is optional
15
+ #
16
+ def initialize(url, headless: false, clipb: true, fd: nil, debug: false)
14
17
 
15
- @url, @debug = url, debug
18
+ @url, @clipb, @fd, @debug = url, clipb, fd, debug
16
19
  @browser = Ferrum::Browser.new headless: headless
17
20
  @browser.goto(url)
21
+
18
22
  sleep 2
19
23
  scrape()
20
24
 
@@ -40,6 +44,7 @@ doc = Nokorexi.new(browser.body).to_doc
40
44
 
41
45
  # load the YAML document containing the inputs
42
46
  #filepath = ''
47
+ filepath = '/tmp/tmp.yaml'
43
48
  h = YAML.load(File.read(filepath))
44
49
  EOF
45
50
 
@@ -51,29 +56,26 @@ EOF
51
56
 
52
57
  if h[:type] == 'text' or h[:type] == 'password' then
53
58
 
54
- var1 = if h[:title].length > 1 then
55
- h[:title].downcase.gsub(/ +/,'_')
56
- else
57
- key.downcase
58
- end
59
+ var1, s2 = format_var1(h[:title], key)
60
+ s += s2
59
61
  s += var1 + " = h['#{var1}']\n"
60
- s += "r.focus.type #{var1}\n\n"
62
+ s += "r.focus.type #{var1}\n"
63
+ s += "sleep 0.5\n\n"
61
64
 
62
65
  elsif h[:type] == 'select'
63
66
 
64
- var1 = if h[:title].length > 1 then
65
- h[:title].downcase.gsub(/ +/,'_').gsub(/\W/,'')
66
- else
67
- key.downcase
68
- end
67
+ var1, s2 = format_var1(h[:title], key)
68
+ s += s2
69
69
 
70
70
  s += "# options: #{h[:options].join(', ')}\n"
71
71
  s += "#{var1} = h['#{var1}']\n"
72
- s += 'r = titles.grep /#{' + var1 + '}/i' + "\n"
73
- s += "n = titles.index(r.first) + 1\n"
72
+ s += 'titles = %w(' + h[:options].join(' ') + ')' + "\n"
73
+ s += 'found = titles.grep /#{' + var1 + '}/i' + "\n"
74
+ s += "n = titles.index(found.first) + 1\n"
74
75
  s += "r.focus\n"
75
76
  s += "n.times { r.type(:down); sleep 1}\n"
76
- s += "r.click\n\n"
77
+ s += "r.click\n"
78
+ s += "sleep 0.5\n\n"
77
79
 
78
80
  elsif h[:type] == 'checkbox'
79
81
  s += "r.focus.click\n\n"
@@ -81,6 +83,9 @@ EOF
81
83
 
82
84
  end
83
85
 
86
+ Clipboard.copy s if @clipb
87
+ puts 'generated code copied to clipboard'
88
+
84
89
  return s
85
90
 
86
91
  end
@@ -97,22 +102,30 @@ EOF
97
102
 
98
103
  if h[:type] == 'text' or h[:type] == 'password' then
99
104
 
100
- var1 = if h[:title].length > 1 then
101
- h[:title].downcase.gsub(/ +/,'_')
102
- else
103
- key.downcase
105
+ var1, s2 = format_var1(h[:title], key)
106
+
107
+ s += s2
108
+
109
+ if h[:type] == 'password' then
110
+ @pwd ||= @fd ? @fd.password : 'xxx'
111
+ s += var1 + ": #{@pwd}\n"
112
+ elsif @fd
113
+
114
+ found = @fd.lookup var1
115
+ val = found.is_a?(String) ? found : 'xxx'
116
+ s += var1 + ": #{val}\n"
117
+ else
118
+ s += var1 + ": xxx\n"
104
119
  end
105
- s += var1 + ": xxx\n"
106
120
 
107
121
  elsif h[:type] == 'select'
108
122
 
109
- var1 = if h[:title].length > 1 then
110
- h[:title].downcase.gsub(/ +/,'_').gsub(/\W/,'')
111
- else
112
- key.downcase
113
- end
123
+ var1, s2 = format_var1(h[:title], key)
114
124
 
115
- s += "#{var1}: xxx\n"
125
+ s += s2
126
+ s += "# options: #{h[:options].join(', ')}\n"
127
+ val = h[:options][1..-1].sample
128
+ s += "#{var1}: #{val}\n"
116
129
 
117
130
  elsif h[:type] == 'checkbox'
118
131
 
@@ -120,20 +133,49 @@ EOF
120
133
 
121
134
  end
122
135
 
136
+ Clipboard.copy s if @clipb
137
+ puts 'generated YAML copied to clipboard'
138
+
123
139
  return s
124
140
 
125
141
  end
126
142
 
127
143
  private
128
144
 
129
- def scrape()
145
+ # returns var1 using arguments rawtitle or key
146
+ # note: s is built locally and returned alongside var1 as [var1, s]
147
+ #
148
+ def format_var1(rawtitle, key)
149
+
150
+ var1 = if rawtitle.length > 1 then
151
+
152
+ s = "\n# " + rawtitle + "\n"
153
+ title = rawtitle.scan(/[A-Z][^A-Z]+/).join(' ').gsub(/[^\w ]/,'')
154
+ words = title.downcase.scan(/\w+/)
155
+
156
+ if words.count > 2 then
157
+ words.take(5).map {|x| x[0]}.join
158
+ else
159
+ title.downcase.gsub(/ +/,'_')
160
+ end
161
+
162
+ else
163
+ newtitle = key.scan(/[A-Z][^A-Z]+/).join(' ')
164
+ s = "\n# " + newtitle + "\n"
165
+ newtitle.gsub(/[^\w ]/,'').downcase\
166
+ .gsub(/ +/,'_')
167
+ end
168
+
169
+ [var1, s]
170
+
171
+ end
172
+
173
+ def scrape()
130
174
 
131
175
  doc = Nokorexi.new(@browser.body).to_doc
132
176
 
133
177
  #a = doc.root.xpath('//input|//select')
134
- a = doc.root.xpath('//*').select do |x|
135
- x.name == 'input' or x.name == 'select'
136
- end
178
+ a = doc.root.xpath('//*').select {|x| x.name == 'input' or x.name == 'select'}
137
179
  a.reject! do |x|
138
180
  x.attributes[:type] == 'hidden' or x.attributes[:style] =~ /display:none/
139
181
  end
@@ -160,3 +202,4 @@ EOF
160
202
 
161
203
 
162
204
  end
205
+
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: formscraper_helper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,7 +35,7 @@ cert_chain:
35
35
  hBw+4Vg30COBUGSGYs46Cy3vhis61poJJeWm/pLTMOH4lcl/Jz5fR//QP9ovEu3k
36
36
  3v0q89HVKLBtQzj+Dii/vHeI
37
37
  -----END CERTIFICATE-----
38
- date: 2022-05-25 00:00:00.000000000 Z
38
+ date: 2022-05-28 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: ferrum
@@ -106,5 +106,5 @@ requirements: []
106
106
  rubygems_version: 3.2.22
107
107
  signing_key:
108
108
  specification_version: 4
109
- summary: Attempts to scrape the inputs required to complate a 1 page online form.
109
+ summary: Attempts to scrape the inputs required to complete a 1 page online form.
110
110
  test_files: []
metadata.gz.sig CHANGED
Binary file