formscraper_helper 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 5b4f9be47b4c46d2e2823161ac508b4748afab4f2f6a03e2617bb91ae6ababe9
4
+ data.tar.gz: bea40e151206763162b372bb2163010f65ce021fbd795b82757780eba32ccde2
5
+ SHA512:
6
+ metadata.gz: 8c7809564deb15b0b6a1ce109a676ceed9f77211dcf30fe14894a638ad75609421de74cc6863064d7c4fdb428b99fdffbb9ae777598bd3c32c99e0ca542f4860
7
+ data.tar.gz: a61eaf497175a3bb4aac7eed0e3e54038640697a90484c05adc11fa8342f1c668f94255488d1a6a6e932c8a42e0143b821af1bd98f4256605e86da92a9c4f218
checksums.yaml.gz.sig ADDED
Binary file
@@ -0,0 +1,162 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # file: formscraper_helper.rb
4
+
5
+ require 'ferrum'
6
+ require 'nokorexi'
7
+
8
+
9
# Opens a web page in a Ferrum-driven browser, collects the visible <input>
# and <select> elements found on it, and can then emit:
#
# * a Hash describing each element (#to_h),
# * a YAML template listing the values a user must supply (#to_yaml),
# * a Ruby script skeleton which fills the form in from that YAML (#to_code).
#
class FormScraperHelper

  attr_reader :browser

  # url      - address of the page containing the form.
  # headless - passed straight through to Ferrum::Browser.new.
  # debug    - when true, each element key is printed while generating output.
  #
  def initialize(url, headless: false, debug: false)

    @url, @debug = url, debug
    @browser = Ferrum::Browser.new headless: headless
    @browser.goto(url)
    sleep 2 # fixed pause before the page body is read
    scrape

  end

  # Returns the scraped elements as a Hash keyed by the element's name
  # attribute. Each value is a Hash with :type, :xpath, :title and, for
  # select elements, :options.
  #
  def to_h
    @h
  end

  # Returns a String of Ruby source which opens the same URL and fills in
  # each scraped element using values read from a YAML file.
  #
  # The emitted script is a template: the `filepath` assignment is left
  # commented out for the user to complete.
  #
  def to_code

    code = String.new(<<~EOF)
      require 'yaml'
      require 'ferrum'
      require 'nokorexi'

      browser = Ferrum::Browser.new headless: false
      url = '#{@url}'
      browser.goto(url)
      sleep 2

      doc = Nokorexi.new(browser.body).to_doc

      # load the YAML document containing the inputs
      #filepath = ''
      h = YAML.load(File.read(filepath))
    EOF

    @h.each do |key, h|

      puts 'key: ' + key.inspect if @debug

      code << "r = browser.at_xpath('#{h[:xpath]}')\n"

      if h[:type] == 'text' || h[:type] == 'password'

        var1 = variable_name(key, h)
        code << "#{var1} = h['#{var1}']\n"
        code << "r.focus.type #{var1}\n\n"

      elsif h[:type] == 'select'

        var1 = variable_name(key, h)
        options = Array(h[:options]).map(&:to_s)

        code << "# options: #{options.join(', ')}\n"
        # The generated script needs the option titles itself in order to
        # look up the position of the wanted one.
        code << "titles = #{options.inspect}\n"
        code << "#{var1} = h['#{var1}']\n"
        # The match is kept in its own variable so that r still refers to
        # the element node when it is focused and typed into below.
        code << "found = titles.grep(/\#{#{var1}}/i)\n"
        # NOTE(review): the + 1 is kept from the original; confirm it matches
        # how many :down key presses the target select actually needs.
        code << "n = titles.index(found.first) + 1\n"
        code << "r.focus\n"
        code << "n.times { r.type(:down); sleep 1}\n"
        code << "r.click\n\n"

      elsif h[:type] == 'checkbox'
        code << "r.focus.click\n\n"
      end

    end

    code

  end

  # creates a YAML document for the inputs
  #
  # Text, password and select elements each get a "name: xxx" placeholder
  # line; other element types (including checkboxes) are omitted.
  #
  def to_yaml

    yaml = String.new("---\n")

    @h.each do |key, h|

      puts 'key: ' + key.inspect if @debug

      wanted = h[:type] == 'text' || h[:type] == 'password' ||
          h[:type] == 'select'
      next unless wanted

      yaml << "#{variable_name(key, h)}: xxx\n"

    end

    yaml

  end

  private

  # Derives the name used both as the YAML key and as the local variable in
  # the generated script. The title attribute is preferred when it is longer
  # than one character, otherwise the element's name attribute is used.
  #
  # A missing title (nil) is tolerated. Runs of spaces become underscores and
  # any remaining non-word characters are removed so the result can be used
  # as a Ruby identifier.
  #
  def variable_name(key, h)

    title = h[:title].to_s
    source = title.length > 1 ? title : key.to_s

    source.downcase.gsub(/ +/, '_').gsub(/\W/, '')

  end

  # Reads the current page body and populates @h with one entry per visible
  # input or select element.
  #
  def scrape

    doc = Nokorexi.new(@browser.body).to_doc

    #a = doc.root.xpath('//input|//select')
    elements = doc.root.xpath('//*').select do |x|
      x.name == 'input' || x.name == 'select'
    end

    # skip elements the user cannot see or interact with
    elements.reject! do |x|
      x.attributes[:type] == 'hidden' ||
          x.attributes[:style] =~ /display:none/
    end

    @h = elements.map do |x|

      key = x.attributes[:name]
      type = x.name

      h = {}
      # an element without a type attribute falls back to its tag name
      h[:type] = x.attributes[:type] || type
      h[:xpath] = "//%s[@name=\"%s\"]" % [type, key]
      h[:title] = x.attributes[:title]

      if type == 'select'
        h[:options] = x.xpath('option').map { |option| option.text.to_s }
      end

      [key, h]

    end.to_h

  end

end
data.tar.gz.sig ADDED
Binary file
metadata ADDED
@@ -0,0 +1,110 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: formscraper_helper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - James Robertson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain:
11
+ - |
12
+ -----BEGIN CERTIFICATE-----
13
+ MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwNTI1MjA0NzIxWhcN
15
+ MjMwNTI1MjA0NzIxWjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQCxEJVm
17
+ 2J0b5OWsG7dGVaATl+OOZOM3vvtID8nZdmkI750Z/5dsKrUDXRID1/tN7OIXsD4C
18
+ kf/gQTu1ZhDs0d2ZAFF++HhGBHsRpWbJFodCUXlxlSrsOouwsb9QU9wwrjSf2ROh
19
+ CewjBtBu5elGVxg88eCpHm5WJdTt1niiMCnl8ci2QzUkjqyoD7mujUTnJm0vbR2G
20
+ cAoU6A6/xraZb5HyVj+S3iU3tCcvZ7GsyfG5U60XLlee073tEbrhA+1Veu6jD2q+
21
+ 445lGG247SYCEtWmu+S1ia7xWoTtmoHqoc0GsThK8FmCILTO0VouQwI1Em0erLM0
22
+ UWeUxzOIL3HiaxElI39RZwAo0AjsQs9btmIWnE6Qa0UlCc4aIZY4FPPeeOg58LXD
23
+ P3U1zu2dm9BFzJA9fanaKPIFWu9JaLi0h6VfRQ9HxRF0HUxyJz2vc91xhgg4cVv2
24
+ wIE2htNKt3KJrRoz4FF7abK371dYheDVPtgPrTZDRx3ftlIu5gscIwpCohsCAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUYBF7Fed1
26
+ r+l7945CzwRkfTiERfIwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
+ c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
+ BgkqhkiG9w0BAQsFAAOCAYEARmjroNgUuAeDY25EA0uXOWArJdBq+0Z95PHZweIa
29
+ Et38VT81ul/VnoEk7ehvRZNUU42XjhJkRXfnfSJdViEZYfuXFmr1GzN+Ib8Og1O/
30
+ 3uC8eiDax+7RpFOeqIjAZ7/6lHrW6/h0oQ+66yLAc7kq4SbPZJETHkAj5JFPJijm
31
+ YanatYhSZ2P0/k42k38PLHBn8w8YBq7kcnntMyh/DAq7cr1G/5fLfZKLx4+Aim6S
32
+ wp+1pU+SnrUdzobFQ3Tq1N76CJp27iN3XpNsu5wlSUQDKbDT2hWSPZKO9XsV9/ch
33
+ rlJMGcoOwGdXlgipiQcc71tXQTxPgxZVw8nYlvHtoBexEupPxcPK54BS4LI88KFr
34
+ UTqj+Ktsfri/arY458I/Th+KLrzwetk+Z8xNOa1Pw8pTXyynbiVEPfhe80TYH/Zg
35
+ hBw+4Vg30COBUGSGYs46Cy3vhis61poJJeWm/pLTMOH4lcl/Jz5fR//QP9ovEu3k
36
+ 3v0q89HVKLBtQzj+Dii/vHeI
37
+ -----END CERTIFICATE-----
38
+ date: 2022-05-25 00:00:00.000000000 Z
39
+ dependencies:
40
+ - !ruby/object:Gem::Dependency
41
+ name: ferrum
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '0.11'
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: '0.11'
50
+ type: :runtime
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - "~>"
55
+ - !ruby/object:Gem::Version
56
+ version: '0.11'
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: '0.11'
60
+ - !ruby/object:Gem::Dependency
61
+ name: nokorexi
62
+ requirement: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - "~>"
65
+ - !ruby/object:Gem::Version
66
+ version: '0.7'
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: 0.7.0
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - "~>"
75
+ - !ruby/object:Gem::Version
76
+ version: '0.7'
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: 0.7.0
80
+ description:
81
+ email: digital.robertson@gmail.com
82
+ executables: []
83
+ extensions: []
84
+ extra_rdoc_files: []
85
+ files:
86
+ - lib/formscraper_helper.rb
87
+ homepage: https://github.com/jrobertson/formscraper_helper
88
+ licenses:
89
+ - MIT
90
+ metadata: {}
91
+ post_install_message:
92
+ rdoc_options: []
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ requirements: []
106
+ rubygems_version: 3.2.22
107
+ signing_key:
108
+ specification_version: 4
109
+ summary: Attempts to scrape the inputs required to complete a 1 page online form.
110
+ test_files: []
metadata.gz.sig ADDED
Binary file