xml_col_finder 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +3 -2
- data/lib/xml_col_finder.rb +60 -10
- data.tar.gz.sig +0 -0
- metadata +23 -4
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 82516bf9b6e8ca979788ac9be6024ec6cc698e959a89b13006a75bba13e473ba
|
4
|
+
data.tar.gz: deddd9d4197bab3913761d93e8f02a44f66f49968ae1985ab16e8d411ad8f755
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de927549a8e8451f66891bc558523ff2332786d79a6966f827ed5480ba06fa14b57c8116abb528176184de6b4b0357245ca1940316c2ac0d696c672ce3bc49c6
|
7
|
+
data.tar.gz: fdb5610f7b3f0775b0e156ec9f1c19be35a6374187fa40259481e7dd4b949c0df0202250dcc678ecb2eef906d2323c76400456081c52bbb1277cacca000fd9f3
|
checksums.yaml.gz.sig
CHANGED
@@ -1,2 +1,3 @@
|
|
1
|
-
|
2
|
-
�
|
1
|
+
[�GT3M���sV����h���%f7��%GA��#N�V��Tӳ
|
2
|
+
��L���O�9�-5*ƴtB!)O]ew�]9�{)���2���9����[ʃ�/2�Q+\5�!�{Y:iQ����cq���J�
|
3
|
+
����б{އ����� �UU�jgl�h9_V���z��@���bԧ�����D
|
data/lib/xml_col_finder.rb
CHANGED
@@ -3,35 +3,49 @@
|
|
3
3
|
# file: xml_col_finder.rb
|
4
4
|
|
5
5
|
require 'rexle'
|
6
|
+
require 'clipboard'
|
6
7
|
|
8
|
+
# how to use this gem?
|
9
|
+
#
|
10
|
+
# 1. Find a web page to fetch values from
|
11
|
+
# 2. Press F12 to invoke developer tools
|
12
|
+
# 3. right-click on the element containing the
|
13
|
+
# child elements containing the values
|
14
|
+
# 4. select copy > copy outerHTML
|
15
|
+
# 5. paste the text into a file
|
16
|
+
# 6. File.read the txt file and pass it to XMLColFinder.new
|
17
|
+
|
18
|
+
# note: There's a to_code method which makes it convenient to fetch the
|
19
|
+
# values from the generated code.
|
7
20
|
|
8
21
|
class XMLColFinder
|
9
22
|
|
10
23
|
attr_reader :to_a
|
11
24
|
|
12
|
-
def initialize(
|
25
|
+
def initialize(obj, debug: false)
|
13
26
|
|
14
27
|
@debug = debug
|
15
|
-
doc = Rexle.new(
|
28
|
+
@doc = obj.is_a?(Rexle) ? obj : Rexle.new(obj)
|
16
29
|
|
17
30
|
a = []
|
18
|
-
doc.root.each_recursive do |node|
|
31
|
+
@doc.root.each_recursive do |node|
|
19
32
|
|
20
|
-
if node.text then
|
33
|
+
if node.text and node.text.strip.length >= 1 then
|
21
34
|
a << [BacktrackXPath.new(node, ignore_id: true).to_xpath.split('/'),
|
22
35
|
node.text]
|
23
36
|
end
|
24
37
|
|
25
38
|
end
|
26
39
|
|
27
|
-
|
40
|
+
@to_a = a
|
28
41
|
h = group_by_xpath(a)
|
29
42
|
@to_a = truncate_xpath(h).flatten(1)
|
30
43
|
|
31
44
|
end
|
32
45
|
|
33
|
-
def to_code()
|
46
|
+
def to_code(nametip: true)
|
34
47
|
|
48
|
+
@nametip = nametip
|
35
49
|
@tags = {}
|
36
50
|
|
37
51
|
xpath, remaining = @to_a
|
@@ -40,17 +54,51 @@ class XMLColFinder
|
|
40
54
|
linex = formatline('doc', eid, xpath)
|
41
55
|
a = scan(remaining, eid)
|
42
56
|
|
43
|
-
a.flatten.compact.prepend linex
|
57
|
+
lines = a.flatten.compact.prepend linex
|
58
|
+
lines2 = lines.join("\n").lines\
|
59
|
+
.map {|line| line =~ /.text$/ ? 'puts ' + line : line }
|
60
|
+
lines2[0].sub!(/(?<=\")div/,'//div')
|
61
|
+
s = "require 'nokorexi'
|
62
|
+
|
63
|
+
url = 'https://insert-your-url'
|
64
|
+
doc = Nokorexi.new(url).to_doc
|
65
|
+
" + lines2.join
|
66
|
+
|
67
|
+
Clipboard.copy s
|
68
|
+
return s
|
44
69
|
|
45
70
|
end
|
46
71
|
|
72
|
+
def to_doc()
|
73
|
+
@doc
|
74
|
+
end
|
75
|
+
|
47
76
|
private
|
48
77
|
|
49
78
|
def formatline(pid, eid=nil, key=nil, tail=nil, index: nil)
|
50
79
|
|
51
80
|
if eid then
|
52
|
-
|
53
|
-
|
81
|
+
|
82
|
+
nametip = @nametip && tail.is_a?(String)
|
83
|
+
klass = nametip ? key.scan(/@class=['"]([^'"]+)/).last : nil
|
84
|
+
|
85
|
+
line = if klass then
|
86
|
+
desc = klass[0][/^[^\-]+/].gsub(/(?=[A-Z])/,' ').downcase
|
87
|
+
desc += " (e.g. %s)" % [tail.length < 50 ? tail : tail[0..46] + '...']
|
88
|
+
"\n# " + desc + "\n"
|
89
|
+
elsif nametip
|
90
|
+
"\n# e.g. %s\n" % [tail.length < 50 ? tail : tail[0..46] + '...']
|
91
|
+
else
|
92
|
+
''
|
93
|
+
end
|
94
|
+
|
95
|
+
key.gsub!("[@class='']",'') # Rexle XPath bug solution!
|
96
|
+
line += "%s = %s.element(\"%s\")" % [eid, pid, key]
|
97
|
+
if tail.is_a? String
|
98
|
+
line += '.text'
|
99
|
+
#line += "\n" if nametip
|
100
|
+
end
|
101
|
+
|
54
102
|
else
|
55
103
|
line = index ? ("%s[%d].text" % [pid, index]) : ("%s.text" % pid)
|
56
104
|
end
|
@@ -66,6 +114,8 @@ class XMLColFinder
|
|
66
114
|
tag = case rawtagx.to_sym
|
67
115
|
when :a
|
68
116
|
'link'
|
117
|
+
when :p
|
118
|
+
'para'
|
69
119
|
else
|
70
120
|
rawtagx
|
71
121
|
end
|
@@ -191,7 +241,7 @@ class XMLColFinder
|
|
191
241
|
puts '_tail: ' + tail.inspect if @debug
|
192
242
|
tline = tail.map.with_index do |x,i|
|
193
243
|
formatline(pid=eid, index: i)
|
194
|
-
end
|
244
|
+
end
|
195
245
|
|
196
246
|
end
|
197
247
|
end
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xml_col_finder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Robertson
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
35
35
|
GSlZ9ilAfm8srTjbZ2cWQyNGGxH+zHQ3Z02c4ZEtgPv/wHjptd1VeBm0P1aemsRA
|
36
36
|
ShsxXxzmzIrRENmpBp3tyR3k
|
37
37
|
-----END CERTIFICATE-----
|
38
|
-
date: 2022-
|
38
|
+
date: 2022-03-29 00:00:00.000000000 Z
|
39
39
|
dependencies:
|
40
40
|
- !ruby/object:Gem::Dependency
|
41
41
|
name: rexle
|
@@ -57,6 +57,26 @@ dependencies:
|
|
57
57
|
- - ">="
|
58
58
|
- !ruby/object:Gem::Version
|
59
59
|
version: 1.5.14
|
60
|
+
- !ruby/object:Gem::Dependency
|
61
|
+
name: clipboard
|
62
|
+
requirement: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - "~>"
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '1.3'
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 1.3.6
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - "~>"
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '1.3'
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 1.3.6
|
60
80
|
description:
|
61
81
|
email: digital.robertson@gmail.com
|
62
82
|
executables: []
|
@@ -83,8 +103,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
83
103
|
- !ruby/object:Gem::Version
|
84
104
|
version: '0'
|
85
105
|
requirements: []
|
86
|
-
|
87
|
-
rubygems_version: 2.7.10
|
106
|
+
rubygems_version: 3.2.22
|
88
107
|
signing_key:
|
89
108
|
specification_version: 4
|
90
109
|
summary: Attempts to return the relative xpath for each element containing text.
|
metadata.gz.sig
CHANGED
Binary file
|