xml_col_finder 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +3 -2
- data/lib/xml_col_finder.rb +60 -10
- data.tar.gz.sig +0 -0
- metadata +23 -4
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 82516bf9b6e8ca979788ac9be6024ec6cc698e959a89b13006a75bba13e473ba
|
4
|
+
data.tar.gz: deddd9d4197bab3913761d93e8f02a44f66f49968ae1985ab16e8d411ad8f755
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de927549a8e8451f66891bc558523ff2332786d79a6966f827ed5480ba06fa14b57c8116abb528176184de6b4b0357245ca1940316c2ac0d696c672ce3bc49c6
|
7
|
+
data.tar.gz: fdb5610f7b3f0775b0e156ec9f1c19be35a6374187fa40259481e7dd4b949c0df0202250dcc678ecb2eef906d2323c76400456081c52bbb1277cacca000fd9f3
|
checksums.yaml.gz.sig
CHANGED
@@ -1,2 +1,3 @@
|
|
1
|
-
|
2
|
-
�
|
1
|
+
[�GT3M���sV����h���%f7��%GA��#N�V��Tӳ
|
2
|
+
��L���O�9�-5*ƴtB!)O]ew�]9�{)���2���9����[ʃ�/2�Q+\5�!�{Y:iQ����cq���J�
|
3
|
+
����б{އ����� �UU�jgl�h9_V���z��@���bԧ�����D
|
data/lib/xml_col_finder.rb
CHANGED
@@ -3,35 +3,49 @@
|
|
3
3
|
# file: xml_col_finder.rb
|
4
4
|
|
5
5
|
require 'rexle'
|
6
|
+
require 'clipboard'
|
6
7
|
|
8
|
+
# how to use this gem?
|
9
|
+
#
|
10
|
+
# 1. Find a web page to fetch values from
|
11
|
+
# 2. Press F12 to invoke developer tools
|
12
|
+
# 3. right-click on the element containing the
|
13
|
+
# child elements containing the values
|
14
|
+
# 4. select copy > copy outerHTML
|
15
|
+
# 5. paste the text into a file
|
16
|
+
# 6. File.read the txt file and pass it to XMLColFinder.new
|
17
|
+
|
18
|
+
# note: There's a to_code method which makes it convenient to fetch the
|
19
|
+
# values from the generated code.
|
7
20
|
|
8
21
|
class XMLColFinder
|
9
22
|
|
10
23
|
attr_reader :to_a
|
11
24
|
|
12
|
-
def initialize(
|
25
|
+
def initialize(obj, debug: false)
|
13
26
|
|
14
27
|
@debug = debug
|
15
|
-
doc = Rexle.new(
|
28
|
+
@doc = obj.is_a?(Rexle) ? obj : Rexle.new(obj)
|
16
29
|
|
17
30
|
a = []
|
18
|
-
doc.root.each_recursive do |node|
|
31
|
+
@doc.root.each_recursive do |node|
|
19
32
|
|
20
|
-
if node.text then
|
33
|
+
if node.text and node.text.strip.length >= 1 then
|
21
34
|
a << [BacktrackXPath.new(node, ignore_id: true).to_xpath.split('/'),
|
22
35
|
node.text]
|
23
36
|
end
|
24
37
|
|
25
38
|
end
|
26
39
|
|
27
|
-
|
40
|
+
@to_a = a
|
28
41
|
h = group_by_xpath(a)
|
29
42
|
@to_a = truncate_xpath(h).flatten(1)
|
30
43
|
|
31
44
|
end
|
32
45
|
|
33
|
-
def to_code()
|
46
|
+
def to_code(nametip: true)
|
34
47
|
|
48
|
+
@nametip = nametip
|
35
49
|
@tags = {}
|
36
50
|
|
37
51
|
xpath, remaining = @to_a
|
@@ -40,17 +54,51 @@ class XMLColFinder
|
|
40
54
|
linex = formatline('doc', eid, xpath)
|
41
55
|
a = scan(remaining, eid)
|
42
56
|
|
43
|
-
a.flatten.compact.prepend linex
|
57
|
+
lines = a.flatten.compact.prepend linex
|
58
|
+
lines2 = lines.join("\n").lines\
|
59
|
+
.map {|line| line =~ /.text$/ ? 'puts ' + line : line }
|
60
|
+
lines2[0].sub!(/(?<=\")div/,'//div')
|
61
|
+
s = "require 'nokorexi'
|
62
|
+
|
63
|
+
url = 'https://insert-your-url'
|
64
|
+
doc = Nokorexi.new(url).to_doc
|
65
|
+
" + lines2.join
|
66
|
+
|
67
|
+
Clipboard.copy s
|
68
|
+
return s
|
44
69
|
|
45
70
|
end
|
46
71
|
|
72
|
+
def to_doc()
|
73
|
+
@doc
|
74
|
+
end
|
75
|
+
|
47
76
|
private
|
48
77
|
|
49
78
|
def formatline(pid, eid=nil, key=nil, tail=nil, index: nil)
|
50
79
|
|
51
80
|
if eid then
|
52
|
-
|
53
|
-
|
81
|
+
|
82
|
+
nametip = @nametip && tail.is_a?(String)
|
83
|
+
klass = nametip ? key.scan(/@class=['"]([^'"]+)/).last : nil
|
84
|
+
|
85
|
+
line = if klass then
|
86
|
+
desc = klass[0][/^[^\-]+/].gsub(/(?=[A-Z])/,' ').downcase
|
87
|
+
desc += " (e.g. %s)" % [tail.length < 50 ? tail : tail[0..46] + '...']
|
88
|
+
"\n# " + desc + "\n"
|
89
|
+
elsif nametip
|
90
|
+
"\n# e.g. %s\n" % [tail.length < 50 ? tail : tail[0..46] + '...']
|
91
|
+
else
|
92
|
+
''
|
93
|
+
end
|
94
|
+
|
95
|
+
key.gsub!("[@class='']",'') # Rexle XPath bug solution!
|
96
|
+
line += "%s = %s.element(\"%s\")" % [eid, pid, key]
|
97
|
+
if tail.is_a? String
|
98
|
+
line += '.text'
|
99
|
+
#line += "\n" if nametip
|
100
|
+
end
|
101
|
+
|
54
102
|
else
|
55
103
|
line = index ? ("%s[%d].text" % [pid, index]) : ("%s.text" % pid)
|
56
104
|
end
|
@@ -66,6 +114,8 @@ class XMLColFinder
|
|
66
114
|
tag = case rawtagx.to_sym
|
67
115
|
when :a
|
68
116
|
'link'
|
117
|
+
when :p
|
118
|
+
'para'
|
69
119
|
else
|
70
120
|
rawtagx
|
71
121
|
end
|
@@ -191,7 +241,7 @@ class XMLColFinder
|
|
191
241
|
puts '_tail: ' + tail.inspect if @debug
|
192
242
|
tline = tail.map.with_index do |x,i|
|
193
243
|
formatline(pid=eid, index: i)
|
194
|
-
end
|
244
|
+
end
|
195
245
|
|
196
246
|
end
|
197
247
|
end
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xml_col_finder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Robertson
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
35
35
|
GSlZ9ilAfm8srTjbZ2cWQyNGGxH+zHQ3Z02c4ZEtgPv/wHjptd1VeBm0P1aemsRA
|
36
36
|
ShsxXxzmzIrRENmpBp3tyR3k
|
37
37
|
-----END CERTIFICATE-----
|
38
|
-
date: 2022-
|
38
|
+
date: 2022-03-29 00:00:00.000000000 Z
|
39
39
|
dependencies:
|
40
40
|
- !ruby/object:Gem::Dependency
|
41
41
|
name: rexle
|
@@ -57,6 +57,26 @@ dependencies:
|
|
57
57
|
- - ">="
|
58
58
|
- !ruby/object:Gem::Version
|
59
59
|
version: 1.5.14
|
60
|
+
- !ruby/object:Gem::Dependency
|
61
|
+
name: clipboard
|
62
|
+
requirement: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - "~>"
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '1.3'
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 1.3.6
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - "~>"
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '1.3'
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 1.3.6
|
60
80
|
description:
|
61
81
|
email: digital.robertson@gmail.com
|
62
82
|
executables: []
|
@@ -83,8 +103,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
83
103
|
- !ruby/object:Gem::Version
|
84
104
|
version: '0'
|
85
105
|
requirements: []
|
86
|
-
|
87
|
-
rubygems_version: 2.7.10
|
106
|
+
rubygems_version: 3.2.22
|
88
107
|
signing_key:
|
89
108
|
specification_version: 4
|
90
109
|
summary: Attempts to return the relative xpath for each element containing text.
|
metadata.gz.sig
CHANGED
Binary file
|