xml_col_finder 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fb9968f62dfa167170d225305499a06a3fb07949d22b44cfc5fc7aaff2250e70
4
- data.tar.gz: 590549966bf32070036e68f45ff1c61cc5b81b7c11a5237466928e573758eeb5
3
+ metadata.gz: 82516bf9b6e8ca979788ac9be6024ec6cc698e959a89b13006a75bba13e473ba
4
+ data.tar.gz: deddd9d4197bab3913761d93e8f02a44f66f49968ae1985ab16e8d411ad8f755
5
5
  SHA512:
6
- metadata.gz: a022f1836170e5628062360a2bef7d4fc26ac1748dd7970fd56eb76ca5fa194f2f61b75bf7f8dfe667191ef194aeb2306a8c5e94e55552aafa105e0d34a7828b
7
- data.tar.gz: 2f932bcffe0ace2bfc4fdefb3df0fde1cff61e3095110fb765de6732c096c44066e13046591c8423bb75465e9b981774751c225b9791a72601bac97bb24237a5
6
+ metadata.gz: de927549a8e8451f66891bc558523ff2332786d79a6966f827ed5480ba06fa14b57c8116abb528176184de6b4b0357245ca1940316c2ac0d696c672ce3bc49c6
7
+ data.tar.gz: fdb5610f7b3f0775b0e156ec9f1c19be35a6374187fa40259481e7dd4b949c0df0202250dcc678ecb2eef906d2323c76400456081c52bbb1277cacca000fd9f3
checksums.yaml.gz.sig CHANGED
@@ -1,2 +1,3 @@
1
- T�V�c.nf���}�"Ny�S�dVA$�t/�n����F��*�2ay� .�7��"�L�B�ϊ#[����|��RiQ��L�2���~�T�u� N�6r]�u[��$�4�"����ے��i���jxe8W*%rT�!mf��\�QӪ�+i5�3G�*t�*�ԡ9�;�u��:�u����Aj�IG�8���U*����i�9*��rΥ����d�JG��,
2
- d����Dn��F�*
1
+ [�GT3M���sV����h���%f7 ��%GA��#N�V��Tӳ
2
+ ��L���O�9�-5*ƴtB!)O]ew]9�{)���2���9����[ʃ�/2Q+\5�!�{Y:iQ����cq���J�
3
+ ����б{އ����� �UU�jgl�h9_V���z��@���bԧ�����D
@@ -3,35 +3,49 @@
3
3
  # file: xml_col_finder.rb
4
4
 
5
5
  require 'rexle'
6
+ require 'clipboard'
6
7
 
8
+ # how to use this gem?
9
+ #
10
+ # 1. Find a web page to fetch values from
11
+ # 2. Press F12 to invoke developer tools
12
+ # 3. right-click on the element containing the
13
+ # child elements containing the values
14
+ # 4. select copy > copy outerHTML
15
+ # 5. paste the text into a file
16
+ # 6. File.read the txt file and pass it to XMLColFinder.new
17
+
18
+ # note: There's a to_code method which makes it convenient to fetch the
19
+ # values from the generated code.
7
20
 
8
21
  class XMLColFinder
9
22
 
10
23
  attr_reader :to_a
11
24
 
12
- def initialize(s, debug: false)
25
+ def initialize(obj, debug: false)
13
26
 
14
27
  @debug = debug
15
- doc = Rexle.new(s)
28
+ @doc = obj.is_a?(Rexle) ? obj : Rexle.new(obj)
16
29
 
17
30
  a = []
18
- doc.root.each_recursive do |node|
31
+ @doc.root.each_recursive do |node|
19
32
 
20
- if node.text then
33
+ if node.text and node.text.strip.length >= 1 then
21
34
  a << [BacktrackXPath.new(node, ignore_id: true).to_xpath.split('/'),
22
35
  node.text]
23
36
  end
24
37
 
25
38
  end
26
39
 
27
- #@to_a = a
40
+ @to_a = a
28
41
  h = group_by_xpath(a)
29
42
  @to_a = truncate_xpath(h).flatten(1)
30
43
 
31
44
  end
32
45
 
33
- def to_code()
46
+ def to_code(nametip: true)
34
47
 
48
+ @nametip = nametip
35
49
  @tags = {}
36
50
 
37
51
  xpath, remaining = @to_a
@@ -40,17 +54,51 @@ class XMLColFinder
40
54
  linex = formatline('doc', eid, xpath)
41
55
  a = scan(remaining, eid)
42
56
 
43
- a.flatten.compact.prepend linex
57
+ lines = a.flatten.compact.prepend linex
58
+ lines2 = lines.join("\n").lines\
59
+ .map {|line| line =~ /.text$/ ? 'puts ' + line : line }
60
+ lines2[0].sub!(/(?<=\")div/,'//div')
61
+ s = "require 'nokorexi'
62
+
63
+ url = 'https://insert-your-url'
64
+ doc = Nokorexi.new(url).to_doc
65
+ " + lines2.join
66
+
67
+ Clipboard.copy s
68
+ return s
44
69
 
45
70
  end
46
71
 
72
+ def to_doc()
73
+ @doc
74
+ end
75
+
47
76
  private
48
77
 
49
78
  def formatline(pid, eid=nil, key=nil, tail=nil, index: nil)
50
79
 
51
80
  if eid then
52
- line = "%s = %s.element(\"%s\")" % [eid, pid, key]
53
- line += '.text' if tail.is_a? String
81
+
82
+ nametip = @nametip && tail.is_a?(String)
83
+ klass = nametip ? key.scan(/@class=['"]([^'"]+)/).last : nil
84
+
85
+ line = if klass then
86
+ desc = klass[0][/^[^\-]+/].gsub(/(?=[A-Z])/,' ').downcase
87
+ desc += " (e.g. %s)" % [tail.length < 50 ? tail : tail[0..46] + '...']
88
+ "\n# " + desc + "\n"
89
+ elsif nametip
90
+ "\n# e.g. %s\n" % [tail.length < 50 ? tail : tail[0..46] + '...']
91
+ else
92
+ ''
93
+ end
94
+
95
+ key.gsub!("[@class='']",'') # Rexle XPath bug solution!
96
+ line += "%s = %s.element(\"%s\")" % [eid, pid, key]
97
+ if tail.is_a? String
98
+ line += '.text'
99
+ #line += "\n" if nametip
100
+ end
101
+
54
102
  else
55
103
  line = index ? ("%s[%d].text" % [pid, index]) : ("%s.text" % pid)
56
104
  end
@@ -66,6 +114,8 @@ class XMLColFinder
66
114
  tag = case rawtagx.to_sym
67
115
  when :a
68
116
  'link'
117
+ when :p
118
+ 'para'
69
119
  else
70
120
  rawtagx
71
121
  end
@@ -191,7 +241,7 @@ class XMLColFinder
191
241
  puts '_tail: ' + tail.inspect if @debug
192
242
  tline = tail.map.with_index do |x,i|
193
243
  formatline(pid=eid, index: i)
194
- end.join("\n")
244
+ end
195
245
 
196
246
  end
197
247
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xml_col_finder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,7 +35,7 @@ cert_chain:
35
35
  GSlZ9ilAfm8srTjbZ2cWQyNGGxH+zHQ3Z02c4ZEtgPv/wHjptd1VeBm0P1aemsRA
36
36
  ShsxXxzmzIrRENmpBp3tyR3k
37
37
  -----END CERTIFICATE-----
38
- date: 2022-01-23 00:00:00.000000000 Z
38
+ date: 2022-03-29 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: rexle
@@ -57,6 +57,26 @@ dependencies:
57
57
  - - ">="
58
58
  - !ruby/object:Gem::Version
59
59
  version: 1.5.14
60
+ - !ruby/object:Gem::Dependency
61
+ name: clipboard
62
+ requirement: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - "~>"
65
+ - !ruby/object:Gem::Version
66
+ version: '1.3'
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: 1.3.6
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - "~>"
75
+ - !ruby/object:Gem::Version
76
+ version: '1.3'
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: 1.3.6
60
80
  description:
61
81
  email: digital.robertson@gmail.com
62
82
  executables: []
@@ -83,8 +103,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
83
103
  - !ruby/object:Gem::Version
84
104
  version: '0'
85
105
  requirements: []
86
- rubyforge_project:
87
- rubygems_version: 2.7.10
106
+ rubygems_version: 3.2.22
88
107
  signing_key:
89
108
  specification_version: 4
90
109
  summary: Attempts to return the relative xpath for each element containing text.
metadata.gz.sig CHANGED
Binary file