curlyq 0.0.7 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 92b27e3065435d17fd5d6129bd640481dee8acf66f9ed82b2b68ae6a7589f463
4
- data.tar.gz: a5cd5299248fd01d8f12a80a1c9982de4f8e0160ea5033ab96b60677bb9ea2c8
3
+ metadata.gz: d3e32b382d7318b067ee3fb22f2e9057cf6aa9facfac41c74a0ebb5d4fb4743d
4
+ data.tar.gz: d379da3f0db621052e61230356f5c58b587eefccbb0a4c997216516a4159b44a
5
5
  SHA512:
6
- metadata.gz: 1348b97fdf89faf44cd0cfc0f2aecc05a679606f19fe57392d588209da26fc3a5c2407569173d41e1497bdf409d762adee0d2533089b5ce27854e298fe98cc13
7
- data.tar.gz: c80ecd381e1d941d8e8e5ead0dd925682e26a2f8cc639f0202d5b5cd30f025582fc5d5a2665daca3e5e7d0f2099066d3ca3d9ad8ad00b07d2ee5673b122bae01
6
+ metadata.gz: ae63654deb943771e5f6f3aa0f6a037b1015336abbd696a8ce77acc22f361a3b6a18b03f3b7d02e5c7d5dcaa8d3608248bed240679acfce22ba2e462d84b529f
7
+ data.tar.gz: 481f8499e45a65cb3981fcf20ef7fc9f01f97a1b7014c6566aa2f3bf7a6611fd2d5d35f78e742e4063eea192b938c0642f0ca764e5032f330778d2815a191a41
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
1
+ ### 0.0.8
2
+
3
+ 2024-01-15 16:45
4
+
5
+ #### IMPROVED
6
+
7
+ - Dot syntax query can now operate on a full array using empty set []
8
+ - Dot syntax query should output a specific key, e.g. attrs[id*=news].content (work in progress)
9
+ - Dot query syntax handling touch-ups. Piping to jq is still more flexible, but the basics are there.
10
+
1
11
  ### 0.0.7
2
12
 
3
13
  2024-01-12 17:03
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- curlyq (0.0.7)
4
+ curlyq (0.0.8)
5
5
  gli (~> 2.21.0)
6
6
  nokogiri (~> 1.16.0)
7
7
  selenium-webdriver (~> 4.16.0)
data/README.md CHANGED
@@ -10,7 +10,7 @@ _If you find this useful, feel free to [buy me some coffee][donate]._
10
10
  [donate]: https://brettterpstra.com/donate
11
11
 
12
12
 
13
- The current version of `curlyq` is 0.0.7
13
+ The current version of `curlyq` is 0.0.8
14
14
  .
15
15
 
16
16
  CurlyQ is a utility that provides a simple interface for curl, with additional features for things like extracting images and links, finding elements by CSS selector or XPath, getting detailed header info, and more. It's designed to be part of a scripting pipeline, outputting everything as structured data (JSON or YAML). It also has rudimentary support for making calls to JSON endpoints easier, but it's expected that you'll use something like `jq` to parse the output.
@@ -44,7 +44,7 @@ SYNOPSIS
44
44
  curlyq [global options] command [command options] [arguments...]
45
45
 
46
46
  VERSION
47
- 0.0.7
47
+ 0.0.8
48
48
 
49
49
  GLOBAL OPTIONS
50
50
  --help - Show this message
data/bin/curlyq CHANGED
@@ -147,6 +147,12 @@ command %i[html curl] do |c|
147
147
  # output = output[0] if output.count == 1
148
148
  output.map! { |o| o[options[:raw].to_sym] } if options[:raw]
149
149
 
150
+ if output.is_a?(Array)
151
+ while output.length == 1
152
+ output = output[0]
153
+ end
154
+ end
155
+
150
156
  print_out(output, global_options[:yaml], raw: options[:raw], pretty: global_options[:pretty])
151
157
  end
152
158
  end
@@ -342,9 +348,7 @@ command :tags do |c|
342
348
  out = out.dot_query(options[:query]) if options[:query]
343
349
  output.push(out)
344
350
  elsif options[:query]
345
- query = options[:query] =~ /^links/ ? options[:query] : "links#{options[:query]}"
346
-
347
- output = res.to_data.dot_query(query)
351
+ output = res.to_data.dot_query(options[:query])
348
352
  elsif tags.count.positive?
349
353
  tags.each { |tag| output.concat(res.tags(tag)) }
350
354
  else
@@ -393,13 +397,13 @@ command :images do |c|
393
397
  res.curl
394
398
 
395
399
  res = res.images(types: types)
400
+ res = { images: res }.dot_query(options[:query], 'images', full_tag: false) if options[:query]
396
401
 
397
- if options[:query]
398
- query = options[:query] =~ /^images/ ? options[:query] : "images#{options[:query]}"
399
- res = { images: res }.dot_query(query)
402
+ if res.is_a?(Array)
403
+ output.concat(res)
404
+ else
405
+ output.push(res)
400
406
  end
401
-
402
- output.concat(res)
403
407
  end
404
408
 
405
409
  print_out(output, global_options[:yaml], pretty: global_options[:pretty])
@@ -439,9 +443,9 @@ command :links do |c|
439
443
  res.curl
440
444
 
441
445
  if options[:query]
442
- query = options[:query] =~ /^links/ ? options[:query] : "links#{options[:query]}"
443
- queried = res.to_data.dot_query(query)
444
- output.concat(queried) if queried
446
+ queried = res.to_data.dot_query(options[:query], 'links', full_tag: false)
447
+
448
+ queried.is_a?(Array) ? output.concat(queried) : output.push(queried) if queried
445
449
  else
446
450
  output.concat(res.body_links)
447
451
  end
@@ -469,9 +473,8 @@ command :headlinks do |c|
469
473
  res.curl
470
474
 
471
475
  if options[:query]
472
- query = options[:query] =~ /^links/ ? options[:query] : "links#{options[:query]}"
473
- queried = { links: res.to_data[:meta_links] }.dot_query(query)
474
- output.concat(queried) if queried
476
+ queried = { links: res.to_data[:meta_links] }.dot_query(options[:query], 'links', full_tag: false)
477
+ output.push(queried) if queried
475
478
  else
476
479
  output.push(res.to_data[:meta_links])
477
480
  end
@@ -516,10 +519,10 @@ command :scrape do |c|
516
519
  if options[:search]
517
520
  out = res.search(options[:search])
518
521
 
519
- out = out.dot_query(options[:query]) if options[:query]
522
+ out = out.dot_query(options[:query], full_tag: false) if options[:query]
520
523
  output.push(out)
521
524
  elsif options[:query]
522
- queried = res.to_data(url: url).dot_query(options[:query])
525
+ queried = res.to_data(url: url).dot_query(options[:query], full_tag: false)
523
526
  output.push(queried) if queried
524
527
  else
525
528
  output.push(res.to_data(url: url))
data/lib/curly/array.rb CHANGED
@@ -74,20 +74,19 @@ class ::Array
74
74
  ## @return [Array] elements matching dot query
75
75
  ##
76
76
  def dot_query(path)
77
- filter! do |tag|
78
- r = tag.dot_query(path)
79
- if r.is_a?(Array)
80
- r.count.positive?
81
- else
82
- r
83
- end
84
- end
77
+ res = map { |el| el.dot_query(path) }
78
+ res.delete_if { |r| !r }
79
+ res.delete_if(&:empty?)
80
+ res
81
+ end
85
82
 
86
- return self
83
+ def get_value(path)
84
+ res = map { |el| el.get_value(path) }
85
+ res.is_a?(Array) && res.count == 1 ? res[0] : res
87
86
  end
88
87
 
89
88
  def to_html
90
- map { |el| el.to_html }
89
+ map(&:to_html)
91
90
  end
92
91
 
93
92
  ##
data/lib/curly/hash.rb CHANGED
@@ -29,24 +29,50 @@ class ::Hash
29
29
  end
30
30
  end
31
31
 
32
+ def get_value(query)
33
+ return nil if self.empty?
34
+ query.split('.').inject(self) do |v, k|
35
+ k = k.to_i if v.is_a? Array
36
+ next unless v.key?(k)
37
+ v.fetch(k)
38
+ end
39
+ end
40
+
32
41
  # Extract data using a dot-syntax path
33
42
  #
34
43
  # @param path [String] The path
35
44
  #
36
45
  # @return Result of path query
37
46
  #
38
- def dot_query(path)
47
+ def dot_query(path, root = nil, full_tag: true)
39
48
  res = stringify_keys
49
+ res = res[root] unless root.nil?
40
50
 
51
+ unless path =~ /\[/
52
+ return res.get_value(path)
53
+ end
54
+
55
+ enumerate = false
41
56
  out = []
42
57
  q = path.split(/(?<![\d.])\./)
43
- q.each do |pth|
44
- el = Regexp.last_match(1) if pth =~ /\[([0-9,.]+)\]/
45
- pth.sub!(/\[([0-9,.]+)\]/, '')
58
+
59
+ while q.count.positive?
60
+ pth = q.shift
61
+
62
+ return nil if res.nil?
63
+
64
+ unless pth =~ /\[/
65
+ return res.get_value(pth)
66
+ end
67
+
68
+ el = Regexp.last_match(1) if pth =~ /\[([0-9,.]+)?\]/
69
+ pth.sub!(/\[([0-9,.]+)?\]/, '')
70
+
46
71
  ats = []
47
72
  at = []
48
- while pth =~ /\[[+&,]?\w+ *[\^*$=<>]=? *\w+/
49
- m = pth.match(/\[(?<com>[,+&])? *(?<key>\w+) *(?<op>[\^*$=<>]{1,2}) *(?<val>[^,&\]]+) */)
73
+ while pth =~ /\[[+&,]?\w+( *[\^*$=<>]=? *\w+)?/
74
+ m = pth.match(/\[(?<com>[,+&])? *(?<key>\w+)( *(?<op>[\^*$=<>]{1,2}) *(?<val>[^,&\]]+))? */)
75
+
50
76
  comp = [m['key'], m['op'], m['val']]
51
77
  case m['com']
52
78
  when ','
@@ -56,36 +82,61 @@ class ::Hash
56
82
  at.push(comp)
57
83
  end
58
84
 
59
- pth.sub!(/\[(?<com>[,&+])? *(?<key>\w+) *(?<op>[\^*$=<>]{1,2}) *(?<val>[^,&\]]+)/, '[')
85
+ pth.sub!(/\[(?<com>[,&+])? *(?<key>\w+)( *(?<op>[\^*$=<>]{1,2}) *(?<val>[^,&\]]+))?/, '[')
60
86
  end
61
87
  ats.push(at) unless at.empty?
62
88
  pth.sub!(/\[\]/, '')
63
89
 
64
- res = res[0] if res.is_a?(Array)
90
+ res = res[0] if res.is_a?(Array) && res.count == 1
91
+ if ats.empty? && el.nil? && res.is_a?(Array) && res[0]&.key?(pth)
92
+ res.map! { |r| r[pth] }
93
+ next
94
+ end
65
95
 
66
- return false if el.nil? && ats.empty? && !res.key?(pth)
96
+ res.map!(&:stringify_keys) if res.is_a?(Array) && res[0].is_a?(Hash)
97
+ # if res.is_a?(String) || (res.is_a?(Array) && res[0].is_a?(String))
98
+ # out.push(res)
99
+ # next
100
+ # end
67
101
 
68
- res = res[pth] unless pth.empty?
102
+ # if res.is_a?(Array) && !pth.nil?
103
+ # return res.delete_if { |r| !r.key?(pth) }
104
+ # else
105
+ # return false if el.nil? && ats.empty? && res.is_a?(Hash) && (res.nil? || !res.key?(pth))
106
+ # end
107
+ tag = res
108
+ res = res[pth] unless pth.nil? || pth.empty?
69
109
 
70
- return false if res.nil?
110
+ pth = ''
71
111
 
112
+ return false if res.nil?
72
113
  if ats.count.positive?
73
114
  while ats.count.positive?
74
115
  atr = ats.shift
75
116
  res = [res] if res.is_a?(Hash)
76
- keepers = res.filter do |r|
77
- evaluate_comp(r, atr)
78
- end
79
117
 
80
- out.concat(keepers)
118
+ res.each do |r|
119
+ out.push(full_tag ? tag : r) if evaluate_comp(r, atr)
120
+ end
81
121
  end
82
122
  else
83
123
  out = res
84
124
  end
85
125
 
86
- out = out[eval(el)] if out.is_a?(Array) && el =~ /^[\d.,]+$/
126
+ out = out.get_value(pth) unless pth.nil?
127
+
128
+ if el.nil? && out.is_a?(Array) && out[0].is_a?(Hash)
129
+ out.map! { |o|
130
+ o.stringify_keys
131
+ # o.key?(pth) ? o[pth] : o
132
+ }
133
+ elsif out.is_a?(Array) && el =~ /^[\d.,]+$/
134
+ out = out[eval(el)]
135
+ end
136
+ res = out
87
137
  end
88
138
 
139
+ out = out[0] if out&.count == 1
89
140
  out
90
141
  end
91
142
 
@@ -112,6 +163,9 @@ class ::Hash
112
163
  else
113
164
  a[2]
114
165
  end
166
+ r = r.get_value(key.to_s) if key.to_s =~ /\./
167
+
168
+ return r.key?(key) && !r[key].nil? && !r[key].empty? if val.nil?
115
169
 
116
170
  if !r.key?(key)
117
171
  keep = false
@@ -251,4 +305,8 @@ class ::Hash
251
305
  hsh[k.to_s] = v.is_a?(Hash) ? v.stringify_keys : v
252
306
  end
253
307
  end
308
+
309
+ def stringify_keys!
310
+ replace stringify_keys
311
+ end
254
312
  end
data/lib/curly/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Curly
2
- VERSION = '0.0.7'
2
+ VERSION = '0.0.8'
3
3
  end
data/src/_README.md CHANGED
@@ -10,7 +10,7 @@ _If you find this useful, feel free to [buy me some coffee][donate]._
10
10
  [donate]: https://brettterpstra.com/donate
11
11
  <!--END GITHUB-->
12
12
 
13
- The current version of `curlyq` is <!--VER-->0.0.6<!--END VER-->.
13
+ The current version of `curlyq` is <!--VER-->0.0.4<!--END VER-->.
14
14
 
15
15
  CurlyQ is a utility that provides a simple interface for curl, with additional features for things like extracting images and links, finding elements by CSS selector or XPath, getting detailed header info, and more. It's designed to be part of a scripting pipeline, outputting everything as structured data (JSON or YAML). It also has rudimentary support for making calls to JSON endpoints easier, but it's expected that you'll use something like `jq` to parse the output.
16
16
 
@@ -7,7 +7,7 @@ require 'helpers/curlyq-helpers'
7
7
  require 'test_helper'
8
8
 
9
9
  # Tests for tags command
10
- class CurlyQTagsTest < Test::Unit::TestCase
10
+ class CurlyQExtractTest < Test::Unit::TestCase
11
11
  include CurlyQHelpers
12
12
 
13
13
  def setup
@@ -12,9 +12,9 @@ class CurlyQHtmlTest < Test::Unit::TestCase
12
12
 
13
13
  def test_html_search_query
14
14
  result = curlyq('html', '-s', '#main article .aligncenter', '-q', 'images[1]', 'https://brettterpstra.com')
15
- json = JSON.parse(result)[0]
15
+ json = JSON.parse(result)
16
16
 
17
- assert_match(/aligncenter/, json[0]['class'], 'Should have found an image with class "aligncenter"')
17
+ assert_match(/aligncenter/, json['class'], 'Should have found an image with class "aligncenter"')
18
18
  end
19
19
 
20
20
  def test_html_query
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: curlyq
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brett Terpstra
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-01-12 00:00:00.000000000 Z
11
+ date: 2024-01-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake