curlyq 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Gemfile.lock +1 -1
- data/README.md +2 -2
- data/bin/curlyq +19 -16
- data/lib/curly/array.rb +9 -10
- data/lib/curly/hash.rb +74 -16
- data/lib/curly/version.rb +1 -1
- data/src/_README.md +1 -1
- data/test/curlyq_extract_test.rb +1 -1
- data/test/curlyq_html_test.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d3e32b382d7318b067ee3fb22f2e9057cf6aa9facfac41c74a0ebb5d4fb4743d
|
4
|
+
data.tar.gz: d379da3f0db621052e61230356f5c58b587eefccbb0a4c997216516a4159b44a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ae63654deb943771e5f6f3aa0f6a037b1015336abbd696a8ce77acc22f361a3b6a18b03f3b7d02e5c7d5dcaa8d3608248bed240679acfce22ba2e462d84b529f
|
7
|
+
data.tar.gz: 481f8499e45a65cb3981fcf20ef7fc9f01f97a1b7014c6566aa2f3bf7a6611fd2d5d35f78e742e4063eea192b938c0642f0ca764e5032f330778d2815a191a41
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
### 0.0.8
|
2
|
+
|
3
|
+
2024-01-15 16:45
|
4
|
+
|
5
|
+
#### IMPROVED
|
6
|
+
|
7
|
+
- Dot syntax query can now operate on a full array using empty set []
|
8
|
+
- Dot syntax query should output a specific key, e.g. attrs[id*=news].content (work in progress)
|
9
|
+
- Dot query syntax handling touch-ups. Piping to jq is still more flexible, but the basics are there.
|
10
|
+
|
1
11
|
### 0.0.7
|
2
12
|
|
3
13
|
2024-01-12 17:03
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -10,7 +10,7 @@ _If you find this useful, feel free to [buy me some coffee][donate]._
|
|
10
10
|
[donate]: https://brettterpstra.com/donate
|
11
11
|
|
12
12
|
|
13
|
-
The current version of `curlyq` is 0.0.7
|
13
|
+
The current version of `curlyq` is 0.0.8
|
14
14
|
.
|
15
15
|
|
16
16
|
CurlyQ is a utility that provides a simple interface for curl, with additional features for things like extracting images and links, finding elements by CSS selector or XPath, getting detailed header info, and more. It's designed to be part of a scripting pipeline, outputting everything as structured data (JSON or YAML). It also has rudimentary support for making calls to JSON endpoints easier, but it's expected that you'll use something like `jq` to parse the output.
|
@@ -44,7 +44,7 @@ SYNOPSIS
|
|
44
44
|
curlyq [global options] command [command options] [arguments...]
|
45
45
|
|
46
46
|
VERSION
|
47
|
-
0.0.7
|
47
|
+
0.0.8
|
48
48
|
|
49
49
|
GLOBAL OPTIONS
|
50
50
|
--help - Show this message
|
data/bin/curlyq
CHANGED
@@ -147,6 +147,12 @@ command %i[html curl] do |c|
|
|
147
147
|
# output = output[0] if output.count == 1
|
148
148
|
output.map! { |o| o[options[:raw].to_sym] } if options[:raw]
|
149
149
|
|
150
|
+
if output.is_a?(Array)
|
151
|
+
while output.length == 1
|
152
|
+
output = output[0]
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
150
156
|
print_out(output, global_options[:yaml], raw: options[:raw], pretty: global_options[:pretty])
|
151
157
|
end
|
152
158
|
end
|
@@ -342,9 +348,7 @@ command :tags do |c|
|
|
342
348
|
out = out.dot_query(options[:query]) if options[:query]
|
343
349
|
output.push(out)
|
344
350
|
elsif options[:query]
|
345
|
-
|
346
|
-
|
347
|
-
output = res.to_data.dot_query(query)
|
351
|
+
output = res.to_data.dot_query(options[:query])
|
348
352
|
elsif tags.count.positive?
|
349
353
|
tags.each { |tag| output.concat(res.tags(tag)) }
|
350
354
|
else
|
@@ -393,13 +397,13 @@ command :images do |c|
|
|
393
397
|
res.curl
|
394
398
|
|
395
399
|
res = res.images(types: types)
|
400
|
+
res = { images: res }.dot_query(options[:query], 'images', full_tag: false) if options[:query]
|
396
401
|
|
397
|
-
if
|
398
|
-
|
399
|
-
|
402
|
+
if res.is_a?(Array)
|
403
|
+
output.concat(res)
|
404
|
+
else
|
405
|
+
output.push(res)
|
400
406
|
end
|
401
|
-
|
402
|
-
output.concat(res)
|
403
407
|
end
|
404
408
|
|
405
409
|
print_out(output, global_options[:yaml], pretty: global_options[:pretty])
|
@@ -439,9 +443,9 @@ command :links do |c|
|
|
439
443
|
res.curl
|
440
444
|
|
441
445
|
if options[:query]
|
442
|
-
|
443
|
-
|
444
|
-
output.concat(queried) if queried
|
446
|
+
queried = res.to_data.dot_query(options[:query], 'links', full_tag: false)
|
447
|
+
|
448
|
+
queried.is_a?(Array) ? output.concat(queried) : output.push(queried) if queried
|
445
449
|
else
|
446
450
|
output.concat(res.body_links)
|
447
451
|
end
|
@@ -469,9 +473,8 @@ command :headlinks do |c|
|
|
469
473
|
res.curl
|
470
474
|
|
471
475
|
if options[:query]
|
472
|
-
|
473
|
-
queried
|
474
|
-
output.concat(queried) if queried
|
476
|
+
queried = { links: res.to_data[:meta_links] }.dot_query(options[:query], 'links', full_tag: false)
|
477
|
+
output.push(queried) if queried
|
475
478
|
else
|
476
479
|
output.push(res.to_data[:meta_links])
|
477
480
|
end
|
@@ -516,10 +519,10 @@ command :scrape do |c|
|
|
516
519
|
if options[:search]
|
517
520
|
out = res.search(options[:search])
|
518
521
|
|
519
|
-
out = out.dot_query(options[:query]) if options[:query]
|
522
|
+
out = out.dot_query(options[:query], full_tag: false) if options[:query]
|
520
523
|
output.push(out)
|
521
524
|
elsif options[:query]
|
522
|
-
queried = res.to_data(url: url).dot_query(options[:query])
|
525
|
+
queried = res.to_data(url: url).dot_query(options[:query], full_tag: false)
|
523
526
|
output.push(queried) if queried
|
524
527
|
else
|
525
528
|
output.push(res.to_data(url: url))
|
data/lib/curly/array.rb
CHANGED
@@ -74,20 +74,19 @@ class ::Array
|
|
74
74
|
## @return [Array] elements matching dot query
|
75
75
|
##
|
76
76
|
def dot_query(path)
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
r
|
83
|
-
end
|
84
|
-
end
|
77
|
+
res = map { |el| el.dot_query(path) }
|
78
|
+
res.delete_if { |r| !r }
|
79
|
+
res.delete_if(&:empty?)
|
80
|
+
res
|
81
|
+
end
|
85
82
|
|
86
|
-
|
83
|
+
def get_value(path)
|
84
|
+
res = map { |el| el.get_value(path) }
|
85
|
+
res.is_a?(Array) && res.count == 1 ? res[0] : res
|
87
86
|
end
|
88
87
|
|
89
88
|
def to_html
|
90
|
-
map
|
89
|
+
map(&:to_html)
|
91
90
|
end
|
92
91
|
|
93
92
|
##
|
data/lib/curly/hash.rb
CHANGED
@@ -29,24 +29,50 @@ class ::Hash
|
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
32
|
+
def get_value(query)
|
33
|
+
return nil if self.empty?
|
34
|
+
query.split('.').inject(self) do |v, k|
|
35
|
+
k = k.to_i if v.is_a? Array
|
36
|
+
next unless v.key?(k)
|
37
|
+
v.fetch(k)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
32
41
|
# Extract data using a dot-syntax path
|
33
42
|
#
|
34
43
|
# @param path [String] The path
|
35
44
|
#
|
36
45
|
# @return Result of path query
|
37
46
|
#
|
38
|
-
def dot_query(path)
|
47
|
+
def dot_query(path, root = nil, full_tag: true)
|
39
48
|
res = stringify_keys
|
49
|
+
res = res[root] unless root.nil?
|
40
50
|
|
51
|
+
unless path =~ /\[/
|
52
|
+
return res.get_value(path)
|
53
|
+
end
|
54
|
+
|
55
|
+
enumerate = false
|
41
56
|
out = []
|
42
57
|
q = path.split(/(?<![\d.])\./)
|
43
|
-
|
44
|
-
|
45
|
-
pth.
|
58
|
+
|
59
|
+
while q.count.positive?
|
60
|
+
pth = q.shift
|
61
|
+
|
62
|
+
return nil if res.nil?
|
63
|
+
|
64
|
+
unless pth =~ /\[/
|
65
|
+
return res.get_value(pth)
|
66
|
+
end
|
67
|
+
|
68
|
+
el = Regexp.last_match(1) if pth =~ /\[([0-9,.]+)?\]/
|
69
|
+
pth.sub!(/\[([0-9,.]+)?\]/, '')
|
70
|
+
|
46
71
|
ats = []
|
47
72
|
at = []
|
48
|
-
while pth =~ /\[[+&,]?\w+ *[\^*$=<>]=? *\w
|
49
|
-
m = pth.match(/\[(?<com>[,+&])? *(?<key>\w+) *(?<op>[\^*$=<>]{1,2}) *(?<val>[^,&\]]+) */)
|
73
|
+
while pth =~ /\[[+&,]?\w+( *[\^*$=<>]=? *\w+)?/
|
74
|
+
m = pth.match(/\[(?<com>[,+&])? *(?<key>\w+)( *(?<op>[\^*$=<>]{1,2}) *(?<val>[^,&\]]+))? */)
|
75
|
+
|
50
76
|
comp = [m['key'], m['op'], m['val']]
|
51
77
|
case m['com']
|
52
78
|
when ','
|
@@ -56,36 +82,61 @@ class ::Hash
|
|
56
82
|
at.push(comp)
|
57
83
|
end
|
58
84
|
|
59
|
-
pth.sub!(/\[(?<com>[,&+])? *(?<key>\w+) *(?<op>[\^*$=<>]{1,2}) *(?<val>[^,&\]]+)
|
85
|
+
pth.sub!(/\[(?<com>[,&+])? *(?<key>\w+)( *(?<op>[\^*$=<>]{1,2}) *(?<val>[^,&\]]+))?/, '[')
|
60
86
|
end
|
61
87
|
ats.push(at) unless at.empty?
|
62
88
|
pth.sub!(/\[\]/, '')
|
63
89
|
|
64
|
-
res = res[0] if res.is_a?(Array)
|
90
|
+
res = res[0] if res.is_a?(Array) && res.count == 1
|
91
|
+
if ats.empty? && el.nil? && res.is_a?(Array) && res[0]&.key?(pth)
|
92
|
+
res.map! { |r| r[pth] }
|
93
|
+
next
|
94
|
+
end
|
65
95
|
|
66
|
-
|
96
|
+
res.map!(&:stringify_keys) if res.is_a?(Array) && res[0].is_a?(Hash)
|
97
|
+
# if res.is_a?(String) || (res.is_a?(Array) && res[0].is_a?(String))
|
98
|
+
# out.push(res)
|
99
|
+
# next
|
100
|
+
# end
|
67
101
|
|
68
|
-
|
102
|
+
# if res.is_a?(Array) && !pth.nil?
|
103
|
+
# return res.delete_if { |r| !r.key?(pth) }
|
104
|
+
# else
|
105
|
+
# return false if el.nil? && ats.empty? && res.is_a?(Hash) && (res.nil? || !res.key?(pth))
|
106
|
+
# end
|
107
|
+
tag = res
|
108
|
+
res = res[pth] unless pth.nil? || pth.empty?
|
69
109
|
|
70
|
-
|
110
|
+
pth = ''
|
71
111
|
|
112
|
+
return false if res.nil?
|
72
113
|
if ats.count.positive?
|
73
114
|
while ats.count.positive?
|
74
115
|
atr = ats.shift
|
75
116
|
res = [res] if res.is_a?(Hash)
|
76
|
-
keepers = res.filter do |r|
|
77
|
-
evaluate_comp(r, atr)
|
78
|
-
end
|
79
117
|
|
80
|
-
|
118
|
+
res.each do |r|
|
119
|
+
out.push(full_tag ? tag : r) if evaluate_comp(r, atr)
|
120
|
+
end
|
81
121
|
end
|
82
122
|
else
|
83
123
|
out = res
|
84
124
|
end
|
85
125
|
|
86
|
-
out = out
|
126
|
+
out = out.get_value(pth) unless pth.nil?
|
127
|
+
|
128
|
+
if el.nil? && out.is_a?(Array) && out[0].is_a?(Hash)
|
129
|
+
out.map! { |o|
|
130
|
+
o.stringify_keys
|
131
|
+
# o.key?(pth) ? o[pth] : o
|
132
|
+
}
|
133
|
+
elsif out.is_a?(Array) && el =~ /^[\d.,]+$/
|
134
|
+
out = out[eval(el)]
|
135
|
+
end
|
136
|
+
res = out
|
87
137
|
end
|
88
138
|
|
139
|
+
out = out[0] if out&.count == 1
|
89
140
|
out
|
90
141
|
end
|
91
142
|
|
@@ -112,6 +163,9 @@ class ::Hash
|
|
112
163
|
else
|
113
164
|
a[2]
|
114
165
|
end
|
166
|
+
r = r.get_value(key.to_s) if key.to_s =~ /\./
|
167
|
+
|
168
|
+
return r.key?(key) && !r[key].nil? && !r[key].empty? if val.nil?
|
115
169
|
|
116
170
|
if !r.key?(key)
|
117
171
|
keep = false
|
@@ -251,4 +305,8 @@ class ::Hash
|
|
251
305
|
hsh[k.to_s] = v.is_a?(Hash) ? v.stringify_keys : v
|
252
306
|
end
|
253
307
|
end
|
308
|
+
|
309
|
+
def stringify_keys!
|
310
|
+
replace stringify_keys
|
311
|
+
end
|
254
312
|
end
|
data/lib/curly/version.rb
CHANGED
data/src/_README.md
CHANGED
@@ -10,7 +10,7 @@ _If you find this useful, feel free to [buy me some coffee][donate]._
|
|
10
10
|
[donate]: https://brettterpstra.com/donate
|
11
11
|
<!--END GITHUB-->
|
12
12
|
|
13
|
-
The current version of `curlyq` is <!--VER-->0.0.
|
13
|
+
The current version of `curlyq` is <!--VER-->0.0.4<!--END VER-->.
|
14
14
|
|
15
15
|
CurlyQ is a utility that provides a simple interface for curl, with additional features for things like extracting images and links, finding elements by CSS selector or XPath, getting detailed header info, and more. It's designed to be part of a scripting pipeline, outputting everything as structured data (JSON or YAML). It also has rudimentary support for making calls to JSON endpoints easier, but it's expected that you'll use something like `jq` to parse the output.
|
16
16
|
|
data/test/curlyq_extract_test.rb
CHANGED
data/test/curlyq_html_test.rb
CHANGED
@@ -12,9 +12,9 @@ class CurlyQHtmlTest < Test::Unit::TestCase
|
|
12
12
|
|
13
13
|
def test_html_search_query
|
14
14
|
result = curlyq('html', '-s', '#main article .aligncenter', '-q', 'images[1]', 'https://brettterpstra.com')
|
15
|
-
json = JSON.parse(result)
|
15
|
+
json = JSON.parse(result)
|
16
16
|
|
17
|
-
assert_match(/aligncenter/, json[
|
17
|
+
assert_match(/aligncenter/, json['class'], 'Should have found an image with class "aligncenter"')
|
18
18
|
end
|
19
19
|
|
20
20
|
def test_html_query
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: curlyq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brett Terpstra
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-01-
|
11
|
+
date: 2024-01-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|