curlyq 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/CHANGELOG.md +8 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +41 -0
- data/LICENSE.txt +19 -0
- data/README.md +233 -0
- data/README.rdoc +6 -0
- data/Rakefile +77 -0
- data/bin/curlyq +477 -0
- data/curlyq.gemspec +27 -0
- data/curlyq.rdoc +355 -0
- data/lib/curly/array.rb +134 -0
- data/lib/curly/curl/html.rb +720 -0
- data/lib/curly/curl/json.rb +108 -0
- data/lib/curly/curl.rb +7 -0
- data/lib/curly/hash.rb +200 -0
- data/lib/curly/string.rb +91 -0
- data/lib/curly/version.rb +3 -0
- data/lib/curly.rb +12 -0
- data/src/_README.md +101 -0
- data/test/default_test.rb +14 -0
- data/test/test_helper.rb +4 -0
- metadata +191 -0
data/curlyq.rdoc
ADDED
@@ -0,0 +1,355 @@
|
|
1
|
+
== curlyq - A scriptable interface to curl
|
2
|
+
|
3
|
+
v0.0.1
|
4
|
+
|
5
|
+
=== Global Options
|
6
|
+
=== --help
|
7
|
+
Show this message
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
=== --[no-]pretty
|
12
|
+
Output "pretty" JSON
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
=== --version
|
17
|
+
Display the program version
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
=== -y|--[no-]yaml
|
22
|
+
Output YAML instead of JSON
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
=== Commands
|
27
|
+
==== Command: <tt>extract URL...</tt>
|
28
|
+
Extract contents between two regular expressions
|
29
|
+
|
30
|
+
|
31
|
+
===== Options
|
32
|
+
===== -a|--after arg
|
33
|
+
|
34
|
+
Text after extraction, parsed as regex
|
35
|
+
|
36
|
+
[Default Value] None
|
37
|
+
|
38
|
+
|
39
|
+
===== -b|--before arg
|
40
|
+
|
41
|
+
Text before extraction, parsed as regex
|
42
|
+
|
43
|
+
[Default Value] None
|
44
|
+
|
45
|
+
|
46
|
+
===== -h|--header arg
|
47
|
+
|
48
|
+
Define a header to send as key=value
|
49
|
+
|
50
|
+
[Default Value] None
|
51
|
+
|
52
|
+
|
53
|
+
===== -c|--[no-]compressed
|
54
|
+
Expect compressed results
|
55
|
+
|
56
|
+
|
57
|
+
|
58
|
+
===== --[no-]clean
|
59
|
+
Remove extra whitespace from results
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
===== --[no-]strip
|
64
|
+
Strip HTML tags from results
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
==== Command: <tt>headlinks URL...</tt>
|
69
|
+
Return all <head> links on URL's page
|
70
|
+
|
71
|
+
|
72
|
+
===== Options
|
73
|
+
===== -q|--query|--filter arg
|
74
|
+
|
75
|
+
Filter output using dot-syntax path
|
76
|
+
|
77
|
+
[Default Value] None
|
78
|
+
|
79
|
+
|
80
|
+
==== Command: <tt>help command</tt>
|
81
|
+
Shows a list of commands or help for one command
|
82
|
+
|
83
|
+
Gets help for the application or its commands. Can also list the commands in a way helpful to creating a bash-style completion function
|
84
|
+
===== Options
|
85
|
+
===== -c
|
86
|
+
List commands one per line, to assist with shell completion
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
==== Command: <tt>html|curl URL...</tt>
|
91
|
+
Curl URL and output its elements, multiple URLs allowed
|
92
|
+
|
93
|
+
|
94
|
+
===== Options
|
95
|
+
===== -b|--browser arg
|
96
|
+
|
97
|
+
Use a browser to retrieve a dynamic web page (firefox, chrome)
|
98
|
+
|
99
|
+
[Default Value] None
|
100
|
+
[Must Match] (?-mix:^[fc].*?$)
|
101
|
+
|
102
|
+
|
103
|
+
===== -f|--fallback arg
|
104
|
+
|
105
|
+
If curl doesn't work, use a fallback browser (firefox, chrome)
|
106
|
+
|
107
|
+
[Default Value] None
|
108
|
+
[Must Match] (?-mix:^[fc].*?$)
|
109
|
+
|
110
|
+
|
111
|
+
===== -h|--header arg
|
112
|
+
|
113
|
+
Define a header to send as "key=value"
|
114
|
+
|
115
|
+
[Default Value] None
|
116
|
+
|
117
|
+
|
118
|
+
===== -q|--query|--filter arg
|
119
|
+
|
120
|
+
Filter output using dot-syntax path
|
121
|
+
|
122
|
+
[Default Value] None
|
123
|
+
|
124
|
+
|
125
|
+
===== -r|--raw arg
|
126
|
+
|
127
|
+
Output a raw value for a key
|
128
|
+
|
129
|
+
[Default Value] None
|
130
|
+
|
131
|
+
|
132
|
+
===== --search arg
|
133
|
+
|
134
|
+
Return an array of matches to a CSS or XPath query
|
135
|
+
|
136
|
+
[Default Value] None
|
137
|
+
|
138
|
+
|
139
|
+
===== -I|--info
|
140
|
+
Only retrieve headers/info
|
141
|
+
|
142
|
+
|
143
|
+
|
144
|
+
===== -c|--compressed
|
145
|
+
Expect compressed results
|
146
|
+
|
147
|
+
|
148
|
+
|
149
|
+
===== --[no-]clean
|
150
|
+
Remove extra whitespace from results
|
151
|
+
|
152
|
+
|
153
|
+
|
154
|
+
===== --[no-]ignore_fragments
|
155
|
+
Ignore fragment hrefs when gathering content links
|
156
|
+
|
157
|
+
|
158
|
+
|
159
|
+
===== --[no-]ignore_relative
|
160
|
+
Ignore relative hrefs when gathering content links
|
161
|
+
|
162
|
+
|
163
|
+
|
164
|
+
===== -x|--external_links_only
|
165
|
+
Only gather external links
|
166
|
+
|
167
|
+
|
168
|
+
|
169
|
+
==== Command: <tt>images URL...</tt>
|
170
|
+
Extract all images from a URL
|
171
|
+
|
172
|
+
|
173
|
+
===== Options
|
174
|
+
===== -t|--type arg
|
175
|
+
|
176
|
+
Type of images to return (img, srcset, opengraph, all)
|
177
|
+
|
178
|
+
[Default Value] ["all"]
|
179
|
+
|
180
|
+
|
181
|
+
===== -c|--[no-]compressed
|
182
|
+
Expect compressed results
|
183
|
+
|
184
|
+
|
185
|
+
|
186
|
+
===== --[no-]clean
|
187
|
+
Remove extra whitespace from results
|
188
|
+
|
189
|
+
|
190
|
+
|
191
|
+
==== Command: <tt>json URL...</tt>
|
192
|
+
Get a JSON response from a URL, multiple URLs allowed
|
193
|
+
|
194
|
+
|
195
|
+
===== Options
|
196
|
+
===== -h|--header arg
|
197
|
+
|
198
|
+
Define a header to send as key=value
|
199
|
+
|
200
|
+
[Default Value] None
|
201
|
+
|
202
|
+
|
203
|
+
===== -q|--query|--filter arg
|
204
|
+
|
205
|
+
Filter output using dot-syntax path
|
206
|
+
|
207
|
+
[Default Value] None
|
208
|
+
|
209
|
+
|
210
|
+
===== -c|--[no-]compressed
|
211
|
+
Expect compressed results
|
212
|
+
|
213
|
+
|
214
|
+
|
215
|
+
==== Command: <tt>links URL...</tt>
|
216
|
+
Return all links on a URL's page
|
217
|
+
|
218
|
+
|
219
|
+
===== Options
|
220
|
+
===== -q|--query|--filter arg
|
221
|
+
|
222
|
+
Filter output using dot-syntax path
|
223
|
+
|
224
|
+
[Default Value] None
|
225
|
+
|
226
|
+
|
227
|
+
===== -d|--[no-]dedup
|
228
|
+
Filter out duplicate links, preserving only first one
|
229
|
+
|
230
|
+
|
231
|
+
|
232
|
+
===== --[no-]ignore_fragments
|
233
|
+
Ignore fragment hrefs when gathering content links
|
234
|
+
|
235
|
+
|
236
|
+
|
237
|
+
===== --[no-]ignore_relative
|
238
|
+
Ignore relative hrefs when gathering content links
|
239
|
+
|
240
|
+
|
241
|
+
|
242
|
+
===== -x|--external_links_only
|
243
|
+
Only gather external links
|
244
|
+
|
245
|
+
|
246
|
+
|
247
|
+
==== Command: <tt>scrape URL...</tt>
|
248
|
+
Scrape a page using a web browser, for dynamic (JS) pages. Be sure to have the selected --browser installed.
|
249
|
+
|
250
|
+
|
251
|
+
===== Options
|
252
|
+
===== -b|--browser arg
|
253
|
+
|
254
|
+
Browser to use (firefox, chrome)
|
255
|
+
|
256
|
+
[Default Value] None
|
257
|
+
|
258
|
+
|
259
|
+
===== -h|--header arg
|
260
|
+
|
261
|
+
Define a header to send as "key=value"
|
262
|
+
|
263
|
+
[Default Value] None
|
264
|
+
|
265
|
+
|
266
|
+
===== -q|--query|--filter arg
|
267
|
+
|
268
|
+
Filter output using dot-syntax path
|
269
|
+
|
270
|
+
[Default Value] None
|
271
|
+
|
272
|
+
|
273
|
+
===== -r|--raw arg
|
274
|
+
|
275
|
+
Output a raw value for a key
|
276
|
+
|
277
|
+
[Default Value] None
|
278
|
+
|
279
|
+
|
280
|
+
===== --search arg
|
281
|
+
|
282
|
+
Return an array of matches to a CSS or XPath query
|
283
|
+
|
284
|
+
[Default Value] None
|
285
|
+
|
286
|
+
|
287
|
+
===== --[no-]clean
|
288
|
+
Remove extra whitespace from results
|
289
|
+
|
290
|
+
|
291
|
+
|
292
|
+
==== Command: <tt>screenshot URL...</tt>
|
293
|
+
Save a screenshot of the URL
|
294
|
+
|
295
|
+
|
296
|
+
===== Options
|
297
|
+
===== -b|--browser arg
|
298
|
+
|
299
|
+
Browser to use (firefox, chrome)
|
300
|
+
|
301
|
+
[Default Value] chrome
|
302
|
+
[Must Match] (?-mix:^[fc].*?$)
|
303
|
+
|
304
|
+
|
305
|
+
===== -o|--out|--file arg
|
306
|
+
|
307
|
+
File destination
|
308
|
+
|
309
|
+
[Default Value] None
|
310
|
+
|
311
|
+
|
312
|
+
===== -t|--type arg
|
313
|
+
|
314
|
+
Type of screenshot to save (full (requires firefox), print, visible)
|
315
|
+
|
316
|
+
[Default Value] full
|
317
|
+
[Must Match] (?-mix:^[fpv].*?$)
|
318
|
+
|
319
|
+
|
320
|
+
==== Command: <tt>tags URL...</tt>
|
321
|
+
Extract all instances of a tag
|
322
|
+
|
323
|
+
|
324
|
+
===== Options
|
325
|
+
===== -h|--header arg
|
326
|
+
|
327
|
+
Define a header to send as key=value
|
328
|
+
|
329
|
+
[Default Value] None
|
330
|
+
|
331
|
+
|
332
|
+
===== -q|--query|--search arg
|
333
|
+
|
334
|
+
CSS/XPath query
|
335
|
+
|
336
|
+
[Default Value] None
|
337
|
+
|
338
|
+
|
339
|
+
===== -t|--tag arg
|
340
|
+
|
341
|
+
Specify a tag to collect
|
342
|
+
|
343
|
+
[Default Value] None
|
344
|
+
|
345
|
+
|
346
|
+
===== -c|--[no-]compressed
|
347
|
+
Expect compressed results
|
348
|
+
|
349
|
+
|
350
|
+
|
351
|
+
===== --[no-]clean
|
352
|
+
Remove extra whitespace from results
|
353
|
+
|
354
|
+
|
355
|
+
|
data/lib/curly/array.rb
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Array helpers
|
4
|
+
class ::Array
  ##
  ## Remove extra spaces from each element of an array of
  ## strings
  ##
  ## Every element must respond to #clean; the call is simply
  ## forwarded to each element.
  ##
  ## @return [Array] new array holding the cleaned elements
  ##
  def clean
    map(&:clean)
  end

  ##
  ## Destructive version of #clean
  ##
  ## @see #clean
  ##
  ## @return [Array] self, with its contents replaced
  ##
  def clean!
    replace clean
  end

  ##
  ## Strip HTML tags from each element of an array of
  ## strings
  ##
  ## Every element must respond to #strip_tags; the call is
  ## simply forwarded to each element.
  ##
  ## @return [Array] array of strings with HTML tags removed
  ##
  def strip_tags
    map(&:strip_tags)
  end

  ##
  ## Destructive version of #strip_tags
  ##
  ## @see #strip_tags
  ##
  ## @return [Array] self, with its contents replaced
  ##
  def strip_tags!
    replace strip_tags
  end

  ##
  ## Remove duplicate links from an array of link objects
  ##
  ## Each element is read with +link[:href]+. Two links are
  ## duplicates when their hrefs are equal after removing one
  ## trailing slash. The first occurrence is kept and the
  ## original order is preserved.
  ##
  ## @return [Array] deduped array of link objects
  ##
  def dedup_links
    # uniq keeps the first element for every distinct block value,
    # replacing the previous quadratic include? scan.
    uniq { |link| link[:href].sub(%r{/$}, '') }
  end

  ##
  ## Destructive version of #dedup_links
  ##
  ## @see #dedup_links
  ##
  ## @return [Array] self, with its contents replaced
  ##
  def dedup_links!
    replace dedup_links
  end

  ##
  ## Convert and execute a dot-syntax query on the array
  ##
  ## Query syntax, as implemented below:
  ##
  ## * A leading +[N]+ or +[N..M]+ selects element(s) of this
  ##   array before anything else is evaluated. If that
  ##   selection is a Hash, the rest of the query is handed to
  ##   Hash#dot_query and its result is returned as is.
  ## * The remaining query is split on +.+ into segments.
  ## * A segment may carry comparisons such as +[key>value]+
  ##   (operator characters: ^ * $ = < >), optionally prefixed
  ##   with +,+ + or &, and one numeric index or range such as
  ##   +[0]+ or +[1..2]+ that is applied to the filtered result.
  ##
  ## The +path+ argument is never modified.
  ##
  ## NOTE(review): the per-item result is only filled by
  ## comparison filters, so a query made of plain keys yields an
  ## empty array per item -- confirm this is intended.
  ##
  ## NOTE(review): #evaluate_comp is not defined in this file; it
  ## must be reachable from Array instances -- verify.
  ##
  ## @param path [String] The dot-syntax path
  ##
  ## @return [Array] Matching elements, one entry per item
  ## @return [false] when a plain key segment is missing from an item
  ##
  ## @raise [ArgumentError] if a bracketed index is neither an
  ##                        integer nor a range of integers
  ##
  def dot_query(path)
    # Work on a copy: mutating the argument made a reused query lose
    # its leading index and raised FrozenError on frozen strings.
    query = path.dup
    items = self

    # The index class must repeat ([\d.]+) so multi-digit indexes and
    # ranges are recognised, not just a single character.
    if (lead = query.match(/\A\[([\d.]+)\]\.?/))
      items = self[dot_query_index(lead[1])]
      query = lead.post_match
    end

    return items.dot_query(query) if items.is_a?(Hash)

    output = []
    items.each do |item|
      res = item.is_a?(Hash) ? item.stringify_keys : item
      out = []

      # Split again for every item: the segments are edited in place below.
      query.split(/(?<![\d.])\./).each do |pth|
        # Pull the first numeric index/range out of the segment.
        el = pth[/\[([0-9,.]+)\]/, 1]
        pth.sub!(/\[([0-9,.]+)\]/, '')

        ats = []
        at = []
        # Consume comparisons one at a time, leaving a bare "[" behind
        # so the next comparison in the same brackets can be matched.
        while pth =~ /\[[+&,]?\w+ *[\^*$=<>]=? *\w+/
          m = pth.match(/\[(?<com>[,+&])? *(?<key>\w+) *(?<op>[\^*$=<>]{1,2}) *(?<val>\w+) */)
          comp = [m['key'], m['op'], m['val']]
          case m['com']
          when ','
            # NOTE(review): pushed unwrapped, unlike the grouped `at`
            # list pushed after the loop -- confirm evaluate_comp
            # accepts both shapes.
            ats.push(comp)
            at = []
          else
            at.push(comp)
          end

          pth.sub!(/\[(?<com>[,&+])? *(?<key>\w+) *(?<op>[\^*$=<>]{1,2}) *(?<val>\w+)/, '[')
        end
        ats.push(at) unless at.empty?
        pth.sub!(/\[\]/, '')

        # A plain key that the current value lacks aborts the whole query.
        return false if el.nil? && ats.empty? && !res.key?(pth)

        res = res[pth] unless pth.empty?

        until ats.empty?
          atr = ats.shift
          out.concat(res.filter { |r| evaluate_comp(r, atr) })
        end

        out = out[dot_query_index(el)] if el && out.is_a?(Array)
      end

      output.push(out)
    end
    output
  end

  private

  ##
  ## Convert the text found inside an index bracket to an
  ## Integer or Range without evaluating it as Ruby code
  ##
  ## Accepts "3", "1..3", "1...3" and the endless form "1..".
  ##
  ## @param spec [String] text between the brackets
  ##
  ## @return [Integer, Range] value usable with Array#[]
  ##
  ## @raise [ArgumentError] for any other input
  ##
  def dot_query_index(spec)
    case spec
    when /\A\d+\z/
      spec.to_i
    when /\A(\d+)(\.{2,3})(\d*)\z/
      first = Regexp.last_match(1).to_i
      exclusive = Regexp.last_match(2).length == 3
      last = Regexp.last_match(3)
      Range.new(first, last.empty? ? nil : last.to_i, exclusive)
    else
      raise ArgumentError, "Invalid index in query: [#{spec}]"
    end
  end
end
|