curlyq 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/CHANGELOG.md +8 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +41 -0
- data/LICENSE.txt +19 -0
- data/README.md +233 -0
- data/README.rdoc +6 -0
- data/Rakefile +77 -0
- data/bin/curlyq +477 -0
- data/curlyq.gemspec +27 -0
- data/curlyq.rdoc +355 -0
- data/lib/curly/array.rb +134 -0
- data/lib/curly/curl/html.rb +720 -0
- data/lib/curly/curl/json.rb +108 -0
- data/lib/curly/curl.rb +7 -0
- data/lib/curly/hash.rb +200 -0
- data/lib/curly/string.rb +91 -0
- data/lib/curly/version.rb +3 -0
- data/lib/curly.rb +12 -0
- data/src/_README.md +101 -0
- data/test/default_test.rb +14 -0
- data/test/test_helper.rb +4 -0
- metadata +191 -0
data/curlyq.rdoc
ADDED
@@ -0,0 +1,355 @@
|
|
1
|
+
== curlyq - A scriptable interface to curl
|
2
|
+
|
3
|
+
v0.0.1
|
4
|
+
|
5
|
+
=== Global Options
|
6
|
+
=== --help
|
7
|
+
Show this message
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
=== --[no-]pretty
|
12
|
+
Output "pretty" JSON
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
=== --version
|
17
|
+
Display the program version
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
=== -y|--[no-]yaml
|
22
|
+
Output YAML instead of JSON
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
=== Commands
|
27
|
+
==== Command: <tt>extract URL...</tt>
|
28
|
+
Extract contents between two regular expressions
|
29
|
+
|
30
|
+
|
31
|
+
===== Options
|
32
|
+
===== -a|--after arg
|
33
|
+
|
34
|
+
Text after extraction, parsed as regex
|
35
|
+
|
36
|
+
[Default Value] None
|
37
|
+
|
38
|
+
|
39
|
+
===== -b|--before arg
|
40
|
+
|
41
|
+
Text before extraction, parsed as regex
|
42
|
+
|
43
|
+
[Default Value] None
|
44
|
+
|
45
|
+
|
46
|
+
===== -h|--header arg
|
47
|
+
|
48
|
+
Define a header to send as key=value
|
49
|
+
|
50
|
+
[Default Value] None
|
51
|
+
|
52
|
+
|
53
|
+
===== -c|--[no-]compressed
|
54
|
+
Expect compressed results
|
55
|
+
|
56
|
+
|
57
|
+
|
58
|
+
===== --[no-]clean
|
59
|
+
Remove extra whitespace from results
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
===== --[no-]strip
|
64
|
+
Strip HTML tags from results
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
==== Command: <tt>headlinks URL...</tt>
|
69
|
+
Return all <head> links on a URL's page
|
70
|
+
|
71
|
+
|
72
|
+
===== Options
|
73
|
+
===== -q|--query|--filter arg
|
74
|
+
|
75
|
+
Filter output using dot-syntax path
|
76
|
+
|
77
|
+
[Default Value] None
|
78
|
+
|
79
|
+
|
80
|
+
==== Command: <tt>help command</tt>
|
81
|
+
Shows a list of commands or help for one command
|
82
|
+
|
83
|
+
Gets help for the application or its commands. Can also list the commands in a way helpful to creating a bash-style completion function
|
84
|
+
===== Options
|
85
|
+
===== -c
|
86
|
+
List commands one per line, to assist with shell completion
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
==== Command: <tt>html|curl URL...</tt>
|
91
|
+
Curl URL and output its elements, multiple URLs allowed
|
92
|
+
|
93
|
+
|
94
|
+
===== Options
|
95
|
+
===== -b|--browser arg
|
96
|
+
|
97
|
+
Use a browser to retrieve a dynamic web page (firefox, chrome)
|
98
|
+
|
99
|
+
[Default Value] None
|
100
|
+
[Must Match] (?-mix:^[fc].*?$)
|
101
|
+
|
102
|
+
|
103
|
+
===== -f|--fallback arg
|
104
|
+
|
105
|
+
If curl doesn't work, use a fallback browser (firefox, chrome)
|
106
|
+
|
107
|
+
[Default Value] None
|
108
|
+
[Must Match] (?-mix:^[fc].*?$)
|
109
|
+
|
110
|
+
|
111
|
+
===== -h|--header arg
|
112
|
+
|
113
|
+
Define a header to send as "key=value"
|
114
|
+
|
115
|
+
[Default Value] None
|
116
|
+
|
117
|
+
|
118
|
+
===== -q|--query|--filter arg
|
119
|
+
|
120
|
+
Filter output using dot-syntax path
|
121
|
+
|
122
|
+
[Default Value] None
|
123
|
+
|
124
|
+
|
125
|
+
===== -r|--raw arg
|
126
|
+
|
127
|
+
Output a raw value for a key
|
128
|
+
|
129
|
+
[Default Value] None
|
130
|
+
|
131
|
+
|
132
|
+
===== --search arg
|
133
|
+
|
134
|
+
Return an array of matches to a CSS or XPath query
|
135
|
+
|
136
|
+
[Default Value] None
|
137
|
+
|
138
|
+
|
139
|
+
===== -I|--info
|
140
|
+
Only retrieve headers/info
|
141
|
+
|
142
|
+
|
143
|
+
|
144
|
+
===== -c|--compressed
|
145
|
+
Expect compressed results
|
146
|
+
|
147
|
+
|
148
|
+
|
149
|
+
===== --[no-]clean
|
150
|
+
Remove extra whitespace from results
|
151
|
+
|
152
|
+
|
153
|
+
|
154
|
+
===== --[no-]ignore_fragments
|
155
|
+
Ignore fragment hrefs when gathering content links
|
156
|
+
|
157
|
+
|
158
|
+
|
159
|
+
===== --[no-]ignore_relative
|
160
|
+
Ignore relative hrefs when gathering content links
|
161
|
+
|
162
|
+
|
163
|
+
|
164
|
+
===== -x|--external_links_only
|
165
|
+
Only gather external links
|
166
|
+
|
167
|
+
|
168
|
+
|
169
|
+
==== Command: <tt>images URL...</tt>
|
170
|
+
Extract all images from a URL
|
171
|
+
|
172
|
+
|
173
|
+
===== Options
|
174
|
+
===== -t|--type arg
|
175
|
+
|
176
|
+
Type of images to return (img, srcset, opengraph, all)
|
177
|
+
|
178
|
+
[Default Value] ["all"]
|
179
|
+
|
180
|
+
|
181
|
+
===== -c|--[no-]compressed
|
182
|
+
Expect compressed results
|
183
|
+
|
184
|
+
|
185
|
+
|
186
|
+
===== --[no-]clean
|
187
|
+
Remove extra whitespace from results
|
188
|
+
|
189
|
+
|
190
|
+
|
191
|
+
==== Command: <tt>json URL...</tt>
|
192
|
+
Get a JSON response from a URL, multiple URLs allowed
|
193
|
+
|
194
|
+
|
195
|
+
===== Options
|
196
|
+
===== -h|--header arg
|
197
|
+
|
198
|
+
Define a header to send as key=value
|
199
|
+
|
200
|
+
[Default Value] None
|
201
|
+
|
202
|
+
|
203
|
+
===== -q|--query|--filter arg
|
204
|
+
|
205
|
+
Filter output using dot-syntax path
|
206
|
+
|
207
|
+
[Default Value] None
|
208
|
+
|
209
|
+
|
210
|
+
===== -c|--[no-]compressed
|
211
|
+
Expect compressed results
|
212
|
+
|
213
|
+
|
214
|
+
|
215
|
+
==== Command: <tt>links URL...</tt>
|
216
|
+
Return all links on a URL's page
|
217
|
+
|
218
|
+
|
219
|
+
===== Options
|
220
|
+
===== -q|--query|--filter arg
|
221
|
+
|
222
|
+
Filter output using dot-syntax path
|
223
|
+
|
224
|
+
[Default Value] None
|
225
|
+
|
226
|
+
|
227
|
+
===== -d|--[no-]dedup
|
228
|
+
Filter out duplicate links, preserving only first one
|
229
|
+
|
230
|
+
|
231
|
+
|
232
|
+
===== --[no-]ignore_fragments
|
233
|
+
Ignore fragment hrefs when gathering content links
|
234
|
+
|
235
|
+
|
236
|
+
|
237
|
+
===== --[no-]ignore_relative
|
238
|
+
Ignore relative hrefs when gathering content links
|
239
|
+
|
240
|
+
|
241
|
+
|
242
|
+
===== -x|--external_links_only
|
243
|
+
Only gather external links
|
244
|
+
|
245
|
+
|
246
|
+
|
247
|
+
==== Command: <tt>scrape URL...</tt>
|
248
|
+
Scrape a page using a web browser, for dynamic (JS) pages. Be sure to have the selected --browser installed.
|
249
|
+
|
250
|
+
|
251
|
+
===== Options
|
252
|
+
===== -b|--browser arg
|
253
|
+
|
254
|
+
Browser to use (firefox, chrome)
|
255
|
+
|
256
|
+
[Default Value] None
|
257
|
+
|
258
|
+
|
259
|
+
===== -h|--header arg
|
260
|
+
|
261
|
+
Define a header to send as "key=value"
|
262
|
+
|
263
|
+
[Default Value] None
|
264
|
+
|
265
|
+
|
266
|
+
===== -q|--query|--filter arg
|
267
|
+
|
268
|
+
Filter output using dot-syntax path
|
269
|
+
|
270
|
+
[Default Value] None
|
271
|
+
|
272
|
+
|
273
|
+
===== -r|--raw arg
|
274
|
+
|
275
|
+
Output a raw value for a key
|
276
|
+
|
277
|
+
[Default Value] None
|
278
|
+
|
279
|
+
|
280
|
+
===== --search arg
|
281
|
+
|
282
|
+
Return an array of matches to a CSS or XPath query
|
283
|
+
|
284
|
+
[Default Value] None
|
285
|
+
|
286
|
+
|
287
|
+
===== --[no-]clean
|
288
|
+
Remove extra whitespace from results
|
289
|
+
|
290
|
+
|
291
|
+
|
292
|
+
==== Command: <tt>screenshot URL...</tt>
|
293
|
+
Save a screenshot of the URL
|
294
|
+
|
295
|
+
|
296
|
+
===== Options
|
297
|
+
===== -b|--browser arg
|
298
|
+
|
299
|
+
Browser to use (firefox, chrome)
|
300
|
+
|
301
|
+
[Default Value] chrome
|
302
|
+
[Must Match] (?-mix:^[fc].*?$)
|
303
|
+
|
304
|
+
|
305
|
+
===== -o|--out|--file arg
|
306
|
+
|
307
|
+
File destination
|
308
|
+
|
309
|
+
[Default Value] None
|
310
|
+
|
311
|
+
|
312
|
+
===== -t|--type arg
|
313
|
+
|
314
|
+
Type of screenshot to save (full (requires firefox), print, visible)
|
315
|
+
|
316
|
+
[Default Value] full
|
317
|
+
[Must Match] (?-mix:^[fpv].*?$)
|
318
|
+
|
319
|
+
|
320
|
+
==== Command: <tt>tags URL...</tt>
|
321
|
+
Extract all instances of a tag
|
322
|
+
|
323
|
+
|
324
|
+
===== Options
|
325
|
+
===== -h|--header arg
|
326
|
+
|
327
|
+
Define a header to send as key=value
|
328
|
+
|
329
|
+
[Default Value] None
|
330
|
+
|
331
|
+
|
332
|
+
===== -q|--query|--search arg
|
333
|
+
|
334
|
+
CSS/XPath query
|
335
|
+
|
336
|
+
[Default Value] None
|
337
|
+
|
338
|
+
|
339
|
+
===== -t|--tag arg
|
340
|
+
|
341
|
+
Specify a tag to collect
|
342
|
+
|
343
|
+
[Default Value] None
|
344
|
+
|
345
|
+
|
346
|
+
===== -c|--[no-]compressed
|
347
|
+
Expect compressed results
|
348
|
+
|
349
|
+
|
350
|
+
|
351
|
+
===== --[no-]clean
|
352
|
+
Remove extra whitespace from results
|
353
|
+
|
354
|
+
|
355
|
+
|
data/lib/curly/array.rb
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Array helpers
|
4
|
+
class ::Array
  ##
  ## Remove extra spaces from each element of an array of
  ## strings by calling #clean on every element
  ##
  ## NOTE(review): the element-level #clean is not defined in
  ## this file; presumably a String extension -- confirm.
  ##
  ## @return [Array] new array of cleaned elements
  ##
  def clean
    map(&:clean)
  end

  ##
  ## Destructive version of #clean; replaces the receiver's
  ## contents in place
  ##
  ## @see #clean
  ##
  def clean!
    replace clean
  end

  ##
  ## Strip HTML tags from each element of an array of
  ## strings by calling #strip_tags on every element
  ##
  ## NOTE(review): the element-level #strip_tags is not defined
  ## in this file; presumably a String extension -- confirm.
  ##
  ## @return [Array] array of strings with HTML tags removed
  ##
  def strip_tags
    map(&:strip_tags)
  end

  ##
  ## Destructive version of #strip_tags
  ##
  ## @see #strip_tags
  ##
  def strip_tags!
    replace strip_tags
  end

  ##
  ## Remove duplicate links from an array of link objects.
  ## Two links are duplicates when their :href values are equal
  ## after removing a single trailing slash. The first
  ## occurrence is kept and the original order is preserved.
  ##
  ## @return [Array] deduped array of link objects
  ##
  def dedup_links
    # uniq with a block keeps the first element for each distinct key, which
    # matches the previous hand-rolled loop without its O(n^2) include? scan.
    uniq { |link| link[:href].sub(%r{/$}, '') }
  end

  ##
  ## Destructive version of #dedup_links
  ##
  ## @see #dedup_links
  ##
  def dedup_links!
    replace dedup_links
  end

  ##
  ## Convert and execute a dot-syntax query on the array
  ##
  ## The path may start with "[N]" or "[N..M]" to select
  ## element(s) of the receiver first. When that selection is a
  ## Hash, the rest of the path is handed to Hash#dot_query.
  ## Otherwise every selected element is walked segment by
  ## segment; a segment may carry comparison filters such as
  ## "[key>value]" and a trailing "[N]"/"[N..M]" index that is
  ## applied to the filtered results.
  ##
  ## The path argument is never modified.
  ##
  ## @param path [String] The dot-syntax path
  ##
  ## @return [Array, false] One result per selected element, or
  ##         false as soon as an element lacks a plain key
  ##         named in the path
  ##
  ## @raise [ArgumentError] if a bracketed index is neither an
  ##        integer nor a range of two integers
  ##
  def dot_query(path)
    # Work on a copy. Earlier code called sub! on the argument, which changed
    # the caller's string and raised FrozenError for frozen strings.
    query = path.dup
    items = self

    # Optional leading index. The earlier pattern used the character class
    # [\d+.], which matches exactly one character, so "[10]" and "[0..2]"
    # were never recognized as a leading index.
    if (lead = query.match(/\A\[([\d.]+)\]\.?/))
      items = self[dot_index(lead[1])]
      query = lead.post_match
    end

    return items.dot_query(query) if items.is_a?(Hash)

    output = []
    # Array() turns an out-of-range selection (nil) into an empty list and
    # wraps a single non-array element so it can be iterated.
    Array(items).each do |item|
      res = item.is_a?(Hash) ? item.stringify_keys : item
      out = []

      # Split on dots that are not part of a numeric range such as "0..2".
      query.split(/(?<![\d.])\./).each do |pth|
        # Pull the first numeric "[...]" index off the segment.
        el = pth[/\[([0-9,.]+)\]/, 1]
        pth = pth.sub(/\[([0-9,.]+)\]/, '')

        # Collect comparison filters; each pass consumes one comparison and
        # leaves the opening bracket behind for the next pass.
        ats = []
        at = []
        while pth =~ /\[[+&,]?\w+ *[\^*$=<>]=? *\w+/
          m = pth.match(/\[(?<com>[,+&])? *(?<key>\w+) *(?<op>[\^*$=<>]{1,2}) *(?<val>\w+) */)
          comp = [m['key'], m['op'], m['val']]
          case m['com']
          when ','
            ats.push(comp)
            at = []
          else
            at.push(comp)
          end

          pth = pth.sub(/\[(?<com>[,&+])? *(?<key>\w+) *(?<op>[\^*$=<>]{1,2}) *(?<val>\w+)/, '[')
        end
        ats.push(at) unless at.empty?
        pth = pth.sub(/\[\]/, '')

        # A plain key that is absent aborts the whole query. Values that do
        # not respond to key? are treated as not having the key.
        return false if el.nil? && ats.empty? && !(res.respond_to?(:key?) && res.key?(pth))

        res = res[pth] unless pth.empty?

        # NOTE(review): evaluate_comp is not defined in this class as shown
        # here; confirm it is reachable from Array at runtime.
        # NOTE(review): a segment without filters adds nothing to out, so a
        # path made only of plain keys yields an empty result per element.
        ats.each do |atr|
          out.concat(res.filter { |r| evaluate_comp(r, atr) })
        end

        out = out[dot_index(el)] if el && out.is_a?(Array)
      end

      output.push(out)
    end

    output
  end

  private

  ##
  ## Parse the text inside a bracketed index without eval.
  ## Query text comes from the user, so it is parsed here
  ## instead of being executed as Ruby code.
  ##
  ## @param spec [String] "N", "N..M" or "N...M"
  ##
  ## @return [Integer, Range] value suitable for Array#[]
  ##
  ## @raise [ArgumentError] for any other form
  ##
  def dot_index(spec)
    case spec
    when /\A\d+\z/
      spec.to_i
    when /\A(\d+)(\.{2,3})(\d+)\z/
      first = Regexp.last_match(1).to_i
      last = Regexp.last_match(3).to_i
      # Three dots make the range exclusive, as in Ruby's own syntax.
      Range.new(first, last, Regexp.last_match(2).length == 3)
    else
      raise ArgumentError, "Invalid index in dot-syntax path: [#{spec}]"
    end
  end
end
|