curlyq 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/curlyq.rdoc ADDED
@@ -0,0 +1,355 @@
1
+ == curlyq - A scriptable interface to curl
2
+
3
+ v0.0.1
4
+
5
+ === Global Options
6
+ === --help
7
+ Show this message
8
+
9
+
10
+
11
+ === --[no-]pretty
12
+ Output "pretty" JSON
13
+
14
+
15
+
16
+ === --version
17
+ Display the program version
18
+
19
+
20
+
21
+ === -y|--[no-]yaml
22
+ Output YAML instead of JSON
23
+
24
+
25
+
26
+ === Commands
27
+ ==== Command: <tt>extract URL...</tt>
28
+ Extract contents between two regular expressions
29
+
30
+
31
+ ===== Options
32
+ ===== -a|--after arg
33
+
34
+ Text after extraction, parsed as regex
35
+
36
+ [Default Value] None
37
+
38
+
39
+ ===== -b|--before arg
40
+
41
+ Text before extraction, parsed as regex
42
+
43
+ [Default Value] None
44
+
45
+
46
+ ===== -h|--header arg
47
+
48
+ Define a header to send as key=value
49
+
50
+ [Default Value] None
51
+
52
+
53
+ ===== -c|--[no-]compressed
54
+ Expect compressed results
55
+
56
+
57
+
58
+ ===== --[no-]clean
59
+ Remove extra whitespace from results
60
+
61
+
62
+
63
+ ===== --[no-]strip
64
+ Strip HTML tags from results
65
+
66
+
67
+
68
+ ==== Command: <tt>headlinks URL...</tt>
69
+ Return all <head> links on a URL's page
70
+
71
+
72
+ ===== Options
73
+ ===== -q|--query|--filter arg
74
+
75
+ Filter output using dot-syntax path
76
+
77
+ [Default Value] None
78
+
79
+
80
+ ==== Command: <tt>help command</tt>
81
+ Shows a list of commands or help for one command
82
+
83
+ Gets help for the application or its commands. Can also list the commands in a way helpful to creating a bash-style completion function
84
+ ===== Options
85
+ ===== -c
86
+ List commands one per line, to assist with shell completion
87
+
88
+
89
+
90
+ ==== Command: <tt>html|curl URL...</tt>
91
+ Curl URL and output its elements, multiple URLs allowed
92
+
93
+
94
+ ===== Options
95
+ ===== -b|--browser arg
96
+
97
+ Use a browser to retrieve a dynamic web page (firefox, chrome)
98
+
99
+ [Default Value] None
100
+ [Must Match] (?-mix:^[fc].*?$)
101
+
102
+
103
+ ===== -f|--fallback arg
104
+
105
+ If curl doesn't work, use a fallback browser (firefox, chrome)
106
+
107
+ [Default Value] None
108
+ [Must Match] (?-mix:^[fc].*?$)
109
+
110
+
111
+ ===== -h|--header arg
112
+
113
+ Define a header to send as "key=value"
114
+
115
+ [Default Value] None
116
+
117
+
118
+ ===== -q|--query|--filter arg
119
+
120
+ Filter output using dot-syntax path
121
+
122
+ [Default Value] None
123
+
124
+
125
+ ===== -r|--raw arg
126
+
127
+ Output a raw value for a key
128
+
129
+ [Default Value] None
130
+
131
+
132
+ ===== --search arg
133
+
134
+ Return an array of matches to a CSS or XPath query
135
+
136
+ [Default Value] None
137
+
138
+
139
+ ===== -I|--info
140
+ Only retrieve headers/info
141
+
142
+
143
+
144
+ ===== -c|--compressed
145
+ Expect compressed results
146
+
147
+
148
+
149
+ ===== --[no-]clean
150
+ Remove extra whitespace from results
151
+
152
+
153
+
154
+ ===== --[no-]ignore_fragments
155
+ Ignore fragment hrefs when gathering content links
156
+
157
+
158
+
159
+ ===== --[no-]ignore_relative
160
+ Ignore relative hrefs when gathering content links
161
+
162
+
163
+
164
+ ===== -x|--external_links_only
165
+ Only gather external links
166
+
167
+
168
+
169
+ ==== Command: <tt>images URL...</tt>
170
+ Extract all images from a URL
171
+
172
+
173
+ ===== Options
174
+ ===== -t|--type arg
175
+
176
+ Type of images to return (img, srcset, opengraph, all)
177
+
178
+ [Default Value] ["all"]
179
+
180
+
181
+ ===== -c|--[no-]compressed
182
+ Expect compressed results
183
+
184
+
185
+
186
+ ===== --[no-]clean
187
+ Remove extra whitespace from results
188
+
189
+
190
+
191
+ ==== Command: <tt>json URL...</tt>
192
+ Get a JSON response from a URL, multiple URLs allowed
193
+
194
+
195
+ ===== Options
196
+ ===== -h|--header arg
197
+
198
+ Define a header to send as key=value
199
+
200
+ [Default Value] None
201
+
202
+
203
+ ===== -q|--query|--filter arg
204
+
205
+ Filter output using dot-syntax path
206
+
207
+ [Default Value] None
208
+
209
+
210
+ ===== -c|--[no-]compressed
211
+ Expect compressed results
212
+
213
+
214
+
215
+ ==== Command: <tt>links URL...</tt>
216
+ Return all links on a URL's page
217
+
218
+
219
+ ===== Options
220
+ ===== -q|--query|--filter arg
221
+
222
+ Filter output using dot-syntax path
223
+
224
+ [Default Value] None
225
+
226
+
227
+ ===== -d|--[no-]dedup
228
+ Filter out duplicate links, preserving only first one
229
+
230
+
231
+
232
+ ===== --[no-]ignore_fragments
233
+ Ignore fragment hrefs when gathering content links
234
+
235
+
236
+
237
+ ===== --[no-]ignore_relative
238
+ Ignore relative hrefs when gathering content links
239
+
240
+
241
+
242
+ ===== -x|--external_links_only
243
+ Only gather external links
244
+
245
+
246
+
247
+ ==== Command: <tt>scrape URL...</tt>
248
+ Scrape a page using a web browser, for dynamic (JS) pages. Be sure to have the selected --browser installed.
249
+
250
+
251
+ ===== Options
252
+ ===== -b|--browser arg
253
+
254
+ Browser to use (firefox, chrome)
255
+
256
+ [Default Value] None
257
+
258
+
259
+ ===== -h|--header arg
260
+
261
+ Define a header to send as "key=value"
262
+
263
+ [Default Value] None
264
+
265
+
266
+ ===== -q|--query|--filter arg
267
+
268
+ Filter output using dot-syntax path
269
+
270
+ [Default Value] None
271
+
272
+
273
+ ===== -r|--raw arg
274
+
275
+ Output a raw value for a key
276
+
277
+ [Default Value] None
278
+
279
+
280
+ ===== --search arg
281
+
282
+ Return an array of matches to a CSS or XPath query
283
+
284
+ [Default Value] None
285
+
286
+
287
+ ===== --[no-]clean
288
+ Remove extra whitespace from results
289
+
290
+
291
+
292
+ ==== Command: <tt>screenshot URL...</tt>
293
+ Save a screenshot of the URL
294
+
295
+
296
+ ===== Options
297
+ ===== -b|--browser arg
298
+
299
+ Browser to use (firefox, chrome)
300
+
301
+ [Default Value] chrome
302
+ [Must Match] (?-mix:^[fc].*?$)
303
+
304
+
305
+ ===== -o|--out|--file arg
306
+
307
+ File destination
308
+
309
+ [Default Value] None
310
+
311
+
312
+ ===== -t|--type arg
313
+
314
+ Type of screenshot to save (full (requires firefox), print, visible)
315
+
316
+ [Default Value] full
317
+ [Must Match] (?-mix:^[fpv].*?$)
318
+
319
+
320
+ ==== Command: <tt>tags URL...</tt>
321
+ Extract all instances of a tag
322
+
323
+
324
+ ===== Options
325
+ ===== -h|--header arg
326
+
327
+ Define a header to send as key=value
328
+
329
+ [Default Value] None
330
+
331
+
332
+ ===== -q|--query|--search arg
333
+
334
+ CSS/XPath query
335
+
336
+ [Default Value] None
337
+
338
+
339
+ ===== -t|--tag arg
340
+
341
+ Specify a tag to collect
342
+
343
+ [Default Value] None
344
+
345
+
346
+ ===== -c|--[no-]compressed
347
+ Expect compressed results
348
+
349
+
350
+
351
+ ===== --[no-]clean
352
+ Remove extra whitespace from results
353
+
354
+
355
+
@@ -0,0 +1,134 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Array helpers
4
class ::Array
  ##
  ## Remove extra spaces from each element of an array of
  ## strings
  ##
  ## Delegates to each element's own #clean (not defined in
  ## this class), so every element must respond to it.
  ##
  ## @return [Array] cleaned array
  ##
  def clean
    map(&:clean)
  end

  ##
  ## Destructive version of #clean
  ##
  ## @see #clean
  ##
  def clean!
    replace clean
  end

  ##
  ## Strip HTML tags from each element of an array of
  ## strings
  ##
  ## Delegates to each element's own #strip_tags (not defined
  ## in this class).
  ##
  ## @return [Array] array of strings with HTML tags removed
  ##
  def strip_tags
    map(&:strip_tags)
  end

  ##
  ## Destructive version of #strip_tags
  ##
  ## @see #strip_tags
  ##
  def strip_tags!
    replace strip_tags
  end

  ##
  ## Remove duplicate links from an array of link objects
  ##
  ## Two links are duplicates when their :href values are
  ## equal after removing a single trailing slash. The first
  ## occurrence of each href is kept and original order is
  ## preserved.
  ##
  ## @return [Array] deduped array of link objects
  ##
  def dedup_links
    # uniq keeps the first element for each block value and uses
    # hashing, avoiding a linear scan of seen hrefs per element
    uniq { |link| link[:href].sub(%r{/$}, '') }
  end

  ##
  ## Destructive version of #dedup_links
  ##
  ## @see #dedup_links
  ##
  def dedup_links!
    replace dedup_links
  end

  ##
  ## Convert and execute a dot-syntax query on the array
  ##
  ## The path may start with an index prefix such as "[2]" or
  ## "[1..3]" which selects from this array before the rest
  ## of the path is applied. Remaining segments are separated
  ## by dots and may carry a numeric index ("key[0]") and/or
  ## attribute comparisons ("key[attr=value]").
  ##
  ## The path string passed in is never modified.
  ##
  ## @param path [String] The dot-syntax path
  ##
  ## @return [Array, false] Matching elements; false as soon
  ##         as a segment with no index and no comparisons
  ##         names a key that is missing from an element
  ##
  def dot_query(path)
    output = []
    # One or more digits/dots: a plain index ("10") or a range ("1..3")
    if path =~ /^\[([\d.]+)\]\.?/
      int = Regexp.last_match(1)
      # Non-destructive: rebinding the local leaves the caller's string
      # intact, so the same query can be reused (and may be frozen)
      path = path.sub(/^\[[\d.]+\]\.?/, '')
      # SECURITY/NOTE(review): eval input is limited by the regex above to
      # digits and dots, but malformed input such as "1." raises SyntaxError
      items = self[eval(int)]
    else
      items = self
    end

    if items.is_a? Hash
      # Hash#dot_query is not defined in this class
      output = items.dot_query(path)
    else
      items.each do |item|
        # stringify_keys is not defined in this class
        res = item.is_a?(Hash) ? item.stringify_keys : item
        out = []
        # Split on dots that are not part of a number or a range ("1..3")
        q = path.split(/(?<![\d.])\./)
        q.each do |pth|
          # Numeric index / range attached to this segment, e.g. "key[0]"
          el = Regexp.last_match(1) if pth =~ /\[([0-9,.]+)\]/
          pth.sub!(/\[([0-9,.]+)\]/, '')
          ats = []
          at = []
          # Consume attribute comparisons one at a time. A comparison
          # introduced by "," pushes it to ats on its own and resets the
          # pending group; any other comparison joins the pending group.
          while pth =~ /\[[+&,]?\w+ *[\^*$=<>]=? *\w+/
            m = pth.match(/\[(?<com>[,+&])? *(?<key>\w+) *(?<op>[\^*$=<>]{1,2}) *(?<val>\w+) */)
            comp = [m['key'], m['op'], m['val']]
            case m['com']
            when ','
              ats.push(comp)
              at = []
            else
              at.push(comp)
            end

            pth.sub!(/\[(?<com>[,&+])? *(?<key>\w+) *(?<op>[\^*$=<>]{1,2}) *(?<val>\w+)/, '[')
          end
          ats.push(at) unless at.empty?
          pth.sub!(/\[\]/, '')

          # Aborts the whole query (not just this item) on a missing key
          return false if el.nil? && ats.empty? && !res.key?(pth)

          res = res[pth] unless pth.empty?

          # NOTE(review): out is only filled by comparison filters; a
          # segment without comparisons narrows res but adds nothing to
          # out -- confirm this is intended
          while ats.count.positive?
            atr = ats.shift

            keepers = res.filter do |r|
              # evaluate_comp is not defined in this class
              evaluate_comp(r, atr)
            end
            out.concat(keepers)
          end

          # SECURITY/NOTE(review): el is limited to digits, dots and commas;
          # a comma form such as "1,2" is not a valid expression on its own
          # and makes eval raise SyntaxError
          out = out[eval(el)] if out.is_a?(Array) && el =~ /^[\d.,]+$/
        end
        output.push(out)
      end
    end
    output
  end
end