curlyq 0.0.4 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
data/bin/curlyq CHANGED
@@ -71,7 +71,7 @@ command %i[html curl] do |c|
71
71
  c.switch %i[I info], negatable: false
72
72
 
73
73
  c.desc 'Regurn an array of matches to a CSS or XPath query'
74
- c.flag %i[search]
74
+ c.flag %i[s search]
75
75
 
76
76
  c.desc 'Define a header to send as "key=value"'
77
77
  c.flag %i[h header], multiple: true
@@ -110,25 +110,31 @@ command %i[html curl] do |c|
110
110
  output = []
111
111
 
112
112
  urls.each do |url|
113
- res = Curl::Html.new(url, { browser: options[:browser], fallback: options[:fallback],
114
- headers: headers, headers_only: options[:info],
115
- compressed: options[:compressed], clean: options[:clean],
116
- ignore_local_links: options[:ignore_relative],
117
- ignore_fragment_links: options[:ignore_fragments],
118
- external_links_only: options[:external_links_only] })
113
+ curl_settings = { browser: options[:browser], fallback: options[:fallback],
114
+ headers: headers, headers_only: options[:info],
115
+ compressed: options[:compressed], clean: options[:clean],
116
+ ignore_local_links: options[:ignore_relative],
117
+ ignore_fragment_links: options[:ignore_fragments],
118
+ external_links_only: options[:external_links_only] }
119
+ res = Curl::Html.new(url, curl_settings)
119
120
  res.curl
120
121
 
121
122
  if options[:info]
122
123
  output.push(res.headers)
123
- # print_out(res.headers, global_options[:yaml], raw: options[:raw], pretty: global_options[:pretty])
124
124
  next
125
125
  end
126
126
 
127
127
  if options[:search]
128
- out = res.search(options[:search])
128
+ source = res.search(options[:search], return_source: true)
129
129
 
130
- out = out.dot_query(options[:query]) if options[:query]
131
- output.push(out)
130
+ out = res.parse(source)
131
+
132
+ if options[:query]
133
+ out = out.to_data(url: url, clean: options[:clean]).dot_query(options[:query])
134
+ else
135
+ out = out.to_data
136
+ end
137
+ output.push([out])
132
138
  elsif options[:query]
133
139
  queried = res.to_data.dot_query(options[:query])
134
140
  output.push(queried) if queried
@@ -136,7 +142,7 @@ command %i[html curl] do |c|
136
142
  output.push(res.to_data(url: url))
137
143
  end
138
144
  end
139
-
145
+ output.delete_if(&:nil?)
140
146
  output.delete_if(&:empty?)
141
147
  output = output[0] if output.count == 1
142
148
  output.map! { |o| o[options[:raw].to_sym] } if options[:raw]
@@ -149,13 +155,13 @@ desc 'Save a screenshot of a URL'
149
155
  arg_name 'URL', multiple: true
150
156
  command :screenshot do |c|
151
157
  c.desc 'Type of screenshot to save (full (requires firefox), print, visible)'
152
- c.flag %i[t type], type: ScreenshotType, must_match: /^[fpv].*?$/, default_value: 'full'
158
+ c.flag %i[t type], type: ScreenshotType, must_match: /^[fpv].*?$/, default_value: 'visible'
153
159
 
154
160
  c.desc 'Browser to use (firefox, chrome)'
155
161
  c.flag %i[b browser], type: BrowserType, must_match: /^[fc].*?$/, default_value: 'chrome'
156
162
 
157
163
  c.desc 'File destination'
158
- c.flag %i[o out file]
164
+ c.flag %i[o out file], required: true
159
165
 
160
166
  c.desc 'Define a header to send as key=value'
161
167
  c.flag %i[h header], multiple: true
@@ -164,11 +170,19 @@ command :screenshot do |c|
164
170
  urls = args.join(' ').split(/[, ]+/)
165
171
  headers = break_headers(options[:header])
166
172
 
173
+ type = options[:type]
174
+ browser = options[:browser]
175
+
176
+ type = type.is_a?(Symbol) ? type : type.normalize_screenshot_type
177
+ browser = browser.is_a?(Symbol) ? browser : browser.normalize_browser_type
178
+
179
+ raise 'Full page screen shots only available with Firefox' if type == :full_page && browser != :firefox
180
+
167
181
  urls.each do |url|
168
182
  c = Curl::Html.new(url)
169
183
  c.headers = headers
170
- c.browser = options[:browser]
171
- c.screenshot(options[:out], type: options[:type])
184
+ c.browser = browser
185
+ c.screenshot(options[:out], type: type)
172
186
  end
173
187
  end
174
188
  end
@@ -208,12 +222,26 @@ command :json do |c|
208
222
  headers: res.headers
209
223
  })
210
224
  else
211
- json = json.dot_query(options[:query]) if options[:query]
225
+ if options[:query]
226
+ if options[:query] =~ /^json$/
227
+ res = json
228
+ elsif options[:query] =~ /^json\./
229
+ query = options[:query].sub(/^json\./, '')
230
+ else
231
+ query = options[:query]
232
+ end
233
+
234
+ res = json.dot_query(query)
235
+ else
236
+ res = res.to_data
237
+ end
212
238
 
213
- output.push(json)
239
+ output.push(res)
214
240
  end
215
241
  end
216
242
 
243
+ output = output[0] if output.count == 1
244
+
217
245
  print_out(output, global_options[:yaml], pretty: global_options[:pretty])
218
246
  end
219
247
  end
@@ -221,12 +249,18 @@ end
221
249
  desc 'Extract contents between two regular expressions'
222
250
  arg_name 'URL', multiple: true
223
251
  command :extract do |c|
224
- c.desc 'Text before extraction, parsed as regex'
252
+ c.desc 'Text before extraction'
225
253
  c.flag %i[b before]
226
254
 
227
- c.desc 'Text after extraction, parsed as regex'
255
+ c.desc 'Text after extraction'
228
256
  c.flag %i[a after]
229
257
 
258
+ c.desc 'Process before/after strings as regular expressions'
259
+ c.switch %i[r regex]
260
+
261
+ c.desc 'Include the before/after matches in the result'
262
+ c.switch %i[i include]
263
+
230
264
  c.desc 'Define a header to send as key=value'
231
265
  c.flag %i[h header], multiple: true
232
266
 
@@ -249,7 +283,15 @@ command :extract do |c|
249
283
  res = Curl::Html.new(url, { headers: headers, headers_only: false,
250
284
  compressed: options[:compressed], clean: options[:clean] })
251
285
  res.curl
252
- extracted = res.extract(options[:before], options[:after])
286
+ if options[:regex]
287
+ before = Regexp.new(options[:before])
288
+ after = Regexp.new(options[:after])
289
+ else
290
+ before = /#{Regexp.escape(options[:before])}/
291
+ after = /#{Regexp.escape(options[:after])}/
292
+ end
293
+
294
+ extracted = res.extract(before, after, inclusive: options[:include])
253
295
  extracted.strip_tags! if options[:strip]
254
296
  output.concat(extracted)
255
297
  end
@@ -262,10 +304,10 @@ desc 'Extract all instances of a tag'
262
304
  arg_name 'URL', multiple: true
263
305
  command :tags do |c|
264
306
  c.desc 'Define a header to send as key=value'
265
- c.flag %i[h header], multiple: true
307
+ c.flag %i[h header], multiple: true, arg_name: 'KEY=VAL'
266
308
 
267
309
  c.desc 'Specify a tag to collect'
268
- c.flag %i[t tag], multiple: true
310
+ c.flag %i[t tag], multiple: true, arg_name: 'TAG'
269
311
 
270
312
  c.desc 'Expect compressed results'
271
313
  c.switch %i[c compressed]
@@ -273,8 +315,14 @@ command :tags do |c|
273
315
  c.desc 'Remove extra whitespace from results'
274
316
  c.switch %i[clean]
275
317
 
276
- c.desc 'CSS/XPath query'
277
- c.flag %i[q query search]
318
+ c.desc 'Output the HTML source of the results'
319
+ c.switch %i[source html]
320
+
321
+ c.desc 'Dot syntax query to filter results'
322
+ c.flag %i[q query filter], arg_name: 'DOT_SYNTAX'
323
+
324
+ c.desc 'Regurn an array of matches to a CSS or XPath query'
325
+ c.flag %i[search], arg_name: 'CSS/XPATH'
278
326
 
279
327
  c.action do |global_options, options, args|
280
328
  urls = args.join(' ').split(/[, ]+/)
@@ -286,9 +334,17 @@ command :tags do |c|
286
334
  res = Curl::Html.new(url, { headers: headers, headers_only: options[:headers],
287
335
  compressed: options[:compressed], clean: options[:clean] })
288
336
  res.curl
337
+
289
338
  output = []
290
339
  if options[:search]
291
- output = res.tags.search(options[:search])
340
+ out = res.search(options[:search])
341
+
342
+ out = out.dot_query(options[:query]) if options[:query]
343
+ output.push(out)
344
+ elsif options[:query]
345
+ query = options[:query] =~ /^links/ ? options[:query] : "links#{options[:query]}"
346
+
347
+ output = res.to_data.dot_query(query)
292
348
  elsif tags.count.positive?
293
349
  tags.each { |tag| output.concat(res.tags(tag)) }
294
350
  else
@@ -296,7 +352,13 @@ command :tags do |c|
296
352
  end
297
353
  end
298
354
 
299
- print_out(output, global_options[:yaml], pretty: global_options[:pretty])
355
+ output = output[0] if output.count == 1
356
+
357
+ if options[:source]
358
+ puts output.to_html
359
+ else
360
+ print_out(output, global_options[:yaml], pretty: global_options[:pretty])
361
+ end
300
362
  end
301
363
  end
302
364
 
@@ -312,6 +374,9 @@ command :images do |c|
312
374
  c.desc 'Remove extra whitespace from results'
313
375
  c.switch %i[clean]
314
376
 
377
+ c.desc 'Filter output using dot-syntax path'
378
+ c.flag %i[q query filter]
379
+
315
380
  c.desc 'Define a header to send as key=value'
316
381
  c.flag %i[h header], multiple: true
317
382
 
@@ -326,7 +391,15 @@ command :images do |c|
326
391
  urls.each do |url|
327
392
  res = Curl::Html.new(url, { compressed: options[:compressed], clean: options[:clean] })
328
393
  res.curl
329
- output.concat(res.images(types: types))
394
+
395
+ res = res.images(types: types)
396
+
397
+ if options[:query]
398
+ query = options[:query] =~ /^images/ ? options[:query] : "images#{options[:query]}"
399
+ res = { images: res }.dot_query(query)
400
+ end
401
+
402
+ output.concat(res)
330
403
  end
331
404
 
332
405
  print_out(output, global_options[:yaml], pretty: global_options[:pretty])
@@ -367,7 +440,7 @@ command :links do |c|
367
440
 
368
441
  if options[:query]
369
442
  query = options[:query] =~ /^links/ ? options[:query] : "links#{options[:query]}"
370
- queried = { links: res.to_data[:links] }.dot_query(query)
443
+ queried = res.to_data.dot_query(query)
371
444
  output.concat(queried) if queried
372
445
  else
373
446
  output.concat(res.body_links)
@@ -414,7 +487,7 @@ desc %(Scrape a page using a web browser, for dynamic (JS) pages. Be sure to hav
414
487
  arg_name 'URL', multiple: true
415
488
  command :scrape do |c|
416
489
  c.desc 'Browser to use (firefox, chrome)'
417
- c.flag %i[b browser], type: BrowserType
490
+ c.flag %i[b browser], type: BrowserType, required: true
418
491
 
419
492
  c.desc 'Regurn an array of matches to a CSS or XPath query'
420
493
  c.flag %i[search]
@@ -437,30 +510,19 @@ command :scrape do |c|
437
510
  output = []
438
511
 
439
512
  urls.each do |url|
440
- driver = Selenium::WebDriver.for options[:browser]
441
- begin
442
- driver.get url
443
- res = driver.page_source
444
-
445
- res = Curl::Html.new(nil, { source: res, clean: options[:clean] })
446
- res.curl
447
- if options[:search]
448
- out = res.search(options[:search])
449
-
450
- out = out.dot_query(options[:query]) if options[:query]
451
- output.push(out)
452
- elsif options[:query]
453
- queried = res.to_data(url: url).dot_query(options[:query])
454
- output = queried if queried
455
- else
456
- output.push(res.to_data(url: url))
457
- end
513
+ res = Curl::Html.new(url, { browser: options[:browser], clean: options[:clean] })
514
+ res.curl
458
515
 
459
- # elements = driver.find_elements(css: options[:query])
516
+ if options[:search]
517
+ out = res.search(options[:search])
460
518
 
461
- # elements.each { |e| output.push(e.text.strip) }
462
- ensure
463
- driver.quit
519
+ out = out.dot_query(options[:query]) if options[:query]
520
+ output.push(out)
521
+ elsif options[:query]
522
+ queried = res.to_data(url: url).dot_query(options[:query])
523
+ output.push(queried) if queried
524
+ else
525
+ output.push(res.to_data(url: url))
464
526
  end
465
527
  end
466
528
 
data/curlyq.gemspec CHANGED
@@ -16,10 +16,14 @@ spec = Gem::Specification.new do |s|
16
16
  s.rdoc_options << '--title' << 'curlyq' << '--main' << 'README.rdoc' << '-ri'
17
17
  s.bindir = 'bin'
18
18
  s.executables << 'curlyq'
19
- s.add_development_dependency('rake','~> 0.9.2')
20
- s.add_development_dependency('rdoc', '~> 4.3')
21
- s.add_development_dependency('minitest', '~> 5.14')
19
+ s.add_development_dependency('rake','~> 13.0', '>= 13.0.1')
20
+ s.add_development_dependency('rdoc', '~> 6.3.1')
21
+ s.add_development_dependency('test-unit', '~> 3.4.4')
22
22
  s.add_development_dependency('yard', '~> 0.9', '>= 0.9.26')
23
+ s.add_development_dependency('tty-spinner', '~> 0.9', '>= 0.9.3')
24
+ s.add_development_dependency('tty-progressbar', '~> 0.18', '>= 0.18.2')
25
+ s.add_development_dependency('pastel', '~> 0.8.0')
26
+ s.add_development_dependency('parallel_tests', '~> 3.7', '>= 3.7.3')
23
27
  s.add_runtime_dependency('gli','~> 2.21.0')
24
28
  s.add_runtime_dependency('tty-which','~> 0.5.0')
25
29
  s.add_runtime_dependency('nokogiri','~> 1.16.0')
data/lib/curly/array.rb CHANGED
@@ -66,69 +66,94 @@ class ::Array
66
66
  replace dedup_links
67
67
  end
68
68
 
69
+ #---------------------------------------------------------
70
+ ## Run a query on array elements
69
71
  ##
70
- ## Convert and execute a dot-syntax query on the array
72
+ ## @param path [String] dot.syntax path to compare
71
73
  ##
72
- ## @param path [String] The dot-syntax path
73
- ##
74
- ## @return [Array] Matching elements
74
+ ## @return [Array] elements matching dot query
75
75
  ##
76
76
  def dot_query(path)
77
- output = []
78
- if path =~ /^\[([\d+.])\]\.?/
79
- int = Regexp.last_match(1)
80
- path.sub!(/^\[[\d.]+\]\.?/, '')
81
- items = self[eval(int)]
82
- else
83
- items = self
77
+ filter! do |tag|
78
+ r = tag.dot_query(path)
79
+ if r.is_a?(Array)
80
+ r.count.positive?
81
+ else
82
+ r
83
+ end
84
84
  end
85
85
 
86
- if items.is_a? Hash
87
- output = items.dot_query(path)
88
- else
89
- items.each do |item|
90
- res = item.is_a?(Hash) ? item.stringify_keys : item
91
- out = []
92
- q = path.split(/(?<![\d.])\./)
93
- q.each do |pth|
94
- el = Regexp.last_match(1) if pth =~ /\[([0-9,.]+)\]/
95
- pth.sub!(/\[([0-9,.]+)\]/, '')
96
- ats = []
97
- at = []
98
- while pth =~ /\[[+&,]?\w+ *[\^*$=<>]=? *\w+/
99
- m = pth.match(/\[(?<com>[,+&])? *(?<key>\w+) *(?<op>[\^*$=<>]{1,2}) *(?<val>\w+) */)
100
- comp = [m['key'], m['op'], m['val']]
101
- case m['com']
102
- when ','
103
- ats.push(comp)
104
- at = []
105
- else
106
- at.push(comp)
107
- end
108
-
109
- pth.sub!(/\[(?<com>[,&+])? *(?<key>\w+) *(?<op>[\^*$=<>]{1,2}) *(?<val>\w+)/, '[')
110
- end
111
- ats.push(at) unless at.empty?
112
- pth.sub!(/\[\]/, '')
113
-
114
- return false if el.nil? && ats.empty? && !res.key?(pth)
115
-
116
- res = res[pth] unless pth.empty?
117
-
118
- while ats.count.positive?
119
- atr = ats.shift
120
-
121
- keepers = res.filter do |r|
122
- evaluate_comp(r, atr)
123
- end
124
- out.concat(keepers)
125
- end
126
-
127
- out = out[eval(el)] if out.is_a?(Array) && el =~ /^[\d.,]+$/
86
+ return self
87
+ end
88
+
89
+ def to_html
90
+ map { |el| el.to_html }
91
+ end
92
+
93
+ ##
94
+ ## Test if a tag contains an attribute matching filter queries
95
+ ##
96
+ ## @param tag_name [String] The tag name
97
+ ## @param classes [String] The classes to match
98
+ ## @param id [String] The id attribute to
99
+ ## match
100
+ ## @param attribute [String] The attribute
101
+ ## @param operator [String] The operator, <>= *=
102
+ ## $= ^=
103
+ ## @param value [String] The value to match
104
+ ## @param descendant [Boolean] Check descendant tags
105
+ ##
106
+ def tag_match(tag_name, classes, id, attribute, operator, value, descendant: false)
107
+ tag = self
108
+ keep = true
109
+
110
+ keep = false if tag_name && !tag['tag'] =~ /^#{tag_name}$/i
111
+
112
+ if tag.key?('attrs') && tag['attrs']
113
+ if keep && id
114
+ tag_id = tag['attrs'].filter { |a| a['key'] == 'id' }.first['value']
115
+ keep = tag_id && tag_id =~ /#{id}/i
116
+ end
117
+
118
+ if keep && classes
119
+ cls = tag['attrs'].filter { |a| a['key'] == 'class' }.first
120
+ if cls
121
+ all = true
122
+ classes.each { |c| all = cls['value'].include?(c) }
123
+ keep = all
124
+ else
125
+ keep = false
128
126
  end
129
- output.push(out)
130
127
  end
128
+
129
+ if keep && attribute
130
+ attributes = tag['attrs'].filter { |a| a['key'] =~ /^#{attribute}$/i }
131
+ any = false
132
+ attributes.each do |a|
133
+ break if any
134
+
135
+ any = case operator
136
+ when /^*/
137
+ a['value'] =~ /#{value}/i
138
+ when /^\^/
139
+ a['value'] =~ /^#{value}/i
140
+ when /^\$/
141
+ a['value'] =~ /#{value}$/i
142
+ else
143
+ a['value'] =~ /^#{value}$/i
144
+ end
145
+ end
146
+ keep = any
147
+ end
148
+ end
149
+
150
+ return false if descendant && !keep
151
+
152
+ if !descendant && tag.key?('tags')
153
+ tags = tag['tags'].filter { |t| t.tag_match(tag_name, classes, id, attribute, operator, value) }
154
+ tags.count.positive?
155
+ else
156
+ keep
131
157
  end
132
- output
133
158
  end
134
159
  end