curlyq 0.0.12 → 0.0.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/Gemfile.lock +1 -1
- data/README.md +2 -2
- data/bin/curlyq +11 -0
- data/lib/curly/array.rb +2 -0
- data/lib/curly/curl/html.rb +20 -15
- data/lib/curly/version.rb +1 -1
- data/src/_README.md +1 -1
- data/test/curlyq_html_test.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: db3d3f02ab489eefdf85122b219d01a14d43322040009566548ba76ebed07767
|
4
|
+
data.tar.gz: 1979f168d55f7f11fab7625a3916154b97ec9a1736e9a204a561399de5a3406c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b47fd47823f2b74493affc0180eb96cb4146c34d89d670edb4350703009dccd341ce278f1221e9c7eafd8f2d8a117caa2ba6991a931e5927396091347d4970b6
|
7
|
+
data.tar.gz: 3143684aabac9d380583de9bfcb51045e78efacefc75051a7281dc2760a2efc24e70101f6c688ef396b7ba7f336d128f6641372a52c2e95be33082c8d9724712
|
data/CHANGELOG.md
CHANGED
@@ -1,7 +1,20 @@
|
|
1
|
+
### 0.0.13
|
2
|
+
|
3
|
+
2024-10-25 10:23
|
4
|
+
|
5
|
+
#### FIXED
|
6
|
+
|
7
|
+
- Fix tests, handle empty results better
|
8
|
+
|
1
9
|
### 0.0.12
|
2
10
|
|
3
11
|
2024-04-04 13:06
|
4
12
|
|
13
|
+
#### NEW
|
14
|
+
|
15
|
+
- Add --script option to screenshot command
|
16
|
+
- Add `execute` command for executing JavaScript on a page
|
17
|
+
|
5
18
|
### 0.0.11
|
6
19
|
|
7
20
|
2024-01-21 15:29
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -13,7 +13,7 @@ _If you find this useful, feel free to [buy me some coffee][donate]._
|
|
13
13
|
[jq]: https://github.com/jqlang/jq "Command-line JSON processor"
|
14
14
|
[yq]: https://github.com/mikefarah/yq "yq is a portable command-line YAML, JSON, XML, CSV, TOML and properties processor"
|
15
15
|
|
16
|
-
The current version of `curlyq` is 0.0.12.
|
16
|
+
The current version of `curlyq` is 0.0.13.
|
17
17
|
|
18
18
|
CurlyQ is a utility that provides a simple interface for curl, with additional features for things like extracting images and links, finding elements by CSS selector or XPath, getting detailed header info, and more. It's designed to be part of a scripting pipeline, outputting everything as structured data (JSON or YAML). It also has rudimentary support for making calls to JSON endpoints easier, but it's expected that you'll use something like [jq] to parse the output.
|
19
19
|
|
@@ -46,7 +46,7 @@ SYNOPSIS
|
|
46
46
|
curlyq [global options] command [command options] [arguments...]
|
47
47
|
|
48
48
|
VERSION
|
49
|
-
0.0.12
|
49
|
+
0.0.13
|
50
50
|
|
51
51
|
GLOBAL OPTIONS
|
52
52
|
--help - Show this message
|
data/bin/curlyq
CHANGED
@@ -148,6 +148,12 @@ command %i[html curl] do |c|
|
|
148
148
|
end
|
149
149
|
output.delete_if(&:nil?)
|
150
150
|
output.delete_if(&:empty?)
|
151
|
+
|
152
|
+
if output.nil? || output.empty?
|
153
|
+
print_out([], global_options[:yaml], raw: options[:raw], pretty: global_options[:pretty])
|
154
|
+
Process.exit 1
|
155
|
+
end
|
156
|
+
|
151
157
|
output.map! { |o| o[options[:raw].to_sym] } if options[:raw]
|
152
158
|
|
153
159
|
output = output.clean_output
|
@@ -486,6 +492,11 @@ command :images do |c|
|
|
486
492
|
end
|
487
493
|
end
|
488
494
|
|
495
|
+
if output.nil? || output.empty?
|
496
|
+
print_out([], global_options[:yaml], raw: options[:raw], pretty: global_options[:pretty])
|
497
|
+
Process.exit 1
|
498
|
+
end
|
499
|
+
|
489
500
|
print_out(output, global_options[:yaml], pretty: global_options[:pretty])
|
490
501
|
end
|
491
502
|
end
|
data/lib/curly/array.rb
CHANGED
data/lib/curly/curl/html.rb
CHANGED
@@ -289,6 +289,7 @@ module Curl
|
|
289
289
|
end
|
290
290
|
when /img/
|
291
291
|
next unless %i[all img].include?(type)
|
292
|
+
|
292
293
|
width = img[:attrs]['width']
|
293
294
|
height = img[:attrs]['height']
|
294
295
|
alt = img[:attrs]['alt']
|
@@ -301,7 +302,7 @@ module Curl
|
|
301
302
|
height: height || 'unknown',
|
302
303
|
alt: alt,
|
303
304
|
title: title,
|
304
|
-
attrs: img[:attrs]
|
305
|
+
attrs: img[:attrs]
|
305
306
|
}
|
306
307
|
end
|
307
308
|
end
|
@@ -333,7 +334,9 @@ module Curl
|
|
333
334
|
##
|
334
335
|
def h(level = '\d')
|
335
336
|
res = []
|
336
|
-
headlines = @body.to_enum(:scan, %r{<h(?<level>#{level})(?<tag> .*?)?>(?<text>.*?)</h#{level}>}i).map
|
337
|
+
headlines = @body.to_enum(:scan, %r{<h(?<level>#{level})(?<tag> .*?)?>(?<text>.*?)</h#{level}>}i).map do
|
338
|
+
Regexp.last_match
|
339
|
+
end
|
337
340
|
headlines.each do |m|
|
338
341
|
headline = { level: m['level'] }
|
339
342
|
if m['tag'].nil?
|
@@ -436,7 +439,13 @@ module Curl
|
|
436
439
|
attrs = tag['attrs'].strip.to_enum(:scan, /(?ix)
|
437
440
|
(?<key>[@a-z0-9-]+)(?:=(?<quot>["'])
|
438
441
|
(?<value>[^"']+)\k<quot>|[ >])?/i).map { Regexp.last_match }
|
439
|
-
attributes = attrs.each_with_object({})
|
442
|
+
attributes = attrs.each_with_object({}) do |a, hsh|
|
443
|
+
if a['value'].nil?
|
444
|
+
hsh[a['key']] = nil
|
445
|
+
else
|
446
|
+
hsh[a['key']] = a['key'] =~ /^(class|rel)$/ ? a['value'].split(/ /) : a['value']
|
447
|
+
end
|
448
|
+
end
|
440
449
|
end
|
441
450
|
{
|
442
451
|
tag: tag['tag'],
|
@@ -640,7 +649,7 @@ module Curl
|
|
640
649
|
driver.quit
|
641
650
|
end
|
642
651
|
|
643
|
-
|
652
|
+
warn "Executed JS on #{@url}"
|
644
653
|
|
645
654
|
res
|
646
655
|
end
|
@@ -680,9 +689,7 @@ module Curl
|
|
680
689
|
wait.until { driver.find_element(id: id) }
|
681
690
|
end
|
682
691
|
|
683
|
-
if script
|
684
|
-
res = driver.execute_script(script)
|
685
|
-
end
|
692
|
+
res = driver.execute_script(script) if script
|
686
693
|
|
687
694
|
sleep wait_seconds.to_i
|
688
695
|
|
@@ -698,7 +705,7 @@ module Curl
|
|
698
705
|
driver.quit
|
699
706
|
end
|
700
707
|
|
701
|
-
|
708
|
+
warn "Screenshot saved to #{destination}"
|
702
709
|
end
|
703
710
|
|
704
711
|
##
|
@@ -837,13 +844,11 @@ module Curl
|
|
837
844
|
## @return [Boolean] true if hostnames match
|
838
845
|
##
|
839
846
|
def same_origin?(href)
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
false
|
846
|
-
end
|
847
|
+
uri = URI(href)
|
848
|
+
origin = URI(@url)
|
849
|
+
uri.host == origin.host
|
850
|
+
rescue StandardError
|
851
|
+
false
|
847
852
|
end
|
848
853
|
end
|
849
854
|
end
|
data/lib/curly/version.rb
CHANGED
data/src/_README.md
CHANGED
@@ -13,7 +13,7 @@ _If you find this useful, feel free to [buy me some coffee][donate]._
|
|
13
13
|
[jq]: https://github.com/jqlang/jq "Command-line JSON processor"
|
14
14
|
[yq]: https://github.com/mikefarah/yq "yq is a portable command-line YAML, JSON, XML, CSV, TOML and properties processor"
|
15
15
|
|
16
|
-
The current version of `curlyq` is <!--VER-->0.0.
|
16
|
+
The current version of `curlyq` is <!--VER-->0.0.12<!--END VER-->.
|
17
17
|
|
18
18
|
CurlyQ is a utility that provides a simple interface for curl, with additional features for things like extracting images and links, finding elements by CSS selector or XPath, getting detailed header info, and more. It's designed to be part of a scripting pipeline, outputting everything as structured data (JSON or YAML). It also has rudimentary support for making calls to JSON endpoints easier, but it's expected that you'll use something like [jq] to parse the output.
|
19
19
|
|
data/test/curlyq_html_test.rb
CHANGED
@@ -11,7 +11,7 @@ class CurlyQHtmlTest < Test::Unit::TestCase
|
|
11
11
|
include CurlyQHelpers
|
12
12
|
|
13
13
|
def test_html_search_query
|
14
|
-
result = curlyq('html', '-s', '#main article .aligncenter', '-q', 'images[
|
14
|
+
result = curlyq('html', '-s', '#main article .aligncenter', '-q', 'images[0]', 'https://brettterpstra.com/2024/10/19/web-excursions-for-october-19-2024/')
|
15
15
|
json = JSON.parse(result)
|
16
16
|
|
17
17
|
assert_match(/aligncenter/, json[0]['class'], 'Should have found an image with class "aligncenter"')
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: curlyq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.12
|
4
|
+
version: 0.0.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brett Terpstra
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-10-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|