curlyq 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/CHANGELOG.md +8 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +41 -0
- data/LICENSE.txt +19 -0
- data/README.md +233 -0
- data/README.rdoc +6 -0
- data/Rakefile +77 -0
- data/bin/curlyq +477 -0
- data/curlyq.gemspec +27 -0
- data/curlyq.rdoc +355 -0
- data/lib/curly/array.rb +134 -0
- data/lib/curly/curl/html.rb +720 -0
- data/lib/curly/curl/json.rb +108 -0
- data/lib/curly/curl.rb +7 -0
- data/lib/curly/hash.rb +200 -0
- data/lib/curly/string.rb +91 -0
- data/lib/curly/version.rb +3 -0
- data/lib/curly.rb +12 -0
- data/src/_README.md +101 -0
- data/test/default_test.rb +14 -0
- data/test/test_helper.rb +4 -0
- metadata +191 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 4a73a5990b9c07f4d564216cd13c1ea0d73a833191c3f7734e7e3e5af2954b40
|
4
|
+
data.tar.gz: 8444276e61febd7b3e517eec56155a4f8754809fa8dc46c0d6e173737bca79e0
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ca1a8c0bfc122e8020b356018276e27647449834f30eb66d7561acf187ec6cd837b59564a722ceaad5b3e99ac47de4a9944dfc370e69b92575155988a81fcfd4
|
7
|
+
data.tar.gz: 3bc9ed736378cc70607d4f42ecbe1f8cc91fbe87243d0dda4f7dc9ff6e44f5cc33f687d00e188f25ae3494f47bbbfedd2f2e27e8b008048e22c6c10ce2dc3b7f
|
data/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
html
|
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
curlyq (0.0.2)
|
5
|
+
gli (~> 2.21.0)
|
6
|
+
nokogiri (~> 1.16.0)
|
7
|
+
selenium-webdriver (~> 4.16.0)
|
8
|
+
tty-which (~> 0.5.0)
|
9
|
+
|
10
|
+
GEM
|
11
|
+
remote: https://rubygems.org/
|
12
|
+
specs:
|
13
|
+
gli (2.21.1)
|
14
|
+
minitest (5.16.3)
|
15
|
+
nokogiri (1.16.0-arm64-darwin)
|
16
|
+
racc (~> 1.4)
|
17
|
+
racc (1.7.3)
|
18
|
+
rake (0.9.6)
|
19
|
+
rdoc (4.3.0)
|
20
|
+
rexml (3.2.6)
|
21
|
+
rubyzip (2.3.2)
|
22
|
+
selenium-webdriver (4.16.0)
|
23
|
+
rexml (~> 3.2, >= 3.2.5)
|
24
|
+
rubyzip (>= 1.2.2, < 3.0)
|
25
|
+
websocket (~> 1.0)
|
26
|
+
tty-which (0.5.0)
|
27
|
+
websocket (1.2.10)
|
28
|
+
yard (0.9.34)
|
29
|
+
|
30
|
+
PLATFORMS
|
31
|
+
arm64-darwin-20
|
32
|
+
|
33
|
+
DEPENDENCIES
|
34
|
+
curlyq!
|
35
|
+
minitest (~> 5.14)
|
36
|
+
rake (~> 0.9.2)
|
37
|
+
rdoc (~> 4.3)
|
38
|
+
yard (~> 0.9, >= 0.9.26)
|
39
|
+
|
40
|
+
BUNDLED WITH
|
41
|
+
2.2.29
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
5
|
+
in the Software without restriction, including without limitation the rights
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
copies of the Software, and to permit persons to whom the Software is furnished
|
8
|
+
to do so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice (including the next
|
11
|
+
paragraph) shall be included in all copies or substantial portions of the
|
12
|
+
Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
15
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
16
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
|
17
|
+
OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
18
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
|
19
|
+
OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,233 @@
|
|
1
|
+
# curlyq
|
2
|
+
|
3
|
+
[![Gem](https://img.shields.io/gem/v/curlyq.svg)](https://rubygems.org/gems/curlyq)
|
4
|
+
[![GitHub license](https://img.shields.io/github/license/ttscoff/curlyq.svg)](./LICENSE.txt)
|
5
|
+
|
6
|
+
**A command line helper for curl and web scraping**
|
7
|
+
|
8
|
+
_If you find this useful, feel free to [buy me some coffee][donate]._
|
9
|
+
|
10
|
+
|
11
|
+
The current version of `curlyq` is 0.0.2.
|
12
|
+

|
13
|
+
|
14
|
+
`curlyq` is a command that provides a simple interface for curl, with additional features for things like extracting images and links, finding elements by CSS selector or XPath, getting detailed header info, and more. It also has rudimentary support for making calls to JSON endpoints easier, but it's expected that you'll use something like `jq` to parse the output.
|
15
|
+
|
16
|
+
[github]: https://github.com/ttscoff/curlyq/
|
17
|
+
|
18
|
+
### Installation
|
19
|
+
|
20
|
+
Assuming you have Ruby and RubyGems installed, you can just run `gem install curlyq`. If you run into errors, try `gem install --user-install curlyq`, or use `sudo gem install curlyq`.
|
21
|
+
|
22
|
+
If you're using Homebrew, you have the option to install via [brew-gem](https://github.com/sportngin/brew-gem):
|
23
|
+
|
24
|
+
brew install brew-gem
|
25
|
+
brew gem install curlyq
|
26
|
+
|
27
|
+
If you don't have Ruby/RubyGems, you can install them pretty easily with Homebrew, rvm, or asdf.
|
28
|
+
|
29
|
+
### Usage
|
30
|
+
|
31
|
+
Run `curlyq help` for a list of commands. Run `curlyq help SUBCOMMAND` for details on a particular subcommand and its options.
|
32
|
+
|
33
|
+
```
|
34
|
+
NAME
|
35
|
+
curlyq - A scriptable interface to curl
|
36
|
+
|
37
|
+
SYNOPSIS
|
38
|
+
curlyq [global options] command [command options] [arguments...]
|
39
|
+
|
40
|
+
VERSION
|
41
|
+
0.0.2
|
42
|
+
|
43
|
+
GLOBAL OPTIONS
|
44
|
+
--help - Show this message
|
45
|
+
--[no-]pretty - Output "pretty" JSON (default: enabled)
|
46
|
+
--version - Display the program version
|
47
|
+
-y, --[no-]yaml - Output YAML instead of json
|
48
|
+
|
49
|
+
COMMANDS
|
50
|
+
extract - Extract contents between two regular expressions
|
51
|
+
headlinks - Return all <head> links on URL's page
|
52
|
+
help - Shows a list of commands or help for one command
|
53
|
+
html, curl - Curl URL and output its elements, multiple URLs allowed
|
54
|
+
images - Extract all images from a URL
|
55
|
+
json - Get a JSON response from a URL, multiple URLs allowed
|
56
|
+
links - Return all links on a URL's page
|
57
|
+
scrape - Scrape a page using a web browser, for dynamic (JS) pages. Be sure to have the selected --browser installed.
|
58
|
+
screenshot - Save a screenshot of a URL
|
59
|
+
tags - Extract all instances of a tag
|
60
|
+
```
|
61
|
+
|
62
|
+
#### Commands
|
63
|
+
|
64
|
+
curlyq makes use of subcommands, e.g. `curlyq html` or `curlyq extract`. Each subcommand takes its own options, but I've made an effort to standardize the choices between each command.
|
65
|
+
|
66
|
+
##### extract
|
67
|
+
|
68
|
+
```
|
69
|
+
NAME
|
70
|
+
extract - Extract contents between two regular expressions
|
71
|
+
|
72
|
+
SYNOPSIS
|
73
|
+
|
74
|
+
curlyq [global options] extract [command options] URL...
|
75
|
+
|
76
|
+
COMMAND OPTIONS
|
77
|
+
-a, --after=arg - Text after extraction, parsed as regex (default: none)
|
78
|
+
-b, --before=arg - Text before extraction, parsed as regex (default: none)
|
79
|
+
-c, --[no-]compressed - Expect compressed results
|
80
|
+
--[no-]clean - Remove extra whitespace from results
|
81
|
+
-h, --header=arg - Define a header to send as key=value (may be used more than once, default: none)
|
82
|
+
--[no-]strip - Strip HTML tags from results
|
83
|
+
```
|
84
|
+
|
85
|
+
|
86
|
+
##### headlinks
|
87
|
+
|
88
|
+
```
|
89
|
+
NAME
|
90
|
+
headlinks - Return all <head> links on URL's page
|
91
|
+
|
92
|
+
SYNOPSIS
|
93
|
+
|
94
|
+
curlyq [global options] headlinks [command options] URL...
|
95
|
+
|
96
|
+
COMMAND OPTIONS
|
97
|
+
-q, --query, --filter=arg - Filter output using dot-syntax path (default: none)
|
98
|
+
```
|
99
|
+
|
100
|
+
##### html
|
101
|
+
|
102
|
+
```
|
103
|
+
NAME
|
104
|
+
html - Curl URL and output its elements, multiple URLs allowed
|
105
|
+
|
106
|
+
SYNOPSIS
|
107
|
+
|
108
|
+
curlyq [global options] html [command options] URL...
|
109
|
+
|
110
|
+
COMMAND OPTIONS
|
111
|
+
-I, --info - Only retrieve headers/info
|
112
|
+
-b, --browser=arg - Use a browser to retrieve a dynamic web page (firefox, chrome) (default: none)
|
113
|
+
-c, --compressed - Expect compressed results
|
114
|
+
--[no-]clean - Remove extra whitespace from results
|
115
|
+
-f, --fallback=arg - If curl doesn't work, use a fallback browser (firefox, chrome) (default: none)
|
116
|
+
-h, --header=arg - Define a header to send as "key=value" (may be used more than once, default: none)
|
117
|
+
--[no-]ignore_fragments - Ignore fragment hrefs when gathering content links
|
118
|
+
--[no-]ignore_relative - Ignore relative hrefs when gathering content links
|
119
|
+
-q, --query, --filter=arg - Filter output using dot-syntax path (default: none)
|
120
|
+
-r, --raw=arg - Output a raw value for a key (default: none)
|
121
|
+
--search=arg - Return an array of matches to a CSS or XPath query (default: none)
|
122
|
+
-x, --external_links_only - Only gather external links
|
123
|
+
```
|
124
|
+
|
125
|
+
##### images
|
126
|
+
|
127
|
+
```
|
128
|
+
NAME
|
129
|
+
images - Extract all images from a URL
|
130
|
+
|
131
|
+
SYNOPSIS
|
132
|
+
|
133
|
+
curlyq [global options] images [command options] URL...
|
134
|
+
|
135
|
+
COMMAND OPTIONS
|
136
|
+
-c, --[no-]compressed - Expect compressed results
|
137
|
+
--[no-]clean - Remove extra whitespace from results
|
138
|
+
-t, --type=arg - Type of images to return (img, srcset, opengraph, all) (may be used more than once, default: ["all"])
|
139
|
+
```
|
140
|
+
|
141
|
+
##### json
|
142
|
+
|
143
|
+
```
|
144
|
+
NAME
|
145
|
+
json - Get a JSON response from a URL, multiple URLs allowed
|
146
|
+
|
147
|
+
SYNOPSIS
|
148
|
+
|
149
|
+
curlyq [global options] json [command options] URL...
|
150
|
+
|
151
|
+
COMMAND OPTIONS
|
152
|
+
-c, --[no-]compressed - Expect compressed results
|
153
|
+
-h, --header=arg - Define a header to send as key=value (may be used more than once, default: none)
|
154
|
+
-q, --query, --filter=arg - Filter output using dot-syntax path (default: none)
|
155
|
+
```
|
156
|
+
|
157
|
+
##### links
|
158
|
+
|
159
|
+
```
|
160
|
+
NAME
|
161
|
+
links - Return all links on a URL's page
|
162
|
+
|
163
|
+
SYNOPSIS
|
164
|
+
|
165
|
+
curlyq [global options] links [command options] URL...
|
166
|
+
|
167
|
+
COMMAND OPTIONS
|
168
|
+
-d, --[no-]dedup - Filter out duplicate links, preserving only first one
|
169
|
+
--[no-]ignore_fragments - Ignore fragment hrefs when gathering content links
|
170
|
+
--[no-]ignore_relative - Ignore relative hrefs when gathering content links
|
171
|
+
-q, --query, --filter=arg - Filter output using dot-syntax path (default: none)
|
172
|
+
-x, --external_links_only - Only gather external links
|
173
|
+
```
|
174
|
+
|
175
|
+
##### scrape
|
176
|
+
|
177
|
+
```
|
178
|
+
NAME
|
179
|
+
scrape - Scrape a page using a web browser, for dynamic (JS) pages. Be sure to have the selected --browser installed.
|
180
|
+
|
181
|
+
SYNOPSIS
|
182
|
+
|
183
|
+
curlyq [global options] scrape [command options] URL...
|
184
|
+
|
185
|
+
COMMAND OPTIONS
|
186
|
+
-b, --browser=arg - Browser to use (firefox, chrome) (default: none)
|
187
|
+
--[no-]clean - Remove extra whitespace from results
|
188
|
+
-h, --header=arg - Define a header to send as "key=value" (may be used more than once, default: none)
|
189
|
+
-q, --query, --filter=arg - Filter output using dot-syntax path (default: none)
|
190
|
+
-r, --raw=arg - Output a raw value for a key (default: none)
|
191
|
+
--search=arg - Return an array of matches to a CSS or XPath query (default: none)
|
192
|
+
```
|
193
|
+
|
194
|
+
##### screenshot
|
195
|
+
|
196
|
+
```
|
197
|
+
NAME
|
198
|
+
screenshot - Save a screenshot of a URL
|
199
|
+
|
200
|
+
SYNOPSIS
|
201
|
+
|
202
|
+
curlyq [global options] screenshot [command options] URL...
|
203
|
+
|
204
|
+
COMMAND OPTIONS
|
205
|
+
-b, --browser=arg - Browser to use (firefox, chrome) (default: chrome)
|
206
|
+
-o, --out, --file=arg - File destination (default: none)
|
207
|
+
-t, --type=arg - Type of screenshot to save (full (requires firefox), print, visible) (default: full)
|
208
|
+
```
|
209
|
+
|
210
|
+
##### tags
|
211
|
+
|
212
|
+
```
|
213
|
+
NAME
|
214
|
+
tags - Extract all instances of a tag
|
215
|
+
|
216
|
+
SYNOPSIS
|
217
|
+
|
218
|
+
curlyq [global options] tags [command options] URL...
|
219
|
+
|
220
|
+
COMMAND OPTIONS
|
221
|
+
-c, --[no-]compressed - Expect compressed results
|
222
|
+
--[no-]clean - Remove extra whitespace from results
|
223
|
+
-h, --header=arg - Define a header to send as key=value (may be used more than once, default: none)
|
224
|
+
-q, --query, --search=arg - CSS/XPath query (default: none)
|
225
|
+
-t, --tag=arg - Specify a tag to collect (may be used more than once, default: none)
|
226
|
+
```
|
227
|
+
|
228
|
+
|
229
|
+
PayPal link: [paypal.me/ttscoff](https://paypal.me/ttscoff)
|
230
|
+
|
231
|
+
## Changelog
|
232
|
+
|
233
|
+
See [CHANGELOG.md](https://github.com/ttscoff/curlyq/blob/master/CHANGELOG.md)
|
data/README.rdoc
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'rake/clean'
|
2
|
+
require 'rubygems'
|
3
|
+
require 'rubygems/package_task'
|
4
|
+
require 'rdoc/task'
|
5
|
+
require 'yard'
|
6
|
+
|
7
|
+
YARD::Rake::YardocTask.new do |t|
|
8
|
+
t.files = ['lib/curly/*.rb']
|
9
|
+
t.options = ['--markup=markdown', '--no-private', '-p', 'yard_templates']
|
10
|
+
# t.stats_options = ['--list-undoc']
|
11
|
+
end
|
12
|
+
|
13
|
+
task :doc, [*Rake.application[:yard].arg_names] => [:yard]
|
14
|
+
|
15
|
+
Rake::RDocTask.new do |rd|
|
16
|
+
rd.main = "README.rdoc"
|
17
|
+
rd.rdoc_files.include("README.rdoc","lib/**/*.rb","bin/**/*")
|
18
|
+
rd.title = 'curlyq'
|
19
|
+
end
|
20
|
+
|
21
|
+
spec = eval(File.read('curlyq.gemspec'))
|
22
|
+
|
23
|
+
Gem::PackageTask.new(spec) do |pkg|
|
24
|
+
end
|
25
|
+
require 'rake/testtask'
|
26
|
+
Rake::TestTask.new do |t|
|
27
|
+
t.libs << "test"
|
28
|
+
t.test_files = FileList['test/*_test.rb']
|
29
|
+
end
|
30
|
+
|
31
|
+
desc 'Development version check'
|
32
|
+
task :ver do
|
33
|
+
gver = `git ver`
|
34
|
+
cver = IO.read(File.join(File.dirname(__FILE__), 'CHANGELOG.md')).match(/^#+ (\d+\.\d+\.\d+(\w+)?)/)[1]
|
35
|
+
res = `grep VERSION lib/curly/version.rb`
|
36
|
+
version = res.match(/VERSION *= *['"](\d+\.\d+\.\d+(\w+)?)/)[1]
|
37
|
+
puts "git tag: #{gver}"
|
38
|
+
puts "version.rb: #{version}"
|
39
|
+
puts "changelog: #{cver}"
|
40
|
+
end
|
41
|
+
|
42
|
+
desc 'Changelog version check'
|
43
|
+
task :cver do
|
44
|
+
puts IO.read(File.join(File.dirname(__FILE__), 'CHANGELOG.md')).match(/^#+ (\d+\.\d+\.\d+(\w+)?)/)[1]
|
45
|
+
end
|
46
|
+
|
47
|
+
desc 'Bump incremental version number'
|
48
|
+
task :bump, :type do |_, args|
|
49
|
+
args.with_defaults(type: 'inc')
|
50
|
+
version_file = 'lib/curly/version.rb'
|
51
|
+
content = IO.read(version_file)
|
52
|
+
content.sub!(/VERSION = '(?<major>\d+)\.(?<minor>\d+)\.(?<inc>\d+)(?<pre>\S+)?'/) do
|
53
|
+
m = Regexp.last_match
|
54
|
+
major = m['major'].to_i
|
55
|
+
minor = m['minor'].to_i
|
56
|
+
inc = m['inc'].to_i
|
57
|
+
pre = m['pre']
|
58
|
+
|
59
|
+
case args[:type]
|
60
|
+
when /^maj/
|
61
|
+
major += 1
|
62
|
+
minor = 0
|
63
|
+
inc = 0
|
64
|
+
when /^min/
|
65
|
+
minor += 1
|
66
|
+
inc = 0
|
67
|
+
else
|
68
|
+
inc += 1
|
69
|
+
end
|
70
|
+
|
71
|
+
$stdout.puts "At version #{major}.#{minor}.#{inc}#{pre}"
|
72
|
+
"VERSION = '#{major}.#{minor}.#{inc}#{pre}'"
|
73
|
+
end
|
74
|
+
File.open(version_file, 'w+') { |f| f.puts content }
|
75
|
+
end
|
76
|
+
|
77
|
+
task default: %i[test clobber package]
|