spidr_cli 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +13 -9
- data/lib/spidr_cli.rb +1 -0
- data/lib/spidr_cli/options.rb +36 -13
- data/lib/spidr_cli/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e52c41017020452ca4bce9efdf8a4925941f2dd926fe9a896af32e5c02ba0350
|
4
|
+
data.tar.gz: 9007ef979a5d62185cdd7a9656921f53990946f997c0bb7535ae2276212ce60d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9c80f0fbd9031f9d9c44192f610b7b5147411d53c7195b11dec733502299743fb760277e938653fb1e9cd64a8d10ea09e2fff4622262d0d022a512cc5bf5df1b
|
7
|
+
data.tar.gz: 2479ae42d872fc2541ccd938c01661ccabd11ebf3a195733851db5ede769eac14f478c869426757036b7e43d2ce2aec183f550c4c91b814bc3040c4754da2a0b
|
data/README.md
CHANGED
@@ -55,9 +55,13 @@ Usage: spidr [<method>] [options] <url>
|
|
55
55
|
--columns=[val1,val2] Columns in output
|
56
56
|
--content-types=[val1,val2] Formats to output (html, javascript, css, json, ..)
|
57
57
|
--[no-]header Include the header
|
58
|
-
--
|
58
|
+
--[no-]strip-fragments Specifies whether the Agent will strip URI fragments (default: true)
|
59
|
+
--[no-]strip-query Specifies whether the Agent will strip URI query (default: false)
|
60
|
+
--schemes=[http,https] Only spider links with certain scheme
|
61
|
+
--host=[example] Only spider links on certain host
|
62
|
+
--hosts=[example.com] Only spider links on certain hosts (ignored unless method is "start_at")
|
59
63
|
--ignore-hosts=[www.example.com]
|
60
|
-
Do not spider links on certain hosts
|
64
|
+
Do not spider links on certain hosts (ignored unless method is "start_at")
|
61
65
|
--ports=[80, 443] Only spider links on certain ports
|
62
66
|
--ignore-ports=[8000, 8080, 3000]
|
63
67
|
Do not spider links on certain ports
|
@@ -67,15 +71,15 @@ Usage: spidr [<method>] [options] <url>
|
|
67
71
|
--ignore-urls=[/blog/] Do not spider links on certain urls
|
68
72
|
--exts=[htm] Only spider links on certain extensions
|
69
73
|
--ignore-exts=[cfm] Do not spider links on certain extensions
|
70
|
-
--open-timeout=val
|
71
|
-
--read-timeout=val
|
72
|
-
--ssl-timeout=val
|
73
|
-
--continue-timeout=val
|
74
|
-
--keep-alive-timeout=val
|
74
|
+
--open-timeout=val Open timeout
|
75
|
+
--read-timeout=val Read timeout
|
76
|
+
--ssl-timeout=val SSL timeout
|
77
|
+
--continue-timeout=val Continue timeout
|
78
|
+
--keep-alive-timeout=val Keep alive timeout
|
75
79
|
--proxy-host=val The host the proxy is running on
|
76
80
|
--proxy-port=val The port the proxy is running on
|
77
|
-
--proxy-user=val The user to authenticate
|
78
|
-
--proxy-password=val The password to authenticate with
|
81
|
+
--proxy-user=val The user to authenticate with the proxy
|
82
|
+
--proxy-password=val The password to authenticate with the proxy
|
79
83
|
--default-headers=[key1=val1,key2=val2]
|
80
84
|
Default headers to set for every request
|
81
85
|
--host-header=val The HTTP Host header to use with each request
|
data/lib/spidr_cli.rb
CHANGED
data/lib/spidr_cli/options.rb
CHANGED
@@ -2,7 +2,7 @@ require 'optparse'
|
|
2
2
|
|
3
3
|
module SpidrCLI
|
4
4
|
class Options
|
5
|
-
# Spidr
|
5
|
+
# Spidr methods
|
6
6
|
METHODS = %w[site start_at host].map { |c| [c, c] }.to_h.freeze
|
7
7
|
|
8
8
|
attr_reader :url, :columns, :content_types, :header, :spidr_options, :usage_doc,
|
@@ -44,15 +44,33 @@ module SpidrCLI
|
|
44
44
|
@header = value
|
45
45
|
end
|
46
46
|
|
47
|
-
# Spidr::
|
47
|
+
# Spidr::Sanitizers options
|
48
|
+
parser.on('--[no-]strip-fragments', 'Specifies whether the Agent will strip URI fragments (default: true)') do |value|
|
49
|
+
spidr_options[:strip_fragments] = value
|
50
|
+
end
|
51
|
+
|
52
|
+
parser.on('--[no-]strip-query', 'Specifies whether the Agent will strip URI query (default: false)') do |value|
|
53
|
+
spidr_options[:strip_query] = value
|
54
|
+
end
|
55
|
+
|
56
|
+
# Spidr::Filters options
|
57
|
+
parser.on('--schemes=[http,https]', Array, 'Only spider links with certain scheme') do |value|
|
58
|
+
spidr_options[:schemes] = value if value
|
59
|
+
end
|
48
60
|
|
49
|
-
|
61
|
+
parser.on('--host=[example]', String, 'Only spider links on certain host') do |value|
|
62
|
+
spidr_options[:host] = value if value
|
63
|
+
end
|
64
|
+
|
65
|
+
# NOTE: --hosts is overridden
|
50
66
|
# @see https://github.com/postmodern/spidr/blob/master/lib/spidr/agent.rb#L273
|
51
|
-
parser.on('--hosts=[example.com]', Array, 'Only spider links on certain hosts') do |value|
|
67
|
+
parser.on('--hosts=[example.com]', Array, 'Only spider links on certain hosts (ignored unless method is "start_at")') do |value|
|
52
68
|
spidr_options[:hosts] = to_option_regexp_array(value) if value
|
53
69
|
end
|
54
70
|
|
55
|
-
|
71
|
+
# NOTE: --ignore-hosts is overridden
|
72
|
+
# @see https://github.com/postmodern/spidr/blob/master/lib/spidr/agent.rb#L273
|
73
|
+
parser.on('--ignore-hosts=[www.example.com]', Array, 'Do not spider links on certain hosts (ignored unless method is "start_at")') do |value|
|
56
74
|
spidr_options[:ignore_hosts] = to_option_regexp_array(value) if value
|
57
75
|
end
|
58
76
|
|
@@ -89,23 +107,23 @@ module SpidrCLI
|
|
89
107
|
end
|
90
108
|
|
91
109
|
# Spidr::Agent options
|
92
|
-
parser.on('--open-timeout=val', Integer, '
|
110
|
+
parser.on('--open-timeout=val', Integer, 'Open timeout') do |value|
|
93
111
|
spidr_options[:open_timeout] = value
|
94
112
|
end
|
95
113
|
|
96
|
-
parser.on('--read-timeout=val', Integer, '
|
114
|
+
parser.on('--read-timeout=val', Integer, 'Read timeout') do |value|
|
97
115
|
spidr_options[:read_timeout] = value
|
98
116
|
end
|
99
117
|
|
100
|
-
parser.on('--ssl-timeout=val', Integer, '
|
118
|
+
parser.on('--ssl-timeout=val', Integer, 'SSL timeout') do |value|
|
101
119
|
spidr_options[:ssl_timeout] = value
|
102
120
|
end
|
103
121
|
|
104
|
-
parser.on('--continue-timeout=val', Integer, '
|
122
|
+
parser.on('--continue-timeout=val', Integer, 'Continue timeout') do |value|
|
105
123
|
spidr_options[:continue_timeout] = value
|
106
124
|
end
|
107
125
|
|
108
|
-
parser.on('--keep-alive-timeout=val', Integer, '
|
126
|
+
parser.on('--keep-alive-timeout=val', Integer, 'Keep alive timeout') do |value|
|
109
127
|
spidr_options[:keep_alive_timeout] = value
|
110
128
|
end
|
111
129
|
|
@@ -117,11 +135,11 @@ module SpidrCLI
|
|
117
135
|
proxy_options[:port] = value
|
118
136
|
end
|
119
137
|
|
120
|
-
parser.on('--proxy-user=val', String, 'The user to authenticate
|
138
|
+
parser.on('--proxy-user=val', String, 'The user to authenticate with the proxy') do |value|
|
121
139
|
proxy_options[:user] = value
|
122
140
|
end
|
123
141
|
|
124
|
-
parser.on('--proxy-password=val', String, 'The password to authenticate with') do |value|
|
142
|
+
parser.on('--proxy-password=val', String, 'The password to authenticate with the proxy') do |value|
|
125
143
|
proxy_options[:password] = value
|
126
144
|
end
|
127
145
|
|
@@ -176,11 +194,16 @@ module SpidrCLI
|
|
176
194
|
end
|
177
195
|
|
178
196
|
parser.on_tail('--version', 'Show version') do
|
179
|
-
puts "Spidr version #{Spidr::VERSION}"
|
197
|
+
puts "Spidr version #{Spidr::VERSION} (SpidrCLI version #{SpidrCLI::VERSION})"
|
180
198
|
exit
|
181
199
|
end
|
182
200
|
end.parse!
|
183
201
|
|
202
|
+
if @spidr_method != 'start_at' &&
|
203
|
+
(spidr_options.key?(:hosts) || spidr_options.key?(:ignore_hosts))
|
204
|
+
raise(ArgumentError, '--hosts and --ignore-hosts argument are only valid if spidr method is "start_at"')
|
205
|
+
end
|
206
|
+
|
184
207
|
spidr_options[:proxy] = proxy_options unless proxy_options.empty?
|
185
208
|
|
186
209
|
@url = argv.last
|
data/lib/spidr_cli/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spidr_cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jacob Burenstam
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-07-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: spidr
|