spidr_cli 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 122d1ee2879d8333bae8fda51cbaa5adcc0a3810344a925593cfdc9dfc459916
4
- data.tar.gz: 7f5471d05f5b977b531fccca29b07f3c5278b1b1b23e2c625f1ae832c0b57481
3
+ metadata.gz: e52c41017020452ca4bce9efdf8a4925941f2dd926fe9a896af32e5c02ba0350
4
+ data.tar.gz: 9007ef979a5d62185cdd7a9656921f53990946f997c0bb7535ae2276212ce60d
5
5
  SHA512:
6
- metadata.gz: 4dfc79d60c5f5a6a3dd119eee44af28f31d92b915fe4ebe4fcb07a0a00781e111e260adacae6d0bb860223902478f6550e4ba39e6f82c1865237bf1f5a8de939
7
- data.tar.gz: c577c737be56e1b1009a343354bef7aae2fc2dc3e1a829a358430e54772623d252ae24080a1800370f2c670a04c6f55598e91cd462e64ab81709682104cfaffb
6
+ metadata.gz: 9c80f0fbd9031f9d9c44192f610b7b5147411d53c7195b11dec733502299743fb760277e938653fb1e9cd64a8d10ea09e2fff4622262d0d022a512cc5bf5df1b
7
+ data.tar.gz: 2479ae42d872fc2541ccd938c01661ccabd11ebf3a195733851db5ede769eac14f478c869426757036b7e43d2ce2aec183f550c4c91b814bc3040c4754da2a0b
data/README.md CHANGED
@@ -55,9 +55,13 @@ Usage: spidr [<method>] [options] <url>
55
55
  --columns=[val1,val2] Columns in output
56
56
  --content-types=[val1,val2] Formats to output (html, javascript, css, json, ..)
57
57
  --[no-]header Include the header
58
- --hosts=[example.com] Only spider links on certain hosts
58
+ --[no-]strip-fragments Specifies whether the Agent will strip URI fragments (default: true)
59
+ --[no-]strip-query Specifies whether the Agent will strip URI query (default: false)
60
+ --schemes=[http,https] Only spider links with certain scheme
61
+ --host=[example] Only spider links on certain host
62
+ --hosts=[example.com] Only spider links on certain hosts (ignored unless method is "start_at")
59
63
  --ignore-hosts=[www.example.com]
60
- Do not spider links on certain hosts
64
+ Do not spider links on certain hosts (ignored unless method is "start_at")
61
65
  --ports=[80, 443] Only spider links on certain ports
62
66
  --ignore-ports=[8000, 8080, 3000]
63
67
  Do not spider links on certain ports
@@ -67,15 +71,15 @@ Usage: spidr [<method>] [options] <url>
67
71
  --ignore-urls=[/blog/] Do not spider links on certain urls
68
72
  --exts=[htm] Only spider links on certain extensions
69
73
  --ignore-exts=[cfm] Do not spider links on certain extensions
70
- --open-timeout=val Optional open timeout
71
- --read-timeout=val Optional read timeout
72
- --ssl-timeout=val Optional ssl timeout
73
- --continue-timeout=val Optional continue timeout
74
- --keep-alive-timeout=val Optional keep_alive timeout
74
+ --open-timeout=val Open timeout
75
+ --read-timeout=val Read timeout
76
+ --ssl-timeout=val SSL timeout
77
+ --continue-timeout=val Continue timeout
78
+ --keep-alive-timeout=val Keep alive timeout
75
79
  --proxy-host=val The host the proxy is running on
76
80
  --proxy-port=val The port the proxy is running on
77
- --proxy-user=val The user to authenticate as with the proxy
78
- --proxy-password=val The password to authenticate with
81
+ --proxy-user=val The user to authenticate with the proxy
82
+ --proxy-password=val The password to authenticate with the proxy
79
83
  --default-headers=[key1=val1,key2=val2]
80
84
  Default headers to set for every request
81
85
  --host-header=val The HTTP Host header to use with each request
@@ -1,3 +1,4 @@
1
+ require 'spidr'
1
2
  require 'spidr_cli/version'
2
3
  require 'spidr_cli/options'
3
4
 
@@ -2,7 +2,7 @@ require 'optparse'
2
2
 
3
3
  module SpidrCLI
4
4
  class Options
5
- # Spidr method
5
+ # Spidr methods
6
6
  METHODS = %w[site start_at host].map { |c| [c, c] }.to_h.freeze
7
7
 
8
8
  attr_reader :url, :columns, :content_types, :header, :spidr_options, :usage_doc,
@@ -44,15 +44,33 @@ module SpidrCLI
44
44
  @header = value
45
45
  end
46
46
 
47
- # Spidr::Rules options
47
+ # Spidr::Sanitizers options
48
+ parser.on('--[no-]strip-fragments', 'Specifies whether the Agent will strip URI fragments (default: true)') do |value|
49
+ spidr_options[:strip_fragments] = value
50
+ end
51
+
52
+ parser.on('--[no-]strip-query', 'Specifies whether the Agent will strip URI query (default: false)') do |value|
53
+ spidr_options[:strip_query] = value
54
+ end
55
+
56
+ # Spidr::Filters options
57
+ parser.on('--schemes=[http,https]', Array, 'Only spider links with certain scheme') do |value|
58
+ spidr_options[:schemes] = value if value
59
+ end
48
60
 
49
- # NOTE: --hosts and --ignore-hosts are overriden when using Spidr::site
61
+ parser.on('--host=[example]', String, 'Only spider links on certain host') do |value|
62
+ spidr_options[:host] = value if value
63
+ end
64
+
65
+ # NOTE: --hosts is overridden (ignored) unless the method is "start_at"
50
66
  # @see https://github.com/postmodern/spidr/blob/master/lib/spidr/agent.rb#L273
51
- parser.on('--hosts=[example.com]', Array, 'Only spider links on certain hosts') do |value|
67
+ parser.on('--hosts=[example.com]', Array, 'Only spider links on certain hosts (ignored unless method is "start_at")') do |value|
52
68
  spidr_options[:hosts] = to_option_regexp_array(value) if value
53
69
  end
54
70
 
55
- parser.on('--ignore-hosts=[www.example.com]', Array, 'Do not spider links on certain hosts') do |value|
71
+ # NOTE: --ignore-hosts is overridden (ignored) unless the method is "start_at"
72
+ # @see https://github.com/postmodern/spidr/blob/master/lib/spidr/agent.rb#L273
73
+ parser.on('--ignore-hosts=[www.example.com]', Array, 'Do not spider links on certain hosts (ignored unless method is "start_at")') do |value|
56
74
  spidr_options[:ignore_hosts] = to_option_regexp_array(value) if value
57
75
  end
58
76
 
@@ -89,23 +107,23 @@ module SpidrCLI
89
107
  end
90
108
 
91
109
  # Spidr::Agent options
92
- parser.on('--open-timeout=val', Integer, 'Optional open timeout') do |value|
110
+ parser.on('--open-timeout=val', Integer, 'Open timeout') do |value|
93
111
  spidr_options[:open_timeout] = value
94
112
  end
95
113
 
96
- parser.on('--read-timeout=val', Integer, 'Optional read timeout') do |value|
114
+ parser.on('--read-timeout=val', Integer, 'Read timeout') do |value|
97
115
  spidr_options[:read_timeout] = value
98
116
  end
99
117
 
100
- parser.on('--ssl-timeout=val', Integer, 'Optional ssl timeout') do |value|
118
+ parser.on('--ssl-timeout=val', Integer, 'SSL timeout') do |value|
101
119
  spidr_options[:ssl_timeout] = value
102
120
  end
103
121
 
104
- parser.on('--continue-timeout=val', Integer, 'Optional continue timeout') do |value|
122
+ parser.on('--continue-timeout=val', Integer, 'Continue timeout') do |value|
105
123
  spidr_options[:continue_timeout] = value
106
124
  end
107
125
 
108
- parser.on('--keep-alive-timeout=val', Integer, 'Optional keep_alive timeout') do |value|
126
+ parser.on('--keep-alive-timeout=val', Integer, 'Keep alive timeout') do |value|
109
127
  spidr_options[:keep_alive_timeout] = value
110
128
  end
111
129
 
@@ -117,11 +135,11 @@ module SpidrCLI
117
135
  proxy_options[:port] = value
118
136
  end
119
137
 
120
- parser.on('--proxy-user=val', String, 'The user to authenticate as with the proxy') do |value|
138
+ parser.on('--proxy-user=val', String, 'The user to authenticate with the proxy') do |value|
121
139
  proxy_options[:user] = value
122
140
  end
123
141
 
124
- parser.on('--proxy-password=val', String, 'The password to authenticate with') do |value|
142
+ parser.on('--proxy-password=val', String, 'The password to authenticate with the proxy') do |value|
125
143
  proxy_options[:password] = value
126
144
  end
127
145
 
@@ -176,11 +194,16 @@ module SpidrCLI
176
194
  end
177
195
 
178
196
  parser.on_tail('--version', 'Show version') do
179
- puts "Spidr version #{Spidr::VERSION}"
197
+ puts "Spidr version #{Spidr::VERSION} (SpidrCLI version #{SpidrCLI::VERSION})"
180
198
  exit
181
199
  end
182
200
  end.parse!
183
201
 
202
+ if @spidr_method != 'start_at' &&
203
+ (spidr_options.key?(:hosts) || spidr_options.key?(:ignore_hosts))
204
+ raise(ArgumentError, '--hosts and --ignore-hosts argument are only valid if spidr method is "start_at"')
205
+ end
206
+
184
207
  spidr_options[:proxy] = proxy_options unless proxy_options.empty?
185
208
 
186
209
  @url = argv.last
@@ -1,3 +1,3 @@
1
1
  module SpidrCLI
2
- VERSION = '0.2.0'.freeze
2
+ VERSION = '0.3.0'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spidr_cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jacob Burenstam
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-06-30 00:00:00.000000000 Z
11
+ date: 2018-07-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: spidr