spidr_cli 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 122d1ee2879d8333bae8fda51cbaa5adcc0a3810344a925593cfdc9dfc459916
4
- data.tar.gz: 7f5471d05f5b977b531fccca29b07f3c5278b1b1b23e2c625f1ae832c0b57481
3
+ metadata.gz: e52c41017020452ca4bce9efdf8a4925941f2dd926fe9a896af32e5c02ba0350
4
+ data.tar.gz: 9007ef979a5d62185cdd7a9656921f53990946f997c0bb7535ae2276212ce60d
5
5
  SHA512:
6
- metadata.gz: 4dfc79d60c5f5a6a3dd119eee44af28f31d92b915fe4ebe4fcb07a0a00781e111e260adacae6d0bb860223902478f6550e4ba39e6f82c1865237bf1f5a8de939
7
- data.tar.gz: c577c737be56e1b1009a343354bef7aae2fc2dc3e1a829a358430e54772623d252ae24080a1800370f2c670a04c6f55598e91cd462e64ab81709682104cfaffb
6
+ metadata.gz: 9c80f0fbd9031f9d9c44192f610b7b5147411d53c7195b11dec733502299743fb760277e938653fb1e9cd64a8d10ea09e2fff4622262d0d022a512cc5bf5df1b
7
+ data.tar.gz: 2479ae42d872fc2541ccd938c01661ccabd11ebf3a195733851db5ede769eac14f478c869426757036b7e43d2ce2aec183f550c4c91b814bc3040c4754da2a0b
data/README.md CHANGED
@@ -55,9 +55,13 @@ Usage: spidr [<method>] [options] <url>
55
55
  --columns=[val1,val2] Columns in output
56
56
  --content-types=[val1,val2] Formats to output (html, javascript, css, json, ..)
57
57
  --[no-]header Include the header
58
- --hosts=[example.com] Only spider links on certain hosts
58
+ --[no-]strip-fragments Specifies whether the Agent will strip URI fragments (default: true)
59
+ --[no-]strip-query Specifies whether the Agent will strip URI query (default: false)
60
+ --schemes=[http,https] Only spider links with certain scheme
61
+ --host=[example] Only spider links on certain host
62
+ --hosts=[example.com] Only spider links on certain hosts (ignored unless method is "start_at")
59
63
  --ignore-hosts=[www.example.com]
60
- Do not spider links on certain hosts
64
+ Do not spider links on certain hosts (ignored unless method is "start_at")
61
65
  --ports=[80, 443] Only spider links on certain ports
62
66
  --ignore-ports=[8000, 8080, 3000]
63
67
  Do not spider links on certain ports
@@ -67,15 +71,15 @@ Usage: spidr [<method>] [options] <url>
67
71
  --ignore-urls=[/blog/] Do not spider links on certain urls
68
72
  --exts=[htm] Only spider links on certain extensions
69
73
  --ignore-exts=[cfm] Do not spider links on certain extensions
70
- --open-timeout=val Optional open timeout
71
- --read-timeout=val Optional read timeout
72
- --ssl-timeout=val Optional ssl timeout
73
- --continue-timeout=val Optional continue timeout
74
- --keep-alive-timeout=val Optional keep_alive timeout
74
+ --open-timeout=val Open timeout
75
+ --read-timeout=val Read timeout
76
+ --ssl-timeout=val SSL timeout
77
+ --continue-timeout=val Continue timeout
78
+ --keep-alive-timeout=val Keep alive timeout
75
79
  --proxy-host=val The host the proxy is running on
76
80
  --proxy-port=val The port the proxy is running on
77
- --proxy-user=val The user to authenticate as with the proxy
78
- --proxy-password=val The password to authenticate with
81
+ --proxy-user=val The user to authenticate with the proxy
82
+ --proxy-password=val The password to authenticate with the proxy
79
83
  --default-headers=[key1=val1,key2=val2]
80
84
  Default headers to set for every request
81
85
  --host-header=val The HTTP Host header to use with each request
@@ -1,3 +1,4 @@
1
+ require 'spidr'
1
2
  require 'spidr_cli/version'
2
3
  require 'spidr_cli/options'
3
4
 
@@ -2,7 +2,7 @@ require 'optparse'
2
2
 
3
3
  module SpidrCLI
4
4
  class Options
5
- # Spidr method
5
+ # Spidr methods
6
6
  METHODS = %w[site start_at host].map { |c| [c, c] }.to_h.freeze
7
7
 
8
8
  attr_reader :url, :columns, :content_types, :header, :spidr_options, :usage_doc,
@@ -44,15 +44,33 @@ module SpidrCLI
44
44
  @header = value
45
45
  end
46
46
 
47
- # Spidr::Rules options
47
+ # Spidr::Sanitizers options
48
+ parser.on('--[no-]strip-fragments', 'Specifies whether the Agent will strip URI fragments (default: true)') do |value|
49
+ spidr_options[:strip_fragments] = value
50
+ end
51
+
52
+ parser.on('--[no-]strip-query', 'Specifies whether the Agent will strip URI query (default: false)') do |value|
53
+ spidr_options[:strip_query] = value
54
+ end
55
+
56
+ # Spidr::Filters options
57
+ parser.on('--schemes=[http,https]', Array, 'Only spider links with certain scheme') do |value|
58
+ spidr_options[:schemes] = value if value
59
+ end
48
60
 
49
- # NOTE: --hosts and --ignore-hosts are overriden when using Spidr::site
61
+ parser.on('--host=[example]', String, 'Only spider links on certain host') do |value|
62
+ spidr_options[:host] = value if value
63
+ end
64
+
65
+ # NOTE: --hosts is overridden unless the spidr method is "start_at"
50
66
  # @see https://github.com/postmodern/spidr/blob/master/lib/spidr/agent.rb#L273
51
- parser.on('--hosts=[example.com]', Array, 'Only spider links on certain hosts') do |value|
67
+ parser.on('--hosts=[example.com]', Array, 'Only spider links on certain hosts (ignored unless method is "start_at")') do |value|
52
68
  spidr_options[:hosts] = to_option_regexp_array(value) if value
53
69
  end
54
70
 
55
- parser.on('--ignore-hosts=[www.example.com]', Array, 'Do not spider links on certain hosts') do |value|
71
+ # NOTE: --ignore-hosts is overridden unless the spidr method is "start_at"
72
+ # @see https://github.com/postmodern/spidr/blob/master/lib/spidr/agent.rb#L273
73
+ parser.on('--ignore-hosts=[www.example.com]', Array, 'Do not spider links on certain hosts (ignored unless method is "start_at")') do |value|
56
74
  spidr_options[:ignore_hosts] = to_option_regexp_array(value) if value
57
75
  end
58
76
 
@@ -89,23 +107,23 @@ module SpidrCLI
89
107
  end
90
108
 
91
109
  # Spidr::Agent options
92
- parser.on('--open-timeout=val', Integer, 'Optional open timeout') do |value|
110
+ parser.on('--open-timeout=val', Integer, 'Open timeout') do |value|
93
111
  spidr_options[:open_timeout] = value
94
112
  end
95
113
 
96
- parser.on('--read-timeout=val', Integer, 'Optional read timeout') do |value|
114
+ parser.on('--read-timeout=val', Integer, 'Read timeout') do |value|
97
115
  spidr_options[:read_timeout] = value
98
116
  end
99
117
 
100
- parser.on('--ssl-timeout=val', Integer, 'Optional ssl timeout') do |value|
118
+ parser.on('--ssl-timeout=val', Integer, 'SSL timeout') do |value|
101
119
  spidr_options[:ssl_timeout] = value
102
120
  end
103
121
 
104
- parser.on('--continue-timeout=val', Integer, 'Optional continue timeout') do |value|
122
+ parser.on('--continue-timeout=val', Integer, 'Continue timeout') do |value|
105
123
  spidr_options[:continue_timeout] = value
106
124
  end
107
125
 
108
- parser.on('--keep-alive-timeout=val', Integer, 'Optional keep_alive timeout') do |value|
126
+ parser.on('--keep-alive-timeout=val', Integer, 'Keep alive timeout') do |value|
109
127
  spidr_options[:keep_alive_timeout] = value
110
128
  end
111
129
 
@@ -117,11 +135,11 @@ module SpidrCLI
117
135
  proxy_options[:port] = value
118
136
  end
119
137
 
120
- parser.on('--proxy-user=val', String, 'The user to authenticate as with the proxy') do |value|
138
+ parser.on('--proxy-user=val', String, 'The user to authenticate with the proxy') do |value|
121
139
  proxy_options[:user] = value
122
140
  end
123
141
 
124
- parser.on('--proxy-password=val', String, 'The password to authenticate with') do |value|
142
+ parser.on('--proxy-password=val', String, 'The password to authenticate with the proxy') do |value|
125
143
  proxy_options[:password] = value
126
144
  end
127
145
 
@@ -176,11 +194,16 @@ module SpidrCLI
176
194
  end
177
195
 
178
196
  parser.on_tail('--version', 'Show version') do
179
- puts "Spidr version #{Spidr::VERSION}"
197
+ puts "Spidr version #{Spidr::VERSION} (SpidrCLI version #{SpidrCLI::VERSION})"
180
198
  exit
181
199
  end
182
200
  end.parse!
183
201
 
202
+ if @spidr_method != 'start_at' &&
203
+ (spidr_options.key?(:hosts) || spidr_options.key?(:ignore_hosts))
204
+ raise(ArgumentError, '--hosts and --ignore-hosts argument are only valid if spidr method is "start_at"')
205
+ end
206
+
184
207
  spidr_options[:proxy] = proxy_options unless proxy_options.empty?
185
208
 
186
209
  @url = argv.last
@@ -1,3 +1,3 @@
1
1
  module SpidrCLI
2
- VERSION = '0.2.0'.freeze
2
+ VERSION = '0.3.0'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spidr_cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jacob Burenstam
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-06-30 00:00:00.000000000 Z
11
+ date: 2018-07-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: spidr