archive_lister 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ data/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in archive_lister.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Russell Garner
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,53 @@
1
+ # ArchiveLister
2
+
3
+ Queries the Wayback Machine for the archived URLs found under a given root URL.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'archive_lister'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install archive_lister
18
+
19
+ ## Usage
20
+
21
+ The gem installs an `arcl` executable. Usage:
22
+
23
+ ```
24
+ arcl version 0.0.1
25
+ Options:
26
+ --input-file, -i <s>: File from which to draw the urls
27
+ --output-dir, -o <s>: Dir to output files (requires --input-file)
28
+ --skip-existing, -s: Skip existing files when in batch
29
+ --verbose, -v: Print failures at finish
30
+ --version, -e: Print version and exit
31
+ --help, -h: Show this message
32
+ ```
33
+
34
+ ### Examples
35
+
36
+ ```shell
37
+ arcl http://somewhere.com
38
+ ```
39
+
40
+ ### Batch
41
+
42
+ ```shell
43
+ arcl -i urls.txt -o data/wayback -v -s
44
+ ```
45
+
46
+
47
+ ## Contributing
48
+
49
+ 1. Fork it
50
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
51
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
52
+ 4. Push to the branch (`git push origin my-new-feature`)
53
+ 5. Create new Pull Request
@@ -0,0 +1 @@
1
+ require 'bundler/gem_tasks'
@@ -0,0 +1,25 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for archive_lister.
#
# The lib directory is put on the load path first so that the version
# constant can be read without the gem being installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'archive_lister/version'

Gem::Specification.new do |gem|
  gem.name          = 'archive_lister'
  gem.version       = ArchiveLister::VERSION
  gem.authors       = ['Russell Garner']
  gem.email         = ['rgarner@zephyros-systems.co.uk']
  gem.description   = %q{Ask archives about URLs}
  gem.summary       = %q{Ask Wayback / TNA for URLs}
  gem.homepage      = 'https://github.com/rgarner/archive_lister'
  # The project is distributed under the MIT licence; declaring it here
  # makes that visible in the gem metadata.
  gem.license       = 'MIT'

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  gem.add_dependency 'nokogiri'
  gem.add_dependency 'addressable'

  gem.add_development_dependency 'rspec'
  gem.add_development_dependency 'rake'
end
@@ -0,0 +1,36 @@
1
#!/usr/bin/env ruby

# Command-line front end for ArchiveLister.
#
# Single mode: `arcl URL` prints every URL the archive lists for URL.
# Batch mode:  `arcl -i FILE -o DIR` processes every URL in FILE and
#              writes one output file per URL into DIR.

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))

require 'trollop'
require 'archive_lister'

@options = Trollop::options do
  version "arcl version #{ArchiveLister::VERSION}"

  opt :input_file, 'File from which to draw the urls', type: String
  opt :output_dir, 'Dir to output files (requires --input-file)', type: String
  opt :skip_existing, 'Skip existing files when in batch', type: TrueClass
  opt :verbose, 'Print failures at finish', type: TrueClass
end

# --output-dir is documented as requiring --input-file; refuse the
# combination instead of silently dropping into single mode.
if @options[:output_dir] && !@options[:input_file]
  abort 'Error: --output-dir requires --input-file. Try --help for help.'
end

mode = (@options[:input_file] && @options[:output_dir]) ? :batch : :single

case mode
when :single
  # Option parsing leaves only the unprocessed arguments in ARGV, so the
  # first remaining entry is the URL to look up.
  url = ARGV[0]
  abort 'Error: no URL given. Try --help for help.' if url.nil? || url.empty?

  begin
    ArchiveLister.list(url).each do |found|
      puts found
    end
  rescue ArchiveLister::HttpError => e
    # Report on stderr with a failing exit status so the message is never
    # mistaken for part of the URL list on stdout.
    abort e.to_s
  end
when :batch
  ArchiveLister.batch(
    @options[:input_file],
    @options[:output_dir],
    @options.select { |k, _| [:skip_existing, :verbose].include?(k) }
  )
end
35
+
36
+
@@ -0,0 +1,72 @@
1
+ require 'archive_lister/version'
2
+
3
+ require 'addressable/uri'
4
+ require 'net/http'
5
+ require 'nokogiri'
6
+
7
+ require 'archive_lister/wayback_file'
8
+
9
module ArchiveLister
  # Raised when the archive does not answer a listing request with a
  # success response. Carries the requested URI and the response object.
  class HttpError < RuntimeError
    attr_reader :uri, :response

    def initialize(uri, response)
      @uri = uri
      @response = response
    end

    # Tab-separated "uri<TAB>response", as printed in failure reports.
    def to_s
      "#{uri}\t#{response}"
    end
  end

  # Listing URL template; '#SITE#' is replaced by the site being queried.
  WAYBACK_FORMAT = 'http://wayback.archive.org/web/*/#SITE#/*'

  # Fetches the archive listing for +url+ and returns the URLs found in it.
  #
  # url:: a URI object, or anything URI.parse accepts. Its query string is
  #       ignored. The argument itself is never modified.
  #
  # Returns WaybackFile#urls for the response body.
  # Raises HttpError when the final response is not a success.
  def self.list(url)
    # Work on a copy: clearing the query must not alter the caller's URI.
    query_uri = url.is_a?(URI) ? url.dup : URI.parse(url)
    query_uri.query = nil

    # Block form of sub so the inserted URL is taken literally (a string
    # replacement would interpret backslash sequences).
    wayback_uri = URI(WAYBACK_FORMAT.sub('#SITE#') { query_uri.to_s })

    # Poor man's one-level redirect
    response = Net::HTTP.get_response(wayback_uri)
    if response.is_a?(Net::HTTPRedirection)
      location = response['location']
      # merge resolves a relative Location against the request URI; a
      # redirect without Location falls through to the HttpError below.
      response = Net::HTTP.get_response(wayback_uri.merge(location)) if location
    end

    unless response.is_a?(Net::HTTPSuccess)
      raise HttpError.new(wayback_uri, response)
    end

    WaybackFile.parse(response.body).urls
  end

  # Runs ArchiveLister.list for every URL in +url_filename+ (one per line)
  # and writes each result to its own file in +output_dir+, named from the
  # URL's host and path with '/' replaced by '_'.
  #
  # options:: :skip_existing - do not re-fetch URLs whose output file exists
  #           :verbose       - print each failure after the summary line
  #
  # Progress, a summary line and (optionally) the failures go to stdout.
  # Blank lines in the input file are ignored.
  def self.batch(url_filename, output_dir, options = {})
    successes = 0
    skipped = 0
    failures = []

    File.read(url_filename).each_line do |line|
      normalised_url = line.sub(/(\n$)|(_$)|(\/\n$)/, '')
      # A blank line would otherwise map to the output directory itself.
      next if normalised_url.strip.empty?

      url = Addressable::URI.parse(normalised_url)
      output_filename = File.join(output_dir, "#{url.host}#{url.path.gsub('/', '_')}").chomp

      # A zero-length file is treated as a leftover, not as an existing result.
      File.delete(output_filename) if File.exist?(output_filename) && File.zero?(output_filename)
      skipping = options[:skip_existing] && File.exist?(output_filename)
      puts "#{url}#{skipping ? ' -- Skipping' : ''}"
      if skipping
        skipped += 1
        next
      end

      # Fetch before opening the output file, so a failure never has to
      # delete a file whose handle is still open.
      begin
        found = list(url)
      rescue HttpError => e
        failures << e
        # Keep the original contract: no output file remains after a failure.
        File.delete(output_filename) if File.exist?(output_filename)
        next
      end

      File.open(output_filename, 'w') do |file|
        found.each { |found_url| file.puts found_url.to_s }
      end
      successes += 1
    end

    puts "#{successes} successes, #{failures.length} failures, #{skipped} skipped"
    failures.each { |e| puts e } if options[:verbose]
  end
end
@@ -0,0 +1,3 @@
1
module ArchiveLister
  # Release version of the gem. Frozen so the shared constant string
  # cannot be modified in place.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,21 @@
1
module ArchiveLister
  # The list of URLs extracted from an archive listing page.
  class WaybackFile
    attr_reader :urls

    def initialize(urls)
      @urls = urls
    end

    # Builds a WaybackFile from listing HTML. The text of every anchor
    # matched by the CSS selector 'td.url a' is parsed as a URI; an explicit
    # port that is the default for the scheme (80 for http, 443 for https)
    # is removed.
    def self.parse(content)
      anchors = Nokogiri::HTML(content).css('td.url a')

      parsed = anchors.map do |anchor|
        uri = Addressable::URI.parse(anchor.text)
        default_port = (uri.scheme == 'http' && uri.port == 80) ||
                       (uri.scheme == 'https' && uri.port == 443)
        uri.port = nil if default_port
        uri
      end

      WaybackFile.new(parsed)
    end
  end
end
@@ -0,0 +1,782 @@
1
+ ## lib/trollop.rb -- trollop command-line processing library
2
+ ## Author:: William Morgan (mailto: wmorgan-trollop@masanjin.net)
3
+ ## Copyright:: Copyright 2007 William Morgan
4
+ ## License:: the same terms as ruby itself
5
+
6
+ require 'date'
7
+
8
+ module Trollop
9
+
10
+ VERSION = "2.0"
11
+
12
+ ## Thrown by Parser in the event of a commandline error. Not needed if
13
+ ## you're using the Trollop::options entry.
14
+ class CommandlineError < StandardError; end
15
+
16
+ ## Thrown by Parser if the user passes in '-h' or '--help'. Handled
17
+ ## automatically by Trollop#options.
18
+ class HelpNeeded < StandardError; end
19
+
20
+ ## Thrown by Parser if the user passes in '--version' (or its short form). Handled
21
+ ## automatically by Trollop#options.
22
+ class VersionNeeded < StandardError; end
23
+
24
+ ## Regex for floating point numbers
25
+ FLOAT_RE = /^-?((\d+(\.\d+)?)|(\.\d+))([eE][-+]?[\d]+)?$/
26
+
27
+ ## Regex for parameters
28
+ PARAM_RE = /^-(-|\.$|[^\d\.])/
29
+
30
+ ## The commandline parser. In typical usage, the methods in this class
31
+ ## will be handled internally by Trollop::options. In this case, only the
32
+ ## #opt, #banner and #version, #depends, and #conflicts methods will
33
+ ## typically be called.
34
+ ##
35
+ ## If you want to instantiate this class yourself (for more complicated
36
+ ## argument-parsing logic), call #parse to actually produce the output hash,
37
+ ## and consider calling it from within
38
+ ## Trollop::with_standard_exception_handling.
39
+ class Parser
40
+
41
+ ## The set of values that indicate a flag option when passed as the
42
+ ## +:type+ parameter of #opt.
43
+ FLAG_TYPES = [:flag, :bool, :boolean]
44
+
45
+ ## The set of values that indicate a single-parameter (normal) option when
46
+ ## passed as the +:type+ parameter of #opt.
47
+ ##
48
+ ## A value of +io+ corresponds to a readable IO resource, including
49
+ ## a filename, URI, or the strings 'stdin' or '-'.
50
+ SINGLE_ARG_TYPES = [:int, :integer, :string, :double, :float, :io, :date]
51
+
52
+ ## The set of values that indicate a multiple-parameter option (i.e., that
53
+ ## takes multiple space-separated values on the commandline) when passed as
54
+ ## the +:type+ parameter of #opt.
55
+ MULTI_ARG_TYPES = [:ints, :integers, :strings, :doubles, :floats, :ios, :dates]
56
+
57
+ ## The complete set of legal values for the +:type+ parameter of #opt.
58
+ TYPES = FLAG_TYPES + SINGLE_ARG_TYPES + MULTI_ARG_TYPES
59
+
60
+ INVALID_SHORT_ARG_REGEX = /[\d-]/ #:nodoc:
61
+
62
+ ## The values from the commandline that were not interpreted by #parse.
63
+ attr_reader :leftovers
64
+
65
+ ## The complete configuration hashes for each option. (Mainly useful
66
+ ## for testing.)
67
+ attr_reader :specs
68
+
69
+ ## Initializes the parser, and instance-evaluates any block given.
70
+ def initialize *a, &b
71
+ @version = nil
72
+ @leftovers = []
73
+ @specs = {}
74
+ @long = {}
75
+ @short = {}
76
+ @order = []
77
+ @constraints = []
78
+ @stop_words = []
79
+ @stop_on_unknown = false
80
+
81
+ #instance_eval(&b) if b # can't take arguments
82
+ cloaker(&b).bind(self).call(*a) if b
83
+ end
84
+
85
+ ## Define an option. +name+ is the option name, a unique identifier
86
+ ## for the option that you will use internally, which should be a
87
+ ## symbol or a string. +desc+ is a string description which will be
88
+ ## displayed in help messages.
89
+ ##
90
+ ## Takes the following optional arguments:
91
+ ##
92
+ ## [+:long+] Specify the long form of the argument, i.e. the form with two dashes. If unspecified, will be automatically derived based on the argument name by turning the +name+ option into a string, and replacing any _'s by -'s.
93
+ ## [+:short+] Specify the short form of the argument, i.e. the form with one dash. If unspecified, will be automatically derived from +name+.
94
+ ## [+:type+] Require that the argument take a parameter or parameters of type +type+. For a single parameter, the value can be a member of +SINGLE_ARG_TYPES+, or a corresponding Ruby class (e.g. +Integer+ for +:int+). For multiple-argument parameters, the value can be any member of +MULTI_ARG_TYPES+ constant. If unset, the default argument type is +:flag+, meaning that the argument does not take a parameter. The specification of +:type+ is not necessary if a +:default+ is given.
95
+ ## [+:default+] Set the default value for an argument. Without a default value, the hash returned by #parse (and thus Trollop::options) will have a +nil+ value for this key unless the argument is given on the commandline. The argument type is derived automatically from the class of the default value given, so specifying a +:type+ is not necessary if a +:default+ is given. (But see below for an important caveat when +:multi+ is specified too.) If the argument is a flag, and the default is set to +true+, then if it is specified on the commandline the value will be +false+.
96
+ ## [+:required+] If set to +true+, the argument must be provided on the commandline.
97
+ ## [+:multi+] If set to +true+, allows multiple occurrences of the option on the commandline. Otherwise, only a single instance of the option is allowed. (Note that this is different from taking multiple parameters. See below.)
98
+ ##
99
+ ## Note that there are two types of argument multiplicity: an argument
100
+ ## can take multiple values, e.g. "--arg 1 2 3". An argument can also
101
+ ## be allowed to occur multiple times, e.g. "--arg 1 --arg 2".
102
+ ##
103
+ ## Arguments that take multiple values should have a +:type+ parameter
104
+ ## drawn from +MULTI_ARG_TYPES+ (e.g. +:strings+), or a +:default+
105
+ ## value of an array of the correct type (e.g. [String]). The
106
+ ## value of this argument will be an array of the parameters on the
107
+ ## commandline.
108
+ ##
109
+ ## Arguments that can occur multiple times should be marked with
110
+ ## +:multi+ => +true+. The value of this argument will also be an array.
111
+ ## In contrast with regular non-multi options, if not specified on
112
+ ## the commandline, the default value will be [], not nil.
113
+ ##
114
+ ## These two attributes can be combined (e.g. +:type+ => +:strings+,
115
+ ## +:multi+ => +true+), in which case the value of the argument will be
116
+ ## an array of arrays.
117
+ ##
118
+ ## There's one ambiguous case to be aware of: when +:multi+: is true and a
119
+ ## +:default+ is set to an array (of something), it's ambiguous whether this
120
+ ## is a multi-value argument as well as a multi-occurrence argument.
121
+ ## In this case, Trollop assumes that it's not a multi-value argument.
122
+ ## If you want a multi-value, multi-occurrence argument with a default
123
+ ## value, you must specify +:type+ as well.
124
+
125
def opt name, desc="", opts={}
  raise ArgumentError, "you already have an argument named '#{name}'" if @specs.member? name

  ## work on a copy so the hash passed in by the caller is never modified
  opts = opts.dup

  ## fill in :type
  opts[:type] = # normalize
    case opts[:type]
    when :boolean, :bool; :flag
    when :integer; :int
    when :integers; :ints
    when :double; :float
    when :doubles; :floats
    when Class
      case opts[:type].name
      when 'TrueClass', 'FalseClass'; :flag
      when 'String'; :string
      when 'Integer'; :int
      when 'Float'; :float
      when 'IO'; :io
      when 'Date'; :date
      else
        ## report the class that was passed in (its own class is always Class)
        raise ArgumentError, "unsupported argument type '#{opts[:type].name}'"
      end
    when nil; nil
    else
      raise ArgumentError, "unsupported argument type '#{opts[:type]}'" unless TYPES.include?(opts[:type])
      opts[:type]
    end

  ## for options with :multi => true, an array default doesn't imply
  ## a multi-valued argument. for that you have to specify a :type
  ## as well. (this is how we disambiguate an ambiguous situation;
  ## see the docs for Parser#opt for details.)
  disambiguated_default =
    if opts[:multi] && opts[:default].is_a?(Array) && !opts[:type]
      opts[:default].first
    else
      opts[:default]
    end

  type_from_default =
    case disambiguated_default
    when Integer; :int
    when Numeric; :float
    when TrueClass, FalseClass; :flag
    when String; :string
    when IO; :io
    when Date; :date
    when Array
      if opts[:default].empty?
        ## name the option: an empty array has no element class to report
        raise ArgumentError, "multiple argument type cannot be deduced from an empty array for '#{name}'"
      end
      case opts[:default][0] # the first element determines the types
      when Integer; :ints
      when Numeric; :floats
      when String; :strings
      when IO; :ios
      when Date; :dates
      else
        raise ArgumentError, "unsupported multiple argument type '#{opts[:default][0].class.name}'"
      end
    when nil; nil
    else
      raise ArgumentError, "unsupported argument type '#{opts[:default].class.name}'"
    end

  raise ArgumentError, ":type specification and default type don't match (default type is #{type_from_default})" if opts[:type] && type_from_default && opts[:type] != type_from_default

  opts[:type] = opts[:type] || type_from_default || :flag

  ## fill in :long
  opts[:long] = opts[:long] ? opts[:long].to_s : name.to_s.gsub("_", "-")
  opts[:long] =
    case opts[:long]
    when /^--([^-].*)$/; $1
    when /^[^-]/; opts[:long]
    else; raise ArgumentError, "invalid long option name #{opts[:long].inspect}"
    end
  raise ArgumentError, "long option name #{opts[:long].inspect} is already taken; please specify a (different) :long" if @long[opts[:long]]

  ## fill in :short
  opts[:short] = opts[:short].to_s if opts[:short] && opts[:short] != :none
  opts[:short] =
    case opts[:short]
    when /^-(.)$/; $1
    when nil, :none, /^.$/; opts[:short]
    else raise ArgumentError, "invalid short option name '#{opts[:short].inspect}'"
    end

  if opts[:short]
    raise ArgumentError, "short option name #{opts[:short].inspect} is already taken; please specify a (different) :short" if @short[opts[:short]]
    raise ArgumentError, "a short option name can't be a number or a dash" if opts[:short] =~ INVALID_SHORT_ARG_REGEX
  end

  ## fill in :default for flags
  opts[:default] = false if opts[:type] == :flag && opts[:default].nil?

  ## autobox :default for :multi (multi-occurrence) arguments
  opts[:default] = [opts[:default]] if opts[:default] && opts[:multi] && !opts[:default].is_a?(Array)

  ## fill in :multi
  opts[:multi] ||= false

  ## register the option under its long and short names, and remember
  ## the order of definition
  opts[:desc] ||= desc
  @long[opts[:long]] = name
  @short[opts[:short]] = name if opts[:short] && opts[:short] != :none
  @specs[name] = opts
  @order << [:opt, name]
end
230
+
231
+ ## Sets the version string. If set, the user can request the version
232
+ ## on the commandline. Should probably be of the form "<program name>
233
+ ## <version number>".
234
+ def version s=nil; @version = s if s; @version end
235
+
236
+ ## Adds text to the help display. Can be interspersed with calls to
237
+ ## #opt to build a multi-section help page.
238
+ def banner s; @order << [:text, s] end
239
+ alias :text :banner
240
+
241
+ ## Marks two (or more!) options as requiring each other. Only handles
242
+ ## undirected (i.e., mutual) dependencies. Directed dependencies are
243
+ ## better modeled with Trollop::die.
244
+ def depends *syms
245
+ syms.each { |sym| raise ArgumentError, "unknown option '#{sym}'" unless @specs[sym] }
246
+ @constraints << [:depends, syms]
247
+ end
248
+
249
+ ## Marks two (or more!) options as conflicting.
250
+ def conflicts *syms
251
+ syms.each { |sym| raise ArgumentError, "unknown option '#{sym}'" unless @specs[sym] }
252
+ @constraints << [:conflicts, syms]
253
+ end
254
+
255
+ ## Defines a set of words which cause parsing to terminate when
256
+ ## encountered, such that any options to the left of the word are
257
+ ## parsed as usual, and options to the right of the word are left
258
+ ## intact.
259
+ ##
260
+ ## A typical use case would be for subcommand support, where these
261
+ ## would be set to the list of subcommands. A subsequent Trollop
262
+ ## invocation would then be used to parse subcommand options, after
263
+ ## shifting the subcommand off of ARGV.
264
+ def stop_on *words
265
+ @stop_words = [*words].flatten
266
+ end
267
+
268
+ ## Similar to #stop_on, but stops on any unknown word when encountered
269
+ ## (unless it is a parameter for an argument). This is useful for
270
+ ## cases where you don't know the set of subcommands ahead of time,
271
+ ## i.e., without first parsing the global options.
272
+ def stop_on_unknown
273
+ @stop_on_unknown = true
274
+ end
275
+
276
+ ## Parses the commandline. Typically called by Trollop::options,
277
+ ## but you can call it directly if you need more control.
278
+ ##
279
+ ## throws CommandlineError, HelpNeeded, and VersionNeeded exceptions.
280
+ def parse cmdline=ARGV
281
+ vals = {}
282
+ required = {}
283
+
284
+ opt :version, "Print version and exit" if @version unless @specs[:version] || @long["version"]
285
+ opt :help, "Show this message" unless @specs[:help] || @long["help"]
286
+
287
+ @specs.each do |sym, opts|
288
+ required[sym] = true if opts[:required]
289
+ vals[sym] = opts[:default]
290
+ vals[sym] = [] if opts[:multi] && !opts[:default] # multi arguments default to [], not nil
291
+ end
292
+
293
+ resolve_default_short_options!
294
+
295
+ ## resolve symbols
296
+ given_args = {}
297
+ @leftovers = each_arg cmdline do |arg, params|
298
+ ## handle --no- forms
299
+ arg, negative_given = if arg =~ /^--no-([^-]\S*)$/
300
+ ["--#{$1}", true]
301
+ else
302
+ [arg, false]
303
+ end
304
+
305
+ sym = case arg
306
+ when /^-([^-])$/; @short[$1]
307
+ when /^--([^-]\S*)$/; @long[$1] || @long["no-#{$1}"]
308
+ else; raise CommandlineError, "invalid argument syntax: '#{arg}'"
309
+ end
310
+
311
+ sym = nil if arg =~ /--no-/ # explicitly invalidate --no-no- arguments
312
+
313
+ raise CommandlineError, "unknown argument '#{arg}'" unless sym
314
+
315
+ if given_args.include?(sym) && !@specs[sym][:multi]
316
+ raise CommandlineError, "option '#{arg}' specified multiple times"
317
+ end
318
+
319
+ given_args[sym] ||= {}
320
+ given_args[sym][:arg] = arg
321
+ given_args[sym][:negative_given] = negative_given
322
+ given_args[sym][:params] ||= []
323
+
324
+ # The block returns the number of parameters taken.
325
+ num_params_taken = 0
326
+
327
+ unless params.nil?
328
+ if SINGLE_ARG_TYPES.include?(@specs[sym][:type])
329
+ given_args[sym][:params] << params[0, 1] # take the first parameter
330
+ num_params_taken = 1
331
+ elsif MULTI_ARG_TYPES.include?(@specs[sym][:type])
332
+ given_args[sym][:params] << params # take all the parameters
333
+ num_params_taken = params.size
334
+ end
335
+ end
336
+
337
+ num_params_taken
338
+ end
339
+
340
+ ## check for version and help args
341
+ raise VersionNeeded if given_args.include? :version
342
+ raise HelpNeeded if given_args.include? :help
343
+
344
+ ## check constraint satisfaction
345
+ @constraints.each do |type, syms|
346
+ constraint_sym = syms.find { |sym| given_args[sym] }
347
+ next unless constraint_sym
348
+
349
+ case type
350
+ when :depends
351
+ syms.each { |sym| raise CommandlineError, "--#{@specs[constraint_sym][:long]} requires --#{@specs[sym][:long]}" unless given_args.include? sym }
352
+ when :conflicts
353
+ syms.each { |sym| raise CommandlineError, "--#{@specs[constraint_sym][:long]} conflicts with --#{@specs[sym][:long]}" if given_args.include?(sym) && (sym != constraint_sym) }
354
+ end
355
+ end
356
+
357
+ required.each do |sym, val|
358
+ raise CommandlineError, "option --#{@specs[sym][:long]} must be specified" unless given_args.include? sym
359
+ end
360
+
361
+ ## parse parameters
362
+ given_args.each do |sym, given_data|
363
+ arg, params, negative_given = given_data.values_at :arg, :params, :negative_given
364
+
365
+ opts = @specs[sym]
366
+ raise CommandlineError, "option '#{arg}' needs a parameter" if params.empty? && opts[:type] != :flag
367
+
368
+ vals["#{sym}_given".intern] = true # mark argument as specified on the commandline
369
+
370
+ case opts[:type]
371
+ when :flag
372
+ vals[sym] = (sym.to_s =~ /^no_/ ? negative_given : !negative_given)
373
+ when :int, :ints
374
+ vals[sym] = params.map { |pg| pg.map { |p| parse_integer_parameter p, arg } }
375
+ when :float, :floats
376
+ vals[sym] = params.map { |pg| pg.map { |p| parse_float_parameter p, arg } }
377
+ when :string, :strings
378
+ vals[sym] = params.map { |pg| pg.map { |p| p.to_s } }
379
+ when :io, :ios
380
+ vals[sym] = params.map { |pg| pg.map { |p| parse_io_parameter p, arg } }
381
+ when :date, :dates
382
+ vals[sym] = params.map { |pg| pg.map { |p| parse_date_parameter p, arg } }
383
+ end
384
+
385
+ if SINGLE_ARG_TYPES.include?(opts[:type])
386
+ unless opts[:multi] # single parameter
387
+ vals[sym] = vals[sym][0][0]
388
+ else # multiple options, each with a single parameter
389
+ vals[sym] = vals[sym].map { |p| p[0] }
390
+ end
391
+ elsif MULTI_ARG_TYPES.include?(opts[:type]) && !opts[:multi]
392
+ vals[sym] = vals[sym][0] # single option, with multiple parameters
393
+ end
394
+ # else: multiple options, with multiple parameters
395
+ end
396
+
397
+ ## modify input in place with only those
398
+ ## arguments we didn't process
399
+ cmdline.clear
400
+ @leftovers.each { |l| cmdline << l }
401
+
402
+ ## allow openstruct-style accessors
403
+ class << vals
404
+ def method_missing(m, *args)
405
+ self[m] || self[m.to_s]
406
+ end
407
+ end
408
+ vals
409
+ end
410
+
411
+ def parse_date_parameter param, arg #:nodoc:
412
+ begin
413
+ begin
414
+ time = Chronic.parse(param)
415
+ rescue NameError
416
+ # chronic is not available
417
+ end
418
+ time ? Date.new(time.year, time.month, time.day) : Date.parse(param)
419
+ rescue ArgumentError
420
+ raise CommandlineError, "option '#{arg}' needs a date"
421
+ end
422
+ end
423
+
424
+ ## Print the help message to +stream+.
425
+ def educate stream=$stdout
426
+ width # hack: calculate it now; otherwise we have to be careful not to
427
+ # call this unless the cursor's at the beginning of a line.
428
+ left = {}
429
+ @specs.each do |name, spec|
430
+ left[name] = "--#{spec[:long]}" +
431
+ (spec[:type] == :flag && spec[:default] ? ", --no-#{spec[:long]}" : "") +
432
+ (spec[:short] && spec[:short] != :none ? ", -#{spec[:short]}" : "") +
433
+ case spec[:type]
434
+ when :flag; ""
435
+ when :int; " <i>"
436
+ when :ints; " <i+>"
437
+ when :string; " <s>"
438
+ when :strings; " <s+>"
439
+ when :float; " <f>"
440
+ when :floats; " <f+>"
441
+ when :io; " <filename/uri>"
442
+ when :ios; " <filename/uri+>"
443
+ when :date; " <date>"
444
+ when :dates; " <date+>"
445
+ end
446
+ end
447
+
448
+ leftcol_width = left.values.map { |s| s.length }.max || 0
449
+ rightcol_start = leftcol_width + 6 # spaces
450
+
451
+ unless @order.size > 0 && @order.first.first == :text
452
+ stream.puts "#@version\n" if @version
453
+ stream.puts "Options:"
454
+ end
455
+
456
+ @order.each do |what, opt|
457
+ if what == :text
458
+ stream.puts wrap(opt)
459
+ next
460
+ end
461
+
462
+ spec = @specs[opt]
463
+ stream.printf " %#{leftcol_width}s: ", left[opt]
464
+ desc = spec[:desc] + begin
465
+ default_s = case spec[:default]
466
+ when $stdout; "<stdout>"
467
+ when $stdin; "<stdin>"
468
+ when $stderr; "<stderr>"
469
+ when Array
470
+ spec[:default].join(", ")
471
+ else
472
+ spec[:default].to_s
473
+ end
474
+
475
+ if spec[:default]
476
+ if spec[:desc] =~ /\.$/
477
+ " (Default: #{default_s})"
478
+ else
479
+ " (default: #{default_s})"
480
+ end
481
+ else
482
+ ""
483
+ end
484
+ end
485
+ stream.puts wrap(desc, :width => width - rightcol_start - 1, :prefix => rightcol_start)
486
+ end
487
+ end
488
+
489
## Width in columns used to wrap help output. Memoized after the first
## call. Falls back to 80 when stdout is not a terminal or the terminal
## size cannot be determined.
def width #:nodoc:
  @width ||= if $stdout.tty?
    begin
      ## loaded lazily: only needed when writing to a terminal
      require 'io/console'
      cols = $stdout.winsize.last # winsize is [rows, columns]
      cols.to_i > 0 ? cols : 80
    rescue LoadError, StandardError
      ## deliberately not Exception: interrupts and exit requests must
      ## still propagate
      80
    end
  else
    80
  end
end
504
+
505
+ def wrap str, opts={} # :nodoc:
506
+ if str == ""
507
+ [""]
508
+ else
509
+ str.split("\n").map { |s| wrap_line s, opts }.flatten
510
+ end
511
+ end
512
+
513
+ ## The per-parser version of Trollop::die (see that for documentation).
514
+ def die arg, msg
515
+ if msg
516
+ $stderr.puts "Error: argument --#{@specs[arg][:long]} #{msg}."
517
+ else
518
+ $stderr.puts "Error: #{arg}."
519
+ end
520
+ $stderr.puts "Try --help for help."
521
+ exit(-1)
522
+ end
523
+
524
+ private
525
+
526
+ ## yield successive arg, parameter pairs
527
+ def each_arg args
528
+ remains = []
529
+ i = 0
530
+
531
+ until i >= args.length
532
+ if @stop_words.member? args[i]
533
+ remains += args[i .. -1]
534
+ return remains
535
+ end
536
+ case args[i]
537
+ when /^--$/ # arg terminator
538
+ remains += args[(i + 1) .. -1]
539
+ return remains
540
+ when /^--(\S+?)=(.*)$/ # long argument with equals
541
+ yield "--#{$1}", [$2]
542
+ i += 1
543
+ when /^--(\S+)$/ # long argument
544
+ params = collect_argument_parameters(args, i + 1)
545
+ unless params.empty?
546
+ num_params_taken = yield args[i], params
547
+ unless num_params_taken
548
+ if @stop_on_unknown
549
+ remains += args[i + 1 .. -1]
550
+ return remains
551
+ else
552
+ remains += params
553
+ end
554
+ end
555
+ i += 1 + num_params_taken
556
+ else # long argument no parameter
557
+ yield args[i], nil
558
+ i += 1
559
+ end
560
+ when /^-(\S+)$/ # one or more short arguments
561
+ shortargs = $1.split(//)
562
+ shortargs.each_with_index do |a, j|
563
+ if j == (shortargs.length - 1)
564
+ params = collect_argument_parameters(args, i + 1)
565
+ unless params.empty?
566
+ num_params_taken = yield "-#{a}", params
567
+ unless num_params_taken
568
+ if @stop_on_unknown
569
+ remains += args[i + 1 .. -1]
570
+ return remains
571
+ else
572
+ remains += params
573
+ end
574
+ end
575
+ i += 1 + num_params_taken
576
+ else # argument no parameter
577
+ yield "-#{a}", nil
578
+ i += 1
579
+ end
580
+ else
581
+ yield "-#{a}", nil
582
+ end
583
+ end
584
+ else
585
+ if @stop_on_unknown
586
+ remains += args[i .. -1]
587
+ return remains
588
+ else
589
+ remains << args[i]
590
+ i += 1
591
+ end
592
+ end
593
+ end
594
+
595
+ remains
596
+ end
597
+
598
## Converts +param+ to an Integer. An optional leading minus sign is
## accepted. The whole string must be digits; anything else raises
## CommandlineError naming the option +arg+.
def parse_integer_parameter param, arg
  raise CommandlineError, "option '#{arg}' needs an integer" unless param =~ /\A-?\d+\z/
  param.to_i
end
602
+
603
+ def parse_float_parameter param, arg
604
+ raise CommandlineError, "option '#{arg}' needs a floating-point number" unless param =~ FLOAT_RE
605
+ param.to_f
606
+ end
607
+
608
## Turns +param+ into a readable IO: 'stdin' or '-' (case-insensitive)
## gives $stdin, a "scheme://" value is opened as a URL through open-uri,
## and anything else is opened as a file path.
##
## Raises CommandlineError naming the option +arg+ when the operating
## system refuses to open the target.
def parse_io_parameter param, arg
  return $stdin if param =~ /^(stdin|-)$/i

  begin
    if param =~ %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
      require 'open-uri'
      ## URI.open is the supported entry point on current Rubies; older
      ## ones only provide open-uri's extension of Kernel#open.
      URI.respond_to?(:open) ? URI.open(param) : open(param)
    else
      ## File.open rather than Kernel#open: a value starting with '|'
      ## must be treated as a path, never run as a command.
      File.open(param)
    end
  rescue SystemCallError => e
    raise CommandlineError, "file or url for option '#{arg}' cannot be opened: #{e.message}"
  end
end
620
+
621
+ def collect_argument_parameters args, start_at
622
+ params = []
623
+ pos = start_at
624
+ while args[pos] && args[pos] !~ PARAM_RE && !@stop_words.member?(args[pos]) do
625
+ params << args[pos]
626
+ pos += 1
627
+ end
628
+ params
629
+ end
630
+
631
+ def resolve_default_short_options!
632
+ @order.each do |type, name|
633
+ next unless type == :opt
634
+ opts = @specs[name]
635
+ next if opts[:short]
636
+
637
+ c = opts[:long].split(//).find { |d| d !~ INVALID_SHORT_ARG_REGEX && !@short.member?(d) }
638
+ if c # found a character to use
639
+ opts[:short] = c
640
+ @short[c] = name
641
+ end
642
+ end
643
+ end
644
+
645
+ def wrap_line str, opts={}
646
+ prefix = opts[:prefix] || 0
647
+ width = opts[:width] || (self.width - 1)
648
+ start = 0
649
+ ret = []
650
+ until start > str.length
651
+ nextt =
652
+ if start + width >= str.length
653
+ str.length
654
+ else
655
+ x = str.rindex(/\s/, start + width)
656
+ x = str.index(/\s/, start) if x && x < start
657
+ x || str.length
658
+ end
659
+ ret << (ret.empty? ? "" : " " * prefix) + str[start ... nextt]
660
+ start = nextt + 1
661
+ end
662
+ ret
663
+ end
664
+
665
+ ## instance_eval but with ability to handle block arguments
666
+ ## thanks to _why: http://redhanded.hobix.com/inspect/aBlockCostume.html
667
## Converts the block +b+ into an UnboundMethod. The block is temporarily
## installed on this object's singleton class under the name +cloaker_+,
## captured with instance_method, and the temporary method is removed again
## before the captured method is returned.
def cloaker &b
  eigenclass = class << self; self; end
  eigenclass.class_eval do
    define_method :cloaker_, &b
    unbound = instance_method :cloaker_
    remove_method :cloaker_
    unbound
  end
end
675
+ end
676
+
677
+ ## The easy, syntactic-sugary entry method into Trollop. Creates a Parser,
678
+ ## passes the block to it, then parses +args+ with it, handling any errors or
679
+ ## requests for help or version information appropriately (and then exiting).
680
+ ## Modifies +args+ in place. Returns a hash of option values.
681
+ ##
682
+ ## The block passed in should contain zero or more calls to +opt+
683
+ ## (Parser#opt), zero or more calls to +text+ (Parser#text), and
684
+ ## probably a call to +version+ (Parser#version).
685
+ ##
686
+ ## The returned hash contains a value for every option specified with
687
+ ## +opt+. The value will be the value given on the commandline, or the
688
+ ## default value if the option was not specified on the commandline. For
689
+ ## every option specified on the commandline, a key "<option
690
+ ## name>_given" will also be set in the hash.
691
+ ##
692
+ ## Example:
693
+ ##
694
+ ## require 'trollop'
695
+ ## opts = Trollop::options do
696
+ ## opt :monkey, "Use monkey mode" # a flag --monkey, defaulting to false
697
+ ## opt :name, "Monkey name", :type => :string # a string --name <s>, defaulting to nil
698
+ ## opt :num_limbs, "Number of limbs", :default => 4 # an integer --num-limbs <i>, defaulting to 4
699
+ ## end
700
+ ##
701
+ ## ## if called with no arguments
702
+ ## p opts # => {:monkey=>false, :name=>nil, :num_limbs=>4, :help=>false}
703
+ ##
704
+ ## ## if called with --monkey
705
+ ## p opts # => {:monkey=>true, :name=>nil, :num_limbs=>4, :help=>false, :monkey_given=>true}
706
+ ##
707
+ ## See more examples at http://trollop.rubyforge.org.
708
def options args=ARGV, *a, &b
  # Remember the parser so that a later call to die can report through it.
  parser = Parser.new(*a, &b)
  @last_parser = parser
  with_standard_exception_handling(parser) do
    parser.parse args
  end
end
712
+
713
+ ## If Trollop::options doesn't do quite what you want, you can create a Parser
714
+ ## object and call Parser#parse on it. That method will throw CommandlineError,
715
+ ## HelpNeeded and VersionNeeded exceptions when necessary; if you want to
716
+ ## have these handled for you in the standard manner (e.g. show the help
717
+ ## and then exit upon a HelpNeeded exception), call your code from within
718
+ ## a block passed to this method.
719
+ ##
720
+ ## Note that this method will call Kernel#exit after handling an exception!
721
+ ##
722
+ ## Usage example:
723
+ ##
724
+ ## require 'trollop'
725
+ ## p = Trollop::Parser.new do
726
+ ## opt :monkey, "Use monkey mode" # a flag --monkey, defaulting to false
727
+ ## opt :goat, "Use goat mode", :default => true # a flag --goat, defaulting to true
728
+ ## end
729
+ ##
730
+ ## opts = Trollop::with_standard_exception_handling p do
731
+ ## o = p.parse ARGV
732
+ ## raise Trollop::HelpNeeded if ARGV.empty? # show help screen
733
+ ## o
734
+ ## end
735
+ ##
736
+ ## Requires passing in the parser object.
737
+
738
def with_standard_exception_handling parser
  # Runs the caller's block and returns its value when nothing is raised.
  yield
rescue CommandlineError => err
  # Bad commandline: report on stderr and exit with a failure status.
  $stderr.puts "Error: #{err.message}."
  $stderr.puts "Try --help for help."
  exit(-1)
rescue HelpNeeded
  # Help requested: let the parser print its help screen, then exit.
  parser.educate
  exit
rescue VersionNeeded
  # Version requested: print the parser's version string, then exit.
  puts parser.version
  exit
end
753
+
754
+ ## Informs the user that their usage of 'arg' was wrong, as detailed by
755
+ ## 'msg', and dies. Example:
756
+ ##
757
+ ## options do
758
+ ## opt :volume, :default => 0.0
759
+ ## end
760
+ ##
761
+ ## die :volume, "too loud" if opts[:volume] > 10.0
762
+ ## die :volume, "too soft" if opts[:volume] < 0.1
763
+ ##
764
+ ## In the one-argument case, simply print that message, a notice
765
+ ## about -h, and die. Example:
766
+ ##
767
+ ## options do
768
+ ## opt :whatever # ...
769
+ ## end
770
+ ##
771
+ ## Trollop::die "need at least one filename" if ARGV.empty?
772
def die arg, msg=nil
  # There is no parser to report through until options has been called.
  unless @last_parser
    raise ArgumentError, "Trollop::die can only be called after Trollop::options"
  end

  @last_parser.die arg, msg
end
779
+
780
+ module_function :options, :die, :with_standard_exception_handling
781
+
782
+ end # module