imw 0.2.18 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +7 -26
- data/Gemfile.lock +13 -38
- data/{LICENSE → LICENSE.txt} +1 -1
- data/README.textile +35 -0
- data/Rakefile +45 -22
- data/VERSION +1 -1
- data/examples/foo.rb +19 -0
- data/examples/html_selector.rb +22 -0
- data/examples/nes_game_list.csv +625 -0
- data/examples/nes_gamespot.csv +1371 -0
- data/examples/nes_nintendo.csv +624 -0
- data/examples/nes_unlicensed.csv +89 -0
- data/examples/nes_wikipedia.csv +710 -0
- data/examples/nibbler_test.rb +24 -0
- data/examples/script.rb +19 -0
- data/lib/imw.rb +28 -140
- data/lib/imw/error.rb +9 -0
- data/lib/imw/recordizer.rb +8 -0
- data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
- data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
- data/lib/imw/resource.rb +3 -119
- data/lib/imw/serializer.rb +7 -0
- data/lib/imw/serializer/json_serializer.rb +17 -0
- data/lib/imw/uri.rb +41 -0
- data/spec/resource_spec.rb +78 -0
- data/spec/uri_spec.rb +55 -0
- metadata +81 -232
- data/README.rdoc +0 -371
- data/bin/imw +0 -5
- data/bin/tsv_to_json.rb +0 -29
- data/etc/imwrc.rb +0 -26
- data/examples/dataset.rb +0 -12
- data/examples/metadata.yml +0 -10
- data/lib/imw/archives.rb +0 -120
- data/lib/imw/archives/rar.rb +0 -19
- data/lib/imw/archives/tar.rb +0 -19
- data/lib/imw/archives/tarbz2.rb +0 -73
- data/lib/imw/archives/targz.rb +0 -73
- data/lib/imw/archives/zip.rb +0 -51
- data/lib/imw/boot.rb +0 -87
- data/lib/imw/compressed_files.rb +0 -94
- data/lib/imw/compressed_files/bz2.rb +0 -16
- data/lib/imw/compressed_files/compressible.rb +0 -75
- data/lib/imw/compressed_files/gz.rb +0 -16
- data/lib/imw/dataset.rb +0 -125
- data/lib/imw/dataset/paths.rb +0 -29
- data/lib/imw/dataset/workflow.rb +0 -195
- data/lib/imw/formats.rb +0 -33
- data/lib/imw/formats/delimited.rb +0 -170
- data/lib/imw/formats/excel.rb +0 -100
- data/lib/imw/formats/json.rb +0 -41
- data/lib/imw/formats/pdf.rb +0 -71
- data/lib/imw/formats/sgml.rb +0 -69
- data/lib/imw/formats/yaml.rb +0 -41
- data/lib/imw/metadata.rb +0 -83
- data/lib/imw/metadata/contains_metadata.rb +0 -54
- data/lib/imw/metadata/dsl.rb +0 -111
- data/lib/imw/metadata/field.rb +0 -37
- data/lib/imw/metadata/has_metadata.rb +0 -98
- data/lib/imw/metadata/has_summary.rb +0 -57
- data/lib/imw/metadata/schema.rb +0 -17
- data/lib/imw/parsers.rb +0 -8
- data/lib/imw/parsers/flat.rb +0 -44
- data/lib/imw/parsers/html_parser.rb +0 -387
- data/lib/imw/parsers/html_parser/matchers.rb +0 -289
- data/lib/imw/parsers/line_parser.rb +0 -87
- data/lib/imw/parsers/regexp_parser.rb +0 -72
- data/lib/imw/repository.rb +0 -12
- data/lib/imw/runner.rb +0 -118
- data/lib/imw/schemes.rb +0 -23
- data/lib/imw/schemes/ftp.rb +0 -142
- data/lib/imw/schemes/hdfs.rb +0 -251
- data/lib/imw/schemes/http.rb +0 -165
- data/lib/imw/schemes/local.rb +0 -409
- data/lib/imw/schemes/remote.rb +0 -119
- data/lib/imw/schemes/s3.rb +0 -143
- data/lib/imw/schemes/sql.rb +0 -129
- data/lib/imw/tools.rb +0 -12
- data/lib/imw/tools/aggregator.rb +0 -148
- data/lib/imw/tools/archiver.rb +0 -220
- data/lib/imw/tools/downloader.rb +0 -63
- data/lib/imw/tools/extension_analyzer.rb +0 -114
- data/lib/imw/tools/summarizer.rb +0 -83
- data/lib/imw/tools/transferer.rb +0 -167
- data/lib/imw/utils.rb +0 -74
- data/lib/imw/utils/dynamically_extendable.rb +0 -137
- data/lib/imw/utils/error.rb +0 -59
- data/lib/imw/utils/extensions/hpricot.rb +0 -34
- data/lib/imw/utils/has_uri.rb +0 -131
- data/lib/imw/utils/log.rb +0 -92
- data/lib/imw/utils/misc.rb +0 -57
- data/lib/imw/utils/paths.rb +0 -146
- data/lib/imw/utils/uri.rb +0 -59
- data/lib/imw/utils/uuid.rb +0 -33
- data/lib/imw/utils/validate.rb +0 -38
- data/lib/imw/utils/version.rb +0 -11
- data/spec/data/formats/delimited/sample.csv +0 -131
- data/spec/data/formats/delimited/sample.tsv +0 -131
- data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
- data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
- data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
- data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
- data/spec/data/formats/excel/sample.xls +0 -0
- data/spec/data/formats/json/sample.json +0 -1
- data/spec/data/formats/none/sample +0 -650
- data/spec/data/formats/sgml/sample.xml +0 -617
- data/spec/data/formats/text/sample.txt +0 -650
- data/spec/data/formats/yaml/sample.yaml +0 -410
- data/spec/data/schema-tabular.yaml +0 -11
- data/spec/imw/archives/rar_spec.rb +0 -16
- data/spec/imw/archives/tar_spec.rb +0 -16
- data/spec/imw/archives/tarbz2_spec.rb +0 -24
- data/spec/imw/archives/targz_spec.rb +0 -21
- data/spec/imw/archives/zip_spec.rb +0 -16
- data/spec/imw/archives_spec.rb +0 -77
- data/spec/imw/compressed_files/bz2_spec.rb +0 -15
- data/spec/imw/compressed_files/compressible_spec.rb +0 -36
- data/spec/imw/compressed_files/gz_spec.rb +0 -15
- data/spec/imw/compressed_files_spec.rb +0 -47
- data/spec/imw/dataset/paths_spec.rb +0 -32
- data/spec/imw/dataset/workflow_spec.rb +0 -41
- data/spec/imw/formats/delimited_spec.rb +0 -44
- data/spec/imw/formats/excel_spec.rb +0 -55
- data/spec/imw/formats/json_spec.rb +0 -18
- data/spec/imw/formats/sgml_spec.rb +0 -24
- data/spec/imw/formats/yaml_spec.rb +0 -19
- data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
- data/spec/imw/metadata/field_spec.rb +0 -25
- data/spec/imw/metadata/has_metadata_spec.rb +0 -58
- data/spec/imw/metadata/has_summary_spec.rb +0 -32
- data/spec/imw/metadata/schema_spec.rb +0 -24
- data/spec/imw/metadata_spec.rb +0 -86
- data/spec/imw/parsers/line_parser_spec.rb +0 -96
- data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
- data/spec/imw/resource_spec.rb +0 -32
- data/spec/imw/schemes/hdfs_spec.rb +0 -67
- data/spec/imw/schemes/http_spec.rb +0 -19
- data/spec/imw/schemes/local_spec.rb +0 -165
- data/spec/imw/schemes/remote_spec.rb +0 -38
- data/spec/imw/schemes/s3_spec.rb +0 -31
- data/spec/imw/schemes/sql_spec.rb +0 -3
- data/spec/imw/tools/aggregator_spec.rb +0 -71
- data/spec/imw/tools/archiver_spec.rb +0 -120
- data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
- data/spec/imw/tools/summarizer_spec.rb +0 -8
- data/spec/imw/tools/transferer_spec.rb +0 -195
- data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
- data/spec/imw/utils/has_uri_spec.rb +0 -61
- data/spec/imw/utils/paths_spec.rb +0 -10
- data/spec/imw/utils/shared_paths_spec.rb +0 -29
- data/spec/imw_spec.rb +0 -14
- data/spec/rcov.opts +0 -1
- data/spec/spec_helper.rb +0 -31
- data/spec/support/custom_matchers.rb +0 -28
- data/spec/support/file_contents_matcher.rb +0 -30
- data/spec/support/paths_matcher.rb +0 -66
- data/spec/support/random.rb +0 -213
- data/spec/support/without_regard_to_order_matcher.rb +0 -41
data/lib/imw/utils/has_uri.rb
DELETED
@@ -1,131 +0,0 @@
|
|
1
|
-
require 'uri'
require 'addressable/uri'
|
2
|
-
|
3
|
-
module IMW
  module Utils

    # Endows an including class with a wrapper for Addressable::URI.
    #
    # Several readers below (+dirname+, +basename+, +extname+, ...) call
    # a +path+ method that this module does not define; the including
    # class is expected to provide it.  Derived values are memoized in
    # instance variables and are not cleared when +uri=+ is called again.
    module HasURI

      # The URI of this object.
      attr_reader :uri

      # Set the URI of this resource by parsing the given +uri+ (if
      # necessary).
      #
      # If the string cannot be parsed as given, it is percent-escaped
      # and parsed a second time; <tt>@encoded_uri</tt> is then set to
      # +true+.  An error raised by the second parse propagates.
      #
      # @param [String, Addressable::URI] uri the uri to parse
      def uri=(uri)
        if uri.is_a?(Addressable::URI)
          @uri = uri
          return
        end

        begin
          @uri = Addressable::URI.parse(uri.to_s)
        rescue Addressable::URI::InvalidURIError, ::URI::InvalidURIError
          # Addressable raises its own InvalidURIError (not the stdlib
          # one), and URI.encode no longer exists on Ruby >= 3.0, so
          # escape through the stdlib parser object instead.
          @uri = Addressable::URI.parse(::URI::DEFAULT_PARSER.escape(uri.to_s))
          @encoded_uri = true
        end
      end

      # The scheme of this resource.  Will be +nil+ for local resources.
      #
      # @return [String]
      def scheme
        @scheme ||= uri.scheme
      end

      # The directory name of this resource's path.
      #
      # @return [String]
      def dirname
        @dirname ||= File.dirname(path)
      end

      # The basename of this resource's path.
      #
      # @return [String]
      def basename
        @basename ||= File.basename(path)
      end

      # Returns the extension (INCLUDING the '.') of this resource's
      # path.  Redefine this in an including class for which this is
      # weird ('.tar.gz' I'm talking to you...)
      #
      # @return [String]
      def extname
        @extname ||= File.extname(path)
      end

      # Returns the extension (WITHOUT the '.') of this resource's path,
      # or the empty string when there is no extension.
      #
      # @return [String]
      def extension
        @extension ||= extname[1..-1] || ''
      end

      # Returns the basename of the file with the part reported by
      # +extname+ removed.
      #
      # @return [String]
      def name
        @name ||= extname ? basename[0, basename.length - extname.length] : basename
      end

      # Returns the user associated with the host of this URI.
      #
      # @return [String]
      def user
        @user ||= uri.user
      end

      # Returns the password associated with access to this URI.
      #
      # @return [String]
      def password
        @password ||= uri.password
      end

      # Return the fragment part of this resource's URI.
      #
      # Will likely be +nil+ for local resources.
      #
      # @return [String]
      def fragment
        @fragment ||= uri.fragment
      end

      # Return the URI of this resource with any query strings and
      # fragments removed.
      #
      # Each component is read from this object when it responds to the
      # component's name (so overrides in the including class win) and
      # from +uri+ otherwise.
      #
      # @return [Object] a new instance of the same class as +uri+
      def stripped_uri
        components = %w[scheme userinfo host port path].each_with_object({}) do |part, args|
          args[part.to_sym] = respond_to?(part) ? send(part) : uri.send(part)
        end
        uri.class.new(components)
      end

      # Return the path complete with query string and fragment.
      #
      # @return [String]
      def raw_path
        full = uri.path
        full += "?#{uri.query}"    unless uri.query.nil?
        full += "##{uri.fragment}" unless uri.fragment.nil?
        full
      end

      # The string form of the wrapped URI.
      #
      # @return [String]
      def to_s
        uri.to_s
      end
    end
  end
end
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
data/lib/imw/utils/log.rb
DELETED
@@ -1,92 +0,0 @@
|
|
1
|
-
require 'logger'
|
2
|
-
|
3
|
-
module IMW

  # Default log file.
  LOG_FILE_DESTINATION = STDERR unless defined?(LOG_FILE_DESTINATION)

  # Default log file time format
  LOG_TIMEFORMAT = "%Y-%m-%d %H:%M:%S " unless defined?(LOG_TIMEFORMAT)

  # Default verbosity
  VERBOSE = false unless defined?(VERBOSE)

  class << self; attr_accessor :log, :verbose end

  # Is IMW operating in verbose mode?
  #
  # True when either the IMW::VERBOSE constant or the +IMW.verbose+
  # accessor is truthy.
  #
  # @return [nil, false, true]
  def self.verbose?
    VERBOSE || verbose
  end

  # Create a Logger pointed at IMW::LOG_FILE_DESTINATION (unless
  # +IMW.log+ is already set) and set its timestamp format and level.
  def self.instantiate_logger!
    IMW.log ||= Logger.new(LOG_FILE_DESTINATION)
    IMW.log.datetime_format = "%Y%m%d-%H:%M:%S "
    IMW.log.level = Logger::INFO
  end

  # Write +events+ to +IMW.log+, one per line, prefixed with "IMW: ".
  #
  # A trailing Hash is treated as options and is not logged.
  #
  # @param [Array] events messages (nested arrays are flattened),
  #   optionally followed by an options hash with a <tt>:level</tt>
  #   key (a Logger severity, default Logger::INFO)
  def self.announce(*events)
    messages, options = extract_log_options(events)
    IMW.log.add options.fetch(:level, Logger::INFO), "IMW: " + messages.join("\n")
  end

  # Same as +announce+, but only when +IMW.verbose?+.
  def self.announce_if_verbose(*events)
    announce(*events) if IMW.verbose?
  end

  # Announce +events+ framed above and below by a line of 75 asterisks.
  def self.banner(*events)
    messages, options = extract_log_options(events)
    announce(["*" * 75, messages, "*" * 75], { :level => Logger::INFO }.merge(options))
  end

  # Announce +events+ at Logger::WARN unless a <tt>:level</tt> option
  # says otherwise.
  def self.warn(*events)
    messages, options = extract_log_options(events)
    announce(messages, { :level => Logger::WARN }.merge(options))
  end

  # Same as +warn+, but only when +IMW.verbose?+.
  def self.warn_if_verbose(*events)
    warn(*events) if IMW.verbose?
  end

  # Split a splatted argument list into [messages, options].  The list
  # is flattened first; a trailing Hash, if any, is removed from the
  # messages and returned as the options.
  def self.extract_log_options(events) # :nodoc:
    messages = events.flatten
    options  = messages.last.is_a?(Hash) ? messages.pop : {}
    [messages, options]
  end
  private_class_method :extract_log_options

  PROGRESS_TRACKERS = {}

  #
  # When the slowly-changing tracked variable +val+ changes value,
  # announce its new value.  Always announces on first call.
  #
  # Ex:
  #   track_progress :indexing_names, name[0..0] # announce at each initial letter
  #
  def self.track_progress(tracker, val)
    return if IMW::PROGRESS_TRACKERS.include?(tracker) &&
              IMW::PROGRESS_TRACKERS[tracker] == val
    announce "#{tracker.to_s.gsub(/_/, ' ')}: #{val}"
    IMW::PROGRESS_TRACKERS[tracker] = val
  end

  # Mixin form of IMW.track_progress.  Delegates to the module method
  # because +announce+ only exists on IMW itself.
  def track_progress(tracker, val)
    IMW.track_progress(tracker, val)
  end

  PROGRESS_COUNTERS = {}

  #
  # Log repetitions in a given context
  #
  # Counts calls per +tracker+ and hands the count, rounded down to a
  # multiple of +every+ (default 1000), to +track_progress+, which
  # announces it whenever that rounded value changes.
  #
  def self.track_count(tracker, every=1000)
    PROGRESS_COUNTERS[tracker] ||= 0
    PROGRESS_COUNTERS[tracker] += 1
    chunk = every * (PROGRESS_COUNTERS[tracker] / every).to_i
    track_progress "count_of_#{tracker}", chunk
  end

  # Mixin form of IMW.track_count.
  def track_count(tracker, every=1000)
    IMW.track_count(tracker, every)
  end
end
|
91
|
-
|
92
|
-
IMW.instantiate_logger!
|
data/lib/imw/utils/misc.rb
DELETED
@@ -1,57 +0,0 @@
|
|
1
|
-
module IMW

  # A small numeric counter with a configurable start and step.
  #
  #   counter = IMW::Counter.new
  #   counter.value  #=> 0
  #   counter.add 1
  #   counter.value  #=> 1
  #
  # +next!+ behaves like C's <tt>value++</tt>: it hands back the
  # current value and increments afterwards.
  #
  #   counter = IMW::Counter.new
  #   counter.next!  #=> 0
  #   counter.value  #=> 1
  #
  # +reset!+ puts the counter back to its starting value.
  #
  #   counter.reset!
  #   counter.value  #=> 0
  class Counter

    attr_accessor :value, :starting_value, :increment

    # Build a counter.
    #
    # @param starting_value the initial (and reset) value, default 0
    # @param increment the default step, default 1
    def initialize(starting_value=0, increment=1)
      @starting_value = @value = starting_value
      @increment      = increment
    end

    # Advance the counter by +amount+, or by <tt>@increment</tt> when
    # +amount+ is omitted, and return the new value.
    def add(amount=nil)
      step = amount || @increment
      @value += step
    end
    alias_method :add!, :add

    # Advance the counter by <tt>@increment</tt> and return the value
    # it held _before_ advancing.
    def next!
      @value.tap { @value += @increment }
    end

    # Set the counter to +value+, or to <tt>@starting_value</tt> when
    # +value+ is omitted, and return the new value.
    def reset!(value=nil)
      @value = (value or @starting_value)
    end
  end
end
|
56
|
-
|
57
|
-
# puts "#{File.basename(__FILE__)}: Your Monkeywrench seems suddenly more utilisable." # at bottom
|
data/lib/imw/utils/paths.rb
DELETED
@@ -1,146 +0,0 @@
|
|
1
|
-
require 'pathname'
|
2
|
-
|
3
|
-
module IMW

  # Implements methods designed to work with an object's
  # <tt>@paths</tt> attribute, adding and deleting symbolic
  # references to paths and expanding calls to +path_to+ from that
  # attribute or (when a miss) from <tt>IMW::PATHS</tt>.
  #
  # <tt>@paths</tt> is a Hash; it is created lazily by +paths+.
  module Paths

    # Expands a shorthand workflow path specification to an actual
    # file path.  Strings are interpreted literally but symbols are
    # first resolved to the paths they represent.
    #
    #   add_path :foo, '~/whoa'
    #   path_to :foo, 'my_thing'
    #   => '~/whoa/my_thing'
    #
    # Only a result that is already absolute is passed through
    # File.expand_path; anything else is returned as joined.
    #
    # @param [String, Symbol] pathsegs the path segments to join
    # @return [String] the resulting expanded path
    # @raise [IMW::PathError] if a symbol has no registered expansion
    def path_to(*pathsegs)
      path = Pathname.new(path_to_helper(*pathsegs))
      path.absolute? ? File.expand_path(path) : path.to_s
    end

    # Return the presently defined paths for this object.
    #
    # @return [Hash]
    def paths
      @paths ||= {}
    end

    # Adds a symbolic path for expansion by +path_to+.
    #
    #   add_path :foo, '~/whoa'
    #   add_path :bar, :foo, 'baz'
    #   path_to :bar
    #   => '~/whoa/baz'
    #
    # @param [Symbol] sym the name of the path to store
    # @param [Symbol, String] pathsegs the path segments to use to define the path to the name
    # @return [String] the resulting path
    def add_path(sym, *pathsegs)
      paths[sym] = pathsegs.flatten
      path_to(sym)
    end

    # Removes a symbolic path for expansion by +path_to+.
    #
    # @param [Symbol] sym the stored path symbol to remove
    # @return the removed segments, or +nil+ if +sym+ was not stored
    def remove_path(sym)
      paths.delete(sym)
    end

    private

    # Handles the recursive calls for +path_to+: symbols are looked up
    # in this object's +paths+ first, then in IMW::PATHS.
    def path_to_helper(*pathsegs) # :nodoc:
      expanded = pathsegs.flatten.compact.map do |pathseg|
        next pathseg unless pathseg.is_a?(Symbol)
        if paths.include?(pathseg)
          path_to(paths[pathseg])
        elsif IMW::PATHS.include?(pathseg)
          path_to(IMW::PATHS[pathseg])
        else
          raise IMW::PathError, "No path expansion set for #{pathseg.inspect}"
        end
      end
      File.join(*expanded)
    end
  end

  # Default paths for the IMW.  Chosen to make sense on most *NIX
  # distributions.
  DEFAULT_PATHS = {
    :home         => ENV['HOME'],
    :data_root    => "/var/lib/imw",
    :log_root     => "/var/log/imw",
    :scripts_root => "/usr/share/imw",
    :tmp_root     => "/tmp/imw",

    # the imw library
    :imw_root     => File.expand_path(File.dirname(__FILE__) + "/../../.."),
    :imw_bin      => [:imw_root, 'bin'],
    :imw_etc      => [:imw_root, 'etc'],
    :imw_lib      => [:imw_root, 'lib'],

    # workflow
    :ripd_root    => [:data_root, 'ripd'],
    :rawd_root    => [:data_root, 'rawd'],
    :fixd_root    => [:data_root, 'fixd'],
    :pkgd_root    => [:data_root, 'pkgd']
  }

  if defined?(PATHS)
    # Fill in only the keys a pre-existing PATHS does not already set.
    PATHS.merge!(DEFAULT_PATHS) { |_key, configured, _default| configured }
  else
    # Copy, so add_path/remove_path never alter DEFAULT_PATHS.
    PATHS = DEFAULT_PATHS.dup
  end

  # Expands a shorthand workflow path specification to an actual
  # file path.  Strings are interpreted literally but symbols are
  # first resolved to the paths they represent.
  #
  #   IMW.add_path :foo, '~/whoa'
  #   IMW.path_to :foo, 'my_thing'
  #   => '~/whoa/my_thing'
  #
  # @param [String, Symbol] pathsegs the path segments to join
  # @return [String] the resulting expanded path
  # @raise [IMW::PathError] if a symbol has no registered expansion
  def self.path_to(*pathsegs)
    path = Pathname.new(IMW.path_to_helper(*pathsegs))
    path.absolute? ? File.expand_path(path) : path.to_s
  end

  # Adds a symbolic path for expansion by +path_to+.
  #
  #   IMW.add_path :foo, '~/whoa'
  #   IMW.add_path :bar, :foo, 'baz'
  #   IMW.path_to :bar
  #   => '~/whoa/baz'
  #
  # @param [Symbol] sym the name of the path to store
  # @param [Symbol, String] pathsegs the path segments to use to define the path to the name
  # @return [String] the resulting path
  def self.add_path(sym, *pathsegs)
    IMW::PATHS[sym] = pathsegs.flatten
    path_to(sym)
  end

  # Removes a symbolic path for expansion by +path_to+.
  #
  # @param [Symbol] sym the stored path symbol to remove
  # @return the removed segments, or +nil+ if +sym+ was not stored
  def self.remove_path(sym)
    IMW::PATHS.delete(sym)
  end

  # Handles the recursive calls for +IMW.path_to+.  Left public
  # because +IMW.path_to+ calls it with an explicit receiver.
  def self.path_to_helper(*pathsegs) # :nodoc:
    expanded = pathsegs.flatten.compact.map do |pathseg|
      next pathseg unless pathseg.is_a?(Symbol)
      unless IMW::PATHS.include?(pathseg)
        raise IMW::PathError, "No path expansion set for #{pathseg.inspect}"
      end
      path_to(IMW::PATHS[pathseg])
    end
    File.join(*expanded)
  end
end
|
data/lib/imw/utils/uri.rb
DELETED
@@ -1,59 +0,0 @@
|
|
1
|
-
require 'imw/utils'
|
2
|
-
require 'imw/utils/uuid'
|
3
|
-
require 'addressable/uri'
|
4
|
-
module Addressable
  #
  # Adds validity predicates plus the #revhost and #url_uuid calls.
  #
  class URI
    # The four *_CHARS constants hold the *contents* of a character
    # class, wrapped in a Regexp.  Always splice them in via +source+:
    # interpolating the Regexp object itself would inject its
    # "(?-mix:...)" wrapper into the class.
    SAFE_CHARS     = %r{a-zA-Z0-9\-\._!\(\)\*\'}
    PATH_CHARS     = %r{#{SAFE_CHARS.source}\$&\+,:=@\/;}
    RESERVED_CHARS = %r{\$&\+,:=@\/;\?\%}
    UNSAFE_CHARS   = %r{\\ \"\#<>\[\]\^\`\|\~\{\}}
    HOST_HEAD      = '(?:[a-z0-9\-]+\.)+'
    HOST_TLD       = '(?:[a-z]{2}|com|org|net|edu|gov|mil|biz|info|mobi|name|aero|jobs|museum)'

    # Is the host one or more dot-terminated labels followed by a
    # two-letter or listed top-level domain (case-insensitive)?
    #
    # @return [Boolean]
    def host_valid?
      !!(host =~ %r{\A#{HOST_HEAD}#{HOST_TLD}\z}i)
    end

    # Does the path consist solely of PATH_CHARS characters and '%'?
    #
    # @return [Boolean]
    def path_valid?
      !!(path =~ %r{\A[#{PATH_CHARS.source}%]*\z})
    end

    # Is the scheme http (or absent), the port 80 (or absent), and are
    # both user and password blank?
    def simple_connection_part?
      ( ['http', nil].include?(scheme) &&
        [80, nil].include?(port) &&
        (self.to_hash.values_at(:password, :user).join.blank?) )
    end

    #
    # True when the host and path are valid and the connection part is
    # simple (see the three predicates above).
    #
    def simple?
      host_valid? && path_valid? && simple_connection_part?
    end

    #
    # +revhost+
    # the dot-reversed host:
    #   foo.company.com => com.company.foo
    #
    # A host without a dot is returned as-is.
    #
    def revhost
      return host unless host =~ /\./
      host.split('.').reverse.join('.')
    end

    #
    # +url_uuid+ -- the result of UUID.sha1_create for the normalized
    # form of this URI under UUID_URL_NAMESPACE.
    #
    # NOTE(review): presumably an RFC-4122 version 5 UUID; UUID and
    # UUID_URL_NAMESPACE are defined elsewhere -- confirm there.
    #
    # See
    #   http://www.faqs.org/rfcs/rfc4122.html
    #
    def url_uuid
      UUID.sha1_create(UUID_URL_NAMESPACE, self.normalize.to_s)
    end
  end
end
|
55
|
-
|
56
|
-
# Provide the +encode_segment+ / +unencode_segment+ class-method names
# as aliases of +encode_component+ / +unencode_component+ when
# Addressable::URI does not already define them.
#
# The guard is +method_defined?+ on the singleton class, which looks at
# Addressable::URI's class methods.  A bare <tt>defined?(name)</tt>
# here is evaluated against the singleton class object itself, so it
# never sees those methods and the alias would always be (re)applied.
class << Addressable::URI
  alias_method :encode_segment,   :encode_component   unless method_defined?(:encode_segment)
  alias_method :unencode_segment, :unencode_component unless method_defined?(:unencode_segment)
end
|