imw 0.2.18 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +7 -26
- data/Gemfile.lock +13 -38
- data/{LICENSE → LICENSE.txt} +1 -1
- data/README.textile +35 -0
- data/Rakefile +45 -22
- data/VERSION +1 -1
- data/examples/foo.rb +19 -0
- data/examples/html_selector.rb +22 -0
- data/examples/nes_game_list.csv +625 -0
- data/examples/nes_gamespot.csv +1371 -0
- data/examples/nes_nintendo.csv +624 -0
- data/examples/nes_unlicensed.csv +89 -0
- data/examples/nes_wikipedia.csv +710 -0
- data/examples/nibbler_test.rb +24 -0
- data/examples/script.rb +19 -0
- data/lib/imw.rb +28 -140
- data/lib/imw/error.rb +9 -0
- data/lib/imw/recordizer.rb +8 -0
- data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
- data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
- data/lib/imw/resource.rb +3 -119
- data/lib/imw/serializer.rb +7 -0
- data/lib/imw/serializer/json_serializer.rb +17 -0
- data/lib/imw/uri.rb +41 -0
- data/spec/resource_spec.rb +78 -0
- data/spec/uri_spec.rb +55 -0
- metadata +81 -232
- data/README.rdoc +0 -371
- data/bin/imw +0 -5
- data/bin/tsv_to_json.rb +0 -29
- data/etc/imwrc.rb +0 -26
- data/examples/dataset.rb +0 -12
- data/examples/metadata.yml +0 -10
- data/lib/imw/archives.rb +0 -120
- data/lib/imw/archives/rar.rb +0 -19
- data/lib/imw/archives/tar.rb +0 -19
- data/lib/imw/archives/tarbz2.rb +0 -73
- data/lib/imw/archives/targz.rb +0 -73
- data/lib/imw/archives/zip.rb +0 -51
- data/lib/imw/boot.rb +0 -87
- data/lib/imw/compressed_files.rb +0 -94
- data/lib/imw/compressed_files/bz2.rb +0 -16
- data/lib/imw/compressed_files/compressible.rb +0 -75
- data/lib/imw/compressed_files/gz.rb +0 -16
- data/lib/imw/dataset.rb +0 -125
- data/lib/imw/dataset/paths.rb +0 -29
- data/lib/imw/dataset/workflow.rb +0 -195
- data/lib/imw/formats.rb +0 -33
- data/lib/imw/formats/delimited.rb +0 -170
- data/lib/imw/formats/excel.rb +0 -100
- data/lib/imw/formats/json.rb +0 -41
- data/lib/imw/formats/pdf.rb +0 -71
- data/lib/imw/formats/sgml.rb +0 -69
- data/lib/imw/formats/yaml.rb +0 -41
- data/lib/imw/metadata.rb +0 -83
- data/lib/imw/metadata/contains_metadata.rb +0 -54
- data/lib/imw/metadata/dsl.rb +0 -111
- data/lib/imw/metadata/field.rb +0 -37
- data/lib/imw/metadata/has_metadata.rb +0 -98
- data/lib/imw/metadata/has_summary.rb +0 -57
- data/lib/imw/metadata/schema.rb +0 -17
- data/lib/imw/parsers.rb +0 -8
- data/lib/imw/parsers/flat.rb +0 -44
- data/lib/imw/parsers/html_parser.rb +0 -387
- data/lib/imw/parsers/html_parser/matchers.rb +0 -289
- data/lib/imw/parsers/line_parser.rb +0 -87
- data/lib/imw/parsers/regexp_parser.rb +0 -72
- data/lib/imw/repository.rb +0 -12
- data/lib/imw/runner.rb +0 -118
- data/lib/imw/schemes.rb +0 -23
- data/lib/imw/schemes/ftp.rb +0 -142
- data/lib/imw/schemes/hdfs.rb +0 -251
- data/lib/imw/schemes/http.rb +0 -165
- data/lib/imw/schemes/local.rb +0 -409
- data/lib/imw/schemes/remote.rb +0 -119
- data/lib/imw/schemes/s3.rb +0 -143
- data/lib/imw/schemes/sql.rb +0 -129
- data/lib/imw/tools.rb +0 -12
- data/lib/imw/tools/aggregator.rb +0 -148
- data/lib/imw/tools/archiver.rb +0 -220
- data/lib/imw/tools/downloader.rb +0 -63
- data/lib/imw/tools/extension_analyzer.rb +0 -114
- data/lib/imw/tools/summarizer.rb +0 -83
- data/lib/imw/tools/transferer.rb +0 -167
- data/lib/imw/utils.rb +0 -74
- data/lib/imw/utils/dynamically_extendable.rb +0 -137
- data/lib/imw/utils/error.rb +0 -59
- data/lib/imw/utils/extensions/hpricot.rb +0 -34
- data/lib/imw/utils/has_uri.rb +0 -131
- data/lib/imw/utils/log.rb +0 -92
- data/lib/imw/utils/misc.rb +0 -57
- data/lib/imw/utils/paths.rb +0 -146
- data/lib/imw/utils/uri.rb +0 -59
- data/lib/imw/utils/uuid.rb +0 -33
- data/lib/imw/utils/validate.rb +0 -38
- data/lib/imw/utils/version.rb +0 -11
- data/spec/data/formats/delimited/sample.csv +0 -131
- data/spec/data/formats/delimited/sample.tsv +0 -131
- data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
- data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
- data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
- data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
- data/spec/data/formats/excel/sample.xls +0 -0
- data/spec/data/formats/json/sample.json +0 -1
- data/spec/data/formats/none/sample +0 -650
- data/spec/data/formats/sgml/sample.xml +0 -617
- data/spec/data/formats/text/sample.txt +0 -650
- data/spec/data/formats/yaml/sample.yaml +0 -410
- data/spec/data/schema-tabular.yaml +0 -11
- data/spec/imw/archives/rar_spec.rb +0 -16
- data/spec/imw/archives/tar_spec.rb +0 -16
- data/spec/imw/archives/tarbz2_spec.rb +0 -24
- data/spec/imw/archives/targz_spec.rb +0 -21
- data/spec/imw/archives/zip_spec.rb +0 -16
- data/spec/imw/archives_spec.rb +0 -77
- data/spec/imw/compressed_files/bz2_spec.rb +0 -15
- data/spec/imw/compressed_files/compressible_spec.rb +0 -36
- data/spec/imw/compressed_files/gz_spec.rb +0 -15
- data/spec/imw/compressed_files_spec.rb +0 -47
- data/spec/imw/dataset/paths_spec.rb +0 -32
- data/spec/imw/dataset/workflow_spec.rb +0 -41
- data/spec/imw/formats/delimited_spec.rb +0 -44
- data/spec/imw/formats/excel_spec.rb +0 -55
- data/spec/imw/formats/json_spec.rb +0 -18
- data/spec/imw/formats/sgml_spec.rb +0 -24
- data/spec/imw/formats/yaml_spec.rb +0 -19
- data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
- data/spec/imw/metadata/field_spec.rb +0 -25
- data/spec/imw/metadata/has_metadata_spec.rb +0 -58
- data/spec/imw/metadata/has_summary_spec.rb +0 -32
- data/spec/imw/metadata/schema_spec.rb +0 -24
- data/spec/imw/metadata_spec.rb +0 -86
- data/spec/imw/parsers/line_parser_spec.rb +0 -96
- data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
- data/spec/imw/resource_spec.rb +0 -32
- data/spec/imw/schemes/hdfs_spec.rb +0 -67
- data/spec/imw/schemes/http_spec.rb +0 -19
- data/spec/imw/schemes/local_spec.rb +0 -165
- data/spec/imw/schemes/remote_spec.rb +0 -38
- data/spec/imw/schemes/s3_spec.rb +0 -31
- data/spec/imw/schemes/sql_spec.rb +0 -3
- data/spec/imw/tools/aggregator_spec.rb +0 -71
- data/spec/imw/tools/archiver_spec.rb +0 -120
- data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
- data/spec/imw/tools/summarizer_spec.rb +0 -8
- data/spec/imw/tools/transferer_spec.rb +0 -195
- data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
- data/spec/imw/utils/has_uri_spec.rb +0 -61
- data/spec/imw/utils/paths_spec.rb +0 -10
- data/spec/imw/utils/shared_paths_spec.rb +0 -29
- data/spec/imw_spec.rb +0 -14
- data/spec/rcov.opts +0 -1
- data/spec/spec_helper.rb +0 -31
- data/spec/support/custom_matchers.rb +0 -28
- data/spec/support/file_contents_matcher.rb +0 -30
- data/spec/support/paths_matcher.rb +0 -66
- data/spec/support/random.rb +0 -213
- data/spec/support/without_regard_to_order_matcher.rb +0 -41
data/lib/imw/utils/has_uri.rb
DELETED
|
@@ -1,131 +0,0 @@
|
|
|
1
|
-
require 'addressable/uri'
|
|
2
|
-
|
|
3
|
-
module IMW
|
|
4
|
-
module Utils
|
|
5
|
-
|
|
6
|
-
# Endows an including class with a wrapper for Addressable::URI
|
|
7
|
-
module HasURI
|
|
8
|
-
|
|
9
|
-
# The URI of this object.
|
|
10
|
-
attr_reader :uri
|
|
11
|
-
|
|
12
|
-
# Set the URI of this resource by parsing the given +uri+ (if
|
|
13
|
-
# necessary).
|
|
14
|
-
#
|
|
15
|
-
# @param [String, Addressable::URI] uri the uri to parse
|
|
16
|
-
def uri= uri
|
|
17
|
-
if uri.is_a?(Addressable::URI)
|
|
18
|
-
@uri = uri
|
|
19
|
-
else
|
|
20
|
-
begin
|
|
21
|
-
@uri = Addressable::URI.parse(uri.to_s)
|
|
22
|
-
rescue URI::InvalidURIError
|
|
23
|
-
@uri = Addressable::URI.parse(URI.encode(uri.to_s))
|
|
24
|
-
@encoded_uri = true
|
|
25
|
-
end
|
|
26
|
-
end
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
# The scheme of this resource. Will be +nil+ for local resources.
|
|
30
|
-
#
|
|
31
|
-
# @return [String]
|
|
32
|
-
def scheme
|
|
33
|
-
@scheme ||= uri.scheme
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
# The directory name of this resource's path.
|
|
37
|
-
#
|
|
38
|
-
# @return [String]
|
|
39
|
-
def dirname
|
|
40
|
-
@dirname ||= File.dirname(path)
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
# The basename of this resource's path.
|
|
44
|
-
#
|
|
45
|
-
# @return [String]
|
|
46
|
-
def basename
|
|
47
|
-
@basename ||= File.basename(path)
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
# Returns the extension (INCLUDING the '.') of this resource's
|
|
51
|
-
# path. Redefine this in an including class for which this is
|
|
52
|
-
# weird ('.tar.gz' I'm talking to you...)
|
|
53
|
-
#
|
|
54
|
-
# @return [String]
|
|
55
|
-
def extname
|
|
56
|
-
@extname ||= File.extname(path)
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
# Returns the extension (WITHOUT the '.') of this resource's path.
|
|
60
|
-
#
|
|
61
|
-
# @return [String]
|
|
62
|
-
def extension
|
|
63
|
-
@extension ||= extname[1..-1] || ''
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
# Returns the basename of the file with its extension removed
|
|
67
|
-
#
|
|
68
|
-
# IMW.open('/path/to/some_file.tar.gz').name # => some_file
|
|
69
|
-
#
|
|
70
|
-
# @return [String]
|
|
71
|
-
def name
|
|
72
|
-
@name ||= extname ? basename[0,basename.length - extname.length] : basename
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
# Returns the user associated with the host of this URI.
|
|
76
|
-
#
|
|
77
|
-
# @return [String]
|
|
78
|
-
def user
|
|
79
|
-
@user ||= uri.user
|
|
80
|
-
end
|
|
81
|
-
|
|
82
|
-
# Returns the password associated with access to this URI.
|
|
83
|
-
#
|
|
84
|
-
# @return [String]
|
|
85
|
-
def password
|
|
86
|
-
@password ||= uri.password
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
# Return the fragment part of this resource's URI.
|
|
90
|
-
#
|
|
91
|
-
# Will likely be +nil+ for local resources.
|
|
92
|
-
#
|
|
93
|
-
# @return [String]
|
|
94
|
-
def fragment
|
|
95
|
-
@fragment ||= uri.fragment
|
|
96
|
-
end
|
|
97
|
-
|
|
98
|
-
# Return the URI of this resource with any query strings and
|
|
99
|
-
# fragments removed.
|
|
100
|
-
#
|
|
101
|
-
# @return [Addressable::URI]
|
|
102
|
-
def stripped_uri
|
|
103
|
-
uri_args = {}.tap do |args|
|
|
104
|
-
%w[scheme userinfo host port path].each do |method|
|
|
105
|
-
args[method.to_sym] = respond_to?(method) ? send(method) : uri.send(method)
|
|
106
|
-
end
|
|
107
|
-
end
|
|
108
|
-
uri.class.new(uri_args)
|
|
109
|
-
end
|
|
110
|
-
|
|
111
|
-
# Return the path complete with query string and fragment.
|
|
112
|
-
#
|
|
113
|
-
# @return [String]
|
|
114
|
-
def raw_path
|
|
115
|
-
p = uri.path
|
|
116
|
-
p += "?#{uri.query}" unless uri.query.nil?
|
|
117
|
-
p += "##{uri.fragment}" unless uri.fragment.nil?
|
|
118
|
-
p
|
|
119
|
-
end
|
|
120
|
-
|
|
121
|
-
def to_s
|
|
122
|
-
uri.to_s
|
|
123
|
-
end
|
|
124
|
-
end
|
|
125
|
-
end
|
|
126
|
-
end
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
data/lib/imw/utils/log.rb
DELETED
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
require 'logger'
|
|
2
|
-
|
|
3
|
-
module IMW
|
|
4
|
-
|
|
5
|
-
# Default log file.
|
|
6
|
-
LOG_FILE_DESTINATION = STDERR unless defined?(LOG_FILE_DESTINATION)
|
|
7
|
-
|
|
8
|
-
# Default log file time format
|
|
9
|
-
LOG_TIMEFORMAT = "%Y-%m-%d %H:%M:%S " unless defined?(LOG_TIMEFORMAT)
|
|
10
|
-
|
|
11
|
-
# Default verbosity
|
|
12
|
-
VERBOSE = false unless defined?(VERBOSE)
|
|
13
|
-
|
|
14
|
-
class << self; attr_accessor :log, :verbose end
|
|
15
|
-
|
|
16
|
-
# Is IMW operating in verbose mode?
|
|
17
|
-
#
|
|
18
|
-
# Calls to <tt>IMW.warn_if_verbose</tt> and friends utilize this
|
|
19
|
-
# method. Verbosity is controlled on the command line (see
|
|
20
|
-
# IMW::Runner) or by setting IMW::VERBOSE in your configuration
|
|
21
|
-
# file.
|
|
22
|
-
#
|
|
23
|
-
# @return [nil, false, true]
|
|
24
|
-
def self.verbose?
|
|
25
|
-
VERBOSE || verbose
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
# Create a Logger and point it at IMW::LOG_FILE_DESTINATION which is
|
|
29
|
-
# set in ~/.imwrc and defaults to STDERR.
|
|
30
|
-
def self.instantiate_logger!
|
|
31
|
-
IMW.log ||= Logger.new(LOG_FILE_DESTINATION)
|
|
32
|
-
IMW.log.datetime_format = "%Y%m%d-%H:%M:%S "
|
|
33
|
-
IMW.log.level = Logger::INFO
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
def self.announce *events
|
|
37
|
-
options = events.flatten.extract_options!
|
|
38
|
-
options.reverse_merge! :level => Logger::INFO
|
|
39
|
-
IMW.log.add options[:level], "IMW: " + events.join("\n")
|
|
40
|
-
end
|
|
41
|
-
def self.announce_if_verbose *events
|
|
42
|
-
announce(*events) if IMW.verbose?
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
def self.banner *events
|
|
46
|
-
options = events.flatten.extract_options!
|
|
47
|
-
options.reverse_merge! :level => Logger::INFO
|
|
48
|
-
announce(["*"*75, events, "*"*75], options)
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
def self.warn *events
|
|
52
|
-
options = events.flatten.extract_options!
|
|
53
|
-
options.reverse_merge! :level => Logger::WARN
|
|
54
|
-
announce events, options
|
|
55
|
-
end
|
|
56
|
-
def self.warn_if_verbose *events
|
|
57
|
-
warn(*events) if IMW.verbose?
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
PROGRESS_TRACKERS = {}
|
|
61
|
-
#
|
|
62
|
-
# When the slowly-changing tracked variable +var+ changes value,
|
|
63
|
-
# announce its new value. Always announces on first call.
|
|
64
|
-
#
|
|
65
|
-
# Ex:
|
|
66
|
-
# track_progress :indexing_names, name[0..0] # announce at each initial letter
|
|
67
|
-
# track_progress :files, (i % 1000) # announce at each 1,000 iterations
|
|
68
|
-
#
|
|
69
|
-
def track_progress tracker, val
|
|
70
|
-
unless (IMW::PROGRESS_TRACKERS.include?(tracker)) &&
|
|
71
|
-
(IMW::PROGRESS_TRACKERS[tracker] == val)
|
|
72
|
-
announce "#{tracker.to_s.gsub(/_/,' ')}: #{val}"
|
|
73
|
-
IMW::PROGRESS_TRACKERS[tracker] = val
|
|
74
|
-
end
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
PROGRESS_COUNTERS = {}
|
|
78
|
-
#
|
|
79
|
-
# Log repetitions in a given context
|
|
80
|
-
#
|
|
81
|
-
# At every n'th (default 1000) call,
|
|
82
|
-
# announce progress in the IMW.log
|
|
83
|
-
#
|
|
84
|
-
def track_count tracker, every=1000
|
|
85
|
-
PROGRESS_COUNTERS[tracker] ||= 0
|
|
86
|
-
PROGRESS_COUNTERS[tracker] += 1
|
|
87
|
-
chunk = every * (PROGRESS_COUNTERS[tracker]/every).to_i
|
|
88
|
-
track_progress "count_of_#{tracker}", chunk
|
|
89
|
-
end
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
IMW.instantiate_logger!
|
data/lib/imw/utils/misc.rb
DELETED
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
module IMW
|
|
2
|
-
|
|
3
|
-
# A simple counter. The +value+ and +add+ methods read and
|
|
4
|
-
# increment the counter's value.
|
|
5
|
-
#
|
|
6
|
-
# counter = IMW::Counter.new
|
|
7
|
-
# counter.value #=> 0
|
|
8
|
-
# counter.add 1
|
|
9
|
-
# counter.value #=> 1
|
|
10
|
-
#
|
|
11
|
-
# The +next!+ method acts like C's <tt>value++</tt>, incrementing
|
|
12
|
-
# +value+ _after_ it is referenced.
|
|
13
|
-
#
|
|
14
|
-
# counter = IMW::Counter.new
|
|
15
|
-
# counter.value #=> 0
|
|
16
|
-
# counter.next! #=> 0
|
|
17
|
-
# counter.value #=> 1
|
|
18
|
-
#
|
|
19
|
-
# Counters can also be reset
|
|
20
|
-
#
|
|
21
|
-
# counter.reset!
|
|
22
|
-
# counter.value #=> 0
|
|
23
|
-
class Counter
|
|
24
|
-
|
|
25
|
-
attr_accessor :value, :starting_value, :increment
|
|
26
|
-
|
|
27
|
-
# Return a new Counter. The first argument is the starting value
|
|
28
|
-
# (defaults to 0) and the second is the increment (defaults to 1).
|
|
29
|
-
def initialize starting_value=0,increment=1
|
|
30
|
-
@starting_value = starting_value
|
|
31
|
-
@value = starting_value
|
|
32
|
-
@increment = increment
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
# Add +amount+ (defaults to the value of <tt>@increment</tt>).
|
|
36
|
-
def add amount=nil
|
|
37
|
-
@value += amount || @increment
|
|
38
|
-
end
|
|
39
|
-
alias_method :add!, :add
|
|
40
|
-
|
|
41
|
-
# Increment the counter by <tt>@increment</tt> but return its
|
|
42
|
-
# value _before_ being incremented.
|
|
43
|
-
def next!
|
|
44
|
-
old_value = @value
|
|
45
|
-
@value += @increment
|
|
46
|
-
old_value
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
# Reset the counter to +value+ (defaults to the value of
|
|
50
|
-
# <tt>@starting_value</tt>).
|
|
51
|
-
def reset! value=nil
|
|
52
|
-
@value = value || @starting_value
|
|
53
|
-
end
|
|
54
|
-
end
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
# puts "#{File.basename(__FILE__)}: Your Monkeywrench seems suddenly more utilisable." # at bottom
|
data/lib/imw/utils/paths.rb
DELETED
|
@@ -1,146 +0,0 @@
|
|
|
1
|
-
require 'pathname'
|
|
2
|
-
|
|
3
|
-
module IMW
|
|
4
|
-
|
|
5
|
-
# Implements methods designed to work with an object's
|
|
6
|
-
# <tt>@paths</tt> attributes, adding and deleting symbolic
|
|
7
|
-
# references to paths and expanding calls to +path_to+ from that
|
|
8
|
-
# attribute or (when a miss) from <tt>IMW::PATHS</tt>.
|
|
9
|
-
#
|
|
10
|
-
# An including class should therefore define a hash attribute
|
|
11
|
-
# <tt>@paths</tt>.
|
|
12
|
-
module Paths
|
|
13
|
-
|
|
14
|
-
# Expands a shorthand workflow path specification to an actual
|
|
15
|
-
# file path. Strings are interpreted literally but symbols are
|
|
16
|
-
# first resolved to the paths they represent.
|
|
17
|
-
#
|
|
18
|
-
# add_path :foo, '~/whoa'
|
|
19
|
-
# path_to :foo, 'my_thing'
|
|
20
|
-
# => '~/whoa/my_thing'
|
|
21
|
-
#
|
|
22
|
-
# @param [String, Symbol] pathsegs the path segments to join
|
|
23
|
-
# @return [String] the resulting expanded path
|
|
24
|
-
def path_to *pathsegs
|
|
25
|
-
path = Pathname.new path_to_helper(*pathsegs)
|
|
26
|
-
path.absolute? ? File.expand_path(path) : path.to_s
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
# Return the presently defined paths for this object.
|
|
30
|
-
#
|
|
31
|
-
# @return [Hash]
|
|
32
|
-
def paths
|
|
33
|
-
@paths ||= {}
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
# Adds a symbolic path for expansion by +path_to+.
|
|
37
|
-
#
|
|
38
|
-
# add_path :foo, '~/whoa'
|
|
39
|
-
# add_path :bar, :foo, 'baz'
|
|
40
|
-
# path_to :bar
|
|
41
|
-
# => '~/whoa/baz'
|
|
42
|
-
#
|
|
43
|
-
# @param [Symbol] sym the name of the path to store
|
|
44
|
-
# @param [Symbol, String] pathsegs the path segments to use to define the path to the name
|
|
45
|
-
# @return [String] the resulting path
|
|
46
|
-
def add_path sym, *pathsegs
|
|
47
|
-
paths[sym] = pathsegs.flatten
|
|
48
|
-
path_to(sym)
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
# Removes a symbolic path for expansion by +path_to+.
|
|
52
|
-
#
|
|
53
|
-
# @param [Symbol] sym the stored path symbol to remove
|
|
54
|
-
def remove_path sym
|
|
55
|
-
paths.delete sym if paths.include? sym
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
private
|
|
59
|
-
def path_to_helper *pathsegs # :nodoc:
|
|
60
|
-
# +path_to_helper+ handles the recursive calls for +path_to+.
|
|
61
|
-
expanded = pathsegs.flatten.compact.map do |pathseg|
|
|
62
|
-
case
|
|
63
|
-
when pathseg.is_a?(Symbol) && paths.include?(pathseg) then path_to(paths[pathseg])
|
|
64
|
-
when pathseg.is_a?(Symbol) && IMW::PATHS.include?(pathseg) then path_to(IMW::PATHS[pathseg])
|
|
65
|
-
when pathseg.is_a?(Symbol) then raise IMW::PathError.new("No path expansion set for #{pathseg.inspect}")
|
|
66
|
-
else pathseg
|
|
67
|
-
end
|
|
68
|
-
end
|
|
69
|
-
File.join(*expanded)
|
|
70
|
-
end
|
|
71
|
-
end
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
# Default paths for the IMW. Chosen to make sense on most *NIX
|
|
75
|
-
# distributions.
|
|
76
|
-
DEFAULT_PATHS = {
|
|
77
|
-
:home => ENV['HOME'],
|
|
78
|
-
:data_root => "/var/lib/imw",
|
|
79
|
-
:log_root => "/var/log/imw",
|
|
80
|
-
:scripts_root => "/usr/share/imw",
|
|
81
|
-
:tmp_root => "/tmp/imw",
|
|
82
|
-
|
|
83
|
-
# the imw library
|
|
84
|
-
:imw_root => File.expand_path(File.dirname(__FILE__) + "/../../.."),
|
|
85
|
-
:imw_bin => [:imw_root, 'bin'],
|
|
86
|
-
:imw_etc => [:imw_root, 'etc'],
|
|
87
|
-
:imw_lib => [:imw_root, 'lib'],
|
|
88
|
-
|
|
89
|
-
# workflow
|
|
90
|
-
:ripd_root => [:data_root, 'ripd'],
|
|
91
|
-
:rawd_root => [:data_root, 'rawd'],
|
|
92
|
-
:fixd_root => [:data_root, 'fixd'],
|
|
93
|
-
:pkgd_root => [:data_root, 'pkgd']
|
|
94
|
-
}
|
|
95
|
-
defined?(PATHS) ? PATHS.reverse_merge!(DEFAULT_PATHS) : PATHS = DEFAULT_PATHS
|
|
96
|
-
|
|
97
|
-
# Expands a shorthand workflow path specification to an actual
|
|
98
|
-
# file path. Strings are interpreted literally but symbols are
|
|
99
|
-
# first resolved to the paths they represent.
|
|
100
|
-
#
|
|
101
|
-
# IMW.add_path :foo, '~/whoa'
|
|
102
|
-
# IMW.path_to :foo, 'my_thing'
|
|
103
|
-
# => '~/whoa/my_thing'
|
|
104
|
-
#
|
|
105
|
-
# @param [String, Symbol] pathsegs the path segments to join
|
|
106
|
-
# @return [String] the resulting expanded path
|
|
107
|
-
def self.path_to *pathsegs
|
|
108
|
-
path = Pathname.new IMW.path_to_helper(*pathsegs)
|
|
109
|
-
path.absolute? ? File.expand_path(path) : path.to_s
|
|
110
|
-
end
|
|
111
|
-
|
|
112
|
-
# Adds a symbolic path for expansion by +path_to+.
|
|
113
|
-
#
|
|
114
|
-
# IMW.add_path :foo, '~/whoa'
|
|
115
|
-
# IMW.add_path :bar, :foo, 'baz'
|
|
116
|
-
# IMW.path_to :bar
|
|
117
|
-
# => '~/whoa/baz'
|
|
118
|
-
#
|
|
119
|
-
# @param [Symbol] sym the name of the path to store
|
|
120
|
-
# @param [Symbol, String] pathsegs the path segments to use to define the path to the name
|
|
121
|
-
# @return [String] the resulting path
|
|
122
|
-
def self.add_path sym, *pathsegs
|
|
123
|
-
IMW::PATHS[sym] = pathsegs.flatten
|
|
124
|
-
path_to[sym]
|
|
125
|
-
end
|
|
126
|
-
|
|
127
|
-
# Removes a symbolic path for expansion by +path_to+.
|
|
128
|
-
#
|
|
129
|
-
# @param [Symbol] sym the stored path symbol to remove
|
|
130
|
-
def self.remove_path sym
|
|
131
|
-
IMW::PATHS.delete sym if IMW::PATHS.include? sym
|
|
132
|
-
end
|
|
133
|
-
|
|
134
|
-
protected
|
|
135
|
-
def self.path_to_helper *pathsegs # :nodoc:
|
|
136
|
-
# +path_to_helper+ handles the recursive calls for +path_to+.
|
|
137
|
-
expanded = pathsegs.flatten.compact.map do |pathseg|
|
|
138
|
-
case
|
|
139
|
-
when pathseg.is_a?(Symbol) && IMW::PATHS.include?(pathseg) then path_to(IMW::PATHS[pathseg])
|
|
140
|
-
when pathseg.is_a?(Symbol) then raise IMW::PathError.new("No path expansion set for #{pathseg.inspect}")
|
|
141
|
-
else pathseg
|
|
142
|
-
end
|
|
143
|
-
end
|
|
144
|
-
File.join(*expanded)
|
|
145
|
-
end
|
|
146
|
-
end
|
data/lib/imw/utils/uri.rb
DELETED
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
require 'imw/utils'
|
|
2
|
-
require 'imw/utils/uuid'
|
|
3
|
-
require 'addressable/uri'
|
|
4
|
-
module Addressable
|
|
5
|
-
#
|
|
6
|
-
# Add the #scrubbed and #revhost calls
|
|
7
|
-
#
|
|
8
|
-
class URI
|
|
9
|
-
SAFE_CHARS = %r{a-zA-Z0-9\-\._!\(\)\*\'}
|
|
10
|
-
PATH_CHARS = %r{#{SAFE_CHARS}\$&\+,:=@\/;}
|
|
11
|
-
RESERVED_CHARS = %r{\$&\+,:=@\/;\?\%}
|
|
12
|
-
UNSAFE_CHARS = %r{\\ \"\#<>\[\]\^\`\|\~\{\}}
|
|
13
|
-
HOST_HEAD = '(?:[a-z0-9\-]+\.)+'
|
|
14
|
-
HOST_TLD = '(?:[a-z]{2}|com|org|net|edu|gov|mil|biz|info|mobi|name|aero|jobs|museum)'
|
|
15
|
-
|
|
16
|
-
def host_valid?
|
|
17
|
-
!!(host =~ %r{\A#{HOST_HEAD}#{HOST_TLD}\z}i)
|
|
18
|
-
end
|
|
19
|
-
def path_valid?
|
|
20
|
-
!!(path =~ %r{\A[#{PATH_CHARS}%]*\z})
|
|
21
|
-
end
|
|
22
|
-
def simple_connection_part?
|
|
23
|
-
( ['http', nil].include?(scheme) &&
|
|
24
|
-
[80, nil].include?(port) &&
|
|
25
|
-
(self.to_hash.values_at(:password, :user).join.blank?) )
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
#
|
|
29
|
-
# Does this look like a simple URL (valid host and path, plain HTTP on the default port, no credentials)?
|
|
30
|
-
#
|
|
31
|
-
def simple?
|
|
32
|
-
host_valid? && path_valid? && simple_connection_part?
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
#
|
|
36
|
-
# +revhost+
|
|
37
|
-
# the dot-reversed host:
|
|
38
|
-
# foo.company.com => com.company.foo
|
|
39
|
-
#
|
|
40
|
-
def revhost
|
|
41
|
-
return host unless host =~ /\./
|
|
42
|
-
host.split('.').reverse.join('.')
|
|
43
|
-
end
|
|
44
|
-
#
|
|
45
|
-
# +uuid+ -- RFC-4122 ver.5 uuid; guaranteed to be universally unique
|
|
46
|
-
#
|
|
47
|
-
# See
|
|
48
|
-
# http://www.faqs.org/rfcs/rfc4122.html
|
|
49
|
-
#
|
|
50
|
-
def url_uuid
|
|
51
|
-
UUID.sha1_create(UUID_URL_NAMESPACE, self.normalize.to_s)
|
|
52
|
-
end
|
|
53
|
-
end
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
class << Addressable::URI
|
|
57
|
-
alias_method :encode_segment, :encode_component if ! defined?(encode_segment)
|
|
58
|
-
alias_method :unencode_segment, :unencode_component if ! defined?(unencode_segment)
|
|
59
|
-
end
|