imw 0.2.18 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +7 -26
- data/Gemfile.lock +13 -38
- data/{LICENSE → LICENSE.txt} +1 -1
- data/README.textile +35 -0
- data/Rakefile +45 -22
- data/VERSION +1 -1
- data/examples/foo.rb +19 -0
- data/examples/html_selector.rb +22 -0
- data/examples/nes_game_list.csv +625 -0
- data/examples/nes_gamespot.csv +1371 -0
- data/examples/nes_nintendo.csv +624 -0
- data/examples/nes_unlicensed.csv +89 -0
- data/examples/nes_wikipedia.csv +710 -0
- data/examples/nibbler_test.rb +24 -0
- data/examples/script.rb +19 -0
- data/lib/imw.rb +28 -140
- data/lib/imw/error.rb +9 -0
- data/lib/imw/recordizer.rb +8 -0
- data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
- data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
- data/lib/imw/resource.rb +3 -119
- data/lib/imw/serializer.rb +7 -0
- data/lib/imw/serializer/json_serializer.rb +17 -0
- data/lib/imw/uri.rb +41 -0
- data/spec/resource_spec.rb +78 -0
- data/spec/uri_spec.rb +55 -0
- metadata +81 -232
- data/README.rdoc +0 -371
- data/bin/imw +0 -5
- data/bin/tsv_to_json.rb +0 -29
- data/etc/imwrc.rb +0 -26
- data/examples/dataset.rb +0 -12
- data/examples/metadata.yml +0 -10
- data/lib/imw/archives.rb +0 -120
- data/lib/imw/archives/rar.rb +0 -19
- data/lib/imw/archives/tar.rb +0 -19
- data/lib/imw/archives/tarbz2.rb +0 -73
- data/lib/imw/archives/targz.rb +0 -73
- data/lib/imw/archives/zip.rb +0 -51
- data/lib/imw/boot.rb +0 -87
- data/lib/imw/compressed_files.rb +0 -94
- data/lib/imw/compressed_files/bz2.rb +0 -16
- data/lib/imw/compressed_files/compressible.rb +0 -75
- data/lib/imw/compressed_files/gz.rb +0 -16
- data/lib/imw/dataset.rb +0 -125
- data/lib/imw/dataset/paths.rb +0 -29
- data/lib/imw/dataset/workflow.rb +0 -195
- data/lib/imw/formats.rb +0 -33
- data/lib/imw/formats/delimited.rb +0 -170
- data/lib/imw/formats/excel.rb +0 -100
- data/lib/imw/formats/json.rb +0 -41
- data/lib/imw/formats/pdf.rb +0 -71
- data/lib/imw/formats/sgml.rb +0 -69
- data/lib/imw/formats/yaml.rb +0 -41
- data/lib/imw/metadata.rb +0 -83
- data/lib/imw/metadata/contains_metadata.rb +0 -54
- data/lib/imw/metadata/dsl.rb +0 -111
- data/lib/imw/metadata/field.rb +0 -37
- data/lib/imw/metadata/has_metadata.rb +0 -98
- data/lib/imw/metadata/has_summary.rb +0 -57
- data/lib/imw/metadata/schema.rb +0 -17
- data/lib/imw/parsers.rb +0 -8
- data/lib/imw/parsers/flat.rb +0 -44
- data/lib/imw/parsers/html_parser.rb +0 -387
- data/lib/imw/parsers/html_parser/matchers.rb +0 -289
- data/lib/imw/parsers/line_parser.rb +0 -87
- data/lib/imw/parsers/regexp_parser.rb +0 -72
- data/lib/imw/repository.rb +0 -12
- data/lib/imw/runner.rb +0 -118
- data/lib/imw/schemes.rb +0 -23
- data/lib/imw/schemes/ftp.rb +0 -142
- data/lib/imw/schemes/hdfs.rb +0 -251
- data/lib/imw/schemes/http.rb +0 -165
- data/lib/imw/schemes/local.rb +0 -409
- data/lib/imw/schemes/remote.rb +0 -119
- data/lib/imw/schemes/s3.rb +0 -143
- data/lib/imw/schemes/sql.rb +0 -129
- data/lib/imw/tools.rb +0 -12
- data/lib/imw/tools/aggregator.rb +0 -148
- data/lib/imw/tools/archiver.rb +0 -220
- data/lib/imw/tools/downloader.rb +0 -63
- data/lib/imw/tools/extension_analyzer.rb +0 -114
- data/lib/imw/tools/summarizer.rb +0 -83
- data/lib/imw/tools/transferer.rb +0 -167
- data/lib/imw/utils.rb +0 -74
- data/lib/imw/utils/dynamically_extendable.rb +0 -137
- data/lib/imw/utils/error.rb +0 -59
- data/lib/imw/utils/extensions/hpricot.rb +0 -34
- data/lib/imw/utils/has_uri.rb +0 -131
- data/lib/imw/utils/log.rb +0 -92
- data/lib/imw/utils/misc.rb +0 -57
- data/lib/imw/utils/paths.rb +0 -146
- data/lib/imw/utils/uri.rb +0 -59
- data/lib/imw/utils/uuid.rb +0 -33
- data/lib/imw/utils/validate.rb +0 -38
- data/lib/imw/utils/version.rb +0 -11
- data/spec/data/formats/delimited/sample.csv +0 -131
- data/spec/data/formats/delimited/sample.tsv +0 -131
- data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
- data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
- data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
- data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
- data/spec/data/formats/excel/sample.xls +0 -0
- data/spec/data/formats/json/sample.json +0 -1
- data/spec/data/formats/none/sample +0 -650
- data/spec/data/formats/sgml/sample.xml +0 -617
- data/spec/data/formats/text/sample.txt +0 -650
- data/spec/data/formats/yaml/sample.yaml +0 -410
- data/spec/data/schema-tabular.yaml +0 -11
- data/spec/imw/archives/rar_spec.rb +0 -16
- data/spec/imw/archives/tar_spec.rb +0 -16
- data/spec/imw/archives/tarbz2_spec.rb +0 -24
- data/spec/imw/archives/targz_spec.rb +0 -21
- data/spec/imw/archives/zip_spec.rb +0 -16
- data/spec/imw/archives_spec.rb +0 -77
- data/spec/imw/compressed_files/bz2_spec.rb +0 -15
- data/spec/imw/compressed_files/compressible_spec.rb +0 -36
- data/spec/imw/compressed_files/gz_spec.rb +0 -15
- data/spec/imw/compressed_files_spec.rb +0 -47
- data/spec/imw/dataset/paths_spec.rb +0 -32
- data/spec/imw/dataset/workflow_spec.rb +0 -41
- data/spec/imw/formats/delimited_spec.rb +0 -44
- data/spec/imw/formats/excel_spec.rb +0 -55
- data/spec/imw/formats/json_spec.rb +0 -18
- data/spec/imw/formats/sgml_spec.rb +0 -24
- data/spec/imw/formats/yaml_spec.rb +0 -19
- data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
- data/spec/imw/metadata/field_spec.rb +0 -25
- data/spec/imw/metadata/has_metadata_spec.rb +0 -58
- data/spec/imw/metadata/has_summary_spec.rb +0 -32
- data/spec/imw/metadata/schema_spec.rb +0 -24
- data/spec/imw/metadata_spec.rb +0 -86
- data/spec/imw/parsers/line_parser_spec.rb +0 -96
- data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
- data/spec/imw/resource_spec.rb +0 -32
- data/spec/imw/schemes/hdfs_spec.rb +0 -67
- data/spec/imw/schemes/http_spec.rb +0 -19
- data/spec/imw/schemes/local_spec.rb +0 -165
- data/spec/imw/schemes/remote_spec.rb +0 -38
- data/spec/imw/schemes/s3_spec.rb +0 -31
- data/spec/imw/schemes/sql_spec.rb +0 -3
- data/spec/imw/tools/aggregator_spec.rb +0 -71
- data/spec/imw/tools/archiver_spec.rb +0 -120
- data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
- data/spec/imw/tools/summarizer_spec.rb +0 -8
- data/spec/imw/tools/transferer_spec.rb +0 -195
- data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
- data/spec/imw/utils/has_uri_spec.rb +0 -61
- data/spec/imw/utils/paths_spec.rb +0 -10
- data/spec/imw/utils/shared_paths_spec.rb +0 -29
- data/spec/imw_spec.rb +0 -14
- data/spec/rcov.opts +0 -1
- data/spec/spec_helper.rb +0 -31
- data/spec/support/custom_matchers.rb +0 -28
- data/spec/support/file_contents_matcher.rb +0 -30
- data/spec/support/paths_matcher.rb +0 -66
- data/spec/support/random.rb +0 -213
- data/spec/support/without_regard_to_order_matcher.rb +0 -41
|
@@ -1,153 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
|
|
3
|
-
describe IMW::Tools::ExtensionAnalyzer do
|
|
4
|
-
|
|
5
|
-
before do
|
|
6
|
-
class Analyzer
|
|
7
|
-
attr_accessor :dir, :resources
|
|
8
|
-
include IMW::Tools::ExtensionAnalyzer
|
|
9
|
-
def initialize dir
|
|
10
|
-
self.dir = File.expand_path(dir)
|
|
11
|
-
@resources = IMW.open(self.dir).all_resources
|
|
12
|
-
end
|
|
13
|
-
def total_size
|
|
14
|
-
@total_size ||= resources.map(&:size).inject(0) { |e, sum| sum += e }
|
|
15
|
-
end
|
|
16
|
-
end
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
describe 'working with an empty directory' do
|
|
20
|
-
before do
|
|
21
|
-
@analyzer = Analyzer.new(IMWTest::TMP_DIR)
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
%w[most_common_extension_by_count most_common_extension_by_size most_common_extension].each do |method|
|
|
25
|
-
it "should return 'flat' when asked for its '#{method}'" do
|
|
26
|
-
@analyzer.send(method).should == 'flat'
|
|
27
|
-
end
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
%w[extension_counts normalized_extension_counts extension_sizes normalized_extension_sizes].each do |method|
|
|
31
|
-
it "should return an empty hash when asked for its '#{method}'" do
|
|
32
|
-
@analyzer.send(method).should == {}
|
|
33
|
-
end
|
|
34
|
-
end
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
describe 'working with files that lack extensions' do
|
|
38
|
-
|
|
39
|
-
before do
|
|
40
|
-
@dir = File.join(IMWTest::TMP_DIR, 'ext_dir')
|
|
41
|
-
FileUtils.mkdir_p(@dir)
|
|
42
|
-
|
|
43
|
-
@f1 = "foobar1"
|
|
44
|
-
@f2 = "foobar2"
|
|
45
|
-
@f3 = "foobar1"
|
|
46
|
-
@files = [@f1, @f2, @f3]
|
|
47
|
-
|
|
48
|
-
@files.each do |basename|
|
|
49
|
-
IMWTest::Random.file File.join(@dir, basename)
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
@analyzer = Analyzer.new(IMWTest::TMP_DIR)
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
%w[most_common_extension_by_count most_common_extension_by_size most_common_extension].each do |method|
|
|
56
|
-
it "should return 'flat' when asked for its '#{method}'" do
|
|
57
|
-
@analyzer.send(method).should == 'flat'
|
|
58
|
-
end
|
|
59
|
-
end
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
describe 'working with a directory of files' do
|
|
63
|
-
before do
|
|
64
|
-
@dir = File.join(IMWTest::TMP_DIR, 'ext_dir')
|
|
65
|
-
FileUtils.mkdir_p(@dir)
|
|
66
|
-
|
|
67
|
-
@csv1 = "foobar1.csv"
|
|
68
|
-
@csv2 = "foobar2.csv"
|
|
69
|
-
@xml = "foobar1.xml"
|
|
70
|
-
@txt = "foobar1.txt"
|
|
71
|
-
@files = [@csv1, @csv2, @xml, @txt]
|
|
72
|
-
|
|
73
|
-
@files.each do |basename|
|
|
74
|
-
IMWTest::Random.file File.join(@dir, basename)
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
def bloat basename
|
|
78
|
-
File.open(File.join(@dir, basename), 'a') do |f|
|
|
79
|
-
1000.times do
|
|
80
|
-
f.write( 'hello ' * 100)
|
|
81
|
-
end
|
|
82
|
-
end
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
@analyzer = Analyzer.new @dir
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
describe "working with extension counts" do
|
|
89
|
-
it "should be able to return counts by extension" do
|
|
90
|
-
@analyzer.extension_counts.should == {'xml' => 1, 'txt' => 1, 'csv' => 2 }
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
it "should be able to return the most common extension by count" do
|
|
94
|
-
@analyzer.most_common_extension_by_count.should == 'csv'
|
|
95
|
-
end
|
|
96
|
-
|
|
97
|
-
it "should be able to calculate extension weighted by number of files" do
|
|
98
|
-
@analyzer.normalized_extension_counts.should == { 'csv' => 0.5, 'xml' => 0.25, 'txt' => 0.25 }
|
|
99
|
-
end
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
describe "working with extension sizes" do
|
|
103
|
-
it "should be able to calculate extension sizes" do
|
|
104
|
-
csv_size = File.size(File.join(@dir, @csv1)) + File.size(File.join(@dir, @csv2))
|
|
105
|
-
xml_size = File.size(File.join(@dir, @xml))
|
|
106
|
-
txt_size = File.size(File.join(@dir, @txt))
|
|
107
|
-
@analyzer.extension_sizes.should == { 'csv' => csv_size, 'xml' => xml_size, 'txt' => txt_size }
|
|
108
|
-
end
|
|
109
|
-
|
|
110
|
-
it "should be able to return the most common extension by size" do
|
|
111
|
-
bloat @txt
|
|
112
|
-
@analyzer.most_common_extension_by_size.should == 'txt'
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
it "should be able to calculate extension sizes" do
|
|
116
|
-
csv_size = File.size(File.join(@dir, @csv1)) + File.size(File.join(@dir, @csv2))
|
|
117
|
-
xml_size = File.size(File.join(@dir, @xml))
|
|
118
|
-
txt_size = File.size(File.join(@dir, @txt))
|
|
119
|
-
total_size = csv_size + xml_size + txt_size
|
|
120
|
-
@analyzer.normalized_extension_sizes.should == { 'csv' => csv_size.to_f / total_size.to_f, 'xml' => xml_size.to_f / total_size.to_f, 'txt' => txt_size.to_f / total_size.to_f }
|
|
121
|
-
end
|
|
122
|
-
end
|
|
123
|
-
|
|
124
|
-
describe "determining the most common extension" do
|
|
125
|
-
|
|
126
|
-
it "should obviously return an extension if it is the most common by count as well as the most common by size" do
|
|
127
|
-
bloat @csv1
|
|
128
|
-
@analyzer.most_common_extension.should == 'csv'
|
|
129
|
-
end
|
|
130
|
-
|
|
131
|
-
it "should return the most common extension by count if the count fraction is half or greater and the size fraction is less than half" do
|
|
132
|
-
bloat @txt
|
|
133
|
-
bloat @xml
|
|
134
|
-
@analyzer.most_common_extension.should == 'csv'
|
|
135
|
-
end
|
|
136
|
-
|
|
137
|
-
it "should return the most common extension by size if the size fraction is half or greater and the count fraction is less than half" do
|
|
138
|
-
# need to add an xml file
|
|
139
|
-
@new_xml = File.join(@dir, 'xml2.xml')
|
|
140
|
-
IMWTest::Random.file(@new_xml)
|
|
141
|
-
bloat @txt
|
|
142
|
-
@analyzer = Analyzer.new @dir
|
|
143
|
-
@analyzer.most_common_extension.should == 'txt'
|
|
144
|
-
end
|
|
145
|
-
|
|
146
|
-
it "should return the most common extension by size if no other conditions are met" do
|
|
147
|
-
bloat @txt
|
|
148
|
-
@analyzer.most_common_extension.should == 'txt'
|
|
149
|
-
end
|
|
150
|
-
|
|
151
|
-
end
|
|
152
|
-
end
|
|
153
|
-
end
|
|
@@ -1,195 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
|
|
3
|
-
describe IMW::Tools::Transferer do
|
|
4
|
-
before do
|
|
5
|
-
@dir = IMW.open("dir")
|
|
6
|
-
@new_dir = IMW.open("new_dir")
|
|
7
|
-
@nested = IMW.open('new_dir/nested.txt')
|
|
8
|
-
@nested_dir = IMW.open('new_dir/nested')
|
|
9
|
-
@local = IMW.open("foobar.txt")
|
|
10
|
-
@dest = IMW.open("barbaz.txt")
|
|
11
|
-
@http = IMW.open("http://www.google.com")
|
|
12
|
-
@hdfs = IMW.open("hdfs:///path/to/foobar.txt")
|
|
13
|
-
@s3 = IMW.open("s3://mybucket/foo/bar")
|
|
14
|
-
IMWTest::Random.file(@local.path)
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
it "should raise an error unless the action is one of :cp, :copy, :mv :move, or :mv!" do
|
|
18
|
-
@transferer = IMW::Tools::Transferer.new(:cp, @local, @http)
|
|
19
|
-
@transferer.action = :cp
|
|
20
|
-
@transferer.action = :copy
|
|
21
|
-
@transferer.action = :mv
|
|
22
|
-
@transferer.action = :mv!
|
|
23
|
-
@transferer.action = :move
|
|
24
|
-
lambda { @transferer.action = :foobar }.should raise_error(IMW::ArgumentError)
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
it "should raise an error if the source and the destination have the same URI" do
|
|
28
|
-
lambda { IMW::Tools::Transferer.new(:cp, @local, @local) }.should raise_error(IMW::PathError)
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
it "should print a log message when IMW is verbose" do
|
|
32
|
-
IMW.stub!(:verbose).and_return(:true)
|
|
33
|
-
IMW.should_receive(:announce_if_verbose).with("Copying #{@local} to #{@dest}")
|
|
34
|
-
IMW::Tools::Transferer.new(:cp, @local, @dest).transfer!
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
describe "transfering local files" do
|
|
38
|
-
|
|
39
|
-
before do
|
|
40
|
-
IMWTest::Random.file @local.path
|
|
41
|
-
@transferer = IMW::Tools::Transferer.new(:cp, @local, @dest)
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
it "should raise an error if the source doesn't exist" do
|
|
45
|
-
@local.rm!
|
|
46
|
-
lambda { @transferer.source = @local }.should raise_error(IMW::PathError)
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
it "should raise an error if the directory of the destination doesn't exist" do
|
|
50
|
-
lambda { @transferer.destination = @nested }.should raise_error(IMW::PathError)
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
it "can copy a local file" do
|
|
54
|
-
@transferer.transfer!
|
|
55
|
-
@local.exist?.should be_true
|
|
56
|
-
@dest.exist?.should be_true
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
it "can copy a local file to a directory" do
|
|
60
|
-
FileUtils.mkdir(@dir.path)
|
|
61
|
-
@transferer.destination = @dir
|
|
62
|
-
@transferer.transfer!
|
|
63
|
-
IMW.open(File.join(@dir.path, @local.basename)).exist?.should be_true
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
it "can move a local file" do
|
|
67
|
-
@transferer.action = :mv
|
|
68
|
-
@transferer.transfer!
|
|
69
|
-
@local.exist?.should be_false
|
|
70
|
-
@dest.exist?.should be_true
|
|
71
|
-
end
|
|
72
|
-
|
|
73
|
-
it "can move a local file to a directory" do
|
|
74
|
-
FileUtils.mkdir(@dir.path)
|
|
75
|
-
@transferer.action = :mv
|
|
76
|
-
@transferer.destination = @dir
|
|
77
|
-
@transferer.transfer!
|
|
78
|
-
IMW.open(File.join(@dir.path, @local.basename)).exist?.should be_true
|
|
79
|
-
@local.exist?.should be_false
|
|
80
|
-
end
|
|
81
|
-
end
|
|
82
|
-
|
|
83
|
-
describe "transfering local directories" do
|
|
84
|
-
|
|
85
|
-
before do
|
|
86
|
-
IMWTest::Random.directory_with_files @dir.path
|
|
87
|
-
@dir = @dir.reopen
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
it "should raise an error if the source doesn't exist" do
|
|
91
|
-
@dir.rm_rf!
|
|
92
|
-
lambda { IMW::Tools::Transferer.new(:cp, @dir, @new_dir).transfer! }.should raise_error(IMW::PathError)
|
|
93
|
-
end
|
|
94
|
-
|
|
95
|
-
it "should raise an error if the directory of the destination doesn't exist" do
|
|
96
|
-
lambda { IMW::Tools::Transferer.new(:cp, @dir, @nested_dir).transfer! }.should raise_error(IMW::PathError)
|
|
97
|
-
end
|
|
98
|
-
|
|
99
|
-
it "can copy a local directory" do
|
|
100
|
-
IMW::Tools::Transferer.new(:cp, @dir, @new_dir).transfer!
|
|
101
|
-
@dir.exist?.should be_true
|
|
102
|
-
@new_dir.exist?.should be_true
|
|
103
|
-
end
|
|
104
|
-
|
|
105
|
-
it "can move a local directory" do
|
|
106
|
-
IMW::Tools::Transferer.new(:mv, @dir, @new_dir).transfer!
|
|
107
|
-
@dir.exist?.should be_false
|
|
108
|
-
@new_dir.exist?.should be_true
|
|
109
|
-
end
|
|
110
|
-
|
|
111
|
-
it "can copy a local directory to an existing directory" do
|
|
112
|
-
FileUtils.mkdir(@new_dir.path)
|
|
113
|
-
IMW::Tools::Transferer.new(:cp, @dir, @nested_dir).transfer!
|
|
114
|
-
@dir.exist?.should be_true
|
|
115
|
-
@nested_dir.exist?.should be_true
|
|
116
|
-
end
|
|
117
|
-
|
|
118
|
-
it "can move a local directory to an existing directory" do
|
|
119
|
-
FileUtils.mkdir(@new_dir.path)
|
|
120
|
-
IMW::Tools::Transferer.new(:mv, @dir, @nested_dir).transfer!
|
|
121
|
-
@dir.exist?.should_not be_true
|
|
122
|
-
@nested_dir.exist?.should be_true
|
|
123
|
-
end
|
|
124
|
-
end
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
describe "transferring HTTP files" do
|
|
128
|
-
it "can copy a remote file to a local path" do
|
|
129
|
-
IMW::Tools::Transferer.new(:cp, @http, @local).transfer!
|
|
130
|
-
@local.exist?.should be_true
|
|
131
|
-
end
|
|
132
|
-
end
|
|
133
|
-
|
|
134
|
-
describe "transferring S3 files" do
|
|
135
|
-
|
|
136
|
-
it "can copy an S3 file to a local path" do
|
|
137
|
-
IMW::Schemes::S3.should_receive(:get).with(@s3, @local)
|
|
138
|
-
IMW::Tools::Transferer.new(:cp, @s3, @local).transfer!
|
|
139
|
-
end
|
|
140
|
-
|
|
141
|
-
it "can copy a local path to an S3 file" do
|
|
142
|
-
IMWTest::Random.file @local.path
|
|
143
|
-
IMW::Schemes::S3.should_receive(:put).with(@local, @s3)
|
|
144
|
-
IMW::Tools::Transferer.new(:cp, @local, @s3).transfer!
|
|
145
|
-
end
|
|
146
|
-
|
|
147
|
-
it "can copy between S3 files" do
|
|
148
|
-
@new_s3 = IMW.open('s3://mybucket/new/path')
|
|
149
|
-
IMW::Schemes::S3.should_receive(:copy).with(@s3, @new_s3)
|
|
150
|
-
IMW::Tools::Transferer.new(:cp, @s3, @new_s3).transfer!
|
|
151
|
-
end
|
|
152
|
-
end
|
|
153
|
-
|
|
154
|
-
describe "transferring HDFS files" do
|
|
155
|
-
before do
|
|
156
|
-
IMW::Schemes::HDFS.stub!(:fs)
|
|
157
|
-
end
|
|
158
|
-
|
|
159
|
-
it "can copy a local file to an HDFS path" do
|
|
160
|
-
IMWTest::Random.file @local.path
|
|
161
|
-
|
|
162
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:put, @local.path, @hdfs.path)
|
|
163
|
-
IMW::Tools::Transferer.new(:cp, @local, @hdfs).transfer!
|
|
164
|
-
end
|
|
165
|
-
|
|
166
|
-
it "can copy an HDFS file to a local path" do
|
|
167
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:get, @hdfs.path, @local.path)
|
|
168
|
-
IMW::Tools::Transferer.new(:cp, @hdfs, @local).transfer!
|
|
169
|
-
end
|
|
170
|
-
|
|
171
|
-
it "can copy between HDFS paths" do
|
|
172
|
-
@new_hdfs = IMW.open('hdfs:///a/new/path')
|
|
173
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:cp, @hdfs.path, @new_hdfs.path)
|
|
174
|
-
IMW::Tools::Transferer.new(:cp, @hdfs, @new_hdfs).transfer!
|
|
175
|
-
end
|
|
176
|
-
|
|
177
|
-
it "can move between HDFS paths" do
|
|
178
|
-
@new_hdfs = IMW.open('hdfs:///a/new/path')
|
|
179
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:mv, @hdfs.path, @new_hdfs.path)
|
|
180
|
-
IMW::Tools::Transferer.new(:mv, @hdfs, @new_hdfs).transfer!
|
|
181
|
-
end
|
|
182
|
-
|
|
183
|
-
it "can copy from S3 to HDFS" do
|
|
184
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:cp, @s3.s3n_url, @hdfs.path)
|
|
185
|
-
IMW::Tools::Transferer.new(:cp, @s3, @hdfs).transfer!
|
|
186
|
-
end
|
|
187
|
-
|
|
188
|
-
it "can copy from HDFS to S3" do
|
|
189
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:cp, @hdfs.path, @s3.s3n_url)
|
|
190
|
-
IMW::Tools::Transferer.new(:cp, @hdfs, @s3).transfer!
|
|
191
|
-
end
|
|
192
|
-
end
|
|
193
|
-
end
|
|
194
|
-
|
|
195
|
-
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
|
|
3
|
-
describe "extending resources with specific modules" do
|
|
4
|
-
before do
|
|
5
|
-
@class = Class.new
|
|
6
|
-
@class.send(:include, IMW::Utils::DynamicallyExtendable)
|
|
7
|
-
@instance = @class.new
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
it "should raise an error when registering a malformed handler" do
|
|
11
|
-
lambda { @class.register_handler("Foo", 3) }.should raise_error(IMW::ArgumentError)
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
it "should store in instances modules they've been extended by" do
|
|
15
|
-
@foo = Module.new
|
|
16
|
-
@instance.extend(@foo)
|
|
17
|
-
@instance.modules.should include(@foo)
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
describe "evaluating handlers" do
|
|
21
|
-
before do
|
|
22
|
-
@proccer = Module.new
|
|
23
|
-
@class.send(:attr_accessor, :prop)
|
|
24
|
-
@class.register_handler(@proccer, Proc.new { |instance| instance.prop })
|
|
25
|
-
|
|
26
|
-
@regexper = Module.new
|
|
27
|
-
@class.send(:define_method, :to_s) { 'whoa' }
|
|
28
|
-
@class.register_handler(@regexper, /whoa/)
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
it "should extend an instance with a matching proc handler" do
|
|
32
|
-
@instance.prop = true
|
|
33
|
-
@instance.extend_appropriately!
|
|
34
|
-
@instance.modules.should include(@proccer)
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
it "should not extend an instance with a non-matching proc handler" do
|
|
38
|
-
@instance.prop = false
|
|
39
|
-
@instance.extend_appropriately!
|
|
40
|
-
@instance.modules.should_not include(@proccer)
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
it "should extend an instance with a matching regexp handler" do
|
|
44
|
-
@instance.extend_appropriately!
|
|
45
|
-
@instance.modules.should include(@regexper)
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
it "should not extend an instance with a non-matching regexp handler" do
|
|
49
|
-
@class.send(:define_method, :to_s) { 'fowl' }
|
|
50
|
-
@instance.extend_appropriately!
|
|
51
|
-
@instance.modules.should_not include(@regexper)
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
it "should not extend an instance with a module it was asked to skip" do
|
|
55
|
-
@instance.extend_appropriately!(:skip_modules => [@regexper])
|
|
56
|
-
@instance.modules.should_not include(@regexper)
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
it "should not extend with any modules if asked" do
|
|
60
|
-
@instance.extend_appropriately!(:no_modules => true)
|
|
61
|
-
@instance.modules.should_not include(@regexper)
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
it "should use a module if asked to do so even if it's handler didn't match" do
|
|
65
|
-
@instance.extend_appropriately!(:use_modules => [@proccer])
|
|
66
|
-
@instance.modules.should include(@proccer)
|
|
67
|
-
end
|
|
68
|
-
end
|
|
69
|
-
end
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
|
|
3
|
-
class Klass
|
|
4
|
-
include IMW::Utils::HasURI
|
|
5
|
-
end
|
|
6
|
-
|
|
7
|
-
def new_obj uri
|
|
8
|
-
obj = Klass.new
|
|
9
|
-
obj.uri = uri
|
|
10
|
-
obj
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
describe IMW::Utils::HasURI do
|
|
14
|
-
|
|
15
|
-
it "local file path" do
|
|
16
|
-
obj = new_obj("/home/foo.txt")
|
|
17
|
-
obj.stub!(:path).and_return("/home/foo.txt")
|
|
18
|
-
|
|
19
|
-
obj.scheme.should be_nil
|
|
20
|
-
obj.dirname.should == '/home'
|
|
21
|
-
obj.basename.should == 'foo.txt'
|
|
22
|
-
obj.extname.should == '.txt'
|
|
23
|
-
obj.extension.should == 'txt'
|
|
24
|
-
obj.name.should == 'foo'
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
it "local file path with spaces in the name" do
|
|
28
|
-
obj = new_obj("/home/foo bar.txt")
|
|
29
|
-
obj.stub!(:path).and_return("/home/foo bar.txt")
|
|
30
|
-
obj.name.should == 'foo bar'
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
it "local file path with an explicit file:// scheme" do
|
|
34
|
-
obj = new_obj("file:///home/foo.txt")
|
|
35
|
-
obj.scheme.should == 'file'
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
it "web URL with query and fragment" do
|
|
39
|
-
obj = new_obj("http://mysite.com/some/page?param=value#frag")
|
|
40
|
-
obj.stub!(:path).and_return("/some/page")
|
|
41
|
-
obj.scheme.should == 'http'
|
|
42
|
-
obj.dirname.should == '/some'
|
|
43
|
-
obj.basename.should == 'page'
|
|
44
|
-
obj.extname.should == ''
|
|
45
|
-
obj.extension.should == ''
|
|
46
|
-
obj.name.should == 'page'
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
it "should be able to strip URIs" do
|
|
50
|
-
new_obj('/path/to/something').stripped_uri.to_s.should == '/path/to/something'
|
|
51
|
-
new_obj('http://user:pass@example.com:8080/path/to/some/script.php?param=value#frag').stripped_uri.to_s.should == 'http://user:pass@example.com:8080/path/to/some/script.php'
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
it "should be able to return raw paths" do
|
|
55
|
-
new_obj('s3://bucket/crazy url with # some dumb naming convention').raw_path.should == '/crazy url with # some dumb naming convention'
|
|
56
|
-
new_obj('s3://bucket/crazy url with ?some dumb naming convention').raw_path.should == '/crazy url with ?some dumb naming convention'
|
|
57
|
-
new_obj('s3://bucket/crazy url with ?some dumb naming #convention').raw_path.should == '/crazy url with ?some dumb naming #convention'
|
|
58
|
-
new_obj('s3://bucket/crazy url with #some dumb naming ?convention').raw_path.should == '/crazy url with #some dumb naming ?convention'
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
end
|