imw 0.2.18 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +7 -26
- data/Gemfile.lock +13 -38
- data/{LICENSE → LICENSE.txt} +1 -1
- data/README.textile +35 -0
- data/Rakefile +45 -22
- data/VERSION +1 -1
- data/examples/foo.rb +19 -0
- data/examples/html_selector.rb +22 -0
- data/examples/nes_game_list.csv +625 -0
- data/examples/nes_gamespot.csv +1371 -0
- data/examples/nes_nintendo.csv +624 -0
- data/examples/nes_unlicensed.csv +89 -0
- data/examples/nes_wikipedia.csv +710 -0
- data/examples/nibbler_test.rb +24 -0
- data/examples/script.rb +19 -0
- data/lib/imw.rb +28 -140
- data/lib/imw/error.rb +9 -0
- data/lib/imw/recordizer.rb +8 -0
- data/lib/imw/recordizer/html_selector_recordizer.rb +86 -0
- data/lib/imw/recordizer/string_slice_recordizer.rb +39 -0
- data/lib/imw/resource.rb +3 -119
- data/lib/imw/serializer.rb +7 -0
- data/lib/imw/serializer/json_serializer.rb +17 -0
- data/lib/imw/uri.rb +41 -0
- data/spec/resource_spec.rb +78 -0
- data/spec/uri_spec.rb +55 -0
- metadata +81 -232
- data/README.rdoc +0 -371
- data/bin/imw +0 -5
- data/bin/tsv_to_json.rb +0 -29
- data/etc/imwrc.rb +0 -26
- data/examples/dataset.rb +0 -12
- data/examples/metadata.yml +0 -10
- data/lib/imw/archives.rb +0 -120
- data/lib/imw/archives/rar.rb +0 -19
- data/lib/imw/archives/tar.rb +0 -19
- data/lib/imw/archives/tarbz2.rb +0 -73
- data/lib/imw/archives/targz.rb +0 -73
- data/lib/imw/archives/zip.rb +0 -51
- data/lib/imw/boot.rb +0 -87
- data/lib/imw/compressed_files.rb +0 -94
- data/lib/imw/compressed_files/bz2.rb +0 -16
- data/lib/imw/compressed_files/compressible.rb +0 -75
- data/lib/imw/compressed_files/gz.rb +0 -16
- data/lib/imw/dataset.rb +0 -125
- data/lib/imw/dataset/paths.rb +0 -29
- data/lib/imw/dataset/workflow.rb +0 -195
- data/lib/imw/formats.rb +0 -33
- data/lib/imw/formats/delimited.rb +0 -170
- data/lib/imw/formats/excel.rb +0 -100
- data/lib/imw/formats/json.rb +0 -41
- data/lib/imw/formats/pdf.rb +0 -71
- data/lib/imw/formats/sgml.rb +0 -69
- data/lib/imw/formats/yaml.rb +0 -41
- data/lib/imw/metadata.rb +0 -83
- data/lib/imw/metadata/contains_metadata.rb +0 -54
- data/lib/imw/metadata/dsl.rb +0 -111
- data/lib/imw/metadata/field.rb +0 -37
- data/lib/imw/metadata/has_metadata.rb +0 -98
- data/lib/imw/metadata/has_summary.rb +0 -57
- data/lib/imw/metadata/schema.rb +0 -17
- data/lib/imw/parsers.rb +0 -8
- data/lib/imw/parsers/flat.rb +0 -44
- data/lib/imw/parsers/html_parser.rb +0 -387
- data/lib/imw/parsers/html_parser/matchers.rb +0 -289
- data/lib/imw/parsers/line_parser.rb +0 -87
- data/lib/imw/parsers/regexp_parser.rb +0 -72
- data/lib/imw/repository.rb +0 -12
- data/lib/imw/runner.rb +0 -118
- data/lib/imw/schemes.rb +0 -23
- data/lib/imw/schemes/ftp.rb +0 -142
- data/lib/imw/schemes/hdfs.rb +0 -251
- data/lib/imw/schemes/http.rb +0 -165
- data/lib/imw/schemes/local.rb +0 -409
- data/lib/imw/schemes/remote.rb +0 -119
- data/lib/imw/schemes/s3.rb +0 -143
- data/lib/imw/schemes/sql.rb +0 -129
- data/lib/imw/tools.rb +0 -12
- data/lib/imw/tools/aggregator.rb +0 -148
- data/lib/imw/tools/archiver.rb +0 -220
- data/lib/imw/tools/downloader.rb +0 -63
- data/lib/imw/tools/extension_analyzer.rb +0 -114
- data/lib/imw/tools/summarizer.rb +0 -83
- data/lib/imw/tools/transferer.rb +0 -167
- data/lib/imw/utils.rb +0 -74
- data/lib/imw/utils/dynamically_extendable.rb +0 -137
- data/lib/imw/utils/error.rb +0 -59
- data/lib/imw/utils/extensions/hpricot.rb +0 -34
- data/lib/imw/utils/has_uri.rb +0 -131
- data/lib/imw/utils/log.rb +0 -92
- data/lib/imw/utils/misc.rb +0 -57
- data/lib/imw/utils/paths.rb +0 -146
- data/lib/imw/utils/uri.rb +0 -59
- data/lib/imw/utils/uuid.rb +0 -33
- data/lib/imw/utils/validate.rb +0 -38
- data/lib/imw/utils/version.rb +0 -11
- data/spec/data/formats/delimited/sample.csv +0 -131
- data/spec/data/formats/delimited/sample.tsv +0 -131
- data/spec/data/formats/delimited/with_schema/ace-hardware-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -16
- data/spec/data/formats/delimited/with_schema/complete-list-of-starbucks-locations.tsv +0 -11
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -12
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -13
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -22
- data/spec/data/formats/delimited/with_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -22
- data/spec/data/formats/delimited/without_schema/ace-hardware-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/all-countries-ip-address-to-geolocation-data.tsv +0 -15
- data/spec/data/formats/delimited/without_schema/complete-list-of-starbucks-locations.tsv +0 -10
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-cumulative-word-count-from-from-dec.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-adds-by-zip-cod.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-myspace-application-counts.tsv +0 -11
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-latlong.tsv +0 -12
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-user-count-by-zip-code.tsv +0 -21
- data/spec/data/formats/delimited/without_schema/myspace-user-activity-stream-word-count-by-day-from-december-200.tsv +0 -21
- data/spec/data/formats/excel/sample.xls +0 -0
- data/spec/data/formats/json/sample.json +0 -1
- data/spec/data/formats/none/sample +0 -650
- data/spec/data/formats/sgml/sample.xml +0 -617
- data/spec/data/formats/text/sample.txt +0 -650
- data/spec/data/formats/yaml/sample.yaml +0 -410
- data/spec/data/schema-tabular.yaml +0 -11
- data/spec/imw/archives/rar_spec.rb +0 -16
- data/spec/imw/archives/tar_spec.rb +0 -16
- data/spec/imw/archives/tarbz2_spec.rb +0 -24
- data/spec/imw/archives/targz_spec.rb +0 -21
- data/spec/imw/archives/zip_spec.rb +0 -16
- data/spec/imw/archives_spec.rb +0 -77
- data/spec/imw/compressed_files/bz2_spec.rb +0 -15
- data/spec/imw/compressed_files/compressible_spec.rb +0 -36
- data/spec/imw/compressed_files/gz_spec.rb +0 -15
- data/spec/imw/compressed_files_spec.rb +0 -47
- data/spec/imw/dataset/paths_spec.rb +0 -32
- data/spec/imw/dataset/workflow_spec.rb +0 -41
- data/spec/imw/formats/delimited_spec.rb +0 -44
- data/spec/imw/formats/excel_spec.rb +0 -55
- data/spec/imw/formats/json_spec.rb +0 -18
- data/spec/imw/formats/sgml_spec.rb +0 -24
- data/spec/imw/formats/yaml_spec.rb +0 -19
- data/spec/imw/metadata/contains_metadata_spec.rb +0 -56
- data/spec/imw/metadata/field_spec.rb +0 -25
- data/spec/imw/metadata/has_metadata_spec.rb +0 -58
- data/spec/imw/metadata/has_summary_spec.rb +0 -32
- data/spec/imw/metadata/schema_spec.rb +0 -24
- data/spec/imw/metadata_spec.rb +0 -86
- data/spec/imw/parsers/line_parser_spec.rb +0 -96
- data/spec/imw/parsers/regexp_parser_spec.rb +0 -42
- data/spec/imw/resource_spec.rb +0 -32
- data/spec/imw/schemes/hdfs_spec.rb +0 -67
- data/spec/imw/schemes/http_spec.rb +0 -19
- data/spec/imw/schemes/local_spec.rb +0 -165
- data/spec/imw/schemes/remote_spec.rb +0 -38
- data/spec/imw/schemes/s3_spec.rb +0 -31
- data/spec/imw/schemes/sql_spec.rb +0 -3
- data/spec/imw/tools/aggregator_spec.rb +0 -71
- data/spec/imw/tools/archiver_spec.rb +0 -120
- data/spec/imw/tools/extension_analyzer_spec.rb +0 -153
- data/spec/imw/tools/summarizer_spec.rb +0 -8
- data/spec/imw/tools/transferer_spec.rb +0 -195
- data/spec/imw/utils/dynamically_extendable_spec.rb +0 -69
- data/spec/imw/utils/has_uri_spec.rb +0 -61
- data/spec/imw/utils/paths_spec.rb +0 -10
- data/spec/imw/utils/shared_paths_spec.rb +0 -29
- data/spec/imw_spec.rb +0 -14
- data/spec/rcov.opts +0 -1
- data/spec/spec_helper.rb +0 -31
- data/spec/support/custom_matchers.rb +0 -28
- data/spec/support/file_contents_matcher.rb +0 -30
- data/spec/support/paths_matcher.rb +0 -66
- data/spec/support/random.rb +0 -213
- data/spec/support/without_regard_to_order_matcher.rb +0 -41
@@ -1,153 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
2
|
-
|
3
|
-
describe IMW::Tools::ExtensionAnalyzer do
|
4
|
-
|
5
|
-
before do
|
6
|
-
class Analyzer
|
7
|
-
attr_accessor :dir, :resources
|
8
|
-
include IMW::Tools::ExtensionAnalyzer
|
9
|
-
def initialize dir
|
10
|
-
self.dir = File.expand_path(dir)
|
11
|
-
@resources = IMW.open(self.dir).all_resources
|
12
|
-
end
|
13
|
-
def total_size
|
14
|
-
@total_size ||= resources.map(&:size).inject(0) { |e, sum| sum += e }
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
describe 'working with an empty directory' do
|
20
|
-
before do
|
21
|
-
@analyzer = Analyzer.new(IMWTest::TMP_DIR)
|
22
|
-
end
|
23
|
-
|
24
|
-
%w[most_common_extension_by_count most_common_extension_by_size most_common_extension].each do |method|
|
25
|
-
it "should return 'flat' when asked for its '#{method}'" do
|
26
|
-
@analyzer.send(method).should == 'flat'
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
%w[extension_counts normalized_extension_counts extension_sizes normalized_extension_sizes].each do |method|
|
31
|
-
it "should return an empty hash when asked for its '#{method}'" do
|
32
|
-
@analyzer.send(method).should == {}
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
describe 'working with files that lack extensions' do
|
38
|
-
|
39
|
-
before do
|
40
|
-
@dir = File.join(IMWTest::TMP_DIR, 'ext_dir')
|
41
|
-
FileUtils.mkdir_p(@dir)
|
42
|
-
|
43
|
-
@f1 = "foobar1"
|
44
|
-
@f2 = "foobar2"
|
45
|
-
@f3 = "foobar1"
|
46
|
-
@files = [@f1, @f2, @f3]
|
47
|
-
|
48
|
-
@files.each do |basename|
|
49
|
-
IMWTest::Random.file File.join(@dir, basename)
|
50
|
-
end
|
51
|
-
|
52
|
-
@analyzer = Analyzer.new(IMWTest::TMP_DIR)
|
53
|
-
end
|
54
|
-
|
55
|
-
%w[most_common_extension_by_count most_common_extension_by_size most_common_extension].each do |method|
|
56
|
-
it "should return 'flat' when asked for its '#{method}'" do
|
57
|
-
@analyzer.send(method).should == 'flat'
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
describe 'working with a directory of files' do
|
63
|
-
before do
|
64
|
-
@dir = File.join(IMWTest::TMP_DIR, 'ext_dir')
|
65
|
-
FileUtils.mkdir_p(@dir)
|
66
|
-
|
67
|
-
@csv1 = "foobar1.csv"
|
68
|
-
@csv2 = "foobar2.csv"
|
69
|
-
@xml = "foobar1.xml"
|
70
|
-
@txt = "foobar1.txt"
|
71
|
-
@files = [@csv1, @csv2, @xml, @txt]
|
72
|
-
|
73
|
-
@files.each do |basename|
|
74
|
-
IMWTest::Random.file File.join(@dir, basename)
|
75
|
-
end
|
76
|
-
|
77
|
-
def bloat basename
|
78
|
-
File.open(File.join(@dir, basename), 'a') do |f|
|
79
|
-
1000.times do
|
80
|
-
f.write( 'hello ' * 100)
|
81
|
-
end
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
@analyzer = Analyzer.new @dir
|
86
|
-
end
|
87
|
-
|
88
|
-
describe "working with extension counts" do
|
89
|
-
it "should be able to return counts by extension" do
|
90
|
-
@analyzer.extension_counts.should == {'xml' => 1, 'txt' => 1, 'csv' => 2 }
|
91
|
-
end
|
92
|
-
|
93
|
-
it "should be able to return the most common extension by count" do
|
94
|
-
@analyzer.most_common_extension_by_count.should == 'csv'
|
95
|
-
end
|
96
|
-
|
97
|
-
it "should be able to calculate extension weighted by number of files" do
|
98
|
-
@analyzer.normalized_extension_counts.should == { 'csv' => 0.5, 'xml' => 0.25, 'txt' => 0.25 }
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
describe "working with extension sizes" do
|
103
|
-
it "should be able to calculate extension sizes" do
|
104
|
-
csv_size = File.size(File.join(@dir, @csv1)) + File.size(File.join(@dir, @csv2))
|
105
|
-
xml_size = File.size(File.join(@dir, @xml))
|
106
|
-
txt_size = File.size(File.join(@dir, @txt))
|
107
|
-
@analyzer.extension_sizes.should == { 'csv' => csv_size, 'xml' => xml_size, 'txt' => txt_size }
|
108
|
-
end
|
109
|
-
|
110
|
-
it "should be able to return the most common extension by size" do
|
111
|
-
bloat @txt
|
112
|
-
@analyzer.most_common_extension_by_size.should == 'txt'
|
113
|
-
end
|
114
|
-
|
115
|
-
it "should be able to calculate extension sizes" do
|
116
|
-
csv_size = File.size(File.join(@dir, @csv1)) + File.size(File.join(@dir, @csv2))
|
117
|
-
xml_size = File.size(File.join(@dir, @xml))
|
118
|
-
txt_size = File.size(File.join(@dir, @txt))
|
119
|
-
total_size = csv_size + xml_size + txt_size
|
120
|
-
@analyzer.normalized_extension_sizes.should == { 'csv' => csv_size.to_f / total_size.to_f, 'xml' => xml_size.to_f / total_size.to_f, 'txt' => txt_size.to_f / total_size.to_f }
|
121
|
-
end
|
122
|
-
end
|
123
|
-
|
124
|
-
describe "determining the most common extension" do
|
125
|
-
|
126
|
-
it "should obviously return an extension if it is the most common by count as well as the most common by size" do
|
127
|
-
bloat @csv1
|
128
|
-
@analyzer.most_common_extension.should == 'csv'
|
129
|
-
end
|
130
|
-
|
131
|
-
it "should return the most common extension by count if the count fraction is half or greater and the size fraction is less than half" do
|
132
|
-
bloat @txt
|
133
|
-
bloat @xml
|
134
|
-
@analyzer.most_common_extension.should == 'csv'
|
135
|
-
end
|
136
|
-
|
137
|
-
it "should return the most common extension by size if the size fraction is half or greater and the count fraction is less than half" do
|
138
|
-
# need to add an xml file
|
139
|
-
@new_xml = File.join(@dir, 'xml2.xml')
|
140
|
-
IMWTest::Random.file(@new_xml)
|
141
|
-
bloat @txt
|
142
|
-
@analyzer = Analyzer.new @dir
|
143
|
-
@analyzer.most_common_extension.should == 'txt'
|
144
|
-
end
|
145
|
-
|
146
|
-
it "should return the most common extension by size if no other conditions are met" do
|
147
|
-
bloat @txt
|
148
|
-
@analyzer.most_common_extension.should == 'txt'
|
149
|
-
end
|
150
|
-
|
151
|
-
end
|
152
|
-
end
|
153
|
-
end
|
@@ -1,195 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
2
|
-
|
3
|
-
describe IMW::Tools::Transferer do
|
4
|
-
before do
|
5
|
-
@dir = IMW.open("dir")
|
6
|
-
@new_dir = IMW.open("new_dir")
|
7
|
-
@nested = IMW.open('new_dir/nested.txt')
|
8
|
-
@nested_dir = IMW.open('new_dir/nested')
|
9
|
-
@local = IMW.open("foobar.txt")
|
10
|
-
@dest = IMW.open("barbaz.txt")
|
11
|
-
@http = IMW.open("http://www.google.com")
|
12
|
-
@hdfs = IMW.open("hdfs:///path/to/foobar.txt")
|
13
|
-
@s3 = IMW.open("s3://mybucket/foo/bar")
|
14
|
-
IMWTest::Random.file(@local.path)
|
15
|
-
end
|
16
|
-
|
17
|
-
it "should raise an error unless the action is one of :cp, :copy, :mv :move, or :mv!" do
|
18
|
-
@transferer = IMW::Tools::Transferer.new(:cp, @local, @http)
|
19
|
-
@transferer.action = :cp
|
20
|
-
@transferer.action = :copy
|
21
|
-
@transferer.action = :mv
|
22
|
-
@transferer.action = :mv!
|
23
|
-
@transferer.action = :move
|
24
|
-
lambda { @transferer.action = :foobar }.should raise_error(IMW::ArgumentError)
|
25
|
-
end
|
26
|
-
|
27
|
-
it "should raise an error if the source and the destination have the same URI" do
|
28
|
-
lambda { IMW::Tools::Transferer.new(:cp, @local, @local) }.should raise_error(IMW::PathError)
|
29
|
-
end
|
30
|
-
|
31
|
-
it "should print a log message when IMW is verbose" do
|
32
|
-
IMW.stub!(:verbose).and_return(:true)
|
33
|
-
IMW.should_receive(:announce_if_verbose).with("Copying #{@local} to #{@dest}")
|
34
|
-
IMW::Tools::Transferer.new(:cp, @local, @dest).transfer!
|
35
|
-
end
|
36
|
-
|
37
|
-
describe "transfering local files" do
|
38
|
-
|
39
|
-
before do
|
40
|
-
IMWTest::Random.file @local.path
|
41
|
-
@transferer = IMW::Tools::Transferer.new(:cp, @local, @dest)
|
42
|
-
end
|
43
|
-
|
44
|
-
it "should raise an error if the source doesn't exist" do
|
45
|
-
@local.rm!
|
46
|
-
lambda { @transferer.source = @local }.should raise_error(IMW::PathError)
|
47
|
-
end
|
48
|
-
|
49
|
-
it "should raise an error if the directory of the destination doesn't exist" do
|
50
|
-
lambda { @transferer.destination = @nested }.should raise_error(IMW::PathError)
|
51
|
-
end
|
52
|
-
|
53
|
-
it "can copy a local file" do
|
54
|
-
@transferer.transfer!
|
55
|
-
@local.exist?.should be_true
|
56
|
-
@dest.exist?.should be_true
|
57
|
-
end
|
58
|
-
|
59
|
-
it "can copy a local file to a directory" do
|
60
|
-
FileUtils.mkdir(@dir.path)
|
61
|
-
@transferer.destination = @dir
|
62
|
-
@transferer.transfer!
|
63
|
-
IMW.open(File.join(@dir.path, @local.basename)).exist?.should be_true
|
64
|
-
end
|
65
|
-
|
66
|
-
it "can move a local file" do
|
67
|
-
@transferer.action = :mv
|
68
|
-
@transferer.transfer!
|
69
|
-
@local.exist?.should be_false
|
70
|
-
@dest.exist?.should be_true
|
71
|
-
end
|
72
|
-
|
73
|
-
it "can move a local file to a directory" do
|
74
|
-
FileUtils.mkdir(@dir.path)
|
75
|
-
@transferer.action = :mv
|
76
|
-
@transferer.destination = @dir
|
77
|
-
@transferer.transfer!
|
78
|
-
IMW.open(File.join(@dir.path, @local.basename)).exist?.should be_true
|
79
|
-
@local.exist?.should be_false
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
describe "transfering local directories" do
|
84
|
-
|
85
|
-
before do
|
86
|
-
IMWTest::Random.directory_with_files @dir.path
|
87
|
-
@dir = @dir.reopen
|
88
|
-
end
|
89
|
-
|
90
|
-
it "should raise an error if the source doesn't exist" do
|
91
|
-
@dir.rm_rf!
|
92
|
-
lambda { IMW::Tools::Transferer.new(:cp, @dir, @new_dir).transfer! }.should raise_error(IMW::PathError)
|
93
|
-
end
|
94
|
-
|
95
|
-
it "should raise an error if the directory of the destination doesn't exist" do
|
96
|
-
lambda { IMW::Tools::Transferer.new(:cp, @dir, @nested_dir).transfer! }.should raise_error(IMW::PathError)
|
97
|
-
end
|
98
|
-
|
99
|
-
it "can copy a local directory" do
|
100
|
-
IMW::Tools::Transferer.new(:cp, @dir, @new_dir).transfer!
|
101
|
-
@dir.exist?.should be_true
|
102
|
-
@new_dir.exist?.should be_true
|
103
|
-
end
|
104
|
-
|
105
|
-
it "can move a local directory" do
|
106
|
-
IMW::Tools::Transferer.new(:mv, @dir, @new_dir).transfer!
|
107
|
-
@dir.exist?.should be_false
|
108
|
-
@new_dir.exist?.should be_true
|
109
|
-
end
|
110
|
-
|
111
|
-
it "can copy a local directory to an existing directory" do
|
112
|
-
FileUtils.mkdir(@new_dir.path)
|
113
|
-
IMW::Tools::Transferer.new(:cp, @dir, @nested_dir).transfer!
|
114
|
-
@dir.exist?.should be_true
|
115
|
-
@nested_dir.exist?.should be_true
|
116
|
-
end
|
117
|
-
|
118
|
-
it "can move a local directory to an existing directory" do
|
119
|
-
FileUtils.mkdir(@new_dir.path)
|
120
|
-
IMW::Tools::Transferer.new(:mv, @dir, @nested_dir).transfer!
|
121
|
-
@dir.exist?.should_not be_true
|
122
|
-
@nested_dir.exist?.should be_true
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
|
-
|
127
|
-
describe "transferring HTTP files" do
|
128
|
-
it "can copy a remote file to a local path" do
|
129
|
-
IMW::Tools::Transferer.new(:cp, @http, @local).transfer!
|
130
|
-
@local.exist?.should be_true
|
131
|
-
end
|
132
|
-
end
|
133
|
-
|
134
|
-
describe "transferring S3 files" do
|
135
|
-
|
136
|
-
it "can copy an S3 file to a local path" do
|
137
|
-
IMW::Schemes::S3.should_receive(:get).with(@s3, @local)
|
138
|
-
IMW::Tools::Transferer.new(:cp, @s3, @local).transfer!
|
139
|
-
end
|
140
|
-
|
141
|
-
it "can copy a local path to an S3 file" do
|
142
|
-
IMWTest::Random.file @local.path
|
143
|
-
IMW::Schemes::S3.should_receive(:put).with(@local, @s3)
|
144
|
-
IMW::Tools::Transferer.new(:cp, @local, @s3).transfer!
|
145
|
-
end
|
146
|
-
|
147
|
-
it "can copy between S3 files" do
|
148
|
-
@new_s3 = IMW.open('s3://mybucket/new/path')
|
149
|
-
IMW::Schemes::S3.should_receive(:copy).with(@s3, @new_s3)
|
150
|
-
IMW::Tools::Transferer.new(:cp, @s3, @new_s3).transfer!
|
151
|
-
end
|
152
|
-
end
|
153
|
-
|
154
|
-
describe "transferring HDFS files" do
|
155
|
-
before do
|
156
|
-
IMW::Schemes::HDFS.stub!(:fs)
|
157
|
-
end
|
158
|
-
|
159
|
-
it "can copy a local file to an HDFS path" do
|
160
|
-
IMWTest::Random.file @local.path
|
161
|
-
|
162
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:put, @local.path, @hdfs.path)
|
163
|
-
IMW::Tools::Transferer.new(:cp, @local, @hdfs).transfer!
|
164
|
-
end
|
165
|
-
|
166
|
-
it "can copy an HDFS file to a local path" do
|
167
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:get, @hdfs.path, @local.path)
|
168
|
-
IMW::Tools::Transferer.new(:cp, @hdfs, @local).transfer!
|
169
|
-
end
|
170
|
-
|
171
|
-
it "can copy between HDFS paths" do
|
172
|
-
@new_hdfs = IMW.open('hdfs:///a/new/path')
|
173
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:cp, @hdfs.path, @new_hdfs.path)
|
174
|
-
IMW::Tools::Transferer.new(:cp, @hdfs, @new_hdfs).transfer!
|
175
|
-
end
|
176
|
-
|
177
|
-
it "can move between HDFS paths" do
|
178
|
-
@new_hdfs = IMW.open('hdfs:///a/new/path')
|
179
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:mv, @hdfs.path, @new_hdfs.path)
|
180
|
-
IMW::Tools::Transferer.new(:mv, @hdfs, @new_hdfs).transfer!
|
181
|
-
end
|
182
|
-
|
183
|
-
it "can copy from S3 to HDFS" do
|
184
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:cp, @s3.s3n_url, @hdfs.path)
|
185
|
-
IMW::Tools::Transferer.new(:cp, @s3, @hdfs).transfer!
|
186
|
-
end
|
187
|
-
|
188
|
-
it "can copy from HDFS to S3" do
|
189
|
-
IMW::Schemes::HDFS.should_receive(:fs).with(:cp, @hdfs.path, @s3.s3n_url)
|
190
|
-
IMW::Tools::Transferer.new(:cp, @hdfs, @s3).transfer!
|
191
|
-
end
|
192
|
-
end
|
193
|
-
end
|
194
|
-
|
195
|
-
|
@@ -1,69 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
2
|
-
|
3
|
-
describe "extending resources with specific modules" do
|
4
|
-
before do
|
5
|
-
@class = Class.new
|
6
|
-
@class.send(:include, IMW::Utils::DynamicallyExtendable)
|
7
|
-
@instance = @class.new
|
8
|
-
end
|
9
|
-
|
10
|
-
it "should raise an error when registering a malformed handler" do
|
11
|
-
lambda { @class.register_handler("Foo", 3) }.should raise_error(IMW::ArgumentError)
|
12
|
-
end
|
13
|
-
|
14
|
-
it "should store in instances modules they've been extended by" do
|
15
|
-
@foo = Module.new
|
16
|
-
@instance.extend(@foo)
|
17
|
-
@instance.modules.should include(@foo)
|
18
|
-
end
|
19
|
-
|
20
|
-
describe "evaluating handlers" do
|
21
|
-
before do
|
22
|
-
@proccer = Module.new
|
23
|
-
@class.send(:attr_accessor, :prop)
|
24
|
-
@class.register_handler(@proccer, Proc.new { |instance| instance.prop })
|
25
|
-
|
26
|
-
@regexper = Module.new
|
27
|
-
@class.send(:define_method, :to_s) { 'whoa' }
|
28
|
-
@class.register_handler(@regexper, /whoa/)
|
29
|
-
end
|
30
|
-
|
31
|
-
it "should extend an instance with a matching proc handler" do
|
32
|
-
@instance.prop = true
|
33
|
-
@instance.extend_appropriately!
|
34
|
-
@instance.modules.should include(@proccer)
|
35
|
-
end
|
36
|
-
|
37
|
-
it "should not extend an instance with a non-matching proc handler" do
|
38
|
-
@instance.prop = false
|
39
|
-
@instance.extend_appropriately!
|
40
|
-
@instance.modules.should_not include(@proccer)
|
41
|
-
end
|
42
|
-
|
43
|
-
it "should extend an instance with a matching regexp handler" do
|
44
|
-
@instance.extend_appropriately!
|
45
|
-
@instance.modules.should include(@regexper)
|
46
|
-
end
|
47
|
-
|
48
|
-
it "should not extend an instance with a non-matching regexp handler" do
|
49
|
-
@class.send(:define_method, :to_s) { 'fowl' }
|
50
|
-
@instance.extend_appropriately!
|
51
|
-
@instance.modules.should_not include(@regexper)
|
52
|
-
end
|
53
|
-
|
54
|
-
it "should not extend an instance with a module it was asked to skip" do
|
55
|
-
@instance.extend_appropriately!(:skip_modules => [@regexper])
|
56
|
-
@instance.modules.should_not include(@regexper)
|
57
|
-
end
|
58
|
-
|
59
|
-
it "should not extend with any modules if asked" do
|
60
|
-
@instance.extend_appropriately!(:no_modules => true)
|
61
|
-
@instance.modules.should_not include(@regexper)
|
62
|
-
end
|
63
|
-
|
64
|
-
it "should use a module if asked to do so even if it's handler didn't match" do
|
65
|
-
@instance.extend_appropriately!(:use_modules => [@proccer])
|
66
|
-
@instance.modules.should include(@proccer)
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
@@ -1,61 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
2
|
-
|
3
|
-
class Klass
|
4
|
-
include IMW::Utils::HasURI
|
5
|
-
end
|
6
|
-
|
7
|
-
def new_obj uri
|
8
|
-
obj = Klass.new
|
9
|
-
obj.uri = uri
|
10
|
-
obj
|
11
|
-
end
|
12
|
-
|
13
|
-
describe IMW::Utils::HasURI do
|
14
|
-
|
15
|
-
it "local file path" do
|
16
|
-
obj = new_obj("/home/foo.txt")
|
17
|
-
obj.stub!(:path).and_return("/home/foo.txt")
|
18
|
-
|
19
|
-
obj.scheme.should be_nil
|
20
|
-
obj.dirname.should == '/home'
|
21
|
-
obj.basename.should == 'foo.txt'
|
22
|
-
obj.extname.should == '.txt'
|
23
|
-
obj.extension.should == 'txt'
|
24
|
-
obj.name.should == 'foo'
|
25
|
-
end
|
26
|
-
|
27
|
-
it "local file path with spaces in the name" do
|
28
|
-
obj = new_obj("/home/foo bar.txt")
|
29
|
-
obj.stub!(:path).and_return("/home/foo bar.txt")
|
30
|
-
obj.name.should == 'foo bar'
|
31
|
-
end
|
32
|
-
|
33
|
-
it "local file path with an explicit file:// scheme" do
|
34
|
-
obj = new_obj("file:///home/foo.txt")
|
35
|
-
obj.scheme.should == 'file'
|
36
|
-
end
|
37
|
-
|
38
|
-
it "web URL with query and fragment" do
|
39
|
-
obj = new_obj("http://mysite.com/some/page?param=value#frag")
|
40
|
-
obj.stub!(:path).and_return("/some/page")
|
41
|
-
obj.scheme.should == 'http'
|
42
|
-
obj.dirname.should == '/some'
|
43
|
-
obj.basename.should == 'page'
|
44
|
-
obj.extname.should == ''
|
45
|
-
obj.extension.should == ''
|
46
|
-
obj.name.should == 'page'
|
47
|
-
end
|
48
|
-
|
49
|
-
it "should be able to strip URIs" do
|
50
|
-
new_obj('/path/to/something').stripped_uri.to_s.should == '/path/to/something'
|
51
|
-
new_obj('http://user:pass@example.com:8080/path/to/some/script.php?param=value#frag').stripped_uri.to_s.should == 'http://user:pass@example.com:8080/path/to/some/script.php'
|
52
|
-
end
|
53
|
-
|
54
|
-
it "should be able to return raw paths" do
|
55
|
-
new_obj('s3://bucket/crazy url with # some dumb naming convention').raw_path.should == '/crazy url with # some dumb naming convention'
|
56
|
-
new_obj('s3://bucket/crazy url with ?some dumb naming convention').raw_path.should == '/crazy url with ?some dumb naming convention'
|
57
|
-
new_obj('s3://bucket/crazy url with ?some dumb naming #convention').raw_path.should == '/crazy url with ?some dumb naming #convention'
|
58
|
-
new_obj('s3://bucket/crazy url with #some dumb naming ?convention').raw_path.should == '/crazy url with #some dumb naming ?convention'
|
59
|
-
end
|
60
|
-
|
61
|
-
end
|