imw 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -1
- data/Rakefile +10 -0
- data/TODO +18 -0
- data/VERSION +1 -1
- data/bin/imw +1 -1
- data/etc/imwrc.rb +0 -50
- data/examples/dataset.rb +12 -0
- data/lib/imw/boot.rb +55 -9
- data/lib/imw/dataset/paths.rb +15 -24
- data/lib/imw/dataset/workflow.rb +131 -72
- data/lib/imw/dataset.rb +94 -186
- data/lib/imw/parsers/html_parser.rb +1 -1
- data/lib/imw/parsers.rb +1 -1
- data/lib/imw/repository.rb +3 -27
- data/lib/imw/resource.rb +190 -0
- data/lib/imw/resources/archive.rb +97 -0
- data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
- data/lib/imw/resources/archives_and_compressed.rb +32 -0
- data/lib/imw/resources/compressed_file.rb +89 -0
- data/lib/imw/resources/compressible.rb +77 -0
- data/lib/imw/resources/formats/delimited.rb +92 -0
- data/lib/imw/resources/formats/excel.rb +125 -0
- data/lib/imw/resources/formats/json.rb +53 -0
- data/lib/imw/resources/formats/sgml.rb +72 -0
- data/lib/imw/resources/formats/yaml.rb +53 -0
- data/lib/imw/resources/formats.rb +32 -0
- data/lib/imw/resources/local.rb +198 -0
- data/lib/imw/resources/remote.rb +110 -0
- data/lib/imw/resources/schemes/hdfs.rb +242 -0
- data/lib/imw/resources/schemes/http.rb +161 -0
- data/lib/imw/resources/schemes/s3.rb +137 -0
- data/lib/imw/resources/schemes.rb +19 -0
- data/lib/imw/resources.rb +118 -0
- data/lib/imw/runner.rb +5 -4
- data/lib/imw/transforms/archiver.rb +215 -0
- data/lib/imw/transforms/transferer.rb +103 -0
- data/lib/imw/transforms.rb +8 -0
- data/lib/imw/utils/error.rb +26 -30
- data/lib/imw/utils/extensions/array.rb +5 -15
- data/lib/imw/utils/extensions/hash.rb +6 -16
- data/lib/imw/utils/extensions/hpricot.rb +0 -14
- data/lib/imw/utils/extensions/string.rb +5 -15
- data/lib/imw/utils/extensions/symbol.rb +0 -13
- data/lib/imw/utils/extensions.rb +65 -0
- data/lib/imw/utils/log.rb +14 -13
- data/lib/imw/utils/misc.rb +0 -6
- data/lib/imw/utils/paths.rb +101 -42
- data/lib/imw/utils/version.rb +8 -9
- data/lib/imw/utils.rb +2 -18
- data/lib/imw.rb +92 -17
- data/spec/data/sample.csv +1 -1
- data/spec/data/sample.json +1 -0
- data/spec/data/sample.tsv +1 -1
- data/spec/data/sample.txt +1 -1
- data/spec/data/sample.xml +1 -1
- data/spec/data/sample.yaml +1 -1
- data/spec/imw/dataset/paths_spec.rb +32 -0
- data/spec/imw/dataset/workflow_spec.rb +41 -0
- data/spec/imw/resource_spec.rb +79 -0
- data/spec/imw/resources/archive_spec.rb +69 -0
- data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
- data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
- data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
- data/spec/imw/resources/compressed_file_spec.rb +48 -0
- data/spec/imw/resources/compressible_spec.rb +36 -0
- data/spec/imw/resources/formats/delimited_spec.rb +33 -0
- data/spec/imw/resources/formats/json_spec.rb +32 -0
- data/spec/imw/resources/formats/sgml_spec.rb +24 -0
- data/spec/imw/resources/formats/yaml_spec.rb +41 -0
- data/spec/imw/resources/local_spec.rb +98 -0
- data/spec/imw/resources/remote_spec.rb +35 -0
- data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
- data/spec/imw/resources/schemes/http_spec.rb +19 -0
- data/spec/imw/resources/schemes/s3_spec.rb +19 -0
- data/spec/imw/transforms/archiver_spec.rb +120 -0
- data/spec/imw/transforms/transferer_spec.rb +113 -0
- data/spec/imw/utils/paths_spec.rb +5 -33
- data/spec/imw/utils/shared_paths_spec.rb +29 -0
- data/spec/spec_helper.rb +5 -5
- data/spec/support/paths_matcher.rb +67 -0
- data/spec/support/random.rb +39 -36
- metadata +88 -75
- data/lib/imw/dataset/task.rb +0 -41
- data/lib/imw/files/archive.rb +0 -113
- data/lib/imw/files/basicfile.rb +0 -122
- data/lib/imw/files/binary.rb +0 -28
- data/lib/imw/files/compressed_file.rb +0 -93
- data/lib/imw/files/compressed_files_and_archives.rb +0 -334
- data/lib/imw/files/compressible.rb +0 -103
- data/lib/imw/files/csv.rb +0 -113
- data/lib/imw/files/directory.rb +0 -62
- data/lib/imw/files/excel.rb +0 -84
- data/lib/imw/files/json.rb +0 -41
- data/lib/imw/files/sgml.rb +0 -46
- data/lib/imw/files/text.rb +0 -68
- data/lib/imw/files/yaml.rb +0 -46
- data/lib/imw/files.rb +0 -125
- data/lib/imw/packagers/archiver.rb +0 -126
- data/lib/imw/packagers/s3_mover.rb +0 -36
- data/lib/imw/packagers.rb +0 -8
- data/lib/imw/utils/components.rb +0 -61
- data/lib/imw/utils/config.rb +0 -46
- data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
- data/lib/imw/utils/extensions/core.rb +0 -27
- data/lib/imw/utils/extensions/dir.rb +0 -24
- data/lib/imw/utils/extensions/file_core.rb +0 -64
- data/lib/imw/utils/extensions/typed_struct.rb +0 -22
- data/lib/imw/utils/extensions/uri.rb +0 -59
- data/lib/imw/utils/view/dump_csv.rb +0 -112
- data/lib/imw/utils/view/dump_csv_older.rb +0 -117
- data/lib/imw/utils/view.rb +0 -113
- data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
- data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
- data/spec/imw/files/archive_spec.rb +0 -118
- data/spec/imw/files/basicfile_spec.rb +0 -121
- data/spec/imw/files/bz2_spec.rb +0 -32
- data/spec/imw/files/compressed_file_spec.rb +0 -96
- data/spec/imw/files/compressible_spec.rb +0 -100
- data/spec/imw/files/file_spec.rb +0 -144
- data/spec/imw/files/gz_spec.rb +0 -32
- data/spec/imw/files/rar_spec.rb +0 -33
- data/spec/imw/files/tar_spec.rb +0 -31
- data/spec/imw/files/text_spec.rb +0 -23
- data/spec/imw/files/zip_spec.rb +0 -31
- data/spec/imw/files_spec.rb +0 -38
- data/spec/imw/packagers/archiver_spec.rb +0 -125
- data/spec/imw/packagers/s3_mover_spec.rb +0 -7
- data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
- data/spec/imw/utils/extensions/find_spec.rb +0 -113
- data/spec/imw/workflow/rip/local_spec.rb +0 -89
- data/spec/imw/workflow/rip_spec.rb +0 -27
- data/spec/support/archive_contents_matcher.rb +0 -94
- data/spec/support/directory_contents_matcher.rb +0 -61
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
|
2
|
+
|
|
3
|
+
describe IMW::Resources::Schemes::HDFS do
|
|
4
|
+
before do
|
|
5
|
+
def fake_hdfs_resource path, num_dirs=nil, num_files=nil, size=nil
|
|
6
|
+
if num_dirs && num_files && size
|
|
7
|
+
response = " #{num_dirs} #{num_files} #{size} hdfs://localhost#{path}"
|
|
8
|
+
else
|
|
9
|
+
response = ""
|
|
10
|
+
end
|
|
11
|
+
IMW::Resources::Schemes::HDFS.should_receive(:fs).with(:count, path).and_return(response)
|
|
12
|
+
IMW.open("hdfs://#{path}")
|
|
13
|
+
end
|
|
14
|
+
@path = '/path/to/myfile'
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
describe "refreshing its properties" do
|
|
18
|
+
it "should correctly get properties for a resource which exists" do
|
|
19
|
+
resource = fake_hdfs_resource(@path, 2, 3, 1000)
|
|
20
|
+
resource.exist?.should be_true
|
|
21
|
+
resource.num_dirs.should == 2
|
|
22
|
+
resource.num_files.should == 3
|
|
23
|
+
resource.size.should == 1000
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
it "should gracefully handle a resource which doesn't exist" do
|
|
27
|
+
resource = fake_hdfs_resource(@path)
|
|
28
|
+
resource.exist?.should be_false
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
it "should execute the correct command to delete the path" do
|
|
32
|
+
resource = fake_hdfs_resource(@path, 2, 3, 1000)
|
|
33
|
+
IMW::Resources::Schemes::HDFS.should_receive(:fs).with(:rm, resource.path)
|
|
34
|
+
resource.rm
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
it "should execute the correct command to delete the path when skipping the trash" do
|
|
38
|
+
resource = fake_hdfs_resource(@path, 2, 3, 1000)
|
|
39
|
+
IMW::Resources::Schemes::HDFS.should_receive(:fs).with(:rm, '-skipTrash', resource.path)
|
|
40
|
+
resource.rm :skip_trash => true
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
it "should recognize a file and extend it properly" do
|
|
44
|
+
resource = fake_hdfs_resource(@path, 0, 1, 1000)
|
|
45
|
+
resource.num_dirs.should == 0
|
|
46
|
+
resource.num_files.should == 1
|
|
47
|
+
resource.exist?.should be_true
|
|
48
|
+
resource.is_directory?.should be_false
|
|
49
|
+
resource.resource_modules.should include(IMW::Resources::Schemes::HDFSFile)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
it "should recognize a directory and extend it properly" do
|
|
53
|
+
resource = fake_hdfs_resource(@path, 2, 1, 1000)
|
|
54
|
+
resource.num_dirs.should == 2
|
|
55
|
+
resource.num_files.should == 1
|
|
56
|
+
resource.exist?.should be_true
|
|
57
|
+
resource.is_directory?.should be_true
|
|
58
|
+
resource.resource_modules.should include(IMW::Resources::Schemes::HDFSDirectory)
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
|
2
|
+
|
|
3
|
+
describe IMW::Resources::Schemes::HTTP do
|
|
4
|
+
|
|
5
|
+
describe "finding its effective basename" do
|
|
6
|
+
it "should use the real basename when present" do
|
|
7
|
+
IMW.open('http://www.google.com/foobar').effective_basename.should == 'foobar'
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
it "should use '_index' when at the root (without a slash)" do
|
|
11
|
+
IMW.open('http://www.google.com').effective_basename.should == '_index'
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
it "should use '_index' when at the root (even when a slash is given)" do
|
|
15
|
+
IMW.open('http://www.google.com/').effective_basename.should == '_index'
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
end
|
|
19
|
+
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
|
2
|
+
|
|
3
|
+
describe IMW::Resources::Schemes::S3 do
|
|
4
|
+
|
|
5
|
+
describe 'manipulating S3 paths' do
|
|
6
|
+
before do
|
|
7
|
+
@resource = IMW::Resource.new('s3://mybucket/foobar/foo.txt')
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
it "should set the bucket" do
|
|
11
|
+
@resource.bucket.should == 'mybucket'
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
it "can generate an S3N url" do
|
|
15
|
+
@resource.s3n_url.should == 's3n://mybucket/foobar/foo.txt'
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
end
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
+
|
|
3
|
+
describe IMW::Transforms::Archiver do
|
|
4
|
+
before do
|
|
5
|
+
@name = 'foobar'
|
|
6
|
+
|
|
7
|
+
# remote files
|
|
8
|
+
@homepage = "http://www.google.com"
|
|
9
|
+
@website = "http://www.google.com/support/"
|
|
10
|
+
@remote_files = [@homepage, @website]
|
|
11
|
+
|
|
12
|
+
# regular files
|
|
13
|
+
@csv = "foobar-csv.csv"
|
|
14
|
+
@xml = "foobar-xml.xml"
|
|
15
|
+
@txt = "foobar-txt.txt"
|
|
16
|
+
@blah = "foobar"
|
|
17
|
+
@files = [@csv, @xml, @txt, @blah]
|
|
18
|
+
|
|
19
|
+
# compressed files
|
|
20
|
+
@bz2 = "foobar-bz2.bz2"
|
|
21
|
+
@gz = "foobar-gz.gz"
|
|
22
|
+
@compressed_files = [@bz2, @gz]
|
|
23
|
+
|
|
24
|
+
# archives
|
|
25
|
+
@zip = "foobar-zip.zip"
|
|
26
|
+
@tarbz2 = "foobar-tarbz2.tar.bz2"
|
|
27
|
+
@targz = "foobar-targz.tar.gz"
|
|
28
|
+
@tar = "foobar-tar.tar"
|
|
29
|
+
@rar = "foobar-rar.rar"
|
|
30
|
+
@archives = [@zip, @tarbz2, @targz, @rar, @tar]
|
|
31
|
+
|
|
32
|
+
@local_files = @files + @compressed_files + @archives
|
|
33
|
+
|
|
34
|
+
@all_files = @remote_files + @local_files
|
|
35
|
+
|
|
36
|
+
@local_files.each do |path|
|
|
37
|
+
IMWTest::Random.file path
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
@archiver = IMW::Transforms::Archiver.new @name, @all_files
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
after do
|
|
44
|
+
@archiver.clean!
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
describe "preparing input files" do
|
|
48
|
+
|
|
49
|
+
describe "before preparing input files" do
|
|
50
|
+
it "should not be prepared when initialized" do
|
|
51
|
+
@archiver.prepared?.should be_false
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
describe "after preparing files" do
|
|
56
|
+
before { @archiver.prepare! }
|
|
57
|
+
|
|
58
|
+
it "should be prepared" do
|
|
59
|
+
@archiver.prepared?.should be_true
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
it "should name its archive directory properly" do
|
|
63
|
+
@archiver.tmp_dir.should contain(@name)
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
it "should copy regular files to its archive directory" do
|
|
67
|
+
@archiver.dir.should contain(*@files)
|
|
68
|
+
@local_files.each { |path| IMW.open(path).exist?.should be_true }
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
it "should copy remote files to its archive directory" do
|
|
72
|
+
@archiver.dir.should contain('_index', 'support') # _index from Http#effective_basename on http://www.google.com
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
it "should uncompress compressed files to its archive directory" do
|
|
76
|
+
@archiver.dir.should contain('foobar-bz2', 'foobar-gz')
|
|
77
|
+
@archiver.dir.should_not contain(*@compressed_files)
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
it "should copy the content of archive files to its archive directory (but not the actual archives)" do
|
|
81
|
+
@archives.each do |archive|
|
|
82
|
+
@archiver.dir.should_not contain(archive)
|
|
83
|
+
@archiver.dir.should contain(*IMW.open(archive).contents)
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
end
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
describe "when packaging files" do
|
|
91
|
+
@packages = ["package.tar.bz2", "package.zip", "package.tar.gz", "package.tar", "package.rar"]
|
|
92
|
+
|
|
93
|
+
@packages.each do |package|
|
|
94
|
+
it "should create a #{package} file containing all the files and return it" do
|
|
95
|
+
output = @archiver.package!(package)
|
|
96
|
+
output.basename.should == package
|
|
97
|
+
@archiver.tmp_dir.should contain(IMW.open(package).contents)
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
describe 'when packaging into multiple output formats' do
|
|
102
|
+
|
|
103
|
+
it "should prepare input files without being asked" do
|
|
104
|
+
@archiver.prepared?.should be_false
|
|
105
|
+
@archiver.package! 'package.tar.bz2'
|
|
106
|
+
@archiver.prepared?.should be_true
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
it "should not prepare input files once they've already been prepared" do
|
|
110
|
+
@archiver.prepared?.should be_false
|
|
111
|
+
@archiver.package! 'package.tar.bz2'
|
|
112
|
+
@archiver.prepared?.should be_true
|
|
113
|
+
@archiver.should_not_receive(:prepare!)
|
|
114
|
+
@archiver.package! 'package.tar.gz'
|
|
115
|
+
end
|
|
116
|
+
end
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
+
|
|
3
|
+
describe IMW::Transforms::Transferer do
|
|
4
|
+
before do
|
|
5
|
+
@local = IMW.open("foobar.txt")
|
|
6
|
+
@http = IMW.open("http://www.google.com")
|
|
7
|
+
@hdfs = IMW.open("hdfs:///path/to/foobar.txt")
|
|
8
|
+
@s3 = IMW.open("s3://mybucket/foo/bar")
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
it "should raise an error unless the action is one of :cp, :copy, :mv :move, or :mv!" do
|
|
12
|
+
IMW::Transforms::Transferer.new(:cp, @local, @http).should be
|
|
13
|
+
IMW::Transforms::Transferer.new(:copy, @local, @http).should be
|
|
14
|
+
IMW::Transforms::Transferer.new(:mv, @local, @http).should be
|
|
15
|
+
IMW::Transforms::Transferer.new(:move, @local, @http).should be
|
|
16
|
+
IMW::Transforms::Transferer.new(:mv!, @local, @http).should be
|
|
17
|
+
lambda { IMW::Transforms::Transferer.new(:foobar, @local, @http) }.should raise_error(IMW::ArgumentError)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
it "should raise an error if the source and the destination have the same URI" do
|
|
21
|
+
lambda { IMW::Transforms::Transferer.new(:cp, @local, @local) }.should raise_error(IMW::PathError)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
describe "transfering local files" do
|
|
25
|
+
it "should raise an error if the source doesn't exist" do
|
|
26
|
+
lambda { IMW::Transforms::Transferer.new(:cp, @local, 'barbaz.txt').transfer! }.should raise_error(IMW::PathError)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
it "can copy a local file" do
|
|
30
|
+
IMWTest::Random.file @local.path
|
|
31
|
+
IMW::Transforms::Transferer.new(:cp, @local, 'barbaz.txt').transfer!
|
|
32
|
+
@local.exist?.should be_true
|
|
33
|
+
IMW.open('barbaz.txt').exist?.should be_true
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
it "can move a local file" do
|
|
37
|
+
IMWTest::Random.file @local.path
|
|
38
|
+
IMW::Transforms::Transferer.new(:mv, @local, 'barbaz.txt').transfer!
|
|
39
|
+
@local.exist?.should be_false
|
|
40
|
+
IMW.open('barbaz.txt').exist?.should be_true
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
describe "transferring HTTP files" do
|
|
46
|
+
it "can copy a remote file to a local path" do
|
|
47
|
+
IMW::Transforms::Transferer.new(:cp, @http, @local).transfer!
|
|
48
|
+
@local.exist?.should be_true
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
describe "transferring S3 files" do
|
|
53
|
+
|
|
54
|
+
it "can copy an S3 file to a local path" do
|
|
55
|
+
IMW::Resources::Schemes::S3.should_receive(:get).with(@s3, @local)
|
|
56
|
+
IMW::Transforms::Transferer.new(:cp, @s3, @local).transfer!
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
it "can copy a local path to an S3 file" do
|
|
60
|
+
IMWTest::Random.file @local.path
|
|
61
|
+
IMW::Resources::Schemes::S3.should_receive(:put).with(@local, @s3)
|
|
62
|
+
IMW::Transforms::Transferer.new(:cp, @local, @s3).transfer!
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
it "can copy between S3 files" do
|
|
66
|
+
@new_s3 = IMW.open('s3://mybucket/new/path')
|
|
67
|
+
IMW::Resources::Schemes::S3.should_receive(:copy).with(@s3, @new_s3)
|
|
68
|
+
IMW::Transforms::Transferer.new(:cp, @s3, @new_s3).transfer!
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
describe "transferring HDFS files" do
|
|
73
|
+
before do
|
|
74
|
+
IMW::Resources::Schemes::HDFS.stub!(:fs)
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
it "can copy a local file to an HDFS path" do
|
|
78
|
+
IMWTest::Random.file @local.path
|
|
79
|
+
|
|
80
|
+
IMW::Resources::Schemes::HDFS.should_receive(:fs).with(:put, @local.path, @hdfs.path)
|
|
81
|
+
IMW::Transforms::Transferer.new(:cp, @local, @hdfs).transfer!
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
it "can copy an HDFS file to a local path" do
|
|
85
|
+
IMW::Resources::Schemes::HDFS.should_receive(:fs).with(:get, @hdfs.path, @local.path)
|
|
86
|
+
IMW::Transforms::Transferer.new(:cp, @hdfs, @local).transfer!
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
it "can copy between HDFS paths" do
|
|
90
|
+
@new_hdfs = IMW.open('hdfs:///a/new/path')
|
|
91
|
+
IMW::Resources::Schemes::HDFS.should_receive(:fs).with(:cp, @hdfs.path, @new_hdfs.path)
|
|
92
|
+
IMW::Transforms::Transferer.new(:cp, @hdfs, @new_hdfs).transfer!
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
it "can move between HDFS paths" do
|
|
96
|
+
@new_hdfs = IMW.open('hdfs:///a/new/path')
|
|
97
|
+
IMW::Resources::Schemes::HDFS.should_receive(:fs).with(:mv, @hdfs.path, @new_hdfs.path)
|
|
98
|
+
IMW::Transforms::Transferer.new(:mv, @hdfs, @new_hdfs).transfer!
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
it "can copy from S3 to HDFS" do
|
|
102
|
+
IMW::Resources::Schemes::HDFS.should_receive(:fs).with(:cp, @s3.s3n_url, @hdfs.path)
|
|
103
|
+
IMW::Transforms::Transferer.new(:cp, @s3, @hdfs).transfer!
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
it "can copy from HDFS to S3" do
|
|
107
|
+
IMW::Resources::Schemes::HDFS.should_receive(:fs).with(:cp, @hdfs.path, @s3.s3n_url)
|
|
108
|
+
IMW::Transforms::Transferer.new(:cp, @hdfs, @s3).transfer!
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
|
|
@@ -1,38 +1,10 @@
|
|
|
1
1
|
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
2
|
-
require '
|
|
3
|
-
require 'imw/utils/paths'
|
|
2
|
+
require File.join(File.dirname(__FILE__), '/shared_paths_spec')
|
|
4
3
|
|
|
5
4
|
describe IMW do
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
IMW::PATHS = {
|
|
9
|
-
:data => '/data',
|
|
10
|
-
:weather => 'ftp.ncdc.noaa.gov/pub/data/noaa',
|
|
11
|
-
:first => ['1', :second, 'last'],
|
|
12
|
-
:second => ['2', :third],
|
|
13
|
-
:third => ['3'],
|
|
14
|
-
}
|
|
5
|
+
before do
|
|
6
|
+
@path_manager = IMW
|
|
15
7
|
end
|
|
16
|
-
|
|
17
|
-
it 'is idempotent on a string' do
|
|
18
|
-
path_to('hi').should == 'hi'
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
it 'has an absolute path to the data dir' do
|
|
22
|
-
path_to(:data).should =~ %r{^/}
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
it 'handles mixed array and sym args' do
|
|
26
|
-
path_to( [:data, 'hi'], [[['there']]]).should == '/data/hi/there'
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
it 'expands to later generations' do
|
|
30
|
-
path_to(:first).should == File.join('1/2/3/last')
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
it 'expands interior symbols' do
|
|
34
|
-
path_to(['hadoop1:/working', :data, :weather]).should ==
|
|
35
|
-
File.join('hadoop1:/working/data/ftp.ncdc.noaa.gov/pub/data/noaa')
|
|
36
|
-
end
|
|
37
|
-
|
|
8
|
+
it_should_behave_like "an object that manages paths"
|
|
38
9
|
end
|
|
10
|
+
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
2
|
+
|
|
3
|
+
share_examples_for "an object that manages paths" do
|
|
4
|
+
before do
|
|
5
|
+
@path_manager.add_path :testing, '/testing'
|
|
6
|
+
@path_manager.add_path :first, '/1'
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
it 'returns a string when given a string' do
|
|
10
|
+
@path_manager.path_to('hi').should == 'hi'
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
it 'returns a path when given a registered symbol' do
|
|
14
|
+
@path_manager.path_to(:testing).should == '/testing'
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
it 'raises an error when given a unregistered symbol' do
|
|
18
|
+
lambda { @path_manager.path_to(:foobar) }.should raise_error(IMW::PathError)
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
it 'returns a constructed path when passed a mixture of symbols, strings, and arrays ' do
|
|
22
|
+
@path_manager.path_to( [:testing, 'hi'], [[['there']]]).should == '/testing/hi/there'
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
it 'will correctly expand paths themselves defined via symbols' do
|
|
26
|
+
@path_manager.add_path(:first, :testing, '1')
|
|
27
|
+
@path_manager.path_to(:first).should == '/testing/1'
|
|
28
|
+
end
|
|
29
|
+
end
|
data/spec/spec_helper.rb
CHANGED
|
@@ -1,23 +1,23 @@
|
|
|
1
1
|
IMW_ROOT_DIR = File.join(File.expand_path(File.dirname(__FILE__)), '..') unless defined? IMW_ROOT_DIR
|
|
2
|
-
IMW_SPEC_DIR = File.join(IMW_ROOT_DIR, 'spec')
|
|
3
|
-
IMW_LIB_DIR = File.join(IMW_ROOT_DIR, 'lib')
|
|
2
|
+
IMW_SPEC_DIR = File.join(IMW_ROOT_DIR, 'spec') unless defined? IMW_SPEC_DIR
|
|
3
|
+
IMW_LIB_DIR = File.join(IMW_ROOT_DIR, 'lib') unless defined? IMW_LIB_DIR
|
|
4
4
|
$: << IMW_LIB_DIR
|
|
5
5
|
|
|
6
6
|
require 'rubygems'
|
|
7
7
|
require 'spec'
|
|
8
|
-
require 'fileutils'
|
|
9
8
|
require 'imw'
|
|
10
9
|
|
|
11
10
|
Dir[File.dirname(__FILE__) + "/support/**/*.rb"].each { |path| require path }
|
|
12
11
|
|
|
13
12
|
module IMWTest
|
|
14
|
-
TMP_DIR
|
|
13
|
+
TMP_DIR = "/tmp/imwtest" unless defined?(TMP_DIR)
|
|
14
|
+
DATA_DIR = File.join(IMW_SPEC_DIR, 'data') unless defined?(DATA_DIR)
|
|
15
15
|
end
|
|
16
16
|
|
|
17
17
|
Spec::Runner.configure do |config|
|
|
18
18
|
|
|
19
19
|
config.include CustomMatchers
|
|
20
|
-
|
|
20
|
+
|
|
21
21
|
config.before do
|
|
22
22
|
FileUtils.mkdir_p IMWTest::TMP_DIR
|
|
23
23
|
FileUtils.cd IMWTest::TMP_DIR
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
require 'set'
|
|
2
|
+
|
|
3
|
+
module Spec
|
|
4
|
+
module Matchers
|
|
5
|
+
module IMW
|
|
6
|
+
|
|
7
|
+
class PathsMatcher
|
|
8
|
+
|
|
9
|
+
attr_accessor :given, :given_contents, :given_base, :to_match, :to_match_contents, :to_match_base
|
|
10
|
+
|
|
11
|
+
def initialize given, options={}
|
|
12
|
+
@given_base = options[:given_base] || options[:relative_to]
|
|
13
|
+
@to_match_base = options[:to_match_base]
|
|
14
|
+
@given = given
|
|
15
|
+
@given_contents = get_contents(given, given_base)
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def matches? to_match
|
|
19
|
+
@to_match = to_match
|
|
20
|
+
@to_match_contents = get_contents(to_match, to_match_base)
|
|
21
|
+
to_match_contents == given_contents
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def failure_message
|
|
25
|
+
given_string = given_contents.to_a.join("\n\t")
|
|
26
|
+
to_match_string = to_match_contents.to_a.join("\n\t")
|
|
27
|
+
"expected contents to be identical.\n\ngiven #{given.inspect}:\n\t#{given_string}\n\nto match #{to_match}:\n\t#{to_match_string}"
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def negative_failure_message
|
|
31
|
+
"expected contents of #{given} and #{to_match} to be different"
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
protected
|
|
35
|
+
def get_contents obj, base=nil
|
|
36
|
+
if obj.is_a?(String) || obj.is_a?(Array)
|
|
37
|
+
contents = [obj].flatten.map do |raw_path|
|
|
38
|
+
path = File.expand_path(raw_path)
|
|
39
|
+
if File.directory?(path)
|
|
40
|
+
Dir[path + "/**/*"]
|
|
41
|
+
else
|
|
42
|
+
path
|
|
43
|
+
end
|
|
44
|
+
end.flatten
|
|
45
|
+
else
|
|
46
|
+
# obj is an IMW obj (archive or directory) so it has a
|
|
47
|
+
# contents method
|
|
48
|
+
contents = obj.contents
|
|
49
|
+
end
|
|
50
|
+
if base
|
|
51
|
+
contents.map do |path|
|
|
52
|
+
new_path = path[base.length + 1..-1]
|
|
53
|
+
new_path = nil if !new_path.nil? && new_path.size == 0
|
|
54
|
+
new_path
|
|
55
|
+
end.compact.to_set
|
|
56
|
+
else
|
|
57
|
+
contents.to_set
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def contain_paths_like given, options={}
|
|
63
|
+
PathsMatcher.new(given, options)
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
end
|
data/spec/support/random.rb
CHANGED
|
@@ -3,36 +3,23 @@ require 'fileutils'
|
|
|
3
3
|
module IMWTest
|
|
4
4
|
module Random
|
|
5
5
|
|
|
6
|
-
STRING_CHARS = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + [' ',' ',' ',' ',' ']
|
|
7
|
-
TEXT_CHARS = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + [' ',' ',' ',' ',' ',"\n"]
|
|
8
|
-
FILENAME_CHARS = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + ["-","_"]
|
|
9
|
-
FILENAME_MAX_LENGTH = 9
|
|
10
|
-
TEXT_MAX_LENGTH = 1024
|
|
11
|
-
EXTENSIONS =
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
else
|
|
24
|
-
{
|
|
25
|
-
:tar => "tar",
|
|
26
|
-
:rar => "rar",
|
|
27
|
-
:zip => "zip",
|
|
28
|
-
:unzip => "unzip",
|
|
29
|
-
:gzip => "gzip",
|
|
30
|
-
:bzip2 => "bzip2",
|
|
31
|
-
:wget => "wget"
|
|
32
|
-
}
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
private
|
|
6
|
+
STRING_CHARS = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + [' ',' ',' ',' ',' '] unless defined?(STRING_CHARS)
|
|
7
|
+
TEXT_CHARS = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + [' ',' ',' ',' ',' ',"\n"] unless defined?(TEXT_CHARS)
|
|
8
|
+
FILENAME_CHARS = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + ["-","_",' '] unless defined?(FILENAME_CHARS)
|
|
9
|
+
FILENAME_MAX_LENGTH = 9 unless defined?(FILENAME_MAX_LENGTH)
|
|
10
|
+
TEXT_MAX_LENGTH = 1024 unless defined?(TEXT_MAX_LENGTH)
|
|
11
|
+
EXTENSIONS = [
|
|
12
|
+
[/\.csv$/ , :csv_file],
|
|
13
|
+
[/\.xml$/ , :xml_file],
|
|
14
|
+
[/\.html$/ , :html_file],
|
|
15
|
+
[/\.tar\.gz$/ , :targz_file],
|
|
16
|
+
[/\.tar\.bz2$/ , :tarbz2_file],
|
|
17
|
+
[/\.bz2$/ , :bz2_file],
|
|
18
|
+
[/\.gz$/ , :gz_file],
|
|
19
|
+
[/\.tar$/ , :tar_file],
|
|
20
|
+
[/\.rar$/ , :rar_file],
|
|
21
|
+
[/\.zip$/ , :zip_file]
|
|
22
|
+
] unless defined?(EXTENSIONS)
|
|
36
23
|
# Return a random filename. Optional +length+ to set the maximum
|
|
37
24
|
# length of the filename returned.
|
|
38
25
|
def self.basename options = {}
|
|
@@ -54,7 +41,6 @@ module IMWTest
|
|
|
54
41
|
(1..length).map { |i| char_pool.random }.join
|
|
55
42
|
end
|
|
56
43
|
|
|
57
|
-
public
|
|
58
44
|
# Create a random file by matching the extension of the given
|
|
59
45
|
# +filename+ or a text file if no match is found.
|
|
60
46
|
def self.file filename
|
|
@@ -115,7 +101,7 @@ module IMWTest
|
|
|
115
101
|
def self.tar_file filename
|
|
116
102
|
tmpd = File.dirname(filename) + '/dir'
|
|
117
103
|
directory_with_files(tmpd)
|
|
118
|
-
FileUtils.cd(tmpd) {|dir| system("
|
|
104
|
+
FileUtils.cd(tmpd) {|dir| system("tar -cf file.tar *") }
|
|
119
105
|
FileUtils.cp(tmpd + "/file.tar",filename)
|
|
120
106
|
FileUtils.rm_rf(tmpd)
|
|
121
107
|
end
|
|
@@ -126,7 +112,7 @@ module IMWTest
|
|
|
126
112
|
tar = File.dirname(filename) + "/file.tar"
|
|
127
113
|
targz = tar + ".gz"
|
|
128
114
|
tar_file tar
|
|
129
|
-
system("
|
|
115
|
+
system("gzip #{tar}")
|
|
130
116
|
FileUtils.cp(targz,filename)
|
|
131
117
|
FileUtils.rm(targz)
|
|
132
118
|
end
|
|
@@ -137,17 +123,34 @@ module IMWTest
|
|
|
137
123
|
tar = File.dirname(filename) + "/file.tar"
|
|
138
124
|
tarbz2 = tar + ".bz2"
|
|
139
125
|
tar_file tar
|
|
140
|
-
system("
|
|
126
|
+
system("bzip2 #{tar}")
|
|
141
127
|
FileUtils.cp(tarbz2,filename)
|
|
142
128
|
FileUtils.rm(tarbz2)
|
|
143
129
|
end
|
|
144
130
|
|
|
131
|
+
# Create a .bz2 file at the given +filename+.
|
|
132
|
+
def self.bz2_file filename
|
|
133
|
+
text_path = File.dirname(filename) + "/fake_file"
|
|
134
|
+
text_file(text_path)
|
|
135
|
+
system("bzip2 #{text_path}")
|
|
136
|
+
FileUtils.mv(text_path + ".bz2", filename)
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
# Create a .gz file at the given +filename+.
|
|
140
|
+
def self.gz_file filename
|
|
141
|
+
text_path = File.dirname(filename) + "/fake_file"
|
|
142
|
+
text_file(text_path)
|
|
143
|
+
system("gzip #{text_path}")
|
|
144
|
+
FileUtils.mv(text_path + ".gz", filename)
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
|
|
145
148
|
# Create a compressed rar archive at the given +filename+
|
|
146
149
|
# containing random files.
|
|
147
150
|
def self.rar_file filename
|
|
148
151
|
tmpd = File.dirname(filename) + '/dir'
|
|
149
152
|
directory_with_files(tmpd)
|
|
150
|
-
FileUtils.cd(tmpd) {|dir| system("
|
|
153
|
+
FileUtils.cd(tmpd) {|dir| system("rar a -o+ -inul file.rar *") }
|
|
151
154
|
FileUtils.cp(tmpd + "/file.rar",filename)
|
|
152
155
|
FileUtils.rm_rf(tmpd)
|
|
153
156
|
end
|
|
@@ -157,7 +160,7 @@ module IMWTest
|
|
|
157
160
|
def self.zip_file filename
|
|
158
161
|
tmpd = File.dirname(filename) + '/dir'
|
|
159
162
|
directory_with_files(tmpd)
|
|
160
|
-
FileUtils.cd(tmpd) {|dir| system("
|
|
163
|
+
FileUtils.cd(tmpd) {|dir| system("zip -qqr file.zip *") }
|
|
161
164
|
FileUtils.cp(tmpd + "/file.zip",filename)
|
|
162
165
|
FileUtils.rm_rf(tmpd)
|
|
163
166
|
end
|