imw 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -1
- data/Rakefile +10 -0
- data/TODO +18 -0
- data/VERSION +1 -1
- data/bin/imw +1 -1
- data/etc/imwrc.rb +0 -50
- data/examples/dataset.rb +12 -0
- data/lib/imw/boot.rb +55 -9
- data/lib/imw/dataset/paths.rb +15 -24
- data/lib/imw/dataset/workflow.rb +131 -72
- data/lib/imw/dataset.rb +94 -186
- data/lib/imw/parsers/html_parser.rb +1 -1
- data/lib/imw/parsers.rb +1 -1
- data/lib/imw/repository.rb +3 -27
- data/lib/imw/resource.rb +190 -0
- data/lib/imw/resources/archive.rb +97 -0
- data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
- data/lib/imw/resources/archives_and_compressed.rb +32 -0
- data/lib/imw/resources/compressed_file.rb +89 -0
- data/lib/imw/resources/compressible.rb +77 -0
- data/lib/imw/resources/formats/delimited.rb +92 -0
- data/lib/imw/resources/formats/excel.rb +125 -0
- data/lib/imw/resources/formats/json.rb +53 -0
- data/lib/imw/resources/formats/sgml.rb +72 -0
- data/lib/imw/resources/formats/yaml.rb +53 -0
- data/lib/imw/resources/formats.rb +32 -0
- data/lib/imw/resources/local.rb +198 -0
- data/lib/imw/resources/remote.rb +110 -0
- data/lib/imw/resources/schemes/hdfs.rb +242 -0
- data/lib/imw/resources/schemes/http.rb +161 -0
- data/lib/imw/resources/schemes/s3.rb +137 -0
- data/lib/imw/resources/schemes.rb +19 -0
- data/lib/imw/resources.rb +118 -0
- data/lib/imw/runner.rb +5 -4
- data/lib/imw/transforms/archiver.rb +215 -0
- data/lib/imw/transforms/transferer.rb +103 -0
- data/lib/imw/transforms.rb +8 -0
- data/lib/imw/utils/error.rb +26 -30
- data/lib/imw/utils/extensions/array.rb +5 -15
- data/lib/imw/utils/extensions/hash.rb +6 -16
- data/lib/imw/utils/extensions/hpricot.rb +0 -14
- data/lib/imw/utils/extensions/string.rb +5 -15
- data/lib/imw/utils/extensions/symbol.rb +0 -13
- data/lib/imw/utils/extensions.rb +65 -0
- data/lib/imw/utils/log.rb +14 -13
- data/lib/imw/utils/misc.rb +0 -6
- data/lib/imw/utils/paths.rb +101 -42
- data/lib/imw/utils/version.rb +8 -9
- data/lib/imw/utils.rb +2 -18
- data/lib/imw.rb +92 -17
- data/spec/data/sample.csv +1 -1
- data/spec/data/sample.json +1 -0
- data/spec/data/sample.tsv +1 -1
- data/spec/data/sample.txt +1 -1
- data/spec/data/sample.xml +1 -1
- data/spec/data/sample.yaml +1 -1
- data/spec/imw/dataset/paths_spec.rb +32 -0
- data/spec/imw/dataset/workflow_spec.rb +41 -0
- data/spec/imw/resource_spec.rb +79 -0
- data/spec/imw/resources/archive_spec.rb +69 -0
- data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
- data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
- data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
- data/spec/imw/resources/compressed_file_spec.rb +48 -0
- data/spec/imw/resources/compressible_spec.rb +36 -0
- data/spec/imw/resources/formats/delimited_spec.rb +33 -0
- data/spec/imw/resources/formats/json_spec.rb +32 -0
- data/spec/imw/resources/formats/sgml_spec.rb +24 -0
- data/spec/imw/resources/formats/yaml_spec.rb +41 -0
- data/spec/imw/resources/local_spec.rb +98 -0
- data/spec/imw/resources/remote_spec.rb +35 -0
- data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
- data/spec/imw/resources/schemes/http_spec.rb +19 -0
- data/spec/imw/resources/schemes/s3_spec.rb +19 -0
- data/spec/imw/transforms/archiver_spec.rb +120 -0
- data/spec/imw/transforms/transferer_spec.rb +113 -0
- data/spec/imw/utils/paths_spec.rb +5 -33
- data/spec/imw/utils/shared_paths_spec.rb +29 -0
- data/spec/spec_helper.rb +5 -5
- data/spec/support/paths_matcher.rb +67 -0
- data/spec/support/random.rb +39 -36
- metadata +88 -75
- data/lib/imw/dataset/task.rb +0 -41
- data/lib/imw/files/archive.rb +0 -113
- data/lib/imw/files/basicfile.rb +0 -122
- data/lib/imw/files/binary.rb +0 -28
- data/lib/imw/files/compressed_file.rb +0 -93
- data/lib/imw/files/compressed_files_and_archives.rb +0 -334
- data/lib/imw/files/compressible.rb +0 -103
- data/lib/imw/files/csv.rb +0 -113
- data/lib/imw/files/directory.rb +0 -62
- data/lib/imw/files/excel.rb +0 -84
- data/lib/imw/files/json.rb +0 -41
- data/lib/imw/files/sgml.rb +0 -46
- data/lib/imw/files/text.rb +0 -68
- data/lib/imw/files/yaml.rb +0 -46
- data/lib/imw/files.rb +0 -125
- data/lib/imw/packagers/archiver.rb +0 -126
- data/lib/imw/packagers/s3_mover.rb +0 -36
- data/lib/imw/packagers.rb +0 -8
- data/lib/imw/utils/components.rb +0 -61
- data/lib/imw/utils/config.rb +0 -46
- data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
- data/lib/imw/utils/extensions/core.rb +0 -27
- data/lib/imw/utils/extensions/dir.rb +0 -24
- data/lib/imw/utils/extensions/file_core.rb +0 -64
- data/lib/imw/utils/extensions/typed_struct.rb +0 -22
- data/lib/imw/utils/extensions/uri.rb +0 -59
- data/lib/imw/utils/view/dump_csv.rb +0 -112
- data/lib/imw/utils/view/dump_csv_older.rb +0 -117
- data/lib/imw/utils/view.rb +0 -113
- data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
- data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
- data/spec/imw/files/archive_spec.rb +0 -118
- data/spec/imw/files/basicfile_spec.rb +0 -121
- data/spec/imw/files/bz2_spec.rb +0 -32
- data/spec/imw/files/compressed_file_spec.rb +0 -96
- data/spec/imw/files/compressible_spec.rb +0 -100
- data/spec/imw/files/file_spec.rb +0 -144
- data/spec/imw/files/gz_spec.rb +0 -32
- data/spec/imw/files/rar_spec.rb +0 -33
- data/spec/imw/files/tar_spec.rb +0 -31
- data/spec/imw/files/text_spec.rb +0 -23
- data/spec/imw/files/zip_spec.rb +0 -31
- data/spec/imw/files_spec.rb +0 -38
- data/spec/imw/packagers/archiver_spec.rb +0 -125
- data/spec/imw/packagers/s3_mover_spec.rb +0 -7
- data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
- data/spec/imw/utils/extensions/find_spec.rb +0 -113
- data/spec/imw/workflow/rip/local_spec.rb +0 -89
- data/spec/imw/workflow/rip_spec.rb +0 -27
- data/spec/support/archive_contents_matcher.rb +0 -94
- data/spec/support/directory_contents_matcher.rb +0 -61
@@ -0,0 +1,61 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
2
|
+
|
3
|
+
describe IMW::Resources::Schemes::HDFS do
|
4
|
+
before do
|
5
|
+
def fake_hdfs_resource path, num_dirs=nil, num_files=nil, size=nil
|
6
|
+
if num_dirs && num_files && size
|
7
|
+
response = " #{num_dirs} #{num_files} #{size} hdfs://localhost#{path}"
|
8
|
+
else
|
9
|
+
response = ""
|
10
|
+
end
|
11
|
+
IMW::Resources::Schemes::HDFS.should_receive(:fs).with(:count, path).and_return(response)
|
12
|
+
IMW.open("hdfs://#{path}")
|
13
|
+
end
|
14
|
+
@path = '/path/to/myfile'
|
15
|
+
end
|
16
|
+
|
17
|
+
describe "refreshing its properties" do
|
18
|
+
it "should correctly get properties for a resource which exists" do
|
19
|
+
resource = fake_hdfs_resource(@path, 2, 3, 1000)
|
20
|
+
resource.exist?.should be_true
|
21
|
+
resource.num_dirs.should == 2
|
22
|
+
resource.num_files.should == 3
|
23
|
+
resource.size.should == 1000
|
24
|
+
end
|
25
|
+
|
26
|
+
it "should gracefully handle a resource which doesn't exist" do
|
27
|
+
resource = fake_hdfs_resource(@path)
|
28
|
+
resource.exist?.should be_false
|
29
|
+
end
|
30
|
+
|
31
|
+
it "should execute the correct command to delete the path" do
|
32
|
+
resource = fake_hdfs_resource(@path, 2, 3, 1000)
|
33
|
+
IMW::Resources::Schemes::HDFS.should_receive(:fs).with(:rm, resource.path)
|
34
|
+
resource.rm
|
35
|
+
end
|
36
|
+
|
37
|
+
it "should execute the correct command to delete the path when skipping the trash" do
|
38
|
+
resource = fake_hdfs_resource(@path, 2, 3, 1000)
|
39
|
+
IMW::Resources::Schemes::HDFS.should_receive(:fs).with(:rm, '-skipTrash', resource.path)
|
40
|
+
resource.rm :skip_trash => true
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should recognize a file and extend it properly" do
|
44
|
+
resource = fake_hdfs_resource(@path, 0, 1, 1000)
|
45
|
+
resource.num_dirs.should == 0
|
46
|
+
resource.num_files.should == 1
|
47
|
+
resource.exist?.should be_true
|
48
|
+
resource.is_directory?.should be_false
|
49
|
+
resource.resource_modules.should include(IMW::Resources::Schemes::HDFSFile)
|
50
|
+
end
|
51
|
+
|
52
|
+
it "should recognize a directory and extend it properly" do
|
53
|
+
resource = fake_hdfs_resource(@path, 2, 1, 1000)
|
54
|
+
resource.num_dirs.should == 2
|
55
|
+
resource.num_files.should == 1
|
56
|
+
resource.exist?.should be_true
|
57
|
+
resource.is_directory?.should be_true
|
58
|
+
resource.resource_modules.should include(IMW::Resources::Schemes::HDFSDirectory)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
2
|
+
|
3
|
+
describe IMW::Resources::Schemes::HTTP do
|
4
|
+
|
5
|
+
describe "finding its effective basename" do
|
6
|
+
it "should use the real basename when present" do
|
7
|
+
IMW.open('http://www.google.com/foobar').effective_basename.should == 'foobar'
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should use '_index' when at the root (without a slash)" do
|
11
|
+
IMW.open('http://www.google.com').effective_basename.should == '_index'
|
12
|
+
end
|
13
|
+
|
14
|
+
it "should use '_index' when at the root (even when a slash is given)" do
|
15
|
+
IMW.open('http://www.google.com/').effective_basename.should == '_index'
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
2
|
+
|
3
|
+
describe IMW::Resources::Schemes::S3 do
|
4
|
+
|
5
|
+
describe 'manipulating S3 paths' do
|
6
|
+
before do
|
7
|
+
@resource = IMW::Resource.new('s3://mybucket/foobar/foo.txt')
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should set the bucket" do
|
11
|
+
@resource.bucket.should == 'mybucket'
|
12
|
+
end
|
13
|
+
|
14
|
+
it "can generate an S3N url" do
|
15
|
+
@resource.s3n_url.should == 's3n://mybucket/foobar/foo.txt'
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../spec_helper"
|
2
|
+
|
3
|
+
describe IMW::Transforms::Archiver do
|
4
|
+
before do
|
5
|
+
@name = 'foobar'
|
6
|
+
|
7
|
+
# remote files
|
8
|
+
@homepage = "http://www.google.com"
|
9
|
+
@website = "http://www.google.com/support/"
|
10
|
+
@remote_files = [@homepage, @website]
|
11
|
+
|
12
|
+
# regular files
|
13
|
+
@csv = "foobar-csv.csv"
|
14
|
+
@xml = "foobar-xml.xml"
|
15
|
+
@txt = "foobar-txt.txt"
|
16
|
+
@blah = "foobar"
|
17
|
+
@files = [@csv, @xml, @txt, @blah]
|
18
|
+
|
19
|
+
# compressed files
|
20
|
+
@bz2 = "foobar-bz2.bz2"
|
21
|
+
@gz = "foobar-gz.gz"
|
22
|
+
@compressed_files = [@bz2, @gz]
|
23
|
+
|
24
|
+
# archives
|
25
|
+
@zip = "foobar-zip.zip"
|
26
|
+
@tarbz2 = "foobar-tarbz2.tar.bz2"
|
27
|
+
@targz = "foobar-targz.tar.gz"
|
28
|
+
@tar = "foobar-tar.tar"
|
29
|
+
@rar = "foobar-rar.rar"
|
30
|
+
@archives = [@zip, @tarbz2, @targz, @rar, @tar]
|
31
|
+
|
32
|
+
@local_files = @files + @compressed_files + @archives
|
33
|
+
|
34
|
+
@all_files = @remote_files + @local_files
|
35
|
+
|
36
|
+
@local_files.each do |path|
|
37
|
+
IMWTest::Random.file path
|
38
|
+
end
|
39
|
+
|
40
|
+
@archiver = IMW::Transforms::Archiver.new @name, @all_files
|
41
|
+
end
|
42
|
+
|
43
|
+
after do
|
44
|
+
@archiver.clean!
|
45
|
+
end
|
46
|
+
|
47
|
+
describe "preparing input files" do
|
48
|
+
|
49
|
+
describe "before preparing input files" do
|
50
|
+
it "should not be prepared when initialized" do
|
51
|
+
@archiver.prepared?.should be_false
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
describe "after preparing files" do
|
56
|
+
before { @archiver.prepare! }
|
57
|
+
|
58
|
+
it "should be prepared" do
|
59
|
+
@archiver.prepared?.should be_true
|
60
|
+
end
|
61
|
+
|
62
|
+
it "should name its archive directory properly" do
|
63
|
+
@archiver.tmp_dir.should contain(@name)
|
64
|
+
end
|
65
|
+
|
66
|
+
it "should copy regular files to its archive directory" do
|
67
|
+
@archiver.dir.should contain(*@files)
|
68
|
+
@local_files.each { |path| IMW.open(path).exist?.should be_true }
|
69
|
+
end
|
70
|
+
|
71
|
+
it "should copy remote files to its archive directory" do
|
72
|
+
@archiver.dir.should contain('_index', 'support') # _index from Http#effective_basename on http://www.google.com
|
73
|
+
end
|
74
|
+
|
75
|
+
it "should uncompress compressed files to its archive directory" do
|
76
|
+
@archiver.dir.should contain('foobar-bz2', 'foobar-gz')
|
77
|
+
@archiver.dir.should_not contain(*@compressed_files)
|
78
|
+
end
|
79
|
+
|
80
|
+
it "should copy the content of archive files to its archive directory (but not the actual archives)" do
|
81
|
+
@archives.each do |archive|
|
82
|
+
@archiver.dir.should_not contain(archive)
|
83
|
+
@archiver.dir.should contain(*IMW.open(archive).contents)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
describe "when packaging files" do
|
91
|
+
@packages = ["package.tar.bz2", "package.zip", "package.tar.gz", "package.tar", "package.rar"]
|
92
|
+
|
93
|
+
@packages.each do |package|
|
94
|
+
it "should create a #{package} file containing all the files and return it" do
|
95
|
+
output = @archiver.package!(package)
|
96
|
+
output.basename.should == package
|
97
|
+
@archiver.tmp_dir.should contain(IMW.open(package).contents)
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
describe 'when packaging into multiple output formats' do
|
102
|
+
|
103
|
+
it "should prepare input files without being asked" do
|
104
|
+
@archiver.prepared?.should be_false
|
105
|
+
@archiver.package! 'package.tar.bz2'
|
106
|
+
@archiver.prepared?.should be_true
|
107
|
+
end
|
108
|
+
|
109
|
+
it "should not prepare input files once they've already been prepared" do
|
110
|
+
@archiver.prepared?.should be_false
|
111
|
+
@archiver.package! 'package.tar.bz2'
|
112
|
+
@archiver.prepared?.should be_true
|
113
|
+
@archiver.should_not_receive(:prepare!)
|
114
|
+
@archiver.package! 'package.tar.gz'
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../spec_helper"
|
2
|
+
|
3
|
+
describe IMW::Transforms::Transferer do
|
4
|
+
before do
|
5
|
+
@local = IMW.open("foobar.txt")
|
6
|
+
@http = IMW.open("http://www.google.com")
|
7
|
+
@hdfs = IMW.open("hdfs:///path/to/foobar.txt")
|
8
|
+
@s3 = IMW.open("s3://mybucket/foo/bar")
|
9
|
+
end
|
10
|
+
|
11
|
+
it "should raise an error unless the action is one of :cp, :copy, :mv :move, or :mv!" do
|
12
|
+
IMW::Transforms::Transferer.new(:cp, @local, @http).should be
|
13
|
+
IMW::Transforms::Transferer.new(:copy, @local, @http).should be
|
14
|
+
IMW::Transforms::Transferer.new(:mv, @local, @http).should be
|
15
|
+
IMW::Transforms::Transferer.new(:move, @local, @http).should be
|
16
|
+
IMW::Transforms::Transferer.new(:mv!, @local, @http).should be
|
17
|
+
lambda { IMW::Transforms::Transferer.new(:foobar, @local, @http) }.should raise_error(IMW::ArgumentError)
|
18
|
+
end
|
19
|
+
|
20
|
+
it "should raise an error if the source and the destination have the same URI" do
|
21
|
+
lambda { IMW::Transforms::Transferer.new(:cp, @local, @local) }.should raise_error(IMW::PathError)
|
22
|
+
end
|
23
|
+
|
24
|
+
describe "transfering local files" do
|
25
|
+
it "should raise an error if the source doesn't exist" do
|
26
|
+
lambda { IMW::Transforms::Transferer.new(:cp, @local, 'barbaz.txt').transfer! }.should raise_error(IMW::PathError)
|
27
|
+
end
|
28
|
+
|
29
|
+
it "can copy a local file" do
|
30
|
+
IMWTest::Random.file @local.path
|
31
|
+
IMW::Transforms::Transferer.new(:cp, @local, 'barbaz.txt').transfer!
|
32
|
+
@local.exist?.should be_true
|
33
|
+
IMW.open('barbaz.txt').exist?.should be_true
|
34
|
+
end
|
35
|
+
|
36
|
+
it "can move a local file" do
|
37
|
+
IMWTest::Random.file @local.path
|
38
|
+
IMW::Transforms::Transferer.new(:mv, @local, 'barbaz.txt').transfer!
|
39
|
+
@local.exist?.should be_false
|
40
|
+
IMW.open('barbaz.txt').exist?.should be_true
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
44
|
+
|
45
|
+
describe "transferring HTTP files" do
|
46
|
+
it "can copy a remote file to a local path" do
|
47
|
+
IMW::Transforms::Transferer.new(:cp, @http, @local).transfer!
|
48
|
+
@local.exist?.should be_true
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
describe "transferring S3 files" do
|
53
|
+
|
54
|
+
it "can copy an S3 file to a local path" do
|
55
|
+
IMW::Resources::Schemes::S3.should_receive(:get).with(@s3, @local)
|
56
|
+
IMW::Transforms::Transferer.new(:cp, @s3, @local).transfer!
|
57
|
+
end
|
58
|
+
|
59
|
+
it "can copy a local path to an S3 file" do
|
60
|
+
IMWTest::Random.file @local.path
|
61
|
+
IMW::Resources::Schemes::S3.should_receive(:put).with(@local, @s3)
|
62
|
+
IMW::Transforms::Transferer.new(:cp, @local, @s3).transfer!
|
63
|
+
end
|
64
|
+
|
65
|
+
it "can copy between S3 files" do
|
66
|
+
@new_s3 = IMW.open('s3://mybucket/new/path')
|
67
|
+
IMW::Resources::Schemes::S3.should_receive(:copy).with(@s3, @new_s3)
|
68
|
+
IMW::Transforms::Transferer.new(:cp, @s3, @new_s3).transfer!
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
describe "transferring HDFS files" do
|
73
|
+
before do
|
74
|
+
IMW::Resources::Schemes::HDFS.stub!(:fs)
|
75
|
+
end
|
76
|
+
|
77
|
+
it "can copy a local file to an HDFS path" do
|
78
|
+
IMWTest::Random.file @local.path
|
79
|
+
|
80
|
+
IMW::Resources::Schemes::HDFS.should_receive(:fs).with(:put, @local.path, @hdfs.path)
|
81
|
+
IMW::Transforms::Transferer.new(:cp, @local, @hdfs).transfer!
|
82
|
+
end
|
83
|
+
|
84
|
+
it "can copy an HDFS file to a local path" do
|
85
|
+
IMW::Resources::Schemes::HDFS.should_receive(:fs).with(:get, @hdfs.path, @local.path)
|
86
|
+
IMW::Transforms::Transferer.new(:cp, @hdfs, @local).transfer!
|
87
|
+
end
|
88
|
+
|
89
|
+
it "can copy between HDFS paths" do
|
90
|
+
@new_hdfs = IMW.open('hdfs:///a/new/path')
|
91
|
+
IMW::Resources::Schemes::HDFS.should_receive(:fs).with(:cp, @hdfs.path, @new_hdfs.path)
|
92
|
+
IMW::Transforms::Transferer.new(:cp, @hdfs, @new_hdfs).transfer!
|
93
|
+
end
|
94
|
+
|
95
|
+
it "can move between HDFS paths" do
|
96
|
+
@new_hdfs = IMW.open('hdfs:///a/new/path')
|
97
|
+
IMW::Resources::Schemes::HDFS.should_receive(:fs).with(:mv, @hdfs.path, @new_hdfs.path)
|
98
|
+
IMW::Transforms::Transferer.new(:mv, @hdfs, @new_hdfs).transfer!
|
99
|
+
end
|
100
|
+
|
101
|
+
it "can copy from S3 to HDFS" do
|
102
|
+
IMW::Resources::Schemes::HDFS.should_receive(:fs).with(:cp, @s3.s3n_url, @hdfs.path)
|
103
|
+
IMW::Transforms::Transferer.new(:cp, @s3, @hdfs).transfer!
|
104
|
+
end
|
105
|
+
|
106
|
+
it "can copy from HDFS to S3" do
|
107
|
+
IMW::Resources::Schemes::HDFS.should_receive(:fs).with(:cp, @hdfs.path, @s3.s3n_url)
|
108
|
+
IMW::Transforms::Transferer.new(:cp, @hdfs, @s3).transfer!
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
|
@@ -1,38 +1,10 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
2
|
-
require '
|
3
|
-
require 'imw/utils/paths'
|
2
|
+
require File.join(File.dirname(__FILE__), '/shared_paths_spec')
|
4
3
|
|
5
4
|
describe IMW do
|
6
|
-
|
7
|
-
|
8
|
-
IMW::PATHS = {
|
9
|
-
:data => '/data',
|
10
|
-
:weather => 'ftp.ncdc.noaa.gov/pub/data/noaa',
|
11
|
-
:first => ['1', :second, 'last'],
|
12
|
-
:second => ['2', :third],
|
13
|
-
:third => ['3'],
|
14
|
-
}
|
5
|
+
before do
|
6
|
+
@path_manager = IMW
|
15
7
|
end
|
16
|
-
|
17
|
-
it 'is idempotent on a string' do
|
18
|
-
path_to('hi').should == 'hi'
|
19
|
-
end
|
20
|
-
|
21
|
-
it 'has an absolute path to the data dir' do
|
22
|
-
path_to(:data).should =~ %r{^/}
|
23
|
-
end
|
24
|
-
|
25
|
-
it 'handles mixed array and sym args' do
|
26
|
-
path_to( [:data, 'hi'], [[['there']]]).should == '/data/hi/there'
|
27
|
-
end
|
28
|
-
|
29
|
-
it 'expands to later generations' do
|
30
|
-
path_to(:first).should == File.join('1/2/3/last')
|
31
|
-
end
|
32
|
-
|
33
|
-
it 'expands interior symbols' do
|
34
|
-
path_to(['hadoop1:/working', :data, :weather]).should ==
|
35
|
-
File.join('hadoop1:/working/data/ftp.ncdc.noaa.gov/pub/data/noaa')
|
36
|
-
end
|
37
|
-
|
8
|
+
it_should_behave_like "an object that manages paths"
|
38
9
|
end
|
10
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
2
|
+
|
3
|
+
share_examples_for "an object that manages paths" do
|
4
|
+
before do
|
5
|
+
@path_manager.add_path :testing, '/testing'
|
6
|
+
@path_manager.add_path :first, '/1'
|
7
|
+
end
|
8
|
+
|
9
|
+
it 'returns a string when given a string' do
|
10
|
+
@path_manager.path_to('hi').should == 'hi'
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'returns a path when given a registered symbol' do
|
14
|
+
@path_manager.path_to(:testing).should == '/testing'
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'raises an error when given a unregistered symbol' do
|
18
|
+
lambda { @path_manager.path_to(:foobar) }.should raise_error(IMW::PathError)
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'returns a constructed path when passed a mixture of symbols, strings, and arrays ' do
|
22
|
+
@path_manager.path_to( [:testing, 'hi'], [[['there']]]).should == '/testing/hi/there'
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'will correctly expand paths themselves defined via symbols' do
|
26
|
+
@path_manager.add_path(:first, :testing, '1')
|
27
|
+
@path_manager.path_to(:first).should == '/testing/1'
|
28
|
+
end
|
29
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,23 +1,23 @@
|
|
1
1
|
IMW_ROOT_DIR = File.join(File.expand_path(File.dirname(__FILE__)), '..') unless defined? IMW_ROOT_DIR
|
2
|
-
IMW_SPEC_DIR = File.join(IMW_ROOT_DIR, 'spec')
|
3
|
-
IMW_LIB_DIR = File.join(IMW_ROOT_DIR, 'lib')
|
2
|
+
IMW_SPEC_DIR = File.join(IMW_ROOT_DIR, 'spec') unless defined? IMW_SPEC_DIR
|
3
|
+
IMW_LIB_DIR = File.join(IMW_ROOT_DIR, 'lib') unless defined? IMW_LIB_DIR
|
4
4
|
$: << IMW_LIB_DIR
|
5
5
|
|
6
6
|
require 'rubygems'
|
7
7
|
require 'spec'
|
8
|
-
require 'fileutils'
|
9
8
|
require 'imw'
|
10
9
|
|
11
10
|
Dir[File.dirname(__FILE__) + "/support/**/*.rb"].each { |path| require path }
|
12
11
|
|
13
12
|
module IMWTest
|
14
|
-
TMP_DIR
|
13
|
+
TMP_DIR = "/tmp/imwtest" unless defined?(TMP_DIR)
|
14
|
+
DATA_DIR = File.join(IMW_SPEC_DIR, 'data') unless defined?(DATA_DIR)
|
15
15
|
end
|
16
16
|
|
17
17
|
Spec::Runner.configure do |config|
|
18
18
|
|
19
19
|
config.include CustomMatchers
|
20
|
-
|
20
|
+
|
21
21
|
config.before do
|
22
22
|
FileUtils.mkdir_p IMWTest::TMP_DIR
|
23
23
|
FileUtils.cd IMWTest::TMP_DIR
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
module Spec
|
4
|
+
module Matchers
|
5
|
+
module IMW
|
6
|
+
|
7
|
+
class PathsMatcher
|
8
|
+
|
9
|
+
attr_accessor :given, :given_contents, :given_base, :to_match, :to_match_contents, :to_match_base
|
10
|
+
|
11
|
+
def initialize given, options={}
|
12
|
+
@given_base = options[:given_base] || options[:relative_to]
|
13
|
+
@to_match_base = options[:to_match_base]
|
14
|
+
@given = given
|
15
|
+
@given_contents = get_contents(given, given_base)
|
16
|
+
end
|
17
|
+
|
18
|
+
def matches? to_match
|
19
|
+
@to_match = to_match
|
20
|
+
@to_match_contents = get_contents(to_match, to_match_base)
|
21
|
+
to_match_contents == given_contents
|
22
|
+
end
|
23
|
+
|
24
|
+
def failure_message
|
25
|
+
given_string = given_contents.to_a.join("\n\t")
|
26
|
+
to_match_string = to_match_contents.to_a.join("\n\t")
|
27
|
+
"expected contents to be identical.\n\ngiven #{given.inspect}:\n\t#{given_string}\n\nto match #{to_match}:\n\t#{to_match_string}"
|
28
|
+
end
|
29
|
+
|
30
|
+
def negative_failure_message
|
31
|
+
"expected contents of #{given} and #{to_match} to be different"
|
32
|
+
end
|
33
|
+
|
34
|
+
protected
|
35
|
+
def get_contents obj, base=nil
|
36
|
+
if obj.is_a?(String) || obj.is_a?(Array)
|
37
|
+
contents = [obj].flatten.map do |raw_path|
|
38
|
+
path = File.expand_path(raw_path)
|
39
|
+
if File.directory?(path)
|
40
|
+
Dir[path + "/**/*"]
|
41
|
+
else
|
42
|
+
path
|
43
|
+
end
|
44
|
+
end.flatten
|
45
|
+
else
|
46
|
+
# obj is an IMW obj (archive or directory) so it has a
|
47
|
+
# contents method
|
48
|
+
contents = obj.contents
|
49
|
+
end
|
50
|
+
if base
|
51
|
+
contents.map do |path|
|
52
|
+
new_path = path[base.length + 1..-1]
|
53
|
+
new_path = nil if !new_path.nil? && new_path.size == 0
|
54
|
+
new_path
|
55
|
+
end.compact.to_set
|
56
|
+
else
|
57
|
+
contents.to_set
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def contain_paths_like given, options={}
|
63
|
+
PathsMatcher.new(given, options)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
data/spec/support/random.rb
CHANGED
@@ -3,36 +3,23 @@ require 'fileutils'
|
|
3
3
|
module IMWTest
|
4
4
|
module Random
|
5
5
|
|
6
|
-
STRING_CHARS = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + [' ',' ',' ',' ',' ']
|
7
|
-
TEXT_CHARS = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + [' ',' ',' ',' ',' ',"\n"]
|
8
|
-
FILENAME_CHARS = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + ["-","_"]
|
9
|
-
FILENAME_MAX_LENGTH = 9
|
10
|
-
TEXT_MAX_LENGTH = 1024
|
11
|
-
EXTENSIONS =
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
else
|
24
|
-
{
|
25
|
-
:tar => "tar",
|
26
|
-
:rar => "rar",
|
27
|
-
:zip => "zip",
|
28
|
-
:unzip => "unzip",
|
29
|
-
:gzip => "gzip",
|
30
|
-
:bzip2 => "bzip2",
|
31
|
-
:wget => "wget"
|
32
|
-
}
|
33
|
-
end
|
34
|
-
|
35
|
-
private
|
6
|
+
STRING_CHARS = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + [' ',' ',' ',' ',' '] unless defined?(STRING_CHARS)
|
7
|
+
TEXT_CHARS = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + [' ',' ',' ',' ',' ',"\n"] unless defined?(TEXT_CHARS)
|
8
|
+
FILENAME_CHARS = ("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + ["-","_",' '] unless defined?(FILENAME_CHARS)
|
9
|
+
FILENAME_MAX_LENGTH = 9 unless defined?(FILENAME_MAX_LENGTH)
|
10
|
+
TEXT_MAX_LENGTH = 1024 unless defined?(TEXT_MAX_LENGTH)
|
11
|
+
EXTENSIONS = [
|
12
|
+
[/\.csv$/ , :csv_file],
|
13
|
+
[/\.xml$/ , :xml_file],
|
14
|
+
[/\.html$/ , :html_file],
|
15
|
+
[/\.tar\.gz$/ , :targz_file],
|
16
|
+
[/\.tar\.bz2$/ , :tarbz2_file],
|
17
|
+
[/\.bz2$/ , :bz2_file],
|
18
|
+
[/\.gz$/ , :gz_file],
|
19
|
+
[/\.tar$/ , :tar_file],
|
20
|
+
[/\.rar$/ , :rar_file],
|
21
|
+
[/\.zip$/ , :zip_file]
|
22
|
+
] unless defined?(EXTENSIONS)
|
36
23
|
# Return a random filename. Optional +length+ to set the maximum
|
37
24
|
# length of the filename returned.
|
38
25
|
def self.basename options = {}
|
@@ -54,7 +41,6 @@ module IMWTest
|
|
54
41
|
(1..length).map { |i| char_pool.random }.join
|
55
42
|
end
|
56
43
|
|
57
|
-
public
|
58
44
|
# Create a random file by matching the extension of the given
|
59
45
|
# +filename+ or a text file if no match is found.
|
60
46
|
def self.file filename
|
@@ -115,7 +101,7 @@ module IMWTest
|
|
115
101
|
def self.tar_file filename
|
116
102
|
tmpd = File.dirname(filename) + '/dir'
|
117
103
|
directory_with_files(tmpd)
|
118
|
-
FileUtils.cd(tmpd) {|dir| system("
|
104
|
+
FileUtils.cd(tmpd) {|dir| system("tar -cf file.tar *") }
|
119
105
|
FileUtils.cp(tmpd + "/file.tar",filename)
|
120
106
|
FileUtils.rm_rf(tmpd)
|
121
107
|
end
|
@@ -126,7 +112,7 @@ module IMWTest
|
|
126
112
|
tar = File.dirname(filename) + "/file.tar"
|
127
113
|
targz = tar + ".gz"
|
128
114
|
tar_file tar
|
129
|
-
system("
|
115
|
+
system("gzip #{tar}")
|
130
116
|
FileUtils.cp(targz,filename)
|
131
117
|
FileUtils.rm(targz)
|
132
118
|
end
|
@@ -137,17 +123,34 @@ module IMWTest
|
|
137
123
|
tar = File.dirname(filename) + "/file.tar"
|
138
124
|
tarbz2 = tar + ".bz2"
|
139
125
|
tar_file tar
|
140
|
-
system("
|
126
|
+
system("bzip2 #{tar}")
|
141
127
|
FileUtils.cp(tarbz2,filename)
|
142
128
|
FileUtils.rm(tarbz2)
|
143
129
|
end
|
144
130
|
|
131
|
+
# Create a .bz2 file at the given +filename+.
|
132
|
+
def self.bz2_file filename
|
133
|
+
text_path = File.dirname(filename) + "/fake_file"
|
134
|
+
text_file(text_path)
|
135
|
+
system("bzip2 #{text_path}")
|
136
|
+
FileUtils.mv(text_path + ".bz2", filename)
|
137
|
+
end
|
138
|
+
|
139
|
+
# Create a .gz file at the given +filename+.
|
140
|
+
def self.gz_file filename
|
141
|
+
text_path = File.dirname(filename) + "/fake_file"
|
142
|
+
text_file(text_path)
|
143
|
+
system("gzip #{text_path}")
|
144
|
+
FileUtils.mv(text_path + ".gz", filename)
|
145
|
+
end
|
146
|
+
|
147
|
+
|
145
148
|
# Create a compressed rar archive at the given +filename+
|
146
149
|
# containing random files.
|
147
150
|
def self.rar_file filename
|
148
151
|
tmpd = File.dirname(filename) + '/dir'
|
149
152
|
directory_with_files(tmpd)
|
150
|
-
FileUtils.cd(tmpd) {|dir| system("
|
153
|
+
FileUtils.cd(tmpd) {|dir| system("rar a -o+ -inul file.rar *") }
|
151
154
|
FileUtils.cp(tmpd + "/file.rar",filename)
|
152
155
|
FileUtils.rm_rf(tmpd)
|
153
156
|
end
|
@@ -157,7 +160,7 @@ module IMWTest
|
|
157
160
|
def self.zip_file filename
|
158
161
|
tmpd = File.dirname(filename) + '/dir'
|
159
162
|
directory_with_files(tmpd)
|
160
|
-
FileUtils.cd(tmpd) {|dir| system("
|
163
|
+
FileUtils.cd(tmpd) {|dir| system("zip -qqr file.zip *") }
|
161
164
|
FileUtils.cp(tmpd + "/file.zip",filename)
|
162
165
|
FileUtils.rm_rf(tmpd)
|
163
166
|
end
|