imw 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -1
- data/Rakefile +10 -0
- data/TODO +18 -0
- data/VERSION +1 -1
- data/bin/imw +1 -1
- data/etc/imwrc.rb +0 -50
- data/examples/dataset.rb +12 -0
- data/lib/imw/boot.rb +55 -9
- data/lib/imw/dataset/paths.rb +15 -24
- data/lib/imw/dataset/workflow.rb +131 -72
- data/lib/imw/dataset.rb +94 -186
- data/lib/imw/parsers/html_parser.rb +1 -1
- data/lib/imw/parsers.rb +1 -1
- data/lib/imw/repository.rb +3 -27
- data/lib/imw/resource.rb +190 -0
- data/lib/imw/resources/archive.rb +97 -0
- data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
- data/lib/imw/resources/archives_and_compressed.rb +32 -0
- data/lib/imw/resources/compressed_file.rb +89 -0
- data/lib/imw/resources/compressible.rb +77 -0
- data/lib/imw/resources/formats/delimited.rb +92 -0
- data/lib/imw/resources/formats/excel.rb +125 -0
- data/lib/imw/resources/formats/json.rb +53 -0
- data/lib/imw/resources/formats/sgml.rb +72 -0
- data/lib/imw/resources/formats/yaml.rb +53 -0
- data/lib/imw/resources/formats.rb +32 -0
- data/lib/imw/resources/local.rb +198 -0
- data/lib/imw/resources/remote.rb +110 -0
- data/lib/imw/resources/schemes/hdfs.rb +242 -0
- data/lib/imw/resources/schemes/http.rb +161 -0
- data/lib/imw/resources/schemes/s3.rb +137 -0
- data/lib/imw/resources/schemes.rb +19 -0
- data/lib/imw/resources.rb +118 -0
- data/lib/imw/runner.rb +5 -4
- data/lib/imw/transforms/archiver.rb +215 -0
- data/lib/imw/transforms/transferer.rb +103 -0
- data/lib/imw/transforms.rb +8 -0
- data/lib/imw/utils/error.rb +26 -30
- data/lib/imw/utils/extensions/array.rb +5 -15
- data/lib/imw/utils/extensions/hash.rb +6 -16
- data/lib/imw/utils/extensions/hpricot.rb +0 -14
- data/lib/imw/utils/extensions/string.rb +5 -15
- data/lib/imw/utils/extensions/symbol.rb +0 -13
- data/lib/imw/utils/extensions.rb +65 -0
- data/lib/imw/utils/log.rb +14 -13
- data/lib/imw/utils/misc.rb +0 -6
- data/lib/imw/utils/paths.rb +101 -42
- data/lib/imw/utils/version.rb +8 -9
- data/lib/imw/utils.rb +2 -18
- data/lib/imw.rb +92 -17
- data/spec/data/sample.csv +1 -1
- data/spec/data/sample.json +1 -0
- data/spec/data/sample.tsv +1 -1
- data/spec/data/sample.txt +1 -1
- data/spec/data/sample.xml +1 -1
- data/spec/data/sample.yaml +1 -1
- data/spec/imw/dataset/paths_spec.rb +32 -0
- data/spec/imw/dataset/workflow_spec.rb +41 -0
- data/spec/imw/resource_spec.rb +79 -0
- data/spec/imw/resources/archive_spec.rb +69 -0
- data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
- data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
- data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
- data/spec/imw/resources/compressed_file_spec.rb +48 -0
- data/spec/imw/resources/compressible_spec.rb +36 -0
- data/spec/imw/resources/formats/delimited_spec.rb +33 -0
- data/spec/imw/resources/formats/json_spec.rb +32 -0
- data/spec/imw/resources/formats/sgml_spec.rb +24 -0
- data/spec/imw/resources/formats/yaml_spec.rb +41 -0
- data/spec/imw/resources/local_spec.rb +98 -0
- data/spec/imw/resources/remote_spec.rb +35 -0
- data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
- data/spec/imw/resources/schemes/http_spec.rb +19 -0
- data/spec/imw/resources/schemes/s3_spec.rb +19 -0
- data/spec/imw/transforms/archiver_spec.rb +120 -0
- data/spec/imw/transforms/transferer_spec.rb +113 -0
- data/spec/imw/utils/paths_spec.rb +5 -33
- data/spec/imw/utils/shared_paths_spec.rb +29 -0
- data/spec/spec_helper.rb +5 -5
- data/spec/support/paths_matcher.rb +67 -0
- data/spec/support/random.rb +39 -36
- metadata +88 -75
- data/lib/imw/dataset/task.rb +0 -41
- data/lib/imw/files/archive.rb +0 -113
- data/lib/imw/files/basicfile.rb +0 -122
- data/lib/imw/files/binary.rb +0 -28
- data/lib/imw/files/compressed_file.rb +0 -93
- data/lib/imw/files/compressed_files_and_archives.rb +0 -334
- data/lib/imw/files/compressible.rb +0 -103
- data/lib/imw/files/csv.rb +0 -113
- data/lib/imw/files/directory.rb +0 -62
- data/lib/imw/files/excel.rb +0 -84
- data/lib/imw/files/json.rb +0 -41
- data/lib/imw/files/sgml.rb +0 -46
- data/lib/imw/files/text.rb +0 -68
- data/lib/imw/files/yaml.rb +0 -46
- data/lib/imw/files.rb +0 -125
- data/lib/imw/packagers/archiver.rb +0 -126
- data/lib/imw/packagers/s3_mover.rb +0 -36
- data/lib/imw/packagers.rb +0 -8
- data/lib/imw/utils/components.rb +0 -61
- data/lib/imw/utils/config.rb +0 -46
- data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
- data/lib/imw/utils/extensions/core.rb +0 -27
- data/lib/imw/utils/extensions/dir.rb +0 -24
- data/lib/imw/utils/extensions/file_core.rb +0 -64
- data/lib/imw/utils/extensions/typed_struct.rb +0 -22
- data/lib/imw/utils/extensions/uri.rb +0 -59
- data/lib/imw/utils/view/dump_csv.rb +0 -112
- data/lib/imw/utils/view/dump_csv_older.rb +0 -117
- data/lib/imw/utils/view.rb +0 -113
- data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
- data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
- data/spec/imw/files/archive_spec.rb +0 -118
- data/spec/imw/files/basicfile_spec.rb +0 -121
- data/spec/imw/files/bz2_spec.rb +0 -32
- data/spec/imw/files/compressed_file_spec.rb +0 -96
- data/spec/imw/files/compressible_spec.rb +0 -100
- data/spec/imw/files/file_spec.rb +0 -144
- data/spec/imw/files/gz_spec.rb +0 -32
- data/spec/imw/files/rar_spec.rb +0 -33
- data/spec/imw/files/tar_spec.rb +0 -31
- data/spec/imw/files/text_spec.rb +0 -23
- data/spec/imw/files/zip_spec.rb +0 -31
- data/spec/imw/files_spec.rb +0 -38
- data/spec/imw/packagers/archiver_spec.rb +0 -125
- data/spec/imw/packagers/s3_mover_spec.rb +0 -7
- data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
- data/spec/imw/utils/extensions/find_spec.rb +0 -113
- data/spec/imw/workflow/rip/local_spec.rb +0 -89
- data/spec/imw/workflow/rip_spec.rb +0 -27
- data/spec/support/archive_contents_matcher.rb +0 -94
- data/spec/support/directory_contents_matcher.rb +0 -61
@@ -1,125 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
2
|
-
|
3
|
-
describe IMW::Packagers::Archiver do
|
4
|
-
before do
|
5
|
-
@name = 'foobar'
|
6
|
-
|
7
|
-
# regular files
|
8
|
-
@csv = "foobar-csv.csv"
|
9
|
-
@xml = "foobar-xml.xml"
|
10
|
-
@txt = "foobar-txt.txt"
|
11
|
-
@blah = "foobar"
|
12
|
-
|
13
|
-
# compressed files
|
14
|
-
@bz2 = "foobar-bz2.bz2"
|
15
|
-
|
16
|
-
# archives
|
17
|
-
@zip = "foobar-zip.zip"
|
18
|
-
@tarbz2 = "foobar-tarbz2.tar.bz2"
|
19
|
-
@rar = "foobar-rar.rar"
|
20
|
-
@archives = [@zip, @tarbz2]
|
21
|
-
|
22
|
-
@files = [@csv, @xml, @txt, @blah, @bz2, @zip, @tarbz2]
|
23
|
-
|
24
|
-
@files.each do |path|
|
25
|
-
IMWTest::Random.file path
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
|
30
|
-
describe "when preparing files" do
|
31
|
-
before do
|
32
|
-
@archiver = IMW::Packagers::Archiver.new @name, @files
|
33
|
-
@archiver.prepare!
|
34
|
-
end
|
35
|
-
|
36
|
-
after do
|
37
|
-
FileUtils.rm_rf @archiver.tmp_dir
|
38
|
-
end
|
39
|
-
|
40
|
-
it "should name its archive directory properly" do
|
41
|
-
@archiver.tmp_dir.should contain(@name)
|
42
|
-
end
|
43
|
-
|
44
|
-
it "should copy regular files to its archive directory" do
|
45
|
-
@archiver.dir.should contain(@csv, @xml, @txt)
|
46
|
-
end
|
47
|
-
|
48
|
-
it "should uncompress compressed files to its archive directory" do
|
49
|
-
@archiver.dir.should contain('foobar-bz2')
|
50
|
-
@archiver.dir.should_not contain(@bz2)
|
51
|
-
end
|
52
|
-
|
53
|
-
it "should copy the content of archive files to its archive directory (but not the actual archives)" do
|
54
|
-
@archives.each do |archive|
|
55
|
-
@archiver.dir.should_not contain(archive)
|
56
|
-
@archiver.dir.should contain(*IMW.open(archive).contents)
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
it "should not move any of the original files" do
|
61
|
-
IMWTest::TMP_DIR.should contain(@files)
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
describe "when preparing files while renaming them" do
|
66
|
-
before do
|
67
|
-
|
68
|
-
# to test renaming, consider the new paths to be the old paths
|
69
|
-
# but with the hyphens mapped to underscores...
|
70
|
-
@renaming_hash = {}
|
71
|
-
@files.each { |f| @renaming_hash[f] = f.gsub(/-/,'_') }
|
72
|
-
|
73
|
-
@archiver = IMW::Packagers::Archiver.new @name, @renaming_hash
|
74
|
-
@archiver.prepare!
|
75
|
-
end
|
76
|
-
|
77
|
-
after do
|
78
|
-
FileUtils.rm_rf @archiver.tmp_dir
|
79
|
-
end
|
80
|
-
|
81
|
-
it "should copy regular files to its archive directory, renaming them" do
|
82
|
-
@archiver.dir.should_not contain([@csv, @xml, @txt])
|
83
|
-
@archiver.dir.should contain([@csv, @xml, @txt].map { |f| @renaming_hash[f] })
|
84
|
-
end
|
85
|
-
|
86
|
-
it "should uncompress compressed files to its archive directory, renaming them" do
|
87
|
-
@archiver.dir.should contain('foobar_bz2')
|
88
|
-
@archiver.dir.should_not contain('foobar-bz2')
|
89
|
-
@archiver.dir.should_not contain(@renaming_hash[@bz2])
|
90
|
-
@archiver.dir.should_not contain(@bz2)
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
describe "when packaging files" do
|
95
|
-
before do
|
96
|
-
@archiver = IMW::Packagers::Archiver.new @name, @files
|
97
|
-
@archiver.prepare!
|
98
|
-
|
99
|
-
@package_tarbz2 = "package.tar.bz2"
|
100
|
-
@package_zip = "package.zip"
|
101
|
-
@packages = [@package_tarbz2, @package_zip]
|
102
|
-
end
|
103
|
-
|
104
|
-
after do
|
105
|
-
FileUtils.rm_rf @archiver.tmp_dir
|
106
|
-
end
|
107
|
-
|
108
|
-
it "should create a package file containing the proper files" do
|
109
|
-
@packages.each do |package|
|
110
|
-
@archiver.package! package
|
111
|
-
@archiver.tmp_dir.should contain(IMW.open(package).contents)
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
it "should return the package file" do
|
116
|
-
@packages.each do |package|
|
117
|
-
output = @archiver.package! package
|
118
|
-
output.basename.should == package
|
119
|
-
end
|
120
|
-
end
|
121
|
-
|
122
|
-
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
@@ -1,72 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# h2. spec/imw/utils/extensions/file_core_spec.rb -- spec for extensions to core file module
|
3
|
-
#
|
4
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
5
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
6
|
-
# License:: GPL 3.0
|
7
|
-
# Website:: http://infinitemonkeywrench.org/
|
8
|
-
#
|
9
|
-
|
10
|
-
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
11
|
-
|
12
|
-
require 'fileutils'
|
13
|
-
|
14
|
-
require 'imw/utils/random'
|
15
|
-
|
16
|
-
describe File do
|
17
|
-
|
18
|
-
it "should return the 'name' of a file with 'name_of_file'" do
|
19
|
-
File.name_of_file("/path/to/some_file.txt").should eql("some_file")
|
20
|
-
end
|
21
|
-
|
22
|
-
describe "when finding the handle corresponding to a path" do
|
23
|
-
|
24
|
-
it "should correctly identify paths with the processing instruction suffix" do
|
25
|
-
File.handle("/path/to/the_handle#{IMW::PROCESSING_INSTRUCTION_SUFFIX}.yaml").should eql(:the_handle)
|
26
|
-
end
|
27
|
-
|
28
|
-
it "should correctly identify paths with the metadata instruction suffix" do
|
29
|
-
File.handle("/path/to/the_handle#{IMW::METADATA_SUFFIX}.yaml").should eql(:the_handle)
|
30
|
-
end
|
31
|
-
|
32
|
-
it "should raise an error if the path does not correspond to a handle" do
|
33
|
-
lambda {File.handle("/path/to/the_handle.txt")}.should raise_error(IMW::PathError)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
describe "when creating unique filenames" do
|
38
|
-
|
39
|
-
before(:each) do
|
40
|
-
@root_directory = IMW::DIRECTORIES[:dump] + "/file_core_spec"
|
41
|
-
@file0 = @root_directory + "/the_original.txt"
|
42
|
-
@file1 = @root_directory + "/the_original.txt.1"
|
43
|
-
@file2 = @root_directory + "/the_original.txt.2"
|
44
|
-
FileUtils.mkdir(@root_directory)
|
45
|
-
end
|
46
|
-
|
47
|
-
after(:each) do
|
48
|
-
FileUtils.rm_rf @root_directory
|
49
|
-
end
|
50
|
-
|
51
|
-
it "should return the given path if there is no such file already" do
|
52
|
-
File.uniquify(@file0).should eql(@file0)
|
53
|
-
end
|
54
|
-
|
55
|
-
it "should return the given path with a numerical suffix of `.1' if the file exists" do
|
56
|
-
IMW::Random.file(@file0)
|
57
|
-
File.uniquify(@file0).should eql(@file1)
|
58
|
-
end
|
59
|
-
|
60
|
-
it "should return the given path with a numerical suffix o `.2' if the file exists and a file with a suffix of `.1' also exists" do
|
61
|
-
IMW::Random.file(@file0)
|
62
|
-
IMW::Random.file(@file1)
|
63
|
-
File.uniquify(@file0).should eql(@file2)
|
64
|
-
end
|
65
|
-
|
66
|
-
end
|
67
|
-
|
68
|
-
end
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
# puts "#{File.basename(__FILE__)}: You bend the file folder almost in half and watch as it springs back to shape." # at bottom
|
@@ -1,113 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# h2. spec/imw/utils/extensions/find_spec.rb -- spec for find.rb
|
3
|
-
#
|
4
|
-
# == About
|
5
|
-
#
|
6
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
7
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
8
|
-
# License:: GPL 3.0
|
9
|
-
# Website:: http://infinitemonkeywrench.org/
|
10
|
-
#
|
11
|
-
|
12
|
-
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
13
|
-
require IMW_SPEC_DIR + "/imw/matchers/without_regard_to_order_matcher"
|
14
|
-
|
15
|
-
require 'fileutils'
|
16
|
-
require 'set'
|
17
|
-
|
18
|
-
require 'imw/utils'
|
19
|
-
require 'imw/utils/random'
|
20
|
-
require 'imw/utils/extensions/find'
|
21
|
-
|
22
|
-
describe Find do
|
23
|
-
|
24
|
-
include Spec::Matchers::IMW
|
25
|
-
|
26
|
-
def create_sample_files
|
27
|
-
FileUtils.mkdir_p(@subsubdirectory)
|
28
|
-
[@file1,@file2,@file3,@file4,@file5,@file6].each {|path| IMW::Random.file path}
|
29
|
-
end
|
30
|
-
|
31
|
-
before(:all) do
|
32
|
-
@root_directory = IMW::DIRECTORIES[:dump] + "/find_extension_spec"
|
33
|
-
@subdirectory = @root_directory + "/subdir"
|
34
|
-
@subsubdirectory = @subdirectory + "/subsubdir"
|
35
|
-
@fake_directory = @root_directory + "/notreal"
|
36
|
-
@file1 = @root_directory + "/my_file1.txt"
|
37
|
-
@file2 = @root_directory + "/my_file2.csv"
|
38
|
-
@file3 = @root_directory + "/my_file3.dat"
|
39
|
-
@file4 = @subdirectory + "/your_file4.html"
|
40
|
-
@file5 = @subdirectory + "/your_file5.csv"
|
41
|
-
@file6 = @subdirectory + "/your_file5"
|
42
|
-
end
|
43
|
-
|
44
|
-
before(:each) do
|
45
|
-
create_sample_files
|
46
|
-
end
|
47
|
-
|
48
|
-
after(:each) do
|
49
|
-
FileUtils.rm_rf @root_directory
|
50
|
-
end
|
51
|
-
|
52
|
-
describe "when listing files with absolute paths contained in a directory" do
|
53
|
-
|
54
|
-
it "should raise an error when listing a non-exsiting directory" do
|
55
|
-
lambda {Find.files_in_directory(@fake_directory) }.should raise_error(IMW::PathError)
|
56
|
-
end
|
57
|
-
|
58
|
-
it "should find every file by default" do
|
59
|
-
Find.files_in_directory(@root_directory).should match_without_regard_to_order([@file1,@file2,@file3,@file4,@file5,@file6])
|
60
|
-
end
|
61
|
-
|
62
|
-
it "should only find files which match its :include argument" do
|
63
|
-
Find.files_in_directory(@root_directory, :include => /.*\.csv$/).should match_without_regard_to_order([@file2,@file5])
|
64
|
-
end
|
65
|
-
|
66
|
-
it "should not find files which match its :exclude argument" do
|
67
|
-
Find.files_in_directory(@root_directory, :exclude => /.*\.csv$/).should match_without_regard_to_order([@file1,@file3,@file4,@file6])
|
68
|
-
end
|
69
|
-
|
70
|
-
it "should only find files which match its :include argument and don't match its :exclude argument" do
|
71
|
-
Find.files_in_directory(@root_directory, :include => /my/, :exclude => /.*\.csv$/).should match_without_regard_to_order([@file1,@file3])
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
describe "when listing files with relative paths contained in a directory" do
|
76
|
-
|
77
|
-
def strip_root_directory array
|
78
|
-
array.map {|item| item[@root_directory.length + 1,item.size]}
|
79
|
-
end
|
80
|
-
|
81
|
-
it "should raise an error when listing a non-exsiting directory" do
|
82
|
-
lambda {Find.files_in_directory(@fake_directory) }.should raise_error(IMW::PathError)
|
83
|
-
end
|
84
|
-
|
85
|
-
it "should find every file by default" do
|
86
|
-
Find.files_relative_to_directory(@root_directory).should match_without_regard_to_order(strip_root_directory([@file1,@file2,@file3,@file4,@file5,@file6]))
|
87
|
-
end
|
88
|
-
|
89
|
-
it "should only find files which match its :include argument" do
|
90
|
-
Find.files_relative_to_directory(@root_directory, :include => /.*\.csv$/).should match_without_regard_to_order(strip_root_directory([@file2,@file5]))
|
91
|
-
end
|
92
|
-
|
93
|
-
it "should not find files which match its :exclude argument" do
|
94
|
-
Find.files_relative_to_directory(@root_directory, :exclude => /.*\.csv$/).should match_without_regard_to_order(strip_root_directory([@file1,@file3,@file4,@file6]))
|
95
|
-
end
|
96
|
-
|
97
|
-
it "should only find files which match its :include argument and don't match its :exclude argument" do
|
98
|
-
Find.files_relative_to_directory(@root_directory, :include => /^my/, :exclude => /.*\.csv$/).should match_without_regard_to_order(strip_root_directory([@file1,@file3]))
|
99
|
-
end
|
100
|
-
|
101
|
-
end
|
102
|
-
|
103
|
-
describe "when listing handles in a directory" do
|
104
|
-
|
105
|
-
it "should return a unique set of handles" do
|
106
|
-
Find.handles_in_directory(@root_directory, :include => /your/).should match_without_regard_to_order([:your_file4, :your_file5])
|
107
|
-
end
|
108
|
-
end
|
109
|
-
|
110
|
-
|
111
|
-
end
|
112
|
-
|
113
|
-
# puts "#{File.basename(__FILE__)}: You throw your Monkeywrench backwards over your shoulder and run like mad to go find it. Again, and again, and again." # at bottom
|
@@ -1,89 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# h2. spec/imw/workflow/rip/local_spec.rb -- specs for copying files from local disk
|
3
|
-
#
|
4
|
-
# == About
|
5
|
-
#
|
6
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
7
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
8
|
-
# License:: GPL 3.0
|
9
|
-
# Website:: http://infinitemonkeywrench.org/
|
10
|
-
#
|
11
|
-
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
12
|
-
require IMW_SPEC_DIR + "/imw/matchers/without_regard_to_order_matcher.rb"
|
13
|
-
|
14
|
-
require 'fileutils'
|
15
|
-
|
16
|
-
require 'imw/utils/random'
|
17
|
-
require 'imw/utils/extensions/find'
|
18
|
-
require 'imw/workflow/rip/local'
|
19
|
-
|
20
|
-
describe "Ripping from local disk" do
|
21
|
-
|
22
|
-
include Spec::Matchers::IMW
|
23
|
-
|
24
|
-
before(:all) do
|
25
|
-
@root_directory = IMW::DIRECTORIES[:dump] + "/local_spec"
|
26
|
-
@file1 = @root_directory + "/first.csv"
|
27
|
-
|
28
|
-
@source_directory1 = @root_directory + "/source1"
|
29
|
-
@file2 = @source_directory1 + "/second.txt"
|
30
|
-
@file3 = @source_directory1 + "/third.csv"
|
31
|
-
|
32
|
-
@source_directory2 = @root_directory + "/source2"
|
33
|
-
@file4 = @source_directory2 + "/fourth.txt"
|
34
|
-
@file5a = @source_directory2 + "/fifth-shared.yaml"
|
35
|
-
|
36
|
-
@source_directory3 = @source_directory2 + "/source3-nested"
|
37
|
-
@file5b = @source_directory3 + "/fifth-shared.yaml"
|
38
|
-
|
39
|
-
@target_directory = @root_directory + "/target"
|
40
|
-
end
|
41
|
-
|
42
|
-
before(:each) do
|
43
|
-
FileUtils.mkdir([@root_directory,@source_directory1,@source_directory2,@source_directory3,@target_directory])
|
44
|
-
[@file1,@file2,@file3,@file4,@file5a,@file5b].each {|file| IMW::Random.file(file)}
|
45
|
-
end
|
46
|
-
|
47
|
-
after(:each) do
|
48
|
-
FileUtils.rm_rf @root_directory
|
49
|
-
end
|
50
|
-
|
51
|
-
|
52
|
-
def basenames_of files
|
53
|
-
files.map {|file| File.basename file}
|
54
|
-
end
|
55
|
-
|
56
|
-
it "should raise an error when attempting to copy to a non-existent target directory" do
|
57
|
-
FileUtils.rm_rf @target_directory
|
58
|
-
lambda { IMW::Rip.from_local_disk(@target_directory,@source_directory1)}.should raise_error(IMW::PathError)
|
59
|
-
end
|
60
|
-
|
61
|
-
it "should copy all files in all directories and paths recursively to the target directory without any hierarchy" do
|
62
|
-
IMW::Rip.from_local_disk(@target_directory,@file1,@source_directory1,@source_directory2)
|
63
|
-
Find.files_relative_to_directory(@target_directory).should match_without_regard_to_order(basenames_of([@file1,@file2,@file3,@file4,@file5a]))
|
64
|
-
end
|
65
|
-
|
66
|
-
it "should accept a block which establishes a hierarchy to be created in the target directory and which skips copying certain files if it returns nil" do
|
67
|
-
|
68
|
-
# complicated block to copy files to sub-directories of the target
|
69
|
-
# directory depending on their extension
|
70
|
-
IMW::Rip.from_local_disk(@target_directory,@file1,@source_directory1,@source_directory2) do |path|
|
71
|
-
if File.extname(path) == '.txt' then
|
72
|
-
File.join('txt',File.basename(path)) # put text files in txt
|
73
|
-
elsif File.extname(path) == '.csv' then
|
74
|
-
File.join("csv",File.basename(path)) # put csv files in csv
|
75
|
-
else
|
76
|
-
nil # don't copy other extensions
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
# what we would expect to see from that block
|
81
|
-
txt = [@file2,@file4].map {|path| File.join("txt",File.basename(path))}
|
82
|
-
csv = [@file1,@file3].map {|path| File.join("csv",File.basename(path))}
|
83
|
-
|
84
|
-
Find.files_relative_to_directory(@target_directory).should match_without_regard_to_order(txt + csv)
|
85
|
-
end
|
86
|
-
|
87
|
-
end
|
88
|
-
|
89
|
-
# puts "#{File.basename(__FILE__)}: Having found the platter you were looking for, you stare at it, examining your reflection. What a handsome chimp you are!" # at bottom
|
@@ -1,27 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# h2. spec/imw/workflow/rip_spec.rb -- spec for rip.rb
|
3
|
-
#
|
4
|
-
# == About
|
5
|
-
#
|
6
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
7
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
8
|
-
# License:: GPL 3.0
|
9
|
-
# Website:: http://infinitemonkeywrench.org/
|
10
|
-
#
|
11
|
-
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
12
|
-
|
13
|
-
# require 'imw/workflow/rip'
|
14
|
-
#
|
15
|
-
# describe Source do
|
16
|
-
#
|
17
|
-
# before(:all) do
|
18
|
-
# @source = IMW::Source.new(:fake_source)
|
19
|
-
# @source.stub("returns path to ripd directory",:path_to => IMW::DIRECTORIES[:dump] + "/source_rip_spec")
|
20
|
-
# end
|
21
|
-
#
|
22
|
-
# it "should raise an error when asked to rip in an unknown way" do
|
23
|
-
# @source.rip_from :silly_way
|
24
|
-
# end
|
25
|
-
# end
|
26
|
-
|
27
|
-
# puts "#{File.basename(__FILE__)}: Bending over, you hear a thunderous RRRRRRRIIIIIP and then scuttle off to check your pants..." # at bottom
|
@@ -1,94 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# h2. spec/matchers/archive_contents_matcher.rb -- matches contents of archive to disk
|
3
|
-
#
|
4
|
-
# == About
|
5
|
-
#
|
6
|
-
# An RSpec matcher which tests that an archive of files has the same
|
7
|
-
# contents as various paths on disk.
|
8
|
-
#
|
9
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
10
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
11
|
-
# License:: GPL 3.0
|
12
|
-
# Website:: http://infinitemonkeywrench.org/
|
13
|
-
#
|
14
|
-
|
15
|
-
require 'find'
|
16
|
-
|
17
|
-
module Spec
|
18
|
-
module Matchers
|
19
|
-
module IMW
|
20
|
-
|
21
|
-
# Match the contents of the archive against files or directories
|
22
|
-
# in +paths+.
|
23
|
-
#
|
24
|
-
# Options include:
|
25
|
-
#
|
26
|
-
# <tt>:relative_to</tt>:: a leading path which will be stripped
|
27
|
-
# from all +paths+ before comparison with the contents of the
|
28
|
-
# directory.
|
29
|
-
class ArchiveContentsMatchPaths
|
30
|
-
|
31
|
-
private
|
32
|
-
def initialize paths,opts = {}
|
33
|
-
opts.reverse_merge!({:relative_to => nil})
|
34
|
-
paths = [paths] if paths.class == String
|
35
|
-
@paths = paths
|
36
|
-
@relative_to = opts[:relative_to]
|
37
|
-
find_paths_contents
|
38
|
-
end
|
39
|
-
|
40
|
-
def find_paths_contents
|
41
|
-
# find all the files
|
42
|
-
contents = []
|
43
|
-
@paths.each do |path|
|
44
|
-
path = File.expand_path path
|
45
|
-
if File.file? path then
|
46
|
-
contents << path
|
47
|
-
elsif File.directory? path then
|
48
|
-
contents += Find.files_in_directory(path)
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
# strip leading path
|
53
|
-
contents.map! do |path|
|
54
|
-
# the +1 is because we want a relative path
|
55
|
-
path = path[@relative_to.length + 1,path.size]
|
56
|
-
end
|
57
|
-
|
58
|
-
@paths_contents = contents.to_set
|
59
|
-
end
|
60
|
-
|
61
|
-
def pretty_print set
|
62
|
-
set.to_a.join("\n\t")
|
63
|
-
end
|
64
|
-
|
65
|
-
public
|
66
|
-
def matches? archive
|
67
|
-
@archive = archive
|
68
|
-
@archive_contents = @archive.contents.to_set
|
69
|
-
@archive_contents == @paths_contents
|
70
|
-
end
|
71
|
-
|
72
|
-
def failure_message
|
73
|
-
missing_from_archive = "missing from archive:\n\t#{pretty_print(@paths_contents - @archive_contents)}\n"
|
74
|
-
missing_from_paths = "missing from paths:\n\t#{pretty_print(@archive_contents - @paths_contents)}\n"
|
75
|
-
common = "common to both:\n\t#{pretty_print(@archive_contents & @paths_contents)}\n"
|
76
|
-
"expected contents of archive (#{@archive.path}) and paths (#{@paths.join(", ")}) to be identical.\n#{missing_from_archive}\n#{missing_from_paths}\n#{common}"
|
77
|
-
end
|
78
|
-
|
79
|
-
def negative_failure_message
|
80
|
-
"expected contents of archive (#{@archive.path}) and paths (#{@paths.join(", ")}) to differ."
|
81
|
-
end
|
82
|
-
|
83
|
-
end
|
84
|
-
|
85
|
-
# Invokes the matcher <tt>Spec::Matchers::IMW::ArchiveContentsMatchPaths
|
86
|
-
def contain_paths_like paths, opts = {}
|
87
|
-
ArchiveContentsMatchPaths.new(paths,opts)
|
88
|
-
end
|
89
|
-
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
# puts "#{File.basename(__FILE__)}: An archive is something that is bigger on the inside than it is on the outside." # at bottom
|
@@ -1,61 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# h2. spec/matchers/directory_contents_matcher.rb -- matches files between directories
|
3
|
-
#
|
4
|
-
# == About
|
5
|
-
#
|
6
|
-
# An RSpec matcher which tests that two directories share the same set
|
7
|
-
# of files.
|
8
|
-
#
|
9
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
10
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
11
|
-
# License:: GPL 3.0
|
12
|
-
# Website:: http://infinitemonkeywrench.org/
|
13
|
-
#
|
14
|
-
|
15
|
-
require 'set'
|
16
|
-
require 'find'
|
17
|
-
|
18
|
-
module Spec
|
19
|
-
module Matchers
|
20
|
-
module IMW
|
21
|
-
|
22
|
-
class DirectoryContentsMatcher
|
23
|
-
private
|
24
|
-
def initialize dir
|
25
|
-
@dir = File.expand_path(dir)
|
26
|
-
@dir_files = Find.files_relative_to_directory(@dir).to_set
|
27
|
-
end
|
28
|
-
|
29
|
-
# Pretty print a set of files.
|
30
|
-
def format_files_for_printing files
|
31
|
-
files.to_a.join("\n\t")
|
32
|
-
end
|
33
|
-
|
34
|
-
public
|
35
|
-
def matches? target
|
36
|
-
@target = target
|
37
|
-
@target_files = Find.files_relative_to_directory(@target).to_set
|
38
|
-
@target_files == @dir_files
|
39
|
-
end
|
40
|
-
|
41
|
-
def failure_message
|
42
|
-
files_missing_from_dir = format_files_for_printing(@target_files - @dir_files)
|
43
|
-
files_missing_from_target = format_files_for_printing(@dir_files - @target_files)
|
44
|
-
files_in_common = format_files_for_printing(@dir_files & @target_files)
|
45
|
-
"expected files in #{@dir} and #{@target} to be identical.\n\nfiles missing from #{@dir}:\n\t#{files_missing_from_dir}\n\nfiles missing from #{@target}:\n\t#{files_missing_from_target}\n\nfiles in common:\n\t#{files_in_common}"
|
46
|
-
end
|
47
|
-
|
48
|
-
def negative_failure_message
|
49
|
-
"expected files in #{@dir} and #{@target} to be different"
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
# Checks that files in one directory match those in another.
|
54
|
-
def contain_files_matching_directory dir
|
55
|
-
DirectoryContentsMatcher.new(dir)
|
56
|
-
end
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
# puts "#{File.basename(__FILE__)}: From far away, the two filing cabinets appear to be identical. Upon closer inspection, one of them is actually a Maine lobster. Delicious!" # at bottom
|