imw 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -1
- data/Rakefile +10 -0
- data/TODO +18 -0
- data/VERSION +1 -1
- data/bin/imw +1 -1
- data/etc/imwrc.rb +0 -50
- data/examples/dataset.rb +12 -0
- data/lib/imw/boot.rb +55 -9
- data/lib/imw/dataset/paths.rb +15 -24
- data/lib/imw/dataset/workflow.rb +131 -72
- data/lib/imw/dataset.rb +94 -186
- data/lib/imw/parsers/html_parser.rb +1 -1
- data/lib/imw/parsers.rb +1 -1
- data/lib/imw/repository.rb +3 -27
- data/lib/imw/resource.rb +190 -0
- data/lib/imw/resources/archive.rb +97 -0
- data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
- data/lib/imw/resources/archives_and_compressed.rb +32 -0
- data/lib/imw/resources/compressed_file.rb +89 -0
- data/lib/imw/resources/compressible.rb +77 -0
- data/lib/imw/resources/formats/delimited.rb +92 -0
- data/lib/imw/resources/formats/excel.rb +125 -0
- data/lib/imw/resources/formats/json.rb +53 -0
- data/lib/imw/resources/formats/sgml.rb +72 -0
- data/lib/imw/resources/formats/yaml.rb +53 -0
- data/lib/imw/resources/formats.rb +32 -0
- data/lib/imw/resources/local.rb +198 -0
- data/lib/imw/resources/remote.rb +110 -0
- data/lib/imw/resources/schemes/hdfs.rb +242 -0
- data/lib/imw/resources/schemes/http.rb +161 -0
- data/lib/imw/resources/schemes/s3.rb +137 -0
- data/lib/imw/resources/schemes.rb +19 -0
- data/lib/imw/resources.rb +118 -0
- data/lib/imw/runner.rb +5 -4
- data/lib/imw/transforms/archiver.rb +215 -0
- data/lib/imw/transforms/transferer.rb +103 -0
- data/lib/imw/transforms.rb +8 -0
- data/lib/imw/utils/error.rb +26 -30
- data/lib/imw/utils/extensions/array.rb +5 -15
- data/lib/imw/utils/extensions/hash.rb +6 -16
- data/lib/imw/utils/extensions/hpricot.rb +0 -14
- data/lib/imw/utils/extensions/string.rb +5 -15
- data/lib/imw/utils/extensions/symbol.rb +0 -13
- data/lib/imw/utils/extensions.rb +65 -0
- data/lib/imw/utils/log.rb +14 -13
- data/lib/imw/utils/misc.rb +0 -6
- data/lib/imw/utils/paths.rb +101 -42
- data/lib/imw/utils/version.rb +8 -9
- data/lib/imw/utils.rb +2 -18
- data/lib/imw.rb +92 -17
- data/spec/data/sample.csv +1 -1
- data/spec/data/sample.json +1 -0
- data/spec/data/sample.tsv +1 -1
- data/spec/data/sample.txt +1 -1
- data/spec/data/sample.xml +1 -1
- data/spec/data/sample.yaml +1 -1
- data/spec/imw/dataset/paths_spec.rb +32 -0
- data/spec/imw/dataset/workflow_spec.rb +41 -0
- data/spec/imw/resource_spec.rb +79 -0
- data/spec/imw/resources/archive_spec.rb +69 -0
- data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
- data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
- data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
- data/spec/imw/resources/compressed_file_spec.rb +48 -0
- data/spec/imw/resources/compressible_spec.rb +36 -0
- data/spec/imw/resources/formats/delimited_spec.rb +33 -0
- data/spec/imw/resources/formats/json_spec.rb +32 -0
- data/spec/imw/resources/formats/sgml_spec.rb +24 -0
- data/spec/imw/resources/formats/yaml_spec.rb +41 -0
- data/spec/imw/resources/local_spec.rb +98 -0
- data/spec/imw/resources/remote_spec.rb +35 -0
- data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
- data/spec/imw/resources/schemes/http_spec.rb +19 -0
- data/spec/imw/resources/schemes/s3_spec.rb +19 -0
- data/spec/imw/transforms/archiver_spec.rb +120 -0
- data/spec/imw/transforms/transferer_spec.rb +113 -0
- data/spec/imw/utils/paths_spec.rb +5 -33
- data/spec/imw/utils/shared_paths_spec.rb +29 -0
- data/spec/spec_helper.rb +5 -5
- data/spec/support/paths_matcher.rb +67 -0
- data/spec/support/random.rb +39 -36
- metadata +88 -75
- data/lib/imw/dataset/task.rb +0 -41
- data/lib/imw/files/archive.rb +0 -113
- data/lib/imw/files/basicfile.rb +0 -122
- data/lib/imw/files/binary.rb +0 -28
- data/lib/imw/files/compressed_file.rb +0 -93
- data/lib/imw/files/compressed_files_and_archives.rb +0 -334
- data/lib/imw/files/compressible.rb +0 -103
- data/lib/imw/files/csv.rb +0 -113
- data/lib/imw/files/directory.rb +0 -62
- data/lib/imw/files/excel.rb +0 -84
- data/lib/imw/files/json.rb +0 -41
- data/lib/imw/files/sgml.rb +0 -46
- data/lib/imw/files/text.rb +0 -68
- data/lib/imw/files/yaml.rb +0 -46
- data/lib/imw/files.rb +0 -125
- data/lib/imw/packagers/archiver.rb +0 -126
- data/lib/imw/packagers/s3_mover.rb +0 -36
- data/lib/imw/packagers.rb +0 -8
- data/lib/imw/utils/components.rb +0 -61
- data/lib/imw/utils/config.rb +0 -46
- data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
- data/lib/imw/utils/extensions/core.rb +0 -27
- data/lib/imw/utils/extensions/dir.rb +0 -24
- data/lib/imw/utils/extensions/file_core.rb +0 -64
- data/lib/imw/utils/extensions/typed_struct.rb +0 -22
- data/lib/imw/utils/extensions/uri.rb +0 -59
- data/lib/imw/utils/view/dump_csv.rb +0 -112
- data/lib/imw/utils/view/dump_csv_older.rb +0 -117
- data/lib/imw/utils/view.rb +0 -113
- data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
- data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
- data/spec/imw/files/archive_spec.rb +0 -118
- data/spec/imw/files/basicfile_spec.rb +0 -121
- data/spec/imw/files/bz2_spec.rb +0 -32
- data/spec/imw/files/compressed_file_spec.rb +0 -96
- data/spec/imw/files/compressible_spec.rb +0 -100
- data/spec/imw/files/file_spec.rb +0 -144
- data/spec/imw/files/gz_spec.rb +0 -32
- data/spec/imw/files/rar_spec.rb +0 -33
- data/spec/imw/files/tar_spec.rb +0 -31
- data/spec/imw/files/text_spec.rb +0 -23
- data/spec/imw/files/zip_spec.rb +0 -31
- data/spec/imw/files_spec.rb +0 -38
- data/spec/imw/packagers/archiver_spec.rb +0 -125
- data/spec/imw/packagers/s3_mover_spec.rb +0 -7
- data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
- data/spec/imw/utils/extensions/find_spec.rb +0 -113
- data/spec/imw/workflow/rip/local_spec.rb +0 -89
- data/spec/imw/workflow/rip_spec.rb +0 -27
- data/spec/support/archive_contents_matcher.rb +0 -94
- data/spec/support/directory_contents_matcher.rb +0 -61
|
@@ -1,125 +0,0 @@
|
|
|
1
|
-
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
-
|
|
3
|
-
describe IMW::Packagers::Archiver do
|
|
4
|
-
before do
|
|
5
|
-
@name = 'foobar'
|
|
6
|
-
|
|
7
|
-
# regular files
|
|
8
|
-
@csv = "foobar-csv.csv"
|
|
9
|
-
@xml = "foobar-xml.xml"
|
|
10
|
-
@txt = "foobar-txt.txt"
|
|
11
|
-
@blah = "foobar"
|
|
12
|
-
|
|
13
|
-
# compressed files
|
|
14
|
-
@bz2 = "foobar-bz2.bz2"
|
|
15
|
-
|
|
16
|
-
# archives
|
|
17
|
-
@zip = "foobar-zip.zip"
|
|
18
|
-
@tarbz2 = "foobar-tarbz2.tar.bz2"
|
|
19
|
-
@rar = "foobar-rar.rar"
|
|
20
|
-
@archives = [@zip, @tarbz2]
|
|
21
|
-
|
|
22
|
-
@files = [@csv, @xml, @txt, @blah, @bz2, @zip, @tarbz2]
|
|
23
|
-
|
|
24
|
-
@files.each do |path|
|
|
25
|
-
IMWTest::Random.file path
|
|
26
|
-
end
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
describe "when preparing files" do
|
|
31
|
-
before do
|
|
32
|
-
@archiver = IMW::Packagers::Archiver.new @name, @files
|
|
33
|
-
@archiver.prepare!
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
after do
|
|
37
|
-
FileUtils.rm_rf @archiver.tmp_dir
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
it "should name its archive directory properly" do
|
|
41
|
-
@archiver.tmp_dir.should contain(@name)
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
it "should copy regular files to its archive directory" do
|
|
45
|
-
@archiver.dir.should contain(@csv, @xml, @txt)
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
it "should uncompress compressed files to its archive directory" do
|
|
49
|
-
@archiver.dir.should contain('foobar-bz2')
|
|
50
|
-
@archiver.dir.should_not contain(@bz2)
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
it "should copy the content of archive files to its archive directory (but not the actual archives)" do
|
|
54
|
-
@archives.each do |archive|
|
|
55
|
-
@archiver.dir.should_not contain(archive)
|
|
56
|
-
@archiver.dir.should contain(*IMW.open(archive).contents)
|
|
57
|
-
end
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
it "should not move any of the original files" do
|
|
61
|
-
IMWTest::TMP_DIR.should contain(@files)
|
|
62
|
-
end
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
describe "when preparing files while renaming them" do
|
|
66
|
-
before do
|
|
67
|
-
|
|
68
|
-
# to test renaming, consider the new paths to be the old paths
|
|
69
|
-
# but with the hyphens mapped to underscores...
|
|
70
|
-
@renaming_hash = {}
|
|
71
|
-
@files.each { |f| @renaming_hash[f] = f.gsub(/-/,'_') }
|
|
72
|
-
|
|
73
|
-
@archiver = IMW::Packagers::Archiver.new @name, @renaming_hash
|
|
74
|
-
@archiver.prepare!
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
after do
|
|
78
|
-
FileUtils.rm_rf @archiver.tmp_dir
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
it "should copy regular files to its archive directory, renaming them" do
|
|
82
|
-
@archiver.dir.should_not contain([@csv, @xml, @txt])
|
|
83
|
-
@archiver.dir.should contain([@csv, @xml, @txt].map { |f| @renaming_hash[f] })
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
it "should uncompress compressed files to its archive directory, renaming them" do
|
|
87
|
-
@archiver.dir.should contain('foobar_bz2')
|
|
88
|
-
@archiver.dir.should_not contain('foobar-bz2')
|
|
89
|
-
@archiver.dir.should_not contain(@renaming_hash[@bz2])
|
|
90
|
-
@archiver.dir.should_not contain(@bz2)
|
|
91
|
-
end
|
|
92
|
-
end
|
|
93
|
-
|
|
94
|
-
describe "when packaging files" do
|
|
95
|
-
before do
|
|
96
|
-
@archiver = IMW::Packagers::Archiver.new @name, @files
|
|
97
|
-
@archiver.prepare!
|
|
98
|
-
|
|
99
|
-
@package_tarbz2 = "package.tar.bz2"
|
|
100
|
-
@package_zip = "package.zip"
|
|
101
|
-
@packages = [@package_tarbz2, @package_zip]
|
|
102
|
-
end
|
|
103
|
-
|
|
104
|
-
after do
|
|
105
|
-
FileUtils.rm_rf @archiver.tmp_dir
|
|
106
|
-
end
|
|
107
|
-
|
|
108
|
-
it "should create a package file containing the proper files" do
|
|
109
|
-
@packages.each do |package|
|
|
110
|
-
@archiver.package! package
|
|
111
|
-
@archiver.tmp_dir.should contain(IMW.open(package).contents)
|
|
112
|
-
end
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
it "should return the package file" do
|
|
116
|
-
@packages.each do |package|
|
|
117
|
-
output = @archiver.package! package
|
|
118
|
-
output.basename.should == package
|
|
119
|
-
end
|
|
120
|
-
end
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
end
|
|
124
|
-
end
|
|
125
|
-
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. spec/imw/utils/extensions/file_core_spec.rb -- spec for extensions to core file module
|
|
3
|
-
#
|
|
4
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
5
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
6
|
-
# License:: GPL 3.0
|
|
7
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
8
|
-
#
|
|
9
|
-
|
|
10
|
-
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
|
11
|
-
|
|
12
|
-
require 'fileutils'
|
|
13
|
-
|
|
14
|
-
require 'imw/utils/random'
|
|
15
|
-
|
|
16
|
-
describe File do
|
|
17
|
-
|
|
18
|
-
it "should return the 'name' of a file with 'name_of_file'" do
|
|
19
|
-
File.name_of_file("/path/to/some_file.txt").should eql("some_file")
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
describe "when finding the handle corresponding to a path" do
|
|
23
|
-
|
|
24
|
-
it "should correctly identify paths with the processing instruction suffix" do
|
|
25
|
-
File.handle("/path/to/the_handle#{IMW::PROCESSING_INSTRUCTION_SUFFIX}.yaml").should eql(:the_handle)
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
it "should correctly identify paths with the metadata instruction suffix" do
|
|
29
|
-
File.handle("/path/to/the_handle#{IMW::METADATA_SUFFIX}.yaml").should eql(:the_handle)
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
it "should raise an error if the path does not correspond to a handle" do
|
|
33
|
-
lambda {File.handle("/path/to/the_handle.txt")}.should raise_error(IMW::PathError)
|
|
34
|
-
end
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
describe "when creating unique filenames" do
|
|
38
|
-
|
|
39
|
-
before(:each) do
|
|
40
|
-
@root_directory = IMW::DIRECTORIES[:dump] + "/file_core_spec"
|
|
41
|
-
@file0 = @root_directory + "/the_original.txt"
|
|
42
|
-
@file1 = @root_directory + "/the_original.txt.1"
|
|
43
|
-
@file2 = @root_directory + "/the_original.txt.2"
|
|
44
|
-
FileUtils.mkdir(@root_directory)
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
after(:each) do
|
|
48
|
-
FileUtils.rm_rf @root_directory
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
it "should return the given path if there is no such file already" do
|
|
52
|
-
File.uniquify(@file0).should eql(@file0)
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
it "should return the given path with a numerical suffix of `.1' if the file exists" do
|
|
56
|
-
IMW::Random.file(@file0)
|
|
57
|
-
File.uniquify(@file0).should eql(@file1)
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
it "should return the given path with a numerical suffix o `.2' if the file exists and a file with a suffix of `.1' also exists" do
|
|
61
|
-
IMW::Random.file(@file0)
|
|
62
|
-
IMW::Random.file(@file1)
|
|
63
|
-
File.uniquify(@file0).should eql(@file2)
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
# puts "#{File.basename(__FILE__)}: You bend the file folder almost in half and watch as it springs back to shape." # at bottom
|
|
@@ -1,113 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. spec/imw/utils/extensions/find_spec.rb -- spec for find.rb
|
|
3
|
-
#
|
|
4
|
-
# == About
|
|
5
|
-
#
|
|
6
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
7
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
8
|
-
# License:: GPL 3.0
|
|
9
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
10
|
-
#
|
|
11
|
-
|
|
12
|
-
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
|
13
|
-
require IMW_SPEC_DIR + "/imw/matchers/without_regard_to_order_matcher"
|
|
14
|
-
|
|
15
|
-
require 'fileutils'
|
|
16
|
-
require 'set'
|
|
17
|
-
|
|
18
|
-
require 'imw/utils'
|
|
19
|
-
require 'imw/utils/random'
|
|
20
|
-
require 'imw/utils/extensions/find'
|
|
21
|
-
|
|
22
|
-
describe Find do
|
|
23
|
-
|
|
24
|
-
include Spec::Matchers::IMW
|
|
25
|
-
|
|
26
|
-
def create_sample_files
|
|
27
|
-
FileUtils.mkdir_p(@subsubdirectory)
|
|
28
|
-
[@file1,@file2,@file3,@file4,@file5,@file6].each {|path| IMW::Random.file path}
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
before(:all) do
|
|
32
|
-
@root_directory = IMW::DIRECTORIES[:dump] + "/find_extension_spec"
|
|
33
|
-
@subdirectory = @root_directory + "/subdir"
|
|
34
|
-
@subsubdirectory = @subdirectory + "/subsubdir"
|
|
35
|
-
@fake_directory = @root_directory + "/notreal"
|
|
36
|
-
@file1 = @root_directory + "/my_file1.txt"
|
|
37
|
-
@file2 = @root_directory + "/my_file2.csv"
|
|
38
|
-
@file3 = @root_directory + "/my_file3.dat"
|
|
39
|
-
@file4 = @subdirectory + "/your_file4.html"
|
|
40
|
-
@file5 = @subdirectory + "/your_file5.csv"
|
|
41
|
-
@file6 = @subdirectory + "/your_file5"
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
before(:each) do
|
|
45
|
-
create_sample_files
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
after(:each) do
|
|
49
|
-
FileUtils.rm_rf @root_directory
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
describe "when listing files with absolute paths contained in a directory" do
|
|
53
|
-
|
|
54
|
-
it "should raise an error when listing a non-exsiting directory" do
|
|
55
|
-
lambda {Find.files_in_directory(@fake_directory) }.should raise_error(IMW::PathError)
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
it "should find every file by default" do
|
|
59
|
-
Find.files_in_directory(@root_directory).should match_without_regard_to_order([@file1,@file2,@file3,@file4,@file5,@file6])
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
it "should only find files which match its :include argument" do
|
|
63
|
-
Find.files_in_directory(@root_directory, :include => /.*\.csv$/).should match_without_regard_to_order([@file2,@file5])
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
it "should not find files which match its :exclude argument" do
|
|
67
|
-
Find.files_in_directory(@root_directory, :exclude => /.*\.csv$/).should match_without_regard_to_order([@file1,@file3,@file4,@file6])
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
it "should only find files which match its :include argument and don't match its :exclude argument" do
|
|
71
|
-
Find.files_in_directory(@root_directory, :include => /my/, :exclude => /.*\.csv$/).should match_without_regard_to_order([@file1,@file3])
|
|
72
|
-
end
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
describe "when listing files with relative paths contained in a directory" do
|
|
76
|
-
|
|
77
|
-
def strip_root_directory array
|
|
78
|
-
array.map {|item| item[@root_directory.length + 1,item.size]}
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
it "should raise an error when listing a non-exsiting directory" do
|
|
82
|
-
lambda {Find.files_in_directory(@fake_directory) }.should raise_error(IMW::PathError)
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
it "should find every file by default" do
|
|
86
|
-
Find.files_relative_to_directory(@root_directory).should match_without_regard_to_order(strip_root_directory([@file1,@file2,@file3,@file4,@file5,@file6]))
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
it "should only find files which match its :include argument" do
|
|
90
|
-
Find.files_relative_to_directory(@root_directory, :include => /.*\.csv$/).should match_without_regard_to_order(strip_root_directory([@file2,@file5]))
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
it "should not find files which match its :exclude argument" do
|
|
94
|
-
Find.files_relative_to_directory(@root_directory, :exclude => /.*\.csv$/).should match_without_regard_to_order(strip_root_directory([@file1,@file3,@file4,@file6]))
|
|
95
|
-
end
|
|
96
|
-
|
|
97
|
-
it "should only find files which match its :include argument and don't match its :exclude argument" do
|
|
98
|
-
Find.files_relative_to_directory(@root_directory, :include => /^my/, :exclude => /.*\.csv$/).should match_without_regard_to_order(strip_root_directory([@file1,@file3]))
|
|
99
|
-
end
|
|
100
|
-
|
|
101
|
-
end
|
|
102
|
-
|
|
103
|
-
describe "when listing handles in a directory" do
|
|
104
|
-
|
|
105
|
-
it "should return a unique set of handles" do
|
|
106
|
-
Find.handles_in_directory(@root_directory, :include => /your/).should match_without_regard_to_order([:your_file4, :your_file5])
|
|
107
|
-
end
|
|
108
|
-
end
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
end
|
|
112
|
-
|
|
113
|
-
# puts "#{File.basename(__FILE__)}: You throw your Monkeywrench backwards over your shoulder and run like mad to go find it. Again, and again, and again." # at bottom
|
|
@@ -1,89 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. spec/imw/workflow/rip/local_spec.rb -- specs for copying files from local disk
|
|
3
|
-
#
|
|
4
|
-
# == About
|
|
5
|
-
#
|
|
6
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
7
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
8
|
-
# License:: GPL 3.0
|
|
9
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
10
|
-
#
|
|
11
|
-
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
|
12
|
-
require IMW_SPEC_DIR + "/imw/matchers/without_regard_to_order_matcher.rb"
|
|
13
|
-
|
|
14
|
-
require 'fileutils'
|
|
15
|
-
|
|
16
|
-
require 'imw/utils/random'
|
|
17
|
-
require 'imw/utils/extensions/find'
|
|
18
|
-
require 'imw/workflow/rip/local'
|
|
19
|
-
|
|
20
|
-
describe "Ripping from local disk" do
|
|
21
|
-
|
|
22
|
-
include Spec::Matchers::IMW
|
|
23
|
-
|
|
24
|
-
before(:all) do
|
|
25
|
-
@root_directory = IMW::DIRECTORIES[:dump] + "/local_spec"
|
|
26
|
-
@file1 = @root_directory + "/first.csv"
|
|
27
|
-
|
|
28
|
-
@source_directory1 = @root_directory + "/source1"
|
|
29
|
-
@file2 = @source_directory1 + "/second.txt"
|
|
30
|
-
@file3 = @source_directory1 + "/third.csv"
|
|
31
|
-
|
|
32
|
-
@source_directory2 = @root_directory + "/source2"
|
|
33
|
-
@file4 = @source_directory2 + "/fourth.txt"
|
|
34
|
-
@file5a = @source_directory2 + "/fifth-shared.yaml"
|
|
35
|
-
|
|
36
|
-
@source_directory3 = @source_directory2 + "/source3-nested"
|
|
37
|
-
@file5b = @source_directory3 + "/fifth-shared.yaml"
|
|
38
|
-
|
|
39
|
-
@target_directory = @root_directory + "/target"
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
before(:each) do
|
|
43
|
-
FileUtils.mkdir([@root_directory,@source_directory1,@source_directory2,@source_directory3,@target_directory])
|
|
44
|
-
[@file1,@file2,@file3,@file4,@file5a,@file5b].each {|file| IMW::Random.file(file)}
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
after(:each) do
|
|
48
|
-
FileUtils.rm_rf @root_directory
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
def basenames_of files
|
|
53
|
-
files.map {|file| File.basename file}
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
it "should raise an error when attempting to copy to a non-existent target directory" do
|
|
57
|
-
FileUtils.rm_rf @target_directory
|
|
58
|
-
lambda { IMW::Rip.from_local_disk(@target_directory,@source_directory1)}.should raise_error(IMW::PathError)
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
it "should copy all files in all directories and paths recursively to the target directory without any hierarchy" do
|
|
62
|
-
IMW::Rip.from_local_disk(@target_directory,@file1,@source_directory1,@source_directory2)
|
|
63
|
-
Find.files_relative_to_directory(@target_directory).should match_without_regard_to_order(basenames_of([@file1,@file2,@file3,@file4,@file5a]))
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
it "should accept a block which establishes a hierarchy to be created in the target directory and which skips copying certain files if it returns nil" do
|
|
67
|
-
|
|
68
|
-
# complicated block to copy files to sub-directories of the target
|
|
69
|
-
# directory depending on their extension
|
|
70
|
-
IMW::Rip.from_local_disk(@target_directory,@file1,@source_directory1,@source_directory2) do |path|
|
|
71
|
-
if File.extname(path) == '.txt' then
|
|
72
|
-
File.join('txt',File.basename(path)) # put text files in txt
|
|
73
|
-
elsif File.extname(path) == '.csv' then
|
|
74
|
-
File.join("csv",File.basename(path)) # put csv files in csv
|
|
75
|
-
else
|
|
76
|
-
nil # don't copy other extensions
|
|
77
|
-
end
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
# what we would expect to see from that block
|
|
81
|
-
txt = [@file2,@file4].map {|path| File.join("txt",File.basename(path))}
|
|
82
|
-
csv = [@file1,@file3].map {|path| File.join("csv",File.basename(path))}
|
|
83
|
-
|
|
84
|
-
Find.files_relative_to_directory(@target_directory).should match_without_regard_to_order(txt + csv)
|
|
85
|
-
end
|
|
86
|
-
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
# puts "#{File.basename(__FILE__)}: Having found the platter you were looking for, you stare at it, examining your reflection. What a handsome chimp you are!" # at bottom
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. spec/imw/workflow/rip_spec.rb -- spec for rip.rb
|
|
3
|
-
#
|
|
4
|
-
# == About
|
|
5
|
-
#
|
|
6
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
7
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
8
|
-
# License:: GPL 3.0
|
|
9
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
10
|
-
#
|
|
11
|
-
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
12
|
-
|
|
13
|
-
# require 'imw/workflow/rip'
|
|
14
|
-
#
|
|
15
|
-
# describe Source do
|
|
16
|
-
#
|
|
17
|
-
# before(:all) do
|
|
18
|
-
# @source = IMW::Source.new(:fake_source)
|
|
19
|
-
# @source.stub("returns path to ripd directory",:path_to => IMW::DIRECTORIES[:dump] + "/source_rip_spec")
|
|
20
|
-
# end
|
|
21
|
-
#
|
|
22
|
-
# it "should raise an error when asked to rip in an unknown way" do
|
|
23
|
-
# @source.rip_from :silly_way
|
|
24
|
-
# end
|
|
25
|
-
# end
|
|
26
|
-
|
|
27
|
-
# puts "#{File.basename(__FILE__)}: Bending over, you hear a thunderous RRRRRRRIIIIIP and then scuttle off to check your pants..." # at bottom
|
|
@@ -1,94 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. spec/support/archive_contents_matcher.rb -- matches contents of archive to disk
|
|
3
|
-
#
|
|
4
|
-
# == About
|
|
5
|
-
#
|
|
6
|
-
# An RSpec matcher which tests that an archive of files has the same
|
|
7
|
-
# contents as various paths on disk.
|
|
8
|
-
#
|
|
9
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
10
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
11
|
-
# License:: GPL 3.0
|
|
12
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
13
|
-
#
|
|
14
|
-
|
|
15
|
-
require 'find'
|
|
16
|
-
|
|
17
|
-
module Spec
|
|
18
|
-
module Matchers
|
|
19
|
-
module IMW
|
|
20
|
-
|
|
21
|
-
# Match the contents of the archive against files or directories
|
|
22
|
-
# in +paths+.
|
|
23
|
-
#
|
|
24
|
-
# Options include:
|
|
25
|
-
#
|
|
26
|
-
# <tt>:relative_to</tt>:: a leading path which will be stripped
|
|
27
|
-
# from all +paths+ before comparison with the contents of the
|
|
28
|
-
# archive.
|
|
29
|
-
class ArchiveContentsMatchPaths
|
|
30
|
-
|
|
31
|
-
private
|
|
32
|
-
def initialize paths,opts = {}
|
|
33
|
-
opts.reverse_merge!({:relative_to => nil})
|
|
34
|
-
paths = [paths] if paths.class == String
|
|
35
|
-
@paths = paths
|
|
36
|
-
@relative_to = opts[:relative_to]
|
|
37
|
-
find_paths_contents
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
def find_paths_contents
|
|
41
|
-
# find all the files
|
|
42
|
-
contents = []
|
|
43
|
-
@paths.each do |path|
|
|
44
|
-
path = File.expand_path path
|
|
45
|
-
if File.file? path then
|
|
46
|
-
contents << path
|
|
47
|
-
elsif File.directory? path then
|
|
48
|
-
contents += Find.files_in_directory(path)
|
|
49
|
-
end
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
# strip leading path
|
|
53
|
-
contents.map! do |path|
|
|
54
|
-
# the +1 also skips the path separator after the :relative_to prefix, leaving a relative path
|
|
55
|
-
path = path[@relative_to.length + 1,path.size]
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
@paths_contents = contents.to_set
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
def pretty_print set
|
|
62
|
-
set.to_a.join("\n\t")
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
public
|
|
66
|
-
def matches? archive
|
|
67
|
-
@archive = archive
|
|
68
|
-
@archive_contents = @archive.contents.to_set
|
|
69
|
-
@archive_contents == @paths_contents
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
def failure_message
|
|
73
|
-
missing_from_archive = "missing from archive:\n\t#{pretty_print(@paths_contents - @archive_contents)}\n"
|
|
74
|
-
missing_from_paths = "missing from paths:\n\t#{pretty_print(@archive_contents - @paths_contents)}\n"
|
|
75
|
-
common = "common to both:\n\t#{pretty_print(@archive_contents & @paths_contents)}\n"
|
|
76
|
-
"expected contents of archive (#{@archive.path}) and paths (#{@paths.join(", ")}) to be identical.\n#{missing_from_archive}\n#{missing_from_paths}\n#{common}"
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
def negative_failure_message
|
|
80
|
-
"expected contents of archive (#{@archive.path}) and paths (#{@paths.join(", ")}) to differ."
|
|
81
|
-
end
|
|
82
|
-
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
# Invokes the matcher <tt>Spec::Matchers::IMW::ArchiveContentsMatchPaths</tt>.
|
|
86
|
-
def contain_paths_like paths, opts = {}
|
|
87
|
-
ArchiveContentsMatchPaths.new(paths,opts)
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
end
|
|
91
|
-
end
|
|
92
|
-
end
|
|
93
|
-
|
|
94
|
-
# puts "#{File.basename(__FILE__)}: An archive is something that is bigger on the inside than it is on the outside." # at bottom
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. spec/support/directory_contents_matcher.rb -- matches files between directories
|
|
3
|
-
#
|
|
4
|
-
# == About
|
|
5
|
-
#
|
|
6
|
-
# An RSpec matcher which tests that two directories share the same set
|
|
7
|
-
# of files.
|
|
8
|
-
#
|
|
9
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
10
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
11
|
-
# License:: GPL 3.0
|
|
12
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
13
|
-
#
|
|
14
|
-
|
|
15
|
-
require 'set'
|
|
16
|
-
require 'find'
|
|
17
|
-
|
|
18
|
-
module Spec
|
|
19
|
-
module Matchers
|
|
20
|
-
module IMW
|
|
21
|
-
|
|
22
|
-
class DirectoryContentsMatcher
|
|
23
|
-
private
|
|
24
|
-
def initialize dir
|
|
25
|
-
@dir = File.expand_path(dir)
|
|
26
|
-
@dir_files = Find.files_relative_to_directory(@dir).to_set
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
# Pretty print a set of files.
|
|
30
|
-
def format_files_for_printing files
|
|
31
|
-
files.to_a.join("\n\t")
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
public
|
|
35
|
-
def matches? target
|
|
36
|
-
@target = target
|
|
37
|
-
@target_files = Find.files_relative_to_directory(@target).to_set
|
|
38
|
-
@target_files == @dir_files
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
def failure_message
|
|
42
|
-
files_missing_from_dir = format_files_for_printing(@target_files - @dir_files)
|
|
43
|
-
files_missing_from_target = format_files_for_printing(@dir_files - @target_files)
|
|
44
|
-
files_in_common = format_files_for_printing(@dir_files & @target_files)
|
|
45
|
-
"expected files in #{@dir} and #{@target} to be identical.\n\nfiles missing from #{@dir}:\n\t#{files_missing_from_dir}\n\nfiles missing from #{@target}:\n\t#{files_missing_from_target}\n\nfiles in common:\n\t#{files_in_common}"
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
def negative_failure_message
|
|
49
|
-
"expected files in #{@dir} and #{@target} to be different"
|
|
50
|
-
end
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
# Checks that files in one directory match those in another.
|
|
54
|
-
def contain_files_matching_directory dir
|
|
55
|
-
DirectoryContentsMatcher.new(dir)
|
|
56
|
-
end
|
|
57
|
-
end
|
|
58
|
-
end
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
# puts "#{File.basename(__FILE__)}: From far away, the two filing cabinets appear to be identical. Upon closer inspection, one of them is actually a Maine lobster. Delicious!" # at bottom
|