imw 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (143) hide show
  1. data/.gitignore +4 -1
  2. data/Rakefile +10 -0
  3. data/TODO +18 -0
  4. data/VERSION +1 -1
  5. data/bin/imw +1 -1
  6. data/etc/imwrc.rb +0 -50
  7. data/examples/dataset.rb +12 -0
  8. data/lib/imw/boot.rb +55 -9
  9. data/lib/imw/dataset/paths.rb +15 -24
  10. data/lib/imw/dataset/workflow.rb +131 -72
  11. data/lib/imw/dataset.rb +94 -186
  12. data/lib/imw/parsers/html_parser.rb +1 -1
  13. data/lib/imw/parsers.rb +1 -1
  14. data/lib/imw/repository.rb +3 -27
  15. data/lib/imw/resource.rb +190 -0
  16. data/lib/imw/resources/archive.rb +97 -0
  17. data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
  18. data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
  19. data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
  20. data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
  21. data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
  22. data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
  23. data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
  24. data/lib/imw/resources/archives_and_compressed.rb +32 -0
  25. data/lib/imw/resources/compressed_file.rb +89 -0
  26. data/lib/imw/resources/compressible.rb +77 -0
  27. data/lib/imw/resources/formats/delimited.rb +92 -0
  28. data/lib/imw/resources/formats/excel.rb +125 -0
  29. data/lib/imw/resources/formats/json.rb +53 -0
  30. data/lib/imw/resources/formats/sgml.rb +72 -0
  31. data/lib/imw/resources/formats/yaml.rb +53 -0
  32. data/lib/imw/resources/formats.rb +32 -0
  33. data/lib/imw/resources/local.rb +198 -0
  34. data/lib/imw/resources/remote.rb +110 -0
  35. data/lib/imw/resources/schemes/hdfs.rb +242 -0
  36. data/lib/imw/resources/schemes/http.rb +161 -0
  37. data/lib/imw/resources/schemes/s3.rb +137 -0
  38. data/lib/imw/resources/schemes.rb +19 -0
  39. data/lib/imw/resources.rb +118 -0
  40. data/lib/imw/runner.rb +5 -4
  41. data/lib/imw/transforms/archiver.rb +215 -0
  42. data/lib/imw/transforms/transferer.rb +103 -0
  43. data/lib/imw/transforms.rb +8 -0
  44. data/lib/imw/utils/error.rb +26 -30
  45. data/lib/imw/utils/extensions/array.rb +5 -15
  46. data/lib/imw/utils/extensions/hash.rb +6 -16
  47. data/lib/imw/utils/extensions/hpricot.rb +0 -14
  48. data/lib/imw/utils/extensions/string.rb +5 -15
  49. data/lib/imw/utils/extensions/symbol.rb +0 -13
  50. data/lib/imw/utils/extensions.rb +65 -0
  51. data/lib/imw/utils/log.rb +14 -13
  52. data/lib/imw/utils/misc.rb +0 -6
  53. data/lib/imw/utils/paths.rb +101 -42
  54. data/lib/imw/utils/version.rb +8 -9
  55. data/lib/imw/utils.rb +2 -18
  56. data/lib/imw.rb +92 -17
  57. data/spec/data/sample.csv +1 -1
  58. data/spec/data/sample.json +1 -0
  59. data/spec/data/sample.tsv +1 -1
  60. data/spec/data/sample.txt +1 -1
  61. data/spec/data/sample.xml +1 -1
  62. data/spec/data/sample.yaml +1 -1
  63. data/spec/imw/dataset/paths_spec.rb +32 -0
  64. data/spec/imw/dataset/workflow_spec.rb +41 -0
  65. data/spec/imw/resource_spec.rb +79 -0
  66. data/spec/imw/resources/archive_spec.rb +69 -0
  67. data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
  68. data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
  69. data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
  70. data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
  71. data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
  72. data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
  73. data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
  74. data/spec/imw/resources/compressed_file_spec.rb +48 -0
  75. data/spec/imw/resources/compressible_spec.rb +36 -0
  76. data/spec/imw/resources/formats/delimited_spec.rb +33 -0
  77. data/spec/imw/resources/formats/json_spec.rb +32 -0
  78. data/spec/imw/resources/formats/sgml_spec.rb +24 -0
  79. data/spec/imw/resources/formats/yaml_spec.rb +41 -0
  80. data/spec/imw/resources/local_spec.rb +98 -0
  81. data/spec/imw/resources/remote_spec.rb +35 -0
  82. data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
  83. data/spec/imw/resources/schemes/http_spec.rb +19 -0
  84. data/spec/imw/resources/schemes/s3_spec.rb +19 -0
  85. data/spec/imw/transforms/archiver_spec.rb +120 -0
  86. data/spec/imw/transforms/transferer_spec.rb +113 -0
  87. data/spec/imw/utils/paths_spec.rb +5 -33
  88. data/spec/imw/utils/shared_paths_spec.rb +29 -0
  89. data/spec/spec_helper.rb +5 -5
  90. data/spec/support/paths_matcher.rb +67 -0
  91. data/spec/support/random.rb +39 -36
  92. metadata +88 -75
  93. data/lib/imw/dataset/task.rb +0 -41
  94. data/lib/imw/files/archive.rb +0 -113
  95. data/lib/imw/files/basicfile.rb +0 -122
  96. data/lib/imw/files/binary.rb +0 -28
  97. data/lib/imw/files/compressed_file.rb +0 -93
  98. data/lib/imw/files/compressed_files_and_archives.rb +0 -334
  99. data/lib/imw/files/compressible.rb +0 -103
  100. data/lib/imw/files/csv.rb +0 -113
  101. data/lib/imw/files/directory.rb +0 -62
  102. data/lib/imw/files/excel.rb +0 -84
  103. data/lib/imw/files/json.rb +0 -41
  104. data/lib/imw/files/sgml.rb +0 -46
  105. data/lib/imw/files/text.rb +0 -68
  106. data/lib/imw/files/yaml.rb +0 -46
  107. data/lib/imw/files.rb +0 -125
  108. data/lib/imw/packagers/archiver.rb +0 -126
  109. data/lib/imw/packagers/s3_mover.rb +0 -36
  110. data/lib/imw/packagers.rb +0 -8
  111. data/lib/imw/utils/components.rb +0 -61
  112. data/lib/imw/utils/config.rb +0 -46
  113. data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
  114. data/lib/imw/utils/extensions/core.rb +0 -27
  115. data/lib/imw/utils/extensions/dir.rb +0 -24
  116. data/lib/imw/utils/extensions/file_core.rb +0 -64
  117. data/lib/imw/utils/extensions/typed_struct.rb +0 -22
  118. data/lib/imw/utils/extensions/uri.rb +0 -59
  119. data/lib/imw/utils/view/dump_csv.rb +0 -112
  120. data/lib/imw/utils/view/dump_csv_older.rb +0 -117
  121. data/lib/imw/utils/view.rb +0 -113
  122. data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
  123. data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
  124. data/spec/imw/files/archive_spec.rb +0 -118
  125. data/spec/imw/files/basicfile_spec.rb +0 -121
  126. data/spec/imw/files/bz2_spec.rb +0 -32
  127. data/spec/imw/files/compressed_file_spec.rb +0 -96
  128. data/spec/imw/files/compressible_spec.rb +0 -100
  129. data/spec/imw/files/file_spec.rb +0 -144
  130. data/spec/imw/files/gz_spec.rb +0 -32
  131. data/spec/imw/files/rar_spec.rb +0 -33
  132. data/spec/imw/files/tar_spec.rb +0 -31
  133. data/spec/imw/files/text_spec.rb +0 -23
  134. data/spec/imw/files/zip_spec.rb +0 -31
  135. data/spec/imw/files_spec.rb +0 -38
  136. data/spec/imw/packagers/archiver_spec.rb +0 -125
  137. data/spec/imw/packagers/s3_mover_spec.rb +0 -7
  138. data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
  139. data/spec/imw/utils/extensions/find_spec.rb +0 -113
  140. data/spec/imw/workflow/rip/local_spec.rb +0 -89
  141. data/spec/imw/workflow/rip_spec.rb +0 -27
  142. data/spec/support/archive_contents_matcher.rb +0 -94
  143. data/spec/support/directory_contents_matcher.rb +0 -61
@@ -1,125 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Packagers::Archiver do
4
- before do
5
- @name = 'foobar'
6
-
7
- # regular files
8
- @csv = "foobar-csv.csv"
9
- @xml = "foobar-xml.xml"
10
- @txt = "foobar-txt.txt"
11
- @blah = "foobar"
12
-
13
- # compressed files
14
- @bz2 = "foobar-bz2.bz2"
15
-
16
- # archives
17
- @zip = "foobar-zip.zip"
18
- @tarbz2 = "foobar-tarbz2.tar.bz2"
19
- @rar = "foobar-rar.rar"
20
- @archives = [@zip, @tarbz2]
21
-
22
- @files = [@csv, @xml, @txt, @blah, @bz2, @zip, @tarbz2]
23
-
24
- @files.each do |path|
25
- IMWTest::Random.file path
26
- end
27
- end
28
-
29
-
30
- describe "when preparing files" do
31
- before do
32
- @archiver = IMW::Packagers::Archiver.new @name, @files
33
- @archiver.prepare!
34
- end
35
-
36
- after do
37
- FileUtils.rm_rf @archiver.tmp_dir
38
- end
39
-
40
- it "should name its archive directory properly" do
41
- @archiver.tmp_dir.should contain(@name)
42
- end
43
-
44
- it "should copy regular files to its archive directory" do
45
- @archiver.dir.should contain(@csv, @xml, @txt)
46
- end
47
-
48
- it "should uncompress compressed files to its archive directory" do
49
- @archiver.dir.should contain('foobar-bz2')
50
- @archiver.dir.should_not contain(@bz2)
51
- end
52
-
53
- it "should copy the content of archive files to its archive directory (but not the actual archives)" do
54
- @archives.each do |archive|
55
- @archiver.dir.should_not contain(archive)
56
- @archiver.dir.should contain(*IMW.open(archive).contents)
57
- end
58
- end
59
-
60
- it "should not move any of the original files" do
61
- IMWTest::TMP_DIR.should contain(@files)
62
- end
63
- end
64
-
65
- describe "when preparing files while renaming them" do
66
- before do
67
-
68
- # to test renaming, consider the new paths to be the old paths
69
- # but with the hyphens mapped to underscores...
70
- @renaming_hash = {}
71
- @files.each { |f| @renaming_hash[f] = f.gsub(/-/,'_') }
72
-
73
- @archiver = IMW::Packagers::Archiver.new @name, @renaming_hash
74
- @archiver.prepare!
75
- end
76
-
77
- after do
78
- FileUtils.rm_rf @archiver.tmp_dir
79
- end
80
-
81
- it "should copy regular files to its archive directory, renaming them" do
82
- @archiver.dir.should_not contain([@csv, @xml, @txt])
83
- @archiver.dir.should contain([@csv, @xml, @txt].map { |f| @renaming_hash[f] })
84
- end
85
-
86
- it "should uncompress compressed files to its archive directory, renaming them" do
87
- @archiver.dir.should contain('foobar_bz2')
88
- @archiver.dir.should_not contain('foobar-bz2')
89
- @archiver.dir.should_not contain(@renaming_hash[@bz2])
90
- @archiver.dir.should_not contain(@bz2)
91
- end
92
- end
93
-
94
- describe "when packaging files" do
95
- before do
96
- @archiver = IMW::Packagers::Archiver.new @name, @files
97
- @archiver.prepare!
98
-
99
- @package_tarbz2 = "package.tar.bz2"
100
- @package_zip = "package.zip"
101
- @packages = [@package_tarbz2, @package_zip]
102
- end
103
-
104
- after do
105
- FileUtils.rm_rf @archiver.tmp_dir
106
- end
107
-
108
- it "should create a package file containing the proper files" do
109
- @packages.each do |package|
110
- @archiver.package! package
111
- @archiver.tmp_dir.should contain(IMW.open(package).contents)
112
- end
113
- end
114
-
115
- it "should return the package file" do
116
- @packages.each do |package|
117
- output = @archiver.package! package
118
- output.basename.should == package
119
- end
120
- end
121
-
122
-
123
- end
124
- end
125
-
@@ -1,7 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Packagers::S3Mover do
4
- it { pending }
5
- end
6
-
7
-
@@ -1,72 +0,0 @@
1
- #
2
- # h2. spec/imw/utils/extensions/file_core_spec.rb -- spec for extensions to core file module
3
- #
4
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
5
- # Copyright:: Copyright (c) 2008 infochimps.org
6
- # License:: GPL 3.0
7
- # Website:: http://infinitemonkeywrench.org/
8
- #
9
-
10
- require File.join(File.dirname(__FILE__),'../../../spec_helper')
11
-
12
- require 'fileutils'
13
-
14
- require 'imw/utils/random'
15
-
16
- describe File do
17
-
18
- it "should return the 'name' of a file with 'name_of_file'" do
19
- File.name_of_file("/path/to/some_file.txt").should eql("some_file")
20
- end
21
-
22
- describe "when finding the handle corresponding to a path" do
23
-
24
- it "should correctly identify paths with the processing instruction suffix" do
25
- File.handle("/path/to/the_handle#{IMW::PROCESSING_INSTRUCTION_SUFFIX}.yaml").should eql(:the_handle)
26
- end
27
-
28
- it "should correctly identify paths with the metadata instruction suffix" do
29
- File.handle("/path/to/the_handle#{IMW::METADATA_SUFFIX}.yaml").should eql(:the_handle)
30
- end
31
-
32
- it "should raise an error if the path does not correspond to a handle" do
33
- lambda {File.handle("/path/to/the_handle.txt")}.should raise_error(IMW::PathError)
34
- end
35
- end
36
-
37
- describe "when creating unique filenames" do
38
-
39
- before(:each) do
40
- @root_directory = IMW::DIRECTORIES[:dump] + "/file_core_spec"
41
- @file0 = @root_directory + "/the_original.txt"
42
- @file1 = @root_directory + "/the_original.txt.1"
43
- @file2 = @root_directory + "/the_original.txt.2"
44
- FileUtils.mkdir(@root_directory)
45
- end
46
-
47
- after(:each) do
48
- FileUtils.rm_rf @root_directory
49
- end
50
-
51
- it "should return the given path if there is no such file already" do
52
- File.uniquify(@file0).should eql(@file0)
53
- end
54
-
55
- it "should return the given path with a numerical suffix of `.1' if the file exists" do
56
- IMW::Random.file(@file0)
57
- File.uniquify(@file0).should eql(@file1)
58
- end
59
-
60
- it "should return the given path with a numerical suffix o `.2' if the file exists and a file with a suffix of `.1' also exists" do
61
- IMW::Random.file(@file0)
62
- IMW::Random.file(@file1)
63
- File.uniquify(@file0).should eql(@file2)
64
- end
65
-
66
- end
67
-
68
- end
69
-
70
-
71
-
72
- # puts "#{File.basename(__FILE__)}: You bend the file folder almost in half and watch as it springs back to shape." # at bottom
@@ -1,113 +0,0 @@
1
- #
2
- # h2. spec/imw/utils/extensions/find_spec.rb -- spec for find.rb
3
- #
4
- # == About
5
- #
6
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
7
- # Copyright:: Copyright (c) 2008 infochimps.org
8
- # License:: GPL 3.0
9
- # Website:: http://infinitemonkeywrench.org/
10
- #
11
-
12
- require File.join(File.dirname(__FILE__),'../../../spec_helper')
13
- require IMW_SPEC_DIR + "/imw/matchers/without_regard_to_order_matcher"
14
-
15
- require 'fileutils'
16
- require 'set'
17
-
18
- require 'imw/utils'
19
- require 'imw/utils/random'
20
- require 'imw/utils/extensions/find'
21
-
22
- describe Find do
23
-
24
- include Spec::Matchers::IMW
25
-
26
- def create_sample_files
27
- FileUtils.mkdir_p(@subsubdirectory)
28
- [@file1,@file2,@file3,@file4,@file5,@file6].each {|path| IMW::Random.file path}
29
- end
30
-
31
- before(:all) do
32
- @root_directory = IMW::DIRECTORIES[:dump] + "/find_extension_spec"
33
- @subdirectory = @root_directory + "/subdir"
34
- @subsubdirectory = @subdirectory + "/subsubdir"
35
- @fake_directory = @root_directory + "/notreal"
36
- @file1 = @root_directory + "/my_file1.txt"
37
- @file2 = @root_directory + "/my_file2.csv"
38
- @file3 = @root_directory + "/my_file3.dat"
39
- @file4 = @subdirectory + "/your_file4.html"
40
- @file5 = @subdirectory + "/your_file5.csv"
41
- @file6 = @subdirectory + "/your_file5"
42
- end
43
-
44
- before(:each) do
45
- create_sample_files
46
- end
47
-
48
- after(:each) do
49
- FileUtils.rm_rf @root_directory
50
- end
51
-
52
- describe "when listing files with absolute paths contained in a directory" do
53
-
54
- it "should raise an error when listing a non-exsiting directory" do
55
- lambda {Find.files_in_directory(@fake_directory) }.should raise_error(IMW::PathError)
56
- end
57
-
58
- it "should find every file by default" do
59
- Find.files_in_directory(@root_directory).should match_without_regard_to_order([@file1,@file2,@file3,@file4,@file5,@file6])
60
- end
61
-
62
- it "should only find files which match its :include argument" do
63
- Find.files_in_directory(@root_directory, :include => /.*\.csv$/).should match_without_regard_to_order([@file2,@file5])
64
- end
65
-
66
- it "should not find files which match its :exclude argument" do
67
- Find.files_in_directory(@root_directory, :exclude => /.*\.csv$/).should match_without_regard_to_order([@file1,@file3,@file4,@file6])
68
- end
69
-
70
- it "should only find files which match its :include argument and don't match its :exclude argument" do
71
- Find.files_in_directory(@root_directory, :include => /my/, :exclude => /.*\.csv$/).should match_without_regard_to_order([@file1,@file3])
72
- end
73
- end
74
-
75
- describe "when listing files with relative paths contained in a directory" do
76
-
77
- def strip_root_directory array
78
- array.map {|item| item[@root_directory.length + 1,item.size]}
79
- end
80
-
81
- it "should raise an error when listing a non-exsiting directory" do
82
- lambda {Find.files_in_directory(@fake_directory) }.should raise_error(IMW::PathError)
83
- end
84
-
85
- it "should find every file by default" do
86
- Find.files_relative_to_directory(@root_directory).should match_without_regard_to_order(strip_root_directory([@file1,@file2,@file3,@file4,@file5,@file6]))
87
- end
88
-
89
- it "should only find files which match its :include argument" do
90
- Find.files_relative_to_directory(@root_directory, :include => /.*\.csv$/).should match_without_regard_to_order(strip_root_directory([@file2,@file5]))
91
- end
92
-
93
- it "should not find files which match its :exclude argument" do
94
- Find.files_relative_to_directory(@root_directory, :exclude => /.*\.csv$/).should match_without_regard_to_order(strip_root_directory([@file1,@file3,@file4,@file6]))
95
- end
96
-
97
- it "should only find files which match its :include argument and don't match its :exclude argument" do
98
- Find.files_relative_to_directory(@root_directory, :include => /^my/, :exclude => /.*\.csv$/).should match_without_regard_to_order(strip_root_directory([@file1,@file3]))
99
- end
100
-
101
- end
102
-
103
- describe "when listing handles in a directory" do
104
-
105
- it "should return a unique set of handles" do
106
- Find.handles_in_directory(@root_directory, :include => /your/).should match_without_regard_to_order([:your_file4, :your_file5])
107
- end
108
- end
109
-
110
-
111
- end
112
-
113
- # puts "#{File.basename(__FILE__)}: You throw your Monkeywrench backwards over your shoulder and run like mad to go find it. Again, and again, and again." # at bottom
@@ -1,89 +0,0 @@
1
- #
2
- # h2. spec/imw/workflow/rip/local_spec.rb -- specs for copying files from local disk
3
- #
4
- # == About
5
- #
6
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
7
- # Copyright:: Copyright (c) 2008 infochimps.org
8
- # License:: GPL 3.0
9
- # Website:: http://infinitemonkeywrench.org/
10
- #
11
- require File.join(File.dirname(__FILE__),'../../../spec_helper')
12
- require IMW_SPEC_DIR + "/imw/matchers/without_regard_to_order_matcher.rb"
13
-
14
- require 'fileutils'
15
-
16
- require 'imw/utils/random'
17
- require 'imw/utils/extensions/find'
18
- require 'imw/workflow/rip/local'
19
-
20
- describe "Ripping from local disk" do
21
-
22
- include Spec::Matchers::IMW
23
-
24
- before(:all) do
25
- @root_directory = IMW::DIRECTORIES[:dump] + "/local_spec"
26
- @file1 = @root_directory + "/first.csv"
27
-
28
- @source_directory1 = @root_directory + "/source1"
29
- @file2 = @source_directory1 + "/second.txt"
30
- @file3 = @source_directory1 + "/third.csv"
31
-
32
- @source_directory2 = @root_directory + "/source2"
33
- @file4 = @source_directory2 + "/fourth.txt"
34
- @file5a = @source_directory2 + "/fifth-shared.yaml"
35
-
36
- @source_directory3 = @source_directory2 + "/source3-nested"
37
- @file5b = @source_directory3 + "/fifth-shared.yaml"
38
-
39
- @target_directory = @root_directory + "/target"
40
- end
41
-
42
- before(:each) do
43
- FileUtils.mkdir([@root_directory,@source_directory1,@source_directory2,@source_directory3,@target_directory])
44
- [@file1,@file2,@file3,@file4,@file5a,@file5b].each {|file| IMW::Random.file(file)}
45
- end
46
-
47
- after(:each) do
48
- FileUtils.rm_rf @root_directory
49
- end
50
-
51
-
52
- def basenames_of files
53
- files.map {|file| File.basename file}
54
- end
55
-
56
- it "should raise an error when attempting to copy to a non-existent target directory" do
57
- FileUtils.rm_rf @target_directory
58
- lambda { IMW::Rip.from_local_disk(@target_directory,@source_directory1)}.should raise_error(IMW::PathError)
59
- end
60
-
61
- it "should copy all files in all directories and paths recursively to the target directory without any hierarchy" do
62
- IMW::Rip.from_local_disk(@target_directory,@file1,@source_directory1,@source_directory2)
63
- Find.files_relative_to_directory(@target_directory).should match_without_regard_to_order(basenames_of([@file1,@file2,@file3,@file4,@file5a]))
64
- end
65
-
66
- it "should accept a block which establishes a hierarchy to be created in the target directory and which skips copying certain files if it returns nil" do
67
-
68
- # complicated block to copy files to sub-directories of the target
69
- # directory depending on their extension
70
- IMW::Rip.from_local_disk(@target_directory,@file1,@source_directory1,@source_directory2) do |path|
71
- if File.extname(path) == '.txt' then
72
- File.join('txt',File.basename(path)) # put text files in txt
73
- elsif File.extname(path) == '.csv' then
74
- File.join("csv",File.basename(path)) # put csv files in csv
75
- else
76
- nil # don't copy other extensions
77
- end
78
- end
79
-
80
- # what we would expect to see from that block
81
- txt = [@file2,@file4].map {|path| File.join("txt",File.basename(path))}
82
- csv = [@file1,@file3].map {|path| File.join("csv",File.basename(path))}
83
-
84
- Find.files_relative_to_directory(@target_directory).should match_without_regard_to_order(txt + csv)
85
- end
86
-
87
- end
88
-
89
- # puts "#{File.basename(__FILE__)}: Having found the platter you were looking for, you stare at it, examining your reflection. What a handsome chimp you are!" # at bottom
@@ -1,27 +0,0 @@
1
- #
2
- # h2. spec/imw/workflow/rip_spec.rb -- spec for rip.rb
3
- #
4
- # == About
5
- #
6
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
7
- # Copyright:: Copyright (c) 2008 infochimps.org
8
- # License:: GPL 3.0
9
- # Website:: http://infinitemonkeywrench.org/
10
- #
11
- require File.join(File.dirname(__FILE__),'../../spec_helper')
12
-
13
- # require 'imw/workflow/rip'
14
- #
15
- # describe Source do
16
- #
17
- # before(:all) do
18
- # @source = IMW::Source.new(:fake_source)
19
- # @source.stub("returns path to ripd directory",:path_to => IMW::DIRECTORIES[:dump] + "/source_rip_spec")
20
- # end
21
- #
22
- # it "should raise an error when asked to rip in an unknown way" do
23
- # @source.rip_from :silly_way
24
- # end
25
- # end
26
-
27
- # puts "#{File.basename(__FILE__)}: Bending over, you hear a thunderous RRRRRRRIIIIIP and then scuttle off to check your pants..." # at bottom
@@ -1,94 +0,0 @@
1
- #
2
- # h2. spec/support/archive_contents_matcher.rb -- matches contents of archive to disk
3
- #
4
- # == About
5
- #
6
- # An RSpec matcher which tests that an archive of files has the same
7
- # contents as various paths on disk.
8
- #
9
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
10
- # Copyright:: Copyright (c) 2008 infochimps.org
11
- # License:: GPL 3.0
12
- # Website:: http://infinitemonkeywrench.org/
13
- #
14
-
15
- require 'find'
16
-
17
- module Spec
18
- module Matchers
19
- module IMW
20
-
21
- # Match the contents of the archive against files or directories
22
- # in +paths+.
23
- #
24
- # Options include:
25
- #
26
- # <tt>:relative_to</tt>:: a leading path which will be stripped
27
- # from all +paths+ before comparison with the contents of the
28
- # archive.
29
- class ArchiveContentsMatchPaths
30
-
31
- private
32
- def initialize paths,opts = {}
33
- opts.reverse_merge!({:relative_to => nil})
34
- paths = [paths] if paths.class == String
35
- @paths = paths
36
- @relative_to = opts[:relative_to]
37
- find_paths_contents
38
- end
39
-
40
- def find_paths_contents
41
- # find all the files
42
- contents = []
43
- @paths.each do |path|
44
- path = File.expand_path path
45
- if File.file? path then
46
- contents << path
47
- elsif File.directory? path then
48
- contents += Find.files_in_directory(path)
49
- end
50
- end
51
-
52
- # strip leading path
53
- contents.map! do |path|
54
- # the +1 is because we want a relative path
55
- path = path[@relative_to.length + 1,path.size]
56
- end
57
-
58
- @paths_contents = contents.to_set
59
- end
60
-
61
- def pretty_print set
62
- set.to_a.join("\n\t")
63
- end
64
-
65
- public
66
- def matches? archive
67
- @archive = archive
68
- @archive_contents = @archive.contents.to_set
69
- @archive_contents == @paths_contents
70
- end
71
-
72
- def failure_message
73
- missing_from_archive = "missing from archive:\n\t#{pretty_print(@paths_contents - @archive_contents)}\n"
74
- missing_from_paths = "missing from paths:\n\t#{pretty_print(@archive_contents - @paths_contents)}\n"
75
- common = "common to both:\n\t#{pretty_print(@archive_contents & @paths_contents)}\n"
76
- "expected contents of archive (#{@archive.path}) and paths (#{@paths.join(", ")}) to be identical.\n#{missing_from_archive}\n#{missing_from_paths}\n#{common}"
77
- end
78
-
79
- def negative_failure_message
80
- "expected contents of archive (#{@archive.path}) and paths (#{@paths.join(", ")}) to differ."
81
- end
82
-
83
- end
84
-
85
- # Invokes the matcher <tt>Spec::Matchers::IMW::ArchiveContentsMatchPaths</tt>.
86
- def contain_paths_like paths, opts = {}
87
- ArchiveContentsMatchPaths.new(paths,opts)
88
- end
89
-
90
- end
91
- end
92
- end
93
-
94
- # puts "#{File.basename(__FILE__)}: An archive is something that is bigger on the inside than it is on the outside." # at bottom
@@ -1,61 +0,0 @@
1
- #
2
- # h2. spec/support/directory_contents_matcher.rb -- matches files between directories
3
- #
4
- # == About
5
- #
6
- # An RSpec matcher which tests that two directories share the same set
7
- # of files.
8
- #
9
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
10
- # Copyright:: Copyright (c) 2008 infochimps.org
11
- # License:: GPL 3.0
12
- # Website:: http://infinitemonkeywrench.org/
13
- #
14
-
15
- require 'set'
16
- require 'find'
17
-
18
- module Spec
19
- module Matchers
20
- module IMW
21
-
22
- class DirectoryContentsMatcher
23
- private
24
- def initialize dir
25
- @dir = File.expand_path(dir)
26
- @dir_files = Find.files_relative_to_directory(@dir).to_set
27
- end
28
-
29
- # Pretty print a set of files.
30
- def format_files_for_printing files
31
- files.to_a.join("\n\t")
32
- end
33
-
34
- public
35
- def matches? target
36
- @target = target
37
- @target_files = Find.files_relative_to_directory(@target).to_set
38
- @target_files == @dir_files
39
- end
40
-
41
- def failure_message
42
- files_missing_from_dir = format_files_for_printing(@target_files - @dir_files)
43
- files_missing_from_target = format_files_for_printing(@dir_files - @target_files)
44
- files_in_common = format_files_for_printing(@dir_files & @target_files)
45
- "expected files in #{@dir} and #{@target} to be identical.\n\nfiles missing from #{@dir}:\n\t#{files_missing_from_dir}\n\nfiles missing from #{@target}:\n\t#{files_missing_from_target}\n\nfiles in common:\n\t#{files_in_common}"
46
- end
47
-
48
- def negative_failure_message
49
- "expected files in #{@dir} and #{@target} to be different"
50
- end
51
- end
52
-
53
- # Checks that files in one directory match those in another.
54
- def contain_files_matching_directory dir
55
- DirectoryContentsMatcher.new(dir)
56
- end
57
- end
58
- end
59
- end
60
-
61
- # puts "#{File.basename(__FILE__)}: From far away, the two filing cabinets appear to be identical. Upon closer inspection, one of them is actually a Maine lobster. Delicious!" # at bottom