imw 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. data/.gitignore +4 -1
  2. data/Rakefile +10 -0
  3. data/TODO +18 -0
  4. data/VERSION +1 -1
  5. data/bin/imw +1 -1
  6. data/etc/imwrc.rb +0 -50
  7. data/examples/dataset.rb +12 -0
  8. data/lib/imw/boot.rb +55 -9
  9. data/lib/imw/dataset/paths.rb +15 -24
  10. data/lib/imw/dataset/workflow.rb +131 -72
  11. data/lib/imw/dataset.rb +94 -186
  12. data/lib/imw/parsers/html_parser.rb +1 -1
  13. data/lib/imw/parsers.rb +1 -1
  14. data/lib/imw/repository.rb +3 -27
  15. data/lib/imw/resource.rb +190 -0
  16. data/lib/imw/resources/archive.rb +97 -0
  17. data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
  18. data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
  19. data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
  20. data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
  21. data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
  22. data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
  23. data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
  24. data/lib/imw/resources/archives_and_compressed.rb +32 -0
  25. data/lib/imw/resources/compressed_file.rb +89 -0
  26. data/lib/imw/resources/compressible.rb +77 -0
  27. data/lib/imw/resources/formats/delimited.rb +92 -0
  28. data/lib/imw/resources/formats/excel.rb +125 -0
  29. data/lib/imw/resources/formats/json.rb +53 -0
  30. data/lib/imw/resources/formats/sgml.rb +72 -0
  31. data/lib/imw/resources/formats/yaml.rb +53 -0
  32. data/lib/imw/resources/formats.rb +32 -0
  33. data/lib/imw/resources/local.rb +198 -0
  34. data/lib/imw/resources/remote.rb +110 -0
  35. data/lib/imw/resources/schemes/hdfs.rb +242 -0
  36. data/lib/imw/resources/schemes/http.rb +161 -0
  37. data/lib/imw/resources/schemes/s3.rb +137 -0
  38. data/lib/imw/resources/schemes.rb +19 -0
  39. data/lib/imw/resources.rb +118 -0
  40. data/lib/imw/runner.rb +5 -4
  41. data/lib/imw/transforms/archiver.rb +215 -0
  42. data/lib/imw/transforms/transferer.rb +103 -0
  43. data/lib/imw/transforms.rb +8 -0
  44. data/lib/imw/utils/error.rb +26 -30
  45. data/lib/imw/utils/extensions/array.rb +5 -15
  46. data/lib/imw/utils/extensions/hash.rb +6 -16
  47. data/lib/imw/utils/extensions/hpricot.rb +0 -14
  48. data/lib/imw/utils/extensions/string.rb +5 -15
  49. data/lib/imw/utils/extensions/symbol.rb +0 -13
  50. data/lib/imw/utils/extensions.rb +65 -0
  51. data/lib/imw/utils/log.rb +14 -13
  52. data/lib/imw/utils/misc.rb +0 -6
  53. data/lib/imw/utils/paths.rb +101 -42
  54. data/lib/imw/utils/version.rb +8 -9
  55. data/lib/imw/utils.rb +2 -18
  56. data/lib/imw.rb +92 -17
  57. data/spec/data/sample.csv +1 -1
  58. data/spec/data/sample.json +1 -0
  59. data/spec/data/sample.tsv +1 -1
  60. data/spec/data/sample.txt +1 -1
  61. data/spec/data/sample.xml +1 -1
  62. data/spec/data/sample.yaml +1 -1
  63. data/spec/imw/dataset/paths_spec.rb +32 -0
  64. data/spec/imw/dataset/workflow_spec.rb +41 -0
  65. data/spec/imw/resource_spec.rb +79 -0
  66. data/spec/imw/resources/archive_spec.rb +69 -0
  67. data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
  68. data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
  69. data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
  70. data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
  71. data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
  72. data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
  73. data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
  74. data/spec/imw/resources/compressed_file_spec.rb +48 -0
  75. data/spec/imw/resources/compressible_spec.rb +36 -0
  76. data/spec/imw/resources/formats/delimited_spec.rb +33 -0
  77. data/spec/imw/resources/formats/json_spec.rb +32 -0
  78. data/spec/imw/resources/formats/sgml_spec.rb +24 -0
  79. data/spec/imw/resources/formats/yaml_spec.rb +41 -0
  80. data/spec/imw/resources/local_spec.rb +98 -0
  81. data/spec/imw/resources/remote_spec.rb +35 -0
  82. data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
  83. data/spec/imw/resources/schemes/http_spec.rb +19 -0
  84. data/spec/imw/resources/schemes/s3_spec.rb +19 -0
  85. data/spec/imw/transforms/archiver_spec.rb +120 -0
  86. data/spec/imw/transforms/transferer_spec.rb +113 -0
  87. data/spec/imw/utils/paths_spec.rb +5 -33
  88. data/spec/imw/utils/shared_paths_spec.rb +29 -0
  89. data/spec/spec_helper.rb +5 -5
  90. data/spec/support/paths_matcher.rb +67 -0
  91. data/spec/support/random.rb +39 -36
  92. metadata +88 -75
  93. data/lib/imw/dataset/task.rb +0 -41
  94. data/lib/imw/files/archive.rb +0 -113
  95. data/lib/imw/files/basicfile.rb +0 -122
  96. data/lib/imw/files/binary.rb +0 -28
  97. data/lib/imw/files/compressed_file.rb +0 -93
  98. data/lib/imw/files/compressed_files_and_archives.rb +0 -334
  99. data/lib/imw/files/compressible.rb +0 -103
  100. data/lib/imw/files/csv.rb +0 -113
  101. data/lib/imw/files/directory.rb +0 -62
  102. data/lib/imw/files/excel.rb +0 -84
  103. data/lib/imw/files/json.rb +0 -41
  104. data/lib/imw/files/sgml.rb +0 -46
  105. data/lib/imw/files/text.rb +0 -68
  106. data/lib/imw/files/yaml.rb +0 -46
  107. data/lib/imw/files.rb +0 -125
  108. data/lib/imw/packagers/archiver.rb +0 -126
  109. data/lib/imw/packagers/s3_mover.rb +0 -36
  110. data/lib/imw/packagers.rb +0 -8
  111. data/lib/imw/utils/components.rb +0 -61
  112. data/lib/imw/utils/config.rb +0 -46
  113. data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
  114. data/lib/imw/utils/extensions/core.rb +0 -27
  115. data/lib/imw/utils/extensions/dir.rb +0 -24
  116. data/lib/imw/utils/extensions/file_core.rb +0 -64
  117. data/lib/imw/utils/extensions/typed_struct.rb +0 -22
  118. data/lib/imw/utils/extensions/uri.rb +0 -59
  119. data/lib/imw/utils/view/dump_csv.rb +0 -112
  120. data/lib/imw/utils/view/dump_csv_older.rb +0 -117
  121. data/lib/imw/utils/view.rb +0 -113
  122. data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
  123. data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
  124. data/spec/imw/files/archive_spec.rb +0 -118
  125. data/spec/imw/files/basicfile_spec.rb +0 -121
  126. data/spec/imw/files/bz2_spec.rb +0 -32
  127. data/spec/imw/files/compressed_file_spec.rb +0 -96
  128. data/spec/imw/files/compressible_spec.rb +0 -100
  129. data/spec/imw/files/file_spec.rb +0 -144
  130. data/spec/imw/files/gz_spec.rb +0 -32
  131. data/spec/imw/files/rar_spec.rb +0 -33
  132. data/spec/imw/files/tar_spec.rb +0 -31
  133. data/spec/imw/files/text_spec.rb +0 -23
  134. data/spec/imw/files/zip_spec.rb +0 -31
  135. data/spec/imw/files_spec.rb +0 -38
  136. data/spec/imw/packagers/archiver_spec.rb +0 -125
  137. data/spec/imw/packagers/s3_mover_spec.rb +0 -7
  138. data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
  139. data/spec/imw/utils/extensions/find_spec.rb +0 -113
  140. data/spec/imw/workflow/rip/local_spec.rb +0 -89
  141. data/spec/imw/workflow/rip_spec.rb +0 -27
  142. data/spec/support/archive_contents_matcher.rb +0 -94
  143. data/spec/support/directory_contents_matcher.rb +0 -61
@@ -1,125 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Packagers::Archiver do
4
- before do
5
- @name = 'foobar'
6
-
7
- # regular files
8
- @csv = "foobar-csv.csv"
9
- @xml = "foobar-xml.xml"
10
- @txt = "foobar-txt.txt"
11
- @blah = "foobar"
12
-
13
- # compressed files
14
- @bz2 = "foobar-bz2.bz2"
15
-
16
- # archives
17
- @zip = "foobar-zip.zip"
18
- @tarbz2 = "foobar-tarbz2.tar.bz2"
19
- @rar = "foobar-rar.rar"
20
- @archives = [@zip, @tarbz2]
21
-
22
- @files = [@csv, @xml, @txt, @blah, @bz2, @zip, @tarbz2]
23
-
24
- @files.each do |path|
25
- IMWTest::Random.file path
26
- end
27
- end
28
-
29
-
30
- describe "when preparing files" do
31
- before do
32
- @archiver = IMW::Packagers::Archiver.new @name, @files
33
- @archiver.prepare!
34
- end
35
-
36
- after do
37
- FileUtils.rm_rf @archiver.tmp_dir
38
- end
39
-
40
- it "should name its archive directory properly" do
41
- @archiver.tmp_dir.should contain(@name)
42
- end
43
-
44
- it "should copy regular files to its archive directory" do
45
- @archiver.dir.should contain(@csv, @xml, @txt)
46
- end
47
-
48
- it "should uncompress compressed files to its archive directory" do
49
- @archiver.dir.should contain('foobar-bz2')
50
- @archiver.dir.should_not contain(@bz2)
51
- end
52
-
53
- it "should copy the content of archive files to its archive directory (but not the actual archives)" do
54
- @archives.each do |archive|
55
- @archiver.dir.should_not contain(archive)
56
- @archiver.dir.should contain(*IMW.open(archive).contents)
57
- end
58
- end
59
-
60
- it "should not move any of the original files" do
61
- IMWTest::TMP_DIR.should contain(@files)
62
- end
63
- end
64
-
65
- describe "when preparing files while renaming them" do
66
- before do
67
-
68
- # to test renaming, consider the new paths to be the old paths
69
- # but with the hyphens mapped to underscores...
70
- @renaming_hash = {}
71
- @files.each { |f| @renaming_hash[f] = f.gsub(/-/,'_') }
72
-
73
- @archiver = IMW::Packagers::Archiver.new @name, @renaming_hash
74
- @archiver.prepare!
75
- end
76
-
77
- after do
78
- FileUtils.rm_rf @archiver.tmp_dir
79
- end
80
-
81
- it "should copy regular files to its archive directory, renaming them" do
82
- @archiver.dir.should_not contain([@csv, @xml, @txt])
83
- @archiver.dir.should contain([@csv, @xml, @txt].map { |f| @renaming_hash[f] })
84
- end
85
-
86
- it "should uncompress compressed files to its archive directory, renaming them" do
87
- @archiver.dir.should contain('foobar_bz2')
88
- @archiver.dir.should_not contain('foobar-bz2')
89
- @archiver.dir.should_not contain(@renaming_hash[@bz2])
90
- @archiver.dir.should_not contain(@bz2)
91
- end
92
- end
93
-
94
- describe "when packaging files" do
95
- before do
96
- @archiver = IMW::Packagers::Archiver.new @name, @files
97
- @archiver.prepare!
98
-
99
- @package_tarbz2 = "package.tar.bz2"
100
- @package_zip = "package.zip"
101
- @packages = [@package_tarbz2, @package_zip]
102
- end
103
-
104
- after do
105
- FileUtils.rm_rf @archiver.tmp_dir
106
- end
107
-
108
- it "should create a package file containing the proper files" do
109
- @packages.each do |package|
110
- @archiver.package! package
111
- @archiver.tmp_dir.should contain(IMW.open(package).contents)
112
- end
113
- end
114
-
115
- it "should return the package file" do
116
- @packages.each do |package|
117
- output = @archiver.package! package
118
- output.basename.should == package
119
- end
120
- end
121
-
122
-
123
- end
124
- end
125
-
@@ -1,7 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../spec_helper"
2
-
3
- describe IMW::Packagers::S3Mover do
4
- it { pending }
5
- end
6
-
7
-
@@ -1,72 +0,0 @@
1
- #
2
- # h2. spec/imw/utils/extensions/file_core_spec.rb -- spec for extensions to core file module
3
- #
4
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
5
- # Copyright:: Copyright (c) 2008 infochimps.org
6
- # License:: GPL 3.0
7
- # Website:: http://infinitemonkeywrench.org/
8
- #
9
-
10
- require File.join(File.dirname(__FILE__),'../../../spec_helper')
11
-
12
- require 'fileutils'
13
-
14
- require 'imw/utils/random'
15
-
16
- describe File do
17
-
18
- it "should return the 'name' of a file with 'name_of_file'" do
19
- File.name_of_file("/path/to/some_file.txt").should eql("some_file")
20
- end
21
-
22
- describe "when finding the handle corresponding to a path" do
23
-
24
- it "should correctly identify paths with the processing instruction suffix" do
25
- File.handle("/path/to/the_handle#{IMW::PROCESSING_INSTRUCTION_SUFFIX}.yaml").should eql(:the_handle)
26
- end
27
-
28
- it "should correctly identify paths with the metadata instruction suffix" do
29
- File.handle("/path/to/the_handle#{IMW::METADATA_SUFFIX}.yaml").should eql(:the_handle)
30
- end
31
-
32
- it "should raise an error if the path does not correspond to a handle" do
33
- lambda {File.handle("/path/to/the_handle.txt")}.should raise_error(IMW::PathError)
34
- end
35
- end
36
-
37
- describe "when creating unique filenames" do
38
-
39
- before(:each) do
40
- @root_directory = IMW::DIRECTORIES[:dump] + "/file_core_spec"
41
- @file0 = @root_directory + "/the_original.txt"
42
- @file1 = @root_directory + "/the_original.txt.1"
43
- @file2 = @root_directory + "/the_original.txt.2"
44
- FileUtils.mkdir(@root_directory)
45
- end
46
-
47
- after(:each) do
48
- FileUtils.rm_rf @root_directory
49
- end
50
-
51
- it "should return the given path if there is no such file already" do
52
- File.uniquify(@file0).should eql(@file0)
53
- end
54
-
55
- it "should return the given path with a numerical suffix of `.1' if the file exists" do
56
- IMW::Random.file(@file0)
57
- File.uniquify(@file0).should eql(@file1)
58
- end
59
-
60
- it "should return the given path with a numerical suffix o `.2' if the file exists and a file with a suffix of `.1' also exists" do
61
- IMW::Random.file(@file0)
62
- IMW::Random.file(@file1)
63
- File.uniquify(@file0).should eql(@file2)
64
- end
65
-
66
- end
67
-
68
- end
69
-
70
-
71
-
72
- # puts "#{File.basename(__FILE__)}: You bend the file folder almost in half and watch as it springs back to shape." # at bottom
@@ -1,113 +0,0 @@
1
- #
2
- # h2. spec/imw/utils/extensions/find_spec.rb -- spec for find.rb
3
- #
4
- # == About
5
- #
6
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
7
- # Copyright:: Copyright (c) 2008 infochimps.org
8
- # License:: GPL 3.0
9
- # Website:: http://infinitemonkeywrench.org/
10
- #
11
-
12
- require File.join(File.dirname(__FILE__),'../../../spec_helper')
13
- require IMW_SPEC_DIR + "/imw/matchers/without_regard_to_order_matcher"
14
-
15
- require 'fileutils'
16
- require 'set'
17
-
18
- require 'imw/utils'
19
- require 'imw/utils/random'
20
- require 'imw/utils/extensions/find'
21
-
22
- describe Find do
23
-
24
- include Spec::Matchers::IMW
25
-
26
- def create_sample_files
27
- FileUtils.mkdir_p(@subsubdirectory)
28
- [@file1,@file2,@file3,@file4,@file5,@file6].each {|path| IMW::Random.file path}
29
- end
30
-
31
- before(:all) do
32
- @root_directory = IMW::DIRECTORIES[:dump] + "/find_extension_spec"
33
- @subdirectory = @root_directory + "/subdir"
34
- @subsubdirectory = @subdirectory + "/subsubdir"
35
- @fake_directory = @root_directory + "/notreal"
36
- @file1 = @root_directory + "/my_file1.txt"
37
- @file2 = @root_directory + "/my_file2.csv"
38
- @file3 = @root_directory + "/my_file3.dat"
39
- @file4 = @subdirectory + "/your_file4.html"
40
- @file5 = @subdirectory + "/your_file5.csv"
41
- @file6 = @subdirectory + "/your_file5"
42
- end
43
-
44
- before(:each) do
45
- create_sample_files
46
- end
47
-
48
- after(:each) do
49
- FileUtils.rm_rf @root_directory
50
- end
51
-
52
- describe "when listing files with absolute paths contained in a directory" do
53
-
54
- it "should raise an error when listing a non-exsiting directory" do
55
- lambda {Find.files_in_directory(@fake_directory) }.should raise_error(IMW::PathError)
56
- end
57
-
58
- it "should find every file by default" do
59
- Find.files_in_directory(@root_directory).should match_without_regard_to_order([@file1,@file2,@file3,@file4,@file5,@file6])
60
- end
61
-
62
- it "should only find files which match its :include argument" do
63
- Find.files_in_directory(@root_directory, :include => /.*\.csv$/).should match_without_regard_to_order([@file2,@file5])
64
- end
65
-
66
- it "should not find files which match its :exclude argument" do
67
- Find.files_in_directory(@root_directory, :exclude => /.*\.csv$/).should match_without_regard_to_order([@file1,@file3,@file4,@file6])
68
- end
69
-
70
- it "should only find files which match its :include argument and don't match its :exclude argument" do
71
- Find.files_in_directory(@root_directory, :include => /my/, :exclude => /.*\.csv$/).should match_without_regard_to_order([@file1,@file3])
72
- end
73
- end
74
-
75
- describe "when listing files with relative paths contained in a directory" do
76
-
77
- def strip_root_directory array
78
- array.map {|item| item[@root_directory.length + 1,item.size]}
79
- end
80
-
81
- it "should raise an error when listing a non-exsiting directory" do
82
- lambda {Find.files_in_directory(@fake_directory) }.should raise_error(IMW::PathError)
83
- end
84
-
85
- it "should find every file by default" do
86
- Find.files_relative_to_directory(@root_directory).should match_without_regard_to_order(strip_root_directory([@file1,@file2,@file3,@file4,@file5,@file6]))
87
- end
88
-
89
- it "should only find files which match its :include argument" do
90
- Find.files_relative_to_directory(@root_directory, :include => /.*\.csv$/).should match_without_regard_to_order(strip_root_directory([@file2,@file5]))
91
- end
92
-
93
- it "should not find files which match its :exclude argument" do
94
- Find.files_relative_to_directory(@root_directory, :exclude => /.*\.csv$/).should match_without_regard_to_order(strip_root_directory([@file1,@file3,@file4,@file6]))
95
- end
96
-
97
- it "should only find files which match its :include argument and don't match its :exclude argument" do
98
- Find.files_relative_to_directory(@root_directory, :include => /^my/, :exclude => /.*\.csv$/).should match_without_regard_to_order(strip_root_directory([@file1,@file3]))
99
- end
100
-
101
- end
102
-
103
- describe "when listing handles in a directory" do
104
-
105
- it "should return a unique set of handles" do
106
- Find.handles_in_directory(@root_directory, :include => /your/).should match_without_regard_to_order([:your_file4, :your_file5])
107
- end
108
- end
109
-
110
-
111
- end
112
-
113
- # puts "#{File.basename(__FILE__)}: You throw your Monkeywrench backwards over your shoulder and run like mad to go find it. Again, and again, and again." # at bottom
@@ -1,89 +0,0 @@
1
- #
2
- # h2. spec/imw/workflow/rip/local_spec.rb -- specs for copying files from local disk
3
- #
4
- # == About
5
- #
6
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
7
- # Copyright:: Copyright (c) 2008 infochimps.org
8
- # License:: GPL 3.0
9
- # Website:: http://infinitemonkeywrench.org/
10
- #
11
- require File.join(File.dirname(__FILE__),'../../../spec_helper')
12
- require IMW_SPEC_DIR + "/imw/matchers/without_regard_to_order_matcher.rb"
13
-
14
- require 'fileutils'
15
-
16
- require 'imw/utils/random'
17
- require 'imw/utils/extensions/find'
18
- require 'imw/workflow/rip/local'
19
-
20
- describe "Ripping from local disk" do
21
-
22
- include Spec::Matchers::IMW
23
-
24
- before(:all) do
25
- @root_directory = IMW::DIRECTORIES[:dump] + "/local_spec"
26
- @file1 = @root_directory + "/first.csv"
27
-
28
- @source_directory1 = @root_directory + "/source1"
29
- @file2 = @source_directory1 + "/second.txt"
30
- @file3 = @source_directory1 + "/third.csv"
31
-
32
- @source_directory2 = @root_directory + "/source2"
33
- @file4 = @source_directory2 + "/fourth.txt"
34
- @file5a = @source_directory2 + "/fifth-shared.yaml"
35
-
36
- @source_directory3 = @source_directory2 + "/source3-nested"
37
- @file5b = @source_directory3 + "/fifth-shared.yaml"
38
-
39
- @target_directory = @root_directory + "/target"
40
- end
41
-
42
- before(:each) do
43
- FileUtils.mkdir([@root_directory,@source_directory1,@source_directory2,@source_directory3,@target_directory])
44
- [@file1,@file2,@file3,@file4,@file5a,@file5b].each {|file| IMW::Random.file(file)}
45
- end
46
-
47
- after(:each) do
48
- FileUtils.rm_rf @root_directory
49
- end
50
-
51
-
52
- def basenames_of files
53
- files.map {|file| File.basename file}
54
- end
55
-
56
- it "should raise an error when attempting to copy to a non-existent target directory" do
57
- FileUtils.rm_rf @target_directory
58
- lambda { IMW::Rip.from_local_disk(@target_directory,@source_directory1)}.should raise_error(IMW::PathError)
59
- end
60
-
61
- it "should copy all files in all directories and paths recursively to the target directory without any hierarchy" do
62
- IMW::Rip.from_local_disk(@target_directory,@file1,@source_directory1,@source_directory2)
63
- Find.files_relative_to_directory(@target_directory).should match_without_regard_to_order(basenames_of([@file1,@file2,@file3,@file4,@file5a]))
64
- end
65
-
66
- it "should accept a block which establishes a hierarchy to be created in the target directory and which skips copying certain files if it returns nil" do
67
-
68
- # complicated block to copy files to sub-directories of the target
69
- # directory depending on their extension
70
- IMW::Rip.from_local_disk(@target_directory,@file1,@source_directory1,@source_directory2) do |path|
71
- if File.extname(path) == '.txt' then
72
- File.join('txt',File.basename(path)) # put text files in txt
73
- elsif File.extname(path) == '.csv' then
74
- File.join("csv",File.basename(path)) # put csv files in csv
75
- else
76
- nil # don't copy other extensions
77
- end
78
- end
79
-
80
- # what we would expect to see from that block
81
- txt = [@file2,@file4].map {|path| File.join("txt",File.basename(path))}
82
- csv = [@file1,@file3].map {|path| File.join("csv",File.basename(path))}
83
-
84
- Find.files_relative_to_directory(@target_directory).should match_without_regard_to_order(txt + csv)
85
- end
86
-
87
- end
88
-
89
- # puts "#{File.basename(__FILE__)}: Having found the platter you were looking for, you stare at it, examining your reflection. What a handsome chimp you are!" # at bottom
@@ -1,27 +0,0 @@
1
- #
2
- # h2. spec/imw/workflow/rip_spec.rb -- spec for rip.rb
3
- #
4
- # == About
5
- #
6
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
7
- # Copyright:: Copyright (c) 2008 infochimps.org
8
- # License:: GPL 3.0
9
- # Website:: http://infinitemonkeywrench.org/
10
- #
11
- require File.join(File.dirname(__FILE__),'../../spec_helper')
12
-
13
- # require 'imw/workflow/rip'
14
- #
15
- # describe Source do
16
- #
17
- # before(:all) do
18
- # @source = IMW::Source.new(:fake_source)
19
- # @source.stub("returns path to ripd directory",:path_to => IMW::DIRECTORIES[:dump] + "/source_rip_spec")
20
- # end
21
- #
22
- # it "should raise an error when asked to rip in an unknown way" do
23
- # @source.rip_from :silly_way
24
- # end
25
- # end
26
-
27
- # puts "#{File.basename(__FILE__)}: Bending over, you hear a thunderous RRRRRRRIIIIIP and then scuttle off to check your pants..." # at bottom
@@ -1,94 +0,0 @@
1
- #
2
- # h2. spec/matchers/archive_contents_matcher.rb -- matches contents of archive to disk
3
- #
4
- # == About
5
- #
6
- # An RSpec matcher which tests that an archive of files has the same
7
- # contents as various paths on disk.
8
- #
9
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
10
- # Copyright:: Copyright (c) 2008 infochimps.org
11
- # License:: GPL 3.0
12
- # Website:: http://infinitemonkeywrench.org/
13
- #
14
-
15
- require 'find'
16
-
17
- module Spec
18
- module Matchers
19
- module IMW
20
-
21
- # Match the contents of the archive against files or directories
22
- # in +paths+.
23
- #
24
- # Options include:
25
- #
26
- # <tt>:relative_to</tt>:: a leading path which will be stripped
27
- # from all +paths+ before comparison with the contents of the
28
- # directory.
29
- class ArchiveContentsMatchPaths
30
-
31
- private
32
- def initialize paths,opts = {}
33
- opts.reverse_merge!({:relative_to => nil})
34
- paths = [paths] if paths.class == String
35
- @paths = paths
36
- @relative_to = opts[:relative_to]
37
- find_paths_contents
38
- end
39
-
40
- def find_paths_contents
41
- # find all the files
42
- contents = []
43
- @paths.each do |path|
44
- path = File.expand_path path
45
- if File.file? path then
46
- contents << path
47
- elsif File.directory? path then
48
- contents += Find.files_in_directory(path)
49
- end
50
- end
51
-
52
- # strip leading path
53
- contents.map! do |path|
54
- # the +1 is because we want a relative path
55
- path = path[@relative_to.length + 1,path.size]
56
- end
57
-
58
- @paths_contents = contents.to_set
59
- end
60
-
61
- def pretty_print set
62
- set.to_a.join("\n\t")
63
- end
64
-
65
- public
66
- def matches? archive
67
- @archive = archive
68
- @archive_contents = @archive.contents.to_set
69
- @archive_contents == @paths_contents
70
- end
71
-
72
- def failure_message
73
- missing_from_archive = "missing from archive:\n\t#{pretty_print(@paths_contents - @archive_contents)}\n"
74
- missing_from_paths = "missing from paths:\n\t#{pretty_print(@archive_contents - @paths_contents)}\n"
75
- common = "common to both:\n\t#{pretty_print(@archive_contents & @paths_contents)}\n"
76
- "expected contents of archive (#{@archive.path}) and paths (#{@paths.join(", ")}) to be identical.\n#{missing_from_archive}\n#{missing_from_paths}\n#{common}"
77
- end
78
-
79
- def negative_failure_message
80
- "expected contents of archive (#{@archive.path}) and paths (#{@paths.join(", ")}) to differ."
81
- end
82
-
83
- end
84
-
85
- # Invokes the matcher <tt>Spec::Matchers::IMW::ArchiveContentsMatchPaths
86
- def contain_paths_like paths, opts = {}
87
- ArchiveContentsMatchPaths.new(paths,opts)
88
- end
89
-
90
- end
91
- end
92
- end
93
-
94
- # puts "#{File.basename(__FILE__)}: An archive is something that is bigger on the inside than it is on the outside." # at bottom
@@ -1,61 +0,0 @@
1
- #
2
- # h2. spec/matchers/directory_contents_matcher.rb -- matches files between directories
3
- #
4
- # == About
5
- #
6
- # An RSpec matcher which tests that two directories share the same set
7
- # of files.
8
- #
9
- # Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
10
- # Copyright:: Copyright (c) 2008 infochimps.org
11
- # License:: GPL 3.0
12
- # Website:: http://infinitemonkeywrench.org/
13
- #
14
-
15
- require 'set'
16
- require 'find'
17
-
18
- module Spec
19
- module Matchers
20
- module IMW
21
-
22
- class DirectoryContentsMatcher
23
- private
24
- def initialize dir
25
- @dir = File.expand_path(dir)
26
- @dir_files = Find.files_relative_to_directory(@dir).to_set
27
- end
28
-
29
- # Pretty print a set of files.
30
- def format_files_for_printing files
31
- files.to_a.join("\n\t")
32
- end
33
-
34
- public
35
- def matches? target
36
- @target = target
37
- @target_files = Find.files_relative_to_directory(@target).to_set
38
- @target_files == @dir_files
39
- end
40
-
41
- def failure_message
42
- files_missing_from_dir = format_files_for_printing(@target_files - @dir_files)
43
- files_missing_from_target = format_files_for_printing(@dir_files - @target_files)
44
- files_in_common = format_files_for_printing(@dir_files & @target_files)
45
- "expected files in #{@dir} and #{@target} to be identical.\n\nfiles missing from #{@dir}:\n\t#{files_missing_from_dir}\n\nfiles missing from #{@target}:\n\t#{files_missing_from_target}\n\nfiles in common:\n\t#{files_in_common}"
46
- end
47
-
48
- def negative_failure_message
49
- "expected files in #{@dir} and #{@target} to be different"
50
- end
51
- end
52
-
53
- # Checks that files in one directory match those in another.
54
- def contain_files_matching_directory dir
55
- DirectoryContentsMatcher.new(dir)
56
- end
57
- end
58
- end
59
- end
60
-
61
- # puts "#{File.basename(__FILE__)}: From far away, the two filing cabinets appear to be identical. Upon closer inspection, one of them is actually a Maine lobster. Delicious!" # at bottom