imw 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +15 -0
- data/CHANGELOG +0 -0
- data/LICENSE +674 -0
- data/README.rdoc +101 -0
- data/Rakefile +20 -0
- data/VERSION +1 -0
- data/etc/imwrc.rb +76 -0
- data/lib/imw.rb +42 -0
- data/lib/imw/boot.rb +58 -0
- data/lib/imw/dataset.rb +233 -0
- data/lib/imw/dataset/datamapper.rb +66 -0
- data/lib/imw/dataset/datamapper/time_and_user_stamps.rb +37 -0
- data/lib/imw/dataset/loaddump.rb +50 -0
- data/lib/imw/dataset/old/file_collection.rb +88 -0
- data/lib/imw/dataset/old/file_collection_utils.rb +71 -0
- data/lib/imw/dataset/scaffold.rb +132 -0
- data/lib/imw/dataset/scraped_uri.rb +305 -0
- data/lib/imw/dataset/scrub/old_working_scrubber.rb +87 -0
- data/lib/imw/dataset/scrub/scrub.rb +147 -0
- data/lib/imw/dataset/scrub/scrub_simple_url.rb +38 -0
- data/lib/imw/dataset/scrub/scrub_test.rb +60 -0
- data/lib/imw/dataset/scrub/slug.rb +101 -0
- data/lib/imw/dataset/stats.rb +73 -0
- data/lib/imw/dataset/stats/counter.rb +23 -0
- data/lib/imw/dataset/task.rb +38 -0
- data/lib/imw/dataset/workflow.rb +81 -0
- data/lib/imw/files.rb +110 -0
- data/lib/imw/files/archive.rb +113 -0
- data/lib/imw/files/basicfile.rb +122 -0
- data/lib/imw/files/binary.rb +28 -0
- data/lib/imw/files/compressed_file.rb +93 -0
- data/lib/imw/files/compressed_files_and_archives.rb +348 -0
- data/lib/imw/files/compressible.rb +103 -0
- data/lib/imw/files/csv.rb +112 -0
- data/lib/imw/files/json.rb +41 -0
- data/lib/imw/files/sgml.rb +65 -0
- data/lib/imw/files/text.rb +68 -0
- data/lib/imw/files/yaml.rb +46 -0
- data/lib/imw/packagers.rb +8 -0
- data/lib/imw/packagers/archiver.rb +108 -0
- data/lib/imw/packagers/s3_mover.rb +28 -0
- data/lib/imw/parsers.rb +7 -0
- data/lib/imw/parsers/html_parser.rb +382 -0
- data/lib/imw/parsers/html_parser/matchers.rb +306 -0
- data/lib/imw/parsers/line_parser.rb +87 -0
- data/lib/imw/parsers/regexp_parser.rb +72 -0
- data/lib/imw/utils.rb +24 -0
- data/lib/imw/utils/components.rb +61 -0
- data/lib/imw/utils/config.rb +46 -0
- data/lib/imw/utils/error.rb +54 -0
- data/lib/imw/utils/extensions/array.rb +125 -0
- data/lib/imw/utils/extensions/class/attribute_accessors.rb +8 -0
- data/lib/imw/utils/extensions/core.rb +43 -0
- data/lib/imw/utils/extensions/dir.rb +24 -0
- data/lib/imw/utils/extensions/file_core.rb +64 -0
- data/lib/imw/utils/extensions/hash.rb +218 -0
- data/lib/imw/utils/extensions/hpricot.rb +48 -0
- data/lib/imw/utils/extensions/string.rb +49 -0
- data/lib/imw/utils/extensions/struct.rb +42 -0
- data/lib/imw/utils/extensions/symbol.rb +28 -0
- data/lib/imw/utils/extensions/typed_struct.rb +22 -0
- data/lib/imw/utils/extensions/uri.rb +59 -0
- data/lib/imw/utils/log.rb +67 -0
- data/lib/imw/utils/misc.rb +63 -0
- data/lib/imw/utils/paths.rb +115 -0
- data/lib/imw/utils/uri.rb +59 -0
- data/lib/imw/utils/uuid.rb +33 -0
- data/lib/imw/utils/validate.rb +38 -0
- data/lib/imw/utils/version.rb +12 -0
- data/lib/imw/utils/view.rb +113 -0
- data/lib/imw/utils/view/dump_csv.rb +112 -0
- data/lib/imw/utils/view/dump_csv_older.rb +117 -0
- data/spec/data/sample.csv +131 -0
- data/spec/data/sample.tsv +131 -0
- data/spec/data/sample.txt +131 -0
- data/spec/data/sample.xml +653 -0
- data/spec/data/sample.yaml +652 -0
- data/spec/imw/dataset/datamapper/uri_spec.rb +43 -0
- data/spec/imw/dataset/datamapper_spec_helper.rb +11 -0
- data/spec/imw/files/archive_spec.rb +118 -0
- data/spec/imw/files/basicfile_spec.rb +121 -0
- data/spec/imw/files/bz2_spec.rb +32 -0
- data/spec/imw/files/compressed_file_spec.rb +96 -0
- data/spec/imw/files/compressible_spec.rb +100 -0
- data/spec/imw/files/file_spec.rb +144 -0
- data/spec/imw/files/gz_spec.rb +32 -0
- data/spec/imw/files/rar_spec.rb +33 -0
- data/spec/imw/files/tar_spec.rb +31 -0
- data/spec/imw/files/text_spec.rb +23 -0
- data/spec/imw/files/zip_spec.rb +31 -0
- data/spec/imw/files_spec.rb +38 -0
- data/spec/imw/packagers/archiver_spec.rb +125 -0
- data/spec/imw/packagers/s3_mover_spec.rb +7 -0
- data/spec/imw/parsers/line_parser_spec.rb +96 -0
- data/spec/imw/parsers/regexp_parser_spec.rb +42 -0
- data/spec/imw/utils/extensions/file_core_spec.rb +72 -0
- data/spec/imw/utils/extensions/find_spec.rb +113 -0
- data/spec/imw/utils/paths_spec.rb +38 -0
- data/spec/imw/workflow/rip/local_spec.rb +89 -0
- data/spec/imw/workflow/rip_spec.rb +27 -0
- data/spec/rcov.opts +1 -0
- data/spec/spec.opts +4 -0
- data/spec/spec_helper.rb +32 -0
- data/spec/support/archive_contents_matcher.rb +94 -0
- data/spec/support/custom_matchers.rb +21 -0
- data/spec/support/directory_contents_matcher.rb +61 -0
- data/spec/support/extensions.rb +18 -0
- data/spec/support/file_contents_matcher.rb +50 -0
- data/spec/support/random.rb +210 -0
- data/spec/support/without_regard_to_order_matcher.rb +58 -0
- metadata +196 -0
@@ -0,0 +1,96 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../spec_helper"
|
2
|
+
require 'ostruct'
|
3
|
+
|
4
|
+
# Specs for IMW::Parsers::LineParser, run against the sample.csv fixture
# (131 lines, the first of which is the "ID,Name,Genus,Species" header).
describe IMW::Parsers::LineParser do

  before do
    @path   = File.dirname(__FILE__) + "/../../data/sample.csv"
    @file   = File.new(@path)
    @fields = [:id, :name, :genus, :species]
  end

  # Release the fixture handle opened for each example so the suite does not
  # accumulate open file descriptors. Guarded because +before+ may have failed
  # and because the parser may already have closed the stream itself.
  after do
    @file.close if @file && !@file.closed?
  end

  describe "without an implemented parsing method" do

    before do
      @parser = IMW::Parsers::LineParser.new
    end

    it "should raise an error when attempting to parse a line" do
      lambda { @parser.parse_line "wahtever" }.should raise_error(IMW::NotImplementedError)
    end

  end

  describe "with an implemented parsing method" do

    before do
      # Anonymous subclass supplying the one method the base class leaves
      # unimplemented: split a CSV line into the four fixture columns.
      @parser_class = Class.new(IMW::Parsers::LineParser)
      @parser_class.class_eval do
        def parse_line line
          id, name, genus, species = line.chomp.split(',')
          { :id => id, :name => name, :genus => genus, :species => species }
        end
      end

      @parser = @parser_class.new
    end

    it "should skip lines as needed" do
      @parser.skip_first = 1
      results = @parser.parse!(@file)
      results.length.should == 130 # 131 fixture lines minus the skipped one
    end

    it "should read as many lines as it's asked" do
      results = @parser.parse!(@file, :lines => 10)
      results.length.should == 10
    end

    describe "when parsing into hashes" do

      it "should return an array of hashes when called without a block" do
        results = @parser.parse!(@file)
        results.length.should == 131
        results.first.should == { :id => "ID", :name => "Name", :genus => "Genus", :species => "Species" }
      end

      it "should pass each hash to a block when given one" do
        # Collect whatever the parser yields; plain accumulation avoids
        # depending on ActiveSupport's +returning+ helper.
        results = []
        @parser.parse!(@file) do |hsh|
          hsh.delete(:id)
          results << hsh
        end
        results.length.should == 131
        results.first.should == { :name => "Name", :genus => "Genus", :species => "Species" }
      end
    end

    describe "when parsing into objects" do
      before { @parser.klass = OpenStruct }

      it "should return an array of objects when defined with a class" do
        results = @parser.parse!(@file)
        results.length.should == 131
        results.first.class.should == OpenStruct
      end

      it "should pass each object to a block when given one and defined with a class" do
        # +klass+ is already set to OpenStruct by this group's +before+.
        results = []
        @parser.parse!(@file) do |obj|
          obj.genus = nil
          results << obj
        end
        results.length.should == 131
        results.first.class.should == OpenStruct
        results.first.genus.should be_blank
      end
    end
  end
end
|
94
|
+
|
95
|
+
|
96
|
+
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../spec_helper"
|
2
|
+
require 'ostruct'
|
3
|
+
|
4
|
+
# Specs for IMW::Parsers::RegexpParser, using a three-line access-log style
# fixture that is written to disk before each example.
describe IMW::Parsers::RegexpParser do

  before do
    @path = "foobar.dat"
    # Heredoc body must stay flush-left: the regexp below anchors on ^.
    @text = <<EOF
151.199.53.145 14-Oct-2007:13:34:34-0500 GET /phpmyadmin/main.php HTTP/1.0
81.227.179.120 14-Oct-2007:13:34:34-0500 GET /phpmyadmin/libraries/select_lang.lib.php HTTP/1.0
81.3.107.173 14-Oct-2007:13:54:26-0500 GET / HTTP/1.1
EOF
    File.open(@path, 'w') { |f| f.write(@text) }
    @file = File.new(@path)

    # Capture groups, in order: ip, timestamp, verb, url, HTTP version.
    @regexp = %r{^([\d\.]+) (\d{2}-\w{3}-\d{4}:\d{2}:\d{2}:\d{2}-\d{4}) (\w+) ([^\s]+) HTTP/([\d.]{3})$}
    @fields = [:ip, :timestamp, :verb, :url, :version]

    @parser = IMW::Parsers::RegexpParser.new :by_regexp => @regexp, :into_fields => @fields
  end

  # Close the handle and remove the scratch file written by +before+, so the
  # suite does not leak descriptors or leave foobar.dat in the working directory.
  after do
    @file.close if @file && !@file.closed?
    File.delete(@path) if @path && File.exist?(@path)
  end

  describe "parsing a line which matches its regexp" do
    it "should return an appropriate hash" do
      @parser.parse_line(@file.readline).should == {:ip => '151.199.53.145', :timestamp => '14-Oct-2007:13:34:34-0500', :verb => 'GET', :url => '/phpmyadmin/main.php', :version => "1.0"}
    end
  end

  describe "parsing a line which doesn't match its regexp" do
    # Swap in a pattern that cannot match any fixture line.
    before { @parser.regexp = /foobar/ }

    it "return an empty hash if not parsing strictly" do
      @parser.parse_line(@file.readline).should == {}
    end

    it "should raise an error if parsing strictly" do
      @parser.strict = true
      lambda { @parser.parse_line(@file.readline) }.should raise_error(IMW::ParseError)
    end
  end
end
|
41
|
+
|
42
|
+
|
@@ -0,0 +1,72 @@
|
|
1
|
+
#
|
2
|
+
# h2. spec/imw/utils/extensions/file_core_spec.rb -- spec for extensions to core file module
|
3
|
+
#
|
4
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
5
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
6
|
+
# License:: GPL 3.0
|
7
|
+
# Website:: http://infinitemonkeywrench.org/
|
8
|
+
#
|
9
|
+
|
10
|
+
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
11
|
+
|
12
|
+
require 'fileutils'
|
13
|
+
|
14
|
+
require 'imw/utils/random'
|
15
|
+
|
16
|
+
# Specs for the IMW extensions to File exercised here:
# File.name_of_file, File.handle and File.uniquify.
describe File do

  it "should return the 'name' of a file with 'name_of_file'" do
    File.name_of_file("/path/to/some_file.txt").should eql("some_file")
  end

  describe "when finding the handle corresponding to a path" do

    it "should correctly identify paths with the processing instruction suffix" do
      File.handle("/path/to/the_handle#{IMW::PROCESSING_INSTRUCTION_SUFFIX}.yaml").should eql(:the_handle)
    end

    it "should correctly identify paths with the metadata instruction suffix" do
      File.handle("/path/to/the_handle#{IMW::METADATA_SUFFIX}.yaml").should eql(:the_handle)
    end

    it "should raise an error if the path does not correspond to a handle" do
      lambda { File.handle("/path/to/the_handle.txt") }.should raise_error(IMW::PathError)
    end
  end

  describe "when creating unique filenames" do

    before(:each) do
      @root_directory = IMW::DIRECTORIES[:dump] + "/file_core_spec"
      @file0 = @root_directory + "/the_original.txt"
      @file1 = @root_directory + "/the_original.txt.1"
      @file2 = @root_directory + "/the_original.txt.2"
      # mkdir_p rather than mkdir: tolerate a directory left behind by an
      # aborted run and a dump directory that does not exist yet.
      FileUtils.mkdir_p(@root_directory)
    end

    after(:each) do
      FileUtils.rm_rf @root_directory
    end

    it "should return the given path if there is no such file already" do
      File.uniquify(@file0).should eql(@file0)
    end

    it "should return the given path with a numerical suffix of `.1' if the file exists" do
      IMW::Random.file(@file0)
      File.uniquify(@file0).should eql(@file1)
    end

    it "should return the given path with a numerical suffix o `.2' if the file exists and a file with a suffix of `.1' also exists" do
      IMW::Random.file(@file0)
      IMW::Random.file(@file1)
      File.uniquify(@file0).should eql(@file2)
    end

  end

end
|
69
|
+
|
70
|
+
|
71
|
+
|
72
|
+
# puts "#{File.basename(__FILE__)}: You bend the file folder almost in half and watch as it springs back to shape." # at bottom
|
@@ -0,0 +1,113 @@
|
|
1
|
+
#
|
2
|
+
# h2. spec/imw/utils/extensions/find_spec.rb -- spec for find.rb
|
3
|
+
#
|
4
|
+
# == About
|
5
|
+
#
|
6
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
7
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
8
|
+
# License:: GPL 3.0
|
9
|
+
# Website:: http://infinitemonkeywrench.org/
|
10
|
+
#
|
11
|
+
|
12
|
+
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
13
|
+
require IMW_SPEC_DIR + "/imw/matchers/without_regard_to_order_matcher"
|
14
|
+
|
15
|
+
require 'fileutils'
|
16
|
+
require 'set'
|
17
|
+
|
18
|
+
require 'imw/utils'
|
19
|
+
require 'imw/utils/random'
|
20
|
+
require 'imw/utils/extensions/find'
|
21
|
+
|
22
|
+
# Specs for the IMW extensions to Find: files_in_directory,
# files_relative_to_directory and handles_in_directory.
describe Find do

  include Spec::Matchers::IMW

  # Build the directory tree and populate the six sample files named in
  # before(:all).
  def create_sample_files
    FileUtils.mkdir_p(@subsubdirectory)
    [@file1, @file2, @file3, @file4, @file5, @file6].each { |path| IMW::Random.file path }
  end

  before(:all) do
    @root_directory  = IMW::DIRECTORIES[:dump] + "/find_extension_spec"
    @subdirectory    = @root_directory + "/subdir"
    @subsubdirectory = @subdirectory + "/subsubdir"
    @fake_directory  = @root_directory + "/notreal"
    @file1 = @root_directory + "/my_file1.txt"
    @file2 = @root_directory + "/my_file2.csv"
    @file3 = @root_directory + "/my_file3.dat"
    @file4 = @subdirectory + "/your_file4.html"
    @file5 = @subdirectory + "/your_file5.csv"
    # Same stem as @file5 with no extension; the handles example below
    # expects :your_file5 to appear only once.
    @file6 = @subdirectory + "/your_file5"
  end

  before(:each) do
    create_sample_files
  end

  after(:each) do
    FileUtils.rm_rf @root_directory
  end

  describe "when listing files with absolute paths contained in a directory" do

    it "should raise an error when listing a non-exsiting directory" do
      lambda { Find.files_in_directory(@fake_directory) }.should raise_error(IMW::PathError)
    end

    it "should find every file by default" do
      Find.files_in_directory(@root_directory).should match_without_regard_to_order([@file1, @file2, @file3, @file4, @file5, @file6])
    end

    it "should only find files which match its :include argument" do
      Find.files_in_directory(@root_directory, :include => /.*\.csv$/).should match_without_regard_to_order([@file2, @file5])
    end

    it "should not find files which match its :exclude argument" do
      Find.files_in_directory(@root_directory, :exclude => /.*\.csv$/).should match_without_regard_to_order([@file1, @file3, @file4, @file6])
    end

    it "should only find files which match its :include argument and don't match its :exclude argument" do
      Find.files_in_directory(@root_directory, :include => /my/, :exclude => /.*\.csv$/).should match_without_regard_to_order([@file1, @file3])
    end
  end

  describe "when listing files with relative paths contained in a directory" do

    # Drop the "<root_directory>/" prefix from each absolute path.
    def strip_root_directory array
      array.map { |item| item[@root_directory.length + 1, item.size] }
    end

    it "should raise an error when listing a non-exsiting directory" do
      # Exercise the relative-path variant here; the absolute-path variant is
      # already covered by the group above.
      # NOTE(review): assumes files_relative_to_directory raises IMW::PathError
      # for a missing directory like files_in_directory does -- confirm.
      lambda { Find.files_relative_to_directory(@fake_directory) }.should raise_error(IMW::PathError)
    end

    it "should find every file by default" do
      Find.files_relative_to_directory(@root_directory).should match_without_regard_to_order(strip_root_directory([@file1, @file2, @file3, @file4, @file5, @file6]))
    end

    it "should only find files which match its :include argument" do
      Find.files_relative_to_directory(@root_directory, :include => /.*\.csv$/).should match_without_regard_to_order(strip_root_directory([@file2, @file5]))
    end

    it "should not find files which match its :exclude argument" do
      Find.files_relative_to_directory(@root_directory, :exclude => /.*\.csv$/).should match_without_regard_to_order(strip_root_directory([@file1, @file3, @file4, @file6]))
    end

    it "should only find files which match its :include argument and don't match its :exclude argument" do
      Find.files_relative_to_directory(@root_directory, :include => /^my/, :exclude => /.*\.csv$/).should match_without_regard_to_order(strip_root_directory([@file1, @file3]))
    end

  end

  describe "when listing handles in a directory" do

    it "should return a unique set of handles" do
      Find.handles_in_directory(@root_directory, :include => /your/).should match_without_regard_to_order([:your_file4, :your_file5])
    end
  end

end
|
112
|
+
|
113
|
+
# puts "#{File.basename(__FILE__)}: You throw your Monkeywrench backwards over your shoulder and run like mad to go find it. Again, and again, and again." # at bottom
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
2
|
+
require 'imw'
|
3
|
+
require 'imw/utils/paths'
|
4
|
+
|
5
|
+
# Specs for +path_to+ (mixed in via +include IMW+), resolved against a small
# PATHS table installed before each example.
describe IMW do
  include IMW

  before(:each) do
    # Symbols inside a value refer to other entries of the table (see the
    # 'expands to later generations' example below).
    # NOTE(review): reassigning the constant on every example presumably
    # triggers Ruby's "already initialized constant" warning and replaces the
    # table for any spec that runs afterwards -- confirm whether that is
    # intended.
    IMW::PATHS = {
      :data    => '/data',
      :weather => 'ftp.ncdc.noaa.gov/pub/data/noaa',
      :first   => ['1', :second, 'last'],
      :second  => ['2', :third],
      :third   => ['3'],
    }
  end

  it 'is idempotent on a string' do
    path_to('hi').should == 'hi'
  end

  it 'has an absolute path to the data dir' do
    path_to(:data).should =~ %r{^/}
  end

  it 'handles mixed array and sym args' do
    path_to( [:data, 'hi'], [[['there']]]).should == '/data/hi/there'
  end

  it 'expands to later generations' do
    # :first -> '1', :second -> ('2', :third -> '3'), 'last'
    path_to(:first).should == File.join('1/2/3/last')
  end

  it 'expands interior symbols' do
    path_to(['hadoop1:/working', :data, :weather]).should ==
      File.join('hadoop1:/working/data/ftp.ncdc.noaa.gov/pub/data/noaa')
  end

end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
#
|
2
|
+
# h2. spec/imw/workflow/rip/local_spec.rb -- specs for copying files from local disk
|
3
|
+
#
|
4
|
+
# == About
|
5
|
+
#
|
6
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
7
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
8
|
+
# License:: GPL 3.0
|
9
|
+
# Website:: http://infinitemonkeywrench.org/
|
10
|
+
#
|
11
|
+
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
12
|
+
require IMW_SPEC_DIR + "/imw/matchers/without_regard_to_order_matcher.rb"
|
13
|
+
|
14
|
+
require 'fileutils'
|
15
|
+
|
16
|
+
require 'imw/utils/random'
|
17
|
+
require 'imw/utils/extensions/find'
|
18
|
+
require 'imw/workflow/rip/local'
|
19
|
+
|
20
|
+
# Specs for IMW::Rip.from_local_disk: copying files and directory trees from
# local disk into a target directory.
describe "Ripping from local disk" do

  include Spec::Matchers::IMW

  before(:all) do
    @root_directory = IMW::DIRECTORIES[:dump] + "/local_spec"
    @file1 = @root_directory + "/first.csv"

    @source_directory1 = @root_directory + "/source1"
    @file2 = @source_directory1 + "/second.txt"
    @file3 = @source_directory1 + "/third.csv"

    @source_directory2 = @root_directory + "/source2"
    @file4  = @source_directory2 + "/fourth.txt"
    @file5a = @source_directory2 + "/fifth-shared.yaml"

    # Nested inside source2 and holding a file with the same basename as
    # @file5a.
    @source_directory3 = @source_directory2 + "/source3-nested"
    @file5b = @source_directory3 + "/fifth-shared.yaml"

    @target_directory = @root_directory + "/target"
  end

  before(:each) do
    # mkdir_p rather than mkdir: tolerate directories left behind by an
    # aborted run and a dump directory that does not exist yet.
    FileUtils.mkdir_p([@root_directory, @source_directory1, @source_directory2, @source_directory3, @target_directory])
    [@file1, @file2, @file3, @file4, @file5a, @file5b].each { |file| IMW::Random.file(file) }
  end

  after(:each) do
    FileUtils.rm_rf @root_directory
  end

  # Map each path to its final component.
  def basenames_of files
    files.map { |file| File.basename file }
  end

  it "should raise an error when attempting to copy to a non-existent target directory" do
    FileUtils.rm_rf @target_directory
    lambda { IMW::Rip.from_local_disk(@target_directory, @source_directory1) }.should raise_error(IMW::PathError)
  end

  it "should copy all files in all directories and paths recursively to the target directory without any hierarchy" do
    IMW::Rip.from_local_disk(@target_directory, @file1, @source_directory1, @source_directory2)
    # @file5b is not listed: it shares its basename with @file5a.
    Find.files_relative_to_directory(@target_directory).should match_without_regard_to_order(basenames_of([@file1, @file2, @file3, @file4, @file5a]))
  end

  it "should accept a block which establishes a hierarchy to be created in the target directory and which skips copying certain files if it returns nil" do

    # Route each file into a sub-directory of the target chosen by its
    # extension; returning nil tells the ripper to skip the file.
    IMW::Rip.from_local_disk(@target_directory, @file1, @source_directory1, @source_directory2) do |path|
      case File.extname(path)
      when '.txt' then File.join('txt', File.basename(path)) # put text files in txt
      when '.csv' then File.join("csv", File.basename(path)) # put csv files in csv
      else nil                                               # don't copy other extensions
      end
    end

    # what we would expect to see from that block
    txt = [@file2, @file4].map { |path| File.join("txt", File.basename(path)) }
    csv = [@file1, @file3].map { |path| File.join("csv", File.basename(path)) }

    Find.files_relative_to_directory(@target_directory).should match_without_regard_to_order(txt + csv)
  end

end
|
88
|
+
|
89
|
+
# puts "#{File.basename(__FILE__)}: Having found the platter you were looking for, you stare at it, examining your reflection. What a handsome chimp you are!" # at bottom
|