imw 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +15 -0
- data/CHANGELOG +0 -0
- data/LICENSE +674 -0
- data/README.rdoc +101 -0
- data/Rakefile +20 -0
- data/VERSION +1 -0
- data/etc/imwrc.rb +76 -0
- data/lib/imw.rb +42 -0
- data/lib/imw/boot.rb +58 -0
- data/lib/imw/dataset.rb +233 -0
- data/lib/imw/dataset/datamapper.rb +66 -0
- data/lib/imw/dataset/datamapper/time_and_user_stamps.rb +37 -0
- data/lib/imw/dataset/loaddump.rb +50 -0
- data/lib/imw/dataset/old/file_collection.rb +88 -0
- data/lib/imw/dataset/old/file_collection_utils.rb +71 -0
- data/lib/imw/dataset/scaffold.rb +132 -0
- data/lib/imw/dataset/scraped_uri.rb +305 -0
- data/lib/imw/dataset/scrub/old_working_scrubber.rb +87 -0
- data/lib/imw/dataset/scrub/scrub.rb +147 -0
- data/lib/imw/dataset/scrub/scrub_simple_url.rb +38 -0
- data/lib/imw/dataset/scrub/scrub_test.rb +60 -0
- data/lib/imw/dataset/scrub/slug.rb +101 -0
- data/lib/imw/dataset/stats.rb +73 -0
- data/lib/imw/dataset/stats/counter.rb +23 -0
- data/lib/imw/dataset/task.rb +38 -0
- data/lib/imw/dataset/workflow.rb +81 -0
- data/lib/imw/files.rb +110 -0
- data/lib/imw/files/archive.rb +113 -0
- data/lib/imw/files/basicfile.rb +122 -0
- data/lib/imw/files/binary.rb +28 -0
- data/lib/imw/files/compressed_file.rb +93 -0
- data/lib/imw/files/compressed_files_and_archives.rb +348 -0
- data/lib/imw/files/compressible.rb +103 -0
- data/lib/imw/files/csv.rb +112 -0
- data/lib/imw/files/json.rb +41 -0
- data/lib/imw/files/sgml.rb +65 -0
- data/lib/imw/files/text.rb +68 -0
- data/lib/imw/files/yaml.rb +46 -0
- data/lib/imw/packagers.rb +8 -0
- data/lib/imw/packagers/archiver.rb +108 -0
- data/lib/imw/packagers/s3_mover.rb +28 -0
- data/lib/imw/parsers.rb +7 -0
- data/lib/imw/parsers/html_parser.rb +382 -0
- data/lib/imw/parsers/html_parser/matchers.rb +306 -0
- data/lib/imw/parsers/line_parser.rb +87 -0
- data/lib/imw/parsers/regexp_parser.rb +72 -0
- data/lib/imw/utils.rb +24 -0
- data/lib/imw/utils/components.rb +61 -0
- data/lib/imw/utils/config.rb +46 -0
- data/lib/imw/utils/error.rb +54 -0
- data/lib/imw/utils/extensions/array.rb +125 -0
- data/lib/imw/utils/extensions/class/attribute_accessors.rb +8 -0
- data/lib/imw/utils/extensions/core.rb +43 -0
- data/lib/imw/utils/extensions/dir.rb +24 -0
- data/lib/imw/utils/extensions/file_core.rb +64 -0
- data/lib/imw/utils/extensions/hash.rb +218 -0
- data/lib/imw/utils/extensions/hpricot.rb +48 -0
- data/lib/imw/utils/extensions/string.rb +49 -0
- data/lib/imw/utils/extensions/struct.rb +42 -0
- data/lib/imw/utils/extensions/symbol.rb +28 -0
- data/lib/imw/utils/extensions/typed_struct.rb +22 -0
- data/lib/imw/utils/extensions/uri.rb +59 -0
- data/lib/imw/utils/log.rb +67 -0
- data/lib/imw/utils/misc.rb +63 -0
- data/lib/imw/utils/paths.rb +115 -0
- data/lib/imw/utils/uri.rb +59 -0
- data/lib/imw/utils/uuid.rb +33 -0
- data/lib/imw/utils/validate.rb +38 -0
- data/lib/imw/utils/version.rb +12 -0
- data/lib/imw/utils/view.rb +113 -0
- data/lib/imw/utils/view/dump_csv.rb +112 -0
- data/lib/imw/utils/view/dump_csv_older.rb +117 -0
- data/spec/data/sample.csv +131 -0
- data/spec/data/sample.tsv +131 -0
- data/spec/data/sample.txt +131 -0
- data/spec/data/sample.xml +653 -0
- data/spec/data/sample.yaml +652 -0
- data/spec/imw/dataset/datamapper/uri_spec.rb +43 -0
- data/spec/imw/dataset/datamapper_spec_helper.rb +11 -0
- data/spec/imw/files/archive_spec.rb +118 -0
- data/spec/imw/files/basicfile_spec.rb +121 -0
- data/spec/imw/files/bz2_spec.rb +32 -0
- data/spec/imw/files/compressed_file_spec.rb +96 -0
- data/spec/imw/files/compressible_spec.rb +100 -0
- data/spec/imw/files/file_spec.rb +144 -0
- data/spec/imw/files/gz_spec.rb +32 -0
- data/spec/imw/files/rar_spec.rb +33 -0
- data/spec/imw/files/tar_spec.rb +31 -0
- data/spec/imw/files/text_spec.rb +23 -0
- data/spec/imw/files/zip_spec.rb +31 -0
- data/spec/imw/files_spec.rb +38 -0
- data/spec/imw/packagers/archiver_spec.rb +125 -0
- data/spec/imw/packagers/s3_mover_spec.rb +7 -0
- data/spec/imw/parsers/line_parser_spec.rb +96 -0
- data/spec/imw/parsers/regexp_parser_spec.rb +42 -0
- data/spec/imw/utils/extensions/file_core_spec.rb +72 -0
- data/spec/imw/utils/extensions/find_spec.rb +113 -0
- data/spec/imw/utils/paths_spec.rb +38 -0
- data/spec/imw/workflow/rip/local_spec.rb +89 -0
- data/spec/imw/workflow/rip_spec.rb +27 -0
- data/spec/rcov.opts +1 -0
- data/spec/spec.opts +4 -0
- data/spec/spec_helper.rb +32 -0
- data/spec/support/archive_contents_matcher.rb +94 -0
- data/spec/support/custom_matchers.rb +21 -0
- data/spec/support/directory_contents_matcher.rb +61 -0
- data/spec/support/extensions.rb +18 -0
- data/spec/support/file_contents_matcher.rb +50 -0
- data/spec/support/random.rb +210 -0
- data/spec/support/without_regard_to_order_matcher.rb +58 -0
- metadata +196 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
+
require 'ostruct'
|
|
3
|
+
|
|
4
|
+
# Specs for IMW::Parsers::LineParser, driven by the sample.csv fixture.
# The expectations below rely on that fixture having 131 lines, the first
# of which is the header row "ID,Name,Genus,Species".
describe IMW::Parsers::LineParser do

  before do
    @path = File.dirname(__FILE__) + "/../../data/sample.csv"
    @file = File.new(@path)
    @fields = [:id, :name, :genus, :species]
  end

  # A fresh handle is opened before every example; close it again so the
  # suite does not leak one file descriptor per example.
  after do
    @file.close unless @file.closed?
  end

  describe "without an implemented parsing method" do

    before do
      @parser = IMW::Parsers::LineParser.new
    end

    it "should raise an error when attempting to parse a line" do
      lambda { @parser.parse_line "wahtever" }.should raise_error(IMW::NotImplementedError)
    end

  end

  describe "with an implemented parsing method" do

    before do

      # Anonymous subclass supplying the parse_line hook: splits a CSV row
      # into a hash keyed by the four column names.
      @parser_class = Class.new(IMW::Parsers::LineParser)
      @parser_class.class_eval do
        def parse_line line
          id, name, genus, species = line.chomp.split(',')
          { :id => id, :name => name, :genus => genus, :species => species }
        end
      end

      @parser = @parser_class.new
    end

    it "should skip lines as needed" do
      @parser.skip_first = 1
      results = @parser.parse!(@file)
      results.length.should == 130
    end

    it "should read as many lines as it's asked" do
      results = @parser.parse!(@file, :lines => 10)
      results.length.should == 10
    end

    describe "when parsing into hashes" do

      it "should return an array of hashes when called without a block" do
        results = @parser.parse!(@file)
        results.length.should == 131
        results.first.should == { :id => "ID", :name => "Name", :genus => "Genus", :species => "Species" }
      end

      it "should pass each hash to a block when given one" do
        results = returning([]) do |array|
          @parser.parse!(@file) do |hsh|
            hsh.delete(:id)
            array << hsh
          end
        end
        results.length.should == 131
        results.first.should == { :name => "Name", :genus => "Genus", :species => "Species" }
      end
    end

    describe "when parsing into objects" do
      # Every example in this group parses into OpenStruct instances.
      before { @parser.klass = OpenStruct }

      it "should return an array of objects when defined with a class" do
        results = @parser.parse!(@file)
        results.length.should == 131
        results.first.class.should == OpenStruct
      end

      it "should pass each object to a block when given one and defined with a class" do
        results = returning([]) do |array|
          @parser.parse!(@file) do |obj|
            obj.genus = nil
            array << obj
          end
        end
        results.length.should == 131
        results.first.class.should == OpenStruct
        results.first.genus.should be_blank
      end
    end
  end
end
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
+
require 'ostruct'
|
|
3
|
+
|
|
4
|
+
# Specs for IMW::Parsers::RegexpParser.  Each example parses the first line
# of a small three-line access-log fixture written to disk beforehand.
describe IMW::Parsers::RegexpParser do

  before do
    @path = "foobar.dat"
    @text = <<EOF
151.199.53.145 14-Oct-2007:13:34:34-0500 GET /phpmyadmin/main.php HTTP/1.0
81.227.179.120 14-Oct-2007:13:34:34-0500 GET /phpmyadmin/libraries/select_lang.lib.php HTTP/1.0
81.3.107.173 14-Oct-2007:13:54:26-0500 GET / HTTP/1.1
EOF
    File.open(@path, 'w') { |f| f.write(@text) }
    @file = File.new(@path)

    # Five capture groups, matched positionally against @fields.
    @regexp = %r{^([\d\.]+) (\d{2}-\w{3}-\d{4}:\d{2}:\d{2}:\d{2}-\d{4}) (\w+) ([^\s]+) HTTP/([\d.]{3})$}
    @fields = [:ip, :timestamp, :verb, :url, :version]

    @parser = IMW::Parsers::RegexpParser.new :by_regexp => @regexp, :into_fields => @fields
  end

  # The fixture is written into the current working directory, so close the
  # read handle first and then remove the file to leave nothing behind.
  after do
    @file.close unless @file.closed?
    File.delete(@path) if File.exist?(@path)
  end

  describe "parsing a line which matches its regexp" do
    it "should return an appropriate hash" do
      @parser.parse_line(@file.readline).should == {:ip => '151.199.53.145', :timestamp => '14-Oct-2007:13:34:34-0500', :verb => 'GET', :url => '/phpmyadmin/main.php', :version => "1.0"}
    end
  end

  describe "parsing a line which doesn't match its regexp" do
    # Swap in a pattern that cannot match any fixture line.
    before { @parser.regexp = /foobar/ }

    it "return an empty hash if not parsing strictly" do
      @parser.parse_line(@file.readline).should == {}
    end

    it "should raise an error if parsing strictly" do
      @parser.strict = true
      lambda { @parser.parse_line(@file.readline) }.should raise_error IMW::ParseError
    end
  end
end
|
|
41
|
+
|
|
42
|
+
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
#
|
|
2
|
+
# h2. spec/imw/utils/extensions/file_core_spec.rb -- spec for extensions to core file module
|
|
3
|
+
#
|
|
4
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
5
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
6
|
+
# License:: GPL 3.0
|
|
7
|
+
# Website:: http://infinitemonkeywrench.org/
|
|
8
|
+
#
|
|
9
|
+
|
|
10
|
+
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
|
11
|
+
|
|
12
|
+
require 'fileutils'
|
|
13
|
+
|
|
14
|
+
require 'imw/utils/random'
|
|
15
|
+
|
|
16
|
+
# Specs for the extensions to the core File class exercised below:
# File.name_of_file, File.handle and File.uniquify.
describe File do

  it "should return the 'name' of a file with 'name_of_file'" do
    File.name_of_file("/path/to/some_file.txt").should eql("some_file")
  end

  describe "when finding the handle corresponding to a path" do

    it "should correctly identify paths with the processing instruction suffix" do
      File.handle("/path/to/the_handle#{IMW::PROCESSING_INSTRUCTION_SUFFIX}.yaml").should eql(:the_handle)
    end

    it "should correctly identify paths with the metadata instruction suffix" do
      File.handle("/path/to/the_handle#{IMW::METADATA_SUFFIX}.yaml").should eql(:the_handle)
    end

    it "should raise an error if the path does not correspond to a handle" do
      lambda {File.handle("/path/to/the_handle.txt")}.should raise_error(IMW::PathError)
    end
  end

  describe "when creating unique filenames" do

    before(:each) do
      @root_directory = IMW::DIRECTORIES[:dump] + "/file_core_spec"
      @file0 = @root_directory + "/the_original.txt"
      @file1 = @root_directory + "/the_original.txt.1"
      @file2 = @root_directory + "/the_original.txt.2"
      # mkdir_p rather than mkdir: it also creates missing parents and does
      # not raise if the directory survived an interrupted earlier run.
      FileUtils.mkdir_p(@root_directory)
    end

    # Discard the scratch directory and anything the example created in it.
    after(:each) do
      FileUtils.rm_rf @root_directory
    end

    it "should return the given path if there is no such file already" do
      File.uniquify(@file0).should eql(@file0)
    end

    it "should return the given path with a numerical suffix of `.1' if the file exists" do
      IMW::Random.file(@file0)
      File.uniquify(@file0).should eql(@file1)
    end

    it "should return the given path with a numerical suffix o `.2' if the file exists and a file with a suffix of `.1' also exists" do
      IMW::Random.file(@file0)
      IMW::Random.file(@file1)
      File.uniquify(@file0).should eql(@file2)
    end

  end

end
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# puts "#{File.basename(__FILE__)}: You bend the file folder almost in half and watch as it springs back to shape." # at bottom
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
#
|
|
2
|
+
# h2. spec/imw/utils/extensions/find_spec.rb -- spec for find.rb
|
|
3
|
+
#
|
|
4
|
+
# == About
|
|
5
|
+
#
|
|
6
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
7
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
8
|
+
# License:: GPL 3.0
|
|
9
|
+
# Website:: http://infinitemonkeywrench.org/
|
|
10
|
+
#
|
|
11
|
+
|
|
12
|
+
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
|
13
|
+
require IMW_SPEC_DIR + "/imw/matchers/without_regard_to_order_matcher"
|
|
14
|
+
|
|
15
|
+
require 'fileutils'
|
|
16
|
+
require 'set'
|
|
17
|
+
|
|
18
|
+
require 'imw/utils'
|
|
19
|
+
require 'imw/utils/random'
|
|
20
|
+
require 'imw/utils/extensions/find'
|
|
21
|
+
|
|
22
|
+
# Specs for the Find extensions exercised below: files_in_directory,
# files_relative_to_directory and handles_in_directory.  Every example runs
# against a scratch tree of six files spread over a root directory and one
# subdirectory (an additional, empty sub-subdirectory is also created).
describe Find do

  include Spec::Matchers::IMW

  # Builds the scratch tree; mkdir_p on the deepest directory creates the
  # root and the subdirectory as well.
  def create_sample_files
    FileUtils.mkdir_p(@subsubdirectory)
    [@file1,@file2,@file3,@file4,@file5,@file6].each {|path| IMW::Random.file path}
  end

  # Only path strings are computed here; nothing touches the disk until
  # before(:each).
  before(:all) do
    @root_directory = IMW::DIRECTORIES[:dump] + "/find_extension_spec"
    @subdirectory = @root_directory + "/subdir"
    @subsubdirectory = @subdirectory + "/subsubdir"
    @fake_directory = @root_directory + "/notreal"
    @file1 = @root_directory + "/my_file1.txt"
    @file2 = @root_directory + "/my_file2.csv"
    @file3 = @root_directory + "/my_file3.dat"
    @file4 = @subdirectory + "/your_file4.html"
    @file5 = @subdirectory + "/your_file5.csv"
    # Same stem as @file5 but without an extension.
    @file6 = @subdirectory + "/your_file5"
  end

  before(:each) do
    create_sample_files
  end

  after(:each) do
    FileUtils.rm_rf @root_directory
  end

  describe "when listing files with absolute paths contained in a directory" do

    it "should raise an error when listing a non-exsiting directory" do
      lambda {Find.files_in_directory(@fake_directory) }.should raise_error(IMW::PathError)
    end

    it "should find every file by default" do
      Find.files_in_directory(@root_directory).should match_without_regard_to_order([@file1,@file2,@file3,@file4,@file5,@file6])
    end

    it "should only find files which match its :include argument" do
      Find.files_in_directory(@root_directory, :include => /.*\.csv$/).should match_without_regard_to_order([@file2,@file5])
    end

    it "should not find files which match its :exclude argument" do
      Find.files_in_directory(@root_directory, :exclude => /.*\.csv$/).should match_without_regard_to_order([@file1,@file3,@file4,@file6])
    end

    it "should only find files which match its :include argument and don't match its :exclude argument" do
      Find.files_in_directory(@root_directory, :include => /my/, :exclude => /.*\.csv$/).should match_without_regard_to_order([@file1,@file3])
    end
  end

  describe "when listing files with relative paths contained in a directory" do

    # Drops the "<root>/" prefix from each absolute path.
    def strip_root_directory array
      array.map {|item| item[@root_directory.length + 1,item.size]}
    end

    # Exercises the relative-path API; this example previously repeated the
    # files_in_directory call from the absolute-path group.
    # NOTE(review): assumes files_relative_to_directory raises
    # IMW::PathError for a missing directory, as files_in_directory does --
    # confirm against the implementation.
    it "should raise an error when listing a non-exsiting directory" do
      lambda {Find.files_relative_to_directory(@fake_directory) }.should raise_error(IMW::PathError)
    end

    it "should find every file by default" do
      Find.files_relative_to_directory(@root_directory).should match_without_regard_to_order(strip_root_directory([@file1,@file2,@file3,@file4,@file5,@file6]))
    end

    it "should only find files which match its :include argument" do
      Find.files_relative_to_directory(@root_directory, :include => /.*\.csv$/).should match_without_regard_to_order(strip_root_directory([@file2,@file5]))
    end

    it "should not find files which match its :exclude argument" do
      Find.files_relative_to_directory(@root_directory, :exclude => /.*\.csv$/).should match_without_regard_to_order(strip_root_directory([@file1,@file3,@file4,@file6]))
    end

    it "should only find files which match its :include argument and don't match its :exclude argument" do
      Find.files_relative_to_directory(@root_directory, :include => /^my/, :exclude => /.*\.csv$/).should match_without_regard_to_order(strip_root_directory([@file1,@file3]))
    end

  end

  describe "when listing handles in a directory" do

    # @file5 and @file6 share the stem "your_file5", so only two distinct
    # handles are expected from the three matching files.
    it "should return a unique set of handles" do
      Find.handles_in_directory(@root_directory, :include => /your/).should match_without_regard_to_order([:your_file4, :your_file5])
    end
  end


end
|
|
112
|
+
|
|
113
|
+
# puts "#{File.basename(__FILE__)}: You throw your Monkeywrench backwards over your shoulder and run like mad to go find it. Again, and again, and again." # at bottom
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
require File.join(File.dirname(__FILE__),'../../spec_helper')
|
|
2
|
+
require 'imw'
|
|
3
|
+
require 'imw/utils/paths'
|
|
4
|
+
|
|
5
|
+
# Specs for path_to (mixed in via `include IMW`), run against a small
# fixture table installed as IMW::PATHS for the duration of each example.
describe IMW do
  include IMW

  before(:each) do
    # Swap the constant rather than re-assigning it: plain re-assignment
    # warns "already initialized constant" on every example and leaves the
    # fixture table in place for whatever runs after this file.
    # remove_const returns the old value, which after(:each) puts back.
    @original_paths = IMW.send(:remove_const, :PATHS) if IMW.const_defined?(:PATHS)
    IMW.const_set(:PATHS, {
      :data => '/data',
      :weather => 'ftp.ncdc.noaa.gov/pub/data/noaa',
      :first => ['1', :second, 'last'],
      :second => ['2', :third],
      :third => ['3'],
    })
  end

  after(:each) do
    IMW.send(:remove_const, :PATHS)
    IMW.const_set(:PATHS, @original_paths) if @original_paths
  end

  it 'is idempotent on a string' do
    path_to('hi').should == 'hi'
  end

  it 'has an absolute path to the data dir' do
    path_to(:data).should =~ %r{^/}
  end

  it 'handles mixed array and sym args' do
    path_to( [:data, 'hi'], [[['there']]]).should == '/data/hi/there'
  end

  # :first refers to :second, which in turn refers to :third.
  it 'expands to later generations' do
    path_to(:first).should == File.join('1/2/3/last')
  end

  it 'expands interior symbols' do
    path_to(['hadoop1:/working', :data, :weather]).should ==
      File.join('hadoop1:/working/data/ftp.ncdc.noaa.gov/pub/data/noaa')
  end

end
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
#
|
|
2
|
+
# h2. spec/imw/workflow/rip/local_spec.rb -- specs for copying files from local disk
|
|
3
|
+
#
|
|
4
|
+
# == About
|
|
5
|
+
#
|
|
6
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
7
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
8
|
+
# License:: GPL 3.0
|
|
9
|
+
# Website:: http://infinitemonkeywrench.org/
|
|
10
|
+
#
|
|
11
|
+
require File.join(File.dirname(__FILE__),'../../../spec_helper')
|
|
12
|
+
require IMW_SPEC_DIR + "/imw/matchers/without_regard_to_order_matcher.rb"
|
|
13
|
+
|
|
14
|
+
require 'fileutils'
|
|
15
|
+
|
|
16
|
+
require 'imw/utils/random'
|
|
17
|
+
require 'imw/utils/extensions/find'
|
|
18
|
+
require 'imw/workflow/rip/local'
|
|
19
|
+
|
|
20
|
+
# Specs for IMW::Rip.from_local_disk.  Each example runs against a scratch
# tree holding one loose file, two source directories (the second with a
# nested third) and an empty target directory.
describe "Ripping from local disk" do

  include Spec::Matchers::IMW

  # Only path strings are computed here; the tree itself is built in
  # before(:each).
  before(:all) do
    @root_directory = IMW::DIRECTORIES[:dump] + "/local_spec"
    @file1 = @root_directory + "/first.csv"

    @source_directory1 = @root_directory + "/source1"
    @file2 = @source_directory1 + "/second.txt"
    @file3 = @source_directory1 + "/third.csv"

    @source_directory2 = @root_directory + "/source2"
    @file4 = @source_directory2 + "/fourth.txt"
    @file5a = @source_directory2 + "/fifth-shared.yaml"

    # Nested inside source2 and holding a file with the same basename as
    # @file5a.
    @source_directory3 = @source_directory2 + "/source3-nested"
    @file5b = @source_directory3 + "/fifth-shared.yaml"

    @target_directory = @root_directory + "/target"
  end

  before(:each) do
    # mkdir_p rather than mkdir: it creates missing parents and does not
    # raise if directories were left behind by an interrupted run.
    FileUtils.mkdir_p([@root_directory,@source_directory1,@source_directory2,@source_directory3,@target_directory])
    [@file1,@file2,@file3,@file4,@file5a,@file5b].each {|file| IMW::Random.file(file)}
  end

  after(:each) do
    FileUtils.rm_rf @root_directory
  end


  # Maps each path to its final component.
  def basenames_of files
    files.map {|file| File.basename file}
  end

  it "should raise an error when attempting to copy to a non-existent target directory" do
    FileUtils.rm_rf @target_directory
    lambda { IMW::Rip.from_local_disk(@target_directory,@source_directory1)}.should raise_error(IMW::PathError)
  end

  # @file5b is left out of the expected list: it has the same basename as
  # @file5a, so a flat target can hold only one "fifth-shared.yaml".
  it "should copy all files in all directories and paths recursively to the target directory without any hierarchy" do
    IMW::Rip.from_local_disk(@target_directory,@file1,@source_directory1,@source_directory2)
    Find.files_relative_to_directory(@target_directory).should match_without_regard_to_order(basenames_of([@file1,@file2,@file3,@file4,@file5a]))
  end

  it "should accept a block which establishes a hierarchy to be created in the target directory and which skips copying certain files if it returns nil" do

    # complicated block to copy files to sub-directories of the target
    # directory depending on their extension
    IMW::Rip.from_local_disk(@target_directory,@file1,@source_directory1,@source_directory2) do |path|
      if File.extname(path) == '.txt' then
        File.join('txt',File.basename(path)) # put text files in txt
      elsif File.extname(path) == '.csv' then
        File.join("csv",File.basename(path)) # put csv files in csv
      else
        nil # don't copy other extensions
      end
    end

    # what we would expect to see from that block
    txt = [@file2,@file4].map {|path| File.join("txt",File.basename(path))}
    csv = [@file1,@file3].map {|path| File.join("csv",File.basename(path))}

    Find.files_relative_to_directory(@target_directory).should match_without_regard_to_order(txt + csv)
  end

end
|
|
88
|
+
|
|
89
|
+
# puts "#{File.basename(__FILE__)}: Having found the platter you were looking for, you stare at it, examining your reflection. What a handsome chimp you are!" # at bottom
|