imw 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -1
- data/Rakefile +10 -0
- data/TODO +18 -0
- data/VERSION +1 -1
- data/bin/imw +1 -1
- data/etc/imwrc.rb +0 -50
- data/examples/dataset.rb +12 -0
- data/lib/imw/boot.rb +55 -9
- data/lib/imw/dataset/paths.rb +15 -24
- data/lib/imw/dataset/workflow.rb +131 -72
- data/lib/imw/dataset.rb +94 -186
- data/lib/imw/parsers/html_parser.rb +1 -1
- data/lib/imw/parsers.rb +1 -1
- data/lib/imw/repository.rb +3 -27
- data/lib/imw/resource.rb +190 -0
- data/lib/imw/resources/archive.rb +97 -0
- data/lib/imw/resources/archives_and_compressed/bz2.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/gz.rb +18 -0
- data/lib/imw/resources/archives_and_compressed/rar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tar.rb +23 -0
- data/lib/imw/resources/archives_and_compressed/tarbz2.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/targz.rb +78 -0
- data/lib/imw/resources/archives_and_compressed/zip.rb +57 -0
- data/lib/imw/resources/archives_and_compressed.rb +32 -0
- data/lib/imw/resources/compressed_file.rb +89 -0
- data/lib/imw/resources/compressible.rb +77 -0
- data/lib/imw/resources/formats/delimited.rb +92 -0
- data/lib/imw/resources/formats/excel.rb +125 -0
- data/lib/imw/resources/formats/json.rb +53 -0
- data/lib/imw/resources/formats/sgml.rb +72 -0
- data/lib/imw/resources/formats/yaml.rb +53 -0
- data/lib/imw/resources/formats.rb +32 -0
- data/lib/imw/resources/local.rb +198 -0
- data/lib/imw/resources/remote.rb +110 -0
- data/lib/imw/resources/schemes/hdfs.rb +242 -0
- data/lib/imw/resources/schemes/http.rb +161 -0
- data/lib/imw/resources/schemes/s3.rb +137 -0
- data/lib/imw/resources/schemes.rb +19 -0
- data/lib/imw/resources.rb +118 -0
- data/lib/imw/runner.rb +5 -4
- data/lib/imw/transforms/archiver.rb +215 -0
- data/lib/imw/transforms/transferer.rb +103 -0
- data/lib/imw/transforms.rb +8 -0
- data/lib/imw/utils/error.rb +26 -30
- data/lib/imw/utils/extensions/array.rb +5 -15
- data/lib/imw/utils/extensions/hash.rb +6 -16
- data/lib/imw/utils/extensions/hpricot.rb +0 -14
- data/lib/imw/utils/extensions/string.rb +5 -15
- data/lib/imw/utils/extensions/symbol.rb +0 -13
- data/lib/imw/utils/extensions.rb +65 -0
- data/lib/imw/utils/log.rb +14 -13
- data/lib/imw/utils/misc.rb +0 -6
- data/lib/imw/utils/paths.rb +101 -42
- data/lib/imw/utils/version.rb +8 -9
- data/lib/imw/utils.rb +2 -18
- data/lib/imw.rb +92 -17
- data/spec/data/sample.csv +1 -1
- data/spec/data/sample.json +1 -0
- data/spec/data/sample.tsv +1 -1
- data/spec/data/sample.txt +1 -1
- data/spec/data/sample.xml +1 -1
- data/spec/data/sample.yaml +1 -1
- data/spec/imw/dataset/paths_spec.rb +32 -0
- data/spec/imw/dataset/workflow_spec.rb +41 -0
- data/spec/imw/resource_spec.rb +79 -0
- data/spec/imw/resources/archive_spec.rb +69 -0
- data/spec/imw/resources/archives_and_compressed/bz2_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/gz_spec.rb +15 -0
- data/spec/imw/resources/archives_and_compressed/rar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tar_spec.rb +16 -0
- data/spec/imw/resources/archives_and_compressed/tarbz2_spec.rb +24 -0
- data/spec/imw/resources/archives_and_compressed/targz_spec.rb +21 -0
- data/spec/imw/resources/archives_and_compressed/zip_spec.rb +16 -0
- data/spec/imw/resources/compressed_file_spec.rb +48 -0
- data/spec/imw/resources/compressible_spec.rb +36 -0
- data/spec/imw/resources/formats/delimited_spec.rb +33 -0
- data/spec/imw/resources/formats/json_spec.rb +32 -0
- data/spec/imw/resources/formats/sgml_spec.rb +24 -0
- data/spec/imw/resources/formats/yaml_spec.rb +41 -0
- data/spec/imw/resources/local_spec.rb +98 -0
- data/spec/imw/resources/remote_spec.rb +35 -0
- data/spec/imw/resources/schemes/hdfs_spec.rb +61 -0
- data/spec/imw/resources/schemes/http_spec.rb +19 -0
- data/spec/imw/resources/schemes/s3_spec.rb +19 -0
- data/spec/imw/transforms/archiver_spec.rb +120 -0
- data/spec/imw/transforms/transferer_spec.rb +113 -0
- data/spec/imw/utils/paths_spec.rb +5 -33
- data/spec/imw/utils/shared_paths_spec.rb +29 -0
- data/spec/spec_helper.rb +5 -5
- data/spec/support/paths_matcher.rb +67 -0
- data/spec/support/random.rb +39 -36
- metadata +88 -75
- data/lib/imw/dataset/task.rb +0 -41
- data/lib/imw/files/archive.rb +0 -113
- data/lib/imw/files/basicfile.rb +0 -122
- data/lib/imw/files/binary.rb +0 -28
- data/lib/imw/files/compressed_file.rb +0 -93
- data/lib/imw/files/compressed_files_and_archives.rb +0 -334
- data/lib/imw/files/compressible.rb +0 -103
- data/lib/imw/files/csv.rb +0 -113
- data/lib/imw/files/directory.rb +0 -62
- data/lib/imw/files/excel.rb +0 -84
- data/lib/imw/files/json.rb +0 -41
- data/lib/imw/files/sgml.rb +0 -46
- data/lib/imw/files/text.rb +0 -68
- data/lib/imw/files/yaml.rb +0 -46
- data/lib/imw/files.rb +0 -125
- data/lib/imw/packagers/archiver.rb +0 -126
- data/lib/imw/packagers/s3_mover.rb +0 -36
- data/lib/imw/packagers.rb +0 -8
- data/lib/imw/utils/components.rb +0 -61
- data/lib/imw/utils/config.rb +0 -46
- data/lib/imw/utils/extensions/class/attribute_accessors.rb +0 -8
- data/lib/imw/utils/extensions/core.rb +0 -27
- data/lib/imw/utils/extensions/dir.rb +0 -24
- data/lib/imw/utils/extensions/file_core.rb +0 -64
- data/lib/imw/utils/extensions/typed_struct.rb +0 -22
- data/lib/imw/utils/extensions/uri.rb +0 -59
- data/lib/imw/utils/view/dump_csv.rb +0 -112
- data/lib/imw/utils/view/dump_csv_older.rb +0 -117
- data/lib/imw/utils/view.rb +0 -113
- data/spec/imw/dataset/datamapper/uri_spec.rb +0 -43
- data/spec/imw/dataset/datamapper_spec_helper.rb +0 -11
- data/spec/imw/files/archive_spec.rb +0 -118
- data/spec/imw/files/basicfile_spec.rb +0 -121
- data/spec/imw/files/bz2_spec.rb +0 -32
- data/spec/imw/files/compressed_file_spec.rb +0 -96
- data/spec/imw/files/compressible_spec.rb +0 -100
- data/spec/imw/files/file_spec.rb +0 -144
- data/spec/imw/files/gz_spec.rb +0 -32
- data/spec/imw/files/rar_spec.rb +0 -33
- data/spec/imw/files/tar_spec.rb +0 -31
- data/spec/imw/files/text_spec.rb +0 -23
- data/spec/imw/files/zip_spec.rb +0 -31
- data/spec/imw/files_spec.rb +0 -38
- data/spec/imw/packagers/archiver_spec.rb +0 -125
- data/spec/imw/packagers/s3_mover_spec.rb +0 -7
- data/spec/imw/utils/extensions/file_core_spec.rb +0 -72
- data/spec/imw/utils/extensions/find_spec.rb +0 -113
- data/spec/imw/workflow/rip/local_spec.rb +0 -89
- data/spec/imw/workflow/rip_spec.rb +0 -27
- data/spec/support/archive_contents_matcher.rb +0 -94
- data/spec/support/directory_contents_matcher.rb +0 -61
data/lib/imw/utils/log.rb
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
require 'logger'
|
|
2
2
|
|
|
3
3
|
module IMW
|
|
4
|
+
|
|
5
|
+
# Default log file.
|
|
4
6
|
LOG_FILE_DESTINATION = STDERR unless defined?(LOG_FILE_DESTINATION)
|
|
7
|
+
|
|
5
8
|
LOG_TIMEFORMAT = "%Y%m%d-%H:%M:%S " unless defined?(LOG_TIMEFORMAT)
|
|
6
9
|
|
|
7
10
|
class << self; attr_accessor :log end
|
|
8
|
-
|
|
9
|
-
# Create a Logger and point it at LOG_FILE_DESTINATION
|
|
10
|
-
#
|
|
11
|
-
# LOG_FILE_DESTINATION is STDOUT by default; redefine it in your
|
|
12
|
-
# ~/.imwrc, or set IMW.log yourself, if that's not cool.
|
|
13
|
-
#
|
|
11
|
+
|
|
12
|
+
# Create a Logger and point it at IMW::LOG_FILE_DESTINATION which is
|
|
13
|
+
# set in ~/.imwrc and defaults to STDERR.
|
|
14
14
|
def self.instantiate_logger!
|
|
15
15
|
IMW.log ||= Logger.new(LOG_FILE_DESTINATION)
|
|
16
16
|
IMW.log.datetime_format = "%Y%m%d-%H:%M:%S "
|
|
@@ -18,15 +18,19 @@ module IMW
|
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
def announce *events
|
|
21
|
-
options = events.extract_options!
|
|
21
|
+
options = events.flatten.extract_options!
|
|
22
22
|
options.reverse_merge! :level => Logger::INFO
|
|
23
|
-
# puts [options, events ].inspect, "*"*76
|
|
24
23
|
IMW.log.add options[:level], events.join("\n")
|
|
25
24
|
end
|
|
26
25
|
def banner *events
|
|
27
|
-
options = events.extract_options!
|
|
26
|
+
options = events.flatten.extract_options!
|
|
28
27
|
options.reverse_merge! :level => Logger::INFO
|
|
29
|
-
["*"*75, events, "*"*75]
|
|
28
|
+
announce(["*"*75, events, "*"*75], options)
|
|
29
|
+
end
|
|
30
|
+
def warn *events
|
|
31
|
+
options = events.flatten.extract_options!
|
|
32
|
+
options.reverse_merge! :level => Logger::WARN
|
|
33
|
+
announce events, options
|
|
30
34
|
end
|
|
31
35
|
|
|
32
36
|
PROGRESS_TRACKERS = {}
|
|
@@ -61,7 +65,4 @@ module IMW
|
|
|
61
65
|
end
|
|
62
66
|
end
|
|
63
67
|
|
|
64
|
-
#
|
|
65
|
-
# Make the default logger
|
|
66
|
-
#
|
|
67
68
|
IMW.instantiate_logger!
|
data/lib/imw/utils/misc.rb
CHANGED
|
@@ -1,10 +1,4 @@
|
|
|
1
1
|
module IMW
|
|
2
|
-
# Return a string representing the current UTC time in the IMW
|
|
3
|
-
# format.
|
|
4
|
-
def self.current_utc_time_string
|
|
5
|
-
Time.now.utc.strftime(IMW::STRFTIME_FORMAT)
|
|
6
|
-
end
|
|
7
|
-
|
|
8
2
|
|
|
9
3
|
# A simple counter. The +value+ and +add+ methods read and
|
|
10
4
|
# increment the counter's value.
|
data/lib/imw/utils/paths.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
require 'pathname'
|
|
2
|
+
|
|
1
3
|
module IMW
|
|
2
4
|
|
|
3
5
|
# Implements methods designed to work with an object's
|
|
@@ -9,19 +11,48 @@ module IMW
|
|
|
9
11
|
# <tt>@paths</tt>.
|
|
10
12
|
module Paths
|
|
11
13
|
|
|
12
|
-
# Expands a shorthand workflow path specification to an
|
|
13
|
-
#
|
|
14
|
+
# Expands a shorthand workflow path specification to an actual
|
|
15
|
+
# file path. Strings are interpreted literally but symbols are
|
|
16
|
+
# first resolved to the paths they represent.
|
|
17
|
+
#
|
|
18
|
+
# add_path :foo, '~/whoa'
|
|
19
|
+
# path_to :foo, 'my_thing'
|
|
20
|
+
# => '~/whoa/my_thing'
|
|
14
21
|
#
|
|
15
|
-
#
|
|
16
|
-
#
|
|
17
|
-
# => (...)/data/ripd/gd2.mlb.com/components/game/mlb/year_2008/month_06/day_08/miniscoreboard.xml
|
|
22
|
+
# @param [String, Symbol] pathsegs the path segments to join
|
|
23
|
+
# @return [String] the resulting expanded path
|
|
18
24
|
def path_to *pathsegs
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
+
path = Pathname.new path_to_helper(*pathsegs)
|
|
26
|
+
path.absolute? ? File.expand_path(path) : path.to_s
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# Return the presently defined paths for this object.
|
|
30
|
+
#
|
|
31
|
+
# @return [Hash]
|
|
32
|
+
def paths
|
|
33
|
+
@paths ||= {}
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# Adds a symbolic path for expansion by +path_to+.
|
|
37
|
+
#
|
|
38
|
+
# add_path :foo, '~/whoa'
|
|
39
|
+
# add_path :bar, :foo, 'baz'
|
|
40
|
+
# path_to :bar
|
|
41
|
+
# => '~/whoa/baz'
|
|
42
|
+
#
|
|
43
|
+
# @param [Symbol] sym the name of the path to store
|
|
44
|
+
# @param [Symbol, String] pathsegs the path segments to use to define the path to the name
|
|
45
|
+
# @return [String] the resulting path
|
|
46
|
+
def add_path sym, *pathsegs
|
|
47
|
+
paths[sym] = pathsegs.flatten
|
|
48
|
+
path_to(sym)
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# Removes a symbolic path for expansion by +path_to+.
|
|
52
|
+
#
|
|
53
|
+
# @param [Symbol] sym the stored path symbol to remove
|
|
54
|
+
def remove_path sym
|
|
55
|
+
paths.delete sym if paths.include? sym
|
|
25
56
|
end
|
|
26
57
|
|
|
27
58
|
private
|
|
@@ -29,7 +60,7 @@ module IMW
|
|
|
29
60
|
# +path_to_helper+ handles the recursive calls for +path_to+.
|
|
30
61
|
expanded = pathsegs.flatten.compact.map do |pathseg|
|
|
31
62
|
case
|
|
32
|
-
when pathseg.is_a?(Symbol) &&
|
|
63
|
+
when pathseg.is_a?(Symbol) && paths.include?(pathseg) then path_to(paths[pathseg])
|
|
33
64
|
when pathseg.is_a?(Symbol) && IMW::PATHS.include?(pathseg) then path_to(IMW::PATHS[pathseg])
|
|
34
65
|
when pathseg.is_a?(Symbol) then raise IMW::PathError.new("No path expansion set for #{pathseg.inspect}")
|
|
35
66
|
else pathseg
|
|
@@ -37,29 +68,70 @@ module IMW
|
|
|
37
68
|
end
|
|
38
69
|
File.join(*expanded)
|
|
39
70
|
end
|
|
40
|
-
|
|
71
|
+
end
|
|
41
72
|
|
|
42
|
-
# Adds a symbolic path for expansion by +path_to+.
|
|
43
|
-
def add_path sym, *pathsegs
|
|
44
|
-
@paths[sym] = pathsegs.flatten
|
|
45
|
-
end
|
|
46
73
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
74
|
+
# Default paths for the IMW. Chosen to make sense on most *NIX
|
|
75
|
+
# distributions.
|
|
76
|
+
DEFAULT_PATHS = {
|
|
77
|
+
:home => ENV['HOME'],
|
|
78
|
+
:data_root => "/var/lib/imw",
|
|
79
|
+
:log_root => "/var/log/imw",
|
|
80
|
+
:scripts_root => "/usr/share/imw",
|
|
81
|
+
:tmp_root => "/tmp/imw",
|
|
82
|
+
|
|
83
|
+
# the imw library
|
|
84
|
+
:imw_root => File.expand_path(File.dirname(__FILE__) + "/../../.."),
|
|
85
|
+
:imw_bin => [:imw_root, 'bin'],
|
|
86
|
+
:imw_etc => [:imw_root, 'etc'],
|
|
87
|
+
:imw_lib => [:imw_root, 'lib'],
|
|
88
|
+
|
|
89
|
+
# workflow
|
|
90
|
+
:ripd_root => [:data_root, 'ripd'],
|
|
91
|
+
:rawd_root => [:data_root, 'rawd'],
|
|
92
|
+
:fixd_root => [:data_root, 'fixd'],
|
|
93
|
+
:pkgd_root => [:data_root, 'pkgd']
|
|
94
|
+
}
|
|
95
|
+
defined?(PATHS) ? PATHS.reverse_merge!(DEFAULT_PATHS) : PATHS = DEFAULT_PATHS
|
|
52
96
|
|
|
97
|
+
# Expands a shorthand workflow path specification to an actual
|
|
98
|
+
# file path. Strings are interpreted literally but symbols are
|
|
99
|
+
# first resolved to the paths they represent.
|
|
100
|
+
#
|
|
101
|
+
# IMW.add_path :foo, '~/whoa'
|
|
102
|
+
# IMW.path_to :foo, 'my_thing'
|
|
103
|
+
# => '~/whoa/my_thing'
|
|
104
|
+
#
|
|
105
|
+
# @param [String, Symbol] pathsegs the path segments to join
|
|
106
|
+
# @return [String] the resulting expanded path
|
|
53
107
|
def self.path_to *pathsegs
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
108
|
+
path = Pathname.new IMW.path_to_helper(*pathsegs)
|
|
109
|
+
path.absolute? ? File.expand_path(path) : path.to_s
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
# Adds a symbolic path for expansion by +path_to+.
|
|
113
|
+
#
|
|
114
|
+
# IMW.add_path :foo, '~/whoa'
|
|
115
|
+
# IMW.add_path :bar, :foo, 'baz'
|
|
116
|
+
# IMW.path_to :bar
|
|
117
|
+
# => '~/whoa/baz'
|
|
118
|
+
#
|
|
119
|
+
# @param [Symbol] sym the name of the path to store
|
|
120
|
+
# @param [Symbol, String] pathsegs the path segments to use to define the path to the name
|
|
121
|
+
# @return [String] the resulting path
|
|
122
|
+
def self.add_path sym, *pathsegs
|
|
123
|
+
IMW::PATHS[sym] = pathsegs.flatten
|
|
124
|
+
path_to[sym]
|
|
60
125
|
end
|
|
61
126
|
|
|
62
|
-
|
|
127
|
+
# Removes a symbolic path for expansion by +path_to+.
|
|
128
|
+
#
|
|
129
|
+
# @param [Symbol] sym the stored path symbol to remove
|
|
130
|
+
def self.remove_path sym
|
|
131
|
+
IMW::PATHS.delete sym if IMW::PATHS.include? sym
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
protected
|
|
63
135
|
def self.path_to_helper *pathsegs # :nodoc:
|
|
64
136
|
# +path_to_helper+ handles the recursive calls for +path_to+.
|
|
65
137
|
expanded = pathsegs.flatten.compact.map do |pathseg|
|
|
@@ -71,17 +143,4 @@ module IMW
|
|
|
71
143
|
end
|
|
72
144
|
File.join(*expanded)
|
|
73
145
|
end
|
|
74
|
-
public
|
|
75
|
-
|
|
76
|
-
# Adds a symbolic path for expansion by +path_to+.
|
|
77
|
-
def self.add_path sym, *pathsegs
|
|
78
|
-
IMW::PATHS[sym] = pathsegs.flatten
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
# Removes a symbolic path for expansion by +path_to+.
|
|
82
|
-
def self.remove_path sym
|
|
83
|
-
IMW::PATHS.delete sym if IMW::PATHS.include? sym
|
|
84
|
-
end
|
|
85
146
|
end
|
|
86
|
-
|
|
87
|
-
# puts "#{File.basename(__FILE__)}: Your monkeywrench glows alternately dim then bright as you wander, suggesting to you which paths to take."
|
data/lib/imw/utils/version.rb
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
module IMWVersion #:nodoc:
|
|
1
|
+
module IMW
|
|
3
2
|
unless defined?(VERSION)
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
3
|
+
module VERSION #:nodoc:
|
|
4
|
+
MAJOR = 0
|
|
5
|
+
MINOR = 0
|
|
6
|
+
TINY = 0
|
|
7
|
+
|
|
8
|
+
STRING = [MAJOR, MINOR, TINY].join('.')
|
|
9
|
+
end
|
|
11
10
|
end
|
|
12
11
|
end
|
data/lib/imw/utils.rb
CHANGED
|
@@ -1,24 +1,8 @@
|
|
|
1
|
-
#
|
|
2
|
-
# h2. lib/imw/utils.rb -- utility functions
|
|
3
|
-
#
|
|
4
|
-
# == About
|
|
5
|
-
#
|
|
6
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
7
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
8
|
-
# License:: GPL 3.0
|
|
9
|
-
# Website:: http://infinitemonkeywrench.org/
|
|
10
|
-
#
|
|
11
|
-
|
|
12
1
|
require 'rubygems'
|
|
2
|
+
require 'fileutils'
|
|
13
3
|
require 'imw/utils/error'
|
|
14
4
|
require 'imw/utils/log'
|
|
15
|
-
require 'imw/utils/config'
|
|
16
5
|
require 'imw/utils/paths'
|
|
17
6
|
require 'imw/utils/misc'
|
|
18
|
-
require 'imw/utils/
|
|
19
|
-
require 'imw/utils/extensions/core'
|
|
20
|
-
require 'fileutils'
|
|
21
|
-
require 'pathname'
|
|
22
|
-
|
|
7
|
+
require 'imw/utils/extensions'
|
|
23
8
|
|
|
24
|
-
# puts "#{File.basename(__FILE__)}: Early economists thought they would measure the utility of an action in units of `utils'. Really." # at bottom
|
data/lib/imw.rb
CHANGED
|
@@ -1,11 +1,6 @@
|
|
|
1
1
|
require 'rubygems'
|
|
2
2
|
require 'imw/boot'
|
|
3
3
|
require 'imw/utils'
|
|
4
|
-
require 'imw/dataset'
|
|
5
|
-
require 'imw/repository'
|
|
6
|
-
require 'imw/files'
|
|
7
|
-
require 'imw/parsers'
|
|
8
|
-
require 'imw/packagers'
|
|
9
4
|
|
|
10
5
|
# The Infinite Monkeywrench (IMW) is a Ruby library for ripping,
|
|
11
6
|
# extracting, parsing, munging, and packaging datasets. It allows you
|
|
@@ -13,19 +8,99 @@ require 'imw/packagers'
|
|
|
13
8
|
# transformations of data as a network of dependencies (a la Make or
|
|
14
9
|
# Rake).
|
|
15
10
|
#
|
|
16
|
-
#
|
|
17
|
-
#
|
|
18
|
-
# of File.open). These classes do a lot of work to ensure that all
|
|
19
|
-
# objects returned by IMW.open share methods (write, read, load, dump,
|
|
20
|
-
# parse, compress, extract, &c.) while continuing to use existing
|
|
21
|
-
# implementations of these concepts.
|
|
11
|
+
# IMW has a few central concepts: resources, datasets, workflows, and
|
|
12
|
+
# repositories.
|
|
22
13
|
#
|
|
23
|
-
#
|
|
24
|
-
#
|
|
25
|
-
#
|
|
26
|
-
#
|
|
14
|
+
# Resources represent individual data resources like local files,
|
|
15
|
+
# websites, databases, &c. Resources are typically instantiated via
|
|
16
|
+
# IMW.open, with IMW doing the work of figuring out what to return
|
|
17
|
+
# based on the URI passed in.
|
|
27
18
|
#
|
|
28
|
-
#
|
|
29
|
-
#
|
|
19
|
+
# Datasets represent collections of related data resources. An
|
|
20
|
+
# IMW::Dataset comes with a pre-defined (but customizable) workflow
|
|
21
|
+
# that takes data resources through several steps: rip, parse, munge,
|
|
22
|
+
# and package. The workflow leverages Rake and so the various tasks
|
|
23
|
+
# that are necessary to process the data till it is nice and pretty
|
|
24
|
+
# can all be linked with dependencies.
|
|
25
|
+
#
|
|
26
|
+
# Repositories are collections of datasets and it is on these
|
|
27
|
+
# collections that the +imw+ command line tool operates.
|
|
30
28
|
module IMW
|
|
29
|
+
autoload :Resource, 'imw/resource'
|
|
30
|
+
autoload :Resources, 'imw/resources'
|
|
31
|
+
autoload :Repository, 'imw/repository'
|
|
32
|
+
autoload :Dataset, 'imw/dataset'
|
|
33
|
+
autoload :Transforms, 'imw/transforms'
|
|
34
|
+
autoload :Parsers, 'imw/parsers'
|
|
35
|
+
|
|
36
|
+
# Open a resource at the given +uri+. The resource will
|
|
37
|
+
# automatically be extended by modules which make sense given the
|
|
38
|
+
# +uri+.
|
|
39
|
+
#
|
|
40
|
+
# See the documentation for IMW::Resource and the various modules
|
|
41
|
+
# within IMW::Resources for more information and options.
|
|
42
|
+
#
|
|
43
|
+
# Passing in an IMW::Resource will simply return it.
|
|
44
|
+
#
|
|
45
|
+
# @param [String, Addressable::URI, IMW::Resource] obj the URI to open
|
|
46
|
+
# @return [IMW::Resource] the resulting resource, properly extended for the given URI
|
|
47
|
+
def self.open obj, options={}
|
|
48
|
+
return obj if obj.is_a?(IMW::Resource)
|
|
49
|
+
IMW::Resource.new(obj, options)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
# Works the same way as IMW.open except opens the resource for
|
|
53
|
+
# writing.
|
|
54
|
+
#
|
|
55
|
+
# @param [String, Addressable::URI] uri the URI to open
|
|
56
|
+
# @return [IMW::Resource] the resulting resource, properly extended for the given URI and opened for writing.
|
|
57
|
+
def self.open! uri, options={}
|
|
58
|
+
IMW::Resource.new(uri, options.merge(:mode => 'w'))
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# The default repository in which to place datasets. See the
|
|
62
|
+
# documentation for IMW::Repository for more information on how
|
|
63
|
+
# datasets and repositories fit together.
|
|
64
|
+
#
|
|
65
|
+
# @return [IMW::Repository] the default IMW repository
|
|
66
|
+
def self.repository
|
|
67
|
+
@@repository ||= IMW::Repository.new
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Create a dataset and put it in the default IMW repository. Also
|
|
71
|
+
# yields the dataset so you can define its workflow
|
|
72
|
+
#
|
|
73
|
+
# IMW.dataset :my_dataset do
|
|
74
|
+
#
|
|
75
|
+
# # Define some paths we're going to use
|
|
76
|
+
# add_path :raw_data, :ripd, 'raw_data.csv'
|
|
77
|
+
# add_path :fixd_data, :fixd, 'fixed_data.csv'
|
|
78
|
+
#
|
|
79
|
+
# # Copy a file from a website to this dataset's +ripd+ directory.
|
|
80
|
+
# rip do
|
|
81
|
+
# IMW.open('http://mysite.com/data_archives/2010/03/03.csv').cp(path_to(:raw_data))
|
|
82
|
+
# end
|
|
83
|
+
#
|
|
84
|
+
# # Filter the raw data to those values which match some criterion defined by <tt>accept?</tt>
|
|
85
|
+
# munge do
|
|
86
|
+
# IMW.open(path_to(:raw_data)).map do |row|
|
|
87
|
+
# row if accept?(row)
|
|
88
|
+
# end.compact.dump(path_to(:fixd_data))
|
|
89
|
+
# end
|
|
90
|
+
#
|
|
91
|
+
# # Compress this new data
|
|
92
|
+
# package do
|
|
93
|
+
# IMW.open(path_to(:fixd_data)).compress.mv(path_to(:pkgd))
|
|
94
|
+
# end
|
|
95
|
+
# end
|
|
96
|
+
#
|
|
97
|
+
# @param [Symbol, String] handle the handle to identify this dataset with
|
|
98
|
+
# @param [Hash] options a hash of options (see IMW::Dataset)
|
|
99
|
+
# @return [IMW::Dataset] the new dataset
|
|
100
|
+
def self.dataset handle, options={}, &block
|
|
101
|
+
d = IMW::Dataset.new(handle, options)
|
|
102
|
+
d.instance_eval(&block) if block_given?
|
|
103
|
+
d
|
|
104
|
+
end
|
|
105
|
+
|
|
31
106
|
end
|
data/spec/data/sample.csv
CHANGED
|
@@ -81,7 +81,7 @@ ID,Name,Genus,Species
|
|
|
81
81
|
080,Tonkean Macaque,Macaca,tonkeana
|
|
82
82
|
081,Heck's Macaque,Macaca,hecki
|
|
83
83
|
082,Gorontalo Macaque,Macaca,nigrescens
|
|
84
|
-
083,Celebes Crested Macaque or Black
|
|
84
|
+
083,Celebes Crested Macaque or Black Ape,Macaca,nigra
|
|
85
85
|
084,Crab-eating Macaque or Long-tailed Macaque or Kera,Macaca,fascicularis
|
|
86
86
|
085,Stump-tailed Macaque or Bear Macaque,Macaca,arctoides
|
|
87
87
|
086,Rhesus Macaque,Macaca,mulatta
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"monkeys":[{"monkey":{"name":"Gray-bellied Night Monkey","id":1,"genus":"Aotus","species":"lemurinus"}},{"monkey":{"name":"Panamanian Night Monkey","id":2,"genus":"Aotus","species":"zonalis"}},{"monkey":{"name":"Hern\u00e1ndez-Camacho's Night Monkey","id":3,"genus":"Aotus","species":"jorgehernandezi"}},{"monkey":{"name":"Gray-handed Night Monkey","id":4,"genus":"Aotus","species":"griseimembra"}},{"monkey":{"name":"Hershkovitz's Night Monkey","id":5,"genus":"Aotus","species":"hershkovitzi"}},{"monkey":{"name":"Brumback's Night Monkey","id":6,"genus":"Aotus","species":"brumbacki"}},{"monkey":{"name":"Three-striped Night Monkey","id":7,"genus":"Aotus","species":"trivirgatus"}},{"monkey":{"name":"Spix's Night Monkey","id":"008","genus":"Aotus","species":"vociferans"}},{"monkey":{"name":"Malaysian Lar Gibbon","id":"009","genus":"Hylobates","species":"lar lar"}},{"monkey":{"name":"Carpenter's Lar Gibbon","id":8,"genus":"Hylobates","species":"lar carpenteri"}},{"monkey":{"name":"Central Lar Gibbon","id":9,"genus":"Hylobates","species":"lar entelloides"}},{"monkey":{"name":"Sumatran Lar Gibbon","id":10,"genus":"Hylobates","species":"lar vestitus"}},{"monkey":{"name":"Yunnan Lar Gibbon","id":11,"genus":"Hylobates","species":"lar yunnanensis"}},{"monkey":{"name":"Mountain Agile Gibbon","id":12,"genus":"Hylobates","species":"agilis agilis"}},{"monkey":{"name":"Bornean White-bearded Gibbon","id":13,"genus":"Hylobates","species":"agilis albibarbis"}},{"monkey":{"name":"Lowland Agile Gibbon","id":14,"genus":"Hylobates","species":"agilis unko"}},{"monkey":{"name":"M\u00fcller's Gray Gibbon","id":15,"genus":"Hylobates","species":"muelleri muelleri"}},{"monkey":{"name":"Abbott's Gray Gibbon","id":"018","genus":"Hylobates","species":"muelleri abbotti"}},{"monkey":{"name":"Northern Gray Gibbon","id":"019","genus":"Hylobates","species":"muelleri funereus"}},{"monkey":{"name":"Black Tamarin","id":16,"genus":"Saguinas","species":"niger"}},{"monkey":{"name":"Black-mantled 
Tamarin","id":17,"genus":"Saguinas","species":"nigricollis"}},{"monkey":{"name":"Brown-mantled Tamarin","id":18,"genus":"Saguinas","species":"fuscicollis"}},{"monkey":{"name":"Cottontop Tamarin or Pinch\u00e9 Tamarin","id":19,"genus":"Saguinas","species":"oedipus"}},{"monkey":{"name":"Emperor Tamarin","id":20,"genus":"Saguinas","species":"imperator"}},{"monkey":{"name":"Geoffroy's Tamarin","id":21,"genus":"Saguinas","species":"geoffroyi"}},{"monkey":{"name":"Golden-mantled Tamarin","id":22,"genus":"Saguinas","species":"tripartitus"}},{"monkey":{"name":"Graells's Tamarin","id":23,"genus":"Saguinas","species":"graellsi"}},{"monkey":{"name":"Martins's Tamarin","id":"028","genus":"Saguinas","species":"martinsi"}},{"monkey":{"name":"Mottle-faced Tamarin","id":"029","genus":"Saguinas","species":"inustus"}},{"monkey":{"name":"Moustached Tamarin","id":24,"genus":"Saguinas","species":"mystax"}},{"monkey":{"name":"Pied Tamarin","id":25,"genus":"Saguinas","species":"bicolor"}},{"monkey":{"name":"Red-capped Tamarin","id":26,"genus":"Saguinas","species":"pileatus"}},{"monkey":{"name":"Red-handed Tamarin","id":27,"genus":"Saguinas","species":"midas"}},{"monkey":{"name":"White-footed Tamarin","id":28,"genus":"Saguinas","species":"leucopus"}},{"monkey":{"name":"White-lipped Tamarin","id":29,"genus":"Saguinas","species":"labiatus"}},{"monkey":{"name":"White-mantled Tamarin","id":30,"genus":"Saguinas","species":"melanoleucus"}},{"monkey":{"name":"Allen's Swamp Monkey","id":31,"genus":"Allenopithecus","species":"nigroviridis"}},{"monkey":{"name":"Angolan Talapoin","id":"038","genus":"Miopithecus","species":"talapoin"}},{"monkey":{"name":"Gabon Talapoin","id":"039","genus":"Miopithecus","species":"ogouensis"}},{"monkey":{"name":"Patas Monkey","id":32,"genus":"Erythrocebus","species":"patas"}},{"monkey":{"name":"Green Monkey","id":33,"genus":"Chlorocebus","species":"sabaeus"}},{"monkey":{"name":"Grivet","id":34,"genus":"Chlorocebus","species":"aethiops"}},{"monkey":{"name":"Bale 
Mountains Vervet","id":35,"genus":"Chlorocebus","species":"djamdjamensis"}},{"monkey":{"name":"Tantalus Monkey","id":36,"genus":"Chlorocebus","species":"tantalus"}},{"monkey":{"name":"Vervet Monkey","id":37,"genus":"Chlorocebus","species":"pygerythrus"}},{"monkey":{"name":"Malbrouck","id":38,"genus":"Chlorocebus","species":"cynosuros"}},{"monkey":{"name":"Dryas Monkey or Salongo Monkey","id":39,"genus":"Cercopithecus","species":"dryas"}},{"monkey":{"name":"Diana Monkey","id":"048","genus":"Cercopithecus","species":"diana"}},{"monkey":{"name":"Roloway Monkey","id":"049","genus":"Cercopithecus","species":"roloway"}},{"monkey":{"name":"Greater Spot-nosed Monkey","id":40,"genus":"Cercopithecus","species":"nictitans"}},{"monkey":{"name":"Blue Monkey","id":41,"genus":"Cercopithecus","species":"mitis"}},{"monkey":{"name":"Silver Monkey","id":42,"genus":"Cercopithecus","species":"doggetti"}},{"monkey":{"name":"Golden Monkey","id":43,"genus":"Cercopithecus","species":"kandti"}},{"monkey":{"name":"Sykes's Monkey","id":44,"genus":"Cercopithecus","species":"albogularis"}},{"monkey":{"name":"Mona Monkey","id":45,"genus":"Cercopithecus","species":"mona"}},{"monkey":{"name":"Campbell's Mona Monkey","id":46,"genus":"Cercopithecus","species":"campbelli"}},{"monkey":{"name":"Lowe's Mona Monkey","id":47,"genus":"Cercopithecus","species":"lowei"}},{"monkey":{"name":"Crested Mona Monkey","id":"058","genus":"Cercopithecus","species":"pogonias"}},{"monkey":{"name":"Wolf's Mona Monkey","id":"059","genus":"Cercopithecus","species":"wolfi"}},{"monkey":{"name":"Dent's Mona Monkey","id":48,"genus":"Cercopithecus","species":"denti"}},{"monkey":{"name":"Lesser Spot-nosed Monkey","id":49,"genus":"Cercopithecus","species":"petaurista"}},{"monkey":{"name":"White-throated Guenon","id":50,"genus":"Cercopithecus","species":"erythrogaster"}},{"monkey":{"name":"Sclater's Guenon","id":51,"genus":"Cercopithecus","species":"sclateri"}},{"monkey":{"name":"Red-eared 
Guenon","id":52,"genus":"Cercopithecus","species":"erythrotis"}},{"monkey":{"name":"Moustached Guenon","id":53,"genus":"Cercopithecus","species":"cephus"}},{"monkey":{"name":"Red-tailed Monkey","id":54,"genus":"Cercopithecus","species":"ascanius"}},{"monkey":{"name":"L'Hoest's Monkey","id":55,"genus":"Cercopithecus","species":"lhoesti"}},{"monkey":{"name":"Preuss's Monkey","id":"068","genus":"Cercopithecus","species":"preussi"}},{"monkey":{"name":"Sun-tailed Monkey","id":"069","genus":"Cercopithecus","species":"solatus"}},{"monkey":{"name":"Hamlyn's Monkey","id":56,"genus":"Cercopithecus","species":"hamlyni"}},{"monkey":{"name":"De Brazza's Monkey","id":57,"genus":"Cercopithecus","species":"neglectus"}},{"monkey":{"name":"Barbary Macaque","id":58,"genus":"Macaca","species":"sylvanus"}},{"monkey":{"name":"Lion-tailed Macaque","id":59,"genus":"Macaca","species":"silenus"}},{"monkey":{"name":"Southern Pig-tailed Macaque or Beruk","id":60,"genus":"Macaca","species":"nemestrina"}},{"monkey":{"name":"Northern Pig-tailed Macaque","id":61,"genus":"Macaca","species":"leonina"}},{"monkey":{"name":"Pagai Island Macaque or Bokkoi","id":62,"genus":"Macaca","species":"pagensis"}},{"monkey":{"name":"Siberut Macaque","id":63,"genus":"Macaca","species":"siberu"}},{"monkey":{"name":"Moor Macaque","id":"078","genus":"Macaca","species":"maura"}},{"monkey":{"name":"Booted Macaque","id":"079","genus":"Macaca","species":"ochreata"}},{"monkey":{"name":"Tonkean Macaque","id":"080","genus":"Macaca","species":"tonkeana"}},{"monkey":{"name":"Heck's Macaque","id":"081","genus":"Macaca","species":"hecki"}},{"monkey":{"name":"Gorontalo Macaque","id":"082","genus":"Macaca","species":"nigrescens"}},{"monkey":{"name":"Celebes Crested Macaque or Black Ape","id":"083","genus":"Macaca","species":"nigra"}},{"monkey":{"name":"Crab-eating Macaque or Long-tailed Macaque or Kera","id":"084","genus":"Macaca","species":"fascicularis"}},{"monkey":{"name":"Stump-tailed Macaque or Bear 
Macaque","id":"085","genus":"Macaca","species":"arctoides"}},{"monkey":{"name":"Rhesus Macaque","id":"086","genus":"Macaca","species":"mulatta"}},{"monkey":{"name":"Formosan Rock Macaque","id":"087","genus":"Macaca","species":"cyclopis"}},{"monkey":{"name":"Japanese Macaque","id":"088","genus":"Macaca","species":"fuscata"}},{"monkey":{"name":"Toque Macaque","id":"089","genus":"Macaca","species":"sinica"}},{"monkey":{"name":"Bonnet Macaque","id":"090","genus":"Macaca","species":"radiata"}},{"monkey":{"name":"Assam Macaque","id":"091","genus":"Macaca","species":"assamensis"}},{"monkey":{"name":"Tibetan Macaque or Milne-Edwards' Macaque","id":"092","genus":"Macaca","species":"thibetana"}},{"monkey":{"name":"Arunachal Macaque or Munzala","id":"093","genus":"Macaca","species":"munzala"}},{"monkey":{"name":"Grey-cheeked Mangabey","id":"094","genus":"Lophocebus","species":"albigena"}},{"monkey":{"name":"Black Crested Mangabey","id":"095","genus":"Lophocebus","species":"aterrimus"}},{"monkey":{"name":"Opdenbosch's Mangabey","id":"096","genus":"Lophocebus","species":"opdenboschi"}},{"monkey":{"name":"Uganda Mangabey","id":"097","genus":"Lophocebus","species":"ugandae"}},{"monkey":{"name":"Johnston's Mangabey","id":"098","genus":"Lophocebus","species":"johnstoni"}},{"monkey":{"name":"Osman Hill's Mangabey","id":"099","genus":"Lophocebus","species":"osmani"}},{"monkey":{"name":"Kipunji","id":100,"genus":"Rungwecebus","species":"kipunji"}},{"monkey":{"name":"Hamadryas Baboon","id":101,"genus":"Papio","species":"hamadryas"}},{"monkey":{"name":"Guinea Baboon","id":102,"genus":"Papio","species":"papio"}},{"monkey":{"name":"Olive Baboon","id":103,"genus":"Papio","species":"anubis"}},{"monkey":{"name":"Yellow Baboon","id":104,"genus":"Papio","species":"cynocephalus"}},{"monkey":{"name":"Chacma Baboon","id":105,"genus":"Papio","species":"ursinus"}},{"monkey":{"name":"Gelada","id":106,"genus":"Theropithecus","species":"gelada"}},{"monkey":{"name":"Sooty 
Mangabey","id":107,"genus":"Cercocebus","species":"atys"}},{"monkey":{"name":"Collared Mangabey","id":108,"genus":"Cercocebus","species":"torquatus"}},{"monkey":{"name":"Agile Mangabey","id":109,"genus":"Cercocebus","species":"agilis"}},{"monkey":{"name":"Golden-bellied Mangabey","id":110,"genus":"Cercocebus","species":"chrysogaster"}},{"monkey":{"name":"Tana River Mangabey","id":111,"genus":"Cercocebus","species":"galeritus"}},{"monkey":{"name":"Sanje Mangabey","id":112,"genus":"Cercocebus","species":"sanjei"}},{"monkey":{"name":"Mandrill","id":113,"genus":"Mandrillus","species":"sphinx"}},{"monkey":{"name":"Drill","id":114,"genus":"Mandrillus","species":"leucophaeus"}},{"monkey":{"name":"Black Colobus","id":115,"genus":"Colobus","species":"satanas"}},{"monkey":{"name":"Angola Colobus","id":116,"genus":"Colobus","species":"angolensis"}},{"monkey":{"name":"King Colobus","id":117,"genus":"Colobus","species":"polykomos"}},{"monkey":{"name":"Ursine Colobus","id":118,"genus":"Colobus","species":"vellerosus"}},{"monkey":{"name":"Mantled Guereza","id":119,"genus":"Colobus","species":"guereza"}},{"monkey":{"name":"Western Red Colobus","id":120,"genus":"Piliocolobus","species":"badius"}},{"monkey":{"name":"Pennant's Colobus","id":121,"genus":"Piliocolobus","species":"pennantii"}},{"monkey":{"name":"Preuss's Red Colobus","id":122,"genus":"Piliocolobus","species":"preussi"}},{"monkey":{"name":"Thollon's Red Colobus","id":123,"genus":"Piliocolobus","species":"tholloni"}},{"monkey":{"name":"Central African Red Colobus","id":124,"genus":"Piliocolobus","species":"foai"}},{"monkey":{"name":"Ugandan Red Colobus","id":125,"genus":"Piliocolobus","species":"tephrosceles"}},{"monkey":{"name":"Uzungwa Red Colobus","id":126,"genus":"Piliocolobus","species":"gordonorum"}},{"monkey":{"name":"Zanzibar Red Colobus","id":127,"genus":"Piliocolobus","species":"kirkii"}},{"monkey":{"name":"Tana River Red 
Colobus","id":128,"genus":"Piliocolobus","species":"rufomitratus"}},{"monkey":{"name":"Olive Colobus","id":129,"genus":"Procolobus","species":"verus"}},{"monkey":{"name":"Maroon Leaf Monkey","id":130,"genus":"Presbytis","species":"rubicunda"}}]}
|
data/spec/data/sample.tsv
CHANGED
|
@@ -81,7 +81,7 @@ ID Name Genus Species
|
|
|
81
81
|
080 Tonkean Macaque Macaca tonkeana
|
|
82
82
|
081 Heck's Macaque Macaca hecki
|
|
83
83
|
082 Gorontalo Macaque Macaca nigrescens
|
|
84
|
-
083 Celebes Crested Macaque or Black
|
|
84
|
+
083 Celebes Crested Macaque or Black Ape Macaca nigra
|
|
85
85
|
084 Crab-eating Macaque or Long-tailed Macaque or Kera Macaca fascicularis
|
|
86
86
|
085 Stump-tailed Macaque or Bear Macaque Macaca arctoides
|
|
87
87
|
086 Rhesus Macaque Macaca mulatta
|
data/spec/data/sample.txt
CHANGED
|
@@ -81,7 +81,7 @@ ID,Name,Genus,Species
|
|
|
81
81
|
080,Tonkean Macaque,Macaca,tonkeana
|
|
82
82
|
081,Heck's Macaque,Macaca,hecki
|
|
83
83
|
082,Gorontalo Macaque,Macaca,nigrescens
|
|
84
|
-
083,Celebes Crested Macaque or Black
|
|
84
|
+
083,Celebes Crested Macaque or Black Ape,Macaca,nigra
|
|
85
85
|
084,Crab-eating Macaque or Long-tailed Macaque or Kera,Macaca,fascicularis
|
|
86
86
|
085,Stump-tailed Macaque or Bear Macaque,Macaca,arctoides
|
|
87
87
|
086,Rhesus Macaque,Macaca,mulatta
|
data/spec/data/sample.xml
CHANGED
data/spec/data/sample.yaml
CHANGED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
+
require File.dirname(__FILE__) + "/../utils/shared_paths_spec"
|
|
3
|
+
|
|
4
|
+
describe IMW::Dataset do
|
|
5
|
+
|
|
6
|
+
describe 'setting default paths' do
|
|
7
|
+
|
|
8
|
+
before do
|
|
9
|
+
@dataset = IMW::Dataset.new(:testing, :root => IMWTest::TMP_DIR)
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
it "should set its root path to the value given" do
|
|
13
|
+
@dataset.path_to(:root).should == IMWTest::TMP_DIR
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
it "should set paths for each workflow dir" do
|
|
17
|
+
@dataset.workflow_dirs.each do |dir|
|
|
18
|
+
@dataset.path_to(dir).should == File.join(IMWTest::TMP_DIR, dir.to_s)
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
before do
|
|
23
|
+
@path_manager = @dataset
|
|
24
|
+
end
|
|
25
|
+
it_should_behave_like "an object that manages paths"
|
|
26
|
+
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + "/../../spec_helper"
|
|
2
|
+
require 'imw/dataset/workflow'
|
|
3
|
+
describe IMW::Workflow do
|
|
4
|
+
|
|
5
|
+
before do
|
|
6
|
+
@dataset = IMW::Dataset.new :testing
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
it "should dynamically define methods for each workflow step" do
|
|
10
|
+
@dataset.workflow_steps.each do |step|
|
|
11
|
+
@dataset.respond_to?(step).should be_true
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
describe "initializing workflow" do
|
|
16
|
+
it "should not make any directories if no tasks are invoked" do
|
|
17
|
+
@dataset.path_to(:root).should_not contain(*@dataset.workflow_dirs.map(&:to_s))
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
it "should only make directories once a task is invoked" do
|
|
21
|
+
@dataset[:initialize].invoke
|
|
22
|
+
@dataset.path_to(:root).should contain(*@dataset.workflow_dirs.map(&:to_s))
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
describe "cleaning workflow directories" do
|
|
27
|
+
it "should clean without error even if there's nothing to clean" do
|
|
28
|
+
@dataset[:clean].invoke
|
|
29
|
+
@dataset.path_to(:root).should_not contain(*@dataset.workflow_dirs.map(&:to_s))
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
it "should remove workflow directories when invoked" do
|
|
33
|
+
@dataset[:initialize].invoke
|
|
34
|
+
IMWTest::Random.file(@dataset.path_to(:ripd, 'foobar.txt')) # put a file in
|
|
35
|
+
@dataset[:clean].invoke
|
|
36
|
+
@dataset.path_to(:root).should_not contain(*@dataset.workflow_dirs.map(&:to_s))
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
end
|
|
41
|
+
|