imw 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +15 -0
- data/CHANGELOG +0 -0
- data/LICENSE +674 -0
- data/README.rdoc +101 -0
- data/Rakefile +20 -0
- data/VERSION +1 -0
- data/etc/imwrc.rb +76 -0
- data/lib/imw.rb +42 -0
- data/lib/imw/boot.rb +58 -0
- data/lib/imw/dataset.rb +233 -0
- data/lib/imw/dataset/datamapper.rb +66 -0
- data/lib/imw/dataset/datamapper/time_and_user_stamps.rb +37 -0
- data/lib/imw/dataset/loaddump.rb +50 -0
- data/lib/imw/dataset/old/file_collection.rb +88 -0
- data/lib/imw/dataset/old/file_collection_utils.rb +71 -0
- data/lib/imw/dataset/scaffold.rb +132 -0
- data/lib/imw/dataset/scraped_uri.rb +305 -0
- data/lib/imw/dataset/scrub/old_working_scrubber.rb +87 -0
- data/lib/imw/dataset/scrub/scrub.rb +147 -0
- data/lib/imw/dataset/scrub/scrub_simple_url.rb +38 -0
- data/lib/imw/dataset/scrub/scrub_test.rb +60 -0
- data/lib/imw/dataset/scrub/slug.rb +101 -0
- data/lib/imw/dataset/stats.rb +73 -0
- data/lib/imw/dataset/stats/counter.rb +23 -0
- data/lib/imw/dataset/task.rb +38 -0
- data/lib/imw/dataset/workflow.rb +81 -0
- data/lib/imw/files.rb +110 -0
- data/lib/imw/files/archive.rb +113 -0
- data/lib/imw/files/basicfile.rb +122 -0
- data/lib/imw/files/binary.rb +28 -0
- data/lib/imw/files/compressed_file.rb +93 -0
- data/lib/imw/files/compressed_files_and_archives.rb +348 -0
- data/lib/imw/files/compressible.rb +103 -0
- data/lib/imw/files/csv.rb +112 -0
- data/lib/imw/files/json.rb +41 -0
- data/lib/imw/files/sgml.rb +65 -0
- data/lib/imw/files/text.rb +68 -0
- data/lib/imw/files/yaml.rb +46 -0
- data/lib/imw/packagers.rb +8 -0
- data/lib/imw/packagers/archiver.rb +108 -0
- data/lib/imw/packagers/s3_mover.rb +28 -0
- data/lib/imw/parsers.rb +7 -0
- data/lib/imw/parsers/html_parser.rb +382 -0
- data/lib/imw/parsers/html_parser/matchers.rb +306 -0
- data/lib/imw/parsers/line_parser.rb +87 -0
- data/lib/imw/parsers/regexp_parser.rb +72 -0
- data/lib/imw/utils.rb +24 -0
- data/lib/imw/utils/components.rb +61 -0
- data/lib/imw/utils/config.rb +46 -0
- data/lib/imw/utils/error.rb +54 -0
- data/lib/imw/utils/extensions/array.rb +125 -0
- data/lib/imw/utils/extensions/class/attribute_accessors.rb +8 -0
- data/lib/imw/utils/extensions/core.rb +43 -0
- data/lib/imw/utils/extensions/dir.rb +24 -0
- data/lib/imw/utils/extensions/file_core.rb +64 -0
- data/lib/imw/utils/extensions/hash.rb +218 -0
- data/lib/imw/utils/extensions/hpricot.rb +48 -0
- data/lib/imw/utils/extensions/string.rb +49 -0
- data/lib/imw/utils/extensions/struct.rb +42 -0
- data/lib/imw/utils/extensions/symbol.rb +28 -0
- data/lib/imw/utils/extensions/typed_struct.rb +22 -0
- data/lib/imw/utils/extensions/uri.rb +59 -0
- data/lib/imw/utils/log.rb +67 -0
- data/lib/imw/utils/misc.rb +63 -0
- data/lib/imw/utils/paths.rb +115 -0
- data/lib/imw/utils/uri.rb +59 -0
- data/lib/imw/utils/uuid.rb +33 -0
- data/lib/imw/utils/validate.rb +38 -0
- data/lib/imw/utils/version.rb +12 -0
- data/lib/imw/utils/view.rb +113 -0
- data/lib/imw/utils/view/dump_csv.rb +112 -0
- data/lib/imw/utils/view/dump_csv_older.rb +117 -0
- data/spec/data/sample.csv +131 -0
- data/spec/data/sample.tsv +131 -0
- data/spec/data/sample.txt +131 -0
- data/spec/data/sample.xml +653 -0
- data/spec/data/sample.yaml +652 -0
- data/spec/imw/dataset/datamapper/uri_spec.rb +43 -0
- data/spec/imw/dataset/datamapper_spec_helper.rb +11 -0
- data/spec/imw/files/archive_spec.rb +118 -0
- data/spec/imw/files/basicfile_spec.rb +121 -0
- data/spec/imw/files/bz2_spec.rb +32 -0
- data/spec/imw/files/compressed_file_spec.rb +96 -0
- data/spec/imw/files/compressible_spec.rb +100 -0
- data/spec/imw/files/file_spec.rb +144 -0
- data/spec/imw/files/gz_spec.rb +32 -0
- data/spec/imw/files/rar_spec.rb +33 -0
- data/spec/imw/files/tar_spec.rb +31 -0
- data/spec/imw/files/text_spec.rb +23 -0
- data/spec/imw/files/zip_spec.rb +31 -0
- data/spec/imw/files_spec.rb +38 -0
- data/spec/imw/packagers/archiver_spec.rb +125 -0
- data/spec/imw/packagers/s3_mover_spec.rb +7 -0
- data/spec/imw/parsers/line_parser_spec.rb +96 -0
- data/spec/imw/parsers/regexp_parser_spec.rb +42 -0
- data/spec/imw/utils/extensions/file_core_spec.rb +72 -0
- data/spec/imw/utils/extensions/find_spec.rb +113 -0
- data/spec/imw/utils/paths_spec.rb +38 -0
- data/spec/imw/workflow/rip/local_spec.rb +89 -0
- data/spec/imw/workflow/rip_spec.rb +27 -0
- data/spec/rcov.opts +1 -0
- data/spec/spec.opts +4 -0
- data/spec/spec_helper.rb +32 -0
- data/spec/support/archive_contents_matcher.rb +94 -0
- data/spec/support/custom_matchers.rb +21 -0
- data/spec/support/directory_contents_matcher.rb +61 -0
- data/spec/support/extensions.rb +18 -0
- data/spec/support/file_contents_matcher.rb +50 -0
- data/spec/support/random.rb +210 -0
- data/spec/support/without_regard_to_order_matcher.rb +58 -0
- metadata +196 -0
data/lib/imw/utils.rb
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#
|
|
2
|
+
# h2. lib/imw/utils.rb -- utility functions
|
|
3
|
+
#
|
|
4
|
+
# == About
|
|
5
|
+
#
|
|
6
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
7
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
8
|
+
# License:: GPL 3.0
|
|
9
|
+
# Website:: http://infinitemonkeywrench.org/
|
|
10
|
+
#
|
|
11
|
+
|
|
12
|
+
require 'rubygems'
|
|
13
|
+
require 'imw/utils/error'
|
|
14
|
+
require 'imw/utils/log'
|
|
15
|
+
require 'imw/utils/config'
|
|
16
|
+
require 'imw/utils/paths'
|
|
17
|
+
require 'imw/utils/misc'
|
|
18
|
+
require 'imw/utils/components'
|
|
19
|
+
require 'imw/utils/extensions/core'
|
|
20
|
+
require 'fileutils'
|
|
21
|
+
require 'pathname'
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# puts "#{File.basename(__FILE__)}: Early economists thought they would measure the utility of an action in units of `utils'. Really." # at bottom
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
#
|
|
2
|
+
# h2. lib/imw/utils/components.rb -- define separate components of IMW
|
|
3
|
+
#
|
|
4
|
+
# == About
|
|
5
|
+
#
|
|
6
|
+
# Defines a hash <tt>IMW::COMPONENTS</tt> which keys component names
|
|
7
|
+
# to the files to be required to implement each component and defines
|
|
8
|
+
# methods to load these files.
|
|
9
|
+
#
|
|
10
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
11
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
12
|
+
# License:: GPL 3.0
|
|
13
|
+
# Website:: http://infinitemonkeywrench.org/
|
|
14
|
+
#
|
|
15
|
+
# puts "#{File.basename(__FILE__)}: Something clever" # at bottom
|
|
16
|
+
|
|
17
|
+
require 'imw/utils/error'
|
|
18
|
+
|
|
19
|
+
module IMW

  # Defines IMW components and the files required by each. Components
  # can be accessed using <tt>IMW.load_components</tt> or
  # <tt>IMW#imw_components</tt>.
  #
  # A value may be a single path to require, an Array of paths, or a
  # Symbol naming another entry in this hash (an alias).
  COMPONENTS = {
    :datamapper       => ["imw/dataset/datamapper","imw/dataset/datamapper/time_and_user_stamps"],
    :data_mapper      => :datamapper,
    :html_parser      => "imw/parsers/html_parser",
    :flat_file_parser => "imw/parsers/flat_file_parser",
    :line_parser      => "imw/parsers/line_parser",
    :infochimps       => ["imw/infochimps/infochimps_resource","imw/infochimps/icss"]
  }

  # Load components of IMW as needed,
  #
  #   IMW.load_components :datamapper, :flat_file_parser
  #
  # Each argument is first passed to +require+ as-is.  Only when that
  # raises a LoadError is the argument looked up in IMW::COMPONENTS and
  # the entry (path, list of paths, or alias) loaded recursively.
  #
  # Raises IMW::Error when an argument can neither be required nor found
  # in IMW::COMPONENTS.  The message of the underlying LoadError is
  # appended so that a component file which exists in COMPONENTS but
  # fails to load can be told apart from a misspelled component name.
  def self.load_components *args
    args.each do |component_name|
      begin
        require component_name.to_s
      rescue LoadError => load_error
        component = IMW::COMPONENTS[component_name]
        unless component
          raise IMW::Error.new("#{component_name} is an invalid IMW component. See IMW::COMPONENTS. (#{load_error.message})")
        end
        # Array() wraps a lone path or alias so both shapes share one call.
        IMW.load_components(*Array(component))
      end
    end
  end

  # Load components of IMW as needed,
  #
  #   include IMW
  #   imw_components :datamapper, :flat_file_parser
  #
  # Instance-level convenience that forwards to IMW.load_components.
  def imw_components *args
    IMW.load_components(*args)
  end

end
|
|
61
|
+
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
#
|
|
2
|
+
# h2. lib/imw/utils/config.rb -- configuration parsing
|
|
3
|
+
#
|
|
4
|
+
# == About
|
|
5
|
+
#
|
|
6
|
+
# IMW looks for configuration settings in the following places, in
|
|
7
|
+
# order of increasing precedence:
|
|
8
|
+
#
|
|
9
|
+
# 1. Settings defined directly in this file.
|
|
10
|
+
#
|
|
11
|
+
# 2. From the <tt>etc/imwrc</tt> file in the IMW root directory.
|
|
12
|
+
#
|
|
13
|
+
# 3. From the <tt>.imwrc</tt> file in the user's home directory (the
|
|
14
|
+
# filename can be changed; see
|
|
15
|
+
# <tt>IMW::Config::USER_CONFIG_FILE_BASENAME</tt>).
|
|
16
|
+
#
|
|
17
|
+
# 4. From the file defined by the environment variable +IMWRC+ (the
|
|
18
|
+
# value can be changed; see
|
|
19
|
+
# <tt>IMW::Config::USER_CONFIG_FILE_ENV_VARIABLE</tt>).
|
|
20
|
+
#
|
|
21
|
+
# Settings not found in one configuration location will be searched
|
|
22
|
+
# for in locations of lesser precedence.
|
|
23
|
+
#
|
|
24
|
+
# *Note:* configuration files are plain Ruby code that will be directly
|
|
25
|
+
# evaluated.
|
|
26
|
+
#
|
|
27
|
+
# Relevant settings include
|
|
28
|
+
#
|
|
29
|
+
# * interfaces with external programs (+tar+, +wget+, &c.)
|
|
30
|
+
# * paths to directories where IMW reads/writes files
|
|
31
|
+
# * correspondences between file extensions and IMW file classes
|
|
32
|
+
#
|
|
33
|
+
# For more detailed information, see the default configuration file,
|
|
34
|
+
# <tt>etc/imwrc</tt>.
|
|
35
|
+
#
|
|
36
|
+
#
|
|
37
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
38
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
39
|
+
# License:: GPL 3.0
|
|
40
|
+
# Website:: http://infinitemonkeywrench.org/
|
|
41
|
+
#
|
|
42
|
+
|
|
43
|
+
# Opens the IMW namespace.  The body is empty in this file: no
# configuration constants or methods are defined here.
module IMW
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# puts "#{File.basename(__FILE__)}: You carefully adjust the settings on your Monkeywrench. Glob-monsters: beware!!" # at bottom
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
#
|
|
2
|
+
# h2. lib/imw/utils/error.rb -- errors
|
|
3
|
+
#
|
|
4
|
+
# == About
|
|
5
|
+
#
|
|
6
|
+
# Error objects for IMW.
|
|
7
|
+
#
|
|
8
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
9
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
10
|
+
# License:: GPL 3.0
|
|
11
|
+
# Website:: http://infinitemonkeywrench.org/
|
|
12
|
+
#
|
|
13
|
+
|
|
14
|
+
module IMW

  # A generic error class.
  class Error < StandardError
  end

  # IMW-namespaced counterparts of the core exception classes.  The
  # superclasses are written with a leading <tt>::</tt> so they always
  # refer to the core classes, even if this file is evaluated again
  # after IMW::TypeError (etc.) already exist.
  class TypeError < ::TypeError
  end

  class ArgumentError < ::ArgumentError
  end

  class NotImplementedError < ::NotImplementedError
  end

  # Raised when parsing fails.
  class ParseError < Error
  end

  # An error meant to be used when a system call goes awry.  It will
  # report exit status and the process id of the offending call.
  #
  # The child status (<tt>$?</tt>) is captured when the error is
  # constructed, so the report stays correct even if other child
  # processes run before the error is printed.  Construct the error
  # immediately after the failing call.
  class SystemCallError < IMW::Error

    # +message+ is typically the command line that failed.
    def initialize(message)
      @message = message
      @status  = $? # snapshot now; $? is replaced by every later child exit
      super(message)
    end

    # Returns the same String as #to_s.
    def display
      to_s
    end

    # Returns "(error code: N, pid: P) message", or just the message
    # when no child process status was available at construction time.
    def to_s
      return @message.to_s unless @status
      "(error code: #{@status.exitstatus}, pid: #{@status.pid}) #{@message}"
    end

  end

  # An error for improperly specified, inappropriate, or broken paths.
  class PathError < IMW::Error
  end

end
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
#
|
|
2
|
+
# h2. lib/imw/utils/extensions/array.rb -- array extensions
|
|
3
|
+
#
|
|
4
|
+
# == About
|
|
5
|
+
#
|
|
6
|
+
# Extensions to the +Array+ class.
|
|
7
|
+
#
|
|
8
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
9
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
10
|
+
# License:: GPL 3.0
|
|
11
|
+
# Website:: http://infinitemonkeywrench.org/
|
|
12
|
+
#
|
|
13
|
+
require 'active_support/core_ext/array/extract_options'
|
|
14
|
+
class Array #:nodoc:
  # ActiveSupport 2.x ships +extract_options!+ as a mixin module that has
  # to be included by hand.  Later releases define the method on Array
  # directly and no longer provide the CoreExtensions namespace, so the
  # include is performed only when the mixin actually exists.
  if defined?(ActiveSupport::CoreExtensions::Array::ExtractOptions)
    include ActiveSupport::CoreExtensions::Array::ExtractOptions
  end
end
|
|
17
|
+
|
|
18
|
+
class Array

  # Return all but the last element.
  # This will be [] for both an empty array and a length-1 array.
  def most
    self[0..-2]
  end

  # Return all but the first element.
  # This will be nil for an empty array and [] for a length-1 array.
  def rest
    self[1..-1]
  end

  # 'Un'-zip()s an array.  Returns an array of arrays: the first array has the
  # first element of each member, the second array has the second element of
  # each member, and so on.  Returns as many arrays as the first element in self
  # has entries and inserts a nil where a member array wasn't long enough.
  #
  # foo, bar = foo.zip(bar).unzip should leave foo and bar with the same values
  # if foo and bar have the same length.
  #
  # An empty array unzips to [].  Will fail on a not-array-of-arrays.
  def unzip
    return [] if empty?
    # One empty array per vertical slot, sized by the first row.
    vslices = first.map { [] }
    each do |hslice|
      # Push the elements of each row onto their slices.
      vslices.zip(hslice).each { |vslice, h_el| vslice << h_el }
    end
    vslices
  end

  # Return a random element of this array.
  #
  # The index drawn is <tt>rand(length) - 1</tt>, i.e. -1 up to length - 2;
  # -1 addresses the last element, so each position is still drawn from a
  # distinct value of rand.
  def random_element
    self[rand(length) - 1]
  end

  # Convert an assoc (list of [key, val, [...]]'s) to an OpenStruct.
  # +to_openstruct+ is called on every value.
  #
  # NOTE(review): neither OpenStruct ('ostruct') nor a +to_openstruct+
  # method for the values is loaded by this file; presumably both are
  # provided elsewhere -- confirm.
  def to_openstruct
    mapped = {}
    each { |key, value| mapped[key] = value.to_openstruct }
    OpenStruct.new(mapped)
  end

  # Return the elements of this array in a pretty-printed string,
  # inserting +final_string+ between the last two items.
  #
  #   >> [:one, :two, :three].quote_items_with "or"
  #   `one', `two', or `three'
  #
  # Without +final_string+ the items are simply joined with commas.
  def quote_items_with final_string = nil
    quoted = map { |item| "`" + item.to_s + "'" }
    case quoted.length
    when 0 then ""
    when 1 then quoted.first
    when 2 then quoted.join(final_string ? " #{final_string} " : ', ')
    else
      leading = quoted[0..-2].join(', ')
      if final_string
        "#{leading}, #{final_string} #{quoted.last}"
      else
        "#{leading}, #{quoted.last}"
      end
    end
  end

  # Slice a copy of this array into groups of +number+ elements, padding
  # the copy with +fill_with+ until its size is a multiple of +number+.
  # The groups are yielded to the block; the call is forwarded to
  # +each_slice+, whose result is returned.
  def in_groups_of(number, fill_with = nil, &block)
    require 'enumerator' # each_slice is not in core on older Rubies
    collection = dup
    collection << fill_with until collection.size.modulo(number).zero?
    collection.each_slice(number, &block)
  end

  # Returns a single hash containing the merge of all hashes in this
  # array.  This is useful when dealing with badly written YAML files.
  # Only merges hashes at depth zero, i.e. - this isn't recursive.
  # Non-Hash elements are ignored; later hashes win on key collisions.
  def merge_hashes
    inject({}) do |merged_hash, element|
      merged_hash.merge!(element) if element.is_a?(Hash)
      merged_hash
    end
  end

  # Recurses through the elements of this Array collecting all
  # "terminal" nodes: every element that does not itself respond to
  # +terminals+.  When a block is given, each collected terminal is
  # replaced by the block's result (applied once, at the level where the
  # block was passed).
  def terminals &block
    found = []
    each do |element|
      if element.respond_to? :terminals
        found += element.terminals
      else
        found << element
      end
    end
    found.map! { |terminal| yield terminal } if block
    found
  end

end
|
|
124
|
+
|
|
125
|
+
# puts "#{File.basename(__FILE__)}: I have a loooong list of complaints. Firstly, ..." # at bottom
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# :nodoc:
|
|
2
|
+
# for when cattr_accessor is all you need
|
|
3
|
+
#
|
|
4
|
+
require 'active_support/core_ext/array/extract_options'
|
|
5
|
+
class Array #:nodoc:
  # The ExtractOptions mixin only exists in ActiveSupport 2.x; later
  # releases define +extract_options!+ on Array directly and drop the
  # CoreExtensions namespace.  Include it only when it is really there.
  if defined?(ActiveSupport::CoreExtensions::Array::ExtractOptions)
    include ActiveSupport::CoreExtensions::Array::ExtractOptions
  end
end
|
|
8
|
+
require 'active_support/core_ext/class/attribute_accessors'
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
#
|
|
2
|
+
# h2. lib/imw/utils/extensions/core.rb -- extensions to the Ruby core
|
|
3
|
+
#
|
|
4
|
+
# == About
|
|
5
|
+
#
|
|
6
|
+
# Some useful extensions to basic Ruby classes. This file is required
|
|
7
|
+
# by <tt>imw/utils</tt> so any files required here are automatically
|
|
8
|
+
# required when loading IMW.
|
|
9
|
+
#
|
|
10
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
11
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
12
|
+
# License:: GPL 3.0
|
|
13
|
+
# Website:: http://infinitemonkeywrench.org/
|
|
14
|
+
#
|
|
15
|
+
# puts "#{File.basename(__FILE__)}: Your monkeywrench does a complicated series of core-burning exercises and emerges with ripped, powerful-looking abs."
|
|
16
|
+
|
|
17
|
+
require 'imw/utils/extensions/string'
|
|
18
|
+
require 'imw/utils/extensions/array'
|
|
19
|
+
require 'imw/utils/extensions/hash'
|
|
20
|
+
require 'imw/utils/extensions/dir'
|
|
21
|
+
require 'imw/utils/extensions/struct'
|
|
22
|
+
require 'imw/utils/extensions/symbol'
|
|
23
|
+
require 'imw/utils/extensions/file_core'
|
|
24
|
+
require 'active_support/core_ext/module/aliasing'
|
|
25
|
+
require 'active_support/core_ext/object/blank'
|
|
26
|
+
require 'active_support/core_ext/object/misc'
|
|
27
|
+
#require 'active_support/core_ext/blank.rb'
|
|
28
|
+
require 'imw/utils/extensions/class/attribute_accessors'
|
|
29
|
+
# require 'ostruct'
|
|
30
|
+
require 'set'
|
|
31
|
+
|
|
32
|
+
module IMW
  # A stand-in for Kernel.system that signals failure with an exception.
  #
  # All arguments are flattened and joined with single spaces into one
  # command string, which is handed to Kernel.system.  If the resulting
  # child status is not successful, an IMW::SystemCallError carrying the
  # command string is raised.  Returns nil when the command succeeds.
  def self.system *commands
    shell_line = commands.flatten.join(' ')
    Kernel.system(shell_line)
    return if $?.success?
    raise IMW::SystemCallError.new(shell_line)
  end
end
|
|
42
|
+
|
|
43
|
+
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#
|
|
2
|
+
# h2. lib/imw/utils/extensions/dir.rb -- directory extensions
|
|
3
|
+
#
|
|
4
|
+
# == About
|
|
5
|
+
#
|
|
6
|
+
# The Ruby +Dir+ module is rubbish. Time to clean it up a bit!
|
|
7
|
+
#
|
|
8
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
9
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
10
|
+
# License:: GPL 3.0
|
|
11
|
+
# Website:: http://infinitemonkeywrench.org/
|
|
12
|
+
#
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Dir

  # Lists everything in this directory except the `.' and `..' entries,
  # with each entry name joined onto this directory's own path.
  #
  # The results are therefore only as absolute as the path this Dir was
  # opened with.
  def abs_contents
    real_entries = entries.reject { |name| name == '.' || name == '..' }
    real_entries.map { |name| File.join(path, name) }
  end
end
|
|
23
|
+
|
|
24
|
+
# puts "#{File.basename(__FILE__)}: You open the folder and see a long list of names. Some have been crossed out -- ominously..." # at bottom
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
#
|
|
2
|
+
# h2. lib/imw/utils/extensions/file_core.rb -- extensions to built-in file class
|
|
3
|
+
#
|
|
4
|
+
# == About
|
|
5
|
+
#
|
|
6
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
|
7
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
|
8
|
+
# License:: GPL 3.0
|
|
9
|
+
# Website:: http://infinitemonkeywrench.org/
|
|
10
|
+
#
|
|
11
|
+
|
|
12
|
+
require 'imw/utils/error'
|
|
13
|
+
require 'imw/utils/config'
|
|
14
|
+
require 'imw/utils/extensions/string'
|
|
15
|
+
|
|
16
|
+
class File

  # Returns the base name of +path+ with its extension removed:
  #
  #   File.name_of_file("/path/to/somefile.txt") => "somefile"
  #
  # Only the last extension (as reported by File.extname) is dropped.
  def self.name_of_file path
    base      = basename(path)
    extension = extname(path)
    base[0, base.length - extension.length]
  end

  # Returns what would be the handle of a source or dataset
  # described by a file at +path+:
  #
  #   File.handle "/path/to/a_particular_dataset.instructions.yaml" #=> :a_particular_dataset
  #
  # Takes the part of the base name before the first `.' and calls
  # +handle+ on it (a String extension defined outside this file).
  def self.handle path
    leading_part = File.basename(path).split('.').first
    leading_part.handle
  end

  # Returns a unique (non-existing) version of the given +path+ by
  # appending successive integers, useful for copying files into
  # directories without clobbering existing files (a la <tt>wget
  # -nc</tt>).
  #
  # In a directory <tt>/path/to</tt> without a file named
  # <tt>data.txt</tt>
  #
  #   File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt"
  #
  # If <tt>data.txt</tt> were to already exist in that directory, then
  #
  #   File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt.1"
  #
  # If <tt>data.txt.1</tt> were to already exist then
  #
  #   File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt.2"
  #
  # and so on.
  def self.uniquify path
    candidate = path
    attempt   = 0
    while exist?(candidate)
      attempt  += 1
      candidate = path + ".#{attempt}"
    end
    candidate
  end

end
|
|
63
|
+
|
|
64
|
+
# puts "#{File.basename(__FILE__)}: You add a bit of glitter and jazz to all the folders in the cabinet. It makes you feel happier when you have to sort through them." # at bottom
|