imw 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +15 -0
- data/CHANGELOG +0 -0
- data/LICENSE +674 -0
- data/README.rdoc +101 -0
- data/Rakefile +20 -0
- data/VERSION +1 -0
- data/etc/imwrc.rb +76 -0
- data/lib/imw.rb +42 -0
- data/lib/imw/boot.rb +58 -0
- data/lib/imw/dataset.rb +233 -0
- data/lib/imw/dataset/datamapper.rb +66 -0
- data/lib/imw/dataset/datamapper/time_and_user_stamps.rb +37 -0
- data/lib/imw/dataset/loaddump.rb +50 -0
- data/lib/imw/dataset/old/file_collection.rb +88 -0
- data/lib/imw/dataset/old/file_collection_utils.rb +71 -0
- data/lib/imw/dataset/scaffold.rb +132 -0
- data/lib/imw/dataset/scraped_uri.rb +305 -0
- data/lib/imw/dataset/scrub/old_working_scrubber.rb +87 -0
- data/lib/imw/dataset/scrub/scrub.rb +147 -0
- data/lib/imw/dataset/scrub/scrub_simple_url.rb +38 -0
- data/lib/imw/dataset/scrub/scrub_test.rb +60 -0
- data/lib/imw/dataset/scrub/slug.rb +101 -0
- data/lib/imw/dataset/stats.rb +73 -0
- data/lib/imw/dataset/stats/counter.rb +23 -0
- data/lib/imw/dataset/task.rb +38 -0
- data/lib/imw/dataset/workflow.rb +81 -0
- data/lib/imw/files.rb +110 -0
- data/lib/imw/files/archive.rb +113 -0
- data/lib/imw/files/basicfile.rb +122 -0
- data/lib/imw/files/binary.rb +28 -0
- data/lib/imw/files/compressed_file.rb +93 -0
- data/lib/imw/files/compressed_files_and_archives.rb +348 -0
- data/lib/imw/files/compressible.rb +103 -0
- data/lib/imw/files/csv.rb +112 -0
- data/lib/imw/files/json.rb +41 -0
- data/lib/imw/files/sgml.rb +65 -0
- data/lib/imw/files/text.rb +68 -0
- data/lib/imw/files/yaml.rb +46 -0
- data/lib/imw/packagers.rb +8 -0
- data/lib/imw/packagers/archiver.rb +108 -0
- data/lib/imw/packagers/s3_mover.rb +28 -0
- data/lib/imw/parsers.rb +7 -0
- data/lib/imw/parsers/html_parser.rb +382 -0
- data/lib/imw/parsers/html_parser/matchers.rb +306 -0
- data/lib/imw/parsers/line_parser.rb +87 -0
- data/lib/imw/parsers/regexp_parser.rb +72 -0
- data/lib/imw/utils.rb +24 -0
- data/lib/imw/utils/components.rb +61 -0
- data/lib/imw/utils/config.rb +46 -0
- data/lib/imw/utils/error.rb +54 -0
- data/lib/imw/utils/extensions/array.rb +125 -0
- data/lib/imw/utils/extensions/class/attribute_accessors.rb +8 -0
- data/lib/imw/utils/extensions/core.rb +43 -0
- data/lib/imw/utils/extensions/dir.rb +24 -0
- data/lib/imw/utils/extensions/file_core.rb +64 -0
- data/lib/imw/utils/extensions/hash.rb +218 -0
- data/lib/imw/utils/extensions/hpricot.rb +48 -0
- data/lib/imw/utils/extensions/string.rb +49 -0
- data/lib/imw/utils/extensions/struct.rb +42 -0
- data/lib/imw/utils/extensions/symbol.rb +28 -0
- data/lib/imw/utils/extensions/typed_struct.rb +22 -0
- data/lib/imw/utils/extensions/uri.rb +59 -0
- data/lib/imw/utils/log.rb +67 -0
- data/lib/imw/utils/misc.rb +63 -0
- data/lib/imw/utils/paths.rb +115 -0
- data/lib/imw/utils/uri.rb +59 -0
- data/lib/imw/utils/uuid.rb +33 -0
- data/lib/imw/utils/validate.rb +38 -0
- data/lib/imw/utils/version.rb +12 -0
- data/lib/imw/utils/view.rb +113 -0
- data/lib/imw/utils/view/dump_csv.rb +112 -0
- data/lib/imw/utils/view/dump_csv_older.rb +117 -0
- data/spec/data/sample.csv +131 -0
- data/spec/data/sample.tsv +131 -0
- data/spec/data/sample.txt +131 -0
- data/spec/data/sample.xml +653 -0
- data/spec/data/sample.yaml +652 -0
- data/spec/imw/dataset/datamapper/uri_spec.rb +43 -0
- data/spec/imw/dataset/datamapper_spec_helper.rb +11 -0
- data/spec/imw/files/archive_spec.rb +118 -0
- data/spec/imw/files/basicfile_spec.rb +121 -0
- data/spec/imw/files/bz2_spec.rb +32 -0
- data/spec/imw/files/compressed_file_spec.rb +96 -0
- data/spec/imw/files/compressible_spec.rb +100 -0
- data/spec/imw/files/file_spec.rb +144 -0
- data/spec/imw/files/gz_spec.rb +32 -0
- data/spec/imw/files/rar_spec.rb +33 -0
- data/spec/imw/files/tar_spec.rb +31 -0
- data/spec/imw/files/text_spec.rb +23 -0
- data/spec/imw/files/zip_spec.rb +31 -0
- data/spec/imw/files_spec.rb +38 -0
- data/spec/imw/packagers/archiver_spec.rb +125 -0
- data/spec/imw/packagers/s3_mover_spec.rb +7 -0
- data/spec/imw/parsers/line_parser_spec.rb +96 -0
- data/spec/imw/parsers/regexp_parser_spec.rb +42 -0
- data/spec/imw/utils/extensions/file_core_spec.rb +72 -0
- data/spec/imw/utils/extensions/find_spec.rb +113 -0
- data/spec/imw/utils/paths_spec.rb +38 -0
- data/spec/imw/workflow/rip/local_spec.rb +89 -0
- data/spec/imw/workflow/rip_spec.rb +27 -0
- data/spec/rcov.opts +1 -0
- data/spec/spec.opts +4 -0
- data/spec/spec_helper.rb +32 -0
- data/spec/support/archive_contents_matcher.rb +94 -0
- data/spec/support/custom_matchers.rb +21 -0
- data/spec/support/directory_contents_matcher.rb +61 -0
- data/spec/support/extensions.rb +18 -0
- data/spec/support/file_contents_matcher.rb +50 -0
- data/spec/support/random.rb +210 -0
- data/spec/support/without_regard_to_order_matcher.rb +58 -0
- metadata +196 -0
data/lib/imw/utils.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
#
|
2
|
+
# h2. lib/imw/utils.rb -- utility functions
|
3
|
+
#
|
4
|
+
# == About
|
5
|
+
#
|
6
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
7
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
8
|
+
# License:: GPL 3.0
|
9
|
+
# Website:: http://infinitemonkeywrench.org/
|
10
|
+
#
|
11
|
+
|
12
|
+
require 'rubygems'
|
13
|
+
require 'imw/utils/error'
|
14
|
+
require 'imw/utils/log'
|
15
|
+
require 'imw/utils/config'
|
16
|
+
require 'imw/utils/paths'
|
17
|
+
require 'imw/utils/misc'
|
18
|
+
require 'imw/utils/components'
|
19
|
+
require 'imw/utils/extensions/core'
|
20
|
+
require 'fileutils'
|
21
|
+
require 'pathname'
|
22
|
+
|
23
|
+
|
24
|
+
# puts "#{File.basename(__FILE__)}: Early economists thought they would measure the utility of an action in units of `utils'. Really." # at bottom
|
@@ -0,0 +1,61 @@
|
|
1
|
+
#
|
2
|
+
# h2. lib/imw/utils/components.rb -- define separate components of IMW
|
3
|
+
#
|
4
|
+
# == About
|
5
|
+
#
|
6
|
+
# Defines a hash <tt>IMW::COMPONENTS</tt> which keys component names
|
7
|
+
# to the files to be required to implement each component and defines
|
8
|
+
# methods to load these files.
|
9
|
+
#
|
10
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
11
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
12
|
+
# License:: GPL 3.0
|
13
|
+
# Website:: http://infinitemonkeywrench.org/
|
14
|
+
#
|
15
|
+
# puts "#{File.basename(__FILE__)}: Something clever" # at bottom
|
16
|
+
|
17
|
+
require 'imw/utils/error'
|
18
|
+
|
19
|
+
module IMW

  # Maps IMW component names to what has to be loaded to provide them.
  # A value is one of
  #
  # * a String -- a single name handed to +require+,
  # * an Array -- several names, loaded one after another, or
  # * a Symbol -- an alias for another key of this hash.
  #
  # Components are loaded with <tt>IMW.load_components</tt> or, after
  # <tt>include IMW</tt>, with <tt>imw_components</tt>.
  #
  # NOTE(review): the gem's file listing shows no
  # <tt>imw/parsers/flat_file_parser</tt> and no <tt>imw/infochimps/*</tt>
  # files, so the <tt>:flat_file_parser</tt> and <tt>:infochimps</tt> entries
  # presumably cannot be loaded from this release -- confirm.
  COMPONENTS = {
    :datamapper       => ["imw/dataset/datamapper", "imw/dataset/datamapper/time_and_user_stamps"],
    :data_mapper      => :datamapper,
    :html_parser      => "imw/parsers/html_parser",
    :flat_file_parser => "imw/parsers/flat_file_parser",
    :line_parser      => "imw/parsers/line_parser",
    :infochimps       => ["imw/infochimps/infochimps_resource", "imw/infochimps/icss"]
  }

  # Load components of IMW as needed,
  #
  #   IMW.load_components :datamapper, :flat_file_parser
  #
  # Each name is first passed to +require+ as a string. Only when that
  # raises a LoadError is the name looked up in <tt>IMW::COMPONENTS</tt>
  # and whatever it maps to loaded through this same method, so aliases
  # and lists of files are followed recursively.
  #
  # Raises IMW::Error when a name can neither be required nor found in
  # <tt>IMW::COMPONENTS</tt>. The message of the LoadError that triggered
  # the lookup is appended to the error message so that a file which
  # exists but fails to load one of its own dependencies is not reported
  # merely as an "invalid component".
  #
  # Returns +args+.
  def self.load_components *args
    args.each do |component_name|
      begin
        require component_name.to_s
      rescue LoadError => load_error
        component = IMW::COMPONENTS[component_name]
        unless component
          raise IMW::Error.new("#{component_name} is an invalid IMW component. See IMW::COMPONENTS. (#{load_error.message})")
        end
        # Array() wraps a lone String or Symbol and leaves an Array as is.
        IMW.load_components(*Array(component))
      end
    end
  end

  # Load components of IMW as needed,
  #
  #   include IMW
  #   imw_components :datamapper, :flat_file_parser
  #
  # Instance-level convenience that forwards to
  # <tt>IMW.load_components</tt>.
  def imw_components *args
    IMW.load_components(*args)
  end

end
|
61
|
+
|
@@ -0,0 +1,46 @@
|
|
1
|
+
#
|
2
|
+
# h2. lib/imw/utils/config.rb -- configuration parsing
|
3
|
+
#
|
4
|
+
# == About
|
5
|
+
#
|
6
|
+
# IMW looks for configuration settings in the following places, in
|
7
|
+
# order of increasing precedence:
|
8
|
+
#
|
9
|
+
# 1. Settings defined directly in this file.
|
10
|
+
#
|
11
|
+
# 2. From the <tt>etc/imwrc</tt> file in the IMW root directory.
|
12
|
+
#
|
13
|
+
# 3. From the <tt>.imwrc</tt> file in the user's home directory (the
|
14
|
+
# filename can be changed; see
|
15
|
+
# <tt>IMW::Config::USER_CONFIG_FILE_BASENAME</tt>).
|
16
|
+
#
|
17
|
+
# 4. From the file defined by the environment variable +IMWRC+ (the
|
18
|
+
# value can be changed; see
|
19
|
+
#      <tt>IMW::Config::USER_CONFIG_FILE_ENV_VARIABLE</tt>).
|
20
|
+
#
|
21
|
+
# Settings not found in one configuration location will be searched
|
22
|
+
# for in locations of lesser precedence.
|
23
|
+
#
|
24
|
+
# *Note:* configuration files are plain Ruby code that will be directly
|
25
|
+
# evaluated.
|
26
|
+
#
|
27
|
+
# Relevant settings include
|
28
|
+
#
|
29
|
+
# * interfaces with external programs (+tar+, +wget+, &c.)
|
30
|
+
# * paths to directories where IMW reads/writes files
|
31
|
+
# * correspondences between file extensions and IMW file classes
|
32
|
+
#
|
33
|
+
# For more detailed information, see the default configuration file,
|
34
|
+
# <tt>etc/imwrc</tt>.
|
35
|
+
#
|
36
|
+
#
|
37
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
38
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
39
|
+
# License:: GPL 3.0
|
40
|
+
# Website:: http://infinitemonkeywrench.org/
|
41
|
+
#
|
42
|
+
|
43
|
+
module IMW
|
44
|
+
end
|
45
|
+
|
46
|
+
# puts "#{File.basename(__FILE__)}: You carefully adjust the settings on your Monkeywrench. Glob-monsters: beware!!" # at bottom
|
@@ -0,0 +1,54 @@
|
|
1
|
+
#
|
2
|
+
# h2. lib/imw/utils/error.rb -- errors
|
3
|
+
#
|
4
|
+
# == About
|
5
|
+
#
|
6
|
+
# Error objects for IMW.
|
7
|
+
#
|
8
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
9
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
10
|
+
# License:: GPL 3.0
|
11
|
+
# Website:: http://infinitemonkeywrench.org/
|
12
|
+
#
|
13
|
+
|
14
|
+
module IMW

  # A generic error class; the base of the IMW-specific errors below.
  class Error < StandardError
  end

  # IMW-namespaced counterparts of three core exception classes.
  #
  # The superclasses are written with a leading <tt>::</tt> so that they
  # always name the core class. A bare +TypeError+ would resolve to
  # <tt>IMW::TypeError</tt> once that constant exists, which makes loading
  # this file a second time fail with a superclass mismatch.
  class TypeError < ::TypeError
  end

  class ArgumentError < ::ArgumentError
  end

  class NotImplementedError < ::NotImplementedError
  end

  # Raised when parsing fails.
  class ParseError < Error
  end

  # An error meant to be used when a system call goes awry. It will
  # report exit status and the process id of the offending call.
  #
  # The child status (<tt>$?</tt>) is captured when the error is created,
  # so the report still describes the offending call even if other
  # processes are run before the message is rendered.
  class SystemCallError < IMW::Error

    # +message+ describes the failed call (typically the command line).
    def initialize(message)
      super(message)
      @message = message
      @status  = $?  # nil when this thread has not yet waited on any child
    end

    # Returns the same text as +to_s+. Note that this shadows
    # <tt>Object#display</tt>: nothing is written to standard output.
    def display
      to_s
    end

    # Returns the message prefixed with the exit status and pid of the
    # captured child status, or just the message when no status was
    # available at construction time.
    def to_s
      return @message.to_s unless @status
      "(error code: #{@status.exitstatus}, pid: #{@status.pid}) #{@message}"
    end

  end

  # An error for improperly specified, inappropriate, or broken paths.
  class PathError < IMW::Error
  end

end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
#
|
2
|
+
# h2. lib/imw/utils/extensions/array.rb -- array extensions
|
3
|
+
#
|
4
|
+
# == About
|
5
|
+
#
|
6
|
+
# Extensions to the +Array+ class.
|
7
|
+
#
|
8
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
9
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
10
|
+
# License:: GPL 3.0
|
11
|
+
# Website:: http://infinitemonkeywrench.org/
|
12
|
+
#
|
13
|
+
require 'active_support/core_ext/array/extract_options'
|
14
|
+
class Array #:nodoc:
  # The file required above provides +extract_options!+. Where it does so
  # through the ActiveSupport::CoreExtensions::Array::ExtractOptions mixin,
  # the mixin has to be included by hand; where that module is not defined
  # there is nothing to include, and referencing it unguarded would raise
  # a NameError.
  if defined?(ActiveSupport::CoreExtensions::Array::ExtractOptions)
    include ActiveSupport::CoreExtensions::Array::ExtractOptions
  end
end
|
17
|
+
|
18
|
+
class Array

  # Return all but the last element.
  # This will be [] for both an empty array and a length-1 array.
  def most() self[0..-2] end

  # Return all but the first element.
  # This will be nil for an empty array and [] for a length-1 array.
  def rest() self[1..-1] end

  # 'Un'-zip()s an array. Returns an array of arrays: the first array has the
  # first element of each member, the second array has the second element of
  # each member, and so on. Returns as many arrays as the first member of self
  # has elements; a nil is inserted where a member array wasn't long enough,
  # and elements beyond the length of the first member are dropped.
  #
  # foo, bar = foo.zip(bar).unzip should leave foo and bar with the same values
  # if foo and bar have the same length.
  #
  # Returns [] for an empty array. Will fail on a not-array-of-arrays.
  def unzip()
    return [] if empty?
    # An array of empty arrays, one for each vertical slot
    vslices = first.map { [] }
    each do |hslice|
      # push the elements of each array onto its slice.
      vslices.zip(hslice).each { |vslice, h_el| vslice << h_el }
    end
    vslices
  end

  # Return a random element of this array, or nil if the array is empty.
  def random_element
    return nil if empty?
    self[rand(length)]
  end

  # Convert an assoc (list of [key, val, [...]]'s) to an OpenStruct whose
  # attributes are the keys. Every value is itself converted by calling
  # +to_openstruct+ on it, so values must respond to that message.
  #
  # NOTE(review): neither +OpenStruct+ nor +to_openstruct+ for non-Array
  # values is provided by this file -- presumably 'ostruct' and the other
  # IMW core extensions are loaded by the caller; confirm.
  def to_openstruct
    mapped = {}
    each { |key, value| mapped[key] = value.to_openstruct }
    OpenStruct.new(mapped)
  end

  # Return the elements of this array in a pretty-printed string,
  # inserting +final_string+ between the last two items.
  #
  #   >> [:one, :two, :three].quote_items_with "or"
  #   `one', `two', or `three'
  #
  # Without +final_string+ the items are simply separated by commas.
  def quote_items_with final_string = nil
    string_items = map { |item| "`" + item.to_s + "'" }
    case string_items.length
    when 0
      ""
    when 1
      string_items.first
    when 2
      string_items.join(final_string ? " #{final_string} " : ', ')
    else
      leading = string_items[0..-2].join(', ')
      if final_string
        "#{leading}, #{final_string} #{string_items.last}"
      else
        "#{leading}, #{string_items.last}"
      end
    end
  end

  # Pass the elements of this array to +block+ in slices of +number+
  # elements. A copy of the array is padded with +fill_with+ until its
  # size is a multiple of +number+, so the last slice is always full.
  def in_groups_of(number, fill_with = nil, &block)
    require 'enumerator'
    collection = dup
    collection << fill_with until collection.size.modulo(number).zero?
    collection.each_slice(number, &block)
  end

  # Returns a single hash containing the merge of all hashes in this
  # array. This is useful when dealing with badly written YAML files.
  # Only merges hashes at depth zero, i.e. - this isn't recursive.
  # Elements that are not hashes are ignored; on duplicate keys the
  # later hash wins.
  def merge_hashes
    merged_hash = {}
    each do |element|
      merged_hash.merge!(element) if element.is_a?(Hash)
    end
    merged_hash
  end

  # Recurses through the elements of this Array collecting all
  # "terminal" nodes, i.e. every element that does not itself respond
  # to +terminals+. If a block is given, each collected terminal is
  # replaced by the result of yielding it to the block.
  def terminals &block
    found = []
    each do |element|
      if element.respond_to? :terminals
        found += element.terminals
      else
        found << element
      end
    end
    found.map! { |terminal| yield terminal } if block
    found
  end

end
|
124
|
+
|
125
|
+
# puts "#{File.basename(__FILE__)}: I have a loooong list of complaints. Firstly, ..." # at bottom
|
@@ -0,0 +1,8 @@
|
|
1
|
+
# :nodoc:
|
2
|
+
# for when cattr_accessor is all you need
|
3
|
+
#
|
4
|
+
require 'active_support/core_ext/array/extract_options'
|
5
|
+
class Array #:nodoc:
  # The file required above provides +extract_options!+. Where it does so
  # through the ActiveSupport::CoreExtensions::Array::ExtractOptions mixin,
  # the mixin has to be included by hand; where that module is not defined
  # there is nothing to include, and referencing it unguarded would raise
  # a NameError.
  if defined?(ActiveSupport::CoreExtensions::Array::ExtractOptions)
    include ActiveSupport::CoreExtensions::Array::ExtractOptions
  end
end
|
8
|
+
require 'active_support/core_ext/class/attribute_accessors'
|
@@ -0,0 +1,43 @@
|
|
1
|
+
#
|
2
|
+
# h2. lib/imw/utils/extensions/core.rb -- extensions to the Ruby core
|
3
|
+
#
|
4
|
+
# == About
|
5
|
+
#
|
6
|
+
# Some useful extensions to basic Ruby classes. This file is required
|
7
|
+
# by <tt>imw/utils</tt> so any files required here are automatically
|
8
|
+
# required when loading IMW.
|
9
|
+
#
|
10
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
11
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
12
|
+
# License:: GPL 3.0
|
13
|
+
# Website:: http://infinitemonkeywrench.org/
|
14
|
+
#
|
15
|
+
# puts "#{File.basename(__FILE__)}: Your monkeywrench does a complicated series of core-burning exercises and emerges with ripped, powerful-looking abs."
|
16
|
+
|
17
|
+
require 'imw/utils/extensions/string'
|
18
|
+
require 'imw/utils/extensions/array'
|
19
|
+
require 'imw/utils/extensions/hash'
|
20
|
+
require 'imw/utils/extensions/dir'
|
21
|
+
require 'imw/utils/extensions/struct'
|
22
|
+
require 'imw/utils/extensions/symbol'
|
23
|
+
require 'imw/utils/extensions/file_core'
|
24
|
+
require 'active_support/core_ext/module/aliasing'
|
25
|
+
require 'active_support/core_ext/object/blank'
|
26
|
+
require 'active_support/core_ext/object/misc'
|
27
|
+
#require 'active_support/core_ext/blank.rb'
|
28
|
+
require 'imw/utils/extensions/class/attribute_accessors'
|
29
|
+
# require 'ostruct'
|
30
|
+
require 'set'
|
31
|
+
|
32
|
+
module IMW
  # A replacement for the standard system call. The given +commands+
  # are flattened, joined with single spaces and handed to
  # <tt>Kernel.system</tt> as one command line.
  #
  # Raises an IMW::SystemCallError, built from that command line, if
  # the command does not exit successfully. Returns nil otherwise.
  def self.system *commands
    command_line = commands.flatten.join(' ')
    Kernel.system command_line
    return if $?.success?
    raise IMW::SystemCallError.new(command_line)
  end
end
|
42
|
+
|
43
|
+
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#
|
2
|
+
# h2. lib/imw/utils/extensions/dir.rb -- directory extensions
|
3
|
+
#
|
4
|
+
# == About
|
5
|
+
#
|
6
|
+
# The Ruby +Dir+ module is rubbish. Time to clean it up a bit!
|
7
|
+
#
|
8
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
9
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
10
|
+
# License:: GPL 3.0
|
11
|
+
# Website:: http://infinitemonkeywrench.org/
|
12
|
+
#
|
13
|
+
|
14
|
+
|
15
|
+
class Dir

  # Return the absolute paths of files and directories in the
  # directory, leaving out `.' and `..' entries.
  #
  # The directory's own path is expanded before the entry names are
  # joined onto it, so the results are absolute even when this Dir was
  # opened with a relative path. Expansion is relative to the working
  # directory at the time of the call, so the result is only meaningful
  # if the working directory has not changed since the Dir was opened.
  # Entry names are joined verbatim and are never themselves expanded.
  def abs_contents
    root = File.expand_path(path)
    entries.reject { |entry| entry == '.' || entry == '..' }.map { |entry| File.join(root, entry) }
  end
end
|
23
|
+
|
24
|
+
# puts "#{File.basename(__FILE__)}: You open the folder and see a long list of names. Some have been crossed out -- ominously..." # at bottom
|
@@ -0,0 +1,64 @@
|
|
1
|
+
#
|
2
|
+
# h2. lib/imw/utils/extensions/file_core.rb -- extensions to built-in file class
|
3
|
+
#
|
4
|
+
# == About
|
5
|
+
#
|
6
|
+
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
7
|
+
# Copyright:: Copyright (c) 2008 infochimps.org
|
8
|
+
# License:: GPL 3.0
|
9
|
+
# Website:: http://infinitemonkeywrench.org/
|
10
|
+
#
|
11
|
+
|
12
|
+
require 'imw/utils/error'
|
13
|
+
require 'imw/utils/config'
|
14
|
+
require 'imw/utils/extensions/string'
|
15
|
+
|
16
|
+
class File

  # Returns the base name of +path+ with its final extension removed:
  #
  #   File.name_of_file("/path/to/somefile.txt") => "somefile".
  def self.name_of_file path
    base = basename(path)
    base[0, base.length - extname(path).length]
  end

  # Returns what would be the handle of a source or dataset
  # described by a file at +path+, computed from the part of the base
  # name before its first dot:
  #
  #   File.handle "/path/to/a_particular_dataset.instructions.yaml" #=> :a_particular_dataset
  #
  # NOTE(review): relies on a +handle+ method on String that is not
  # defined in this file -- presumably supplied by
  # <tt>imw/utils/extensions/string</tt>; confirm.
  def self.handle path
    stem = basename(path).split('.').first
    stem.handle
  end

  # Returns a unique (non-existing) version of the given +path+ by
  # appending successive integers, useful for copying files into
  # directories without clobbering existing files (a la <tt>wget
  # -nc</tt>).
  #
  # In a directory <tt>/path/to</tt> without a file named
  # <tt>data.txt</tt>
  #
  #   File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt"
  #
  # If <tt>data.txt</tt> were to already exist in that directory, then
  #
  #   File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt.1"
  #
  # If <tt>data.txt.1</tt> were to already exist then
  #
  #   File.uniquify("/path/to/data.txt") #=> "/path/to/data.txt.2"
  #
  # and so on.
  def self.uniquify path
    candidate = path
    attempt   = 0
    while exist?(candidate)
      attempt  += 1
      candidate = path + ".#{attempt}"
    end
    candidate
  end

end
|
63
|
+
|
64
|
+
# puts "#{File.basename(__FILE__)}: You add a bit of glitter and jazz to all the folders in the cabinet. It makes you feel happier when you have to sort through them." # at bottom
|