imw 0.2.17 → 0.2.18

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
  source :gemcutter
3
- gem 'activesupport', '2.3.5', :require => 'active_support'
3
+ gem 'activesupport', :require => 'active_support'
4
4
  gem 'addressable', :require => 'addressable/uri'
5
5
  gem 'uuidtools'
6
6
  gem 'rake'
data/Gemfile.lock CHANGED
@@ -2,8 +2,8 @@ GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
4
  Ascii85 (1.0.0)
5
- activesupport (2.3.5)
6
- addressable (2.2.2)
5
+ activesupport (3.0.3)
6
+ addressable (2.2.4)
7
7
  aws-s3 (0.6.2)
8
8
  builder
9
9
  mime-types
@@ -12,27 +12,27 @@ GEM
12
12
  dbi (0.4.5)
13
13
  deprecated (= 2.0.1)
14
14
  deprecated (2.0.1)
15
- fastercsv (1.5.3)
15
+ fastercsv (1.5.4)
16
16
  hpricot (0.8.3)
17
- json (1.4.6)
17
+ json (1.5.1)
18
18
  mime-types (1.16)
19
- pdf-reader (0.9.0)
19
+ pdf-reader (0.9.1)
20
20
  Ascii85 (>= 0.9)
21
21
  rake (0.8.7)
22
22
  rest-client (1.6.1)
23
23
  mime-types (>= 1.16)
24
24
  rspec (1.3.0)
25
25
  ruby-ole (1.2.11.1)
26
- spreadsheet (0.6.4.1)
27
- ruby-ole
28
- uuidtools (2.1.1)
29
- xml-simple (1.0.12)
26
+ spreadsheet (0.6.5.1)
27
+ ruby-ole (>= 1.0)
28
+ uuidtools (2.1.2)
29
+ xml-simple (1.0.14)
30
30
 
31
31
  PLATFORMS
32
32
  ruby
33
33
 
34
34
  DEPENDENCIES
35
- activesupport (= 2.3.5)
35
+ activesupport
36
36
  addressable
37
37
  aws-s3
38
38
  dbi
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.17
1
+ 0.2.18
data/lib/imw/boot.rb CHANGED
@@ -1,5 +1,3 @@
1
- require 'imw/utils/extensions/hash'
2
-
3
1
  module IMW
4
2
 
5
3
  # IMW looks for configuration settings in the following places, in
@@ -105,12 +105,22 @@ module IMW
105
105
  # @return [Array<Array>]
106
106
  def snippet
107
107
  require 'fastercsv'
108
- returning([]) do |rows|
109
- row_num = 1
110
- each do |row|
111
- break if row_num > 10
112
- rows << row.size.times.map { |index| row[index] }
113
- row_num += 1
108
+ [].tap do |rows|
109
+ rows_sampled = 0
110
+ begin
111
+ each do |row|
112
+ begin
113
+ break if rows_sampled > 100
114
+ row_size = row.size.to_f
115
+ if (row.reject(&:blank?).size.to_f / row_size) >= 0.5
116
+ rows << row.size.times.map { |index| row[index] }
117
+ rows_sampled += 1
118
+ end
119
+ rescue => e
120
+ next
121
+ end
122
+ end
123
+ rescue => e
114
124
  end
115
125
  end
116
126
  end
@@ -125,7 +135,7 @@ module IMW
125
135
  #
126
136
  # @return [Hash]
127
137
  def resource_options_compatible_with_faster_csv
128
- @compatible_options ||= returning({}) do |compatible_options|
138
+ @compatible_options ||= {}.tap do |compatible_options|
129
139
  FASTER_CSV_OPTION_NAMES.each do |option_name|
130
140
  compatible_options[option_name] = resource_options[option_name] if resource_options.has_key?(option_name.to_sym)
131
141
  end
@@ -75,15 +75,22 @@ module IMW
75
75
  #
76
76
  def snippet
77
77
  require 'spreadsheet'
78
- returning([]) do |snip|
79
- row_num = 1
78
+ [].tap do |snip|
79
+ rows_sampled = 0
80
80
  Spreadsheet.open(path).worksheets.each do |worksheet|
81
81
  worksheet.each do |row|
82
- break if row_num > 10
83
- snip << row.to_a
84
- row_num += 1
82
+ begin
83
+ break if rows_sampled > 100
84
+ row_size = row.size.to_f
85
+ if (row.reject(&:blank?).size.to_f / row_size) > 0.5
86
+ snip << row.to_a
87
+ rows_sampled += 1
88
+ end
89
+ rescue => e
90
+ next
91
+ end
85
92
  end
86
- break if row_num > 10
93
+ break if rows_sampled > 10
87
94
  end
88
95
  end
89
96
  end
@@ -27,7 +27,7 @@
27
27
  #
28
28
  # @return [String, nil]
29
29
  def default_metadata_uri
30
- contents.detect { |path| path =~ /(icss|metadata).*\.(ya?ml|json)$/i } || File.join(path, "#{basename}.icss.yaml")
30
+ contents.detect { |path| path =~ /metadata.*\.(ya?ml|json)$/i } || File.join(path, "#{basename}.metadata.yaml")
31
31
  end
32
32
 
33
33
  # Return the metadata for this resource if it exists.
@@ -24,19 +24,24 @@ module IMW
24
24
  # @return [Hash]
25
25
  def schema
26
26
  return @schema if @schema
27
- @schema = IMW::Metadata::Schema.new
27
+ @schema = IMW::Metadata::Schema.new
28
28
  @schema[:type] = "record"
29
29
  @schema[:namespace] = "schema.imw.resource"
30
- @schema[:name] = (basename || '')
31
- @schema[:doc] = description
32
- @schema[:fields] = fields
33
-
34
- @schema[:non_avro ] = {}
35
- @schema[:non_avro][:snippet] = snippet if respond_to?(:snippet)
36
- @schema[:non_avro][:record_count] = record_count if respond_to?(:record_count)
37
- @schema
30
+ @schema[:name] = (basename || '')
31
+ begin
32
+ @schema[:doc] = description
33
+ @schema[:fields] = fields
34
+
35
+ @schema[:non_avro ] = {}
36
+ @schema[:non_avro][:snippet] = snippet if respond_to?(:snippet) rescue nil
37
+ @schema[:non_avro][:record_count] = record_count if respond_to?(:record_count)
38
+ @schema
39
+ rescue => e
40
+ $stdout.puts "Error in producing schema for #{self}: #{e.class} -- #{e.message}"
41
+ return @schema
42
+ end
38
43
  end
39
-
44
+
40
45
  # Return the metadata object that contains metadata for this
41
46
  # resource.
42
47
  #
@@ -20,10 +20,16 @@ module IMW
20
20
  # @return [Hash]
21
21
  def summary
22
22
  return @summary if @summary
23
- @summary = external_summary
24
- @summary[:schema] = schema if respond_to?(:schema)
25
- @summary[:contents] = resources.map(&:summary) if respond_to?(:resources)
26
- @summary
23
+ @summary = {}
24
+ begin
25
+ @summary.merge!(external_summary)
26
+ @summary[:schema] = schema if respond_to?(:schema)
27
+ @summary[:contents] = resources.map(&:summary) if respond_to?(:resources)
28
+ @summary
29
+ rescue => e
30
+ # IMW.warn "Error in producing summary for #{self}: #{e.class} -- #{e.message}"
31
+ return @summary
32
+ end
27
33
  end
28
34
 
29
35
  # Return information (usually scheme-dependent) on how this
@@ -56,7 +56,7 @@ module IMW
56
56
 
57
57
  def parse_line line
58
58
  match_data = regexp.match(line.chomp)
59
- returning({}) do |hsh|
59
+ {}.tap do |hsh|
60
60
  if match_data
61
61
  match_data.captures.each_with_index do |capture, index|
62
62
  hsh[fields[index]] = capture
@@ -202,7 +202,7 @@ module IMW
202
202
  # @yield [String] each line of the file
203
203
  # @return [Array] the result of the block on each line
204
204
  def map &block
205
- returning([]) do |output|
205
+ [].tap do |output|
206
206
  HDFS.fs(:cat, path) do |line|
207
207
  output << block.call(line)
208
208
  end
@@ -219,7 +219,7 @@ module IMW
219
219
  #
220
220
  # @return [Array<String>]
221
221
  def contents
222
- returning([]) do |paths|
222
+ [].tap do |paths|
223
223
  HDFS.fs(:ls, path) do |line|
224
224
  next if line =~ /^Found.*items$/
225
225
  paths << line.split.last
@@ -173,7 +173,7 @@ module IMW
173
173
  #
174
174
  # @return [String]
175
175
  def snippet
176
- returning([]) do |snip|
176
+ [].tap do |snip|
177
177
  (io.read(1024) || '').bytes.each do |byte|
178
178
  # CR LF SPACE ~
179
179
  snip << byte.chr if byte == 13 || byte == 10 || byte >= 32 && byte <= 126
@@ -77,7 +77,7 @@ module IMW
77
77
  #
78
78
  # @return [Array<String>]
79
79
  def tables
80
- returning([]) do |table_names|
80
+ [].tap do |table_names|
81
81
  execute("SHOW TABLES") do |row|
82
82
  table_names << row.first
83
83
  end
@@ -14,7 +14,7 @@ module IMW
14
14
  #
15
15
  # @return [Hash]
16
16
  def extension_counts
17
- @extension_counts ||= returning({}) do |counts|
17
+ @extension_counts ||= {}.tap do |counts|
18
18
  resources.each do |resource|
19
19
  next if resource.is_directory?
20
20
  counts[resource.extension] = 0 unless counts.has_key?(resource.extension)
@@ -39,7 +39,7 @@ module IMW
39
39
  #
40
40
  # @return [Hash]
41
41
  def normalized_extension_counts
42
- @normalized_extension_counts ||= returning({}) do |weighted|
42
+ @normalized_extension_counts ||= {}.tap do |weighted|
43
43
  num_files = resources.reject(&:is_directory?).length.to_f
44
44
  extension_counts.each_pair do |extension, count|
45
45
  weighted[extension] = count.to_f / num_files
@@ -51,7 +51,7 @@ module IMW
51
51
  #
52
52
  # @return [Hash]
53
53
  def extension_sizes
54
- @extension_sizes ||= returning({}) do |sizes|
54
+ @extension_sizes ||= {}.tap do |sizes|
55
55
  resources.each do |resource|
56
56
  next if resource.is_directory?
57
57
  sizes[resource.extension] = 0 unless sizes.has_key?(resource.extension)
@@ -80,7 +80,7 @@ module IMW
80
80
  #
81
81
  # @return [Hash]
82
82
  def normalized_extension_sizes
83
- @normalized_extension_sizes ||= returning({}) do |weighted|
83
+ @normalized_extension_sizes ||= {}.tap do |weighted|
84
84
  extension_sizes.each_pair do |extension, size|
85
85
  weighted[extension] = size.to_f / total_size.to_f
86
86
  end
@@ -100,7 +100,7 @@ module IMW
100
100
  #
101
101
  # @return [URI::Generic]
102
102
  def stripped_uri
103
- uri_args = returning({}) do |args|
103
+ uri_args = {}.tap do |args|
104
104
  %w[scheme userinfo host port path].each do |method|
105
105
  args[method.to_sym] = respond_to?(method) ? send(method) : uri.send(method)
106
106
  end
data/lib/imw/utils.rb CHANGED
@@ -1,12 +1,12 @@
1
1
  require 'rubygems'
2
2
  require 'fileutils'
3
3
  require 'active_support/core_ext/object/blank'
4
- require 'active_support/core_ext/object/misc'
4
+ require 'active_support/core_ext/hash/reverse_merge'
5
+ require 'active_support/core_ext/string/starts_ends_with'
5
6
  require 'imw/utils/error'
6
7
  require 'imw/utils/log'
7
8
  require 'imw/utils/paths'
8
9
  require 'imw/utils/misc'
9
- require 'imw/utils/extensions'
10
10
 
11
11
  module IMW
12
12
 
@@ -15,5 +15,60 @@ module IMW
15
15
  autoload :DynamicallyExtendable, 'imw/utils/dynamically_extendable'
16
16
  autoload :HasURI, 'imw/utils/has_uri'
17
17
  end
18
+
19
+ # A replacement for the standard system call which raises an
20
+ # IMW::SystemCallError if the command fails which prints better
21
+ # debugging info.
22
+ #
23
+ # This function relies upon Kernel.system and obeys the same rules:
24
+ #
25
+ # - if +commands+ has only a single element then no shell
26
+ # characters or spaces are escaped -- you have to do it yourself
27
+ # or you get to use shell characters, depending on your
28
+ # perspective.
29
+ #
30
+ # - if +commands+ is a list of elements then the second and further
31
+ # elements in the list have their shell characters and spaces
32
+ # escaped
33
+ #
34
+ # But it also has its own rules:
35
+ #
36
+ # - When one of the +commands+ is an empty or blank string,
37
+ # Kernel.system honors it and escapes it properly and sends it
38
+ # along for evaluation. This can be a problem for some programs
39
+ # and so IMW.system excludes blank (as in <tt>blank?</tt>)
40
+ # elements of +commands+.
41
+ #
42
+ # - +commands+ will be flattened (see the gotcha below)
43
+ #
44
+ # Calling out to the shell like this is often brittle. Imagine
45
+ # defining
46
+ #
47
+ # prog = 'some_prog'
48
+ # flags = '-v -f'
49
+ # args = 'file.txt'
50
+ #
51
+ # and later calling
52
+ #
53
+ # IMW.system prog, flags, args
54
+ #
55
+ # The space in the second argument ('-v -f') will be escaped and
56
+ # will therefore not be properly parsed by +some_prog+. Instead try
57
+ #
58
+ # prog = 'some_prog'
59
+ # flags = ['-v', '-f']
60
+ # args = ['file.txt']
61
+ #
62
+ # IMW.system prog, flags, *args
63
+ #
64
+ # which will work fine since +flags+ will automatically be flattened.
65
+ def self.system *commands
66
+ stripped_commands = commands.flatten.map { |command| command.to_s unless command.blank? }.compact
67
+ IMW.announce_if_verbose(stripped_commands.join(" "))
68
+ exit_code = Kernel.system(*stripped_commands)
69
+ raise IMW::SystemCallError.new($?.dup, commands.join(' ')) unless $?.success?
70
+ exit_code
71
+ end
72
+
18
73
  end
19
74
 
@@ -56,7 +56,7 @@ describe IMW::Parsers::LineParser do
56
56
  end
57
57
 
58
58
  it "should pass each hash to a block when given one" do
59
- results = returning([]) do |array|
59
+ results = [].tap do |array|
60
60
  @parser.parse!(@file) do |hsh|
61
61
  hsh.delete(:id)
62
62
  array << hsh
@@ -78,7 +78,7 @@ describe IMW::Parsers::LineParser do
78
78
 
79
79
  it "should pass each object to a block when given one and defined with a class" do
80
80
  @parser.klass = OpenStruct
81
- results = returning([]) do |array|
81
+ results = [].tap do |array|
82
82
  @parser.parse!(@file) do |obj|
83
83
  obj.genus = nil
84
84
  array << obj
@@ -38,5 +38,5 @@ EOF
38
38
  end
39
39
  end
40
40
  end
41
-
41
+
42
42
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: imw
3
3
  version: !ruby/object:Gem::Version
4
- hash: 53
4
+ hash: 51
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 17
10
- version: 0.2.17
9
+ - 18
10
+ version: 0.2.18
11
11
  platform: ruby
12
12
  authors:
13
13
  - Dhruv Bansal
@@ -16,22 +16,76 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2011-01-17 00:00:00 -06:00
19
+ date: 2011-02-16 00:00:00 -06:00
20
20
  default_executable:
21
- dependencies: []
22
-
21
+ dependencies:
22
+ - !ruby/object:Gem::Dependency
23
+ prerelease: false
24
+ name: activesupport
25
+ version_requirements: &id001 !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ">="
29
+ - !ruby/object:Gem::Version
30
+ hash: 3
31
+ segments:
32
+ - 0
33
+ version: "0"
34
+ requirement: *id001
35
+ type: :runtime
36
+ - !ruby/object:Gem::Dependency
37
+ prerelease: false
38
+ name: addressable
39
+ version_requirements: &id002 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ hash: 3
45
+ segments:
46
+ - 0
47
+ version: "0"
48
+ requirement: *id002
49
+ type: :runtime
50
+ - !ruby/object:Gem::Dependency
51
+ prerelease: false
52
+ name: uuidtools
53
+ version_requirements: &id003 !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ hash: 3
59
+ segments:
60
+ - 0
61
+ version: "0"
62
+ requirement: *id003
63
+ type: :runtime
64
+ - !ruby/object:Gem::Dependency
65
+ prerelease: false
66
+ name: rake
67
+ version_requirements: &id004 !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ hash: 3
73
+ segments:
74
+ - 0
75
+ version: "0"
76
+ requirement: *id004
77
+ type: :runtime
23
78
  description: The Infinite Monkeywrench (IMW) is a Ruby frameworks to simplify the tasks of acquiring, extracting, transforming, loading, and packaging data. It minimizes programmer time by encapsulating common data workflows and patterns and creating interfaces to many other useful Ruby libraries.
24
79
  email: coders@infochimps.org
25
80
  executables:
26
- - tsv_to_json.rb
27
81
  - imw
82
+ - tsv_to_json.rb
28
83
  extensions: []
29
84
 
30
85
  extra_rdoc_files:
31
86
  - LICENSE
32
87
  - README.rdoc
33
88
  files:
34
- - .gitignore
35
89
  - Gemfile
36
90
  - Gemfile.lock
37
91
  - LICENSE
@@ -99,13 +153,7 @@ files:
99
153
  - lib/imw/utils.rb
100
154
  - lib/imw/utils/dynamically_extendable.rb
101
155
  - lib/imw/utils/error.rb
102
- - lib/imw/utils/extensions.rb
103
- - lib/imw/utils/extensions/array.rb
104
- - lib/imw/utils/extensions/hash.rb
105
156
  - lib/imw/utils/extensions/hpricot.rb
106
- - lib/imw/utils/extensions/string.rb
107
- - lib/imw/utils/extensions/struct.rb
108
- - lib/imw/utils/extensions/symbol.rb
109
157
  - lib/imw/utils/has_uri.rb
110
158
  - lib/imw/utils/log.rb
111
159
  - lib/imw/utils/misc.rb
@@ -195,8 +243,8 @@ homepage: http://github.com/infochimps/imw
195
243
  licenses: []
196
244
 
197
245
  post_install_message:
198
- rdoc_options:
199
- - --charset=UTF-8
246
+ rdoc_options: []
247
+
200
248
  require_paths:
201
249
  - lib
202
250
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -225,52 +273,52 @@ signing_key:
225
273
  specification_version: 3
226
274
  summary: The Infinite Monkeywrench (IMW) makes acquiring, extracting, transforming, loading, and packaging data easy.
227
275
  test_files:
228
- - spec/imw/archives/targz_spec.rb
276
+ - examples/dataset.rb
277
+ - spec/imw/archives/rar_spec.rb
229
278
  - spec/imw/archives/tar_spec.rb
230
- - spec/imw/archives/zip_spec.rb
231
279
  - spec/imw/archives/tarbz2_spec.rb
232
- - spec/imw/archives/rar_spec.rb
233
- - spec/imw/tools/archiver_spec.rb
234
- - spec/imw/tools/extension_analyzer_spec.rb
235
- - spec/imw/tools/summarizer_spec.rb
236
- - spec/imw/tools/transferer_spec.rb
237
- - spec/imw/tools/aggregator_spec.rb
238
- - spec/imw/compressed_files/compressible_spec.rb
280
+ - spec/imw/archives/targz_spec.rb
281
+ - spec/imw/archives/zip_spec.rb
282
+ - spec/imw/archives_spec.rb
239
283
  - spec/imw/compressed_files/bz2_spec.rb
284
+ - spec/imw/compressed_files/compressible_spec.rb
240
285
  - spec/imw/compressed_files/gz_spec.rb
286
+ - spec/imw/compressed_files_spec.rb
241
287
  - spec/imw/dataset/paths_spec.rb
242
288
  - spec/imw/dataset/workflow_spec.rb
243
- - spec/imw/compressed_files_spec.rb
244
- - spec/imw/parsers/line_parser_spec.rb
245
- - spec/imw/parsers/regexp_parser_spec.rb
246
- - spec/imw/resource_spec.rb
247
- - spec/imw/metadata_spec.rb
289
+ - spec/imw/formats/delimited_spec.rb
248
290
  - spec/imw/formats/excel_spec.rb
249
291
  - spec/imw/formats/json_spec.rb
250
- - spec/imw/formats/yaml_spec.rb
251
- - spec/imw/formats/delimited_spec.rb
252
292
  - spec/imw/formats/sgml_spec.rb
253
- - spec/imw/archives_spec.rb
254
- - spec/imw/metadata/schema_spec.rb
293
+ - spec/imw/formats/yaml_spec.rb
255
294
  - spec/imw/metadata/contains_metadata_spec.rb
295
+ - spec/imw/metadata/field_spec.rb
256
296
  - spec/imw/metadata/has_metadata_spec.rb
257
297
  - spec/imw/metadata/has_summary_spec.rb
258
- - spec/imw/metadata/field_spec.rb
298
+ - spec/imw/metadata/schema_spec.rb
299
+ - spec/imw/metadata_spec.rb
300
+ - spec/imw/parsers/line_parser_spec.rb
301
+ - spec/imw/parsers/regexp_parser_spec.rb
302
+ - spec/imw/resource_spec.rb
303
+ - spec/imw/schemes/hdfs_spec.rb
259
304
  - spec/imw/schemes/http_spec.rb
260
305
  - spec/imw/schemes/local_spec.rb
261
- - spec/imw/schemes/sql_spec.rb
262
306
  - spec/imw/schemes/remote_spec.rb
263
- - spec/imw/schemes/hdfs_spec.rb
264
307
  - spec/imw/schemes/s3_spec.rb
308
+ - spec/imw/schemes/sql_spec.rb
309
+ - spec/imw/tools/aggregator_spec.rb
310
+ - spec/imw/tools/archiver_spec.rb
311
+ - spec/imw/tools/extension_analyzer_spec.rb
312
+ - spec/imw/tools/summarizer_spec.rb
313
+ - spec/imw/tools/transferer_spec.rb
314
+ - spec/imw/utils/dynamically_extendable_spec.rb
265
315
  - spec/imw/utils/has_uri_spec.rb
266
316
  - spec/imw/utils/paths_spec.rb
267
- - spec/imw/utils/dynamically_extendable_spec.rb
268
317
  - spec/imw/utils/shared_paths_spec.rb
269
318
  - spec/imw_spec.rb
270
319
  - spec/spec_helper.rb
271
- - spec/support/without_regard_to_order_matcher.rb
272
320
  - spec/support/custom_matchers.rb
273
- - spec/support/random.rb
274
- - spec/support/paths_matcher.rb
275
321
  - spec/support/file_contents_matcher.rb
276
- - examples/dataset.rb
322
+ - spec/support/paths_matcher.rb
323
+ - spec/support/random.rb
324
+ - spec/support/without_regard_to_order_matcher.rb
data/.gitignore DELETED
@@ -1,18 +0,0 @@
1
- \#*
2
- .#*
3
- *~
4
- .bzr
5
- doc/*
6
- .hg
7
- Icon?
8
- *.org
9
- .project
10
- .svn
11
- TAGS
12
- tmp/*
13
- *.tmproj
14
- pkg/*
15
- *gemspec
16
- tags
17
- .yardoc/*
18
- */.yardoc/*
@@ -1,114 +0,0 @@
1
- # class Array #:nodoc:
2
- # include ActiveSupport::CoreExtensions::Array::ExtractOptions
3
- # end
4
-
5
- class Array
6
-
7
- # Return all but the last element
8
- # This will be [] for both an empty array and a length-1 array
9
- def most() self[0..-2] end
10
-
11
- # Return all but the first element.
12
- # This will be nil for an empty array and [] for a length-1 array
13
- def rest() self[1..-1] end
14
-
15
- # 'Un'-zip()s an array. Returns an array of arrays: the first array has the
16
- # first element of each member, the second array has the second element of
17
- # each member, and so on. Returns as many arrays as the first element in self
18
- # and inserts a nil where the member array wasn't long enough.
19
- #
20
- # foo, bar = foo.zip(bar).unzip should leave foo and bar with the same values
21
- # if foo and bar have the same length.
22
- #
23
- # Will fail on a not-array-of-arrays.
24
- def unzip()
25
- # An array of empty arrays, one for each vertical slot
26
- vslices = self[0].map{ Array.new }
27
- self.each do |hslice|
28
- # push the elements of each array onto its slice.
29
- vslices.zip(hslice).map{|vslice,h_el| vslice << h_el }
30
- end
31
- vslices
32
- end
33
-
34
- # Return a random element of this array.
35
- def random_element
36
- self[rand(self.length) - 1]
37
- end
38
-
39
- # convert an assoc (list of [key, val, [...]]'s) to a hash
40
- def to_openstruct
41
- mapped = {}
42
- each{ |key,value| mapped[key] = value.to_openstruct }
43
- OpenStruct.new(mapped)
44
- end
45
-
46
- # Return the elements of this array in a pretty-printed string,
47
- # inserting +final_string+ between the last two items.
48
- #
49
- # >> [:one, :two, :three].quote_items_with "or"
50
- # `one', `two', or `three'
51
- #
52
- def quote_items_with final_string = nil
53
- string_items = self.map { |item| "`" + item.to_s + "'" }
54
- case string_items.length
55
- when 0
56
- ""
57
- when 1
58
- string_items.first
59
- when 2
60
- if final_string then
61
- string_items.join(" #{final_string} ")
62
- else
63
- string_items.join(', ')
64
- end
65
- else
66
- string = string_items[0,string_items.length - 1].join ', '
67
- if final_string then
68
- string += ', ' + final_string + ' ' + string_items.last
69
- else
70
- string += ', ' + string_items.last
71
- end
72
- string
73
- end
74
- end
75
-
76
- def in_groups_of(number, fill_with = nil, &block)
77
- require 'enumerator'
78
- collection = dup
79
- collection << fill_with until collection.size.modulo(number).zero?
80
- collection.each_slice(number, &block)
81
- end
82
-
83
- # Returns a single hash containing the merge of all hashes in this
84
- # array. This is useful when dealing with badly written YAML files.
85
- # Only merges hashes at depth zero, i.e. - this isn't recursive.
86
- def merge_hashes
87
- merged_hash = {}
88
- self.each do |element|
89
- merged_hash.merge!(element) if element.is_a?(Hash)
90
- end
91
- merged_hash
92
- end
93
-
94
- # Recurses through the elements of this Array collecting all String
95
- # or Symbol "terminal" nodes.
96
- def terminals &block
97
- terminals = []
98
- each do |element|
99
- if element.respond_to? :terminals then
100
- terminals += element.terminals
101
- else
102
- terminals << element
103
- end
104
- end
105
- terminals.map! {|terminal| yield terminal } if block
106
- terminals
107
- end
108
-
109
- # Emit the data in this array to the resource at the given +uri+.
110
- def emit uri
111
- IMW.open!(uri).emit(self)
112
- end
113
-
114
- end
@@ -1,206 +0,0 @@
1
- class Hash
2
- # Return the elements of this hash in a pretty-printed string,
3
- # inserting +final_string+ between the last two items.
4
- #
5
- # >> {:one => 1, :two => 2, :three => 3}.quote_keys_with "or"
6
- # `one', `two', or `three'
7
- #
8
- def quote_keys_with final_string = nil
9
- self.keys.quote_items_with final_string
10
- end
11
-
12
- # Stolen from ActiveSupport::CoreExtensions::Hash::ReverseMerge.
13
- def reverse_merge(other_hash)
14
- other_hash.merge(self)
15
- end
16
-
17
- # Stolen from ActiveSupport::CoreExtensions::Hash::ReverseMerge.
18
- def reverse_merge!(other_hash)
19
- replace(reverse_merge(other_hash))
20
- end
21
-
22
- # Create a hash from an array of keys and corresponding values.
23
- def self.zip(keys, values, default=nil, &block)
24
- hash = block_given? ? Hash.new(&block) : Hash.new(default)
25
- keys.zip(values) { |k,v| hash[k]=v }
26
- hash
27
- end
28
-
29
- # Turns a collection of pairs into a hash. The first of each pair
30
- # make the keys and the second the values. Elements with length
31
- # longer than two will lose those values.
32
- #
33
- # If there are multiple values of
34
- #
35
- def from_pairs()
36
- hsh = { }
37
- self.each{ |k,v| hsh[k] = v }
38
- hsh
39
- end
40
-
41
- # Merges self with another hash, recursively.
42
- #
43
- # first = {
44
- # :balls=> "monkey",
45
- # :data=> {
46
- # :name=> {:first=> "Sam", :middle=>"I", :last=>"am"}}}
47
- # second = {
48
- # :data=> {
49
- # :name=> {:middle=>["you", "me", "everyone we know"], :last => "are"}},
50
- # 1 => [1,2,5] }
51
- #
52
- # p first.deep_merge(second)
53
- # # => {:data=>{:name=>{:last=>"are", :middle=>["you", "me", "everyone we know"], :first=>"Sam"}}, 1=>[1, 2, 5], :balls=>"monkey"}
54
- # from http://snippets.dzone.com/posts/show/4706
55
- # From: http://pastie.textmate.org/pastes/30372, Elliott Hird
56
- def deep_merge(second)
57
- target = dup
58
- second.keys.each do |key|
59
- if second[key].is_a?(Hash) && self[key].is_a?(Hash)
60
- target[key] = target[key].deep_merge(second[key])
61
- else
62
- target[key] = second[key]
63
- end
64
- end
65
- target
66
- end
67
-
68
- # Merges self in-place with another hash, recursively.
69
- #
70
- # first = {
71
- # :balls=> "monkey",
72
- # :data=> {
73
- # :name=> {:first=> "Sam", :middle=>"I", :last=>"am"}}}
74
- # second = {
75
- # :data=> {
76
- # :name=> {:middle=>["you", "me", "everyone we know"], :last => "are"}},
77
- # 1 => [1,2,5] }
78
- #
79
- # p first.deep_merge(second)
80
- # # => {:data=>{:name=>{:last=>"are", :middle=>["you", "me", "everyone we know"], :first=>"Sam"}}, 1=>[1, 2, 5], :balls=>"monkey"}
81
- #
82
- # From: http://www.gemtacular.com/gemdocs/cerberus-0.2.2/doc/classes/Hash.html
83
- # File lib/cerberus/utils.rb, line 42
84
- def deep_merge!(second)
85
- second.keys.each do |key|
86
- if second[key].is_a?(Hash) && self[key].is_a?(Hash)
87
- self[key].deep_merge!(second[key])
88
- else
89
- self[key] = second[key]
90
- end
91
- end
92
- self
93
- end
94
-
95
- # Merge another array with this one, accumulating values that appear in both
96
- # into arrays.
97
- #
98
- # Note: array values will be flatten'ed. Sorry.
99
- #
100
- # first = {
101
- # :balls=> "monkey",
102
- # :data=> {
103
- # :name=> {:first=> "Sam", :middle=>"I", :last=>"am"}}}
104
- # second = {
105
- # :data=> {
106
- # :name=> {:middle=>["you", "me", "everyone we know"], :last => "are"}},
107
- # 1 => [1,2,5] }
108
- #
109
- # p first.deep_merge(second)
110
- # # => {:data=>{:name=>{:last=>"are", :middle=>["you", "me", "everyone we know"], :first=>"Sam"}}, 1=>[1, 2, 5], :balls=>"monkey"}
111
- # p first.keep_merge(second)
112
- # # => {:data=>{:name=>{:last=>["am", "are"], :middle=>["I", "you", "me", "everyone we know"], :first=>"Sam"}}, 1=>[1, 2, 5], :balls=>"monkey"}
113
- def keep_merge(second)
114
- target = dup
115
- second.each do |key, val2|
116
- if second[key].is_a?(Hash) && self[key].is_a?(Hash)
117
- target[key] = target[key].keep_merge(val2)
118
- else
119
- target[key] = target.include?(key) ? [target[key], val2].flatten.uniq : val2
120
- end
121
- end
122
- target
123
- end
124
-
125
- # This is polymorphic to Array#assoc -- that is, it allows you treat a Hash
126
- # and an array of pairs equivalently using assoc(). We remind you that Array#assoc
127
- #
128
- # "Searches through an array whose elements are also arrays comparing obj
129
- # with the first element of each contained array using obj.== . Returns the
130
- # first contained array that matches (that is, the first associated array)
131
- # or nil if no match is found. See also Array#rassoc."
132
- #
133
- # Note that this returns an /array/ of [key, val] pairs.
134
- def assoc(key)
135
- self.include?(key) ? [key, self[key]] : nil
136
- end
137
- def rassoc(key)
138
- self.has_value?(key) ? [key, self[key]] : nil
139
- end
140
-
141
- # Allows loading ostruct directly from YAML
142
- def to_openstruct
143
- map{ |el| el.to_openstruct }
144
- end
145
-
146
-
147
- # Slice a hash to include only the given keys. This is useful for
148
- # limiting an options hash to valid keys before passing to a method:
149
- #
150
- # def search(criteria = {})
151
- # assert_valid_keys(:mass, :velocity, :time)
152
- # end
153
- #
154
- # search(options.slice(:mass, :velocity, :time))
155
- # Returns a new hash with only the given keys.
156
- def slice(*keys)
157
- require 'set'
158
- allowed = Set.new(respond_to?(:convert_key) ? keys.map { |key| convert_key(key) } : keys)
159
- reject { |key,| !allowed.include?(key) }
160
- end
161
-
162
- # Replaces the hash with only the given keys.
163
- def slice!(*keys)
164
- replace(slice(*keys))
165
- end
166
-
167
- # remove all key-value pairs where the value is nil
168
- def compact
169
- reject{|k,v| v.nil? }
170
- end
171
-
172
- # Replaces the hash with its compacted self
173
- def compact!
174
- replace(compact)
175
- end
176
-
177
- # Works like <tt>Enumerable::find</tt> but loops over the keys of
178
- # this Hash instead of of arrays of [key,value] and, in the absence
179
- # of a matching key, doesn't call the default argument, merely
180
- # returns it instead.
181
- def dispatch(default=nil, &block)
182
- match = self.keys.find(nil,&block)
183
- match ? self[match] : default
184
- end
185
-
186
-
187
- # Recurses through the pairs of this Hash collecting all String or
188
- # Symbol "terminal" nodes.
189
- def terminals &block
190
- terminals = []
191
- each_value do |value|
192
- if value.respond_to? :terminals then
193
- terminals += value.terminals
194
- else
195
- terminals << value
196
- end
197
- end
198
- terminals.map! {|terminal| yield terminal } if block
199
- terminals
200
- end
201
-
202
- # Emit the data from this Hash into the given +uri+.
203
- def emit uri
204
- IMW.open!(uri).emit(self)
205
- end
206
- end
@@ -1,39 +0,0 @@
1
- class String
2
-
3
- # Does the string end with the specified +suffix+ (stolen from
4
- # <tt>ActiveSupport::CoreExtensions::String::StartsEndsWith</tt>)?
5
- def ends_with?(suffix)
6
- suffix = suffix.to_s
7
- self[-suffix.length, suffix.length] == suffix
8
- end
9
-
10
- # Does the string start with the specified +prefix+ (stolen from
11
- # <tt>ActiveSupport::CoreExtensions::String::StartsEndsWith</tt>)?
12
- def starts_with?(prefix)
13
- prefix = prefix.to_s
14
- self[0, prefix.length] == prefix
15
- end
16
-
17
- # # Downcases a string and replaces spaces with underscores. This
18
- # # works slightly differently than
19
- # # <tt>ActiveSupport::CoreExtensions::String::Inflections.underscore</tt>
20
- # # which is intended to be used for camel-cased Ruby constants.
21
- # #
22
- # # "A long and unwieldy phrase".underscore #=> "a_long_and_unwieldy_phrase"
23
- # def underscore
24
- # self.to_s.tr("-", "_").tr(" ","_").downcase
25
- # end
26
-
27
- # Returns the handle corresponding to this string as a symbol:
28
- #
29
- # "A possible title of a dataset".handle #=> :a_possible_title_of_a_dataset
30
- def to_handle
31
- self.downcase.underscore.to_sym
32
- end
33
-
34
- # Emit this string into the given +uri+.
35
- def emit uri
36
- IMW.open!(uri).emit(self)
37
- end
38
-
39
- end
@@ -1,42 +0,0 @@
1
- Struct.class_eval do
2
- def slice *attrs
3
- hsh = {}
4
- attrs.each{|attr| hsh[attr] = self.send(attr) }
5
- hsh
6
- end
7
-
8
- def to_hash
9
- slice(*self.class.members)
10
- end
11
- def self.from_hash(hsh)
12
- self.new *hsh.values_at(*self.members.map(&:to_sym))
13
- end
14
-
15
-
16
- #
17
- # values_at like a hash
18
- #
19
- def values_of *attrs
20
- slice(*attrs).values_at(*attrs)
21
- end
22
- def each_pair *args, &block
23
- self.to_hash.each_pair(*args, &block)
24
- end
25
-
26
- def merge *args
27
- self.dup.merge! *args
28
- end
29
- def merge! hashlike, &block
30
- raise "can't handle block arg yet" if block
31
- hashlike.each_pair{|k,v| self[k] = v }
32
- self
33
- end
34
- alias_method :update, :merge!
35
- def indifferent_merge *args, &block
36
- self.dup.indifferent_merge! *args
37
- end
38
- def indifferent_merge! hashlike, &block
39
- merge! hashlike.reject{|k,v| ! self.members.include?(k.to_s) }
40
- end
41
-
42
- end
@@ -1,15 +0,0 @@
1
- class Symbol
2
-
3
- # Turn the symbol into a simple proc (stolen from
4
- # <tt>ActiveSupport::CoreExtensions::Symbol</tt>).
5
- def to_proc
6
- Proc.new { |*args| args.shift.__send__(self, *args) }
7
- end
8
-
9
- # Returns the symbol itself (for compatibility with
10
- # <tt>String.uniqnae</tt> and so on.
11
- def handle
12
- self
13
- end
14
-
15
- end
@@ -1,63 +0,0 @@
1
- require 'imw/utils/extensions/string'
2
- require 'imw/utils/extensions/array'
3
- require 'imw/utils/extensions/hash'
4
- require 'imw/utils/extensions/struct'
5
- require 'imw/utils/extensions/symbol'
6
-
7
- module IMW
8
- # A replacement for the standard system call which raises an
9
- # IMW::SystemCallError if the command fails which prints better
10
- # debugging info.
11
- #
12
- # This function relies upon Kernel.system and obeys the same rules:
13
- #
14
- # - if +commands+ has only only a single element then no shell
15
- # characters or spaces are escaped -- you have to do it yourself
16
- # or you get to use shell characters, depending on your
17
- # perspective.
18
- #
19
- # - if +commands+ is a list of elements then the second and further
20
- # elements in the list have their shell characters and spaces
21
- # escaped
22
- #
23
- # But it also has its own rules:
24
- #
25
- # - When one of the +commands+ is an empty or blank string,
26
- # Kernel.system honors it and escapes it properly and sends it
27
- # along for evaluation. This can be a problem for some programs
28
- # and so IMW.system excludes blank (as in <tt>blank?</tt>)
29
- # elements of +commands+.
30
- #
31
- # - +commands+ will be flattened (see the gotcha below)
32
- #
33
- # Calling out to the shell like this is often brittle. Imagine
34
- # defining
35
- #
36
- # prog = 'some_prog'
37
- # flags = '-v -f'
38
- # args = 'file.txt'
39
- #
40
- # and later calling
41
- #
42
- # IMW.system prog, flags, args
43
- #
44
- # The space in the second argument ('-v -f') will be escaped and
45
- # will therefore not be properly parsed by +some_prog+. Instead try
46
- #
47
- # prog = 'some_prog'
48
- # flags = ['-v', '-f']
49
- # args = ['file.txt']
50
- #
51
- # IMW.system prog, flags, *args
52
- #
53
- # which will work fine since +flags+ will automatically be flattend.
54
- def self.system *commands
55
- stripped_commands = commands.flatten.map { |command| command.to_s unless command.blank? }.compact
56
- IMW.announce_if_verbose(stripped_commands.join(" "))
57
- exit_code = Kernel.system(*stripped_commands)
58
- raise IMW::SystemCallError.new($?.dup, commands.join(' ')) unless $?.success?
59
- exit_code
60
- end
61
- end
62
-
63
-