imw 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.6
1
+ 0.2.7
data/lib/imw/archives.rb CHANGED
@@ -5,13 +5,13 @@ module IMW
5
5
 
6
6
  # Handlers for archives.
7
7
  HANDLERS = [
8
- ["Archives::Tarbz2", Proc.new { |r| r.is_local? && r.path =~ /\.tar\.bz2$/ } ],
9
- ["Archives::Tarbz2", Proc.new { |r| r.is_local? && r.path =~ /\.tbz2$/ } ],
10
- ["Archives::Targz", Proc.new { |r| r.is_local? && r.path =~ /\.tar\.gz$/ } ],
11
- ["Archives::Targz", Proc.new { |r| r.is_local? && r.path =~ /\.tgz$/ } ],
12
- ["Archives::Tar", Proc.new { |r| r.is_local? && r.path =~ /\.tar$/ } ],
13
- ["Archives::Rar", Proc.new { |r| r.is_local? && r.path =~ /\.rar$/ } ],
14
- ["Archives::Zip", Proc.new { |r| r.is_local? && r.path =~ /\.zip$/ } ]
8
+ ["Archives::Tarbz2", Proc.new { |r| r.is_local? && r.path =~ /\.tar\.bz2$/i } ],
9
+ ["Archives::Tarbz2", Proc.new { |r| r.is_local? && r.path =~ /\.tbz2$/i } ],
10
+ ["Archives::Targz", Proc.new { |r| r.is_local? && r.path =~ /\.tar\.gz$/i } ],
11
+ ["Archives::Targz", Proc.new { |r| r.is_local? && r.path =~ /\.tgz$/i } ],
12
+ ["Archives::Tar", Proc.new { |r| r.is_local? && r.path =~ /\.tar$/i } ],
13
+ ["Archives::Rar", Proc.new { |r| r.is_local? && r.path =~ /\.rar$/i } ],
14
+ ["Archives::Zip", Proc.new { |r| r.is_local? && r.path =~ /\.zip$/i } ]
15
15
  ]
16
16
 
17
17
  autoload :Rar, 'imw/archives/rar'
@@ -37,13 +37,6 @@ module IMW
37
37
  true
38
38
  end
39
39
 
40
- # Archives should not return snippets.
41
- #
42
- # @return [nil]
43
- def snippet
44
- nil
45
- end
46
-
47
40
  # Create an archive of the given +input_paths+.
48
41
  #
49
42
  # The input paths must be strings and will be shell-escaped
@@ -10,9 +10,9 @@ module IMW
10
10
  # well as the IMW::CompressedFiles::Compressible module for
11
11
  # compressing regular files.
12
12
  HANDLERS = [
13
- ["CompressedFiles::Compressible", Proc.new { |r| r.is_local? && r.is_file? && r.path != /\.(bz2|gz|tgz|tbz2)$/ } ],
14
- ["CompressedFiles::Gz", Proc.new { |r| r.is_local? && r.path =~ /\.gz$/ && r.path !~ /\.tar\.gz$/ && r.path !~ /\.tgz$/ } ],
15
- ["CompressedFiles::Bz2", Proc.new { |r| r.is_local? && r.path =~ /\.bz2$/ && r.path !~ /\.tar\.bz2$/ && r.path !~ /\.tbz2$/ } ]
13
+ ["CompressedFiles::Compressible", Proc.new { |r| r.is_local? && r.is_file? && r.path != /\.(bz2|gz|tgz|tbz2)$/i } ],
14
+ ["CompressedFiles::Gz", Proc.new { |r| r.is_local? && r.path =~ /\.gz$/i && r.path !~ /\.tar\.gz$/i && r.path !~ /\.tgz$/i } ],
15
+ ["CompressedFiles::Bz2", Proc.new { |r| r.is_local? && r.path =~ /\.bz2$/i && r.path !~ /\.tar\.bz2$/i && r.path !~ /\.tbz2$/i } ]
16
16
  ]
17
17
 
18
18
  # Defines methods for decompressing a compressed file. This
@@ -71,13 +71,6 @@ module IMW
71
71
  IMW.open(decompressed_path)
72
72
  end
73
73
 
74
- # Compressed files should not return snippets.
75
- #
76
- # @return [nil]
77
- def snippet
78
- nil
79
- end
80
-
81
74
  # Decompress this file in its present directory, overwriting any
82
75
  # existing files while keeping the original compressed file.
83
76
  #
data/lib/imw/formats.rb CHANGED
@@ -14,16 +14,16 @@ module IMW
14
14
  # Handlers which augment a resource with data format specific
15
15
  # methods.
16
16
  HANDLERS = [
17
- [ "Formats::Csv", /\.csv$/ ],
18
- [ "Formats::Tsv", /\.tsv$/ ],
19
- [ "Formats::Excel", /\.xslx?$/ ],
20
- [ "Formats::Json", /\.json$/ ],
21
- [ "Formats::Xml", /\.xml$/ ],
22
- [ "Formats::Xsl", /\.xsl$/ ],
23
- [ "Formats::Html", /\.html?$/ ],
24
- [ "Formats::Xhtml", /\.xhtml?$/ ],
25
- [ "Formats::Rdf", /\.rdf?$/ ],
26
- [ "Formats::Yaml", /\.ya?ml$/ ]
17
+ [ "Formats::Csv", /\.csv$/i ],
18
+ [ "Formats::Tsv", /\.tsv$/i ],
19
+ [ "Formats::Excel", /\.xslx?$/i ],
20
+ [ "Formats::Json", /\.json$/i ],
21
+ [ "Formats::Xml", /\.xml$/i ],
22
+ [ "Formats::Xsl", /\.xsl$/i ],
23
+ [ "Formats::Html", /\.html?$/i ],
24
+ [ "Formats::Xhtml", /\.xhtml?$/i ],
25
+ [ "Formats::Rdf", /\.rdf?$/i ],
26
+ [ "Formats::Yaml", /\.ya?ml$/i ]
27
27
  ]
28
28
  end
29
29
  end
data/lib/imw/resource.rb CHANGED
@@ -74,7 +74,14 @@ module IMW
74
74
  # accepts all the same arguments as IMW::Resource.new.
75
75
  class Resource
76
76
 
77
- attr_reader :uri, :mode
77
+ # The URI object associated with this resource.
78
+ attr_reader :uri
79
+
80
+ # The mode in which to access this resource.
81
+ attr_accessor :mode
82
+
83
+ # A copy of the options passed to this resource on initialization.
84
+ attr_accessor :resource_options
78
85
 
79
86
  # Create a new resource representing +uri+.
80
87
  #
@@ -102,8 +109,9 @@ module IMW
102
109
  # @option options [String] mode the mode to open the resource in (will be ignored when inapplicable)
103
110
  # @return [IMW::Resource]
104
111
  def initialize uri, options={}
105
- self.uri = uri
106
- @mode = options[:mode] || 'r'
112
+ self.uri = uri
113
+ self.resource_options = options
114
+ self.mode = options[:mode] || 'r'
107
115
  extend_appropriately!(options) unless options[:no_modules]
108
116
  end
109
117
 
@@ -192,6 +200,13 @@ module IMW
192
200
  @name ||= extname ? basename[0,basename.length - extname.length] : basename
193
201
  end
194
202
 
203
+ # Returns the user associated with the host of this URI.
204
+ #
205
+ # @return [String]
206
+ def user
207
+ @user ||= uri.user
208
+ end
209
+
195
210
  def to_s
196
211
  uri.to_s
197
212
  end
data/lib/imw/schemes.rb CHANGED
@@ -6,14 +6,14 @@ module IMW
6
6
  autoload :HTTP, 'imw/schemes/http'
7
7
  autoload :HTTPS, 'imw/schemes/http'
8
8
  autoload :HDFS, 'imw/schemes/hdfs'
9
-
9
+
10
10
  HANDLERS = [
11
11
  ["Schemes::Local::Base", Proc.new { |resource| resource.scheme == 'file' || resource.scheme.blank? } ],
12
12
  ["Schemes::Remote::Base", Proc.new { |resource| resource.scheme != 'file' && resource.scheme.present? } ],
13
- ["Schemes::S3", %r{^s3://} ],
14
- ["Schemes::HTTP", %r{^http://} ],
15
- ["Schemes::HTTPS", %r{^https://} ],
16
- ["Schemes::HDFS", %r{^hdfs://} ]
13
+ ["Schemes::S3", %r{^s3://}i ],
14
+ ["Schemes::HTTP", %r{^http://}i ],
15
+ ["Schemes::HTTPS", %r{^https://}i ],
16
+ ["Schemes::HDFS", %r{^hdfs://}i ]
17
17
  ]
18
18
  end
19
19
  end
@@ -151,20 +151,17 @@ module IMW
151
151
  # - extension
152
152
  # - snippet
153
153
  def summary
154
- {
154
+ data = {
155
155
  :basename => basename,
156
156
  :size => size,
157
- :extension => extension,
158
- :snippet => snippet
157
+ :extension => extension
159
158
  }
159
+ if respond_to?(:snippet)
160
+ data[:snippet] = snippet
161
+ end
162
+ data
160
163
  end
161
164
 
162
- # Return a 1024-char snippet from this local file.
163
- #
164
- # @return [String]
165
- def snippet
166
- io.read(1024)
167
- end
168
165
  end
169
166
 
170
167
  # Defines methods for manipulating the contents of a local
@@ -44,7 +44,7 @@ module IMW
44
44
  def summary
45
45
  @summary ||= inputs.map do |input|
46
46
  if input.respond_to?(:summary)
47
- input.summary
47
+ input.summary rescue {}
48
48
  else
49
49
  {}
50
50
  end
@@ -1,14 +1,72 @@
1
1
  module IMW
2
2
  module Tools
3
+
4
+ # A class to encapsulate transferring a resource from one URI to
5
+ # another.
3
6
  class Transferer
4
7
 
5
- attr_accessor :action, :source, :destination
8
+ # The action this Transferer is to take.
9
+ #
10
+ # @return [:cp, :mv]
11
+ attr_reader :action
12
+
13
+ # Set the action of this Transferer.
14
+ #
15
+ # Will raise an error unless +the_action+ is <tt>:cp</tt> or
16
+ # <tt>:mv</tt>.
17
+ #
18
+ # @param [:cp, :mv] the_action
19
+ def action= the_action
20
+ @action = case the_action.to_sym
21
+ when :cp, :copy then :cp
22
+ when :mv, :move, :mv! then :mv
23
+ else raise IMW::ArgumentError.new("action (#{the_action}) must be one of `cp' (or `copy') or `mv' (or `move' or `mv!'")
24
+ end
25
+ end
26
+
27
+ # The source resource.
28
+ #
29
+ # @return [IMW::Resource]
30
+ attr_reader :source
31
+
32
+ # Set the source for this transferer.
33
+ #
34
+ # If +the_source+ is local, will check that it exists and raise
35
+ # an error if not.
36
+ #
37
+ # @param [String, IMW::Resource] the_source
38
+ def source= the_source
39
+ s = IMW.open(the_source)
40
+ s.should_exist!("Cannot #{action_verb}") if s.is_local?
41
+ @source = s
42
+ end
6
43
 
44
+ # The destination resource.
45
+ #
46
+ # @return [IMW::Resource]
47
+ attr_reader :destination
48
+
49
+ # Set the destination for this transferer.
50
+ #
51
+ # If +the_destination+ is local, will check that its parent
52
+ # directory exists and raise an error if not.
53
+ def destination= the_destination
54
+ d = IMW.open(the_destination)
55
+ d.dir.should_exist!("Cannot #{action_verb}") if d.is_local?
56
+ @destination = d
57
+ end
58
+
59
+ # Instantiate a new transferer to take the given +action+ on
60
+ # +source+ and +destination+.
61
+ #
62
+ # @param [:cp, :mv] action the action to take
63
+ # @param [String, IMW::Resource] source
64
+ # @param [String, IMW::Resource] destination
7
65
  def initialize action, source, destination
8
- @action = normalize_action(action)
9
- @source = IMW.open(source)
10
- @destination = IMW.open(destination)
11
- raise IMW::PathError.new("Source and destination have the same URI: #{@source.uri}") if @source.uri.to_s == @destination.uri.to_s
66
+ self.action = action
67
+ self.source = source
68
+ self.destination = destination
69
+ raise IMW::PathError.new("Source and destination have the same URI: #{source}") if self.source.uri.to_s == self.destination.uri.to_s
12
70
  end
13
71
 
14
72
  # Transfer source to destination.
@@ -16,38 +74,31 @@ module IMW
16
74
  # For local transfer, will raise errors unless the necessary
17
75
  # paths exist.
18
76
  def transfer!
19
- if source.is_local?
20
- source.should_exist!("Cannot #{action}")
21
- source_scheme = 'file'
22
- else
23
- source_scheme = source.scheme
24
- end
25
-
26
- if destination.is_local?
27
- destination.dir.should_exist!("Cannot #{action}")
28
- destination_scheme = 'file'
29
- else
30
- destination_scheme = destination.scheme
31
- end
32
-
33
- method = "#{source_scheme}_to_#{destination_scheme}"
34
-
35
- if respond_to?(method)
36
- send(method)
37
- else
38
- raise IMW::NoMethodError.new("Do not know how to #{action} #{source.uri} => #{destination.uri} (#{source_scheme.inspect} => #{destination_scheme.inspect})")
39
- end
77
+ IMW.announce_if_verbose("#{action_gerund.capitalize} #{source} to #{destination}")
78
+ send(transfer_method)
40
79
  destination.reopen
41
80
  end
42
81
 
43
82
  protected
44
83
 
45
- def normalize_action action # :nodoc:
46
- case action.to_sym
47
- when :cp, :copy then :cp
48
- when :mv, :move, :mv! then :mv
49
- else raise IMW::ArgumentError.new("action (#{action}) must be one of `cp' (or `copy') or `mv' (or `move' or `mv!'")
50
- end
84
+ # Return the name of the method that should be used to transfer
85
+ # +source+ to +destination+.
86
+ #
87
+ # @return [String]
88
+ def transfer_method
89
+ source_scheme = source.is_local? ? 'file' : source.scheme
90
+ destination_scheme = destination.is_local? ? 'file' : destination.scheme
91
+ method = "#{source_scheme}_to_#{destination_scheme}"
92
+ raise IMW::NoMethodError.new("Do not know how to #{action_verb} #{source} to #{destination}") unless respond_to?(method)
93
+ method
94
+ end
95
+
96
+ def action_verb # :nodoc
97
+ action == :cp ? "copy" : "move"
98
+ end
99
+
100
+ def action_gerund # :nodoc
101
+ action == :cp ? "copying" : "moving"
51
102
  end
52
103
 
53
104
  #
@@ -0,0 +1,3 @@
1
+ require File.join(File.dirname(__FILE__),'../../spec_helper')
2
+
3
+
@@ -7,65 +7,77 @@ describe IMW::Tools::Transferer do
7
7
  @nested = IMW.open('new_dir/nested.txt')
8
8
  @nested_dir = IMW.open('new_dir/nested')
9
9
  @local = IMW.open("foobar.txt")
10
+ @dest = IMW.open("barbaz.txt")
10
11
  @http = IMW.open("http://www.google.com")
11
12
  @hdfs = IMW.open("hdfs:///path/to/foobar.txt")
12
13
  @s3 = IMW.open("s3://mybucket/foo/bar")
14
+ IMWTest::Random.file(@local.path)
13
15
  end
14
16
 
15
17
  it "should raise an error unless the action is one of :cp, :copy, :mv :move, or :mv!" do
16
- IMW::Tools::Transferer.new(:cp, @local, @http).should be
17
- IMW::Tools::Transferer.new(:copy, @local, @http).should be
18
- IMW::Tools::Transferer.new(:mv, @local, @http).should be
19
- IMW::Tools::Transferer.new(:move, @local, @http).should be
20
- IMW::Tools::Transferer.new(:mv!, @local, @http).should be
21
- lambda { IMW::Tools::Transferer.new(:foobar, @local, @http) }.should raise_error(IMW::ArgumentError)
18
+ @transferer = IMW::Tools::Transferer.new(:cp, @local, @http)
19
+ @transferer.action = :cp
20
+ @transferer.action = :copy
21
+ @transferer.action = :mv
22
+ @transferer.action = :mv!
23
+ @transferer.action = :move
24
+ lambda { @transferer.action = :foobar }.should raise_error(IMW::ArgumentError)
22
25
  end
23
26
 
24
27
  it "should raise an error if the source and the destination have the same URI" do
25
28
  lambda { IMW::Tools::Transferer.new(:cp, @local, @local) }.should raise_error(IMW::PathError)
26
29
  end
27
30
 
31
+ it "should print a log message when IMW is verbose" do
32
+ IMW.stub!(:verbose).and_return(:true)
33
+ IMW.should_receive(:announce_if_verbose).with("Copying #{@local} to #{@dest}")
34
+ IMW::Tools::Transferer.new(:cp, @local, @dest).transfer!
35
+ end
36
+
28
37
  describe "transfering local files" do
29
38
 
30
39
  before do
31
40
  IMWTest::Random.file @local.path
41
+ @transferer = IMW::Tools::Transferer.new(:cp, @local, @dest)
32
42
  end
33
43
 
34
44
  it "should raise an error if the source doesn't exist" do
35
45
  @local.rm!
36
- lambda { IMW::Tools::Transferer.new(:cp, @local, 'barbaz.txt').transfer! }.should raise_error(IMW::PathError)
46
+ lambda { @transferer.source = @local }.should raise_error(IMW::PathError)
37
47
  end
38
48
 
39
49
  it "should raise an error if the directory of the destination doesn't exist" do
40
- lambda { IMW::Tools::Transferer.new(:cp, @local, @nested).transfer! }.should raise_error(IMW::PathError)
50
+ lambda { @transferer.destination = @nested }.should raise_error(IMW::PathError)
41
51
  end
42
52
 
43
53
  it "can copy a local file" do
44
- IMW::Tools::Transferer.new(:cp, @local, 'barbaz.txt').transfer!
54
+ @transferer.transfer!
45
55
  @local.exist?.should be_true
46
- IMW.open('barbaz.txt').exist?.should be_true
56
+ @dest.exist?.should be_true
47
57
  end
48
58
 
49
59
  it "can copy a local file to a directory" do
50
60
  FileUtils.mkdir(@dir.path)
51
- IMW::Tools::Transferer.new(:cp, @local, @dir).transfer!
61
+ @transferer.destination = @dir
62
+ @transferer.transfer!
52
63
  IMW.open(File.join(@dir.path, @local.basename)).exist?.should be_true
53
64
  end
54
65
 
55
66
  it "can move a local file" do
56
- IMW::Tools::Transferer.new(:mv, @local, 'barbaz.txt').transfer!
67
+ @transferer.action = :mv
68
+ @transferer.transfer!
57
69
  @local.exist?.should be_false
58
- IMW.open('barbaz.txt').exist?.should be_true
70
+ @dest.exist?.should be_true
59
71
  end
60
72
 
61
73
  it "can move a local file to a directory" do
62
74
  FileUtils.mkdir(@dir.path)
63
- IMW::Tools::Transferer.new(:mv, @local, @dir).transfer!
75
+ @transferer.action = :mv
76
+ @transferer.destination = @dir
77
+ @transferer.transfer!
64
78
  IMW.open(File.join(@dir.path, @local.basename)).exist?.should be_true
65
79
  @local.exist?.should be_false
66
80
  end
67
-
68
-
69
81
  end
70
82
 
71
83
  describe "transfering local directories" do
metadata CHANGED
@@ -1,7 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: imw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6
4
+ hash: 25
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 2
9
+ - 7
10
+ version: 0.2.7
5
11
  platform: ruby
6
12
  authors:
7
13
  - Dhruv Bansal
@@ -10,7 +16,7 @@ autorequire:
10
16
  bindir: bin
11
17
  cert_chain: []
12
18
 
13
- date: 2010-06-15 00:00:00 -05:00
19
+ date: 2010-07-09 00:00:00 -05:00
14
20
  default_executable: imw
15
21
  dependencies: []
16
22
 
@@ -135,6 +141,7 @@ files:
135
141
  - spec/support/paths_matcher.rb
136
142
  - spec/support/random.rb
137
143
  - spec/support/without_regard_to_order_matcher.rb
144
+ - spec/imw/schemes/sql_spec.rb
138
145
  has_rdoc: true
139
146
  homepage: http://github.com/infochimps/imw
140
147
  licenses: []
@@ -145,21 +152,27 @@ rdoc_options:
145
152
  require_paths:
146
153
  - lib
147
154
  required_ruby_version: !ruby/object:Gem::Requirement
155
+ none: false
148
156
  requirements:
149
157
  - - ">="
150
158
  - !ruby/object:Gem::Version
159
+ hash: 3
160
+ segments:
161
+ - 0
151
162
  version: "0"
152
- version:
153
163
  required_rubygems_version: !ruby/object:Gem::Requirement
164
+ none: false
154
165
  requirements:
155
166
  - - ">="
156
167
  - !ruby/object:Gem::Version
168
+ hash: 3
169
+ segments:
170
+ - 0
157
171
  version: "0"
158
- version:
159
172
  requirements: []
160
173
 
161
174
  rubyforge_project:
162
- rubygems_version: 1.3.5
175
+ rubygems_version: 1.3.7
163
176
  signing_key:
164
177
  specification_version: 3
165
178
  summary: The Infinite Monkeywrench (IMW) makes acquiring, extracting, transforming, loading, and packaging data easy.
@@ -188,6 +201,7 @@ test_files:
188
201
  - spec/imw/archives_spec.rb
189
202
  - spec/imw/schemes/http_spec.rb
190
203
  - spec/imw/schemes/local_spec.rb
204
+ - spec/imw/schemes/sql_spec.rb
191
205
  - spec/imw/schemes/remote_spec.rb
192
206
  - spec/imw/schemes/hdfs_spec.rb
193
207
  - spec/imw/schemes/s3_spec.rb