imw 0.2.6 → 0.2.7

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.6
1
+ 0.2.7
data/lib/imw/archives.rb CHANGED
@@ -5,13 +5,13 @@ module IMW
5
5
 
6
6
  # Handlers for archives.
7
7
  HANDLERS = [
8
- ["Archives::Tarbz2", Proc.new { |r| r.is_local? && r.path =~ /\.tar\.bz2$/ } ],
9
- ["Archives::Tarbz2", Proc.new { |r| r.is_local? && r.path =~ /\.tbz2$/ } ],
10
- ["Archives::Targz", Proc.new { |r| r.is_local? && r.path =~ /\.tar\.gz$/ } ],
11
- ["Archives::Targz", Proc.new { |r| r.is_local? && r.path =~ /\.tgz$/ } ],
12
- ["Archives::Tar", Proc.new { |r| r.is_local? && r.path =~ /\.tar$/ } ],
13
- ["Archives::Rar", Proc.new { |r| r.is_local? && r.path =~ /\.rar$/ } ],
14
- ["Archives::Zip", Proc.new { |r| r.is_local? && r.path =~ /\.zip$/ } ]
8
+ ["Archives::Tarbz2", Proc.new { |r| r.is_local? && r.path =~ /\.tar\.bz2$/i } ],
9
+ ["Archives::Tarbz2", Proc.new { |r| r.is_local? && r.path =~ /\.tbz2$/i } ],
10
+ ["Archives::Targz", Proc.new { |r| r.is_local? && r.path =~ /\.tar\.gz$/i } ],
11
+ ["Archives::Targz", Proc.new { |r| r.is_local? && r.path =~ /\.tgz$/i } ],
12
+ ["Archives::Tar", Proc.new { |r| r.is_local? && r.path =~ /\.tar$/i } ],
13
+ ["Archives::Rar", Proc.new { |r| r.is_local? && r.path =~ /\.rar$/i } ],
14
+ ["Archives::Zip", Proc.new { |r| r.is_local? && r.path =~ /\.zip$/i } ]
15
15
  ]
16
16
 
17
17
  autoload :Rar, 'imw/archives/rar'
@@ -37,13 +37,6 @@ module IMW
37
37
  true
38
38
  end
39
39
 
40
- # Archives should not return snippets.
41
- #
42
- # @return [nil]
43
- def snippet
44
- nil
45
- end
46
-
47
40
  # Create an archive of the given +input_paths+.
48
41
  #
49
42
  # The input paths must be strings and will be shell-escaped
@@ -10,9 +10,9 @@ module IMW
10
10
  # well as the IMW::CompressedFiles::Compressible module for
11
11
  # compressing regular files.
12
12
  HANDLERS = [
13
- ["CompressedFiles::Compressible", Proc.new { |r| r.is_local? && r.is_file? && r.path != /\.(bz2|gz|tgz|tbz2)$/ } ],
14
- ["CompressedFiles::Gz", Proc.new { |r| r.is_local? && r.path =~ /\.gz$/ && r.path !~ /\.tar\.gz$/ && r.path !~ /\.tgz$/ } ],
15
- ["CompressedFiles::Bz2", Proc.new { |r| r.is_local? && r.path =~ /\.bz2$/ && r.path !~ /\.tar\.bz2$/ && r.path !~ /\.tbz2$/ } ]
13
+ ["CompressedFiles::Compressible", Proc.new { |r| r.is_local? && r.is_file? && r.path != /\.(bz2|gz|tgz|tbz2)$/i } ],
14
+ ["CompressedFiles::Gz", Proc.new { |r| r.is_local? && r.path =~ /\.gz$/i && r.path !~ /\.tar\.gz$/i && r.path !~ /\.tgz$/i } ],
15
+ ["CompressedFiles::Bz2", Proc.new { |r| r.is_local? && r.path =~ /\.bz2$/i && r.path !~ /\.tar\.bz2$/i && r.path !~ /\.tbz2$/i } ]
16
16
  ]
17
17
 
18
18
  # Defines methods for decompressing a compressed file. This
@@ -71,13 +71,6 @@ module IMW
71
71
  IMW.open(decompressed_path)
72
72
  end
73
73
 
74
- # Compressed files should not return snippets.
75
- #
76
- # @return [nil]
77
- def snippet
78
- nil
79
- end
80
-
81
74
  # Decompress this file in its present directory, overwriting any
82
75
  # existing files while keeping the original compressed file.
83
76
  #
data/lib/imw/formats.rb CHANGED
@@ -14,16 +14,16 @@ module IMW
14
14
  # Handlers which augment a resource with data format specific
15
15
  # methods.
16
16
  HANDLERS = [
17
- [ "Formats::Csv", /\.csv$/ ],
18
- [ "Formats::Tsv", /\.tsv$/ ],
19
- [ "Formats::Excel", /\.xslx?$/ ],
20
- [ "Formats::Json", /\.json$/ ],
21
- [ "Formats::Xml", /\.xml$/ ],
22
- [ "Formats::Xsl", /\.xsl$/ ],
23
- [ "Formats::Html", /\.html?$/ ],
24
- [ "Formats::Xhtml", /\.xhtml?$/ ],
25
- [ "Formats::Rdf", /\.rdf?$/ ],
26
- [ "Formats::Yaml", /\.ya?ml$/ ]
17
+ [ "Formats::Csv", /\.csv$/i ],
18
+ [ "Formats::Tsv", /\.tsv$/i ],
19
+ [ "Formats::Excel", /\.xslx?$/i ],
20
+ [ "Formats::Json", /\.json$/i ],
21
+ [ "Formats::Xml", /\.xml$/i ],
22
+ [ "Formats::Xsl", /\.xsl$/i ],
23
+ [ "Formats::Html", /\.html?$/i ],
24
+ [ "Formats::Xhtml", /\.xhtml?$/i ],
25
+ [ "Formats::Rdf", /\.rdf?$/i ],
26
+ [ "Formats::Yaml", /\.ya?ml$/i ]
27
27
  ]
28
28
  end
29
29
  end
data/lib/imw/resource.rb CHANGED
@@ -74,7 +74,14 @@ module IMW
74
74
  # accepts all the same arguments as IMW::Resource.new.
75
75
  class Resource
76
76
 
77
- attr_reader :uri, :mode
77
+ # The URI object associated with this resource.
78
+ attr_reader :uri
79
+
80
+ # The mode in which to access this resource.
81
+ attr_accessor :mode
82
+
83
+ # A copy of the options passed to this resource on initialization.
84
+ attr_accessor :resource_options
78
85
 
79
86
  # Create a new resource representing +uri+.
80
87
  #
@@ -102,8 +109,9 @@ module IMW
102
109
  # @option options [String] mode the mode to open the resource in (will be ignored when inapplicable)
103
110
  # @return [IMW::Resource]
104
111
  def initialize uri, options={}
105
- self.uri = uri
106
- @mode = options[:mode] || 'r'
112
+ self.uri = uri
113
+ self.resource_options = options
114
+ self.mode = options[:mode] || 'r'
107
115
  extend_appropriately!(options) unless options[:no_modules]
108
116
  end
109
117
 
@@ -192,6 +200,13 @@ module IMW
192
200
  @name ||= extname ? basename[0,basename.length - extname.length] : basename
193
201
  end
194
202
 
203
+ # Returns the user associated with the host of this URI.
204
+ #
205
+ # @return [String]
206
+ def user
207
+ @user ||= uri.user
208
+ end
209
+
195
210
  def to_s
196
211
  uri.to_s
197
212
  end
data/lib/imw/schemes.rb CHANGED
@@ -6,14 +6,14 @@ module IMW
6
6
  autoload :HTTP, 'imw/schemes/http'
7
7
  autoload :HTTPS, 'imw/schemes/http'
8
8
  autoload :HDFS, 'imw/schemes/hdfs'
9
-
9
+
10
10
  HANDLERS = [
11
11
  ["Schemes::Local::Base", Proc.new { |resource| resource.scheme == 'file' || resource.scheme.blank? } ],
12
12
  ["Schemes::Remote::Base", Proc.new { |resource| resource.scheme != 'file' && resource.scheme.present? } ],
13
- ["Schemes::S3", %r{^s3://} ],
14
- ["Schemes::HTTP", %r{^http://} ],
15
- ["Schemes::HTTPS", %r{^https://} ],
16
- ["Schemes::HDFS", %r{^hdfs://} ]
13
+ ["Schemes::S3", %r{^s3://}i ],
14
+ ["Schemes::HTTP", %r{^http://}i ],
15
+ ["Schemes::HTTPS", %r{^https://}i ],
16
+ ["Schemes::HDFS", %r{^hdfs://}i ]
17
17
  ]
18
18
  end
19
19
  end
@@ -151,20 +151,17 @@ module IMW
151
151
  # - extension
152
152
  # - snippet
153
153
  def summary
154
- {
154
+ data = {
155
155
  :basename => basename,
156
156
  :size => size,
157
- :extension => extension,
158
- :snippet => snippet
157
+ :extension => extension
159
158
  }
159
+ if respond_to?(:snippet)
160
+ data[:snippet] = snippet
161
+ end
162
+ data
160
163
  end
161
164
 
162
- # Return a 1024-char snippet from this local file.
163
- #
164
- # @return [String]
165
- def snippet
166
- io.read(1024)
167
- end
168
165
  end
169
166
 
170
167
  # Defines methods for manipulating the contents of a local
@@ -44,7 +44,7 @@ module IMW
44
44
  def summary
45
45
  @summary ||= inputs.map do |input|
46
46
  if input.respond_to?(:summary)
47
- input.summary
47
+ input.summary rescue {}
48
48
  else
49
49
  {}
50
50
  end
@@ -1,14 +1,72 @@
1
1
  module IMW
2
2
  module Tools
3
+
4
+ # A class to encapsulate transferring a resource from one URI to
5
+ # another.
3
6
  class Transferer
4
7
 
5
- attr_accessor :action, :source, :destination
8
+ # The action this Transferer is to take.
9
+ #
10
+ # @return [:cp, :mv]
11
+ attr_reader :action
12
+
13
+ # Set the action of this Transferer.
14
+ #
15
+ # Will raise an error unless +the_action+ is <tt>:cp</tt> or
16
+ # <tt>:mv</tt>.
17
+ #
18
+ # @param [:cp, :mv] the_action
19
+ def action= the_action
20
+ @action = case the_action.to_sym
21
+ when :cp, :copy then :cp
22
+ when :mv, :move, :mv! then :mv
23
+ else raise IMW::ArgumentError.new("action (#{the_action}) must be one of `cp' (or `copy') or `mv' (or `move' or `mv!'")
24
+ end
25
+ end
26
+
27
+ # The source resource.
28
+ #
29
+ # @return [IMW::Resource]
30
+ attr_reader :source
31
+
32
+ # Set the source for this transferer.
33
+ #
34
+ # If +the_source+ is local, will check that it exists and raise
35
+ # an error if not.
36
+ #
37
+ # @param [String, IMW::Resource] the_source
38
+ def source= the_source
39
+ s = IMW.open(the_source)
40
+ s.should_exist!("Cannot #{action_verb}") if s.is_local?
41
+ @source = s
42
+ end
6
43
 
44
+ # The destination resource.
45
+ #
46
+ # @return [IMW::Resource]
47
+ attr_reader :destination
48
+
49
+ # Set the destination for this transferer.
50
+ #
51
+ # If +the_destination+ is local, will check that its parent
52
+ # directory exists and raise an error if not.
53
+ def destination= the_destination
54
+ d = IMW.open(the_destination)
55
+ d.dir.should_exist!("Cannot #{action_verb}") if d.is_local?
56
+ @destination = d
57
+ end
58
+
59
+ # Instantiate a new transferer to take the given +action+ on
60
+ # +source+ and +destination+.
61
+ #
62
+ # @param [:cp, :mv] action the action to take
63
+ # @param [String, IMW::Resource] source
64
+ # @param [String, IMW::Resource] destination
7
65
  def initialize action, source, destination
8
- @action = normalize_action(action)
9
- @source = IMW.open(source)
10
- @destination = IMW.open(destination)
11
- raise IMW::PathError.new("Source and destination have the same URI: #{@source.uri}") if @source.uri.to_s == @destination.uri.to_s
66
+ self.action = action
67
+ self.source = source
68
+ self.destination = destination
69
+ raise IMW::PathError.new("Source and destination have the same URI: #{source}") if self.source.uri.to_s == self.destination.uri.to_s
12
70
  end
13
71
 
14
72
  # Transfer source to destination.
@@ -16,38 +74,31 @@ module IMW
16
74
  # For local transfer, will raise errors unless the necessary
17
75
  # paths exist.
18
76
  def transfer!
19
- if source.is_local?
20
- source.should_exist!("Cannot #{action}")
21
- source_scheme = 'file'
22
- else
23
- source_scheme = source.scheme
24
- end
25
-
26
- if destination.is_local?
27
- destination.dir.should_exist!("Cannot #{action}")
28
- destination_scheme = 'file'
29
- else
30
- destination_scheme = destination.scheme
31
- end
32
-
33
- method = "#{source_scheme}_to_#{destination_scheme}"
34
-
35
- if respond_to?(method)
36
- send(method)
37
- else
38
- raise IMW::NoMethodError.new("Do not know how to #{action} #{source.uri} => #{destination.uri} (#{source_scheme.inspect} => #{destination_scheme.inspect})")
39
- end
77
+ IMW.announce_if_verbose("#{action_gerund.capitalize} #{source} to #{destination}")
78
+ send(transfer_method)
40
79
  destination.reopen
41
80
  end
42
81
 
43
82
  protected
44
83
 
45
- def normalize_action action # :nodoc:
46
- case action.to_sym
47
- when :cp, :copy then :cp
48
- when :mv, :move, :mv! then :mv
49
- else raise IMW::ArgumentError.new("action (#{action}) must be one of `cp' (or `copy') or `mv' (or `move' or `mv!'")
50
- end
84
+ # Return the name of the method that should be used to transfer
85
+ # +source+ to +destination+.
86
+ #
87
+ # @return [String]
88
+ def transfer_method
89
+ source_scheme = source.is_local? ? 'file' : source.scheme
90
+ destination_scheme = destination.is_local? ? 'file' : destination.scheme
91
+ method = "#{source_scheme}_to_#{destination_scheme}"
92
+ raise IMW::NoMethodError.new("Do not know how to #{action_verb} #{source} to #{destination}") unless respond_to?(method)
93
+ method
94
+ end
95
+
96
+ def action_verb # :nodoc
97
+ action == :cp ? "copy" : "move"
98
+ end
99
+
100
+ def action_gerund # :nodoc
101
+ action == :cp ? "copying" : "moving"
51
102
  end
52
103
 
53
104
  #
@@ -0,0 +1,3 @@
1
+ require File.join(File.dirname(__FILE__),'../../spec_helper')
2
+
3
+
@@ -7,65 +7,77 @@ describe IMW::Tools::Transferer do
7
7
  @nested = IMW.open('new_dir/nested.txt')
8
8
  @nested_dir = IMW.open('new_dir/nested')
9
9
  @local = IMW.open("foobar.txt")
10
+ @dest = IMW.open("barbaz.txt")
10
11
  @http = IMW.open("http://www.google.com")
11
12
  @hdfs = IMW.open("hdfs:///path/to/foobar.txt")
12
13
  @s3 = IMW.open("s3://mybucket/foo/bar")
14
+ IMWTest::Random.file(@local.path)
13
15
  end
14
16
 
15
17
  it "should raise an error unless the action is one of :cp, :copy, :mv :move, or :mv!" do
16
- IMW::Tools::Transferer.new(:cp, @local, @http).should be
17
- IMW::Tools::Transferer.new(:copy, @local, @http).should be
18
- IMW::Tools::Transferer.new(:mv, @local, @http).should be
19
- IMW::Tools::Transferer.new(:move, @local, @http).should be
20
- IMW::Tools::Transferer.new(:mv!, @local, @http).should be
21
- lambda { IMW::Tools::Transferer.new(:foobar, @local, @http) }.should raise_error(IMW::ArgumentError)
18
+ @transferer = IMW::Tools::Transferer.new(:cp, @local, @http)
19
+ @transferer.action = :cp
20
+ @transferer.action = :copy
21
+ @transferer.action = :mv
22
+ @transferer.action = :mv!
23
+ @transferer.action = :move
24
+ lambda { @transferer.action = :foobar }.should raise_error(IMW::ArgumentError)
22
25
  end
23
26
 
24
27
  it "should raise an error if the source and the destination have the same URI" do
25
28
  lambda { IMW::Tools::Transferer.new(:cp, @local, @local) }.should raise_error(IMW::PathError)
26
29
  end
27
30
 
31
+ it "should print a log message when IMW is verbose" do
32
+ IMW.stub!(:verbose).and_return(:true)
33
+ IMW.should_receive(:announce_if_verbose).with("Copying #{@local} to #{@dest}")
34
+ IMW::Tools::Transferer.new(:cp, @local, @dest).transfer!
35
+ end
36
+
28
37
  describe "transfering local files" do
29
38
 
30
39
  before do
31
40
  IMWTest::Random.file @local.path
41
+ @transferer = IMW::Tools::Transferer.new(:cp, @local, @dest)
32
42
  end
33
43
 
34
44
  it "should raise an error if the source doesn't exist" do
35
45
  @local.rm!
36
- lambda { IMW::Tools::Transferer.new(:cp, @local, 'barbaz.txt').transfer! }.should raise_error(IMW::PathError)
46
+ lambda { @transferer.source = @local }.should raise_error(IMW::PathError)
37
47
  end
38
48
 
39
49
  it "should raise an error if the directory of the destination doesn't exist" do
40
- lambda { IMW::Tools::Transferer.new(:cp, @local, @nested).transfer! }.should raise_error(IMW::PathError)
50
+ lambda { @transferer.destination = @nested }.should raise_error(IMW::PathError)
41
51
  end
42
52
 
43
53
  it "can copy a local file" do
44
- IMW::Tools::Transferer.new(:cp, @local, 'barbaz.txt').transfer!
54
+ @transferer.transfer!
45
55
  @local.exist?.should be_true
46
- IMW.open('barbaz.txt').exist?.should be_true
56
+ @dest.exist?.should be_true
47
57
  end
48
58
 
49
59
  it "can copy a local file to a directory" do
50
60
  FileUtils.mkdir(@dir.path)
51
- IMW::Tools::Transferer.new(:cp, @local, @dir).transfer!
61
+ @transferer.destination = @dir
62
+ @transferer.transfer!
52
63
  IMW.open(File.join(@dir.path, @local.basename)).exist?.should be_true
53
64
  end
54
65
 
55
66
  it "can move a local file" do
56
- IMW::Tools::Transferer.new(:mv, @local, 'barbaz.txt').transfer!
67
+ @transferer.action = :mv
68
+ @transferer.transfer!
57
69
  @local.exist?.should be_false
58
- IMW.open('barbaz.txt').exist?.should be_true
70
+ @dest.exist?.should be_true
59
71
  end
60
72
 
61
73
  it "can move a local file to a directory" do
62
74
  FileUtils.mkdir(@dir.path)
63
- IMW::Tools::Transferer.new(:mv, @local, @dir).transfer!
75
+ @transferer.action = :mv
76
+ @transferer.destination = @dir
77
+ @transferer.transfer!
64
78
  IMW.open(File.join(@dir.path, @local.basename)).exist?.should be_true
65
79
  @local.exist?.should be_false
66
80
  end
67
-
68
-
69
81
  end
70
82
 
71
83
  describe "transfering local directories" do
metadata CHANGED
@@ -1,7 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: imw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6
4
+ hash: 25
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 2
9
+ - 7
10
+ version: 0.2.7
5
11
  platform: ruby
6
12
  authors:
7
13
  - Dhruv Bansal
@@ -10,7 +16,7 @@ autorequire:
10
16
  bindir: bin
11
17
  cert_chain: []
12
18
 
13
- date: 2010-06-15 00:00:00 -05:00
19
+ date: 2010-07-09 00:00:00 -05:00
14
20
  default_executable: imw
15
21
  dependencies: []
16
22
 
@@ -135,6 +141,7 @@ files:
135
141
  - spec/support/paths_matcher.rb
136
142
  - spec/support/random.rb
137
143
  - spec/support/without_regard_to_order_matcher.rb
144
+ - spec/imw/schemes/sql_spec.rb
138
145
  has_rdoc: true
139
146
  homepage: http://github.com/infochimps/imw
140
147
  licenses: []
@@ -145,21 +152,27 @@ rdoc_options:
145
152
  require_paths:
146
153
  - lib
147
154
  required_ruby_version: !ruby/object:Gem::Requirement
155
+ none: false
148
156
  requirements:
149
157
  - - ">="
150
158
  - !ruby/object:Gem::Version
159
+ hash: 3
160
+ segments:
161
+ - 0
151
162
  version: "0"
152
- version:
153
163
  required_rubygems_version: !ruby/object:Gem::Requirement
164
+ none: false
154
165
  requirements:
155
166
  - - ">="
156
167
  - !ruby/object:Gem::Version
168
+ hash: 3
169
+ segments:
170
+ - 0
157
171
  version: "0"
158
- version:
159
172
  requirements: []
160
173
 
161
174
  rubyforge_project:
162
- rubygems_version: 1.3.5
175
+ rubygems_version: 1.3.7
163
176
  signing_key:
164
177
  specification_version: 3
165
178
  summary: The Infinite Monkeywrench (IMW) makes acquiring, extracting, transforming, loading, and packaging data easy.
@@ -188,6 +201,7 @@ test_files:
188
201
  - spec/imw/archives_spec.rb
189
202
  - spec/imw/schemes/http_spec.rb
190
203
  - spec/imw/schemes/local_spec.rb
204
+ - spec/imw/schemes/sql_spec.rb
191
205
  - spec/imw/schemes/remote_spec.rb
192
206
  - spec/imw/schemes/hdfs_spec.rb
193
207
  - spec/imw/schemes/s3_spec.rb