remote_table 0.2.23 → 0.2.24

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -15,6 +15,7 @@ begin
15
15
  gem.add_dependency 'activesupport', '>=2.3.4'
16
16
  gem.add_dependency 'slither', '>=0.99.3'
17
17
  gem.add_dependency 'nokogiri', '>=1.4.1'
18
+ gem.add_dependency 'escape', '>=0.0.4'
18
19
  gem.add_development_dependency 'errata', '>=0.2.0'
19
20
  gem.require_path = "lib"
20
21
  gem.rdoc_options << '--line-numbers' << '--inline-source'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.23
1
+ 0.2.24
data/lib/remote_table.rb CHANGED
@@ -12,6 +12,7 @@ require 'active_support/version'
12
12
  require active_support_3_requirement
13
13
  end if ActiveSupport::VERSION::MAJOR == 3
14
14
  require 'fastercsv'
15
+ require 'escape'
15
16
  require 'slither'
16
17
  require 'roo'
17
18
  I_KNOW_I_AM_USING_AN_OLD_AND_BUGGY_VERSION_OF_LIBXML2 = true
@@ -58,11 +59,17 @@ class RemoteTable
58
59
 
59
60
  protected
60
61
 
62
+ def self.bang(path, cmd)
63
+ tmp_path = "#{path}.tmp"
64
+ RemoteTable.backtick_with_reporting "cat #{Escape.shell_single_word path} | #{cmd} > #{Escape.shell_single_word tmp_path}"
65
+ FileUtils.mv tmp_path, path
66
+ end
67
+
61
68
  # TODO this should probably live somewhere else
62
- def self.backtick_with_reporting(cmd, raise_on_error = true)
63
- cmd = cmd.gsub /\s+/m, ' '
69
+ def self.backtick_with_reporting(cmd)
70
+ cmd = cmd.gsub /[ ]*\n[ ]*/m, ' '
64
71
  output = `#{cmd}`
65
- if raise_on_error and not $?.success?
72
+ if not $?.success?
66
73
  raise %{
67
74
  From the remote_table gem...
68
75
 
@@ -59,8 +59,7 @@ class RemoteTable
59
59
 
60
60
  def skip_rows!
61
61
  return unless skip
62
- RemoteTable.backtick_with_reporting "cat #{path} | tail -n +#{skip + 1} > #{path}.tmp"
63
- FileUtils.mv "#{path}.tmp", path
62
+ RemoteTable.bang path, "tail -n +#{skip + 1}"
64
63
  end
65
64
 
66
65
  USELESS_CHARACTERS = [
@@ -68,13 +67,11 @@ class RemoteTable
68
67
  '\xc2\xad' # soft hyphen, often inserted by MS Office (html: &shy;)
69
68
  ]
70
69
  def remove_useless_characters!
71
- RemoteTable.backtick_with_reporting "perl -pe 's/#{USELESS_CHARACTERS.join '//g; s/'}//g' #{path} > #{path}.tmp"
72
- FileUtils.mv "#{path}.tmp", path
70
+ RemoteTable.bang path, "perl -pe 's/#{USELESS_CHARACTERS.join '//g; s/'}//g'"
73
71
  end
74
72
 
75
73
  def convert_file_to_utf8!
76
- RemoteTable.backtick_with_reporting "iconv -c -f #{encoding} -t UTF-8 #{path} > #{path}.tmp"
77
- FileUtils.mv "#{path}.tmp", path
74
+ RemoteTable.bang path, "iconv -c -f #{Escape.shell_single_word encoding} -t UTF-8"
78
75
  end
79
76
 
80
77
  def restore_file!
@@ -83,14 +80,12 @@ class RemoteTable
83
80
 
84
81
  def cut_columns!
85
82
  return unless cut
86
- RemoteTable.backtick_with_reporting "cat #{path} | cut -c #{cut} > #{path}.tmp"
87
- FileUtils.mv "#{path}.tmp", path
83
+ RemoteTable.bang path, "cut -c #{Escape.shell_single_word cut}"
88
84
  end
89
85
 
90
86
  def crop_rows!
91
87
  return unless crop
92
- RemoteTable.backtick_with_reporting "cat #{path} | tail -n +#{crop.first} | head -n #{crop.last - crop.first + 1} > #{path}.tmp"
93
- FileUtils.mv "#{path}.tmp", path
88
+ RemoteTable.bang path "tail -n +#{Escape.shell_single_word crop.first} | head -n #{crop.last - crop.first + 1}"
94
89
  end
95
90
 
96
91
  def format_from_filename
@@ -28,7 +28,7 @@ class RemoteTable
28
28
  return unless compression
29
29
  cmd, args = case compression
30
30
  when :zip, :exe
31
- ["unzip", "-d #{::File.dirname(path)}"]
31
+ ["unzip", "-d #{Escape.shell_single_word ::File.dirname(path)}"]
32
32
  when :bz2
33
33
  'bunzip2'
34
34
  when :gz
@@ -47,8 +47,9 @@ class RemoteTable
47
47
  end
48
48
 
49
49
  def move_and_process(path, extname, cmd, args)
50
- FileUtils.mv path, "#{path}.#{extname}"
51
- RemoteTable.backtick_with_reporting "#{cmd} #{path}.#{extname} #{args}"
50
+ new_path = "#{path}.#{extname}"
51
+ FileUtils.mv path, new_path
52
+ RemoteTable.backtick_with_reporting "#{cmd} #{Escape.shell_single_word new_path} #{args}"
52
53
  end
53
54
 
54
55
  # ex. A: 2007-01.csv.gz (compression not capable of storing multiple files)
@@ -16,17 +16,17 @@ class RemoteTable
16
16
  end
17
17
 
18
18
  def download
19
- path = ::File.join staging_dir_path, 'REMOTE_TABLE_PACKAGE'
20
19
  if parsed_url.scheme == 'file'
21
20
  parsed_url.path
22
21
  else
22
+ path = ::File.join staging_dir_path, 'REMOTE_TABLE_PACKAGE'
23
23
  RemoteTable.backtick_with_reporting %{
24
24
  curl
25
25
  --header "Expect: "
26
26
  --location
27
- #{"--data \"#{form_data}\"" if form_data.present?}
28
- "#{parsed_url}"
29
- --output "#{path}"
27
+ #{"--data #{Escape.shell_single_word form_data}" if form_data.present?}
28
+ #{Escape.shell_single_word parsed_url.to_s}
29
+ --output #{Escape.shell_single_word path}
30
30
  2>&1
31
31
  }
32
32
  path
data/remote_table.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{remote_table}
8
- s.version = "0.2.23"
8
+ s.version = "0.2.24"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
12
- s.date = %q{2010-05-21}
12
+ s.date = %q{2010-05-24}
13
13
  s.description = %q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
14
14
  s.email = %q{seamus@abshere.net}
15
15
  s.extra_rdoc_files = [
@@ -61,6 +61,7 @@ Gem::Specification.new do |s|
61
61
  s.add_runtime_dependency(%q<activesupport>, [">= 2.3.4"])
62
62
  s.add_runtime_dependency(%q<slither>, [">= 0.99.3"])
63
63
  s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.1"])
64
+ s.add_runtime_dependency(%q<escape>, [">= 0.0.4"])
64
65
  s.add_development_dependency(%q<errata>, [">= 0.2.0"])
65
66
  else
66
67
  s.add_dependency(%q<roo>, ["= 1.3.11"])
@@ -68,6 +69,7 @@ Gem::Specification.new do |s|
68
69
  s.add_dependency(%q<activesupport>, [">= 2.3.4"])
69
70
  s.add_dependency(%q<slither>, [">= 0.99.3"])
70
71
  s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
72
+ s.add_dependency(%q<escape>, [">= 0.0.4"])
71
73
  s.add_dependency(%q<errata>, [">= 0.2.0"])
72
74
  end
73
75
  else
@@ -76,6 +78,7 @@ Gem::Specification.new do |s|
76
78
  s.add_dependency(%q<activesupport>, [">= 2.3.4"])
77
79
  s.add_dependency(%q<slither>, [">= 0.99.3"])
78
80
  s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
81
+ s.add_dependency(%q<escape>, [">= 0.0.4"])
79
82
  s.add_dependency(%q<errata>, [">= 0.2.0"])
80
83
  end
81
84
  end
@@ -131,6 +131,16 @@ class RemoteTableTest < Test::Unit::TestCase
131
131
  end
132
132
 
133
133
  if ENV['ALL'] == 'true' or ENV['FAST'] == 'true'
134
+ should "work on filenames with spaces, using globbing" do
135
+ t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
136
+ assert_equal 'ASTON MARTIN', t.rows.first['MFR']
137
+ end
138
+
139
+ should "work on filenames with spaces" do
140
+ t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
141
+ assert_equal 'ASTON MARTIN', t.rows.first['MFR']
142
+ end
143
+
134
144
  should "ignore UTF-8 byte order marks" do
135
145
  t = RemoteTable.new :url => 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
136
146
  assert_equal 'Tawleed', t.rows.first['name']
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 2
8
- - 23
9
- version: 0.2.23
8
+ - 24
9
+ version: 0.2.24
10
10
  platform: ruby
11
11
  authors:
12
12
  - Seamus Abshere
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-05-21 00:00:00 -04:00
18
+ date: 2010-05-24 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -89,9 +89,23 @@ dependencies:
89
89
  type: :runtime
90
90
  version_requirements: *id005
91
91
  - !ruby/object:Gem::Dependency
92
- name: errata
92
+ name: escape
93
93
  prerelease: false
94
94
  requirement: &id006 !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ segments:
99
+ - 0
100
+ - 0
101
+ - 4
102
+ version: 0.0.4
103
+ type: :runtime
104
+ version_requirements: *id006
105
+ - !ruby/object:Gem::Dependency
106
+ name: errata
107
+ prerelease: false
108
+ requirement: &id007 !ruby/object:Gem::Requirement
95
109
  requirements:
96
110
  - - ">="
97
111
  - !ruby/object:Gem::Version
@@ -101,7 +115,7 @@ dependencies:
101
115
  - 0
102
116
  version: 0.2.0
103
117
  type: :development
104
- version_requirements: *id006
118
+ version_requirements: *id007
105
119
  description: Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.
106
120
  email: seamus@abshere.net
107
121
  executables: []