remote_table 0.2.23 → 0.2.24
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/lib/remote_table.rb +10 -3
- data/lib/remote_table/file.rb +5 -10
- data/lib/remote_table/package.rb +4 -3
- data/lib/remote_table/request.rb +4 -4
- data/remote_table.gemspec +5 -2
- data/test/remote_table_test.rb +10 -0
- metadata +19 -5
data/Rakefile
CHANGED
@@ -15,6 +15,7 @@ begin
|
|
15
15
|
gem.add_dependency 'activesupport', '>=2.3.4'
|
16
16
|
gem.add_dependency 'slither', '>=0.99.3'
|
17
17
|
gem.add_dependency 'nokogiri', '>=1.4.1'
|
18
|
+
gem.add_dependency 'escape', '>=0.0.4'
|
18
19
|
gem.add_development_dependency 'errata', '>=0.2.0'
|
19
20
|
gem.require_path = "lib"
|
20
21
|
gem.rdoc_options << '--line-numbers' << '--inline-source'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.23
|
1
|
+
0.2.24
|
data/lib/remote_table.rb
CHANGED
@@ -12,6 +12,7 @@ require 'active_support/version'
|
|
12
12
|
require active_support_3_requirement
|
13
13
|
end if ActiveSupport::VERSION::MAJOR == 3
|
14
14
|
require 'fastercsv'
|
15
|
+
require 'escape'
|
15
16
|
require 'slither'
|
16
17
|
require 'roo'
|
17
18
|
I_KNOW_I_AM_USING_AN_OLD_AND_BUGGY_VERSION_OF_LIBXML2 = true
|
@@ -58,11 +59,17 @@ class RemoteTable
|
|
58
59
|
|
59
60
|
protected
|
60
61
|
|
62
|
+
def self.bang(path, cmd)
|
63
|
+
tmp_path = "#{path}.tmp"
|
64
|
+
RemoteTable.backtick_with_reporting "cat #{Escape.shell_single_word path} | #{cmd} > #{Escape.shell_single_word tmp_path}"
|
65
|
+
FileUtils.mv tmp_path, path
|
66
|
+
end
|
67
|
+
|
61
68
|
# TODO this should probably live somewhere else
|
62
|
-
def self.backtick_with_reporting(cmd
|
63
|
-
cmd = cmd.gsub
|
69
|
+
def self.backtick_with_reporting(cmd)
|
70
|
+
cmd = cmd.gsub /[ ]*\n[ ]*/m, ' '
|
64
71
|
output = `#{cmd}`
|
65
|
-
if
|
72
|
+
if not $?.success?
|
66
73
|
raise %{
|
67
74
|
From the remote_table gem...
|
68
75
|
|
data/lib/remote_table/file.rb
CHANGED
@@ -59,8 +59,7 @@ class RemoteTable
|
|
59
59
|
|
60
60
|
def skip_rows!
|
61
61
|
return unless skip
|
62
|
-
RemoteTable.
|
63
|
-
FileUtils.mv "#{path}.tmp", path
|
62
|
+
RemoteTable.bang path, "tail -n +#{skip + 1}"
|
64
63
|
end
|
65
64
|
|
66
65
|
USELESS_CHARACTERS = [
|
@@ -68,13 +67,11 @@ class RemoteTable
|
|
68
67
|
'\xc2\xad' # soft hyphen, often inserted by MS Office (html: &shy;)
|
69
68
|
]
|
70
69
|
def remove_useless_characters!
|
71
|
-
RemoteTable.
|
72
|
-
FileUtils.mv "#{path}.tmp", path
|
70
|
+
RemoteTable.bang path, "perl -pe 's/#{USELESS_CHARACTERS.join '//g; s/'}//g'"
|
73
71
|
end
|
74
72
|
|
75
73
|
def convert_file_to_utf8!
|
76
|
-
RemoteTable.
|
77
|
-
FileUtils.mv "#{path}.tmp", path
|
74
|
+
RemoteTable.bang path, "iconv -c -f #{Escape.shell_single_word encoding} -t UTF-8"
|
78
75
|
end
|
79
76
|
|
80
77
|
def restore_file!
|
@@ -83,14 +80,12 @@ class RemoteTable
|
|
83
80
|
|
84
81
|
def cut_columns!
|
85
82
|
return unless cut
|
86
|
-
RemoteTable.
|
87
|
-
FileUtils.mv "#{path}.tmp", path
|
83
|
+
RemoteTable.bang path, "cut -c #{Escape.shell_single_word cut}"
|
88
84
|
end
|
89
85
|
|
90
86
|
def crop_rows!
|
91
87
|
return unless crop
|
92
|
-
RemoteTable.
|
93
|
-
FileUtils.mv "#{path}.tmp", path
|
88
|
+
RemoteTable.bang path "tail -n +#{Escape.shell_single_word crop.first} | head -n #{crop.last - crop.first + 1}"
|
94
89
|
end
|
95
90
|
|
96
91
|
def format_from_filename
|
data/lib/remote_table/package.rb
CHANGED
@@ -28,7 +28,7 @@ class RemoteTable
|
|
28
28
|
return unless compression
|
29
29
|
cmd, args = case compression
|
30
30
|
when :zip, :exe
|
31
|
-
["unzip", "-d #{::File.dirname(path)}"]
|
31
|
+
["unzip", "-d #{Escape.shell_single_word ::File.dirname(path)}"]
|
32
32
|
when :bz2
|
33
33
|
'bunzip2'
|
34
34
|
when :gz
|
@@ -47,8 +47,9 @@ class RemoteTable
|
|
47
47
|
end
|
48
48
|
|
49
49
|
def move_and_process(path, extname, cmd, args)
|
50
|
-
|
51
|
-
|
50
|
+
new_path = "#{path}.#{extname}"
|
51
|
+
FileUtils.mv path, new_path
|
52
|
+
RemoteTable.backtick_with_reporting "#{cmd} #{Escape.shell_single_word new_path} #{args}"
|
52
53
|
end
|
53
54
|
|
54
55
|
# ex. A: 2007-01.csv.gz (compression not capable of storing multiple files)
|
data/lib/remote_table/request.rb
CHANGED
@@ -16,17 +16,17 @@ class RemoteTable
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def download
|
19
|
-
path = ::File.join staging_dir_path, 'REMOTE_TABLE_PACKAGE'
|
20
19
|
if parsed_url.scheme == 'file'
|
21
20
|
parsed_url.path
|
22
21
|
else
|
22
|
+
path = ::File.join staging_dir_path, 'REMOTE_TABLE_PACKAGE'
|
23
23
|
RemoteTable.backtick_with_reporting %{
|
24
24
|
curl
|
25
25
|
--header "Expect: "
|
26
26
|
--location
|
27
|
-
#{"--data
|
28
|
-
|
29
|
-
--output
|
27
|
+
#{"--data #{Escape.shell_single_word form_data}" if form_data.present?}
|
28
|
+
#{Escape.shell_single_word parsed_url.to_s}
|
29
|
+
--output #{Escape.shell_single_word path}
|
30
30
|
2>&1
|
31
31
|
}
|
32
32
|
path
|
data/remote_table.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{remote_table}
|
8
|
-
s.version = "0.2.23"
|
8
|
+
s.version = "0.2.24"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
-
s.date = %q{2010-05-
|
12
|
+
s.date = %q{2010-05-24}
|
13
13
|
s.description = %q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
|
14
14
|
s.email = %q{seamus@abshere.net}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -61,6 +61,7 @@ Gem::Specification.new do |s|
|
|
61
61
|
s.add_runtime_dependency(%q<activesupport>, [">= 2.3.4"])
|
62
62
|
s.add_runtime_dependency(%q<slither>, [">= 0.99.3"])
|
63
63
|
s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.1"])
|
64
|
+
s.add_runtime_dependency(%q<escape>, [">= 0.0.4"])
|
64
65
|
s.add_development_dependency(%q<errata>, [">= 0.2.0"])
|
65
66
|
else
|
66
67
|
s.add_dependency(%q<roo>, ["= 1.3.11"])
|
@@ -68,6 +69,7 @@ Gem::Specification.new do |s|
|
|
68
69
|
s.add_dependency(%q<activesupport>, [">= 2.3.4"])
|
69
70
|
s.add_dependency(%q<slither>, [">= 0.99.3"])
|
70
71
|
s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
|
72
|
+
s.add_dependency(%q<escape>, [">= 0.0.4"])
|
71
73
|
s.add_dependency(%q<errata>, [">= 0.2.0"])
|
72
74
|
end
|
73
75
|
else
|
@@ -76,6 +78,7 @@ Gem::Specification.new do |s|
|
|
76
78
|
s.add_dependency(%q<activesupport>, [">= 2.3.4"])
|
77
79
|
s.add_dependency(%q<slither>, [">= 0.99.3"])
|
78
80
|
s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
|
81
|
+
s.add_dependency(%q<escape>, [">= 0.0.4"])
|
79
82
|
s.add_dependency(%q<errata>, [">= 0.2.0"])
|
80
83
|
end
|
81
84
|
end
|
data/test/remote_table_test.rb
CHANGED
@@ -131,6 +131,16 @@ class RemoteTableTest < Test::Unit::TestCase
|
|
131
131
|
end
|
132
132
|
|
133
133
|
if ENV['ALL'] == 'true' or ENV['FAST'] == 'true'
|
134
|
+
should "work on filenames with spaces, using globbing" do
|
135
|
+
t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
|
136
|
+
assert_equal 'ASTON MARTIN', t.rows.first['MFR']
|
137
|
+
end
|
138
|
+
|
139
|
+
should "work on filenames with spaces" do
|
140
|
+
t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
|
141
|
+
assert_equal 'ASTON MARTIN', t.rows.first['MFR']
|
142
|
+
end
|
143
|
+
|
134
144
|
should "ignore UTF-8 byte order marks" do
|
135
145
|
t = RemoteTable.new :url => 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
|
136
146
|
assert_equal 'Tawleed', t.rows.first['name']
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 2
|
8
|
-
- 23
|
9
|
-
version: 0.2.23
|
8
|
+
- 24
|
9
|
+
version: 0.2.24
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Seamus Abshere
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-05-
|
18
|
+
date: 2010-05-24 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -89,9 +89,23 @@ dependencies:
|
|
89
89
|
type: :runtime
|
90
90
|
version_requirements: *id005
|
91
91
|
- !ruby/object:Gem::Dependency
|
92
|
-
name: errata
|
92
|
+
name: escape
|
93
93
|
prerelease: false
|
94
94
|
requirement: &id006 !ruby/object:Gem::Requirement
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
segments:
|
99
|
+
- 0
|
100
|
+
- 0
|
101
|
+
- 4
|
102
|
+
version: 0.0.4
|
103
|
+
type: :runtime
|
104
|
+
version_requirements: *id006
|
105
|
+
- !ruby/object:Gem::Dependency
|
106
|
+
name: errata
|
107
|
+
prerelease: false
|
108
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
95
109
|
requirements:
|
96
110
|
- - ">="
|
97
111
|
- !ruby/object:Gem::Version
|
@@ -101,7 +115,7 @@ dependencies:
|
|
101
115
|
- 0
|
102
116
|
version: 0.2.0
|
103
117
|
type: :development
|
104
|
-
version_requirements: *id006
|
118
|
+
version_requirements: *id007
|
105
119
|
description: Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.
|
106
120
|
email: seamus@abshere.net
|
107
121
|
executables: []
|