remote_table 0.2.23 → 0.2.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/lib/remote_table.rb +10 -3
- data/lib/remote_table/file.rb +5 -10
- data/lib/remote_table/package.rb +4 -3
- data/lib/remote_table/request.rb +4 -4
- data/remote_table.gemspec +5 -2
- data/test/remote_table_test.rb +10 -0
- metadata +19 -5
data/Rakefile
CHANGED
@@ -15,6 +15,7 @@ begin
|
|
15
15
|
gem.add_dependency 'activesupport', '>=2.3.4'
|
16
16
|
gem.add_dependency 'slither', '>=0.99.3'
|
17
17
|
gem.add_dependency 'nokogiri', '>=1.4.1'
|
18
|
+
gem.add_dependency 'escape', '>=0.0.4'
|
18
19
|
gem.add_development_dependency 'errata', '>=0.2.0'
|
19
20
|
gem.require_path = "lib"
|
20
21
|
gem.rdoc_options << '--line-numbers' << '--inline-source'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.23
|
1
|
+
0.2.24
|
data/lib/remote_table.rb
CHANGED
@@ -12,6 +12,7 @@ require 'active_support/version'
|
|
12
12
|
require active_support_3_requirement
|
13
13
|
end if ActiveSupport::VERSION::MAJOR == 3
|
14
14
|
require 'fastercsv'
|
15
|
+
require 'escape'
|
15
16
|
require 'slither'
|
16
17
|
require 'roo'
|
17
18
|
I_KNOW_I_AM_USING_AN_OLD_AND_BUGGY_VERSION_OF_LIBXML2 = true
|
@@ -58,11 +59,17 @@ class RemoteTable
|
|
58
59
|
|
59
60
|
protected
|
60
61
|
|
62
|
+
def self.bang(path, cmd)
|
63
|
+
tmp_path = "#{path}.tmp"
|
64
|
+
RemoteTable.backtick_with_reporting "cat #{Escape.shell_single_word path} | #{cmd} > #{Escape.shell_single_word tmp_path}"
|
65
|
+
FileUtils.mv tmp_path, path
|
66
|
+
end
|
67
|
+
|
61
68
|
# TODO this should probably live somewhere else
|
62
|
-
def self.backtick_with_reporting(cmd
|
63
|
-
cmd = cmd.gsub
|
69
|
+
def self.backtick_with_reporting(cmd)
|
70
|
+
cmd = cmd.gsub /[ ]*\n[ ]*/m, ' '
|
64
71
|
output = `#{cmd}`
|
65
|
-
if
|
72
|
+
if not $?.success?
|
66
73
|
raise %{
|
67
74
|
From the remote_table gem...
|
68
75
|
|
data/lib/remote_table/file.rb
CHANGED
@@ -59,8 +59,7 @@ class RemoteTable
|
|
59
59
|
|
60
60
|
def skip_rows!
|
61
61
|
return unless skip
|
62
|
-
RemoteTable.
|
63
|
-
FileUtils.mv "#{path}.tmp", path
|
62
|
+
RemoteTable.bang path, "tail -n +#{skip + 1}"
|
64
63
|
end
|
65
64
|
|
66
65
|
USELESS_CHARACTERS = [
|
@@ -68,13 +67,11 @@ class RemoteTable
|
|
68
67
|
'\xc2\xad' # soft hyphen, often inserted by MS Office (html: ­)
|
69
68
|
]
|
70
69
|
def remove_useless_characters!
|
71
|
-
RemoteTable.
|
72
|
-
FileUtils.mv "#{path}.tmp", path
|
70
|
+
RemoteTable.bang path, "perl -pe 's/#{USELESS_CHARACTERS.join '//g; s/'}//g'"
|
73
71
|
end
|
74
72
|
|
75
73
|
def convert_file_to_utf8!
|
76
|
-
RemoteTable.
|
77
|
-
FileUtils.mv "#{path}.tmp", path
|
74
|
+
RemoteTable.bang path, "iconv -c -f #{Escape.shell_single_word encoding} -t UTF-8"
|
78
75
|
end
|
79
76
|
|
80
77
|
def restore_file!
|
@@ -83,14 +80,12 @@ class RemoteTable
|
|
83
80
|
|
84
81
|
def cut_columns!
|
85
82
|
return unless cut
|
86
|
-
RemoteTable.
|
87
|
-
FileUtils.mv "#{path}.tmp", path
|
83
|
+
RemoteTable.bang path, "cut -c #{Escape.shell_single_word cut}"
|
88
84
|
end
|
89
85
|
|
90
86
|
def crop_rows!
|
91
87
|
return unless crop
|
92
|
-
RemoteTable.
|
93
|
-
FileUtils.mv "#{path}.tmp", path
|
88
|
+
RemoteTable.bang path "tail -n +#{Escape.shell_single_word crop.first} | head -n #{crop.last - crop.first + 1}"
|
94
89
|
end
|
95
90
|
|
96
91
|
def format_from_filename
|
data/lib/remote_table/package.rb
CHANGED
@@ -28,7 +28,7 @@ class RemoteTable
|
|
28
28
|
return unless compression
|
29
29
|
cmd, args = case compression
|
30
30
|
when :zip, :exe
|
31
|
-
["unzip", "-d #{::File.dirname(path)}"]
|
31
|
+
["unzip", "-d #{Escape.shell_single_word ::File.dirname(path)}"]
|
32
32
|
when :bz2
|
33
33
|
'bunzip2'
|
34
34
|
when :gz
|
@@ -47,8 +47,9 @@ class RemoteTable
|
|
47
47
|
end
|
48
48
|
|
49
49
|
def move_and_process(path, extname, cmd, args)
|
50
|
-
|
51
|
-
|
50
|
+
new_path = "#{path}.#{extname}"
|
51
|
+
FileUtils.mv path, new_path
|
52
|
+
RemoteTable.backtick_with_reporting "#{cmd} #{Escape.shell_single_word new_path} #{args}"
|
52
53
|
end
|
53
54
|
|
54
55
|
# ex. A: 2007-01.csv.gz (compression not capable of storing multiple files)
|
data/lib/remote_table/request.rb
CHANGED
@@ -16,17 +16,17 @@ class RemoteTable
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def download
|
19
|
-
path = ::File.join staging_dir_path, 'REMOTE_TABLE_PACKAGE'
|
20
19
|
if parsed_url.scheme == 'file'
|
21
20
|
parsed_url.path
|
22
21
|
else
|
22
|
+
path = ::File.join staging_dir_path, 'REMOTE_TABLE_PACKAGE'
|
23
23
|
RemoteTable.backtick_with_reporting %{
|
24
24
|
curl
|
25
25
|
--header "Expect: "
|
26
26
|
--location
|
27
|
-
#{"--data
|
28
|
-
|
29
|
-
--output
|
27
|
+
#{"--data #{Escape.shell_single_word form_data}" if form_data.present?}
|
28
|
+
#{Escape.shell_single_word parsed_url.to_s}
|
29
|
+
--output #{Escape.shell_single_word path}
|
30
30
|
2>&1
|
31
31
|
}
|
32
32
|
path
|
data/remote_table.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{remote_table}
|
8
|
-
s.version = "0.2.23"
|
8
|
+
s.version = "0.2.24"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
12
|
-
s.date = %q{2010-05-
|
12
|
+
s.date = %q{2010-05-24}
|
13
13
|
s.description = %q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
|
14
14
|
s.email = %q{seamus@abshere.net}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -61,6 +61,7 @@ Gem::Specification.new do |s|
|
|
61
61
|
s.add_runtime_dependency(%q<activesupport>, [">= 2.3.4"])
|
62
62
|
s.add_runtime_dependency(%q<slither>, [">= 0.99.3"])
|
63
63
|
s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.1"])
|
64
|
+
s.add_runtime_dependency(%q<escape>, [">= 0.0.4"])
|
64
65
|
s.add_development_dependency(%q<errata>, [">= 0.2.0"])
|
65
66
|
else
|
66
67
|
s.add_dependency(%q<roo>, ["= 1.3.11"])
|
@@ -68,6 +69,7 @@ Gem::Specification.new do |s|
|
|
68
69
|
s.add_dependency(%q<activesupport>, [">= 2.3.4"])
|
69
70
|
s.add_dependency(%q<slither>, [">= 0.99.3"])
|
70
71
|
s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
|
72
|
+
s.add_dependency(%q<escape>, [">= 0.0.4"])
|
71
73
|
s.add_dependency(%q<errata>, [">= 0.2.0"])
|
72
74
|
end
|
73
75
|
else
|
@@ -76,6 +78,7 @@ Gem::Specification.new do |s|
|
|
76
78
|
s.add_dependency(%q<activesupport>, [">= 2.3.4"])
|
77
79
|
s.add_dependency(%q<slither>, [">= 0.99.3"])
|
78
80
|
s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
|
81
|
+
s.add_dependency(%q<escape>, [">= 0.0.4"])
|
79
82
|
s.add_dependency(%q<errata>, [">= 0.2.0"])
|
80
83
|
end
|
81
84
|
end
|
data/test/remote_table_test.rb
CHANGED
@@ -131,6 +131,16 @@ class RemoteTableTest < Test::Unit::TestCase
|
|
131
131
|
end
|
132
132
|
|
133
133
|
if ENV['ALL'] == 'true' or ENV['FAST'] == 'true'
|
134
|
+
should "work on filenames with spaces, using globbing" do
|
135
|
+
t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
|
136
|
+
assert_equal 'ASTON MARTIN', t.rows.first['MFR']
|
137
|
+
end
|
138
|
+
|
139
|
+
should "work on filenames with spaces" do
|
140
|
+
t = RemoteTable.new :url => 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
|
141
|
+
assert_equal 'ASTON MARTIN', t.rows.first['MFR']
|
142
|
+
end
|
143
|
+
|
134
144
|
should "ignore UTF-8 byte order marks" do
|
135
145
|
t = RemoteTable.new :url => 'http://www.freebase.com/type/exporttypeinstances/base/horses/horse_breed?page=0&filter_mode=type&filter_view=table&show%01p%3D%2Ftype%2Fobject%2Fname%01index=0&show%01p%3D%2Fcommon%2Ftopic%2Fimage%01index=1&show%01p%3D%2Fcommon%2Ftopic%2Farticle%01index=2&sort%01p%3D%2Ftype%2Fobject%2Ftype%01p%3Dlink%01p%3D%2Ftype%2Flink%2Ftimestamp%01index=false&=&exporttype=csv-8'
|
136
146
|
assert_equal 'Tawleed', t.rows.first['name']
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 2
|
8
|
-
- 23
|
9
|
-
version: 0.2.23
|
8
|
+
- 24
|
9
|
+
version: 0.2.24
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Seamus Abshere
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-05-
|
18
|
+
date: 2010-05-24 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -89,9 +89,23 @@ dependencies:
|
|
89
89
|
type: :runtime
|
90
90
|
version_requirements: *id005
|
91
91
|
- !ruby/object:Gem::Dependency
|
92
|
-
name: errata
|
92
|
+
name: escape
|
93
93
|
prerelease: false
|
94
94
|
requirement: &id006 !ruby/object:Gem::Requirement
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
segments:
|
99
|
+
- 0
|
100
|
+
- 0
|
101
|
+
- 4
|
102
|
+
version: 0.0.4
|
103
|
+
type: :runtime
|
104
|
+
version_requirements: *id006
|
105
|
+
- !ruby/object:Gem::Dependency
|
106
|
+
name: errata
|
107
|
+
prerelease: false
|
108
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
95
109
|
requirements:
|
96
110
|
- - ">="
|
97
111
|
- !ruby/object:Gem::Version
|
@@ -101,7 +115,7 @@ dependencies:
|
|
101
115
|
- 0
|
102
116
|
version: 0.2.0
|
103
117
|
type: :development
|
104
|
-
version_requirements: *id006
|
118
|
+
version_requirements: *id007
|
105
119
|
description: Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.
|
106
120
|
email: seamus@abshere.net
|
107
121
|
executables: []
|