remote_table 0.2.32 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +5 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +65 -0
- data/LICENSE +1 -1
- data/README.rdoc +21 -7
- data/Rakefile +12 -61
- data/lib/remote_table/cleaner.rb +19 -0
- data/lib/remote_table/executor.rb +29 -0
- data/lib/remote_table/format/delimited.rb +62 -0
- data/lib/remote_table/format/excel.rb +10 -0
- data/lib/remote_table/format/excelx.rb +10 -0
- data/lib/remote_table/format/fixed_width.rb +47 -0
- data/lib/remote_table/format/html.rb +43 -0
- data/lib/remote_table/format/mixins/rooable.rb +47 -0
- data/lib/remote_table/format/mixins/textual.rb +34 -0
- data/lib/remote_table/format/open_office.rb +10 -0
- data/lib/remote_table/format.rb +35 -0
- data/lib/remote_table/hasher.rb +25 -0
- data/lib/remote_table/local_file.rb +92 -0
- data/lib/remote_table/properties.rb +209 -0
- data/lib/remote_table/transformer.rb +17 -0
- data/lib/remote_table/version.rb +3 -0
- data/lib/remote_table.rb +91 -99
- data/remote_table.gemspec +32 -77
- data/test/{test_helper.rb → helper.rb} +9 -2
- data/test/test_big.rb +61 -0
- data/test/test_errata.rb +46 -0
- data/test/test_old_syntax.rb +229 -0
- data/test/test_old_transform.rb +49 -0
- data/test/test_remote_table.rb +13 -0
- metadata +176 -53
- data/VERSION +0 -1
- data/lib/remote_table/file/csv.rb +0 -49
- data/lib/remote_table/file/fixed_width.rb +0 -19
- data/lib/remote_table/file/html.rb +0 -37
- data/lib/remote_table/file/ods.rb +0 -11
- data/lib/remote_table/file/roo_spreadsheet.rb +0 -44
- data/lib/remote_table/file/xls.rb +0 -11
- data/lib/remote_table/file/xlsx.rb +0 -11
- data/lib/remote_table/file.rb +0 -100
- data/lib/remote_table/package.rb +0 -89
- data/lib/remote_table/request.rb +0 -44
- data/lib/remote_table/transform.rb +0 -58
- data/test/remote_table_test.rb +0 -386
data/CHANGELOG
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
1.0.0
|
2
|
+
* Refactored to follow more Ruby conventions
|
3
|
+
* Suggesting new syntax that looks more like an Enumerable... t[5] instead of t.rows[5]
|
4
|
+
* Switching to string option keys (but old syntax is supported)
|
5
|
+
[...no changelog for 0.1.6--1.0.0...sorry]
|
1
6
|
0.1.6
|
2
7
|
* For CSVs, force convert headers using String#toutf8. :encoding => 'N'|'U' didn't work.
|
3
8
|
* Fix handling of long urls when passing off to Tempfile.
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
remote_table (1.0.0)
|
5
|
+
activesupport (>= 2.3.4)
|
6
|
+
builder
|
7
|
+
escape (>= 0.0.4)
|
8
|
+
google-spreadsheet-ruby
|
9
|
+
i18n
|
10
|
+
nokogiri (>= 1.4.1)
|
11
|
+
roo (~> 1.9)
|
12
|
+
slither (>= 0.99.4)
|
13
|
+
spreadsheet
|
14
|
+
zip
|
15
|
+
|
16
|
+
GEM
|
17
|
+
remote: http://rubygems.org/
|
18
|
+
specs:
|
19
|
+
activesupport (3.0.3)
|
20
|
+
builder (3.0.0)
|
21
|
+
columnize (0.3.2)
|
22
|
+
errata (0.2.4)
|
23
|
+
activesupport (>= 2.3.4)
|
24
|
+
remote_table (>= 0.2.31)
|
25
|
+
escape (0.0.4)
|
26
|
+
google-spreadsheet-ruby (0.1.2)
|
27
|
+
nokogiri (>= 1.4.3.1)
|
28
|
+
oauth (>= 0.3.6)
|
29
|
+
i18n (0.5.0)
|
30
|
+
linecache (0.43)
|
31
|
+
nokogiri (1.4.3.1)
|
32
|
+
oauth (0.4.4)
|
33
|
+
roo (1.9.3)
|
34
|
+
ruby-debug (0.10.4)
|
35
|
+
columnize (>= 0.1)
|
36
|
+
ruby-debug-base (~> 0.10.4.0)
|
37
|
+
ruby-debug-base (0.10.4)
|
38
|
+
linecache (>= 0.3)
|
39
|
+
ruby-ole (1.2.10.1)
|
40
|
+
shoulda (2.10.3)
|
41
|
+
slither (0.99.4)
|
42
|
+
spreadsheet (0.6.4.1)
|
43
|
+
ruby-ole
|
44
|
+
test-unit (2.1.2)
|
45
|
+
zip (2.0.2)
|
46
|
+
|
47
|
+
PLATFORMS
|
48
|
+
ruby
|
49
|
+
|
50
|
+
DEPENDENCIES
|
51
|
+
activesupport (>= 2.3.4)
|
52
|
+
builder
|
53
|
+
errata (>= 0.2.0)
|
54
|
+
escape (>= 0.0.4)
|
55
|
+
google-spreadsheet-ruby
|
56
|
+
i18n
|
57
|
+
nokogiri (>= 1.4.1)
|
58
|
+
remote_table!
|
59
|
+
roo (~> 1.9)
|
60
|
+
ruby-debug
|
61
|
+
shoulda
|
62
|
+
slither (>= 0.99.4)
|
63
|
+
spreadsheet
|
64
|
+
test-unit
|
65
|
+
zip
|
data/LICENSE
CHANGED
data/README.rdoc
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
=remote_table
|
2
2
|
|
3
|
-
|
3
|
+
Open local or remote XLSX, XLS, ODS, CSV and fixed-width files.
|
4
4
|
|
5
5
|
==Real-life usage
|
6
6
|
|
@@ -8,15 +8,29 @@ Used by data_miner (http://github.com/seamusabshere/data_miner)
|
|
8
8
|
|
9
9
|
==Example
|
10
10
|
|
11
|
-
Taken from <tt>#{GEMDIR}/test/
|
11
|
+
Taken from <tt>#{GEMDIR}/test/test_remote_table.rb</tt>:
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
should "open an XLSX" do
|
14
|
+
t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
|
15
|
+
assert_equal "Secure encryption of all data", t[5]["Requirements"]
|
16
|
+
end
|
17
|
+
|
18
|
+
or on the console
|
19
|
+
|
20
|
+
?> t = RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', 'filename' => '98guide6.csv'
|
21
|
+
=> #<RemoteTable:0x359da50 [...]>
|
22
|
+
?> t[0]
|
23
|
+
=> {"cyl"=>"6", "eng dscr"=>"DOHC VTEC", "trans dscr"=>"2MODE CLKUP", "trans"=>"Auto(L4)", "cmb"=>"20", "2pv"=>nil, "carline name"=>"NSX", "displ"=>"3.0", "ucmb"=>"23.5311", "hpv"=>nil, "4pv"=>nil, "Class"=>"TWO SEATERS", "Manufacturer"=>"ACURA", "fl"=>"P", "2lv"=>nil, "G"=>nil, "hlv"=>nil, "drv"=>"R", "cty"=>"18", "ucty"=>"19.8733", "S"=>nil, "4lv"=>nil, "fcost"=>"1050", "T"=>nil, "hwy"=>"24", "uhwy"=>"30.3612"}
|
17
24
|
|
18
25
|
See the test file and also data_miner examples of custom parsers.
|
19
26
|
|
27
|
+
==Wishlist
|
28
|
+
|
29
|
+
* The new parser syntax (aka transformer) hasn't been defined yet... only the old-style syntax is available
|
30
|
+
* We currently call curl (and a lot of other utilities) using a shell. Is there a safer way to do this?
|
31
|
+
* Row hashes may come out differently for Ruby 1.8 and Ruby 1.9, which ruins the whole purpose.
|
32
|
+
* Since <tt>Enumerable</tt> provides <tt>#to_a</tt>, I'm not sure if it's caching the row loading.
|
33
|
+
|
20
34
|
==Authors
|
21
35
|
|
22
36
|
* Seamus Abshere <seamus@abshere.net>
|
@@ -24,4 +38,4 @@ See the test file and also data_miner examples of custom parsers.
|
|
24
38
|
|
25
39
|
== Copyright
|
26
40
|
|
27
|
-
Copyright (c)
|
41
|
+
Copyright (c) 2011 Brighter Planet. See LICENSE for details.
|
data/Rakefile
CHANGED
@@ -1,72 +1,23 @@
|
|
1
|
-
require '
|
2
|
-
|
3
|
-
|
4
|
-
begin
|
5
|
-
require 'jeweler'
|
6
|
-
Jeweler::Tasks.new do |gem|
|
7
|
-
gem.name = "remote_table"
|
8
|
-
gem.summary = %Q{Remotely open and parse XLS, ODS, CSV and fixed-width tables.}
|
9
|
-
gem.description = %Q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
|
10
|
-
gem.email = "seamus@abshere.net"
|
11
|
-
gem.homepage = "http://github.com/seamusabshere/remote_table"
|
12
|
-
gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
|
13
|
-
# sabshere [unknown date] roo 1.9.3 doesn't work, so use old 1.3 version
|
14
|
-
gem.add_dependency 'roo', '1.3.11'
|
15
|
-
# sabshere 9/30/10 depending on fastercsv when using ruby 1.9.2 results in exiting with error
|
16
|
-
# gem.add_dependency 'fastercsv', '>=1.5.0'
|
17
|
-
gem.add_dependency 'activesupport', '>=2.3.4'
|
18
|
-
gem.add_dependency 'slither', '>=0.99.4'
|
19
|
-
gem.add_dependency 'nokogiri', '>=1.4.1'
|
20
|
-
gem.add_dependency 'escape', '>=0.0.4'
|
21
|
-
gem.add_development_dependency 'errata', '>=0.2.0'
|
22
|
-
gem.require_path = "lib"
|
23
|
-
gem.rdoc_options << '--line-numbers' << '--inline-source'
|
24
|
-
gem.requirements << 'curl'
|
25
|
-
gem.rubyforge_project = "remotetable"
|
26
|
-
end
|
27
|
-
Jeweler::GemcutterTasks.new
|
28
|
-
# Jeweler::RubyforgeTasks.new do |rubyforge|
|
29
|
-
# rubyforge.doc_task = "rdoc"
|
30
|
-
# end
|
31
|
-
rescue LoadError
|
32
|
-
puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
|
33
|
-
end
|
1
|
+
require 'bundler'
|
2
|
+
Bundler::GemHelper.install_tasks
|
34
3
|
|
4
|
+
require 'rake'
|
35
5
|
require 'rake/testtask'
|
36
6
|
Rake::TestTask.new(:test) do |test|
|
37
7
|
test.libs << 'lib' << 'test'
|
38
|
-
test.pattern = 'test
|
8
|
+
test.pattern = 'test/**/test_*.rb'
|
39
9
|
test.verbose = true
|
40
10
|
end
|
41
11
|
|
42
12
|
begin
|
43
|
-
require '
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
13
|
+
require 'rake/rdoctask'
|
14
|
+
# Defines the rdoc task; generated documentation is written to ./rdoc.
Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  # The title previously read 'taps' (left over from another project's
  # Rakefile); the docs generated here are for remote_table.
  rdoc.title = 'remote_table'
  rdoc.options << '--line-numbers' << '--inline-source'
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
49
21
|
rescue LoadError
|
50
|
-
|
51
|
-
abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
task :default => :test
|
59
|
-
|
60
|
-
require 'rake/rdoctask'
|
61
|
-
Rake::RDocTask.new do |rdoc|
|
62
|
-
if File.exist?('VERSION')
|
63
|
-
version = File.read('VERSION')
|
64
|
-
else
|
65
|
-
version = ""
|
66
|
-
end
|
67
|
-
|
68
|
-
rdoc.rdoc_dir = 'rdoc'
|
69
|
-
rdoc.title = "remote_table #{version}"
|
70
|
-
rdoc.rdoc_files.include('README*')
|
71
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
22
|
+
puts "Rdoc is not available"
|
72
23
|
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'singleton'
require 'fileutils'
class RemoteTable
  # Singleton registry of filesystem paths that are queued for deletion.
  class Cleaner
    include ::Singleton

    # Returns the mutable Array of paths currently queued for deletion.
    def paths_for_removal
      @paths_for_removal ||= []
    end

    # Deletes every queued path (recursively, ignoring missing files) and
    # leaves the queue empty.
    #
    # The queue is drained with +shift+ instead of deleting entries while
    # iterating with +each+: removing elements from an Array during +each+
    # makes the iteration skip the element that slides into the freed slot,
    # which left every other path on disk.
    #
    # Returns the (now empty) queue.
    def cleanup
      until paths_for_removal.empty?
        ::FileUtils.rm_rf paths_for_removal.shift
      end
      paths_for_removal
    end

    # Queues +path+ for deletion by the next #cleanup call.
    # NOTE(review): nothing in this class registers an at_exit hook; presumably
    # the caller arranges for #cleanup to run at exit -- confirm.
    def remove_at_exit(path)
      paths_for_removal << path
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
require 'escape'
|
3
|
+
require 'fileutils'
|
4
|
+
class RemoteTable
  # Singleton helper that runs shell commands on behalf of the formats.
  class Executor
    include ::Singleton

    # Rewrites the file at +path+ in place by piping it through +cmd+.
    #
    # The file is fed to +cmd+ with /bin/cat, the output is captured in a
    # sibling temp file named "<path>.bang.<random>", and that temp file is
    # then moved over +path+.
    #
    # path - String path of the file to rewrite (shell-escaped here).
    # cmd  - String shell command or pipeline; it is interpolated verbatim,
    #        so the caller is responsible for escaping anything inside it.
    #
    # Raises RuntimeError (from #backtick_with_reporting) if the pipeline
    # exits unsuccessfully; +path+ is left untouched in that case.
    def bang(path, cmd)
      tmp_path = "#{path}.bang.#{rand}"
      backtick_with_reporting "/bin/cat #{::Escape.shell_single_word path} | #{cmd} > #{::Escape.shell_single_word tmp_path}"
      ::FileUtils.mv tmp_path, path
    ensure
      # The shell redirect creates tmp_path even when the command fails, so
      # remove it here; after a successful mv this is a no-op.
      ::FileUtils.rm_f tmp_path if tmp_path
    end

    # Runs +cmd+ in a subshell and raises if it exits unsuccessfully.
    #
    # Newlines (with any surrounding spaces) in +cmd+ are collapsed to single
    # spaces first, so multi-line command strings run as one line.
    #
    # Returns nil on success.
    # Raises RuntimeError carrying the command and its captured stdout.
    def backtick_with_reporting(cmd)
      cmd = cmd.gsub(/[ ]*\n[ ]*/m, ' ')
      output = `#{cmd}`
      unless $?.success?
        raise %{
From the remote_table gem...

Command failed:
#{cmd}

Output:
#{output}
}
      end
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# Make ::FasterCSV available on every Ruby: on 1.9 and later it is aliased to
# the standard library's CSV; on older Rubies the fastercsv gem is required.
#
# The version is compared numerically, component by component, because a plain
# String comparison is lexicographic (e.g. '10.0' sorts before '1.9').
if (RUBY_VERSION.split('.').map { |part| part.to_i } <=> [1, 9]) >= 0
  require 'csv'
  ::FasterCSV = ::CSV
else
  begin
    require 'fastercsv'
  rescue ::LoadError
    $stderr.puts "[remote_table gem] You probably need to manually install the fastercsv gem and/or require it in your Gemfile."
    # Re-raise the LoadError that is currently being handled.
    raise
  end
end
|
12
|
+
|
13
|
+
class RemoteTable
  class Format
    # Format for delimiter-separated text, parsed with FasterCSV (CSV on 1.9+).
    class Delimited < Format
      include Textual

      # Yields one ordered hash per parsed row.
      #
      # When the parser hands back header-aware rows, the hash is keyed by
      # header (columns with a blank header are dropped); when it hands back
      # plain Arrays, the hash is keyed by zero-based column index. nil values
      # are replaced with empty strings. Rows without any present value are
      # only yielded when +t.properties.keep_blank_rows+ is set.
      #
      # The local file is backed up before it is preprocessed in place and is
      # restored afterwards, even if parsing raises.
      def each(&blk)
        backup_file!
        convert_file_to_utf8!
        remove_useless_characters!
        skip_rows!
        ::FasterCSV.foreach(t.local_file.path, fastercsv_options) do |row|
          ordered_hash = ::ActiveSupport::OrderedHash.new
          has_content = false
          if row.is_a?(::FasterCSV::Row)
            row.each do |header, value|
              next if header.blank?
              value = value.nil? ? '' : value
              ordered_hash[header] = value
              has_content = true if value.present?
            end
          elsif row.is_a?(::Array)
            row.each_with_index do |value, index|
              value = value.nil? ? '' : value
              ordered_hash[index] = value
              has_content = true if value.present?
            end
          end
          next unless t.properties.keep_blank_rows || has_content
          yield ordered_hash
        end
      ensure
        restore_file!
      end

      private

      # Builds the options Hash handed to FasterCSV.foreach from the table's
      # properties: blank-line skipping, header handling and, when one is
      # configured, the column separator.
      def fastercsv_options
        options = { :skip_blanks => !t.properties.keep_blank_rows }
        options[:headers] = (t.properties.headers == false) ? nil : :first_row
        options[:col_sep] = t.properties.delimiter if t.properties.delimiter
        options
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'slither'
|
2
|
+
class RemoteTable
  class Format
    # Format for fixed-width text files, parsed with the Slither gem.
    class FixedWidth < Format
      include Textual

      # Yields one Hash per parsed row, with blank-keyed columns removed.
      #
      # Rows without any present value are only yielded when
      # +t.properties.keep_blank_rows+ is set. The local file is backed up
      # before it is converted, cropped, skipped and cut in place, and is
      # restored afterwards even if parsing raises.
      def each(&blk)
        backup_file!
        convert_file_to_utf8!
        remove_useless_characters!
        crop_rows!
        skip_rows!
        cut_columns!
        parser.parse[:rows].each do |hash|
          hash.delete_if { |key, _value| key.blank? }
          next unless t.properties.keep_blank_rows || hash.any? { |_key, value| value.present? }
          yield hash
        end
      ensure
        restore_file!
      end

      private

      # Memoized Slither parser bound to the definition and the local file.
      def parser
        @parser ||= ::Slither::Parser.new definition, t.local_file.path
      end

      # Memoized Slither definition describing the column layout.
      def definition
        @definition ||= build_definition
      end

      # Resolves the definition from the table's properties: a String/Symbol
      # +schema_name+ is looked up through Slither; otherwise an Array
      # +schema+ of [name, width, options] entries is turned into a new
      # definition registered under a random name.
      #
      # Raises RuntimeError when neither form is supplied.
      def build_definition
        schema_name = t.properties.schema_name
        if schema_name.is_a?(::String) or schema_name.is_a?(::Symbol)
          return ::Slither.send(:definition, schema_name)
        end
        unless t.properties.schema.is_a?(::Array)
          raise "expecting schema_name to be a String or Symbol, or schema to be an Array"
        end
        # trap that accepts every line, so all rows go to the single section
        everything = lambda { |_| true }
        ::Slither.define(rand.to_s) do |d|
          d.rows do |row|
            row.trap(&everything)
            t.properties.schema.each do |name, width, options|
              if name == 'spacer'
                row.spacer width
              else
                row.column name, width, options
              end
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'cgi'
|
3
|
+
class RemoteTable
  class Format
    # Format that extracts rows from an HTML document using the XPath
    # expressions configured on the table's properties.
    class HTML < Format
      include Textual

      # Yields one Hash per matched row, keyed by header.
      #
      # Headers come from +t.properties.headers+ when that is an Array;
      # otherwise the first matched row is consumed as the header row and is
      # not yielded. Cell text has runs of whitespace collapsed to one space
      # and is stripped. Rows without any present value are only yielded when
      # +t.properties.keep_blank_rows+ is set.
      #
      # The local file is backed up before it is preprocessed in place and is
      # restored afterwards, even if parsing raises.
      def each(&blk)
        backup_file!
        convert_file_to_utf8!
        remove_useless_characters!
        html_headers = t.properties.headers.is_a?(::Array) ? t.properties.headers : nil
        ::Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8').xpath(t.properties.row_xpath).each do |row|
          values = row.xpath(t.properties.column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
          if html_headers.nil?
            html_headers = values
            next
          end
          hash = zip html_headers, values
          yield hash if t.properties.keep_blank_rows or hash.any? { |k, v| v.present? }
        end
      ensure
        restore_file!
      end

      private

      # Pairs +keys+ with +values+ positionally and returns the resulting Hash.
      # http://snippets.dzone.com/posts/show/406
      def zip(keys, values)
        hash = ::Hash.new
        keys.zip(values) { |k, v| hash[k] = v }
        hash
      end

      # Returns the local file's contents with the basic HTML entities
      # unescaped and soft-hyphen entities removed.
      #
      # should we be doing this in ruby?
      def unescaped_html_without_soft_hyphens
        str = ::CGI.unescapeHTML ::IO.read(t.local_file.path)
        # get rid of MS Office baddies: CGI.unescapeHTML leaves the named
        # entity &shy; alone, so strip it by name. (Raw soft-hyphen bytes have
        # already been removed from the file by remove_useless_characters!.)
        str.gsub!(/&shy;/, '')
        str
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'roo'
|
2
|
+
class RemoteTable
  class Format
    # Shared row iteration for spreadsheet formats backed by the roo gem.
    # The including class must provide +roo_class+ and +t+.
    module Rooable
      # Yields one ordered hash per spreadsheet row, mapping each column's key
      # to the cell's text (converted with to_s, tags stripped, whitespace
      # trimmed).
      #
      # Column keys depend on +t.properties.headers+:
      # * false    - zero-based column numbers
      # * an Array - the names in that Array, by position
      # * otherwise - the cells of the header row, falling back to the cell one
      #   row above when a header cell is blank
      #
      # Columns whose key is blank are left out. Rows without any present
      # value are only yielded when +t.properties.keep_blank_rows+ is set.
      #
      # NOTE(review): iteration always starts one row below the header row,
      # even when headers == false -- confirm that skipping that row is
      # intended in the header-less case.
      def each(&blk)
        spreadsheet = roo_class.new t.local_file.path, nil, :ignore
        sheet = t.properties.sheet
        spreadsheet.default_sheet = sheet.is_a?(::Numeric) ? spreadsheet.sheets[sheet] : sheet

        column_references = ::Hash.new
        headers = t.properties.headers
        (1..spreadsheet.last_column).each do |col|
          column_references[col] =
            if headers == false
              # zero-based numeric keys
              col - 1
            elsif headers.is_a? ::Array
              # names
              headers[col - 1]
            else
              # read the headers from the file itself
              name = spreadsheet.cell(header_row, col)
              name = spreadsheet.cell(header_row - 1, col) if name.blank? # look up
              name
            end
        end

        first_data_row.upto(spreadsheet.last_row) do |raw_row|
          ordered_hash = ::ActiveSupport::OrderedHash.new
          (1..spreadsheet.last_column).each do |col|
            key = column_references[col]
            next if key.blank?
            ordered_hash[key] = spreadsheet.cell(raw_row, col).to_s.gsub(/<[^>]+>/, '').strip
          end
          next unless t.properties.keep_blank_rows || ordered_hash.any? { |_key, value| value.present? }
          yield ordered_hash
        end
      end

      private

      # One-based number of the row holding the headers: the first row after
      # the +t.properties.skip+ skipped ones.
      def header_row
        t.properties.skip + 1
      end

      # One-based number of the first row treated as data.
      def first_data_row
        header_row + 1
      end
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'escape'
|
3
|
+
class RemoteTable
  class Format
    # In-place preprocessing steps for text-based formats. Every step rewrites
    # the table's local file by piping it through a shell command via
    # RemoteTable.executor.
    module Textual
      # Re-encodes the local file from +t.properties.encoding+ to UTF-8 with
      # iconv; the -c flag is passed so unconvertible characters are dropped.
      def convert_file_to_utf8!
        source_encoding = ::Escape.shell_single_word t.properties.encoding
        ::RemoteTable.executor.bang t.local_file.path, "iconv -c -f #{source_encoding} -t UTF-8"
      end

      # Byte sequences stripped by #remove_useless_characters!. They are
      # single-quoted on purpose: the backslash escapes are handed to perl
      # literally and interpreted there.
      USELESS_CHARACTERS = [
        '\xef\xbb\xbf',   # UTF-8 byte order mark
        '\xc2\xad'        # soft hyphen, often inserted by MS Office (html: &shy;)
      ]

      # Deletes every USELESS_CHARACTERS sequence from the local file using a
      # chain of perl substitutions.
      def remove_useless_characters!
        substitutions = USELESS_CHARACTERS.join '//g; s/'
        ::RemoteTable.executor.bang t.local_file.path, "perl -pe 's/#{substitutions}//g'"
      end

      # Drops the first +t.properties.skip+ lines of the local file; does
      # nothing when skip is not positive.
      def skip_rows!
        skip = t.properties.skip
        return unless skip > 0
        ::RemoteTable.executor.bang t.local_file.path, "tail -n +#{skip + 1}"
      end

      # Keeps only the lines from +t.properties.crop.first+ through
      # +t.properties.crop.last+ (inclusive); does nothing when crop is unset.
      def crop_rows!
        return unless t.properties.crop
        first_line = ::Escape.shell_single_word t.properties.crop.first.to_s
        line_count = t.properties.crop.last - t.properties.crop.first + 1
        ::RemoteTable.executor.bang t.local_file.path, "tail -n +#{first_line} | head -n #{line_count}"
      end

      # Keeps only the character positions given by +t.properties.cut+ (passed
      # to `cut -c`); does nothing when cut is unset.
      def cut_columns!
        return unless t.properties.cut
        positions = ::Escape.shell_single_word t.properties.cut.to_s
        ::RemoteTable.executor.bang t.local_file.path, "cut -c #{positions}"
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# backup_file!/restore_file! use FileUtils, so require it here instead of
# relying on another file having loaded it first.
require 'fileutils'
class RemoteTable
  # Base class for the concrete table formats. Subclasses implement #each;
  # Enumerable then supplies the rest of the collection API.
  class Format
    # Raised to signal an unrecognized format.
    class Unknown < StandardError; end

    autoload :Excel, 'remote_table/format/excel'
    autoload :Excelx, 'remote_table/format/excelx'
    autoload :Delimited, 'remote_table/format/delimited'
    autoload :OpenOffice, 'remote_table/format/open_office'
    autoload :FixedWidth, 'remote_table/format/fixed_width'
    autoload :HTML, 'remote_table/format/html'

    autoload :Textual, 'remote_table/format/mixins/textual'
    autoload :Rooable, 'remote_table/format/mixins/rooable'

    # The table object this format reads from.
    attr_reader :t

    # t - the owning table; the methods below read +t.local_file.path+.
    def initialize(t)
      @t = t
    end

    include ::Enumerable

    # Placeholder that concrete formats override.
    #
    # Raises RuntimeError unconditionally.
    def each
      raise "must be defined by format"
    end

    # Copies the table's local file to "<path>.backup" so that in-place
    # preprocessing can be undone with #restore_file!.
    def backup_file!
      ::FileUtils.cp t.local_file.path, "#{t.local_file.path}.backup"
    end

    # Moves "<path>.backup" back over the local file. Does nothing when no
    # readable backup exists.
    def restore_file!
      return unless ::File.readable? "#{t.local_file.path}.backup"
      ::FileUtils.mv "#{t.local_file.path}.backup", t.local_file.path
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'singleton'
require 'digest/md5'
# String#toutf8 (used below) is defined by kconv.
require 'kconv'
class RemoteTable
  # Singleton that computes a digest identifying a row's contents.
  class Hasher
    include ::Singleton

    # Returns the MD5 hex digest (String) of the marshalled, normalized +row+.
    #
    # On Ruby 1.9 and later the row is rebuilt with its keys in sorted order
    # and with every key and value converted to a UTF-8 String, so the digest
    # does not depend on insertion order or on whether a value arrived as,
    # say, 1 or '1'. On older Rubies the row is copied into a plain Hash
    # as-is.
    #
    # row - a Hash-like object responding to #keys and #[]; on 1.9+ the keys
    #       must be mutually comparable because they are sorted.
    #
    # NOTE(review): this overrides Object#hash with a required argument, so
    # using the Hasher instance itself as a Hash key would raise ArgumentError.
    def hash(row)
      # Compare version components numerically; a String comparison is
      # lexicographic and would misorder e.g. '10.0' and '1.9'.
      modern_ruby = (RUBY_VERSION.split('.').map { |part| part.to_i } <=> [1, 9]) >= 0
      normalized_hash = if modern_ruby
        row.keys.sort.inject(::Hash.new) do |memo, k|
          normalized_k = k.to_s.toutf8
          normalized_v = row[k].respond_to?(:to_s) ? row[k].to_s.toutf8 : row[k]
          memo[normalized_k] = normalized_v
          memo
        end
      else
        ::Hash.new.replace(row)
      end
      # sabshere 1/21/11 may currently break across versions of ruby
      # ruby-1.8.7-p174 > Marshal.dump({'a' => '1'})
      # => "\004\b{\006\"\006a\"\0061"
      # ruby-1.9.2-p0 > Marshal.dump({'a' => '1'})
      # => "\x04\b{\x06I\"\x06a\x06:\x06ETI\"\x061\x06;\x00T"
      ::Digest::MD5.hexdigest ::Marshal.dump(normalized_hash)
    end
  end
end
|