remote_table 0.2.32 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. data/CHANGELOG +5 -0
  2. data/Gemfile +4 -0
  3. data/Gemfile.lock +65 -0
  4. data/LICENSE +1 -1
  5. data/README.rdoc +21 -7
  6. data/Rakefile +12 -61
  7. data/lib/remote_table/cleaner.rb +19 -0
  8. data/lib/remote_table/executor.rb +29 -0
  9. data/lib/remote_table/format/delimited.rb +62 -0
  10. data/lib/remote_table/format/excel.rb +10 -0
  11. data/lib/remote_table/format/excelx.rb +10 -0
  12. data/lib/remote_table/format/fixed_width.rb +47 -0
  13. data/lib/remote_table/format/html.rb +43 -0
  14. data/lib/remote_table/format/mixins/rooable.rb +47 -0
  15. data/lib/remote_table/format/mixins/textual.rb +34 -0
  16. data/lib/remote_table/format/open_office.rb +10 -0
  17. data/lib/remote_table/format.rb +35 -0
  18. data/lib/remote_table/hasher.rb +25 -0
  19. data/lib/remote_table/local_file.rb +92 -0
  20. data/lib/remote_table/properties.rb +209 -0
  21. data/lib/remote_table/transformer.rb +17 -0
  22. data/lib/remote_table/version.rb +3 -0
  23. data/lib/remote_table.rb +91 -99
  24. data/remote_table.gemspec +32 -77
  25. data/test/{test_helper.rb → helper.rb} +9 -2
  26. data/test/test_big.rb +61 -0
  27. data/test/test_errata.rb +46 -0
  28. data/test/test_old_syntax.rb +229 -0
  29. data/test/test_old_transform.rb +49 -0
  30. data/test/test_remote_table.rb +13 -0
  31. metadata +176 -53
  32. data/VERSION +0 -1
  33. data/lib/remote_table/file/csv.rb +0 -49
  34. data/lib/remote_table/file/fixed_width.rb +0 -19
  35. data/lib/remote_table/file/html.rb +0 -37
  36. data/lib/remote_table/file/ods.rb +0 -11
  37. data/lib/remote_table/file/roo_spreadsheet.rb +0 -44
  38. data/lib/remote_table/file/xls.rb +0 -11
  39. data/lib/remote_table/file/xlsx.rb +0 -11
  40. data/lib/remote_table/file.rb +0 -100
  41. data/lib/remote_table/package.rb +0 -89
  42. data/lib/remote_table/request.rb +0 -44
  43. data/lib/remote_table/transform.rb +0 -58
  44. data/test/remote_table_test.rb +0 -386
data/CHANGELOG CHANGED
@@ -1,3 +1,8 @@
1
+ 1.0.0
2
+ * Refactored to follow more Ruby conventions
3
+ * Suggesting new syntax that looks more like an Enumerable... t[5] instead of t.rows[5]
4
+ * Switching to string option keys (but old syntax is supported)
5
+ [...no changelog for 0.1.6--1.0.0...sorry]
1
6
  0.1.6
2
7
  * For CSVs, force convert headers using String#toutf8. :encoding => 'N'|'U' didn't work.
3
8
  * Fix handling of long urls when passing off to Tempfile.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in remote_table.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,65 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ remote_table (1.0.0)
5
+ activesupport (>= 2.3.4)
6
+ builder
7
+ escape (>= 0.0.4)
8
+ google-spreadsheet-ruby
9
+ i18n
10
+ nokogiri (>= 1.4.1)
11
+ roo (~> 1.9)
12
+ slither (>= 0.99.4)
13
+ spreadsheet
14
+ zip
15
+
16
+ GEM
17
+ remote: http://rubygems.org/
18
+ specs:
19
+ activesupport (3.0.3)
20
+ builder (3.0.0)
21
+ columnize (0.3.2)
22
+ errata (0.2.4)
23
+ activesupport (>= 2.3.4)
24
+ remote_table (>= 0.2.31)
25
+ escape (0.0.4)
26
+ google-spreadsheet-ruby (0.1.2)
27
+ nokogiri (>= 1.4.3.1)
28
+ oauth (>= 0.3.6)
29
+ i18n (0.5.0)
30
+ linecache (0.43)
31
+ nokogiri (1.4.3.1)
32
+ oauth (0.4.4)
33
+ roo (1.9.3)
34
+ ruby-debug (0.10.4)
35
+ columnize (>= 0.1)
36
+ ruby-debug-base (~> 0.10.4.0)
37
+ ruby-debug-base (0.10.4)
38
+ linecache (>= 0.3)
39
+ ruby-ole (1.2.10.1)
40
+ shoulda (2.10.3)
41
+ slither (0.99.4)
42
+ spreadsheet (0.6.4.1)
43
+ ruby-ole
44
+ test-unit (2.1.2)
45
+ zip (2.0.2)
46
+
47
+ PLATFORMS
48
+ ruby
49
+
50
+ DEPENDENCIES
51
+ activesupport (>= 2.3.4)
52
+ builder
53
+ errata (>= 0.2.0)
54
+ escape (>= 0.0.4)
55
+ google-spreadsheet-ruby
56
+ i18n
57
+ nokogiri (>= 1.4.1)
58
+ remote_table!
59
+ roo (~> 1.9)
60
+ ruby-debug
61
+ shoulda
62
+ slither (>= 0.99.4)
63
+ spreadsheet
64
+ test-unit
65
+ zip
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2009 Brighter Planet
1
+ Copyright (c) 2011 Brighter Planet
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.rdoc CHANGED
@@ -1,6 +1,6 @@
1
1
  =remote_table
2
2
 
3
- Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.
3
+ Open local or remote XLSX, XLS, ODS, CSV and fixed-width files.
4
4
 
5
5
  ==Real-life usage
6
6
 
@@ -8,15 +8,29 @@ Used by data_miner (http://github.com/seamusabshere/data_miner)
8
8
 
9
9
  ==Example
10
10
 
11
- Taken from <tt>#{GEMDIR}/test/remote_table_test.rb</tt>:
11
+ Taken from <tt>#{GEMDIR}/test/test_remote_table.rb</tt>:
12
12
 
13
- >> t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv')
14
- => #<RemoteTable:0x359da50 @transform=#<RemoteTable::Transform:0x359d154 @select=nil, @reject=nil>, @file=#<RemoteTable::File:0x35970c4 @delimiter=nil, @headers=nil, @cut=nil, @filename="98guide6.csv", @skip=nil, @schema_name=nil, @crop=nil, @format=:csv, @trap=nil, @sheet=0, @schema=nil>, @package=#<RemoteTable::Package:0x359c538 @url="http://www.fueleconomy.gov/FEG/epadata/98guide6.zip", @filename="98guide6.csv", @compression=:zip, @packing=nil>, @request=#<RemoteTable::Request:0x3596bec @url="http://www.fueleconomy.gov/FEG/epadata/98guide6.zip">>
15
- >> t.rows.first
16
- => {"cyl"=>"6", "eng dscr"=>"DOHC VTEC", "trans dscr"=>"2MODE CLKUP", "trans"=>"Auto(L4)", "cmb"=>"20", "2pv"=>nil, "carline name"=>"NSX", "displ"=>"3.0", "ucmb"=>"23.5311", "hpv"=>nil, "4pv"=>nil, "Class"=>"TWO SEATERS", "Manufacturer"=>"ACURA", "fl"=>"P", "2lv"=>nil, "G"=>nil, "hlv"=>nil, "drv"=>"R", "cty"=>"18", "ucty"=>"19.8733", "S"=>nil, "4lv"=>nil, "fcost"=>"1050", "T"=>nil, "hwy"=>"24", "uhwy"=>"30.3612"}
13
+ should "open an XLSX" do
14
+ t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
15
+ assert_equal "Secure encryption of all data", t[5]["Requirements"]
16
+ end
17
+
18
+ or on the console
19
+
20
+ ?> t = RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', 'filename' => '98guide6.csv'
21
+ => #<RemoteTable:0x359da50 [...]>
22
+ ?> t[0]
23
+ => {"cyl"=>"6", "eng dscr"=>"DOHC VTEC", "trans dscr"=>"2MODE CLKUP", "trans"=>"Auto(L4)", "cmb"=>"20", "2pv"=>nil, "carline name"=>"NSX", "displ"=>"3.0", "ucmb"=>"23.5311", "hpv"=>nil, "4pv"=>nil, "Class"=>"TWO SEATERS", "Manufacturer"=>"ACURA", "fl"=>"P", "2lv"=>nil, "G"=>nil, "hlv"=>nil, "drv"=>"R", "cty"=>"18", "ucty"=>"19.8733", "S"=>nil, "4lv"=>nil, "fcost"=>"1050", "T"=>nil, "hwy"=>"24", "uhwy"=>"30.3612"}
17
24
 
18
25
  See the test file and also data_miner examples of custom parsers.
19
26
 
27
+ ==Wishlist
28
+
29
+ * The new parser syntax (aka transformer) hasn't been defined yet... only the old-style syntax is available
30
+ * We currently call curl (and a lot of other utilities) using a shell. Is there a safer way to do this?
31
+ * Row hashes may come out differently for Ruby 1.8 and Ruby 1.9, which ruins the whole purpose.
32
+ * Since <tt>Enumerable</tt> provides <tt>#to_a</tt>, I'm not sure if it's caching the row loading.
33
+
20
34
  ==Authors
21
35
 
22
36
  * Seamus Abshere <seamus@abshere.net>
@@ -24,4 +38,4 @@ See the test file and also data_miner examples of custom parsers.
24
38
 
25
39
  == Copyright
26
40
 
27
- Copyright (c) 2010 Brighter Planet. See LICENSE for details.
41
+ Copyright (c) 2011 Brighter Planet. See LICENSE for details.
data/Rakefile CHANGED
@@ -1,72 +1,23 @@
1
- require 'rubygems'
2
- require 'rake'
3
-
4
- begin
5
- require 'jeweler'
6
- Jeweler::Tasks.new do |gem|
7
- gem.name = "remote_table"
8
- gem.summary = %Q{Remotely open and parse XLS, ODS, CSV and fixed-width tables.}
9
- gem.description = %Q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
10
- gem.email = "seamus@abshere.net"
11
- gem.homepage = "http://github.com/seamusabshere/remote_table"
12
- gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
13
- # sabshere [unknown date] roo 1.9.3 doesn't work, so use old 1.3 version
14
- gem.add_dependency 'roo', '1.3.11'
15
- # sabshere 9/30/10 depending on fastercsv when using ruby 1.9.2 results in exiting with error
16
- # gem.add_dependency 'fastercsv', '>=1.5.0'
17
- gem.add_dependency 'activesupport', '>=2.3.4'
18
- gem.add_dependency 'slither', '>=0.99.4'
19
- gem.add_dependency 'nokogiri', '>=1.4.1'
20
- gem.add_dependency 'escape', '>=0.0.4'
21
- gem.add_development_dependency 'errata', '>=0.2.0'
22
- gem.require_path = "lib"
23
- gem.rdoc_options << '--line-numbers' << '--inline-source'
24
- gem.requirements << 'curl'
25
- gem.rubyforge_project = "remotetable"
26
- end
27
- Jeweler::GemcutterTasks.new
28
- # Jeweler::RubyforgeTasks.new do |rubyforge|
29
- # rubyforge.doc_task = "rdoc"
30
- # end
31
- rescue LoadError
32
- puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
33
- end
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
34
3
 
4
+ require 'rake'
35
5
  require 'rake/testtask'
36
6
  Rake::TestTask.new(:test) do |test|
37
7
  test.libs << 'lib' << 'test'
38
- test.pattern = 'test/**/*_test.rb'
8
+ test.pattern = 'test/**/test_*.rb'
39
9
  test.verbose = true
40
10
  end
41
11
 
42
12
  begin
43
- require 'rcov/rcovtask'
44
- Rcov::RcovTask.new do |test|
45
- test.libs << 'test'
46
- test.pattern = 'test/**/*_test.rb'
47
- test.verbose = true
13
+ require 'rake/rdoctask'
14
+ Rake::RDocTask.new do |rdoc|
15
+ rdoc.rdoc_dir = 'rdoc'
16
+ rdoc.title = 'taps'
17
+ rdoc.options << '--line-numbers' << '--inline-source'
18
+ rdoc.rdoc_files.include('README*')
19
+ rdoc.rdoc_files.include('lib/**/*.rb')
48
20
  end
49
21
  rescue LoadError
50
- task :rcov do
51
- abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
52
- end
53
- end
54
-
55
-
56
-
57
-
58
- task :default => :test
59
-
60
- require 'rake/rdoctask'
61
- Rake::RDocTask.new do |rdoc|
62
- if File.exist?('VERSION')
63
- version = File.read('VERSION')
64
- else
65
- version = ""
66
- end
67
-
68
- rdoc.rdoc_dir = 'rdoc'
69
- rdoc.title = "remote_table #{version}"
70
- rdoc.rdoc_files.include('README*')
71
- rdoc.rdoc_files.include('lib/**/*.rb')
22
+ puts "Rdoc is not available"
72
23
  end
@@ -0,0 +1,19 @@
1
+ require 'singleton'
2
+ require 'fileutils'
class RemoteTable
  # Process-wide registry (Singleton) of filesystem paths that should be
  # deleted later via #cleanup.
  class Cleaner
    include ::Singleton

    # Paths that have been registered and not yet removed.
    # The backing Array is created lazily on first access.
    def paths_for_removal
      @paths_for_removal ||= []
    end

    # Recursively delete every registered path and empty the registry.
    #
    # Entries are shifted off the front one at a time instead of being
    # deleted from inside an #each over the same array: removing elements
    # while iterating makes #each skip the element that follows each
    # deletion, which left about half of the registered paths on disk.
    #
    # Returns the (now empty) registry array.
    def cleanup
      ::FileUtils.rm_rf paths_for_removal.shift until paths_for_removal.empty?
      paths_for_removal
    end

    # Register +path+ so that the next #cleanup removes it.
    # Returns the registry array.
    def remove_at_exit(path)
      paths_for_removal << path
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'singleton'
2
+ require 'escape'
3
+ require 'fileutils'
class RemoteTable
  # Singleton that runs shell commands for the gem and raises a descriptive
  # error when one of them fails.
  class Executor
    include ::Singleton

    # Rewrite the file at +path+ in place by piping its contents through the
    # shell command +cmd+.
    #
    # The pipeline writes to a sibling temp file which is moved over +path+
    # only after the command succeeded. Whatever happens, the temp file is
    # not left behind: previously a failing command left its partial output
    # next to the original file.
    #
    # Raises RuntimeError (from #backtick_with_reporting) if the command fails.
    def bang(path, cmd)
      tmp_path = "#{path}.bang.#{rand}"
      backtick_with_reporting "/bin/cat #{::Escape.shell_single_word path} | #{cmd} > #{::Escape.shell_single_word tmp_path}"
      ::FileUtils.mv tmp_path, path
    ensure
      # No-op after a successful mv; removes partial output after a failure.
      ::FileUtils.rm_f tmp_path if tmp_path
    end

    # Run +cmd+ through backticks after collapsing embedded newlines (and the
    # spaces around them) into single spaces.
    #
    # Returns the command's standard output when it exits successfully.
    # Raises RuntimeError carrying the command line and its captured output
    # when the exit status is non-zero.
    def backtick_with_reporting(cmd)
      cmd = cmd.gsub(/[ ]*\n[ ]*/m, ' ')
      output = `#{cmd}`
      return output if $?.success?

      raise %{
From the remote_table gem...

Command failed:
#{cmd}

Output:
#{output}
}
    end
  end
end
@@ -0,0 +1,62 @@
1
+ if RUBY_VERSION >= '1.9'
2
+ require 'csv'
3
+ ::FasterCSV = ::CSV
4
+ else
5
+ begin
6
+ require 'fastercsv'
7
+ rescue ::LoadError
8
+ $stderr.puts "[remote_table gem] You probably need to manually install the fastercsv gem and/or require it in your Gemfile."
9
+ raise $!
10
+ end
11
+ end
12
+
class RemoteTable
  class Format
    # Delimited text (CSV-like) format, parsed with FasterCSV.
    class Delimited < Format
      include Textual

      # Yield one ordered hash per row of the local file.
      #
      # The file is backed up, converted to UTF-8, stripped of useless
      # characters and has its leading rows skipped before parsing; the
      # backup is restored afterwards, even when parsing raises.
      #
      # Rows parsed with headers are keyed by header (cells under a blank
      # header are dropped); plain array rows are keyed by zero-based
      # position. Nil cells become ''. Rows without any present value are
      # only yielded when the keep_blank_rows property is set.
      def each(&blk)
        backup_file!
        convert_file_to_utf8!
        remove_useless_characters!
        skip_rows!
        ::FasterCSV.foreach(t.local_file.path, fastercsv_options) do |row|
          record = ::ActiveSupport::OrderedHash.new
          present_cells = 0
          if row.is_a?(::FasterCSV::Row)
            row.each do |header, value|
              next if header.blank?
              cell = value.nil? ? '' : value
              record[header] = cell
              present_cells += 1 if cell.present?
            end
          elsif row.is_a?(::Array)
            row.each_with_index do |value, position|
              cell = value.nil? ? '' : value
              record[position] = cell
              present_cells += 1 if cell.present?
            end
          end
          yield record if t.properties.keep_blank_rows || present_cells > 0
        end
      ensure
        restore_file!
      end

      private

      # Options hash handed to FasterCSV.foreach, derived from the table's
      # properties: blank-line skipping, header handling and, when one is
      # configured, the column separator.
      def fastercsv_options
        options = {
          :skip_blanks => !t.properties.keep_blank_rows,
          :headers => (t.properties.headers == false ? nil : :first_row)
        }
        delimiter = t.properties.delimiter
        options[:col_sep] = delimiter if delimiter
        options
      end
    end
  end
end
@@ -0,0 +1,10 @@
class RemoteTable
  class Format
    # Spreadsheet format whose rows are read through the Rooable mixin.
    class Excel < Format
      include Rooable

      # Reader class that Rooable#each instantiates for this format.
      def roo_class
        ::Excel
      end
    end
  end
end
@@ -0,0 +1,10 @@
class RemoteTable
  class Format
    # Spreadsheet format whose rows are read through the Rooable mixin.
    class Excelx < Format
      include Rooable

      # Reader class that Rooable#each instantiates for this format.
      def roo_class
        ::Excelx
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ require 'slither'
class RemoteTable
  class Format
    # Fixed-width text format, parsed with a Slither definition.
    class FixedWidth < Format
      include Textual

      # Yield one hash per row produced by the Slither parser.
      #
      # The local file is backed up and then rewritten step by step (UTF-8
      # conversion, useless-character removal, crop, skip, cut) before
      # parsing; the backup is restored afterwards, even on error.
      # Entries with a blank key are dropped from each row, and rows with
      # no present value are only yielded when keep_blank_rows is set.
      def each(&blk)
        backup_file!
        convert_file_to_utf8!
        remove_useless_characters!
        crop_rows!
        skip_rows!
        cut_columns!
        parser.parse[:rows].each do |record|
          record.reject! { |name, _| name.blank? }
          next unless t.properties.keep_blank_rows || record.any? { |_, value| value.present? }
          yield record
        end
      ensure
        restore_file!
      end

      private

      # Memoized Slither parser bound to the local file.
      def parser
        @parser ||= ::Slither::Parser.new definition, t.local_file.path
      end

      # Memoized Slither definition for this table.
      def definition
        @definition ||= build_definition
      end

      # Look up a named Slither definition when schema_name is a String or
      # Symbol; otherwise build an anonymous one from the schema Array of
      # [name, width, options] entries, where the name 'spacer' marks a gap.
      # Raises RuntimeError when neither property has the expected type.
      def build_definition
        schema_name = t.properties.schema_name
        if schema_name.is_a?(::String) || schema_name.is_a?(::Symbol)
          return ::Slither.send(:definition, schema_name)
        end
        unless t.properties.schema.is_a?(::Array)
          raise "expecting schema_name to be a String or Symbol, or schema to be an Array"
        end

        # Trap that accepts every line.
        accept_all = lambda { |_| true }
        ::Slither.define(rand.to_s) do |d|
          d.rows do |row|
            row.trap(&accept_all)
            t.properties.schema.each do |name, width, options|
              if name == 'spacer'
                row.spacer width
              else
                row.column name, width, options
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ require 'nokogiri'
2
+ require 'cgi'
class RemoteTable
  class Format
    # HTML table format: rows and cells are located with the row_xpath and
    # column_xpath properties and parsed with Nokogiri.
    class HTML < Format
      include Textual

      # Yield one hash per matched row, keyed by header.
      #
      # Headers come from the headers property when it is an Array;
      # otherwise the first matched row supplies them and is not yielded.
      # Cell text has runs of whitespace collapsed and is stripped. Rows
      # with no present value are only yielded when keep_blank_rows is set.
      # The local file is backed up first and restored afterwards.
      def each(&blk)
        backup_file!
        convert_file_to_utf8!
        remove_useless_characters!
        column_names = t.properties.headers if t.properties.headers.is_a?(::Array)
        document = ::Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8')
        document.xpath(t.properties.row_xpath).each do |tr|
          cells = tr.xpath(t.properties.column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
          if column_names.nil?
            # First row doubles as the header row.
            column_names = cells
          else
            record = zip(column_names, cells)
            yield record if t.properties.keep_blank_rows || record.any? { |_, value| value.present? }
          end
        end
      ensure
        restore_file!
      end

      private

      # Pair each key with the value at the same position and return the
      # result as a Hash.
      # http://snippets.dzone.com/posts/show/406
      def zip(keys, values)
        keys.zip(values).inject(::Hash.new) do |memo, (key, value)|
          memo[key] = value
          memo
        end
      end

      # Contents of the local file with HTML entities unescaped and any
      # remaining "&shy;" entities (MS Office soft hyphens) removed.
      # should we be doing this in ruby?
      def unescaped_html_without_soft_hyphens
        ::CGI.unescapeHTML(::IO.read(t.local_file.path)).gsub(/&shy;/, '')
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ require 'roo'
class RemoteTable
  class Format
    # Shared row iteration for spreadsheet formats. The including class
    # supplies #roo_class, the reader class to instantiate.
    module Rooable
      # Yield one ordered hash per data row of the selected sheet.
      #
      # Keys are zero-based column positions when the headers property is
      # false, the supplied names when it is an Array, and otherwise the
      # cells of the header row (falling back to the row above it when a
      # header cell is blank). Columns with a blank key are skipped. Cell
      # values are stringified, have anything that looks like a tag
      # removed, and are stripped. Rows with no present value are only
      # yielded when keep_blank_rows is set.
      def each(&blk)
        book = roo_class.new(t.local_file.path, nil, :ignore)
        sheet = t.properties.sheet
        # A numeric sheet is an index into the workbook's sheet list.
        book.default_sheet = sheet.is_a?(::Numeric) ? book.sheets[sheet] : sheet

        columns = 1..book.last_column
        keys_by_column = ::Hash.new
        if t.properties.headers == false
          # zero-based numeric keys
          columns.each { |col| keys_by_column[col] = col - 1 }
        elsif t.properties.headers.is_a?(::Array)
          # names
          columns.each { |col| keys_by_column[col] = t.properties.headers[col - 1] }
        else
          # read the headers from the file itself
          columns.each do |col|
            name = book.cell(header_row, col)
            # look up one row when the header cell is blank
            name = book.cell(header_row - 1, col) if name.blank?
            keys_by_column[col] = name
          end
        end

        first_data_row.upto(book.last_row) do |row_number|
          record = ::ActiveSupport::OrderedHash.new
          columns.each do |col|
            key = keys_by_column[col]
            next if key.blank?
            record[key] = book.cell(row_number, col).to_s.gsub(/<[^>]+>/, '').strip
          end
          yield record if t.properties.keep_blank_rows || record.any? { |_, value| value.present? }
        end
      end

      private

      # 1-based row number holding the headers: the row right after the
      # skipped ones.
      def header_row
        t.properties.skip + 1
      end

      # 1-based row number of the first data row, directly below the headers.
      def first_data_row
        header_row + 1
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ require 'fileutils'
2
+ require 'escape'
class RemoteTable
  class Format
    # In-place preprocessing steps for text-based formats. Each step
    # rewrites the table's local file by piping it through a shell
    # command via RemoteTable.executor.bang.
    module Textual
      # Re-encode the local file from the configured encoding to UTF-8.
      def convert_file_to_utf8!
        source_encoding = ::Escape.shell_single_word t.properties.encoding
        ::RemoteTable.executor.bang t.local_file.path, "iconv -c -f #{source_encoding} -t UTF-8"
      end

      # Byte sequences to strip. They are single-quoted so the backslash
      # escapes reach perl literally and are interpreted there.
      USELESS_CHARACTERS = [
        '\xef\xbb\xbf', # UTF-8 byte order mark
        '\xc2\xad'      # soft hyphen, often inserted by MS Office (html: &shy;)
      ]

      # Delete every USELESS_CHARACTERS sequence from the local file.
      def remove_useless_characters!
        substitutions = USELESS_CHARACTERS.map { |sequence| "s/#{sequence}//g" }.join('; ')
        ::RemoteTable.executor.bang t.local_file.path, "perl -pe '#{substitutions}'"
      end

      # Drop the first `skip` lines of the local file; nothing happens
      # unless skip is positive.
      def skip_rows!
        if t.properties.skip > 0
          ::RemoteTable.executor.bang t.local_file.path, "tail -n +#{t.properties.skip + 1}"
        end
      end

      # Keep only the lines from crop.first through crop.last; nothing
      # happens when no crop is configured.
      def crop_rows!
        crop = t.properties.crop
        return unless crop
        first_line = ::Escape.shell_single_word crop.first.to_s
        line_count = t.properties.crop.last - t.properties.crop.first + 1
        ::RemoteTable.executor.bang t.local_file.path, "tail -n +#{first_line} | head -n #{line_count}"
      end

      # Keep only the character positions named by the cut property
      # (passed to `cut -c`); nothing happens when no cut is configured.
      def cut_columns!
        cut = t.properties.cut
        return unless cut
        ::RemoteTable.executor.bang t.local_file.path, "cut -c #{::Escape.shell_single_word cut.to_s}"
      end
    end
  end
end
@@ -0,0 +1,10 @@
class RemoteTable
  class Format
    # Spreadsheet format whose rows are read through the Rooable mixin.
    class OpenOffice < Format
      include Rooable

      # Reader class that Rooable#each instantiates for this format.
      def roo_class
        ::Openoffice
      end
    end
  end
end
@@ -0,0 +1,35 @@
class RemoteTable
  # Base class for file formats. A format wraps a table (+t+) and is
  # expected to implement #each, yielding one row at a time.
  class Format
    class Unknown < StandardError; end

    # Concrete formats and shared mixins, loaded on first reference.
    [
      [:Excel,      'remote_table/format/excel'],
      [:Excelx,     'remote_table/format/excelx'],
      [:Delimited,  'remote_table/format/delimited'],
      [:OpenOffice, 'remote_table/format/open_office'],
      [:FixedWidth, 'remote_table/format/fixed_width'],
      [:HTML,       'remote_table/format/html'],
      [:Textual,    'remote_table/format/mixins/textual'],
      [:Rooable,    'remote_table/format/mixins/rooable']
    ].each { |const_name, path| autoload const_name, path }

    include ::Enumerable

    # The table this format reads for.
    attr_reader :t

    def initialize(t)
      @t = t
    end

    # Placeholder; every concrete format overrides this.
    def each
      raise "must be defined by format"
    end

    # Copy the table's local file to "<path>.backup".
    def backup_file!
      original = t.local_file.path
      ::FileUtils.cp original, "#{original}.backup"
    end

    # Move "<path>.backup" back over the local file. Does nothing when no
    # readable backup exists.
    def restore_file!
      backup = "#{t.local_file.path}.backup"
      ::FileUtils.mv backup, t.local_file.path if ::File.readable?(backup)
    end
  end
end
@@ -0,0 +1,25 @@
1
+ require 'singleton'
2
+ require 'digest/md5'
class RemoteTable
  # Singleton that computes an MD5 fingerprint for a row hash.
  class Hasher
    include ::Singleton

    # Return the MD5 hex digest of the marshalled, normalized +row+.
    #
    # On Ruby 1.9 and later the row is rebuilt with its keys in sorted
    # order, keys and values being stringified and converted with
    # String#toutf8. On older Rubies the row is copied into a plain Hash
    # unchanged.
    def hash(row)
      if RUBY_VERSION >= '1.9'
        normalized = ::Hash.new
        row.keys.sort.each do |key|
          value = row[key]
          normalized[key.to_s.toutf8] = value.respond_to?(:to_s) ? value.to_s.toutf8 : value
        end
      else
        normalized = ::Hash.new.replace(row)
      end
      # sabshere 1/21/11 may currently break across versions of ruby
      # ruby-1.8.7-p174 > Marshal.dump({'a' => '1'})
      # => "\004\b{\006\"\006a\"\0061"
      # ruby-1.9.2-p0 > Marshal.dump({'a' => '1'})
      # => "\x04\b{\x06I\"\x06a\x06:\x06ETI\"\x061\x06;\x00T"
      ::Digest::MD5.hexdigest ::Marshal.dump(normalized)
    end
  end
end