remote_table 0.2.32 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. data/CHANGELOG +5 -0
  2. data/Gemfile +4 -0
  3. data/Gemfile.lock +65 -0
  4. data/LICENSE +1 -1
  5. data/README.rdoc +21 -7
  6. data/Rakefile +12 -61
  7. data/lib/remote_table/cleaner.rb +19 -0
  8. data/lib/remote_table/executor.rb +29 -0
  9. data/lib/remote_table/format/delimited.rb +62 -0
  10. data/lib/remote_table/format/excel.rb +10 -0
  11. data/lib/remote_table/format/excelx.rb +10 -0
  12. data/lib/remote_table/format/fixed_width.rb +47 -0
  13. data/lib/remote_table/format/html.rb +43 -0
  14. data/lib/remote_table/format/mixins/rooable.rb +47 -0
  15. data/lib/remote_table/format/mixins/textual.rb +34 -0
  16. data/lib/remote_table/format/open_office.rb +10 -0
  17. data/lib/remote_table/format.rb +35 -0
  18. data/lib/remote_table/hasher.rb +25 -0
  19. data/lib/remote_table/local_file.rb +92 -0
  20. data/lib/remote_table/properties.rb +209 -0
  21. data/lib/remote_table/transformer.rb +17 -0
  22. data/lib/remote_table/version.rb +3 -0
  23. data/lib/remote_table.rb +91 -99
  24. data/remote_table.gemspec +32 -77
  25. data/test/{test_helper.rb → helper.rb} +9 -2
  26. data/test/test_big.rb +61 -0
  27. data/test/test_errata.rb +46 -0
  28. data/test/test_old_syntax.rb +229 -0
  29. data/test/test_old_transform.rb +49 -0
  30. data/test/test_remote_table.rb +13 -0
  31. metadata +176 -53
  32. data/VERSION +0 -1
  33. data/lib/remote_table/file/csv.rb +0 -49
  34. data/lib/remote_table/file/fixed_width.rb +0 -19
  35. data/lib/remote_table/file/html.rb +0 -37
  36. data/lib/remote_table/file/ods.rb +0 -11
  37. data/lib/remote_table/file/roo_spreadsheet.rb +0 -44
  38. data/lib/remote_table/file/xls.rb +0 -11
  39. data/lib/remote_table/file/xlsx.rb +0 -11
  40. data/lib/remote_table/file.rb +0 -100
  41. data/lib/remote_table/package.rb +0 -89
  42. data/lib/remote_table/request.rb +0 -44
  43. data/lib/remote_table/transform.rb +0 -58
  44. data/test/remote_table_test.rb +0 -386
data/CHANGELOG CHANGED
@@ -1,3 +1,8 @@
1
+ 1.0.0
2
+ * Refactored to follow more Ruby conventions
3
+ * Suggesting new syntax that looks more like an Enumerable... t[5] instead of t.rows[5]
4
+ * Switching to string option keys (but old syntax is supported)
5
+ [...no changelog for 0.1.6--1.0.0...sorry]
1
6
  0.1.6
2
7
  * For CSVs, force convert headers using String#toutf8. :encoding => 'N'|'U' didn't work.
3
8
  * Fix handling of long urls when passing off to Tempfile.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in remote_table.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,65 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ remote_table (1.0.0)
5
+ activesupport (>= 2.3.4)
6
+ builder
7
+ escape (>= 0.0.4)
8
+ google-spreadsheet-ruby
9
+ i18n
10
+ nokogiri (>= 1.4.1)
11
+ roo (~> 1.9)
12
+ slither (>= 0.99.4)
13
+ spreadsheet
14
+ zip
15
+
16
+ GEM
17
+ remote: http://rubygems.org/
18
+ specs:
19
+ activesupport (3.0.3)
20
+ builder (3.0.0)
21
+ columnize (0.3.2)
22
+ errata (0.2.4)
23
+ activesupport (>= 2.3.4)
24
+ remote_table (>= 0.2.31)
25
+ escape (0.0.4)
26
+ google-spreadsheet-ruby (0.1.2)
27
+ nokogiri (>= 1.4.3.1)
28
+ oauth (>= 0.3.6)
29
+ i18n (0.5.0)
30
+ linecache (0.43)
31
+ nokogiri (1.4.3.1)
32
+ oauth (0.4.4)
33
+ roo (1.9.3)
34
+ ruby-debug (0.10.4)
35
+ columnize (>= 0.1)
36
+ ruby-debug-base (~> 0.10.4.0)
37
+ ruby-debug-base (0.10.4)
38
+ linecache (>= 0.3)
39
+ ruby-ole (1.2.10.1)
40
+ shoulda (2.10.3)
41
+ slither (0.99.4)
42
+ spreadsheet (0.6.4.1)
43
+ ruby-ole
44
+ test-unit (2.1.2)
45
+ zip (2.0.2)
46
+
47
+ PLATFORMS
48
+ ruby
49
+
50
+ DEPENDENCIES
51
+ activesupport (>= 2.3.4)
52
+ builder
53
+ errata (>= 0.2.0)
54
+ escape (>= 0.0.4)
55
+ google-spreadsheet-ruby
56
+ i18n
57
+ nokogiri (>= 1.4.1)
58
+ remote_table!
59
+ roo (~> 1.9)
60
+ ruby-debug
61
+ shoulda
62
+ slither (>= 0.99.4)
63
+ spreadsheet
64
+ test-unit
65
+ zip
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2009 Brighter Planet
1
+ Copyright (c) 2011 Brighter Planet
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.rdoc CHANGED
@@ -1,6 +1,6 @@
1
1
  =remote_table
2
2
 
3
- Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.
3
+ Open local or remote XLSX, XLS, ODS, CSV and fixed-width files.
4
4
 
5
5
  ==Real-life usage
6
6
 
@@ -8,15 +8,29 @@ Used by data_miner (http://github.com/seamusabshere/data_miner)
8
8
 
9
9
  ==Example
10
10
 
11
- Taken from <tt>#{GEMDIR}/test/remote_table_test.rb</tt>:
11
+ Taken from <tt>#{GEMDIR}/test/test_remote_table.rb</tt>:
12
12
 
13
- >> t = RemoteTable.new(:url => 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', :filename => '98guide6.csv')
14
- => #<RemoteTable:0x359da50 @transform=#<RemoteTable::Transform:0x359d154 @select=nil, @reject=nil>, @file=#<RemoteTable::File:0x35970c4 @delimiter=nil, @headers=nil, @cut=nil, @filename="98guide6.csv", @skip=nil, @schema_name=nil, @crop=nil, @format=:csv, @trap=nil, @sheet=0, @schema=nil>, @package=#<RemoteTable::Package:0x359c538 @url="http://www.fueleconomy.gov/FEG/epadata/98guide6.zip", @filename="98guide6.csv", @compression=:zip, @packing=nil>, @request=#<RemoteTable::Request:0x3596bec @url="http://www.fueleconomy.gov/FEG/epadata/98guide6.zip">>
15
- >> t.rows.first
16
- => {"cyl"=>"6", "eng dscr"=>"DOHC VTEC", "trans dscr"=>"2MODE CLKUP", "trans"=>"Auto(L4)", "cmb"=>"20", "2pv"=>nil, "carline name"=>"NSX", "displ"=>"3.0", "ucmb"=>"23.5311", "hpv"=>nil, "4pv"=>nil, "Class"=>"TWO SEATERS", "Manufacturer"=>"ACURA", "fl"=>"P", "2lv"=>nil, "G"=>nil, "hlv"=>nil, "drv"=>"R", "cty"=>"18", "ucty"=>"19.8733", "S"=>nil, "4lv"=>nil, "fcost"=>"1050", "T"=>nil, "hwy"=>"24", "uhwy"=>"30.3612"}
13
+ should "open an XLSX" do
14
+ t = RemoteTable.new 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx'
15
+ assert_equal "Secure encryption of all data", t[5]["Requirements"]
16
+ end
17
+
18
+ or on the console
19
+
20
+ ?> t = RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', 'filename' => '98guide6.csv'
21
+ => #<RemoteTable:0x359da50 [...]>
22
+ ?> t[0]
23
+ => {"cyl"=>"6", "eng dscr"=>"DOHC VTEC", "trans dscr"=>"2MODE CLKUP", "trans"=>"Auto(L4)", "cmb"=>"20", "2pv"=>nil, "carline name"=>"NSX", "displ"=>"3.0", "ucmb"=>"23.5311", "hpv"=>nil, "4pv"=>nil, "Class"=>"TWO SEATERS", "Manufacturer"=>"ACURA", "fl"=>"P", "2lv"=>nil, "G"=>nil, "hlv"=>nil, "drv"=>"R", "cty"=>"18", "ucty"=>"19.8733", "S"=>nil, "4lv"=>nil, "fcost"=>"1050", "T"=>nil, "hwy"=>"24", "uhwy"=>"30.3612"}
17
24
 
18
25
  See the test file and also data_miner examples of custom parsers.
19
26
 
27
+ ==Wishlist
28
+
29
+ * The new parser syntax (aka transformer) hasn't been defined yet... only the old-style syntax is available
30
+ * We currently call curl (and a lot of other utilities) using a shell. Is there a safer way to do this?
31
+ * Row hashes may come out differently for Ruby 1.8 and Ruby 1.9, which ruins the whole purpose.
32
+ * Since <tt>Enumerable</tt> provides <tt>#to_a</tt>, I'm not sure if it's caching the row loading.
33
+
20
34
  ==Authors
21
35
 
22
36
  * Seamus Abshere <seamus@abshere.net>
@@ -24,4 +38,4 @@ See the test file and also data_miner examples of custom parsers.
24
38
 
25
39
  == Copyright
26
40
 
27
- Copyright (c) 2010 Brighter Planet. See LICENSE for details.
41
+ Copyright (c) 2011 Brighter Planet. See LICENSE for details.
data/Rakefile CHANGED
@@ -1,72 +1,23 @@
1
- require 'rubygems'
2
- require 'rake'
3
-
4
- begin
5
- require 'jeweler'
6
- Jeweler::Tasks.new do |gem|
7
- gem.name = "remote_table"
8
- gem.summary = %Q{Remotely open and parse XLS, ODS, CSV and fixed-width tables.}
9
- gem.description = %Q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
10
- gem.email = "seamus@abshere.net"
11
- gem.homepage = "http://github.com/seamusabshere/remote_table"
12
- gem.authors = ["Seamus Abshere", "Andy Rossmeissl"]
13
- # sabshere [unknown date] roo 1.9.3 doesn't work, so use old 1.3 version
14
- gem.add_dependency 'roo', '1.3.11'
15
- # sabshere 9/30/10 depending on fastercsv when using ruby 1.9.2 results in exiting with error
16
- # gem.add_dependency 'fastercsv', '>=1.5.0'
17
- gem.add_dependency 'activesupport', '>=2.3.4'
18
- gem.add_dependency 'slither', '>=0.99.4'
19
- gem.add_dependency 'nokogiri', '>=1.4.1'
20
- gem.add_dependency 'escape', '>=0.0.4'
21
- gem.add_development_dependency 'errata', '>=0.2.0'
22
- gem.require_path = "lib"
23
- gem.rdoc_options << '--line-numbers' << '--inline-source'
24
- gem.requirements << 'curl'
25
- gem.rubyforge_project = "remotetable"
26
- end
27
- Jeweler::GemcutterTasks.new
28
- # Jeweler::RubyforgeTasks.new do |rubyforge|
29
- # rubyforge.doc_task = "rdoc"
30
- # end
31
- rescue LoadError
32
- puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
33
- end
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
34
3
 
4
+ require 'rake'
35
5
  require 'rake/testtask'
36
6
  Rake::TestTask.new(:test) do |test|
37
7
  test.libs << 'lib' << 'test'
38
- test.pattern = 'test/**/*_test.rb'
8
+ test.pattern = 'test/**/test_*.rb'
39
9
  test.verbose = true
40
10
  end
41
11
 
42
12
  begin
43
- require 'rcov/rcovtask'
44
- Rcov::RcovTask.new do |test|
45
- test.libs << 'test'
46
- test.pattern = 'test/**/*_test.rb'
47
- test.verbose = true
13
+ require 'rake/rdoctask'
14
+ Rake::RDocTask.new do |rdoc|
15
+ rdoc.rdoc_dir = 'rdoc'
16
+ rdoc.title = 'taps'
17
+ rdoc.options << '--line-numbers' << '--inline-source'
18
+ rdoc.rdoc_files.include('README*')
19
+ rdoc.rdoc_files.include('lib/**/*.rb')
48
20
  end
49
21
  rescue LoadError
50
- task :rcov do
51
- abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
52
- end
53
- end
54
-
55
-
56
-
57
-
58
- task :default => :test
59
-
60
- require 'rake/rdoctask'
61
- Rake::RDocTask.new do |rdoc|
62
- if File.exist?('VERSION')
63
- version = File.read('VERSION')
64
- else
65
- version = ""
66
- end
67
-
68
- rdoc.rdoc_dir = 'rdoc'
69
- rdoc.title = "remote_table #{version}"
70
- rdoc.rdoc_files.include('README*')
71
- rdoc.rdoc_files.include('lib/**/*.rb')
22
+ puts "Rdoc is not available"
72
23
  end
data/lib/remote_table/cleaner.rb ADDED
@@ -0,0 +1,19 @@
1
+ require 'singleton'
2
+ require 'fileutils'
3
+ class RemoteTable
4
+ class Cleaner
5
+ include ::Singleton
6
+ def paths_for_removal
7
+ @paths_for_removal ||= []
8
+ end
9
+ def cleanup
10
+ paths_for_removal.each do |path|
11
+ ::FileUtils.rm_rf path
12
+ paths_for_removal.delete path
13
+ end
14
+ end
15
+ def remove_at_exit(path)
16
+ paths_for_removal << path
17
+ end
18
+ end
19
+ end
data/lib/remote_table/executor.rb ADDED
@@ -0,0 +1,29 @@
1
+ require 'singleton'
2
+ require 'escape'
3
+ require 'fileutils'
4
+ class RemoteTable
5
+ class Executor
6
+ include ::Singleton
7
+ def bang(path, cmd)
8
+ tmp_path = "#{path}.bang.#{rand}"
9
+ backtick_with_reporting "/bin/cat #{::Escape.shell_single_word path} | #{cmd} > #{::Escape.shell_single_word tmp_path}"
10
+ ::FileUtils.mv tmp_path, path
11
+ end
12
+
13
+ def backtick_with_reporting(cmd)
14
+ cmd = cmd.gsub /[ ]*\n[ ]*/m, ' '
15
+ output = `#{cmd}`
16
+ if not $?.success?
17
+ raise %{
18
+ From the remote_table gem...
19
+
20
+ Command failed:
21
+ #{cmd}
22
+
23
+ Output:
24
+ #{output}
25
+ }
26
+ end
27
+ end
28
+ end
29
+ end
data/lib/remote_table/format/delimited.rb ADDED
@@ -0,0 +1,62 @@
1
+ if RUBY_VERSION >= '1.9'
2
+ require 'csv'
3
+ ::FasterCSV = ::CSV
4
+ else
5
+ begin
6
+ require 'fastercsv'
7
+ rescue ::LoadError
8
+ $stderr.puts "[remote_table gem] You probably need to manually install the fastercsv gem and/or require it in your Gemfile."
9
+ raise $!
10
+ end
11
+ end
12
+
13
+ class RemoteTable
14
+ class Format
15
+ class Delimited < Format
16
+ include Textual
17
+ def each(&blk)
18
+ backup_file!
19
+ convert_file_to_utf8!
20
+ remove_useless_characters!
21
+ skip_rows!
22
+ ::FasterCSV.foreach(t.local_file.path, fastercsv_options) do |row|
23
+ ordered_hash = ::ActiveSupport::OrderedHash.new
24
+ filled_values = 0
25
+ case row
26
+ when ::FasterCSV::Row
27
+ row.each do |header, value|
28
+ next if header.blank?
29
+ value = '' if value.nil?
30
+ ordered_hash[header] = value
31
+ filled_values += 1 if value.present?
32
+ end
33
+ when ::Array
34
+ index = 0
35
+ row.each do |value|
36
+ value = '' if value.nil?
37
+ ordered_hash[index] = value
38
+ filled_values += 1 if value.present?
39
+ index += 1
40
+ end
41
+ end
42
+ yield ordered_hash if t.properties.keep_blank_rows or filled_values > 0
43
+ end
44
+ ensure
45
+ restore_file!
46
+ end
47
+
48
+ private
49
+
50
+ def fastercsv_options
51
+ fastercsv_options = { :skip_blanks => !t.properties.keep_blank_rows }
52
+ if t.properties.headers == false
53
+ fastercsv_options.merge!(:headers => nil)
54
+ else
55
+ fastercsv_options.merge!(:headers => :first_row)
56
+ end
57
+ fastercsv_options.merge!(:col_sep => t.properties.delimiter) if t.properties.delimiter
58
+ fastercsv_options
59
+ end
60
+ end
61
+ end
62
+ end
data/lib/remote_table/format/excel.rb ADDED
@@ -0,0 +1,10 @@
1
+ class RemoteTable
2
+ class Format
3
+ class Excel < Format
4
+ include Rooable
5
+ def roo_class
6
+ ::Excel
7
+ end
8
+ end
9
+ end
10
+ end
data/lib/remote_table/format/excelx.rb ADDED
@@ -0,0 +1,10 @@
1
+ class RemoteTable
2
+ class Format
3
+ class Excelx < Format
4
+ include Rooable
5
+ def roo_class
6
+ ::Excelx
7
+ end
8
+ end
9
+ end
10
+ end
data/lib/remote_table/format/fixed_width.rb ADDED
@@ -0,0 +1,47 @@
1
+ require 'slither'
2
+ class RemoteTable
3
+ class Format
4
+ class FixedWidth < Format
5
+ include Textual
6
+ def each(&blk)
7
+ backup_file!
8
+ convert_file_to_utf8!
9
+ remove_useless_characters!
10
+ crop_rows!
11
+ skip_rows!
12
+ cut_columns!
13
+ parser.parse[:rows].each do |hash|
14
+ hash.reject! { |k, v| k.blank? }
15
+ yield hash if t.properties.keep_blank_rows or hash.any? { |k, v| v.present? }
16
+ end
17
+ ensure
18
+ restore_file!
19
+ end
20
+ private
21
+ def parser
22
+ @parser ||= ::Slither::Parser.new definition, t.local_file.path
23
+ end
24
+ def definition
25
+ @definition ||= if t.properties.schema_name.is_a?(::String) or t.properties.schema_name.is_a?(::Symbol)
26
+ ::Slither.send :definition, t.properties.schema_name
27
+ elsif t.properties.schema.is_a?(::Array)
28
+ everything = lambda { |_| true }
29
+ ::Slither.define(rand.to_s) do |d|
30
+ d.rows do |row|
31
+ row.trap(&everything)
32
+ t.properties.schema.each do |name, width, options|
33
+ if name == 'spacer'
34
+ row.spacer width
35
+ else
36
+ row.column name, width, options
37
+ end
38
+ end
39
+ end
40
+ end
41
+ else
42
+ raise "expecting schema_name to be a String or Symbol, or schema to be an Array"
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
data/lib/remote_table/format/html.rb ADDED
@@ -0,0 +1,43 @@
1
+ require 'nokogiri'
2
+ require 'cgi'
3
+ class RemoteTable
4
+ class Format
5
+ class HTML < Format
6
+ include Textual
7
+ def each(&blk)
8
+ backup_file!
9
+ convert_file_to_utf8!
10
+ remove_useless_characters!
11
+ html_headers = (t.properties.headers.is_a?(::Array)) ? t.properties.headers : nil
12
+ ::Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8').xpath(t.properties.row_xpath).each do |row|
13
+ values = row.xpath(t.properties.column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
14
+ if html_headers.nil?
15
+ html_headers = values
16
+ next
17
+ end
18
+ hash = zip html_headers, values
19
+ yield hash if t.properties.keep_blank_rows or hash.any? { |k, v| v.present? }
20
+ end
21
+ ensure
22
+ restore_file!
23
+ end
24
+
25
+ private
26
+
27
+ # http://snippets.dzone.com/posts/show/406
28
+ def zip(keys, values)
29
+ hash = ::Hash.new
30
+ keys.zip(values) { |k,v| hash[k]=v }
31
+ hash
32
+ end
33
+
34
+ # should we be doing this in ruby?
35
+ def unescaped_html_without_soft_hyphens
36
+ str = ::CGI.unescapeHTML ::IO.read(t.local_file.path)
37
+ # get rid of MS Office baddies
38
+ str.gsub! /&shy;/, ''
39
+ str
40
+ end
41
+ end
42
+ end
43
+ end
data/lib/remote_table/format/mixins/rooable.rb ADDED
@@ -0,0 +1,47 @@
1
+ require 'roo'
2
+ class RemoteTable
3
+ class Format
4
+ module Rooable
5
+ def each(&blk)
6
+ spreadsheet = roo_class.new t.local_file.path, nil, :ignore
7
+ spreadsheet.default_sheet = t.properties.sheet.is_a?(::Numeric) ? spreadsheet.sheets[t.properties.sheet] : t.properties.sheet
8
+ column_references = ::Hash.new
9
+ if t.properties.headers == false
10
+ # zero-based numeric keys
11
+ for col in (1..spreadsheet.last_column)
12
+ column_references[col] = col - 1
13
+ end
14
+ elsif t.properties.headers.is_a? ::Array
15
+ # names
16
+ for col in (1..spreadsheet.last_column)
17
+ column_references[col] = t.properties.headers[col - 1]
18
+ end
19
+ else
20
+ # read t.properties.headers from the file itself
21
+ for col in (1..spreadsheet.last_column)
22
+ column_references[col] = spreadsheet.cell(header_row, col)
23
+ column_references[col] = spreadsheet.cell(header_row - 1, col) if column_references[col].blank? # lspreadsheetk up
24
+ end
25
+ end
26
+ first_data_row.upto(spreadsheet.last_row) do |raw_row|
27
+ ordered_hash = ::ActiveSupport::OrderedHash.new
28
+ for col in (1..spreadsheet.last_column)
29
+ next if column_references[col].blank?
30
+ ordered_hash[column_references[col]] = spreadsheet.cell(raw_row, col).to_s.gsub(/<[^>]+>/, '').strip
31
+ end
32
+ yield ordered_hash if t.properties.keep_blank_rows or ordered_hash.any? { |k, v| v.present? }
33
+ end
34
+ end
35
+
36
+ private
37
+
38
+ def header_row
39
+ 1 + t.properties.skip
40
+ end
41
+
42
+ def first_data_row
43
+ 1 + header_row
44
+ end
45
+ end
46
+ end
47
+ end
data/lib/remote_table/format/mixins/textual.rb ADDED
@@ -0,0 +1,34 @@
1
+ require 'fileutils'
2
+ require 'escape'
3
+ class RemoteTable
4
+ class Format
5
+ module Textual
6
+ def convert_file_to_utf8!
7
+ ::RemoteTable.executor.bang t.local_file.path, "iconv -c -f #{::Escape.shell_single_word t.properties.encoding} -t UTF-8"
8
+ end
9
+
10
+ USELESS_CHARACTERS = [
11
+ '\xef\xbb\xbf', # UTF-8 byte order mark
12
+ '\xc2\xad' # soft hyphen, often inserted by MS Office (html: &shy;)
13
+ ]
14
+ def remove_useless_characters!
15
+ ::RemoteTable.executor.bang t.local_file.path, "perl -pe 's/#{USELESS_CHARACTERS.join '//g; s/'}//g'"
16
+ end
17
+
18
+ def skip_rows!
19
+ return unless t.properties.skip > 0
20
+ ::RemoteTable.executor.bang t.local_file.path, "tail -n +#{t.properties.skip + 1}"
21
+ end
22
+
23
+ def crop_rows!
24
+ return unless t.properties.crop
25
+ ::RemoteTable.executor.bang t.local_file.path, "tail -n +#{::Escape.shell_single_word t.properties.crop.first.to_s} | head -n #{t.properties.crop.last - t.properties.crop.first + 1}"
26
+ end
27
+
28
+ def cut_columns!
29
+ return unless t.properties.cut
30
+ ::RemoteTable.executor.bang t.local_file.path, "cut -c #{::Escape.shell_single_word t.properties.cut.to_s}"
31
+ end
32
+ end
33
+ end
34
+ end
data/lib/remote_table/format/open_office.rb ADDED
@@ -0,0 +1,10 @@
1
+ class RemoteTable
2
+ class Format
3
+ class OpenOffice < Format
4
+ include Rooable
5
+ def roo_class
6
+ ::Openoffice
7
+ end
8
+ end
9
+ end
10
+ end
data/lib/remote_table/format.rb ADDED
@@ -0,0 +1,35 @@
1
+ class RemoteTable
2
+ class Format
3
+ class Unknown < StandardError; end
4
+
5
+ autoload :Excel, 'remote_table/format/excel'
6
+ autoload :Excelx, 'remote_table/format/excelx'
7
+ autoload :Delimited, 'remote_table/format/delimited'
8
+ autoload :OpenOffice, 'remote_table/format/open_office'
9
+ autoload :FixedWidth, 'remote_table/format/fixed_width'
10
+ autoload :HTML, 'remote_table/format/html'
11
+
12
+ autoload :Textual, 'remote_table/format/mixins/textual'
13
+ autoload :Rooable, 'remote_table/format/mixins/rooable'
14
+
15
+ attr_reader :t
16
+
17
+ def initialize(t)
18
+ @t = t
19
+ end
20
+
21
+ include ::Enumerable
22
+ def each
23
+ raise "must be defined by format"
24
+ end
25
+
26
+ def backup_file!
27
+ ::FileUtils.cp t.local_file.path, "#{t.local_file.path}.backup"
28
+ end
29
+
30
+ def restore_file!
31
+ return unless ::File.readable? "#{t.local_file.path}.backup"
32
+ ::FileUtils.mv "#{t.local_file.path}.backup", t.local_file.path
33
+ end
34
+ end
35
+ end
data/lib/remote_table/hasher.rb ADDED
@@ -0,0 +1,25 @@
1
+ require 'singleton'
2
+ require 'digest/md5'
3
+ class RemoteTable
4
+ class Hasher
5
+ include ::Singleton
6
+ def hash(row)
7
+ normalized_hash = if RUBY_VERSION >= '1.9'
8
+ row.keys.sort.inject(::Hash.new) do |memo, k|
9
+ normalized_k = k.to_s.toutf8
10
+ normalized_v = row[k].respond_to?(:to_s) ? row[k].to_s.toutf8 : row[k]
11
+ memo[normalized_k] = normalized_v
12
+ memo
13
+ end
14
+ else
15
+ ::Hash.new.replace(row)
16
+ end
17
+ # sabshere 1/21/11 may currently break across versions of ruby
18
+ # ruby-1.8.7-p174 > Marshal.dump({'a' => '1'})
19
+ # => "\004\b{\006\"\006a\"\0061"
20
+ # ruby-1.9.2-p0 > Marshal.dump({'a' => '1'})
21
+ # => "\x04\b{\x06I\"\x06a\x06:\x06ETI\"\x061\x06;\x00T"
22
+ ::Digest::MD5.hexdigest ::Marshal.dump(normalized_hash)
23
+ end
24
+ end
25
+ end