remote_table 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.3
1
+ 0.2.4
data/lib/remote_table.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'rubygems'
2
2
  require 'digest/md5'
3
+ require 'iconv'
3
4
  require 'active_support'
4
5
  require 'tempfile'
5
6
  require 'fastercsv'
@@ -1,6 +1,7 @@
1
1
  class RemoteTable
2
2
  class File
3
3
  attr_accessor :filename, :format, :delimiter, :skip, :cut, :crop, :sheet, :headers, :schema, :schema_name, :trap
4
+ attr_accessor :encoding
4
5
  attr_accessor :path
5
6
  attr_accessor :keep_blank_rows
6
7
 
@@ -17,9 +18,23 @@ class RemoteTable
17
18
  @schema = bus[:schema]
18
19
  @schema_name = bus[:schema_name]
19
20
  @trap = bus[:trap]
21
+ @encoding = bus[:encoding] || 'UTF-8'
20
22
  extend "RemoteTable::#{format.to_s.camelcase}".constantize
21
23
  end
22
24
 
25
+ class << self
26
+ # http://santanatechnotes.blogspot.com/2005/12/matching-iso-8859-1-strings-with-ruby.html
27
+ def convert_to_utf8(str, encoding)
28
+ if encoding == 'UTF-8'
29
+ str.toutf8 # just in case
30
+ else
31
+ @_iconv ||= Hash.new
32
+ @_iconv[encoding] ||= Iconv.new 'UTF-8', encoding
33
+ @_iconv[encoding].iconv(str).toutf8
34
+ end
35
+ end
36
+ end
37
+
23
38
  def tabulate(path)
24
39
  define_fixed_width_schema! if format == :fixed_width and schema.is_a?(Array) # TODO move to generic subclass callback
25
40
  self.path = path
@@ -47,6 +62,38 @@ class RemoteTable
47
62
  end
48
63
  end
49
64
 
65
+ def backup_file!
66
+ FileUtils.cp path, "#{path}.backup"
67
+ end
68
+
69
+ def skip_rows!
70
+ return unless skip
71
+ `cat #{path} | tail -n +#{skip + 1} > #{path}.tmp`
72
+ FileUtils.mv "#{path}.tmp", path
73
+ end
74
+
75
+ def convert_file_to_utf8!
76
+ return if encoding == 'UTF8' or encoding == 'UTF-8'
77
+ `iconv -c -f #{encoding} -t UTF8 #{path} > #{path}.tmp`
78
+ FileUtils.mv "#{path}.tmp", path
79
+ end
80
+
81
+ def restore_file!
82
+ FileUtils.mv "#{path}.backup", path if ::File.readable? "#{path}.backup"
83
+ end
84
+
85
+ def cut_columns!
86
+ return unless cut
87
+ `cat #{path} | cut -c #{cut} > #{path}.tmp`
88
+ FileUtils.mv "#{path}.tmp", path
89
+ end
90
+
91
+ def crop_rows!
92
+ return unless crop
93
+ `cat #{path} | tail -n +#{crop.first} | head -n #{crop.last - crop.first + 1} > #{path}.tmp`
94
+ FileUtils.mv "#{path}.tmp", path
95
+ end
96
+
50
97
  def format_from_filename
51
98
  extname = ::File.extname(filename).gsub('.', '')
52
99
  return :csv if extname.blank?
@@ -1,6 +1,8 @@
1
1
  class RemoteTable
2
2
  module Csv
3
3
  def each_row(&block)
4
+ backup_file!
5
+ convert_file_to_utf8!
4
6
  skip_rows!
5
7
  FasterCSV.foreach(path, fastercsv_options) do |row|
6
8
  ordered_hash = ActiveSupport::OrderedHash.new
@@ -27,13 +29,13 @@ class RemoteTable
27
29
  yield ordered_hash if keep_blank_rows or filled_values.nonzero?
28
30
  end
29
31
  ensure
30
- restore_rows!
32
+ restore_file!
31
33
  end
32
34
 
33
35
  private
34
36
 
35
37
  def fastercsv_options
36
- fastercsv_options = { :skip_blanks => !keep_blank_rows, :header_converters => lambda { |k| k.to_s.toutf8 } }
38
+ fastercsv_options = { :skip_blanks => !keep_blank_rows, :header_converters => lambda { |k| RemoteTable::File.convert_to_utf8 k.to_s, encoding } }
37
39
  if headers == false
38
40
  fastercsv_options.merge!(:headers => nil)
39
41
  else
@@ -42,17 +44,5 @@ class RemoteTable
42
44
  fastercsv_options.merge!(:col_sep => delimiter) if delimiter
43
45
  fastercsv_options
44
46
  end
45
-
46
- def skip_rows!
47
- return unless skip
48
- original = "#{path}.original"
49
- FileUtils.cp(path, original)
50
- `cat #{original} | tail -n +#{skip + 1} > #{path}`
51
- end
52
-
53
- def restore_rows!
54
- return unless skip
55
- FileUtils.mv "#{path}.original", path
56
- end
57
47
  end
58
48
  end
@@ -1,6 +1,8 @@
1
1
  class RemoteTable
2
2
  module FixedWidth
3
3
  def each_row(&block)
4
+ backup_file!
5
+ convert_file_to_utf8!
4
6
  crop_rows!
5
7
  skip_rows!
6
8
  cut_columns!
@@ -10,47 +12,7 @@ class RemoteTable
10
12
  yield hash if keep_blank_rows or hash.any? { |k, v| v.present? }
11
13
  end
12
14
  ensure
13
- uncut_columns!
14
- unskip_rows!
15
- uncrop_rows!
16
- end
17
-
18
- private
19
-
20
- def cut_columns!
21
- return unless cut
22
- original = "#{path}.uncut"
23
- FileUtils.cp(path, original)
24
- `cat #{original} | cut -c #{cut} > #{path}`
25
- end
26
-
27
- def uncut_columns!
28
- return unless cut
29
- FileUtils.mv "#{path}.uncut", path
30
- end
31
-
32
- def skip_rows!
33
- return unless skip
34
- original = "#{path}.unskipped"
35
- FileUtils.cp(path, original)
36
- `cat #{original} | tail -n +#{skip + 1} > #{path}`
37
- end
38
-
39
- def unskip_rows!
40
- return unless skip
41
- FileUtils.mv "#{path}.unskipped", path
42
- end
43
-
44
- def crop_rows!
45
- return unless crop
46
- original = "#{path}.uncropped"
47
- FileUtils.cp(path, original)
48
- `cat #{original} | tail -n +#{crop.first} | head -n #{crop.last - crop.first + 1} > #{path}`
49
- end
50
-
51
- def uncrop_rows!
52
- return unless crop
53
- FileUtils.mv "#{path}.uncropped", path
15
+ restore_file!
54
16
  end
55
17
  end
56
18
  end
data/remote_table.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{remote_table}
8
- s.version = "0.2.3"
8
+ s.version = "0.2.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Seamus Abshere", "Andy Rossmeissl"]
12
- s.date = %q{2010-03-16}
12
+ s.date = %q{2010-03-24}
13
13
  s.description = %q{Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.}
14
14
  s.email = %q{seamus@abshere.net}
15
15
  s.extra_rdoc_files = [
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remote_table
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seamus Abshere
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2010-03-16 00:00:00 -04:00
13
+ date: 2010-03-24 00:00:00 -04:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency