remote_table 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,37 +1,33 @@
1
- if RUBY_VERSION >= '1.9'
2
- require 'csv'
3
- ::RemoteTable::MyCSV = ::CSV
4
- else
5
- begin
6
- require 'fastercsv'
7
- ::RemoteTable::MyCSV = ::FasterCSV
8
- rescue ::LoadError
9
- $stderr.puts "[remote_table] You probably need to manually install the fastercsv gem and/or require it in your Gemfile."
10
- raise $!
11
- end
12
- end
13
-
14
1
  class RemoteTable
15
2
  class Format
16
3
  class Delimited < Format
4
+ if ::RUBY_VERSION >= '1.9'
5
+ require 'csv'
6
+ Engine = ::CSV
7
+ else
8
+ require 'fastercsv'
9
+ Engine = ::FasterCSV
10
+ end
11
+
17
12
  include Textual
13
+
18
14
  def each(&blk)
19
15
  remove_useless_characters!
20
16
  fix_newlines!
21
17
  transliterate_whole_file_to_utf8!
22
18
  skip_rows!
23
- MyCSV.new(t.local_file.encoded_io, fastercsv_options).each do |row|
24
- if row.is_a?(MyCSV::Row)
19
+ Engine.new(t.local_file.encoded_io, fastercsv_options).each do |row|
20
+ if row.is_a?(Engine::Row)
25
21
  hash = row.inject(::ActiveSupport::OrderedHash.new) do |memo, (k, v)|
26
22
  if k.present?
27
23
  memo[k] = v.to_s
28
24
  end
29
25
  memo
30
26
  end
31
- yield hash if t.properties.keep_blank_rows or hash.any? { |k, v| v.present? }
27
+ yield hash if t.config.keep_blank_rows or hash.any? { |k, v| v.present? }
32
28
  elsif row.is_a?(::Array)
33
29
  array = row.map { |v| v.to_s }
34
- yield array if t.properties.keep_blank_rows or array.any? { |v| v.present? }
30
+ yield array if t.config.keep_blank_rows or array.any? { |v| v.present? }
35
31
  end
36
32
  end
37
33
  ensure
@@ -54,10 +50,10 @@ class RemoteTable
54
50
  ]
55
51
 
56
52
  def fastercsv_options
57
- hsh = t.options.slice *FASTERCSV_OPTIONS
58
- hsh.merge! :skip_blanks => !t.properties.keep_blank_rows
59
- hsh.reverse_merge! :headers => t.properties.headers
60
- hsh.reverse_merge! :col_sep => t.properties.delimiter
53
+ hsh = t.config.user_specified_options.slice *FASTERCSV_OPTIONS
54
+ hsh[:skip_blanks] = !t.config.keep_blank_rows
55
+ hsh.reverse_merge! :headers => t.config.headers
56
+ hsh.reverse_merge! :col_sep => t.config.delimiter
61
57
  end
62
58
  end
63
59
  end
@@ -1,10 +1,10 @@
1
- require 'fixed_width'
2
-
3
1
  class RemoteTable
4
2
  class Format
5
3
  class FixedWidth < Format
6
4
  include Textual
7
5
  def each(&blk)
6
+ require 'fixed_width-multibyte'
7
+
8
8
  remove_useless_characters!
9
9
  fix_newlines!
10
10
  transliterate_whole_file_to_utf8!
@@ -16,7 +16,7 @@ class RemoteTable
16
16
  row.each do |k, v|
17
17
  row[k] = v.strip
18
18
  end
19
- yield row if t.properties.keep_blank_rows or row.any? { |k, v| v.present? }
19
+ yield row if t.config.keep_blank_rows or row.any? { |k, v| v.present? }
20
20
  end
21
21
  ensure
22
22
  t.local_file.cleanup
@@ -27,21 +27,21 @@ class RemoteTable
27
27
  def parser
28
28
  return @parser if @parser.is_a?(::FixedWidth::Parser)
29
29
  if ::FixedWidth::Section.private_instance_methods.map(&:to_sym).include?(:unpacker)
30
- raise "[remote_table] You need a different (newer) version of the FixedWidth gem that supports multibyte encoding, sometime after https://github.com/timonk/fixed_width/pull/1 was incorporated"
30
+ raise ::RuntimeError, "You need a different (newer) version of the FixedWidth gem that supports multibyte encoding, sometime after https://github.com/timonk/fixed_width/pull/1 was incorporated"
31
31
  end
32
32
  @parser = ::FixedWidth::Parser.new definition, t.local_file.encoded_io
33
33
  end
34
34
 
35
35
  def definition
36
- @definition ||= if t.properties.schema_name.is_a?(::String) or t.properties.schema_name.is_a?(::Symbol)
37
- ::FixedWidth.send :definition, t.properties.schema_name
38
- elsif t.properties.schema.is_a?(::Array)
36
+ @definition ||= if t.config.schema_name.is_a?(::String) or t.config.schema_name.is_a?(::Symbol)
37
+ ::FixedWidth.send :definition, t.config.schema_name
38
+ elsif t.config.schema.is_a?(::Array)
39
39
  everything = lambda { |_| true }
40
40
  srand # in case this was forked by resque
41
41
  ::FixedWidth.define(rand.to_s) do |d|
42
42
  d.rows do |row|
43
43
  row.trap(&everything)
44
- t.properties.schema.each do |name, width, options|
44
+ t.config.schema.each do |name, width, options|
45
45
  name = name.to_s
46
46
  if name == 'spacer'
47
47
  row.spacer width
@@ -52,7 +52,7 @@ class RemoteTable
52
52
  end
53
53
  end
54
54
  else
55
- raise "expecting schema_name to be a String or Symbol, or schema to be an Array"
55
+ raise ::ArgumentError, "expecting schema_name to be a String or Symbol, or schema to be an Array"
56
56
  end
57
57
  end
58
58
  end
@@ -1,5 +1,3 @@
1
- require 'nokogiri'
2
- require 'cgi'
3
1
  class RemoteTable
4
2
  class Format
5
3
  class HTML < Format
@@ -1,21 +1,22 @@
1
- require 'nokogiri'
2
- require 'cgi'
3
1
  class RemoteTable
4
2
  class Format
5
3
  module ProcessedByNokogiri
6
4
  def each
7
- raise "[remote_table] Need :row_css or :row_xpath in order to process XML or HTML" unless t.properties.row_css or t.properties.row_xpath
5
+ require 'nokogiri'
6
+ require 'cgi'
7
+
8
+ raise ::ArgumentError, "Need :row_css or :row_xpath in order to process XML or HTML" unless t.config.row_css or t.config.row_xpath
8
9
  remove_useless_characters!
9
10
  transliterate_whole_file_to_utf8!
10
11
 
11
- headers = t.properties.headers
12
+ headers = t.config.headers
12
13
 
13
14
  xml = nokogiri_class.parse(unescaped_xml_without_soft_hyphens, nil, 'UTF-8')
14
- (row_css? ? xml.css(t.properties.row_css) : xml.xpath(t.properties.row_xpath)).each do |row|
15
+ (row_css? ? xml.css(t.config.row_css) : xml.xpath(t.config.row_xpath)).each do |row|
15
16
  values = if column_css?
16
- row.css(t.properties.column_css)
17
+ row.css(t.config.column_css)
17
18
  elsif column_xpath?
18
- row.xpath(t.properties.column_xpath)
19
+ row.xpath(t.config.column_xpath)
19
20
  else
20
21
  [row]
21
22
  end.map { |cell| assume_utf8 cell.content.gsub(/\s+/, ' ').strip }
@@ -23,12 +24,12 @@ class RemoteTable
23
24
  headers = values.select(&:present?)
24
25
  next
25
26
  end
26
- output = if t.properties.output_class == ::Array
27
+ output = if t.config.output_class == ::Array
27
28
  values
28
29
  else
29
30
  zip headers, values
30
31
  end
31
- if t.properties.keep_blank_rows or values.any?
32
+ if t.config.keep_blank_rows or values.any?
32
33
  yield output
33
34
  end
34
35
  end
@@ -39,15 +40,15 @@ class RemoteTable
39
40
  private
40
41
 
41
42
  def row_css?
42
- !!t.properties.row_css
43
+ !!t.config.row_css
43
44
  end
44
45
 
45
46
  def column_css?
46
- !!t.properties.column_css
47
+ !!t.config.column_css
47
48
  end
48
49
 
49
50
  def column_xpath?
50
- !!t.properties.column_xpath
51
+ !!t.config.column_xpath
51
52
  end
52
53
 
53
54
  # http://snippets.dzone.com/posts/show/406
@@ -1,33 +1,37 @@
1
- require 'roo'
2
1
  class RemoteTable
3
2
  class Format
4
3
  module ProcessedByRoo
5
4
  def each(&blk)
5
+ require 'iconv'
6
+ require 'roo'
7
+
6
8
  spreadsheet = roo_class.new t.local_file.path, nil, :ignore
7
- spreadsheet.default_sheet = t.properties.sheet.is_a?(::Numeric) ? spreadsheet.sheets[t.properties.sheet] : t.properties.sheet
9
+ if t.config.sheet
10
+ spreadsheet.default_sheet = t.config.sheet
11
+ end
8
12
 
9
- first_row = if t.properties.crop
10
- t.properties.crop.first + 1
13
+ first_row = if t.config.crop
14
+ t.config.crop.first + 1
11
15
  else
12
- t.properties.skip + 1
16
+ t.config.skip + 1
13
17
  end
14
18
 
15
- last_row = if t.properties.crop
16
- t.properties.crop.last
19
+ last_row = if t.config.crop
20
+ t.config.crop.last
17
21
  else
18
22
  spreadsheet.last_row
19
23
  end
20
24
 
21
- if t.properties.output_class == ::Array
25
+ if t.config.output_class == ::Array
22
26
  (first_row..last_row).each do |y|
23
27
  output = (1..spreadsheet.last_column).map do |x|
24
28
  assume_utf8 spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
25
29
  end
26
- yield output if t.properties.keep_blank_rows or output.any? { |v| v.present? }
30
+ yield output if t.config.keep_blank_rows or output.any? { |v| v.present? }
27
31
  end
28
32
  else
29
- headers = {}
30
- if t.properties.use_first_row_as_header?
33
+ headers = ::ActiveSupport::OrderedHash.new
34
+ if t.config.use_first_row_as_header?
31
35
  (1..spreadsheet.last_column).each do |x|
32
36
  v = spreadsheet.cell(first_row, x)
33
37
  v = spreadsheet.cell(first_row - 1, x) if v.blank? # look up
@@ -39,7 +43,7 @@ class RemoteTable
39
43
  # "advance the cursor"
40
44
  first_row += 1
41
45
  else
42
- t.properties.headers.each_with_index do |k, i|
46
+ t.config.headers.each_with_index do |k, i|
43
47
  headers[k] = i + 1
44
48
  end
45
49
  end
@@ -48,7 +52,7 @@ class RemoteTable
48
52
  headers.each do |k, x|
49
53
  output[k] = assume_utf8 spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
50
54
  end
51
- yield output if t.properties.keep_blank_rows or output.any? { |k, v| v.present? }
55
+ yield output if t.config.keep_blank_rows or output.any? { |k, v| v.present? }
52
56
  end
53
57
  end
54
58
  ensure
@@ -7,36 +7,36 @@ class RemoteTable
7
7
  '\xc2\xad', # soft hyphen, often inserted by MS Office (html: &shy;)
8
8
  ]
9
9
  def remove_useless_characters!
10
- Utils.in_place t.local_file.path, 'perl', '-pe', "s/#{USELESS_CHARACTERS.join '//g; s/'}//g"
11
- if t.properties.internal_encoding =~ /windows.?1252/i
10
+ t.local_file.in_place :perl, "s/#{USELESS_CHARACTERS.join('//g; s/')}//g"
11
+ if t.config.internal_encoding =~ /windows.?1252/i
12
12
  # soft hyphen again, as I have seen it appear in windows 1252
13
- Utils.in_place t.local_file.path, 'perl', '-pe', 's/\xad//g'
13
+ t.local_file.in_place :perl, 's/\xad//g'
14
14
  end
15
15
  end
16
16
 
17
17
  def transliterate_whole_file_to_utf8!
18
- Utils.in_place t.local_file.path, 'iconv', '-c', '-f', t.properties.internal_encoding, '-t', t.properties.external_encoding_iconv, :ignore_error => true
19
- t.properties.update :encoding => t.properties.external_encoding
18
+ t.local_file.in_place :iconv, t.config.external_encoding_iconv, t.config.internal_encoding
19
+ t.config.user_specified_options[:encoding] = t.config.external_encoding
20
20
  end
21
21
 
22
22
  def fix_newlines!
23
- Utils.in_place t.local_file.path, 'perl', '-pe', 's/\r\n|\n|\r/\n/g'
23
+ t.local_file.in_place :perl, 's/\r\n|\n|\r/\n/g'
24
24
  end
25
25
 
26
26
  def skip_rows!
27
- return unless t.properties.skip > 0
28
- Utils.in_place t.local_file.path, 'tail', '-n', "+#{t.properties.skip + 1}"
27
+ return unless t.config.skip > 0
28
+ t.local_file.in_place :tail, "+#{t.config.skip + 1}"
29
29
  end
30
30
 
31
31
  def crop_rows!
32
- return unless t.properties.crop
33
- Utils.in_place t.local_file.path, 'tail', '-n', "+#{t.properties.crop.first}"
34
- Utils.in_place t.local_file.path, 'head', '-n', (t.properties.crop.last - t.properties.crop.first + 1).to_s
32
+ return unless t.config.crop
33
+ t.local_file.in_place :tail, "+#{t.config.crop.first}"
34
+ t.local_file.in_place :head, (t.config.crop.last - t.config.crop.first + 1)
35
35
  end
36
36
 
37
37
  def cut_columns!
38
- return unless t.properties.cut
39
- Utils.in_place t.local_file.path, 'cut', '-c', t.properties.cut.to_s
38
+ return unless t.config.cut
39
+ t.local_file.in_place :cut, t.config.cut
40
40
  end
41
41
  end
42
42
  end
@@ -3,6 +3,9 @@ class RemoteTable
3
3
  class OpenOffice < Format
4
4
  include ProcessedByRoo
5
5
  def roo_class
6
+ if ::RUBY_PLATFORM == 'java'
7
+ ::Kernel.warn "[remote_table] Opening ODS files on JRuby is known to fail because of a flaw in the underlying Roo library"
8
+ end
6
9
  ::Openoffice
7
10
  end
8
11
  end
@@ -1,5 +1,3 @@
1
- require 'nokogiri'
2
- require 'cgi'
3
1
  class RemoteTable
4
2
  class Format
5
3
  class XML < Format
@@ -0,0 +1,14 @@
1
+ require 'yaml'
2
+
3
+ class RemoteTable
4
+ class Format
5
+ class Yaml < Format
6
+ def each(&blk)
7
+ data = YAML.load_file t.local_file.path
8
+ data.each &blk
9
+ ensure
10
+ t.local_file.cleanup
11
+ end
12
+ end
13
+ end
14
+ end
@@ -1,13 +1,71 @@
1
1
  require 'fileutils'
2
+ require 'unix_utils'
2
3
 
3
4
  class RemoteTable
4
5
  class LocalFile #:nodoc:all
6
+ class << self
7
+ def decompress(input, compression)
8
+ output = case compression
9
+ when :zip, :exe
10
+ ::UnixUtils.unzip input
11
+ when :bz2
12
+ ::UnixUtils.bunzip2 input
13
+ when :gz
14
+ ::UnixUtils.gunzip input
15
+ else
16
+ raise ::ArgumentError, "Unrecognized compression #{compression}"
17
+ end
18
+ ::FileUtils.rm_f input
19
+ output
20
+ end
21
+
22
+ def unpack(input, packing)
23
+ output = case packing
24
+ when :tar
25
+ ::UnixUtils.untar input
26
+ else
27
+ raise ::ArgumentError, "Unrecognized packing #{packing}"
28
+ end
29
+ ::FileUtils.rm_f input
30
+ output
31
+ end
32
+
33
+ def pick(input, options = {})
34
+ options = options.symbolize_keys
35
+ if (options[:filename] or options[:glob]) and not ::File.directory?(input)
36
+ raise ::RuntimeError, "Expecting #{input} to be a directory"
37
+ end
38
+ if filename = options[:filename]
39
+ src = ::File.join input, filename
40
+ raise(::RuntimeError, "Expecting #{src} to be a file") unless ::File.file?(src)
41
+ output = ::UnixUtils.tmp_path src
42
+ ::FileUtils.mv src, output
43
+ ::FileUtils.rm_rf input if ::File.dirname(input).start_with?(::Dir.tmpdir)
44
+ elsif glob = options[:glob]
45
+ src = ::Dir[input+glob].first
46
+ raise(::RuntimeError, "Expecting #{glob} to find a file in #{input}") unless src and ::File.file?(src)
47
+ output = ::UnixUtils.tmp_path src
48
+ ::FileUtils.mv src, output
49
+ ::FileUtils.rm_rf input if ::File.dirname(input).start_with?(::Dir.tmpdir)
50
+ else
51
+ output = ::UnixUtils.tmp_path input
52
+ ::FileUtils.mv input, output
53
+ end
54
+ output
55
+ end
56
+ end
5
57
 
6
58
  attr_reader :t
7
59
 
8
60
  def initialize(t)
9
61
  @t = t
10
62
  end
63
+
64
+ def in_place(*args)
65
+ bin = args.shift
66
+ tmp_path = ::UnixUtils.send(*([bin,path]+args))
67
+ ::FileUtils.mv tmp_path, path
68
+ end
11
69
 
12
70
  def path
13
71
  generate unless generated?
@@ -16,7 +74,7 @@ class RemoteTable
16
74
 
17
75
  def encoded_io
18
76
  @encoded_io ||= if ::RUBY_VERSION >= '1.9'
19
- ::File.open path, 'rb', :internal_encoding => t.properties.internal_encoding, :external_encoding => t.properties.external_encoding
77
+ ::File.open path, 'rb', :internal_encoding => t.config.internal_encoding, :external_encoding => t.config.external_encoding
20
78
  else
21
79
  ::File.open path, 'rb'
22
80
  end
@@ -41,14 +99,18 @@ class RemoteTable
41
99
  end
42
100
 
43
101
  def generate
44
- tmp_path = Utils.download t.properties.uri, t.properties.form_data
45
- if compression = t.properties.compression
46
- tmp_path = Utils.decompress tmp_path, compression
102
+ # sabshere 7/20/11 make web requests move more slowly so you don't get accused of DOS
103
+ if ::ENV.has_key?('REMOTE_TABLE_DELAY_BETWEEN_REQUESTS')
104
+ ::Kernel.sleep ::ENV['REMOTE_TABLE_DELAY_BETWEEN_REQUESTS'].to_i
105
+ end
106
+ tmp_path = ::UnixUtils.curl t.config.uri.to_s, t.config.form_data
107
+ if compression = t.config.compression
108
+ tmp_path = LocalFile.decompress tmp_path, compression
47
109
  end
48
- if packing = t.properties.packing
49
- tmp_path = Utils.unpack tmp_path, packing
110
+ if packing = t.config.packing
111
+ tmp_path = LocalFile.unpack tmp_path, packing
50
112
  end
51
- @path = Utils.pick tmp_path, :filename => t.properties.filename, :glob => t.properties.glob
113
+ @path = LocalFile.pick tmp_path, :filename => t.config.filename, :glob => t.config.glob
52
114
  @generated = true
53
115
  end
54
116
  end