remote_table 1.2.2 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitattributes +1 -0
- data/README.rdoc +12 -0
- data/lib/remote_table.rb +5 -0
- data/lib/remote_table/executor.rb +3 -0
- data/lib/remote_table/format.rb +16 -9
- data/lib/remote_table/format/delimited.rb +10 -8
- data/lib/remote_table/format/fixed_width.rb +12 -5
- data/lib/remote_table/format/mixins/processed_by_nokogiri.rb +4 -2
- data/lib/remote_table/format/mixins/processed_by_roo.rb +9 -12
- data/lib/remote_table/format/mixins/textual.rb +13 -2
- data/lib/remote_table/local_file.rb +12 -0
- data/lib/remote_table/properties.rb +48 -36
- data/lib/remote_table/version.rb +1 -1
- data/remote_table.gemspec +2 -4
- data/test/helper.rb +16 -1
- data/test/support/list-en1-semic-3.neooffice.binary.ods +0 -0
- data/test/support/list-en1-semic-3.neooffice.iso-8859-1.csv +0 -0
- data/test/support/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64 +0 -0
- data/test/support/list-en1-semic-3.neooffice.utf-8.csv +0 -0
- data/test/support/list-en1-semic-3.neooffice.utf-8.fixed_width-62 +0 -0
- data/test/support/list-en1-semic-3.neooffice.utf-8.html +0 -0
- data/test/support/list-en1-semic-3.neooffice.utf-8.xml +0 -0
- data/test/support/list-en1-semic-3.office-2011-for-mac-sp1-excel-95.binary.xls +0 -0
- data/test/support/list-en1-semic-3.office-2011-for-mac-sp1.binary.xls +0 -0
- data/test/support/list-en1-semic-3.office-2011-for-mac-sp1.binary.xlsx +0 -0
- data/test/support/list-en1-semic-3.office-2011-for-mac-sp1.iso-8859-1.html +0 -0
- data/test/support/list-en1-semic-3.office-2011-for-mac-sp1.mac.csv-comma +0 -0
- data/test/support/list-en1-semic-3.office-2011-for-mac-sp1.utf-8.html +0 -0
- data/test/support/list-en1-semic-3.original.iso-8859-1.csv +0 -0
- data/test/test_old_syntax.rb +1 -1
- data/test/test_old_transform.rb +26 -31
- data/test/test_remote_table.rb +34 -7
- metadata +37 -81
    
        data/.gitattributes
    ADDED
    
    | @@ -0,0 +1 @@ | |
| 1 | 
            +
            list-en1-semic-3* -crlf -diff -merge
         | 
    
        data/README.rdoc
    CHANGED
    
    | @@ -6,6 +6,13 @@ Open local or remote XLSX, XLS, ODS, CSV and fixed-width files. | |
| 6 6 |  | 
| 7 7 | 
             
            Used by http://data.brighterplanet.com and the data_miner gem (http://github.com/seamusabshere/data_miner)
         | 
| 8 8 |  | 
| 9 | 
            +
            ==Requirements
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            * POSIX operating system (not windows)
         | 
| 12 | 
            +
            * curl, iconv, perl, cat, cut, tail, etc. accessible from /usr/local/bin:/usr/bin:/bin
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            As this library matures, those should go away.
         | 
| 15 | 
            +
             | 
| 9 16 | 
             
            ==Example
         | 
| 10 17 |  | 
| 11 18 | 
             
                ?> t = RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/98guide6.zip', 'filename' => '98guide6.csv'
         | 
| @@ -136,6 +143,11 @@ More examples: | |
| 136 143 | 
             
                                            [ 'spacer',  1 ],
         | 
| 137 144 | 
             
                                            [ 'header1', 10, { :type => :string } ]]
         | 
| 138 145 |  | 
| 146 | 
            +
            ==Helpful hints
         | 
| 147 | 
            +
             | 
| 148 | 
            +
            * ASCII-8BIT is the same as BINARY
         | 
| 149 | 
            +
            * ISO-8859-1 is the same as Latin1
         | 
| 150 | 
            +
             | 
| 139 151 | 
             
            ==Custom parsers
         | 
| 140 152 |  | 
| 141 153 | 
             
            See the test file and also data_miner examples of custom parsers.
         | 
    
        data/lib/remote_table.rb
    CHANGED
    
    
| @@ -16,6 +16,9 @@ class RemoteTable | |
| 16 16 |  | 
| 17 17 | 
             
                def backtick_with_reporting(cmd, raise_on_error = false)
         | 
| 18 18 | 
             
                  cmd = cmd.gsub /\n/m, ' '
         | 
| 19 | 
            +
                  if ::ENV['REMOTE_TABLE_DEBUG'] and ::ENV['REMOTE_TABLE_DEBUG'].include? 'backtick'
         | 
| 20 | 
            +
                    $stderr.puts "[remote_table] Executing #{cmd}"
         | 
| 21 | 
            +
                  end
         | 
| 19 22 | 
             
                  pid = ::POSIX::Spawn.spawn({ 'PATH' => '/usr/local/bin:/usr/bin:/bin' }, cmd)
         | 
| 20 23 | 
             
                  stat = ::Process::waitpid pid
         | 
| 21 24 | 
             
                  if raise_on_error and not stat.success?
         | 
    
        data/lib/remote_table/format.rb
    CHANGED
    
    | @@ -24,17 +24,24 @@ class RemoteTable | |
| 24 24 | 
             
                  @t = t
         | 
| 25 25 | 
             
                end
         | 
| 26 26 |  | 
| 27 | 
            -
                def  | 
| 27 | 
            +
                def transliterate_to_utf8(str)
         | 
| 28 | 
            +
                  return if str.nil?
         | 
| 29 | 
            +
                  $stderr.puts "[remote_table translit] Before:  #{str}" if ::ENV['REMOTE_TABLE_DEBUG'] and ::ENV['REMOTE_TABLE_DEBUG'].include?('translit')
         | 
| 30 | 
            +
                  transliterated_str = if ::RUBY_VERSION >= '1.9'
         | 
| 31 | 
            +
                    str.ensure_encoding t.properties.external_encoding, :external_encoding => t.properties.internal_encoding, :invalid_characters => :transcode
         | 
| 32 | 
            +
                  else
         | 
| 33 | 
            +
                    ::Iconv.conv(t.properties.external_encoding_iconv, t.properties.internal_encoding, str.to_s + ' ')[0..-2]
         | 
| 34 | 
            +
                  end
         | 
| 35 | 
            +
                  $stderr.puts "[remote_table translit] After:   #{transliterated_str}" if ::ENV['REMOTE_TABLE_DEBUG'] and ::ENV['REMOTE_TABLE_DEBUG'].include?('translit')
         | 
| 36 | 
            +
                  transliterated_str
         | 
| 37 | 
            +
                end
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                def assume_utf8(str)
         | 
| 40 | 
            +
                  return if str.nil?
         | 
| 28 41 | 
             
                  if ::RUBY_VERSION >= '1.9'
         | 
| 29 | 
            -
                    str. | 
| 42 | 
            +
                    str.encode! t.properties.external_encoding
         | 
| 30 43 | 
             
                  else
         | 
| 31 | 
            -
                     | 
| 32 | 
            -
                    begin
         | 
| 33 | 
            -
                      ::Iconv.conv('UTF-8//TRANSLIT', t.properties.encoding[0], str.to_s + ' ')[0..-2]
         | 
| 34 | 
            -
                    rescue ::Iconv::IllegalSequence
         | 
| 35 | 
            -
                      $stderr.puts "[remote_table] Unable to transliterate #{str} into UTF-8 given #{t.properties.encoding[0]}"
         | 
| 36 | 
            -
                      str
         | 
| 37 | 
            -
                    end
         | 
| 44 | 
            +
                    str
         | 
| 38 45 | 
             
                  end
         | 
| 39 46 | 
             
                end
         | 
| 40 47 |  | 
| @@ -17,19 +17,21 @@ class RemoteTable | |
| 17 17 | 
             
                  include Textual
         | 
| 18 18 | 
             
                  def each(&blk)
         | 
| 19 19 | 
             
                    remove_useless_characters!
         | 
| 20 | 
            +
                    fix_newlines!
         | 
| 21 | 
            +
                    transliterate_whole_file_to_utf8!
         | 
| 20 22 | 
             
                    skip_rows!
         | 
| 21 | 
            -
                    CSV. | 
| 23 | 
            +
                    CSV.new(t.local_file.encoded_io, fastercsv_options).each do |row|
         | 
| 22 24 | 
             
                      if row.is_a?(CSV::Row)
         | 
| 23 | 
            -
                         | 
| 24 | 
            -
                          if  | 
| 25 | 
            -
                             | 
| 26 | 
            -
                            memo[key] = utf8 value
         | 
| 25 | 
            +
                        hash = row.inject(::ActiveSupport::OrderedHash.new) do |memo, (k, v)|
         | 
| 26 | 
            +
                          if k.present?
         | 
| 27 | 
            +
                            memo[k] = v.to_s
         | 
| 27 28 | 
             
                          end
         | 
| 28 29 | 
             
                          memo
         | 
| 29 30 | 
             
                        end
         | 
| 30 | 
            -
                        yield  | 
| 31 | 
            -
                       | 
| 32 | 
            -
                         | 
| 31 | 
            +
                        yield hash if t.properties.keep_blank_rows or hash.any? { |k, v| v.present? }
         | 
| 32 | 
            +
                      elsif row.is_a?(::Array)
         | 
| 33 | 
            +
                        array = row.map { |v| v.to_s }
         | 
| 34 | 
            +
                        yield array if t.properties.keep_blank_rows or array.any? { |v| v.present? }
         | 
| 33 35 | 
             
                      end
         | 
| 34 36 | 
             
                    end
         | 
| 35 37 | 
             
                  ensure
         | 
| @@ -1,17 +1,20 @@ | |
| 1 | 
            -
            require ' | 
| 1 | 
            +
            require 'fixed_width'
         | 
| 2 | 
            +
             | 
| 2 3 | 
             
            class RemoteTable
         | 
| 3 4 | 
             
              class Format
         | 
| 4 5 | 
             
                class FixedWidth < Format
         | 
| 5 6 | 
             
                  include Textual
         | 
| 6 7 | 
             
                  def each(&blk)
         | 
| 7 8 | 
             
                    remove_useless_characters!
         | 
| 9 | 
            +
                    fix_newlines!
         | 
| 10 | 
            +
                    transliterate_whole_file_to_utf8!
         | 
| 8 11 | 
             
                    crop_rows!
         | 
| 9 12 | 
             
                    skip_rows!
         | 
| 10 13 | 
             
                    cut_columns!
         | 
| 11 14 | 
             
                    parser.parse[:rows].each do |row|
         | 
| 12 15 | 
             
                      row.reject! { |k, v| k.blank? }
         | 
| 13 16 | 
             
                      row.each do |k, v|
         | 
| 14 | 
            -
                        row[k] =  | 
| 17 | 
            +
                        row[k] = v.strip
         | 
| 15 18 | 
             
                      end
         | 
| 16 19 | 
             
                      yield row if t.properties.keep_blank_rows or row.any? { |k, v| v.present? }
         | 
| 17 20 | 
             
                    end
         | 
| @@ -22,16 +25,20 @@ class RemoteTable | |
| 22 25 | 
             
                  private
         | 
| 23 26 |  | 
| 24 27 | 
             
                  def parser
         | 
| 25 | 
            -
                    @parser  | 
| 28 | 
            +
                    return @parser if @parser.is_a?(::FixedWidth::Parser)
         | 
| 29 | 
            +
                    if ::FixedWidth::Section.private_instance_methods.map(&:to_sym).include?(:unpacker)
         | 
| 30 | 
            +
                      raise "[remote_table] You need a different (newer) version of the FixedWidth gem that supports multibyte encoding, sometime after https://github.com/timonk/fixed_width/pull/1 was incorporated"
         | 
| 31 | 
            +
                    end
         | 
| 32 | 
            +
                    @parser = ::FixedWidth::Parser.new definition, t.local_file.encoded_io
         | 
| 26 33 | 
             
                  end
         | 
| 27 34 |  | 
| 28 35 | 
             
                  def definition
         | 
| 29 36 | 
             
                    @definition ||= if t.properties.schema_name.is_a?(::String) or t.properties.schema_name.is_a?(::Symbol)
         | 
| 30 | 
            -
                      :: | 
| 37 | 
            +
                      ::FixedWidth.send :definition, t.properties.schema_name
         | 
| 31 38 | 
             
                    elsif t.properties.schema.is_a?(::Array)
         | 
| 32 39 | 
             
                      everything = lambda { |_| true }
         | 
| 33 40 | 
             
                      srand # in case this was forked by resque
         | 
| 34 | 
            -
                      :: | 
| 41 | 
            +
                      ::FixedWidth.define(rand.to_s) do |d|
         | 
| 35 42 | 
             
                        d.rows do |row|
         | 
| 36 43 | 
             
                          row.trap(&everything)
         | 
| 37 44 | 
             
                          t.properties.schema.each do |name, width, options|
         | 
| @@ -4,7 +4,9 @@ class RemoteTable | |
| 4 4 | 
             
              class Format
         | 
| 5 5 | 
             
                module ProcessedByNokogiri
         | 
| 6 6 | 
             
                  def each
         | 
| 7 | 
            +
                    raise "[remote_table] Need :row_css or :row_xpath in order to process XML or HTML" unless t.properties.row_css or t.properties.row_xpath
         | 
| 7 8 | 
             
                    remove_useless_characters!
         | 
| 9 | 
            +
                    transliterate_whole_file_to_utf8!
         | 
| 8 10 | 
             
                    first_row = true
         | 
| 9 11 | 
             
                    keys = t.properties.headers if t.properties.headers.is_a?(::Array)
         | 
| 10 12 | 
             
                    xml = nokogiri_class.parse(unescaped_xml_without_soft_hyphens, nil, 'UTF-8')
         | 
| @@ -15,7 +17,7 @@ class RemoteTable | |
| 15 17 | 
             
                        row.xpath(t.properties.column_xpath)
         | 
| 16 18 | 
             
                      else
         | 
| 17 19 | 
             
                        [row]
         | 
| 18 | 
            -
                      end.map { |cell| cell.content.gsub(/\s+/, ' ').strip }
         | 
| 20 | 
            +
                      end.map { |cell| assume_utf8 cell.content.gsub(/\s+/, ' ').strip }
         | 
| 19 21 | 
             
                      if first_row and t.properties.use_first_row_as_header?
         | 
| 20 22 | 
             
                        keys = values
         | 
| 21 23 | 
             
                        first_row = false
         | 
| @@ -57,7 +59,7 @@ class RemoteTable | |
| 57 59 |  | 
| 58 60 | 
             
                  # should we be doing this in ruby?
         | 
| 59 61 | 
             
                  def unescaped_xml_without_soft_hyphens
         | 
| 60 | 
            -
                    str = ::CGI.unescapeHTML  | 
| 62 | 
            +
                    str = ::CGI.unescapeHTML t.local_file.encoded_io.read
         | 
| 61 63 | 
             
                    # get rid of MS Office baddies
         | 
| 62 64 | 
             
                    str.gsub! '­', ''
         | 
| 63 65 | 
             
                    str
         | 
| @@ -6,9 +6,9 @@ class RemoteTable | |
| 6 6 | 
             
                    spreadsheet = roo_class.new t.local_file.path, nil, :ignore
         | 
| 7 7 | 
             
                    spreadsheet.default_sheet = t.properties.sheet.is_a?(::Numeric) ? spreadsheet.sheets[t.properties.sheet] : t.properties.sheet
         | 
| 8 8 | 
             
                    if t.properties.output_class == ::Array
         | 
| 9 | 
            -
                      ( | 
| 9 | 
            +
                      (first_row..spreadsheet.last_row).each do |y|
         | 
| 10 10 | 
             
                        output = (1..spreadsheet.last_column).map do |x|
         | 
| 11 | 
            -
                          spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
         | 
| 11 | 
            +
                          assume_utf8 spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
         | 
| 12 12 | 
             
                        end
         | 
| 13 13 | 
             
                        yield output if t.properties.keep_blank_rows or output.any? { |v| v.present? }
         | 
| 14 14 | 
             
                      end
         | 
| @@ -16,18 +16,19 @@ class RemoteTable | |
| 16 16 | 
             
                      keys = {}
         | 
| 17 17 | 
             
                      if t.properties.use_first_row_as_header?
         | 
| 18 18 | 
             
                        (1..spreadsheet.last_column).each do |x|
         | 
| 19 | 
            -
                          keys[x] = spreadsheet.cell( | 
| 20 | 
            -
                          keys[x] = spreadsheet.cell( | 
| 19 | 
            +
                          keys[x] = spreadsheet.cell(first_row, x)
         | 
| 20 | 
            +
                          keys[x] = spreadsheet.cell(first_row - 1, x) if keys[x].blank? # look up
         | 
| 21 | 
            +
                          keys[x] = assume_utf8 keys[x]
         | 
| 21 22 | 
             
                        end
         | 
| 22 23 | 
             
                      else
         | 
| 23 24 | 
             
                        (1..spreadsheet.last_column).each do |x|
         | 
| 24 | 
            -
                          keys[x] = t.properties.headers[x - 1]
         | 
| 25 | 
            +
                          keys[x] = assume_utf8 t.properties.headers[x - 1]
         | 
| 25 26 | 
             
                        end
         | 
| 26 27 | 
             
                      end
         | 
| 27 | 
            -
                      ( | 
| 28 | 
            +
                      (first_row+1..spreadsheet.last_row).each do |y|
         | 
| 28 29 | 
             
                        output = (1..spreadsheet.last_column).inject(::ActiveSupport::OrderedHash.new) do |memo, x|
         | 
| 29 30 | 
             
                          if keys[x].present?
         | 
| 30 | 
            -
                            memo[keys[x]] = spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
         | 
| 31 | 
            +
                            memo[keys[x]] = assume_utf8 spreadsheet.cell(y, x).to_s.gsub(/<[^>]+>/, '').strip
         | 
| 31 32 | 
             
                          end
         | 
| 32 33 | 
             
                          memo
         | 
| 33 34 | 
             
                        end
         | 
| @@ -40,13 +41,9 @@ class RemoteTable | |
| 40 41 |  | 
| 41 42 | 
             
                  private
         | 
| 42 43 |  | 
| 43 | 
            -
                  def  | 
| 44 | 
            +
                  def first_row
         | 
| 44 45 | 
             
                    1 + t.properties.skip
         | 
| 45 46 | 
             
                  end
         | 
| 46 | 
            -
             | 
| 47 | 
            -
                  def first_data_row
         | 
| 48 | 
            -
                    1 + header_row
         | 
| 49 | 
            -
                  end
         | 
| 50 47 | 
             
                end
         | 
| 51 48 | 
             
              end
         | 
| 52 49 | 
             
            end
         | 
| @@ -6,11 +6,22 @@ class RemoteTable | |
| 6 6 | 
             
                  USELESS_CHARACTERS = [
         | 
| 7 7 | 
             
                    '\xef\xbb\xbf',   # UTF-8 byte order mark
         | 
| 8 8 | 
             
                    '\xc2\xad',       # soft hyphen, often inserted by MS Office (html: ­)
         | 
| 9 | 
            -
                    '\xad',
         | 
| 10 | 
            -
                    # '\xa0'
         | 
| 11 9 | 
             
                  ]
         | 
| 12 10 | 
             
                  def remove_useless_characters!
         | 
| 13 11 | 
             
                    ::RemoteTable.executor.bang t.local_file.path, "perl -pe 's/#{USELESS_CHARACTERS.join '//g; s/'}//g'"
         | 
| 12 | 
            +
                    if t.properties.internal_encoding =~ /windows.?1252/i
         | 
| 13 | 
            +
                      # soft hyphen again, as I have seen it appear in windows 1252
         | 
| 14 | 
            +
                      ::RemoteTable.executor.bang t.local_file.path, %q{perl -pe 's/\xad//g'}
         | 
| 15 | 
            +
                    end
         | 
| 16 | 
            +
                  end
         | 
| 17 | 
            +
                  
         | 
| 18 | 
            +
                  def transliterate_whole_file_to_utf8!
         | 
| 19 | 
            +
                    ::RemoteTable.executor.bang t.local_file.path, "iconv -c -f #{::Escape.shell_single_word t.properties.internal_encoding} -t #{::Escape.shell_single_word t.properties.external_encoding_iconv}"
         | 
| 20 | 
            +
                    t.properties.update 'encoding' => t.properties.external_encoding
         | 
| 21 | 
            +
                  end
         | 
| 22 | 
            +
                  
         | 
| 23 | 
            +
                  def fix_newlines!
         | 
| 24 | 
            +
                    ::RemoteTable.executor.bang t.local_file.path, %q{perl -pe 's/\r\n|\n|\r/\n/g'}
         | 
| 14 25 | 
             
                  end
         | 
| 15 26 |  | 
| 16 27 | 
             
                  def skip_rows!
         | 
| @@ -15,8 +15,20 @@ class RemoteTable | |
| 15 15 | 
             
                  @path
         | 
| 16 16 | 
             
                end
         | 
| 17 17 |  | 
| 18 | 
            +
                def encoded_io
         | 
| 19 | 
            +
                  @encoded_io ||= if ::RUBY_VERSION >= '1.9'
         | 
| 20 | 
            +
                    ::File.open path, 'rb', :internal_encoding => t.properties.internal_encoding, :external_encoding => t.properties.external_encoding
         | 
| 21 | 
            +
                  else
         | 
| 22 | 
            +
                    ::File.open path, 'rb'
         | 
| 23 | 
            +
                  end
         | 
| 24 | 
            +
                end
         | 
| 25 | 
            +
                
         | 
| 18 26 | 
             
                def delete
         | 
| 27 | 
            +
                  if @encoded_io.respond_to?(:closed?) and !@encoded_io.closed?
         | 
| 28 | 
            +
                    @encoded_io.close
         | 
| 29 | 
            +
                  end
         | 
| 19 30 | 
             
                  ::FileUtils.rm_rf staging_dir_path
         | 
| 31 | 
            +
                  @encoded_io = nil
         | 
| 20 32 | 
             
                  @path = nil
         | 
| 21 33 | 
             
                  @staging_dir_path = nil
         | 
| 22 34 | 
             
                end
         | 
| @@ -3,8 +3,15 @@ class RemoteTable | |
| 3 3 | 
             
              # Represents the properties of a RemoteTable, whether they are explicitly set by the user or inferred automatically.
         | 
| 4 4 | 
             
              class Properties
         | 
| 5 5 | 
             
                attr_reader :t
         | 
| 6 | 
            +
                attr_reader :current_options
         | 
| 7 | 
            +
                
         | 
| 6 8 | 
             
                def initialize(t)
         | 
| 7 9 | 
             
                  @t = t
         | 
| 10 | 
            +
                  @current_options = t.options.dup
         | 
| 11 | 
            +
                end
         | 
| 12 | 
            +
                
         | 
| 13 | 
            +
                def update(options)
         | 
| 14 | 
            +
                  current_options.update options
         | 
| 8 15 | 
             
                end
         | 
| 9 16 |  | 
| 10 17 | 
             
                # The parsed URI of the file to get.
         | 
| @@ -22,19 +29,19 @@ class RemoteTable | |
| 22 29 | 
             
                # * call each
         | 
| 23 30 | 
             
                # Defaults to false.
         | 
| 24 31 | 
             
                def streaming
         | 
| 25 | 
            -
                   | 
| 32 | 
            +
                  current_options['streaming'] || false
         | 
| 26 33 | 
             
                end
         | 
| 27 34 |  | 
| 28 35 | 
             
                # Defaults to true.
         | 
| 29 36 | 
             
                def warn_on_multiple_downloads
         | 
| 30 | 
            -
                   | 
| 37 | 
            +
                  current_options['warn_on_multiple_downloads'] != false
         | 
| 31 38 | 
             
                end
         | 
| 32 39 |  | 
| 33 40 | 
             
                # The headers specified by the user
         | 
| 34 41 | 
             
                #
         | 
| 35 42 | 
             
                # Default: :first_row
         | 
| 36 43 | 
             
                def headers
         | 
| 37 | 
            -
                   | 
| 44 | 
            +
                  current_options['headers'].nil? ? :first_row : current_options['headers']
         | 
| 38 45 | 
             
                end
         | 
| 39 46 |  | 
| 40 47 | 
             
                def use_first_row_as_header?
         | 
| @@ -49,60 +56,65 @@ class RemoteTable | |
| 49 56 | 
             
                #
         | 
| 50 57 | 
             
                # Default: 0
         | 
| 51 58 | 
             
                def sheet
         | 
| 52 | 
            -
                   | 
| 59 | 
            +
                  current_options['sheet'] || 0
         | 
| 53 60 | 
             
                end
         | 
| 54 61 |  | 
| 55 62 | 
             
                # Whether to keep blank rows
         | 
| 56 63 | 
             
                #
         | 
| 57 64 | 
             
                # Default: false
         | 
| 58 65 | 
             
                def keep_blank_rows
         | 
| 59 | 
            -
                   | 
| 66 | 
            +
                  current_options['keep_blank_rows'] || false
         | 
| 60 67 | 
             
                end
         | 
| 61 68 |  | 
| 62 69 | 
             
                # Form data to send in with the download request
         | 
| 63 70 | 
             
                def form_data
         | 
| 64 | 
            -
                   | 
| 71 | 
            +
                  current_options['form_data']
         | 
| 65 72 | 
             
                end
         | 
| 66 73 |  | 
| 67 74 | 
             
                # How many rows to skip
         | 
| 68 75 | 
             
                #
         | 
| 69 76 | 
             
                # Default: 0
         | 
| 70 77 | 
             
                def skip
         | 
| 71 | 
            -
                   | 
| 78 | 
            +
                  current_options['skip'].to_i
         | 
| 72 79 | 
             
                end
         | 
| 73 80 |  | 
| 74 | 
            -
                 | 
| 75 | 
            -
             | 
| 76 | 
            -
                 | 
| 77 | 
            -
                 | 
| 78 | 
            -
             | 
| 81 | 
            +
                def internal_encoding
         | 
| 82 | 
            +
                  (current_options['encoding'] || 'UTF-8').upcase
         | 
| 83 | 
            +
                end
         | 
| 84 | 
            +
                
         | 
| 85 | 
            +
                def external_encoding
         | 
| 86 | 
            +
                  'UTF-8'
         | 
| 87 | 
            +
                end
         | 
| 88 | 
            +
                
         | 
| 89 | 
            +
                def external_encoding_iconv
         | 
| 90 | 
            +
                  'UTF-8//TRANSLIT'
         | 
| 79 91 | 
             
                end
         | 
| 80 92 |  | 
| 81 93 | 
             
                # The delimiter
         | 
| 82 94 | 
             
                #
         | 
| 83 95 | 
             
                # Default: ","
         | 
| 84 96 | 
             
                def delimiter
         | 
| 85 | 
            -
                   | 
| 97 | 
            +
                  current_options['delimiter'] || ','
         | 
| 86 98 | 
             
                end
         | 
| 87 99 |  | 
| 88 100 | 
             
                # The XPath used to find rows
         | 
| 89 101 | 
             
                def row_xpath
         | 
| 90 | 
            -
                   | 
| 102 | 
            +
                  current_options['row_xpath']
         | 
| 91 103 | 
             
                end
         | 
| 92 104 |  | 
| 93 105 | 
             
                # The XPath used to find columns
         | 
| 94 106 | 
             
                def column_xpath
         | 
| 95 | 
            -
                   | 
| 107 | 
            +
                  current_options['column_xpath']
         | 
| 96 108 | 
             
                end
         | 
| 97 109 |  | 
| 98 110 | 
             
                # The CSS selector used to find rows
         | 
| 99 111 | 
             
                def row_css
         | 
| 100 | 
            -
                   | 
| 112 | 
            +
                  current_options['row_css']
         | 
| 101 113 | 
             
                end
         | 
| 102 114 |  | 
| 103 115 | 
             
                # The CSS selector used to find columns
         | 
| 104 116 | 
             
                def column_css
         | 
| 105 | 
            -
                   | 
| 117 | 
            +
                  current_options['column_css']
         | 
| 106 118 | 
             
                end
         | 
| 107 119 |  | 
| 108 120 | 
             
                # The compression type.
         | 
| @@ -111,8 +123,8 @@ class RemoteTable | |
| 111 123 | 
             
                #
         | 
| 112 124 | 
             
                # Can be specified as: "gz", "zip", "bz2", "exe" (treated as "zip")
         | 
| 113 125 | 
             
                def compression
         | 
| 114 | 
            -
                  clue = if  | 
| 115 | 
            -
                     | 
| 126 | 
            +
                  clue = if current_options['compression']
         | 
| 127 | 
            +
                    current_options['compression'].to_s
         | 
| 116 128 | 
             
                  else
         | 
| 117 129 | 
             
                    ::File.extname uri.path
         | 
| 118 130 | 
             
                  end
         | 
| @@ -134,8 +146,8 @@ class RemoteTable | |
| 134 146 | 
             
                #
         | 
| 135 147 | 
             
                # Can be specified as: "tar"
         | 
| 136 148 | 
             
                def packing
         | 
| 137 | 
            -
                  clue = if  | 
| 138 | 
            -
                     | 
| 149 | 
            +
                  clue = if current_options['packing']
         | 
| 150 | 
            +
                    current_options['packing'].to_s
         | 
| 139 151 | 
             
                  else
         | 
| 140 152 | 
             
                    ::File.extname(uri.path.sub(/\.#{compression}\z/, ''))
         | 
| 141 153 | 
             
                  end
         | 
| @@ -150,7 +162,7 @@ class RemoteTable | |
| 150 162 | 
             
                # Example:
         | 
| 151 163 | 
             
                #     RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', 'glob' => '/*.csv'
         | 
| 152 164 | 
             
                def glob
         | 
| 153 | 
            -
                   | 
| 165 | 
            +
                  current_options['glob']
         | 
| 154 166 | 
             
                end
         | 
| 155 167 |  | 
| 156 168 | 
             
                # The filename, which can be used to pick a file out of an archive.
         | 
| @@ -158,17 +170,17 @@ class RemoteTable | |
| 158 170 | 
             
                # Example:
         | 
| 159 171 | 
             
                #     RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', 'filename' => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
         | 
| 160 172 | 
             
                def filename
         | 
| 161 | 
            -
                   | 
| 173 | 
            +
                  current_options['filename']
         | 
| 162 174 | 
             
                end
         | 
| 163 175 |  | 
| 164 176 | 
             
                # Cut columns up to this character
         | 
| 165 177 | 
             
                def cut
         | 
| 166 | 
            -
                   | 
| 178 | 
            +
                  current_options['cut']
         | 
| 167 179 | 
             
                end
         | 
| 168 180 |  | 
| 169 181 | 
             
                # Crop rows after this line
         | 
| 170 182 | 
             
                def crop
         | 
| 171 | 
            -
                   | 
| 183 | 
            +
                  current_options['crop']
         | 
| 172 184 | 
             
                end
         | 
| 173 185 |  | 
| 174 186 | 
             
                # The fixed-width schema, given as an array
         | 
| @@ -183,31 +195,31 @@ class RemoteTable | |
| 183 195 | 
             
                #                                  [  'spacer',  12 ],
         | 
| 184 196 | 
             
                #                                  [  'header6', 10, { :type => :string } ]])
         | 
| 185 197 | 
             
                def schema
         | 
| 186 | 
            -
                   | 
| 198 | 
            +
                  current_options['schema']
         | 
| 187 199 | 
             
                end
         | 
| 188 200 |  | 
| 189 | 
            -
                # The name of the fixed-width schema according to  | 
| 201 | 
            +
                # The name of the fixed-width schema according to FixedWidth
         | 
| 190 202 | 
             
                def schema_name
         | 
| 191 | 
            -
                   | 
| 203 | 
            +
                  current_options['schema_name']
         | 
| 192 204 | 
             
                end
         | 
| 193 205 |  | 
| 194 206 | 
             
                # A proc to call to decide whether to return a row.
         | 
| 195 207 | 
             
                def select
         | 
| 196 | 
            -
                   | 
| 208 | 
            +
                  current_options['select']
         | 
| 197 209 | 
             
                end
         | 
| 198 210 |  | 
| 199 211 | 
             
                # A proc to call to decide whether to return a row.
         | 
| 200 212 | 
             
                def reject
         | 
| 201 | 
            -
                   | 
| 213 | 
            +
                  current_options['reject']
         | 
| 202 214 | 
             
                end
         | 
| 203 215 |  | 
| 204 216 | 
             
                # A hash of options to create a new Errata instance (see the Errata gem at http://github.com/seamusabshere/errata) to be used on every row.
         | 
| 205 217 | 
             
                def errata
         | 
| 206 | 
            -
                  return unless  | 
| 207 | 
            -
                  @errata ||= if  | 
| 208 | 
            -
                    ::Errata.new  | 
| 218 | 
            +
                  return unless current_options.has_key? 'errata'
         | 
| 219 | 
            +
                  @errata ||= if current_options['errata'].is_a? ::Hash
         | 
| 220 | 
            +
                    ::Errata.new current_options['errata']
         | 
| 209 221 | 
             
                  else
         | 
| 210 | 
            -
                     | 
| 222 | 
            +
                    current_options['errata']
         | 
| 211 223 | 
             
                  end
         | 
| 212 224 | 
             
                end
         | 
| 213 225 |  | 
| @@ -220,8 +232,8 @@ class RemoteTable | |
| 220 232 | 
             
                # Can be specified as: "xlsx", "xls", "csv", "ods", "fixed_width", "html"
         | 
| 221 233 | 
             
                def format
         | 
| 222 234 | 
             
                  return Format::Delimited if uri.host == 'spreadsheets.google.com'
         | 
| 223 | 
            -
                  clue = if  | 
| 224 | 
            -
                     | 
| 235 | 
            +
                  clue = if current_options['format']
         | 
| 236 | 
            +
                    current_options['format'].to_s
         | 
| 225 237 | 
             
                  else
         | 
| 226 238 | 
             
                    ::File.extname t.local_file.path
         | 
| 227 239 | 
             
                  end
         | 
    
        data/lib/remote_table/version.rb
    CHANGED
    
    
    
        data/remote_table.gemspec
    CHANGED
    
    | @@ -21,7 +21,7 @@ Gem::Specification.new do |s| | |
| 21 21 |  | 
| 22 22 | 
             
              s.add_dependency 'activesupport', '>=2.3.4'
         | 
| 23 23 | 
             
              s.add_dependency 'roo', '~>1.9'
         | 
| 24 | 
            -
              s.add_dependency ' | 
| 24 | 
            +
              s.add_dependency 'fixed_width-multibyte' # TODO replace with fixed_width once timon gets off vacation
         | 
| 25 25 | 
             
              s.add_dependency 'i18n' # activesupport?
         | 
| 26 26 | 
             
              s.add_dependency 'builder' # roo?
         | 
| 27 27 | 
             
              s.add_dependency 'zip' # roo
         | 
| @@ -31,9 +31,7 @@ Gem::Specification.new do |s| | |
| 31 31 | 
             
              s.add_dependency 'escape', '>=0.0.4'
         | 
| 32 32 | 
             
              s.add_dependency 'posix-spawn'
         | 
| 33 33 | 
             
              s.add_dependency 'ensure-encoding'
         | 
| 34 | 
            -
               | 
| 35 | 
            -
                s.add_dependency 'fastercsv', '>=1.5.0'
         | 
| 36 | 
            -
              end
         | 
| 34 | 
            +
              s.add_dependency 'fastercsv', '>=1.5.0'
         | 
| 37 35 |  | 
| 38 36 | 
             
              s.add_development_dependency 'errata', '>=0.2.0'
         | 
| 39 37 | 
             
              s.add_development_dependency 'test-unit'
         | 
    
        data/test/helper.rb
    CHANGED
    
    | @@ -4,11 +4,26 @@ Bundler.setup | |
| 4 4 | 
             
            require 'test/unit'
         | 
| 5 5 | 
             
            require 'shoulda'
         | 
| 6 6 | 
             
            require 'ruby-debug'
         | 
| 7 | 
            -
            require 'tempfile'
         | 
| 8 7 |  | 
| 9 8 | 
             
            $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
         | 
| 10 9 | 
             
            $LOAD_PATH.unshift(File.dirname(__FILE__))
         | 
| 11 10 | 
             
            require File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'remote_table'))
         | 
| 12 11 |  | 
| 13 12 | 
             
            class Test::Unit::TestCase
         | 
| 13 | 
            +
              def setup
         | 
| 14 | 
            +
                if RUBY_VERSION >= '1.9'
         | 
| 15 | 
            +
                  @old_default_internal = Encoding.default_internal
         | 
| 16 | 
            +
                  @old_default_external = Encoding.default_external
         | 
| 17 | 
            +
                  # totally random choices here
         | 
| 18 | 
            +
                  Encoding.default_internal = 'EUC-JP'
         | 
| 19 | 
            +
                  Encoding.default_external = 'Shift_JIS'
         | 
| 20 | 
            +
                end
         | 
| 21 | 
            +
              end
         | 
| 22 | 
            +
              
         | 
| 23 | 
            +
              def teardown
         | 
| 24 | 
            +
                if RUBY_VERSION >= '1.9'
         | 
| 25 | 
            +
                  Encoding.default_internal = @old_default_internal
         | 
| 26 | 
            +
                  Encoding.default_external = @old_default_external
         | 
| 27 | 
            +
                end
         | 
| 28 | 
            +
              end
         | 
| 14 29 | 
             
            end
         | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
| Binary file | 
    
        data/test/test_old_syntax.rb
    CHANGED
    
    | @@ -16,7 +16,7 @@ $test2_rows.freeze | |
| 16 16 | 
             
            class TestOldSyntax < Test::Unit::TestCase
         | 
| 17 17 | 
             
              should "open an XLSX like an array (numbered columns)" do
         | 
| 18 18 | 
             
                t = RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :headers => false)
         | 
| 19 | 
            -
                assert_equal " | 
| 19 | 
            +
                assert_equal "Software-As-A-Service", t.rows[5][0]
         | 
| 20 20 | 
             
              end
         | 
| 21 21 |  | 
| 22 22 | 
             
              should "open an XLSX with custom headers" do
         | 
    
        data/test/test_old_transform.rb
    CHANGED
    
    | @@ -1,36 +1,31 @@ | |
| 1 1 | 
             
            require 'helper'
         | 
| 2 2 |  | 
| 3 | 
            -
            class  | 
| 3 | 
            +
            class NaturalGasParser
         | 
| 4 4 | 
             
              def initialize(options = {})
         | 
| 5 5 | 
             
                # nothing
         | 
| 6 6 | 
             
              end
         | 
| 7 | 
            -
              def add_hints!(bus)
         | 
| 8 | 
            -
                bus[:sheet] = 'Data 1'
         | 
| 9 | 
            -
                bus[:skip] = 2
         | 
| 10 | 
            -
                bus[:select] = lambda { |row| row['year'] > 1989 }
         | 
| 11 | 
            -
              end
         | 
| 12 7 | 
             
              def apply(row)
         | 
| 13 8 | 
             
                virtual_rows = []
         | 
| 14 | 
            -
                row.keys.grep( | 
| 15 | 
            -
                   | 
| 16 | 
            -
                  next if ( | 
| 17 | 
            -
                  if  | 
| 18 | 
            -
                     | 
| 19 | 
            -
             | 
| 20 | 
            -
                    /\(PADD (.*)\)/.match(first_part)
         | 
| 21 | 
            -
                    padd_part = $1
         | 
| 22 | 
            -
                    next if padd_part == '1' # skip PADD 1 because we always prefer subdistricts
         | 
| 23 | 
            -
                    locatable = "#{padd_part} (PetroleumAdministrationForDefenseDistrict)"
         | 
| 9 | 
            +
                row.keys.grep(/\A(.*) Natural Gas/) do |location_column_name|
         | 
| 10 | 
            +
                  match_1 = $1
         | 
| 11 | 
            +
                  next if (price = row[location_column_name]).blank? or (date = row['Date']).blank?
         | 
| 12 | 
            +
                  if match_1 == 'U.S.'
         | 
| 13 | 
            +
                    locatable_id = 'US'
         | 
| 14 | 
            +
                    locatable_type = 'Country'
         | 
| 24 15 | 
             
                  else
         | 
| 25 | 
            -
                     | 
| 16 | 
            +
                    locatable_id = match_1 # name
         | 
| 17 | 
            +
                    locatable_type = 'State'
         | 
| 26 18 | 
             
                  end
         | 
| 27 19 | 
             
                  date = Time.parse(date)
         | 
| 28 | 
            -
                   | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
| 31 | 
            -
             | 
| 32 | 
            -
             | 
| 33 | 
            -
                   | 
| 20 | 
            +
                  new_row = ActiveSupport::OrderedHash.new
         | 
| 21 | 
            +
                  new_row['locatable_id'] = locatable_id
         | 
| 22 | 
            +
                  new_row['locatable_type'] = locatable_type
         | 
| 23 | 
            +
                  new_row['price'] = price
         | 
| 24 | 
            +
                  new_row['year'] = date.year
         | 
| 25 | 
            +
                  new_row['month'] = date.month
         | 
| 26 | 
            +
                  row_hash = RemoteTable::Transform.row_hash new_row
         | 
| 27 | 
            +
                  new_row['row_hash'] = row_hash
         | 
| 28 | 
            +
                  virtual_rows << new_row
         | 
| 34 29 | 
             
                end
         | 
| 35 30 | 
             
                virtual_rows
         | 
| 36 31 | 
             
              end
         | 
| @@ -38,12 +33,12 @@ end | |
| 38 33 |  | 
| 39 34 | 
             
            class TestOldTransform < Test::Unit::TestCase
         | 
| 40 35 | 
             
              should "open an XLS with a parser" do
         | 
| 41 | 
            -
                 | 
| 42 | 
            -
             | 
| 43 | 
            -
             | 
| 44 | 
            -
             | 
| 45 | 
            -
             | 
| 46 | 
            -
                 | 
| 47 | 
            -
                 | 
| 36 | 
            +
                t = RemoteTable.new(:url => 'http://tonto.eia.doe.gov/dnav/ng/xls/ng_pri_sum_a_EPG0_FWA_DMcf_a.xls',
         | 
| 37 | 
            +
                       :sheet => 'Data 1',
         | 
| 38 | 
            +
                       :skip => 2,
         | 
| 39 | 
            +
                       :select => lambda { |row| row['year'].to_i > 1989 },
         | 
| 40 | 
            +
                       :transform => { :class => NaturalGasParser })
         | 
| 41 | 
            +
                assert_equal 'Country', t[0]['locatable_type']
         | 
| 42 | 
            +
                assert_equal 'US', t[0]['locatable_id']
         | 
| 48 43 | 
             
              end
         | 
| 49 | 
            -
            end
         | 
| 44 | 
            +
            end
         | 
    
        data/test/test_remote_table.rb
    CHANGED
    
    | @@ -63,12 +63,12 @@ class TestRemoteTable < Test::Unit::TestCase | |
| 63 63 |  | 
| 64 64 | 
             
              # fixes ArgumentError: invalid byte sequence in UTF-8
         | 
| 65 65 | 
             
              should %{safely strip soft hyphens and read windows-1252 html} do
         | 
| 66 | 
            -
                t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :row_xpath => '//table/tr[2]/td/table/tr', :column_xpath => 'td'
         | 
| 66 | 
            +
                t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-A.htm", :row_xpath => '//table/tr[2]/td/table/tr', :column_xpath => 'td', :encoding => 'windows-1252'
         | 
| 67 67 | 
             
                assert t.rows.detect { |row| row['Model'] == 'A300B4600' }
         | 
| 68 68 | 
             
              end
         | 
| 69 69 |  | 
| 70 70 | 
             
              should %{transliterate characters from ISO-8859-1} do
         | 
| 71 | 
            -
                t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv'
         | 
| 71 | 
            +
                t = RemoteTable.new :url => 'http://static.brighterplanet.com/science/data/consumables/pets/breed_genders.csv', :encoding => 'ISO-8859-1'
         | 
| 72 72 | 
             
                assert t.rows.detect { |row| row['name'] == 'Briquet Griffon Vendéen' }
         | 
| 73 73 | 
             
              end
         | 
| 74 74 |  | 
| @@ -86,15 +86,42 @@ class TestRemoteTable < Test::Unit::TestCase | |
| 86 86 | 
             
                assert(time1 != time2)
         | 
| 87 87 | 
             
              end
         | 
| 88 88 |  | 
| 89 | 
            -
               | 
| 90 | 
            -
             | 
| 91 | 
            -
             | 
| 92 | 
            -
             | 
| 89 | 
            +
              {
         | 
| 90 | 
            +
              # IMPOSSIBLE "../support/list-en1-semic-3.office-2011-for-mac-sp1-excel-95.binary.xls" => {:format=>"xls",         :encoding=>"binary"},
         | 
| 91 | 
            +
              "../support/list-en1-semic-3.office-2011-for-mac-sp1.binary.xlsx"         => {:format=>"xlsx"},
         | 
| 92 | 
            +
              "../support/list-en1-semic-3.office-2011-for-mac-sp1.binary.xls"          => {:format=>"xls"},
         | 
| 93 | 
            +
              "../support/list-en1-semic-3.neooffice.binary.ods"                        => {:format=>"ods"},
         | 
| 94 | 
            +
              "../support/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64"         => {:format=>"fixed_width", :encoding=>"iso-8859-1", :schema => [['name', 63, { :type => :string }], ['iso_3166', 2, { :type => :string }]]},
         | 
| 95 | 
            +
              "../support/list-en1-semic-3.neooffice.utf-8.fixed_width-62"              => {:format=>"fixed_width", :schema => [['name', 61, { :type => :string }], ['iso_3166', 2, { :type => :string }]]},
         | 
| 96 | 
            +
              # TODO "../support/list-en1-semic-3.office-2011-for-mac-sp1.utf-8.html"          => {:format=>"html" },
         | 
| 97 | 
            +
              # TODO "../support/list-en1-semic-3.office-2011-for-mac-sp1.iso-8859-1.html"     => {:format=>"html", :encoding=>"iso-8859-1"},
         | 
| 98 | 
            +
              # TODO "../support/list-en1-semic-3.neooffice.utf-8.html"                        => {:format=>"html" },
         | 
| 99 | 
            +
              "../support/list-en1-semic-3.neooffice.utf-8.xml"                         => {:format=>"xml", :row_css=>'Row', :column_css => 'Data', :select => lambda { |row| row[1].to_s =~ /[A-Z]{2}/ }},
         | 
| 100 | 
            +
              "../support/list-en1-semic-3.neooffice.iso-8859-1.csv"                    => {:format=>"csv", :encoding=>"iso-8859-1", :delimiter => ';'},
         | 
| 101 | 
            +
              "../support/list-en1-semic-3.original.iso-8859-1.csv"                     => {:format=>"csv", :encoding=>"iso-8859-1", :delimiter => ';'},
         | 
| 102 | 
            +
              "../support/list-en1-semic-3.office-2011-for-mac-sp1.mac.csv-comma"       => {:format=>"csv", :encoding=>"MACROMAN"}, # comma because no option in excel
         | 
| 103 | 
            +
              "../support/list-en1-semic-3.neooffice.utf-8.csv"                         => {:format=>"csv", :delimiter => ';'}
         | 
| 104 | 
            +
              }.each do |k, v|
         | 
| 105 | 
            +
                should %{open #{k} with encoding #{v[:encoding] || 'default'}} do
         | 
| 106 | 
            +
                  options = v.merge(:headers => false, :skip => 2)
         | 
| 107 | 
            +
                  t = RemoteTable.new "file://#{File.expand_path(k, __FILE__)}", options
         | 
| 108 | 
            +
                  a = %{ÅLAND ISLANDS}
         | 
| 109 | 
            +
                  b = (t[1].is_a?(::Array) ? t[1][0] : t[1]['name'])
         | 
| 110 | 
            +
                  if RUBY_VERSION >= '1.9'
         | 
| 111 | 
            +
                    assert_equal 'UTF-8', a.encoding.to_s
         | 
| 112 | 
            +
                    assert_equal 'UTF-8', b.encoding.to_s
         | 
| 113 | 
            +
                  end
         | 
| 114 | 
            +
                  assert_equal a, b
         | 
| 93 115 | 
             
                end
         | 
| 94 116 | 
             
              end
         | 
| 95 117 |  | 
| 118 | 
            +
              should %{recode as UTF-8 even ISO-8859-1 (or any other encoding)} do
         | 
| 119 | 
            +
                t = RemoteTable.new 'http://www.iso.org/iso/list-en1-semic-3.txt', :skip => 2, :headers => false, :delimiter => ';', :encoding => 'ISO-8859-1'
         | 
| 120 | 
            +
                assert_equal %{ÅLAND ISLANDS}, t[1][0]
         | 
| 121 | 
            +
              end
         | 
| 122 | 
            +
              
         | 
| 96 123 | 
             
              should %{parse a big CSV that is not UTF-8} do
         | 
| 97 | 
            -
                t = RemoteTable.new 'https://openflights.svn.sourceforge.net/svnroot/openflights/openflights/data/airports.dat', :headers => false
         | 
| 124 | 
            +
                t = RemoteTable.new 'https://openflights.svn.sourceforge.net/svnroot/openflights/openflights/data/airports.dat', :headers => false#, :encoding => 'UTF-8'
         | 
| 98 125 | 
             
                assert_equal 'Goroka', t[0][1]
         | 
| 99 126 | 
             
              end
         | 
| 100 127 | 
             
            end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,13 +1,8 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification 
         | 
| 2 2 | 
             
            name: remote_table
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version 
         | 
| 4 | 
            -
              hash: 27
         | 
| 5 4 | 
             
              prerelease: 
         | 
| 6 | 
            -
               | 
| 7 | 
            -
              - 1
         | 
| 8 | 
            -
              - 2
         | 
| 9 | 
            -
              - 2
         | 
| 10 | 
            -
              version: 1.2.2
         | 
| 5 | 
            +
              version: 1.2.3
         | 
| 11 6 | 
             
            platform: ruby
         | 
| 12 7 | 
             
            authors: 
         | 
| 13 8 | 
             
            - Seamus Abshere
         | 
| @@ -16,7 +11,8 @@ autorequire: | |
| 16 11 | 
             
            bindir: bin
         | 
| 17 12 | 
             
            cert_chain: []
         | 
| 18 13 |  | 
| 19 | 
            -
            date: 2011-05- | 
| 14 | 
            +
            date: 2011-05-21 00:00:00 -05:00
         | 
| 15 | 
            +
            default_executable: 
         | 
| 20 16 | 
             
            dependencies: 
         | 
| 21 17 | 
             
            - !ruby/object:Gem::Dependency 
         | 
| 22 18 | 
             
              name: activesupport
         | 
| @@ -26,11 +22,6 @@ dependencies: | |
| 26 22 | 
             
                requirements: 
         | 
| 27 23 | 
             
                - - ">="
         | 
| 28 24 | 
             
                  - !ruby/object:Gem::Version 
         | 
| 29 | 
            -
                    hash: 11
         | 
| 30 | 
            -
                    segments: 
         | 
| 31 | 
            -
                    - 2
         | 
| 32 | 
            -
                    - 3
         | 
| 33 | 
            -
                    - 4
         | 
| 34 25 | 
             
                    version: 2.3.4
         | 
| 35 26 | 
             
              type: :runtime
         | 
| 36 27 | 
             
              version_requirements: *id001
         | 
| @@ -42,27 +33,18 @@ dependencies: | |
| 42 33 | 
             
                requirements: 
         | 
| 43 34 | 
             
                - - ~>
         | 
| 44 35 | 
             
                  - !ruby/object:Gem::Version 
         | 
| 45 | 
            -
                    hash: 29
         | 
| 46 | 
            -
                    segments: 
         | 
| 47 | 
            -
                    - 1
         | 
| 48 | 
            -
                    - 9
         | 
| 49 36 | 
             
                    version: "1.9"
         | 
| 50 37 | 
             
              type: :runtime
         | 
| 51 38 | 
             
              version_requirements: *id002
         | 
| 52 39 | 
             
            - !ruby/object:Gem::Dependency 
         | 
| 53 | 
            -
              name:  | 
| 40 | 
            +
              name: fixed_width-multibyte
         | 
| 54 41 | 
             
              prerelease: false
         | 
| 55 42 | 
             
              requirement: &id003 !ruby/object:Gem::Requirement 
         | 
| 56 43 | 
             
                none: false
         | 
| 57 44 | 
             
                requirements: 
         | 
| 58 45 | 
             
                - - ">="
         | 
| 59 46 | 
             
                  - !ruby/object:Gem::Version 
         | 
| 60 | 
            -
                     | 
| 61 | 
            -
                    segments: 
         | 
| 62 | 
            -
                    - 0
         | 
| 63 | 
            -
                    - 99
         | 
| 64 | 
            -
                    - 4
         | 
| 65 | 
            -
                    version: 0.99.4
         | 
| 47 | 
            +
                    version: "0"
         | 
| 66 48 | 
             
              type: :runtime
         | 
| 67 49 | 
             
              version_requirements: *id003
         | 
| 68 50 | 
             
            - !ruby/object:Gem::Dependency 
         | 
| @@ -73,9 +55,6 @@ dependencies: | |
| 73 55 | 
             
                requirements: 
         | 
| 74 56 | 
             
                - - ">="
         | 
| 75 57 | 
             
                  - !ruby/object:Gem::Version 
         | 
| 76 | 
            -
                    hash: 3
         | 
| 77 | 
            -
                    segments: 
         | 
| 78 | 
            -
                    - 0
         | 
| 79 58 | 
             
                    version: "0"
         | 
| 80 59 | 
             
              type: :runtime
         | 
| 81 60 | 
             
              version_requirements: *id004
         | 
| @@ -87,9 +66,6 @@ dependencies: | |
| 87 66 | 
             
                requirements: 
         | 
| 88 67 | 
             
                - - ">="
         | 
| 89 68 | 
             
                  - !ruby/object:Gem::Version 
         | 
| 90 | 
            -
                    hash: 3
         | 
| 91 | 
            -
                    segments: 
         | 
| 92 | 
            -
                    - 0
         | 
| 93 69 | 
             
                    version: "0"
         | 
| 94 70 | 
             
              type: :runtime
         | 
| 95 71 | 
             
              version_requirements: *id005
         | 
| @@ -101,9 +77,6 @@ dependencies: | |
| 101 77 | 
             
                requirements: 
         | 
| 102 78 | 
             
                - - ">="
         | 
| 103 79 | 
             
                  - !ruby/object:Gem::Version 
         | 
| 104 | 
            -
                    hash: 3
         | 
| 105 | 
            -
                    segments: 
         | 
| 106 | 
            -
                    - 0
         | 
| 107 80 | 
             
                    version: "0"
         | 
| 108 81 | 
             
              type: :runtime
         | 
| 109 82 | 
             
              version_requirements: *id006
         | 
| @@ -115,11 +88,6 @@ dependencies: | |
| 115 88 | 
             
                requirements: 
         | 
| 116 89 | 
             
                - - ">="
         | 
| 117 90 | 
             
                  - !ruby/object:Gem::Version 
         | 
| 118 | 
            -
                    hash: 5
         | 
| 119 | 
            -
                    segments: 
         | 
| 120 | 
            -
                    - 1
         | 
| 121 | 
            -
                    - 4
         | 
| 122 | 
            -
                    - 1
         | 
| 123 91 | 
             
                    version: 1.4.1
         | 
| 124 92 | 
             
              type: :runtime
         | 
| 125 93 | 
             
              version_requirements: *id007
         | 
| @@ -131,9 +99,6 @@ dependencies: | |
| 131 99 | 
             
                requirements: 
         | 
| 132 100 | 
             
                - - ">="
         | 
| 133 101 | 
             
                  - !ruby/object:Gem::Version 
         | 
| 134 | 
            -
                    hash: 3
         | 
| 135 | 
            -
                    segments: 
         | 
| 136 | 
            -
                    - 0
         | 
| 137 102 | 
             
                    version: "0"
         | 
| 138 103 | 
             
              type: :runtime
         | 
| 139 104 | 
             
              version_requirements: *id008
         | 
| @@ -145,9 +110,6 @@ dependencies: | |
| 145 110 | 
             
                requirements: 
         | 
| 146 111 | 
             
                - - ">="
         | 
| 147 112 | 
             
                  - !ruby/object:Gem::Version 
         | 
| 148 | 
            -
                    hash: 3
         | 
| 149 | 
            -
                    segments: 
         | 
| 150 | 
            -
                    - 0
         | 
| 151 113 | 
             
                    version: "0"
         | 
| 152 114 | 
             
              type: :runtime
         | 
| 153 115 | 
             
              version_requirements: *id009
         | 
| @@ -159,11 +121,6 @@ dependencies: | |
| 159 121 | 
             
                requirements: 
         | 
| 160 122 | 
             
                - - ">="
         | 
| 161 123 | 
             
                  - !ruby/object:Gem::Version 
         | 
| 162 | 
            -
                    hash: 23
         | 
| 163 | 
            -
                    segments: 
         | 
| 164 | 
            -
                    - 0
         | 
| 165 | 
            -
                    - 0
         | 
| 166 | 
            -
                    - 4
         | 
| 167 124 | 
             
                    version: 0.0.4
         | 
| 168 125 | 
             
              type: :runtime
         | 
| 169 126 | 
             
              version_requirements: *id010
         | 
| @@ -175,9 +132,6 @@ dependencies: | |
| 175 132 | 
             
                requirements: 
         | 
| 176 133 | 
             
                - - ">="
         | 
| 177 134 | 
             
                  - !ruby/object:Gem::Version 
         | 
| 178 | 
            -
                    hash: 3
         | 
| 179 | 
            -
                    segments: 
         | 
| 180 | 
            -
                    - 0
         | 
| 181 135 | 
             
                    version: "0"
         | 
| 182 136 | 
             
              type: :runtime
         | 
| 183 137 | 
             
              version_requirements: *id011
         | 
| @@ -189,9 +143,6 @@ dependencies: | |
| 189 143 | 
             
                requirements: 
         | 
| 190 144 | 
             
                - - ">="
         | 
| 191 145 | 
             
                  - !ruby/object:Gem::Version 
         | 
| 192 | 
            -
                    hash: 3
         | 
| 193 | 
            -
                    segments: 
         | 
| 194 | 
            -
                    - 0
         | 
| 195 146 | 
             
                    version: "0"
         | 
| 196 147 | 
             
              type: :runtime
         | 
| 197 148 | 
             
              version_requirements: *id012
         | 
| @@ -203,11 +154,6 @@ dependencies: | |
| 203 154 | 
             
                requirements: 
         | 
| 204 155 | 
             
                - - ">="
         | 
| 205 156 | 
             
                  - !ruby/object:Gem::Version 
         | 
| 206 | 
            -
                    hash: 3
         | 
| 207 | 
            -
                    segments: 
         | 
| 208 | 
            -
                    - 1
         | 
| 209 | 
            -
                    - 5
         | 
| 210 | 
            -
                    - 0
         | 
| 211 157 | 
             
                    version: 1.5.0
         | 
| 212 158 | 
             
              type: :runtime
         | 
| 213 159 | 
             
              version_requirements: *id013
         | 
| @@ -219,11 +165,6 @@ dependencies: | |
| 219 165 | 
             
                requirements: 
         | 
| 220 166 | 
             
                - - ">="
         | 
| 221 167 | 
             
                  - !ruby/object:Gem::Version 
         | 
| 222 | 
            -
                    hash: 23
         | 
| 223 | 
            -
                    segments: 
         | 
| 224 | 
            -
                    - 0
         | 
| 225 | 
            -
                    - 2
         | 
| 226 | 
            -
                    - 0
         | 
| 227 168 | 
             
                    version: 0.2.0
         | 
| 228 169 | 
             
              type: :development
         | 
| 229 170 | 
             
              version_requirements: *id014
         | 
| @@ -235,9 +176,6 @@ dependencies: | |
| 235 176 | 
             
                requirements: 
         | 
| 236 177 | 
             
                - - ">="
         | 
| 237 178 | 
             
                  - !ruby/object:Gem::Version 
         | 
| 238 | 
            -
                    hash: 3
         | 
| 239 | 
            -
                    segments: 
         | 
| 240 | 
            -
                    - 0
         | 
| 241 179 | 
             
                    version: "0"
         | 
| 242 180 | 
             
              type: :development
         | 
| 243 181 | 
             
              version_requirements: *id015
         | 
| @@ -249,23 +187,17 @@ dependencies: | |
| 249 187 | 
             
                requirements: 
         | 
| 250 188 | 
             
                - - ">="
         | 
| 251 189 | 
             
                  - !ruby/object:Gem::Version 
         | 
| 252 | 
            -
                    hash: 3
         | 
| 253 | 
            -
                    segments: 
         | 
| 254 | 
            -
                    - 0
         | 
| 255 190 | 
             
                    version: "0"
         | 
| 256 191 | 
             
              type: :development
         | 
| 257 192 | 
             
              version_requirements: *id016
         | 
| 258 193 | 
             
            - !ruby/object:Gem::Dependency 
         | 
| 259 | 
            -
              name: ruby- | 
| 194 | 
            +
              name: ruby-debug19
         | 
| 260 195 | 
             
              prerelease: false
         | 
| 261 196 | 
             
              requirement: &id017 !ruby/object:Gem::Requirement 
         | 
| 262 197 | 
             
                none: false
         | 
| 263 198 | 
             
                requirements: 
         | 
| 264 199 | 
             
                - - ">="
         | 
| 265 200 | 
             
                  - !ruby/object:Gem::Version 
         | 
| 266 | 
            -
                    hash: 3
         | 
| 267 | 
            -
                    segments: 
         | 
| 268 | 
            -
                    - 0
         | 
| 269 201 | 
             
                    version: "0"
         | 
| 270 202 | 
             
              type: :development
         | 
| 271 203 | 
             
              version_requirements: *id017
         | 
| @@ -280,6 +212,7 @@ extra_rdoc_files: [] | |
| 280 212 |  | 
| 281 213 | 
             
            files: 
         | 
| 282 214 | 
             
            - .document
         | 
| 215 | 
            +
            - .gitattributes
         | 
| 283 216 | 
             
            - .gitignore
         | 
| 284 217 | 
             
            - Gemfile
         | 
| 285 218 | 
             
            - LICENSE
         | 
| @@ -305,11 +238,26 @@ files: | |
| 305 238 | 
             
            - lib/remote_table/version.rb
         | 
| 306 239 | 
             
            - remote_table.gemspec
         | 
| 307 240 | 
             
            - test/helper.rb
         | 
| 241 | 
            +
            - test/support/list-en1-semic-3.neooffice.binary.ods
         | 
| 242 | 
            +
            - test/support/list-en1-semic-3.neooffice.iso-8859-1.csv
         | 
| 243 | 
            +
            - test/support/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64
         | 
| 244 | 
            +
            - test/support/list-en1-semic-3.neooffice.utf-8.csv
         | 
| 245 | 
            +
            - test/support/list-en1-semic-3.neooffice.utf-8.fixed_width-62
         | 
| 246 | 
            +
            - test/support/list-en1-semic-3.neooffice.utf-8.html
         | 
| 247 | 
            +
            - test/support/list-en1-semic-3.neooffice.utf-8.xml
         | 
| 248 | 
            +
            - test/support/list-en1-semic-3.office-2011-for-mac-sp1-excel-95.binary.xls
         | 
| 249 | 
            +
            - test/support/list-en1-semic-3.office-2011-for-mac-sp1.binary.xls
         | 
| 250 | 
            +
            - test/support/list-en1-semic-3.office-2011-for-mac-sp1.binary.xlsx
         | 
| 251 | 
            +
            - test/support/list-en1-semic-3.office-2011-for-mac-sp1.iso-8859-1.html
         | 
| 252 | 
            +
            - test/support/list-en1-semic-3.office-2011-for-mac-sp1.mac.csv-comma
         | 
| 253 | 
            +
            - test/support/list-en1-semic-3.office-2011-for-mac-sp1.utf-8.html
         | 
| 254 | 
            +
            - test/support/list-en1-semic-3.original.iso-8859-1.csv
         | 
| 308 255 | 
             
            - test/test_big.rb
         | 
| 309 256 | 
             
            - test/test_errata.rb
         | 
| 310 257 | 
             
            - test/test_old_syntax.rb
         | 
| 311 258 | 
             
            - test/test_old_transform.rb
         | 
| 312 259 | 
             
            - test/test_remote_table.rb
         | 
| 260 | 
            +
            has_rdoc: true
         | 
| 313 261 | 
             
            homepage: https://github.com/seamusabshere/remote_table
         | 
| 314 262 | 
             
            licenses: []
         | 
| 315 263 |  | 
| @@ -323,28 +271,36 @@ required_ruby_version: !ruby/object:Gem::Requirement | |
| 323 271 | 
             
              requirements: 
         | 
| 324 272 | 
             
              - - ">="
         | 
| 325 273 | 
             
                - !ruby/object:Gem::Version 
         | 
| 326 | 
            -
                  hash: 3
         | 
| 327 | 
            -
                  segments: 
         | 
| 328 | 
            -
                  - 0
         | 
| 329 274 | 
             
                  version: "0"
         | 
| 330 275 | 
             
            required_rubygems_version: !ruby/object:Gem::Requirement 
         | 
| 331 276 | 
             
              none: false
         | 
| 332 277 | 
             
              requirements: 
         | 
| 333 278 | 
             
              - - ">="
         | 
| 334 279 | 
             
                - !ruby/object:Gem::Version 
         | 
| 335 | 
            -
                  hash: 3
         | 
| 336 | 
            -
                  segments: 
         | 
| 337 | 
            -
                  - 0
         | 
| 338 280 | 
             
                  version: "0"
         | 
| 339 281 | 
             
            requirements: []
         | 
| 340 282 |  | 
| 341 283 | 
             
            rubyforge_project: remotetable
         | 
| 342 | 
            -
            rubygems_version: 1. | 
| 284 | 
            +
            rubygems_version: 1.6.2
         | 
| 343 285 | 
             
            signing_key: 
         | 
| 344 286 | 
             
            specification_version: 3
         | 
| 345 287 | 
             
            summary: Open local or remote XLSX, XLS, ODS, CSV and fixed-width files.
         | 
| 346 288 | 
             
            test_files: 
         | 
| 347 289 | 
             
            - test/helper.rb
         | 
| 290 | 
            +
            - test/support/list-en1-semic-3.neooffice.binary.ods
         | 
| 291 | 
            +
            - test/support/list-en1-semic-3.neooffice.iso-8859-1.csv
         | 
| 292 | 
            +
            - test/support/list-en1-semic-3.neooffice.iso-8859-1.fixed_width-64
         | 
| 293 | 
            +
            - test/support/list-en1-semic-3.neooffice.utf-8.csv
         | 
| 294 | 
            +
            - test/support/list-en1-semic-3.neooffice.utf-8.fixed_width-62
         | 
| 295 | 
            +
            - test/support/list-en1-semic-3.neooffice.utf-8.html
         | 
| 296 | 
            +
            - test/support/list-en1-semic-3.neooffice.utf-8.xml
         | 
| 297 | 
            +
            - test/support/list-en1-semic-3.office-2011-for-mac-sp1-excel-95.binary.xls
         | 
| 298 | 
            +
            - test/support/list-en1-semic-3.office-2011-for-mac-sp1.binary.xls
         | 
| 299 | 
            +
            - test/support/list-en1-semic-3.office-2011-for-mac-sp1.binary.xlsx
         | 
| 300 | 
            +
            - test/support/list-en1-semic-3.office-2011-for-mac-sp1.iso-8859-1.html
         | 
| 301 | 
            +
            - test/support/list-en1-semic-3.office-2011-for-mac-sp1.mac.csv-comma
         | 
| 302 | 
            +
            - test/support/list-en1-semic-3.office-2011-for-mac-sp1.utf-8.html
         | 
| 303 | 
            +
            - test/support/list-en1-semic-3.original.iso-8859-1.csv
         | 
| 348 304 | 
             
            - test/test_big.rb
         | 
| 349 305 | 
             
            - test/test_errata.rb
         | 
| 350 306 | 
             
            - test/test_old_syntax.rb
         |