remote_table 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -1,25 +1,17 @@
1
- require 'bundler'
2
- Bundler::GemHelper.install_tasks
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
3
 
4
4
  require 'rake'
5
5
  require 'rake/testtask'
6
6
  Rake::TestTask.new(:test) do |test|
7
- test.libs << 'lib' << 'test'
7
+ test.libs << 'test'
8
8
  test.pattern = 'test/**/test_*.rb'
9
9
  test.verbose = true
10
10
  end
11
11
 
12
- begin
13
- require 'rake/rdoctask'
14
- Rake::RDocTask.new do |rdoc|
15
- rdoc.rdoc_dir = 'rdoc'
16
- rdoc.title = 'remote_table'
17
- rdoc.options << '--line-numbers' << '--inline-source'
18
- rdoc.rdoc_files.include('README*')
19
- rdoc.rdoc_files.include('lib/**/*.rb')
20
- end
21
- rescue LoadError
22
- puts "Rdoc is not available"
12
+ require 'yard'
13
+ YARD::Rake::YardocTask.new do |y|
14
+ y.options << '--no-private'
23
15
  end
24
16
 
25
17
  task :default => :test
data/lib/remote_table.rb CHANGED
@@ -1,20 +1,20 @@
1
1
  if ::RUBY_VERSION < '1.9' and $KCODE != 'UTF8'
2
- $stderr.puts "[remote_table] Ruby 1.8 detected, setting $KCODE to UTF8 so that ActiveSupport::Multibyte works properly."
2
+ ::Kernel.warn "[remote_table] Ruby 1.8 detected, setting $KCODE to UTF8 so that ActiveSupport::Multibyte works properly."
3
3
  $KCODE = 'UTF8'
4
4
  end
5
5
 
6
6
  require 'active_support'
7
7
  require 'active_support/version'
8
- %w{
9
- active_support/core_ext/hash
10
- active_support/core_ext/string
11
- active_support/core_ext/module
12
- active_support/core_ext/array
13
- }.each do |active_support_3_requirement|
14
- require active_support_3_requirement
15
- end if ::ActiveSupport::VERSION::MAJOR == 3
8
+ if ::ActiveSupport::VERSION::MAJOR >= 3
9
+ require 'active_support/core_ext'
10
+ end
16
11
  require 'hash_digest'
17
12
 
13
+ require 'remote_table/format'
14
+ require 'remote_table/config'
15
+ require 'remote_table/local_file'
16
+ require 'remote_table/transformer'
17
+
18
18
  class Hash
19
19
  attr_accessor :row_hash
20
20
  end
@@ -23,13 +23,7 @@ class Array
23
23
  attr_accessor :row_hash
24
24
  end
25
25
 
26
- class RemoteTable
27
- autoload :Format, 'remote_table/format'
28
- autoload :Properties, 'remote_table/properties'
29
- autoload :LocalFile, 'remote_table/local_file'
30
- autoload :Transformer, 'remote_table/transformer'
31
- autoload :Utils, 'remote_table/utils'
32
-
26
+ class RemoteTable
33
27
  # Legacy
34
28
  class Transform
35
29
  def self.row_hash(row)
@@ -40,7 +34,7 @@ class RemoteTable
40
34
  include ::Enumerable
41
35
 
42
36
  attr_reader :url
43
- attr_reader :options
37
+ attr_reader :config
44
38
 
45
39
  # Create a new RemoteTable.
46
40
  #
@@ -51,16 +45,16 @@ class RemoteTable
51
45
  # Old syntax:
52
46
  # RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :foo => 'bar')
53
47
  #
54
- # See the <tt>Properties</tt> object for the sorts of options you can pass.
48
+ # See the <tt>Config</tt> object for the sorts of options you can pass.
55
49
  def initialize(*args)
56
- @options = args.last.is_a?(::Hash) ? args.last.symbolize_keys : {}
50
+ options = args.last.is_a?(::Hash) ? args.last.symbolize_keys : {}
51
+
57
52
  @url = if args.first.is_a? ::String
58
53
  args.first.dup
59
54
  else
60
- @options[:url].dup
55
+ options[:url].dup
61
56
  end
62
- @url.freeze
63
- @options.freeze
57
+ @config = Config.new self, options
64
58
  end
65
59
 
66
60
  # not thread safe
@@ -72,17 +66,17 @@ class RemoteTable
72
66
  retval = format.each do |row|
73
67
  transformer.transform(row).each do |virtual_row|
74
68
  virtual_row.row_hash = ::HashDigest.hexdigest row
75
- if properties.errata
76
- next if properties.errata.rejects? virtual_row
77
- properties.errata.correct! virtual_row
69
+ if config.errata
70
+ next if config.errata.rejects? virtual_row
71
+ config.errata.correct! virtual_row
78
72
  end
79
- next if properties.select and !properties.select.call(virtual_row)
80
- next if properties.reject and properties.reject.call(virtual_row)
81
- cache.push virtual_row unless properties.streaming
73
+ next if config.select and !config.select.call(virtual_row)
74
+ next if config.reject and config.reject.call(virtual_row)
75
+ cache.push virtual_row unless config.streaming
82
76
  yield virtual_row
83
77
  end
84
78
  end
85
- fully_cached! unless properties.streaming
79
+ fully_cached! unless config.streaming
86
80
  retval
87
81
  end
88
82
  end
@@ -117,17 +111,12 @@ class RemoteTable
117
111
  @local_file ||= LocalFile.new self
118
112
  end
119
113
 
120
- # Used internally to access to the properties of the table, either set by the user or implied
121
- def properties
122
- @properties ||= Properties.new self
123
- end
124
-
125
114
  # Used internally to access the driver that reads the format
126
115
  def format
127
- @format ||= properties.format.new self
116
+ @format ||= config.format.new self
128
117
  end
129
118
 
130
- # Used internally to acess the transformer (aka parser).
119
+ # Used internally to access the transformer (aka parser).
131
120
  def transformer
132
121
  @transformer ||= Transformer.new self
133
122
  end
@@ -139,8 +128,8 @@ class RemoteTable
139
128
  def mark_download!
140
129
  @download_count ||= 0
141
130
  @download_count += 1
142
- if properties.warn_on_multiple_downloads and download_count > 1
143
- $stderr.puts "[remote_table] Warning: #{url} has been downloaded #{download_count} times."
131
+ if config.warn_on_multiple_downloads and download_count > 1
132
+ ::Kernel.warn "[remote_table] #{url} has been downloaded #{download_count} times."
144
133
  end
145
134
  end
146
135
 
@@ -1,19 +1,15 @@
1
1
  require 'uri'
2
2
  class RemoteTable
3
- # Represents the properties of a RemoteTable, whether they are explicitly set by the user or inferred automatically.
4
- class Properties
3
+ # Represents the configuration of a RemoteTable: settings that are either explicitly set by the user or inferred automatically.
4
+ class Config
5
5
  attr_reader :t
6
- attr_reader :current_options
6
+ attr_reader :user_specified_options
7
7
 
8
- def initialize(t)
8
+ def initialize(t, user_specified_options)
9
9
  @t = t
10
- @current_options = t.options.symbolize_keys
10
+ @user_specified_options = user_specified_options
11
11
  end
12
-
13
- def update(options)
14
- current_options.update options
15
- end
16
-
12
+
17
13
  # The parsed URI of the file to get.
18
14
  def uri
19
15
  return @uri if @uri.is_a?(::URI)
@@ -29,19 +25,19 @@ class RemoteTable
29
25
  # * call each
30
26
  # Defaults to false.
31
27
  def streaming
32
- current_options[:streaming] || false
28
+ user_specified_options.fetch :streaming, false
33
29
  end
34
30
 
35
31
  # Defaults to true.
36
32
  def warn_on_multiple_downloads
37
- current_options[:warn_on_multiple_downloads] != false
33
+ user_specified_options[:warn_on_multiple_downloads] != false
38
34
  end
39
35
 
40
36
  # The headers specified by the user
41
37
  #
42
38
  # Default: :first_row
43
39
  def headers
44
- current_options[:headers].nil? ? :first_row : current_options[:headers]
40
+ user_specified_options[:headers].nil? ? :first_row : user_specified_options[:headers]
45
41
  end
46
42
 
47
43
  def use_first_row_as_header?
@@ -53,33 +49,31 @@ class RemoteTable
53
49
  end
54
50
 
55
51
  # The sheet specified by the user as a number or a string
56
- #
57
- # Default: 0
58
52
  def sheet
59
- current_options[:sheet] || 0
53
+ user_specified_options[:sheet]
60
54
  end
61
55
 
62
56
  # Whether to keep blank rows
63
57
  #
64
58
  # Default: false
65
59
  def keep_blank_rows
66
- current_options[:keep_blank_rows] || false
60
+ user_specified_options.fetch :keep_blank_rows, false
67
61
  end
68
62
 
69
63
  # Form data to send in with the download request
70
64
  def form_data
71
- current_options[:form_data]
65
+ user_specified_options[:form_data]
72
66
  end
73
67
 
74
68
  # How many rows to skip
75
69
  #
76
70
  # Default: 0
77
71
  def skip
78
- current_options[:skip] || 0
72
+ user_specified_options.fetch :skip, 0
79
73
  end
80
74
 
81
75
  def internal_encoding
82
- (current_options[:encoding] || 'UTF-8').upcase
76
+ user_specified_options.fetch(:encoding, 'UTF-8').upcase
83
77
  end
84
78
 
85
79
  def external_encoding
@@ -94,27 +88,27 @@ class RemoteTable
94
88
  #
95
89
  # Default: ","
96
90
  def delimiter
97
- current_options[:delimiter] || ','
91
+ user_specified_options.fetch :delimiter, ','
98
92
  end
99
93
 
100
94
  # The XPath used to find rows
101
95
  def row_xpath
102
- current_options[:row_xpath]
96
+ user_specified_options[:row_xpath]
103
97
  end
104
98
 
105
99
  # The XPath used to find columns
106
100
  def column_xpath
107
- current_options[:column_xpath]
101
+ user_specified_options[:column_xpath]
108
102
  end
109
103
 
110
104
  # The CSS selector used to find rows
111
105
  def row_css
112
- current_options[:row_css]
106
+ user_specified_options[:row_css]
113
107
  end
114
108
 
115
109
  # The CSS selector used to find columns
116
110
  def column_css
117
- current_options[:column_css]
111
+ user_specified_options[:column_css]
118
112
  end
119
113
 
120
114
  # The compression type.
@@ -123,8 +117,8 @@ class RemoteTable
123
117
  #
124
118
  # Can be specified as: :gz, :zip, :bz2, :exe (treated as :zip)
125
119
  def compression
126
- if current_options.has_key?(:compression)
127
- return current_options[:compression]
120
+ if user_specified_options.has_key?(:compression)
121
+ return user_specified_options[:compression]
128
122
  end
129
123
  case ::File.extname(uri.path).downcase
130
124
  when /gz/, /gunzip/
@@ -144,8 +138,8 @@ class RemoteTable
144
138
  #
145
139
  # Can be specified as: :tar
146
140
  def packing
147
- if current_options.has_key?(:packing)
148
- return current_options[:packing]
141
+ if user_specified_options.has_key?(:packing)
142
+ return user_specified_options[:packing]
149
143
  end
150
144
  if uri.path =~ %r{\.tar(?:\.|$)}i
151
145
  :tar
@@ -157,7 +151,7 @@ class RemoteTable
157
151
  # Example:
158
152
  # RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
159
153
  def glob
160
- current_options[:glob]
154
+ user_specified_options[:glob]
161
155
  end
162
156
 
163
157
  # The filename, which can be used to pick a file out of an archive.
@@ -165,17 +159,17 @@ class RemoteTable
165
159
  # Example:
166
160
  # RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
167
161
  def filename
168
- current_options[:filename]
162
+ user_specified_options[:filename]
169
163
  end
170
164
 
171
165
  # Cut columns up to this character
172
166
  def cut
173
- current_options[:cut]
167
+ user_specified_options[:cut]
174
168
  end
175
169
 
176
170
  # Crop rows after this line
177
171
  def crop
178
- current_options[:crop]
172
+ user_specified_options[:crop]
179
173
  end
180
174
 
181
175
  # The fixed-width schema, given as an array
@@ -190,31 +184,31 @@ class RemoteTable
190
184
  # [ 'spacer', 12 ],
191
185
  # [ 'header6', 10, { :type => :string } ]])
192
186
  def schema
193
- current_options[:schema]
187
+ user_specified_options[:schema]
194
188
  end
195
189
 
196
190
  # The name of the fixed-width schema according to FixedWidth
197
191
  def schema_name
198
- current_options[:schema_name]
192
+ user_specified_options[:schema_name]
199
193
  end
200
194
 
201
195
  # A proc to call to decide whether to return a row.
202
196
  def select
203
- current_options[:select]
197
+ user_specified_options[:select]
204
198
  end
205
199
 
206
200
  # A proc to call to decide whether to reject (skip) a row.
207
201
  def reject
208
- current_options[:reject]
202
+ user_specified_options[:reject]
209
203
  end
210
204
 
211
205
  # A hash of options to create a new Errata instance (see the Errata gem at http://github.com/seamusabshere/errata) to be used on every row. A non-Hash value is used as-is as the errata object.
212
206
  def errata
213
- return unless current_options.has_key? :errata
214
- @errata ||= if current_options[:errata].is_a? ::Hash
215
- ::Errata.new current_options[:errata]
207
+ return unless user_specified_options.has_key? :errata
208
+ @errata ||= if user_specified_options[:errata].is_a? ::Hash
209
+ ::Errata.new user_specified_options[:errata]
216
210
  else
217
- current_options[:errata]
211
+ user_specified_options[:errata]
218
212
  end
219
213
  end
220
214
 
@@ -227,8 +221,8 @@ class RemoteTable
227
221
  # Can be specified as: :xlsx, :xls, :delimited (aka :csv and :tsv), :ods, :fixed_width, :html
228
222
  def format
229
223
  return Format::Delimited if uri.host == 'spreadsheets.google.com' or @uri.host == 'docs.google.com'
230
- clue = if current_options.has_key?(:format)
231
- current_options[:format]
224
+ clue = if user_specified_options.has_key?(:format)
225
+ user_specified_options[:format]
232
226
  else
233
227
  t.local_file.path
234
228
  end
@@ -247,6 +241,8 @@ class RemoteTable
247
241
  Format::HTML
248
242
  when /xml/
249
243
  Format::XML
244
+ when /yaml/, /yml/
245
+ Format::Yaml
250
246
  else
251
247
  Format::Delimited
252
248
  end
@@ -1,22 +1,22 @@
1
- if ::RUBY_VERSION >= '1.9'
2
- require 'ensure/encoding'
3
- else
4
- require 'iconv'
1
+ require 'iconv'
2
+ if RUBY_VERSION >= '1.9'
3
+ # for an excellent explanation see http://blog.segment7.net/2010/12/17/from-iconv-iconv-to-string-encode
4
+ Kernel.warn "[remote_table] Apologies - using iconv because Ruby 1.9.x's String#encode doesn't have transliteration tables (yet)"
5
5
  end
6
6
 
7
+ require 'remote_table/format/mixins/textual'
8
+ require 'remote_table/format/mixins/processed_by_roo'
9
+ require 'remote_table/format/mixins/processed_by_nokogiri'
10
+ require 'remote_table/format/excel'
11
+ require 'remote_table/format/excelx'
12
+ require 'remote_table/format/delimited'
13
+ require 'remote_table/format/open_office'
14
+ require 'remote_table/format/fixed_width'
15
+ require 'remote_table/format/html'
16
+ require 'remote_table/format/xml'
17
+ require 'remote_table/format/yaml'
7
18
  class RemoteTable
8
19
  class Format
9
- autoload :Excel, 'remote_table/format/excel'
10
- autoload :Excelx, 'remote_table/format/excelx'
11
- autoload :Delimited, 'remote_table/format/delimited'
12
- autoload :OpenOffice, 'remote_table/format/open_office'
13
- autoload :FixedWidth, 'remote_table/format/fixed_width'
14
- autoload :HTML, 'remote_table/format/html'
15
- autoload :XML, 'remote_table/format/xml'
16
-
17
- autoload :Textual, 'remote_table/format/mixins/textual'
18
- autoload :ProcessedByRoo, 'remote_table/format/mixins/processed_by_roo'
19
- autoload :ProcessedByNokogiri, 'remote_table/format/mixins/processed_by_nokogiri'
20
20
 
21
21
  attr_reader :t
22
22
 
@@ -25,28 +25,25 @@ class RemoteTable
25
25
  end
26
26
 
27
27
  def transliterate_to_utf8(str)
28
- return if str.nil?
29
- $stderr.puts "[remote_table translit] Before: #{str}" if ::ENV['REMOTE_TABLE_DEBUG'] and ::ENV['REMOTE_TABLE_DEBUG'].include?('translit')
30
- transliterated_str = if ::RUBY_VERSION >= '1.9'
31
- str.ensure_encoding t.properties.external_encoding, :external_encoding => t.properties.internal_encoding, :invalid_characters => :transcode
32
- else
33
- ::Iconv.conv(t.properties.external_encoding_iconv, t.properties.internal_encoding, str.to_s + ' ')[0..-2]
28
+ if str.is_a?(::String)
29
+ [ iconv.iconv(str), iconv.iconv(nil) ].join
34
30
  end
35
- $stderr.puts "[remote_table translit] After: #{transliterated_str}" if ::ENV['REMOTE_TABLE_DEBUG'] and ::ENV['REMOTE_TABLE_DEBUG'].include?('translit')
36
- transliterated_str
37
31
  end
38
32
 
39
33
  def assume_utf8(str)
40
34
  if str.is_a?(::String) and ::RUBY_VERSION >= '1.9'
41
- str.encode! t.properties.external_encoding
35
+ str.encode! t.config.external_encoding
42
36
  else
43
37
  str
44
38
  end
45
39
  end
46
40
 
47
- include ::Enumerable
48
- def each
49
- raise "must be defined by format"
41
+ private
42
+
43
+ def iconv
44
+ @iconv ||= ::Iconv.new(t.config.external_encoding_iconv, t.config.internal_encoding)
50
45
  end
46
+
47
+ include ::Enumerable
51
48
  end
52
49
  end