remote_table 1.3.0 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -1,25 +1,17 @@
1
- require 'bundler'
2
- Bundler::GemHelper.install_tasks
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
3
 
4
4
  require 'rake'
5
5
  require 'rake/testtask'
6
6
  Rake::TestTask.new(:test) do |test|
7
- test.libs << 'lib' << 'test'
7
+ test.libs << 'test'
8
8
  test.pattern = 'test/**/test_*.rb'
9
9
  test.verbose = true
10
10
  end
11
11
 
12
- begin
13
- require 'rake/rdoctask'
14
- Rake::RDocTask.new do |rdoc|
15
- rdoc.rdoc_dir = 'rdoc'
16
- rdoc.title = 'remote_table'
17
- rdoc.options << '--line-numbers' << '--inline-source'
18
- rdoc.rdoc_files.include('README*')
19
- rdoc.rdoc_files.include('lib/**/*.rb')
20
- end
21
- rescue LoadError
22
- puts "Rdoc is not available"
12
+ require 'yard'
13
+ YARD::Rake::YardocTask.new do |y|
14
+ y.options << '--no-private'
23
15
  end
24
16
 
25
17
  task :default => :test
data/lib/remote_table.rb CHANGED
@@ -1,20 +1,20 @@
1
1
  if ::RUBY_VERSION < '1.9' and $KCODE != 'UTF8'
2
- $stderr.puts "[remote_table] Ruby 1.8 detected, setting $KCODE to UTF8 so that ActiveSupport::Multibyte works properly."
2
+ ::Kernel.warn "[remote_table] Ruby 1.8 detected, setting $KCODE to UTF8 so that ActiveSupport::Multibyte works properly."
3
3
  $KCODE = 'UTF8'
4
4
  end
5
5
 
6
6
  require 'active_support'
7
7
  require 'active_support/version'
8
- %w{
9
- active_support/core_ext/hash
10
- active_support/core_ext/string
11
- active_support/core_ext/module
12
- active_support/core_ext/array
13
- }.each do |active_support_3_requirement|
14
- require active_support_3_requirement
15
- end if ::ActiveSupport::VERSION::MAJOR == 3
8
+ if ::ActiveSupport::VERSION::MAJOR >= 3
9
+ require 'active_support/core_ext'
10
+ end
16
11
  require 'hash_digest'
17
12
 
13
+ require 'remote_table/format'
14
+ require 'remote_table/config'
15
+ require 'remote_table/local_file'
16
+ require 'remote_table/transformer'
17
+
18
18
  class Hash
19
19
  attr_accessor :row_hash
20
20
  end
@@ -23,13 +23,7 @@ class Array
23
23
  attr_accessor :row_hash
24
24
  end
25
25
 
26
- class RemoteTable
27
- autoload :Format, 'remote_table/format'
28
- autoload :Properties, 'remote_table/properties'
29
- autoload :LocalFile, 'remote_table/local_file'
30
- autoload :Transformer, 'remote_table/transformer'
31
- autoload :Utils, 'remote_table/utils'
32
-
26
+ class RemoteTable
33
27
  # Legacy
34
28
  class Transform
35
29
  def self.row_hash(row)
@@ -40,7 +34,7 @@ class RemoteTable
40
34
  include ::Enumerable
41
35
 
42
36
  attr_reader :url
43
- attr_reader :options
37
+ attr_reader :config
44
38
 
45
39
  # Create a new RemoteTable.
46
40
  #
@@ -51,16 +45,16 @@ class RemoteTable
51
45
  # Old syntax:
52
46
  # RemoteTable.new(:url => 'www.customerreferenceprogram.org/uploads/CRP_RFP_template.xlsx', :foo => 'bar')
53
47
  #
54
- # See the <tt>Properties</tt> object for the sorts of options you can pass.
48
+ # See the <tt>Config</tt> object for the sorts of options you can pass.
55
49
  def initialize(*args)
56
- @options = args.last.is_a?(::Hash) ? args.last.symbolize_keys : {}
50
+ options = args.last.is_a?(::Hash) ? args.last.symbolize_keys : {}
51
+
57
52
  @url = if args.first.is_a? ::String
58
53
  args.first.dup
59
54
  else
60
- @options[:url].dup
55
+ options[:url].dup
61
56
  end
62
- @url.freeze
63
- @options.freeze
57
+ @config = Config.new self, options
64
58
  end
65
59
 
66
60
  # not thread safe
@@ -72,17 +66,17 @@ class RemoteTable
72
66
  retval = format.each do |row|
73
67
  transformer.transform(row).each do |virtual_row|
74
68
  virtual_row.row_hash = ::HashDigest.hexdigest row
75
- if properties.errata
76
- next if properties.errata.rejects? virtual_row
77
- properties.errata.correct! virtual_row
69
+ if config.errata
70
+ next if config.errata.rejects? virtual_row
71
+ config.errata.correct! virtual_row
78
72
  end
79
- next if properties.select and !properties.select.call(virtual_row)
80
- next if properties.reject and properties.reject.call(virtual_row)
81
- cache.push virtual_row unless properties.streaming
73
+ next if config.select and !config.select.call(virtual_row)
74
+ next if config.reject and config.reject.call(virtual_row)
75
+ cache.push virtual_row unless config.streaming
82
76
  yield virtual_row
83
77
  end
84
78
  end
85
- fully_cached! unless properties.streaming
79
+ fully_cached! unless config.streaming
86
80
  retval
87
81
  end
88
82
  end
@@ -117,17 +111,12 @@ class RemoteTable
117
111
  @local_file ||= LocalFile.new self
118
112
  end
119
113
 
120
- # Used internally to access to the properties of the table, either set by the user or implied
121
- def properties
122
- @properties ||= Properties.new self
123
- end
124
-
125
114
  # Used internally to access the driver that reads the format
126
115
  def format
127
- @format ||= properties.format.new self
116
+ @format ||= config.format.new self
128
117
  end
129
118
 
130
- # Used internally to acess the transformer (aka parser).
119
+ # Used internally to access the transformer (aka parser).
131
120
  def transformer
132
121
  @transformer ||= Transformer.new self
133
122
  end
@@ -139,8 +128,8 @@ class RemoteTable
139
128
  def mark_download!
140
129
  @download_count ||= 0
141
130
  @download_count += 1
142
- if properties.warn_on_multiple_downloads and download_count > 1
143
- $stderr.puts "[remote_table] Warning: #{url} has been downloaded #{download_count} times."
131
+ if config.warn_on_multiple_downloads and download_count > 1
132
+ ::Kernel.warn "[remote_table] #{url} has been downloaded #{download_count} times."
144
133
  end
145
134
  end
146
135
 
@@ -1,19 +1,15 @@
1
1
  require 'uri'
2
2
  class RemoteTable
3
- # Represents the properties of a RemoteTable, whether they are explicitly set by the user or inferred automatically.
4
- class Properties
3
+ # Represents the configuration options of a RemoteTable, whether they are explicitly set by the user or inferred automatically.
4
+ class Config
5
5
  attr_reader :t
6
- attr_reader :current_options
6
+ attr_reader :user_specified_options
7
7
 
8
- def initialize(t)
8
+ def initialize(t, user_specified_options)
9
9
  @t = t
10
- @current_options = t.options.symbolize_keys
10
+ @user_specified_options = user_specified_options
11
11
  end
12
-
13
- def update(options)
14
- current_options.update options
15
- end
16
-
12
+
17
13
  # The parsed URI of the file to get.
18
14
  def uri
19
15
  return @uri if @uri.is_a?(::URI)
@@ -29,19 +25,19 @@ class RemoteTable
29
25
  # * call each
30
26
  # Defaults to false.
31
27
  def streaming
32
- current_options[:streaming] || false
28
+ user_specified_options.fetch :streaming, false
33
29
  end
34
30
 
35
31
  # Defaults to true.
36
32
  def warn_on_multiple_downloads
37
- current_options[:warn_on_multiple_downloads] != false
33
+ user_specified_options[:warn_on_multiple_downloads] != false
38
34
  end
39
35
 
40
36
  # The headers specified by the user
41
37
  #
42
38
  # Default: :first_row
43
39
  def headers
44
- current_options[:headers].nil? ? :first_row : current_options[:headers]
40
+ user_specified_options[:headers].nil? ? :first_row : user_specified_options[:headers]
45
41
  end
46
42
 
47
43
  def use_first_row_as_header?
@@ -53,33 +49,31 @@ class RemoteTable
53
49
  end
54
50
 
55
51
  # The sheet specified by the user as a number or a string
56
- #
57
- # Default: 0
58
52
  def sheet
59
- current_options[:sheet] || 0
53
+ user_specified_options[:sheet]
60
54
  end
61
55
 
62
56
  # Whether to keep blank rows
63
57
  #
64
58
  # Default: false
65
59
  def keep_blank_rows
66
- current_options[:keep_blank_rows] || false
60
+ user_specified_options.fetch :keep_blank_rows, false
67
61
  end
68
62
 
69
63
  # Form data to send in with the download request
70
64
  def form_data
71
- current_options[:form_data]
65
+ user_specified_options[:form_data]
72
66
  end
73
67
 
74
68
  # How many rows to skip
75
69
  #
76
70
  # Default: 0
77
71
  def skip
78
- current_options[:skip] || 0
72
+ user_specified_options.fetch :skip, 0
79
73
  end
80
74
 
81
75
  def internal_encoding
82
- (current_options[:encoding] || 'UTF-8').upcase
76
+ user_specified_options.fetch(:encoding, 'UTF-8').upcase
83
77
  end
84
78
 
85
79
  def external_encoding
@@ -94,27 +88,27 @@ class RemoteTable
94
88
  #
95
89
  # Default: ","
96
90
  def delimiter
97
- current_options[:delimiter] || ','
91
+ user_specified_options.fetch :delimiter, ','
98
92
  end
99
93
 
100
94
  # The XPath used to find rows
101
95
  def row_xpath
102
- current_options[:row_xpath]
96
+ user_specified_options[:row_xpath]
103
97
  end
104
98
 
105
99
  # The XPath used to find columns
106
100
  def column_xpath
107
- current_options[:column_xpath]
101
+ user_specified_options[:column_xpath]
108
102
  end
109
103
 
110
104
  # The CSS selector used to find rows
111
105
  def row_css
112
- current_options[:row_css]
106
+ user_specified_options[:row_css]
113
107
  end
114
108
 
115
109
  # The CSS selector used to find columns
116
110
  def column_css
117
- current_options[:column_css]
111
+ user_specified_options[:column_css]
118
112
  end
119
113
 
120
114
  # The compression type.
@@ -123,8 +117,8 @@ class RemoteTable
123
117
  #
124
118
  # Can be specified as: :gz, :zip, :bz2, :exe (treated as :zip)
125
119
  def compression
126
- if current_options.has_key?(:compression)
127
- return current_options[:compression]
120
+ if user_specified_options.has_key?(:compression)
121
+ return user_specified_options[:compression]
128
122
  end
129
123
  case ::File.extname(uri.path).downcase
130
124
  when /gz/, /gunzip/
@@ -144,8 +138,8 @@ class RemoteTable
144
138
  #
145
139
  # Can be specified as: :tar
146
140
  def packing
147
- if current_options.has_key?(:packing)
148
- return current_options[:packing]
141
+ if user_specified_options.has_key?(:packing)
142
+ return user_specified_options[:packing]
149
143
  end
150
144
  if uri.path =~ %r{\.tar(?:\.|$)}i
151
145
  :tar
@@ -157,7 +151,7 @@ class RemoteTable
157
151
  # Example:
158
152
  # RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'
159
153
  def glob
160
- current_options[:glob]
154
+ user_specified_options[:glob]
161
155
  end
162
156
 
163
157
  # The filename, which can be used to pick a file out of an archive.
@@ -165,17 +159,17 @@ class RemoteTable
165
159
  # Example:
166
160
  # RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'
167
161
  def filename
168
- current_options[:filename]
162
+ user_specified_options[:filename]
169
163
  end
170
164
 
171
165
  # Cut columns up to this character
172
166
  def cut
173
- current_options[:cut]
167
+ user_specified_options[:cut]
174
168
  end
175
169
 
176
170
  # Crop rows after this line
177
171
  def crop
178
- current_options[:crop]
172
+ user_specified_options[:crop]
179
173
  end
180
174
 
181
175
  # The fixed-width schema, given as an array
@@ -190,31 +184,31 @@ class RemoteTable
190
184
  # [ 'spacer', 12 ],
191
185
  # [ 'header6', 10, { :type => :string } ]])
192
186
  def schema
193
- current_options[:schema]
187
+ user_specified_options[:schema]
194
188
  end
195
189
 
196
190
  # The name of the fixed-width schema according to FixedWidth
197
191
  def schema_name
198
- current_options[:schema_name]
192
+ user_specified_options[:schema_name]
199
193
  end
200
194
 
201
195
  # A proc to call to decide whether to return a row.
202
196
  def select
203
- current_options[:select]
197
+ user_specified_options[:select]
204
198
  end
205
199
 
206
200
  # A proc to call to decide whether to skip a row (rows for which it returns true are not yielded).
207
201
  def reject
208
- current_options[:reject]
202
+ user_specified_options[:reject]
209
203
  end
210
204
 
211
205
  # A hash of options to create a new Errata instance (see the Errata gem at http://github.com/seamusabshere/errata) to be used on every row.
212
206
  def errata
213
- return unless current_options.has_key? :errata
214
- @errata ||= if current_options[:errata].is_a? ::Hash
215
- ::Errata.new current_options[:errata]
207
+ return unless user_specified_options.has_key? :errata
208
+ @errata ||= if user_specified_options[:errata].is_a? ::Hash
209
+ ::Errata.new user_specified_options[:errata]
216
210
  else
217
- current_options[:errata]
211
+ user_specified_options[:errata]
218
212
  end
219
213
  end
220
214
 
@@ -227,8 +221,8 @@ class RemoteTable
227
221
  # Can be specified as: :xlsx, :xls, :delimited (aka :csv and :tsv), :ods, :fixed_width, :html
228
222
  def format
229
223
  return Format::Delimited if uri.host == 'spreadsheets.google.com' or @uri.host == 'docs.google.com'
230
- clue = if current_options.has_key?(:format)
231
- current_options[:format]
224
+ clue = if user_specified_options.has_key?(:format)
225
+ user_specified_options[:format]
232
226
  else
233
227
  t.local_file.path
234
228
  end
@@ -247,6 +241,8 @@ class RemoteTable
247
241
  Format::HTML
248
242
  when /xml/
249
243
  Format::XML
244
+ when /yaml/, /yml/
245
+ Format::Yaml
250
246
  else
251
247
  Format::Delimited
252
248
  end
@@ -1,22 +1,22 @@
1
- if ::RUBY_VERSION >= '1.9'
2
- require 'ensure/encoding'
3
- else
4
- require 'iconv'
1
+ require 'iconv'
2
+ if RUBY_VERSION >= '1.9'
3
+ # for an excellent explanation see http://blog.segment7.net/2010/12/17/from-iconv-iconv-to-string-encode
4
+ Kernel.warn "[remote_table] Apologies - using iconv because Ruby 1.9.x's String#encode doesn't have transliteration tables (yet)"
5
5
  end
6
6
 
7
+ require 'remote_table/format/mixins/textual'
8
+ require 'remote_table/format/mixins/processed_by_roo'
9
+ require 'remote_table/format/mixins/processed_by_nokogiri'
10
+ require 'remote_table/format/excel'
11
+ require 'remote_table/format/excelx'
12
+ require 'remote_table/format/delimited'
13
+ require 'remote_table/format/open_office'
14
+ require 'remote_table/format/fixed_width'
15
+ require 'remote_table/format/html'
16
+ require 'remote_table/format/xml'
17
+ require 'remote_table/format/yaml'
7
18
  class RemoteTable
8
19
  class Format
9
- autoload :Excel, 'remote_table/format/excel'
10
- autoload :Excelx, 'remote_table/format/excelx'
11
- autoload :Delimited, 'remote_table/format/delimited'
12
- autoload :OpenOffice, 'remote_table/format/open_office'
13
- autoload :FixedWidth, 'remote_table/format/fixed_width'
14
- autoload :HTML, 'remote_table/format/html'
15
- autoload :XML, 'remote_table/format/xml'
16
-
17
- autoload :Textual, 'remote_table/format/mixins/textual'
18
- autoload :ProcessedByRoo, 'remote_table/format/mixins/processed_by_roo'
19
- autoload :ProcessedByNokogiri, 'remote_table/format/mixins/processed_by_nokogiri'
20
20
 
21
21
  attr_reader :t
22
22
 
@@ -25,28 +25,25 @@ class RemoteTable
25
25
  end
26
26
 
27
27
  def transliterate_to_utf8(str)
28
- return if str.nil?
29
- $stderr.puts "[remote_table translit] Before: #{str}" if ::ENV['REMOTE_TABLE_DEBUG'] and ::ENV['REMOTE_TABLE_DEBUG'].include?('translit')
30
- transliterated_str = if ::RUBY_VERSION >= '1.9'
31
- str.ensure_encoding t.properties.external_encoding, :external_encoding => t.properties.internal_encoding, :invalid_characters => :transcode
32
- else
33
- ::Iconv.conv(t.properties.external_encoding_iconv, t.properties.internal_encoding, str.to_s + ' ')[0..-2]
28
+ if str.is_a?(::String)
29
+ [ iconv.iconv(str), iconv.iconv(nil) ].join
34
30
  end
35
- $stderr.puts "[remote_table translit] After: #{transliterated_str}" if ::ENV['REMOTE_TABLE_DEBUG'] and ::ENV['REMOTE_TABLE_DEBUG'].include?('translit')
36
- transliterated_str
37
31
  end
38
32
 
39
33
  def assume_utf8(str)
40
34
  if str.is_a?(::String) and ::RUBY_VERSION >= '1.9'
41
- str.encode! t.properties.external_encoding
35
+ str.encode! t.config.external_encoding
42
36
  else
43
37
  str
44
38
  end
45
39
  end
46
40
 
47
- include ::Enumerable
48
- def each
49
- raise "must be defined by format"
41
+ private
42
+
43
+ def iconv
44
+ @iconv ||= ::Iconv.new(t.config.external_encoding_iconv, t.config.internal_encoding)
50
45
  end
46
+
47
+ include ::Enumerable
51
48
  end
52
49
  end