iron-import 0.8.5 → 0.8.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 072a67b4a7cf2c5bb215aaba838d4e5fe87395cf
4
- data.tar.gz: 701d2f2a706225a8dc76c356fd130fd9335b5e2b
2
+ SHA256:
3
+ metadata.gz: 31bf806c52f39edb39cbd3f7f2992ff1723c562e4c3a3859be0c341165746960
4
+ data.tar.gz: c140bc5f1a6affcb5e7b940a04f5a0d74c165b49ab5683815c3a63b706447c3f
5
5
  SHA512:
6
- metadata.gz: 05ef313d18b1d2a9c8fe129120eab8e687ad7d7d6050e64184f2ed08e7301f89de32d89d6d65e61e320c892dc36703f969ae4d45bdf585eb9df9873cc3a2739b
7
- data.tar.gz: 3cf69f77fda20624fbf0eb63d87813a2361a9eba295a89330085737ef6198ed26134dbbc30295ee1af2cddf6e6c3f77565544ac6af2f971521c77bd8dbe7fead
6
+ metadata.gz: 10f52ae0c423c8e9b0feb2bb560141d45a0d607be502ad74d6f1168a130e4d8932c6c28d666e94fc26344a570a6893f7828c02a854ef889018812466ac965968
7
+ data.tar.gz: 9f555bcf49f432cb8e3d35f3384633741280ba4853491dcaa895895d1fd00eaef1e8a33f1f780ee7efee8659b11d4f4499b9519badf7f0d9d665460a355d1aaf
data/History.txt CHANGED
@@ -1,3 +1,6 @@
1
+ == 0.8.6 / 2018-04-11
2
+ * Add Importer#read_lines to allow reading an arbitrary number of raw rows from a file or stream, using the same format detection as Importer#import
3
+
1
4
  == 0.8.5 / 2018-02-14
2
5
 
3
6
  * Add simple separator char detection for CSV files, currently supporting tabs & commas
data/Version.txt CHANGED
@@ -1 +1 @@
1
- 0.8.5
1
+ 0.8.6
@@ -127,6 +127,42 @@ class Importer
127
127
  importer
128
128
  end
129
129
 
130
+ # Helper method for use in extracting the raw values for the first n
131
+ # rows in a given source. Uses the same format options and detection code
132
+ # used by Importer#import. If you are using a custom reader, pass a lambda as the
133
+ # :on_file or :on_stream option to set the data reader to use.
134
+ def self.read_lines(num_lines, path_or_stream, options = {}, &block)
135
+ # Build a dummy importer
136
+ importer = Importer.build
137
+
138
+ # Get the reader for our inputs
139
+ custom_reader = nil
140
+ if options[:on_file]
141
+ custom_reader = Importer::CustomReader.new(importer)
142
+ custom_reader.set_reader(:file, options[:on_file])
143
+
144
+ elsif options[:on_stream]
145
+ custom_reader = Importer::CustomReader.new(importer)
146
+ custom_reader.set_reader(:stream, options[:on_stream])
147
+ end
148
+ reader = importer.find_reader(path_or_stream, options[:format], custom_reader)
149
+
150
+ # Verify we got one
151
+ raise 'Unable to find valid reader for path' unless reader
152
+
153
+ # What scopes (if any) should we limit our searching to?
154
+ scopes = options.delete(:scope)
155
+ if scopes && !scopes.is_a?(Array)
156
+ scopes = [scopes]
157
+ end
158
+
159
+ # Read in the data!
160
+ reader.load(path_or_stream, scopes) do |raw_rows|
161
+ return raw_rows.slice(0...num_lines)
162
+ end
163
+ raise 'Unable to load path or stream'
164
+ end
165
+
130
166
  # Ye standard constructor!
131
167
  def initialize(options = {})
132
168
  @scopes = {}
@@ -296,6 +332,29 @@ class Importer
296
332
  @custom_reader.set_reader(:stream, block)
297
333
  end
298
334
 
335
+ # Helper method to find the right file/stream reader given the options and
336
+ # params passed.
337
+ def find_reader(path_or_stream, format = nil, custom_reader = nil)
338
+ reader = nil
339
+ default = custom_reader ? :custom : :auto
340
+ format ||= default
341
+ if format == :custom
342
+ # Custom format selected, use our internal custom reader
343
+ reader = custom_reader
344
+
345
+ elsif format && format != :auto
346
+ # Explicit format requested
347
+ reader = DataReader::for_format(self, format)
348
+
349
+ else
350
+ # Auto select
351
+ reader = DataReader::for_source(self, path_or_stream)
352
+ end
353
+
354
+ # What did we get?
355
+ reader
356
+ end
357
+
299
358
  # First call to a freshly #build'd importer, this will read the file/stream/path supplied,
300
359
  # validate the required values, run custom validations... basically pre-parse and
301
360
  # massage the supplied data. It will return true on success, or false if one
@@ -332,23 +391,10 @@ class Importer
332
391
  def import(path_or_stream, options = {}, &block)
333
392
  # Clear all our load-time state, including all rows, header locations... you name it
334
393
  reset
335
-
336
- # Get the reader for this format
337
- default = @custom_reader ? :custom : :auto
338
- @format = options.delete(:format) { default }
339
- if @format == :custom
340
- # Custom format selected, use our internal custom reader
341
- @reader = @custom_reader
342
-
343
- elsif @format && @format != :auto
344
- # Explicit format requested
345
- @reader = DataReader::for_format(self, @format)
346
-
347
- else
348
- # Auto select
349
- @reader = DataReader::for_source(self, path_or_stream)
350
- @format = @reader.format if @reader
351
- end
394
+
395
+ # Pick a reader for this stream/file
396
+ @reader = find_reader(path_or_stream, options.delete(:format), @custom_reader)
397
+ @format = @reader.format if @reader
352
398
 
353
399
  # Verify we got one
354
400
  unless @reader
@@ -298,4 +298,34 @@ describe Importer do
298
298
  importer.to_a.should == [{:order => '223300', :date => '1973-01-02'.to_date}]
299
299
  end
300
300
 
301
+ it 'should find the right reader for a given format + path/stream' do
302
+ importer = Importer.build
303
+ importer.find_reader('foo.xls').class.should == Importer::XlsReader
304
+ importer.find_reader('foo.text', :csv).class.should == Importer::CsvReader
305
+ importer.find_reader('/bob/page.html').class.should == Importer::HtmlReader
306
+ custom = Importer::CustomReader.new(importer)
307
+ importer.find_reader('foo.text', nil, custom).class.should == Importer::CustomReader
308
+ end
309
+
310
+ it 'should allow reading raw row values' do
311
+ rows = Importer.read_lines(2, SpecHelper.sample_path('2-sheets.xlsx'), :scope => 'Sheet 2')
312
+ rows.count.should == 2
313
+ rows.first.should == ['Table 1', nil]
314
+ rows.last.should == ['Order', 'Date']
315
+ end
316
+
317
+ it 'should allow reading raw row values when using a custom reader' do
318
+ custom = lambda {|source|
319
+ File.readlines(source).collect do |line|
320
+ line.extract(/([A-TV-Z][0-9][A-Z0-9]{1,5})\s+(.*)/)
321
+ end
322
+ }
323
+ rows = Importer.read_lines(3, SpecHelper.sample_path('icd10-custom.txt'), :on_file => custom)
324
+ rows.should == [
325
+ ['A000', 'Cholera due to Vibrio cholerae 01, biovar cholerae'],
326
+ ['A001', 'Cholera due to Vibrio cholerae 01, biovar eltor'],
327
+ ['A009', 'Cholera, unspecified']
328
+ ]
329
+ end
330
+
301
331
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iron-import
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.5
4
+ version: 0.8.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Morris
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-02-14 00:00:00.000000000 Z
11
+ date: 2018-04-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: iron-extensions
@@ -157,7 +157,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
157
157
  version: '0'
158
158
  requirements: []
159
159
  rubyforge_project:
160
- rubygems_version: 2.4.3
160
+ rubygems_version: 2.7.6
161
161
  signing_key:
162
162
  specification_version: 4
163
163
  summary: CSV, HTML, XLS, and XLSX import processing support