iron-import 0.8.5 → 0.8.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 072a67b4a7cf2c5bb215aaba838d4e5fe87395cf
4
- data.tar.gz: 701d2f2a706225a8dc76c356fd130fd9335b5e2b
2
+ SHA256:
3
+ metadata.gz: 31bf806c52f39edb39cbd3f7f2992ff1723c562e4c3a3859be0c341165746960
4
+ data.tar.gz: c140bc5f1a6affcb5e7b940a04f5a0d74c165b49ab5683815c3a63b706447c3f
5
5
  SHA512:
6
- metadata.gz: 05ef313d18b1d2a9c8fe129120eab8e687ad7d7d6050e64184f2ed08e7301f89de32d89d6d65e61e320c892dc36703f969ae4d45bdf585eb9df9873cc3a2739b
7
- data.tar.gz: 3cf69f77fda20624fbf0eb63d87813a2361a9eba295a89330085737ef6198ed26134dbbc30295ee1af2cddf6e6c3f77565544ac6af2f971521c77bd8dbe7fead
6
+ metadata.gz: 10f52ae0c423c8e9b0feb2bb560141d45a0d607be502ad74d6f1168a130e4d8932c6c28d666e94fc26344a570a6893f7828c02a854ef889018812466ac965968
7
+ data.tar.gz: 9f555bcf49f432cb8e3d35f3384633741280ba4853491dcaa895895d1fd00eaef1e8a33f1f780ee7efee8659b11d4f4499b9519badf7f0d9d665460a355d1aaf
data/History.txt CHANGED
@@ -1,3 +1,6 @@
1
+ == 0.8.6 / 2018-04-11
2
+ * Add Importer.read_lines (class method) to allow reading the first N raw rows from a file or stream, using the same format detection as Importer#import
3
+
1
4
  == 0.8.5 / 2018-02-14
2
5
 
3
6
  * Add simple separator char detection for CSV files, currently supporting tabs & commas
data/Version.txt CHANGED
@@ -1 +1 @@
1
- 0.8.5
1
+ 0.8.6
@@ -127,6 +127,42 @@ class Importer
127
127
  importer
128
128
  end
129
129
 
130
# Helper for pulling the raw values of the first +num_lines+ rows out of a
# source. The reader is chosen through Importer#find_reader, so an explicit
# options[:format] is honored and otherwise the reader is picked from the
# source itself.
#
# Options read here:
#   :format    - passed to find_reader as the requested format
#   :scope     - a single scope or an Array of scopes, handed to the reader's #load
#   :on_file   - lambda installed as a custom :file reader
#   :on_stream - lambda installed as a custom :stream reader (only consulted
#                when :on_file is not given)
#
# The options hash is only read, never modified. The block parameter is
# accepted for interface compatibility but is not used.
#
# Returns the first +num_lines+ rows of the first batch the reader yields.
# Raises ArgumentError for a negative Integer +num_lines+, and RuntimeError
# when no reader can be found or the reader never yields any rows.
def self.read_lines(num_lines, path_or_stream, options = {}, &block)
  # A negative count would turn the slice below into "all but the last n"
  # rows, which is never what a caller asking for the first n rows wants.
  if num_lines.is_a?(Integer) && num_lines < 0
    raise ArgumentError, "num_lines must not be negative (got #{num_lines})"
  end

  # Build a dummy importer
  importer = Importer.build

  # Wire up a custom reader when the caller supplied one; :on_file wins
  # over :on_stream when both are present.
  custom_reader = nil
  reader_kind = [:file, :stream].find { |kind| options[:"on_#{kind}"] }
  if reader_kind
    custom_reader = Importer::CustomReader.new(importer)
    custom_reader.set_reader(reader_kind, options[:"on_#{reader_kind}"])
  end
  reader = importer.find_reader(path_or_stream, options[:format], custom_reader)

  # Verify we got one
  raise 'Unable to find valid reader for path' unless reader

  # What scopes (if any) should we limit our searching to? Read the key
  # rather than deleting it so the caller's hash is left untouched.
  scopes = options[:scope]
  scopes = [scopes] if scopes && !scopes.is_a?(Array)

  # Read in the data! The return inside the block exits read_lines as soon
  # as the reader yields its first batch of rows.
  reader.load(path_or_stream, scopes) do |raw_rows|
    return raw_rows.slice(0...num_lines)
  end

  # Only reached when the reader never yielded
  raise 'Unable to load path or stream'
end
165
+
130
166
  # Ye standard constructor!
131
167
  def initialize(options = {})
132
168
  @scopes = {}
@@ -296,6 +332,29 @@ class Importer
296
332
  @custom_reader.set_reader(:stream, block)
297
333
  end
298
334
 
335
# Picks the file/stream reader for the given source.
#
# When no format is passed, it falls back to :custom if a custom reader was
# supplied and to :auto otherwise. Then:
#   :custom     - the supplied custom_reader is returned as-is (may be nil)
#   :auto       - DataReader.for_source picks a reader from the path/stream
#   other value - DataReader.for_format builds the reader for that format
#
# Returns whatever the selected branch produced; callers check for nil.
def find_reader(path_or_stream, format = nil, custom_reader = nil)
  fallback = custom_reader ? :custom : :auto
  chosen = format || fallback

  # Custom format selected, hand back the caller's reader
  return custom_reader if chosen == :custom

  # Auto select based on the source itself
  return DataReader.for_source(self, path_or_stream) if chosen == :auto

  # Explicit format requested
  DataReader.for_format(self, chosen)
end
357
+
299
358
  # First call to a freshly #build'd importer, this will read the file/stream/path supplied,
300
359
  # validate the required values, run custom validations... basically pre-parse and
301
360
  # massage the supplied data. It will return true on success, or false if one
@@ -332,23 +391,10 @@ class Importer
332
391
  def import(path_or_stream, options = {}, &block)
333
392
  # Clear all our load-time state, including all rows, header locations... you name it
334
393
  reset
335
-
336
- # Get the reader for this format
337
- default = @custom_reader ? :custom : :auto
338
- @format = options.delete(:format) { default }
339
- if @format == :custom
340
- # Custom format selected, use our internal custom reader
341
- @reader = @custom_reader
342
-
343
- elsif @format && @format != :auto
344
- # Explicit format requested
345
- @reader = DataReader::for_format(self, @format)
346
-
347
- else
348
- # Auto select
349
- @reader = DataReader::for_source(self, path_or_stream)
350
- @format = @reader.format if @reader
351
- end
394
+
395
+ # Pick a reader for this stream/file
396
+ @reader = find_reader(path_or_stream, options.delete(:format), @custom_reader)
397
+ @format = @reader.format if @reader
352
398
 
353
399
  # Verify we got one
354
400
  unless @reader
@@ -298,4 +298,34 @@ describe Importer do
298
298
  importer.to_a.should == [{:order => '223300', :date => '1973-01-02'.to_date}]
299
299
  end
300
300
 
301
# Reader lookup by file extension, by explicit format, and via a supplied
# custom reader.
it 'should find the right reader for a given format + path/stream' do
  importer = Importer.build

  # find_reader arguments => reader class expected back
  {
    ['foo.xls'] => Importer::XlsReader,
    ['foo.text', :csv] => Importer::CsvReader,
    ['/bob/page.html'] => Importer::HtmlReader
  }.each do |args, reader_class|
    importer.find_reader(*args).class.should == reader_class
  end

  # With no format given, a supplied custom reader is the one returned
  custom = Importer::CustomReader.new(importer)
  importer.find_reader('foo.text', nil, custom).class.should == Importer::CustomReader
end
309
+
310
# Reads the first two raw rows of the 'Sheet 2' scope in the sample workbook.
it 'should allow reading raw row values' do
  sample = SpecHelper.sample_path('2-sheets.xlsx')
  rows = Importer.read_lines(2, sample, :scope => 'Sheet 2')

  rows.count.should == 2
  rows.first.should == ['Table 1', nil]
  rows.last.should == ['Order', 'Date']
end
316
+
317
# Same as above, but the rows come from a caller-supplied :on_file lambda
# that splits each line of the sample file into two captured fields.
it 'should allow reading raw row values when using a custom reader' do
  code_and_label = /([A-TV-Z][0-9][A-Z0-9]{1,5})\s+(.*)/
  custom = lambda do |source|
    File.readlines(source).map { |line| line.extract(code_and_label) }
  end

  sample = SpecHelper.sample_path('icd10-custom.txt')
  rows = Importer.read_lines(3, sample, :on_file => custom)

  rows.should == [
    ['A000', 'Cholera due to Vibrio cholerae 01, biovar cholerae'],
    ['A001', 'Cholera due to Vibrio cholerae 01, biovar eltor'],
    ['A009', 'Cholera, unspecified']
  ]
end
330
+
301
331
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iron-import
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.5
4
+ version: 0.8.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Morris
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-02-14 00:00:00.000000000 Z
11
+ date: 2018-04-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: iron-extensions
@@ -157,7 +157,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
157
157
  version: '0'
158
158
  requirements: []
159
159
  rubyforge_project:
160
- rubygems_version: 2.4.3
160
+ rubygems_version: 2.7.6
161
161
  signing_key:
162
162
  specification_version: 4
163
163
  summary: CSV, HTML, XLS, and XLSX import processing support