tin_opener 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/README.md +6 -1
  3. data/app/controllers/tin_opener/data_sets_controller.rb +2 -1
  4. data/app/models/tin_opener/data_set.rb +2 -1
  5. data/app/views/tin_opener/data_sets/index.html.erb +1 -1
  6. data/lib/tin_opener/data_file_processor.rb +25 -11
  7. data/lib/tin_opener/data_file_processors/csv_processor.rb +24 -0
  8. data/lib/tin_opener/data_file_processors/xls_processor.rb +36 -0
  9. data/lib/tin_opener/version.rb +1 -1
  10. data/lib/tin_opener.rb +4 -0
  11. data/spec/dummy/log/development.log +516 -0
  12. data/spec/dummy/log/test.log +5894 -0
  13. data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/3MR1DJHzwOb48EQrXsHiHJ7IjgDYaKg8TF_1oluekCU.cache +1 -0
  14. data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/9JAtmrE8gkN4Ll_ses1H_M5ylPg_rV75JTy-LInDW5A.cache +0 -0
  15. data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/BqwVbEXQReJqZ1O-vYl0bgDgn5AjRFyVyu26OlZSeDk.cache +0 -0
  16. data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/Jow55IN5o80YnLiOg4Fr5N9WPT7dOSW1FsC30syKOXM.cache +1 -0
  17. data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/NQA9K_2Oq3l5Jz2kmZcV4LJIDx1cJOQbwZQjZq_Ob8k.cache +1 -0
  18. data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/OqdzJMApqmuOR5HA5XbQ9Ukw0t66o_kOvOr9gRTb-ys.cache +0 -0
  19. data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/UoPmraIR_8GBdt82Qv7E46vB85biTLUEFnsqW5h3AHY.cache +3 -0
  20. data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/YBxAUfyQDHjoEa2iz272XBdRKrtQ7tS5kmMLmymLYXU.cache +1 -0
  21. data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/ZXdv0D_C5WAx0i5WNLYVhLj_WYBcK_Km4B907jOwh7k.cache +1 -0
  22. data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/e8WNBOjUvEkKqQpZOIclS1Azo4QSaSyaZewShwzREds.cache +1 -0
  23. data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/hkvQh5qylGCuuWPMjn7586FUemTh6giau8b0IaFgmak.cache +1 -0
  24. data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/kD6BsXWNOKOD6OuqYVuxbXKliQbe8n1c_fHhCRqkqKU.cache +2 -0
  25. data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/pgKZvrFeOEW9p9wqha2PA5mqALzmUehQRncxgx7HSFY.cache +0 -0
  26. data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/usob9Vws5kXcHktDHWDxgEiIKUmU9eLNwvJSjWYfVSg.cache +0 -0
  27. data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/x1NjeGbfRAMdAff315v2hcYniAL5QVpxdqW0rFIHRKU.cache +3 -0
  28. data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/znqRv2LKKafofH32eY4huW3WeDBZB0l-ESutpXdht9I.cache +2 -0
  29. data/spec/examples.txt +17 -7
  30. data/spec/factories/{tin_opener_data_sets.rb → data_sets.rb} +1 -1
  31. data/spec/factories/records.rb +6 -0
  32. data/spec/features/tin_opener/data_sets_spec.rb +61 -0
  33. data/spec/fixtures/data_sets/contaminacion-acustica-2015.xls +0 -0
  34. data/spec/fixtures/data_sets/encuesta-calidad-de-vida-2014.xls +0 -0
  35. data/spec/fixtures/data_sets/trabajos_planificados_m30.xml +1 -0
  36. data/spec/lib/tin_opener/data_file_processor_spec.rb +65 -0
  37. data/spec/models/tin_opener/data_set_spec.rb +1 -1
  38. data/spec/models/tin_opener/record_spec.rb +2 -2
  39. data/spec/rails_helper.rb +3 -1
  40. data/spec/spec_helper.rb +4 -2
  41. data/spec/support/url_helpers.rb +7 -0
  42. metadata +79 -5
  43. data/spec/factories/tin_opener_records.rb +0 -6
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e222a6323203b3affe9485fe42b365a009da3a53
4
- data.tar.gz: e001229370d3121e99b23f7e5291ae3e270a76c1
3
+ metadata.gz: 9ebe2ed89555b70efb828ec91f80d77b69c0e7ff
4
+ data.tar.gz: ea1d9478b4d9f27ddd14a71beea052cd252edc97
5
5
  SHA512:
6
- metadata.gz: b2be34285d30e29fe91863479e1313150cb1f453d98e22fe6da86b4ba8e9c1c668355c0611cd48509df4428a134c23b7f8b1bba65c2a6ca1456d060d2dc423a3
7
- data.tar.gz: 4fd302a413208378608c6ca73e10add657a8c6dd4dbcb7ae3a08e4bf81db8e86f8b97d2ad93d2a30d3051a84b65c14b3fce73794d8f6475efc5b5e5beb39f319
6
+ metadata.gz: 464c8a01153d27a3edaade63dac993baf85e89cbc60c58e26826d610f73ffcced799b4aad954f78d34950ac1194ceb456dbb5dfb0f56b97d1686af0f5fcdd511
7
+ data.tar.gz: aae351b0ba80839a4f82356c89418f43a5712053320616f8409a40e37dc11639def9de02a5ce5a3e31988b2d656db6f06328bbadceb041480c93bfae2c487b41
data/README.md CHANGED
@@ -1,6 +1,7 @@
1
1
  # tin_opener
2
2
  [![Build Status](https://travis-ci.org/dgilperez/tin_opener.svg?branch=master)](https://travis-ci.org/dgilperez/tin_opener)
3
3
  [![Code Climate](https://codeclimate.com/github/dgilperez/tin_opener/badges/gpa.svg)](https://codeclimate.com/github/dgilperez/tin_opener)
4
+ [![Gem Version](https://badge.fury.io/rb/tin_opener.svg)](https://badge.fury.io/rb/tin_opener)
4
5
 
5
6
  Opening Data, one Tin at a time.
6
7
 
@@ -29,9 +30,13 @@ This gem provides the following models:
29
30
  Data input format support:
30
31
 
31
32
  * CSV
33
+ * XLS (basic)
32
34
 
33
35
  ## TODO
34
36
 
35
37
  Support for more data formats:
36
38
 
37
- * Excel
39
+ * XLSX
40
+ * XLS (test different scenarios)
41
+ * XML
42
+ * Custom Data Sources
@@ -22,7 +22,8 @@ module TinOpener
22
22
  if @data_set.save
23
23
  redirect_to @data_set, notice: 'Data set was successfully created.'
24
24
  else
25
- render :new
25
+ @data_sets = DataSet.all
26
+ render :index
26
27
  end
27
28
  end
28
29
 
@@ -16,13 +16,14 @@ module TinOpener
16
16
  private
17
17
 
18
18
  def process_data_file
19
- return unless data_file.is_a?(File)
19
+ return unless data_file.is_a?(File) || data_file.is_a?(Tempfile)
20
20
 
21
21
  data_file_processor = DataFileProcessor.new(file: data_file)
22
22
 
23
23
  self.headers = data_file_processor.headers
24
24
 
25
25
  import_records(data_file_processor.rows)
26
+ # TODO: benchmark and remove this allegedly slower alternative
26
27
  # data_file_processor.rows.each do |row|
27
28
  # records.new(row_data: row)
28
29
  # end
@@ -24,6 +24,6 @@
24
24
 
25
25
  <br>
26
26
 
27
- New Data Set
27
+ <h2>New Data Set</h2>
28
28
 
29
29
  <%= render 'form' %>
@@ -1,22 +1,36 @@
1
1
  module TinOpener
2
+ class UnsupportedDataFileError < StandardError; end
3
+
2
4
  class DataFileProcessor
3
5
  def initialize(args = {})
4
- @file = args.fetch(:file)
5
- @separator = args.fetch(:separator) { ';' }
6
+ @file = args.fetch(:file)
6
7
  end
7
8
 
8
- def headers
9
- @headers ||= rows.first.try do |row|
10
- row.transform_values do |value|
11
- value.class.name
9
+ delegate :headers, :rows, to: :file_type_processor
10
+
11
+ private
12
+
13
+ def file_type_processor
14
+ @file_type_processor ||= case mime_type
15
+ when 'text/csv', 'text/comma-separated-values', 'text/plain'
16
+ DataFileProcessors::CsvProcessor.new(file: @file)
17
+ when 'application/excel'
18
+ DataFileProcessors::XlsProcessor.new(file: @file)
19
+ else
20
+ fail UnsupportedDataFileError
12
21
  end
13
- end
14
22
  end
15
23
 
16
- def rows
17
- @csv_data ||= CSV.parse(@file, col_sep: @separator, headers: true).map do |row|
18
- row.to_hash.transform_keys{ |a| a.squeeze.strip.gsub(/\s/, '_').underscore.to_sym }
19
- end
24
+ # TODO: Current implementation extracts MIME from file extension.
25
+ # This will fail for files with bad extensions (a CSV renamed as an XLS),
26
+ # or for files with no extension
27
+ #
28
+ # Options to consider:
29
+ # - Use ruby-filemagic gem
30
+ # - Use or compare with output from (*nix only)
31
+ # `file #{@file.path} --mime-type`.gsub("\n", '').split(':')[1].split('/')[1].gsub('x-', '').gsub(/jpeg/, 'jpg').gsub(/text/, 'txt').gsub(/x-/, '')
32
+ def mime_type
33
+ @mime_type ||= MIME::Types.type_for(@file.path).first.try(:simplified)
20
34
  end
21
35
  end
22
36
  end
@@ -0,0 +1,24 @@
1
+ module TinOpener
2
+ module DataFileProcessors
3
+ class CsvProcessor < TinOpener::DataFileProcessor
4
+ def initialize(args = {})
5
+ super
6
+ @separator = args.fetch(:separator) { ';' }
7
+ end
8
+
9
+ def headers
10
+ @headers ||= rows.first.try do |row|
11
+ row.transform_values do |value|
12
+ value ? value.class.name : 'String'
13
+ end
14
+ end
15
+ end
16
+
17
+ def rows
18
+ @csv_data ||= CSV.parse(@file, col_sep: @separator, headers: true).map do |row|
19
+ row.to_hash.transform_keys{ |a| a.gsub(/\s+/, ' ').strip.gsub(/\s/, '_').underscore.to_sym }
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,36 @@
1
+ module TinOpener
2
+ module DataFileProcessors
3
+ class XlsProcessor < TinOpener::DataFileProcessor
4
+ def initialize(args = {})
5
+ super
6
+ end
7
+
8
+ delegate :headers, :rows, to: :csv_processor
9
+
10
+ private
11
+
12
+ def raw_data
13
+ @raw_data ||= Roo::Spreadsheet.open(@file.path)
14
+ end
15
+
16
+ def csv_processor
17
+ @csv_processor ||= CsvProcessor.new(file: tempfile.tap { write_csv_data })
18
+ end
19
+
20
+ def tempfile
21
+ @tempfile ||= Tempfile.new(filename)
22
+ end
23
+
24
+ def filename
25
+ @file.path.split('/').last
26
+ end
27
+
28
+ # TODO: Roo has a built-in .to_yaml transform. Consider using that over CSV
29
+ # whenever a YAML processor is implemented
30
+ def write_csv_data
31
+ # to_csv arguments: filename = nil, separator = ',', sheet = default_sheet
32
+ @csv_data ||= raw_data.to_csv(tempfile, ';')
33
+ end
34
+ end
35
+ end
36
+ end
@@ -1,3 +1,3 @@
1
1
  module TinOpener
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/lib/tin_opener.rb CHANGED
@@ -1,6 +1,10 @@
1
1
  require "tin_opener/engine"
2
2
  require "tin_opener/data_file_processor"
3
+ require "tin_opener/data_file_processors/csv_processor"
4
+ require "tin_opener/data_file_processors/xls_processor"
3
5
  require "csv"
6
+ require "roo"
7
+ require "roo-xls"
4
8
  require "activerecord-import"
5
9
  require 'pry-byebug' unless Rails.env.production?
6
10