tin_opener 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -1
- data/app/controllers/tin_opener/data_sets_controller.rb +2 -1
- data/app/models/tin_opener/data_set.rb +2 -1
- data/app/views/tin_opener/data_sets/index.html.erb +1 -1
- data/lib/tin_opener/data_file_processor.rb +25 -11
- data/lib/tin_opener/data_file_processors/csv_processor.rb +24 -0
- data/lib/tin_opener/data_file_processors/xls_processor.rb +36 -0
- data/lib/tin_opener/version.rb +1 -1
- data/lib/tin_opener.rb +4 -0
- data/spec/dummy/log/development.log +516 -0
- data/spec/dummy/log/test.log +5894 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/3MR1DJHzwOb48EQrXsHiHJ7IjgDYaKg8TF_1oluekCU.cache +1 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/9JAtmrE8gkN4Ll_ses1H_M5ylPg_rV75JTy-LInDW5A.cache +0 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/BqwVbEXQReJqZ1O-vYl0bgDgn5AjRFyVyu26OlZSeDk.cache +0 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/Jow55IN5o80YnLiOg4Fr5N9WPT7dOSW1FsC30syKOXM.cache +1 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/NQA9K_2Oq3l5Jz2kmZcV4LJIDx1cJOQbwZQjZq_Ob8k.cache +1 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/OqdzJMApqmuOR5HA5XbQ9Ukw0t66o_kOvOr9gRTb-ys.cache +0 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/UoPmraIR_8GBdt82Qv7E46vB85biTLUEFnsqW5h3AHY.cache +3 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/YBxAUfyQDHjoEa2iz272XBdRKrtQ7tS5kmMLmymLYXU.cache +1 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/ZXdv0D_C5WAx0i5WNLYVhLj_WYBcK_Km4B907jOwh7k.cache +1 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/e8WNBOjUvEkKqQpZOIclS1Azo4QSaSyaZewShwzREds.cache +1 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/hkvQh5qylGCuuWPMjn7586FUemTh6giau8b0IaFgmak.cache +1 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/kD6BsXWNOKOD6OuqYVuxbXKliQbe8n1c_fHhCRqkqKU.cache +2 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/pgKZvrFeOEW9p9wqha2PA5mqALzmUehQRncxgx7HSFY.cache +0 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/usob9Vws5kXcHktDHWDxgEiIKUmU9eLNwvJSjWYfVSg.cache +0 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/x1NjeGbfRAMdAff315v2hcYniAL5QVpxdqW0rFIHRKU.cache +3 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/znqRv2LKKafofH32eY4huW3WeDBZB0l-ESutpXdht9I.cache +2 -0
- data/spec/examples.txt +17 -7
- data/spec/factories/{tin_opener_data_sets.rb → data_sets.rb} +1 -1
- data/spec/factories/records.rb +6 -0
- data/spec/features/tin_opener/data_sets_spec.rb +61 -0
- data/spec/fixtures/data_sets/contaminacion-acustica-2015.xls +0 -0
- data/spec/fixtures/data_sets/encuesta-calidad-de-vida-2014.xls +0 -0
- data/spec/fixtures/data_sets/trabajos_planificados_m30.xml +1 -0
- data/spec/lib/tin_opener/data_file_processor_spec.rb +65 -0
- data/spec/models/tin_opener/data_set_spec.rb +1 -1
- data/spec/models/tin_opener/record_spec.rb +2 -2
- data/spec/rails_helper.rb +3 -1
- data/spec/spec_helper.rb +4 -2
- data/spec/support/url_helpers.rb +7 -0
- metadata +79 -5
- data/spec/factories/tin_opener_records.rb +0 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9ebe2ed89555b70efb828ec91f80d77b69c0e7ff
|
|
4
|
+
data.tar.gz: ea1d9478b4d9f27ddd14a71beea052cd252edc97
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 464c8a01153d27a3edaade63dac993baf85e89cbc60c58e26826d610f73ffcced799b4aad954f78d34950ac1194ceb456dbb5dfb0f56b97d1686af0f5fcdd511
|
|
7
|
+
data.tar.gz: aae351b0ba80839a4f82356c89418f43a5712053320616f8409a40e37dc11639def9de02a5ce5a3e31988b2d656db6f06328bbadceb041480c93bfae2c487b41
|
data/README.md
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# tin_opener
|
|
2
2
|
[Build Status](https://travis-ci.org/dgilperez/tin_opener)
|
|
3
3
|
[Code Climate](https://codeclimate.com/github/dgilperez/tin_opener)
|
|
4
|
+
[Gem Version](https://badge.fury.io/rb/tin_opener)
|
|
4
5
|
|
|
5
6
|
Opening Data, one Tin at a time.
|
|
6
7
|
|
|
@@ -29,9 +30,13 @@ This gem provides the following models:
|
|
|
29
30
|
Data input format support:
|
|
30
31
|
|
|
31
32
|
* CSV
|
|
33
|
+
* XLS (basic)
|
|
32
34
|
|
|
33
35
|
## TODO
|
|
34
36
|
|
|
35
37
|
Support for more data formats:
|
|
36
38
|
|
|
37
|
-
*
|
|
39
|
+
* XLSX
|
|
40
|
+
* XLS (test different scenarios)
|
|
41
|
+
* XML
|
|
42
|
+
* Custom Data Sources
|
|
@@ -16,13 +16,14 @@ module TinOpener
|
|
|
16
16
|
private
|
|
17
17
|
|
|
18
18
|
def process_data_file
|
|
19
|
-
return unless data_file.is_a?(File)
|
|
19
|
+
return unless data_file.is_a?(File) || data_file.is_a?(Tempfile)
|
|
20
20
|
|
|
21
21
|
data_file_processor = DataFileProcessor.new(file: data_file)
|
|
22
22
|
|
|
23
23
|
self.headers = data_file_processor.headers
|
|
24
24
|
|
|
25
25
|
import_records(data_file_processor.rows)
|
|
26
|
+
# TODO: benchmark and remove this allegedly slower alternative
|
|
26
27
|
# data_file_processor.rows.each do |row|
|
|
27
28
|
# records.new(row_data: row)
|
|
28
29
|
# end
|
|
@@ -1,22 +1,36 @@
|
|
|
1
1
|
module TinOpener
|
|
2
|
+
class UnsupportedDataFileError < StandardError; end
|
|
3
|
+
|
|
2
4
|
class DataFileProcessor
|
|
3
5
|
def initialize(args = {})
|
|
4
|
-
@file
|
|
5
|
-
@separator = args.fetch(:separator) { ';' }
|
|
6
|
+
@file = args.fetch(:file)
|
|
6
7
|
end
|
|
7
8
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
delegate :headers, :rows, to: :file_type_processor
|
|
10
|
+
|
|
11
|
+
private
|
|
12
|
+
|
|
13
|
+
def file_type_processor
|
|
14
|
+
@file_type_processor ||= case mime_type
|
|
15
|
+
when 'text/csv', 'text/comma-separated-values', 'text/plain'
|
|
16
|
+
DataFileProcessors::CsvProcessor.new(file: @file)
|
|
17
|
+
when 'application/excel'
|
|
18
|
+
DataFileProcessors::XlsProcessor.new(file: @file)
|
|
19
|
+
else
|
|
20
|
+
fail UnsupportedDataFileError
|
|
12
21
|
end
|
|
13
|
-
end
|
|
14
22
|
end
|
|
15
23
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
24
|
+
# TODO: Current implementation extracts MIME from file extension.
|
|
25
|
+
# This will fail for files with bad extensions (a CSV renamed as an XLS),
|
|
26
|
+
# or with files with no extension
|
|
27
|
+
#
|
|
28
|
+
# Options to consider:
|
|
29
|
+
# - Use ruby-filemagic gem
|
|
30
|
+
# - Use, or compare with, the output of the following command (*nix only):
|
|
31
|
+
# `file #{@file.path} --mime-type`.gsub("\n", '').split(':')[1].split('/')[1].gsub('x-', '').gsub(/jpeg/, 'jpg').gsub(/text/, 'txt').gsub(/x-/, '')
|
|
32
|
+
def mime_type
|
|
33
|
+
@mime_type ||= MIME::Types.type_for(@file.path).first.try(:simplified)
|
|
20
34
|
end
|
|
21
35
|
end
|
|
22
36
|
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
module TinOpener
|
|
2
|
+
module DataFileProcessors
|
|
3
|
+
class CsvProcessor < TinOpener::DataFileProcessor
|
|
4
|
+
def initialize(args = {})
|
|
5
|
+
super
|
|
6
|
+
@separator = args.fetch(:separator) { ';' }
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
def headers
|
|
10
|
+
@headers ||= rows.first.try do |row|
|
|
11
|
+
row.transform_values do |value|
|
|
12
|
+
value ? value.class.name : 'String'
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def rows
|
|
18
|
+
@csv_data ||= CSV.parse(@file, col_sep: @separator, headers: true).map do |row|
|
|
19
|
+
row.to_hash.transform_keys{ |a| a.gsub(/\s+/, ' ').strip.gsub(/\s/, '_').underscore.to_sym }
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
module TinOpener
|
|
2
|
+
module DataFileProcessors
|
|
3
|
+
class XlsProcessor < TinOpener::DataFileProcessor
|
|
4
|
+
def initialize(args = {})
|
|
5
|
+
super
|
|
6
|
+
end
|
|
7
|
+
|
|
8
|
+
delegate :headers, :rows, to: :csv_processor
|
|
9
|
+
|
|
10
|
+
private
|
|
11
|
+
|
|
12
|
+
def raw_data
|
|
13
|
+
@raw_data ||= Roo::Spreadsheet.open(@file.path)
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def csv_processor
|
|
17
|
+
@csv_processor ||= CsvProcessor.new(file: tempfile.tap { write_csv_data })
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def tempfile
|
|
21
|
+
@tempfile ||= Tempfile.new(filename)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def filename
|
|
25
|
+
@file.path.split('/').last
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# TODO: Roo has a built-in .to_yaml transform. Consider using that over CSV
|
|
29
|
+
# whenever a YAML processor is implemented
|
|
30
|
+
def write_csv_data
|
|
31
|
+
# arguments: filename = nil, separator = ',', sheet = default_sheet
|
|
32
|
+
@csv_data ||= raw_data.to_csv(tempfile, ';')
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
data/lib/tin_opener/version.rb
CHANGED
data/lib/tin_opener.rb
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
require "tin_opener/engine"
|
|
2
2
|
require "tin_opener/data_file_processor"
|
|
3
|
+
require "tin_opener/data_file_processors/csv_processor"
|
|
4
|
+
require "tin_opener/data_file_processors/xls_processor"
|
|
3
5
|
require "csv"
|
|
6
|
+
require "roo"
|
|
7
|
+
require "roo-xls"
|
|
4
8
|
require "activerecord-import"
|
|
5
9
|
require 'pry-byebug' unless Rails.env.production?
|
|
6
10
|
|