tin_opener 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +6 -1
- data/app/controllers/tin_opener/data_sets_controller.rb +2 -1
- data/app/models/tin_opener/data_set.rb +2 -1
- data/app/views/tin_opener/data_sets/index.html.erb +1 -1
- data/lib/tin_opener/data_file_processor.rb +25 -11
- data/lib/tin_opener/data_file_processors/csv_processor.rb +24 -0
- data/lib/tin_opener/data_file_processors/xls_processor.rb +36 -0
- data/lib/tin_opener/version.rb +1 -1
- data/lib/tin_opener.rb +4 -0
- data/spec/dummy/log/development.log +516 -0
- data/spec/dummy/log/test.log +5894 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/3MR1DJHzwOb48EQrXsHiHJ7IjgDYaKg8TF_1oluekCU.cache +1 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/9JAtmrE8gkN4Ll_ses1H_M5ylPg_rV75JTy-LInDW5A.cache +0 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/BqwVbEXQReJqZ1O-vYl0bgDgn5AjRFyVyu26OlZSeDk.cache +0 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/Jow55IN5o80YnLiOg4Fr5N9WPT7dOSW1FsC30syKOXM.cache +1 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/NQA9K_2Oq3l5Jz2kmZcV4LJIDx1cJOQbwZQjZq_Ob8k.cache +1 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/OqdzJMApqmuOR5HA5XbQ9Ukw0t66o_kOvOr9gRTb-ys.cache +0 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/UoPmraIR_8GBdt82Qv7E46vB85biTLUEFnsqW5h3AHY.cache +3 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/YBxAUfyQDHjoEa2iz272XBdRKrtQ7tS5kmMLmymLYXU.cache +1 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/ZXdv0D_C5WAx0i5WNLYVhLj_WYBcK_Km4B907jOwh7k.cache +1 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/e8WNBOjUvEkKqQpZOIclS1Azo4QSaSyaZewShwzREds.cache +1 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/hkvQh5qylGCuuWPMjn7586FUemTh6giau8b0IaFgmak.cache +1 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/kD6BsXWNOKOD6OuqYVuxbXKliQbe8n1c_fHhCRqkqKU.cache +2 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/pgKZvrFeOEW9p9wqha2PA5mqALzmUehQRncxgx7HSFY.cache +0 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/usob9Vws5kXcHktDHWDxgEiIKUmU9eLNwvJSjWYfVSg.cache +0 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/x1NjeGbfRAMdAff315v2hcYniAL5QVpxdqW0rFIHRKU.cache +3 -0
- data/spec/dummy/tmp/cache/assets/test/sprockets/v3.0/znqRv2LKKafofH32eY4huW3WeDBZB0l-ESutpXdht9I.cache +2 -0
- data/spec/examples.txt +17 -7
- data/spec/factories/{tin_opener_data_sets.rb → data_sets.rb} +1 -1
- data/spec/factories/records.rb +6 -0
- data/spec/features/tin_opener/data_sets_spec.rb +61 -0
- data/spec/fixtures/data_sets/contaminacion-acustica-2015.xls +0 -0
- data/spec/fixtures/data_sets/encuesta-calidad-de-vida-2014.xls +0 -0
- data/spec/fixtures/data_sets/trabajos_planificados_m30.xml +1 -0
- data/spec/lib/tin_opener/data_file_processor_spec.rb +65 -0
- data/spec/models/tin_opener/data_set_spec.rb +1 -1
- data/spec/models/tin_opener/record_spec.rb +2 -2
- data/spec/rails_helper.rb +3 -1
- data/spec/spec_helper.rb +4 -2
- data/spec/support/url_helpers.rb +7 -0
- metadata +79 -5
- data/spec/factories/tin_opener_records.rb +0 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9ebe2ed89555b70efb828ec91f80d77b69c0e7ff
|
4
|
+
data.tar.gz: ea1d9478b4d9f27ddd14a71beea052cd252edc97
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 464c8a01153d27a3edaade63dac993baf85e89cbc60c58e26826d610f73ffcced799b4aad954f78d34950ac1194ceb456dbb5dfb0f56b97d1686af0f5fcdd511
|
7
|
+
data.tar.gz: aae351b0ba80839a4f82356c89418f43a5712053320616f8409a40e37dc11639def9de02a5ce5a3e31988b2d656db6f06328bbadceb041480c93bfae2c487b41
|
data/README.md
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# tin_opener
|
2
2
|
[![Build Status](https://travis-ci.org/dgilperez/tin_opener.svg?branch=master)](https://travis-ci.org/dgilperez/tin_opener)
|
3
3
|
[![Code Climate](https://codeclimate.com/github/dgilperez/tin_opener/badges/gpa.svg)](https://codeclimate.com/github/dgilperez/tin_opener)
|
4
|
+
[![Gem Version](https://badge.fury.io/rb/tin_opener.svg)](https://badge.fury.io/rb/tin_opener)
|
4
5
|
|
5
6
|
Opening Data, one Tin at a time.
|
6
7
|
|
@@ -29,9 +30,13 @@ This gem provides the following models:
|
|
29
30
|
Data input format support:
|
30
31
|
|
31
32
|
* CSV
|
33
|
+
* XLS (basic)
|
32
34
|
|
33
35
|
## TODO
|
34
36
|
|
35
37
|
Support for more data formats:
|
36
38
|
|
37
|
-
*
|
39
|
+
* XLSX
|
40
|
+
* XLS (test different scenarios)
|
41
|
+
* XML
|
42
|
+
* Custom Data Sources
|
@@ -16,13 +16,14 @@ module TinOpener
|
|
16
16
|
private
|
17
17
|
|
18
18
|
def process_data_file
|
19
|
-
return unless data_file.is_a?(File)
|
19
|
+
return unless data_file.is_a?(File) || data_file.is_a?(Tempfile)
|
20
20
|
|
21
21
|
data_file_processor = DataFileProcessor.new(file: data_file)
|
22
22
|
|
23
23
|
self.headers = data_file_processor.headers
|
24
24
|
|
25
25
|
import_records(data_file_processor.rows)
|
26
|
+
# TODO: benchmark and remove this allegedly slower alternative
|
26
27
|
# data_file_processor.rows.each do |row|
|
27
28
|
# records.new(row_data: row)
|
28
29
|
# end
|
@@ -1,22 +1,36 @@
|
|
1
1
|
module TinOpener
|
2
|
+
class UnsupportedDataFileError < StandardError; end
|
3
|
+
|
2
4
|
class DataFileProcessor
|
3
5
|
def initialize(args = {})
|
4
|
-
@file
|
5
|
-
@separator = args.fetch(:separator) { ';' }
|
6
|
+
@file = args.fetch(:file)
|
6
7
|
end
|
7
8
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
delegate :headers, :rows, to: :file_type_processor
|
10
|
+
|
11
|
+
private
|
12
|
+
|
13
|
+
def file_type_processor
|
14
|
+
@file_type_processor ||= case mime_type
|
15
|
+
when 'text/csv', 'text/comma-separated-values', 'text/plain'
|
16
|
+
DataFileProcessors::CsvProcessor.new(file: @file)
|
17
|
+
when 'application/excel'
|
18
|
+
DataFileProcessors::XlsProcessor.new(file: @file)
|
19
|
+
else
|
20
|
+
fail UnsupportedDataFileError
|
12
21
|
end
|
13
|
-
end
|
14
22
|
end
|
15
23
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
24
|
+
# TODO: Current implementation extracts MIME from file extension.
|
25
|
+
# This will fail for files with bad extensions (a CSV renamed as an XLS),
|
26
|
+
# or with files with no extension
|
27
|
+
#
|
28
|
+
# Options to consider:
|
29
|
+
# - Use ruby-filemagic gem
|
30
|
+
# - Use or compare with output from (*nix only)
|
31
|
+
# `file #{@file.path} --mime-type`.gsub("\n", '').split(':')[1].split('/')[1].gsub('x-', '').gsub(/jpeg/, 'jpg').gsub(/text/, 'txt').gsub(/x-/, '')
|
32
|
+
def mime_type
|
33
|
+
@mime_type ||= MIME::Types.type_for(@file.path).first.try(:simplified)
|
20
34
|
end
|
21
35
|
end
|
22
36
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module TinOpener
|
2
|
+
module DataFileProcessors
|
3
|
+
class CsvProcessor < TinOpener::DataFileProcessor
|
4
|
+
def initialize(args = {})
|
5
|
+
super
|
6
|
+
@separator = args.fetch(:separator) { ';' }
|
7
|
+
end
|
8
|
+
|
9
|
+
def headers
|
10
|
+
@headers ||= rows.first.try do |row|
|
11
|
+
row.transform_values do |value|
|
12
|
+
value ? value.class.name : 'String'
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
def rows
|
18
|
+
@csv_data ||= CSV.parse(@file, col_sep: @separator, headers: true).map do |row|
|
19
|
+
row.to_hash.transform_keys{ |a| a.gsub(/\s+/, ' ').strip.gsub(/\s/, '_').underscore.to_sym }
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module TinOpener
|
2
|
+
module DataFileProcessors
|
3
|
+
class XlsProcessor < TinOpener::DataFileProcessor
|
4
|
+
def initialize(args = {})
|
5
|
+
super
|
6
|
+
end
|
7
|
+
|
8
|
+
delegate :headers, :rows, to: :csv_processor
|
9
|
+
|
10
|
+
private
|
11
|
+
|
12
|
+
def raw_data
|
13
|
+
@raw_data ||= Roo::Spreadsheet.open(@file.path)
|
14
|
+
end
|
15
|
+
|
16
|
+
def csv_processor
|
17
|
+
@csv_processor ||= CsvProcessor.new(file: tempfile.tap { write_csv_data })
|
18
|
+
end
|
19
|
+
|
20
|
+
def tempfile
|
21
|
+
@tempfile ||= Tempfile.new(filename)
|
22
|
+
end
|
23
|
+
|
24
|
+
def filename
|
25
|
+
@file.path.split('/').last
|
26
|
+
end
|
27
|
+
|
28
|
+
# TODO: Roo has a built-in .to_yaml transform. Consider using that over CSV
|
29
|
+
# whenever a YAML processor is implemented
|
30
|
+
def write_csv_data
|
31
|
+
# arguments: filename = nil, separator = ',', sheet = default_sheet
|
32
|
+
@csv_data ||= raw_data.to_csv(tempfile, ';')
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
data/lib/tin_opener/version.rb
CHANGED
data/lib/tin_opener.rb
CHANGED
@@ -1,6 +1,10 @@
|
|
1
1
|
require "tin_opener/engine"
|
2
2
|
require "tin_opener/data_file_processor"
|
3
|
+
require "tin_opener/data_file_processors/csv_processor"
|
4
|
+
require "tin_opener/data_file_processors/xls_processor"
|
3
5
|
require "csv"
|
6
|
+
require "roo"
|
7
|
+
require "roo-xls"
|
4
8
|
require "activerecord-import"
|
5
9
|
require 'pry-byebug' unless Rails.env.production?
|
6
10
|
|