active_importer 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in active_importer.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Ernesto Garcia
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,61 @@
1
+ # ActiveImporter
2
+
3
+ Define importers that load tabular data from spreadsheets or CSV files into any ActiveRecord-like ORM.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'active_importer'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install active_importer
18
+
19
+ ## Usage
20
+
21
+ Define importer classes that describe how to import data into your data models.
22
+
23
+ ```ruby
24
+ class EmployeeImporter < ActiveImporter::Base
25
+ imports Employee
26
+
27
+ column 'First name', :first_name
28
+ column 'Last name', :last_name
29
+ column 'Department', :department do |department_name|
30
+ Department.find_by(name: department_name)
31
+ end
32
+ end
33
+ ```
34
+
35
+ The importer defines what data model it imports data into, and how columns in
36
+ the data source map to fields in the model. Also, by providing a block, the
37
+ source value can be processed before being stored, as shown with the
38
+ 'Department' column in the example above.
39
+
40
+ Once defined, importers can be invoked to import a given data file.
41
+
42
+ ```ruby
43
+ EmployeeImporter.import('/path/to/file.xls')
44
+ ```
45
+
46
+ The data file is expected to contain columns with titles corresponding to the
47
+ columns declared. Any extra columns are ignored. Errors raised while
48
+ processing the data file do not interrupt the whole process. Instead, they are
49
+ reported through callbacks defined in the importer (see below).
50
+
51
+ ### Callbacks
52
+
53
+ TODO: Document callbacks
54
+
55
+ ## Contributing
56
+
57
+ 1. Fork it
58
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
59
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
60
+ 4. Push to the branch (`git push origin my-new-feature`)
61
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
# Put the gem's own lib directory on the load path so that the version
# constant can be required before the gem is installed.
lib = File.expand_path('lib', File.dirname(__FILE__))
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'active_importer/version'

Gem::Specification.new do |spec|
  # Identity
  spec.name        = 'active_importer'
  spec.version     = ActiveImporter::VERSION
  spec.authors     = ['Ernesto Garcia']
  spec.email       = ['gnapse@gmail.com']
  spec.description = 'Import tabular data from spreadsheets or similar sources into data models'
  spec.summary     = 'Import tabular data into data models'
  spec.homepage    = ''
  spec.license     = 'MIT'

  # Packaged files are whatever git tracks; executables and test files are
  # derived from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Runtime dependency
  spec.add_dependency('roo')

  # Development-only dependencies
  spec.add_development_dependency('bundler', '~> 1.3')
  spec.add_development_dependency('rake')
  spec.add_development_dependency('rspec')
end
@@ -0,0 +1,150 @@
1
+ require 'roo'
2
+
3
module ActiveImporter
  # Base class for importers. A subclass declares the model it imports into
  # (`imports`) and how spreadsheet columns map to model fields (`column`);
  # an instance then reads a spreadsheet row by row, builds one model per row
  # and saves it, reporting progress through overridable callback methods.
  class Base

    #
    # DSL and class-level state
    #

    @model_class = nil
    @columns = {}

    # Declares the model class that rows are imported into.
    def self.imports(klass)
      @model_class = klass
    end

    # Column definitions declared on this importer class, keyed by the column
    # title. Each value is a hash with :field_name and :transform entries.
    def self.columns
      @columns ||= {}
    end

    # The model class declared with `imports` (nil if none was declared).
    def self.model_class
      @model_class
    end

    # Instance-level shortcut to the model class declared on the importer class.
    def model_class
      self.class.model_class
    end

    # Declares a column to import.
    #
    # title - header text of the column in the data file
    # field - name of the model field the value is assigned to
    # block - optional transform; receives the raw cell value and returns the
    #         value to store
    #
    # Raises RuntimeError if a column with the same title was already declared.
    def self.column(title, field, &block)
      raise "Duplicate importer column '#{title}'" if columns[title]

      columns[title] = { field_name: field, transform: block }
    end

    # Convenience entry point: builds an importer for +file+ and runs it.
    def self.import(file, options = {})
      new(file, options).import
    end

    #
    # Implementation
    #

    attr_reader :header, :row, :model
    attr_reader :row_count, :row_index
    attr_reader :row_errors
    attr_reader :context

    # Opens the data file and validates its header row.
    #
    # file    - passed as-is to Roo::Spreadsheet.open
    # options - :context is extracted and exposed through #context; every
    #           other entry is forwarded to Roo::Spreadsheet.open
    #
    # Any error raised while opening the file or checking the header is
    # rescued and reported through #import_failed; #import then does nothing.
    def initialize(file, options = {})
      @row_errors = []
      # Row 1 is the header, so until a data row is read no rows have been
      # processed. Setting this up front keeps the row_*_count helpers usable
      # even when the sheet has no data rows.
      @row_index = 1
      # Work on a copy so the caller's hash is not modified by the delete below.
      options = options.dup
      @context = options.delete(:context)

      @book = Roo::Spreadsheet.open(file, options)
      @header = @book.row(1)
      check_header

      @data_row_indices = (2..@book.count)
      @row_count = @data_row_indices.count
    rescue => e
      @book = @header = nil
      @row_count = 0
      @row_index = 1
      import_failed(e.message)
    end

    # Returns the model instance to populate for the current row. Override to
    # look up an existing record instead of always creating a new one.
    def fetch_model
      model_class.new
    end

    # Imports every data row, then invokes #import_finished. Does nothing when
    # initialization failed.
    def import
      return if @book.nil?

      @data_row_indices.each do |index|
        @row_index = index
        @row = row_to_hash(@book.row(index))
        import_row
      end
      import_finished
    end

    # Number of data rows read so far (the header row is not counted).
    def row_processed_count
      row_index - 1
    end

    # Number of rows read so far that did not produce an error.
    def row_success_count
      row_processed_count - row_errors.count
    end

    # Number of rows that produced an error.
    def row_error_count
      row_errors.count
    end

    # Callback: invoked after the column values were assigned to the model and
    # before it is saved. No-op by default.
    def hook
    end

    # Callback: invoked after a row was saved successfully. No-op by default.
    def row_success
    end

    # Callback: invoked with the error message when importing a row raised.
    # No-op by default.
    def row_error(error_message)
    end

    # Callback: invoked with the error message when the data file could not be
    # opened or its header is invalid. No-op by default.
    def import_failed(error_message)
    end

    # Callback: invoked once after all rows were processed. No-op by default.
    def import_finished
    end

    private

    def columns
      self.class.columns
    end

    # Raises unless every declared column title is present in the header row.
    def check_header
      return if columns.keys.all? { |title| @header.include?(title) }

      raise 'Spreadsheet does not contain all the expected columns'
    end

    # Builds and saves the model for the current row. Any error raised along
    # the way is recorded in #row_errors and reported through #row_error, so a
    # failing row does not abort the rest of the import.
    def import_row
      @model = fetch_model
      build_model
      model.save!
      row_success
    rescue => e
      @row_errors << { row_index: row_index, error_message: e.message }
      row_error(e.message)
    end

    # Assigns the current row's values to the model, applying each column's
    # transform when one was declared, then calls #hook. Cells under titles
    # that were not declared as columns are skipped.
    def build_model
      row.each_pair do |title, value|
        column_def = columns[title]
        next if column_def.nil?

        transform = column_def[:transform]
        value = transform.call(value) if transform
        model[column_def[:field_name]] = value
      end
      hook
    end

    # Converts an array of cell values into a hash keyed by header title.
    def row_to_hash(row)
      row.each_with_index.each_with_object({}) do |(value, index), hash|
        hash[@header[index]] = value
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module ActiveImporter
  # Version of the gem; frozen so the constant cannot be modified in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,5 @@
1
+ require "active_importer/version"
2
+
3
# Top-level namespace of the gem.
module ActiveImporter
  # Base is registered for lazy loading from 'active_importer/base'; the file
  # is only required the first time the constant is referenced.
  autoload(:Base, 'active_importer/base')
end
@@ -0,0 +1,96 @@
1
+ require 'spec_helper'
2
+ require 'stubs/employee'
3
+
4
# Exercises ActiveImporter::Base through the EmployeeImporter stub. The real
# spreadsheet backend is replaced by an in-memory Spreadsheet stub that serves
# whatever `spreadsheet_data` the current context defines.
describe ActiveImporter::Base do
  let(:spreadsheet_data) do
    [
      ['Name', 'Birth Date', 'Department'],
      ['John Doe', '2013-10-25', 'IT'],
      ['Jane Doe', '2013-10-26', 'Sales'],
    ]
  end

  # Built lazily; examples that reference it stub EmployeeImporter.new first
  # so that `.import` operates on this very instance.
  let(:importer) { EmployeeImporter.new('/dummy/file') }

  before do
    # A block implementation is used instead of `and_return { ... }`: passing a
    # block to `and_return` is deprecated in rspec-mocks 2.14 and rejected by
    # rspec-mocks 3. The block is evaluated when `open` is called, so each
    # context's own `spreadsheet_data` is picked up.
    expect(Roo::Spreadsheet).to receive(:open) { Spreadsheet.new(spreadsheet_data) }
  end

  it 'imports all data from the spreadsheet into the model' do
    expect { EmployeeImporter.import('/dummy/file') }.to change(Employee, :count).by(2)
  end

  it 'notifies when each row has been imported successfully' do
    expect(EmployeeImporter).to receive(:new).once.and_return(importer)
    expect(importer).not_to receive(:row_error)
    expect(importer).to receive(:row_success).twice
    EmployeeImporter.import('/dummy/file')
  end

  it 'notifies when the import process has finished' do
    expect(EmployeeImporter).to receive(:new).once.and_return(importer)
    expect(importer).to receive(:import_finished).once
    EmployeeImporter.import('/dummy/file')
  end

  context 'when there are rows with errors' do
    # Rows named 'Invalid' fail the Employee stub's validation.
    let(:spreadsheet_data) do
      [
        ['Name', 'Birth Date', 'Department'],
        ['John Doe', '2013-10-25', 'IT'],
        ['Invalid', '2013-10-24', 'Management'],
        ['Invalid', '2013-10-24', 'Accounting'],
        ['Jane Doe', '2013-10-26', 'Sales'],
      ]
    end

    it 'does not import those rows' do
      expect { EmployeeImporter.import('/dummy/file') }.to change(Employee, :count).by(2)
    end

    it 'notifies about each error' do
      expect(EmployeeImporter).to receive(:new).once.and_return(importer)
      expect(importer).to receive(:row_error).twice
      expect(importer).to receive(:row_success).twice
      EmployeeImporter.import('/dummy/file')
    end

    it 'keeps track of each error' do
      expect(EmployeeImporter).to receive(:new).once.and_return(importer)
      expect { EmployeeImporter.import('/dummy/file') }.to change(importer.row_errors, :count).by(2)
    end
  end

  context 'when the import fails' do
    # The 'Department' column declared by EmployeeImporter is missing here.
    let(:spreadsheet_data) do
      [
        ['Name', 'Birth Date'],
        ['John Doe', '2013-10-25'],
        ['Jane Doe', '2013-10-26'],
      ]
    end

    it 'notifies the failure' do
      expect_any_instance_of(EmployeeImporter).to receive(:import_failed)
      EmployeeImporter.import('/dummy/file')
    end
  end

  describe '.fetch_model' do
    let(:model) { Employee.new }

    it 'controls what model instance is loaded for each given row' do
      expect(EmployeeImporter).to receive(:new).once.and_return(importer)
      # Both rows are written into the same instance, so only one record is new.
      expect(importer).to receive(:fetch_model).twice.and_return(model)
      expect { EmployeeImporter.import('/dummy/file') }.to change(Employee, :count).by(1)
    end
  end

  describe '.hook' do
    it 'allows the importer to modify the model for each row' do
      expect(EmployeeImporter).to receive(:new).once.and_return(importer)
      expect(importer).to receive(:hook).twice
      EmployeeImporter.import('/dummy/file')
    end
  end
end
@@ -0,0 +1,23 @@
1
+ require 'active_importer'
2
+
3
+ # This file was generated by the `rspec --init` command. Conventionally, all
4
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
5
+ # Require this file using `require "spec_helper"` to ensure that it is only
6
+ # loaded once.
7
+ #
8
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
9
RSpec.configure do |config|
  # This setting only exists for RSpec 2.x; under RSpec 3 it is deprecated
  # (symbols are always treated as `true` metadata) and merely prints a
  # warning, so it is applied on 2.x only.
  if Gem::Version.new(RSpec::Core::Version::STRING) < Gem::Version.new('3')
    config.treat_symbols_as_metadata_keys_with_true_values = true
  end
  config.run_all_when_everything_filtered = true
  config.filter_run :focus

  # Only the `expect` syntax is enabled for expectations.
  config.expect_with :rspec do |c|
    c.syntax = :expect
  end

  # Run specs in random order to surface order dependencies. If you find an
  # order dependency and want to debug it, you can fix the order by providing
  # the seed, which is printed after each run.
  #     --seed 1234
  config.order = 'random'
end
@@ -0,0 +1,52 @@
1
# Minimal in-memory stand-in for an ActiveRecord-like model, used by the specs.
# Subclasses declare their fields with attr_accessor and override #validate to
# push messages onto @errors.
class DataModel
  # Instance variables that hold bookkeeping state rather than field values.
  INTERNAL_STATE = [:@new_record, :@errors].freeze

  # Number of records saved for the first time. As a class variable it is
  # shared between DataModel and all of its subclasses.
  @@count = 0

  def self.count
    @@count
  end

  attr_reader :errors

  # attributes - hash of field name => value; each pair is assigned through
  #              the corresponding setter.
  def initialize(attributes = {})
    @new_record = true
    @errors = []
    attributes.each_pair { |key, value| self[key] = value }
  end

  # Assigns +value+ to +field+ by calling the "<field>=" setter.
  def []=(field, value)
    send("#{field}=", value)
  end

  # Hash of the field values assigned so far, keyed by field name as a symbol.
  # Fields that were never assigned are not included.
  def attributes
    (instance_variables - INTERNAL_STATE).each_with_object({}) do |ivar, fields|
      fields[ivar.to_s.delete('@').to_sym] = instance_variable_get(ivar)
    end
  end

  def to_s
    "#{self.class.name}(#{attributes})"
  end

  # Returns true and counts the record (first save only) when it is valid,
  # false otherwise.
  def save
    return false unless valid?

    @@count += 1 if @new_record
    @new_record = false
    true
  end

  # Like #save, but raises RuntimeError ('Invalid model') on failure.
  def save!
    raise 'Invalid model' unless save
  end

  def new_record?
    @new_record
  end

  # Re-runs the validations from a clean slate, so that errors from an earlier
  # check neither pile up nor keep a corrected record invalid.
  def valid?
    @errors.clear
    validate
    @errors.empty?
  end

  # Hook for subclasses: append a message to @errors for every problem found.
  def validate
    # ...
  end
end
@@ -0,0 +1,20 @@
1
+ require 'stubs/data_model'
2
+ require 'stubs/spreadsheet'
3
+
4
# Spec model: an employee whose only validation rule rejects the literal
# name 'Invalid'.
class Employee < DataModel
  attr_accessor :name, :birth_date, :department, :department_id

  # Adds an error message when the name is the sentinel value 'Invalid'.
  def validate
    return unless name == 'Invalid'

    @errors << 'Invalid name'
  end
end
11
+
12
# Spec importer: maps the three spreadsheet columns onto Employee fields.
class EmployeeImporter < ActiveImporter::Base
  imports(Employee)

  column('Name', :name)
  column('Birth Date', :birth_date)
  # Quick dummy way to get an integer out of a string
  column('Department', :department_id) { |value| value.length }
end
@@ -0,0 +1,13 @@
1
# In-memory replacement for a spreadsheet object in the specs, backed by an
# array of rows (each row being an array of cell values).
class Spreadsheet
  def initialize(data)
    @data = data
  end

  # Total number of rows, header included.
  def count
    @data.count
  end

  # Returns the row at the given 1-based position.
  def row(index)
    offset = index - 1
    @data[offset]
  end
end
metadata ADDED
@@ -0,0 +1,136 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: active_importer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ernesto Garcia
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-11-06 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: roo
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: bundler
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '1.3'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '1.3'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rake
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rspec
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ description: Import tabular data from spreadsheets or similar sources into data models
79
+ email:
80
+ - gnapse@gmail.com
81
+ executables: []
82
+ extensions: []
83
+ extra_rdoc_files: []
84
+ files:
85
+ - .gitignore
86
+ - .rspec
87
+ - Gemfile
88
+ - LICENSE.txt
89
+ - README.md
90
+ - Rakefile
91
+ - active_importer.gemspec
92
+ - lib/active_importer.rb
93
+ - lib/active_importer/base.rb
94
+ - lib/active_importer/version.rb
95
+ - spec/active_importer/base_spec.rb
96
+ - spec/spec_helper.rb
97
+ - spec/stubs/data_model.rb
98
+ - spec/stubs/employee.rb
99
+ - spec/stubs/spreadsheet.rb
100
+ homepage: ''
101
+ licenses:
102
+ - MIT
103
+ post_install_message:
104
+ rdoc_options: []
105
+ require_paths:
106
+ - lib
107
+ required_ruby_version: !ruby/object:Gem::Requirement
108
+ none: false
109
+ requirements:
110
+ - - ! '>='
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ segments:
114
+ - 0
115
+ hash: -168044513201808942
116
+ required_rubygems_version: !ruby/object:Gem::Requirement
117
+ none: false
118
+ requirements:
119
+ - - ! '>='
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
122
+ segments:
123
+ - 0
124
+ hash: -168044513201808942
125
+ requirements: []
126
+ rubyforge_project:
127
+ rubygems_version: 1.8.23
128
+ signing_key:
129
+ specification_version: 3
130
+ summary: Import tabular data into data models
131
+ test_files:
132
+ - spec/active_importer/base_spec.rb
133
+ - spec/spec_helper.rb
134
+ - spec/stubs/data_model.rb
135
+ - spec/stubs/employee.rb
136
+ - spec/stubs/spreadsheet.rb