active_importer 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in active_importer.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Ernesto Garcia
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,61 @@
1
+ # ActiveImporter
2
+
3
+ Define importers that load tabular data from spreadsheets or CSV files into any ActiveRecord-like ORM.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'active_importer'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install active_importer
18
+
19
+ ## Usage
20
+
21
+ Define importer classes that describe how to import data into your data models.
22
+
23
+ ```ruby
24
+ class EmployeeImporter < ActiveImporter::Base
25
+ imports Employee
26
+
27
+ column 'First name', :first_name
28
+ column 'Last name', :last_name
29
+ column 'Department', :department do |department_name|
30
+ Department.find_by(name: department_name)
31
+ end
32
+ end
33
+ ```
34
+
35
+ The importer defines what data model it imports data into, and how columns in
36
+ the data source map to fields in the model. Also, by providing a block, the
37
+ source value can be processed before being stored, as shown with the
38
+ 'Department' column in the example above.
39
+
40
+ Once defined, importers can be invoked to import a given data file.
41
+
42
+ ```ruby
43
+ EmployeeImporter.import('/path/to/file.xls')
44
+ ```
45
+
46
+ The data file is expected to contain columns with titles corresponding to the
47
+ columns declared. Any extra columns are ignored. Any errors while processing
48
+ the data file does not interrupt the whole process. Instead, errors are
49
+ notified via some callbacks defined in the importer (see below).
50
+
51
+ ### Callbacks
52
+
53
+ TODO: Document callbacks
54
+
55
+ ## Contributing
56
+
57
+ 1. Fork it
58
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
59
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
60
+ 4. Push to the branch (`git push origin my-new-feature`)
61
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'active_importer/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "active_importer"
8
+ spec.version = ActiveImporter::VERSION
9
+ spec.authors = ["Ernesto Garcia"]
10
+ spec.email = ["gnapse@gmail.com"]
11
+ spec.description = %q{Import tabular data from spreadsheets or similar sources into data models}
12
+ spec.summary = %q{Import tabular data into data models}
13
+ spec.homepage = ""
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files`.split($/)
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_dependency "roo"
22
+
23
+ spec.add_development_dependency "bundler", "~> 1.3"
24
+ spec.add_development_dependency "rake"
25
+ spec.add_development_dependency "rspec"
26
+ end
@@ -0,0 +1,150 @@
1
+ require 'roo'
2
+
3
module ActiveImporter
  # Base class for importers that load the rows of a spreadsheet into model
  # instances.
  #
  # Subclasses declare the target model with +imports+ and map spreadsheet
  # column titles to model fields with +column+. Row 1 of the spreadsheet is
  # treated as the header; every following row is turned into one model.
  class Base

    #
    # DSL and class variables
    #

    @model_class = nil
    @columns = {}

    # Declares the class that is instantiated for every imported row.
    def self.imports(klass)
      @model_class = klass
    end

    # Column definitions declared on this importer class, keyed by title.
    # Each value is a hash with +:field_name+ and +:transform+ entries.
    def self.columns
      @columns ||= {}
    end

    # The class given to +imports+, or nil when none was declared.
    def self.model_class
      @model_class
    end

    def model_class
      self.class.model_class
    end

    # Maps the spreadsheet column +title+ to the model field +field+. When a
    # block is given, it receives the raw cell value and its result is what
    # gets assigned to the field.
    #
    # Raises a RuntimeError if +title+ was already declared.
    def self.column(title, field, &block)
      raise "Duplicate importer column '#{title}'" if columns[title]

      columns[title] = { field_name: field, transform: block }
    end

    # Builds an importer for +file+ and runs it.
    def self.import(file, options = {})
      new(file, options).import
    end

    #
    # Implementation
    #

    attr_reader :header, :row, :model
    attr_reader :row_count, :row_index
    attr_reader :row_errors
    attr_reader :context

    # Opens +file+ through Roo and validates its header row.
    #
    # The +:context+ entry of +options+ is kept in #context; all remaining
    # entries are forwarded to Roo::Spreadsheet.open. Any error raised while
    # opening the file or checking the header is reported through
    # #import_failed instead of being propagated.
    def initialize(file, options = {})
      @row_errors = []
      # Work on a copy so that the caller's hash is left untouched when the
      # :context entry is removed.
      options = options.dup
      @context = options.delete(:context)

      @book = Roo::Spreadsheet.open(file, options)
      @header = @book.row(1)
      check_header

      # Row 1 is the header, data starts at row 2.
      @data_row_indices = (2..@book.count)
      @row_count = @data_row_indices.count
    rescue => e
      @book = @header = nil
      @row_count = 0
      @row_index = 1
      import_failed(e.message)
    end

    # Returns the model instance to populate for the current row. Subclasses
    # may override this to supply a different instance.
    def fetch_model
      model_class.new
    end

    # Imports every data row, then calls #import_finished. Does nothing when
    # the spreadsheet could not be opened in #initialize.
    def import
      return if @book.nil?

      @data_row_indices.each do |index|
        @row_index = index
        @row = row_to_hash(@book.row(index))
        import_row
      end
      import_finished
    end

    # Number of data rows visited so far (the header row is not counted).
    def row_processed_count
      # row_index stays nil until the first data row is visited.
      row_index ? row_index - 1 : 0
    end

    def row_success_count
      row_processed_count - row_errors.count
    end

    def row_error_count
      row_errors.count
    end

    #
    # Callbacks, all no-ops by default
    #

    # Called for every row after the column values have been assigned to the
    # model and before the model is saved.
    def hook
    end

    # Called after a row's model has been saved.
    def row_success
    end

    # Called with the exception message when building or saving a row fails.
    def row_error(error_message)
    end

    # Called from #initialize with the exception message when the spreadsheet
    # cannot be opened or its header is incomplete.
    def import_failed(error_message)
    end

    # Called once after all data rows have been visited.
    def import_finished
    end

    private

    def columns
      self.class.columns
    end

    def check_header
      # Header should contain all columns declared for this importer
      return if columns.keys.all? { |title| @header.include?(title) }

      raise 'Spreadsheet does not contain all the expected columns'
    end

    # Builds and saves the model for the current row. Failures are recorded in
    # #row_errors and reported through #row_error; they never propagate, so
    # the remaining rows are still processed.
    def import_row
      @model = fetch_model
      build_model
      model.save!
      row_success
    rescue => e
      @row_errors << { row_index: row_index, error_message: e.message }
      row_error(e.message)
    end

    # Copies the values of declared columns into the model, then runs #hook.
    def build_model
      row.each_pair do |title, value|
        column_def = columns[title]
        next if column_def.nil? # column not declared by this importer

        transform = column_def[:transform]
        value = transform.call(value) if transform
        model[column_def[:field_name]] = value
      end
      hook
    end

    # Pairs every cell of +row+ with the header title at the same position.
    def row_to_hash(row)
      row.each_with_index.each_with_object({}) do |(value, index), hash|
        hash[@header[index]] = value
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module ActiveImporter
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,5 @@
1
+ require "active_importer/version"
2
+
3
+ module ActiveImporter
4
+ autoload :Base, 'active_importer/base'
5
+ end
@@ -0,0 +1,96 @@
1
+ require 'spec_helper'
2
+ require 'stubs/employee'
3
+
4
# Exercises ActiveImporter::Base through the EmployeeImporter stub, with
# Roo::Spreadsheet.open replaced by an in-memory Spreadsheet stub.
describe ActiveImporter::Base do
  let(:spreadsheet_data) do
    [
      ['Name', 'Birth Date', 'Department'],
      ['John Doe', '2013-10-25', 'IT'],
      ['Jane Doe', '2013-10-26', 'Sales'],
    ]
  end

  let(:importer) { EmployeeImporter.new('/dummy/file') }

  before do
    # `and_return` takes the value as an argument; passing it an
    # implementation block is not supported by current rspec-mocks.
    expect(Roo::Spreadsheet).to receive(:open).and_return(Spreadsheet.new(spreadsheet_data))
  end

  it 'imports all data from the spreadsheet into the model' do
    expect { EmployeeImporter.import('/dummy/file') }.to change(Employee, :count).by(2)
  end

  it 'notifies when each row has been imported successfully' do
    expect(EmployeeImporter).to receive(:new).once.and_return(importer)
    expect(importer).not_to receive(:row_error)
    expect(importer).to receive(:row_success).twice
    EmployeeImporter.import('/dummy/file')
  end

  it 'notifies when the import process has finished' do
    expect(EmployeeImporter).to receive(:new).once.and_return(importer)
    expect(importer).to receive(:import_finished).once
    EmployeeImporter.import('/dummy/file')
  end

  context 'when there are rows with errors' do
    # Employee#validate rejects the name 'Invalid', so two of the four data
    # rows below fail to save.
    let(:spreadsheet_data) do
      [
        ['Name', 'Birth Date', 'Department'],
        ['John Doe', '2013-10-25', 'IT'],
        ['Invalid', '2013-10-24', 'Management'],
        ['Invalid', '2013-10-24', 'Accounting'],
        ['Jane Doe', '2013-10-26', 'Sales'],
      ]
    end

    it 'does not import those rows' do
      expect { EmployeeImporter.import('/dummy/file') }.to change(Employee, :count).by(2)
    end

    it 'notifies about each error' do
      expect(EmployeeImporter).to receive(:new).once.and_return(importer)
      expect(importer).to receive(:row_error).twice
      expect(importer).to receive(:row_success).twice
      EmployeeImporter.import('/dummy/file')
    end

    it 'keeps track of each error' do
      expect(EmployeeImporter).to receive(:new).once.and_return(importer)
      expect { EmployeeImporter.import('/dummy/file') }.to change(importer.row_errors, :count).by(2)
    end
  end

  context 'when the import fails' do
    # The 'Department' column declared by EmployeeImporter is missing here,
    # so the header check fails.
    let(:spreadsheet_data) do
      [
        ['Name', 'Birth Date'],
        ['John Doe', '2013-10-25'],
        ['Jane Doe', '2013-10-26'],
      ]
    end

    it 'notifies the failure' do
      expect_any_instance_of(EmployeeImporter).to receive(:import_failed)
      EmployeeImporter.import('/dummy/file')
    end
  end

  describe '.fetch_model' do
    let(:model) { Employee.new }

    it 'controls what model instance is loaded for each given row' do
      expect(EmployeeImporter).to receive(:new).once.and_return(importer)
      expect(importer).to receive(:fetch_model).twice.and_return(model)
      # The same instance is saved twice, but only its first save is new.
      expect { EmployeeImporter.import('/dummy/file') }.to change(Employee, :count).by(1)
    end
  end

  describe '.hook' do
    it 'allows the importer to modify the model for each row' do
      expect(EmployeeImporter).to receive(:new).once.and_return(importer)
      expect(importer).to receive(:hook).twice
      EmployeeImporter.import('/dummy/file')
    end
  end
end
@@ -0,0 +1,23 @@
1
+ require 'active_importer'
2
+
3
+ # This file was generated by the `rspec --init` command. Conventionally, all
4
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
5
+ # Require this file using `require "spec_helper"` to ensure that it is only
6
+ # loaded once.
7
+ #
8
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
9
+ RSpec.configure do |config|
10
+ config.treat_symbols_as_metadata_keys_with_true_values = true
11
+ config.run_all_when_everything_filtered = true
12
+ config.filter_run :focus
13
+
14
+ config.expect_with :rspec do |c|
15
+ c.syntax = :expect
16
+ end
17
+
18
+ # Run specs in random order to surface order dependencies. If you find an
19
+ # order dependency and want to debug it, you can fix the order by providing
20
+ # the seed, which is printed after each run.
21
+ # --seed 1234
22
+ config.order = 'random'
23
+ end
@@ -0,0 +1,52 @@
1
# Minimal in-memory stand-in for an ActiveRecord-like model, used by the specs.
#
# Subclasses declare their fields with +attr_accessor+ and may override
# #validate to push messages onto +@errors+.
class DataModel
  # Instance variables that hold bookkeeping state rather than field values.
  INTERNAL_IVARS = [:@new_record, :@errors].freeze

  # Number of records saved for the first time. This is a class variable, so
  # the counter is shared by DataModel and all of its subclasses.
  @@count = 0

  def self.count
    @@count
  end

  attr_reader :errors

  # Builds an unsaved record, assigning each entry of +attributes+ through the
  # matching writer method.
  def initialize(attributes = {})
    @new_record = true
    @errors = []
    attributes.each_pair do |key, value|
      self[key] = value
    end
  end

  # Assigns +value+ to +field+ by calling the "<field>=" writer.
  def []=(field, value)
    send("#{field}=", value)
  end

  # Field values that have been assigned so far, keyed by field name.
  def attributes
    (instance_variables - INTERNAL_IVARS).each_with_object({}) do |ivar, result|
      result[ivar.to_s.sub('@', '').to_sym] = instance_variable_get(ivar)
    end
  end

  def to_s
    "#{self.class.name}(#{attributes})"
  end

  # Returns true and marks the record as persisted when it is valid, false
  # otherwise. Only the first successful save increments the counter.
  def save
    return false unless valid?

    @@count += 1 if @new_record
    @new_record = false
    true
  end

  # Like #save, but raises a RuntimeError when the record is invalid.
  def save!
    raise 'Invalid model' unless save

    true
  end

  def new_record?
    @new_record
  end

  # Runs #validate from a clean slate and reports whether it left any errors.
  def valid?
    # Drop messages from earlier runs so they neither pile up nor keep a
    # corrected record invalid.
    @errors.clear
    validate
    @errors.empty?
  end

  # Hook for subclasses; the default implementation accepts every record.
  def validate
    # ...
  end
end
@@ -0,0 +1,20 @@
1
+ require 'stubs/data_model'
2
+ require 'stubs/spreadsheet'
3
+
4
# Model stub used by the importer specs.
class Employee < DataModel
  attr_accessor :name, :birth_date, :department, :department_id

  # Records an error when the name is the literal string 'Invalid'.
  def validate
    return unless name == 'Invalid'

    @errors << 'Invalid name'
  end
end
11
+
12
# Importer stub that maps the three spreadsheet columns used by the specs
# onto Employee fields.
class EmployeeImporter < ActiveImporter::Base
  imports Employee

  column 'Name', :name
  column 'Birth Date', :birth_date
  # Quick dummy way to get an integer out of a string
  column('Department', :department_id) { |text| text.length }
end
@@ -0,0 +1,13 @@
1
# In-memory stand-in for the object returned by Roo::Spreadsheet.open,
# backed by an array of rows.
class Spreadsheet
  def initialize(data)
    @data = data
  end

  # Total number of rows, header included.
  def count
    @data.count
  end

  # Returns the row at the given 1-based +index+, or nil when there is no
  # such row.
  def row(index)
    # Without this guard, index 0 or a negative index would wrap around and
    # return a row counted from the end of the array.
    return nil if index < 1

    @data[index - 1]
  end
end
metadata ADDED
@@ -0,0 +1,136 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: active_importer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ernesto Garcia
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-11-06 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: roo
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: bundler
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '1.3'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '1.3'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rake
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rspec
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ description: Import tabular data from spreadsheets or similar sources into data models
79
+ email:
80
+ - gnapse@gmail.com
81
+ executables: []
82
+ extensions: []
83
+ extra_rdoc_files: []
84
+ files:
85
+ - .gitignore
86
+ - .rspec
87
+ - Gemfile
88
+ - LICENSE.txt
89
+ - README.md
90
+ - Rakefile
91
+ - active_importer.gemspec
92
+ - lib/active_importer.rb
93
+ - lib/active_importer/base.rb
94
+ - lib/active_importer/version.rb
95
+ - spec/active_importer/base_spec.rb
96
+ - spec/spec_helper.rb
97
+ - spec/stubs/data_model.rb
98
+ - spec/stubs/employee.rb
99
+ - spec/stubs/spreadsheet.rb
100
+ homepage: ''
101
+ licenses:
102
+ - MIT
103
+ post_install_message:
104
+ rdoc_options: []
105
+ require_paths:
106
+ - lib
107
+ required_ruby_version: !ruby/object:Gem::Requirement
108
+ none: false
109
+ requirements:
110
+ - - ! '>='
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ segments:
114
+ - 0
115
+ hash: -168044513201808942
116
+ required_rubygems_version: !ruby/object:Gem::Requirement
117
+ none: false
118
+ requirements:
119
+ - - ! '>='
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
122
+ segments:
123
+ - 0
124
+ hash: -168044513201808942
125
+ requirements: []
126
+ rubyforge_project:
127
+ rubygems_version: 1.8.23
128
+ signing_key:
129
+ specification_version: 3
130
+ summary: Import tabular data into data models
131
+ test_files:
132
+ - spec/active_importer/base_spec.rb
133
+ - spec/spec_helper.rb
134
+ - spec/stubs/data_model.rb
135
+ - spec/stubs/employee.rb
136
+ - spec/stubs/spreadsheet.rb