spreadsheet_import 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +160 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/spreadsheet_import.rb +20 -0
- data/lib/spreadsheet_import/data_processor/base_processor.rb +64 -0
- data/lib/spreadsheet_import/hash_extension.rb +9 -0
- data/lib/spreadsheet_import/importer/active_record_importer/base_importer.rb +91 -0
- data/lib/spreadsheet_import/importer/active_record_importer/bulk_importer.rb +41 -0
- data/lib/spreadsheet_import/importer/base_importer.rb +28 -0
- data/lib/spreadsheet_import/reader/base_reader.rb +14 -0
- data/lib/spreadsheet_import/reader/simple_reader.rb +44 -0
- data/lib/spreadsheet_import/version.rb +3 -0
- data/spreadsheet_import.gemspec +35 -0
- metadata +167 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: be38eaab1623d6677bc61474eedea9151cda8847
|
4
|
+
data.tar.gz: 1d34a83a16f3d9cc1d666d107d90f558245f6347
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: bee3ede308bf899953dd622b06521a15fb52ce5ae3b0a0b537c5b9e9500264242a995501f233ba14ea27c470ff8ef34292536935ef4fcc76b1c7f37e89653699
|
7
|
+
data.tar.gz: 8613dc69e808d506f4a08c849283490cfa558afef73bed0ad368dc826dbd79a9aedb56a2e702aca4b26d2da0d21daad9b6c8dc3e25e4b07626396a826ef53850
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# Contributor Code of Conduct
|
2
|
+
|
3
|
+
As contributors and maintainers of this project, and in the interest of
|
4
|
+
fostering an open and welcoming community, we pledge to respect all people who
|
5
|
+
contribute through reporting issues, posting feature requests, updating
|
6
|
+
documentation, submitting pull requests or patches, and other activities.
|
7
|
+
|
8
|
+
We are committed to making participation in this project a harassment-free
|
9
|
+
experience for everyone, regardless of level of experience, gender, gender
|
10
|
+
identity and expression, sexual orientation, disability, personal appearance,
|
11
|
+
body size, race, ethnicity, age, religion, or nationality.
|
12
|
+
|
13
|
+
Examples of unacceptable behavior by participants include:
|
14
|
+
|
15
|
+
* The use of sexualized language or imagery
|
16
|
+
* Personal attacks
|
17
|
+
* Trolling or insulting/derogatory comments
|
18
|
+
* Public or private harassment
|
19
|
+
* Publishing other's private information, such as physical or electronic
|
20
|
+
addresses, without explicit permission
|
21
|
+
* Other unethical or unprofessional conduct
|
22
|
+
|
23
|
+
Project maintainers have the right and responsibility to remove, edit, or
|
24
|
+
reject comments, commits, code, wiki edits, issues, and other contributions
|
25
|
+
that are not aligned to this Code of Conduct, or to ban temporarily or
|
26
|
+
permanently any contributor for other behaviors that they deem inappropriate,
|
27
|
+
threatening, offensive, or harmful.
|
28
|
+
|
29
|
+
By adopting this Code of Conduct, project maintainers commit themselves to
|
30
|
+
fairly and consistently applying these principles to every aspect of managing
|
31
|
+
this project. Project maintainers who do not follow or enforce the Code of
|
32
|
+
Conduct may be permanently removed from the project team.
|
33
|
+
|
34
|
+
This code of conduct applies both within project spaces and in public spaces
|
35
|
+
when an individual is representing the project or its community.
|
36
|
+
|
37
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
38
|
+
reported by contacting a project maintainer at rohan.pujari@kiprosh.com. All
|
39
|
+
complaints will be reviewed and investigated and will result in a response that
|
40
|
+
is deemed necessary and appropriate to the circumstances. Maintainers are
|
41
|
+
obligated to maintain confidentiality with regard to the reporter of an
|
42
|
+
incident.
|
43
|
+
|
44
|
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
45
|
+
version 1.3.0, available at
|
46
|
+
[http://contributor-covenant.org/version/1/3/0/][version]
|
47
|
+
|
48
|
+
[homepage]: http://contributor-covenant.org
|
49
|
+
[version]: http://contributor-covenant.org/version/1/3/0/
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 Rohan Pujari
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,160 @@
|
|
1
|
+
# SpreadsheetImport
|
2
|
+
|
3
|
+
SpreadsheetImport gem allows importing data from csv, xls, xlsx and ods files to a database.
|
4
|
+
SpreadsheetImport contains 3 parts (Reader, Processor, Importer). Each part depends on separate gems, and you can replace any part without affecting the other parts.
|
5
|
+
|
6
|
+
- Reader: Reads data from a spreadsheet file. The following classes are used for this functionality.
|
7
|
+
* SpreadsheetImport::BaseReader: Base class of every Reader class.
|
8
|
+
* SpreadsheetImport::SimpleReader: Inherits from base reader. Provides `each_row(required_columns)` method. This class depends on simple-spreadsheet gem https://github.com/zenkay/simple-spreadsheet.
|
9
|
+
|
10
|
+
- Processor: Processes the data read by Reader. SpreadsheetImport gem defines one processor
|
11
|
+
* SpreadsheetImport::BaseProcessor: BaseProcessor contains various hooks to process and validate data provided by Reader.
|
12
|
+
|
13
|
+
- Importer: Responsible for importing data received from the data processor into the database. SpreadsheetImport gem provides the following importers.
|
14
|
+
* SpreadsheetImport::BaseImporter: Base class of all importer
|
15
|
+
* SpreadsheetImport::ActiveRecordImporter::BaseImporter: Supports import using activerecord. Depends on activerecord gem.
|
16
|
+
* SpreadsheetImport::ActiveRecordImporter::BulkImporter: Supports import using activerecord-import gem(https://github.com/zdennis/activerecord-import)
|
17
|
+
|
18
|
+
## Installation
|
19
|
+
|
20
|
+
Add this line to your application's Gemfile:
|
21
|
+
|
22
|
+
```ruby
|
23
|
+
gem 'spreadsheet_import'
|
24
|
+
gem 'simple-spreadsheet' # if you want to use SpreadsheetImport::SimpleReader
|
25
|
+
gem 'activerecord' # if you want to use any importer in SpreadsheetImport::ActiveRecordImporter
|
26
|
+
gem 'activerecord-import' # if you want to use SpreadsheetImport::ActiveRecordImporter::BulkImporter
|
27
|
+
```
|
28
|
+
|
29
|
+
And then execute:
|
30
|
+
|
31
|
+
$ bundle
|
32
|
+
|
33
|
+
## Usage
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
SpreadsheetImport.import(file_url, mapping, model, options)
|
37
|
+
```
|
38
|
+
- file_url: url of csv file
|
39
|
+
- mapping: database column name to spreadsheet column number mapping hash.
|
40
|
+
|
41
|
+
Example:
|
42
|
+
|
43
|
+
{city: 1, tax_rate: 3}
|
44
|
+
|
45
|
+
city and tax_rate are column name in database. 1, 3 are column number in spreadsheet file.
|
46
|
+
- model: model class for which spreadsheet needs to be imported
|
47
|
+
- options: Hash of options
|
48
|
+
* reader: Custom reader class object
|
49
|
+
* data_processor: Custom data processor class object
|
50
|
+
* importer: Custom importer class object
|
51
|
+
|
52
|
+
## Details
|
53
|
+
- SpreadsheetImport::BaseReader:
|
54
|
+
|
55
|
+
A class inheriting from BaseReader should define an `each_row(required_columns)` method. The method should accept column positions as an array and yield the values of those columns as an array for each spreadsheet row. It is up to you to implement this functionality any way you want. BaseReader also accepts start_row (the row from where the reader should start reading) and end_row (the row at which the reader should stop reading) as options while instantiating the reader, so you can also handle the start_row and end_row options in the `each_row` method.
|
56
|
+
|
57
|
+
- SpreadsheetImport::SimpleReader:
|
58
|
+
|
59
|
+
If you want to use this reader, add the simple-spreadsheet gem to your project. If you don't want to use the simple-spreadsheet gem, or have an alternative gem to read spreadsheet files, then you can create your own reader class by inheriting from BaseReader. You can call all simple-spreadsheet gem methods on a SimpleReader object.
|
60
|
+
|
61
|
+
Example:
|
62
|
+
```ruby
|
63
|
+
reader = SpreadsheetImport::SimpleReader.new('file/abc.xls', start_row: 2, end_row: 30)
|
64
|
+
reader.cell(1, 2) # this is simple-spreadsheet gem method
|
65
|
+
```
|
66
|
+
|
67
|
+
Note: SpreadsheetImport::SimpleReader includes Enumerable module. So you can use enumerable functions like each, select etc
|
68
|
+
|
69
|
+
- SpreadsheetImport::BaseProcessor:
|
70
|
+
|
71
|
+
If you want to process data before importing it to db then create a new processor class which inherits from BaseProcessor.
|
72
|
+
BaseProcessor constructor accepts following parameters
|
73
|
+
1) reader: Reader class object
|
74
|
+
2) mapping: database column name to spreadsheet column number mapping hash
|
75
|
+
3) options: Following options are supported row_processor, row_validator and only_extract_valid_rows.
|
76
|
+
* row_processor: Accepts an object which is responsible for processing rows coming from the `each_row` method of the reader. The row processor should define a `process` method. The `process` method will be called with unprocessed_row and the current instance of the data processor as arguments. unprocessed_row is in the following format
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
{db_column_name1: value_from_spreadsheet, db_column_name2: value_from_spreadsheet}
|
80
|
+
```
|
81
|
+
|
82
|
+
`process` method should return hash in below format.
|
83
|
+
|
84
|
+
```ruby
|
85
|
+
{db_column_name1: processed_value, db_column_name2: processed_value}
|
86
|
+
```
|
87
|
+
|
88
|
+
There is an alternative way to process data other than providing the row_processor option. You can inherit from BaseProcessor and add a `process_row_before_import` method to your class. This method will receive unprocessed_row and you should return the processed row from it.
|
89
|
+
* row_validator: Accepts a class which is responsible for validating a processed row. Row validator class should define `validate` method. This method is called with processed row returned from `process` method of row processor or `process_row_before_import` method and current instance of data processor as second argument. `validate` method should return true or false.
|
90
|
+
There is an alternative way to validate data other than providing the row_validator option. You can inherit from BaseProcessor and add a `valid_row_for_import?` method to your class. This method will receive processed_row and you should return true or false.
|
91
|
+
* only_extract_valid_rows: If true, the `spreadsheet_rows` method will yield only rows that are valid; otherwise it will yield every row. Whether a row is valid is decided either by the row_validator `validate` method or by the `valid_row_for_import?` method.
|
92
|
+
|
93
|
+
Example:
|
94
|
+
|
95
|
+
Consider below csv file
|
96
|
+
```
|
97
|
+
city,county,tax rate,
|
98
|
+
Palm Desert,Some county, 0.2,
|
99
|
+
Marine Corps*,Some county 2,0.9,
|
100
|
+
```
|
101
|
+
|
102
|
+
```ruby
|
103
|
+
# Custom data processor
|
104
|
+
class TaxFileProcessor < SpreadsheetImport::BaseProcessor
|
105
|
+
def process_row_before_import(row)
|
106
|
+
city = row[:city]
|
107
|
+
if city.present?
|
108
|
+
city = city.gsub(/[^a-zA-Z0-9\s]/,'').strip # remove special character and remove spaces from start and end
|
109
|
+
end
|
110
|
+
{ city: city, tax_rate: row[:tax_rate].to_f * 100 }
|
111
|
+
end
|
112
|
+
|
113
|
+
def valid_row_for_import?(row)
|
114
|
+
row[:city].present?
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
reader = SpreadsheetImport::SimpleReader.new('file/abc.csv', start_row: 2)
|
119
|
+
data_processor = TaxFileProcessor.new(reader, {city: 1, tax_rate: 3}, only_extract_valid_rows: true)
|
120
|
+
data_processor.spreadsheet_rows {|row| puts row}
|
121
|
+
# o/p
|
122
|
+
# { city: 'Palm Desert', tax_rate: 20.0 }
|
123
|
+
# { city: 'Marine Corps', tax_rate: 90.0 }
|
124
|
+
```
|
125
|
+
Note: SpreadsheetImport::BaseProcessor includes Enumerable module. So you can use enumerable functions like each, select etc
|
126
|
+
|
127
|
+
- SpreadsheetImport::BaseImporter: It does not implement any functionality. It just specifies some hooks that its subclasses should implement. The SpreadsheetImport::BaseImporter constructor accepts model, data_processor and options as arguments. It uses the data processor's `spreadsheet_rows` method, which yields processed_row and valid (true or false), to call either the handle_valid_row or the handle_invalid_row method. By default handle_valid_row calls the `create_or_update_record(row)` method. `handle_invalid_row` is blank and you can define it in your subclass if you want to handle invalid rows. This may be helpful in case you want to log invalid rows from the csv somewhere.
|
128
|
+
|
129
|
+
- SpreadsheetImport::ActiveRecordImporter::BaseImporter: Constructor accepts following arguments
|
130
|
+
* model: ActiveRecord model class to which we want to import data
|
131
|
+
* options
|
132
|
+
* data_processor: Data processor object
|
133
|
+
* skip_validations: Skips validation if true. false by default
|
134
|
+
* skip_callbacks: Skips callback if true. false by default
|
135
|
+
* unique_by_attributes: accepts a list of attributes which should be unique. If a row is not unique then a new record won't be inserted. The decision whether to update the existing record is made by the update_existing_record option. To find an already existing record, a `where` query is fired which does a case-sensitive comparison between the attributes from the spreadsheet and the db. If you want to add some custom implementation then create your own importer which inherits from SpreadsheetImport::ActiveRecordImporter::BaseImporter and define a `find_duplicate_for_unique_by_attributes(data)` method. find_duplicate_for_unique_by_attributes will be called with data (processed_row). It should return an ActiveRecord::Relation object, a model object or an array of model objects. Currently it has the following implementation: `scoped_model.where(data.slice(*unique_by_attributes))`.
|
136
|
+
|
137
|
+
Note: Always use `scoped_model` method to call any ActiveRecord method
|
138
|
+
* update_existing_record: If true will update the existing record. true by default if unique_by_attributes is present.
|
139
|
+
* scoped_unique: class method or scope name as symbol. This scope is applied for finding record which are already present in database.
|
140
|
+
|
141
|
+
It defines `create_or_update_record` method called by `handle_valid_row`.
|
142
|
+
|
143
|
+
- SpreadsheetImport::ActiveRecordImporter::BulkImporter: This importer utilizes the activerecord-import gem. It's faster than SpreadsheetImport::ActiveRecordImporter::BaseImporter. It is recommended for large spreadsheet files. ActiveRecord callbacks will not be called when BulkImporter is used. It inherits from SpreadsheetImport::ActiveRecordImporter::BaseImporter, so its constructor supports the same arguments. Passing skip_callbacks as true or false doesn't have any effect and callbacks will always be skipped. This is a penalty that you have to pay for performance :). It also supports an additional option, batch_size. It is the number of records that would be inserted at once.
|
144
|
+
Default batch size is 100.
|
145
|
+
|
146
|
+
## Development
|
147
|
+
|
148
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
149
|
+
|
150
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
151
|
+
|
152
|
+
## Contributing
|
153
|
+
|
154
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/rohanpujaris/spreadsheet_import. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
155
|
+
|
156
|
+
|
157
|
+
## License
|
158
|
+
|
159
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
160
|
+
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "spreadsheet_import"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'spreadsheet_import/version'
|
2
|
+
|
3
|
+
# Entry point of the gem: registers autoloads for the reader, processor and
# importer classes and exposes a one-call `import` helper.
module SpreadsheetImport
  autoload :BaseProcessor, 'spreadsheet_import/data_processor/base_processor'
  autoload :BaseReader, 'spreadsheet_import/reader/base_reader'
  autoload :SimpleReader, 'spreadsheet_import/reader/simple_reader'
  autoload :BaseImporter, 'spreadsheet_import/importer/base_importer'

  module ActiveRecordImporter
    autoload :BaseImporter, 'spreadsheet_import/importer/active_record_importer/base_importer'
    autoload :BulkImporter, 'spreadsheet_import/importer/active_record_importer/bulk_importer'
  end

  class << self
    # Wires a reader, a data processor and an importer together and runs the
    # import.
    #
    # file_url - location handed to the default SimpleReader
    # mapping  - Hash handed to the default BaseProcessor
    # model    - passed as the first argument to the importer class
    # options  - :reader          ready-made reader (default: a SimpleReader
    #                             on file_url starting at row 2)
    #            :data_processor  ready-made processor (default: a
    #                             BaseProcessor over the reader and mapping)
    #            :importer        importer class (default:
    #                             ActiveRecordImporter::BaseImporter)
    #
    # Returns whatever the importer's #import returns.
    def import(file_url, mapping, model, options = {})
      source = options[:reader]
      source ||= SimpleReader.new(file_url, start_row: 2)

      processor = options[:data_processor]
      processor ||= BaseProcessor.new(source, mapping)

      importer_class = options[:importer] || ActiveRecordImporter::BaseImporter
      importer_class.new(model, processor).import
    end
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
module SpreadsheetImport
  # Turns the raw rows yielded by a reader into hashes keyed by the names in
  # `mapping`, optionally transforming and validating each one.
  #
  # Processing and validation can be customised either by passing
  # :row_processor / :row_validator objects, or by overriding
  # process_row_before_import / valid_row_for_import? in a subclass.
  class BaseProcessor
    include Enumerable

    attr_reader :reader, :mapping, :row_processor, :row_validator,
                :only_extract_valid_rows

    # reader  - object responding to each_row(columns)
    # mapping - Hash of column name => spreadsheet column
    # options - :row_processor            object responding to process(row, processor)
    #           :row_validator            object responding to validate(row, processor)
    #           :only_extract_valid_rows  when truthy, invalid rows are not yielded
    def initialize(reader, mapping, options = {})
      @reader = reader
      @mapping = mapping
      @row_processor = options[:row_processor]
      @row_validator = options[:row_validator]
      @only_extract_valid_rows = options[:only_extract_valid_rows]
    end

    # Yields (processed_row, valid) for each row read from the reader.
    # With only_extract_valid_rows set, only valid rows are yielded (always
    # with `true` as the second value).
    #
    # Returns an Enumerator when called without a block, so that `each` and
    # the Enumerable helpers can be used lazily instead of raising
    # LocalJumpError.
    def spreadsheet_rows
      return enum_for(:spreadsheet_rows) unless block_given?

      reader.each_row(mapping.values) do |row|
        processed_row = process_row(row)
        valid_row = valid_row?(processed_row)
        if only_extract_valid_rows
          valid_row && yield(processed_row, true)
        else
          yield(processed_row, valid_row)
        end
      end
    end
    alias_method :each, :spreadsheet_rows

    protected

    # Pairs the mapping keys with the row values by position: the reader is
    # asked for mapping.values, so row[i] belongs to mapping.keys[i].
    def unprocessed_row(row)
      {}.tap do |attributes|
        mapping.keys.each_with_index do |column_name, index|
          attributes[column_name] = row[index]
        end
      end
    end

    # Builds the keyed row and hands it to the row_processor when one was
    # given, otherwise to the process_row_before_import hook.
    def process_row(row)
      unprocessed_row = unprocessed_row(row)
      if row_processor
        row_processor.process(unprocessed_row, self)
      else
        process_row_before_import(unprocessed_row)
      end
    end

    # Asks the row_validator when one was given, otherwise the
    # valid_row_for_import? hook.
    def valid_row?(row)
      if row_validator
        row_validator.validate(row, self)
      else
        valid_row_for_import?(row)
      end
    end

    # Hook: override to transform a row. The default returns it unchanged.
    def process_row_before_import(row)
      row
    end

    # Hook: override to reject rows. The default accepts every row.
    def valid_row_for_import?(_row)
      true
    end
  end
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
module SpreadsheetImport
  module ActiveRecordImporter
    # Importer that writes processed rows through ActiveRecord, one row at a
    # time.
    #
    # Options (all optional):
    #   :unique_by_attributes   - attribute names used to look up existing
    #                             records matching a row
    #   :update_existing_record - whether matching records are updated; when
    #                             not given it is true exactly when
    #                             :unique_by_attributes is given
    #   :scoped_unique          - class method / scope name sent to the model
    #                             class when looking up existing records
    #   :skip_validations       - persist through a subclass whose validate /
    #                             validation callbacks have been reset
    #   :skip_callbacks         - persist through a subclass whose
    #                             CALLBACKS_TO_SKIP callbacks have been reset
    class BaseImporter < SpreadsheetImport::BaseImporter
      CALLBACKS_TO_SKIP = [:validation, :save, :create, :update, :commit]

      attr_reader :skip_validations, :skip_callbacks, :unique_by_attributes,
                  :update_existing_record, :scoped_unique

      def initialize(model, data_processor, options = {})
        super
        @unique_by_attributes = options[:unique_by_attributes]
        @update_existing_record = if options[:update_existing_record].nil?
                                    !unique_by_attributes.nil?
                                  else
                                    options[:update_existing_record]
                                  end
        @scoped_unique = options[:scoped_unique]
        @skip_validations = options[:skip_validations]
        @skip_callbacks = options[:skip_callbacks]
      end

      # Looks up records whose unique_by_attributes equal those of `data`.
      # Override to customise duplicate detection.
      def find_duplicate_for_unique_by_attributes(data)
        scoped_model.where(data.slice(*unique_by_attributes))
      end

      # Persists one processed row.
      #
      # Without unique_by_attributes every row is inserted. With it, a row
      # that matches existing records updates them only when
      # update_existing_record is true, and a row with no match is inserted.
      # (Previously a row was silently dropped whenever unique_by_attributes
      # was set and update_existing_record was false, even if no duplicate
      # existed.)
      def create_or_update_record(data)
        return create_record(data) unless unique_by_attributes

        duplicate_records = find_duplicate_for_unique_by_attributes(data)
        if duplicate_records.present?
          update_record(duplicate_records, data) if update_existing_record
        else
          create_record(data)
        end
      end

      # Builds and saves a new record; a failed save is reported through
      # handle_validation_failure. Returns the record either way.
      def create_record(data)
        record = anoymous_model.new(data)
        unless record.save
          handle_validation_failure(record, data)
        end
        record
      end

      def update_record(records, data)
        update_only_if_data_changed(records, data)
      end

      # Updates each given record only when at least one attribute in `data`
      # differs from the stored value. Accepts a single model instance or a
      # collection. A failed update is reported through
      # handle_validation_failure, mirroring create_record.
      def update_only_if_data_changed(records, data)
        records = [records] if records.is_a?(ActiveRecord::Base)
        records.each do |record|
          next unless data.any? { |name, value| record.read_attribute(name) != value }

          handle_validation_failure(record, data) unless record.update_attributes(data)
        end
      end

      protected

      # Hook: override to react to a record that failed to save.
      def handle_validation_failure(record, data); end

      # Model class used for queries, with the scoped_unique scope applied
      # when one was configured.
      def scoped_model
        scoped_unique ? anoymous_model.send(scoped_unique) : anoymous_model
      end

      # Class used to instantiate and query records (memoized).
      #
      # - skip_validations and skip_callbacks: a bare ActiveRecord::Base
      #   subclass pointing at the model's table
      # - skip_validations only: a subclass of the model with its validate and
      #   validation callbacks reset
      # - skip_callbacks only: a subclass of the model with every callback in
      #   CALLBACKS_TO_SKIP reset
      # - neither: the model itself
      def anoymous_model
        @anoymous_model ||= if skip_validations && skip_callbacks
                              table_name = model.table_name
                              Class.new(ActiveRecord::Base) { self.table_name = table_name }
                            elsif skip_validations
                              Class.new(model) do
                                def self.name
                                  "#{superclass.name}NoValidation"
                                end
                                reset_callbacks :validate
                                reset_callbacks :validation
                              end
                            elsif skip_callbacks
                              Class.new(model) do
                                def self.name
                                  "#{superclass.name}NoCallback"
                                end
                                CALLBACKS_TO_SKIP.each { |name| reset_callbacks(name) }
                              end
                            else
                              model
                            end
      end
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'activerecord-import'
|
2
|
+
|
3
|
+
module SpreadsheetImport
  module ActiveRecordImporter
    # Importer that queues rows and inserts them in batches through the
    # model's `import` class method.
    #
    # Accepts the same options as ActiveRecordImporter::BaseImporter plus
    # :batch_size (default 100). :skip_callbacks is always forced to true.
    class BulkImporter < BaseImporter
      attr_reader :batch_size

      def initialize(model, data_processor, options = {})
        # Non-destructive merge: the caller's options hash must not be
        # mutated (the original used merge!).
        super(model, data_processor, options.merge(skip_callbacks: true))
        @batch_size = options[:batch_size] || 100
        @batch = []
      end

      # Runs the row-by-row import, then flushes whatever is still queued.
      def import
        super
        !@batch.empty? && execute_batch
      end

      # Queues the row instead of saving it, and flushes the queue once it
      # holds batch_size rows. When unique_by_attributes is set, a row that
      # matches an already queued row on all of those attributes is not
      # queued.
      def create_record(data)
        if unique_by_attributes.nil? || unique_in_batch?(data)
          @batch << data
          batch_size == @batch.length && execute_batch
        end
      end

      # True when no queued row equals `data` on every unique_by_attributes
      # attribute.
      def unique_in_batch?(data)
        @batch.none? do |batch_record|
          unique_by_attributes.all? do |attr_name|
            data[attr_name] == batch_record[attr_name]
          end
        end
      end

      # Inserts all queued rows in one call and empties the queue. Column
      # names come from the data processor's mapping keys.
      def execute_batch
        model.import(data_processor.mapping.keys,
                     @batch.map(&:values), validate: !skip_validations)
        @batch = []
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module SpreadsheetImport
  # Abstract importer: walks the rows produced by a data processor and
  # dispatches each one to a hook. Subclasses provide the persistence logic
  # by implementing create_or_update_record.
  class BaseImporter
    attr_reader :model, :data_processor

    # model          - target the subclass imports into
    # data_processor - object whose spreadsheet_rows yields (row, valid)
    # options        - not used here; accepted so subclasses can call `super`
    #                  with their own options
    def initialize(model, data_processor, options = {})
      @model = model
      @data_processor = data_processor
    end

    # Sends every valid row to handle_valid_row and every invalid row to
    # handle_invalid_row.
    def import
      data_processor.spreadsheet_rows do |row, valid|
        if valid
          handle_valid_row(row)
        else
          handle_invalid_row(row)
        end
      end
    end

    # Hook called by the default handle_valid_row. Subclasses must override
    # it; the base implementation fails with an explicit message instead of a
    # bare NoMethodError.
    def create_or_update_record(_row)
      raise 'create_or_update_record method must be implemented by SpreadsheetImport::BaseImporter subclass'
    end

    protected

    def handle_valid_row(row)
      create_or_update_record(row)
    end

    # Hook: override to log or collect rejected rows. No-op by default.
    def handle_invalid_row(row); end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module SpreadsheetImport
  # Abstract reader. Subclasses open the spreadsheet and implement each_row.
  class BaseReader
    attr_reader :file_url, :spreadsheet, :start_row, :end_row

    # file_url - location of the spreadsheet
    # options  - :start_row  first row to read (default 1)
    #            :end_row    last row to read (nil when not given; the
    #                        original never assigned it although the reader
    #                        attribute exists)
    def initialize(file_url, options = {})
      @file_url = file_url
      @start_row = options[:start_row] || 1
      @end_row = options[:end_row]
    end

    # Must be overridden by subclasses to yield the requested columns of each
    # row as an array.
    def each_row(_)
      raise 'each_row method must be implemented by SpreadsheetImport::BaseReader subclass'
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'simple-spreadsheet'
|
2
|
+
|
3
|
+
module SpreadsheetImport
  # Reader backed by the simple-spreadsheet gem. Any method it does not
  # define itself is delegated to the underlying workbook.
  class SimpleReader < BaseReader
    include Enumerable

    # Opens the workbook at file_url. :end_row defaults to the workbook's
    # last row.
    def initialize(file_url, options = {})
      super
      @spreadsheet = SimpleSpreadsheet::Workbook.read(file_url)
      @end_row = options[:end_row] || spreadsheet.last_row
    end

    # Inclusive range of row numbers to read.
    def row_range
      (start_row..end_row)
    end

    # Every column of the workbook, from first_column to last_column.
    def default_required_columns
      spreadsheet.first_column.upto(spreadsheet.last_column)
    end

    # Yields, for every row in row_range, an array holding the cells of
    # required_columns in the given order.
    #
    # Returns an Enumerator when called without a block instead of raising
    # LocalJumpError.
    def each_row(required_columns = default_required_columns)
      return enum_for(:each_row, required_columns) unless block_given?

      row_range.each do |row|
        yield required_columns.map { |col| spreadsheet.cell(row, col) }
      end
    end
    alias_method :each, :each_row

    # Delegates unknown public methods to the workbook, forwarding the block
    # as well (the original dropped it).
    def method_missing(method, *args, &block)
      if spreadsheet.respond_to?(method)
        spreadsheet.public_send(method, *args, &block)
      else
        super
      end
    end

    # Only public workbook methods are reported: delegation goes through
    # public_send, so private ones could never be dispatched.
    def respond_to_missing?(method_name, include_private = false)
      spreadsheet.respond_to?(method_name) || super
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'spreadsheet_import/version'
|
5
|
+
|
6
|
+
# Gem packaging metadata. The reader and importer backends (simple-spreadsheet,
# activerecord-import) are declared as development dependencies only.
Gem::Specification.new do |spec|
  spec.name          = "spreadsheet_import"
  spec.version       = SpreadsheetImport::VERSION
  spec.authors       = ["Rohan Pujari"]
  spec.email         = ["rohanpujaris@gmail.com"]
  # "xls" was listed twice in the original summary.
  spec.summary       = "Import csv, xls, xlsx and ods file directly to database. " \
                       "Supports bulk update via activerecord-import gem"
  spec.description   = "Import spreadsheet directly to database. " \
                       "Supports simple import as well as bulk import. " \
                       "Bulk import uses activerecord-import gem. " \
                       "Options to unique record import, updating of duplicate record, " \
                       "skipping_callback and skipping validation."
  spec.homepage      = "https://github.com/rohanpujaris/spreadsheet_import"
  spec.license       = "MIT"
  # Package every tracked file except tests/specs/features.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]
  spec.add_development_dependency "bundler"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "database_cleaner"
  spec.add_development_dependency "simple-spreadsheet", ">= 0.1"
  spec.add_development_dependency "activerecord-import", ">= 0.2.3"
  spec.add_development_dependency "sqlite3"
end
|
metadata
ADDED
@@ -0,0 +1,167 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: spreadsheet_import
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Rohan Pujari
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-06-05 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: database_cleaner
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: simple-spreadsheet
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0.1'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0.1'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: activerecord-import
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 0.2.3
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 0.2.3
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: sqlite3
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
description: "\n Import spreadsheet directly to database.\n
|
112
|
+
\ Supports simple import as well as bulk import.\n Bulk
|
113
|
+
import used activerecord-import gem.\n Options to unique
|
114
|
+
record import, updating of duplicate record,\n skipping_callback
|
115
|
+
and skipping validation.\n "
|
116
|
+
email:
|
117
|
+
- rohanpujaris@gmail.com
|
118
|
+
executables: []
|
119
|
+
extensions: []
|
120
|
+
extra_rdoc_files: []
|
121
|
+
files:
|
122
|
+
- ".gitignore"
|
123
|
+
- ".rspec"
|
124
|
+
- ".travis.yml"
|
125
|
+
- CODE_OF_CONDUCT.md
|
126
|
+
- Gemfile
|
127
|
+
- LICENSE.txt
|
128
|
+
- README.md
|
129
|
+
- Rakefile
|
130
|
+
- bin/console
|
131
|
+
- bin/setup
|
132
|
+
- lib/spreadsheet_import.rb
|
133
|
+
- lib/spreadsheet_import/data_processor/base_processor.rb
|
134
|
+
- lib/spreadsheet_import/hash_extension.rb
|
135
|
+
- lib/spreadsheet_import/importer/active_record_importer/base_importer.rb
|
136
|
+
- lib/spreadsheet_import/importer/active_record_importer/bulk_importer.rb
|
137
|
+
- lib/spreadsheet_import/importer/base_importer.rb
|
138
|
+
- lib/spreadsheet_import/reader/base_reader.rb
|
139
|
+
- lib/spreadsheet_import/reader/simple_reader.rb
|
140
|
+
- lib/spreadsheet_import/version.rb
|
141
|
+
- spreadsheet_import.gemspec
|
142
|
+
homepage: https://github.com/rohanpujaris/spreadsheet_import
|
143
|
+
licenses:
|
144
|
+
- MIT
|
145
|
+
metadata: {}
|
146
|
+
post_install_message:
|
147
|
+
rdoc_options: []
|
148
|
+
require_paths:
|
149
|
+
- lib
|
150
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
151
|
+
requirements:
|
152
|
+
- - ">="
|
153
|
+
- !ruby/object:Gem::Version
|
154
|
+
version: '0'
|
155
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - ">="
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
requirements: []
|
161
|
+
rubyforge_project:
|
162
|
+
rubygems_version: 2.4.8
|
163
|
+
signing_key:
|
164
|
+
specification_version: 4
|
165
|
+
summary: Import csv, xls, xls, xlsx and ods file directly to database. Supports bulk
|
166
|
+
update via activerecord-import gem
|
167
|
+
test_files: []
|