bulk_importer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +77 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/bulk_importer.gemspec +26 -0
- data/lib/bulk_importer/postgresql_module.rb +137 -0
- data/lib/bulk_importer/version.rb +3 -0
- data/lib/bulk_importer.rb +298 -0
- metadata +112 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 3a3981e26851ce310caa14a2fc257e9c651eccad
|
|
4
|
+
data.tar.gz: 27cbfe3409574d837f8af6fae3509c71def416bb
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: a25b6f749c6f54136edc8697d25dc3a5b270630f9f38a064fbb83a606038b4d45ac3a8b0f212831e2b4e5c85ebc461a6a19433181c7b3b8c1e1178c188347786
|
|
7
|
+
data.tar.gz: 7ff8e23b0af3ab71d61c8a8a18b972dc416f6bb9639985c54c8872deb23772326017c21faf7cd2a001f9ca8a8c5a7904140f037daa189f9756eb833b040d0c45
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2016 Abel M. Osorio
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# BulkImporter
|
|
2
|
+
|
|
3
|
+
This gem allows you to import a large amount of data from files into your database as quickly as possible.
|
|
4
|
+
|
|
5
|
+
Suppose you have a CSV file with 500k+ rows. Or even better: 2kk (two million) rows. Go, import it. Yes, I know, you can do it... right? Now, do it in Rails... systematically :)
|
|
6
|
+
This task can be very frustrating and slow. You need a _bulk import_ operation. That is it. Thanks to BulkImporter you only need to write a few statements and you'll get your data imported _and_ updated.
|
|
7
|
+
|
|
8
|
+
For now, we only support PostgreSQL 9.4+ databases, thanks to the [COPY command](https://www.postgresql.org/docs/9.4/static/sql-copy.html).
|
|
9
|
+
|
|
10
|
+
## Installation
|
|
11
|
+
|
|
12
|
+
Add this line to your application's Gemfile:
|
|
13
|
+
|
|
14
|
+
```ruby
|
|
15
|
+
gem 'bulk_importer'
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
And then execute:
|
|
19
|
+
|
|
20
|
+
$ bundle
|
|
21
|
+
|
|
22
|
+
Or install it yourself as:
|
|
23
|
+
|
|
24
|
+
$ gem install bulk_importer
|
|
25
|
+
|
|
26
|
+
## Usage
|
|
27
|
+
|
|
28
|
+
Suppose your CSV file _/tmp/names.csv_ (delimited by TABs) has the columns _ID_ and _LOVELY_NAME_, and you need to import this information into your _names_ table, which has the fields _id_ and _name_.
|
|
29
|
+
Also, you want to keep your preexisting data but update it if necessary.
|
|
30
|
+
|
|
31
|
+
```ruby
|
|
32
|
+
csv_file = File.new '/tmp/names.csv'
|
|
33
|
+
|
|
34
|
+
csv_columns = {
|
|
35
|
+
'ID' => 'id',
|
|
36
|
+
'LOVELY_NAME' => 'name'
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
# Suppose your table's key is the 'id' column.
|
|
40
|
+
keys = { 'ID' => 'id' }
|
|
41
|
+
|
|
42
|
+
BulkImporter.import_from_csv(
|
|
43
|
+
'names',
|
|
44
|
+
csv_file,
|
|
45
|
+
csv_columns,
|
|
46
|
+
keys,
|
|
47
|
+
delimiter: '\t',
|
|
48
|
+
# UPDATE_MODE_UPDATE inserts the new data and updates the
|
|
49
|
+
# preexisting rows (it searches by keys).
|
|
50
|
+
update_mode: BulkImporter::UPDATE_MODE_UPDATE
|
|
51
|
+
)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
That's all! The imported data will be inserted in your _names_ table in a few seconds.
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
## Development
|
|
58
|
+
|
|
59
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
|
60
|
+
|
|
61
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
|
62
|
+
|
|
63
|
+
## Contributing
|
|
64
|
+
|
|
65
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/abelosorio/bulk_importer.
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
## License
|
|
69
|
+
|
|
70
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
|
71
|
+
|
|
72
|
+
## TO-DO
|
|
73
|
+
|
|
74
|
+
* Write more tests!
|
|
75
|
+
* Decouple database queries. This gem should work with any database engine. Yes, even MySQL.
|
|
76
|
+
* Since PostgreSQL 9.5 we could implement the UPDATE_MODE_UPDATE method by using [INSERT](https://www.postgresql.org/docs/9.5/static/sql-insert.html) command with the [ON CONFLICT](https://www.postgresql.org/docs/9.5/static/sql-insert.html#SQL-ON-CONFLICT) option.
|
|
77
|
+
* Support different database schemas.
|
data/Rakefile
ADDED
data/bin/console
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "bulk_importer"
|
|
5
|
+
|
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
|
8
|
+
|
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
|
10
|
+
# require "pry"
|
|
11
|
+
# Pry.start
|
|
12
|
+
|
|
13
|
+
require "irb"
|
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# coding: utf-8
# Gem specification for bulk_importer.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'bulk_importer/version'

Gem::Specification.new do |spec|
  spec.name          = "bulk_importer"
  spec.version       = BulkImporter::VERSION
  spec.authors       = ["Abel M. Osorio"]
  spec.email         = ["abel.m.osorio@gmail.com"]

  spec.summary       = %q{Bulk importer for Ruby on Rails.}
  spec.description   = %q{Import big amount of data into any table of your project.}
  spec.homepage      = "https://github.com/abelosorio/bulk_importer"
  spec.license       = "MIT"

  # Ship every tracked file except the test suites.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # lib/bulk_importer.rb requires "active_support" when it is loaded, so the
  # dependency is needed at runtime, not only for development. The gem that
  # provides that file is published as "activesupport".
  spec.add_dependency "activesupport"

  spec.add_development_dependency "bundler", "~> 1.11"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "minitest", "~> 5.0"
end
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# PostgreSQL module for Bulk importer.
|
|
2
|
+
#
|
|
3
|
+
# +see+ https://www.postgresql.org/docs/9.4/static/index.html
|
|
4
|
+
#
|
|
5
|
+
module PostgresqlModule
  # Copy from file or standard input.
  #
  # Builds a COPY ... FROM statement and runs it through
  # ActiveRecord::Base.connection.
  #
  # +see+ https://www.postgresql.org/docs/9.4/static/sql-copy.html
  #
  # ==== Parameters
  #
  # * +from+ A File object (its path is used) or the string 'stdin'
  #   (any letter case).
  # * +target+ Destination table name. Interpolated into the SQL as is.
  #
  # ==== Options
  #
  # * +format+ File format. Defaults to 'csv'.
  # * +delimiter+ Column separator character. Defaults ','.
  # * +null+ String that represent null values. Defaults '' (empty).
  # * +header+ File includes header? Defaults to True.
  #
  # ==== Return
  #
  # +integer+ Number of imported rows (+cmd_tuples+ of the result), or -1
  # when +from+ is neither a File nor 'stdin'.
  #
  def self.copy_from(from, target, format: 'csv', delimiter: ',', null: '', header: true)
    source = import_source(from)
    return -1 if source.nil?

    sql = make_import_sql(
      source,
      target,
      format: format,
      delimiter: delimiter,
      null: null,
      header: header
    )

    ActiveRecord::Base.connection.execute(sql).cmd_tuples
  end

  # Get column types.
  #
  # ==== Parameters
  #
  # * +table+ Table name to look up in information_schema.columns.
  #
  # ==== Return
  #
  # +hash+ Column name => type name (the +name+ and +type+ fields of each
  # row returned by the query built in make_get_column_types_sql).
  #
  def self.get_column_types(table)
    rows = ActiveRecord::Base.connection.execute(make_get_column_types_sql(table))

    rows.each_with_object({}) { |row, types| types[row['name']] = row['type'] }
  end

  # Make the SQL statement of COPY FROM sentence
  #
  # ==== Parameters
  #
  # * +from+ Path of the file to read, or 'stdin' (any letter case).
  # * +target+ Destination table name. Interpolated into the SQL as is.
  #
  # ==== Options
  #
  # * +format+ File format. Defaults to 'csv'.
  # * +columns+ An optional array of columns to be copied. If no column list
  #   is specified, all columns of the table will be copied.
  # * +delimiter+ Column separator character. Defaults ','.
  # * +null+ String that represent null values. Defaults '' (empty).
  # * +header+ File includes header? Defaults to True.
  #
  # ==== Return
  #
  # +string+
  #
  def self.make_import_sql(from, target, format: 'csv', columns: [], delimiter: ',', null: '', header: true)
    source = from.to_s

    sql = ["COPY #{target}"]
    sql << "(#{columns.join(',')})" unless columns.empty?
    sql << 'FROM'
    # Single quotes in the path are doubled so they cannot close the literal.
    sql << (source.casecmp('stdin').zero? ? 'STDIN' : "'#{escape_literal(source)}'")
    sql << make_import_options_sql(format, delimiter, null, header)

    # The options fragment is nil when there is nothing to add.
    sql.compact.join(' ')
  end

  # Makes the SQL options to COPY command.
  #
  # * +format+ File format. Defaults to 'csv'.
  # * +delimiter+ Column separator character. Defaults ','. Emitted inside an
  #   E'...' literal, so both a real TAB ("\t") and the two characters
  #   backslash + t ('\t') select a TAB separator.
  # * +null+ String that represent null values. Defaults '' (empty).
  # * +header+ File includes header? Defaults to True. Only emitted for the
  #   'csv' format.
  #
  # ==== Return
  #
  # +string+ The "WITH ..." fragment, or +nil+ when no option applies.
  #
  def self.make_import_options_sql(format = 'csv', delimiter = ',', null = '', header = true)
    format    = format.to_s.strip
    delimiter = delimiter.to_s
    null      = null.to_s

    options = []
    options << format.upcase unless format.empty?
    # Checked with empty? on purpose: whitespace separators such as TAB or a
    # single space are legitimate and must not be discarded.
    options << "DELIMITER E'#{escape_literal(delimiter)}'" unless delimiter.empty?
    options << "NULL '#{escape_literal(null)}'" unless null.empty?
    options << 'HEADER' if header == true && format.casecmp('csv').zero?

    return nil if options.empty?

    "WITH #{options.join(' ')}"
  end

  # Makes the SQL sentence to get column types. This should return two columns
  # at least: name, type.
  #
  # +see+ https://www.postgresql.org/docs/9.4/static/infoschema-columns.html
  #
  # ==== Parameters
  #
  # * +table+ Table name, compared against information_schema's table_name.
  #
  # ==== Return
  #
  # +string+ The SELECT statement.
  #
  # TODO: Add support to different schemas.
  #
  def self.make_get_column_types_sql(table)
    <<-eof
      SELECT column_name as name,
             udt_name as type
      FROM information_schema.columns
      WHERE table_name = '#{escape_literal(table)}'
    eof
  end

  # Resolves the +from+ argument of copy_from into the value handed to
  # make_import_sql: the path of a File, the string itself when it is
  # 'stdin', or nil for anything else.
  def self.import_source(from)
    return from.path if from.is_a?(File)
    return from if from.is_a?(String) && from.casecmp('stdin').zero?

    nil
  end

  # Doubles single quotes so +value+ can be placed between single quotes in
  # a SQL literal.
  def self.escape_literal(value)
    value.to_s.gsub("'", "''")
  end

  private_class_method :import_source, :escape_literal
end
|
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
require "bulk_importer/version"
|
|
2
|
+
require "active_support"
|
|
3
|
+
require "active_support/core_ext"
|
|
4
|
+
require "bulk_importer/postgresql_module.rb"
|
|
5
|
+
|
|
6
|
+
module BulkImporter
  # Update modes
  UPDATE_MODE_APPEND = 'append'.freeze
  UPDATE_MODE_UPDATE = 'update'.freeze
  UPDATE_MODE_REPLACE = 'replace'.freeze

  # Import data from a CSV file to an existing table
  #
  # The file is first copied into a temporary table whose columns are all
  # +text+, then moved to +target+ according to +update_mode+. The temporary
  # table is always dropped afterwards.
  #
  # ==== Parameters
  #
  # * +target+ Target table.
  # * +file+ Source CSV file (a File object).
  # * +columns+ Hash of CSV column name => target column name. Entries whose
  #   value is nil are loaded into the temporary table but not moved.
  # * +keys+ Hash of CSV column name => target column name for the columns
  #   that identify a row of the target table.
  #
  # ==== Options
  #
  # * +delimiter+
  # * +null+
  # * +header+
  # * +update_mode+ Update mode for imported data.
  #
  # ==== Updated modes
  #
  # * +self::UPDATE_MODE_APPEND+ Move only new data (default).
  # * +self::UPDATE_MODE_UPDATE+ Insert new data and update preexisting.
  # * +self::UPDATE_MODE_REPLACE+ Truncate old data and insert the new one.
  #
  # ==== Return
  #
  # +integer+ Number of imported rows, or -1 when +file+ is not a File or
  # when any step raises a StandardError (the error is logged).
  #
  def self.import_from_csv(target, file, columns, keys, delimiter: ',', null: '', header: true, update_mode: UPDATE_MODE_APPEND)
    return -1 unless file.is_a? File

    conn = ActiveRecord::Base.connection
    temp_name = "#{target}_#{Time.now.to_i}_temporal"

    begin
      # Create temporary table (with all CSV fields)
      log :debug, "[#{self.name}] Creating temporary table #{temp_name}(#{columns.keys})"
      conn.execute make_create_temp_table_sql(temp_name, columns.keys)

      # Import data
      log :debug, "[#{self.name}] Importing data from #{file.path} to #{temp_name}"
      PostgresqlModule.copy_from(
        file,
        temp_name,
        format: 'csv',
        delimiter: delimiter,
        null: null,
        header: header
      )

      # Move data from temporary table to target and return total imported rows
      log :debug, "[#{self.name}] Moving new data to #{target} with mode #{update_mode}"
      move_imported_data(temp_name, target, columns, keys, update_mode)
    rescue StandardError => e
      # StandardError only: signals and exit requests must not be swallowed.
      log :error, e.message
      log :error, Array(e.backtrace).join("\n")
      -1
    ensure
      # Drop temporary table (if exists)
      conn.execute "DROP TABLE IF EXISTS #{temp_name}"
    end
  end

  # Move imported data from origin (raw imported) to destination.
  #
  # All the generated statements run inside one transaction, so a failure
  # in a later statement rolls back the earlier ones.
  #
  # ==== Parameters
  #
  # * +origin+ Origin (temporary table).
  # * +destination+ Destination table.
  # * +columns+ Hash of CSV column name => destination column name.
  # * +keys+ Hash of CSV key column => destination key column.
  # * +update_mode+ Update mode.
  #
  # ==== Return
  #
  # +integer+ Sum of the rows affected by every executed statement.
  #
  # ==== Raise
  #
  # +RuntimeError+ when +update_mode+ is not one of the UPDATE_MODE_* values.
  #
  def self.move_imported_data(origin, destination, columns, keys, update_mode)
    unless is_update_mode_valid update_mode
      raise "[#{self.name}] Unknown update mode: #{update_mode}"
    end

    queries = make_move_imported_data_sql(
      origin,
      destination,
      columns,
      keys,
      update_mode
    )

    connection = ActiveRecord::Base.connection

    ActiveRecord::Base.transaction do
      queries.inject(0) do |rows, query|
        log :debug, "[#{self.name}] Running query <<#{query}>>"
        rows + connection.execute(query).cmd_tuples
      end
    end
  end

  # Makes the SQL command to move the imported data.
  #
  # ==== Parameters
  #
  # * +origin+ Origin (temporary table).
  # * +destination+ Destination table.
  # * +columns+ Hash of CSV column name => destination column name.
  # * +keys+ Hash of CSV key column => destination key column.
  # * +update_mode+ Update mode (String or Symbol).
  #
  # ==== Return
  #
  # +array+ Array of queries to execute, or +nil+ for an unknown mode.
  #
  def self.make_move_imported_data_sql(origin, destination, columns, keys, update_mode)
    # to_s so that Symbols accepted by is_update_mode_valid also match here.
    case update_mode.to_s
    when UPDATE_MODE_APPEND
      # Insert new
      make_update_mode_append_sql origin, destination, columns, keys
    when UPDATE_MODE_UPDATE
      # Update preexisting and Insert new
      make_update_mode_update_sql origin, destination, columns, keys
    when UPDATE_MODE_REPLACE
      # Truncate destination and Insert new (all)
      make_update_mode_replace_sql origin, destination, columns, keys
    end
  end

  # Makes the SQL command to append new imported data.
  #
  # Only rows whose key is not already present in +destination+ are
  # inserted. Every origin column is cast to the type of the destination
  # column it maps to, because the temporary table stores plain text.
  #
  # ==== Parameters
  #
  # * +origin+ Origin (temporary table).
  # * +destination+ Destination table.
  # * +columns+ Hash of CSV column name => destination column name. Entries
  #   with a nil value are skipped; the hash itself is not modified.
  # * +keys+ Hash of CSV key column => destination key column.
  #
  # ==== Return
  #
  # +array+ Array of queries to execute
  #
  def self.make_update_mode_append_sql(origin, destination, columns, keys)
    mapped = mapped_columns(columns)
    types = column_types(destination, mapped)

    sql = [
      "INSERT INTO #{destination}",
      "(#{mapped.values.join(',')})",
      "SELECT #{keys_to_list(mapped.keys, 'o', types)}",
      "FROM #{origin} o",
      # Origin keys are cast too, so text is not compared with typed columns.
      "WHERE (#{keys_to_list(keys.keys, 'o', types)}) NOT IN",
      # One output column per key, so composite keys line up with the row
      # constructor on the left-hand side.
      "( SELECT #{keys_to_list(keys.values, 'd')}",
      "FROM #{destination} d )"
    ]

    [ sql.join(' ') ]
  end

  # Makes the SQL commands to update preexisting data and insert the new one.
  #
  # The UPDATE comes first so that rows inserted by the second statement are
  # not updated (and counted) again.
  #
  # ==== Parameters
  #
  # * +origin+ Origin (temporary table).
  # * +destination+ Destination table.
  # * +columns+ Hash of CSV column name => destination column name. Entries
  #   with a nil value are skipped; the hash itself is not modified.
  # * +keys+ Hash of CSV key column => destination key column.
  #
  # ==== Return
  #
  # +array+ Array of queries to execute
  #
  def self.make_update_mode_update_sql(origin, destination, columns, keys)
    mapped = mapped_columns(columns)
    types = column_types(destination, mapped)

    assignments = mapped.map do |csv_column, column|
      "#{column} = #{keys_to_list([ csv_column ], 'o', types)}"
    end

    update = [
      "UPDATE #{destination} d SET",
      assignments.join(','),
      "FROM #{origin} o",
      # Row comparison: valid for one key column as well as for several.
      "WHERE (#{keys_to_list(keys.keys, 'o', types)})",
      "= (#{keys_to_list(keys.values, 'd')})"
    ].join(' ')

    [ update ] + make_update_mode_append_sql(origin, destination, columns, keys)
  end

  # Makes the SQL command to remove existing data and Insert new (all).
  #
  # ==== Parameters
  #
  # * +origin+ Origin (temporary table).
  # * +destination+ Destination table.
  # * +columns+ Hash of CSV column name => destination column name.
  # * +key+ Hash of CSV key column => destination key column.
  #
  # ==== Return
  #
  # +array+ Array of queries to execute
  #
  def self.make_update_mode_replace_sql(origin, destination, columns, key)
    [ "TRUNCATE TABLE #{destination}" ] +
      make_update_mode_append_sql(origin, destination, columns, key)
  end

  # Makes the SQL command to create a temporary table.
  #
  # ==== Parameters
  #
  # * +name+ Name of temporary table.
  # * +columns+ Array of columns (just name). Every column is created as text.
  #
  # ==== Return
  #
  # +string+
  #
  def self.make_create_temp_table_sql(name, columns)
    definitions = columns.map { |column| "#{column} text" }

    "CREATE TEMPORARY TABLE #{name} (#{definitions.join(',')})"
  end

  # Translate an array of keys in a list with an optional prefix.
  #
  # ==== Parameters
  #
  # * +keys+ Column names.
  # * +prefix+ Optional table alias, joined to each name with a dot.
  # * +types+ Optional hash of name => type; when a type is found the column
  #   is suffixed with "::type".
  #
  # ==== Return
  #
  # * +string+ Comma separated list, e.g. "o.a::int4,o.b".
  #
  def self.keys_to_list(keys, prefix = nil, types = nil)
    keys.map do |key|
      column = [ prefix, key ].compact.join('.')
      types.nil? ? column : [ column, types[key] ].compact.join('::')
    end.join(',')
  end

  # Check if the update mode is valid.
  #
  # ==== Parameters
  #
  # * +update_mode+ Compared, as a string, with the UPDATE_MODE_* constants.
  #
  # ==== Return
  #
  # +bool+
  #
  def self.is_update_mode_valid(update_mode)
    mode_constants = constants.select { |constant| constant.to_s.start_with? 'UPDATE_MODE_' }

    mode_constants.map { |constant| const_get(constant) }.include? update_mode.to_s
  end

  # Sends +message+ to Rails.logger at +level+; does nothing when Rails or
  # its logger is not available.
  def self.log(level, message)
    return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger

    Rails.logger.public_send(level, message)
  end

  # Returns a copy of +columns+ without the entries whose value is nil.
  def self.mapped_columns(columns)
    columns.reject { |_csv_column, column| column.nil? }
  end

  # Returns a hash of CSV column name => type of the destination column it
  # maps to, as reported by PostgresqlModule.get_column_types.
  def self.column_types(destination, mapped)
    pg_types = PostgresqlModule.get_column_types destination

    mapped.each_with_object({}) do |(csv_column, column), types|
      types[csv_column] = pg_types[column]
    end
  end

  private_class_method :log, :mapped_columns, :column_types
end
|
metadata
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: bulk_importer
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Abel M. Osorio
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: exe
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2016-12-01 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: bundler
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '1.11'
|
|
20
|
+
type: :development
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '1.11'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: rake
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '10.0'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '10.0'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: minitest
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '5.0'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '5.0'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: active_support
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - ">="
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '0'
|
|
62
|
+
type: :development
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - ">="
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '0'
|
|
69
|
+
description: Import big amount of data into any table of your project.
|
|
70
|
+
email:
|
|
71
|
+
- abel.m.osorio@gmail.com
|
|
72
|
+
executables: []
|
|
73
|
+
extensions: []
|
|
74
|
+
extra_rdoc_files: []
|
|
75
|
+
files:
|
|
76
|
+
- ".gitignore"
|
|
77
|
+
- ".travis.yml"
|
|
78
|
+
- Gemfile
|
|
79
|
+
- LICENSE.txt
|
|
80
|
+
- README.md
|
|
81
|
+
- Rakefile
|
|
82
|
+
- bin/console
|
|
83
|
+
- bin/setup
|
|
84
|
+
- bulk_importer.gemspec
|
|
85
|
+
- lib/bulk_importer.rb
|
|
86
|
+
- lib/bulk_importer/postgresql_module.rb
|
|
87
|
+
- lib/bulk_importer/version.rb
|
|
88
|
+
homepage: https://github.com/abelosorio/bulk_importer
|
|
89
|
+
licenses:
|
|
90
|
+
- MIT
|
|
91
|
+
metadata: {}
|
|
92
|
+
post_install_message:
|
|
93
|
+
rdoc_options: []
|
|
94
|
+
require_paths:
|
|
95
|
+
- lib
|
|
96
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
97
|
+
requirements:
|
|
98
|
+
- - ">="
|
|
99
|
+
- !ruby/object:Gem::Version
|
|
100
|
+
version: '0'
|
|
101
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
102
|
+
requirements:
|
|
103
|
+
- - ">="
|
|
104
|
+
- !ruby/object:Gem::Version
|
|
105
|
+
version: '0'
|
|
106
|
+
requirements: []
|
|
107
|
+
rubyforge_project:
|
|
108
|
+
rubygems_version: 2.2.2
|
|
109
|
+
signing_key:
|
|
110
|
+
specification_version: 4
|
|
111
|
+
summary: Bulk importer for Ruby on Rails.
|
|
112
|
+
test_files: []
|