csv_fast_importer 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +36 -0
- data/.ruby-version +1 -0
- data/.travis.yml +15 -0
- data/CONTRIBUTING.md +24 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +128 -0
- data/LICENSE +21 -0
- data/README.md +186 -0
- data/Rakefile +44 -0
- data/benchmark/NPRI-SubsDisp-Normalized-Since1993.csv +10000 -0
- data/benchmark/README.md +140 -0
- data/benchmark/benchmark.rb +26 -0
- data/benchmark/results.png +0 -0
- data/benchmark/results.xlsx +0 -0
- data/benchmark/strategies.rb +115 -0
- data/benchmark/tools.rb +61 -0
- data/csv_fast_importer.gemspec +42 -0
- data/lib/csv_fast_importer.rb +12 -0
- data/lib/csv_fast_importer/configuration.rb +57 -0
- data/lib/csv_fast_importer/database/mysql.rb +28 -0
- data/lib/csv_fast_importer/database/postgres.rb +36 -0
- data/lib/csv_fast_importer/database/queryable.rb +51 -0
- data/lib/csv_fast_importer/database_connection.rb +19 -0
- data/lib/csv_fast_importer/database_factory.rb +19 -0
- data/lib/csv_fast_importer/import.rb +58 -0
- data/lib/csv_fast_importer/version.rb +3 -0
- data/sample-app/.gitignore +10 -0
- data/sample-app/Gemfile +50 -0
- data/sample-app/Gemfile.lock +172 -0
- data/sample-app/README.md +23 -0
- data/sample-app/Rakefile +6 -0
- data/sample-app/app/assets/images/.keep +0 -0
- data/sample-app/app/assets/javascripts/application.js +16 -0
- data/sample-app/app/assets/stylesheets/application.css +15 -0
- data/sample-app/app/controllers/application_controller.rb +5 -0
- data/sample-app/app/controllers/concerns/.keep +0 -0
- data/sample-app/app/helpers/application_helper.rb +2 -0
- data/sample-app/app/mailers/.keep +0 -0
- data/sample-app/app/models/.keep +0 -0
- data/sample-app/app/models/concerns/.keep +0 -0
- data/sample-app/app/models/knight.rb +2 -0
- data/sample-app/app/views/layouts/application.html.erb +14 -0
- data/sample-app/bin/bundle +3 -0
- data/sample-app/bin/rails +9 -0
- data/sample-app/bin/rake +9 -0
- data/sample-app/bin/setup +29 -0
- data/sample-app/bin/spring +17 -0
- data/sample-app/config.ru +4 -0
- data/sample-app/config/application.rb +26 -0
- data/sample-app/config/boot.rb +3 -0
- data/sample-app/config/database.yml +21 -0
- data/sample-app/config/environment.rb +5 -0
- data/sample-app/config/environments/development.rb +41 -0
- data/sample-app/config/environments/production.rb +79 -0
- data/sample-app/config/environments/test.rb +42 -0
- data/sample-app/config/initializers/assets.rb +11 -0
- data/sample-app/config/initializers/backtrace_silencers.rb +7 -0
- data/sample-app/config/initializers/cookies_serializer.rb +3 -0
- data/sample-app/config/initializers/filter_parameter_logging.rb +4 -0
- data/sample-app/config/initializers/inflections.rb +16 -0
- data/sample-app/config/initializers/mime_types.rb +4 -0
- data/sample-app/config/initializers/session_store.rb +3 -0
- data/sample-app/config/initializers/wrap_parameters.rb +14 -0
- data/sample-app/config/locales/en.yml +23 -0
- data/sample-app/config/routes.rb +56 -0
- data/sample-app/config/secrets.yml +22 -0
- data/sample-app/db/development.sqlite3 +0 -0
- data/sample-app/db/migrate/20170818134706_create_knights.rb +8 -0
- data/sample-app/db/schema.rb +24 -0
- data/sample-app/db/seeds.rb +7 -0
- data/sample-app/knights.csv +3 -0
- data/sample-app/lib/assets/.keep +0 -0
- data/sample-app/lib/tasks/.keep +0 -0
- data/sample-app/lib/tasks/csv_fast_importer.rake +9 -0
- data/sample-app/log/.keep +0 -0
- data/sample-app/public/404.html +67 -0
- data/sample-app/public/422.html +67 -0
- data/sample-app/public/500.html +66 -0
- data/sample-app/public/favicon.ico +0 -0
- data/sample-app/public/robots.txt +5 -0
- data/sample-app/test/controllers/.keep +0 -0
- data/sample-app/test/fixtures/.keep +0 -0
- data/sample-app/test/fixtures/knights.yml +9 -0
- data/sample-app/test/helpers/.keep +0 -0
- data/sample-app/test/integration/.keep +0 -0
- data/sample-app/test/mailers/.keep +0 -0
- data/sample-app/test/models/.keep +0 -0
- data/sample-app/test/models/knight_test.rb +7 -0
- data/sample-app/test/test_helper.rb +10 -0
- metadata +331 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 1c6850b0ee75facc689b0992b21b3a3f94918bc9
|
|
4
|
+
data.tar.gz: 19d416bfb8f7c61f56bfab8906e25adecf329ba8
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: f5c35806af277b60c034b04f938dcf2faca4f9433a9194c985d098d5866230a58c07e9de678e69219d90097c0cbaac5fd9eb89d271936e8f35adf7c1121ac8e0
|
|
7
|
+
data.tar.gz: cd058154e47b07196e6ad1448eab4e40ae9e452cba095f383bf4589615139f290decf223f966b19e7de90635dc7c4d8cf87b5836b4ff62f9a04d16a139d170ca
|
data/.gitignore
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
*.gem
|
|
2
|
+
*.rbc
|
|
3
|
+
/.config
|
|
4
|
+
/coverage/
|
|
5
|
+
/InstalledFiles
|
|
6
|
+
/pkg/
|
|
7
|
+
/spec/reports/
|
|
8
|
+
/spec/examples.txt
|
|
9
|
+
/test/tmp/
|
|
10
|
+
/test/version_tmp/
|
|
11
|
+
/tmp/
|
|
12
|
+
|
|
13
|
+
## Specific to RubyMotion:
|
|
14
|
+
.dat*
|
|
15
|
+
.repl_history
|
|
16
|
+
build/
|
|
17
|
+
|
|
18
|
+
## Documentation cache and generated files:
|
|
19
|
+
/.yardoc/
|
|
20
|
+
/_yardoc/
|
|
21
|
+
/doc/
|
|
22
|
+
/rdoc/
|
|
23
|
+
|
|
24
|
+
## Environment normalization:
|
|
25
|
+
/.bundle/
|
|
26
|
+
/vendor/bundle
|
|
27
|
+
/lib/bundler/man/
|
|
28
|
+
|
|
29
|
+
# for a library or gem, you might want to ignore these files since the code is
|
|
30
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
31
|
+
# Gemfile.lock
|
|
32
|
+
# .ruby-version
|
|
33
|
+
# .ruby-gemset
|
|
34
|
+
|
|
35
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
|
36
|
+
.rvmrc
|
data/.ruby-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
2.2.1
|
data/.travis.yml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
language: ruby
|
|
2
|
+
script:
|
|
3
|
+
- bundle exec rake test:db:create
|
|
4
|
+
- bundle exec rspec spec
|
|
5
|
+
- if [ "$RUN_BENCHMARK" = "true" ]; then DATASET_SIZE=100 bundle exec rake benchmark; fi
|
|
6
|
+
rvm:
|
|
7
|
+
2.2.0
|
|
8
|
+
matrix:
|
|
9
|
+
include:
|
|
10
|
+
- env:
|
|
11
|
+
- DB_TYPE=mysql
|
|
12
|
+
- DB_USERNAME=travis
|
|
13
|
+
- env:
|
|
14
|
+
- DB_TYPE=postgres
|
|
15
|
+
- RUN_BENCHMARK=true
|
data/CONTRIBUTING.md
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Contributing to CSV Fast Importer
|
|
2
|
+
|
|
3
|
+
## Report a bug
|
|
4
|
+
|
|
5
|
+
You just found a bug and you want to report it? It's very simple, open a ticket
|
|
6
|
+
on [our bug tracker](https://github.com/sogilis/csv_fast_importer/issues). Use
|
|
7
|
+
the search bar to find similar tickets and to avoid duplicate tickets.
|
|
8
|
+
|
|
9
|
+
When you report a bug, please make sure to include useful information such as:
|
|
10
|
+
|
|
11
|
+
- version of Ruby
|
|
12
|
+
- which database was used and which version
|
|
13
|
+
- steps to reproduce the bug
|
|
14
|
+
- what you expected and what happened instead
|
|
15
|
+
- if you can, include the related CSV file
|
|
16
|
+
|
|
17
|
+
## Contribute to development
|
|
18
|
+
|
|
19
|
+
- Fork the project
|
|
20
|
+
- Create a new branch (e.g. `git checkout -b feature/my-awesome-feature master`)
|
|
21
|
+
- Make your changes and create a pull request against our master branch
|
|
22
|
+
- Check your commits are self-explanatory and are clear
|
|
23
|
+
- Check your code does not fail code linting nor tests
|
|
24
|
+
- Make sure to describe your pull request
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
PATH
|
|
2
|
+
remote: .
|
|
3
|
+
specs:
|
|
4
|
+
csv_fast_importer (1.0.0)
|
|
5
|
+
activerecord (>= 3.0)
|
|
6
|
+
|
|
7
|
+
GEM
|
|
8
|
+
remote: https://rubygems.org/
|
|
9
|
+
specs:
|
|
10
|
+
active_importer (0.2.6)
|
|
11
|
+
roo
|
|
12
|
+
activemodel (4.2.5)
|
|
13
|
+
activesupport (= 4.2.5)
|
|
14
|
+
builder (~> 3.1)
|
|
15
|
+
activerecord (4.2.5)
|
|
16
|
+
activemodel (= 4.2.5)
|
|
17
|
+
activesupport (= 4.2.5)
|
|
18
|
+
arel (~> 6.0)
|
|
19
|
+
activerecord-import (0.10.0)
|
|
20
|
+
activerecord (>= 3.0)
|
|
21
|
+
activesupport (4.2.5)
|
|
22
|
+
i18n (~> 0.7)
|
|
23
|
+
json (~> 1.7, >= 1.7.7)
|
|
24
|
+
minitest (~> 5.1)
|
|
25
|
+
thread_safe (~> 0.3, >= 0.3.4)
|
|
26
|
+
tzinfo (~> 1.1)
|
|
27
|
+
arel (6.0.3)
|
|
28
|
+
axiom-types (0.1.1)
|
|
29
|
+
descendants_tracker (~> 0.0.4)
|
|
30
|
+
ice_nine (~> 0.11.0)
|
|
31
|
+
thread_safe (~> 0.3, >= 0.3.1)
|
|
32
|
+
builder (3.2.2)
|
|
33
|
+
bulk_insert (1.5.0)
|
|
34
|
+
activerecord (>= 4.1.0)
|
|
35
|
+
codacy-coverage (0.2.3)
|
|
36
|
+
rest-client (~> 1.8)
|
|
37
|
+
simplecov (~> 0.10.0)
|
|
38
|
+
coercible (1.0.0)
|
|
39
|
+
descendants_tracker (~> 0.0.1)
|
|
40
|
+
csv-importer (0.3.2)
|
|
41
|
+
virtus
|
|
42
|
+
descendants_tracker (0.0.4)
|
|
43
|
+
thread_safe (~> 0.3, >= 0.3.1)
|
|
44
|
+
diff-lcs (1.2.5)
|
|
45
|
+
docile (1.1.5)
|
|
46
|
+
domain_name (0.5.20160216)
|
|
47
|
+
unf (>= 0.0.5, < 1.0.0)
|
|
48
|
+
equalizer (0.0.11)
|
|
49
|
+
ferry (2.0.0)
|
|
50
|
+
highline (~> 1.6.21)
|
|
51
|
+
progressbar (~> 0.21.0)
|
|
52
|
+
highline (1.6.21)
|
|
53
|
+
http-cookie (1.0.2)
|
|
54
|
+
domain_name (~> 0.5)
|
|
55
|
+
i18n (0.7.0)
|
|
56
|
+
ice_nine (0.11.2)
|
|
57
|
+
json (1.8.3)
|
|
58
|
+
mime-types (2.99.1)
|
|
59
|
+
mini_portile2 (2.2.0)
|
|
60
|
+
minitest (5.8.3)
|
|
61
|
+
mysql2 (0.3.20)
|
|
62
|
+
netrc (0.11.0)
|
|
63
|
+
nokogiri (1.8.0)
|
|
64
|
+
mini_portile2 (~> 2.2.0)
|
|
65
|
+
pg (0.18.4)
|
|
66
|
+
progressbar (0.21.0)
|
|
67
|
+
rake (10.4.2)
|
|
68
|
+
rest-client (1.8.0)
|
|
69
|
+
http-cookie (>= 1.0.2, < 2.0)
|
|
70
|
+
mime-types (>= 1.16, < 3.0)
|
|
71
|
+
netrc (~> 0.7)
|
|
72
|
+
roo (2.7.1)
|
|
73
|
+
nokogiri (~> 1)
|
|
74
|
+
rubyzip (~> 1.1, < 2.0.0)
|
|
75
|
+
rspec (3.4.0)
|
|
76
|
+
rspec-core (~> 3.4.0)
|
|
77
|
+
rspec-expectations (~> 3.4.0)
|
|
78
|
+
rspec-mocks (~> 3.4.0)
|
|
79
|
+
rspec-core (3.4.4)
|
|
80
|
+
rspec-support (~> 3.4.0)
|
|
81
|
+
rspec-expectations (3.4.0)
|
|
82
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
83
|
+
rspec-support (~> 3.4.0)
|
|
84
|
+
rspec-mocks (3.4.1)
|
|
85
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
86
|
+
rspec-support (~> 3.4.0)
|
|
87
|
+
rspec-support (3.4.1)
|
|
88
|
+
rubyzip (1.2.1)
|
|
89
|
+
simplecov (0.10.0)
|
|
90
|
+
docile (~> 1.1.0)
|
|
91
|
+
json (~> 1.8)
|
|
92
|
+
simplecov-html (~> 0.10.0)
|
|
93
|
+
simplecov-html (0.10.0)
|
|
94
|
+
smarter_csv (1.1.4)
|
|
95
|
+
thread_safe (0.3.5)
|
|
96
|
+
tzinfo (1.2.2)
|
|
97
|
+
thread_safe (~> 0.1)
|
|
98
|
+
unf (0.1.4)
|
|
99
|
+
unf_ext
|
|
100
|
+
unf_ext (0.0.7.2)
|
|
101
|
+
upsert (2.2.1)
|
|
102
|
+
virtus (1.0.5)
|
|
103
|
+
axiom-types (~> 0.1)
|
|
104
|
+
coercible (~> 1.0)
|
|
105
|
+
descendants_tracker (~> 0.0, >= 0.0.3)
|
|
106
|
+
equalizer (~> 0.0, >= 0.0.9)
|
|
107
|
+
|
|
108
|
+
PLATFORMS
|
|
109
|
+
ruby
|
|
110
|
+
|
|
111
|
+
DEPENDENCIES
|
|
112
|
+
active_importer
|
|
113
|
+
activerecord-import
|
|
114
|
+
bulk_insert
|
|
115
|
+
bundler (~> 1.10)
|
|
116
|
+
codacy-coverage
|
|
117
|
+
csv-importer
|
|
118
|
+
csv_fast_importer!
|
|
119
|
+
ferry
|
|
120
|
+
mysql2 (>= 0.3.10)
|
|
121
|
+
pg (>= 0.18.4)
|
|
122
|
+
rake (~> 10.0)
|
|
123
|
+
rspec
|
|
124
|
+
smarter_csv
|
|
125
|
+
upsert
|
|
126
|
+
|
|
127
|
+
BUNDLED WITH
|
|
128
|
+
1.13.2
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2016 Jean-Baptiste Mille
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
[](https://travis-ci.org/sogilis/csv_fast_importer) [](https://www.codacy.com/app/Jibidus/csv_fast_importer?utm_source=github.com&utm_medium=referral&utm_content=sogilis/csv_fast_importer&utm_campaign=Badge_Grade)
|
|
2
|
+
|
|
3
|
+
# CSV Fast Importer
|
|
4
|
+
|
|
5
|
+
A gem to import CSV files' content into a PostgreSQL or MySQL database. It is respectively based on [PostgreSQL `COPY`](https://wiki.postgresql.org/wiki/COPY) and [MySQL `LOAD DATA INFILE`](https://dev.mysql.com/doc/refman/5.7/en/load-data.html) which are designed to be as fast as possible.
|
|
6
|
+
|
|
7
|
+
## Why?
|
|
8
|
+
|
|
9
|
+
CSV importation is a common task which can be done by more than 6 different gems, but none of them is able to import **1 million lines in a few seconds** (see benchmark below), hence the creation of this gem.
|
|
10
|
+
|
|
11
|
+
Here is an indicative benchmark to compare available solutions. It represents the **duration (ms)** to import a **10 000 lines** csv file into a local PostgreSQL instance on a laptop running OSX (lower is better):
|
|
12
|
+
|
|
13
|
+

|
|
14
|
+
|
|
15
|
+
Like all benchmarks, some tuning can produce different results, yet this chart gives a big picture. See [benchmark details](benchmark/README.md).
|
|
16
|
+
|
|
17
|
+
## Requirements
|
|
18
|
+
|
|
19
|
+
- Rails (ActiveRecord in fact)
|
|
20
|
+
- PostgreSQL or MySQL
|
|
21
|
+
|
|
22
|
+
## Limitations
|
|
23
|
+
|
|
24
|
+
- Usual ActiveRecord process (validations, callbacks, computed fields like `created_at`...) is bypassed. This is the price for performance
|
|
25
|
+
- Custom enclosing field (ex: `"`) is not supported yet
|
|
26
|
+
- Custom line separator (ex: `\r\n` for windows file) is not supported yet
|
|
27
|
+
- MySQL: encoding is not supported yet
|
|
28
|
+
- MySQL: transaction is not supported yet
|
|
29
|
+
- MySQL: row_index is not supported yet
|
|
30
|
+
|
|
31
|
+
## Installation
|
|
32
|
+
|
|
33
|
+
Add the dependency to your Gemfile:
|
|
34
|
+
|
|
35
|
+
```gemfile
|
|
36
|
+
gem 'csv_fast_importer'
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Run `bundle install`.
|
|
40
|
+
|
|
41
|
+
You can install the gem by yourself too:
|
|
42
|
+
|
|
43
|
+
```sh
|
|
44
|
+
$ gem install csv_fast_importer
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
**For MySQL** :warning: enable `local_infile` for both [client](https://dev.mysql.com/doc/refman/5.7/en/source-configuration-options.html#option_cmake_enabled_local_infile) and [server](https://dev.mysql.com/doc/refman/5.7/en/server-system-variables.html#sysvar_local_infile). In a Rails application, just add `local_infile: true` to your database config file `database.yml` to configure the database client. See [Security Issues with LOAD DATA LOCAL](https://dev.mysql.com/doc/refman/5.7/en/load-data-local.html) for more details.
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
## Usage
|
|
51
|
+
|
|
52
|
+
Actually, CSV Fast Importer needs `active_record` to work. Setup your database
|
|
53
|
+
configuration as in a usual Rails project. Then, use the `CsvFastImporter`
|
|
54
|
+
class:
|
|
55
|
+
|
|
56
|
+
```ruby
|
|
57
|
+
require 'csv_fast_importer'
|
|
58
|
+
|
|
59
|
+
file = File.new '/path/to/knights.csv'
|
|
60
|
+
imported_lines_count = CsvFastImporter.import(file)
|
|
61
|
+
|
|
62
|
+
puts imported_lines_count
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Under the hood, CSV Fast Importer deletes data from the `knights` table and
|
|
66
|
+
imports those from `knights.csv` by mapping columns' names to table's fields.
|
|
67
|
+
Note: mapping is case insensitive so **database fields' names must be lowercase**.
|
|
68
|
+
For instance, a `FIRSTNAME` CSV column will be mapped to the `firstname` field.
|
|
69
|
+
|
|
70
|
+
### Options
|
|
71
|
+
|
|
72
|
+
| Option key | Purpose | Default value |
|
|
73
|
+
| ------------ | ------------- | ------------- |
|
|
74
|
+
| *encoding* | File encoding. *PostgreSQL only*| `'UTF-8'` |
|
|
75
|
+
| *col_sep* | Column separator in file | `';'` |
|
|
76
|
+
| *destination* | Destination table | given base filename (without extension) |
|
|
77
|
+
| *mapping* | Column mapping | `{}` |
|
|
78
|
+
| *row_index_column* | Column name where inserting file row index (not used when `nil`). *PostgreSQL only* | `nil` |
|
|
79
|
+
| *transaction* | Execute DELETE and INSERT in same transaction. *PostgreSQL only* | `:enabled` |
|
|
80
|
+
| *deletion* | Row deletion method (`:delete` for SQL DELETE, `:truncate` for SQL TRUNCATE or `:none` for no deletion before import) | `:delete` |
|
|
81
|
+
|
|
82
|
+
Your CSV file should be encoded in UTF-8 but you can specify another encoding
|
|
83
|
+
with the `encoding` option (*PostgreSQL only*).
|
|
84
|
+
|
|
85
|
+
```ruby
|
|
86
|
+
CsvFastImporter.import file, encoding: 'ISO-8859-1'
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
You can specify a different column separator with the `col_sep` option (`;` by
|
|
90
|
+
default):
|
|
91
|
+
|
|
92
|
+
```ruby
|
|
93
|
+
CsvFastImporter.import file, col_sep: '|'
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
By default, CSV Fast Importer computes the database table's name by taking the
|
|
97
|
+
`basename` of the imported file. For instance, considering the imported file
|
|
98
|
+
`/path/to/knights.csv`, the table's name will be `knights`. To bypass
|
|
99
|
+
this default behaviour, specify the `destination` option:
|
|
100
|
+
|
|
101
|
+
```ruby
|
|
102
|
+
file = File.new '/path/to/clients.csv'
|
|
103
|
+
CsvFastImporter.import file, destination: 'knights'
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Finally, you can specify a custom mapping between CSV file's columns and
|
|
107
|
+
database fields with the `mapping` option.
|
|
108
|
+
|
|
109
|
+
Considering the following `knights.csv` file:
|
|
110
|
+
|
|
111
|
+
```csv
|
|
112
|
+
NAME;KNIGHT_EMAIL
|
|
113
|
+
Perceval;perceval@logre.cel
|
|
114
|
+
Lancelot;lancelot@logre.cel
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
To map the `KNIGHT_EMAIL` column to the `email` database field:
|
|
118
|
+
|
|
119
|
+
```ruby
|
|
120
|
+
CsvFastImporter.import file, mapping: { email: :knight_email }
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## How to contribute?
|
|
124
|
+
|
|
125
|
+
You can fork and submit new pull request (with tests and explanations).
|
|
126
|
+
First of all, you need to initialize your environment:
|
|
127
|
+
|
|
128
|
+
```sh
|
|
129
|
+
$ bundle install
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
Then, start your PostgreSQL database (ex: [Postgres.app](http://postgresapp.com) for the Mac) and setup database environment:
|
|
133
|
+
|
|
134
|
+
```sh
|
|
135
|
+
$ bundle exec rake test:db:create
|
|
136
|
+
```
|
|
137
|
+
This will connect to `localhost` PostgreSQL database without user (see `config/database.postgres.yml`) and create a new database dedicated to tests.
|
|
138
|
+
|
|
139
|
+
Finally, you can run all tests with RSpec like this:
|
|
140
|
+
|
|
141
|
+
```sh
|
|
142
|
+
$ bundle exec rspec
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
By default, PostgreSQL is used. You can set another database with environment variables like this for MySQL:
|
|
146
|
+
```sh
|
|
147
|
+
$ DB_TYPE=mysql DB_ROOT_PASSWORD=password DB_USERNAME=username bundle exec rake test:db:create
|
|
148
|
+
$ DB_TYPE=mysql DB_USERNAME=username bundle exec rspec
|
|
149
|
+
```
|
|
150
|
+
This will connect to mysql with `root` user (with `password` as password) and create database for user `username`.
|
|
151
|
+
Use `DB_TYPE=mysql DB_USERNAME=` (with empty username) for anonymous account.
|
|
152
|
+
|
|
153
|
+
*Warning*: MySQL tests require that your local database permits `LOAD DATA LOCAL`. Check your MySQL instance with the following command: `SHOW GLOBAL VARIABLES LIKE 'local_infile'` (should be `ON`).
|
|
154
|
+
|
|
155
|
+
## Versioning
|
|
156
|
+
|
|
157
|
+
`master` is the development branch and releases are published as tags.
|
|
158
|
+
|
|
159
|
+
**We're not ready for production yet (version < 1.0) so use this gem with
|
|
160
|
+
caution.**
|
|
161
|
+
|
|
162
|
+
We follow the [Semantic Versioning 2.0.0](http://semver.org/) for our gem
|
|
163
|
+
releases.
|
|
164
|
+
|
|
165
|
+
In a few words:
|
|
166
|
+
|
|
167
|
+
> Given a version number MAJOR.MINOR.PATCH, increment the:
|
|
168
|
+
>
|
|
169
|
+
> 1. MAJOR version when you make incompatible API changes,
|
|
170
|
+
> 2. MINOR version when you add functionality in a backwards-compatible manner,
|
|
171
|
+
> and
|
|
172
|
+
> 3. PATCH version when you make backwards-compatible bug fixes.
|
|
173
|
+
|
|
174
|
+
## Backlog (unordered)
|
|
175
|
+
|
|
176
|
+
- [ ] Support any column and table case
|
|
177
|
+
- [ ] Support custom enclosing field (ex: `"`)
|
|
178
|
+
- [ ] Support custom line separator (ex: \r\n for windows file)
|
|
179
|
+
- [ ] Support custom type conversion
|
|
180
|
+
- [ ] MySQL: support encoding parameter. See https://dev.mysql.com/doc/refman/5.5/en/charset-charsets.html
|
|
181
|
+
- [ ] MySQL: support transaction parameter
|
|
182
|
+
- [ ] MySQL: support row_index_column parameter
|
|
183
|
+
- [ ] MySQL: run multiple SQL queries in single statement
|
|
184
|
+
- [ ] Refactor tests (with should-> must / should -> expect / subject...)
|
|
185
|
+
- [ ] Reduce technical debt on db connection (test & benchmark)
|
|
186
|
+
- [ ] SQLite support
|
data/Rakefile
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
require 'bundler/gem_tasks'
|
|
2
|
+
|
|
3
|
+
namespace :test do  # Tasks that prepare the test environment.
|
|
4
|
+
namespace :db do  # Database-related test tasks (full task name: test:db:create).
|
|
5
|
+
|
|
6
|
+
desc "Create test database"
|
|
7
|
+
task :create do  # Creates the test database on the engine reported by TestDatabase#type.
|
|
8
|
+
|
|
9
|
+
require_relative './spec/config/test_database.rb'  # Required inside the task, so the spec helper is only loaded when the task runs.
|
|
10
|
+
|
|
11
|
+
db = TestDatabase.new  # TestDatabase comes from the spec helper required above; its definition is not visible here.
|
|
12
|
+
case db.type
|
|
13
|
+
when :postgres
|
|
14
|
+
require 'pg'
|
|
15
|
+
db.connect 'postgres'  # NOTE(review): presumably connects to the server's default "postgres" database so CREATE DATABASE can be issued - confirm in TestDatabase#connect.
|
|
16
|
+
ActiveRecord::Base.connection.execute "CREATE DATABASE #{db.name}"
|
|
17
|
+
|
|
18
|
+
when :mysql
|
|
19
|
+
require 'mysql2'
|
|
20
|
+
client_config = db.configuration  # Start from the test configuration, then override the entries below.
|
|
21
|
+
.merge(database: nil,  # No database is selected for this connection.
|
|
22
|
+
username: 'root',  # Connects as root; the configured test user is granted privileges by the GRANT statement below.
|
|
23
|
+
password: ENV['DB_ROOT_PASSWORD'],  # nil when the environment variable is unset.
|
|
24
|
+
flags: Mysql2::Client::MULTI_STATEMENTS)  # Lets the ';'-separated statements below be sent in a single query call.
|
|
25
|
+
client = Mysql2::Client.new(client_config)
|
|
26
|
+
client.query <<-SQL
|
|
27
|
+
CREATE DATABASE #{db.name};
|
|
28
|
+
GRANT ALL ON #{db.name}.* TO '#{db.configuration['username']}'@'#{db.configuration['host']}';
|
|
29
|
+
FLUSH PRIVILEGES;
|
|
30
|
+
SQL
|
|
31
|
+
else  # Any other db.type aborts the task with an error.
|
|
32
|
+
raise "Unknown database type: #{db.type}"
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
puts "Test database \"#{db.name}\" created."
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
desc "Benchmark all available solutions to import CSV in Postgres database"
|
|
42
|
+
task :benchmark do  # The task body only loads benchmark/benchmark.rb.
|
|
43
|
+
require_relative 'benchmark/benchmark'  # NOTE(review): presumably the benchmark executes as a side effect of loading this file - confirm in benchmark/benchmark.rb.
|
|
44
|
+
end
|