csv_fast_importer 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +36 -0
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +15 -0
  5. data/CONTRIBUTING.md +24 -0
  6. data/Gemfile +3 -0
  7. data/Gemfile.lock +128 -0
  8. data/LICENSE +21 -0
  9. data/README.md +186 -0
  10. data/Rakefile +44 -0
  11. data/benchmark/NPRI-SubsDisp-Normalized-Since1993.csv +10000 -0
  12. data/benchmark/README.md +140 -0
  13. data/benchmark/benchmark.rb +26 -0
  14. data/benchmark/results.png +0 -0
  15. data/benchmark/results.xlsx +0 -0
  16. data/benchmark/strategies.rb +115 -0
  17. data/benchmark/tools.rb +61 -0
  18. data/csv_fast_importer.gemspec +42 -0
  19. data/lib/csv_fast_importer.rb +12 -0
  20. data/lib/csv_fast_importer/configuration.rb +57 -0
  21. data/lib/csv_fast_importer/database/mysql.rb +28 -0
  22. data/lib/csv_fast_importer/database/postgres.rb +36 -0
  23. data/lib/csv_fast_importer/database/queryable.rb +51 -0
  24. data/lib/csv_fast_importer/database_connection.rb +19 -0
  25. data/lib/csv_fast_importer/database_factory.rb +19 -0
  26. data/lib/csv_fast_importer/import.rb +58 -0
  27. data/lib/csv_fast_importer/version.rb +3 -0
  28. data/sample-app/.gitignore +10 -0
  29. data/sample-app/Gemfile +50 -0
  30. data/sample-app/Gemfile.lock +172 -0
  31. data/sample-app/README.md +23 -0
  32. data/sample-app/Rakefile +6 -0
  33. data/sample-app/app/assets/images/.keep +0 -0
  34. data/sample-app/app/assets/javascripts/application.js +16 -0
  35. data/sample-app/app/assets/stylesheets/application.css +15 -0
  36. data/sample-app/app/controllers/application_controller.rb +5 -0
  37. data/sample-app/app/controllers/concerns/.keep +0 -0
  38. data/sample-app/app/helpers/application_helper.rb +2 -0
  39. data/sample-app/app/mailers/.keep +0 -0
  40. data/sample-app/app/models/.keep +0 -0
  41. data/sample-app/app/models/concerns/.keep +0 -0
  42. data/sample-app/app/models/knight.rb +2 -0
  43. data/sample-app/app/views/layouts/application.html.erb +14 -0
  44. data/sample-app/bin/bundle +3 -0
  45. data/sample-app/bin/rails +9 -0
  46. data/sample-app/bin/rake +9 -0
  47. data/sample-app/bin/setup +29 -0
  48. data/sample-app/bin/spring +17 -0
  49. data/sample-app/config.ru +4 -0
  50. data/sample-app/config/application.rb +26 -0
  51. data/sample-app/config/boot.rb +3 -0
  52. data/sample-app/config/database.yml +21 -0
  53. data/sample-app/config/environment.rb +5 -0
  54. data/sample-app/config/environments/development.rb +41 -0
  55. data/sample-app/config/environments/production.rb +79 -0
  56. data/sample-app/config/environments/test.rb +42 -0
  57. data/sample-app/config/initializers/assets.rb +11 -0
  58. data/sample-app/config/initializers/backtrace_silencers.rb +7 -0
  59. data/sample-app/config/initializers/cookies_serializer.rb +3 -0
  60. data/sample-app/config/initializers/filter_parameter_logging.rb +4 -0
  61. data/sample-app/config/initializers/inflections.rb +16 -0
  62. data/sample-app/config/initializers/mime_types.rb +4 -0
  63. data/sample-app/config/initializers/session_store.rb +3 -0
  64. data/sample-app/config/initializers/wrap_parameters.rb +14 -0
  65. data/sample-app/config/locales/en.yml +23 -0
  66. data/sample-app/config/routes.rb +56 -0
  67. data/sample-app/config/secrets.yml +22 -0
  68. data/sample-app/db/development.sqlite3 +0 -0
  69. data/sample-app/db/migrate/20170818134706_create_knights.rb +8 -0
  70. data/sample-app/db/schema.rb +24 -0
  71. data/sample-app/db/seeds.rb +7 -0
  72. data/sample-app/knights.csv +3 -0
  73. data/sample-app/lib/assets/.keep +0 -0
  74. data/sample-app/lib/tasks/.keep +0 -0
  75. data/sample-app/lib/tasks/csv_fast_importer.rake +9 -0
  76. data/sample-app/log/.keep +0 -0
  77. data/sample-app/public/404.html +67 -0
  78. data/sample-app/public/422.html +67 -0
  79. data/sample-app/public/500.html +66 -0
  80. data/sample-app/public/favicon.ico +0 -0
  81. data/sample-app/public/robots.txt +5 -0
  82. data/sample-app/test/controllers/.keep +0 -0
  83. data/sample-app/test/fixtures/.keep +0 -0
  84. data/sample-app/test/fixtures/knights.yml +9 -0
  85. data/sample-app/test/helpers/.keep +0 -0
  86. data/sample-app/test/integration/.keep +0 -0
  87. data/sample-app/test/mailers/.keep +0 -0
  88. data/sample-app/test/models/.keep +0 -0
  89. data/sample-app/test/models/knight_test.rb +7 -0
  90. data/sample-app/test/test_helper.rb +10 -0
  91. metadata +331 -0
@@ -0,0 +1,28 @@
1
+ require_relative './queryable'
2
+
3
module CsvFastImporter
  module Database
    # MySQL implementation of the bulk import, built on LOAD DATA LOCAL INFILE.
    class Mysql < Queryable
      identifier_quote_character '`'

      # Fails fast when the configuration explicitly requires a transaction.
      #
      # @param configuration [#transactional_forced?] import configuration
      # @raise [RuntimeError] when a transactional import is forced
      def verify_compatibility(configuration)
        raise 'Transactional not supported with MySQL database' if configuration.transactional_forced?
      end

      # Loads the file into +table+ with a single LOAD DATA LOCAL INFILE
      # statement. The first line of the file is skipped (IGNORE 1 LINES).
      #
      # +row_index_column+ and +encoding+ are accepted for signature parity but
      # are not used here: the statement always declares CHARACTER SET UTF8.
      #
      # @param file [String, #to_path] path of the CSV file (expanded to an absolute path)
      # @param table [String] destination table name
      # @param columns [Array<String>] destination column names, in file order
      # @param column_separator [String] field separator, inserted verbatim in the statement
      # @return [Object] value of SELECT ROW_COUNT() run right after the load
      def bulk_import(file, table, columns, row_index_column: nil, column_separator:, encoding:)
        columns_list_query = columns.map { |column| identify(column) }.join(',')
        # Let the adapter quote the path so that quotes or backslashes in a
        # file name cannot break (or alter) the statement.
        quoted_path = @connection.quote(File.expand_path(file))
        # NOTE(review): column_separator is still interpolated as-is; escaping it
        # would change the meaning of MySQL escape sequences such as '\t'.
        execute <<-SQL
          LOAD DATA LOCAL INFILE #{quoted_path}
          INTO TABLE #{identify(table)}
          CHARACTER SET UTF8
          FIELDS TERMINATED BY '#{column_separator}' OPTIONALLY ENCLOSED BY '"'
          LINES TERMINATED BY '\\n'
          IGNORE 1 LINES
          (#{columns_list_query})
          ;
        SQL
        query('SELECT ROW_COUNT()')
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ require_relative './queryable'
2
+
3
module CsvFastImporter
  module Database
    # PostgreSQL implementation of the bulk import, built on COPY ... FROM STDIN.
    class Postgres < Queryable
      identifier_quote_character '"'

      # No compatibility check is performed yet.
      def verify_compatibility(configuration)
        # TODO: verify postgresql version
      end

      # Streams the remaining lines of +file+ to the server through COPY.
      # Reading starts at the file's current position. When +row_index_column+
      # is given, every line is prefixed with its 1-based index and the column
      # separator, and that column is listed first in the COPY statement.
      #
      # @param file [#gets] source of CSV lines
      # @param table [String] destination table name
      # @param columns [Array<String>] destination column names, in file order
      # @param row_index_column [String, nil] optional column receiving the line index
      # @param column_separator [String] CSV delimiter
      # @param encoding [String] encoding name passed to COPY
      # @return [Integer] number of lines sent to the server
      def bulk_import(file, table, columns, row_index_column: nil, column_separator:, encoding:)
        indexed = !row_index_column.nil?
        target_columns = indexed ? [row_index_column] + columns : columns
        columns_list_query = target_columns.map { |name| identify(name) }.join(',')

        copy_statement = <<-SQL
          COPY #{identify(table)} (#{columns_list_query})
          FROM STDIN
          DELIMITER '#{column_separator}'
          CSV
          ENCODING '#{encoding}';
        SQL

        sent_lines = 0
        connection.copy_data(copy_statement) do
          loop do
            line = file.gets
            break unless line

            sent_lines += 1
            line.prepend(sent_lines.to_s << column_separator) if indexed
            connection.put_copy_data(line)
          end
        end
        sent_lines
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
module CsvFastImporter
  module Database

    # Inherit from this class to create new custom database implementation
    # Do not forget to call .identifier_quote_character
    class Queryable

      # @param connection [Object] connection wrapper; the methods below call
      #   #raw_connection, #execute, #select_value and #transaction on it
      def initialize(connection)
        @connection = connection
      end

      # Character used around identifiers (table or column name) to handle special characters.
      # Calling this macro in a subclass defines its #identify method.
      # Occurrences of the quote character inside an identifier are doubled so
      # the identifier cannot terminate its own quoting; symbols are accepted.
      def self.identifier_quote_character(character)
        define_method "identify" do |identifier|
          # Block form of gsub: the replacement is taken literally.
          escaped = identifier.to_s.gsub(character) { character * 2 }
          character + escaped + character
        end
      end

      # Fallback used when a subclass did not call .identifier_quote_character.
      #
      # @raise [RuntimeError] always
      def identify(table_or_column)
        raise '#identify method not available. #identifier_quote_character is certainly missing'
      end

      # Underlying driver connection, as returned by #raw_connection.
      def connection
        @connection.raw_connection
      end

      # Runs a statement through the wrapped connection.
      def execute(query)
        @connection.execute query
      end

      # Runs a query and returns what the connection's #select_value returns.
      def query(query)
        @connection.select_value query
      end

      # Yields inside a transaction opened on the wrapped connection.
      def transaction
        @connection.transaction do
          yield
        end
      end

      # Removes all rows of +table+ with a DELETE statement.
      def delete_all(table)
        execute "DELETE FROM #{identify(table)}"
      end

      # Empties +table+ with a TRUNCATE TABLE statement.
      def truncate(table)
        execute "TRUNCATE TABLE #{identify(table)}"
      end

    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'active_record'
2
+
3
module CsvFastImporter

  # Provide access to database driver/adapter
  class DatabaseConnection

    # Adapter name of the current connection, lower-cased and symbolized.
    #
    # @return [Symbol]
    def self.adapter_name
      base_connection.adapter_name.downcase.to_sym
    end

    # Connection currently returned by ActiveRecord::Base.connection.
    # It is looked up on every call instead of being cached in a class-level
    # variable, so a reconnection or a change of adapter is always reflected
    # and a single cached connection is never shared between callers.
    def self.base_connection
      ActiveRecord::Base.connection
    end

  end
end
@@ -0,0 +1,19 @@
1
+ require_relative './database_connection'
2
+ require_relative './database/mysql'
3
+ require_relative './database/postgres'
4
+
5
module CsvFastImporter

  # Builds the CsvFastImporter database implementation matching the current
  # adapter (as reported by DatabaseConnection).
  module DatabaseFactory
    # Adapter name => implementation class.
    DATABASES = {
      postgresql: CsvFastImporter::Database::Postgres,
      mysql2: CsvFastImporter::Database::Mysql
    }

    # Instantiates the implementation registered for the current adapter,
    # handing it the current base connection.
    #
    # @raise [RuntimeError] when no implementation is registered for the adapter
    def self.build
      adapter = CsvFastImporter::DatabaseConnection.adapter_name
      implementation = DATABASES.fetch(adapter) do
        raise "Database adapter #{adapter} not supported by CsvFastImporter. Only #{DATABASES.keys.join(", ")} are supported"
      end
      implementation.new(CsvFastImporter::DatabaseConnection.base_connection)
    end
  end
end
@@ -0,0 +1,58 @@
1
+ require_relative './database_factory'
2
+
3
module CsvFastImporter

  # Responsible for the main process
  class Import

    # @param configuration [Object] import settings; #run and #db_columns read
    #   the file, separator, mapping, table and option flags from it
    def initialize(configuration)
      @configuration = configuration
    end

    # Builds the database implementation, checks it is compatible with the
    # configuration, optionally empties the destination table, then delegates
    # to the implementation's bulk import. Everything after the compatibility
    # check runs inside a transaction when the configuration asks for one.
    #
    # @return [Object] whatever the database implementation's #bulk_import returns
    def run
      @db = CsvFastImporter::DatabaseFactory.build
      @db.verify_compatibility @configuration

      row_index = 0
      within_transaction_if(@configuration.transactional?) do
        table = @configuration.destination_table
        # The header is read before any deletion, so an unreadable file fails
        # before the destination table is touched.
        columns = db_columns(@configuration)
        if @configuration.deletion?
          if @configuration.truncate?
            @db.truncate table
          else
            @db.delete_all table
          end
        end
        row_index = @db.bulk_import(@configuration.file,
                                    table,
                                    columns,
                                    row_index_column: @configuration.row_index_column,
                                    column_separator: @configuration.column_separator,
                                    encoding: @configuration.encoding)
      end
      row_index
    end

    # Reads the next line of the file as the header and converts it to database
    # column names: each name is stripped, lower-cased, then replaced by its
    # entry in the configuration mapping when there is one.
    #
    # @param configuration [#file, #column_separator, #mapping]
    # @return [Array<String>] destination column names, in file order
    # @raise [RuntimeError] when the file has no line left to read
    def db_columns(configuration)
      header = configuration.file.gets
      # gets returns nil at end of file: report it instead of failing on nil.
      raise 'CSV file is empty: unable to read header line' if header.nil?

      header.split(configuration.column_separator)
            .map { |column| column.strip.downcase }
            .map { |column| configuration.mapping[column] || column }
    end

    # Yields inside a database transaction when +transactional+ is truthy,
    # and directly otherwise.
    def within_transaction_if(transactional)
      if transactional
        @db.transaction do
          yield
        end
      else
        yield
      end
    end

  end
end
@@ -0,0 +1,3 @@
1
# Gem version, exposed under the namespace used by the rest of the library.
module CsvFastImporter
  VERSION = "1.0.0"
end

# Historical spelling of the namespace, kept so existing references to
# CSVFastImporter::VERSION keep working.
module CSVFastImporter
  VERSION = CsvFastImporter::VERSION
end
@@ -0,0 +1,10 @@
1
+ # Ignore bundler config.
2
+ .bundle
3
+
4
+ # Ignore all logfiles and tempfiles.
5
+ log/*
6
+ !/log/.keep
7
+ tmp
8
+
9
+ # Ignore cache
10
+ vendor/cache
@@ -0,0 +1,50 @@
1
+ source 'https://rubygems.org'
2
+
3
+
4
+ # Bundle edge Rails instead: gem 'rails', github: 'rails/rails'
5
+ gem 'rails', '4.2.6'
6
+ # Use sqlite3 as the database for Active Record
7
+ #gem 'sqlite3'
8
+ gem 'pg'
9
+ # Use SCSS for stylesheets
10
+ gem 'sass-rails', '~> 5.0'
11
+ # Use Uglifier as compressor for JavaScript assets
12
+ gem 'uglifier', '>= 1.3.0'
13
+ # Use CoffeeScript for .coffee assets and views
14
+ gem 'coffee-rails', '~> 4.1.0'
15
+ # See https://github.com/rails/execjs#readme for more supported runtimes
16
+ # gem 'therubyracer', platforms: :ruby
17
+
18
+ # Use jquery as the JavaScript library
19
+ gem 'jquery-rails'
20
+ # Turbolinks makes following links in your web application faster. Read more: https://github.com/rails/turbolinks
21
+ gem 'turbolinks'
22
+ # Build JSON APIs with ease. Read more: https://github.com/rails/jbuilder
23
+ gem 'jbuilder', '~> 2.0'
24
+ # bundle exec rake doc:rails generates the API under doc/api.
25
+ gem 'sdoc', '~> 0.4.0', group: :doc
26
+
27
+ gem 'csv_fast_importer'
28
+
29
+ # Use ActiveModel has_secure_password
30
+ # gem 'bcrypt', '~> 3.1.7'
31
+
32
+ # Use Unicorn as the app server
33
+ # gem 'unicorn'
34
+
35
+ # Use Capistrano for deployment
36
+ # gem 'capistrano-rails', group: :development
37
+
38
+ group :development, :test do
39
+ # Call 'byebug' anywhere in the code to stop execution and get a debugger console
40
+ gem 'byebug'
41
+ end
42
+
43
+ group :development do
44
+ # Access an IRB console on exception pages or by using <%= console %> in views
45
+ gem 'web-console', '~> 2.0'
46
+
47
+ # Spring speeds up development by keeping your application running in the background. Read more: https://github.com/rails/spring
48
+ gem 'spring'
49
+ end
50
+
@@ -0,0 +1,172 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ actionmailer (4.2.6)
5
+ actionpack (= 4.2.6)
6
+ actionview (= 4.2.6)
7
+ activejob (= 4.2.6)
8
+ mail (~> 2.5, >= 2.5.4)
9
+ rails-dom-testing (~> 1.0, >= 1.0.5)
10
+ actionpack (4.2.6)
11
+ actionview (= 4.2.6)
12
+ activesupport (= 4.2.6)
13
+ rack (~> 1.6)
14
+ rack-test (~> 0.6.2)
15
+ rails-dom-testing (~> 1.0, >= 1.0.5)
16
+ rails-html-sanitizer (~> 1.0, >= 1.0.2)
17
+ actionview (4.2.6)
18
+ activesupport (= 4.2.6)
19
+ builder (~> 3.1)
20
+ erubis (~> 2.7.0)
21
+ rails-dom-testing (~> 1.0, >= 1.0.5)
22
+ rails-html-sanitizer (~> 1.0, >= 1.0.2)
23
+ activejob (4.2.6)
24
+ activesupport (= 4.2.6)
25
+ globalid (>= 0.3.0)
26
+ activemodel (4.2.6)
27
+ activesupport (= 4.2.6)
28
+ builder (~> 3.1)
29
+ activerecord (4.2.6)
30
+ activemodel (= 4.2.6)
31
+ activesupport (= 4.2.6)
32
+ arel (~> 6.0)
33
+ activesupport (4.2.6)
34
+ i18n (~> 0.7)
35
+ json (~> 1.7, >= 1.7.7)
36
+ minitest (~> 5.1)
37
+ thread_safe (~> 0.3, >= 0.3.4)
38
+ tzinfo (~> 1.1)
39
+ arel (6.0.4)
40
+ binding_of_caller (0.7.2)
41
+ debug_inspector (>= 0.0.1)
42
+ builder (3.2.3)
43
+ byebug (9.0.6)
44
+ coffee-rails (4.1.1)
45
+ coffee-script (>= 2.2.0)
46
+ railties (>= 4.0.0, < 5.1.x)
47
+ coffee-script (2.4.1)
48
+ coffee-script-source
49
+ execjs
50
+ coffee-script-source (1.12.2)
51
+ concurrent-ruby (1.0.5)
52
+ csv_fast_importer (1.0.0)
53
+ activerecord (>= 3.0)
54
+ debug_inspector (0.0.3)
55
+ erubis (2.7.0)
56
+ execjs (2.7.0)
57
+ ffi (1.9.18)
58
+ globalid (0.4.0)
59
+ activesupport (>= 4.2.0)
60
+ i18n (0.8.6)
61
+ jbuilder (2.7.0)
62
+ activesupport (>= 4.2.0)
63
+ multi_json (>= 1.2)
64
+ jquery-rails (4.3.1)
65
+ rails-dom-testing (>= 1, < 3)
66
+ railties (>= 4.2.0)
67
+ thor (>= 0.14, < 2.0)
68
+ json (1.8.6)
69
+ loofah (2.0.3)
70
+ nokogiri (>= 1.5.9)
71
+ mail (2.6.6)
72
+ mime-types (>= 1.16, < 4)
73
+ mime-types (3.1)
74
+ mime-types-data (~> 3.2015)
75
+ mime-types-data (3.2016.0521)
76
+ mini_portile2 (2.2.0)
77
+ minitest (5.10.3)
78
+ multi_json (1.12.1)
79
+ nokogiri (1.8.0)
80
+ mini_portile2 (~> 2.2.0)
81
+ pg (0.19.0)
82
+ rack (1.6.8)
83
+ rack-test (0.6.3)
84
+ rack (>= 1.0)
85
+ rails (4.2.6)
86
+ actionmailer (= 4.2.6)
87
+ actionpack (= 4.2.6)
88
+ actionview (= 4.2.6)
89
+ activejob (= 4.2.6)
90
+ activemodel (= 4.2.6)
91
+ activerecord (= 4.2.6)
92
+ activesupport (= 4.2.6)
93
+ bundler (>= 1.3.0, < 2.0)
94
+ railties (= 4.2.6)
95
+ sprockets-rails
96
+ rails-deprecated_sanitizer (1.0.3)
97
+ activesupport (>= 4.2.0.alpha)
98
+ rails-dom-testing (1.0.8)
99
+ activesupport (>= 4.2.0.beta, < 5.0)
100
+ nokogiri (~> 1.6)
101
+ rails-deprecated_sanitizer (>= 1.0.1)
102
+ rails-html-sanitizer (1.0.3)
103
+ loofah (~> 2.0)
104
+ railties (4.2.6)
105
+ actionpack (= 4.2.6)
106
+ activesupport (= 4.2.6)
107
+ rake (>= 0.8.7)
108
+ thor (>= 0.18.1, < 2.0)
109
+ rake (12.0.0)
110
+ rb-fsevent (0.10.2)
111
+ rb-inotify (0.9.10)
112
+ ffi (>= 0.5.0, < 2)
113
+ rdoc (4.3.0)
114
+ sass (3.5.1)
115
+ sass-listen (~> 4.0.0)
116
+ sass-listen (4.0.0)
117
+ rb-fsevent (~> 0.9, >= 0.9.4)
118
+ rb-inotify (~> 0.9, >= 0.9.7)
119
+ sass-rails (5.0.6)
120
+ railties (>= 4.0.0, < 6)
121
+ sass (~> 3.1)
122
+ sprockets (>= 2.8, < 4.0)
123
+ sprockets-rails (>= 2.0, < 4.0)
124
+ tilt (>= 1.1, < 3)
125
+ sdoc (0.4.2)
126
+ json (~> 1.7, >= 1.7.7)
127
+ rdoc (~> 4.0)
128
+ spring (2.0.2)
129
+ activesupport (>= 4.2)
130
+ sprockets (3.7.1)
131
+ concurrent-ruby (~> 1.0)
132
+ rack (> 1, < 3)
133
+ sprockets-rails (3.2.0)
134
+ actionpack (>= 4.0)
135
+ activesupport (>= 4.0)
136
+ sprockets (>= 3.0.0)
137
+ thor (0.20.0)
138
+ thread_safe (0.3.6)
139
+ tilt (2.0.8)
140
+ turbolinks (5.0.1)
141
+ turbolinks-source (~> 5)
142
+ turbolinks-source (5.0.3)
143
+ tzinfo (1.2.3)
144
+ thread_safe (~> 0.1)
145
+ uglifier (3.2.0)
146
+ execjs (>= 0.3.0, < 3)
147
+ web-console (2.3.0)
148
+ activemodel (>= 4.0)
149
+ binding_of_caller (>= 0.7.2)
150
+ railties (>= 4.0)
151
+ sprockets-rails (>= 2.0, < 4.0)
152
+
153
+ PLATFORMS
154
+ ruby
155
+
156
+ DEPENDENCIES
157
+ byebug
158
+ coffee-rails (~> 4.1.0)
159
+ csv_fast_importer
160
+ jbuilder (~> 2.0)
161
+ jquery-rails
162
+ pg
163
+ rails (= 4.2.6)
164
+ sass-rails (~> 5.0)
165
+ sdoc (~> 0.4.0)
166
+ spring
167
+ turbolinks
168
+ uglifier (>= 1.3.0)
169
+ web-console (~> 2.0)
170
+
171
+ BUNDLED WITH
172
+ 1.13.2