activerecord-import 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +32 -0
  3. data/.rubocop.yml +49 -0
  4. data/.rubocop_todo.yml +36 -0
  5. data/.travis.yml +74 -0
  6. data/Brewfile +3 -0
  7. data/CHANGELOG.md +430 -0
  8. data/Gemfile +59 -0
  9. data/LICENSE +56 -0
  10. data/README.markdown +619 -0
  11. data/Rakefile +68 -0
  12. data/activerecord-import.gemspec +23 -0
  13. data/benchmarks/README +32 -0
  14. data/benchmarks/benchmark.rb +68 -0
  15. data/benchmarks/lib/base.rb +138 -0
  16. data/benchmarks/lib/cli_parser.rb +107 -0
  17. data/benchmarks/lib/float.rb +15 -0
  18. data/benchmarks/lib/mysql2_benchmark.rb +19 -0
  19. data/benchmarks/lib/output_to_csv.rb +19 -0
  20. data/benchmarks/lib/output_to_html.rb +64 -0
  21. data/benchmarks/models/test_innodb.rb +3 -0
  22. data/benchmarks/models/test_memory.rb +3 -0
  23. data/benchmarks/models/test_myisam.rb +3 -0
  24. data/benchmarks/schema/mysql_schema.rb +16 -0
  25. data/gemfiles/3.2.gemfile +2 -0
  26. data/gemfiles/4.0.gemfile +2 -0
  27. data/gemfiles/4.1.gemfile +2 -0
  28. data/gemfiles/4.2.gemfile +2 -0
  29. data/gemfiles/5.0.gemfile +2 -0
  30. data/gemfiles/5.1.gemfile +2 -0
  31. data/gemfiles/5.2.gemfile +2 -0
  32. data/gemfiles/6.0.gemfile +1 -0
  33. data/gemfiles/6.1.gemfile +1 -0
  34. data/lib/activerecord-import.rb +6 -0
  35. data/lib/activerecord-import/active_record/adapters/abstract_adapter.rb +9 -0
  36. data/lib/activerecord-import/active_record/adapters/jdbcmysql_adapter.rb +6 -0
  37. data/lib/activerecord-import/active_record/adapters/jdbcpostgresql_adapter.rb +6 -0
  38. data/lib/activerecord-import/active_record/adapters/jdbcsqlite3_adapter.rb +6 -0
  39. data/lib/activerecord-import/active_record/adapters/mysql2_adapter.rb +6 -0
  40. data/lib/activerecord-import/active_record/adapters/postgresql_adapter.rb +6 -0
  41. data/lib/activerecord-import/active_record/adapters/seamless_database_pool_adapter.rb +7 -0
  42. data/lib/activerecord-import/active_record/adapters/sqlite3_adapter.rb +6 -0
  43. data/lib/activerecord-import/adapters/abstract_adapter.rb +66 -0
  44. data/lib/activerecord-import/adapters/em_mysql2_adapter.rb +5 -0
  45. data/lib/activerecord-import/adapters/mysql2_adapter.rb +5 -0
  46. data/lib/activerecord-import/adapters/mysql_adapter.rb +129 -0
  47. data/lib/activerecord-import/adapters/postgresql_adapter.rb +217 -0
  48. data/lib/activerecord-import/adapters/sqlite3_adapter.rb +180 -0
  49. data/lib/activerecord-import/base.rb +43 -0
  50. data/lib/activerecord-import/import.rb +1059 -0
  51. data/lib/activerecord-import/mysql2.rb +7 -0
  52. data/lib/activerecord-import/postgresql.rb +7 -0
  53. data/lib/activerecord-import/sqlite3.rb +7 -0
  54. data/lib/activerecord-import/synchronize.rb +66 -0
  55. data/lib/activerecord-import/value_sets_parser.rb +77 -0
  56. data/lib/activerecord-import/version.rb +5 -0
  57. data/test/adapters/jdbcmysql.rb +1 -0
  58. data/test/adapters/jdbcpostgresql.rb +1 -0
  59. data/test/adapters/jdbcsqlite3.rb +1 -0
  60. data/test/adapters/makara_postgis.rb +1 -0
  61. data/test/adapters/mysql2.rb +1 -0
  62. data/test/adapters/mysql2_makara.rb +1 -0
  63. data/test/adapters/mysql2spatial.rb +1 -0
  64. data/test/adapters/postgis.rb +1 -0
  65. data/test/adapters/postgresql.rb +1 -0
  66. data/test/adapters/postgresql_makara.rb +1 -0
  67. data/test/adapters/seamless_database_pool.rb +1 -0
  68. data/test/adapters/spatialite.rb +1 -0
  69. data/test/adapters/sqlite3.rb +1 -0
  70. data/test/database.yml.sample +52 -0
  71. data/test/import_test.rb +903 -0
  72. data/test/jdbcmysql/import_test.rb +5 -0
  73. data/test/jdbcpostgresql/import_test.rb +4 -0
  74. data/test/jdbcsqlite3/import_test.rb +4 -0
  75. data/test/makara_postgis/import_test.rb +8 -0
  76. data/test/models/account.rb +3 -0
  77. data/test/models/alarm.rb +2 -0
  78. data/test/models/bike_maker.rb +7 -0
  79. data/test/models/book.rb +9 -0
  80. data/test/models/car.rb +3 -0
  81. data/test/models/chapter.rb +4 -0
  82. data/test/models/dictionary.rb +4 -0
  83. data/test/models/discount.rb +3 -0
  84. data/test/models/end_note.rb +4 -0
  85. data/test/models/group.rb +3 -0
  86. data/test/models/promotion.rb +3 -0
  87. data/test/models/question.rb +3 -0
  88. data/test/models/rule.rb +3 -0
  89. data/test/models/tag.rb +4 -0
  90. data/test/models/topic.rb +23 -0
  91. data/test/models/user.rb +3 -0
  92. data/test/models/user_token.rb +4 -0
  93. data/test/models/vendor.rb +7 -0
  94. data/test/models/widget.rb +24 -0
  95. data/test/mysql2/import_test.rb +5 -0
  96. data/test/mysql2_makara/import_test.rb +6 -0
  97. data/test/mysqlspatial2/import_test.rb +6 -0
  98. data/test/postgis/import_test.rb +8 -0
  99. data/test/postgresql/import_test.rb +4 -0
  100. data/test/schema/generic_schema.rb +194 -0
  101. data/test/schema/jdbcpostgresql_schema.rb +1 -0
  102. data/test/schema/mysql2_schema.rb +19 -0
  103. data/test/schema/postgis_schema.rb +1 -0
  104. data/test/schema/postgresql_schema.rb +47 -0
  105. data/test/schema/sqlite3_schema.rb +13 -0
  106. data/test/schema/version.rb +10 -0
  107. data/test/sqlite3/import_test.rb +4 -0
  108. data/test/support/active_support/test_case_extensions.rb +75 -0
  109. data/test/support/assertions.rb +73 -0
  110. data/test/support/factories.rb +64 -0
  111. data/test/support/generate.rb +29 -0
  112. data/test/support/mysql/import_examples.rb +98 -0
  113. data/test/support/postgresql/import_examples.rb +563 -0
  114. data/test/support/shared_examples/on_duplicate_key_ignore.rb +43 -0
  115. data/test/support/shared_examples/on_duplicate_key_update.rb +368 -0
  116. data/test/support/shared_examples/recursive_import.rb +216 -0
  117. data/test/support/sqlite3/import_examples.rb +231 -0
  118. data/test/synchronize_test.rb +41 -0
  119. data/test/test_helper.rb +75 -0
  120. data/test/travis/database.yml +66 -0
  121. data/test/value_sets_bytes_parser_test.rb +104 -0
  122. data/test/value_sets_records_parser_test.rb +32 -0
  123. metadata +259 -0
@@ -0,0 +1,217 @@
1
+ module ActiveRecord::Import::PostgreSQLAdapter
2
+ include ActiveRecord::Import::ImportSupport
3
+ include ActiveRecord::Import::OnDuplicateKeyUpdateSupport
4
+
5
+ MIN_VERSION_FOR_UPSERT = 90_500
6
+
7
# Runs one multi-row INSERT and gathers whatever the statement returns.
#
# sql     - a String holding the statement up to the value sets, or an Array
#           whose first element is that String and whose remaining elements
#           are joined with spaces and appended after the value sets.
# values  - Array of value-set strings; they are joined with commas.
# options - Hash; :primary_key, :returning, :no_returning and :recursive are read.
# args    - forwarded untouched to insert / select_rows / select_values.
#
# Returns an ActiveRecord::Import::Result with an empty failed-instances list,
# an insert count of 1, and the ids / results taken from the returned values.
def insert_many( sql, values, options = {}, *args ) # :nodoc:
  number_of_inserts = 1
  returned_values = []
  ids = []
  results = []

  # Read the pieces without Array#shift so the caller's array is not emptied.
  base_sql, post_sql = if sql.is_a?( String )
    [sql, '']
  elsif sql.is_a?( Array )
    [sql.first, sql.drop( 1 ).join( ' ' )]
  end

  sql2insert = base_sql + values.join( ',' ) + post_sql

  columns = returning_columns(options)
  if columns.blank? || (options[:no_returning] && !options[:recursive])
    insert( sql2insert, *args )
  else
    returned_values = if columns.size > 1
      # Several columns come back per row, so fetch whole rows
      select_rows( sql2insert, *args )
    else
      select_values( sql2insert, *args )
    end
    query_cache.clear if query_cache_enabled
  end

  if options[:returning].blank?
    ids = returned_values
  elsif options[:primary_key].blank?
    results = returned_values
  else
    # both kinds of column were requested: separate keys from the rest
    ids, results = split_ids_and_results(returned_values, columns, options)
  end

  ActiveRecord::Import::Result.new([], number_of_inserts, ids, results)
end
45
+
46
# Separates each returned row into its primary-key part and its :returning part.
#
# values  - rows as returned by the query (scalars are wrapped with Array()).
# columns - the column names in the order they appear in each row.
# options - Hash providing :primary_key and :returning (name or list of names).
#
# Returns [ids, results]; when only one column was asked for on either side,
# that side holds bare values instead of one-element arrays.
def split_ids_and_results(values, columns, options)
  key_positions = Array(options[:primary_key]).map { |name| columns.index(name) }
  extra_positions = Array(options[:returning]).map { |name| columns.index(name) }
  single_key = key_positions.size == 1
  single_extra = extra_positions.size == 1

  ids = []
  results = []
  values.each do |value|
    row = Array(value)
    key_part = key_positions.map { |position| row[position] }
    extra_part = extra_positions.map { |position| row[position] }
    ids << (single_key ? key_part.first : key_part)
    results << (single_extra ? extra_part.first : extra_part)
  end

  [ids, results]
end
63
+
64
# Builds the SQL fragment that asks the named sequence for its next value.
def next_value_for_sequence(sequence_name)
  "nextval('#{sequence_name}')"
end
67
+
68
# Collects the SQL fragments appended after the value sets: an optional
# ON CONFLICT DO NOTHING clause, whatever the parent implementation adds,
# and an optional RETURNING clause.
def post_sql_statements( table_name, options ) # :nodoc:
  statements = []

  if supports_on_duplicate_key_update?
    ignore_requested = options[:ignore] || options[:on_duplicate_key_ignore]
    # The ignore clause is left out whenever an update clause or a recursive
    # import was requested.
    if ignore_requested && !options[:on_duplicate_key_update] && !options[:recursive]
      statements << sql_for_on_duplicate_key_ignore( table_name, options[:on_duplicate_key_ignore] )
    end
  elsif logger && options[:on_duplicate_key_ignore] && !options[:on_duplicate_key_update]
    logger.warn "Ignoring on_duplicate_key_ignore because it is not supported by the database."
  end

  statements += super(table_name, options)

  returned = returning_columns(options)
  suppress_returning = returned.blank? || (options[:no_returning] && !options[:recursive])
  statements << " RETURNING \"#{returned.join('", "')}\"" unless suppress_returning

  statements
end
89
+
90
# Lists the columns to put in the RETURNING clause: the primary key column(s)
# first, then any :returning columns that are not already in the list.
def returning_columns(options)
  primary_key = options[:primary_key]
  extra = options[:returning]

  names = []
  names.concat( Array(primary_key) ) if primary_key.present?
  names |= Array(extra) if extra.present?
  names
end
96
+
97
+ # Add a column to be updated on duplicate key update
98
# Registers +column+ in options[:on_duplicate_key_update], mutating it in place.
# An Array (given directly or under :columns) gains the symbol unless already
# present; a Hash under :columns maps the symbol to itself. A Hash without
# :columns gets an empty :columns Array first. Other values are left alone.
def add_column_for_on_duplicate_key_update( column, options = {} ) # :nodoc:
  target = options[:on_duplicate_key_update]
  target = target.fetch( :columns ) { target[:columns] = [] } if target.is_a?( Hash )

  case target
  when Array
    name = column.to_sym
    target << name unless target.include?( name )
  when Hash
    name = column.to_sym
    target[name] = name
  end
end
110
+
111
+ # Returns a generated ON CONFLICT DO NOTHING statement given the passed
112
+ # in +args+.
113
# Builds the ON CONFLICT ... DO NOTHING clause. A conflict target is only
# included when the first extra argument is a Hash describing one.
def sql_for_on_duplicate_key_ignore( table_name, *args ) # :nodoc:
  settings = args.first
  target_clause = settings.is_a?( Hash ) ? sql_for_conflict_target( settings ) : nil
  " ON CONFLICT #{target_clause}DO NOTHING"
end
118
+
119
+ # Returns a generated ON CONFLICT DO UPDATE statement given the passed
120
+ # in +args+.
121
# Builds the ON CONFLICT clause for an upsert.
#
# args is [settings, primary_key, locking_column]. settings may be an Array or
# String of columns (treated as { columns: settings }) or a Hash with
# :columns, :condition and the conflict-target keys. Any other settings value
# yields nil.
#
# With no columns to update the clause degrades to DO NOTHING. Otherwise a
# conflict target is required (falling back to the primary key) and
# ArgumentError is raised when none can be determined or when :columns is of
# an unsupported type.
def sql_for_on_duplicate_key_update( table_name, *args ) # :nodoc:
  settings, primary_key, locking_column = args
  case settings
  when Array, String then settings = { columns: settings }
  end
  return unless settings.is_a?( Hash )

  target_clause = sql_for_conflict_target( settings )
  columns = settings.fetch( :columns, [] )
  condition = settings[:condition]

  if columns.respond_to?( :empty? ) && columns.empty?
    return " ON CONFLICT #{target_clause}DO NOTHING"
  end

  target_clause ||= sql_for_default_conflict_target( table_name, primary_key )
  unless target_clause
    raise ArgumentError, 'Expected :conflict_target or :constraint_name to be specified'
  end

  assignments = case columns
  when Array then sql_for_on_duplicate_key_update_as_array( table_name, locking_column, columns )
  when Hash then sql_for_on_duplicate_key_update_as_hash( table_name, locking_column, columns )
  when String then columns
  else raise ArgumentError, 'Expected :columns to be an Array or Hash'
  end

  statement = " ON CONFLICT #{target_clause}DO UPDATE SET #{assignments}"
  statement << " WHERE #{condition}" if condition.present?
  statement
end
155
+
156
# Turns a list of column names into "col=EXCLUDED.col" assignments joined by
# commas, with the locking-column increment appended when one is given.
def sql_for_on_duplicate_key_update_as_array( table_name, locking_column, arr ) # :nodoc:
  assignments = arr.map do |column|
    quoted = quote_column_name( column )
    [quoted, quoted].join( '=EXCLUDED.' )
  end
  increment_locking_column!(assignments, locking_column)
  assignments.join( ',' )
end
164
+
165
# Turns a { target => source } mapping into "target=EXCLUDED.source"
# assignments joined by commas, with the locking-column increment appended
# when one is given.
def sql_for_on_duplicate_key_update_as_hash( table_name, locking_column, hsh ) # :nodoc:
  assignments = hsh.map do |target, source|
    quoted_target = quote_column_name( target )
    quoted_source = quote_column_name( source )
    [quoted_target, quoted_source].join( '=EXCLUDED.' )
  end
  increment_locking_column!(assignments, locking_column)
  assignments.join( ',' )
end
174
+
175
# Builds the conflict-target part of an ON CONFLICT clause.
#
# args - Hash that may carry:
#        :constraint_name - takes precedence; yields "ON CONSTRAINT name ".
#        :conflict_target - column name(s); blank entries are dropped.
#        :index_predicate - appended as "WHERE predicate " after the columns.
#
# Returns the fragment (always ending in a space) or nil when neither a
# constraint name nor a conflict target is present.
def sql_for_conflict_target( args = {} )
  constraint_name = args[:constraint_name]
  conflict_target = args[:conflict_target]
  index_predicate = args[:index_predicate]
  if constraint_name.present?
    "ON CONSTRAINT #{constraint_name} "
  elsif conflict_target.present?
    # Build a fresh string instead of appending to literals, so this keeps
    # working when string literals are frozen.
    sql = "(#{Array( conflict_target ).reject( &:blank? ).join( ', ' )}) "
    sql += "WHERE #{index_predicate} " if index_predicate
    sql
  end
end
187
+
188
# Uses the primary key column(s) as the conflict target; returns nil when
# there is no primary key to fall back on.
def sql_for_default_conflict_target( table_name, primary_key )
  key_list = Array(primary_key).join(', ')
  return if key_list.blank?
  "(#{key_list}) "
end
192
+
193
+ # Return true if the statement is a duplicate key record error
194
# True only for an ActiveRecord::StatementInvalid whose text mentions
# 'duplicate key'.
def duplicate_key_update_error?(exception) # :nodoc:
  return false unless exception.is_a?(ActiveRecord::StatementInvalid)
  exception.to_s.include?('duplicate key')
end
197
+
198
# Upserts are available once the server version reaches MIN_VERSION_FOR_UPSERT.
def supports_on_duplicate_key_update?
  server_version = database_version
  server_version >= MIN_VERSION_FOR_UPSERT
end
201
+
202
+ def supports_setting_primary_key_of_imported_objects?
203
+ true # unconditional for this adapter; insert_many reads generated keys back through the RETURNING clause
204
+ end
205
+
206
# Appends an assignment that bumps the locking column by one to +results+.
# Does nothing (and returns nil) when no locking column is given.
def increment_locking_column!(results, locking_column)
  return unless locking_column.present?
  results << "\"#{locking_column}\"=EXCLUDED.\"#{locking_column}\"+1"
end
211
+
212
+ private
213
+
214
# Prefers the adapter's postgresql_version when it is defined and otherwise
# defers to the parent implementation.
def database_version
  return postgresql_version if defined?(postgresql_version)
  super
end
217
+ end
@@ -0,0 +1,180 @@
1
+ module ActiveRecord::Import::SQLite3Adapter
2
+ include ActiveRecord::Import::ImportSupport
3
+ include ActiveRecord::Import::OnDuplicateKeyUpdateSupport
4
+
5
+ MIN_VERSION_FOR_IMPORT = "3.7.11".freeze
6
+ MIN_VERSION_FOR_UPSERT = "3.24.0".freeze
7
+ SQLITE_LIMIT_COMPOUND_SELECT = 500
8
+
9
+ # Override our conformance to ActiveRecord::Import::ImportSupport interface
10
+ # to ensure that we only support import in supported versions of SQLite.
11
+ # INSERT statements with multiple value sets were introduced in SQLite 3.7.11.
12
# Bulk import needs at least the version named by MIN_VERSION_FOR_IMPORT.
def supports_import?
  current_version = database_version
  current_version >= MIN_VERSION_FOR_IMPORT
end
15
+
16
# Upserts need at least the version named by MIN_VERSION_FOR_UPSERT.
def supports_on_duplicate_key_update?
  current_version = database_version
  current_version >= MIN_VERSION_FOR_UPSERT
end
19
+
20
+ # +sql+ can be a single string or an array. If it is an array all
21
+ # elements that are in position >= 1 will be appended to the final SQL.
22
# Inserts +values+ in batches of at most SQLITE_LIMIT_COMPOUND_SELECT value
# sets, all inside one nested transaction.
#
# sql      - a String holding the statement up to the value sets, or an Array
#            whose first element is that String and whose remaining elements
#            are joined with spaces and appended after each batch.
# values   - Array of value-set strings.
# _options - accepted for interface compatibility; not used here.
# args     - forwarded untouched to insert.
#
# Returns an ActiveRecord::Import::Result whose insert count is the number of
# statements executed; ids and results are always empty.
def insert_many( sql, values, _options = {}, *args ) # :nodoc:
  number_of_inserts = 0

  # Read the pieces without Array#shift so the caller's array is not emptied.
  base_sql, post_sql = if sql.is_a?( String )
    [sql, '']
  elsif sql.is_a?( Array )
    [sql.first, sql.drop( 1 ).join( ' ' )]
  end

  value_sets = ::ActiveRecord::Import::ValueSetsRecordsParser.parse(values,
    max_records: SQLITE_LIMIT_COMPOUND_SELECT)

  transaction(requires_new: true) do
    value_sets.each do |value_set|
      number_of_inserts += 1
      sql2insert = base_sql + value_set.join( ',' ) + post_sql
      insert( sql2insert, *args )
    end
  end

  ActiveRecord::Import::Result.new([], number_of_inserts, [], [])
end
44
+
45
# Fragments placed right after INSERT. When the database cannot do upserts,
# a requested ignore is expressed as "OR IGNORE"; the parent's fragments follow.
def pre_sql_statements( options )
  legacy_ignore = !supports_on_duplicate_key_update? && (options[:ignore] || options[:on_duplicate_key_ignore])
  statements = legacy_ignore ? ["OR IGNORE"] : []
  statements + super
end
53
+
54
# Fragments appended after the value sets. When upserts are supported and an
# ignore (without an update) was requested, an ON CONFLICT DO NOTHING clause
# comes first; the parent's fragments follow.
def post_sql_statements( table_name, options ) # :nodoc:
  statements = []

  add_ignore = supports_on_duplicate_key_update? &&
               (options[:ignore] || options[:on_duplicate_key_ignore]) &&
               !options[:on_duplicate_key_update]
  statements << sql_for_on_duplicate_key_ignore( options[:on_duplicate_key_ignore] ) if add_ignore

  statements + super
end
66
+
67
# Builds a nextval('name') SQL fragment for the given sequence name.
def next_value_for_sequence(sequence_name)
  "nextval('#{sequence_name}')"
end
70
+
71
+ # Add a column to be updated on duplicate key update
72
# Makes sure +column+ is part of options[:on_duplicate_key_update], editing
# that value in place. Arrays (given directly or under :columns) receive the
# symbol once; a Hash under :columns maps the symbol to itself; a Hash lacking
# :columns is given an empty Array there first.
def add_column_for_on_duplicate_key_update( column, options = {} ) # :nodoc:
  setting = options[:on_duplicate_key_update]
  container = setting.is_a?( Hash ) ? setting.fetch( :columns ) { setting[:columns] = [] } : setting

  case container
  when Array
    symbol = column.to_sym
    container << symbol unless container.include?( symbol )
  when Hash
    symbol = column.to_sym
    container[symbol] = symbol
  end
end
84
+
85
+ # Returns a generated ON CONFLICT DO NOTHING statement given the passed
86
+ # in +args+.
87
# Builds the ON CONFLICT ... DO NOTHING clause; the conflict target is only
# filled in when the first argument is a Hash describing one.
def sql_for_on_duplicate_key_ignore( *args ) # :nodoc:
  settings = args.first
  target_clause = settings.is_a?( Hash ) ? sql_for_conflict_target( settings ) : nil
  " ON CONFLICT #{target_clause}DO NOTHING"
end
92
+
93
+ # Returns a generated ON CONFLICT DO UPDATE statement given the passed
94
+ # in +args+.
95
# Builds the ON CONFLICT clause for an upsert.
#
# args is [settings, primary_key, locking_column]. An Array or String of
# columns is treated as { columns: settings }; anything that is not a Hash
# after that yields nil.
#
# An empty column list produces DO NOTHING. Otherwise the conflict target
# falls back to the primary key, and ArgumentError is raised when no target
# can be determined or when :columns has an unsupported type.
def sql_for_on_duplicate_key_update( _table_name, *args ) # :nodoc:
  settings, primary_key, locking_column = args
  case settings
  when Array, String then settings = { columns: settings }
  end
  return unless settings.is_a?( Hash )

  target_clause = sql_for_conflict_target( settings )
  columns = settings.fetch( :columns, [] )
  condition = settings[:condition]

  nothing_to_update = columns.respond_to?( :empty? ) && columns.empty?
  return " ON CONFLICT #{target_clause}DO NOTHING" if nothing_to_update

  target_clause ||= sql_for_default_conflict_target( primary_key )
  raise ArgumentError, 'Expected :conflict_target to be specified' unless target_clause

  assignments = case columns
  when Array then sql_for_on_duplicate_key_update_as_array( locking_column, columns )
  when Hash then sql_for_on_duplicate_key_update_as_hash( locking_column, columns )
  when String then columns
  else raise ArgumentError, 'Expected :columns to be an Array or Hash'
  end

  statement = " ON CONFLICT #{target_clause}DO UPDATE SET #{assignments}"
  statement << " WHERE #{condition}" if condition.present?
  statement
end
129
+
130
# Renders each listed column as "col=EXCLUDED.col", adds the locking-column
# increment when one is given, and joins everything with commas.
def sql_for_on_duplicate_key_update_as_array( locking_column, arr ) # :nodoc:
  assignments = arr.map do |column|
    quoted = quote_column_name( column )
    [quoted, quoted].join( '=EXCLUDED.' )
  end
  increment_locking_column!(assignments, locking_column)
  assignments.join( ',' )
end
138
+
139
# Renders each { target => source } pair as "target=EXCLUDED.source", adds the
# locking-column increment when one is given, and joins everything with commas.
def sql_for_on_duplicate_key_update_as_hash( locking_column, hsh ) # :nodoc:
  assignments = hsh.map do |target, source|
    quoted_target = quote_column_name( target )
    quoted_source = quote_column_name( source )
    [quoted_target, quoted_source].join( '=EXCLUDED.' )
  end
  increment_locking_column!(assignments, locking_column)
  assignments.join( ',' )
end
148
+
149
# Builds the conflict-target part of an ON CONFLICT clause.
#
# args - Hash that may carry:
#        :conflict_target - column name(s); blank entries are dropped.
#        :index_predicate - appended as "WHERE predicate " after the columns.
#
# Returns the fragment (always ending in a space) or nil when no conflict
# target is present.
def sql_for_conflict_target( args = {} )
  conflict_target = args[:conflict_target]
  index_predicate = args[:index_predicate]
  if conflict_target.present?
    # Build a fresh string instead of appending to literals, so this keeps
    # working when string literals are frozen.
    sql = "(#{Array( conflict_target ).reject( &:blank? ).join( ', ' )}) "
    sql += "WHERE #{index_predicate} " if index_predicate
    sql
  end
end
158
+
159
# Falls back to the primary key column(s) as the conflict target; nil when
# there is no primary key.
def sql_for_default_conflict_target( primary_key )
  key_list = Array(primary_key).join(', ')
  return if key_list.blank?
  "(#{key_list}) "
end
163
+
164
+ # Return true if the statement is a duplicate key record error
165
# True only for an ActiveRecord::StatementInvalid whose text mentions
# 'duplicate key'.
def duplicate_key_update_error?(exception) # :nodoc:
  return false unless exception.is_a?(ActiveRecord::StatementInvalid)
  exception.to_s.include?('duplicate key')
end
168
+
169
# Appends an assignment that bumps the locking column by one to +results+.
# Does nothing (and returns nil) when no locking column is given.
def increment_locking_column!(results, locking_column)
  return unless locking_column.present?
  results << "\"#{locking_column}\"=EXCLUDED.\"#{locking_column}\"+1"
end
174
+
175
+ private
176
+
177
# Prefers the adapter's sqlite_version when it is defined and otherwise
# defers to the parent implementation.
def database_version
  return sqlite_version if defined?(sqlite_version)
  super
end
180
+ end
@@ -0,0 +1,43 @@
1
+ require "pathname"
2
+ require "active_record"
3
+ require "active_record/version"
4
+
5
+ module ActiveRecord::Import
6
+ ADAPTER_PATH = "activerecord-import/active_record/adapters".freeze
7
+
8
# Maps an adapter name onto the adapter family whose import extension it
# shares; names without a known family are returned unchanged.
def self.base_adapter(adapter)
  case adapter
  when 'mysql2_makara', 'mysql2spatial'
    'mysql2'
  when 'spatialite'
    'sqlite3'
  when 'postgresql_makara', 'makara_postgis', 'postgis', 'cockroachdb'
    'postgresql'
  else
    adapter
  end
end
20
+
21
+ # Loads the import functionality for a specific database adapter
22
# Requires the import extension for the given adapter name. A LoadError is
# swallowed and nil is returned, so adapters without an extension file are
# tolerated.
def self.require_adapter(adapter)
  adapter_file = "/#{base_adapter(adapter)}_adapter"
  require File.join(ADAPTER_PATH, adapter_file)
rescue LoadError
  nil # no extension file could be loaded for this adapter
end
27
+
28
+ # Loads the import functionality for the passed in ActiveRecord connection
29
# Reads the adapter name from the connection pool's configuration and
# requires the matching import extension.
def self.load_from_connection_pool(connection_pool)
  if connection_pool.respond_to?(:db_config) # ActiveRecord >= 6.1
    return require_adapter(connection_pool.db_config.adapter)
  end

  # pools without db_config keep the adapter name in spec.config
  require_adapter connection_pool.spec.config[:adapter]
end
38
+ end
39
+
40
+ require 'activerecord-import/import'
41
+ require 'activerecord-import/active_record/adapters/abstract_adapter'
42
+ require 'activerecord-import/synchronize'
43
+ require 'activerecord-import/value_sets_parser'
@@ -0,0 +1,1059 @@
1
+ require "ostruct"
2
+
3
+ module ActiveRecord::Import::ConnectionAdapters; end
4
+
5
+ module ActiveRecord::Import #:nodoc:
6
+ Result = Struct.new(:failed_instances, :num_inserts, :ids, :results)
7
+
8
+ module ImportSupport #:nodoc:
9
+ def supports_import? #:nodoc:
10
+ true # default answer for adapters that mix this module in; an adapter may override the predicate
11
+ end
12
+ end
13
+
14
+ module OnDuplicateKeyUpdateSupport #:nodoc:
15
+ def supports_on_duplicate_key_update? #:nodoc:
16
+ true # default answer for adapters that mix this module in; an adapter may override the predicate
17
+ end
18
+ end
19
+
20
# Error carrying a message that names the value and the index for which no
# column was found.
class MissingColumnError < StandardError
  def initialize(name, index)
    description = "Missing column for value <#{name}> at index #{index}"
    super(description)
  end
end
25
+
26
+ class Validator
27
# Remembers the model class and options, then prepares the validation
# callbacks for that class.
def initialize(klass, options = {})
  @validator_class = klass
  @options = options
  init_validations(klass)
end
32
+
33
# Copies +klass+'s validate callbacks into @validate_callbacks and rewrites
# two kinds of validator on the copy:
#
# * uniqueness validators (unless options[:validate_uniqueness] is set) have
#   their attribute list emptied;
# * presence validators that name a belongs_to association are pointed at the
#   association's foreign key instead, unless that key is already listed.
#
# Also records +klass+ as the class the callbacks were prepared for, so a
# later call for a different class is followed by a fresh preparation when
# the original class comes back.
def init_validations(klass)
  # Keep this in step with @validate_callbacks; valid_model? compares
  # against it to decide whether the callbacks must be rebuilt.
  @validator_class = klass
  @validate_callbacks = klass._validate_callbacks.dup

  @validate_callbacks.each_with_index do |callback, i|
    filter = callback.raw_filter
    next unless filter.class.name =~ /Validations::PresenceValidator/ ||
                (!@options[:validate_uniqueness] &&
                 filter.is_a?(ActiveRecord::Validations::UniquenessValidator))

    # Work on copies so the model class's own callbacks stay untouched.
    callback = callback.dup
    filter = filter.dup
    attrs = filter.instance_variable_get(:@attributes).dup

    if filter.is_a?(ActiveRecord::Validations::UniquenessValidator)
      attrs = []
    else
      associations = klass.reflect_on_all_associations(:belongs_to)
      associations.each do |assoc|
        if (index = attrs.index(assoc.name))
          key = assoc.foreign_key.to_sym
          attrs[index] = key unless attrs.include?(key)
        end
      end
    end

    filter.instance_variable_set(:@attributes, attrs)

    # Two callback-chain APIs are handled: one exposing a (possibly private)
    # #chain array, the other exposing raw_filter= / filter= on the callback.
    if @validate_callbacks.respond_to?(:chain, true)
      @validate_callbacks.send(:chain).tap do |chain|
        callback.instance_variable_set(:@filter, filter)
        chain[i] = callback
      end
    else
      callback.raw_filter = filter
      callback.filter = callback.send(:_compile_filter, filter)
      @validate_callbacks[i] = callback
    end
  end
end
72
+
73
# Runs the prepared validate callbacks against +model+ and reports whether it
# ended up without errors.
#
# The validation context is options[:validate_with_context] when given,
# otherwise :create for new records and :update for the rest. The model's
# previous context is restored afterwards, even when validation raises.
def valid_model?(model)
  init_validations(model.class) unless model.class == @validator_class

  requested_context = @options[:validate_with_context] || (model.new_record? ? :create : :update)
  previous_context = model.send(:validation_context)

  begin
    model.send(:validation_context=, requested_context)
    model.errors.clear

    model.run_callbacks(:validation) do
      if defined?(ActiveSupport::Callbacks::Filters::Environment) # ActiveRecord >= 4.1
        compiled_chain = @validate_callbacks.compile
        environment = ActiveSupport::Callbacks::Filters::Environment.new(model, false, nil)
        if compiled_chain.respond_to?(:call) # ActiveRecord < 5.1
          compiled_chain.call(environment)
        else # ActiveRecord 5.1
          # Only the before and after parts of the chain are invoked here.
          # An "around" callback in the :validate chain cannot be honoured
          # this way, so it is rejected loudly instead of being skipped.
          unless compiled_chain.final?
            raise "The :validate callback chain contains an 'around' callback, which is unsupported"
          end
          compiled_chain.invoke_before(environment)
          compiled_chain.invoke_after(environment)
        end
      elsif @validate_callbacks.method(:compile).arity == 0 # ActiveRecord = 4.0
        model.instance_eval @validate_callbacks.compile
      else # ActiveRecord 3.x
        model.instance_eval @validate_callbacks.compile(nil, model)
      end
    end

    model.errors.empty?
  ensure
    model.send(:validation_context=, previous_context)
  end
end
117
+ end
118
+ end
119
+
120
+ class ActiveRecord::Associations::CollectionProxy
121
# Hands the call, arguments and block unchanged, to the wrapped association.
def bulk_import(*args, &block)
  wrapped_association = @association
  wrapped_association.bulk_import(*args, &block)
end
124
+ alias import bulk_import unless respond_to? :import
125
+ end
126
+
127
+ class ActiveRecord::Associations::CollectionAssociation
128
# Bulk-imports records through this association, forcing every row to belong
# to the association owner.
#
# Accepted call shapes (an options Hash may always be passed last):
#   bulk_import(models)
#   bulk_import(column_names, models)
#   bulk_import(array_of_hashes)
#   bulk_import(column_names, array_of_hashes)
#   bulk_import(column_names, array_of_values)
#   bulk_import([])
#
# The foreign key (and, when the reflection has a type, the type column) is
# set to the owner's key and class name, and appended to the imported columns
# when missing, before the call is delegated to the associated model's
# bulk_import. An empty array returns an empty Result without delegating.
#
# Raises ActiveRecord::RecordNotSaved if the owner is not persisted and
# ArgumentError for unsupported argument shapes or invalid hashes.
def bulk_import(*args, &block)
  unless owner.persisted?
    raise ActiveRecord::RecordNotSaved, "You cannot call import unless the parent is saved"
  end

  options = args.last.is_a?(Hash) ? args.pop : {}

  model_klass = reflection.klass
  symbolized_foreign_key = reflection.foreign_key.to_sym

  symbolized_column_names = if model_klass.connection.respond_to?(:supports_virtual_columns?) && model_klass.connection.supports_virtual_columns?
    model_klass.columns.reject(&:virtual?).map { |c| c.name.to_sym }
  else
    model_klass.column_names.map(&:to_sym)
  end

  owner_primary_key = reflection.active_record_primary_key.to_sym
  owner_primary_key_value = owner.send(owner_primary_key)

  # assume array of model objects
  if args.last.is_a?( Array ) && args.last.first.is_a?(ActiveRecord::Base)
    if args.length == 2
      models = args.last
      column_names = args.first.dup
    else
      models = args.first
      column_names = symbolized_column_names
    end

    # Look at the columns actually being imported, not at every model
    # column, so an explicit column list still carries the owner's key
    # and type.
    imported_columns = column_names.map(&:to_sym)
    unless imported_columns.include?(symbolized_foreign_key)
      column_names << symbolized_foreign_key
    end

    if reflection.type && !imported_columns.include?(reflection.type.to_sym)
      column_names << reflection.type.to_sym
    end

    models.each do |m|
      m.public_send "#{symbolized_foreign_key}=", owner_primary_key_value
      m.public_send "#{reflection.type}=", owner.class.name if reflection.type
    end

    return model_klass.bulk_import column_names, models, options

  # supports array of hash objects
  elsif args.last.is_a?( Array ) && args.last.first.is_a?(Hash)
    if args.length == 2
      array_of_hashes = args.last
      column_names = args.first.dup
      allow_extra_hash_keys = true
    else
      array_of_hashes = args.first
      column_names = array_of_hashes.first.keys
      allow_extra_hash_keys = false
    end

    symbolized_column_names = column_names.map(&:to_sym)
    unless symbolized_column_names.include?(symbolized_foreign_key)
      column_names << symbolized_foreign_key
    end

    if reflection.type && !symbolized_column_names.include?(reflection.type.to_sym)
      column_names << reflection.type.to_sym
    end

    array_of_attributes = array_of_hashes.map do |h|
      error_message = model_klass.send(:validate_hash_import, h, symbolized_column_names, allow_extra_hash_keys)

      raise ArgumentError, error_message if error_message

      column_names.map do |key|
        if key == symbolized_foreign_key
          owner_primary_key_value
        elsif reflection.type && key == reflection.type.to_sym
          owner.class.name
        else
          h[key]
        end
      end
    end

    return model_klass.bulk_import column_names, array_of_attributes, options

  # supports empty array
  elsif args.last.is_a?( Array ) && args.last.empty?
    # Fill all four members so #results is an empty list, as in every other
    # Result built by the adapters.
    return ActiveRecord::Import::Result.new([], 0, [], [])

  # supports 2-element array and array
  elsif args.size == 2 && args.first.is_a?( Array ) && args.last.is_a?( Array )
    column_names, array_of_attributes = args

    # dup the passed args so we don't modify unintentionally
    column_names = column_names.dup
    array_of_attributes = array_of_attributes.map(&:dup)

    symbolized_column_names = column_names.map(&:to_sym)

    if symbolized_column_names.include?(symbolized_foreign_key)
      index = symbolized_column_names.index(symbolized_foreign_key)
      array_of_attributes.each { |attrs| attrs[index] = owner_primary_key_value }
    else
      column_names << symbolized_foreign_key
      array_of_attributes.each { |attrs| attrs << owner_primary_key_value }
    end

    if reflection.type
      symbolized_type = reflection.type.to_sym
      if symbolized_column_names.include?(symbolized_type)
        index = symbolized_column_names.index(symbolized_type)
        array_of_attributes.each { |attrs| attrs[index] = owner.class.name }
      else
        column_names << symbolized_type
        array_of_attributes.each { |attrs| attrs << owner.class.name }
      end
    end

    return model_klass.bulk_import column_names, array_of_attributes, options
  else
    raise ArgumentError, "Invalid arguments!"
  end
end
245
+ alias import bulk_import unless respond_to? :import
246
+ end
247
+
248
+ class ActiveRecord::Base
249
+ class << self
250
# Establishes the connection through the original implementation, then loads
# the import extension for the resulting connection pool. Returns whatever
# the original implementation returned.
def establish_connection_with_activerecord_import(*args)
  establish_connection_without_activerecord_import(*args).tap do
    ActiveRecord::Import.load_from_connection_pool connection_pool
  end
end
255
+
256
+ alias establish_connection_without_activerecord_import establish_connection
257
+ alias establish_connection establish_connection_with_activerecord_import
258
+
259
+ # Returns true if the current database connection adapter
260
+ # supports import functionality, otherwise returns false.
261
# False when the connection does not define supports_import?; otherwise the
# connection's own answer (the arguments are passed along).
def supports_import?(*args)
  return false unless connection.respond_to?(:supports_import?)
  connection.supports_import?(*args)
end
264
+
265
+ # Returns true if the current database connection adapter
266
+ # supports on duplicate key update functionality, otherwise
267
+ # returns false.
268
# False when the connection does not define the predicate; otherwise the
# connection's own answer.
def supports_on_duplicate_key_update?
  return false unless connection.respond_to?(:supports_on_duplicate_key_update?)
  connection.supports_on_duplicate_key_update?
end
271
+
272
+ # returns true if the current database connection adapter
273
+ # supports setting the primary key of bulk imported models, otherwise
274
+ # returns false
275
# False when the connection does not define the predicate; otherwise the
# connection's own answer.
def supports_setting_primary_key_of_imported_objects?
  return false unless connection.respond_to?(:supports_setting_primary_key_of_imported_objects?)
  connection.supports_setting_primary_key_of_imported_objects?
end
278
+
279
+ # Imports a collection of values to the database.
280
+ #
281
+ # This is more efficient than using ActiveRecord::Base#create or
282
+ # ActiveRecord::Base#save multiple times. This method works well if
283
+ # you want to create more than one record at a time and do not care
284
+ # about having ActiveRecord objects returned for each record
285
+ # inserted.
286
+ #
287
+ # This can be used with or without validations. It does not utilize
288
+ # the ActiveRecord::Callbacks during creation/modification while
289
+ # performing the import.
290
+ #
291
+ # == Usage
292
+ # Model.import array_of_models
293
+ # Model.import column_names, array_of_models
294
+ # Model.import array_of_hash_objects
295
+ # Model.import column_names, array_of_hash_objects
296
+ # Model.import column_names, array_of_values
297
+ # Model.import column_names, array_of_values, options
298
+ #
299
+ # ==== Model.import array_of_models
300
+ #
301
+ # With this form you can call _import_ passing in an array of model
302
+ # objects that you want updated.
303
+ #
304
+ # ==== Model.import column_names, array_of_values
305
+ #
306
+ # The first parameter +column_names+ is an array of symbols or
307
+ # strings which specify the columns that you want to update.
308
+ #
309
+ # The second parameter, +array_of_values+, is an array of
310
+ # arrays. Each subarray is a single set of values for a new
311
+ # record. The order of values in each subarray should match up to
312
+ # the order of the +column_names+.
313
+ #
314
+ # ==== Model.import column_names, array_of_values, options
315
+ #
316
+ # The first two parameters are the same as the above form. The third
317
+ # parameter, +options+, is a hash. This is optional. Please see
318
+ # below for what +options+ are available.
319
+ #
320
+ # == Options
321
+ # * +validate+ - true|false, tells import whether or not to use
322
+ # ActiveRecord validations. Validations are enforced by default.
323
+ # It skips the uniqueness validation for performance reasons.
324
+ # You can find more details here:
325
+ # https://github.com/zdennis/activerecord-import/issues/228
326
+ # * +ignore+ - true|false, an alias for on_duplicate_key_ignore.
327
+ # * +on_duplicate_key_ignore+ - true|false, tells import to discard
328
+ # records that contain duplicate keys. For Postgres 9.5+ it adds
329
+ # ON CONFLICT DO NOTHING, for MySQL it uses INSERT IGNORE, and for
330
+ # SQLite it uses INSERT OR IGNORE. Cannot be enabled on a
331
+ # recursive import. For database adapters that normally support
332
+ # setting primary keys on imported objects, this option prevents
333
+ # that from occurring.
334
+ # * +on_duplicate_key_update+ - :all, an Array, or Hash, tells import to
335
+ # use MySQL's ON DUPLICATE KEY UPDATE or Postgres/SQLite ON CONFLICT
336
+ # DO UPDATE ability. See On Duplicate Key Update below.
337
+ # * +synchronize+ - an array of ActiveRecord instances for the model
338
+ # that you are currently importing data into. This synchronizes
339
+ # existing model instances in memory with updates from the import.
340
+ # * +timestamps+ - true|false, tells import to not add timestamps
341
+ # (if false) even if record timestamps is enabled in ActiveRecord::Base
342
+ # * +recursive+ - true|false, tells import to import all has_many/has_one
343
+ # associations if the adapter supports setting the primary keys of the
344
+ # newly imported objects. PostgreSQL only.
345
+ # * +batch_size+ - an integer value to specify the max number of records to
346
+ # include per insert. Defaults to the total number of records to import.
347
+ #
348
+ # == Examples
349
+ # class BlogPost < ActiveRecord::Base ; end
350
+ #
351
+ # # Example using array of model objects
352
+ # posts = [ BlogPost.new(author_name: 'Zach Dennis', title: 'AREXT'),
353
+ # BlogPost.new(author_name: 'Zach Dennis', title: 'AREXT2'),
354
+ # BlogPost.new(author_name: 'Zach Dennis', title: 'AREXT3') ]
355
+ # BlogPost.import posts
356
+ #
357
+ # # Example using array_of_hash_objects
358
+ # # NOTE: column_names will be determined by using the keys of the first hash in the array. If later hashes in the
359
+ # # array have different keys an exception will be raised. If you have hashes to import with different sets of keys
360
+ # # we recommend grouping these into batches before importing.
361
+ # values = [ {author_name: 'zdennis', title: 'test post'}, {author_name: 'jdoe', title: 'another test post'} ]
362
+ # BlogPost.import values
363
+ #
364
+ # # Example using column_names and array_of_hash_objects
365
+ # columns = [ :author_name, :title ]
366
+ # values = [ {author_name: 'zdennis', title: 'test post'}, {author_name: 'jdoe', title: 'another test post'} ]
367
+ # BlogPost.import columns, values
368
+ #
369
+ # # Example using column_names and array_of_values
370
+ # columns = [ :author_name, :title ]
371
+ # values = [ [ 'zdennis', 'test post' ], [ 'jdoe', 'another test post' ] ]
372
+ # BlogPost.import columns, values
373
+ #
374
+ # # Example using column_names, array_of_value and options
375
+ # columns = [ :author_name, :title ]
376
+ # values = [ [ 'zdennis', 'test post' ], [ 'jdoe', 'another test post' ] ]
377
+ # BlogPost.import( columns, values, validate: false )
378
+ #
379
+ # # Example synchronizing existing instances in memory
380
+ # post = BlogPost.where(author_name: 'zdennis').first
381
+ # puts post.author_name # => 'zdennis'
382
+ # columns = [ :author_name, :title ]
383
+ # values = [ [ 'yoda', 'test post' ] ]
384
+ # BlogPost.import columns, values, synchronize: [ post ]
385
+ # puts post.author_name # => 'yoda'
386
+ #
387
+ # # Example synchronizing unsaved/new instances in memory by using a unique imported field
388
+ # posts = [BlogPost.new(title: "Foo"), BlogPost.new(title: "Bar")]
389
+ # BlogPost.import posts, synchronize: posts, synchronize_keys: [:title]
390
+ # puts posts.first.persisted? # => true
391
+ #
392
+ # == On Duplicate Key Update (MySQL)
393
+ #
394
+ # The :on_duplicate_key_update option can be either :all, an Array, or a Hash.
395
+ #
396
+ # ==== Using :all
397
+ #
398
+ # The :on_duplicate_key_update option can be set to :all. All columns
399
+ # other than the primary key are updated. If a list of column names is
400
+ # supplied, only those columns will be updated. Below is an example:
401
+ #
402
+ # BlogPost.import columns, values, on_duplicate_key_update: :all
403
+ #
404
+ # ==== Using an Array
405
+ #
406
+ # The :on_duplicate_key_update option can be an array of column
407
+ # names. The column names are the only fields that are updated if
408
+ # a duplicate record is found. Below is an example:
409
+ #
410
+ # BlogPost.import columns, values, on_duplicate_key_update: [ :date_modified, :content, :author ]
411
+ #
412
+ # ==== Using A Hash
413
+ #
414
+ # The :on_duplicate_key_update option can be a hash of column names
415
+ # to model attribute name mappings. This gives you finer grained
416
+ # control over what fields are updated with what attributes on your
417
+ # model. Below is an example:
418
+ #
419
+ # BlogPost.import columns, attributes, on_duplicate_key_update: { title: :title }
420
+ #
421
+ # == On Duplicate Key Update (Postgres 9.5+ and SQLite 3.24+)
422
+ #
423
+ # The :on_duplicate_key_update option can be :all, an Array, or a Hash with up to
424
+ # three attributes, :conflict_target (and optionally :index_predicate) or
425
+ # :constraint_name (Postgres), and :columns.
426
+ #
427
+ # ==== Using :all
428
+ #
429
+ # The :on_duplicate_key_update option can be set to :all. All columns
430
+ # other than the primary key are updated. If a list of column names is
431
+ # supplied, only those columns will be updated. Below is an example:
432
+ #
433
+ # BlogPost.import columns, values, on_duplicate_key_update: :all
434
+ #
435
+ # ==== Using an Array
436
+ #
437
+ # The :on_duplicate_key_update option can be an array of column
438
+ # names. This option only handles inserts that conflict with the
439
+ # primary key. If a table does not have a primary key, this will
440
+ # not work. The column names are the only fields that are updated
441
+ # if a duplicate record is found. Below is an example:
442
+ #
443
+ # BlogPost.import columns, values, on_duplicate_key_update: [ :date_modified, :content, :author ]
444
+ #
445
+ # ==== Using a Hash
446
+ #
447
+ # The :on_duplicate_key_update option can be a hash with up to three
448
+ # attributes, :conflict_target (and optionally :index_predicate) or
449
+ # :constraint_name, and :columns. Unlike MySQL, Postgres requires the
450
+ # conflicting constraint to be explicitly specified. Using this option
451
+ # allows you to specify a constraint other than the primary key.
452
+ #
453
+ # ===== :conflict_target
454
+ #
455
+ # The :conflict_target attribute specifies the columns that make up the
456
+ # conflicting unique constraint and can be a single column or an array of
457
+ # column names. This attribute is ignored if :constraint_name is included,
458
+ # but it is the preferred method of identifying a constraint. It will
459
+ # default to the primary key. Below is an example:
460
+ #
461
+ # BlogPost.import columns, values, on_duplicate_key_update: { conflict_target: [ :author_id, :slug ], columns: [ :date_modified ] }
462
+ #
463
+ # ===== :index_predicate
464
+ #
465
+ # The :index_predicate attribute optionally specifies a WHERE condition
466
+ # on :conflict_target, which is required for matching against partial
467
+ # indexes. This attribute is ignored if :constraint_name is included.
468
+ # Below is an example:
469
+ #
470
+ # BlogPost.import columns, values, on_duplicate_key_update: { conflict_target: [ :author_id, :slug ], index_predicate: 'status <> 0', columns: [ :date_modified ] }
471
+ #
472
+ # ===== :constraint_name
473
+ #
474
+ # The :constraint_name attribute explicitly identifies the conflicting
475
+ # unique index by name. Postgres documentation discourages using this method
476
+ # of identifying an index unless absolutely necessary. Below is an example:
477
+ #
478
+ # BlogPost.import columns, values, on_duplicate_key_update: { constraint_name: :blog_posts_pkey, columns: [ :date_modified ] }
479
+ #
480
+ # ===== :condition
481
+ #
482
+ # The :condition attribute optionally specifies a WHERE condition
483
+ # on :conflict_action. Only rows for which this expression returns true will be updated.
484
+ # Note that it's evaluated last, after a conflict has been identified as a candidate to update.
485
+ # Below is an example:
486
+ #
487
+ # BlogPost.import columns, values, on_duplicate_key_update: { conflict_target: [ :author_id ], condition: "blog_posts.title NOT LIKE '%sample%'", columns: [ :author_name ] }
488
+ #
489
+ # ===== :columns
490
+ #
491
+ # The :columns attribute can be either :all, an Array, or a Hash.
492
+ #
493
+ # ===== Using :all
494
+ #
495
+ # The :columns attribute can be :all. All columns other than the primary key will be updated.
496
+ # If a list of column names is supplied, only those columns will be updated.
497
+ # Below is an example:
498
+ #
499
+ # BlogPost.import columns, values, on_duplicate_key_update: { conflict_target: :slug, columns: :all }
500
+ #
501
+ # ===== Using an Array
502
+ #
503
+ # The :columns attribute can be an array of column names. The column names
504
+ # are the only fields that are updated if a duplicate record is found.
505
+ # Below is an example:
506
+ #
507
+ # BlogPost.import columns, values, on_duplicate_key_update: { conflict_target: :slug, columns: [ :date_modified, :content, :author ] }
508
+ #
509
+ # ===== Using a Hash
510
+ #
511
+ # The :columns option can be a hash of column names to model attribute name
512
+ # mappings. This gives you finer grained control over what fields are updated
513
+ # with what attributes on your model. Below is an example:
514
+ #
515
+ # BlogPost.import columns, attributes, on_duplicate_key_update: { conflict_target: :slug, columns: { title: :title } }
516
+ #
517
+ # == Returns
518
+ # This returns an object which responds to +failed_instances+, +num_inserts+, +ids+ and +results+.
519
+ # * failed_instances - an array of objects that failed validation and were not committed to the database. An empty array if no validation is performed.
520
+ # * num_inserts - the number of insert statements it took to import the data
521
+ # * ids - the primary keys of the imported ids if the adapter supports it, otherwise an empty array.
522
+ # * results - import results if the adapter supports it, otherwise an empty array.
523
def bulk_import(*args)
  records = args.first

  # Anything that is not "an array of models first" is passed through untouched.
  return import_helper(*args) unless records.is_a?(Array) && records.first.is_a?(ActiveRecord::Base)

  # Models-first form: only the models and a trailing options Hash (copied
  # into a fresh Hash) are forwarded; any other extra argument is dropped.
  options = args.last.is_a?(Hash) ? {}.merge(args.last) : {}
  import_helper(records, options)
end
534
+ alias import bulk_import unless ActiveRecord::Base.respond_to? :import
535
+
536
# Imports a collection of values if all values are valid. Import fails at the
# first encountered validation error and raises ActiveRecord::RecordInvalid
# with the failed instance.
#
# Accepts the same arguments as +bulk_import+. A trailing options Hash is
# copied before +:validate+ and +:raise_error+ are forced to true, so the
# Hash object passed by the caller is never modified.
def bulk_import!(*args)
  # dup: the popped Hash is the caller's object, not a private copy
  options = args.last.is_a?(Hash) ? args.pop.dup : {}
  options[:validate] = true
  options[:raise_error] = true

  bulk_import(*args, options)
end
546
+ alias import! bulk_import! unless ActiveRecord::Base.respond_to? :import!
547
+
548
# Normalizes the arguments accepted by +import+ into +column_names+ and
# +array_of_attributes+, runs validations when enabled, performs the insert
# and post-processes the result (synchronize, primary keys, recursive import).
#
# Accepted argument shapes (an options Hash may follow any of them):
# * array_of_models
# * column_names, array_of_models
# * array_of_hashes
# * column_names, array_of_hashes
# * column_names, array_of_values
#
# Options default to <tt>validate: true, timestamps: true</tt>.
#
# Returns an ActiveRecord::Import::Result. Raises ArgumentError when the
# arguments match none of the shapes above or an imported hash has mismatched
# keys, and ActiveRecord::RecordInvalid for the first invalid record when
# options[:raise_error] is set.
def import_helper( *args )
  options = { validate: true, timestamps: true }
  options.merge!( args.pop ) if args.last.is_a? Hash
  # making sure that current model's primary key is used
  options[:primary_key] = primary_key
  options[:locking_column] = locking_column if attribute_names.include?(locking_column)

  # a validation context implies validation, whatever :validate says
  is_validating = options[:validate_with_context].present? ? true : options[:validate]
  validator = ActiveRecord::Import::Validator.new(self, options)

  # assume array of model objects
  if args.last.is_a?( Array ) && args.last.first.is_a?(ActiveRecord::Base)
    if args.length == 2
      models = args.last
      column_names = args.first.dup
    else
      models = args.first
      column_names = if connection.respond_to?(:supports_virtual_columns?) && connection.supports_virtual_columns?
        columns.reject(&:virtual?).map(&:name)
      else
        self.column_names.dup
      end
    end

    # drop uuid primary key columns from the insert when the first model has no id yet
    if models.first.id.nil?
      Array(primary_key).each do |c|
        if column_names.include?(c) && columns_hash[c].type == :uuid
          column_names.delete(c)
        end
      end
    end

    update_attrs = if record_timestamps && options[:timestamps]
      if respond_to?(:timestamp_attributes_for_update, true)
        send(:timestamp_attributes_for_update).map(&:to_sym)
      else
        new.send(:timestamp_attributes_for_update_in_model)
      end
    end

    array_of_attributes = []

    # The adapter capability cannot change while we iterate, so ask the
    # connection once instead of once per model.
    sync_association_ids = supports_setting_primary_key_of_imported_objects?

    models.each do |model|
      load_association_ids(model) if sync_association_ids

      if is_validating && !validator.valid_model?(model)
        raise(ActiveRecord::RecordInvalid, model) if options[:raise_error]
        next
      end

      row = column_names.map do |name|
        # an unchanged update-timestamp on a persisted model is sent as nil
        if model.persisted? &&
           update_attrs && update_attrs.include?(name.to_sym) &&
           !model.send("#{name}_changed?")
          nil
        else
          model.read_attribute(name.to_s)
        end
      end
      array_of_attributes << row
    end
  # supports array of hash objects
  elsif args.last.is_a?( Array ) && args.last.first.is_a?(Hash)
    if args.length == 2
      array_of_hashes = args.last
      column_names = args.first.dup
      allow_extra_hash_keys = true
    else
      array_of_hashes = args.first
      column_names = array_of_hashes.first.keys
      allow_extra_hash_keys = false
    end

    array_of_attributes = array_of_hashes.map do |h|
      error_message = validate_hash_import(h, column_names, allow_extra_hash_keys)

      raise ArgumentError, error_message if error_message

      column_names.map do |key|
        h[key]
      end
    end
  # supports empty array
  elsif args.last.is_a?( Array ) && args.last.empty?
    return ActiveRecord::Import::Result.new([], 0, [], [])
  # supports 2-element array and array
  elsif args.size == 2 && args.first.is_a?( Array ) && args.last.is_a?( Array )

    unless args.last.first.is_a?(Array)
      raise ArgumentError, "Last argument should be a two dimensional array '[[]]'. First element in array was a #{args.last.first.class}"
    end

    column_names, array_of_attributes = args

    # dup the passed args so we don't modify unintentionally
    column_names = column_names.dup
    array_of_attributes = array_of_attributes.map(&:dup)
  else
    raise ArgumentError, "Invalid arguments!"
  end

  # Force the primary key col into the insert if it's not
  # on the list and we are using a sequence and stuff a nil
  # value for it into each row so the sequencer will fire later
  symbolized_column_names = Array(column_names).map(&:to_sym)
  symbolized_primary_key = Array(primary_key).map(&:to_sym)

  if !symbolized_primary_key.to_set.subset?(symbolized_column_names.to_set) && connection.prefetch_primary_key? && sequence_name
    column_count = column_names.size
    column_names.concat(Array(primary_key)).uniq!
    columns_added = column_names.size - column_count
    new_fields = Array.new(columns_added)
    array_of_attributes.each { |a| a.concat(new_fields) }
  end

  # Don't modify incoming arguments
  on_duplicate_key_update = options[:on_duplicate_key_update]
  if on_duplicate_key_update
    updatable_columns = symbolized_column_names.reject { |c| symbolized_primary_key.include? c }
    options[:on_duplicate_key_update] = if on_duplicate_key_update.is_a?(Hash)
      on_duplicate_key_update.each_with_object({}) do |(k, v), duped_options|
        duped_options[k] = if k == :columns && v == :all
          updatable_columns
        elsif v.duplicable?
          v.dup
        else
          v
        end
      end
    elsif on_duplicate_key_update == :all
      updatable_columns
    elsif on_duplicate_key_update.duplicable?
      on_duplicate_key_update.dup
    else
      on_duplicate_key_update
    end
  end

  timestamps = {}

  # record timestamps unless disabled in ActiveRecord::Base
  if record_timestamps && options[:timestamps]
    timestamps = add_special_rails_stamps column_names, array_of_attributes, options
  end

  return_obj = if is_validating
    import_with_validations( column_names, array_of_attributes, options ) do |failed_instances|
      if models
        # models were already validated above; just collect the ones with errors
        models.each { |m| failed_instances << m if m.errors.any? }
      else
        # create instances for each of our column/value sets
        arr = validations_array_for_column_names_and_attributes( column_names, array_of_attributes )

        # keep track of the instance and the position it is currently at. if this fails
        # validation we'll use the index to remove it from the array_of_attributes
        arr.each_with_index do |hsh, i|
          model = new
          hsh.each_pair { |k, v| model[k] = v }
          next if validator.valid_model?(model)
          raise(ActiveRecord::RecordInvalid, model) if options[:raise_error]
          array_of_attributes[i] = nil
          failure = model.dup
          failure.errors.send(:initialize_dup, model.errors)
          failed_instances << failure
        end
        array_of_attributes.compact!
      end
    end
  else
    import_without_validations_or_callbacks( column_names, array_of_attributes, options )
  end

  if options[:synchronize]
    sync_keys = options[:synchronize_keys] || Array(primary_key)
    synchronize( options[:synchronize], sync_keys)
  end
  return_obj.num_inserts = 0 if return_obj.num_inserts.nil?

  # if we have ids, then set the id on the models and mark the models as clean.
  if models && supports_setting_primary_key_of_imported_objects?
    set_attributes_and_mark_clean(models, return_obj, timestamps, options)

    # if there are auto-save associations on the models we imported that are new, import them as well
    import_associations(models, options.dup) if options[:recursive]
  end

  return_obj
end
737
+
738
# Runs a validated import of +column_names+ / +array_of_attributes+.
#
# The optional block receives an (initially empty) array and is expected to
# append every instance that failed validation. When +options[:all_or_none]+
# is set and at least one failure was collected, nothing is inserted;
# otherwise the rows are handed to +import_without_validations_or_callbacks+.
#
# Returns an ActiveRecord::Import::Result carrying the collected failures
# together with the +num_inserts+, +ids+ and +results+ of the insert step.
# See ActiveRecord::Base.import for details on the arguments.
def import_with_validations(column_names, array_of_attributes, options = {})
  failures = []
  yield failures if block_given?

  abort_import = options[:all_or_none] && failures.any?
  outcome =
    if abort_import
      ActiveRecord::Import::Result.new([], 0, [], [])
    else
      import_without_validations_or_callbacks(column_names, array_of_attributes, options)
    end

  ActiveRecord::Import::Result.new(failures, outcome.num_inserts, outcome.ids, outcome.results)
end
757
+
758
# Imports the passed in +column_names+ and +array_of_attributes+
# given the passed in +options+ Hash, without validations or callbacks.
#
# Attributes imposed by the current scope (+scope_attributes+) that are not
# already listed in +column_names+ are appended as extra columns; their
# values are pushed onto every row of +array_of_attributes+ in place.
#
# Returns an ActiveRecord::Import::Result whose +failed_instances+ is always
# empty and whose +num_inserts+, +ids+ and +results+ are accumulated over
# all insert statements. Raises ActiveRecord::Import::MissingColumnError
# when a column name is not present in +columns_hash+. See
# ActiveRecord::Base.import for more information on +column_names+,
# +array_of_attributes+ and +options+.
def import_without_validations_or_callbacks( column_names, array_of_attributes, options = {} )
  return ActiveRecord::Import::Result.new([], 0, [], []) if array_of_attributes.empty?

  column_names = column_names.map(&:to_sym)

  scope_attributes.to_a.each do |name, value|
    name_as_sym = name.to_sym
    next if column_names.include?(name_as_sym)

    # for an STI subclass the scope may carry several type names; keep the first
    is_sti = (name_as_sym == inheritance_column.to_sym && self < base_class)
    value = Array(value).first if is_sti

    column_names << name_as_sym
    array_of_attributes.each { |attrs| attrs << value }
  end

  columns = column_names.each_with_index.map do |name, i|
    column = columns_hash[name.to_s]

    raise ActiveRecord::Import::MissingColumnError.new(name.to_s, i) if column.nil?

    column
  end

  columns_sql = "(#{column_names.map { |name| connection.quote_column_name(name) }.join(',')})"
  pre_sql_statements = connection.pre_sql_statements( options )
  insert_sql = ['INSERT', pre_sql_statements, "INTO #{quoted_table_name} #{columns_sql} VALUES "]
  insert_sql = insert_sql.flatten.join(' ')
  values_sql = values_sql_for_columns_and_attributes(columns, array_of_attributes)

  number_inserted = 0
  ids = []
  results = []
  if supports_import?
    # generate the sql
    post_sql_statements = connection.post_sql_statements( quoted_table_name, options )

    batch_size = options[:batch_size] || values_sql.size
    values_sql.each_slice(batch_size) do |batch_values|
      # perform the inserts
      result = connection.insert_many( [insert_sql, post_sql_statements].flatten,
                                       batch_values,
                                       options,
                                       "#{model_name} Create Many Without Validations Or Callbacks" )
      number_inserted += result.num_inserts
      # append in place: `+=` would re-copy everything gathered so far on every batch
      ids.concat(result.ids)
      results.concat(result.results)
    end
  else
    # adapter has no bulk insert: fall back to one INSERT per row in a transaction
    transaction(requires_new: true) do
      values_sql.each do |values|
        ids << connection.insert(insert_sql + values)
        number_inserted += 1
      end
    end
  end
  ActiveRecord::Import::Result.new([], number_inserted, ids, results)
end
825
+
826
+ private
827
+
828
# Copies database-generated values back onto the imported +models+ and marks
# them as persisted and unchanged.
#
# Models listed in +import_result.failed_instances+ are skipped. Ids (plus
# the +timestamps+ name => value pairs) are assigned only when exactly one
# id came back per remaining model; +options[:returning]+ columns are
# assigned only when exactly one result came back per remaining model.
# Returns nil when +models+ is nil.
def set_attributes_and_mark_clean(models, import_result, timestamps, options)
  return if models.nil?

  imported = models - import_result.failed_instances

  # if ids were returned for all models we know all were updated
  if imported.size == import_result.ids.size
    imported.zip(import_result.ids) do |record, new_id|
      record.id = new_id
      timestamps.each { |attr, value| record.send(attr + "=", value) }
    end
  end

  if imported.size == import_result.results.size
    returning = Array(options[:returning])
    # with a single returning column each result is the bare value, not a row
    lone_setter = returning.size == 1 ? "#{returning.first}=" : nil

    imported.zip(import_result.results) do |record, returned|
      if lone_setter
        record.send(lone_setter, returned)
      else
        returning.each_with_index do |column, position|
          record.send("#{column}=", returned[position])
        end
      end
    end
  end

  imported.each do |record|
    if record.respond_to?(:changes_applied) # Rails 4.1.8 and higher
      # legacy behavior for Rails 5.1
      record.changes_internally_applied if record.respond_to?(:changes_internally_applied)
      record.changes_applied
    elsif record.respond_to?(:clear_changes_information) # Rails 4.0 and higher
      record.clear_changes_information
    else # Rails 3.2
      record.instance_variable_get(:@changed_attributes).clear
    end
    record.instance_variable_set(:@new_record, false)
  end
end
872
+
873
# Copies the key of each loaded, non-polymorphic belongs_to target into the
# model's foreign key attribute, unless that attribute already has a value.
def load_association_ids(model)
  model.class.reflect_on_all_associations(:belongs_to).each do |reflection|
    foreign_key = reflection.foreign_key
    next if reflection.options[:polymorphic]

    parent = model.association(reflection.name).target
    next if parent.blank?
    next if model.public_send(foreign_key).present?

    parent_key = reflection.association_primary_key
    model.public_send("#{foreign_key}=", parent.send(parent_key))
  end
end
887
+
888
# Collects the new/changed has_one and has_many records of +models+ and
# bulk imports them, one +bulk_import+ call per association.
#
# Notes:
# * does not handle associations that reference themselves
# * should probably take a hash of associations to follow
#
# +options+ is modified in place: +:on_duplicate_key_update+ and
# +:returning+ are removed because they are not supported for associations.
# Returns nil when +models+ is nil.
def import_associations(models, options)
  return if models.nil?

  grouped = models.each_with_object({}) do |model, by_class|
    find_associated_objects_for_import(by_class, model)
  end

  [:on_duplicate_key_update, :returning].each { |key| options.delete(key) }

  grouped.each_value do |by_association|
    by_association.each_value do |records|
      next if records.empty?

      records.first.class.bulk_import(records, options)
    end
  end
end
907
+
908
# Adds the new or changed has_one / has_many records of +model+ to
# +associated_objects_by_class+, a Hash shaped like
# { class name => { association name => [records] } }, which is later used
# to call Class.import on each list.
#
# Every collected record gets its foreign key set to +model.id+ (and, for
# polymorphic associations, its type column set to the model's base class
# name). Each association is flagged as loaded. Models without an id only
# get their (empty) class entry. Returns +associated_objects_by_class+.
def find_associated_objects_for_import(associated_objects_by_class, model)
  owner_class = model.class
  per_association = (associated_objects_by_class[owner_class.name] ||= {})
  return associated_objects_by_class unless model.id

  reflections = owner_class.reflect_on_all_associations(:has_one) +
                owner_class.reflect_on_all_associations(:has_many)

  reflections.each do |reflection|
    bucket = (per_association[reflection.name] ||= [])

    association = model.association(reflection.name)
    association.loaded!

    # has_one yields a single target, has_many a collection; treat both as a list
    dirty = Array(association.target).select { |record| record.new_record? || record.changed? }
    dirty.each do |child|
      child.public_send("#{reflection.foreign_key}=", model.id)

      # For polymorphic associations
      polymorphic_type = reflection.type
      unless polymorphic_type.nil?
        child.public_send("#{polymorphic_type}=", owner_class.base_class.name)
      end
    end

    bucket.concat(dirty)
  end

  associated_objects_by_class
end
938
+
939
# Builds the SQL VALUES tuples for an INSERT statement.
#
# +columns+ is the list of column objects, positionally matching each row of
# +array_of_attributes+. Returns an Array with one "(v1,v2,...)" String per
# row. Raises ArgumentError when a row holds more values than there are
# columns.
def values_sql_for_columns_and_attributes(columns, array_of_attributes) # :nodoc:
  # connection gets called a *lot* in this high intensity loop.
  # Reuse the same one w/in the loop, otherwise it would keep being re-retrieved (= lots of time for large imports)
  connection_memo = connection

  array_of_attributes.map do |arr|
    my_values = arr.each_with_index.map do |val, j|
      column = columns[j]

      # Check for a surplus value first: every branch below needs the column,
      # and a nil surplus value would otherwise fail on +column.name+.
      if column.nil?
        raise ArgumentError, "Number of values (#{arr.length}) exceeds number of columns (#{columns.length})"
      end

      # be sure to query sequence_name *last*, only if cheaper tests fail, because it's costly
      if val.nil? && Array(primary_key).first == column.name && !sequence_name.blank?
        connection_memo.next_value_for_sequence(sequence_name)
      elsif val.respond_to?(:to_sql)
        # values that can render themselves as SQL are embedded as a sub-expression
        "(#{val.to_sql})"
      elsif respond_to?(:type_caster) # Rails 5.0 and higher
        type = type_for_attribute(column.name)
        val = type.type == :boolean ? type.cast(val) : type.serialize(val)
        connection_memo.quote(val)
      elsif column.respond_to?(:type_cast_from_user) # Rails 4.2
        connection_memo.quote(column.type_cast_from_user(val), column)
      else # Rails 3.2, 4.0 and 4.1
        if serialized_attributes.include?(column.name)
          val = serialized_attributes[column.name].dump(val)
        end
        # Fixes #443 to support binary (i.e. bytea) columns on PG
        val = column.type_cast(val) unless column.type.to_sym == :binary
        connection_memo.quote(val, column)
      end
    end
    "(#{my_values.join(',')})"
  end
end
977
+
978
# Fills the model's create/update timestamp columns for an import.
#
# For every timestamp column: if it is already listed in +column_names+
# (as a String or Symbol) only nil values in the rows are replaced;
# otherwise the column is appended to +column_names+ and the timestamp to
# every row. Both arguments are modified in place. Update-timestamp columns
# are also registered with the adapter for on-duplicate-key-update when the
# adapter supports it.
#
# Returns a Hash of column name (String) => the timestamp that was used.
def add_special_rails_stamps( column_names, array_of_attributes, options )
  timestamp_columns =
    if respond_to?(:all_timestamp_attributes_in_model, true) # Rails 5.1 and higher
      created = timestamp_attributes_for_create_in_model
      updated = timestamp_attributes_for_update_in_model
      { create: created, update: updated }
    else
      instance = new
      created = instance.send(:timestamp_attributes_for_create_in_model)
      updated = instance.send(:timestamp_attributes_for_update_in_model)
      { create: created, update: updated }
    end

  # use tz as set in ActiveRecord::Base
  timestamp =
    if ActiveRecord::Base.default_timezone == :utc
      Time.now.utc
    else
      Time.now
    end

  applied = {}

  [:create, :update].each do |action|
    timestamp_columns[action].each do |raw_name|
      name = raw_name.to_s
      applied[name] = timestamp

      position = column_names.index(name) || column_names.index(name.to_sym)
      if position.nil?
        column_names << name
        array_of_attributes.each { |row| row << timestamp }
      else
        # only fill in rows that did not bring their own value
        array_of_attributes.each { |row| row[position] = timestamp if row[position].nil? }
      end

      next unless supports_on_duplicate_key_update? && action == :update

      connection.add_column_for_on_duplicate_key_update(name, options)
    end
  end

  applied
end
1016
+
1017
# Pairs every row of +array_of_attributes+ with +column_names+ by position
# and returns one Hash (column name => value) per row. Missing values become
# nil; values beyond the last column name are ignored.
def validations_array_for_column_names_and_attributes( column_names, array_of_attributes ) # :nodoc:
  array_of_attributes.map do |values|
    column_names.each_with_index.each_with_object({}) do |(name, position), row|
      row[name] = values[position]
    end
  end
end
1021
+
1022
# Verifies that +hash+ contains every key in +required_keys+ and, when
# +allow_extra_keys+ is false, that it contains no other keys.
# Returns nil when the hash is acceptable, otherwise a multi-line error
# message describing the mismatch.
def validate_hash_import(hash, required_keys, allow_extra_keys) # :nodoc:
  present_keys = hash.keys
  missing_keys = required_keys - present_keys
  extra_keys = allow_extra_keys ? [] : present_keys - required_keys

  return nil if missing_keys.empty? && extra_keys.empty?

  # explicit column_names were given: only missing keys can be the problem
  return <<-EOS if allow_extra_keys
Hash key mismatch.

When importing an array of hashes with provided columns_names, each hash must contain keys for all column_names.

Required keys: #{required_keys}
Missing keys: #{missing_keys}

Hash: #{hash}
  EOS

  <<-EOS
Hash key mismatch.

When importing an array of hashes, all hashes must have the same keys.
If you have records that are missing some values, we recommend you either set default values
for the missing keys or group these records into batches by key set before importing.

Required keys: #{required_keys}
Extra keys: #{extra_keys}
Missing keys: #{missing_keys}

Hash: #{hash}
  EOS
end
1058
+ end
1059
+ end