activerecord-import-uuid 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. data/.gitignore +32 -0
  2. data/.rubocop.yml +49 -0
  3. data/.rubocop_todo.yml +36 -0
  4. data/.travis.yml +52 -0
  5. data/Brewfile +3 -0
  6. data/CHANGELOG.md +87 -0
  7. data/Gemfile +54 -0
  8. data/LICENSE +56 -0
  9. data/README.markdown +101 -0
  10. data/Rakefile +66 -0
  11. data/activerecord-import.gemspec +23 -0
  12. data/benchmarks/README +32 -0
  13. data/benchmarks/benchmark.rb +67 -0
  14. data/benchmarks/lib/base.rb +138 -0
  15. data/benchmarks/lib/cli_parser.rb +106 -0
  16. data/benchmarks/lib/float.rb +15 -0
  17. data/benchmarks/lib/mysql2_benchmark.rb +19 -0
  18. data/benchmarks/lib/output_to_csv.rb +19 -0
  19. data/benchmarks/lib/output_to_html.rb +64 -0
  20. data/benchmarks/models/test_innodb.rb +3 -0
  21. data/benchmarks/models/test_memory.rb +3 -0
  22. data/benchmarks/models/test_myisam.rb +3 -0
  23. data/benchmarks/schema/mysql_schema.rb +16 -0
  24. data/gemfiles/3.2.gemfile +3 -0
  25. data/gemfiles/4.0.gemfile +3 -0
  26. data/gemfiles/4.1.gemfile +3 -0
  27. data/gemfiles/4.2.gemfile +7 -0
  28. data/gemfiles/5.0.gemfile +3 -0
  29. data/lib/activerecord-import.rb +19 -0
  30. data/lib/activerecord-import/active_record/adapters/abstract_adapter.rb +9 -0
  31. data/lib/activerecord-import/active_record/adapters/jdbcmysql_adapter.rb +6 -0
  32. data/lib/activerecord-import/active_record/adapters/jdbcpostgresql_adapter.rb +6 -0
  33. data/lib/activerecord-import/active_record/adapters/mysql2_adapter.rb +6 -0
  34. data/lib/activerecord-import/active_record/adapters/postgresql_adapter.rb +6 -0
  35. data/lib/activerecord-import/active_record/adapters/seamless_database_pool_adapter.rb +7 -0
  36. data/lib/activerecord-import/active_record/adapters/sqlite3_adapter.rb +6 -0
  37. data/lib/activerecord-import/adapters/abstract_adapter.rb +78 -0
  38. data/lib/activerecord-import/adapters/em_mysql2_adapter.rb +5 -0
  39. data/lib/activerecord-import/adapters/mysql2_adapter.rb +5 -0
  40. data/lib/activerecord-import/adapters/mysql_adapter.rb +114 -0
  41. data/lib/activerecord-import/adapters/postgresql_adapter.rb +144 -0
  42. data/lib/activerecord-import/adapters/sqlite3_adapter.rb +51 -0
  43. data/lib/activerecord-import/base.rb +38 -0
  44. data/lib/activerecord-import/import.rb +660 -0
  45. data/lib/activerecord-import/mysql2.rb +7 -0
  46. data/lib/activerecord-import/postgresql.rb +7 -0
  47. data/lib/activerecord-import/sqlite3.rb +7 -0
  48. data/lib/activerecord-import/synchronize.rb +66 -0
  49. data/lib/activerecord-import/value_sets_parser.rb +55 -0
  50. data/lib/activerecord-import/version.rb +5 -0
  51. data/test/adapters/jdbcmysql.rb +1 -0
  52. data/test/adapters/jdbcpostgresql.rb +1 -0
  53. data/test/adapters/mysql2.rb +1 -0
  54. data/test/adapters/mysql2_makara.rb +1 -0
  55. data/test/adapters/mysql2spatial.rb +1 -0
  56. data/test/adapters/postgis.rb +1 -0
  57. data/test/adapters/postgresql.rb +1 -0
  58. data/test/adapters/postgresql_makara.rb +1 -0
  59. data/test/adapters/seamless_database_pool.rb +1 -0
  60. data/test/adapters/spatialite.rb +1 -0
  61. data/test/adapters/sqlite3.rb +1 -0
  62. data/test/database.yml.sample +52 -0
  63. data/test/import_test.rb +574 -0
  64. data/test/jdbcmysql/import_test.rb +6 -0
  65. data/test/jdbcpostgresql/import_test.rb +5 -0
  66. data/test/models/book.rb +7 -0
  67. data/test/models/chapter.rb +4 -0
  68. data/test/models/discount.rb +3 -0
  69. data/test/models/end_note.rb +4 -0
  70. data/test/models/group.rb +3 -0
  71. data/test/models/promotion.rb +3 -0
  72. data/test/models/question.rb +3 -0
  73. data/test/models/rule.rb +3 -0
  74. data/test/models/topic.rb +9 -0
  75. data/test/models/widget.rb +24 -0
  76. data/test/mysql2/import_test.rb +5 -0
  77. data/test/mysql2_makara/import_test.rb +6 -0
  78. data/test/mysqlspatial2/import_test.rb +6 -0
  79. data/test/postgis/import_test.rb +4 -0
  80. data/test/postgresql/import_test.rb +8 -0
  81. data/test/schema/generic_schema.rb +144 -0
  82. data/test/schema/mysql_schema.rb +16 -0
  83. data/test/schema/version.rb +10 -0
  84. data/test/sqlite3/import_test.rb +52 -0
  85. data/test/support/active_support/test_case_extensions.rb +70 -0
  86. data/test/support/assertions.rb +73 -0
  87. data/test/support/factories.rb +57 -0
  88. data/test/support/generate.rb +29 -0
  89. data/test/support/mysql/import_examples.rb +85 -0
  90. data/test/support/postgresql/import_examples.rb +242 -0
  91. data/test/support/shared_examples/on_duplicate_key_update.rb +103 -0
  92. data/test/support/shared_examples/recursive_import.rb +122 -0
  93. data/test/synchronize_test.rb +33 -0
  94. data/test/test_helper.rb +59 -0
  95. data/test/travis/database.yml +62 -0
  96. data/test/value_sets_bytes_parser_test.rb +93 -0
  97. data/test/value_sets_records_parser_test.rb +32 -0
  98. metadata +225 -0
@@ -0,0 +1,144 @@
1
# Import support mixed into PostgreSQL connections: single-statement bulk
# INSERT, optional RETURNING of primary keys, and ON CONFLICT clause generation.
module ActiveRecord::Import::PostgreSQLAdapter
  include ActiveRecord::Import::ImportSupport
  include ActiveRecord::Import::OnDuplicateKeyUpdateSupport

  # Compared against +postgresql_version+ to decide whether ON CONFLICT is usable.
  MIN_VERSION_FOR_UPSERT = 90_500

  # Runs all +values+ as one INSERT statement.
  #
  # +sql+ is either a String (the statement prefix) or an Array whose first
  # element is the prefix and whose remaining elements are joined with spaces
  # and appended after the values. Note that an Array is consumed with
  # +shift+, i.e. the caller's array loses its first element.
  #
  # Returns <tt>[1, ids]</tt>; +ids+ is filled from +select_values+ only when
  # the trailing SQL contains a RETURNING clause, otherwise it is empty.
  def insert_many( sql, values, *args ) # :nodoc:
    ids = []

    base_sql, post_sql =
      case sql
      when String then [sql, '']
      when Array then [sql.shift, sql.join( ' ' )]
      end

    statement = base_sql + values.join( ',' ) + post_sql
    if post_sql =~ /RETURNING\s/
      ids = select_values( statement, *args )
    else
      insert( statement, *args )
    end

    ActiveRecord::Base.connection.query_cache.clear

    # Everything goes out in a single statement, hence the constant 1.
    [1, ids]
  end

  # Returns the SQL fragment that fetches the next value of +sequence_name+.
  def next_value_for_sequence(sequence_name)
    "nextval('#{sequence_name}')"
  end

  # Extends the statements returned by +super+ with a RETURNING clause for
  # <tt>options[:primary_key]</tt>, unless <tt>options[:no_returning]</tt> is
  # set or the primary key is blank.
  def post_sql_statements( table_name, options ) # :nodoc:
    skip_returning = options[:no_returning] || options[:primary_key].blank?
    statements = super(table_name, options)

    if skip_returning
      statements
    else
      statements << "RETURNING #{options[:primary_key]}"
    end
  end

  # Add a column to be updated on duplicate key update
  #
  # Mutates <tt>options[:on_duplicate_key_update]</tt> in place; does nothing
  # when that option is neither a Hash nor an Array.
  def add_column_for_on_duplicate_key_update( column, options = {} ) # :nodoc:
    arg = options[:on_duplicate_key_update]

    case arg
    when Hash
      # Lazily create the :columns entry so there is something to append to.
      columns = arg.fetch( :columns ) { arg[:columns] = [] }
      if columns.is_a?( Array )
        columns << column.to_sym unless columns.include?( column.to_sym )
      elsif columns.is_a?( Hash )
        columns[column.to_sym] = column.to_sym
      end
    when Array
      arg << column.to_sym unless arg.include?( column.to_sym )
    end
  end

  # Returns a generated ON CONFLICT DO NOTHING statement given the passed
  # in +args+. A conflict target is only emitted when the first argument is
  # a Hash.
  def sql_for_on_duplicate_key_ignore( table_name, *args ) # :nodoc:
    settings = args.first
    target = settings.is_a?( Hash ) ? sql_for_conflict_target( settings ) : nil
    " ON CONFLICT #{target}DO NOTHING"
  end

  # Returns a generated ON CONFLICT DO UPDATE statement given the passed
  # in +args+.
  #
  # The first argument may be an Array or String (shorthand for
  # <tt>{ columns: ... }</tt>) or a Hash; anything else yields +nil+. An
  # empty +:columns+ value degrades to DO NOTHING. Raises ArgumentError when
  # no conflict target can be determined or +:columns+ has an unsupported type.
  def sql_for_on_duplicate_key_update( table_name, *args ) # :nodoc:
    settings = args.first
    settings = { columns: settings } if settings.is_a?( Array ) || settings.is_a?( String )
    return unless settings.is_a?( Hash )

    conflict_target = sql_for_conflict_target( settings )

    columns = settings.fetch( :columns, [] )
    if columns.respond_to?( :empty? ) && columns.empty?
      return " ON CONFLICT #{conflict_target}DO NOTHING"
    end

    # DO UPDATE needs a target; fall back to the table's primary key.
    conflict_target ||= sql_for_default_conflict_target( table_name )
    unless conflict_target
      raise ArgumentError, 'Expected :conflict_target or :constraint_name to be specified'
    end

    assignments =
      case columns
      when Array then sql_for_on_duplicate_key_update_as_array( table_name, columns )
      when Hash then sql_for_on_duplicate_key_update_as_hash( table_name, columns )
      when String then columns
      else raise ArgumentError, 'Expected :columns to be an Array or Hash'
      end

    " ON CONFLICT #{conflict_target}DO UPDATE SET #{assignments}"
  end

  # Builds <tt>col=EXCLUDED.col</tt> assignments for every column in +arr+.
  def sql_for_on_duplicate_key_update_as_array( table_name, arr ) # :nodoc:
    arr.map do |column|
      quoted = quote_column_name( column )
      "#{quoted}=EXCLUDED.#{quoted}"
    end.join( ',' )
  end

  # Builds <tt>key=EXCLUDED.value</tt> assignments for every pair in +hsh+.
  def sql_for_on_duplicate_key_update_as_hash( table_name, hsh ) # :nodoc:
    hsh.map do |target_column, source_column|
      quoted_target = quote_column_name( target_column )
      quoted_source = quote_column_name( source_column )
      "#{quoted_target}=EXCLUDED.#{quoted_source}"
    end.join( ',' )
  end

  # Returns the conflict target fragment (with a trailing space) described by
  # +args+: +:constraint_name+ wins over +:conflict_target+. Returns +nil+
  # when neither is present.
  def sql_for_conflict_target( args = {} )
    constraint_name = args[:constraint_name]
    return "ON CONSTRAINT #{constraint_name} " if constraint_name.present?

    conflict_target = args[:conflict_target]
    return unless conflict_target.present?

    "(#{Array( conflict_target ).reject( &:empty? ).join( ', ' )}) "
  end

  # Returns a conflict target built from the primary key of +table_name+, or
  # +nil+ when +primary_key+ reports none.
  def sql_for_default_conflict_target( table_name )
    key = primary_key( table_name )
    key ? "(#{key}) " : nil
  end

  # Return true if the statement is a duplicate key record error
  def duplicate_key_update_error?(exception) # :nodoc:
    return false unless exception.is_a?(ActiveRecord::StatementInvalid)

    exception.to_s.include?('duplicate key')
  end

  # True when +current_version+ is at least MIN_VERSION_FOR_UPSERT.
  def supports_on_duplicate_key_update?(current_version = postgresql_version)
    current_version >= MIN_VERSION_FOR_UPSERT
  end

  # ON CONFLICT DO NOTHING has the same version requirement as DO UPDATE.
  def supports_on_duplicate_key_ignore?(current_version = postgresql_version)
    supports_on_duplicate_key_update?(current_version)
  end

  def support_setting_primary_key_of_imported_objects?
    true
  end
end
@@ -0,0 +1,51 @@
1
# Import support mixed into SQLite3 connections: multi-row INSERT in chunks of
# at most SQLITE_LIMIT_COMPOUND_SELECT value sets per statement.
module ActiveRecord::Import::SQLite3Adapter
  include ActiveRecord::Import::ImportSupport

  MIN_VERSION_FOR_IMPORT = "3.7.11".freeze
  SQLITE_LIMIT_COMPOUND_SELECT = 500

  # Override our conformance to ActiveRecord::Import::ImportSupport interface
  # to ensure that we only support import in supported versions of SQLite.
  # INSERT statements with multiple value sets were introduced in 3.7.11.
  #
  # +current_version+ defaults to +sqlite_version+ (presumably the adapter's
  # version object -- verify against the adapter in use). A plain String is
  # compared segment by segment, so that e.g. "3.10.0" is correctly treated as
  # newer than "3.7.11" instead of being compared lexicographically.
  def supports_import?(current_version = sqlite_version)
    if current_version.is_a?( String )
      Gem::Version.new( current_version ) >= Gem::Version.new( MIN_VERSION_FOR_IMPORT )
    else
      current_version >= MIN_VERSION_FOR_IMPORT
    end
  end

  # +sql+ can be a single string or an array. If it is an array all
  # elements that are in position >= 1 will be appended to the final SQL.
  # (An Array is consumed with +shift+, i.e. the caller's array loses its
  # first element.)
  #
  # Returns <tt>[number_of_inserts, ids]</tt> where +ids+ is the contiguous
  # range of row ids assumed to have been assigned to each batch.
  def insert_many(sql, values, *args) # :nodoc:
    number_of_inserts = 0
    ids = []

    base_sql, post_sql = if sql.is_a?( String )
      [sql, '']
    elsif sql.is_a?( Array )
      [sql.shift, sql.join( ' ' )]
    end

    value_sets = ::ActiveRecord::Import::ValueSetsRecordsParser.parse(values,
      max_records: SQLITE_LIMIT_COMPOUND_SELECT)

    value_sets.each do |value_set|
      number_of_inserts += 1
      sql2insert = base_sql + value_set.join( ',' ) + post_sql

      # SQLite reports the rowid of the most recent successful insert, which
      # for a multi-row INSERT is the LAST row of the batch, so the range has
      # to be derived backwards from it.
      # NOTE(review): relies on +insert+ returning that last rowid -- confirm
      # against the connection adapter in use.
      last_insert_id = insert( sql2insert, *args )
      first_insert_id = last_insert_id - value_set.size + 1
      ids.concat((first_insert_id..last_insert_id).to_a)
    end

    [number_of_inserts, ids]
  end

  # NOTE(review): this emits the same nextval(...) fragment as the PostgreSQL
  # adapter; whether SQLite can ever reach this path is not visible here.
  def next_value_for_sequence(sequence_name)
    %{nextval('#{sequence_name}')}
  end

  def support_setting_primary_key_of_imported_objects?
    true
  end
end
@@ -0,0 +1,38 @@
1
+ require "pathname"
2
+ require "active_record"
3
+ require "active_record/version"
4
+
5
# Adapter loading helpers for activerecord-import.
module ActiveRecord::Import
  # Load-path-relative directory holding the per-adapter extension files.
  ADAPTER_PATH = "activerecord-import/active_record/adapters".freeze

  class << self
    # Maps an adapter name onto the name of the adapter whose import
    # extension it shares; unknown names are returned unchanged.
    def base_adapter(adapter)
      case adapter
      when 'mysql2_makara', 'mysql2spatial' then 'mysql2'
      when 'spatialite' then 'sqlite3'
      when 'postgresql_makara', 'postgis' then 'postgresql'
      else adapter
      end
    end

    # Loads the import functionality for a specific database adapter
    #
    # The abstract adapter is always required. A missing adapter-specific
    # file is deliberately ignored, leaving only the abstract behaviour.
    def require_adapter(adapter)
      require File.join(ADAPTER_PATH, "/abstract_adapter")

      specific_file = File.join(ADAPTER_PATH, "/#{base_adapter(adapter)}_adapter")
      begin
        require specific_file
      rescue LoadError
        # fallback
      end
    end

    # Loads the import functionality for the passed in ActiveRecord connection
    def load_from_connection_pool(connection_pool)
      adapter_name = connection_pool.spec.config[:adapter]
      require_adapter(adapter_name)
    end
  end
end
34
+
35
+ require 'activerecord-import/import'
36
+ require 'activerecord-import/active_record/adapters/abstract_adapter'
37
+ require 'activerecord-import/synchronize'
38
+ require 'activerecord-import/value_sets_parser'
@@ -0,0 +1,660 @@
1
+ require "ostruct"
2
+
3
# Empty namespace module; nothing in this file adds to it.
module ActiveRecord::Import::ConnectionAdapters
end

module ActiveRecord::Import #:nodoc:
  # Value returned by the import methods: the instances that failed
  # validation, the number of INSERT statements issued and the imported ids.
  Result = Struct.new(:failed_instances, :num_inserts, :ids)

  # Mixin marking an adapter as capable of bulk import.
  module ImportSupport #:nodoc:
    def supports_import? #:nodoc:
      true
    end
  end

  # Mixin marking an adapter as capable of on-duplicate-key updates.
  module OnDuplicateKeyUpdateSupport #:nodoc:
    def supports_on_duplicate_key_update? #:nodoc:
      true
    end
  end

  # Raised when an imported column name has no matching table column.
  class MissingColumnError < StandardError
    # +name+ is the offending column name, +index+ its position in the
    # list of column names being imported.
    def initialize(name, index)
      message = "Missing column for value <#{name}> at index #{index}"
      super(message)
    end
  end
end
26
+
27
class ActiveRecord::Associations::CollectionProxy
  # Forwards +import+ (arguments and block untouched) to the association
  # object backing this proxy.
  def import(*args, &block)
    target_association = @association
    target_association.import(*args, &block)
  end
end
32
+
33
class ActiveRecord::Associations::CollectionAssociation
  # Bulk-imports records through the association, forcing the foreign key
  # (and, when the reflection has a +type+, the type column) of every
  # imported row to point at the association's owner.
  #
  # Accepted argument shapes (an options Hash may always be passed last):
  # * <tt>import(models)</tt>
  # * <tt>import(column_names, models)</tt>
  # * <tt>import(column_names, array_of_attributes)</tt>
  # * an empty Array as last argument, which imports nothing
  #
  # Returns whatever the associated model's +import+ returns, or an empty
  # ActiveRecord::Import::Result for an empty Array.
  #
  # Raises ActiveRecord::RecordNotSaved when the owner is not persisted and
  # ArgumentError for any other argument shape.
  def import(*args, &block)
    unless owner.persisted?
      raise ActiveRecord::RecordNotSaved, "You cannot call import unless the parent is saved"
    end

    options = args.last.is_a?(Hash) ? args.pop : {}

    model_klass = reflection.klass
    symbolized_foreign_key = reflection.foreign_key.to_sym
    symbolized_column_names = model_klass.column_names.map(&:to_sym)

    owner_primary_key = owner.class.primary_key
    owner_primary_key_value = owner.send(owner_primary_key)

    # assume array of model objects
    if args.last.is_a?( Array ) && args.last.first.is_a?(ActiveRecord::Base)
      if args.length == 2
        models = args.last
        # dup so the caller's column list is never appended to
        column_names = args.first.dup
      else
        models = args.first
        column_names = symbolized_column_names
      end

      unless symbolized_column_names.include?(symbolized_foreign_key)
        column_names << symbolized_foreign_key
      end

      models.each do |m|
        m.public_send "#{symbolized_foreign_key}=", owner_primary_key_value
        m.public_send "#{reflection.type}=", owner.class.name if reflection.type
      end

      model_klass.import column_names, models, options

    # supports empty array
    elsif args.last.is_a?( Array ) && args.last.empty?
      ActiveRecord::Import::Result.new([], 0, [])

    # supports 2-element array and array
    elsif args.size == 2 && args.first.is_a?( Array ) && args.last.is_a?( Array )
      # Work on copies: both the column list and every row may be extended
      # below and the caller's arrays must stay untouched.
      column_names = args.first.dup
      array_of_attributes = args.last.map(&:dup)

      # Symbols, so that the lookup of the (Symbol) foreign key can succeed
      # whether the caller passed Strings or Symbols.
      symbolized_column_names = column_names.map(&:to_sym)

      if symbolized_column_names.include?(symbolized_foreign_key)
        # The caller supplied a foreign key column: overwrite its values.
        index = symbolized_column_names.index(symbolized_foreign_key)
        array_of_attributes.each { |attrs| attrs[index] = owner_primary_key_value }
      else
        column_names << symbolized_foreign_key
        array_of_attributes.each { |attrs| attrs << owner_primary_key_value }
      end

      if reflection.type
        column_names << reflection.type
        array_of_attributes.each { |attrs| attrs << owner.class.name }
      end

      model_klass.import column_names, array_of_attributes, options
    else
      raise ArgumentError, "Invalid arguments!"
    end
  end
end
97
+
98
+ class ActiveRecord::Base
99
+ class << self
100
# Current time in the zone selected by ActiveRecord::Base.default_timezone
# (UTC when it is :utc, local time otherwise).
tproc = -> { ActiveRecord::Base.default_timezone == :utc ? Time.now.utc : Time.now }

# Timestamp column names mapped to the lambda producing their value,
# grouped by the lifecycle event (:create / :update) they belong to.
AREXT_RAILS_COLUMNS = {
  create: {
    "created_on" => tproc,
    "created_at" => tproc
  },
  update: {
    "updated_on" => tproc,
    "updated_at" => tproc
  }
}.freeze

# Flat list of every timestamp column name above, create columns first.
AREXT_RAILS_COLUMN_NAMES = AREXT_RAILS_COLUMNS[:create].keys + AREXT_RAILS_COLUMNS[:update].keys
112
+
113
# Returns true if the current database connection adapter
# supports import functionality, otherwise returns false.
#
# Adapters that do not define +supports_import?+ are reported as
# unsupported; any +args+ are forwarded to the adapter's own check.
def supports_import?(*args)
  return false unless connection.respond_to?(:supports_import?)

  connection.supports_import?(*args)
end
118
+
119
# Returns true if the current database connection adapter
# supports on duplicate key update functionality, otherwise
# returns false.
#
# This is a plain delegation: the connection is expected to define
# +supports_on_duplicate_key_update?+ itself.
def supports_on_duplicate_key_update?
  adapter = connection
  adapter.supports_on_duplicate_key_update?
end
125
+
126
# Returns true if the current database connection adapter
# supports setting the primary key of bulk imported models, otherwise
# returns false. Adapters that do not define the predicate count as
# not supporting it.
def support_setting_primary_key_of_imported_objects?
  return false unless connection.respond_to?(:support_setting_primary_key_of_imported_objects?)

  connection.support_setting_primary_key_of_imported_objects?
end
132
+
133
+ # Imports a collection of values to the database.
134
+ #
135
+ # This is more efficient than using ActiveRecord::Base#create or
136
+ # ActiveRecord::Base#save multiple times. This method works well if
137
+ # you want to create more than one record at a time and do not care
138
+ # about having ActiveRecord objects returned for each record
139
+ # inserted.
140
+ #
141
+ # This can be used with or without validations. It does not utilize
142
+ # the ActiveRecord::Callbacks during creation/modification while
143
+ # performing the import.
144
+ #
145
+ # == Usage
146
+ # Model.import array_of_models
147
+ # Model.import column_names, array_of_values
148
+ # Model.import column_names, array_of_values, options
149
+ #
150
+ # ==== Model.import array_of_models
151
+ #
152
+ # With this form you can call _import_ passing in an array of model
153
+ # objects that you want updated.
154
+ #
155
+ # ==== Model.import column_names, array_of_values
156
+ #
157
+ # The first parameter +column_names+ is an array of symbols or
158
+ # strings which specify the columns that you want to update.
159
+ #
160
+ # The second parameter, +array_of_values+, is an array of
161
+ # arrays. Each subarray is a single set of values for a new
162
+ # record. The order of values in each subarray should match up to
163
+ # the order of the +column_names+.
164
+ #
165
+ # ==== Model.import column_names, array_of_values, options
166
+ #
167
+ # The first two parameters are the same as the above form. The third
168
+ # parameter, +options+, is a hash. This is optional. Please see
169
+ # below for what +options+ are available.
170
+ #
171
+ # == Options
172
+ # * +validate+ - true|false, tells import whether or not to use
173
+ # ActiveRecord validations. Validations are enforced by default.
174
+ # * +ignore+ - true|false, tells import to use MySQL's INSERT IGNORE
175
+ # to discard records that contain duplicate keys.
176
+ # * +on_duplicate_key_ignore+ - true|false, tells import to use
177
+ # Postgres 9.5+ ON CONFLICT DO NOTHING. Cannot be enabled on a
178
+ # recursive import.
179
+ # * +on_duplicate_key_update+ - an Array or Hash, tells import to
180
+ # use MySQL's ON DUPLICATE KEY UPDATE or Postgres 9.5+ ON CONFLICT
181
+ # DO UPDATE ability. See On Duplicate Key Update below.
182
+ # * +synchronize+ - an array of ActiveRecord instances for the model
183
+ # that you are currently importing data into. This synchronizes
184
+ # existing model instances in memory with updates from the import.
185
+ # * +timestamps+ - true|false, tells import to not add timestamps
186
+ # (if false) even if record timestamps is enabled in ActiveRecord::Base
187
+ # * +recursive+ - true|false, tells import to import all has_many/has_one
188
+ # associations if the adapter supports setting the primary keys of the
189
+ # newly imported objects.
190
+ # * +batch_size+ - an integer value to specify the max number of records to
191
+ # include per insert. Defaults to the total number of records to import.
192
+ #
193
+ # == Examples
194
+ # class BlogPost < ActiveRecord::Base ; end
195
+ #
196
+ # # Example using array of model objects
197
+ # posts = [ BlogPost.new author_name: 'Zach Dennis', title: 'AREXT',
198
+ # BlogPost.new author_name: 'Zach Dennis', title: 'AREXT2',
199
+ # BlogPost.new author_name: 'Zach Dennis', title: 'AREXT3' ]
200
+ # BlogPost.import posts
201
+ #
202
+ # # Example using column_names and array_of_values
203
+ # columns = [ :author_name, :title ]
204
+ # values = [ [ 'zdennis', 'test post' ], [ 'jdoe', 'another test post' ] ]
205
+ # BlogPost.import columns, values
206
+ #
207
+ # # Example using column_names, array_of_value and options
208
+ # columns = [ :author_name, :title ]
209
+ # values = [ [ 'zdennis', 'test post' ], [ 'jdoe', 'another test post' ] ]
210
+ # BlogPost.import( columns, values, validate: false )
211
+ #
212
+ # # Example synchronizing existing instances in memory
213
+ # post = BlogPost.where(author_name: 'zdennis').first
214
+ # puts post.author_name # => 'zdennis'
215
+ # columns = [ :author_name, :title ]
216
+ # values = [ [ 'yoda', 'test post' ] ]
217
+ # BlogPost.import columns, values, synchronize: [ post ]
218
+ # puts post.author_name # => 'yoda'
219
+ #
220
+ # # Example synchronizing unsaved/new instances in memory by using a uniqued imported field
221
+ # posts = [BlogPost.new(title: "Foo"), BlogPost.new(title: "Bar")]
222
+ # BlogPost.import posts, synchronize: posts, synchronize_keys: [:title]
223
+ # puts posts.first.persisted? # => true
224
+ #
225
+ # == On Duplicate Key Update (MySQL)
226
+ #
227
+ # The :on_duplicate_key_update option can be either an Array or a Hash.
228
+ #
229
+ # ==== Using an Array
230
+ #
231
+ # The :on_duplicate_key_update option can be an array of column
232
+ # names. The column names are the only fields that are updated if
233
+ # a duplicate record is found. Below is an example:
234
+ #
235
+ # BlogPost.import columns, values, on_duplicate_key_update: [ :date_modified, :content, :author ]
236
+ #
237
+ # ==== Using A Hash
238
+ #
239
+ # The :on_duplicate_key_update option can be a hash of column names
240
+ # to model attribute name mappings. This gives you finer grained
241
+ # control over what fields are updated with what attributes on your
242
+ # model. Below is an example:
243
+ #
244
+ # BlogPost.import columns, attributes, on_duplicate_key_update: { title: :title }
245
+ #
246
+ # == On Duplicate Key Update (Postgres 9.5+)
247
+ #
248
+ # The :on_duplicate_key_update option can be an Array or a Hash with up to
249
+ # two attributes, :conflict_target or :constraint_name and :columns.
250
+ #
251
+ # ==== Using an Array
252
+ #
253
+ # The :on_duplicate_key_update option can be an array of column
254
+ # names. This option only handles inserts that conflict with the
255
+ # primary key. If a table does not have a primary key, this will
256
+ # not work. The column names are the only fields that are updated
257
+ # if a duplicate record is found. Below is an example:
258
+ #
259
+ # BlogPost.import columns, values, on_duplicate_key_update: [ :date_modified, :content, :author ]
260
+ #
261
+ # ==== Using a Hash
262
+ #
263
+ # The :on_duplicate_key_update option can be a hash with up to two attributes,
264
+ # :conflict_target or :constraint_name, and :columns. Unlike MySQL, Postgres
265
+ # requires the conflicting constraint to be explicitly specified. Using this
266
+ # option allows you to specify a constraint other than the primary key.
267
+ #
268
+ # ====== :conflict_target
269
+ #
270
+ # The :conflict_target attribute specifies the columns that make up the
271
+ # conflicting unique constraint and can be a single column or an array of
272
+ # column names. This attribute is ignored if :constraint_name is included,
273
+ # but it is the preferred method of identifying a constraint. It will
274
+ # default to the primary key. Below is an example:
275
+ #
276
+ # BlogPost.import columns, values, on_duplicate_key_update: { conflict_target: [:author_id, :slug], columns: [ :date_modified ] }
277
+ #
278
+ # ====== :constraint_name
279
+ #
280
+ # The :constraint_name attribute explicitly identifies the conflicting
281
+ # unique index by name. Postgres documentation discourages using this method
282
+ # of identifying an index unless absolutely necessary. Below is an example:
283
+ #
284
+ # BlogPost.import columns, values, on_duplicate_key_update: { constraint_name: :blog_posts_pkey, columns: [ :date_modified ] }
285
+ #
286
+ # ====== :columns
287
+ #
288
+ # The :columns attribute can be either an Array or a Hash.
289
+ #
290
+ # ======== Using an Array
291
+ #
292
+ # The :columns attribute can be an array of column names. The column names
293
+ # are the only fields that are updated if a duplicate record is found.
294
+ # Below is an example:
295
+ #
296
+ # BlogPost.import columns, values, on_duplicate_key_update: { conflict_target: :slug, columns: [ :date_modified, :content, :author ] }
297
+ #
298
+ # ======== Using a Hash
299
+ #
300
+ # The :columns option can be a hash of column names to model attribute name
301
+ # mappings. This gives you finer grained control over what fields are updated
302
+ # with what attributes on your model. Below is an example:
303
+ #
304
+ # BlogPost.import columns, attributes, on_duplicate_key_update: { conflict_target: :slug, columns: { title: :title } }
305
+ #
306
+ # = Returns
307
+ # This returns an object which responds to +failed_instances+ and +num_inserts+.
308
+ # * failed_instances - an array of objects that fails validation and were not committed to the database. An empty array if no validation is performed.
309
+ # * num_inserts - the number of insert statements it took to import the data
310
+ # * ids - the primary keys of the imported ids, if the adapter supports it, otherwise an empty array.
311
def import(*args)
  leading = args.first

  # Anything that is not "array of models first" is handed over untouched.
  unless leading.is_a?( Array ) && leading.first.is_a?(ActiveRecord::Base)
    return import_helper(*args)
  end

  # Array of models: split off a trailing options Hash (copied into a fresh
  # Hash) and pass only the models plus those options on.
  options = args.last.is_a?(Hash) ? {}.merge!( args.pop ) : {}
  import_helper(args.first, options)
end
322
+
323
# Imports a collection of values if all values are valid. Import fails at the
# first encountered validation error and raises ActiveRecord::RecordInvalid
# with the failed instance.
#
# Takes the same arguments as +import+. <tt>:validate</tt> and
# <tt>:raise_error</tt> are always forced to +true+; they are set on a copy,
# so an options Hash passed in by the caller is left unmodified (and may be
# frozen).
def import!(*args)
  forced = { validate: true, raise_error: true }
  options = args.last.is_a?( Hash ) ? args.pop.merge( forced ) : forced

  import(*args, options)
end
333
+
334
# Shared implementation behind +import+.
#
# Normalises the arguments into +column_names+ / +array_of_attributes+,
# optionally appends the primary key and timestamp columns, runs the import
# with or without validations and finally synchronises instances and writes
# ids back onto the models when requested/supported.
#
# Returns an ActiveRecord::Import::Result. Raises ArgumentError when the
# arguments match none of the supported shapes.
def import_helper( *args )
  options = { validate: true, timestamps: true, primary_key: primary_key }
  options.merge!( args.pop ) if args.last.is_a?( Hash )

  # Don't modify incoming arguments
  update_option = options[:on_duplicate_key_update]
  options[:on_duplicate_key_update] = update_option.dup if update_option

  # A validation context always switches validation on.
  is_validating = !options[:validate_with_context].nil? || options[:validate]

  trailing = args.last
  if trailing.is_a?( Array ) && trailing.first.is_a?(ActiveRecord::Base)
    # assume array of model objects
    if args.length == 2
      column_names, models = args
    else
      models = args.first
      column_names = self.column_names.dup
    end

    array_of_attributes = models.map do |model|
      column_names.map { |name| model.read_attribute_before_type_cast(name.to_s) }
    end
  elsif trailing.is_a?( Array ) && trailing.empty?
    # supports empty array
    return ActiveRecord::Import::Result.new([], 0, [])
  elsif args.size == 2 && args.first.is_a?( Array ) && trailing.is_a?( Array )
    # supports 2-element array and array
    column_names = args.first
    array_of_attributes = trailing.map(&:dup)
  else
    raise ArgumentError, "Invalid arguments!"
  end

  # dup the passed in array so we don't modify it unintentionally
  column_names = column_names.dup

  # Force the primary key col into the insert if it's not
  # on the list and we are using a sequence and stuff a nil
  # value for it into each row so the sequencer will fire later
  if !column_names.include?(primary_key) && connection.prefetch_primary_key? && sequence_name
    column_names << primary_key
    array_of_attributes.each { |row| row << nil }
  end

  # record timestamps unless disabled in ActiveRecord::Base
  if record_timestamps && options.delete( :timestamps )
    add_special_rails_stamps column_names, array_of_attributes, options
  end

  return_obj =
    if !is_validating
      num_inserts, ids = import_without_validations_or_callbacks( column_names, array_of_attributes, options )
      ActiveRecord::Import::Result.new([], num_inserts, ids)
    elsif models
      # Validate the given model instances themselves; rows of invalid
      # models are blanked out so they are dropped before inserting.
      import_with_validations( column_names, array_of_attributes, options ) do |failed|
        models.each_with_index do |model, i|
          model = model.dup if options[:recursive]
          next if model.valid?(options[:validate_with_context])
          raise(ActiveRecord::RecordInvalid, model) if options[:raise_error]
          array_of_attributes[i] = nil
          failed << model
        end
      end
    else
      import_with_validations( column_names, array_of_attributes, options )
    end

  if options[:synchronize]
    sync_keys = options[:synchronize_keys] || [primary_key]
    synchronize( options[:synchronize], sync_keys )
  end
  return_obj.num_inserts = 0 if return_obj.num_inserts.nil?

  # if we have ids, then set the id on the models and mark the models as clean.
  if models && support_setting_primary_key_of_imported_objects?
    set_ids_and_mark_clean(models, return_obj)

    # if there are auto-save associations on the models we imported that are new, import them as well
    import_associations(models, options.dup) if options[:recursive]
  end

  return_obj
end
426
+
427
# TODO: import_from_table needs to be implemented.
# Placeholder that accepts +options+ and currently does nothing.
def import_from_table( options ) # :nodoc:
  nil
end
430
+
431
# Imports the passed in +column_names+ and +array_of_attributes+
# given the passed in +options+ Hash with validations. Returns an
# ActiveRecord::Import::Result whose +failed_instances+ holds the
# instances that failed validation and whose +num_inserts+ is the
# number of inserts it took to import the data. See
# ActiveRecord::Base.import for more information on
# +column_names+, +array_of_attributes+ and +options+.
#
# When a block is given, validation is left to the block: it receives the
# (initially empty) failure list and is expected to nil out the rows of
# +array_of_attributes+ that must not be inserted. +array_of_attributes+ is
# compacted in place either way.
def import_with_validations( column_names, array_of_attributes, options = {} )
  failed_instances = []

  if block_given?
    yield failed_instances
  else
    # create instances for each of our column/value sets
    attribute_hashes = validations_array_for_column_names_and_attributes( column_names, array_of_attributes )

    # A single instance is reused for every row; the row index is used to
    # blank out rows of array_of_attributes that fail validation.
    model = new
    attribute_hashes.each_with_index do |attributes, position|
      attributes.each_pair { |key, value| model[key] = value }
      next if model.valid?(options[:validate_with_context])
      raise(ActiveRecord::RecordInvalid, model) if options[:raise_error]
      array_of_attributes[position] = nil
      failed_instances << model.dup
    end
  end

  array_of_attributes.compact!

  # Nothing left to insert, or :all_or_none was requested and something failed.
  skip_insert = array_of_attributes.empty? || (options[:all_or_none] && failed_instances.any?)

  num_inserts, ids =
    if skip_insert
      [0, []]
    else
      import_without_validations_or_callbacks( column_names, array_of_attributes, options )
    end

  ActiveRecord::Import::Result.new(failed_instances, num_inserts, ids)
end
468
+
469
# Imports the passed in +column_names+ and +array_of_attributes+
# given the passed in +options+ Hash, without running validations or
# callbacks. See ActiveRecord::Base.import for more information on
# +column_names+, +array_of_attributes+ and +options+.
#
# Attributes implied by the current scope (+scope_attributes+) are appended
# as extra columns unless the caller already supplied them; the rows inside
# +array_of_attributes+ are extended in place when that happens.
#
# Options read by this method:
# * +:ignore+ - emit <tt>INSERT IGNORE</tt> instead of <tt>INSERT</tt>
# * +:batch_size+ - number of rows per +insert_many+ call (default: all rows)
# The whole +options+ Hash is also forwarded to
# <tt>connection.post_sql_statements</tt>.
#
# Raises ActiveRecord::Import::MissingColumnError when a column name is not
# present in +columns_hash+.
#
# Returns a two element Array: the number of insert operations performed
# and the Array of ids reported by the connection.
def import_without_validations_or_callbacks(column_names, array_of_attributes, options = {})
  column_names = column_names.map(&:to_sym)

  scope_attributes.to_a.each do |scope_name, scope_value|
    scope_column = scope_name.to_sym
    next if column_names.include?(scope_column)

    # for the inheritance column of an STI subclass only the first scoped value is used
    is_sti = (scope_column == inheritance_column.to_sym && self < base_class)
    scope_value = scope_value.first if is_sti

    column_names << scope_column
    array_of_attributes.each { |attrs| attrs << scope_value }
  end

  columns = column_names.each_with_index.map do |column_name, i|
    column = columns_hash[column_name.to_s]

    raise ActiveRecord::Import::MissingColumnError.new(column_name.to_s, i) if column.nil?

    column
  end

  # Nothing to insert. Without this guard the default batch size below would
  # be 0 and each_slice(0) raises ArgumentError.
  return [0, []] if array_of_attributes.empty?

  columns_sql = "(#{column_names.map { |column_name| connection.quote_column_name(column_name) }.join(',')})"
  insert_sql = "INSERT #{options[:ignore] ? 'IGNORE ' : ''}INTO #{quoted_table_name} #{columns_sql} VALUES "
  values_sql = values_sql_for_columns_and_attributes(columns, array_of_attributes)

  number_inserted = 0
  ids = []
  if supports_import?
    # generate the sql
    post_sql_statements = connection.post_sql_statements(quoted_table_name, options)

    # Label the statement with the model's own name; self.class.name would
    # always be "Class" here because self is the model class.
    log_label = "#{name} Create Many Without Validations Or Callbacks"

    batch_size = options[:batch_size] || values_sql.size
    values_sql.each_slice(batch_size) do |batch_values|
      # perform the inserts
      result = connection.insert_many([insert_sql, post_sql_statements].flatten,
                                      batch_values,
                                      log_label)
      number_inserted += result[0]
      ids += result[1]
    end
  else
    # one INSERT statement per row
    values_sql.each do |values|
      ids << connection.insert(insert_sql + values)
      number_inserted += 1
    end
  end
  [number_inserted, ids]
end
527
+
528
+ private
529
+
530
# Copies the ids reported by an import onto the models that were inserted,
# then marks each of those models as unchanged and no longer a new record.
#
# +models+ is the list of model objects handed to the import (nil is
# accepted and ignored). +import_result+ must respond to +ids+ and
# +failed_instances+.
#
# Models listed in +import_result.failed_instances+ are skipped so that the
# n-th id is paired with the n-th model that passed validation rather than
# the n-th model overall. Ids are assigned as returned (no Integer
# coercion) so that non-integer primary keys such as UUIDs are preserved.
def set_ids_and_mark_clean(models, import_result)
  return if models.nil?

  # identity lookup of the failed objects; avoids relying on model equality
  failed_lookup = {}
  Array(import_result.failed_instances).each { |failed| failed_lookup[failed.object_id] = true }
  imported_models = models.reject { |candidate| failed_lookup.key?(candidate.object_id) }

  import_result.ids.each_with_index do |id, index|
    model = imported_models[index]
    next if model.nil? # more ids than imported models; nothing to update

    model.id = id
    if model.respond_to?(:clear_changes_information) # Rails 4.0 and higher
      model.clear_changes_information
    else # Rails 3.2
      model.instance_variable_get(:@changed_attributes).clear
    end
    model.instance_variable_set(:@new_record, false)
  end
end
543
+
544
# Imports the new or changed associated records of +models+.
#
# The associated records are gathered per owning class and per association
# by find_associated_objects_for_import, and every non-empty group is then
# imported through the +import+ method of its first record's class, using
# the same +options+.
#
# Notes:
#   does not handle associations that reference themselves
#   should probably take a hash of associations to follow.
#
# +options+ is modified: +:on_duplicate_key_update+ is removed because it
# is not supported for associations. Does nothing when +models+ is nil.
def import_associations(models, options)
  return if models.nil?

  grouped_by_class = {}
  models.each do |model|
    find_associated_objects_for_import(grouped_by_class, model)
  end

  # :on_duplicate_key_update not supported for associations
  options.delete(:on_duplicate_key_update)

  grouped_by_class.each_value do |grouped_by_association|
    grouped_by_association.each_value do |records|
      next if records.empty?

      records.first.class.import(records, options)
    end
  end
end
562
+
563
# We are eventually going to call Class.import <objects> so we build up a hash
# of class name => { association name => objects to import }.
#
# For every has_one and has_many association of +model+, the association is
# marked as loaded and its new or changed target records are collected.
# Each collected child gets its foreign key set to +model.id+ and, for
# polymorphic associations, its type column set to the owner's polymorphic
# name.
#
# +associated_objects_by_class+ is filled in place and also returned.
def find_associated_objects_for_import(associated_objects_by_class, model)
  model_class = model.class
  by_association = (associated_objects_by_class[model_class.name] ||= {})
  owner_type = nil

  reflections =
    model_class.reflect_on_all_associations(:has_one) +
    model_class.reflect_on_all_associations(:has_many)

  reflections.each do |reflection|
    bucket = (by_association[reflection.name] ||= [])

    association = model.association(reflection.name)
    association.loaded!

    # Wrap target in an array if not already
    targets = Array(association.target)

    changed_objects = targets.select { |record| record.new_record? || record.changed? }
    changed_objects.each do |child|
      child.public_send("#{reflection.foreign_key}=", model.id)

      # For polymorphic associations
      next unless reflection.type

      # Polymorphic type columns hold the STI base class name (or
      # polymorphic_name where the model class provides it), not the name
      # of the concrete subclass.
      owner_type ||=
        if model_class.respond_to?(:polymorphic_name)
          model_class.polymorphic_name
        else
          model_class.base_class.name
        end
      child.public_send("#{reflection.type}=", owner_type)
    end
    bucket.concat(changed_objects)
  end
  associated_objects_by_class
end
592
+
593
# Returns the SQL VALUES tuples for an INSERT statement given the passed in
# +columns+ and +array_of_attributes+: one "(v1,v2,...)" String per row,
# where the n-th value of a row is quoted for the n-th entry of +columns+.
#
# A nil value in the primary key column is replaced by the next value of
# the model's sequence when a sequence name is available.
def values_sql_for_columns_and_attributes(columns, array_of_attributes) # :nodoc:
  # connection gets called a *lot* in this high intensity loop.
  # Reuse the same one within the loop, otherwise it would keep being
  # re-retrieved (= lots of time for large imports).
  connection_memo = connection

  # Neither of these depends on the row or value being processed, so they
  # are resolved once instead of once per value.
  pk_name = primary_key
  use_type_caster = respond_to?(:type_caster) && type_caster.respond_to?(:type_cast_for_database)

  array_of_attributes.map do |arr|
    my_values = arr.each_with_index.map do |val, j|
      column = columns[j]

      # be sure to query sequence_name *last*, only if cheaper tests fail, because it's costly
      if val.nil? && column.name == pk_name && !sequence_name.blank?
        connection_memo.next_value_for_sequence(sequence_name)
      elsif column
        if use_type_caster # Rails 5.0 and higher
          connection_memo.quote(type_caster.type_cast_for_database(column.name, val))
        elsif column.respond_to?(:type_cast_from_user) # Rails 4.2 and higher
          connection_memo.quote(column.type_cast_from_user(val), column)
        else # Rails 3.2, 4.0 and 4.1
          if serialized_attributes.include?(column.name)
            val = serialized_attributes[column.name].dump(val)
          end
          connection_memo.quote(column.type_cast(val), column)
        end
      end
    end
    "(#{my_values.join(',')})"
  end
end
622
+
623
# Fills in the stamp columns listed in AREXT_RAILS_COLUMNS for every row.
#
# +column_names+ and the rows of +array_of_attributes+ are modified in
# place. Only stamp columns that the model actually has
# (<tt>self.column_names</tt>) are considered.
#
# * <tt>:create</tt> stamps: an existing column keeps any non-nil value
#   and only nil entries are filled; a missing column is appended.
# * <tt>:update</tt> stamps: an existing column is overwritten for every
#   row; a missing column is appended. When on-duplicate-key-update is
#   supported the column is also registered with the connection via
#   +add_column_for_on_duplicate_key_update+.
def add_special_rails_stamps(column_names, array_of_attributes, options)
  AREXT_RAILS_COLUMNS[:create].each_pair do |stamp_column, generator|
    next unless self.column_names.include?(stamp_column)

    stamp = generator.call
    position = column_names.index(stamp_column) || column_names.index(stamp_column.to_sym)
    if position.nil?
      column_names << stamp_column
      array_of_attributes.each { |row| row << stamp }
    else
      # fill the stamp only where the row has no value yet
      array_of_attributes.each do |row|
        row[position] = stamp if row[position].nil?
      end
    end
  end

  AREXT_RAILS_COLUMNS[:update].each_pair do |stamp_column, generator|
    next unless self.column_names.include?(stamp_column)

    stamp = generator.call
    position = column_names.index(stamp_column) || column_names.index(stamp_column.to_sym)
    if position.nil?
      column_names << stamp_column
      array_of_attributes.each { |row| row << stamp }
    else
      # overwrite the stamp on every row
      array_of_attributes.each { |row| row[position] = stamp }
    end

    connection.add_column_for_on_duplicate_key_update(stamp_column, options) if supports_on_duplicate_key_update?
  end
end
654
+
655
# Builds one Hash per row of +array_of_attributes+, keyed by the entries of
# +column_names+ and holding the row's value at the same position.
# Returns the Array of those Hashes.
def validations_array_for_column_names_and_attributes(column_names, array_of_attributes) # :nodoc:
  array_of_attributes.map do |row|
    column_names.zip(row).each_with_object({}) do |(column, value), attributes|
      attributes[column] = value
    end
  end
end
659
+ end
660
+ end