fast_inserter 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4e0df4e509d7f47f48a2d9ef2616c91586bc295f
4
+ data.tar.gz: 06523b7bc59f458a99e57824a92e41f5d9b6722a
5
+ SHA512:
6
+ metadata.gz: e63d8e35019b8de1cb16861e82350ce292fed7e4d9293738b53d40ada215c5ff16fe77e3c8daa7ba573f3b7758782166eac7d4f688686f45a0b2946acd5e7b32
7
+ data.tar.gz: a7c7e1264a29a90ed2d9ed4e52e222843e564bd67a161f7727fd8f5f9c422be27d1d1f33fd9227658b44a55d03cc2d4b1d9a284a7c35bdf1d0d90316f12cd22e
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.3.0
data/.travis.yml ADDED
@@ -0,0 +1,21 @@
1
+ language: ruby
2
+ sudo: required
3
+ notifications:
4
+ email: false
5
+ rvm:
6
+ - 2.2.4
7
+ - 2.3.0
8
+ - ruby-head
9
+ env:
10
+ matrix:
11
+ - DB=pg
12
+ - DB=mysql
13
+ - DB=sqlite
14
+ before_install:
15
+ # Sqlite set up involves installing a more modern version which we move into a script.
16
+ - sh -c "if [ '$DB' = 'sqlite' ]; then ./ci/install_modern_sqlite.sh; fi;"
17
+ before_script:
18
+ # PG and mysql are simpler.
19
+ - sh -c "if [ '$DB' = 'pg' ]; then psql -c 'DROP DATABASE IF EXISTS fast_inserter;' -U postgres; fi"
20
+ - sh -c "if [ '$DB' = 'pg' ]; then psql -c 'create database fast_inserter;' -U postgres; fi"
21
+ - sh -c "if [ '$DB' = 'mysql' ]; then mysql -e 'create database IF NOT EXISTS fast_inserter;'; fi"
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fast_inserter.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 Stryder Corp
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,113 @@
1
+ # FastInserter
2
+
3
+ [![Build Status](https://travis-ci.org/strydercorp/fast_inserter.svg?branch=master)](https://travis-ci.org/strydercorp/fast_inserter)
4
+
5
+ Use raw SQL to insert database records in bulk. Supports uniqueness constraints, timestamps, and checking for existing records.
6
+
7
+ The motivation for this library comes from the fact that Rails runs validations on each and every inserted record in the join table. And, even if you pass validate: false, it still loads each record and inserts them one by one. This leads to very slow insertion of a large number (thousands) of records.
8
+
9
+ This library skips active record altogether and uses raw sql to insert records. However, using raw sql goes around all your business logic, so we provide ways to still have niceties like uniqueness constraints and timestamps.
10
+
11
+ ## Installation
12
+
13
+ Add this line to your application's Gemfile:
14
+
15
+ ```ruby
16
+ gem 'fast_inserter'
17
+ ```
18
+
19
+ And then execute:
20
+
21
+ $ bundle
22
+
23
+ Or install it yourself as:
24
+
25
+ $ gem install fast_inserter
26
+
27
+ ## Runtime dependencies
28
+
29
+ * activerecord: Fast inserter depends on active record for handling database connections, database configuration, executing the sql, and sql sanitization.
30
+
31
+ ## Usage
32
+
33
+ In most cases, you probably don't want to use this library and should use Active Record instead. However, should you need to use this library, usage instructions are below.
34
+
35
+ A basic usage for inserting multiple 'MassEmailsUser' records:
36
+
37
+ ```ruby
38
+ @mass_email = MassEmail.find(params[:id])
39
+ user_ids = [1, 2, 3, 4] # ids to fast insert
40
+ params = {
41
+ table: 'mass_emails_users',
42
+ static_columns: {
43
+ mass_email_id: @mass_email.id
44
+ },
45
+ additional_columns: {
46
+ created_by_id: current_user.id
47
+ },
48
+ options: {
49
+ timestamps: true,
50
+ unique: true,
51
+ check_for_existing: true,
52
+ group_size: 10_000
53
+ },
54
+ variable_column: 'user_id',
55
+ values: user_ids
56
+ }
57
+ inserter = FastInserter::Base.new(params)
58
+ inserter.fast_insert
59
+ ```
60
+
61
+ Let's walk through the options.
62
+
63
+ ### table
64
+
65
+ Defines the table name to insert into
66
+
67
+ ### static_columns
68
+
69
+ These are columns and the values for the columns which will not change for each insertion.
70
+
71
+ ### additional_columns
72
+
73
+ These are also static columns which will not change, but these columns will not be used for uniqueness validation constraints.
74
+
75
+ ### timestamps
76
+
77
+ Includes created_at and updated_at timestamps to each record. Default is false.
78
+
79
+ ### unique
80
+
81
+ Ensures that the 'values' parameter is a unique set of values. Default is false.
82
+
83
+ ### check_for_existing
84
+
85
+ Queries the table for any values which already exist and removes them from the values to be inserted. This query uses 'static_columns' and 'variable_column' for determining uniqueness. Default is false.
86
+
87
+ ### group_size
88
+
89
+ Insertions will be broken up into batches. This specifies the number of records you want to insert per batch. Default is 10,000.
90
+
91
+ ### variable_column
92
+
93
+ The name of the column which we will be dynamically inserting records for. This is the only column which changes per-record being inserted.
94
+
95
+ ### values
96
+
97
+ The large list of values to use for the 'variable_column' value when inserting the records.
98
+
99
+ ## Development
100
+
101
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
102
+
103
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
104
+
105
+ ## Contributing
106
+
107
+ Bug reports and pull requests are welcome on GitHub at https://github.com/strydercorp/fast_inserter. All code must run on sqlite, pg, and mysql (tests for all three are already set up in CI).
108
+
109
+
110
+ ## License
111
+
112
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
113
+
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "fast_inserter"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env bash
2
+ set -e # halt script on error
3
+
4
+ sudo apt-get autoremove sqlite3
5
+ sudo apt-get install python-software-properties
6
+ sudo apt-add-repository -y ppa:travis-ci/sqlite3
7
+ sudo apt-get -y update
8
+ sudo apt-cache show sqlite3
9
+ sudo apt-get install sqlite3=3.7.15.1-1~travis1
10
+ sudo sqlite3 -version
11
+ sudo psql --version
12
+ sudo mysql --version
13
+ gem update bundler
@@ -0,0 +1,35 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'fast_inserter/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "fast_inserter"
8
+ spec.version = FastInserter::VERSION
9
+ spec.authors = ["Scott Ringwelski", "Brandon Gafford", "Jordon Dornbos"]
10
+ spec.email = ["scott@joinhandshake.com", "brandon@joinhandshake.com", "jordon@joinhandshake.com"]
11
+
12
+ spec.summary = %q{Quickly insert database records in bulk}
13
+ spec.description = %q{Use raw SQL to insert database records in bulk. Supports uniqueness constraints, timestamps, and checking for existing records.}
14
+ spec.homepage = "https://github.com/strydercorp/fast_inserter."
15
+ spec.license = "MIT"
16
+
17
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
18
+ spec.bindir = "exe"
19
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
20
+ spec.require_paths = ["lib"]
21
+
22
+ spec.add_runtime_dependency 'activerecord', '>= 4.1.0'
23
+
24
+ spec.add_development_dependency "bundler"
25
+ spec.add_development_dependency "rake", "~> 10.0"
26
+ spec.add_development_dependency "rspec"
27
+ spec.add_development_dependency "database_cleaner"
28
+
29
+ case ENV['DB']
30
+ when "mysql"; spec.add_development_dependency "mysql2"
31
+ when "sqlite"; spec.add_development_dependency "sqlite3"
32
+ when "pg"; spec.add_development_dependency "pg"
33
+ else spec.add_development_dependency "sqlite3" # Default
34
+ end
35
+ end
@@ -0,0 +1,158 @@
1
+ # Highly based off of https://github.com/sportngin/m2m_fast_insert
2
+ # Unfortunately, that gem was not up to date for rails 4.
3
+ #
4
+ # NOTE:
5
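+ # - Only column values ('static_columns', 'additional_columns', and 'values') are sanitized for SQL injection; the table name and column names are interpolated into the SQL as-is.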
6
+ #
7
+ # Example usage:
8
+ # @mass_email = MassEmail.find(params[:id])
9
+ # user_ids = [1, 2, 3, 4] # ids to fast insert
10
+ # params = {
11
+ # table: 'mass_emails_users',
12
+ # static_columns: {
13
+ # mass_email_id: @mass_email.id
14
+ # },
15
+ # additional_columns: {
16
+ # created_by_id: current_user.id
17
+ # },
18
+ # options: {
19
+ # timestamps: true,
20
+ # unique: true,
21
+ # check_for_existing: true,
22
+ # group_size: 1_000
23
+ # },
24
+ # variable_column: 'user_id',
25
+ # values: user_ids
26
+ # }
27
+ # inserter = FastInserter::Base.new(params)
28
+ # inserter.fast_insert
29
+ #
30
+ # Options:
31
+ # unique: true
32
+ # Makes sure all values are unique
33
+ # check_for_existing: true
34
+ # Checks if values already exist in the database and only inserts nonexisting values
35
+ # This checks values scoped to static columns.
36
+ # timestamps: true
37
+ # Adds created_at and updated_at columns to insert statement
38
+ # additional_columns: Hash
39
+ # A hash representing additional column values to set that you don't want
40
+ # to include in uniqueness checks or other pre-insertion operations.
41
+ # group_size: Integer
42
+ # The number of items you want to insert per batch of records. Default 10_000.
43
+ #
44
+ module FastInserter
45
+ class Base
46
+ def initialize(params)
47
+ @table_name = params[:table]
48
+ @static_columns = params[:static_columns]
49
+ @additional_columns = params[:additional_columns]
50
+ @variable_column = params[:variable_column]
51
+ @options = params[:options] || {}
52
+
53
+ # We want to break up the insertions into multiple transactiosn in case there
54
+ # is a very large amount of values. This avoids PG:OutOfMemory errors and smooths
55
+ # out the load. The second 'false' param means don't fill in the last group with nil elements.
56
+ all_values = params[:values]
57
+ all_values.uniq! if @options[:unique]
58
+ group_size = Integer(params[:group_size] || 10_000)
59
+ @value_groups = all_values.in_groups_of(group_size, false)
60
+ end
61
+
62
+ # Iterates through the value groups (which is all values in groups of smaller sizes)
63
+ # and generates and executes a transaction to insert those groups one at a time
64
+ def fast_insert
65
+ return if nothing_to_insert?
66
+
67
+ @value_groups.each do |group|
68
+ fast_insert_group(group)
69
+ end
70
+ end
71
+
72
+ private
73
+
74
+ def nothing_to_insert?
75
+ @value_groups.empty?
76
+ end
77
+
78
+ # For a given group of values, generates the insertion SQL and executes it. If we want to check for existing
79
+ # values, we make sure that the check and the insert happen within the same transaction.
80
+ def fast_insert_group(group)
81
+ if @options[:check_for_existing]
82
+ ActiveRecord::Base.transaction do
83
+ non_existing_values = group.map(&:to_s) - existing_values(group)
84
+ sql_string = insertion_sql_for_group(non_existing_values)
85
+ ActiveRecord::Base.connection.execute(sql_string) unless non_existing_values.empty?
86
+ end
87
+ else
88
+ sql_string = insertion_sql_for_group(group)
89
+ ActiveRecord::Base.connection.execute(sql_string)
90
+ end
91
+ end
92
+
93
+ # Queries for the existing values for a given group of values
94
+ def existing_values(group_of_values)
95
+ values_to_check = ActiveRecord::Base.send(:sanitize_sql_array, ["?", group_of_values])
96
+ sql = "SELECT #{@variable_column} FROM #{@table_name} WHERE #{existing_values_static_columns} AND #{@variable_column} IN (#{values_to_check})"
97
+
98
+ # NOTE: There are more elegant ways to get this field out of the resultset, but each database adaptor returns a different type
99
+ # of result from 'execute(sql)'. Potential classes for 'result' is Array (sqlite), Mysql2::Result (mysql2), PG::Result (pg). Each
100
+ # result can be enumerated into a list of arrays (mysql) or list of hashes (sqlite, pg)
101
+ results = ActiveRecord::Base.connection.execute(sql)
102
+ results.to_a.map do |result|
103
+ if result.is_a?(Hash)
104
+ result[@variable_column].to_s
105
+ elsif result.is_a?(Array)
106
+ result[0].to_s
107
+ end
108
+ end
109
+ end
110
+
111
+ def existing_values_static_columns
112
+ @static_columns.map do |key, value|
113
+ sanitized_value = ActiveRecord::Base.send(:sanitize_sql_array, ["?", value])
114
+ "#{key} = #{sanitized_value}"
115
+ end.join(' AND ')
116
+ end
117
+
118
+ def insertion_sql_for_group(group_of_values)
119
+ "INSERT INTO #{@table_name} (#{column_names}) VALUES #{insert_values(group_of_values)}"
120
+ end
121
+
122
+ def column_names
123
+ "#{all_static_columns.keys.join(', ')}, #{@variable_column}"
124
+ end
125
+
126
+ def all_static_columns
127
+ @all_static_columns ||= begin
128
+ rv = @static_columns.dup
129
+
130
+ if @options[:timestamps]
131
+ time = Time.now
132
+ rv[:created_at] = time
133
+ rv[:updated_at] = time
134
+ end
135
+
136
+ if @additional_columns.present?
137
+ @additional_columns.each do |key, value|
138
+ rv[key] = value
139
+ end
140
+ end
141
+
142
+ rv
143
+ end
144
+ end
145
+
146
+ def insert_values(group_of_values)
147
+ rv = []
148
+ static_column_values = ActiveRecord::Base.send(:sanitize_sql_array, ["?", all_static_columns.values])
149
+
150
+ group_of_values.each do |value|
151
+ value = ActiveRecord::Base.send(:sanitize_sql_array, ["?", value])
152
+ rv << "(#{static_column_values},#{value})"
153
+ end
154
+
155
+ rv.join(', ')
156
+ end
157
+ end
158
+ end
@@ -0,0 +1,3 @@
1
+ module FastInserter
2
+ VERSION = "0.1.0"
3
+ end
@@ -0,0 +1,5 @@
1
+ require "fast_inserter/fast_inserter_base"
2
+ require "fast_inserter/version"
3
+
4
+ module FastInserter
5
+ end
metadata ADDED
@@ -0,0 +1,148 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fast_inserter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Scott Ringwelski
8
+ - Brandon Gafford
9
+ - Jordon Dornbos
10
+ autorequire:
11
+ bindir: exe
12
+ cert_chain: []
13
+ date: 2016-02-12 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: activerecord
17
+ requirement: !ruby/object:Gem::Requirement
18
+ requirements:
19
+ - - ">="
20
+ - !ruby/object:Gem::Version
21
+ version: 4.1.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ version: 4.1.0
29
+ - !ruby/object:Gem::Dependency
30
+ name: bundler
31
+ requirement: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - ">="
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ type: :development
37
+ prerelease: false
38
+ version_requirements: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: '0'
43
+ - !ruby/object:Gem::Dependency
44
+ name: rake
45
+ requirement: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: '10.0'
50
+ type: :development
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - "~>"
55
+ - !ruby/object:Gem::Version
56
+ version: '10.0'
57
+ - !ruby/object:Gem::Dependency
58
+ name: rspec
59
+ requirement: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ type: :development
65
+ prerelease: false
66
+ version_requirements: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ - !ruby/object:Gem::Dependency
72
+ name: database_cleaner
73
+ requirement: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ type: :development
79
+ prerelease: false
80
+ version_requirements: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ - !ruby/object:Gem::Dependency
86
+ name: sqlite3
87
+ requirement: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ type: :development
93
+ prerelease: false
94
+ version_requirements: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ description: Use raw SQL to insert database records in bulk. Supports uniqueness constraints,
100
+ timestamps, and checking for existing records.
101
+ email:
102
+ - scott@joinhandshake.com
103
+ - brandon@joinhandshake.com
104
+ - jordon@joinhandshake.com
105
+ executables: []
106
+ extensions: []
107
+ extra_rdoc_files: []
108
+ files:
109
+ - ".gitignore"
110
+ - ".rspec"
111
+ - ".ruby-version"
112
+ - ".travis.yml"
113
+ - Gemfile
114
+ - LICENSE.txt
115
+ - README.md
116
+ - Rakefile
117
+ - bin/console
118
+ - bin/setup
119
+ - ci/install_modern_sqlite.sh
120
+ - fast_inserter.gemspec
121
+ - lib/fast_inserter.rb
122
+ - lib/fast_inserter/fast_inserter_base.rb
123
+ - lib/fast_inserter/version.rb
124
+ homepage: https://github.com/strydercorp/fast_inserter.
125
+ licenses:
126
+ - MIT
127
+ metadata: {}
128
+ post_install_message:
129
+ rdoc_options: []
130
+ require_paths:
131
+ - lib
132
+ required_ruby_version: !ruby/object:Gem::Requirement
133
+ requirements:
134
+ - - ">="
135
+ - !ruby/object:Gem::Version
136
+ version: '0'
137
+ required_rubygems_version: !ruby/object:Gem::Requirement
138
+ requirements:
139
+ - - ">="
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ requirements: []
143
+ rubyforge_project:
144
+ rubygems_version: 2.5.1
145
+ signing_key:
146
+ specification_version: 4
147
+ summary: Quickly insert database records in bulk
148
+ test_files: []