multisert 0.0.0.alpha → 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
data/README.md CHANGED
@@ -18,7 +18,70 @@ Or install it yourself as:
18
18
 
19
19
  ## Usage
20
20
 
21
- TODO: Write usage instructions here
21
+ Let's start with a table:
22
+
23
+ ```sql
24
+ CREATE TABLE IF NOT EXISTS some_database.some_table (
25
+ field_1 int default null,
26
+ field_2 int default null,
27
+ field_3 int default null,
28
+ field_4 int default null
29
+ );
30
+ ```
31
+
32
+ Now let's say we want to insert 1,000,000 records after running the
33
+ current iterator through `some_magical_calculation` into our table from above.
34
+ Let's assume that `some_magical_calculation` takes a single integer input and
35
+ returns an array of 4 values.
36
+
37
+ ```ruby
38
+ (0..1_000_000).each do |i|
39
+ res = some_magical_calculation(i)
40
+ dbclient.query %[
41
+ INSERT INTO some_database.some_table (field_1, field_2, field_3, field_4)
42
+ VALUES (#{res[0]}, #{res[1]}, #{res[2]}, #{res[3]})]
43
+ end
44
+ ```
45
+
46
+ This works, but we can improve its speed by bundling up inserts using
47
+ `Multisert`:
48
+
49
+ ```ruby
50
+ buffer = Multisert.new connection: dbclient,
51
+ database: 'some_database',
52
+ table: 'some_table',
53
+ fields: ['field_1', 'field_2', 'field_3', 'field_4']
54
+
55
+ (0..1_000_000).each do |i|
56
+ res = some_magical_calculation(i)
57
+ buffer << res
58
+ end
59
+ buffer.flush!
60
+ ```
61
+
62
+ We start by creating a new Multisert instance, providing the database
63
+ connection, database and table, and fields as attributes. Next, as we get the
64
+ results from `some_magical_calculation`, we shovel each into the Multisert
65
+ instance. As we iterate through, the Multisert instance will build up the
66
+ records and then flush itself to the specified database table when it hits an
67
+ internal count (default is 10_000, but can be set via the `max_buffer_count`
68
+ attribute). One last thing to note is the `buffer.flush!` at the end of the
69
+ script. This ensures that any pending entries are written to the database table
70
+ that were not automatically taken care of by the auto-flush that will kick in
71
+ during the iteration.
72
+
73
+ ## FAQ
74
+
75
+ ### Packet Too Large / Connection Lost Errors
76
+
77
+ You may run into the "Packet Too Large" error when attempting to run a
78
+ multisert. This can come back as this error explicitly or as a "Connection
79
+ Lost" error, depending on your mysql client.
80
+
81
+ To learn more, [read the documentation](http://dev.mysql.com/doc/refman/5.5/en/packet-too-large.html).
82
+
83
+ If you need to, you can adjust the buffer size by setting the `max_buffer_count`
84
+ attribute. Generally, 10,000 to 100,000 is a pretty good starting range.
22
85
 
23
86
  ## Contributing
24
87
 
data/Rakefile CHANGED
@@ -1,2 +1,28 @@
1
1
  #!/usr/bin/env rake
2
2
  require "bundler/gem_tasks"
3
# Define the :spec task and make it a default prerequisite, but only when
# RSpec can be loaded; otherwise report that the task is unavailable.
begin
  require 'rspec/core/rake_task'

  RSpec::Core::RakeTask.new(:spec) do |t|
    # -b is RSpec's short flag for printing full backtraces.
    t.rspec_opts = '-b'
  end

  task default: :spec
rescue LoadError
  warn "rspec not available, spec task not provided"
end

# Define the :quality task (cane metrics) and add it to the default
# prerequisites, but only when cane can be loaded.
begin
  require 'cane/rake_task'

  desc "Run cane to check quality metrics"
  Cane::RakeTask.new(:quality) do |cane|
    cane.abc_max = 10
    cane.style_glob = "lib/**/*.rb"
    cane.no_doc = true
  end

  task default: :quality
rescue LoadError
  warn "cane not available, quality task not provided."
end
@@ -1,5 +1,80 @@
1
- require "multisert/version"
1
require 'date'

# Buffers rows destined for one table and writes them to the database with a
# single multi-row INSERT statement.
#
# Rows are appended with #<<. Once the number of buffered rows reaches
# #max_buffer_count the buffer flushes itself; call #flush! at the end of a
# run to write whatever is still pending.
#
# The connection only needs to respond to +query(sql)+. If it also responds to
# +escape(string)+ (as Mysql2::Client does) that method is used to escape
# String values; otherwise a minimal built-in escaper is used.
class Multisert
  # Number of buffered rows that triggers an automatic flush when no explicit
  # max_buffer_count has been assigned.
  MAX_BUFFER_COUNT_DEFAULT = 10_000

  attr_accessor :connection, :database, :table
  attr_writer :fields, :max_buffer_count

  # @param attrs [Hash] attribute name => value pairs; each pair is assigned
  #   through the matching public writer (connection, database, table,
  #   fields, max_buffer_count). An unknown name raises NoMethodError.
  def initialize(attrs = {})
    attrs.each do |attr, value|
      # public_send so that a stray key can never invoke a private method.
      public_send "#{attr}=", value
    end
  end

  # @return [Array] column names used in the INSERT column list
  #   (empty array until assigned).
  def fields
    @fields ||= []
  end

  # @return [Array<Array>] rows buffered since the last flush.
  def entries
    @entries ||= []
  end

  # Buffers one row and flushes when the buffer has reached max_buffer_count.
  #
  # @param entry [Array] values for one row, in the same order as #fields
  # @return [Array] the entry that was passed in
  def <<(entry)
    entries << entry
    flush! if flush_buffer?
    entry
  end

  # Writes all buffered rows with one INSERT and empties the buffer.
  # Does nothing (and does not touch the connection) when the buffer is empty.
  def flush!
    return if buffer_empty?
    @connection.query multisert_sql
    reset_entries!
  end

  # @return [Integer] the assigned threshold, or MAX_BUFFER_COUNT_DEFAULT.
  def max_buffer_count
    @max_buffer_count || MAX_BUFFER_COUNT_DEFAULT
  end

  private

  def buffer_empty?
    entries.empty?
  end

  def flush_buffer?
    entries.count >= max_buffer_count
  end

  def reset_entries!
    @entries = []
  end

  # Full statement: "INSERT INTO db.table (cols) VALUES (..),(..)".
  def multisert_sql
    "#{multisert_preamble} #{multisert_values}"
  end

  def multisert_preamble
    "INSERT INTO #{database}.#{table} (#{fields.join(',')}) VALUES"
  end

  # One parenthesised, comma-separated tuple per buffered row.
  def multisert_values
    entries.map { |row| "(#{row.map { |value| cast(value) }.join(',')})" }.join(',')
  end

  # Renders a single Ruby value as a SQL literal.
  #
  # nil becomes NULL (interpolating nil directly would leave an empty token
  # such as "(1,,3)"), Strings are escaped and single-quoted, Dates are
  # single-quoted using Date#to_s, and anything else is interpolated as-is.
  def cast(value)
    case value
    when nil    then 'NULL'
    when String then "'#{escape(value)}'"
    when Date   then "'#{value}'"
    else value
    end
  end

  # Escapes a String for use inside a single-quoted SQL literal, preferring
  # the connection's own escaping when it offers one.
  def escape(string)
    if @connection.respond_to?(:escape)
      @connection.escape(string)
    else
      # Fallback: backslash-escape backslashes and single quotes.
      string.gsub(/[\\']/) { |char| "\\#{char}" }
    end
  end
end
@@ -1,3 +1,3 @@
1
- module Multisert
2
- VERSION = "0.0.0.alpha"
1
class Multisert
  # Gem version string, referenced by the gemspec. Frozen so the constant
  # cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -6,7 +6,7 @@ Gem::Specification.new do |gem|
6
6
  gem.email = ["jeff.iacono@gmail.com"]
7
7
  gem.description = %q{Buffer to handle bulk INSERTs}
8
8
  gem.summary = %q{Buffer to handle bulk INSERTs}
9
- gem.homepage = ""
9
+ gem.homepage = "https://github.com/jeffreyiacono/multisert"
10
10
 
11
11
  gem.files = `git ls-files`.split($\)
12
12
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
@@ -14,4 +14,9 @@ Gem::Specification.new do |gem|
14
14
  gem.name = "multisert"
15
15
  gem.require_paths = ["lib"]
16
16
  gem.version = Multisert::VERSION
17
+
18
+ gem.add_development_dependency "mysql2"
19
+ gem.add_development_dependency "rake"
20
+ gem.add_development_dependency "cane"
21
+ gem.add_development_dependency "rspec", [">= 2"]
17
22
  end
@@ -0,0 +1,151 @@
1
require 'date'
require 'mysql2'
require './spec/spec_helper'
require './lib/multisert'

# Integration specs for Multisert. The #flush! examples run real statements
# through the Mysql2 connection configured below.

# TODO: allow overriding in yaml config
TEST_DATABASE = 'multisert_test'
TEST_TABLE    = 'test_data'

# TODO: make into yaml config
$connection = Mysql2::Client.new(host: 'localhost', username: 'root')

# Registers the CREATE TABLE statement for the table the examples write to.
$cleaner = MrClean.new(database: TEST_DATABASE, connection: $connection) do |mgr|
  mgr.create_table_schemas << %[
    CREATE TABLE IF NOT EXISTS #{mgr.database}.#{TEST_TABLE} (
      test_field_int_1 int default null,
      test_field_int_2 int default null,
      test_field_int_3 int default null,
      test_field_int_4 int default null,
      test_field_varchar varchar(10) default null,
      test_field_date DATE default null
    )]
end

describe Multisert do
  describe "<<" do
    let(:buffer) { described_class.new }

    it "adds to the entries" do
      buffer << [1, 2, 3]
      buffer.entries.should == [[1, 2, 3]]
    end

    it "calls #flush! when the number of entries equals (or exceeds) max buffer count" do
      buffer.max_buffer_count = 2
      buffer.should_receive(:flush!)
      buffer << [1, 2, 3]
      buffer << [1, 2, 3]
    end
  end

  describe "#flush!" do
    let(:connection) { $connection }
    let(:buffer) { described_class.new }

    before do
      # Setting the TEARDOWN environment variable (to any value) asks the
      # cleaner to tear tables down instead of truncating them.
      $cleaner.ensure_clean_database! teardown_tables: !!ENV['TEARDOWN']
    end

    it "does not fall over when there are no entries" do
      delete_result = connection.query "DELETE FROM #{TEST_DATABASE}.#{TEST_TABLE}"
      delete_result.to_a.should == []

      buffer.flush!

      flush_records = connection.query "SELECT * FROM #{TEST_DATABASE}.#{TEST_TABLE}"
      flush_records.to_a.should == []
      buffer.entries.should == []
    end

    it "multi-inserts all added entries" do
      pre_flush_records = connection.query "SELECT * FROM #{TEST_DATABASE}.#{TEST_TABLE}"
      pre_flush_records.to_a.should == []

      buffer.connection = connection
      buffer.database   = TEST_DATABASE
      buffer.table      = TEST_TABLE
      buffer.fields     = ['test_field_int_1',
                           'test_field_int_2',
                           'test_field_int_3',
                           'test_field_int_4']

      buffer << [ 1,  3,  4,  5]
      buffer << [ 6,  7,  8,  9]
      buffer << [10, 11, 12, 13]
      buffer << [14, 15, 16, 17]

      buffer.flush!

      post_flush_records = connection.query %[
        SELECT
            test_field_int_1
          , test_field_int_2
          , test_field_int_3
          , test_field_int_4
        FROM #{TEST_DATABASE}.#{TEST_TABLE}]

      post_flush_records.to_a.should == [
        {'test_field_int_1' => 1,  'test_field_int_2' => 3,  'test_field_int_3' => 4,  'test_field_int_4' => 5},
        {'test_field_int_1' => 6,  'test_field_int_2' => 7,  'test_field_int_3' => 8,  'test_field_int_4' => 9},
        {'test_field_int_1' => 10, 'test_field_int_2' => 11, 'test_field_int_3' => 12, 'test_field_int_4' => 13},
        {'test_field_int_1' => 14, 'test_field_int_2' => 15, 'test_field_int_3' => 16, 'test_field_int_4' => 17}]

      buffer.entries.should == []
    end

    it "works with strings" do
      pre_flush_records = connection.query "SELECT * FROM #{TEST_DATABASE}.#{TEST_TABLE}"
      pre_flush_records.to_a.should == []

      buffer.connection = connection
      buffer.database   = TEST_DATABASE
      buffer.table      = TEST_TABLE
      buffer.fields     = ['test_field_varchar']

      buffer << ['a']
      buffer << ['b']
      buffer << ['c']
      buffer << ['d']

      buffer.flush!

      post_flush_records = connection.query %[SELECT test_field_varchar FROM #{TEST_DATABASE}.#{TEST_TABLE}]
      post_flush_records.to_a.should == [
        {'test_field_varchar' => 'a'},
        {'test_field_varchar' => 'b'},
        {'test_field_varchar' => 'c'},
        {'test_field_varchar' => 'd'}]

      buffer.entries.should == []
    end

    # Pending: no body yet.
    it "works with strings that have illegal characters"

    it "works with dates" do
      pre_flush_records = connection.query "SELECT * FROM #{TEST_DATABASE}.#{TEST_TABLE}"
      pre_flush_records.to_a.should == []

      buffer.connection = connection
      buffer.database   = TEST_DATABASE
      buffer.table      = TEST_TABLE
      buffer.fields     = ['test_field_date']

      buffer << [Date.new(2013, 1, 15)]
      buffer << [Date.new(2013, 1, 16)]
      buffer << [Date.new(2013, 1, 17)]
      buffer << [Date.new(2013, 1, 18)]

      buffer.flush!

      post_flush_records = connection.query %[SELECT test_field_date FROM #{TEST_DATABASE}.#{TEST_TABLE}]

      post_flush_records.to_a.should == [
        {'test_field_date' => Date.parse('2013-01-15')},
        {'test_field_date' => Date.parse('2013-01-16')},
        {'test_field_date' => Date.parse('2013-01-17')},
        {'test_field_date' => Date.parse('2013-01-18')}]

      buffer.entries.should == []
    end
  end
end
@@ -0,0 +1,46 @@
1
# Test helper that resets a database to a known state: it empties (or drops)
# every existing table in the database and then runs the registered
# CREATE TABLE statements.
#
# The connection must respond to +query(sql)+ and return something that
# converts to an Array of Hash rows via +to_a+.
class MrClean
  attr_accessor :connection, :database, :create_table_schemas

  # @param attrs [Hash] :connection, :database and (optionally)
  #   :create_table_schemas, an Array of SQL strings (defaults to []).
  # @yield [self] lets the caller finish configuration, e.g. append schemas.
  def initialize(attrs = {})
    @connection           = attrs[:connection]
    @database             = attrs[:database]
    @create_table_schemas = attrs[:create_table_schemas] || []
    yield self if block_given?
  end

  # Cleans existing tables, then makes sure the database and the registered
  # tables exist.
  #
  # @param opts [Hash] pass a truthy :teardown_tables to drop tables instead
  #   of truncating them.
  def ensure_clean_database!(opts = {})
    clean_database!(!!opts[:teardown_tables])
    ensure_tables!
  end

  private

  def database_exists?
    @connection.query('show databases').to_a.any? { |row| row['Database'] == @database }
  end

  def ensure_database!
    @connection.query "create database if not exists #{@database}"
  end

  # Names of all tables currently in the database. Each result row is read
  # through its "Tables_in_<database>" key.
  def table_names
    column = "Tables_in_#{@database}"
    @connection.query("show tables in #{@database}").to_a.map { |row| row[column] }
  end

  # Drops (teardown_tables true) or truncates every table; a no-op when the
  # database does not exist yet.
  def clean_database!(teardown_tables)
    return unless database_exists?

    table_names.each do |name|
      if teardown_tables
        puts "TEARING DOWN"
        @connection.query("drop table if exists #{@database}.#{name}")
      else
        @connection.query("truncate #{@database}.#{name}")
      end
    end
  end

  def ensure_tables!
    ensure_database!
    @create_table_schemas.each do |schema|
      @connection.query schema
    end
  end
end
metadata CHANGED
@@ -1,8 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multisert
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0.alpha
5
- prerelease: 6
4
+ version: 0.0.1
5
+ prerelease:
6
6
  platform: ruby
7
7
  authors:
8
8
  - Jeff Iacono
@@ -10,7 +10,71 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
  date: 2013-03-07 00:00:00.000000000 Z
13
- dependencies: []
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: mysql2
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: cane
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rspec
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '2'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '2'
14
78
  description: Buffer to handle bulk INSERTs
15
79
  email:
16
80
  - jeff.iacono@gmail.com
@@ -19,6 +83,7 @@ extensions: []
19
83
  extra_rdoc_files: []
20
84
  files:
21
85
  - .gitignore
86
+ - .rspec
22
87
  - Gemfile
23
88
  - LICENSE
24
89
  - README.md
@@ -26,7 +91,9 @@ files:
26
91
  - lib/multisert.rb
27
92
  - lib/multisert/version.rb
28
93
  - multisert.gemspec
29
- homepage: ''
94
+ - spec/multisert_spec.rb
95
+ - spec/spec_helper.rb
96
+ homepage: https://github.com/jeffreyiacono/multisert
30
97
  licenses: []
31
98
  post_install_message:
32
99
  rdoc_options: []
@@ -41,13 +108,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
41
108
  required_rubygems_version: !ruby/object:Gem::Requirement
42
109
  none: false
43
110
  requirements:
44
- - - ! '>'
111
+ - - ! '>='
45
112
  - !ruby/object:Gem::Version
46
- version: 1.3.1
113
+ version: '0'
47
114
  requirements: []
48
115
  rubyforge_project:
49
116
  rubygems_version: 1.8.24
50
117
  signing_key:
51
118
  specification_version: 3
52
119
  summary: Buffer to handle bulk INSERTs
53
- test_files: []
120
+ test_files:
121
+ - spec/multisert_spec.rb
122
+ - spec/spec_helper.rb