lhm-shopify 3.3.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/test.yml +34 -0
  3. data/.gitignore +17 -0
  4. data/.rubocop.yml +183 -0
  5. data/.travis.yml +21 -0
  6. data/CHANGELOG.md +216 -0
  7. data/Gemfile +5 -0
  8. data/LICENSE +27 -0
  9. data/README.md +284 -0
  10. data/Rakefile +22 -0
  11. data/bin/.gitkeep +0 -0
  12. data/dbdeployer/config.json +32 -0
  13. data/dbdeployer/install.sh +64 -0
  14. data/dev.yml +20 -0
  15. data/gemfiles/ar-2.3_mysql.gemfile +6 -0
  16. data/gemfiles/ar-3.2_mysql.gemfile +5 -0
  17. data/gemfiles/ar-3.2_mysql2.gemfile +5 -0
  18. data/gemfiles/ar-4.0_mysql2.gemfile +5 -0
  19. data/gemfiles/ar-4.1_mysql2.gemfile +5 -0
  20. data/gemfiles/ar-4.2_mysql2.gemfile +5 -0
  21. data/gemfiles/ar-5.0_mysql2.gemfile +5 -0
  22. data/lhm.gemspec +34 -0
  23. data/lib/lhm.rb +131 -0
  24. data/lib/lhm/atomic_switcher.rb +52 -0
  25. data/lib/lhm/chunk_finder.rb +32 -0
  26. data/lib/lhm/chunk_insert.rb +51 -0
  27. data/lib/lhm/chunker.rb +87 -0
  28. data/lib/lhm/cleanup/current.rb +74 -0
  29. data/lib/lhm/command.rb +48 -0
  30. data/lib/lhm/entangler.rb +117 -0
  31. data/lib/lhm/intersection.rb +51 -0
  32. data/lib/lhm/invoker.rb +98 -0
  33. data/lib/lhm/locked_switcher.rb +74 -0
  34. data/lib/lhm/migration.rb +43 -0
  35. data/lib/lhm/migrator.rb +237 -0
  36. data/lib/lhm/printer.rb +59 -0
  37. data/lib/lhm/railtie.rb +9 -0
  38. data/lib/lhm/sql_helper.rb +77 -0
  39. data/lib/lhm/sql_retry.rb +61 -0
  40. data/lib/lhm/table.rb +121 -0
  41. data/lib/lhm/table_name.rb +23 -0
  42. data/lib/lhm/test_support.rb +35 -0
  43. data/lib/lhm/throttler.rb +36 -0
  44. data/lib/lhm/throttler/slave_lag.rb +145 -0
  45. data/lib/lhm/throttler/threads_running.rb +53 -0
  46. data/lib/lhm/throttler/time.rb +29 -0
  47. data/lib/lhm/timestamp.rb +11 -0
  48. data/lib/lhm/version.rb +6 -0
  49. data/shipit.rubygems.yml +0 -0
  50. data/spec/.lhm.example +4 -0
  51. data/spec/README.md +58 -0
  52. data/spec/fixtures/bigint_table.ddl +4 -0
  53. data/spec/fixtures/composite_primary_key.ddl +7 -0
  54. data/spec/fixtures/custom_primary_key.ddl +6 -0
  55. data/spec/fixtures/destination.ddl +6 -0
  56. data/spec/fixtures/lines.ddl +7 -0
  57. data/spec/fixtures/origin.ddl +6 -0
  58. data/spec/fixtures/permissions.ddl +5 -0
  59. data/spec/fixtures/small_table.ddl +4 -0
  60. data/spec/fixtures/tracks.ddl +5 -0
  61. data/spec/fixtures/users.ddl +14 -0
  62. data/spec/fixtures/wo_id_int_column.ddl +6 -0
  63. data/spec/integration/atomic_switcher_spec.rb +93 -0
  64. data/spec/integration/chunk_insert_spec.rb +29 -0
  65. data/spec/integration/chunker_spec.rb +185 -0
  66. data/spec/integration/cleanup_spec.rb +136 -0
  67. data/spec/integration/entangler_spec.rb +66 -0
  68. data/spec/integration/integration_helper.rb +237 -0
  69. data/spec/integration/invoker_spec.rb +33 -0
  70. data/spec/integration/lhm_spec.rb +585 -0
  71. data/spec/integration/lock_wait_timeout_spec.rb +30 -0
  72. data/spec/integration/locked_switcher_spec.rb +50 -0
  73. data/spec/integration/sql_retry/lock_wait_spec.rb +125 -0
  74. data/spec/integration/sql_retry/lock_wait_timeout_test_helper.rb +101 -0
  75. data/spec/integration/table_spec.rb +91 -0
  76. data/spec/test_helper.rb +32 -0
  77. data/spec/unit/atomic_switcher_spec.rb +31 -0
  78. data/spec/unit/chunk_finder_spec.rb +73 -0
  79. data/spec/unit/chunk_insert_spec.rb +44 -0
  80. data/spec/unit/chunker_spec.rb +166 -0
  81. data/spec/unit/entangler_spec.rb +124 -0
  82. data/spec/unit/intersection_spec.rb +51 -0
  83. data/spec/unit/lhm_spec.rb +29 -0
  84. data/spec/unit/locked_switcher_spec.rb +51 -0
  85. data/spec/unit/migrator_spec.rb +146 -0
  86. data/spec/unit/printer_spec.rb +97 -0
  87. data/spec/unit/sql_helper_spec.rb +32 -0
  88. data/spec/unit/table_name_spec.rb +39 -0
  89. data/spec/unit/table_spec.rb +47 -0
  90. data/spec/unit/throttler/slave_lag_spec.rb +317 -0
  91. data/spec/unit/throttler/threads_running_spec.rb +64 -0
  92. data/spec/unit/throttler_spec.rb +124 -0
  93. data/spec/unit/unit_helper.rb +13 -0
  94. metadata +239 -0
data/lib/lhm/table.rb ADDED
@@ -0,0 +1,121 @@
1
+ # Copyright (c) 2011 - 2013, SoundCloud Ltd., Rany Keddo, Tobias Bielohlawek, Tobias
2
+ # Schmidt
3
+
4
+ require 'lhm/sql_helper'
5
+
6
+ module Lhm
7
# Describes a database table: its name, primary key, columns, indices and
# DDL. Instances are usually built through Table.parse, which fills
# +columns+ and +indices+ from metadata queries on a connection.
class Table
  attr_reader :name, :columns, :indices, :pk, :ddl

  # name - table name
  # pk   - primary key column name, or an Array of names (default 'id')
  # ddl  - DDL string for the table, if known
  def initialize(name, pk = 'id', ddl = nil)
    @name = name
    @table_name = TableName.new(name)
    @pk = pk
    @ddl = ddl
    @columns = {}
    @indices = {}
  end

  # True when a column called 'id' is present and its :type matches the
  # integer pattern below; false otherwise.
  def satisfies_id_column_requirement?
    id_column = columns['id']
    return false unless id_column

    !!(id_column[:type] =~ /(bigint|int)(\(\d+\))?/)
  end

  # Name produced by TableName#new for this table, computed once and cached.
  def destination_name
    @destination_name ||= @table_name.new
  end

  # Builds a Table for +table_name+ by querying +connection+.
  def self.parse(table_name, connection)
    parser = Parser.new(table_name, connection)
    parser.parse
  end

  # Reads column, index, primary key and DDL information for one table
  # through the given connection and assembles a Table from it.
  class Parser
    include SqlHelper

    def initialize(table_name, connection)
      @table_name = table_name.to_s
      @schema_name = connection.current_database
      @connection = connection
    end

    # Returns the last field of the last row yielded by `show create table`,
    # or nil when no row is yielded.
    def ddl
      statement = "show create table `#{ @table_name }`"
      last_field = nil
      @connection.execute(statement).each do |row|
        last_field = row.last
      end
      last_field
    end

    # Returns a Table populated with one entry per information_schema column
    # row and one entry per non-primary index.
    def parse
      schema = read_information_schema
      table = Table.new(@table_name, extract_primary_key(schema), ddl)

      schema.each do |defn|
        # struct_key resolves the actual key used by the row for a given
        # column label; the value is then read from the row with that key.
        value_of = ->(label) { defn[struct_key(defn, label)] }

        table.columns[value_of.call('COLUMN_NAME')] = {
          type: value_of.call('COLUMN_TYPE'),
          is_nullable: value_of.call('IS_NULLABLE'),
          column_default: value_of.call('COLUMN_DEFAULT'),
          comment: value_of.call('COLUMN_COMMENT'),
          collate: value_of.call('COLLATION_NAME'),
        }
      end

      extract_indices(read_indices).each do |index_name, index_columns|
        table.indices[index_name] = index_columns
      end

      table
    end

    private

    # All information_schema.columns rows for this table in the current schema.
    def read_information_schema
      @connection.select_all %Q{
        select *
          from information_schema.columns
         where table_name = '#{ @table_name }'
           and table_schema = '#{ @schema_name }'
      }
    end

    # All index rows of this table except those of the PRIMARY key.
    def read_indices
      @connection.select_all %Q{
        show indexes from `#{ @schema_name }`.`#{ @table_name }`
       where key_name != 'PRIMARY'
      }
    end

    # Groups index rows into a Hash of index name => Array of column names,
    # keeping the order in which the rows were returned.
    def extract_indices(indices)
      grouped = Hash.new { |hash, key| hash[key] = [] }
      indices.each do |row|
        index_name = row[struct_key(row, 'Key_name')]
        column_name = row[struct_key(row, 'COLUMN_NAME')]
        grouped[index_name] << column_name
      end
      grouped
    end

    # Returns the single primary key column name, or an Array of names when
    # the number of 'PRI' columns is anything other than one.
    def extract_primary_key(schema)
      primary_rows = schema.select { |defn| defn[struct_key(defn, 'COLUMN_KEY')] == 'PRI' }
      names = primary_rows.map { |defn| defn[struct_key(defn, 'COLUMN_NAME')] }
      return names.first if names.length == 1

      names
    end
  end
end
121
+ end
@@ -0,0 +1,23 @@
1
+ module Lhm
2
# Derives the auxiliary table names (archived, failed, new) for an original
# table name. Every generated name is at most 64 characters long.
class TableName
  attr_reader :original

  # original - the source table name
  # time     - time handed to Timestamp for the archive name (default: now)
  def initialize(original, time = Time.now)
    @original = original
    @time = time
    @timestamp = Timestamp.new(time)
  end

  # "lhma_<timestamp>_<original>", cut to 64 characters.
  def archived
    "lhma_#{@timestamp}_#{@original}".slice(0, 64)
  end

  # The archived name cut to 57 characters so the "_failed" suffix still
  # fits within 64 characters.
  def failed
    "#{archived.slice(0, 57)}_failed"
  end

  # "lhmn_<original>", cut to 64 characters.
  def new
    "lhmn_#{@original}".slice(0, 64)
  end
end
23
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+ module Lhm
3
# Overrides for a migrator object: the constructor keeps the origin table's
# name as @name, and #execute sends every collected statement to the
# connection.
module TestMigrator
  # Runs the wrapped constructor with the same arguments, then replaces
  # @name with the name of @origin.
  def initialize(*)
    super
    @name = @origin.name
  end

  # Executes each entry of @statements, passed through #tagged, on
  # @connection, in order.
  def execute
    @statements.each { |statement| @connection.execute(tagged(statement)) }
  end
end
15
+
16
# Override for an invoker object's #run.
module TestInvoker
  # Normalizes +options+, sets the session lock wait timeouts and runs
  # @migrator. Any StandardError is logged through Lhm.logger and re-raised
  # unchanged.
  def run(options = {})
    begin
      normalize_options(options)
      set_session_lock_wait_timeouts
      @migrator.run
    rescue StandardError => error
      Lhm.logger.error("LHM run failed with exception=#{error.class} message=#{error.message}")
      raise
    end
  end
end
26
+
27
# Makes LHM run its ALTER TABLE statements directly against the original
# tables by prepending TestMigrator onto Lhm::Migrator and TestInvoker onto
# Lhm::Invoker, skipping the online migration dance.
# Intended for local/CI environments, where speed matters more than
# exercising the "real" LHM logic.
def self.execute_inline!
  Lhm::Migrator.prepend TestMigrator
  Lhm::Invoker.prepend TestInvoker
end
35
+ end
@@ -0,0 +1,36 @@
1
+ require 'lhm/throttler/time'
2
+ require 'lhm/throttler/slave_lag'
3
+ require 'lhm/throttler/threads_running'
4
+
5
+ module Lhm
6
+ module Throttler
7
# Throttler classes addressable by type name (Symbol keys).
CLASSES = {
  time_throttler: Throttler::Time,
  slave_lag_throttler: Throttler::SlaveLag,
  threads_running_throttler: Throttler::ThreadsRunning
}
10
+
11
# Returns the configured throttler, creating a Throttler::Time with default
# options the first time none is set.
def throttler
  @throttler = Throttler::Time.new unless @throttler
  @throttler
end
14
+
15
# Replaces the current throttler with the one Factory builds from +type+
# and +options+, and returns it.
def setup_throttler(type, options = {})
  built = Factory.create_throttler(type, options)
  @throttler = built
end
18
+
19
# Builds throttler objects from the different forms a caller may pass.
class Factory
  # type    - one of:
  #           * an object that includes Lhm::Command: returned as is
  #           * a Symbol or String naming a key of CLASSES: that class is
  #             instantiated with +options+
  #           * a Class: instantiated with +options+
  # options - Hash handed to the throttler class constructor
  #
  # Raises ArgumentError when +type+ is of any other kind, or when a
  # Symbol/String does not name a key of CLASSES (previously this surfaced
  # as a NoMethodError on nil).
  def self.create_throttler(type, options = {})
    case type
    when Lhm::Command
      type
    when Symbol, String
      throttler_class(type.to_sym).new(options)
    when Class
      type.new(options)
    else
      raise ArgumentError, 'type argument must be a Symbol, String or Class'
    end
  end

  # Looks up +key+ in CLASSES, raising ArgumentError for unknown keys.
  def self.throttler_class(key)
    CLASSES.fetch(key) do
      raise ArgumentError, "unknown throttler type #{key.inspect}, expected one of #{CLASSES.keys.inspect}"
    end
  end
  private_class_method :throttler_class
end
35
+ end
36
+ end
@@ -0,0 +1,145 @@
1
+ module Lhm
2
+ module Throttler
3
+
4
# Returns the host part (everything before the first ':') of every entry in
# +hosts+, skipping nil/false entries and entries that contain "localhost"
# or the literal address "127.0.0.1".
#
# The dots of the loopback address are escaped so that only a literal
# "127.0.0.1" is filtered out; with unescaped dots, entries such as
# "10.127.0.0:13306" were dropped as well.
def self.format_hosts(hosts)
  remote_hosts = hosts.select { |host| host && host !~ /localhost/ && host !~ /127\.0\.0\.1/ }
  remote_hosts.map { |host| host.partition(':')[0] }
end
13
+
14
# Throttler that sleeps between strides for a time that grows while the
# maximum lag reported by the known slaves exceeds +allowed_lag+ and
# shrinks again once it no longer does.
class SlaveLag
  include Command

  INITIAL_TIMEOUT = 0.1
  DEFAULT_STRIDE = 2_000
  DEFAULT_MAX_ALLOWED_LAG = 10

  MAX_TIMEOUT = INITIAL_TIMEOUT * 1024

  attr_accessor :timeout_seconds, :allowed_lag, :stride, :connection

  # options - Hash:
  #   :stride         - stored as +stride+ (default DEFAULT_STRIDE)
  #   :allowed_lag    - lag threshold (default DEFAULT_MAX_ALLOWED_LAG)
  #   :current_config - passed to every Slave as its connection config source
  #   :check_only     - when it responds to #call, its result's 'host' entry
  #                     names the only slave to check (no discovery)
  def initialize(options = {})
    @timeout_seconds = INITIAL_TIMEOUT
    @stride = options[:stride] || DEFAULT_STRIDE
    @allowed_lag = options[:allowed_lag] || DEFAULT_MAX_ALLOWED_LAG
    @slaves = {}
    @get_config = options[:current_config]
    @check_only = options[:check_only]
  end

  # Sleeps for the number of seconds computed by #throttle_seconds.
  def execute
    sleep(throttle_seconds)
  end

  private

  # Doubles the timeout while lag is above the allowed value (until
  # MAX_TIMEOUT is reached), halves it while lag is acceptable (down to
  # INITIAL_TIMEOUT), and otherwise leaves it unchanged. Returns the
  # resulting timeout.
  def throttle_seconds
    lag = max_current_slave_lag

    if lag > @allowed_lag && @timeout_seconds < MAX_TIMEOUT
      Lhm.logger.info("Increasing timeout between strides from #{@timeout_seconds} to #{@timeout_seconds * 2} because #{lag} seconds of slave lag detected is greater than the maximum of #{@allowed_lag} seconds allowed.")
      @timeout_seconds = @timeout_seconds * 2
    elsif lag <= @allowed_lag && @timeout_seconds > INITIAL_TIMEOUT
      Lhm.logger.info("Decreasing timeout between strides from #{@timeout_seconds} to #{@timeout_seconds / 2} because #{lag} seconds of slave lag detected is less than or equal to the #{@allowed_lag} seconds allowed.")
      @timeout_seconds = @timeout_seconds / 2
    else
      @timeout_seconds
    end
  end

  # Slaves for the current connection, looked up once per connection object.
  def slaves
    @slaves[@connection] ||= get_slaves
  end

  # Returns the slaves to monitor: the single host named by the :check_only
  # callable when one was given, otherwise every slave found by discovery.
  def get_slaves
    if @check_only.respond_to?(:call)
      slave_config = @check_only.call
      return [Slave.new(slave_config['host'], @get_config)]
    end

    discover_slaves
  end

  # Walks the replication tree starting from the hosts reported by the
  # current connection, following the hosts each reachable slave reports.
  def discover_slaves
    found = []
    pending_hosts = master_slave_hosts

    while pending_hosts.any?
      host = pending_hosts.pop
      # Check for duplicates before building a Slave: Slave.new opens a
      # connection, which would otherwise be created and then thrown away
      # for every host that was already collected.
      next if found.any? { |known| known.host == host }

      slave = Slave.new(host, @get_config)
      next unless slave.connection

      found << slave
      pending_hosts.concat(slave.slave_hosts)
    end

    found
  end

  # Hosts reported by Slave::SQL_SELECT_SLAVE_HOSTS on the current
  # connection, cleaned up by Throttler.format_hosts.
  def master_slave_hosts
    Throttler.format_hosts(@connection.select_values(Slave::SQL_SELECT_SLAVE_HOSTS))
  end

  # Largest lag among all slaves; 0 when there are none.
  def max_current_slave_lag
    max = slaves.map { |slave| slave.lag }.push(0).max
    Lhm.logger.info "Max current slave lag: #{max}"
    max
  end
end
87
+
88
# One replication slave, reached through its own Mysql2 connection.
class Slave
  SQL_SELECT_SLAVE_HOSTS = "SELECT host FROM information_schema.processlist WHERE command LIKE 'Binlog Dump%'"
  SQL_SELECT_MAX_SLAVE_LAG = 'SHOW SLAVE STATUS'

  attr_reader :host, :connection

  # host              - host name to connect to
  # connection_config - optional callable returning the connection settings
  #                     Hash; when nil the ActiveRecord pool config is used
  #
  # +connection+ is nil when the connection attempt fails.
  # Raises ArgumentError when +connection_config+ is given but not callable.
  def initialize(host, connection_config = nil)
    @host = host
    @connection_config = prepare_connection_config(connection_config)
    @connection = client(@connection_config)
  end

  # Hosts this slave reports via SQL_SELECT_SLAVE_HOSTS, cleaned up by
  # Throttler.format_hosts.
  def slave_hosts
    Throttler.format_hosts(query_connection(SQL_SELECT_SLAVE_HOSTS, 'host'))
  end

  # 'Seconds_Behind_Master' of the first SHOW SLAVE STATUS row, as an
  # Integer. Evaluates to 0 when the value is nil, which includes the cases
  # where the query fails or no connection exists.
  def lag
    query_connection(SQL_SELECT_MAX_SLAVE_LAG, 'Seconds_Behind_Master').first.to_i
  end

  private

  # Opens a Mysql2 client with +config+; logs and returns nil on
  # Mysql2::Error.
  def client(config)
    Lhm.logger.info "Connecting to #{@host} on database: #{config[:database]}"
    Mysql2::Client.new(config)
  rescue Mysql2::Error => e
    Lhm.logger.info "Error connecting to #{@host}: #{e}"
    nil
  end

  # Returns a symbol-keyed copy of the connection settings with :host set to
  # this slave's host.
  def prepare_connection_config(config_proc)
    config = if config_proc
      unless config_proc.respond_to?(:call) # anything that is not a proc-like object
        raise ArgumentError, "Expected #{config_proc.inspect} to respond to `call`"
      end

      # Work on a copy, as the ActiveRecord branch below does: the keys and
      # :host are changed in place afterwards, and the callable may hand out
      # the same Hash on every call.
      config_proc.call.dup
    else # otherwise default to ActiveRecord provided config
      ActiveRecord::Base.connection_pool.spec.config.dup
    end
    config.deep_symbolize_keys!
    config[:host] = @host
    config
  end

  # Runs +query+ and returns the +result+ column of every row. Returns [nil]
  # when there is no connection (the failure was already logged when
  # connecting) or when the query raises Mysql2::Error.
  def query_connection(query, result)
    return [nil] unless @connection

    @connection.query(query).map { |row| row[result] }
  rescue Mysql2::Error => e
    Lhm.logger.info "Unable to connect and/or query #{host}: #{e}"
    [nil]
  end
end
144
+ end
145
+ end
@@ -0,0 +1,53 @@
1
+ module Lhm
2
+ module Throttler
3
# Throttler that sleeps between strides for a time that grows while the
# number of running threads is outside +healthy_range+ and shrinks again
# while it is inside.
class ThreadsRunning
  include Command

  DEFAULT_INITIAL_TIMEOUT = 0.1
  DEFAULT_HEALTHY_RANGE = (0..50)

  attr_accessor :timeout_seconds, :healthy_range, :connection
  attr_reader :max_timeout_seconds, :initial_timeout_seconds

  # options - Hash:
  #   :initial_timeout - starting and minimum timeout (default 0.1)
  #   :max_timeout     - upper limit (default: 1024 times the initial timeout)
  #   :healthy_range   - acceptable thread counts (default 0..50)
  #   :connection      - connection used for the thread count query
  def initialize(options = {})
    @initial_timeout_seconds = options[:initial_timeout] || DEFAULT_INITIAL_TIMEOUT
    @timeout_seconds = @initial_timeout_seconds
    @max_timeout_seconds = options[:max_timeout] || (@initial_timeout_seconds * 1024)
    @healthy_range = options[:healthy_range] || DEFAULT_HEALTHY_RANGE
    @connection = options[:connection]
  end

  # Counts rows of performance_schema.threads matching the filter below.
  # The inner query is limited to one more than the top of the healthy
  # range, so the count never exceeds that limit.
  def threads_running
    row_limit = @healthy_range.max + 1
    query = <<~SQL.squish
      SELECT COUNT(*) as Threads_running
      FROM (
        SELECT 1 FROM performance_schema.threads
        WHERE NAME='thread/sql/one_connection'
        AND PROCESSLIST_STATE IS NOT NULL
        LIMIT #{row_limit}
      ) AS LIM
    SQL

    @connection.select_value(query)
  end

  # Doubles the timeout while the thread count is outside the healthy range
  # (until the maximum is reached), halves it while the count is inside the
  # range (down to the initial timeout), and otherwise leaves it unchanged.
  # Returns the resulting timeout.
  def throttle_seconds
    healthy = healthy_range.cover?(threads_running)

    if !healthy && @timeout_seconds < @max_timeout_seconds
      doubled = @timeout_seconds * 2
      Lhm.logger.info("Increasing timeout between strides from #{@timeout_seconds} to #{doubled} because threads running is greater than the maximum of #{@healthy_range.max} allowed.")
      @timeout_seconds = doubled
    elsif healthy && @timeout_seconds > @initial_timeout_seconds
      halved = @timeout_seconds / 2
      Lhm.logger.info("Decreasing timeout between strides from #{@timeout_seconds} to #{halved} because threads running is less than the maximum of #{@healthy_range.max} allowed.")
      @timeout_seconds = halved
    else
      @timeout_seconds
    end
  end

  # Sleeps for the number of seconds computed by #throttle_seconds.
  def execute
    sleep(throttle_seconds)
  end
end
52
+ end
53
+ end
@@ -0,0 +1,29 @@
1
+ module Lhm
2
+ module Throttler
3
# Throttler that sleeps for a fixed time on every execution.
class Time
  include Command

  DEFAULT_TIMEOUT = 0.1
  DEFAULT_STRIDE = 2_000

  attr_accessor :timeout_seconds, :stride

  # options - Hash:
  #   :delay  - seconds to sleep per execution (default DEFAULT_TIMEOUT)
  #   :stride - stored as +stride+ (default DEFAULT_STRIDE)
  def initialize(options = {})
    @timeout_seconds = options[:delay] || DEFAULT_TIMEOUT
    @stride = options[:stride] || DEFAULT_STRIDE
  end

  # Sleeps for +timeout_seconds+.
  def execute
    sleep(timeout_seconds)
  end
end
21
+
22
# Time throttler built from positional arguments.
class LegacyTime < Time
  # timeout - value divided by 1000.0 to obtain +timeout_seconds+
  # stride  - stored as +stride+
  def initialize(timeout, stride)
    @stride = stride
    @timeout_seconds = timeout / 1_000.0
  end
end
28
+ end
29
+ end