lhm-shopify 3.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/test.yml +34 -0
  3. data/.gitignore +17 -0
  4. data/.rubocop.yml +183 -0
  5. data/.travis.yml +21 -0
  6. data/CHANGELOG.md +216 -0
  7. data/Gemfile +5 -0
  8. data/LICENSE +27 -0
  9. data/README.md +284 -0
  10. data/Rakefile +22 -0
  11. data/bin/.gitkeep +0 -0
  12. data/dbdeployer/config.json +32 -0
  13. data/dbdeployer/install.sh +64 -0
  14. data/dev.yml +20 -0
  15. data/gemfiles/ar-2.3_mysql.gemfile +6 -0
  16. data/gemfiles/ar-3.2_mysql.gemfile +5 -0
  17. data/gemfiles/ar-3.2_mysql2.gemfile +5 -0
  18. data/gemfiles/ar-4.0_mysql2.gemfile +5 -0
  19. data/gemfiles/ar-4.1_mysql2.gemfile +5 -0
  20. data/gemfiles/ar-4.2_mysql2.gemfile +5 -0
  21. data/gemfiles/ar-5.0_mysql2.gemfile +5 -0
  22. data/lhm.gemspec +34 -0
  23. data/lib/lhm.rb +131 -0
  24. data/lib/lhm/atomic_switcher.rb +52 -0
  25. data/lib/lhm/chunk_finder.rb +32 -0
  26. data/lib/lhm/chunk_insert.rb +51 -0
  27. data/lib/lhm/chunker.rb +87 -0
  28. data/lib/lhm/cleanup/current.rb +74 -0
  29. data/lib/lhm/command.rb +48 -0
  30. data/lib/lhm/entangler.rb +117 -0
  31. data/lib/lhm/intersection.rb +51 -0
  32. data/lib/lhm/invoker.rb +98 -0
  33. data/lib/lhm/locked_switcher.rb +74 -0
  34. data/lib/lhm/migration.rb +43 -0
  35. data/lib/lhm/migrator.rb +237 -0
  36. data/lib/lhm/printer.rb +59 -0
  37. data/lib/lhm/railtie.rb +9 -0
  38. data/lib/lhm/sql_helper.rb +77 -0
  39. data/lib/lhm/sql_retry.rb +61 -0
  40. data/lib/lhm/table.rb +121 -0
  41. data/lib/lhm/table_name.rb +23 -0
  42. data/lib/lhm/test_support.rb +35 -0
  43. data/lib/lhm/throttler.rb +36 -0
  44. data/lib/lhm/throttler/slave_lag.rb +145 -0
  45. data/lib/lhm/throttler/threads_running.rb +53 -0
  46. data/lib/lhm/throttler/time.rb +29 -0
  47. data/lib/lhm/timestamp.rb +11 -0
  48. data/lib/lhm/version.rb +6 -0
  49. data/shipit.rubygems.yml +0 -0
  50. data/spec/.lhm.example +4 -0
  51. data/spec/README.md +58 -0
  52. data/spec/fixtures/bigint_table.ddl +4 -0
  53. data/spec/fixtures/composite_primary_key.ddl +7 -0
  54. data/spec/fixtures/custom_primary_key.ddl +6 -0
  55. data/spec/fixtures/destination.ddl +6 -0
  56. data/spec/fixtures/lines.ddl +7 -0
  57. data/spec/fixtures/origin.ddl +6 -0
  58. data/spec/fixtures/permissions.ddl +5 -0
  59. data/spec/fixtures/small_table.ddl +4 -0
  60. data/spec/fixtures/tracks.ddl +5 -0
  61. data/spec/fixtures/users.ddl +14 -0
  62. data/spec/fixtures/wo_id_int_column.ddl +6 -0
  63. data/spec/integration/atomic_switcher_spec.rb +93 -0
  64. data/spec/integration/chunk_insert_spec.rb +29 -0
  65. data/spec/integration/chunker_spec.rb +185 -0
  66. data/spec/integration/cleanup_spec.rb +136 -0
  67. data/spec/integration/entangler_spec.rb +66 -0
  68. data/spec/integration/integration_helper.rb +237 -0
  69. data/spec/integration/invoker_spec.rb +33 -0
  70. data/spec/integration/lhm_spec.rb +585 -0
  71. data/spec/integration/lock_wait_timeout_spec.rb +30 -0
  72. data/spec/integration/locked_switcher_spec.rb +50 -0
  73. data/spec/integration/sql_retry/lock_wait_spec.rb +125 -0
  74. data/spec/integration/sql_retry/lock_wait_timeout_test_helper.rb +101 -0
  75. data/spec/integration/table_spec.rb +91 -0
  76. data/spec/test_helper.rb +32 -0
  77. data/spec/unit/atomic_switcher_spec.rb +31 -0
  78. data/spec/unit/chunk_finder_spec.rb +73 -0
  79. data/spec/unit/chunk_insert_spec.rb +44 -0
  80. data/spec/unit/chunker_spec.rb +166 -0
  81. data/spec/unit/entangler_spec.rb +124 -0
  82. data/spec/unit/intersection_spec.rb +51 -0
  83. data/spec/unit/lhm_spec.rb +29 -0
  84. data/spec/unit/locked_switcher_spec.rb +51 -0
  85. data/spec/unit/migrator_spec.rb +146 -0
  86. data/spec/unit/printer_spec.rb +97 -0
  87. data/spec/unit/sql_helper_spec.rb +32 -0
  88. data/spec/unit/table_name_spec.rb +39 -0
  89. data/spec/unit/table_spec.rb +47 -0
  90. data/spec/unit/throttler/slave_lag_spec.rb +317 -0
  91. data/spec/unit/throttler/threads_running_spec.rb +64 -0
  92. data/spec/unit/throttler_spec.rb +124 -0
  93. data/spec/unit/unit_helper.rb +13 -0
  94. metadata +239 -0
data/lib/lhm/table.rb ADDED
@@ -0,0 +1,121 @@
1
+ # Copyright (c) 2011 - 2013, SoundCloud Ltd., Rany Keddo, Tobias Bielohlawek, Tobias
2
+ # Schmidt
3
+
4
+ require 'lhm/sql_helper'
5
+
6
module Lhm
  # Describes a single table: its name, columns, secondary indices, primary
  # key and the DDL statement it was created with. Instances are usually
  # built from a live connection through Table.parse.
  class Table
    # Integer column types accepted for the `id` column. Word boundaries keep
    # unrelated types that merely contain "int" (for example `point`) from
    # being mistaken for an integer column.
    INTEGER_ID_TYPE = /\b(?:tiny|small|medium|big)?int(?:eger)?\b/

    attr_reader :name, :columns, :indices, :pk, :ddl

    # @param name [String] table name
    # @param pk [String, Array<String>] primary key column name(s)
    # @param ddl [String, nil] the table's create statement, when known
    def initialize(name, pk = 'id', ddl = nil)
      @name = name
      @table_name = TableName.new(name)
      @columns = {}
      @indices = {}
      @pk = pk
      @ddl = ddl
    end

    # @return [Boolean] true when a column named 'id' exists and its type is
    #   one of the integer types matched by INTEGER_ID_TYPE.
    def satisfies_id_column_requirement?
      id = columns['id']
      return false unless id

      INTEGER_ID_TYPE.match?(id[:type].to_s)
    end

    # @return [String] name produced by TableName#new for this table (memoized).
    def destination_name
      @destination_name ||= @table_name.new
    end

    # Builds a Table by inspecting `table_name` through `connection`.
    def self.parse(table_name, connection)
      Parser.new(table_name, connection).parse
    end

    # Reads column, index and DDL information for one table from the
    # database the connection is currently using.
    class Parser
      include SqlHelper

      def initialize(table_name, connection)
        @table_name = table_name.to_s
        @schema_name = connection.current_database
        @connection = connection
      end

      # @return [String, nil] last field of the last row returned by
      #   `show create table`, or nil when no row is returned.
      def ddl
        sql = "show create table `#{ @table_name }`"
        specification = nil
        @connection.execute(sql).each { |row| specification = row.last }
        specification
      end

      # @return [Lhm::Table] table populated with columns and indices.
      def parse
        schema = read_information_schema

        Table.new(@table_name, extract_primary_key(schema), ddl).tap do |table|
          schema.each do |defn|
            # struct_key (from SqlHelper) resolves the key actually used by
            # the row for the given column label.
            column_name    = struct_key(defn, 'COLUMN_NAME')
            column_type    = struct_key(defn, 'COLUMN_TYPE')
            is_nullable    = struct_key(defn, 'IS_NULLABLE')
            column_default = struct_key(defn, 'COLUMN_DEFAULT')
            comment        = struct_key(defn, 'COLUMN_COMMENT')
            collate        = struct_key(defn, 'COLLATION_NAME')

            table.columns[defn[column_name]] = {
              :type => defn[column_type],
              :is_nullable => defn[is_nullable],
              :column_default => defn[column_default],
              :comment => defn[comment],
              :collate => defn[collate],
            }
          end

          extract_indices(read_indices).each do |idx, columns|
            table.indices[idx] = columns
          end
        end
      end

      private

      # All information_schema.columns rows for this table in this schema.
      def read_information_schema
        @connection.select_all %Q{
          select *
            from information_schema.columns
           where table_name = '#{ @table_name }'
             and table_schema = '#{ @schema_name }'
        }
      end

      # Every index row of the table except those of the primary key.
      def read_indices
        @connection.select_all %Q{
          show indexes from `#{ @schema_name }`.`#{ @table_name }`
         where key_name != 'PRIMARY'
        }
      end

      # Groups index rows into { index name => [column names in row order] }.
      def extract_indices(indices)
        indices.each_with_object(Hash.new { |h, k| h[k] = [] }) do |row, memo|
          key_name = struct_key(row, 'Key_name')
          column_name = struct_key(row, 'COLUMN_NAME')
          memo[row[key_name]] << row[column_name]
        end
      end

      # @return [String, Array<String>] the single primary key column name,
      #   or an array of names when there are zero or several key columns.
      def extract_primary_key(schema)
        cols = schema.select do |defn|
          column_key = struct_key(defn, 'COLUMN_KEY')
          defn[column_key] == 'PRI'
        end

        keys = cols.map do |defn|
          column_name = struct_key(defn, 'COLUMN_NAME')
          defn[column_name]
        end

        keys.length == 1 ? keys.first : keys
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module Lhm
  # Derives the auxiliary table names (archive, failed, new) that belong to an
  # original table name, each capped at MAX_LENGTH characters.
  class TableName
    # Upper bound applied to every generated name.
    MAX_LENGTH = 64
    # Suffix appended to the archive name to mark a failed run.
    FAILED_SUFFIX = '_failed'

    attr_reader :original

    # @param original [String] name of the table the names are derived from
    # @param time [Time] moment used to build the archive timestamp
    def initialize(original, time = Time.now)
      @original = original
      @time = time
      @timestamp = Timestamp.new(time)
    end

    # @return [String] "lhma_<timestamp>_<original>", truncated to MAX_LENGTH.
    def archived
      truncate("lhma_#{@timestamp}_#{@original}")
    end

    # @return [String] archive name shortened so that FAILED_SUFFIX still fits
    #   within MAX_LENGTH, followed by the suffix.
    def failed
      truncate(archived, MAX_LENGTH - FAILED_SUFFIX.length) + FAILED_SUFFIX
    end

    # @return [String] "lhmn_<original>", truncated to MAX_LENGTH.
    def new
      truncate("lhmn_#{@original}")
    end

    private

    # Keeps at most `limit` leading characters of `name`.
    def truncate(name, limit = MAX_LENGTH)
      name[0...limit]
    end
  end
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
module Lhm
  # Prepended onto Lhm::Migrator by Lhm.execute_inline!.
  module TestMigrator
    # Runs the regular constructor, then points @name at the origin table's
    # own name.
    def initialize(*)
      super
      @name = @origin.name
    end

    # Sends each collected statement, tagged, straight to the connection.
    def execute
      @statements.each { |stmt| @connection.execute(tagged(stmt)) }
    end
  end

  # Prepended onto Lhm::Invoker by Lhm.execute_inline!.
  module TestInvoker
    # Normalizes options, sets the session lock wait timeouts and runs the
    # migrator. Any StandardError is logged and then re-raised unchanged.
    def run(options = {})
      normalize_options(options)
      set_session_lock_wait_timeouts
      @migrator.run
    rescue => e
      Lhm.logger.error("LHM run failed with exception=#{e.class} message=#{e.message}")
      raise
    end
  end

  # Patch LHM to execute ALTER TABLE directly on original tables,
  # without the online migration dance.
  # This mode is designed for local/CI environments where we can speed
  # things up by not invoking "real" LHM logic.
  def self.execute_inline!
    Lhm::Migrator.prepend(TestMigrator)
    Lhm::Invoker.prepend(TestInvoker)
  end
end
@@ -0,0 +1,36 @@
1
+ require 'lhm/throttler/time'
2
+ require 'lhm/throttler/slave_lag'
3
+ require 'lhm/throttler/threads_running'
4
+
5
+ module Lhm
6
+ module Throttler
7
# Throttler classes addressable by symbolic name (looked up by
# Factory.create_throttler for Symbol and String arguments).
CLASSES = {
  :time_throttler => Throttler::Time,
  :slave_lag_throttler => Throttler::SlaveLag,
  :threads_running_throttler => Throttler::ThreadsRunning
}
10
+
11
# Returns the configured throttler; when none has been set yet, a
# Throttler::Time built with no arguments is created and remembered.
def throttler
  @throttler ||= Throttler::Time.new
end
14
+
15
# Replaces the current throttler with the one Factory.create_throttler
# builds for `type` and `options`, and returns it.
def setup_throttler(type, options = {})
  @throttler = Factory.create_throttler(type, options)
end
18
+
19
# Builds throttler instances from the different forms a caller may pass.
class Factory
  # @param type [Lhm::Command, Symbol, String, Class]
  #   * an Lhm::Command is returned unchanged;
  #   * a Symbol or String is looked up in CLASSES and instantiated;
  #   * a Class is instantiated directly.
  # @param options [Hash] passed to the throttler's constructor
  # @return [Object] the throttler
  # @raise [ArgumentError] when `type` is of an unsupported kind, or names a
  #   throttler that is not registered in CLASSES
  def self.create_throttler(type, options = {})
    case type
    when Lhm::Command
      type
    when Symbol, String
      klass = CLASSES[type.to_sym]
      # Without this guard an unknown name surfaces as NoMethodError on nil.
      unless klass
        raise ArgumentError, "unknown throttler type #{type.inspect}, expected one of #{CLASSES.keys.inspect}"
      end
      klass.new(options)
    when Class
      type.new(options)
    else
      raise ArgumentError, 'type argument must be a Symbol, String or Class'
    end
  end
end
35
+ end
36
+ end
@@ -0,0 +1,145 @@
1
+ module Lhm
2
+ module Throttler
3
+
4
# Normalizes a list of "host:port" strings.
#
# Falsy entries and entries containing "localhost" or the literal address
# "127.0.0.1" are dropped; for the rest everything from the first ':' on is
# removed.
#
# @param hosts [Array<String, nil>]
# @return [Array<String>] host names without port, in input order
def self.format_hosts(hosts)
  # Dots are escaped so only the literal loopback address is filtered out,
  # not look-alikes such as "127-0-0-1.example.com".
  local = /localhost|127\.0\.0\.1/

  hosts
    .select { |host| host && !host.match(local) }
    .map { |host| host.partition(':')[0] }
end
13
+
14
# Throttler whose pause between strides follows the replication lag reported
# by the replicas it knows about: the pause doubles while the worst lag is
# above the allowed value and halves again once it is back within bounds.
class SlaveLag
  include Command

  INITIAL_TIMEOUT = 0.1
  DEFAULT_STRIDE = 2_000
  DEFAULT_MAX_ALLOWED_LAG = 10

  MAX_TIMEOUT = INITIAL_TIMEOUT * 1024

  attr_accessor :timeout_seconds, :allowed_lag, :stride, :connection

  # @param options [Hash]
  # @option options [Integer] :stride (DEFAULT_STRIDE)
  # @option options [Numeric] :allowed_lag (DEFAULT_MAX_ALLOWED_LAG)
  # @option options [#call] :current_config handed to each Slave as its
  #   connection config source
  # @option options [#call] :check_only when callable, its result's 'host'
  #   entry names the only replica to check, and discovery is skipped
  def initialize(options = {})
    @timeout_seconds = INITIAL_TIMEOUT
    @stride = options[:stride] || DEFAULT_STRIDE
    @allowed_lag = options[:allowed_lag] || DEFAULT_MAX_ALLOWED_LAG
    @slaves = {}
    @get_config = options[:current_config]
    @check_only = options[:check_only]
  end

  # Sleeps for the (possibly adjusted) timeout.
  def execute
    sleep(throttle_seconds)
  end

  private

  # Adjusts and returns @timeout_seconds based on the current maximum lag.
  def throttle_seconds
    lag = max_current_slave_lag

    if lag > @allowed_lag && @timeout_seconds < MAX_TIMEOUT
      Lhm.logger.info("Increasing timeout between strides from #{@timeout_seconds} to #{@timeout_seconds * 2} because #{lag} seconds of slave lag detected is greater than the maximum of #{@allowed_lag} seconds allowed.")
      @timeout_seconds = @timeout_seconds * 2
    elsif lag <= @allowed_lag && @timeout_seconds > INITIAL_TIMEOUT
      Lhm.logger.info("Decreasing timeout between strides from #{@timeout_seconds} to #{@timeout_seconds / 2} because #{lag} seconds of slave lag detected is less than or equal to the #{@allowed_lag} seconds allowed.")
      @timeout_seconds = @timeout_seconds / 2
    else
      @timeout_seconds
    end
  end

  # Replicas for the current connection, resolved once per connection.
  def slaves
    @slaves[@connection] ||= get_slaves
  end

  # Returns the replicas to monitor: the single one named by :check_only
  # when that option is callable, otherwise every replica found by walking
  # the replication topology starting from the current connection.
  def get_slaves
    if @check_only.respond_to?(:call)
      slave_config = @check_only.call
      return [Slave.new(slave_config['host'], @get_config)]
    end

    found = []
    pending_hosts = master_slave_hosts
    while pending_hosts.any?
      host = pending_hosts.pop
      # Check for duplicates before building a Slave: its constructor
      # opens a connection, which would otherwise be created and dropped
      # for every host that is reported more than once.
      next if found.any? { |known| known.host == host }

      candidate = Slave.new(host, @get_config)
      next unless candidate.connection

      found << candidate
      pending_hosts.concat(candidate.slave_hosts)
    end
    found
  end

  # Hosts currently replicating from the server behind @connection.
  def master_slave_hosts
    Throttler.format_hosts(@connection.select_values(Slave::SQL_SELECT_SLAVE_HOSTS))
  end

  # Largest lag among the known replicas (0 when there are none); logged.
  def max_current_slave_lag
    max = slaves.map(&:lag).push(0).max
    Lhm.logger.info "Max current slave lag: #{max}"
    max
  end
end
87
+
88
# One replica host together with the client connection opened to it.
class Slave
  SQL_SELECT_SLAVE_HOSTS = "SELECT host FROM information_schema.processlist WHERE command LIKE 'Binlog Dump%'"
  SQL_SELECT_MAX_SLAVE_LAG = 'SHOW SLAVE STATUS'

  attr_reader :host, :connection

  # @param host [String] replica host to connect to
  # @param connection_config [#call, nil] callable returning the connection
  #   settings; when nil/false the ActiveRecord pool's config is used
  # @raise [ArgumentError] when connection_config is given but not callable
  def initialize(host, connection_config = nil)
    @host = host
    @connection_config = prepare_connection_config(connection_config)
    @connection = client(@connection_config)
  end

  # Hosts replicating from this replica, formatted by Throttler.format_hosts.
  def slave_hosts
    Throttler.format_hosts(query_connection(SQL_SELECT_SLAVE_HOSTS, 'host'))
  end

  # 'Seconds_Behind_Master' of the first status row, converted with to_i
  # (so a nil value, or a failed query, counts as 0).
  def lag
    query_connection(SQL_SELECT_MAX_SLAVE_LAG, 'Seconds_Behind_Master').first.to_i
  end

  private

  # Opens a Mysql2 client for `config`; returns nil (after logging) when the
  # connection attempt raises Mysql2::Error.
  def client(config)
    Lhm.logger.info "Connecting to #{@host} on database: #{config[:database]}"
    Mysql2::Client.new(config)
  rescue Mysql2::Error => e
    Lhm.logger.info "Error connecting to #{@host}: #{e}"
    nil
  end

  # Resolves the connection settings and points them at this replica's host.
  def prepare_connection_config(config_proc)
    source =
      if !config_proc # default to the ActiveRecord provided config
        ActiveRecord::Base.connection_pool.spec.config
      elsif config_proc.respond_to?(:call) # if we get a proc
        config_proc.call
      else
        raise ArgumentError, "Expected #{config_proc.inspect} to respond to `call`"
      end

    # Work on a symbolized copy: the source hash may be shared between
    # replicas (or frozen), so neither its keys nor its :host may be
    # changed in place.
    config = source.deep_symbolize_keys
    config[:host] = @host
    config
  end

  # Runs `query` and returns the `result` column of every row; on
  # Mysql2::Error the failure is logged and [nil] is returned.
  def query_connection(query, result)
    @connection.query(query).map { |row| row[result] }
  rescue Mysql2::Error => e
    Lhm.logger.info "Unable to connect and/or query #{host}: #{e}"
    [nil]
  end
end
144
+ end
145
+ end
@@ -0,0 +1,53 @@
1
module Lhm
  module Throttler
    # Throttler whose pause between strides follows the number of running
    # threads counted on the server: the pause doubles while the count is
    # outside the healthy range and halves again once it is back inside.
    class ThreadsRunning
      include Command

      DEFAULT_INITIAL_TIMEOUT = 0.1
      DEFAULT_HEALTHY_RANGE = (0..50)

      attr_accessor :timeout_seconds, :healthy_range, :connection
      attr_reader :max_timeout_seconds, :initial_timeout_seconds

      # @param options [Hash]
      # @option options [Numeric] :initial_timeout (DEFAULT_INITIAL_TIMEOUT)
      # @option options [Numeric] :max_timeout (1024 times the initial timeout)
      # @option options [Range] :healthy_range (DEFAULT_HEALTHY_RANGE)
      # @option options [Object] :connection connection used for the count query
      def initialize(options = {})
        @initial_timeout_seconds = options[:initial_timeout] || DEFAULT_INITIAL_TIMEOUT
        @max_timeout_seconds = options[:max_timeout] || (@initial_timeout_seconds * 1024)
        @timeout_seconds = @initial_timeout_seconds
        @healthy_range = options[:healthy_range] || DEFAULT_HEALTHY_RANGE
        @connection = options[:connection]
      end

      # Counts matching rows of performance_schema.threads. The inner LIMIT
      # caps the count at one more than the upper end of the healthy range.
      def threads_running
        query = <<~SQL.squish
          SELECT COUNT(*) as Threads_running
          FROM (
            SELECT 1 FROM performance_schema.threads
            WHERE NAME='thread/sql/one_connection'
              AND PROCESSLIST_STATE IS NOT NULL
            LIMIT #{@healthy_range.max + 1}
          ) AS LIM
        SQL

        @connection.select_value(query)
      end

      # Adjusts and returns @timeout_seconds based on the current count.
      def throttle_seconds
        # Evaluate the range check once and reuse it for both branches.
        healthy = healthy_range.cover?(threads_running)

        if !healthy && @timeout_seconds < @max_timeout_seconds
          Lhm.logger.info("Increasing timeout between strides from #{@timeout_seconds} to #{@timeout_seconds * 2} because threads running is greater than the maximum of #{@healthy_range.max} allowed.")
          @timeout_seconds = @timeout_seconds * 2
        elsif healthy && @timeout_seconds > @initial_timeout_seconds
          Lhm.logger.info("Decreasing timeout between strides from #{@timeout_seconds} to #{@timeout_seconds / 2} because threads running is less than the maximum of #{@healthy_range.max} allowed.")
          @timeout_seconds = @timeout_seconds / 2
        else
          @timeout_seconds
        end
      end

      # Sleeps for the (possibly adjusted) timeout.
      def execute
        sleep throttle_seconds
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
module Lhm
  module Throttler
    # Throttler that sleeps for a fixed number of seconds on every execute.
    class Time
      include Command

      DEFAULT_TIMEOUT = 0.1
      DEFAULT_STRIDE = 2_000

      attr_accessor :timeout_seconds
      attr_accessor :stride

      # @param options [Hash]
      # @option options [Numeric] :delay seconds to sleep (DEFAULT_TIMEOUT)
      # @option options [Integer] :stride (DEFAULT_STRIDE)
      def initialize(options = {})
        @timeout_seconds = options[:delay] || DEFAULT_TIMEOUT
        @stride = options[:stride] || DEFAULT_STRIDE
      end

      # Sleeps for timeout_seconds.
      def execute
        sleep timeout_seconds
      end
    end

    # Variant of Time constructed from positional arguments, with the
    # timeout divided by 1000.0 before being stored as seconds.
    class LegacyTime < Time
      # @param timeout [Numeric] divided by 1000.0 to obtain timeout_seconds
      # @param stride [Integer]
      def initialize(timeout, stride)
        @timeout_seconds = timeout / 1000.0
        @stride = stride
      end
    end
  end
end