groonga-delta 1.0.0 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ddcaa199b89bab185924a694bf7e64e1b6a8c2cd63d1ea855f59a5ac456c317e
4
- data.tar.gz: 06a1a8e01e343b4fca3faf2cc8139ed8adaadc6aeb55c7c6315e1c1bc7f40962
3
+ metadata.gz: 5837cbbd7824ebf557d24be10007166c476dadf2157a96071522307d0ffbd0d7
4
+ data.tar.gz: '086fa03eb3a6c9d1a055ec8ffecf0a8b7e9102dcee0fd1d3326ece684f4e5560'
5
5
  SHA512:
6
- metadata.gz: 39252c1602f63ff3eeddafdb676bdeef1334bed4df5a0e29fb8d14f0ae1467b98b925829333be865badf7bc3ec9fae332477e4dd6dc78c81cfddfa12e4bd8734
7
- data.tar.gz: cc50bce6f1da66d697bf0ce97a191e1d67fa474af9a795c0f709e15d0fbf60f94a551af902b58a2f6eac6fbf2c637524335728269e1e211f3c1f35e84eb232d5
6
+ metadata.gz: a3db1aedcdbf3bb39312bfb091a778091111eec880488917a4d21598834e08a703181f383f47d06e2646e873d1ea1ac5d8e42ed46d71328ee8ecb8c837aa080d
7
+ data.tar.gz: 584ecff5ca3bf44fc9f4baea4ea8bd5f6c60227ac0f05367dd0860c74a563b26f4bcac747c74a7ce1c98a77f77100c2c736817ca6daa4af8138aec1372996f1c
data/doc/text/news.md CHANGED
@@ -1,5 +1,47 @@
1
1
  # NEWS
2
2
 
3
+ ## 1.0.3 - 2022-07-21
4
+
5
+ ### Improvements
6
+
7
+ * `import`: Dropped support for `mysqlbinlog`.
8
+
9
+ * Changed to use atomic write for `status.yaml`.
10
+
11
+ * Changed the default number of logs to keep from 7 to 100.
12
+
13
+ ### Fixes
14
+
15
+ * `import`: Fixed a bug where events to be processed couldn't be detected.
16
+
17
+ ## 1.0.2 - 2022-06-21
18
+
19
+ ### Improvements
20
+
21
+ * `import`: Added support for logging all MySQL replication event
22
+ details by `debug` log level.
23
+
24
+ * `import`: Improved error handling on record generation.
25
+
26
+ * `import`: Added support for deleting a record by number/time key.
27
+
28
+ * `import`: Added support for vacuuming old delta files.
29
+
30
+ ### Fixes
31
+
32
+ * `import`: Fixed a bug that retrying from an error may cause "no
33
+ table map" error for row events. We need to retry from the last
34
+ table map event.
35
+
36
+ * `apply`: Fixed a bug that delta files not applied yet may not be
37
+ applied.
38
+
39
+ ## 1.0.1 - 2022-06-09
40
+
41
+ ### Improvements
42
+
43
+ * Added support for stopping cleanly by `SIGINT` or `SIGTERM`.
44
+
3
45
  ## 1.0.0 - 2022-03-07
4
46
 
5
47
  Initial release.
@@ -34,6 +34,16 @@ module GroongaDelta
34
34
  sleep(@config.polling_interval)
35
35
  end
36
36
  true
37
+ rescue Interrupt
38
+ true
39
+ rescue SignalException => error
40
+ case error.signm
41
+ when "SIGTERM"
42
+ true
43
+ else
44
+ @config.logger.error(error) if @config
45
+ raise
46
+ end
37
47
  rescue => error
38
48
  @config.logger.error(error) if @config
39
49
  raise
@@ -64,7 +64,7 @@ module GroongaDelta
64
64
  end
65
65
 
66
66
  def log_age
67
- @data["log_age"] || 7
67
+ @data["log_age"] || 100
68
68
  end
69
69
 
70
70
  def log_max_size
@@ -13,6 +13,8 @@
13
13
  # You should have received a copy of the GNU General Public License
14
14
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
15
 
16
+ require "pp"
17
+
16
18
  module GroongaDelta
17
19
  class Error < StandardError
18
20
  end
@@ -25,4 +27,22 @@ module GroongaDelta
25
27
 
26
28
  class ProcessError < Error
27
29
  end
30
+
31
+ class GenerationError < Error
32
+ attr_reader :source_record
33
+ attr_reader :groonga_column
34
+ attr_reader :detail
35
+ def initialize(source_record, groonga_column, detail)
36
+ @source_record = source_record
37
+ @groonga_column = groonga_column
38
+ @detail = detail
39
+ message =
40
+ "failed to generate a Groonga record:\n" +
41
+ "source record: #{PP.pp(source_record, '')}" +
42
+ "Groonga column: #{PP.pp(groonga_column, '')}" +
43
+ "detail: #{@detail.message}(#{@detail.class})\n" +
44
+ @detail.backtrace.join("\n")
45
+ super(message)
46
+ end
47
+ end
28
48
  end
@@ -16,6 +16,8 @@
16
16
  require_relative "command"
17
17
  require_relative "import-config"
18
18
  require_relative "import-status"
19
+ require_relative "local-writer"
20
+ require_relative "local-vacuumer"
19
21
 
20
22
  module GroongaDelta
21
23
  class ImportCommand < Command
@@ -23,14 +25,16 @@ module GroongaDelta
23
25
  def prepare
24
26
  @config = ImportConfig.new(@dir)
25
27
  @status = ImportStatus.new(@dir)
28
+ @writer = LocalWriter.new(@config)
29
+ @vacuumer = LocalVacuumer.new(@config)
26
30
  @sources = []
27
31
  if @config.local
28
32
  require_relative "local-source"
29
- @sources << LocalSource.new(@config, @status)
33
+ @sources << LocalSource.new(@config, @status, @writer)
30
34
  end
31
35
  if @config.mysql
32
36
  require_relative "mysql-source"
33
- @sources << MySQLSource.new(@config, @status)
37
+ @sources << MySQLSource.new(@config, @status, @writer)
34
38
  end
35
39
  end
36
40
 
@@ -38,6 +42,7 @@ module GroongaDelta
38
42
  @sources.each do |source|
39
43
  source.import
40
44
  end
45
+ @vacuumer.vacuum
41
46
  end
42
47
  end
43
48
  end
@@ -38,6 +38,10 @@ module GroongaDelta
38
38
  Local.new(@dir, @data["local"])
39
39
  end
40
40
 
41
+ def vacuum
42
+ Vacuum.new(@data["vacuum"] || {})
43
+ end
44
+
41
45
  def mapping
42
46
  Mapping.new(@data["mapping"] || {})
43
47
  end
@@ -55,10 +59,6 @@ module GroongaDelta
55
59
  resolve_path(@data["binlog_dir"] || "binlog")
56
60
  end
57
61
 
58
- def mysqlbinlog
59
- @data["mysqlbinlog"] || "mysqlbinlog"
60
- end
61
-
62
62
  def host
63
63
  @data["host"] || "localhost"
64
64
  end
@@ -164,5 +164,44 @@ module GroongaDelta
164
164
  @data["initial_max_number"] || Float::INFINITY
165
165
  end
166
166
  end
167
+
168
+ class Vacuum
169
+ def initialize(data)
170
+ @data = data
171
+ end
172
+
173
+ def keep_span
174
+ resolve_span(@data["keep_span"])
175
+ end
176
+
177
+ private
178
+ def resolve_span(value)
179
+ case value
180
+ when String
181
+ case value
182
+ when /\A(\d+(?:\.\d+)?)(?:s|sec|second|seconds)?\z/
183
+ Float($1)
184
+ when /\A(\d+(?:\.\d+)?)(?:m|min|minute|minutes)\z/
185
+ Float($1) * 60
186
+ when /\A(\d+(?:\.\d+)?)(?:h|hr|hour|hours)\z/
187
+ Float($1) * 60 * 60
188
+ when /\A(\d+(?:\.\d+)?)(?:d|day|days)\z/
189
+ Float($1) * 60 * 60 * 24
190
+ when /\A(\d+(?:\.\d+)?)(?:w|week|weeks)\z/
191
+ Float($1) * 60 * 60 * 24 * 7
192
+ when /\A(\d+(?:\.\d+)?)(?:month|months)\z/
193
+ # Same as systemd. See systemd.time(7)
194
+ Float($1) * 60 * 60 * 24 * 30.44
195
+ when /\A(\d+(?:\.\d+)?)(?:y|year|years)\z/
196
+ # Same as systemd. See systemd.time(7)
197
+ Float($1) * 60 * 60 * 24 * 365.25
198
+ else
199
+ raise ConfigError, "invalid span value: #{value.inspect}"
200
+ end
201
+ else
202
+ value
203
+ end
204
+ end
205
+ end
167
206
  end
168
207
  end
@@ -38,12 +38,20 @@ module GroongaDelta
38
38
  @status.update("mysql" => new_data)
39
39
  end
40
40
 
41
- def file
42
- self["file"]
41
+ def last_file
42
+ self["last_file"] || self["file"] # For backward compatibility
43
43
  end
44
44
 
45
- def position
46
- self["position"]
45
+ def last_position
46
+ self["last_position"] || self["position"] # For backward compatibility
47
+ end
48
+
49
+ def last_table_map_file
50
+ self["last_table_map_file"] || self["file"]
51
+ end
52
+
53
+ def last_table_map_position
54
+ self["last_table_map_position"]
47
55
  end
48
56
  end
49
57
 
@@ -13,12 +13,9 @@
13
13
  # You should have received a copy of the GNU General Public License
14
14
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
15
 
16
- require "fileutils"
17
-
18
16
  require "groonga/client"
19
- require "parquet"
20
17
 
21
- require_relative "error"
18
+ require_relative "local-reader"
22
19
 
23
20
  module GroongaDelta
24
21
  class LocalDelta
@@ -30,9 +27,9 @@ module GroongaDelta
30
27
  end
31
28
 
32
29
  def apply
33
- start_time = read_current_status
30
+ reader = LocalReader.new(@logger, @delta_dir)
31
+ start_time = read_current_status(reader)
34
32
  current_time = Time.now.utc
35
- targets = list_targets(@delta_dir, start_time, current_time)
36
33
  client_options = {
37
34
  url: @config.groonga.url,
38
35
  read_timeout: @config.groonga.read_timeout,
@@ -44,7 +41,7 @@ module GroongaDelta
44
41
  target_commands: [],
45
42
  target_tables: [],
46
43
  target_columns: [])
47
- targets.sort_by(&:timestamp).each do |target|
44
+ reader.each(start_time, current_time) do |target|
48
45
  target.apply(@logger, client, processor)
49
46
  @status.update("start_time" => [
50
47
  target.timestamp.to_i,
@@ -55,289 +52,22 @@ module GroongaDelta
55
52
  end
56
53
 
57
54
  private
58
- def build_time(year, month, day, hour=0, minute=0, second=0, nanosecond=0)
59
- Time.utc(year,
60
- month,
61
- day,
62
- hour,
63
- minute,
64
- Rational(second * 1_000_000_000 + nanosecond,
65
- 1_000_000_000))
66
- end
67
-
68
- def read_current_status
55
+ def read_current_status(reader)
69
56
  start_time_unix_time, start_time_nanosecond = @status.start_time
70
57
  if start_time_unix_time
71
58
  start_time = Time.at(start_time_unix_time).utc
72
- build_time(start_time.year,
73
- start_time.month,
74
- start_time.day,
75
- start_time.hour,
76
- start_time.min,
77
- start_time.sec,
78
- start_time_nanosecond)
59
+ reader.build_time(start_time.year,
60
+ start_time.month,
61
+ start_time.day,
62
+ start_time.hour,
63
+ start_time.min,
64
+ start_time.sec,
65
+ start_time_nanosecond)
79
66
  else
80
67
  Time.at(0).utc
81
68
  end
82
69
  end
83
70
 
84
- def each_target_path(dir,
85
- min_timestamp,
86
- max_timestamp,
87
- accept_directory: true,
88
- &block)
89
- Dir.glob("#{dir}/*") do |path|
90
- base_name = File.basename(path)
91
- if accept_directory and File.directory?(path)
92
- timestamp = parse_directory_timestamp(base_name)
93
- next if timestamp.nil?
94
- next if min_timestamp and timestamp <= min_timestamp
95
- next if max_timestamp and timestamp > max_timestamp
96
- each_target_path(path,
97
- min_timestamp,
98
- max_timestamp,
99
- accept_directory: false,
100
- &block)
101
- elsif File.file?(path)
102
- timestamp, action, post_match = parse_file_timestamp(base_name)
103
- next if timestamp.nil?
104
- next if min_timestamp and timestamp <= min_timestamp
105
- next if max_timestamp and timestamp > max_timestamp
106
- yield(path, timestamp, action, post_match)
107
- end
108
- end
109
- end
110
-
111
- def each_packed_target_path(dir, min_timestamp, max_timestamp)
112
- return unless min_timestamp.to_i.zero?
113
- Dir.glob("#{dir}/packed/*") do |path|
114
- next unless File.directory?(path)
115
- timestamp, action, post_match = parse_file_timestamp(File.basename(path))
116
- next if action
117
- next unless post_match.empty?
118
- yield(path, timestamp)
119
- end
120
- end
121
-
122
- def list_targets(dir, start_time, current_timestamp)
123
- targets = []
124
- list_schema_targets(dir, start_time, current_timestamp, targets)
125
- Dir.glob("#{dir}/data/*") do |path|
126
- next unless File.directory?(path)
127
- name = File.basename(path)
128
- list_table_targets(path, name, start_time, current_timestamp, targets)
129
- end
130
- targets
131
- end
132
-
133
- def each_schema_target(dir, min_timestamp, max_timestamp)
134
- each_target_path(dir,
135
- min_timestamp,
136
- max_timestamp) do |path, timestamp, action, post_match|
137
- next if action
138
- next unless post_match == ".grn"
139
- yield(SchemaTarget.new(path, timestamp))
140
- end
141
- end
142
-
143
- def list_schema_targets(dir, start_time, current_timestamp, targets)
144
- latest_packed_target = nil
145
- each_packed_target_path("#{dir}/schema",
146
- start_time,
147
- current_timestamp) do |path, timestamp|
148
- if latest_packed_target and latest_packed_target.timestamp > timestamp
149
- next
150
- end
151
- latest_packed_target = PackedSchemaTarget.new(path, timestamp)
152
- end
153
- if latest_packed_target
154
- targets << latest_packed_target
155
- each_schema_target(latest_packed_target.path, nil, nil) do |target|
156
- latest_packed_target.targets << target
157
- end
158
- end
159
- each_schema_target("#{dir}/schema",
160
- latest_packed_target&.timestamp || start_time,
161
- current_timestamp) do |target|
162
- targets << target
163
- end
164
- end
165
-
166
- TABLE_TARGET_SUFFIXES = [".grn", ".parquet"]
167
- def each_table_target(dir, name, min_timestamp, max_timestamp)
168
- each_target_path(dir,
169
- min_timestamp,
170
- max_timestamp) do |path, timestamp, action, post_match|
171
- next if action.nil?
172
- next unless TABLE_TARGET_SUFFIXES.include?(post_match)
173
- yield(TableTarget.new(path, timestamp, name, action))
174
- end
175
- end
176
-
177
- def list_table_targets(dir, name, start_time, current_timestamp, targets)
178
- latest_packed_target = nil
179
- each_packed_target_path(dir,
180
- start_time,
181
- current_timestamp) do |path, timestamp|
182
- if latest_packed_target and latest_packed_target.timestamp > timestamp
183
- next
184
- end
185
- latest_packed_target = PackedTableTarget.new(path, timestamp, name)
186
- end
187
- if latest_packed_target
188
- targets << latest_packed_target
189
- each_table_target(latest_packed_target.path, name, nil, nil) do |target|
190
- latest_packed_target.targets << target
191
- end
192
- end
193
- each_table_target(dir,
194
- name,
195
- latest_packed_target&.timestamp || start_time,
196
- current_timestamp) do |target|
197
- targets << target
198
- end
199
- end
200
-
201
- def parse_directory_timestamp(base_name)
202
- case base_name
203
- when /\A(\d{4})-(\d{2})-(\d{2})\z/
204
- match = Regexp.last_match
205
- year = match[1].to_i
206
- month = match[2].to_i
207
- day = match[3].to_i
208
- build_time(year, month, day)
209
- else
210
- nil
211
- end
212
- end
213
-
214
- def parse_file_timestamp(base_name)
215
- case base_name
216
- when /\A(\d{4})-(\d{2})-(\d{2})-(\d{2})-(\d{2})-(\d{2})-(\d{9})(?:-(\w+))?/
217
- match = Regexp.last_match
218
- year = match[1].to_i
219
- month = match[2].to_i
220
- day = match[3].to_i
221
- hour = match[4].to_i
222
- minute = match[5].to_i
223
- second = match[6].to_i
224
- nanosecond = match[7].to_i
225
- action = match[8]
226
- timestamp = build_time(year,
227
- month,
228
- day,
229
- hour,
230
- minute,
231
- second,
232
- nanosecond)
233
- [timestamp, action, match.post_match]
234
- else
235
- nil
236
- end
237
- end
238
-
239
- module ApplyLoggable
240
- private
241
- def apply_log(logger, path)
242
- logger.info("Start applying: #{path}")
243
- result = yield
244
- logger.info("Applied: #{path}")
245
- result
246
- end
247
- end
248
-
249
- class SchemaTarget
250
- include ApplyLoggable
251
-
252
- attr_reader :path
253
- attr_reader :timestamp
254
- def initialize(path, timestamp)
255
- @path = path
256
- @timestamp = timestamp
257
- end
258
-
259
- def apply(logger, client, processor)
260
- apply_log(logger, @path) do
261
- processor.load(@path)
262
- end
263
- end
264
- end
265
-
266
- class PackedSchemaTarget
267
- include ApplyLoggable
268
-
269
- attr_reader :path
270
- attr_reader :timestamp
271
- attr_reader :targets
272
- def initialize(path, timestamp)
273
- @path = path
274
- @timestamp = timestamp
275
- @targets = []
276
- end
277
-
278
- def apply(logger, client, processor)
279
- apply_log(logger, @path) do
280
- @targets.sort_by(&:timestamp).each do |target|
281
- target.apply(logger, client, processor)
282
- end
283
- end
284
- end
285
- end
286
-
287
- class TableTarget
288
- include ApplyLoggable
289
-
290
- attr_reader :path
291
- attr_reader :timestamp
292
- attr_reader :name
293
- attr_reader :action
294
- def initialize(path, timestamp, name, action)
295
- @path = path
296
- @timestamp = timestamp
297
- @name = name
298
- @action = action
299
- end
300
-
301
- def apply(logger, client, processor)
302
- apply_log(logger, @path) do
303
- if @path.end_with?(".grn")
304
- processor.load(@path)
305
- else
306
- # TODO: Add support for @action == "delete"
307
- table = Arrow::Table.load(@path)
308
- command = Groonga::Command::Load.new(table: @name,
309
- values: table,
310
- command_version: "3")
311
- response = client.load(command.arguments)
312
- processor.process_response(response, command)
313
- end
314
- end
315
- end
316
- end
317
-
318
- class PackedTableTarget
319
- include ApplyLoggable
320
-
321
- attr_reader :path
322
- attr_reader :timestamp
323
- attr_reader :name
324
- attr_reader :targets
325
- def initialize(path, timestamp, name)
326
- @path = path
327
- @timestamp = timestamp
328
- @name = name
329
- @targets = []
330
- end
331
-
332
- def apply(logger, client, processor)
333
- apply_log(logger, @path) do
334
- @targets.sort_by(&:timestamp).each do |target|
335
- target.apply(logger, client, processor)
336
- end
337
- end
338
- end
339
- end
340
-
341
71
  class CommandProcessor < Groonga::Client::CommandProcessor
342
72
  def initialize(config, *args)
343
73
  @config = config