groonga-delta 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f518dc76e259b9697b95be025f4e55b1e7fad3daea738a405b3d6a564c65ed76
4
- data.tar.gz: ef1be1c1f200ddc94b9460c7216d9e7832993fdbe1ada926f31e87e01e5c5628
3
+ metadata.gz: ccfa632a6c046239e0163f15306f5479a64c220aa9e419bd415376bfeff5eae2
4
+ data.tar.gz: 3ab279d8f62f66bda8e83fc71c3cb27b5472339e5e373868ccbfde0376759c67
5
5
  SHA512:
6
- metadata.gz: 54cb45f40a7d6a676abee8f41c9eb66dd6992f7f0b07b030e9222a75da0435b02ed61361b7bff051e67acfde7cd7d1fc41ad1186c51606051ea921c5cebb674e
7
- data.tar.gz: b932e4c42a5f424fb2b6e4f423c9c248bc032acffa6884fa953aeee2f0863022125666d072b18bbe5bc01d4f4eae68fc7156cb8401d9280380e08b57a2e458e0
6
+ metadata.gz: eaa509b863868cd7fa1944d52603c312dc980707d975db9b52dd8b74131b2eac293cecc71dab48abb0895a22203dc2c59942750186e491209a3f395b481496b0
7
+ data.tar.gz: 377115552df709288fdff0f1eff36d76e1a99b14e83409f2ab29f5d263b15e94b756d387f357f50423f2015c67f66badb662100d8dac8fdacac67f53d45af0c4
data/doc/text/news.md CHANGED
@@ -1,5 +1,27 @@
1
1
  # NEWS
2
2
 
3
+ ## 1.0.2 - 2022-06-21
4
+
5
+ ### Improvements
6
+
7
+ * `import`: Added support for logging all MySQL replication event
8
+ details at the `debug` log level.
9
+
10
+ * `import`: Improved error handling on record generation.
11
+
12
+ * `import`: Added support for deleting a record by number/time key.
13
+
14
+ * `import`: Added support for vacuuming old delta files.
15
+
16
+ ### Fixes
17
+
18
+ * `import`: Fixed a bug that retrying from an error may cause "no
19
+ table map" error for row events. We need to retry from the last
20
+ table map event.
21
+
22
+ * `apply`: Fixed a bug that some delta files that have not been
23
+ applied yet may be skipped.
24
+
3
25
  ## 1.0.1 - 2022-06-09
4
26
 
5
27
  ### Improvements
@@ -13,6 +13,8 @@
13
13
  # You should have received a copy of the GNU General Public License
14
14
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
15
 
16
+ require "pp"
17
+
16
18
  module GroongaDelta
17
19
  class Error < StandardError
18
20
  end
@@ -25,4 +27,22 @@ module GroongaDelta
25
27
 
26
28
  class ProcessError < Error
27
29
  end
30
+
31
# Raised when a Groonga record can't be generated from a source record.
#
# The message embeds the pretty-printed source record, the pretty-printed
# Groonga column, and the message, class and backtrace of the wrapped
# error so that a single log entry is enough to investigate the failure.
class GenerationError < Error
  # The source record that was being converted.
  attr_reader :source_record
  # The Groonga column whose value couldn't be generated.
  attr_reader :groonga_column
  # The original exception that caused this error.
  attr_reader :detail

  # @param source_record the record being converted.
  # @param groonga_column the column whose value generation failed.
  # @param detail [Exception] the underlying error.
  def initialize(source_record, groonga_column, detail)
    @source_record = source_record
    @groonga_column = groonga_column
    @detail = detail
    # Exception#backtrace is nil for an exception object that has never
    # been raised. Don't let that hide the real error behind a
    # NoMethodError.
    backtrace = @detail.backtrace || []
    # PP.pp(object, "") returns the pretty-printed text with a trailing
    # newline. That's why the first two lines have no explicit "\n".
    message =
      "failed to generate a Groonga record:\n" +
      "source record: #{PP.pp(source_record, '')}" +
      "Groonga column: #{PP.pp(groonga_column, '')}" +
      "detail: #{@detail.message}(#{@detail.class})\n" +
      backtrace.join("\n")
    super(message)
  end
end
28
48
  end
@@ -16,6 +16,8 @@
16
16
  require_relative "command"
17
17
  require_relative "import-config"
18
18
  require_relative "import-status"
19
+ require_relative "local-writer"
20
+ require_relative "local-vacuumer"
19
21
 
20
22
  module GroongaDelta
21
23
  class ImportCommand < Command
@@ -23,14 +25,16 @@ module GroongaDelta
23
25
  def prepare
24
26
  @config = ImportConfig.new(@dir)
25
27
  @status = ImportStatus.new(@dir)
28
+ @writer = LocalWriter.new(@config)
29
+ @vacuumer = LocalVacuumer.new(@config)
26
30
  @sources = []
27
31
  if @config.local
28
32
  require_relative "local-source"
29
- @sources << LocalSource.new(@config, @status)
33
+ @sources << LocalSource.new(@config, @status, @writer)
30
34
  end
31
35
  if @config.mysql
32
36
  require_relative "mysql-source"
33
- @sources << MySQLSource.new(@config, @status)
37
+ @sources << MySQLSource.new(@config, @status, @writer)
34
38
  end
35
39
  end
36
40
 
@@ -38,6 +42,7 @@ module GroongaDelta
38
42
  @sources.each do |source|
39
43
  source.import
40
44
  end
45
+ @vacuumer.vacuum
41
46
  end
42
47
  end
43
48
  end
@@ -38,6 +38,10 @@ module GroongaDelta
38
38
  Local.new(@dir, @data["local"])
39
39
  end
40
40
 
41
+ def vacuum
42
+ Vacuum.new(@data["vacuum"] || {})
43
+ end
44
+
41
45
  def mapping
42
46
  Mapping.new(@data["mapping"] || {})
43
47
  end
@@ -164,5 +168,44 @@ module GroongaDelta
164
168
  @data["initial_max_number"] || Float::INFINITY
165
169
  end
166
170
  end
171
+
172
# Accessor for the "vacuum" section of the import configuration.
class Vacuum
  # @param data [Hash] the raw "vacuum" section. Only "keep_span" is read.
  def initialize(data)
    @data = data
  end

  # Returns the configured "keep_span".
  #
  # A String value is converted to seconds as a Float. Any other value
  # (including nil when the key is missing) is returned as is.
  #
  # @raise [ConfigError] if a String value isn't a valid span.
  def keep_span
    resolve_span(@data["keep_span"])
  end

  private
  # Converts a span such as "30", "1.5h", "7d" or "1month" to seconds.
  #
  # The number may be an integer or a decimal. The fractional part is
  # optional: it was mandatory before, which rejected common values such
  # as "10s" or "7d". The unit is optional only for seconds.
  def resolve_span(value)
    return value unless value.is_a?(String)

    case value
    when /\A(\d+(?:\.\d+)?)(?:s|sec|second|seconds)?\z/
      Float($1)
    when /\A(\d+(?:\.\d+)?)(?:m|min|minute|minutes)\z/
      Float($1) * 60
    when /\A(\d+(?:\.\d+)?)(?:h|hr|hour|hours)\z/
      Float($1) * 60 * 60
    when /\A(\d+(?:\.\d+)?)(?:d|day|days)\z/
      Float($1) * 60 * 60 * 24
    when /\A(\d+(?:\.\d+)?)(?:w|week|weeks)\z/
      Float($1) * 60 * 60 * 24 * 7
    when /\A(\d+(?:\.\d+)?)(?:month|months)\z/
      # Same as systemd. See systemd.time(7)
      Float($1) * 60 * 60 * 24 * 30.44
    when /\A(\d+(?:\.\d+)?)(?:y|year|years)\z/
      # Same as systemd. See systemd.time(7)
      Float($1) * 60 * 60 * 24 * 365.25
    else
      raise ConfigError, "invalid span value: #{value.inspect}"
    end
  end
end
167
210
  end
168
211
  end
@@ -45,6 +45,10 @@ module GroongaDelta
45
45
  def position
46
46
  self["position"]
47
47
  end
48
+
49
+ def last_table_map_position
50
+ self["last_table_map_position"]
51
+ end
48
52
  end
49
53
 
50
54
  class Local
@@ -13,12 +13,9 @@
13
13
  # You should have received a copy of the GNU General Public License
14
14
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
15
 
16
- require "fileutils"
17
-
18
16
  require "groonga/client"
19
- require "parquet"
20
17
 
21
- require_relative "error"
18
+ require_relative "local-reader"
22
19
 
23
20
  module GroongaDelta
24
21
  class LocalDelta
@@ -30,9 +27,9 @@ module GroongaDelta
30
27
  end
31
28
 
32
29
  def apply
33
- start_time = read_current_status
30
+ reader = LocalReader.new(@logger, @delta_dir)
31
+ start_time = read_current_status(reader)
34
32
  current_time = Time.now.utc
35
- targets = list_targets(@delta_dir, start_time, current_time)
36
33
  client_options = {
37
34
  url: @config.groonga.url,
38
35
  read_timeout: @config.groonga.read_timeout,
@@ -44,7 +41,7 @@ module GroongaDelta
44
41
  target_commands: [],
45
42
  target_tables: [],
46
43
  target_columns: [])
47
- targets.sort_by(&:timestamp).each do |target|
44
+ reader.each(start_time, current_time) do |target|
48
45
  target.apply(@logger, client, processor)
49
46
  @status.update("start_time" => [
50
47
  target.timestamp.to_i,
@@ -55,289 +52,22 @@ module GroongaDelta
55
52
  end
56
53
 
57
54
  private
58
- def build_time(year, month, day, hour=0, minute=0, second=0, nanosecond=0)
59
- Time.utc(year,
60
- month,
61
- day,
62
- hour,
63
- minute,
64
- Rational(second * 1_000_000_000 + nanosecond,
65
- 1_000_000_000))
66
- end
67
-
68
- def read_current_status
55
+ def read_current_status(reader)
69
56
  start_time_unix_time, start_time_nanosecond = @status.start_time
70
57
  if start_time_unix_time
71
58
  start_time = Time.at(start_time_unix_time).utc
72
- build_time(start_time.year,
73
- start_time.month,
74
- start_time.day,
75
- start_time.hour,
76
- start_time.min,
77
- start_time.sec,
78
- start_time_nanosecond)
59
+ reader.build_time(start_time.year,
60
+ start_time.month,
61
+ start_time.day,
62
+ start_time.hour,
63
+ start_time.min,
64
+ start_time.sec,
65
+ start_time_nanosecond)
79
66
  else
80
67
  Time.at(0).utc
81
68
  end
82
69
  end
83
70
 
84
- def each_target_path(dir,
85
- min_timestamp,
86
- max_timestamp,
87
- accept_directory: true,
88
- &block)
89
- Dir.glob("#{dir}/*") do |path|
90
- base_name = File.basename(path)
91
- if accept_directory and File.directory?(path)
92
- timestamp = parse_directory_timestamp(base_name)
93
- next if timestamp.nil?
94
- next if min_timestamp and timestamp <= min_timestamp
95
- next if max_timestamp and timestamp > max_timestamp
96
- each_target_path(path,
97
- min_timestamp,
98
- max_timestamp,
99
- accept_directory: false,
100
- &block)
101
- elsif File.file?(path)
102
- timestamp, action, post_match = parse_file_timestamp(base_name)
103
- next if timestamp.nil?
104
- next if min_timestamp and timestamp <= min_timestamp
105
- next if max_timestamp and timestamp > max_timestamp
106
- yield(path, timestamp, action, post_match)
107
- end
108
- end
109
- end
110
-
111
- def each_packed_target_path(dir, min_timestamp, max_timestamp)
112
- return unless min_timestamp.to_i.zero?
113
- Dir.glob("#{dir}/packed/*") do |path|
114
- next unless File.directory?(path)
115
- timestamp, action, post_match = parse_file_timestamp(File.basename(path))
116
- next if action
117
- next unless post_match.empty?
118
- yield(path, timestamp)
119
- end
120
- end
121
-
122
- def list_targets(dir, start_time, current_timestamp)
123
- targets = []
124
- list_schema_targets(dir, start_time, current_timestamp, targets)
125
- Dir.glob("#{dir}/data/*") do |path|
126
- next unless File.directory?(path)
127
- name = File.basename(path)
128
- list_table_targets(path, name, start_time, current_timestamp, targets)
129
- end
130
- targets
131
- end
132
-
133
- def each_schema_target(dir, min_timestamp, max_timestamp)
134
- each_target_path(dir,
135
- min_timestamp,
136
- max_timestamp) do |path, timestamp, action, post_match|
137
- next if action
138
- next unless post_match == ".grn"
139
- yield(SchemaTarget.new(path, timestamp))
140
- end
141
- end
142
-
143
- def list_schema_targets(dir, start_time, current_timestamp, targets)
144
- latest_packed_target = nil
145
- each_packed_target_path("#{dir}/schema",
146
- start_time,
147
- current_timestamp) do |path, timestamp|
148
- if latest_packed_target and latest_packed_target.timestamp > timestamp
149
- next
150
- end
151
- latest_packed_target = PackedSchemaTarget.new(path, timestamp)
152
- end
153
- if latest_packed_target
154
- targets << latest_packed_target
155
- each_schema_target(latest_packed_target.path, nil, nil) do |target|
156
- latest_packed_target.targets << target
157
- end
158
- end
159
- each_schema_target("#{dir}/schema",
160
- latest_packed_target&.timestamp || start_time,
161
- current_timestamp) do |target|
162
- targets << target
163
- end
164
- end
165
-
166
- TABLE_TARGET_SUFFIXES = [".grn", ".parquet"]
167
- def each_table_target(dir, name, min_timestamp, max_timestamp)
168
- each_target_path(dir,
169
- min_timestamp,
170
- max_timestamp) do |path, timestamp, action, post_match|
171
- next if action.nil?
172
- next unless TABLE_TARGET_SUFFIXES.include?(post_match)
173
- yield(TableTarget.new(path, timestamp, name, action))
174
- end
175
- end
176
-
177
- def list_table_targets(dir, name, start_time, current_timestamp, targets)
178
- latest_packed_target = nil
179
- each_packed_target_path(dir,
180
- start_time,
181
- current_timestamp) do |path, timestamp|
182
- if latest_packed_target and latest_packed_target.timestamp > timestamp
183
- next
184
- end
185
- latest_packed_target = PackedTableTarget.new(path, timestamp, name)
186
- end
187
- if latest_packed_target
188
- targets << latest_packed_target
189
- each_table_target(latest_packed_target.path, name, nil, nil) do |target|
190
- latest_packed_target.targets << target
191
- end
192
- end
193
- each_table_target(dir,
194
- name,
195
- latest_packed_target&.timestamp || start_time,
196
- current_timestamp) do |target|
197
- targets << target
198
- end
199
- end
200
-
201
- def parse_directory_timestamp(base_name)
202
- case base_name
203
- when /\A(\d{4})-(\d{2})-(\d{2})\z/
204
- match = Regexp.last_match
205
- year = match[1].to_i
206
- month = match[2].to_i
207
- day = match[3].to_i
208
- build_time(year, month, day)
209
- else
210
- nil
211
- end
212
- end
213
-
214
- def parse_file_timestamp(base_name)
215
- case base_name
216
- when /\A(\d{4})-(\d{2})-(\d{2})-(\d{2})-(\d{2})-(\d{2})-(\d{9})(?:-(\w+))?/
217
- match = Regexp.last_match
218
- year = match[1].to_i
219
- month = match[2].to_i
220
- day = match[3].to_i
221
- hour = match[4].to_i
222
- minute = match[5].to_i
223
- second = match[6].to_i
224
- nanosecond = match[7].to_i
225
- action = match[8]
226
- timestamp = build_time(year,
227
- month,
228
- day,
229
- hour,
230
- minute,
231
- second,
232
- nanosecond)
233
- [timestamp, action, match.post_match]
234
- else
235
- nil
236
- end
237
- end
238
-
239
- module ApplyLoggable
240
- private
241
- def apply_log(logger, path)
242
- logger.info("Start applying: #{path}")
243
- result = yield
244
- logger.info("Applied: #{path}")
245
- result
246
- end
247
- end
248
-
249
- class SchemaTarget
250
- include ApplyLoggable
251
-
252
- attr_reader :path
253
- attr_reader :timestamp
254
- def initialize(path, timestamp)
255
- @path = path
256
- @timestamp = timestamp
257
- end
258
-
259
- def apply(logger, client, processor)
260
- apply_log(logger, @path) do
261
- processor.load(@path)
262
- end
263
- end
264
- end
265
-
266
- class PackedSchemaTarget
267
- include ApplyLoggable
268
-
269
- attr_reader :path
270
- attr_reader :timestamp
271
- attr_reader :targets
272
- def initialize(path, timestamp)
273
- @path = path
274
- @timestamp = timestamp
275
- @targets = []
276
- end
277
-
278
- def apply(logger, client, processor)
279
- apply_log(logger, @path) do
280
- @targets.sort_by(&:timestamp).each do |target|
281
- target.apply(logger, client, processor)
282
- end
283
- end
284
- end
285
- end
286
-
287
- class TableTarget
288
- include ApplyLoggable
289
-
290
- attr_reader :path
291
- attr_reader :timestamp
292
- attr_reader :name
293
- attr_reader :action
294
- def initialize(path, timestamp, name, action)
295
- @path = path
296
- @timestamp = timestamp
297
- @name = name
298
- @action = action
299
- end
300
-
301
- def apply(logger, client, processor)
302
- apply_log(logger, @path) do
303
- if @path.end_with?(".grn")
304
- processor.load(@path)
305
- else
306
- # TODO: Add support for @action == "delete"
307
- table = Arrow::Table.load(@path)
308
- command = Groonga::Command::Load.new(table: @name,
309
- values: table,
310
- command_version: "3")
311
- response = client.load(command.arguments)
312
- processor.process_response(response, command)
313
- end
314
- end
315
- end
316
- end
317
-
318
- class PackedTableTarget
319
- include ApplyLoggable
320
-
321
- attr_reader :path
322
- attr_reader :timestamp
323
- attr_reader :name
324
- attr_reader :targets
325
- def initialize(path, timestamp, name)
326
- @path = path
327
- @timestamp = timestamp
328
- @name = name
329
- @targets = []
330
- end
331
-
332
- def apply(logger, client, processor)
333
- apply_log(logger, @path) do
334
- @targets.sort_by(&:timestamp).each do |target|
335
- target.apply(logger, client, processor)
336
- end
337
- end
338
- end
339
- end
340
-
341
71
  class CommandProcessor < Groonga::Client::CommandProcessor
342
72
  def initialize(config, *args)
343
73
  @config = config
@@ -0,0 +1,353 @@
1
+ # Copyright (C) 2021-2022 Sutou Kouhei <kou@clear-code.com>
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require "fileutils"
17
+
18
+ require "groonga/command"
19
+ require "parquet"
20
+
21
+ require_relative "error"
22
+
23
+ module GroongaDelta
24
+ class LocalReader
25
+ def initialize(logger, dir)
26
+ @logger = logger
27
+ @dir = dir
28
+ end
29
+
30
+ def each(min_timestamp=nil, max_timestamp=nil, &block)
31
+ unless block_given?
32
+ return to_enum(__method__, min_timestamp, max_timestamp)
33
+ end
34
+
35
+ targets = list_targets(@dir, min_timestamp, max_timestamp)
36
+ targets.sort_by(&:timestamp).each(&block)
37
+ end
38
+
39
+ def build_time(year, month, day, hour=0, minute=0, second=0, nanosecond=0)
40
+ Time.utc(year,
41
+ month,
42
+ day,
43
+ hour,
44
+ minute,
45
+ Rational(second * 1_000_000_000 + nanosecond,
46
+ 1_000_000_000))
47
+ end
48
+
49
+ private
50
+ def each_target_path(dir,
51
+ min_timestamp,
52
+ max_timestamp,
53
+ accept_directory: true,
54
+ &block)
55
+ if min_timestamp
56
+ min_timestamp_day = Time.utc(min_timestamp.year,
57
+ min_timestamp.month,
58
+ min_timestamp.day)
59
+ end
60
+ if max_timestamp
61
+ max_timestamp_day = Time.utc(max_timestamp.year,
62
+ max_timestamp.month,
63
+ max_timestamp.day)
64
+ end
65
+ Dir.glob("#{dir}/*") do |path|
66
+ base_name = File.basename(path)
67
+ if accept_directory and File.directory?(path)
68
+ timestamp = parse_directory_timestamp(base_name)
69
+ next if timestamp.nil?
70
+ next if min_timestamp_day and timestamp < min_timestamp_day
71
+ next if max_timestamp_day and timestamp > max_timestamp_day
72
+ each_target_path(path,
73
+ min_timestamp,
74
+ max_timestamp,
75
+ accept_directory: false,
76
+ &block)
77
+ elsif File.file?(path)
78
+ timestamp, action, post_match = parse_file_timestamp(base_name)
79
+ next if timestamp.nil?
80
+ next if min_timestamp and timestamp <= min_timestamp
81
+ next if max_timestamp and timestamp > max_timestamp
82
+ yield(path, timestamp, action, post_match)
83
+ end
84
+ end
85
+ end
86
+
87
# Yields the path and timestamp of each packed directory directly under
# "#{dir}/packed".
#
# Nothing is yielded unless min_timestamp.to_i is zero (nil or the
# epoch). max_timestamp is accepted but not used here.
#
# Only directories whose name is exactly a file timestamp (no action
# part and no suffix) are yielded. Other entries are ignored.
def each_packed_target_path(dir, min_timestamp, max_timestamp)
  return unless min_timestamp.to_i.zero?
  Dir.glob("#{dir}/packed/*") do |path|
    next unless File.directory?(path)
    timestamp, action, post_match = parse_file_timestamp(File.basename(path))
    # parse_file_timestamp returns nil for a name that doesn't start
    # with a timestamp. Skip it like each_target_path does instead of
    # failing on nil below.
    next if timestamp.nil?
    next if action
    next unless post_match.empty?
    yield(path, timestamp)
  end
end
97
+
98
+ def list_targets(dir, min_timestamp, max_timestamp)
99
+ targets = []
100
+ list_schema_targets(dir, min_timestamp, max_timestamp, targets)
101
+ Dir.glob("#{dir}/data/*") do |path|
102
+ next unless File.directory?(path)
103
+ name = File.basename(path)
104
+ list_table_targets(path, name, min_timestamp, max_timestamp, targets)
105
+ end
106
+ targets
107
+ end
108
+
109
+ def each_schema_target(dir, min_timestamp, max_timestamp)
110
+ each_target_path(dir,
111
+ min_timestamp,
112
+ max_timestamp) do |path, timestamp, action, post_match|
113
+ next if action
114
+ next unless post_match == ".grn"
115
+ yield(SchemaTarget.new(path, timestamp))
116
+ end
117
+ end
118
+
119
+ def list_schema_targets(dir, min_timestamp, max_timestamp, targets)
120
+ latest_packed_target = nil
121
+ each_packed_target_path("#{dir}/schema",
122
+ min_timestamp,
123
+ max_timestamp) do |path, timestamp|
124
+ if latest_packed_target and latest_packed_target.timestamp > timestamp
125
+ next
126
+ end
127
+ latest_packed_target = PackedSchemaTarget.new(path, timestamp)
128
+ end
129
+ if latest_packed_target
130
+ targets << latest_packed_target
131
+ each_schema_target(latest_packed_target.path, nil, nil) do |target|
132
+ latest_packed_target.targets << target
133
+ end
134
+ end
135
+ each_schema_target("#{dir}/schema",
136
+ latest_packed_target&.timestamp || min_timestamp,
137
+ max_timestamp) do |target|
138
+ targets << target
139
+ end
140
+ end
141
+
142
+ TABLE_TARGET_SUFFIXES = [".grn", ".parquet"]
143
+ def each_table_target(dir, name, min_timestamp, max_timestamp)
144
+ each_target_path(dir,
145
+ min_timestamp,
146
+ max_timestamp) do |path, timestamp, action, post_match|
147
+ next if action.nil?
148
+ next unless TABLE_TARGET_SUFFIXES.include?(post_match)
149
+ yield(TableTarget.new(path, timestamp, name, action))
150
+ end
151
+ end
152
+
153
+ def list_table_targets(dir, name, min_timestamp, max_timestamp, targets)
154
+ latest_packed_target = nil
155
+ each_packed_target_path(dir,
156
+ min_timestamp,
157
+ max_timestamp) do |path, timestamp|
158
+ if latest_packed_target and latest_packed_target.timestamp > timestamp
159
+ next
160
+ end
161
+ latest_packed_target = PackedTableTarget.new(path, timestamp, name)
162
+ end
163
+ if latest_packed_target
164
+ targets << latest_packed_target
165
+ each_table_target(latest_packed_target.path, name, nil, nil) do |target|
166
+ latest_packed_target.targets << target
167
+ end
168
+ end
169
+ each_table_target(dir,
170
+ name,
171
+ latest_packed_target&.timestamp || min_timestamp,
172
+ max_timestamp) do |target|
173
+ targets << target
174
+ end
175
+ end
176
+
177
+ def parse_directory_timestamp(base_name)
178
+ case base_name
179
+ when /\A(\d{4})-(\d{2})-(\d{2})\z/
180
+ match = Regexp.last_match
181
+ year = match[1].to_i
182
+ month = match[2].to_i
183
+ day = match[3].to_i
184
+ build_time(year, month, day)
185
+ else
186
+ nil
187
+ end
188
+ end
189
+
190
+ def parse_file_timestamp(base_name)
191
+ case base_name
192
+ when /\A(\d{4})-(\d{2})-(\d{2})-(\d{2})-(\d{2})-(\d{2})-(\d{9})(?:-(\w+))?/
193
+ match = Regexp.last_match
194
+ year = match[1].to_i
195
+ month = match[2].to_i
196
+ day = match[3].to_i
197
+ hour = match[4].to_i
198
+ minute = match[5].to_i
199
+ second = match[6].to_i
200
+ nanosecond = match[7].to_i
201
+ action = match[8]
202
+ timestamp = build_time(year,
203
+ month,
204
+ day,
205
+ hour,
206
+ minute,
207
+ second,
208
+ nanosecond)
209
+ [timestamp, action, match.post_match]
210
+ else
211
+ nil
212
+ end
213
+ end
214
+
215
+ module Loggable
216
+ private
217
+ def log(logger, path, before_message, after_message)
218
+ logger.info("#{before_message}: #{path}")
219
+ result = yield
220
+ logger.info("#{after_message}: #{path}")
221
+ result
222
+ end
223
+
224
+ def apply_log(logger, path, &block)
225
+ log(logger, path, "Start applying", "Applied", &block)
226
+ end
227
+
228
+ def vacuum_log(logger, path, &block)
229
+ log(logger, path, "Start vacuuming", "Vacuumed", &block)
230
+ end
231
+ end
232
+
233
+ class SchemaTarget
234
+ include Loggable
235
+
236
+ attr_reader :path
237
+ attr_reader :timestamp
238
+ def initialize(path, timestamp)
239
+ @path = path
240
+ @timestamp = timestamp
241
+ end
242
+
243
+ def apply(logger, client, processor)
244
+ apply_log(logger, @path) do
245
+ processor.load(@path)
246
+ end
247
+ end
248
+
249
+ def vacuum(logger)
250
+ vacuum_log(logger, @path) do
251
+ FileUtils.rm(@path)
252
+ end
253
+ end
254
+ end
255
+
256
+ class PackedSchemaTarget
257
+ include Loggable
258
+
259
+ attr_reader :path
260
+ attr_reader :timestamp
261
+ attr_reader :targets
262
+ def initialize(path, timestamp)
263
+ @path = path
264
+ @timestamp = timestamp
265
+ @targets = []
266
+ end
267
+
268
+ def apply(logger, client, processor)
269
+ apply_log(logger, @path) do
270
+ @targets.sort_by(&:timestamp).each do |target|
271
+ target.apply(logger, client, processor)
272
+ end
273
+ end
274
+ end
275
+
276
+ def vacuum(logger)
277
+ vacuum_log(logger, @path) do
278
+ @targets.sort_by(&:timestamp).each do |target|
279
+ target.vacuum(logger)
280
+ end
281
+ end
282
+ end
283
+ end
284
+
285
+ class TableTarget
286
+ include Loggable
287
+
288
+ attr_reader :path
289
+ attr_reader :timestamp
290
+ attr_reader :name
291
+ attr_reader :action
292
+ def initialize(path, timestamp, name, action)
293
+ @path = path
294
+ @timestamp = timestamp
295
+ @name = name
296
+ @action = action
297
+ end
298
+
299
+ def apply(logger, client, processor)
300
+ apply_log(logger, @path) do
301
+ if @path.end_with?(".grn")
302
+ processor.load(@path)
303
+ else
304
+ # TODO: Add support for @action == "delete"
305
+ table = Arrow::Table.load(@path)
306
+ command = Groonga::Command::Load.new(table: @name,
307
+ values: table,
308
+ command_version: "3")
309
+ response = client.load(command.arguments)
310
+ processor.process_response(response, command)
311
+ end
312
+ end
313
+ end
314
+
315
+ def vacuum(logger)
316
+ vacuum_log(logger, @path) do
317
+ FileUtils.rm(@path)
318
+ end
319
+ end
320
+ end
321
+
322
+ class PackedTableTarget
323
+ include Loggable
324
+
325
+ attr_reader :path
326
+ attr_reader :timestamp
327
+ attr_reader :name
328
+ attr_reader :targets
329
+ def initialize(path, timestamp, name)
330
+ @path = path
331
+ @timestamp = timestamp
332
+ @name = name
333
+ @targets = []
334
+ end
335
+
336
+ def apply(logger, client, processor)
337
+ apply_log(logger, @path) do
338
+ @targets.sort_by(&:timestamp).each do |target|
339
+ target.apply(logger, client, processor)
340
+ end
341
+ end
342
+ end
343
+
344
+ def vacuum(logger)
345
+ vacuum_log(logger, @path) do
346
+ @targets.sort_by(&:timestamp).each do |target|
347
+ target.vacuum(logger)
348
+ end
349
+ end
350
+ end
351
+ end
352
+ end
353
+ end
@@ -15,15 +15,15 @@
15
15
 
16
16
  require "groonga/command/parser"
17
17
 
18
- require_relative "writer"
18
+ require_relative "local-writer"
19
19
 
20
20
  module GroongaDelta
21
21
  class LocalSource
22
- def initialize(config, status)
22
+ def initialize(config, status, writer)
23
23
  @logger = config.logger
24
- @writer = Writer.new(@logger, config.delta_dir)
25
24
  @config = config.local
26
25
  @status = status.local
26
+ @writer = writer
27
27
  end
28
28
 
29
29
  def import
@@ -0,0 +1,39 @@
1
+ # Copyright (C) 2022 Sutou Kouhei <kou@clear-code.com>
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require "fileutils"
17
+
18
+ require_relative "local-reader"
19
+
20
+ module GroongaDelta
21
+ class LocalVacuumer
22
+ def initialize(config)
23
+ @logger = config.logger
24
+ @delta_dir = config.delta_dir
25
+ @config = config.vacuum
26
+ end
27
+
28
+ def vacuum
29
+ keep_span = @config.keep_span
30
+ return if keep_span.nil?
31
+ return if keep_span < 0
32
+ reader = LocalReader.new(@logger, @delta_dir)
33
+ max_timestamp = Time.now.utc - keep_span
34
+ reader.each(nil, max_timestamp) do |target|
35
+ target.vacuum(@logger)
36
+ end
37
+ end
38
+ end
39
+ end
@@ -20,10 +20,11 @@ require "groonga/command"
20
20
  require "parquet"
21
21
 
22
22
  module GroongaDelta
23
- class Writer
24
- def initialize(logger, dir)
25
- @logger = logger
26
- @dir = dir
23
+ class LocalWriter
24
+ def initialize(config)
25
+ @config = config
26
+ @logger = @config.logger
27
+ @dir = @config.delta_dir
27
28
  end
28
29
 
29
30
  def write_upserts(table, records, packed: false)
@@ -74,7 +75,7 @@ module GroongaDelta
74
75
  delete = Groonga::Command::Delete.new
75
76
  delete[:table] = table
76
77
  keys.each do |key|
77
- delete[:key] = key
78
+ delete[:key] = format_key(key)
78
79
  output.puts(delete.to_command_format)
79
80
  end
80
81
  end
@@ -131,5 +132,16 @@ module GroongaDelta
131
132
  open_output: open_output,
132
133
  &block)
133
134
  end
135
+
136
+ def format_key(key)
137
+ case key
138
+ when Integer, Float
139
+ key.to_s
140
+ when Time
141
+ key.strftime("%Y-%m-%d %H:%M:%S.%6N")
142
+ else
143
+ key
144
+ end
145
+ end
134
146
  end
135
147
  end
@@ -142,8 +142,12 @@ module GroongaDelta
142
142
  def generate_record(source_record)
143
143
  record = {}
144
144
  @groonga_columns.each do |groonga_column|
145
- value = groonga_column.generate_value(source_record)
146
- record[groonga_column.name.to_sym] = value
145
+ begin
146
+ value = groonga_column.generate_value(source_record)
147
+ record[groonga_column.name.to_sym] = value
148
+ rescue => error
149
+ raise GenerationError.new(source_record, groonga_column, error)
150
+ end
147
151
  end
148
152
  record
149
153
  end
@@ -255,7 +259,16 @@ module GroongaDelta
255
259
  end
256
260
 
257
261
  def normalize_value(value)
258
- case type
262
+ case @type
263
+ when nil, "ShortText", "Text", "LongText"
264
+ encoding = value.encoding
265
+ if encoding == Encoding::ASCII_8BIT
266
+ value.force_encoding(Encoding::UTF_8)
267
+ return value if value.valid_encoding?
268
+ value.encode(Encoding::UTF_8, encoding)
269
+ else
270
+ value.encode(Encoding::UTF_8)
271
+ end
259
272
  when "Time"
260
273
  time_max = @restriction.time_max
261
274
  time_min = @restriction.time_min
@@ -17,17 +17,17 @@ require "arrow"
17
17
  require "mysql2"
18
18
 
19
19
  require_relative "error"
20
- require_relative "writer"
20
+ require_relative "local-writer"
21
21
 
22
22
  module GroongaDelta
23
23
  class MySQLSource
24
- def initialize(config, status)
24
+ def initialize(config, status, writer)
25
25
  @logger = config.logger
26
- @writer = Writer.new(@logger, config.delta_dir)
27
26
  @config = config.mysql
28
27
  @binlog_dir = @config.binlog_dir
29
28
  @mapping = config.mapping
30
29
  @status = status.mysql
30
+ @writer = writer
31
31
  @tables = {}
32
32
  end
33
33
 
@@ -53,7 +53,7 @@ module GroongaDelta
53
53
 
54
54
  private
55
55
  def import_mysqlbinlog
56
- file, position = read_current_status
56
+ file, position, last_table_map_position = read_current_status
57
57
  FileUtils.mkdir_p(@binlog_dir)
58
58
  local_file = File.join(@binlog_dir, file)
59
59
  unless File.exist?(local_file.succ)
@@ -79,23 +79,26 @@ module GroongaDelta
79
79
  binlog.checksum = @config.checksum
80
80
  binlog.ignore_rotate = true
81
81
  binlog.each_event do |event|
82
- next if event[:position] < position
82
+ next if event[:position] < last_table_map_position
83
83
  case event[:type]
84
84
  when :rotate_event
85
- @status.update("file" => event[:event][:name],
86
- "position" => event[:event][:pos])
85
+ file = event[:event][:name]
86
+ when :table_map_event
87
+ last_table_map_position = event[:position]
87
88
  when :write_rows_event_v1,
88
89
  :write_rows_event_v2,
89
90
  :update_rows_event_v1,
90
91
  :update_rows_event_v2,
91
92
  :delete_rows_event_v1,
92
93
  :delete_rows_event_v2
94
+ next if event[:position] < position
93
95
  normalized_type = event[:type].to_s.gsub(/_v\d\z/, "").to_sym
94
96
  import_rows_event(normalized_type,
95
97
  event[:event][:table][:db],
96
98
  event[:event][:table][:table],
97
99
  file,
98
- event[:header][:next_position]) do
100
+ event[:header][:next_position],
101
+ last_table_map_position) do
99
102
  case normalized_type
100
103
  when :write_rows_event,
101
104
  :update_rows_event
@@ -114,7 +117,7 @@ module GroongaDelta
114
117
  end
115
118
 
116
119
  def import_mysql2_replication
117
- file, position = read_current_status
120
+ file, position, last_table_map_position = read_current_status
118
121
  is_mysql_56_or_later = mysql(@config.select_user,
119
122
  @config.select_password) do |select_client|
120
123
  mysql_version(select_client) >= Gem::Version.new("5.6")
@@ -128,31 +131,43 @@ module GroongaDelta
128
131
  checksum: "NONE")
129
132
  end
130
133
  replication_client.file_name = file
131
- replication_client.start_position = position
134
+ current_event_position = last_table_map_position
135
+ replication_client.start_position = current_event_position
132
136
  replication_client.open do
133
137
  replication_client.each do |event|
134
- case event
135
- when Mysql2Replication::RotateEvent
136
- file = event.file_name
137
- when Mysql2Replication::RowsEvent
138
- event_name = event.class.name.split("::").last
139
- normalized_type =
140
- event_name.scan(/[A-Z][a-z]+/).
141
- collect(&:downcase).
142
- join("_").
143
- to_sym
144
- import_rows_event(normalized_type,
145
- event.table_map.database,
146
- event.table_map.table,
147
- file,
148
- event.next_position) do
149
- case normalized_type
150
- when :update_rows_event
151
- event.updated_rows
152
- else
153
- event.rows
138
+ begin
139
+ @logger.debug do
140
+ event.inspect
141
+ end
142
+ next if current_event_position < position
143
+ case event
144
+ when Mysql2Replication::RotateEvent
145
+ file = event.file_name
146
+ when Mysql2Replication::TableMapEvent
147
+ last_table_map_event = current_event_position
148
+ when Mysql2Replication::RowsEvent
149
+ event_name = event.class.name.split("::").last
150
+ normalized_type =
151
+ event_name.scan(/[A-Z][a-z]+/).
152
+ collect(&:downcase).
153
+ join("_").
154
+ to_sym
155
+ import_rows_event(normalized_type,
156
+ event.table_map.database,
157
+ event.table_map.table,
158
+ file,
159
+ event.next_position,
160
+ last_table_map_position) do
161
+ case normalized_type
162
+ when :update_rows_event
163
+ event.updated_rows
164
+ else
165
+ event.rows
166
+ end
154
167
  end
155
168
  end
169
+ ensure
170
+ current_event_position = event.next_position
156
171
  end
157
172
  end
158
173
  end
@@ -164,6 +179,7 @@ module GroongaDelta
164
179
  table_name,
165
180
  file,
166
181
  next_position,
182
+ last_table_map_position,
167
183
  &block)
168
184
  source_table = @mapping[database_name, table_name]
169
185
  return if source_table.nil?
@@ -189,7 +205,8 @@ module GroongaDelta
189
205
  groonga_record_keys)
190
206
  end
191
207
  @status.update("file" => file,
192
- "position" => next_position)
208
+ "position" => next_position,
209
+ "last_table_map_position" => last_table_map_position)
193
210
  end
194
211
 
195
212
  def wait_process(command_line, pid, output_read, error_read)
@@ -264,7 +281,7 @@ module GroongaDelta
264
281
 
265
282
  def read_current_status
266
283
  if @status.file
267
- [@status.file, @status.position]
284
+ [@status.file, @status.position, @status.last_table_map_position]
268
285
  else
269
286
  file = nil
270
287
  position = 0
@@ -288,8 +305,9 @@ module GroongaDelta
288
305
  end
289
306
  end
290
307
  @status.update("file" => file,
291
- "position" => position)
292
- [file, position]
308
+ "position" => position,
309
+ "last_table_map_position" => position)
310
+ [file, position, position]
293
311
  end
294
312
  end
295
313
 
@@ -14,5 +14,5 @@
14
14
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
15
 
16
16
  module GroongaDelta
17
- VERSION = "1.0.1"
17
+ VERSION = "1.0.2"
18
18
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: groonga-delta
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sutou Kouhei
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-09 00:00:00.000000000 Z
11
+ date: 2022-06-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: groonga-client
@@ -80,13 +80,15 @@ files:
80
80
  - lib/groonga-delta/import-config.rb
81
81
  - lib/groonga-delta/import-status.rb
82
82
  - lib/groonga-delta/local-delta.rb
83
+ - lib/groonga-delta/local-reader.rb
83
84
  - lib/groonga-delta/local-source.rb
85
+ - lib/groonga-delta/local-vacuumer.rb
86
+ - lib/groonga-delta/local-writer.rb
84
87
  - lib/groonga-delta/ltsv-log-formatter.rb
85
88
  - lib/groonga-delta/mapping.rb
86
89
  - lib/groonga-delta/mysql-source.rb
87
90
  - lib/groonga-delta/status.rb
88
91
  - lib/groonga-delta/version.rb
89
- - lib/groonga-delta/writer.rb
90
92
  homepage: https://github.com/groonga/groonga-delta
91
93
  licenses:
92
94
  - GPL-3.0+