groonga-delta 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f518dc76e259b9697b95be025f4e55b1e7fad3daea738a405b3d6a564c65ed76
4
- data.tar.gz: ef1be1c1f200ddc94b9460c7216d9e7832993fdbe1ada926f31e87e01e5c5628
3
+ metadata.gz: ccfa632a6c046239e0163f15306f5479a64c220aa9e419bd415376bfeff5eae2
4
+ data.tar.gz: 3ab279d8f62f66bda8e83fc71c3cb27b5472339e5e373868ccbfde0376759c67
5
5
  SHA512:
6
- metadata.gz: 54cb45f40a7d6a676abee8f41c9eb66dd6992f7f0b07b030e9222a75da0435b02ed61361b7bff051e67acfde7cd7d1fc41ad1186c51606051ea921c5cebb674e
7
- data.tar.gz: b932e4c42a5f424fb2b6e4f423c9c248bc032acffa6884fa953aeee2f0863022125666d072b18bbe5bc01d4f4eae68fc7156cb8401d9280380e08b57a2e458e0
6
+ metadata.gz: eaa509b863868cd7fa1944d52603c312dc980707d975db9b52dd8b74131b2eac293cecc71dab48abb0895a22203dc2c59942750186e491209a3f395b481496b0
7
+ data.tar.gz: 377115552df709288fdff0f1eff36d76e1a99b14e83409f2ab29f5d263b15e94b756d387f357f50423f2015c67f66badb662100d8dac8fdacac67f53d45af0c4
data/doc/text/news.md CHANGED
@@ -1,5 +1,27 @@
1
1
  # NEWS
2
2
 
3
+ ## 1.0.2 - 2022-06-21
4
+
5
+ ### Improvements
6
+
7
+ * `import`: Added support for logging all MySQL replication event
8
+ details at the `debug` log level.
9
+
10
+ * `import`: Improved error handling on record generation.
11
+
12
+ * `import`: Added support for deleting a record by number/time key.
13
+
14
+ * `import`: Added support for vacuuming old delta files.
15
+
16
+ ### Fixes
17
+
18
+ * `import`: Fixed a bug where retrying after an error could cause a
19
+ "no table map" error for row events. Retrying now restarts from the
20
+ last table map event.
21
+
22
+ * `apply`: Fixed a bug where delta files that had not been applied
23
+ yet could be skipped.
24
+
3
25
  ## 1.0.1 - 2022-06-09
4
26
 
5
27
  ### Improvements
@@ -13,6 +13,8 @@
13
13
  # You should have received a copy of the GNU General Public License
14
14
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
15
 
16
+ require "pp"
17
+
16
18
  module GroongaDelta
17
19
  class Error < StandardError
18
20
  end
@@ -25,4 +27,22 @@ module GroongaDelta
25
27
 
26
28
  class ProcessError < Error
27
29
  end
30
+
31
+ class GenerationError < Error
32
+ attr_reader :source_record
33
+ attr_reader :groonga_column
34
+ attr_reader :detail
35
+ def initialize(source_record, groonga_column, detail)
36
+ @source_record = source_record
37
+ @groonga_column = groonga_column
38
+ @detail = detail
39
+ message =
40
+ "failed to generate a Groonga record:\n" +
41
+ "source record: #{PP.pp(source_record, '')}" +
42
+ "Groonga column: #{PP.pp(groonga_column, '')}" +
43
+ "detail: #{@detail.message}(#{@detail.class})\n" +
44
+ @detail.backtrace.join("\n")
45
+ super(message)
46
+ end
47
+ end
28
48
  end
@@ -16,6 +16,8 @@
16
16
  require_relative "command"
17
17
  require_relative "import-config"
18
18
  require_relative "import-status"
19
+ require_relative "local-writer"
20
+ require_relative "local-vacuumer"
19
21
 
20
22
  module GroongaDelta
21
23
  class ImportCommand < Command
@@ -23,14 +25,16 @@ module GroongaDelta
23
25
  def prepare
24
26
  @config = ImportConfig.new(@dir)
25
27
  @status = ImportStatus.new(@dir)
28
+ @writer = LocalWriter.new(@config)
29
+ @vacuumer = LocalVacuumer.new(@config)
26
30
  @sources = []
27
31
  if @config.local
28
32
  require_relative "local-source"
29
- @sources << LocalSource.new(@config, @status)
33
+ @sources << LocalSource.new(@config, @status, @writer)
30
34
  end
31
35
  if @config.mysql
32
36
  require_relative "mysql-source"
33
- @sources << MySQLSource.new(@config, @status)
37
+ @sources << MySQLSource.new(@config, @status, @writer)
34
38
  end
35
39
  end
36
40
 
@@ -38,6 +42,7 @@ module GroongaDelta
38
42
  @sources.each do |source|
39
43
  source.import
40
44
  end
45
+ @vacuumer.vacuum
41
46
  end
42
47
  end
43
48
  end
@@ -38,6 +38,10 @@ module GroongaDelta
38
38
  Local.new(@dir, @data["local"])
39
39
  end
40
40
 
41
+ def vacuum
42
+ Vacuum.new(@data["vacuum"] || {})
43
+ end
44
+
41
45
  def mapping
42
46
  Mapping.new(@data["mapping"] || {})
43
47
  end
@@ -164,5 +168,44 @@ module GroongaDelta
164
168
  @data["initial_max_number"] || Float::INFINITY
165
169
  end
166
170
  end
171
+
172
+ class Vacuum
173
+ def initialize(data)
174
+ @data = data
175
+ end
176
+
177
+ def keep_span
178
+ resolve_span(@data["keep_span"])
179
+ end
180
+
181
+ private
182
+ def resolve_span(value)
183
+ case value
184
+ when String
185
+ case value
186
+ when /\A(\d+(?:\.\d+))(?:s|sec|second|seconds)?\z/
187
+ Float($1)
188
+ when /\A(\d+(?:\.\d+))(?:m|min|minute|minutes)\z/
189
+ Float($1) * 60
190
+ when /\A(\d+(?:\.\d+))(?:h|hr|hour|hours)\z/
191
+ Float($1) * 60 * 60
192
+ when /\A(\d+(?:\.\d+))(?:d|day|days)\z/
193
+ Float($1) * 60 * 60 * 24
194
+ when /\A(\d+(?:\.\d+))(?:w|week|weeks)\z/
195
+ Float($1) * 60 * 60 * 24 * 7
196
+ when /\A(\d+(?:\.\d+))(?:month|months)\z/
197
+ # Same as systemd. See systemd.time(7)
198
+ Float($1) * 60 * 60 * 24 * 30.44
199
+ when /\A(\d+(?:\.\d+))(?:y|year|years)\z/
200
+ # Same as systemd. See systemd.time(7)
201
+ Float($1) * 60 * 60 * 24 * 365.25
202
+ else
203
+ raise ConfigError, "invalid span value: #{value.inspect}"
204
+ end
205
+ else
206
+ value
207
+ end
208
+ end
209
+ end
167
210
  end
168
211
  end
@@ -45,6 +45,10 @@ module GroongaDelta
45
45
  def position
46
46
  self["position"]
47
47
  end
48
+
49
+ def last_table_map_position
50
+ self["last_table_map_position"]
51
+ end
48
52
  end
49
53
 
50
54
  class Local
@@ -13,12 +13,9 @@
13
13
  # You should have received a copy of the GNU General Public License
14
14
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
15
 
16
- require "fileutils"
17
-
18
16
  require "groonga/client"
19
- require "parquet"
20
17
 
21
- require_relative "error"
18
+ require_relative "local-reader"
22
19
 
23
20
  module GroongaDelta
24
21
  class LocalDelta
@@ -30,9 +27,9 @@ module GroongaDelta
30
27
  end
31
28
 
32
29
  def apply
33
- start_time = read_current_status
30
+ reader = LocalReader.new(@logger, @delta_dir)
31
+ start_time = read_current_status(reader)
34
32
  current_time = Time.now.utc
35
- targets = list_targets(@delta_dir, start_time, current_time)
36
33
  client_options = {
37
34
  url: @config.groonga.url,
38
35
  read_timeout: @config.groonga.read_timeout,
@@ -44,7 +41,7 @@ module GroongaDelta
44
41
  target_commands: [],
45
42
  target_tables: [],
46
43
  target_columns: [])
47
- targets.sort_by(&:timestamp).each do |target|
44
+ reader.each(start_time, current_time) do |target|
48
45
  target.apply(@logger, client, processor)
49
46
  @status.update("start_time" => [
50
47
  target.timestamp.to_i,
@@ -55,289 +52,22 @@ module GroongaDelta
55
52
  end
56
53
 
57
54
  private
58
- def build_time(year, month, day, hour=0, minute=0, second=0, nanosecond=0)
59
- Time.utc(year,
60
- month,
61
- day,
62
- hour,
63
- minute,
64
- Rational(second * 1_000_000_000 + nanosecond,
65
- 1_000_000_000))
66
- end
67
-
68
- def read_current_status
55
+ def read_current_status(reader)
69
56
  start_time_unix_time, start_time_nanosecond = @status.start_time
70
57
  if start_time_unix_time
71
58
  start_time = Time.at(start_time_unix_time).utc
72
- build_time(start_time.year,
73
- start_time.month,
74
- start_time.day,
75
- start_time.hour,
76
- start_time.min,
77
- start_time.sec,
78
- start_time_nanosecond)
59
+ reader.build_time(start_time.year,
60
+ start_time.month,
61
+ start_time.day,
62
+ start_time.hour,
63
+ start_time.min,
64
+ start_time.sec,
65
+ start_time_nanosecond)
79
66
  else
80
67
  Time.at(0).utc
81
68
  end
82
69
  end
83
70
 
84
- def each_target_path(dir,
85
- min_timestamp,
86
- max_timestamp,
87
- accept_directory: true,
88
- &block)
89
- Dir.glob("#{dir}/*") do |path|
90
- base_name = File.basename(path)
91
- if accept_directory and File.directory?(path)
92
- timestamp = parse_directory_timestamp(base_name)
93
- next if timestamp.nil?
94
- next if min_timestamp and timestamp <= min_timestamp
95
- next if max_timestamp and timestamp > max_timestamp
96
- each_target_path(path,
97
- min_timestamp,
98
- max_timestamp,
99
- accept_directory: false,
100
- &block)
101
- elsif File.file?(path)
102
- timestamp, action, post_match = parse_file_timestamp(base_name)
103
- next if timestamp.nil?
104
- next if min_timestamp and timestamp <= min_timestamp
105
- next if max_timestamp and timestamp > max_timestamp
106
- yield(path, timestamp, action, post_match)
107
- end
108
- end
109
- end
110
-
111
- def each_packed_target_path(dir, min_timestamp, max_timestamp)
112
- return unless min_timestamp.to_i.zero?
113
- Dir.glob("#{dir}/packed/*") do |path|
114
- next unless File.directory?(path)
115
- timestamp, action, post_match = parse_file_timestamp(File.basename(path))
116
- next if action
117
- next unless post_match.empty?
118
- yield(path, timestamp)
119
- end
120
- end
121
-
122
- def list_targets(dir, start_time, current_timestamp)
123
- targets = []
124
- list_schema_targets(dir, start_time, current_timestamp, targets)
125
- Dir.glob("#{dir}/data/*") do |path|
126
- next unless File.directory?(path)
127
- name = File.basename(path)
128
- list_table_targets(path, name, start_time, current_timestamp, targets)
129
- end
130
- targets
131
- end
132
-
133
- def each_schema_target(dir, min_timestamp, max_timestamp)
134
- each_target_path(dir,
135
- min_timestamp,
136
- max_timestamp) do |path, timestamp, action, post_match|
137
- next if action
138
- next unless post_match == ".grn"
139
- yield(SchemaTarget.new(path, timestamp))
140
- end
141
- end
142
-
143
- def list_schema_targets(dir, start_time, current_timestamp, targets)
144
- latest_packed_target = nil
145
- each_packed_target_path("#{dir}/schema",
146
- start_time,
147
- current_timestamp) do |path, timestamp|
148
- if latest_packed_target and latest_packed_target.timestamp > timestamp
149
- next
150
- end
151
- latest_packed_target = PackedSchemaTarget.new(path, timestamp)
152
- end
153
- if latest_packed_target
154
- targets << latest_packed_target
155
- each_schema_target(latest_packed_target.path, nil, nil) do |target|
156
- latest_packed_target.targets << target
157
- end
158
- end
159
- each_schema_target("#{dir}/schema",
160
- latest_packed_target&.timestamp || start_time,
161
- current_timestamp) do |target|
162
- targets << target
163
- end
164
- end
165
-
166
- TABLE_TARGET_SUFFIXES = [".grn", ".parquet"]
167
- def each_table_target(dir, name, min_timestamp, max_timestamp)
168
- each_target_path(dir,
169
- min_timestamp,
170
- max_timestamp) do |path, timestamp, action, post_match|
171
- next if action.nil?
172
- next unless TABLE_TARGET_SUFFIXES.include?(post_match)
173
- yield(TableTarget.new(path, timestamp, name, action))
174
- end
175
- end
176
-
177
- def list_table_targets(dir, name, start_time, current_timestamp, targets)
178
- latest_packed_target = nil
179
- each_packed_target_path(dir,
180
- start_time,
181
- current_timestamp) do |path, timestamp|
182
- if latest_packed_target and latest_packed_target.timestamp > timestamp
183
- next
184
- end
185
- latest_packed_target = PackedTableTarget.new(path, timestamp, name)
186
- end
187
- if latest_packed_target
188
- targets << latest_packed_target
189
- each_table_target(latest_packed_target.path, name, nil, nil) do |target|
190
- latest_packed_target.targets << target
191
- end
192
- end
193
- each_table_target(dir,
194
- name,
195
- latest_packed_target&.timestamp || start_time,
196
- current_timestamp) do |target|
197
- targets << target
198
- end
199
- end
200
-
201
- def parse_directory_timestamp(base_name)
202
- case base_name
203
- when /\A(\d{4})-(\d{2})-(\d{2})\z/
204
- match = Regexp.last_match
205
- year = match[1].to_i
206
- month = match[2].to_i
207
- day = match[3].to_i
208
- build_time(year, month, day)
209
- else
210
- nil
211
- end
212
- end
213
-
214
- def parse_file_timestamp(base_name)
215
- case base_name
216
- when /\A(\d{4})-(\d{2})-(\d{2})-(\d{2})-(\d{2})-(\d{2})-(\d{9})(?:-(\w+))?/
217
- match = Regexp.last_match
218
- year = match[1].to_i
219
- month = match[2].to_i
220
- day = match[3].to_i
221
- hour = match[4].to_i
222
- minute = match[5].to_i
223
- second = match[6].to_i
224
- nanosecond = match[7].to_i
225
- action = match[8]
226
- timestamp = build_time(year,
227
- month,
228
- day,
229
- hour,
230
- minute,
231
- second,
232
- nanosecond)
233
- [timestamp, action, match.post_match]
234
- else
235
- nil
236
- end
237
- end
238
-
239
- module ApplyLoggable
240
- private
241
- def apply_log(logger, path)
242
- logger.info("Start applying: #{path}")
243
- result = yield
244
- logger.info("Applied: #{path}")
245
- result
246
- end
247
- end
248
-
249
- class SchemaTarget
250
- include ApplyLoggable
251
-
252
- attr_reader :path
253
- attr_reader :timestamp
254
- def initialize(path, timestamp)
255
- @path = path
256
- @timestamp = timestamp
257
- end
258
-
259
- def apply(logger, client, processor)
260
- apply_log(logger, @path) do
261
- processor.load(@path)
262
- end
263
- end
264
- end
265
-
266
- class PackedSchemaTarget
267
- include ApplyLoggable
268
-
269
- attr_reader :path
270
- attr_reader :timestamp
271
- attr_reader :targets
272
- def initialize(path, timestamp)
273
- @path = path
274
- @timestamp = timestamp
275
- @targets = []
276
- end
277
-
278
- def apply(logger, client, processor)
279
- apply_log(logger, @path) do
280
- @targets.sort_by(&:timestamp).each do |target|
281
- target.apply(logger, client, processor)
282
- end
283
- end
284
- end
285
- end
286
-
287
- class TableTarget
288
- include ApplyLoggable
289
-
290
- attr_reader :path
291
- attr_reader :timestamp
292
- attr_reader :name
293
- attr_reader :action
294
- def initialize(path, timestamp, name, action)
295
- @path = path
296
- @timestamp = timestamp
297
- @name = name
298
- @action = action
299
- end
300
-
301
- def apply(logger, client, processor)
302
- apply_log(logger, @path) do
303
- if @path.end_with?(".grn")
304
- processor.load(@path)
305
- else
306
- # TODO: Add support for @action == "delete"
307
- table = Arrow::Table.load(@path)
308
- command = Groonga::Command::Load.new(table: @name,
309
- values: table,
310
- command_version: "3")
311
- response = client.load(command.arguments)
312
- processor.process_response(response, command)
313
- end
314
- end
315
- end
316
- end
317
-
318
- class PackedTableTarget
319
- include ApplyLoggable
320
-
321
- attr_reader :path
322
- attr_reader :timestamp
323
- attr_reader :name
324
- attr_reader :targets
325
- def initialize(path, timestamp, name)
326
- @path = path
327
- @timestamp = timestamp
328
- @name = name
329
- @targets = []
330
- end
331
-
332
- def apply(logger, client, processor)
333
- apply_log(logger, @path) do
334
- @targets.sort_by(&:timestamp).each do |target|
335
- target.apply(logger, client, processor)
336
- end
337
- end
338
- end
339
- end
340
-
341
71
  class CommandProcessor < Groonga::Client::CommandProcessor
342
72
  def initialize(config, *args)
343
73
  @config = config
@@ -0,0 +1,353 @@
1
+ # Copyright (C) 2021-2022 Sutou Kouhei <kou@clear-code.com>
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require "fileutils"
17
+
18
+ require "groonga/command"
19
+ require "parquet"
20
+
21
+ require_relative "error"
22
+
23
+ module GroongaDelta
24
+ class LocalReader
25
+ def initialize(logger, dir)
26
+ @logger = logger
27
+ @dir = dir
28
+ end
29
+
30
+ def each(min_timestamp=nil, max_timestamp=nil, &block)
31
+ unless block_given?
32
+ return to_enum(__method__, min_timestamp, max_timestamp)
33
+ end
34
+
35
+ targets = list_targets(@dir, min_timestamp, max_timestamp)
36
+ targets.sort_by(&:timestamp).each(&block)
37
+ end
38
+
39
+ def build_time(year, month, day, hour=0, minute=0, second=0, nanosecond=0)
40
+ Time.utc(year,
41
+ month,
42
+ day,
43
+ hour,
44
+ minute,
45
+ Rational(second * 1_000_000_000 + nanosecond,
46
+ 1_000_000_000))
47
+ end
48
+
49
+ private
50
+ def each_target_path(dir,
51
+ min_timestamp,
52
+ max_timestamp,
53
+ accept_directory: true,
54
+ &block)
55
+ if min_timestamp
56
+ min_timestamp_day = Time.utc(min_timestamp.year,
57
+ min_timestamp.month,
58
+ min_timestamp.day)
59
+ end
60
+ if max_timestamp
61
+ max_timestamp_day = Time.utc(max_timestamp.year,
62
+ max_timestamp.month,
63
+ max_timestamp.day)
64
+ end
65
+ Dir.glob("#{dir}/*") do |path|
66
+ base_name = File.basename(path)
67
+ if accept_directory and File.directory?(path)
68
+ timestamp = parse_directory_timestamp(base_name)
69
+ next if timestamp.nil?
70
+ next if min_timestamp_day and timestamp < min_timestamp_day
71
+ next if max_timestamp_day and timestamp > max_timestamp_day
72
+ each_target_path(path,
73
+ min_timestamp,
74
+ max_timestamp,
75
+ accept_directory: false,
76
+ &block)
77
+ elsif File.file?(path)
78
+ timestamp, action, post_match = parse_file_timestamp(base_name)
79
+ next if timestamp.nil?
80
+ next if min_timestamp and timestamp <= min_timestamp
81
+ next if max_timestamp and timestamp > max_timestamp
82
+ yield(path, timestamp, action, post_match)
83
+ end
84
+ end
85
+ end
86
+
87
+ def each_packed_target_path(dir, min_timestamp, max_timestamp)
88
+ return unless min_timestamp.to_i.zero?
89
+ Dir.glob("#{dir}/packed/*") do |path|
90
+ next unless File.directory?(path)
91
+ timestamp, action, post_match = parse_file_timestamp(File.basename(path))
92
+ next if action
93
+ next unless post_match.empty?
94
+ yield(path, timestamp)
95
+ end
96
+ end
97
+
98
+ def list_targets(dir, min_timestamp, max_timestamp)
99
+ targets = []
100
+ list_schema_targets(dir, min_timestamp, max_timestamp, targets)
101
+ Dir.glob("#{dir}/data/*") do |path|
102
+ next unless File.directory?(path)
103
+ name = File.basename(path)
104
+ list_table_targets(path, name, min_timestamp, max_timestamp, targets)
105
+ end
106
+ targets
107
+ end
108
+
109
+ def each_schema_target(dir, min_timestamp, max_timestamp)
110
+ each_target_path(dir,
111
+ min_timestamp,
112
+ max_timestamp) do |path, timestamp, action, post_match|
113
+ next if action
114
+ next unless post_match == ".grn"
115
+ yield(SchemaTarget.new(path, timestamp))
116
+ end
117
+ end
118
+
119
+ def list_schema_targets(dir, min_timestamp, max_timestamp, targets)
120
+ latest_packed_target = nil
121
+ each_packed_target_path("#{dir}/schema",
122
+ min_timestamp,
123
+ max_timestamp) do |path, timestamp|
124
+ if latest_packed_target and latest_packed_target.timestamp > timestamp
125
+ next
126
+ end
127
+ latest_packed_target = PackedSchemaTarget.new(path, timestamp)
128
+ end
129
+ if latest_packed_target
130
+ targets << latest_packed_target
131
+ each_schema_target(latest_packed_target.path, nil, nil) do |target|
132
+ latest_packed_target.targets << target
133
+ end
134
+ end
135
+ each_schema_target("#{dir}/schema",
136
+ latest_packed_target&.timestamp || min_timestamp,
137
+ max_timestamp) do |target|
138
+ targets << target
139
+ end
140
+ end
141
+
142
+ TABLE_TARGET_SUFFIXES = [".grn", ".parquet"]
143
+ def each_table_target(dir, name, min_timestamp, max_timestamp)
144
+ each_target_path(dir,
145
+ min_timestamp,
146
+ max_timestamp) do |path, timestamp, action, post_match|
147
+ next if action.nil?
148
+ next unless TABLE_TARGET_SUFFIXES.include?(post_match)
149
+ yield(TableTarget.new(path, timestamp, name, action))
150
+ end
151
+ end
152
+
153
+ def list_table_targets(dir, name, min_timestamp, max_timestamp, targets)
154
+ latest_packed_target = nil
155
+ each_packed_target_path(dir,
156
+ min_timestamp,
157
+ max_timestamp) do |path, timestamp|
158
+ if latest_packed_target and latest_packed_target.timestamp > timestamp
159
+ next
160
+ end
161
+ latest_packed_target = PackedTableTarget.new(path, timestamp, name)
162
+ end
163
+ if latest_packed_target
164
+ targets << latest_packed_target
165
+ each_table_target(latest_packed_target.path, name, nil, nil) do |target|
166
+ latest_packed_target.targets << target
167
+ end
168
+ end
169
+ each_table_target(dir,
170
+ name,
171
+ latest_packed_target&.timestamp || min_timestamp,
172
+ max_timestamp) do |target|
173
+ targets << target
174
+ end
175
+ end
176
+
177
+ def parse_directory_timestamp(base_name)
178
+ case base_name
179
+ when /\A(\d{4})-(\d{2})-(\d{2})\z/
180
+ match = Regexp.last_match
181
+ year = match[1].to_i
182
+ month = match[2].to_i
183
+ day = match[3].to_i
184
+ build_time(year, month, day)
185
+ else
186
+ nil
187
+ end
188
+ end
189
+
190
+ def parse_file_timestamp(base_name)
191
+ case base_name
192
+ when /\A(\d{4})-(\d{2})-(\d{2})-(\d{2})-(\d{2})-(\d{2})-(\d{9})(?:-(\w+))?/
193
+ match = Regexp.last_match
194
+ year = match[1].to_i
195
+ month = match[2].to_i
196
+ day = match[3].to_i
197
+ hour = match[4].to_i
198
+ minute = match[5].to_i
199
+ second = match[6].to_i
200
+ nanosecond = match[7].to_i
201
+ action = match[8]
202
+ timestamp = build_time(year,
203
+ month,
204
+ day,
205
+ hour,
206
+ minute,
207
+ second,
208
+ nanosecond)
209
+ [timestamp, action, match.post_match]
210
+ else
211
+ nil
212
+ end
213
+ end
214
+
215
+ module Loggable
216
+ private
217
+ def log(logger, path, before_message, after_message)
218
+ logger.info("#{before_message}: #{path}")
219
+ result = yield
220
+ logger.info("#{after_message}: #{path}")
221
+ result
222
+ end
223
+
224
+ def apply_log(logger, path, &block)
225
+ log(logger, path, "Start applying", "Applied", &block)
226
+ end
227
+
228
+ def vacuum_log(logger, path, &block)
229
+ log(logger, path, "Start vacuuming", "Vacuumed", &block)
230
+ end
231
+ end
232
+
233
+ class SchemaTarget
234
+ include Loggable
235
+
236
+ attr_reader :path
237
+ attr_reader :timestamp
238
+ def initialize(path, timestamp)
239
+ @path = path
240
+ @timestamp = timestamp
241
+ end
242
+
243
+ def apply(logger, client, processor)
244
+ apply_log(logger, @path) do
245
+ processor.load(@path)
246
+ end
247
+ end
248
+
249
+ def vacuum(logger)
250
+ vacuum_log(logger, @path) do
251
+ FileUtils.rm(@path)
252
+ end
253
+ end
254
+ end
255
+
256
+ class PackedSchemaTarget
257
+ include Loggable
258
+
259
+ attr_reader :path
260
+ attr_reader :timestamp
261
+ attr_reader :targets
262
+ def initialize(path, timestamp)
263
+ @path = path
264
+ @timestamp = timestamp
265
+ @targets = []
266
+ end
267
+
268
+ def apply(logger, client, processor)
269
+ apply_log(logger, @path) do
270
+ @targets.sort_by(&:timestamp).each do |target|
271
+ target.apply(logger, client, processor)
272
+ end
273
+ end
274
+ end
275
+
276
+ def vacuum(logger)
277
+ vacuum_log(logger, @path) do
278
+ @targets.sort_by(&:timestamp).each do |target|
279
+ target.vacuum(logger)
280
+ end
281
+ end
282
+ end
283
+ end
284
+
285
+ class TableTarget
286
+ include Loggable
287
+
288
+ attr_reader :path
289
+ attr_reader :timestamp
290
+ attr_reader :name
291
+ attr_reader :action
292
+ def initialize(path, timestamp, name, action)
293
+ @path = path
294
+ @timestamp = timestamp
295
+ @name = name
296
+ @action = action
297
+ end
298
+
299
+ def apply(logger, client, processor)
300
+ apply_log(logger, @path) do
301
+ if @path.end_with?(".grn")
302
+ processor.load(@path)
303
+ else
304
+ # TODO: Add support for @action == "delete"
305
+ table = Arrow::Table.load(@path)
306
+ command = Groonga::Command::Load.new(table: @name,
307
+ values: table,
308
+ command_version: "3")
309
+ response = client.load(command.arguments)
310
+ processor.process_response(response, command)
311
+ end
312
+ end
313
+ end
314
+
315
+ def vacuum(logger)
316
+ vacuum_log(logger, @path) do
317
+ FileUtils.rm(@path)
318
+ end
319
+ end
320
+ end
321
+
322
+ class PackedTableTarget
323
+ include Loggable
324
+
325
+ attr_reader :path
326
+ attr_reader :timestamp
327
+ attr_reader :name
328
+ attr_reader :targets
329
+ def initialize(path, timestamp, name)
330
+ @path = path
331
+ @timestamp = timestamp
332
+ @name = name
333
+ @targets = []
334
+ end
335
+
336
+ def apply(logger, client, processor)
337
+ apply_log(logger, @path) do
338
+ @targets.sort_by(&:timestamp).each do |target|
339
+ target.apply(logger, client, processor)
340
+ end
341
+ end
342
+ end
343
+
344
+ def vacuum(logger)
345
+ vacuum_log(logger, @path) do
346
+ @targets.sort_by(&:timestamp).each do |target|
347
+ target.vacuum(logger)
348
+ end
349
+ end
350
+ end
351
+ end
352
+ end
353
+ end
@@ -15,15 +15,15 @@
15
15
 
16
16
  require "groonga/command/parser"
17
17
 
18
- require_relative "writer"
18
+ require_relative "local-writer"
19
19
 
20
20
  module GroongaDelta
21
21
  class LocalSource
22
- def initialize(config, status)
22
+ def initialize(config, status, writer)
23
23
  @logger = config.logger
24
- @writer = Writer.new(@logger, config.delta_dir)
25
24
  @config = config.local
26
25
  @status = status.local
26
+ @writer = writer
27
27
  end
28
28
 
29
29
  def import
@@ -0,0 +1,39 @@
1
+ # Copyright (C) 2022 Sutou Kouhei <kou@clear-code.com>
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require "fileutils"
17
+
18
+ require_relative "local-reader"
19
+
20
+ module GroongaDelta
21
+ class LocalVacuumer
22
+ def initialize(config)
23
+ @logger = config.logger
24
+ @delta_dir = config.delta_dir
25
+ @config = config.vacuum
26
+ end
27
+
28
+ def vacuum
29
+ keep_span = @config.keep_span
30
+ return if keep_span.nil?
31
+ return if keep_span < 0
32
+ reader = LocalReader.new(@logger, @delta_dir)
33
+ max_timestamp = Time.now.utc - keep_span
34
+ reader.each(nil, max_timestamp) do |target|
35
+ target.vacuum(@logger)
36
+ end
37
+ end
38
+ end
39
+ end
@@ -20,10 +20,11 @@ require "groonga/command"
20
20
  require "parquet"
21
21
 
22
22
  module GroongaDelta
23
- class Writer
24
- def initialize(logger, dir)
25
- @logger = logger
26
- @dir = dir
23
+ class LocalWriter
24
+ def initialize(config)
25
+ @config = config
26
+ @logger = @config.logger
27
+ @dir = @config.delta_dir
27
28
  end
28
29
 
29
30
  def write_upserts(table, records, packed: false)
@@ -74,7 +75,7 @@ module GroongaDelta
74
75
  delete = Groonga::Command::Delete.new
75
76
  delete[:table] = table
76
77
  keys.each do |key|
77
- delete[:key] = key
78
+ delete[:key] = format_key(key)
78
79
  output.puts(delete.to_command_format)
79
80
  end
80
81
  end
@@ -131,5 +132,16 @@ module GroongaDelta
131
132
  open_output: open_output,
132
133
  &block)
133
134
  end
135
+
136
+ def format_key(key)
137
+ case key
138
+ when Integer, Float
139
+ key.to_s
140
+ when Time
141
+ key.strftime("%Y-%m-%d %H:%M:%S.%6N")
142
+ else
143
+ key
144
+ end
145
+ end
134
146
  end
135
147
  end
@@ -142,8 +142,12 @@ module GroongaDelta
142
142
  def generate_record(source_record)
143
143
  record = {}
144
144
  @groonga_columns.each do |groonga_column|
145
- value = groonga_column.generate_value(source_record)
146
- record[groonga_column.name.to_sym] = value
145
+ begin
146
+ value = groonga_column.generate_value(source_record)
147
+ record[groonga_column.name.to_sym] = value
148
+ rescue => error
149
+ raise GenerationError.new(source_record, groonga_column, error)
150
+ end
147
151
  end
148
152
  record
149
153
  end
@@ -255,7 +259,16 @@ module GroongaDelta
255
259
  end
256
260
 
257
261
  def normalize_value(value)
258
- case type
262
+ case @type
263
+ when nil, "ShortText", "Text", "LongText"
264
+ encoding = value.encoding
265
+ if encoding == Encoding::ASCII_8BIT
266
+ value.force_encoding(Encoding::UTF_8)
267
+ return value if value.valid_encoding?
268
+ value.encode(Encoding::UTF_8, encoding)
269
+ else
270
+ value.encode(Encoding::UTF_8)
271
+ end
259
272
  when "Time"
260
273
  time_max = @restriction.time_max
261
274
  time_min = @restriction.time_min
@@ -17,17 +17,17 @@ require "arrow"
17
17
  require "mysql2"
18
18
 
19
19
  require_relative "error"
20
- require_relative "writer"
20
+ require_relative "local-writer"
21
21
 
22
22
  module GroongaDelta
23
23
  class MySQLSource
24
- def initialize(config, status)
24
+ def initialize(config, status, writer)
25
25
  @logger = config.logger
26
- @writer = Writer.new(@logger, config.delta_dir)
27
26
  @config = config.mysql
28
27
  @binlog_dir = @config.binlog_dir
29
28
  @mapping = config.mapping
30
29
  @status = status.mysql
30
+ @writer = writer
31
31
  @tables = {}
32
32
  end
33
33
 
@@ -53,7 +53,7 @@ module GroongaDelta
53
53
 
54
54
  private
55
55
  def import_mysqlbinlog
56
- file, position = read_current_status
56
+ file, position, last_table_map_position = read_current_status
57
57
  FileUtils.mkdir_p(@binlog_dir)
58
58
  local_file = File.join(@binlog_dir, file)
59
59
  unless File.exist?(local_file.succ)
@@ -79,23 +79,26 @@ module GroongaDelta
79
79
  binlog.checksum = @config.checksum
80
80
  binlog.ignore_rotate = true
81
81
  binlog.each_event do |event|
82
- next if event[:position] < position
82
+ next if event[:position] < last_table_map_position
83
83
  case event[:type]
84
84
  when :rotate_event
85
- @status.update("file" => event[:event][:name],
86
- "position" => event[:event][:pos])
85
+ file = event[:event][:name]
86
+ when :table_map_event
87
+ last_table_map_position = event[:position]
87
88
  when :write_rows_event_v1,
88
89
  :write_rows_event_v2,
89
90
  :update_rows_event_v1,
90
91
  :update_rows_event_v2,
91
92
  :delete_rows_event_v1,
92
93
  :delete_rows_event_v2
94
+ next if event[:position] < position
93
95
  normalized_type = event[:type].to_s.gsub(/_v\d\z/, "").to_sym
94
96
  import_rows_event(normalized_type,
95
97
  event[:event][:table][:db],
96
98
  event[:event][:table][:table],
97
99
  file,
98
- event[:header][:next_position]) do
100
+ event[:header][:next_position],
101
+ last_table_map_position) do
99
102
  case normalized_type
100
103
  when :write_rows_event,
101
104
  :update_rows_event
@@ -114,7 +117,7 @@ module GroongaDelta
114
117
  end
115
118
 
116
119
  def import_mysql2_replication
117
- file, position = read_current_status
120
+ file, position, last_table_map_position = read_current_status
118
121
  is_mysql_56_or_later = mysql(@config.select_user,
119
122
  @config.select_password) do |select_client|
120
123
  mysql_version(select_client) >= Gem::Version.new("5.6")
@@ -128,31 +131,43 @@ module GroongaDelta
128
131
  checksum: "NONE")
129
132
  end
130
133
  replication_client.file_name = file
131
- replication_client.start_position = position
134
+ current_event_position = last_table_map_position
135
+ replication_client.start_position = current_event_position
132
136
  replication_client.open do
133
137
  replication_client.each do |event|
134
- case event
135
- when Mysql2Replication::RotateEvent
136
- file = event.file_name
137
- when Mysql2Replication::RowsEvent
138
- event_name = event.class.name.split("::").last
139
- normalized_type =
140
- event_name.scan(/[A-Z][a-z]+/).
141
- collect(&:downcase).
142
- join("_").
143
- to_sym
144
- import_rows_event(normalized_type,
145
- event.table_map.database,
146
- event.table_map.table,
147
- file,
148
- event.next_position) do
149
- case normalized_type
150
- when :update_rows_event
151
- event.updated_rows
152
- else
153
- event.rows
138
+ begin
139
+ @logger.debug do
140
+ event.inspect
141
+ end
142
+ next if current_event_position < position
143
+ case event
144
+ when Mysql2Replication::RotateEvent
145
+ file = event.file_name
146
+ when Mysql2Replication::TableMapEvent
147
+ last_table_map_event = current_event_position
148
+ when Mysql2Replication::RowsEvent
149
+ event_name = event.class.name.split("::").last
150
+ normalized_type =
151
+ event_name.scan(/[A-Z][a-z]+/).
152
+ collect(&:downcase).
153
+ join("_").
154
+ to_sym
155
+ import_rows_event(normalized_type,
156
+ event.table_map.database,
157
+ event.table_map.table,
158
+ file,
159
+ event.next_position,
160
+ last_table_map_position) do
161
+ case normalized_type
162
+ when :update_rows_event
163
+ event.updated_rows
164
+ else
165
+ event.rows
166
+ end
154
167
  end
155
168
  end
169
+ ensure
170
+ current_event_position = event.next_position
156
171
  end
157
172
  end
158
173
  end
@@ -164,6 +179,7 @@ module GroongaDelta
164
179
  table_name,
165
180
  file,
166
181
  next_position,
182
+ last_table_map_position,
167
183
  &block)
168
184
  source_table = @mapping[database_name, table_name]
169
185
  return if source_table.nil?
@@ -189,7 +205,8 @@ module GroongaDelta
189
205
  groonga_record_keys)
190
206
  end
191
207
  @status.update("file" => file,
192
- "position" => next_position)
208
+ "position" => next_position,
209
+ "last_table_map_position" => last_table_map_position)
193
210
  end
194
211
 
195
212
  def wait_process(command_line, pid, output_read, error_read)
@@ -264,7 +281,7 @@ module GroongaDelta
264
281
 
265
282
  def read_current_status
266
283
  if @status.file
267
- [@status.file, @status.position]
284
+ [@status.file, @status.position, @status.last_table_map_position]
268
285
  else
269
286
  file = nil
270
287
  position = 0
@@ -288,8 +305,9 @@ module GroongaDelta
288
305
  end
289
306
  end
290
307
  @status.update("file" => file,
291
- "position" => position)
292
- [file, position]
308
+ "position" => position,
309
+ "last_table_map_position" => position)
310
+ [file, position, position]
293
311
  end
294
312
  end
295
313
 
@@ -14,5 +14,5 @@
14
14
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
15
 
16
16
  module GroongaDelta
17
- VERSION = "1.0.1"
17
+ VERSION = "1.0.2"
18
18
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: groonga-delta
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sutou Kouhei
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-09 00:00:00.000000000 Z
11
+ date: 2022-06-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: groonga-client
@@ -80,13 +80,15 @@ files:
80
80
  - lib/groonga-delta/import-config.rb
81
81
  - lib/groonga-delta/import-status.rb
82
82
  - lib/groonga-delta/local-delta.rb
83
+ - lib/groonga-delta/local-reader.rb
83
84
  - lib/groonga-delta/local-source.rb
85
+ - lib/groonga-delta/local-vacuumer.rb
86
+ - lib/groonga-delta/local-writer.rb
84
87
  - lib/groonga-delta/ltsv-log-formatter.rb
85
88
  - lib/groonga-delta/mapping.rb
86
89
  - lib/groonga-delta/mysql-source.rb
87
90
  - lib/groonga-delta/status.rb
88
91
  - lib/groonga-delta/version.rb
89
- - lib/groonga-delta/writer.rb
90
92
  homepage: https://github.com/groonga/groonga-delta
91
93
  licenses:
92
94
  - GPL-3.0+