groonga-delta 1.0.0 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ddcaa199b89bab185924a694bf7e64e1b6a8c2cd63d1ea855f59a5ac456c317e
4
- data.tar.gz: 06a1a8e01e343b4fca3faf2cc8139ed8adaadc6aeb55c7c6315e1c1bc7f40962
3
+ metadata.gz: 5837cbbd7824ebf557d24be10007166c476dadf2157a96071522307d0ffbd0d7
4
+ data.tar.gz: '086fa03eb3a6c9d1a055ec8ffecf0a8b7e9102dcee0fd1d3326ece684f4e5560'
5
5
  SHA512:
6
- metadata.gz: 39252c1602f63ff3eeddafdb676bdeef1334bed4df5a0e29fb8d14f0ae1467b98b925829333be865badf7bc3ec9fae332477e4dd6dc78c81cfddfa12e4bd8734
7
- data.tar.gz: cc50bce6f1da66d697bf0ce97a191e1d67fa474af9a795c0f709e15d0fbf60f94a551af902b58a2f6eac6fbf2c637524335728269e1e211f3c1f35e84eb232d5
6
+ metadata.gz: a3db1aedcdbf3bb39312bfb091a778091111eec880488917a4d21598834e08a703181f383f47d06e2646e873d1ea1ac5d8e42ed46d71328ee8ecb8c837aa080d
7
+ data.tar.gz: 584ecff5ca3bf44fc9f4baea4ea8bd5f6c60227ac0f05367dd0860c74a563b26f4bcac747c74a7ce1c98a77f77100c2c736817ca6daa4af8138aec1372996f1c
data/doc/text/news.md CHANGED
@@ -1,5 +1,47 @@
1
1
  # NEWS
2
2
 
3
+ ## 1.0.3 - 2022-07-21
4
+
5
+ ### Improvements
6
+
7
+ * `import`: Dropped support for `mysqlbinlog`.
8
+
9
+ * Changed to use atomic write for `status.yaml`.
10
+
11
+ * Changed the default number of logs to keep from 7 to 100.
12
+
13
+ ### Fixes
14
+
15
+ * `import`: Fixed a bug that prevented events to be processed from being detected.
16
+
17
+ ## 1.0.2 - 2022-06-21
18
+
19
+ ### Improvements
20
+
21
+ * `import`: Added support for logging all MySQL replication event
22
+ details at the `debug` log level.
23
+
24
+ * `import`: Improved error handling on record generation.
25
+
26
+ * `import`: Added support for deleting a record by number/time key.
27
+
28
+ * `import`: Added support for vacuuming old delta files.
29
+
30
+ ### Fixes
31
+
32
+ * `import`: Fixed a bug that retrying from an error may cause "no
33
+ table map" error for row events. We need to retry from the last
34
+ table map event.
35
+
36
+ * `apply`: Fixed a bug that delta files that have not been applied
37
+ yet might be skipped.
38
+
39
+ ## 1.0.1 - 2022-06-09
40
+
41
+ ### Improvements
42
+
43
+ * Added support for `SIGINT` and `SIGTERM`.
44
+
3
45
  ## 1.0.0 - 2022-03-07
4
46
 
5
47
  Initial release.
@@ -34,6 +34,16 @@ module GroongaDelta
34
34
  sleep(@config.polling_interval)
35
35
  end
36
36
  true
37
+ rescue Interrupt
38
+ true
39
+ rescue SignalException => error
40
+ case error.signm
41
+ when "SIGTERM"
42
+ true
43
+ else
44
+ @config.logger.error(error) if @config
45
+ raise
46
+ end
37
47
  rescue => error
38
48
  @config.logger.error(error) if @config
39
49
  raise
@@ -64,7 +64,7 @@ module GroongaDelta
64
64
  end
65
65
 
66
66
  def log_age
67
- @data["log_age"] || 7
67
+ @data["log_age"] || 100
68
68
  end
69
69
 
70
70
  def log_max_size
@@ -13,6 +13,8 @@
13
13
  # You should have received a copy of the GNU General Public License
14
14
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
15
 
16
+ require "pp"
17
+
16
18
  module GroongaDelta
17
19
  class Error < StandardError
18
20
  end
@@ -25,4 +27,22 @@ module GroongaDelta
25
27
 
26
28
  class ProcessError < Error
27
29
  end
30
+
31
+ class GenerationError < Error
32
+ attr_reader :source_record
33
+ attr_reader :groonga_column
34
+ attr_reader :detail
35
+ def initialize(source_record, groonga_column, detail)
36
+ @source_record = source_record
37
+ @groonga_column = groonga_column
38
+ @detail = detail
39
+ message =
40
+ "failed to generate a Groonga record:\n" +
41
+ "source record: #{PP.pp(source_record, '')}" +
42
+ "Groonga column: #{PP.pp(groonga_column, '')}" +
43
+ "detail: #{@detail.message}(#{@detail.class})\n" +
44
+ @detail.backtrace.join("\n")
45
+ super(message)
46
+ end
47
+ end
28
48
  end
@@ -16,6 +16,8 @@
16
16
  require_relative "command"
17
17
  require_relative "import-config"
18
18
  require_relative "import-status"
19
+ require_relative "local-writer"
20
+ require_relative "local-vacuumer"
19
21
 
20
22
  module GroongaDelta
21
23
  class ImportCommand < Command
@@ -23,14 +25,16 @@ module GroongaDelta
23
25
  def prepare
24
26
  @config = ImportConfig.new(@dir)
25
27
  @status = ImportStatus.new(@dir)
28
+ @writer = LocalWriter.new(@config)
29
+ @vacuumer = LocalVacuumer.new(@config)
26
30
  @sources = []
27
31
  if @config.local
28
32
  require_relative "local-source"
29
- @sources << LocalSource.new(@config, @status)
33
+ @sources << LocalSource.new(@config, @status, @writer)
30
34
  end
31
35
  if @config.mysql
32
36
  require_relative "mysql-source"
33
- @sources << MySQLSource.new(@config, @status)
37
+ @sources << MySQLSource.new(@config, @status, @writer)
34
38
  end
35
39
  end
36
40
 
@@ -38,6 +42,7 @@ module GroongaDelta
38
42
  @sources.each do |source|
39
43
  source.import
40
44
  end
45
+ @vacuumer.vacuum
41
46
  end
42
47
  end
43
48
  end
@@ -38,6 +38,10 @@ module GroongaDelta
38
38
  Local.new(@dir, @data["local"])
39
39
  end
40
40
 
41
+ def vacuum
42
+ Vacuum.new(@data["vacuum"] || {})
43
+ end
44
+
41
45
  def mapping
42
46
  Mapping.new(@data["mapping"] || {})
43
47
  end
@@ -55,10 +59,6 @@ module GroongaDelta
55
59
  resolve_path(@data["binlog_dir"] || "binlog")
56
60
  end
57
61
 
58
- def mysqlbinlog
59
- @data["mysqlbinlog"] || "mysqlbinlog"
60
- end
61
-
62
62
  def host
63
63
  @data["host"] || "localhost"
64
64
  end
@@ -164,5 +164,44 @@ module GroongaDelta
164
164
  @data["initial_max_number"] || Float::INFINITY
165
165
  end
166
166
  end
167
+
168
+ class Vacuum
169
+ def initialize(data)
170
+ @data = data
171
+ end
172
+
173
+ def keep_span
174
+ resolve_span(@data["keep_span"])
175
+ end
176
+
177
+ private
178
+ def resolve_span(value)
179
+ case value
180
+ when String
181
+ case value
182
+ when /\A(\d+(?:\.\d+)?)(?:s|sec|second|seconds)?\z/
183
+ Float($1)
184
+ when /\A(\d+(?:\.\d+)?)(?:m|min|minute|minutes)\z/
185
+ Float($1) * 60
186
+ when /\A(\d+(?:\.\d+)?)(?:h|hr|hour|hours)\z/
187
+ Float($1) * 60 * 60
188
+ when /\A(\d+(?:\.\d+)?)(?:d|day|days)\z/
189
+ Float($1) * 60 * 60 * 24
190
+ when /\A(\d+(?:\.\d+)?)(?:w|week|weeks)\z/
191
+ Float($1) * 60 * 60 * 24 * 7
192
+ when /\A(\d+(?:\.\d+)?)(?:month|months)\z/
193
+ # Same as systemd. See systemd.time(7)
194
+ Float($1) * 60 * 60 * 24 * 30.44
195
+ when /\A(\d+(?:\.\d+)?)(?:y|year|years)\z/
196
+ # Same as systemd. See systemd.time(7)
197
+ Float($1) * 60 * 60 * 24 * 365.25
198
+ else
199
+ raise ConfigError, "invalid span value: #{value.inspect}"
200
+ end
201
+ else
202
+ value
203
+ end
204
+ end
205
+ end
167
206
  end
168
207
  end
@@ -38,12 +38,20 @@ module GroongaDelta
38
38
  @status.update("mysql" => new_data)
39
39
  end
40
40
 
41
- def file
42
- self["file"]
41
+ def last_file
42
+ self["last_file"] || self["file"] # For backward compatibility
43
43
  end
44
44
 
45
- def position
46
- self["position"]
45
+ def last_position
46
+ self["last_position"] || self["position"] # For backward compatibility
47
+ end
48
+
49
+ def last_table_map_file
50
+ self["last_table_map_file"] || self["file"]
51
+ end
52
+
53
+ def last_table_map_position
54
+ self["last_table_map_position"]
47
55
  end
48
56
  end
49
57
 
@@ -13,12 +13,9 @@
13
13
  # You should have received a copy of the GNU General Public License
14
14
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
15
 
16
- require "fileutils"
17
-
18
16
  require "groonga/client"
19
- require "parquet"
20
17
 
21
- require_relative "error"
18
+ require_relative "local-reader"
22
19
 
23
20
  module GroongaDelta
24
21
  class LocalDelta
@@ -30,9 +27,9 @@ module GroongaDelta
30
27
  end
31
28
 
32
29
  def apply
33
- start_time = read_current_status
30
+ reader = LocalReader.new(@logger, @delta_dir)
31
+ start_time = read_current_status(reader)
34
32
  current_time = Time.now.utc
35
- targets = list_targets(@delta_dir, start_time, current_time)
36
33
  client_options = {
37
34
  url: @config.groonga.url,
38
35
  read_timeout: @config.groonga.read_timeout,
@@ -44,7 +41,7 @@ module GroongaDelta
44
41
  target_commands: [],
45
42
  target_tables: [],
46
43
  target_columns: [])
47
- targets.sort_by(&:timestamp).each do |target|
44
+ reader.each(start_time, current_time) do |target|
48
45
  target.apply(@logger, client, processor)
49
46
  @status.update("start_time" => [
50
47
  target.timestamp.to_i,
@@ -55,289 +52,22 @@ module GroongaDelta
55
52
  end
56
53
 
57
54
  private
58
- def build_time(year, month, day, hour=0, minute=0, second=0, nanosecond=0)
59
- Time.utc(year,
60
- month,
61
- day,
62
- hour,
63
- minute,
64
- Rational(second * 1_000_000_000 + nanosecond,
65
- 1_000_000_000))
66
- end
67
-
68
- def read_current_status
55
+ def read_current_status(reader)
69
56
  start_time_unix_time, start_time_nanosecond = @status.start_time
70
57
  if start_time_unix_time
71
58
  start_time = Time.at(start_time_unix_time).utc
72
- build_time(start_time.year,
73
- start_time.month,
74
- start_time.day,
75
- start_time.hour,
76
- start_time.min,
77
- start_time.sec,
78
- start_time_nanosecond)
59
+ reader.build_time(start_time.year,
60
+ start_time.month,
61
+ start_time.day,
62
+ start_time.hour,
63
+ start_time.min,
64
+ start_time.sec,
65
+ start_time_nanosecond)
79
66
  else
80
67
  Time.at(0).utc
81
68
  end
82
69
  end
83
70
 
84
- def each_target_path(dir,
85
- min_timestamp,
86
- max_timestamp,
87
- accept_directory: true,
88
- &block)
89
- Dir.glob("#{dir}/*") do |path|
90
- base_name = File.basename(path)
91
- if accept_directory and File.directory?(path)
92
- timestamp = parse_directory_timestamp(base_name)
93
- next if timestamp.nil?
94
- next if min_timestamp and timestamp <= min_timestamp
95
- next if max_timestamp and timestamp > max_timestamp
96
- each_target_path(path,
97
- min_timestamp,
98
- max_timestamp,
99
- accept_directory: false,
100
- &block)
101
- elsif File.file?(path)
102
- timestamp, action, post_match = parse_file_timestamp(base_name)
103
- next if timestamp.nil?
104
- next if min_timestamp and timestamp <= min_timestamp
105
- next if max_timestamp and timestamp > max_timestamp
106
- yield(path, timestamp, action, post_match)
107
- end
108
- end
109
- end
110
-
111
- def each_packed_target_path(dir, min_timestamp, max_timestamp)
112
- return unless min_timestamp.to_i.zero?
113
- Dir.glob("#{dir}/packed/*") do |path|
114
- next unless File.directory?(path)
115
- timestamp, action, post_match = parse_file_timestamp(File.basename(path))
116
- next if action
117
- next unless post_match.empty?
118
- yield(path, timestamp)
119
- end
120
- end
121
-
122
- def list_targets(dir, start_time, current_timestamp)
123
- targets = []
124
- list_schema_targets(dir, start_time, current_timestamp, targets)
125
- Dir.glob("#{dir}/data/*") do |path|
126
- next unless File.directory?(path)
127
- name = File.basename(path)
128
- list_table_targets(path, name, start_time, current_timestamp, targets)
129
- end
130
- targets
131
- end
132
-
133
- def each_schema_target(dir, min_timestamp, max_timestamp)
134
- each_target_path(dir,
135
- min_timestamp,
136
- max_timestamp) do |path, timestamp, action, post_match|
137
- next if action
138
- next unless post_match == ".grn"
139
- yield(SchemaTarget.new(path, timestamp))
140
- end
141
- end
142
-
143
- def list_schema_targets(dir, start_time, current_timestamp, targets)
144
- latest_packed_target = nil
145
- each_packed_target_path("#{dir}/schema",
146
- start_time,
147
- current_timestamp) do |path, timestamp|
148
- if latest_packed_target and latest_packed_target.timestamp > timestamp
149
- next
150
- end
151
- latest_packed_target = PackedSchemaTarget.new(path, timestamp)
152
- end
153
- if latest_packed_target
154
- targets << latest_packed_target
155
- each_schema_target(latest_packed_target.path, nil, nil) do |target|
156
- latest_packed_target.targets << target
157
- end
158
- end
159
- each_schema_target("#{dir}/schema",
160
- latest_packed_target&.timestamp || start_time,
161
- current_timestamp) do |target|
162
- targets << target
163
- end
164
- end
165
-
166
- TABLE_TARGET_SUFFIXES = [".grn", ".parquet"]
167
- def each_table_target(dir, name, min_timestamp, max_timestamp)
168
- each_target_path(dir,
169
- min_timestamp,
170
- max_timestamp) do |path, timestamp, action, post_match|
171
- next if action.nil?
172
- next unless TABLE_TARGET_SUFFIXES.include?(post_match)
173
- yield(TableTarget.new(path, timestamp, name, action))
174
- end
175
- end
176
-
177
- def list_table_targets(dir, name, start_time, current_timestamp, targets)
178
- latest_packed_target = nil
179
- each_packed_target_path(dir,
180
- start_time,
181
- current_timestamp) do |path, timestamp|
182
- if latest_packed_target and latest_packed_target.timestamp > timestamp
183
- next
184
- end
185
- latest_packed_target = PackedTableTarget.new(path, timestamp, name)
186
- end
187
- if latest_packed_target
188
- targets << latest_packed_target
189
- each_table_target(latest_packed_target.path, name, nil, nil) do |target|
190
- latest_packed_target.targets << target
191
- end
192
- end
193
- each_table_target(dir,
194
- name,
195
- latest_packed_target&.timestamp || start_time,
196
- current_timestamp) do |target|
197
- targets << target
198
- end
199
- end
200
-
201
- def parse_directory_timestamp(base_name)
202
- case base_name
203
- when /\A(\d{4})-(\d{2})-(\d{2})\z/
204
- match = Regexp.last_match
205
- year = match[1].to_i
206
- month = match[2].to_i
207
- day = match[3].to_i
208
- build_time(year, month, day)
209
- else
210
- nil
211
- end
212
- end
213
-
214
- def parse_file_timestamp(base_name)
215
- case base_name
216
- when /\A(\d{4})-(\d{2})-(\d{2})-(\d{2})-(\d{2})-(\d{2})-(\d{9})(?:-(\w+))?/
217
- match = Regexp.last_match
218
- year = match[1].to_i
219
- month = match[2].to_i
220
- day = match[3].to_i
221
- hour = match[4].to_i
222
- minute = match[5].to_i
223
- second = match[6].to_i
224
- nanosecond = match[7].to_i
225
- action = match[8]
226
- timestamp = build_time(year,
227
- month,
228
- day,
229
- hour,
230
- minute,
231
- second,
232
- nanosecond)
233
- [timestamp, action, match.post_match]
234
- else
235
- nil
236
- end
237
- end
238
-
239
- module ApplyLoggable
240
- private
241
- def apply_log(logger, path)
242
- logger.info("Start applying: #{path}")
243
- result = yield
244
- logger.info("Applied: #{path}")
245
- result
246
- end
247
- end
248
-
249
- class SchemaTarget
250
- include ApplyLoggable
251
-
252
- attr_reader :path
253
- attr_reader :timestamp
254
- def initialize(path, timestamp)
255
- @path = path
256
- @timestamp = timestamp
257
- end
258
-
259
- def apply(logger, client, processor)
260
- apply_log(logger, @path) do
261
- processor.load(@path)
262
- end
263
- end
264
- end
265
-
266
- class PackedSchemaTarget
267
- include ApplyLoggable
268
-
269
- attr_reader :path
270
- attr_reader :timestamp
271
- attr_reader :targets
272
- def initialize(path, timestamp)
273
- @path = path
274
- @timestamp = timestamp
275
- @targets = []
276
- end
277
-
278
- def apply(logger, client, processor)
279
- apply_log(logger, @path) do
280
- @targets.sort_by(&:timestamp).each do |target|
281
- target.apply(logger, client, processor)
282
- end
283
- end
284
- end
285
- end
286
-
287
- class TableTarget
288
- include ApplyLoggable
289
-
290
- attr_reader :path
291
- attr_reader :timestamp
292
- attr_reader :name
293
- attr_reader :action
294
- def initialize(path, timestamp, name, action)
295
- @path = path
296
- @timestamp = timestamp
297
- @name = name
298
- @action = action
299
- end
300
-
301
- def apply(logger, client, processor)
302
- apply_log(logger, @path) do
303
- if @path.end_with?(".grn")
304
- processor.load(@path)
305
- else
306
- # TODO: Add support for @action == "delete"
307
- table = Arrow::Table.load(@path)
308
- command = Groonga::Command::Load.new(table: @name,
309
- values: table,
310
- command_version: "3")
311
- response = client.load(command.arguments)
312
- processor.process_response(response, command)
313
- end
314
- end
315
- end
316
- end
317
-
318
- class PackedTableTarget
319
- include ApplyLoggable
320
-
321
- attr_reader :path
322
- attr_reader :timestamp
323
- attr_reader :name
324
- attr_reader :targets
325
- def initialize(path, timestamp, name)
326
- @path = path
327
- @timestamp = timestamp
328
- @name = name
329
- @targets = []
330
- end
331
-
332
- def apply(logger, client, processor)
333
- apply_log(logger, @path) do
334
- @targets.sort_by(&:timestamp).each do |target|
335
- target.apply(logger, client, processor)
336
- end
337
- end
338
- end
339
- end
340
-
341
71
  class CommandProcessor < Groonga::Client::CommandProcessor
342
72
  def initialize(config, *args)
343
73
  @config = config