groonga-delta 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +26 -0
- data/LICENSE.txt +674 -0
- data/README.md +43 -0
- data/Rakefile +36 -0
- data/bin/groonga-delta-apply +21 -0
- data/bin/groonga-delta-import +21 -0
- data/doc/text/news.md +5 -0
- data/groonga-delta.gemspec +54 -0
- data/lib/groonga-delta/apply-command.rb +38 -0
- data/lib/groonga-delta/apply-config.rb +78 -0
- data/lib/groonga-delta/apply-status.rb +24 -0
- data/lib/groonga-delta/command.rb +75 -0
- data/lib/groonga-delta/config.rb +99 -0
- data/lib/groonga-delta/error.rb +28 -0
- data/lib/groonga-delta/import-command.rb +43 -0
- data/lib/groonga-delta/import-config.rb +168 -0
- data/lib/groonga-delta/import-status.rb +68 -0
- data/lib/groonga-delta/local-delta.rb +386 -0
- data/lib/groonga-delta/local-source.rb +134 -0
- data/lib/groonga-delta/ltsv-log-formatter.rb +50 -0
- data/lib/groonga-delta/mapping.rb +314 -0
- data/lib/groonga-delta/mysql-source.rb +391 -0
- data/lib/groonga-delta/status.rb +43 -0
- data/lib/groonga-delta/version.rb +18 -0
- data/lib/groonga-delta/writer.rb +135 -0
- data/lib/groonga-delta.rb +18 -0
- metadata +114 -0
@@ -0,0 +1,134 @@
|
|
1
|
+
# Copyright (C) 2021-2022 Sutou Kouhei <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require "groonga/command/parser"
|
17
|
+
|
18
|
+
require_relative "writer"
|
19
|
+
|
20
|
+
module GroongaDelta
|
21
|
+
class LocalSource
|
22
|
+
# Prepare a source that imports Groonga command files from a local
# directory.
#
# config: the import configuration. Its logger, delta_dir and local
#         accessors are read here.
# status: the import status. Only its local part is kept.
def initialize(config, status)
  logger = config.logger
  @logger = logger
  @writer = Writer.new(logger, config.delta_dir)
  @config = config.local
  @status = status.local
end
|
28
|
+
|
29
|
+
# Import the "<number>*.grn" files in the configured directory that
# have not been imported yet, in ascending number order.
#
# Every line of a target file is fed to the command parser, then the
# status is updated with the number of the file. While no number is
# recorded in the status yet, the import stops at the first file
# whose number is greater than the configured initial_max_number.
def import
  latest_number = @status.number || -1
  targets = []
  Dir.glob("#{@config.dir}/*.grn") do |path|
    digits = File.basename(path)[/\A\d+/]
    # A file whose name doesn't start with a number isn't a target.
    next if digits.nil?
    number = Integer(digits, 10)
    next if number <= latest_number
    targets << [number, path]
  end
  targets.sort_by! {|number, _path| number}
  parser = create_command_parser
  targets.each do |number, path|
    if latest_number == -1 && number > @config.initial_max_number
      @logger.info("Stopped initial import")
      break
    end
    @logger.info("Start importing: #{path}")
    File.open(path) do |input|
      last_line = nil
      input.each_line do |line|
        last_line = line
        parser << line
      end
      # "line" is local to the block above, so it can't be used here.
      # Feed the missing newline instead; presumably this is what lets
      # the parser treat the unterminated last line as complete.
      parser << "\n" if last_line && !last_line.end_with?("\n")
    end
    @logger.info("Imported: #{path}")
    @status.update("number" => number)
  end
end
|
65
|
+
|
66
|
+
private
|
67
|
+
# Build a Groonga command parser whose callbacks forward parsed
# commands to write_command and write_load_command.
#
# Values of a load command that has no "values" argument are buffered
# and written in chunks of 10000 values; the rest is written when the
# load command completes.
def create_command_parser
  chunk_size = 10000
  pending_values = []
  # Write the buffered values for the command and empty the buffer.
  flush = lambda do |command|
    write_load_command(command, pending_values)
    pending_values.clear
  end

  Groonga::Command::Parser.new.tap do |parser|
    parser.on_command {|command| write_command(command)}

    parser.on_load_columns do |command, columns|
      command[:columns] ||= columns.join(",")
    end

    parser.on_load_value do |command, value|
      unless command[:values]
        pending_values << value
        flush.call(command) if pending_values.size == chunk_size
      end
      command.original_source.clear
    end

    parser.on_load_complete do |command|
      if command[:values]
        write_load_command(command)
      elsif !pending_values.empty?
        flush.call(command)
      end
    end
  end
end
|
104
|
+
|
105
|
+
# Write a parsed non-load command through the writer.
#
# A "delete" command is written as a delete of its key in its table;
# every other command is written as a schema change.
#
# Raises NotImplementedError for a "delete" command without a key.
def write_command(command)
  unless command.command_name == "delete"
    return @writer.write_schema(command)
  end

  key = command[:key]
  unless key
    raise NotImplementedError,
          "delete by not _key isn't supported yet: #{command.to_s}"
  end
  @writer.write_deletes(command[:table], [key])
end
|
118
|
+
|
119
|
+
# Write the records of a load command as upserts.
#
# command: the load command; its table, columns and (when values isn't
#          given) values are read.
# values:  the records to write instead of command.values.
#
# When the command has columns, the written records start with the
# columns followed by each record.
def write_load_command(command, values=nil)
  columns = command.columns
  rows = values || command.values
  if columns
    # The block runs lazily, so it must iterate "rows" and not a
    # variable that is rebound to this Enumerator: iterating the
    # Enumerator from inside itself never terminates.
    records = Enumerator.new do |yielder|
      yielder << columns
      rows.each do |row|
        yielder << row
      end
    end
  else
    records = rows
  end
  @writer.write_upserts(command.table, records)
end
|
133
|
+
end
|
134
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# Copyright (C) 2022 Sutou Kouhei <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
module GroongaDelta
|
17
|
+
# Formats log entries as LTSV: every output line has the timestamp,
# severity, pid and message fields separated by tabs.
class LTSVLogFormatter
  # Format one log entry.
  #
  # A String message is used as is, an Exception is formatted as
  # "Class: message" followed by its backtrace (when it has one), and
  # anything else is formatted with inspect. Each line of the message
  # and each backtrace entry becomes its own output line.
  #
  # program_name is accepted but not used.
  #
  # Returns the formatted lines as one String.
  def call(severity, time, program_name, message)
    prefix = format("timestamp:%{timestamp}\tseverity:%{severity}\tpid:%{pid}",
                    severity: severity,
                    timestamp: time.strftime("%Y-%m-%dT%H:%M:%S.%N"),
                    pid: Process.pid)
    backtrace = nil
    text =
      case message
      when String
        message
      when Exception
        backtrace = message.backtrace
        "#{message.class}: #{message}"
      else
        message.inspect
      end
    entries = text.each_line(chomp: true).to_a
    entries.concat(backtrace) if backtrace
    entries.each_with_object(+"") do |entry, formatted|
      formatted << "#{prefix}\tmessage:#{escape_value(entry)}\n"
    end
  end

  private
  # Replace the characters that would break an LTSV line with a space.
  def escape_value(value)
    value.gsub(/[\t\r\n]/, " ")
  end
end
|
50
|
+
end
|
@@ -0,0 +1,314 @@
|
|
1
|
+
# Copyright (C) 2021-2022 Sutou Kouhei <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require "time"

require_relative "error"
|
17
|
+
|
18
|
+
module GroongaDelta
|
19
|
+
class Mapping
|
20
|
+
# Keep the raw mapping data and build the source database and source
# table indexes from it right away.
#
# data: the mapping data, keyed by Groonga table name.
def initialize(data)
  @data = data
  build_source_databases
end
|
24
|
+
|
25
|
+
# Returns the list of source databases built from the mapping data.
def source_databases
  @source_databases
end
|
28
|
+
|
29
|
+
# Look up a mapping entry.
#
# With only a source database name, returns the entry of the source
# databases index for that name. With a source table name too, returns
# the entry of the source tables index for that database and table.
# Returns nil when nothing is registered.
def [](source_database_name, source_table_name=nil)
  if source_table_name.nil?
    return @source_databases_index[source_database_name]
  end

  table_key = [source_database_name, source_table_name]
  @source_tables_index[table_key]
end
|
36
|
+
|
37
|
+
private
|
38
|
+
# Build @source_databases, @source_databases_index and
# @source_tables_index from the raw mapping data.
#
# Each source of each Groonga table becomes a SourceTable that is
# grouped by the source's database name. A column is described either
# by a template string or by a Hash with the "template", "expression",
# "type" and "source_column_names" keys. The source column names of a
# source are the listed "source_column_names" plus the names
# referenced as %{name} in the templates (as Symbols), without
# duplicates.
def build_source_databases
  tables_by_database = {}
  @data.each do |groonga_table_name, details|
    restriction = Restriction.new(details["restriction"])
    sources = details["sources"] || []
    sources.each do |source|
      groonga_columns = []
      source_column_names = []
      source["columns"].each do |name, spec|
        if spec.is_a?(Hash)
          template = spec["template"]
          expression = spec["expression"]
          type = spec["type"]
          source_column_names.concat(spec["source_column_names"] || [])
        else
          template = spec
          expression = nil
          type = nil
        end
        groonga_columns <<
          GroongaColumn.new(name, template, expression, type, restriction)
        next unless template
        template.scan(/%{(.*?)}/) do |(source_column_name)|
          source_column_names << source_column_name.to_sym
        end
      end
      source_column_names.uniq!
      groonga_table = GroongaTable.new(groonga_table_name, groonga_columns)
      source_table = SourceTable.new(source["table"],
                                     source_column_names,
                                     source["filter"],
                                     groonga_table)
      (tables_by_database[source["database"]] ||= []) << source_table
    end
  end

  @source_databases = []
  @source_databases_index = {}
  @source_tables_index = {}
  tables_by_database.each do |database_name, source_tables|
    database = SourceDatabase.new(database_name, source_tables)
    @source_databases << database
    @source_databases_index[database.name] = database
    source_tables.each do |table|
      @source_tables_index[[database.name, table.name]] = table
    end
  end
end
|
93
|
+
|
94
|
+
# Value restrictions of a Groonga table. Only the "time" restrictions
# ("max" and "min") are read from the given data.
class Restriction
  # The upper time bound as a local Time, or nil when not configured.
  attr_reader :time_max
  # The lower time bound as a local Time, or nil when not configured.
  attr_reader :time_min

  # data: the "restriction" part of a table mapping (a Hash), or nil
  #       when the table has no restriction.
  def initialize(data)
    @data = data
    @time_max = time_value("time", "max")
    @time_min = time_value("time", "min")
  end

  private
  # Dig the value at keys out of the data and convert it to a local
  # Time. Returns nil when there is no data or no value.
  #
  # A String is parsed with Time.parse (provided by the "time"
  # library). A value that already is a Time, or that can be converted
  # with to_time, is accepted too because a configuration loader may
  # have converted a timestamp before it gets here.
  def time_value(*keys)
    return nil if @data.nil?
    value = @data.dig(*keys)
    case value
    when nil
      nil
    when Time
      # getlocal returns a new object; the configured value isn't
      # changed.
      value.getlocal
    when String
      Time.parse(value).localtime
    else
      if value.respond_to?(:to_time)
        value.to_time.getlocal
      else
        Time.parse(value).localtime
      end
    end
  end
end
|
111
|
+
|
112
|
+
# A source database: its name and the source tables mapped from it.
class SourceDatabase
  attr_reader :name, :source_tables

  # name:          the name of the source database.
  # source_tables: the source tables that belong to the database.
  def initialize(name, source_tables)
    @name, @source_tables = name, source_tables
  end
end
|
120
|
+
|
121
|
+
# A source table: its name, the source columns to read, the filter
# configured for it and the Groonga table it is mapped to.
class SourceTable
  attr_reader :name, :source_column_names, :source_filter, :groonga_table

  # name:                the name of the source table.
  # source_column_names: the names of the source columns to read.
  # source_filter:       the "filter" value of the source mapping.
  # groonga_table:       the mapped Groonga table.
  def initialize(name, source_column_names, source_filter, groonga_table)
    @name = name
    @source_column_names = source_column_names
    @source_filter = source_filter
    @groonga_table = groonga_table
  end
end
|
133
|
+
|
134
|
+
# A Groonga table: its name and the columns that generate its values
# from source records.
class GroongaTable
  attr_reader :name, :groonga_columns

  # name:            the name of the Groonga table.
  # groonga_columns: the columns of the table.
  def initialize(name, groonga_columns)
    @name = name
    @groonga_columns = groonga_columns
  end

  # Generate one Groonga record from a source record.
  #
  # Returns a Hash that maps each column name (as a Symbol) to the
  # value the column generates for source_record.
  def generate_record(source_record)
    @groonga_columns.each_with_object({}) do |column, record|
      value = column.generate_value(source_record)
      record[column.name.to_sym] = value
    end
  end

  # Generate an Arrow record batch from source records.
  #
  # The fields of the batch are the column names with the columns'
  # Arrow types. The records are generated one by one with
  # generate_record while the batch builder consumes them.
  def generate_record_batch(source_records)
    fields = @groonga_columns.map do |column|
      {name: column.name, data_type: column.arrow_type}
    end
    batch_builder = Arrow::RecordBatchBuilder.new(fields)
    records = Enumerator.new do |yielder|
      source_records.each do |source_record|
        yielder << generate_record(source_record)
      end
    end
    batch_builder.append_records(records)
    batch_builder.flush
  end
end
|
168
|
+
|
169
|
+
# A Groonga column: generates the column's value from a source record
# with either a format template or a Ruby expression.
class GroongaColumn
  # "YYYY-MM-DD"
  DATE_PATTERN = /\A(\d{4})-(\d{2})-(\d{2})\z/
  # "YYYY-MM-DD HH:MM:SS +HHMM", which is how Time#to_s formats a
  # non-UTC time. The separators are literal spaces.
  DATE_TIME_PATTERN =
    /\A(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}) ([+-])(\d{2})(\d{2})\z/
  private_constant :DATE_PATTERN, :DATE_TIME_PATTERN

  attr_reader :name
  attr_reader :template
  attr_reader :expression
  attr_reader :type
  attr_reader :restriction

  # name:        the column name.
  # template:    a format string that refers to source columns as
  #              %{name}, or nil to use the expression.
  # expression:  a Ruby expression that is evaluated with the source
  #              columns available as methods.
  # type:        the Groonga type name of the column, or nil.
  # restriction: the restriction of the table; its time_max and
  #              time_min are applied to "Time" columns.
  def initialize(name, template, expression, type, restriction)
    @name = name
    @template = template
    @expression = expression
    @type = type
    @restriction = restriction
  end

  # Generate the column value for a source record.
  #
  # With a template, the template is filled with source_record and the
  # result is cast to the column type. Otherwise the expression is
  # evaluated and its result is used without casting. In both cases
  # the value is normalized afterwards.
  def generate_value(source_record)
    if @template
      value = cast(@template % source_record)
    else
      evaluator = ExpressionEvaluator.new(source_record)
      value = evaluator.evaluate(@expression)
    end
    normalize_value(value)
  end

  # Returns the Arrow data type for the column type. Types that have
  # no special handling here are returned as is.
  def arrow_type
    case @type
    when nil, "ShortText", "Text", "LongText"
      :string
    when "Bool"
      :boolean
    when "Time"
      Arrow::TimestampDataType.new(:nano)
    else
      @type
    end
  end

  private
  # Cast a String generated from the template to the column type.
  #
  # An empty String becomes 0, 0.0 or false for integer, "Float" and
  # "Bool" columns. Raises ConfigError for an unknown type.
  def cast(value)
    case @type
    when nil, "ShortText", "Text", "LongText"
      value
    when /\AU?Int(?:8|16|32|64)\z/
      return 0 if value.empty?
      Integer(value, 10)
    when "Float"
      return 0.0 if value.empty?
      Float(value)
    when "Bool"
      return false if value.empty?
      # Only "0" is false.
      value != "0"
    when "Time"
      cast_time(value)
    else
      raise ConfigError, "Unknown type: #{@type}: #{value.inspect}"
    end
  end

  # Cast "YYYY-MM-DD" and "YYYY-MM-DD HH:MM:SS +HHMM" to a local Time.
  # Any other value is returned as is.
  def cast_time(value)
    case value
    when DATE_PATTERN
      year, month, day = Regexp.last_match.captures.collect do |digits|
        Integer(digits, 10)
      end
      Time.new(year, month, day)
    when DATE_TIME_PATTERN
      match = Regexp.last_match
      year, month, day, hour, minute, second =
        match.captures.first(6).collect do |digits|
          Integer(digits, 10)
        end
      timezone = "#{match[7]}#{match[8]}:#{match[9]}"
      time = Time.new(year, month, day, hour, minute, second, timezone)
      time.utc.localtime
    else
      value
    end
  end

  # Normalize a generated value.
  #
  # For a "Time" column, a value that isn't a Time but responds to
  # to_time is converted, then a Time value is limited to the range of
  # the restriction's time_min and time_max. Values of other columns
  # are returned as is.
  def normalize_value(value)
    case type
    when "Time"
      time_max = @restriction.time_max
      time_min = @restriction.time_min
      if !value.is_a?(Time) && value.respond_to?(:to_time)
        value = value.to_time
      end
      return value if time_max.nil? && time_min.nil?
      return value unless value.is_a?(Time)
      if time_max && value >= time_max
        time_max
      elsif time_min && value <= time_min
        time_min
      else
        value
      end
    else
      value
    end
  end
end
|
279
|
+
|
280
|
+
# Evaluates a column expression against one source record.
#
# The expression is evaluated as Ruby code on a Context that has the
# helper methods below plus one method per source record key that
# returns the value of the key.
class ExpressionEvaluator
  # The object expressions are evaluated on. It inherits from
  # BasicObject so that few other methods are visible to expressions.
  class Context < BasicObject
    # Remove everything that looks like an HTML tag ("<...>") from
    # text.
    def html_untag(text)
      text.gsub(/<.*?>/, "")
    end

    # Escape text so that it can be used as one term of a Groonga
    # query.
    #
    # Text that contains a query syntax special character is quoted,
    # with both '"' and '\' escaped by a backslash: inside a quoted
    # phrase the backslash is still the escape character, so leaving
    # it as is would change the text or swallow the closing quote.
    # "OR" is quoted so that it isn't treated as an operator. Other
    # text is returned as is.
    def groonga_escape_query(text)
      case text
      when /[+\-><~*()"\\: ]/
        escaped = text.gsub(/["\\]/) {|special| "\\#{special}"}
        "\"#{escaped}\""
      when "OR"
        "\"OR\""
      else
        text
      end
    end
  end

  # source_record: the source record; each key becomes a method of
  #                the context that returns the key's value.
  def initialize(source_record)
    @context = Context.new
    # Context is a BasicObject and has no singleton_class method of
    # its own, so Kernel's one is borrowed.
    context_singleton_class =
      Kernel.instance_method(:singleton_class).bind(@context).call
    source_record.each do |key, value|
      context_singleton_class.define_method(key) do
        value
      end
    end
  end

  # Evaluate expression and return its result.
  #
  # NOTE: The expression is run as Ruby code with instance_eval. It
  # must come from a trusted configuration only.
  def evaluate(expression)
    @context.instance_eval(expression, __FILE__, __LINE__)
  end
end
|
313
|
+
end
|
314
|
+
end
|