groonga-command 1.4.5 → 1.4.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/doc/text/news.md +6 -0
- data/groonga-command.gemspec +7 -6
- data/lib/groonga/command/load.rb +164 -1
- data/lib/groonga/command/version.rb +1 -1
- data/test/command/test-load.rb +54 -1
- metadata +22 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 04a4658a7058ae319eef6558e4b0890cf63f1374cbc18ca6620f89c1713d1079
|
4
|
+
data.tar.gz: 585f68bee46bc0455b5b0be0056636b9b61518bf53354513081191ceb7156996
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6141e8378ce9f64024c481af745a519e598c40b15b313447537d78030f24ce8d7a84ca1f2df52ad06176a1153792f2edbde20cf32d4dce40ff1b043a53780ba3
|
7
|
+
data.tar.gz: eee5697534989943e5d0b038b14af59eea0a811c9aab09e2bf25d4251c7783efac899846c6a631e3de291b16d95fc33f9f88fe5e318be87eba61c7331870deba
|
data/doc/text/news.md
CHANGED
data/groonga-command.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
# -*-
|
1
|
+
# -*- ruby -*-
|
2
2
|
#
|
3
|
-
# Copyright (C) 2012-
|
3
|
+
# Copyright (C) 2012-2020 Sutou Kouhei <kou@clear-code.com>
|
4
4
|
#
|
5
5
|
# This library is free software; you can redistribute it and/or
|
6
6
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -54,12 +54,13 @@ Gem::Specification.new do |spec|
|
|
54
54
|
|
55
55
|
spec.add_runtime_dependency("json")
|
56
56
|
|
57
|
-
spec.add_development_dependency("test-unit")
|
58
|
-
spec.add_development_dependency("test-unit-notify")
|
59
|
-
spec.add_development_dependency("rake")
|
60
57
|
spec.add_development_dependency("bundler")
|
61
58
|
spec.add_development_dependency("packnga")
|
62
|
-
spec.add_development_dependency("
|
59
|
+
spec.add_development_dependency("rake")
|
60
|
+
spec.add_development_dependency("red-arrow")
|
63
61
|
spec.add_development_dependency("redcarpet")
|
62
|
+
spec.add_development_dependency("test-unit")
|
63
|
+
spec.add_development_dependency("test-unit-notify")
|
64
|
+
spec.add_development_dependency("yard")
|
64
65
|
end
|
65
66
|
|
data/lib/groonga/command/load.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2012-
|
1
|
+
# Copyright (C) 2012-2020 Sutou Kouhei <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -16,6 +16,11 @@
|
|
16
16
|
|
17
17
|
require "json"
|
18
18
|
|
19
|
+
begin
|
20
|
+
require "arrow"
|
21
|
+
rescue LoadError
|
22
|
+
end
|
23
|
+
|
19
24
|
require "groonga/command/base"
|
20
25
|
|
21
26
|
module Groonga
|
@@ -66,6 +71,23 @@ module Groonga
|
|
66
71
|
@columns ||= parse_columns(self[:columns])
|
67
72
|
end
|
68
73
|
|
74
|
+
# Builds `Arrow::Table` for data of this `load` command.
|
75
|
+
#
|
76
|
+
# This requires red-arrow gem. If red-arrow gem isn't available,
|
77
|
+
# `NotImplementedError` is raised.
|
78
|
+
#
|
79
|
+
# @return [Arrow::Table, nil] `Arrow::Table` if there is one or more
|
80
|
+
# records, `nil` otherwise.
|
81
|
+
#
|
82
|
+
# @since 1.4.6
|
83
|
+
def build_arrow_table
|
84
|
+
unless defined?(::Arrow)
|
85
|
+
raise NotImplementedError, "red-arrow is required"
|
86
|
+
end
|
87
|
+
builder = ArrowTableBuilder.new(columns, values)
|
88
|
+
builder.build
|
89
|
+
end
|
90
|
+
|
69
91
|
# @return [Boolean] `true` if `output_ids` value is `"yes"`.
|
70
92
|
#
|
71
93
|
# @since 1.3.0
|
@@ -85,6 +107,147 @@ module Groonga
|
|
85
107
|
return columns if columns.nil?
|
86
108
|
columns.split(/\s*,\s*/)
|
87
109
|
end
|
110
|
+
|
111
|
+
class ArrowTableBuilder
|
112
|
+
def initialize(columns, values)
|
113
|
+
@columns = columns
|
114
|
+
@values = values
|
115
|
+
end
|
116
|
+
|
117
|
+
def build
|
118
|
+
raw_table = build_raw_table
|
119
|
+
return nil if raw_table.empty?
|
120
|
+
build_arrow_table(raw_table)
|
121
|
+
end
|
122
|
+
|
123
|
+
private
|
124
|
+
def build_raw_table
|
125
|
+
raw_table = {}
|
126
|
+
if @values.first.is_a?(Array)
|
127
|
+
columns = @columns
|
128
|
+
if columns
|
129
|
+
records = @values
|
130
|
+
else
|
131
|
+
columns = @values.first
|
132
|
+
records = @values[1..-1]
|
133
|
+
end
|
134
|
+
records.each_with_index do |record, i|
|
135
|
+
columns.zip(record).each do |name, value|
|
136
|
+
raw_table[name] ||= []
|
137
|
+
raw_table[name][i] = value
|
138
|
+
end
|
139
|
+
end
|
140
|
+
else
|
141
|
+
@values.each_with_index do |record, i|
|
142
|
+
record.each do |name, value|
|
143
|
+
raw_table[name] ||= []
|
144
|
+
raw_table[name][i] = value
|
145
|
+
end
|
146
|
+
end
|
147
|
+
raw_table.each_key do |key|
|
148
|
+
if @values.size > raw_table[key].size
|
149
|
+
raw_table[key][@values.size - 1] = nil
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
153
|
+
raw_table
|
154
|
+
end
|
155
|
+
|
156
|
+
def build_arrow_table(raw_table)
|
157
|
+
arrow_fields = []
|
158
|
+
arrow_arrays = []
|
159
|
+
raw_table.each do |name, raw_array|
|
160
|
+
sample = raw_array.find {|element| not element.nil?}
|
161
|
+
case sample
|
162
|
+
when Array
|
163
|
+
data_type = nil
|
164
|
+
raw_array.each do |sub_raw_array|
|
165
|
+
next if sub_raw_array.nil?
|
166
|
+
data_type = detect_arrow_data_type(sub_raw_array)
|
167
|
+
break if data_type
|
168
|
+
end
|
169
|
+
data_type ||= :string
|
170
|
+
arrow_array = build_arrow_array(data_type, raw_array)
|
171
|
+
when Hash
|
172
|
+
arrow_array = build_arrow_array(arrow_weight_vector_data_type,
|
173
|
+
raw_array)
|
174
|
+
else
|
175
|
+
data_type = detect_arrow_data_type(raw_array) || :string
|
176
|
+
if data_type == :string
|
177
|
+
raw_array = raw_array.collect do |element|
|
178
|
+
element&.to_s
|
179
|
+
end
|
180
|
+
end
|
181
|
+
data_type = Arrow::DataType.resolve(data_type)
|
182
|
+
arrow_array = data_type.build_array(raw_array)
|
183
|
+
end
|
184
|
+
arrow_fields << Arrow::Field.new(name,
|
185
|
+
arrow_array.value_data_type)
|
186
|
+
arrow_arrays << arrow_array
|
187
|
+
end
|
188
|
+
arrow_schema = Arrow::Schema.new(arrow_fields)
|
189
|
+
Arrow::Table.new(arrow_schema, arrow_arrays)
|
190
|
+
end
|
191
|
+
|
192
|
+
def prepare_raw_array(raw_array)
|
193
|
+
raw_array.collect do |element|
|
194
|
+
case element
|
195
|
+
when Array
|
196
|
+
prepare_raw_array(element)
|
197
|
+
when Hash
|
198
|
+
element.collect do |value, weight|
|
199
|
+
{
|
200
|
+
"value" => value,
|
201
|
+
"weight" => weight,
|
202
|
+
}
|
203
|
+
end
|
204
|
+
else
|
205
|
+
element
|
206
|
+
end
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
210
|
+
def build_arrow_array(data_type, raw_array)
|
211
|
+
arrow_list_field = Arrow::Field.new("item", data_type)
|
212
|
+
arrow_list_data_type = Arrow::ListDataType.new(arrow_list_field)
|
213
|
+
raw_array = prepare_raw_array(raw_array)
|
214
|
+
Arrow::ListArrayBuilder.build(arrow_list_data_type, raw_array)
|
215
|
+
end
|
216
|
+
|
217
|
+
def arrow_weight_vector_data_type
|
218
|
+
Arrow::StructDataType.new("value" => :string,
|
219
|
+
"weight" => :int32)
|
220
|
+
end
|
221
|
+
|
222
|
+
def detect_arrow_data_type(raw_array)
|
223
|
+
type = nil
|
224
|
+
raw_array.each do |element|
|
225
|
+
case element
|
226
|
+
when nil
|
227
|
+
when true, false
|
228
|
+
type ||= :boolean
|
229
|
+
when Integer
|
230
|
+
if element >= (2 ** 63)
|
231
|
+
type = nil if type == :int64
|
232
|
+
type ||= :uint64
|
233
|
+
else
|
234
|
+
type ||= :int64
|
235
|
+
end
|
236
|
+
when Float
|
237
|
+
type = nil if type == :int64
|
238
|
+
type ||= :double
|
239
|
+
when Hash
|
240
|
+
arrow_list_field =
|
241
|
+
Arrow::Field.new("item", arrow_weight_vector_data_type)
|
242
|
+
arrow_list_data_type = Arrow::ListDataType.new(arrow_list_field)
|
243
|
+
return arrow_list_data_type
|
244
|
+
else
|
245
|
+
return :string
|
246
|
+
end
|
247
|
+
end
|
248
|
+
type
|
249
|
+
end
|
250
|
+
end
|
88
251
|
end
|
89
252
|
end
|
90
253
|
end
|
data/test/command/test-load.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2012-
|
1
|
+
# Copyright (C) 2012-2020 Sutou Kouhei <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -116,4 +116,57 @@ class LoadCommandTest < Test::Unit::TestCase
|
|
116
116
|
end
|
117
117
|
end
|
118
118
|
end
|
119
|
+
|
120
|
+
sub_test_case("#build_arrow_table") do
|
121
|
+
def setup
|
122
|
+
omit("red-arrow is needed") unless defined?(Arrow)
|
123
|
+
end
|
124
|
+
|
125
|
+
sub_test_case("Array") do
|
126
|
+
def test_no_columns_argument
|
127
|
+
command = load_command({"values" => [
|
128
|
+
["column1", "column2"],
|
129
|
+
["value1", "value2"],
|
130
|
+
].to_json})
|
131
|
+
assert_equal(Arrow::Table.new({
|
132
|
+
"column1" => ["value1"],
|
133
|
+
"column2" => ["value2"],
|
134
|
+
}),
|
135
|
+
command.build_arrow_table)
|
136
|
+
end
|
137
|
+
|
138
|
+
def test_columns_argument
|
139
|
+
command = load_command({"columns" => ["column1", "column2"].join(", "),
|
140
|
+
"values" => [
|
141
|
+
["value1", "value2"],
|
142
|
+
].to_json})
|
143
|
+
assert_equal(Arrow::Table.new({
|
144
|
+
"column1" => ["value1"],
|
145
|
+
"column2" => ["value2"],
|
146
|
+
}),
|
147
|
+
command.build_arrow_table)
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
sub_test_case("Hash") do
|
152
|
+
def test_integer
|
153
|
+
command = load_command({"values" => [
|
154
|
+
{
|
155
|
+
"column1" => 1,
|
156
|
+
"column2" => 2,
|
157
|
+
},
|
158
|
+
{
|
159
|
+
"column1" => 10,
|
160
|
+
"column2" => 20,
|
161
|
+
},
|
162
|
+
].to_json})
|
163
|
+
columns = {
|
164
|
+
"column1" => Arrow::Int64Array.new([1, 10]),
|
165
|
+
"column2" => Arrow::Int64Array.new([2, 20]),
|
166
|
+
}
|
167
|
+
assert_equal(Arrow::Table.new(columns),
|
168
|
+
command.build_arrow_table)
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
119
172
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: groonga-command
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.5
|
4
|
+
version: 1.4.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-01-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -25,7 +25,7 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: packnga
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
@@ -67,7 +67,7 @@ dependencies:
|
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: red-arrow
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - ">="
|
@@ -81,7 +81,7 @@ dependencies:
|
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
84
|
+
name: redcarpet
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - ">="
|
@@ -95,7 +95,7 @@ dependencies:
|
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
98
|
+
name: test-unit
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - ">="
|
@@ -109,7 +109,21 @@ dependencies:
|
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
112
|
+
name: test-unit-notify
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: yard
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
114
128
|
requirements:
|
115
129
|
- - ">="
|