groonga-command 1.4.5 → 1.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/doc/text/news.md +6 -0
- data/groonga-command.gemspec +7 -6
- data/lib/groonga/command/load.rb +164 -1
- data/lib/groonga/command/version.rb +1 -1
- data/test/command/test-load.rb +54 -1
- metadata +22 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 04a4658a7058ae319eef6558e4b0890cf63f1374cbc18ca6620f89c1713d1079
|
4
|
+
data.tar.gz: 585f68bee46bc0455b5b0be0056636b9b61518bf53354513081191ceb7156996
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6141e8378ce9f64024c481af745a519e598c40b15b313447537d78030f24ce8d7a84ca1f2df52ad06176a1153792f2edbde20cf32d4dce40ff1b043a53780ba3
|
7
|
+
data.tar.gz: eee5697534989943e5d0b038b14af59eea0a811c9aab09e2bf25d4251c7783efac899846c6a631e3de291b16d95fc33f9f88fe5e318be87eba61c7331870deba
|
data/doc/text/news.md
CHANGED
data/groonga-command.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
# -*-
|
1
|
+
# -*- ruby -*-
|
2
2
|
#
|
3
|
-
# Copyright (C) 2012-
|
3
|
+
# Copyright (C) 2012-2020 Sutou Kouhei <kou@clear-code.com>
|
4
4
|
#
|
5
5
|
# This library is free software; you can redistribute it and/or
|
6
6
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -54,12 +54,13 @@ Gem::Specification.new do |spec|
|
|
54
54
|
|
55
55
|
spec.add_runtime_dependency("json")
|
56
56
|
|
57
|
-
spec.add_development_dependency("test-unit")
|
58
|
-
spec.add_development_dependency("test-unit-notify")
|
59
|
-
spec.add_development_dependency("rake")
|
60
57
|
spec.add_development_dependency("bundler")
|
61
58
|
spec.add_development_dependency("packnga")
|
62
|
-
spec.add_development_dependency("yard")
|
59
|
+
spec.add_development_dependency("rake")
|
60
|
+
spec.add_development_dependency("red-arrow")
|
63
61
|
spec.add_development_dependency("redcarpet")
|
62
|
+
spec.add_development_dependency("test-unit")
|
63
|
+
spec.add_development_dependency("test-unit-notify")
|
64
|
+
spec.add_development_dependency("yard")
|
64
65
|
end
|
65
66
|
|
data/lib/groonga/command/load.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2012-
|
1
|
+
# Copyright (C) 2012-2020 Sutou Kouhei <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -16,6 +16,11 @@
|
|
16
16
|
|
17
17
|
require "json"
|
18
18
|
|
19
|
+
begin
|
20
|
+
require "arrow"
|
21
|
+
rescue LoadError
|
22
|
+
end
|
23
|
+
|
19
24
|
require "groonga/command/base"
|
20
25
|
|
21
26
|
module Groonga
|
@@ -66,6 +71,23 @@ module Groonga
|
|
66
71
|
@columns ||= parse_columns(self[:columns])
|
67
72
|
end
|
68
73
|
|
74
|
+
# Builds `Arrow::Table` for data of this `load` command.
|
75
|
+
#
|
76
|
+
# This requires red-arrow gem. If red-arrow gem isn't available,
|
77
|
+
# `NotImplementedError` is raised.
|
78
|
+
#
|
79
|
+
# @return [Arrow::Table, nil] `Arrow::Table` if there is one or more
|
80
|
+
# records, `nil` otherwise.
|
81
|
+
#
|
82
|
+
# @since 1.4.6
|
83
|
+
def build_arrow_table
|
84
|
+
unless defined?(::Arrow)
|
85
|
+
raise NotImplementedError, "red-arrow is required"
|
86
|
+
end
|
87
|
+
builder = ArrowTableBuilder.new(columns, values)
|
88
|
+
builder.build
|
89
|
+
end
|
90
|
+
|
69
91
|
# @return [Boolean] `true` if `output_ids` value is `"yes"`.
|
70
92
|
#
|
71
93
|
# @since 1.3.0
|
@@ -85,6 +107,147 @@ module Groonga
|
|
85
107
|
return columns if columns.nil?
|
86
108
|
columns.split(/\s*,\s*/)
|
87
109
|
end
|
110
|
+
|
111
|
+
class ArrowTableBuilder
|
112
|
+
def initialize(columns, values)
|
113
|
+
@columns = columns
|
114
|
+
@values = values
|
115
|
+
end
|
116
|
+
|
117
|
+
def build
|
118
|
+
raw_table = build_raw_table
|
119
|
+
return nil if raw_table.empty?
|
120
|
+
build_arrow_table(raw_table)
|
121
|
+
end
|
122
|
+
|
123
|
+
private
|
124
|
+
def build_raw_table
|
125
|
+
raw_table = {}
|
126
|
+
if @values.first.is_a?(Array)
|
127
|
+
columns = @columns
|
128
|
+
if columns
|
129
|
+
records = @values
|
130
|
+
else
|
131
|
+
columns = @values.first
|
132
|
+
records = @values[1..-1]
|
133
|
+
end
|
134
|
+
records.each_with_index do |record, i|
|
135
|
+
columns.zip(record).each do |name, value|
|
136
|
+
raw_table[name] ||= []
|
137
|
+
raw_table[name][i] = value
|
138
|
+
end
|
139
|
+
end
|
140
|
+
else
|
141
|
+
@values.each_with_index do |record, i|
|
142
|
+
record.each do |name, value|
|
143
|
+
raw_table[name] ||= []
|
144
|
+
raw_table[name][i] = value
|
145
|
+
end
|
146
|
+
end
|
147
|
+
raw_table.each_key do |key|
|
148
|
+
if @values.size > raw_table[key].size
|
149
|
+
raw_table[key][@values.size - 1] = nil
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
153
|
+
raw_table
|
154
|
+
end
|
155
|
+
|
156
|
+
def build_arrow_table(raw_table)
|
157
|
+
arrow_fields = []
|
158
|
+
arrow_arrays = []
|
159
|
+
raw_table.each do |name, raw_array|
|
160
|
+
sample = raw_array.find {|element| not element.nil?}
|
161
|
+
case sample
|
162
|
+
when Array
|
163
|
+
data_type = nil
|
164
|
+
raw_array.each do |sub_raw_array|
|
165
|
+
next if sub_raw_array.nil?
|
166
|
+
data_type = detect_arrow_data_type(sub_raw_array)
|
167
|
+
break if data_type
|
168
|
+
end
|
169
|
+
data_type ||= :string
|
170
|
+
arrow_array = build_arrow_array(data_type, raw_array)
|
171
|
+
when Hash
|
172
|
+
arrow_array = build_arrow_array(arrow_weight_vector_data_type,
|
173
|
+
raw_array)
|
174
|
+
else
|
175
|
+
data_type = detect_arrow_data_type(raw_array) || :string
|
176
|
+
if data_type == :string
|
177
|
+
raw_array = raw_array.collect do |element|
|
178
|
+
element&.to_s
|
179
|
+
end
|
180
|
+
end
|
181
|
+
data_type = Arrow::DataType.resolve(data_type)
|
182
|
+
arrow_array = data_type.build_array(raw_array)
|
183
|
+
end
|
184
|
+
arrow_fields << Arrow::Field.new(name,
|
185
|
+
arrow_array.value_data_type)
|
186
|
+
arrow_arrays << arrow_array
|
187
|
+
end
|
188
|
+
arrow_schema = Arrow::Schema.new(arrow_fields)
|
189
|
+
Arrow::Table.new(arrow_schema, arrow_arrays)
|
190
|
+
end
|
191
|
+
|
192
|
+
def prepare_raw_array(raw_array)
|
193
|
+
raw_array.collect do |element|
|
194
|
+
case element
|
195
|
+
when Array
|
196
|
+
prepare_raw_array(element)
|
197
|
+
when Hash
|
198
|
+
element.collect do |value, weight|
|
199
|
+
{
|
200
|
+
"value" => value,
|
201
|
+
"weight" => weight,
|
202
|
+
}
|
203
|
+
end
|
204
|
+
else
|
205
|
+
element
|
206
|
+
end
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
210
|
+
def build_arrow_array(data_type, raw_array)
|
211
|
+
arrow_list_field = Arrow::Field.new("item", data_type)
|
212
|
+
arrow_list_data_type = Arrow::ListDataType.new(arrow_list_field)
|
213
|
+
raw_array = prepare_raw_array(raw_array)
|
214
|
+
Arrow::ListArrayBuilder.build(arrow_list_data_type, raw_array)
|
215
|
+
end
|
216
|
+
|
217
|
+
def arrow_weight_vector_data_type
|
218
|
+
Arrow::StructDataType.new("value" => :string,
|
219
|
+
"weight" => :int32)
|
220
|
+
end
|
221
|
+
|
222
|
+
def detect_arrow_data_type(raw_array)
|
223
|
+
type = nil
|
224
|
+
raw_array.each do |element|
|
225
|
+
case element
|
226
|
+
when nil
|
227
|
+
when true, false
|
228
|
+
type ||= :boolean
|
229
|
+
when Integer
|
230
|
+
if element >= (2 ** 63)
|
231
|
+
type = nil if type == :int64
|
232
|
+
type ||= :uint64
|
233
|
+
else
|
234
|
+
type ||= :int64
|
235
|
+
end
|
236
|
+
when Float
|
237
|
+
type = nil if type == :int64
|
238
|
+
type ||= :double
|
239
|
+
when Hash
|
240
|
+
arrow_list_field =
|
241
|
+
Arrow::Field.new("item", arrow_weight_vector_data_type)
|
242
|
+
arrow_list_data_type = Arrow::ListDataType.new(arrow_list_field)
|
243
|
+
return arrow_list_data_type
|
244
|
+
else
|
245
|
+
return :string
|
246
|
+
end
|
247
|
+
end
|
248
|
+
type
|
249
|
+
end
|
250
|
+
end
|
88
251
|
end
|
89
252
|
end
|
90
253
|
end
|
data/test/command/test-load.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2012-
|
1
|
+
# Copyright (C) 2012-2020 Sutou Kouhei <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -116,4 +116,57 @@ class LoadCommandTest < Test::Unit::TestCase
|
|
116
116
|
end
|
117
117
|
end
|
118
118
|
end
|
119
|
+
|
120
|
+
sub_test_case("#build_arrow_table") do
|
121
|
+
def setup
|
122
|
+
omit("red-arrow is needed") unless defined?(Arrow)
|
123
|
+
end
|
124
|
+
|
125
|
+
sub_test_case("Array") do
|
126
|
+
def test_no_columns_argument
|
127
|
+
command = load_command({"values" => [
|
128
|
+
["column1", "column2"],
|
129
|
+
["value1", "value2"],
|
130
|
+
].to_json})
|
131
|
+
assert_equal(Arrow::Table.new({
|
132
|
+
"column1" => ["value1"],
|
133
|
+
"column2" => ["value2"],
|
134
|
+
}),
|
135
|
+
command.build_arrow_table)
|
136
|
+
end
|
137
|
+
|
138
|
+
def test_columns_argument
|
139
|
+
command = load_command({"columns" => ["column1", "column2"].join(", "),
|
140
|
+
"values" => [
|
141
|
+
["value1", "value2"],
|
142
|
+
].to_json})
|
143
|
+
assert_equal(Arrow::Table.new({
|
144
|
+
"column1" => ["value1"],
|
145
|
+
"column2" => ["value2"],
|
146
|
+
}),
|
147
|
+
command.build_arrow_table)
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
sub_test_case("Hash") do
|
152
|
+
def test_integer
|
153
|
+
command = load_command({"values" => [
|
154
|
+
{
|
155
|
+
"column1" => 1,
|
156
|
+
"column2" => 2,
|
157
|
+
},
|
158
|
+
{
|
159
|
+
"column1" => 10,
|
160
|
+
"column2" => 20,
|
161
|
+
},
|
162
|
+
].to_json})
|
163
|
+
columns = {
|
164
|
+
"column1" => Arrow::Int64Array.new([1, 10]),
|
165
|
+
"column2" => Arrow::Int64Array.new([2, 20]),
|
166
|
+
}
|
167
|
+
assert_equal(Arrow::Table.new(columns),
|
168
|
+
command.build_arrow_table)
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
119
172
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: groonga-command
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.5
|
4
|
+
version: 1.4.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-01-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: json
|
@@ -25,7 +25,7 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name: test-unit
|
28
|
+
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name: test-unit-notify
|
42
|
+
name: packnga
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
@@ -67,7 +67,7 @@ dependencies:
|
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name: bundler
|
70
|
+
name: red-arrow
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - ">="
|
@@ -81,7 +81,7 @@ dependencies:
|
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
-
name: packnga
|
84
|
+
name: redcarpet
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - ">="
|
@@ -95,7 +95,7 @@ dependencies:
|
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
|
-
name: yard
|
98
|
+
name: test-unit
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - ">="
|
@@ -109,7 +109,21 @@ dependencies:
|
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
|
-
name: redcarpet
|
112
|
+
name: test-unit-notify
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: yard
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
114
128
|
requirements:
|
115
129
|
- - ">="
|