groonga-command 1.4.5 → 1.4.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1e8e7e4033e77829c575d2ef6832f8ad5c8d8883c0279d5fa56b8f368f66d671
4
- data.tar.gz: ba868ba8b7f5a04b17ace4c18fe438cd7ecd2e3581d072f8cda4431d09f2ca84
3
+ metadata.gz: 04a4658a7058ae319eef6558e4b0890cf63f1374cbc18ca6620f89c1713d1079
4
+ data.tar.gz: 585f68bee46bc0455b5b0be0056636b9b61518bf53354513081191ceb7156996
5
5
  SHA512:
6
- metadata.gz: fcde76968b9b36f013419f3a38ece4caf3e81550b272a3bcf906b4c9e011be6a4d008a91ef79f7578ac0fd9365233994c1cbbf08c0ac0c25c9babfab75887193
7
- data.tar.gz: 6afc4ddf0367b6de2e38de283176f92f82bd95c03d18ed25113983f16b368b207b1728db10d7fb98699766ba1d63c4f8022eb2bbd86f1c95dfa309701cda6f40
6
+ metadata.gz: 6141e8378ce9f64024c481af745a519e598c40b15b313447537d78030f24ce8d7a84ca1f2df52ad06176a1153792f2edbde20cf32d4dce40ff1b043a53780ba3
7
+ data.tar.gz: eee5697534989943e5d0b038b14af59eea0a811c9aab09e2bf25d4251c7783efac899846c6a631e3de291b16d95fc33f9f88fe5e318be87eba61c7331870deba
data/doc/text/news.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # News
2
2
 
3
+ ## 1.4.6: 2020-01-10
4
+
5
+ ### Improvements
6
+
7
+ * {Groonga::Command::Load#build_arrow_table}: Added.
8
+
3
9
  ## 1.4.5: 2019-09-02
4
10
 
5
11
  ### Improvements
@@ -1,6 +1,6 @@
1
- # -*- mode: ruby; coding: utf-8 -*-
1
+ # -*- ruby -*-
2
2
  #
3
- # Copyright (C) 2012-2013 Kouhei Sutou <kou@clear-code.com>
3
+ # Copyright (C) 2012-2020 Sutou Kouhei <kou@clear-code.com>
4
4
  #
5
5
  # This library is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the GNU Lesser General Public
@@ -54,12 +54,13 @@ Gem::Specification.new do |spec|
54
54
 
55
55
  spec.add_runtime_dependency("json")
56
56
 
57
- spec.add_development_dependency("test-unit")
58
- spec.add_development_dependency("test-unit-notify")
59
- spec.add_development_dependency("rake")
60
57
  spec.add_development_dependency("bundler")
61
58
  spec.add_development_dependency("packnga")
62
- spec.add_development_dependency("yard")
59
+ spec.add_development_dependency("rake")
60
+ spec.add_development_dependency("red-arrow")
63
61
  spec.add_development_dependency("redcarpet")
62
+ spec.add_development_dependency("test-unit")
63
+ spec.add_development_dependency("test-unit-notify")
64
+ spec.add_development_dependency("yard")
64
65
  end
65
66
 
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2012-2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2012-2020 Sutou Kouhei <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -16,6 +16,11 @@
16
16
 
17
17
  require "json"
18
18
 
19
+ begin
20
+ require "arrow"
21
+ rescue LoadError
22
+ end
23
+
19
24
  require "groonga/command/base"
20
25
 
21
26
  module Groonga
@@ -66,6 +71,23 @@ module Groonga
66
71
  @columns ||= parse_columns(self[:columns])
67
72
  end
68
73
 
74
+ # Builds `Arrow::Table` for data of this `load` command.
75
+ #
76
+ # This requires red-arrow gem. If red-arrow gem isn't available,
77
+ # `NotImplementedError` is raised.
78
+ #
79
+ # @return [Arrow::Table, nil] `Arrow::Table` if there is one or more
80
+ # records, `nil` otherwise.
81
+ #
82
+ # @since 1.4.6
83
+ def build_arrow_table
84
+ unless defined?(::Arrow)
85
+ raise NotImplementedError, "red-arrow is required"
86
+ end
87
+ builder = ArrowTableBuilder.new(columns, values)
88
+ builder.build
89
+ end
90
+
69
91
  # @return [Boolean] `true` if `output_ids` value is `"yes"`.
70
92
  #
71
93
  # @since 1.3.0
@@ -85,6 +107,147 @@ module Groonga
85
107
  return columns if columns.nil?
86
108
  columns.split(/\s*,\s*/)
87
109
  end
110
+
111
+ class ArrowTableBuilder
112
+ def initialize(columns, values)
113
+ @columns = columns
114
+ @values = values
115
+ end
116
+
117
+ def build
118
+ raw_table = build_raw_table
119
+ return nil if raw_table.empty?
120
+ build_arrow_table(raw_table)
121
+ end
122
+
123
+ private
124
+ def build_raw_table
125
+ raw_table = {}
126
+ if @values.first.is_a?(Array)
127
+ columns = @columns
128
+ if columns
129
+ records = @values
130
+ else
131
+ columns = @values.first
132
+ records = @values[1..-1]
133
+ end
134
+ records.each_with_index do |record, i|
135
+ columns.zip(record).each do |name, value|
136
+ raw_table[name] ||= []
137
+ raw_table[name][i] = value
138
+ end
139
+ end
140
+ else
141
+ @values.each_with_index do |record, i|
142
+ record.each do |name, value|
143
+ raw_table[name] ||= []
144
+ raw_table[name][i] = value
145
+ end
146
+ end
147
+ raw_table.each_key do |key|
148
+ if @values.size > raw_table[key].size
149
+ raw_table[key][@values.size - 1] = nil
150
+ end
151
+ end
152
+ end
153
+ raw_table
154
+ end
155
+
156
+ def build_arrow_table(raw_table)
157
+ arrow_fields = []
158
+ arrow_arrays = []
159
+ raw_table.each do |name, raw_array|
160
+ sample = raw_array.find {|element| not element.nil?}
161
+ case sample
162
+ when Array
163
+ data_type = nil
164
+ raw_array.each do |sub_raw_array|
165
+ next if sub_raw_array.nil?
166
+ data_type = detect_arrow_data_type(sub_raw_array)
167
+ break if data_type
168
+ end
169
+ data_type ||= :string
170
+ arrow_array = build_arrow_array(data_type, raw_array)
171
+ when Hash
172
+ arrow_array = build_arrow_array(arrow_weight_vector_data_type,
173
+ raw_array)
174
+ else
175
+ data_type = detect_arrow_data_type(raw_array) || :string
176
+ if data_type == :string
177
+ raw_array = raw_array.collect do |element|
178
+ element&.to_s
179
+ end
180
+ end
181
+ data_type = Arrow::DataType.resolve(data_type)
182
+ arrow_array = data_type.build_array(raw_array)
183
+ end
184
+ arrow_fields << Arrow::Field.new(name,
185
+ arrow_array.value_data_type)
186
+ arrow_arrays << arrow_array
187
+ end
188
+ arrow_schema = Arrow::Schema.new(arrow_fields)
189
+ Arrow::Table.new(arrow_schema, arrow_arrays)
190
+ end
191
+
192
+ def prepare_raw_array(raw_array)
193
+ raw_array.collect do |element|
194
+ case element
195
+ when Array
196
+ prepare_raw_array(element)
197
+ when Hash
198
+ element.collect do |value, weight|
199
+ {
200
+ "value" => value,
201
+ "weight" => weight,
202
+ }
203
+ end
204
+ else
205
+ element
206
+ end
207
+ end
208
+ end
209
+
210
+ def build_arrow_array(data_type, raw_array)
211
+ arrow_list_field = Arrow::Field.new("item", data_type)
212
+ arrow_list_data_type = Arrow::ListDataType.new(arrow_list_field)
213
+ raw_array = prepare_raw_array(raw_array)
214
+ Arrow::ListArrayBuilder.build(arrow_list_data_type, raw_array)
215
+ end
216
+
217
+ def arrow_weight_vector_data_type
218
+ Arrow::StructDataType.new("value" => :string,
219
+ "weight" => :int32)
220
+ end
221
+
222
+ def detect_arrow_data_type(raw_array)
223
+ type = nil
224
+ raw_array.each do |element|
225
+ case element
226
+ when nil
227
+ when true, false
228
+ type ||= :boolean
229
+ when Integer
230
+ if element >= (2 ** 63)
231
+ type = nil if type == :int64
232
+ type ||= :uint64
233
+ else
234
+ type ||= :int64
235
+ end
236
+ when Float
237
+ type = nil if type == :int64
238
+ type ||= :double
239
+ when Hash
240
+ arrow_list_field =
241
+ Arrow::Field.new("item", arrow_weight_vector_data_type)
242
+ arrow_list_data_type = Arrow::ListDataType.new(arrow_list_field)
243
+ return arrow_list_data_type
244
+ else
245
+ return :string
246
+ end
247
+ end
248
+ type
249
+ end
250
+ end
88
251
  end
89
252
  end
90
253
  end
@@ -16,6 +16,6 @@
16
16
 
17
17
  module Groonga
18
18
  module Command
19
- VERSION = "1.4.5"
19
+ VERSION = "1.4.6"
20
20
  end
21
21
  end
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2012-2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2012-2020 Sutou Kouhei <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -116,4 +116,57 @@ class LoadCommandTest < Test::Unit::TestCase
116
116
  end
117
117
  end
118
118
  end
119
+
120
+ sub_test_case("#build_arrow_table") do
121
+ def setup
122
+ omit("red-arrow is needed") unless defined?(Arrow)
123
+ end
124
+
125
+ sub_test_case("Array") do
126
+ def test_no_columns_argument
127
+ command = load_command({"values" => [
128
+ ["column1", "column2"],
129
+ ["value1", "value2"],
130
+ ].to_json})
131
+ assert_equal(Arrow::Table.new({
132
+ "column1" => ["value1"],
133
+ "column2" => ["value2"],
134
+ }),
135
+ command.build_arrow_table)
136
+ end
137
+
138
+ def test_columns_argument
139
+ command = load_command({"columns" => ["column1", "column2"].join(", "),
140
+ "values" => [
141
+ ["value1", "value2"],
142
+ ].to_json})
143
+ assert_equal(Arrow::Table.new({
144
+ "column1" => ["value1"],
145
+ "column2" => ["value2"],
146
+ }),
147
+ command.build_arrow_table)
148
+ end
149
+ end
150
+
151
+ sub_test_case("Hash") do
152
+ def test_integer
153
+ command = load_command({"values" => [
154
+ {
155
+ "column1" => 1,
156
+ "column2" => 2,
157
+ },
158
+ {
159
+ "column1" => 10,
160
+ "column2" => 20,
161
+ },
162
+ ].to_json})
163
+ columns = {
164
+ "column1" => Arrow::Int64Array.new([1, 10]),
165
+ "column2" => Arrow::Int64Array.new([2, 20]),
166
+ }
167
+ assert_equal(Arrow::Table.new(columns),
168
+ command.build_arrow_table)
169
+ end
170
+ end
171
+ end
119
172
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: groonga-command
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.5
4
+ version: 1.4.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-09-02 00:00:00.000000000 Z
11
+ date: 2020-01-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -25,7 +25,7 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
- name: test-unit
28
+ name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
@@ -39,7 +39,7 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: test-unit-notify
42
+ name: packnga
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -67,7 +67,7 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
- name: bundler
70
+ name: red-arrow
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ">="
@@ -81,7 +81,7 @@ dependencies:
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
- name: packnga
84
+ name: redcarpet
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - ">="
@@ -95,7 +95,7 @@ dependencies:
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
- name: yard
98
+ name: test-unit
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - ">="
@@ -109,7 +109,21 @@ dependencies:
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
111
  - !ruby/object:Gem::Dependency
112
- name: redcarpet
112
+ name: test-unit-notify
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: yard
113
127
  requirement: !ruby/object:Gem::Requirement
114
128
  requirements:
115
129
  - - ">="