groonga-command 1.4.5 → 1.4.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1e8e7e4033e77829c575d2ef6832f8ad5c8d8883c0279d5fa56b8f368f66d671
4
- data.tar.gz: ba868ba8b7f5a04b17ace4c18fe438cd7ecd2e3581d072f8cda4431d09f2ca84
3
+ metadata.gz: 04a4658a7058ae319eef6558e4b0890cf63f1374cbc18ca6620f89c1713d1079
4
+ data.tar.gz: 585f68bee46bc0455b5b0be0056636b9b61518bf53354513081191ceb7156996
5
5
  SHA512:
6
- metadata.gz: fcde76968b9b36f013419f3a38ece4caf3e81550b272a3bcf906b4c9e011be6a4d008a91ef79f7578ac0fd9365233994c1cbbf08c0ac0c25c9babfab75887193
7
- data.tar.gz: 6afc4ddf0367b6de2e38de283176f92f82bd95c03d18ed25113983f16b368b207b1728db10d7fb98699766ba1d63c4f8022eb2bbd86f1c95dfa309701cda6f40
6
+ metadata.gz: 6141e8378ce9f64024c481af745a519e598c40b15b313447537d78030f24ce8d7a84ca1f2df52ad06176a1153792f2edbde20cf32d4dce40ff1b043a53780ba3
7
+ data.tar.gz: eee5697534989943e5d0b038b14af59eea0a811c9aab09e2bf25d4251c7783efac899846c6a631e3de291b16d95fc33f9f88fe5e318be87eba61c7331870deba
data/doc/text/news.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # News
2
2
 
3
+ ## 1.4.6: 2020-01-10
4
+
5
+ ### Improvements
6
+
7
+ * {Groonga::Command::Load#build_arrow_table}: Added.
8
+
3
9
  ## 1.4.5: 2019-09-02
4
10
 
5
11
  ### Improvements
@@ -1,6 +1,6 @@
1
- # -*- mode: ruby; coding: utf-8 -*-
1
+ # -*- ruby -*-
2
2
  #
3
- # Copyright (C) 2012-2013 Kouhei Sutou <kou@clear-code.com>
3
+ # Copyright (C) 2012-2020 Sutou Kouhei <kou@clear-code.com>
4
4
  #
5
5
  # This library is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the GNU Lesser General Public
@@ -54,12 +54,13 @@ Gem::Specification.new do |spec|
54
54
 
55
55
  spec.add_runtime_dependency("json")
56
56
 
57
- spec.add_development_dependency("test-unit")
58
- spec.add_development_dependency("test-unit-notify")
59
- spec.add_development_dependency("rake")
60
57
  spec.add_development_dependency("bundler")
61
58
  spec.add_development_dependency("packnga")
62
- spec.add_development_dependency("yard")
59
+ spec.add_development_dependency("rake")
60
+ spec.add_development_dependency("red-arrow")
63
61
  spec.add_development_dependency("redcarpet")
62
+ spec.add_development_dependency("test-unit")
63
+ spec.add_development_dependency("test-unit-notify")
64
+ spec.add_development_dependency("yard")
64
65
  end
65
66
 
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2012-2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2012-2020 Sutou Kouhei <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -16,6 +16,11 @@
16
16
 
17
17
  require "json"
18
18
 
19
+ begin
20
+ require "arrow"
21
+ rescue LoadError
22
+ end
23
+
19
24
  require "groonga/command/base"
20
25
 
21
26
  module Groonga
@@ -66,6 +71,23 @@ module Groonga
66
71
  @columns ||= parse_columns(self[:columns])
67
72
  end
68
73
 
74
+ # Builds `Arrow::Table` for data of this `load` command.
75
+ #
76
+ # This requires red-arrow gem. If red-arrow gem isn't available,
77
+ # `NotImplementedError` is raised.
78
+ #
79
+ # @return [Arrow::Table, nil] `Arrow::Table` if there is one or more
80
+ # records, `nil` otherwise.
81
+ #
82
+ # @since 1.4.6
83
+ def build_arrow_table
84
+ unless defined?(::Arrow)
85
+ raise NotImplementedError, "red-arrow is required"
86
+ end
87
+ builder = ArrowTableBuilder.new(columns, values)
88
+ builder.build
89
+ end
90
+
69
91
  # @return [Boolean] `true` if `output_ids` value is `"yes"`.
70
92
  #
71
93
  # @since 1.3.0
@@ -85,6 +107,147 @@ module Groonga
85
107
  return columns if columns.nil?
86
108
  columns.split(/\s*,\s*/)
87
109
  end
110
+
111
+ class ArrowTableBuilder
112
+ def initialize(columns, values)
113
+ @columns = columns
114
+ @values = values
115
+ end
116
+
117
+ def build
118
+ raw_table = build_raw_table
119
+ return nil if raw_table.empty?
120
+ build_arrow_table(raw_table)
121
+ end
122
+
123
+ private
124
+ def build_raw_table
125
+ raw_table = {}
126
+ if @values.first.is_a?(Array)
127
+ columns = @columns
128
+ if columns
129
+ records = @values
130
+ else
131
+ columns = @values.first
132
+ records = @values[1..-1]
133
+ end
134
+ records.each_with_index do |record, i|
135
+ columns.zip(record).each do |name, value|
136
+ raw_table[name] ||= []
137
+ raw_table[name][i] = value
138
+ end
139
+ end
140
+ else
141
+ @values.each_with_index do |record, i|
142
+ record.each do |name, value|
143
+ raw_table[name] ||= []
144
+ raw_table[name][i] = value
145
+ end
146
+ end
147
+ raw_table.each_key do |key|
148
+ if @values.size > raw_table[key].size
149
+ raw_table[key][@values.size - 1] = nil
150
+ end
151
+ end
152
+ end
153
+ raw_table
154
+ end
155
+
156
+ def build_arrow_table(raw_table)
157
+ arrow_fields = []
158
+ arrow_arrays = []
159
+ raw_table.each do |name, raw_array|
160
+ sample = raw_array.find {|element| not element.nil?}
161
+ case sample
162
+ when Array
163
+ data_type = nil
164
+ raw_array.each do |sub_raw_array|
165
+ next if sub_raw_array.nil?
166
+ data_type = detect_arrow_data_type(sub_raw_array)
167
+ break if data_type
168
+ end
169
+ data_type ||= :string
170
+ arrow_array = build_arrow_array(data_type, raw_array)
171
+ when Hash
172
+ arrow_array = build_arrow_array(arrow_weight_vector_data_type,
173
+ raw_array)
174
+ else
175
+ data_type = detect_arrow_data_type(raw_array) || :string
176
+ if data_type == :string
177
+ raw_array = raw_array.collect do |element|
178
+ element&.to_s
179
+ end
180
+ end
181
+ data_type = Arrow::DataType.resolve(data_type)
182
+ arrow_array = data_type.build_array(raw_array)
183
+ end
184
+ arrow_fields << Arrow::Field.new(name,
185
+ arrow_array.value_data_type)
186
+ arrow_arrays << arrow_array
187
+ end
188
+ arrow_schema = Arrow::Schema.new(arrow_fields)
189
+ Arrow::Table.new(arrow_schema, arrow_arrays)
190
+ end
191
+
192
+ def prepare_raw_array(raw_array)
193
+ raw_array.collect do |element|
194
+ case element
195
+ when Array
196
+ prepare_raw_array(element)
197
+ when Hash
198
+ element.collect do |value, weight|
199
+ {
200
+ "value" => value,
201
+ "weight" => weight,
202
+ }
203
+ end
204
+ else
205
+ element
206
+ end
207
+ end
208
+ end
209
+
210
+ def build_arrow_array(data_type, raw_array)
211
+ arrow_list_field = Arrow::Field.new("item", data_type)
212
+ arrow_list_data_type = Arrow::ListDataType.new(arrow_list_field)
213
+ raw_array = prepare_raw_array(raw_array)
214
+ Arrow::ListArrayBuilder.build(arrow_list_data_type, raw_array)
215
+ end
216
+
217
+ def arrow_weight_vector_data_type
218
+ Arrow::StructDataType.new("value" => :string,
219
+ "weight" => :int32)
220
+ end
221
+
222
+ def detect_arrow_data_type(raw_array)
223
+ type = nil
224
+ raw_array.each do |element|
225
+ case element
226
+ when nil
227
+ when true, false
228
+ type ||= :boolean
229
+ when Integer
230
+ if element >= (2 ** 63)
231
+ type = nil if type == :int64
232
+ type ||= :uint64
233
+ else
234
+ type ||= :int64
235
+ end
236
+ when Float
237
+ type = nil if type == :int64
238
+ type ||= :double
239
+ when Hash
240
+ arrow_list_field =
241
+ Arrow::Field.new("item", arrow_weight_vector_data_type)
242
+ arrow_list_data_type = Arrow::ListDataType.new(arrow_list_field)
243
+ return arrow_list_data_type
244
+ else
245
+ return :string
246
+ end
247
+ end
248
+ type
249
+ end
250
+ end
88
251
  end
89
252
  end
90
253
  end
@@ -16,6 +16,6 @@
16
16
 
17
17
  module Groonga
18
18
  module Command
19
- VERSION = "1.4.5"
19
+ VERSION = "1.4.6"
20
20
  end
21
21
  end
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2012-2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2012-2020 Sutou Kouhei <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -116,4 +116,57 @@ class LoadCommandTest < Test::Unit::TestCase
116
116
  end
117
117
  end
118
118
  end
119
+
120
+ sub_test_case("#build_arrow_table") do
121
+ def setup
122
+ omit("red-arrow is needed") unless defined?(Arrow)
123
+ end
124
+
125
+ sub_test_case("Array") do
126
+ def test_no_columns_argument
127
+ command = load_command({"values" => [
128
+ ["column1", "column2"],
129
+ ["value1", "value2"],
130
+ ].to_json})
131
+ assert_equal(Arrow::Table.new({
132
+ "column1" => ["value1"],
133
+ "column2" => ["value2"],
134
+ }),
135
+ command.build_arrow_table)
136
+ end
137
+
138
+ def test_columns_argument
139
+ command = load_command({"columns" => ["column1", "column2"].join(", "),
140
+ "values" => [
141
+ ["value1", "value2"],
142
+ ].to_json})
143
+ assert_equal(Arrow::Table.new({
144
+ "column1" => ["value1"],
145
+ "column2" => ["value2"],
146
+ }),
147
+ command.build_arrow_table)
148
+ end
149
+ end
150
+
151
+ sub_test_case("Hash") do
152
+ def test_integer
153
+ command = load_command({"values" => [
154
+ {
155
+ "column1" => 1,
156
+ "column2" => 2,
157
+ },
158
+ {
159
+ "column1" => 10,
160
+ "column2" => 20,
161
+ },
162
+ ].to_json})
163
+ columns = {
164
+ "column1" => Arrow::Int64Array.new([1, 10]),
165
+ "column2" => Arrow::Int64Array.new([2, 20]),
166
+ }
167
+ assert_equal(Arrow::Table.new(columns),
168
+ command.build_arrow_table)
169
+ end
170
+ end
171
+ end
119
172
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: groonga-command
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.5
4
+ version: 1.4.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-09-02 00:00:00.000000000 Z
11
+ date: 2020-01-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json
@@ -25,7 +25,7 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
- name: test-unit
28
+ name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
@@ -39,7 +39,7 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: test-unit-notify
42
+ name: packnga
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -67,7 +67,7 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
- name: bundler
70
+ name: red-arrow
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ">="
@@ -81,7 +81,7 @@ dependencies:
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
- name: packnga
84
+ name: redcarpet
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - ">="
@@ -95,7 +95,7 @@ dependencies:
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
- name: yard
98
+ name: test-unit
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - ">="
@@ -109,7 +109,21 @@ dependencies:
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
111
  - !ruby/object:Gem::Dependency
112
- name: redcarpet
112
+ name: test-unit-notify
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: yard
113
127
  requirement: !ruby/object:Gem::Requirement
114
128
  requirements:
115
129
  - - ">="