red-arrow 8.0.0 → 24.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +15 -7
  3. data/ext/arrow/arrow.cpp +67 -0
  4. data/ext/arrow/converters.cpp +10 -0
  5. data/ext/arrow/converters.hpp +310 -46
  6. data/ext/arrow/extconf.rb +41 -22
  7. data/ext/arrow/raw-records.cpp +165 -2
  8. data/ext/arrow/red-arrow.hpp +2 -0
  9. data/ext/arrow/values.cpp +6 -2
  10. data/lib/arrow/array-builder.rb +89 -14
  11. data/{test/test-time32-data-type.rb → lib/arrow/array-computable.rb} +24 -16
  12. data/{test/test-buffer.rb → lib/arrow/array-statistics.rb} +19 -24
  13. data/lib/arrow/array.rb +40 -4
  14. data/lib/arrow/chunked-array.rb +56 -1
  15. data/lib/arrow/column-containable.rb +9 -0
  16. data/lib/arrow/column.rb +49 -4
  17. data/{test/test-tensor.rb → lib/arrow/csv-write-options.rb} +28 -31
  18. data/lib/arrow/data-type.rb +17 -3
  19. data/lib/arrow/decimal128-array-builder.rb +16 -6
  20. data/lib/arrow/decimal128.rb +14 -0
  21. data/lib/arrow/decimal256-array-builder.rb +16 -6
  22. data/lib/arrow/decimal256.rb +14 -0
  23. data/{test/test-float-scalar.rb → lib/arrow/dense-union-array-builder.rb} +27 -24
  24. data/{test/test-boolean-scalar.rb → lib/arrow/dense-union-array.rb} +7 -7
  25. data/lib/arrow/duration-array-builder.rb +27 -0
  26. data/lib/arrow/duration-array.rb +24 -0
  27. data/lib/arrow/duration-data-type.rb +32 -0
  28. data/lib/arrow/expression.rb +6 -2
  29. data/lib/arrow/field-containable.rb +1 -1
  30. data/lib/arrow/field.rb +44 -3
  31. data/lib/arrow/fixed-size-list-array-builder.rb +29 -0
  32. data/lib/arrow/fixed-size-list-data-type.rb +118 -0
  33. data/lib/arrow/function.rb +0 -1
  34. data/lib/arrow/half-float-array-builder.rb +32 -0
  35. data/lib/arrow/half-float-array.rb +24 -0
  36. data/lib/arrow/half-float.rb +118 -0
  37. data/{test/helper/fixture.rb → lib/arrow/input-referable.rb} +7 -6
  38. data/lib/arrow/jruby/array-builder.rb +114 -0
  39. data/lib/arrow/jruby/array.rb +109 -0
  40. data/lib/arrow/jruby/chunked-array.rb +36 -0
  41. data/lib/arrow/jruby/compression-type.rb +26 -0
  42. data/lib/arrow/jruby/csv-read-options.rb +32 -0
  43. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  44. data/lib/arrow/jruby/decimal128.rb +28 -0
  45. data/lib/arrow/jruby/decimal256.rb +28 -0
  46. data/{test/fixture/float-integer.csv → lib/arrow/jruby/error.rb} +7 -4
  47. data/lib/arrow/jruby/file-system.rb +24 -0
  48. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  49. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  50. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  51. data/{test/fixture/integer-float.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  52. data/lib/arrow/jruby/sort-options.rb +24 -0
  53. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  54. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  55. data/lib/arrow/jruby/writable.rb +24 -0
  56. data/lib/arrow/jruby.rb +52 -0
  57. data/{test/test-date32-array.rb → lib/arrow/large-list-array-builder.rb} +10 -5
  58. data/lib/arrow/large-list-data-type.rb +83 -0
  59. data/lib/arrow/libraries.rb +140 -0
  60. data/lib/arrow/list-array-builder.rb +1 -68
  61. data/lib/arrow/list-data-type.rb +3 -38
  62. data/{test/test-dictionary-array.rb → lib/arrow/list-field-resolvable.rb} +26 -17
  63. data/lib/arrow/list-slice-options.rb +76 -0
  64. data/lib/arrow/list-values-appendable.rb +88 -0
  65. data/lib/arrow/loader.rb +15 -96
  66. data/{test/test-decimal128-array.rb → lib/arrow/make-struct-options.rb} +18 -18
  67. data/lib/arrow/raw-table-converter.rb +10 -3
  68. data/lib/arrow/raw-tensor-converter.rb +89 -0
  69. data/lib/arrow/record-batch-file-reader.rb +2 -0
  70. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  71. data/lib/arrow/record-batch.rb +6 -2
  72. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +5 -4
  73. data/lib/arrow/scalar.rb +67 -0
  74. data/lib/arrow/slicer.rb +61 -0
  75. data/lib/arrow/sort-key.rb +3 -3
  76. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  77. data/lib/arrow/sparse-union-array.rb +26 -0
  78. data/lib/arrow/stream-decoder.rb +29 -0
  79. data/{test/test-decimal256-data-type.rb → lib/arrow/stream-listener.rb} +25 -9
  80. data/lib/arrow/string-array-builder.rb +30 -0
  81. data/lib/arrow/struct-array-builder.rb +0 -5
  82. data/lib/arrow/table-formatter.rb +38 -8
  83. data/lib/arrow/table-list-formatter.rb +3 -3
  84. data/lib/arrow/table-loader.rb +11 -5
  85. data/lib/arrow/table-saver.rb +4 -3
  86. data/lib/arrow/table-table-formatter.rb +7 -0
  87. data/lib/arrow/table.rb +180 -33
  88. data/lib/arrow/tensor.rb +144 -0
  89. data/lib/arrow/time-unit.rb +31 -0
  90. data/lib/arrow/time32-array-builder.rb +2 -14
  91. data/lib/arrow/time32-data-type.rb +9 -38
  92. data/lib/arrow/time64-array-builder.rb +2 -14
  93. data/lib/arrow/time64-data-type.rb +9 -38
  94. data/lib/arrow/timestamp-array-builder.rb +3 -15
  95. data/lib/arrow/timestamp-data-type.rb +9 -34
  96. data/{test/test-date64-array.rb → lib/arrow/timestamp-parser.rb} +14 -6
  97. data/lib/arrow/union-array-builder.rb +59 -0
  98. data/lib/arrow/union-array.rb +26 -0
  99. data/lib/arrow/version.rb +1 -1
  100. data/lib/arrow.rb +2 -7
  101. data/red-arrow.gemspec +74 -11
  102. metadata +85 -210
  103. data/test/fixture/TestOrcFile.test1.orc +0 -0
  104. data/test/fixture/with-header-float.csv +0 -20
  105. data/test/fixture/with-header.csv +0 -20
  106. data/test/fixture/without-header-float.csv +0 -19
  107. data/test/fixture/without-header.csv +0 -19
  108. data/test/helper/omittable.rb +0 -36
  109. data/test/helper.rb +0 -30
  110. data/test/raw-records/test-basic-arrays.rb +0 -395
  111. data/test/raw-records/test-dense-union-array.rb +0 -521
  112. data/test/raw-records/test-list-array.rb +0 -610
  113. data/test/raw-records/test-map-array.rb +0 -478
  114. data/test/raw-records/test-multiple-columns.rb +0 -65
  115. data/test/raw-records/test-sparse-union-array.rb +0 -511
  116. data/test/raw-records/test-struct-array.rb +0 -515
  117. data/test/raw-records/test-table.rb +0 -47
  118. data/test/run-test.rb +0 -71
  119. data/test/test-array-builder.rb +0 -136
  120. data/test/test-array.rb +0 -325
  121. data/test/test-bigdecimal.rb +0 -40
  122. data/test/test-binary-dictionary-array-builder.rb +0 -103
  123. data/test/test-chunked-array.rb +0 -183
  124. data/test/test-column.rb +0 -92
  125. data/test/test-csv-loader.rb +0 -250
  126. data/test/test-data-type.rb +0 -83
  127. data/test/test-decimal128-array-builder.rb +0 -112
  128. data/test/test-decimal128-data-type.rb +0 -31
  129. data/test/test-decimal128.rb +0 -102
  130. data/test/test-decimal256-array-builder.rb +0 -112
  131. data/test/test-decimal256-array.rb +0 -38
  132. data/test/test-decimal256.rb +0 -102
  133. data/test/test-dense-union-data-type.rb +0 -41
  134. data/test/test-dictionary-data-type.rb +0 -40
  135. data/test/test-expression.rb +0 -40
  136. data/test/test-feather.rb +0 -49
  137. data/test/test-field.rb +0 -91
  138. data/test/test-file-output-stream.rb +0 -54
  139. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  140. data/test/test-fixed-size-binary-array.rb +0 -36
  141. data/test/test-function.rb +0 -210
  142. data/test/test-group.rb +0 -180
  143. data/test/test-list-array-builder.rb +0 -79
  144. data/test/test-list-array.rb +0 -32
  145. data/test/test-list-data-type.rb +0 -69
  146. data/test/test-map-array-builder.rb +0 -110
  147. data/test/test-map-array.rb +0 -33
  148. data/test/test-memory-view.rb +0 -434
  149. data/test/test-orc.rb +0 -173
  150. data/test/test-record-batch-builder.rb +0 -125
  151. data/test/test-record-batch-file-reader.rb +0 -115
  152. data/test/test-record-batch-iterator.rb +0 -37
  153. data/test/test-record-batch-reader.rb +0 -46
  154. data/test/test-record-batch.rb +0 -182
  155. data/test/test-schema.rb +0 -134
  156. data/test/test-slicer.rb +0 -487
  157. data/test/test-sort-indices.rb +0 -40
  158. data/test/test-sort-key.rb +0 -81
  159. data/test/test-sort-options.rb +0 -58
  160. data/test/test-sparse-union-data-type.rb +0 -41
  161. data/test/test-string-dictionary-array-builder.rb +0 -103
  162. data/test/test-struct-array-builder.rb +0 -184
  163. data/test/test-struct-array.rb +0 -94
  164. data/test/test-struct-data-type.rb +0 -112
  165. data/test/test-table.rb +0 -1123
  166. data/test/test-time.rb +0 -288
  167. data/test/test-time32-array.rb +0 -81
  168. data/test/test-time64-array.rb +0 -81
  169. data/test/test-time64-data-type.rb +0 -42
  170. data/test/test-timestamp-array.rb +0 -45
  171. data/test/test-timestamp-data-type.rb +0 -42
  172. data/test/values/test-basic-arrays.rb +0 -325
  173. data/test/values/test-dense-union-array.rb +0 -509
  174. data/test/values/test-dictionary-array.rb +0 -295
  175. data/test/values/test-list-array.rb +0 -571
  176. data/test/values/test-map-array.rb +0 -466
  177. data/test/values/test-sparse-union-array.rb +0 -500
  178. data/test/values/test-struct-array.rb +0 -512
@@ -84,12 +84,13 @@ namespace red_arrow {
84
84
  VISIT(UInt16)
85
85
  VISIT(UInt32)
86
86
  VISIT(UInt64)
87
- // TODO
88
- // VISIT(HalfFloat)
87
+ VISIT(HalfFloat)
89
88
  VISIT(Float)
90
89
  VISIT(Double)
91
90
  VISIT(Binary)
91
+ VISIT(LargeBinary)
92
92
  VISIT(String)
93
+ VISIT(LargeString)
93
94
  VISIT(FixedSizeBinary)
94
95
  VISIT(Date32)
95
96
  VISIT(Date64)
@@ -99,7 +100,10 @@ namespace red_arrow {
99
100
  VISIT(MonthInterval)
100
101
  VISIT(DayTimeInterval)
101
102
  VISIT(MonthDayNanoInterval)
103
+ VISIT(Duration)
102
104
  VISIT(List)
105
+ VISIT(LargeList)
106
+ VISIT(FixedSizeList)
103
107
  VISIT(Struct)
104
108
  VISIT(Map)
105
109
  VISIT(SparseUnion)
@@ -145,6 +149,133 @@ namespace red_arrow {
145
149
  // The number of columns.
146
150
  const int n_columns_;
147
151
  };
152
+
153
+ class RawRecordsProducer : private Converter, public arrow::ArrayVisitor {
154
+ public:
155
+ explicit RawRecordsProducer()
156
+ : Converter(),
157
+ record_(Qnil),
158
+ column_index_(0),
159
+ row_offset_(0) {
160
+ }
161
+
162
+ void produce(const arrow::RecordBatch& record_batch) {
163
+ rb::protect([&] {
164
+ const auto n_columns = record_batch.num_columns();
165
+ const auto n_rows = record_batch.num_rows();
166
+ for (int64_t i = 0; i < n_rows; ++i) {
167
+ record_ = rb_ary_new_capa(n_columns);
168
+ row_offset_ = i;
169
+ for (int i = 0; i < n_columns; ++i) {
170
+ const auto array = record_batch.column(i).get();
171
+ column_index_ = i;
172
+ check_status(array->Accept(this),
173
+ "[record-batch][each-raw-record]");
174
+ }
175
+ rb_yield(record_);
176
+ }
177
+ return Qnil;
178
+ });
179
+ }
180
+
181
+ void produce(const arrow::Table& table) {
182
+ rb::protect([&] {
183
+ const auto n_columns = table.num_columns();
184
+ const auto n_rows = table.num_rows();
185
+ std::vector<int> chunk_indexes(n_columns);
186
+ std::vector<int64_t> row_offsets(n_columns);
187
+ for (int64_t i_row = 0; i_row < n_rows; ++i_row) {
188
+ record_ = rb_ary_new_capa(n_columns);
189
+ for (int i_column = 0; i_column < n_columns; ++i_column) {
190
+ column_index_ = i_column;
191
+ const auto chunked_array = table.column(i_column).get();
192
+ auto& chunk_index = chunk_indexes[i_column];
193
+ auto& row_offset = row_offsets[i_column];
194
+ auto array = chunked_array->chunk(chunk_index).get();
195
+ while (array->length() == row_offset) {
196
+ ++chunk_index;
197
+ row_offset = 0;
198
+ array = chunked_array->chunk(chunk_index).get();
199
+ }
200
+ row_offset_ = row_offset;
201
+ check_status(array->Accept(this),
202
+ "[table][each-raw-record]");
203
+ ++row_offset;
204
+ }
205
+ rb_yield(record_);
206
+ }
207
+
208
+ return Qnil;
209
+ });
210
+ }
211
+
212
+ #define VISIT(TYPE) \
213
+ arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
214
+ convert(array); \
215
+ return arrow::Status::OK(); \
216
+ }
217
+
218
+ VISIT(Null)
219
+ VISIT(Boolean)
220
+ VISIT(Int8)
221
+ VISIT(Int16)
222
+ VISIT(Int32)
223
+ VISIT(Int64)
224
+ VISIT(UInt8)
225
+ VISIT(UInt16)
226
+ VISIT(UInt32)
227
+ VISIT(UInt64)
228
+ VISIT(HalfFloat)
229
+ VISIT(Float)
230
+ VISIT(Double)
231
+ VISIT(Binary)
232
+ VISIT(LargeBinary)
233
+ VISIT(String)
234
+ VISIT(LargeString)
235
+ VISIT(FixedSizeBinary)
236
+ VISIT(Date32)
237
+ VISIT(Date64)
238
+ VISIT(Time32)
239
+ VISIT(Time64)
240
+ VISIT(Timestamp)
241
+ VISIT(MonthInterval)
242
+ VISIT(DayTimeInterval)
243
+ VISIT(MonthDayNanoInterval)
244
+ VISIT(Duration)
245
+ VISIT(List)
246
+ VISIT(LargeList)
247
+ VISIT(FixedSizeList)
248
+ VISIT(Struct)
249
+ VISIT(Map)
250
+ VISIT(SparseUnion)
251
+ VISIT(DenseUnion)
252
+ VISIT(Dictionary)
253
+ VISIT(Decimal128)
254
+ VISIT(Decimal256)
255
+ // TODO
256
+ // VISIT(Extension)
257
+
258
+ #undef VISIT
259
+
260
+ private:
261
+ template <typename ArrayType>
262
+ void convert(const ArrayType& array) {
263
+ auto value = Qnil;
264
+ if (!array.IsNull(row_offset_)) {
265
+ value = convert_value(array, row_offset_);
266
+ }
267
+ rb_ary_store(record_, column_index_, value);
268
+ }
269
+
270
+ // Destination for converted record.
271
+ VALUE record_;
272
+
273
+ // The current column index.
274
+ int column_index_;
275
+
276
+ // The current row offset.
277
+ int64_t row_offset_;
278
+ };
148
279
  }
149
280
 
150
281
  VALUE
@@ -182,4 +313,36 @@ namespace red_arrow {
182
313
 
183
314
  return records;
184
315
  }
316
+
317
+ VALUE
318
+ record_batch_each_raw_record(VALUE rb_record_batch) {
319
+ auto garrow_record_batch = GARROW_RECORD_BATCH(RVAL2GOBJ(rb_record_batch));
320
+ auto record_batch = garrow_record_batch_get_raw(garrow_record_batch).get();
321
+ RETURN_SIZED_ENUMERATOR(rb_record_batch, 0, nullptr, record_batch->num_rows());
322
+
323
+ try {
324
+ RawRecordsProducer producer;
325
+ producer.produce(*record_batch);
326
+ } catch (rb::State& state) {
327
+ state.jump();
328
+ }
329
+
330
+ return Qnil;
331
+ }
332
+
333
+ VALUE
334
+ table_each_raw_record(VALUE rb_table) {
335
+ auto garrow_table = GARROW_TABLE(RVAL2GOBJ(rb_table));
336
+ auto table = garrow_table_get_raw(garrow_table).get();
337
+ RETURN_SIZED_ENUMERATOR(rb_table, 0, nullptr, table->num_rows());
338
+
339
+ try {
340
+ RawRecordsProducer producer;
341
+ producer.produce(*table);
342
+ } catch (rb::State& state) {
343
+ state.jump();
344
+ }
345
+
346
+ return Qnil;
347
+ }
185
348
  }
@@ -59,6 +59,8 @@ namespace red_arrow {
59
59
 
60
60
  VALUE record_batch_raw_records(VALUE obj);
61
61
  VALUE table_raw_records(VALUE obj);
62
+ VALUE record_batch_each_raw_record(VALUE obj);
63
+ VALUE table_each_raw_record(VALUE obj);
62
64
 
63
65
  inline VALUE time_unit_to_scale(const arrow::TimeUnit::type unit) {
64
66
  switch (unit) {
data/ext/arrow/values.cpp CHANGED
@@ -65,12 +65,13 @@ namespace red_arrow {
65
65
  VISIT(UInt16)
66
66
  VISIT(UInt32)
67
67
  VISIT(UInt64)
68
- // TODO
69
- // VISIT(HalfFloat)
68
+ VISIT(HalfFloat)
70
69
  VISIT(Float)
71
70
  VISIT(Double)
72
71
  VISIT(Binary)
72
+ VISIT(LargeBinary)
73
73
  VISIT(String)
74
+ VISIT(LargeString)
74
75
  VISIT(FixedSizeBinary)
75
76
  VISIT(Date32)
76
77
  VISIT(Date64)
@@ -80,7 +81,10 @@ namespace red_arrow {
80
81
  VISIT(MonthInterval)
81
82
  VISIT(DayTimeInterval)
82
83
  VISIT(MonthDayNanoInterval)
84
+ VISIT(Duration)
83
85
  VISIT(List)
86
+ VISIT(LargeList)
87
+ VISIT(FixedSizeList)
84
88
  VISIT(Struct)
85
89
  VISIT(Map)
86
90
  VISIT(SparseUnion)
@@ -33,6 +33,11 @@ module Arrow
33
33
  end
34
34
  if builder_info
35
35
  builder = builder_info[:builder]
36
+ if builder.nil? and builder_info[:builder_type]
37
+ builder = create_builder(builder_info)
38
+ end
39
+ end
40
+ if builder
36
41
  builder.build(values)
37
42
  else
38
43
  Arrow::StringArray.new(values)
@@ -69,14 +74,23 @@ module Arrow
69
74
  detected: true,
70
75
  }
71
76
  when Integer
72
- if value < 0
77
+ builder_info ||= {}
78
+ min = builder_info[:min] || value
79
+ max = builder_info[:max] || value
80
+ min = value if value < min
81
+ max = value if value > max
82
+
83
+ if builder_info[:builder_type] == :int || value < 0
73
84
  {
74
- builder: IntArrayBuilder.new,
75
- detected: true,
85
+ builder_type: :int,
86
+ min: min,
87
+ max: max,
76
88
  }
77
89
  else
78
90
  {
79
- builder: UIntArrayBuilder.new,
91
+ builder_type: :uint,
92
+ min: min,
93
+ max: max,
80
94
  }
81
95
  end
82
96
  when Time
@@ -121,28 +135,44 @@ module Arrow
121
135
  detected: true,
122
136
  }
123
137
  when BigDecimal
124
- if value.to_arrow.is_a?(Decimal128)
125
- {
126
- builder: Decimal128ArrayBuilder.new,
127
- }
128
- else
138
+ builder_info ||= {}
139
+ if builder_info[:builder] or value.nan? or value.infinite?
129
140
  {
130
- builder: Decimal256ArrayBuilder.new,
141
+ builder: StringArrayBuilder.new,
131
142
  detected: true,
132
143
  }
144
+ else
145
+ precision = [builder_info[:precision] || 0, value.precision].max
146
+ scale = [builder_info[:scale] || 0, value.scale].max
147
+ if precision <= Decimal128DataType::MAX_PRECISION
148
+ {
149
+ builder_type: :decimal128,
150
+ precision: precision,
151
+ scale: scale,
152
+ }
153
+ else
154
+ {
155
+ builder_type: :decimal256,
156
+ precision: precision,
157
+ scale: scale,
158
+ }
159
+ end
133
160
  end
134
161
  when ::Array
135
- sub_builder_info = nil
162
+ sub_builder_info = builder_info && builder_info[:value_builder_info]
136
163
  value.each do |sub_value|
137
164
  sub_builder_info = detect_builder_info(sub_value, sub_builder_info)
138
165
  break if sub_builder_info and sub_builder_info[:detected]
139
166
  end
140
- if sub_builder_info and sub_builder_info[:detected]
141
- sub_value_data_type = sub_builder_info[:builder].value_data_type
167
+ if sub_builder_info
168
+ sub_builder = sub_builder_info[:builder] || create_builder(sub_builder_info)
169
+ return sub_builder_info unless sub_builder
170
+ sub_value_data_type = sub_builder.value_data_type
142
171
  field = Field.new("item", sub_value_data_type)
143
172
  {
144
173
  builder: ListArrayBuilder.new(ListDataType.new(field)),
145
- detected: true,
174
+ value_builder_info: sub_builder_info,
175
+ detected: sub_builder_info[:detected],
146
176
  }
147
177
  else
148
178
  builder_info
@@ -154,6 +184,51 @@ module Arrow
154
184
  }
155
185
  end
156
186
  end
187
+
188
+ def create_builder(builder_info)
189
+ builder_type = builder_info[:builder_type]
190
+ case builder_type
191
+ when :decimal128
192
+ data_type = Decimal128DataType.new(builder_info[:precision],
193
+ builder_info[:scale])
194
+ Decimal128ArrayBuilder.new(data_type)
195
+ when :decimal256
196
+ data_type = Decimal256DataType.new(builder_info[:precision],
197
+ builder_info[:scale])
198
+ Decimal256ArrayBuilder.new(data_type)
199
+ when :int
200
+ min = builder_info[:min]
201
+ max = builder_info[:max]
202
+
203
+ if GLib::MININT8 <= min && max <= GLib::MAXINT8
204
+ Int8ArrayBuilder.new
205
+ elsif GLib::MININT16 <= min && max <= GLib::MAXINT16
206
+ Int16ArrayBuilder.new
207
+ elsif GLib::MININT32 <= min && max <= GLib::MAXINT32
208
+ Int32ArrayBuilder.new
209
+ elsif GLib::MININT64 <= min && max <= GLib::MAXINT64
210
+ Int64ArrayBuilder.new
211
+ else
212
+ StringArrayBuilder.new
213
+ end
214
+ when :uint
215
+ max = builder_info[:max]
216
+
217
+ if max <= GLib::MAXUINT8
218
+ UInt8ArrayBuilder.new
219
+ elsif max <= GLib::MAXUINT16
220
+ UInt16ArrayBuilder.new
221
+ elsif max <= GLib::MAXUINT32
222
+ UInt32ArrayBuilder.new
223
+ elsif max <= GLib::MAXUINT64
224
+ UInt64ArrayBuilder.new
225
+ else
226
+ StringArrayBuilder.new
227
+ end
228
+ else
229
+ nil
230
+ end
231
+ end
157
232
  end
158
233
 
159
234
  def build(values)
@@ -15,28 +15,36 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- class Time32DataTypeTest < Test::Unit::TestCase
19
- sub_test_case(".new") do
20
- test("Arrow::TimeUnit") do
21
- assert_equal("time32[ms]",
22
- Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI).to_s)
18
+ module Arrow
19
+ module ArrayComputable
20
+ def min(options: nil)
21
+ compute("min", options: options).value
23
22
  end
24
23
 
25
- test("Symbol") do
26
- assert_equal("time32[ms]",
27
- Arrow::Time32DataType.new(:milli).to_s)
24
+ def max(options: nil)
25
+ compute("max", options: options).value
28
26
  end
29
27
 
30
- test("unit: Arrow::TimeUnit") do
31
- data_type = Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI)
32
- assert_equal("time32[ms]",
33
- data_type.to_s)
28
+ def uniq
29
+ unique.values
34
30
  end
35
31
 
36
- test("unit: Symbol") do
37
- data_type = Arrow::Time32DataType.new(unit: :milli)
38
- assert_equal("time32[ms]",
39
- data_type.to_s)
32
+ # Finds the index of the first occurrence of a given value.
33
+ #
34
+ # @param value [Object] The value to be compared.
35
+ #
36
+ # @return [Integer] The index of the first occurrence of a given
37
+ # value on found, -1 on not found.
38
+ #
39
+ # @since 12.0.0
40
+ def index(value)
41
+ value = Scalar.resolve(value, value_data_type)
42
+ compute("index", options: {value: value}).value
43
+ end
44
+
45
+ private
46
+ def compute(name, options: nil)
47
+ Function.find(name).execute([self], options).value
40
48
  end
41
49
  end
42
50
  end
@@ -15,33 +15,28 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- class BufferTest < Test::Unit::TestCase
19
- sub_test_case(".new") do
20
- test("GC") do
21
- data = "Hello"
22
- data_id = data.object_id
23
- _buffer = Arrow::Buffer.new(data)
24
- data = nil
25
- GC.start
26
- assert_equal("Hello", ObjectSpace._id2ref(data_id))
27
- end
28
- end
29
-
30
- sub_test_case("instance methods") do
31
- def setup
32
- @buffer = Arrow::Buffer.new("Hello")
33
- end
34
-
35
- sub_test_case("#==") do
36
- test("Arrow::Buffer") do
37
- assert do
38
- @buffer == @buffer
18
+ module Arrow
19
+ class ArrayStatistics
20
+ if method_defined?(:null_count_exact)
21
+ alias_method :null_count_raw, :null_count
22
+ def null_count
23
+ return nil unless has_null_count?
24
+ if null_count_exact?
25
+ null_count_exact
26
+ else
27
+ null_count_approximate
39
28
  end
40
29
  end
30
+ end
41
31
 
42
- test("not Arrow::Buffer") do
43
- assert do
44
- not (@buffer == 29)
32
+ if method_defined?(:distinct_count_exact)
33
+ alias_method :distinct_count_raw, :distinct_count
34
+ def distinct_count
35
+ return nil unless has_distinct_count?
36
+ if distinct_count_exact?
37
+ distinct_count_exact
38
+ else
39
+ distinct_count_approximate
45
40
  end
46
41
  end
47
42
  end
data/lib/arrow/array.rb CHANGED
@@ -18,8 +18,11 @@
18
18
  module Arrow
19
19
  class Array
20
20
  include Enumerable
21
+
22
+ include ArrayComputable
21
23
  include GenericFilterable
22
24
  include GenericTakeable
25
+ include InputReferable
23
26
 
24
27
  class << self
25
28
  def new(*args)
@@ -30,9 +33,32 @@ module Arrow
30
33
  end
31
34
 
32
35
  def builder_class
33
- builder_class_name = "#{name}Builder"
34
- return nil unless const_defined?(builder_class_name)
35
- const_get(builder_class_name)
36
+ local_name = name.split("::").last
37
+ builder_class_name = "#{local_name}Builder"
38
+ return nil unless Arrow.const_defined?(builder_class_name)
39
+ Arrow.const_get(builder_class_name)
40
+ end
41
+
42
+ # @api private
43
+ def try_convert(value)
44
+ case value
45
+ when ::Array
46
+ begin
47
+ new(value)
48
+ rescue ArgumentError
49
+ nil
50
+ end
51
+ else
52
+ if value.respond_to?(:to_arrow_array)
53
+ begin
54
+ value.to_arrow_array
55
+ rescue RangeError
56
+ nil
57
+ end
58
+ else
59
+ nil
60
+ end
61
+ end
36
62
  end
37
63
  end
38
64
 
@@ -67,6 +93,8 @@ module Arrow
67
93
  equal_options(other, options)
68
94
  end
69
95
 
96
+ alias_method :size, :length
97
+
70
98
  def each
71
99
  return to_enum(__method__) unless block_given?
72
100
 
@@ -87,6 +115,14 @@ module Arrow
87
115
  self
88
116
  end
89
117
 
118
+ def to_arrow_array
119
+ self
120
+ end
121
+
122
+ def to_arrow_chunked_array
123
+ ChunkedArray.new([self])
124
+ end
125
+
90
126
  alias_method :value_data_type_raw, :value_data_type
91
127
  def value_data_type
92
128
  @value_data_type ||= value_data_type_raw
@@ -217,7 +253,7 @@ module Arrow
217
253
  "[array][resolve] need to implement " +
218
254
  "a feature that building #{value_data_type} array " +
219
255
  "from raw Ruby Array"
220
- raise NotImpelemented, message
256
+ raise NotImplementedError, message
221
257
  end
222
258
  other_array
223
259
  elsif other_array.respond_to?(:value_data_type)
@@ -18,8 +18,35 @@
18
18
  module Arrow
19
19
  class ChunkedArray
20
20
  include Enumerable
21
+
22
+ include ArrayComputable
21
23
  include GenericFilterable
22
24
  include GenericTakeable
25
+ include InputReferable
26
+
27
+ def freeze
28
+ unless frozen?
29
+ # Ensure caching
30
+ chunks
31
+ end
32
+ super
33
+ end
34
+
35
+ def to_arrow
36
+ self
37
+ end
38
+
39
+ def to_arrow_array
40
+ if n_chunks.zero?
41
+ value_data_type.build_array([])
42
+ else
43
+ combine
44
+ end
45
+ end
46
+
47
+ def to_arrow_chunked_array
48
+ self
49
+ end
23
50
 
24
51
  alias_method :size, :n_rows
25
52
  unless method_defined?(:length)
@@ -28,7 +55,16 @@ module Arrow
28
55
 
29
56
  alias_method :chunks_raw, :chunks
30
57
  def chunks
31
- @chunks ||= chunks_raw
58
+ @chunks ||= chunks_raw.tap do |_chunks|
59
+ _chunks.each do |chunk|
60
+ share_input(chunk)
61
+ end
62
+ end
63
+ end
64
+
65
+ alias_method :get_chunk_raw, :get_chunk
66
+ def get_chunk(i)
67
+ chunks[i]
32
68
  end
33
69
 
34
70
  def null?(i)
@@ -87,5 +123,24 @@ module Arrow
87
123
  first_chunk.class.new(to_a)
88
124
  end
89
125
  end
126
+
127
+ def count(options: nil)
128
+ compute("count", options: options).value
129
+ end
130
+
131
+ def sum(options: nil)
132
+ compute("sum", options: options).value
133
+ end
134
+
135
+ def unique
136
+ compute("unique")
137
+ end
138
+
139
+ def cast(target_data_type, options: nil)
140
+ casted_chunks = chunks.collect do |chunk|
141
+ chunk.cast(target_data_type, options)
142
+ end
143
+ self.class.new(casted_chunks)
144
+ end
90
145
  end
91
146
  end
@@ -143,5 +143,14 @@ module Arrow
143
143
  find_column(selector)
144
144
  end
145
145
  end
146
+
147
+ # Return column names in this object.
148
+ #
149
+ # @return [::Array<String>] column names.
150
+ #
151
+ # @since 11.0.0
152
+ def column_names
153
+ @column_names ||= columns.collect(&:name)
154
+ end
146
155
  end
147
156
  end