red-arrow 0.15.1 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -53,16 +53,66 @@ module Arrow
53
53
  #
54
54
  # @example Create a list data type with field description
55
55
  # Arrow::ListDataType.new(field: {name: "visible", type: :boolean})
56
- def initialize(field)
57
- if field.is_a?(Hash) and field.key?(:field)
58
- description = field
59
- field = description[:field]
60
- end
61
- if field.is_a?(Hash)
62
- field_description = field
63
- field = Field.new(field_description)
56
+ #
57
+ # @overload initialize(data_type)
58
+ #
59
+ # @param data_type [Arrow::DataType, String, Symbol,
60
+ # ::Array<String>, ::Array<Symbol>, Hash] The element data
61
+ # type of the list data type. A field is created with the
62
+ # default name `"item"` from the data type automatically.
63
+ #
64
+ # See {Arrow::DataType.resolve} for how to specify data type.
65
+ #
66
+ # @example Create a list data type with {Arrow::DataType}
67
+ # Arrow::ListDataType.new(Arrow::BooleanDataType.new)
68
+ #
69
+ # @example Create a list data type with data type name as String
70
+ # Arrow::ListDataType.new("boolean")
71
+ #
72
+ # @example Create a list data type with data type name as Symbol
73
+ # Arrow::ListDataType.new(:boolean)
74
+ #
75
+ # @example Create a list data type with data type as Array
76
+ # Arrow::ListDataType.new([:time32, :milli])
77
+ def initialize(arg)
78
+ data_type = resolve_data_type(arg)
79
+ if data_type
80
+ field = Field.new(default_field_name, data_type)
81
+ else
82
+ field = resolve_field(arg)
64
83
  end
65
84
  initialize_raw(field)
66
85
  end
86
+
87
+ private
88
+ def resolve_data_type(arg)
89
+ case arg
90
+ when DataType, String, Symbol, ::Array
91
+ DataType.resolve(arg)
92
+ when Hash
93
+ return nil if arg[:name]
94
+ return nil unless arg[:type]
95
+ DataType.resolve(arg)
96
+ else
97
+ nil
98
+ end
99
+ end
100
+
101
+ def default_field_name
102
+ "item"
103
+ end
104
+
105
+ def resolve_field(arg)
106
+ if arg.is_a?(Hash) and arg.key?(:field)
107
+ description = arg
108
+ arg = description[:field]
109
+ end
110
+ if arg.is_a?(Hash)
111
+ field_description = arg
112
+ Field.new(field_description)
113
+ else
114
+ arg
115
+ end
116
+ end
67
117
  end
68
118
  end
data/lib/arrow/loader.rb CHANGED
@@ -32,9 +32,15 @@ module Arrow
32
32
  end
33
33
 
34
34
  def require_libraries
35
+ require "arrow/column-containable"
36
+ require "arrow/field-containable"
37
+ require "arrow/generic-filterable"
38
+ require "arrow/generic-takeable"
39
+ require "arrow/record-containable"
40
+
35
41
  require "arrow/array"
36
42
  require "arrow/array-builder"
37
- require "arrow/binary-array-builder"
43
+ require "arrow/bigdecimal-extension"
38
44
  require "arrow/chunked-array"
39
45
  require "arrow/column"
40
46
  require "arrow/compression-type"
@@ -53,8 +59,10 @@ module Arrow
53
59
  require "arrow/dictionary-data-type"
54
60
  require "arrow/field"
55
61
  require "arrow/file-output-stream"
62
+ require "arrow/group"
56
63
  require "arrow/list-array-builder"
57
64
  require "arrow/list-data-type"
65
+ require "arrow/null-array"
58
66
  require "arrow/null-array-builder"
59
67
  require "arrow/path-extension"
60
68
  require "arrow/record"
@@ -16,21 +16,9 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- class BinaryArrayBuilder
20
- def append_values(values, is_valids=nil)
21
- if is_valids
22
- is_valids.each_with_index do |is_valid, i|
23
- if is_valid
24
- append_value(values[i])
25
- else
26
- append_null
27
- end
28
- end
29
- else
30
- values.each do |value|
31
- append_value(value)
32
- end
33
- end
19
+ class NullArray
20
+ def get_value(i)
21
+ nil
34
22
  end
35
23
  end
36
24
  end
@@ -15,9 +15,6 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- require "arrow/column-containable"
19
- require "arrow/record-containable"
20
-
21
18
  module Arrow
22
19
  class RecordBatch
23
20
  include ColumnContainable
data/lib/arrow/schema.rb CHANGED
@@ -15,8 +15,6 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- require "arrow/field-containable"
19
-
20
18
  module Arrow
21
19
  class Schema
22
20
  include FieldContainable
@@ -15,8 +15,6 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- require "arrow/field-containable"
19
-
20
18
  module Arrow
21
19
  class StructDataType
22
20
  include FieldContainable
@@ -41,6 +41,8 @@ module Arrow
41
41
  available_formats << match_data.post_match
42
42
  end
43
43
  end
44
+ deprecated_formats = ["batch", "stream"]
45
+ available_formats -= deprecated_formats
44
46
  message = "Arrow::Table load format must be one of ["
45
47
  message << available_formats.join(", ")
46
48
  message << "]: #{format.inspect}"
@@ -119,18 +121,30 @@ module Arrow
119
121
  load_raw(input, reader)
120
122
  end
121
123
 
122
- def load_as_batch
124
+ # @since 1.0.0
125
+ def load_as_arrow_file
123
126
  input = open_input_stream
124
127
  reader = RecordBatchFileReader.new(input)
125
128
  load_raw(input, reader)
126
129
  end
127
130
 
128
- def load_as_stream
131
+ # @deprecated Use `format: :arrow_file` instead.
132
+ def load_as_batch
133
+ load_as_arrow_file
134
+ end
135
+
136
+ # @since 1.0.0
137
+ def load_as_arrow_streaming
129
138
  input = open_input_stream
130
139
  reader = RecordBatchStreamReader.new(input)
131
140
  load_raw(input, reader)
132
141
  end
133
142
 
143
+ # @deprecated Use `format: :arrow_streaming` instead.
144
+ def load_as_stream
145
+ load_as_arrow_streaming
146
+ end
147
+
134
148
  if Arrow.const_defined?(:ORCFileReader)
135
149
  def load_as_orc
136
150
  input = open_input_stream
@@ -143,16 +157,25 @@ module Arrow
143
157
  end
144
158
  end
145
159
 
146
- def load_as_csv
147
- options = @options.dup
160
+ def csv_load(options)
148
161
  options.delete(:format)
149
162
  if @input.is_a?(Buffer)
150
- CSVLoader.load(@input.data.to_s, options)
163
+ CSVLoader.load(@input.data.to_s, **options)
151
164
  else
152
- CSVLoader.load(Pathname.new(@input), options)
165
+ CSVLoader.load(Pathname.new(@input), **options)
153
166
  end
154
167
  end
155
168
 
169
+ def load_as_csv
170
+ csv_load(@options.dup)
171
+ end
172
+
173
+ def load_as_tsv
174
+ options = @options.dup
175
+ options[:delimiter] = "\t"
176
+ csv_load(options.dup)
177
+ end
178
+
156
179
  def load_as_feather
157
180
  input = open_input_stream
158
181
  reader = FeatherFileReader.new(input)
@@ -42,6 +42,8 @@ module Arrow
42
42
  available_formats << match_data.post_match
43
43
  end
44
44
  end
45
+ deprecated_formats = ["batch", "stream"]
46
+ available_formats -= deprecated_formats
45
47
  message = "Arrow::Table save format must be one of ["
46
48
  message << available_formats.join(", ")
47
49
  message << "]: #{format.inspect}"
@@ -110,30 +112,48 @@ module Arrow
110
112
  end
111
113
 
112
114
  def save_as_arrow
113
- save_as_batch
115
+ save_as_arrow_file
114
116
  end
115
117
 
116
- def save_as_batch
118
+ # @since 1.0.0
119
+ def save_as_arrow_file
117
120
  save_raw(RecordBatchFileWriter)
118
121
  end
119
122
 
120
- def save_as_stream
123
+ # @deprecated Use `format: :arrow_file` instead.
124
+ def save_as_batch
125
+ save_as_arrow_file
126
+ end
127
+
128
+ # @since 1.0.0
129
+ def save_as_arrow_streaming
121
130
  save_raw(RecordBatchStreamWriter)
122
131
  end
123
132
 
124
- def save_as_csv
133
+ # @deprecated Use `format: :arrow_streaming` instead.
134
+ def save_as_stream
135
+ save_as_arrow_streaming
136
+ end
137
+
138
+ def csv_save(**options)
125
139
  open_output_stream do |output|
126
- csv = CSV.new(output)
140
+ csv = CSV.new(output, **options)
127
141
  names = @table.schema.fields.collect(&:name)
128
142
  csv << names
129
- @table.each_record(reuse_record: true) do |record|
130
- csv << names.collect do |name|
131
- record[name]
132
- end
143
+ @table.raw_records.each do |record|
144
+ csv << record
133
145
  end
134
146
  end
135
147
  end
136
148
 
149
+ def save_as_csv
150
+ csv_save
151
+ end
152
+
153
+ def save_as_tsv
154
+ csv_save(col_sep: "\t")
155
+ end
156
+
137
157
  def save_as_feather
138
158
  open_output_stream do |output|
139
159
  FeatherFileWriter.open(output) do |writer|
data/lib/arrow/table.rb CHANGED
@@ -15,13 +15,11 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- require "arrow/column-containable"
19
- require "arrow/group"
20
- require "arrow/record-containable"
21
-
22
18
  module Arrow
23
19
  class Table
24
20
  include ColumnContainable
21
+ include GenericFilterable
22
+ include GenericTakeable
25
23
  include RecordContainable
26
24
 
27
25
  class << self
@@ -306,13 +304,13 @@ module Arrow
306
304
  end
307
305
  end
308
306
 
309
- ranges = []
307
+ sliced_tables = []
310
308
  slicers.each do |slicer|
311
309
  slicer = slicer.evaluate if slicer.respond_to?(:evaluate)
312
310
  case slicer
313
311
  when Integer
314
312
  slicer += n_rows if slicer < 0
315
- ranges << [slicer, n_rows - 1]
313
+ sliced_tables << slice_by_range(slicer, n_rows - 1)
316
314
  when Range
317
315
  original_from = from = slicer.first
318
316
  to = slicer.last
@@ -325,17 +323,9 @@ module Arrow
325
323
  raise ArgumentError, message
326
324
  end
327
325
  to += n_rows if to < 0
328
- ranges << [from, to]
329
- when ::Array
330
- boolean_array_to_slice_ranges(slicer, 0, ranges)
331
- when ChunkedArray
332
- offset = 0
333
- slicer.each_chunk do |array|
334
- boolean_array_to_slice_ranges(array, offset, ranges)
335
- offset += array.length
336
- end
337
- when BooleanArray
338
- boolean_array_to_slice_ranges(slicer, 0, ranges)
326
+ sliced_tables << slice_by_range(from, to)
327
+ when ::Array, BooleanArray, ChunkedArray
328
+ sliced_tables << filter(slicer)
339
329
  else
340
330
  message = "slicer must be Integer, Range, (from, to), " +
341
331
  "Arrow::ChunkedArray of Arrow::BooleanArray, " +
@@ -343,7 +333,11 @@ module Arrow
343
333
  raise ArgumentError, message
344
334
  end
345
335
  end
346
- slice_by_ranges(ranges)
336
+ if sliced_tables.size > 1
337
+ sliced_tables[0].concatenate(sliced_tables[1..-1])
338
+ else
339
+ sliced_tables[0]
340
+ end
347
341
  end
348
342
 
349
343
  # TODO
@@ -514,38 +508,8 @@ module Arrow
514
508
  end
515
509
 
516
510
  private
517
- def boolean_array_to_slice_ranges(array, offset, ranges)
518
- in_target = false
519
- target_start = nil
520
- array.each_with_index do |is_target, i|
521
- if is_target
522
- unless in_target
523
- target_start = offset + i
524
- in_target = true
525
- end
526
- else
527
- if in_target
528
- ranges << [target_start, offset + i - 1]
529
- target_start = nil
530
- in_target = false
531
- end
532
- end
533
- end
534
- if in_target
535
- ranges << [target_start, offset + array.length - 1]
536
- end
537
- end
538
-
539
- def slice_by_ranges(ranges)
540
- sliced_table = []
541
- ranges.each do |from, to|
542
- sliced_table << slice_raw(from, to - from + 1)
543
- end
544
- if sliced_table.size > 1
545
- sliced_table[0].concatenate(sliced_table[1..-1])
546
- else
547
- sliced_table[0]
548
- end
511
+ def slice_by_range(from, to)
512
+ slice_raw(from, to - from + 1)
549
513
  end
550
514
 
551
515
  def ensure_raw_column(name, data)
data/lib/arrow/version.rb CHANGED
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "0.15.1"
19
+ VERSION = "0.16.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
data/red-arrow.gemspec CHANGED
@@ -59,5 +59,7 @@ Gem::Specification.new do |spec|
59
59
  spec.add_development_dependency("test-unit")
60
60
  spec.add_development_dependency("yard")
61
61
 
62
- spec.metadata["msys2_mingw_dependencies"] = "arrow"
62
+ required_msys2_package_version = version_components[0, 3].join(".")
63
+ spec.metadata["msys2_mingw_dependencies"] =
64
+ "arrow>=#{required_msys2_package_version}"
63
65
  end
@@ -60,6 +60,23 @@ class ArrayBuilderTest < Test::Unit::TestCase
60
60
  DateTime.new(2018, 1, 5, 0, 23, 21),
61
61
  ])
62
62
  end
63
+
64
+ test("list<boolean>s") do
65
+ assert_build(Arrow::ArrayBuilder,
66
+ [
67
+ [nil, true, false],
68
+ nil,
69
+ [false],
70
+ ])
71
+ end
72
+
73
+ test("list<string>s") do
74
+ assert_build(Arrow::ArrayBuilder,
75
+ [
76
+ ["Hello", "World"],
77
+ ["Apache Arrow"],
78
+ ])
79
+ end
63
80
  end
64
81
 
65
82
  sub_test_case("specific builder") do