red-arrow 0.15.1 → 0.16.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -53,16 +53,66 @@ module Arrow
53
53
  #
54
54
  # @example Create a list data type with field description
55
55
  # Arrow::ListDataType.new(field: {name: "visible", type: :boolean})
56
- def initialize(field)
57
- if field.is_a?(Hash) and field.key?(:field)
58
- description = field
59
- field = description[:field]
60
- end
61
- if field.is_a?(Hash)
62
- field_description = field
63
- field = Field.new(field_description)
56
+ #
57
+ # @overload initialize(data_type)
58
+ #
59
+ # @param data_type [Arrow::DataType, String, Symbol,
60
+ # ::Array<String>, ::Array<Symbol>, Hash] The element data
61
+ # type of the list data type. A field is created with the
62
+ # default name `"item"` from the data type automatically.
63
+ #
64
+ # See {Arrow::DataType.resolve} for how to specify the data type.
65
+ #
66
+ # @example Create a list data type with {Arrow::DataType}
67
+ # Arrow::ListDataType.new(Arrow::BooleanDataType.new)
68
+ #
69
+ # @example Create a list data type with data type name as String
70
+ # Arrow::ListDataType.new("boolean")
71
+ #
72
+ # @example Create a list data type with data type name as Symbol
73
+ # Arrow::ListDataType.new(:boolean)
74
+ #
75
+ # @example Create a list data type with data type as Array
76
+ # Arrow::ListDataType.new([:time32, :milli])
77
+ def initialize(arg)
78
+ data_type = resolve_data_type(arg)
79
+ if data_type
80
+ field = Field.new(default_field_name, data_type)
81
+ else
82
+ field = resolve_field(arg)
64
83
  end
65
84
  initialize_raw(field)
66
85
  end
86
+
87
+ private
88
+ def resolve_data_type(arg)
89
+ case arg
90
+ when DataType, String, Symbol, ::Array
91
+ DataType.resolve(arg)
92
+ when Hash
93
+ return nil if arg[:name]
94
+ return nil unless arg[:type]
95
+ DataType.resolve(arg)
96
+ else
97
+ nil
98
+ end
99
+ end
100
+
101
+ def default_field_name
102
+ "item"
103
+ end
104
+
105
+ def resolve_field(arg)
106
+ if arg.is_a?(Hash) and arg.key?(:field)
107
+ description = arg
108
+ arg = description[:field]
109
+ end
110
+ if arg.is_a?(Hash)
111
+ field_description = arg
112
+ Field.new(field_description)
113
+ else
114
+ arg
115
+ end
116
+ end
67
117
  end
68
118
  end
data/lib/arrow/loader.rb CHANGED
@@ -32,9 +32,15 @@ module Arrow
32
32
  end
33
33
 
34
34
  def require_libraries
35
+ require "arrow/column-containable"
36
+ require "arrow/field-containable"
37
+ require "arrow/generic-filterable"
38
+ require "arrow/generic-takeable"
39
+ require "arrow/record-containable"
40
+
35
41
  require "arrow/array"
36
42
  require "arrow/array-builder"
37
- require "arrow/binary-array-builder"
43
+ require "arrow/bigdecimal-extension"
38
44
  require "arrow/chunked-array"
39
45
  require "arrow/column"
40
46
  require "arrow/compression-type"
@@ -53,8 +59,10 @@ module Arrow
53
59
  require "arrow/dictionary-data-type"
54
60
  require "arrow/field"
55
61
  require "arrow/file-output-stream"
62
+ require "arrow/group"
56
63
  require "arrow/list-array-builder"
57
64
  require "arrow/list-data-type"
65
+ require "arrow/null-array"
58
66
  require "arrow/null-array-builder"
59
67
  require "arrow/path-extension"
60
68
  require "arrow/record"
@@ -16,21 +16,9 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- class BinaryArrayBuilder
20
- def append_values(values, is_valids=nil)
21
- if is_valids
22
- is_valids.each_with_index do |is_valid, i|
23
- if is_valid
24
- append_value(values[i])
25
- else
26
- append_null
27
- end
28
- end
29
- else
30
- values.each do |value|
31
- append_value(value)
32
- end
33
- end
19
+ class NullArray
20
+ def get_value(i)
21
+ nil
34
22
  end
35
23
  end
36
24
  end
@@ -15,9 +15,6 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- require "arrow/column-containable"
19
- require "arrow/record-containable"
20
-
21
18
  module Arrow
22
19
  class RecordBatch
23
20
  include ColumnContainable
data/lib/arrow/schema.rb CHANGED
@@ -15,8 +15,6 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- require "arrow/field-containable"
19
-
20
18
  module Arrow
21
19
  class Schema
22
20
  include FieldContainable
@@ -15,8 +15,6 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- require "arrow/field-containable"
19
-
20
18
  module Arrow
21
19
  class StructDataType
22
20
  include FieldContainable
@@ -41,6 +41,8 @@ module Arrow
41
41
  available_formats << match_data.post_match
42
42
  end
43
43
  end
44
+ deprecated_formats = ["batch", "stream"]
45
+ available_formats -= deprecated_formats
44
46
  message = "Arrow::Table load format must be one of ["
45
47
  message << available_formats.join(", ")
46
48
  message << "]: #{format.inspect}"
@@ -119,18 +121,30 @@ module Arrow
119
121
  load_raw(input, reader)
120
122
  end
121
123
 
122
- def load_as_batch
124
+ # @since 1.0.0
125
+ def load_as_arrow_file
123
126
  input = open_input_stream
124
127
  reader = RecordBatchFileReader.new(input)
125
128
  load_raw(input, reader)
126
129
  end
127
130
 
128
- def load_as_stream
131
+ # @deprecated Use `format: :arrow_file` instead.
132
+ def load_as_batch
133
+ load_as_arrow_file
134
+ end
135
+
136
+ # @since 1.0.0
137
+ def load_as_arrow_streaming
129
138
  input = open_input_stream
130
139
  reader = RecordBatchStreamReader.new(input)
131
140
  load_raw(input, reader)
132
141
  end
133
142
 
143
+ # @deprecated Use `format: :arrow_streaming` instead.
144
+ def load_as_stream
145
+ load_as_arrow_streaming
146
+ end
147
+
134
148
  if Arrow.const_defined?(:ORCFileReader)
135
149
  def load_as_orc
136
150
  input = open_input_stream
@@ -143,16 +157,25 @@ module Arrow
143
157
  end
144
158
  end
145
159
 
146
- def load_as_csv
147
- options = @options.dup
160
+ def csv_load(options)
148
161
  options.delete(:format)
149
162
  if @input.is_a?(Buffer)
150
- CSVLoader.load(@input.data.to_s, options)
163
+ CSVLoader.load(@input.data.to_s, **options)
151
164
  else
152
- CSVLoader.load(Pathname.new(@input), options)
165
+ CSVLoader.load(Pathname.new(@input), **options)
153
166
  end
154
167
  end
155
168
 
169
+ def load_as_csv
170
+ csv_load(@options.dup)
171
+ end
172
+
173
+ def load_as_tsv
174
+ options = @options.dup
175
+ options[:delimiter] = "\t"
176
+ csv_load(options.dup)
177
+ end
178
+
156
179
  def load_as_feather
157
180
  input = open_input_stream
158
181
  reader = FeatherFileReader.new(input)
@@ -42,6 +42,8 @@ module Arrow
42
42
  available_formats << match_data.post_match
43
43
  end
44
44
  end
45
+ deprecated_formats = ["batch", "stream"]
46
+ available_formats -= deprecated_formats
45
47
  message = "Arrow::Table save format must be one of ["
46
48
  message << available_formats.join(", ")
47
49
  message << "]: #{format.inspect}"
@@ -110,30 +112,48 @@ module Arrow
110
112
  end
111
113
 
112
114
  def save_as_arrow
113
- save_as_batch
115
+ save_as_arrow_file
114
116
  end
115
117
 
116
- def save_as_batch
118
+ # @since 1.0.0
119
+ def save_as_arrow_file
117
120
  save_raw(RecordBatchFileWriter)
118
121
  end
119
122
 
120
- def save_as_stream
123
+ # @deprecated Use `format: :arrow_file` instead.
124
+ def save_as_batch
125
+ save_as_arrow_file
126
+ end
127
+
128
+ # @since 1.0.0
129
+ def save_as_arrow_streaming
121
130
  save_raw(RecordBatchStreamWriter)
122
131
  end
123
132
 
124
- def save_as_csv
133
+ # @deprecated Use `format: :arrow_streaming` instead.
134
+ def save_as_stream
135
+ save_as_arrow_streaming
136
+ end
137
+
138
+ def csv_save(**options)
125
139
  open_output_stream do |output|
126
- csv = CSV.new(output)
140
+ csv = CSV.new(output, **options)
127
141
  names = @table.schema.fields.collect(&:name)
128
142
  csv << names
129
- @table.each_record(reuse_record: true) do |record|
130
- csv << names.collect do |name|
131
- record[name]
132
- end
143
+ @table.raw_records.each do |record|
144
+ csv << record
133
145
  end
134
146
  end
135
147
  end
136
148
 
149
+ def save_as_csv
150
+ csv_save
151
+ end
152
+
153
+ def save_as_tsv
154
+ csv_save(col_sep: "\t")
155
+ end
156
+
137
157
  def save_as_feather
138
158
  open_output_stream do |output|
139
159
  FeatherFileWriter.open(output) do |writer|
data/lib/arrow/table.rb CHANGED
@@ -15,13 +15,11 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- require "arrow/column-containable"
19
- require "arrow/group"
20
- require "arrow/record-containable"
21
-
22
18
  module Arrow
23
19
  class Table
24
20
  include ColumnContainable
21
+ include GenericFilterable
22
+ include GenericTakeable
25
23
  include RecordContainable
26
24
 
27
25
  class << self
@@ -306,13 +304,13 @@ module Arrow
306
304
  end
307
305
  end
308
306
 
309
- ranges = []
307
+ sliced_tables = []
310
308
  slicers.each do |slicer|
311
309
  slicer = slicer.evaluate if slicer.respond_to?(:evaluate)
312
310
  case slicer
313
311
  when Integer
314
312
  slicer += n_rows if slicer < 0
315
- ranges << [slicer, n_rows - 1]
313
+ sliced_tables << slice_by_range(slicer, n_rows - 1)
316
314
  when Range
317
315
  original_from = from = slicer.first
318
316
  to = slicer.last
@@ -325,17 +323,9 @@ module Arrow
325
323
  raise ArgumentError, message
326
324
  end
327
325
  to += n_rows if to < 0
328
- ranges << [from, to]
329
- when ::Array
330
- boolean_array_to_slice_ranges(slicer, 0, ranges)
331
- when ChunkedArray
332
- offset = 0
333
- slicer.each_chunk do |array|
334
- boolean_array_to_slice_ranges(array, offset, ranges)
335
- offset += array.length
336
- end
337
- when BooleanArray
338
- boolean_array_to_slice_ranges(slicer, 0, ranges)
326
+ sliced_tables << slice_by_range(from, to)
327
+ when ::Array, BooleanArray, ChunkedArray
328
+ sliced_tables << filter(slicer)
339
329
  else
340
330
  message = "slicer must be Integer, Range, (from, to), " +
341
331
  "Arrow::ChunkedArray of Arrow::BooleanArray, " +
@@ -343,7 +333,11 @@ module Arrow
343
333
  raise ArgumentError, message
344
334
  end
345
335
  end
346
- slice_by_ranges(ranges)
336
+ if sliced_tables.size > 1
337
+ sliced_tables[0].concatenate(sliced_tables[1..-1])
338
+ else
339
+ sliced_tables[0]
340
+ end
347
341
  end
348
342
 
349
343
  # TODO
@@ -514,38 +508,8 @@ module Arrow
514
508
  end
515
509
 
516
510
  private
517
- def boolean_array_to_slice_ranges(array, offset, ranges)
518
- in_target = false
519
- target_start = nil
520
- array.each_with_index do |is_target, i|
521
- if is_target
522
- unless in_target
523
- target_start = offset + i
524
- in_target = true
525
- end
526
- else
527
- if in_target
528
- ranges << [target_start, offset + i - 1]
529
- target_start = nil
530
- in_target = false
531
- end
532
- end
533
- end
534
- if in_target
535
- ranges << [target_start, offset + array.length - 1]
536
- end
537
- end
538
-
539
- def slice_by_ranges(ranges)
540
- sliced_table = []
541
- ranges.each do |from, to|
542
- sliced_table << slice_raw(from, to - from + 1)
543
- end
544
- if sliced_table.size > 1
545
- sliced_table[0].concatenate(sliced_table[1..-1])
546
- else
547
- sliced_table[0]
548
- end
511
+ def slice_by_range(from, to)
512
+ slice_raw(from, to - from + 1)
549
513
  end
550
514
 
551
515
  def ensure_raw_column(name, data)
data/lib/arrow/version.rb CHANGED
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "0.15.1"
19
+ VERSION = "0.16.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
data/red-arrow.gemspec CHANGED
@@ -59,5 +59,7 @@ Gem::Specification.new do |spec|
59
59
  spec.add_development_dependency("test-unit")
60
60
  spec.add_development_dependency("yard")
61
61
 
62
- spec.metadata["msys2_mingw_dependencies"] = "arrow"
62
+ required_msys2_package_version = version_components[0, 3].join(".")
63
+ spec.metadata["msys2_mingw_dependencies"] =
64
+ "arrow>=#{required_msys2_package_version}"
63
65
  end
@@ -60,6 +60,23 @@ class ArrayBuilderTest < Test::Unit::TestCase
60
60
  DateTime.new(2018, 1, 5, 0, 23, 21),
61
61
  ])
62
62
  end
63
+
64
+ test("list<boolean>s") do
65
+ assert_build(Arrow::ArrayBuilder,
66
+ [
67
+ [nil, true, false],
68
+ nil,
69
+ [false],
70
+ ])
71
+ end
72
+
73
+ test("list<string>s") do
74
+ assert_build(Arrow::ArrayBuilder,
75
+ [
76
+ ["Hello", "World"],
77
+ ["Apache Arrow"],
78
+ ])
79
+ end
63
80
  end
64
81
 
65
82
  sub_test_case("specific builder") do