red-arrow 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/Rakefile +4 -0
  4. data/lib/arrow/array-builder.rb +6 -2
  5. data/lib/arrow/array.rb +6 -2
  6. data/{test/test-csv-reader.rb → lib/arrow/compression-type.rb} +16 -13
  7. data/lib/arrow/csv-loader.rb +102 -2
  8. data/lib/arrow/csv-read-options.rb +25 -0
  9. data/lib/arrow/data-type.rb +135 -0
  10. data/lib/arrow/decimal128-array-builder.rb +64 -0
  11. data/lib/arrow/decimal128-data-type.rb +69 -0
  12. data/lib/arrow/dense-union-data-type.rb +90 -0
  13. data/lib/arrow/dictionary-data-type.rb +106 -0
  14. data/lib/arrow/field-containable.rb +35 -0
  15. data/lib/arrow/field.rb +92 -8
  16. data/lib/arrow/file-output-stream.rb +34 -0
  17. data/lib/arrow/list-array-builder.rb +96 -0
  18. data/lib/arrow/list-data-type.rb +68 -0
  19. data/lib/arrow/loader.rb +30 -5
  20. data/lib/arrow/{csv-reader.rb → path-extension.rb} +19 -28
  21. data/lib/arrow/record-batch-builder.rb +115 -0
  22. data/lib/arrow/record-batch.rb +25 -0
  23. data/lib/arrow/schema.rb +97 -0
  24. data/lib/arrow/sparse-union-data-type.rb +90 -0
  25. data/lib/arrow/struct-array-builder.rb +146 -0
  26. data/lib/arrow/struct-array.rb +34 -0
  27. data/lib/arrow/struct-data-type.rb +130 -0
  28. data/lib/arrow/struct.rb +68 -0
  29. data/lib/arrow/table-loader.rb +65 -25
  30. data/lib/arrow/table-saver.rb +73 -24
  31. data/lib/arrow/table.rb +11 -2
  32. data/lib/arrow/time32-data-type.rb +61 -0
  33. data/lib/arrow/time64-data-type.rb +61 -0
  34. data/lib/arrow/timestamp-data-type.rb +57 -0
  35. data/lib/arrow/version.rb +5 -7
  36. data/lib/arrow/writable.rb +22 -0
  37. data/red-arrow.gemspec +8 -4
  38. data/test/helper.rb +1 -2
  39. data/test/test-csv-loader.rb +27 -0
  40. data/test/test-data-type.rb +47 -0
  41. data/test/test-decimal128-array-builder.rb +95 -0
  42. data/test/test-decimal128-array.rb +38 -0
  43. data/test/test-decimal128-data-type.rb +31 -0
  44. data/test/test-dense-union-data-type.rb +41 -0
  45. data/test/test-dictionary-data-type.rb +40 -0
  46. data/test/test-feather.rb +34 -0
  47. data/test/test-field.rb +71 -0
  48. data/test/test-file-output-stream.rb +54 -0
  49. data/test/test-list-array-builder.rb +79 -0
  50. data/test/test-list-array.rb +32 -0
  51. data/test/test-list-data-type.rb +43 -0
  52. data/test/test-record-batch-builder.rb +116 -0
  53. data/test/test-record-batch.rb +82 -27
  54. data/test/test-schema.rb +104 -0
  55. data/test/test-sparse-union-data-type.rb +41 -0
  56. data/test/test-struct-array-builder.rb +180 -0
  57. data/test/test-struct-array.rb +60 -15
  58. data/test/test-struct-data-type.rb +112 -0
  59. data/test/test-struct.rb +81 -0
  60. data/test/test-table.rb +165 -29
  61. data/test/test-time32-data-type.rb +42 -0
  62. data/test/test-time64-data-type.rb +42 -0
  63. data/test/test-timestamp-data-type.rb +42 -0
  64. metadata +99 -10
@@ -0,0 +1,96 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class ListArrayBuilder
    class << self
      # Creates a builder for `data_type` and builds `values` with it
      # in one call.
      #
      # @param data_type The data type handed to `new`.
      # @param values The values handed to `#build`.
      # @return The result of `#build`.
      def build(data_type, values)
        new(data_type).build(values)
      end
    end

    # Keep the previously defined `#append_value` reachable so that the
    # polymorphic `#append_value` below can delegate to it.
    alias_method :append_value_raw, :append_value

    # @overload append_value
    #
    #   Starts appending a list record. You also need to append list
    #   value by {#value_builder}.
    #
    # @overload append_value(list)
    #
    #   Appends a list record including list value.
    #
    #   @param list [nil, ::Array] The list value of the record.
    #
    #     If this is `nil`, the list record is null.
    #
    #     If this is `Array`, it's the list value of the record.
    #
    # @raise [ArgumentError] If more than one argument is given, or if
    #   the single argument is neither `nil` nor `Array`.
    #
    # @since 0.12.0
    def append_value(*args)
      n_args = args.size
      if n_args > 1
        message = "wrong number of arguments (given #{n_args}, expected 0..1)"
        raise ArgumentError, message
      end
      return append_value_raw if n_args.zero?

      list = args.first
      case list
      when nil
        append_null
      when ::Array
        # Open the list record first, then fill it through the (cached)
        # value builder.
        append_value_raw
        @value_builder ||= value_builder
        @value_builder.append_values(list, nil)
      else
        message = "list value must be nil or Array: #{list.inspect}"
        raise ArgumentError, message
      end
    end

    # Appends multiple list records.
    #
    # @param lists [::Array] The list values, one per record. Each one
    #   is appended with {#append_value}.
    # @param is_valids [::Array, nil] Optional validity flags. When
    #   given, this drives the iteration: a falsy flag at index `i`
    #   appends a null record, a truthy one appends `lists[i]`.
    def append_values(lists, is_valids=nil)
      return lists.each { |list| append_value(list) } unless is_valids

      is_valids.each_with_index do |is_valid, i|
        is_valid ? append_value(lists[i]) : append_null
      end
    end

    # Appends the given values. Calling this without any value starts
    # a list record, which is kept for backward compatibility.
    #
    # @since 0.12.0
    def append(*values)
      return super unless values.empty?

      # For backward compatibility
      append_value
    end
  end
end
@@ -0,0 +1,68 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class ListDataType
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::ListDataType}.
    #
    # @overload initialize(field)
    #
    #   @param field [Arrow::Field, Hash] The field of the list data
    #     type. You can also specify field description by `Hash`.
    #
    #     See {Arrow::Field.new} how to specify field description.
    #
    #   @example Create a list data type with {Arrow::Field}
    #     visible_field = Arrow::Field.new("visible", :boolean)
    #     Arrow::ListDataType.new(visible_field)
    #
    #   @example Create a list data type with field description
    #     Arrow::ListDataType.new(name: "visible", type: :boolean)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the list data
    #     type. It must have `:field` value.
    #
    #   @option description [Arrow::Field, Hash] :field The field of
    #     the list data type. You can also specify field description
    #     by `Hash`.
    #
    #     See {Arrow::Field.new} how to specify field description.
    #
    #   @example Create a list data type with {Arrow::Field}
    #     visible_field = Arrow::Field.new("visible", :boolean)
    #     Arrow::ListDataType.new(field: visible_field)
    #
    #   @example Create a list data type with field description
    #     Arrow::ListDataType.new(field: {name: "visible", type: :boolean})
    def initialize(field)
      # A Hash that has a :field key is a data type description; unwrap
      # it to get at the field (or field description) itself.
      field = field[:field] if field.is_a?(Hash) && field.key?(:field)
      # Any remaining Hash is a field description; turn it into a field.
      field = Field.new(field) if field.is_a?(Hash)
      initialize_raw(field)
    end
  end
end
+ end
@@ -35,18 +35,35 @@ module Arrow
35
35
  require "arrow/array-builder"
36
36
  require "arrow/chunked-array"
37
37
  require "arrow/column"
38
+ require "arrow/compression-type"
38
39
  require "arrow/csv-loader"
39
- require "arrow/csv-reader"
40
+ require "arrow/csv-read-options"
41
+ require "arrow/data-type"
40
42
  require "arrow/date32-array"
41
43
  require "arrow/date32-array-builder"
42
44
  require "arrow/date64-array"
43
45
  require "arrow/date64-array-builder"
46
+ require "arrow/decimal128-array-builder"
47
+ require "arrow/decimal128-data-type"
48
+ require "arrow/dense-union-data-type"
49
+ require "arrow/dictionary-data-type"
44
50
  require "arrow/field"
51
+ require "arrow/file-output-stream"
52
+ require "arrow/list-array-builder"
53
+ require "arrow/list-data-type"
54
+ require "arrow/path-extension"
45
55
  require "arrow/record"
46
56
  require "arrow/record-batch"
57
+ require "arrow/record-batch-builder"
58
+ require "arrow/record-batch-file-reader"
59
+ require "arrow/record-batch-stream-reader"
47
60
  require "arrow/rolling-window"
61
+ require "arrow/schema"
48
62
  require "arrow/slicer"
63
+ require "arrow/sparse-union-data-type"
49
64
  require "arrow/struct-array"
65
+ require "arrow/struct-array-builder"
66
+ require "arrow/struct-data-type"
50
67
  require "arrow/table"
51
68
  require "arrow/table-formatter"
52
69
  require "arrow/table-list-formatter"
@@ -54,11 +71,12 @@ module Arrow
54
71
  require "arrow/table-loader"
55
72
  require "arrow/table-saver"
56
73
  require "arrow/tensor"
74
+ require "arrow/time32-data-type"
75
+ require "arrow/time64-data-type"
57
76
  require "arrow/timestamp-array"
58
77
  require "arrow/timestamp-array-builder"
59
-
60
- require "arrow/record-batch-file-reader"
61
- require "arrow/record-batch-stream-reader"
78
+ require "arrow/timestamp-data-type"
79
+ require "arrow/writable"
62
80
  end
63
81
 
64
82
  def load_object_info(info)
@@ -72,6 +90,13 @@ module Arrow
72
90
 
73
91
  def load_method_info(info, klass, method_name)
74
92
  case klass.name
93
+ when /Builder\z/
94
+ case method_name
95
+ when "append"
96
+ return
97
+ else
98
+ super
99
+ end
75
100
  when "Arrow::StringArray"
76
101
  case method_name
77
102
  when "get_value"
@@ -87,7 +112,7 @@ module Arrow
87
112
  end
88
113
  super(info, klass, method_name)
89
114
  else
90
- super
115
+ super
91
116
  end
92
117
  end
93
118
  end
@@ -15,40 +15,31 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- require "csv"
19
-
20
18
  module Arrow
21
- class CSVReader
22
- def initialize(csv)
23
- @csv = csv
19
+ class PathExtension
20
+ def initialize(path)
21
+ @path = path
24
22
  end
25
23
 
26
- def read
27
- values_set = []
28
- @csv.each do |row|
29
- if row.is_a?(CSV::Row)
30
- row = row.collect(&:last)
31
- end
32
- row.each_with_index do |value, i|
33
- values = (values_set[i] ||= [])
34
- values << value
35
- end
36
- end
37
- return nil if values_set.empty?
24
+ def extract
25
+ basename = ::File.basename(@path)
26
+ components = basename.split(".")
27
+ return {} if components.size == 1
38
28
 
39
- arrays = values_set.collect.with_index do |values, i|
40
- ArrayBuilder.build(values)
41
- end
42
- if @csv.headers
43
- names = @csv.headers
29
+ extension = components.last.downcase
30
+ if components.size > 2
31
+ compression = CompressionType.resolve_extension(extension)
32
+ if compression
33
+ {
34
+ format: components[-2].downcase,
35
+ compression: compression,
36
+ }
37
+ else
38
+ {format: extension}
39
+ end
44
40
  else
45
- names = arrays.size.times.collect(&:to_s)
46
- end
47
- raw_table = {}
48
- names.each_with_index do |name, i|
49
- raw_table[name] = arrays[i]
41
+ {format: extension}
50
42
  end
51
- Table.new(raw_table)
52
43
  end
53
44
  end
54
45
  end
@@ -0,0 +1,115 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class RecordBatchBuilder
    class << self
      # Builds a record batch in one call: creates a builder for
      # `schema`, appends `data` and flushes the builder.
      #
      # @param schema [Arrow::Schema, Object] The schema, or anything
      #   {#initialize} accepts.
      # @param data [Hash, #each_with_index] The data handed to
      #   {#append}.
      # @return The result of `#flush`.
      #
      # @since 0.12.0
      def build(schema, data)
        builder = new(schema)
        builder.append(data)
        builder.flush
      end
    end

    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # @param schema [Arrow::Schema, Object] The schema. Anything that
    #   isn't an {Arrow::Schema} is converted with `Arrow::Schema.new`.
    def initialize(schema)
      schema = Schema.new(schema) unless schema.is_a?(Schema)
      initialize_raw(schema)
      # Field name (as returned by `field.name`) -> column index.
      @name_to_index = {}
      schema.fields.each_with_index do |field, i|
        @name_to_index[field.name] = i
      end
    end

    # Returns the column builder for the given field.
    #
    # @param name_or_index [String, Symbol, Integer] The field name or
    #   the column index.
    #
    # @since 0.12.0
    def [](name_or_index)
      case name_or_index
      when String, Symbol
        name = name_or_index
        self[resolve_name(name)]
      else
        index = name_or_index
        column_builders[index]
      end
    end

    # Appends data. Each `Hash` argument is treated as columns (see
    # {#append_columns}); anything else is treated as records (see
    # {#append_records}).
    #
    # @since 0.12.0
    def append(*values)
      values.each do |value|
        case value
        when Hash
          append_columns(value)
        else
          append_records(value)
        end
      end
    end

    # Appends records (rows).
    #
    # Each record contributes exactly one value to every column:
    #
    #   * `nil`: every column gets `nil`.
    #   * `Hash`: values are looked up by field name; unknown names are
    #     ignored and missing fields get `nil`. If several keys resolve
    #     to the same field (e.g. `:a` and `"a"`), the last one wins.
    #   * Anything else: values are taken positionally; missing
    #     trailing values get `nil`.
    #
    # @param records [#each] The records to append.
    #
    # @raise [ArgumentError] If a positional record has more values
    #   than the number of fields. No column builder has been touched
    #   when this is raised.
    #
    # @since 0.12.0
    def append_records(records)
      n = n_fields
      columns = Array.new(n) { [] }
      records.each do |record|
        # Collect into a fixed-width row first so that a record can
        # never add zero or multiple values to a single column.
        row = Array.new(n)
        case record
        when nil
          # Null record: keep the all-nil row.
        when Hash
          record.each do |name, value|
            nth_column = resolve_name(name)
            next if nth_column.nil?
            row[nth_column] = value
          end
        else
          record.each_with_index do |value, nth_column|
            if nth_column >= n
              message = "too many values in record " +
                        "(expected at most #{n}): #{record.inspect}"
              raise ArgumentError, message
            end
            row[nth_column] = value
          end
        end
        columns.each_with_index do |column, i|
          column << row[i]
        end
      end
      columns.each_with_index do |column, i|
        # Some builders treat #append without arguments as "start a
        # new record", so never call it with nothing to append.
        next if column.empty?
        self[i].append(*column)
      end
    end

    # Appends data column by column.
    #
    # @param columns [Hash] Field name (or column index) to the values
    #   to append to that column. Columns without values are skipped.
    #
    # @since 0.12.0
    def append_columns(columns)
      columns.each do |name, values|
        values = [*values]
        # Same reason as in #append_records: #append without arguments
        # may mean "start a new record" for some builders.
        next if values.empty?
        self[name].append(*values)
      end
    end

    private
    # @return [Integer, nil] The column index of the named field, or
    #   `nil` if there is no such field.
    def resolve_name(name)
      @name_to_index[name.to_s]
    end

    # TODO: Make public with good name. Is column_builders good enough?
    # builders? sub_builders?
    def column_builders
      @column_builders ||= n_fields.times.collect do |i|
        get_field(i)
      end
    end
  end
end
@@ -22,6 +22,22 @@ module Arrow
22
22
  include RecordContainable
23
23
  include Enumerable
24
24
 
25
class << self
  # Creates a record batch.
  #
  # With two arguments (`schema`, `data`) the record batch is built by
  # `RecordBatchBuilder.build`. With three arguments the call is
  # forwarded unchanged to the inherited constructor.
  #
  # @raise [ArgumentError] If the number of arguments is neither 2
  #   nor 3.
  def new(*args)
    n_args = args.size
    return RecordBatchBuilder.build(*args) if n_args == 2
    return super if n_args == 3

    message = "wrong number of arguments (given #{n_args}, expected 2..3)"
    raise ArgumentError, message
  end
end
40
+
25
41
  alias_method :each, :each_record
26
42
 
27
43
  alias_method :columns_raw, :columns
@@ -29,6 +45,15 @@ module Arrow
29
45
  @columns ||= columns_raw
30
46
  end
31
47
 
48
# Converts the record batch to {Arrow::Table}.
#
# The table is created from this record batch's schema and contains
# this record batch as its only record batch.
#
# @return [Arrow::Table]
#
# @since 0.12.0
def to_table
  record_batches = [self]
  Table.new(schema, record_batches)
end
56
+
32
57
  def respond_to_missing?(name, include_private)
33
58
  return true if find_column(name)
34
59
  super