red-arrow 0.11.0 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/Rakefile +4 -0
  4. data/lib/arrow/array-builder.rb +6 -2
  5. data/lib/arrow/array.rb +6 -2
  6. data/{test/test-csv-reader.rb → lib/arrow/compression-type.rb} +16 -13
  7. data/lib/arrow/csv-loader.rb +102 -2
  8. data/lib/arrow/csv-read-options.rb +25 -0
  9. data/lib/arrow/data-type.rb +135 -0
  10. data/lib/arrow/decimal128-array-builder.rb +64 -0
  11. data/lib/arrow/decimal128-data-type.rb +69 -0
  12. data/lib/arrow/dense-union-data-type.rb +90 -0
  13. data/lib/arrow/dictionary-data-type.rb +106 -0
  14. data/lib/arrow/field-containable.rb +35 -0
  15. data/lib/arrow/field.rb +92 -8
  16. data/lib/arrow/file-output-stream.rb +34 -0
  17. data/lib/arrow/list-array-builder.rb +96 -0
  18. data/lib/arrow/list-data-type.rb +68 -0
  19. data/lib/arrow/loader.rb +30 -5
  20. data/lib/arrow/{csv-reader.rb → path-extension.rb} +19 -28
  21. data/lib/arrow/record-batch-builder.rb +115 -0
  22. data/lib/arrow/record-batch.rb +25 -0
  23. data/lib/arrow/schema.rb +97 -0
  24. data/lib/arrow/sparse-union-data-type.rb +90 -0
  25. data/lib/arrow/struct-array-builder.rb +146 -0
  26. data/lib/arrow/struct-array.rb +34 -0
  27. data/lib/arrow/struct-data-type.rb +130 -0
  28. data/lib/arrow/struct.rb +68 -0
  29. data/lib/arrow/table-loader.rb +65 -25
  30. data/lib/arrow/table-saver.rb +73 -24
  31. data/lib/arrow/table.rb +11 -2
  32. data/lib/arrow/time32-data-type.rb +61 -0
  33. data/lib/arrow/time64-data-type.rb +61 -0
  34. data/lib/arrow/timestamp-data-type.rb +57 -0
  35. data/lib/arrow/version.rb +5 -7
  36. data/lib/arrow/writable.rb +22 -0
  37. data/red-arrow.gemspec +8 -4
  38. data/test/helper.rb +1 -2
  39. data/test/test-csv-loader.rb +27 -0
  40. data/test/test-data-type.rb +47 -0
  41. data/test/test-decimal128-array-builder.rb +95 -0
  42. data/test/test-decimal128-array.rb +38 -0
  43. data/test/test-decimal128-data-type.rb +31 -0
  44. data/test/test-dense-union-data-type.rb +41 -0
  45. data/test/test-dictionary-data-type.rb +40 -0
  46. data/test/test-feather.rb +34 -0
  47. data/test/test-field.rb +71 -0
  48. data/test/test-file-output-stream.rb +54 -0
  49. data/test/test-list-array-builder.rb +79 -0
  50. data/test/test-list-array.rb +32 -0
  51. data/test/test-list-data-type.rb +43 -0
  52. data/test/test-record-batch-builder.rb +116 -0
  53. data/test/test-record-batch.rb +82 -27
  54. data/test/test-schema.rb +104 -0
  55. data/test/test-sparse-union-data-type.rb +41 -0
  56. data/test/test-struct-array-builder.rb +180 -0
  57. data/test/test-struct-array.rb +60 -15
  58. data/test/test-struct-data-type.rb +112 -0
  59. data/test/test-struct.rb +81 -0
  60. data/test/test-table.rb +165 -29
  61. data/test/test-time32-data-type.rb +42 -0
  62. data/test/test-time64-data-type.rb +42 -0
  63. data/test/test-timestamp-data-type.rb +42 -0
  64. metadata +99 -10
@@ -0,0 +1,96 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class ListArrayBuilder
20
+ class << self
21
+ def build(data_type, values)
22
+ builder = new(data_type)
23
+ builder.build(values)
24
+ end
25
+ end
26
+
27
+ alias_method :append_value_raw, :append_value
28
+
29
+ # @overload append_value
30
+ #
31
+ # Starts appending a list record. You also need to append the list
32
+ # values by using {#value_builder}.
33
+ #
34
+ # @overload append_value(list)
35
+ #
36
+ # Appends a list record including list value.
37
+ #
38
+ # @param list [nil, ::Array] The list value of the record.
39
+ #
40
+ # If this is `nil`, the list record is null.
41
+ #
42
+ # If this is `Array`, it's the list value of the record.
43
+ #
44
+ # @since 0.12.0
45
+ def append_value(*args)
46
+ n_args = args.size
47
+
48
+ case n_args
49
+ when 0
50
+ append_value_raw
51
+ when 1
52
+ value = args[0]
53
+ case value
54
+ when nil
55
+ append_null
56
+ when ::Array
57
+ append_value_raw
58
+ @value_builder ||= value_builder
59
+ @value_builder.append_values(value, nil)
60
+ else
61
+ message = "list value must be nil or Array: #{value.inspect}"
62
+ raise ArgumentError, message
63
+ end
64
+ else
65
+ message = "wrong number of arguments (given #{n_args}, expected 0..1)"
66
+ raise ArgumentError, message
67
+ end
68
+ end
69
+
70
+ def append_values(lists, is_valids=nil)
71
+ if is_valids
72
+ is_valids.each_with_index do |is_valid, i|
73
+ if is_valid
74
+ append_value(lists[i])
75
+ else
76
+ append_null
77
+ end
78
+ end
79
+ else
80
+ lists.each do |list|
81
+ append_value(list)
82
+ end
83
+ end
84
+ end
85
+
86
+ # @since 0.12.0
87
+ def append(*values)
88
+ if values.empty?
89
+ # For backward compatibility
90
+ append_value
91
+ else
92
+ super
93
+ end
94
+ end
95
+ end
96
+ end
@@ -0,0 +1,68 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class ListDataType
20
+ alias_method :initialize_raw, :initialize
21
+ private :initialize_raw
22
+
23
+ # Creates a new {Arrow::ListDataType}.
24
+ #
25
+ # @overload initialize(field)
26
+ #
27
+ # @param field [Arrow::Field, Hash] The field of the list data
28
+ # type. You can also specify the field description as a `Hash`.
29
+ #
30
+ # See {Arrow::Field.new} for how to specify a field description.
31
+ #
32
+ # @example Create a list data type with {Arrow::Field}
33
+ # visible_field = Arrow::Field.new("visible", :boolean)
34
+ # Arrow::ListDataType.new(visible_field)
35
+ #
36
+ # @example Create a list data type with field description
37
+ # Arrow::ListDataType.new(name: "visible", type: :boolean)
38
+ #
39
+ # @overload initialize(description)
40
+ #
41
+ # @param description [Hash] The description of the list data
42
+ # type. It must have `:field` value.
43
+ #
44
+ # @option description [Arrow::Field, Hash] :field The field of
45
+ # the list data type. You can also specify the field description
46
+ # as a `Hash`.
47
+ #
48
+ # See {Arrow::Field.new} for how to specify a field description.
49
+ #
50
+ # @example Create a list data type with {Arrow::Field}
51
+ # visible_field = Arrow::Field.new("visible", :boolean)
52
+ # Arrow::ListDataType.new(field: visible_field)
53
+ #
54
+ # @example Create a list data type with field description
55
+ # Arrow::ListDataType.new(field: {name: "visible", type: :boolean})
56
+ def initialize(field)
57
+ if field.is_a?(Hash) and field.key?(:field)
58
+ description = field
59
+ field = description[:field]
60
+ end
61
+ if field.is_a?(Hash)
62
+ field_description = field
63
+ field = Field.new(field_description)
64
+ end
65
+ initialize_raw(field)
66
+ end
67
+ end
68
+ end
@@ -35,18 +35,35 @@ module Arrow
35
35
  require "arrow/array-builder"
36
36
  require "arrow/chunked-array"
37
37
  require "arrow/column"
38
+ require "arrow/compression-type"
38
39
  require "arrow/csv-loader"
39
- require "arrow/csv-reader"
40
+ require "arrow/csv-read-options"
41
+ require "arrow/data-type"
40
42
  require "arrow/date32-array"
41
43
  require "arrow/date32-array-builder"
42
44
  require "arrow/date64-array"
43
45
  require "arrow/date64-array-builder"
46
+ require "arrow/decimal128-array-builder"
47
+ require "arrow/decimal128-data-type"
48
+ require "arrow/dense-union-data-type"
49
+ require "arrow/dictionary-data-type"
44
50
  require "arrow/field"
51
+ require "arrow/file-output-stream"
52
+ require "arrow/list-array-builder"
53
+ require "arrow/list-data-type"
54
+ require "arrow/path-extension"
45
55
  require "arrow/record"
46
56
  require "arrow/record-batch"
57
+ require "arrow/record-batch-builder"
58
+ require "arrow/record-batch-file-reader"
59
+ require "arrow/record-batch-stream-reader"
47
60
  require "arrow/rolling-window"
61
+ require "arrow/schema"
48
62
  require "arrow/slicer"
63
+ require "arrow/sparse-union-data-type"
49
64
  require "arrow/struct-array"
65
+ require "arrow/struct-array-builder"
66
+ require "arrow/struct-data-type"
50
67
  require "arrow/table"
51
68
  require "arrow/table-formatter"
52
69
  require "arrow/table-list-formatter"
@@ -54,11 +71,12 @@ module Arrow
54
71
  require "arrow/table-loader"
55
72
  require "arrow/table-saver"
56
73
  require "arrow/tensor"
74
+ require "arrow/time32-data-type"
75
+ require "arrow/time64-data-type"
57
76
  require "arrow/timestamp-array"
58
77
  require "arrow/timestamp-array-builder"
59
-
60
- require "arrow/record-batch-file-reader"
61
- require "arrow/record-batch-stream-reader"
78
+ require "arrow/timestamp-data-type"
79
+ require "arrow/writable"
62
80
  end
63
81
 
64
82
  def load_object_info(info)
@@ -72,6 +90,13 @@ module Arrow
72
90
 
73
91
  def load_method_info(info, klass, method_name)
74
92
  case klass.name
93
+ when /Builder\z/
94
+ case method_name
95
+ when "append"
96
+ return
97
+ else
98
+ super
99
+ end
75
100
  when "Arrow::StringArray"
76
101
  case method_name
77
102
  when "get_value"
@@ -87,7 +112,7 @@ module Arrow
87
112
  end
88
113
  super(info, klass, method_name)
89
114
  else
90
- super
115
+ super
91
116
  end
92
117
  end
93
118
  end
@@ -15,40 +15,31 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- require "csv"
19
-
20
18
  module Arrow
21
- class CSVReader
22
- def initialize(csv)
23
- @csv = csv
19
+ class PathExtension
20
+ def initialize(path)
21
+ @path = path
24
22
  end
25
23
 
26
- def read
27
- values_set = []
28
- @csv.each do |row|
29
- if row.is_a?(CSV::Row)
30
- row = row.collect(&:last)
31
- end
32
- row.each_with_index do |value, i|
33
- values = (values_set[i] ||= [])
34
- values << value
35
- end
36
- end
37
- return nil if values_set.empty?
24
+ def extract
25
+ basename = ::File.basename(@path)
26
+ components = basename.split(".")
27
+ return {} if components.size == 1
38
28
 
39
- arrays = values_set.collect.with_index do |values, i|
40
- ArrayBuilder.build(values)
41
- end
42
- if @csv.headers
43
- names = @csv.headers
29
+ extension = components.last.downcase
30
+ if components.size > 2
31
+ compression = CompressionType.resolve_extension(extension)
32
+ if compression
33
+ {
34
+ format: components[-2].downcase,
35
+ compression: compression,
36
+ }
37
+ else
38
+ {format: extension}
39
+ end
44
40
  else
45
- names = arrays.size.times.collect(&:to_s)
46
- end
47
- raw_table = {}
48
- names.each_with_index do |name, i|
49
- raw_table[name] = arrays[i]
41
+ {format: extension}
50
42
  end
51
- Table.new(raw_table)
52
43
  end
53
44
  end
54
45
  end
@@ -0,0 +1,115 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class RecordBatchBuilder
20
+ class << self
21
+ # @since 0.12.0
22
+ def build(schema, data)
23
+ builder = new(schema)
24
+ builder.append(data)
25
+ builder.flush
26
+ end
27
+ end
28
+
29
+ alias_method :initialize_raw, :initialize
30
+ private :initialize_raw
31
+ def initialize(schema)
32
+ unless schema.is_a?(Schema)
33
+ schema = Schema.new(schema)
34
+ end
35
+ initialize_raw(schema)
36
+ @name_to_index = {}
37
+ schema.fields.each_with_index do |field, i|
38
+ @name_to_index[field.name] = i
39
+ end
40
+ end
41
+
42
+ # @since 0.12.0
43
+ def [](name_or_index)
44
+ case name_or_index
45
+ when String, Symbol
46
+ name = name_or_index
47
+ self[resolve_name(name)]
48
+ else
49
+ index = name_or_index
50
+ column_builders[index]
51
+ end
52
+ end
53
+
54
+ # @since 0.12.0
55
+ def append(*values)
56
+ values.each do |value|
57
+ case value
58
+ when Hash
59
+ append_columns(value)
60
+ else
61
+ append_records(value)
62
+ end
63
+ end
64
+ end
65
+
66
+ # @since 0.12.0
67
+ def append_records(records)
68
+ n = n_fields
69
+ columns = n.times.collect do
70
+ []
71
+ end
72
+ records.each_with_index do |record, nth_record|
73
+ case record
74
+ when nil
75
+ when Hash
76
+ record.each do |name, value|
77
+ nth_column = resolve_name(name)
78
+ next if nth_column.nil?
79
+ columns[nth_column] << value
80
+ end
81
+ else
82
+ record.each_with_index do |value, nth_column|
83
+ columns[nth_column] << value
84
+ end
85
+ end
86
+ columns.each do |column|
87
+ column << nil if column.size != (nth_record + 1)
88
+ end
89
+ end
90
+ columns.each_with_index do |column, i|
91
+ self[i].append(*column)
92
+ end
93
+ end
94
+
95
+ # @since 0.12.0
96
+ def append_columns(columns)
97
+ columns.each do |name, values|
98
+ self[name].append(*values)
99
+ end
100
+ end
101
+
102
+ private
103
+ def resolve_name(name)
104
+ @name_to_index[name.to_s]
105
+ end
106
+
107
+ # TODO: Make public with good name. Is column_builders good enough?
108
+ # builders? sub_builders?
109
+ def column_builders
110
+ @column_builders ||= n_fields.times.collect do |i|
111
+ get_field(i)
112
+ end
113
+ end
114
+ end
115
+ end
@@ -22,6 +22,22 @@ module Arrow
22
22
  include RecordContainable
23
23
  include Enumerable
24
24
 
25
+ class << self
26
+ def new(*args)
27
+ n_args = args.size
28
+ case n_args
29
+ when 2
30
+ schema, data = args
31
+ RecordBatchBuilder.build(schema, data)
32
+ when 3
33
+ super
34
+ else
35
+ message = "wrong number of arguments (given #{n_args}, expected 2..3)"
36
+ raise ArgumentError, message
37
+ end
38
+ end
39
+ end
40
+
25
41
  alias_method :each, :each_record
26
42
 
27
43
  alias_method :columns_raw, :columns
@@ -29,6 +45,15 @@ module Arrow
29
45
  @columns ||= columns_raw
30
46
  end
31
47
 
48
+ # Converts the record batch to {Arrow::Table}.
49
+ #
50
+ # @return [Arrow::Table]
51
+ #
52
+ # @since 0.12.0
53
+ def to_table
54
+ Table.new(schema, [self])
55
+ end
56
+
32
57
  def respond_to_missing?(name, include_private)
33
58
  return true if find_column(name)
34
59
  super