red-arrow 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/Rakefile +4 -0
  4. data/lib/arrow/array-builder.rb +6 -2
  5. data/lib/arrow/array.rb +6 -2
  6. data/{test/test-csv-reader.rb → lib/arrow/compression-type.rb} +16 -13
  7. data/lib/arrow/csv-loader.rb +102 -2
  8. data/lib/arrow/csv-read-options.rb +25 -0
  9. data/lib/arrow/data-type.rb +135 -0
  10. data/lib/arrow/decimal128-array-builder.rb +64 -0
  11. data/lib/arrow/decimal128-data-type.rb +69 -0
  12. data/lib/arrow/dense-union-data-type.rb +90 -0
  13. data/lib/arrow/dictionary-data-type.rb +106 -0
  14. data/lib/arrow/field-containable.rb +35 -0
  15. data/lib/arrow/field.rb +92 -8
  16. data/lib/arrow/file-output-stream.rb +34 -0
  17. data/lib/arrow/list-array-builder.rb +96 -0
  18. data/lib/arrow/list-data-type.rb +68 -0
  19. data/lib/arrow/loader.rb +30 -5
  20. data/lib/arrow/{csv-reader.rb → path-extension.rb} +19 -28
  21. data/lib/arrow/record-batch-builder.rb +115 -0
  22. data/lib/arrow/record-batch.rb +25 -0
  23. data/lib/arrow/schema.rb +97 -0
  24. data/lib/arrow/sparse-union-data-type.rb +90 -0
  25. data/lib/arrow/struct-array-builder.rb +146 -0
  26. data/lib/arrow/struct-array.rb +34 -0
  27. data/lib/arrow/struct-data-type.rb +130 -0
  28. data/lib/arrow/struct.rb +68 -0
  29. data/lib/arrow/table-loader.rb +65 -25
  30. data/lib/arrow/table-saver.rb +73 -24
  31. data/lib/arrow/table.rb +11 -2
  32. data/lib/arrow/time32-data-type.rb +61 -0
  33. data/lib/arrow/time64-data-type.rb +61 -0
  34. data/lib/arrow/timestamp-data-type.rb +57 -0
  35. data/lib/arrow/version.rb +5 -7
  36. data/lib/arrow/writable.rb +22 -0
  37. data/red-arrow.gemspec +8 -4
  38. data/test/helper.rb +1 -2
  39. data/test/test-csv-loader.rb +27 -0
  40. data/test/test-data-type.rb +47 -0
  41. data/test/test-decimal128-array-builder.rb +95 -0
  42. data/test/test-decimal128-array.rb +38 -0
  43. data/test/test-decimal128-data-type.rb +31 -0
  44. data/test/test-dense-union-data-type.rb +41 -0
  45. data/test/test-dictionary-data-type.rb +40 -0
  46. data/test/test-feather.rb +34 -0
  47. data/test/test-field.rb +71 -0
  48. data/test/test-file-output-stream.rb +54 -0
  49. data/test/test-list-array-builder.rb +79 -0
  50. data/test/test-list-array.rb +32 -0
  51. data/test/test-list-data-type.rb +43 -0
  52. data/test/test-record-batch-builder.rb +116 -0
  53. data/test/test-record-batch.rb +82 -27
  54. data/test/test-schema.rb +104 -0
  55. data/test/test-sparse-union-data-type.rb +41 -0
  56. data/test/test-struct-array-builder.rb +180 -0
  57. data/test/test-struct-array.rb +60 -15
  58. data/test/test-struct-data-type.rb +112 -0
  59. data/test/test-struct.rb +81 -0
  60. data/test/test-table.rb +165 -29
  61. data/test/test-time32-data-type.rb +42 -0
  62. data/test/test-time64-data-type.rb +42 -0
  63. data/test/test-timestamp-data-type.rb +42 -0
  64. metadata +99 -10
@@ -0,0 +1,69 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class Decimal128DataType
    # Keep the original constructor reachable under a private name so the
    # friendlier #initialize below can delegate to it.
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::Decimal128DataType}.
    #
    # @overload initialize(precision, scale)
    #
    #   @param precision [Integer] The precision of the decimal data
    #     type. It's the number of digits including the number of
    #     digits after the decimal point.
    #
    #   @param scale [Integer] The scale of the decimal data
    #     type. It's the number of digits after the decimal point.
    #
    #   @example Create a decimal data type for "XXXXXX.YY" decimal
    #     Arrow::Decimal128DataType.new(8, 2)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the decimal data
    #     type. It must have `:precision` and `:scale` values.
    #
    #   @option description [Integer] :precision The precision of the
    #     decimal data type. It's the number of digits including the
    #     number of digits after the decimal point.
    #
    #   @option description [Integer] :scale The scale of the decimal
    #     data type. It's the number of digits after the decimal
    #     point.
    #
    #   @example Create a decimal data type for "XXXXXX.YY" decimal
    #     Arrow::Decimal128DataType.new(precision: 8,
    #                                   scale: 2)
    #
    # @raise [ArgumentError] If the number of arguments is neither 1
    #   nor 2.
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        # Single argument: a description that is indexed by
        # `:precision` and `:scale`.
        description = args[0]
        precision = description[:precision]
        scale = description[:scale]
      when 2
        precision, scale = args
      else
        message = "wrong number of arguments (given #{n_args}, expected 1..2)"
        raise ArgumentError, message
      end
      initialize_raw(precision, scale)
    end
  end
end
@@ -0,0 +1,90 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class DenseUnionDataType
    # Keep the original constructor reachable under a private name so the
    # friendlier #initialize below can delegate to it.
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::DenseUnionDataType}.
    #
    # @overload initialize(fields, type_codes)
    #
    #   @param fields [::Array<Arrow::Field, Hash>] The fields of the
    #     dense union data type. You can mix {Arrow::Field} and field
    #     description in the fields.
    #
    #     See {Arrow::Field.new} how to specify field description.
    #
    #   @param type_codes [::Array<Integer>] The IDs that indicate
    #     corresponding fields.
    #
    #   @example Create a dense union data type for {2: visible, 9: count}
    #     fields = [
    #       Arrow::Field.new("visible", :boolean),
    #       {
    #         name: "count",
    #         type: :int32,
    #       },
    #     ]
    #     Arrow::DenseUnionDataType.new(fields, [2, 9])
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the dense union
    #     data type. It must have `:fields` and `:type_codes` values.
    #
    #   @option description [::Array<Arrow::Field, Hash>] :fields The
    #     fields of the dense union data type. You can mix
    #     {Arrow::Field} and field description in the fields.
    #
    #     See {Arrow::Field.new} how to specify field description.
    #
    #   @option description [::Array<Integer>] :type_codes The IDs
    #     that indicate corresponding fields.
    #
    #   @example Create a dense union data type for {2: visible, 9: count}
    #     fields = [
    #       Arrow::Field.new("visible", :boolean),
    #       {
    #         name: "count",
    #         type: :int32,
    #       },
    #     ]
    #     Arrow::DenseUnionDataType.new(fields: fields,
    #                                   type_codes: [2, 9])
    #
    # @raise [ArgumentError] If the number of arguments is neither 1
    #   nor 2.
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        description = args[0]
        fields = description[:fields]
        type_codes = description[:type_codes]
      when 2
        fields, type_codes = args
      else
        message = "wrong number of arguments (given #{n_args}, expected 1..2)"
        raise ArgumentError, message
      end
      # Existing Field objects pass through untouched; anything else is
      # treated as a field description and handed to Field.new.
      fields = fields.map do |field|
        field.is_a?(Field) ? field : Field.new(field)
      end
      initialize_raw(fields, type_codes)
    end
  end
end
@@ -0,0 +1,106 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class DictionaryDataType
    # Keep the original constructor reachable under a private name so the
    # friendlier #initialize below can delegate to it.
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::DictionaryDataType}.
    #
    # @overload initialize(index_data_type, dictionary, ordered)
    #
    #   @param index_data_type [Arrow::DataType, Hash, String, Symbol]
    #     The index data type of the dictionary data type. It must be
    #     a signed integer data type. Here are the available signed
    #     integer data types:
    #
    #       * Arrow::Int8DataType
    #       * Arrow::Int16DataType
    #       * Arrow::Int32DataType
    #       * Arrow::Int64DataType
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @param dictionary [Arrow::Array] The real values of the
    #     dictionary data type.
    #
    #   @param ordered [Boolean] Whether dictionary contents are
    #     ordered or not.
    #
    #   @example Create a dictionary data type for {0: "Hello", 1: "World"}
    #     index_data_type = :int8
    #     dictionary = Arrow::StringArray.new(["Hello", "World"])
    #     ordered = true
    #     Arrow::DictionaryDataType.new(index_data_type,
    #                                   dictionary,
    #                                   ordered)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the dictionary
    #     data type. It must have `:index_data_type`, `:dictionary`
    #     and `:ordered` values.
    #
    #   @option description [Arrow::DataType, Hash, String, Symbol]
    #     :index_data_type The index data type of the dictionary data
    #     type. It must be a signed integer data type. Here are the
    #     available signed integer data types:
    #
    #       * Arrow::Int8DataType
    #       * Arrow::Int16DataType
    #       * Arrow::Int32DataType
    #       * Arrow::Int64DataType
    #
    #     You can specify data type as a description by `Hash`.
    #
    #     See {Arrow::DataType.resolve} how to specify data type
    #     description.
    #
    #   @option description [Arrow::Array] :dictionary The real values
    #     of the dictionary data type.
    #
    #   @option description [Boolean] :ordered Whether dictionary
    #     contents are ordered or not.
    #
    #   @example Create a dictionary data type for {0: "Hello", 1: "World"}
    #     dictionary = Arrow::StringArray.new(["Hello", "World"])
    #     Arrow::DictionaryDataType.new(index_data_type: :int8,
    #                                   dictionary: dictionary,
    #                                   ordered: true)
    #
    # @raise [ArgumentError] If the number of arguments is neither 1
    #   nor 3.
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        description = args[0]
        index_data_type = description[:index_data_type]
        dictionary = description[:dictionary]
        ordered = description[:ordered]
      when 3
        index_data_type, dictionary, ordered = args
      else
        message = "wrong number of arguments (given #{n_args}, expected 1 or 3)"
        raise ArgumentError, message
      end
      # The index data type is passed through DataType.resolve before it
      # reaches the raw constructor.
      index_data_type = DataType.resolve(index_data_type)
      initialize_raw(index_data_type, dictionary, ordered)
    end
  end
end
@@ -0,0 +1,35 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  # Mixin that adds a name-or-index field lookup on top of two methods
  # the including object must provide: `get_field_by_name(name)` and
  # `get_field(index)`.
  module FieldContainable
    # Finds a field by its name or by its index.
    #
    # @param name_or_index [String, Symbol, Integer] A `String` or
    #   `Symbol` is passed to `get_field_by_name`; an `Integer` is
    #   passed to `get_field`.
    #
    # @return The value returned by `get_field_by_name` or `get_field`.
    #
    # @raise [ArgumentError] If `name_or_index` is not a `String`,
    #   `Symbol` or `Integer`.
    def find_field(name_or_index)
      case name_or_index
      when String, Symbol
        get_field_by_name(name_or_index)
      when Integer
        get_field(name_or_index)
      else
        # Built in one expression instead of appending to a string
        # literal, so this keeps working when string literals are frozen.
        message = "field name or index must be String, Symbol or Integer: " \
                  "<#{name_or_index.inspect}>"
        raise ArgumentError, message
      end
    end
  end
end
@@ -18,16 +18,100 @@
18
18
  module Arrow
19
19
  class Field
20
20
  alias_method :initialize_raw, :initialize
21
- def initialize(name, data_type)
22
- case data_type
23
- when String, Symbol
24
- data_type_name = data_type.to_s.capitalize.gsub(/\AUint/, "UInt")
25
- data_type_class_name = "#{data_type_name}DataType"
26
- if Arrow.const_defined?(data_type_class_name)
27
- data_type_class = Arrow.const_get(data_type_class_name)
28
- data_type = data_type_class.new
21
+ private :initialize_raw
22
+
23
+ # Creates a new {Arrow::Field}.
24
+ #
25
+ # @overload initialize(name, data_type)
26
+ #
27
+ # @param name [String, Symbol] The name of the field.
28
+ #
29
+ # @param data_type [Arrow::DataType, Hash, String, Symbol] The
30
+ # data type of the field.
31
+ #
32
+ # You can specify data type as a description by `Hash`.
33
+ #
34
+ # See {Arrow::DataType.resolve} how to specify data type
35
+ # description.
36
+ #
37
+ # @example Create a field with {Arrow::DataType}s
38
+ # Arrow::Field.new("visible", Arrow::BooleanDataType.new)
39
+ #
40
+ # @example Create a field with data type description
41
+ # Arrow::Field.new("visible", :boolean)
42
+ #
43
+ # @example Create a field with name as `Symbol`
44
+ # Arrow::Field.new(:visible, :boolean)
45
+ #
46
+ # @overload initialize(description)
47
+ #
48
+ # @param description [Hash] The description of the field.
49
+ #
50
+ # Field description is a raw `Hash`. Field description must
51
+ # have `:name` and `:data_type` values. `:name` is the name of
52
+ # the field. `:data_type` is the data type of the field. You
53
+ # can use {Arrow::DataType} or data type description as
54
+ # `:data_type` value.
55
+ #
56
+ # See {Arrow::DataType.resolve} how to specify data type
57
+ # description.
58
+ #
59
+ # There is a shortcut for convenience. If field description
60
+ # doesn't have `:data_type`, all keys except `:name` are
61
+ # processed as data type description. For example, the
62
+ # following field descriptions are the same:
63
+ #
64
+ # ```ruby
65
+ # {name: "visible", data_type: {type: :boolean}}
66
+ # {name: "visible", type: :boolean} # Shortcut version
67
+ # ```
68
+ #
69
+ # @option description [String, Symbol] :name The name of the field.
70
+ #
71
+ # @option description [Arrow::DataType, Hash] :data_type The
72
+ # data type of the field. You can specify data type description
73
+ # by `Hash`.
74
+ #
75
+ # See {Arrow::DataType.resolve} how to specify data type
76
+ # description.
77
+ #
78
+ # @example Create a field with {Arrow::DataType}s
79
+ # Arrow::Field.new(name: "visible",
80
+ # data_type: Arrow::BooleanDataType.new)
81
+ #
82
+ # @example Create a field with data type description
83
+ # Arrow::Field.new(name: "visible", data_type: {type: :boolean})
84
+ #
85
+ # @example Create a field with shortcut form
86
+ # Arrow::Field.new(name: "visible", type: :boolean)
87
+ def initialize(*args)
88
+ n_args = args.size
89
+ case n_args
90
+ when 1
91
+ description = args[0]
92
+ name = nil
93
+ data_type = nil
94
+ data_type_description = {}
95
+ description.each do |key, value|
96
+ key = key.to_sym
97
+ case key
98
+ when :name
99
+ name = value
100
+ when :data_type
101
+ data_type = DataType.resolve(value)
102
+ else
103
+ data_type_description[key] = value
104
+ end
29
105
  end
106
+ data_type ||= DataType.resolve(data_type_description)
107
+ when 2
108
+ name = args[0]
109
+ data_type = DataType.resolve(args[1])
110
+ else
111
+ message = "wrong number of arguments (given #{n_args}, expected 1..2)"
112
+ raise ArgumentError, message
30
113
  end
114
+
31
115
  initialize_raw(name, data_type)
32
116
  end
33
117
  end
@@ -0,0 +1,34 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class FileOutputStream
    # Keep the original constructor reachable under a private name so the
    # friendlier #initialize below can delegate to it.
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::FileOutputStream}.
    #
    # @param path The path forwarded unchanged to the raw constructor.
    #
    # @param options [Boolean, Hash] Either the append flag itself
    #   (`true` or `false`) or a `Hash` of options. Any other value is
    #   ignored and the append flag falls back to `false`.
    #
    # @option options [Boolean] :append The append flag forwarded to the
    #   raw constructor. The key may be given as a `Symbol` or as a
    #   `String` (`"append"`). Defaults to `false` when missing or `nil`.
    #
    # @example Pass the append flag directly
    #   Arrow::FileOutputStream.new(path, true)
    #
    # @example Pass the append flag as an option
    #   Arrow::FileOutputStream.new(path, append: true)
    def initialize(path, options={})
      append = nil
      case options
      when true, false
        append = options
      when Hash
        append = options[:append]
        # Accept a String key as well, so options that come with String
        # keys are not silently treated as "append: false".
        append = options["append"] if append.nil?
      end
      append = false if append.nil?
      initialize_raw(path, append)
    end
  end
end