red-arrow 8.0.0 → 24.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178)
  1. checksums.yaml +4 -4
  2. data/README.md +15 -7
  3. data/ext/arrow/arrow.cpp +67 -0
  4. data/ext/arrow/converters.cpp +10 -0
  5. data/ext/arrow/converters.hpp +310 -46
  6. data/ext/arrow/extconf.rb +41 -22
  7. data/ext/arrow/raw-records.cpp +165 -2
  8. data/ext/arrow/red-arrow.hpp +2 -0
  9. data/ext/arrow/values.cpp +6 -2
  10. data/lib/arrow/array-builder.rb +89 -14
  11. data/{test/test-time32-data-type.rb → lib/arrow/array-computable.rb} +24 -16
  12. data/{test/test-buffer.rb → lib/arrow/array-statistics.rb} +19 -24
  13. data/lib/arrow/array.rb +40 -4
  14. data/lib/arrow/chunked-array.rb +56 -1
  15. data/lib/arrow/column-containable.rb +9 -0
  16. data/lib/arrow/column.rb +49 -4
  17. data/{test/test-tensor.rb → lib/arrow/csv-write-options.rb} +28 -31
  18. data/lib/arrow/data-type.rb +17 -3
  19. data/lib/arrow/decimal128-array-builder.rb +16 -6
  20. data/lib/arrow/decimal128.rb +14 -0
  21. data/lib/arrow/decimal256-array-builder.rb +16 -6
  22. data/lib/arrow/decimal256.rb +14 -0
  23. data/{test/test-float-scalar.rb → lib/arrow/dense-union-array-builder.rb} +27 -24
  24. data/{test/test-boolean-scalar.rb → lib/arrow/dense-union-array.rb} +7 -7
  25. data/lib/arrow/duration-array-builder.rb +27 -0
  26. data/lib/arrow/duration-array.rb +24 -0
  27. data/lib/arrow/duration-data-type.rb +32 -0
  28. data/lib/arrow/expression.rb +6 -2
  29. data/lib/arrow/field-containable.rb +1 -1
  30. data/lib/arrow/field.rb +44 -3
  31. data/lib/arrow/fixed-size-list-array-builder.rb +29 -0
  32. data/lib/arrow/fixed-size-list-data-type.rb +118 -0
  33. data/lib/arrow/function.rb +0 -1
  34. data/lib/arrow/half-float-array-builder.rb +32 -0
  35. data/lib/arrow/half-float-array.rb +24 -0
  36. data/lib/arrow/half-float.rb +118 -0
  37. data/{test/helper/fixture.rb → lib/arrow/input-referable.rb} +7 -6
  38. data/lib/arrow/jruby/array-builder.rb +114 -0
  39. data/lib/arrow/jruby/array.rb +109 -0
  40. data/lib/arrow/jruby/chunked-array.rb +36 -0
  41. data/lib/arrow/jruby/compression-type.rb +26 -0
  42. data/lib/arrow/jruby/csv-read-options.rb +32 -0
  43. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  44. data/lib/arrow/jruby/decimal128.rb +28 -0
  45. data/lib/arrow/jruby/decimal256.rb +28 -0
  46. data/{test/fixture/float-integer.csv → lib/arrow/jruby/error.rb} +7 -4
  47. data/lib/arrow/jruby/file-system.rb +24 -0
  48. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  49. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  50. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  51. data/{test/fixture/integer-float.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  52. data/lib/arrow/jruby/sort-options.rb +24 -0
  53. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  54. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  55. data/lib/arrow/jruby/writable.rb +24 -0
  56. data/lib/arrow/jruby.rb +52 -0
  57. data/{test/test-date32-array.rb → lib/arrow/large-list-array-builder.rb} +10 -5
  58. data/lib/arrow/large-list-data-type.rb +83 -0
  59. data/lib/arrow/libraries.rb +140 -0
  60. data/lib/arrow/list-array-builder.rb +1 -68
  61. data/lib/arrow/list-data-type.rb +3 -38
  62. data/{test/test-dictionary-array.rb → lib/arrow/list-field-resolvable.rb} +26 -17
  63. data/lib/arrow/list-slice-options.rb +76 -0
  64. data/lib/arrow/list-values-appendable.rb +88 -0
  65. data/lib/arrow/loader.rb +15 -96
  66. data/{test/test-decimal128-array.rb → lib/arrow/make-struct-options.rb} +18 -18
  67. data/lib/arrow/raw-table-converter.rb +10 -3
  68. data/lib/arrow/raw-tensor-converter.rb +89 -0
  69. data/lib/arrow/record-batch-file-reader.rb +2 -0
  70. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  71. data/lib/arrow/record-batch.rb +6 -2
  72. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +5 -4
  73. data/lib/arrow/scalar.rb +67 -0
  74. data/lib/arrow/slicer.rb +61 -0
  75. data/lib/arrow/sort-key.rb +3 -3
  76. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  77. data/lib/arrow/sparse-union-array.rb +26 -0
  78. data/lib/arrow/stream-decoder.rb +29 -0
  79. data/{test/test-decimal256-data-type.rb → lib/arrow/stream-listener.rb} +25 -9
  80. data/lib/arrow/string-array-builder.rb +30 -0
  81. data/lib/arrow/struct-array-builder.rb +0 -5
  82. data/lib/arrow/table-formatter.rb +38 -8
  83. data/lib/arrow/table-list-formatter.rb +3 -3
  84. data/lib/arrow/table-loader.rb +11 -5
  85. data/lib/arrow/table-saver.rb +4 -3
  86. data/lib/arrow/table-table-formatter.rb +7 -0
  87. data/lib/arrow/table.rb +180 -33
  88. data/lib/arrow/tensor.rb +144 -0
  89. data/lib/arrow/time-unit.rb +31 -0
  90. data/lib/arrow/time32-array-builder.rb +2 -14
  91. data/lib/arrow/time32-data-type.rb +9 -38
  92. data/lib/arrow/time64-array-builder.rb +2 -14
  93. data/lib/arrow/time64-data-type.rb +9 -38
  94. data/lib/arrow/timestamp-array-builder.rb +3 -15
  95. data/lib/arrow/timestamp-data-type.rb +9 -34
  96. data/{test/test-date64-array.rb → lib/arrow/timestamp-parser.rb} +14 -6
  97. data/lib/arrow/union-array-builder.rb +59 -0
  98. data/lib/arrow/union-array.rb +26 -0
  99. data/lib/arrow/version.rb +1 -1
  100. data/lib/arrow.rb +2 -7
  101. data/red-arrow.gemspec +74 -11
  102. metadata +85 -210
  103. data/test/fixture/TestOrcFile.test1.orc +0 -0
  104. data/test/fixture/with-header-float.csv +0 -20
  105. data/test/fixture/with-header.csv +0 -20
  106. data/test/fixture/without-header-float.csv +0 -19
  107. data/test/fixture/without-header.csv +0 -19
  108. data/test/helper/omittable.rb +0 -36
  109. data/test/helper.rb +0 -30
  110. data/test/raw-records/test-basic-arrays.rb +0 -395
  111. data/test/raw-records/test-dense-union-array.rb +0 -521
  112. data/test/raw-records/test-list-array.rb +0 -610
  113. data/test/raw-records/test-map-array.rb +0 -478
  114. data/test/raw-records/test-multiple-columns.rb +0 -65
  115. data/test/raw-records/test-sparse-union-array.rb +0 -511
  116. data/test/raw-records/test-struct-array.rb +0 -515
  117. data/test/raw-records/test-table.rb +0 -47
  118. data/test/run-test.rb +0 -71
  119. data/test/test-array-builder.rb +0 -136
  120. data/test/test-array.rb +0 -325
  121. data/test/test-bigdecimal.rb +0 -40
  122. data/test/test-binary-dictionary-array-builder.rb +0 -103
  123. data/test/test-chunked-array.rb +0 -183
  124. data/test/test-column.rb +0 -92
  125. data/test/test-csv-loader.rb +0 -250
  126. data/test/test-data-type.rb +0 -83
  127. data/test/test-decimal128-array-builder.rb +0 -112
  128. data/test/test-decimal128-data-type.rb +0 -31
  129. data/test/test-decimal128.rb +0 -102
  130. data/test/test-decimal256-array-builder.rb +0 -112
  131. data/test/test-decimal256-array.rb +0 -38
  132. data/test/test-decimal256.rb +0 -102
  133. data/test/test-dense-union-data-type.rb +0 -41
  134. data/test/test-dictionary-data-type.rb +0 -40
  135. data/test/test-expression.rb +0 -40
  136. data/test/test-feather.rb +0 -49
  137. data/test/test-field.rb +0 -91
  138. data/test/test-file-output-stream.rb +0 -54
  139. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  140. data/test/test-fixed-size-binary-array.rb +0 -36
  141. data/test/test-function.rb +0 -210
  142. data/test/test-group.rb +0 -180
  143. data/test/test-list-array-builder.rb +0 -79
  144. data/test/test-list-array.rb +0 -32
  145. data/test/test-list-data-type.rb +0 -69
  146. data/test/test-map-array-builder.rb +0 -110
  147. data/test/test-map-array.rb +0 -33
  148. data/test/test-memory-view.rb +0 -434
  149. data/test/test-orc.rb +0 -173
  150. data/test/test-record-batch-builder.rb +0 -125
  151. data/test/test-record-batch-file-reader.rb +0 -115
  152. data/test/test-record-batch-iterator.rb +0 -37
  153. data/test/test-record-batch-reader.rb +0 -46
  154. data/test/test-record-batch.rb +0 -182
  155. data/test/test-schema.rb +0 -134
  156. data/test/test-slicer.rb +0 -487
  157. data/test/test-sort-indices.rb +0 -40
  158. data/test/test-sort-key.rb +0 -81
  159. data/test/test-sort-options.rb +0 -58
  160. data/test/test-sparse-union-data-type.rb +0 -41
  161. data/test/test-string-dictionary-array-builder.rb +0 -103
  162. data/test/test-struct-array-builder.rb +0 -184
  163. data/test/test-struct-array.rb +0 -94
  164. data/test/test-struct-data-type.rb +0 -112
  165. data/test/test-table.rb +0 -1123
  166. data/test/test-time.rb +0 -288
  167. data/test/test-time32-array.rb +0 -81
  168. data/test/test-time64-array.rb +0 -81
  169. data/test/test-time64-data-type.rb +0 -42
  170. data/test/test-timestamp-array.rb +0 -45
  171. data/test/test-timestamp-data-type.rb +0 -42
  172. data/test/values/test-basic-arrays.rb +0 -325
  173. data/test/values/test-dense-union-array.rb +0 -509
  174. data/test/values/test-dictionary-array.rb +0 -295
  175. data/test/values/test-list-array.rb +0 -571
  176. data/test/values/test-map-array.rb +0 -466
  177. data/test/values/test-sparse-union-array.rb +0 -500
  178. data/test/values/test-struct-array.rb +0 -512
@@ -18,31 +18,19 @@
18
18
  module Arrow
19
19
  class TimestampArrayBuilder
20
20
  class << self
21
- def build(unit_or_data_type, values)
22
- builder = new(unit_or_data_type)
21
+ def build(data_type, values)
22
+ builder = new(data_type)
23
23
  builder.build(values)
24
24
  end
25
25
  end
26
26
 
27
- alias_method :initialize_raw, :initialize
28
- def initialize(unit_or_data_type)
29
- case unit_or_data_type
30
- when DataType
31
- data_type = unit_or_data_type
32
- else
33
- unit = unit_or_data_type
34
- data_type = TimestampDataType.new(unit)
35
- end
36
- initialize_raw(data_type)
37
- end
38
-
39
27
  private
40
28
  def unit_id
41
29
  @unit_id ||= value_data_type.unit.nick.to_sym
42
30
  end
43
31
 
44
32
  def convert_to_arrow_value(value)
45
- if value.respond_to?(:to_time) and not value.is_a?(Time)
33
+ if value.respond_to?(:to_time) and not value.is_a?(::Time)
46
34
  value = value.to_time
47
35
  end
48
36
 
@@ -17,41 +17,16 @@
17
17
 
18
18
  module Arrow
19
19
  class TimestampDataType
20
- alias_method :initialize_raw, :initialize
21
- private :initialize_raw
22
-
23
- # Creates a new {Arrow::TimestampDataType}.
24
- #
25
- # @overload initialize(unit)
26
- #
27
- # @param unit [Arrow::TimeUnit, Symbol] The unit of the
28
- # timestamp data type.
29
- #
30
- # @example Create a timestamp data type with Arrow::TimeUnit
31
- # Arrow::TimestampDataType.new(Arrow::TimeUnit::MILLI)
32
- #
33
- # @example Create a timestamp data type with Symbol
34
- # Arrow::TimestampDataType.new(:milli)
35
- #
36
- # @overload initialize(description)
37
- #
38
- # @param description [Hash] The description of the timestamp data
39
- # type. It must have `:unit` value.
40
- #
41
- # @option description [Arrow::TimeUnit, Symbol] :unit The unit of
42
- # the timestamp data type.
43
- #
44
- # @example Create a timestamp data type with Arrow::TimeUnit
45
- # Arrow::TimestampDataType.new(unit: Arrow::TimeUnit::MILLI)
46
- #
47
- # @example Create a timestamp data type with Symbol
48
- # Arrow::TimestampDataType.new(unit: :milli)
49
- def initialize(unit)
50
- if unit.is_a?(Hash)
51
- description = unit
52
- unit = description[:unit]
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when Symbol, Arrow::TimeUnit
25
+ new(value)
26
+ else
27
+ super
28
+ end
53
29
  end
54
- initialize_raw(unit)
55
30
  end
56
31
  end
57
32
  end
@@ -15,11 +15,19 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- class Date64ArrayTest < Test::Unit::TestCase
19
- test("#[]") do
20
- n_msecs_since_epoch = 1503878400000 # 2017-08-28T00:00:00Z
21
- array = Arrow::Date64Array.new([n_msecs_since_epoch])
22
- assert_equal(DateTime.new(2017, 8, 28, 0, 0, 0),
23
- array[0])
18
+ module Arrow
19
+ class TimestampParser
20
+ class << self
21
+ def try_convert(value)
22
+ case value
23
+ when :iso8601
24
+ ISO8601TimestampParser.new
25
+ when String
26
+ StrptimeTimestampParser.new(value)
27
+ else
28
+ nil
29
+ end
30
+ end
31
+ end
24
32
  end
25
33
  end
@@ -0,0 +1,59 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class UnionArrayBuilder
20
+ def append_values(values, is_valids=nil)
21
+ if is_valids
22
+ is_valids.each_with_index do |is_valid, i|
23
+ if is_valid
24
+ append_value(values[i])
25
+ else
26
+ append_null
27
+ end
28
+ end
29
+ else
30
+ values.each do |value|
31
+ append_value(value)
32
+ end
33
+ end
34
+ end
35
+
36
+ alias_method :append_child_raw, :append_child
37
+ def append_child(builder, field_name=nil)
38
+ @child_infos = nil
39
+ append_child_raw(builder, field_name)
40
+ end
41
+
42
+ private
43
+ def child_infos
44
+ @child_infos ||= create_child_infos
45
+ end
46
+
47
+ def create_child_infos
48
+ infos = {}
49
+ type = value_data_type
50
+ type.fields.zip(children, type.type_codes).each do |field, child, id|
51
+ infos[field.name] = {
52
+ builder: child,
53
+ id: id,
54
+ }
55
+ end
56
+ infos
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,26 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class UnionArray
20
+ def fields
21
+ @fields ||= n_fields.times.collect do |i|
22
+ get_field(i)
23
+ end
24
+ end
25
+ end
26
+ end
data/lib/arrow/version.rb CHANGED
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "8.0.0"
19
+ VERSION = "24.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
data/lib/arrow.rb CHANGED
@@ -15,16 +15,11 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- require "extpp/setup"
19
- require "gio2"
20
-
21
18
  require "arrow/version"
22
19
 
23
- require "arrow/loader"
24
-
25
20
  module Arrow
26
21
  class Error < StandardError
27
22
  end
28
-
29
- Loader.load
30
23
  end
24
+
25
+ require_relative "arrow/#{RUBY_ENGINE}"
data/red-arrow.gemspec CHANGED
@@ -20,7 +20,11 @@
20
20
  require_relative "lib/arrow/version"
21
21
 
22
22
  Gem::Specification.new do |spec|
23
+ is_jruby = RUBY_ENGINE == "jruby"
24
+
23
25
  spec.name = "red-arrow"
26
+ spec.platform = "java" if is_jruby
27
+
24
28
  version_components = [
25
29
  Arrow::Version::MAJOR.to_s,
26
30
  Arrow::Version::MINOR.to_s,
@@ -29,13 +33,13 @@ Gem::Specification.new do |spec|
29
33
  ]
30
34
  spec.version = version_components.compact.join(".")
31
35
  spec.homepage = "https://arrow.apache.org/"
32
- spec.authors = ["Apache Arrow Developers"]
36
+ spec.authors = ["The Apache Software Foundation"]
33
37
  spec.email = ["dev@arrow.apache.org"]
34
38
 
35
39
  spec.summary = "Red Arrow is the Ruby bindings of Apache Arrow"
36
40
  spec.description =
37
41
  "Apache Arrow is a common in-memory columnar data store. " +
38
- "It's useful to share and process large data."
42
+ "It's useful to share and process large data efficiently."
39
43
  spec.license = "Apache-2.0"
40
44
  spec.files = ["README.md", "Rakefile", "Gemfile", "#{spec.name}.gemspec"]
41
45
  spec.files += ["LICENSE.txt", "NOTICE.txt"]
@@ -43,16 +47,75 @@ Gem::Specification.new do |spec|
43
47
  spec.files += Dir.glob("lib/**/*.rb")
44
48
  spec.files += Dir.glob("image/*.*")
45
49
  spec.files += Dir.glob("doc/text/*")
46
- spec.test_files += Dir.glob("test/**/*")
47
- spec.extensions = ["ext/arrow/extconf.rb"]
50
+ spec.extensions = ["ext/arrow/extconf.rb"] unless is_jruby
51
+
52
+ required_arrow_glib_version = version_components[0, 3].join(".")
53
+
54
+ spec.add_runtime_dependency("bigdecimal", ">= 3.1.0")
55
+ spec.add_runtime_dependency("csv")
56
+ if is_jruby
57
+ spec.add_runtime_dependency("jar-dependencies")
58
+ spec.requirements << "jar org.apache.arrow, arrow-vector, #{spec.version}"
59
+ spec.requirements << "jar org.apache.arrow, arrow-memory-netty, #{spec.version}"
60
+ else
61
+ spec.add_runtime_dependency("extpp", ">= 0.1.2")
62
+ spec.add_runtime_dependency("gio2", ">= 4.2.3")
63
+ spec.add_runtime_dependency("pkg-config")
64
+
65
+ repository_url_prefix = "https://packages.apache.org/artifactory/arrow"
66
+ [
67
+ # Try without additional repository
68
+ ["amazon_linux", "arrow-glib-devel"],
69
+ # Retry with additional repository
70
+ [
71
+ "amazon_linux",
72
+ "#{repository_url_prefix}/amazon-linux/%{version}/" +
73
+ "apache-arrow-release-latest.rpm",
74
+ ],
75
+ ["amazon_linux", "arrow-glib-devel"],
76
+
77
+ # Try without additional repository
78
+ ["centos", "arrow-glib-devel"],
79
+ # Retry with additional repository
80
+ [
81
+ "centos",
82
+ "#{repository_url_prefix}/centos/%{major_version}-stream/" +
83
+ "apache-arrow-release-latest.rpm",
84
+ ],
85
+ ["centos", "arrow-glib-devel"],
86
+
87
+ ["conda", "arrow-c-glib"],
88
+
89
+ # Try without additional repository
90
+ ["debian", "libarrow-glib-dev"],
91
+ # Retry with additional repository
92
+ [
93
+ "debian",
94
+ "#{repository_url_prefix}/%{distribution}/" +
95
+ "apache-arrow-apt-source-latest-%{code_name}.deb",
96
+ ],
97
+ ["debian", "libarrow-glib-dev"],
98
+
99
+ ["fedora", "libarrow-glib-devel"],
100
+
101
+ ["homebrew", "apache-arrow-glib"],
48
102
 
49
- spec.add_runtime_dependency("bigdecimal", ">= 2.0.3")
50
- spec.add_runtime_dependency("extpp", ">= 0.0.7")
51
- spec.add_runtime_dependency("gio2", ">= 3.5.0")
52
- spec.add_runtime_dependency("native-package-installer")
53
- spec.add_runtime_dependency("pkg-config")
103
+ # Try without additional repository
104
+ ["rhel", "arrow-glib-devel"],
105
+ # Retry with additional repository
106
+ [
107
+ "rhel",
108
+ "#{repository_url_prefix}/almalinux/%{major_version}/" +
109
+ "apache-arrow-release-latest.rpm",
110
+ ],
111
+ ["rhel", "arrow-glib-devel"],
112
+ ].each do |platform, package|
113
+ spec.requirements <<
114
+ "system: arrow-glib>=#{required_arrow_glib_version}: " +
115
+ "#{platform}: #{package}"
116
+ end
117
+ end
54
118
 
55
- required_msys2_package_version = version_components[0, 3].join(".")
56
119
  spec.metadata["msys2_mingw_dependencies"] =
57
- "arrow>=#{required_msys2_package_version}"
120
+ "arrow>=#{required_arrow_glib_version}"
58
121
  end