red-arrow 8.0.0 → 24.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +15 -7
  3. data/ext/arrow/arrow.cpp +67 -0
  4. data/ext/arrow/converters.cpp +10 -0
  5. data/ext/arrow/converters.hpp +310 -46
  6. data/ext/arrow/extconf.rb +41 -22
  7. data/ext/arrow/raw-records.cpp +165 -2
  8. data/ext/arrow/red-arrow.hpp +2 -0
  9. data/ext/arrow/values.cpp +6 -2
  10. data/lib/arrow/array-builder.rb +89 -14
  11. data/{test/test-time32-data-type.rb → lib/arrow/array-computable.rb} +24 -16
  12. data/{test/test-buffer.rb → lib/arrow/array-statistics.rb} +19 -24
  13. data/lib/arrow/array.rb +40 -4
  14. data/lib/arrow/chunked-array.rb +56 -1
  15. data/lib/arrow/column-containable.rb +9 -0
  16. data/lib/arrow/column.rb +49 -4
  17. data/{test/test-tensor.rb → lib/arrow/csv-write-options.rb} +28 -31
  18. data/lib/arrow/data-type.rb +17 -3
  19. data/lib/arrow/decimal128-array-builder.rb +16 -6
  20. data/lib/arrow/decimal128.rb +14 -0
  21. data/lib/arrow/decimal256-array-builder.rb +16 -6
  22. data/lib/arrow/decimal256.rb +14 -0
  23. data/{test/test-float-scalar.rb → lib/arrow/dense-union-array-builder.rb} +27 -24
  24. data/{test/test-boolean-scalar.rb → lib/arrow/dense-union-array.rb} +7 -7
  25. data/lib/arrow/duration-array-builder.rb +27 -0
  26. data/lib/arrow/duration-array.rb +24 -0
  27. data/lib/arrow/duration-data-type.rb +32 -0
  28. data/lib/arrow/expression.rb +6 -2
  29. data/lib/arrow/field-containable.rb +1 -1
  30. data/lib/arrow/field.rb +44 -3
  31. data/lib/arrow/fixed-size-list-array-builder.rb +29 -0
  32. data/lib/arrow/fixed-size-list-data-type.rb +118 -0
  33. data/lib/arrow/function.rb +0 -1
  34. data/lib/arrow/half-float-array-builder.rb +32 -0
  35. data/lib/arrow/half-float-array.rb +24 -0
  36. data/lib/arrow/half-float.rb +118 -0
  37. data/{test/helper/fixture.rb → lib/arrow/input-referable.rb} +7 -6
  38. data/lib/arrow/jruby/array-builder.rb +114 -0
  39. data/lib/arrow/jruby/array.rb +109 -0
  40. data/lib/arrow/jruby/chunked-array.rb +36 -0
  41. data/lib/arrow/jruby/compression-type.rb +26 -0
  42. data/lib/arrow/jruby/csv-read-options.rb +32 -0
  43. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  44. data/lib/arrow/jruby/decimal128.rb +28 -0
  45. data/lib/arrow/jruby/decimal256.rb +28 -0
  46. data/{test/fixture/float-integer.csv → lib/arrow/jruby/error.rb} +7 -4
  47. data/lib/arrow/jruby/file-system.rb +24 -0
  48. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  49. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  50. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  51. data/{test/fixture/integer-float.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  52. data/lib/arrow/jruby/sort-options.rb +24 -0
  53. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  54. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  55. data/lib/arrow/jruby/writable.rb +24 -0
  56. data/lib/arrow/jruby.rb +52 -0
  57. data/{test/test-date32-array.rb → lib/arrow/large-list-array-builder.rb} +10 -5
  58. data/lib/arrow/large-list-data-type.rb +83 -0
  59. data/lib/arrow/libraries.rb +140 -0
  60. data/lib/arrow/list-array-builder.rb +1 -68
  61. data/lib/arrow/list-data-type.rb +3 -38
  62. data/{test/test-dictionary-array.rb → lib/arrow/list-field-resolvable.rb} +26 -17
  63. data/lib/arrow/list-slice-options.rb +76 -0
  64. data/lib/arrow/list-values-appendable.rb +88 -0
  65. data/lib/arrow/loader.rb +15 -96
  66. data/{test/test-decimal128-array.rb → lib/arrow/make-struct-options.rb} +18 -18
  67. data/lib/arrow/raw-table-converter.rb +10 -3
  68. data/lib/arrow/raw-tensor-converter.rb +89 -0
  69. data/lib/arrow/record-batch-file-reader.rb +2 -0
  70. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  71. data/lib/arrow/record-batch.rb +6 -2
  72. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +5 -4
  73. data/lib/arrow/scalar.rb +67 -0
  74. data/lib/arrow/slicer.rb +61 -0
  75. data/lib/arrow/sort-key.rb +3 -3
  76. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  77. data/lib/arrow/sparse-union-array.rb +26 -0
  78. data/lib/arrow/stream-decoder.rb +29 -0
  79. data/{test/test-decimal256-data-type.rb → lib/arrow/stream-listener.rb} +25 -9
  80. data/lib/arrow/string-array-builder.rb +30 -0
  81. data/lib/arrow/struct-array-builder.rb +0 -5
  82. data/lib/arrow/table-formatter.rb +38 -8
  83. data/lib/arrow/table-list-formatter.rb +3 -3
  84. data/lib/arrow/table-loader.rb +11 -5
  85. data/lib/arrow/table-saver.rb +4 -3
  86. data/lib/arrow/table-table-formatter.rb +7 -0
  87. data/lib/arrow/table.rb +180 -33
  88. data/lib/arrow/tensor.rb +144 -0
  89. data/lib/arrow/time-unit.rb +31 -0
  90. data/lib/arrow/time32-array-builder.rb +2 -14
  91. data/lib/arrow/time32-data-type.rb +9 -38
  92. data/lib/arrow/time64-array-builder.rb +2 -14
  93. data/lib/arrow/time64-data-type.rb +9 -38
  94. data/lib/arrow/timestamp-array-builder.rb +3 -15
  95. data/lib/arrow/timestamp-data-type.rb +9 -34
  96. data/{test/test-date64-array.rb → lib/arrow/timestamp-parser.rb} +14 -6
  97. data/lib/arrow/union-array-builder.rb +59 -0
  98. data/lib/arrow/union-array.rb +26 -0
  99. data/lib/arrow/version.rb +1 -1
  100. data/lib/arrow.rb +2 -7
  101. data/red-arrow.gemspec +74 -11
  102. metadata +85 -210
  103. data/test/fixture/TestOrcFile.test1.orc +0 -0
  104. data/test/fixture/with-header-float.csv +0 -20
  105. data/test/fixture/with-header.csv +0 -20
  106. data/test/fixture/without-header-float.csv +0 -19
  107. data/test/fixture/without-header.csv +0 -19
  108. data/test/helper/omittable.rb +0 -36
  109. data/test/helper.rb +0 -30
  110. data/test/raw-records/test-basic-arrays.rb +0 -395
  111. data/test/raw-records/test-dense-union-array.rb +0 -521
  112. data/test/raw-records/test-list-array.rb +0 -610
  113. data/test/raw-records/test-map-array.rb +0 -478
  114. data/test/raw-records/test-multiple-columns.rb +0 -65
  115. data/test/raw-records/test-sparse-union-array.rb +0 -511
  116. data/test/raw-records/test-struct-array.rb +0 -515
  117. data/test/raw-records/test-table.rb +0 -47
  118. data/test/run-test.rb +0 -71
  119. data/test/test-array-builder.rb +0 -136
  120. data/test/test-array.rb +0 -325
  121. data/test/test-bigdecimal.rb +0 -40
  122. data/test/test-binary-dictionary-array-builder.rb +0 -103
  123. data/test/test-chunked-array.rb +0 -183
  124. data/test/test-column.rb +0 -92
  125. data/test/test-csv-loader.rb +0 -250
  126. data/test/test-data-type.rb +0 -83
  127. data/test/test-decimal128-array-builder.rb +0 -112
  128. data/test/test-decimal128-data-type.rb +0 -31
  129. data/test/test-decimal128.rb +0 -102
  130. data/test/test-decimal256-array-builder.rb +0 -112
  131. data/test/test-decimal256-array.rb +0 -38
  132. data/test/test-decimal256.rb +0 -102
  133. data/test/test-dense-union-data-type.rb +0 -41
  134. data/test/test-dictionary-data-type.rb +0 -40
  135. data/test/test-expression.rb +0 -40
  136. data/test/test-feather.rb +0 -49
  137. data/test/test-field.rb +0 -91
  138. data/test/test-file-output-stream.rb +0 -54
  139. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  140. data/test/test-fixed-size-binary-array.rb +0 -36
  141. data/test/test-function.rb +0 -210
  142. data/test/test-group.rb +0 -180
  143. data/test/test-list-array-builder.rb +0 -79
  144. data/test/test-list-array.rb +0 -32
  145. data/test/test-list-data-type.rb +0 -69
  146. data/test/test-map-array-builder.rb +0 -110
  147. data/test/test-map-array.rb +0 -33
  148. data/test/test-memory-view.rb +0 -434
  149. data/test/test-orc.rb +0 -173
  150. data/test/test-record-batch-builder.rb +0 -125
  151. data/test/test-record-batch-file-reader.rb +0 -115
  152. data/test/test-record-batch-iterator.rb +0 -37
  153. data/test/test-record-batch-reader.rb +0 -46
  154. data/test/test-record-batch.rb +0 -182
  155. data/test/test-schema.rb +0 -134
  156. data/test/test-slicer.rb +0 -487
  157. data/test/test-sort-indices.rb +0 -40
  158. data/test/test-sort-key.rb +0 -81
  159. data/test/test-sort-options.rb +0 -58
  160. data/test/test-sparse-union-data-type.rb +0 -41
  161. data/test/test-string-dictionary-array-builder.rb +0 -103
  162. data/test/test-struct-array-builder.rb +0 -184
  163. data/test/test-struct-array.rb +0 -94
  164. data/test/test-struct-data-type.rb +0 -112
  165. data/test/test-table.rb +0 -1123
  166. data/test/test-time.rb +0 -288
  167. data/test/test-time32-array.rb +0 -81
  168. data/test/test-time64-array.rb +0 -81
  169. data/test/test-time64-data-type.rb +0 -42
  170. data/test/test-timestamp-array.rb +0 -45
  171. data/test/test-timestamp-data-type.rb +0 -42
  172. data/test/values/test-basic-arrays.rb +0 -325
  173. data/test/values/test-dense-union-array.rb +0 -509
  174. data/test/values/test-dictionary-array.rb +0 -295
  175. data/test/values/test-list-array.rb +0 -571
  176. data/test/values/test-map-array.rb +0 -466
  177. data/test/values/test-sparse-union-array.rb +0 -500
  178. data/test/values/test-struct-array.rb +0 -512
data/lib/arrow/scalar.rb CHANGED
@@ -17,6 +17,73 @@
17
17
 
18
18
  module Arrow
19
19
  class Scalar
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when self
25
+ value
26
+ when true, false
27
+ BooleanScalar.new(value)
28
+ when Symbol, String
29
+ StringScalar.new(value.to_s)
30
+ when Integer
31
+ Int64Scalar.new(value)
32
+ when Float
33
+ DoubleScalar.new(value)
34
+ else
35
+ nil
36
+ end
37
+ end
38
+
39
+ # Ensures that a suitable {Arrow::Scalar} is returned.
40
+ #
41
+ # @overload resolve(scalar)
42
+ #
43
+ # Returns the given scalar itself. This is convenient when
44
+ # using this method as an {Arrow::Scalar} converter.
45
+ #
46
+ # @param scalar [Arrow::Scalar] The scalar.
47
+ #
48
+ # @return [Arrow::Scalar] The given scalar itself.
49
+ #
50
+ # @overload resolve(value)
51
+ #
52
+ # Creates a suitable scalar from the given value. For example,
53
+ # you can create {Arrow::BooleanScalar} from `true`.
54
+ #
55
+ # @param value [Object] The value.
56
+ #
57
+ # @return [Arrow::Scalar] A suitable {Arrow::Scalar} for `value`.
58
+ #
59
+ # @overload resolve(value, data_type)
60
+ #
61
+ # Creates a scalar of `data_type.scalar_class` from the given
62
+ # value. For example, you can create {Arrow::Int32Scalar} from
63
+ # `29` and {Arrow::Int32DataType}.
64
+ #
65
+ # @param value [Object] The value.
66
+ #
67
+ # @param data_type [Arrow::DataType] The {Arrow::DataType} to
68
+ # decide the returned scalar class.
69
+ #
70
+ # @return [Arrow::Scalar] A suitable {Arrow::Scalar} for `value`.
71
+ #
72
+ # @since 12.0.0
73
+ def resolve(value, data_type=nil)
74
+ return try_convert(value) if data_type.nil?
75
+
76
+ data_type = DataType.resolve(data_type)
77
+ scalar_class = data_type.scalar_class
78
+ case value
79
+ when Scalar
80
+ return value if value.class == scalar_class
81
+ value = value.value
82
+ end
83
+ scalar_class.new(value)
84
+ end
85
+ end
86
+
20
87
  # @param other [Arrow::Scalar] The scalar to be compared.
21
88
  # @param options [Arrow::EqualOptions, Hash] (nil)
22
89
  # The options to customize how to compare.
data/lib/arrow/slicer.rb CHANGED
@@ -162,6 +162,40 @@ module Arrow
162
162
  def reject(&block)
163
163
  RejectCondition.new(@column, block)
164
164
  end
165
+
166
+ def end_with?(substring, ignore_case: false)
167
+ MatchSubstringFamilyCondition.new("ends_with",
168
+ @column, substring, ignore_case)
169
+ end
170
+
171
+ def match_like?(pattern, ignore_case: false)
172
+ MatchSubstringFamilyCondition.new("match_like",
173
+ @column, pattern, ignore_case)
174
+ end
175
+
176
+ def match_substring?(pattern, ignore_case: nil)
177
+ case pattern
178
+ when String
179
+ ignore_case = false if ignore_case.nil?
180
+ MatchSubstringFamilyCondition.new("match_substring",
181
+ @column, pattern, ignore_case)
182
+ when Regexp
183
+ ignore_case = pattern.casefold? if ignore_case.nil?
184
+ MatchSubstringFamilyCondition.new("match_substring_regex",
185
+ @column,
186
+ pattern.source,
187
+ ignore_case)
188
+ else
189
+ message =
190
+ "pattern must be either String or Regexp: #{pattern.inspect}"
191
+ raise ArgumentError, message
192
+ end
193
+ end
194
+
195
+ def start_with?(substring, ignore_case: false)
196
+ MatchSubstringFamilyCondition.new("starts_with",
197
+ @column, substring, ignore_case)
198
+ end
165
199
  end
166
200
 
167
201
  class NotColumnCondition < Condition
@@ -351,5 +385,32 @@ module Arrow
351
385
  BooleanArray.new(raw_array)
352
386
  end
353
387
  end
388
+
389
+ class MatchSubstringFamilyCondition < Condition
390
+ def initialize(function, column, pattern, ignore_case, invert: false)
391
+ @function = function
392
+ @column = column
393
+ @options = MatchSubstringOptions.new
394
+ @options.pattern = pattern
395
+ @options.ignore_case = ignore_case
396
+ @invert = invert
397
+ end
398
+
399
+ def !@
400
+ MatchSubstringFamilyCondition.new(@function,
401
+ @column,
402
+ @options.pattern,
403
+ @options.ignore_case?,
404
+ invert: !@invert)
405
+ end
406
+
407
+ def evaluate
408
+ datum = Function.find(@function).execute([@column.data], @options)
409
+ if @invert
410
+ datum = Function.find("invert").execute([datum])
411
+ end
412
+ datum.value
413
+ end
414
+ end
354
415
  end
355
416
  end
@@ -79,9 +79,9 @@ module Arrow
79
79
  # target and corresponding order is used. `"+"` uses ascending
80
80
  # order and `"-"` uses descending order.
81
81
  #
82
- # If `target` is not a String nor `target` doesn't start with the
83
- # leading order mark, sort column target is `target` as-is and
84
- # ascending order is used.
82
+ # If `target` is either not a String or `target` doesn't start
83
+ # with the leading order mark, sort column is `target` as-is
84
+ # and ascending order is used.
85
85
  #
86
86
  # @example String without the leading order mark
87
87
  # key = Arrow::SortKey.new("count")
@@ -0,0 +1,56 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class SparseUnionArrayBuilder
20
+ alias_method :append_value_raw, :append_value
21
+
22
+ # @overload append_value
23
+ #
24
+ # Starts appending a union record. You need to append values of
25
+ # fields.
26
+ #
27
+ # @overload append_value(value)
28
+ #
29
+ # Appends a union record including values of fields.
30
+ #
31
+ # @param value [nil, Hash] The union record value.
32
+ #
33
+ # If this is `nil`, the union record is null.
34
+ #
35
+ # If this is a `Hash`, its first entry gives the field key and the value to append to that field.
36
+ #
37
+ # @since 12.0.0
38
+ def append_value(value)
39
+ if value.nil?
40
+ append_null
41
+ else
42
+ key = value.keys[0]
43
+ child_info = child_infos[key]
44
+ append_value_raw(child_info[:id])
45
+ child_infos.each do |child_key, child_info|
46
+ builder = child_info[:builder]
47
+ if child_key == key
48
+ builder.append(value.values[0])
49
+ else
50
+ builder.append_null
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,26 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class SparseUnionArray
20
+ def get_value(i)
21
+ child_id = get_child_id(i)
22
+ field = fields[child_id]
23
+ field[i]
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,29 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class StreamDecoder
20
+ def consume(data)
21
+ case data
22
+ when Buffer
23
+ consume_buffer(data)
24
+ else
25
+ consume_bytes(data)
26
+ end
27
+ end
28
+ end
29
+ end
@@ -15,17 +15,33 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- class Decimal256DataTypeTest < Test::Unit::TestCase
19
- sub_test_case(".new") do
20
- test("ordered arguments") do
21
- assert_equal("decimal256(8, 2)",
22
- Arrow::Decimal256DataType.new(8, 2).to_s)
18
+ module Arrow
19
+ class StreamListener < StreamListenerRaw
20
+ type_register
21
+
22
+ def on_eos
23
+ end
24
+
25
+ def on_record_batch_decoded(record_batch, metadata)
26
+ end
27
+
28
+ def on_schema_decoded(schema, filtered_schema)
29
+ end
30
+
31
+ private
32
+ def virtual_do_on_eos
33
+ on_eos
34
+ true
35
+ end
36
+
37
+ def virtual_do_on_record_batch_decoded(record_batch, metadata)
38
+ on_record_batch_decoded(record_batch, metadata)
39
+ true
23
40
  end
24
41
 
25
- test("description") do
26
- assert_equal("decimal256(8, 2)",
27
- Arrow::Decimal256DataType.new(precision: 8,
28
- scale: 2).to_s)
42
+ def virtual_do_on_schema_decoded(schema, filtered_schema)
43
+ on_schema_decoded(schema, filtered_schema)
44
+ true
29
45
  end
30
46
  end
31
47
  end
@@ -0,0 +1,30 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class StringArrayBuilder
20
+ private
21
+ def convert_to_arrow_value(value)
22
+ case value
23
+ when GLib::Bytes, String
24
+ value
25
+ else
26
+ value.to_s
27
+ end
28
+ end
29
+ end
30
+ end
@@ -110,11 +110,6 @@ module Arrow
110
110
  end
111
111
  end
112
112
 
113
- alias_method :append_null_raw, :append_null
114
- def append_null
115
- append_null_raw
116
- end
117
-
118
113
  # @since 0.12.0
119
114
  def append(*values)
120
115
  if values.empty?
@@ -24,7 +24,8 @@ module Arrow
24
24
  attr_reader :head_values
25
25
  attr_reader :tail_values
26
26
  attr_reader :sample_values
27
- def initialize(column, head_values, tail_values)
27
+ def initialize(table_formatter, column, head_values, tail_values)
28
+ @table_formatter = table_formatter
28
29
  @column = column
29
30
  @head_values = head_values
30
31
  @tail_values = tail_values
@@ -36,6 +37,15 @@ module Arrow
36
37
  @data_type ||= @column.data_type
37
38
  end
38
39
 
40
+ def formatted_data_type_name
41
+ @formatted_data_type_name ||= "(#{data_type.name})"
42
+ end
43
+
44
+ def aligned_data_type_name
45
+ @aligned_data_type_name ||=
46
+ "%*s" % [aligned_name.size, formatted_data_type_name]
47
+ end
48
+
39
49
  def name
40
50
  @name ||= @column.name
41
51
  end
@@ -63,14 +73,18 @@ module Arrow
63
73
  formatted_value = format_value(value[field_name], field_value_width)
64
74
  "#{formatted_name}: #{formatted_value}"
65
75
  end
66
- formatted = "{"
76
+ formatted = +"{"
67
77
  formatted << formatted_values.join(", ")
68
78
  formatted << "}"
69
79
  "%-*s" % [width, formatted]
70
80
  when nil
71
81
  "%*s" % [width, FORMATTED_NULL]
72
82
  else
73
- "%-*s" % [width, value.to_s]
83
+ value = value.to_s
84
+ if value.encoding == Encoding::ASCII_8BIT
85
+ value = value.each_byte.collect {|byte| "%X" % byte}.join
86
+ end
87
+ "%-*s" % [width, value]
74
88
  end
75
89
  end
76
90
 
@@ -90,9 +104,16 @@ module Arrow
90
104
  end
91
105
 
92
106
  def format_aligned_name(name, data_type, sample_values)
107
+ if @table_formatter.show_column_type?
108
+ min_width = formatted_data_type_name.size
109
+ else
110
+ min_width = 0
111
+ end
93
112
  case data_type
94
113
  when TimestampDataType
95
- "%*s" % [::Time.now.iso8601.size, name]
114
+ width = ::Time.now.iso8601.size
115
+ width = min_width if width < min_width
116
+ "%*s" % [width, name]
96
117
  when IntegerDataType
97
118
  have_null = false
98
119
  have_negative = false
@@ -118,9 +139,12 @@ module Arrow
118
139
  end
119
140
  width += 1 if have_negative # Need "-"
120
141
  width = [width, FORMATTED_NULL.size].max if have_null
142
+ width = min_width if width < min_width
121
143
  "%*s" % [width, name]
122
144
  when FloatDataType, DoubleDataType
123
- "%*s" % [FLOAT_N_DIGITS, name]
145
+ width = FLOAT_N_DIGITS
146
+ width = min_width if width < min_width
147
+ "%*s" % [width, name]
124
148
  when StructDataType
125
149
  field_widths = data_type.fields.collect do |field|
126
150
  field_value_width = compute_field_value_width(field, sample_values)
@@ -130,9 +154,11 @@ module Arrow
130
154
  if field_widths.size > 0
131
155
  width += (", ".size * (field_widths.size - 1))
132
156
  end
157
+ width = min_width if width < min_width
133
158
  "%*s" % [width, name]
134
159
  else
135
- name
160
+ width = min_width
161
+ "%*s" % [width, name]
136
162
  end
137
163
  end
138
164
  end
@@ -143,7 +169,7 @@ module Arrow
143
169
  end
144
170
 
145
171
  def format
146
- text = ""
172
+ text = +""
147
173
  n_rows = @table.n_rows
148
174
  border = @options[:border] || 10
149
175
 
@@ -159,7 +185,7 @@ module Arrow
159
185
  else
160
186
  tail_values = []
161
187
  end
162
- ColumnFormatter.new(column, head_values, tail_values)
188
+ ColumnFormatter.new(self, column, head_values, tail_values)
163
189
  end
164
190
 
165
191
  format_header(text, column_formatters)
@@ -186,5 +212,9 @@ module Arrow
186
212
 
187
213
  text
188
214
  end
215
+
216
+ def show_column_type?
217
+ @options.fetch(:show_column_type, true)
218
+ end
189
219
  end
190
220
  end
@@ -27,9 +27,9 @@ module Arrow
27
27
  text << ("=" * 20 + " #{start_offset + nth_row} " + "=" * 20 + "\n")
28
28
  row.each_with_index do |column_value, nth_column|
29
29
  column_formatter = column_formatters[nth_column]
30
- formatted_name = column_formatter.name
31
- formatted_value = column_formatter.format_value(column_value)
32
- text << "#{formatted_name}: #{formatted_value}\n"
30
+ text << column_formatter.name
31
+ text << "(#{column_formatter.data_type.name})" if show_column_type?
32
+ text << ": #{column_formatter.format_value(column_value)}\n"
33
33
  end
34
34
  end
35
35
  end
@@ -161,7 +161,7 @@ module Arrow
161
161
  record_batches << record_batch
162
162
  end
163
163
  table = Table.new(schema, record_batches)
164
- table.instance_variable_set(:@input, input)
164
+ table.refer_input(input)
165
165
  table
166
166
  end
167
167
 
@@ -211,7 +211,7 @@ module Arrow
211
211
  field_indexes = @options[:field_indexes]
212
212
  reader.set_field_indexes(field_indexes) if field_indexes
213
213
  table = reader.read_stripes
214
- table.instance_variable_set(:@input, input)
214
+ table.refer_input(input)
215
215
  table
216
216
  end
217
217
  end
@@ -245,16 +245,22 @@ module Arrow
245
245
  open_input_stream do |input|
246
246
  reader = FeatherFileReader.new(input)
247
247
  table = reader.read
248
- table.instance_variable_set(:@input, input)
248
+ table.refer_input(input)
249
249
  table
250
250
  end
251
251
  end
252
252
 
253
253
  def load_as_json
254
254
  open_input_stream do |input|
255
- reader = JSONReader.new(input)
255
+ options = JSONReadOptions.new
256
+ @options.each do |key, value|
257
+ next if value.nil?
258
+ setter = :"#{key}="
259
+ options.__send__(setter, value) if options.respond_to?(setter)
260
+ end
261
+ reader = JSONReader.new(input, options)
256
262
  table = reader.read
257
- table.instance_variable_set(:@input, input)
263
+ table.refer_input(input)
258
264
  table
259
265
  end
260
266
  end
@@ -51,6 +51,7 @@ module Arrow
51
51
  raise ArgumentError, message
52
52
  end
53
53
  __send__(custom_save_method)
54
+ @table
54
55
  end
55
56
 
56
57
  private
@@ -129,9 +130,9 @@ module Arrow
129
130
  end
130
131
  end
131
132
 
132
- def save_raw(writer_class)
133
+ def save_raw(writer_class, *args)
133
134
  open_output_stream do |output|
134
- writer_class.open(output, @table.schema) do |writer|
135
+ writer_class.open(output, @table.schema, *args) do |writer|
135
136
  writer.write_table(@table)
136
137
  end
137
138
  end
@@ -143,7 +144,7 @@ module Arrow
143
144
 
144
145
  # @since 1.0.0
145
146
  def save_as_arrow_file
146
- save_raw(RecordBatchFileWriter)
147
+ save_raw(RecordBatchFileWriter, nil, @options[:metadata])
147
148
  end
148
149
 
149
150
  # @deprecated Use `format: :arrow_batch` instead.
@@ -26,6 +26,13 @@ module Arrow
26
26
  text << "\t"
27
27
  text << column_formatter.aligned_name
28
28
  end
29
+ if show_column_type?
30
+ text << "\n"
31
+ column_formatters.each do |column_formatter|
32
+ text << "\t"
33
+ text << column_formatter.aligned_data_type_name
34
+ end
35
+ end
29
36
  text << "\n"
30
37
  end
31
38