red-arrow 0.8.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,93 @@
1
+ # Copyright 2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
module Arrow
  # Saves a table to a file in a format chosen by the caller or guessed
  # from the file extension.
  #
  # A format named +foo+ is available when this object responds to a
  # (possibly private) +save_as_foo+ method, so new formats can be added
  # by defining such a method.
  class TableSaver
    class << self
      # Builds a saver for the given arguments and runs it.
      #
      # @param table the table to save.
      # @param path [String, #to_path] the output location.
      # @param options [Hash] +:format+ selects the output format.
      def save(table, path, options={})
        new(table, path, options).save
      end
    end

    # @param table the table to save.
    # @param path [String, #to_path] the output location.
    # @param options [Hash] +:format+ selects the output format.
    def initialize(table, path, options={})
      @table = table
      @path = path
      @options = options
    end

    # Writes the table to the path.
    #
    # The format is, in order of preference: the +:format+ option, the
    # format guessed from the path's extension, or +:arrow+.
    #
    # @raise [ArgumentError] if there is no +save_as_<format>+ method.
    def save
      path = @path
      path = path.to_path if path.respond_to?(:to_path)
      format = @options[:format] || guess_format(path) || :arrow

      # Only methods carrying the "save_as_" prefix can be dispatched to,
      # and the name is checked before it is sent.
      custom_save_method = "save_as_#{format}"
      unless respond_to?(custom_save_method, true)
        raise ArgumentError, unknown_format_message(format)
      end
      __send__(custom_save_method, path)
    end

    private
    # Names of all formats that have a +save_as_<format>+ method, sorted
    # so that the error message is stable.
    def available_formats
      method_names = (methods(true) | private_methods(true)).collect(&:to_s)
      method_names.grep(/\Asave_as_/) do |method_name|
        method_name.sub(/\Asave_as_/, "")
      end.sort
    end

    # Error text for a format that has no matching save method.
    def unknown_format_message(format)
      "Arrow::Table save format must be one of " +
        "[#{available_formats.join(", ")}]: #{format.inspect}"
    end

    # Returns the lower-cased extension of +path+ (without the dot) when
    # a matching +save_as_<extension>+ method exists, +nil+ otherwise.
    def guess_format(path)
      extension = ::File.extname(path).sub(/\A\./, "").downcase
      return nil if extension.empty?
      return nil unless respond_to?("save_as_#{extension}", true)

      extension
    end

    # Writes the whole table through +writer_class+ into a file at +path+.
    def save_raw(writer_class, path)
      FileOutputStream.open(path, false) do |output|
        writer_class.open(output, @table.schema) do |writer|
          writer.write_table(@table)
        end
      end
    end

    # The +arrow+ format is the same as the +batch+ format.
    def save_as_arrow(path)
      save_as_batch(path)
    end

    def save_as_batch(path)
      save_raw(RecordBatchFileWriter, path)
    end

    def save_as_stream(path)
      save_raw(RecordBatchStreamWriter, path)
    end

    # Writes a header row of field names followed by one row per record.
    def save_as_csv(path)
      CSV.open(path, "w") do |csv|
        names = @table.schema.fields.collect(&:name)
        csv << names
        @table.each_record(reuse_record: true) do |record|
          csv << names.collect {|name| record[name]}
        end
      end
    end
  end
end
@@ -12,8 +12,18 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ require "arrow/record-containable"
16
+
15
17
  module Arrow
16
18
  class Table
19
+ include RecordContainable
20
+
21
+ class << self
22
+ def load(path, options={})
23
+ TableLoader.load(path, options)
24
+ end
25
+ end
26
+
17
27
  alias_method :initialize_raw, :initialize
18
28
  def initialize(schema_or_raw_table_or_columns, columns=nil)
19
29
  if columns.nil?
@@ -38,16 +48,8 @@ module Arrow
38
48
  initialize_raw(schema, columns)
39
49
  end
40
50
 
41
- def each_column
42
- return to_enum(__method__) unless block_given?
43
-
44
- n_columns.times do |i|
45
- yield(get_column(i))
46
- end
47
- end
48
-
49
51
  def columns
50
- @columns ||= each_column.to_a
52
+ @columns ||= n_columns.times.collect {|i| get_column(i)}
51
53
  end
52
54
 
53
55
  def each_record_batch
@@ -64,19 +66,12 @@ module Arrow
64
66
  # @return [Arrow::Column, Array<Arrow::Column>, nil]
65
67
  def [](*args)
66
68
  if args.size == 1
67
- case args[0]
68
- when String, Symbol
69
- find_column(args[0])
70
- else
71
- message = "#{self.class}\#[#{args[0].inspect}]: " +
72
- "Must be String or Symbol"
73
- raise ArgumentError, message
74
- end
69
+ find_column(args[0])
75
70
  else
76
71
  new_columns = args.collect do |column_name|
77
72
  column = find_column(column_name)
78
73
  if column.nil?
79
- message = "Unknown column: <#{column_name.inspect}>: #{inspect}"
74
+ message = "unknown column: <#{column_name.inspect}>: #{inspect}"
80
75
  raise ArgumentError, message
81
76
  end
82
77
  column
@@ -246,6 +241,11 @@ module Arrow
246
241
  self.class.new(selected_columns)
247
242
  end
248
243
 
244
+ def save(path, options={})
245
+ saver = TableSaver.new(self, path, options)
246
+ saver.save
247
+ end
248
+
249
249
  def to_s(options={})
250
250
  formatter = TableFormatter.new(self, options)
251
251
  formatter.format
@@ -269,13 +269,6 @@ module Arrow
269
269
  end
270
270
 
271
271
  private
272
- def find_column(name)
273
- name = name.to_s
274
- columns.find do |column|
275
- column.name == name
276
- end
277
- end
278
-
279
272
  def slice_by_ranges(ranges)
280
273
  sliced_columns = columns.collect do |column|
281
274
  chunks = []
@@ -0,0 +1,59 @@
1
+ # Copyright 2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
module Arrow
  # Lets time-like Ruby objects be given to a timestamp array builder by
  # converting them to integer counts of the builder's unit.
  class TimestampArrayBuilder
    # TODO: Workaround for Apache Arrow GLib 0.8.0
    alias_method :initialize_raw, :initialize

    # Remembers the timestamp unit, which is read from the suffix of the
    # data type's string form: "[s]", "[ms]" or "[us]". Any other
    # suffix is treated as nanoseconds.
    #
    # @param data_type [#to_s] the timestamp data type to build.
    def initialize(data_type)
      initialize_raw(data_type)
      @unit_id =
        case data_type.to_s
        when /\[s\]\z/
          :second
        when /\[ms\]\z/
          :milli
        when /\[us\]\z/
          :micro
        else
          :nano
        end
    end

    private
    # The unit (+:second+, +:milli+, +:micro+ or +:nano+) of built values.
    def unit_id
      @unit_id ||= unit.nick.to_sym
    end

    # Converts +value+ to an integer count of +unit_id+ when it is a Time
    # or responds to +to_time+; any other value is returned as is.
    def convert_to_arrow_value(value)
      if value.respond_to?(:to_time) && !value.is_a?(Time)
        value = value.to_time
      end
      return value unless value.is_a?(Time)

      seconds = value.to_i
      case unit_id
      when :second
        seconds
      when :milli
        # Sub-second part is truncated to whole milliseconds.
        seconds * 1_000 + value.usec / 1_000
      when :micro
        seconds * 1_000_000 + value.usec
      else
        seconds * 1_000_000_000 + value.nsec
      end
    end
  end
end
@@ -1,4 +1,4 @@
1
- # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -13,5 +13,5 @@
13
13
  # limitations under the License.
14
14
 
15
15
  module Arrow
16
- VERSION = "0.8.0"
16
+ VERSION = "0.8.1"
17
17
  end
@@ -14,58 +14,96 @@
14
14
 
15
15
  class ArrayBuilderTest < Test::Unit::TestCase
16
16
  sub_test_case(".build") do
17
- test("empty") do
18
- array = Arrow::Int32ArrayBuilder.build([])
19
- assert_equal([],
20
- array.to_a)
17
+ def assert_build(builder_class, raw_array)
18
+ array = builder_class.build(raw_array)
19
+ assert_equal(raw_array, array.to_a)
21
20
  end
22
21
 
23
- test("values") do
24
- array = Arrow::Int32ArrayBuilder.build([1, -2])
25
- assert_equal([1, -2],
26
- array.to_a)
27
- end
22
+ sub_test_case("generic builder") do
23
+ test("strings") do
24
+ assert_build(Arrow::ArrayBuilder,
25
+ ["Hello", nil, "World"])
26
+ end
28
27
 
29
- test("values, nils") do
30
- array = Arrow::Int32ArrayBuilder.build([1, -2, nil, nil])
31
- assert_equal([1, -2, nil, nil],
32
- array.to_a)
33
- end
28
+ test("boolean") do
29
+ assert_build(Arrow::ArrayBuilder,
30
+ [true, nil, false])
31
+ end
34
32
 
35
- test("values, nils, values") do
36
- array = Arrow::Int32ArrayBuilder.build([1, -2, nil, nil, 3, -4])
37
- assert_equal([1, -2, nil, nil, 3, -4],
38
- array.to_a)
39
- end
33
+ test("positive integers") do
34
+ assert_build(Arrow::ArrayBuilder,
35
+ [1, nil, 2, nil, 3])
36
+ end
40
37
 
41
- test("values, nils, values, nils") do
42
- array = Arrow::Int32ArrayBuilder.build([1, -2, nil, nil, 3, -4, nil, nil])
43
- assert_equal([1, -2, nil, nil, 3, -4, nil, nil],
44
- array.to_a)
45
- end
38
+ test("negative integers") do
39
+ assert_build(Arrow::ArrayBuilder,
40
+ [nil, -1, nil, -2, nil, -3])
41
+ end
46
42
 
47
- test("nils") do
48
- array = Arrow::Int32ArrayBuilder.build([nil, nil])
49
- assert_equal([nil, nil],
50
- array.to_a)
51
- end
43
+ test("times") do
44
+ assert_build(Arrow::ArrayBuilder,
45
+ [Time.at(0), Time.at(1), Time.at(2)])
46
+ end
52
47
 
53
- test("nils, values") do
54
- array = Arrow::Int32ArrayBuilder.build([nil, nil, 3, -4])
55
- assert_equal([nil, nil, 3, -4],
56
- array.to_a)
57
- end
48
+ test("dates") do
49
+ assert_build(Arrow::ArrayBuilder,
50
+ [Date.new(2018, 1, 4), Date.new(2018, 1, 5)])
51
+ end
58
52
 
59
- test("nils, values, nil") do
60
- array = Arrow::Int32ArrayBuilder.build([nil, nil, 3, -4, nil, nil])
61
- assert_equal([nil, nil, 3, -4, nil, nil],
62
- array.to_a)
53
+ test("datetimes") do
54
+ assert_build(Arrow::ArrayBuilder,
55
+ [
56
+ DateTime.new(2018, 1, 4, 23, 18, 23),
57
+ DateTime.new(2018, 1, 5, 0, 23, 21),
58
+ ])
59
+ end
63
60
  end
64
61
 
65
- test("nils, values, nil, values") do
66
- array = Arrow::Int32ArrayBuilder.build([nil, nil, 3, -4, nil, nil, 5, -6])
67
- assert_equal([nil, nil, 3, -4, nil, nil, 5, -6],
68
- array.to_a)
62
+ sub_test_case("specific builder") do
63
+ test("empty") do
64
+ assert_build(Arrow::Int32ArrayBuilder,
65
+ [])
66
+ end
67
+
68
+ test("values") do
69
+ assert_build(Arrow::Int32ArrayBuilder,
70
+ [1, -2])
71
+ end
72
+
73
+ test("values, nils") do
74
+ assert_build(Arrow::Int32ArrayBuilder,
75
+ [1, -2, nil, nil])
76
+ end
77
+
78
+ test("values, nils, values") do
79
+ assert_build(Arrow::Int32ArrayBuilder,
80
+ [1, -2, nil, nil, 3, -4])
81
+ end
82
+
83
+ test("values, nils, values, nils") do
84
+ assert_build(Arrow::Int32ArrayBuilder,
85
+ [1, -2, nil, nil, 3, -4, nil, nil])
86
+ end
87
+
88
+ test("nils") do
89
+ assert_build(Arrow::Int32ArrayBuilder,
90
+ [nil, nil])
91
+ end
92
+
93
+ test("nils, values") do
94
+ assert_build(Arrow::Int32ArrayBuilder,
95
+ [nil, nil, 3, -4])
96
+ end
97
+
98
+ test("nils, values, nil") do
99
+ assert_build(Arrow::Int32ArrayBuilder,
100
+ [nil, nil, 3, -4, nil, nil])
101
+ end
102
+
103
+ test("nils, values, nil, values") do
104
+ assert_build(Arrow::Int32ArrayBuilder,
105
+ [nil, nil, 3, -4, nil, nil, 5, -6])
106
+ end
69
107
  end
70
108
  end
71
109
  end
@@ -0,0 +1,79 @@
1
+ # Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ class CSVLoaderTest < Test::Unit::TestCase
16
+ include Helper::Fixture
17
+
18
+ sub_test_case(".load") do
19
+ test("String: data: with header") do
20
+ data = fixture_path("with-header.csv").read
21
+ assert_equal(<<-TABLE, Arrow::CSVLoader.load(data).to_s)
22
+ name score
23
+ 0 alice 10
24
+ 1 bob 29
25
+ 2 chris -1
26
+ TABLE
27
+ end
28
+
29
+ test("String: data: without header") do
30
+ data = fixture_path("without-header.csv").read
31
+ assert_equal(<<-TABLE, Arrow::CSVLoader.load(data).to_s)
32
+ 0 1
33
+ 0 alice 10
34
+ 1 bob 29
35
+ 2 chris -1
36
+ TABLE
37
+ end
38
+
39
+ test("String: path: with header") do
40
+ path = fixture_path("with-header.csv").to_s
41
+ assert_equal(<<-TABLE, Arrow::CSVLoader.load(path).to_s)
42
+ name score
43
+ 0 alice 10
44
+ 1 bob 29
45
+ 2 chris -1
46
+ TABLE
47
+ end
48
+
49
+ test("String: path: without header") do
50
+ path = fixture_path("without-header.csv").to_s
51
+ assert_equal(<<-TABLE, Arrow::CSVLoader.load(path).to_s)
52
+ 0 1
53
+ 0 alice 10
54
+ 1 bob 29
55
+ 2 chris -1
56
+ TABLE
57
+ end
58
+
59
+ test("Pathname: with header") do
60
+ path = fixture_path("with-header.csv")
61
+ assert_equal(<<-TABLE, Arrow::CSVLoader.load(path).to_s)
62
+ name score
63
+ 0 alice 10
64
+ 1 bob 29
65
+ 2 chris -1
66
+ TABLE
67
+ end
68
+
69
+ test("Pathname: without header") do
70
+ path = fixture_path("without-header.csv")
71
+ assert_equal(<<-TABLE, Arrow::CSVLoader.load(path).to_s)
72
+ 0 1
73
+ 0 alice 10
74
+ 1 bob 29
75
+ 2 chris -1
76
+ TABLE
77
+ end
78
+ end
79
+ end