red-arrow 0.8.0 → 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,93 @@
1
+ # Copyright 2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
module Arrow
  # Writes a table to a file in one of the formats for which this class
  # has a (private) +save_as_<format>+ method.
  #
  # The format is taken from +options[:format]+ when given, otherwise it
  # is guessed from the extension of the path, otherwise +:arrow+ is used.
  class TableSaver
    class << self
      # Builds a saver for the given arguments and runs it immediately.
      #
      # @param table the table to write; the savers below read its
      #   +schema+ and, for CSV, iterate it with +each_record+.
      # @param path [String, #to_path] the destination.
      # @param options [Hash] only the +:format+ key is read here.
      # @return whatever the selected +save_as_<format>+ method returns.
      # @raise [ArgumentError] when the requested format is not supported.
      def save(table, path, options={})
        new(table, path, options).save
      end
    end

    # @param table the table to write.
    # @param path [String, #to_path] the destination.
    # @param options [Hash] only the +:format+ key is read by {#save}.
    def initialize(table, path, options={})
      @table = table
      @path = path
      @options = options
    end

    # Resolves the output format and dispatches to the matching
    # +save_as_<format>+ method.
    #
    # @return whatever the selected +save_as_<format>+ method returns.
    # @raise [ArgumentError] when no +save_as_<format>+ method exists for
    #   the resolved format. The message lists the available formats.
    def save
      path = @path
      # Accept Pathname-like objects as well as plain strings.
      path = path.to_path if path.respond_to?(:to_path)
      format = @options[:format] || guess_format(path) || :arrow

      saver_name = "save_as_#{format}"
      unless respond_to?(saver_name, true)
        # Built by interpolation so that no string literal is mutated.
        message = "Arrow::Table save format must be one of " +
                  "[#{available_formats.join(", ")}]: #{format.inspect}"
        raise ArgumentError, message
      end
      __send__(saver_name, path)
    end

    private
    # Names of the supported formats, derived from every public or
    # private method of this object whose name starts with "save_as_".
    #
    # @return [Array<String>]
    def available_formats
      prefix = "save_as_"
      all_names = (methods(true) | private_methods(true)).collect(&:to_s)
      saver_names = all_names.select {|name| name.start_with?(prefix)}
      saver_names.collect {|name| name[prefix.size..-1]}
    end

    # Guesses the format from the (lower-cased) extension of +path+.
    #
    # @return [String, nil] the extension when a matching
    #   +save_as_<extension>+ method exists, +nil+ otherwise.
    def guess_format(path)
      extension = ::File.extname(path).gsub(/\A\./, "").downcase
      return nil if extension.empty?

      return extension if respond_to?("save_as_#{extension}", true)

      nil
    end

    # Writes the table through +writer_class+ into a file output stream
    # opened on +path+.
    #
    # NOTE(review): the meaning of the +false+ argument passed to
    # +FileOutputStream.open+ is not visible here (presumably "do not
    # append") -- confirm against the Arrow GLib API.
    def save_raw(writer_class, path)
      FileOutputStream.open(path, false) do |output|
        writer_class.open(output, @table.schema) do |writer|
          writer.write_table(@table)
        end
      end
    end

    # +:arrow+ is an alias of the +:batch+ format.
    def save_as_arrow(path)
      save_as_batch(path)
    end

    def save_as_batch(path)
      save_raw(RecordBatchFileWriter, path)
    end

    def save_as_stream(path)
      save_raw(RecordBatchStreamWriter, path)
    end

    # Writes a header row with the field names followed by one row per
    # record.
    def save_as_csv(path)
      # Loaded lazily: nothing else in this file requires the csv library,
      # and it is only needed for this format.
      require "csv"

      CSV.open(path, "w") do |csv|
        names = @table.schema.fields.collect(&:name)
        csv << names
        @table.each_record(reuse_record: true) do |record|
          csv << names.collect {|name| record[name]}
        end
      end
    end
  end
end
@@ -12,8 +12,18 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ require "arrow/record-containable"
16
+
15
17
  module Arrow
16
18
  class Table
19
+ include RecordContainable
20
+
21
+ class << self
22
+ def load(path, options={})
23
+ TableLoader.load(path, options)
24
+ end
25
+ end
26
+
17
27
  alias_method :initialize_raw, :initialize
18
28
  def initialize(schema_or_raw_table_or_columns, columns=nil)
19
29
  if columns.nil?
@@ -38,16 +48,8 @@ module Arrow
38
48
  initialize_raw(schema, columns)
39
49
  end
40
50
 
41
- def each_column
42
- return to_enum(__method__) unless block_given?
43
-
44
- n_columns.times do |i|
45
- yield(get_column(i))
46
- end
47
- end
48
-
49
51
  def columns
50
- @columns ||= each_column.to_a
52
+ @columns ||= n_columns.times.collect {|i| get_column(i)}
51
53
  end
52
54
 
53
55
  def each_record_batch
@@ -64,19 +66,12 @@ module Arrow
64
66
  # @return [Arrow::Column, Array<Arrow::Column>, nil]
65
67
  def [](*args)
66
68
  if args.size == 1
67
- case args[0]
68
- when String, Symbol
69
- find_column(args[0])
70
- else
71
- message = "#{self.class}\#[#{args[0].inspect}]: " +
72
- "Must be String or Symbol"
73
- raise ArgumentError, message
74
- end
69
+ find_column(args[0])
75
70
  else
76
71
  new_columns = args.collect do |column_name|
77
72
  column = find_column(column_name)
78
73
  if column.nil?
79
- message = "Unknown column: <#{column_name.inspect}>: #{inspect}"
74
+ message = "unknown column: <#{column_name.inspect}>: #{inspect}"
80
75
  raise ArgumentError, message
81
76
  end
82
77
  column
@@ -246,6 +241,11 @@ module Arrow
246
241
  self.class.new(selected_columns)
247
242
  end
248
243
 
244
+ def save(path, options={})
245
+ saver = TableSaver.new(self, path, options)
246
+ saver.save
247
+ end
248
+
249
249
  def to_s(options={})
250
250
  formatter = TableFormatter.new(self, options)
251
251
  formatter.format
@@ -269,13 +269,6 @@ module Arrow
269
269
  end
270
270
 
271
271
  private
272
- def find_column(name)
273
- name = name.to_s
274
- columns.find do |column|
275
- column.name == name
276
- end
277
- end
278
-
279
272
  def slice_by_ranges(ranges)
280
273
  sliced_columns = columns.collect do |column|
281
274
  chunks = []
@@ -0,0 +1,59 @@
1
+ # Copyright 2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
module Arrow
  class TimestampArrayBuilder
    # TODO: Workaround for Apache Arrow GLib 0.8.0
    alias_method :initialize_raw, :initialize

    # Runs the original initializer, then records the time unit by looking
    # at the suffix of the data type's string form ("[s]", "[ms]", "[us]";
    # anything else is treated as nanoseconds).
    def initialize(data_type)
      initialize_raw(data_type)
      @unit_id =
        case data_type.to_s
        when /\[s\]\z/
          :second
        when /\[ms\]\z/
          :milli
        when /\[us\]\z/
          :micro
        else
          :nano
        end
    end

    private
    # The unit recorded by #initialize; falls back to the nick of +unit+
    # when it has not been set.
    def unit_id
      @unit_id ||= unit.nick.to_sym
    end

    # Turns Time values (and non-Time values that offer +to_time+) into an
    # integer count of +unit_id+ units; any other value is returned as is.
    def convert_to_arrow_value(value)
      converted = value
      if value.respond_to?(:to_time) && !value.is_a?(Time)
        converted = value.to_time
      end
      return converted unless converted.is_a?(Time)

      case unit_id
      when :second
        converted.to_i
      when :milli
        converted.to_i * 1_000 + converted.usec / 1000
      when :micro
        converted.to_i * 1_000_000 + converted.usec
      else
        converted.to_i * 1_000_000_000 + converted.nsec
      end
    end
  end
end
@@ -1,4 +1,4 @@
1
- # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -13,5 +13,5 @@
13
13
  # limitations under the License.
14
14
 
15
15
  module Arrow
16
- VERSION = "0.8.0"
16
+ VERSION = "0.8.1"
17
17
  end
@@ -14,58 +14,96 @@
14
14
 
15
15
  class ArrayBuilderTest < Test::Unit::TestCase
16
16
  sub_test_case(".build") do
17
- test("empty") do
18
- array = Arrow::Int32ArrayBuilder.build([])
19
- assert_equal([],
20
- array.to_a)
17
+ def assert_build(builder_class, raw_array)
18
+ array = builder_class.build(raw_array)
19
+ assert_equal(raw_array, array.to_a)
21
20
  end
22
21
 
23
- test("values") do
24
- array = Arrow::Int32ArrayBuilder.build([1, -2])
25
- assert_equal([1, -2],
26
- array.to_a)
27
- end
22
+ sub_test_case("generic builder") do
23
+ test("strings") do
24
+ assert_build(Arrow::ArrayBuilder,
25
+ ["Hello", nil, "World"])
26
+ end
28
27
 
29
- test("values, nils") do
30
- array = Arrow::Int32ArrayBuilder.build([1, -2, nil, nil])
31
- assert_equal([1, -2, nil, nil],
32
- array.to_a)
33
- end
28
+ test("boolean") do
29
+ assert_build(Arrow::ArrayBuilder,
30
+ [true, nil, false])
31
+ end
34
32
 
35
- test("values, nils, values") do
36
- array = Arrow::Int32ArrayBuilder.build([1, -2, nil, nil, 3, -4])
37
- assert_equal([1, -2, nil, nil, 3, -4],
38
- array.to_a)
39
- end
33
+ test("positive integers") do
34
+ assert_build(Arrow::ArrayBuilder,
35
+ [1, nil, 2, nil, 3])
36
+ end
40
37
 
41
- test("values, nils, values, nils") do
42
- array = Arrow::Int32ArrayBuilder.build([1, -2, nil, nil, 3, -4, nil, nil])
43
- assert_equal([1, -2, nil, nil, 3, -4, nil, nil],
44
- array.to_a)
45
- end
38
+ test("negative integers") do
39
+ assert_build(Arrow::ArrayBuilder,
40
+ [nil, -1, nil, -2, nil, -3])
41
+ end
46
42
 
47
- test("nils") do
48
- array = Arrow::Int32ArrayBuilder.build([nil, nil])
49
- assert_equal([nil, nil],
50
- array.to_a)
51
- end
43
+ test("times") do
44
+ assert_build(Arrow::ArrayBuilder,
45
+ [Time.at(0), Time.at(1), Time.at(2)])
46
+ end
52
47
 
53
- test("nils, values") do
54
- array = Arrow::Int32ArrayBuilder.build([nil, nil, 3, -4])
55
- assert_equal([nil, nil, 3, -4],
56
- array.to_a)
57
- end
48
+ test("dates") do
49
+ assert_build(Arrow::ArrayBuilder,
50
+ [Date.new(2018, 1, 4), Date.new(2018, 1, 5)])
51
+ end
58
52
 
59
- test("nils, values, nil") do
60
- array = Arrow::Int32ArrayBuilder.build([nil, nil, 3, -4, nil, nil])
61
- assert_equal([nil, nil, 3, -4, nil, nil],
62
- array.to_a)
53
+ test("datetimes") do
54
+ assert_build(Arrow::ArrayBuilder,
55
+ [
56
+ DateTime.new(2018, 1, 4, 23, 18, 23),
57
+ DateTime.new(2018, 1, 5, 0, 23, 21),
58
+ ])
59
+ end
63
60
  end
64
61
 
65
- test("nils, values, nil, values") do
66
- array = Arrow::Int32ArrayBuilder.build([nil, nil, 3, -4, nil, nil, 5, -6])
67
- assert_equal([nil, nil, 3, -4, nil, nil, 5, -6],
68
- array.to_a)
62
+ sub_test_case("specific builder") do
63
+ test("empty") do
64
+ assert_build(Arrow::Int32ArrayBuilder,
65
+ [])
66
+ end
67
+
68
+ test("values") do
69
+ assert_build(Arrow::Int32ArrayBuilder,
70
+ [1, -2])
71
+ end
72
+
73
+ test("values, nils") do
74
+ assert_build(Arrow::Int32ArrayBuilder,
75
+ [1, -2, nil, nil])
76
+ end
77
+
78
+ test("values, nils, values") do
79
+ assert_build(Arrow::Int32ArrayBuilder,
80
+ [1, -2, nil, nil, 3, -4])
81
+ end
82
+
83
+ test("values, nils, values, nils") do
84
+ assert_build(Arrow::Int32ArrayBuilder,
85
+ [1, -2, nil, nil, 3, -4, nil, nil])
86
+ end
87
+
88
+ test("nils") do
89
+ assert_build(Arrow::Int32ArrayBuilder,
90
+ [nil, nil])
91
+ end
92
+
93
+ test("nils, values") do
94
+ assert_build(Arrow::Int32ArrayBuilder,
95
+ [nil, nil, 3, -4])
96
+ end
97
+
98
+ test("nils, values, nil") do
99
+ assert_build(Arrow::Int32ArrayBuilder,
100
+ [nil, nil, 3, -4, nil, nil])
101
+ end
102
+
103
+ test("nils, values, nil, values") do
104
+ assert_build(Arrow::Int32ArrayBuilder,
105
+ [nil, nil, 3, -4, nil, nil, 5, -6])
106
+ end
69
107
  end
70
108
  end
71
109
  end
@@ -0,0 +1,79 @@
1
+ # Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
# Checks Arrow::CSVLoader.load against the "with-header.csv" and
# "without-header.csv" fixtures, passing the input as CSV text, as a path
# String, and as the object returned by fixture_path.
class CSVLoaderTest < Test::Unit::TestCase
  include Helper::Fixture

  sub_test_case(".load") do
    # Loads +input+ and compares the string form of the result with
    # +expected+.
    def assert_load(expected, input)
      assert_equal(expected, Arrow::CSVLoader.load(input).to_s)
    end

    # NOTE(review): the column spacing of both expected tables is
    # reproduced exactly as it appears in this view -- confirm that it
    # matches the table formatter's real output.
    def table_with_header
      <<-TABLE
name score
0 alice 10
1 bob 29
2 chris -1
      TABLE
    end

    def table_without_header
      <<-TABLE
0 1
0 alice 10
1 bob 29
2 chris -1
      TABLE
    end

    test("String: data: with header") do
      assert_load(table_with_header,
                  fixture_path("with-header.csv").read)
    end

    test("String: data: without header") do
      assert_load(table_without_header,
                  fixture_path("without-header.csv").read)
    end

    test("String: path: with header") do
      assert_load(table_with_header,
                  fixture_path("with-header.csv").to_s)
    end

    test("String: path: without header") do
      assert_load(table_without_header,
                  fixture_path("without-header.csv").to_s)
    end

    test("Pathname: with header") do
      assert_load(table_with_header,
                  fixture_path("with-header.csv"))
    end

    test("Pathname: without header") do
      assert_load(table_without_header,
                  fixture_path("without-header.csv"))
    end
  end
end