red-arrow 0.8.0 → 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,29 @@
1
+ # Copyright 2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
module Arrow
  # Ruby-side additions to the Date32 array builder.
  class Date32ArrayBuilder
    # Reference date that day offsets are measured from.
    UNIX_EPOCH = Date.new(1970, 1, 1)

    private

    # Converts +value+ into the number handed to the underlying builder.
    #
    # Anything that responds to +to_date+ is converted with it first.
    # If the result is a Date, the whole number of days between it and
    # UNIX_EPOCH is returned (negative for earlier dates). Any other
    # value is returned as it is.
    def convert_to_arrow_value(value)
      candidate = value.respond_to?(:to_date) ? value.to_date : value
      return candidate unless candidate.is_a?(Date)

      (candidate - UNIX_EPOCH).to_i
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
module Arrow
  # Ruby-side additions to the Date32 array.
  class Date32Array
    # Julian day number of 1970-01-01.
    UNIX_EPOCH = 2440588

    # Returns the element at index +i+ as a Date, built from the raw
    # number that +get_raw_value+ reports for that index.
    def get_value(i)
      raw_days = get_raw_value(i)
      to_date(raw_days)
    end

    private

    # Builds the Date that lies +raw_value+ days after UNIX_EPOCH.
    def to_date(raw_value)
      julian_day = raw_value + UNIX_EPOCH
      Date.jd(julian_day)
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # Copyright 2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
module Arrow
  # Ruby-side additions to the Date64 array builder.
  class Date64ArrayBuilder
    private

    # Converts +value+ into the number handed to the underlying builder.
    #
    # A value that is not already a Time but responds to +to_time+ is
    # converted with it first. A Time is then turned into whole
    # milliseconds since the Unix epoch; sub-millisecond precision is
    # dropped by the integer division. Any other value is returned as
    # it is.
    def convert_to_arrow_value(value)
      # `&&`/`!` instead of `and`/`not`: same result here, but without the
      # low-precedence pitfalls of the keyword operators.
      if value.respond_to?(:to_time) && !value.is_a?(Time)
        value = value.to_time
      end
      return value unless value.is_a?(Time)

      value.to_i * 1_000 + value.usec / 1_000
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
module Arrow
  # Ruby-side additions to the Date64 array.
  class Date64Array
    # Returns the element at index +i+ as a DateTime, built from the raw
    # number that +get_raw_value+ reports for that index.
    def get_value(i)
      to_datetime(get_raw_value(i))
    end

    private

    # Converts +raw_value+, a count of milliseconds relative to the Unix
    # epoch, into a DateTime.
    def to_datetime(raw_value)
      seconds, milliseconds = raw_value.divmod(1_000)
      # The second argument of Time.at is in microseconds, so the
      # millisecond remainder has to be scaled up. Passing it unscaled
      # made e.g. 1500 ms decode as 1.0005 s instead of 1.5 s.
      Time.at(seconds, milliseconds * 1_000).to_datetime
    end
  end
end
@@ -33,14 +33,23 @@ module Arrow
33
33
  require "arrow/buffer"
34
34
  require "arrow/chunked-array"
35
35
  require "arrow/column"
36
+ require "arrow/csv-loader"
36
37
  require "arrow/csv-reader"
38
+ require "arrow/date32-array"
39
+ require "arrow/date32-array-builder"
40
+ require "arrow/date64-array"
41
+ require "arrow/date64-array-builder"
37
42
  require "arrow/field"
43
+ require "arrow/record"
38
44
  require "arrow/record-batch"
39
45
  require "arrow/slicer"
40
46
  require "arrow/table"
41
47
  require "arrow/table-formatter"
48
+ require "arrow/table-loader"
49
+ require "arrow/table-saver"
42
50
  require "arrow/tensor"
43
51
  require "arrow/timestamp-array"
52
+ require "arrow/timestamp-array-builder"
44
53
 
45
54
  require "arrow/record-batch-file-reader"
46
55
  require "arrow/record-batch-stream-reader"
@@ -67,7 +76,7 @@ module Arrow
67
76
  method_name = "get_value"
68
77
  end
69
78
  super(info, klass, method_name)
70
- when "Arrow::TimestampArray"
79
+ when "Arrow::TimestampArray", "Arrow::Date32Array", "Arrow::Date64Array"
71
80
  case method_name
72
81
  when "get_value"
73
82
  method_name = "get_raw_value"
@@ -12,57 +12,31 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- require "arrow/record"
15
+ require "arrow/record-containable"
16
16
 
17
17
  module Arrow
18
18
  class RecordBatch
19
+ include RecordContainable
19
20
  include Enumerable
20
21
 
21
- def each(reuse_record: false)
22
- unless block_given?
23
- return to_enum(__method__, reuse_record: reuse_record)
24
- end
25
-
26
- if reuse_record
27
- record = Record.new(self, nil)
28
- n_rows.times do |i|
29
- record.index = i
30
- yield(record)
31
- end
32
- else
33
- n_rows.times do |i|
34
- yield(Record.new(self, i))
35
- end
36
- end
37
- end
38
-
39
- def find_column(name_or_index)
40
- case name_or_index
41
- when String, Symbol
42
- name = name_or_index
43
- index = resolve_name(name)
44
- else
45
- index = name_or_index
46
- end
47
- columns[index]
48
- end
22
+ alias_method :each, :each_record
49
23
 
50
24
  alias_method :columns_raw, :columns
51
25
  def columns
52
26
  @columns ||= columns_raw
53
27
  end
54
28
 
55
- private
56
- def resolve_name(name)
57
- (@name_to_index ||= build_name_to_index)[name.to_s]
29
+ def respond_to_missing?(name, include_private)
30
+ return true if find_column(name)
31
+ super
58
32
  end
59
33
 
60
- def build_name_to_index
61
- index = {}
62
- schema.fields.each_with_index do |field, i|
63
- index[field.name] = i
34
+ def method_missing(name, *args, &block)
35
+ if args.empty?
36
+ column = find_column(name)
37
+ return column if column
64
38
  end
65
- index
39
+ super
66
40
  end
67
41
  end
68
42
  end
@@ -0,0 +1,70 @@
1
+ # Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
module Arrow
  # Shared behavior for objects that hold columns and can be walked
  # record by record. The including class must provide +columns+,
  # +schema+ and +n_rows+.
  module RecordContainable
    # Yields every column in order. Without a block an Enumerator is
    # returned instead.
    def each_column(&block)
      if block_given?
        columns.each(&block)
      else
        to_enum(__method__)
      end
    end

    # Yields one Record per row, from index 0 up to +n_rows - 1+.
    #
    # With +reuse_record: true+ a single Record is created and only its
    # index is updated between yields; otherwise a new Record is created
    # for every row. Without a block an Enumerator is returned instead.
    def each_record(reuse_record: false)
      return to_enum(__method__, reuse_record: reuse_record) unless block_given?

      unless reuse_record
        n_rows.times { |row| yield(Record.new(self, row)) }
        return
      end

      shared = Record.new(self, nil)
      n_rows.times do |row|
        shared.index = row
        yield(shared)
      end
    end

    # Looks a column up by name (String or Symbol) or by position
    # (Integer). Returns nil when no column has the given name.
    # Raises ArgumentError for any other kind of argument.
    def find_column(name_or_index)
      case name_or_index
      when Integer
        columns[name_or_index]
      when String, Symbol
        position = resolve_column_name(name_or_index.to_s)
        position.nil? ? nil : columns[position]
      else
        message = "column name or index must be String, Symbol or Integer"
        raise ArgumentError, message
      end
    end

    private

    # Maps a column name to its position, building the lookup table on
    # first use and keeping it for later calls.
    def resolve_column_name(name)
      @column_name_to_index ||= build_column_name_resolve_table
      @column_name_to_index[name]
    end

    # Builds a Hash from field name to field position using the schema.
    def build_column_name_resolve_table
      schema.fields.each_with_index.each_with_object({}) do |(field, position), lookup|
        lookup[field.name] = position
      end
    end
  end
end
@@ -15,25 +15,40 @@
15
15
  module Arrow
16
16
  class Record
17
17
  attr_accessor :index
18
- def initialize(record_batch, index)
19
- @record_batch = record_batch
18
+ def initialize(record_container, index)
19
+ @record_container = record_container
20
20
  @index = index
21
21
  end
22
22
 
23
23
  def [](column_name_or_column_index)
24
- @record_batch.find_column(column_name_or_column_index)[@index]
24
+ column = @record_container.find_column(column_name_or_column_index)
25
+ return nil if column.nil?
26
+ column[@index]
25
27
  end
26
28
 
27
29
  def columns
28
- @record_batch.columns
30
+ @record_container.columns
29
31
  end
30
32
 
31
33
  def to_h
32
34
  attributes = {}
33
- @record_batch.schema.fields.each_with_index do |field, i|
35
+ @record_container.schema.fields.each_with_index do |field, i|
34
36
  attributes[field.name] = self[i]
35
37
  end
36
38
  attributes
37
39
  end
40
+
41
+ def respond_to_missing?(name, include_private)
42
+ return true if @record_container.find_column(name)
43
+ super
44
+ end
45
+
46
+ def method_missing(name, *args, &block)
47
+ if args.empty?
48
+ column = @record_container.find_column(name)
49
+ return column[@index] if column
50
+ end
51
+ super
52
+ end
38
53
  end
39
54
  end
@@ -0,0 +1,117 @@
1
+ # Copyright 2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
module Arrow
  # Loads an Arrow::Table from a path. The format is taken from the
  # +:format+ option, then from the file extension, and finally defaults
  # to +:arrow+. Each supported format is a private +load_as_<format>+
  # method.
  class TableLoader
    class << self
      # Shortcut for +new(path, options).load+.
      def load(path, options={})
        new(path, options).load
      end
    end

    # +path+ may be a String or anything responding to +to_path+.
    # +options+ is a Hash; +:format+ selects the loader explicitly.
    def initialize(path, options={})
      @path = path
      @options = options
    end

    # Dispatches to the +load_as_<format>+ method for the chosen format
    # and returns its result. Raises ArgumentError listing the available
    # formats when no such method exists.
    def load
      path = @path.respond_to?(:to_path) ? @path.to_path : @path
      format = @options[:format] || guess_format(path) || :arrow

      loader_name = "load_as_#{format}"
      return __send__(loader_name, path) if respond_to?(loader_name, true)

      # Every public or private method named load_as_* counts as a format.
      prefix = /\Aload_as_/
      all_method_names = (methods(true) | private_methods(true)).map(&:to_s)
      available_formats = all_method_names.grep(prefix).map do |method_name|
        method_name.sub(prefix, "")
      end
      message = "Arrow::Table load format must be one of ["
      message << available_formats.join(", ")
      message << "]: #{format.inspect}"
      raise ArgumentError, message
    end

    private

    # Returns the lower-cased file extension of +path+ (without the dot)
    # when a matching +load_as_<extension>+ method exists, otherwise nil.
    def guess_format(path)
      extension = ::File.extname(path).gsub(/\A\./, "").downcase
      return nil if extension.empty?

      respond_to?("load_as_#{extension}", true) ? extension : nil
    end

    # Reads every record batch from +reader+, groups the arrays by column
    # position and assembles them into a Table with the reader's schema.
    def load_raw(input, reader)
      schema = reader.schema
      arrays_by_column = []
      reader.each do |record_batch|
        record_batch.columns.each_with_index do |array, position|
          (arrays_by_column[position] ||= []) << array
        end
      end
      columns = schema.fields.each_with_index.map do |field, position|
        Column.new(field, ChunkedArray.new(arrays_by_column[position]))
      end
      table = Table.new(schema, columns)
      # NOTE(review): presumably stored so the input stays referenced for
      # as long as the table is alive - confirm.
      table.instance_variable_set(:@input, input)
      table
    end

    # Tries the file reader first and the stream reader second, each on
    # a freshly opened input. If both raise Arrow::Error, the last error
    # is re-raised.
    def load_as_arrow(path)
      input = reader = last_error = nil
      [RecordBatchFileReader, RecordBatchStreamReader].each do |reader_class|
        input = MemoryMappedInputStream.new(path)
        begin
          reader = reader_class.new(input)
          break
        rescue Arrow::Error
          last_error = $!
        end
      end
      raise last_error if reader.nil?
      load_raw(input, reader)
    end

    # Loads +path+ with RecordBatchFileReader.
    def load_as_batch(path)
      input = MemoryMappedInputStream.new(path)
      load_raw(input, RecordBatchFileReader.new(input))
    end

    # Loads +path+ with RecordBatchStreamReader.
    def load_as_stream(path)
      input = MemoryMappedInputStream.new(path)
      load_raw(input, RecordBatchStreamReader.new(input))
    end

    # Delegates to CSVLoader, passing every option except +:format+.
    def load_as_csv(path)
      csv_options = @options.dup.tap { |copy| copy.delete(:format) }
      CSVLoader.load(Pathname.new(path), csv_options)
    end
  end
end