red-arrow 0.8.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,29 @@
1
+ # Copyright 2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
module Arrow
  class Date32ArrayBuilder
    # Reference day that Date values are counted from.
    UNIX_EPOCH = Date.new(1970, 1, 1)

    private

    # Converts a value handed to the builder into the form it stores.
    #
    # Anything that responds to +to_date+ is first converted with it. If the
    # result is a Date, the whole number of days between it and UNIX_EPOCH is
    # returned (negative for earlier dates). Every other value is returned
    # unchanged.
    def convert_to_arrow_value(value)
      candidate = value.respond_to?(:to_date) ? value.to_date : value
      return candidate unless candidate.is_a?(Date)

      # Date - Date yields a Rational number of days.
      (candidate - UNIX_EPOCH).to_i
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
module Arrow
  class Date32Array
    # Julian Day Number used as the zero point for raw values
    # (Date.jd(2440588) is 1970-01-01).
    UNIX_EPOCH = 2440588

    # Returns the element at index +i+ as a Date, built from the raw value
    # that +get_raw_value+ reports for that index.
    def get_value(i)
      raw_value = get_raw_value(i)
      to_date(raw_value)
    end

    private

    # Builds a Date by offsetting UNIX_EPOCH by +raw_value+ days.
    def to_date(raw_value)
      julian_day = UNIX_EPOCH + raw_value
      Date.jd(julian_day)
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # Copyright 2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
module Arrow
  class Date64ArrayBuilder
    private

    # Converts a value handed to the builder into the form it stores.
    #
    # A non-Time value that responds to +to_time+ is converted with it first.
    # A Time is then turned into an Integer number of milliseconds since the
    # UNIX epoch; sub-millisecond precision is dropped. Every other value is
    # returned unchanged.
    def convert_to_arrow_value(value)
      value = value.to_time if value.respond_to?(:to_time) && !value.is_a?(Time)
      return value unless value.is_a?(Time)

      whole_seconds = value.to_i
      extra_milliseconds = value.usec / 1000
      whole_seconds * 1_000 + extra_milliseconds
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
module Arrow
  class Date64Array
    # Returns the element at index +i+ as a DateTime, built from the raw
    # value that +get_raw_value+ reports for that index.
    def get_value(i)
      to_datetime(get_raw_value(i))
    end

    private

    # Converts +raw_value+, a count of milliseconds since the UNIX epoch,
    # into a DateTime.
    #
    # The second positional argument of Time.at is interpreted as
    # microseconds, so the millisecond remainder has to be scaled by 1000
    # before it is passed. Without the scaling, 1500 ms would decode as
    # 1.0005 s instead of 1.5 s.
    def to_datetime(raw_value)
      seconds, milliseconds = raw_value.divmod(1_000)
      Time.at(seconds, milliseconds * 1_000).to_datetime
    end
  end
end
@@ -33,14 +33,23 @@ module Arrow
33
33
  require "arrow/buffer"
34
34
  require "arrow/chunked-array"
35
35
  require "arrow/column"
36
+ require "arrow/csv-loader"
36
37
  require "arrow/csv-reader"
38
+ require "arrow/date32-array"
39
+ require "arrow/date32-array-builder"
40
+ require "arrow/date64-array"
41
+ require "arrow/date64-array-builder"
37
42
  require "arrow/field"
43
+ require "arrow/record"
38
44
  require "arrow/record-batch"
39
45
  require "arrow/slicer"
40
46
  require "arrow/table"
41
47
  require "arrow/table-formatter"
48
+ require "arrow/table-loader"
49
+ require "arrow/table-saver"
42
50
  require "arrow/tensor"
43
51
  require "arrow/timestamp-array"
52
+ require "arrow/timestamp-array-builder"
44
53
 
45
54
  require "arrow/record-batch-file-reader"
46
55
  require "arrow/record-batch-stream-reader"
@@ -67,7 +76,7 @@ module Arrow
67
76
  method_name = "get_value"
68
77
  end
69
78
  super(info, klass, method_name)
70
- when "Arrow::TimestampArray"
79
+ when "Arrow::TimestampArray", "Arrow::Date32Array", "Arrow::Date64Array"
71
80
  case method_name
72
81
  when "get_value"
73
82
  method_name = "get_raw_value"
@@ -12,57 +12,31 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- require "arrow/record"
15
+ require "arrow/record-containable"
16
16
 
17
17
  module Arrow
18
18
  class RecordBatch
19
+ include RecordContainable
19
20
  include Enumerable
20
21
 
21
- def each(reuse_record: false)
22
- unless block_given?
23
- return to_enum(__method__, reuse_record: reuse_record)
24
- end
25
-
26
- if reuse_record
27
- record = Record.new(self, nil)
28
- n_rows.times do |i|
29
- record.index = i
30
- yield(record)
31
- end
32
- else
33
- n_rows.times do |i|
34
- yield(Record.new(self, i))
35
- end
36
- end
37
- end
38
-
39
- def find_column(name_or_index)
40
- case name_or_index
41
- when String, Symbol
42
- name = name_or_index
43
- index = resolve_name(name)
44
- else
45
- index = name_or_index
46
- end
47
- columns[index]
48
- end
22
+ alias_method :each, :each_record
49
23
 
50
24
  alias_method :columns_raw, :columns
51
25
  def columns
52
26
  @columns ||= columns_raw
53
27
  end
54
28
 
55
- private
56
- def resolve_name(name)
57
- (@name_to_index ||= build_name_to_index)[name.to_s]
29
+ def respond_to_missing?(name, include_private)
30
+ return true if find_column(name)
31
+ super
58
32
  end
59
33
 
60
- def build_name_to_index
61
- index = {}
62
- schema.fields.each_with_index do |field, i|
63
- index[field.name] = i
34
+ def method_missing(name, *args, &block)
35
+ if args.empty?
36
+ column = find_column(name)
37
+ return column if column
64
38
  end
65
- index
39
+ super
66
40
  end
67
41
  end
68
42
  end
@@ -0,0 +1,70 @@
1
+ # Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
module Arrow
  # Mixin with column and record helpers. The including object must provide
  # +columns+, +schema+ (whose +fields+ each have a +name+) and +n_rows+.
  module RecordContainable
    # Yields each entry of +columns+ in order. Without a block, returns an
    # Enumerator over the same entries.
    def each_column(&block)
      if block_given?
        columns.each(&block)
      else
        to_enum(__method__)
      end
    end

    # Yields one Record per row index from 0 up to +n_rows+ - 1. Without a
    # block, returns an Enumerator.
    #
    # With +reuse_record: true+ a single Record is created and its +index+ is
    # updated before every yield, so callers must not keep the yielded object
    # around. Otherwise a new Record is created for each row.
    def each_record(reuse_record: false)
      return to_enum(__method__, reuse_record: reuse_record) unless block_given?

      if reuse_record
        shared_record = Record.new(self, nil)
        n_rows.times do |row_index|
          shared_record.index = row_index
          yield(shared_record)
        end
      else
        n_rows.times { |row_index| yield(Record.new(self, row_index)) }
      end
    end

    # Looks up a column by name (String or Symbol) or by position (Integer).
    #
    # Returns nil when the name is not among the schema's field names. Raises
    # ArgumentError for any other kind of argument.
    def find_column(name_or_index)
      case name_or_index
      when Integer
        columns[name_or_index]
      when String, Symbol
        position = resolve_column_name(name_or_index.to_s)
        position.nil? ? nil : columns[position]
      else
        raise ArgumentError,
              "column name or index must be String, Symbol or Integer"
      end
    end

    private

    # Maps a column name to its position using a lookup table that is built
    # on first use and then cached on the object.
    def resolve_column_name(name)
      @column_name_to_index ||= build_column_name_resolve_table
      @column_name_to_index[name]
    end

    # Builds a Hash from field name to field position, following the order
    # of +schema.fields+.
    def build_column_name_resolve_table
      schema.fields.each_with_index.each_with_object({}) do |(field, i), table|
        table[field.name] = i
      end
    end
  end
end
@@ -15,25 +15,40 @@
15
15
  module Arrow
16
16
  class Record
17
17
  attr_accessor :index
18
- def initialize(record_batch, index)
19
- @record_batch = record_batch
18
+ def initialize(record_container, index)
19
+ @record_container = record_container
20
20
  @index = index
21
21
  end
22
22
 
23
23
  def [](column_name_or_column_index)
24
- @record_batch.find_column(column_name_or_column_index)[@index]
24
+ column = @record_container.find_column(column_name_or_column_index)
25
+ return nil if column.nil?
26
+ column[@index]
25
27
  end
26
28
 
27
29
  def columns
28
- @record_batch.columns
30
+ @record_container.columns
29
31
  end
30
32
 
31
33
  def to_h
32
34
  attributes = {}
33
- @record_batch.schema.fields.each_with_index do |field, i|
35
+ @record_container.schema.fields.each_with_index do |field, i|
34
36
  attributes[field.name] = self[i]
35
37
  end
36
38
  attributes
37
39
  end
40
+
41
+ def respond_to_missing?(name, include_private)
42
+ return true if @record_container.find_column(name)
43
+ super
44
+ end
45
+
46
+ def method_missing(name, *args, &block)
47
+ if args.empty?
48
+ column = @record_container.find_column(name)
49
+ return column[@index] if column
50
+ end
51
+ super
52
+ end
38
53
  end
39
54
  end
@@ -0,0 +1,117 @@
1
+ # Copyright 2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
module Arrow
  # Loads a table from a path by dispatching to a private
  # +load_as_<format>+ method. The format comes from the +:format+ option,
  # then from the path's extension, and finally defaults to +:arrow+.
  class TableLoader
    class << self
      # Shortcut for +new(path, options).load+.
      def load(path, options={})
        new(path, options).load
      end
    end

    # +path+ may be a String or any object that responds to +to_path+.
    # +options+ is a Hash; +:format+ selects the loader explicitly.
    def initialize(path, options={})
      @path = path
      @options = options
    end

    # Resolves the format and runs the matching +load_as_<format>+ method.
    #
    # Raises ArgumentError, listing the formats found on this object, when no
    # matching loader method exists.
    def load
      path = @path.respond_to?(:to_path) ? @path.to_path : @path
      format = @options[:format] || guess_format(path) || :arrow

      loader_name = "load_as_#{format}"
      return __send__(loader_name, path) if respond_to?(loader_name, true)

      # Collect every format for which a loader method is defined so the
      # error can tell the caller what is accepted.
      prefix = "load_as_"
      candidate_names = (methods(true) | private_methods(true)).map(&:to_s)
      available_formats = candidate_names
                          .select { |name| name.start_with?(prefix) }
                          .map { |name| name[prefix.size..-1] }
      raise ArgumentError,
            "Arrow::Table load format must be one of [" \
            "#{available_formats.join(", ")}" \
            "]: #{format.inspect}"
    end

    private

    # Returns the lower-cased extension of +path+ (without the dot) when a
    # loader method exists for it, otherwise nil.
    def guess_format(path)
      extension = ::File.extname(path).sub(/\A\./, "").downcase
      return nil if extension.empty?

      respond_to?("load_as_#{extension}", true) ? extension : nil
    end

    # Reads every record batch from +reader+ and assembles a Table whose
    # columns are ChunkedArrays made of the per-batch arrays.
    def load_raw(input, reader)
      schema = reader.schema
      arrays_per_column = []
      reader.each do |record_batch|
        record_batch.columns.each_with_index do |array, i|
          (arrays_per_column[i] ||= []) << array
        end
      end
      columns = schema.fields.each_with_index.map do |field, i|
        Column.new(field, ChunkedArray.new(arrays_per_column[i]))
      end
      loaded_table = Table.new(schema, columns)
      # NOTE(review): presumably stored so the input stays referenced for as
      # long as the table is alive -- confirm.
      loaded_table.instance_variable_set(:@input, input)
      loaded_table
    end

    # Tries the file reader first and the stream reader second, opening a new
    # input for each attempt. If neither reader can be created, the last
    # Arrow::Error is re-raised.
    def load_as_arrow(path)
      input = nil
      reader = nil
      last_error = nil
      [RecordBatchFileReader, RecordBatchStreamReader].each do |reader_class|
        input = MemoryMappedInputStream.new(path)
        begin
          reader = reader_class.new(input)
          break
        rescue Arrow::Error => error
          last_error = error
        end
      end
      raise last_error if reader.nil?

      load_raw(input, reader)
    end

    # Loads +path+ with RecordBatchFileReader.
    def load_as_batch(path)
      input = MemoryMappedInputStream.new(path)
      load_raw(input, RecordBatchFileReader.new(input))
    end

    # Loads +path+ with RecordBatchStreamReader.
    def load_as_stream(path)
      input = MemoryMappedInputStream.new(path)
      load_raw(input, RecordBatchStreamReader.new(input))
    end

    # Delegates to CSVLoader, passing along every option except +:format+.
    def load_as_csv(path)
      csv_options = @options.dup
      csv_options.delete(:format)
      CSVLoader.load(Pathname.new(path), csv_options)
    end
  end
end