red-arrow 0.4.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 244f843a8c061c6853ae9325eb442adada478f30
4
- data.tar.gz: 304840d2642191a717b21695db9012b8c8037fe3
3
+ metadata.gz: add6b81f8b6fe0d623d022d16b358b756bf0a559
4
+ data.tar.gz: ffa5678bd352df22212c7dd3d3b6beea6491bbde
5
5
  SHA512:
6
- metadata.gz: c8cb38809f2a5532569bd07fa1c39471887f79ce330faf5bc0c1543912cce125926d6e0d042c3a302c91ebbfe85a4f0cc56e639b109d04a656152dd0bbd733fe
7
- data.tar.gz: 7e0c529708ae390dbd3a275308c7e14f5c7c1484ca630951c415ad0102b09a0aa5decc32875cbc74554f291aab2eed1e144ca14d07ef58d8b6121e0bd9db86ba
6
+ metadata.gz: da4959b8123e205d5fd16d17704ecc1e93d29659294575ac6c5797e7f578b8ce0c43710f344c909a70b0b61def35a5d1f9d6c1d1de59679d3ad6bb037df2671f
7
+ data.tar.gz: 11c7f909073eaa609aa2402a629016a27ecac19a0ac85a54468fba5d5ea433b23c41cfa6179ff176363d17c61f86b2233812440c3bb26d32459a145d14d7d06a
data/README.md CHANGED
@@ -20,7 +20,7 @@ gobject-introspection gem is a Ruby bindings of GObject Introspection. Red Arrow
20
20
 
21
21
  ## Install
22
22
 
23
- Install Arrow GLib before install Red Arrow. Use [Apache Arrow packages](https://github.com/red-data-tools/arrow-packages) for installing Arrow GLib.
23
+ Install Arrow GLib before installing Red Arrow. Use [packages.red-data-tools.org](https://github.com/red-data-tools/packages.red-data-tools.org) for installing Arrow GLib.
24
24
 
25
25
  Install Red Arrow after you install Arrow GLib:
26
26
 
@@ -50,7 +50,8 @@ require "arrow"
50
50
 
51
51
  ## License
52
52
 
53
- Apache License 2.0. See doc/text/apache-2.0.txt for details.
53
+ Apache License 2.0. See `doc/text/apache-2.0.txt` and `NOTICE` for
54
+ details.
54
55
 
55
56
  (Kouhei Sutou has a right to change the license including contributed
56
57
  patches.)
@@ -30,7 +30,7 @@ end
30
30
  namespace :dependency do
31
31
  desc "Check dependency"
32
32
  task :check do
33
- unless PKGConfig.check_version?("arrow-glib")
33
+ unless PKGConfig.check_version?("arrow-glib", 0, 8, 0)
34
34
  unless NativePackageInstaller.install(:debian => "libarrow-glib-dev",
35
35
  :redhat => "arrow-glib-devel")
36
36
  exit(false)
@@ -1,6 +1,42 @@
1
1
  # News
2
2
 
3
- ## 0.7.0 - 2017-09-19
3
+ ## 0.8.0 - 2018-01-04
4
+
5
+ ### Improvements
6
+
7
+ * Required Apache Arrow 0.8.0.
8
+
9
+ * Updated README. [GitHub#5][Patch by mikisou]
10
+
11
+ * `Arrow::Table#each_record_batch`: Added.
12
+
13
+ * `Arrow::ArrayBuilder#build`: Added.
14
+
15
+ * `Arrow::CSVReader`: Added.
16
+
17
+ * `Arrow::Array#[]`: Added `NULL` support.
18
+
19
+ * `Arrow::TimestampArray`: Added.
20
+
21
+ * `Arrow::Table#to_s`: Added table style format.
22
+
23
+ * `Arrow::Table#slice`: Added.
24
+
25
+ * `Arrow::Table#[]`: Added.
26
+
27
+ * `Arrow::Table`: Added dynamic column name reader.
28
+
29
+ * `Arrow::Table#merge`: Added.
30
+
31
+ * `Arrow::Table#remove_column`: Added column name support.
32
+
33
+ * `Arrow::Table#select_columns`: Added.
34
+
35
+ ### Thanks
36
+
37
+ * mikisou
38
+
39
+ ## 0.4.1 - 2017-09-19
4
40
 
5
41
  ### Improvements
6
42
 
Binary file
@@ -17,50 +17,54 @@ module Arrow
17
17
  class << self
18
18
  def build(values)
19
19
  builder = new
20
- if builder.respond_to?(:append_values)
21
- start_index = 0
22
- current_index = 0
23
- status = :value
24
- values.each do |value|
25
- if value.nil?
26
- if status == :value
27
- if start_index != current_index
28
- builder.append_values(values[start_index...current_index])
29
- start_index = current_index
30
- end
31
- status = :null
32
- end
33
- else
34
- if status == :null
35
- builder.append_nulls(current_index - start_index)
20
+ builder.build(values)
21
+ end
22
+ end
23
+
24
+ def build(values)
25
+ if respond_to?(:append_values)
26
+ start_index = 0
27
+ current_index = 0
28
+ status = :value
29
+ values.each do |value|
30
+ if value.nil?
31
+ if status == :value
32
+ if start_index != current_index
33
+ append_values(values[start_index...current_index])
36
34
  start_index = current_index
37
- status = :value
38
35
  end
36
+ status = :null
39
37
  end
40
- current_index += 1
41
- end
42
- if start_index != current_index
43
- if status == :value
44
- if start_index == 0 and current_index == values.size
45
- builder.append_values(values)
46
- else
47
- builder.append_values(values[start_index...current_index])
48
- end
49
- else
50
- builder.append_nulls(current_index - start_index)
38
+ else
39
+ if status == :null
40
+ append_nulls(current_index - start_index)
41
+ start_index = current_index
42
+ status = :value
51
43
  end
52
44
  end
53
- else
54
- values.each do |value|
55
- if value.nil?
56
- builder.append_null
45
+ current_index += 1
46
+ end
47
+ if start_index != current_index
48
+ if status == :value
49
+ if start_index == 0 and current_index == values.size
50
+ append_values(values)
57
51
  else
58
- builder.append(value)
52
+ append_values(values[start_index...current_index])
59
53
  end
54
+ else
55
+ append_nulls(current_index - start_index)
56
+ end
57
+ end
58
+ else
59
+ values.each do |value|
60
+ if value.nil?
61
+ append_null
62
+ else
63
+ append(value)
60
64
  end
61
65
  end
62
- builder.finish
63
66
  end
67
+ finish
64
68
  end
65
69
  end
66
70
  end
@@ -28,13 +28,19 @@ module Arrow
28
28
  end
29
29
  end
30
30
 
31
+ def [](i)
32
+ if null?(i)
33
+ nil
34
+ else
35
+ get_value(i)
36
+ end
37
+ end
38
+
31
39
  def each
40
+ return to_enum(__method__) unless block_given?
41
+
32
42
  length.times do |i|
33
- if null?(i)
34
- yield(nil)
35
- else
36
- yield(self[i])
37
- end
43
+ yield(self[i])
38
44
  end
39
45
  end
40
46
 
@@ -16,13 +16,29 @@ module Arrow
16
16
  class ChunkedArray
17
17
  include Enumerable
18
18
 
19
+ def [](i)
20
+ n_chunks.times do |j|
21
+ array = get_chunk(j)
22
+ return array[i] if i < array.length
23
+ i -= array.length
24
+ end
25
+ nil
26
+ end
27
+
19
28
  def each(&block)
20
29
  return to_enum(__method__) unless block_given?
21
30
 
22
- n_chunks.times do |i|
23
- array = get_chunk(i)
31
+ each_chunk do |array|
24
32
  array.each(&block)
25
33
  end
26
34
  end
35
+
36
+ def each_chunk
37
+ return to_enum(__method__) unless block_given?
38
+
39
+ n_chunks.times do |i|
40
+ yield(get_chunk(i))
41
+ end
42
+ end
27
43
  end
28
44
  end
@@ -16,6 +16,10 @@ module Arrow
16
16
  class Column
17
17
  include Enumerable
18
18
 
19
+ def [](i)
20
+ data[i]
21
+ end
22
+
19
23
  def each(&block)
20
24
  return to_enum(__method__) unless block_given?
21
25
 
@@ -0,0 +1,162 @@
1
+ # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ require "csv"
16
+ require "pathname"
17
+ require "time"
18
+
19
+ module Arrow
20
+ class CSVReader
21
+ class << self
22
+ def read(csv, **options)
23
+ case csv
24
+ when Pathname
25
+ path = csv.to_path
26
+ options = update_csv_parse_options(options, :open_csv, path)
27
+ open_csv(path, **options) do |_csv|
28
+ read(_csv)
29
+ end
30
+ when /\A.+\.csv\z/i
31
+ read(Pathname.new(csv), **options)
32
+ when String
33
+ options = update_csv_parse_options(options, :parse_csv_data, csv)
34
+ parse_csv_data(csv, **options) do |_csv|
35
+ read(_csv)
36
+ end
37
+ else
38
+ new(csv).read
39
+ end
40
+ end
41
+
42
+ private
43
+ def open_csv(path, **options)
44
+ CSV.open(path, **options) do |csv|
45
+ yield(csv)
46
+ end
47
+ end
48
+
49
+ def parse_csv_data(data, **options)
50
+ csv = CSV.new(data, **options)
51
+ begin
52
+ yield(csv)
53
+ ensure
54
+ csv.close
55
+ end
56
+ end
57
+
58
+ ISO8601_CONVERTER = lambda do |field|
59
+ begin
60
+ encoded_field = field.encode(CSV::ConverterEncoding)
61
+ rescue EncodingError
62
+ field
63
+ else
64
+ begin
65
+ Time.iso8601(encoded_field)
66
+ rescue ArgumentError
67
+ field
68
+ end
69
+ end
70
+ end
71
+
72
+ def update_csv_parse_options(options, create_csv, *args)
73
+ return options unless options.empty?
74
+
75
+ new_options = options.merge(converters: [:all, ISO8601_CONVERTER])
76
+ __send__(create_csv, *args, **new_options) do |csv|
77
+ row1 = csv.shift
78
+ if row1.nil?
79
+ new_options[:headers] = false
80
+ return new_options
81
+ end
82
+ if row1.any?(&:nil?)
83
+ new_options[:headers] = false
84
+ return new_options
85
+ end
86
+
87
+ row2 = csv.shift
88
+ return new_options if row2.nil?
89
+ if row2.any?(&:nil?)
90
+ new_options[:headers] = true
91
+ return new_options
92
+ end
93
+
94
+ if row1.collect(&:class) != row2.collect(&:class)
95
+ new_options[:headers] = true
96
+ return new_options
97
+ end
98
+
99
+ new_options
100
+ end
101
+ end
102
+ end
103
+
104
+ def initialize(csv)
105
+ @csv = csv
106
+ end
107
+
108
+ def read
109
+ builders = []
110
+ values_set = []
111
+ @csv.each do |row|
112
+ if row.is_a?(CSV::Row)
113
+ row = row.collect(&:last)
114
+ end
115
+ row.each_with_index do |value, i|
116
+ builders[i] ||= create_builder(value)
117
+ values = (values_set[i] ||= [])
118
+ case value
119
+ when Time
120
+ value = value.to_i * (10 ** 9) + value.nsec
121
+ end
122
+ values << value
123
+ end
124
+ end
125
+ return nil if values_set.empty?
126
+
127
+ arrays = values_set.collect.with_index do |values, i|
128
+ builders[i].build(values)
129
+ end
130
+ if @csv.headers
131
+ names = @csv.headers
132
+ else
133
+ names = builders.size.times.collect(&:to_s)
134
+ end
135
+ fields = names.collect.with_index do |name, i|
136
+ Arrow::Field.new(name, arrays[i].value_data_type)
137
+ end
138
+ schema = Schema.new(fields)
139
+ columns = arrays.collect.with_index do |array, i|
140
+ Column.new(fields[i], array)
141
+ end
142
+ Table.new(schema, columns)
143
+ end
144
+
145
+ private
146
+ def create_builder(sample_value)
147
+ case sample_value
148
+ when Integer
149
+ IntArrayBuilder.new
150
+ when Float
151
+ DoubleArrayBuilder.new
152
+ when String
153
+ StringArrayBuilder.new
154
+ when Time
155
+ data_type = TimestampDataType.new(:nano)
156
+ TimestampArrayBuilder.new(data_type)
157
+ else
158
+ nil
159
+ end
160
+ end
161
+ end
162
+ end
@@ -33,10 +33,14 @@ module Arrow
33
33
  require "arrow/buffer"
34
34
  require "arrow/chunked-array"
35
35
  require "arrow/column"
36
+ require "arrow/csv-reader"
36
37
  require "arrow/field"
37
38
  require "arrow/record-batch"
39
+ require "arrow/slicer"
38
40
  require "arrow/table"
41
+ require "arrow/table-formatter"
39
42
  require "arrow/tensor"
43
+ require "arrow/timestamp-array"
40
44
 
41
45
  require "arrow/record-batch-file-reader"
42
46
  require "arrow/record-batch-stream-reader"
@@ -57,22 +61,20 @@ module Arrow
57
61
  case klass.name
58
62
  when "Arrow::StringArray"
59
63
  case method_name
60
- when "[]"
61
- method_name = "get_value"
64
+ when "get_value"
65
+ method_name = "get_raw_value"
62
66
  when "get_string"
63
- method_name = "[]"
67
+ method_name = "get_value"
68
+ end
69
+ super(info, klass, method_name)
70
+ when "Arrow::TimestampArray"
71
+ case method_name
72
+ when "get_value"
73
+ method_name = "get_raw_value"
64
74
  end
65
75
  super(info, klass, method_name)
66
76
  else
67
- super
68
- end
69
- end
70
-
71
- def rubyish_method_name(function_info, options={})
72
- if function_info.n_in_args == 1 and function_info.name == "get_value"
73
- "[]"
74
- else
75
- super
77
+ super
76
78
  end
77
79
  end
78
80
  end