red-arrow 0.4.1 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 244f843a8c061c6853ae9325eb442adada478f30
4
- data.tar.gz: 304840d2642191a717b21695db9012b8c8037fe3
3
+ metadata.gz: add6b81f8b6fe0d623d022d16b358b756bf0a559
4
+ data.tar.gz: ffa5678bd352df22212c7dd3d3b6beea6491bbde
5
5
  SHA512:
6
- metadata.gz: c8cb38809f2a5532569bd07fa1c39471887f79ce330faf5bc0c1543912cce125926d6e0d042c3a302c91ebbfe85a4f0cc56e639b109d04a656152dd0bbd733fe
7
- data.tar.gz: 7e0c529708ae390dbd3a275308c7e14f5c7c1484ca630951c415ad0102b09a0aa5decc32875cbc74554f291aab2eed1e144ca14d07ef58d8b6121e0bd9db86ba
6
+ metadata.gz: da4959b8123e205d5fd16d17704ecc1e93d29659294575ac6c5797e7f578b8ce0c43710f344c909a70b0b61def35a5d1f9d6c1d1de59679d3ad6bb037df2671f
7
+ data.tar.gz: 11c7f909073eaa609aa2402a629016a27ecac19a0ac85a54468fba5d5ea433b23c41cfa6179ff176363d17c61f86b2233812440c3bb26d32459a145d14d7d06a
data/README.md CHANGED
@@ -20,7 +20,7 @@ gobject-introspection gem is a Ruby bindings of GObject Introspection. Red Arrow
20
20
 
21
21
  ## Install
22
22
 
23
- Install Arrow GLib before install Red Arrow. Use [Apache Arrow packages](https://github.com/red-data-tools/arrow-packages) for installing Arrow GLib.
23
+ Install Arrow GLib before install Red Arrow. Use [packages.red-data-tools.org](https://github.com/red-data-tools/packages.red-data-tools.org) for installing Arrow GLib.
24
24
 
25
25
  Install Red Arrow after you install Arrow GLib:
26
26
 
@@ -50,7 +50,8 @@ require "arrow"
50
50
 
51
51
  ## License
52
52
 
53
- Apache License 2.0. See doc/text/apache-2.0.txt for details.
53
+ Apache License 2.0. See `doc/text/apache-2.0.txt` and `NOTICE` for
54
+ details.
54
55
 
55
56
  (Kouhei Sutou has a right to change the license including contributed
56
57
  patches.)
@@ -30,7 +30,7 @@ end
30
30
  namespace :dependency do
31
31
  desc "Check dependency"
32
32
  task :check do
33
- unless PKGConfig.check_version?("arrow-glib")
33
+ unless PKGConfig.check_version?("arrow-glib", 0, 8, 0)
34
34
  unless NativePackageInstaller.install(:debian => "libarrow-glib-dev",
35
35
  :redhat => "arrow-glib-devel")
36
36
  exit(false)
@@ -1,6 +1,42 @@
1
1
  # News
2
2
 
3
- ## 0.7.0 - 2017-09-19
3
+ ## 0.8.0 - 2018-01-04
4
+
5
+ ### Improvements
6
+
7
+ * Required Apache Arrow 0.8.0.
8
+
9
+ * Update README. [GitHub#5][Patch by mikisou]
10
+
11
+ * `Arrow::Table#each_record_batch`: Added.
12
+
13
+ * `Arrow::ArrayBuilder#build`: Added.
14
+
15
+ * `Arrow::CSVReader`: Added.
16
+
17
+ * `Arrow::Array#[]`: Added `NULL` support.
18
+
19
+ * `Arrow::TimestampArray`: Added.
20
+
21
+ * `Arrow::Table#to_s`: Added table style format.
22
+
23
+ * `Arrow::Table#slice`: Added.
24
+
25
+ * `Arrow::Table#[]`: Added.
26
+
27
+ * `Arrow::Table`: Added dynamic column name reader.
28
+
29
+ * `Arrow::Table#merge`: Added.
30
+
31
+ * `Arrow::Table#remove_column`: Added column name support.
32
+
33
+ * `Arrow::Table#select_columns`: Added.
34
+
35
+ ### Thanks
36
+
37
+ * mikisou
38
+
39
+ ## 0.4.1 - 2017-09-19
4
40
 
5
41
  ### Improvements
6
42
 
Binary file
@@ -17,50 +17,54 @@ module Arrow
17
17
  class << self
18
18
  def build(values)
19
19
  builder = new
20
- if builder.respond_to?(:append_values)
21
- start_index = 0
22
- current_index = 0
23
- status = :value
24
- values.each do |value|
25
- if value.nil?
26
- if status == :value
27
- if start_index != current_index
28
- builder.append_values(values[start_index...current_index])
29
- start_index = current_index
30
- end
31
- status = :null
32
- end
33
- else
34
- if status == :null
35
- builder.append_nulls(current_index - start_index)
20
+ builder.build(values)
21
+ end
22
+ end
23
+
24
+ def build(values)
25
+ if respond_to?(:append_values)
26
+ start_index = 0
27
+ current_index = 0
28
+ status = :value
29
+ values.each do |value|
30
+ if value.nil?
31
+ if status == :value
32
+ if start_index != current_index
33
+ append_values(values[start_index...current_index])
36
34
  start_index = current_index
37
- status = :value
38
35
  end
36
+ status = :null
39
37
  end
40
- current_index += 1
41
- end
42
- if start_index != current_index
43
- if status == :value
44
- if start_index == 0 and current_index == values.size
45
- builder.append_values(values)
46
- else
47
- builder.append_values(values[start_index...current_index])
48
- end
49
- else
50
- builder.append_nulls(current_index - start_index)
38
+ else
39
+ if status == :null
40
+ append_nulls(current_index - start_index)
41
+ start_index = current_index
42
+ status = :value
51
43
  end
52
44
  end
53
- else
54
- values.each do |value|
55
- if value.nil?
56
- builder.append_null
45
+ current_index += 1
46
+ end
47
+ if start_index != current_index
48
+ if status == :value
49
+ if start_index == 0 and current_index == values.size
50
+ append_values(values)
57
51
  else
58
- builder.append(value)
52
+ append_values(values[start_index...current_index])
59
53
  end
54
+ else
55
+ append_nulls(current_index - start_index)
56
+ end
57
+ end
58
+ else
59
+ values.each do |value|
60
+ if value.nil?
61
+ append_null
62
+ else
63
+ append(value)
60
64
  end
61
65
  end
62
- builder.finish
63
66
  end
67
+ finish
64
68
  end
65
69
  end
66
70
  end
@@ -28,13 +28,19 @@ module Arrow
28
28
  end
29
29
  end
30
30
 
31
+ def [](i)
32
+ if null?(i)
33
+ nil
34
+ else
35
+ get_value(i)
36
+ end
37
+ end
38
+
31
39
  def each
40
+ return to_enum(__method__) unless block_given?
41
+
32
42
  length.times do |i|
33
- if null?(i)
34
- yield(nil)
35
- else
36
- yield(self[i])
37
- end
43
+ yield(self[i])
38
44
  end
39
45
  end
40
46
 
@@ -16,13 +16,29 @@ module Arrow
16
16
  class ChunkedArray
17
17
  include Enumerable
18
18
 
19
+ def [](i)
20
+ n_chunks.times do |j|
21
+ array = get_chunk(j)
22
+ return array[i] if i < array.length
23
+ i -= array.length
24
+ end
25
+ nil
26
+ end
27
+
19
28
  def each(&block)
20
29
  return to_enum(__method__) unless block_given?
21
30
 
22
- n_chunks.times do |i|
23
- array = get_chunk(i)
31
+ each_chunk do |array|
24
32
  array.each(&block)
25
33
  end
26
34
  end
35
+
36
+ def each_chunk
37
+ return to_enum(__method__) unless block_given?
38
+
39
+ n_chunks.times do |i|
40
+ yield(get_chunk(i))
41
+ end
42
+ end
27
43
  end
28
44
  end
@@ -16,6 +16,10 @@ module Arrow
16
16
  class Column
17
17
  include Enumerable
18
18
 
19
+ def [](i)
20
+ data[i]
21
+ end
22
+
19
23
  def each(&block)
20
24
  return to_enum(__method__) unless block_given?
21
25
 
@@ -0,0 +1,162 @@
1
+ # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ require "csv"
16
+ require "pathname"
17
+ require "time"
18
+
19
+ module Arrow
20
+ class CSVReader
21
+ class << self
22
+ def read(csv, **options)
23
+ case csv
24
+ when Pathname
25
+ path = csv.to_path
26
+ options = update_csv_parse_options(options, :open_csv, path)
27
+ open_csv(path, **options) do |_csv|
28
+ read(_csv)
29
+ end
30
+ when /\A.+\.csv\z/i
31
+ read(Pathname.new(csv), **options)
32
+ when String
33
+ options = update_csv_parse_options(options, :parse_csv_data, csv)
34
+ parse_csv_data(csv, **options) do |_csv|
35
+ read(_csv)
36
+ end
37
+ else
38
+ new(csv).read
39
+ end
40
+ end
41
+
42
+ private
43
+ def open_csv(path, **options)
44
+ CSV.open(path, **options) do |csv|
45
+ yield(csv)
46
+ end
47
+ end
48
+
49
+ def parse_csv_data(data, **options)
50
+ csv = CSV.new(data, **options)
51
+ begin
52
+ yield(csv)
53
+ ensure
54
+ csv.close
55
+ end
56
+ end
57
+
58
+ ISO8601_CONVERTER = lambda do |field|
59
+ begin
60
+ encoded_field = field.encode(CSV::ConverterEncoding)
61
+ rescue EncodingError
62
+ field
63
+ else
64
+ begin
65
+ Time.iso8601(encoded_field)
66
+ rescue ArgumentError
67
+ field
68
+ end
69
+ end
70
+ end
71
+
72
+ def update_csv_parse_options(options, create_csv, *args)
73
+ return options unless options.empty?
74
+
75
+ new_options = options.merge(converters: [:all, ISO8601_CONVERTER])
76
+ __send__(create_csv, *args, **new_options) do |csv|
77
+ row1 = csv.shift
78
+ if row1.nil?
79
+ new_options[:headers] = false
80
+ return new_options
81
+ end
82
+ if row1.any?(&:nil?)
83
+ new_options[:headers] = false
84
+ return new_options
85
+ end
86
+
87
+ row2 = csv.shift
88
+ return new_options if row2.nil?
89
+ if row2.any?(&:nil?)
90
+ new_options[:headers] = true
91
+ return new_options
92
+ end
93
+
94
+ if row1.collect(&:class) != row2.collect(&:class)
95
+ new_options[:headers] = true
96
+ return new_options
97
+ end
98
+
99
+ new_options
100
+ end
101
+ end
102
+ end
103
+
104
+ def initialize(csv)
105
+ @csv = csv
106
+ end
107
+
108
+ def read
109
+ builders = []
110
+ values_set = []
111
+ @csv.each do |row|
112
+ if row.is_a?(CSV::Row)
113
+ row = row.collect(&:last)
114
+ end
115
+ row.each_with_index do |value, i|
116
+ builders[i] ||= create_builder(value)
117
+ values = (values_set[i] ||= [])
118
+ case value
119
+ when Time
120
+ value = value.to_i * (10 ** 9) + value.nsec
121
+ end
122
+ values << value
123
+ end
124
+ end
125
+ return nil if values_set.empty?
126
+
127
+ arrays = values_set.collect.with_index do |values, i|
128
+ builders[i].build(values)
129
+ end
130
+ if @csv.headers
131
+ names = @csv.headers
132
+ else
133
+ names = builders.size.times.collect(&:to_s)
134
+ end
135
+ fields = names.collect.with_index do |name, i|
136
+ Arrow::Field.new(name, arrays[i].value_data_type)
137
+ end
138
+ schema = Schema.new(fields)
139
+ columns = arrays.collect.with_index do |array, i|
140
+ Column.new(fields[i], array)
141
+ end
142
+ Table.new(schema, columns)
143
+ end
144
+
145
+ private
146
+ def create_builder(sample_value)
147
+ case sample_value
148
+ when Integer
149
+ IntArrayBuilder.new
150
+ when Float
151
+ DoubleArrayBuilder.new
152
+ when String
153
+ StringArrayBuilder.new
154
+ when Time
155
+ data_type = TimestampDataType.new(:nano)
156
+ TimestampArrayBuilder.new(data_type)
157
+ else
158
+ nil
159
+ end
160
+ end
161
+ end
162
+ end
@@ -33,10 +33,14 @@ module Arrow
33
33
  require "arrow/buffer"
34
34
  require "arrow/chunked-array"
35
35
  require "arrow/column"
36
+ require "arrow/csv-reader"
36
37
  require "arrow/field"
37
38
  require "arrow/record-batch"
39
+ require "arrow/slicer"
38
40
  require "arrow/table"
41
+ require "arrow/table-formatter"
39
42
  require "arrow/tensor"
43
+ require "arrow/timestamp-array"
40
44
 
41
45
  require "arrow/record-batch-file-reader"
42
46
  require "arrow/record-batch-stream-reader"
@@ -57,22 +61,20 @@ module Arrow
57
61
  case klass.name
58
62
  when "Arrow::StringArray"
59
63
  case method_name
60
- when "[]"
61
- method_name = "get_value"
64
+ when "get_value"
65
+ method_name = "get_raw_value"
62
66
  when "get_string"
63
- method_name = "[]"
67
+ method_name = "get_value"
68
+ end
69
+ super(info, klass, method_name)
70
+ when "Arrow::TimestampArray"
71
+ case method_name
72
+ when "get_value"
73
+ method_name = "get_raw_value"
64
74
  end
65
75
  super(info, klass, method_name)
66
76
  else
67
- super
68
- end
69
- end
70
-
71
- def rubyish_method_name(function_info, options={})
72
- if function_info.n_in_args == 1 and function_info.name == "get_value"
73
- "[]"
74
- else
75
- super
77
+ super
76
78
  end
77
79
  end
78
80
  end