polars-df 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +20 -0
- data/README.md +93 -0
- data/ext/polars/Cargo.toml +35 -0
- data/ext/polars/extconf.rb +4 -0
- data/ext/polars/src/conversion.rs +115 -0
- data/ext/polars/src/dataframe.rs +304 -0
- data/ext/polars/src/error.rs +24 -0
- data/ext/polars/src/file.rs +28 -0
- data/ext/polars/src/lazy/dataframe.rs +123 -0
- data/ext/polars/src/lazy/dsl.rs +298 -0
- data/ext/polars/src/lazy/mod.rs +3 -0
- data/ext/polars/src/lazy/utils.rs +13 -0
- data/ext/polars/src/lib.rs +256 -0
- data/ext/polars/src/series.rs +475 -0
- data/lib/polars/data_frame.rb +315 -0
- data/lib/polars/expr.rb +233 -0
- data/lib/polars/functions.rb +45 -0
- data/lib/polars/io.rb +39 -0
- data/lib/polars/lazy_frame.rb +139 -0
- data/lib/polars/lazy_functions.rb +121 -0
- data/lib/polars/lazy_group_by.rb +13 -0
- data/lib/polars/series.rb +261 -0
- data/lib/polars/string_expr.rb +17 -0
- data/lib/polars/utils.rb +47 -0
- data/lib/polars/version.rb +3 -0
- data/lib/polars/when.rb +15 -0
- data/lib/polars/when_then.rb +18 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +25 -0
- metadata +87 -0
@@ -0,0 +1,315 @@
|
|
1
|
+
module Polars
  # Eager table backed by a native `RbDataFrame` handle stored in `_df`.
  #
  # Simple accessors delegate straight to the native handle; relational
  # operations (filter, join, select, ...) are routed through the lazy API and
  # collected immediately.
  class DataFrame
    # The native frame handle this object wraps.
    attr_accessor :_df

    # Builds a frame from `nil` (empty frame), a Hash of column name => values,
    # an Array (each element must respond to `_s`), a Series, or, when
    # ActiveRecord is loaded, an `ActiveRecord::Relation` / `ActiveRecord::Result`.
    #
    # Symbol keys of a Hash are converted to strings; other keys are passed
    # through unchanged.
    #
    # @raise [ArgumentError] if `data` is none of the supported types
    def initialize(data = nil)
      if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
        data = active_record_to_hash(data)
      end

      self._df =
        case data
        when nil
          hash_to_rbdf({})
        when Hash
          hash_to_rbdf(data.transform_keys { |key| key.is_a?(Symbol) ? key.to_s : key })
        when Array
          sequence_to_rbdf(data)
        when Series
          series_to_rbdf(data)
        else
          raise ArgumentError, "DataFrame constructor called with unsupported type; got #{data.class.name}"
        end
    end

    # Wraps an existing native handle without running `initialize`.
    def self._from_rbdf(rb_df)
      df = DataFrame.allocate
      df._df = rb_df
      df
    end

    # Passes String and Pathname arguments through `Utils.format_path`; any
    # other object (for example an IO) is returned unchanged.
    def self._normalize_path(file)
      path_like = file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
      path_like ? Utils.format_path(file) : file
    end

    # Reads CSV from a path or a file-like object via the native reader.
    def self._read_csv(file, has_header: true)
      _from_rbdf(RbDataFrame.read_csv(_normalize_path(file), has_header))
    end

    # Reads Parquet from a path or a file-like object via the native reader.
    def self._read_parquet(file)
      _from_rbdf(RbDataFrame.read_parquet(_normalize_path(file)))
    end

    # Reads JSON from a path or a file-like object via the native reader.
    def self._read_json(file)
      _from_rbdf(RbDataFrame.read_json(_normalize_path(file)))
    end

    # Reads newline-delimited JSON from a path or a file-like object via the
    # native reader.
    def self._read_ndjson(file)
      _from_rbdf(RbDataFrame.read_ndjson(_normalize_path(file)))
    end

    # Delegates to the native frame's `shape`.
    def shape
      _df.shape
    end

    # Delegates to the native frame's `height`.
    def height
      _df.height
    end

    # Delegates to the native frame's `width`.
    def width
      _df.width
    end

    # Column names as reported by the native frame.
    def columns
      _df.columns
    end

    # Native dtype names converted with `to_sym`.
    def dtypes
      _df.dtypes.map(&:to_sym)
    end

    # String rendering produced by the native frame; also used for `inspect`.
    def to_s
      _df.to_s
    end
    alias_method :inspect, :to_s

    # True when `name` is one of `columns`.
    def include?(name)
      columns.include?(name)
    end

    # Looks up the column `name` and wraps it with `Utils.wrap_s`.
    def [](name)
      Utils.wrap_s(_df.column(name))
    end

    # Returns the column at position `index`, wrapped with `Utils.wrap_s`.
    # Negative indexes count back from the last column.
    def to_series(index = 0)
      index = columns.length + index if index < 0
      Utils.wrap_s(_df.select_at_idx(index))
    end

    # Writes the frame as JSON to a path or file-like object.
    #
    # @return [nil]
    def write_json(file, pretty: false, row_oriented: false)
      _df.write_json(self.class._normalize_path(file), pretty, row_oriented)
      nil
    end

    # Writes the frame as newline-delimited JSON to a path or file-like object.
    #
    # @return [nil]
    def write_ndjson(file)
      _df.write_ndjson(self.class._normalize_path(file))
      nil
    end

    # Writes the frame as CSV.
    #
    # When `file` is nil the CSV is written to an in-memory binary buffer and
    # returned as a UTF-8 tagged String; otherwise it is written to the given
    # path or file-like object and nil is returned.
    #
    # `sep` and `quote` are handed to the native writer as a single byte each,
    # so both must be exactly one byte long. An empty `null_value` is treated
    # as "not given".
    #
    # @raise [ArgumentError] if `sep` or `quote` is not exactly one byte
    # @return [String, nil]
    def write_csv(
      file = nil,
      has_header: true,
      sep: ",",
      quote: '"',
      batch_size: 1024,
      datetime_format: nil,
      date_format: nil,
      time_format: nil,
      float_precision: nil,
      null_value: nil
    )
      # Validate on bytes, not characters: the native side receives one byte.
      raise ArgumentError, "only single byte separator is allowed" unless sep.bytesize == 1
      raise ArgumentError, "only single byte quote char is allowed" unless quote.bytesize == 1

      null_value = nil if null_value == ""

      buffer = nil
      if file.nil?
        buffer = StringIO.new
        buffer.set_encoding(Encoding::BINARY)
      end
      target = buffer || self.class._normalize_path(file)

      _df.write_csv(
        target,
        has_header,
        sep.getbyte(0),
        quote.getbyte(0),
        batch_size,
        datetime_format,
        date_format,
        time_format,
        float_precision,
        null_value
      )

      return nil if buffer.nil?

      buffer.rewind
      buffer.read.force_encoding(Encoding::UTF_8)
    end

    # Writes the frame as Parquet to a path or file-like object.
    # A nil `compression` is sent to the native writer as "uncompressed".
    #
    # @return [nil] like the other `write_*` methods
    def write_parquet(
      file,
      compression: "zstd",
      compression_level: nil,
      statistics: false,
      row_group_size: nil
    )
      compression = "uncompressed" if compression.nil?

      _df.write_parquet(
        self.class._normalize_path(file), compression, compression_level, statistics, row_group_size
      )
      nil
    end

    # Filters rows through the lazy API and collects the result.
    def filter(predicate)
      lazy.filter(predicate).collect
    end

    # Returns a new frame sorted by `by` using the native sort.
    def sort(by, reverse: false, nulls_last: false)
      _from_rbdf(_df.sort(by, reverse, nulls_last))
    end

    # Compares this frame's native handle with `other`'s.
    def frame_equal(other, null_equal: true)
      _df.frame_equal(other._df, null_equal)
    end

    # Same as `head`.
    def limit(n = 5)
      head(n)
    end

    # New frame built from the native `head(n)`.
    def head(n = 5)
      _from_rbdf(_df.head(n))
    end

    # New frame built from the native `tail(n)`.
    def tail(n = 5)
      _from_rbdf(_df.tail(n))
    end

    # Starts a group-by on the lazy version of this frame; the result is
    # whatever `LazyFrame#groupby` returns (not collected here).
    def groupby(by, maintain_order: false)
      lazy.groupby(by, maintain_order: maintain_order)
    end

    # Joins with `other` through the lazy API and collects without optimization.
    def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right")
      joined = lazy.join(
        other.lazy,
        left_on: left_on,
        right_on: right_on,
        on: on,
        how: how,
        suffix: suffix
      )
      joined.collect(no_optimization: true)
    end

    # Adds or replaces one column through the lazy API.
    def with_column(column)
      lazy
        .with_column(column)
        .collect(no_optimization: true, string_cache: false)
    end

    # Lazy view over this frame's native handle.
    def lazy
      wrap_ldf(_df.lazy)
    end

    # Evaluates `exprs` through the lazy API and re-wraps the collected
    # native handle in a new DataFrame.
    def select(exprs)
      collected = lazy
        .select(exprs)
        .collect(no_optimization: true, string_cache: false)
      _from_rbdf(collected._df)
    end

    # Mean per column (`axis: 0`, returns a DataFrame) or per row (`axis: 1`,
    # returns the native `hmean` result wrapped with `Utils.wrap_s`).
    #
    # @raise [ArgumentError] if `axis` is neither 0 nor 1
    def mean(axis: 0, null_strategy: "ignore")
      case axis
      when 0
        _from_rbdf(_df.mean)
      when 1
        Utils.wrap_s(_df.hmean(null_strategy))
      else
        raise ArgumentError, "Axis should be 0 or 1."
      end
    end

    # Adds or replaces several columns through the lazy API. A single non-nil,
    # non-Array argument is wrapped in an Array first.
    def with_columns(exprs)
      exprs = [exprs] unless exprs.nil? || exprs.is_a?(Array)

      lazy
        .with_columns(exprs)
        .collect(no_optimization: true, string_cache: false)
    end

    # New frame built from the native `rechunk`.
    def rechunk
      _from_rbdf(_df.rechunk)
    end

    # New frame built from the native `null_count`.
    def null_count
      _from_rbdf(_df.null_count)
    end

    private

    # Turns an ActiveRecord relation/result into a Hash of column => values.
    # A relation is executed with `connection.select_all(to_sql)` first.
    def active_record_to_hash(source)
      result = source.is_a?(ActiveRecord::Result) ? source : source.connection.select_all(source.to_sql)
      rows = result.rows
      result.columns.each_with_index.each_with_object({}) do |(name, position), table|
        table[name] = rows.map { |row| row[position] }
      end
    end

    def hash_to_rbdf(data)
      RbDataFrame.read_hash(data)
    end

    # Each element is expected to expose its native series as `_s`.
    def sequence_to_rbdf(data)
      RbDataFrame.new(data.map(&:_s))
    end

    def series_to_rbdf(data)
      RbDataFrame.new([data._s])
    end

    def wrap_ldf(ldf)
      LazyFrame._from_rbldf(ldf)
    end

    def _from_rbdf(rb_df)
      self.class._from_rbdf(rb_df)
    end
  end
end
|
data/lib/polars/expr.rb
ADDED
@@ -0,0 +1,233 @@
|
|
1
|
+
module Polars
  # Ruby wrapper around a native expression handle stored in `_rbexpr`.
  #
  # Every method builds a new wrapped expression from the native handle;
  # nothing here evaluates data.
  class Expr
    # The native expression handle this object wraps.
    attr_accessor :_rbexpr

    # Wraps an existing native handle without running `initialize`.
    def self._from_rbexpr(rbexpr)
      expr = Expr.allocate
      expr._rbexpr = rbexpr
      expr
    end

    # String form supplied by the native handle's `to_str`; also used for
    # `inspect`.
    def to_s
      _rbexpr.to_str
    end
    alias_method :inspect, :to_s

    # --- Operators ---------------------------------------------------------
    # A non-Expr operand is turned into a literal with `Utils.lit` first.
    # Note that `==` and `!=` return an Expr, not a boolean.

    def ^(other)
      wrap_expr(_rbexpr._xor(_to_rbexpr(other)))
    end

    def &(other)
      wrap_expr(_rbexpr._and(_to_rbexpr(other)))
    end

    def |(other)
      wrap_expr(_rbexpr._or(_to_rbexpr(other)))
    end

    def *(other)
      wrap_expr(_rbexpr * _to_rbexpr(other))
    end

    def >=(other)
      wrap_expr(_rbexpr.gt_eq(_to_rbexpr(other)))
    end

    def <=(other)
      wrap_expr(_rbexpr.lt_eq(_to_rbexpr(other)))
    end

    def ==(other)
      wrap_expr(_rbexpr.eq(_to_rbexpr(other)))
    end

    def !=(other)
      wrap_expr(_rbexpr.neq(_to_rbexpr(other)))
    end

    def <(other)
      wrap_expr(_rbexpr.lt(_to_rbexpr(other)))
    end

    def >(other)
      wrap_expr(_rbexpr.gt(_to_rbexpr(other)))
    end

    # --- Naming ------------------------------------------------------------

    # Wraps the native `_alias(name)`.
    def alias(name)
      wrap_expr(_rbexpr._alias(name))
    end

    # Wraps the native `suffix(suffix)`.
    def suffix(suffix)
      wrap_expr(_rbexpr.suffix(suffix))
    end

    # --- Predicates --------------------------------------------------------

    def is_not
      wrap_expr(_rbexpr.is_not)
    end

    def is_null
      wrap_expr(_rbexpr.is_null)
    end

    def is_not_null
      wrap_expr(_rbexpr.is_not_null)
    end

    # --- Counting ----------------------------------------------------------

    def count
      wrap_expr(_rbexpr.count)
    end

    # Same as `count`.
    def len
      count
    end

    # --- Ordering ----------------------------------------------------------

    # Wraps the native `sort_with(reverse, nulls_last)`.
    def sort(reverse: false, nulls_last: false)
      wrap_expr(_rbexpr.sort_with(reverse, nulls_last))
    end

    # Sorts by one or more other expressions. Scalar `by` / `reverse`
    # arguments are wrapped in an Array before being handed to the native side.
    def sort_by(by, reverse: false)
      by = [by] unless by.is_a?(Array)
      reverse = [reverse] unless reverse.is_a?(Array)
      by = Utils.selection_to_rbexpr_list(by)

      wrap_expr(_rbexpr.sort_by(by, reverse))
    end

    # --- Missing data ------------------------------------------------------

    # Fills nulls either with `value` or according to `strategy`.
    #
    # Exactly one of `value` and `strategy` must be given. `limit` is only
    # accepted together with the "forward" or "backward" strategy.
    #
    # @raise [ArgumentError] if both or neither of `value`/`strategy` are
    #   given, or if `limit` is combined with any other strategy
    def fill_null(value = nil, strategy: nil, limit: nil)
      if !value.nil? && !strategy.nil?
        raise ArgumentError, "cannot specify both 'value' and 'strategy'."
      elsif value.nil? && strategy.nil?
        raise ArgumentError, "must specify either a fill 'value' or 'strategy'"
      elsif !limit.nil? && !["forward", "backward"].include?(strategy)
        # The guard used to be inverted: it rejected `limit` exactly for the
        # two strategies that support it.
        raise ArgumentError, "can only specify 'limit' when strategy is set to 'backward' or 'forward'"
      end

      if value.nil?
        wrap_expr(_rbexpr.fill_null_with_strategy(strategy, limit))
      else
        fill = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
        wrap_expr(_rbexpr.fill_null(fill._rbexpr))
      end
    end

    # Wraps the native `fill_nan`; `fill_value` is converted with
    # `Utils.expr_to_lit_or_expr` first.
    def fill_nan(fill_value)
      fill = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
      wrap_expr(_rbexpr.fill_nan(fill._rbexpr))
    end

    def reverse
      wrap_expr(_rbexpr.reverse)
    end

    # --- Aggregations ------------------------------------------------------

    def std(ddof: 1)
      wrap_expr(_rbexpr.std(ddof))
    end

    def var(ddof: 1)
      wrap_expr(_rbexpr.var(ddof))
    end

    def max
      wrap_expr(_rbexpr.max)
    end

    def min
      wrap_expr(_rbexpr.min)
    end

    def nan_max
      wrap_expr(_rbexpr.nan_max)
    end

    def nan_min
      wrap_expr(_rbexpr.nan_min)
    end

    def sum
      wrap_expr(_rbexpr.sum)
    end

    def mean
      wrap_expr(_rbexpr.mean)
    end

    def median
      wrap_expr(_rbexpr.median)
    end

    def product
      wrap_expr(_rbexpr.product)
    end

    def n_unique
      wrap_expr(_rbexpr.n_unique)
    end

    # Uses the native `unique_stable` when `maintain_order` is truthy,
    # otherwise the native `unique`.
    def unique(maintain_order: false)
      native = maintain_order ? _rbexpr.unique_stable : _rbexpr.unique
      wrap_expr(native)
    end

    def first
      wrap_expr(_rbexpr.first)
    end

    def last
      wrap_expr(_rbexpr.last)
    end

    # --- Windows, filtering and slicing ------------------------------------

    # Wraps the native `over`; `expr` is converted with
    # `Utils.selection_to_rbexpr_list`.
    def over(expr)
      rbexprs = Utils.selection_to_rbexpr_list(expr)
      wrap_expr(_rbexpr.over(rbexprs))
    end

    # `predicate` must expose a native handle through `_rbexpr`.
    def filter(predicate)
      wrap_expr(_rbexpr.filter(predicate._rbexpr))
    end

    def head(n = 10)
      wrap_expr(_rbexpr.head(n))
    end

    def tail(n = 10)
      wrap_expr(_rbexpr.tail(n))
    end

    # Same as `head`.
    def limit(n = 10)
      head(n)
    end

    def interpolate
      wrap_expr(_rbexpr.interpolate)
    end

    def list
      wrap_expr(_rbexpr.list)
    end

    # String-specific operations for this expression.
    def str
      StringExpr.new(self)
    end

    private

    def wrap_expr(expr)
      Utils.wrap_expr(expr)
    end

    # Native handle for `other`, converting non-Expr values to literals.
    def _to_rbexpr(other)
      _to_expr(other)._rbexpr
    end

    def _to_expr(other)
      other.is_a?(Expr) ? other : Utils.lit(other)
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Polars
  # Helper functions mixed into the library's public namespace.
  module Functions
    # Concatenates a non-empty list of DataFrames, LazyFrames, Series or Exprs.
    # The type of the first element decides which branch is taken.
    #
    # * DataFrame: `how` selects "vertical", "diagonal" or "horizontal".
    # * LazyFrame: only "vertical" is allowed; the result is returned directly
    #   (`rechunk` and `parallel` are forwarded to the native call).
    # * Series: concatenated natively.
    # * Expr: folded together with `append`.
    #
    # For every branch except LazyFrame, `rechunk` is called on the result
    # when `rechunk` is truthy.
    #
    # @raise [ArgumentError] for an empty list, an unknown `how`, a non-vertical
    #   lazy concat, or an unsupported element type
    def concat(items, rechunk: true, how: "vertical", parallel: true)
      raise ArgumentError, "cannot concat empty list" if items.empty?

      first = items[0]
      if first.is_a?(DataFrame)
        out =
          case how
          when "vertical"
            Utils.wrap_df(_concat_df(items))
          when "diagonal"
            Utils.wrap_df(_diag_concat_df(items))
          when "horizontal"
            Utils.wrap_df(_hor_concat_df(items))
          else
            # Single braces: the doubled braces were a Python f-string escape
            # that Ruby would print literally.
            raise ArgumentError, "how must be one of {'vertical', 'diagonal', 'horizontal'}, got #{how}"
          end
      elsif first.is_a?(LazyFrame)
        unless how == "vertical"
          raise ArgumentError, "Lazy only allows 'vertical' concat strategy."
        end

        # TODO
        return Utils.wrap_ldf(_concat_lf(items, rechunk, parallel))
      elsif first.is_a?(Series)
        # TODO
        out = Utils.wrap_s(_concat_series(items))
      elsif first.is_a?(Expr)
        out = items[1..-1].reduce(first) { |combined, expr| combined.append(expr) }
      else
        raise ArgumentError, "did not expect type: #{first.class.name} in 'Polars.concat'."
      end

      rechunk ? out.rechunk : out
    end
  end
end
|
data/lib/polars/io.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
module Polars
  # File-reading entry points mixed into the library's public namespace.
  module IO
    # Reads a CSV file. `file` may be a local path, a file-like object, or a
    # `URI` for a remote file (plain "http(s)://" strings are rejected).
    def read_csv(file, has_header: true)
      _prepare_file_arg(file) do |data|
        DataFrame._read_csv(data, has_header: has_header)
      end
    end

    # Reads a Parquet file. Accepts the same kinds of `file` as `read_csv`.
    def read_parquet(file)
      _prepare_file_arg(file) do |data|
        DataFrame._read_parquet(data)
      end
    end

    # Reads a JSON file; `file` is handed to `DataFrame._read_json` as is.
    def read_json(file)
      DataFrame._read_json(file)
    end

    # Reads a newline-delimited JSON file; `file` is handed to
    # `DataFrame._read_ndjson` as is.
    def read_ndjson(file)
      DataFrame._read_ndjson(file)
    end

    private

    # Yields something the readers can consume and returns the block's value.
    #
    # * A String starting with "http://" or "https://" is rejected so that
    #   remote access is always an explicit `URI(...)`.
    # * A `URI` is opened with open-uri; the opened handle is yielded and
    #   closed again once the block has finished, even if it raises.
    # * Anything else is yielded unchanged.
    #
    # @raise [ArgumentError] for a String that looks like an HTTP(S) URL
    def _prepare_file_arg(file)
      if file.is_a?(String) && file.match?(%r{\Ahttps?://})
        raise ArgumentError, "use URI(...) for remote files"
      end

      return yield(file) unless defined?(URI) && file.is_a?(URI)

      # Loaded lazily so open-uri is only required when a remote file is used.
      require "open-uri"

      remote = URI.open(file)
      begin
        yield remote
      ensure
        # The handle was opened here, so it is released here.
        remote.close if remote.respond_to?(:close)
      end
    end
  end
end
|