polars-df 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +20 -0
- data/README.md +93 -0
- data/ext/polars/Cargo.toml +35 -0
- data/ext/polars/extconf.rb +4 -0
- data/ext/polars/src/conversion.rs +115 -0
- data/ext/polars/src/dataframe.rs +304 -0
- data/ext/polars/src/error.rs +24 -0
- data/ext/polars/src/file.rs +28 -0
- data/ext/polars/src/lazy/dataframe.rs +123 -0
- data/ext/polars/src/lazy/dsl.rs +298 -0
- data/ext/polars/src/lazy/mod.rs +3 -0
- data/ext/polars/src/lazy/utils.rs +13 -0
- data/ext/polars/src/lib.rs +256 -0
- data/ext/polars/src/series.rs +475 -0
- data/lib/polars/data_frame.rb +315 -0
- data/lib/polars/expr.rb +233 -0
- data/lib/polars/functions.rb +45 -0
- data/lib/polars/io.rb +39 -0
- data/lib/polars/lazy_frame.rb +139 -0
- data/lib/polars/lazy_functions.rb +121 -0
- data/lib/polars/lazy_group_by.rb +13 -0
- data/lib/polars/series.rb +261 -0
- data/lib/polars/string_expr.rb +17 -0
- data/lib/polars/utils.rb +47 -0
- data/lib/polars/version.rb +3 -0
- data/lib/polars/when.rb +15 -0
- data/lib/polars/when_then.rb +18 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +25 -0
- metadata +87 -0
@@ -0,0 +1,315 @@
|
|
1
|
+
module Polars
  # Eager table wrapper around a native +RbDataFrame+ handle stored in +_df+.
  # Most methods either delegate straight to that handle or round-trip through
  # the lazy API (+lazy+ ... +collect+).
  class DataFrame
    # The wrapped native frame object.
    attr_accessor :_df

    # Builds a frame from +data+.
    #
    # Accepted inputs:
    # * +nil+ - frame built from an empty Hash
    # * Hash - Symbol keys are converted to Strings, other keys are kept
    # * Array - every element must respond to +_s+ (i.e. be a Series)
    # * Series - single-column frame
    # * ActiveRecord::Relation / ActiveRecord::Result (only when ActiveRecord
    #   is loaded) - unpacked into a column-name => values Hash first
    #
    # @raise [ArgumentError] for any other input type
    def initialize(data = nil)
      if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
        result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.to_sql)
        data = {}
        # Pivot the row-oriented result into one array per column.
        result.columns.each_with_index do |column, position|
          data[column] = result.rows.map { |row| row[position] }
        end
      end

      case data
      when nil
        self._df = hash_to_rbdf({})
      when Hash
        data = data.transform_keys { |key| key.is_a?(Symbol) ? key.to_s : key }
        self._df = hash_to_rbdf(data)
      when Array
        self._df = sequence_to_rbdf(data)
      when Series
        self._df = series_to_rbdf(data)
      else
        raise ArgumentError, "DataFrame constructor called with unsupported type; got #{data.class.name}"
      end
    end

    # Wraps an existing native handle without running +initialize+.
    def self._from_rbdf(rb_df)
      df = DataFrame.allocate
      df._df = rb_df
      df
    end

    # Passes String and Pathname arguments through +Utils.format_path+ and
    # returns anything else (e.g. an IO object) untouched.
    def self._normalize_file(file)
      if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
        Utils.format_path(file)
      else
        file
      end
    end

    # Reads CSV from a path or IO-like object via the native reader.
    def self._read_csv(file, has_header: true)
      _from_rbdf(RbDataFrame.read_csv(_normalize_file(file), has_header))
    end

    # Reads Parquet from a path or IO-like object via the native reader.
    def self._read_parquet(file)
      _from_rbdf(RbDataFrame.read_parquet(_normalize_file(file)))
    end

    # Reads JSON from a path or IO-like object via the native reader.
    def self._read_json(file)
      _from_rbdf(RbDataFrame.read_json(_normalize_file(file)))
    end

    # Reads newline-delimited JSON from a path or IO-like object via the native reader.
    def self._read_ndjson(file)
      _from_rbdf(RbDataFrame.read_ndjson(_normalize_file(file)))
    end

    # Delegates to the native +shape+.
    def shape
      _df.shape
    end

    # Delegates to the native +height+.
    def height
      _df.height
    end

    # Delegates to the native +width+.
    def width
      _df.width
    end

    # Column names as reported by the native frame.
    def columns
      _df.columns
    end

    # Native dtype names converted to Symbols.
    def dtypes
      _df.dtypes.map(&:to_sym)
    end

    # Native string rendering of the frame; also used for +inspect+.
    def to_s
      _df.to_s
    end
    alias_method :inspect, :to_s

    # True when +name+ is one of +columns+ (compared as-is, no Symbol conversion).
    def include?(name)
      columns.include?(name)
    end

    # Returns the column called +name+ wrapped as a Series.
    def [](name)
      Utils.wrap_s(_df.column(name))
    end

    # Returns the column at +index+ as a Series. Negative indexes count from
    # the end (-1 is the last column).
    def to_series(index = 0)
      index += columns.length if index < 0
      Utils.wrap_s(_df.select_at_idx(index))
    end

    # Writes the frame as JSON to a path or IO-like object. Always returns nil.
    def write_json(
      file,
      pretty: false,
      row_oriented: false
    )
      _df.write_json(normalize_file(file), pretty, row_oriented)
      nil
    end

    # Writes the frame as newline-delimited JSON. Always returns nil.
    def write_ndjson(file)
      _df.write_ndjson(normalize_file(file))
      nil
    end

    # Writes the frame as CSV.
    #
    # With no +file+ the CSV is written to an in-memory binary buffer and
    # returned as a String tagged UTF-8. Otherwise it is written to the given
    # path or IO-like object and nil is returned.
    #
    # +sep+ and +quote+ are handed to the native writer as a single byte
    # (+String#ord+), so each must be exactly one byte long. An empty
    # +null_value+ is treated the same as nil.
    #
    # @raise [ArgumentError] if +sep+ or +quote+ is not exactly one byte
    def write_csv(
      file = nil,
      has_header: true,
      sep: ",",
      quote: '"',
      batch_size: 1024,
      datetime_format: nil,
      date_format: nil,
      time_format: nil,
      float_precision: nil,
      null_value: nil
    )
      # Check the byte length, not the character count: "" would otherwise
      # fail later inside String#ord, and a multi-byte character would be
      # passed on as a codepoint that is not a valid single byte.
      raise ArgumentError, "only single byte separator is allowed" unless sep.bytesize == 1
      raise ArgumentError, "only single byte quote char is allowed" unless quote.bytesize == 1

      null_value = nil if null_value == ""

      buffer = nil
      target =
        if file.nil?
          buffer = StringIO.new
          buffer.set_encoding(Encoding::BINARY)
          buffer
        else
          normalize_file(file)
        end

      _df.write_csv(
        target,
        has_header,
        sep.ord,
        quote.ord,
        batch_size,
        datetime_format,
        date_format,
        time_format,
        float_precision,
        null_value
      )

      return nil if buffer.nil?

      buffer.rewind
      buffer.read.force_encoding(Encoding::UTF_8)
    end

    # Writes the frame as Parquet. A nil +compression+ is sent to the native
    # writer as "uncompressed". Returns whatever the native call returns.
    def write_parquet(
      file,
      compression: "zstd",
      compression_level: nil,
      statistics: false,
      row_group_size: nil
    )
      compression = "uncompressed" if compression.nil?

      _df.write_parquet(
        normalize_file(file), compression, compression_level, statistics, row_group_size
      )
    end

    # Applies +predicate+ through the lazy API and collects the result.
    def filter(predicate)
      lazy.filter(predicate).collect
    end

    # Returns a new frame sorted by +by+ using the native sort.
    def sort(by, reverse: false, nulls_last: false)
      _from_rbdf(_df.sort(by, reverse, nulls_last))
    end

    # Delegates equality to the native +frame_equal+ against +other+'s handle.
    def frame_equal(other, null_equal: true)
      _df.frame_equal(other._df, null_equal)
    end

    # Same as +head+.
    def limit(n = 5)
      head(n)
    end

    # New frame from the native +head(n)+.
    def head(n = 5)
      _from_rbdf(_df.head(n))
    end

    # New frame from the native +tail(n)+.
    def tail(n = 5)
      _from_rbdf(_df.tail(n))
    end

    # Starts a group-by on the lazy version of this frame; returns whatever
    # +LazyFrame#groupby+ returns (not collected here).
    def groupby(by, maintain_order: false)
      lazy.groupby(by, maintain_order: maintain_order)
    end

    # Joins with +other+ through the lazy API and collects with optimizations off.
    def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right")
      joined = lazy.join(
        other.lazy,
        left_on: left_on,
        right_on: right_on,
        on: on,
        how: how,
        suffix: suffix
      )
      joined.collect(no_optimization: true)
    end

    # Adds or replaces one column through the lazy API.
    def with_column(column)
      lazy
        .with_column(column)
        .collect(no_optimization: true, string_cache: false)
    end

    # Lazy view over the same native frame.
    def lazy
      wrap_ldf(_df.lazy)
    end

    # Evaluates +exprs+ through the lazy API and re-wraps the collected handle.
    def select(exprs)
      collected = lazy.select(exprs).collect(no_optimization: true, string_cache: false)
      _from_rbdf(collected._df)
    end

    # Mean along +axis+: 0 returns a frame from the native +mean+, 1 returns a
    # Series from the native +hmean(null_strategy)+.
    #
    # @raise [ArgumentError] if +axis+ is neither 0 nor 1
    def mean(axis: 0, null_strategy: "ignore")
      case axis
      when 0
        _from_rbdf(_df.mean)
      when 1
        Utils.wrap_s(_df.hmean(null_strategy))
      else
        raise ArgumentError, "Axis should be 0 or 1."
      end
    end

    # Adds or replaces several columns through the lazy API. A single non-nil,
    # non-Array argument is wrapped in an Array first.
    def with_columns(exprs)
      exprs = [exprs] unless exprs.nil? || exprs.is_a?(Array)

      lazy
        .with_columns(exprs)
        .collect(no_optimization: true, string_cache: false)
    end

    # New frame from the native +rechunk+.
    def rechunk
      _from_rbdf(_df.rechunk)
    end

    # New frame from the native +null_count+.
    def null_count
      _from_rbdf(_df.null_count)
    end

    private

    # Instance-side shortcut for +DataFrame._normalize_file+.
    def normalize_file(file)
      self.class._normalize_file(file)
    end

    def hash_to_rbdf(data)
      RbDataFrame.read_hash(data)
    end

    def sequence_to_rbdf(data)
      RbDataFrame.new(data.map(&:_s))
    end

    def series_to_rbdf(data)
      RbDataFrame.new([data._s])
    end

    def wrap_ldf(ldf)
      LazyFrame._from_rbldf(ldf)
    end

    def _from_rbdf(rb_df)
      self.class._from_rbdf(rb_df)
    end
  end
end
|
data/lib/polars/expr.rb
ADDED
@@ -0,0 +1,233 @@
|
|
1
|
+
module Polars
  # Ruby-side wrapper around a native expression handle stored in +_rbexpr+.
  # Every public method builds a new Expr from the result of the matching
  # native call; nothing is evaluated here.
  class Expr
    # The wrapped native expression object.
    attr_accessor :_rbexpr

    # Wraps an existing native handle without running +initialize+.
    def self._from_rbexpr(rbexpr)
      expr = Expr.allocate
      expr._rbexpr = rbexpr
      expr
    end

    # Native string rendering of the expression; also used for +inspect+.
    def to_s
      _rbexpr.to_str
    end
    alias_method :inspect, :to_s

    # --- Operators -------------------------------------------------------
    # The right-hand side may be an Expr or any value accepted by
    # +Utils.lit+. Note that +==+ and +!=+ return an Expr, not a boolean.

    def ^(other)
      wrap_expr(_rbexpr._xor(_to_rbexpr(other)))
    end

    def &(other)
      wrap_expr(_rbexpr._and(_to_rbexpr(other)))
    end

    def |(other)
      wrap_expr(_rbexpr._or(_to_rbexpr(other)))
    end

    def *(other)
      wrap_expr(_rbexpr * _to_rbexpr(other))
    end

    def >=(other)
      wrap_expr(_rbexpr.gt_eq(_to_rbexpr(other)))
    end

    def <=(other)
      wrap_expr(_rbexpr.lt_eq(_to_rbexpr(other)))
    end

    def ==(other)
      wrap_expr(_rbexpr.eq(_to_rbexpr(other)))
    end

    def !=(other)
      wrap_expr(_rbexpr.neq(_to_rbexpr(other)))
    end

    def <(other)
      wrap_expr(_rbexpr.lt(_to_rbexpr(other)))
    end

    def >(other)
      wrap_expr(_rbexpr.gt(_to_rbexpr(other)))
    end

    # --- Naming ----------------------------------------------------------

    # Wraps the native +_alias(name)+.
    def alias(name)
      wrap_expr(_rbexpr._alias(name))
    end

    # Wraps the native +suffix(suffix)+.
    def suffix(suffix)
      wrap_expr(_rbexpr.suffix(suffix))
    end

    # --- Predicates ------------------------------------------------------

    def is_not
      wrap_expr(_rbexpr.is_not)
    end

    def is_null
      wrap_expr(_rbexpr.is_null)
    end

    def is_not_null
      wrap_expr(_rbexpr.is_not_null)
    end

    # --- Counting --------------------------------------------------------

    def count
      wrap_expr(_rbexpr.count)
    end

    # Same as +count+.
    def len
      count
    end

    # --- Ordering --------------------------------------------------------

    # Wraps the native +sort_with(reverse, nulls_last)+.
    def sort(reverse: false, nulls_last: false)
      wrap_expr(_rbexpr.sort_with(reverse, nulls_last))
    end

    # Sorts by one or more other expressions. Scalars for +by+ and +reverse+
    # are wrapped into single-element Arrays before the native call.
    def sort_by(by, reverse: false)
      by = [by] unless by.is_a?(Array)
      reverse = [reverse] unless reverse.is_a?(Array)
      by = Utils.selection_to_rbexpr_list(by)

      wrap_expr(_rbexpr.sort_by(by, reverse))
    end

    # --- Missing data ----------------------------------------------------

    # Fills nulls either with +value+ or according to +strategy+.
    #
    # Exactly one of +value+ / +strategy+ must be given. +limit+ is only
    # accepted together with the "forward" or "backward" strategy.
    #
    # @raise [ArgumentError] if both or neither of +value+ and +strategy+ are
    #   given, or if +limit+ is combined with any other strategy
    def fill_null(value = nil, strategy: nil, limit: nil)
      if !value.nil? && !strategy.nil?
        raise ArgumentError, "cannot specify both 'value' and 'strategy'."
      elsif value.nil? && strategy.nil?
        raise ArgumentError, "must specify either a fill 'value' or 'strategy'"
      elsif !limit.nil? && !["forward", "backward"].include?(strategy)
        # The guard must reject the strategies that do NOT support a limit;
        # the previous condition was inverted and rejected the valid ones.
        raise ArgumentError, "can only specify 'limit' when strategy is set to 'backward' or 'forward'"
      end

      if value.nil?
        wrap_expr(_rbexpr.fill_null_with_strategy(strategy, limit))
      else
        value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
        wrap_expr(_rbexpr.fill_null(value._rbexpr))
      end
    end

    # Wraps the native +fill_nan+; +fill_value+ goes through
    # +Utils.expr_to_lit_or_expr+ with +str_to_lit: true+ first.
    def fill_nan(fill_value)
      fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
      wrap_expr(_rbexpr.fill_nan(fill_value._rbexpr))
    end

    def reverse
      wrap_expr(_rbexpr.reverse)
    end

    # --- Aggregations ----------------------------------------------------
    # Each wraps the native expression method of the same name.

    def std(ddof: 1)
      wrap_expr(_rbexpr.std(ddof))
    end

    def var(ddof: 1)
      wrap_expr(_rbexpr.var(ddof))
    end

    def max
      wrap_expr(_rbexpr.max)
    end

    def min
      wrap_expr(_rbexpr.min)
    end

    def nan_max
      wrap_expr(_rbexpr.nan_max)
    end

    def nan_min
      wrap_expr(_rbexpr.nan_min)
    end

    def sum
      wrap_expr(_rbexpr.sum)
    end

    def mean
      wrap_expr(_rbexpr.mean)
    end

    def median
      wrap_expr(_rbexpr.median)
    end

    def product
      wrap_expr(_rbexpr.product)
    end

    def n_unique
      wrap_expr(_rbexpr.n_unique)
    end

    # Uses the native +unique_stable+ when +maintain_order+ is truthy,
    # otherwise the native +unique+.
    def unique(maintain_order: false)
      wrap_expr(maintain_order ? _rbexpr.unique_stable : _rbexpr.unique)
    end

    def first
      wrap_expr(_rbexpr.first)
    end

    def last
      wrap_expr(_rbexpr.last)
    end

    # --- Windows, filtering and slicing ----------------------------------

    # Wraps the native +over+; +expr+ is converted with
    # +Utils.selection_to_rbexpr_list+.
    def over(expr)
      rbexprs = Utils.selection_to_rbexpr_list(expr)
      wrap_expr(_rbexpr.over(rbexprs))
    end

    # Wraps the native +filter+; +predicate+ must expose +_rbexpr+.
    def filter(predicate)
      wrap_expr(_rbexpr.filter(predicate._rbexpr))
    end

    def head(n = 10)
      wrap_expr(_rbexpr.head(n))
    end

    def tail(n = 10)
      wrap_expr(_rbexpr.tail(n))
    end

    # Same as +head+.
    def limit(n = 10)
      head(n)
    end

    def interpolate
      wrap_expr(_rbexpr.interpolate)
    end

    def list
      wrap_expr(_rbexpr.list)
    end

    # String-specific operations on this expression.
    def str
      StringExpr.new(self)
    end

    private

    def wrap_expr(expr)
      Utils.wrap_expr(expr)
    end

    # Native handle for +other+, turning non-Expr values into literals first.
    def _to_rbexpr(other)
      _to_expr(other)._rbexpr
    end

    def _to_expr(other)
      other.is_a?(Expr) ? other : Utils.lit(other)
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Polars
  module Functions
    # Concatenates +items+, dispatching on the type of the first element.
    #
    # * DataFrame - +how+ selects "vertical", "diagonal" or "horizontal"
    # * LazyFrame - only "vertical" is accepted; +rechunk+ and +parallel+ are
    #   forwarded to the native call and the result is returned directly
    # * Series - concatenated with the native series helper
    # * Expr - folded left to right with +append+
    #
    # For every branch except LazyFrame the result is passed through
    # +rechunk+ when the +rechunk+ flag is truthy.
    #
    # @raise [ArgumentError] if +items+ is empty, +how+ is not supported for
    #   the item type, or the first item is of an unexpected type
    def concat(items, rechunk: true, how: "vertical", parallel: true)
      raise ArgumentError, "cannot concat empty list" if items.empty?

      first = items[0]
      case first
      when DataFrame
        out =
          case how
          when "vertical"
            Utils.wrap_df(_concat_df(items))
          when "diagonal"
            Utils.wrap_df(_diag_concat_df(items))
          when "horizontal"
            Utils.wrap_df(_hor_concat_df(items))
          else
            # Single braces: the doubled "{{ }}" was a Python f-string escape
            # and showed up literally in the Ruby message.
            raise ArgumentError, "how must be one of {'vertical', 'diagonal', 'horizontal'}, got #{how}"
          end
      when LazyFrame
        unless how == "vertical"
          raise ArgumentError, "Lazy only allows 'vertical' concat strategy."
        end

        # TODO
        return Utils.wrap_ldf(_concat_lf(items, rechunk, parallel))
      when Series
        # TODO
        out = Utils.wrap_s(_concat_series(items))
      when Expr
        # NOTE(review): this branch relies on Expr#append (and Expr#rechunk
        # below when rechunk is true); confirm both are available on Expr.
        out = items[1..-1].reduce(first) { |combined, expr| combined.append(expr) }
      else
        raise ArgumentError, "did not expect type: #{first.class.name} in 'Polars.concat'."
      end

      rechunk ? out.rechunk : out
    end
  end
end
|
data/lib/polars/io.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
module Polars
  module IO
    # Reads CSV from a local path, an IO-like object, or a URI object.
    # Plain "http(s)://" Strings are rejected; wrap them in URI(...) instead.
    def read_csv(file, has_header: true)
      _prepare_file_arg(file) do |data|
        DataFrame._read_csv(data, has_header: has_header)
      end
    end

    # Reads Parquet from a local path, an IO-like object, or a URI object.
    def read_parquet(file)
      _prepare_file_arg(file) do |data|
        DataFrame._read_parquet(data)
      end
    end

    # Reads JSON; +file+ is handed to DataFrame._read_json unchanged.
    def read_json(file)
      DataFrame._read_json(file)
    end

    # Reads newline-delimited JSON; +file+ is handed to
    # DataFrame._read_ndjson unchanged.
    def read_ndjson(file)
      DataFrame._read_ndjson(file)
    end

    private

    # Yields a readable source for +file+ and returns the block's value.
    #
    # URI objects are opened with open-uri for the duration of the block and
    # closed afterwards; everything else is yielded as-is.
    #
    # @raise [ArgumentError] if +file+ is a String starting with http:// or https://
    def _prepare_file_arg(file)
      if file.is_a?(String) && file.match?(%r{\Ahttps?://})
        raise ArgumentError, "use URI(...) for remote files"
      end

      if defined?(URI) && file.is_a?(URI)
        # Loaded lazily so open-uri is only pulled in when a URI is actually used.
        require "open-uri"

        # Block form so the downloaded handle is closed once the reader is done
        # instead of being left open until garbage collection.
        return URI.open(file) { |remote| yield remote }
      end

      yield file
    end
  end
end
|