polars-df 0.1.1 → 0.1.2
This diff covers the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +1 -1
- data/ext/polars/Cargo.toml +1 -1
- data/ext/polars/src/batched_csv.rs +120 -0
- data/ext/polars/src/conversion.rs +105 -5
- data/ext/polars/src/dataframe.rs +132 -4
- data/ext/polars/src/error.rs +9 -0
- data/ext/polars/src/file.rs +8 -7
- data/ext/polars/src/lazy/apply.rs +7 -0
- data/ext/polars/src/lazy/dataframe.rs +132 -0
- data/ext/polars/src/lazy/dsl.rs +38 -0
- data/ext/polars/src/lazy/meta.rs +1 -1
- data/ext/polars/src/lazy/mod.rs +1 -0
- data/ext/polars/src/lib.rs +77 -3
- data/ext/polars/src/series.rs +8 -9
- data/lib/polars/batched_csv_reader.rb +95 -0
- data/lib/polars/data_frame.rb +585 -19
- data/lib/polars/expr.rb +17 -2
- data/lib/polars/io.rb +342 -2
- data/lib/polars/lazy_frame.rb +156 -2
- data/lib/polars/lazy_functions.rb +154 -11
- data/lib/polars/series.rb +806 -18
- data/lib/polars/utils.rb +33 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -0
- metadata +5 -2
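
The headline additions in 0.1.2 are a batched CSV reader (data/lib/polars/batched_csv_reader.rb and ext/polars/src/batched_csv.rs are new files) and lazy scan entry points for CSV, IPC, Parquet, and NDJSON. As a rough sketch of how the batched reader is meant to be driven — assuming the reader object exposes a next_batches(n) method mirroring the Python Polars API, which is not shown in this diff — usage would look roughly like this ("large.csv" is a placeholder path):

    # Hedged sketch: next_batches(n) is assumed to return an array of
    # DataFrames for the next n chunks, and nil once the file is exhausted.
    reader = Polars.read_csv_batched("large.csv", batch_size: 50_000)
    while (batches = reader.next_batches(5))
      batches.each do |df|
        # each chunk is a regular DataFrame, processed without loading the whole file
        p df
      end
    end

The per-file diffs follow.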
data/lib/polars/expr.rb
CHANGED
@@ -1,7 +1,10 @@
 module Polars
+  # Expressions that can be used in various contexts.
   class Expr
+    # @private
     attr_accessor :_rbexpr
 
+    # @private
     def self._from_rbexpr(rbexpr)
       expr = Expr.allocate
       expr._rbexpr = rbexpr
@@ -80,6 +83,7 @@ module Polars
     # def to_physical
     # end
 
+    #
     def any
       wrap_expr(_rbexpr.any)
     end
@@ -104,7 +108,9 @@ module Polars
       wrap_expr(_rbexpr._alias(name))
     end
 
-    # TODO support symbols
+    # TODO support symbols for exclude
+
+    #
     def exclude(columns)
       if columns.is_a?(String)
         columns = [columns]
@@ -140,6 +146,7 @@ module Polars
     # def map_alias
     # end
 
+    #
     def is_not
       wrap_expr(_rbexpr.is_not)
     end
@@ -293,7 +300,8 @@ module Polars
     # def take
     # end
 
-
+    #
+    def shift(periods = 1)
       wrap_expr(_rbexpr.shift(periods))
     end
 
@@ -439,6 +447,7 @@ module Polars
     # def apply
     # end
 
+    #
     def flatten
       wrap_expr(_rbexpr.explode)
     end
@@ -471,6 +480,7 @@ module Polars
     # def is_in
     # end
 
+    #
     def repeat_by(by)
       by = Utils.expr_to_lit_or_expr(by, false)
       wrap_expr(_rbexpr.repeat_by(by._rbexpr))
@@ -482,6 +492,7 @@ module Polars
     # def _hash
     # end
 
+    #
     def reinterpret(signed: false)
       wrap_expr(_rbexpr.reinterpret(signed))
     end
@@ -489,6 +500,7 @@ module Polars
     # def _inspect
     # end
 
+    #
     def interpolate
       wrap_expr(_rbexpr.interpolate)
     end
@@ -520,6 +532,7 @@ module Polars
     # def rolling_apply
     # end
 
+    #
     def rolling_skew(window_size, bias: true)
       wrap_expr(_rbexpr.rolling_skew(window_size, bias))
     end
@@ -650,6 +663,7 @@ module Polars
     # def extend_constant
     # end
 
+    #
     def value_counts(multithreaded: false, sort: false)
       wrap_expr(_rbexpr.value_counts(multithreaded, sort))
     end
@@ -672,6 +686,7 @@ module Polars
     # def set_sorted
     # end
 
+    #
     def list
       wrap_expr(_rbexpr.list)
     end
data/lib/polars/io.rb
CHANGED
@@ -1,8 +1,245 @@
 module Polars
   module IO
-    def read_csv(
+    def read_csv(
+      file,
+      has_header: true,
+      columns: nil,
+      new_columns: nil,
+      sep: ",",
+      comment_char: nil,
+      quote_char: '"',
+      skip_rows: 0,
+      dtypes: nil,
+      null_values: nil,
+      ignore_errors: false,
+      parse_dates: false,
+      n_threads: nil,
+      infer_schema_length: 100,
+      batch_size: 8192,
+      n_rows: nil,
+      encoding: "utf8",
+      low_memory: false,
+      rechunk: true,
+      storage_options: nil,
+      skip_rows_after_header: 0,
+      row_count_name: nil,
+      row_count_offset: 0,
+      sample_size: 1024,
+      eol_char: "\n"
+    )
+      _check_arg_is_1byte("sep", sep, false)
+      _check_arg_is_1byte("comment_char", comment_char, false)
+      _check_arg_is_1byte("quote_char", quote_char, true)
+      _check_arg_is_1byte("eol_char", eol_char, false)
+
+      projection, columns = Utils.handle_projection_columns(columns)
+
+      storage_options ||= {}
+
+      if columns && !has_header
+        columns.each do |column|
+          if !column.start_with?("column_")
+            raise ArgumentError, "Specified column names do not start with \"column_\", but autogenerated header names were requested."
+          end
+        end
+      end
+
+      if projection || new_columns
+        raise Todo
+      end
+
+      df = nil
       _prepare_file_arg(file) do |data|
-        DataFrame._read_csv(
+        df = DataFrame._read_csv(
+          data,
+          has_header: has_header,
+          columns: columns || projection,
+          sep: sep,
+          comment_char: comment_char,
+          quote_char: quote_char,
+          skip_rows: skip_rows,
+          dtypes: dtypes,
+          null_values: null_values,
+          ignore_errors: ignore_errors,
+          parse_dates: parse_dates,
+          n_threads: n_threads,
+          infer_schema_length: infer_schema_length,
+          batch_size: batch_size,
+          n_rows: n_rows,
+          encoding: encoding == "utf8-lossy" ? encoding : "utf8",
+          low_memory: low_memory,
+          rechunk: rechunk,
+          skip_rows_after_header: skip_rows_after_header,
+          row_count_name: row_count_name,
+          row_count_offset: row_count_offset,
+          sample_size: sample_size,
+          eol_char: eol_char
+        )
+      end
+
+      if new_columns
+        Utils._update_columns(df, new_columns)
+      else
+        df
+      end
+    end
+
+    def scan_csv(
+      file,
+      has_header: true,
+      sep: ",",
+      comment_char: nil,
+      quote_char: '"',
+      skip_rows: 0,
+      dtypes: nil,
+      null_values: nil,
+      ignore_errors: false,
+      cache: true,
+      with_column_names: nil,
+      infer_schema_length: 100,
+      n_rows: nil,
+      encoding: "utf8",
+      low_memory: false,
+      rechunk: true,
+      skip_rows_after_header: 0,
+      row_count_name: nil,
+      row_count_offset: 0,
+      parse_dates: false,
+      eol_char: "\n"
+    )
+      _check_arg_is_1byte("sep", sep, false)
+      _check_arg_is_1byte("comment_char", comment_char, false)
+      _check_arg_is_1byte("quote_char", quote_char, true)
+
+      if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
+        file = Utils.format_path(file)
+      end
+
+      LazyFrame._scan_csv(
+        file,
+        has_header: has_header,
+        sep: sep,
+        comment_char: comment_char,
+        quote_char: quote_char,
+        skip_rows: skip_rows,
+        dtypes: dtypes,
+        null_values: null_values,
+        ignore_errors: ignore_errors,
+        cache: cache,
+        with_column_names: with_column_names,
+        infer_schema_length: infer_schema_length,
+        n_rows: n_rows,
+        low_memory: low_memory,
+        rechunk: rechunk,
+        skip_rows_after_header: skip_rows_after_header,
+        encoding: encoding,
+        row_count_name: row_count_name,
+        row_count_offset: row_count_offset,
+        parse_dates: parse_dates,
+        eol_char: eol_char,
+      )
+    end
+
+    def scan_ipc(
+      file,
+      n_rows: nil,
+      cache: true,
+      rechunk: true,
+      row_count_name: nil,
+      row_count_offset: 0,
+      storage_options: nil,
+      memory_map: true
+    )
+      LazyFrame._scan_ipc(
+        file,
+        n_rows: n_rows,
+        cache: cache,
+        rechunk: rechunk,
+        row_count_name: row_count_name,
+        row_count_offset: row_count_offset,
+        storage_options: storage_options,
+        memory_map: memory_map
+      )
+    end
+
+    def scan_parquet(
+      file,
+      n_rows: nil,
+      cache: true,
+      parallel: "auto",
+      rechunk: true,
+      row_count_name: nil,
+      row_count_offset: 0,
+      storage_options: nil,
+      low_memory: false
+    )
+      if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
+        file = Utils.format_path(file)
+      end
+
+      LazyFrame._scan_parquet(
+        file,
+        n_rows:n_rows,
+        cache: cache,
+        parallel: parallel,
+        rechunk: rechunk,
+        row_count_name: row_count_name,
+        row_count_offset: row_count_offset,
+        storage_options: storage_options,
+        low_memory: low_memory
+      )
+    end
+
+    def scan_ndjson(
+      file,
+      infer_schema_length: 100,
+      batch_size: 1024,
+      n_rows: nil,
+      low_memory: false,
+      rechunk: true,
+      row_count_name: nil,
+      row_count_offset: 0
+    )
+      if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
+        file = Utils.format_path(file)
+      end
+
+      LazyFrame._scan_ndjson(
+        file,
+        infer_schema_length: infer_schema_length,
+        batch_size: batch_size,
+        n_rows: n_rows,
+        low_memory: low_memory,
+        rechunk: rechunk,
+        row_count_name: row_count_name,
+        row_count_offset: row_count_offset,
+      )
+    end
+
+    # def read_avro
+    # end
+
+    def read_ipc(
+      file,
+      columns: nil,
+      n_rows: nil,
+      memory_map: true,
+      storage_options: nil,
+      row_count_name: nil,
+      row_count_offset: 0,
+      rechunk: true
+    )
+      storage_options ||= {}
+      _prepare_file_arg(file, **storage_options) do |data|
+        DataFrame._read_ipc(
+          data,
+          columns: columns,
+          n_rows: n_rows,
+          row_count_name: row_count_name,
+          row_count_offset: row_count_offset,
+          rechunk: rechunk,
+          memory_map: memory_map
+        )
       end
     end
 
@@ -20,6 +257,96 @@ module Polars
       DataFrame._read_ndjson(file)
     end
 
+    # def read_sql
+    # end
+
+    # def read_excel
+    # end
+
+    def read_csv_batched(
+      file,
+      has_header: true,
+      columns: nil,
+      new_columns: nil,
+      sep: ",",
+      comment_char: nil,
+      quote_char: '"',
+      skip_rows: 0,
+      dtypes: nil,
+      null_values: nil,
+      ignore_errors: false,
+      parse_dates: false,
+      n_threads: nil,
+      infer_schema_length: 100,
+      batch_size: 50_000,
+      n_rows: nil,
+      encoding: "utf8",
+      low_memory: false,
+      rechunk: true,
+      skip_rows_after_header: 0,
+      row_count_name: nil,
+      row_count_offset: 0,
+      sample_size: 1024,
+      eol_char: "\n"
+    )
+      projection, columns = Utils.handle_projection_columns(columns)
+
+      if columns && !has_header
+        columns.each do |column|
+          if !column.start_with?("column_")
+            raise ArgumentError, "Specified column names do not start with \"column_\", but autogenerated header names were requested."
+          end
+        end
+      end
+
+      if projection || new_columns
+        raise Todo
+      end
+
+      BatchedCsvReader.new(
+        file,
+        has_header: has_header,
+        columns: columns || projection,
+        sep: sep,
+        comment_char: comment_char,
+        quote_char: quote_char,
+        skip_rows: skip_rows,
+        dtypes: dtypes,
+        null_values: null_values,
+        ignore_errors: ignore_errors,
+        parse_dates: parse_dates,
+        n_threads: n_threads,
+        infer_schema_length: infer_schema_length,
+        batch_size: batch_size,
+        n_rows: n_rows,
+        encoding: encoding == "utf8-lossy" ? encoding : "utf8",
+        low_memory: low_memory,
+        rechunk: rechunk,
+        skip_rows_after_header: skip_rows_after_header,
+        row_count_name: row_count_name,
+        row_count_offset: row_count_offset,
+        sample_size: sample_size,
+        eol_char: eol_char,
+        new_columns: new_columns
+      )
+    end
+
+    def read_ipc_schema(file)
+      if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
+        file = Utils.format_path(file)
+      end
+
+      _ipc_schema(file)
+    end
+
+    def read_parquet_schema(file)
+      if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
+        file = Utils.format_path(file)
+      end
+
+      _parquet_schema(file)
+    end
+
     private
 
     def _prepare_file_arg(file)
@@ -35,5 +362,18 @@ module Polars
 
       yield file
     end
+
+    def _check_arg_is_1byte(arg_name, arg, can_be_empty = false)
+      if arg.is_a?(String)
+        arg_byte_length = arg.bytesize
+        if can_be_empty
+          if arg_byte_length > 1
+            raise ArgumentError, "#{arg_name} should be a single byte character or empty, but is #{arg_byte_length} bytes long."
+          end
+        elsif arg_byte_length != 1
+          raise ArgumentError, "#{arg_name} should be a single byte character, but is #{arg_byte_length} bytes long."
+        end
+      end
+    end
   end
 end
data/lib/polars/lazy_frame.rb
CHANGED
@@ -1,13 +1,157 @@
 module Polars
+  # Representation of a Lazy computation graph/query againat a DataFrame.
   class LazyFrame
+    # @private
     attr_accessor :_ldf
 
+    # @private
     def self._from_rbldf(rb_ldf)
       ldf = LazyFrame.allocate
       ldf._ldf = rb_ldf
       ldf
     end
 
+    # @private
+    def self._scan_csv(
+      file,
+      has_header: true,
+      sep: ",",
+      comment_char: nil,
+      quote_char: '"',
+      skip_rows: 0,
+      dtypes: nil,
+      null_values: nil,
+      ignore_errors: false,
+      cache: true,
+      with_column_names: nil,
+      infer_schema_length: 100,
+      n_rows: nil,
+      encoding: "utf8",
+      low_memory: false,
+      rechunk: true,
+      skip_rows_after_header: 0,
+      row_count_name: nil,
+      row_count_offset: 0,
+      parse_dates: false,
+      eol_char: "\n"
+    )
+      dtype_list = nil
+      if !dtypes.nil?
+        dtype_list = []
+        dtypes.each do |k, v|
+          dtype_list << [k, Utils.rb_type_to_dtype(v)]
+        end
+      end
+      processed_null_values = Utils._process_null_values(null_values)
+
+      _from_rbldf(
+        RbLazyFrame.new_from_csv(
+          file,
+          sep,
+          has_header,
+          ignore_errors,
+          skip_rows,
+          n_rows,
+          cache,
+          dtype_list,
+          low_memory,
+          comment_char,
+          quote_char,
+          processed_null_values,
+          infer_schema_length,
+          with_column_names,
+          rechunk,
+          skip_rows_after_header,
+          encoding,
+          Utils._prepare_row_count_args(row_count_name, row_count_offset),
+          parse_dates,
+          eol_char
+        )
+      )
+    end
+
+    # @private
+    def self._scan_parquet(
+      file,
+      n_rows: nil,
+      cache: true,
+      parallel: "auto",
+      rechunk: true,
+      row_count_name: nil,
+      row_count_offset: 0,
+      storage_options: nil,
+      low_memory: false
+    )
+      _from_rbldf(
+        RbLazyFrame.new_from_parquet(
+          file,
+          n_rows,
+          cache,
+          parallel,
+          rechunk,
+          Utils._prepare_row_count_args(row_count_name, row_count_offset),
+          low_memory
+        )
+      )
+    end
+
+    # @private
+    def self._scan_ipc(
+      file,
+      n_rows: nil,
+      cache: true,
+      rechunk: true,
+      row_count_name: nil,
+      row_count_offset: 0,
+      storage_options: nil,
+      memory_map: true
+    )
+      if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
+        file = Utils.format_path(file)
+      end
+
+      _from_rbldf(
+        RbLazyFrame.new_from_ipc(
+          file,
+          n_rows,
+          cache,
+          rechunk,
+          Utils._prepare_row_count_args(row_count_name, row_count_offset),
+          memory_map
+        )
+      )
+    end
+
+    # @private
+    def self._scan_ndjson(
+      file,
+      infer_schema_length: nil,
+      batch_size: nil,
+      n_rows: nil,
+      low_memory: false,
+      rechunk: true,
+      row_count_name: nil,
+      row_count_offset: 0
+    )
+      _from_rbldf(
+        RbLazyFrame.new_from_ndjson(
+          file,
+          infer_schema_length,
+          batch_size,
+          n_rows,
+          low_memory,
+          rechunk,
+          Utils._prepare_row_count_args(row_count_name, row_count_offset)
+        )
+      )
+    end
+
+    # def self.from_json
+    # end
+
+    # def self.read_json
+    # end
+
     # def columns
     # end
 
@@ -53,6 +197,7 @@ module Polars
     # def profile
     # end
 
+    #
     def collect(
       type_coercion: true,
       predicate_pushdown: true,
@@ -90,6 +235,7 @@ module Polars
     # def fetch
     # end
 
+    #
     def lazy
       self
     end
@@ -100,6 +246,7 @@ module Polars
     # def cleared
     # end
 
+    #
     def filter(predicate)
       _from_rbldf(
         _ldf.filter(
@@ -128,6 +275,7 @@ module Polars
     # def join_asof
     # end
 
+    #
     def join(
       other,
       left_on: nil,
@@ -202,6 +350,7 @@ module Polars
     # def with_context
     # end
 
+    #
     def with_column(column)
       with_columns([column])
     end
@@ -209,6 +358,7 @@ module Polars
     # def drop
     # end
 
+    #
     def rename(mapping)
       existing = mapping.keys
       _new = mapping.values
@@ -251,6 +401,7 @@ module Polars
     # def fill_null
     # end
 
+    #
     def fill_nan(fill_value)
       if !fill_value.is_a?(Expr)
         fill_value = Utils.lit(fill_value)
@@ -282,8 +433,11 @@ module Polars
     # def quantile
     # end
 
-    #
-
+    #
+    def explode(columns)
+      columns = Utils.selection_to_rbexpr_list(columns)
+      _from_rbldf(_ldf.explode(columns))
+    end
 
     # def unique
     # end