polars-df 0.13.0-aarch64-linux-musl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +208 -0
- data/Cargo.lock +2556 -0
- data/Cargo.toml +6 -0
- data/LICENSE-THIRD-PARTY.txt +39059 -0
- data/LICENSE.txt +20 -0
- data/README.md +437 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +537 -0
- data/lib/polars/array_name_space.rb +423 -0
- data/lib/polars/batched_csv_reader.rb +104 -0
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/cat_expr.rb +36 -0
- data/lib/polars/cat_name_space.rb +88 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/convert.rb +98 -0
- data/lib/polars/data_frame.rb +5191 -0
- data/lib/polars/data_types.rb +466 -0
- data/lib/polars/date_time_expr.rb +1397 -0
- data/lib/polars/date_time_name_space.rb +1287 -0
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/exceptions.rb +38 -0
- data/lib/polars/expr.rb +7256 -0
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +271 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1329 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +136 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +96 -0
- data/lib/polars/functions.rb +57 -0
- data/lib/polars/group_by.rb +613 -0
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/io/csv.rb +696 -0
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +275 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +233 -0
- data/lib/polars/lazy_frame.rb +2708 -0
- data/lib/polars/lazy_group_by.rb +181 -0
- data/lib/polars/list_expr.rb +791 -0
- data/lib/polars/list_name_space.rb +449 -0
- data/lib/polars/meta_expr.rb +222 -0
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/plot.rb +109 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +4444 -0
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +1495 -0
- data/lib/polars/string_name_space.rb +811 -0
- data/lib/polars/struct_expr.rb +98 -0
- data/lib/polars/struct_name_space.rb +96 -0
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +130 -0
- data/lib/polars/version.rb +4 -0
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +91 -0
- metadata +138 -0
@@ -0,0 +1,423 @@
|
|
1
|
+
module Polars
  # Series.arr namespace: operations applied to each sub-array of an
  # Array-typed Series.
  #
  # Every public method below only calls `super`; the real work is done by
  # whatever ExprDispatch provides.
  # NOTE(review): ExprDispatch is not visible here - presumably it forwards the
  # call to the expression method of the same name; confirm in expr_dispatch.rb.
  class ArrayNameSpace
    include ExprDispatch

    self._accessor = "arr"

    # @private
    def initialize(series)
      self._s = series._s
    end

    # Minimum value of each sub-array.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new(
    #     "a", [[1, 2], [4, 3]], dtype: Polars::Array.new(2, Polars::Int64)
    #   )
    #   s.arr.min
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [i64]
    #   # [
    #   #         1
    #   #         3
    #   # ]
    def min
      super
    end

    # Maximum value of each sub-array.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new(
    #     "a", [[1, 2], [4, 3]], dtype: Polars::Array.new(2, Polars::Int64)
    #   )
    #   s.arr.max
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [i64]
    #   # [
    #   #         2
    #   #         4
    #   # ]
    def max
      super
    end

    # Sum of the values in each sub-array.
    #
    # @return [Series]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {"a" => [[1, 2], [4, 3]]},
    #     schema: {"a" => Polars::Array.new(2, Polars::Int64)}
    #   )
    #   df.select(Polars.col("a").arr.sum)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ a   │
    #   # │ --- │
    #   # │ i64 │
    #   # ╞═════╡
    #   # │ 3   │
    #   # │ 7   │
    #   # └─────┘
    def sum
      super
    end

    # Unique/distinct values of each sub-array.
    #
    # @param maintain_order [Boolean]
    #   Keep the original order of the data. This requires more work.
    #
    # @return [Series]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "a" => [[1, 1, 2]]
    #     },
    #     schema_overrides: {"a" => Polars::Array.new(Polars::Int64, 3)}
    #   )
    #   df.select(Polars.col("a").arr.unique)
    #   # =>
    #   # shape: (1, 1)
    #   # ┌───────────┐
    #   # │ a         │
    #   # │ ---       │
    #   # │ list[i64] │
    #   # ╞═══════════╡
    #   # │ [1, 2]    │
    #   # └───────────┘
    def unique(maintain_order: false)
      super
    end

    # Turn an Array column into a List column that keeps the same inner data type.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new([[1, 2], [3, 4]], dtype: Polars::Array.new(Polars::Int8, 2))
    #   s.arr.to_list
    #   # =>
    #   # shape: (2,)
    #   # Series: '' [list[i8]]
    #   # [
    #   #         [1, 2]
    #   #         [3, 4]
    #   # ]
    def to_list
      super
    end

    # For every sub-array, report whether any of its boolean values is true.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new(
    #     [[true, true], [false, true], [false, false], [nil, nil], nil],
    #     dtype: Polars::Array.new(Polars::Boolean, 2)
    #   )
    #   s.arr.any
    #   # =>
    #   # shape: (5,)
    #   # Series: '' [bool]
    #   # [
    #   #         true
    #   #         true
    #   #         false
    #   #         false
    #   #         null
    #   # ]
    def any
      super
    end

    # For every sub-array, report whether all of its boolean values are true.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new(
    #     [[true, true], [false, true], [false, false], [nil, nil], nil],
    #     dtype: Polars::Array.new(Polars::Boolean, 2)
    #   )
    #   s.arr.all
    #   # =>
    #   # shape: (5,)
    #   # Series: '' [bool]
    #   # [
    #   #         true
    #   #         false
    #   #         false
    #   #         true
    #   #         null
    #   # ]
    def all
      super
    end

    # Sort the values inside each sub-array.
    #
    # @param descending [Boolean]
    #   Sort in descending order.
    # @param nulls_last [Boolean]
    #   Place null values last.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new("a", [[3, 2, 1], [9, 1, 2]], dtype: Polars::Array.new(Polars::Int64, 3))
    #   s.arr.sort
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [array[i64, 3]]
    #   # [
    #   #         [1, 2, 3]
    #   #         [1, 2, 9]
    #   # ]
    #
    # @example
    #   s.arr.sort(descending: true)
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [array[i64, 3]]
    #   # [
    #   #         [3, 2, 1]
    #   #         [9, 2, 1]
    #   # ]
    def sort(descending: false, nulls_last: false)
      super
    end

    # Reverse the order of the values inside each sub-array.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new("a", [[3, 2, 1], [9, 1, 2]], dtype: Polars::Array.new(Polars::Int64, 3))
    #   s.arr.reverse
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [array[i64, 3]]
    #   # [
    #   #         [1, 2, 3]
    #   #         [2, 1, 9]
    #   # ]
    def reverse
      super
    end

    # Index of the minimal value within each sub-array.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new("a", [[3, 2, 1], [9, 1, 2]], dtype: Polars::Array.new(Polars::Int64, 3))
    #   s.arr.arg_min
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [u32]
    #   # [
    #   #         2
    #   #         1
    #   # ]
    def arg_min
      super
    end

    # Index of the maximum value within each sub-array.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new("a", [[0, 9, 3], [9, 1, 2]], dtype: Polars::Array.new(Polars::Int64, 3))
    #   s.arr.arg_max
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [u32]
    #   # [
    #   #         1
    #   #         0
    #   # ]
    def arg_max
      super
    end

    # Pick the value at a given index from each sub-array.
    #
    # Index `0` returns the first item of every sub-array and index `-1`
    # returns the last one. An index that is out of bounds yields `nil`.
    #
    # @param index [Integer]
    #   Index to return per sub-array. (The example below passes a Series
    #   holding one index per row.)
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new(
    #     "a", [[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype: Polars::Array.new(Polars::Int32, 3)
    #   )
    #   s.arr.get(Polars::Series.new([1, -2, 4]))
    #   # =>
    #   # shape: (3,)
    #   # Series: 'a' [i32]
    #   # [
    #   #         2
    #   #         5
    #   #         null
    #   # ]
    def get(index)
      super
    end

    # First value of each sub-array.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new(
    #     "a", [[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype: Polars::Array.new(Polars::Int32, 3)
    #   )
    #   s.arr.first
    #   # =>
    #   # shape: (3,)
    #   # Series: 'a' [i32]
    #   # [
    #   #         1
    #   #         4
    #   #         7
    #   # ]
    def first
      super
    end

    # Last value of each sub-array.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new(
    #     "a", [[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype: Polars::Array.new(Polars::Int32, 3)
    #   )
    #   s.arr.last
    #   # =>
    #   # shape: (3,)
    #   # Series: 'a' [i32]
    #   # [
    #   #         3
    #   #         6
    #   #         9
    #   # ]
    def last
      super
    end

    # Concatenate the string items of each sub-array, inserting a separator
    # between them.
    #
    # This errors if the inner type of the array is not String.
    #
    # @param separator [String]
    #   String placed between the items.
    # @param ignore_nulls [Boolean]
    #   Ignore null values (default).
    #
    #   When `false`, null values are propagated: if a sub-array contains
    #   any null value, the output is `nil`.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new([["x", "y"], ["a", "b"]], dtype: Polars::Array.new(Polars::String, 2))
    #   s.arr.join("-")
    #   # =>
    #   # shape: (2,)
    #   # Series: '' [str]
    #   # [
    #   #         "x-y"
    #   #         "a-b"
    #   # ]
    def join(separator, ignore_nulls: true)
      super
    end

    # Produce a column that has one row per array element.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new("a", [[1, 2, 3], [4, 5, 6]], dtype: Polars::Array.new(Polars::Int64, 3))
    #   s.arr.explode
    #   # =>
    #   # shape: (6,)
    #   # Series: 'a' [i64]
    #   # [
    #   #         1
    #   #         2
    #   #         3
    #   #         4
    #   #         5
    #   #         6
    #   # ]
    def explode
      super
    end

    # Test each sub-array for membership of the given item.
    #
    # @param item [Object]
    #   Item that will be checked for membership.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new(
    #     "a", [[3, 2, 1], [1, 2, 3], [4, 5, 6]], dtype: Polars::Array.new(Polars::Int32, 3)
    #   )
    #   s.arr.contains(1)
    #   # =>
    #   # shape: (3,)
    #   # Series: 'a' [bool]
    #   # [
    #   #         true
    #   #         true
    #   #         false
    #   # ]
    def contains(item)
      super
    end

    # Count the occurrences of the value produced by `element` in each sub-array.
    #
    # @param element [Object]
    #   An expression that produces a single value.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new("a", [[1, 2, 3], [2, 2, 2]], dtype: Polars::Array.new(Polars::Int64, 3))
    #   s.arr.count_matches(2)
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [u32]
    #   # [
    #   #         1
    #   #         3
    #   # ]
    def count_matches(element)
      super
    end
  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
module Polars
  # @private
  #
  # Thin Ruby wrapper around the native `RbBatchedCsv` reader that hands out
  # CSV data in batches of wrapped data frames.
  class BatchedCsvReader
    attr_accessor :_reader, :new_columns

    # Build the native batched reader.
    #
    # Apart from `dtypes`, `null_values`, `columns`, the row-count pair and
    # `new_columns`, the keyword options are passed through unchanged to
    # `RbBatchedCsv.new`; their exact meaning is defined by the native reader.
    #
    # @param file [Object]
    #   Source to read. Only normalized into a path when `Utils.pathlike?`
    #   accepts it.
    # @param dtypes [Hash, Array, nil]
    #   A Hash of column name => type (each type is converted with
    #   `Utils.rb_type_to_dtype`), or an Array that is forwarded as-is.
    # @param new_columns [Object, nil]
    #   When truthy, applied to every batch via `Utils._update_columns`.
    #
    # @raise [ArgumentError]
    #   If `dtypes` is neither `nil`, a Hash nor an Array.
    def initialize(
      file,
      has_header: true,
      columns: nil,
      sep: ",",
      comment_char: nil,
      quote_char: '"',
      skip_rows: 0,
      dtypes: nil,
      null_values: nil,
      missing_utf8_is_empty_string: false,
      ignore_errors: false,
      parse_dates: false,
      n_threads: nil,
      infer_schema_length: 100,
      batch_size: 50_000,
      n_rows: nil,
      encoding: "utf8",
      low_memory: false,
      rechunk: true,
      skip_rows_after_header: 0,
      row_count_name: nil,
      row_count_offset: 0,
      sample_size: 1024,
      eol_char: "\n",
      new_columns: nil,
      raise_if_empty: true,
      truncate_ragged_lines: false,
      decimal_comma: false
    )
      # `path` stays nil when `file` is not path-like.
      # NOTE(review): confirm the native reader reports a nil path clearly.
      path = Utils.normalize_filepath(file) if Utils.pathlike?(file)

      # Exactly one of dtype_list / dtype_slice is populated, depending on the
      # shape in which the caller supplied `dtypes`.
      dtype_list = nil
      dtype_slice = nil
      case dtypes
      when nil
        # no dtype overrides requested
      when Hash
        dtype_list = dtypes.map { |name, type| [name, Utils.rb_type_to_dtype(type)] }
      when ::Array
        # `::Array` is spelled out because this code lives inside `module Polars`.
        dtype_slice = dtypes
      else
        raise ArgumentError, "dtypes must be an Array or Hash"
      end

      processed_null_values = Utils._process_null_values(null_values)
      projection, columns = Utils.handle_projection_columns(columns)

      # The native constructor takes its arguments positionally; keep this order.
      self._reader = RbBatchedCsv.new(
        infer_schema_length,
        batch_size,
        has_header,
        ignore_errors,
        n_rows,
        skip_rows,
        projection,
        sep,
        rechunk,
        columns,
        encoding,
        n_threads,
        path,
        dtype_list,
        dtype_slice,
        low_memory,
        comment_char,
        quote_char,
        processed_null_values,
        missing_utf8_is_empty_string,
        parse_dates,
        skip_rows_after_header,
        Utils.parse_row_index_args(row_count_name, row_count_offset),
        sample_size,
        eol_char,
        raise_if_empty,
        truncate_ragged_lines,
        decimal_comma
      )
      self.new_columns = new_columns
    end

    # Fetch up to `n` batches from the native reader.
    #
    # @param n [Integer]
    #   Number of batches requested; passed straight to the native reader.
    #
    # @return [Array, nil]
    #   The wrapped batches (with `new_columns` applied when set), or `nil`
    #   when the native reader returns `nil`.
    def next_batches(n)
      batches = _reader.next_batches(n)
      return nil if batches.nil?

      batches.map do |rbdf|
        df = Utils.wrap_df(rbdf)
        new_columns ? Utils._update_columns(df, new_columns) : df
      end
    end
  end
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module Polars
  # Namespace for binary related expressions.
  class BinaryExpr
    # @private
    attr_accessor :_rbexpr

    # @private
    def initialize(expr)
      self._rbexpr = expr._rbexpr
    end

    # Check if binary values contain a binary substring.
    #
    # @param lit [String]
    #   The binary substring to look for
    #
    # @return [Expr]
    def contains(lit)
      Utils.wrap_expr(_rbexpr.binary_contains(lit))
    end

    # Check if values end with a binary substring.
    #
    # @param sub [String]
    #   Suffix substring.
    #
    # @return [Expr]
    def ends_with(sub)
      Utils.wrap_expr(_rbexpr.binary_ends_with(sub))
    end

    # Check if values start with a binary substring.
    #
    # @param sub [String]
    #   Prefix substring.
    #
    # @return [Expr]
    def starts_with(sub)
      Utils.wrap_expr(_rbexpr.binary_starts_with(sub))
    end

    # Decode a value using the provided encoding.
    #
    # @param encoding ["hex", "base64"]
    #   The encoding to use.
    # @param strict [Boolean]
    #   Raise an error if the underlying value cannot be decoded,
    #   otherwise mask out with a null value.
    #
    # @return [Expr]
    #
    # @raise [ArgumentError]
    #   If `encoding` is neither "hex" nor "base64".
    def decode(encoding, strict: true)
      case encoding
      when "hex"
        Utils.wrap_expr(_rbexpr.binary_hex_decode(strict))
      when "base64"
        Utils.wrap_expr(_rbexpr.binary_base64_decode(strict))
      else
        # Single braces: the doubled `{{ }}` was a Python f-string escape that
        # Ruby would print literally.
        raise ArgumentError, "encoding must be one of {'hex', 'base64'}, got #{encoding}"
      end
    end

    # Encode a value using the provided encoding.
    #
    # @param encoding ["hex", "base64"]
    #   The encoding to use.
    #
    # @return [Expr]
    #
    # @raise [ArgumentError]
    #   If `encoding` is neither "hex" nor "base64".
    def encode(encoding)
      case encoding
      when "hex"
        Utils.wrap_expr(_rbexpr.binary_hex_encode)
      when "base64"
        Utils.wrap_expr(_rbexpr.binary_base64_encode)
      else
        # Single braces: the doubled `{{ }}` was a Python f-string escape that
        # Ruby would print literally.
        raise ArgumentError, "encoding must be one of {'hex', 'base64'}, got #{encoding}"
      end
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
module Polars
  # Series.bin namespace: operations on the binary values of a Series.
  #
  # Every public method below only calls `super`; the real work is done by
  # whatever ExprDispatch provides.
  # NOTE(review): ExprDispatch is not visible here - presumably it forwards the
  # call to the expression method of the same name; confirm in expr_dispatch.rb.
  class BinaryNameSpace
    include ExprDispatch

    self._accessor = "bin"

    # @private
    def initialize(series)
      self._s = series._s
    end

    # Test whether the binary values of the Series contain a binary substring.
    #
    # @param lit [String]
    #   The binary substring to look for.
    #
    # @return [Series]
    def contains(lit)
      super
    end

    # Test whether values end with a binary substring.
    #
    # @param sub [String]
    #   Suffix substring.
    #
    # @return [Series]
    def ends_with(sub)
      super
    end

    # Test whether values start with a binary substring.
    #
    # @param sub [String]
    #   Prefix substring.
    #
    # @return [Series]
    def starts_with(sub)
      super
    end

    # Decode values with the given encoding.
    #
    # @param encoding ["hex", "base64"]
    #   The encoding to use.
    # @param strict [Boolean]
    #   Raise an error if the underlying value cannot be decoded,
    #   otherwise mask out with a null value.
    #
    # @return [Series]
    def decode(encoding, strict: true)
      super
    end

    # Encode values with the given encoding.
    #
    # @param encoding ["hex", "base64"]
    #   The encoding to use.
    #
    # @return [Series]
    def encode(encoding)
      super
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Polars
  # Namespace for categorical related expressions.
  class CatExpr
    # @private
    attr_accessor :_rbexpr

    # @private
    def initialize(expr)
      self._rbexpr = expr._rbexpr
    end

    # Return the categories held by this data type.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::Series.new(
    #     "cats", ["foo", "bar", "foo", "foo", "ham"], dtype: Polars::Categorical
    #   ).to_frame
    #   df.select(Polars.col("cats").cat.get_categories)
    #   # =>
    #   # shape: (3, 1)
    #   # ┌──────┐
    #   # │ cats │
    #   # │ ---  │
    #   # │ str  │
    #   # ╞══════╡
    #   # │ foo  │
    #   # │ bar  │
    #   # │ ham  │
    #   # └──────┘
    def get_categories
      categories = _rbexpr.cat_get_categories
      Utils.wrap_expr(categories)
    end
  end
end
|