polars-df 0.13.0-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +208 -0
- data/Cargo.lock +2556 -0
- data/Cargo.toml +6 -0
- data/LICENSE-THIRD-PARTY.txt +39278 -0
- data/LICENSE.txt +20 -0
- data/README.md +437 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +537 -0
- data/lib/polars/array_name_space.rb +423 -0
- data/lib/polars/batched_csv_reader.rb +104 -0
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/cat_expr.rb +36 -0
- data/lib/polars/cat_name_space.rb +88 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/convert.rb +98 -0
- data/lib/polars/data_frame.rb +5191 -0
- data/lib/polars/data_types.rb +466 -0
- data/lib/polars/date_time_expr.rb +1397 -0
- data/lib/polars/date_time_name_space.rb +1287 -0
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/exceptions.rb +38 -0
- data/lib/polars/expr.rb +7256 -0
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +271 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1329 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +136 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +96 -0
- data/lib/polars/functions.rb +57 -0
- data/lib/polars/group_by.rb +613 -0
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/io/csv.rb +696 -0
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +275 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +233 -0
- data/lib/polars/lazy_frame.rb +2708 -0
- data/lib/polars/lazy_group_by.rb +181 -0
- data/lib/polars/list_expr.rb +791 -0
- data/lib/polars/list_name_space.rb +449 -0
- data/lib/polars/meta_expr.rb +222 -0
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/plot.rb +109 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +4444 -0
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +1495 -0
- data/lib/polars/string_name_space.rb +811 -0
- data/lib/polars/struct_expr.rb +98 -0
- data/lib/polars/struct_name_space.rb +96 -0
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +130 -0
- data/lib/polars/version.rb +4 -0
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +91 -0
- metadata +138 -0
@@ -0,0 +1,466 @@
|
|
1
|
+
module Polars
|
2
|
+
# Base class for all Polars data types.
#
# A data type may be referred to by its class or by an instance of it, so the
# comparison and category-predicate methods are defined at both levels.
class DataType
  # Return this DataType's fundamental/root type class.
  #
  # @return [Class]
  #
  # @example
  #   Polars::Datetime.new("ns").base_type
  #   # => Polars::Datetime
  # @example
  #   Polars::List.new(Polars::Int32).base_type
  #   # => Polars::List
  # @example
  #   Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)]).base_type
  #   # => Polars::Struct
  def self.base_type
    self
  end

  # Return this DataType's fundamental/root type class.
  #
  # @return [Class]
  def base_type
    # An instance of this class is always a DataType, so its root type is its class.
    self.class
  end

  # Check if this DataType is the same as another DataType.
  #
  # A data type class matches itself and any instance of itself or of one of
  # its subclasses.
  #
  # @param other [Object]
  # @return [Boolean]
  def self.==(other)
    eql?(other) || other.is_a?(self)
  end

  # Check if this DataType is the same as another DataType.
  #
  # Compared with a class, the result is true when the receiver is an instance
  # of that class (or of a subclass). Compared with anything else, the result
  # is true only when `other` is an instance of exactly the receiver's class.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    if other.is_a?(Class)
      is_a?(other)
    else
      other.instance_of?(self.class)
    end
  end

  # Check whether the data type is a numeric type.
  #
  # @return [Boolean]
  def self.numeric?
    # Module#< yields nil for unrelated classes; coerce it to a real boolean.
    (self < NumericType) || false
  end

  # Check whether the data type is a decimal type.
  #
  # @return [Boolean]
  def self.decimal?
    self == Decimal
  end

  # Check whether the data type is an integer type.
  #
  # @return [Boolean]
  def self.integer?
    (self < IntegerType) || false
  end

  # Check whether the data type is a signed integer type.
  #
  # @return [Boolean]
  def self.signed_integer?
    (self < SignedIntegerType) || false
  end

  # Check whether the data type is an unsigned integer type.
  #
  # @return [Boolean]
  def self.unsigned_integer?
    (self < UnsignedIntegerType) || false
  end

  # Check whether the data type is a float type.
  #
  # @return [Boolean]
  def self.float?
    (self < FloatType) || false
  end

  # Check whether the data type is a temporal type.
  #
  # @return [Boolean]
  def self.temporal?
    (self < TemporalType) || false
  end

  # Check whether the data type is a nested type.
  #
  # @return [Boolean]
  def self.nested?
    (self < NestedType) || false
  end

  # Instance-level versions of the predicates above; each one simply asks the
  # instance's class.
  %i[numeric? decimal? integer? signed_integer? unsigned_integer? float? temporal? nested?].each do |predicate|
    define_method(predicate) do
      self.class.public_send(predicate)
    end
  end

  # Returns a string representing the data type.
  #
  # @return [String]
  def to_s
    self.class.name
  end

  # Returns a string representing the data type.
  #
  # @return [String]
  def inspect
    to_s
  end
end
|
122
|
+
|
123
|
+
# Common superclass of the numeric data types.
class NumericType < DataType; end

# Common superclass of the integer data types.
class IntegerType < NumericType; end

# @private
IntegralType = IntegerType

# Common superclass of the signed integer data types.
class SignedIntegerType < IntegerType; end

# Common superclass of the unsigned integer data types.
class UnsignedIntegerType < IntegerType; end

# Common superclass of the floating point data types.
class FloatType < NumericType; end

# Common superclass of the temporal data types.
class TemporalType < DataType; end

# Common superclass of the nested data types.
class NestedType < DataType; end

# Signed integer type, 8 bits wide.
class Int8 < SignedIntegerType; end

# Signed integer type, 16 bits wide.
class Int16 < SignedIntegerType; end

# Signed integer type, 32 bits wide.
class Int32 < SignedIntegerType; end

# Signed integer type, 64 bits wide.
class Int64 < SignedIntegerType; end

# Unsigned integer type, 8 bits wide.
class UInt8 < UnsignedIntegerType; end

# Unsigned integer type, 16 bits wide.
class UInt16 < UnsignedIntegerType; end

# Unsigned integer type, 32 bits wide.
class UInt32 < UnsignedIntegerType; end

# Unsigned integer type, 64 bits wide.
class UInt64 < UnsignedIntegerType; end

# Floating point type, 32 bits wide.
class Float32 < FloatType; end

# Floating point type, 64 bits wide.
class Float64 < FloatType; end
|
193
|
+
|
194
|
+
# Decimal 128-bit type with an optional precision and non-negative scale.
#
# NOTE: this is an experimental work-in-progress feature and may not work as expected.
class Decimal < NumericType
  attr_reader :precision, :scale

  # Both arguments are stored as given and exposed through the readers above.
  #
  # @param precision [Object]
  # @param scale [Object]
  def initialize(precision, scale)
    @precision = precision
    @scale = scale
  end

  # Equal to the bare `Decimal` class, and to another `Decimal` instance whose
  # precision and scale both match.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(Decimal)
    return false unless other.is_a?(Decimal)

    precision == other.precision && scale == other.scale
  end

  # @return [String] the class name followed by the inspected parameters
  def to_s
    params = "precision: #{precision.inspect}, scale: #{scale.inspect}"
    "#{self.class.name}(#{params})"
  end
end
|
219
|
+
|
220
|
+
# Boolean data type.
class Boolean < DataType; end

# String data type (UTF-8 encoded).
class String < DataType; end

# @private
# Utf8 is kept as an alias for String.
Utf8 = String

# Binary data type.
class Binary < DataType; end

# Calendar date data type.
class Date < TemporalType; end

# Time of day data type.
class Time < TemporalType; end
|
243
|
+
|
244
|
+
# Calendar date and time type.
class Datetime < TemporalType
  attr_reader :time_unit, :time_zone
  alias_method :tu, :time_unit

  # @param time_unit [Object] falls back to "us" when a nil/false value is passed
  # @param time_zone [Object] stored as given; nil by default
  def initialize(time_unit = "us", time_zone = nil)
    @time_zone = time_zone
    @time_unit = time_unit || "us"
  end

  # Equal to the bare `Datetime` class, and to another `Datetime` instance with
  # the same time unit and time zone.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(Datetime)
    return false unless other.is_a?(Datetime)

    time_unit == other.time_unit && time_zone == other.time_zone
  end

  # @return [String] the class name followed by the inspected parameters
  def to_s
    params = "time_unit: #{time_unit.inspect}, time_zone: #{time_zone.inspect}"
    "#{self.class.name}(#{params})"
  end
end
|
268
|
+
|
269
|
+
# Time duration/delta type.
class Duration < TemporalType
  attr_reader :time_unit
  alias_method :tu, :time_unit

  # @param time_unit [Object] stored as given; "us" when omitted
  def initialize(time_unit = "us")
    @time_unit = time_unit
  end

  # Equal to the bare `Duration` class, and to another `Duration` instance with
  # the same time unit.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(Duration)
    return false unless other.is_a?(Duration)

    time_unit == other.time_unit
  end

  # @return [String] the class name followed by the inspected time unit
  def to_s
    params = "time_unit: #{time_unit.inspect}"
    "#{self.class.name}(#{params})"
  end
end
|
292
|
+
|
293
|
+
# A categorical encoding of a set of strings.
class Categorical < DataType
  # The ordering passed to the constructor, exposed like the parameters of the
  # other parameterized data types.
  #
  # @return [Object]
  attr_reader :ordering

  # @param ordering [Object] stored as given; "physical" when omitted
  #   NOTE(review): presumably one of Polars' categorical orderings — confirm
  #   the accepted values against the native extension.
  def initialize(ordering = "physical")
    @ordering = ordering
  end
end
|
299
|
+
|
300
|
+
# A fixed set categorical encoding of a set of strings.
#
# NOTE: this is an experimental work-in-progress feature and may not work as expected.
class Enum < DataType
  attr_reader :categories

  # Validate the categories and store them as a Series named "category".
  #
  # @param categories [Series, Object] a Series, or any value accepted by `Series.new`
  # @raise [TypeError] if the categories contain nulls or are not strings
  # @raise [ArgumentError] if the categories contain duplicates
  def initialize(categories)
    categories = Series.new(categories) unless categories.is_a?(Series)

    if categories.empty?
      # Set the ivar directly: only a reader is defined for `categories`, so
      # `self.categories = ...` would raise NoMethodError.
      # `String` here resolves to the String data type declared in this module.
      @categories = Series.new("category", [], dtype: String)
      return
    end

    if categories.null_count > 0
      raise TypeError, "Enum categories must not contain null values"
    end

    if (dtype = categories.dtype) != String
      raise TypeError, "Enum categories must be strings; found data of type #{dtype}"
    end

    if categories.n_unique != categories.len
      duplicate = categories.filter(categories.is_duplicated)[0]
      raise ArgumentError, "Enum categories must be unique; found duplicate #{duplicate}"
    end

    @categories = categories.rechunk.alias("category")
  end

  # Equal to the bare `Enum` class, and to another `Enum` instance whose
  # categories compare equal.
  #
  # NOTE(review): this relies on `Series#==` producing a plain true/false;
  # if that operator is elementwise, a whole-series equality check is needed
  # here instead — confirm against the Series implementation.
  #
  # @param other [Object]
  def ==(other)
    if other.eql?(Enum)
      true
    elsif other.is_a?(Enum)
      categories == other.categories
    else
      false
    end
  end

  # @return [String] the class name followed by the inspected category list
  def to_s
    "#{self.class.name}(categories: #{categories.to_a.inspect})"
  end
end
|
349
|
+
|
350
|
+
# Data type that wraps arbitrary Ruby objects.
class Object < DataType; end

# Data type representing Null / None values.
class Null < DataType; end

# Data type for values whose type could not be determined statically.
class Unknown < DataType; end
|
361
|
+
|
362
|
+
# Nested list/array type.
class List < NestedType
  attr_reader :inner

  # @param inner [Object] element type; normalized through `Utils.rb_type_to_dtype`
  def initialize(inner)
    @inner = Utils.rb_type_to_dtype(inner)
  end

  # Equal to the bare `List` class, and to another `List` instance when either
  # side has no inner type or both inner types compare equal.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(List)
    return false unless other.is_a?(List)
    return true if @inner.nil? || other.inner.nil?

    @inner == other.inner
  end

  # @return [String] the class name followed by the inner type in parentheses
  def to_s
    type_name = self.class.name
    "#{type_name}(#{inner})"
  end
end
|
384
|
+
|
385
|
+
# Nested array type described by an inner data type and a width.
class Array < NestedType
  attr_reader :inner, :width

  # The two arguments are swapped when a data type (class or instance) is
  # passed in the `width` position, so both argument orders are accepted.
  #
  # @param inner [Object] element type; normalized through `Utils.rb_type_to_dtype` when truthy
  # @param width [Object] stored as given
  def initialize(inner, width)
    dtype_in_width_slot = width.is_a?(DataType) || (width.is_a?(Class) && width < DataType)
    inner, width = width, inner if dtype_in_width_slot

    @width = width
    @inner = Utils.rb_type_to_dtype(inner) if inner
  end

  # Equal to the bare `Array` class, and to another `Array` instance with the
  # same width when either side has no inner type or both inner types compare
  # equal.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(Array)
    return false unless other.is_a?(Array)
    return false if @width != other.width
    return true if @inner.nil? || other.inner.nil?

    @inner == other.inner
  end

  # @return [String] the class name followed by the inner type and inspected width
  def to_s
    type_name = self.class.name
    "#{type_name}(#{inner}, width: #{width.inspect})"
  end
end
|
417
|
+
|
418
|
+
# Definition of a single field within a `Struct` DataType.
class Field
  attr_reader :name, :dtype

  # @param name [Object] field name, stored as given
  # @param dtype [Object] field type; normalized through `Utils.rb_type_to_dtype`
  def initialize(name, dtype)
    @name = name
    @dtype = Utils.rb_type_to_dtype(dtype)
  end

  # Two fields are equal when both their names and their data types match.
  # Anything that is not a `Field` (including nil) compares as unequal instead
  # of raising NoMethodError.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    other.is_a?(Field) && name == other.name && dtype == other.dtype
  end

  # @return [String] the class name followed by the inspected name and the dtype
  def to_s
    "#{self.class.name}(#{name.inspect}, #{dtype})"
  end
end
|
435
|
+
|
436
|
+
# Struct composite type.
class Struct < NestedType
  attr_reader :fields

  # @param fields [Hash, Object] a Hash of name => dtype is converted into
  #   `Field` objects; any other value is stored unchanged
  def initialize(fields)
    @fields = fields.is_a?(Hash) ? fields.map { |name, dtype| Field.new(name, dtype) } : fields
  end

  # Equal to the bare `Struct` class, and to another `Struct` instance whose
  # fields compare equal.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(Struct)
    return false unless other.is_a?(Struct)

    fields == other.fields
  end

  # @return [String] the class name followed by the name => dtype mapping
  def to_s
    "#{self.class.name}(#{to_schema})"
  end

  # @return [Hash] mapping of each field's name to its dtype
  def to_schema
    @fields.to_h { |field| [field.name, field.dtype] }
  end
end
|
466
|
+
end
|