polars-df 0.10.0-x86_64-linux-musl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +175 -0
  4. data/Cargo.lock +2536 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +38726 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +98 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +72 -0
  18. data/lib/polars/cat_name_space.rb +125 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +93 -0
  21. data/lib/polars/data_frame.rb +5418 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1444 -0
  24. data/lib/polars/date_time_name_space.rb +1484 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +31 -0
  27. data/lib/polars/expr.rb +6105 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +248 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1280 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +103 -0
  39. data/lib/polars/functions/range/int_range.rb +51 -0
  40. data/lib/polars/functions/repeat.rb +144 -0
  41. data/lib/polars/functions/whenthen.rb +96 -0
  42. data/lib/polars/functions.rb +57 -0
  43. data/lib/polars/group_by.rb +548 -0
  44. data/lib/polars/io.rb +890 -0
  45. data/lib/polars/lazy_frame.rb +2833 -0
  46. data/lib/polars/lazy_group_by.rb +84 -0
  47. data/lib/polars/list_expr.rb +791 -0
  48. data/lib/polars/list_name_space.rb +445 -0
  49. data/lib/polars/meta_expr.rb +222 -0
  50. data/lib/polars/name_expr.rb +198 -0
  51. data/lib/polars/plot.rb +109 -0
  52. data/lib/polars/rolling_group_by.rb +37 -0
  53. data/lib/polars/series.rb +4527 -0
  54. data/lib/polars/slice.rb +104 -0
  55. data/lib/polars/sql_context.rb +194 -0
  56. data/lib/polars/string_cache.rb +75 -0
  57. data/lib/polars/string_expr.rb +1519 -0
  58. data/lib/polars/string_name_space.rb +810 -0
  59. data/lib/polars/struct_expr.rb +98 -0
  60. data/lib/polars/struct_name_space.rb +96 -0
  61. data/lib/polars/testing.rb +507 -0
  62. data/lib/polars/utils.rb +422 -0
  63. data/lib/polars/version.rb +4 -0
  64. data/lib/polars/whenthen.rb +83 -0
  65. data/lib/polars-df.rb +1 -0
  66. data/lib/polars.rb +72 -0
  67. metadata +125 -0
@@ -0,0 +1,466 @@
1
+ module Polars
2
# Base class for all Polars data types.
#
# Data types may be referenced either as a class (e.g. `Polars::Int64`) or
# as an instance (e.g. `Polars::Datetime.new("ns")`), so most behaviour is
# defined on both the class and the instance level.
class DataType
  # Return this DataType's fundamental/root type class.
  #
  # @return [Class]
  #
  # @example
  #   Polars::Datetime.new("ns").base_type
  #   # => Polars::Datetime
  # @example
  #   Polars::List.new(Polars::Int32).base_type
  #   # => Polars::List
  # @example
  #   Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)]).base_type
  #   # => Polars::Struct
  def self.base_type
    self
  end

  # Return this DataType's fundamental/root type class.
  #
  # @return [Class]
  def base_type
    # An instance of this class is always a DataType, so the root type is
    # simply its class.
    self.class
  end

  # Check if this DataType is the same as another DataType.
  #
  # A data type class is equal to itself and to any instance of itself
  # (or of one of its subclasses).
  #
  # @param other [Object]
  # @return [Boolean]
  def self.==(other)
    eql?(other) || other.is_a?(self)
  end

  # Check if this DataType is the same as another DataType.
  #
  # When `other` is a class, this instance matches if it is an instance of
  # that class (or of a subclass); otherwise `other` must be an instance of
  # exactly the same class.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    if other.is_a?(Class)
      is_a?(other)
    else
      other.instance_of?(self.class)
    end
  end

  # NOTE: `Module#<` returns `nil` (not `false`) when the two classes are
  # unrelated, so each predicate below coerces the result with `!!` to
  # honour the documented Boolean return type.

  # Check whether the data type is a numeric type.
  #
  # @return [Boolean]
  def self.numeric?
    !!(self < NumericType)
  end

  # Check whether the data type is a decimal type.
  #
  # @return [Boolean]
  def self.decimal?
    self == Decimal
  end

  # Check whether the data type is an integer type.
  #
  # @return [Boolean]
  def self.integer?
    !!(self < IntegerType)
  end

  # Check whether the data type is a signed integer type.
  #
  # @return [Boolean]
  def self.signed_integer?
    !!(self < SignedIntegerType)
  end

  # Check whether the data type is an unsigned integer type.
  #
  # @return [Boolean]
  def self.unsigned_integer?
    !!(self < UnsignedIntegerType)
  end

  # Check whether the data type is a float type.
  #
  # @return [Boolean]
  def self.float?
    !!(self < FloatType)
  end

  # Check whether the data type is a temporal type.
  #
  # @return [Boolean]
  def self.temporal?
    !!(self < TemporalType)
  end

  # Check whether the data type is a nested type.
  #
  # @return [Boolean]
  def self.nested?
    !!(self < NestedType)
  end

  # Mirror every class-level predicate on instances by delegating to the
  # instance's class.
  %i[numeric? decimal? integer? signed_integer? unsigned_integer? float? temporal? nested?].each do |v|
    define_method(v) do
      self.class.public_send(v)
    end
  end

  # Returns a string representing the data type.
  #
  # @return [String]
  def to_s
    self.class.name
  end

  # Returns a string representing the data type.
  #
  # @return [String]
  def inspect
    to_s
  end
end
122
+
123
# Abstract marker classes. They carry no behaviour of their own; the
# `DataType` predicates (`numeric?`, `integer?`, ...) test whether a type
# descends from one of them.

# Base class for numeric data types.
class NumericType < DataType; end

# Base class for integral data types.
class IntegerType < NumericType; end

# @private
IntegralType = IntegerType

# Base class for signed integer data types.
class SignedIntegerType < IntegerType; end

# Base class for unsigned integer data types.
class UnsignedIntegerType < IntegerType; end

# Base class for float data types.
class FloatType < NumericType; end

# Base class for temporal data types.
class TemporalType < DataType; end

# Base class for nested data types.
class NestedType < DataType; end
153
+
154
# 8-bit signed integer type.
class Int8 < SignedIntegerType; end

# 16-bit signed integer type.
class Int16 < SignedIntegerType; end

# 32-bit signed integer type.
class Int32 < SignedIntegerType; end

# 64-bit signed integer type.
class Int64 < SignedIntegerType; end

# 8-bit unsigned integer type.
class UInt8 < UnsignedIntegerType; end

# 16-bit unsigned integer type.
class UInt16 < UnsignedIntegerType; end

# 32-bit unsigned integer type.
class UInt32 < UnsignedIntegerType; end

# 64-bit unsigned integer type.
class UInt64 < UnsignedIntegerType; end

# 32-bit floating point type.
class Float32 < FloatType; end

# 64-bit floating point type.
class Float64 < FloatType; end
193
+
194
# Decimal 128-bit type with an optional precision and non-negative scale.
#
# NOTE: this is an experimental work-in-progress feature and may not work as expected.
class Decimal < NumericType
  attr_reader :precision, :scale

  # @param precision [Object] stored as-is and exposed via {#precision}
  # @param scale [Object] stored as-is and exposed via {#scale}
  def initialize(precision, scale)
    @precision, @scale = precision, scale
  end

  # Equal to the bare `Decimal` class, or to another `Decimal` instance
  # with the same precision and scale.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(Decimal)
    return false unless other.is_a?(Decimal)

    precision == other.precision && scale == other.scale
  end

  # @return [String]
  def to_s
    shown_precision = precision.inspect
    shown_scale = scale.inspect
    "#{self.class.name}(precision: #{shown_precision}, scale: #{shown_scale})"
  end
end
219
+
220
# Boolean type.
class Boolean < DataType; end

# UTF-8 encoded string type.
class String < DataType; end

# @private
# Allow Utf8 as an alias for String
Utf8 = String

# Binary type.
class Binary < DataType; end

# Calendar date type.
class Date < TemporalType; end

# Time of day type.
class Time < TemporalType; end
243
+
244
# Calendar date and time type.
class Datetime < TemporalType
  attr_reader :time_unit, :time_zone
  alias_method :tu, :time_unit

  # @param time_unit [String, nil] falls back to `"us"` when nil/false
  # @param time_zone [Object, nil] stored as-is
  def initialize(time_unit = "us", time_zone = nil)
    @time_zone = time_zone
    @time_unit = time_unit || "us"
  end

  # Equal to the bare `Datetime` class, or to another `Datetime` instance
  # with the same time unit and time zone.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(Datetime)
    return false unless other.is_a?(Datetime)

    time_unit == other.time_unit && time_zone == other.time_zone
  end

  # @return [String]
  def to_s
    unit = time_unit.inspect
    zone = time_zone.inspect
    "#{self.class.name}(time_unit: #{unit}, time_zone: #{zone})"
  end
end
268
+
269
# Time duration/delta type.
class Duration < TemporalType
  attr_reader :time_unit
  alias_method :tu, :time_unit

  # @param time_unit [String] stored as-is; defaults to `"us"`
  def initialize(time_unit = "us")
    @time_unit = time_unit
  end

  # Equal to the bare `Duration` class, or to another `Duration` instance
  # with the same time unit.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(Duration)
    return false unless other.is_a?(Duration)

    time_unit == other.time_unit
  end

  # @return [String]
  def to_s
    unit = time_unit.inspect
    "#{self.class.name}(time_unit: #{unit})"
  end
end
292
+
293
# A categorical encoding of a set of strings.
class Categorical < DataType
  # The ordering passed to the constructor. Exposed through a reader so it
  # can be inspected, like the parameters of the other parameterised types
  # (`Datetime#time_unit`, `Decimal#scale`, ...).
  attr_reader :ordering

  # @param ordering [String] stored as-is; defaults to `"physical"`
  def initialize(ordering = "physical")
    @ordering = ordering
  end
end
299
+
300
# A fixed set categorical encoding of a set of strings.
#
# NOTE: this is an experimental work-in-progress feature and may not work as expected.
class Enum < DataType
  attr_reader :categories

  # Build an Enum from its categories.
  #
  # @param categories [Series, Object]
  #   The categories; anything that is not already a `Series` is passed
  #   to `Series.new`.
  #
  # @raise [TypeError] if the categories contain nulls or are not strings
  # @raise [ArgumentError] if the categories contain duplicates
  def initialize(categories)
    categories = Series.new(categories) unless categories.is_a?(Series)

    if categories.empty?
      # Only a reader is defined for `categories`, so the instance variable
      # must be assigned directly (`self.categories = ...` would raise
      # NoMethodError).
      @categories = Series.new("category", [], dtype: String)
      return
    end

    if categories.null_count > 0
      msg = "Enum categories must not contain null values"
      raise TypeError, msg
    end

    if (dtype = categories.dtype) != String
      msg = "Enum categories must be strings; found data of type #{dtype}"
      raise TypeError, msg
    end

    if categories.n_unique != categories.len
      # Report the first duplicated value to make the error actionable.
      duplicate = categories.filter(categories.is_duplicated)[0]
      msg = "Enum categories must be unique; found duplicate #{duplicate}"
      raise ArgumentError, msg
    end

    @categories = categories.rechunk.alias("category")
  end

  # Equal to the bare `Enum` class, or to another `Enum` instance whose
  # categories compare equal.
  #
  # @param other [Object]
  # @return [Object] result of the comparison
  def ==(other)
    if other.eql?(Enum)
      true
    elsif other.is_a?(Enum)
      categories == other.categories
    else
      false
    end
  end

  # @return [String]
  def to_s
    "#{self.class.name}(categories: #{categories.to_a.inspect})"
  end
end
349
+
350
# Type for wrapping arbitrary Ruby objects.
class Object < DataType; end

# Type representing Null / None values.
class Null < DataType; end

# Type representing Datatype values that could not be determined statically.
class Unknown < DataType; end
361
+
362
# Nested list/array type.
class List < NestedType
  attr_reader :inner

  # @param inner [Object] element type, converted with `Utils.rb_type_to_dtype`
  def initialize(inner)
    @inner = Utils.rb_type_to_dtype(inner)
  end

  # Equal to the bare `List` class, or to another `List` instance. Two
  # instances match when either inner type is nil or the inner types
  # compare equal.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(List)
    return false unless other.is_a?(List)

    @inner.nil? || other.inner.nil? || @inner == other.inner
  end

  # @return [String]
  def to_s
    element_type = inner
    "#{self.class.name}(#{element_type})"
  end
end
384
+
385
# Nested array type that carries a `width` in addition to its inner type.
class Array < NestedType
  attr_reader :inner, :width

  # Accepts `(inner, width)`; if the second argument is a data type
  # (instance or class), the two arguments are treated as swapped.
  #
  # @param inner [Object] element type, converted with `Utils.rb_type_to_dtype` when truthy
  # @param width [Object] stored as-is
  def initialize(inner, width)
    width_is_dtype = width.is_a?(DataType) || (width.is_a?(Class) && width < DataType)
    inner, width = width, inner if width_is_dtype

    @inner = Utils.rb_type_to_dtype(inner) if inner
    @width = width
  end

  # Equal to the bare `Array` class, or to another `Array` instance of the
  # same width whose inner type is nil on either side or compares equal.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(Array)
    return false unless other.is_a?(Array)
    return false if @width != other.width

    @inner.nil? || other.inner.nil? || @inner == other.inner
  end

  # @return [String]
  def to_s
    shown_width = width.inspect
    "#{self.class.name}(#{inner}, width: #{shown_width})"
  end
end
417
+
418
# Definition of a single field within a `Struct` DataType.
class Field
  attr_reader :name, :dtype

  # @param name [Object] the field name, stored as-is
  # @param dtype [Object] the field type, converted with `Utils.rb_type_to_dtype`
  def initialize(name, dtype)
    @name = name
    @dtype = Utils.rb_type_to_dtype(dtype)
  end

  # Two fields are equal when both name and dtype match.
  #
  # Anything that is not a `Field` (including nil) compares as not equal
  # instead of raising NoMethodError on the missing `name`/`dtype` readers.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    other.is_a?(Field) && name == other.name && dtype == other.dtype
  end

  # @return [String]
  def to_s
    "#{self.class.name}(#{name.inspect}, #{dtype})"
  end
end
435
+
436
# Struct composite type.
class Struct < NestedType
  attr_reader :fields

  # @param fields [Hash, Object]
  #   A Hash is turned into one `Field` per name/dtype pair; any other
  #   value is stored as given.
  def initialize(fields)
    @fields =
      if fields.is_a?(Hash)
        fields.map { |field_name, field_dtype| Field.new(field_name, field_dtype) }
      else
        fields
      end
  end

  # Equal to the bare `Struct` class, or to another `Struct` instance whose
  # fields compare equal.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(Struct)
    return false unless other.is_a?(Struct)

    fields == other.fields
  end

  # @return [String]
  def to_s
    rendered_fields = fields.map(&:to_s).join("\n")
    "#{self.class.name}([#{rendered_fields}])"
  end

  # Map each field name to its dtype.
  #
  # @return [Hash]
  def to_schema
    @fields.to_h { |field| [field.name, field.dtype] }
  end
end
466
+ end