polars-df 0.13.0-x64-mingw-ucrt

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +208 -0
  4. data/Cargo.lock +2556 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +39278 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +104 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +36 -0
  18. data/lib/polars/cat_name_space.rb +88 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +98 -0
  21. data/lib/polars/data_frame.rb +5191 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1397 -0
  24. data/lib/polars/date_time_name_space.rb +1287 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +38 -0
  27. data/lib/polars/expr.rb +7256 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +271 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1329 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +136 -0
  39. data/lib/polars/functions/range/datetime_range.rb +149 -0
  40. data/lib/polars/functions/range/int_range.rb +51 -0
  41. data/lib/polars/functions/range/time_range.rb +141 -0
  42. data/lib/polars/functions/repeat.rb +144 -0
  43. data/lib/polars/functions/whenthen.rb +96 -0
  44. data/lib/polars/functions.rb +57 -0
  45. data/lib/polars/group_by.rb +613 -0
  46. data/lib/polars/io/avro.rb +24 -0
  47. data/lib/polars/io/csv.rb +696 -0
  48. data/lib/polars/io/database.rb +73 -0
  49. data/lib/polars/io/ipc.rb +275 -0
  50. data/lib/polars/io/json.rb +29 -0
  51. data/lib/polars/io/ndjson.rb +80 -0
  52. data/lib/polars/io/parquet.rb +233 -0
  53. data/lib/polars/lazy_frame.rb +2708 -0
  54. data/lib/polars/lazy_group_by.rb +181 -0
  55. data/lib/polars/list_expr.rb +791 -0
  56. data/lib/polars/list_name_space.rb +449 -0
  57. data/lib/polars/meta_expr.rb +222 -0
  58. data/lib/polars/name_expr.rb +198 -0
  59. data/lib/polars/plot.rb +109 -0
  60. data/lib/polars/rolling_group_by.rb +35 -0
  61. data/lib/polars/series.rb +4444 -0
  62. data/lib/polars/slice.rb +104 -0
  63. data/lib/polars/sql_context.rb +194 -0
  64. data/lib/polars/string_cache.rb +75 -0
  65. data/lib/polars/string_expr.rb +1495 -0
  66. data/lib/polars/string_name_space.rb +811 -0
  67. data/lib/polars/struct_expr.rb +98 -0
  68. data/lib/polars/struct_name_space.rb +96 -0
  69. data/lib/polars/testing.rb +507 -0
  70. data/lib/polars/utils/constants.rb +9 -0
  71. data/lib/polars/utils/convert.rb +97 -0
  72. data/lib/polars/utils/parse.rb +89 -0
  73. data/lib/polars/utils/various.rb +76 -0
  74. data/lib/polars/utils/wrap.rb +19 -0
  75. data/lib/polars/utils.rb +130 -0
  76. data/lib/polars/version.rb +4 -0
  77. data/lib/polars/whenthen.rb +83 -0
  78. data/lib/polars-df.rb +1 -0
  79. data/lib/polars.rb +91 -0
  80. metadata +138 -0
@@ -0,0 +1,466 @@
1
+ module Polars
2
# Base class for all Polars data types.
#
# Both the type classes themselves (e.g. `Int64`) and their instances
# (e.g. `Datetime.new("ns")`) are used as data types, so most behaviour is
# defined at class level and mirrored at instance level.
class DataType
  # Return this DataType's fundamental/root type class.
  #
  # @return [Class]
  #
  # @example
  #   Polars::Datetime.new("ns").base_type
  #   # => Polars::Datetime
  # @example
  #   Polars::List.new(Polars::Int32).base_type
  #   # => Polars::List
  # @example
  #   Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)]).base_type
  #   # => Polars::Struct
  def self.base_type
    self
  end

  # Return this DataType's fundamental/root type class.
  #
  # @return [Class]
  def base_type
    # The receiver is always a DataType instance here, so its class is the root type.
    self.class
  end

  # Check if this DataType is the same as another DataType.
  #
  # A type class equals itself and any instance of itself.
  #
  # @param other [Object]
  # @return [Boolean]
  def self.==(other)
    eql?(other) || other.is_a?(self)
  end

  # Check if this DataType is the same as another DataType.
  #
  # Compared against a class, this is an `is_a?` check; compared against
  # anything else, the other object must be an instance of exactly this class.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    if other.is_a?(Class)
      is_a?(other)
    else
      other.instance_of?(self.class)
    end
  end

  # Check whether the data type is a numeric type.
  #
  # @return [Boolean]
  def self.numeric?
    subtype_of?(NumericType)
  end

  # Check whether the data type is a decimal type.
  #
  # @return [Boolean]
  def self.decimal?
    self == Decimal
  end

  # Check whether the data type is an integer type.
  #
  # @return [Boolean]
  def self.integer?
    subtype_of?(IntegerType)
  end

  # Check whether the data type is a signed integer type.
  #
  # @return [Boolean]
  def self.signed_integer?
    subtype_of?(SignedIntegerType)
  end

  # Check whether the data type is an unsigned integer type.
  #
  # @return [Boolean]
  def self.unsigned_integer?
    subtype_of?(UnsignedIntegerType)
  end

  # Check whether the data type is a float type.
  #
  # @return [Boolean]
  def self.float?
    subtype_of?(FloatType)
  end

  # Check whether the data type is a temporal type.
  #
  # @return [Boolean]
  def self.temporal?
    subtype_of?(TemporalType)
  end

  # Check whether the data type is a nested type.
  #
  # @return [Boolean]
  def self.nested?
    subtype_of?(NestedType)
  end

  # Strict subclass test that always yields true or false.
  #
  # `Module#<` returns nil when the two classes are unrelated; the predicates
  # above are documented as returning a Boolean, so nil is normalised to false.
  #
  # @param base [Class]
  # @return [Boolean]
  def self.subtype_of?(base)
    (self < base) == true
  end
  private_class_method :subtype_of?

  # Instance-level predicates simply delegate to the class-level ones.
  %i[numeric? decimal? integer? signed_integer? unsigned_integer? float? temporal? nested?].each do |predicate|
    define_method(predicate) do
      self.class.public_send(predicate)
    end
  end

  # Returns a string representing the data type.
  #
  # @return [String]
  def to_s
    self.class.name
  end

  # Returns a string representing the data type.
  #
  # @return [String]
  def inspect
    to_s
  end
end
122
+
123
# Abstract parent of every numeric data type.
class NumericType < DataType; end

# Abstract parent of the integer data types.
class IntegerType < NumericType; end

# @private
# Alternate constant name pointing at the same class as IntegerType.
IntegralType = IntegerType

# Abstract parent of the signed integer data types.
class SignedIntegerType < IntegerType; end

# Abstract parent of the unsigned integer data types.
class UnsignedIntegerType < IntegerType; end

# Abstract parent of the floating point data types.
class FloatType < NumericType; end

# Abstract parent of the temporal data types.
class TemporalType < DataType; end

# Abstract parent of the nested data types.
class NestedType < DataType; end

# Signed integer type, 8 bits wide.
class Int8 < SignedIntegerType; end

# Signed integer type, 16 bits wide.
class Int16 < SignedIntegerType; end

# Signed integer type, 32 bits wide.
class Int32 < SignedIntegerType; end

# Signed integer type, 64 bits wide.
class Int64 < SignedIntegerType; end

# Unsigned integer type, 8 bits wide.
class UInt8 < UnsignedIntegerType; end

# Unsigned integer type, 16 bits wide.
class UInt16 < UnsignedIntegerType; end

# Unsigned integer type, 32 bits wide.
class UInt32 < UnsignedIntegerType; end

# Unsigned integer type, 64 bits wide.
class UInt64 < UnsignedIntegerType; end

# Floating point type, 32 bits wide.
class Float32 < FloatType; end

# Floating point type, 64 bits wide.
class Float64 < FloatType; end
193
+
194
# Decimal 128-bit type carrying a precision and a scale.
#
# NOTE: this is an experimental work-in-progress feature and may not work as expected.
class Decimal < NumericType
  attr_reader :precision, :scale

  # @param precision [Object] stored as given and exposed via {#precision}
  # @param scale [Object] stored as given and exposed via {#scale}
  def initialize(precision, scale)
    @precision, @scale = precision, scale
  end

  # Equal to the bare `Decimal` class, and to any other Decimal instance
  # with matching precision and scale.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(Decimal)
    return false unless other.is_a?(Decimal)

    precision == other.precision && scale == other.scale
  end

  # @return [String] class name followed by the inspected precision and scale
  def to_s
    details = "precision: #{precision.inspect}, scale: #{scale.inspect}"
    "#{self.class.name}(#{details})"
  end
end
219
+
220
# Data type for boolean values.
class Boolean < DataType; end

# Data type for UTF-8 encoded strings.
class String < DataType; end

# @private
# Utf8 is kept as a second constant name for the String data type.
Utf8 = String

# Data type for binary values.
class Binary < DataType; end

# Data type for calendar dates.
class Date < TemporalType; end

# Data type for a time of day.
class Time < TemporalType; end
243
+
244
# Calendar date and time type, parameterised by a time unit and an optional time zone.
class Datetime < TemporalType
  attr_reader :time_unit, :time_zone
  alias_method :tu, :time_unit

  # @param time_unit [String, nil] falls back to "us" when nil is given
  # @param time_zone [Object, nil] stored as given
  def initialize(time_unit = "us", time_zone = nil)
    @time_zone = time_zone
    @time_unit = time_unit || "us"
  end

  # Equal to the bare `Datetime` class, and to any other Datetime instance
  # with the same time unit and time zone.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(Datetime)
    return false unless other.is_a?(Datetime)

    time_unit == other.time_unit && time_zone == other.time_zone
  end

  # @return [String] class name followed by the inspected time unit and time zone
  def to_s
    details = "time_unit: #{time_unit.inspect}, time_zone: #{time_zone.inspect}"
    "#{self.class.name}(#{details})"
  end
end
268
+
269
# Time duration/delta type, parameterised by a time unit.
class Duration < TemporalType
  attr_reader :time_unit
  alias_method :tu, :time_unit

  # @param time_unit [String] stored as given; defaults to "us" when omitted
  def initialize(time_unit = "us")
    @time_unit = time_unit
  end

  # Equal to the bare `Duration` class, and to any other Duration instance
  # with the same time unit.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(Duration)
    return false unless other.is_a?(Duration)

    time_unit == other.time_unit
  end

  # @return [String] class name followed by the inspected time unit
  def to_s
    unit = time_unit.inspect
    "#{self.class.name}(time_unit: #{unit})"
  end
end
292
+
293
# A categorical encoding of a set of strings.
class Categorical < DataType
  # The ordering passed to the constructor. Without this reader the stored
  # value could never be read back by callers.
  #
  # @return [String]
  attr_reader :ordering

  # @param ordering [String] stored as given; defaults to "physical"
  def initialize(ordering = "physical")
    @ordering = ordering
  end
end
299
+
300
# A fixed set categorical encoding of a set of strings.
#
# NOTE: this is an experimental work-in-progress feature and may not work as expected.
class Enum < DataType
  attr_reader :categories

  # Build an Enum from its categories.
  #
  # Anything that is not already a Series is wrapped with `Series.new`.
  # An empty input yields an empty String series named "category".
  #
  # @param categories [Series, Object] the allowed category values
  # @raise [TypeError] if the categories contain nulls or are not strings
  # @raise [ArgumentError] if the categories contain duplicates
  def initialize(categories)
    categories = Series.new(categories) unless categories.is_a?(Series)

    if categories.empty?
      # Assign the instance variable directly: only a reader is defined for
      # `categories`, so `self.categories = ...` would raise NoMethodError.
      @categories = Series.new("category", [], dtype: String)
      return
    end

    if categories.null_count > 0
      msg = "Enum categories must not contain null values"
      raise TypeError, msg
    end

    if (dtype = categories.dtype) != String
      msg = "Enum categories must be strings; found data of type #{dtype}"
      raise TypeError, msg
    end

    if categories.n_unique != categories.len
      duplicate = categories.filter(categories.is_duplicated)[0]
      msg = "Enum categories must be unique; found duplicate #{duplicate}"
      raise ArgumentError, msg
    end

    @categories = categories.rechunk.alias("category")
  end

  # Equal to the bare `Enum` class, and to any other Enum instance whose
  # categories compare equal.
  #
  # @param other [Object]
  # @return [Object] true/false, or the result of comparing the two category series
  def ==(other)
    if other.eql?(Enum)
      true
    elsif other.is_a?(Enum)
      categories == other.categories
    else
      false
    end
  end

  # @return [String] class name followed by the inspected category list
  def to_s
    "#{self.class.name}(categories: #{categories.to_a.inspect})"
  end
end
349
+
350
# Data type that wraps arbitrary Ruby objects.
class Object < DataType; end

# Data type representing Null / None values.
class Null < DataType; end

# Data type for values whose type could not be determined statically.
class Unknown < DataType; end
361
+
362
# Nested list type, parameterised by the data type of its elements.
class List < NestedType
  attr_reader :inner

  # @param inner [Object] element type; passed through `Utils.rb_type_to_dtype`
  def initialize(inner)
    @inner = Utils.rb_type_to_dtype(inner)
  end

  # Equal to the bare `List` class. Two List instances are equal when either
  # has no inner type, or when their inner types compare equal.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(List)
    return false unless other.is_a?(List)

    @inner.nil? || other.inner.nil? || @inner == other.inner
  end

  # @return [String] class name followed by the inner type
  def to_s
    element_type = inner
    "#{self.class.name}(#{element_type})"
  end
end
384
+
385
# Nested array type, parameterised by an inner data type and a width.
class Array < NestedType
  attr_reader :inner, :width

  # Accepts the arguments in either order: when `width` is a DataType
  # (instance or subclass), the two arguments are swapped.
  #
  # @param inner [Object] element type; passed through `Utils.rb_type_to_dtype` when truthy
  # @param width [Object] stored as given
  def initialize(inner, width)
    width_is_dtype = width.is_a?(DataType) || (width.is_a?(Class) && width < DataType)
    inner, width = width, inner if width_is_dtype

    @inner = Utils.rb_type_to_dtype(inner) if inner
    @width = width
  end

  # Equal to the bare `Array` class. Two Array instances are equal when their
  # widths match and either has no inner type or the inner types compare equal.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(Array)
    return false unless other.is_a?(Array)
    return false if @width != other.width

    @inner.nil? || other.inner.nil? || @inner == other.inner
  end

  # @return [String] class name followed by the inner type and inspected width
  def to_s
    shape = "#{inner}, width: #{width.inspect}"
    "#{self.class.name}(#{shape})"
  end
end
417
+
418
# Definition of a single field within a `Struct` DataType.
class Field
  attr_reader :name, :dtype

  # @param name [Object] field name, stored as given
  # @param dtype [Object] field type; passed through `Utils.rb_type_to_dtype`
  def initialize(name, dtype)
    @name = name
    @dtype = Utils.rb_type_to_dtype(dtype)
  end

  # Two fields are equal when both their names and data types are equal.
  #
  # Anything that is not a Field compares as unequal instead of raising
  # NoMethodError (e.g. `field == nil`).
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return false unless other.is_a?(Field)

    name == other.name && dtype == other.dtype
  end

  # @return [String] class name followed by the inspected name and the data type
  def to_s
    "#{self.class.name}(#{name.inspect}, #{dtype})"
  end
end
435
+
436
# Struct composite type, made up of a list of fields.
class Struct < NestedType
  attr_reader :fields

  # @param fields [Hash, Object] a Hash of name => dtype is converted into
  #   `Field` objects; any other value is stored as given
  def initialize(fields)
    @fields =
      if fields.is_a?(Hash)
        fields.map { |field_name, field_dtype| Field.new(field_name, field_dtype) }
      else
        fields
      end
  end

  # Equal to the bare `Struct` class, and to any other Struct instance whose
  # fields compare equal.
  #
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return true if other.eql?(Struct)
    return false unless other.is_a?(Struct)

    fields == other.fields
  end

  # @return [String] class name followed by the name => dtype mapping
  def to_s
    "#{self.class.name}(#{to_schema})"
  end

  # @return [Hash] mapping of each field's name to its data type
  def to_schema
    @fields.to_h { |field| [field.name, field.dtype] }
  end
end
466
+ end