polars-df 0.8.0-x86_64-linux → 0.9.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -1
  3. data/Cargo.lock +107 -59
  4. data/Cargo.toml +0 -3
  5. data/LICENSE-THIRD-PARTY.txt +1726 -754
  6. data/LICENSE.txt +1 -1
  7. data/README.md +2 -2
  8. data/lib/polars/3.1/polars.so +0 -0
  9. data/lib/polars/3.2/polars.so +0 -0
  10. data/lib/polars/3.3/polars.so +0 -0
  11. data/lib/polars/array_expr.rb +449 -0
  12. data/lib/polars/array_name_space.rb +346 -0
  13. data/lib/polars/cat_expr.rb +24 -0
  14. data/lib/polars/cat_name_space.rb +75 -0
  15. data/lib/polars/config.rb +2 -2
  16. data/lib/polars/data_frame.rb +179 -43
  17. data/lib/polars/data_types.rb +191 -28
  18. data/lib/polars/date_time_expr.rb +31 -14
  19. data/lib/polars/exceptions.rb +12 -1
  20. data/lib/polars/expr.rb +866 -186
  21. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  22. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  23. data/lib/polars/functions/as_datatype.rb +248 -0
  24. data/lib/polars/functions/col.rb +47 -0
  25. data/lib/polars/functions/eager.rb +182 -0
  26. data/lib/polars/functions/lazy.rb +1280 -0
  27. data/lib/polars/functions/len.rb +49 -0
  28. data/lib/polars/functions/lit.rb +35 -0
  29. data/lib/polars/functions/random.rb +16 -0
  30. data/lib/polars/functions/range/date_range.rb +103 -0
  31. data/lib/polars/functions/range/int_range.rb +51 -0
  32. data/lib/polars/functions/repeat.rb +144 -0
  33. data/lib/polars/functions/whenthen.rb +27 -0
  34. data/lib/polars/functions.rb +29 -416
  35. data/lib/polars/group_by.rb +2 -2
  36. data/lib/polars/io.rb +18 -25
  37. data/lib/polars/lazy_frame.rb +367 -53
  38. data/lib/polars/list_expr.rb +152 -6
  39. data/lib/polars/list_name_space.rb +102 -0
  40. data/lib/polars/meta_expr.rb +175 -7
  41. data/lib/polars/series.rb +273 -34
  42. data/lib/polars/string_cache.rb +75 -0
  43. data/lib/polars/string_expr.rb +412 -96
  44. data/lib/polars/string_name_space.rb +4 -4
  45. data/lib/polars/testing.rb +507 -0
  46. data/lib/polars/utils.rb +52 -8
  47. data/lib/polars/version.rb +1 -1
  48. data/lib/polars.rb +15 -2
  49. metadata +33 -4
  50. data/lib/polars/lazy_functions.rb +0 -1181
@@ -1,24 +1,122 @@
1
1
  module Polars
2
2
  # Base class for all Polars data types.
3
3
  class DataType
4
+ # Return this DataType's fundamental/root type class.
5
+ #
6
+ # @return [Class]
7
+ #
8
+ # @example
9
+ # Polars::Datetime.new("ns").base_type
10
+ # # => Polars::Datetime
11
+ # @example
12
+ # Polars::List.new(Polars::Int32).base_type
13
+ # # => Polars::List
14
+ # @example
15
+ # Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)]).base_type
16
+ # # => Polars::Struct
4
17
  def self.base_type
5
18
  self
6
19
  end
7
20
 
21
+ # Return this DataType's fundamental/root type class.
22
+ #
23
+ # @return [Class]
8
24
  def base_type
9
25
  is_a?(DataType) ? self.class : self
10
26
  end
11
27
 
28
+ # Check if this DataType is the same as another DataType.
29
+ #
30
+ # @return [Boolean]
31
+ def self.==(other)
32
+ eql?(other) || other.is_a?(self)
33
+ end
34
+
35
+ # Check if this DataType is the same as another DataType.
36
+ #
37
+ # @return [Boolean]
38
+ def ==(other)
39
+ if other.is_a?(Class)
40
+ is_a?(other)
41
+ else
42
+ other.instance_of?(self.class)
43
+ end
44
+ end
45
+
46
+ # Check whether the data type is a numeric type.
47
+ #
48
+ # @return [Boolean]
49
+ def self.numeric?
50
+ self < NumericType
51
+ end
52
+
53
+ # Check whether the data type is a decimal type.
54
+ #
55
+ # @return [Boolean]
56
+ def self.decimal?
57
+ self == Decimal
58
+ end
59
+
60
+ # Check whether the data type is an integer type.
61
+ #
62
+ # @return [Boolean]
63
+ def self.integer?
64
+ self < IntegerType
65
+ end
66
+
67
+ # Check whether the data type is a signed integer type.
68
+ #
69
+ # @return [Boolean]
70
+ def self.signed_integer?
71
+ self < SignedIntegerType
72
+ end
73
+
74
+ # Check whether the data type is an unsigned integer type.
75
+ #
76
+ # @return [Boolean]
77
+ def self.unsigned_integer?
78
+ self < UnsignedIntegerType
79
+ end
80
+
81
+ # Check whether the data type is a float type.
82
+ #
83
+ # @return [Boolean]
84
+ def self.float?
85
+ self < FloatType
86
+ end
87
+
88
+ # Check whether the data type is a temporal type.
89
+ #
90
+ # @return [Boolean]
91
+ def self.temporal?
92
+ self < TemporalType
93
+ end
94
+
95
+ # Check whether the data type is a nested type.
96
+ #
97
+ # @return [Boolean]
12
98
  def self.nested?
13
- false
99
+ self < NestedType
14
100
  end
15
101
 
16
- def nested?
17
- self.class.nested?
102
+ [:numeric?, :decimal?, :integer?, :signed_integer?, :unsigned_integer?, :float?, :temporal?, :nested?].each do |v|
103
+ define_method(v) do
104
+ self.class.public_send(v)
105
+ end
18
106
  end
19
107
 
20
- def self.==(other)
21
- eql?(other) || other.is_a?(self)
108
+ # Returns a string representing the data type.
109
+ #
110
+ # @return [String]
111
+ def to_s
112
+ self.class.name
113
+ end
114
+
115
+ # Returns a string representing the data type.
116
+ #
117
+ # @return [String]
118
+ def inspect
119
+ to_s
22
120
  end
23
121
  end
24
122
 
@@ -27,15 +125,22 @@ module Polars
27
125
  end
28
126
 
29
127
  # Base class for integral data types.
30
- class IntegralType < NumericType
128
+ class IntegerType < NumericType
31
129
  end
32
130
 
33
- # Base class for fractional data types.
34
- class FractionalType < NumericType
131
+ # @private
132
+ IntegralType = IntegerType
133
+
134
+ # Base class for signed integer data types.
135
+ class SignedIntegerType < IntegerType
136
+ end
137
+
138
+ # Base class for unsigned integer data types.
139
+ class UnsignedIntegerType < IntegerType
35
140
  end
36
141
 
37
142
  # Base class for float data types.
38
- class FloatType < FractionalType
143
+ class FloatType < NumericType
39
144
  end
40
145
 
41
146
  # Base class for temporal data types.
@@ -44,41 +149,38 @@ module Polars
44
149
 
45
150
  # Base class for nested data types.
46
151
  class NestedType < DataType
47
- def self.nested?
48
- true
49
- end
50
152
  end
51
153
 
52
154
  # 8-bit signed integer type.
53
- class Int8 < IntegralType
155
+ class Int8 < SignedIntegerType
54
156
  end
55
157
 
56
158
  # 16-bit signed integer type.
57
- class Int16 < IntegralType
159
+ class Int16 < SignedIntegerType
58
160
  end
59
161
 
60
162
  # 32-bit signed integer type.
61
- class Int32 < IntegralType
163
+ class Int32 < SignedIntegerType
62
164
  end
63
165
 
64
166
  # 64-bit signed integer type.
65
- class Int64 < IntegralType
167
+ class Int64 < SignedIntegerType
66
168
  end
67
169
 
68
170
  # 8-bit unsigned integer type.
69
- class UInt8 < IntegralType
171
+ class UInt8 < UnsignedIntegerType
70
172
  end
71
173
 
72
174
  # 16-bit unsigned integer type.
73
- class UInt16 < IntegralType
175
+ class UInt16 < UnsignedIntegerType
74
176
  end
75
177
 
76
178
  # 32-bit unsigned integer type.
77
- class UInt32 < IntegralType
179
+ class UInt32 < UnsignedIntegerType
78
180
  end
79
181
 
80
182
  # 64-bit unsigned integer type.
81
- class UInt64 < IntegralType
183
+ class UInt64 < UnsignedIntegerType
82
184
  end
83
185
 
84
186
  # 32-bit floating point type.
@@ -92,7 +194,7 @@ module Polars
92
194
  # Decimal 128-bit type with an optional precision and non-negative scale.
93
195
  #
94
196
  # NOTE: this is an experimental work-in-progress feature and may not work as expected.
95
- class Decimal < FractionalType
197
+ class Decimal < NumericType
96
198
  attr_reader :precision, :scale
97
199
 
98
200
  def initialize(precision, scale)
@@ -123,6 +225,7 @@ module Polars
123
225
  class String < DataType
124
226
  end
125
227
 
228
+ # @private
126
229
  # Allow Utf8 as an alias for String
127
230
  Utf8 = String
128
231
 
@@ -189,6 +292,59 @@ module Polars
189
292
 
190
293
  # A categorical encoding of a set of strings.
191
294
  class Categorical < DataType
295
+ def initialize(ordering = "physical")
296
+ @ordering = ordering
297
+ end
298
+ end
299
+
300
+ # A fixed set categorical encoding of a set of strings.
301
+ #
302
+ # NOTE: this is an experimental work-in-progress feature and may not work as expected.
303
+ class Enum < DataType
304
+ attr_reader :categories
305
+
306
+ def initialize(categories)
307
+ if !categories.is_a?(Series)
308
+ categories = Series.new(categories)
309
+ end
310
+
311
+ if categories.empty?
312
+ self.categories = Series.new("category", [], dtype: String)
313
+ return
314
+ end
315
+
316
+ if categories.null_count > 0
317
+ msg = "Enum categories must not contain null values"
318
+ raise TypeError, msg
319
+ end
320
+
321
+ if (dtype = categories.dtype) != String
322
+ msg = "Enum categories must be strings; found data of type #{dtype}"
323
+ raise TypeError, msg
324
+ end
325
+
326
+ if categories.n_unique != categories.len
327
+ duplicate = categories.filter(categories.is_duplicated)[0]
328
+ msg = "Enum categories must be unique; found duplicate #{duplicate}"
329
+ raise ArgumentError, msg
330
+ end
331
+
332
+ @categories = categories.rechunk.alias("category")
333
+ end
334
+
335
+ def ==(other)
336
+ if other.eql?(Enum)
337
+ true
338
+ elsif other.is_a?(Enum)
339
+ categories == other.categories
340
+ else
341
+ false
342
+ end
343
+ end
344
+
345
+ def to_s
346
+ "#{self.class.name}(categories: #{categories.to_a.inspect})"
347
+ end
192
348
  end
193
349
 
194
350
  # Type for wrapping arbitrary Ruby objects.
@@ -228,27 +384,34 @@ module Polars
228
384
 
229
385
  # Nested list/array type.
230
386
  class Array < NestedType
231
- attr_reader :width, :inner
387
+ attr_reader :inner, :width
232
388
 
233
- def initialize(width, inner = nil)
234
- @width = width
389
+ def initialize(inner, width)
390
+ if width.is_a?(DataType) || (width.is_a?(Class) && width < DataType)
391
+ inner, width = width, inner
392
+ end
235
393
  @inner = Utils.rb_type_to_dtype(inner) if inner
394
+ @width = width
236
395
  end
237
396
 
238
- # TODO check width?
239
397
  def ==(other)
240
398
  if other.eql?(Array)
241
399
  true
242
400
  elsif other.is_a?(Array)
243
- @inner.nil? || other.inner.nil? || @inner == other.inner
401
+ if @width != other.width
402
+ false
403
+ elsif @inner.nil? || other.inner.nil?
404
+ true
405
+ else
406
+ @inner == other.inner
407
+ end
244
408
  else
245
409
  false
246
410
  end
247
411
  end
248
412
 
249
- # TODO add width?
250
413
  def to_s
251
- "#{self.class.name}(#{inner})"
414
+ "#{self.class.name}(#{inner}, width: #{width.inspect})"
252
415
  end
253
416
  end
254
417
 
@@ -1066,9 +1066,10 @@ module Polars
1066
1066
  # # │ 2020-04-01 00:00:00 ┆ 31 │
1067
1067
  # # │ 2020-05-01 00:00:00 ┆ 30 │
1068
1068
  # # └─────────────────────┴───────────┘
1069
- def days
1070
- Utils.wrap_expr(_rbexpr.duration_days)
1069
+ def total_days
1070
+ Utils.wrap_expr(_rbexpr.dt_total_days)
1071
1071
  end
1072
+ alias_method :days, :total_days
1072
1073
 
1073
1074
  # Extract the hours from a Duration type.
1074
1075
  #
@@ -1100,9 +1101,10 @@ module Polars
1100
1101
  # # │ 2020-01-03 00:00:00 ┆ 24 │
1101
1102
  # # │ 2020-01-04 00:00:00 ┆ 24 │
1102
1103
  # # └─────────────────────┴────────────┘
1103
- def hours
1104
- Utils.wrap_expr(_rbexpr.duration_hours)
1104
+ def total_hours
1105
+ Utils.wrap_expr(_rbexpr.dt_total_hours)
1105
1106
  end
1107
+ alias_method :hours, :total_hours
1106
1108
 
1107
1109
  # Extract the minutes from a Duration type.
1108
1110
  #
@@ -1134,9 +1136,10 @@ module Polars
1134
1136
  # # │ 2020-01-03 00:00:00 ┆ 1440 │
1135
1137
  # # │ 2020-01-04 00:00:00 ┆ 1440 │
1136
1138
  # # └─────────────────────┴──────────────┘
1137
- def minutes
1138
- Utils.wrap_expr(_rbexpr.duration_minutes)
1139
+ def total_minutes
1140
+ Utils.wrap_expr(_rbexpr.dt_total_minutes)
1139
1141
  end
1142
+ alias_method :minutes, :total_minutes
1140
1143
 
1141
1144
  # Extract the seconds from a Duration type.
1142
1145
  #
@@ -1169,9 +1172,10 @@ module Polars
1169
1172
  # # │ 2020-01-01 00:03:00 ┆ 60 │
1170
1173
  # # │ 2020-01-01 00:04:00 ┆ 60 │
1171
1174
  # # └─────────────────────┴──────────────┘
1172
- def seconds
1173
- Utils.wrap_expr(_rbexpr.duration_seconds)
1175
+ def total_seconds
1176
+ Utils.wrap_expr(_rbexpr.dt_total_seconds)
1174
1177
  end
1178
+ alias_method :seconds, :total_seconds
1175
1179
 
1176
1180
  # Extract the milliseconds from a Duration type.
1177
1181
  #
@@ -1202,15 +1206,18 @@ module Polars
1202
1206
  # # │ 2020-01-01 00:00:00.001 ┆ 1 │
1203
1207
  # # │ 2020-01-01 00:00:00.002 ┆ 1 │
1204
1208
  # # │ 2020-01-01 00:00:00.003 ┆ 1 │
1209
+ # # │ 2020-01-01 00:00:00.004 ┆ 1 │
1205
1210
  # # │ … ┆ … │
1211
+ # # │ 2020-01-01 00:00:00.996 ┆ 1 │
1206
1212
  # # │ 2020-01-01 00:00:00.997 ┆ 1 │
1207
1213
  # # │ 2020-01-01 00:00:00.998 ┆ 1 │
1208
1214
  # # │ 2020-01-01 00:00:00.999 ┆ 1 │
1209
1215
  # # │ 2020-01-01 00:00:01 ┆ 1 │
1210
1216
  # # └─────────────────────────┴───────────────────┘
1211
- def milliseconds
1212
- Utils.wrap_expr(_rbexpr.duration_milliseconds)
1217
+ def total_milliseconds
1218
+ Utils.wrap_expr(_rbexpr.dt_total_milliseconds)
1213
1219
  end
1220
+ alias_method :milliseconds, :total_milliseconds
1214
1221
 
1215
1222
  # Extract the microseconds from a Duration type.
1216
1223
  #
@@ -1241,15 +1248,18 @@ module Polars
1241
1248
  # # │ 2020-01-01 00:00:00.001 ┆ 1000 │
1242
1249
  # # │ 2020-01-01 00:00:00.002 ┆ 1000 │
1243
1250
  # # │ 2020-01-01 00:00:00.003 ┆ 1000 │
1251
+ # # │ 2020-01-01 00:00:00.004 ┆ 1000 │
1244
1252
  # # │ … ┆ … │
1253
+ # # │ 2020-01-01 00:00:00.996 ┆ 1000 │
1245
1254
  # # │ 2020-01-01 00:00:00.997 ┆ 1000 │
1246
1255
  # # │ 2020-01-01 00:00:00.998 ┆ 1000 │
1247
1256
  # # │ 2020-01-01 00:00:00.999 ┆ 1000 │
1248
1257
  # # │ 2020-01-01 00:00:01 ┆ 1000 │
1249
1258
  # # └─────────────────────────┴───────────────────┘
1250
- def microseconds
1251
- Utils.wrap_expr(_rbexpr.duration_microseconds)
1259
+ def total_microseconds
1260
+ Utils.wrap_expr(_rbexpr.dt_total_microseconds)
1252
1261
  end
1262
+ alias_method :microseconds, :total_microseconds
1253
1263
 
1254
1264
  # Extract the nanoseconds from a Duration type.
1255
1265
  #
@@ -1280,15 +1290,18 @@ module Polars
1280
1290
  # # │ 2020-01-01 00:00:00.001 ┆ 1000000 │
1281
1291
  # # │ 2020-01-01 00:00:00.002 ┆ 1000000 │
1282
1292
  # # │ 2020-01-01 00:00:00.003 ┆ 1000000 │
1293
+ # # │ 2020-01-01 00:00:00.004 ┆ 1000000 │
1283
1294
  # # │ … ┆ … │
1295
+ # # │ 2020-01-01 00:00:00.996 ┆ 1000000 │
1284
1296
  # # │ 2020-01-01 00:00:00.997 ┆ 1000000 │
1285
1297
  # # │ 2020-01-01 00:00:00.998 ┆ 1000000 │
1286
1298
  # # │ 2020-01-01 00:00:00.999 ┆ 1000000 │
1287
1299
  # # │ 2020-01-01 00:00:01 ┆ 1000000 │
1288
1300
  # # └─────────────────────────┴──────────────────┘
1289
- def nanoseconds
1290
- Utils.wrap_expr(_rbexpr.duration_nanoseconds)
1301
+ def total_nanoseconds
1302
+ Utils.wrap_expr(_rbexpr.dt_total_nanoseconds)
1291
1303
  end
1304
+ alias_method :nanoseconds, :total_nanoseconds
1292
1305
 
1293
1306
  # Offset this date by a relative time offset.
1294
1307
  #
@@ -1372,7 +1385,9 @@ module Polars
1372
1385
  # # │ 2000-02-01 02:00:00 │
1373
1386
  # # │ 2000-03-01 02:00:00 │
1374
1387
  # # │ 2000-04-01 02:00:00 │
1388
+ # # │ 2000-05-01 02:00:00 │
1375
1389
  # # │ … │
1390
+ # # │ 2000-08-01 02:00:00 │
1376
1391
  # # │ 2000-09-01 02:00:00 │
1377
1392
  # # │ 2000-10-01 02:00:00 │
1378
1393
  # # │ 2000-11-01 02:00:00 │
@@ -1408,7 +1423,9 @@ module Polars
1408
1423
  # # │ 2000-02-29 02:00:00 │
1409
1424
  # # │ 2000-03-31 02:00:00 │
1410
1425
  # # │ 2000-04-30 02:00:00 │
1426
+ # # │ 2000-05-31 02:00:00 │
1411
1427
  # # │ … │
1428
+ # # │ 2000-08-31 02:00:00 │
1412
1429
  # # │ 2000-09-30 02:00:00 │
1413
1430
  # # │ 2000-10-31 02:00:00 │
1414
1431
  # # │ 2000-11-30 02:00:00 │
@@ -1,15 +1,26 @@
1
1
  module Polars
2
2
  # @private
3
+ # Base class for all Polars errors.
3
4
  class Error < StandardError; end
4
5
 
5
6
  # @private
7
+ # Exception raised when an unsupported testing assert is made.
8
+ class InvalidAssert < Error; end
9
+
10
+ # @private
11
+ # Exception raised when the number of returned rows does not match expectation.
6
12
  class RowsException < Error; end
7
13
 
8
14
  # @private
15
+ # Exception raised when no rows are returned, but at least one row is expected.
16
+ class NoRowsReturned < RowsException; end
17
+
18
+ # @private
19
+ # Exception raised when more rows than expected are returned.
9
20
  class TooManyRowsReturned < RowsException; end
10
21
 
11
22
  # @private
12
- class NoRowsReturned < RowsException; end
23
+ class AssertionError < Error; end
13
24
 
14
25
  # @private
15
26
  class Todo < Error