polars-df 0.8.0-aarch64-linux → 0.10.0-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +42 -1
  3. data/Cargo.lock +159 -66
  4. data/Cargo.toml +0 -3
  5. data/LICENSE-THIRD-PARTY.txt +3112 -1613
  6. data/LICENSE.txt +1 -1
  7. data/README.md +3 -2
  8. data/lib/polars/3.1/polars.so +0 -0
  9. data/lib/polars/3.2/polars.so +0 -0
  10. data/lib/polars/3.3/polars.so +0 -0
  11. data/lib/polars/array_expr.rb +453 -0
  12. data/lib/polars/array_name_space.rb +346 -0
  13. data/lib/polars/batched_csv_reader.rb +4 -2
  14. data/lib/polars/cat_expr.rb +24 -0
  15. data/lib/polars/cat_name_space.rb +75 -0
  16. data/lib/polars/config.rb +2 -2
  17. data/lib/polars/data_frame.rb +306 -96
  18. data/lib/polars/data_types.rb +191 -28
  19. data/lib/polars/date_time_expr.rb +41 -18
  20. data/lib/polars/date_time_name_space.rb +9 -3
  21. data/lib/polars/exceptions.rb +12 -1
  22. data/lib/polars/expr.rb +898 -215
  23. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  24. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  25. data/lib/polars/functions/as_datatype.rb +248 -0
  26. data/lib/polars/functions/col.rb +47 -0
  27. data/lib/polars/functions/eager.rb +182 -0
  28. data/lib/polars/functions/lazy.rb +1280 -0
  29. data/lib/polars/functions/len.rb +49 -0
  30. data/lib/polars/functions/lit.rb +35 -0
  31. data/lib/polars/functions/random.rb +16 -0
  32. data/lib/polars/functions/range/date_range.rb +103 -0
  33. data/lib/polars/functions/range/int_range.rb +51 -0
  34. data/lib/polars/functions/repeat.rb +144 -0
  35. data/lib/polars/functions/whenthen.rb +96 -0
  36. data/lib/polars/functions.rb +29 -416
  37. data/lib/polars/group_by.rb +2 -2
  38. data/lib/polars/io.rb +36 -31
  39. data/lib/polars/lazy_frame.rb +405 -88
  40. data/lib/polars/list_expr.rb +158 -8
  41. data/lib/polars/list_name_space.rb +102 -0
  42. data/lib/polars/meta_expr.rb +175 -7
  43. data/lib/polars/series.rb +282 -41
  44. data/lib/polars/string_cache.rb +75 -0
  45. data/lib/polars/string_expr.rb +413 -96
  46. data/lib/polars/string_name_space.rb +4 -4
  47. data/lib/polars/testing.rb +507 -0
  48. data/lib/polars/utils.rb +106 -8
  49. data/lib/polars/version.rb +1 -1
  50. data/lib/polars/whenthen.rb +83 -0
  51. data/lib/polars.rb +16 -4
  52. metadata +34 -6
  53. data/lib/polars/lazy_functions.rb +0 -1181
  54. data/lib/polars/when.rb +0 -16
  55. data/lib/polars/when_then.rb +0 -19
@@ -1,24 +1,122 @@
1
1
  module Polars
2
2
  # Base class for all Polars data types.
3
3
  class DataType
4
+ # Return this DataType's fundamental/root type class.
5
+ #
6
+ # @return [Class]
7
+ #
8
+ # @example
9
+ # Polars::Datetime.new("ns").base_type
10
+ # # => Polars::Datetime
11
+ # @example
12
+ # Polars::List.new(Polars::Int32).base_type
13
+ # # => Polars::List
14
+ # @example
15
+ # Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)]).base_type
16
+ # # => Polars::Struct
4
17
  def self.base_type
5
18
  self
6
19
  end
7
20
 
21
+ # Return this DataType's fundamental/root type class.
22
+ #
23
+ # @return [Class]
8
24
  def base_type
9
25
  is_a?(DataType) ? self.class : self
10
26
  end
11
27
 
28
+ # Check if this DataType is the same as another DataType.
29
+ #
30
+ # @return [Boolean]
31
+ def self.==(other)
32
+ eql?(other) || other.is_a?(self)
33
+ end
34
+
35
+ # Check if this DataType is the same as another DataType.
36
+ #
37
+ # @return [Boolean]
38
+ def ==(other)
39
+ if other.is_a?(Class)
40
+ is_a?(other)
41
+ else
42
+ other.instance_of?(self.class)
43
+ end
44
+ end
45
+
46
+ # Check whether the data type is a numeric type.
47
+ #
48
+ # @return [Boolean]
49
+ def self.numeric?
50
+ self < NumericType
51
+ end
52
+
53
+ # Check whether the data type is a decimal type.
54
+ #
55
+ # @return [Boolean]
56
+ def self.decimal?
57
+ self == Decimal
58
+ end
59
+
60
+ # Check whether the data type is an integer type.
61
+ #
62
+ # @return [Boolean]
63
+ def self.integer?
64
+ self < IntegerType
65
+ end
66
+
67
+ # Check whether the data type is a signed integer type.
68
+ #
69
+ # @return [Boolean]
70
+ def self.signed_integer?
71
+ self < SignedIntegerType
72
+ end
73
+
74
+ # Check whether the data type is an unsigned integer type.
75
+ #
76
+ # @return [Boolean]
77
+ def self.unsigned_integer?
78
+ self < UnsignedIntegerType
79
+ end
80
+
81
+ # Check whether the data type is a float type.
82
+ #
83
+ # @return [Boolean]
84
+ def self.float?
85
+ self < FloatType
86
+ end
87
+
88
+ # Check whether the data type is a temporal type.
89
+ #
90
+ # @return [Boolean]
91
+ def self.temporal?
92
+ self < TemporalType
93
+ end
94
+
95
+ # Check whether the data type is a nested type.
96
+ #
97
+ # @return [Boolean]
12
98
  def self.nested?
13
- false
99
+ self < NestedType
14
100
  end
15
101
 
16
- def nested?
17
- self.class.nested?
102
+ [:numeric?, :decimal?, :integer?, :signed_integer?, :unsigned_integer?, :float?, :temporal?, :nested?].each do |v|
103
+ define_method(v) do
104
+ self.class.public_send(v)
105
+ end
18
106
  end
19
107
 
20
- def self.==(other)
21
- eql?(other) || other.is_a?(self)
108
+ # Returns a string representing the data type.
109
+ #
110
+ # @return [String]
111
+ def to_s
112
+ self.class.name
113
+ end
114
+
115
+ # Returns a string representing the data type.
116
+ #
117
+ # @return [String]
118
+ def inspect
119
+ to_s
22
120
  end
23
121
  end
24
122
 
@@ -27,15 +125,22 @@ module Polars
27
125
  end
28
126
 
29
127
  # Base class for integral data types.
30
- class IntegralType < NumericType
128
+ class IntegerType < NumericType
31
129
  end
32
130
 
33
- # Base class for fractional data types.
34
- class FractionalType < NumericType
131
+ # @private
132
+ IntegralType = IntegerType
133
+
134
+ # Base class for signed integer data types.
135
+ class SignedIntegerType < IntegerType
136
+ end
137
+
138
+ # Base class for unsigned integer data types.
139
+ class UnsignedIntegerType < IntegerType
35
140
  end
36
141
 
37
142
  # Base class for float data types.
38
- class FloatType < FractionalType
143
+ class FloatType < NumericType
39
144
  end
40
145
 
41
146
  # Base class for temporal data types.
@@ -44,41 +149,38 @@ module Polars
44
149
 
45
150
  # Base class for nested data types.
46
151
  class NestedType < DataType
47
- def self.nested?
48
- true
49
- end
50
152
  end
51
153
 
52
154
  # 8-bit signed integer type.
53
- class Int8 < IntegralType
155
+ class Int8 < SignedIntegerType
54
156
  end
55
157
 
56
158
  # 16-bit signed integer type.
57
- class Int16 < IntegralType
159
+ class Int16 < SignedIntegerType
58
160
  end
59
161
 
60
162
  # 32-bit signed integer type.
61
- class Int32 < IntegralType
163
+ class Int32 < SignedIntegerType
62
164
  end
63
165
 
64
166
  # 64-bit signed integer type.
65
- class Int64 < IntegralType
167
+ class Int64 < SignedIntegerType
66
168
  end
67
169
 
68
170
  # 8-bit unsigned integer type.
69
- class UInt8 < IntegralType
171
+ class UInt8 < UnsignedIntegerType
70
172
  end
71
173
 
72
174
  # 16-bit unsigned integer type.
73
- class UInt16 < IntegralType
175
+ class UInt16 < UnsignedIntegerType
74
176
  end
75
177
 
76
178
  # 32-bit unsigned integer type.
77
- class UInt32 < IntegralType
179
+ class UInt32 < UnsignedIntegerType
78
180
  end
79
181
 
80
182
  # 64-bit unsigned integer type.
81
- class UInt64 < IntegralType
183
+ class UInt64 < UnsignedIntegerType
82
184
  end
83
185
 
84
186
  # 32-bit floating point type.
@@ -92,7 +194,7 @@ module Polars
92
194
  # Decimal 128-bit type with an optional precision and non-negative scale.
93
195
  #
94
196
  # NOTE: this is an experimental work-in-progress feature and may not work as expected.
95
- class Decimal < FractionalType
197
+ class Decimal < NumericType
96
198
  attr_reader :precision, :scale
97
199
 
98
200
  def initialize(precision, scale)
@@ -123,6 +225,7 @@ module Polars
123
225
  class String < DataType
124
226
  end
125
227
 
228
+ # @private
126
229
  # Allow Utf8 as an alias for String
127
230
  Utf8 = String
128
231
 
@@ -189,6 +292,59 @@ module Polars
189
292
 
190
293
  # A categorical encoding of a set of strings.
191
294
  class Categorical < DataType
295
+ def initialize(ordering = "physical")
296
+ @ordering = ordering
297
+ end
298
+ end
299
+
300
+ # A fixed set categorical encoding of a set of strings.
301
+ #
302
+ # NOTE: this is an experimental work-in-progress feature and may not work as expected.
303
+ class Enum < DataType
304
+ attr_reader :categories
305
+
306
+ def initialize(categories)
307
+ if !categories.is_a?(Series)
308
+ categories = Series.new(categories)
309
+ end
310
+
311
+ if categories.empty?
312
+ self.categories = Series.new("category", [], dtype: String)
313
+ return
314
+ end
315
+
316
+ if categories.null_count > 0
317
+ msg = "Enum categories must not contain null values"
318
+ raise TypeError, msg
319
+ end
320
+
321
+ if (dtype = categories.dtype) != String
322
+ msg = "Enum categories must be strings; found data of type #{dtype}"
323
+ raise TypeError, msg
324
+ end
325
+
326
+ if categories.n_unique != categories.len
327
+ duplicate = categories.filter(categories.is_duplicated)[0]
328
+ msg = "Enum categories must be unique; found duplicate #{duplicate}"
329
+ raise ArgumentError, msg
330
+ end
331
+
332
+ @categories = categories.rechunk.alias("category")
333
+ end
334
+
335
+ def ==(other)
336
+ if other.eql?(Enum)
337
+ true
338
+ elsif other.is_a?(Enum)
339
+ categories == other.categories
340
+ else
341
+ false
342
+ end
343
+ end
344
+
345
+ def to_s
346
+ "#{self.class.name}(categories: #{categories.to_a.inspect})"
347
+ end
192
348
  end
193
349
 
194
350
  # Type for wrapping arbitrary Ruby objects.
@@ -228,27 +384,34 @@ module Polars
228
384
 
229
385
  # Nested list/array type.
230
386
  class Array < NestedType
231
- attr_reader :width, :inner
387
+ attr_reader :inner, :width
232
388
 
233
- def initialize(width, inner = nil)
234
- @width = width
389
+ def initialize(inner, width)
390
+ if width.is_a?(DataType) || (width.is_a?(Class) && width < DataType)
391
+ inner, width = width, inner
392
+ end
235
393
  @inner = Utils.rb_type_to_dtype(inner) if inner
394
+ @width = width
236
395
  end
237
396
 
238
- # TODO check width?
239
397
  def ==(other)
240
398
  if other.eql?(Array)
241
399
  true
242
400
  elsif other.is_a?(Array)
243
- @inner.nil? || other.inner.nil? || @inner == other.inner
401
+ if @width != other.width
402
+ false
403
+ elsif @inner.nil? || other.inner.nil?
404
+ true
405
+ else
406
+ @inner == other.inner
407
+ end
244
408
  else
245
409
  false
246
410
  end
247
411
  end
248
412
 
249
- # TODO add width?
250
413
  def to_s
251
- "#{self.class.name}(#{inner})"
414
+ "#{self.class.name}(#{inner}, width: #{width.inspect})"
252
415
  end
253
416
  end
254
417
 
@@ -1027,14 +1027,20 @@ module Polars
1027
1027
  # Different from `convert_time_zone`, this will also modify
1028
1028
  # the underlying timestamp.
1029
1029
  #
1030
- # @param tz [String]
1031
- # Time zone for the `Datetime` Series.
1030
+ # @param time_zone [String]
1031
+ # Time zone for the `Datetime` Series. Pass `nil` to unset time zone.
1032
+ # @param use_earliest [Boolean]
1033
+ # Determine how to deal with ambiguous datetimes.
1034
+ # @param ambiguous [String]
1035
+ # Determine how to deal with ambiguous datetimes.
1036
+ # @param non_existent [String]
1037
+ # Determine how to deal with non-existent datetimes.
1032
1038
  #
1033
1039
  # @return [Expr]
1034
- def replace_time_zone(tz, use_earliest: nil, ambiguous: "raise")
1040
+ def replace_time_zone(time_zone, use_earliest: nil, ambiguous: "raise", non_existent: "raise")
1035
1041
  ambiguous = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
1036
1042
  ambiguous = Polars.lit(ambiguous) unless ambiguous.is_a?(Expr)
1037
- Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, ambiguous._rbexpr))
1043
+ Utils.wrap_expr(_rbexpr.dt_replace_time_zone(time_zone, ambiguous._rbexpr, non_existent))
1038
1044
  end
1039
1045
 
1040
1046
  # Extract the days from a Duration type.
@@ -1066,9 +1072,10 @@ module Polars
1066
1072
  # # │ 2020-04-01 00:00:00 ┆ 31 │
1067
1073
  # # │ 2020-05-01 00:00:00 ┆ 30 │
1068
1074
  # # └─────────────────────┴───────────┘
1069
- def days
1070
- Utils.wrap_expr(_rbexpr.duration_days)
1075
+ def total_days
1076
+ Utils.wrap_expr(_rbexpr.dt_total_days)
1071
1077
  end
1078
+ alias_method :days, :total_days
1072
1079
 
1073
1080
  # Extract the hours from a Duration type.
1074
1081
  #
@@ -1100,9 +1107,10 @@ module Polars
1100
1107
  # # │ 2020-01-03 00:00:00 ┆ 24 │
1101
1108
  # # │ 2020-01-04 00:00:00 ┆ 24 │
1102
1109
  # # └─────────────────────┴────────────┘
1103
- def hours
1104
- Utils.wrap_expr(_rbexpr.duration_hours)
1110
+ def total_hours
1111
+ Utils.wrap_expr(_rbexpr.dt_total_hours)
1105
1112
  end
1113
+ alias_method :hours, :total_hours
1106
1114
 
1107
1115
  # Extract the minutes from a Duration type.
1108
1116
  #
@@ -1134,9 +1142,10 @@ module Polars
1134
1142
  # # │ 2020-01-03 00:00:00 ┆ 1440 │
1135
1143
  # # │ 2020-01-04 00:00:00 ┆ 1440 │
1136
1144
  # # └─────────────────────┴──────────────┘
1137
- def minutes
1138
- Utils.wrap_expr(_rbexpr.duration_minutes)
1145
+ def total_minutes
1146
+ Utils.wrap_expr(_rbexpr.dt_total_minutes)
1139
1147
  end
1148
+ alias_method :minutes, :total_minutes
1140
1149
 
1141
1150
  # Extract the seconds from a Duration type.
1142
1151
  #
@@ -1169,9 +1178,10 @@ module Polars
1169
1178
  # # │ 2020-01-01 00:03:00 ┆ 60 │
1170
1179
  # # │ 2020-01-01 00:04:00 ┆ 60 │
1171
1180
  # # └─────────────────────┴──────────────┘
1172
- def seconds
1173
- Utils.wrap_expr(_rbexpr.duration_seconds)
1181
+ def total_seconds
1182
+ Utils.wrap_expr(_rbexpr.dt_total_seconds)
1174
1183
  end
1184
+ alias_method :seconds, :total_seconds
1175
1185
 
1176
1186
  # Extract the milliseconds from a Duration type.
1177
1187
  #
@@ -1202,15 +1212,18 @@ module Polars
1202
1212
  # # │ 2020-01-01 00:00:00.001 ┆ 1 │
1203
1213
  # # │ 2020-01-01 00:00:00.002 ┆ 1 │
1204
1214
  # # │ 2020-01-01 00:00:00.003 ┆ 1 │
1215
+ # # │ 2020-01-01 00:00:00.004 ┆ 1 │
1205
1216
  # # │ … ┆ … │
1217
+ # # │ 2020-01-01 00:00:00.996 ┆ 1 │
1206
1218
  # # │ 2020-01-01 00:00:00.997 ┆ 1 │
1207
1219
  # # │ 2020-01-01 00:00:00.998 ┆ 1 │
1208
1220
  # # │ 2020-01-01 00:00:00.999 ┆ 1 │
1209
1221
  # # │ 2020-01-01 00:00:01 ┆ 1 │
1210
1222
  # # └─────────────────────────┴───────────────────┘
1211
- def milliseconds
1212
- Utils.wrap_expr(_rbexpr.duration_milliseconds)
1223
+ def total_milliseconds
1224
+ Utils.wrap_expr(_rbexpr.dt_total_milliseconds)
1213
1225
  end
1226
+ alias_method :milliseconds, :total_milliseconds
1214
1227
 
1215
1228
  # Extract the microseconds from a Duration type.
1216
1229
  #
@@ -1241,15 +1254,18 @@ module Polars
1241
1254
  # # │ 2020-01-01 00:00:00.001 ┆ 1000 │
1242
1255
  # # │ 2020-01-01 00:00:00.002 ┆ 1000 │
1243
1256
  # # │ 2020-01-01 00:00:00.003 ┆ 1000 │
1257
+ # # │ 2020-01-01 00:00:00.004 ┆ 1000 │
1244
1258
  # # │ … ┆ … │
1259
+ # # │ 2020-01-01 00:00:00.996 ┆ 1000 │
1245
1260
  # # │ 2020-01-01 00:00:00.997 ┆ 1000 │
1246
1261
  # # │ 2020-01-01 00:00:00.998 ┆ 1000 │
1247
1262
  # # │ 2020-01-01 00:00:00.999 ┆ 1000 │
1248
1263
  # # │ 2020-01-01 00:00:01 ┆ 1000 │
1249
1264
  # # └─────────────────────────┴───────────────────┘
1250
- def microseconds
1251
- Utils.wrap_expr(_rbexpr.duration_microseconds)
1265
+ def total_microseconds
1266
+ Utils.wrap_expr(_rbexpr.dt_total_microseconds)
1252
1267
  end
1268
+ alias_method :microseconds, :total_microseconds
1253
1269
 
1254
1270
  # Extract the nanoseconds from a Duration type.
1255
1271
  #
@@ -1280,15 +1296,18 @@ module Polars
1280
1296
  # # │ 2020-01-01 00:00:00.001 ┆ 1000000 │
1281
1297
  # # │ 2020-01-01 00:00:00.002 ┆ 1000000 │
1282
1298
  # # │ 2020-01-01 00:00:00.003 ┆ 1000000 │
1299
+ # # │ 2020-01-01 00:00:00.004 ┆ 1000000 │
1283
1300
  # # │ … ┆ … │
1301
+ # # │ 2020-01-01 00:00:00.996 ┆ 1000000 │
1284
1302
  # # │ 2020-01-01 00:00:00.997 ┆ 1000000 │
1285
1303
  # # │ 2020-01-01 00:00:00.998 ┆ 1000000 │
1286
1304
  # # │ 2020-01-01 00:00:00.999 ┆ 1000000 │
1287
1305
  # # │ 2020-01-01 00:00:01 ┆ 1000000 │
1288
1306
  # # └─────────────────────────┴──────────────────┘
1289
- def nanoseconds
1290
- Utils.wrap_expr(_rbexpr.duration_nanoseconds)
1307
+ def total_nanoseconds
1308
+ Utils.wrap_expr(_rbexpr.dt_total_nanoseconds)
1291
1309
  end
1310
+ alias_method :nanoseconds, :total_nanoseconds
1292
1311
 
1293
1312
  # Offset this date by a relative time offset.
1294
1313
  #
@@ -1372,7 +1391,9 @@ module Polars
1372
1391
  # # │ 2000-02-01 02:00:00 │
1373
1392
  # # │ 2000-03-01 02:00:00 │
1374
1393
  # # │ 2000-04-01 02:00:00 │
1394
+ # # │ 2000-05-01 02:00:00 │
1375
1395
  # # │ … │
1396
+ # # │ 2000-08-01 02:00:00 │
1376
1397
  # # │ 2000-09-01 02:00:00 │
1377
1398
  # # │ 2000-10-01 02:00:00 │
1378
1399
  # # │ 2000-11-01 02:00:00 │
@@ -1408,7 +1429,9 @@ module Polars
1408
1429
  # # │ 2000-02-29 02:00:00 │
1409
1430
  # # │ 2000-03-31 02:00:00 │
1410
1431
  # # │ 2000-04-30 02:00:00 │
1432
+ # # │ 2000-05-31 02:00:00 │
1411
1433
  # # │ … │
1434
+ # # │ 2000-08-31 02:00:00 │
1412
1435
  # # │ 2000-09-30 02:00:00 │
1413
1436
  # # │ 2000-10-31 02:00:00 │
1414
1437
  # # │ 2000-11-30 02:00:00 │
@@ -910,8 +910,14 @@ module Polars
910
910
  # Different from `with_time_zone`, this will also modify
911
911
  # the underlying timestamp.
912
912
  #
913
- # @param tz [String]
914
- # Time zone for the `Datetime` Series.
913
+ # @param time_zone [String]
914
+ # Time zone for the `Datetime` Series. Pass `nil` to unset time zone.
915
+ # @param use_earliest [Boolean]
916
+ # Determine how to deal with ambiguous datetimes.
917
+ # @param ambiguous [String]
918
+ # Determine how to deal with ambiguous datetimes.
919
+ # @param non_existent [String]
920
+ # Determine how to deal with non-existent datetimes.
915
921
  #
916
922
  # @return [Series]
917
923
  #
@@ -982,7 +988,7 @@ module Polars
982
988
  # # 1585717200
983
989
  # # 1588309200
984
990
  # # ]
985
- def replace_time_zone(tz)
991
+ def replace_time_zone(time_zone, use_earliest: nil, ambiguous: "raise", non_existent: "raise")
986
992
  super
987
993
  end
988
994
 
@@ -1,15 +1,26 @@
1
1
  module Polars
2
2
  # @private
3
+ # Base class for all Polars errors.
3
4
  class Error < StandardError; end
4
5
 
5
6
  # @private
7
+ # Exception raised when an unsupported testing assert is made.
8
+ class InvalidAssert < Error; end
9
+
10
+ # @private
11
+ # Exception raised when the number of returned rows does not match expectation.
6
12
  class RowsException < Error; end
7
13
 
8
14
  # @private
15
+ # Exception raised when no rows are returned, but at least one row is expected.
16
+ class NoRowsReturned < RowsException; end
17
+
18
+ # @private
19
+ # Exception raised when more rows than expected are returned.
9
20
  class TooManyRowsReturned < RowsException; end
10
21
 
11
22
  # @private
12
- class NoRowsReturned < RowsException; end
23
+ class AssertionError < Error; end
13
24
 
14
25
  # @private
15
26
  class Todo < Error