polars-df 0.8.0-x86_64-linux → 0.10.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +42 -1
  3. data/Cargo.lock +159 -66
  4. data/Cargo.toml +0 -3
  5. data/LICENSE-THIRD-PARTY.txt +3112 -1613
  6. data/LICENSE.txt +1 -1
  7. data/README.md +3 -2
  8. data/lib/polars/3.1/polars.so +0 -0
  9. data/lib/polars/3.2/polars.so +0 -0
  10. data/lib/polars/3.3/polars.so +0 -0
  11. data/lib/polars/array_expr.rb +453 -0
  12. data/lib/polars/array_name_space.rb +346 -0
  13. data/lib/polars/batched_csv_reader.rb +4 -2
  14. data/lib/polars/cat_expr.rb +24 -0
  15. data/lib/polars/cat_name_space.rb +75 -0
  16. data/lib/polars/config.rb +2 -2
  17. data/lib/polars/data_frame.rb +306 -96
  18. data/lib/polars/data_types.rb +191 -28
  19. data/lib/polars/date_time_expr.rb +41 -18
  20. data/lib/polars/date_time_name_space.rb +9 -3
  21. data/lib/polars/exceptions.rb +12 -1
  22. data/lib/polars/expr.rb +898 -215
  23. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  24. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  25. data/lib/polars/functions/as_datatype.rb +248 -0
  26. data/lib/polars/functions/col.rb +47 -0
  27. data/lib/polars/functions/eager.rb +182 -0
  28. data/lib/polars/functions/lazy.rb +1280 -0
  29. data/lib/polars/functions/len.rb +49 -0
  30. data/lib/polars/functions/lit.rb +35 -0
  31. data/lib/polars/functions/random.rb +16 -0
  32. data/lib/polars/functions/range/date_range.rb +103 -0
  33. data/lib/polars/functions/range/int_range.rb +51 -0
  34. data/lib/polars/functions/repeat.rb +144 -0
  35. data/lib/polars/functions/whenthen.rb +96 -0
  36. data/lib/polars/functions.rb +29 -416
  37. data/lib/polars/group_by.rb +2 -2
  38. data/lib/polars/io.rb +36 -31
  39. data/lib/polars/lazy_frame.rb +405 -88
  40. data/lib/polars/list_expr.rb +158 -8
  41. data/lib/polars/list_name_space.rb +102 -0
  42. data/lib/polars/meta_expr.rb +175 -7
  43. data/lib/polars/series.rb +282 -41
  44. data/lib/polars/string_cache.rb +75 -0
  45. data/lib/polars/string_expr.rb +413 -96
  46. data/lib/polars/string_name_space.rb +4 -4
  47. data/lib/polars/testing.rb +507 -0
  48. data/lib/polars/utils.rb +106 -8
  49. data/lib/polars/version.rb +1 -1
  50. data/lib/polars/whenthen.rb +83 -0
  51. data/lib/polars.rb +16 -4
  52. metadata +34 -6
  53. data/lib/polars/lazy_functions.rb +0 -1181
  54. data/lib/polars/when.rb +0 -16
  55. data/lib/polars/when_then.rb +0 -19
@@ -1,24 +1,122 @@
1
1
  module Polars
2
2
  # Base class for all Polars data types.
3
3
  class DataType
4
+ # Return this DataType's fundamental/root type class.
5
+ #
6
+ # @return [Class]
7
+ #
8
+ # @example
9
+ # Polars::Datetime.new("ns").base_type
10
+ # # => Polars::Datetime
11
+ # @example
12
+ # Polars::List.new(Polars::Int32).base_type
13
+ # # => Polars::List
14
+ # @example
15
+ # Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)]).base_type
16
+ # # => Polars::Struct
4
17
  def self.base_type
5
18
  self
6
19
  end
7
20
 
21
+ # Return this DataType's fundamental/root type class.
22
+ #
23
+ # @return [Class]
8
24
  def base_type
9
25
  is_a?(DataType) ? self.class : self
10
26
  end
11
27
 
28
+ # Check if this DataType is the same as another DataType.
29
+ #
30
+ # @return [Boolean]
31
+ def self.==(other)
32
+ eql?(other) || other.is_a?(self)
33
+ end
34
+
35
+ # Check if this DataType is the same as another DataType.
36
+ #
37
+ # @return [Boolean]
38
+ def ==(other)
39
+ if other.is_a?(Class)
40
+ is_a?(other)
41
+ else
42
+ other.instance_of?(self.class)
43
+ end
44
+ end
45
+
46
+ # Check whether the data type is a numeric type.
47
+ #
48
+ # @return [Boolean]
49
+ def self.numeric?
50
+ self < NumericType
51
+ end
52
+
53
+ # Check whether the data type is a decimal type.
54
+ #
55
+ # @return [Boolean]
56
+ def self.decimal?
57
+ self == Decimal
58
+ end
59
+
60
+ # Check whether the data type is an integer type.
61
+ #
62
+ # @return [Boolean]
63
+ def self.integer?
64
+ self < IntegerType
65
+ end
66
+
67
+ # Check whether the data type is a signed integer type.
68
+ #
69
+ # @return [Boolean]
70
+ def self.signed_integer?
71
+ self < SignedIntegerType
72
+ end
73
+
74
+ # Check whether the data type is an unsigned integer type.
75
+ #
76
+ # @return [Boolean]
77
+ def self.unsigned_integer?
78
+ self < UnsignedIntegerType
79
+ end
80
+
81
+ # Check whether the data type is a float type.
82
+ #
83
+ # @return [Boolean]
84
+ def self.float?
85
+ self < FloatType
86
+ end
87
+
88
+ # Check whether the data type is a temporal type.
89
+ #
90
+ # @return [Boolean]
91
+ def self.temporal?
92
+ self < TemporalType
93
+ end
94
+
95
+ # Check whether the data type is a nested type.
96
+ #
97
+ # @return [Boolean]
12
98
  def self.nested?
13
- false
99
+ self < NestedType
14
100
  end
15
101
 
16
- def nested?
17
- self.class.nested?
102
+ [:numeric?, :decimal?, :integer?, :signed_integer?, :unsigned_integer?, :float?, :temporal?, :nested?].each do |v|
103
+ define_method(v) do
104
+ self.class.public_send(v)
105
+ end
18
106
  end
19
107
 
20
- def self.==(other)
21
- eql?(other) || other.is_a?(self)
108
+ # Returns a string representing the data type.
109
+ #
110
+ # @return [String]
111
+ def to_s
112
+ self.class.name
113
+ end
114
+
115
+ # Returns a string representing the data type.
116
+ #
117
+ # @return [String]
118
+ def inspect
119
+ to_s
22
120
  end
23
121
  end
24
122
 
@@ -27,15 +125,22 @@ module Polars
27
125
  end
28
126
 
29
127
  # Base class for integral data types.
30
- class IntegralType < NumericType
128
+ class IntegerType < NumericType
31
129
  end
32
130
 
33
- # Base class for fractional data types.
34
- class FractionalType < NumericType
131
+ # @private
132
+ IntegralType = IntegerType
133
+
134
+ # Base class for signed integer data types.
135
+ class SignedIntegerType < IntegerType
136
+ end
137
+
138
+ # Base class for unsigned integer data types.
139
+ class UnsignedIntegerType < IntegerType
35
140
  end
36
141
 
37
142
  # Base class for float data types.
38
- class FloatType < FractionalType
143
+ class FloatType < NumericType
39
144
  end
40
145
 
41
146
  # Base class for temporal data types.
@@ -44,41 +149,38 @@ module Polars
44
149
 
45
150
  # Base class for nested data types.
46
151
  class NestedType < DataType
47
- def self.nested?
48
- true
49
- end
50
152
  end
51
153
 
52
154
  # 8-bit signed integer type.
53
- class Int8 < IntegralType
155
+ class Int8 < SignedIntegerType
54
156
  end
55
157
 
56
158
  # 16-bit signed integer type.
57
- class Int16 < IntegralType
159
+ class Int16 < SignedIntegerType
58
160
  end
59
161
 
60
162
  # 32-bit signed integer type.
61
- class Int32 < IntegralType
163
+ class Int32 < SignedIntegerType
62
164
  end
63
165
 
64
166
  # 64-bit signed integer type.
65
- class Int64 < IntegralType
167
+ class Int64 < SignedIntegerType
66
168
  end
67
169
 
68
170
  # 8-bit unsigned integer type.
69
- class UInt8 < IntegralType
171
+ class UInt8 < UnsignedIntegerType
70
172
  end
71
173
 
72
174
  # 16-bit unsigned integer type.
73
- class UInt16 < IntegralType
175
+ class UInt16 < UnsignedIntegerType
74
176
  end
75
177
 
76
178
  # 32-bit unsigned integer type.
77
- class UInt32 < IntegralType
179
+ class UInt32 < UnsignedIntegerType
78
180
  end
79
181
 
80
182
  # 64-bit unsigned integer type.
81
- class UInt64 < IntegralType
183
+ class UInt64 < UnsignedIntegerType
82
184
  end
83
185
 
84
186
  # 32-bit floating point type.
@@ -92,7 +194,7 @@ module Polars
92
194
  # Decimal 128-bit type with an optional precision and non-negative scale.
93
195
  #
94
196
  # NOTE: this is an experimental work-in-progress feature and may not work as expected.
95
- class Decimal < FractionalType
197
+ class Decimal < NumericType
96
198
  attr_reader :precision, :scale
97
199
 
98
200
  def initialize(precision, scale)
@@ -123,6 +225,7 @@ module Polars
123
225
  class String < DataType
124
226
  end
125
227
 
228
+ # @private
126
229
  # Allow Utf8 as an alias for String
127
230
  Utf8 = String
128
231
 
@@ -189,6 +292,59 @@ module Polars
189
292
 
190
293
  # A categorical encoding of a set of strings.
191
294
  class Categorical < DataType
295
+ def initialize(ordering = "physical")
296
+ @ordering = ordering
297
+ end
298
+ end
299
+
300
+ # A fixed set categorical encoding of a set of strings.
301
+ #
302
+ # NOTE: this is an experimental work-in-progress feature and may not work as expected.
303
+ class Enum < DataType
304
+ attr_reader :categories
305
+
306
+ def initialize(categories)
307
+ if !categories.is_a?(Series)
308
+ categories = Series.new(categories)
309
+ end
310
+
311
+ if categories.empty?
312
+ self.categories = Series.new("category", [], dtype: String)
313
+ return
314
+ end
315
+
316
+ if categories.null_count > 0
317
+ msg = "Enum categories must not contain null values"
318
+ raise TypeError, msg
319
+ end
320
+
321
+ if (dtype = categories.dtype) != String
322
+ msg = "Enum categories must be strings; found data of type #{dtype}"
323
+ raise TypeError, msg
324
+ end
325
+
326
+ if categories.n_unique != categories.len
327
+ duplicate = categories.filter(categories.is_duplicated)[0]
328
+ msg = "Enum categories must be unique; found duplicate #{duplicate}"
329
+ raise ArgumentError, msg
330
+ end
331
+
332
+ @categories = categories.rechunk.alias("category")
333
+ end
334
+
335
+ def ==(other)
336
+ if other.eql?(Enum)
337
+ true
338
+ elsif other.is_a?(Enum)
339
+ categories == other.categories
340
+ else
341
+ false
342
+ end
343
+ end
344
+
345
+ def to_s
346
+ "#{self.class.name}(categories: #{categories.to_a.inspect})"
347
+ end
192
348
  end
193
349
 
194
350
  # Type for wrapping arbitrary Ruby objects.
@@ -228,27 +384,34 @@ module Polars
228
384
 
229
385
  # Nested list/array type.
230
386
  class Array < NestedType
231
- attr_reader :width, :inner
387
+ attr_reader :inner, :width
232
388
 
233
- def initialize(width, inner = nil)
234
- @width = width
389
+ def initialize(inner, width)
390
+ if width.is_a?(DataType) || (width.is_a?(Class) && width < DataType)
391
+ inner, width = width, inner
392
+ end
235
393
  @inner = Utils.rb_type_to_dtype(inner) if inner
394
+ @width = width
236
395
  end
237
396
 
238
- # TODO check width?
239
397
  def ==(other)
240
398
  if other.eql?(Array)
241
399
  true
242
400
  elsif other.is_a?(Array)
243
- @inner.nil? || other.inner.nil? || @inner == other.inner
401
+ if @width != other.width
402
+ false
403
+ elsif @inner.nil? || other.inner.nil?
404
+ true
405
+ else
406
+ @inner == other.inner
407
+ end
244
408
  else
245
409
  false
246
410
  end
247
411
  end
248
412
 
249
- # TODO add width?
250
413
  def to_s
251
- "#{self.class.name}(#{inner})"
414
+ "#{self.class.name}(#{inner}, width: #{width.inspect})"
252
415
  end
253
416
  end
254
417
 
@@ -1027,14 +1027,20 @@ module Polars
1027
1027
  # Different from `convert_time_zone`, this will also modify
1028
1028
  # the underlying timestamp.
1029
1029
  #
1030
- # @param tz [String]
1031
- # Time zone for the `Datetime` Series.
1030
+ # @param time_zone [String]
1031
+ # Time zone for the `Datetime` Series. Pass `nil` to unset time zone.
1032
+ # @param use_earliest [Boolean]
1033
+ # Determine how to deal with ambiguous datetimes; this value is merged into `ambiguous` via `Utils.rename_use_earliest_to_ambiguous`.
1034
+ # @param ambiguous [String]
1035
+ # Determine how to deal with ambiguous datetimes.
1036
+ # @param non_existent [String]
1037
+ # Determine how to deal with non-existent datetimes.
1032
1038
  #
1033
1039
  # @return [Expr]
1034
- def replace_time_zone(tz, use_earliest: nil, ambiguous: "raise")
1040
+ def replace_time_zone(time_zone, use_earliest: nil, ambiguous: "raise", non_existent: "raise")
1035
1041
  ambiguous = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
1036
1042
  ambiguous = Polars.lit(ambiguous) unless ambiguous.is_a?(Expr)
1037
- Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, ambiguous._rbexpr))
1043
+ Utils.wrap_expr(_rbexpr.dt_replace_time_zone(time_zone, ambiguous._rbexpr, non_existent))
1038
1044
  end
1039
1045
 
1040
1046
  # Extract the days from a Duration type.
@@ -1066,9 +1072,10 @@ module Polars
1066
1072
  # # │ 2020-04-01 00:00:00 ┆ 31 │
1067
1073
  # # │ 2020-05-01 00:00:00 ┆ 30 │
1068
1074
  # # └─────────────────────┴───────────┘
1069
- def days
1070
- Utils.wrap_expr(_rbexpr.duration_days)
1075
+ def total_days
1076
+ Utils.wrap_expr(_rbexpr.dt_total_days)
1071
1077
  end
1078
+ alias_method :days, :total_days
1072
1079
 
1073
1080
  # Extract the hours from a Duration type.
1074
1081
  #
@@ -1100,9 +1107,10 @@ module Polars
1100
1107
  # # │ 2020-01-03 00:00:00 ┆ 24 │
1101
1108
  # # │ 2020-01-04 00:00:00 ┆ 24 │
1102
1109
  # # └─────────────────────┴────────────┘
1103
- def hours
1104
- Utils.wrap_expr(_rbexpr.duration_hours)
1110
+ def total_hours
1111
+ Utils.wrap_expr(_rbexpr.dt_total_hours)
1105
1112
  end
1113
+ alias_method :hours, :total_hours
1106
1114
 
1107
1115
  # Extract the minutes from a Duration type.
1108
1116
  #
@@ -1134,9 +1142,10 @@ module Polars
1134
1142
  # # │ 2020-01-03 00:00:00 ┆ 1440 │
1135
1143
  # # │ 2020-01-04 00:00:00 ┆ 1440 │
1136
1144
  # # └─────────────────────┴──────────────┘
1137
- def minutes
1138
- Utils.wrap_expr(_rbexpr.duration_minutes)
1145
+ def total_minutes
1146
+ Utils.wrap_expr(_rbexpr.dt_total_minutes)
1139
1147
  end
1148
+ alias_method :minutes, :total_minutes
1140
1149
 
1141
1150
  # Extract the seconds from a Duration type.
1142
1151
  #
@@ -1169,9 +1178,10 @@ module Polars
1169
1178
  # # │ 2020-01-01 00:03:00 ┆ 60 │
1170
1179
  # # │ 2020-01-01 00:04:00 ┆ 60 │
1171
1180
  # # └─────────────────────┴──────────────┘
1172
- def seconds
1173
- Utils.wrap_expr(_rbexpr.duration_seconds)
1181
+ def total_seconds
1182
+ Utils.wrap_expr(_rbexpr.dt_total_seconds)
1174
1183
  end
1184
+ alias_method :seconds, :total_seconds
1175
1185
 
1176
1186
  # Extract the milliseconds from a Duration type.
1177
1187
  #
@@ -1202,15 +1212,18 @@ module Polars
1202
1212
  # # │ 2020-01-01 00:00:00.001 ┆ 1 │
1203
1213
  # # │ 2020-01-01 00:00:00.002 ┆ 1 │
1204
1214
  # # │ 2020-01-01 00:00:00.003 ┆ 1 │
1215
+ # # │ 2020-01-01 00:00:00.004 ┆ 1 │
1205
1216
  # # │ … ┆ … │
1217
+ # # │ 2020-01-01 00:00:00.996 ┆ 1 │
1206
1218
  # # │ 2020-01-01 00:00:00.997 ┆ 1 │
1207
1219
  # # │ 2020-01-01 00:00:00.998 ┆ 1 │
1208
1220
  # # │ 2020-01-01 00:00:00.999 ┆ 1 │
1209
1221
  # # │ 2020-01-01 00:00:01 ┆ 1 │
1210
1222
  # # └─────────────────────────┴───────────────────┘
1211
- def milliseconds
1212
- Utils.wrap_expr(_rbexpr.duration_milliseconds)
1223
+ def total_milliseconds
1224
+ Utils.wrap_expr(_rbexpr.dt_total_milliseconds)
1213
1225
  end
1226
+ alias_method :milliseconds, :total_milliseconds
1214
1227
 
1215
1228
  # Extract the microseconds from a Duration type.
1216
1229
  #
@@ -1241,15 +1254,18 @@ module Polars
1241
1254
  # # │ 2020-01-01 00:00:00.001 ┆ 1000 │
1242
1255
  # # │ 2020-01-01 00:00:00.002 ┆ 1000 │
1243
1256
  # # │ 2020-01-01 00:00:00.003 ┆ 1000 │
1257
+ # # │ 2020-01-01 00:00:00.004 ┆ 1000 │
1244
1258
  # # │ … ┆ … │
1259
+ # # │ 2020-01-01 00:00:00.996 ┆ 1000 │
1245
1260
  # # │ 2020-01-01 00:00:00.997 ┆ 1000 │
1246
1261
  # # │ 2020-01-01 00:00:00.998 ┆ 1000 │
1247
1262
  # # │ 2020-01-01 00:00:00.999 ┆ 1000 │
1248
1263
  # # │ 2020-01-01 00:00:01 ┆ 1000 │
1249
1264
  # # └─────────────────────────┴───────────────────┘
1250
- def microseconds
1251
- Utils.wrap_expr(_rbexpr.duration_microseconds)
1265
+ def total_microseconds
1266
+ Utils.wrap_expr(_rbexpr.dt_total_microseconds)
1252
1267
  end
1268
+ alias_method :microseconds, :total_microseconds
1253
1269
 
1254
1270
  # Extract the nanoseconds from a Duration type.
1255
1271
  #
@@ -1280,15 +1296,18 @@ module Polars
1280
1296
  # # │ 2020-01-01 00:00:00.001 ┆ 1000000 │
1281
1297
  # # │ 2020-01-01 00:00:00.002 ┆ 1000000 │
1282
1298
  # # │ 2020-01-01 00:00:00.003 ┆ 1000000 │
1299
+ # # │ 2020-01-01 00:00:00.004 ┆ 1000000 │
1283
1300
  # # │ … ┆ … │
1301
+ # # │ 2020-01-01 00:00:00.996 ┆ 1000000 │
1284
1302
  # # │ 2020-01-01 00:00:00.997 ┆ 1000000 │
1285
1303
  # # │ 2020-01-01 00:00:00.998 ┆ 1000000 │
1286
1304
  # # │ 2020-01-01 00:00:00.999 ┆ 1000000 │
1287
1305
  # # │ 2020-01-01 00:00:01 ┆ 1000000 │
1288
1306
  # # └─────────────────────────┴──────────────────┘
1289
- def nanoseconds
1290
- Utils.wrap_expr(_rbexpr.duration_nanoseconds)
1307
+ def total_nanoseconds
1308
+ Utils.wrap_expr(_rbexpr.dt_total_nanoseconds)
1291
1309
  end
1310
+ alias_method :nanoseconds, :total_nanoseconds
1292
1311
 
1293
1312
  # Offset this date by a relative time offset.
1294
1313
  #
@@ -1372,7 +1391,9 @@ module Polars
1372
1391
  # # │ 2000-02-01 02:00:00 │
1373
1392
  # # │ 2000-03-01 02:00:00 │
1374
1393
  # # │ 2000-04-01 02:00:00 │
1394
+ # # │ 2000-05-01 02:00:00 │
1375
1395
  # # │ … │
1396
+ # # │ 2000-08-01 02:00:00 │
1376
1397
  # # │ 2000-09-01 02:00:00 │
1377
1398
  # # │ 2000-10-01 02:00:00 │
1378
1399
  # # │ 2000-11-01 02:00:00 │
@@ -1408,7 +1429,9 @@ module Polars
1408
1429
  # # │ 2000-02-29 02:00:00 │
1409
1430
  # # │ 2000-03-31 02:00:00 │
1410
1431
  # # │ 2000-04-30 02:00:00 │
1432
+ # # │ 2000-05-31 02:00:00 │
1411
1433
  # # │ … │
1434
+ # # │ 2000-08-31 02:00:00 │
1412
1435
  # # │ 2000-09-30 02:00:00 │
1413
1436
  # # │ 2000-10-31 02:00:00 │
1414
1437
  # # │ 2000-11-30 02:00:00 │
@@ -910,8 +910,14 @@ module Polars
910
910
  # Different from `with_time_zone`, this will also modify
911
911
  # the underlying timestamp.
912
912
  #
913
- # @param tz [String]
914
- # Time zone for the `Datetime` Series.
913
+ # @param time_zone [String]
914
+ # Time zone for the `Datetime` Series. Pass `nil` to unset time zone.
915
+ # @param use_earliest [Boolean]
916
+ # Determine how to deal with ambiguous datetimes.
917
+ # @param ambiguous [String]
918
+ # Determine how to deal with ambiguous datetimes.
919
+ # @param non_existent [String]
920
+ # Determine how to deal with non-existent datetimes.
915
921
  #
916
922
  # @return [Series]
917
923
  #
@@ -982,7 +988,7 @@ module Polars
982
988
  # # 1585717200
983
989
  # # 1588309200
984
990
  # # ]
985
- def replace_time_zone(tz)
991
+ def replace_time_zone(time_zone, use_earliest: nil, ambiguous: "raise", non_existent: "raise")
986
992
  super
987
993
  end
988
994
 
@@ -1,15 +1,26 @@
1
1
  module Polars
2
2
  # @private
3
+ # Base class for all Polars errors.
3
4
  class Error < StandardError; end
4
5
 
5
6
  # @private
7
+ # Exception raised when an unsupported testing assert is made.
8
+ class InvalidAssert < Error; end
9
+
10
+ # @private
11
+ # Exception raised when the number of returned rows does not match expectation.
6
12
  class RowsException < Error; end
7
13
 
8
14
  # @private
15
+ # Exception raised when no rows are returned, but at least one row is expected.
16
+ class NoRowsReturned < RowsException; end
17
+
18
+ # @private
19
+ # Exception raised when more rows than expected are returned.
9
20
  class TooManyRowsReturned < RowsException; end
10
21
 
11
22
  # @private
12
- class NoRowsReturned < RowsException; end
23
+ class AssertionError < Error; end
13
24
 
14
25
  # @private
15
26
  class Todo < Error