polars-df 0.13.0-x64-mingw-ucrt

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +208 -0
  4. data/Cargo.lock +2556 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +39278 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +104 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +36 -0
  18. data/lib/polars/cat_name_space.rb +88 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +98 -0
  21. data/lib/polars/data_frame.rb +5191 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1397 -0
  24. data/lib/polars/date_time_name_space.rb +1287 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +38 -0
  27. data/lib/polars/expr.rb +7256 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +271 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1329 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +136 -0
  39. data/lib/polars/functions/range/datetime_range.rb +149 -0
  40. data/lib/polars/functions/range/int_range.rb +51 -0
  41. data/lib/polars/functions/range/time_range.rb +141 -0
  42. data/lib/polars/functions/repeat.rb +144 -0
  43. data/lib/polars/functions/whenthen.rb +96 -0
  44. data/lib/polars/functions.rb +57 -0
  45. data/lib/polars/group_by.rb +613 -0
  46. data/lib/polars/io/avro.rb +24 -0
  47. data/lib/polars/io/csv.rb +696 -0
  48. data/lib/polars/io/database.rb +73 -0
  49. data/lib/polars/io/ipc.rb +275 -0
  50. data/lib/polars/io/json.rb +29 -0
  51. data/lib/polars/io/ndjson.rb +80 -0
  52. data/lib/polars/io/parquet.rb +233 -0
  53. data/lib/polars/lazy_frame.rb +2708 -0
  54. data/lib/polars/lazy_group_by.rb +181 -0
  55. data/lib/polars/list_expr.rb +791 -0
  56. data/lib/polars/list_name_space.rb +449 -0
  57. data/lib/polars/meta_expr.rb +222 -0
  58. data/lib/polars/name_expr.rb +198 -0
  59. data/lib/polars/plot.rb +109 -0
  60. data/lib/polars/rolling_group_by.rb +35 -0
  61. data/lib/polars/series.rb +4444 -0
  62. data/lib/polars/slice.rb +104 -0
  63. data/lib/polars/sql_context.rb +194 -0
  64. data/lib/polars/string_cache.rb +75 -0
  65. data/lib/polars/string_expr.rb +1495 -0
  66. data/lib/polars/string_name_space.rb +811 -0
  67. data/lib/polars/struct_expr.rb +98 -0
  68. data/lib/polars/struct_name_space.rb +96 -0
  69. data/lib/polars/testing.rb +507 -0
  70. data/lib/polars/utils/constants.rb +9 -0
  71. data/lib/polars/utils/convert.rb +97 -0
  72. data/lib/polars/utils/parse.rb +89 -0
  73. data/lib/polars/utils/various.rb +76 -0
  74. data/lib/polars/utils/wrap.rb +19 -0
  75. data/lib/polars/utils.rb +130 -0
  76. data/lib/polars/version.rb +4 -0
  77. data/lib/polars/whenthen.rb +83 -0
  78. data/lib/polars-df.rb +1 -0
  79. data/lib/polars.rb +91 -0
  80. metadata +138 -0
@@ -0,0 +1,98 @@
1
+ module Polars
2
+ # Namespace for struct related expressions.
3
+ class StructExpr
4
+ # @private
5
+ attr_accessor :_rbexpr
6
+
7
+ # @private
8
+ def initialize(expr)
9
+ self._rbexpr = expr._rbexpr
10
+ end
11
+
12
+ # Retrieve one of the fields of this `Struct` as a new Series.
13
+ #
14
+ # @return [Expr]
15
+ def [](item)
16
+ if item.is_a?(::String)
17
+ field(item)
18
+ elsif item.is_a?(Integer)
19
+ Utils.wrap_expr(_rbexpr.struct_field_by_index(item))
20
+ else
21
+ raise ArgumentError, "expected type Integer or String, got #{item.class.name}"
22
+ end
23
+ end
24
+
25
+ # Retrieve one of the fields of this `Struct` as a new Series.
26
+ #
27
+ # @param name [String]
28
+ # Name of the field
29
+ #
30
+ # @return [Expr]
31
+ #
32
+ # @example
33
+ # df = (
34
+ # Polars::DataFrame.new(
35
+ # {
36
+ # "int" => [1, 2],
37
+ # "str" => ["a", "b"],
38
+ # "bool" => [true, nil],
39
+ # "list" => [[1, 2], [3]]
40
+ # }
41
+ # )
42
+ # .to_struct("my_struct")
43
+ # .to_frame
44
+ # )
45
+ # df.select(Polars.col("my_struct").struct.field("str"))
46
+ # # =>
47
+ # # shape: (2, 1)
48
+ # # ┌─────┐
49
+ # # │ str │
50
+ # # │ --- │
51
+ # # │ str │
52
+ # # ╞═════╡
53
+ # # │ a │
54
+ # # │ b │
55
+ # # └─────┘
56
+ def field(name)
57
+ Utils.wrap_expr(_rbexpr.struct_field_by_name(name))
58
+ end
59
+
60
+ # Rename the fields of the struct.
61
+ #
62
+ # @param names [Array]
63
+ # New names in the order of the struct's fields
64
+ #
65
+ # @return [Expr]
66
+ #
67
+ # @example
68
+ # df = (
69
+ # Polars::DataFrame.new(
70
+ # {
71
+ # "int" => [1, 2],
72
+ # "str" => ["a", "b"],
73
+ # "bool" => [true, nil],
74
+ # "list" => [[1, 2], [3]]
75
+ # }
76
+ # )
77
+ # .to_struct("my_struct")
78
+ # .to_frame
79
+ # )
80
+ # df = df.with_column(
81
+ # Polars.col("my_struct").struct.rename_fields(["INT", "STR", "BOOL", "LIST"])
82
+ # )
83
+ # df.select(Polars.col("my_struct").struct.field("INT"))
84
+ # # =>
85
+ # # shape: (2, 1)
86
+ # # ┌─────┐
87
+ # # │ INT │
88
+ # # │ --- │
89
+ # # │ i64 │
90
+ # # ╞═════╡
91
+ # # │ 1 │
92
+ # # │ 2 │
93
+ # # └─────┘
94
+ def rename_fields(names)
95
+ Utils.wrap_expr(_rbexpr.struct_rename_fields(names))
96
+ end
97
+ end
98
+ end
@@ -0,0 +1,96 @@
1
+ module Polars
2
+ # Series.struct namespace.
3
+ class StructNameSpace
4
+ include ExprDispatch
5
+
6
+ self._accessor = "struct"
7
+
8
+ # @private
9
+ def initialize(series)
10
+ self._s = series._s
11
+ end
12
+
13
+ # Retrieve one of the fields of this `Struct` as a new Series.
14
+ #
15
+ # @return [Series]
16
+ def [](item)
17
+ if item.is_a?(Integer)
18
+ field(fields[item])
19
+ elsif item.is_a?(::String)
20
+ field(item)
21
+ else
22
+ raise ArgumentError, "expected type Integer or String, got #{item.class.name}"
23
+ end
24
+ end
25
+
26
+ # Convert this Struct Series to a DataFrame.
27
+ #
28
+ # @return [DataFrame]
29
+ def to_frame
30
+ Utils.wrap_df(_s.struct_to_frame)
31
+ end
32
+
33
+ # Get the names of the fields.
34
+ #
35
+ # @return [Array]
36
+ def fields
37
+ if _s.nil?
38
+ []
39
+ else
40
+ _s.struct_fields
41
+ end
42
+ end
43
+
44
+ # Retrieve one of the fields of this `Struct` as a new Series.
45
+ #
46
+ # @param name [String]
47
+ # Name of the field
48
+ #
49
+ # @return [Series]
50
+ def field(name)
51
+ super
52
+ end
53
+
54
+ # Rename the fields of the struct.
55
+ #
56
+ # @param names [Array]
57
+ # New names in the order of the struct's fields
58
+ #
59
+ # @return [Series]
60
+ def rename_fields(names)
61
+ super
62
+ end
63
+
64
+ # Get the struct definition as a name/dtype schema dict.
65
+ #
66
+ # @return [Object]
67
+ def schema
68
+ if _s.nil?
69
+ {}
70
+ else
71
+ _s.dtype.to_schema
72
+ end
73
+ end
74
+
75
+ # Convert this struct Series to a DataFrame with a separate column for each field.
76
+ #
77
+ # @return [DataFrame]
78
+ #
79
+ # @example
80
+ # s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
81
+ # s.struct.unnest
82
+ # # =>
83
+ # # shape: (2, 2)
84
+ # # ┌─────┬─────┐
85
+ # # │ a ┆ b │
86
+ # # │ --- ┆ --- │
87
+ # # │ i64 ┆ i64 │
88
+ # # ╞═════╪═════╡
89
+ # # │ 1 ┆ 2 │
90
+ # # │ 3 ┆ 4 │
91
+ # # └─────┴─────┘
92
+ def unnest
93
+ Utils.wrap_df(_s.struct_unnest)
94
+ end
95
+ end
96
+ end
@@ -0,0 +1,507 @@
1
+ module Polars
2
+ module Testing
3
+ # Assert that the left and right frame are equal.
4
+ #
5
+ # Raises a detailed `AssertionError` if the frames differ.
6
+ # This function is intended for use in unit tests.
7
+ #
8
+ # @param left [Object]
9
+ # The first DataFrame or LazyFrame to compare.
10
+ # @param right [Object]
11
+ # The second DataFrame or LazyFrame to compare.
12
+ # @param check_row_order [Boolean]
13
+ # Require row order to match.
14
+ # @param check_column_order [Boolean]
15
+ # Require column order to match.
16
+ # @param check_dtype [Boolean]
17
+ # Require data types to match.
18
+ # @param check_exact [Boolean]
19
+ # Require float values to match exactly. If set to `false`, values are considered
20
+ # equal when within tolerance of each other (see `rtol` and `atol`).
21
+ # Only affects columns with a Float data type.
22
+ # @param rtol [Float]
23
+ # Relative tolerance for inexact checking. Fraction of values in `right`.
24
+ # @param atol [Float]
25
+ # Absolute tolerance for inexact checking.
26
+ # @param categorical_as_str [Boolean]
27
+ # Cast categorical columns to string before comparing. Enabling this helps
28
+ # compare columns that do not share the same string cache.
29
+ #
30
+ # @return [nil]
31
+ def assert_frame_equal(
32
+ left,
33
+ right,
34
+ check_row_order: true,
35
+ check_column_order: true,
36
+ check_dtype: true,
37
+ check_exact: false,
38
+ rtol: 1e-5,
39
+ atol: 1e-8,
40
+ categorical_as_str: false
41
+ )
42
+ lazy = _assert_correct_input_type(left, right)
43
+ objects = lazy ? "LazyFrames" : "DataFrames"
44
+
45
+ _assert_frame_schema_equal(
46
+ left,
47
+ right,
48
+ check_column_order: check_column_order,
49
+ check_dtype: check_dtype,
50
+ objects: objects,
51
+ )
52
+
53
+ if lazy
54
+ left, right = left.collect, right.collect
55
+ end
56
+
57
+ if left.height != right.height
58
+ raise_assertion_error(
59
+ objects, "number of rows does not match", left.height, right.height
60
+ )
61
+ end
62
+
63
+ if !check_row_order
64
+ left, right = _sort_dataframes(left, right)
65
+ end
66
+
67
+ left.columns.each do |c|
68
+ s_left, s_right = left.get_column(c), right.get_column(c)
69
+ begin
70
+ _assert_series_values_equal(
71
+ s_left,
72
+ s_right,
73
+ check_exact: check_exact,
74
+ rtol: rtol,
75
+ atol: atol,
76
+ categorical_as_str: categorical_as_str
77
+ )
78
+ rescue AssertionError
79
+ raise_assertion_error(
80
+ objects,
81
+ "value mismatch for column #{c.inspect}",
82
+ s_left.to_a,
83
+ s_right.to_a
84
+ )
85
+ end
86
+ end
87
+ end
88
+
89
+ # Assert that the left and right frame are **not** equal.
90
+ #
91
+ # This function is intended for use in unit tests.
92
+ #
93
+ # @param left [Object]
94
+ # The first DataFrame or LazyFrame to compare.
95
+ # @param right [Object]
96
+ # The second DataFrame or LazyFrame to compare.
97
+ # @param check_row_order [Boolean]
98
+ # Require row order to match.
99
+ # @param check_column_order [Boolean]
100
+ # Require column order to match.
101
+ # @param check_dtype [Boolean]
102
+ # Require data types to match.
103
+ # @param check_exact [Boolean]
104
+ # Require float values to match exactly. If set to `false`, values are considered
105
+ # equal when within tolerance of each other (see `rtol` and `atol`).
106
+ # Only affects columns with a Float data type.
107
+ # @param rtol [Float]
108
+ # Relative tolerance for inexact checking. Fraction of values in `right`.
109
+ # @param atol [Float]
110
+ # Absolute tolerance for inexact checking.
111
+ # @param categorical_as_str [Boolean]
112
+ # Cast categorical columns to string before comparing. Enabling this helps
113
+ # compare columns that do not share the same string cache.
114
+ #
115
+ # @return [nil]
116
+ def assert_frame_not_equal(
117
+ left,
118
+ right,
119
+ check_row_order: true,
120
+ check_column_order: true,
121
+ check_dtype: true,
122
+ check_exact: false,
123
+ rtol: 1e-5,
124
+ atol: 1e-8,
125
+ categorical_as_str: false
126
+ )
127
+ begin
128
+ assert_frame_equal(
129
+ left,
130
+ right,
131
+ check_column_order: check_column_order,
132
+ check_row_order: check_row_order,
133
+ check_dtype: check_dtype,
134
+ check_exact: check_exact,
135
+ rtol: rtol,
136
+ atol: atol,
137
+ categorical_as_str: categorical_as_str
138
+ )
139
+ rescue AssertionError
140
+ return
141
+ end
142
+
143
+ msg = "frames are equal"
144
+ raise AssertionError, msg
145
+ end
146
+
147
+ # Assert that the left and right Series are equal.
148
+ #
149
+ # Raises a detailed `AssertionError` if the Series differ.
150
+ # This function is intended for use in unit tests.
151
+ #
152
+ # @param left [Object]
153
+ # The first Series to compare.
154
+ # @param right [Object]
155
+ # The second Series to compare.
156
+ # @param check_dtype [Boolean]
157
+ # Require data types to match.
158
+ # @param check_names [Boolean]
159
+ # Require names to match.
160
+ # @param check_exact [Boolean]
161
+ # Require float values to match exactly. If set to `false`, values are considered
162
+ # equal when within tolerance of each other (see `rtol` and `atol`).
163
+ # Only affects columns with a Float data type.
164
+ # @param rtol [Float]
165
+ # Relative tolerance for inexact checking, given as a fraction of the values in
166
+ # `right`.
167
+ # @param atol [Float]
168
+ # Absolute tolerance for inexact checking.
169
+ # @param categorical_as_str [Boolean]
170
+ # Cast categorical columns to string before comparing. Enabling this helps
171
+ # compare columns that do not share the same string cache.
172
+ #
173
+ # @return [nil]
174
+ def assert_series_equal(
175
+ left,
176
+ right,
177
+ check_dtype: true,
178
+ check_names: true,
179
+ check_exact: false,
180
+ rtol: 1e-5,
181
+ atol: 1e-8,
182
+ categorical_as_str: false
183
+ )
184
+ if !(left.is_a?(Series) && right.is_a?(Series))
185
+ raise_assertion_error(
186
+ "inputs",
187
+ "unexpected input types",
188
+ left.class.name,
189
+ right.class.name
190
+ )
191
+ end
192
+
193
+ if left.len != right.len
194
+ raise_assertion_error("Series", "length mismatch", left.len, right.len)
195
+ end
196
+
197
+ if check_names && left.name != right.name
198
+ raise_assertion_error("Series", "name mismatch", left.name, right.name)
199
+ end
200
+
201
+ if check_dtype && left.dtype != right.dtype
202
+ raise_assertion_error("Series", "dtype mismatch", left.dtype, right.dtype)
203
+ end
204
+
205
+ _assert_series_values_equal(
206
+ left,
207
+ right,
208
+ check_exact: check_exact,
209
+ rtol: rtol,
210
+ atol: atol,
211
+ categorical_as_str: categorical_as_str
212
+ )
213
+ end
214
+
215
+ # Assert that the left and right Series are **not** equal.
216
+ #
217
+ # This function is intended for use in unit tests.
218
+ #
219
+ # @param left [Object]
220
+ # The first Series to compare.
221
+ # @param right [Object]
222
+ # The second Series to compare.
223
+ # @param check_dtype [Boolean]
224
+ # Require data types to match.
225
+ # @param check_names [Boolean]
226
+ # Require names to match.
227
+ # @param check_exact [Boolean]
228
+ # Require float values to match exactly. If set to `false`, values are considered
229
+ # equal when within tolerance of each other (see `rtol` and `atol`).
230
+ # Only affects columns with a Float data type.
231
+ # @param rtol [Float]
232
+ # Relative tolerance for inexact checking, given as a fraction of the values in
233
+ # `right`.
234
+ # @param atol [Float]
235
+ # Absolute tolerance for inexact checking.
236
+ # @param categorical_as_str [Boolean]
237
+ # Cast categorical columns to string before comparing. Enabling this helps
238
+ # compare columns that do not share the same string cache.
239
+ #
240
+ # @return [nil]
241
+ def assert_series_not_equal(
242
+ left,
243
+ right,
244
+ check_dtype: true,
245
+ check_names: true,
246
+ check_exact: false,
247
+ rtol: 1e-5,
248
+ atol: 1e-8,
249
+ categorical_as_str: false
250
+ )
251
+ begin
252
+ assert_series_equal(
253
+ left,
254
+ right,
255
+ check_dtype: check_dtype,
256
+ check_names: check_names,
257
+ check_exact: check_exact,
258
+ rtol: rtol,
259
+ atol: atol,
260
+ categorical_as_str: categorical_as_str
261
+ )
262
+ rescue AssertionError
263
+ return
264
+ end
265
+
266
+ msg = "Series are equal"
267
+ raise AssertionError, msg
268
+ end
269
+
270
+ private
271
+
272
+ def _assert_correct_input_type(left, right)
273
+ if left.is_a?(DataFrame) && right.is_a?(DataFrame)
274
+ false
275
+ elsif left.is_a?(LazyFrame) && right.is_a?(DataFrame)
276
+ true
277
+ else
278
+ raise_assertion_error(
279
+ "inputs",
280
+ "unexpected input types",
281
+ left.class.name,
282
+ right.class.name
283
+ )
284
+ end
285
+ end
286
+
287
+ def _assert_frame_schema_equal(
288
+ left,
289
+ right,
290
+ check_dtype:,
291
+ check_column_order:,
292
+ objects:
293
+ )
294
+ left_schema, right_schema = left.schema, right.schema
295
+
296
+ # Fast path for equal frames
297
+ if left_schema == right_schema
298
+ return
299
+ end
300
+
301
+ # Special error message for when column names do not match
302
+ if left_schema.keys != right_schema.keys
303
+ if (left_not_right = right_schema.keys - left_schema.keys).any?
304
+ msg = "columns #{left_not_right.inspect} in left #{objects[..-1]}, but not in right"
305
+ raise AssertionError, msg
306
+ else
307
+ right_not_left = right_schema.keys - left_schema.keys
308
+ msg = "columns #{right_not_left.inspect} in right #{objects[..-1]}, but not in left"
309
+ raise AssertionError, msg
310
+ end
311
+ end
312
+
313
+ if check_column_order
314
+ left_columns, right_columns = left_schema.keys, right_schema.keys
315
+ if left_columns != right_columns
316
+ detail = "columns are not in the same order"
317
+ raise_assertion_error(objects, detail, left_columns, right_columns)
318
+ end
319
+ end
320
+
321
+ if check_dtype
322
+ left_schema_dict, right_schema_dict = left_schema.to_h, right_schema.to_h
323
+ if check_column_order || left_schema_dict != right_schema_dict
324
+ detail = "dtypes do not match"
325
+ raise_assertion_error(objects, detail, left_schema_dict, right_schema_dict)
326
+ end
327
+ end
328
+ end
329
+
330
+ def _sort_dataframes(left, right)
331
+ by = left.columns
332
+ begin
333
+ left = left.sort(by)
334
+ right = right.sort(by)
335
+ rescue
336
+ msg = "cannot set `check_row_order: false` on frame with unsortable columns"
337
+ raise InvalidAssert, msg
338
+ end
339
+ [left, right]
340
+ end
341
+
342
+ def _assert_series_values_equal(
343
+ left,
344
+ right,
345
+ check_exact:,
346
+ rtol:,
347
+ atol:,
348
+ categorical_as_str:
349
+ )
350
+ if categorical_as_str
351
+ if left.dtype == Categorical
352
+ left = left.cast(String)
353
+ end
354
+ if right.dtype == Categorical
355
+ right = right.cast(String)
356
+ end
357
+ end
358
+
359
+ # Determine unequal elements
360
+ begin
361
+ unequal = left.ne_missing(right)
362
+ rescue
363
+ raise_assertion_error(
364
+ "Series",
365
+ "incompatible data types",
366
+ left.dtype,
367
+ right.dtype
368
+ )
369
+ end
370
+
371
+ # Check nested dtypes in separate function
372
+ if _comparing_nested_floats(left.dtype, right.dtype)
373
+ begin
374
+ _assert_series_nested_values_equal(
375
+ left: left.filter(unequal),
376
+ right: right.filter(unequal),
377
+ check_exact: check_exact,
378
+ rtol: rtol,
379
+ atol: atol,
380
+ categorical_as_str: categorical_as_str
381
+ )
382
+ rescue AssertionError
383
+ raise_assertion_error(
384
+ "Series",
385
+ "nested value mismatch",
386
+ left.to_a,
387
+ right.to_a
388
+ )
389
+ else
390
+ return
391
+ end
392
+ end
393
+
394
+ # If no differences found during exact checking, we're done
395
+ if !unequal.any
396
+ return
397
+ end
398
+
399
+ # Only do inexact checking for float types
400
+ if check_exact || !left.dtype.float? || !right.dtype.float?
401
+ raise_assertion_error(
402
+ "Series", "exact value mismatch", left.to_a, right.to_a
403
+ )
404
+ end
405
+
406
+ _assert_series_null_values_match(left, right)
407
+ _assert_series_nan_values_match(left, right)
408
+ _assert_series_values_within_tolerance(
409
+ left,
410
+ right,
411
+ unequal,
412
+ rtol: rtol,
413
+ atol: atol
414
+ )
415
+ end
416
+
417
+ def _assert_series_nested_values_equal(
418
+ left,
419
+ right,
420
+ check_exact:,
421
+ rtol:,
422
+ atol:,
423
+ categorical_as_str:
424
+ )
425
+ # compare nested lists element-wise
426
+ if _comparing_lists(left.dtype, right.dtype)
427
+ left.zip(right) do |s1, s2|
428
+ if s1.nil? || s2.nil?
429
+ raise_assertion_error("Series", "nested value mismatch", s1, s2)
430
+ end
431
+
432
+ _assert_series_values_equal(
433
+ s1,
434
+ s2,
435
+ check_exact: check_exact,
436
+ rtol: rtol,
437
+ atol: atol,
438
+ categorical_as_str: categorical_as_str
439
+ )
440
+ end
441
+
442
+ # unnest structs as series and compare
443
+ else
444
+ ls, rs = left.struct.unnest, right.struct.unnest
445
+ ls.zip(rs) do |s1, s2|
446
+ _assert_series_values_equal(
447
+ s1,
448
+ s2,
449
+ check_exact: check_exact,
450
+ rtol: rtol,
451
+ atol: atol,
452
+ categorical_as_str: categorical_as_str
453
+ )
454
+ end
455
+ end
456
+ end
457
+
458
+ def _assert_series_null_values_match(left, right)
459
+ null_value_mismatch = left.is_null != right.is_null
460
+ if null_value_mismatch.any
461
+ raise_assertion_error(
462
+ "Series", "null value mismatch", left.to_a, right.to_a
463
+ )
464
+ end
465
+ end
466
+
467
+ def _assert_series_nan_values_match(left, right)
468
+ if !_comparing_floats(left.dtype, right.dtype)
469
+ return
470
+ end
471
+ nan_value_mismatch = left.is_nan != right.is_nan
472
+ if nan_value_mismatch.any
473
+ raise_assertion_error(
474
+ "Series",
475
+ "nan value mismatch",
476
+ left.to_a,
477
+ right.to_a
478
+ )
479
+ end
480
+ end
481
+
482
+ def _comparing_floats(left, right)
483
+ left.is_float && right.is_float
484
+ end
485
+
486
+ def _comparing_lists(left, right)
487
+ [List, Array].include?(left) && [List, Array].include?(right)
488
+ end
489
+
490
+ def _comparing_structs(left, right)
491
+ left == Struct && right == Struct
492
+ end
493
+
494
+ def _comparing_nested_floats(left, right)
495
+ if !(_comparing_lists(left, right) || _comparing_structs(left, right))
496
+ return false
497
+ end
498
+
499
+ left.float? && right.float?
500
+ end
501
+
502
+ def raise_assertion_error(objects, detail, left, right)
503
+ msg = "#{objects} are different (#{detail})\n[left]: #{left}\n[right]: #{right}"
504
+ raise AssertionError, msg
505
+ end
506
+ end
507
+ end
@@ -0,0 +1,9 @@
1
+ module Polars
2
+ module Utils
3
+ SECONDS_PER_DAY = 86_400
4
+ SECONDS_PER_HOUR = 3_600
5
+ NS_PER_SECOND = 1_000_000_000
6
+ US_PER_SECOND = 1_000_000
7
+ MS_PER_SECOND = 1_000
8
+ end
9
+ end