polars-df 0.10.0-x86_64-linux-musl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67):
  1. checksums.yaml +7 -0
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +175 -0
  4. data/Cargo.lock +2536 -0
  5. data/Cargo.toml +6 -0
  6. data/LICENSE-THIRD-PARTY.txt +38726 -0
  7. data/LICENSE.txt +20 -0
  8. data/README.md +437 -0
  9. data/lib/polars/3.1/polars.so +0 -0
  10. data/lib/polars/3.2/polars.so +0 -0
  11. data/lib/polars/3.3/polars.so +0 -0
  12. data/lib/polars/array_expr.rb +537 -0
  13. data/lib/polars/array_name_space.rb +423 -0
  14. data/lib/polars/batched_csv_reader.rb +98 -0
  15. data/lib/polars/binary_expr.rb +77 -0
  16. data/lib/polars/binary_name_space.rb +66 -0
  17. data/lib/polars/cat_expr.rb +72 -0
  18. data/lib/polars/cat_name_space.rb +125 -0
  19. data/lib/polars/config.rb +530 -0
  20. data/lib/polars/convert.rb +93 -0
  21. data/lib/polars/data_frame.rb +5418 -0
  22. data/lib/polars/data_types.rb +466 -0
  23. data/lib/polars/date_time_expr.rb +1444 -0
  24. data/lib/polars/date_time_name_space.rb +1484 -0
  25. data/lib/polars/dynamic_group_by.rb +52 -0
  26. data/lib/polars/exceptions.rb +31 -0
  27. data/lib/polars/expr.rb +6105 -0
  28. data/lib/polars/expr_dispatch.rb +22 -0
  29. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  30. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  31. data/lib/polars/functions/as_datatype.rb +248 -0
  32. data/lib/polars/functions/col.rb +47 -0
  33. data/lib/polars/functions/eager.rb +182 -0
  34. data/lib/polars/functions/lazy.rb +1280 -0
  35. data/lib/polars/functions/len.rb +49 -0
  36. data/lib/polars/functions/lit.rb +35 -0
  37. data/lib/polars/functions/random.rb +16 -0
  38. data/lib/polars/functions/range/date_range.rb +103 -0
  39. data/lib/polars/functions/range/int_range.rb +51 -0
  40. data/lib/polars/functions/repeat.rb +144 -0
  41. data/lib/polars/functions/whenthen.rb +96 -0
  42. data/lib/polars/functions.rb +57 -0
  43. data/lib/polars/group_by.rb +548 -0
  44. data/lib/polars/io.rb +890 -0
  45. data/lib/polars/lazy_frame.rb +2833 -0
  46. data/lib/polars/lazy_group_by.rb +84 -0
  47. data/lib/polars/list_expr.rb +791 -0
  48. data/lib/polars/list_name_space.rb +445 -0
  49. data/lib/polars/meta_expr.rb +222 -0
  50. data/lib/polars/name_expr.rb +198 -0
  51. data/lib/polars/plot.rb +109 -0
  52. data/lib/polars/rolling_group_by.rb +37 -0
  53. data/lib/polars/series.rb +4527 -0
  54. data/lib/polars/slice.rb +104 -0
  55. data/lib/polars/sql_context.rb +194 -0
  56. data/lib/polars/string_cache.rb +75 -0
  57. data/lib/polars/string_expr.rb +1519 -0
  58. data/lib/polars/string_name_space.rb +810 -0
  59. data/lib/polars/struct_expr.rb +98 -0
  60. data/lib/polars/struct_name_space.rb +96 -0
  61. data/lib/polars/testing.rb +507 -0
  62. data/lib/polars/utils.rb +422 -0
  63. data/lib/polars/version.rb +4 -0
  64. data/lib/polars/whenthen.rb +83 -0
  65. data/lib/polars-df.rb +1 -0
  66. data/lib/polars.rb +72 -0
  67. metadata +125 -0
@@ -0,0 +1,98 @@
1
module Polars
  # Namespace for struct related expressions.
  class StructExpr
    # @private
    attr_accessor :_rbexpr

    # @private
    def initialize(expr)
      @_rbexpr = expr._rbexpr
    end

    # Retrieve one of the fields of this `Struct` as a new expression.
    #
    # @param item [String, Integer]
    #   Name of the field, or index of the field.
    #
    # @return [Expr]
    #
    # @raise [ArgumentError]
    #   If `item` is neither a String nor an Integer.
    def [](item)
      case item
      when ::String
        field(item)
      when Integer
        Utils.wrap_expr(_rbexpr.struct_field_by_index(item))
      else
        raise ArgumentError, "expected type Integer or String, got #{item.class.name}"
      end
    end

    # Retrieve one of the fields of this `Struct` as a new expression.
    #
    # @param name [String]
    #   Name of the field
    #
    # @return [Expr]
    #
    # @example
    #   df = (
    #     Polars::DataFrame.new(
    #       {
    #         "int" => [1, 2],
    #         "str" => ["a", "b"],
    #         "bool" => [true, nil],
    #         "list" => [[1, 2], [3]]
    #       }
    #     )
    #     .to_struct("my_struct")
    #     .to_frame
    #   )
    #   df.select(Polars.col("my_struct").struct.field("str"))
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ str │
    #   # │ --- │
    #   # │ str │
    #   # ╞═════╡
    #   # │ a   │
    #   # │ b   │
    #   # └─────┘
    def field(name)
      rbexpr = _rbexpr.struct_field_by_name(name)
      Utils.wrap_expr(rbexpr)
    end

    # Rename the fields of the struct.
    #
    # @param names [Array]
    #   New names in the order of the struct's fields
    #
    # @return [Expr]
    #
    # @example
    #   df = (
    #     Polars::DataFrame.new(
    #       {
    #         "int" => [1, 2],
    #         "str" => ["a", "b"],
    #         "bool" => [true, nil],
    #         "list" => [[1, 2], [3]]
    #       }
    #     )
    #     .to_struct("my_struct")
    #     .to_frame
    #   )
    #   df = df.with_column(
    #     Polars.col("my_struct").struct.rename_fields(["INT", "STR", "BOOL", "LIST"])
    #   )
    #   df.select(Polars.col("my_struct").struct.field("INT"))
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ INT │
    #   # │ --- │
    #   # │ i64 │
    #   # ╞═════╡
    #   # │ 1   │
    #   # │ 2   │
    #   # └─────┘
    def rename_fields(names)
      rbexpr = _rbexpr.struct_rename_fields(names)
      Utils.wrap_expr(rbexpr)
    end
  end
end
@@ -0,0 +1,96 @@
1
module Polars
  # Series.struct namespace.
  class StructNameSpace
    include ExprDispatch

    self._accessor = "struct"

    # @private
    def initialize(series)
      self._s = series._s
    end

    # Retrieve one of the fields of this `Struct` as a new Series.
    #
    # @param item [Integer, String]
    #   Position of the field within {#fields}, or the name of the field.
    #
    # @return [Series]
    #
    # @raise [ArgumentError]
    #   If `item` is neither an Integer nor a String.
    def [](item)
      case item
      when Integer
        # Translate the position into a field name first.
        field(fields[item])
      when ::String
        field(item)
      else
        raise ArgumentError, "expected type Integer or String, got #{item.class.name}"
      end
    end

    # Convert this Struct Series to a DataFrame.
    #
    # @return [DataFrame]
    def to_frame
      frame = _s.struct_to_frame
      Utils.wrap_df(frame)
    end

    # Get the names of the fields.
    #
    # Returns an empty array when there is no underlying series.
    #
    # @return [Array]
    def fields
      return [] if _s.nil?

      _s.struct_fields
    end

    # Retrieve one of the fields of this `Struct` as a new Series.
    #
    # @param name [String]
    #   Name of the field
    #
    # @return [Series]
    def field(name)
      super
    end

    # Rename the fields of the struct.
    #
    # @param names [Array]
    #   New names in the order of the struct's fields
    #
    # @return [Series]
    def rename_fields(names)
      super
    end

    # Get the struct definition as a name/dtype schema hash.
    #
    # Returns an empty hash when there is no underlying series.
    #
    # @return [Object]
    def schema
      return {} if _s.nil?

      _s.dtype.to_schema
    end

    # Convert this struct Series to a DataFrame with a separate column for each field.
    #
    # @return [DataFrame]
    #
    # @example
    #   s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
    #   s.struct.unnest
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬─────┐
    #   # │ a   ┆ b   │
    #   # │ --- ┆ --- │
    #   # │ i64 ┆ i64 │
    #   # ╞═════╪═════╡
    #   # │ 1   ┆ 2   │
    #   # │ 3   ┆ 4   │
    #   # └─────┴─────┘
    def unnest
      frame = _s.struct_unnest
      Utils.wrap_df(frame)
    end
  end
end
@@ -0,0 +1,507 @@
1
+ module Polars
2
+ module Testing
3
# Assert that the left and right frame are equal.
#
# Raises a detailed `AssertionError` if the frames differ.
# This function is intended for use in unit tests.
#
# The schema is compared first, then the row count, then every column
# of `left` is compared against the same-named column of `right`.
#
# @param left [Object]
#   The first DataFrame or LazyFrame to compare.
# @param right [Object]
#   The second DataFrame or LazyFrame to compare.
# @param check_row_order [Boolean]
#   Require row order to match. When `false`, both frames are sorted
#   by all columns before comparing.
# @param check_column_order [Boolean]
#   Require column order to match.
# @param check_dtype [Boolean]
#   Require data types to match.
# @param check_exact [Boolean]
#   Require float values to match exactly. If set to `false`, values are considered
#   equal when within tolerance of each other (see `rtol` and `atol`).
#   Only affects columns with a Float data type.
# @param rtol [Float]
#   Relative tolerance for inexact checking. Fraction of values in `right`.
# @param atol [Float]
#   Absolute tolerance for inexact checking.
# @param categorical_as_str [Boolean]
#   Cast categorical columns to string before comparing. Enabling this helps
#   compare columns that do not share the same string cache.
#
# @return [Object]
#   The return value is not meaningful; failure is signalled by raising.
def assert_frame_equal(
  left,
  right,
  check_row_order: true,
  check_column_order: true,
  check_dtype: true,
  check_exact: false,
  rtol: 1e-5,
  atol: 1e-8,
  categorical_as_str: false
)
  lazy = _assert_correct_input_type(left, right)
  objects = lazy ? "LazyFrames" : "DataFrames"

  _assert_frame_schema_equal(
    left,
    right,
    check_column_order: check_column_order,
    check_dtype: check_dtype,
    objects: objects
  )

  # Lazy inputs are materialized before any row-level comparison.
  if lazy
    left = left.collect
    right = right.collect
  end

  unless left.height == right.height
    raise_assertion_error(
      objects, "number of rows does not match", left.height, right.height
    )
  end

  left, right = _sort_dataframes(left, right) unless check_row_order

  left.columns.each do |column|
    left_series = left.get_column(column)
    right_series = right.get_column(column)
    begin
      _assert_series_values_equal(
        left_series,
        right_series,
        check_exact: check_exact,
        rtol: rtol,
        atol: atol,
        categorical_as_str: categorical_as_str
      )
    rescue AssertionError
      # Re-raise with the column name so the failing column is identifiable.
      raise_assertion_error(
        objects,
        "value mismatch for column #{column.inspect}",
        left_series.to_a,
        right_series.to_a
      )
    end
  end
end
88
+
89
# Assert that the left and right frame are **not** equal.
#
# Runs `assert_frame_equal` with the same options and inverts the outcome:
# an `AssertionError` from that check means success here.
# This function is intended for use in unit tests.
#
# @param left [Object]
#   The first DataFrame or LazyFrame to compare.
# @param right [Object]
#   The second DataFrame or LazyFrame to compare.
# @param check_row_order [Boolean]
#   Require row order to match.
# @param check_column_order [Boolean]
#   Require column order to match.
# @param check_dtype [Boolean]
#   Require data types to match.
# @param check_exact [Boolean]
#   Require float values to match exactly. If set to `false`, values are considered
#   equal when within tolerance of each other (see `rtol` and `atol`).
#   Only affects columns with a Float data type.
# @param rtol [Float]
#   Relative tolerance for inexact checking. Fraction of values in `right`.
# @param atol [Float]
#   Absolute tolerance for inexact checking.
# @param categorical_as_str [Boolean]
#   Cast categorical columns to string before comparing. Enabling this helps
#   compare columns that do not share the same string cache.
#
# @return [nil]
#
# @raise [AssertionError]
#   If the frames compare as equal.
def assert_frame_not_equal(
  left,
  right,
  check_row_order: true,
  check_column_order: true,
  check_dtype: true,
  check_exact: false,
  rtol: 1e-5,
  atol: 1e-8,
  categorical_as_str: false
)
  frames_equal =
    begin
      assert_frame_equal(
        left,
        right,
        check_column_order: check_column_order,
        check_row_order: check_row_order,
        check_dtype: check_dtype,
        check_exact: check_exact,
        rtol: rtol,
        atol: atol,
        categorical_as_str: categorical_as_str
      )
      true
    rescue AssertionError
      false
    end

  raise AssertionError, "frames are equal" if frames_equal

  nil
end
146
+
147
# Assert that the left and right Series are equal.
#
# Raises a detailed `AssertionError` if the Series differ.
# This function is intended for use in unit tests.
#
# Checks run in this order: input types, length, name, dtype, values.
#
# @param left [Object]
#   The first Series to compare.
# @param right [Object]
#   The second Series to compare.
# @param check_dtype [Boolean]
#   Require data types to match.
# @param check_names [Boolean]
#   Require names to match.
# @param check_exact [Boolean]
#   Require float values to match exactly. If set to `false`, values are considered
#   equal when within tolerance of each other (see `rtol` and `atol`).
#   Only affects columns with a Float data type.
# @param rtol [Float]
#   Relative tolerance for inexact checking, given as a fraction of the values in
#   `right`.
# @param atol [Float]
#   Absolute tolerance for inexact checking.
# @param categorical_as_str [Boolean]
#   Cast categorical columns to string before comparing. Enabling this helps
#   compare columns that do not share the same string cache.
#
# @return [nil]
def assert_series_equal(
  left,
  right,
  check_dtype: true,
  check_names: true,
  check_exact: false,
  rtol: 1e-5,
  atol: 1e-8,
  categorical_as_str: false
)
  both_series = left.is_a?(Series) && right.is_a?(Series)
  unless both_series
    raise_assertion_error(
      "inputs",
      "unexpected input types",
      left.class.name,
      right.class.name
    )
  end

  raise_assertion_error("Series", "length mismatch", left.len, right.len) if left.len != right.len

  if check_names && left.name != right.name
    raise_assertion_error("Series", "name mismatch", left.name, right.name)
  end

  if check_dtype && left.dtype != right.dtype
    raise_assertion_error("Series", "dtype mismatch", left.dtype, right.dtype)
  end

  _assert_series_values_equal(
    left,
    right,
    check_exact: check_exact,
    rtol: rtol,
    atol: atol,
    categorical_as_str: categorical_as_str
  )
end
214
+
215
# Assert that the left and right Series are **not** equal.
#
# Runs `assert_series_equal` with the same options and inverts the outcome:
# an `AssertionError` from that check means success here.
# This function is intended for use in unit tests.
#
# @param left [Object]
#   The first Series to compare.
# @param right [Object]
#   The second Series to compare.
# @param check_dtype [Boolean]
#   Require data types to match.
# @param check_names [Boolean]
#   Require names to match.
# @param check_exact [Boolean]
#   Require float values to match exactly. If set to `false`, values are considered
#   equal when within tolerance of each other (see `rtol` and `atol`).
#   Only affects columns with a Float data type.
# @param rtol [Float]
#   Relative tolerance for inexact checking, given as a fraction of the values in
#   `right`.
# @param atol [Float]
#   Absolute tolerance for inexact checking.
# @param categorical_as_str [Boolean]
#   Cast categorical columns to string before comparing. Enabling this helps
#   compare columns that do not share the same string cache.
#
# @return [nil]
#
# @raise [AssertionError]
#   If the Series compare as equal.
def assert_series_not_equal(
  left,
  right,
  check_dtype: true,
  check_names: true,
  check_exact: false,
  rtol: 1e-5,
  atol: 1e-8,
  categorical_as_str: false
)
  series_equal =
    begin
      assert_series_equal(
        left,
        right,
        check_dtype: check_dtype,
        check_names: check_names,
        check_exact: check_exact,
        rtol: rtol,
        atol: atol,
        categorical_as_str: categorical_as_str
      )
      true
    rescue AssertionError
      false
    end

  raise AssertionError, "Series are equal" if series_equal

  nil
end
269
+
270
+ private
271
+
272
# Validate that both inputs are frames of the same kind.
#
# @param left [Object]
#   First input to check.
# @param right [Object]
#   Second input to check.
#
# @return [Boolean]
#   `false` when both inputs are DataFrames, `true` when both are LazyFrames.
#
# @raise [AssertionError]
#   For any other combination, including a DataFrame/LazyFrame mix.
def _assert_correct_input_type(left, right)
  return false if left.is_a?(DataFrame) && right.is_a?(DataFrame)

  # Both sides must be lazy: the caller invokes `collect` on each input
  # whenever this method returns true.
  return true if left.is_a?(LazyFrame) && right.is_a?(LazyFrame)

  raise_assertion_error(
    "inputs",
    "unexpected input types",
    left.class.name,
    right.class.name
  )
end
286
+
287
# Assert that two frames have matching schemas.
#
# Column names are always required to match as a set. Column order and
# dtypes are only enforced when the corresponding flag is set.
#
# @param left [Object]
#   First frame; must respond to `schema`.
# @param right [Object]
#   Second frame; must respond to `schema`.
# @param check_dtype [Boolean]
#   Require data types to match.
# @param check_column_order [Boolean]
#   Require column order to match.
# @param objects [String]
#   Plural label used in error messages (e.g. "DataFrames").
#
# @return [nil]
#
# @raise [AssertionError]
#   If the schemas differ in a way the flags do not permit.
def _assert_frame_schema_equal(
  left,
  right,
  check_dtype:,
  check_column_order:,
  objects:
)
  left_schema, right_schema = left.schema, right.schema
  left_columns, right_columns = left_schema.keys, right_schema.keys

  # Fast path for equal frames. Hash equality ignores insertion order, so
  # the key order is compared as well; otherwise reordered columns would
  # slip past the `check_column_order` check below.
  return if left_schema == right_schema && left_columns == right_columns

  # Special error messages for when column names do not match.
  # The label is singularized ("DataFrames" -> "DataFrame") for readability.
  singular = objects.delete_suffix("s")

  left_not_right = left_columns - right_columns
  unless left_not_right.empty?
    msg = "columns #{left_not_right.inspect} in left #{singular}, but not in right"
    raise AssertionError, msg
  end

  right_not_left = right_columns - left_columns
  unless right_not_left.empty?
    msg = "columns #{right_not_left.inspect} in right #{singular}, but not in left"
    raise AssertionError, msg
  end

  if check_column_order && left_columns != right_columns
    detail = "columns are not in the same order"
    raise_assertion_error(objects, detail, left_columns, right_columns)
  end

  if check_dtype
    # Hash comparison is order-insensitive, so this only detects dtype differences.
    left_schema_dict, right_schema_dict = left_schema.to_h, right_schema.to_h
    if left_schema_dict != right_schema_dict
      detail = "dtypes do not match"
      raise_assertion_error(objects, detail, left_schema_dict, right_schema_dict)
    end
  end

  nil
end
329
+
330
# Sort both frames by all columns of `left` so rows can be compared
# independently of their original order.
#
# @param left [Object]
#   First frame; its column names are used as the sort keys for both frames.
# @param right [Object]
#   Second frame.
#
# @return [Array]
#   The sorted `[left, right]` pair.
#
# @raise [InvalidAssert]
#   If sorting either frame fails.
def _sort_dataframes(left, right)
  sort_keys = left.columns
  begin
    sorted_left = left.sort(sort_keys)
    sorted_right = right.sort(sort_keys)
    [sorted_left, sorted_right]
  rescue
    # Any sorting failure is reported as an invalid use of the assertion.
    raise InvalidAssert, "cannot set `check_row_order: false` on frame with unsortable columns"
  end
end
341
+
342
# Assert that the values of two Series are equal.
#
# Elements are first compared exactly via `ne_missing`. Nested list/struct
# dtypes are delegated to `_assert_series_nested_values_equal` for the
# elements that differ. For float dtypes with `check_exact: false`, the
# null, NaN and tolerance helpers decide whether remaining differences are
# acceptable.
#
# @param left [Object]
#   First Series.
# @param right [Object]
#   Second Series.
# @param check_exact [Boolean]
#   Require values to match exactly, even for float dtypes.
# @param rtol [Float]
#   Relative tolerance, forwarded to the tolerance check.
# @param atol [Float]
#   Absolute tolerance, forwarded to the tolerance check.
# @param categorical_as_str [Boolean]
#   Cast categorical Series to string before comparing.
#
# @return [nil]
#
# @raise [AssertionError]
#   If the values differ.
def _assert_series_values_equal(
  left,
  right,
  check_exact:,
  rtol:,
  atol:,
  categorical_as_str:
)
  if categorical_as_str
    left = left.cast(String) if left.dtype == Categorical
    right = right.cast(String) if right.dtype == Categorical
  end

  # Determine unequal elements
  begin
    unequal = left.ne_missing(right)
  rescue
    raise_assertion_error(
      "Series",
      "incompatible data types",
      left.dtype,
      right.dtype
    )
  end

  # Check nested dtypes in separate function
  if _comparing_nested_floats(left.dtype, right.dtype)
    begin
      # The helper declares `left` and `right` as positional parameters, so
      # they must be passed positionally (passing them as keywords raises
      # ArgumentError).
      _assert_series_nested_values_equal(
        left.filter(unequal),
        right.filter(unequal),
        check_exact: check_exact,
        rtol: rtol,
        atol: atol,
        categorical_as_str: categorical_as_str
      )
    rescue AssertionError
      raise_assertion_error(
        "Series",
        "nested value mismatch",
        left.to_a,
        right.to_a
      )
    end
    return
  end

  # If no differences found during exact checking, we're done
  return unless unequal.any

  # Only do inexact checking for float types
  if check_exact || !left.dtype.float? || !right.dtype.float?
    raise_assertion_error(
      "Series", "exact value mismatch", left.to_a, right.to_a
    )
  end

  _assert_series_null_values_match(left, right)
  _assert_series_nan_values_match(left, right)
  # NOTE(review): `_assert_series_values_within_tolerance` is not defined in
  # this file -- confirm it is provided elsewhere, otherwise this call raises
  # NoMethodError for float Series that differ.
  _assert_series_values_within_tolerance(
    left,
    right,
    unequal,
    rtol: rtol,
    atol: atol
  )
end
416
+
417
# Compare the contents of two Series with a nested (list or struct) dtype.
#
# List-like Series are compared element by element; anything else is
# treated as a struct, unnested, and compared pairwise.
#
# @param left [Object]
#   First nested Series.
# @param right [Object]
#   Second nested Series.
# @param check_exact [Boolean]
#   Forwarded to `_assert_series_values_equal`.
# @param rtol [Float]
#   Forwarded to `_assert_series_values_equal`.
# @param atol [Float]
#   Forwarded to `_assert_series_values_equal`.
# @param categorical_as_str [Boolean]
#   Forwarded to `_assert_series_values_equal`.
#
# @return [nil]
#
# @raise [AssertionError]
#   If any pair of nested values differs.
def _assert_series_nested_values_equal(
  left,
  right,
  check_exact:,
  rtol:,
  atol:,
  categorical_as_str:
)
  # Shared comparison for one pair of inner values.
  compare_pair = lambda do |inner_left, inner_right|
    _assert_series_values_equal(
      inner_left,
      inner_right,
      check_exact: check_exact,
      rtol: rtol,
      atol: atol,
      categorical_as_str: categorical_as_str
    )
  end

  if _comparing_lists(left.dtype, right.dtype)
    # compare nested lists element-wise
    left.zip(right) do |inner_left, inner_right|
      # A null on either side of a differing pair is always a mismatch.
      if inner_left.nil? || inner_right.nil?
        raise_assertion_error("Series", "nested value mismatch", inner_left, inner_right)
      end

      compare_pair.call(inner_left, inner_right)
    end
  else
    # unnest structs as series and compare
    left_fields = left.struct.unnest
    right_fields = right.struct.unnest
    left_fields.zip(right_fields) do |inner_left, inner_right|
      compare_pair.call(inner_left, inner_right)
    end
  end
end
457
+
458
# Assert that nulls occur at the same positions in both Series.
#
# @param left [Object]
#   First Series.
# @param right [Object]
#   Second Series.
#
# @return [nil]
#
# @raise [AssertionError]
#   If the null masks of the two Series differ anywhere.
def _assert_series_null_values_match(left, right)
  mismatch_mask = left.is_null != right.is_null
  return unless mismatch_mask.any

  raise_assertion_error(
    "Series", "null value mismatch", left.to_a, right.to_a
  )
end
466
+
467
# Assert that NaNs occur at the same positions in both Series.
#
# The check is skipped unless `_comparing_floats` reports that both
# dtypes are floats.
#
# @param left [Object]
#   First Series.
# @param right [Object]
#   Second Series.
#
# @return [nil]
#
# @raise [AssertionError]
#   If the NaN masks of the two Series differ anywhere.
def _assert_series_nan_values_match(left, right)
  return unless _comparing_floats(left.dtype, right.dtype)

  mismatch_mask = left.is_nan != right.is_nan
  return unless mismatch_mask.any

  raise_assertion_error(
    "Series",
    "nan value mismatch",
    left.to_a,
    right.to_a
  )
end
481
+
482
# Check whether both data types are float types.
#
# Callers in this file pass dtypes (not Series), and every other dtype
# check here uses the `float?` predicate, so the same predicate is used.
#
# @param left [Object]
#   First data type.
# @param right [Object]
#   Second data type.
#
# @return [Boolean]
def _comparing_floats(left, right)
  left.float? && right.float?
end
485
+
486
# Check whether both data types are list-like (`List` or `Array`).
#
# @param left [Object]
#   First data type.
# @param right [Object]
#   Second data type.
#
# @return [Boolean]
def _comparing_lists(left, right)
  list_like = ->(dtype) { [List, Array].include?(dtype) }
  list_like.call(left) && list_like.call(right)
end
489
+
490
# Check whether both data types compare equal to `Struct`.
#
# @param left [Object]
#   First data type.
# @param right [Object]
#   Second data type.
#
# @return [Boolean]
def _comparing_structs(left, right)
  left_is_struct = (left == Struct)
  left_is_struct && right == Struct
end
493
+
494
# Check whether both data types are nested (both list-like or both
# structs) and both answer true to `float?`.
#
# @param left [Object]
#   First data type.
# @param right [Object]
#   Second data type.
#
# @return [Boolean]
def _comparing_nested_floats(left, right)
  nested = _comparing_lists(left, right) || _comparing_structs(left, right)
  return false unless nested

  left.float? && right.float?
end
501
+
502
# Raise an `AssertionError` with a standardized message showing both sides.
#
# @param objects [String]
#   Label for what is being compared (e.g. "Series", "DataFrames").
# @param detail [String]
#   Short description of the mismatch.
# @param left [Object]
#   Left-hand value, interpolated into the message.
# @param right [Object]
#   Right-hand value, interpolated into the message.
#
# @raise [AssertionError]
#   Always.
def raise_assertion_error(objects, detail, left, right)
  raise AssertionError, "#{objects} are different (#{detail})\n[left]: #{left}\n[right]: #{right}"
end
506
+ end
507
+ end