polars-df 0.6.0-x86_64-darwin → 0.8.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/Cargo.lock +597 -599
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +5523 -6947
- data/README.md +8 -7
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/{3.0 → 3.3}/polars.bundle +0 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +182 -145
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +23 -28
- data/lib/polars/date_time_name_space.rb +17 -37
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +398 -110
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +40 -5
- data/lib/polars/lazy_frame.rb +116 -89
- data/lib/polars/lazy_functions.rb +40 -68
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +315 -43
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -13
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +8 -5
data/lib/polars/data_frame.rb
CHANGED
@@ -20,15 +20,9 @@ module Polars
|
|
20
20
|
# this does not yield conclusive results, column orientation is used.
|
21
21
|
def initialize(data = nil, schema: nil, columns: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
|
22
22
|
schema ||= columns
|
23
|
-
raise Todo if schema_overrides
|
24
23
|
|
25
|
-
# TODO deprecate in favor of read_sql
|
26
24
|
if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
|
27
|
-
|
28
|
-
data = {}
|
29
|
-
result.columns.each_with_index do |k, i|
|
30
|
-
data[k] = result.rows.map { |r| r[i] }
|
31
|
-
end
|
25
|
+
raise ArgumentError, "Use read_database instead"
|
32
26
|
end
|
33
27
|
|
34
28
|
if data.nil?
|
@@ -125,10 +119,10 @@ module Polars
|
|
125
119
|
|
126
120
|
processed_null_values = Utils._process_null_values(null_values)
|
127
121
|
|
128
|
-
if columns.is_a?(String)
|
122
|
+
if columns.is_a?(::String)
|
129
123
|
columns = [columns]
|
130
124
|
end
|
131
|
-
if file.is_a?(String) && file.include?("*")
|
125
|
+
if file.is_a?(::String) && file.include?("*")
|
132
126
|
dtypes_dict = nil
|
133
127
|
if !dtype_list.nil?
|
134
128
|
dtypes_dict = dtype_list.to_h
|
@@ -212,11 +206,11 @@ module Polars
|
|
212
206
|
if Utils.pathlike?(source)
|
213
207
|
source = Utils.normalise_filepath(source)
|
214
208
|
end
|
215
|
-
if columns.is_a?(String)
|
209
|
+
if columns.is_a?(::String)
|
216
210
|
columns = [columns]
|
217
211
|
end
|
218
212
|
|
219
|
-
if source.is_a?(String) && source.include?("*") && Utils.local_file?(source)
|
213
|
+
if source.is_a?(::String) && source.include?("*") && Utils.local_file?(source)
|
220
214
|
scan =
|
221
215
|
Polars.scan_parquet(
|
222
216
|
source,
|
@@ -275,11 +269,11 @@ module Polars
|
|
275
269
|
if Utils.pathlike?(file)
|
276
270
|
file = Utils.normalise_filepath(file)
|
277
271
|
end
|
278
|
-
if columns.is_a?(String)
|
272
|
+
if columns.is_a?(::String)
|
279
273
|
columns = [columns]
|
280
274
|
end
|
281
275
|
|
282
|
-
if file.is_a?(String) && file.include?("*")
|
276
|
+
if file.is_a?(::String) && file.include?("*")
|
283
277
|
raise Todo
|
284
278
|
end
|
285
279
|
|
@@ -417,7 +411,7 @@ module Polars
|
|
417
411
|
# }
|
418
412
|
# )
|
419
413
|
# df.dtypes
|
420
|
-
# # => [Polars::Int64, Polars::Float64, Polars::
|
414
|
+
# # => [Polars::Int64, Polars::Float64, Polars::String]
|
421
415
|
def dtypes
|
422
416
|
_df.dtypes
|
423
417
|
end
|
@@ -435,7 +429,7 @@ module Polars
|
|
435
429
|
# }
|
436
430
|
# )
|
437
431
|
# df.schema
|
438
|
-
# # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::
|
432
|
+
# # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String}
|
439
433
|
def schema
|
440
434
|
columns.zip(dtypes).to_h
|
441
435
|
end
|
@@ -595,13 +589,13 @@ module Polars
|
|
595
589
|
return df.slice(row_selection, 1)
|
596
590
|
end
|
597
591
|
# df[2, "a"]
|
598
|
-
if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
|
592
|
+
if col_selection.is_a?(::String) || col_selection.is_a?(Symbol)
|
599
593
|
return self[col_selection][row_selection]
|
600
594
|
end
|
601
595
|
end
|
602
596
|
|
603
597
|
# column selection can be "a" and ["a", "b"]
|
604
|
-
if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
|
598
|
+
if col_selection.is_a?(::String) || col_selection.is_a?(Symbol)
|
605
599
|
col_selection = [col_selection]
|
606
600
|
end
|
607
601
|
|
@@ -627,7 +621,7 @@ module Polars
|
|
627
621
|
|
628
622
|
# select single column
|
629
623
|
# df["foo"]
|
630
|
-
if item.is_a?(String) || item.is_a?(Symbol)
|
624
|
+
if item.is_a?(::String) || item.is_a?(Symbol)
|
631
625
|
return Utils.wrap_s(_df.column(item.to_s))
|
632
626
|
end
|
633
627
|
|
@@ -653,7 +647,7 @@ module Polars
|
|
653
647
|
|
654
648
|
if item.is_a?(Series)
|
655
649
|
dtype = item.dtype
|
656
|
-
if dtype ==
|
650
|
+
if dtype == String
|
657
651
|
return _from_rbdf(_df.select(item))
|
658
652
|
elsif dtype == UInt32
|
659
653
|
return _from_rbdf(_df.take_with_series(item._s))
|
@@ -704,7 +698,7 @@ module Polars
|
|
704
698
|
s[row_selection] = value
|
705
699
|
|
706
700
|
if col_selection.is_a?(Integer)
|
707
|
-
|
701
|
+
replace_column(col_selection, s)
|
708
702
|
elsif Utils.strlike?(col_selection)
|
709
703
|
replace(col_selection, s)
|
710
704
|
end
|
@@ -905,6 +899,7 @@ module Polars
|
|
905
899
|
def write_csv(
|
906
900
|
file = nil,
|
907
901
|
has_header: true,
|
902
|
+
include_header: nil,
|
908
903
|
sep: ",",
|
909
904
|
quote: '"',
|
910
905
|
batch_size: 1024,
|
@@ -914,6 +909,8 @@ module Polars
|
|
914
909
|
float_precision: nil,
|
915
910
|
null_value: nil
|
916
911
|
)
|
912
|
+
include_header = has_header if include_header.nil?
|
913
|
+
|
917
914
|
if sep.length > 1
|
918
915
|
raise ArgumentError, "only single byte separator is allowed"
|
919
916
|
elsif quote.length > 1
|
@@ -927,7 +924,7 @@ module Polars
|
|
927
924
|
buffer.set_encoding(Encoding::BINARY)
|
928
925
|
_df.write_csv(
|
929
926
|
buffer,
|
930
|
-
|
927
|
+
include_header,
|
931
928
|
sep.ord,
|
932
929
|
quote.ord,
|
933
930
|
batch_size,
|
@@ -946,7 +943,7 @@ module Polars
|
|
946
943
|
|
947
944
|
_df.write_csv(
|
948
945
|
file,
|
949
|
-
|
946
|
+
include_header,
|
950
947
|
sep.ord,
|
951
948
|
quote.ord,
|
952
949
|
batch_size,
|
@@ -1151,22 +1148,8 @@ module Polars
|
|
1151
1148
|
# # │ b ┆ 1 ┆ 2 ┆ 3 │
|
1152
1149
|
# # └─────┴─────┴─────┴─────┘
|
1153
1150
|
def transpose(include_header: false, header_name: "column", column_names: nil)
|
1154
|
-
|
1155
|
-
|
1156
|
-
names = []
|
1157
|
-
n = df.width
|
1158
|
-
if include_header
|
1159
|
-
names << header_name
|
1160
|
-
n -= 1
|
1161
|
-
end
|
1162
|
-
|
1163
|
-
column_names = column_names.each
|
1164
|
-
n.times do
|
1165
|
-
names << column_names.next
|
1166
|
-
end
|
1167
|
-
df.columns = names
|
1168
|
-
end
|
1169
|
-
df
|
1151
|
+
keep_names_as = include_header ? header_name : nil
|
1152
|
+
_from_rbdf(_df.transpose(keep_names_as, column_names))
|
1170
1153
|
end
|
1171
1154
|
|
1172
1155
|
# Reverse the DataFrame.
|
@@ -1239,7 +1222,7 @@ module Polars
|
|
1239
1222
|
# @example
|
1240
1223
|
# df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
|
1241
1224
|
# s = Polars::Series.new("baz", [97, 98, 99])
|
1242
|
-
# df.
|
1225
|
+
# df.insert_column(1, s)
|
1243
1226
|
# # =>
|
1244
1227
|
# # shape: (3, 3)
|
1245
1228
|
# # ┌─────┬─────┬─────┐
|
@@ -1261,7 +1244,7 @@ module Polars
|
|
1261
1244
|
# }
|
1262
1245
|
# )
|
1263
1246
|
# s = Polars::Series.new("d", [-2.5, 15, 20.5, 0])
|
1264
|
-
# df.
|
1247
|
+
# df.insert_column(3, s)
|
1265
1248
|
# # =>
|
1266
1249
|
# # shape: (4, 4)
|
1267
1250
|
# # ┌─────┬──────┬───────┬──────┐
|
@@ -1274,13 +1257,14 @@ module Polars
|
|
1274
1257
|
# # │ 3 ┆ 10.0 ┆ false ┆ 20.5 │
|
1275
1258
|
# # │ 4 ┆ 13.0 ┆ true ┆ 0.0 │
|
1276
1259
|
# # └─────┴──────┴───────┴──────┘
|
1277
|
-
def
|
1260
|
+
def insert_column(index, series)
|
1278
1261
|
if index < 0
|
1279
1262
|
index = columns.length + index
|
1280
1263
|
end
|
1281
|
-
_df.
|
1264
|
+
_df.insert_column(index, series._s)
|
1282
1265
|
self
|
1283
1266
|
end
|
1267
|
+
alias_method :insert_at_idx, :insert_column
|
1284
1268
|
|
1285
1269
|
# Filter the rows in the DataFrame based on a predicate expression.
|
1286
1270
|
#
|
@@ -1384,7 +1368,7 @@ module Polars
|
|
1384
1368
|
]
|
1385
1369
|
)._df
|
1386
1370
|
)
|
1387
|
-
summary.
|
1371
|
+
summary.insert_column(
|
1388
1372
|
0,
|
1389
1373
|
Polars::Series.new(
|
1390
1374
|
"describe",
|
@@ -1405,11 +1389,12 @@ module Polars
|
|
1405
1389
|
# df = Polars::DataFrame.new(
|
1406
1390
|
# {"foo" => [1, 2, 3], "bar" => [6, 7, 8], "ham" => ["a", "b", "c"]}
|
1407
1391
|
# )
|
1408
|
-
# df.
|
1392
|
+
# df.get_column_index("ham")
|
1409
1393
|
# # => 2
|
1410
|
-
def
|
1411
|
-
_df.
|
1394
|
+
def get_column_index(name)
|
1395
|
+
_df.get_column_index(name)
|
1412
1396
|
end
|
1397
|
+
alias_method :find_idx_by_name, :get_column_index
|
1413
1398
|
|
1414
1399
|
# Replace a column at an index location.
|
1415
1400
|
#
|
@@ -1429,7 +1414,7 @@ module Polars
|
|
1429
1414
|
# }
|
1430
1415
|
# )
|
1431
1416
|
# s = Polars::Series.new("apple", [10, 20, 30])
|
1432
|
-
# df.
|
1417
|
+
# df.replace_column(0, s)
|
1433
1418
|
# # =>
|
1434
1419
|
# # shape: (3, 3)
|
1435
1420
|
# # ┌───────┬─────┬─────┐
|
@@ -1441,13 +1426,14 @@ module Polars
|
|
1441
1426
|
# # │ 20 ┆ 7 ┆ b │
|
1442
1427
|
# # │ 30 ┆ 8 ┆ c │
|
1443
1428
|
# # └───────┴─────┴─────┘
|
1444
|
-
def
|
1429
|
+
def replace_column(index, series)
|
1445
1430
|
if index < 0
|
1446
1431
|
index = columns.length + index
|
1447
1432
|
end
|
1448
|
-
_df.
|
1433
|
+
_df.replace_column(index, series._s)
|
1449
1434
|
self
|
1450
1435
|
end
|
1436
|
+
alias_method :replace_at_idx, :replace_column
|
1451
1437
|
|
1452
1438
|
# Sort the DataFrame by column.
|
1453
1439
|
#
|
@@ -1541,13 +1527,14 @@ module Polars
|
|
1541
1527
|
# "ham" => ["c", "b", "a"]
|
1542
1528
|
# }
|
1543
1529
|
# )
|
1544
|
-
# df1.
|
1530
|
+
# df1.equals(df1)
|
1545
1531
|
# # => true
|
1546
|
-
# df1.
|
1532
|
+
# df1.equals(df2)
|
1547
1533
|
# # => false
|
1548
|
-
def
|
1549
|
-
_df.
|
1534
|
+
def equals(other, null_equal: true)
|
1535
|
+
_df.equals(other._df, null_equal)
|
1550
1536
|
end
|
1537
|
+
alias_method :frame_equal, :equals
|
1551
1538
|
|
1552
1539
|
# Replace a column by a new Series.
|
1553
1540
|
#
|
@@ -1733,7 +1720,7 @@ module Polars
|
|
1733
1720
|
# # │ 3 ┆ 8 ┆ c │
|
1734
1721
|
# # └─────┴─────┴─────┘
|
1735
1722
|
def drop_nulls(subset: nil)
|
1736
|
-
if subset.is_a?(String)
|
1723
|
+
if subset.is_a?(::String)
|
1737
1724
|
subset = [subset]
|
1738
1725
|
end
|
1739
1726
|
_from_rbdf(_df.drop_nulls(subset))
|
@@ -1811,13 +1798,13 @@ module Polars
|
|
1811
1798
|
_from_rbdf(_df.with_row_count(name, offset))
|
1812
1799
|
end
|
1813
1800
|
|
1814
|
-
# Start a
|
1801
|
+
# Start a group by operation.
|
1815
1802
|
#
|
1816
1803
|
# @param by [Object]
|
1817
1804
|
# Column(s) to group by.
|
1818
1805
|
# @param maintain_order [Boolean]
|
1819
1806
|
# Make sure that the order of the groups remains consistent. This is more
|
1820
|
-
# expensive than a default
|
1807
|
+
# expensive than a default group by. Note that this only works in expression
|
1821
1808
|
# aggregations.
|
1822
1809
|
#
|
1823
1810
|
# @return [GroupBy]
|
@@ -1830,7 +1817,7 @@ module Polars
|
|
1830
1817
|
# "c" => [6, 5, 4, 3, 2, 1]
|
1831
1818
|
# }
|
1832
1819
|
# )
|
1833
|
-
# df.
|
1820
|
+
# df.group_by("a").agg(Polars.col("b").sum).sort("a")
|
1834
1821
|
# # =>
|
1835
1822
|
# # shape: (3, 2)
|
1836
1823
|
# # ┌─────┬─────┐
|
@@ -1842,25 +1829,26 @@ module Polars
|
|
1842
1829
|
# # │ b ┆ 11 │
|
1843
1830
|
# # │ c ┆ 6 │
|
1844
1831
|
# # └─────┴─────┘
|
1845
|
-
def
|
1832
|
+
def group_by(by, maintain_order: false)
|
1846
1833
|
if !Utils.bool?(maintain_order)
|
1847
|
-
raise TypeError, "invalid input for
|
1834
|
+
raise TypeError, "invalid input for group_by arg `maintain_order`: #{maintain_order}."
|
1848
1835
|
end
|
1849
1836
|
GroupBy.new(
|
1850
|
-
|
1837
|
+
self,
|
1851
1838
|
by,
|
1852
|
-
self.class,
|
1853
1839
|
maintain_order: maintain_order
|
1854
1840
|
)
|
1855
1841
|
end
|
1842
|
+
alias_method :groupby, :group_by
|
1843
|
+
alias_method :group, :group_by
|
1856
1844
|
|
1857
1845
|
# Create rolling groups based on a time column.
|
1858
1846
|
#
|
1859
1847
|
# Also works for index values of type `:i32` or `:i64`.
|
1860
1848
|
#
|
1861
|
-
# Different from a `
|
1849
|
+
# Different from a `group_by_dynamic`, the windows are now determined by the
|
1862
1850
|
# individual values and are not of constant intervals. For constant intervals use
|
1863
|
-
# *
|
1851
|
+
# *group_by_dynamic*
|
1864
1852
|
#
|
1865
1853
|
# The `period` and `offset` arguments are created either from a timedelta, or
|
1866
1854
|
# by using the following string language:
|
@@ -1880,7 +1868,7 @@ module Polars
|
|
1880
1868
|
# Or combine them:
|
1881
1869
|
# "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
|
1882
1870
|
#
|
1883
|
-
# In case of a
|
1871
|
+
# In case of a group_by_rolling on an integer column, the windows are defined by:
|
1884
1872
|
#
|
1885
1873
|
# - **"1i" # length 1**
|
1886
1874
|
# - **"10i" # length 10**
|
@@ -1891,7 +1879,7 @@ module Polars
|
|
1891
1879
|
# This column must be sorted in ascending order. If not the output will not
|
1892
1880
|
# make sense.
|
1893
1881
|
#
|
1894
|
-
# In case of a rolling
|
1882
|
+
# In case of a rolling group by on indices, dtype needs to be one of
|
1895
1883
|
# `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
|
1896
1884
|
# performance matters use an `:i64` column.
|
1897
1885
|
# @param period [Object]
|
@@ -1923,7 +1911,7 @@ module Polars
|
|
1923
1911
|
# df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
|
1924
1912
|
# Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
|
1925
1913
|
# )
|
1926
|
-
# df.
|
1914
|
+
# df.group_by_rolling(index_column: "dt", period: "2d").agg(
|
1927
1915
|
# [
|
1928
1916
|
# Polars.sum("a").alias("sum_a"),
|
1929
1917
|
# Polars.min("a").alias("min_a"),
|
@@ -1944,7 +1932,7 @@ module Polars
|
|
1944
1932
|
# # │ 2020-01-03 19:45:32 ┆ 11 ┆ 2 ┆ 9 │
|
1945
1933
|
# # │ 2020-01-08 23:16:43 ┆ 1 ┆ 1 ┆ 1 │
|
1946
1934
|
# # └─────────────────────┴───────┴───────┴───────┘
|
1947
|
-
def
|
1935
|
+
def group_by_rolling(
|
1948
1936
|
index_column:,
|
1949
1937
|
period:,
|
1950
1938
|
offset: nil,
|
@@ -1954,11 +1942,12 @@ module Polars
|
|
1954
1942
|
)
|
1955
1943
|
RollingGroupBy.new(self, index_column, period, offset, closed, by, check_sorted)
|
1956
1944
|
end
|
1945
|
+
alias_method :groupby_rolling, :group_by_rolling
|
1957
1946
|
|
1958
1947
|
# Group based on a time value (or index value of type `:i32`, `:i64`).
|
1959
1948
|
#
|
1960
1949
|
# Time windows are calculated and rows are assigned to windows. Different from a
|
1961
|
-
# normal
|
1950
|
+
# normal group by is that a row can be a member of multiple groups. The time/index
|
1962
1951
|
# window could be seen as a rolling window, with a window size determined by
|
1963
1952
|
# dates/times/values instead of slots in the DataFrame.
|
1964
1953
|
#
|
@@ -1986,7 +1975,7 @@ module Polars
|
|
1986
1975
|
# Or combine them:
|
1987
1976
|
# "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
|
1988
1977
|
#
|
1989
|
-
# In case of a
|
1978
|
+
# In case of a group_by_dynamic on an integer column, the windows are defined by:
|
1990
1979
|
#
|
1991
1980
|
# - "1i" # length 1
|
1992
1981
|
# - "10i" # length 10
|
@@ -1997,7 +1986,7 @@ module Polars
|
|
1997
1986
|
# This column must be sorted in ascending order. If not the output will not
|
1998
1987
|
# make sense.
|
1999
1988
|
#
|
2000
|
-
# In case of a dynamic
|
1989
|
+
# In case of a dynamic group by on indices, dtype needs to be one of
|
2001
1990
|
# `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
|
2002
1991
|
# performance matters use an `:i64` column.
|
2003
1992
|
# @param every
|
@@ -2048,7 +2037,7 @@ module Polars
|
|
2048
2037
|
# # └─────────────────────┴─────┘
|
2049
2038
|
#
|
2050
2039
|
# @example Group by windows of 1 hour starting at 2021-12-16 00:00:00.
|
2051
|
-
# df.
|
2040
|
+
# df.group_by_dynamic("time", every: "1h", closed: "right").agg(
|
2052
2041
|
# [
|
2053
2042
|
# Polars.col("time").min.alias("time_min"),
|
2054
2043
|
# Polars.col("time").max.alias("time_max")
|
@@ -2068,7 +2057,7 @@ module Polars
|
|
2068
2057
|
# # └─────────────────────┴─────────────────────┴─────────────────────┘
|
2069
2058
|
#
|
2070
2059
|
# @example The window boundaries can also be added to the aggregation result.
|
2071
|
-
# df.
|
2060
|
+
# df.group_by_dynamic(
|
2072
2061
|
# "time", every: "1h", include_boundaries: true, closed: "right"
|
2073
2062
|
# ).agg([Polars.col("time").count.alias("time_count")])
|
2074
2063
|
# # =>
|
@@ -2085,7 +2074,7 @@ module Polars
|
|
2085
2074
|
# # └─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
|
2086
2075
|
#
|
2087
2076
|
# @example When closed="left", should not include right end of interval.
|
2088
|
-
# df.
|
2077
|
+
# df.group_by_dynamic("time", every: "1h", closed: "left").agg(
|
2089
2078
|
# [
|
2090
2079
|
# Polars.col("time").count.alias("time_count"),
|
2091
2080
|
# Polars.col("time").alias("time_agg_list")
|
@@ -2105,7 +2094,7 @@ module Polars
|
|
2105
2094
|
# # └─────────────────────┴────────────┴───────────────────────────────────┘
|
2106
2095
|
#
|
2107
2096
|
# @example When closed="both" the time values at the window boundaries belong to 2 groups.
|
2108
|
-
# df.
|
2097
|
+
# df.group_by_dynamic("time", every: "1h", closed: "both").agg(
|
2109
2098
|
# [Polars.col("time").count.alias("time_count")]
|
2110
2099
|
# )
|
2111
2100
|
# # =>
|
@@ -2122,7 +2111,7 @@ module Polars
|
|
2122
2111
|
# # │ 2021-12-16 03:00:00 ┆ 1 │
|
2123
2112
|
# # └─────────────────────┴────────────┘
|
2124
2113
|
#
|
2125
|
-
# @example Dynamic
|
2114
|
+
# @example Dynamic group bys can also be combined with grouping on normal keys.
|
2126
2115
|
# df = Polars::DataFrame.new(
|
2127
2116
|
# {
|
2128
2117
|
# "time" => Polars.date_range(
|
@@ -2133,7 +2122,7 @@ module Polars
|
|
2133
2122
|
# "groups" => ["a", "a", "a", "b", "b", "a", "a"]
|
2134
2123
|
# }
|
2135
2124
|
# )
|
2136
|
-
# df.
|
2125
|
+
# df.group_by_dynamic(
|
2137
2126
|
# "time",
|
2138
2127
|
# every: "1h",
|
2139
2128
|
# closed: "both",
|
@@ -2156,14 +2145,14 @@ module Polars
|
|
2156
2145
|
# # │ b ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1 │
|
2157
2146
|
# # └────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
|
2158
2147
|
#
|
2159
|
-
# @example Dynamic
|
2148
|
+
# @example Dynamic group by on an index column.
|
2160
2149
|
# df = Polars::DataFrame.new(
|
2161
2150
|
# {
|
2162
2151
|
# "idx" => Polars.arange(0, 6, eager: true),
|
2163
2152
|
# "A" => ["A", "A", "B", "B", "B", "C"]
|
2164
2153
|
# }
|
2165
2154
|
# )
|
2166
|
-
# df.
|
2155
|
+
# df.group_by_dynamic(
|
2167
2156
|
# "idx",
|
2168
2157
|
# every: "2i",
|
2169
2158
|
# period: "3i",
|
@@ -2181,7 +2170,7 @@ module Polars
|
|
2181
2170
|
# # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
|
2182
2171
|
# # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
|
2183
2172
|
# # └─────────────────┴─────────────────┴─────┴─────────────────┘
|
2184
|
-
def
|
2173
|
+
def group_by_dynamic(
|
2185
2174
|
index_column,
|
2186
2175
|
every:,
|
2187
2176
|
period: nil,
|
@@ -2205,6 +2194,7 @@ module Polars
|
|
2205
2194
|
start_by
|
2206
2195
|
)
|
2207
2196
|
end
|
2197
|
+
alias_method :groupby_dynamic, :group_by_dynamic
|
2208
2198
|
|
2209
2199
|
# Upsample a DataFrame at a regular frequency.
|
2210
2200
|
#
|
@@ -2281,7 +2271,7 @@ module Polars
|
|
2281
2271
|
if by.nil?
|
2282
2272
|
by = []
|
2283
2273
|
end
|
2284
|
-
if by.is_a?(String)
|
2274
|
+
if by.is_a?(::String)
|
2285
2275
|
by = [by]
|
2286
2276
|
end
|
2287
2277
|
if offset.nil?
|
@@ -2475,17 +2465,17 @@ module Polars
|
|
2475
2465
|
# @example
|
2476
2466
|
# df.join(other_df, on: "ham", how: "outer")
|
2477
2467
|
# # =>
|
2478
|
-
# # shape: (4,
|
2479
|
-
# #
|
2480
|
-
# # │ foo ┆ bar ┆ ham
|
2481
|
-
# # │ --- ┆ --- ┆ --- ┆ ---
|
2482
|
-
# # │ i64 ┆ f64 ┆ str ┆ str
|
2483
|
-
# #
|
2484
|
-
# # │ 1 ┆ 6.0 ┆ a
|
2485
|
-
# # │ 2 ┆ 7.0 ┆ b
|
2486
|
-
# # │ null ┆ null ┆
|
2487
|
-
# # │ 3 ┆ 8.0 ┆ c
|
2488
|
-
# #
|
2468
|
+
# # shape: (4, 5)
|
2469
|
+
# # ┌──────┬──────┬──────┬───────┬───────────┐
|
2470
|
+
# # │ foo ┆ bar ┆ ham ┆ apple ┆ ham_right │
|
2471
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
2472
|
+
# # │ i64 ┆ f64 ┆ str ┆ str ┆ str │
|
2473
|
+
# # ╞══════╪══════╪══════╪═══════╪═══════════╡
|
2474
|
+
# # │ 1 ┆ 6.0 ┆ a ┆ x ┆ a │
|
2475
|
+
# # │ 2 ┆ 7.0 ┆ b ┆ y ┆ b │
|
2476
|
+
# # │ null ┆ null ┆ null ┆ z ┆ d │
|
2477
|
+
# # │ 3 ┆ 8.0 ┆ c ┆ null ┆ null │
|
2478
|
+
# # └──────┴──────┴──────┴───────┴───────────┘
|
2489
2479
|
#
|
2490
2480
|
# @example
|
2491
2481
|
# df.join(other_df, on: "ham", how: "left")
|
@@ -3125,17 +3115,17 @@ module Polars
|
|
3125
3115
|
sort_columns: false,
|
3126
3116
|
separator: "_"
|
3127
3117
|
)
|
3128
|
-
if values.is_a?(String)
|
3118
|
+
if values.is_a?(::String)
|
3129
3119
|
values = [values]
|
3130
3120
|
end
|
3131
|
-
if index.is_a?(String)
|
3121
|
+
if index.is_a?(::String)
|
3132
3122
|
index = [index]
|
3133
3123
|
end
|
3134
|
-
if columns.is_a?(String)
|
3124
|
+
if columns.is_a?(::String)
|
3135
3125
|
columns = [columns]
|
3136
3126
|
end
|
3137
3127
|
|
3138
|
-
if aggregate_fn.is_a?(String)
|
3128
|
+
if aggregate_fn.is_a?(::String)
|
3139
3129
|
case aggregate_fn
|
3140
3130
|
when "first"
|
3141
3131
|
aggregate_expr = Polars.element.first._rbexpr
|
@@ -3220,10 +3210,10 @@ module Polars
|
|
3220
3210
|
# # │ z ┆ c ┆ 6 │
|
3221
3211
|
# # └─────┴──────────┴───────┘
|
3222
3212
|
def melt(id_vars: nil, value_vars: nil, variable_name: nil, value_name: nil)
|
3223
|
-
if value_vars.is_a?(String)
|
3213
|
+
if value_vars.is_a?(::String)
|
3224
3214
|
value_vars = [value_vars]
|
3225
3215
|
end
|
3226
|
-
if id_vars.is_a?(String)
|
3216
|
+
if id_vars.is_a?(::String)
|
3227
3217
|
id_vars = [id_vars]
|
3228
3218
|
end
|
3229
3219
|
if value_vars.nil?
|
@@ -3437,7 +3427,7 @@ module Polars
|
|
3437
3427
|
# # │ C ┆ 2 ┆ l │
|
3438
3428
|
# # └─────┴─────┴─────┘}
|
3439
3429
|
def partition_by(groups, maintain_order: true, include_key: true, as_dict: false)
|
3440
|
-
if groups.is_a?(String)
|
3430
|
+
if groups.is_a?(::String)
|
3441
3431
|
groups = [groups]
|
3442
3432
|
elsif !groups.is_a?(::Array)
|
3443
3433
|
groups = Array(groups)
|
@@ -3464,8 +3454,10 @@ module Polars
|
|
3464
3454
|
|
3465
3455
|
# Shift values by the given period.
|
3466
3456
|
#
|
3467
|
-
# @param
|
3457
|
+
# @param n [Integer]
|
3468
3458
|
# Number of places to shift (may be negative).
|
3459
|
+
# @param fill_value [Object]
|
3460
|
+
# Fill the resulting null values with this value.
|
3469
3461
|
#
|
3470
3462
|
# @return [DataFrame]
|
3471
3463
|
#
|
@@ -3503,8 +3495,8 @@ module Polars
|
|
3503
3495
|
# # │ 3 ┆ 8 ┆ c │
|
3504
3496
|
# # │ null ┆ null ┆ null │
|
3505
3497
|
# # └──────┴──────┴──────┘
|
3506
|
-
def shift(
|
3507
|
-
|
3498
|
+
def shift(n, fill_value: nil)
|
3499
|
+
lazy.shift(n, fill_value: fill_value).collect(_eager: true)
|
3508
3500
|
end
|
3509
3501
|
|
3510
3502
|
# Shift the values by a given period and fill the resulting null values.
|
@@ -3537,9 +3529,7 @@ module Polars
|
|
3537
3529
|
# # │ 2 ┆ 7 ┆ b │
|
3538
3530
|
# # └─────┴─────┴─────┘
|
3539
3531
|
def shift_and_fill(periods, fill_value)
|
3540
|
-
|
3541
|
-
.shift_and_fill(periods, fill_value)
|
3542
|
-
.collect(no_optimization: true, string_cache: false)
|
3532
|
+
shift(periods, fill_value: fill_value)
|
3543
3533
|
end
|
3544
3534
|
|
3545
3535
|
# Get a mask of all duplicated rows in this DataFrame.
|
@@ -3788,9 +3778,9 @@ module Polars
|
|
3788
3778
|
# # └─────┴─────┴─────┘
|
3789
3779
|
def max(axis: 0)
|
3790
3780
|
if axis == 0
|
3791
|
-
|
3781
|
+
lazy.max.collect(_eager: true)
|
3792
3782
|
elsif axis == 1
|
3793
|
-
Utils.wrap_s(_df.
|
3783
|
+
Utils.wrap_s(_df.max_horizontal)
|
3794
3784
|
else
|
3795
3785
|
raise ArgumentError, "Axis should be 0 or 1."
|
3796
3786
|
end
|
@@ -3820,9 +3810,9 @@ module Polars
|
|
3820
3810
|
# # └─────┴─────┴─────┘
|
3821
3811
|
def min(axis: 0)
|
3822
3812
|
if axis == 0
|
3823
|
-
|
3813
|
+
lazy.min.collect(_eager: true)
|
3824
3814
|
elsif axis == 1
|
3825
|
-
Utils.wrap_s(_df.
|
3815
|
+
Utils.wrap_s(_df.min_horizontal)
|
3826
3816
|
else
|
3827
3817
|
raise ArgumentError, "Axis should be 0 or 1."
|
3828
3818
|
end
|
@@ -3869,9 +3859,9 @@ module Polars
|
|
3869
3859
|
def sum(axis: 0, null_strategy: "ignore")
|
3870
3860
|
case axis
|
3871
3861
|
when 0
|
3872
|
-
|
3862
|
+
lazy.sum.collect(_eager: true)
|
3873
3863
|
when 1
|
3874
|
-
Utils.wrap_s(_df.
|
3864
|
+
Utils.wrap_s(_df.sum_horizontal(null_strategy))
|
3875
3865
|
else
|
3876
3866
|
raise ArgumentError, "Axis should be 0 or 1."
|
3877
3867
|
end
|
@@ -3907,9 +3897,9 @@ module Polars
|
|
3907
3897
|
def mean(axis: 0, null_strategy: "ignore")
|
3908
3898
|
case axis
|
3909
3899
|
when 0
|
3910
|
-
|
3900
|
+
lazy.mean.collect(_eager: true)
|
3911
3901
|
when 1
|
3912
|
-
Utils.wrap_s(_df.
|
3902
|
+
Utils.wrap_s(_df.mean_horizontal(null_strategy))
|
3913
3903
|
else
|
3914
3904
|
raise ArgumentError, "Axis should be 0 or 1."
|
3915
3905
|
end
|
@@ -3953,7 +3943,7 @@ module Polars
|
|
3953
3943
|
# # │ 0.816497 ┆ 0.816497 ┆ null │
|
3954
3944
|
# # └──────────┴──────────┴──────┘
|
3955
3945
|
def std(ddof: 1)
|
3956
|
-
|
3946
|
+
lazy.std(ddof: ddof).collect(_eager: true)
|
3957
3947
|
end
|
3958
3948
|
|
3959
3949
|
# Aggregate the columns of this DataFrame to their variance value.
|
@@ -3994,7 +3984,7 @@ module Polars
|
|
3994
3984
|
# # │ 0.666667 ┆ 0.666667 ┆ null │
|
3995
3985
|
# # └──────────┴──────────┴──────┘
|
3996
3986
|
def var(ddof: 1)
|
3997
|
-
|
3987
|
+
lazy.var(ddof: ddof).collect(_eager: true)
|
3998
3988
|
end
|
3999
3989
|
|
4000
3990
|
# Aggregate the columns of this DataFrame to their median value.
|
@@ -4020,7 +4010,7 @@ module Polars
|
|
4020
4010
|
# # │ 2.0 ┆ 7.0 ┆ null │
|
4021
4011
|
# # └─────┴─────┴──────┘
|
4022
4012
|
def median
|
4023
|
-
|
4013
|
+
lazy.median.collect(_eager: true)
|
4024
4014
|
end
|
4025
4015
|
|
4026
4016
|
# Aggregate the columns of this DataFrame to their product values.
|
@@ -4077,7 +4067,7 @@ module Polars
|
|
4077
4067
|
# # │ 2.0 ┆ 7.0 ┆ null │
|
4078
4068
|
# # └─────┴─────┴──────┘
|
4079
4069
|
def quantile(quantile, interpolation: "nearest")
|
4080
|
-
|
4070
|
+
lazy.quantile(quantile, interpolation: interpolation).collect(_eager: true)
|
4081
4071
|
end
|
4082
4072
|
|
4083
4073
|
# Get one hot encoded dummy variables.
|
@@ -4108,7 +4098,7 @@ module Polars
|
|
4108
4098
|
# # │ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 │
|
4109
4099
|
# # └───────┴───────┴───────┴───────┴───────┴───────┘
|
4110
4100
|
def to_dummies(columns: nil, separator: "_", drop_first: false)
|
4111
|
-
if columns.is_a?(String)
|
4101
|
+
if columns.is_a?(::String)
|
4112
4102
|
columns = [columns]
|
4113
4103
|
end
|
4114
4104
|
_from_rbdf(_df.to_dummies(columns, separator, drop_first))
|
@@ -4294,15 +4284,20 @@ module Polars
|
|
4294
4284
|
end
|
4295
4285
|
|
4296
4286
|
if n.nil? && !frac.nil?
|
4287
|
+
frac = Series.new("frac", [frac]) unless frac.is_a?(Series)
|
4288
|
+
|
4297
4289
|
_from_rbdf(
|
4298
|
-
_df.sample_frac(frac, with_replacement, shuffle, seed)
|
4290
|
+
_df.sample_frac(frac._s, with_replacement, shuffle, seed)
|
4299
4291
|
)
|
4300
4292
|
end
|
4301
4293
|
|
4302
4294
|
if n.nil?
|
4303
4295
|
n = 1
|
4304
4296
|
end
|
4305
|
-
|
4297
|
+
|
4298
|
+
n = Series.new("", [n]) unless n.is_a?(Series)
|
4299
|
+
|
4300
|
+
_from_rbdf(_df.sample_n(n._s, with_replacement, shuffle, seed))
|
4306
4301
|
end
|
4307
4302
|
|
4308
4303
|
# Apply a horizontal reduction on a DataFrame.
|
@@ -4601,7 +4596,7 @@ module Polars
|
|
4601
4596
|
#
|
4602
4597
|
# @example
|
4603
4598
|
# s = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]})
|
4604
|
-
# s.
|
4599
|
+
# s.gather_every(2)
|
4605
4600
|
# # =>
|
4606
4601
|
# # shape: (2, 2)
|
4607
4602
|
# # ┌─────┬─────┐
|
@@ -4612,9 +4607,10 @@ module Polars
|
|
4612
4607
|
# # │ 1 ┆ 5 │
|
4613
4608
|
# # │ 3 ┆ 7 │
|
4614
4609
|
# # └─────┴─────┘
|
4615
|
-
def
|
4616
|
-
select(Utils.col("*").
|
4610
|
+
def gather_every(n, offset = 0)
|
4611
|
+
select(Utils.col("*").gather_every(n, offset))
|
4617
4612
|
end
|
4613
|
+
alias_method :take_every, :gather_every
|
4618
4614
|
|
4619
4615
|
# Hash and combine the rows in this DataFrame.
|
4620
4616
|
#
|
@@ -4671,16 +4667,16 @@ module Polars
|
|
4671
4667
|
# df.interpolate
|
4672
4668
|
# # =>
|
4673
4669
|
# # shape: (4, 3)
|
4674
|
-
# #
|
4675
|
-
# # │ foo
|
4676
|
-
# # │ ---
|
4677
|
-
# # │
|
4678
|
-
# #
|
4679
|
-
# # │ 1
|
4680
|
-
# # │ 5
|
4681
|
-
# # │ 9
|
4682
|
-
# # │ 10
|
4683
|
-
# #
|
4670
|
+
# # ┌──────┬──────┬──────────┐
|
4671
|
+
# # │ foo ┆ bar ┆ baz │
|
4672
|
+
# # │ --- ┆ --- ┆ --- │
|
4673
|
+
# # │ f64 ┆ f64 ┆ f64 │
|
4674
|
+
# # ╞══════╪══════╪══════════╡
|
4675
|
+
# # │ 1.0 ┆ 6.0 ┆ 1.0 │
|
4676
|
+
# # │ 5.0 ┆ 7.0 ┆ 3.666667 │
|
4677
|
+
# # │ 9.0 ┆ 9.0 ┆ 6.333333 │
|
4678
|
+
# # │ 10.0 ┆ null ┆ 9.0 │
|
4679
|
+
# # └──────┴──────┴──────────┘
|
4684
4680
|
def interpolate
|
4685
4681
|
select(Utils.col("*").interpolate)
|
4686
4682
|
end
|
@@ -4762,7 +4758,7 @@ module Polars
|
|
4762
4758
|
# # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
|
4763
4759
|
# # └────────┴─────┴─────┴──────┴───────────┴───────┘
|
4764
4760
|
def unnest(names)
|
4765
|
-
if names.is_a?(String)
|
4761
|
+
if names.is_a?(::String)
|
4766
4762
|
names = [names]
|
4767
4763
|
end
|
4768
4764
|
_from_rbdf(_df.unnest(names))
|
@@ -4875,10 +4871,10 @@ module Polars
|
|
4875
4871
|
if val.is_a?(Hash) && dtype != Struct
|
4876
4872
|
updated_data[name] = DataFrame.new(val).to_struct(name)
|
4877
4873
|
elsif !Utils.arrlen(val).nil?
|
4878
|
-
updated_data[name] = Series.new(String.new(name), val, dtype: dtype)
|
4879
|
-
elsif val.nil? || [Integer, Float, TrueClass, FalseClass, String, ::Date, ::DateTime, ::Time].any? { |cls| val.is_a?(cls) }
|
4874
|
+
updated_data[name] = Series.new(::String.new(name), val, dtype: dtype)
|
4875
|
+
elsif val.nil? || [Integer, Float, TrueClass, FalseClass, ::String, ::Date, ::DateTime, ::Time].any? { |cls| val.is_a?(cls) }
|
4880
4876
|
dtype = Polars::Float64 if val.nil? && dtype.nil?
|
4881
|
-
updated_data[name] = Series.new(String.new(name), [val], dtype: dtype).extend_constant(val, array_len - 1)
|
4877
|
+
updated_data[name] = Series.new(::String.new(name), [val], dtype: dtype).extend_constant(val, array_len - 1)
|
4882
4878
|
else
|
4883
4879
|
raise Todo
|
4884
4880
|
end
|
@@ -4935,7 +4931,7 @@ module Polars
|
|
4935
4931
|
end
|
4936
4932
|
column_names =
|
4937
4933
|
(schema || []).map.with_index do |col, i|
|
4938
|
-
if col.is_a?(String)
|
4934
|
+
if col.is_a?(::String)
|
4939
4935
|
col || "column_#{i}"
|
4940
4936
|
else
|
4941
4937
|
col[0]
|
@@ -4948,12 +4944,12 @@ module Polars
|
|
4948
4944
|
lookup = column_names.zip(lookup_names || []).to_h
|
4949
4945
|
|
4950
4946
|
column_dtypes =
|
4951
|
-
(schema || []).select { |col| !col.is_a?(String) && col[1] }.to_h do |col|
|
4947
|
+
(schema || []).select { |col| !col.is_a?(::String) && col[1] }.to_h do |col|
|
4952
4948
|
[lookup[col[0]] || col[0], col[1]]
|
4953
4949
|
end
|
4954
4950
|
|
4955
|
-
if schema_overrides
|
4956
|
-
|
4951
|
+
if schema_overrides && schema_overrides.any?
|
4952
|
+
column_dtypes.merge!(schema_overrides)
|
4957
4953
|
end
|
4958
4954
|
|
4959
4955
|
column_dtypes.each do |col, dtype|
|
@@ -5056,13 +5052,54 @@ module Polars
|
|
5056
5052
|
return rbdf
|
5057
5053
|
elsif data[0].is_a?(::Array)
|
5058
5054
|
if orient.nil? && !columns.nil?
|
5059
|
-
|
5055
|
+
first_element = data[0]
|
5056
|
+
row_types = first_element.filter_map { |value| value.class }.uniq
|
5057
|
+
if row_types.include?(Integer) && row_types.include?(Float)
|
5058
|
+
row_types.delete(Integer)
|
5059
|
+
end
|
5060
|
+
orient = row_types.length == 1 ? "col" : "row"
|
5060
5061
|
end
|
5061
5062
|
|
5062
5063
|
if orient == "row"
|
5063
|
-
|
5064
|
+
column_names, schema_overrides = _unpack_schema(
|
5065
|
+
schema, schema_overrides: schema_overrides, n_expected: first_element.length
|
5066
|
+
)
|
5067
|
+
local_schema_override = (
|
5068
|
+
schema_overrides.any? ? (raise Todo) : {}
|
5069
|
+
)
|
5070
|
+
if column_names.any? && first_element.length > 0 && first_element.length != column_names.length
|
5071
|
+
raise ArgumentError, "the row data does not match the number of columns"
|
5072
|
+
end
|
5073
|
+
|
5074
|
+
unpack_nested = false
|
5075
|
+
local_schema_override.each do |col, tp|
|
5076
|
+
raise Todo
|
5077
|
+
end
|
5078
|
+
|
5079
|
+
if unpack_nested
|
5080
|
+
raise Todo
|
5081
|
+
else
|
5082
|
+
rbdf = RbDataFrame.read_rows(
|
5083
|
+
data,
|
5084
|
+
infer_schema_length,
|
5085
|
+
local_schema_override.any? ? local_schema_override : nil
|
5086
|
+
)
|
5087
|
+
end
|
5088
|
+
if column_names.any? || schema_overrides.any?
|
5089
|
+
rbdf = _post_apply_columns(
|
5090
|
+
rbdf, column_names, schema_overrides: schema_overrides
|
5091
|
+
)
|
5092
|
+
end
|
5093
|
+
return rbdf
|
5064
5094
|
elsif orient == "col" || orient.nil?
|
5065
|
-
|
5095
|
+
column_names, schema_overrides = _unpack_schema(
|
5096
|
+
schema, schema_overrides: schema_overrides, n_expected: data.length
|
5097
|
+
)
|
5098
|
+
data_series =
|
5099
|
+
data.map.with_index do |element, i|
|
5100
|
+
Series.new(column_names[i], element, dtype: schema_overrides[column_names[i]])._s
|
5101
|
+
end
|
5102
|
+
return RbDataFrame.new(data_series)
|
5066
5103
|
else
|
5067
5104
|
raise ArgumentError, "orient must be one of {{'col', 'row', nil}}, got #{orient} instead."
|
5068
5105
|
end
|
@@ -5108,10 +5145,10 @@ module Polars
|
|
5108
5145
|
|
5109
5146
|
def _compare_to_other_df(other, op)
|
5110
5147
|
if columns != other.columns
|
5111
|
-
raise
|
5148
|
+
raise ArgumentError, "DataFrame columns do not match"
|
5112
5149
|
end
|
5113
5150
|
if shape != other.shape
|
5114
|
-
raise
|
5151
|
+
raise ArgumentError, "DataFrame dimensions do not match"
|
5115
5152
|
end
|
5116
5153
|
|
5117
5154
|
suffix = "__POLARS_CMP_OTHER"
|