polars-df 0.6.0-x86_64-darwin → 0.8.0-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/Cargo.lock +597 -599
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +5523 -6947
- data/README.md +8 -7
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/{3.0 → 3.3}/polars.bundle +0 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +182 -145
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +23 -28
- data/lib/polars/date_time_name_space.rb +17 -37
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +398 -110
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +40 -5
- data/lib/polars/lazy_frame.rb +116 -89
- data/lib/polars/lazy_functions.rb +40 -68
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +315 -43
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -13
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +8 -5
data/lib/polars/data_frame.rb
CHANGED
@@ -20,15 +20,9 @@ module Polars
|
|
20
20
|
# this does not yield conclusive results, column orientation is used.
|
21
21
|
def initialize(data = nil, schema: nil, columns: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
|
22
22
|
schema ||= columns
|
23
|
-
raise Todo if schema_overrides
|
24
23
|
|
25
|
-
# TODO deprecate in favor of read_sql
|
26
24
|
if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
|
27
|
-
|
28
|
-
data = {}
|
29
|
-
result.columns.each_with_index do |k, i|
|
30
|
-
data[k] = result.rows.map { |r| r[i] }
|
31
|
-
end
|
25
|
+
raise ArgumentError, "Use read_database instead"
|
32
26
|
end
|
33
27
|
|
34
28
|
if data.nil?
|
@@ -125,10 +119,10 @@ module Polars
|
|
125
119
|
|
126
120
|
processed_null_values = Utils._process_null_values(null_values)
|
127
121
|
|
128
|
-
if columns.is_a?(String)
|
122
|
+
if columns.is_a?(::String)
|
129
123
|
columns = [columns]
|
130
124
|
end
|
131
|
-
if file.is_a?(String) && file.include?("*")
|
125
|
+
if file.is_a?(::String) && file.include?("*")
|
132
126
|
dtypes_dict = nil
|
133
127
|
if !dtype_list.nil?
|
134
128
|
dtypes_dict = dtype_list.to_h
|
@@ -212,11 +206,11 @@ module Polars
|
|
212
206
|
if Utils.pathlike?(source)
|
213
207
|
source = Utils.normalise_filepath(source)
|
214
208
|
end
|
215
|
-
if columns.is_a?(String)
|
209
|
+
if columns.is_a?(::String)
|
216
210
|
columns = [columns]
|
217
211
|
end
|
218
212
|
|
219
|
-
if source.is_a?(String) && source.include?("*") && Utils.local_file?(source)
|
213
|
+
if source.is_a?(::String) && source.include?("*") && Utils.local_file?(source)
|
220
214
|
scan =
|
221
215
|
Polars.scan_parquet(
|
222
216
|
source,
|
@@ -275,11 +269,11 @@ module Polars
|
|
275
269
|
if Utils.pathlike?(file)
|
276
270
|
file = Utils.normalise_filepath(file)
|
277
271
|
end
|
278
|
-
if columns.is_a?(String)
|
272
|
+
if columns.is_a?(::String)
|
279
273
|
columns = [columns]
|
280
274
|
end
|
281
275
|
|
282
|
-
if file.is_a?(String) && file.include?("*")
|
276
|
+
if file.is_a?(::String) && file.include?("*")
|
283
277
|
raise Todo
|
284
278
|
end
|
285
279
|
|
@@ -417,7 +411,7 @@ module Polars
|
|
417
411
|
# }
|
418
412
|
# )
|
419
413
|
# df.dtypes
|
420
|
-
# # => [Polars::Int64, Polars::Float64, Polars::
|
414
|
+
# # => [Polars::Int64, Polars::Float64, Polars::String]
|
421
415
|
def dtypes
|
422
416
|
_df.dtypes
|
423
417
|
end
|
@@ -435,7 +429,7 @@ module Polars
|
|
435
429
|
# }
|
436
430
|
# )
|
437
431
|
# df.schema
|
438
|
-
# # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::
|
432
|
+
# # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String}
|
439
433
|
def schema
|
440
434
|
columns.zip(dtypes).to_h
|
441
435
|
end
|
@@ -595,13 +589,13 @@ module Polars
|
|
595
589
|
return df.slice(row_selection, 1)
|
596
590
|
end
|
597
591
|
# df[2, "a"]
|
598
|
-
if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
|
592
|
+
if col_selection.is_a?(::String) || col_selection.is_a?(Symbol)
|
599
593
|
return self[col_selection][row_selection]
|
600
594
|
end
|
601
595
|
end
|
602
596
|
|
603
597
|
# column selection can be "a" and ["a", "b"]
|
604
|
-
if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
|
598
|
+
if col_selection.is_a?(::String) || col_selection.is_a?(Symbol)
|
605
599
|
col_selection = [col_selection]
|
606
600
|
end
|
607
601
|
|
@@ -627,7 +621,7 @@ module Polars
|
|
627
621
|
|
628
622
|
# select single column
|
629
623
|
# df["foo"]
|
630
|
-
if item.is_a?(String) || item.is_a?(Symbol)
|
624
|
+
if item.is_a?(::String) || item.is_a?(Symbol)
|
631
625
|
return Utils.wrap_s(_df.column(item.to_s))
|
632
626
|
end
|
633
627
|
|
@@ -653,7 +647,7 @@ module Polars
|
|
653
647
|
|
654
648
|
if item.is_a?(Series)
|
655
649
|
dtype = item.dtype
|
656
|
-
if dtype ==
|
650
|
+
if dtype == String
|
657
651
|
return _from_rbdf(_df.select(item))
|
658
652
|
elsif dtype == UInt32
|
659
653
|
return _from_rbdf(_df.take_with_series(item._s))
|
@@ -704,7 +698,7 @@ module Polars
|
|
704
698
|
s[row_selection] = value
|
705
699
|
|
706
700
|
if col_selection.is_a?(Integer)
|
707
|
-
|
701
|
+
replace_column(col_selection, s)
|
708
702
|
elsif Utils.strlike?(col_selection)
|
709
703
|
replace(col_selection, s)
|
710
704
|
end
|
@@ -905,6 +899,7 @@ module Polars
|
|
905
899
|
def write_csv(
|
906
900
|
file = nil,
|
907
901
|
has_header: true,
|
902
|
+
include_header: nil,
|
908
903
|
sep: ",",
|
909
904
|
quote: '"',
|
910
905
|
batch_size: 1024,
|
@@ -914,6 +909,8 @@ module Polars
|
|
914
909
|
float_precision: nil,
|
915
910
|
null_value: nil
|
916
911
|
)
|
912
|
+
include_header = has_header if include_header.nil?
|
913
|
+
|
917
914
|
if sep.length > 1
|
918
915
|
raise ArgumentError, "only single byte separator is allowed"
|
919
916
|
elsif quote.length > 1
|
@@ -927,7 +924,7 @@ module Polars
|
|
927
924
|
buffer.set_encoding(Encoding::BINARY)
|
928
925
|
_df.write_csv(
|
929
926
|
buffer,
|
930
|
-
|
927
|
+
include_header,
|
931
928
|
sep.ord,
|
932
929
|
quote.ord,
|
933
930
|
batch_size,
|
@@ -946,7 +943,7 @@ module Polars
|
|
946
943
|
|
947
944
|
_df.write_csv(
|
948
945
|
file,
|
949
|
-
|
946
|
+
include_header,
|
950
947
|
sep.ord,
|
951
948
|
quote.ord,
|
952
949
|
batch_size,
|
@@ -1151,22 +1148,8 @@ module Polars
|
|
1151
1148
|
# # │ b ┆ 1 ┆ 2 ┆ 3 │
|
1152
1149
|
# # └─────┴─────┴─────┴─────┘
|
1153
1150
|
def transpose(include_header: false, header_name: "column", column_names: nil)
|
1154
|
-
|
1155
|
-
|
1156
|
-
names = []
|
1157
|
-
n = df.width
|
1158
|
-
if include_header
|
1159
|
-
names << header_name
|
1160
|
-
n -= 1
|
1161
|
-
end
|
1162
|
-
|
1163
|
-
column_names = column_names.each
|
1164
|
-
n.times do
|
1165
|
-
names << column_names.next
|
1166
|
-
end
|
1167
|
-
df.columns = names
|
1168
|
-
end
|
1169
|
-
df
|
1151
|
+
keep_names_as = include_header ? header_name : nil
|
1152
|
+
_from_rbdf(_df.transpose(keep_names_as, column_names))
|
1170
1153
|
end
|
1171
1154
|
|
1172
1155
|
# Reverse the DataFrame.
|
@@ -1239,7 +1222,7 @@ module Polars
|
|
1239
1222
|
# @example
|
1240
1223
|
# df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
|
1241
1224
|
# s = Polars::Series.new("baz", [97, 98, 99])
|
1242
|
-
# df.
|
1225
|
+
# df.insert_column(1, s)
|
1243
1226
|
# # =>
|
1244
1227
|
# # shape: (3, 3)
|
1245
1228
|
# # ┌─────┬─────┬─────┐
|
@@ -1261,7 +1244,7 @@ module Polars
|
|
1261
1244
|
# }
|
1262
1245
|
# )
|
1263
1246
|
# s = Polars::Series.new("d", [-2.5, 15, 20.5, 0])
|
1264
|
-
# df.
|
1247
|
+
# df.insert_column(3, s)
|
1265
1248
|
# # =>
|
1266
1249
|
# # shape: (4, 4)
|
1267
1250
|
# # ┌─────┬──────┬───────┬──────┐
|
@@ -1274,13 +1257,14 @@ module Polars
|
|
1274
1257
|
# # │ 3 ┆ 10.0 ┆ false ┆ 20.5 │
|
1275
1258
|
# # │ 4 ┆ 13.0 ┆ true ┆ 0.0 │
|
1276
1259
|
# # └─────┴──────┴───────┴──────┘
|
1277
|
-
def
|
1260
|
+
def insert_column(index, series)
|
1278
1261
|
if index < 0
|
1279
1262
|
index = columns.length + index
|
1280
1263
|
end
|
1281
|
-
_df.
|
1264
|
+
_df.insert_column(index, series._s)
|
1282
1265
|
self
|
1283
1266
|
end
|
1267
|
+
alias_method :insert_at_idx, :insert_column
|
1284
1268
|
|
1285
1269
|
# Filter the rows in the DataFrame based on a predicate expression.
|
1286
1270
|
#
|
@@ -1384,7 +1368,7 @@ module Polars
|
|
1384
1368
|
]
|
1385
1369
|
)._df
|
1386
1370
|
)
|
1387
|
-
summary.
|
1371
|
+
summary.insert_column(
|
1388
1372
|
0,
|
1389
1373
|
Polars::Series.new(
|
1390
1374
|
"describe",
|
@@ -1405,11 +1389,12 @@ module Polars
|
|
1405
1389
|
# df = Polars::DataFrame.new(
|
1406
1390
|
# {"foo" => [1, 2, 3], "bar" => [6, 7, 8], "ham" => ["a", "b", "c"]}
|
1407
1391
|
# )
|
1408
|
-
# df.
|
1392
|
+
# df.get_column_index("ham")
|
1409
1393
|
# # => 2
|
1410
|
-
def
|
1411
|
-
_df.
|
1394
|
+
def get_column_index(name)
|
1395
|
+
_df.get_column_index(name)
|
1412
1396
|
end
|
1397
|
+
alias_method :find_idx_by_name, :get_column_index
|
1413
1398
|
|
1414
1399
|
# Replace a column at an index location.
|
1415
1400
|
#
|
@@ -1429,7 +1414,7 @@ module Polars
|
|
1429
1414
|
# }
|
1430
1415
|
# )
|
1431
1416
|
# s = Polars::Series.new("apple", [10, 20, 30])
|
1432
|
-
# df.
|
1417
|
+
# df.replace_column(0, s)
|
1433
1418
|
# # =>
|
1434
1419
|
# # shape: (3, 3)
|
1435
1420
|
# # ┌───────┬─────┬─────┐
|
@@ -1441,13 +1426,14 @@ module Polars
|
|
1441
1426
|
# # │ 20 ┆ 7 ┆ b │
|
1442
1427
|
# # │ 30 ┆ 8 ┆ c │
|
1443
1428
|
# # └───────┴─────┴─────┘
|
1444
|
-
def
|
1429
|
+
def replace_column(index, series)
|
1445
1430
|
if index < 0
|
1446
1431
|
index = columns.length + index
|
1447
1432
|
end
|
1448
|
-
_df.
|
1433
|
+
_df.replace_column(index, series._s)
|
1449
1434
|
self
|
1450
1435
|
end
|
1436
|
+
alias_method :replace_at_idx, :replace_column
|
1451
1437
|
|
1452
1438
|
# Sort the DataFrame by column.
|
1453
1439
|
#
|
@@ -1541,13 +1527,14 @@ module Polars
|
|
1541
1527
|
# "ham" => ["c", "b", "a"]
|
1542
1528
|
# }
|
1543
1529
|
# )
|
1544
|
-
# df1.
|
1530
|
+
# df1.equals(df1)
|
1545
1531
|
# # => true
|
1546
|
-
# df1.
|
1532
|
+
# df1.equals(df2)
|
1547
1533
|
# # => false
|
1548
|
-
def
|
1549
|
-
_df.
|
1534
|
+
def equals(other, null_equal: true)
|
1535
|
+
_df.equals(other._df, null_equal)
|
1550
1536
|
end
|
1537
|
+
alias_method :frame_equal, :equals
|
1551
1538
|
|
1552
1539
|
# Replace a column by a new Series.
|
1553
1540
|
#
|
@@ -1733,7 +1720,7 @@ module Polars
|
|
1733
1720
|
# # │ 3 ┆ 8 ┆ c │
|
1734
1721
|
# # └─────┴─────┴─────┘
|
1735
1722
|
def drop_nulls(subset: nil)
|
1736
|
-
if subset.is_a?(String)
|
1723
|
+
if subset.is_a?(::String)
|
1737
1724
|
subset = [subset]
|
1738
1725
|
end
|
1739
1726
|
_from_rbdf(_df.drop_nulls(subset))
|
@@ -1811,13 +1798,13 @@ module Polars
|
|
1811
1798
|
_from_rbdf(_df.with_row_count(name, offset))
|
1812
1799
|
end
|
1813
1800
|
|
1814
|
-
# Start a
|
1801
|
+
# Start a group by operation.
|
1815
1802
|
#
|
1816
1803
|
# @param by [Object]
|
1817
1804
|
# Column(s) to group by.
|
1818
1805
|
# @param maintain_order [Boolean]
|
1819
1806
|
# Make sure that the order of the groups remains consistent. This is more
|
1820
|
-
# expensive than a default
|
1807
|
+
# expensive than a default group by. Note that this only works in expression
|
1821
1808
|
# aggregations.
|
1822
1809
|
#
|
1823
1810
|
# @return [GroupBy]
|
@@ -1830,7 +1817,7 @@ module Polars
|
|
1830
1817
|
# "c" => [6, 5, 4, 3, 2, 1]
|
1831
1818
|
# }
|
1832
1819
|
# )
|
1833
|
-
# df.
|
1820
|
+
# df.group_by("a").agg(Polars.col("b").sum).sort("a")
|
1834
1821
|
# # =>
|
1835
1822
|
# # shape: (3, 2)
|
1836
1823
|
# # ┌─────┬─────┐
|
@@ -1842,25 +1829,26 @@ module Polars
|
|
1842
1829
|
# # │ b ┆ 11 │
|
1843
1830
|
# # │ c ┆ 6 │
|
1844
1831
|
# # └─────┴─────┘
|
1845
|
-
def
|
1832
|
+
def group_by(by, maintain_order: false)
|
1846
1833
|
if !Utils.bool?(maintain_order)
|
1847
|
-
raise TypeError, "invalid input for
|
1834
|
+
raise TypeError, "invalid input for group_by arg `maintain_order`: #{maintain_order}."
|
1848
1835
|
end
|
1849
1836
|
GroupBy.new(
|
1850
|
-
|
1837
|
+
self,
|
1851
1838
|
by,
|
1852
|
-
self.class,
|
1853
1839
|
maintain_order: maintain_order
|
1854
1840
|
)
|
1855
1841
|
end
|
1842
|
+
alias_method :groupby, :group_by
|
1843
|
+
alias_method :group, :group_by
|
1856
1844
|
|
1857
1845
|
# Create rolling groups based on a time column.
|
1858
1846
|
#
|
1859
1847
|
# Also works for index values of type `:i32` or `:i64`.
|
1860
1848
|
#
|
1861
|
-
# Different from a `
|
1849
|
+
# Different from a `group_by_dynamic`, the windows are now determined by the
|
1862
1850
|
# individual values and are not of constant intervals. For constant intervals use
|
1863
|
-
# *
|
1851
|
+
# *group_by_dynamic*
|
1864
1852
|
#
|
1865
1853
|
# The `period` and `offset` arguments are created either from a timedelta, or
|
1866
1854
|
# by using the following string language:
|
@@ -1880,7 +1868,7 @@ module Polars
|
|
1880
1868
|
# Or combine them:
|
1881
1869
|
# "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
|
1882
1870
|
#
|
1883
|
-
# In case of a
|
1871
|
+
# In case of a group_by_rolling on an integer column, the windows are defined by:
|
1884
1872
|
#
|
1885
1873
|
# - **"1i" # length 1**
|
1886
1874
|
# - **"10i" # length 10**
|
@@ -1891,7 +1879,7 @@ module Polars
|
|
1891
1879
|
# This column must be sorted in ascending order. If not the output will not
|
1892
1880
|
# make sense.
|
1893
1881
|
#
|
1894
|
-
# In case of a rolling
|
1882
|
+
# In case of a rolling group by on indices, dtype needs to be one of
|
1895
1883
|
# `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
|
1896
1884
|
# performance matters use an `:i64` column.
|
1897
1885
|
# @param period [Object]
|
@@ -1923,7 +1911,7 @@ module Polars
|
|
1923
1911
|
# df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
|
1924
1912
|
# Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
|
1925
1913
|
# )
|
1926
|
-
# df.
|
1914
|
+
# df.group_by_rolling(index_column: "dt", period: "2d").agg(
|
1927
1915
|
# [
|
1928
1916
|
# Polars.sum("a").alias("sum_a"),
|
1929
1917
|
# Polars.min("a").alias("min_a"),
|
@@ -1944,7 +1932,7 @@ module Polars
|
|
1944
1932
|
# # │ 2020-01-03 19:45:32 ┆ 11 ┆ 2 ┆ 9 │
|
1945
1933
|
# # │ 2020-01-08 23:16:43 ┆ 1 ┆ 1 ┆ 1 │
|
1946
1934
|
# # └─────────────────────┴───────┴───────┴───────┘
|
1947
|
-
def
|
1935
|
+
def group_by_rolling(
|
1948
1936
|
index_column:,
|
1949
1937
|
period:,
|
1950
1938
|
offset: nil,
|
@@ -1954,11 +1942,12 @@ module Polars
|
|
1954
1942
|
)
|
1955
1943
|
RollingGroupBy.new(self, index_column, period, offset, closed, by, check_sorted)
|
1956
1944
|
end
|
1945
|
+
alias_method :groupby_rolling, :group_by_rolling
|
1957
1946
|
|
1958
1947
|
# Group based on a time value (or index value of type `:i32`, `:i64`).
|
1959
1948
|
#
|
1960
1949
|
# Time windows are calculated and rows are assigned to windows. Different from a
|
1961
|
-
# normal
|
1950
|
+
# normal group by is that a row can be a member of multiple groups. The time/index
|
1962
1951
|
# window could be seen as a rolling window, with a window size determined by
|
1963
1952
|
# dates/times/values instead of slots in the DataFrame.
|
1964
1953
|
#
|
@@ -1986,7 +1975,7 @@ module Polars
|
|
1986
1975
|
# Or combine them:
|
1987
1976
|
# "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
|
1988
1977
|
#
|
1989
|
-
# In case of a
|
1978
|
+
# In case of a group_by_dynamic on an integer column, the windows are defined by:
|
1990
1979
|
#
|
1991
1980
|
# - "1i" # length 1
|
1992
1981
|
# - "10i" # length 10
|
@@ -1997,7 +1986,7 @@ module Polars
|
|
1997
1986
|
# This column must be sorted in ascending order. If not the output will not
|
1998
1987
|
# make sense.
|
1999
1988
|
#
|
2000
|
-
# In case of a dynamic
|
1989
|
+
# In case of a dynamic group by on indices, dtype needs to be one of
|
2001
1990
|
# `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
|
2002
1991
|
# performance matters use an `:i64` column.
|
2003
1992
|
# @param every
|
@@ -2048,7 +2037,7 @@ module Polars
|
|
2048
2037
|
# # └─────────────────────┴─────┘
|
2049
2038
|
#
|
2050
2039
|
# @example Group by windows of 1 hour starting at 2021-12-16 00:00:00.
|
2051
|
-
# df.
|
2040
|
+
# df.group_by_dynamic("time", every: "1h", closed: "right").agg(
|
2052
2041
|
# [
|
2053
2042
|
# Polars.col("time").min.alias("time_min"),
|
2054
2043
|
# Polars.col("time").max.alias("time_max")
|
@@ -2068,7 +2057,7 @@ module Polars
|
|
2068
2057
|
# # └─────────────────────┴─────────────────────┴─────────────────────┘
|
2069
2058
|
#
|
2070
2059
|
# @example The window boundaries can also be added to the aggregation result.
|
2071
|
-
# df.
|
2060
|
+
# df.group_by_dynamic(
|
2072
2061
|
# "time", every: "1h", include_boundaries: true, closed: "right"
|
2073
2062
|
# ).agg([Polars.col("time").count.alias("time_count")])
|
2074
2063
|
# # =>
|
@@ -2085,7 +2074,7 @@ module Polars
|
|
2085
2074
|
# # └─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
|
2086
2075
|
#
|
2087
2076
|
# @example When closed="left", should not include right end of interval.
|
2088
|
-
# df.
|
2077
|
+
# df.group_by_dynamic("time", every: "1h", closed: "left").agg(
|
2089
2078
|
# [
|
2090
2079
|
# Polars.col("time").count.alias("time_count"),
|
2091
2080
|
# Polars.col("time").alias("time_agg_list")
|
@@ -2105,7 +2094,7 @@ module Polars
|
|
2105
2094
|
# # └─────────────────────┴────────────┴───────────────────────────────────┘
|
2106
2095
|
#
|
2107
2096
|
# @example When closed="both" the time values at the window boundaries belong to 2 groups.
|
2108
|
-
# df.
|
2097
|
+
# df.group_by_dynamic("time", every: "1h", closed: "both").agg(
|
2109
2098
|
# [Polars.col("time").count.alias("time_count")]
|
2110
2099
|
# )
|
2111
2100
|
# # =>
|
@@ -2122,7 +2111,7 @@ module Polars
|
|
2122
2111
|
# # │ 2021-12-16 03:00:00 ┆ 1 │
|
2123
2112
|
# # └─────────────────────┴────────────┘
|
2124
2113
|
#
|
2125
|
-
# @example Dynamic
|
2114
|
+
# @example Dynamic group bys can also be combined with grouping on normal keys.
|
2126
2115
|
# df = Polars::DataFrame.new(
|
2127
2116
|
# {
|
2128
2117
|
# "time" => Polars.date_range(
|
@@ -2133,7 +2122,7 @@ module Polars
|
|
2133
2122
|
# "groups" => ["a", "a", "a", "b", "b", "a", "a"]
|
2134
2123
|
# }
|
2135
2124
|
# )
|
2136
|
-
# df.
|
2125
|
+
# df.group_by_dynamic(
|
2137
2126
|
# "time",
|
2138
2127
|
# every: "1h",
|
2139
2128
|
# closed: "both",
|
@@ -2156,14 +2145,14 @@ module Polars
|
|
2156
2145
|
# # │ b ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1 │
|
2157
2146
|
# # └────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
|
2158
2147
|
#
|
2159
|
-
# @example Dynamic
|
2148
|
+
# @example Dynamic group by on an index column.
|
2160
2149
|
# df = Polars::DataFrame.new(
|
2161
2150
|
# {
|
2162
2151
|
# "idx" => Polars.arange(0, 6, eager: true),
|
2163
2152
|
# "A" => ["A", "A", "B", "B", "B", "C"]
|
2164
2153
|
# }
|
2165
2154
|
# )
|
2166
|
-
# df.
|
2155
|
+
# df.group_by_dynamic(
|
2167
2156
|
# "idx",
|
2168
2157
|
# every: "2i",
|
2169
2158
|
# period: "3i",
|
@@ -2181,7 +2170,7 @@ module Polars
|
|
2181
2170
|
# # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
|
2182
2171
|
# # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
|
2183
2172
|
# # └─────────────────┴─────────────────┴─────┴─────────────────┘
|
2184
|
-
def
|
2173
|
+
def group_by_dynamic(
|
2185
2174
|
index_column,
|
2186
2175
|
every:,
|
2187
2176
|
period: nil,
|
@@ -2205,6 +2194,7 @@ module Polars
|
|
2205
2194
|
start_by
|
2206
2195
|
)
|
2207
2196
|
end
|
2197
|
+
alias_method :groupby_dynamic, :group_by_dynamic
|
2208
2198
|
|
2209
2199
|
# Upsample a DataFrame at a regular frequency.
|
2210
2200
|
#
|
@@ -2281,7 +2271,7 @@ module Polars
|
|
2281
2271
|
if by.nil?
|
2282
2272
|
by = []
|
2283
2273
|
end
|
2284
|
-
if by.is_a?(String)
|
2274
|
+
if by.is_a?(::String)
|
2285
2275
|
by = [by]
|
2286
2276
|
end
|
2287
2277
|
if offset.nil?
|
@@ -2475,17 +2465,17 @@ module Polars
|
|
2475
2465
|
# @example
|
2476
2466
|
# df.join(other_df, on: "ham", how: "outer")
|
2477
2467
|
# # =>
|
2478
|
-
# # shape: (4,
|
2479
|
-
# #
|
2480
|
-
# # │ foo ┆ bar ┆ ham
|
2481
|
-
# # │ --- ┆ --- ┆ --- ┆ ---
|
2482
|
-
# # │ i64 ┆ f64 ┆ str ┆ str
|
2483
|
-
# #
|
2484
|
-
# # │ 1 ┆ 6.0 ┆ a
|
2485
|
-
# # │ 2 ┆ 7.0 ┆ b
|
2486
|
-
# # │ null ┆ null ┆
|
2487
|
-
# # │ 3 ┆ 8.0 ┆ c
|
2488
|
-
# #
|
2468
|
+
# # shape: (4, 5)
|
2469
|
+
# # ┌──────┬──────┬──────┬───────┬───────────┐
|
2470
|
+
# # │ foo ┆ bar ┆ ham ┆ apple ┆ ham_right │
|
2471
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
2472
|
+
# # │ i64 ┆ f64 ┆ str ┆ str ┆ str │
|
2473
|
+
# # ╞══════╪══════╪══════╪═══════╪═══════════╡
|
2474
|
+
# # │ 1 ┆ 6.0 ┆ a ┆ x ┆ a │
|
2475
|
+
# # │ 2 ┆ 7.0 ┆ b ┆ y ┆ b │
|
2476
|
+
# # │ null ┆ null ┆ null ┆ z ┆ d │
|
2477
|
+
# # │ 3 ┆ 8.0 ┆ c ┆ null ┆ null │
|
2478
|
+
# # └──────┴──────┴──────┴───────┴───────────┘
|
2489
2479
|
#
|
2490
2480
|
# @example
|
2491
2481
|
# df.join(other_df, on: "ham", how: "left")
|
@@ -3125,17 +3115,17 @@ module Polars
|
|
3125
3115
|
sort_columns: false,
|
3126
3116
|
separator: "_"
|
3127
3117
|
)
|
3128
|
-
if values.is_a?(String)
|
3118
|
+
if values.is_a?(::String)
|
3129
3119
|
values = [values]
|
3130
3120
|
end
|
3131
|
-
if index.is_a?(String)
|
3121
|
+
if index.is_a?(::String)
|
3132
3122
|
index = [index]
|
3133
3123
|
end
|
3134
|
-
if columns.is_a?(String)
|
3124
|
+
if columns.is_a?(::String)
|
3135
3125
|
columns = [columns]
|
3136
3126
|
end
|
3137
3127
|
|
3138
|
-
if aggregate_fn.is_a?(String)
|
3128
|
+
if aggregate_fn.is_a?(::String)
|
3139
3129
|
case aggregate_fn
|
3140
3130
|
when "first"
|
3141
3131
|
aggregate_expr = Polars.element.first._rbexpr
|
@@ -3220,10 +3210,10 @@ module Polars
|
|
3220
3210
|
# # │ z ┆ c ┆ 6 │
|
3221
3211
|
# # └─────┴──────────┴───────┘
|
3222
3212
|
def melt(id_vars: nil, value_vars: nil, variable_name: nil, value_name: nil)
|
3223
|
-
if value_vars.is_a?(String)
|
3213
|
+
if value_vars.is_a?(::String)
|
3224
3214
|
value_vars = [value_vars]
|
3225
3215
|
end
|
3226
|
-
if id_vars.is_a?(String)
|
3216
|
+
if id_vars.is_a?(::String)
|
3227
3217
|
id_vars = [id_vars]
|
3228
3218
|
end
|
3229
3219
|
if value_vars.nil?
|
@@ -3437,7 +3427,7 @@ module Polars
|
|
3437
3427
|
# # │ C ┆ 2 ┆ l │
|
3438
3428
|
# # └─────┴─────┴─────┘}
|
3439
3429
|
def partition_by(groups, maintain_order: true, include_key: true, as_dict: false)
|
3440
|
-
if groups.is_a?(String)
|
3430
|
+
if groups.is_a?(::String)
|
3441
3431
|
groups = [groups]
|
3442
3432
|
elsif !groups.is_a?(::Array)
|
3443
3433
|
groups = Array(groups)
|
@@ -3464,8 +3454,10 @@ module Polars
|
|
3464
3454
|
|
3465
3455
|
# Shift values by the given period.
|
3466
3456
|
#
|
3467
|
-
# @param
|
3457
|
+
# @param n [Integer]
|
3468
3458
|
# Number of places to shift (may be negative).
|
3459
|
+
# @param fill_value [Object]
|
3460
|
+
# Fill the resulting null values with this value.
|
3469
3461
|
#
|
3470
3462
|
# @return [DataFrame]
|
3471
3463
|
#
|
@@ -3503,8 +3495,8 @@ module Polars
|
|
3503
3495
|
# # │ 3 ┆ 8 ┆ c │
|
3504
3496
|
# # │ null ┆ null ┆ null │
|
3505
3497
|
# # └──────┴──────┴──────┘
|
3506
|
-
def shift(
|
3507
|
-
|
3498
|
+
def shift(n, fill_value: nil)
|
3499
|
+
lazy.shift(n, fill_value: fill_value).collect(_eager: true)
|
3508
3500
|
end
|
3509
3501
|
|
3510
3502
|
# Shift the values by a given period and fill the resulting null values.
|
@@ -3537,9 +3529,7 @@ module Polars
|
|
3537
3529
|
# # │ 2 ┆ 7 ┆ b │
|
3538
3530
|
# # └─────┴─────┴─────┘
|
3539
3531
|
def shift_and_fill(periods, fill_value)
|
3540
|
-
|
3541
|
-
.shift_and_fill(periods, fill_value)
|
3542
|
-
.collect(no_optimization: true, string_cache: false)
|
3532
|
+
shift(periods, fill_value: fill_value)
|
3543
3533
|
end
|
3544
3534
|
|
3545
3535
|
# Get a mask of all duplicated rows in this DataFrame.
|
@@ -3788,9 +3778,9 @@ module Polars
|
|
3788
3778
|
# # └─────┴─────┴─────┘
|
3789
3779
|
def max(axis: 0)
|
3790
3780
|
if axis == 0
|
3791
|
-
|
3781
|
+
lazy.max.collect(_eager: true)
|
3792
3782
|
elsif axis == 1
|
3793
|
-
Utils.wrap_s(_df.
|
3783
|
+
Utils.wrap_s(_df.max_horizontal)
|
3794
3784
|
else
|
3795
3785
|
raise ArgumentError, "Axis should be 0 or 1."
|
3796
3786
|
end
|
@@ -3820,9 +3810,9 @@ module Polars
|
|
3820
3810
|
# # └─────┴─────┴─────┘
|
3821
3811
|
def min(axis: 0)
|
3822
3812
|
if axis == 0
|
3823
|
-
|
3813
|
+
lazy.min.collect(_eager: true)
|
3824
3814
|
elsif axis == 1
|
3825
|
-
Utils.wrap_s(_df.
|
3815
|
+
Utils.wrap_s(_df.min_horizontal)
|
3826
3816
|
else
|
3827
3817
|
raise ArgumentError, "Axis should be 0 or 1."
|
3828
3818
|
end
|
@@ -3869,9 +3859,9 @@ module Polars
|
|
3869
3859
|
def sum(axis: 0, null_strategy: "ignore")
|
3870
3860
|
case axis
|
3871
3861
|
when 0
|
3872
|
-
|
3862
|
+
lazy.sum.collect(_eager: true)
|
3873
3863
|
when 1
|
3874
|
-
Utils.wrap_s(_df.
|
3864
|
+
Utils.wrap_s(_df.sum_horizontal(null_strategy))
|
3875
3865
|
else
|
3876
3866
|
raise ArgumentError, "Axis should be 0 or 1."
|
3877
3867
|
end
|
@@ -3907,9 +3897,9 @@ module Polars
|
|
3907
3897
|
def mean(axis: 0, null_strategy: "ignore")
|
3908
3898
|
case axis
|
3909
3899
|
when 0
|
3910
|
-
|
3900
|
+
lazy.mean.collect(_eager: true)
|
3911
3901
|
when 1
|
3912
|
-
Utils.wrap_s(_df.
|
3902
|
+
Utils.wrap_s(_df.mean_horizontal(null_strategy))
|
3913
3903
|
else
|
3914
3904
|
raise ArgumentError, "Axis should be 0 or 1."
|
3915
3905
|
end
|
@@ -3953,7 +3943,7 @@ module Polars
|
|
3953
3943
|
# # │ 0.816497 ┆ 0.816497 ┆ null │
|
3954
3944
|
# # └──────────┴──────────┴──────┘
|
3955
3945
|
def std(ddof: 1)
|
3956
|
-
|
3946
|
+
lazy.std(ddof: ddof).collect(_eager: true)
|
3957
3947
|
end
|
3958
3948
|
|
3959
3949
|
# Aggregate the columns of this DataFrame to their variance value.
|
@@ -3994,7 +3984,7 @@ module Polars
|
|
3994
3984
|
# # │ 0.666667 ┆ 0.666667 ┆ null │
|
3995
3985
|
# # └──────────┴──────────┴──────┘
|
3996
3986
|
def var(ddof: 1)
|
3997
|
-
|
3987
|
+
lazy.var(ddof: ddof).collect(_eager: true)
|
3998
3988
|
end
|
3999
3989
|
|
4000
3990
|
# Aggregate the columns of this DataFrame to their median value.
|
@@ -4020,7 +4010,7 @@ module Polars
|
|
4020
4010
|
# # │ 2.0 ┆ 7.0 ┆ null │
|
4021
4011
|
# # └─────┴─────┴──────┘
|
4022
4012
|
def median
|
4023
|
-
|
4013
|
+
lazy.median.collect(_eager: true)
|
4024
4014
|
end
|
4025
4015
|
|
4026
4016
|
# Aggregate the columns of this DataFrame to their product values.
|
@@ -4077,7 +4067,7 @@ module Polars
|
|
4077
4067
|
# # │ 2.0 ┆ 7.0 ┆ null │
|
4078
4068
|
# # └─────┴─────┴──────┘
|
4079
4069
|
def quantile(quantile, interpolation: "nearest")
|
4080
|
-
|
4070
|
+
lazy.quantile(quantile, interpolation: interpolation).collect(_eager: true)
|
4081
4071
|
end
|
4082
4072
|
|
4083
4073
|
# Get one hot encoded dummy variables.
|
@@ -4108,7 +4098,7 @@ module Polars
|
|
4108
4098
|
# # │ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 │
|
4109
4099
|
# # └───────┴───────┴───────┴───────┴───────┴───────┘
|
4110
4100
|
def to_dummies(columns: nil, separator: "_", drop_first: false)
|
4111
|
-
if columns.is_a?(String)
|
4101
|
+
if columns.is_a?(::String)
|
4112
4102
|
columns = [columns]
|
4113
4103
|
end
|
4114
4104
|
_from_rbdf(_df.to_dummies(columns, separator, drop_first))
|
@@ -4294,15 +4284,20 @@ module Polars
|
|
4294
4284
|
end
|
4295
4285
|
|
4296
4286
|
if n.nil? && !frac.nil?
|
4287
|
+
frac = Series.new("frac", [frac]) unless frac.is_a?(Series)
|
4288
|
+
|
4297
4289
|
_from_rbdf(
|
4298
|
-
_df.sample_frac(frac, with_replacement, shuffle, seed)
|
4290
|
+
_df.sample_frac(frac._s, with_replacement, shuffle, seed)
|
4299
4291
|
)
|
4300
4292
|
end
|
4301
4293
|
|
4302
4294
|
if n.nil?
|
4303
4295
|
n = 1
|
4304
4296
|
end
|
4305
|
-
|
4297
|
+
|
4298
|
+
n = Series.new("", [n]) unless n.is_a?(Series)
|
4299
|
+
|
4300
|
+
_from_rbdf(_df.sample_n(n._s, with_replacement, shuffle, seed))
|
4306
4301
|
end
|
4307
4302
|
|
4308
4303
|
# Apply a horizontal reduction on a DataFrame.
|
@@ -4601,7 +4596,7 @@ module Polars
|
|
4601
4596
|
#
|
4602
4597
|
# @example
|
4603
4598
|
# s = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]})
|
4604
|
-
# s.
|
4599
|
+
# s.gather_every(2)
|
4605
4600
|
# # =>
|
4606
4601
|
# # shape: (2, 2)
|
4607
4602
|
# # ┌─────┬─────┐
|
@@ -4612,9 +4607,10 @@ module Polars
|
|
4612
4607
|
# # │ 1 ┆ 5 │
|
4613
4608
|
# # │ 3 ┆ 7 │
|
4614
4609
|
# # └─────┴─────┘
|
4615
|
-
def
|
4616
|
-
select(Utils.col("*").
|
4610
|
+
def gather_every(n, offset = 0)
|
4611
|
+
select(Utils.col("*").gather_every(n, offset))
|
4617
4612
|
end
|
4613
|
+
alias_method :take_every, :gather_every
|
4618
4614
|
|
4619
4615
|
# Hash and combine the rows in this DataFrame.
|
4620
4616
|
#
|
@@ -4671,16 +4667,16 @@ module Polars
|
|
4671
4667
|
# df.interpolate
|
4672
4668
|
# # =>
|
4673
4669
|
# # shape: (4, 3)
|
4674
|
-
# #
|
4675
|
-
# # │ foo
|
4676
|
-
# # │ ---
|
4677
|
-
# # │
|
4678
|
-
# #
|
4679
|
-
# # │ 1
|
4680
|
-
# # │ 5
|
4681
|
-
# # │ 9
|
4682
|
-
# # │ 10
|
4683
|
-
# #
|
4670
|
+
# # ┌──────┬──────┬──────────┐
|
4671
|
+
# # │ foo ┆ bar ┆ baz │
|
4672
|
+
# # │ --- ┆ --- ┆ --- │
|
4673
|
+
# # │ f64 ┆ f64 ┆ f64 │
|
4674
|
+
# # ╞══════╪══════╪══════════╡
|
4675
|
+
# # │ 1.0 ┆ 6.0 ┆ 1.0 │
|
4676
|
+
# # │ 5.0 ┆ 7.0 ┆ 3.666667 │
|
4677
|
+
# # │ 9.0 ┆ 9.0 ┆ 6.333333 │
|
4678
|
+
# # │ 10.0 ┆ null ┆ 9.0 │
|
4679
|
+
# # └──────┴──────┴──────────┘
|
4684
4680
|
def interpolate
|
4685
4681
|
select(Utils.col("*").interpolate)
|
4686
4682
|
end
|
@@ -4762,7 +4758,7 @@ module Polars
|
|
4762
4758
|
# # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
|
4763
4759
|
# # └────────┴─────┴─────┴──────┴───────────┴───────┘
|
4764
4760
|
def unnest(names)
|
4765
|
-
if names.is_a?(String)
|
4761
|
+
if names.is_a?(::String)
|
4766
4762
|
names = [names]
|
4767
4763
|
end
|
4768
4764
|
_from_rbdf(_df.unnest(names))
|
@@ -4875,10 +4871,10 @@ module Polars
|
|
4875
4871
|
if val.is_a?(Hash) && dtype != Struct
|
4876
4872
|
updated_data[name] = DataFrame.new(val).to_struct(name)
|
4877
4873
|
elsif !Utils.arrlen(val).nil?
|
4878
|
-
updated_data[name] = Series.new(String.new(name), val, dtype: dtype)
|
4879
|
-
elsif val.nil? || [Integer, Float, TrueClass, FalseClass, String, ::Date, ::DateTime, ::Time].any? { |cls| val.is_a?(cls) }
|
4874
|
+
updated_data[name] = Series.new(::String.new(name), val, dtype: dtype)
|
4875
|
+
elsif val.nil? || [Integer, Float, TrueClass, FalseClass, ::String, ::Date, ::DateTime, ::Time].any? { |cls| val.is_a?(cls) }
|
4880
4876
|
dtype = Polars::Float64 if val.nil? && dtype.nil?
|
4881
|
-
updated_data[name] = Series.new(String.new(name), [val], dtype: dtype).extend_constant(val, array_len - 1)
|
4877
|
+
updated_data[name] = Series.new(::String.new(name), [val], dtype: dtype).extend_constant(val, array_len - 1)
|
4882
4878
|
else
|
4883
4879
|
raise Todo
|
4884
4880
|
end
|
@@ -4935,7 +4931,7 @@ module Polars
|
|
4935
4931
|
end
|
4936
4932
|
column_names =
|
4937
4933
|
(schema || []).map.with_index do |col, i|
|
4938
|
-
if col.is_a?(String)
|
4934
|
+
if col.is_a?(::String)
|
4939
4935
|
col || "column_#{i}"
|
4940
4936
|
else
|
4941
4937
|
col[0]
|
@@ -4948,12 +4944,12 @@ module Polars
|
|
4948
4944
|
lookup = column_names.zip(lookup_names || []).to_h
|
4949
4945
|
|
4950
4946
|
column_dtypes =
|
4951
|
-
(schema || []).select { |col| !col.is_a?(String) && col[1] }.to_h do |col|
|
4947
|
+
(schema || []).select { |col| !col.is_a?(::String) && col[1] }.to_h do |col|
|
4952
4948
|
[lookup[col[0]] || col[0], col[1]]
|
4953
4949
|
end
|
4954
4950
|
|
4955
|
-
if schema_overrides
|
4956
|
-
|
4951
|
+
if schema_overrides && schema_overrides.any?
|
4952
|
+
column_dtypes.merge!(schema_overrides)
|
4957
4953
|
end
|
4958
4954
|
|
4959
4955
|
column_dtypes.each do |col, dtype|
|
@@ -5056,13 +5052,54 @@ module Polars
|
|
5056
5052
|
return rbdf
|
5057
5053
|
elsif data[0].is_a?(::Array)
|
5058
5054
|
if orient.nil? && !columns.nil?
|
5059
|
-
|
5055
|
+
first_element = data[0]
|
5056
|
+
row_types = first_element.filter_map { |value| value.class }.uniq
|
5057
|
+
if row_types.include?(Integer) && row_types.include?(Float)
|
5058
|
+
row_types.delete(Integer)
|
5059
|
+
end
|
5060
|
+
orient = row_types.length == 1 ? "col" : "row"
|
5060
5061
|
end
|
5061
5062
|
|
5062
5063
|
if orient == "row"
|
5063
|
-
|
5064
|
+
column_names, schema_overrides = _unpack_schema(
|
5065
|
+
schema, schema_overrides: schema_overrides, n_expected: first_element.length
|
5066
|
+
)
|
5067
|
+
local_schema_override = (
|
5068
|
+
schema_overrides.any? ? (raise Todo) : {}
|
5069
|
+
)
|
5070
|
+
if column_names.any? && first_element.length > 0 && first_element.length != column_names.length
|
5071
|
+
raise ArgumentError, "the row data does not match the number of columns"
|
5072
|
+
end
|
5073
|
+
|
5074
|
+
unpack_nested = false
|
5075
|
+
local_schema_override.each do |col, tp|
|
5076
|
+
raise Todo
|
5077
|
+
end
|
5078
|
+
|
5079
|
+
if unpack_nested
|
5080
|
+
raise Todo
|
5081
|
+
else
|
5082
|
+
rbdf = RbDataFrame.read_rows(
|
5083
|
+
data,
|
5084
|
+
infer_schema_length,
|
5085
|
+
local_schema_override.any? ? local_schema_override : nil
|
5086
|
+
)
|
5087
|
+
end
|
5088
|
+
if column_names.any? || schema_overrides.any?
|
5089
|
+
rbdf = _post_apply_columns(
|
5090
|
+
rbdf, column_names, schema_overrides: schema_overrides
|
5091
|
+
)
|
5092
|
+
end
|
5093
|
+
return rbdf
|
5064
5094
|
elsif orient == "col" || orient.nil?
|
5065
|
-
|
5095
|
+
column_names, schema_overrides = _unpack_schema(
|
5096
|
+
schema, schema_overrides: schema_overrides, n_expected: data.length
|
5097
|
+
)
|
5098
|
+
data_series =
|
5099
|
+
data.map.with_index do |element, i|
|
5100
|
+
Series.new(column_names[i], element, dtype: schema_overrides[column_names[i]])._s
|
5101
|
+
end
|
5102
|
+
return RbDataFrame.new(data_series)
|
5066
5103
|
else
|
5067
5104
|
raise ArgumentError, "orient must be one of {{'col', 'row', nil}}, got #{orient} instead."
|
5068
5105
|
end
|
@@ -5108,10 +5145,10 @@ module Polars
|
|
5108
5145
|
|
5109
5146
|
def _compare_to_other_df(other, op)
|
5110
5147
|
if columns != other.columns
|
5111
|
-
raise
|
5148
|
+
raise ArgumentError, "DataFrame columns do not match"
|
5112
5149
|
end
|
5113
5150
|
if shape != other.shape
|
5114
|
-
raise
|
5151
|
+
raise ArgumentError, "DataFrame dimensions do not match"
|
5115
5152
|
end
|
5116
5153
|
|
5117
5154
|
suffix = "__POLARS_CMP_OTHER"
|