polars-df 0.7.0-x86_64-linux → 0.9.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +41 -0
- data/Cargo.lock +353 -237
- data/Cargo.toml +0 -3
- data/LICENSE-THIRD-PARTY.txt +1978 -1459
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/{3.0 → 3.3}/polars.so +0 -0
- data/lib/polars/array_expr.rb +449 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +248 -108
- data/lib/polars/data_types.rb +195 -29
- data/lib/polars/date_time_expr.rb +41 -24
- data/lib/polars/date_time_name_space.rb +12 -12
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +1080 -195
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +27 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +3 -3
- data/lib/polars/io.rb +21 -28
- data/lib/polars/lazy_frame.rb +390 -76
- data/lib/polars/list_expr.rb +152 -6
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +557 -59
- data/lib/polars/sql_context.rb +1 -1
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +412 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +64 -20
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -2
- metadata +36 -7
- data/lib/polars/lazy_functions.rb +0 -1197
data/lib/polars/data_frame.rb
CHANGED
@@ -47,8 +47,8 @@ module Polars
|
|
47
47
|
end
|
48
48
|
|
49
49
|
# @private
|
50
|
-
def self._from_hashes(data, infer_schema_length: 100, schema: nil)
|
51
|
-
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema)
|
50
|
+
def self._from_hashes(data, infer_schema_length: 100, schema: nil, schema_overrides: nil)
|
51
|
+
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
|
52
52
|
_from_rbdf(rbdf)
|
53
53
|
end
|
54
54
|
|
@@ -119,10 +119,10 @@ module Polars
|
|
119
119
|
|
120
120
|
processed_null_values = Utils._process_null_values(null_values)
|
121
121
|
|
122
|
-
if columns.is_a?(String)
|
122
|
+
if columns.is_a?(::String)
|
123
123
|
columns = [columns]
|
124
124
|
end
|
125
|
-
if file.is_a?(String) && file.include?("*")
|
125
|
+
if file.is_a?(::String) && file.include?("*")
|
126
126
|
dtypes_dict = nil
|
127
127
|
if !dtype_list.nil?
|
128
128
|
dtypes_dict = dtype_list.to_h
|
@@ -206,11 +206,11 @@ module Polars
|
|
206
206
|
if Utils.pathlike?(source)
|
207
207
|
source = Utils.normalise_filepath(source)
|
208
208
|
end
|
209
|
-
if columns.is_a?(String)
|
209
|
+
if columns.is_a?(::String)
|
210
210
|
columns = [columns]
|
211
211
|
end
|
212
212
|
|
213
|
-
if source.is_a?(String) && source.include?("*") && Utils.local_file?(source)
|
213
|
+
if source.is_a?(::String) && source.include?("*") && Utils.local_file?(source)
|
214
214
|
scan =
|
215
215
|
Polars.scan_parquet(
|
216
216
|
source,
|
@@ -269,11 +269,11 @@ module Polars
|
|
269
269
|
if Utils.pathlike?(file)
|
270
270
|
file = Utils.normalise_filepath(file)
|
271
271
|
end
|
272
|
-
if columns.is_a?(String)
|
272
|
+
if columns.is_a?(::String)
|
273
273
|
columns = [columns]
|
274
274
|
end
|
275
275
|
|
276
|
-
if file.is_a?(String) && file.include?("*")
|
276
|
+
if file.is_a?(::String) && file.include?("*")
|
277
277
|
raise Todo
|
278
278
|
end
|
279
279
|
|
@@ -411,7 +411,7 @@ module Polars
|
|
411
411
|
# }
|
412
412
|
# )
|
413
413
|
# df.dtypes
|
414
|
-
# # => [Polars::Int64, Polars::Float64, Polars::
|
414
|
+
# # => [Polars::Int64, Polars::Float64, Polars::String]
|
415
415
|
def dtypes
|
416
416
|
_df.dtypes
|
417
417
|
end
|
@@ -429,7 +429,7 @@ module Polars
|
|
429
429
|
# }
|
430
430
|
# )
|
431
431
|
# df.schema
|
432
|
-
# # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::
|
432
|
+
# # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String}
|
433
433
|
def schema
|
434
434
|
columns.zip(dtypes).to_h
|
435
435
|
end
|
@@ -589,13 +589,13 @@ module Polars
|
|
589
589
|
return df.slice(row_selection, 1)
|
590
590
|
end
|
591
591
|
# df[2, "a"]
|
592
|
-
if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
|
592
|
+
if col_selection.is_a?(::String) || col_selection.is_a?(Symbol)
|
593
593
|
return self[col_selection][row_selection]
|
594
594
|
end
|
595
595
|
end
|
596
596
|
|
597
597
|
# column selection can be "a" and ["a", "b"]
|
598
|
-
if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
|
598
|
+
if col_selection.is_a?(::String) || col_selection.is_a?(Symbol)
|
599
599
|
col_selection = [col_selection]
|
600
600
|
end
|
601
601
|
|
@@ -621,8 +621,8 @@ module Polars
|
|
621
621
|
|
622
622
|
# select single column
|
623
623
|
# df["foo"]
|
624
|
-
if item.is_a?(String) || item.is_a?(Symbol)
|
625
|
-
return Utils.wrap_s(_df.
|
624
|
+
if item.is_a?(::String) || item.is_a?(Symbol)
|
625
|
+
return Utils.wrap_s(_df.get_column(item.to_s))
|
626
626
|
end
|
627
627
|
|
628
628
|
# df[idx]
|
@@ -647,7 +647,7 @@ module Polars
|
|
647
647
|
|
648
648
|
if item.is_a?(Series)
|
649
649
|
dtype = item.dtype
|
650
|
-
if dtype ==
|
650
|
+
if dtype == String
|
651
651
|
return _from_rbdf(_df.select(item))
|
652
652
|
elsif dtype == UInt32
|
653
653
|
return _from_rbdf(_df.take_with_series(item._s))
|
@@ -698,7 +698,7 @@ module Polars
|
|
698
698
|
s[row_selection] = value
|
699
699
|
|
700
700
|
if col_selection.is_a?(Integer)
|
701
|
-
|
701
|
+
replace_column(col_selection, s)
|
702
702
|
elsif Utils.strlike?(col_selection)
|
703
703
|
replace(col_selection, s)
|
704
704
|
end
|
@@ -1084,7 +1084,7 @@ module Polars
|
|
1084
1084
|
# df.estimated_size
|
1085
1085
|
# # => 25888898
|
1086
1086
|
# df.estimated_size("mb")
|
1087
|
-
# # =>
|
1087
|
+
# # => 26.702880859375
|
1088
1088
|
def estimated_size(unit = "b")
|
1089
1089
|
sz = _df.estimated_size
|
1090
1090
|
Utils.scale_bytes(sz, to: unit)
|
@@ -1222,7 +1222,7 @@ module Polars
|
|
1222
1222
|
# @example
|
1223
1223
|
# df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
|
1224
1224
|
# s = Polars::Series.new("baz", [97, 98, 99])
|
1225
|
-
# df.
|
1225
|
+
# df.insert_column(1, s)
|
1226
1226
|
# # =>
|
1227
1227
|
# # shape: (3, 3)
|
1228
1228
|
# # ┌─────┬─────┬─────┐
|
@@ -1244,7 +1244,7 @@ module Polars
|
|
1244
1244
|
# }
|
1245
1245
|
# )
|
1246
1246
|
# s = Polars::Series.new("d", [-2.5, 15, 20.5, 0])
|
1247
|
-
# df.
|
1247
|
+
# df.insert_column(3, s)
|
1248
1248
|
# # =>
|
1249
1249
|
# # shape: (4, 4)
|
1250
1250
|
# # ┌─────┬──────┬───────┬──────┐
|
@@ -1257,13 +1257,14 @@ module Polars
|
|
1257
1257
|
# # │ 3 ┆ 10.0 ┆ false ┆ 20.5 │
|
1258
1258
|
# # │ 4 ┆ 13.0 ┆ true ┆ 0.0 │
|
1259
1259
|
# # └─────┴──────┴───────┴──────┘
|
1260
|
-
def
|
1260
|
+
def insert_column(index, series)
|
1261
1261
|
if index < 0
|
1262
1262
|
index = columns.length + index
|
1263
1263
|
end
|
1264
|
-
_df.
|
1264
|
+
_df.insert_column(index, series._s)
|
1265
1265
|
self
|
1266
1266
|
end
|
1267
|
+
alias_method :insert_at_idx, :insert_column
|
1267
1268
|
|
1268
1269
|
# Filter the rows in the DataFrame based on a predicate expression.
|
1269
1270
|
#
|
@@ -1367,7 +1368,7 @@ module Polars
|
|
1367
1368
|
]
|
1368
1369
|
)._df
|
1369
1370
|
)
|
1370
|
-
summary.
|
1371
|
+
summary.insert_column(
|
1371
1372
|
0,
|
1372
1373
|
Polars::Series.new(
|
1373
1374
|
"describe",
|
@@ -1388,11 +1389,12 @@ module Polars
|
|
1388
1389
|
# df = Polars::DataFrame.new(
|
1389
1390
|
# {"foo" => [1, 2, 3], "bar" => [6, 7, 8], "ham" => ["a", "b", "c"]}
|
1390
1391
|
# )
|
1391
|
-
# df.
|
1392
|
+
# df.get_column_index("ham")
|
1392
1393
|
# # => 2
|
1393
|
-
def
|
1394
|
-
_df.
|
1394
|
+
def get_column_index(name)
|
1395
|
+
_df.get_column_index(name)
|
1395
1396
|
end
|
1397
|
+
alias_method :find_idx_by_name, :get_column_index
|
1396
1398
|
|
1397
1399
|
# Replace a column at an index location.
|
1398
1400
|
#
|
@@ -1412,7 +1414,7 @@ module Polars
|
|
1412
1414
|
# }
|
1413
1415
|
# )
|
1414
1416
|
# s = Polars::Series.new("apple", [10, 20, 30])
|
1415
|
-
# df.
|
1417
|
+
# df.replace_column(0, s)
|
1416
1418
|
# # =>
|
1417
1419
|
# # shape: (3, 3)
|
1418
1420
|
# # ┌───────┬─────┬─────┐
|
@@ -1424,13 +1426,14 @@ module Polars
|
|
1424
1426
|
# # │ 20 ┆ 7 ┆ b │
|
1425
1427
|
# # │ 30 ┆ 8 ┆ c │
|
1426
1428
|
# # └───────┴─────┴─────┘
|
1427
|
-
def
|
1429
|
+
def replace_column(index, series)
|
1428
1430
|
if index < 0
|
1429
1431
|
index = columns.length + index
|
1430
1432
|
end
|
1431
|
-
_df.
|
1433
|
+
_df.replace_column(index, series._s)
|
1432
1434
|
self
|
1433
1435
|
end
|
1436
|
+
alias_method :replace_at_idx, :replace_column
|
1434
1437
|
|
1435
1438
|
# Sort the DataFrame by column.
|
1436
1439
|
#
|
@@ -1524,13 +1527,14 @@ module Polars
|
|
1524
1527
|
# "ham" => ["c", "b", "a"]
|
1525
1528
|
# }
|
1526
1529
|
# )
|
1527
|
-
# df1.
|
1530
|
+
# df1.equals(df1)
|
1528
1531
|
# # => true
|
1529
|
-
# df1.
|
1532
|
+
# df1.equals(df2)
|
1530
1533
|
# # => false
|
1531
|
-
def
|
1532
|
-
_df.
|
1534
|
+
def equals(other, null_equal: true)
|
1535
|
+
_df.equals(other._df, null_equal)
|
1533
1536
|
end
|
1537
|
+
alias_method :frame_equal, :equals
|
1534
1538
|
|
1535
1539
|
# Replace a column by a new Series.
|
1536
1540
|
#
|
@@ -1716,7 +1720,7 @@ module Polars
|
|
1716
1720
|
# # │ 3 ┆ 8 ┆ c │
|
1717
1721
|
# # └─────┴─────┴─────┘
|
1718
1722
|
def drop_nulls(subset: nil)
|
1719
|
-
if subset.is_a?(String)
|
1723
|
+
if subset.is_a?(::String)
|
1720
1724
|
subset = [subset]
|
1721
1725
|
end
|
1722
1726
|
_from_rbdf(_df.drop_nulls(subset))
|
@@ -1778,7 +1782,7 @@ module Polars
|
|
1778
1782
|
# "b" => [2, 4, 6]
|
1779
1783
|
# }
|
1780
1784
|
# )
|
1781
|
-
# df.
|
1785
|
+
# df.with_row_index
|
1782
1786
|
# # =>
|
1783
1787
|
# # shape: (3, 3)
|
1784
1788
|
# # ┌────────┬─────┬─────┐
|
@@ -1790,9 +1794,10 @@ module Polars
|
|
1790
1794
|
# # │ 1 ┆ 3 ┆ 4 │
|
1791
1795
|
# # │ 2 ┆ 5 ┆ 6 │
|
1792
1796
|
# # └────────┴─────┴─────┘
|
1793
|
-
def
|
1794
|
-
_from_rbdf(_df.
|
1797
|
+
def with_row_index(name: "row_nr", offset: 0)
|
1798
|
+
_from_rbdf(_df.with_row_index(name, offset))
|
1795
1799
|
end
|
1800
|
+
alias_method :with_row_count, :with_row_index
|
1796
1801
|
|
1797
1802
|
# Start a group by operation.
|
1798
1803
|
#
|
@@ -2267,7 +2272,7 @@ module Polars
|
|
2267
2272
|
if by.nil?
|
2268
2273
|
by = []
|
2269
2274
|
end
|
2270
|
-
if by.is_a?(String)
|
2275
|
+
if by.is_a?(::String)
|
2271
2276
|
by = [by]
|
2272
2277
|
end
|
2273
2278
|
if offset.nil?
|
@@ -2429,6 +2434,8 @@ module Polars
|
|
2429
2434
|
# Join strategy.
|
2430
2435
|
# @param suffix [String]
|
2431
2436
|
# Suffix to append to columns with a duplicate name.
|
2437
|
+
# @param join_nulls [Boolean]
|
2438
|
+
# Join on null values. By default null values will never produce matches.
|
2432
2439
|
#
|
2433
2440
|
# @return [DataFrame]
|
2434
2441
|
#
|
@@ -2461,17 +2468,17 @@ module Polars
|
|
2461
2468
|
# @example
|
2462
2469
|
# df.join(other_df, on: "ham", how: "outer")
|
2463
2470
|
# # =>
|
2464
|
-
# # shape: (4,
|
2465
|
-
# #
|
2466
|
-
# # │ foo ┆ bar ┆ ham
|
2467
|
-
# # │ --- ┆ --- ┆ --- ┆ ---
|
2468
|
-
# # │ i64 ┆ f64 ┆ str ┆ str
|
2469
|
-
# #
|
2470
|
-
# # │ 1 ┆ 6.0 ┆ a
|
2471
|
-
# # │ 2 ┆ 7.0 ┆ b
|
2472
|
-
# # │ null ┆ null ┆
|
2473
|
-
# # │ 3 ┆ 8.0 ┆ c
|
2474
|
-
# #
|
2471
|
+
# # shape: (4, 5)
|
2472
|
+
# # ┌──────┬──────┬──────┬───────┬───────────┐
|
2473
|
+
# # │ foo ┆ bar ┆ ham ┆ apple ┆ ham_right │
|
2474
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
2475
|
+
# # │ i64 ┆ f64 ┆ str ┆ str ┆ str │
|
2476
|
+
# # ╞══════╪══════╪══════╪═══════╪═══════════╡
|
2477
|
+
# # │ 1 ┆ 6.0 ┆ a ┆ x ┆ a │
|
2478
|
+
# # │ 2 ┆ 7.0 ┆ b ┆ y ┆ b │
|
2479
|
+
# # │ null ┆ null ┆ null ┆ z ┆ d │
|
2480
|
+
# # │ 3 ┆ 8.0 ┆ c ┆ null ┆ null │
|
2481
|
+
# # └──────┴──────┴──────┴───────┴───────────┘
|
2475
2482
|
#
|
2476
2483
|
# @example
|
2477
2484
|
# df.join(other_df, on: "ham", how: "left")
|
@@ -2511,7 +2518,7 @@ module Polars
|
|
2511
2518
|
# # ╞═════╪═════╪═════╡
|
2512
2519
|
# # │ 3 ┆ 8.0 ┆ c │
|
2513
2520
|
# # └─────┴─────┴─────┘
|
2514
|
-
def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right")
|
2521
|
+
def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right", join_nulls: false)
|
2515
2522
|
lazy
|
2516
2523
|
.join(
|
2517
2524
|
other.lazy,
|
@@ -2520,6 +2527,7 @@ module Polars
|
|
2520
2527
|
on: on,
|
2521
2528
|
how: how,
|
2522
2529
|
suffix: suffix,
|
2530
|
+
join_nulls: join_nulls
|
2523
2531
|
)
|
2524
2532
|
.collect(no_optimization: true)
|
2525
2533
|
end
|
@@ -2863,7 +2871,7 @@ module Polars
|
|
2863
2871
|
# "c" => [true, true, false, nil]
|
2864
2872
|
# }
|
2865
2873
|
# )
|
2866
|
-
# df.
|
2874
|
+
# df.clear
|
2867
2875
|
# # =>
|
2868
2876
|
# # shape: (0, 3)
|
2869
2877
|
# # ┌─────┬─────┬──────┐
|
@@ -2872,9 +2880,31 @@ module Polars
|
|
2872
2880
|
# # │ i64 ┆ f64 ┆ bool │
|
2873
2881
|
# # ╞═════╪═════╪══════╡
|
2874
2882
|
# # └─────┴─────┴──────┘
|
2875
|
-
|
2876
|
-
|
2883
|
+
#
|
2884
|
+
# @example
|
2885
|
+
# df.clear(2)
|
2886
|
+
# # =>
|
2887
|
+
# # shape: (2, 3)
|
2888
|
+
# # ┌──────┬──────┬──────┐
|
2889
|
+
# # │ a ┆ b ┆ c │
|
2890
|
+
# # │ --- ┆ --- ┆ --- │
|
2891
|
+
# # │ i64 ┆ f64 ┆ bool │
|
2892
|
+
# # ╞══════╪══════╪══════╡
|
2893
|
+
# # │ null ┆ null ┆ null │
|
2894
|
+
# # │ null ┆ null ┆ null │
|
2895
|
+
# # └──────┴──────┴──────┘
|
2896
|
+
def clear(n = 0)
|
2897
|
+
if n == 0
|
2898
|
+
_from_rbdf(_df.clear)
|
2899
|
+
elsif n > 0 || len > 0
|
2900
|
+
self.class.new(
|
2901
|
+
schema.to_h { |nm, tp| [nm, Series.new(nm, [], dtype: tp).extend_constant(nil, n)] }
|
2902
|
+
)
|
2903
|
+
else
|
2904
|
+
clone
|
2905
|
+
end
|
2877
2906
|
end
|
2907
|
+
alias_method :cleared, :clear
|
2878
2908
|
|
2879
2909
|
# clone handled by initialize_copy
|
2880
2910
|
|
@@ -3111,17 +3141,17 @@ module Polars
|
|
3111
3141
|
sort_columns: false,
|
3112
3142
|
separator: "_"
|
3113
3143
|
)
|
3114
|
-
if values.is_a?(String)
|
3144
|
+
if values.is_a?(::String)
|
3115
3145
|
values = [values]
|
3116
3146
|
end
|
3117
|
-
if index.is_a?(String)
|
3147
|
+
if index.is_a?(::String)
|
3118
3148
|
index = [index]
|
3119
3149
|
end
|
3120
|
-
if columns.is_a?(String)
|
3150
|
+
if columns.is_a?(::String)
|
3121
3151
|
columns = [columns]
|
3122
3152
|
end
|
3123
3153
|
|
3124
|
-
if aggregate_fn.is_a?(String)
|
3154
|
+
if aggregate_fn.is_a?(::String)
|
3125
3155
|
case aggregate_fn
|
3126
3156
|
when "first"
|
3127
3157
|
aggregate_expr = Polars.element.first._rbexpr
|
@@ -3137,8 +3167,11 @@ module Polars
|
|
3137
3167
|
aggregate_expr = Polars.element.median._rbexpr
|
3138
3168
|
when "last"
|
3139
3169
|
aggregate_expr = Polars.element.last._rbexpr
|
3170
|
+
when "len"
|
3171
|
+
aggregate_expr = Polars.len._rbexpr
|
3140
3172
|
when "count"
|
3141
|
-
|
3173
|
+
warn "`aggregate_function: \"count\"` input for `pivot` is deprecated. Use `aggregate_function: \"len\"` instead."
|
3174
|
+
aggregate_expr = Polars.len._rbexpr
|
3142
3175
|
else
|
3143
3176
|
raise ArgumentError, "Argument aggregate fn: '#{aggregate_fn}' was not expected."
|
3144
3177
|
end
|
@@ -3150,9 +3183,9 @@ module Polars
|
|
3150
3183
|
|
3151
3184
|
_from_rbdf(
|
3152
3185
|
_df.pivot_expr(
|
3153
|
-
values,
|
3154
3186
|
index,
|
3155
3187
|
columns,
|
3188
|
+
values,
|
3156
3189
|
maintain_order,
|
3157
3190
|
sort_columns,
|
3158
3191
|
aggregate_expr,
|
@@ -3206,10 +3239,10 @@ module Polars
|
|
3206
3239
|
# # │ z ┆ c ┆ 6 │
|
3207
3240
|
# # └─────┴──────────┴───────┘
|
3208
3241
|
def melt(id_vars: nil, value_vars: nil, variable_name: nil, value_name: nil)
|
3209
|
-
if value_vars.is_a?(String)
|
3242
|
+
if value_vars.is_a?(::String)
|
3210
3243
|
value_vars = [value_vars]
|
3211
3244
|
end
|
3212
|
-
if id_vars.is_a?(String)
|
3245
|
+
if id_vars.is_a?(::String)
|
3213
3246
|
id_vars = [id_vars]
|
3214
3247
|
end
|
3215
3248
|
if value_vars.nil?
|
@@ -3423,7 +3456,7 @@ module Polars
|
|
3423
3456
|
# # │ C ┆ 2 ┆ l │
|
3424
3457
|
# # └─────┴─────┴─────┘}
|
3425
3458
|
def partition_by(groups, maintain_order: true, include_key: true, as_dict: false)
|
3426
|
-
if groups.is_a?(String)
|
3459
|
+
if groups.is_a?(::String)
|
3427
3460
|
groups = [groups]
|
3428
3461
|
elsif !groups.is_a?(::Array)
|
3429
3462
|
groups = Array(groups)
|
@@ -3587,8 +3620,13 @@ module Polars
|
|
3587
3620
|
|
3588
3621
|
# Select columns from this DataFrame.
|
3589
3622
|
#
|
3590
|
-
# @param exprs [
|
3591
|
-
# Column
|
3623
|
+
# @param exprs [Array]
|
3624
|
+
# Column(s) to select, specified as positional arguments.
|
3625
|
+
# Accepts expression input. Strings are parsed as column names,
|
3626
|
+
# other non-expression inputs are parsed as literals.
|
3627
|
+
# @param named_exprs [Hash]
|
3628
|
+
# Additional columns to select, specified as keyword arguments.
|
3629
|
+
# The columns will be renamed to the keyword used.
|
3592
3630
|
#
|
3593
3631
|
# @return [DataFrame]
|
3594
3632
|
#
|
@@ -3668,23 +3706,25 @@ module Polars
|
|
3668
3706
|
# # │ 0 │
|
3669
3707
|
# # │ 10 │
|
3670
3708
|
# # └─────────┘
|
3671
|
-
def select(exprs)
|
3672
|
-
|
3673
|
-
lazy
|
3674
|
-
.select(exprs)
|
3675
|
-
.collect(no_optimization: true, string_cache: false)
|
3676
|
-
._df
|
3677
|
-
)
|
3709
|
+
def select(*exprs, **named_exprs)
|
3710
|
+
lazy.select(*exprs, **named_exprs).collect(_eager: true)
|
3678
3711
|
end
|
3679
3712
|
|
3680
|
-
# Add
|
3713
|
+
# Add columns to this DataFrame.
|
3714
|
+
#
|
3715
|
+
# Added columns will replace existing columns with the same name.
|
3681
3716
|
#
|
3682
3717
|
# @param exprs [Array]
|
3683
|
-
#
|
3718
|
+
# Column(s) to add, specified as positional arguments.
|
3719
|
+
# Accepts expression input. Strings are parsed as column names, other
|
3720
|
+
# non-expression inputs are parsed as literals.
|
3721
|
+
# @param named_exprs [Hash]
|
3722
|
+
# Additional columns to add, specified as keyword arguments.
|
3723
|
+
# The columns will be renamed to the keyword used.
|
3684
3724
|
#
|
3685
3725
|
# @return [DataFrame]
|
3686
3726
|
#
|
3687
|
-
# @example
|
3727
|
+
# @example Pass an expression to add it as a new column.
|
3688
3728
|
# df = Polars::DataFrame.new(
|
3689
3729
|
# {
|
3690
3730
|
# "a" => [1, 2, 3, 4],
|
@@ -3692,11 +3732,41 @@ module Polars
|
|
3692
3732
|
# "c" => [true, true, false, true]
|
3693
3733
|
# }
|
3694
3734
|
# )
|
3735
|
+
# df.with_columns((Polars.col("a") ** 2).alias("a^2"))
|
3736
|
+
# # =>
|
3737
|
+
# # shape: (4, 4)
|
3738
|
+
# # ┌─────┬──────┬───────┬──────┐
|
3739
|
+
# # │ a ┆ b ┆ c ┆ a^2 │
|
3740
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
3741
|
+
# # │ i64 ┆ f64 ┆ bool ┆ f64 │
|
3742
|
+
# # ╞═════╪══════╪═══════╪══════╡
|
3743
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 1.0 │
|
3744
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 4.0 │
|
3745
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 9.0 │
|
3746
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 16.0 │
|
3747
|
+
# # └─────┴──────┴───────┴──────┘
|
3748
|
+
#
|
3749
|
+
# @example Added columns will replace existing columns with the same name.
|
3750
|
+
# df.with_columns(Polars.col("a").cast(Polars::Float64))
|
3751
|
+
# # =>
|
3752
|
+
# # shape: (4, 3)
|
3753
|
+
# # ┌─────┬──────┬───────┐
|
3754
|
+
# # │ a ┆ b ┆ c │
|
3755
|
+
# # │ --- ┆ --- ┆ --- │
|
3756
|
+
# # │ f64 ┆ f64 ┆ bool │
|
3757
|
+
# # ╞═════╪══════╪═══════╡
|
3758
|
+
# # │ 1.0 ┆ 0.5 ┆ true │
|
3759
|
+
# # │ 2.0 ┆ 4.0 ┆ true │
|
3760
|
+
# # │ 3.0 ┆ 10.0 ┆ false │
|
3761
|
+
# # │ 4.0 ┆ 13.0 ┆ true │
|
3762
|
+
# # └─────┴──────┴───────┘
|
3763
|
+
#
|
3764
|
+
# @example Multiple columns can be added by passing a list of expressions.
|
3695
3765
|
# df.with_columns(
|
3696
3766
|
# [
|
3697
3767
|
# (Polars.col("a") ** 2).alias("a^2"),
|
3698
3768
|
# (Polars.col("b") / 2).alias("b/2"),
|
3699
|
-
# (Polars.col("c").
|
3769
|
+
# (Polars.col("c").not_).alias("not c"),
|
3700
3770
|
# ]
|
3701
3771
|
# )
|
3702
3772
|
# # =>
|
@@ -3711,13 +3781,45 @@ module Polars
|
|
3711
3781
|
# # │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │
|
3712
3782
|
# # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
|
3713
3783
|
# # └─────┴──────┴───────┴──────┴──────┴───────┘
|
3714
|
-
|
3715
|
-
|
3716
|
-
|
3717
|
-
|
3718
|
-
|
3719
|
-
|
3720
|
-
|
3784
|
+
#
|
3785
|
+
# @example Multiple columns also can be added using positional arguments instead of a list.
|
3786
|
+
# df.with_columns(
|
3787
|
+
# (Polars.col("a") ** 2).alias("a^2"),
|
3788
|
+
# (Polars.col("b") / 2).alias("b/2"),
|
3789
|
+
# (Polars.col("c").not_).alias("not c"),
|
3790
|
+
# )
|
3791
|
+
# # =>
|
3792
|
+
# # shape: (4, 6)
|
3793
|
+
# # ┌─────┬──────┬───────┬──────┬──────┬───────┐
|
3794
|
+
# # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
|
3795
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
3796
|
+
# # │ i64 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ bool │
|
3797
|
+
# # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
|
3798
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 1.0 ┆ 0.25 ┆ false │
|
3799
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 4.0 ┆ 2.0 ┆ false │
|
3800
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │
|
3801
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
|
3802
|
+
# # └─────┴──────┴───────┴──────┴──────┴───────┘
|
3803
|
+
#
|
3804
|
+
# @example Use keyword arguments to easily name your expression inputs.
|
3805
|
+
# df.with_columns(
|
3806
|
+
# ab: Polars.col("a") * Polars.col("b"),
|
3807
|
+
# not_c: Polars.col("c").not_
|
3808
|
+
# )
|
3809
|
+
# # =>
|
3810
|
+
# # shape: (4, 5)
|
3811
|
+
# # ┌─────┬──────┬───────┬──────┬───────┐
|
3812
|
+
# # │ a ┆ b ┆ c ┆ ab ┆ not_c │
|
3813
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
3814
|
+
# # │ i64 ┆ f64 ┆ bool ┆ f64 ┆ bool │
|
3815
|
+
# # ╞═════╪══════╪═══════╪══════╪═══════╡
|
3816
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 0.5 ┆ false │
|
3817
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 8.0 ┆ false │
|
3818
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 30.0 ┆ true │
|
3819
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 52.0 ┆ false │
|
3820
|
+
# # └─────┴──────┴───────┴──────┴───────┘
|
3821
|
+
def with_columns(*exprs, **named_exprs)
|
3822
|
+
lazy.with_columns(*exprs, **named_exprs).collect(_eager: true)
|
3721
3823
|
end
|
3722
3824
|
|
3723
3825
|
# Get number of chunks used by the ChunkedArrays of this DataFrame.
|
@@ -3774,7 +3876,7 @@ module Polars
|
|
3774
3876
|
# # └─────┴─────┴─────┘
|
3775
3877
|
def max(axis: 0)
|
3776
3878
|
if axis == 0
|
3777
|
-
|
3879
|
+
lazy.max.collect(_eager: true)
|
3778
3880
|
elsif axis == 1
|
3779
3881
|
Utils.wrap_s(_df.max_horizontal)
|
3780
3882
|
else
|
@@ -3806,7 +3908,7 @@ module Polars
|
|
3806
3908
|
# # └─────┴─────┴─────┘
|
3807
3909
|
def min(axis: 0)
|
3808
3910
|
if axis == 0
|
3809
|
-
|
3911
|
+
lazy.min.collect(_eager: true)
|
3810
3912
|
elsif axis == 1
|
3811
3913
|
Utils.wrap_s(_df.min_horizontal)
|
3812
3914
|
else
|
@@ -3855,7 +3957,7 @@ module Polars
|
|
3855
3957
|
def sum(axis: 0, null_strategy: "ignore")
|
3856
3958
|
case axis
|
3857
3959
|
when 0
|
3858
|
-
|
3960
|
+
lazy.sum.collect(_eager: true)
|
3859
3961
|
when 1
|
3860
3962
|
Utils.wrap_s(_df.sum_horizontal(null_strategy))
|
3861
3963
|
else
|
@@ -3893,7 +3995,7 @@ module Polars
|
|
3893
3995
|
def mean(axis: 0, null_strategy: "ignore")
|
3894
3996
|
case axis
|
3895
3997
|
when 0
|
3896
|
-
|
3998
|
+
lazy.mean.collect(_eager: true)
|
3897
3999
|
when 1
|
3898
4000
|
Utils.wrap_s(_df.mean_horizontal(null_strategy))
|
3899
4001
|
else
|
@@ -3939,7 +4041,7 @@ module Polars
|
|
3939
4041
|
# # │ 0.816497 ┆ 0.816497 ┆ null │
|
3940
4042
|
# # └──────────┴──────────┴──────┘
|
3941
4043
|
def std(ddof: 1)
|
3942
|
-
|
4044
|
+
lazy.std(ddof: ddof).collect(_eager: true)
|
3943
4045
|
end
|
3944
4046
|
|
3945
4047
|
# Aggregate the columns of this DataFrame to their variance value.
|
@@ -3980,7 +4082,7 @@ module Polars
|
|
3980
4082
|
# # │ 0.666667 ┆ 0.666667 ┆ null │
|
3981
4083
|
# # └──────────┴──────────┴──────┘
|
3982
4084
|
def var(ddof: 1)
|
3983
|
-
|
4085
|
+
lazy.var(ddof: ddof).collect(_eager: true)
|
3984
4086
|
end
|
3985
4087
|
|
3986
4088
|
# Aggregate the columns of this DataFrame to their median value.
|
@@ -4006,7 +4108,7 @@ module Polars
|
|
4006
4108
|
# # │ 2.0 ┆ 7.0 ┆ null │
|
4007
4109
|
# # └─────┴─────┴──────┘
|
4008
4110
|
def median
|
4009
|
-
|
4111
|
+
lazy.median.collect(_eager: true)
|
4010
4112
|
end
|
4011
4113
|
|
4012
4114
|
# Aggregate the columns of this DataFrame to their product values.
|
@@ -4063,7 +4165,7 @@ module Polars
|
|
4063
4165
|
# # │ 2.0 ┆ 7.0 ┆ null │
|
4064
4166
|
# # └─────┴─────┴──────┘
|
4065
4167
|
def quantile(quantile, interpolation: "nearest")
|
4066
|
-
|
4168
|
+
lazy.quantile(quantile, interpolation: interpolation).collect(_eager: true)
|
4067
4169
|
end
|
4068
4170
|
|
4069
4171
|
# Get one hot encoded dummy variables.
|
@@ -4094,7 +4196,7 @@ module Polars
|
|
4094
4196
|
# # │ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 │
|
4095
4197
|
# # └───────┴───────┴───────┴───────┴───────┴───────┘
|
4096
4198
|
def to_dummies(columns: nil, separator: "_", drop_first: false)
|
4097
|
-
if columns.is_a?(String)
|
4199
|
+
if columns.is_a?(::String)
|
4098
4200
|
columns = [columns]
|
4099
4201
|
end
|
4100
4202
|
_from_rbdf(_df.to_dummies(columns, separator, drop_first))
|
@@ -4359,7 +4461,7 @@ module Polars
|
|
4359
4461
|
# # null
|
4360
4462
|
# # ]
|
4361
4463
|
#
|
4362
|
-
# @example A horizontal boolean or, similar to a row-wise .any
|
4464
|
+
# @example A horizontal boolean or, similar to a row-wise .any:
|
4363
4465
|
# df = Polars::DataFrame.new(
|
4364
4466
|
# {
|
4365
4467
|
# "a" => [false, false, true],
|
@@ -4482,7 +4584,7 @@ module Polars
|
|
4482
4584
|
# # => [{"a"=>1, "b"=>2}, {"a"=>3, "b"=>4}, {"a"=>5, "b"=>6}]
|
4483
4585
|
def rows(named: false)
|
4484
4586
|
if named
|
4485
|
-
columns = columns
|
4587
|
+
columns = self.columns
|
4486
4588
|
_df.row_tuples.map do |v|
|
4487
4589
|
columns.zip(v).to_h
|
4488
4590
|
end
|
@@ -4523,7 +4625,7 @@ module Polars
|
|
4523
4625
|
return to_enum(:iter_rows, named: named, buffer_size: buffer_size) unless block_given?
|
4524
4626
|
|
4525
4627
|
# load into the local namespace for a modest performance boost in the hot loops
|
4526
|
-
columns = columns
|
4628
|
+
columns = self.columns
|
4527
4629
|
|
4528
4630
|
# note: buffering rows results in a 2-4x speedup over individual calls
|
4529
4631
|
# to ".row(i)", so it should only be disabled in extremely specific cases.
|
@@ -4603,8 +4705,8 @@ module Polars
|
|
4603
4705
|
# # │ 1 ┆ 5 │
|
4604
4706
|
# # │ 3 ┆ 7 │
|
4605
4707
|
# # └─────┴─────┘
|
4606
|
-
def gather_every(n)
|
4607
|
-
select(Utils.col("*").gather_every(n))
|
4708
|
+
def gather_every(n, offset = 0)
|
4709
|
+
select(Utils.col("*").gather_every(n, offset))
|
4608
4710
|
end
|
4609
4711
|
alias_method :take_every, :gather_every
|
4610
4712
|
|
@@ -4754,19 +4856,57 @@ module Polars
|
|
4754
4856
|
# # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
|
4755
4857
|
# # └────────┴─────┴─────┴──────┴───────────┴───────┘
|
4756
4858
|
def unnest(names)
|
4757
|
-
if names.is_a?(String)
|
4859
|
+
if names.is_a?(::String)
|
4758
4860
|
names = [names]
|
4759
4861
|
end
|
4760
4862
|
_from_rbdf(_df.unnest(names))
|
4761
4863
|
end
|
4762
4864
|
|
4763
|
-
#
|
4865
|
+
# Requires NumPy
|
4764
4866
|
# def corr
|
4765
4867
|
# end
|
4766
4868
|
|
4767
|
-
#
|
4768
|
-
#
|
4769
|
-
#
|
4869
|
+
# Take two sorted DataFrames and merge them by the sorted key.
|
4870
|
+
#
|
4871
|
+
# The output of this operation will also be sorted.
|
4872
|
+
# It is the callers responsibility that the frames are sorted
|
4873
|
+
# by that key otherwise the output will not make sense.
|
4874
|
+
#
|
4875
|
+
# The schemas of both DataFrames must be equal.
|
4876
|
+
#
|
4877
|
+
# @param other [DataFrame]
|
4878
|
+
# Other DataFrame that must be merged
|
4879
|
+
# @param key [String]
|
4880
|
+
# Key that is sorted.
|
4881
|
+
#
|
4882
|
+
# @return [DataFrame]
|
4883
|
+
#
|
4884
|
+
# @example
|
4885
|
+
# df0 = Polars::DataFrame.new(
|
4886
|
+
# {"name" => ["steve", "elise", "bob"], "age" => [42, 44, 18]}
|
4887
|
+
# ).sort("age")
|
4888
|
+
# df1 = Polars::DataFrame.new(
|
4889
|
+
# {"name" => ["anna", "megan", "steve", "thomas"], "age" => [21, 33, 42, 20]}
|
4890
|
+
# ).sort("age")
|
4891
|
+
# df0.merge_sorted(df1, "age")
|
4892
|
+
# # =>
|
4893
|
+
# # shape: (7, 2)
|
4894
|
+
# # ┌────────┬─────┐
|
4895
|
+
# # │ name ┆ age │
|
4896
|
+
# # │ --- ┆ --- │
|
4897
|
+
# # │ str ┆ i64 │
|
4898
|
+
# # ╞════════╪═════╡
|
4899
|
+
# # │ bob ┆ 18 │
|
4900
|
+
# # │ thomas ┆ 20 │
|
4901
|
+
# # │ anna ┆ 21 │
|
4902
|
+
# # │ megan ┆ 33 │
|
4903
|
+
# # │ steve ┆ 42 │
|
4904
|
+
# # │ steve ┆ 42 │
|
4905
|
+
# # │ elise ┆ 44 │
|
4906
|
+
# # └────────┴─────┘
|
4907
|
+
def merge_sorted(other, key)
|
4908
|
+
lazy.merge_sorted(other.lazy, key).collect(_eager: true)
|
4909
|
+
end
|
4770
4910
|
|
4771
4911
|
# Indicate that one or multiple columns are sorted.
|
4772
4912
|
#
|
@@ -4808,7 +4948,7 @@ module Polars
|
|
4808
4948
|
end
|
4809
4949
|
|
4810
4950
|
def _pos_idxs(idxs, dim)
|
4811
|
-
idx_type =
|
4951
|
+
idx_type = Plr.get_index_type
|
4812
4952
|
|
4813
4953
|
if idxs.is_a?(Series)
|
4814
4954
|
if idxs.dtype == idx_type
|
@@ -4867,10 +5007,10 @@ module Polars
|
|
4867
5007
|
if val.is_a?(Hash) && dtype != Struct
|
4868
5008
|
updated_data[name] = DataFrame.new(val).to_struct(name)
|
4869
5009
|
elsif !Utils.arrlen(val).nil?
|
4870
|
-
updated_data[name] = Series.new(String.new(name), val, dtype: dtype)
|
4871
|
-
elsif val.nil? || [Integer, Float, TrueClass, FalseClass, String, ::Date, ::DateTime, ::Time].any? { |cls| val.is_a?(cls) }
|
5010
|
+
updated_data[name] = Series.new(::String.new(name), val, dtype: dtype)
|
5011
|
+
elsif val.nil? || [Integer, Float, TrueClass, FalseClass, ::String, ::Date, ::DateTime, ::Time].any? { |cls| val.is_a?(cls) }
|
4872
5012
|
dtype = Polars::Float64 if val.nil? && dtype.nil?
|
4873
|
-
updated_data[name] = Series.new(String.new(name), [val], dtype: dtype).extend_constant(val, array_len - 1)
|
5013
|
+
updated_data[name] = Series.new(::String.new(name), [val], dtype: dtype).extend_constant(val, array_len - 1)
|
4874
5014
|
else
|
4875
5015
|
raise Todo
|
4876
5016
|
end
|
@@ -4927,7 +5067,7 @@ module Polars
|
|
4927
5067
|
end
|
4928
5068
|
column_names =
|
4929
5069
|
(schema || []).map.with_index do |col, i|
|
4930
|
-
if col.is_a?(String)
|
5070
|
+
if col.is_a?(::String)
|
4931
5071
|
col || "column_#{i}"
|
4932
5072
|
else
|
4933
5073
|
col[0]
|
@@ -4940,7 +5080,7 @@ module Polars
|
|
4940
5080
|
lookup = column_names.zip(lookup_names || []).to_h
|
4941
5081
|
|
4942
5082
|
column_dtypes =
|
4943
|
-
(schema || []).select { |col| !col.is_a?(String) && col[1] }.to_h do |col|
|
5083
|
+
(schema || []).select { |col| !col.is_a?(::String) && col[1] }.to_h do |col|
|
4944
5084
|
[lookup[col[0]] || col[0], col[1]]
|
4945
5085
|
end
|
4946
5086
|
|
@@ -5041,14 +5181,14 @@ module Polars
|
|
5041
5181
|
elsif data[0].is_a?(Hash)
|
5042
5182
|
column_names, dtypes = _unpack_schema(columns)
|
5043
5183
|
schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
|
5044
|
-
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
|
5184
|
+
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
|
5045
5185
|
if column_names
|
5046
5186
|
rbdf = _post_apply_columns(rbdf, column_names)
|
5047
5187
|
end
|
5048
5188
|
return rbdf
|
5049
5189
|
elsif data[0].is_a?(::Array)
|
5190
|
+
first_element = data[0]
|
5050
5191
|
if orient.nil? && !columns.nil?
|
5051
|
-
first_element = data[0]
|
5052
5192
|
row_types = first_element.filter_map { |value| value.class }.uniq
|
5053
5193
|
if row_types.include?(Integer) && row_types.include?(Float)
|
5054
5194
|
row_types.delete(Integer)
|