polars-df 0.7.0-x86_64-linux → 0.9.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +41 -0
- data/Cargo.lock +353 -237
- data/Cargo.toml +0 -3
- data/LICENSE-THIRD-PARTY.txt +1978 -1459
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/{3.0 → 3.3}/polars.so +0 -0
- data/lib/polars/array_expr.rb +449 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +248 -108
- data/lib/polars/data_types.rb +195 -29
- data/lib/polars/date_time_expr.rb +41 -24
- data/lib/polars/date_time_name_space.rb +12 -12
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +1080 -195
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +27 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +3 -3
- data/lib/polars/io.rb +21 -28
- data/lib/polars/lazy_frame.rb +390 -76
- data/lib/polars/list_expr.rb +152 -6
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +557 -59
- data/lib/polars/sql_context.rb +1 -1
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +412 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +64 -20
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -2
- metadata +36 -7
- data/lib/polars/lazy_functions.rb +0 -1197
data/lib/polars/data_frame.rb
CHANGED
@@ -47,8 +47,8 @@ module Polars
|
|
47
47
|
end
|
48
48
|
|
49
49
|
# @private
|
50
|
-
def self._from_hashes(data, infer_schema_length: 100, schema: nil)
|
51
|
-
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema)
|
50
|
+
def self._from_hashes(data, infer_schema_length: 100, schema: nil, schema_overrides: nil)
|
51
|
+
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
|
52
52
|
_from_rbdf(rbdf)
|
53
53
|
end
|
54
54
|
|
@@ -119,10 +119,10 @@ module Polars
|
|
119
119
|
|
120
120
|
processed_null_values = Utils._process_null_values(null_values)
|
121
121
|
|
122
|
-
if columns.is_a?(String)
|
122
|
+
if columns.is_a?(::String)
|
123
123
|
columns = [columns]
|
124
124
|
end
|
125
|
-
if file.is_a?(String) && file.include?("*")
|
125
|
+
if file.is_a?(::String) && file.include?("*")
|
126
126
|
dtypes_dict = nil
|
127
127
|
if !dtype_list.nil?
|
128
128
|
dtypes_dict = dtype_list.to_h
|
@@ -206,11 +206,11 @@ module Polars
|
|
206
206
|
if Utils.pathlike?(source)
|
207
207
|
source = Utils.normalise_filepath(source)
|
208
208
|
end
|
209
|
-
if columns.is_a?(String)
|
209
|
+
if columns.is_a?(::String)
|
210
210
|
columns = [columns]
|
211
211
|
end
|
212
212
|
|
213
|
-
if source.is_a?(String) && source.include?("*") && Utils.local_file?(source)
|
213
|
+
if source.is_a?(::String) && source.include?("*") && Utils.local_file?(source)
|
214
214
|
scan =
|
215
215
|
Polars.scan_parquet(
|
216
216
|
source,
|
@@ -269,11 +269,11 @@ module Polars
|
|
269
269
|
if Utils.pathlike?(file)
|
270
270
|
file = Utils.normalise_filepath(file)
|
271
271
|
end
|
272
|
-
if columns.is_a?(String)
|
272
|
+
if columns.is_a?(::String)
|
273
273
|
columns = [columns]
|
274
274
|
end
|
275
275
|
|
276
|
-
if file.is_a?(String) && file.include?("*")
|
276
|
+
if file.is_a?(::String) && file.include?("*")
|
277
277
|
raise Todo
|
278
278
|
end
|
279
279
|
|
@@ -411,7 +411,7 @@ module Polars
|
|
411
411
|
# }
|
412
412
|
# )
|
413
413
|
# df.dtypes
|
414
|
-
# # => [Polars::Int64, Polars::Float64, Polars::
|
414
|
+
# # => [Polars::Int64, Polars::Float64, Polars::String]
|
415
415
|
def dtypes
|
416
416
|
_df.dtypes
|
417
417
|
end
|
@@ -429,7 +429,7 @@ module Polars
|
|
429
429
|
# }
|
430
430
|
# )
|
431
431
|
# df.schema
|
432
|
-
# # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::
|
432
|
+
# # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String}
|
433
433
|
def schema
|
434
434
|
columns.zip(dtypes).to_h
|
435
435
|
end
|
@@ -589,13 +589,13 @@ module Polars
|
|
589
589
|
return df.slice(row_selection, 1)
|
590
590
|
end
|
591
591
|
# df[2, "a"]
|
592
|
-
if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
|
592
|
+
if col_selection.is_a?(::String) || col_selection.is_a?(Symbol)
|
593
593
|
return self[col_selection][row_selection]
|
594
594
|
end
|
595
595
|
end
|
596
596
|
|
597
597
|
# column selection can be "a" and ["a", "b"]
|
598
|
-
if col_selection.is_a?(String) || col_selection.is_a?(Symbol)
|
598
|
+
if col_selection.is_a?(::String) || col_selection.is_a?(Symbol)
|
599
599
|
col_selection = [col_selection]
|
600
600
|
end
|
601
601
|
|
@@ -621,8 +621,8 @@ module Polars
|
|
621
621
|
|
622
622
|
# select single column
|
623
623
|
# df["foo"]
|
624
|
-
if item.is_a?(String) || item.is_a?(Symbol)
|
625
|
-
return Utils.wrap_s(_df.
|
624
|
+
if item.is_a?(::String) || item.is_a?(Symbol)
|
625
|
+
return Utils.wrap_s(_df.get_column(item.to_s))
|
626
626
|
end
|
627
627
|
|
628
628
|
# df[idx]
|
@@ -647,7 +647,7 @@ module Polars
|
|
647
647
|
|
648
648
|
if item.is_a?(Series)
|
649
649
|
dtype = item.dtype
|
650
|
-
if dtype ==
|
650
|
+
if dtype == String
|
651
651
|
return _from_rbdf(_df.select(item))
|
652
652
|
elsif dtype == UInt32
|
653
653
|
return _from_rbdf(_df.take_with_series(item._s))
|
@@ -698,7 +698,7 @@ module Polars
|
|
698
698
|
s[row_selection] = value
|
699
699
|
|
700
700
|
if col_selection.is_a?(Integer)
|
701
|
-
|
701
|
+
replace_column(col_selection, s)
|
702
702
|
elsif Utils.strlike?(col_selection)
|
703
703
|
replace(col_selection, s)
|
704
704
|
end
|
@@ -1084,7 +1084,7 @@ module Polars
|
|
1084
1084
|
# df.estimated_size
|
1085
1085
|
# # => 25888898
|
1086
1086
|
# df.estimated_size("mb")
|
1087
|
-
# # =>
|
1087
|
+
# # => 26.702880859375
|
1088
1088
|
def estimated_size(unit = "b")
|
1089
1089
|
sz = _df.estimated_size
|
1090
1090
|
Utils.scale_bytes(sz, to: unit)
|
@@ -1222,7 +1222,7 @@ module Polars
|
|
1222
1222
|
# @example
|
1223
1223
|
# df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
|
1224
1224
|
# s = Polars::Series.new("baz", [97, 98, 99])
|
1225
|
-
# df.
|
1225
|
+
# df.insert_column(1, s)
|
1226
1226
|
# # =>
|
1227
1227
|
# # shape: (3, 3)
|
1228
1228
|
# # ┌─────┬─────┬─────┐
|
@@ -1244,7 +1244,7 @@ module Polars
|
|
1244
1244
|
# }
|
1245
1245
|
# )
|
1246
1246
|
# s = Polars::Series.new("d", [-2.5, 15, 20.5, 0])
|
1247
|
-
# df.
|
1247
|
+
# df.insert_column(3, s)
|
1248
1248
|
# # =>
|
1249
1249
|
# # shape: (4, 4)
|
1250
1250
|
# # ┌─────┬──────┬───────┬──────┐
|
@@ -1257,13 +1257,14 @@ module Polars
|
|
1257
1257
|
# # │ 3 ┆ 10.0 ┆ false ┆ 20.5 │
|
1258
1258
|
# # │ 4 ┆ 13.0 ┆ true ┆ 0.0 │
|
1259
1259
|
# # └─────┴──────┴───────┴──────┘
|
1260
|
-
def
|
1260
|
+
def insert_column(index, series)
|
1261
1261
|
if index < 0
|
1262
1262
|
index = columns.length + index
|
1263
1263
|
end
|
1264
|
-
_df.
|
1264
|
+
_df.insert_column(index, series._s)
|
1265
1265
|
self
|
1266
1266
|
end
|
1267
|
+
alias_method :insert_at_idx, :insert_column
|
1267
1268
|
|
1268
1269
|
# Filter the rows in the DataFrame based on a predicate expression.
|
1269
1270
|
#
|
@@ -1367,7 +1368,7 @@ module Polars
|
|
1367
1368
|
]
|
1368
1369
|
)._df
|
1369
1370
|
)
|
1370
|
-
summary.
|
1371
|
+
summary.insert_column(
|
1371
1372
|
0,
|
1372
1373
|
Polars::Series.new(
|
1373
1374
|
"describe",
|
@@ -1388,11 +1389,12 @@ module Polars
|
|
1388
1389
|
# df = Polars::DataFrame.new(
|
1389
1390
|
# {"foo" => [1, 2, 3], "bar" => [6, 7, 8], "ham" => ["a", "b", "c"]}
|
1390
1391
|
# )
|
1391
|
-
# df.
|
1392
|
+
# df.get_column_index("ham")
|
1392
1393
|
# # => 2
|
1393
|
-
def
|
1394
|
-
_df.
|
1394
|
+
def get_column_index(name)
|
1395
|
+
_df.get_column_index(name)
|
1395
1396
|
end
|
1397
|
+
alias_method :find_idx_by_name, :get_column_index
|
1396
1398
|
|
1397
1399
|
# Replace a column at an index location.
|
1398
1400
|
#
|
@@ -1412,7 +1414,7 @@ module Polars
|
|
1412
1414
|
# }
|
1413
1415
|
# )
|
1414
1416
|
# s = Polars::Series.new("apple", [10, 20, 30])
|
1415
|
-
# df.
|
1417
|
+
# df.replace_column(0, s)
|
1416
1418
|
# # =>
|
1417
1419
|
# # shape: (3, 3)
|
1418
1420
|
# # ┌───────┬─────┬─────┐
|
@@ -1424,13 +1426,14 @@ module Polars
|
|
1424
1426
|
# # │ 20 ┆ 7 ┆ b │
|
1425
1427
|
# # │ 30 ┆ 8 ┆ c │
|
1426
1428
|
# # └───────┴─────┴─────┘
|
1427
|
-
def
|
1429
|
+
def replace_column(index, series)
|
1428
1430
|
if index < 0
|
1429
1431
|
index = columns.length + index
|
1430
1432
|
end
|
1431
|
-
_df.
|
1433
|
+
_df.replace_column(index, series._s)
|
1432
1434
|
self
|
1433
1435
|
end
|
1436
|
+
alias_method :replace_at_idx, :replace_column
|
1434
1437
|
|
1435
1438
|
# Sort the DataFrame by column.
|
1436
1439
|
#
|
@@ -1524,13 +1527,14 @@ module Polars
|
|
1524
1527
|
# "ham" => ["c", "b", "a"]
|
1525
1528
|
# }
|
1526
1529
|
# )
|
1527
|
-
# df1.
|
1530
|
+
# df1.equals(df1)
|
1528
1531
|
# # => true
|
1529
|
-
# df1.
|
1532
|
+
# df1.equals(df2)
|
1530
1533
|
# # => false
|
1531
|
-
def
|
1532
|
-
_df.
|
1534
|
+
def equals(other, null_equal: true)
|
1535
|
+
_df.equals(other._df, null_equal)
|
1533
1536
|
end
|
1537
|
+
alias_method :frame_equal, :equals
|
1534
1538
|
|
1535
1539
|
# Replace a column by a new Series.
|
1536
1540
|
#
|
@@ -1716,7 +1720,7 @@ module Polars
|
|
1716
1720
|
# # │ 3 ┆ 8 ┆ c │
|
1717
1721
|
# # └─────┴─────┴─────┘
|
1718
1722
|
def drop_nulls(subset: nil)
|
1719
|
-
if subset.is_a?(String)
|
1723
|
+
if subset.is_a?(::String)
|
1720
1724
|
subset = [subset]
|
1721
1725
|
end
|
1722
1726
|
_from_rbdf(_df.drop_nulls(subset))
|
@@ -1778,7 +1782,7 @@ module Polars
|
|
1778
1782
|
# "b" => [2, 4, 6]
|
1779
1783
|
# }
|
1780
1784
|
# )
|
1781
|
-
# df.
|
1785
|
+
# df.with_row_index
|
1782
1786
|
# # =>
|
1783
1787
|
# # shape: (3, 3)
|
1784
1788
|
# # ┌────────┬─────┬─────┐
|
@@ -1790,9 +1794,10 @@ module Polars
|
|
1790
1794
|
# # │ 1 ┆ 3 ┆ 4 │
|
1791
1795
|
# # │ 2 ┆ 5 ┆ 6 │
|
1792
1796
|
# # └────────┴─────┴─────┘
|
1793
|
-
def
|
1794
|
-
_from_rbdf(_df.
|
1797
|
+
def with_row_index(name: "row_nr", offset: 0)
|
1798
|
+
_from_rbdf(_df.with_row_index(name, offset))
|
1795
1799
|
end
|
1800
|
+
alias_method :with_row_count, :with_row_index
|
1796
1801
|
|
1797
1802
|
# Start a group by operation.
|
1798
1803
|
#
|
@@ -2267,7 +2272,7 @@ module Polars
|
|
2267
2272
|
if by.nil?
|
2268
2273
|
by = []
|
2269
2274
|
end
|
2270
|
-
if by.is_a?(String)
|
2275
|
+
if by.is_a?(::String)
|
2271
2276
|
by = [by]
|
2272
2277
|
end
|
2273
2278
|
if offset.nil?
|
@@ -2429,6 +2434,8 @@ module Polars
|
|
2429
2434
|
# Join strategy.
|
2430
2435
|
# @param suffix [String]
|
2431
2436
|
# Suffix to append to columns with a duplicate name.
|
2437
|
+
# @param join_nulls [Boolean]
|
2438
|
+
# Join on null values. By default null values will never produce matches.
|
2432
2439
|
#
|
2433
2440
|
# @return [DataFrame]
|
2434
2441
|
#
|
@@ -2461,17 +2468,17 @@ module Polars
|
|
2461
2468
|
# @example
|
2462
2469
|
# df.join(other_df, on: "ham", how: "outer")
|
2463
2470
|
# # =>
|
2464
|
-
# # shape: (4,
|
2465
|
-
# #
|
2466
|
-
# # │ foo ┆ bar ┆ ham
|
2467
|
-
# # │ --- ┆ --- ┆ --- ┆ ---
|
2468
|
-
# # │ i64 ┆ f64 ┆ str ┆ str
|
2469
|
-
# #
|
2470
|
-
# # │ 1 ┆ 6.0 ┆ a
|
2471
|
-
# # │ 2 ┆ 7.0 ┆ b
|
2472
|
-
# # │ null ┆ null ┆
|
2473
|
-
# # │ 3 ┆ 8.0 ┆ c
|
2474
|
-
# #
|
2471
|
+
# # shape: (4, 5)
|
2472
|
+
# # ┌──────┬──────┬──────┬───────┬───────────┐
|
2473
|
+
# # │ foo ┆ bar ┆ ham ┆ apple ┆ ham_right │
|
2474
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
2475
|
+
# # │ i64 ┆ f64 ┆ str ┆ str ┆ str │
|
2476
|
+
# # ╞══════╪══════╪══════╪═══════╪═══════════╡
|
2477
|
+
# # │ 1 ┆ 6.0 ┆ a ┆ x ┆ a │
|
2478
|
+
# # │ 2 ┆ 7.0 ┆ b ┆ y ┆ b │
|
2479
|
+
# # │ null ┆ null ┆ null ┆ z ┆ d │
|
2480
|
+
# # │ 3 ┆ 8.0 ┆ c ┆ null ┆ null │
|
2481
|
+
# # └──────┴──────┴──────┴───────┴───────────┘
|
2475
2482
|
#
|
2476
2483
|
# @example
|
2477
2484
|
# df.join(other_df, on: "ham", how: "left")
|
@@ -2511,7 +2518,7 @@ module Polars
|
|
2511
2518
|
# # ╞═════╪═════╪═════╡
|
2512
2519
|
# # │ 3 ┆ 8.0 ┆ c │
|
2513
2520
|
# # └─────┴─────┴─────┘
|
2514
|
-
def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right")
|
2521
|
+
def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right", join_nulls: false)
|
2515
2522
|
lazy
|
2516
2523
|
.join(
|
2517
2524
|
other.lazy,
|
@@ -2520,6 +2527,7 @@ module Polars
|
|
2520
2527
|
on: on,
|
2521
2528
|
how: how,
|
2522
2529
|
suffix: suffix,
|
2530
|
+
join_nulls: join_nulls
|
2523
2531
|
)
|
2524
2532
|
.collect(no_optimization: true)
|
2525
2533
|
end
|
@@ -2863,7 +2871,7 @@ module Polars
|
|
2863
2871
|
# "c" => [true, true, false, nil]
|
2864
2872
|
# }
|
2865
2873
|
# )
|
2866
|
-
# df.
|
2874
|
+
# df.clear
|
2867
2875
|
# # =>
|
2868
2876
|
# # shape: (0, 3)
|
2869
2877
|
# # ┌─────┬─────┬──────┐
|
@@ -2872,9 +2880,31 @@ module Polars
|
|
2872
2880
|
# # │ i64 ┆ f64 ┆ bool │
|
2873
2881
|
# # ╞═════╪═════╪══════╡
|
2874
2882
|
# # └─────┴─────┴──────┘
|
2875
|
-
|
2876
|
-
|
2883
|
+
#
|
2884
|
+
# @example
|
2885
|
+
# df.clear(2)
|
2886
|
+
# # =>
|
2887
|
+
# # shape: (2, 3)
|
2888
|
+
# # ┌──────┬──────┬──────┐
|
2889
|
+
# # │ a ┆ b ┆ c │
|
2890
|
+
# # │ --- ┆ --- ┆ --- │
|
2891
|
+
# # │ i64 ┆ f64 ┆ bool │
|
2892
|
+
# # ╞══════╪══════╪══════╡
|
2893
|
+
# # │ null ┆ null ┆ null │
|
2894
|
+
# # │ null ┆ null ┆ null │
|
2895
|
+
# # └──────┴──────┴──────┘
|
2896
|
+
def clear(n = 0)
|
2897
|
+
if n == 0
|
2898
|
+
_from_rbdf(_df.clear)
|
2899
|
+
elsif n > 0 || len > 0
|
2900
|
+
self.class.new(
|
2901
|
+
schema.to_h { |nm, tp| [nm, Series.new(nm, [], dtype: tp).extend_constant(nil, n)] }
|
2902
|
+
)
|
2903
|
+
else
|
2904
|
+
clone
|
2905
|
+
end
|
2877
2906
|
end
|
2907
|
+
alias_method :cleared, :clear
|
2878
2908
|
|
2879
2909
|
# clone handled by initialize_copy
|
2880
2910
|
|
@@ -3111,17 +3141,17 @@ module Polars
|
|
3111
3141
|
sort_columns: false,
|
3112
3142
|
separator: "_"
|
3113
3143
|
)
|
3114
|
-
if values.is_a?(String)
|
3144
|
+
if values.is_a?(::String)
|
3115
3145
|
values = [values]
|
3116
3146
|
end
|
3117
|
-
if index.is_a?(String)
|
3147
|
+
if index.is_a?(::String)
|
3118
3148
|
index = [index]
|
3119
3149
|
end
|
3120
|
-
if columns.is_a?(String)
|
3150
|
+
if columns.is_a?(::String)
|
3121
3151
|
columns = [columns]
|
3122
3152
|
end
|
3123
3153
|
|
3124
|
-
if aggregate_fn.is_a?(String)
|
3154
|
+
if aggregate_fn.is_a?(::String)
|
3125
3155
|
case aggregate_fn
|
3126
3156
|
when "first"
|
3127
3157
|
aggregate_expr = Polars.element.first._rbexpr
|
@@ -3137,8 +3167,11 @@ module Polars
|
|
3137
3167
|
aggregate_expr = Polars.element.median._rbexpr
|
3138
3168
|
when "last"
|
3139
3169
|
aggregate_expr = Polars.element.last._rbexpr
|
3170
|
+
when "len"
|
3171
|
+
aggregate_expr = Polars.len._rbexpr
|
3140
3172
|
when "count"
|
3141
|
-
|
3173
|
+
warn "`aggregate_function: \"count\"` input for `pivot` is deprecated. Use `aggregate_function: \"len\"` instead."
|
3174
|
+
aggregate_expr = Polars.len._rbexpr
|
3142
3175
|
else
|
3143
3176
|
raise ArgumentError, "Argument aggregate fn: '#{aggregate_fn}' was not expected."
|
3144
3177
|
end
|
@@ -3150,9 +3183,9 @@ module Polars
|
|
3150
3183
|
|
3151
3184
|
_from_rbdf(
|
3152
3185
|
_df.pivot_expr(
|
3153
|
-
values,
|
3154
3186
|
index,
|
3155
3187
|
columns,
|
3188
|
+
values,
|
3156
3189
|
maintain_order,
|
3157
3190
|
sort_columns,
|
3158
3191
|
aggregate_expr,
|
@@ -3206,10 +3239,10 @@ module Polars
|
|
3206
3239
|
# # │ z ┆ c ┆ 6 │
|
3207
3240
|
# # └─────┴──────────┴───────┘
|
3208
3241
|
def melt(id_vars: nil, value_vars: nil, variable_name: nil, value_name: nil)
|
3209
|
-
if value_vars.is_a?(String)
|
3242
|
+
if value_vars.is_a?(::String)
|
3210
3243
|
value_vars = [value_vars]
|
3211
3244
|
end
|
3212
|
-
if id_vars.is_a?(String)
|
3245
|
+
if id_vars.is_a?(::String)
|
3213
3246
|
id_vars = [id_vars]
|
3214
3247
|
end
|
3215
3248
|
if value_vars.nil?
|
@@ -3423,7 +3456,7 @@ module Polars
|
|
3423
3456
|
# # │ C ┆ 2 ┆ l │
|
3424
3457
|
# # └─────┴─────┴─────┘}
|
3425
3458
|
def partition_by(groups, maintain_order: true, include_key: true, as_dict: false)
|
3426
|
-
if groups.is_a?(String)
|
3459
|
+
if groups.is_a?(::String)
|
3427
3460
|
groups = [groups]
|
3428
3461
|
elsif !groups.is_a?(::Array)
|
3429
3462
|
groups = Array(groups)
|
@@ -3587,8 +3620,13 @@ module Polars
|
|
3587
3620
|
|
3588
3621
|
# Select columns from this DataFrame.
|
3589
3622
|
#
|
3590
|
-
# @param exprs [
|
3591
|
-
# Column
|
3623
|
+
# @param exprs [Array]
|
3624
|
+
# Column(s) to select, specified as positional arguments.
|
3625
|
+
# Accepts expression input. Strings are parsed as column names,
|
3626
|
+
# other non-expression inputs are parsed as literals.
|
3627
|
+
# @param named_exprs [Hash]
|
3628
|
+
# Additional columns to select, specified as keyword arguments.
|
3629
|
+
# The columns will be renamed to the keyword used.
|
3592
3630
|
#
|
3593
3631
|
# @return [DataFrame]
|
3594
3632
|
#
|
@@ -3668,23 +3706,25 @@ module Polars
|
|
3668
3706
|
# # │ 0 │
|
3669
3707
|
# # │ 10 │
|
3670
3708
|
# # └─────────┘
|
3671
|
-
def select(exprs)
|
3672
|
-
|
3673
|
-
lazy
|
3674
|
-
.select(exprs)
|
3675
|
-
.collect(no_optimization: true, string_cache: false)
|
3676
|
-
._df
|
3677
|
-
)
|
3709
|
+
def select(*exprs, **named_exprs)
|
3710
|
+
lazy.select(*exprs, **named_exprs).collect(_eager: true)
|
3678
3711
|
end
|
3679
3712
|
|
3680
|
-
# Add
|
3713
|
+
# Add columns to this DataFrame.
|
3714
|
+
#
|
3715
|
+
# Added columns will replace existing columns with the same name.
|
3681
3716
|
#
|
3682
3717
|
# @param exprs [Array]
|
3683
|
-
#
|
3718
|
+
# Column(s) to add, specified as positional arguments.
|
3719
|
+
# Accepts expression input. Strings are parsed as column names, other
|
3720
|
+
# non-expression inputs are parsed as literals.
|
3721
|
+
# @param named_exprs [Hash]
|
3722
|
+
# Additional columns to add, specified as keyword arguments.
|
3723
|
+
# The columns will be renamed to the keyword used.
|
3684
3724
|
#
|
3685
3725
|
# @return [DataFrame]
|
3686
3726
|
#
|
3687
|
-
# @example
|
3727
|
+
# @example Pass an expression to add it as a new column.
|
3688
3728
|
# df = Polars::DataFrame.new(
|
3689
3729
|
# {
|
3690
3730
|
# "a" => [1, 2, 3, 4],
|
@@ -3692,11 +3732,41 @@ module Polars
|
|
3692
3732
|
# "c" => [true, true, false, true]
|
3693
3733
|
# }
|
3694
3734
|
# )
|
3735
|
+
# df.with_columns((Polars.col("a") ** 2).alias("a^2"))
|
3736
|
+
# # =>
|
3737
|
+
# # shape: (4, 4)
|
3738
|
+
# # ┌─────┬──────┬───────┬──────┐
|
3739
|
+
# # │ a ┆ b ┆ c ┆ a^2 │
|
3740
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
3741
|
+
# # │ i64 ┆ f64 ┆ bool ┆ f64 │
|
3742
|
+
# # ╞═════╪══════╪═══════╪══════╡
|
3743
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 1.0 │
|
3744
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 4.0 │
|
3745
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 9.0 │
|
3746
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 16.0 │
|
3747
|
+
# # └─────┴──────┴───────┴──────┘
|
3748
|
+
#
|
3749
|
+
# @example Added columns will replace existing columns with the same name.
|
3750
|
+
# df.with_columns(Polars.col("a").cast(Polars::Float64))
|
3751
|
+
# # =>
|
3752
|
+
# # shape: (4, 3)
|
3753
|
+
# # ┌─────┬──────┬───────┐
|
3754
|
+
# # │ a ┆ b ┆ c │
|
3755
|
+
# # │ --- ┆ --- ┆ --- │
|
3756
|
+
# # │ f64 ┆ f64 ┆ bool │
|
3757
|
+
# # ╞═════╪══════╪═══════╡
|
3758
|
+
# # │ 1.0 ┆ 0.5 ┆ true │
|
3759
|
+
# # │ 2.0 ┆ 4.0 ┆ true │
|
3760
|
+
# # │ 3.0 ┆ 10.0 ┆ false │
|
3761
|
+
# # │ 4.0 ┆ 13.0 ┆ true │
|
3762
|
+
# # └─────┴──────┴───────┘
|
3763
|
+
#
|
3764
|
+
# @example Multiple columns can be added by passing a list of expressions.
|
3695
3765
|
# df.with_columns(
|
3696
3766
|
# [
|
3697
3767
|
# (Polars.col("a") ** 2).alias("a^2"),
|
3698
3768
|
# (Polars.col("b") / 2).alias("b/2"),
|
3699
|
-
# (Polars.col("c").
|
3769
|
+
# (Polars.col("c").not_).alias("not c"),
|
3700
3770
|
# ]
|
3701
3771
|
# )
|
3702
3772
|
# # =>
|
@@ -3711,13 +3781,45 @@ module Polars
|
|
3711
3781
|
# # │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │
|
3712
3782
|
# # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
|
3713
3783
|
# # └─────┴──────┴───────┴──────┴──────┴───────┘
|
3714
|
-
|
3715
|
-
|
3716
|
-
|
3717
|
-
|
3718
|
-
|
3719
|
-
|
3720
|
-
|
3784
|
+
#
|
3785
|
+
# @example Multiple columns can also be added using positional arguments instead of a list.
|
3786
|
+
# df.with_columns(
|
3787
|
+
# (Polars.col("a") ** 2).alias("a^2"),
|
3788
|
+
# (Polars.col("b") / 2).alias("b/2"),
|
3789
|
+
# (Polars.col("c").not_).alias("not c"),
|
3790
|
+
# )
|
3791
|
+
# # =>
|
3792
|
+
# # shape: (4, 6)
|
3793
|
+
# # ┌─────┬──────┬───────┬──────┬──────┬───────┐
|
3794
|
+
# # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
|
3795
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
3796
|
+
# # │ i64 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ bool │
|
3797
|
+
# # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
|
3798
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 1.0 ┆ 0.25 ┆ false │
|
3799
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 4.0 ┆ 2.0 ┆ false │
|
3800
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 9.0 ┆ 5.0 ┆ true │
|
3801
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
|
3802
|
+
# # └─────┴──────┴───────┴──────┴──────┴───────┘
|
3803
|
+
#
|
3804
|
+
# @example Use keyword arguments to easily name your expression inputs.
|
3805
|
+
# df.with_columns(
|
3806
|
+
# ab: Polars.col("a") * Polars.col("b"),
|
3807
|
+
# not_c: Polars.col("c").not_
|
3808
|
+
# )
|
3809
|
+
# # =>
|
3810
|
+
# # shape: (4, 5)
|
3811
|
+
# # ┌─────┬──────┬───────┬──────┬───────┐
|
3812
|
+
# # │ a ┆ b ┆ c ┆ ab ┆ not_c │
|
3813
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
3814
|
+
# # │ i64 ┆ f64 ┆ bool ┆ f64 ┆ bool │
|
3815
|
+
# # ╞═════╪══════╪═══════╪══════╪═══════╡
|
3816
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 0.5 ┆ false │
|
3817
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 8.0 ┆ false │
|
3818
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 30.0 ┆ true │
|
3819
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 52.0 ┆ false │
|
3820
|
+
# # └─────┴──────┴───────┴──────┴───────┘
|
3821
|
+
def with_columns(*exprs, **named_exprs)
|
3822
|
+
lazy.with_columns(*exprs, **named_exprs).collect(_eager: true)
|
3721
3823
|
end
|
3722
3824
|
|
3723
3825
|
# Get number of chunks used by the ChunkedArrays of this DataFrame.
|
@@ -3774,7 +3876,7 @@ module Polars
|
|
3774
3876
|
# # └─────┴─────┴─────┘
|
3775
3877
|
def max(axis: 0)
|
3776
3878
|
if axis == 0
|
3777
|
-
|
3879
|
+
lazy.max.collect(_eager: true)
|
3778
3880
|
elsif axis == 1
|
3779
3881
|
Utils.wrap_s(_df.max_horizontal)
|
3780
3882
|
else
|
@@ -3806,7 +3908,7 @@ module Polars
|
|
3806
3908
|
# # └─────┴─────┴─────┘
|
3807
3909
|
def min(axis: 0)
|
3808
3910
|
if axis == 0
|
3809
|
-
|
3911
|
+
lazy.min.collect(_eager: true)
|
3810
3912
|
elsif axis == 1
|
3811
3913
|
Utils.wrap_s(_df.min_horizontal)
|
3812
3914
|
else
|
@@ -3855,7 +3957,7 @@ module Polars
|
|
3855
3957
|
def sum(axis: 0, null_strategy: "ignore")
|
3856
3958
|
case axis
|
3857
3959
|
when 0
|
3858
|
-
|
3960
|
+
lazy.sum.collect(_eager: true)
|
3859
3961
|
when 1
|
3860
3962
|
Utils.wrap_s(_df.sum_horizontal(null_strategy))
|
3861
3963
|
else
|
@@ -3893,7 +3995,7 @@ module Polars
|
|
3893
3995
|
def mean(axis: 0, null_strategy: "ignore")
|
3894
3996
|
case axis
|
3895
3997
|
when 0
|
3896
|
-
|
3998
|
+
lazy.mean.collect(_eager: true)
|
3897
3999
|
when 1
|
3898
4000
|
Utils.wrap_s(_df.mean_horizontal(null_strategy))
|
3899
4001
|
else
|
@@ -3939,7 +4041,7 @@ module Polars
|
|
3939
4041
|
# # │ 0.816497 ┆ 0.816497 ┆ null │
|
3940
4042
|
# # └──────────┴──────────┴──────┘
|
3941
4043
|
def std(ddof: 1)
|
3942
|
-
|
4044
|
+
lazy.std(ddof: ddof).collect(_eager: true)
|
3943
4045
|
end
|
3944
4046
|
|
3945
4047
|
# Aggregate the columns of this DataFrame to their variance value.
|
@@ -3980,7 +4082,7 @@ module Polars
|
|
3980
4082
|
# # │ 0.666667 ┆ 0.666667 ┆ null │
|
3981
4083
|
# # └──────────┴──────────┴──────┘
|
3982
4084
|
def var(ddof: 1)
|
3983
|
-
|
4085
|
+
lazy.var(ddof: ddof).collect(_eager: true)
|
3984
4086
|
end
|
3985
4087
|
|
3986
4088
|
# Aggregate the columns of this DataFrame to their median value.
|
@@ -4006,7 +4108,7 @@ module Polars
|
|
4006
4108
|
# # │ 2.0 ┆ 7.0 ┆ null │
|
4007
4109
|
# # └─────┴─────┴──────┘
|
4008
4110
|
def median
|
4009
|
-
|
4111
|
+
lazy.median.collect(_eager: true)
|
4010
4112
|
end
|
4011
4113
|
|
4012
4114
|
# Aggregate the columns of this DataFrame to their product values.
|
@@ -4063,7 +4165,7 @@ module Polars
|
|
4063
4165
|
# # │ 2.0 ┆ 7.0 ┆ null │
|
4064
4166
|
# # └─────┴─────┴──────┘
|
4065
4167
|
def quantile(quantile, interpolation: "nearest")
|
4066
|
-
|
4168
|
+
lazy.quantile(quantile, interpolation: interpolation).collect(_eager: true)
|
4067
4169
|
end
|
4068
4170
|
|
4069
4171
|
# Get one hot encoded dummy variables.
|
@@ -4094,7 +4196,7 @@ module Polars
|
|
4094
4196
|
# # │ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 │
|
4095
4197
|
# # └───────┴───────┴───────┴───────┴───────┴───────┘
|
4096
4198
|
def to_dummies(columns: nil, separator: "_", drop_first: false)
|
4097
|
-
if columns.is_a?(String)
|
4199
|
+
if columns.is_a?(::String)
|
4098
4200
|
columns = [columns]
|
4099
4201
|
end
|
4100
4202
|
_from_rbdf(_df.to_dummies(columns, separator, drop_first))
|
@@ -4359,7 +4461,7 @@ module Polars
|
|
4359
4461
|
# # null
|
4360
4462
|
# # ]
|
4361
4463
|
#
|
4362
|
-
# @example A horizontal boolean or, similar to a row-wise .any
|
4464
|
+
# @example A horizontal boolean or, similar to a row-wise .any:
|
4363
4465
|
# df = Polars::DataFrame.new(
|
4364
4466
|
# {
|
4365
4467
|
# "a" => [false, false, true],
|
@@ -4482,7 +4584,7 @@ module Polars
|
|
4482
4584
|
# # => [{"a"=>1, "b"=>2}, {"a"=>3, "b"=>4}, {"a"=>5, "b"=>6}]
|
4483
4585
|
def rows(named: false)
|
4484
4586
|
if named
|
4485
|
-
columns = columns
|
4587
|
+
columns = self.columns
|
4486
4588
|
_df.row_tuples.map do |v|
|
4487
4589
|
columns.zip(v).to_h
|
4488
4590
|
end
|
@@ -4523,7 +4625,7 @@ module Polars
|
|
4523
4625
|
return to_enum(:iter_rows, named: named, buffer_size: buffer_size) unless block_given?
|
4524
4626
|
|
4525
4627
|
# load into the local namespace for a modest performance boost in the hot loops
|
4526
|
-
columns = columns
|
4628
|
+
columns = self.columns
|
4527
4629
|
|
4528
4630
|
# note: buffering rows results in a 2-4x speedup over individual calls
|
4529
4631
|
# to ".row(i)", so it should only be disabled in extremely specific cases.
|
@@ -4603,8 +4705,8 @@ module Polars
|
|
4603
4705
|
# # │ 1 ┆ 5 │
|
4604
4706
|
# # │ 3 ┆ 7 │
|
4605
4707
|
# # └─────┴─────┘
|
4606
|
-
def gather_every(n)
|
4607
|
-
select(Utils.col("*").gather_every(n))
|
4708
|
+
def gather_every(n, offset = 0)
|
4709
|
+
select(Utils.col("*").gather_every(n, offset))
|
4608
4710
|
end
|
4609
4711
|
alias_method :take_every, :gather_every
|
4610
4712
|
|
@@ -4754,19 +4856,57 @@ module Polars
|
|
4754
4856
|
# # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
|
4755
4857
|
# # └────────┴─────┴─────┴──────┴───────────┴───────┘
|
4756
4858
|
def unnest(names)
|
4757
|
-
if names.is_a?(String)
|
4859
|
+
if names.is_a?(::String)
|
4758
4860
|
names = [names]
|
4759
4861
|
end
|
4760
4862
|
_from_rbdf(_df.unnest(names))
|
4761
4863
|
end
|
4762
4864
|
|
4763
|
-
#
|
4865
|
+
# Requires NumPy
|
4764
4866
|
# def corr
|
4765
4867
|
# end
|
4766
4868
|
|
4767
|
-
#
|
4768
|
-
#
|
4769
|
-
#
|
4869
|
+
# Take two sorted DataFrames and merge them by the sorted key.
|
4870
|
+
#
|
4871
|
+
# The output of this operation will also be sorted.
|
4872
|
+
# It is the caller's responsibility that the frames are sorted
|
4873
|
+
# by that key otherwise the output will not make sense.
|
4874
|
+
#
|
4875
|
+
# The schemas of both DataFrames must be equal.
|
4876
|
+
#
|
4877
|
+
# @param other [DataFrame]
|
4878
|
+
# Other DataFrame that must be merged
|
4879
|
+
# @param key [String]
|
4880
|
+
# Key that is sorted.
|
4881
|
+
#
|
4882
|
+
# @return [DataFrame]
|
4883
|
+
#
|
4884
|
+
# @example
|
4885
|
+
# df0 = Polars::DataFrame.new(
|
4886
|
+
# {"name" => ["steve", "elise", "bob"], "age" => [42, 44, 18]}
|
4887
|
+
# ).sort("age")
|
4888
|
+
# df1 = Polars::DataFrame.new(
|
4889
|
+
# {"name" => ["anna", "megan", "steve", "thomas"], "age" => [21, 33, 42, 20]}
|
4890
|
+
# ).sort("age")
|
4891
|
+
# df0.merge_sorted(df1, "age")
|
4892
|
+
# # =>
|
4893
|
+
# # shape: (7, 2)
|
4894
|
+
# # ┌────────┬─────┐
|
4895
|
+
# # │ name ┆ age │
|
4896
|
+
# # │ --- ┆ --- │
|
4897
|
+
# # │ str ┆ i64 │
|
4898
|
+
# # ╞════════╪═════╡
|
4899
|
+
# # │ bob ┆ 18 │
|
4900
|
+
# # │ thomas ┆ 20 │
|
4901
|
+
# # │ anna ┆ 21 │
|
4902
|
+
# # │ megan ┆ 33 │
|
4903
|
+
# # │ steve ┆ 42 │
|
4904
|
+
# # │ steve ┆ 42 │
|
4905
|
+
# # │ elise ┆ 44 │
|
4906
|
+
# # └────────┴─────┘
|
4907
|
+
def merge_sorted(other, key)
|
4908
|
+
lazy.merge_sorted(other.lazy, key).collect(_eager: true)
|
4909
|
+
end
|
4770
4910
|
|
4771
4911
|
# Indicate that one or multiple columns are sorted.
|
4772
4912
|
#
|
@@ -4808,7 +4948,7 @@ module Polars
|
|
4808
4948
|
end
|
4809
4949
|
|
4810
4950
|
def _pos_idxs(idxs, dim)
|
4811
|
-
idx_type =
|
4951
|
+
idx_type = Plr.get_index_type
|
4812
4952
|
|
4813
4953
|
if idxs.is_a?(Series)
|
4814
4954
|
if idxs.dtype == idx_type
|
@@ -4867,10 +5007,10 @@ module Polars
|
|
4867
5007
|
if val.is_a?(Hash) && dtype != Struct
|
4868
5008
|
updated_data[name] = DataFrame.new(val).to_struct(name)
|
4869
5009
|
elsif !Utils.arrlen(val).nil?
|
4870
|
-
updated_data[name] = Series.new(String.new(name), val, dtype: dtype)
|
4871
|
-
elsif val.nil? || [Integer, Float, TrueClass, FalseClass, String, ::Date, ::DateTime, ::Time].any? { |cls| val.is_a?(cls) }
|
5010
|
+
updated_data[name] = Series.new(::String.new(name), val, dtype: dtype)
|
5011
|
+
elsif val.nil? || [Integer, Float, TrueClass, FalseClass, ::String, ::Date, ::DateTime, ::Time].any? { |cls| val.is_a?(cls) }
|
4872
5012
|
dtype = Polars::Float64 if val.nil? && dtype.nil?
|
4873
|
-
updated_data[name] = Series.new(String.new(name), [val], dtype: dtype).extend_constant(val, array_len - 1)
|
5013
|
+
updated_data[name] = Series.new(::String.new(name), [val], dtype: dtype).extend_constant(val, array_len - 1)
|
4874
5014
|
else
|
4875
5015
|
raise Todo
|
4876
5016
|
end
|
@@ -4927,7 +5067,7 @@ module Polars
|
|
4927
5067
|
end
|
4928
5068
|
column_names =
|
4929
5069
|
(schema || []).map.with_index do |col, i|
|
4930
|
-
if col.is_a?(String)
|
5070
|
+
if col.is_a?(::String)
|
4931
5071
|
col || "column_#{i}"
|
4932
5072
|
else
|
4933
5073
|
col[0]
|
@@ -4940,7 +5080,7 @@ module Polars
|
|
4940
5080
|
lookup = column_names.zip(lookup_names || []).to_h
|
4941
5081
|
|
4942
5082
|
column_dtypes =
|
4943
|
-
(schema || []).select { |col| !col.is_a?(String) && col[1] }.to_h do |col|
|
5083
|
+
(schema || []).select { |col| !col.is_a?(::String) && col[1] }.to_h do |col|
|
4944
5084
|
[lookup[col[0]] || col[0], col[1]]
|
4945
5085
|
end
|
4946
5086
|
|
@@ -5041,14 +5181,14 @@ module Polars
|
|
5041
5181
|
elsif data[0].is_a?(Hash)
|
5042
5182
|
column_names, dtypes = _unpack_schema(columns)
|
5043
5183
|
schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
|
5044
|
-
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
|
5184
|
+
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
|
5045
5185
|
if column_names
|
5046
5186
|
rbdf = _post_apply_columns(rbdf, column_names)
|
5047
5187
|
end
|
5048
5188
|
return rbdf
|
5049
5189
|
elsif data[0].is_a?(::Array)
|
5190
|
+
first_element = data[0]
|
5050
5191
|
if orient.nil? && !columns.nil?
|
5051
|
-
first_element = data[0]
|
5052
5192
|
row_types = first_element.filter_map { |value| value.class }.uniq
|
5053
5193
|
if row_types.include?(Integer) && row_types.include?(Float)
|
5054
5194
|
row_types.delete(Integer)
|