polars-df 0.8.0-arm64-darwin → 0.10.0-arm64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +42 -1
- data/Cargo.lock +159 -66
- data/Cargo.toml +0 -3
- data/LICENSE-THIRD-PARTY.txt +3112 -1613
- data/LICENSE.txt +1 -1
- data/README.md +3 -2
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +453 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/batched_csv_reader.rb +4 -2
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +306 -96
- data/lib/polars/data_types.rb +191 -28
- data/lib/polars/date_time_expr.rb +41 -18
- data/lib/polars/date_time_name_space.rb +9 -3
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +898 -215
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +96 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +2 -2
- data/lib/polars/io.rb +36 -31
- data/lib/polars/lazy_frame.rb +405 -88
- data/lib/polars/list_expr.rb +158 -8
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +282 -41
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +413 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +106 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +16 -4
- metadata +34 -6
- data/lib/polars/lazy_functions.rb +0 -1181
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
data/lib/polars/data_frame.rb
CHANGED
@@ -47,8 +47,8 @@ module Polars
|
|
47
47
|
end
|
48
48
|
|
49
49
|
# @private
|
50
|
-
def self._from_hashes(data, infer_schema_length: 100, schema: nil)
|
51
|
-
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema)
|
50
|
+
def self._from_hashes(data, infer_schema_length: 100, schema: nil, schema_overrides: nil)
|
51
|
+
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
|
52
52
|
_from_rbdf(rbdf)
|
53
53
|
end
|
54
54
|
|
@@ -91,7 +91,8 @@ module Polars
|
|
91
91
|
row_count_name: nil,
|
92
92
|
row_count_offset: 0,
|
93
93
|
sample_size: 1024,
|
94
|
-
eol_char: "\n"
|
94
|
+
eol_char: "\n",
|
95
|
+
truncate_ragged_lines: false
|
95
96
|
)
|
96
97
|
if Utils.pathlike?(file)
|
97
98
|
path = Utils.normalise_filepath(file)
|
@@ -147,7 +148,8 @@ module Polars
|
|
147
148
|
skip_rows_after_header: skip_rows_after_header,
|
148
149
|
row_count_name: row_count_name,
|
149
150
|
row_count_offset: row_count_offset,
|
150
|
-
eol_char: eol_char
|
151
|
+
eol_char: eol_char,
|
152
|
+
truncate_ragged_lines: truncate_ragged_lines
|
151
153
|
)
|
152
154
|
if columns.nil?
|
153
155
|
return _from_rbdf(scan.collect._df)
|
@@ -186,7 +188,8 @@ module Polars
|
|
186
188
|
skip_rows_after_header,
|
187
189
|
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
188
190
|
sample_size,
|
189
|
-
eol_char
|
191
|
+
eol_char,
|
192
|
+
truncate_ragged_lines
|
190
193
|
)
|
191
194
|
)
|
192
195
|
end
|
@@ -622,7 +625,7 @@ module Polars
|
|
622
625
|
# select single column
|
623
626
|
# df["foo"]
|
624
627
|
if item.is_a?(::String) || item.is_a?(Symbol)
|
625
|
-
return Utils.wrap_s(_df.
|
628
|
+
return Utils.wrap_s(_df.get_column(item.to_s))
|
626
629
|
end
|
627
630
|
|
628
631
|
# df[idx]
|
@@ -814,8 +817,6 @@ module Polars
|
|
814
817
|
|
815
818
|
# Serialize to JSON representation.
|
816
819
|
#
|
817
|
-
# @return [nil]
|
818
|
-
#
|
819
820
|
# @param file [String]
|
820
821
|
# File path to which the result should be written.
|
821
822
|
# @param pretty [Boolean]
|
@@ -823,17 +824,45 @@ module Polars
|
|
823
824
|
# @param row_oriented [Boolean]
|
824
825
|
# Write to row oriented json. This is slower, but more common.
|
825
826
|
#
|
826
|
-
# @
|
827
|
+
# @return [nil]
|
828
|
+
#
|
829
|
+
# @example
|
830
|
+
# df = Polars::DataFrame.new(
|
831
|
+
# {
|
832
|
+
# "foo" => [1, 2, 3],
|
833
|
+
# "bar" => [6, 7, 8]
|
834
|
+
# }
|
835
|
+
# )
|
836
|
+
# df.write_json
|
837
|
+
# # => "{\"columns\":[{\"name\":\"foo\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[1,2,3]},{\"name\":\"bar\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[6,7,8]}]}"
|
838
|
+
#
|
839
|
+
# @example
|
840
|
+
# df.write_json(row_oriented: true)
|
841
|
+
# # => "[{\"foo\":1,\"bar\":6},{\"foo\":2,\"bar\":7},{\"foo\":3,\"bar\":8}]"
|
827
842
|
def write_json(
|
828
|
-
file,
|
843
|
+
file = nil,
|
829
844
|
pretty: false,
|
830
845
|
row_oriented: false
|
831
846
|
)
|
832
847
|
if Utils.pathlike?(file)
|
833
848
|
file = Utils.normalise_filepath(file)
|
834
849
|
end
|
835
|
-
|
836
|
-
|
850
|
+
to_string_io = !file.nil? && file.is_a?(StringIO)
|
851
|
+
if file.nil? || to_string_io
|
852
|
+
buf = StringIO.new
|
853
|
+
buf.set_encoding(Encoding::BINARY)
|
854
|
+
_df.write_json(buf, pretty, row_oriented)
|
855
|
+
json_bytes = buf.string
|
856
|
+
|
857
|
+
json_str = json_bytes.force_encoding(Encoding::UTF_8)
|
858
|
+
if to_string_io
|
859
|
+
file.write(json_str)
|
860
|
+
else
|
861
|
+
return json_str
|
862
|
+
end
|
863
|
+
else
|
864
|
+
_df.write_json(file, pretty, row_oriented)
|
865
|
+
end
|
837
866
|
nil
|
838
867
|
end
|
839
868
|
|
@@ -843,12 +872,36 @@ module Polars
|
|
843
872
|
# File path to which the result should be written.
|
844
873
|
#
|
845
874
|
# @return [nil]
|
846
|
-
|
875
|
+
#
|
876
|
+
# @example
|
877
|
+
# df = Polars::DataFrame.new(
|
878
|
+
# {
|
879
|
+
# "foo" => [1, 2, 3],
|
880
|
+
# "bar" => [6, 7, 8]
|
881
|
+
# }
|
882
|
+
# )
|
883
|
+
# df.write_ndjson()
|
884
|
+
# # => "{\"foo\":1,\"bar\":6}\n{\"foo\":2,\"bar\":7}\n{\"foo\":3,\"bar\":8}\n"
|
885
|
+
def write_ndjson(file = nil)
|
847
886
|
if Utils.pathlike?(file)
|
848
887
|
file = Utils.normalise_filepath(file)
|
849
888
|
end
|
850
|
-
|
851
|
-
|
889
|
+
to_string_io = !file.nil? && file.is_a?(StringIO)
|
890
|
+
if file.nil? || to_string_io
|
891
|
+
buf = StringIO.new
|
892
|
+
buf.set_encoding(Encoding::BINARY)
|
893
|
+
_df.write_ndjson(buf)
|
894
|
+
json_bytes = buf.string
|
895
|
+
|
896
|
+
json_str = json_bytes.force_encoding(Encoding::UTF_8)
|
897
|
+
if to_string_io
|
898
|
+
file.write(json_str)
|
899
|
+
else
|
900
|
+
return json_str
|
901
|
+
end
|
902
|
+
else
|
903
|
+
_df.write_ndjson(file)
|
904
|
+
end
|
852
905
|
nil
|
853
906
|
end
|
854
907
|
|
@@ -1010,7 +1063,7 @@ module Polars
|
|
1010
1063
|
|
1011
1064
|
# Write to Apache Parquet file.
|
1012
1065
|
#
|
1013
|
-
# @param file [String]
|
1066
|
+
# @param file [String, Pathname, StringIO]
|
1014
1067
|
# File path to which the file should be written.
|
1015
1068
|
# @param compression ["lz4", "uncompressed", "snappy", "gzip", "lzo", "brotli", "zstd"]
|
1016
1069
|
# Choose "zstd" for good compression performance.
|
@@ -1027,10 +1080,9 @@ module Polars
|
|
1027
1080
|
# @param statistics [Boolean]
|
1028
1081
|
# Write statistics to the parquet headers. This requires extra compute.
|
1029
1082
|
# @param row_group_size [Integer, nil]
|
1030
|
-
# Size of the row groups in number of rows.
|
1031
|
-
#
|
1032
|
-
#
|
1033
|
-
# writing speeds.
|
1083
|
+
# Size of the row groups in number of rows. Defaults to 512^2 rows.
|
1084
|
+
# @param data_page_size [Integer, nil]
|
1085
|
+
# Size of the data page in bytes. Defaults to 1024^2 bytes.
|
1034
1086
|
#
|
1035
1087
|
# @return [nil]
|
1036
1088
|
def write_parquet(
|
@@ -1038,7 +1090,8 @@ module Polars
|
|
1038
1090
|
compression: "zstd",
|
1039
1091
|
compression_level: nil,
|
1040
1092
|
statistics: false,
|
1041
|
-
row_group_size: nil
|
1093
|
+
row_group_size: nil,
|
1094
|
+
data_page_size: nil
|
1042
1095
|
)
|
1043
1096
|
if compression.nil?
|
1044
1097
|
compression = "uncompressed"
|
@@ -1048,7 +1101,7 @@ module Polars
|
|
1048
1101
|
end
|
1049
1102
|
|
1050
1103
|
_df.write_parquet(
|
1051
|
-
file, compression, compression_level, statistics, row_group_size
|
1104
|
+
file, compression, compression_level, statistics, row_group_size, data_page_size
|
1052
1105
|
)
|
1053
1106
|
end
|
1054
1107
|
|
@@ -1084,7 +1137,7 @@ module Polars
|
|
1084
1137
|
# df.estimated_size
|
1085
1138
|
# # => 25888898
|
1086
1139
|
# df.estimated_size("mb")
|
1087
|
-
# # =>
|
1140
|
+
# # => 17.0601749420166
|
1088
1141
|
def estimated_size(unit = "b")
|
1089
1142
|
sz = _df.estimated_size
|
1090
1143
|
Utils.scale_bytes(sz, to: unit)
|
@@ -1782,7 +1835,7 @@ module Polars
|
|
1782
1835
|
# "b" => [2, 4, 6]
|
1783
1836
|
# }
|
1784
1837
|
# )
|
1785
|
-
# df.
|
1838
|
+
# df.with_row_index
|
1786
1839
|
# # =>
|
1787
1840
|
# # shape: (3, 3)
|
1788
1841
|
# # ┌────────┬─────┬─────┐
|
@@ -1794,9 +1847,10 @@ module Polars
|
|
1794
1847
|
# # │ 1 ┆ 3 ┆ 4 │
|
1795
1848
|
# # │ 2 ┆ 5 ┆ 6 │
|
1796
1849
|
# # └────────┴─────┴─────┘
|
1797
|
-
def
|
1798
|
-
_from_rbdf(_df.
|
1850
|
+
def with_row_index(name: "row_nr", offset: 0)
|
1851
|
+
_from_rbdf(_df.with_row_index(name, offset))
|
1799
1852
|
end
|
1853
|
+
alias_method :with_row_count, :with_row_index
|
1800
1854
|
|
1801
1855
|
# Start a group by operation.
|
1802
1856
|
#
|
@@ -2160,12 +2214,13 @@ module Polars
|
|
2160
2214
|
# closed: "right"
|
2161
2215
|
# ).agg(Polars.col("A").alias("A_agg_list"))
|
2162
2216
|
# # =>
|
2163
|
-
# # shape: (
|
2217
|
+
# # shape: (4, 4)
|
2164
2218
|
# # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
|
2165
2219
|
# # │ _lower_boundary ┆ _upper_boundary ┆ idx ┆ A_agg_list │
|
2166
2220
|
# # │ --- ┆ --- ┆ --- ┆ --- │
|
2167
2221
|
# # │ i64 ┆ i64 ┆ i64 ┆ list[str] │
|
2168
2222
|
# # ╞═════════════════╪═════════════════╪═════╪═════════════════╡
|
2223
|
+
# # │ -2 ┆ 1 ┆ -2 ┆ ["A", "A"] │
|
2169
2224
|
# # │ 0 ┆ 3 ┆ 0 ┆ ["A", "B", "B"] │
|
2170
2225
|
# # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
|
2171
2226
|
# # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
|
@@ -2433,6 +2488,8 @@ module Polars
|
|
2433
2488
|
# Join strategy.
|
2434
2489
|
# @param suffix [String]
|
2435
2490
|
# Suffix to append to columns with a duplicate name.
|
2491
|
+
# @param join_nulls [Boolean]
|
2492
|
+
# Join on null values. By default null values will never produce matches.
|
2436
2493
|
#
|
2437
2494
|
# @return [DataFrame]
|
2438
2495
|
#
|
@@ -2515,7 +2572,7 @@ module Polars
|
|
2515
2572
|
# # ╞═════╪═════╪═════╡
|
2516
2573
|
# # │ 3 ┆ 8.0 ┆ c │
|
2517
2574
|
# # └─────┴─────┴─────┘
|
2518
|
-
def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right")
|
2575
|
+
def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right", join_nulls: false)
|
2519
2576
|
lazy
|
2520
2577
|
.join(
|
2521
2578
|
other.lazy,
|
@@ -2524,6 +2581,7 @@ module Polars
|
|
2524
2581
|
on: on,
|
2525
2582
|
how: how,
|
2526
2583
|
suffix: suffix,
|
2584
|
+
join_nulls: join_nulls
|
2527
2585
|
)
|
2528
2586
|
.collect(no_optimization: true)
|
2529
2587
|
end
|
@@ -2617,26 +2675,26 @@ module Polars
|
|
2617
2675
|
# # ┌─────┬─────┬───────────┐
|
2618
2676
|
# # │ a ┆ b ┆ b_squared │
|
2619
2677
|
# # │ --- ┆ --- ┆ --- │
|
2620
|
-
# # │ i64 ┆ i64 ┆
|
2678
|
+
# # │ i64 ┆ i64 ┆ i64 │
|
2621
2679
|
# # ╞═════╪═════╪═══════════╡
|
2622
|
-
# # │ 1 ┆ 2 ┆ 4
|
2623
|
-
# # │ 3 ┆ 4 ┆ 16
|
2624
|
-
# # │ 5 ┆ 6 ┆ 36
|
2680
|
+
# # │ 1 ┆ 2 ┆ 4 │
|
2681
|
+
# # │ 3 ┆ 4 ┆ 16 │
|
2682
|
+
# # │ 5 ┆ 6 ┆ 36 │
|
2625
2683
|
# # └─────┴─────┴───────────┘
|
2626
2684
|
#
|
2627
2685
|
# @example Replaced
|
2628
2686
|
# df.with_column(Polars.col("a") ** 2)
|
2629
2687
|
# # =>
|
2630
2688
|
# # shape: (3, 2)
|
2631
|
-
# #
|
2632
|
-
# # │ a
|
2633
|
-
# # │ ---
|
2634
|
-
# # │
|
2635
|
-
# #
|
2636
|
-
# # │ 1
|
2637
|
-
# # │ 9
|
2638
|
-
# # │ 25
|
2639
|
-
# #
|
2689
|
+
# # ┌─────┬─────┐
|
2690
|
+
# # │ a ┆ b │
|
2691
|
+
# # │ --- ┆ --- │
|
2692
|
+
# # │ i64 ┆ i64 │
|
2693
|
+
# # ╞═════╪═════╡
|
2694
|
+
# # │ 1 ┆ 2 │
|
2695
|
+
# # │ 9 ┆ 4 │
|
2696
|
+
# # │ 25 ┆ 6 │
|
2697
|
+
# # └─────┴─────┘
|
2640
2698
|
def with_column(column)
|
2641
2699
|
lazy
|
2642
2700
|
.with_column(column)
|
@@ -2803,16 +2861,36 @@ module Polars
|
|
2803
2861
|
# # │ 2 ┆ 7.0 │
|
2804
2862
|
# # │ 3 ┆ 8.0 │
|
2805
2863
|
# # └─────┴─────┘
|
2806
|
-
|
2807
|
-
|
2808
|
-
|
2809
|
-
|
2810
|
-
|
2811
|
-
|
2812
|
-
|
2813
|
-
|
2814
|
-
|
2815
|
-
|
2864
|
+
#
|
2865
|
+
# @example Drop multiple columns by passing a list of column names.
|
2866
|
+
# df.drop(["bar", "ham"])
|
2867
|
+
# # =>
|
2868
|
+
# # shape: (3, 1)
|
2869
|
+
# # ┌─────┐
|
2870
|
+
# # │ foo │
|
2871
|
+
# # │ --- │
|
2872
|
+
# # │ i64 │
|
2873
|
+
# # ╞═════╡
|
2874
|
+
# # │ 1 │
|
2875
|
+
# # │ 2 │
|
2876
|
+
# # │ 3 │
|
2877
|
+
# # └─────┘
|
2878
|
+
#
|
2879
|
+
# @example Use positional arguments to drop multiple columns.
|
2880
|
+
# df.drop("foo", "ham")
|
2881
|
+
# # =>
|
2882
|
+
# # shape: (3, 1)
|
2883
|
+
# # ┌─────┐
|
2884
|
+
# # │ bar │
|
2885
|
+
# # │ --- │
|
2886
|
+
# # │ f64 │
|
2887
|
+
# # ╞═════╡
|
2888
|
+
# # │ 6.0 │
|
2889
|
+
# # │ 7.0 │
|
2890
|
+
# # │ 8.0 │
|
2891
|
+
# # └─────┘
|
2892
|
+
def drop(*columns)
|
2893
|
+
lazy.drop(*columns).collect(_eager: true)
|
2816
2894
|
end
|
2817
2895
|
|
2818
2896
|
# Drop in place.
|
@@ -2867,7 +2945,7 @@ module Polars
|
|
2867
2945
|
# "c" => [true, true, false, nil]
|
2868
2946
|
# }
|
2869
2947
|
# )
|
2870
|
-
# df.
|
2948
|
+
# df.clear
|
2871
2949
|
# # =>
|
2872
2950
|
# # shape: (0, 3)
|
2873
2951
|
# # ┌─────┬─────┬──────┐
|
@@ -2876,9 +2954,31 @@ module Polars
|
|
2876
2954
|
# # │ i64 ┆ f64 ┆ bool │
|
2877
2955
|
# # ╞═════╪═════╪══════╡
|
2878
2956
|
# # └─────┴─────┴──────┘
|
2879
|
-
|
2880
|
-
|
2957
|
+
#
|
2958
|
+
# @example
|
2959
|
+
# df.clear(2)
|
2960
|
+
# # =>
|
2961
|
+
# # shape: (2, 3)
|
2962
|
+
# # ┌──────┬──────┬──────┐
|
2963
|
+
# # │ a ┆ b ┆ c │
|
2964
|
+
# # │ --- ┆ --- ┆ --- │
|
2965
|
+
# # │ i64 ┆ f64 ┆ bool │
|
2966
|
+
# # ╞══════╪══════╪══════╡
|
2967
|
+
# # │ null ┆ null ┆ null │
|
2968
|
+
# # │ null ┆ null ┆ null │
|
2969
|
+
# # └──────┴──────┴──────┘
|
2970
|
+
def clear(n = 0)
|
2971
|
+
if n == 0
|
2972
|
+
_from_rbdf(_df.clear)
|
2973
|
+
elsif n > 0 || len > 0
|
2974
|
+
self.class.new(
|
2975
|
+
schema.to_h { |nm, tp| [nm, Series.new(nm, [], dtype: tp).extend_constant(nil, n)] }
|
2976
|
+
)
|
2977
|
+
else
|
2978
|
+
clone
|
2979
|
+
end
|
2881
2980
|
end
|
2981
|
+
alias_method :cleared, :clear
|
2882
2982
|
|
2883
2983
|
# clone handled by initialize_copy
|
2884
2984
|
|
@@ -3141,8 +3241,11 @@ module Polars
|
|
3141
3241
|
aggregate_expr = Polars.element.median._rbexpr
|
3142
3242
|
when "last"
|
3143
3243
|
aggregate_expr = Polars.element.last._rbexpr
|
3244
|
+
when "len"
|
3245
|
+
aggregate_expr = Polars.len._rbexpr
|
3144
3246
|
when "count"
|
3145
|
-
|
3247
|
+
warn "`aggregate_function: \"count\"` input for `pivot` is deprecated. Use `aggregate_function: \"len\"` instead."
|
3248
|
+
aggregate_expr = Polars.len._rbexpr
|
3146
3249
|
else
|
3147
3250
|
raise ArgumentError, "Argument aggregate fn: '#{aggregate_fn}' was not expected."
|
3148
3251
|
end
|
@@ -3154,9 +3257,9 @@ module Polars
|
|
3154
3257
|
|
3155
3258
|
_from_rbdf(
|
3156
3259
|
_df.pivot_expr(
|
3157
|
-
values,
|
3158
3260
|
index,
|
3159
3261
|
columns,
|
3262
|
+
values,
|
3160
3263
|
maintain_order,
|
3161
3264
|
sort_columns,
|
3162
3265
|
aggregate_expr,
|
@@ -3591,8 +3694,13 @@ module Polars
|
|
3591
3694
|
|
3592
3695
|
# Select columns from this DataFrame.
|
3593
3696
|
#
|
3594
|
-
# @param exprs [
|
3595
|
-
# Column
|
3697
|
+
# @param exprs [Array]
|
3698
|
+
# Column(s) to select, specified as positional arguments.
|
3699
|
+
# Accepts expression input. Strings are parsed as column names,
|
3700
|
+
# other non-expression inputs are parsed as literals.
|
3701
|
+
# @param named_exprs [Hash]
|
3702
|
+
# Additional columns to select, specified as keyword arguments.
|
3703
|
+
# The columns will be renamed to the keyword used.
|
3596
3704
|
#
|
3597
3705
|
# @return [DataFrame]
|
3598
3706
|
#
|
@@ -3672,23 +3780,25 @@ module Polars
|
|
3672
3780
|
# # │ 0 │
|
3673
3781
|
# # │ 10 │
|
3674
3782
|
# # └─────────┘
|
3675
|
-
def select(exprs)
|
3676
|
-
|
3677
|
-
lazy
|
3678
|
-
.select(exprs)
|
3679
|
-
.collect(no_optimization: true, string_cache: false)
|
3680
|
-
._df
|
3681
|
-
)
|
3783
|
+
def select(*exprs, **named_exprs)
|
3784
|
+
lazy.select(*exprs, **named_exprs).collect(_eager: true)
|
3682
3785
|
end
|
3683
3786
|
|
3684
|
-
# Add
|
3787
|
+
# Add columns to this DataFrame.
|
3788
|
+
#
|
3789
|
+
# Added columns will replace existing columns with the same name.
|
3685
3790
|
#
|
3686
3791
|
# @param exprs [Array]
|
3687
|
-
#
|
3792
|
+
# Column(s) to add, specified as positional arguments.
|
3793
|
+
# Accepts expression input. Strings are parsed as column names, other
|
3794
|
+
# non-expression inputs are parsed as literals.
|
3795
|
+
# @param named_exprs [Hash]
|
3796
|
+
# Additional columns to add, specified as keyword arguments.
|
3797
|
+
# The columns will be renamed to the keyword used.
|
3688
3798
|
#
|
3689
3799
|
# @return [DataFrame]
|
3690
3800
|
#
|
3691
|
-
# @example
|
3801
|
+
# @example Pass an expression to add it as a new column.
|
3692
3802
|
# df = Polars::DataFrame.new(
|
3693
3803
|
# {
|
3694
3804
|
# "a" => [1, 2, 3, 4],
|
@@ -3696,32 +3806,94 @@ module Polars
|
|
3696
3806
|
# "c" => [true, true, false, true]
|
3697
3807
|
# }
|
3698
3808
|
# )
|
3809
|
+
# df.with_columns((Polars.col("a") ** 2).alias("a^2"))
|
3810
|
+
# # =>
|
3811
|
+
# # shape: (4, 4)
|
3812
|
+
# # ┌─────┬──────┬───────┬─────┐
|
3813
|
+
# # │ a ┆ b ┆ c ┆ a^2 │
|
3814
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
3815
|
+
# # │ i64 ┆ f64 ┆ bool ┆ i64 │
|
3816
|
+
# # ╞═════╪══════╪═══════╪═════╡
|
3817
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 1 │
|
3818
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 4 │
|
3819
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 9 │
|
3820
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 16 │
|
3821
|
+
# # └─────┴──────┴───────┴─────┘
|
3822
|
+
#
|
3823
|
+
# @example Added columns will replace existing columns with the same name.
|
3824
|
+
# df.with_columns(Polars.col("a").cast(Polars::Float64))
|
3825
|
+
# # =>
|
3826
|
+
# # shape: (4, 3)
|
3827
|
+
# # ┌─────┬──────┬───────┐
|
3828
|
+
# # │ a ┆ b ┆ c │
|
3829
|
+
# # │ --- ┆ --- ┆ --- │
|
3830
|
+
# # │ f64 ┆ f64 ┆ bool │
|
3831
|
+
# # ╞═════╪══════╪═══════╡
|
3832
|
+
# # │ 1.0 ┆ 0.5 ┆ true │
|
3833
|
+
# # │ 2.0 ┆ 4.0 ┆ true │
|
3834
|
+
# # │ 3.0 ┆ 10.0 ┆ false │
|
3835
|
+
# # │ 4.0 ┆ 13.0 ┆ true │
|
3836
|
+
# # └─────┴──────┴───────┘
|
3837
|
+
#
|
3838
|
+
# @example Multiple columns can be added by passing a list of expressions.
|
3699
3839
|
# df.with_columns(
|
3700
3840
|
# [
|
3701
3841
|
# (Polars.col("a") ** 2).alias("a^2"),
|
3702
3842
|
# (Polars.col("b") / 2).alias("b/2"),
|
3703
|
-
# (Polars.col("c").
|
3843
|
+
# (Polars.col("c").not_).alias("not c"),
|
3704
3844
|
# ]
|
3705
3845
|
# )
|
3706
3846
|
# # =>
|
3707
3847
|
# # shape: (4, 6)
|
3708
|
-
# #
|
3709
|
-
# # │ a ┆ b ┆ c ┆ a^2
|
3710
|
-
# # │ --- ┆ --- ┆ --- ┆ ---
|
3711
|
-
# # │ i64 ┆ f64 ┆ bool ┆
|
3712
|
-
# #
|
3713
|
-
# # │ 1 ┆ 0.5 ┆ true ┆ 1
|
3714
|
-
# # │ 2 ┆ 4.0 ┆ true ┆ 4
|
3715
|
-
# # │ 3 ┆ 10.0 ┆ false ┆ 9
|
3716
|
-
# # │ 4 ┆ 13.0 ┆ true ┆ 16
|
3717
|
-
# #
|
3718
|
-
|
3719
|
-
|
3720
|
-
|
3721
|
-
|
3722
|
-
|
3723
|
-
|
3724
|
-
|
3848
|
+
# # ┌─────┬──────┬───────┬─────┬──────┬───────┐
|
3849
|
+
# # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
|
3850
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
3851
|
+
# # │ i64 ┆ f64 ┆ bool ┆ i64 ┆ f64 ┆ bool │
|
3852
|
+
# # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡
|
3853
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 1 ┆ 0.25 ┆ false │
|
3854
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 4 ┆ 2.0 ┆ false │
|
3855
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 9 ┆ 5.0 ┆ true │
|
3856
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 16 ┆ 6.5 ┆ false │
|
3857
|
+
# # └─────┴──────┴───────┴─────┴──────┴───────┘
|
3858
|
+
#
|
3859
|
+
# @example Multiple columns also can be added using positional arguments instead of a list.
|
3860
|
+
# df.with_columns(
|
3861
|
+
# (Polars.col("a") ** 2).alias("a^2"),
|
3862
|
+
# (Polars.col("b") / 2).alias("b/2"),
|
3863
|
+
# (Polars.col("c").not_).alias("not c"),
|
3864
|
+
# )
|
3865
|
+
# # =>
|
3866
|
+
# # shape: (4, 6)
|
3867
|
+
# # ┌─────┬──────┬───────┬─────┬──────┬───────┐
|
3868
|
+
# # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
|
3869
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
3870
|
+
# # │ i64 ┆ f64 ┆ bool ┆ i64 ┆ f64 ┆ bool │
|
3871
|
+
# # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡
|
3872
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 1 ┆ 0.25 ┆ false │
|
3873
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 4 ┆ 2.0 ┆ false │
|
3874
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 9 ┆ 5.0 ┆ true │
|
3875
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 16 ┆ 6.5 ┆ false │
|
3876
|
+
# # └─────┴──────┴───────┴─────┴──────┴───────┘
|
3877
|
+
#
|
3878
|
+
# @example Use keyword arguments to easily name your expression inputs.
|
3879
|
+
# df.with_columns(
|
3880
|
+
# ab: Polars.col("a") * Polars.col("b"),
|
3881
|
+
# not_c: Polars.col("c").not_
|
3882
|
+
# )
|
3883
|
+
# # =>
|
3884
|
+
# # shape: (4, 5)
|
3885
|
+
# # ┌─────┬──────┬───────┬──────┬───────┐
|
3886
|
+
# # │ a ┆ b ┆ c ┆ ab ┆ not_c │
|
3887
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
3888
|
+
# # │ i64 ┆ f64 ┆ bool ┆ f64 ┆ bool │
|
3889
|
+
# # ╞═════╪══════╪═══════╪══════╪═══════╡
|
3890
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 0.5 ┆ false │
|
3891
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 8.0 ┆ false │
|
3892
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 30.0 ┆ true │
|
3893
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 52.0 ┆ false │
|
3894
|
+
# # └─────┴──────┴───────┴──────┴───────┘
|
3895
|
+
def with_columns(*exprs, **named_exprs)
|
3896
|
+
lazy.with_columns(*exprs, **named_exprs).collect(_eager: true)
|
3725
3897
|
end
|
3726
3898
|
|
3727
3899
|
# Get number of chunks used by the ChunkedArrays of this DataFrame.
|
@@ -4363,7 +4535,7 @@ module Polars
|
|
4363
4535
|
# # null
|
4364
4536
|
# # ]
|
4365
4537
|
#
|
4366
|
-
# @example A horizontal boolean or, similar to a row-wise .any
|
4538
|
+
# @example A horizontal boolean or, similar to a row-wise .any:
|
4367
4539
|
# df = Polars::DataFrame.new(
|
4368
4540
|
# {
|
4369
4541
|
# "a" => [false, false, true],
|
@@ -4486,7 +4658,7 @@ module Polars
|
|
4486
4658
|
# # => [{"a"=>1, "b"=>2}, {"a"=>3, "b"=>4}, {"a"=>5, "b"=>6}]
|
4487
4659
|
def rows(named: false)
|
4488
4660
|
if named
|
4489
|
-
columns = columns
|
4661
|
+
columns = self.columns
|
4490
4662
|
_df.row_tuples.map do |v|
|
4491
4663
|
columns.zip(v).to_h
|
4492
4664
|
end
|
@@ -4527,7 +4699,7 @@ module Polars
|
|
4527
4699
|
return to_enum(:iter_rows, named: named, buffer_size: buffer_size) unless block_given?
|
4528
4700
|
|
4529
4701
|
# load into the local namespace for a modest performance boost in the hot loops
|
4530
|
-
columns = columns
|
4702
|
+
columns = self.columns
|
4531
4703
|
|
4532
4704
|
# note: buffering rows results in a 2-4x speedup over individual calls
|
4533
4705
|
# to ".row(i)", so it should only be disabled in extremely specific cases.
|
@@ -4764,13 +4936,51 @@ module Polars
|
|
4764
4936
|
_from_rbdf(_df.unnest(names))
|
4765
4937
|
end
|
4766
4938
|
|
4767
|
-
#
|
4939
|
+
# Requires NumPy
|
4768
4940
|
# def corr
|
4769
4941
|
# end
|
4770
4942
|
|
4771
|
-
#
|
4772
|
-
#
|
4773
|
-
#
|
4943
|
+
# Take two sorted DataFrames and merge them by the sorted key.
|
4944
|
+
#
|
4945
|
+
# The output of this operation will also be sorted.
|
4946
|
+
# It is the callers responsibility that the frames are sorted
|
4947
|
+
# by that key otherwise the output will not make sense.
|
4948
|
+
#
|
4949
|
+
# The schemas of both DataFrames must be equal.
|
4950
|
+
#
|
4951
|
+
# @param other [DataFrame]
|
4952
|
+
# Other DataFrame that must be merged
|
4953
|
+
# @param key [String]
|
4954
|
+
# Key that is sorted.
|
4955
|
+
#
|
4956
|
+
# @return [DataFrame]
|
4957
|
+
#
|
4958
|
+
# @example
|
4959
|
+
# df0 = Polars::DataFrame.new(
|
4960
|
+
# {"name" => ["steve", "elise", "bob"], "age" => [42, 44, 18]}
|
4961
|
+
# ).sort("age")
|
4962
|
+
# df1 = Polars::DataFrame.new(
|
4963
|
+
# {"name" => ["anna", "megan", "steve", "thomas"], "age" => [21, 33, 42, 20]}
|
4964
|
+
# ).sort("age")
|
4965
|
+
# df0.merge_sorted(df1, "age")
|
4966
|
+
# # =>
|
4967
|
+
# # shape: (7, 2)
|
4968
|
+
# # ┌────────┬─────┐
|
4969
|
+
# # │ name ┆ age │
|
4970
|
+
# # │ --- ┆ --- │
|
4971
|
+
# # │ str ┆ i64 │
|
4972
|
+
# # ╞════════╪═════╡
|
4973
|
+
# # │ bob ┆ 18 │
|
4974
|
+
# # │ thomas ┆ 20 │
|
4975
|
+
# # │ anna ┆ 21 │
|
4976
|
+
# # │ megan ┆ 33 │
|
4977
|
+
# # │ steve ┆ 42 │
|
4978
|
+
# # │ steve ┆ 42 │
|
4979
|
+
# # │ elise ┆ 44 │
|
4980
|
+
# # └────────┴─────┘
|
4981
|
+
def merge_sorted(other, key)
|
4982
|
+
lazy.merge_sorted(other.lazy, key).collect(_eager: true)
|
4983
|
+
end
|
4774
4984
|
|
4775
4985
|
# Indicate that one or multiple columns are sorted.
|
4776
4986
|
#
|
@@ -4812,7 +5022,7 @@ module Polars
|
|
4812
5022
|
end
|
4813
5023
|
|
4814
5024
|
def _pos_idxs(idxs, dim)
|
4815
|
-
idx_type =
|
5025
|
+
idx_type = Plr.get_index_type
|
4816
5026
|
|
4817
5027
|
if idxs.is_a?(Series)
|
4818
5028
|
if idxs.dtype == idx_type
|
@@ -5045,14 +5255,14 @@ module Polars
|
|
5045
5255
|
elsif data[0].is_a?(Hash)
|
5046
5256
|
column_names, dtypes = _unpack_schema(columns)
|
5047
5257
|
schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
|
5048
|
-
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
|
5258
|
+
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
|
5049
5259
|
if column_names
|
5050
5260
|
rbdf = _post_apply_columns(rbdf, column_names)
|
5051
5261
|
end
|
5052
5262
|
return rbdf
|
5053
5263
|
elsif data[0].is_a?(::Array)
|
5264
|
+
first_element = data[0]
|
5054
5265
|
if orient.nil? && !columns.nil?
|
5055
|
-
first_element = data[0]
|
5056
5266
|
row_types = first_element.filter_map { |value| value.class }.uniq
|
5057
5267
|
if row_types.include?(Integer) && row_types.include?(Float)
|
5058
5268
|
row_types.delete(Integer)
|