polars-df 0.8.0-aarch64-linux → 0.10.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +42 -1
- data/Cargo.lock +159 -66
- data/Cargo.toml +0 -3
- data/LICENSE-THIRD-PARTY.txt +3112 -1613
- data/LICENSE.txt +1 -1
- data/README.md +3 -2
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +453 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/batched_csv_reader.rb +4 -2
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +306 -96
- data/lib/polars/data_types.rb +191 -28
- data/lib/polars/date_time_expr.rb +41 -18
- data/lib/polars/date_time_name_space.rb +9 -3
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +898 -215
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +96 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +2 -2
- data/lib/polars/io.rb +36 -31
- data/lib/polars/lazy_frame.rb +405 -88
- data/lib/polars/list_expr.rb +158 -8
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +282 -41
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +413 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +106 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +16 -4
- metadata +34 -6
- data/lib/polars/lazy_functions.rb +0 -1181
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
data/lib/polars/data_frame.rb
CHANGED
@@ -47,8 +47,8 @@ module Polars
|
|
47
47
|
end
|
48
48
|
|
49
49
|
# @private
|
50
|
-
def self._from_hashes(data, infer_schema_length: 100, schema: nil)
|
51
|
-
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema)
|
50
|
+
def self._from_hashes(data, infer_schema_length: 100, schema: nil, schema_overrides: nil)
|
51
|
+
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
|
52
52
|
_from_rbdf(rbdf)
|
53
53
|
end
|
54
54
|
|
@@ -91,7 +91,8 @@ module Polars
|
|
91
91
|
row_count_name: nil,
|
92
92
|
row_count_offset: 0,
|
93
93
|
sample_size: 1024,
|
94
|
-
eol_char: "\n"
|
94
|
+
eol_char: "\n",
|
95
|
+
truncate_ragged_lines: false
|
95
96
|
)
|
96
97
|
if Utils.pathlike?(file)
|
97
98
|
path = Utils.normalise_filepath(file)
|
@@ -147,7 +148,8 @@ module Polars
|
|
147
148
|
skip_rows_after_header: skip_rows_after_header,
|
148
149
|
row_count_name: row_count_name,
|
149
150
|
row_count_offset: row_count_offset,
|
150
|
-
eol_char: eol_char
|
151
|
+
eol_char: eol_char,
|
152
|
+
truncate_ragged_lines: truncate_ragged_lines
|
151
153
|
)
|
152
154
|
if columns.nil?
|
153
155
|
return _from_rbdf(scan.collect._df)
|
@@ -186,7 +188,8 @@ module Polars
|
|
186
188
|
skip_rows_after_header,
|
187
189
|
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
188
190
|
sample_size,
|
189
|
-
eol_char
|
191
|
+
eol_char,
|
192
|
+
truncate_ragged_lines
|
190
193
|
)
|
191
194
|
)
|
192
195
|
end
|
@@ -622,7 +625,7 @@ module Polars
|
|
622
625
|
# select single column
|
623
626
|
# df["foo"]
|
624
627
|
if item.is_a?(::String) || item.is_a?(Symbol)
|
625
|
-
return Utils.wrap_s(_df.
|
628
|
+
return Utils.wrap_s(_df.get_column(item.to_s))
|
626
629
|
end
|
627
630
|
|
628
631
|
# df[idx]
|
@@ -814,8 +817,6 @@ module Polars
|
|
814
817
|
|
815
818
|
# Serialize to JSON representation.
|
816
819
|
#
|
817
|
-
# @return [nil]
|
818
|
-
#
|
819
820
|
# @param file [String]
|
820
821
|
# File path to which the result should be written.
|
821
822
|
# @param pretty [Boolean]
|
@@ -823,17 +824,45 @@ module Polars
|
|
823
824
|
# @param row_oriented [Boolean]
|
824
825
|
# Write to row oriented json. This is slower, but more common.
|
825
826
|
#
|
826
|
-
# @
|
827
|
+
# @return [nil]
|
828
|
+
#
|
829
|
+
# @example
|
830
|
+
# df = Polars::DataFrame.new(
|
831
|
+
# {
|
832
|
+
# "foo" => [1, 2, 3],
|
833
|
+
# "bar" => [6, 7, 8]
|
834
|
+
# }
|
835
|
+
# )
|
836
|
+
# df.write_json
|
837
|
+
# # => "{\"columns\":[{\"name\":\"foo\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[1,2,3]},{\"name\":\"bar\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[6,7,8]}]}"
|
838
|
+
#
|
839
|
+
# @example
|
840
|
+
# df.write_json(row_oriented: true)
|
841
|
+
# # => "[{\"foo\":1,\"bar\":6},{\"foo\":2,\"bar\":7},{\"foo\":3,\"bar\":8}]"
|
827
842
|
def write_json(
|
828
|
-
file,
|
843
|
+
file = nil,
|
829
844
|
pretty: false,
|
830
845
|
row_oriented: false
|
831
846
|
)
|
832
847
|
if Utils.pathlike?(file)
|
833
848
|
file = Utils.normalise_filepath(file)
|
834
849
|
end
|
835
|
-
|
836
|
-
|
850
|
+
to_string_io = !file.nil? && file.is_a?(StringIO)
|
851
|
+
if file.nil? || to_string_io
|
852
|
+
buf = StringIO.new
|
853
|
+
buf.set_encoding(Encoding::BINARY)
|
854
|
+
_df.write_json(buf, pretty, row_oriented)
|
855
|
+
json_bytes = buf.string
|
856
|
+
|
857
|
+
json_str = json_bytes.force_encoding(Encoding::UTF_8)
|
858
|
+
if to_string_io
|
859
|
+
file.write(json_str)
|
860
|
+
else
|
861
|
+
return json_str
|
862
|
+
end
|
863
|
+
else
|
864
|
+
_df.write_json(file, pretty, row_oriented)
|
865
|
+
end
|
837
866
|
nil
|
838
867
|
end
|
839
868
|
|
@@ -843,12 +872,36 @@ module Polars
|
|
843
872
|
# File path to which the result should be written.
|
844
873
|
#
|
845
874
|
# @return [nil]
|
846
|
-
|
875
|
+
#
|
876
|
+
# @example
|
877
|
+
# df = Polars::DataFrame.new(
|
878
|
+
# {
|
879
|
+
# "foo" => [1, 2, 3],
|
880
|
+
# "bar" => [6, 7, 8]
|
881
|
+
# }
|
882
|
+
# )
|
883
|
+
# df.write_ndjson()
|
884
|
+
# # => "{\"foo\":1,\"bar\":6}\n{\"foo\":2,\"bar\":7}\n{\"foo\":3,\"bar\":8}\n"
|
885
|
+
def write_ndjson(file = nil)
|
847
886
|
if Utils.pathlike?(file)
|
848
887
|
file = Utils.normalise_filepath(file)
|
849
888
|
end
|
850
|
-
|
851
|
-
|
889
|
+
to_string_io = !file.nil? && file.is_a?(StringIO)
|
890
|
+
if file.nil? || to_string_io
|
891
|
+
buf = StringIO.new
|
892
|
+
buf.set_encoding(Encoding::BINARY)
|
893
|
+
_df.write_ndjson(buf)
|
894
|
+
json_bytes = buf.string
|
895
|
+
|
896
|
+
json_str = json_bytes.force_encoding(Encoding::UTF_8)
|
897
|
+
if to_string_io
|
898
|
+
file.write(json_str)
|
899
|
+
else
|
900
|
+
return json_str
|
901
|
+
end
|
902
|
+
else
|
903
|
+
_df.write_ndjson(file)
|
904
|
+
end
|
852
905
|
nil
|
853
906
|
end
|
854
907
|
|
@@ -1010,7 +1063,7 @@ module Polars
|
|
1010
1063
|
|
1011
1064
|
# Write to Apache Parquet file.
|
1012
1065
|
#
|
1013
|
-
# @param file [String]
|
1066
|
+
# @param file [String, Pathname, StringIO]
|
1014
1067
|
# File path to which the file should be written.
|
1015
1068
|
# @param compression ["lz4", "uncompressed", "snappy", "gzip", "lzo", "brotli", "zstd"]
|
1016
1069
|
# Choose "zstd" for good compression performance.
|
@@ -1027,10 +1080,9 @@ module Polars
|
|
1027
1080
|
# @param statistics [Boolean]
|
1028
1081
|
# Write statistics to the parquet headers. This requires extra compute.
|
1029
1082
|
# @param row_group_size [Integer, nil]
|
1030
|
-
# Size of the row groups in number of rows.
|
1031
|
-
#
|
1032
|
-
#
|
1033
|
-
# writing speeds.
|
1083
|
+
# Size of the row groups in number of rows. Defaults to 512^2 rows.
|
1084
|
+
# @param data_page_size [Integer, nil]
|
1085
|
+
# Size of the data page in bytes. Defaults to 1024^2 bytes.
|
1034
1086
|
#
|
1035
1087
|
# @return [nil]
|
1036
1088
|
def write_parquet(
|
@@ -1038,7 +1090,8 @@ module Polars
|
|
1038
1090
|
compression: "zstd",
|
1039
1091
|
compression_level: nil,
|
1040
1092
|
statistics: false,
|
1041
|
-
row_group_size: nil
|
1093
|
+
row_group_size: nil,
|
1094
|
+
data_page_size: nil
|
1042
1095
|
)
|
1043
1096
|
if compression.nil?
|
1044
1097
|
compression = "uncompressed"
|
@@ -1048,7 +1101,7 @@ module Polars
|
|
1048
1101
|
end
|
1049
1102
|
|
1050
1103
|
_df.write_parquet(
|
1051
|
-
file, compression, compression_level, statistics, row_group_size
|
1104
|
+
file, compression, compression_level, statistics, row_group_size, data_page_size
|
1052
1105
|
)
|
1053
1106
|
end
|
1054
1107
|
|
@@ -1084,7 +1137,7 @@ module Polars
|
|
1084
1137
|
# df.estimated_size
|
1085
1138
|
# # => 25888898
|
1086
1139
|
# df.estimated_size("mb")
|
1087
|
-
# # =>
|
1140
|
+
# # => 17.0601749420166
|
1088
1141
|
def estimated_size(unit = "b")
|
1089
1142
|
sz = _df.estimated_size
|
1090
1143
|
Utils.scale_bytes(sz, to: unit)
|
@@ -1782,7 +1835,7 @@ module Polars
|
|
1782
1835
|
# "b" => [2, 4, 6]
|
1783
1836
|
# }
|
1784
1837
|
# )
|
1785
|
-
# df.
|
1838
|
+
# df.with_row_index
|
1786
1839
|
# # =>
|
1787
1840
|
# # shape: (3, 3)
|
1788
1841
|
# # ┌────────┬─────┬─────┐
|
@@ -1794,9 +1847,10 @@ module Polars
|
|
1794
1847
|
# # │ 1 ┆ 3 ┆ 4 │
|
1795
1848
|
# # │ 2 ┆ 5 ┆ 6 │
|
1796
1849
|
# # └────────┴─────┴─────┘
|
1797
|
-
def
|
1798
|
-
_from_rbdf(_df.
|
1850
|
+
def with_row_index(name: "row_nr", offset: 0)
|
1851
|
+
_from_rbdf(_df.with_row_index(name, offset))
|
1799
1852
|
end
|
1853
|
+
alias_method :with_row_count, :with_row_index
|
1800
1854
|
|
1801
1855
|
# Start a group by operation.
|
1802
1856
|
#
|
@@ -2160,12 +2214,13 @@ module Polars
|
|
2160
2214
|
# closed: "right"
|
2161
2215
|
# ).agg(Polars.col("A").alias("A_agg_list"))
|
2162
2216
|
# # =>
|
2163
|
-
# # shape: (
|
2217
|
+
# # shape: (4, 4)
|
2164
2218
|
# # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
|
2165
2219
|
# # │ _lower_boundary ┆ _upper_boundary ┆ idx ┆ A_agg_list │
|
2166
2220
|
# # │ --- ┆ --- ┆ --- ┆ --- │
|
2167
2221
|
# # │ i64 ┆ i64 ┆ i64 ┆ list[str] │
|
2168
2222
|
# # ╞═════════════════╪═════════════════╪═════╪═════════════════╡
|
2223
|
+
# # │ -2 ┆ 1 ┆ -2 ┆ ["A", "A"] │
|
2169
2224
|
# # │ 0 ┆ 3 ┆ 0 ┆ ["A", "B", "B"] │
|
2170
2225
|
# # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
|
2171
2226
|
# # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
|
@@ -2433,6 +2488,8 @@ module Polars
|
|
2433
2488
|
# Join strategy.
|
2434
2489
|
# @param suffix [String]
|
2435
2490
|
# Suffix to append to columns with a duplicate name.
|
2491
|
+
# @param join_nulls [Boolean]
|
2492
|
+
# Join on null values. By default null values will never produce matches.
|
2436
2493
|
#
|
2437
2494
|
# @return [DataFrame]
|
2438
2495
|
#
|
@@ -2515,7 +2572,7 @@ module Polars
|
|
2515
2572
|
# # ╞═════╪═════╪═════╡
|
2516
2573
|
# # │ 3 ┆ 8.0 ┆ c │
|
2517
2574
|
# # └─────┴─────┴─────┘
|
2518
|
-
def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right")
|
2575
|
+
def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right", join_nulls: false)
|
2519
2576
|
lazy
|
2520
2577
|
.join(
|
2521
2578
|
other.lazy,
|
@@ -2524,6 +2581,7 @@ module Polars
|
|
2524
2581
|
on: on,
|
2525
2582
|
how: how,
|
2526
2583
|
suffix: suffix,
|
2584
|
+
join_nulls: join_nulls
|
2527
2585
|
)
|
2528
2586
|
.collect(no_optimization: true)
|
2529
2587
|
end
|
@@ -2617,26 +2675,26 @@ module Polars
|
|
2617
2675
|
# # ┌─────┬─────┬───────────┐
|
2618
2676
|
# # │ a ┆ b ┆ b_squared │
|
2619
2677
|
# # │ --- ┆ --- ┆ --- │
|
2620
|
-
# # │ i64 ┆ i64 ┆
|
2678
|
+
# # │ i64 ┆ i64 ┆ i64 │
|
2621
2679
|
# # ╞═════╪═════╪═══════════╡
|
2622
|
-
# # │ 1 ┆ 2 ┆ 4
|
2623
|
-
# # │ 3 ┆ 4 ┆ 16
|
2624
|
-
# # │ 5 ┆ 6 ┆ 36
|
2680
|
+
# # │ 1 ┆ 2 ┆ 4 │
|
2681
|
+
# # │ 3 ┆ 4 ┆ 16 │
|
2682
|
+
# # │ 5 ┆ 6 ┆ 36 │
|
2625
2683
|
# # └─────┴─────┴───────────┘
|
2626
2684
|
#
|
2627
2685
|
# @example Replaced
|
2628
2686
|
# df.with_column(Polars.col("a") ** 2)
|
2629
2687
|
# # =>
|
2630
2688
|
# # shape: (3, 2)
|
2631
|
-
# #
|
2632
|
-
# # │ a
|
2633
|
-
# # │ ---
|
2634
|
-
# # │
|
2635
|
-
# #
|
2636
|
-
# # │ 1
|
2637
|
-
# # │ 9
|
2638
|
-
# # │ 25
|
2639
|
-
# #
|
2689
|
+
# # ┌─────┬─────┐
|
2690
|
+
# # │ a ┆ b │
|
2691
|
+
# # │ --- ┆ --- │
|
2692
|
+
# # │ i64 ┆ i64 │
|
2693
|
+
# # ╞═════╪═════╡
|
2694
|
+
# # │ 1 ┆ 2 │
|
2695
|
+
# # │ 9 ┆ 4 │
|
2696
|
+
# # │ 25 ┆ 6 │
|
2697
|
+
# # └─────┴─────┘
|
2640
2698
|
def with_column(column)
|
2641
2699
|
lazy
|
2642
2700
|
.with_column(column)
|
@@ -2803,16 +2861,36 @@ module Polars
|
|
2803
2861
|
# # │ 2 ┆ 7.0 │
|
2804
2862
|
# # │ 3 ┆ 8.0 │
|
2805
2863
|
# # └─────┴─────┘
|
2806
|
-
|
2807
|
-
|
2808
|
-
|
2809
|
-
|
2810
|
-
|
2811
|
-
|
2812
|
-
|
2813
|
-
|
2814
|
-
|
2815
|
-
|
2864
|
+
#
|
2865
|
+
# @example Drop multiple columns by passing a list of column names.
|
2866
|
+
# df.drop(["bar", "ham"])
|
2867
|
+
# # =>
|
2868
|
+
# # shape: (3, 1)
|
2869
|
+
# # ┌─────┐
|
2870
|
+
# # │ foo │
|
2871
|
+
# # │ --- │
|
2872
|
+
# # │ i64 │
|
2873
|
+
# # ╞═════╡
|
2874
|
+
# # │ 1 │
|
2875
|
+
# # │ 2 │
|
2876
|
+
# # │ 3 │
|
2877
|
+
# # └─────┘
|
2878
|
+
#
|
2879
|
+
# @example Use positional arguments to drop multiple columns.
|
2880
|
+
# df.drop("foo", "ham")
|
2881
|
+
# # =>
|
2882
|
+
# # shape: (3, 1)
|
2883
|
+
# # ┌─────┐
|
2884
|
+
# # │ bar │
|
2885
|
+
# # │ --- │
|
2886
|
+
# # │ f64 │
|
2887
|
+
# # ╞═════╡
|
2888
|
+
# # │ 6.0 │
|
2889
|
+
# # │ 7.0 │
|
2890
|
+
# # │ 8.0 │
|
2891
|
+
# # └─────┘
|
2892
|
+
def drop(*columns)
|
2893
|
+
lazy.drop(*columns).collect(_eager: true)
|
2816
2894
|
end
|
2817
2895
|
|
2818
2896
|
# Drop in place.
|
@@ -2867,7 +2945,7 @@ module Polars
|
|
2867
2945
|
# "c" => [true, true, false, nil]
|
2868
2946
|
# }
|
2869
2947
|
# )
|
2870
|
-
# df.
|
2948
|
+
# df.clear
|
2871
2949
|
# # =>
|
2872
2950
|
# # shape: (0, 3)
|
2873
2951
|
# # ┌─────┬─────┬──────┐
|
@@ -2876,9 +2954,31 @@ module Polars
|
|
2876
2954
|
# # │ i64 ┆ f64 ┆ bool │
|
2877
2955
|
# # ╞═════╪═════╪══════╡
|
2878
2956
|
# # └─────┴─────┴──────┘
|
2879
|
-
|
2880
|
-
|
2957
|
+
#
|
2958
|
+
# @example
|
2959
|
+
# df.clear(2)
|
2960
|
+
# # =>
|
2961
|
+
# # shape: (2, 3)
|
2962
|
+
# # ┌──────┬──────┬──────┐
|
2963
|
+
# # │ a ┆ b ┆ c │
|
2964
|
+
# # │ --- ┆ --- ┆ --- │
|
2965
|
+
# # │ i64 ┆ f64 ┆ bool │
|
2966
|
+
# # ╞══════╪══════╪══════╡
|
2967
|
+
# # │ null ┆ null ┆ null │
|
2968
|
+
# # │ null ┆ null ┆ null │
|
2969
|
+
# # └──────┴──────┴──────┘
|
2970
|
+
def clear(n = 0)
|
2971
|
+
if n == 0
|
2972
|
+
_from_rbdf(_df.clear)
|
2973
|
+
elsif n > 0 || len > 0
|
2974
|
+
self.class.new(
|
2975
|
+
schema.to_h { |nm, tp| [nm, Series.new(nm, [], dtype: tp).extend_constant(nil, n)] }
|
2976
|
+
)
|
2977
|
+
else
|
2978
|
+
clone
|
2979
|
+
end
|
2881
2980
|
end
|
2981
|
+
alias_method :cleared, :clear
|
2882
2982
|
|
2883
2983
|
# clone handled by initialize_copy
|
2884
2984
|
|
@@ -3141,8 +3241,11 @@ module Polars
|
|
3141
3241
|
aggregate_expr = Polars.element.median._rbexpr
|
3142
3242
|
when "last"
|
3143
3243
|
aggregate_expr = Polars.element.last._rbexpr
|
3244
|
+
when "len"
|
3245
|
+
aggregate_expr = Polars.len._rbexpr
|
3144
3246
|
when "count"
|
3145
|
-
|
3247
|
+
warn "`aggregate_function: \"count\"` input for `pivot` is deprecated. Use `aggregate_function: \"len\"` instead."
|
3248
|
+
aggregate_expr = Polars.len._rbexpr
|
3146
3249
|
else
|
3147
3250
|
raise ArgumentError, "Argument aggregate fn: '#{aggregate_fn}' was not expected."
|
3148
3251
|
end
|
@@ -3154,9 +3257,9 @@ module Polars
|
|
3154
3257
|
|
3155
3258
|
_from_rbdf(
|
3156
3259
|
_df.pivot_expr(
|
3157
|
-
values,
|
3158
3260
|
index,
|
3159
3261
|
columns,
|
3262
|
+
values,
|
3160
3263
|
maintain_order,
|
3161
3264
|
sort_columns,
|
3162
3265
|
aggregate_expr,
|
@@ -3591,8 +3694,13 @@ module Polars
|
|
3591
3694
|
|
3592
3695
|
# Select columns from this DataFrame.
|
3593
3696
|
#
|
3594
|
-
# @param exprs [
|
3595
|
-
# Column
|
3697
|
+
# @param exprs [Array]
|
3698
|
+
# Column(s) to select, specified as positional arguments.
|
3699
|
+
# Accepts expression input. Strings are parsed as column names,
|
3700
|
+
# other non-expression inputs are parsed as literals.
|
3701
|
+
# @param named_exprs [Hash]
|
3702
|
+
# Additional columns to select, specified as keyword arguments.
|
3703
|
+
# The columns will be renamed to the keyword used.
|
3596
3704
|
#
|
3597
3705
|
# @return [DataFrame]
|
3598
3706
|
#
|
@@ -3672,23 +3780,25 @@ module Polars
|
|
3672
3780
|
# # │ 0 │
|
3673
3781
|
# # │ 10 │
|
3674
3782
|
# # └─────────┘
|
3675
|
-
def select(exprs)
|
3676
|
-
|
3677
|
-
lazy
|
3678
|
-
.select(exprs)
|
3679
|
-
.collect(no_optimization: true, string_cache: false)
|
3680
|
-
._df
|
3681
|
-
)
|
3783
|
+
def select(*exprs, **named_exprs)
|
3784
|
+
lazy.select(*exprs, **named_exprs).collect(_eager: true)
|
3682
3785
|
end
|
3683
3786
|
|
3684
|
-
# Add
|
3787
|
+
# Add columns to this DataFrame.
|
3788
|
+
#
|
3789
|
+
# Added columns will replace existing columns with the same name.
|
3685
3790
|
#
|
3686
3791
|
# @param exprs [Array]
|
3687
|
-
#
|
3792
|
+
# Column(s) to add, specified as positional arguments.
|
3793
|
+
# Accepts expression input. Strings are parsed as column names, other
|
3794
|
+
# non-expression inputs are parsed as literals.
|
3795
|
+
# @param named_exprs [Hash]
|
3796
|
+
# Additional columns to add, specified as keyword arguments.
|
3797
|
+
# The columns will be renamed to the keyword used.
|
3688
3798
|
#
|
3689
3799
|
# @return [DataFrame]
|
3690
3800
|
#
|
3691
|
-
# @example
|
3801
|
+
# @example Pass an expression to add it as a new column.
|
3692
3802
|
# df = Polars::DataFrame.new(
|
3693
3803
|
# {
|
3694
3804
|
# "a" => [1, 2, 3, 4],
|
@@ -3696,32 +3806,94 @@ module Polars
|
|
3696
3806
|
# "c" => [true, true, false, true]
|
3697
3807
|
# }
|
3698
3808
|
# )
|
3809
|
+
# df.with_columns((Polars.col("a") ** 2).alias("a^2"))
|
3810
|
+
# # =>
|
3811
|
+
# # shape: (4, 4)
|
3812
|
+
# # ┌─────┬──────┬───────┬─────┐
|
3813
|
+
# # │ a ┆ b ┆ c ┆ a^2 │
|
3814
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
3815
|
+
# # │ i64 ┆ f64 ┆ bool ┆ i64 │
|
3816
|
+
# # ╞═════╪══════╪═══════╪═════╡
|
3817
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 1 │
|
3818
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 4 │
|
3819
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 9 │
|
3820
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 16 │
|
3821
|
+
# # └─────┴──────┴───────┴─────┘
|
3822
|
+
#
|
3823
|
+
# @example Added columns will replace existing columns with the same name.
|
3824
|
+
# df.with_columns(Polars.col("a").cast(Polars::Float64))
|
3825
|
+
# # =>
|
3826
|
+
# # shape: (4, 3)
|
3827
|
+
# # ┌─────┬──────┬───────┐
|
3828
|
+
# # │ a ┆ b ┆ c │
|
3829
|
+
# # │ --- ┆ --- ┆ --- │
|
3830
|
+
# # │ f64 ┆ f64 ┆ bool │
|
3831
|
+
# # ╞═════╪══════╪═══════╡
|
3832
|
+
# # │ 1.0 ┆ 0.5 ┆ true │
|
3833
|
+
# # │ 2.0 ┆ 4.0 ┆ true │
|
3834
|
+
# # │ 3.0 ┆ 10.0 ┆ false │
|
3835
|
+
# # │ 4.0 ┆ 13.0 ┆ true │
|
3836
|
+
# # └─────┴──────┴───────┘
|
3837
|
+
#
|
3838
|
+
# @example Multiple columns can be added by passing a list of expressions.
|
3699
3839
|
# df.with_columns(
|
3700
3840
|
# [
|
3701
3841
|
# (Polars.col("a") ** 2).alias("a^2"),
|
3702
3842
|
# (Polars.col("b") / 2).alias("b/2"),
|
3703
|
-
# (Polars.col("c").
|
3843
|
+
# (Polars.col("c").not_).alias("not c"),
|
3704
3844
|
# ]
|
3705
3845
|
# )
|
3706
3846
|
# # =>
|
3707
3847
|
# # shape: (4, 6)
|
3708
|
-
# #
|
3709
|
-
# # │ a ┆ b ┆ c ┆ a^2
|
3710
|
-
# # │ --- ┆ --- ┆ --- ┆ ---
|
3711
|
-
# # │ i64 ┆ f64 ┆ bool ┆
|
3712
|
-
# #
|
3713
|
-
# # │ 1 ┆ 0.5 ┆ true ┆ 1
|
3714
|
-
# # │ 2 ┆ 4.0 ┆ true ┆ 4
|
3715
|
-
# # │ 3 ┆ 10.0 ┆ false ┆ 9
|
3716
|
-
# # │ 4 ┆ 13.0 ┆ true ┆ 16
|
3717
|
-
# #
|
3718
|
-
|
3719
|
-
|
3720
|
-
|
3721
|
-
|
3722
|
-
|
3723
|
-
|
3724
|
-
|
3848
|
+
# # ┌─────┬──────┬───────┬─────┬──────┬───────┐
|
3849
|
+
# # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
|
3850
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
3851
|
+
# # │ i64 ┆ f64 ┆ bool ┆ i64 ┆ f64 ┆ bool │
|
3852
|
+
# # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡
|
3853
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 1 ┆ 0.25 ┆ false │
|
3854
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 4 ┆ 2.0 ┆ false │
|
3855
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 9 ┆ 5.0 ┆ true │
|
3856
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 16 ┆ 6.5 ┆ false │
|
3857
|
+
# # └─────┴──────┴───────┴─────┴──────┴───────┘
|
3858
|
+
#
|
3859
|
+
# @example Multiple columns also can be added using positional arguments instead of a list.
|
3860
|
+
# df.with_columns(
|
3861
|
+
# (Polars.col("a") ** 2).alias("a^2"),
|
3862
|
+
# (Polars.col("b") / 2).alias("b/2"),
|
3863
|
+
# (Polars.col("c").not_).alias("not c"),
|
3864
|
+
# )
|
3865
|
+
# # =>
|
3866
|
+
# # shape: (4, 6)
|
3867
|
+
# # ┌─────┬──────┬───────┬─────┬──────┬───────┐
|
3868
|
+
# # │ a ┆ b ┆ c ┆ a^2 ┆ b/2 ┆ not c │
|
3869
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
3870
|
+
# # │ i64 ┆ f64 ┆ bool ┆ i64 ┆ f64 ┆ bool │
|
3871
|
+
# # ╞═════╪══════╪═══════╪═════╪══════╪═══════╡
|
3872
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 1 ┆ 0.25 ┆ false │
|
3873
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 4 ┆ 2.0 ┆ false │
|
3874
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 9 ┆ 5.0 ┆ true │
|
3875
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 16 ┆ 6.5 ┆ false │
|
3876
|
+
# # └─────┴──────┴───────┴─────┴──────┴───────┘
|
3877
|
+
#
|
3878
|
+
# @example Use keyword arguments to easily name your expression inputs.
|
3879
|
+
# df.with_columns(
|
3880
|
+
# ab: Polars.col("a") * Polars.col("b"),
|
3881
|
+
# not_c: Polars.col("c").not_
|
3882
|
+
# )
|
3883
|
+
# # =>
|
3884
|
+
# # shape: (4, 5)
|
3885
|
+
# # ┌─────┬──────┬───────┬──────┬───────┐
|
3886
|
+
# # │ a ┆ b ┆ c ┆ ab ┆ not_c │
|
3887
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
3888
|
+
# # │ i64 ┆ f64 ┆ bool ┆ f64 ┆ bool │
|
3889
|
+
# # ╞═════╪══════╪═══════╪══════╪═══════╡
|
3890
|
+
# # │ 1 ┆ 0.5 ┆ true ┆ 0.5 ┆ false │
|
3891
|
+
# # │ 2 ┆ 4.0 ┆ true ┆ 8.0 ┆ false │
|
3892
|
+
# # │ 3 ┆ 10.0 ┆ false ┆ 30.0 ┆ true │
|
3893
|
+
# # │ 4 ┆ 13.0 ┆ true ┆ 52.0 ┆ false │
|
3894
|
+
# # └─────┴──────┴───────┴──────┴───────┘
|
3895
|
+
def with_columns(*exprs, **named_exprs)
|
3896
|
+
lazy.with_columns(*exprs, **named_exprs).collect(_eager: true)
|
3725
3897
|
end
|
3726
3898
|
|
3727
3899
|
# Get number of chunks used by the ChunkedArrays of this DataFrame.
|
@@ -4363,7 +4535,7 @@ module Polars
|
|
4363
4535
|
# # null
|
4364
4536
|
# # ]
|
4365
4537
|
#
|
4366
|
-
# @example A horizontal boolean or, similar to a row-wise .any
|
4538
|
+
# @example A horizontal boolean or, similar to a row-wise .any:
|
4367
4539
|
# df = Polars::DataFrame.new(
|
4368
4540
|
# {
|
4369
4541
|
# "a" => [false, false, true],
|
@@ -4486,7 +4658,7 @@ module Polars
|
|
4486
4658
|
# # => [{"a"=>1, "b"=>2}, {"a"=>3, "b"=>4}, {"a"=>5, "b"=>6}]
|
4487
4659
|
def rows(named: false)
|
4488
4660
|
if named
|
4489
|
-
columns = columns
|
4661
|
+
columns = self.columns
|
4490
4662
|
_df.row_tuples.map do |v|
|
4491
4663
|
columns.zip(v).to_h
|
4492
4664
|
end
|
@@ -4527,7 +4699,7 @@ module Polars
|
|
4527
4699
|
return to_enum(:iter_rows, named: named, buffer_size: buffer_size) unless block_given?
|
4528
4700
|
|
4529
4701
|
# load into the local namespace for a modest performance boost in the hot loops
|
4530
|
-
columns = columns
|
4702
|
+
columns = self.columns
|
4531
4703
|
|
4532
4704
|
# note: buffering rows results in a 2-4x speedup over individual calls
|
4533
4705
|
# to ".row(i)", so it should only be disabled in extremely specific cases.
|
@@ -4764,13 +4936,51 @@ module Polars
|
|
4764
4936
|
_from_rbdf(_df.unnest(names))
|
4765
4937
|
end
|
4766
4938
|
|
4767
|
-
#
|
4939
|
+
# Requires NumPy
|
4768
4940
|
# def corr
|
4769
4941
|
# end
|
4770
4942
|
|
4771
|
-
#
|
4772
|
-
#
|
4773
|
-
#
|
4943
|
+
# Take two sorted DataFrames and merge them by the sorted key.
|
4944
|
+
#
|
4945
|
+
# The output of this operation will also be sorted.
|
4946
|
+
# It is the callers responsibility that the frames are sorted
|
4947
|
+
# by that key otherwise the output will not make sense.
|
4948
|
+
#
|
4949
|
+
# The schemas of both DataFrames must be equal.
|
4950
|
+
#
|
4951
|
+
# @param other [DataFrame]
|
4952
|
+
# Other DataFrame that must be merged
|
4953
|
+
# @param key [String]
|
4954
|
+
# Key that is sorted.
|
4955
|
+
#
|
4956
|
+
# @return [DataFrame]
|
4957
|
+
#
|
4958
|
+
# @example
|
4959
|
+
# df0 = Polars::DataFrame.new(
|
4960
|
+
# {"name" => ["steve", "elise", "bob"], "age" => [42, 44, 18]}
|
4961
|
+
# ).sort("age")
|
4962
|
+
# df1 = Polars::DataFrame.new(
|
4963
|
+
# {"name" => ["anna", "megan", "steve", "thomas"], "age" => [21, 33, 42, 20]}
|
4964
|
+
# ).sort("age")
|
4965
|
+
# df0.merge_sorted(df1, "age")
|
4966
|
+
# # =>
|
4967
|
+
# # shape: (7, 2)
|
4968
|
+
# # ┌────────┬─────┐
|
4969
|
+
# # │ name ┆ age │
|
4970
|
+
# # │ --- ┆ --- │
|
4971
|
+
# # │ str ┆ i64 │
|
4972
|
+
# # ╞════════╪═════╡
|
4973
|
+
# # │ bob ┆ 18 │
|
4974
|
+
# # │ thomas ┆ 20 │
|
4975
|
+
# # │ anna ┆ 21 │
|
4976
|
+
# # │ megan ┆ 33 │
|
4977
|
+
# # │ steve ┆ 42 │
|
4978
|
+
# # │ steve ┆ 42 │
|
4979
|
+
# # │ elise ┆ 44 │
|
4980
|
+
# # └────────┴─────┘
|
4981
|
+
def merge_sorted(other, key)
|
4982
|
+
lazy.merge_sorted(other.lazy, key).collect(_eager: true)
|
4983
|
+
end
|
4774
4984
|
|
4775
4985
|
# Indicate that one or multiple columns are sorted.
|
4776
4986
|
#
|
@@ -4812,7 +5022,7 @@ module Polars
|
|
4812
5022
|
end
|
4813
5023
|
|
4814
5024
|
def _pos_idxs(idxs, dim)
|
4815
|
-
idx_type =
|
5025
|
+
idx_type = Plr.get_index_type
|
4816
5026
|
|
4817
5027
|
if idxs.is_a?(Series)
|
4818
5028
|
if idxs.dtype == idx_type
|
@@ -5045,14 +5255,14 @@ module Polars
|
|
5045
5255
|
elsif data[0].is_a?(Hash)
|
5046
5256
|
column_names, dtypes = _unpack_schema(columns)
|
5047
5257
|
schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
|
5048
|
-
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
|
5258
|
+
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
|
5049
5259
|
if column_names
|
5050
5260
|
rbdf = _post_apply_columns(rbdf, column_names)
|
5051
5261
|
end
|
5052
5262
|
return rbdf
|
5053
5263
|
elsif data[0].is_a?(::Array)
|
5264
|
+
first_element = data[0]
|
5054
5265
|
if orient.nil? && !columns.nil?
|
5055
|
-
first_element = data[0]
|
5056
5266
|
row_types = first_element.filter_map { |value| value.class }.uniq
|
5057
5267
|
if row_types.include?(Integer) && row_types.include?(Float)
|
5058
5268
|
row_types.delete(Integer)
|