polars-df 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +468 -538
- data/Cargo.toml +1 -0
- data/README.md +8 -7
- data/ext/polars/Cargo.toml +17 -10
- data/ext/polars/src/batched_csv.rs +26 -26
- data/ext/polars/src/conversion.rs +121 -93
- data/ext/polars/src/dataframe.rs +116 -71
- data/ext/polars/src/error.rs +0 -5
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +68 -284
- data/ext/polars/src/expr/list.rs +17 -9
- data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +196 -0
- data/ext/polars/src/expr/string.rs +85 -58
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +66 -41
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +46 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +47 -42
- data/ext/polars/src/lib.rs +156 -72
- data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
- data/ext/polars/src/{apply → map}/mod.rs +3 -3
- data/ext/polars/src/{apply → map}/series.rs +12 -16
- data/ext/polars/src/object.rs +1 -1
- data/ext/polars/src/rb_modules.rs +22 -7
- data/ext/polars/src/series/construction.rs +4 -4
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/series/set_at_idx.rs +33 -17
- data/ext/polars/src/series.rs +7 -27
- data/ext/polars/src/sql.rs +46 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +115 -82
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +5 -25
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +177 -94
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +93 -66
- data/lib/polars/lazy_functions.rb +36 -48
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +26 -13
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/utils.rb +12 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +18 -7
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
data/lib/polars/data_frame.rb
CHANGED
@@ -20,15 +20,9 @@ module Polars
|
|
20
20
|
# this does not yield conclusive results, column orientation is used.
|
21
21
|
def initialize(data = nil, schema: nil, columns: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
|
22
22
|
schema ||= columns
|
23
|
-
raise Todo if schema_overrides
|
24
23
|
|
25
|
-
# TODO deprecate in favor of read_sql
|
26
24
|
if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
|
27
|
-
|
28
|
-
data = {}
|
29
|
-
result.columns.each_with_index do |k, i|
|
30
|
-
data[k] = result.rows.map { |r| r[i] }
|
31
|
-
end
|
25
|
+
raise ArgumentError, "Use read_database instead"
|
32
26
|
end
|
33
27
|
|
34
28
|
if data.nil?
|
@@ -905,6 +899,7 @@ module Polars
|
|
905
899
|
def write_csv(
|
906
900
|
file = nil,
|
907
901
|
has_header: true,
|
902
|
+
include_header: nil,
|
908
903
|
sep: ",",
|
909
904
|
quote: '"',
|
910
905
|
batch_size: 1024,
|
@@ -914,6 +909,8 @@ module Polars
|
|
914
909
|
float_precision: nil,
|
915
910
|
null_value: nil
|
916
911
|
)
|
912
|
+
include_header = has_header if include_header.nil?
|
913
|
+
|
917
914
|
if sep.length > 1
|
918
915
|
raise ArgumentError, "only single byte separator is allowed"
|
919
916
|
elsif quote.length > 1
|
@@ -927,7 +924,7 @@ module Polars
|
|
927
924
|
buffer.set_encoding(Encoding::BINARY)
|
928
925
|
_df.write_csv(
|
929
926
|
buffer,
|
930
|
-
|
927
|
+
include_header,
|
931
928
|
sep.ord,
|
932
929
|
quote.ord,
|
933
930
|
batch_size,
|
@@ -946,7 +943,7 @@ module Polars
|
|
946
943
|
|
947
944
|
_df.write_csv(
|
948
945
|
file,
|
949
|
-
|
946
|
+
include_header,
|
950
947
|
sep.ord,
|
951
948
|
quote.ord,
|
952
949
|
batch_size,
|
@@ -1151,22 +1148,8 @@ module Polars
|
|
1151
1148
|
# # │ b ┆ 1 ┆ 2 ┆ 3 │
|
1152
1149
|
# # └─────┴─────┴─────┴─────┘
|
1153
1150
|
def transpose(include_header: false, header_name: "column", column_names: nil)
|
1154
|
-
|
1155
|
-
|
1156
|
-
names = []
|
1157
|
-
n = df.width
|
1158
|
-
if include_header
|
1159
|
-
names << header_name
|
1160
|
-
n -= 1
|
1161
|
-
end
|
1162
|
-
|
1163
|
-
column_names = column_names.each
|
1164
|
-
n.times do
|
1165
|
-
names << column_names.next
|
1166
|
-
end
|
1167
|
-
df.columns = names
|
1168
|
-
end
|
1169
|
-
df
|
1151
|
+
keep_names_as = include_header ? header_name : nil
|
1152
|
+
_from_rbdf(_df.transpose(keep_names_as, column_names))
|
1170
1153
|
end
|
1171
1154
|
|
1172
1155
|
# Reverse the DataFrame.
|
@@ -1811,13 +1794,13 @@ module Polars
|
|
1811
1794
|
_from_rbdf(_df.with_row_count(name, offset))
|
1812
1795
|
end
|
1813
1796
|
|
1814
|
-
# Start a
|
1797
|
+
# Start a group by operation.
|
1815
1798
|
#
|
1816
1799
|
# @param by [Object]
|
1817
1800
|
# Column(s) to group by.
|
1818
1801
|
# @param maintain_order [Boolean]
|
1819
1802
|
# Make sure that the order of the groups remain consistent. This is more
|
1820
|
-
# expensive than a default
|
1803
|
+
# expensive than a default group by. Note that this only works in expression
|
1821
1804
|
# aggregations.
|
1822
1805
|
#
|
1823
1806
|
# @return [GroupBy]
|
@@ -1830,7 +1813,7 @@ module Polars
|
|
1830
1813
|
# "c" => [6, 5, 4, 3, 2, 1]
|
1831
1814
|
# }
|
1832
1815
|
# )
|
1833
|
-
# df.
|
1816
|
+
# df.group_by("a").agg(Polars.col("b").sum).sort("a")
|
1834
1817
|
# # =>
|
1835
1818
|
# # shape: (3, 2)
|
1836
1819
|
# # ┌─────┬─────┐
|
@@ -1842,25 +1825,26 @@ module Polars
|
|
1842
1825
|
# # │ b ┆ 11 │
|
1843
1826
|
# # │ c ┆ 6 │
|
1844
1827
|
# # └─────┴─────┘
|
1845
|
-
def
|
1828
|
+
def group_by(by, maintain_order: false)
|
1846
1829
|
if !Utils.bool?(maintain_order)
|
1847
|
-
raise TypeError, "invalid input for
|
1830
|
+
raise TypeError, "invalid input for group_by arg `maintain_order`: #{maintain_order}."
|
1848
1831
|
end
|
1849
1832
|
GroupBy.new(
|
1850
|
-
|
1833
|
+
self,
|
1851
1834
|
by,
|
1852
|
-
self.class,
|
1853
1835
|
maintain_order: maintain_order
|
1854
1836
|
)
|
1855
1837
|
end
|
1838
|
+
alias_method :groupby, :group_by
|
1839
|
+
alias_method :group, :group_by
|
1856
1840
|
|
1857
1841
|
# Create rolling groups based on a time column.
|
1858
1842
|
#
|
1859
1843
|
# Also works for index values of type `:i32` or `:i64`.
|
1860
1844
|
#
|
1861
|
-
# Different from a `
|
1845
|
+
# Different from a `group_by_dynamic` the windows are now determined by the
|
1862
1846
|
# individual values and are not of constant intervals. For constant intervals use
|
1863
|
-
# *
|
1847
|
+
# *group_by_dynamic*
|
1864
1848
|
#
|
1865
1849
|
# The `period` and `offset` arguments are created either from a timedelta, or
|
1866
1850
|
# by using the following string language:
|
@@ -1880,7 +1864,7 @@ module Polars
|
|
1880
1864
|
# Or combine them:
|
1881
1865
|
# "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
|
1882
1866
|
#
|
1883
|
-
# In case of a
|
1867
|
+
# In case of a group_by_rolling on an integer column, the windows are defined by:
|
1884
1868
|
#
|
1885
1869
|
# - **"1i" # length 1**
|
1886
1870
|
# - **"10i" # length 10**
|
@@ -1891,7 +1875,7 @@ module Polars
|
|
1891
1875
|
# This column must be sorted in ascending order. If not the output will not
|
1892
1876
|
# make sense.
|
1893
1877
|
#
|
1894
|
-
# In case of a rolling
|
1878
|
+
# In case of a rolling group by on indices, dtype needs to be one of
|
1895
1879
|
# `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
|
1896
1880
|
# performance matters use an `:i64` column.
|
1897
1881
|
# @param period [Object]
|
@@ -1923,7 +1907,7 @@ module Polars
|
|
1923
1907
|
# df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
|
1924
1908
|
# Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
|
1925
1909
|
# )
|
1926
|
-
# df.
|
1910
|
+
# df.group_by_rolling(index_column: "dt", period: "2d").agg(
|
1927
1911
|
# [
|
1928
1912
|
# Polars.sum("a").alias("sum_a"),
|
1929
1913
|
# Polars.min("a").alias("min_a"),
|
@@ -1944,7 +1928,7 @@ module Polars
|
|
1944
1928
|
# # │ 2020-01-03 19:45:32 ┆ 11 ┆ 2 ┆ 9 │
|
1945
1929
|
# # │ 2020-01-08 23:16:43 ┆ 1 ┆ 1 ┆ 1 │
|
1946
1930
|
# # └─────────────────────┴───────┴───────┴───────┘
|
1947
|
-
def
|
1931
|
+
def group_by_rolling(
|
1948
1932
|
index_column:,
|
1949
1933
|
period:,
|
1950
1934
|
offset: nil,
|
@@ -1954,11 +1938,12 @@ module Polars
|
|
1954
1938
|
)
|
1955
1939
|
RollingGroupBy.new(self, index_column, period, offset, closed, by, check_sorted)
|
1956
1940
|
end
|
1941
|
+
alias_method :groupby_rolling, :group_by_rolling
|
1957
1942
|
|
1958
1943
|
# Group based on a time value (or index value of type `:i32`, `:i64`).
|
1959
1944
|
#
|
1960
1945
|
# Time windows are calculated and rows are assigned to windows. Different from a
|
1961
|
-
# normal
|
1946
|
+
# normal group by is that a row can be a member of multiple groups. The time/index
|
1962
1947
|
# window could be seen as a rolling window, with a window size determined by
|
1963
1948
|
# dates/times/values instead of slots in the DataFrame.
|
1964
1949
|
#
|
@@ -1986,7 +1971,7 @@ module Polars
|
|
1986
1971
|
# Or combine them:
|
1987
1972
|
# "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
|
1988
1973
|
#
|
1989
|
-
# In case of a
|
1974
|
+
# In case of a group_by_dynamic on an integer column, the windows are defined by:
|
1990
1975
|
#
|
1991
1976
|
# - "1i" # length 1
|
1992
1977
|
# - "10i" # length 10
|
@@ -1997,7 +1982,7 @@ module Polars
|
|
1997
1982
|
# This column must be sorted in ascending order. If not the output will not
|
1998
1983
|
# make sense.
|
1999
1984
|
#
|
2000
|
-
# In case of a dynamic
|
1985
|
+
# In case of a dynamic group by on indices, dtype needs to be one of
|
2001
1986
|
# `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
|
2002
1987
|
# performance matters use an `:i64` column.
|
2003
1988
|
# @param every
|
@@ -2048,7 +2033,7 @@ module Polars
|
|
2048
2033
|
# # └─────────────────────┴─────┘
|
2049
2034
|
#
|
2050
2035
|
# @example Group by windows of 1 hour starting at 2021-12-16 00:00:00.
|
2051
|
-
# df.
|
2036
|
+
# df.group_by_dynamic("time", every: "1h", closed: "right").agg(
|
2052
2037
|
# [
|
2053
2038
|
# Polars.col("time").min.alias("time_min"),
|
2054
2039
|
# Polars.col("time").max.alias("time_max")
|
@@ -2068,7 +2053,7 @@ module Polars
|
|
2068
2053
|
# # └─────────────────────┴─────────────────────┴─────────────────────┘
|
2069
2054
|
#
|
2070
2055
|
# @example The window boundaries can also be added to the aggregation result.
|
2071
|
-
# df.
|
2056
|
+
# df.group_by_dynamic(
|
2072
2057
|
# "time", every: "1h", include_boundaries: true, closed: "right"
|
2073
2058
|
# ).agg([Polars.col("time").count.alias("time_count")])
|
2074
2059
|
# # =>
|
@@ -2085,7 +2070,7 @@ module Polars
|
|
2085
2070
|
# # └─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
|
2086
2071
|
#
|
2087
2072
|
# @example When closed="left", should not include right end of interval.
|
2088
|
-
# df.
|
2073
|
+
# df.group_by_dynamic("time", every: "1h", closed: "left").agg(
|
2089
2074
|
# [
|
2090
2075
|
# Polars.col("time").count.alias("time_count"),
|
2091
2076
|
# Polars.col("time").alias("time_agg_list")
|
@@ -2105,7 +2090,7 @@ module Polars
|
|
2105
2090
|
# # └─────────────────────┴────────────┴───────────────────────────────────┘
|
2106
2091
|
#
|
2107
2092
|
# @example When closed="both" the time values at the window boundaries belong to 2 groups.
|
2108
|
-
# df.
|
2093
|
+
# df.group_by_dynamic("time", every: "1h", closed: "both").agg(
|
2109
2094
|
# [Polars.col("time").count.alias("time_count")]
|
2110
2095
|
# )
|
2111
2096
|
# # =>
|
@@ -2122,7 +2107,7 @@ module Polars
|
|
2122
2107
|
# # │ 2021-12-16 03:00:00 ┆ 1 │
|
2123
2108
|
# # └─────────────────────┴────────────┘
|
2124
2109
|
#
|
2125
|
-
# @example Dynamic
|
2110
|
+
# @example Dynamic group bys can also be combined with grouping on normal keys.
|
2126
2111
|
# df = Polars::DataFrame.new(
|
2127
2112
|
# {
|
2128
2113
|
# "time" => Polars.date_range(
|
@@ -2133,7 +2118,7 @@ module Polars
|
|
2133
2118
|
# "groups" => ["a", "a", "a", "b", "b", "a", "a"]
|
2134
2119
|
# }
|
2135
2120
|
# )
|
2136
|
-
# df.
|
2121
|
+
# df.group_by_dynamic(
|
2137
2122
|
# "time",
|
2138
2123
|
# every: "1h",
|
2139
2124
|
# closed: "both",
|
@@ -2156,14 +2141,14 @@ module Polars
|
|
2156
2141
|
# # │ b ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1 │
|
2157
2142
|
# # └────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
|
2158
2143
|
#
|
2159
|
-
# @example Dynamic
|
2144
|
+
# @example Dynamic group by on an index column.
|
2160
2145
|
# df = Polars::DataFrame.new(
|
2161
2146
|
# {
|
2162
2147
|
# "idx" => Polars.arange(0, 6, eager: true),
|
2163
2148
|
# "A" => ["A", "A", "B", "B", "B", "C"]
|
2164
2149
|
# }
|
2165
2150
|
# )
|
2166
|
-
# df.
|
2151
|
+
# df.group_by_dynamic(
|
2167
2152
|
# "idx",
|
2168
2153
|
# every: "2i",
|
2169
2154
|
# period: "3i",
|
@@ -2181,7 +2166,7 @@ module Polars
|
|
2181
2166
|
# # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
|
2182
2167
|
# # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
|
2183
2168
|
# # └─────────────────┴─────────────────┴─────┴─────────────────┘
|
2184
|
-
def
|
2169
|
+
def group_by_dynamic(
|
2185
2170
|
index_column,
|
2186
2171
|
every:,
|
2187
2172
|
period: nil,
|
@@ -2205,6 +2190,7 @@ module Polars
|
|
2205
2190
|
start_by
|
2206
2191
|
)
|
2207
2192
|
end
|
2193
|
+
alias_method :groupby_dynamic, :group_by_dynamic
|
2208
2194
|
|
2209
2195
|
# Upsample a DataFrame at a regular frequency.
|
2210
2196
|
#
|
@@ -3464,8 +3450,10 @@ module Polars
|
|
3464
3450
|
|
3465
3451
|
# Shift values by the given period.
|
3466
3452
|
#
|
3467
|
-
# @param
|
3453
|
+
# @param n [Integer]
|
3468
3454
|
# Number of places to shift (may be negative).
|
3455
|
+
# @param fill_value [Object]
|
3456
|
+
# Fill the resulting null values with this value.
|
3469
3457
|
#
|
3470
3458
|
# @return [DataFrame]
|
3471
3459
|
#
|
@@ -3503,8 +3491,8 @@ module Polars
|
|
3503
3491
|
# # │ 3 ┆ 8 ┆ c │
|
3504
3492
|
# # │ null ┆ null ┆ null │
|
3505
3493
|
# # └──────┴──────┴──────┘
|
3506
|
-
def shift(
|
3507
|
-
|
3494
|
+
def shift(n, fill_value: nil)
|
3495
|
+
lazy.shift(n, fill_value: fill_value).collect(_eager: true)
|
3508
3496
|
end
|
3509
3497
|
|
3510
3498
|
# Shift the values by a given period and fill the resulting null values.
|
@@ -3537,9 +3525,7 @@ module Polars
|
|
3537
3525
|
# # │ 2 ┆ 7 ┆ b │
|
3538
3526
|
# # └─────┴─────┴─────┘
|
3539
3527
|
def shift_and_fill(periods, fill_value)
|
3540
|
-
|
3541
|
-
.shift_and_fill(periods, fill_value)
|
3542
|
-
.collect(no_optimization: true, string_cache: false)
|
3528
|
+
shift(periods, fill_value: fill_value)
|
3543
3529
|
end
|
3544
3530
|
|
3545
3531
|
# Get a mask of all duplicated rows in this DataFrame.
|
@@ -3790,7 +3776,7 @@ module Polars
|
|
3790
3776
|
if axis == 0
|
3791
3777
|
_from_rbdf(_df.max)
|
3792
3778
|
elsif axis == 1
|
3793
|
-
Utils.wrap_s(_df.
|
3779
|
+
Utils.wrap_s(_df.max_horizontal)
|
3794
3780
|
else
|
3795
3781
|
raise ArgumentError, "Axis should be 0 or 1."
|
3796
3782
|
end
|
@@ -3822,7 +3808,7 @@ module Polars
|
|
3822
3808
|
if axis == 0
|
3823
3809
|
_from_rbdf(_df.min)
|
3824
3810
|
elsif axis == 1
|
3825
|
-
Utils.wrap_s(_df.
|
3811
|
+
Utils.wrap_s(_df.min_horizontal)
|
3826
3812
|
else
|
3827
3813
|
raise ArgumentError, "Axis should be 0 or 1."
|
3828
3814
|
end
|
@@ -3871,7 +3857,7 @@ module Polars
|
|
3871
3857
|
when 0
|
3872
3858
|
_from_rbdf(_df.sum)
|
3873
3859
|
when 1
|
3874
|
-
Utils.wrap_s(_df.
|
3860
|
+
Utils.wrap_s(_df.sum_horizontal(null_strategy))
|
3875
3861
|
else
|
3876
3862
|
raise ArgumentError, "Axis should be 0 or 1."
|
3877
3863
|
end
|
@@ -3909,7 +3895,7 @@ module Polars
|
|
3909
3895
|
when 0
|
3910
3896
|
_from_rbdf(_df.mean)
|
3911
3897
|
when 1
|
3912
|
-
Utils.wrap_s(_df.
|
3898
|
+
Utils.wrap_s(_df.mean_horizontal(null_strategy))
|
3913
3899
|
else
|
3914
3900
|
raise ArgumentError, "Axis should be 0 or 1."
|
3915
3901
|
end
|
@@ -4294,15 +4280,20 @@ module Polars
|
|
4294
4280
|
end
|
4295
4281
|
|
4296
4282
|
if n.nil? && !frac.nil?
|
4283
|
+
frac = Series.new("frac", [frac]) unless frac.is_a?(Series)
|
4284
|
+
|
4297
4285
|
_from_rbdf(
|
4298
|
-
_df.sample_frac(frac, with_replacement, shuffle, seed)
|
4286
|
+
_df.sample_frac(frac._s, with_replacement, shuffle, seed)
|
4299
4287
|
)
|
4300
4288
|
end
|
4301
4289
|
|
4302
4290
|
if n.nil?
|
4303
4291
|
n = 1
|
4304
4292
|
end
|
4305
|
-
|
4293
|
+
|
4294
|
+
n = Series.new("", [n]) unless n.is_a?(Series)
|
4295
|
+
|
4296
|
+
_from_rbdf(_df.sample_n(n._s, with_replacement, shuffle, seed))
|
4306
4297
|
end
|
4307
4298
|
|
4308
4299
|
# Apply a horizontal reduction on a DataFrame.
|
@@ -4601,7 +4592,7 @@ module Polars
|
|
4601
4592
|
#
|
4602
4593
|
# @example
|
4603
4594
|
# s = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]})
|
4604
|
-
# s.
|
4595
|
+
# s.gather_every(2)
|
4605
4596
|
# # =>
|
4606
4597
|
# # shape: (2, 2)
|
4607
4598
|
# # ┌─────┬─────┐
|
@@ -4612,9 +4603,10 @@ module Polars
|
|
4612
4603
|
# # │ 1 ┆ 5 │
|
4613
4604
|
# # │ 3 ┆ 7 │
|
4614
4605
|
# # └─────┴─────┘
|
4615
|
-
def
|
4616
|
-
select(Utils.col("*").
|
4606
|
+
def gather_every(n)
|
4607
|
+
select(Utils.col("*").gather_every(n))
|
4617
4608
|
end
|
4609
|
+
alias_method :take_every, :gather_every
|
4618
4610
|
|
4619
4611
|
# Hash and combine the rows in this DataFrame.
|
4620
4612
|
#
|
@@ -4671,16 +4663,16 @@ module Polars
|
|
4671
4663
|
# df.interpolate
|
4672
4664
|
# # =>
|
4673
4665
|
# # shape: (4, 3)
|
4674
|
-
# #
|
4675
|
-
# # │ foo
|
4676
|
-
# # │ ---
|
4677
|
-
# # │
|
4678
|
-
# #
|
4679
|
-
# # │ 1
|
4680
|
-
# # │ 5
|
4681
|
-
# # │ 9
|
4682
|
-
# # │ 10
|
4683
|
-
# #
|
4666
|
+
# # ┌──────┬──────┬──────────┐
|
4667
|
+
# # │ foo ┆ bar ┆ baz │
|
4668
|
+
# # │ --- ┆ --- ┆ --- │
|
4669
|
+
# # │ f64 ┆ f64 ┆ f64 │
|
4670
|
+
# # ╞══════╪══════╪══════════╡
|
4671
|
+
# # │ 1.0 ┆ 6.0 ┆ 1.0 │
|
4672
|
+
# # │ 5.0 ┆ 7.0 ┆ 3.666667 │
|
4673
|
+
# # │ 9.0 ┆ 9.0 ┆ 6.333333 │
|
4674
|
+
# # │ 10.0 ┆ null ┆ 9.0 │
|
4675
|
+
# # └──────┴──────┴──────────┘
|
4684
4676
|
def interpolate
|
4685
4677
|
select(Utils.col("*").interpolate)
|
4686
4678
|
end
|
@@ -4952,8 +4944,8 @@ module Polars
|
|
4952
4944
|
[lookup[col[0]] || col[0], col[1]]
|
4953
4945
|
end
|
4954
4946
|
|
4955
|
-
if schema_overrides
|
4956
|
-
|
4947
|
+
if schema_overrides && schema_overrides.any?
|
4948
|
+
column_dtypes.merge!(schema_overrides)
|
4957
4949
|
end
|
4958
4950
|
|
4959
4951
|
column_dtypes.each do |col, dtype|
|
@@ -5056,13 +5048,54 @@ module Polars
|
|
5056
5048
|
return rbdf
|
5057
5049
|
elsif data[0].is_a?(::Array)
|
5058
5050
|
if orient.nil? && !columns.nil?
|
5059
|
-
|
5051
|
+
first_element = data[0]
|
5052
|
+
row_types = first_element.filter_map { |value| value.class }.uniq
|
5053
|
+
if row_types.include?(Integer) && row_types.include?(Float)
|
5054
|
+
row_types.delete(Integer)
|
5055
|
+
end
|
5056
|
+
orient = row_types.length == 1 ? "col" : "row"
|
5060
5057
|
end
|
5061
5058
|
|
5062
5059
|
if orient == "row"
|
5063
|
-
|
5060
|
+
column_names, schema_overrides = _unpack_schema(
|
5061
|
+
schema, schema_overrides: schema_overrides, n_expected: first_element.length
|
5062
|
+
)
|
5063
|
+
local_schema_override = (
|
5064
|
+
schema_overrides.any? ? (raise Todo) : {}
|
5065
|
+
)
|
5066
|
+
if column_names.any? && first_element.length > 0 && first_element.length != column_names.length
|
5067
|
+
raise ArgumentError, "the row data does not match the number of columns"
|
5068
|
+
end
|
5069
|
+
|
5070
|
+
unpack_nested = false
|
5071
|
+
local_schema_override.each do |col, tp|
|
5072
|
+
raise Todo
|
5073
|
+
end
|
5074
|
+
|
5075
|
+
if unpack_nested
|
5076
|
+
raise Todo
|
5077
|
+
else
|
5078
|
+
rbdf = RbDataFrame.read_rows(
|
5079
|
+
data,
|
5080
|
+
infer_schema_length,
|
5081
|
+
local_schema_override.any? ? local_schema_override : nil
|
5082
|
+
)
|
5083
|
+
end
|
5084
|
+
if column_names.any? || schema_overrides.any?
|
5085
|
+
rbdf = _post_apply_columns(
|
5086
|
+
rbdf, column_names, schema_overrides: schema_overrides
|
5087
|
+
)
|
5088
|
+
end
|
5089
|
+
return rbdf
|
5064
5090
|
elsif orient == "col" || orient.nil?
|
5065
|
-
|
5091
|
+
column_names, schema_overrides = _unpack_schema(
|
5092
|
+
schema, schema_overrides: schema_overrides, n_expected: data.length
|
5093
|
+
)
|
5094
|
+
data_series =
|
5095
|
+
data.map.with_index do |element, i|
|
5096
|
+
Series.new(column_names[i], element, dtype: schema_overrides[column_names[i]])._s
|
5097
|
+
end
|
5098
|
+
return RbDataFrame.new(data_series)
|
5066
5099
|
else
|
5067
5100
|
raise ArgumentError, "orient must be one of {{'col', 'row', nil}}, got #{orient} instead."
|
5068
5101
|
end
|
@@ -5108,10 +5141,10 @@ module Polars
|
|
5108
5141
|
|
5109
5142
|
def _compare_to_other_df(other, op)
|
5110
5143
|
if columns != other.columns
|
5111
|
-
raise
|
5144
|
+
raise ArgumentError, "DataFrame columns do not match"
|
5112
5145
|
end
|
5113
5146
|
if shape != other.shape
|
5114
|
-
raise
|
5147
|
+
raise ArgumentError, "DataFrame dimensions do not match"
|
5115
5148
|
end
|
5116
5149
|
|
5117
5150
|
suffix = "__POLARS_CMP_OTHER"
|
@@ -97,15 +97,20 @@ module Polars
|
|
97
97
|
# # │ 2001-01-01 00:50:00 ┆ 2001-01-01 00:30:00 │
|
98
98
|
# # │ 2001-01-01 01:00:00 ┆ 2001-01-01 01:00:00 │
|
99
99
|
# # └─────────────────────┴─────────────────────┘
|
100
|
-
def truncate(every, offset: nil)
|
100
|
+
def truncate(every, offset: nil, use_earliest: nil)
|
101
101
|
if offset.nil?
|
102
102
|
offset = "0ns"
|
103
103
|
end
|
104
104
|
|
105
|
+
if !every.is_a?(Expr)
|
106
|
+
every = Utils._timedelta_to_pl_duration(every)
|
107
|
+
end
|
108
|
+
every = Utils.parse_as_expression(every, str_as_lit: true)
|
109
|
+
|
105
110
|
Utils.wrap_expr(
|
106
111
|
_rbexpr.dt_truncate(
|
107
|
-
|
108
|
-
Utils._timedelta_to_pl_duration(offset)
|
112
|
+
every,
|
113
|
+
Utils._timedelta_to_pl_duration(offset),
|
109
114
|
)
|
110
115
|
)
|
111
116
|
end
|
@@ -1026,21 +1031,10 @@ module Polars
|
|
1026
1031
|
# Time zone for the `Datetime` Series.
|
1027
1032
|
#
|
1028
1033
|
# @return [Expr]
|
1029
|
-
def replace_time_zone(tz, use_earliest: nil)
|
1030
|
-
Utils.
|
1031
|
-
|
1032
|
-
|
1033
|
-
# Localize tz-naive Datetime Series to tz-aware Datetime Series.
|
1034
|
-
#
|
1035
|
-
# This method takes a naive Datetime Series and makes this time zone aware.
|
1036
|
-
# It does not move the time to another time zone.
|
1037
|
-
#
|
1038
|
-
# @param tz [String]
|
1039
|
-
# Time zone for the `Datetime` Series.
|
1040
|
-
#
|
1041
|
-
# @return [Expr]
|
1042
|
-
def tz_localize(tz)
|
1043
|
-
Utils.wrap_expr(_rbexpr.dt_tz_localize(tz))
|
1034
|
+
def replace_time_zone(tz, use_earliest: nil, ambiguous: "raise")
|
1035
|
+
ambiguous = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
|
1036
|
+
ambiguous = Polars.lit(ambiguous) unless ambiguous.is_a?(Expr)
|
1037
|
+
Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, ambiguous._rbexpr))
|
1044
1038
|
end
|
1045
1039
|
|
1046
1040
|
# Extract the days from a Duration type.
|
@@ -1348,6 +1342,7 @@ module Polars
|
|
1348
1342
|
# # │ 2006-01-01 00:00:00 ┆ 2003-11-01 00:00:00 │
|
1349
1343
|
# # └─────────────────────┴─────────────────────┘
|
1350
1344
|
def offset_by(by)
|
1345
|
+
by = Utils.parse_as_expression(by, str_as_lit: true)
|
1351
1346
|
Utils.wrap_expr(_rbexpr.dt_offset_by(by))
|
1352
1347
|
end
|
1353
1348
|
|
@@ -23,18 +23,8 @@ module Polars
|
|
23
23
|
# @return [Object]
|
24
24
|
#
|
25
25
|
# @example
|
26
|
-
#
|
27
|
-
#
|
28
|
-
# # shape: (3,)
|
29
|
-
# # Series: '' [datetime[μs]]
|
30
|
-
# # [
|
31
|
-
# # 2001-01-01 00:00:00
|
32
|
-
# # 2001-01-02 00:00:00
|
33
|
-
# # 2001-01-03 00:00:00
|
34
|
-
# # ]
|
35
|
-
#
|
36
|
-
# @example
|
37
|
-
# date.dt.min
|
26
|
+
# s = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
|
27
|
+
# s.dt.min
|
38
28
|
# # => 2001-01-01 00:00:00 UTC
|
39
29
|
def min
|
40
30
|
Utils.wrap_s(_s).min
|
@@ -45,18 +35,8 @@ module Polars
|
|
45
35
|
# @return [Object]
|
46
36
|
#
|
47
37
|
# @example
|
48
|
-
#
|
49
|
-
#
|
50
|
-
# # shape: (3,)
|
51
|
-
# # Series: '' [datetime[μs]]
|
52
|
-
# # [
|
53
|
-
# # 2001-01-01 00:00:00
|
54
|
-
# # 2001-01-02 00:00:00
|
55
|
-
# # 2001-01-03 00:00:00
|
56
|
-
# # ]
|
57
|
-
#
|
58
|
-
# @example
|
59
|
-
# date.dt.max
|
38
|
+
# s = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
|
39
|
+
# s.dt.max
|
60
40
|
# # => 2001-01-03 00:00:00 UTC
|
61
41
|
def max
|
62
42
|
Utils.wrap_s(_s).max
|
@@ -1400,7 +1380,7 @@ module Polars
|
|
1400
1380
|
# # 2001-01-01 00:30:00
|
1401
1381
|
# # 2001-01-01 01:00:00
|
1402
1382
|
# # ]
|
1403
|
-
def truncate(every, offset: nil)
|
1383
|
+
def truncate(every, offset: nil, use_earliest: nil)
|
1404
1384
|
super
|
1405
1385
|
end
|
1406
1386
|
|
@@ -2,7 +2,7 @@ module Polars
|
|
2
2
|
# A dynamic grouper.
|
3
3
|
#
|
4
4
|
# This has an `.agg` method which allows you to run all polars expressions in a
|
5
|
-
#
|
5
|
+
# group by context.
|
6
6
|
class DynamicGroupBy
|
7
7
|
def initialize(
|
8
8
|
df,
|
@@ -34,7 +34,7 @@ module Polars
|
|
34
34
|
|
35
35
|
def agg(aggs)
|
36
36
|
@df.lazy
|
37
|
-
.
|
37
|
+
.group_by_dynamic(
|
38
38
|
@time_column,
|
39
39
|
every: @every,
|
40
40
|
period: @period,
|