polars-df 0.6.0-x86_64-linux → 0.7.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +468 -538
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +3223 -4194
- data/README.md +8 -7
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +115 -82
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +5 -25
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +177 -94
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +93 -66
- data/lib/polars/lazy_functions.rb +36 -48
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +26 -13
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/utils.rb +12 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +5 -2
data/lib/polars/data_frame.rb
CHANGED
@@ -20,15 +20,9 @@ module Polars
|
|
20
20
|
# this does not yield conclusive results, column orientation is used.
|
21
21
|
def initialize(data = nil, schema: nil, columns: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
|
22
22
|
schema ||= columns
|
23
|
-
raise Todo if schema_overrides
|
24
23
|
|
25
|
-
# TODO deprecate in favor of read_sql
|
26
24
|
if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
|
27
|
-
|
28
|
-
data = {}
|
29
|
-
result.columns.each_with_index do |k, i|
|
30
|
-
data[k] = result.rows.map { |r| r[i] }
|
31
|
-
end
|
25
|
+
raise ArgumentError, "Use read_database instead"
|
32
26
|
end
|
33
27
|
|
34
28
|
if data.nil?
|
@@ -905,6 +899,7 @@ module Polars
|
|
905
899
|
def write_csv(
|
906
900
|
file = nil,
|
907
901
|
has_header: true,
|
902
|
+
include_header: nil,
|
908
903
|
sep: ",",
|
909
904
|
quote: '"',
|
910
905
|
batch_size: 1024,
|
@@ -914,6 +909,8 @@ module Polars
|
|
914
909
|
float_precision: nil,
|
915
910
|
null_value: nil
|
916
911
|
)
|
912
|
+
include_header = has_header if include_header.nil?
|
913
|
+
|
917
914
|
if sep.length > 1
|
918
915
|
raise ArgumentError, "only single byte separator is allowed"
|
919
916
|
elsif quote.length > 1
|
@@ -927,7 +924,7 @@ module Polars
|
|
927
924
|
buffer.set_encoding(Encoding::BINARY)
|
928
925
|
_df.write_csv(
|
929
926
|
buffer,
|
930
|
-
|
927
|
+
include_header,
|
931
928
|
sep.ord,
|
932
929
|
quote.ord,
|
933
930
|
batch_size,
|
@@ -946,7 +943,7 @@ module Polars
|
|
946
943
|
|
947
944
|
_df.write_csv(
|
948
945
|
file,
|
949
|
-
|
946
|
+
include_header,
|
950
947
|
sep.ord,
|
951
948
|
quote.ord,
|
952
949
|
batch_size,
|
@@ -1151,22 +1148,8 @@ module Polars
|
|
1151
1148
|
# # │ b ┆ 1 ┆ 2 ┆ 3 │
|
1152
1149
|
# # └─────┴─────┴─────┴─────┘
|
1153
1150
|
def transpose(include_header: false, header_name: "column", column_names: nil)
|
1154
|
-
|
1155
|
-
|
1156
|
-
names = []
|
1157
|
-
n = df.width
|
1158
|
-
if include_header
|
1159
|
-
names << header_name
|
1160
|
-
n -= 1
|
1161
|
-
end
|
1162
|
-
|
1163
|
-
column_names = column_names.each
|
1164
|
-
n.times do
|
1165
|
-
names << column_names.next
|
1166
|
-
end
|
1167
|
-
df.columns = names
|
1168
|
-
end
|
1169
|
-
df
|
1151
|
+
keep_names_as = include_header ? header_name : nil
|
1152
|
+
_from_rbdf(_df.transpose(keep_names_as, column_names))
|
1170
1153
|
end
|
1171
1154
|
|
1172
1155
|
# Reverse the DataFrame.
|
@@ -1811,13 +1794,13 @@ module Polars
|
|
1811
1794
|
_from_rbdf(_df.with_row_count(name, offset))
|
1812
1795
|
end
|
1813
1796
|
|
1814
|
-
# Start a
|
1797
|
+
# Start a group by operation.
|
1815
1798
|
#
|
1816
1799
|
# @param by [Object]
|
1817
1800
|
# Column(s) to group by.
|
1818
1801
|
# @param maintain_order [Boolean]
|
1819
1802
|
# Make sure that the order of the groups remain consistent. This is more
|
1820
|
-
# expensive than a default
|
1803
|
+
# expensive than a default group by. Note that this only works in expression
|
1821
1804
|
# aggregations.
|
1822
1805
|
#
|
1823
1806
|
# @return [GroupBy]
|
@@ -1830,7 +1813,7 @@ module Polars
|
|
1830
1813
|
# "c" => [6, 5, 4, 3, 2, 1]
|
1831
1814
|
# }
|
1832
1815
|
# )
|
1833
|
-
# df.
|
1816
|
+
# df.group_by("a").agg(Polars.col("b").sum).sort("a")
|
1834
1817
|
# # =>
|
1835
1818
|
# # shape: (3, 2)
|
1836
1819
|
# # ┌─────┬─────┐
|
@@ -1842,25 +1825,26 @@ module Polars
|
|
1842
1825
|
# # │ b ┆ 11 │
|
1843
1826
|
# # │ c ┆ 6 │
|
1844
1827
|
# # └─────┴─────┘
|
1845
|
-
def
|
1828
|
+
def group_by(by, maintain_order: false)
|
1846
1829
|
if !Utils.bool?(maintain_order)
|
1847
|
-
raise TypeError, "invalid input for
|
1830
|
+
raise TypeError, "invalid input for group_by arg `maintain_order`: #{maintain_order}."
|
1848
1831
|
end
|
1849
1832
|
GroupBy.new(
|
1850
|
-
|
1833
|
+
self,
|
1851
1834
|
by,
|
1852
|
-
self.class,
|
1853
1835
|
maintain_order: maintain_order
|
1854
1836
|
)
|
1855
1837
|
end
|
1838
|
+
alias_method :groupby, :group_by
|
1839
|
+
alias_method :group, :group_by
|
1856
1840
|
|
1857
1841
|
# Create rolling groups based on a time column.
|
1858
1842
|
#
|
1859
1843
|
# Also works for index values of type `:i32` or `:i64`.
|
1860
1844
|
#
|
1861
|
-
# Different from a `
|
1845
|
+
# Different from a `dynamic_group_by` the windows are now determined by the
|
1862
1846
|
# individual values and are not of constant intervals. For constant intervals use
|
1863
|
-
# *
|
1847
|
+
# *group_by_dynamic*
|
1864
1848
|
#
|
1865
1849
|
# The `period` and `offset` arguments are created either from a timedelta, or
|
1866
1850
|
# by using the following string language:
|
@@ -1880,7 +1864,7 @@ module Polars
|
|
1880
1864
|
# Or combine them:
|
1881
1865
|
# "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
|
1882
1866
|
#
|
1883
|
-
# In case of a
|
1867
|
+
# In case of a group_by_rolling on an integer column, the windows are defined by:
|
1884
1868
|
#
|
1885
1869
|
# - **"1i" # length 1**
|
1886
1870
|
# - **"10i" # length 10**
|
@@ -1891,7 +1875,7 @@ module Polars
|
|
1891
1875
|
# This column must be sorted in ascending order. If not the output will not
|
1892
1876
|
# make sense.
|
1893
1877
|
#
|
1894
|
-
# In case of a rolling
|
1878
|
+
# In case of a rolling group by on indices, dtype needs to be one of
|
1895
1879
|
# `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
|
1896
1880
|
# performance matters use an `:i64` column.
|
1897
1881
|
# @param period [Object]
|
@@ -1923,7 +1907,7 @@ module Polars
|
|
1923
1907
|
# df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
|
1924
1908
|
# Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
|
1925
1909
|
# )
|
1926
|
-
# df.
|
1910
|
+
# df.group_by_rolling(index_column: "dt", period: "2d").agg(
|
1927
1911
|
# [
|
1928
1912
|
# Polars.sum("a").alias("sum_a"),
|
1929
1913
|
# Polars.min("a").alias("min_a"),
|
@@ -1944,7 +1928,7 @@ module Polars
|
|
1944
1928
|
# # │ 2020-01-03 19:45:32 ┆ 11 ┆ 2 ┆ 9 │
|
1945
1929
|
# # │ 2020-01-08 23:16:43 ┆ 1 ┆ 1 ┆ 1 │
|
1946
1930
|
# # └─────────────────────┴───────┴───────┴───────┘
|
1947
|
-
def
|
1931
|
+
def group_by_rolling(
|
1948
1932
|
index_column:,
|
1949
1933
|
period:,
|
1950
1934
|
offset: nil,
|
@@ -1954,11 +1938,12 @@ module Polars
|
|
1954
1938
|
)
|
1955
1939
|
RollingGroupBy.new(self, index_column, period, offset, closed, by, check_sorted)
|
1956
1940
|
end
|
1941
|
+
alias_method :groupby_rolling, :group_by_rolling
|
1957
1942
|
|
1958
1943
|
# Group based on a time value (or index value of type `:i32`, `:i64`).
|
1959
1944
|
#
|
1960
1945
|
# Time windows are calculated and rows are assigned to windows. Different from a
|
1961
|
-
# normal
|
1946
|
+
# normal group by is that a row can be member of multiple groups. The time/index
|
1962
1947
|
# window could be seen as a rolling window, with a window size determined by
|
1963
1948
|
# dates/times/values instead of slots in the DataFrame.
|
1964
1949
|
#
|
@@ -1986,7 +1971,7 @@ module Polars
|
|
1986
1971
|
# Or combine them:
|
1987
1972
|
# "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
|
1988
1973
|
#
|
1989
|
-
# In case of a
|
1974
|
+
# In case of a group_by_dynamic on an integer column, the windows are defined by:
|
1990
1975
|
#
|
1991
1976
|
# - "1i" # length 1
|
1992
1977
|
# - "10i" # length 10
|
@@ -1997,7 +1982,7 @@ module Polars
|
|
1997
1982
|
# This column must be sorted in ascending order. If not the output will not
|
1998
1983
|
# make sense.
|
1999
1984
|
#
|
2000
|
-
# In case of a dynamic
|
1985
|
+
# In case of a dynamic group by on indices, dtype needs to be one of
|
2001
1986
|
# `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
|
2002
1987
|
# performance matters use an `:i64` column.
|
2003
1988
|
# @param every
|
@@ -2048,7 +2033,7 @@ module Polars
|
|
2048
2033
|
# # └─────────────────────┴─────┘
|
2049
2034
|
#
|
2050
2035
|
# @example Group by windows of 1 hour starting at 2021-12-16 00:00:00.
|
2051
|
-
# df.
|
2036
|
+
# df.group_by_dynamic("time", every: "1h", closed: "right").agg(
|
2052
2037
|
# [
|
2053
2038
|
# Polars.col("time").min.alias("time_min"),
|
2054
2039
|
# Polars.col("time").max.alias("time_max")
|
@@ -2068,7 +2053,7 @@ module Polars
|
|
2068
2053
|
# # └─────────────────────┴─────────────────────┴─────────────────────┘
|
2069
2054
|
#
|
2070
2055
|
# @example The window boundaries can also be added to the aggregation result.
|
2071
|
-
# df.
|
2056
|
+
# df.group_by_dynamic(
|
2072
2057
|
# "time", every: "1h", include_boundaries: true, closed: "right"
|
2073
2058
|
# ).agg([Polars.col("time").count.alias("time_count")])
|
2074
2059
|
# # =>
|
@@ -2085,7 +2070,7 @@ module Polars
|
|
2085
2070
|
# # └─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
|
2086
2071
|
#
|
2087
2072
|
# @example When closed="left", should not include right end of interval.
|
2088
|
-
# df.
|
2073
|
+
# df.group_by_dynamic("time", every: "1h", closed: "left").agg(
|
2089
2074
|
# [
|
2090
2075
|
# Polars.col("time").count.alias("time_count"),
|
2091
2076
|
# Polars.col("time").alias("time_agg_list")
|
@@ -2105,7 +2090,7 @@ module Polars
|
|
2105
2090
|
# # └─────────────────────┴────────────┴───────────────────────────────────┘
|
2106
2091
|
#
|
2107
2092
|
# @example When closed="both" the time values at the window boundaries belong to 2 groups.
|
2108
|
-
# df.
|
2093
|
+
# df.group_by_dynamic("time", every: "1h", closed: "both").agg(
|
2109
2094
|
# [Polars.col("time").count.alias("time_count")]
|
2110
2095
|
# )
|
2111
2096
|
# # =>
|
@@ -2122,7 +2107,7 @@ module Polars
|
|
2122
2107
|
# # │ 2021-12-16 03:00:00 ┆ 1 │
|
2123
2108
|
# # └─────────────────────┴────────────┘
|
2124
2109
|
#
|
2125
|
-
# @example Dynamic
|
2110
|
+
# @example Dynamic group bys can also be combined with grouping on normal keys.
|
2126
2111
|
# df = Polars::DataFrame.new(
|
2127
2112
|
# {
|
2128
2113
|
# "time" => Polars.date_range(
|
@@ -2133,7 +2118,7 @@ module Polars
|
|
2133
2118
|
# "groups" => ["a", "a", "a", "b", "b", "a", "a"]
|
2134
2119
|
# }
|
2135
2120
|
# )
|
2136
|
-
# df.
|
2121
|
+
# df.group_by_dynamic(
|
2137
2122
|
# "time",
|
2138
2123
|
# every: "1h",
|
2139
2124
|
# closed: "both",
|
@@ -2156,14 +2141,14 @@ module Polars
|
|
2156
2141
|
# # │ b ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1 │
|
2157
2142
|
# # └────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
|
2158
2143
|
#
|
2159
|
-
# @example Dynamic
|
2144
|
+
# @example Dynamic group by on an index column.
|
2160
2145
|
# df = Polars::DataFrame.new(
|
2161
2146
|
# {
|
2162
2147
|
# "idx" => Polars.arange(0, 6, eager: true),
|
2163
2148
|
# "A" => ["A", "A", "B", "B", "B", "C"]
|
2164
2149
|
# }
|
2165
2150
|
# )
|
2166
|
-
# df.
|
2151
|
+
# df.group_by_dynamic(
|
2167
2152
|
# "idx",
|
2168
2153
|
# every: "2i",
|
2169
2154
|
# period: "3i",
|
@@ -2181,7 +2166,7 @@ module Polars
|
|
2181
2166
|
# # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
|
2182
2167
|
# # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
|
2183
2168
|
# # └─────────────────┴─────────────────┴─────┴─────────────────┘
|
2184
|
-
def
|
2169
|
+
def group_by_dynamic(
|
2185
2170
|
index_column,
|
2186
2171
|
every:,
|
2187
2172
|
period: nil,
|
@@ -2205,6 +2190,7 @@ module Polars
|
|
2205
2190
|
start_by
|
2206
2191
|
)
|
2207
2192
|
end
|
2193
|
+
alias_method :groupby_dynamic, :group_by_dynamic
|
2208
2194
|
|
2209
2195
|
# Upsample a DataFrame at a regular frequency.
|
2210
2196
|
#
|
@@ -3464,8 +3450,10 @@ module Polars
|
|
3464
3450
|
|
3465
3451
|
# Shift values by the given period.
|
3466
3452
|
#
|
3467
|
-
# @param
|
3453
|
+
# @param n [Integer]
|
3468
3454
|
# Number of places to shift (may be negative).
|
3455
|
+
# @param fill_value [Object]
|
3456
|
+
# Fill the resulting null values with this value.
|
3469
3457
|
#
|
3470
3458
|
# @return [DataFrame]
|
3471
3459
|
#
|
@@ -3503,8 +3491,8 @@ module Polars
|
|
3503
3491
|
# # │ 3 ┆ 8 ┆ c │
|
3504
3492
|
# # │ null ┆ null ┆ null │
|
3505
3493
|
# # └──────┴──────┴──────┘
|
3506
|
-
def shift(
|
3507
|
-
|
3494
|
+
def shift(n, fill_value: nil)
|
3495
|
+
lazy.shift(n, fill_value: fill_value).collect(_eager: true)
|
3508
3496
|
end
|
3509
3497
|
|
3510
3498
|
# Shift the values by a given period and fill the resulting null values.
|
@@ -3537,9 +3525,7 @@ module Polars
|
|
3537
3525
|
# # │ 2 ┆ 7 ┆ b │
|
3538
3526
|
# # └─────┴─────┴─────┘
|
3539
3527
|
def shift_and_fill(periods, fill_value)
|
3540
|
-
|
3541
|
-
.shift_and_fill(periods, fill_value)
|
3542
|
-
.collect(no_optimization: true, string_cache: false)
|
3528
|
+
shift(periods, fill_value: fill_value)
|
3543
3529
|
end
|
3544
3530
|
|
3545
3531
|
# Get a mask of all duplicated rows in this DataFrame.
|
@@ -3790,7 +3776,7 @@ module Polars
|
|
3790
3776
|
if axis == 0
|
3791
3777
|
_from_rbdf(_df.max)
|
3792
3778
|
elsif axis == 1
|
3793
|
-
Utils.wrap_s(_df.
|
3779
|
+
Utils.wrap_s(_df.max_horizontal)
|
3794
3780
|
else
|
3795
3781
|
raise ArgumentError, "Axis should be 0 or 1."
|
3796
3782
|
end
|
@@ -3822,7 +3808,7 @@ module Polars
|
|
3822
3808
|
if axis == 0
|
3823
3809
|
_from_rbdf(_df.min)
|
3824
3810
|
elsif axis == 1
|
3825
|
-
Utils.wrap_s(_df.
|
3811
|
+
Utils.wrap_s(_df.min_horizontal)
|
3826
3812
|
else
|
3827
3813
|
raise ArgumentError, "Axis should be 0 or 1."
|
3828
3814
|
end
|
@@ -3871,7 +3857,7 @@ module Polars
|
|
3871
3857
|
when 0
|
3872
3858
|
_from_rbdf(_df.sum)
|
3873
3859
|
when 1
|
3874
|
-
Utils.wrap_s(_df.
|
3860
|
+
Utils.wrap_s(_df.sum_horizontal(null_strategy))
|
3875
3861
|
else
|
3876
3862
|
raise ArgumentError, "Axis should be 0 or 1."
|
3877
3863
|
end
|
@@ -3909,7 +3895,7 @@ module Polars
|
|
3909
3895
|
when 0
|
3910
3896
|
_from_rbdf(_df.mean)
|
3911
3897
|
when 1
|
3912
|
-
Utils.wrap_s(_df.
|
3898
|
+
Utils.wrap_s(_df.mean_horizontal(null_strategy))
|
3913
3899
|
else
|
3914
3900
|
raise ArgumentError, "Axis should be 0 or 1."
|
3915
3901
|
end
|
@@ -4294,15 +4280,20 @@ module Polars
|
|
4294
4280
|
end
|
4295
4281
|
|
4296
4282
|
if n.nil? && !frac.nil?
|
4283
|
+
frac = Series.new("frac", [frac]) unless frac.is_a?(Series)
|
4284
|
+
|
4297
4285
|
_from_rbdf(
|
4298
|
-
_df.sample_frac(frac, with_replacement, shuffle, seed)
|
4286
|
+
_df.sample_frac(frac._s, with_replacement, shuffle, seed)
|
4299
4287
|
)
|
4300
4288
|
end
|
4301
4289
|
|
4302
4290
|
if n.nil?
|
4303
4291
|
n = 1
|
4304
4292
|
end
|
4305
|
-
|
4293
|
+
|
4294
|
+
n = Series.new("", [n]) unless n.is_a?(Series)
|
4295
|
+
|
4296
|
+
_from_rbdf(_df.sample_n(n._s, with_replacement, shuffle, seed))
|
4306
4297
|
end
|
4307
4298
|
|
4308
4299
|
# Apply a horizontal reduction on a DataFrame.
|
@@ -4601,7 +4592,7 @@ module Polars
|
|
4601
4592
|
#
|
4602
4593
|
# @example
|
4603
4594
|
# s = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]})
|
4604
|
-
# s.
|
4595
|
+
# s.gather_every(2)
|
4605
4596
|
# # =>
|
4606
4597
|
# # shape: (2, 2)
|
4607
4598
|
# # ┌─────┬─────┐
|
@@ -4612,9 +4603,10 @@ module Polars
|
|
4612
4603
|
# # │ 1 ┆ 5 │
|
4613
4604
|
# # │ 3 ┆ 7 │
|
4614
4605
|
# # └─────┴─────┘
|
4615
|
-
def
|
4616
|
-
select(Utils.col("*").
|
4606
|
+
def gather_every(n)
|
4607
|
+
select(Utils.col("*").gather_every(n))
|
4617
4608
|
end
|
4609
|
+
alias_method :take_every, :gather_every
|
4618
4610
|
|
4619
4611
|
# Hash and combine the rows in this DataFrame.
|
4620
4612
|
#
|
@@ -4671,16 +4663,16 @@ module Polars
|
|
4671
4663
|
# df.interpolate
|
4672
4664
|
# # =>
|
4673
4665
|
# # shape: (4, 3)
|
4674
|
-
# #
|
4675
|
-
# # │ foo
|
4676
|
-
# # │ ---
|
4677
|
-
# # │
|
4678
|
-
# #
|
4679
|
-
# # │ 1
|
4680
|
-
# # │ 5
|
4681
|
-
# # │ 9
|
4682
|
-
# # │ 10
|
4683
|
-
# #
|
4666
|
+
# # ┌──────┬──────┬──────────┐
|
4667
|
+
# # │ foo ┆ bar ┆ baz │
|
4668
|
+
# # │ --- ┆ --- ┆ --- │
|
4669
|
+
# # │ f64 ┆ f64 ┆ f64 │
|
4670
|
+
# # ╞══════╪══════╪══════════╡
|
4671
|
+
# # │ 1.0 ┆ 6.0 ┆ 1.0 │
|
4672
|
+
# # │ 5.0 ┆ 7.0 ┆ 3.666667 │
|
4673
|
+
# # │ 9.0 ┆ 9.0 ┆ 6.333333 │
|
4674
|
+
# # │ 10.0 ┆ null ┆ 9.0 │
|
4675
|
+
# # └──────┴──────┴──────────┘
|
4684
4676
|
def interpolate
|
4685
4677
|
select(Utils.col("*").interpolate)
|
4686
4678
|
end
|
@@ -4952,8 +4944,8 @@ module Polars
|
|
4952
4944
|
[lookup[col[0]] || col[0], col[1]]
|
4953
4945
|
end
|
4954
4946
|
|
4955
|
-
if schema_overrides
|
4956
|
-
|
4947
|
+
if schema_overrides && schema_overrides.any?
|
4948
|
+
column_dtypes.merge!(schema_overrides)
|
4957
4949
|
end
|
4958
4950
|
|
4959
4951
|
column_dtypes.each do |col, dtype|
|
@@ -5056,13 +5048,54 @@ module Polars
|
|
5056
5048
|
return rbdf
|
5057
5049
|
elsif data[0].is_a?(::Array)
|
5058
5050
|
if orient.nil? && !columns.nil?
|
5059
|
-
|
5051
|
+
first_element = data[0]
|
5052
|
+
row_types = first_element.filter_map { |value| value.class }.uniq
|
5053
|
+
if row_types.include?(Integer) && row_types.include?(Float)
|
5054
|
+
row_types.delete(Integer)
|
5055
|
+
end
|
5056
|
+
orient = row_types.length == 1 ? "col" : "row"
|
5060
5057
|
end
|
5061
5058
|
|
5062
5059
|
if orient == "row"
|
5063
|
-
|
5060
|
+
column_names, schema_overrides = _unpack_schema(
|
5061
|
+
schema, schema_overrides: schema_overrides, n_expected: first_element.length
|
5062
|
+
)
|
5063
|
+
local_schema_override = (
|
5064
|
+
schema_overrides.any? ? (raise Todo) : {}
|
5065
|
+
)
|
5066
|
+
if column_names.any? && first_element.length > 0 && first_element.length != column_names.length
|
5067
|
+
raise ArgumentError, "the row data does not match the number of columns"
|
5068
|
+
end
|
5069
|
+
|
5070
|
+
unpack_nested = false
|
5071
|
+
local_schema_override.each do |col, tp|
|
5072
|
+
raise Todo
|
5073
|
+
end
|
5074
|
+
|
5075
|
+
if unpack_nested
|
5076
|
+
raise Todo
|
5077
|
+
else
|
5078
|
+
rbdf = RbDataFrame.read_rows(
|
5079
|
+
data,
|
5080
|
+
infer_schema_length,
|
5081
|
+
local_schema_override.any? ? local_schema_override : nil
|
5082
|
+
)
|
5083
|
+
end
|
5084
|
+
if column_names.any? || schema_overrides.any?
|
5085
|
+
rbdf = _post_apply_columns(
|
5086
|
+
rbdf, column_names, schema_overrides: schema_overrides
|
5087
|
+
)
|
5088
|
+
end
|
5089
|
+
return rbdf
|
5064
5090
|
elsif orient == "col" || orient.nil?
|
5065
|
-
|
5091
|
+
column_names, schema_overrides = _unpack_schema(
|
5092
|
+
schema, schema_overrides: schema_overrides, n_expected: data.length
|
5093
|
+
)
|
5094
|
+
data_series =
|
5095
|
+
data.map.with_index do |element, i|
|
5096
|
+
Series.new(column_names[i], element, dtype: schema_overrides[column_names[i]])._s
|
5097
|
+
end
|
5098
|
+
return RbDataFrame.new(data_series)
|
5066
5099
|
else
|
5067
5100
|
raise ArgumentError, "orient must be one of {{'col', 'row', nil}}, got #{orient} instead."
|
5068
5101
|
end
|
@@ -5108,10 +5141,10 @@ module Polars
|
|
5108
5141
|
|
5109
5142
|
def _compare_to_other_df(other, op)
|
5110
5143
|
if columns != other.columns
|
5111
|
-
raise
|
5144
|
+
raise ArgumentError, "DataFrame columns do not match"
|
5112
5145
|
end
|
5113
5146
|
if shape != other.shape
|
5114
|
-
raise
|
5147
|
+
raise ArgumentError, "DataFrame dimensions do not match"
|
5115
5148
|
end
|
5116
5149
|
|
5117
5150
|
suffix = "__POLARS_CMP_OTHER"
|
@@ -97,15 +97,20 @@ module Polars
|
|
97
97
|
# # │ 2001-01-01 00:50:00 ┆ 2001-01-01 00:30:00 │
|
98
98
|
# # │ 2001-01-01 01:00:00 ┆ 2001-01-01 01:00:00 │
|
99
99
|
# # └─────────────────────┴─────────────────────┘
|
100
|
-
def truncate(every, offset: nil)
|
100
|
+
def truncate(every, offset: nil, use_earliest: nil)
|
101
101
|
if offset.nil?
|
102
102
|
offset = "0ns"
|
103
103
|
end
|
104
104
|
|
105
|
+
if !every.is_a?(Expr)
|
106
|
+
every = Utils._timedelta_to_pl_duration(every)
|
107
|
+
end
|
108
|
+
every = Utils.parse_as_expression(every, str_as_lit: true)
|
109
|
+
|
105
110
|
Utils.wrap_expr(
|
106
111
|
_rbexpr.dt_truncate(
|
107
|
-
|
108
|
-
Utils._timedelta_to_pl_duration(offset)
|
112
|
+
every,
|
113
|
+
Utils._timedelta_to_pl_duration(offset),
|
109
114
|
)
|
110
115
|
)
|
111
116
|
end
|
@@ -1026,21 +1031,10 @@ module Polars
|
|
1026
1031
|
# Time zone for the `Datetime` Series.
|
1027
1032
|
#
|
1028
1033
|
# @return [Expr]
|
1029
|
-
def replace_time_zone(tz, use_earliest: nil)
|
1030
|
-
Utils.
|
1031
|
-
|
1032
|
-
|
1033
|
-
# Localize tz-naive Datetime Series to tz-aware Datetime Series.
|
1034
|
-
#
|
1035
|
-
# This method takes a naive Datetime Series and makes this time zone aware.
|
1036
|
-
# It does not move the time to another time zone.
|
1037
|
-
#
|
1038
|
-
# @param tz [String]
|
1039
|
-
# Time zone for the `Datetime` Series.
|
1040
|
-
#
|
1041
|
-
# @return [Expr]
|
1042
|
-
def tz_localize(tz)
|
1043
|
-
Utils.wrap_expr(_rbexpr.dt_tz_localize(tz))
|
1034
|
+
def replace_time_zone(tz, use_earliest: nil, ambiguous: "raise")
|
1035
|
+
ambiguous = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
|
1036
|
+
ambiguous = Polars.lit(ambiguous) unless ambiguous.is_a?(Expr)
|
1037
|
+
Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, ambiguous._rbexpr))
|
1044
1038
|
end
|
1045
1039
|
|
1046
1040
|
# Extract the days from a Duration type.
|
@@ -1348,6 +1342,7 @@ module Polars
|
|
1348
1342
|
# # │ 2006-01-01 00:00:00 ┆ 2003-11-01 00:00:00 │
|
1349
1343
|
# # └─────────────────────┴─────────────────────┘
|
1350
1344
|
def offset_by(by)
|
1345
|
+
by = Utils.parse_as_expression(by, str_as_lit: true)
|
1351
1346
|
Utils.wrap_expr(_rbexpr.dt_offset_by(by))
|
1352
1347
|
end
|
1353
1348
|
|
@@ -23,18 +23,8 @@ module Polars
|
|
23
23
|
# @return [Object]
|
24
24
|
#
|
25
25
|
# @example
|
26
|
-
#
|
27
|
-
#
|
28
|
-
# # shape: (3,)
|
29
|
-
# # Series: '' [datetime[μs]]
|
30
|
-
# # [
|
31
|
-
# # 2001-01-01 00:00:00
|
32
|
-
# # 2001-01-02 00:00:00
|
33
|
-
# # 2001-01-03 00:00:00
|
34
|
-
# # ]
|
35
|
-
#
|
36
|
-
# @example
|
37
|
-
# date.dt.min
|
26
|
+
# s = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
|
27
|
+
# s.dt.min
|
38
28
|
# # => 2001-01-01 00:00:00 UTC
|
39
29
|
def min
|
40
30
|
Utils.wrap_s(_s).min
|
@@ -45,18 +35,8 @@ module Polars
|
|
45
35
|
# @return [Object]
|
46
36
|
#
|
47
37
|
# @example
|
48
|
-
#
|
49
|
-
#
|
50
|
-
# # shape: (3,)
|
51
|
-
# # Series: '' [datetime[μs]]
|
52
|
-
# # [
|
53
|
-
# # 2001-01-01 00:00:00
|
54
|
-
# # 2001-01-02 00:00:00
|
55
|
-
# # 2001-01-03 00:00:00
|
56
|
-
# # ]
|
57
|
-
#
|
58
|
-
# @example
|
59
|
-
# date.dt.max
|
38
|
+
# s = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
|
39
|
+
# s.dt.max
|
60
40
|
# # => 2001-01-03 00:00:00 UTC
|
61
41
|
def max
|
62
42
|
Utils.wrap_s(_s).max
|
@@ -1400,7 +1380,7 @@ module Polars
|
|
1400
1380
|
# # 2001-01-01 00:30:00
|
1401
1381
|
# # 2001-01-01 01:00:00
|
1402
1382
|
# # ]
|
1403
|
-
def truncate(every, offset: nil)
|
1383
|
+
def truncate(every, offset: nil, use_earliest: nil)
|
1404
1384
|
super
|
1405
1385
|
end
|
1406
1386
|
|
@@ -2,7 +2,7 @@ module Polars
|
|
2
2
|
# A dynamic grouper.
|
3
3
|
#
|
4
4
|
# This has an `.agg` method which allows you to run all polars expressions in a
|
5
|
-
#
|
5
|
+
# group by context.
|
6
6
|
class DynamicGroupBy
|
7
7
|
def initialize(
|
8
8
|
df,
|
@@ -34,7 +34,7 @@ module Polars
|
|
34
34
|
|
35
35
|
def agg(aggs)
|
36
36
|
@df.lazy
|
37
|
-
.
|
37
|
+
.group_by_dynamic(
|
38
38
|
@time_column,
|
39
39
|
every: @every,
|
40
40
|
period: @period,
|