polars-df 0.6.0-x86_64-darwin → 0.7.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,15 +20,9 @@ module Polars
     # this does not yield conclusive results, column orientation is used.
     def initialize(data = nil, schema: nil, columns: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
       schema ||= columns
-      raise Todo if schema_overrides
 
-      # TODO deprecate in favor of read_sql
       if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
-        result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.to_sql)
-        data = {}
-        result.columns.each_with_index do |k, i|
-          data[k] = result.rows.map { |r| r[i] }
-        end
+        raise ArgumentError, "Use read_database instead"
       end
 
       if data.nil?
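Constructing a DataFrame straight from an ActiveRecord relation or result now fails fast instead of converting inline. A minimal migration sketch (the `User` relation is a hypothetical example):

    # 0.6.x converted the relation inside DataFrame.new
    df = Polars::DataFrame.new(User.where(active: true))

    # 0.7.0 raises ArgumentError; convert explicitly instead
    df = Polars.read_database(User.where(active: true))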
@@ -905,6 +899,7 @@ module Polars
     def write_csv(
       file = nil,
       has_header: true,
+      include_header: nil,
       sep: ",",
       quote: '"',
       batch_size: 1024,
@@ -914,6 +909,8 @@ module Polars
       float_precision: nil,
       null_value: nil
     )
+      include_header = has_header if include_header.nil?
+
       if sep.length > 1
         raise ArgumentError, "only single byte separator is allowed"
       elsif quote.length > 1
@@ -927,7 +924,7 @@ module Polars
         buffer.set_encoding(Encoding::BINARY)
         _df.write_csv(
           buffer,
-          has_header,
+          include_header,
           sep.ord,
           quote.ord,
           batch_size,
@@ -946,7 +943,7 @@ module Polars
 
       _df.write_csv(
         file,
-        has_header,
+        include_header,
         sep.ord,
         quote.ord,
         batch_size,
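`write_csv` now prefers `include_header`, keeping `has_header` as the fallback default, so both spellings reach the native writer with the same value:

    df.write_csv("out.csv", include_header: false)
    df.write_csv("out.csv", has_header: false)  # still honored when include_header is nil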
@@ -1151,22 +1148,8 @@ module Polars
     # #   │ b   ┆ 1   ┆ 2   ┆ 3   │
     # #   └─────┴─────┴─────┴─────┘
     def transpose(include_header: false, header_name: "column", column_names: nil)
-      df = _from_rbdf(_df.transpose(include_header, header_name))
-      if !column_names.nil?
-        names = []
-        n = df.width
-        if include_header
-          names << header_name
-          n -= 1
-        end
-
-        column_names = column_names.each
-        n.times do
-          names << column_names.next
-        end
-        df.columns = names
-      end
-      df
+      keep_names_as = include_header ? header_name : nil
+      _from_rbdf(_df.transpose(keep_names_as, column_names))
     end
 
     # Reverse the DataFrame.
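The Ruby-side renaming loop is gone; `column_names` is handed straight to the native `transpose`. Usage is unchanged, e.g. this sketch:

    df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3, 4]})
    df.transpose(include_header: true, header_name: "field", column_names: ["x", "y"])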
@@ -1811,13 +1794,13 @@ module Polars
       _from_rbdf(_df.with_row_count(name, offset))
     end
 
-    # Start a groupby operation.
+    # Start a group by operation.
     #
     # @param by [Object]
     #   Column(s) to group by.
     # @param maintain_order [Boolean]
     #   Make sure that the order of the groups remain consistent. This is more
-    #   expensive than a default groupby. Note that this only works in expression
+    #   expensive than a default group by. Note that this only works in expression
     #   aggregations.
     #
     # @return [GroupBy]
@@ -1830,7 +1813,7 @@ module Polars
    #       "c" => [6, 5, 4, 3, 2, 1]
    #     }
    #   )
-    #   df.groupby("a").agg(Polars.col("b").sum).sort("a")
+    #   df.group_by("a").agg(Polars.col("b").sum).sort("a")
    #   # =>
    #   # shape: (3, 2)
    #   # ┌─────┬─────┐
@@ -1842,25 +1825,26 @@ module Polars
    #   # │ b   ┆ 11  │
    #   # │ c   ┆ 6   │
    #   # └─────┴─────┘
-    def groupby(by, maintain_order: false)
+    def group_by(by, maintain_order: false)
       if !Utils.bool?(maintain_order)
-        raise TypeError, "invalid input for groupby arg `maintain_order`: #{maintain_order}."
+        raise TypeError, "invalid input for group_by arg `maintain_order`: #{maintain_order}."
       end
       GroupBy.new(
-        _df,
+        self,
         by,
-        self.class,
         maintain_order: maintain_order
       )
     end
+    alias_method :groupby, :group_by
+    alias_method :group, :group_by
 
     # Create rolling groups based on a time column.
     #
     # Also works for index values of type `:i32` or `:i64`.
     #
-    # Different from a `dynamic_groupby` the windows are now determined by the
+    # Different from a `dynamic_group_by` the windows are now determined by the
     # individual values and are not of constant intervals. For constant intervals use
-    # *groupby_dynamic*
+    # *group_by_dynamic*
     #
     # The `period` and `offset` arguments are created either from a timedelta, or
     # by using the following string language:
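The rename keeps both old spellings callable:

    df.group_by("a", maintain_order: true).agg(Polars.col("b").sum)
    df.groupby("a").agg(Polars.col("b").sum)  # alias, kept for compatibility
    df.group("a").agg(Polars.col("b").sum)    # new shorthand alias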
@@ -1880,7 +1864,7 @@ module Polars
    # Or combine them:
    #   "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
    #
-    # In case of a groupby_rolling on an integer column, the windows are defined by:
+    # In case of a group_by_rolling on an integer column, the windows are defined by:
    #
    # - **"1i"  # length 1**
    # - **"10i" # length 10**
@@ -1891,7 +1875,7 @@ module Polars
    #   This column must be sorted in ascending order. If not the output will not
    #   make sense.
    #
-    #   In case of a rolling groupby on indices, dtype needs to be one of
+    #   In case of a rolling group by on indices, dtype needs to be one of
    #   `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
    #   performance matters use an `:i64` column.
    # @param period [Object]
@@ -1923,7 +1907,7 @@ module Polars
    #   df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
    #     Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
    #   )
-    #   df.groupby_rolling(index_column: "dt", period: "2d").agg(
+    #   df.group_by_rolling(index_column: "dt", period: "2d").agg(
    #     [
    #       Polars.sum("a").alias("sum_a"),
    #       Polars.min("a").alias("min_a"),
@@ -1944,7 +1928,7 @@ module Polars
    #   # │ 2020-01-03 19:45:32 ┆ 11    ┆ 2     ┆ 9     │
    #   # │ 2020-01-08 23:16:43 ┆ 1     ┆ 1     ┆ 1     │
    #   # └─────────────────────┴───────┴───────┴───────┘
-    def groupby_rolling(
+    def group_by_rolling(
       index_column:,
       period:,
       offset: nil,
@@ -1954,11 +1938,12 @@ module Polars
     )
       RollingGroupBy.new(self, index_column, period, offset, closed, by, check_sorted)
     end
+    alias_method :groupby_rolling, :group_by_rolling
 
     # Group based on a time value (or index value of type `:i32`, `:i64`).
     #
     # Time windows are calculated and rows are assigned to windows. Different from a
-    # normal groupby is that a row can be member of multiple groups. The time/index
+    # normal group by is that a row can be member of multiple groups. The time/index
     # window could be seen as a rolling window, with a window size determined by
     # dates/times/values instead of slots in the DataFrame.
     #
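The rolling variant follows the same pattern, with the old name aliased:

    df.group_by_rolling(index_column: "dt", period: "2d").agg(Polars.sum("a").alias("sum_a"))
    df.groupby_rolling(index_column: "dt", period: "2d")  # same method via the alias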
@@ -1986,7 +1971,7 @@ module Polars
    # Or combine them:
    #   "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
    #
-    # In case of a groupby_dynamic on an integer column, the windows are defined by:
+    # In case of a group_by_dynamic on an integer column, the windows are defined by:
    #
    # - "1i"  # length 1
    # - "10i" # length 10
@@ -1997,7 +1982,7 @@ module Polars
    #   This column must be sorted in ascending order. If not the output will not
    #   make sense.
    #
-    #   In case of a dynamic groupby on indices, dtype needs to be one of
+    #   In case of a dynamic group by on indices, dtype needs to be one of
    #   `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
    #   performance matters use an `:i64` column.
    # @param every
@@ -2048,7 +2033,7 @@ module Polars
    #   # └─────────────────────┴─────┘
    #
    # @example Group by windows of 1 hour starting at 2021-12-16 00:00:00.
-    #   df.groupby_dynamic("time", every: "1h", closed: "right").agg(
+    #   df.group_by_dynamic("time", every: "1h", closed: "right").agg(
    #     [
    #       Polars.col("time").min.alias("time_min"),
    #       Polars.col("time").max.alias("time_max")
@@ -2068,7 +2053,7 @@ module Polars
    #   # └─────────────────────┴─────────────────────┴─────────────────────┘
    #
    # @example The window boundaries can also be added to the aggregation result.
-    #   df.groupby_dynamic(
+    #   df.group_by_dynamic(
    #     "time", every: "1h", include_boundaries: true, closed: "right"
    #   ).agg([Polars.col("time").count.alias("time_count")])
    #   # =>
@@ -2085,7 +2070,7 @@ module Polars
    #   # └─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
    #
    # @example When closed="left", should not include right end of interval.
-    #   df.groupby_dynamic("time", every: "1h", closed: "left").agg(
+    #   df.group_by_dynamic("time", every: "1h", closed: "left").agg(
    #     [
    #       Polars.col("time").count.alias("time_count"),
    #       Polars.col("time").alias("time_agg_list")
@@ -2105,7 +2090,7 @@ module Polars
    #   # └─────────────────────┴────────────┴───────────────────────────────────┘
    #
    # @example When closed="both" the time values at the window boundaries belong to 2 groups.
-    #   df.groupby_dynamic("time", every: "1h", closed: "both").agg(
+    #   df.group_by_dynamic("time", every: "1h", closed: "both").agg(
    #     [Polars.col("time").count.alias("time_count")]
    #   )
    #   # =>
@@ -2122,7 +2107,7 @@ module Polars
    #   # │ 2021-12-16 03:00:00 ┆ 1          │
    #   # └─────────────────────┴────────────┘
    #
-    # @example Dynamic groupbys can also be combined with grouping on normal keys.
+    # @example Dynamic group bys can also be combined with grouping on normal keys.
    #   df = Polars::DataFrame.new(
    #     {
    #       "time" => Polars.date_range(
@@ -2133,7 +2118,7 @@ module Polars
    #       "groups" => ["a", "a", "a", "b", "b", "a", "a"]
    #     }
    #   )
-    #   df.groupby_dynamic(
+    #   df.group_by_dynamic(
    #     "time",
    #     every: "1h",
    #     closed: "both",
@@ -2156,14 +2141,14 @@ module Polars
    #   # │ b      ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1          │
    #   # └────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
    #
-    # @example Dynamic groupby on an index column.
+    # @example Dynamic group by on an index column.
    #   df = Polars::DataFrame.new(
    #     {
    #       "idx" => Polars.arange(0, 6, eager: true),
    #       "A" => ["A", "A", "B", "B", "B", "C"]
    #     }
    #   )
-    #   df.groupby_dynamic(
+    #   df.group_by_dynamic(
    #     "idx",
    #     every: "2i",
    #     period: "3i",
@@ -2181,7 +2166,7 @@ module Polars
    #   # │ 2               ┆ 5               ┆ 2   ┆ ["B", "B", "C"] │
    #   # │ 4               ┆ 7               ┆ 4   ┆ ["C"]           │
    #   # └─────────────────┴─────────────────┴─────┴─────────────────┘
-    def groupby_dynamic(
+    def group_by_dynamic(
       index_column,
       every:,
       period: nil,
@@ -2205,6 +2190,7 @@ module Polars
         start_by
       )
     end
+    alias_method :groupby_dynamic, :group_by_dynamic
 
     # Upsample a DataFrame at a regular frequency.
     #
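Likewise for the dynamic variant:

    df.group_by_dynamic("time", every: "1h", closed: "right").agg(
      Polars.col("time").count.alias("time_count")
    )  # df.groupby_dynamic(...) remains available as an alias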
@@ -3464,8 +3450,10 @@ module Polars
 
     # Shift values by the given period.
     #
-    # @param periods [Integer]
+    # @param n [Integer]
     #   Number of places to shift (may be negative).
+    # @param fill_value [Object]
+    #   Fill the resulting null values with this value.
     #
     # @return [DataFrame]
     #
@@ -3503,8 +3491,8 @@ module Polars
    #   # │ 3    ┆ 8    ┆ c    │
    #   # │ null ┆ null ┆ null │
    #   # └──────┴──────┴──────┘
-    def shift(periods)
-      _from_rbdf(_df.shift(periods))
+    def shift(n, fill_value: nil)
+      lazy.shift(n, fill_value: fill_value).collect(_eager: true)
     end
 
     # Shift the values by a given period and fill the resulting null values.
@@ -3537,9 +3525,7 @@ module Polars
    #   # │ 2   ┆ 7   ┆ b   │
    #   # └─────┴─────┴─────┘
    def shift_and_fill(periods, fill_value)
-      lazy
-        .shift_and_fill(periods, fill_value)
-        .collect(no_optimization: true, string_cache: false)
+      shift(periods, fill_value: fill_value)
    end
 
    # Get a mask of all duplicated rows in this DataFrame.
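`shift` now accepts the fill value directly and `shift_and_fill` becomes a thin wrapper over it:

    df.shift(1)                 # introduces nulls at the start
    df.shift(1, fill_value: 0)  # fills those nulls with 0
    df.shift_and_fill(1, 0)     # equivalent to the previous line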
@@ -3790,7 +3776,7 @@ module Polars
       if axis == 0
         _from_rbdf(_df.max)
       elsif axis == 1
-        Utils.wrap_s(_df.hmax)
+        Utils.wrap_s(_df.max_horizontal)
       else
         raise ArgumentError, "Axis should be 0 or 1."
       end
@@ -3822,7 +3808,7 @@ module Polars
       if axis == 0
         _from_rbdf(_df.min)
       elsif axis == 1
-        Utils.wrap_s(_df.hmin)
+        Utils.wrap_s(_df.min_horizontal)
       else
         raise ArgumentError, "Axis should be 0 or 1."
       end
@@ -3871,7 +3857,7 @@ module Polars
       when 0
         _from_rbdf(_df.sum)
       when 1
-        Utils.wrap_s(_df.hsum(null_strategy))
+        Utils.wrap_s(_df.sum_horizontal(null_strategy))
       else
         raise ArgumentError, "Axis should be 0 or 1."
       end
@@ -3909,7 +3895,7 @@ module Polars
       when 0
         _from_rbdf(_df.mean)
       when 1
-        Utils.wrap_s(_df.hmean(null_strategy))
+        Utils.wrap_s(_df.mean_horizontal(null_strategy))
       else
         raise ArgumentError, "Axis should be 0 or 1."
       end
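Only the native method names change (`hmax`/`hmin`/`hsum`/`hmean` become the `*_horizontal` family); the Ruby-level API is untouched:

    df.max(axis: 1)  # row-wise maximum as a Series, now via max_horizontal
    df.sum(axis: 1)  # row-wise sum, now via sum_horizontal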
@@ -4294,15 +4280,20 @@ module Polars
       end
 
       if n.nil? && !frac.nil?
+        frac = Series.new("frac", [frac]) unless frac.is_a?(Series)
+
         _from_rbdf(
-          _df.sample_frac(frac, with_replacement, shuffle, seed)
+          _df.sample_frac(frac._s, with_replacement, shuffle, seed)
         )
       end
 
       if n.nil?
         n = 1
       end
-      _from_rbdf(_df.sample_n(n, with_replacement, shuffle, seed))
+
+      n = Series.new("", [n]) unless n.is_a?(Series)
+
+      _from_rbdf(_df.sample_n(n._s, with_replacement, shuffle, seed))
     end
 
     # Apply a horizontal reduction on a DataFrame.
@@ -4601,7 +4592,7 @@ module Polars
    #
    # @example
    #   s = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]})
-    #   s.take_every(2)
+    #   s.gather_every(2)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬─────┐
@@ -4612,9 +4603,10 @@ module Polars
    #   # │ 1   ┆ 5   │
    #   # │ 3   ┆ 7   │
    #   # └─────┴─────┘
-    def take_every(n)
-      select(Utils.col("*").take_every(n))
+    def gather_every(n)
+      select(Utils.col("*").gather_every(n))
    end
+    alias_method :take_every, :gather_every
 
    # Hash and combine the rows in this DataFrame.
    #
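Same rename-plus-alias pattern as the group-by methods:

    df.gather_every(2)  # new name
    df.take_every(2)    # alias, kept for compatibility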
@@ -4671,16 +4663,16 @@ module Polars
    #   df.interpolate
    #   # =>
    #   # shape: (4, 3)
-    #   # ┌─────┬──────┬─────┐
-    #   # │ foo ┆ bar  ┆ baz │
-    #   # │ --- ┆ ---  ┆ --- │
-    #   # │ i64 ┆ i64  ┆ i64 │
-    #   # ╞═════╪══════╪═════╡
-    #   # │ 1   ┆ 6    ┆ 1   │
-    #   # │ 5   ┆ 7    ┆ 3   │
-    #   # │ 9   ┆ 9    ┆ 6   │
-    #   # │ 10  ┆ null ┆ 9   │
-    #   # └─────┴──────┴─────┘
+    #   # ┌──────┬──────┬──────────┐
+    #   # │ foo  ┆ bar  ┆ baz      │
+    #   # │ ---  ┆ ---  ┆ ---      │
+    #   # │ f64  ┆ f64  ┆ f64      │
+    #   # ╞══════╪══════╪══════════╡
+    #   # │ 1.0  ┆ 6.0  ┆ 1.0      │
+    #   # │ 5.0  ┆ 7.0  ┆ 3.666667 │
+    #   # │ 9.0  ┆ 9.0  ┆ 6.333333 │
+    #   # │ 10.0 ┆ null ┆ 9.0      │
+    #   # └──────┴──────┴──────────┘
    def interpolate
      select(Utils.col("*").interpolate)
    end
@@ -4952,8 +4944,8 @@ module Polars
         [lookup[col[0]] || col[0], col[1]]
       end
 
-      if schema_overrides
-        raise Todo
+      if schema_overrides && schema_overrides.any?
+        column_dtypes.merge!(schema_overrides)
       end
 
       column_dtypes.each do |col, dtype|
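`schema_overrides` previously raised `Todo` on this path; it is now merged into the inferred dtypes. A sketch of the user-facing effect:

    df = Polars::DataFrame.new(
      {"a" => [1, 2, 3], "b" => ["x", "y", "z"]},
      schema_overrides: {"a" => Polars::Float64}
    )
    # "a" is built as f64 instead of the inferred i64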
@@ -5056,13 +5048,54 @@ module Polars
         return rbdf
       elsif data[0].is_a?(::Array)
         if orient.nil? && !columns.nil?
-          orient = columns.length == data.length ? "col" : "row"
+          first_element = data[0]
+          row_types = first_element.filter_map { |value| value.class }.uniq
+          if row_types.include?(Integer) && row_types.include?(Float)
+            row_types.delete(Integer)
+          end
+          orient = row_types.length == 1 ? "col" : "row"
         end
 
         if orient == "row"
-          raise Todo
+          column_names, schema_overrides = _unpack_schema(
+            schema, schema_overrides: schema_overrides, n_expected: first_element.length
+          )
+          local_schema_override = (
+            schema_overrides.any? ? (raise Todo) : {}
+          )
+          if column_names.any? && first_element.length > 0 && first_element.length != column_names.length
+            raise ArgumentError, "the row data does not match the number of columns"
+          end
+
+          unpack_nested = false
+          local_schema_override.each do |col, tp|
+            raise Todo
+          end
+
+          if unpack_nested
+            raise Todo
+          else
+            rbdf = RbDataFrame.read_rows(
+              data,
+              infer_schema_length,
+              local_schema_override.any? ? local_schema_override : nil
+            )
+          end
+          if column_names.any? || schema_overrides.any?
+            rbdf = _post_apply_columns(
+              rbdf, column_names, schema_overrides: schema_overrides
+            )
+          end
+          return rbdf
         elsif orient == "col" || orient.nil?
-          raise Todo
+          column_names, schema_overrides = _unpack_schema(
+            schema, schema_overrides: schema_overrides, n_expected: data.length
+          )
+          data_series =
+            data.map.with_index do |element, i|
+              Series.new(column_names[i], element, dtype: schema_overrides[column_names[i]])._s
+            end
+          return RbDataFrame.new(data_series)
         else
           raise ArgumentError, "orient must be one of {{'col', 'row', nil}}, got #{orient} instead."
         end
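Row- and column-oriented construction from arrays of arrays now works instead of raising `Todo`. A sketch:

    # rows, with explicit column names
    Polars::DataFrame.new([[1, "a"], [2, "b"]], schema: ["id", "label"], orient: "row")

    # columns; inferred as "col" when each inner array holds a single dtype
    Polars::DataFrame.new([[1, 2], ["a", "b"]], schema: ["id", "label"])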
@@ -5108,10 +5141,10 @@ module Polars
 
     def _compare_to_other_df(other, op)
       if columns != other.columns
-        raise ArgmentError, "DataFrame columns do not match"
+        raise ArgumentError, "DataFrame columns do not match"
       end
       if shape != other.shape
-        raise ArgmentError, "DataFrame dimensions do not match"
+        raise ArgumentError, "DataFrame dimensions do not match"
       end
 
       suffix = "__POLARS_CMP_OTHER"
@@ -97,15 +97,20 @@ module Polars
    #   # │ 2001-01-01 00:50:00 ┆ 2001-01-01 00:30:00 │
    #   # │ 2001-01-01 01:00:00 ┆ 2001-01-01 01:00:00 │
    #   # └─────────────────────┴─────────────────────┘
-    def truncate(every, offset: nil)
+    def truncate(every, offset: nil, use_earliest: nil)
       if offset.nil?
         offset = "0ns"
       end
 
+      if !every.is_a?(Expr)
+        every = Utils._timedelta_to_pl_duration(every)
+      end
+      every = Utils.parse_as_expression(every, str_as_lit: true)
+
       Utils.wrap_expr(
         _rbexpr.dt_truncate(
-          Utils._timedelta_to_pl_duration(every),
-          Utils._timedelta_to_pl_duration(offset)
+          every,
+          Utils._timedelta_to_pl_duration(offset),
         )
       )
     end
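`every` may now be an expression rather than only a string or timedelta; plain strings are parsed as literals. A sketch (the "every" column is a hypothetical example):

    Polars.col("dt").dt.truncate("1h")                # constant window, as before
    Polars.col("dt").dt.truncate(Polars.col("every")) # per-row window sizes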
@@ -1026,21 +1031,10 @@ module Polars
    #   Time zone for the `Datetime` Series.
    #
    # @return [Expr]
-    def replace_time_zone(tz, use_earliest: nil)
-      Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, use_earliest))
-    end
-
-    # Localize tz-naive Datetime Series to tz-aware Datetime Series.
-    #
-    # This method takes a naive Datetime Series and makes this time zone aware.
-    # It does not move the time to another time zone.
-    #
-    # @param tz [String]
-    #   Time zone for the `Datetime` Series.
-    #
-    # @return [Expr]
-    def tz_localize(tz)
-      Utils.wrap_expr(_rbexpr.dt_tz_localize(tz))
+    def replace_time_zone(tz, use_earliest: nil, ambiguous: "raise")
+      ambiguous = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
+      ambiguous = Polars.lit(ambiguous) unless ambiguous.is_a?(Expr)
+      Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, ambiguous._rbexpr))
     end
 
     # Extract the days from a Duration type.
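`tz_localize` is removed and `replace_time_zone` gains the `ambiguous` option, with `use_earliest` translated for backward compatibility. A sketch:

    Polars.col("ts").dt.replace_time_zone("Europe/Amsterdam", ambiguous: "earliest")
    Polars.col("ts").dt.replace_time_zone("Europe/Amsterdam", use_earliest: true)  # still accepted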
@@ -1348,6 +1342,7 @@ module Polars
    #   # │ 2006-01-01 00:00:00 ┆ 2003-11-01 00:00:00 │
    #   # └─────────────────────┴─────────────────────┘
    def offset_by(by)
+      by = Utils.parse_as_expression(by, str_as_lit: true)
      Utils.wrap_expr(_rbexpr.dt_offset_by(by))
    end
 
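As with `truncate`, the offset may now be an expression, so it can vary per row. A sketch (the "offsets" column is a hypothetical example):

    Polars.col("dates").dt.offset_by("1y")                   # constant offset, as before
    Polars.col("dates").dt.offset_by(Polars.col("offsets"))  # per-row offsets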
@@ -23,18 +23,8 @@ module Polars
    # @return [Object]
    #
    # @example
-    #   date = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
-    #   # =>
-    #   # shape: (3,)
-    #   # Series: '' [datetime[μs]]
-    #   # [
-    #   #     2001-01-01 00:00:00
-    #   #     2001-01-02 00:00:00
-    #   #     2001-01-03 00:00:00
-    #   # ]
-    #
-    # @example
-    #   date.dt.min
+    #   s = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
+    #   s.dt.min
    #   # => 2001-01-01 00:00:00 UTC
    def min
      Utils.wrap_s(_s).min
@@ -45,18 +35,8 @@ module Polars
    # @return [Object]
    #
    # @example
-    #   date = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
-    #   # =>
-    #   # shape: (3,)
-    #   # Series: '' [datetime[μs]]
-    #   # [
-    #   #     2001-01-01 00:00:00
-    #   #     2001-01-02 00:00:00
-    #   #     2001-01-03 00:00:00
-    #   # ]
-    #
-    # @example
-    #   date.dt.max
+    #   s = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
+    #   s.dt.max
    #   # => 2001-01-03 00:00:00 UTC
    def max
      Utils.wrap_s(_s).max
@@ -1400,7 +1380,7 @@ module Polars
    #   #   2001-01-01 00:30:00
    #   #   2001-01-01 01:00:00
    #   # ]
-    def truncate(every, offset: nil)
+    def truncate(every, offset: nil, use_earliest: nil)
      super
    end
 
@@ -2,7 +2,7 @@ module Polars
   # A dynamic grouper.
   #
   # This has an `.agg` method which allows you to run all polars expressions in a
-  # groupby context.
+  # group by context.
   class DynamicGroupBy
     def initialize(
       df,
@@ -34,7 +34,7 @@ module Polars
 
     def agg(aggs)
       @df.lazy
-        .groupby_dynamic(
+        .group_by_dynamic(
          @time_column,
          every: @every,
          period: @period,