polars-df 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/Cargo.lock +468 -538
  4. data/Cargo.toml +1 -0
  5. data/README.md +8 -7
  6. data/ext/polars/Cargo.toml +17 -10
  7. data/ext/polars/src/batched_csv.rs +26 -26
  8. data/ext/polars/src/conversion.rs +121 -93
  9. data/ext/polars/src/dataframe.rs +116 -71
  10. data/ext/polars/src/error.rs +0 -5
  11. data/ext/polars/src/expr/binary.rs +18 -6
  12. data/ext/polars/src/expr/datetime.rs +10 -12
  13. data/ext/polars/src/expr/general.rs +68 -284
  14. data/ext/polars/src/expr/list.rs +17 -9
  15. data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
  16. data/ext/polars/src/expr/name.rs +44 -0
  17. data/ext/polars/src/expr/rolling.rs +196 -0
  18. data/ext/polars/src/expr/string.rs +85 -58
  19. data/ext/polars/src/file.rs +3 -3
  20. data/ext/polars/src/functions/aggregation.rs +35 -0
  21. data/ext/polars/src/functions/eager.rs +7 -31
  22. data/ext/polars/src/functions/io.rs +10 -10
  23. data/ext/polars/src/functions/lazy.rs +66 -41
  24. data/ext/polars/src/functions/meta.rs +30 -0
  25. data/ext/polars/src/functions/misc.rs +8 -0
  26. data/ext/polars/src/functions/mod.rs +5 -0
  27. data/ext/polars/src/functions/random.rs +6 -0
  28. data/ext/polars/src/functions/range.rs +46 -0
  29. data/ext/polars/src/functions/string_cache.rs +11 -0
  30. data/ext/polars/src/functions/whenthen.rs +7 -7
  31. data/ext/polars/src/lazyframe.rs +47 -42
  32. data/ext/polars/src/lib.rs +156 -72
  33. data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
  34. data/ext/polars/src/{apply → map}/mod.rs +3 -3
  35. data/ext/polars/src/{apply → map}/series.rs +12 -16
  36. data/ext/polars/src/object.rs +1 -1
  37. data/ext/polars/src/rb_modules.rs +22 -7
  38. data/ext/polars/src/series/construction.rs +4 -4
  39. data/ext/polars/src/series/export.rs +2 -2
  40. data/ext/polars/src/series/set_at_idx.rs +33 -17
  41. data/ext/polars/src/series.rs +7 -27
  42. data/ext/polars/src/sql.rs +46 -0
  43. data/lib/polars/config.rb +530 -0
  44. data/lib/polars/data_frame.rb +115 -82
  45. data/lib/polars/date_time_expr.rb +13 -18
  46. data/lib/polars/date_time_name_space.rb +5 -25
  47. data/lib/polars/dynamic_group_by.rb +2 -2
  48. data/lib/polars/expr.rb +177 -94
  49. data/lib/polars/functions.rb +29 -37
  50. data/lib/polars/group_by.rb +38 -55
  51. data/lib/polars/io.rb +37 -2
  52. data/lib/polars/lazy_frame.rb +93 -66
  53. data/lib/polars/lazy_functions.rb +36 -48
  54. data/lib/polars/lazy_group_by.rb +7 -8
  55. data/lib/polars/list_expr.rb +12 -8
  56. data/lib/polars/list_name_space.rb +2 -2
  57. data/lib/polars/name_expr.rb +198 -0
  58. data/lib/polars/rolling_group_by.rb +2 -2
  59. data/lib/polars/series.rb +26 -13
  60. data/lib/polars/sql_context.rb +194 -0
  61. data/lib/polars/string_expr.rb +114 -60
  62. data/lib/polars/string_name_space.rb +19 -4
  63. data/lib/polars/utils.rb +12 -0
  64. data/lib/polars/version.rb +1 -1
  65. data/lib/polars.rb +3 -0
  66. metadata +18 -7
  67. /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -20,15 +20,9 @@ module Polars
20
20
  # this does not yield conclusive results, column orientation is used.
21
21
  def initialize(data = nil, schema: nil, columns: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
22
22
  schema ||= columns
23
- raise Todo if schema_overrides
24
23
 
25
- # TODO deprecate in favor of read_sql
26
24
  if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
27
- result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.to_sql)
28
- data = {}
29
- result.columns.each_with_index do |k, i|
30
- data[k] = result.rows.map { |r| r[i] }
31
- end
25
+ raise ArgumentError, "Use read_database instead"
32
26
  end
33
27
 
34
28
  if data.nil?
@@ -905,6 +899,7 @@ module Polars
905
899
  def write_csv(
906
900
  file = nil,
907
901
  has_header: true,
902
+ include_header: nil,
908
903
  sep: ",",
909
904
  quote: '"',
910
905
  batch_size: 1024,
@@ -914,6 +909,8 @@ module Polars
914
909
  float_precision: nil,
915
910
  null_value: nil
916
911
  )
912
+ include_header = has_header if include_header.nil?
913
+
917
914
  if sep.length > 1
918
915
  raise ArgumentError, "only single byte separator is allowed"
919
916
  elsif quote.length > 1
@@ -927,7 +924,7 @@ module Polars
927
924
  buffer.set_encoding(Encoding::BINARY)
928
925
  _df.write_csv(
929
926
  buffer,
930
- has_header,
927
+ include_header,
931
928
  sep.ord,
932
929
  quote.ord,
933
930
  batch_size,
@@ -946,7 +943,7 @@ module Polars
946
943
 
947
944
  _df.write_csv(
948
945
  file,
949
- has_header,
946
+ include_header,
950
947
  sep.ord,
951
948
  quote.ord,
952
949
  batch_size,
@@ -1151,22 +1148,8 @@ module Polars
1151
1148
  # # │ b ┆ 1 ┆ 2 ┆ 3 │
1152
1149
  # # └─────┴─────┴─────┴─────┘
1153
1150
  def transpose(include_header: false, header_name: "column", column_names: nil)
1154
- df = _from_rbdf(_df.transpose(include_header, header_name))
1155
- if !column_names.nil?
1156
- names = []
1157
- n = df.width
1158
- if include_header
1159
- names << header_name
1160
- n -= 1
1161
- end
1162
-
1163
- column_names = column_names.each
1164
- n.times do
1165
- names << column_names.next
1166
- end
1167
- df.columns = names
1168
- end
1169
- df
1151
+ keep_names_as = include_header ? header_name : nil
1152
+ _from_rbdf(_df.transpose(keep_names_as, column_names))
1170
1153
  end
1171
1154
 
1172
1155
  # Reverse the DataFrame.
@@ -1811,13 +1794,13 @@ module Polars
1811
1794
  _from_rbdf(_df.with_row_count(name, offset))
1812
1795
  end
1813
1796
 
1814
- # Start a groupby operation.
1797
+ # Start a group by operation.
1815
1798
  #
1816
1799
  # @param by [Object]
1817
1800
  # Column(s) to group by.
1818
1801
  # @param maintain_order [Boolean]
1819
1802
  # Make sure that the order of the groups remain consistent. This is more
1820
- # expensive than a default groupby. Note that this only works in expression
1803
+ # expensive than a default group by. Note that this only works in expression
1821
1804
  # aggregations.
1822
1805
  #
1823
1806
  # @return [GroupBy]
@@ -1830,7 +1813,7 @@ module Polars
1830
1813
  # "c" => [6, 5, 4, 3, 2, 1]
1831
1814
  # }
1832
1815
  # )
1833
- # df.groupby("a").agg(Polars.col("b").sum).sort("a")
1816
+ # df.group_by("a").agg(Polars.col("b").sum).sort("a")
1834
1817
  # # =>
1835
1818
  # # shape: (3, 2)
1836
1819
  # # ┌─────┬─────┐
@@ -1842,25 +1825,26 @@ module Polars
1842
1825
  # # │ b ┆ 11 │
1843
1826
  # # │ c ┆ 6 │
1844
1827
  # # └─────┴─────┘
1845
- def groupby(by, maintain_order: false)
1828
+ def group_by(by, maintain_order: false)
1846
1829
  if !Utils.bool?(maintain_order)
1847
- raise TypeError, "invalid input for groupby arg `maintain_order`: #{maintain_order}."
1830
+ raise TypeError, "invalid input for group_by arg `maintain_order`: #{maintain_order}."
1848
1831
  end
1849
1832
  GroupBy.new(
1850
- _df,
1833
+ self,
1851
1834
  by,
1852
- self.class,
1853
1835
  maintain_order: maintain_order
1854
1836
  )
1855
1837
  end
1838
+ alias_method :groupby, :group_by
1839
+ alias_method :group, :group_by
1856
1840
 
1857
1841
  # Create rolling groups based on a time column.
1858
1842
  #
1859
1843
  # Also works for index values of type `:i32` or `:i64`.
1860
1844
  #
1861
- # Different from a `dynamic_groupby` the windows are now determined by the
1845
+ # Different from a `dynamic_group_by` the windows are now determined by the
1862
1846
  # individual values and are not of constant intervals. For constant intervals use
1863
- # *groupby_dynamic*
1847
+ # *group_by_dynamic*
1864
1848
  #
1865
1849
  # The `period` and `offset` arguments are created either from a timedelta, or
1866
1850
  # by using the following string language:
@@ -1880,7 +1864,7 @@ module Polars
1880
1864
  # Or combine them:
1881
1865
  # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
1882
1866
  #
1883
- # In case of a groupby_rolling on an integer column, the windows are defined by:
1867
+ # In case of a group_by_rolling on an integer column, the windows are defined by:
1884
1868
  #
1885
1869
  # - **"1i" # length 1**
1886
1870
  # - **"10i" # length 10**
@@ -1891,7 +1875,7 @@ module Polars
1891
1875
  # This column must be sorted in ascending order. If not the output will not
1892
1876
  # make sense.
1893
1877
  #
1894
- # In case of a rolling groupby on indices, dtype needs to be one of
1878
+ # In case of a rolling group by on indices, dtype needs to be one of
1895
1879
  # `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
1896
1880
  # performance matters use an `:i64` column.
1897
1881
  # @param period [Object]
@@ -1923,7 +1907,7 @@ module Polars
1923
1907
  # df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
1924
1908
  # Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
1925
1909
  # )
1926
- # df.groupby_rolling(index_column: "dt", period: "2d").agg(
1910
+ # df.group_by_rolling(index_column: "dt", period: "2d").agg(
1927
1911
  # [
1928
1912
  # Polars.sum("a").alias("sum_a"),
1929
1913
  # Polars.min("a").alias("min_a"),
@@ -1944,7 +1928,7 @@ module Polars
1944
1928
  # # │ 2020-01-03 19:45:32 ┆ 11 ┆ 2 ┆ 9 │
1945
1929
  # # │ 2020-01-08 23:16:43 ┆ 1 ┆ 1 ┆ 1 │
1946
1930
  # # └─────────────────────┴───────┴───────┴───────┘
1947
- def groupby_rolling(
1931
+ def group_by_rolling(
1948
1932
  index_column:,
1949
1933
  period:,
1950
1934
  offset: nil,
@@ -1954,11 +1938,12 @@ module Polars
1954
1938
  )
1955
1939
  RollingGroupBy.new(self, index_column, period, offset, closed, by, check_sorted)
1956
1940
  end
1941
+ alias_method :groupby_rolling, :group_by_rolling
1957
1942
 
1958
1943
  # Group based on a time value (or index value of type `:i32`, `:i64`).
1959
1944
  #
1960
1945
  # Time windows are calculated and rows are assigned to windows. Different from a
1961
- # normal groupby is that a row can be member of multiple groups. The time/index
1946
+ # normal group by is that a row can be member of multiple groups. The time/index
1962
1947
  # window could be seen as a rolling window, with a window size determined by
1963
1948
  # dates/times/values instead of slots in the DataFrame.
1964
1949
  #
@@ -1986,7 +1971,7 @@ module Polars
1986
1971
  # Or combine them:
1987
1972
  # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
1988
1973
  #
1989
- # In case of a groupby_dynamic on an integer column, the windows are defined by:
1974
+ # In case of a group_by_dynamic on an integer column, the windows are defined by:
1990
1975
  #
1991
1976
  # - "1i" # length 1
1992
1977
  # - "10i" # length 10
@@ -1997,7 +1982,7 @@ module Polars
1997
1982
  # This column must be sorted in ascending order. If not the output will not
1998
1983
  # make sense.
1999
1984
  #
2000
- # In case of a dynamic groupby on indices, dtype needs to be one of
1985
+ # In case of a dynamic group by on indices, dtype needs to be one of
2001
1986
  # `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
2002
1987
  # performance matters use an `:i64` column.
2003
1988
  # @param every
@@ -2048,7 +2033,7 @@ module Polars
2048
2033
  # # └─────────────────────┴─────┘
2049
2034
  #
2050
2035
  # @example Group by windows of 1 hour starting at 2021-12-16 00:00:00.
2051
- # df.groupby_dynamic("time", every: "1h", closed: "right").agg(
2036
+ # df.group_by_dynamic("time", every: "1h", closed: "right").agg(
2052
2037
  # [
2053
2038
  # Polars.col("time").min.alias("time_min"),
2054
2039
  # Polars.col("time").max.alias("time_max")
@@ -2068,7 +2053,7 @@ module Polars
2068
2053
  # # └─────────────────────┴─────────────────────┴─────────────────────┘
2069
2054
  #
2070
2055
  # @example The window boundaries can also be added to the aggregation result.
2071
- # df.groupby_dynamic(
2056
+ # df.group_by_dynamic(
2072
2057
  # "time", every: "1h", include_boundaries: true, closed: "right"
2073
2058
  # ).agg([Polars.col("time").count.alias("time_count")])
2074
2059
  # # =>
@@ -2085,7 +2070,7 @@ module Polars
2085
2070
  # # └─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
2086
2071
  #
2087
2072
  # @example When closed="left", should not include right end of interval.
2088
- # df.groupby_dynamic("time", every: "1h", closed: "left").agg(
2073
+ # df.group_by_dynamic("time", every: "1h", closed: "left").agg(
2089
2074
  # [
2090
2075
  # Polars.col("time").count.alias("time_count"),
2091
2076
  # Polars.col("time").alias("time_agg_list")
@@ -2105,7 +2090,7 @@ module Polars
2105
2090
  # # └─────────────────────┴────────────┴───────────────────────────────────┘
2106
2091
  #
2107
2092
  # @example When closed="both" the time values at the window boundaries belong to 2 groups.
2108
- # df.groupby_dynamic("time", every: "1h", closed: "both").agg(
2093
+ # df.group_by_dynamic("time", every: "1h", closed: "both").agg(
2109
2094
  # [Polars.col("time").count.alias("time_count")]
2110
2095
  # )
2111
2096
  # # =>
@@ -2122,7 +2107,7 @@ module Polars
2122
2107
  # # │ 2021-12-16 03:00:00 ┆ 1 │
2123
2108
  # # └─────────────────────┴────────────┘
2124
2109
  #
2125
- # @example Dynamic groupbys can also be combined with grouping on normal keys.
2110
+ # @example Dynamic group bys can also be combined with grouping on normal keys.
2126
2111
  # df = Polars::DataFrame.new(
2127
2112
  # {
2128
2113
  # "time" => Polars.date_range(
@@ -2133,7 +2118,7 @@ module Polars
2133
2118
  # "groups" => ["a", "a", "a", "b", "b", "a", "a"]
2134
2119
  # }
2135
2120
  # )
2136
- # df.groupby_dynamic(
2121
+ # df.group_by_dynamic(
2137
2122
  # "time",
2138
2123
  # every: "1h",
2139
2124
  # closed: "both",
@@ -2156,14 +2141,14 @@ module Polars
2156
2141
  # # │ b ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1 │
2157
2142
  # # └────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
2158
2143
  #
2159
- # @example Dynamic groupby on an index column.
2144
+ # @example Dynamic group by on an index column.
2160
2145
  # df = Polars::DataFrame.new(
2161
2146
  # {
2162
2147
  # "idx" => Polars.arange(0, 6, eager: true),
2163
2148
  # "A" => ["A", "A", "B", "B", "B", "C"]
2164
2149
  # }
2165
2150
  # )
2166
- # df.groupby_dynamic(
2151
+ # df.group_by_dynamic(
2167
2152
  # "idx",
2168
2153
  # every: "2i",
2169
2154
  # period: "3i",
@@ -2181,7 +2166,7 @@ module Polars
2181
2166
  # # │ 2 ┆ 5 ┆ 2 ┆ ["B", "B", "C"] │
2182
2167
  # # │ 4 ┆ 7 ┆ 4 ┆ ["C"] │
2183
2168
  # # └─────────────────┴─────────────────┴─────┴─────────────────┘
2184
- def groupby_dynamic(
2169
+ def group_by_dynamic(
2185
2170
  index_column,
2186
2171
  every:,
2187
2172
  period: nil,
@@ -2205,6 +2190,7 @@ module Polars
2205
2190
  start_by
2206
2191
  )
2207
2192
  end
2193
+ alias_method :groupby_dynamic, :group_by_dynamic
2208
2194
 
2209
2195
  # Upsample a DataFrame at a regular frequency.
2210
2196
  #
@@ -3464,8 +3450,10 @@ module Polars
3464
3450
 
3465
3451
  # Shift values by the given period.
3466
3452
  #
3467
- # @param periods [Integer]
3453
+ # @param n [Integer]
3468
3454
  # Number of places to shift (may be negative).
3455
+ # @param fill_value [Object]
3456
+ # Fill the resulting null values with this value.
3469
3457
  #
3470
3458
  # @return [DataFrame]
3471
3459
  #
@@ -3503,8 +3491,8 @@ module Polars
3503
3491
  # # │ 3 ┆ 8 ┆ c │
3504
3492
  # # │ null ┆ null ┆ null │
3505
3493
  # # └──────┴──────┴──────┘
3506
- def shift(periods)
3507
- _from_rbdf(_df.shift(periods))
3494
+ def shift(n, fill_value: nil)
3495
+ lazy.shift(n, fill_value: fill_value).collect(_eager: true)
3508
3496
  end
3509
3497
 
3510
3498
  # Shift the values by a given period and fill the resulting null values.
@@ -3537,9 +3525,7 @@ module Polars
3537
3525
  # # │ 2 ┆ 7 ┆ b │
3538
3526
  # # └─────┴─────┴─────┘
3539
3527
  def shift_and_fill(periods, fill_value)
3540
- lazy
3541
- .shift_and_fill(periods, fill_value)
3542
- .collect(no_optimization: true, string_cache: false)
3528
+ shift(periods, fill_value: fill_value)
3543
3529
  end
3544
3530
 
3545
3531
  # Get a mask of all duplicated rows in this DataFrame.
@@ -3790,7 +3776,7 @@ module Polars
3790
3776
  if axis == 0
3791
3777
  _from_rbdf(_df.max)
3792
3778
  elsif axis == 1
3793
- Utils.wrap_s(_df.hmax)
3779
+ Utils.wrap_s(_df.max_horizontal)
3794
3780
  else
3795
3781
  raise ArgumentError, "Axis should be 0 or 1."
3796
3782
  end
@@ -3822,7 +3808,7 @@ module Polars
3822
3808
  if axis == 0
3823
3809
  _from_rbdf(_df.min)
3824
3810
  elsif axis == 1
3825
- Utils.wrap_s(_df.hmin)
3811
+ Utils.wrap_s(_df.min_horizontal)
3826
3812
  else
3827
3813
  raise ArgumentError, "Axis should be 0 or 1."
3828
3814
  end
@@ -3871,7 +3857,7 @@ module Polars
3871
3857
  when 0
3872
3858
  _from_rbdf(_df.sum)
3873
3859
  when 1
3874
- Utils.wrap_s(_df.hsum(null_strategy))
3860
+ Utils.wrap_s(_df.sum_horizontal(null_strategy))
3875
3861
  else
3876
3862
  raise ArgumentError, "Axis should be 0 or 1."
3877
3863
  end
@@ -3909,7 +3895,7 @@ module Polars
3909
3895
  when 0
3910
3896
  _from_rbdf(_df.mean)
3911
3897
  when 1
3912
- Utils.wrap_s(_df.hmean(null_strategy))
3898
+ Utils.wrap_s(_df.mean_horizontal(null_strategy))
3913
3899
  else
3914
3900
  raise ArgumentError, "Axis should be 0 or 1."
3915
3901
  end
@@ -4294,15 +4280,20 @@ module Polars
4294
4280
  end
4295
4281
 
4296
4282
  if n.nil? && !frac.nil?
4283
+ frac = Series.new("frac", [frac]) unless frac.is_a?(Series)
4284
+
4297
4285
  _from_rbdf(
4298
- _df.sample_frac(frac, with_replacement, shuffle, seed)
4286
+ _df.sample_frac(frac._s, with_replacement, shuffle, seed)
4299
4287
  )
4300
4288
  end
4301
4289
 
4302
4290
  if n.nil?
4303
4291
  n = 1
4304
4292
  end
4305
- _from_rbdf(_df.sample_n(n, with_replacement, shuffle, seed))
4293
+
4294
+ n = Series.new("", [n]) unless n.is_a?(Series)
4295
+
4296
+ _from_rbdf(_df.sample_n(n._s, with_replacement, shuffle, seed))
4306
4297
  end
4307
4298
 
4308
4299
  # Apply a horizontal reduction on a DataFrame.
@@ -4601,7 +4592,7 @@ module Polars
4601
4592
  #
4602
4593
  # @example
4603
4594
  # s = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]})
4604
- # s.take_every(2)
4595
+ # s.gather_every(2)
4605
4596
  # # =>
4606
4597
  # # shape: (2, 2)
4607
4598
  # # ┌─────┬─────┐
@@ -4612,9 +4603,10 @@ module Polars
4612
4603
  # # │ 1 ┆ 5 │
4613
4604
  # # │ 3 ┆ 7 │
4614
4605
  # # └─────┴─────┘
4615
- def take_every(n)
4616
- select(Utils.col("*").take_every(n))
4606
+ def gather_every(n)
4607
+ select(Utils.col("*").gather_every(n))
4617
4608
  end
4609
+ alias_method :take_every, :gather_every
4618
4610
 
4619
4611
  # Hash and combine the rows in this DataFrame.
4620
4612
  #
@@ -4671,16 +4663,16 @@ module Polars
4671
4663
  # df.interpolate
4672
4664
  # # =>
4673
4665
  # # shape: (4, 3)
4674
- # # ┌─────┬──────┬─────┐
4675
- # # │ foo ┆ bar ┆ baz │
4676
- # # │ --- ┆ --- ┆ --- │
4677
- # # │ i64 ┆ i64 ┆ i64 │
4678
- # # ╞═════╪══════╪═════╡
4679
- # # │ 1 ┆ 6 ┆ 1 │
4680
- # # │ 5 ┆ 7 ┆ 3 │
4681
- # # │ 9 ┆ 9 ┆ 6 │
4682
- # # │ 10 ┆ null ┆ 9 │
4683
- # # └─────┴──────┴─────┘
4666
+ # # ┌──────┬──────┬──────────┐
4667
+ # # │ foo ┆ bar ┆ baz │
4668
+ # # │ --- ┆ --- ┆ --- │
4669
+ # # │ f64 ┆ f64 ┆ f64 │
4670
+ # # ╞══════╪══════╪══════════╡
4671
+ # # │ 1.0 ┆ 6.0 ┆ 1.0 │
4672
+ # # │ 5.0 ┆ 7.0 ┆ 3.666667 │
4673
+ # # │ 9.0 ┆ 9.0 ┆ 6.333333 │
4674
+ # # │ 10.0 ┆ null ┆ 9.0 │
4675
+ # # └──────┴──────┴──────────┘
4684
4676
  def interpolate
4685
4677
  select(Utils.col("*").interpolate)
4686
4678
  end
@@ -4952,8 +4944,8 @@ module Polars
4952
4944
  [lookup[col[0]] || col[0], col[1]]
4953
4945
  end
4954
4946
 
4955
- if schema_overrides
4956
- raise Todo
4947
+ if schema_overrides && schema_overrides.any?
4948
+ column_dtypes.merge!(schema_overrides)
4957
4949
  end
4958
4950
 
4959
4951
  column_dtypes.each do |col, dtype|
@@ -5056,13 +5048,54 @@ module Polars
5056
5048
  return rbdf
5057
5049
  elsif data[0].is_a?(::Array)
5058
5050
  if orient.nil? && !columns.nil?
5059
- orient = columns.length == data.length ? "col" : "row"
5051
+ first_element = data[0]
5052
+ row_types = first_element.filter_map { |value| value.class }.uniq
5053
+ if row_types.include?(Integer) && row_types.include?(Float)
5054
+ row_types.delete(Integer)
5055
+ end
5056
+ orient = row_types.length == 1 ? "col" : "row"
5060
5057
  end
5061
5058
 
5062
5059
  if orient == "row"
5063
- raise Todo
5060
+ column_names, schema_overrides = _unpack_schema(
5061
+ schema, schema_overrides: schema_overrides, n_expected: first_element.length
5062
+ )
5063
+ local_schema_override = (
5064
+ schema_overrides.any? ? (raise Todo) : {}
5065
+ )
5066
+ if column_names.any? && first_element.length > 0 && first_element.length != column_names.length
5067
+ raise ArgumentError, "the row data does not match the number of columns"
5068
+ end
5069
+
5070
+ unpack_nested = false
5071
+ local_schema_override.each do |col, tp|
5072
+ raise Todo
5073
+ end
5074
+
5075
+ if unpack_nested
5076
+ raise Todo
5077
+ else
5078
+ rbdf = RbDataFrame.read_rows(
5079
+ data,
5080
+ infer_schema_length,
5081
+ local_schema_override.any? ? local_schema_override : nil
5082
+ )
5083
+ end
5084
+ if column_names.any? || schema_overrides.any?
5085
+ rbdf = _post_apply_columns(
5086
+ rbdf, column_names, schema_overrides: schema_overrides
5087
+ )
5088
+ end
5089
+ return rbdf
5064
5090
  elsif orient == "col" || orient.nil?
5065
- raise Todo
5091
+ column_names, schema_overrides = _unpack_schema(
5092
+ schema, schema_overrides: schema_overrides, n_expected: data.length
5093
+ )
5094
+ data_series =
5095
+ data.map.with_index do |element, i|
5096
+ Series.new(column_names[i], element, dtype: schema_overrides[column_names[i]])._s
5097
+ end
5098
+ return RbDataFrame.new(data_series)
5066
5099
  else
5067
5100
  raise ArgumentError, "orient must be one of {{'col', 'row', nil}}, got #{orient} instead."
5068
5101
  end
@@ -5108,10 +5141,10 @@ module Polars
5108
5141
 
5109
5142
  def _compare_to_other_df(other, op)
5110
5143
  if columns != other.columns
5111
- raise ArgmentError, "DataFrame columns do not match"
5144
+ raise ArgumentError, "DataFrame columns do not match"
5112
5145
  end
5113
5146
  if shape != other.shape
5114
- raise ArgmentError, "DataFrame dimensions do not match"
5147
+ raise ArgumentError, "DataFrame dimensions do not match"
5115
5148
  end
5116
5149
 
5117
5150
  suffix = "__POLARS_CMP_OTHER"
@@ -97,15 +97,20 @@ module Polars
97
97
  # # │ 2001-01-01 00:50:00 ┆ 2001-01-01 00:30:00 │
98
98
  # # │ 2001-01-01 01:00:00 ┆ 2001-01-01 01:00:00 │
99
99
  # # └─────────────────────┴─────────────────────┘
100
- def truncate(every, offset: nil)
100
+ def truncate(every, offset: nil, use_earliest: nil)
101
101
  if offset.nil?
102
102
  offset = "0ns"
103
103
  end
104
104
 
105
+ if !every.is_a?(Expr)
106
+ every = Utils._timedelta_to_pl_duration(every)
107
+ end
108
+ every = Utils.parse_as_expression(every, str_as_lit: true)
109
+
105
110
  Utils.wrap_expr(
106
111
  _rbexpr.dt_truncate(
107
- Utils._timedelta_to_pl_duration(every),
108
- Utils._timedelta_to_pl_duration(offset)
112
+ every,
113
+ Utils._timedelta_to_pl_duration(offset),
109
114
  )
110
115
  )
111
116
  end
@@ -1026,21 +1031,10 @@ module Polars
1026
1031
  # Time zone for the `Datetime` Series.
1027
1032
  #
1028
1033
  # @return [Expr]
1029
- def replace_time_zone(tz, use_earliest: nil)
1030
- Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, use_earliest))
1031
- end
1032
-
1033
- # Localize tz-naive Datetime Series to tz-aware Datetime Series.
1034
- #
1035
- # This method takes a naive Datetime Series and makes this time zone aware.
1036
- # It does not move the time to another time zone.
1037
- #
1038
- # @param tz [String]
1039
- # Time zone for the `Datetime` Series.
1040
- #
1041
- # @return [Expr]
1042
- def tz_localize(tz)
1043
- Utils.wrap_expr(_rbexpr.dt_tz_localize(tz))
1034
+ def replace_time_zone(tz, use_earliest: nil, ambiguous: "raise")
1035
+ ambiguous = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
1036
+ ambiguous = Polars.lit(ambiguous) unless ambiguous.is_a?(Expr)
1037
+ Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, ambiguous._rbexpr))
1044
1038
  end
1045
1039
 
1046
1040
  # Extract the days from a Duration type.
@@ -1348,6 +1342,7 @@ module Polars
1348
1342
  # # │ 2006-01-01 00:00:00 ┆ 2003-11-01 00:00:00 │
1349
1343
  # # └─────────────────────┴─────────────────────┘
1350
1344
  def offset_by(by)
1345
+ by = Utils.parse_as_expression(by, str_as_lit: true)
1351
1346
  Utils.wrap_expr(_rbexpr.dt_offset_by(by))
1352
1347
  end
1353
1348
 
@@ -23,18 +23,8 @@ module Polars
23
23
  # @return [Object]
24
24
  #
25
25
  # @example
26
- # date = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
27
- # # =>
28
- # # shape: (3,)
29
- # # Series: '' [datetime[μs]]
30
- # # [
31
- # # 2001-01-01 00:00:00
32
- # # 2001-01-02 00:00:00
33
- # # 2001-01-03 00:00:00
34
- # # ]
35
- #
36
- # @example
37
- # date.dt.min
26
+ # s = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
27
+ # s.dt.min
38
28
  # # => 2001-01-01 00:00:00 UTC
39
29
  def min
40
30
  Utils.wrap_s(_s).min
@@ -45,18 +35,8 @@ module Polars
45
35
  # @return [Object]
46
36
  #
47
37
  # @example
48
- # date = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
49
- # # =>
50
- # # shape: (3,)
51
- # # Series: '' [datetime[μs]]
52
- # # [
53
- # # 2001-01-01 00:00:00
54
- # # 2001-01-02 00:00:00
55
- # # 2001-01-03 00:00:00
56
- # # ]
57
- #
58
- # @example
59
- # date.dt.max
38
+ # s = Polars.date_range(DateTime.new(2001, 1, 1), DateTime.new(2001, 1, 3), "1d")
39
+ # s.dt.max
60
40
  # # => 2001-01-03 00:00:00 UTC
61
41
  def max
62
42
  Utils.wrap_s(_s).max
@@ -1400,7 +1380,7 @@ module Polars
1400
1380
  # # 2001-01-01 00:30:00
1401
1381
  # # 2001-01-01 01:00:00
1402
1382
  # # ]
1403
- def truncate(every, offset: nil)
1383
+ def truncate(every, offset: nil, use_earliest: nil)
1404
1384
  super
1405
1385
  end
1406
1386
 
@@ -2,7 +2,7 @@ module Polars
2
2
  # A dynamic grouper.
3
3
  #
4
4
  # This has an `.agg` method which allows you to run all polars expressions in a
5
- # groupby context.
5
+ # group by context.
6
6
  class DynamicGroupBy
7
7
  def initialize(
8
8
  df,
@@ -34,7 +34,7 @@ module Polars
34
34
 
35
35
  def agg(aggs)
36
36
  @df.lazy
37
- .groupby_dynamic(
37
+ .group_by_dynamic(
38
38
  @time_column,
39
39
  every: @every,
40
40
  period: @period,