polars-df 0.4.0-x86_64-darwin → 0.5.0-x86_64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -58,9 +58,9 @@ Polars.scan_parquet("file.parquet")
58
58
  From Active Record
59
59
 
60
60
  ```ruby
61
- Polars.read_sql(User.all)
61
+ Polars.read_database(User.all)
62
62
  # or
63
- Polars.read_sql("SELECT * FROM users")
63
+ Polars.read_database("SELECT * FROM users")
64
64
  ```
65
65
 
66
66
  From JSON
Binary file
Binary file
Binary file
@@ -26,8 +26,8 @@ module Polars
26
26
  # # │ 1 ┆ 3 │
27
27
  # # │ 2 ┆ 4 │
28
28
  # # └─────┴─────┘
29
- def from_hash(data, columns: nil)
30
- DataFrame._from_hash(data, columns: columns)
29
+ def from_hash(data, schema: nil, columns: nil)
30
+ DataFrame._from_hash(data, schema: schema || columns)
31
31
  end
32
32
 
33
33
  # Construct a DataFrame from a sequence of dictionaries. This operation clones data.
@@ -18,7 +18,10 @@ module Polars
18
18
  # Whether to interpret two-dimensional data as columns or as rows. If `nil`,
19
19
  # the orientation is inferred by matching the columns and data dimensions. If
20
20
  # this does not yield conclusive results, column orientation is used.
21
- def initialize(data = nil, columns: nil, orient: nil)
21
+ def initialize(data = nil, schema: nil, columns: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
22
+ schema ||= columns
23
+ raise Todo if schema_overrides
24
+
22
25
  # TODO deprecate in favor of read_sql
23
26
  if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
24
27
  result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.to_sql)
@@ -29,14 +32,14 @@ module Polars
29
32
  end
30
33
 
31
34
  if data.nil?
32
- self._df = self.class.hash_to_rbdf({}, columns: columns)
35
+ self._df = self.class.hash_to_rbdf({}, schema: schema, schema_overrides: schema_overrides)
33
36
  elsif data.is_a?(Hash)
34
37
  data = data.transform_keys { |v| v.is_a?(Symbol) ? v.to_s : v }
35
- self._df = self.class.hash_to_rbdf(data, columns: columns)
38
+ self._df = self.class.hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, nan_to_null: nan_to_null)
36
39
  elsif data.is_a?(Array)
37
- self._df = self.class.sequence_to_rbdf(data, columns: columns, orient: orient)
40
+ self._df = self.class.sequence_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, orient: orient, infer_schema_length: infer_schema_length)
38
41
  elsif data.is_a?(Series)
39
- self._df = self.class.series_to_rbdf(data, columns: columns)
42
+ self._df = self.class.series_to_rbdf(data, schema: schema, schema_overrides: schema_overrides)
40
43
  else
41
44
  raise ArgumentError, "DataFrame constructor called with unsupported type; got #{data.class.name}"
42
45
  end
@@ -56,8 +59,8 @@ module Polars
56
59
  end
57
60
 
58
61
  # @private
59
- def self._from_hash(data, columns: nil)
60
- _from_rbdf(hash_to_rbdf(data, columns: columns))
62
+ def self._from_hash(data, schema: nil, schema_overrides: nil)
63
+ _from_rbdf(hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides))
61
64
  end
62
65
 
63
66
  # def self._from_records
@@ -336,6 +339,7 @@ module Polars
336
339
  end
337
340
  alias_method :count, :height
338
341
  alias_method :length, :height
342
+ alias_method :size, :height
339
343
 
340
344
  # Get the width of the DataFrame.
341
345
  #
@@ -546,6 +550,13 @@ module Polars
546
550
  end
547
551
  alias_method :inspect, :to_s
548
552
 
553
+ # Returns an array representing the DataFrame
554
+ #
555
+ # @return [Array]
556
+ def to_a
557
+ rows(named: true)
558
+ end
559
+
549
560
  # Check if DataFrame includes column.
550
561
  #
551
562
  # @return [Boolean]
@@ -655,7 +666,7 @@ module Polars
655
666
  end
656
667
 
657
668
  # Ruby-specific
658
- if item.is_a?(Expr)
669
+ if item.is_a?(Expr) || item.is_a?(Series)
659
670
  return filter(item)
660
671
  end
661
672
 
@@ -665,15 +676,42 @@ module Polars
665
676
  # Set item.
666
677
  #
667
678
  # @return [Object]
668
- #
669
- # def []=(key, value)
670
- # if key.is_a?(String)
671
- # raise TypeError, "'DataFrame' object does not support 'Series' assignment by index. Use 'DataFrame.with_columns'"
672
- # end
679
+ def []=(*key, value)
680
+ if key.length == 1
681
+ key = key.first
682
+ elsif key.length != 2
683
+ raise ArgumentError, "wrong number of arguments (given #{key.length + 1}, expected 2..3)"
684
+ end
673
685
 
674
- # raise Todo
675
- # end
686
+ if Utils.strlike?(key)
687
+ if value.is_a?(Array) || (defined?(Numo::NArray) && value.is_a?(Numo::NArray))
688
+ value = Series.new(value)
689
+ elsif !value.is_a?(Series)
690
+ value = Polars.lit(value)
691
+ end
692
+ self._df = with_column(value.alias(key.to_s))._df
693
+ elsif key.is_a?(Array)
694
+ row_selection, col_selection = key
676
695
 
696
+ if Utils.strlike?(col_selection)
697
+ s = self[col_selection]
698
+ elsif col_selection.is_a?(Integer)
699
+ raise Todo
700
+ else
701
+ raise ArgumentError, "column selection not understood: #{col_selection}"
702
+ end
703
+
704
+ s[row_selection] = value
705
+
706
+ if col_selection.is_a?(Integer)
707
+ replace_at_idx(col_selection, s)
708
+ elsif Utils.strlike?(col_selection)
709
+ replace(col_selection, s)
710
+ end
711
+ else
712
+ raise Todo
713
+ end
714
+ end
677
715
 
678
716
  # Return the dataframe as a scalar.
679
717
  #
@@ -1462,6 +1500,20 @@ module Polars
1462
1500
  end
1463
1501
  end
1464
1502
 
1503
+ # Sort the DataFrame by column in-place.
1504
+ #
1505
+ # @param by [String]
1506
+ # By which column to sort.
1507
+ # @param reverse [Boolean]
1508
+ # Reverse/descending sort.
1509
+ # @param nulls_last [Boolean]
1510
+ # Place null values last. Can only be used if sorted by a single column.
1511
+ #
1512
+ # @return [DataFrame]
1513
+ def sort!(by, reverse: false, nulls_last: false)
1514
+ self._df = sort(by, reverse: reverse, nulls_last: nulls_last)._df
1515
+ end
1516
+
1465
1517
  # Check if DataFrame is equal to other.
1466
1518
  #
1467
1519
  # @param other [DataFrame]
@@ -1519,7 +1571,7 @@ module Polars
1519
1571
  # # │ 30 ┆ 6 │
1520
1572
  # # └─────┴─────┘
1521
1573
  def replace(column, new_col)
1522
- _df.replace(column, new_col._s)
1574
+ _df.replace(column.to_s, new_col._s)
1523
1575
  self
1524
1576
  end
1525
1577
 
@@ -1860,7 +1912,7 @@ module Polars
1860
1912
  # "2020-01-08 23:16:43"
1861
1913
  # ]
1862
1914
  # df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
1863
- # Polars.col("dt").str.strptime(:datetime)
1915
+ # Polars.col("dt").str.strptime(Polars::Datetime)
1864
1916
  # )
1865
1917
  # df.groupby_rolling(index_column: "dt", period: "2d").agg(
1866
1918
  # [
@@ -2791,6 +2843,16 @@ module Polars
2791
2843
  Utils.wrap_s(_df.drop_in_place(name))
2792
2844
  end
2793
2845
 
2846
+ # Drop in place if exists.
2847
+ #
2848
+ # @param name [Object]
2849
+ # Column to drop.
2850
+ #
2851
+ # @return [Series]
2852
+ def delete(name)
2853
+ drop_in_place(name) if include?(name)
2854
+ end
2855
+
2794
2856
  # Create an empty copy of the current DataFrame.
2795
2857
  #
2796
2858
  # Returns a DataFrame with identical schema but no data.
@@ -3202,7 +3264,7 @@ module Polars
3202
3264
  # # │ B ┆ 1 │
3203
3265
  # # │ C ┆ 2 │
3204
3266
  # # │ D ┆ 3 │
3205
- # # │
3267
+ # # │ E ┆ 4 │
3206
3268
  # # │ F ┆ 5 │
3207
3269
  # # │ G ┆ 6 │
3208
3270
  # # │ H ┆ 7 │
@@ -4428,7 +4490,7 @@ module Polars
4428
4490
  end
4429
4491
  end
4430
4492
 
4431
- # Returns an iterator over the DataFrame of rows of python-native values.
4493
+ # Returns an iterator over the DataFrame of rows of Ruby-native values.
4432
4494
  #
4433
4495
  # @param named [Boolean]
4434
4496
  # Return hashes instead of arrays. The hashes are a mapping of
@@ -4489,6 +4551,24 @@ module Polars
4489
4551
  end
4490
4552
  end
4491
4553
 
4554
+ # Returns an iterator over the DataFrame of rows of Ruby-native values.
4555
+ #
4556
+ # @param named [Boolean]
4557
+ # Return hashes instead of arrays. The hashes are a mapping of
4558
+ # column name to row value. This is more expensive than returning an
4559
+ # array, but allows for accessing values by column name.
4560
+ # @param buffer_size [Integer]
4561
+ # Determines the number of rows that are buffered internally while iterating
4562
+ # over the data; you should only modify this in very specific cases where the
4563
+ # default value is determined not to be a good fit to your access pattern, as
4564
+ # the speedup from using the buffer is significant (~2-4x). Setting this
4565
+ # value to zero disables row buffering.
4566
+ #
4567
+ # @return [Object]
4568
+ def each_row(named: true, buffer_size: 500, &block)
4569
+ iter_rows(named: named, buffer_size: buffer_size, &block)
4570
+ end
4571
+
4492
4572
  # Shrink DataFrame memory usage.
4493
4573
  #
4494
4574
  # Shrinks to fit the exact capacity needed to hold the data.
@@ -4742,20 +4822,63 @@ module Polars
4742
4822
  end
4743
4823
 
4744
4824
  # @private
4745
- def self.hash_to_rbdf(data, columns: nil)
4746
- if !columns.nil?
4747
- columns, dtypes = _unpack_columns(columns, lookup_names: data.keys)
4825
+ def self.expand_hash_scalars(data, schema_overrides: nil, order: nil, nan_to_null: false)
4826
+ updated_data = {}
4827
+ unless data.empty?
4828
+ dtypes = schema_overrides || {}
4829
+ array_len = data.values.map { |val| Utils.arrlen(val) || 0 }.max
4830
+ if array_len > 0
4831
+ data.each do |name, val|
4832
+ dtype = dtypes[name]
4833
+ if val.is_a?(Hash) && dtype != Struct
4834
+ updated_data[name] = DataFrame.new(val).to_struct(name)
4835
+ elsif !Utils.arrlen(val).nil?
4836
+ updated_data[name] = Series.new(String.new(name), val, dtype: dtype)
4837
+ elsif val.nil? || [Integer, Float, TrueClass, FalseClass, String, ::Date, ::DateTime, ::Time].any? { |cls| val.is_a?(cls) }
4838
+ dtype = Polars::Float64 if val.nil? && dtype.nil?
4839
+ updated_data[name] = Series.new(String.new(name), [val], dtype: dtype).extend_constant(val, array_len - 1)
4840
+ else
4841
+ raise Todo
4842
+ end
4843
+ end
4844
+ elsif data.values.all? { |val| Utils.arrlen(val) == 0 }
4845
+ data.each do |name, val|
4846
+ updated_data[name] = Series.new(name, val, dtype: dtypes[name])
4847
+ end
4848
+ elsif data.values.all? { |val| Utils.arrlen(val).nil? }
4849
+ data.each do |name, val|
4850
+ updated_data[name] = Series.new(name, [val], dtype: dtypes[name])
4851
+ end
4852
+ end
4853
+ end
4854
+ updated_data
4855
+ end
4748
4856
 
4749
- if data.empty? && dtypes
4750
- data_series = columns.map { |name| Series.new(name, [], dtype: dtypes[name])._s }
4751
- else
4752
- data_series = data.map { |name, values| Series.new(name, values, dtype: dtypes[name])._s }
4857
+ # @private
4858
+ def self.hash_to_rbdf(data, schema: nil, schema_overrides: nil, nan_to_null: nil)
4859
+ if schema.is_a?(Hash) && !data.empty?
4860
+ if !data.all? { |col, _| schema[col] }
4861
+ raise ArgumentError, "The given column-schema names do not match the data dictionary"
4753
4862
  end
4754
- data_series = _handle_columns_arg(data_series, columns: columns)
4755
- return RbDataFrame.new(data_series)
4863
+
4864
+ data = schema.to_h { |col| [col, data[col]] }
4865
+ end
4866
+
4867
+ column_names, schema_overrides = _unpack_schema(
4868
+ schema, lookup_names: data.keys, schema_overrides: schema_overrides
4869
+ )
4870
+ if column_names.empty?
4871
+ column_names = data.keys
4756
4872
  end
4757
4873
 
4758
- RbDataFrame.read_hash(data)
4874
+ if data.empty? && !schema_overrides.empty?
4875
+ data_series = column_names.map { |name| Series.new(name, [], dtype: schema_overrides[name], nan_to_null: nan_to_null)._s }
4876
+ else
4877
+ data_series = expand_hash_scalars(data, schema_overrides: schema_overrides, nan_to_null: nan_to_null).values.map(&:_s)
4878
+ end
4879
+
4880
+ data_series = _handle_columns_arg(data_series, columns: column_names, from_hash: true)
4881
+ RbDataFrame.new(data_series)
4759
4882
  end
4760
4883
 
4761
4884
  # @private
@@ -4764,14 +4887,12 @@ module Polars
4764
4887
  end
4765
4888
 
4766
4889
  # @private
4767
- def self._unpack_columns(columns, schema_overrides: nil, lookup_names: nil, n_expected: nil)
4768
- raise Todo if schema_overrides
4769
-
4770
- if columns.is_a?(Hash)
4771
- columns = columns.to_a
4890
+ def self._unpack_schema(schema, schema_overrides: nil, n_expected: nil, lookup_names: nil, include_overrides_in_columns: false)
4891
+ if schema.is_a?(Hash)
4892
+ schema = schema.to_a
4772
4893
  end
4773
4894
  column_names =
4774
- (columns || []).map.with_index do |col, i|
4895
+ (schema || []).map.with_index do |col, i|
4775
4896
  if col.is_a?(String)
4776
4897
  col || "column_#{i}"
4777
4898
  else
@@ -4784,21 +4905,38 @@ module Polars
4784
4905
  # TODO zip_longest
4785
4906
  lookup = column_names.zip(lookup_names || []).to_h
4786
4907
 
4787
- [
4788
- column_names,
4789
- (columns || []).select { |col| !col.is_a?(String) && col[1] }.to_h do |col|
4908
+ column_dtypes =
4909
+ (schema || []).select { |col| !col.is_a?(String) && col[1] }.to_h do |col|
4790
4910
  [lookup[col[0]] || col[0], col[1]]
4791
4911
  end
4792
- ]
4912
+
4913
+ if schema_overrides
4914
+ raise Todo
4915
+ end
4916
+
4917
+ column_dtypes.each do |col, dtype|
4918
+ if !Utils.is_polars_dtype(dtype, include_unknown: true) && !dtype.nil?
4919
+ column_dtypes[col] = Utils.rb_type_to_dtype(dtype)
4920
+ end
4921
+ end
4922
+
4923
+ [column_names, column_dtypes]
4793
4924
  end
4794
4925
 
4795
- def self._handle_columns_arg(data, columns: nil)
4796
- if columns.nil?
4926
+ def self._handle_columns_arg(data, columns: nil, from_hash: false)
4927
+ if columns.nil? || columns.empty?
4797
4928
  data
4798
4929
  else
4799
4930
  if data.empty?
4800
4931
  columns.map { |c| Series.new(c, nil)._s }
4801
4932
  elsif data.length == columns.length
4933
+ if from_hash
4934
+ series_map = data.to_h { |s| [s.name, s] }
4935
+ if columns.all? { |col| series_map.key?(col) }
4936
+ return columns.map { |col| series_map[col] }
4937
+ end
4938
+ end
4939
+
4802
4940
  columns.each_with_index do |c, i|
4803
4941
  # not in-place?
4804
4942
  data[i].rename(c)
@@ -4813,7 +4951,7 @@ module Polars
4813
4951
  def self._post_apply_columns(rbdf, columns, structs: nil, schema_overrides: nil)
4814
4952
  rbdf_columns = rbdf.columns
4815
4953
  rbdf_dtypes = rbdf.dtypes
4816
- columns, dtypes = _unpack_columns(
4954
+ columns, dtypes = _unpack_schema(
4817
4955
  (columns || rbdf_columns), schema_overrides: schema_overrides
4818
4956
  )
4819
4957
  column_subset = []
@@ -4851,20 +4989,23 @@ module Polars
4851
4989
  end
4852
4990
 
4853
4991
  # @private
4854
- def self.sequence_to_rbdf(data, columns: nil, orient: nil, infer_schema_length: 50)
4992
+ def self.sequence_to_rbdf(data, schema: nil, schema_overrides: nil, orient: nil, infer_schema_length: 50)
4993
+ raise Todo if schema_overrides
4994
+ columns = schema
4995
+
4855
4996
  if data.length == 0
4856
- return hash_to_rbdf({}, columns: columns)
4997
+ return hash_to_rbdf({}, schema: schema, schema_overrides: schema_overrides)
4857
4998
  end
4858
4999
 
4859
5000
  if data[0].is_a?(Series)
4860
5001
  # series_names = data.map(&:name)
4861
- # columns, dtypes = _unpack_columns(columns || series_names, n_expected: data.length)
5002
+ # columns, dtypes = _unpack_schema(columns || series_names, n_expected: data.length)
4862
5003
  data_series = []
4863
5004
  data.each do |s|
4864
5005
  data_series << s._s
4865
5006
  end
4866
5007
  elsif data[0].is_a?(Hash)
4867
- column_names, dtypes = _unpack_columns(columns)
5008
+ column_names, dtypes = _unpack_schema(columns)
4868
5009
  schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
4869
5010
  rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
4870
5011
  if column_names
@@ -4890,11 +5031,21 @@ module Polars
4890
5031
  end
4891
5032
 
4892
5033
  # @private
4893
- def self.series_to_rbdf(data, columns: nil)
4894
- if columns
4895
- raise Todo
5034
+ def self.series_to_rbdf(data, schema: nil, schema_overrides: nil)
5035
+ data_series = [data._s]
5036
+ series_name = data_series.map(&:name)
5037
+ column_names, schema_overrides = _unpack_schema(
5038
+ schema || series_name, schema_overrides: schema_overrides, n_expected: 1
5039
+ )
5040
+ if schema_overrides.any?
5041
+ new_dtype = schema_overrides.values[0]
5042
+ if new_dtype != data.dtype
5043
+ data_series[0] = data_series[0].cast(new_dtype, true)
5044
+ end
4896
5045
  end
4897
- RbDataFrame.new([data._s])
5046
+
5047
+ data_series = _handle_columns_arg(data_series, columns: column_names)
5048
+ RbDataFrame.new(data_series)
4898
5049
  end
4899
5050
 
4900
5051
  def wrap_ldf(ldf)
@@ -84,20 +84,22 @@ module Polars
84
84
 
85
85
  # Calendar date and time type.
86
86
  class Datetime < TemporalType
87
- attr_reader :tu
87
+ attr_reader :time_unit, :time_zone
88
+ alias_method :tu, :time_unit
88
89
 
89
90
  def initialize(time_unit = "us", time_zone = nil)
90
- @tu = time_unit || "us"
91
+ @time_unit = time_unit || "us"
91
92
  @time_zone = time_zone
92
93
  end
93
94
  end
94
95
 
95
96
  # Time duration/delta type.
96
97
  class Duration < TemporalType
97
- attr_reader :tu
98
+ attr_reader :time_unit
99
+ alias_method :tu, :time_unit
98
100
 
99
101
  def initialize(time_unit = "us")
100
- @tu = time_unit
102
+ @time_unit = time_unit
101
103
  end
102
104
  end
103
105
 
@@ -218,6 +218,25 @@ module Polars
218
218
  )
219
219
  end
220
220
 
221
+ # Create a naive Datetime from an existing Date/Datetime expression and a Time.
222
+ #
223
+ # If the underlying expression is a Datetime then its time component is replaced,
224
+ # and if it is a Date then a new Datetime is created by combining the two values.
225
+ #
226
+ # @param time [Object]
227
+ # A Ruby time literal or Polars expression/column that resolves to a time.
228
+ # @param time_unit ["ns", "us", "ms"]
229
+ # Unit of time.
230
+ #
231
+ # @return [Expr]
232
+ def combine(time, time_unit: "us")
233
+ unless time.is_a?(Time) || time.is_a?(Expr)
234
+ raise TypeError, "expected 'time' to be a Ruby time or Polars expression, found #{time}"
235
+ end
236
+ time = Utils.expr_to_lit_or_expr(time)
237
+ Utils.wrap_expr(_rbexpr.dt_combine(time._rbexpr, time_unit))
238
+ end
239
+
221
240
  # Format Date/datetime with a formatting rule.
222
241
  #
223
242
  # See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
@@ -270,6 +289,34 @@ module Polars
270
289
  Utils.wrap_expr(_rbexpr.year)
271
290
  end
272
291
 
292
+ # Determine whether the year of the underlying date is a leap year.
293
+ #
294
+ # Applies to Date and Datetime columns.
295
+ #
296
+ # @return [Expr]
297
+ #
298
+ # @example
299
+ # start = DateTime.new(2000, 1, 1)
300
+ # stop = DateTime.new(2002, 1, 1)
301
+ # df = Polars::DataFrame.new(
302
+ # {"date" => Polars.date_range(start, stop, "1y")}
303
+ # )
304
+ # df.select(Polars.col("date").dt.is_leap_year)
305
+ # # =>
306
+ # # shape: (3, 1)
307
+ # # ┌───────┐
308
+ # # │ date │
309
+ # # │ --- │
310
+ # # │ bool │
311
+ # # ╞═══════╡
312
+ # # │ true │
313
+ # # │ false │
314
+ # # │ false │
315
+ # # └───────┘
316
+ def is_leap_year
317
+ Utils.wrap_expr(_rbexpr.dt_is_leap_year)
318
+ end
319
+
273
320
  # Extract ISO year from underlying Date representation.
274
321
  #
275
322
  # Applies to Date and Datetime columns.
@@ -550,6 +597,27 @@ module Polars
550
597
  Utils.wrap_expr(_rbexpr.ordinal_day)
551
598
  end
552
599
 
600
+ # Time
601
+ #
602
+ # @return [Expr]
603
+ def time
604
+ Utils.wrap_expr(_rbexpr.dt_time)
605
+ end
606
+
607
+ # Date
608
+ #
609
+ # @return [Expr]
610
+ def date
611
+ Utils.wrap_expr(_rbexpr.dt_date)
612
+ end
613
+
614
+ # Datetime
615
+ #
616
+ # @return [Expr]
617
+ def datetime
618
+ Utils.wrap_expr(_rbexpr.dt_datetime)
619
+ end
620
+
553
621
  # Extract hour from underlying DateTime representation.
554
622
  #
555
623
  # Applies to Datetime columns.
@@ -958,8 +1026,8 @@ module Polars
958
1026
  # Time zone for the `Datetime` Series.
959
1027
  #
960
1028
  # @return [Expr]
961
- def replace_time_zone(tz)
962
- Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz))
1029
+ def replace_time_zone(tz, use_earliest: nil)
1030
+ Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, use_earliest))
963
1031
  end
964
1032
 
965
1033
  # Localize tz-naive Datetime Series to tz-aware Datetime Series.
@@ -1282,5 +1350,77 @@ module Polars
1282
1350
  def offset_by(by)
1283
1351
  Utils.wrap_expr(_rbexpr.dt_offset_by(by))
1284
1352
  end
1353
+
1354
+ # Roll backward to the first day of the month.
1355
+ #
1356
+ # @return [Expr]
1357
+ #
1358
+ # @example
1359
+ # df = Polars::DataFrame.new(
1360
+ # {
1361
+ # "dates" => Polars.date_range(
1362
+ # DateTime.new(2000, 1, 15, 2),
1363
+ # DateTime.new(2000, 12, 15, 2),
1364
+ # "1mo"
1365
+ # )
1366
+ # }
1367
+ # )
1368
+ # df.select(Polars.col("dates").dt.month_start)
1369
+ # # =>
1370
+ # # shape: (12, 1)
1371
+ # # ┌─────────────────────┐
1372
+ # # │ dates │
1373
+ # # │ --- │
1374
+ # # │ datetime[μs] │
1375
+ # # ╞═════════════════════╡
1376
+ # # │ 2000-01-01 02:00:00 │
1377
+ # # │ 2000-02-01 02:00:00 │
1378
+ # # │ 2000-03-01 02:00:00 │
1379
+ # # │ 2000-04-01 02:00:00 │
1380
+ # # │ … │
1381
+ # # │ 2000-09-01 02:00:00 │
1382
+ # # │ 2000-10-01 02:00:00 │
1383
+ # # │ 2000-11-01 02:00:00 │
1384
+ # # │ 2000-12-01 02:00:00 │
1385
+ # # └─────────────────────┘
1386
+ def month_start
1387
+ Utils.wrap_expr(_rbexpr.dt_month_start)
1388
+ end
1389
+
1390
+ # Roll forward to the last day of the month.
1391
+ #
1392
+ # @return [Expr]
1393
+ #
1394
+ # @example
1395
+ # df = Polars::DataFrame.new(
1396
+ # {
1397
+ # "dates" => Polars.date_range(
1398
+ # DateTime.new(2000, 1, 15, 2),
1399
+ # DateTime.new(2000, 12, 15, 2),
1400
+ # "1mo"
1401
+ # )
1402
+ # }
1403
+ # )
1404
+ # df.select(Polars.col("dates").dt.month_end)
1405
+ # # =>
1406
+ # # shape: (12, 1)
1407
+ # # ┌─────────────────────┐
1408
+ # # │ dates │
1409
+ # # │ --- │
1410
+ # # │ datetime[μs] │
1411
+ # # ╞═════════════════════╡
1412
+ # # │ 2000-01-31 02:00:00 │
1413
+ # # │ 2000-02-29 02:00:00 │
1414
+ # # │ 2000-03-31 02:00:00 │
1415
+ # # │ 2000-04-30 02:00:00 │
1416
+ # # │ … │
1417
+ # # │ 2000-09-30 02:00:00 │
1418
+ # # │ 2000-10-31 02:00:00 │
1419
+ # # │ 2000-11-30 02:00:00 │
1420
+ # # │ 2000-12-31 02:00:00 │
1421
+ # # └─────────────────────┘
1422
+ def month_end
1423
+ Utils.wrap_expr(_rbexpr.dt_month_end)
1424
+ end
1285
1425
  end
1286
1426
  end