polars-df 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/Cargo.lock +272 -191
  4. data/Cargo.toml +0 -1
  5. data/README.md +2 -2
  6. data/ext/polars/Cargo.toml +8 -4
  7. data/ext/polars/src/apply/dataframe.rs +2 -2
  8. data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
  9. data/ext/polars/src/apply/mod.rs +1 -0
  10. data/ext/polars/src/batched_csv.rs +7 -5
  11. data/ext/polars/src/conversion.rs +106 -4
  12. data/ext/polars/src/dataframe.rs +19 -17
  13. data/ext/polars/src/error.rs +0 -4
  14. data/ext/polars/src/expr/binary.rs +69 -0
  15. data/ext/polars/src/expr/categorical.rs +10 -0
  16. data/ext/polars/src/expr/datetime.rs +223 -0
  17. data/ext/polars/src/expr/general.rs +933 -0
  18. data/ext/polars/src/expr/list.rs +146 -0
  19. data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
  20. data/ext/polars/src/expr/string.rs +313 -0
  21. data/ext/polars/src/expr/struct.rs +15 -0
  22. data/ext/polars/src/expr.rs +33 -0
  23. data/ext/polars/src/functions/eager.rs +93 -0
  24. data/ext/polars/src/functions/io.rs +34 -0
  25. data/ext/polars/src/functions/lazy.rs +209 -0
  26. data/ext/polars/src/functions/meta.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/whenthen.rs +43 -0
  29. data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +12 -33
  30. data/ext/polars/src/lazygroupby.rs +29 -0
  31. data/ext/polars/src/lib.rs +205 -303
  32. data/ext/polars/src/rb_modules.rs +8 -0
  33. data/ext/polars/src/series/aggregation.rs +83 -0
  34. data/ext/polars/src/series/arithmetic.rs +88 -0
  35. data/ext/polars/src/series/comparison.rs +251 -0
  36. data/ext/polars/src/series/construction.rs +164 -0
  37. data/ext/polars/src/series.rs +99 -539
  38. data/lib/polars/convert.rb +2 -2
  39. data/lib/polars/data_frame.rb +201 -50
  40. data/lib/polars/data_types.rb +6 -4
  41. data/lib/polars/date_time_expr.rb +142 -2
  42. data/lib/polars/expr.rb +70 -10
  43. data/lib/polars/lazy_frame.rb +4 -3
  44. data/lib/polars/lazy_functions.rb +4 -1
  45. data/lib/polars/list_expr.rb +68 -19
  46. data/lib/polars/series.rb +181 -73
  47. data/lib/polars/string_expr.rb +149 -43
  48. data/lib/polars/string_name_space.rb +4 -4
  49. data/lib/polars/struct_name_space.rb +32 -0
  50. data/lib/polars/utils.rb +41 -7
  51. data/lib/polars/version.rb +1 -1
  52. data/lib/polars.rb +2 -2
  53. metadata +26 -11
  54. data/ext/polars/src/lazy/dsl.rs +0 -1775
  55. data/ext/polars/src/lazy/mod.rs +0 -5
  56. data/ext/polars/src/lazy/utils.rs +0 -13
  57. data/ext/polars/src/list_construction.rs +0 -100
  58. /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
  59. /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
@@ -26,8 +26,8 @@ module Polars
26
26
  # # │ 1 ┆ 3 │
27
27
  # # │ 2 ┆ 4 │
28
28
  # # └─────┴─────┘
29
- def from_hash(data, columns: nil)
30
- DataFrame._from_hash(data, columns: columns)
29
+ def from_hash(data, schema: nil, columns: nil)
30
+ DataFrame._from_hash(data, schema: schema || columns)
31
31
  end
32
32
 
33
33
  # Construct a DataFrame from a sequence of dictionaries. This operation clones data.
@@ -18,7 +18,10 @@ module Polars
18
18
  # Whether to interpret two-dimensional data as columns or as rows. If `nil`,
19
19
  # the orientation is inferred by matching the columns and data dimensions. If
20
20
  # this does not yield conclusive results, column orientation is used.
21
- def initialize(data = nil, columns: nil, orient: nil)
21
+ def initialize(data = nil, schema: nil, columns: nil, schema_overrides: nil, orient: nil, infer_schema_length: 100, nan_to_null: false)
22
+ schema ||= columns
23
+ raise Todo if schema_overrides
24
+
22
25
  # TODO deprecate in favor of read_sql
23
26
  if defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || data.is_a?(ActiveRecord::Result))
24
27
  result = data.is_a?(ActiveRecord::Result) ? data : data.connection.select_all(data.to_sql)
@@ -29,14 +32,14 @@ module Polars
29
32
  end
30
33
 
31
34
  if data.nil?
32
- self._df = self.class.hash_to_rbdf({}, columns: columns)
35
+ self._df = self.class.hash_to_rbdf({}, schema: schema, schema_overrides: schema_overrides)
33
36
  elsif data.is_a?(Hash)
34
37
  data = data.transform_keys { |v| v.is_a?(Symbol) ? v.to_s : v }
35
- self._df = self.class.hash_to_rbdf(data, columns: columns)
38
+ self._df = self.class.hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, nan_to_null: nan_to_null)
36
39
  elsif data.is_a?(Array)
37
- self._df = self.class.sequence_to_rbdf(data, columns: columns, orient: orient)
40
+ self._df = self.class.sequence_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, orient: orient, infer_schema_length: infer_schema_length)
38
41
  elsif data.is_a?(Series)
39
- self._df = self.class.series_to_rbdf(data, columns: columns)
42
+ self._df = self.class.series_to_rbdf(data, schema: schema, schema_overrides: schema_overrides)
40
43
  else
41
44
  raise ArgumentError, "DataFrame constructor called with unsupported type; got #{data.class.name}"
42
45
  end
@@ -56,8 +59,8 @@ module Polars
56
59
  end
57
60
 
58
61
  # @private
59
- def self._from_hash(data, columns: nil)
60
- _from_rbdf(hash_to_rbdf(data, columns: columns))
62
+ def self._from_hash(data, schema: nil, schema_overrides: nil)
63
+ _from_rbdf(hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides))
61
64
  end
62
65
 
63
66
  # def self._from_records
@@ -336,6 +339,7 @@ module Polars
336
339
  end
337
340
  alias_method :count, :height
338
341
  alias_method :length, :height
342
+ alias_method :size, :height
339
343
 
340
344
  # Get the width of the DataFrame.
341
345
  #
@@ -546,6 +550,13 @@ module Polars
546
550
  end
547
551
  alias_method :inspect, :to_s
548
552
 
553
+ # Returns an array representing the DataFrame
554
+ #
555
+ # @return [Array]
556
+ def to_a
557
+ rows(named: true)
558
+ end
559
+
549
560
  # Check if DataFrame includes column.
550
561
  #
551
562
  # @return [Boolean]
@@ -655,7 +666,7 @@ module Polars
655
666
  end
656
667
 
657
668
  # Ruby-specific
658
- if item.is_a?(Expr)
669
+ if item.is_a?(Expr) || item.is_a?(Series)
659
670
  return filter(item)
660
671
  end
661
672
 
@@ -665,15 +676,42 @@ module Polars
665
676
  # Set item.
666
677
  #
667
678
  # @return [Object]
668
- #
669
- # def []=(key, value)
670
- # if key.is_a?(String)
671
- # raise TypeError, "'DataFrame' object does not support 'Series' assignment by index. Use 'DataFrame.with_columns'"
672
- # end
679
+ def []=(*key, value)
680
+ if key.length == 1
681
+ key = key.first
682
+ elsif key.length != 2
683
+ raise ArgumentError, "wrong number of arguments (given #{key.length + 1}, expected 2..3)"
684
+ end
673
685
 
674
- # raise Todo
675
- # end
686
+ if Utils.strlike?(key)
687
+ if value.is_a?(Array) || (defined?(Numo::NArray) && value.is_a?(Numo::NArray))
688
+ value = Series.new(value)
689
+ elsif !value.is_a?(Series)
690
+ value = Polars.lit(value)
691
+ end
692
+ self._df = with_column(value.alias(key.to_s))._df
693
+ elsif key.is_a?(Array)
694
+ row_selection, col_selection = key
676
695
 
696
+ if Utils.strlike?(col_selection)
697
+ s = self[col_selection]
698
+ elsif col_selection.is_a?(Integer)
699
+ raise Todo
700
+ else
701
+ raise ArgumentError, "column selection not understood: #{col_selection}"
702
+ end
703
+
704
+ s[row_selection] = value
705
+
706
+ if col_selection.is_a?(Integer)
707
+ replace_at_idx(col_selection, s)
708
+ elsif Utils.strlike?(col_selection)
709
+ replace(col_selection, s)
710
+ end
711
+ else
712
+ raise Todo
713
+ end
714
+ end
677
715
 
678
716
  # Return the dataframe as a scalar.
679
717
  #
@@ -1462,6 +1500,20 @@ module Polars
1462
1500
  end
1463
1501
  end
1464
1502
 
1503
+ # Sort the DataFrame by column in-place.
1504
+ #
1505
+ # @param by [String]
1506
+ # By which column to sort.
1507
+ # @param reverse [Boolean]
1508
+ # Reverse/descending sort.
1509
+ # @param nulls_last [Boolean]
1510
+ # Place null values last. Can only be used if sorted by a single column.
1511
+ #
1512
+ # @return [DataFrame]
1513
+ def sort!(by, reverse: false, nulls_last: false)
1514
+ self._df = sort(by, reverse: reverse, nulls_last: nulls_last)._df
1515
+ end
1516
+
1465
1517
  # Check if DataFrame is equal to other.
1466
1518
  #
1467
1519
  # @param other [DataFrame]
@@ -1519,7 +1571,7 @@ module Polars
1519
1571
  # # │ 30 ┆ 6 │
1520
1572
  # # └─────┴─────┘
1521
1573
  def replace(column, new_col)
1522
- _df.replace(column, new_col._s)
1574
+ _df.replace(column.to_s, new_col._s)
1523
1575
  self
1524
1576
  end
1525
1577
 
@@ -1860,7 +1912,7 @@ module Polars
1860
1912
  # "2020-01-08 23:16:43"
1861
1913
  # ]
1862
1914
  # df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
1863
- # Polars.col("dt").str.strptime(:datetime)
1915
+ # Polars.col("dt").str.strptime(Polars::Datetime)
1864
1916
  # )
1865
1917
  # df.groupby_rolling(index_column: "dt", period: "2d").agg(
1866
1918
  # [
@@ -2791,6 +2843,16 @@ module Polars
2791
2843
  Utils.wrap_s(_df.drop_in_place(name))
2792
2844
  end
2793
2845
 
2846
+ # Drop in place if exists.
2847
+ #
2848
+ # @param name [Object]
2849
+ # Column to drop.
2850
+ #
2851
+ # @return [Series]
2852
+ def delete(name)
2853
+ drop_in_place(name) if include?(name)
2854
+ end
2855
+
2794
2856
  # Create an empty copy of the current DataFrame.
2795
2857
  #
2796
2858
  # Returns a DataFrame with identical schema but no data.
@@ -3202,7 +3264,7 @@ module Polars
3202
3264
  # # │ B ┆ 1 │
3203
3265
  # # │ C ┆ 2 │
3204
3266
  # # │ D ┆ 3 │
3205
- # # │
3267
+ # # │ E ┆ 4 │
3206
3268
  # # │ F ┆ 5 │
3207
3269
  # # │ G ┆ 6 │
3208
3270
  # # │ H ┆ 7 │
@@ -4428,7 +4490,7 @@ module Polars
4428
4490
  end
4429
4491
  end
4430
4492
 
4431
- # Returns an iterator over the DataFrame of rows of python-native values.
4493
+ # Returns an iterator over the DataFrame of rows of Ruby-native values.
4432
4494
  #
4433
4495
  # @param named [Boolean]
4434
4496
  # Return hashes instead of arrays. The hashes are a mapping of
@@ -4489,6 +4551,24 @@ module Polars
4489
4551
  end
4490
4552
  end
4491
4553
 
4554
+ # Returns an iterator over the DataFrame of rows of Ruby-native values.
4555
+ #
4556
+ # @param named [Boolean]
4557
+ # Return hashes instead of arrays. The hashes are a mapping of
4558
+ # column name to row value. This is more expensive than returning an
4559
+ # array, but allows for accessing values by column name.
4560
+ # @param buffer_size [Integer]
4561
+ # Determines the number of rows that are buffered internally while iterating
4562
+ # over the data; you should only modify this in very specific cases where the
4563
+ # default value is determined not to be a good fit to your access pattern, as
4564
+ # the speedup from using the buffer is significant (~2-4x). Setting this
4565
+ # value to zero disables row buffering.
4566
+ #
4567
+ # @return [Object]
4568
+ def each_row(named: true, buffer_size: 500, &block)
4569
+ iter_rows(named: named, buffer_size: buffer_size, &block)
4570
+ end
4571
+
4492
4572
  # Shrink DataFrame memory usage.
4493
4573
  #
4494
4574
  # Shrinks to fit the exact capacity needed to hold the data.
@@ -4742,20 +4822,63 @@ module Polars
4742
4822
  end
4743
4823
 
4744
4824
  # @private
4745
- def self.hash_to_rbdf(data, columns: nil)
4746
- if !columns.nil?
4747
- columns, dtypes = _unpack_columns(columns, lookup_names: data.keys)
4825
+ def self.expand_hash_scalars(data, schema_overrides: nil, order: nil, nan_to_null: false)
4826
+ updated_data = {}
4827
+ unless data.empty?
4828
+ dtypes = schema_overrides || {}
4829
+ array_len = data.values.map { |val| Utils.arrlen(val) || 0 }.max
4830
+ if array_len > 0
4831
+ data.each do |name, val|
4832
+ dtype = dtypes[name]
4833
+ if val.is_a?(Hash) && dtype != Struct
4834
+ updated_data[name] = DataFrame.new(val).to_struct(name)
4835
+ elsif !Utils.arrlen(val).nil?
4836
+ updated_data[name] = Series.new(String.new(name), val, dtype: dtype)
4837
+ elsif val.nil? || [Integer, Float, TrueClass, FalseClass, String, ::Date, ::DateTime, ::Time].any? { |cls| val.is_a?(cls) }
4838
+ dtype = Polars::Float64 if val.nil? && dtype.nil?
4839
+ updated_data[name] = Series.new(String.new(name), [val], dtype: dtype).extend_constant(val, array_len - 1)
4840
+ else
4841
+ raise Todo
4842
+ end
4843
+ end
4844
+ elsif data.values.all? { |val| Utils.arrlen(val) == 0 }
4845
+ data.each do |name, val|
4846
+ updated_data[name] = Series.new(name, val, dtype: dtypes[name])
4847
+ end
4848
+ elsif data.values.all? { |val| Utils.arrlen(val).nil? }
4849
+ data.each do |name, val|
4850
+ updated_data[name] = Series.new(name, [val], dtype: dtypes[name])
4851
+ end
4852
+ end
4853
+ end
4854
+ updated_data
4855
+ end
4748
4856
 
4749
- if data.empty? && dtypes
4750
- data_series = columns.map { |name| Series.new(name, [], dtype: dtypes[name])._s }
4751
- else
4752
- data_series = data.map { |name, values| Series.new(name, values, dtype: dtypes[name])._s }
4857
+ # @private
4858
+ def self.hash_to_rbdf(data, schema: nil, schema_overrides: nil, nan_to_null: nil)
4859
+ if schema.is_a?(Hash) && !data.empty?
4860
+ if !data.all? { |col, _| schema[col] }
4861
+ raise ArgumentError, "The given column-schema names do not match the data dictionary"
4753
4862
  end
4754
- data_series = _handle_columns_arg(data_series, columns: columns)
4755
- return RbDataFrame.new(data_series)
4863
+
4864
+ data = schema.to_h { |col| [col, data[col]] }
4865
+ end
4866
+
4867
+ column_names, schema_overrides = _unpack_schema(
4868
+ schema, lookup_names: data.keys, schema_overrides: schema_overrides
4869
+ )
4870
+ if column_names.empty?
4871
+ column_names = data.keys
4756
4872
  end
4757
4873
 
4758
- RbDataFrame.read_hash(data)
4874
+ if data.empty? && !schema_overrides.empty?
4875
+ data_series = column_names.map { |name| Series.new(name, [], dtype: schema_overrides[name], nan_to_null: nan_to_null)._s }
4876
+ else
4877
+ data_series = expand_hash_scalars(data, schema_overrides: schema_overrides, nan_to_null: nan_to_null).values.map(&:_s)
4878
+ end
4879
+
4880
+ data_series = _handle_columns_arg(data_series, columns: column_names, from_hash: true)
4881
+ RbDataFrame.new(data_series)
4759
4882
  end
4760
4883
 
4761
4884
  # @private
@@ -4764,14 +4887,12 @@ module Polars
4764
4887
  end
4765
4888
 
4766
4889
  # @private
4767
- def self._unpack_columns(columns, schema_overrides: nil, lookup_names: nil, n_expected: nil)
4768
- raise Todo if schema_overrides
4769
-
4770
- if columns.is_a?(Hash)
4771
- columns = columns.to_a
4890
+ def self._unpack_schema(schema, schema_overrides: nil, n_expected: nil, lookup_names: nil, include_overrides_in_columns: false)
4891
+ if schema.is_a?(Hash)
4892
+ schema = schema.to_a
4772
4893
  end
4773
4894
  column_names =
4774
- (columns || []).map.with_index do |col, i|
4895
+ (schema || []).map.with_index do |col, i|
4775
4896
  if col.is_a?(String)
4776
4897
  col || "column_#{i}"
4777
4898
  else
@@ -4784,21 +4905,38 @@ module Polars
4784
4905
  # TODO zip_longest
4785
4906
  lookup = column_names.zip(lookup_names || []).to_h
4786
4907
 
4787
- [
4788
- column_names,
4789
- (columns || []).select { |col| !col.is_a?(String) && col[1] }.to_h do |col|
4908
+ column_dtypes =
4909
+ (schema || []).select { |col| !col.is_a?(String) && col[1] }.to_h do |col|
4790
4910
  [lookup[col[0]] || col[0], col[1]]
4791
4911
  end
4792
- ]
4912
+
4913
+ if schema_overrides
4914
+ raise Todo
4915
+ end
4916
+
4917
+ column_dtypes.each do |col, dtype|
4918
+ if !Utils.is_polars_dtype(dtype, include_unknown: true) && !dtype.nil?
4919
+ column_dtypes[col] = Utils.rb_type_to_dtype(dtype)
4920
+ end
4921
+ end
4922
+
4923
+ [column_names, column_dtypes]
4793
4924
  end
4794
4925
 
4795
- def self._handle_columns_arg(data, columns: nil)
4796
- if columns.nil?
4926
+ def self._handle_columns_arg(data, columns: nil, from_hash: false)
4927
+ if columns.nil? || columns.empty?
4797
4928
  data
4798
4929
  else
4799
4930
  if data.empty?
4800
4931
  columns.map { |c| Series.new(c, nil)._s }
4801
4932
  elsif data.length == columns.length
4933
+ if from_hash
4934
+ series_map = data.to_h { |s| [s.name, s] }
4935
+ if columns.all? { |col| series_map.key?(col) }
4936
+ return columns.map { |col| series_map[col] }
4937
+ end
4938
+ end
4939
+
4802
4940
  columns.each_with_index do |c, i|
4803
4941
  # not in-place?
4804
4942
  data[i].rename(c)
@@ -4813,7 +4951,7 @@ module Polars
4813
4951
  def self._post_apply_columns(rbdf, columns, structs: nil, schema_overrides: nil)
4814
4952
  rbdf_columns = rbdf.columns
4815
4953
  rbdf_dtypes = rbdf.dtypes
4816
- columns, dtypes = _unpack_columns(
4954
+ columns, dtypes = _unpack_schema(
4817
4955
  (columns || rbdf_columns), schema_overrides: schema_overrides
4818
4956
  )
4819
4957
  column_subset = []
@@ -4851,20 +4989,23 @@ module Polars
4851
4989
  end
4852
4990
 
4853
4991
  # @private
4854
- def self.sequence_to_rbdf(data, columns: nil, orient: nil, infer_schema_length: 50)
4992
+ def self.sequence_to_rbdf(data, schema: nil, schema_overrides: nil, orient: nil, infer_schema_length: 50)
4993
+ raise Todo if schema_overrides
4994
+ columns = schema
4995
+
4855
4996
  if data.length == 0
4856
- return hash_to_rbdf({}, columns: columns)
4997
+ return hash_to_rbdf({}, schema: schema, schema_overrides: schema_overrides)
4857
4998
  end
4858
4999
 
4859
5000
  if data[0].is_a?(Series)
4860
5001
  # series_names = data.map(&:name)
4861
- # columns, dtypes = _unpack_columns(columns || series_names, n_expected: data.length)
5002
+ # columns, dtypes = _unpack_schema(columns || series_names, n_expected: data.length)
4862
5003
  data_series = []
4863
5004
  data.each do |s|
4864
5005
  data_series << s._s
4865
5006
  end
4866
5007
  elsif data[0].is_a?(Hash)
4867
- column_names, dtypes = _unpack_columns(columns)
5008
+ column_names, dtypes = _unpack_schema(columns)
4868
5009
  schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
4869
5010
  rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema_overrides)
4870
5011
  if column_names
@@ -4890,11 +5031,21 @@ module Polars
4890
5031
  end
4891
5032
 
4892
5033
  # @private
4893
- def self.series_to_rbdf(data, columns: nil)
4894
- if columns
4895
- raise Todo
5034
+ def self.series_to_rbdf(data, schema: nil, schema_overrides: nil)
5035
+ data_series = [data._s]
5036
+ series_name = data_series.map(&:name)
5037
+ column_names, schema_overrides = _unpack_schema(
5038
+ schema || series_name, schema_overrides: schema_overrides, n_expected: 1
5039
+ )
5040
+ if schema_overrides.any?
5041
+ new_dtype = schema_overrides.values[0]
5042
+ if new_dtype != data.dtype
5043
+ data_series[0] = data_series[0].cast(new_dtype, true)
5044
+ end
4896
5045
  end
4897
- RbDataFrame.new([data._s])
5046
+
5047
+ data_series = _handle_columns_arg(data_series, columns: column_names)
5048
+ RbDataFrame.new(data_series)
4898
5049
  end
4899
5050
 
4900
5051
  def wrap_ldf(ldf)
@@ -84,20 +84,22 @@ module Polars
84
84
 
85
85
  # Calendar date and time type.
86
86
  class Datetime < TemporalType
87
- attr_reader :tu
87
+ attr_reader :time_unit, :time_zone
88
+ alias_method :tu, :time_unit
88
89
 
89
90
  def initialize(time_unit = "us", time_zone = nil)
90
- @tu = time_unit || "us"
91
+ @time_unit = time_unit || "us"
91
92
  @time_zone = time_zone
92
93
  end
93
94
  end
94
95
 
95
96
  # Time duration/delta type.
96
97
  class Duration < TemporalType
97
- attr_reader :tu
98
+ attr_reader :time_unit
99
+ alias_method :tu, :time_unit
98
100
 
99
101
  def initialize(time_unit = "us")
100
- @tu = time_unit
102
+ @time_unit = time_unit
101
103
  end
102
104
  end
103
105
 
@@ -218,6 +218,25 @@ module Polars
218
218
  )
219
219
  end
220
220
 
221
+ # Create a naive Datetime from an existing Date/Datetime expression and a Time.
222
+ #
223
+ # If the underlying expression is a Datetime then its time component is replaced,
224
+ # and if it is a Date then a new Datetime is created by combining the two values.
225
+ #
226
+ # @param time [Object]
227
+ # A Ruby time literal or Polars expression/column that resolves to a time.
228
+ # @param time_unit ["ns", "us", "ms"]
229
+ # Unit of time.
230
+ #
231
+ # @return [Expr]
232
+ def combine(time, time_unit: "us")
233
+ unless time.is_a?(Time) || time.is_a?(Expr)
234
+ raise TypeError, "expected 'time' to be a Ruby time or Polars expression, found #{time}"
235
+ end
236
+ time = Utils.expr_to_lit_or_expr(time)
237
+ Utils.wrap_expr(_rbexpr.dt_combine(time._rbexpr, time_unit))
238
+ end
239
+
221
240
  # Format Date/datetime with a formatting rule.
222
241
  #
223
242
  # See [chrono strftime/strptime](https://docs.rs/chrono/latest/chrono/format/strftime/index.html).
@@ -270,6 +289,34 @@ module Polars
270
289
  Utils.wrap_expr(_rbexpr.year)
271
290
  end
272
291
 
292
+ # Determine whether the year of the underlying date is a leap year.
293
+ #
294
+ # Applies to Date and Datetime columns.
295
+ #
296
+ # @return [Expr]
297
+ #
298
+ # @example
299
+ # start = DateTime.new(2000, 1, 1)
300
+ # stop = DateTime.new(2002, 1, 1)
301
+ # df = Polars::DataFrame.new(
302
+ # {"date" => Polars.date_range(start, stop, "1y")}
303
+ # )
304
+ # df.select(Polars.col("date").dt.is_leap_year)
305
+ # # =>
306
+ # # shape: (3, 1)
307
+ # # ┌───────┐
308
+ # # │ date │
309
+ # # │ --- │
310
+ # # │ bool │
311
+ # # ╞═══════╡
312
+ # # │ true │
313
+ # # │ false │
314
+ # # │ false │
315
+ # # └───────┘
316
+ def is_leap_year
317
+ Utils.wrap_expr(_rbexpr.dt_is_leap_year)
318
+ end
319
+
273
320
  # Extract ISO year from underlying Date representation.
274
321
  #
275
322
  # Applies to Date and Datetime columns.
@@ -550,6 +597,27 @@ module Polars
550
597
  Utils.wrap_expr(_rbexpr.ordinal_day)
551
598
  end
552
599
 
600
+ # Time
601
+ #
602
+ # @return [Expr]
603
+ def time
604
+ Utils.wrap_expr(_rbexpr.dt_time)
605
+ end
606
+
607
+ # Date
608
+ #
609
+ # @return [Expr]
610
+ def date
611
+ Utils.wrap_expr(_rbexpr.dt_date)
612
+ end
613
+
614
+ # Datetime
615
+ #
616
+ # @return [Expr]
617
+ def datetime
618
+ Utils.wrap_expr(_rbexpr.dt_datetime)
619
+ end
620
+
553
621
  # Extract hour from underlying DateTime representation.
554
622
  #
555
623
  # Applies to Datetime columns.
@@ -958,8 +1026,8 @@ module Polars
958
1026
  # Time zone for the `Datetime` Series.
959
1027
  #
960
1028
  # @return [Expr]
961
- def replace_time_zone(tz)
962
- Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz))
1029
+ def replace_time_zone(tz, use_earliest: nil)
1030
+ Utils.wrap_expr(_rbexpr.dt_replace_time_zone(tz, use_earliest))
963
1031
  end
964
1032
 
965
1033
  # Localize tz-naive Datetime Series to tz-aware Datetime Series.
@@ -1282,5 +1350,77 @@ module Polars
1282
1350
  def offset_by(by)
1283
1351
  Utils.wrap_expr(_rbexpr.dt_offset_by(by))
1284
1352
  end
1353
+
1354
+ # Roll backward to the first day of the month.
1355
+ #
1356
+ # @return [Expr]
1357
+ #
1358
+ # @example
1359
+ # df = Polars::DataFrame.new(
1360
+ # {
1361
+ # "dates" => Polars.date_range(
1362
+ # DateTime.new(2000, 1, 15, 2),
1363
+ # DateTime.new(2000, 12, 15, 2),
1364
+ # "1mo"
1365
+ # )
1366
+ # }
1367
+ # )
1368
+ # df.select(Polars.col("dates").dt.month_start)
1369
+ # # =>
1370
+ # # shape: (12, 1)
1371
+ # # ┌─────────────────────┐
1372
+ # # │ dates │
1373
+ # # │ --- │
1374
+ # # │ datetime[μs] │
1375
+ # # ╞═════════════════════╡
1376
+ # # │ 2000-01-01 02:00:00 │
1377
+ # # │ 2000-02-01 02:00:00 │
1378
+ # # │ 2000-03-01 02:00:00 │
1379
+ # # │ 2000-04-01 02:00:00 │
1380
+ # # │ … │
1381
+ # # │ 2000-09-01 02:00:00 │
1382
+ # # │ 2000-10-01 02:00:00 │
1383
+ # # │ 2000-11-01 02:00:00 │
1384
+ # # │ 2000-12-01 02:00:00 │
1385
+ # # └─────────────────────┘
1386
+ def month_start
1387
+ Utils.wrap_expr(_rbexpr.dt_month_start)
1388
+ end
1389
+
1390
+ # Roll forward to the last day of the month.
1391
+ #
1392
+ # @return [Expr]
1393
+ #
1394
+ # @example
1395
+ # df = Polars::DataFrame.new(
1396
+ # {
1397
+ # "dates" => Polars.date_range(
1398
+ # DateTime.new(2000, 1, 15, 2),
1399
+ # DateTime.new(2000, 12, 15, 2),
1400
+ # "1mo"
1401
+ # )
1402
+ # }
1403
+ # )
1404
+ # df.select(Polars.col("dates").dt.month_end)
1405
+ # # =>
1406
+ # # shape: (12, 1)
1407
+ # # ┌─────────────────────┐
1408
+ # # │ dates │
1409
+ # # │ --- │
1410
+ # # │ datetime[μs] │
1411
+ # # ╞═════════════════════╡
1412
+ # # │ 2000-01-31 02:00:00 │
1413
+ # # │ 2000-02-29 02:00:00 │
1414
+ # # │ 2000-03-31 02:00:00 │
1415
+ # # │ 2000-04-30 02:00:00 │
1416
+ # # │ … │
1417
+ # # │ 2000-09-30 02:00:00 │
1418
+ # # │ 2000-10-31 02:00:00 │
1419
+ # # │ 2000-11-30 02:00:00 │
1420
+ # # │ 2000-12-31 02:00:00 │
1421
+ # # └─────────────────────┘
1422
+ def month_end
1423
+ Utils.wrap_expr(_rbexpr.dt_month_end)
1424
+ end
1285
1425
  end
1286
1426
  end