polars-df 0.21.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -0
  3. data/Cargo.lock +120 -90
  4. data/Cargo.toml +3 -0
  5. data/README.md +20 -7
  6. data/ext/polars/Cargo.toml +18 -12
  7. data/ext/polars/src/batched_csv.rs +4 -4
  8. data/ext/polars/src/catalog/unity.rs +96 -94
  9. data/ext/polars/src/conversion/any_value.rs +39 -37
  10. data/ext/polars/src/conversion/chunked_array.rs +36 -29
  11. data/ext/polars/src/conversion/datetime.rs +11 -0
  12. data/ext/polars/src/conversion/mod.rs +244 -51
  13. data/ext/polars/src/dataframe/construction.rs +5 -17
  14. data/ext/polars/src/dataframe/export.rs +17 -15
  15. data/ext/polars/src/dataframe/general.rs +15 -17
  16. data/ext/polars/src/dataframe/io.rs +1 -2
  17. data/ext/polars/src/dataframe/mod.rs +25 -1
  18. data/ext/polars/src/dataframe/serde.rs +23 -8
  19. data/ext/polars/src/exceptions.rs +8 -5
  20. data/ext/polars/src/expr/datatype.rs +4 -4
  21. data/ext/polars/src/expr/datetime.rs +22 -28
  22. data/ext/polars/src/expr/general.rs +3 -10
  23. data/ext/polars/src/expr/list.rs +8 -24
  24. data/ext/polars/src/expr/meta.rs +4 -6
  25. data/ext/polars/src/expr/mod.rs +2 -0
  26. data/ext/polars/src/expr/name.rs +11 -14
  27. data/ext/polars/src/expr/serde.rs +28 -0
  28. data/ext/polars/src/expr/string.rs +5 -10
  29. data/ext/polars/src/file.rs +20 -14
  30. data/ext/polars/src/functions/business.rs +0 -1
  31. data/ext/polars/src/functions/io.rs +7 -4
  32. data/ext/polars/src/functions/lazy.rs +7 -6
  33. data/ext/polars/src/functions/meta.rs +3 -3
  34. data/ext/polars/src/functions/string_cache.rs +3 -3
  35. data/ext/polars/src/interop/arrow/to_ruby.rs +3 -3
  36. data/ext/polars/src/interop/numo/numo_rs.rs +4 -3
  37. data/ext/polars/src/io/mod.rs +23 -3
  38. data/ext/polars/src/lazyframe/general.rs +35 -50
  39. data/ext/polars/src/lazyframe/mod.rs +16 -1
  40. data/ext/polars/src/lazyframe/optflags.rs +57 -0
  41. data/ext/polars/src/lazyframe/serde.rs +27 -3
  42. data/ext/polars/src/lib.rs +144 -19
  43. data/ext/polars/src/map/dataframe.rs +18 -15
  44. data/ext/polars/src/map/lazy.rs +6 -5
  45. data/ext/polars/src/map/series.rs +7 -6
  46. data/ext/polars/src/on_startup.rs +12 -5
  47. data/ext/polars/src/rb_modules.rs +2 -2
  48. data/ext/polars/src/series/aggregation.rs +49 -29
  49. data/ext/polars/src/series/construction.rs +2 -0
  50. data/ext/polars/src/series/export.rs +38 -33
  51. data/ext/polars/src/series/general.rs +69 -31
  52. data/ext/polars/src/series/mod.rs +29 -4
  53. data/lib/polars/array_expr.rb +1 -1
  54. data/lib/polars/data_frame.rb +119 -15
  55. data/lib/polars/data_types.rb +23 -6
  56. data/lib/polars/date_time_expr.rb +36 -15
  57. data/lib/polars/expr.rb +41 -32
  58. data/lib/polars/functions/business.rb +95 -0
  59. data/lib/polars/functions/lazy.rb +1 -1
  60. data/lib/polars/iceberg_dataset.rb +113 -0
  61. data/lib/polars/io/iceberg.rb +34 -0
  62. data/lib/polars/io/ipc.rb +28 -49
  63. data/lib/polars/io/parquet.rb +7 -4
  64. data/lib/polars/io/scan_options.rb +12 -3
  65. data/lib/polars/io/utils.rb +17 -0
  66. data/lib/polars/lazy_frame.rb +97 -10
  67. data/lib/polars/list_expr.rb +21 -13
  68. data/lib/polars/list_name_space.rb +33 -21
  69. data/lib/polars/meta_expr.rb +25 -0
  70. data/lib/polars/query_opt_flags.rb +50 -0
  71. data/lib/polars/scan_cast_options.rb +23 -1
  72. data/lib/polars/schema.rb +1 -1
  73. data/lib/polars/selectors.rb +8 -8
  74. data/lib/polars/series.rb +26 -2
  75. data/lib/polars/string_expr.rb +27 -28
  76. data/lib/polars/string_name_space.rb +18 -5
  77. data/lib/polars/utils/convert.rb +2 -2
  78. data/lib/polars/utils/serde.rb +17 -0
  79. data/lib/polars/utils/various.rb +4 -0
  80. data/lib/polars/version.rb +1 -1
  81. data/lib/polars.rb +6 -0
  82. metadata +10 -1
@@ -72,6 +72,43 @@ module Polars
72
72
  end
73
73
  end
74
74
 
75
+ # Read a serialized DataFrame from a file.
76
+ #
77
+ # @param source [Object]
78
+ # Path to a file or a file-like object (by file-like object, we refer to
79
+ # objects that have a `read` method, such as a file handler or `StringIO`).
80
+ #
81
+ # @return [DataFrame]
82
+ #
83
+ # @note
84
+ # Serialization is not stable across Polars versions: a DataFrame serialized
85
+ # in one Polars version may not be deserializable in another Polars version.
86
+ #
87
+ # @example
88
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => [4.0, 5.0, 6.0]})
89
+ # bytes = df.serialize
90
+ # Polars::DataFrame.deserialize(StringIO.new(bytes))
91
+ # # =>
92
+ # # shape: (3, 2)
93
+ # # ┌─────┬─────┐
94
+ # # │ a ┆ b │
95
+ # # │ --- ┆ --- │
96
+ # # │ i64 ┆ f64 │
97
+ # # ╞═════╪═════╡
98
+ # # │ 1 ┆ 4.0 │
99
+ # # │ 2 ┆ 5.0 │
100
+ # # │ 3 ┆ 6.0 │
101
+ # # └─────┴─────┘
102
+ def self.deserialize(source)
103
+ if Utils.pathlike?(source)
104
+ source = Utils.normalize_filepath(source)
105
+ end
106
+
107
+ deserializer = RbDataFrame.method(:deserialize_binary)
108
+
109
+ _from_rbdf(deserializer.(source))
110
+ end
111
+
75
112
  # @private
76
113
  def self._from_rbdf(rb_df)
77
114
  df = DataFrame.allocate
@@ -562,8 +599,6 @@ module Polars
562
599
 
563
600
  # Convert every row to a hash.
564
601
  #
565
- # Note that this is slow.
566
- #
567
602
  # @return [Array]
568
603
  #
569
604
  # @example
@@ -572,12 +607,7 @@ module Polars
572
607
  # # =>
573
608
  # # [{"foo"=>1, "bar"=>4}, {"foo"=>2, "bar"=>5}, {"foo"=>3, "bar"=>6}]
574
609
  def to_hashes
575
- rbdf = _df
576
- names = columns
577
-
578
- height.times.map do |i|
579
- names.zip(rbdf.row_tuple(i)).to_h
580
- end
610
+ rows(named: true)
581
611
  end
582
612
 
583
613
  # Convert DataFrame to a 2D Numo array.
@@ -634,6 +664,44 @@ module Polars
634
664
  Utils.wrap_s(_df.select_at_idx(index))
635
665
  end
636
666
 
667
+ # Serialize this DataFrame to a file or string.
668
+ #
669
+ # @param file [Object]
670
+ # File path or writable file-like object to which the result will be written.
671
+ # If set to `nil` (default), the output is returned as a string instead.
672
+ #
673
+ # @return [Object]
674
+ #
675
+ # @note
676
+ # Serialization is not stable across Polars versions: a DataFrame serialized
677
+ # in one Polars version may not be deserializable in another Polars version.
678
+ #
679
+ # @example
680
+ # df = Polars::DataFrame.new(
681
+ # {
682
+ # "foo" => [1, 2, 3],
683
+ # "bar" => [6, 7, 8]
684
+ # }
685
+ # )
686
+ # bytes = df.serialize
687
+ # Polars::DataFrame.deserialize(StringIO.new(bytes))
688
+ # # =>
689
+ # # shape: (3, 2)
690
+ # # ┌─────┬─────┐
691
+ # # │ foo ┆ bar │
692
+ # # │ --- ┆ --- │
693
+ # # │ i64 ┆ i64 │
694
+ # # ╞═════╪═════╡
695
+ # # │ 1 ┆ 6 │
696
+ # # │ 2 ┆ 7 │
697
+ # # │ 3 ┆ 8 │
698
+ # # └─────┴─────┘
699
+ def serialize(file = nil)
700
+ serializer = _df.method(:serialize_binary)
701
+
702
+ Utils.serialize_polars_object(serializer, file)
703
+ end
704
+
637
705
  # Serialize to JSON representation.
638
706
  #
639
707
  # @param file [String]
@@ -1148,6 +1216,40 @@ module Polars
1148
1216
  end
1149
1217
  end
1150
1218
 
1219
+ # Write DataFrame to an Iceberg table.
1220
+ #
1221
+ # @note
1222
+ # This functionality is currently considered **unstable**. It may be
1223
+ # changed at any point without it being considered a breaking change.
1224
+ #
1225
+ # @param target [Object]
1226
+ # Name of the table or the Table object representing an Iceberg table.
1227
+ # @param mode ['append', 'overwrite']
1228
+ # How to handle existing data.
1229
+ #
1230
+ # - If 'append', will add new data.
1231
+ # - If 'overwrite', will replace table with new data.
1232
+ #
1233
+ # @return [nil]
1234
+ def write_iceberg(target, mode:)
1235
+ require "iceberg"
1236
+
1237
+ table =
1238
+ if target.is_a?(Iceberg::Table)
1239
+ target
1240
+ else
1241
+ raise Todo
1242
+ end
1243
+
1244
+ data = self
1245
+
1246
+ if mode == "append"
1247
+ table.append(data)
1248
+ else
1249
+ raise Todo
1250
+ end
1251
+ end
1252
+
1151
1253
  # Write DataFrame as delta table.
1152
1254
  #
1153
1255
  # @param target [Object]
@@ -5957,8 +6059,13 @@ module Polars
5957
6059
  # The fields will be inserted into the `DataFrame` on the location of the
5958
6060
  # `struct` type.
5959
6061
  #
5960
- # @param names [Object]
5961
- # Names of the struct columns that will be decomposed by its fields
6062
+ # @param columns [Object]
6063
+ # Name of the struct column(s) that should be unnested.
6064
+ # @param more_columns [Array]
6065
+ # Additional columns to unnest, specified as positional arguments.
6066
+ # @param separator [String]
6067
+ # Rename output column names as combination of the struct column name,
6068
+ # name separator and field name.
5962
6069
  #
5963
6070
  # @return [DataFrame]
5964
6071
  #
@@ -5984,11 +6091,8 @@ module Polars
5984
6091
  # # │ foo ┆ 1 ┆ a ┆ true ┆ [1, 2] ┆ baz │
5985
6092
  # # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
5986
6093
  # # └────────┴─────┴─────┴──────┴───────────┴───────┘
5987
- def unnest(names)
5988
- if names.is_a?(::String)
5989
- names = [names]
5990
- end
5991
- _from_rbdf(_df.unnest(names))
6094
+ def unnest(columns, *more_columns, separator: nil)
6095
+ lazy.unnest(columns, *more_columns, separator: separator).collect(_eager: true)
5992
6096
  end
5993
6097
 
5994
6098
  # Requires NumPy
@@ -110,12 +110,23 @@ module Polars
110
110
  DataTypeExpr._from_rbdatatype_expr(RbDataTypeExpr.from_dtype(self))
111
111
  end
112
112
 
113
- [:numeric?, :decimal?, :integer?, :signed_integer?, :unsigned_integer?, :float?, :temporal?, :nested?, :to_dtype_expr].each do |v|
113
+ [:numeric?, :decimal?, :integer?, :signed_integer?, :unsigned_integer?, :float?, :temporal?, :nested?].each do |v|
114
114
  define_method(v) do
115
115
  self.class.public_send(v)
116
116
  end
117
117
  end
118
118
 
119
+ # Return a `DataTypeExpr` with a static `DataType`.
120
+ #
121
+ # @return [Expr]
122
+ #
123
+ # @example
124
+ # Polars::Int16.new.to_dtype_expr.collect_dtype({})
125
+ # # => Polars::Int16
126
+ def to_dtype_expr
127
+ DataTypeExpr._from_rbdatatype_expr(RbDataTypeExpr.from_dtype(self))
128
+ end
129
+
119
130
  # Returns a string representing the data type.
120
131
  #
121
132
  # @return [String]
@@ -198,6 +209,10 @@ module Polars
198
209
  class UInt64 < UnsignedIntegerType
199
210
  end
200
211
 
212
+ # 128-bit unsigned integer type.
213
+ class UInt128 < UnsignedIntegerType
214
+ end
215
+
201
216
  # 32-bit floating point type.
202
217
  class Float32 < FloatType
203
218
  end
@@ -212,7 +227,11 @@ module Polars
212
227
  class Decimal < NumericType
213
228
  attr_reader :precision, :scale
214
229
 
215
- def initialize(precision, scale)
230
+ def initialize(precision = nil, scale = 0)
231
+ if precision.nil?
232
+ precision = 38
233
+ end
234
+
216
235
  @precision = precision
217
236
  @scale = scale
218
237
  end
@@ -317,11 +336,9 @@ module Polars
317
336
  class Categories
318
337
  attr_accessor :_categories
319
338
 
320
- def initialize
321
- # TODO fix
322
- name = nil
339
+ def initialize(name = nil)
323
340
  if name.nil? || name == ""
324
- @_categories = RbCategories.global_categories
341
+ self._categories = RbCategories.global_categories
325
342
  return
326
343
  end
327
344
 
@@ -1188,7 +1188,7 @@ module Polars
1188
1188
  if Utils::DTYPE_TEMPORAL_UNITS.include?(time_unit)
1189
1189
  timestamp(time_unit)
1190
1190
  elsif time_unit == "s"
1191
- Utils.wrap_expr(_rbexpr.dt_epoch_seconds)
1191
+ timestamp("ms").floordiv(F.lit(1000, dtype: Int64))
1192
1192
  elsif time_unit == "d"
1193
1193
  Utils.wrap_expr(_rbexpr).cast(:date).cast(:i32)
1194
1194
  else
@@ -1435,6 +1435,9 @@ module Polars
1435
1435
 
1436
1436
  # Extract the days from a Duration type.
1437
1437
  #
1438
+ # @param fractional [Boolean]
1439
+ # Whether to include the fractional component of the day.
1440
+ #
1438
1441
  # @return [Expr]
1439
1442
  #
1440
1443
  # @example
@@ -1462,13 +1465,16 @@ module Polars
1462
1465
  # # │ 2020-04-01 00:00:00 ┆ 31 │
1463
1466
  # # │ 2020-05-01 00:00:00 ┆ 30 │
1464
1467
  # # └─────────────────────┴───────────┘
1465
- def total_days
1466
- Utils.wrap_expr(_rbexpr.dt_total_days)
1468
+ def total_days(fractional: false)
1469
+ Utils.wrap_expr(_rbexpr.dt_total_days(fractional))
1467
1470
  end
1468
1471
  alias_method :days, :total_days
1469
1472
 
1470
1473
  # Extract the hours from a Duration type.
1471
1474
  #
1475
+ # @param fractional [Boolean]
1476
+ # Whether to include the fractional component of the hour.
1477
+ #
1472
1478
  # @return [Expr]
1473
1479
  #
1474
1480
  # @example
@@ -1497,13 +1503,16 @@ module Polars
1497
1503
  # # │ 2020-01-03 00:00:00 ┆ 24 │
1498
1504
  # # │ 2020-01-04 00:00:00 ┆ 24 │
1499
1505
  # # └─────────────────────┴────────────┘
1500
- def total_hours
1501
- Utils.wrap_expr(_rbexpr.dt_total_hours)
1506
+ def total_hours(fractional: false)
1507
+ Utils.wrap_expr(_rbexpr.dt_total_hours(fractional))
1502
1508
  end
1503
1509
  alias_method :hours, :total_hours
1504
1510
 
1505
1511
  # Extract the minutes from a Duration type.
1506
1512
  #
1513
+ # @param fractional [Boolean]
1514
+ # Whether to include the fractional component of the minute.
1515
+ #
1507
1516
  # @return [Expr]
1508
1517
  #
1509
1518
  # @example
@@ -1532,13 +1541,16 @@ module Polars
1532
1541
  # # │ 2020-01-03 00:00:00 ┆ 1440 │
1533
1542
  # # │ 2020-01-04 00:00:00 ┆ 1440 │
1534
1543
  # # └─────────────────────┴──────────────┘
1535
- def total_minutes
1536
- Utils.wrap_expr(_rbexpr.dt_total_minutes)
1544
+ def total_minutes(fractional: false)
1545
+ Utils.wrap_expr(_rbexpr.dt_total_minutes(fractional))
1537
1546
  end
1538
1547
  alias_method :minutes, :total_minutes
1539
1548
 
1540
1549
  # Extract the seconds from a Duration type.
1541
1550
  #
1551
+ # @param fractional [Boolean]
1552
+ # Whether to include the fractional component of the second.
1553
+ #
1542
1554
  # @return [Expr]
1543
1555
  #
1544
1556
  # @example
@@ -1568,13 +1580,16 @@ module Polars
1568
1580
  # # │ 2020-01-01 00:03:00 ┆ 60 │
1569
1581
  # # │ 2020-01-01 00:04:00 ┆ 60 │
1570
1582
  # # └─────────────────────┴──────────────┘
1571
- def total_seconds
1572
- Utils.wrap_expr(_rbexpr.dt_total_seconds)
1583
+ def total_seconds(fractional: false)
1584
+ Utils.wrap_expr(_rbexpr.dt_total_seconds(fractional))
1573
1585
  end
1574
1586
  alias_method :seconds, :total_seconds
1575
1587
 
1576
1588
  # Extract the milliseconds from a Duration type.
1577
1589
  #
1590
+ # @param fractional [Boolean]
1591
+ # Whether to include the fractional component of the millisecond.
1592
+ #
1578
1593
  # @return [Expr]
1579
1594
  #
1580
1595
  # @example
@@ -1610,13 +1625,16 @@ module Polars
1610
1625
  # # │ 2020-01-01 00:00:00.999 ┆ 1 │
1611
1626
  # # │ 2020-01-01 00:00:01 ┆ 1 │
1612
1627
  # # └─────────────────────────┴───────────────────┘
1613
- def total_milliseconds
1614
- Utils.wrap_expr(_rbexpr.dt_total_milliseconds)
1628
+ def total_milliseconds(fractional: false)
1629
+ Utils.wrap_expr(_rbexpr.dt_total_milliseconds(fractional))
1615
1630
  end
1616
1631
  alias_method :milliseconds, :total_milliseconds
1617
1632
 
1618
1633
  # Extract the microseconds from a Duration type.
1619
1634
  #
1635
+ # @param fractional [Boolean]
1636
+ # Whether to include the fractional component of the microsecond.
1637
+ #
1620
1638
  # @return [Expr]
1621
1639
  #
1622
1640
  # @example
@@ -1652,13 +1670,16 @@ module Polars
1652
1670
  # # │ 2020-01-01 00:00:00.999 ┆ 1000 │
1653
1671
  # # │ 2020-01-01 00:00:01 ┆ 1000 │
1654
1672
  # # └─────────────────────────┴───────────────────┘
1655
- def total_microseconds
1656
- Utils.wrap_expr(_rbexpr.dt_total_microseconds)
1673
+ def total_microseconds(fractional: false)
1674
+ Utils.wrap_expr(_rbexpr.dt_total_microseconds(fractional))
1657
1675
  end
1658
1676
  alias_method :microseconds, :total_microseconds
1659
1677
 
1660
1678
  # Extract the nanoseconds from a Duration type.
1661
1679
  #
1680
+ # @param fractional [Boolean]
1681
+ # Whether to include the fractional component of the nanosecond.
1682
+ #
1662
1683
  # @return [Expr]
1663
1684
  #
1664
1685
  # @example
@@ -1694,8 +1715,8 @@ module Polars
1694
1715
  # # │ 2020-01-01 00:00:00.999 ┆ 1000000 │
1695
1716
  # # │ 2020-01-01 00:00:01 ┆ 1000000 │
1696
1717
  # # └─────────────────────────┴──────────────────┘
1697
- def total_nanoseconds
1698
- Utils.wrap_expr(_rbexpr.dt_total_nanoseconds)
1718
+ def total_nanoseconds(fractional: false)
1719
+ Utils.wrap_expr(_rbexpr.dt_total_nanoseconds(fractional))
1699
1720
  end
1700
1721
  alias_method :nanoseconds, :total_nanoseconds
1701
1722
 
data/lib/polars/expr.rb CHANGED
@@ -146,6 +146,40 @@ module Polars
146
146
  wrap_expr(_rbexpr.neg)
147
147
  end
148
148
 
149
+ # Read a serialized expression from a file.
150
+ #
151
+ # @param source [Object]
152
+ # Path to a file or a file-like object (by file-like object, we refer to
153
+ # objects that have a `read` method, such as a file handler or `StringIO`).
154
+ #
155
+ # @return [Expr]
156
+ #
157
+ # @note
158
+ # This function uses marshaling if the logical plan contains Ruby UDFs,
159
+ # and as such inherits the security implications. Deserializing can execute
160
+ # arbitrary code, so it should only be attempted on trusted data.
161
+ #
162
+ # @note
163
+ # Serialization is not stable across Polars versions: an expression serialized
164
+ # in one Polars version may not be deserializable in another Polars version.
165
+ #
166
+ # @example
167
+ # expr = Polars.col("foo").sum.over("bar")
168
+ # bytes = expr.meta.serialize
169
+ # Polars::Expr.deserialize(StringIO.new(bytes))
170
+ # # => col("foo").sum().over([col("bar")])
171
+ def self.deserialize(source)
172
+ raise Todo unless RbExpr.respond_to?(:deserialize_binary)
173
+
174
+ if Utils.pathlike?(source)
175
+ source = Utils.normalize_filepath(source)
176
+ end
177
+
178
+ deserializer = RbExpr.method(:deserialize_binary)
179
+
180
+ _from_rbexpr(deserializer.(source))
181
+ end
182
+
149
183
  # Cast to physical representation of the logical dtype.
150
184
  #
151
185
  # - `:date` -> `:i32`
@@ -377,8 +411,6 @@ module Polars
377
411
  wrap_expr(_rbexpr._alias(name))
378
412
  end
379
413
 
380
- # TODO support symbols for exclude
381
-
382
414
  # Exclude certain columns from a wildcard/regex selection.
383
415
  #
384
416
  # You may also use regexes in the exclude list. They must start with `^` and end
@@ -1787,7 +1819,7 @@ module Polars
1787
1819
  wrap_expr(_rbexpr.arg_min)
1788
1820
  end
1789
1821
 
1790
- # Get the index of the first occurrence of a value, or ``None`` if it's not found.
1822
+ # Get the index of the first occurrence of a value, or `nil` if it's not found.
1791
1823
  #
1792
1824
  # @param element [Object]
1793
1825
  # Value to find.
@@ -6580,8 +6612,8 @@ module Polars
6580
6612
  # # │ 10 ┆ null │
6581
6613
  # # │ 11 ┆ 0.1 │
6582
6614
  # # │ 12 ┆ 0.090909 │
6583
- # # │ null ┆ 0.0
6584
- # # │ 12 ┆ 0.0
6615
+ # # │ null ┆ null
6616
+ # # │ 12 ┆ null
6585
6617
  # # └──────┴────────────┘
6586
6618
  def pct_change(n: 1)
6587
6619
  n = Utils.parse_into_expression(n)
@@ -7571,7 +7603,8 @@ module Polars
7571
7603
  # # │ 1.584963 │
7572
7604
  # # └──────────┘
7573
7605
  def log(base = Math::E)
7574
- wrap_expr(_rbexpr.log(base))
7606
+ base_rbexpr = Utils.parse_into_expression(base)
7607
+ wrap_expr(_rbexpr.log(base_rbexpr))
7575
7608
  end
7576
7609
 
7577
7610
  # Compute the natural logarithm of each element plus one.
@@ -7743,33 +7776,9 @@ module Polars
7743
7776
  # This can be used to reduce memory pressure.
7744
7777
  #
7745
7778
  # @return [Expr]
7746
- #
7747
- # @example
7748
- # Polars::DataFrame.new(
7749
- # {
7750
- # "a" => [1, 2, 3],
7751
- # "b" => [1, 2, 2 << 32],
7752
- # "c" => [-1, 2, 1 << 30],
7753
- # "d" => [-112, 2, 112],
7754
- # "e" => [-112, 2, 129],
7755
- # "f" => ["a", "b", "c"],
7756
- # "g" => [0.1, 1.32, 0.12],
7757
- # "h" => [true, nil, false]
7758
- # }
7759
- # ).select(Polars.all.shrink_dtype)
7760
- # # =>
7761
- # # shape: (3, 8)
7762
- # # ┌─────┬────────────┬────────────┬──────┬──────┬─────┬──────┬───────┐
7763
- # # │ a ┆ b ┆ c ┆ d ┆ e ┆ f ┆ g ┆ h │
7764
- # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
7765
- # # │ i8 ┆ i64 ┆ i32 ┆ i8 ┆ i16 ┆ str ┆ f32 ┆ bool │
7766
- # # ╞═════╪════════════╪════════════╪══════╪══════╪═════╪══════╪═══════╡
7767
- # # │ 1 ┆ 1 ┆ -1 ┆ -112 ┆ -112 ┆ a ┆ 0.1 ┆ true │
7768
- # # │ 2 ┆ 2 ┆ 2 ┆ 2 ┆ 2 ┆ b ┆ 1.32 ┆ null │
7769
- # # │ 3 ┆ 8589934592 ┆ 1073741824 ┆ 112 ┆ 129 ┆ c ┆ 0.12 ┆ false │
7770
- # # └─────┴────────────┴────────────┴──────┴──────┴─────┴──────┴───────┘
7771
7779
  def shrink_dtype
7772
- wrap_expr(_rbexpr.shrink_dtype)
7780
+ warn "`Expr.shrink_dtype` is deprecated and is a no-op; use `Series.shrink_dtype` instead."
7781
+ self
7773
7782
  end
7774
7783
 
7775
7784
  # Bin values into buckets and count their occurrences.
@@ -0,0 +1,95 @@
1
+ module Polars
2
+ module Functions
3
+ # Count the number of business days between `start` and `stop` (not including `stop`).
4
+ #
5
+ # @note
6
+ # This functionality is considered **unstable**. It may be changed
7
+ # at any point without it being considered a breaking change.
8
+ #
9
+ # @param start [Object]
10
+ # Start dates.
11
+ # @param stop [Object]
12
+ # End dates.
13
+ # @param week_mask [Array]
14
+ # Which days of the week to count. The default is Monday to Friday.
15
+ # If you wanted to count only Monday to Thursday, you would pass
16
+ # `[true, true, true, true, false, false, false]`.
17
+ # @param holidays [Array]
18
+ # Holidays to exclude from the count.
19
+ #
20
+ # @return [Expr]
21
+ #
22
+ # @example
23
+ # df = Polars::DataFrame.new(
24
+ # {
25
+ # "start" => [Date.new(2020, 1, 1), Date.new(2020, 1, 2)],
26
+ # "end" => [Date.new(2020, 1, 2), Date.new(2020, 1, 10)]
27
+ # }
28
+ # )
29
+ # df.with_columns(
30
+ # business_day_count: Polars.business_day_count("start", "end")
31
+ # )
32
+ # # =>
33
+ # # shape: (2, 3)
34
+ # # ┌────────────┬────────────┬────────────────────┐
35
+ # # │ start ┆ end ┆ business_day_count │
36
+ # # │ --- ┆ --- ┆ --- │
37
+ # # │ date ┆ date ┆ i32 │
38
+ # # ╞════════════╪════════════╪════════════════════╡
39
+ # # │ 2020-01-01 ┆ 2020-01-02 ┆ 1 │
40
+ # # │ 2020-01-02 ┆ 2020-01-10 ┆ 6 │
41
+ # # └────────────┴────────────┴────────────────────┘
42
+ #
43
+ # @example You can pass a custom weekend - for example, if you only take Sunday off:
44
+ # week_mask = [true, true, true, true, true, true, false]
45
+ # df.with_columns(
46
+ # business_day_count: Polars.business_day_count(
47
+ # "start", "end", week_mask: week_mask
48
+ # )
49
+ # )
50
+ # # =>
51
+ # # shape: (2, 3)
52
+ # # ┌────────────┬────────────┬────────────────────┐
53
+ # # │ start ┆ end ┆ business_day_count │
54
+ # # │ --- ┆ --- ┆ --- │
55
+ # # │ date ┆ date ┆ i32 │
56
+ # # ╞════════════╪════════════╪════════════════════╡
57
+ # # │ 2020-01-01 ┆ 2020-01-02 ┆ 1 │
58
+ # # │ 2020-01-02 ┆ 2020-01-10 ┆ 7 │
59
+ # # └────────────┴────────────┴────────────────────┘
60
+ #
61
+ # @example You can also pass a list of holidays to exclude from the count:
62
+ # holidays = [Date.new(2020, 1, 1), Date.new(2020, 1, 2)]
63
+ # df.with_columns(
64
+ # business_day_count: Polars.business_day_count("start", "end", holidays: holidays)
65
+ # )
66
+ # # =>
67
+ # # shape: (2, 3)
68
+ # # ┌────────────┬────────────┬────────────────────┐
69
+ # # │ start ┆ end ┆ business_day_count │
70
+ # # │ --- ┆ --- ┆ --- │
71
+ # # │ date ┆ date ┆ i32 │
72
+ # # ╞════════════╪════════════╪════════════════════╡
73
+ # # │ 2020-01-01 ┆ 2020-01-02 ┆ 0 │
74
+ # # │ 2020-01-02 ┆ 2020-01-10 ┆ 5 │
75
+ # # └────────────┴────────────┴────────────────────┘
76
+ def business_day_count(
77
+ start,
78
+ stop,
79
+ week_mask: [true, true, true, true, true, false, false],
80
+ holidays: []
81
+ )
82
+ start_rbexpr = Utils.parse_into_expression(start)
83
+ end_rbexpr = Utils.parse_into_expression(stop)
84
+ unix_epoch = ::Date.new(1970, 1, 1)
85
+ Utils.wrap_expr(
86
+ Plr.business_day_count(
87
+ start_rbexpr,
88
+ end_rbexpr,
89
+ week_mask,
90
+ holidays.map { |holiday| holiday - unix_epoch }
91
+ )
92
+ )
93
+ end
94
+ end
95
+ end
@@ -823,7 +823,7 @@ module Polars
823
823
  # # ┌─────┐
824
824
  # # │ sum │
825
825
  # # │ --- │
826
- # # │ i64
826
+ # # │ i32
827
827
  # # ╞═════╡
828
828
  # # │ 10 │
829
829
  # # │ 13 │