polars-df 0.5.0-arm64-darwin → 0.6.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,9 @@
1
1
  module Polars
2
- # Series.arr namespace.
2
+ # Series.list namespace.
3
3
  class ListNameSpace
4
4
  include ExprDispatch
5
5
 
6
- self._accessor = "arr"
6
+ self._accessor = "list"
7
7
 
8
8
  # @private
9
9
  def initialize(series)
@@ -16,7 +16,7 @@ module Polars
16
16
  #
17
17
  # @example
18
18
  # s = Polars::Series.new([[1, 2, 3], [5]])
19
- # s.arr.lengths
19
+ # s.list.lengths
20
20
  # # =>
21
21
  # # shape: (2,)
22
22
  # # Series: '' [u32]
@@ -119,13 +119,13 @@ module Polars
119
119
  #
120
120
  # @example
121
121
  # s = Polars::Series.new([["foo", "bar"], ["hello", "world"]])
122
- # s.arr.join("-")
122
+ # s.list.join("-")
123
123
  # # =>
124
124
  # # shape: (2,)
125
125
  # # Series: '' [str]
126
126
  # # [
127
- # # "foo-bar"
128
- # # "hello-world"
127
+ # # "foo-bar"
128
+ # # "hello-world"
129
129
  # # ]
130
130
  def join(separator)
131
131
  super
@@ -180,7 +180,7 @@ module Polars
180
180
  #
181
181
  # @example
182
182
  # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
183
- # s.arr.diff
183
+ # s.list.diff
184
184
  # # =>
185
185
  # # shape: (2,)
186
186
  # # Series: 'a' [list[i64]]
@@ -201,7 +201,7 @@ module Polars
201
201
  #
202
202
  # @example
203
203
  # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
204
- # s.arr.shift
204
+ # s.list.shift
205
205
  # # =>
206
206
  # # shape: (2,)
207
207
  # # Series: 'a' [list[i64]]
@@ -225,7 +225,7 @@ module Polars
225
225
  #
226
226
  # @example
227
227
  # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
228
- # s.arr.slice(1, 2)
228
+ # s.list.slice(1, 2)
229
229
  # # =>
230
230
  # # shape: (2,)
231
231
  # # Series: 'a' [list[i64]]
@@ -246,7 +246,7 @@ module Polars
246
246
  #
247
247
  # @example
248
248
  # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
249
- # s.arr.head(2)
249
+ # s.list.head(2)
250
250
  # # =>
251
251
  # # shape: (2,)
252
252
  # # Series: 'a' [list[i64]]
@@ -267,7 +267,7 @@ module Polars
267
267
  #
268
268
  # @example
269
269
  # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
270
- # s.arr.tail(2)
270
+ # s.list.tail(2)
271
271
  # # =>
272
272
  # # shape: (2,)
273
273
  # # Series: 'a' [list[i64]]
@@ -291,7 +291,7 @@ module Polars
291
291
  #
292
292
  # @example
293
293
  # df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
294
- # df.select([Polars.col("a").arr.to_struct])
294
+ # df.select([Polars.col("a").list.to_struct])
295
295
  # # =>
296
296
  # # shape: (2, 1)
297
297
  # # ┌────────────┐
@@ -323,7 +323,7 @@ module Polars
323
323
  # @example
324
324
  # df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
325
325
  # df.with_column(
326
- # Polars.concat_list(["a", "b"]).arr.eval(Polars.element.rank).alias("rank")
326
+ # Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
327
327
  # )
328
328
  # # =>
329
329
  # # shape: (3, 3)
@@ -10,7 +10,8 @@ module Polars
10
10
  period,
11
11
  offset,
12
12
  closed,
13
- by
13
+ by,
14
+ check_sorted
14
15
  )
15
16
  period = Utils._timedelta_to_pl_duration(period)
16
17
  offset = Utils._timedelta_to_pl_duration(offset)
@@ -21,12 +22,13 @@ module Polars
21
22
  @offset = offset
22
23
  @closed = closed
23
24
  @by = by
25
+ @check_sorted = check_sorted
24
26
  end
25
27
 
26
28
  def agg(aggs)
27
29
  @df.lazy
28
30
  .groupby_rolling(
29
- index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @by
31
+ index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @by, check_sorted: @check_sorted
30
32
  )
31
33
  .agg(aggs)
32
34
  .collect(no_optimization: true, string_cache: false)
data/lib/polars/series.rb CHANGED
@@ -65,7 +65,7 @@ module Polars
65
65
  )
66
66
  .rename(name, in_place: true)
67
67
  ._s
68
- elsif values.is_a?(Array)
68
+ elsif values.is_a?(::Array)
69
69
  self._s = sequence_to_rbseries(name, values, dtype: dtype, strict: strict, dtype_if_empty: dtype_if_empty)
70
70
  elsif defined?(Numo::NArray) && values.is_a?(Numo::NArray)
71
71
  self._s = numo_to_rbseries(name, values, strict: strict, nan_to_null: nan_to_null)
@@ -317,6 +317,10 @@ module Polars
317
317
  end
318
318
 
319
319
  if item.is_a?(Integer)
320
+ if item < 0
321
+ item = len + item
322
+ end
323
+
320
324
  return _s.get_idx(item)
321
325
  end
322
326
 
@@ -335,7 +339,7 @@ module Polars
335
339
  #
336
340
  # @return [Object]
337
341
  def []=(key, value)
338
- if value.is_a?(Array)
342
+ if value.is_a?(::Array)
339
343
  if is_numeric || is_datelike
340
344
  set_at_idx(key, value)
341
345
  return
@@ -353,7 +357,7 @@ module Polars
353
357
  else
354
358
  raise Todo
355
359
  end
356
- elsif key.is_a?(Array)
360
+ elsif key.is_a?(::Array)
357
361
  s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: UInt32))
358
362
  self[s] = value
359
363
  elsif key.is_a?(Range)
@@ -715,8 +719,8 @@ module Polars
715
719
  # # │ 0 ┆ 1 ┆ 0 │
716
720
  # # │ 0 ┆ 0 ┆ 1 │
717
721
  # # └─────┴─────┴─────┘
718
- def to_dummies(separator: "_")
719
- Utils.wrap_df(_s.to_dummies(separator))
722
+ def to_dummies(separator: "_", drop_first: false)
723
+ Utils.wrap_df(_s.to_dummies(separator, drop_first))
720
724
  end
721
725
 
722
726
  # Count the unique values in a Series.
@@ -1124,7 +1128,7 @@ module Polars
1124
1128
  # # 3
1125
1129
  # # ]
1126
1130
  def filter(predicate)
1127
- if predicate.is_a?(Array)
1131
+ if predicate.is_a?(::Array)
1128
1132
  predicate = Series.new("", predicate)
1129
1133
  end
1130
1134
  Utils.wrap_s(_s.filter(predicate._s))
@@ -2813,7 +2817,8 @@ module Polars
2813
2817
  window_size,
2814
2818
  weights: nil,
2815
2819
  min_periods: nil,
2816
- center: false
2820
+ center: false,
2821
+ ddof: 1
2817
2822
  )
2818
2823
  to_frame
2819
2824
  .select(
@@ -2821,7 +2826,8 @@ module Polars
2821
2826
  window_size,
2822
2827
  weights: weights,
2823
2828
  min_periods: min_periods,
2824
- center: center
2829
+ center: center,
2830
+ ddof: ddof
2825
2831
  )
2826
2832
  )
2827
2833
  .to_series
@@ -2864,7 +2870,8 @@ module Polars
2864
2870
  window_size,
2865
2871
  weights: nil,
2866
2872
  min_periods: nil,
2867
- center: false
2873
+ center: false,
2874
+ ddof: 1
2868
2875
  )
2869
2876
  to_frame
2870
2877
  .select(
@@ -2872,7 +2879,8 @@ module Polars
2872
2879
  window_size,
2873
2880
  weights: weights,
2874
2881
  min_periods: min_periods,
2875
- center: center
2882
+ center: center,
2883
+ ddof: ddof
2876
2884
  )
2877
2885
  )
2878
2886
  .to_series
@@ -3581,10 +3589,17 @@ module Polars
3581
3589
  # Create an object namespace of all list related methods.
3582
3590
  #
3583
3591
  # @return [ListNameSpace]
3584
- def arr
3592
+ def list
3585
3593
  ListNameSpace.new(self)
3586
3594
  end
3587
3595
 
3596
+ # Create an object namespace of all array related methods.
3597
+ #
3598
+ # @return [ArrayNameSpace]
3599
+ def arr
3600
+ ArrayNameSpace.new(self)
3601
+ end
3602
+
3588
3603
  # Create an object namespace of all binary related methods.
3589
3604
  #
3590
3605
  # @return [BinaryNameSpace]
@@ -3824,9 +3839,12 @@ module Polars
3824
3839
 
3825
3840
  if (values.nil? || values.empty?) && dtype.nil?
3826
3841
  dtype = dtype_if_empty || Float32
3842
+ elsif dtype == List
3843
+ ruby_dtype = ::Array
3827
3844
  end
3828
3845
 
3829
3846
  rb_temporal_types = [::Date, ::DateTime, ::Time]
3847
+ rb_temporal_types << ActiveSupport::TimeWithZone if defined?(ActiveSupport::TimeWithZone)
3830
3848
 
3831
3849
  value = _get_first_non_none(values)
3832
3850
  if !value.nil?
@@ -3835,9 +3853,20 @@ module Polars
3835
3853
  end
3836
3854
  end
3837
3855
 
3838
- if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
3856
+ if !dtype.nil? && ![List, Unknown].include?(dtype) && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
3857
+ if dtype == Array && !dtype.is_a?(Array) && value.is_a?(::Array)
3858
+ dtype = Array.new(value.size)
3859
+ end
3860
+
3839
3861
  constructor = polars_type_to_constructor(dtype)
3840
3862
  rbseries = constructor.call(name, values, strict)
3863
+
3864
+ base_type = dtype.is_a?(DataType) ? dtype.class : dtype
3865
+ if [Date, Datetime, Duration, Time, Categorical, Boolean].include?(base_type)
3866
+ if rbseries.dtype != dtype
3867
+ rbseries = rbseries.cast(dtype, true)
3868
+ end
3869
+ end
3841
3870
  return rbseries
3842
3871
  else
3843
3872
  if ruby_dtype.nil?
@@ -3868,7 +3897,17 @@ module Polars
3868
3897
  return s._s
3869
3898
  elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
3870
3899
  raise Todo
3871
- elsif ruby_dtype == Array
3900
+ elsif ruby_dtype == ::Array
3901
+ if dtype.is_a?(Object)
3902
+ return RbSeries.new_object(name, values, strict)
3903
+ end
3904
+ if dtype
3905
+ srs = sequence_from_anyvalue_or_object(name, values)
3906
+ if dtype != srs.dtype
3907
+ srs = srs.cast(dtype, strict: false)
3908
+ end
3909
+ return srs
3910
+ end
3872
3911
  return sequence_from_anyvalue_or_object(name, values)
3873
3912
  elsif ruby_dtype == Series
3874
3913
  return RbSeries.new_series_list(name, values.map(&:_s), strict)
@@ -3910,9 +3949,17 @@ module Polars
3910
3949
  UInt16 => RbSeries.method(:new_opt_u16),
3911
3950
  UInt32 => RbSeries.method(:new_opt_u32),
3912
3951
  UInt64 => RbSeries.method(:new_opt_u64),
3952
+ Decimal => RbSeries.method(:new_decimal),
3953
+ Date => RbSeries.method(:new_from_anyvalues),
3954
+ Datetime => RbSeries.method(:new_from_anyvalues),
3955
+ Duration => RbSeries.method(:new_from_anyvalues),
3956
+ Time => RbSeries.method(:new_from_anyvalues),
3913
3957
  Boolean => RbSeries.method(:new_opt_bool),
3914
3958
  Utf8 => RbSeries.method(:new_str),
3915
- Binary => RbSeries.method(:new_binary)
3959
+ Object => RbSeries.method(:new_object),
3960
+ Categorical => RbSeries.method(:new_str),
3961
+ Binary => RbSeries.method(:new_binary),
3962
+ Null => RbSeries.method(:new_null)
3916
3963
  }
3917
3964
 
3918
3965
  SYM_TYPE_TO_CONSTRUCTOR = {
@@ -3931,8 +3978,14 @@ module Polars
3931
3978
  }
3932
3979
 
3933
3980
  def polars_type_to_constructor(dtype)
3934
- if dtype.is_a?(Class) && dtype < DataType
3981
+ if dtype.is_a?(Array)
3982
+ lambda do |name, values, strict|
3983
+ RbSeries.new_array(dtype.width, dtype.inner, name, values, strict)
3984
+ end
3985
+ elsif dtype.is_a?(Class) && dtype < DataType
3935
3986
  POLARS_TYPE_TO_CONSTRUCTOR.fetch(dtype)
3987
+ elsif dtype.is_a?(DataType)
3988
+ POLARS_TYPE_TO_CONSTRUCTOR.fetch(dtype.class)
3936
3989
  else
3937
3990
  SYM_TYPE_TO_CONSTRUCTOR.fetch(dtype.to_sym)
3938
3991
  end
@@ -3944,7 +3997,8 @@ module Polars
3944
3997
  Float => RbSeries.method(:new_opt_f64),
3945
3998
  Integer => RbSeries.method(:new_opt_i64),
3946
3999
  TrueClass => RbSeries.method(:new_opt_bool),
3947
- FalseClass => RbSeries.method(:new_opt_bool)
4000
+ FalseClass => RbSeries.method(:new_opt_bool),
4001
+ BigDecimal => RbSeries.method(:new_decimal)
3948
4002
  }
3949
4003
 
3950
4004
  def rb_type_to_constructor(dtype)
@@ -9,11 +9,129 @@ module Polars
9
9
  self._rbexpr = expr._rbexpr
10
10
  end
11
11
 
12
+ # Convert a Utf8 column into a Date column.
13
+ #
14
+ # @param format [String]
15
+ # Format to use for conversion. Refer to the
16
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
17
+ # for the full specification. Example: `"%Y-%m-%d"`.
18
+ # If set to nil (default), the format is inferred from the data.
19
+ # @param strict [Boolean]
20
+ # Raise an error if any conversion fails.
21
+ # @param exact [Boolean]
22
+ # Require an exact format match. If false, allow the format to match anywhere
23
+ # in the target string.
24
+ # @param cache [Boolean]
25
+ # Use a cache of unique, converted dates to apply the conversion.
26
+ #
27
+ # @return [Expr]
28
+ #
29
+ # @example
30
+ # s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
31
+ # s.str.to_date
32
+ # # =>
33
+ # # shape: (3,)
34
+ # # Series: '' [date]
35
+ # # [
36
+ # # 2020-01-01
37
+ # # 2020-02-01
38
+ # # 2020-03-01
39
+ # # ]
40
+ def to_date(format = nil, strict: true, exact: true, cache: true)
41
+ _validate_format_argument(format)
42
+ Utils.wrap_expr(self._rbexpr.str_to_date(format, strict, exact, cache))
43
+ end
44
+
45
+ # Convert a Utf8 column into a Datetime column.
46
+ #
47
+ # @param format [String]
48
+ # Format to use for conversion. Refer to the
49
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
50
+ # for the full specification. Example: `"%Y-%m-%d %H:%M:%S"`.
51
+ # If set to nil (default), the format is inferred from the data.
52
+ # @param time_unit ["us", "ns", "ms"]
53
+ # Unit of time for the resulting Datetime column. If set to nil (default),
54
+ # the time unit is inferred from the format string if given, eg:
55
+ # `"%F %T%.3f"` => `Datetime("ms")`. If no fractional second component is
56
+ # found, the default is `"us"`.
57
+ # @param time_zone [String]
58
+ # Time zone for the resulting Datetime column.
59
+ # @param strict [Boolean]
60
+ # Raise an error if any conversion fails.
61
+ # @param exact [Boolean]
62
+ # Require an exact format match. If false, allow the format to match anywhere
63
+ # in the target string.
64
+ # @param cache [Boolean]
65
+ # Use a cache of unique, converted datetimes to apply the conversion.
66
+ #
67
+ # @return [Expr]
68
+ #
69
+ # @example
70
+ # s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
71
+ # s.str.to_datetime("%Y-%m-%d %H:%M%#z")
72
+ # # =>
73
+ # # shape: (2,)
74
+ # # Series: '' [datetime[μs, UTC]]
75
+ # # [
76
+ # # 2020-01-01 01:00:00 UTC
77
+ # # 2020-01-01 02:00:00 UTC
78
+ # # ]
79
+ def to_datetime(
80
+ format = nil,
81
+ time_unit: nil,
82
+ time_zone: nil,
83
+ strict: true,
84
+ exact: true,
85
+ cache: true
86
+ )
87
+ _validate_format_argument(format)
88
+ Utils.wrap_expr(
89
+ self._rbexpr.str_to_datetime(
90
+ format,
91
+ time_unit,
92
+ time_zone,
93
+ strict,
94
+ exact,
95
+ cache
96
+ )
97
+ )
98
+ end
99
+
100
+ # Convert a Utf8 column into a Time column.
101
+ #
102
+ # @param format [String]
103
+ # Format to use for conversion. Refer to the
104
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
105
+ # for the full specification. Example: `"%H:%M:%S"`.
106
+ # If set to nil (default), the format is inferred from the data.
107
+ # @param strict [Boolean]
108
+ # Raise an error if any conversion fails.
109
+ # @param cache [Boolean]
110
+ # Use a cache of unique, converted times to apply the conversion.
111
+ #
112
+ # @return [Expr]
113
+ #
114
+ # @example
115
+ # s = Polars::Series.new(["01:00", "02:00", "03:00"])
116
+ # s.str.to_time("%H:%M")
117
+ # # =>
118
+ # # shape: (3,)
119
+ # # Series: '' [time]
120
+ # # [
121
+ # # 01:00:00
122
+ # # 02:00:00
123
+ # # 03:00:00
124
+ # # ]
125
+ def to_time(format = nil, strict: true, cache: true)
126
+ _validate_format_argument(format)
127
+ Utils.wrap_expr(_rbexpr.str_to_time(format, strict, cache))
128
+ end
129
+
12
130
  # Parse a Utf8 expression to a Date/Datetime/Time type.
13
131
  #
14
132
  # @param dtype [Object]
15
133
  # The data type to convert into. Can be either Date, Datetime, or Time.
16
- # @param fmt [String]
134
+ # @param format [String]
17
135
  # Format to use, refer to the
18
136
  # [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
19
137
  # for specification. Example: `"%y-%m-%d"`.
@@ -38,10 +156,10 @@ module Polars
38
156
  # s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
39
157
  # # =>
40
158
  # # shape: (2,)
41
- # # Series: '' [datetime[μs, +00:00]]
159
+ # # Series: '' [datetime[μs, UTC]]
42
160
  # # [
43
- # # 2020-01-01 01:00:00 +00:00
44
- # # 2020-01-01 02:00:00 +00:00
161
+ # # 2020-01-01 01:00:00 UTC
162
+ # # 2020-01-01 02:00:00 UTC
45
163
  # # ]
46
164
  #
47
165
  # @example Dealing with different formats.
@@ -71,16 +189,18 @@ module Polars
71
189
  # # 2022-01-31
72
190
  # # 2001-07-08
73
191
  # # ]
74
- def strptime(dtype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
192
+ def strptime(dtype, format = nil, strict: true, exact: true, cache: true, utc: false)
193
+ _validate_format_argument(format)
194
+
75
195
  if dtype == Date
76
- Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact, cache))
196
+ to_date(format, strict: strict, exact: exact, cache: cache)
77
197
  elsif dtype == Datetime || dtype.is_a?(Datetime)
78
198
  dtype = Datetime.new if dtype == Datetime
79
199
  time_unit = dtype.time_unit
80
200
  time_zone = dtype.time_zone
81
- Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, time_unit, time_zone, strict, exact, cache, tz_aware, utc))
201
+ to_datetime(format, time_unit: time_unit, time_zone: time_zone, strict: strict, exact: exact, cache: cache)
82
202
  elsif dtype == Time
83
- Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact, cache))
203
+ to_time(format, strict: strict, cache: cache)
84
204
  else
85
205
  raise ArgumentError, "dtype should be of type {Date, Datetime, Time}"
86
206
  end
@@ -547,11 +667,11 @@ module Polars
547
667
  # # │ {null,null} │
548
668
  # # │ {2,false} │
549
669
  # # └─────────────┘
550
- def json_extract(dtype = nil)
670
+ def json_extract(dtype = nil, infer_schema_length: 100)
551
671
  if !dtype.nil?
552
672
  dtype = Utils.rb_type_to_dtype(dtype)
553
673
  end
554
- Utils.wrap_expr(_rbexpr.str_json_extract(dtype))
674
+ Utils.wrap_expr(_rbexpr.str_json_extract(dtype, infer_schema_length))
555
675
  end
556
676
 
557
677
  # Extract the first match of json string with provided JSONPath expression.
@@ -968,7 +1088,7 @@ module Polars
968
1088
  # # │ r │
969
1089
  # # └─────┘
970
1090
  def explode
971
- Utils.wrap_expr(_rbexpr.explode)
1091
+ Utils.wrap_expr(_rbexpr.str_explode)
972
1092
  end
973
1093
 
974
1094
  # Parse integers with base radix from strings.
@@ -1018,5 +1138,11 @@ module Polars
1018
1138
  def parse_int(radix = 2, strict: true)
1019
1139
  Utils.wrap_expr(_rbexpr.str_parse_int(radix, strict))
1020
1140
  end
1141
+
1142
+ private
1143
+
1144
+ def _validate_format_argument(format)
1145
+ # TODO
1146
+ end
1021
1147
  end
1022
1148
  end