polars-df 0.5.0-x86_64-linux → 0.6.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,9 +1,9 @@
1
1
  module Polars
2
- # Series.arr namespace.
2
+ # Series.list namespace.
3
3
  class ListNameSpace
4
4
  include ExprDispatch
5
5
 
6
- self._accessor = "arr"
6
+ self._accessor = "list"
7
7
 
8
8
  # @private
9
9
  def initialize(series)
@@ -16,7 +16,7 @@ module Polars
16
16
  #
17
17
  # @example
18
18
  # s = Polars::Series.new([[1, 2, 3], [5]])
19
- # s.arr.lengths
19
+ # s.list.lengths
20
20
  # # =>
21
21
  # # shape: (2,)
22
22
  # # Series: '' [u32]
@@ -119,13 +119,13 @@ module Polars
119
119
  #
120
120
  # @example
121
121
  # s = Polars::Series.new([["foo", "bar"], ["hello", "world"]])
122
- # s.arr.join("-")
122
+ # s.list.join("-")
123
123
  # # =>
124
124
  # # shape: (2,)
125
125
  # # Series: '' [str]
126
126
  # # [
127
- # # "foo-bar"
128
- # # "hello-world"
127
+ # # "foo-bar"
128
+ # # "hello-world"
129
129
  # # ]
130
130
  def join(separator)
131
131
  super
@@ -180,7 +180,7 @@ module Polars
180
180
  #
181
181
  # @example
182
182
  # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
183
- # s.arr.diff
183
+ # s.list.diff
184
184
  # # =>
185
185
  # # shape: (2,)
186
186
  # # Series: 'a' [list[i64]]
@@ -201,7 +201,7 @@ module Polars
201
201
  #
202
202
  # @example
203
203
  # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
204
- # s.arr.shift
204
+ # s.list.shift
205
205
  # # =>
206
206
  # # shape: (2,)
207
207
  # # Series: 'a' [list[i64]]
@@ -225,7 +225,7 @@ module Polars
225
225
  #
226
226
  # @example
227
227
  # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
228
- # s.arr.slice(1, 2)
228
+ # s.list.slice(1, 2)
229
229
  # # =>
230
230
  # # shape: (2,)
231
231
  # # Series: 'a' [list[i64]]
@@ -246,7 +246,7 @@ module Polars
246
246
  #
247
247
  # @example
248
248
  # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
249
- # s.arr.head(2)
249
+ # s.list.head(2)
250
250
  # # =>
251
251
  # # shape: (2,)
252
252
  # # Series: 'a' [list[i64]]
@@ -267,7 +267,7 @@ module Polars
267
267
  #
268
268
  # @example
269
269
  # s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
270
- # s.arr.tail(2)
270
+ # s.list.tail(2)
271
271
  # # =>
272
272
  # # shape: (2,)
273
273
  # # Series: 'a' [list[i64]]
@@ -291,7 +291,7 @@ module Polars
291
291
  #
292
292
  # @example
293
293
  # df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
294
- # df.select([Polars.col("a").arr.to_struct])
294
+ # df.select([Polars.col("a").list.to_struct])
295
295
  # # =>
296
296
  # # shape: (2, 1)
297
297
  # # ┌────────────┐
@@ -323,7 +323,7 @@ module Polars
323
323
  # @example
324
324
  # df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
325
325
  # df.with_column(
326
- # Polars.concat_list(["a", "b"]).arr.eval(Polars.element.rank).alias("rank")
326
+ # Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
327
327
  # )
328
328
  # # =>
329
329
  # # shape: (3, 3)
@@ -10,7 +10,8 @@ module Polars
10
10
  period,
11
11
  offset,
12
12
  closed,
13
- by
13
+ by,
14
+ check_sorted
14
15
  )
15
16
  period = Utils._timedelta_to_pl_duration(period)
16
17
  offset = Utils._timedelta_to_pl_duration(offset)
@@ -21,12 +22,13 @@ module Polars
21
22
  @offset = offset
22
23
  @closed = closed
23
24
  @by = by
25
+ @check_sorted = check_sorted
24
26
  end
25
27
 
26
28
  def agg(aggs)
27
29
  @df.lazy
28
30
  .groupby_rolling(
29
- index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @by
31
+ index_column: @time_column, period: @period, offset: @offset, closed: @closed, by: @by, check_sorted: @check_sorted
30
32
  )
31
33
  .agg(aggs)
32
34
  .collect(no_optimization: true, string_cache: false)
data/lib/polars/series.rb CHANGED
@@ -65,7 +65,7 @@ module Polars
65
65
  )
66
66
  .rename(name, in_place: true)
67
67
  ._s
68
- elsif values.is_a?(Array)
68
+ elsif values.is_a?(::Array)
69
69
  self._s = sequence_to_rbseries(name, values, dtype: dtype, strict: strict, dtype_if_empty: dtype_if_empty)
70
70
  elsif defined?(Numo::NArray) && values.is_a?(Numo::NArray)
71
71
  self._s = numo_to_rbseries(name, values, strict: strict, nan_to_null: nan_to_null)
@@ -317,6 +317,10 @@ module Polars
317
317
  end
318
318
 
319
319
  if item.is_a?(Integer)
320
+ if item < 0
321
+ item = len + item
322
+ end
323
+
320
324
  return _s.get_idx(item)
321
325
  end
322
326
 
@@ -335,7 +339,7 @@ module Polars
335
339
  #
336
340
  # @return [Object]
337
341
  def []=(key, value)
338
- if value.is_a?(Array)
342
+ if value.is_a?(::Array)
339
343
  if is_numeric || is_datelike
340
344
  set_at_idx(key, value)
341
345
  return
@@ -353,7 +357,7 @@ module Polars
353
357
  else
354
358
  raise Todo
355
359
  end
356
- elsif key.is_a?(Array)
360
+ elsif key.is_a?(::Array)
357
361
  s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: UInt32))
358
362
  self[s] = value
359
363
  elsif key.is_a?(Range)
@@ -715,8 +719,8 @@ module Polars
715
719
  # # │ 0 ┆ 1 ┆ 0 │
716
720
  # # │ 0 ┆ 0 ┆ 1 │
717
721
  # # └─────┴─────┴─────┘
718
- def to_dummies(separator: "_")
719
- Utils.wrap_df(_s.to_dummies(separator))
722
+ def to_dummies(separator: "_", drop_first: false)
723
+ Utils.wrap_df(_s.to_dummies(separator, drop_first))
720
724
  end
721
725
 
722
726
  # Count the unique values in a Series.
@@ -1124,7 +1128,7 @@ module Polars
1124
1128
  # # 3
1125
1129
  # # ]
1126
1130
  def filter(predicate)
1127
- if predicate.is_a?(Array)
1131
+ if predicate.is_a?(::Array)
1128
1132
  predicate = Series.new("", predicate)
1129
1133
  end
1130
1134
  Utils.wrap_s(_s.filter(predicate._s))
@@ -2813,7 +2817,8 @@ module Polars
2813
2817
  window_size,
2814
2818
  weights: nil,
2815
2819
  min_periods: nil,
2816
- center: false
2820
+ center: false,
2821
+ ddof: 1
2817
2822
  )
2818
2823
  to_frame
2819
2824
  .select(
@@ -2821,7 +2826,8 @@ module Polars
2821
2826
  window_size,
2822
2827
  weights: weights,
2823
2828
  min_periods: min_periods,
2824
- center: center
2829
+ center: center,
2830
+ ddof: ddof
2825
2831
  )
2826
2832
  )
2827
2833
  .to_series
@@ -2864,7 +2870,8 @@ module Polars
2864
2870
  window_size,
2865
2871
  weights: nil,
2866
2872
  min_periods: nil,
2867
- center: false
2873
+ center: false,
2874
+ ddof: 1
2868
2875
  )
2869
2876
  to_frame
2870
2877
  .select(
@@ -2872,7 +2879,8 @@ module Polars
2872
2879
  window_size,
2873
2880
  weights: weights,
2874
2881
  min_periods: min_periods,
2875
- center: center
2882
+ center: center,
2883
+ ddof: ddof
2876
2884
  )
2877
2885
  )
2878
2886
  .to_series
@@ -3581,10 +3589,17 @@ module Polars
3581
3589
  # Create an object namespace of all list related methods.
3582
3590
  #
3583
3591
  # @return [ListNameSpace]
3584
- def arr
3592
+ def list
3585
3593
  ListNameSpace.new(self)
3586
3594
  end
3587
3595
 
3596
+ # Create an object namespace of all array related methods.
3597
+ #
3598
+ # @return [ArrayNameSpace]
3599
+ def arr
3600
+ ArrayNameSpace.new(self)
3601
+ end
3602
+
3588
3603
  # Create an object namespace of all binary related methods.
3589
3604
  #
3590
3605
  # @return [BinaryNameSpace]
@@ -3824,9 +3839,12 @@ module Polars
3824
3839
 
3825
3840
  if (values.nil? || values.empty?) && dtype.nil?
3826
3841
  dtype = dtype_if_empty || Float32
3842
+ elsif dtype == List
3843
+ ruby_dtype = ::Array
3827
3844
  end
3828
3845
 
3829
3846
  rb_temporal_types = [::Date, ::DateTime, ::Time]
3847
+ rb_temporal_types << ActiveSupport::TimeWithZone if defined?(ActiveSupport::TimeWithZone)
3830
3848
 
3831
3849
  value = _get_first_non_none(values)
3832
3850
  if !value.nil?
@@ -3835,9 +3853,20 @@ module Polars
3835
3853
  end
3836
3854
  end
3837
3855
 
3838
- if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
3856
+ if !dtype.nil? && ![List, Unknown].include?(dtype) && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
3857
+ if dtype == Array && !dtype.is_a?(Array) && value.is_a?(::Array)
3858
+ dtype = Array.new(value.size)
3859
+ end
3860
+
3839
3861
  constructor = polars_type_to_constructor(dtype)
3840
3862
  rbseries = constructor.call(name, values, strict)
3863
+
3864
+ base_type = dtype.is_a?(DataType) ? dtype.class : dtype
3865
+ if [Date, Datetime, Duration, Time, Categorical, Boolean].include?(base_type)
3866
+ if rbseries.dtype != dtype
3867
+ rbseries = rbseries.cast(dtype, true)
3868
+ end
3869
+ end
3841
3870
  return rbseries
3842
3871
  else
3843
3872
  if ruby_dtype.nil?
@@ -3868,7 +3897,17 @@ module Polars
3868
3897
  return s._s
3869
3898
  elsif defined?(Numo::NArray) && value.is_a?(Numo::NArray) && value.shape.length == 1
3870
3899
  raise Todo
3871
- elsif ruby_dtype == Array
3900
+ elsif ruby_dtype == ::Array
3901
+ if dtype.is_a?(Object)
3902
+ return RbSeries.new_object(name, values, strict)
3903
+ end
3904
+ if dtype
3905
+ srs = sequence_from_anyvalue_or_object(name, values)
3906
+ if dtype != srs.dtype
3907
+ srs = srs.cast(dtype, strict: false)
3908
+ end
3909
+ return srs
3910
+ end
3872
3911
  return sequence_from_anyvalue_or_object(name, values)
3873
3912
  elsif ruby_dtype == Series
3874
3913
  return RbSeries.new_series_list(name, values.map(&:_s), strict)
@@ -3910,9 +3949,17 @@ module Polars
3910
3949
  UInt16 => RbSeries.method(:new_opt_u16),
3911
3950
  UInt32 => RbSeries.method(:new_opt_u32),
3912
3951
  UInt64 => RbSeries.method(:new_opt_u64),
3952
+ Decimal => RbSeries.method(:new_decimal),
3953
+ Date => RbSeries.method(:new_from_anyvalues),
3954
+ Datetime => RbSeries.method(:new_from_anyvalues),
3955
+ Duration => RbSeries.method(:new_from_anyvalues),
3956
+ Time => RbSeries.method(:new_from_anyvalues),
3913
3957
  Boolean => RbSeries.method(:new_opt_bool),
3914
3958
  Utf8 => RbSeries.method(:new_str),
3915
- Binary => RbSeries.method(:new_binary)
3959
+ Object => RbSeries.method(:new_object),
3960
+ Categorical => RbSeries.method(:new_str),
3961
+ Binary => RbSeries.method(:new_binary),
3962
+ Null => RbSeries.method(:new_null)
3916
3963
  }
3917
3964
 
3918
3965
  SYM_TYPE_TO_CONSTRUCTOR = {
@@ -3931,8 +3978,14 @@ module Polars
3931
3978
  }
3932
3979
 
3933
3980
  def polars_type_to_constructor(dtype)
3934
- if dtype.is_a?(Class) && dtype < DataType
3981
+ if dtype.is_a?(Array)
3982
+ lambda do |name, values, strict|
3983
+ RbSeries.new_array(dtype.width, dtype.inner, name, values, strict)
3984
+ end
3985
+ elsif dtype.is_a?(Class) && dtype < DataType
3935
3986
  POLARS_TYPE_TO_CONSTRUCTOR.fetch(dtype)
3987
+ elsif dtype.is_a?(DataType)
3988
+ POLARS_TYPE_TO_CONSTRUCTOR.fetch(dtype.class)
3936
3989
  else
3937
3990
  SYM_TYPE_TO_CONSTRUCTOR.fetch(dtype.to_sym)
3938
3991
  end
@@ -3944,7 +3997,8 @@ module Polars
3944
3997
  Float => RbSeries.method(:new_opt_f64),
3945
3998
  Integer => RbSeries.method(:new_opt_i64),
3946
3999
  TrueClass => RbSeries.method(:new_opt_bool),
3947
- FalseClass => RbSeries.method(:new_opt_bool)
4000
+ FalseClass => RbSeries.method(:new_opt_bool),
4001
+ BigDecimal => RbSeries.method(:new_decimal)
3948
4002
  }
3949
4003
 
3950
4004
  def rb_type_to_constructor(dtype)
@@ -9,11 +9,129 @@ module Polars
9
9
  self._rbexpr = expr._rbexpr
10
10
  end
11
11
 
12
+ # Convert a Utf8 column into a Date column.
13
+ #
14
+ # @param format [String]
15
+ # Format to use for conversion. Refer to the
16
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
17
+ # for the full specification. Example: `"%Y-%m-%d"`.
18
+ # If set to nil (default), the format is inferred from the data.
19
+ # @param strict [Boolean]
20
+ # Raise an error if any conversion fails.
21
+ # @param exact [Boolean]
22
+ # Require an exact format match. If false, allow the format to match anywhere
23
+ # in the target string.
24
+ # @param cache [Boolean]
25
+ # Use a cache of unique, converted dates to apply the conversion.
26
+ #
27
+ # @return [Expr]
28
+ #
29
+ # @example
30
+ # s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
31
+ # s.str.to_date
32
+ # # =>
33
+ # # shape: (3,)
34
+ # # Series: '' [date]
35
+ # # [
36
+ # # 2020-01-01
37
+ # # 2020-02-01
38
+ # # 2020-03-01
39
+ # # ]
40
+ def to_date(format = nil, strict: true, exact: true, cache: true)
41
+ _validate_format_argument(format)
42
+ Utils.wrap_expr(self._rbexpr.str_to_date(format, strict, exact, cache))
43
+ end
44
+
45
+ # Convert a Utf8 column into a Datetime column.
46
+ #
47
+ # @param format [String]
48
+ # Format to use for conversion. Refer to the
49
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
50
+ # for the full specification. Example: `"%Y-%m-%d %H:%M:%S"`.
51
+ # If set to nil (default), the format is inferred from the data.
52
+ # @param time_unit ["us", "ns", "ms"]
53
+ # Unit of time for the resulting Datetime column. If set to nil (default),
54
+ # the time unit is inferred from the format string if given, e.g.
55
+ # `"%F %T%.3f"` => `Datetime("ms")`. If no fractional second component is
56
+ # found, the default is `"us"`.
57
+ # @param time_zone [String]
58
+ # Time zone for the resulting Datetime column.
59
+ # @param strict [Boolean]
60
+ # Raise an error if any conversion fails.
61
+ # @param exact [Boolean]
62
+ # Require an exact format match. If false, allow the format to match anywhere
63
+ # in the target string.
64
+ # @param cache [Boolean]
65
+ # Use a cache of unique, converted datetimes to apply the conversion.
66
+ #
67
+ # @return [Expr]
68
+ #
69
+ # @example
70
+ # s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
71
+ # s.str.to_datetime("%Y-%m-%d %H:%M%#z")
72
+ # # =>
73
+ # # shape: (2,)
74
+ # # Series: '' [datetime[μs, UTC]]
75
+ # # [
76
+ # # 2020-01-01 01:00:00 UTC
77
+ # # 2020-01-01 02:00:00 UTC
78
+ # # ]
79
+ def to_datetime(
80
+ format = nil,
81
+ time_unit: nil,
82
+ time_zone: nil,
83
+ strict: true,
84
+ exact: true,
85
+ cache: true
86
+ )
87
+ _validate_format_argument(format)
88
+ Utils.wrap_expr(
89
+ self._rbexpr.str_to_datetime(
90
+ format,
91
+ time_unit,
92
+ time_zone,
93
+ strict,
94
+ exact,
95
+ cache
96
+ )
97
+ )
98
+ end
99
+
100
+ # Convert a Utf8 column into a Time column.
101
+ #
102
+ # @param format [String]
103
+ # Format to use for conversion. Refer to the
104
+ # [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
105
+ # for the full specification. Example: `"%H:%M:%S"`.
106
+ # If set to nil (default), the format is inferred from the data.
107
+ # @param strict [Boolean]
108
+ # Raise an error if any conversion fails.
109
+ # @param cache [Boolean]
110
+ # Use a cache of unique, converted times to apply the conversion.
111
+ #
112
+ # @return [Expr]
113
+ #
114
+ # @example
115
+ # s = Polars::Series.new(["01:00", "02:00", "03:00"])
116
+ # s.str.to_time("%H:%M")
117
+ # # =>
118
+ # # shape: (3,)
119
+ # # Series: '' [time]
120
+ # # [
121
+ # # 01:00:00
122
+ # # 02:00:00
123
+ # # 03:00:00
124
+ # # ]
125
+ def to_time(format = nil, strict: true, cache: true)
126
+ _validate_format_argument(format)
127
+ Utils.wrap_expr(_rbexpr.str_to_time(format, strict, cache))
128
+ end
129
+
12
130
  # Parse a Utf8 expression to a Date/Datetime/Time type.
13
131
  #
14
132
  # @param dtype [Object]
15
133
  # The data type to convert into. Can be either Date, Datetime, or Time.
16
- # @param fmt [String]
134
+ # @param format [String]
17
135
  # Format to use, refer to the
18
136
  # [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
19
137
  # for specification. Example: `"%y-%m-%d"`.
@@ -38,10 +156,10 @@ module Polars
38
156
  # s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
39
157
  # # =>
40
158
  # # shape: (2,)
41
- # # Series: '' [datetime[μs, +00:00]]
159
+ # # Series: '' [datetime[μs, UTC]]
42
160
  # # [
43
- # # 2020-01-01 01:00:00 +00:00
44
- # # 2020-01-01 02:00:00 +00:00
161
+ # # 2020-01-01 01:00:00 UTC
162
+ # # 2020-01-01 02:00:00 UTC
45
163
  # # ]
46
164
  #
47
165
  # @example Dealing with different formats.
@@ -71,16 +189,18 @@ module Polars
71
189
  # # 2022-01-31
72
190
  # # 2001-07-08
73
191
  # # ]
74
- def strptime(dtype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
192
+ def strptime(dtype, format = nil, strict: true, exact: true, cache: true, utc: false)
193
+ _validate_format_argument(format)
194
+
75
195
  if dtype == Date
76
- Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact, cache))
196
+ to_date(format, strict: strict, exact: exact, cache: cache)
77
197
  elsif dtype == Datetime || dtype.is_a?(Datetime)
78
198
  dtype = Datetime.new if dtype == Datetime
79
199
  time_unit = dtype.time_unit
80
200
  time_zone = dtype.time_zone
81
- Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, time_unit, time_zone, strict, exact, cache, tz_aware, utc))
201
+ to_datetime(format, time_unit: time_unit, time_zone: time_zone, strict: strict, exact: exact, cache: cache)
82
202
  elsif dtype == Time
83
- Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact, cache))
203
+ to_time(format, strict: strict, cache: cache)
84
204
  else
85
205
  raise ArgumentError, "dtype should be of type {Date, Datetime, Time}"
86
206
  end
@@ -547,11 +667,11 @@ module Polars
547
667
  # # │ {null,null} │
548
668
  # # │ {2,false} │
549
669
  # # └─────────────┘
550
- def json_extract(dtype = nil)
670
+ def json_extract(dtype = nil, infer_schema_length: 100)
551
671
  if !dtype.nil?
552
672
  dtype = Utils.rb_type_to_dtype(dtype)
553
673
  end
554
- Utils.wrap_expr(_rbexpr.str_json_extract(dtype))
674
+ Utils.wrap_expr(_rbexpr.str_json_extract(dtype, infer_schema_length))
555
675
  end
556
676
 
557
677
  # Extract the first match of json string with provided JSONPath expression.
@@ -968,7 +1088,7 @@ module Polars
968
1088
  # # │ r │
969
1089
  # # └─────┘
970
1090
  def explode
971
- Utils.wrap_expr(_rbexpr.explode)
1091
+ Utils.wrap_expr(_rbexpr.str_explode)
972
1092
  end
973
1093
 
974
1094
  # Parse integers with base radix from strings.
@@ -1018,5 +1138,11 @@ module Polars
1018
1138
  def parse_int(radix = 2, strict: true)
1019
1139
  Utils.wrap_expr(_rbexpr.str_parse_int(radix, strict))
1020
1140
  end
1141
+
1142
+ private
1143
+
1144
+ def _validate_format_argument(format)
1145
+ # TODO
1146
+ end
1021
1147
  end
1022
1148
  end