polars-df 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +14 -0
  3. data/Cargo.lock +337 -381
  4. data/README.md +4 -3
  5. data/ext/polars/Cargo.toml +5 -4
  6. data/ext/polars/src/apply/mod.rs +7 -3
  7. data/ext/polars/src/conversion.rs +171 -63
  8. data/ext/polars/src/dataframe.rs +19 -23
  9. data/ext/polars/src/error.rs +8 -0
  10. data/ext/polars/src/expr/array.rs +15 -0
  11. data/ext/polars/src/expr/general.rs +39 -9
  12. data/ext/polars/src/expr/list.rs +27 -22
  13. data/ext/polars/src/expr/string.rs +10 -9
  14. data/ext/polars/src/expr.rs +1 -0
  15. data/ext/polars/src/functions/lazy.rs +61 -21
  16. data/ext/polars/src/lazyframe.rs +14 -2
  17. data/ext/polars/src/lib.rs +25 -20
  18. data/ext/polars/src/object.rs +1 -1
  19. data/ext/polars/src/rb_modules.rs +4 -0
  20. data/ext/polars/src/series/construction.rs +28 -2
  21. data/ext/polars/src/series.rs +57 -17
  22. data/lib/polars/array_expr.rb +84 -0
  23. data/lib/polars/array_name_space.rb +77 -0
  24. data/lib/polars/batched_csv_reader.rb +1 -1
  25. data/lib/polars/data_frame.rb +91 -49
  26. data/lib/polars/data_types.rb +163 -29
  27. data/lib/polars/date_time_name_space.rb +17 -3
  28. data/lib/polars/expr.rb +76 -69
  29. data/lib/polars/functions.rb +0 -1
  30. data/lib/polars/group_by.rb +1 -22
  31. data/lib/polars/lazy_frame.rb +82 -30
  32. data/lib/polars/lazy_functions.rb +67 -31
  33. data/lib/polars/list_expr.rb +28 -28
  34. data/lib/polars/list_name_space.rb +13 -13
  35. data/lib/polars/rolling_group_by.rb +4 -2
  36. data/lib/polars/series.rb +70 -16
  37. data/lib/polars/string_expr.rb +137 -11
  38. data/lib/polars/string_name_space.rb +137 -22
  39. data/lib/polars/utils.rb +107 -57
  40. data/lib/polars/version.rb +1 -1
  41. data/lib/polars.rb +3 -0
  42. metadata +5 -2
@@ -36,7 +36,7 @@ module Polars
36
36
  elsif data.is_a?(Hash)
37
37
  data = data.transform_keys { |v| v.is_a?(Symbol) ? v.to_s : v }
38
38
  self._df = self.class.hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, nan_to_null: nan_to_null)
39
- elsif data.is_a?(Array)
39
+ elsif data.is_a?(::Array)
40
40
  self._df = self.class.sequence_to_rbdf(data, schema: schema, schema_overrides: schema_overrides, orient: orient, infer_schema_length: infer_schema_length)
41
41
  elsif data.is_a?(Series)
42
42
  self._df = self.class.series_to_rbdf(data, schema: schema, schema_overrides: schema_overrides)
@@ -116,7 +116,7 @@ module Polars
116
116
  dtypes.each do|k, v|
117
117
  dtype_list << [k, Utils.rb_type_to_dtype(v)]
118
118
  end
119
- elsif dtypes.is_a?(Array)
119
+ elsif dtypes.is_a?(::Array)
120
120
  dtype_slice = dtypes
121
121
  else
122
122
  raise ArgumentError, "dtype arg should be list or dict"
@@ -590,7 +590,7 @@ module Polars
590
590
 
591
591
  # df[2, ..] (select row as df)
592
592
  if row_selection.is_a?(Integer)
593
- if col_selection.is_a?(Array)
593
+ if col_selection.is_a?(::Array)
594
594
  df = self[0.., col_selection]
595
595
  return df.slice(row_selection, 1)
596
596
  end
@@ -611,7 +611,7 @@ module Polars
611
611
  return series[row_selection]
612
612
  end
613
613
 
614
- if col_selection.is_a?(Array)
614
+ if col_selection.is_a?(::Array)
615
615
  # df[.., [1, 2]]
616
616
  if Utils.is_int_sequence(col_selection)
617
617
  series_list = col_selection.map { |i| to_series(i) }
@@ -641,7 +641,7 @@ module Polars
641
641
  return Slice.new(self).apply(item)
642
642
  end
643
643
 
644
- if item.is_a?(Array) && item.all? { |v| Utils.strlike?(v) }
644
+ if item.is_a?(::Array) && item.all? { |v| Utils.strlike?(v) }
645
645
  # select multiple columns
646
646
  # df[["foo", "bar"]]
647
647
  return _from_rbdf(_df.select(item.map(&:to_s)))
@@ -684,13 +684,13 @@ module Polars
684
684
  end
685
685
 
686
686
  if Utils.strlike?(key)
687
- if value.is_a?(Array) || (defined?(Numo::NArray) && value.is_a?(Numo::NArray))
687
+ if value.is_a?(::Array) || (defined?(Numo::NArray) && value.is_a?(Numo::NArray))
688
688
  value = Series.new(value)
689
689
  elsif !value.is_a?(Series)
690
690
  value = Polars.lit(value)
691
691
  end
692
692
  self._df = with_column(value.alias(key.to_s))._df
693
- elsif key.is_a?(Array)
693
+ elsif key.is_a?(::Array)
694
694
  row_selection, col_selection = key
695
695
 
696
696
  if Utils.strlike?(col_selection)
@@ -994,14 +994,21 @@ module Polars
994
994
  #
995
995
  # @return [nil]
996
996
  def write_ipc(file, compression: "uncompressed")
997
- if compression.nil?
998
- compression = "uncompressed"
997
+ return_bytes = file.nil?
998
+ if return_bytes
999
+ file = StringIO.new
1000
+ file.set_encoding(Encoding::BINARY)
999
1001
  end
1000
1002
  if Utils.pathlike?(file)
1001
1003
  file = Utils.normalise_filepath(file)
1002
1004
  end
1003
1005
 
1006
+ if compression.nil?
1007
+ compression = "uncompressed"
1008
+ end
1009
+
1004
1010
  _df.write_ipc(file, compression)
1011
+ return_bytes ? file.string : nil
1005
1012
  end
1006
1013
 
1007
1014
  # Write to Apache Parquet file.
@@ -1491,13 +1498,9 @@ module Polars
1491
1498
  # # │ 1 ┆ 6.0 ┆ a │
1492
1499
  # # └─────┴─────┴─────┘
1493
1500
  def sort(by, reverse: false, nulls_last: false)
1494
- if by.is_a?(Array) || by.is_a?(Expr)
1495
- lazy
1496
- .sort(by, reverse: reverse, nulls_last: nulls_last)
1497
- .collect(no_optimization: true, string_cache: false)
1498
- else
1499
- _from_rbdf(_df.sort(by, reverse, nulls_last))
1500
- end
1501
+ lazy
1502
+ .sort(by, reverse: reverse, nulls_last: nulls_last)
1503
+ .collect(no_optimization: true)
1501
1504
  end
1502
1505
 
1503
1506
  # Sort the DataFrame by column in-place.
@@ -1899,6 +1902,12 @@ module Polars
1899
1902
  # Define whether the temporal window interval is closed or not.
1900
1903
  # @param by [Object]
1901
1904
  # Also group by this column/these columns.
1905
+ # @param check_sorted [Boolean]
1906
+ # When the `by` argument is given, polars can not check sortedness
1907
+ # by the metadata and has to do a full scan on the index column to
1908
+ # verify data is sorted. This is expensive. If you are sure the
1909
+ # data within the by groups is sorted, you can set this to `false`.
1910
+ # Doing so incorrectly will lead to incorrect output
1902
1911
  #
1903
1912
  # @return [RollingGroupBy]
1904
1913
  #
@@ -1912,7 +1921,7 @@ module Polars
1912
1921
  # "2020-01-08 23:16:43"
1913
1922
  # ]
1914
1923
  # df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
1915
- # Polars.col("dt").str.strptime(Polars::Datetime)
1924
+ # Polars.col("dt").str.strptime(Polars::Datetime).set_sorted
1916
1925
  # )
1917
1926
  # df.groupby_rolling(index_column: "dt", period: "2d").agg(
1918
1927
  # [
@@ -1940,9 +1949,10 @@ module Polars
1940
1949
  period:,
1941
1950
  offset: nil,
1942
1951
  closed: "right",
1943
- by: nil
1952
+ by: nil,
1953
+ check_sorted: true
1944
1954
  )
1945
- RollingGroupBy.new(self, index_column, period, offset, closed, by)
1955
+ RollingGroupBy.new(self, index_column, period, offset, closed, by, check_sorted)
1946
1956
  end
1947
1957
 
1948
1958
  # Group based on a time value (or index value of type `:i32`, `:i64`).
@@ -2078,21 +2088,21 @@ module Polars
2078
2088
  # df.groupby_dynamic("time", every: "1h", closed: "left").agg(
2079
2089
  # [
2080
2090
  # Polars.col("time").count.alias("time_count"),
2081
- # Polars.col("time").list.alias("time_agg_list")
2091
+ # Polars.col("time").alias("time_agg_list")
2082
2092
  # ]
2083
2093
  # )
2084
2094
  # # =>
2085
2095
  # # shape: (4, 3)
2086
- # # ┌─────────────────────┬────────────┬─────────────────────────────────────┐
2087
- # # │ time ┆ time_count ┆ time_agg_list
2088
- # # │ --- ┆ --- ┆ ---
2089
- # # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]]
2090
- # # ╞═════════════════════╪════════════╪═════════════════════════════════════╡
2091
- # # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-16...
2092
- # # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16...
2093
- # # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16...
2094
- # # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00]
2095
- # # └─────────────────────┴────────────┴─────────────────────────────────────┘
2096
+ # # ┌─────────────────────┬────────────┬───────────────────────────────────┐
2097
+ # # │ time ┆ time_count ┆ time_agg_list
2098
+ # # │ --- ┆ --- ┆ ---
2099
+ # # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]]
2100
+ # # ╞═════════════════════╪════════════╪═══════════════════════════════════╡
2101
+ # # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-16
2102
+ # # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-16
2103
+ # # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-16
2104
+ # # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00]
2105
+ # # └─────────────────────┴────────────┴───────────────────────────────────┘
2096
2106
  #
2097
2107
  # @example When closed="both" the time values at the window boundaries belong to 2 groups.
2098
2108
  # df.groupby_dynamic("time", every: "1h", closed: "both").agg(
@@ -2159,7 +2169,7 @@ module Polars
2159
2169
  # period: "3i",
2160
2170
  # include_boundaries: true,
2161
2171
  # closed: "right"
2162
- # ).agg(Polars.col("A").list.alias("A_agg_list"))
2172
+ # ).agg(Polars.col("A").alias("A_agg_list"))
2163
2173
  # # =>
2164
2174
  # # shape: (3, 4)
2165
2175
  # # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
@@ -2242,7 +2252,7 @@ module Polars
2242
2252
  # "groups" => ["A", "B", "A", "B"],
2243
2253
  # "values" => [0, 1, 2, 3]
2244
2254
  # }
2245
- # )
2255
+ # ).set_sorted("time")
2246
2256
  # df.upsample(
2247
2257
  # time_column: "time", every: "1mo", by: "groups", maintain_order: true
2248
2258
  # ).select(Polars.all.forward_fill)
@@ -2360,7 +2370,7 @@ module Polars
2360
2370
  # ], # note record date: Jan 1st (sorted!)
2361
2371
  # "gdp" => [4164, 4411, 4566, 4696]
2362
2372
  # }
2363
- # )
2373
+ # ).set_sorted("date")
2364
2374
  # population = Polars::DataFrame.new(
2365
2375
  # {
2366
2376
  # "date" => [
@@ -2371,7 +2381,7 @@ module Polars
2371
2381
  # ], # note record date: May 12th (sorted!)
2372
2382
  # "population" => [82.19, 82.66, 83.12, 83.52]
2373
2383
  # }
2374
- # )
2384
+ # ).set_sorted("date")
2375
2385
  # population.join_asof(
2376
2386
  # gdp, left_on: "date", right_on: "date", strategy: "backward"
2377
2387
  # )
@@ -2674,7 +2684,7 @@ module Polars
2674
2684
  # # │ 3 ┆ 8 ┆ c ┆ 30 │
2675
2685
  # # └─────┴─────┴─────┴───────┘
2676
2686
  def hstack(columns, in_place: false)
2677
- if !columns.is_a?(Array)
2687
+ if !columns.is_a?(::Array)
2678
2688
  columns = columns.get_columns
2679
2689
  end
2680
2690
  if in_place
@@ -2804,7 +2814,7 @@ module Polars
2804
2814
  # # │ 3 ┆ 8.0 │
2805
2815
  # # └─────┴─────┘
2806
2816
  def drop(columns)
2807
- if columns.is_a?(Array)
2817
+ if columns.is_a?(::Array)
2808
2818
  df = clone
2809
2819
  columns.each do |n|
2810
2820
  df._df.drop_in_place(n)
@@ -3317,7 +3327,7 @@ module Polars
3317
3327
  n_fill = n_cols * n_rows - height
3318
3328
 
3319
3329
  if n_fill > 0
3320
- if !fill_values.is_a?(Array)
3330
+ if !fill_values.is_a?(::Array)
3321
3331
  fill_values = [fill_values] * df.width
3322
3332
  end
3323
3333
 
@@ -3426,29 +3436,29 @@ module Polars
3426
3436
  # # ╞═════╪═════╪═════╡
3427
3437
  # # │ C ┆ 2 ┆ l │
3428
3438
  # # └─────┴─────┴─────┘}
3429
- def partition_by(groups, maintain_order: true, as_dict: false)
3439
+ def partition_by(groups, maintain_order: true, include_key: true, as_dict: false)
3430
3440
  if groups.is_a?(String)
3431
3441
  groups = [groups]
3432
- elsif !groups.is_a?(Array)
3442
+ elsif !groups.is_a?(::Array)
3433
3443
  groups = Array(groups)
3434
3444
  end
3435
3445
 
3436
3446
  if as_dict
3437
3447
  out = {}
3438
3448
  if groups.length == 1
3439
- _df.partition_by(groups, maintain_order).each do |df|
3449
+ _df.partition_by(groups, maintain_order, include_key).each do |df|
3440
3450
  df = _from_rbdf(df)
3441
3451
  out[df[groups][0, 0]] = df
3442
3452
  end
3443
3453
  else
3444
- _df.partition_by(groups, maintain_order).each do |df|
3454
+ _df.partition_by(groups, maintain_order, include_key).each do |df|
3445
3455
  df = _from_rbdf(df)
3446
3456
  out[df[groups].row(0)] = df
3447
3457
  end
3448
3458
  end
3449
3459
  out
3450
3460
  else
3451
- _df.partition_by(groups, maintain_order).map { |df| _from_rbdf(df) }
3461
+ _df.partition_by(groups, maintain_order, include_key).map { |df| _from_rbdf(df) }
3452
3462
  end
3453
3463
  end
3454
3464
 
@@ -3716,7 +3726,7 @@ module Polars
3716
3726
  # # │ 4 ┆ 13.0 ┆ true ┆ 16.0 ┆ 6.5 ┆ false │
3717
3727
  # # └─────┴──────┴───────┴──────┴──────┴───────┘
3718
3728
  def with_columns(exprs)
3719
- if !exprs.nil? && !exprs.is_a?(Array)
3729
+ if !exprs.nil? && !exprs.is_a?(::Array)
3720
3730
  exprs = [exprs]
3721
3731
  end
3722
3732
  lazy
@@ -4097,11 +4107,11 @@ module Polars
4097
4107
  # # │ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 │
4098
4108
  # # │ 0 ┆ 1 ┆ 0 ┆ 1 ┆ 0 ┆ 1 │
4099
4109
  # # └───────┴───────┴───────┴───────┴───────┴───────┘
4100
- def to_dummies(columns: nil, separator: "_")
4110
+ def to_dummies(columns: nil, separator: "_", drop_first: false)
4101
4111
  if columns.is_a?(String)
4102
4112
  columns = [columns]
4103
4113
  end
4104
- _from_rbdf(_df.to_dummies(columns, separator))
4114
+ _from_rbdf(_df.to_dummies(columns, separator, drop_first))
4105
4115
  end
4106
4116
 
4107
4117
  # Drop duplicate rows from this DataFrame.
@@ -4189,7 +4199,7 @@ module Polars
4189
4199
  subset = [subset]
4190
4200
  end
4191
4201
 
4192
- if subset.is_a?(Array) && subset.length == 1
4202
+ if subset.is_a?(::Array) && subset.length == 1
4193
4203
  expr = Utils.expr_to_lit_or_expr(subset[0], str_to_lit: false)
4194
4204
  else
4195
4205
  struct_fields = subset.nil? ? Polars.all : subset
@@ -4758,6 +4768,38 @@ module Polars
4758
4768
  _from_rbdf(_df.unnest(names))
4759
4769
  end
4760
4770
 
4771
+ # TODO
4772
+ # def corr
4773
+ # end
4774
+
4775
+ # TODO
4776
+ # def merge_sorted
4777
+ # end
4778
+
4779
+ # Indicate that one or multiple columns are sorted.
4780
+ #
4781
+ # @param column [Object]
4782
+ # Columns that are sorted
4783
+ # @param more_columns [Object]
4784
+ # Additional columns that are sorted, specified as positional arguments.
4785
+ # @param descending [Boolean]
4786
+ # Whether the columns are sorted in descending order.
4787
+ #
4788
+ # @return [DataFrame]
4789
+ def set_sorted(
4790
+ column,
4791
+ *more_columns,
4792
+ descending: false
4793
+ )
4794
+ lazy
4795
+ .set_sorted(column, *more_columns, descending: descending)
4796
+ .collect(no_optimization: true)
4797
+ end
4798
+
4799
+ # TODO
4800
+ # def update
4801
+ # end
4802
+
4761
4803
  private
4762
4804
 
4763
4805
  def initialize_copy(other)
@@ -4967,7 +5009,7 @@ module Polars
4967
5009
  columns.each do |col, i|
4968
5010
  if dtypes[col] == Categorical # != rbdf_dtypes[i]
4969
5011
  column_casts << Polars.col(col).cast(Categorical)._rbexpr
4970
- elsif structs.any? && structs.include?(col) && structs[col] != rbdf_dtypes[i]
5012
+ elsif structs&.any? && structs.include?(col) && structs[col] != rbdf_dtypes[i]
4971
5013
  column_casts << Polars.col(col).cast(structs[col])._rbexpr
4972
5014
  elsif dtypes.include?(col) && dtypes[col] != rbdf_dtypes[i]
4973
5015
  column_casts << Polars.col(col).cast(dtypes[col])._rbexpr
@@ -5012,7 +5054,7 @@ module Polars
5012
5054
  rbdf = _post_apply_columns(rbdf, column_names)
5013
5055
  end
5014
5056
  return rbdf
5015
- elsif data[0].is_a?(Array)
5057
+ elsif data[0].is_a?(::Array)
5016
5058
  if orient.nil? && !columns.nil?
5017
5059
  orient = columns.length == data.length ? "col" : "row"
5018
5060
  end
@@ -5117,7 +5159,7 @@ module Polars
5117
5159
 
5118
5160
  def _prepare_other_arg(other)
5119
5161
  if !other.is_a?(Series)
5120
- if other.is_a?(Array)
5162
+ if other.is_a?(::Array)
5121
5163
  raise ArgumentError, "Operation not supported."
5122
5164
  end
5123
5165
 
@@ -1,6 +1,25 @@
1
1
  module Polars
2
2
  # Base class for all Polars data types.
3
3
  class DataType
4
+ def self.base_type
5
+ self
6
+ end
7
+
8
+ def base_type
9
+ is_a?(DataType) ? self.class : self
10
+ end
11
+
12
+ def self.nested?
13
+ false
14
+ end
15
+
16
+ def nested?
17
+ self.class.nested?
18
+ end
19
+
20
+ def self.==(other)
21
+ eql?(other) || other.is_a?(self)
22
+ end
4
23
  end
5
24
 
6
25
  # Base class for numeric data types.
@@ -15,12 +34,19 @@ module Polars
15
34
  class FractionalType < NumericType
16
35
  end
17
36
 
37
+ # Base class for float data types.
38
+ class FloatType < FractionalType
39
+ end
40
+
18
41
  # Base class for temporal data types.
19
42
  class TemporalType < DataType
20
43
  end
21
44
 
22
45
  # Base class for nested data types.
23
46
  class NestedType < DataType
47
+ def self.nested?
48
+ true
49
+ end
24
50
  end
25
51
 
26
52
  # 8-bit signed integer type.
@@ -56,11 +82,37 @@ module Polars
56
82
  end
57
83
 
58
84
  # 32-bit floating point type.
59
- class Float32 < FractionalType
85
+ class Float32 < FloatType
60
86
  end
61
87
 
62
88
  # 64-bit floating point type.
63
- class Float64 < FractionalType
89
+ class Float64 < FloatType
90
+ end
91
+
92
+ # Decimal 128-bit type with an optional precision and non-negative scale.
93
+ #
94
+ # NOTE: this is an experimental work-in-progress feature and may not work as expected.
95
+ class Decimal < FractionalType
96
+ attr_reader :precision, :scale
97
+
98
+ def initialize(precision, scale)
99
+ @precision = precision
100
+ @scale = scale
101
+ end
102
+
103
+ def ==(other)
104
+ if other.eql?(Decimal)
105
+ true
106
+ elsif other.is_a?(Decimal)
107
+ precision == other.precision && scale == other.scale
108
+ else
109
+ false
110
+ end
111
+ end
112
+
113
+ def to_s
114
+ "#{self.class.name}(precision: #{precision.inspect}, scale: #{scale.inspect})"
115
+ end
64
116
  end
65
117
 
66
118
  # Boolean type.
@@ -71,17 +123,18 @@ module Polars
71
123
  class Utf8 < DataType
72
124
  end
73
125
 
74
- # Nested list/array type.
75
- class List < NestedType
76
- def initialize(inner)
77
- @inner = Utils.rb_type_to_dtype(inner)
78
- end
126
+ # Binary type.
127
+ class Binary < DataType
79
128
  end
80
129
 
81
130
  # Calendar date type.
82
131
  class Date < TemporalType
83
132
  end
84
133
 
134
+ # Time of day type.
135
+ class Time < TemporalType
136
+ end
137
+
85
138
  # Calendar date and time type.
86
139
  class Datetime < TemporalType
87
140
  attr_reader :time_unit, :time_zone
@@ -91,6 +144,20 @@ module Polars
91
144
  @time_unit = time_unit || "us"
92
145
  @time_zone = time_zone
93
146
  end
147
+
148
+ def ==(other)
149
+ if other.eql?(Datetime)
150
+ true
151
+ elsif other.is_a?(Datetime)
152
+ time_unit == other.time_unit && time_zone == other.time_zone
153
+ else
154
+ false
155
+ end
156
+ end
157
+
158
+ def to_s
159
+ "#{self.class.name}(time_unit: #{time_unit.inspect}, time_zone: #{time_zone.inspect})"
160
+ end
94
161
  end
95
162
 
96
163
  # Time duration/delta type.
@@ -101,18 +168,85 @@ module Polars
101
168
  def initialize(time_unit = "us")
102
169
  @time_unit = time_unit
103
170
  end
171
+
172
+ def ==(other)
173
+ if other.eql?(Duration)
174
+ true
175
+ elsif other.is_a?(Duration)
176
+ time_unit == other.time_unit
177
+ else
178
+ false
179
+ end
180
+ end
181
+
182
+ def to_s
183
+ "#{self.class.name}(time_unit: #{time_unit.inspect})"
184
+ end
104
185
  end
105
186
 
106
- # Time of day type.
107
- class Time < TemporalType
187
+ # A categorical encoding of a set of strings.
188
+ class Categorical < DataType
108
189
  end
109
190
 
110
191
  # Type for wrapping arbitrary Ruby objects.
111
192
  class Object < DataType
112
193
  end
113
194
 
114
- # A categorical encoding of a set of strings.
115
- class Categorical < DataType
195
+ # Type representing Null / None values.
196
+ class Null < DataType
197
+ end
198
+
199
+ # Type representing Datatype values that could not be determined statically.
200
+ class Unknown < DataType
201
+ end
202
+
203
+ # Nested list/array type.
204
+ class List < NestedType
205
+ attr_reader :inner
206
+
207
+ def initialize(inner)
208
+ @inner = Utils.rb_type_to_dtype(inner)
209
+ end
210
+
211
+ def ==(other)
212
+ if other.eql?(List)
213
+ true
214
+ elsif other.is_a?(List)
215
+ @inner.nil? || other.inner.nil? || @inner == other.inner
216
+ else
217
+ false
218
+ end
219
+ end
220
+
221
+ def to_s
222
+ "#{self.class.name}(#{inner})"
223
+ end
224
+ end
225
+
226
+ # Nested list/array type.
227
+ class Array < NestedType
228
+ attr_reader :width, :inner
229
+
230
+ def initialize(width, inner = nil)
231
+ @width = width
232
+ @inner = Utils.rb_type_to_dtype(inner) if inner
233
+ end
234
+
235
+ # TODO check width?
236
+ def ==(other)
237
+ if other.eql?(Array)
238
+ true
239
+ elsif other.is_a?(Array)
240
+ @inner.nil? || other.inner.nil? || @inner == other.inner
241
+ else
242
+ false
243
+ end
244
+ end
245
+
246
+ # TODO add width?
247
+ def to_s
248
+ "#{self.class.name}(#{inner})"
249
+ end
116
250
  end
117
251
 
118
252
  # Definition of a single field within a `Struct` DataType.
@@ -124,9 +258,12 @@ module Polars
124
258
  @dtype = Utils.rb_type_to_dtype(dtype)
125
259
  end
126
260
 
127
- def inspect
128
- class_name = self.class.name
129
- "#{class_name}(#{@name}: #{@dtype})"
261
+ def ==(other)
262
+ name == other.name && dtype == other.dtype
263
+ end
264
+
265
+ def to_s
266
+ "#{self.class.name}(#{name.inspect}, #{dtype})"
130
267
  end
131
268
  end
132
269
 
@@ -142,25 +279,22 @@ module Polars
142
279
  end
143
280
  end
144
281
 
145
- def inspect
146
- class_name = self.class.name
147
- "#{class_name}(#{@fields})"
282
+ def ==(other)
283
+ if other.eql?(Struct)
284
+ true
285
+ elsif other.is_a?(Struct)
286
+ fields == other.fields
287
+ else
288
+ false
289
+ end
290
+ end
291
+
292
+ def to_s
293
+ "#{self.class.name}([#{fields.map(&:to_s).join("\n")}])"
148
294
  end
149
295
 
150
296
  def to_schema
151
297
  @fields.to_h { |f| [f.name, f.dtype] }
152
298
  end
153
299
  end
154
-
155
- # Binary type.
156
- class Binary < DataType
157
- end
158
-
159
- # Type representing Null / None values.
160
- class Null < DataType
161
- end
162
-
163
- # Type representing Datatype values that could not be determined statically.
164
- class Unknown < DataType
165
- end
166
300
  end
@@ -82,8 +82,15 @@ module Polars
82
82
  # # => 2001-01-02 00:00:00 UTC
83
83
  def median
84
84
  s = Utils.wrap_s(_s)
85
- out = s.median.to_i
86
- Utils._to_ruby_datetime(out, s.dtype, tu: s.time_unit)
85
+ out = s.median
86
+ if !out.nil?
87
+ if s.dtype == Date
88
+ return Utils._to_ruby_date(out.to_i)
89
+ else
90
+ return Utils._to_ruby_datetime(out.to_i, s.time_unit)
91
+ end
92
+ end
93
+ nil
87
94
  end
88
95
 
89
96
  # Return mean as Ruby object.
@@ -107,7 +114,14 @@ module Polars
107
114
  def mean
108
115
  s = Utils.wrap_s(_s)
109
116
  out = s.mean.to_i
110
- Utils._to_ruby_datetime(out, s.dtype, tu: s.time_unit)
117
+ if !out.nil?
118
+ if s.dtype == Date
119
+ return Utils._to_ruby_date(out.to_i)
120
+ else
121
+ return Utils._to_ruby_datetime(out.to_i, s.time_unit)
122
+ end
123
+ end
124
+ nil
111
125
  end
112
126
 
113
127
  # Format Date/datetime with a formatting rule.