polars-df 0.21.0 → 0.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/Cargo.lock +1 -1
  4. data/ext/polars/Cargo.toml +7 -1
  5. data/ext/polars/src/conversion/mod.rs +92 -4
  6. data/ext/polars/src/exceptions.rs +1 -0
  7. data/ext/polars/src/expr/array.rs +73 -4
  8. data/ext/polars/src/expr/binary.rs +26 -1
  9. data/ext/polars/src/expr/bitwise.rs +39 -0
  10. data/ext/polars/src/expr/categorical.rs +20 -0
  11. data/ext/polars/src/expr/datatype.rs +24 -1
  12. data/ext/polars/src/expr/datetime.rs +58 -0
  13. data/ext/polars/src/expr/general.rs +84 -5
  14. data/ext/polars/src/expr/list.rs +24 -0
  15. data/ext/polars/src/expr/meta.rs +11 -0
  16. data/ext/polars/src/expr/mod.rs +1 -0
  17. data/ext/polars/src/expr/name.rs +8 -0
  18. data/ext/polars/src/expr/rolling.rs +20 -0
  19. data/ext/polars/src/expr/string.rs +59 -0
  20. data/ext/polars/src/expr/struct.rs +9 -1
  21. data/ext/polars/src/functions/io.rs +19 -0
  22. data/ext/polars/src/functions/lazy.rs +4 -0
  23. data/ext/polars/src/lazyframe/general.rs +51 -0
  24. data/ext/polars/src/lib.rs +119 -10
  25. data/ext/polars/src/map/dataframe.rs +2 -2
  26. data/ext/polars/src/map/series.rs +1 -1
  27. data/ext/polars/src/series/aggregation.rs +44 -0
  28. data/ext/polars/src/series/general.rs +64 -4
  29. data/lib/polars/array_expr.rb +382 -3
  30. data/lib/polars/array_name_space.rb +281 -0
  31. data/lib/polars/binary_expr.rb +67 -0
  32. data/lib/polars/binary_name_space.rb +43 -0
  33. data/lib/polars/cat_expr.rb +224 -0
  34. data/lib/polars/cat_name_space.rb +138 -0
  35. data/lib/polars/config.rb +2 -2
  36. data/lib/polars/convert.rb +6 -6
  37. data/lib/polars/data_frame.rb +684 -19
  38. data/lib/polars/data_type_expr.rb +52 -0
  39. data/lib/polars/data_types.rb +14 -2
  40. data/lib/polars/date_time_expr.rb +251 -0
  41. data/lib/polars/date_time_name_space.rb +299 -0
  42. data/lib/polars/expr.rb +1213 -180
  43. data/lib/polars/functions/datatype.rb +21 -0
  44. data/lib/polars/functions/lazy.rb +13 -0
  45. data/lib/polars/io/csv.rb +1 -1
  46. data/lib/polars/io/json.rb +4 -4
  47. data/lib/polars/io/ndjson.rb +4 -4
  48. data/lib/polars/io/parquet.rb +27 -5
  49. data/lib/polars/lazy_frame.rb +936 -20
  50. data/lib/polars/list_expr.rb +196 -4
  51. data/lib/polars/list_name_space.rb +201 -4
  52. data/lib/polars/meta_expr.rb +64 -0
  53. data/lib/polars/name_expr.rb +36 -0
  54. data/lib/polars/schema.rb +79 -3
  55. data/lib/polars/selector.rb +72 -0
  56. data/lib/polars/selectors.rb +3 -3
  57. data/lib/polars/series.rb +1051 -54
  58. data/lib/polars/string_expr.rb +411 -6
  59. data/lib/polars/string_name_space.rb +722 -49
  60. data/lib/polars/struct_expr.rb +103 -0
  61. data/lib/polars/struct_name_space.rb +19 -1
  62. data/lib/polars/utils/various.rb +18 -1
  63. data/lib/polars/utils.rb +5 -1
  64. data/lib/polars/version.rb +1 -1
  65. data/lib/polars.rb +2 -0
  66. metadata +4 -1
data/lib/polars/schema.rb CHANGED
@@ -1,34 +1,110 @@
1
1
  module Polars
2
2
  class Schema
3
+ # Ordered mapping of column names to their data type.
4
+ #
5
+ # @param schema [Object]
6
+ # The schema definition given by column names and their associated
7
+ # Polars data type. Accepts a mapping or an enumerable of arrays.
3
8
  def initialize(schema = nil, check_dtypes: true)
4
- raise Todo if check_dtypes
5
- @schema = schema.to_h
9
+ input = schema || {}
10
+ @schema = {}
11
+ input.each do |name, tp|
12
+ if !check_dtypes
13
+ @schema[name] = tp
14
+ elsif Utils.is_polars_dtype(tp)
15
+ @schema[name] = _check_dtype(tp)
16
+ else
17
+ self[name] = tp
18
+ end
19
+ end
6
20
  end
7
21
 
22
+ # Returns the data type of the column.
23
+ #
24
+ # @return [Object]
8
25
  def [](key)
9
26
  @schema[key]
10
27
  end
11
28
 
29
+ # Sets the data type of the column.
30
+ #
31
+ # @return [Object]
12
32
  def []=(name, dtype)
13
- # TODO check dtype if needed
33
+ # TODO check dtype
14
34
  @schema[name] = dtype
15
35
  end
16
36
 
37
+ # Get the column names of the schema.
38
+ #
39
+ # @return [Array]
40
+ #
41
+ # @example
42
+ # s = Polars::Schema.new({"x" => Polars::Float64.new, "y" => Polars::Datetime.new(time_zone: "UTC")})
43
+ # s.names
44
+ # # => ["x", "y"]
17
45
  def names
18
46
  @schema.keys
19
47
  end
20
48
 
49
+ # Get the data types of the schema.
50
+ #
51
+ # @return [Array]
52
+ #
53
+ # @example
54
+ # s = Polars::Schema.new({"x" => Polars::UInt8.new, "y" => Polars::List.new(Polars::UInt8)})
55
+ # s.dtypes
56
+ # # => [Polars::UInt8, Polars::List(Polars::UInt8)]
21
57
  def dtypes
22
58
  @schema.values
23
59
  end
24
60
 
61
+ # Get the number of schema entries.
62
+ #
63
+ # @return [Integer]
64
+ #
65
+ # @example
66
+ # s = Polars::Schema.new({"x" => Polars::Int32.new, "y" => Polars::List.new(Polars::String)})
67
+ # s.length
68
+ # # => 2
25
69
  def length
26
70
  @schema.length
27
71
  end
28
72
 
73
+ # Returns a string representing the Schema.
74
+ #
75
+ # @return [String]
29
76
  def to_s
30
77
  "#{self.class.name}(#{@schema})"
31
78
  end
32
79
  alias_method :inspect, :to_s
80
+
81
+ # @private
82
+ def include?(name)
83
+ @schema.include?(name)
84
+ end
85
+
86
+ # @private
87
+ def to_h
88
+ @schema.to_h
89
+ end
90
+
91
+ private
92
+
93
+ def _check_dtype(tp)
94
+ if !tp.is_a?(DataType)
95
+ # note: if nested/decimal, or has signature params, this implies required args
96
+ if tp.nested? || tp.decimal? || _required_init_args(tp)
97
+ msg = "dtypes must be fully-specified, got: #{tp.inspect}"
98
+ raise TypeError, msg
99
+ end
100
+ tp = tp.new
101
+ end
102
+ tp
103
+ end
104
+
105
+ def _required_init_args(tp)
106
+ arity = tp.method(:new).arity
107
+ arity > 0 || arity < -1
108
+ end
33
109
  end
34
110
  end
data/lib/polars/selector.rb CHANGED
@@ -12,6 +12,9 @@ module Polars
12
12
  slf
13
13
  end
14
14
 
15
+ # Returns a string representing the Selector.
16
+ #
17
+ # @return [String]
15
18
  def inspect
16
19
  Expr._from_rbexpr(_rbexpr).to_s
17
20
  end
@@ -50,10 +53,16 @@ module Polars
50
53
  _from_rbselector(RbSelector.by_name(names, strict))
51
54
  end
52
55
 
56
+ # Invert the selector.
57
+ #
58
+ # @return [Selector]
53
59
  def ~
54
60
  Selectors.all - self
55
61
  end
56
62
 
63
+ # AND.
64
+ #
65
+ # @return [Selector]
57
66
  def &(other)
58
67
  if Utils.is_column(other)
59
68
  colname = other.meta.output_name
@@ -68,6 +77,9 @@ module Polars
68
77
  end
69
78
  end
70
79
 
80
+ # OR.
81
+ #
82
+ # @return [Selector]
71
83
  def |(other)
72
84
  if Utils.is_column(other)
73
85
  other = by_name(other.meta.output_name)
@@ -81,6 +93,9 @@ module Polars
81
93
  end
82
94
  end
83
95
 
96
+ # Difference.
97
+ #
98
+ # @return [Selector]
84
99
  def -(other)
85
100
  if Utils.is_selector(other)
86
101
  Selector._from_rbselector(
@@ -91,6 +106,9 @@ module Polars
91
106
  end
92
107
  end
93
108
 
109
+ # XOR.
110
+ #
111
+ # @return [Selector]
94
112
  def ^(other)
95
113
  if Utils.is_column(other)
96
114
  other = by_name(other.meta.output_name)
@@ -104,6 +122,19 @@ module Polars
104
122
  end
105
123
  end
106
124
 
125
+ # Exclude columns from a multi-column expression.
126
+ #
127
+ # Only works after a wildcard or regex column selection, and you cannot provide
128
+ # both string column names *and* dtypes (you may prefer to use selectors instead).
129
+ #
130
+ # @return [Selector]
131
+ #
132
+ # @param columns [Object]
133
+ # The name or datatype of the column(s) to exclude. Accepts regular expression
134
+ # input. Regular expressions should start with `^` and end with `$`.
135
+ # @param more_columns [Array]
136
+ # Additional names or datatypes of columns to exclude, specified as positional
137
+ # arguments.
107
138
  def exclude(columns, *more_columns)
108
139
  exclude_cols = []
109
140
  exclude_dtypes = []
@@ -131,6 +162,47 @@ module Polars
131
162
  end
132
163
  end
133
164
 
165
+ # Materialize the `selector` as a normal expression.
166
+ #
167
+ # This ensures that the operators `|`, `&`, `~` and `-`
168
+ # are applied on the data and not on the selector sets.
169
+ #
170
+ # @return [Expr]
171
+ #
172
+ # @example Inverting the boolean selector will choose the non-boolean columns:
173
+ # df = Polars::DataFrame.new(
174
+ # {
175
+ # "colx" => ["aa", "bb", "cc"],
176
+ # "coly" => [true, false, true],
177
+ # "colz" => [1, 2, 3]
178
+ # }
179
+ # )
180
+ # df.select(~Polars.cs.boolean)
181
+ # # =>
182
+ # # shape: (3, 2)
183
+ # # ┌──────┬──────┐
184
+ # # │ colx ┆ colz │
185
+ # # │ ---  ┆ ---  │
186
+ # # │ str  ┆ i64  │
187
+ # # ╞══════╪══════╡
188
+ # # │ aa   ┆ 1    │
189
+ # # │ bb   ┆ 2    │
190
+ # # │ cc   ┆ 3    │
191
+ # # └──────┴──────┘
192
+ #
193
+ # @example To invert the *values* in the selected boolean columns, we need to materialize the selector as a standard expression instead:
194
+ # df.select(~Polars.cs.boolean.as_expr)
195
+ # # =>
196
+ # # shape: (3, 1)
197
+ # # ┌───────┐
198
+ # # │ coly  │
199
+ # # │ ---   │
200
+ # # │ bool  │
201
+ # # ╞═══════╡
202
+ # # │ false │
203
+ # # │ true  │
204
+ # # │ false │
205
+ # # └───────┘
134
206
  def as_expr
135
207
  Expr._from_rbexpr(_rbexpr)
136
208
  end
data/lib/polars/selectors.rb CHANGED
@@ -287,7 +287,7 @@ module Polars
287
287
  # # │ b"hello" ┆ world ┆ b"!"   ┆ :)  │
288
288
  # # └──────────┴───────┴────────┴─────┘
289
289
  #
290
- # @example Select binary columns and export as a dict:
290
+ # @example Select binary columns and export as a hash:
291
291
  # df.select(Polars.cs.binary).to_h(as_series: false)
292
292
  # # => {"a"=>["hello"], "c"=>["!"]}
293
293
  #
@@ -628,7 +628,7 @@ module Polars
628
628
  # # └──────┘
629
629
  #
630
630
  # @example Select all columns *except* for those that are enum:
631
- # df.select(~Polars.cs.enum())
631
+ # df.select(~Polars.cs.enum)
632
632
  # # =>
633
633
  # # shape: (2, 2)
634
634
  # # ┌─────┬─────┐
@@ -928,7 +928,7 @@ module Polars
928
928
  # # │ 456 ┆ 5.5 │
929
929
  # # └─────┴─────┘
930
930
  def self.categorical
931
- Selector._from_rbselector(RbSelector.categorical())
931
+ Selector._from_rbselector(RbSelector.categorical)
932
932
  end
933
933
 
934
934
  # Select columns whose names contain the given literal substring(s).