polars-df 0.21.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +55 -48
  4. data/Cargo.toml +3 -0
  5. data/README.md +12 -0
  6. data/ext/polars/Cargo.toml +22 -11
  7. data/ext/polars/src/batched_csv.rs +4 -4
  8. data/ext/polars/src/catalog/unity.rs +96 -94
  9. data/ext/polars/src/conversion/any_value.rs +26 -30
  10. data/ext/polars/src/conversion/chunked_array.rs +32 -28
  11. data/ext/polars/src/conversion/datetime.rs +11 -0
  12. data/ext/polars/src/conversion/mod.rs +307 -34
  13. data/ext/polars/src/dataframe/construction.rs +4 -3
  14. data/ext/polars/src/dataframe/export.rs +17 -15
  15. data/ext/polars/src/dataframe/general.rs +15 -12
  16. data/ext/polars/src/dataframe/io.rs +1 -2
  17. data/ext/polars/src/dataframe/mod.rs +25 -1
  18. data/ext/polars/src/dataframe/serde.rs +23 -8
  19. data/ext/polars/src/exceptions.rs +8 -4
  20. data/ext/polars/src/expr/array.rs +73 -4
  21. data/ext/polars/src/expr/binary.rs +26 -1
  22. data/ext/polars/src/expr/bitwise.rs +39 -0
  23. data/ext/polars/src/expr/categorical.rs +20 -0
  24. data/ext/polars/src/expr/datatype.rs +24 -1
  25. data/ext/polars/src/expr/datetime.rs +58 -14
  26. data/ext/polars/src/expr/general.rs +87 -15
  27. data/ext/polars/src/expr/list.rs +32 -24
  28. data/ext/polars/src/expr/meta.rs +15 -6
  29. data/ext/polars/src/expr/mod.rs +3 -0
  30. data/ext/polars/src/expr/name.rs +19 -14
  31. data/ext/polars/src/expr/rolling.rs +20 -0
  32. data/ext/polars/src/expr/serde.rs +28 -0
  33. data/ext/polars/src/expr/string.rs +64 -10
  34. data/ext/polars/src/expr/struct.rs +9 -1
  35. data/ext/polars/src/file.rs +15 -9
  36. data/ext/polars/src/functions/business.rs +0 -1
  37. data/ext/polars/src/functions/io.rs +25 -3
  38. data/ext/polars/src/functions/lazy.rs +11 -6
  39. data/ext/polars/src/functions/meta.rs +3 -3
  40. data/ext/polars/src/functions/string_cache.rs +3 -3
  41. data/ext/polars/src/interop/arrow/to_ruby.rs +3 -3
  42. data/ext/polars/src/interop/numo/numo_rs.rs +4 -3
  43. data/ext/polars/src/io/mod.rs +6 -0
  44. data/ext/polars/src/lazyframe/general.rs +59 -9
  45. data/ext/polars/src/lazyframe/mod.rs +16 -1
  46. data/ext/polars/src/lazyframe/optflags.rs +58 -0
  47. data/ext/polars/src/lazyframe/serde.rs +27 -3
  48. data/ext/polars/src/lib.rs +261 -19
  49. data/ext/polars/src/map/dataframe.rs +20 -17
  50. data/ext/polars/src/map/lazy.rs +6 -5
  51. data/ext/polars/src/map/series.rs +8 -7
  52. data/ext/polars/src/on_startup.rs +12 -5
  53. data/ext/polars/src/rb_modules.rs +2 -2
  54. data/ext/polars/src/series/aggregation.rs +85 -28
  55. data/ext/polars/src/series/construction.rs +1 -0
  56. data/ext/polars/src/series/export.rs +37 -33
  57. data/ext/polars/src/series/general.rs +120 -21
  58. data/ext/polars/src/series/mod.rs +29 -4
  59. data/lib/polars/array_expr.rb +382 -3
  60. data/lib/polars/array_name_space.rb +281 -0
  61. data/lib/polars/binary_expr.rb +67 -0
  62. data/lib/polars/binary_name_space.rb +43 -0
  63. data/lib/polars/cat_expr.rb +224 -0
  64. data/lib/polars/cat_name_space.rb +138 -0
  65. data/lib/polars/config.rb +2 -2
  66. data/lib/polars/convert.rb +6 -6
  67. data/lib/polars/data_frame.rb +794 -27
  68. data/lib/polars/data_type_expr.rb +52 -0
  69. data/lib/polars/data_types.rb +26 -5
  70. data/lib/polars/date_time_expr.rb +252 -1
  71. data/lib/polars/date_time_name_space.rb +299 -0
  72. data/lib/polars/expr.rb +1248 -206
  73. data/lib/polars/functions/business.rb +95 -0
  74. data/lib/polars/functions/datatype.rb +21 -0
  75. data/lib/polars/functions/lazy.rb +14 -1
  76. data/lib/polars/io/csv.rb +1 -1
  77. data/lib/polars/io/iceberg.rb +27 -0
  78. data/lib/polars/io/json.rb +4 -4
  79. data/lib/polars/io/ndjson.rb +4 -4
  80. data/lib/polars/io/parquet.rb +32 -7
  81. data/lib/polars/io/scan_options.rb +4 -1
  82. data/lib/polars/lazy_frame.rb +1028 -28
  83. data/lib/polars/list_expr.rb +217 -17
  84. data/lib/polars/list_name_space.rb +231 -22
  85. data/lib/polars/meta_expr.rb +89 -0
  86. data/lib/polars/name_expr.rb +36 -0
  87. data/lib/polars/query_opt_flags.rb +50 -0
  88. data/lib/polars/scan_cast_options.rb +20 -1
  89. data/lib/polars/schema.rb +79 -3
  90. data/lib/polars/selector.rb +72 -0
  91. data/lib/polars/selectors.rb +3 -3
  92. data/lib/polars/series.rb +1053 -54
  93. data/lib/polars/string_expr.rb +436 -32
  94. data/lib/polars/string_name_space.rb +736 -50
  95. data/lib/polars/struct_expr.rb +103 -0
  96. data/lib/polars/struct_name_space.rb +19 -1
  97. data/lib/polars/utils/serde.rb +17 -0
  98. data/lib/polars/utils/various.rb +22 -1
  99. data/lib/polars/utils.rb +5 -1
  100. data/lib/polars/version.rb +1 -1
  101. data/lib/polars.rb +6 -0
  102. metadata +11 -1
@@ -57,6 +57,36 @@ module Polars
57
57
  Utils.wrap_expr(_rbexpr.struct_field_by_name(name))
58
58
  end
59
59
 
60
+ # Expand the struct into its individual fields.
61
+ #
62
+ # Alias for `Expr.struct.field("*")`.
63
+ #
64
+ # @return [Expr]
65
+ #
66
+ # @example
67
+ # df = Polars::DataFrame.new(
68
+ # {
69
+ # "aaa" => [1, 2],
70
+ # "bbb" => ["ab", "cd"],
71
+ # "ccc" => [true, nil],
72
+ # "ddd" => [[1, 2], [3]]
73
+ # }
74
+ # ).select(Polars.struct("aaa", "bbb", "ccc", "ddd").alias("struct_col"))
75
+ # df.select(Polars.col("struct_col").struct.unnest)
76
+ # # =>
77
+ # # shape: (2, 4)
78
+ # # ┌─────┬─────┬──────┬───────────┐
79
+ # # │ aaa ┆ bbb ┆ ccc ┆ ddd │
80
+ # # │ --- ┆ --- ┆ --- ┆ --- │
81
+ # # │ i64 ┆ str ┆ bool ┆ list[i64] │
82
+ # # ╞═════╪═════╪══════╪═══════════╡
83
+ # # │ 1 ┆ ab ┆ true ┆ [1, 2] │
84
+ # # │ 2 ┆ cd ┆ null ┆ [3] │
85
+ # # └─────┴─────┴──────┴───────────┘
86
def unnest
  # The wildcard field name selects every field of the struct at once.
  wildcard = "*"
  field(wildcard)
end
89
+
60
90
  # Rename the fields of the struct.
61
91
  #
62
92
  # @param names [Array]
@@ -94,5 +124,78 @@ module Polars
94
124
  def rename_fields(names)
95
125
  Utils.wrap_expr(_rbexpr.struct_rename_fields(names))
96
126
  end
127
+
128
+ # Convert this struct to a string column with JSON values.
129
+ #
130
+ # @return [Expr]
131
+ #
132
+ # @example
133
+ # Polars::DataFrame.new(
134
+ # {"a" => [{"a" => [1, 2], "b" => [45]}, {"a" => [9, 1, 3], "b" => nil}]}
135
+ # ).with_columns(Polars.col("a").struct.json_encode.alias("encoded"))
136
+ # # =>
137
+ # # shape: (2, 2)
138
+ # # ┌──────────────────┬────────────────────────┐
139
+ # # │ a ┆ encoded │
140
+ # # │ --- ┆ --- │
141
+ # # │ struct[2] ┆ str │
142
+ # # ╞══════════════════╪════════════════════════╡
143
+ # # │ {[1, 2],[45]} ┆ {"a":[1,2],"b":[45]} │
144
+ # # │ {[9, 1, 3],null} ┆ {"a":[9,1,3],"b":null} │
145
+ # # └──────────────────┴────────────────────────┘
146
def json_encode
  # Encoding is performed by the native expression; wrap its result as an Expr.
  encoded = _rbexpr.struct_json_encode
  Utils.wrap_expr(encoded)
end
149
+
150
+ # Add or overwrite fields of this struct.
151
+ #
152
+ # This is similar to `with_columns` on `DataFrame`.
153
+ #
154
+ # @param exprs [Array]
155
+ # Field(s) to add, specified as positional arguments.
156
+ # Accepts expression input. Strings are parsed as column names, other
157
+ # non-expression inputs are parsed as literals.
158
+ # @param named_exprs [Hash]
159
+ # Additional fields to add, specified as keyword arguments.
160
+ # The columns will be renamed to the keyword used.
161
+ #
162
+ # @return [Expr]
163
+ #
164
+ # @example
165
+ # df = Polars::DataFrame.new(
166
+ # {
167
+ # "coords" => [{"x" => 1, "y" => 4}, {"x" => 4, "y" => 9}, {"x" => 9, "y" => 16}],
168
+ # "multiply" => [10, 2, 3]
169
+ # }
170
+ # )
171
+ # df.with_columns(
172
+ # Polars.col("coords").struct.with_fields(
173
+ # Polars.field("x").sqrt,
174
+ # y_mul: Polars.field("y") * Polars.col("multiply")
175
+ # )
176
+ # )
177
+ # # =>
178
+ # # shape: (3, 2)
179
+ # # ┌─────────────┬──────────┐
180
+ # # │ coords ┆ multiply │
181
+ # # │ --- ┆ --- │
182
+ # # │ struct[3] ┆ i64 │
183
+ # # ╞═════════════╪══════════╡
184
+ # # │ {1.0,4,40} ┆ 10 │
185
+ # # │ {2.0,9,18} ┆ 2 │
186
+ # # │ {3.0,16,48} ┆ 3 │
187
+ # # └─────────────┴──────────┘
188
def with_fields(*exprs, **named_exprs)
  # POLARS_AUTO_STRUCTIFY is read as an integer (default 0); any non-zero
  # value turns the structify flag on.
  auto_structify = !ENV.fetch("POLARS_AUTO_STRUCTIFY", 0).to_i.zero?

  # Positional and keyword inputs are parsed together into native expressions.
  parsed = Utils.parse_into_list_of_expressions(
    *exprs,
    **named_exprs,
    __structify: auto_structify
  )

  updated = _rbexpr.struct_with_fields(parsed)
  Utils.wrap_expr(updated)
end
97
200
  end
98
201
  end
@@ -80,7 +80,7 @@ module Polars
80
80
  super
81
81
  end
82
82
 
83
- # Get the struct definition as a name/dtype schema dict.
83
+ # Get the struct definition as a name/dtype schema hash.
84
84
  #
85
85
  # @return [Object]
86
86
  #
@@ -116,5 +116,23 @@ module Polars
116
116
  def unnest
117
117
  Utils.wrap_df(_s.struct_unnest)
118
118
  end
119
+
120
+ # Convert this struct to a string column with JSON values.
121
+ #
122
+ # @return [Series]
123
+ #
124
+ # @example
125
+ # s = Polars::Series.new("a", [{"a" => [1, 2], "b" => [45]}, {"a" => [9, 1, 3], "b" => nil}])
126
+ # s.struct.json_encode
127
+ # # =>
128
+ # # shape: (2,)
129
+ # # Series: 'a' [str]
130
+ # # [
131
+ # # "{"a":[1,2],"b":[45]}"
132
+ # # "{"a":[9,1,3],"b":null}"
133
+ # # ]
134
def json_encode
  # Defer entirely to the parent implementation; this method takes no
  # arguments, so an explicit empty argument list forwards the same call.
  super()
end
119
137
  end
120
138
  end
@@ -0,0 +1,17 @@
1
module Polars
  module Utils
    # Run `serializer` against an in-memory buffer and return what it wrote.
    #
    # Only `file = nil` is handled; any other `file` raises `Todo`.
    def self.serialize_polars_object(serializer, file)
      # Writing to a caller-supplied target is not implemented.
      raise Todo unless file.nil?

      buffer = StringIO.new
      serializer.call(buffer)
      buffer.string
    end
  end
end
@@ -16,7 +16,10 @@ module Polars
16
16
  val.is_a?(::Array) && val.all? { |x| pathlike?(x) }
17
17
  end
18
18
 
19
- def self.is_bool_sequence(val)
19
def self.is_bool_sequence(val, include_series: false)
  # A Series only qualifies when the caller opts in, and then by its dtype.
  return val.dtype == Boolean if include_series && val.is_a?(Series)

  # Anything that is not an Array is rejected outright.
  return false unless val.is_a?(::Array)

  val.all? { |item| item == true || item == false }
end
22
25
 
@@ -24,6 +27,10 @@ module Polars
24
27
  val.is_a?(::Array) && _is_iterable_of(val, Integer)
25
28
  end
26
29
 
30
def self.is_sequence(val, include_series: false)
  # Arrays always count as sequences.
  return true if val.is_a?(::Array)

  # A Series counts only when the caller opts in.
  include_series && val.is_a?(Series)
end
33
+
27
34
  def self.is_str_sequence(val, allow_str: false)
28
35
  if allow_str == false && val.is_a?(::String)
29
36
  false
@@ -68,6 +75,10 @@ module Polars
68
75
  end
69
76
  end
70
77
 
78
def self._polars_warn(msg)
  # Single funnel for library warnings; emits through Ruby's standard warn.
  Kernel.warn(msg)
end
81
+
71
82
  def self.extend_bool(value, n_match, value_name, match_name)
72
83
  values = bool?(value) ? [value] * n_match : value
73
84
  if n_match != values.length
@@ -76,5 +87,15 @@ module Polars
76
87
  end
77
88
  values
78
89
  end
90
+
91
# Ensure two objects are of compatible types.
#
# The objects are compatible when either one is an instance of the other's
# class, so a subclass instance on either side is accepted.
#
# @param current [Object]
#   The reference object.
# @param other [Object]
#   The object that must match the type of `current`.
#
# @return [nil]
#
# @raise [TypeError]
#   If neither object is an instance of the other's class.
def self.require_same_type(current, other)
  return if other.is_a?(current.class) || current.is_a?(other.class)

  # Name the classes, not the values: the message describes a type mismatch,
  # and inspecting the values would dump their full contents into the error.
  msg = "expected `other` to be a #{current.class.inspect}, " \
        "not #{other.class.inspect}"
  raise TypeError, msg
end
79
100
  end
80
101
  end
data/lib/polars/utils.rb CHANGED
@@ -163,7 +163,11 @@ module Polars
163
163
  end
164
164
 
165
165
  def self.parse_into_datatype_expr(input)
166
- raise Todo
166
+ if input.is_a?(DataTypeExpr)
167
+ input
168
+ else
169
+ parse_into_dtype(input).to_dtype_expr
170
+ end
167
171
  end
168
172
  end
169
173
  end
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.21.0"
3
+ VERSION = "0.22.0"
4
4
  end
data/lib/polars.rb CHANGED
@@ -29,6 +29,7 @@ require_relative "polars/convert"
29
29
  require_relative "polars/plot"
30
30
  require_relative "polars/data_frame"
31
31
  require_relative "polars/data_types"
32
+ require_relative "polars/data_type_expr"
32
33
  require_relative "polars/data_type_group"
33
34
  require_relative "polars/date_time_expr"
34
35
  require_relative "polars/date_time_name_space"
@@ -36,7 +37,9 @@ require_relative "polars/dynamic_group_by"
36
37
  require_relative "polars/exceptions"
37
38
  require_relative "polars/expr"
38
39
  require_relative "polars/functions/as_datatype"
40
+ require_relative "polars/functions/business"
39
41
  require_relative "polars/functions/col"
42
+ require_relative "polars/functions/datatype"
40
43
  require_relative "polars/functions/eager"
41
44
  require_relative "polars/functions/lazy"
42
45
  require_relative "polars/functions/len"
@@ -55,6 +58,7 @@ require_relative "polars/io/avro"
55
58
  require_relative "polars/io/csv"
56
59
  require_relative "polars/io/database"
57
60
  require_relative "polars/io/delta"
61
+ require_relative "polars/io/iceberg"
58
62
  require_relative "polars/io/ipc"
59
63
  require_relative "polars/io/json"
60
64
  require_relative "polars/io/ndjson"
@@ -66,6 +70,7 @@ require_relative "polars/list_expr"
66
70
  require_relative "polars/list_name_space"
67
71
  require_relative "polars/meta_expr"
68
72
  require_relative "polars/name_expr"
73
+ require_relative "polars/query_opt_flags"
69
74
  require_relative "polars/rolling_group_by"
70
75
  require_relative "polars/scan_cast_options"
71
76
  require_relative "polars/schema"
@@ -84,6 +89,7 @@ require_relative "polars/utils"
84
89
  require_relative "polars/utils/constants"
85
90
  require_relative "polars/utils/convert"
86
91
  require_relative "polars/utils/parse"
92
+ require_relative "polars/utils/serde"
87
93
  require_relative "polars/utils/various"
88
94
  require_relative "polars/utils/wrap"
89
95
  require_relative "polars/version"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.21.0
4
+ version: 0.22.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
@@ -58,6 +58,7 @@ files:
58
58
  - ext/polars/src/conversion/any_value.rs
59
59
  - ext/polars/src/conversion/categorical.rs
60
60
  - ext/polars/src/conversion/chunked_array.rs
61
+ - ext/polars/src/conversion/datetime.rs
61
62
  - ext/polars/src/conversion/mod.rs
62
63
  - ext/polars/src/dataframe/construction.rs
63
64
  - ext/polars/src/dataframe/export.rs
@@ -69,6 +70,7 @@ files:
69
70
  - ext/polars/src/exceptions.rs
70
71
  - ext/polars/src/expr/array.rs
71
72
  - ext/polars/src/expr/binary.rs
73
+ - ext/polars/src/expr/bitwise.rs
72
74
  - ext/polars/src/expr/categorical.rs
73
75
  - ext/polars/src/expr/datatype.rs
74
76
  - ext/polars/src/expr/datetime.rs
@@ -79,6 +81,7 @@ files:
79
81
  - ext/polars/src/expr/name.rs
80
82
  - ext/polars/src/expr/rolling.rs
81
83
  - ext/polars/src/expr/selector.rs
84
+ - ext/polars/src/expr/serde.rs
82
85
  - ext/polars/src/expr/string.rs
83
86
  - ext/polars/src/expr/struct.rs
84
87
  - ext/polars/src/file.rs
@@ -104,6 +107,7 @@ files:
104
107
  - ext/polars/src/io/mod.rs
105
108
  - ext/polars/src/lazyframe/general.rs
106
109
  - ext/polars/src/lazyframe/mod.rs
110
+ - ext/polars/src/lazyframe/optflags.rs
107
111
  - ext/polars/src/lazyframe/serde.rs
108
112
  - ext/polars/src/lazyframe/sink.rs
109
113
  - ext/polars/src/lazygroupby.rs
@@ -145,6 +149,7 @@ files:
145
149
  - lib/polars/config.rb
146
150
  - lib/polars/convert.rb
147
151
  - lib/polars/data_frame.rb
152
+ - lib/polars/data_type_expr.rb
148
153
  - lib/polars/data_type_group.rb
149
154
  - lib/polars/data_types.rb
150
155
  - lib/polars/date_time_expr.rb
@@ -156,7 +161,9 @@ files:
156
161
  - lib/polars/functions/aggregation/horizontal.rb
157
162
  - lib/polars/functions/aggregation/vertical.rb
158
163
  - lib/polars/functions/as_datatype.rb
164
+ - lib/polars/functions/business.rb
159
165
  - lib/polars/functions/col.rb
166
+ - lib/polars/functions/datatype.rb
160
167
  - lib/polars/functions/eager.rb
161
168
  - lib/polars/functions/lazy.rb
162
169
  - lib/polars/functions/len.rb
@@ -173,6 +180,7 @@ files:
173
180
  - lib/polars/io/csv.rb
174
181
  - lib/polars/io/database.rb
175
182
  - lib/polars/io/delta.rb
183
+ - lib/polars/io/iceberg.rb
176
184
  - lib/polars/io/ipc.rb
177
185
  - lib/polars/io/json.rb
178
186
  - lib/polars/io/ndjson.rb
@@ -185,6 +193,7 @@ files:
185
193
  - lib/polars/meta_expr.rb
186
194
  - lib/polars/name_expr.rb
187
195
  - lib/polars/plot.rb
196
+ - lib/polars/query_opt_flags.rb
188
197
  - lib/polars/rolling_group_by.rb
189
198
  - lib/polars/scan_cast_options.rb
190
199
  - lib/polars/schema.rb
@@ -203,6 +212,7 @@ files:
203
212
  - lib/polars/utils/constants.rb
204
213
  - lib/polars/utils/convert.rb
205
214
  - lib/polars/utils/parse.rb
215
+ - lib/polars/utils/serde.rb
206
216
  - lib/polars/utils/various.rb
207
217
  - lib/polars/utils/wrap.rb
208
218
  - lib/polars/version.rb