polars-df 0.1.2 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +9 -0
  4. data/Cargo.lock +74 -3
  5. data/Cargo.toml +3 -0
  6. data/README.md +1 -1
  7. data/ext/polars/Cargo.toml +18 -1
  8. data/ext/polars/src/conversion.rs +115 -2
  9. data/ext/polars/src/dataframe.rs +228 -11
  10. data/ext/polars/src/error.rs +4 -0
  11. data/ext/polars/src/lazy/dataframe.rs +5 -5
  12. data/ext/polars/src/lazy/dsl.rs +157 -2
  13. data/ext/polars/src/lib.rs +185 -10
  14. data/ext/polars/src/list_construction.rs +100 -0
  15. data/ext/polars/src/series.rs +217 -29
  16. data/ext/polars/src/set.rs +91 -0
  17. data/ext/polars/src/utils.rs +19 -0
  18. data/lib/polars/batched_csv_reader.rb +1 -0
  19. data/lib/polars/cat_expr.rb +39 -0
  20. data/lib/polars/cat_name_space.rb +54 -0
  21. data/lib/polars/data_frame.rb +2384 -140
  22. data/lib/polars/date_time_expr.rb +1282 -7
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/exceptions.rb +20 -0
  25. data/lib/polars/expr.rb +4374 -53
  26. data/lib/polars/expr_dispatch.rb +22 -0
  27. data/lib/polars/functions.rb +219 -0
  28. data/lib/polars/group_by.rb +518 -0
  29. data/lib/polars/io.rb +421 -2
  30. data/lib/polars/lazy_frame.rb +1267 -69
  31. data/lib/polars/lazy_functions.rb +412 -24
  32. data/lib/polars/lazy_group_by.rb +80 -0
  33. data/lib/polars/list_expr.rb +507 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/meta_expr.rb +21 -0
  36. data/lib/polars/series.rb +2256 -242
  37. data/lib/polars/slice.rb +104 -0
  38. data/lib/polars/string_expr.rb +847 -10
  39. data/lib/polars/string_name_space.rb +690 -0
  40. data/lib/polars/struct_expr.rb +73 -0
  41. data/lib/polars/struct_name_space.rb +64 -0
  42. data/lib/polars/utils.rb +71 -3
  43. data/lib/polars/version.rb +2 -1
  44. data/lib/polars/when.rb +1 -0
  45. data/lib/polars/when_then.rb +1 -0
  46. data/lib/polars.rb +12 -10
  47. metadata +15 -2
@@ -0,0 +1,22 @@
1
module Polars
  # @private
  #
  # Mixin that forwards calls to the expression API: the wrapped series is
  # turned into a one-column frame, the requested expression method is
  # selected on that column, and the result is converted back to a series.
  module ExprDispatch
    # Hook run on `include`: gives instances an `_s` accessor (the wrapped
    # series handle) and gives the including class itself an `_accessor`
    # accessor (optional expression namespace to enter before dispatching).
    def self.included(base)
      base.attr_accessor :_s
      base.singleton_class.attr_accessor :_accessor
    end

    private

    # Dispatches `method` through the expression API.
    #
    # Only names the including class already defines (per `method_defined?`)
    # are handled here; every other name falls through to the default
    # `method_missing` behaviour.
    def method_missing(method, ...)
      unless self.class.method_defined?(method)
        return super
      end

      accessor = self.class._accessor

      series = Utils.wrap_s(_s)
      column = Utils.col(series.name)
      # Step into the namespace (when one is configured) before calling.
      column = column.send(accessor) if accessor

      frame = series.to_frame
      projection = column.send(method, ...)
      frame.select(projection).to_series
    end
  end
end
@@ -1,5 +1,51 @@
1
1
  module Polars
2
2
  module Functions
3
+ # Convert categorical variables into dummy/indicator variables.
4
+ #
5
+ # @param df [DataFrame]
6
+ # DataFrame to convert.
7
+ # @param columns [Array, nil]
8
+ # A subset of columns to convert to dummy variables. `nil` means
9
+ # "all columns".
10
+ #
11
+ # @return [DataFrame]
12
def get_dummies(df, columns: nil)
  # Module-level convenience wrapper: the frame itself does the work.
  selection = columns
  df.to_dummies(columns: selection)
end
15
+
16
+ # Aggregate multiple DataFrames/Series into a single DataFrame/Series.
17
+ #
18
+ # @param items [Object]
19
+ # DataFrames/Series/LazyFrames to concatenate.
20
+ # @param rechunk [Boolean]
21
+ # Make sure that all data is in contiguous memory.
22
+ # @param how ["vertical", "diagonal", "horizontal"]
23
+ # Lazy only supports the 'vertical' strategy.
24
+ #
25
+ # - Vertical: applies multiple `vstack` operations.
26
+ # - Diagonal: finds a union between the column schemas and fills missing column values with null.
27
+ # - Horizontal: stacks Series horizontally and fills with nulls if the lengths don't match.
28
+ # @param parallel [Boolean]
29
+ # Only relevant for LazyFrames. This determines if the concatenated
30
+ # lazy computations may be executed in parallel.
31
+ #
32
+ # @return [Object]
33
+ #
34
+ # @example
35
+ # df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
36
+ # df2 = Polars::DataFrame.new({"a" => [2], "b" => [4]})
37
+ # Polars.concat([df1, df2])
38
+ # # =>
39
+ # # shape: (2, 2)
40
+ # # ┌─────┬─────┐
41
+ # # │ a ┆ b │
42
+ # # │ --- ┆ --- │
43
+ # # │ i64 ┆ i64 │
44
+ # # ╞═════╪═════╡
45
+ # # │ 1 ┆ 3 │
46
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
47
+ # # │ 2 ┆ 4 │
48
+ # # └─────┴─────┘
3
49
  def concat(items, rechunk: true, how: "vertical", parallel: true)
4
50
  if items.empty?
5
51
  raise ArgumentError, "cannot concat empty list"
@@ -41,5 +87,178 @@ module Polars
41
87
  out
42
88
  end
43
89
  end
90
+
91
+ # Create a range of type `Datetime` (or `Date`).
92
+ #
93
+ # @param low [Object]
94
+ # Lower bound of the date range.
95
+ # @param high [Object]
96
+ # Upper bound of the date range.
97
+ # @param interval [Object]
98
+ # Interval periods. It can be a polars duration string, such as `3d12h4m25s`
99
+ # representing 3 days, 12 hours, 4 minutes, and 25 seconds.
100
+ # @param lazy [Boolean]
101
+ # Return an expression.
102
+ # @param closed ["both", "left", "right", "none"]
103
+ # Define whether the temporal window interval is closed or not.
104
+ # @param name [String]
105
+ # Name of the output Series.
106
+ # @param time_unit [nil, "ns", "us", "ms"]
107
+ # Set the time unit.
108
+ # @param time_zone [String]
109
+ # Optional timezone
110
+ #
111
+ # @return [Object]
112
+ #
113
+ # @note
114
+ # If both `low` and `high` are passed as date types (not datetime), and the
115
+ # interval granularity is no finer than 1d, the returned range is also of
116
+ # type date. All other permutations return a datetime Series.
117
+ #
118
+ # @example Using polars duration string to specify the interval
119
+ # Polars.date_range(Date.new(2022, 1, 1), Date.new(2022, 3, 1), "1mo", name: "drange")
120
+ # # =>
121
+ # # shape: (3,)
122
+ # # Series: 'drange' [date]
123
+ # # [
124
+ # # 2022-01-01
125
+ # # 2022-02-01
126
+ # # 2022-03-01
127
+ # # ]
128
+ #
129
+ # @example Using a compound duration string with an explicit time unit
130
+ # Polars.date_range(
131
+ # DateTime.new(1985, 1, 1),
132
+ # DateTime.new(1985, 1, 10),
133
+ # "1d12h",
134
+ # time_unit: "ms"
135
+ # )
136
+ # # =>
137
+ # # shape: (7,)
138
+ # # Series: '' [datetime[ms]]
139
+ # # [
140
+ # # 1985-01-01 00:00:00
141
+ # # 1985-01-02 12:00:00
142
+ # # 1985-01-04 00:00:00
143
+ # # 1985-01-05 12:00:00
144
+ # # 1985-01-07 00:00:00
145
+ # # 1985-01-08 12:00:00
146
+ # # 1985-01-10 00:00:00
147
+ # # ]
148
def date_range(
  low,
  high,
  interval,
  lazy: false,
  closed: "both",
  name: nil,
  time_unit: nil,
  time_zone: nil
)
  # ActiveSupport durations are recognised but not translated yet.
  raise Todo if defined?(ActiveSupport::Duration) && interval.is_a?(ActiveSupport::Duration)

  # Work on a compact string form: "1d 12h" becomes "1d12h".
  interval = interval.to_s.delete(" ")

  # Expression path: taken when either bound is an Expr or `lazy` is set.
  wants_expr = [low, high].any? { |bound| bound.is_a?(Expr) } || lazy
  if wants_expr
    low_expr = Utils.expr_to_lit_or_expr(low, str_to_lit: true)
    high_expr = Utils.expr_to_lit_or_expr(high, str_to_lit: true)
    rb_expr = _rb_date_range_lazy(low_expr, high_expr, interval, closed, name, time_zone)
    return Utils.wrap_expr(rb_expr)
  end

  low, low_is_date = _ensure_datetime(low)
  high, high_is_date = _ensure_datetime(high)

  # An explicit time unit wins; otherwise "ns" when the interval mentions
  # nanoseconds, and "us" for everything else.
  tu = time_unit
  tu = interval.include?("ns") ? "ns" : "us" if tu.nil?

  start = Utils._datetime_to_pl_timestamp(low, tu)
  stop = Utils._datetime_to_pl_timestamp(high, tu)
  name = "" if name.nil?

  dt_range = Utils.wrap_s(
    _rb_date_range(start, stop, interval, closed, name, tu, time_zone)
  )

  # Two plain-date bounds with an interval that does not end in an
  # hour/minute/second unit give a Date series rather than a Datetime one.
  if low_is_date && high_is_date
    if %w[h m s].none? { |unit| _interval_granularity(interval).end_with?(unit) }
      dt_range = dt_range.cast(Date)
    end
  end

  dt_range
end
201
+
202
+ # def cut
203
+ # end
204
+
205
+ # def align_frames
206
+ # end
207
+
208
+ # Return a new Series of given length and type, filled with ones.
209
+ #
210
+ # @param n [Integer]
211
+ # Number of elements in the `Series`
212
+ # @param dtype [Symbol]
213
+ # DataType of the elements, defaults to `:f64`
214
+ #
215
+ # @return [Series]
216
+ #
217
+ # @note
218
+ # In the lazy API you should probably not use this, but use `lit(1)`
219
+ # instead.
220
def ones(n, dtype: nil)
  # One-element Float seed; it is cast only when a dtype was requested.
  seed = dtype ? Series.new([1.0]).cast(dtype) : Series.new([1.0])
  # Build the result from element 0 of the seed, with length argument n.
  seed.new_from_index(0, n)
end
227
+
228
+ # Return a new Series of given length and type, filled with zeros.
229
+ #
230
+ # @param n [Integer]
231
+ # Number of elements in the `Series`
232
+ # @param dtype [Symbol]
233
+ # DataType of the elements, defaults to `:f64`
234
+ #
235
+ # @return [Series]
236
+ #
237
+ # @note
238
+ # In the lazy API you should probably not use this, but use `lit(0)`
239
+ # instead.
240
def zeros(n, dtype: nil)
  # One-element Float seed; it is cast only when a dtype was requested.
  seed = dtype ? Series.new([0.0]).cast(dtype) : Series.new([0.0])
  # Build the result from element 0 of the seed, with length argument n.
  seed.new_from_index(0, n)
end
247
+
248
+ private
249
+
250
# Coerce a range bound to a `DateTime` and report whether it was a plain date.
#
# @param value [Object]
#   A `DateTime`, a `Time`, or any object responding to `year`, `month`
#   and `day` (such as a `Date`).
#
# @return [Array]
#   `[datetime, is_date_type]`. `is_date_type` is `true` only when the
#   value had to be rebuilt from its year/month/day parts.
def _ensure_datetime(value)
  return [value, false] if value.is_a?(DateTime)

  # A Time carries a time of day; rebuilding it from year/month/day would
  # silently truncate it to midnight and mislabel it as a date.
  return [value.to_datetime, false] if value.is_a?(Time)

  [DateTime.new(value.year, value.month, value.day), true]
end
258
+
259
# Return the trailing unit of a duration string, e.g. "h" for "1d12h" and
# "mo" for "1mo". An empty string is returned when the interval does not
# end in letters.
#
# @param interval [String]
#   Duration string with no embedded spaces.
#
# @return [String]
def _interval_granularity(interval)
  interval.to_s[/[[:alpha:]]+\z/].to_s
end
44
263
  end
45
264
  end