polars-df 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,14 @@
1
module Polars
  # @private
  #
  # Fallback dispatcher: a call that reaches `method_missing` is replayed as
  # the same-named expression method against a one-column frame built from
  # the wrapped series, and the selection is turned back into a series.
  module ExprDispatch
    private

    # @param method [Symbol]
    #   Name of the method that could not be resolved. All remaining
    #   arguments are forwarded to the expression untouched.
    #
    # @return [Object]
    #   Whatever `to_series` returns for the selected frame.
    def method_missing(method, ...)
      # Names the class does not declare keep Ruby's default behaviour.
      if self.class.method_defined?(method)
        # `_s` is not defined in this module; presumably the including
        # class provides it - confirm against the includers.
        series = Utils.wrap_s(_s)
        column = Utils.col(series.name)
        frame = series.to_frame
        frame.select(column.send(method, ...)).to_series
      else
        super
      end
    end
  end
end
@@ -1,5 +1,51 @@
1
1
  module Polars
2
2
  module Functions
3
+ # Convert categorical variables into dummy/indicator variables.
4
+ #
5
+ # @param df [DataFrame]
6
+ # DataFrame to convert.
7
+ # @param columns [Array, nil]
8
+ # A subset of columns to convert to dummy variables. `nil` means
9
+ # "all columns".
10
+ #
11
+ # @return [DataFrame]
12
def get_dummies(df, columns: nil)
  # Pure delegation: the frame performs the conversion and `columns`
  # (including `nil`) is handed over exactly as received.
  selection = columns
  df.to_dummies(columns: selection)
end
15
+
16
+ # Aggregate multiple DataFrames/Series to a single DataFrame/Series.
17
+ #
18
+ # @param items [Object]
19
+ # DataFrames/Series/LazyFrames to concatenate.
20
+ # @param rechunk [Boolean]
21
+ # Make sure that all data is in contiguous memory.
22
+ # @param how ["vertical", "diagonal", "horizontal"]
23
+ # Lazy only supports the 'vertical' strategy.
24
+ #
25
+ # - Vertical: applies multiple `vstack` operations.
26
+ # - Diagonal: finds a union between the column schemas and fills missing column values with null.
27
+ # - Horizontal: stacks Series horizontally and fills with nulls if the lengths don't match.
28
+ # @param parallel [Boolean]
29
+ # Only relevant for LazyFrames. This determines if the concatenated
30
+ # lazy computations may be executed in parallel.
31
+ #
32
+ # @return [Object]
33
+ #
34
+ # @example
35
+ # df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
36
+ # df2 = Polars::DataFrame.new({"a" => [2], "b" => [4]})
37
+ # Polars.concat([df1, df2])
38
+ # # =>
39
+ # # shape: (2, 2)
40
+ # # ┌─────┬─────┐
41
+ # # │ a ┆ b │
42
+ # # │ --- ┆ --- │
43
+ # # │ i64 ┆ i64 │
44
+ # # ╞═════╪═════╡
45
+ # # │ 1 ┆ 3 │
46
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
47
+ # # │ 2 ┆ 4 │
48
+ # # └─────┴─────┘
3
49
  def concat(items, rechunk: true, how: "vertical", parallel: true)
4
50
  if items.empty?
5
51
  raise ArgumentError, "cannot concat empty list"
@@ -41,5 +87,178 @@ module Polars
41
87
  out
42
88
  end
43
89
  end
90
+
91
+ # Create a range of type `Datetime` (or `Date`).
92
+ #
93
+ # @param low [Object]
94
+ # Lower bound of the date range.
95
+ # @param high [Object]
96
+ # Upper bound of the date range.
97
+ # @param interval [Object]
98
+ # Interval periods. It can be a polars duration string, such as `3d12h4m25s`
99
+ # representing 3 days, 12 hours, 4 minutes, and 25 seconds.
100
+ # @param lazy [Boolean]
101
+ # Return an expression.
102
+ # @param closed ["both", "left", "right", "none"]
103
+ # Define whether the temporal window interval is closed or not.
104
+ # @param name [String]
105
+ # Name of the output Series.
106
+ # @param time_unit [nil, "ns", "us", "ms"]
107
+ # Set the time unit.
108
+ # @param time_zone [String]
109
+ # Optional timezone
110
+ #
111
+ # @return [Object]
112
+ #
113
+ # @note
114
+ # If both `low` and `high` are passed as date types (not datetime), and the
115
+ # interval granularity is no finer than 1d, the returned range is also of
116
+ # type date. All other permutations return a datetime Series.
117
+ #
118
+ # @example Using polars duration string to specify the interval
119
+ # Polars.date_range(Date.new(2022, 1, 1), Date.new(2022, 3, 1), "1mo", name: "drange")
120
+ # # =>
121
+ # # shape: (3,)
122
+ # # Series: 'drange' [date]
123
+ # # [
124
+ # # 2022-01-01
125
+ # # 2022-02-01
126
+ # # 2022-03-01
127
+ # # ]
128
+ #
129
+ # @example Using a compound duration string with `DateTime` bounds:
130
+ # Polars.date_range(
131
+ # DateTime.new(1985, 1, 1),
132
+ # DateTime.new(1985, 1, 10),
133
+ # "1d12h",
134
+ # time_unit: "ms"
135
+ # )
136
+ # # =>
137
+ # # shape: (7,)
138
+ # # Series: '' [datetime[ms]]
139
+ # # [
140
+ # # 1985-01-01 00:00:00
141
+ # # 1985-01-02 12:00:00
142
+ # # 1985-01-04 00:00:00
143
+ # # 1985-01-05 12:00:00
144
+ # # 1985-01-07 00:00:00
145
+ # # 1985-01-08 12:00:00
146
+ # # 1985-01-10 00:00:00
147
+ # # ]
148
def date_range(
  low,
  high,
  interval,
  lazy: false,
  closed: "both",
  name: nil,
  time_unit: nil,
  time_zone: nil
)
  # ActiveSupport durations are detected but not handled yet.
  raise Todo if defined?(ActiveSupport::Duration) && interval.is_a?(ActiveSupport::Duration)

  # Work with the stringified interval, with every space removed
  # (e.g. "1d 12h" becomes "1d12h").
  interval = interval.to_s.delete(" ")

  # Expression inputs, or an explicit `lazy: true`, yield an expression
  # instead of a materialised Series.
  wants_expr = low.is_a?(Expr) || high.is_a?(Expr) || lazy
  if wants_expr
    low = Utils.expr_to_lit_or_expr(low, str_to_lit: true)
    high = Utils.expr_to_lit_or_expr(high, str_to_lit: true)
    lazy_range = _rb_date_range_lazy(low, high, interval, closed, name, time_zone)
    return Utils.wrap_expr(lazy_range)
  end

  low, low_is_date = _ensure_datetime(low)
  high, high_is_date = _ensure_datetime(high)

  # An explicit time unit wins; otherwise nanoseconds are used only when
  # the interval itself mentions "ns", and microseconds in every other case.
  tu = time_unit
  tu = interval.include?("ns") ? "ns" : "us" if tu.nil?

  start = Utils._datetime_to_pl_timestamp(low, tu)
  stop = Utils._datetime_to_pl_timestamp(high, tu)
  name = "" if name.nil?

  dt_range = Utils.wrap_s(
    _rb_date_range(start, stop, interval, closed, name, tu, time_zone)
  )

  # Two date-only bounds combined with an interval that does not end in
  # "h", "m" or "s" produce a Date result rather than a datetime one.
  if low_is_date && high_is_date
    sub_daily = _interval_granularity(interval).end_with?("h", "m", "s")
    dt_range = dt_range.cast(Date) unless sub_daily
  end

  dt_range
end
201
+
202
+ # def cut
203
+ # end
204
+
205
+ # def align_frames
206
+ # end
207
+
208
+ # Return a new Series of given length and type, filled with ones.
209
+ #
210
+ # @param n [Integer]
211
+ # Number of elements in the `Series`
212
+ # @param dtype [Symbol]
213
+ # DataType of the elements, defaults to `:f64`
214
+ #
215
+ # @return [Series]
216
+ #
217
+ # @note
218
+ # In the lazy API you should probably not use this, but use `lit(1)`
219
+ # instead.
220
def ones(n, dtype: nil)
  # Seed with a single 1.0 and cast only when a dtype was requested.
  seed = Series.new([1.0])
  seed = seed.cast(dtype) if dtype
  # Presumably repeats the element at index 0 `n` times - confirm against
  # `Series#new_from_index`.
  seed.new_from_index(0, n)
end
227
+
228
+ # Return a new Series of given length and type, filled with zeros.
229
+ #
230
+ # @param n [Integer]
231
+ # Number of elements in the `Series`
232
+ # @param dtype [Symbol]
233
+ # DataType of the elements, defaults to `:f64`
234
+ #
235
+ # @return [Series]
236
+ #
237
+ # @note
238
+ # In the lazy API you should probably not use this, but use `lit(0)`
239
+ # instead.
240
def zeros(n, dtype: nil)
  # Seed with a single 0.0 and cast only when a dtype was requested.
  seed = Series.new([0.0])
  seed = seed.cast(dtype) if dtype
  # Presumably repeats the element at index 0 `n` times - confirm against
  # `Series#new_from_index`.
  seed.new_from_index(0, n)
end
247
+
248
+ private
249
+
250
# Normalise a range bound to a `DateTime` and report whether the input was
# a date-only value.
#
# @param value [Date, DateTime, Time]
#   Bound to normalise. Any other object must respond to `year`, `month`
#   and `day`.
#
# @return [Array]
#   Two elements: the `DateTime`, and `true` only when the input carried no
#   time of day (i.e. it was rebuilt from year/month/day).
def _ensure_datetime(value)
  case value
  when DateTime
    # Already the target type: hand back the very same object.
    [value, false]
  when ::Time
    # A Time has a time of day; rebuilding it from year/month/day would
    # silently truncate it to midnight and misreport it as date-only.
    # `::Time` is spelled out so a same-named constant in an enclosing
    # namespace cannot shadow the core class.
    [value.to_datetime, false]
  else
    [DateTime.new(value.year, value.month, value.day), true]
  end
end
258
+
259
# Extract the unit of the last component of a duration string, e.g. "h" for
# "1d12h" or "mo" for "1mo".
#
# @param interval [String]
#   Duration string; it is stringified first, so `nil` yields "".
#
# @return [String]
#   The run of letters at the very end of `interval`, or "" when the string
#   does not end in a letter.
def _interval_granularity(interval)
  # The trailing letters always end with the same character as the full
  # string, so suffix checks written against the whole interval keep
  # giving the same answer.
  interval.to_s[/[a-zA-Z]+\z/].to_s
end
44
263
  end
45
264
  end