polars-df 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/Cargo.lock +430 -217
- data/Cargo.toml +2 -0
- data/LICENSE.txt +1 -1
- data/README.md +0 -2
- data/ext/polars/Cargo.toml +9 -3
- data/ext/polars/src/apply/dataframe.rs +303 -0
- data/ext/polars/src/apply/mod.rs +253 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +254 -35
- data/ext/polars/src/dataframe.rs +151 -6
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +80 -3
- data/ext/polars/src/lazy/dsl.rs +84 -10
- data/ext/polars/src/lib.rs +180 -8
- data/ext/polars/src/series.rs +328 -10
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1480 -77
- data/lib/polars/data_types.rb +122 -0
- data/lib/polars/date_time_expr.rb +10 -10
- data/lib/polars/date_time_name_space.rb +8 -8
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/expr.rb +262 -12
- data/lib/polars/functions.rb +194 -5
- data/lib/polars/group_by.rb +76 -36
- data/lib/polars/io.rb +19 -3
- data/lib/polars/lazy_frame.rb +798 -25
- data/lib/polars/lazy_functions.rb +569 -30
- data/lib/polars/list_expr.rb +1 -1
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +192 -27
- data/lib/polars/string_expr.rb +6 -5
- data/lib/polars/string_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +38 -29
- metadata +11 -4
data/lib/polars/functions.rb
CHANGED
@@ -199,12 +199,201 @@ module Polars
|
|
199
199
|
dt_range
|
200
200
|
end
|
201
201
|
|
202
|
-
#
|
203
|
-
#
|
202
|
+
# Bin values into discrete values.
|
203
|
+
#
|
204
|
+
# @param s [Series]
|
205
|
+
# Series to bin.
|
206
|
+
# @param bins [Array]
|
207
|
+
# Bins to create.
|
208
|
+
# @param labels [Array]
|
209
|
+
# Labels to assign to the bins. If given the length of labels must be
|
210
|
+
# bins.length + 1.
|
211
|
+
# @param break_point_label [String]
|
212
|
+
# Name given to the breakpoint column.
|
213
|
+
# @param category_label [String]
|
214
|
+
# Name given to the category column.
|
215
|
+
#
|
216
|
+
# @return [DataFrame]
|
217
|
+
#
|
218
|
+
# @note
|
219
|
+
# This functionality is experimental and may change without it being considered a
|
220
|
+
# breaking change.
|
221
|
+
#
|
222
|
+
# @example
|
223
|
+
# a = Polars::Series.new("a", 13.times.map { |i| (-30 + i * 5) / 10.0 })
|
224
|
+
# Polars.cut(a, [-1, 1])
|
225
|
+
# # =>
|
226
|
+
# # shape: (12, 3)
|
227
|
+
# # ┌──────┬─────────────┬──────────────┐
|
228
|
+
# # │ a ┆ break_point ┆ category │
|
229
|
+
# # │ --- ┆ --- ┆ --- │
|
230
|
+
# # │ f64 ┆ f64 ┆ cat │
|
231
|
+
# # ╞══════╪═════════════╪══════════════╡
|
232
|
+
# # │ -3.0 ┆ -1.0 ┆ (-inf, -1.0] │
|
233
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
234
|
+
# # │ -2.5 ┆ -1.0 ┆ (-inf, -1.0] │
|
235
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
236
|
+
# # │ -2.0 ┆ -1.0 ┆ (-inf, -1.0] │
|
237
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
238
|
+
# # │ -1.5 ┆ -1.0 ┆ (-inf, -1.0] │
|
239
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
240
|
+
# # │ ... ┆ ... ┆ ... │
|
241
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
242
|
+
# # │ 1.0 ┆ 1.0 ┆ (-1.0, 1.0] │
|
243
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
244
|
+
# # │ 1.5 ┆ inf ┆ (1.0, inf] │
|
245
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
246
|
+
# # │ 2.0 ┆ inf ┆ (1.0, inf] │
|
247
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
248
|
+
# # │ 2.5 ┆ inf ┆ (1.0, inf] │
|
249
|
+
# # └──────┴─────────────┴──────────────┘
|
250
|
+
# def cut(
|
251
|
+
# s,
|
252
|
+
# bins,
|
253
|
+
# labels: nil,
|
254
|
+
# break_point_label: "break_point",
|
255
|
+
# category_label: "category"
|
256
|
+
# )
|
257
|
+
# var_nm = s.name
|
204
258
|
|
205
|
-
#
|
259
|
+
# cuts_df = DataFrame.new(
|
260
|
+
# [
|
261
|
+
# Series.new(
|
262
|
+
# break_point_label, bins, dtype: :f64
|
263
|
+
# ).extend_constant(Float::INFINITY, 1)
|
264
|
+
# ]
|
265
|
+
# )
|
266
|
+
|
267
|
+
# if labels
|
268
|
+
# if labels.length != bins.length + 1
|
269
|
+
# raise ArgumentError, "expected more labels"
|
270
|
+
# end
|
271
|
+
# cuts_df = cuts_df.with_column(Series.new(category_label, labels))
|
272
|
+
# else
|
273
|
+
# cuts_df = cuts_df.with_column(
|
274
|
+
# Polars.format(
|
275
|
+
# "({}, {}]",
|
276
|
+
# Polars.col(break_point_label).shift_and_fill(1, -Float::INFINITY),
|
277
|
+
# Polars.col(break_point_label)
|
278
|
+
# ).alias(category_label)
|
279
|
+
# )
|
280
|
+
# end
|
281
|
+
|
282
|
+
# cuts_df = cuts_df.with_column(Polars.col(category_label).cast(:cat))
|
283
|
+
|
284
|
+
# s.cast(:f64)
|
285
|
+
# .sort
|
286
|
+
# .to_frame
|
287
|
+
# .join_asof(
|
288
|
+
# cuts_df,
|
289
|
+
# left_on: var_nm,
|
290
|
+
# right_on: break_point_label,
|
291
|
+
# strategy: "forward"
|
292
|
+
# )
|
206
293
|
# end
|
207
294
|
|
295
|
+
# Align a sequence of frames using the unique values from one or more columns as a key.
|
296
|
+
#
|
297
|
+
# Frames that do not contain the given key values have rows injected (with nulls
|
298
|
+
# filling the non-key columns), and each resulting frame is sorted by the key.
|
299
|
+
#
|
300
|
+
# The original column order of input frames is not changed unless ``select`` is
|
301
|
+
# specified (in which case the final column order is determined from that).
|
302
|
+
#
|
303
|
+
# Note that this does not result in a joined frame - you receive the same number
|
304
|
+
# of frames back that you passed in, but each is now aligned by key and has
|
305
|
+
# the same number of rows.
|
306
|
+
#
|
307
|
+
# @param frames [Array]
|
308
|
+
# Sequence of DataFrames or LazyFrames.
|
309
|
+
# @param on [Object]
|
310
|
+
# One or more columns whose unique values will be used to align the frames.
|
311
|
+
# @param select [Object]
|
312
|
+
# Optional post-alignment column select to constrain and/or order
|
313
|
+
# the columns returned from the newly aligned frames.
|
314
|
+
# @param reverse [Object]
|
315
|
+
# Sort the alignment column values in descending order; can be a single
|
316
|
+
# boolean or a list of booleans associated with each column in `on`.
|
317
|
+
#
|
318
|
+
# @return [Object]
|
319
|
+
#
|
320
|
+
# @example
|
321
|
+
# df1 = Polars::DataFrame.new(
|
322
|
+
# {
|
323
|
+
# "dt" => [Date.new(2022, 9, 1), Date.new(2022, 9, 2), Date.new(2022, 9, 3)],
|
324
|
+
# "x" => [3.5, 4.0, 1.0],
|
325
|
+
# "y" => [10.0, 2.5, 1.5]
|
326
|
+
# }
|
327
|
+
# )
|
328
|
+
# df2 = Polars::DataFrame.new(
|
329
|
+
# {
|
330
|
+
# "dt" => [Date.new(2022, 9, 2), Date.new(2022, 9, 3), Date.new(2022, 9, 1)],
|
331
|
+
# "x" => [8.0, 1.0, 3.5],
|
332
|
+
# "y" => [1.5, 12.0, 5.0]
|
333
|
+
# }
|
334
|
+
# )
|
335
|
+
# df3 = Polars::DataFrame.new(
|
336
|
+
# {
|
337
|
+
# "dt" => [Date.new(2022, 9, 3), Date.new(2022, 9, 2)],
|
338
|
+
# "x" => [2.0, 5.0],
|
339
|
+
# "y" => [2.5, 2.0]
|
340
|
+
# }
|
341
|
+
# )
|
342
|
+
# af1, af2, af3 = Polars.align_frames(
|
343
|
+
# df1, df2, df3, on: "dt", select: ["x", "y"]
|
344
|
+
# )
|
345
|
+
# (af1 * af2 * af3).fill_null(0).select(Polars.sum(Polars.col("*")).alias("dot"))
|
346
|
+
# # =>
|
347
|
+
# # shape: (3, 1)
|
348
|
+
# # ┌───────┐
|
349
|
+
# # │ dot │
|
350
|
+
# # │ --- │
|
351
|
+
# # │ f64 │
|
352
|
+
# # ╞═══════╡
|
353
|
+
# # │ 0.0 │
|
354
|
+
# # ├╌╌╌╌╌╌╌┤
|
355
|
+
# # │ 167.5 │
|
356
|
+
# # ├╌╌╌╌╌╌╌┤
|
357
|
+
# # │ 47.0 │
|
358
|
+
# # └───────┘
|
359
|
+
def align_frames(
|
360
|
+
*frames,
|
361
|
+
on:,
|
362
|
+
select: nil,
|
363
|
+
reverse: false
|
364
|
+
)
|
365
|
+
if frames.empty?
|
366
|
+
return []
|
367
|
+
elsif frames.map(&:class).uniq.length != 1
|
368
|
+
raise TypeError, "Input frames must be of a consistent type (all LazyFrame or all DataFrame)"
|
369
|
+
end
|
370
|
+
|
371
|
+
# establish the superset of all "on" column values, sort, and cache
|
372
|
+
eager = frames[0].is_a?(DataFrame)
|
373
|
+
alignment_frame = (
|
374
|
+
concat(frames.map { |df| df.lazy.select(on) })
|
375
|
+
.unique(maintain_order: false)
|
376
|
+
.sort(on, reverse: reverse)
|
377
|
+
)
|
378
|
+
alignment_frame = (
|
379
|
+
eager ? alignment_frame.collect.lazy : alignment_frame.cache
|
380
|
+
)
|
381
|
+
# finally, align all frames
|
382
|
+
aligned_frames =
|
383
|
+
frames.map do |df|
|
384
|
+
alignment_frame.join(
|
385
|
+
df.lazy,
|
386
|
+
on: alignment_frame.columns,
|
387
|
+
how: "left"
|
388
|
+
).select(df.columns)
|
389
|
+
end
|
390
|
+
if !select.nil?
|
391
|
+
aligned_frames = aligned_frames.map { |df| df.select(select) }
|
392
|
+
end
|
393
|
+
|
394
|
+
eager ? aligned_frames.map(&:collect) : aligned_frames
|
395
|
+
end
|
396
|
+
|
208
397
|
# Return a new Series of given length and type, filled with ones.
|
209
398
|
#
|
210
399
|
# @param n [Integer]
|
@@ -249,8 +438,8 @@ module Polars
|
|
249
438
|
|
250
439
|
def _ensure_datetime(value)
|
251
440
|
is_date_type = false
|
252
|
-
if !value.is_a?(DateTime)
|
253
|
-
value = DateTime.new(value.year, value.month, value.day)
|
441
|
+
if !value.is_a?(::DateTime)
|
442
|
+
value = ::DateTime.new(value.year, value.month, value.day)
|
254
443
|
is_date_type = true
|
255
444
|
end
|
256
445
|
[value, is_date_type]
|
data/lib/polars/group_by.rb
CHANGED
@@ -12,7 +12,48 @@ module Polars
|
|
12
12
|
self.maintain_order = maintain_order
|
13
13
|
end
|
14
14
|
|
15
|
-
#
|
15
|
+
# Apply a custom/user-defined function (UDF) over the groups as a sub-DataFrame.
|
16
|
+
#
|
17
|
+
# Implementing logic using a Ruby function is almost always _significantly_
|
18
|
+
# slower and more memory intensive than implementing the same logic using
|
19
|
+
# the native expression API because:
|
20
|
+
|
21
|
+
# - The native expression engine runs in Rust; UDFs run in Ruby.
|
22
|
+
# - Use of Ruby UDFs forces the DataFrame to be materialized in memory.
|
23
|
+
# - Polars-native expressions can be parallelised (UDFs cannot).
|
24
|
+
# - Polars-native expressions can be logically optimised (UDFs cannot).
|
25
|
+
#
|
26
|
+
# Wherever possible you should strongly prefer the native expression API
|
27
|
+
# to achieve the best performance.
|
28
|
+
#
|
29
|
+
# @return [DataFrame]
|
30
|
+
#
|
31
|
+
# @example
|
32
|
+
# df = Polars::DataFrame.new(
|
33
|
+
# {
|
34
|
+
# "id" => [0, 1, 2, 3, 4],
|
35
|
+
# "color" => ["red", "green", "green", "red", "red"],
|
36
|
+
# "shape" => ["square", "triangle", "square", "triangle", "square"]
|
37
|
+
# }
|
38
|
+
# )
|
39
|
+
# df.groupby("color").apply { |group_df| group_df.sample(2) }
|
40
|
+
# # =>
|
41
|
+
# # shape: (4, 3)
|
42
|
+
# # ┌─────┬───────┬──────────┐
|
43
|
+
# # │ id ┆ color ┆ shape │
|
44
|
+
# # │ --- ┆ --- ┆ --- │
|
45
|
+
# # │ i64 ┆ str ┆ str │
|
46
|
+
# # ╞═════╪═══════╪══════════╡
|
47
|
+
# # │ 1 ┆ green ┆ triangle │
|
48
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
49
|
+
# # │ 2 ┆ green ┆ square │
|
50
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
51
|
+
# # │ 4 ┆ red ┆ square │
|
52
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
53
|
+
# # │ 3 ┆ red ┆ triangle │
|
54
|
+
# # └─────┴───────┴──────────┘
|
55
|
+
# def apply(&f)
|
56
|
+
# _dataframe_class._from_rbdf(_df.groupby_apply(by, f))
|
16
57
|
# end
|
17
58
|
|
18
59
|
# Use multiple aggregations on columns.
|
@@ -182,8 +223,7 @@ module Polars
|
|
182
223
|
_dataframe_class._from_rbdf(df._df)
|
183
224
|
end
|
184
225
|
|
185
|
-
#
|
186
|
-
# end
|
226
|
+
# pivot is deprecated
|
187
227
|
|
188
228
|
# Aggregate the first values in the group.
|
189
229
|
#
|
@@ -294,17 +334,17 @@ module Polars
|
|
294
334
|
# df.groupby("d", maintain_order: true).min
|
295
335
|
# # =>
|
296
336
|
# # shape: (3, 4)
|
297
|
-
# #
|
298
|
-
# # │ d ┆ a ┆ b ┆ c
|
299
|
-
# # │ --- ┆ --- ┆ --- ┆ ---
|
300
|
-
# # │ str ┆ i64 ┆ f64 ┆
|
301
|
-
# #
|
302
|
-
# # │ Apple ┆ 1 ┆ 0.5 ┆
|
303
|
-
# #
|
304
|
-
# # │ Orange ┆ 2 ┆ 0.5 ┆
|
305
|
-
# #
|
306
|
-
# # │ Banana ┆ 4 ┆ 13.0 ┆
|
307
|
-
# #
|
337
|
+
# # ┌────────┬─────┬──────┬───────┐
|
338
|
+
# # │ d ┆ a ┆ b ┆ c │
|
339
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
340
|
+
# # │ str ┆ i64 ┆ f64 ┆ bool │
|
341
|
+
# # ╞════════╪═════╪══════╪═══════╡
|
342
|
+
# # │ Apple ┆ 1 ┆ 0.5 ┆ false │
|
343
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
344
|
+
# # │ Orange ┆ 2 ┆ 0.5 ┆ true │
|
345
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
|
346
|
+
# # │ Banana ┆ 4 ┆ 13.0 ┆ false │
|
347
|
+
# # └────────┴─────┴──────┴───────┘
|
308
348
|
def min
|
309
349
|
agg(Polars.all.min)
|
310
350
|
end
|
@@ -325,17 +365,17 @@ module Polars
|
|
325
365
|
# df.groupby("d", maintain_order: true).max
|
326
366
|
# # =>
|
327
367
|
# # shape: (3, 4)
|
328
|
-
# #
|
329
|
-
# # │ d ┆ a ┆ b ┆ c
|
330
|
-
# # │ --- ┆ --- ┆ --- ┆ ---
|
331
|
-
# # │ str ┆ i64 ┆ f64 ┆
|
332
|
-
# #
|
333
|
-
# # │ Apple ┆ 3 ┆ 10.0 ┆
|
334
|
-
# #
|
335
|
-
# # │ Orange ┆ 2 ┆ 0.5 ┆
|
336
|
-
# #
|
337
|
-
# # │ Banana ┆ 5 ┆ 14.0 ┆
|
338
|
-
# #
|
368
|
+
# # ┌────────┬─────┬──────┬──────┐
|
369
|
+
# # │ d ┆ a ┆ b ┆ c │
|
370
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
371
|
+
# # │ str ┆ i64 ┆ f64 ┆ bool │
|
372
|
+
# # ╞════════╪═════╪══════╪══════╡
|
373
|
+
# # │ Apple ┆ 3 ┆ 10.0 ┆ true │
|
374
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
375
|
+
# # │ Orange ┆ 2 ┆ 0.5 ┆ true │
|
376
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
377
|
+
# # │ Banana ┆ 5 ┆ 14.0 ┆ true │
|
378
|
+
# # └────────┴─────┴──────┴──────┘
|
339
379
|
def max
|
340
380
|
agg(Polars.all.max)
|
341
381
|
end
|
@@ -387,17 +427,17 @@ module Polars
|
|
387
427
|
# df.groupby("d", maintain_order: true).mean
|
388
428
|
# # =>
|
389
429
|
# # shape: (3, 4)
|
390
|
-
# #
|
391
|
-
# # │ d ┆ a ┆ b ┆ c
|
392
|
-
# # │ --- ┆ --- ┆ --- ┆ ---
|
393
|
-
# # │ str ┆ f64 ┆ f64 ┆
|
394
|
-
# #
|
395
|
-
# # │ Apple ┆ 2.0 ┆ 4.833333 ┆
|
396
|
-
# #
|
397
|
-
# # │ Orange ┆ 2.0 ┆ 0.5 ┆
|
398
|
-
# #
|
399
|
-
# # │ Banana ┆ 4.5 ┆ 13.5 ┆
|
400
|
-
# #
|
430
|
+
# # ┌────────┬─────┬──────────┬──────────┐
|
431
|
+
# # │ d ┆ a ┆ b ┆ c │
|
432
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
433
|
+
# # │ str ┆ f64 ┆ f64 ┆ f64 │
|
434
|
+
# # ╞════════╪═════╪══════════╪══════════╡
|
435
|
+
# # │ Apple ┆ 2.0 ┆ 4.833333 ┆ 0.666667 │
|
436
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
437
|
+
# # │ Orange ┆ 2.0 ┆ 0.5 ┆ 1.0 │
|
438
|
+
# # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
|
439
|
+
# # │ Banana ┆ 4.5 ┆ 13.5 ┆ 0.5 │
|
440
|
+
# # └────────┴─────┴──────────┴──────────┘
|
401
441
|
def mean
|
402
442
|
agg(Polars.all.mean)
|
403
443
|
end
|
data/lib/polars/io.rb
CHANGED
@@ -59,7 +59,7 @@ module Polars
|
|
59
59
|
# Lossy means that invalid utf8 values are replaced with `�`
|
60
60
|
# characters. When using other encodings than `utf8` or
|
61
61
|
# `utf8-lossy`, the input is first decoded in memory with
|
62
|
-
#
|
62
|
+
# Ruby.
|
63
63
|
# @param low_memory [Boolean]
|
64
64
|
# Reduce memory usage at expense of performance.
|
65
65
|
# @param rechunk [Boolean]
|
@@ -451,8 +451,24 @@ module Polars
|
|
451
451
|
)
|
452
452
|
end
|
453
453
|
|
454
|
-
#
|
455
|
-
#
|
454
|
+
# Read into a DataFrame from Apache Avro format.
|
455
|
+
#
|
456
|
+
# @param file [Object]
|
457
|
+
# Path to a file or a file-like object.
|
458
|
+
# @param columns [Object]
|
459
|
+
# Columns to select. Accepts a list of column indices (starting at zero) or a list
|
460
|
+
# of column names.
|
461
|
+
# @param n_rows [Integer]
|
462
|
+
# Stop reading from Apache Avro file after reading ``n_rows``.
|
463
|
+
#
|
464
|
+
# @return [DataFrame]
|
465
|
+
def read_avro(file, columns: nil, n_rows: nil)
|
466
|
+
if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
|
467
|
+
file = Utils.format_path(file)
|
468
|
+
end
|
469
|
+
|
470
|
+
DataFrame._read_avro(file, n_rows: n_rows, columns: columns)
|
471
|
+
end
|
456
472
|
|
457
473
|
# Read into a DataFrame from Arrow IPC (Feather v2) file.
|
458
474
|
#
|