polars-df 0.14.0-arm64-darwin → 0.16.0-arm64-darwin

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE-THIRD-PARTY.txt +25665 -14861
  5. data/LICENSE.txt +1 -0
  6. data/README.md +38 -4
  7. data/lib/polars/3.2/polars.bundle +0 -0
  8. data/lib/polars/3.3/polars.bundle +0 -0
  9. data/lib/polars/{3.1 → 3.4}/polars.bundle +0 -0
  10. data/lib/polars/batched_csv_reader.rb +0 -2
  11. data/lib/polars/binary_expr.rb +133 -9
  12. data/lib/polars/binary_name_space.rb +101 -6
  13. data/lib/polars/config.rb +4 -0
  14. data/lib/polars/data_frame.rb +452 -101
  15. data/lib/polars/data_type_group.rb +28 -0
  16. data/lib/polars/data_types.rb +3 -1
  17. data/lib/polars/date_time_expr.rb +244 -0
  18. data/lib/polars/date_time_name_space.rb +87 -0
  19. data/lib/polars/expr.rb +103 -2
  20. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  21. data/lib/polars/functions/as_datatype.rb +51 -2
  22. data/lib/polars/functions/col.rb +1 -1
  23. data/lib/polars/functions/eager.rb +1 -3
  24. data/lib/polars/functions/lazy.rb +95 -13
  25. data/lib/polars/functions/range/time_range.rb +21 -21
  26. data/lib/polars/io/csv.rb +14 -16
  27. data/lib/polars/io/database.rb +2 -2
  28. data/lib/polars/io/delta.rb +126 -0
  29. data/lib/polars/io/ipc.rb +14 -4
  30. data/lib/polars/io/ndjson.rb +10 -0
  31. data/lib/polars/io/parquet.rb +168 -111
  32. data/lib/polars/lazy_frame.rb +684 -20
  33. data/lib/polars/list_name_space.rb +169 -0
  34. data/lib/polars/selectors.rb +1226 -0
  35. data/lib/polars/series.rb +465 -35
  36. data/lib/polars/string_cache.rb +27 -1
  37. data/lib/polars/string_expr.rb +0 -1
  38. data/lib/polars/string_name_space.rb +73 -3
  39. data/lib/polars/struct_name_space.rb +31 -7
  40. data/lib/polars/utils/various.rb +5 -1
  41. data/lib/polars/utils.rb +45 -10
  42. data/lib/polars/version.rb +1 -1
  43. data/lib/polars.rb +17 -1
  44. metadata +9 -8
  45. data/lib/polars/functions.rb +0 -57
@@ -86,8 +86,57 @@ module Polars
86
86
  # Concat the arrays in a Series dtype List in linear time.
87
87
  #
88
88
  # @return [Expr]
89
- def concat_list(exprs)
90
- exprs = Utils.parse_into_list_of_expressions(exprs)
89
+ #
90
+ # @example Concatenate two existing list columns. Null values are propagated.
91
+ # df = Polars::DataFrame.new({"a" => [[1, 2], [3], [4, 5]], "b" => [[4], [], nil]})
92
+ # df.with_columns(concat_list: Polars.concat_list("a", "b"))
93
+ # # =>
94
+ # # shape: (3, 3)
95
+ # # ┌───────────┬───────────┬─────────────┐
96
+ # # │ a ┆ b ┆ concat_list │
97
+ # # │ --- ┆ --- ┆ --- │
98
+ # # │ list[i64] ┆ list[i64] ┆ list[i64] │
99
+ # # ╞═══════════╪═══════════╪═════════════╡
100
+ # # │ [1, 2] ┆ [4] ┆ [1, 2, 4] │
101
+ # # │ [3] ┆ [] ┆ [3] │
102
+ # # │ [4, 5] ┆ null ┆ null │
103
+ # # └───────────┴───────────┴─────────────┘
104
+ #
105
+ # @example Non-list columns are cast to a list before concatenation. The output data type is the supertype of the concatenated columns.
106
+ # df.select("a", concat_list: Polars.concat_list("a", Polars.lit("x")))
107
+ # # =>
108
+ # # shape: (3, 2)
109
+ # # ┌───────────┬─────────────────┐
110
+ # # │ a ┆ concat_list │
111
+ # # │ --- ┆ --- │
112
+ # # │ list[i64] ┆ list[str] │
113
+ # # ╞═══════════╪═════════════════╡
114
+ # # │ [1, 2] ┆ ["1", "2", "x"] │
115
+ # # │ [3] ┆ ["3", "x"] │
116
+ # # │ [4, 5] ┆ ["4", "5", "x"] │
117
+ # # └───────────┴─────────────────┘
118
+ #
119
+ # @example Create lagged columns and collect them into a list. This mimics a rolling window.
120
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 9.0, 2.0, 13.0]})
121
+ # df = df.select(3.times.map { |i| Polars.col("A").shift(i).alias("A_lag_#{i}") })
122
+ # df.select(
123
+ # Polars.concat_list(3.times.map { |i| "A_lag_#{i}" }.reverse).alias("A_rolling")
124
+ # )
125
+ # # =>
126
+ # # shape: (5, 1)
127
+ # # ┌───────────────────┐
128
+ # # │ A_rolling │
129
+ # # │ --- │
130
+ # # │ list[f64] │
131
+ # # ╞═══════════════════╡
132
+ # # │ [null, null, 1.0] │
133
+ # # │ [null, 1.0, 2.0] │
134
+ # # │ [1.0, 2.0, 9.0] │
135
+ # # │ [2.0, 9.0, 2.0] │
136
+ # # │ [9.0, 2.0, 13.0] │
137
+ # # └───────────────────┘
138
+ def concat_list(exprs, *more_exprs)
139
+ exprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
91
140
  Utils.wrap_expr(Plr.concat_list(exprs))
92
141
  end
93
142
 
@@ -23,7 +23,7 @@ module Polars
23
23
  Utils.wrap_expr(Plr.col(name.to_s))
24
24
  elsif Utils.is_polars_dtype(name)
25
25
  Utils.wrap_expr(Plr.dtype_cols([name]))
26
- elsif name.is_a?(::Array)
26
+ elsif name.is_a?(::Array) || name.is_a?(::Set)
27
27
  names = Array(name)
28
28
  if names.empty?
29
29
  return Utils.wrap_expr(Plr.cols(names))
@@ -127,7 +127,7 @@ module Polars
127
127
  # af1, af2, af3 = Polars.align_frames(
128
128
  # df1, df2, df3, on: "dt", select: ["x", "y"]
129
129
  # )
130
- # (af1 * af2 * af3).fill_null(0).select(Polars.sum(Polars.col("*")).alias("dot"))
130
+ # (af1 * af2 * af3).fill_null(0).select(Polars.sum_horizontal("*").alias("dot"))
131
131
  # # =>
132
132
  # # shape: (3, 1)
133
133
  # # ┌───────┐
@@ -136,9 +136,7 @@ module Polars
136
136
  # # │ f64 │
137
137
  # # ╞═══════╡
138
138
  # # │ 0.0 │
139
- # # ├╌╌╌╌╌╌╌┤
140
139
  # # │ 167.5 │
141
- # # ├╌╌╌╌╌╌╌┤
142
140
  # # │ 47.0 │
143
141
  # # └───────┘
144
142
  def align_frames(
@@ -729,16 +729,20 @@ module Polars
729
729
  a,
730
730
  b,
731
731
  method: "pearson",
732
- ddof: 1,
732
+ ddof: nil,
733
733
  propagate_nans: false
734
734
  )
735
+ if !ddof.nil?
736
+ warn "The `ddof` parameter has no effect. Do not use it."
737
+ end
738
+
735
739
  a = Utils.parse_into_expression(a)
736
740
  b = Utils.parse_into_expression(b)
737
741
 
738
742
  if method == "pearson"
739
- Utils.wrap_expr(Plr.pearson_corr(a, b, ddof))
743
+ Utils.wrap_expr(Plr.pearson_corr(a, b))
740
744
  elsif method == "spearman"
741
- Utils.wrap_expr(Plr.spearman_rank_corr(a, b, ddof, propagate_nans))
745
+ Utils.wrap_expr(Plr.spearman_rank_corr(a, b, propagate_nans))
742
746
  else
743
747
  msg = "method must be one of {{'pearson', 'spearman'}}, got #{method}"
744
748
  raise ArgumentError, msg
@@ -824,6 +828,29 @@ module Polars
824
828
  # @note
825
829
  # If you simply want the first encountered expression as accumulator,
826
830
  # consider using `cumreduce`.
831
+ #
832
+ # @example
833
+ # df = Polars::DataFrame.new(
834
+ # {
835
+ # "a" => [1, 2, 3],
836
+ # "b" => [3, 4, 5],
837
+ # "c" => [5, 6, 7]
838
+ # }
839
+ # )
840
+ # df.with_columns(
841
+ # Polars.cum_fold(Polars.lit(1), ->(acc, x) { acc + x }, Polars.all)
842
+ # )
843
+ # # =>
844
+ # # shape: (3, 4)
845
+ # # ┌─────┬─────┬─────┬───────────┐
846
+ # # │ a ┆ b ┆ c ┆ cum_fold │
847
+ # # │ --- ┆ --- ┆ --- ┆ --- │
848
+ # # │ i64 ┆ i64 ┆ i64 ┆ struct[3] │
849
+ # # ╞═════╪═════╪═════╪═══════════╡
850
+ # # │ 1 ┆ 3 ┆ 5 ┆ {2,5,10} │
851
+ # # │ 2 ┆ 4 ┆ 6 ┆ {3,7,13} │
852
+ # # │ 3 ┆ 5 ┆ 7 ┆ {4,9,16} │
853
+ # # └─────┴─────┴─────┴───────────┘
827
854
  def cum_fold(acc, f, exprs, include_init: false)
828
855
  acc = Utils.parse_into_expression(acc, str_as_lit: true)
829
856
  if exprs.is_a?(Expr)
@@ -831,7 +858,7 @@ module Polars
831
858
  end
832
859
 
833
860
  exprs = Utils.parse_into_list_of_expressions(exprs)
834
- Utils.wrap_expr(Plr.cum_fold(acc, f, exprs, include_init))
861
+ Utils.wrap_expr(Plr.cum_fold(acc, f, exprs, include_init)._alias("cum_fold"))
835
862
  end
836
863
  alias_method :cumfold, :cum_fold
837
864
 
@@ -1024,15 +1051,70 @@ module Polars
1024
1051
  # Default is ascending.
1025
1052
  #
1026
1053
  # @return [Expr]
1027
- def arg_sort_by(exprs, reverse: false)
1028
- if !exprs.is_a?(::Array)
1029
- exprs = [exprs]
1030
- end
1031
- if reverse == true || reverse == false
1032
- reverse = [reverse] * exprs.length
1033
- end
1034
- exprs = Utils.parse_into_list_of_expressions(exprs)
1035
- Utils.wrap_expr(Plr.arg_sort_by(exprs, reverse))
1054
+ #
1055
+ # @example Pass a single column name to compute the arg sort by that column.
1056
+ # df = Polars::DataFrame.new(
1057
+ # {
1058
+ # "a" => [0, 1, 1, 0],
1059
+ # "b" => [3, 2, 3, 2],
1060
+ # "c" => [1, 2, 3, 4]
1061
+ # }
1062
+ # )
1063
+ # df.select(Polars.arg_sort_by("a"))
1064
+ # # =>
1065
+ # # shape: (4, 1)
1066
+ # # ┌─────┐
1067
+ # # │ a │
1068
+ # # │ --- │
1069
+ # # │ u32 │
1070
+ # # ╞═════╡
1071
+ # # │ 0 │
1072
+ # # │ 3 │
1073
+ # # │ 1 │
1074
+ # # │ 2 │
1075
+ # # └─────┘
1076
+ #
1077
+ # @example Compute the arg sort by multiple columns by either passing a list of columns, or by specifying each column as a positional argument.
1078
+ # df.select(Polars.arg_sort_by(["a", "b"], reverse: true))
1079
+ # # =>
1080
+ # # shape: (4, 1)
1081
+ # # ┌─────┐
1082
+ # # │ a │
1083
+ # # │ --- │
1084
+ # # │ u32 │
1085
+ # # ╞═════╡
1086
+ # # │ 2 │
1087
+ # # │ 1 │
1088
+ # # │ 0 │
1089
+ # # │ 3 │
1090
+ # # └─────┘
1091
+ #
1092
+ # @example Use gather to apply the arg sort to other columns.
1093
+ # df.select(Polars.col("c").gather(Polars.arg_sort_by("a")))
1094
+ # # =>
1095
+ # # shape: (4, 1)
1096
+ # # ┌─────┐
1097
+ # # │ c │
1098
+ # # │ --- │
1099
+ # # │ i64 │
1100
+ # # ╞═════╡
1101
+ # # │ 1 │
1102
+ # # │ 4 │
1103
+ # # │ 2 │
1104
+ # # │ 3 │
1105
+ # # └─────┘
1106
+ def arg_sort_by(
1107
+ exprs,
1108
+ *more_exprs,
1109
+ reverse: false,
1110
+ nulls_last: false,
1111
+ multithreaded: true,
1112
+ maintain_order: false
1113
+ )
1114
+ exprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
1115
+ reverse = Utils.extend_bool(reverse, exprs.length, "reverse", "exprs")
1116
+ nulls_last = Utils.extend_bool(nulls_last, exprs.length, "nulls_last", "exprs")
1117
+ Utils.wrap_expr(Plr.arg_sort_by(exprs, reverse, nulls_last, multithreaded, maintain_order))
1036
1118
  end
1037
1119
  alias_method :argsort_by, :arg_sort_by
1038
1120
 
@@ -18,7 +18,7 @@ module Polars
18
18
  #
19
19
  # @example
20
20
  # Polars.time_range(
21
- # time(14, 0),
21
+ # Time.utc(2000, 1, 1, 14, 0),
22
22
  # nil,
23
23
  # "3h15m",
24
24
  # eager: true
@@ -48,12 +48,12 @@ module Polars
48
48
  end
49
49
 
50
50
  if start.nil?
51
- # start = time(0, 0, 0)
52
- raise Todo
51
+ # date part is ignored
52
+ start = ::Time.utc(2000, 1, 1, 0, 0, 0)
53
53
  end
54
54
  if stop.nil?
55
- # stop = time(23, 59, 59, 999999)
56
- raise Todo
55
+ # date part is ignored
56
+ stop = ::Time.utc(2000, 1, 1, 23, 59, 59, 999999)
57
57
  end
58
58
 
59
59
  start_rbexpr = Utils.parse_into_expression(start)
@@ -87,21 +87,21 @@ module Polars
87
87
  # @example
88
88
  # df = Polars::DataFrame.new(
89
89
  # {
90
- # "start" => [time(9, 0), time(10, 0)],
91
- # "end" => time(11, 0)
90
+ # "start" => [Time.utc(2000, 1, 1, 9, 0), Time.utc(2000, 1, 1, 10, 0)],
91
+ # "end" => Time.utc(2000, 1, 1, 11, 0)
92
92
  # }
93
93
  # )
94
- # df.with_columns(time_range: Polars.time_ranges("start", "end"))
94
+ # df.select(time_range: Polars.time_ranges("start", "end"))
95
95
  # # =>
96
- # # shape: (2, 3)
97
- # # ┌──────────┬──────────┬────────────────────────────────┐
98
- # # │ start ┆ end ┆ time_range │
99
- # # │ --- ┆ --- ┆ ---
100
- # # │ time ┆ time ┆ list[time] │
101
- # # ╞══════════╪══════════╪════════════════════════════════╡
102
- # # │ 09:00:00 ┆ 11:00:00 ┆ [09:00:00, 10:00:00, 11:00:00] │
103
- # # │ 10:00:00 ┆ 11:00:00 ┆ [10:00:00, 11:00:00] │
104
- # # └──────────┴──────────┴────────────────────────────────┘
96
+ # # shape: (2, 1)
97
+ # # ┌────────────────────────────────┐
98
+ # # │ time_range │
99
+ # # │ --- │
100
+ # # │ list[time] │
101
+ # # ╞════════════════════════════════╡
102
+ # # │ [09:00:00, 10:00:00, 11:00:00] │
103
+ # # │ [10:00:00, 11:00:00] │
104
+ # # └────────────────────────────────┘
105
105
  def time_ranges(
106
106
  start = nil,
107
107
  stop = nil,
@@ -118,12 +118,12 @@ module Polars
118
118
  end
119
119
 
120
120
  if start.nil?
121
- # start = time(0, 0, 0)
122
- raise Todo
121
+ # date part is ignored
122
+ start = ::Time.utc(2000, 1, 1, 0, 0, 0)
123
123
  end
124
124
  if stop.nil?
125
- # stop = time(23, 59, 59, 999999)
126
- raise Todo
125
+ # date part is ignored
126
+ stop = ::Time.utc(2000, 1, 1, 23, 59, 59, 999999)
127
127
  end
128
128
 
129
129
  start_rbexpr = Utils.parse_into_expression(start)
data/lib/polars/io/csv.rb CHANGED
@@ -75,9 +75,6 @@ module Polars
75
75
  # the DataFrame.
76
76
  # @param row_count_offset [Integer]
77
77
  # Offset to start the row_count column (only used if the name is set).
78
- # @param sample_size [Integer]
79
- # Set the sample size. This is used to sample statistics to estimate the
80
- # allocation needed.
81
78
  # @param eol_char [String]
82
79
  # Single byte end of line character.
83
80
  # @param truncate_ragged_lines [Boolean]
@@ -114,7 +111,6 @@ module Polars
114
111
  skip_rows_after_header: 0,
115
112
  row_count_name: nil,
116
113
  row_count_offset: 0,
117
- sample_size: 1024,
118
114
  eol_char: "\n",
119
115
  truncate_ragged_lines: false
120
116
  )
@@ -163,7 +159,6 @@ module Polars
163
159
  skip_rows_after_header: skip_rows_after_header,
164
160
  row_count_name: row_count_name,
165
161
  row_count_offset: row_count_offset,
166
- sample_size: sample_size,
167
162
  eol_char: eol_char,
168
163
  truncate_ragged_lines: truncate_ragged_lines
169
164
  )
@@ -201,7 +196,6 @@ module Polars
201
196
  skip_rows_after_header: 0,
202
197
  row_count_name: nil,
203
198
  row_count_offset: 0,
204
- sample_size: 1024,
205
199
  eol_char: "\n",
206
200
  raise_if_empty: true,
207
201
  truncate_ragged_lines: false,
@@ -305,7 +299,6 @@ module Polars
305
299
  parse_dates,
306
300
  skip_rows_after_header,
307
301
  Utils.parse_row_index_args(row_count_name, row_count_offset),
308
- sample_size,
309
302
  eol_char,
310
303
  raise_if_empty,
311
304
  truncate_ragged_lines,
@@ -392,9 +385,6 @@ module Polars
392
385
  # the DataFrame.
393
386
  # @param row_count_offset [Integer]
394
387
  # Offset to start the row_count column (only used if the name is set).
395
- # @param sample_size [Integer]
396
- # Set the sample size. This is used to sample statistics to estimate the
397
- # allocation needed.
398
388
  # @param eol_char [String]
399
389
  # Single byte end of line character.
400
390
  # @param truncate_ragged_lines [Boolean]
@@ -431,7 +421,6 @@ module Polars
431
421
  skip_rows_after_header: 0,
432
422
  row_count_name: nil,
433
423
  row_count_offset: 0,
434
- sample_size: 1024,
435
424
  eol_char: "\n",
436
425
  raise_if_empty: true,
437
426
  truncate_ragged_lines: false,
@@ -474,7 +463,6 @@ module Polars
474
463
  skip_rows_after_header: skip_rows_after_header,
475
464
  row_count_name: row_count_name,
476
465
  row_count_offset: row_count_offset,
477
- sample_size: sample_size,
478
466
  eol_char: eol_char,
479
467
  new_columns: new_columns,
480
468
  raise_if_empty: raise_if_empty,
@@ -618,7 +606,7 @@ module Polars
618
606
 
619
607
  # @private
620
608
  def _scan_csv_impl(
621
- file,
609
+ source,
622
610
  has_header: true,
623
611
  sep: ",",
624
612
  comment_char: nil,
@@ -650,9 +638,16 @@ module Polars
650
638
  end
651
639
  processed_null_values = Utils._process_null_values(null_values)
652
640
 
641
+ if source.is_a?(::Array)
642
+ sources = source
643
+ source = nil
644
+ else
645
+ sources = []
646
+ end
647
+
653
648
  rblf =
654
649
  RbLazyFrame.new_from_csv(
655
- file,
650
+ source,
656
651
  sep,
657
652
  has_header,
658
653
  ignore_errors,
@@ -672,7 +667,8 @@ module Polars
672
667
  Utils.parse_row_index_args(row_count_name, row_count_offset),
673
668
  parse_dates,
674
669
  eol_char,
675
- truncate_ragged_lines
670
+ truncate_ragged_lines,
671
+ sources
676
672
  )
677
673
  Utils.wrap_ldf(rblf)
678
674
  end
@@ -681,7 +677,9 @@ module Polars
681
677
 
682
678
  def _prepare_file_arg(file)
683
679
  if file.is_a?(::String) && file =~ /\Ahttps?:\/\//
684
- raise ArgumentError, "use URI(...) for remote files"
680
+ require "uri"
681
+
682
+ file = URI(file)
685
683
  end
686
684
 
687
685
  if defined?(URI) && file.is_a?(URI)
@@ -18,9 +18,9 @@ module Polars
18
18
  if query.is_a?(ActiveRecord::Result)
19
19
  query
20
20
  elsif query.is_a?(ActiveRecord::Relation)
21
- query.connection.select_all(query.to_sql)
21
+ query.connection_pool.with_connection { |c| c.select_all(query.to_sql) }
22
22
  elsif query.is_a?(::String)
23
- ActiveRecord::Base.connection.select_all(query)
23
+ ActiveRecord::Base.connection_pool.with_connection { |c| c.select_all(query) }
24
24
  else
25
25
  raise ArgumentError, "Expected ActiveRecord::Relation, ActiveRecord::Result, or String"
26
26
  end
@@ -0,0 +1,126 @@
module Polars
  module IO
    # Reads into a DataFrame from a Delta lake table.
    #
    # @param source [Object]
    #   DeltaTable or a Path or URI to the root of the Delta lake table.
    # @param version [Object]
    #   Numerical version or timestamp version of the Delta lake table.
    # @param columns [Array]
    #   Columns to select. Accepts a list of column names.
    # @param rechunk [Boolean]
    #   Make sure that all columns are contiguous in memory by
    #   aggregating the chunks into a single array.
    # @param storage_options [Hash]
    #   Extra options for the storage backends supported by `deltalake-rb`.
    # @param delta_table_options [Hash]
    #   Additional keyword arguments while reading a Delta lake Table.
    #
    # @return [DataFrame]
    def read_delta(
      source,
      version: nil,
      columns: nil,
      rechunk: false,
      storage_options: nil,
      delta_table_options: nil
    )
      dl_tbl =
        _get_delta_lake_table(
          source,
          version: version,
          storage_options: storage_options,
          delta_table_options: delta_table_options
        )

      dl_tbl.to_polars(columns: columns, rechunk: rechunk)
    end

    # Lazily read from a Delta lake table.
    #
    # @param source [Object]
    #   DeltaTable or a Path or URI to the root of the Delta lake table.
    # @param version [Object]
    #   Numerical version or timestamp version of the Delta lake table.
    # @param storage_options [Hash]
    #   Extra options for the storage backends supported by `deltalake-rb`.
    # @param delta_table_options [Hash]
    #   Additional keyword arguments while reading a Delta lake Table.
    #
    # @return [LazyFrame]
    def scan_delta(
      source,
      version: nil,
      storage_options: nil,
      delta_table_options: nil
    )
      dl_tbl =
        _get_delta_lake_table(
          source,
          version: version,
          storage_options: storage_options,
          delta_table_options: delta_table_options
        )

      dl_tbl.to_polars(eager: false)
    end

    private

    # Resolves the table location handed to `DeltaLake::Table.new`.
    #
    # Locations whose parsed scheme is an empty string are passed through
    # `Utils.normalize_filepath`; everything else is returned untouched.
    #
    # @param table_uri [Object]
    #   Path or URI of the table root.
    # @param strict [Boolean]
    #   Currently unused by this method.
    #
    # @return [Object]
    def _resolve_delta_lake_uri(table_uri, strict: true)
      require "uri"

      parsed_result = URI(table_uri)

      # NOTE(review): Ruby's URI#scheme returns nil (not "") for scheme-less
      # input, so this branch looks like it is never taken. Confirm how
      # Utils.normalize_filepath treats directories before changing the check.
      resolved_uri =
        if parsed_result.scheme == ""
          Utils.normalize_filepath(table_uri)
        else
          table_uri
        end

      resolved_uri
    end

    # Builds (or passes through) the `DeltaLake::Table` for a source.
    #
    # @param table_path [Object]
    #   An existing `DeltaLake::Table`, or a path/URI to the table root.
    # @param version [Object]
    #   Version to load. String and Time values are applied with
    #   `load_as_version` after construction; any other value (including nil)
    #   is handed to the constructor as `version:`.
    # @param storage_options [Hash]
    #   Forwarded to `DeltaLake::Table.new`.
    # @param delta_table_options [Hash]
    #   Extra keyword arguments splatted into `DeltaLake::Table.new`.
    #
    # @return [DeltaLake::Table]
    def _get_delta_lake_table(
      table_path,
      version: nil,
      storage_options: nil,
      delta_table_options: nil
    )
      _check_if_delta_available

      # An already constructed table is returned as-is; the remaining
      # arguments are not applied to it.
      return table_path if table_path.is_a?(DeltaLake::Table)

      delta_table_options ||= {}
      resolved_uri = _resolve_delta_lake_uri(table_path)

      if version.is_a?(::String) || version.is_a?(::Time)
        dl_tbl =
          DeltaLake::Table.new(
            resolved_uri,
            storage_options: storage_options,
            **delta_table_options
          )
        dl_tbl.load_as_version(version)
        dl_tbl
      else
        # Return the configured table. Rebuilding it from `table_path` alone
        # would drop the version, storage options and table options.
        DeltaLake::Table.new(
          resolved_uri,
          version: version,
          storage_options: storage_options,
          **delta_table_options
        )
      end
    end

    # Raises unless the `DeltaLake` constant has been loaded.
    #
    # @return [nil]
    def _check_if_delta_available
      if !defined?(DeltaLake)
        raise Error, "Delta Lake not available"
      end
    end
  end
end
data/lib/polars/io/ipc.rb CHANGED
@@ -233,7 +233,7 @@ module Polars
233
233
 
234
234
  # @private
235
235
  def _scan_ipc_impl(
236
- file,
236
+ source,
237
237
  n_rows: nil,
238
238
  cache: true,
239
239
  rechunk: true,
@@ -245,13 +245,23 @@ module Polars
245
245
  try_parse_hive_dates: true,
246
246
  include_file_paths: nil
247
247
  )
248
- if Utils.pathlike?(file)
249
- file = Utils.normalize_filepath(file)
248
+ sources = []
249
+ if Utils.pathlike?(source)
250
+ source = Utils.normalize_filepath(source)
251
+ elsif source.is_a?(::Array)
252
+ if Utils.is_path_or_str_sequence(source)
253
+ sources = source.map { |s| Utils.normalize_filepath(s) }
254
+ else
255
+ sources = source
256
+ end
257
+
258
+ source = nil
250
259
  end
251
260
 
252
261
  rblf =
253
262
  RbLazyFrame.new_from_ipc(
254
- file,
263
+ source,
264
+ sources,
255
265
  n_rows,
256
266
  cache,
257
267
  rechunk,
@@ -60,13 +60,23 @@ module Polars
60
60
  row_count_name: nil,
61
61
  row_count_offset: 0
62
62
  )
63
+ sources = []
63
64
  if Utils.pathlike?(source)
64
65
  source = Utils.normalize_filepath(source)
66
+ elsif source.is_a?(::Array)
67
+ if Utils.is_path_or_str_sequence(source)
68
+ sources = source.map { |s| Utils.normalize_filepath(s) }
69
+ else
70
+ sources = source
71
+ end
72
+
73
+ source = nil
65
74
  end
66
75
 
67
76
  rblf =
68
77
  RbLazyFrame.new_from_ndjson(
69
78
  source,
79
+ sources,
70
80
  infer_schema_length,
71
81
  batch_size,
72
82
  n_rows,