polars-df 0.14.0-arm64-darwin → 0.16.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE-THIRD-PARTY.txt +25665 -14861
  5. data/LICENSE.txt +1 -0
  6. data/README.md +38 -4
  7. data/lib/polars/3.2/polars.bundle +0 -0
  8. data/lib/polars/3.3/polars.bundle +0 -0
  9. data/lib/polars/{3.1 → 3.4}/polars.bundle +0 -0
  10. data/lib/polars/batched_csv_reader.rb +0 -2
  11. data/lib/polars/binary_expr.rb +133 -9
  12. data/lib/polars/binary_name_space.rb +101 -6
  13. data/lib/polars/config.rb +4 -0
  14. data/lib/polars/data_frame.rb +452 -101
  15. data/lib/polars/data_type_group.rb +28 -0
  16. data/lib/polars/data_types.rb +3 -1
  17. data/lib/polars/date_time_expr.rb +244 -0
  18. data/lib/polars/date_time_name_space.rb +87 -0
  19. data/lib/polars/expr.rb +103 -2
  20. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  21. data/lib/polars/functions/as_datatype.rb +51 -2
  22. data/lib/polars/functions/col.rb +1 -1
  23. data/lib/polars/functions/eager.rb +1 -3
  24. data/lib/polars/functions/lazy.rb +95 -13
  25. data/lib/polars/functions/range/time_range.rb +21 -21
  26. data/lib/polars/io/csv.rb +14 -16
  27. data/lib/polars/io/database.rb +2 -2
  28. data/lib/polars/io/delta.rb +126 -0
  29. data/lib/polars/io/ipc.rb +14 -4
  30. data/lib/polars/io/ndjson.rb +10 -0
  31. data/lib/polars/io/parquet.rb +168 -111
  32. data/lib/polars/lazy_frame.rb +684 -20
  33. data/lib/polars/list_name_space.rb +169 -0
  34. data/lib/polars/selectors.rb +1226 -0
  35. data/lib/polars/series.rb +465 -35
  36. data/lib/polars/string_cache.rb +27 -1
  37. data/lib/polars/string_expr.rb +0 -1
  38. data/lib/polars/string_name_space.rb +73 -3
  39. data/lib/polars/struct_name_space.rb +31 -7
  40. data/lib/polars/utils/various.rb +5 -1
  41. data/lib/polars/utils.rb +45 -10
  42. data/lib/polars/version.rb +1 -1
  43. data/lib/polars.rb +17 -1
  44. metadata +9 -8
  45. data/lib/polars/functions.rb +0 -57
@@ -86,8 +86,57 @@ module Polars
86
86
  # Concat the arrays in a Series dtype List in linear time.
87
87
  #
88
88
  # @return [Expr]
89
- def concat_list(exprs)
90
- exprs = Utils.parse_into_list_of_expressions(exprs)
89
+ #
90
+ # @example Concatenate two existing list columns. Null values are propagated.
91
+ # df = Polars::DataFrame.new({"a" => [[1, 2], [3], [4, 5]], "b" => [[4], [], nil]})
92
+ # df.with_columns(concat_list: Polars.concat_list("a", "b"))
93
+ # # =>
94
+ # # shape: (3, 3)
95
+ # # ┌───────────┬───────────┬─────────────┐
96
+ # # │ a ┆ b ┆ concat_list │
97
+ # # │ --- ┆ --- ┆ --- │
98
+ # # │ list[i64] ┆ list[i64] ┆ list[i64] │
99
+ # # ╞═══════════╪═══════════╪═════════════╡
100
+ # # │ [1, 2] ┆ [4] ┆ [1, 2, 4] │
101
+ # # │ [3] ┆ [] ┆ [3] │
102
+ # # │ [4, 5] ┆ null ┆ null │
103
+ # # └───────────┴───────────┴─────────────┘
104
+ #
105
+ # @example Non-list columns are cast to a list before concatenation. The output data type is the supertype of the concatenated columns.
106
+ # df.select("a", concat_list: Polars.concat_list("a", Polars.lit("x")))
107
+ # # =>
108
+ # # shape: (3, 2)
109
+ # # ┌───────────┬─────────────────┐
110
+ # # │ a ┆ concat_list │
111
+ # # │ --- ┆ --- │
112
+ # # │ list[i64] ┆ list[str] │
113
+ # # ╞═══════════╪═════════════════╡
114
+ # # │ [1, 2] ┆ ["1", "2", "x"] │
115
+ # # │ [3] ┆ ["3", "x"] │
116
+ # # │ [4, 5] ┆ ["4", "5", "x"] │
117
+ # # └───────────┴─────────────────┘
118
+ #
119
+ # @example Create lagged columns and collect them into a list. This mimics a rolling window.
120
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 9.0, 2.0, 13.0]})
121
+ # df = df.select(3.times.map { |i| Polars.col("A").shift(i).alias("A_lag_#{i}") })
122
+ # df.select(
123
+ # Polars.concat_list(3.times.map { |i| "A_lag_#{i}" }.reverse).alias("A_rolling")
124
+ # )
125
+ # # =>
126
+ # # shape: (5, 1)
127
+ # # ┌───────────────────┐
128
+ # # │ A_rolling │
129
+ # # │ --- │
130
+ # # │ list[f64] │
131
+ # # ╞═══════════════════╡
132
+ # # │ [null, null, 1.0] │
133
+ # # │ [null, 1.0, 2.0] │
134
+ # # │ [1.0, 2.0, 9.0] │
135
+ # # │ [2.0, 9.0, 2.0] │
136
+ # # │ [9.0, 2.0, 13.0] │
137
+ # # └───────────────────┘
138
def concat_list(exprs, *more_exprs)
  # Accept either one collection of columns/expressions or several
  # positional ones, then hand the parsed list to the native binding.
  rbexprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
  Utils.wrap_expr(Plr.concat_list(rbexprs))
end
93
142
 
@@ -23,7 +23,7 @@ module Polars
23
23
  Utils.wrap_expr(Plr.col(name.to_s))
24
24
  elsif Utils.is_polars_dtype(name)
25
25
  Utils.wrap_expr(Plr.dtype_cols([name]))
26
- elsif name.is_a?(::Array)
26
+ elsif name.is_a?(::Array) || name.is_a?(::Set)
27
27
  names = Array(name)
28
28
  if names.empty?
29
29
  return Utils.wrap_expr(Plr.cols(names))
@@ -127,7 +127,7 @@ module Polars
127
127
  # af1, af2, af3 = Polars.align_frames(
128
128
  # df1, df2, df3, on: "dt", select: ["x", "y"]
129
129
  # )
130
- # (af1 * af2 * af3).fill_null(0).select(Polars.sum(Polars.col("*")).alias("dot"))
130
+ # (af1 * af2 * af3).fill_null(0).select(Polars.sum_horizontal("*").alias("dot"))
131
131
  # # =>
132
132
  # # shape: (3, 1)
133
133
  # # ┌───────┐
@@ -136,9 +136,7 @@ module Polars
136
136
  # # │ f64 │
137
137
  # # ╞═══════╡
138
138
  # # │ 0.0 │
139
- # # ├╌╌╌╌╌╌╌┤
140
139
  # # │ 167.5 │
141
- # # ├╌╌╌╌╌╌╌┤
142
140
  # # │ 47.0 │
143
141
  # # └───────┘
144
142
  def align_frames(
@@ -729,16 +729,20 @@ module Polars
729
729
  a,
730
730
  b,
731
731
  method: "pearson",
732
- ddof: 1,
732
+ ddof: nil,
733
733
  propagate_nans: false
734
734
  )
735
+ if !ddof.nil?
736
+ warn "The `ddof` parameter has no effect. Do not use it."
737
+ end
738
+
735
739
  a = Utils.parse_into_expression(a)
736
740
  b = Utils.parse_into_expression(b)
737
741
 
738
742
  if method == "pearson"
739
- Utils.wrap_expr(Plr.pearson_corr(a, b, ddof))
743
+ Utils.wrap_expr(Plr.pearson_corr(a, b))
740
744
  elsif method == "spearman"
741
- Utils.wrap_expr(Plr.spearman_rank_corr(a, b, ddof, propagate_nans))
745
+ Utils.wrap_expr(Plr.spearman_rank_corr(a, b, propagate_nans))
742
746
  else
743
747
  msg = "method must be one of {{'pearson', 'spearman'}}, got #{method}"
744
748
  raise ArgumentError, msg
@@ -824,6 +828,29 @@ module Polars
824
828
  # @note
825
829
  # If you simply want the first encountered expression as accumulator,
826
830
  # consider using `cumreduce`.
831
+ #
832
+ # @example
833
+ # df = Polars::DataFrame.new(
834
+ # {
835
+ # "a" => [1, 2, 3],
836
+ # "b" => [3, 4, 5],
837
+ # "c" => [5, 6, 7]
838
+ # }
839
+ # )
840
+ # df.with_columns(
841
+ # Polars.cum_fold(Polars.lit(1), ->(acc, x) { acc + x }, Polars.all)
842
+ # )
843
+ # # =>
844
+ # # shape: (3, 4)
845
+ # # ┌─────┬─────┬─────┬───────────┐
846
+ # # │ a ┆ b ┆ c ┆ cum_fold │
847
+ # # │ --- ┆ --- ┆ --- ┆ --- │
848
+ # # │ i64 ┆ i64 ┆ i64 ┆ struct[3] │
849
+ # # ╞═════╪═════╪═════╪═══════════╡
850
+ # # │ 1 ┆ 3 ┆ 5 ┆ {2,5,10} │
851
+ # # │ 2 ┆ 4 ┆ 6 ┆ {3,7,13} │
852
+ # # │ 3 ┆ 5 ┆ 7 ┆ {4,9,16} │
853
+ # # └─────┴─────┴─────┴───────────┘
827
854
  def cum_fold(acc, f, exprs, include_init: false)
828
855
  acc = Utils.parse_into_expression(acc, str_as_lit: true)
829
856
  if exprs.is_a?(Expr)
@@ -831,7 +858,7 @@ module Polars
831
858
  end
832
859
 
833
860
  exprs = Utils.parse_into_list_of_expressions(exprs)
834
- Utils.wrap_expr(Plr.cum_fold(acc, f, exprs, include_init))
861
+ Utils.wrap_expr(Plr.cum_fold(acc, f, exprs, include_init)._alias("cum_fold"))
835
862
  end
836
863
  alias_method :cumfold, :cum_fold
837
864
 
@@ -1024,15 +1051,70 @@ module Polars
1024
1051
  # Default is ascending.
1025
1052
  #
1026
1053
  # @return [Expr]
1027
- def arg_sort_by(exprs, reverse: false)
1028
- if !exprs.is_a?(::Array)
1029
- exprs = [exprs]
1030
- end
1031
- if reverse == true || reverse == false
1032
- reverse = [reverse] * exprs.length
1033
- end
1034
- exprs = Utils.parse_into_list_of_expressions(exprs)
1035
- Utils.wrap_expr(Plr.arg_sort_by(exprs, reverse))
1054
+ #
1055
+ # @example Pass a single column name to compute the arg sort by that column.
1056
+ # df = Polars::DataFrame.new(
1057
+ # {
1058
+ # "a" => [0, 1, 1, 0],
1059
+ # "b" => [3, 2, 3, 2],
1060
+ # "c" => [1, 2, 3, 4]
1061
+ # }
1062
+ # )
1063
+ # df.select(Polars.arg_sort_by("a"))
1064
+ # # =>
1065
+ # # shape: (4, 1)
1066
+ # # ┌─────┐
1067
+ # # │ a │
1068
+ # # │ --- │
1069
+ # # │ u32 │
1070
+ # # ╞═════╡
1071
+ # # │ 0 │
1072
+ # # │ 3 │
1073
+ # # │ 1 │
1074
+ # # │ 2 │
1075
+ # # └─────┘
1076
+ #
1077
+ # @example Compute the arg sort by multiple columns by either passing a list of columns, or by specifying each column as a positional argument.
1078
+ # df.select(Polars.arg_sort_by(["a", "b"], reverse: true))
1079
+ # # =>
1080
+ # # shape: (4, 1)
1081
+ # # ┌─────┐
1082
+ # # │ a │
1083
+ # # │ --- │
1084
+ # # │ u32 │
1085
+ # # ╞═════╡
1086
+ # # │ 2 │
1087
+ # # │ 1 │
1088
+ # # │ 0 │
1089
+ # # │ 3 │
1090
+ # # └─────┘
1091
+ #
1092
+ # @example Use gather to apply the arg sort to other columns.
1093
+ # df.select(Polars.col("c").gather(Polars.arg_sort_by("a")))
1094
+ # # =>
1095
+ # # shape: (4, 1)
1096
+ # # ┌─────┐
1097
+ # # │ c │
1098
+ # # │ --- │
1099
+ # # │ i64 │
1100
+ # # ╞═════╡
1101
+ # # │ 1 │
1102
+ # # │ 4 │
1103
+ # # │ 2 │
1104
+ # # │ 3 │
1105
+ # # └─────┘
1106
def arg_sort_by(
  exprs,
  *more_exprs,
  reverse: false,
  nulls_last: false,
  multithreaded: true,
  maintain_order: false
)
  rbexprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
  column_count = rbexprs.length

  # `reverse` and `nulls_last` are passed through Utils.extend_bool together
  # with the number of parsed expressions before reaching the native call.
  reverse_flags = Utils.extend_bool(reverse, column_count, "reverse", "exprs")
  nulls_last_flags = Utils.extend_bool(nulls_last, column_count, "nulls_last", "exprs")

  Utils.wrap_expr(
    Plr.arg_sort_by(rbexprs, reverse_flags, nulls_last_flags, multithreaded, maintain_order)
  )
end
1037
1119
  alias_method :argsort_by, :arg_sort_by
1038
1120
 
@@ -18,7 +18,7 @@ module Polars
18
18
  #
19
19
  # @example
20
20
  # Polars.time_range(
21
- # time(14, 0),
21
+ # Time.utc(2000, 1, 1, 14, 0),
22
22
  # nil,
23
23
  # "3h15m",
24
24
  # eager: true
@@ -48,12 +48,12 @@ module Polars
48
48
  end
49
49
 
50
50
  if start.nil?
51
- # start = time(0, 0, 0)
52
- raise Todo
51
+ # date part is ignored
52
+ start = ::Time.utc(2000, 1, 1, 0, 0, 0)
53
53
  end
54
54
  if stop.nil?
55
- # stop = time(23, 59, 59, 999999)
56
- raise Todo
55
+ # date part is ignored
56
+ stop = ::Time.utc(2000, 1, 1, 23, 59, 59, 999999)
57
57
  end
58
58
 
59
59
  start_rbexpr = Utils.parse_into_expression(start)
@@ -87,21 +87,21 @@ module Polars
87
87
  # @example
88
88
  # df = Polars::DataFrame.new(
89
89
  # {
90
- # "start" => [time(9, 0), time(10, 0)],
91
- # "end" => time(11, 0)
90
+ # "start" => [Time.utc(2000, 1, 1, 9, 0), Time.utc(2000, 1, 1, 10, 0)],
91
+ # "end" => Time.utc(2000, 1, 1, 11, 0)
92
92
  # }
93
93
  # )
94
- # df.with_columns(time_range: Polars.time_ranges("start", "end"))
94
+ # df.select(time_range: Polars.time_ranges("start", "end"))
95
95
  # # =>
96
- # # shape: (2, 3)
97
- # # ┌──────────┬──────────┬────────────────────────────────┐
98
- # # │ start ┆ end ┆ time_range │
99
- # # │ --- ┆ --- ┆ ---
100
- # # │ time ┆ time ┆ list[time] │
101
- # # ╞══════════╪══════════╪════════════════════════════════╡
102
- # # │ 09:00:00 ┆ 11:00:00 ┆ [09:00:00, 10:00:00, 11:00:00] │
103
- # # │ 10:00:00 ┆ 11:00:00 ┆ [10:00:00, 11:00:00] │
104
- # # └──────────┴──────────┴────────────────────────────────┘
96
+ # # shape: (2, 1)
97
+ # # ┌────────────────────────────────┐
98
+ # # │ time_range │
99
+ # # │ --- │
100
+ # # │ list[time] │
101
+ # # ╞════════════════════════════════╡
102
+ # # │ [09:00:00, 10:00:00, 11:00:00] │
103
+ # # │ [10:00:00, 11:00:00] │
104
+ # # └────────────────────────────────┘
105
105
  def time_ranges(
106
106
  start = nil,
107
107
  stop = nil,
@@ -118,12 +118,12 @@ module Polars
118
118
  end
119
119
 
120
120
  if start.nil?
121
- # start = time(0, 0, 0)
122
- raise Todo
121
+ # date part is ignored
122
+ start = ::Time.utc(2000, 1, 1, 0, 0, 0)
123
123
  end
124
124
  if stop.nil?
125
- # stop = time(23, 59, 59, 999999)
126
- raise Todo
125
+ # date part is ignored
126
+ stop = ::Time.utc(2000, 1, 1, 23, 59, 59, 999999)
127
127
  end
128
128
 
129
129
  start_rbexpr = Utils.parse_into_expression(start)
data/lib/polars/io/csv.rb CHANGED
@@ -75,9 +75,6 @@ module Polars
75
75
  # the DataFrame.
76
76
  # @param row_count_offset [Integer]
77
77
  # Offset to start the row_count column (only used if the name is set).
78
- # @param sample_size [Integer]
79
- # Set the sample size. This is used to sample statistics to estimate the
80
- # allocation needed.
81
78
  # @param eol_char [String]
82
79
  # Single byte end of line character.
83
80
  # @param truncate_ragged_lines [Boolean]
@@ -114,7 +111,6 @@ module Polars
114
111
  skip_rows_after_header: 0,
115
112
  row_count_name: nil,
116
113
  row_count_offset: 0,
117
- sample_size: 1024,
118
114
  eol_char: "\n",
119
115
  truncate_ragged_lines: false
120
116
  )
@@ -163,7 +159,6 @@ module Polars
163
159
  skip_rows_after_header: skip_rows_after_header,
164
160
  row_count_name: row_count_name,
165
161
  row_count_offset: row_count_offset,
166
- sample_size: sample_size,
167
162
  eol_char: eol_char,
168
163
  truncate_ragged_lines: truncate_ragged_lines
169
164
  )
@@ -201,7 +196,6 @@ module Polars
201
196
  skip_rows_after_header: 0,
202
197
  row_count_name: nil,
203
198
  row_count_offset: 0,
204
- sample_size: 1024,
205
199
  eol_char: "\n",
206
200
  raise_if_empty: true,
207
201
  truncate_ragged_lines: false,
@@ -305,7 +299,6 @@ module Polars
305
299
  parse_dates,
306
300
  skip_rows_after_header,
307
301
  Utils.parse_row_index_args(row_count_name, row_count_offset),
308
- sample_size,
309
302
  eol_char,
310
303
  raise_if_empty,
311
304
  truncate_ragged_lines,
@@ -392,9 +385,6 @@ module Polars
392
385
  # the DataFrame.
393
386
  # @param row_count_offset [Integer]
394
387
  # Offset to start the row_count column (only used if the name is set).
395
- # @param sample_size [Integer]
396
- # Set the sample size. This is used to sample statistics to estimate the
397
- # allocation needed.
398
388
  # @param eol_char [String]
399
389
  # Single byte end of line character.
400
390
  # @param truncate_ragged_lines [Boolean]
@@ -431,7 +421,6 @@ module Polars
431
421
  skip_rows_after_header: 0,
432
422
  row_count_name: nil,
433
423
  row_count_offset: 0,
434
- sample_size: 1024,
435
424
  eol_char: "\n",
436
425
  raise_if_empty: true,
437
426
  truncate_ragged_lines: false,
@@ -474,7 +463,6 @@ module Polars
474
463
  skip_rows_after_header: skip_rows_after_header,
475
464
  row_count_name: row_count_name,
476
465
  row_count_offset: row_count_offset,
477
- sample_size: sample_size,
478
466
  eol_char: eol_char,
479
467
  new_columns: new_columns,
480
468
  raise_if_empty: raise_if_empty,
@@ -618,7 +606,7 @@ module Polars
618
606
 
619
607
  # @private
620
608
  def _scan_csv_impl(
621
- file,
609
+ source,
622
610
  has_header: true,
623
611
  sep: ",",
624
612
  comment_char: nil,
@@ -650,9 +638,16 @@ module Polars
650
638
  end
651
639
  processed_null_values = Utils._process_null_values(null_values)
652
640
 
641
+ if source.is_a?(::Array)
642
+ sources = source
643
+ source = nil
644
+ else
645
+ sources = []
646
+ end
647
+
653
648
  rblf =
654
649
  RbLazyFrame.new_from_csv(
655
- file,
650
+ source,
656
651
  sep,
657
652
  has_header,
658
653
  ignore_errors,
@@ -672,7 +667,8 @@ module Polars
672
667
  Utils.parse_row_index_args(row_count_name, row_count_offset),
673
668
  parse_dates,
674
669
  eol_char,
675
- truncate_ragged_lines
670
+ truncate_ragged_lines,
671
+ sources
676
672
  )
677
673
  Utils.wrap_ldf(rblf)
678
674
  end
@@ -681,7 +677,9 @@ module Polars
681
677
 
682
678
  def _prepare_file_arg(file)
683
679
  if file.is_a?(::String) && file =~ /\Ahttps?:\/\//
684
- raise ArgumentError, "use URI(...) for remote files"
680
+ require "uri"
681
+
682
+ file = URI(file)
685
683
  end
686
684
 
687
685
  if defined?(URI) && file.is_a?(URI)
@@ -18,9 +18,9 @@ module Polars
18
18
  if query.is_a?(ActiveRecord::Result)
19
19
  query
20
20
  elsif query.is_a?(ActiveRecord::Relation)
21
- query.connection.select_all(query.to_sql)
21
+ query.connection_pool.with_connection { |c| c.select_all(query.to_sql) }
22
22
  elsif query.is_a?(::String)
23
- ActiveRecord::Base.connection.select_all(query)
23
+ ActiveRecord::Base.connection_pool.with_connection { |c| c.select_all(query) }
24
24
  else
25
25
  raise ArgumentError, "Expected ActiveRecord::Relation, ActiveRecord::Result, or String"
26
26
  end
@@ -0,0 +1,126 @@
1
module Polars
  module IO
    # Reads into a DataFrame from a Delta lake table.
    #
    # @param source [Object]
    #   DeltaTable or a Path or URI to the root of the Delta lake table.
    # @param version [Object]
    #   Numerical version or timestamp version of the Delta lake table.
    # @param columns [Array]
    #   Columns to select. Accepts a list of column names.
    # @param rechunk [Boolean]
    #   Make sure that all columns are contiguous in memory by
    #   aggregating the chunks into a single array.
    # @param storage_options [Hash]
    #   Extra options for the storage backends supported by `deltalake-rb`.
    # @param delta_table_options [Hash]
    #   Additional keyword arguments while reading a Delta lake Table.
    #
    # @return [DataFrame]
    def read_delta(
      source,
      version: nil,
      columns: nil,
      rechunk: false,
      storage_options: nil,
      delta_table_options: nil
    )
      dl_tbl =
        _get_delta_lake_table(
          source,
          version: version,
          storage_options: storage_options,
          delta_table_options: delta_table_options
        )

      dl_tbl.to_polars(columns: columns, rechunk: rechunk)
    end

    # Lazily read from a Delta lake table.
    #
    # @param source [Object]
    #   DeltaTable or a Path or URI to the root of the Delta lake table.
    # @param version [Object]
    #   Numerical version or timestamp version of the Delta lake table.
    # @param storage_options [Hash]
    #   Extra options for the storage backends supported by `deltalake-rb`.
    # @param delta_table_options [Hash]
    #   Additional keyword arguments while reading a Delta lake Table.
    #
    # @return [LazyFrame]
    def scan_delta(
      source,
      version: nil,
      storage_options: nil,
      delta_table_options: nil
    )
      dl_tbl =
        _get_delta_lake_table(
          source,
          version: version,
          storage_options: storage_options,
          delta_table_options: delta_table_options
        )

      dl_tbl.to_polars(eager: false)
    end

    private

    # Returns the location to hand to DeltaLake::Table: values without a URI
    # scheme go through Utils.normalize_filepath, values with a scheme are
    # returned unchanged.
    #
    # `strict` is accepted for interface compatibility and is not used here.
    def _resolve_delta_lake_uri(table_uri, strict: true)
      require "uri"

      scheme = URI(table_uri).scheme

      # Ruby's URI#scheme is nil (not "") when no scheme is present, so check
      # for both; comparing only against "" would never match a local path.
      if scheme.nil? || scheme.empty?
        Utils.normalize_filepath(table_uri)
      else
        table_uri
      end
    end

    # Builds the DeltaLake::Table for `table_path`.
    #
    # An existing DeltaLake::Table is returned as-is (the other arguments are
    # not applied to it). Otherwise the table is opened at the resolved URI
    # with `storage_options` and `delta_table_options`. A String or Time
    # `version` is applied through `load_as_version` after opening; any other
    # `version` (including nil) is passed straight to the constructor.
    #
    # Raises Error when the DeltaLake constant is not defined.
    def _get_delta_lake_table(
      table_path,
      version: nil,
      storage_options: nil,
      delta_table_options: nil
    )
      _check_if_delta_available

      return table_path if table_path.is_a?(DeltaLake::Table)

      delta_table_options ||= {}
      resolved_uri = _resolve_delta_lake_uri(table_path)

      if version.is_a?(::String) || version.is_a?(::Time)
        dl_tbl =
          DeltaLake::Table.new(
            resolved_uri,
            storage_options: storage_options,
            **delta_table_options
          )
        dl_tbl.load_as_version(version)
      else
        dl_tbl =
          DeltaLake::Table.new(
            resolved_uri,
            version: version,
            storage_options: storage_options,
            **delta_table_options
          )
      end

      # Return the configured table. Re-creating it from `table_path` here
      # would throw away the version, storage options and resolved URI.
      dl_tbl
    end

    # Guard for the optional Delta Lake dependency.
    def _check_if_delta_available
      if !defined?(DeltaLake)
        raise Error, "Delta Lake not available"
      end
    end
  end
end
data/lib/polars/io/ipc.rb CHANGED
@@ -233,7 +233,7 @@ module Polars
233
233
 
234
234
  # @private
235
235
  def _scan_ipc_impl(
236
- file,
236
+ source,
237
237
  n_rows: nil,
238
238
  cache: true,
239
239
  rechunk: true,
@@ -245,13 +245,23 @@ module Polars
245
245
  try_parse_hive_dates: true,
246
246
  include_file_paths: nil
247
247
  )
248
- if Utils.pathlike?(file)
249
- file = Utils.normalize_filepath(file)
248
+ sources = []
249
+ if Utils.pathlike?(source)
250
+ source = Utils.normalize_filepath(source)
251
+ elsif source.is_a?(::Array)
252
+ if Utils.is_path_or_str_sequence(source)
253
+ sources = source.map { |s| Utils.normalize_filepath(s) }
254
+ else
255
+ sources = source
256
+ end
257
+
258
+ source = nil
250
259
  end
251
260
 
252
261
  rblf =
253
262
  RbLazyFrame.new_from_ipc(
254
- file,
263
+ source,
264
+ sources,
255
265
  n_rows,
256
266
  cache,
257
267
  rechunk,
@@ -60,13 +60,23 @@ module Polars
60
60
  row_count_name: nil,
61
61
  row_count_offset: 0
62
62
  )
63
+ sources = []
63
64
  if Utils.pathlike?(source)
64
65
  source = Utils.normalize_filepath(source)
66
+ elsif source.is_a?(::Array)
67
+ if Utils.is_path_or_str_sequence(source)
68
+ sources = source.map { |s| Utils.normalize_filepath(s) }
69
+ else
70
+ sources = source
71
+ end
72
+
73
+ source = nil
65
74
  end
66
75
 
67
76
  rblf =
68
77
  RbLazyFrame.new_from_ndjson(
69
78
  source,
79
+ sources,
70
80
  infer_schema_length,
71
81
  batch_size,
72
82
  n_rows,