polars-df 0.19.0-x64-mingw-ucrt → 0.21.0-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/Cargo.lock +211 -320
- data/LICENSE-THIRD-PARTY.txt +1376 -2634
- data/LICENSE.txt +1 -1
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/3.4/polars.so +0 -0
- data/lib/polars/cat_name_space.rb +3 -43
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/convert.rb +10 -0
- data/lib/polars/data_frame.rb +151 -30
- data/lib/polars/data_types.rb +47 -3
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +48 -39
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/eager.rb +1 -1
- data/lib/polars/functions/lazy.rb +114 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +18 -0
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +45 -63
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +163 -75
- data/lib/polars/list_expr.rb +213 -17
- data/lib/polars/list_name_space.rb +121 -8
- data/lib/polars/meta_expr.rb +14 -29
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +6 -1
- data/lib/polars/selector.rb +138 -0
- data/lib/polars/selectors.rb +931 -202
- data/lib/polars/series.rb +46 -19
- data/lib/polars/string_expr.rb +24 -3
- data/lib/polars/string_name_space.rb +12 -1
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +8 -0
- metadata +10 -2
data/lib/polars/functions/lazy.rb
CHANGED
@@ -458,7 +458,7 @@ module Polars
     # # └─────┴─────┘
     def first(*columns)
       if columns.empty?
-        return
+        return cs.first.as_expr
       end
 
       col(*columns).first
@@ -518,7 +518,7 @@ module Polars
     # # └─────┴─────┘
     def last(*columns)
       if columns.empty?
-        return
+        return cs.last.as_expr
       end
 
       col(*columns).last
@@ -565,12 +565,8 @@ module Polars
     # # │ bar ┆ 8   │
     # # │ baz ┆ 3   │
     # # └─────┴─────┘
-    def nth(*indices)
-
-        indices = indices[0]
-      end
-
-      Utils.wrap_expr(Plr.index_cols(indices))
+    def nth(*indices, strict: true)
+      cs.by_index(*indices, require_all: strict).as_expr
     end
 
     # Get the first `n` rows.
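For orientation, a minimal sketch of how the reworked `nth` might be used after this change; the frame and indices below are illustrative, not taken from the package.

```ruby
df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3, 4], "c" => [5, 6]})

# Select the first and third columns by index.
df.select(Polars.nth(0, 2))

# strict: false maps to require_all: false above, so an out-of-range
# index should presumably be tolerated rather than raising.
df.select(Polars.nth(0, 9, strict: false))
```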
@@ -675,12 +671,12 @@ module Polars
     #   Column name or Expression.
     # @param b [Object]
     #   Column name or Expression.
+    # @param method ["pearson", "spearman"]
+    #   Correlation method.
     # @param ddof [Integer]
     #   "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof,
     #   where N represents the number of elements.
     #   By default ddof is 1.
-    # @param method ["pearson", "spearman"]
-    #   Correlation method.
     # @param propagate_nans [Boolean]
     #   If `true` any `NaN` encountered will lead to `NaN` in the output.
     #   Defaults to `False` where `NaN` are regarded as larger than any finite number
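The reordered parameter docs belong to `Polars.corr`; a small hedged example of the documented options (the data is made up):

```ruby
df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})

# Pearson (default) vs. Spearman rank correlation.
df.select(
  Polars.corr("a", "b").alias("pearson"),
  Polars.corr("a", "b", method: "spearman").alias("spearman")
)
```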
@@ -795,14 +791,82 @@ module Polars
     # Accumulate over multiple columns horizontally/row wise with a left fold.
     #
     # @return [Expr]
-
+    #
+    # @example Horizontally sum over all columns and add 1.
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3],
+    #       "b" => [3, 4, 5],
+    #       "c" => [5, 6, 7]
+    #     }
+    #   )
+    #   df.select(
+    #     Polars.fold(
+    #       Polars.lit(1), ->(acc, x) { acc + x }, Polars.col("*")
+    #     ).alias("sum")
+    #   )
+    #   # =>
+    #   # shape: (3, 1)
+    #   # ┌─────┐
+    #   # │ sum │
+    #   # │ --- │
+    #   # │ i64 │
+    #   # ╞═════╡
+    #   # │ 10  │
+    #   # │ 13  │
+    #   # │ 16  │
+    #   # └─────┘
+    #
+    # @example You can also apply a condition/predicate on all columns:
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3],
+    #       "b" => [0, 1, 2]
+    #     }
+    #   )
+    #   df.filter(
+    #     Polars.fold(
+    #       Polars.lit(true),
+    #       ->(acc, x) { acc & x },
+    #       Polars.col("*") > 1
+    #     )
+    #   )
+    #   # =>
+    #   # shape: (1, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 3   ┆ 2   │
+    #   # └─────┴─────┘
+    def fold(
+      acc,
+      function,
+      exprs,
+      returns_scalar: false,
+      return_dtype: nil
+    )
       acc = Utils.parse_into_expression(acc, str_as_lit: true)
       if exprs.is_a?(Expr)
         exprs = [exprs]
       end
 
+      rt = nil
+      if !return_dtype.nil?
+        rt = Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr
+      end
+
       exprs = Utils.parse_into_list_of_expressions(exprs)
-      Utils.wrap_expr(
+      Utils.wrap_expr(
+        Plr.fold(
+          acc,
+          function,
+          exprs,
+          returns_scalar,
+          rt
+        )
+      )
     end
 
     # def reduce
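A hedged sketch of the keyword arguments added to `fold` in this release (`returns_scalar:`, `return_dtype:`); the data and the dtype choice are illustrative only.

```ruby
df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => [3, 4, 5]})

# Ask for a Float64 result instead of inferring the dtype
# from the accumulator (return_dtype is new here).
df.select(
  Polars.fold(
    Polars.lit(0),
    ->(acc, x) { acc + x },
    Polars.col("*"),
    return_dtype: Polars::Float64
  ).alias("sum_f64")
)
```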
@@ -815,11 +879,17 @@ module Polars
     # @param acc [Object]
     #   Accumulator Expression. This is the value that will be initialized when the fold
     #   starts. For a sum this could for instance be lit(0).
-    # @param
+    # @param function [Object]
     #   Function to apply over the accumulator and the value.
     #   Fn(acc, value) -> new_value
     # @param exprs [Object]
     #   Expressions to aggregate over. May also be a wildcard expression.
+    # @param returns_scalar [Boolean]
+    #   Whether or not `function` applied returns a scalar. This must be set correctly
+    #   by the user.
+    # @param return_dtype [Object]
+    #   Output datatype.
+    #   If not set, the dtype will be inferred based on the dtype of the accumulator.
     # @param include_init [Boolean]
     #   Include the initial accumulator state as struct field.
     #
@@ -851,14 +921,35 @@ module Polars
     # # │ 2   ┆ 4   ┆ 6   ┆ {3,7,13}  │
     # # │ 3   ┆ 5   ┆ 7   ┆ {4,9,16}  │
     # # └─────┴─────┴─────┴───────────┘
-    def cum_fold(
+    def cum_fold(
+      acc,
+      function,
+      exprs,
+      returns_scalar: false,
+      return_dtype: nil,
+      include_init: false
+    )
       acc = Utils.parse_into_expression(acc, str_as_lit: true)
       if exprs.is_a?(Expr)
         exprs = [exprs]
       end
 
+      rt = nil
+      if !return_dtype.nil?
+        rt = Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr
+      end
+
       exprs = Utils.parse_into_list_of_expressions(exprs)
-      Utils.wrap_expr(
+      Utils.wrap_expr(
+        Plr.cum_fold(
+          acc,
+          function,
+          exprs,
+          returns_scalar,
+          rt,
+          include_init
+        )._alias("cum_fold")
+      )
     end
     alias_method :cumfold, :cum_fold
 
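Likewise, a hedged sketch of `cum_fold` with the new keywords; per the docs above, `include_init` keeps the initial accumulator as a struct field. The frame is illustrative.

```ruby
df = Polars::DataFrame.new({"a" => [1, 2], "b" => [3, 4], "c" => [5, 6]})

# Running left fold across columns; the result lands in a "cum_fold" struct column.
df.with_columns(
  Polars.cum_fold(
    Polars.lit(1),
    ->(acc, x) { acc + x },
    Polars.col("*"),
    include_init: true
  )
)
```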
@@ -1047,8 +1138,16 @@ module Polars
     #
     # @param exprs [Object]
     #   Columns use to determine the ordering.
+    # @param more_exprs [Array]
+    #   Additional columns to arg sort by, specified as positional arguments.
     # @param reverse [Boolean]
     #   Default is ascending.
+    # @param nulls_last [Boolean]
+    #   Place null values last.
+    # @param multithreaded [Boolean]
+    #   Sort using multiple threads.
+    # @param maintain_order [Boolean]
+    #   Whether the order should be maintained if elements are equal.
     #
     # @return [Expr]
     #
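These parameters document `arg_sort_by`; a hedged example of the new keywords (the frame contents are made up):

```ruby
df = Polars::DataFrame.new({"a" => [nil, 2, 1], "b" => [9, 8, 7]})

# Row indices that would sort by "a" then "b", descending, with nulls last.
df.select(
  Polars.arg_sort_by("a", "b", reverse: true, nulls_last: true)
)
```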
data/lib/polars/functions/repeat.rb
CHANGED
@@ -6,6 +6,10 @@ module Polars
     #   Value to repeat.
     # @param n [Integer]
     #   Repeat `n` times.
+    # @param dtype [Object]
+    #   Data type of the resulting column. If set to `nil` (default), data type is
+    #   inferred from the given value. Defaults to Int32 for integer values, unless
+    #   Int64 is required to fit the given value. Defaults to Float64 for float values.
     # @param eager [Boolean]
     #   Run eagerly and collect into a `Series`.
     # @param name [String]
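A hedged example of the new `dtype` option on `Polars.repeat` (values are illustrative):

```ruby
# Without dtype, an integer value defaults to Int32 (or Int64 if needed).
Polars.repeat(1, 3, eager: true)

# Request a narrower dtype explicitly.
Polars.repeat(1, 3, dtype: Polars::Int8, eager: true)
```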
data/lib/polars/io/csv.rb
CHANGED
@@ -347,6 +347,9 @@ module Polars
     #   - `String`: All values equal to this string will be null.
     #   - `Array`: All values equal to any string in this array will be null.
     #   - `Hash`: A hash that maps column name to a null value string.
+    # @param missing_utf8_is_empty_string [Boolean]
+    #   By default a missing value is considered to be null; if you would prefer missing
+    #   utf8 values to be treated as the empty string you can set this param true.
     # @param ignore_errors [Boolean]
     #   Try to keep reading lines if some lines yield errors.
     #   First try `infer_schema_length: 0` to read all columns as
@@ -387,8 +390,13 @@ module Polars
     #   Offset to start the row_count column (only used if the name is set).
     # @param eol_char [String]
     #   Single byte end of line character.
+    # @param raise_if_empty [Boolean]
+    #   When there is no data in the source, `NoDataError` is raised. If this parameter
+    #   is set to false, `nil` will be returned from `next_batches(n)` instead.
     # @param truncate_ragged_lines [Boolean]
     #   Truncate lines that are longer than the schema.
+    # @param decimal_comma [Boolean]
+    #   Parse floats using a comma as the decimal separator instead of a period.
     #
     # @return [BatchedCsvReader]
     #
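The two hunks above extend the docs for the batched CSV reader (`read_csv_batched`); a hedged usage sketch of the newly documented options, with a placeholder path:

```ruby
reader = Polars.read_csv_batched(
  "data.csv",                          # placeholder path
  missing_utf8_is_empty_string: true,  # missing strings become "" instead of null
  decimal_comma: true,                 # parse "1,5" as 1.5
  raise_if_empty: false                # empty source: next_batches returns nil
)
batches = reader.next_batches(5)
```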
@@ -503,6 +511,9 @@ module Polars
     #   - `String`: All values equal to this string will be null.
     #   - `Array`: All values equal to any string in this array will be null.
     #   - `Hash`: A hash that maps column name to a null value string.
+    # @param missing_utf8_is_empty_string [Boolean]
+    #   By default a missing value is considered to be null; if you would prefer missing
+    #   utf8 values to be treated as the empty string you can set this param true.
     # @param ignore_errors [Boolean]
     #   Try to keep reading lines if some lines yield errors.
     #   First try `infer_schema_length: 0` to read all columns as
@@ -538,8 +549,15 @@ module Polars
     #   the column remains of data type `:str`.
     # @param eol_char [String]
     #   Single byte end of line character.
+    # @param raise_if_empty [Boolean]
+    #   When there is no data in the source, `NoDataError` is raised. If this parameter
+    #   is set to false, an empty LazyFrame (with no columns) is returned instead.
     # @param truncate_ragged_lines [Boolean]
     #   Truncate lines that are longer than the schema.
+    # @param decimal_comma [Boolean]
+    #   Parse floats using a comma as the decimal separator instead of a period.
+    # @param glob [Boolean]
+    #   Expand path given via globbing rules.
     #
     # @return [LazyFrame]
     def scan_csv(
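A hedged sketch of `scan_csv` with the newly documented options (placeholder pattern):

```ruby
lf = Polars.scan_csv(
  "data-*.csv",           # placeholder glob pattern
  raise_if_empty: false,  # empty source yields an empty LazyFrame
  decimal_comma: true,
  glob: true              # expand the pattern via globbing rules
)
df = lf.collect
```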
data/lib/polars/io/json.rb
CHANGED
@@ -4,6 +4,22 @@ module Polars
     #
     # @param source [Object]
     #   Path to a file or a file-like object.
+    # @param schema [Object]
+    #   The DataFrame schema may be declared in several ways:
+    #
+    #   * As a dict of {name:type} pairs; if type is None, it will be auto-inferred.
+    #   * As a list of column names; in this case types are automatically inferred.
+    #   * As a list of (name,type) pairs; this is equivalent to the dictionary form.
+    #
+    #   If you supply a list of column names that does not match the names in the
+    #   underlying data, the names given here will overwrite them. The number
+    #   of names given in the schema should match the underlying data dimensions.
+    # @param schema_overrides [Hash]
+    #   Support type specification or override of one or more columns; note that
+    #   any dtypes inferred from the schema param will be overridden.
+    # @param infer_schema_length [Integer]
+    #   The maximum number of rows to scan for schema inference.
+    #   If set to `nil`, the full data may be scanned *(this is slow)*.
     #
     # @return [DataFrame]
     def read_json(
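A hedged sketch of `read_json` with the newly documented schema options (placeholder path, invented column name):

```ruby
Polars.read_json(
  "data.json",
  schema_overrides: {"id" => Polars::Int64},
  infer_schema_length: 100
)
```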
data/lib/polars/io/ndjson.rb
CHANGED
@@ -4,6 +4,19 @@ module Polars
     #
     # @param source [Object]
     #   Path to a file or a file-like object.
+    # @param schema [Object]
+    #   The DataFrame schema may be declared in several ways:
+    #
+    #   * As a dict of {name:type} pairs; if type is None, it will be auto-inferred.
+    #   * As a list of column names; in this case types are automatically inferred.
+    #   * As a list of (name,type) pairs; this is equivalent to the dictionary form.
+    #
+    #   If you supply a list of column names that does not match the names in the
+    #   underlying data, the names given here will overwrite them. The number
+    #   of names given in the schema should match the underlying data dimensions.
+    # @param schema_overrides [Hash]
+    #   Support type specification or override of one or more columns; note that
+    #   any dtypes inferred from the schema param will be overridden.
     #
     # @return [DataFrame]
     def read_ndjson(
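Similarly for `read_ndjson`, a hedged sketch with an explicit schema (placeholder path and columns):

```ruby
Polars.read_ndjson(
  "data.ndjson",
  schema: {"id" => Polars::Int64, "score" => Polars::Float64}
)
```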
data/lib/polars/io/parquet.rb
CHANGED
@@ -49,6 +49,12 @@ module Polars
     #   Number of retries if accessing a cloud instance fails.
     # @param include_file_paths [String]
     #   Include the path of the source file(s) as a column with this name.
+    # @param allow_missing_columns [Boolean]
+    #   When reading a list of parquet files, if a column existing in the first
+    #   file cannot be found in subsequent files, the default behavior is to
+    #   raise an error. However, if `allow_missing_columns` is set to
+    #   `true`, a full-NULL column is returned instead of erroring for the files
+    #   that do not contain the column.
     #
     # @return [DataFrame]
     def read_parquet(
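A hedged sketch of `read_parquet` with the newly documented flag (placeholder paths):

```ruby
# Suppose the second file lacks a column present in the first.
Polars.read_parquet(
  ["part-0.parquet", "part-1.parquet"],
  allow_missing_columns: true  # missing columns come back as full-NULL
)
```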
@@ -171,6 +177,17 @@
     #   Number of retries if accessing a cloud instance fails.
     # @param include_file_paths [String]
     #   Include the path of the source file(s) as a column with this name.
+    # @param allow_missing_columns [Boolean]
+    #   When reading a list of parquet files, if a column existing in the first
+    #   file cannot be found in subsequent files, the default behavior is to
+    #   raise an error. However, if `allow_missing_columns` is set to
+    #   `true`, a full-NULL column is returned instead of erroring for the files
+    #   that do not contain the column.
+    # @param extra_columns ['ignore', 'raise']
+    #   Configuration for behavior when extra columns outside of the
+    #   defined schema are encountered in the data:
+    #   * `ignore`: Silently ignores.
+    #   * `raise`: Raises an error.
     #
     # @return [LazyFrame]
     def scan_parquet(
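A hedged sketch of `scan_parquet` with the new `extra_columns` option (placeholder path, invented schema):

```ruby
Polars.scan_parquet(
  "data/*.parquet",
  schema: {"id" => Polars::Int64},
  extra_columns: "ignore"  # silently drop columns not in the schema
)
```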
@@ -192,8 +209,11 @@
       credential_provider: nil,
       retries: 2,
       include_file_paths: nil,
-      allow_missing_columns: false
+      allow_missing_columns: false,
+      extra_columns: "raise"
     )
+      missing_columns = allow_missing_columns ? "insert" : "raise"
+
       if Utils.pathlike?(source)
         source = Utils.normalize_filepath(source, check_not_directory: false)
       elsif Utils.is_path_or_str_sequence(source)
@@ -204,56 +224,11 @@
         raise Todo
       end
 
-      _scan_parquet_impl(
-        source,
-        n_rows: n_rows,
-        cache: cache,
-        parallel: parallel,
-        rechunk: rechunk,
-        row_index_name: row_count_name,
-        row_index_offset: row_count_offset,
-        storage_options: storage_options,
-        credential_provider: credential_provider,
-        low_memory: low_memory,
-        use_statistics: use_statistics,
-        hive_partitioning: hive_partitioning,
-        schema: schema,
-        hive_schema: hive_schema,
-        try_parse_hive_dates: try_parse_hive_dates,
-        retries: retries,
-        glob: glob,
-        include_file_paths: include_file_paths,
-        allow_missing_columns: allow_missing_columns
-      )
-    end
-
-    # @private
-    def _scan_parquet_impl(
-      source,
-      n_rows: nil,
-      cache: true,
-      parallel: "auto",
-      rechunk: true,
-      row_index_name: nil,
-      row_index_offset: 0,
-      storage_options: nil,
-      credential_provider: nil,
-      low_memory: false,
-      use_statistics: true,
-      hive_partitioning: nil,
-      glob: true,
-      schema: nil,
-      hive_schema: nil,
-      try_parse_hive_dates: true,
-      retries: 2,
-      include_file_paths: nil,
-      allow_missing_columns: false
-    )
       if source.is_a?(::Array)
         sources = source
         source = nil
       else
-        sources = []
+        sources = [source]
       end
 
       if storage_options
@@ -262,27 +237,34 @@
         storage_options = nil
       end
 
+      row_index_name = row_count_name
+      row_index_offset = row_count_offset
+
       rblf =
         RbLazyFrame.new_from_parquet(
-          source,
           sources,
-
-
+          schema,
+          ScanOptions.new(
+            row_index: !row_index_name.nil? ? [row_index_name, row_index_offset] : nil,
+            pre_slice: !n_rows.nil? ? [0, n_rows] : nil,
+            # cast_options: cast_options,
+            extra_columns: extra_columns,
+            missing_columns: missing_columns,
+            include_file_paths: include_file_paths,
+            glob: glob,
+            hive_partitioning: hive_partitioning,
+            hive_schema: hive_schema,
+            try_parse_hive_dates: try_parse_hive_dates,
+            rechunk: rechunk,
+            cache: cache,
+            storage_options: storage_options,
+            # credential_provider: credential_provider_builder,
+            retries: retries,
+            # deletion_files: _deletion_files
+          ),
           parallel,
-          rechunk,
-          Utils.parse_row_index_args(row_index_name, row_index_offset),
           low_memory,
-
-          credential_provider,
-          use_statistics,
-          hive_partitioning,
-          schema,
-          hive_schema,
-          try_parse_hive_dates,
-          retries,
-          glob,
-          include_file_paths,
-          allow_missing_columns
+          use_statistics
         )
       Utils.wrap_ldf(rblf)
     end
data/lib/polars/io/scan_options.rb
ADDED
@@ -0,0 +1,47 @@
+module Polars
+  module IO
+    class ScanOptions
+      attr_reader :row_index, :pre_slice, :cast_options, :extra_columns, :missing_columns,
+        :include_file_paths, :glob, :hive_partitioning, :hive_schema, :try_parse_hive_dates,
+        :rechunk, :cache, :storage_options, :credential_provider, :retries, :column_mapping, :deletion_files
+
+      def initialize(
+        row_index: nil,
+        pre_slice: nil,
+        cast_options: nil,
+        extra_columns: "raise",
+        missing_columns: "raise",
+        include_file_paths: nil,
+        glob: true,
+        hive_partitioning: nil,
+        hive_schema: nil,
+        try_parse_hive_dates: true,
+        rechunk: false,
+        cache: true,
+        storage_options: nil,
+        credential_provider: nil,
+        retries: 2,
+        column_mapping: nil,
+        deletion_files: nil
+      )
+        @row_index = row_index
+        @pre_slice = pre_slice
+        @cast_options = cast_options
+        @extra_columns = extra_columns
+        @missing_columns = missing_columns
+        @include_file_paths = include_file_paths
+        @glob = glob
+        @hive_partitioning = hive_partitioning
+        @hive_schema = hive_schema
+        @try_parse_hive_dates = try_parse_hive_dates
+        @rechunk = rechunk
+        @cache = cache
+        @storage_options = storage_options
+        @credential_provider = credential_provider
+        @retries = retries
+        @column_mapping = column_mapping
+        @deletion_files = deletion_files
+      end
+    end
+  end
+end
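`ScanOptions` looks like internal plumbing that `scan_parquet` now packs its options into before calling `RbLazyFrame.new_from_parquet`; a hedged illustration of the object's shape, not a public API contract:

```ruby
# Field values are illustrative; this mirrors what scan_parquet builds internally.
options = Polars::IO::ScanOptions.new(
  row_index: ["row_nr", 0],   # [name, offset]
  pre_slice: [0, 1000],       # [offset, length]
  missing_columns: "insert",  # what allow_missing_columns: true maps to
  extra_columns: "raise"
)
options.retries  # => 2 (default)
```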