polars-df 0.21.0 → 0.21.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +1 -1
- data/ext/polars/Cargo.toml +7 -1
- data/ext/polars/src/conversion/mod.rs +92 -4
- data/ext/polars/src/exceptions.rs +1 -0
- data/ext/polars/src/expr/array.rs +73 -4
- data/ext/polars/src/expr/binary.rs +26 -1
- data/ext/polars/src/expr/bitwise.rs +39 -0
- data/ext/polars/src/expr/categorical.rs +20 -0
- data/ext/polars/src/expr/datatype.rs +24 -1
- data/ext/polars/src/expr/datetime.rs +58 -0
- data/ext/polars/src/expr/general.rs +84 -5
- data/ext/polars/src/expr/list.rs +24 -0
- data/ext/polars/src/expr/meta.rs +11 -0
- data/ext/polars/src/expr/mod.rs +1 -0
- data/ext/polars/src/expr/name.rs +8 -0
- data/ext/polars/src/expr/rolling.rs +20 -0
- data/ext/polars/src/expr/string.rs +59 -0
- data/ext/polars/src/expr/struct.rs +9 -1
- data/ext/polars/src/functions/io.rs +19 -0
- data/ext/polars/src/functions/lazy.rs +4 -0
- data/ext/polars/src/lazyframe/general.rs +51 -0
- data/ext/polars/src/lib.rs +119 -10
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/map/series.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +44 -0
- data/ext/polars/src/series/general.rs +64 -4
- data/lib/polars/array_expr.rb +382 -3
- data/lib/polars/array_name_space.rb +281 -0
- data/lib/polars/binary_expr.rb +67 -0
- data/lib/polars/binary_name_space.rb +43 -0
- data/lib/polars/cat_expr.rb +224 -0
- data/lib/polars/cat_name_space.rb +138 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/convert.rb +6 -6
- data/lib/polars/data_frame.rb +684 -19
- data/lib/polars/data_type_expr.rb +52 -0
- data/lib/polars/data_types.rb +14 -2
- data/lib/polars/date_time_expr.rb +251 -0
- data/lib/polars/date_time_name_space.rb +299 -0
- data/lib/polars/expr.rb +1213 -180
- data/lib/polars/functions/datatype.rb +21 -0
- data/lib/polars/functions/lazy.rb +13 -0
- data/lib/polars/io/csv.rb +1 -1
- data/lib/polars/io/json.rb +4 -4
- data/lib/polars/io/ndjson.rb +4 -4
- data/lib/polars/io/parquet.rb +27 -5
- data/lib/polars/lazy_frame.rb +936 -20
- data/lib/polars/list_expr.rb +196 -4
- data/lib/polars/list_name_space.rb +201 -4
- data/lib/polars/meta_expr.rb +64 -0
- data/lib/polars/name_expr.rb +36 -0
- data/lib/polars/schema.rb +79 -3
- data/lib/polars/selector.rb +72 -0
- data/lib/polars/selectors.rb +3 -3
- data/lib/polars/series.rb +1051 -54
- data/lib/polars/string_expr.rb +411 -6
- data/lib/polars/string_name_space.rb +722 -49
- data/lib/polars/struct_expr.rb +103 -0
- data/lib/polars/struct_name_space.rb +19 -1
- data/lib/polars/utils/various.rb +18 -1
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -0
- metadata +4 -1
@@ -0,0 +1,21 @@
|
|
1
|
+
module Polars
|
2
|
+
module Functions
|
3
|
+
# Get a lazily evaluated `DataType` of a column or expression.
|
4
|
+
#
|
5
|
+
# @note
|
6
|
+
# This functionality is considered **unstable**. It may be changed
|
7
|
+
# at any point without it being considered a breaking change.
|
8
|
+
#
|
9
|
+
# @return [DataTypeExpr]
|
10
|
+
def dtype_of(col_or_expr)
|
11
|
+
e = nil
|
12
|
+
if col_or_expr.is_a?(::String)
|
13
|
+
e = F.col(col_or_expr)
|
14
|
+
else
|
15
|
+
e = col_or_expr
|
16
|
+
end
|
17
|
+
|
18
|
+
DataTypeExpr._from_rbdatatype_expr(RbDataTypeExpr.of_expr(e._rbexpr))
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -1,5 +1,18 @@
|
|
1
1
|
module Polars
|
2
2
|
module Functions
|
3
|
+
# Select a field in the current `struct.with_fields` scope.
|
4
|
+
#
|
5
|
+
# @param name [Object]
|
6
|
+
# Name of the field(s) to select.
|
7
|
+
#
|
8
|
+
# @return [Expr]
|
9
|
+
def field(name)
|
10
|
+
if name.is_a?(::String)
|
11
|
+
name = [name]
|
12
|
+
end
|
13
|
+
Utils.wrap_expr(Plr.field(name))
|
14
|
+
end
|
15
|
+
|
3
16
|
# Alias for an element being evaluated in an `eval` expression.
|
4
17
|
#
|
5
18
|
# @return [Expr]
|
data/lib/polars/io/csv.rb
CHANGED
@@ -499,7 +499,7 @@ module Polars
|
|
499
499
|
# for instance `#`.
|
500
500
|
# @param quote_char [String]
|
501
501
|
# Single byte character used for csv quoting.
|
502
|
-
# Set to
|
502
|
+
# Set to nil to turn off special handling and escaping of quotes.
|
503
503
|
# @param skip_rows [Integer]
|
504
504
|
# Start reading after `skip_rows` lines. The header will be parsed at this
|
505
505
|
# offset.
|
data/lib/polars/io/json.rb
CHANGED
@@ -7,11 +7,11 @@ module Polars
|
|
7
7
|
# @param schema [Object]
|
8
8
|
# The DataFrame schema may be declared in several ways:
|
9
9
|
#
|
10
|
-
# * As a
|
11
|
-
# * As
|
12
|
-
# * As
|
10
|
+
# * As a hash of \\\\{name:type} pairs; if type is nil, it will be auto-inferred.
|
11
|
+
# * As an array of column names; in this case types are automatically inferred.
|
12
|
+
# * As an array of [name,type] pairs; this is equivalent to the hash form.
|
13
13
|
#
|
14
|
-
# If you supply
|
14
|
+
# If you supply an array of column names that does not match the names in the
|
15
15
|
# underlying data, the names given here will overwrite them. The number
|
16
16
|
# of names given in the schema should match the underlying data dimensions.
|
17
17
|
# @param schema_overrides [Hash]
|
data/lib/polars/io/ndjson.rb
CHANGED
@@ -7,11 +7,11 @@ module Polars
|
|
7
7
|
# @param schema [Object]
|
8
8
|
# The DataFrame schema may be declared in several ways:
|
9
9
|
#
|
10
|
-
# * As a
|
11
|
-
# * As
|
12
|
-
# * As
|
10
|
+
# * As a hash of \\\\{name:type} pairs; if type is nil, it will be auto-inferred.
|
11
|
+
# * As an array of column names; in this case types are automatically inferred.
|
12
|
+
# * As an array of [name,type] pairs; this is equivalent to the hash form.
|
13
13
|
#
|
14
|
-
# If you supply
|
14
|
+
# If you supply an array of column names that does not match the names in the
|
15
15
|
# underlying data, the names given here will overwrite them. The number
|
16
16
|
# of names given in the schema should match the underlying data dimensions.
|
17
17
|
# @param schema_overrides [Hash]
|
data/lib/polars/io/parquet.rb
CHANGED
@@ -43,7 +43,7 @@ module Polars
|
|
43
43
|
# Extra options that make sense for a particular storage connection.
|
44
44
|
# @param credential_provider [Object]
|
45
45
|
# Provide a function that can be called to provide cloud storage
|
46
|
-
# credentials. The function is expected to return a
|
46
|
+
# credentials. The function is expected to return a hash of
|
47
47
|
# credential keys along with an optional credential expiry time.
|
48
48
|
# @param retries [Integer]
|
49
49
|
# Number of retries if accessing a cloud instance fails.
|
@@ -123,7 +123,26 @@ module Polars
|
|
123
123
|
source = Utils.normalize_filepath(source)
|
124
124
|
end
|
125
125
|
|
126
|
-
|
126
|
+
# TODO: return a Schema (the collected schema is currently converted with `to_h`)
|
127
|
+
scan_parquet(source).collect_schema.to_h
|
128
|
+
end
|
129
|
+
|
130
|
+
# Get file-level custom metadata of a Parquet file without reading data.
|
131
|
+
#
|
132
|
+
# @note
|
133
|
+
# This functionality is considered **experimental**. It may be removed or
|
134
|
+
# changed at any point without it being considered a breaking change.
|
135
|
+
#
|
136
|
+
# @param source [Object]
|
137
|
+
# Path to a file or a file-like object.
|
138
|
+
#
|
139
|
+
# @return [Hash]
|
140
|
+
def read_parquet_metadata(source)
|
141
|
+
if Utils.pathlike?(source)
|
142
|
+
source = Utils.normalize_filepath(source, check_not_directory: false)
|
143
|
+
end
|
144
|
+
|
145
|
+
Plr.read_parquet_metadata(source)
|
127
146
|
end
|
128
147
|
|
129
148
|
# Lazily read from a parquet file or multiple files via glob patterns.
|
@@ -171,7 +190,7 @@ module Polars
|
|
171
190
|
# Extra options that make sense for a particular storage connection.
|
172
191
|
# @param credential_provider [Object]
|
173
192
|
# Provide a function that can be called to provide cloud storage
|
174
|
-
# credentials. The function is expected to return a
|
193
|
+
# credentials. The function is expected to return a hash of
|
175
194
|
# credential keys along with an optional credential expiry time.
|
176
195
|
# @param retries [Integer]
|
177
196
|
# Number of retries if accessing a cloud instance fails.
|
@@ -210,7 +229,9 @@ module Polars
|
|
210
229
|
retries: 2,
|
211
230
|
include_file_paths: nil,
|
212
231
|
allow_missing_columns: false,
|
213
|
-
extra_columns: "raise"
|
232
|
+
extra_columns: "raise",
|
233
|
+
_column_mapping: nil,
|
234
|
+
_deletion_files: nil
|
214
235
|
)
|
215
236
|
missing_columns = allow_missing_columns ? "insert" : "raise"
|
216
237
|
|
@@ -260,7 +281,8 @@ module Polars
|
|
260
281
|
storage_options: storage_options,
|
261
282
|
# credential_provider: credential_provider_builder,
|
262
283
|
retries: retries,
|
263
|
-
|
284
|
+
deletion_files: _deletion_files,
|
285
|
+
column_mapping: _column_mapping
|
264
286
|
),
|
265
287
|
parallel,
|
266
288
|
low_memory,
|