polars-df 0.21.0 → 0.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/Cargo.lock +1 -1
  4. data/ext/polars/Cargo.toml +7 -1
  5. data/ext/polars/src/conversion/mod.rs +92 -4
  6. data/ext/polars/src/exceptions.rs +1 -0
  7. data/ext/polars/src/expr/array.rs +73 -4
  8. data/ext/polars/src/expr/binary.rs +26 -1
  9. data/ext/polars/src/expr/bitwise.rs +39 -0
  10. data/ext/polars/src/expr/categorical.rs +20 -0
  11. data/ext/polars/src/expr/datatype.rs +24 -1
  12. data/ext/polars/src/expr/datetime.rs +58 -0
  13. data/ext/polars/src/expr/general.rs +84 -5
  14. data/ext/polars/src/expr/list.rs +24 -0
  15. data/ext/polars/src/expr/meta.rs +11 -0
  16. data/ext/polars/src/expr/mod.rs +1 -0
  17. data/ext/polars/src/expr/name.rs +8 -0
  18. data/ext/polars/src/expr/rolling.rs +20 -0
  19. data/ext/polars/src/expr/string.rs +59 -0
  20. data/ext/polars/src/expr/struct.rs +9 -1
  21. data/ext/polars/src/functions/io.rs +19 -0
  22. data/ext/polars/src/functions/lazy.rs +4 -0
  23. data/ext/polars/src/lazyframe/general.rs +51 -0
  24. data/ext/polars/src/lib.rs +119 -10
  25. data/ext/polars/src/map/dataframe.rs +2 -2
  26. data/ext/polars/src/map/series.rs +1 -1
  27. data/ext/polars/src/series/aggregation.rs +44 -0
  28. data/ext/polars/src/series/general.rs +64 -4
  29. data/lib/polars/array_expr.rb +382 -3
  30. data/lib/polars/array_name_space.rb +281 -0
  31. data/lib/polars/binary_expr.rb +67 -0
  32. data/lib/polars/binary_name_space.rb +43 -0
  33. data/lib/polars/cat_expr.rb +224 -0
  34. data/lib/polars/cat_name_space.rb +138 -0
  35. data/lib/polars/config.rb +2 -2
  36. data/lib/polars/convert.rb +6 -6
  37. data/lib/polars/data_frame.rb +684 -19
  38. data/lib/polars/data_type_expr.rb +52 -0
  39. data/lib/polars/data_types.rb +14 -2
  40. data/lib/polars/date_time_expr.rb +251 -0
  41. data/lib/polars/date_time_name_space.rb +299 -0
  42. data/lib/polars/expr.rb +1213 -180
  43. data/lib/polars/functions/datatype.rb +21 -0
  44. data/lib/polars/functions/lazy.rb +13 -0
  45. data/lib/polars/io/csv.rb +1 -1
  46. data/lib/polars/io/json.rb +4 -4
  47. data/lib/polars/io/ndjson.rb +4 -4
  48. data/lib/polars/io/parquet.rb +27 -5
  49. data/lib/polars/lazy_frame.rb +936 -20
  50. data/lib/polars/list_expr.rb +196 -4
  51. data/lib/polars/list_name_space.rb +201 -4
  52. data/lib/polars/meta_expr.rb +64 -0
  53. data/lib/polars/name_expr.rb +36 -0
  54. data/lib/polars/schema.rb +79 -3
  55. data/lib/polars/selector.rb +72 -0
  56. data/lib/polars/selectors.rb +3 -3
  57. data/lib/polars/series.rb +1051 -54
  58. data/lib/polars/string_expr.rb +411 -6
  59. data/lib/polars/string_name_space.rb +722 -49
  60. data/lib/polars/struct_expr.rb +103 -0
  61. data/lib/polars/struct_name_space.rb +19 -1
  62. data/lib/polars/utils/various.rb +18 -1
  63. data/lib/polars/utils.rb +5 -1
  64. data/lib/polars/version.rb +1 -1
  65. data/lib/polars.rb +2 -0
  66. metadata +4 -1
@@ -0,0 +1,21 @@
1
+ module Polars
2
+ module Functions
3
+ # Get a lazily evaluated :class:`DataType` of a column or expression.
4
+ #
5
+ # @note
6
+ # This functionality is considered **unstable**. It may be changed
7
+ # at any point without it being considered a breaking change.
8
+ #
9
+ # @return [DataTypeExpr]
10
+ def dtype_of(col_or_expr)
11
+ e = nil
12
+ if col_or_expr.is_a?(::String)
13
+ e = F.col(col_or_expr)
14
+ else
15
+ e = col_or_expr
16
+ end
17
+
18
+ DataTypeExpr._from_rbdatatype_expr(RbDataTypeExpr.of_expr(e._rbexpr))
19
+ end
20
+ end
21
+ end
@@ -1,5 +1,18 @@
1
1
  module Polars
2
2
  module Functions
3
+ # Select a field in the current `struct.with_fields` scope.
4
+ #
5
+ # @param name [Object]
6
+ # Name of the field(s) to select.
7
+ #
8
+ # @return [Expr]
9
+ def field(name)
10
+ if name.is_a?(::String)
11
+ name = [name]
12
+ end
13
+ Utils.wrap_expr(Plr.field(name))
14
+ end
15
+
3
16
  # Alias for an element in evaluated in an `eval` expression.
4
17
  #
5
18
  # @return [Expr]
data/lib/polars/io/csv.rb CHANGED
@@ -499,7 +499,7 @@ module Polars
499
499
  # for instance `#`.
500
500
  # @param quote_char [String]
501
501
  # Single byte character used for csv quoting.
502
- # Set to None to turn off special handling and escaping of quotes.
502
+ # Set to nil to turn off special handling and escaping of quotes.
503
503
  # @param skip_rows [Integer]
504
504
  # Start reading after `skip_rows` lines. The header will be parsed at this
505
505
  # offset.
@@ -7,11 +7,11 @@ module Polars
7
7
  # @param schema [Object]
8
8
  # The DataFrame schema may be declared in several ways:
9
9
  #
10
- # * As a dict of {name:type} pairs; if type is None, it will be auto-inferred.
11
- # * As a list of column names; in this case types are automatically inferred.
12
- # * As a list of (name,type) pairs; this is equivalent to the dictionary form.
10
+ # * As a hash of \\\\{name:type} pairs; if type is nil, it will be auto-inferred.
11
+ # * As an array of column names; in this case types are automatically inferred.
12
+ # * As an array of [name,type] pairs; this is equivalent to the hash form.
13
13
  #
14
- # If you supply a list of column names that does not match the names in the
14
+ # If you supply an array of column names that does not match the names in the
15
15
  # underlying data, the names given here will overwrite them. The number
16
16
  # of names given in the schema should match the underlying data dimensions.
17
17
  # @param schema_overrides [Hash]
@@ -7,11 +7,11 @@ module Polars
7
7
  # @param schema [Object]
8
8
  # The DataFrame schema may be declared in several ways:
9
9
  #
10
- # * As a dict of {name:type} pairs; if type is None, it will be auto-inferred.
11
- # * As a list of column names; in this case types are automatically inferred.
12
- # * As a list of (name,type) pairs; this is equivalent to the dictionary form.
10
+ # * As a hash of \\\\{name:type} pairs; if type is nil, it will be auto-inferred.
11
+ # * As an array of column names; in this case types are automatically inferred.
12
+ # * As an array of [name,type] pairs; this is equivalent to the hash form.
13
13
  #
14
- # If you supply a list of column names that does not match the names in the
14
+ # If you supply an array of column names that does not match the names in the
15
15
  # underlying data, the names given here will overwrite them. The number
16
16
  # of names given in the schema should match the underlying data dimensions.
17
17
  # @param schema_overrides [Hash]
@@ -43,7 +43,7 @@ module Polars
43
43
  # Extra options that make sense for a particular storage connection.
44
44
  # @param credential_provider [Object]
45
45
  # Provide a function that can be called to provide cloud storage
46
- # credentials. The function is expected to return a dictionary of
46
+ # credentials. The function is expected to return a hash of
47
47
  # credential keys along with an optional credential expiry time.
48
48
  # @param retries [Integer]
49
49
  # Number of retries if accessing a cloud instance fails.
@@ -123,7 +123,26 @@ module Polars
123
123
  source = Utils.normalize_filepath(source)
124
124
  end
125
125
 
126
- Plr.parquet_schema(source)
126
+ # TODO return Schema
127
+ scan_parquet(source).collect_schema.to_h
128
+ end
129
+
130
+ # Get file-level custom metadata of a Parquet file without reading data.
131
+ #
132
+ # @note
133
+ # This functionality is considered **experimental**. It may be removed or
134
+ # changed at any point without it being considered a breaking change.
135
+ #
136
+ # @param source [Object]
137
+ # Path to a file or a file-like object.
138
+ #
139
+ # @return [Hash]
140
+ def read_parquet_metadata(source)
141
+ if Utils.pathlike?(source)
142
+ source = Utils.normalize_filepath(source, check_not_directory: false)
143
+ end
144
+
145
+ Plr.read_parquet_metadata(source)
127
146
  end
128
147
 
129
148
  # Lazily read from a parquet file or multiple files via glob patterns.
@@ -171,7 +190,7 @@ module Polars
171
190
  # Extra options that make sense for a particular storage connection.
172
191
  # @param credential_provider [Object]
173
192
  # Provide a function that can be called to provide cloud storage
174
- # credentials. The function is expected to return a dictionary of
193
+ # credentials. The function is expected to return a hash of
175
194
  # credential keys along with an optional credential expiry time.
176
195
  # @param retries [Integer]
177
196
  # Number of retries if accessing a cloud instance fails.
@@ -210,7 +229,9 @@ module Polars
210
229
  retries: 2,
211
230
  include_file_paths: nil,
212
231
  allow_missing_columns: false,
213
- extra_columns: "raise"
232
+ extra_columns: "raise",
233
+ _column_mapping: nil,
234
+ _deletion_files: nil
214
235
  )
215
236
  missing_columns = allow_missing_columns ? "insert" : "raise"
216
237
 
@@ -260,7 +281,8 @@ module Polars
260
281
  storage_options: storage_options,
261
282
  # credential_provider: credential_provider_builder,
262
283
  retries: retries,
263
- # deletion_files: _deletion_files
284
+ deletion_files: _deletion_files,
285
+ column_mapping: _column_mapping
264
286
  ),
265
287
  parallel,
266
288
  low_memory,