polars-df 0.12.0-x86_64-linux → 0.13.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
Binary file
Binary file
Binary file
data/lib/polars/io/ipc.rb CHANGED
@@ -193,6 +193,18 @@ module Polars
193
193
  # Try to memory map the file. This can greatly improve performance on repeated
194
194
  # queries as the OS may cache pages.
195
195
  # Only uncompressed IPC files can be memory mapped.
196
+ # @param hive_partitioning [Boolean]
197
+ # Infer statistics and schema from Hive partitioned URL and use them
198
+ # to prune reads. This is unset by default (i.e. `nil`), meaning it is
199
+ # automatically enabled when a single directory is passed, and otherwise
200
+ # disabled.
201
+ # @param hive_schema [Hash]
202
+ # The column names and data types of the columns by which the data is partitioned.
203
+ # If set to `nil` (default), the schema of the Hive partitions is inferred.
204
+ # @param try_parse_hive_dates [Boolean]
205
+ # Whether to try parsing hive values as date/datetime types.
206
+ # @param include_file_paths [String]
207
+ # Include the path of the source file(s) as a column with this name.
196
208
  #
197
209
  # @return [LazyFrame]
198
210
  def scan_ipc(
@@ -203,7 +215,11 @@ module Polars
203
215
  row_count_name: nil,
204
216
  row_count_offset: 0,
205
217
  storage_options: nil,
206
- memory_map: true
218
+ memory_map: true,
219
+ hive_partitioning: nil,
220
+ hive_schema: nil,
221
+ try_parse_hive_dates: true,
222
+ include_file_paths: nil
207
223
  )
208
224
  _scan_ipc_impl(
209
225
  source,
@@ -213,7 +229,11 @@ module Polars
213
229
  row_count_name: row_count_name,
214
230
  row_count_offset: row_count_offset,
215
231
  storage_options: storage_options,
216
- memory_map: memory_map
232
+ memory_map: memory_map,
233
+ hive_partitioning: hive_partitioning,
234
+ hive_schema: hive_schema,
235
+ try_parse_hive_dates: try_parse_hive_dates,
236
+ include_file_paths: include_file_paths
217
237
  )
218
238
  end
219
239
 
@@ -226,7 +246,11 @@ module Polars
226
246
  row_count_name: nil,
227
247
  row_count_offset: 0,
228
248
  storage_options: nil,
229
- memory_map: true
249
+ memory_map: true,
250
+ hive_partitioning: nil,
251
+ hive_schema: nil,
252
+ try_parse_hive_dates: true,
253
+ include_file_paths: nil
230
254
  )
231
255
  if Utils.pathlike?(file)
232
256
  file = Utils.normalize_filepath(file)
@@ -239,7 +263,11 @@ module Polars
239
263
  cache,
240
264
  rechunk,
241
265
  Utils.parse_row_index_args(row_count_name, row_count_offset),
242
- memory_map
266
+ memory_map,
267
+ hive_partitioning,
268
+ hive_schema,
269
+ try_parse_hive_dates,
270
+ include_file_paths
243
271
  )
244
272
  Utils.wrap_ldf(rblf)
245
273
  end
@@ -158,6 +158,8 @@ module Polars
158
158
  # Extra options that make sense for a particular storage connection.
159
159
  # @param low_memory [Boolean]
160
160
  # Reduce memory pressure at the expense of performance.
161
+ # @param include_file_paths [String]
162
+ # Include the path of the source file(s) as a column with this name.
161
163
  #
162
164
  # @return [LazyFrame]
163
165
  def scan_parquet(
@@ -170,7 +172,8 @@ module Polars
170
172
  row_count_name: nil,
171
173
  row_count_offset: 0,
172
174
  storage_options: nil,
173
- low_memory: false
175
+ low_memory: false,
176
+ include_file_paths: nil
174
177
  )
175
178
  if Utils.pathlike?(source)
176
179
  source = Utils.normalize_filepath(source)
@@ -186,7 +189,8 @@ module Polars
186
189
  row_count_offset: row_count_offset,
187
190
  storage_options: storage_options,
188
191
  low_memory: low_memory,
189
- glob: glob
192
+ glob: glob,
193
+ include_file_paths: include_file_paths
190
194
  )
191
195
  end
192
196
 
@@ -203,7 +207,8 @@ module Polars
203
207
  low_memory: false,
204
208
  use_statistics: true,
205
209
  hive_partitioning: nil,
206
- glob: true
210
+ glob: true,
211
+ include_file_paths: nil
207
212
  )
208
213
  rblf =
209
214
  RbLazyFrame.new_from_parquet(
@@ -219,7 +224,8 @@ module Polars
219
224
  hive_partitioning,
220
225
  nil,
221
226
  true,
222
- glob
227
+ glob,
228
+ include_file_paths
223
229
  )
224
230
  Utils.wrap_ldf(rblf)
225
231
  end
@@ -2526,11 +2526,15 @@ module Polars
2526
2526
  value_name: nil,
2527
2527
  streamable: true
2528
2528
  )
2529
+ if !streamable
2530
+ warn "The `streamable` parameter for `LazyFrame.unpivot` is deprecated"
2531
+ end
2532
+
2529
2533
  on = on.nil? ? [] : Utils._expand_selectors(self, on)
2530
2534
  index = index.nil? ? [] : Utils._expand_selectors(self, index)
2531
2535
 
2532
2536
  _from_rbldf(
2533
- _ldf.unpivot(on, index, value_name, variable_name, streamable)
2537
+ _ldf.unpivot(on, index, value_name, variable_name)
2534
2538
  )
2535
2539
  end
2536
2540
  alias_method :melt, :unpivot
data/lib/polars/series.rb CHANGED
@@ -1790,9 +1790,10 @@ module Polars
1790
1790
  # Use this to swiftly assert a Series does not have null values.
1791
1791
  #
1792
1792
  # @return [Boolean]
1793
- def has_validity
1794
- _s.has_validity
1793
+ def has_nulls
1794
+ _s.has_nulls
1795
1795
  end
1796
+ alias_method :has_validity, :has_nulls
1796
1797
 
1797
1798
  # Check if the Series is empty.
1798
1799
  #
@@ -792,15 +792,15 @@ module Polars
792
792
  # df.select(Polars.col("json").str.json_decode(dtype))
793
793
  # # =>
794
794
  # # shape: (3, 1)
795
- # # ┌─────────────┐
796
- # # │ json
797
- # # │ ---
798
- # # │ struct[2]
799
- # # ╞═════════════╡
800
- # # │ {1,true}
801
- # # │ {null,null}
802
- # # │ {2,false}
803
- # # └─────────────┘
795
+ # # ┌───────────┐
796
+ # # │ json
797
+ # # │ ---
798
+ # # │ struct[2]
799
+ # # ╞═══════════╡
800
+ # # │ {1,true}
801
+ # # │ null
802
+ # # │ {2,false}
803
+ # # └───────────┘
804
804
  def json_decode(dtype = nil, infer_schema_length: 100)
805
805
  if !dtype.nil?
806
806
  dtype = Utils.rb_type_to_dtype(dtype)
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.12.0"
3
+ VERSION = "0.13.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.0
4
+ version: 0.13.0
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-11 00:00:00.000000000 Z
11
+ date: 2024-09-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal