polars-df 0.12.0-x86_64-linux → 0.14.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
Binary file
Binary file
Binary file
@@ -2426,15 +2426,15 @@ module Polars
2426
2426
  # df.map_rows { |t| t[0] * 2 + t[1] }
2427
2427
  # # =>
2428
2428
  # # shape: (3, 1)
2429
- # # ┌───────┐
2430
- # # │ apply │
2431
- # # │ ---   │
2432
- # # │ i64   │
2433
- # # ╞═══════╡
2434
- # # │ 1     │
2435
- # # │ 9     │
2436
- # # │ 14    │
2437
- # # └───────┘
2429
+ # # ┌─────┐
2430
+ # # │ map │
2431
+ # # │ --- │
2432
+ # # │ i64 │
2433
+ # # ╞═════╡
2434
+ # # │ 1   │
2435
+ # # │ 9   │
2436
+ # # │ 14  │
2437
+ # # └─────┘
2438
2438
  def map_rows(return_dtype: nil, inference_size: 256, &f)
2439
2439
  out, is_df = _df.map_rows(f, return_dtype, inference_size)
2440
2440
  if is_df
@@ -4234,7 +4234,7 @@ module Polars
4234
4234
  if n.nil? && !frac.nil?
4235
4235
  frac = Series.new("frac", [frac]) unless frac.is_a?(Series)
4236
4236
 
4237
- _from_rbdf(
4237
+ return _from_rbdf(
4238
4238
  _df.sample_frac(frac._s, with_replacement, shuffle, seed)
4239
4239
  )
4240
4240
  end
data/lib/polars/expr.rb CHANGED
@@ -1182,7 +1182,7 @@ module Polars
1182
1182
  # "b" => [1, 1, 2, 2]
1183
1183
  # }
1184
1184
  # )
1185
- # df.select(Polars.all.mode)
1185
+ # df.select(Polars.all.mode.first)
1186
1186
  # # =>
1187
1187
  # # shape: (2, 2)
1188
1188
  # # ┌─────┬─────┐
@@ -6015,12 +6015,12 @@ module Polars
6015
6015
  # # ┌──────┐
6016
6016
  # # │ a    │
6017
6017
  # # │ ---  │
6018
- # # │ i64  │
6018
+ # # │ f64  │
6019
6019
  # # ╞══════╡
6020
- # # │ -1   │
6021
- # # │ 0    │
6022
- # # │ 0    │
6023
- # # │ 1    │
6020
+ # # │ -1.0 │
6021
+ # # │ -0.0 │
6022
+ # # │ 0.0  │
6023
+ # # │ 1.0  │
6024
6024
  # # │ null │
6025
6025
  # # └──────┘
6026
6026
  def sign
data/lib/polars/io/ipc.rb CHANGED
@@ -189,10 +189,18 @@ module Polars
189
189
  # Offset to start the row_count column (only use if the name is set).
190
190
  # @param storage_options [Hash]
191
191
  # Extra options that make sense for a particular storage connection.
192
- # @param memory_map [Boolean]
193
- # Try to memory map the file. This can greatly improve performance on repeated
194
- # queries as the OS may cache pages.
195
- # Only uncompressed IPC files can be memory mapped.
192
+ # @param hive_partitioning [Boolean]
193
+ # Infer statistics and schema from Hive partitioned URL and use them
194
+ # to prune reads. This is unset by default (i.e. `nil`), meaning it is
195
+ # automatically enabled when a single directory is passed, and otherwise
196
+ # disabled.
197
+ # @param hive_schema [Hash]
198
+ # The column names and data types of the columns by which the data is partitioned.
199
+ # If set to `nil` (default), the schema of the Hive partitions is inferred.
200
+ # @param try_parse_hive_dates [Boolean]
201
+ # Whether to try parsing hive values as date/datetime types.
202
+ # @param include_file_paths [String]
203
+ # Include the path of the source file(s) as a column with this name.
196
204
  #
197
205
  # @return [LazyFrame]
198
206
  def scan_ipc(
@@ -203,7 +211,10 @@ module Polars
203
211
  row_count_name: nil,
204
212
  row_count_offset: 0,
205
213
  storage_options: nil,
206
- memory_map: true
214
+ hive_partitioning: nil,
215
+ hive_schema: nil,
216
+ try_parse_hive_dates: true,
217
+ include_file_paths: nil
207
218
  )
208
219
  _scan_ipc_impl(
209
220
  source,
@@ -213,7 +224,10 @@ module Polars
213
224
  row_count_name: row_count_name,
214
225
  row_count_offset: row_count_offset,
215
226
  storage_options: storage_options,
216
- memory_map: memory_map
227
+ hive_partitioning: hive_partitioning,
228
+ hive_schema: hive_schema,
229
+ try_parse_hive_dates: try_parse_hive_dates,
230
+ include_file_paths: include_file_paths
217
231
  )
218
232
  end
219
233
 
@@ -226,7 +240,10 @@ module Polars
226
240
  row_count_name: nil,
227
241
  row_count_offset: 0,
228
242
  storage_options: nil,
229
- memory_map: true
243
+ hive_partitioning: nil,
244
+ hive_schema: nil,
245
+ try_parse_hive_dates: true,
246
+ include_file_paths: nil
230
247
  )
231
248
  if Utils.pathlike?(file)
232
249
  file = Utils.normalize_filepath(file)
@@ -239,7 +256,10 @@ module Polars
239
256
  cache,
240
257
  rechunk,
241
258
  Utils.parse_row_index_args(row_count_name, row_count_offset),
242
- memory_map
259
+ hive_partitioning,
260
+ hive_schema,
261
+ try_parse_hive_dates,
262
+ include_file_paths
243
263
  )
244
264
  Utils.wrap_ldf(rblf)
245
265
  end
@@ -158,6 +158,8 @@ module Polars
158
158
  # Extra options that make sense for a particular storage connection.
159
159
  # @param low_memory [Boolean]
160
160
  # Reduce memory pressure at the expense of performance.
161
+ # @param include_file_paths [String]
162
+ # Include the path of the source file(s) as a column with this name.
161
163
  #
162
164
  # @return [LazyFrame]
163
165
  def scan_parquet(
@@ -170,7 +172,8 @@ module Polars
170
172
  row_count_name: nil,
171
173
  row_count_offset: 0,
172
174
  storage_options: nil,
173
- low_memory: false
175
+ low_memory: false,
176
+ include_file_paths: nil
174
177
  )
175
178
  if Utils.pathlike?(source)
176
179
  source = Utils.normalize_filepath(source)
@@ -186,7 +189,8 @@ module Polars
186
189
  row_count_offset: row_count_offset,
187
190
  storage_options: storage_options,
188
191
  low_memory: low_memory,
189
- glob: glob
192
+ glob: glob,
193
+ include_file_paths: include_file_paths
190
194
  )
191
195
  end
192
196
 
@@ -203,7 +207,8 @@ module Polars
203
207
  low_memory: false,
204
208
  use_statistics: true,
205
209
  hive_partitioning: nil,
206
- glob: true
210
+ glob: true,
211
+ include_file_paths: nil
207
212
  )
208
213
  rblf =
209
214
  RbLazyFrame.new_from_parquet(
@@ -219,7 +224,8 @@ module Polars
219
224
  hive_partitioning,
220
225
  nil,
221
226
  true,
222
- glob
227
+ glob,
228
+ include_file_paths
223
229
  )
224
230
  Utils.wrap_ldf(rblf)
225
231
  end
@@ -2526,11 +2526,15 @@ module Polars
2526
2526
  value_name: nil,
2527
2527
  streamable: true
2528
2528
  )
2529
+ if !streamable
2530
+ warn "The `streamable` parameter for `LazyFrame.unpivot` is deprecated"
2531
+ end
2532
+
2529
2533
  on = on.nil? ? [] : Utils._expand_selectors(self, on)
2530
2534
  index = index.nil? ? [] : Utils._expand_selectors(self, index)
2531
2535
 
2532
2536
  _from_rbldf(
2533
- _ldf.unpivot(on, index, value_name, variable_name, streamable)
2537
+ _ldf.unpivot(on, index, value_name, variable_name)
2534
2538
  )
2535
2539
  end
2536
2540
  alias_method :melt, :unpivot
data/lib/polars/series.rb CHANGED
@@ -1790,9 +1790,10 @@ module Polars
1790
1790
  # Use this to swiftly assert a Series does not have null values.
1791
1791
  #
1792
1792
  # @return [Boolean]
1793
- def has_validity
1794
- _s.has_validity
1793
+ def has_nulls
1794
+ _s.has_nulls
1795
1795
  end
1796
+ alias_method :has_validity, :has_nulls
1796
1797
 
1797
1798
  # Check if the Series is empty.
1798
1799
  #
@@ -2605,12 +2606,12 @@ module Polars
2605
2606
  # s.sign
2606
2607
  # # =>
2607
2608
  # # shape: (5,)
2608
- # # Series: 'a' [i64]
2609
+ # # Series: 'a' [f64]
2609
2610
  # # [
2610
- # # -1
2611
- # # 0
2612
- # # 0
2613
- # # 1
2611
+ # # -1.0
2612
+ # # -0.0
2613
+ # # 0.0
2614
+ # # 1.0
2614
2615
  # # null
2615
2616
  # # ]
2616
2617
  def sign
@@ -792,15 +792,15 @@ module Polars
792
792
  # df.select(Polars.col("json").str.json_decode(dtype))
793
793
  # # =>
794
794
  # # shape: (3, 1)
795
- # # ┌─────────────┐
796
- # # │ json        │
797
- # # │ ---         │
798
- # # │ struct[2]   │
799
- # # ╞═════════════╡
800
- # # │ {1,true}    │
801
- # # │ {null,null} │
802
- # # │ {2,false}   │
803
- # # └─────────────┘
795
+ # # ┌───────────┐
796
+ # # │ json      │
797
+ # # │ ---       │
798
+ # # │ struct[2] │
799
+ # # ╞═══════════╡
800
+ # # │ {1,true}  │
801
+ # # │ null      │
802
+ # # │ {2,false} │
803
+ # # └───────────┘
804
804
  def json_decode(dtype = nil, infer_schema_length: 100)
805
805
  if !dtype.nil?
806
806
  dtype = Utils.rb_type_to_dtype(dtype)
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.12.0"
3
+ VERSION = "0.14.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.0
4
+ version: 0.14.0
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-11 00:00:00.000000000 Z
11
+ date: 2024-09-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal