polars-df 0.12.0-aarch64-linux → 0.13.0-aarch64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Cargo.lock +248 -269
- data/LICENSE-THIRD-PARTY.txt +883 -810
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/io/ipc.rb +32 -4
- data/lib/polars/io/parquet.rb +10 -4
- data/lib/polars/lazy_frame.rb +5 -1
- data/lib/polars/series.rb +3 -2
- data/lib/polars/string_expr.rb +9 -9
- data/lib/polars/version.rb +1 -1
- metadata +2 -2
data/lib/polars/3.1/polars.so
CHANGED
Binary file
|
data/lib/polars/3.2/polars.so
CHANGED
Binary file
|
data/lib/polars/3.3/polars.so
CHANGED
Binary file
|
data/lib/polars/io/ipc.rb
CHANGED
@@ -193,6 +193,18 @@ module Polars
|
|
193
193
|
# Try to memory map the file. This can greatly improve performance on repeated
|
194
194
|
# queries as the OS may cache pages.
|
195
195
|
# Only uncompressed IPC files can be memory mapped.
|
196
|
+
# @param hive_partitioning [Boolean]
|
197
|
+
# Infer statistics and schema from Hive partitioned URL and use them
|
198
|
+
# to prune reads. This is unset by default (i.e. `nil`), meaning it is
|
199
|
+
# automatically enabled when a single directory is passed, and otherwise
|
200
|
+
# disabled.
|
201
|
+
# @param hive_schema [Hash]
|
202
|
+
# The column names and data types of the columns by which the data is partitioned.
|
203
|
+
# If set to `nil` (default), the schema of the Hive partitions is inferred.
|
204
|
+
# @param try_parse_hive_dates [Boolean]
|
205
|
+
# Whether to try parsing hive values as date/datetime types.
|
206
|
+
# @param include_file_paths [String]
|
207
|
+
# Include the path of the source file(s) as a column with this name.
|
196
208
|
#
|
197
209
|
# @return [LazyFrame]
|
198
210
|
def scan_ipc(
|
@@ -203,7 +215,11 @@ module Polars
|
|
203
215
|
row_count_name: nil,
|
204
216
|
row_count_offset: 0,
|
205
217
|
storage_options: nil,
|
206
|
-
memory_map: true
|
218
|
+
memory_map: true,
|
219
|
+
hive_partitioning: nil,
|
220
|
+
hive_schema: nil,
|
221
|
+
try_parse_hive_dates: true,
|
222
|
+
include_file_paths: nil
|
207
223
|
)
|
208
224
|
_scan_ipc_impl(
|
209
225
|
source,
|
@@ -213,7 +229,11 @@ module Polars
|
|
213
229
|
row_count_name: row_count_name,
|
214
230
|
row_count_offset: row_count_offset,
|
215
231
|
storage_options: storage_options,
|
216
|
-
memory_map: memory_map
|
232
|
+
memory_map: memory_map,
|
233
|
+
hive_partitioning: hive_partitioning,
|
234
|
+
hive_schema: hive_schema,
|
235
|
+
try_parse_hive_dates: try_parse_hive_dates,
|
236
|
+
include_file_paths: include_file_paths
|
217
237
|
)
|
218
238
|
end
|
219
239
|
|
@@ -226,7 +246,11 @@ module Polars
|
|
226
246
|
row_count_name: nil,
|
227
247
|
row_count_offset: 0,
|
228
248
|
storage_options: nil,
|
229
|
-
memory_map: true
|
249
|
+
memory_map: true,
|
250
|
+
hive_partitioning: nil,
|
251
|
+
hive_schema: nil,
|
252
|
+
try_parse_hive_dates: true,
|
253
|
+
include_file_paths: nil
|
230
254
|
)
|
231
255
|
if Utils.pathlike?(file)
|
232
256
|
file = Utils.normalize_filepath(file)
|
@@ -239,7 +263,11 @@ module Polars
|
|
239
263
|
cache,
|
240
264
|
rechunk,
|
241
265
|
Utils.parse_row_index_args(row_count_name, row_count_offset),
|
242
|
-
memory_map
|
266
|
+
memory_map,
|
267
|
+
hive_partitioning,
|
268
|
+
hive_schema,
|
269
|
+
try_parse_hive_dates,
|
270
|
+
include_file_paths
|
243
271
|
)
|
244
272
|
Utils.wrap_ldf(rblf)
|
245
273
|
end
|
data/lib/polars/io/parquet.rb
CHANGED
@@ -158,6 +158,8 @@ module Polars
|
|
158
158
|
# Extra options that make sense for a particular storage connection.
|
159
159
|
# @param low_memory [Boolean]
|
160
160
|
# Reduce memory pressure at the expense of performance.
|
161
|
+
# @param include_file_paths [String]
|
162
|
+
# Include the path of the source file(s) as a column with this name.
|
161
163
|
#
|
162
164
|
# @return [LazyFrame]
|
163
165
|
def scan_parquet(
|
@@ -170,7 +172,8 @@ module Polars
|
|
170
172
|
row_count_name: nil,
|
171
173
|
row_count_offset: 0,
|
172
174
|
storage_options: nil,
|
173
|
-
low_memory: false
|
175
|
+
low_memory: false,
|
176
|
+
include_file_paths: nil
|
174
177
|
)
|
175
178
|
if Utils.pathlike?(source)
|
176
179
|
source = Utils.normalize_filepath(source)
|
@@ -186,7 +189,8 @@ module Polars
|
|
186
189
|
row_count_offset: row_count_offset,
|
187
190
|
storage_options: storage_options,
|
188
191
|
low_memory: low_memory,
|
189
|
-
glob: glob
|
192
|
+
glob: glob,
|
193
|
+
include_file_paths: include_file_paths
|
190
194
|
)
|
191
195
|
end
|
192
196
|
|
@@ -203,7 +207,8 @@ module Polars
|
|
203
207
|
low_memory: false,
|
204
208
|
use_statistics: true,
|
205
209
|
hive_partitioning: nil,
|
206
|
-
glob: true
|
210
|
+
glob: true,
|
211
|
+
include_file_paths: nil
|
207
212
|
)
|
208
213
|
rblf =
|
209
214
|
RbLazyFrame.new_from_parquet(
|
@@ -219,7 +224,8 @@ module Polars
|
|
219
224
|
hive_partitioning,
|
220
225
|
nil,
|
221
226
|
true,
|
222
|
-
glob
|
227
|
+
glob,
|
228
|
+
include_file_paths
|
223
229
|
)
|
224
230
|
Utils.wrap_ldf(rblf)
|
225
231
|
end
|
data/lib/polars/lazy_frame.rb
CHANGED
@@ -2526,11 +2526,15 @@ module Polars
|
|
2526
2526
|
value_name: nil,
|
2527
2527
|
streamable: true
|
2528
2528
|
)
|
2529
|
+
if !streamable
|
2530
|
+
warn "The `streamable` parameter for `LazyFrame.unpivot` is deprecated"
|
2531
|
+
end
|
2532
|
+
|
2529
2533
|
on = on.nil? ? [] : Utils._expand_selectors(self, on)
|
2530
2534
|
index = index.nil? ? [] : Utils._expand_selectors(self, index)
|
2531
2535
|
|
2532
2536
|
_from_rbldf(
|
2533
|
-
_ldf.unpivot(on, index, value_name, variable_name, streamable)
|
2537
|
+
_ldf.unpivot(on, index, value_name, variable_name)
|
2534
2538
|
)
|
2535
2539
|
end
|
2536
2540
|
alias_method :melt, :unpivot
|
data/lib/polars/series.rb
CHANGED
@@ -1790,9 +1790,10 @@ module Polars
|
|
1790
1790
|
# Use this to swiftly assert a Series does not have null values.
|
1791
1791
|
#
|
1792
1792
|
# @return [Boolean]
|
1793
|
-
def has_validity
|
1794
|
-
_s.has_validity
|
1793
|
+
def has_nulls
|
1794
|
+
_s.has_nulls
|
1795
1795
|
end
|
1796
|
+
alias_method :has_validity, :has_nulls
|
1796
1797
|
|
1797
1798
|
# Check if the Series is empty.
|
1798
1799
|
#
|
data/lib/polars/string_expr.rb
CHANGED
@@ -792,15 +792,15 @@ module Polars
|
|
792
792
|
# df.select(Polars.col("json").str.json_decode(dtype))
|
793
793
|
# # =>
|
794
794
|
# # shape: (3, 1)
|
795
|
-
# #
|
796
|
-
# # │ json
|
797
|
-
# # │ ---
|
798
|
-
# # │ struct[2]
|
799
|
-
# #
|
800
|
-
# # │ {1,true}
|
801
|
-
# # │
|
802
|
-
# # │ {2,false}
|
803
|
-
# #
|
795
|
+
# # ┌───────────┐
|
796
|
+
# # │ json │
|
797
|
+
# # │ --- │
|
798
|
+
# # │ struct[2] │
|
799
|
+
# # ╞═══════════╡
|
800
|
+
# # │ {1,true} │
|
801
|
+
# # │ null │
|
802
|
+
# # │ {2,false} │
|
803
|
+
# # └───────────┘
|
804
804
|
def json_decode(dtype = nil, infer_schema_length: 100)
|
805
805
|
if !dtype.nil?
|
806
806
|
dtype = Utils.rb_type_to_dtype(dtype)
|
data/lib/polars/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.12.0
|
4
|
+
version: 0.13.0
|
5
5
|
platform: aarch64-linux
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-09-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bigdecimal
|