polars-df 0.11.0-x86_64-darwin → 0.12.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +360 -361
- data/LICENSE-THIRD-PARTY.txt +1067 -880
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +2 -2
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/data_frame.rb +93 -101
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -573
- data/lib/polars/date_time_name_space.rb +263 -464
- data/lib/polars/dynamic_group_by.rb +3 -3
- data/lib/polars/exceptions.rb +3 -0
- data/lib/polars/expr.rb +367 -330
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +18 -77
- data/lib/polars/functions/range/datetime_range.rb +4 -4
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +4 -4
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/io/csv.rb +8 -8
- data/lib/polars/io/ipc.rb +3 -3
- data/lib/polars/io/json.rb +13 -2
- data/lib/polars/io/ndjson.rb +15 -4
- data/lib/polars/io/parquet.rb +5 -4
- data/lib/polars/lazy_frame.rb +120 -106
- data/lib/polars/lazy_group_by.rb +1 -1
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +5 -7
- data/lib/polars/series.rb +105 -189
- data/lib/polars/string_expr.rb +42 -67
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +4 -330
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +11 -0
- metadata +7 -2
data/lib/polars/io/csv.rb
CHANGED
@@ -104,7 +104,7 @@ module Polars
|
|
104
104
|
ignore_errors: false,
|
105
105
|
parse_dates: false,
|
106
106
|
n_threads: nil,
|
107
|
-
infer_schema_length:
|
107
|
+
infer_schema_length: N_INFER_DEFAULT,
|
108
108
|
batch_size: 8192,
|
109
109
|
n_rows: nil,
|
110
110
|
encoding: "utf8",
|
@@ -192,7 +192,7 @@ module Polars
|
|
192
192
|
ignore_errors: false,
|
193
193
|
parse_dates: false,
|
194
194
|
n_threads: nil,
|
195
|
-
infer_schema_length:
|
195
|
+
infer_schema_length: N_INFER_DEFAULT,
|
196
196
|
batch_size: 8192,
|
197
197
|
n_rows: nil,
|
198
198
|
encoding: "utf8",
|
@@ -222,7 +222,7 @@ module Polars
|
|
222
222
|
if !dtypes.nil?
|
223
223
|
if dtypes.is_a?(Hash)
|
224
224
|
dtype_list = []
|
225
|
-
dtypes.each do|k, v|
|
225
|
+
dtypes.each do |k, v|
|
226
226
|
dtype_list << [k, Utils.rb_type_to_dtype(v)]
|
227
227
|
end
|
228
228
|
elsif dtypes.is_a?(::Array)
|
@@ -304,7 +304,7 @@ module Polars
|
|
304
304
|
missing_utf8_is_empty_string,
|
305
305
|
parse_dates,
|
306
306
|
skip_rows_after_header,
|
307
|
-
Utils.
|
307
|
+
Utils.parse_row_index_args(row_count_name, row_count_offset),
|
308
308
|
sample_size,
|
309
309
|
eol_char,
|
310
310
|
raise_if_empty,
|
@@ -422,7 +422,7 @@ module Polars
|
|
422
422
|
ignore_errors: false,
|
423
423
|
parse_dates: false,
|
424
424
|
n_threads: nil,
|
425
|
-
infer_schema_length:
|
425
|
+
infer_schema_length: N_INFER_DEFAULT,
|
426
426
|
batch_size: 50_000,
|
427
427
|
n_rows: nil,
|
428
428
|
encoding: "utf8",
|
@@ -567,7 +567,7 @@ module Polars
|
|
567
567
|
ignore_errors: false,
|
568
568
|
cache: true,
|
569
569
|
with_column_names: nil,
|
570
|
-
infer_schema_length:
|
570
|
+
infer_schema_length: N_INFER_DEFAULT,
|
571
571
|
n_rows: nil,
|
572
572
|
encoding: "utf8",
|
573
573
|
low_memory: false,
|
@@ -629,7 +629,7 @@ module Polars
|
|
629
629
|
ignore_errors: false,
|
630
630
|
cache: true,
|
631
631
|
with_column_names: nil,
|
632
|
-
infer_schema_length:
|
632
|
+
infer_schema_length: N_INFER_DEFAULT,
|
633
633
|
n_rows: nil,
|
634
634
|
encoding: "utf8",
|
635
635
|
low_memory: false,
|
@@ -669,7 +669,7 @@ module Polars
|
|
669
669
|
rechunk,
|
670
670
|
skip_rows_after_header,
|
671
671
|
encoding,
|
672
|
-
Utils.
|
672
|
+
Utils.parse_row_index_args(row_count_name, row_count_offset),
|
673
673
|
parse_dates,
|
674
674
|
eol_char,
|
675
675
|
truncate_ragged_lines
|
data/lib/polars/io/ipc.rb
CHANGED
@@ -76,7 +76,7 @@ module Polars
|
|
76
76
|
columns,
|
77
77
|
projection,
|
78
78
|
n_rows,
|
79
|
-
Utils.
|
79
|
+
Utils.parse_row_index_args(row_count_name, row_count_offset),
|
80
80
|
memory_map
|
81
81
|
)
|
82
82
|
Utils.wrap_df(rbdf)
|
@@ -149,7 +149,7 @@ module Polars
|
|
149
149
|
columns,
|
150
150
|
projection,
|
151
151
|
n_rows,
|
152
|
-
Utils.
|
152
|
+
Utils.parse_row_index_args(row_index_name, row_index_offset),
|
153
153
|
rechunk
|
154
154
|
)
|
155
155
|
Utils.wrap_df(pydf)
|
@@ -238,7 +238,7 @@ module Polars
|
|
238
238
|
n_rows,
|
239
239
|
cache,
|
240
240
|
rechunk,
|
241
|
-
Utils.
|
241
|
+
Utils.parse_row_index_args(row_count_name, row_count_offset),
|
242
242
|
memory_map
|
243
243
|
)
|
244
244
|
Utils.wrap_ldf(rblf)
|
data/lib/polars/io/json.rb
CHANGED
@@ -6,12 +6,23 @@ module Polars
|
|
6
6
|
# Path to a file or a file-like object.
|
7
7
|
#
|
8
8
|
# @return [DataFrame]
|
9
|
-
def read_json(
|
9
|
+
def read_json(
|
10
|
+
source,
|
11
|
+
schema: nil,
|
12
|
+
schema_overrides: nil,
|
13
|
+
infer_schema_length: N_INFER_DEFAULT
|
14
|
+
)
|
10
15
|
if Utils.pathlike?(source)
|
11
16
|
source = Utils.normalize_filepath(source)
|
12
17
|
end
|
13
18
|
|
14
|
-
rbdf =
|
19
|
+
rbdf =
|
20
|
+
RbDataFrame.read_json(
|
21
|
+
source,
|
22
|
+
infer_schema_length,
|
23
|
+
schema,
|
24
|
+
schema_overrides
|
25
|
+
)
|
15
26
|
Utils.wrap_df(rbdf)
|
16
27
|
end
|
17
28
|
end
|
data/lib/polars/io/ndjson.rb
CHANGED
@@ -6,12 +6,23 @@ module Polars
|
|
6
6
|
# Path to a file or a file-like object.
|
7
7
|
#
|
8
8
|
# @return [DataFrame]
|
9
|
-
def read_ndjson(
|
9
|
+
def read_ndjson(
|
10
|
+
source,
|
11
|
+
schema: nil,
|
12
|
+
schema_overrides: nil,
|
13
|
+
ignore_errors: false
|
14
|
+
)
|
10
15
|
if Utils.pathlike?(source)
|
11
16
|
source = Utils.normalize_filepath(source)
|
12
17
|
end
|
13
18
|
|
14
|
-
rbdf =
|
19
|
+
rbdf =
|
20
|
+
RbDataFrame.read_ndjson(
|
21
|
+
source,
|
22
|
+
ignore_errors,
|
23
|
+
schema,
|
24
|
+
schema_overrides
|
25
|
+
)
|
15
26
|
Utils.wrap_df(rbdf)
|
16
27
|
end
|
17
28
|
|
@@ -41,7 +52,7 @@ module Polars
|
|
41
52
|
# @return [LazyFrame]
|
42
53
|
def scan_ndjson(
|
43
54
|
source,
|
44
|
-
infer_schema_length:
|
55
|
+
infer_schema_length: N_INFER_DEFAULT,
|
45
56
|
batch_size: 1024,
|
46
57
|
n_rows: nil,
|
47
58
|
low_memory: false,
|
@@ -61,7 +72,7 @@ module Polars
|
|
61
72
|
n_rows,
|
62
73
|
low_memory,
|
63
74
|
rechunk,
|
64
|
-
Utils.
|
75
|
+
Utils.parse_row_index_args(row_count_name, row_count_offset)
|
65
76
|
)
|
66
77
|
Utils.wrap_ldf(rblf)
|
67
78
|
end
|
data/lib/polars/io/parquet.rb
CHANGED
@@ -110,7 +110,7 @@ module Polars
|
|
110
110
|
projection,
|
111
111
|
n_rows,
|
112
112
|
parallel,
|
113
|
-
Utils.
|
113
|
+
Utils.parse_row_index_args(row_count_name, row_count_offset),
|
114
114
|
low_memory,
|
115
115
|
use_statistics,
|
116
116
|
rechunk
|
@@ -178,7 +178,7 @@ module Polars
|
|
178
178
|
|
179
179
|
_scan_parquet_impl(
|
180
180
|
source,
|
181
|
-
n_rows:n_rows,
|
181
|
+
n_rows: n_rows,
|
182
182
|
cache: cache,
|
183
183
|
parallel: parallel,
|
184
184
|
rechunk: rechunk,
|
@@ -202,7 +202,7 @@ module Polars
|
|
202
202
|
storage_options: nil,
|
203
203
|
low_memory: false,
|
204
204
|
use_statistics: true,
|
205
|
-
hive_partitioning:
|
205
|
+
hive_partitioning: nil,
|
206
206
|
glob: true
|
207
207
|
)
|
208
208
|
rblf =
|
@@ -213,11 +213,12 @@ module Polars
|
|
213
213
|
cache,
|
214
214
|
parallel,
|
215
215
|
rechunk,
|
216
|
-
Utils.
|
216
|
+
Utils.parse_row_index_args(row_count_name, row_count_offset),
|
217
217
|
low_memory,
|
218
218
|
use_statistics,
|
219
219
|
hive_partitioning,
|
220
220
|
nil,
|
221
|
+
true,
|
221
222
|
glob
|
222
223
|
)
|
223
224
|
Utils.wrap_ldf(rblf)
|