polars-df 0.10.0-x86_64-darwin → 0.12.0-x86_64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +392 -351
- data/LICENSE-THIRD-PARTY.txt +1127 -867
- data/README.md +6 -6
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +11 -5
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +176 -403
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -572
- data/lib/polars/date_time_name_space.rb +263 -460
- data/lib/polars/dynamic_group_by.rb +5 -5
- data/lib/polars/exceptions.rb +7 -0
- data/lib/polars/expr.rb +1394 -243
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +90 -57
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +299 -493
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +227 -0
- data/lib/polars/lazy_frame.rb +143 -272
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +7 -9
- data/lib/polars/series.rb +103 -187
- data/lib/polars/string_expr.rb +78 -102
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +8 -300
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +20 -1
- metadata +17 -4
data/README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
# Polars
|
1
|
+
# Ruby Polars
|
2
2
|
|
3
3
|
:fire: Blazingly fast DataFrames for Ruby, powered by [Polars](https://github.com/pola-rs/polars)
|
4
4
|
|
5
|
-
[](https://github.com/ankane/ruby-polars/actions)
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -420,16 +420,16 @@ View the [changelog](CHANGELOG.md)
|
|
420
420
|
|
421
421
|
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
422
422
|
|
423
|
-
- [Report bugs](https://github.com/ankane/polars
|
424
|
-
- Fix bugs and [submit pull requests](https://github.com/ankane/polars
|
423
|
+
- [Report bugs](https://github.com/ankane/ruby-polars/issues)
|
424
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/ruby-polars/pulls)
|
425
425
|
- Write, clarify, or fix documentation
|
426
426
|
- Suggest or add new features
|
427
427
|
|
428
428
|
To get started with development:
|
429
429
|
|
430
430
|
```sh
|
431
|
-
git clone https://github.com/ankane/polars
|
432
|
-
cd polars
|
431
|
+
git clone https://github.com/ankane/ruby-polars.git
|
432
|
+
cd ruby-polars
|
433
433
|
bundle install
|
434
434
|
bundle exec rake compile
|
435
435
|
bundle exec rake test
|
Binary file
|
Binary file
|
Binary file
|
data/lib/polars/array_expr.rb
CHANGED
@@ -358,7 +358,7 @@ module Polars
|
|
358
358
|
# # │ [7, 8, 9] ┆ 4 ┆ null │
|
359
359
|
# # └───────────────┴─────┴──────┘
|
360
360
|
def get(index, null_on_oob: true)
|
361
|
-
index = Utils.
|
361
|
+
index = Utils.parse_into_expression(index)
|
362
362
|
Utils.wrap_expr(_rbexpr.arr_get(index, null_on_oob))
|
363
363
|
end
|
364
364
|
|
@@ -446,7 +446,7 @@ module Polars
|
|
446
446
|
# # │ ["x", "y"] ┆ _ ┆ x_y │
|
447
447
|
# # └───────────────┴───────────┴──────┘
|
448
448
|
def join(separator, ignore_nulls: true)
|
449
|
-
separator = Utils.
|
449
|
+
separator = Utils.parse_into_expression(separator, str_as_lit: true)
|
450
450
|
Utils.wrap_expr(_rbexpr.arr_join(separator, ignore_nulls))
|
451
451
|
end
|
452
452
|
|
@@ -502,7 +502,7 @@ module Polars
|
|
502
502
|
# # │ ["a", "c"] ┆ true │
|
503
503
|
# # └───────────────┴──────────┘
|
504
504
|
def contains(item)
|
505
|
-
item = Utils.
|
505
|
+
item = Utils.parse_into_expression(item, str_as_lit: true)
|
506
506
|
Utils.wrap_expr(_rbexpr.arr_contains(item))
|
507
507
|
end
|
508
508
|
|
@@ -530,7 +530,7 @@ module Polars
|
|
530
530
|
# # │ [2, 2] ┆ 2 │
|
531
531
|
# # └───────────────┴────────────────┘
|
532
532
|
def count_matches(element)
|
533
|
-
element = Utils.
|
533
|
+
element = Utils.parse_into_expression(element, str_as_lit: true)
|
534
534
|
Utils.wrap_expr(_rbexpr.arr_count_matches(element))
|
535
535
|
end
|
536
536
|
end
|
@@ -13,6 +13,7 @@ module Polars
|
|
13
13
|
skip_rows: 0,
|
14
14
|
dtypes: nil,
|
15
15
|
null_values: nil,
|
16
|
+
missing_utf8_is_empty_string: false,
|
16
17
|
ignore_errors: false,
|
17
18
|
parse_dates: false,
|
18
19
|
n_threads: nil,
|
@@ -28,10 +29,12 @@ module Polars
|
|
28
29
|
sample_size: 1024,
|
29
30
|
eol_char: "\n",
|
30
31
|
new_columns: nil,
|
31
|
-
|
32
|
+
raise_if_empty: true,
|
33
|
+
truncate_ragged_lines: false,
|
34
|
+
decimal_comma: false
|
32
35
|
)
|
33
36
|
if Utils.pathlike?(file)
|
34
|
-
path = Utils.
|
37
|
+
path = Utils.normalize_filepath(file)
|
35
38
|
end
|
36
39
|
|
37
40
|
dtype_list = nil
|
@@ -39,7 +42,7 @@ module Polars
|
|
39
42
|
if !dtypes.nil?
|
40
43
|
if dtypes.is_a?(Hash)
|
41
44
|
dtype_list = []
|
42
|
-
dtypes.each do|k, v|
|
45
|
+
dtypes.each do |k, v|
|
43
46
|
dtype_list << [k, Utils.rb_type_to_dtype(v)]
|
44
47
|
end
|
45
48
|
elsif dtypes.is_a?(::Array)
|
@@ -72,12 +75,15 @@ module Polars
|
|
72
75
|
comment_char,
|
73
76
|
quote_char,
|
74
77
|
processed_null_values,
|
78
|
+
missing_utf8_is_empty_string,
|
75
79
|
parse_dates,
|
76
80
|
skip_rows_after_header,
|
77
|
-
Utils.
|
81
|
+
Utils.parse_row_index_args(row_count_name, row_count_offset),
|
78
82
|
sample_size,
|
79
83
|
eol_char,
|
80
|
-
|
84
|
+
raise_if_empty,
|
85
|
+
truncate_ragged_lines,
|
86
|
+
decimal_comma
|
81
87
|
)
|
82
88
|
self.new_columns = new_columns
|
83
89
|
end
|
data/lib/polars/cat_expr.rb
CHANGED
@@ -9,42 +9,6 @@ module Polars
|
|
9
9
|
self._rbexpr = expr._rbexpr
|
10
10
|
end
|
11
11
|
|
12
|
-
# Determine how this categorical series should be sorted.
|
13
|
-
#
|
14
|
-
# @param ordering ["physical", "lexical"]
|
15
|
-
# Ordering type:
|
16
|
-
#
|
17
|
-
# - 'physical' -> Use the physical representation of the categories to determine the order (default).
|
18
|
-
# - 'lexical' -> Use the string values to determine the ordering.
|
19
|
-
#
|
20
|
-
# @return [Expr]
|
21
|
-
#
|
22
|
-
# @example
|
23
|
-
# df = Polars::DataFrame.new(
|
24
|
-
# {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
|
25
|
-
# ).with_columns(
|
26
|
-
# [
|
27
|
-
# Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
|
28
|
-
# ]
|
29
|
-
# )
|
30
|
-
# df.sort(["cats", "vals"])
|
31
|
-
# # =>
|
32
|
-
# # shape: (5, 2)
|
33
|
-
# # ┌──────┬──────┐
|
34
|
-
# # │ cats ┆ vals │
|
35
|
-
# # │ --- ┆ --- │
|
36
|
-
# # │ cat ┆ i64 │
|
37
|
-
# # ╞══════╪══════╡
|
38
|
-
# # │ a ┆ 2 │
|
39
|
-
# # │ b ┆ 3 │
|
40
|
-
# # │ k ┆ 2 │
|
41
|
-
# # │ z ┆ 1 │
|
42
|
-
# # │ z ┆ 3 │
|
43
|
-
# # └──────┴──────┘
|
44
|
-
def set_ordering(ordering)
|
45
|
-
Utils.wrap_expr(_rbexpr.cat_set_ordering(ordering))
|
46
|
-
end
|
47
|
-
|
48
12
|
# Get the categories stored in this data type.
|
49
13
|
#
|
50
14
|
# @return [Expr]
|
@@ -10,43 +10,6 @@ module Polars
|
|
10
10
|
self._s = series._s
|
11
11
|
end
|
12
12
|
|
13
|
-
# Determine how this categorical series should be sorted.
|
14
|
-
#
|
15
|
-
# @param ordering ["physical", "lexical"]
|
16
|
-
# Ordering type:
|
17
|
-
#
|
18
|
-
# - 'physical' -> Use the physical representation of the categories to
|
19
|
-
# determine the order (default).
|
20
|
-
# - 'lexical' -> Use the string values to determine the ordering.
|
21
|
-
#
|
22
|
-
# @return [Series]
|
23
|
-
#
|
24
|
-
# @example
|
25
|
-
# df = Polars::DataFrame.new(
|
26
|
-
# {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
|
27
|
-
# ).with_columns(
|
28
|
-
# [
|
29
|
-
# Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
|
30
|
-
# ]
|
31
|
-
# )
|
32
|
-
# df.sort(["cats", "vals"])
|
33
|
-
# # =>
|
34
|
-
# # shape: (5, 2)
|
35
|
-
# # ┌──────┬──────┐
|
36
|
-
# # │ cats ┆ vals │
|
37
|
-
# # │ --- ┆ --- │
|
38
|
-
# # │ cat ┆ i64 │
|
39
|
-
# # ╞══════╪══════╡
|
40
|
-
# # │ a ┆ 2 │
|
41
|
-
# # │ b ┆ 3 │
|
42
|
-
# # │ k ┆ 2 │
|
43
|
-
# # │ z ┆ 1 │
|
44
|
-
# # │ z ┆ 3 │
|
45
|
-
# # └──────┴──────┘
|
46
|
-
def set_ordering(ordering)
|
47
|
-
super
|
48
|
-
end
|
49
|
-
|
50
13
|
# Get the categories stored in this data type.
|
51
14
|
#
|
52
15
|
# @return [Series]
|
data/lib/polars/convert.rb
CHANGED
@@ -27,7 +27,12 @@ module Polars
|
|
27
27
|
# # │ 2 ┆ 4 │
|
28
28
|
# # └─────┴─────┘
|
29
29
|
def from_hash(data, schema: nil, columns: nil)
|
30
|
-
|
30
|
+
Utils.wrap_df(
|
31
|
+
DataFrame.hash_to_rbdf(
|
32
|
+
data,
|
33
|
+
schema: schema || columns
|
34
|
+
)
|
35
|
+
)
|
31
36
|
end
|
32
37
|
|
33
38
|
# Construct a DataFrame from a sequence of dictionaries. This operation clones data.
|