polars-df 0.10.0-arm64-darwin → 0.12.0-arm64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +392 -351
- data/LICENSE-THIRD-PARTY.txt +1127 -867
- data/README.md +6 -6
- data/lib/polars/3.1/polars.bundle +0 -0
- data/lib/polars/3.2/polars.bundle +0 -0
- data/lib/polars/3.3/polars.bundle +0 -0
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +11 -5
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +176 -403
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -572
- data/lib/polars/date_time_name_space.rb +263 -460
- data/lib/polars/dynamic_group_by.rb +5 -5
- data/lib/polars/exceptions.rb +7 -0
- data/lib/polars/expr.rb +1394 -243
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +90 -57
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +299 -493
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +227 -0
- data/lib/polars/lazy_frame.rb +143 -272
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +7 -9
- data/lib/polars/series.rb +103 -187
- data/lib/polars/string_expr.rb +78 -102
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +8 -300
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +20 -1
- metadata +17 -4
data/README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
# Polars
|
1
|
+
# Ruby Polars
|
2
2
|
|
3
3
|
:fire: Blazingly fast DataFrames for Ruby, powered by [Polars](https://github.com/pola-rs/polars)
|
4
4
|
|
5
|
-
[![Build Status](https://github.com/ankane/polars-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/polars-ruby/actions)
|
5
|
+
[![Build Status](https://github.com/ankane/ruby-polars/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/ruby-polars/actions)
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -420,16 +420,16 @@ View the [changelog](CHANGELOG.md)
|
|
420
420
|
|
421
421
|
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
422
422
|
|
423
|
-
- [Report bugs](https://github.com/ankane/polars-ruby/issues)
|
424
|
-
- Fix bugs and [submit pull requests](https://github.com/ankane/polars-ruby/pulls)
|
423
|
+
- [Report bugs](https://github.com/ankane/ruby-polars/issues)
|
424
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/ruby-polars/pulls)
|
425
425
|
- Write, clarify, or fix documentation
|
426
426
|
- Suggest or add new features
|
427
427
|
|
428
428
|
To get started with development:
|
429
429
|
|
430
430
|
```sh
|
431
|
-
git clone https://github.com/ankane/polars-ruby.git
|
432
|
-
cd polars-ruby
|
431
|
+
git clone https://github.com/ankane/ruby-polars.git
|
432
|
+
cd ruby-polars
|
433
433
|
bundle install
|
434
434
|
bundle exec rake compile
|
435
435
|
bundle exec rake test
|
Binary file
|
Binary file
|
Binary file
|
data/lib/polars/array_expr.rb
CHANGED
@@ -358,7 +358,7 @@ module Polars
|
|
358
358
|
# # │ [7, 8, 9] ┆ 4 ┆ null │
|
359
359
|
# # └───────────────┴─────┴──────┘
|
360
360
|
def get(index, null_on_oob: true)
|
361
|
-
index = Utils.parse_as_expression(index)
|
361
|
+
index = Utils.parse_into_expression(index)
|
362
362
|
Utils.wrap_expr(_rbexpr.arr_get(index, null_on_oob))
|
363
363
|
end
|
364
364
|
|
@@ -446,7 +446,7 @@ module Polars
|
|
446
446
|
# # │ ["x", "y"] ┆ _ ┆ x_y │
|
447
447
|
# # └───────────────┴───────────┴──────┘
|
448
448
|
def join(separator, ignore_nulls: true)
|
449
|
-
separator = Utils.parse_as_expression(separator, str_as_lit: true)
|
449
|
+
separator = Utils.parse_into_expression(separator, str_as_lit: true)
|
450
450
|
Utils.wrap_expr(_rbexpr.arr_join(separator, ignore_nulls))
|
451
451
|
end
|
452
452
|
|
@@ -502,7 +502,7 @@ module Polars
|
|
502
502
|
# # │ ["a", "c"] ┆ true │
|
503
503
|
# # └───────────────┴──────────┘
|
504
504
|
def contains(item)
|
505
|
-
item = Utils.parse_as_expression(item, str_as_lit: true)
|
505
|
+
item = Utils.parse_into_expression(item, str_as_lit: true)
|
506
506
|
Utils.wrap_expr(_rbexpr.arr_contains(item))
|
507
507
|
end
|
508
508
|
|
@@ -530,7 +530,7 @@ module Polars
|
|
530
530
|
# # │ [2, 2] ┆ 2 │
|
531
531
|
# # └───────────────┴────────────────┘
|
532
532
|
def count_matches(element)
|
533
|
-
element = Utils.parse_as_expression(element, str_as_lit: true)
|
533
|
+
element = Utils.parse_into_expression(element, str_as_lit: true)
|
534
534
|
Utils.wrap_expr(_rbexpr.arr_count_matches(element))
|
535
535
|
end
|
536
536
|
end
|
@@ -13,6 +13,7 @@ module Polars
|
|
13
13
|
skip_rows: 0,
|
14
14
|
dtypes: nil,
|
15
15
|
null_values: nil,
|
16
|
+
missing_utf8_is_empty_string: false,
|
16
17
|
ignore_errors: false,
|
17
18
|
parse_dates: false,
|
18
19
|
n_threads: nil,
|
@@ -28,10 +29,12 @@ module Polars
|
|
28
29
|
sample_size: 1024,
|
29
30
|
eol_char: "\n",
|
30
31
|
new_columns: nil,
|
31
|
-
|
32
|
+
raise_if_empty: true,
|
33
|
+
truncate_ragged_lines: false,
|
34
|
+
decimal_comma: false
|
32
35
|
)
|
33
36
|
if Utils.pathlike?(file)
|
34
|
-
path = Utils.normalise_filepath(file)
|
37
|
+
path = Utils.normalize_filepath(file)
|
35
38
|
end
|
36
39
|
|
37
40
|
dtype_list = nil
|
@@ -39,7 +42,7 @@ module Polars
|
|
39
42
|
if !dtypes.nil?
|
40
43
|
if dtypes.is_a?(Hash)
|
41
44
|
dtype_list = []
|
42
|
-
dtypes.each do|k, v|
|
45
|
+
dtypes.each do |k, v|
|
43
46
|
dtype_list << [k, Utils.rb_type_to_dtype(v)]
|
44
47
|
end
|
45
48
|
elsif dtypes.is_a?(::Array)
|
@@ -72,12 +75,15 @@ module Polars
|
|
72
75
|
comment_char,
|
73
76
|
quote_char,
|
74
77
|
processed_null_values,
|
78
|
+
missing_utf8_is_empty_string,
|
75
79
|
parse_dates,
|
76
80
|
skip_rows_after_header,
|
77
|
-
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
81
|
+
Utils.parse_row_index_args(row_count_name, row_count_offset),
|
78
82
|
sample_size,
|
79
83
|
eol_char,
|
80
|
-
|
84
|
+
raise_if_empty,
|
85
|
+
truncate_ragged_lines,
|
86
|
+
decimal_comma
|
81
87
|
)
|
82
88
|
self.new_columns = new_columns
|
83
89
|
end
|
data/lib/polars/cat_expr.rb
CHANGED
@@ -9,42 +9,6 @@ module Polars
|
|
9
9
|
self._rbexpr = expr._rbexpr
|
10
10
|
end
|
11
11
|
|
12
|
-
# Determine how this categorical series should be sorted.
|
13
|
-
#
|
14
|
-
# @param ordering ["physical", "lexical"]
|
15
|
-
# Ordering type:
|
16
|
-
#
|
17
|
-
# - 'physical' -> Use the physical representation of the categories to determine the order (default).
|
18
|
-
# - 'lexical' -> Use the string values to determine the ordering.
|
19
|
-
#
|
20
|
-
# @return [Expr]
|
21
|
-
#
|
22
|
-
# @example
|
23
|
-
# df = Polars::DataFrame.new(
|
24
|
-
# {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
|
25
|
-
# ).with_columns(
|
26
|
-
# [
|
27
|
-
# Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
|
28
|
-
# ]
|
29
|
-
# )
|
30
|
-
# df.sort(["cats", "vals"])
|
31
|
-
# # =>
|
32
|
-
# # shape: (5, 2)
|
33
|
-
# # ┌──────┬──────┐
|
34
|
-
# # │ cats ┆ vals │
|
35
|
-
# # │ --- ┆ --- │
|
36
|
-
# # │ cat ┆ i64 │
|
37
|
-
# # ╞══════╪══════╡
|
38
|
-
# # │ a ┆ 2 │
|
39
|
-
# # │ b ┆ 3 │
|
40
|
-
# # │ k ┆ 2 │
|
41
|
-
# # │ z ┆ 1 │
|
42
|
-
# # │ z ┆ 3 │
|
43
|
-
# # └──────┴──────┘
|
44
|
-
def set_ordering(ordering)
|
45
|
-
Utils.wrap_expr(_rbexpr.cat_set_ordering(ordering))
|
46
|
-
end
|
47
|
-
|
48
12
|
# Get the categories stored in this data type.
|
49
13
|
#
|
50
14
|
# @return [Expr]
|
@@ -10,43 +10,6 @@ module Polars
|
|
10
10
|
self._s = series._s
|
11
11
|
end
|
12
12
|
|
13
|
-
# Determine how this categorical series should be sorted.
|
14
|
-
#
|
15
|
-
# @param ordering ["physical", "lexical"]
|
16
|
-
# Ordering type:
|
17
|
-
#
|
18
|
-
# - 'physical' -> Use the physical representation of the categories to
|
19
|
-
# determine the order (default).
|
20
|
-
# - 'lexical' -> Use the string values to determine the ordering.
|
21
|
-
#
|
22
|
-
# @return [Series]
|
23
|
-
#
|
24
|
-
# @example
|
25
|
-
# df = Polars::DataFrame.new(
|
26
|
-
# {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
|
27
|
-
# ).with_columns(
|
28
|
-
# [
|
29
|
-
# Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
|
30
|
-
# ]
|
31
|
-
# )
|
32
|
-
# df.sort(["cats", "vals"])
|
33
|
-
# # =>
|
34
|
-
# # shape: (5, 2)
|
35
|
-
# # ┌──────┬──────┐
|
36
|
-
# # │ cats ┆ vals │
|
37
|
-
# # │ --- ┆ --- │
|
38
|
-
# # │ cat ┆ i64 │
|
39
|
-
# # ╞══════╪══════╡
|
40
|
-
# # │ a ┆ 2 │
|
41
|
-
# # │ b ┆ 3 │
|
42
|
-
# # │ k ┆ 2 │
|
43
|
-
# # │ z ┆ 1 │
|
44
|
-
# # │ z ┆ 3 │
|
45
|
-
# # └──────┴──────┘
|
46
|
-
def set_ordering(ordering)
|
47
|
-
super
|
48
|
-
end
|
49
|
-
|
50
13
|
# Get the categories stored in this data type.
|
51
14
|
#
|
52
15
|
# @return [Series]
|
data/lib/polars/convert.rb
CHANGED
@@ -27,7 +27,12 @@ module Polars
|
|
27
27
|
# # │ 2 ┆ 4 │
|
28
28
|
# # └─────┴─────┘
|
29
29
|
def from_hash(data, schema: nil, columns: nil)
|
30
|
-
|
30
|
+
Utils.wrap_df(
|
31
|
+
DataFrame.hash_to_rbdf(
|
32
|
+
data,
|
33
|
+
schema: schema || columns
|
34
|
+
)
|
35
|
+
)
|
31
36
|
end
|
32
37
|
|
33
38
|
# Construct a DataFrame from a sequence of dictionaries. This operation clones data.
|