polars-df 0.13.0-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +208 -0
- data/Cargo.lock +2556 -0
- data/Cargo.toml +6 -0
- data/LICENSE-THIRD-PARTY.txt +39278 -0
- data/LICENSE.txt +20 -0
- data/README.md +437 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/array_expr.rb +537 -0
- data/lib/polars/array_name_space.rb +423 -0
- data/lib/polars/batched_csv_reader.rb +104 -0
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/cat_expr.rb +36 -0
- data/lib/polars/cat_name_space.rb +88 -0
- data/lib/polars/config.rb +530 -0
- data/lib/polars/convert.rb +98 -0
- data/lib/polars/data_frame.rb +5191 -0
- data/lib/polars/data_types.rb +466 -0
- data/lib/polars/date_time_expr.rb +1397 -0
- data/lib/polars/date_time_name_space.rb +1287 -0
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/exceptions.rb +38 -0
- data/lib/polars/expr.rb +7256 -0
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +271 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1329 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +136 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +96 -0
- data/lib/polars/functions.rb +57 -0
- data/lib/polars/group_by.rb +613 -0
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/io/csv.rb +696 -0
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +275 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +233 -0
- data/lib/polars/lazy_frame.rb +2708 -0
- data/lib/polars/lazy_group_by.rb +181 -0
- data/lib/polars/list_expr.rb +791 -0
- data/lib/polars/list_name_space.rb +449 -0
- data/lib/polars/meta_expr.rb +222 -0
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/plot.rb +109 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +4444 -0
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +1495 -0
- data/lib/polars/string_name_space.rb +811 -0
- data/lib/polars/struct_expr.rb +98 -0
- data/lib/polars/struct_name_space.rb +96 -0
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +130 -0
- data/lib/polars/version.rb +4 -0
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +91 -0
- metadata +138 -0
data/lib/polars/slice.rb
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
module Polars
|
2
|
+
# Applies a Ruby `Range` to a wrapped object as a slice, normalizing the range
# bounds against the object's length and preferring single-call fast paths.
#
# @private
class Slice
  # @param obj [Object]
  #   The object to slice. The fast paths call `length`, `is_empty`,
  #   `cleared`, `clone`, `reverse` and `slice(offset, length)` on it.
  def initialize(obj)
    @obj = obj
  end

  # Apply a slice operation, taking advantage of any potential fast paths.
  #
  # @param s [Range]
  #   The range to apply. Negative bounds are offset by the object length,
  #   a missing begin is treated as `0` and a missing end as the object length.
  #
  # @return [Object]
  #   The result of `cleared`, `clone`, `reverse` or `slice` on the wrapped
  #   object, or the collected result of the lazy path.
  def apply(s)
    # normalize slice
    _slice_setup(s)

    # check for fast-paths / single-operation calls
    if @slice_length == 0
      @obj.cleared
    elsif @is_unbounded && [-1, 1].include?(@stride)
      @stride < 0 ? @obj.reverse : @obj.clone
    elsif @start >= 0 && @stop >= 0 && @stride == 1
      @obj.slice(@start, @slice_length)
    elsif @stride < 0 && @slice_length == 1
      @obj.slice(@stop + 1, 1)
    else
      # multi-operation calls; make lazy
      lazyobj = _lazify(@obj)
      sliced = @stride > 0 ? _slice_positive(lazyobj) : _slice_negative(lazyobj)
      _as_original(sliced, @obj)
    end
  end

  private

  # Return lazy variant back to its original type.
  def _as_original(lazy, original)
    frame = lazy.collect
    original.is_a?(DataFrame) ? frame : frame.to_series
  end

  # Make lazy to ensure efficient/consistent handling.
  def _lazify(obj)
    obj.is_a?(DataFrame) ? obj.lazy : obj.to_frame.lazy
  end

  # Logic for slices with positive stride.
  #
  # Only reached from the lazy branch of `apply`, i.e. when the stride is not
  # covered by one of the fast paths.
  def _slice_positive(obj)
    obj.slice(@start, @slice_length).take_every(@stride)
  end

  # Logic for slices with negative stride.
  def _slice_negative(obj)
    stride = @stride.abs
    lazyslice = obj.slice(@stop + 1, @slice_length).reverse
    stride > 1 ? lazyslice.take_every(stride) : lazyslice
  end

  # Normalize slice bounds, identify unbounded and/or zero-length slices.
  #
  # Sets `@start`, `@stop`, `@stride`, `@is_unbounded` and `@slice_length`.
  def _slice_setup(s)
    # can normalize slice indices as we know object size
    obj_len = @obj.length
    start = if s.begin
      if s.begin < 0
        # clamp so that a begin further back than the object length starts at 0
        [s.begin + obj_len, 0].max
      else
        s.begin
      end
    else
      0
    end
    stop = if s.end
      # convert an inclusive end into an exclusive stop
      if s.end < 0
        s.end + (s.exclude_end? ? 0 : 1) + obj_len
      else
        s.end + (s.exclude_end? ? 0 : 1)
      end
    else
      obj_len
    end
    # The stride is fixed at 1 here, so the stride != 1 branches below and in
    # `apply` are currently never taken.
    stride = 1

    # check if slice is actually unbounded
    if stride >= 1
      @is_unbounded = start <= 0 && stop >= obj_len
    else
      @is_unbounded = stop == -1 && start >= obj_len - 1
    end

    # determine slice length
    if @obj.is_empty
      @slice_length = 0
    elsif @is_unbounded
      @slice_length = obj_len
    else
      @slice_length = if start == stop || (stride > 0 && start > stop) || (stride < 0 && start < stop)
        0
      else
        (stop - start).abs
      end
    end
    @start = start
    @stop = stop
    @stride = stride
  end
end
|
104
|
+
end
|
data/lib/polars/sql_context.rb
ADDED
@@ -0,0 +1,194 @@
|
|
1
|
+
module Polars
|
2
|
+
# Run SQL queries against DataFrame/LazyFrame data.
class SQLContext
  # @private
  attr_accessor :_ctxt, :_eager_execution

  # Initialize a new `SQLContext`.
  #
  # @param frames [Hash]
  #   A `{name:frame, ...}` mapping of frames to register as tables.
  # @param eager_execution [Boolean]
  #   Default for the `eager` option of {#execute}.
  # @param named_frames [Object]
  #   Named eager/lazy frames, provided as kwargs.
  def initialize(frames = nil, eager_execution: false, **named_frames)
    self._ctxt = RbSQLContext.new
    self._eager_execution = eager_execution

    frames = (frames || {}).to_h

    if frames.any? || named_frames.any?
      register_many(frames, **named_frames)
    end
  end

  # Parse the given SQL query and execute it against the registered frame data.
  #
  # @param query [String]
  #   A valid string SQL query.
  # @param eager [Boolean]
  #   Apply the query eagerly, returning `DataFrame` instead of `LazyFrame`.
  #   If unset, the value of the init-time parameter "eager_execution" will be
  #   used. (Note that the query itself is always executed in lazy-mode; this
  #   parameter only impacts the type of the returned frame).
  #
  # @return [Object]
  #
  # @example Execute a SQL query against the registered frame data:
  #   df = Polars::DataFrame.new(
  #     [
  #       ["The Godfather", 1972, 6_000_000, 134_821_952, 9.2],
  #       ["The Dark Knight", 2008, 185_000_000, 533_316_061, 9.0],
  #       ["Schindler's List", 1993, 22_000_000, 96_067_179, 8.9],
  #       ["Pulp Fiction", 1994, 8_000_000, 107_930_000, 8.9],
  #       ["The Shawshank Redemption", 1994, 25_000_000, 28_341_469, 9.3],
  #     ],
  #     schema: ["title", "release_year", "budget", "gross", "imdb_score"]
  #   )
  #   ctx = Polars::SQLContext.new(films: df)
  #   ctx.execute(
  #     "
  #     SELECT title, release_year, imdb_score
  #     FROM films
  #     WHERE release_year > 1990
  #     ORDER BY imdb_score DESC
  #     ",
  #     eager: true
  #   )
  #   # =>
  #   # shape: (4, 3)
  #   # ┌──────────────────────────┬──────────────┬────────────┐
  #   # │ title                    ┆ release_year ┆ imdb_score │
  #   # │ ---                      ┆ ---          ┆ ---        │
  #   # │ str                      ┆ i64          ┆ f64        │
  #   # ╞══════════════════════════╪══════════════╪════════════╡
  #   # │ The Shawshank Redemption ┆ 1994         ┆ 9.3        │
  #   # │ The Dark Knight          ┆ 2008         ┆ 9.0        │
  #   # │ Schindler's List         ┆ 1993         ┆ 8.9        │
  #   # │ Pulp Fiction             ┆ 1994         ┆ 8.9        │
  #   # └──────────────────────────┴──────────────┴────────────┘
  #
  # @example Execute a GROUP BY query:
  #   ctx.execute(
  #     "
  #     SELECT
  #       MAX(release_year / 10) * 10 AS decade,
  #       SUM(gross) AS total_gross,
  #       COUNT(title) AS n_films,
  #     FROM films
  #     GROUP BY (release_year / 10) -- decade
  #     ORDER BY total_gross DESC
  #     ",
  #     eager: true
  #   )
  #   # =>
  #   # shape: (3, 3)
  #   # ┌────────┬─────────────┬─────────┐
  #   # │ decade ┆ total_gross ┆ n_films │
  #   # │ ---    ┆ ---         ┆ ---     │
  #   # │ i64    ┆ i64         ┆ u32     │
  #   # ╞════════╪═════════════╪═════════╡
  #   # │ 2000   ┆ 533316061   ┆ 1       │
  #   # │ 1990   ┆ 232338648   ┆ 3       │
  #   # │ 1970   ┆ 134821952   ┆ 1       │
  #   # └────────┴─────────────┴─────────┘
  def execute(query, eager: nil)
    res = Utils.wrap_ldf(_ctxt.execute(query))
    # Only fall back to the init-time default when `eager` was not given, so
    # that an explicit `eager: false` still returns the lazy result.
    eager = _eager_execution if eager.nil?
    eager ? res.collect : res
  end

  # Register a single frame as a table, using the given name.
  #
  # @param name [String]
  #   Name of the table.
  # @param frame [Object]
  #   eager/lazy frame to associate with this table name.
  #
  # @return [SQLContext]
  #
  # @example
  #   df = Polars::DataFrame.new({"hello" => ["world"]})
  #   ctx = Polars::SQLContext.new
  #   ctx.register("frame_data", df).execute("SELECT * FROM frame_data").collect
  #   # =>
  #   # shape: (1, 1)
  #   # ┌───────┐
  #   # │ hello │
  #   # │ ---   │
  #   # │ str   │
  #   # ╞═══════╡
  #   # │ world │
  #   # └───────┘
  def register(name, frame)
    if frame.is_a?(DataFrame)
      frame = frame.lazy
    end
    _ctxt.register(name.to_s, frame._ldf)
    self
  end

  # Register multiple eager/lazy frames as tables, using the associated names.
  #
  # @param frames [Hash]
  #   A `{name:frame, ...}` mapping. May be omitted when only keyword
  #   arguments are given.
  # @param named_frames [Object]
  #   Named eager/lazy frames, provided as kwargs.
  #
  # @return [SQLContext]
  def register_many(frames = nil, **named_frames)
    frames = (frames || {}).to_h
    frames = frames.merge(named_frames)
    frames.each do |name, frame|
      register(name, frame)
    end
    self
  end

  # Unregister one or more eager/lazy frames by name.
  #
  # @param names [Object]
  #   Names of the tables to unregister. A single String or Symbol, or a
  #   collection of them; each name is converted with `to_s`.
  #
  # @return [SQLContext]
  #
  # @example Register with a SQLContext object:
  #   df0 = Polars::DataFrame.new({"ints" => [9, 8, 7, 6, 5]})
  #   lf1 = Polars::LazyFrame.new({"text" => ["a", "b", "c"]})
  #   lf2 = Polars::LazyFrame.new({"misc" => ["testing1234"]})
  #   ctx = Polars::SQLContext.new(test1: df0, test2: lf1, test3: lf2)
  #   ctx.tables
  #   # => ["test1", "test2", "test3"]
  #
  # @example Unregister one or more of the tables:
  #   ctx.unregister(["test1", "test3"]).tables
  #   # => ["test2"]
  def unregister(names)
    if names.is_a?(::String) || names.is_a?(::Symbol)
      names = [names]
    end
    names.each do |nm|
      # stringify so symbol names behave the same way as in `register`
      _ctxt.unregister(nm.to_s)
    end
    self
  end

  # Return a list of the registered table names.
  #
  # @return [Array]
  #
  # @example Executing as SQL:
  #   frame_data = Polars::DataFrame.new({"hello" => ["world"]})
  #   ctx = Polars::SQLContext.new(hello_world: frame_data)
  #   ctx.execute("SHOW TABLES", eager: true)
  #   # =>
  #   # shape: (1, 1)
  #   # ┌─────────────┐
  #   # │ name        │
  #   # │ ---         │
  #   # │ str         │
  #   # ╞═════════════╡
  #   # │ hello_world │
  #   # └─────────────┘
  #
  # @example Calling the method:
  #   ctx.tables
  #   # => ["hello_world"]
  def tables
    _ctxt.get_tables.sort
  end
end
|
194
|
+
end
|
data/lib/polars/string_cache.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
module Polars
|
2
|
+
# Context manager for enabling and disabling the global string cache.
#
# The block passed to `new` is forwarded to `RbStringCacheHolder.hold`.
# NOTE(review): presumably the cache is enabled only while the block runs;
# confirm against the native implementation.
#
# @example Construct two Series using the same global string cache.
#   s1 = nil
#   s2 = nil
#   Polars::StringCache.new do
#     s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
#     s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
#   end
class StringCache
  # @yield The code to run through `RbStringCacheHolder.hold`.
  def initialize(&block)
    RbStringCacheHolder.hold(&block)
  end
end
|
8
|
+
|
9
|
+
module Functions
  # Enable the global string cache.
  #
  # `Categorical` columns created under the same global string cache have
  # the same underlying physical value when string values are equal. This allows the
  # columns to be concatenated or used in a join operation, for example.
  #
  # @return [nil]
  #
  # @example Construct two Series using the same global string cache.
  #   Polars.enable_string_cache
  #   s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
  #   s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
  #   Polars.disable_string_cache
  #
  # @example As both Series are constructed under the same global string cache, they can be concatenated.
  #   Polars.concat([s1, s2])
  #   # =>
  #   # shape: (6,)
  #   # Series: 'color' [cat]
  #   # [
  #   #         "red"
  #   #         "green"
  #   #         "red"
  #   #         "blue"
  #   #         "red"
  #   #         "green"
  #   # ]
  def enable_string_cache
    Plr.enable_string_cache
  end

  # Disable and clear the global string cache.
  #
  # @return [nil]
  #
  # @example Construct two Series using the same global string cache.
  #   Polars.enable_string_cache
  #   s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
  #   s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
  #   Polars.disable_string_cache
  #
  # @example As both Series are constructed under the same global string cache, they can be concatenated.
  #   Polars.concat([s1, s2])
  #   # =>
  #   # shape: (6,)
  #   # Series: 'color' [cat]
  #   # [
  #   #         "red"
  #   #         "green"
  #   #         "red"
  #   #         "blue"
  #   #         "red"
  #   #         "green"
  #   # ]
  def disable_string_cache
    Plr.disable_string_cache
  end

  # Check whether the global string cache is enabled.
  #
  # @return [Boolean]
  def using_string_cache
    Plr.using_string_cache
  end
end
|
75
|
+
end
|