polars-df 0.3.0-x86_64-linux → 0.4.0-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/Cargo.lock +335 -310
- data/Cargo.toml +0 -1
- data/LICENSE-THIRD-PARTY.txt +1937 -3898
- data/README.md +69 -2
- data/lib/polars/3.0/polars.so +0 -0
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/data_frame.rb +89 -43
- data/lib/polars/data_types.rb +4 -0
- data/lib/polars/date_time_expr.rb +6 -6
- data/lib/polars/expr.rb +9 -2
- data/lib/polars/group_by.rb +11 -0
- data/lib/polars/io.rb +73 -62
- data/lib/polars/lazy_frame.rb +103 -7
- data/lib/polars/lazy_functions.rb +3 -2
- data/lib/polars/list_expr.rb +2 -2
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/plot.rb +109 -0
- data/lib/polars/series.rb +50 -4
- data/lib/polars/string_expr.rb +1 -1
- data/lib/polars/utils.rb +10 -2
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +5 -2
data/lib/polars/plot.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
module Polars
|
2
|
+
module Plot
|
3
|
+
# Plot data.
|
4
|
+
#
|
5
|
+
# @return [Vega::LiteChart]
|
6
|
+
def plot(x = nil, y = nil, type: nil, group: nil, stacked: nil)
|
7
|
+
require "vega"
|
8
|
+
|
9
|
+
raise ArgumentError, "Must specify columns" if columns.size != 2 && (!x || !y)
|
10
|
+
x ||= columns[0]
|
11
|
+
y ||= columns[1]
|
12
|
+
type ||= begin
|
13
|
+
if self[x].numeric? && self[y].numeric?
|
14
|
+
"scatter"
|
15
|
+
elsif self[x].utf8? && self[y].numeric?
|
16
|
+
"column"
|
17
|
+
elsif (self[x].dtype == Date || self[x].dtype.is_a?(Datetime)) && self[y].numeric?
|
18
|
+
"line"
|
19
|
+
else
|
20
|
+
raise "Cannot determine type. Use the type option."
|
21
|
+
end
|
22
|
+
end
|
23
|
+
df = self[(group.nil? ? [x, y] : [x, y, group]).map(&:to_s).uniq]
|
24
|
+
data = df.rows(named: true)
|
25
|
+
|
26
|
+
case type
|
27
|
+
when "line", "area"
|
28
|
+
x_type =
|
29
|
+
if df[x].numeric?
|
30
|
+
"quantitative"
|
31
|
+
elsif df[x].datelike?
|
32
|
+
"temporal"
|
33
|
+
else
|
34
|
+
"nominal"
|
35
|
+
end
|
36
|
+
|
37
|
+
scale = x_type == "temporal" ? {type: "utc"} : {}
|
38
|
+
encoding = {
|
39
|
+
x: {field: x, type: x_type, scale: scale},
|
40
|
+
y: {field: y, type: "quantitative"}
|
41
|
+
}
|
42
|
+
encoding[:color] = {field: group} if group
|
43
|
+
|
44
|
+
Vega.lite
|
45
|
+
.data(data)
|
46
|
+
.mark(type: type, tooltip: true, interpolate: "cardinal", point: {size: 60})
|
47
|
+
.encoding(encoding)
|
48
|
+
.config(axis: {labelFontSize: 12})
|
49
|
+
when "pie"
|
50
|
+
raise ArgumentError, "Cannot use group option with pie chart" unless group.nil?
|
51
|
+
|
52
|
+
Vega.lite
|
53
|
+
.data(data)
|
54
|
+
.mark(type: "arc", tooltip: true)
|
55
|
+
.encoding(
|
56
|
+
color: {field: x, type: "nominal", sort: "none", axis: {title: nil}, legend: {labelFontSize: 12}},
|
57
|
+
theta: {field: y, type: "quantitative"}
|
58
|
+
)
|
59
|
+
.view(stroke: nil)
|
60
|
+
when "column"
|
61
|
+
encoding = {
|
62
|
+
x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
|
63
|
+
y: {field: y, type: "quantitative"}
|
64
|
+
}
|
65
|
+
if group
|
66
|
+
encoding[:color] = {field: group}
|
67
|
+
encoding[:xOffset] = {field: group} unless stacked
|
68
|
+
end
|
69
|
+
|
70
|
+
Vega.lite
|
71
|
+
.data(data)
|
72
|
+
.mark(type: "bar", tooltip: true)
|
73
|
+
.encoding(encoding)
|
74
|
+
.config(axis: {labelFontSize: 12})
|
75
|
+
when "bar"
|
76
|
+
encoding = {
|
77
|
+
# TODO determine label angle
|
78
|
+
y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
|
79
|
+
x: {field: y, type: "quantitative"}
|
80
|
+
}
|
81
|
+
if group
|
82
|
+
encoding[:color] = {field: group}
|
83
|
+
encoding[:yOffset] = {field: group} unless stacked
|
84
|
+
end
|
85
|
+
|
86
|
+
Vega.lite
|
87
|
+
.data(data)
|
88
|
+
.mark(type: "bar", tooltip: true)
|
89
|
+
.encoding(encoding)
|
90
|
+
.config(axis: {labelFontSize: 12})
|
91
|
+
when "scatter"
|
92
|
+
encoding = {
|
93
|
+
x: {field: x, type: "quantitative", scale: {zero: false}},
|
94
|
+
y: {field: y, type: "quantitative", scale: {zero: false}},
|
95
|
+
size: {value: 60}
|
96
|
+
}
|
97
|
+
encoding[:color] = {field: group} if group
|
98
|
+
|
99
|
+
Vega.lite
|
100
|
+
.data(data)
|
101
|
+
.mark(type: "circle", tooltip: true)
|
102
|
+
.encoding(encoding)
|
103
|
+
.config(axis: {labelFontSize: 12})
|
104
|
+
else
|
105
|
+
raise ArgumentError, "Invalid type: #{type}"
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
data/lib/polars/series.rb
CHANGED
@@ -1776,8 +1776,9 @@ module Polars
|
|
1776
1776
|
# s.is_datelike
|
1777
1777
|
# # => true
|
1778
1778
|
def is_datelike
|
1779
|
-
[Date,
|
1779
|
+
[Date, Time].include?(dtype) || dtype.is_a?(Datetime) || dtype.is_a?(Duration)
|
1780
1780
|
end
|
1781
|
+
alias_method :datelike?, :is_datelike
|
1781
1782
|
|
1782
1783
|
# Check if this Series has floating point numbers.
|
1783
1784
|
#
|
@@ -1823,8 +1824,45 @@ module Polars
|
|
1823
1824
|
# def view
|
1824
1825
|
# end
|
1825
1826
|
|
1826
|
-
#
|
1827
|
-
#
|
1827
|
+
# Convert this Series to a Numo array. This operation clones data but is completely safe.
|
1828
|
+
#
|
1829
|
+
# @return [Numo::NArray]
|
1830
|
+
#
|
1831
|
+
# @example
|
1832
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
1833
|
+
# s.to_numo
|
1834
|
+
# # =>
|
1835
|
+
# # Numo::Int64#shape=[3]
|
1836
|
+
# # [1, 2, 3]
|
1837
|
+
def to_numo
|
1838
|
+
if !has_validity
|
1839
|
+
if is_datelike
|
1840
|
+
Numo::RObject.cast(to_a)
|
1841
|
+
elsif is_numeric
|
1842
|
+
# TODO make more efficient
|
1843
|
+
{
|
1844
|
+
UInt8 => Numo::UInt8,
|
1845
|
+
UInt16 => Numo::UInt16,
|
1846
|
+
UInt32 => Numo::UInt32,
|
1847
|
+
UInt64 => Numo::UInt64,
|
1848
|
+
Int8 => Numo::Int8,
|
1849
|
+
Int16 => Numo::Int16,
|
1850
|
+
Int32 => Numo::Int32,
|
1851
|
+
Int64 => Numo::Int64,
|
1852
|
+
Float32 => Numo::SFloat,
|
1853
|
+
Float64 => Numo::DFloat
|
1854
|
+
}.fetch(dtype).cast(to_a)
|
1855
|
+
elsif is_boolean
|
1856
|
+
Numo::Bit.cast(to_a)
|
1857
|
+
else
|
1858
|
+
_s.to_numo
|
1859
|
+
end
|
1860
|
+
elsif is_datelike
|
1861
|
+
Numo::RObject.cast(to_a)
|
1862
|
+
else
|
1863
|
+
_s.to_numo
|
1864
|
+
end
|
1865
|
+
end
|
1828
1866
|
|
1829
1867
|
# Set masked values.
|
1830
1868
|
#
|
@@ -3493,6 +3531,13 @@ module Polars
|
|
3493
3531
|
ListNameSpace.new(self)
|
3494
3532
|
end
|
3495
3533
|
|
3534
|
+
# Create an object namespace of all binary related methods.
|
3535
|
+
#
|
3536
|
+
# @return [BinaryNameSpace]
|
3537
|
+
def bin
|
3538
|
+
BinaryNameSpace.new(self)
|
3539
|
+
end
|
3540
|
+
|
3496
3541
|
# Create an object namespace of all categorical related methods.
|
3497
3542
|
#
|
3498
3543
|
# @return [CatNameSpace]
|
@@ -3757,7 +3802,8 @@ module Polars
|
|
3757
3802
|
UInt32 => RbSeries.method(:new_opt_u32),
|
3758
3803
|
UInt64 => RbSeries.method(:new_opt_u64),
|
3759
3804
|
Boolean => RbSeries.method(:new_opt_bool),
|
3760
|
-
Utf8 => RbSeries.method(:new_str)
|
3805
|
+
Utf8 => RbSeries.method(:new_str),
|
3806
|
+
Binary => RbSeries.method(:new_binary)
|
3761
3807
|
}
|
3762
3808
|
|
3763
3809
|
SYM_TYPE_TO_CONSTRUCTOR = {
|
data/lib/polars/string_expr.rb
CHANGED
data/lib/polars/utils.rb
CHANGED
@@ -93,8 +93,12 @@ module Polars
|
|
93
93
|
Polars.lit(value)
|
94
94
|
end
|
95
95
|
|
96
|
-
def self.
|
97
|
-
File.expand_path(path)
|
96
|
+
def self.normalise_filepath(path, check_not_directory: true)
|
97
|
+
path = File.expand_path(path)
|
98
|
+
if check_not_directory && File.exist?(path) && Dir.exist?(path)
|
99
|
+
raise ArgumentError, "Expected a file path; #{path} is a directory"
|
100
|
+
end
|
101
|
+
path
|
98
102
|
end
|
99
103
|
|
100
104
|
# TODO fix
|
@@ -216,5 +220,9 @@ module Polars
|
|
216
220
|
val.is_a?(Array) && _is_iterable_of(val, String)
|
217
221
|
end
|
218
222
|
end
|
223
|
+
|
224
|
+
def self.local_file?(file)
|
225
|
+
Dir.glob(file).any?
|
226
|
+
end
|
219
227
|
end
|
220
228
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -12,9 +12,12 @@ require "stringio"
|
|
12
12
|
# modules
|
13
13
|
require_relative "polars/expr_dispatch"
|
14
14
|
require_relative "polars/batched_csv_reader"
|
15
|
+
require_relative "polars/binary_expr"
|
16
|
+
require_relative "polars/binary_name_space"
|
15
17
|
require_relative "polars/cat_expr"
|
16
18
|
require_relative "polars/cat_name_space"
|
17
19
|
require_relative "polars/convert"
|
20
|
+
require_relative "polars/plot"
|
18
21
|
require_relative "polars/data_frame"
|
19
22
|
require_relative "polars/data_types"
|
20
23
|
require_relative "polars/date_time_expr"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: x86_64-linux
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-04-01 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|
@@ -29,6 +29,8 @@ files:
|
|
29
29
|
- lib/polars/3.1/polars.so
|
30
30
|
- lib/polars/3.2/polars.so
|
31
31
|
- lib/polars/batched_csv_reader.rb
|
32
|
+
- lib/polars/binary_expr.rb
|
33
|
+
- lib/polars/binary_name_space.rb
|
32
34
|
- lib/polars/cat_expr.rb
|
33
35
|
- lib/polars/cat_name_space.rb
|
34
36
|
- lib/polars/convert.rb
|
@@ -49,6 +51,7 @@ files:
|
|
49
51
|
- lib/polars/list_expr.rb
|
50
52
|
- lib/polars/list_name_space.rb
|
51
53
|
- lib/polars/meta_expr.rb
|
54
|
+
- lib/polars/plot.rb
|
52
55
|
- lib/polars/rolling_group_by.rb
|
53
56
|
- lib/polars/series.rb
|
54
57
|
- lib/polars/slice.rb
|