polars-df 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Cargo.lock +1 -1
- data/README.md +40 -2
- data/ext/polars/Cargo.toml +1 -1
- data/ext/polars/src/conversion.rs +16 -4
- data/ext/polars/src/dataframe.rs +20 -0
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lib.rs +5 -0
- data/ext/polars/src/numo.rs +57 -0
- data/ext/polars/src/series.rs +45 -33
- data/lib/polars/data_frame.rb +26 -5
- data/lib/polars/data_types.rb +4 -0
- data/lib/polars/group_by.rb +11 -0
- data/lib/polars/plot.rb +109 -0
- data/lib/polars/series.rb +41 -3
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +1 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c85781858b193df5bb1bf3156b6ff14cee0d31ba9d70a14e0830c473d1cca589
|
4
|
+
data.tar.gz: b4333da25d4d575f1ef84a39b673244b088b346520b2b7e65d51e899e73e27b5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 86b867790e1cde10fc813eceb2baf26912700d2db7501f670c5fae5156fcd1ce56b494d178f488d0e84a74220f3b87446df94e1822c44b3c6a869392f805c5ee
|
7
|
+
data.tar.gz: 7afe9f2b39be4045ce4fc95c1619db0fadad80eed5c4d9b25f2434379ee1aae61a87c956ec92e16cfd15bec0c780b5bf25fefe84bb858083874971bfc47860fd
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
data/README.md
CHANGED
@@ -282,10 +282,10 @@ df.to_dummies
|
|
282
282
|
|
283
283
|
## Conversion
|
284
284
|
|
285
|
-
Array of
|
285
|
+
Array of hashes
|
286
286
|
|
287
287
|
```ruby
|
288
|
-
df.rows
|
288
|
+
df.rows(named: true)
|
289
289
|
```
|
290
290
|
|
291
291
|
Hash of series
|
@@ -308,6 +308,12 @@ Parquet
|
|
308
308
|
df.write_parquet("file.parquet")
|
309
309
|
```
|
310
310
|
|
311
|
+
Numo array
|
312
|
+
|
313
|
+
```ruby
|
314
|
+
df.to_numo
|
315
|
+
```
|
316
|
+
|
311
317
|
## Types
|
312
318
|
|
313
319
|
You can specify column types when creating a data frame
|
@@ -343,6 +349,38 @@ Cast a column
|
|
343
349
|
df["a"].cast(Polars::Int32)
|
344
350
|
```
|
345
351
|
|
352
|
+
## Visualization
|
353
|
+
|
354
|
+
Add [Vega](https://github.com/ankane/vega-ruby) to your application’s Gemfile:
|
355
|
+
|
356
|
+
```ruby
|
357
|
+
gem "vega"
|
358
|
+
```
|
359
|
+
|
360
|
+
And use:
|
361
|
+
|
362
|
+
```ruby
|
363
|
+
df.plot("a", "b")
|
364
|
+
```
|
365
|
+
|
366
|
+
Specify the chart type (`line`, `pie`, `column`, `bar`, `area`, or `scatter`)
|
367
|
+
|
368
|
+
```ruby
|
369
|
+
df.plot("a", "b", type: "pie")
|
370
|
+
```
|
371
|
+
|
372
|
+
Group data
|
373
|
+
|
374
|
+
```ruby
|
375
|
+
df.groupby("c").plot("a", "b")
|
376
|
+
```
|
377
|
+
|
378
|
+
Stacked columns or bars
|
379
|
+
|
380
|
+
```ruby
|
381
|
+
df.groupby("c").plot("a", "b", stacked: true)
|
382
|
+
```
|
383
|
+
|
346
384
|
## History
|
347
385
|
|
348
386
|
View the [changelog](CHANGELOG.md)
|
data/ext/polars/Cargo.toml
CHANGED
data/ext/polars/src/conversion.rs
CHANGED
@@ -125,7 +125,13 @@ impl IntoValue for Wrap<AnyValue<'_>> {
|
|
125
125
|
.unwrap(),
|
126
126
|
AnyValue::Datetime(v, tu, tz) => {
|
127
127
|
let t = match tu {
|
128
|
-
TimeUnit::Nanoseconds =>
|
128
|
+
TimeUnit::Nanoseconds => {
|
129
|
+
let sec = v / 1000000000;
|
130
|
+
let subsec = v % 1000000000;
|
131
|
+
class::time()
|
132
|
+
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("nsec")))
|
133
|
+
.unwrap()
|
134
|
+
}
|
129
135
|
TimeUnit::Microseconds => {
|
130
136
|
let sec = v / 1000000;
|
131
137
|
let subsec = v % 1000000;
|
@@ -133,7 +139,13 @@ impl IntoValue for Wrap<AnyValue<'_>> {
|
|
133
139
|
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
|
134
140
|
.unwrap()
|
135
141
|
}
|
136
|
-
TimeUnit::Milliseconds =>
|
142
|
+
TimeUnit::Milliseconds => {
|
143
|
+
let sec = v / 1000;
|
144
|
+
let subsec = v % 1000;
|
145
|
+
class::time()
|
146
|
+
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("millisecond")))
|
147
|
+
.unwrap()
|
148
|
+
}
|
137
149
|
};
|
138
150
|
|
139
151
|
if tz.is_some() {
|
@@ -175,7 +187,7 @@ impl IntoValue for Wrap<DataType> {
|
|
175
187
|
DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
|
176
188
|
DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
|
177
189
|
DataType::List(inner) => {
|
178
|
-
let inner = Wrap(*inner
|
190
|
+
let inner = Wrap(*inner);
|
179
191
|
let list_class = pl.const_get::<_, Value>("List").unwrap();
|
180
192
|
list_class.funcall::<_, _, Value>("new", (inner,)).unwrap()
|
181
193
|
}
|
@@ -183,7 +195,7 @@ impl IntoValue for Wrap<DataType> {
|
|
183
195
|
DataType::Datetime(tu, tz) => {
|
184
196
|
let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
|
185
197
|
datetime_class
|
186
|
-
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz
|
198
|
+
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz))
|
187
199
|
.unwrap()
|
188
200
|
}
|
189
201
|
DataType::Duration(tu) => {
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -6,6 +6,7 @@ use polars::io::mmap::ReaderBytes;
|
|
6
6
|
use polars::io::RowCount;
|
7
7
|
use polars::prelude::pivot::{pivot, pivot_stable};
|
8
8
|
use polars::prelude::*;
|
9
|
+
use polars_core::utils::try_get_supertype;
|
9
10
|
use std::cell::RefCell;
|
10
11
|
use std::io::{BufWriter, Cursor};
|
11
12
|
use std::ops::Deref;
|
@@ -493,6 +494,25 @@ impl RbDataFrame {
|
|
493
494
|
.into()
|
494
495
|
}
|
495
496
|
|
497
|
+
pub fn to_numo(&self) -> Option<Value> {
|
498
|
+
let mut st = None;
|
499
|
+
for s in self.df.borrow().iter() {
|
500
|
+
let dt_i = s.dtype();
|
501
|
+
match st {
|
502
|
+
None => st = Some(dt_i.clone()),
|
503
|
+
Some(ref mut st) => {
|
504
|
+
*st = try_get_supertype(st, dt_i).ok()?;
|
505
|
+
}
|
506
|
+
}
|
507
|
+
}
|
508
|
+
let st = st?;
|
509
|
+
|
510
|
+
match st {
|
511
|
+
// TODO
|
512
|
+
_ => None,
|
513
|
+
}
|
514
|
+
}
|
515
|
+
|
496
516
|
pub fn write_parquet(
|
497
517
|
&self,
|
498
518
|
rb_f: Value,
|
data/ext/polars/src/error.rs
CHANGED
@@ -43,3 +43,11 @@ impl ComputeError {
|
|
43
43
|
Error::new(exception::runtime_error(), message)
|
44
44
|
}
|
45
45
|
}
|
46
|
+
|
47
|
+
#[macro_export]
|
48
|
+
macro_rules! raise_err(
|
49
|
+
($msg:expr, $err:ident) => {{
|
50
|
+
Err(PolarsError::$err($msg.into())).map_err(RbPolarsErr::from)?;
|
51
|
+
unreachable!()
|
52
|
+
}}
|
53
|
+
);
|
data/ext/polars/src/lib.rs
CHANGED
@@ -6,6 +6,7 @@ mod error;
|
|
6
6
|
mod file;
|
7
7
|
mod lazy;
|
8
8
|
mod list_construction;
|
9
|
+
mod numo;
|
9
10
|
mod object;
|
10
11
|
mod prelude;
|
11
12
|
pub(crate) mod rb_modules;
|
@@ -87,6 +88,7 @@ fn init() -> RbResult<()> {
|
|
87
88
|
class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 2))?;
|
88
89
|
class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
|
89
90
|
class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
|
91
|
+
class.define_method("to_numo", method!(RbDataFrame::to_numo, 0))?;
|
90
92
|
class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 5))?;
|
91
93
|
class.define_method("add", method!(RbDataFrame::add, 1))?;
|
92
94
|
class.define_method("sub", method!(RbDataFrame::sub, 1))?;
|
@@ -783,6 +785,9 @@ fn init() -> RbResult<()> {
|
|
783
785
|
class.define_method("lt_eq_f64", method!(RbSeries::lt_eq_f64, 1))?;
|
784
786
|
// class.define_method("lt_eq_str", method!(RbSeries::lt_eq_str, 1))?;
|
785
787
|
|
788
|
+
// npy
|
789
|
+
class.define_method("to_numo", method!(RbSeries::to_numo, 0))?;
|
790
|
+
|
786
791
|
let class = module.define_class("RbWhen", Default::default())?;
|
787
792
|
class.define_method("_then", method!(RbWhen::then, 1))?;
|
788
793
|
|
data/ext/polars/src/numo.rs
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
use magnus::{class, Module, RArray, RClass, RModule, Value};
|
2
|
+
use polars_core::prelude::*;
|
3
|
+
|
4
|
+
use crate::{raise_err, RbPolarsErr, RbResult, RbSeries};
|
5
|
+
|
6
|
+
impl RbSeries {
|
7
|
+
/// For numeric types, this should only be called for Series with null types.
|
8
|
+
/// This will cast to floats so that `nil = NAN`
|
9
|
+
pub fn to_numo(&self) -> RbResult<Value> {
|
10
|
+
let s = &self.series.borrow();
|
11
|
+
match s.dtype() {
|
12
|
+
DataType::Utf8 => {
|
13
|
+
let ca = s.utf8().unwrap();
|
14
|
+
|
15
|
+
// TODO make more efficient
|
16
|
+
let np_arr = RArray::from_iter(ca.into_iter());
|
17
|
+
class::object()
|
18
|
+
.const_get::<_, RModule>("Numo")?
|
19
|
+
.const_get::<_, RClass>("RObject")?
|
20
|
+
.funcall("cast", (np_arr,))
|
21
|
+
}
|
22
|
+
dt if dt.is_numeric() => {
|
23
|
+
if s.bit_repr_is_large() {
|
24
|
+
let s = s.cast(&DataType::Float64).unwrap();
|
25
|
+
let ca = s.f64().unwrap();
|
26
|
+
// TODO make more efficient
|
27
|
+
let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
|
28
|
+
Some(v) => v,
|
29
|
+
None => f64::NAN,
|
30
|
+
}));
|
31
|
+
class::object()
|
32
|
+
.const_get::<_, RModule>("Numo")?
|
33
|
+
.const_get::<_, RClass>("DFloat")?
|
34
|
+
.funcall("cast", (np_arr,))
|
35
|
+
} else {
|
36
|
+
let s = s.cast(&DataType::Float32).unwrap();
|
37
|
+
let ca = s.f32().unwrap();
|
38
|
+
// TODO make more efficient
|
39
|
+
let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
|
40
|
+
Some(v) => v,
|
41
|
+
None => f32::NAN,
|
42
|
+
}));
|
43
|
+
class::object()
|
44
|
+
.const_get::<_, RModule>("Numo")?
|
45
|
+
.const_get::<_, RClass>("SFloat")?
|
46
|
+
.funcall("cast", (np_arr,))
|
47
|
+
}
|
48
|
+
}
|
49
|
+
dt => {
|
50
|
+
raise_err!(
|
51
|
+
format!("'to_numo' not supported for dtype: {dt:?}"),
|
52
|
+
ComputeError
|
53
|
+
);
|
54
|
+
}
|
55
|
+
}
|
56
|
+
}
|
57
|
+
}
|
data/ext/polars/src/series.rs
CHANGED
@@ -489,40 +489,52 @@ impl RbSeries {
|
|
489
489
|
}
|
490
490
|
|
491
491
|
pub fn to_a(&self) -> RArray {
|
492
|
-
let series = self.series.borrow();
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
492
|
+
let series = &self.series.borrow();
|
493
|
+
|
494
|
+
fn to_list_recursive(series: &Series) -> RArray {
|
495
|
+
let rblist = match series.dtype() {
|
496
|
+
DataType::Boolean => RArray::from_iter(series.bool().unwrap()),
|
497
|
+
DataType::UInt8 => RArray::from_iter(series.u8().unwrap()),
|
498
|
+
DataType::UInt16 => RArray::from_iter(series.u16().unwrap()),
|
499
|
+
DataType::UInt32 => RArray::from_iter(series.u32().unwrap()),
|
500
|
+
DataType::UInt64 => RArray::from_iter(series.u64().unwrap()),
|
501
|
+
DataType::Int8 => RArray::from_iter(series.i8().unwrap()),
|
502
|
+
DataType::Int16 => RArray::from_iter(series.i16().unwrap()),
|
503
|
+
DataType::Int32 => RArray::from_iter(series.i32().unwrap()),
|
504
|
+
DataType::Int64 => RArray::from_iter(series.i64().unwrap()),
|
505
|
+
DataType::Float32 => RArray::from_iter(series.f32().unwrap()),
|
506
|
+
DataType::Float64 => RArray::from_iter(series.f64().unwrap()),
|
507
|
+
DataType::Decimal128(_) => todo!(),
|
508
|
+
DataType::Categorical(_) => {
|
509
|
+
RArray::from_iter(series.categorical().unwrap().iter_str())
|
510
|
+
}
|
511
|
+
DataType::Date => {
|
512
|
+
let a = RArray::with_capacity(series.len());
|
513
|
+
for v in series.iter() {
|
514
|
+
a.push::<Value>(Wrap(v).into_value()).unwrap();
|
515
|
+
}
|
516
|
+
return a;
|
517
|
+
}
|
518
|
+
DataType::Datetime(_, _) => {
|
519
|
+
let a = RArray::with_capacity(series.len());
|
520
|
+
for v in series.iter() {
|
521
|
+
a.push::<Value>(Wrap(v).into_value()).unwrap();
|
522
|
+
}
|
523
|
+
return a;
|
524
|
+
}
|
525
|
+
DataType::Utf8 => {
|
526
|
+
let ca = series.utf8().unwrap();
|
527
|
+
return RArray::from_iter(ca);
|
528
|
+
}
|
529
|
+
DataType::Null | DataType::Unknown => {
|
530
|
+
panic!("to_a not implemented for null/unknown")
|
531
|
+
}
|
532
|
+
_ => todo!(),
|
533
|
+
};
|
534
|
+
rblist
|
525
535
|
}
|
536
|
+
|
537
|
+
to_list_recursive(series)
|
526
538
|
}
|
527
539
|
|
528
540
|
pub fn median(&self) -> Option<f64> {
|
data/lib/polars/data_frame.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
module Polars
|
2
2
|
# Two-dimensional data structure representing data as a table with rows and columns.
|
3
3
|
class DataFrame
|
4
|
+
include Plot
|
5
|
+
|
4
6
|
# @private
|
5
7
|
attr_accessor :_df
|
6
8
|
|
@@ -604,10 +606,10 @@ module Polars
|
|
604
606
|
return Slice.new(self).apply(item)
|
605
607
|
end
|
606
608
|
|
607
|
-
if
|
609
|
+
if item.is_a?(Array) && item.all? { |v| Utils.strlike?(v) }
|
608
610
|
# select multiple columns
|
609
611
|
# df[["foo", "bar"]]
|
610
|
-
return _from_rbdf(_df.select(item))
|
612
|
+
return _from_rbdf(_df.select(item.map(&:to_s)))
|
611
613
|
end
|
612
614
|
|
613
615
|
if Utils.is_int_sequence(item)
|
@@ -689,7 +691,8 @@ module Polars
|
|
689
691
|
# @example
|
690
692
|
# df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
|
691
693
|
# df.to_hashes
|
692
|
-
#
|
694
|
+
# # =>
|
695
|
+
# # [{"foo"=>1, "bar"=>4}, {"foo"=>2, "bar"=>5}, {"foo"=>3, "bar"=>6}]
|
693
696
|
def to_hashes
|
694
697
|
rbdf = _df
|
695
698
|
names = columns
|
@@ -699,8 +702,26 @@ module Polars
|
|
699
702
|
end
|
700
703
|
end
|
701
704
|
|
702
|
-
#
|
703
|
-
#
|
705
|
+
# Convert DataFrame to a 2D Numo array.
|
706
|
+
#
|
707
|
+
# This operation clones data.
|
708
|
+
#
|
709
|
+
# @return [Numo::NArray]
|
710
|
+
#
|
711
|
+
# @example
|
712
|
+
# df = Polars::DataFrame.new(
|
713
|
+
# {"foo" => [1, 2, 3], "bar" => [6, 7, 8], "ham" => ["a", "b", "c"]}
|
714
|
+
# )
|
715
|
+
# df.to_numo.class
|
716
|
+
# # => Numo::RObject
|
717
|
+
def to_numo
|
718
|
+
out = _df.to_numo
|
719
|
+
if out.nil?
|
720
|
+
Numo::NArray.vstack(width.times.map { |i| to_series(i).to_numo }).transpose
|
721
|
+
else
|
722
|
+
out
|
723
|
+
end
|
724
|
+
end
|
704
725
|
|
705
726
|
# no to_pandas
|
706
727
|
|
data/lib/polars/data_types.rb
CHANGED
@@ -84,6 +84,8 @@ module Polars
|
|
84
84
|
|
85
85
|
# Calendar date and time type.
|
86
86
|
class Datetime < TemporalType
|
87
|
+
attr_reader :tu
|
88
|
+
|
87
89
|
def initialize(time_unit = "us", time_zone = nil)
|
88
90
|
@tu = time_unit || "us"
|
89
91
|
@time_zone = time_zone
|
@@ -92,6 +94,8 @@ module Polars
|
|
92
94
|
|
93
95
|
# Time duration/delta type.
|
94
96
|
class Duration < TemporalType
|
97
|
+
attr_reader :tu
|
98
|
+
|
95
99
|
def initialize(time_unit = "us")
|
96
100
|
@tu = time_unit
|
97
101
|
end
|
data/lib/polars/group_by.rb
CHANGED
@@ -571,5 +571,16 @@ module Polars
|
|
571
571
|
def agg_list
|
572
572
|
agg(Polars.all.list)
|
573
573
|
end
|
574
|
+
|
575
|
+
# Plot data.
|
576
|
+
#
|
577
|
+
# @return [Vega::LiteChart]
|
578
|
+
def plot(*args, **options)
|
579
|
+
raise ArgumentError, "Multiple groups not supported" if by.is_a?(Array) && by.size > 1
|
580
|
+
# same message as Ruby
|
581
|
+
raise ArgumentError, "unknown keyword: :group" if options.key?(:group)
|
582
|
+
|
583
|
+
Utils.wrap_df(_df).plot(*args, **options, group: by)
|
584
|
+
end
|
574
585
|
end
|
575
586
|
end
|
data/lib/polars/plot.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
module Polars
|
2
|
+
module Plot
|
3
|
+
# Plot data.
|
4
|
+
#
|
5
|
+
# @return [Vega::LiteChart]
|
6
|
+
def plot(x = nil, y = nil, type: nil, group: nil, stacked: nil)
|
7
|
+
require "vega"
|
8
|
+
|
9
|
+
raise ArgumentError, "Must specify columns" if columns.size != 2 && (!x || !y)
|
10
|
+
x ||= columns[0]
|
11
|
+
y ||= columns[1]
|
12
|
+
type ||= begin
|
13
|
+
if self[x].numeric? && self[y].numeric?
|
14
|
+
"scatter"
|
15
|
+
elsif self[x].utf8? && self[y].numeric?
|
16
|
+
"column"
|
17
|
+
elsif (self[x].dtype == Date || self[x].dtype.is_a?(Datetime)) && self[y].numeric?
|
18
|
+
"line"
|
19
|
+
else
|
20
|
+
raise "Cannot determine type. Use the type option."
|
21
|
+
end
|
22
|
+
end
|
23
|
+
df = self[(group.nil? ? [x, y] : [x, y, group]).map(&:to_s).uniq]
|
24
|
+
data = df.rows(named: true)
|
25
|
+
|
26
|
+
case type
|
27
|
+
when "line", "area"
|
28
|
+
x_type =
|
29
|
+
if df[x].numeric?
|
30
|
+
"quantitative"
|
31
|
+
elsif df[x].datelike?
|
32
|
+
"temporal"
|
33
|
+
else
|
34
|
+
"nominal"
|
35
|
+
end
|
36
|
+
|
37
|
+
scale = x_type == "temporal" ? {type: "utc"} : {}
|
38
|
+
encoding = {
|
39
|
+
x: {field: x, type: x_type, scale: scale},
|
40
|
+
y: {field: y, type: "quantitative"}
|
41
|
+
}
|
42
|
+
encoding[:color] = {field: group} if group
|
43
|
+
|
44
|
+
Vega.lite
|
45
|
+
.data(data)
|
46
|
+
.mark(type: type, tooltip: true, interpolate: "cardinal", point: {size: 60})
|
47
|
+
.encoding(encoding)
|
48
|
+
.config(axis: {labelFontSize: 12})
|
49
|
+
when "pie"
|
50
|
+
raise ArgumentError, "Cannot use group option with pie chart" unless group.nil?
|
51
|
+
|
52
|
+
Vega.lite
|
53
|
+
.data(data)
|
54
|
+
.mark(type: "arc", tooltip: true)
|
55
|
+
.encoding(
|
56
|
+
color: {field: x, type: "nominal", sort: "none", axis: {title: nil}, legend: {labelFontSize: 12}},
|
57
|
+
theta: {field: y, type: "quantitative"}
|
58
|
+
)
|
59
|
+
.view(stroke: nil)
|
60
|
+
when "column"
|
61
|
+
encoding = {
|
62
|
+
x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
|
63
|
+
y: {field: y, type: "quantitative"}
|
64
|
+
}
|
65
|
+
if group
|
66
|
+
encoding[:color] = {field: group}
|
67
|
+
encoding[:xOffset] = {field: group} unless stacked
|
68
|
+
end
|
69
|
+
|
70
|
+
Vega.lite
|
71
|
+
.data(data)
|
72
|
+
.mark(type: "bar", tooltip: true)
|
73
|
+
.encoding(encoding)
|
74
|
+
.config(axis: {labelFontSize: 12})
|
75
|
+
when "bar"
|
76
|
+
encoding = {
|
77
|
+
# TODO determine label angle
|
78
|
+
y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
|
79
|
+
x: {field: y, type: "quantitative"}
|
80
|
+
}
|
81
|
+
if group
|
82
|
+
encoding[:color] = {field: group}
|
83
|
+
encoding[:yOffset] = {field: group} unless stacked
|
84
|
+
end
|
85
|
+
|
86
|
+
Vega.lite
|
87
|
+
.data(data)
|
88
|
+
.mark(type: "bar", tooltip: true)
|
89
|
+
.encoding(encoding)
|
90
|
+
.config(axis: {labelFontSize: 12})
|
91
|
+
when "scatter"
|
92
|
+
encoding = {
|
93
|
+
x: {field: x, type: "quantitative", scale: {zero: false}},
|
94
|
+
y: {field: y, type: "quantitative", scale: {zero: false}},
|
95
|
+
size: {value: 60}
|
96
|
+
}
|
97
|
+
encoding[:color] = {field: group} if group
|
98
|
+
|
99
|
+
Vega.lite
|
100
|
+
.data(data)
|
101
|
+
.mark(type: "circle", tooltip: true)
|
102
|
+
.encoding(encoding)
|
103
|
+
.config(axis: {labelFontSize: 12})
|
104
|
+
else
|
105
|
+
raise ArgumentError, "Invalid type: #{type}"
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
data/lib/polars/series.rb
CHANGED
@@ -1776,8 +1776,9 @@ module Polars
|
|
1776
1776
|
# s.is_datelike
|
1777
1777
|
# # => true
|
1778
1778
|
def is_datelike
|
1779
|
-
[Date,
|
1779
|
+
[Date, Time].include?(dtype) || dtype.is_a?(Datetime) || dtype.is_a?(Duration)
|
1780
1780
|
end
|
1781
|
+
alias_method :datelike?, :is_datelike
|
1781
1782
|
|
1782
1783
|
# Check if this Series has floating point numbers.
|
1783
1784
|
#
|
@@ -1823,8 +1824,45 @@ module Polars
|
|
1823
1824
|
# def view
|
1824
1825
|
# end
|
1825
1826
|
|
1826
|
-
#
|
1827
|
-
#
|
1827
|
+
# Convert this Series to a Numo array. This operation clones data but is completely safe.
|
1828
|
+
#
|
1829
|
+
# @return [Numo::NArray]
|
1830
|
+
#
|
1831
|
+
# @example
|
1832
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
1833
|
+
# s.to_numo
|
1834
|
+
# # =>
|
1835
|
+
# # Numo::Int64#shape=[3]
|
1836
|
+
# # [1, 2, 3]
|
1837
|
+
def to_numo
|
1838
|
+
if !has_validity
|
1839
|
+
if is_datelike
|
1840
|
+
Numo::RObject.cast(to_a)
|
1841
|
+
elsif is_numeric
|
1842
|
+
# TODO make more efficient
|
1843
|
+
{
|
1844
|
+
UInt8 => Numo::UInt8,
|
1845
|
+
UInt16 => Numo::UInt16,
|
1846
|
+
UInt32 => Numo::UInt32,
|
1847
|
+
UInt64 => Numo::UInt64,
|
1848
|
+
Int8 => Numo::Int8,
|
1849
|
+
Int16 => Numo::Int16,
|
1850
|
+
Int32 => Numo::Int32,
|
1851
|
+
Int64 => Numo::Int64,
|
1852
|
+
Float32 => Numo::SFloat,
|
1853
|
+
Float64 => Numo::DFloat
|
1854
|
+
}.fetch(dtype).cast(to_a)
|
1855
|
+
elsif is_boolean
|
1856
|
+
Numo::Bit.cast(to_a)
|
1857
|
+
else
|
1858
|
+
_s.to_numo
|
1859
|
+
end
|
1860
|
+
elsif is_datelike
|
1861
|
+
Numo::RObject.cast(to_a)
|
1862
|
+
else
|
1863
|
+
_s.to_numo
|
1864
|
+
end
|
1865
|
+
end
|
1828
1866
|
|
1829
1867
|
# Set masked values.
|
1830
1868
|
#
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -15,6 +15,7 @@ require_relative "polars/batched_csv_reader"
|
|
15
15
|
require_relative "polars/cat_expr"
|
16
16
|
require_relative "polars/cat_name_space"
|
17
17
|
require_relative "polars/convert"
|
18
|
+
require_relative "polars/plot"
|
18
19
|
require_relative "polars/data_frame"
|
19
20
|
require_relative "polars/data_types"
|
20
21
|
require_relative "polars/date_time_expr"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-02-
|
11
|
+
date: 2023-02-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -55,6 +55,7 @@ files:
|
|
55
55
|
- ext/polars/src/lazy/utils.rs
|
56
56
|
- ext/polars/src/lib.rs
|
57
57
|
- ext/polars/src/list_construction.rs
|
58
|
+
- ext/polars/src/numo.rs
|
58
59
|
- ext/polars/src/object.rs
|
59
60
|
- ext/polars/src/prelude.rs
|
60
61
|
- ext/polars/src/rb_modules.rs
|
@@ -84,6 +85,7 @@ files:
|
|
84
85
|
- lib/polars/list_expr.rb
|
85
86
|
- lib/polars/list_name_space.rb
|
86
87
|
- lib/polars/meta_expr.rb
|
88
|
+
- lib/polars/plot.rb
|
87
89
|
- lib/polars/rolling_group_by.rb
|
88
90
|
- lib/polars/series.rb
|
89
91
|
- lib/polars/slice.rb
|