polars-df 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Cargo.lock +1 -1
- data/README.md +40 -2
- data/ext/polars/Cargo.toml +1 -1
- data/ext/polars/src/conversion.rs +16 -4
- data/ext/polars/src/dataframe.rs +20 -0
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lib.rs +5 -0
- data/ext/polars/src/numo.rs +57 -0
- data/ext/polars/src/series.rs +45 -33
- data/lib/polars/data_frame.rb +26 -5
- data/lib/polars/data_types.rb +4 -0
- data/lib/polars/group_by.rb +11 -0
- data/lib/polars/plot.rb +109 -0
- data/lib/polars/series.rb +41 -3
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +1 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c85781858b193df5bb1bf3156b6ff14cee0d31ba9d70a14e0830c473d1cca589
|
4
|
+
data.tar.gz: b4333da25d4d575f1ef84a39b673244b088b346520b2b7e65d51e899e73e27b5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 86b867790e1cde10fc813eceb2baf26912700d2db7501f670c5fae5156fcd1ce56b494d178f488d0e84a74220f3b87446df94e1822c44b3c6a869392f805c5ee
|
7
|
+
data.tar.gz: 7afe9f2b39be4045ce4fc95c1619db0fadad80eed5c4d9b25f2434379ee1aae61a87c956ec92e16cfd15bec0c780b5bf25fefe84bb858083874971bfc47860fd
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
data/README.md
CHANGED
@@ -282,10 +282,10 @@ df.to_dummies
|
|
282
282
|
|
283
283
|
## Conversion
|
284
284
|
|
285
|
-
Array of
|
285
|
+
Array of hashes
|
286
286
|
|
287
287
|
```ruby
|
288
|
-
df.rows
|
288
|
+
df.rows(named: true)
|
289
289
|
```
|
290
290
|
|
291
291
|
Hash of series
|
@@ -308,6 +308,12 @@ Parquet
|
|
308
308
|
df.write_parquet("file.parquet")
|
309
309
|
```
|
310
310
|
|
311
|
+
Numo array
|
312
|
+
|
313
|
+
```ruby
|
314
|
+
df.to_numo
|
315
|
+
```
|
316
|
+
|
311
317
|
## Types
|
312
318
|
|
313
319
|
You can specify column types when creating a data frame
|
@@ -343,6 +349,38 @@ Cast a column
|
|
343
349
|
df["a"].cast(Polars::Int32)
|
344
350
|
```
|
345
351
|
|
352
|
+
## Visualization
|
353
|
+
|
354
|
+
Add [Vega](https://github.com/ankane/vega-ruby) to your application’s Gemfile:
|
355
|
+
|
356
|
+
```ruby
|
357
|
+
gem "vega"
|
358
|
+
```
|
359
|
+
|
360
|
+
And use:
|
361
|
+
|
362
|
+
```ruby
|
363
|
+
df.plot("a", "b")
|
364
|
+
```
|
365
|
+
|
366
|
+
Specify the chart type (`line`, `pie`, `column`, `bar`, `area`, or `scatter`)
|
367
|
+
|
368
|
+
```ruby
|
369
|
+
df.plot("a", "b", type: "pie")
|
370
|
+
```
|
371
|
+
|
372
|
+
Group data
|
373
|
+
|
374
|
+
```ruby
|
375
|
+
df.groupby("c").plot("a", "b")
|
376
|
+
```
|
377
|
+
|
378
|
+
Stacked columns or bars
|
379
|
+
|
380
|
+
```ruby
|
381
|
+
df.groupby("c").plot("a", "b", stacked: true)
|
382
|
+
```
|
383
|
+
|
346
384
|
## History
|
347
385
|
|
348
386
|
View the [changelog](CHANGELOG.md)
|
data/ext/polars/Cargo.toml
CHANGED
@@ -125,7 +125,13 @@ impl IntoValue for Wrap<AnyValue<'_>> {
|
|
125
125
|
.unwrap(),
|
126
126
|
AnyValue::Datetime(v, tu, tz) => {
|
127
127
|
let t = match tu {
|
128
|
-
TimeUnit::Nanoseconds =>
|
128
|
+
TimeUnit::Nanoseconds => {
|
129
|
+
let sec = v / 1000000000;
|
130
|
+
let subsec = v % 1000000000;
|
131
|
+
class::time()
|
132
|
+
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("nsec")))
|
133
|
+
.unwrap()
|
134
|
+
}
|
129
135
|
TimeUnit::Microseconds => {
|
130
136
|
let sec = v / 1000000;
|
131
137
|
let subsec = v % 1000000;
|
@@ -133,7 +139,13 @@ impl IntoValue for Wrap<AnyValue<'_>> {
|
|
133
139
|
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
|
134
140
|
.unwrap()
|
135
141
|
}
|
136
|
-
TimeUnit::Milliseconds =>
|
142
|
+
TimeUnit::Milliseconds => {
|
143
|
+
let sec = v / 1000;
|
144
|
+
let subsec = v % 1000;
|
145
|
+
class::time()
|
146
|
+
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("millisecond")))
|
147
|
+
.unwrap()
|
148
|
+
}
|
137
149
|
};
|
138
150
|
|
139
151
|
if tz.is_some() {
|
@@ -175,7 +187,7 @@ impl IntoValue for Wrap<DataType> {
|
|
175
187
|
DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
|
176
188
|
DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
|
177
189
|
DataType::List(inner) => {
|
178
|
-
let inner = Wrap(*inner
|
190
|
+
let inner = Wrap(*inner);
|
179
191
|
let list_class = pl.const_get::<_, Value>("List").unwrap();
|
180
192
|
list_class.funcall::<_, _, Value>("new", (inner,)).unwrap()
|
181
193
|
}
|
@@ -183,7 +195,7 @@ impl IntoValue for Wrap<DataType> {
|
|
183
195
|
DataType::Datetime(tu, tz) => {
|
184
196
|
let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
|
185
197
|
datetime_class
|
186
|
-
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz
|
198
|
+
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz))
|
187
199
|
.unwrap()
|
188
200
|
}
|
189
201
|
DataType::Duration(tu) => {
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -6,6 +6,7 @@ use polars::io::mmap::ReaderBytes;
|
|
6
6
|
use polars::io::RowCount;
|
7
7
|
use polars::prelude::pivot::{pivot, pivot_stable};
|
8
8
|
use polars::prelude::*;
|
9
|
+
use polars_core::utils::try_get_supertype;
|
9
10
|
use std::cell::RefCell;
|
10
11
|
use std::io::{BufWriter, Cursor};
|
11
12
|
use std::ops::Deref;
|
@@ -493,6 +494,25 @@ impl RbDataFrame {
|
|
493
494
|
.into()
|
494
495
|
}
|
495
496
|
|
497
|
+
pub fn to_numo(&self) -> Option<Value> {
|
498
|
+
let mut st = None;
|
499
|
+
for s in self.df.borrow().iter() {
|
500
|
+
let dt_i = s.dtype();
|
501
|
+
match st {
|
502
|
+
None => st = Some(dt_i.clone()),
|
503
|
+
Some(ref mut st) => {
|
504
|
+
*st = try_get_supertype(st, dt_i).ok()?;
|
505
|
+
}
|
506
|
+
}
|
507
|
+
}
|
508
|
+
let st = st?;
|
509
|
+
|
510
|
+
match st {
|
511
|
+
// TODO
|
512
|
+
_ => None,
|
513
|
+
}
|
514
|
+
}
|
515
|
+
|
496
516
|
pub fn write_parquet(
|
497
517
|
&self,
|
498
518
|
rb_f: Value,
|
data/ext/polars/src/error.rs
CHANGED
@@ -43,3 +43,11 @@ impl ComputeError {
|
|
43
43
|
Error::new(exception::runtime_error(), message)
|
44
44
|
}
|
45
45
|
}
|
46
|
+
|
47
|
+
#[macro_export]
|
48
|
+
macro_rules! raise_err(
|
49
|
+
($msg:expr, $err:ident) => {{
|
50
|
+
Err(PolarsError::$err($msg.into())).map_err(RbPolarsErr::from)?;
|
51
|
+
unreachable!()
|
52
|
+
}}
|
53
|
+
);
|
data/ext/polars/src/lib.rs
CHANGED
@@ -6,6 +6,7 @@ mod error;
|
|
6
6
|
mod file;
|
7
7
|
mod lazy;
|
8
8
|
mod list_construction;
|
9
|
+
mod numo;
|
9
10
|
mod object;
|
10
11
|
mod prelude;
|
11
12
|
pub(crate) mod rb_modules;
|
@@ -87,6 +88,7 @@ fn init() -> RbResult<()> {
|
|
87
88
|
class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 2))?;
|
88
89
|
class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
|
89
90
|
class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
|
91
|
+
class.define_method("to_numo", method!(RbDataFrame::to_numo, 0))?;
|
90
92
|
class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 5))?;
|
91
93
|
class.define_method("add", method!(RbDataFrame::add, 1))?;
|
92
94
|
class.define_method("sub", method!(RbDataFrame::sub, 1))?;
|
@@ -783,6 +785,9 @@ fn init() -> RbResult<()> {
|
|
783
785
|
class.define_method("lt_eq_f64", method!(RbSeries::lt_eq_f64, 1))?;
|
784
786
|
// class.define_method("lt_eq_str", method!(RbSeries::lt_eq_str, 1))?;
|
785
787
|
|
788
|
+
// npy
|
789
|
+
class.define_method("to_numo", method!(RbSeries::to_numo, 0))?;
|
790
|
+
|
786
791
|
let class = module.define_class("RbWhen", Default::default())?;
|
787
792
|
class.define_method("_then", method!(RbWhen::then, 1))?;
|
788
793
|
|
@@ -0,0 +1,57 @@
|
|
1
|
+
use magnus::{class, Module, RArray, RClass, RModule, Value};
|
2
|
+
use polars_core::prelude::*;
|
3
|
+
|
4
|
+
use crate::{raise_err, RbPolarsErr, RbResult, RbSeries};
|
5
|
+
|
6
|
+
impl RbSeries {
|
7
|
+
/// For numeric types, this should only be called for Series with null types.
|
8
|
+
/// This will cast to floats so that `nil = NAN`
|
9
|
+
pub fn to_numo(&self) -> RbResult<Value> {
|
10
|
+
let s = &self.series.borrow();
|
11
|
+
match s.dtype() {
|
12
|
+
DataType::Utf8 => {
|
13
|
+
let ca = s.utf8().unwrap();
|
14
|
+
|
15
|
+
// TODO make more efficient
|
16
|
+
let np_arr = RArray::from_iter(ca.into_iter());
|
17
|
+
class::object()
|
18
|
+
.const_get::<_, RModule>("Numo")?
|
19
|
+
.const_get::<_, RClass>("RObject")?
|
20
|
+
.funcall("cast", (np_arr,))
|
21
|
+
}
|
22
|
+
dt if dt.is_numeric() => {
|
23
|
+
if s.bit_repr_is_large() {
|
24
|
+
let s = s.cast(&DataType::Float64).unwrap();
|
25
|
+
let ca = s.f64().unwrap();
|
26
|
+
// TODO make more efficient
|
27
|
+
let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
|
28
|
+
Some(v) => v,
|
29
|
+
None => f64::NAN,
|
30
|
+
}));
|
31
|
+
class::object()
|
32
|
+
.const_get::<_, RModule>("Numo")?
|
33
|
+
.const_get::<_, RClass>("DFloat")?
|
34
|
+
.funcall("cast", (np_arr,))
|
35
|
+
} else {
|
36
|
+
let s = s.cast(&DataType::Float32).unwrap();
|
37
|
+
let ca = s.f32().unwrap();
|
38
|
+
// TODO make more efficient
|
39
|
+
let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
|
40
|
+
Some(v) => v,
|
41
|
+
None => f32::NAN,
|
42
|
+
}));
|
43
|
+
class::object()
|
44
|
+
.const_get::<_, RModule>("Numo")?
|
45
|
+
.const_get::<_, RClass>("SFloat")?
|
46
|
+
.funcall("cast", (np_arr,))
|
47
|
+
}
|
48
|
+
}
|
49
|
+
dt => {
|
50
|
+
raise_err!(
|
51
|
+
format!("'to_numo' not supported for dtype: {dt:?}"),
|
52
|
+
ComputeError
|
53
|
+
);
|
54
|
+
}
|
55
|
+
}
|
56
|
+
}
|
57
|
+
}
|
data/ext/polars/src/series.rs
CHANGED
@@ -489,40 +489,52 @@ impl RbSeries {
|
|
489
489
|
}
|
490
490
|
|
491
491
|
pub fn to_a(&self) -> RArray {
|
492
|
-
let series = self.series.borrow();
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
492
|
+
let series = &self.series.borrow();
|
493
|
+
|
494
|
+
fn to_list_recursive(series: &Series) -> RArray {
|
495
|
+
let rblist = match series.dtype() {
|
496
|
+
DataType::Boolean => RArray::from_iter(series.bool().unwrap()),
|
497
|
+
DataType::UInt8 => RArray::from_iter(series.u8().unwrap()),
|
498
|
+
DataType::UInt16 => RArray::from_iter(series.u16().unwrap()),
|
499
|
+
DataType::UInt32 => RArray::from_iter(series.u32().unwrap()),
|
500
|
+
DataType::UInt64 => RArray::from_iter(series.u64().unwrap()),
|
501
|
+
DataType::Int8 => RArray::from_iter(series.i8().unwrap()),
|
502
|
+
DataType::Int16 => RArray::from_iter(series.i16().unwrap()),
|
503
|
+
DataType::Int32 => RArray::from_iter(series.i32().unwrap()),
|
504
|
+
DataType::Int64 => RArray::from_iter(series.i64().unwrap()),
|
505
|
+
DataType::Float32 => RArray::from_iter(series.f32().unwrap()),
|
506
|
+
DataType::Float64 => RArray::from_iter(series.f64().unwrap()),
|
507
|
+
DataType::Decimal128(_) => todo!(),
|
508
|
+
DataType::Categorical(_) => {
|
509
|
+
RArray::from_iter(series.categorical().unwrap().iter_str())
|
510
|
+
}
|
511
|
+
DataType::Date => {
|
512
|
+
let a = RArray::with_capacity(series.len());
|
513
|
+
for v in series.iter() {
|
514
|
+
a.push::<Value>(Wrap(v).into_value()).unwrap();
|
515
|
+
}
|
516
|
+
return a;
|
517
|
+
}
|
518
|
+
DataType::Datetime(_, _) => {
|
519
|
+
let a = RArray::with_capacity(series.len());
|
520
|
+
for v in series.iter() {
|
521
|
+
a.push::<Value>(Wrap(v).into_value()).unwrap();
|
522
|
+
}
|
523
|
+
return a;
|
524
|
+
}
|
525
|
+
DataType::Utf8 => {
|
526
|
+
let ca = series.utf8().unwrap();
|
527
|
+
return RArray::from_iter(ca);
|
528
|
+
}
|
529
|
+
DataType::Null | DataType::Unknown => {
|
530
|
+
panic!("to_a not implemented for null/unknown")
|
531
|
+
}
|
532
|
+
_ => todo!(),
|
533
|
+
};
|
534
|
+
rblist
|
525
535
|
}
|
536
|
+
|
537
|
+
to_list_recursive(series)
|
526
538
|
}
|
527
539
|
|
528
540
|
pub fn median(&self) -> Option<f64> {
|
data/lib/polars/data_frame.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
module Polars
|
2
2
|
# Two-dimensional data structure representing data as a table with rows and columns.
|
3
3
|
class DataFrame
|
4
|
+
include Plot
|
5
|
+
|
4
6
|
# @private
|
5
7
|
attr_accessor :_df
|
6
8
|
|
@@ -604,10 +606,10 @@ module Polars
|
|
604
606
|
return Slice.new(self).apply(item)
|
605
607
|
end
|
606
608
|
|
607
|
-
if
|
609
|
+
if item.is_a?(Array) && item.all? { |v| Utils.strlike?(v) }
|
608
610
|
# select multiple columns
|
609
611
|
# df[["foo", "bar"]]
|
610
|
-
return _from_rbdf(_df.select(item))
|
612
|
+
return _from_rbdf(_df.select(item.map(&:to_s)))
|
611
613
|
end
|
612
614
|
|
613
615
|
if Utils.is_int_sequence(item)
|
@@ -689,7 +691,8 @@ module Polars
|
|
689
691
|
# @example
|
690
692
|
# df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
|
691
693
|
# df.to_hashes
|
692
|
-
#
|
694
|
+
# # =>
|
695
|
+
# # [{"foo"=>1, "bar"=>4}, {"foo"=>2, "bar"=>5}, {"foo"=>3, "bar"=>6}]
|
693
696
|
def to_hashes
|
694
697
|
rbdf = _df
|
695
698
|
names = columns
|
@@ -699,8 +702,26 @@ module Polars
|
|
699
702
|
end
|
700
703
|
end
|
701
704
|
|
702
|
-
#
|
703
|
-
#
|
705
|
+
# Convert DataFrame to a 2D Numo array.
|
706
|
+
#
|
707
|
+
# This operation clones data.
|
708
|
+
#
|
709
|
+
# @return [Numo::NArray]
|
710
|
+
#
|
711
|
+
# @example
|
712
|
+
# df = Polars::DataFrame.new(
|
713
|
+
# {"foo" => [1, 2, 3], "bar" => [6, 7, 8], "ham" => ["a", "b", "c"]}
|
714
|
+
# )
|
715
|
+
# df.to_numo.class
|
716
|
+
# # => Numo::RObject
|
717
|
+
def to_numo
|
718
|
+
out = _df.to_numo
|
719
|
+
if out.nil?
|
720
|
+
Numo::NArray.vstack(width.times.map { |i| to_series(i).to_numo }).transpose
|
721
|
+
else
|
722
|
+
out
|
723
|
+
end
|
724
|
+
end
|
704
725
|
|
705
726
|
# no to_pandas
|
706
727
|
|
data/lib/polars/data_types.rb
CHANGED
@@ -84,6 +84,8 @@ module Polars
|
|
84
84
|
|
85
85
|
# Calendar date and time type.
|
86
86
|
class Datetime < TemporalType
|
87
|
+
attr_reader :tu
|
88
|
+
|
87
89
|
def initialize(time_unit = "us", time_zone = nil)
|
88
90
|
@tu = time_unit || "us"
|
89
91
|
@time_zone = time_zone
|
@@ -92,6 +94,8 @@ module Polars
|
|
92
94
|
|
93
95
|
# Time duration/delta type.
|
94
96
|
class Duration < TemporalType
|
97
|
+
attr_reader :tu
|
98
|
+
|
95
99
|
def initialize(time_unit = "us")
|
96
100
|
@tu = time_unit
|
97
101
|
end
|
data/lib/polars/group_by.rb
CHANGED
@@ -571,5 +571,16 @@ module Polars
|
|
571
571
|
def agg_list
|
572
572
|
agg(Polars.all.list)
|
573
573
|
end
|
574
|
+
|
575
|
+
# Plot data.
|
576
|
+
#
|
577
|
+
# @return [Vega::LiteChart]
|
578
|
+
def plot(*args, **options)
|
579
|
+
raise ArgumentError, "Multiple groups not supported" if by.is_a?(Array) && by.size > 1
|
580
|
+
# same message as Ruby
|
581
|
+
raise ArgumentError, "unknown keyword: :group" if options.key?(:group)
|
582
|
+
|
583
|
+
Utils.wrap_df(_df).plot(*args, **options, group: by)
|
584
|
+
end
|
574
585
|
end
|
575
586
|
end
|
data/lib/polars/plot.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
module Polars
|
2
|
+
module Plot
|
3
|
+
# Plot data.
|
4
|
+
#
|
5
|
+
# @return [Vega::LiteChart]
|
6
|
+
def plot(x = nil, y = nil, type: nil, group: nil, stacked: nil)
|
7
|
+
require "vega"
|
8
|
+
|
9
|
+
raise ArgumentError, "Must specify columns" if columns.size != 2 && (!x || !y)
|
10
|
+
x ||= columns[0]
|
11
|
+
y ||= columns[1]
|
12
|
+
type ||= begin
|
13
|
+
if self[x].numeric? && self[y].numeric?
|
14
|
+
"scatter"
|
15
|
+
elsif self[x].utf8? && self[y].numeric?
|
16
|
+
"column"
|
17
|
+
elsif (self[x].dtype == Date || self[x].dtype.is_a?(Datetime)) && self[y].numeric?
|
18
|
+
"line"
|
19
|
+
else
|
20
|
+
raise "Cannot determine type. Use the type option."
|
21
|
+
end
|
22
|
+
end
|
23
|
+
df = self[(group.nil? ? [x, y] : [x, y, group]).map(&:to_s).uniq]
|
24
|
+
data = df.rows(named: true)
|
25
|
+
|
26
|
+
case type
|
27
|
+
when "line", "area"
|
28
|
+
x_type =
|
29
|
+
if df[x].numeric?
|
30
|
+
"quantitative"
|
31
|
+
elsif df[x].datelike?
|
32
|
+
"temporal"
|
33
|
+
else
|
34
|
+
"nominal"
|
35
|
+
end
|
36
|
+
|
37
|
+
scale = x_type == "temporal" ? {type: "utc"} : {}
|
38
|
+
encoding = {
|
39
|
+
x: {field: x, type: x_type, scale: scale},
|
40
|
+
y: {field: y, type: "quantitative"}
|
41
|
+
}
|
42
|
+
encoding[:color] = {field: group} if group
|
43
|
+
|
44
|
+
Vega.lite
|
45
|
+
.data(data)
|
46
|
+
.mark(type: type, tooltip: true, interpolate: "cardinal", point: {size: 60})
|
47
|
+
.encoding(encoding)
|
48
|
+
.config(axis: {labelFontSize: 12})
|
49
|
+
when "pie"
|
50
|
+
raise ArgumentError, "Cannot use group option with pie chart" unless group.nil?
|
51
|
+
|
52
|
+
Vega.lite
|
53
|
+
.data(data)
|
54
|
+
.mark(type: "arc", tooltip: true)
|
55
|
+
.encoding(
|
56
|
+
color: {field: x, type: "nominal", sort: "none", axis: {title: nil}, legend: {labelFontSize: 12}},
|
57
|
+
theta: {field: y, type: "quantitative"}
|
58
|
+
)
|
59
|
+
.view(stroke: nil)
|
60
|
+
when "column"
|
61
|
+
encoding = {
|
62
|
+
x: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
|
63
|
+
y: {field: y, type: "quantitative"}
|
64
|
+
}
|
65
|
+
if group
|
66
|
+
encoding[:color] = {field: group}
|
67
|
+
encoding[:xOffset] = {field: group} unless stacked
|
68
|
+
end
|
69
|
+
|
70
|
+
Vega.lite
|
71
|
+
.data(data)
|
72
|
+
.mark(type: "bar", tooltip: true)
|
73
|
+
.encoding(encoding)
|
74
|
+
.config(axis: {labelFontSize: 12})
|
75
|
+
when "bar"
|
76
|
+
encoding = {
|
77
|
+
# TODO determine label angle
|
78
|
+
y: {field: x, type: "nominal", sort: "none", axis: {labelAngle: 0}},
|
79
|
+
x: {field: y, type: "quantitative"}
|
80
|
+
}
|
81
|
+
if group
|
82
|
+
encoding[:color] = {field: group}
|
83
|
+
encoding[:yOffset] = {field: group} unless stacked
|
84
|
+
end
|
85
|
+
|
86
|
+
Vega.lite
|
87
|
+
.data(data)
|
88
|
+
.mark(type: "bar", tooltip: true)
|
89
|
+
.encoding(encoding)
|
90
|
+
.config(axis: {labelFontSize: 12})
|
91
|
+
when "scatter"
|
92
|
+
encoding = {
|
93
|
+
x: {field: x, type: "quantitative", scale: {zero: false}},
|
94
|
+
y: {field: y, type: "quantitative", scale: {zero: false}},
|
95
|
+
size: {value: 60}
|
96
|
+
}
|
97
|
+
encoding[:color] = {field: group} if group
|
98
|
+
|
99
|
+
Vega.lite
|
100
|
+
.data(data)
|
101
|
+
.mark(type: "circle", tooltip: true)
|
102
|
+
.encoding(encoding)
|
103
|
+
.config(axis: {labelFontSize: 12})
|
104
|
+
else
|
105
|
+
raise ArgumentError, "Invalid type: #{type}"
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
data/lib/polars/series.rb
CHANGED
@@ -1776,8 +1776,9 @@ module Polars
|
|
1776
1776
|
# s.is_datelike
|
1777
1777
|
# # => true
|
1778
1778
|
def is_datelike
|
1779
|
-
[Date,
|
1779
|
+
[Date, Time].include?(dtype) || dtype.is_a?(Datetime) || dtype.is_a?(Duration)
|
1780
1780
|
end
|
1781
|
+
alias_method :datelike?, :is_datelike
|
1781
1782
|
|
1782
1783
|
# Check if this Series has floating point numbers.
|
1783
1784
|
#
|
@@ -1823,8 +1824,45 @@ module Polars
|
|
1823
1824
|
# def view
|
1824
1825
|
# end
|
1825
1826
|
|
1826
|
-
#
|
1827
|
-
#
|
1827
|
+
# Convert this Series to a Numo array. This operation clones data but is completely safe.
|
1828
|
+
#
|
1829
|
+
# @return [Numo::NArray]
|
1830
|
+
#
|
1831
|
+
# @example
|
1832
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
1833
|
+
# s.to_numo
|
1834
|
+
# # =>
|
1835
|
+
# # Numo::Int64#shape=[3]
|
1836
|
+
# # [1, 2, 3]
|
1837
|
+
def to_numo
|
1838
|
+
if !has_validity
|
1839
|
+
if is_datelike
|
1840
|
+
Numo::RObject.cast(to_a)
|
1841
|
+
elsif is_numeric
|
1842
|
+
# TODO make more efficient
|
1843
|
+
{
|
1844
|
+
UInt8 => Numo::UInt8,
|
1845
|
+
UInt16 => Numo::UInt16,
|
1846
|
+
UInt32 => Numo::UInt32,
|
1847
|
+
UInt64 => Numo::UInt64,
|
1848
|
+
Int8 => Numo::Int8,
|
1849
|
+
Int16 => Numo::Int16,
|
1850
|
+
Int32 => Numo::Int32,
|
1851
|
+
Int64 => Numo::Int64,
|
1852
|
+
Float32 => Numo::SFloat,
|
1853
|
+
Float64 => Numo::DFloat
|
1854
|
+
}.fetch(dtype).cast(to_a)
|
1855
|
+
elsif is_boolean
|
1856
|
+
Numo::Bit.cast(to_a)
|
1857
|
+
else
|
1858
|
+
_s.to_numo
|
1859
|
+
end
|
1860
|
+
elsif is_datelike
|
1861
|
+
Numo::RObject.cast(to_a)
|
1862
|
+
else
|
1863
|
+
_s.to_numo
|
1864
|
+
end
|
1865
|
+
end
|
1828
1866
|
|
1829
1867
|
# Set masked values.
|
1830
1868
|
#
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -15,6 +15,7 @@ require_relative "polars/batched_csv_reader"
|
|
15
15
|
require_relative "polars/cat_expr"
|
16
16
|
require_relative "polars/cat_name_space"
|
17
17
|
require_relative "polars/convert"
|
18
|
+
require_relative "polars/plot"
|
18
19
|
require_relative "polars/data_frame"
|
19
20
|
require_relative "polars/data_types"
|
20
21
|
require_relative "polars/date_time_expr"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-02-
|
11
|
+
date: 2023-02-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -55,6 +55,7 @@ files:
|
|
55
55
|
- ext/polars/src/lazy/utils.rs
|
56
56
|
- ext/polars/src/lib.rs
|
57
57
|
- ext/polars/src/list_construction.rs
|
58
|
+
- ext/polars/src/numo.rs
|
58
59
|
- ext/polars/src/object.rs
|
59
60
|
- ext/polars/src/prelude.rs
|
60
61
|
- ext/polars/src/rb_modules.rs
|
@@ -84,6 +85,7 @@ files:
|
|
84
85
|
- lib/polars/list_expr.rb
|
85
86
|
- lib/polars/list_name_space.rb
|
86
87
|
- lib/polars/meta_expr.rb
|
88
|
+
- lib/polars/plot.rb
|
87
89
|
- lib/polars/rolling_group_by.rb
|
88
90
|
- lib/polars/series.rb
|
89
91
|
- lib/polars/slice.rb
|