polars-df 0.1.4 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/Cargo.lock +430 -217
- data/Cargo.toml +2 -0
- data/LICENSE.txt +1 -1
- data/README.md +0 -2
- data/ext/polars/Cargo.toml +9 -3
- data/ext/polars/src/apply/dataframe.rs +303 -0
- data/ext/polars/src/apply/mod.rs +253 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +254 -35
- data/ext/polars/src/dataframe.rs +151 -6
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +80 -3
- data/ext/polars/src/lazy/dsl.rs +84 -10
- data/ext/polars/src/lib.rs +180 -8
- data/ext/polars/src/series.rs +328 -10
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1480 -77
- data/lib/polars/data_types.rb +122 -0
- data/lib/polars/date_time_expr.rb +10 -10
- data/lib/polars/date_time_name_space.rb +8 -8
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/expr.rb +262 -12
- data/lib/polars/functions.rb +194 -5
- data/lib/polars/group_by.rb +76 -36
- data/lib/polars/io.rb +19 -3
- data/lib/polars/lazy_frame.rb +798 -25
- data/lib/polars/lazy_functions.rb +569 -30
- data/lib/polars/list_expr.rb +1 -1
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +192 -27
- data/lib/polars/string_expr.rb +6 -5
- data/lib/polars/string_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +38 -29
- metadata +11 -4
@@ -0,0 +1,35 @@
|
|
1
|
+
module Polars
  # A rolling grouper.
  #
  # This has an `.agg` method which will allow you to run all polars expressions in a
  # groupby context.
  class RollingGroupBy
    # Remember the frame and the rolling-window settings for a later `agg` call.
    #
    # `period` and `offset` are passed through `Utils._timedelta_to_pl_duration`
    # before being stored; every other argument is stored exactly as given.
    def initialize(df, index_column, period, offset, closed, by)
      @period = Utils._timedelta_to_pl_duration(period)
      @offset = Utils._timedelta_to_pl_duration(offset)

      @df = df
      @time_column = index_column
      @closed = closed
      @by = by
    end

    # Run `aggs` over the rolling groups and return the collected result.
    #
    # The stored frame is made lazy, grouped with `groupby_rolling` using the
    # settings captured in `initialize`, aggregated, and then collected with
    # `no_optimization: true` and `string_cache: false`.
    def agg(aggs)
      rolling_options = {
        index_column: @time_column,
        period: @period,
        offset: @offset,
        closed: @closed,
        by: @by
      }

      grouped = @df.lazy.groupby_rolling(**rolling_options)
      grouped.agg(aggs).collect(no_optimization: true, string_cache: false)
    end
  end
end
|
data/lib/polars/series.rb
CHANGED
@@ -23,9 +23,9 @@ module Polars
|
|
23
23
|
# @example Constructing a Series by specifying name and values positionally:
|
24
24
|
# s = Polars::Series.new("a", [1, 2, 3])
|
25
25
|
#
|
26
|
-
# @example Notice that the dtype is automatically inferred as a polars Int64
|
26
|
+
# @example Notice that the dtype is automatically inferred as a polars `Int64`:
|
27
27
|
# s.dtype
|
28
|
-
# # =>
|
28
|
+
# # => Polars::Int64
|
29
29
|
#
|
30
30
|
# @example Constructing a Series with a specific dtype:
|
31
31
|
# s2 = Polars::Series.new("a", [1, 2, 3], dtype: :f32)
|
@@ -45,6 +45,11 @@ module Polars
|
|
45
45
|
|
46
46
|
name = "" if name.nil?
|
47
47
|
|
48
|
+
# TODO improve
|
49
|
+
if values.is_a?(Range) && values.begin.is_a?(String)
|
50
|
+
values = values.to_a
|
51
|
+
end
|
52
|
+
|
48
53
|
if values.nil?
|
49
54
|
self._s = sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty)
|
50
55
|
elsif values.is_a?(Series)
|
@@ -1668,8 +1673,32 @@ module Polars
|
|
1668
1673
|
super
|
1669
1674
|
end
|
1670
1675
|
|
1671
|
-
#
|
1672
|
-
#
|
1676
|
+
# Cast to physical representation of the logical dtype.
|
1677
|
+
#
|
1678
|
+
# - `:date` -> `:i32`
|
1679
|
+
# - `:datetime` -> `:i64`
|
1680
|
+
# - `:time` -> `:i64`
|
1681
|
+
# - `:duration` -> `:i64`
|
1682
|
+
# - `:cat` -> `:u32`
|
1683
|
+
# - other data types will be left unchanged.
|
1684
|
+
#
|
1685
|
+
# @return [Series]
|
1686
|
+
#
|
1687
|
+
# @example
|
1688
|
+
# s = Polars::Series.new("values", ["a", nil, "x", "a"])
|
1689
|
+
# s.cast(:cat).to_physical
|
1690
|
+
# # =>
|
1691
|
+
# # shape: (4,)
|
1692
|
+
# # Series: 'values' [u32]
|
1693
|
+
# # [
|
1694
|
+
# # 0
|
1695
|
+
# # null
|
1696
|
+
# # 1
|
1697
|
+
# # 0
|
1698
|
+
# # ]
|
1699
|
+
def to_physical
|
1700
|
+
super
|
1701
|
+
end
|
1673
1702
|
|
1674
1703
|
# Convert this Series to a Ruby Array. This operation clones data.
|
1675
1704
|
#
|
@@ -1722,7 +1751,7 @@ module Polars
|
|
1722
1751
|
# s.is_numeric
|
1723
1752
|
# # => true
|
1724
1753
|
def is_numeric
|
1725
|
-
[
|
1754
|
+
[Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64].include?(dtype)
|
1726
1755
|
end
|
1727
1756
|
alias_method :numeric?, :is_numeric
|
1728
1757
|
|
@@ -1735,7 +1764,7 @@ module Polars
|
|
1735
1764
|
# s.is_datelike
|
1736
1765
|
# # => true
|
1737
1766
|
def is_datelike
|
1738
|
-
[
|
1767
|
+
[Date, Datetime, Duration, Time].include?(dtype)
|
1739
1768
|
end
|
1740
1769
|
|
1741
1770
|
# Check if this Series has floating point numbers.
|
@@ -1747,7 +1776,7 @@ module Polars
|
|
1747
1776
|
# s.is_float
|
1748
1777
|
# # => true
|
1749
1778
|
def is_float
|
1750
|
-
[
|
1779
|
+
[Float32, Float64].include?(dtype)
|
1751
1780
|
end
|
1752
1781
|
alias_method :float?, :is_float
|
1753
1782
|
|
@@ -1760,7 +1789,7 @@ module Polars
|
|
1760
1789
|
# s.is_boolean
|
1761
1790
|
# # => true
|
1762
1791
|
def is_boolean
|
1763
|
-
dtype ==
|
1792
|
+
dtype == Boolean
|
1764
1793
|
end
|
1765
1794
|
alias_method :boolean?, :is_boolean
|
1766
1795
|
alias_method :is_bool, :is_boolean
|
@@ -1775,7 +1804,7 @@ module Polars
|
|
1775
1804
|
# s.is_utf8
|
1776
1805
|
# # => true
|
1777
1806
|
def is_utf8
|
1778
|
-
dtype ==
|
1807
|
+
dtype == Utf8
|
1779
1808
|
end
|
1780
1809
|
alias_method :utf8?, :is_utf8
|
1781
1810
|
|
@@ -1785,8 +1814,34 @@ module Polars
|
|
1785
1814
|
# def to_numo
|
1786
1815
|
# end
|
1787
1816
|
|
1788
|
-
#
|
1789
|
-
#
|
1817
|
+
# Set masked values.
|
1818
|
+
#
|
1819
|
+
# @param filter [Series]
|
1820
|
+
# Boolean mask.
|
1821
|
+
# @param value [Object]
|
1822
|
+
# Value with which to replace the masked values.
|
1823
|
+
#
|
1824
|
+
# @return [Series]
|
1825
|
+
#
|
1826
|
+
# @note
|
1827
|
+
# Use of this function is frequently an anti-pattern, as it can
|
1828
|
+
# block optimization (predicate pushdown, etc). Consider using
|
1829
|
+
# `Polars.when(predicate).then(value).otherwise(self)` instead.
|
1830
|
+
#
|
1831
|
+
# @example
|
1832
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
1833
|
+
# s.set(s == 2, 10)
|
1834
|
+
# # =>
|
1835
|
+
# # shape: (3,)
|
1836
|
+
# # Series: 'a' [i64]
|
1837
|
+
# # [
|
1838
|
+
# # 1
|
1839
|
+
# # 10
|
1840
|
+
# # 3
|
1841
|
+
# # ]
|
1842
|
+
def set(filter, value)
  # The native setter is looked up by name: "set_with_mask_" plus the FFI
  # suffix registered for this Series' dtype. `fetch` raises KeyError when
  # the dtype has no entry in DTYPE_TO_FFINAME.
  ffi_suffix = DTYPE_TO_FFINAME.fetch(dtype)
  masked = _s.send("set_with_mask_#{ffi_suffix}", filter._s, value)
  Utils.wrap_s(masked)
end
|
1790
1845
|
|
1791
1846
|
# Set values at the index locations.
|
1792
1847
|
#
|
@@ -2286,8 +2341,41 @@ module Polars
|
|
2286
2341
|
super
|
2287
2342
|
end
|
2288
2343
|
|
2289
|
-
#
|
2290
|
-
#
|
2344
|
+
# Apply a custom/user-defined function (UDF) over elements in this Series and
|
2345
|
+
# return a new Series.
|
2346
|
+
#
|
2347
|
+
# If the function returns another datatype, the return_dtype arg should be set,
|
2348
|
+
# otherwise the method will fail.
|
2349
|
+
#
|
2350
|
+
# @param return_dtype [Symbol]
|
2351
|
+
# Output datatype. If none is given, the same datatype as this Series will be
|
2352
|
+
# used.
|
2353
|
+
# @param skip_nulls [Boolean]
|
2354
|
+
# Nulls will be skipped and not passed to the Ruby function.
|
2355
|
+
# This is faster because Ruby can be skipped and because we call
|
2356
|
+
# more specialized functions.
|
2357
|
+
#
|
2358
|
+
# @return [Series]
|
2359
|
+
#
|
2360
|
+
# @example
|
2361
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
2362
|
+
# s.apply { |x| x + 10 }
|
2363
|
+
# # =>
|
2364
|
+
# # shape: (3,)
|
2365
|
+
# # Series: 'a' [i64]
|
2366
|
+
# # [
|
2367
|
+
# # 11
|
2368
|
+
# # 12
|
2369
|
+
# # 13
|
2370
|
+
# # ]
|
2371
|
+
def apply(return_dtype: nil, skip_nulls: true, &func)
  # Stays nil when no return_dtype is given; otherwise the requested dtype
  # is converted with Utils.rb_type_to_dtype before being handed to the
  # native apply_lambda call.
  pl_return_dtype = Utils.rb_type_to_dtype(return_dtype) unless return_dtype.nil?

  applied = _s.apply_lambda(func, pl_return_dtype, skip_nulls)
  Utils.wrap_s(applied)
end
|
2291
2379
|
|
2292
2380
|
# Shift the values by a given period.
|
2293
2381
|
#
|
@@ -2953,8 +3041,35 @@ module Polars
|
|
2953
3041
|
end
|
2954
3042
|
end
|
2955
3043
|
|
2956
|
-
#
|
2957
|
-
#
|
3044
|
+
# Hash the Series.
|
3045
|
+
#
|
3046
|
+
# The hash value is of type `:u64`.
|
3047
|
+
#
|
3048
|
+
# @param seed [Integer]
|
3049
|
+
# Random seed parameter. Defaults to 0.
|
3050
|
+
# @param seed_1 [Integer]
|
3051
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3052
|
+
# @param seed_2 [Integer]
|
3053
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3054
|
+
# @param seed_3 [Integer]
|
3055
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3056
|
+
#
|
3057
|
+
# @return [Series]
|
3058
|
+
#
|
3059
|
+
# @example
|
3060
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
3061
|
+
# s._hash(42)
|
3062
|
+
# # =>
|
3063
|
+
# # shape: (3,)
|
3064
|
+
# # Series: 'a' [u64]
|
3065
|
+
# # [
|
3066
|
+
# # 2374023516666777365
|
3067
|
+
# # 10386026231460783898
|
3068
|
+
# # 17796317186427479491
|
3069
|
+
# # ]
|
3070
|
+
def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
|
3071
|
+
super
|
3072
|
+
end
|
2958
3073
|
|
2959
3074
|
# Reinterpret the underlying bits as a signed/unsigned integer.
|
2960
3075
|
#
|
@@ -2986,7 +3101,7 @@ module Polars
|
|
2986
3101
|
# # 4
|
2987
3102
|
# # 5
|
2988
3103
|
# # ]
|
2989
|
-
def interpolate
|
3104
|
+
def interpolate(method: "linear")
|
2990
3105
|
super
|
2991
3106
|
end
|
2992
3107
|
|
@@ -3410,10 +3525,10 @@ module Polars
|
|
3410
3525
|
return Utils.wrap_s(_s.send(op, other._s))
|
3411
3526
|
end
|
3412
3527
|
|
3413
|
-
if dtype ==
|
3528
|
+
if dtype == Utf8
|
3414
3529
|
raise Todo
|
3415
3530
|
end
|
3416
|
-
Utils.wrap_s(_s.send("#{op}_#{dtype}", other))
|
3531
|
+
Utils.wrap_s(_s.send("#{op}_#{DTYPE_TO_FFINAME.fetch(dtype)}", other))
|
3417
3532
|
end
|
3418
3533
|
|
3419
3534
|
def _arithmetic(other, op)
|
@@ -3424,8 +3539,39 @@ module Polars
|
|
3424
3539
|
return Utils.wrap_s(_s.send(op, other._s))
|
3425
3540
|
end
|
3426
3541
|
|
3427
|
-
|
3428
|
-
|
3542
|
+
if other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(String)
|
3543
|
+
raise Todo
|
3544
|
+
end
|
3545
|
+
if other.is_a?(Float) && !is_float
|
3546
|
+
raise Todo
|
3547
|
+
end
|
3548
|
+
|
3549
|
+
Utils.wrap_s(_s.send("#{op}_#{DTYPE_TO_FFINAME.fetch(dtype)}", other))
|
3550
|
+
end
|
3551
|
+
|
3552
|
+
# Maps each polars dtype class to the suffix used when building the name of
# a dtype-specific native method (for example "#{op}_#{suffix}" or
# "set_with_mask_#{suffix}"). Frozen so the shared lookup table cannot be
# modified at runtime.
DTYPE_TO_FFINAME = {
  Int8 => "i8",
  Int16 => "i16",
  Int32 => "i32",
  Int64 => "i64",
  UInt8 => "u8",
  UInt16 => "u16",
  UInt32 => "u32",
  UInt64 => "u64",
  Float32 => "f32",
  Float64 => "f64",
  Boolean => "bool",
  Utf8 => "str",
  List => "list",
  Date => "date",
  Datetime => "datetime",
  Duration => "duration",
  Time => "time",
  Object => "object",
  Categorical => "categorical",
  Struct => "struct",
  Binary => "binary"
}.freeze
|
3429
3575
|
|
3430
3576
|
def series_to_rbseries(name, values)
|
3431
3577
|
# should not be in-place?
|
@@ -3449,9 +3595,9 @@ module Polars
|
|
3449
3595
|
end
|
3450
3596
|
|
3451
3597
|
rb_temporal_types = []
|
3452
|
-
rb_temporal_types << Date if defined?(Date)
|
3453
|
-
rb_temporal_types << DateTime if defined?(DateTime)
|
3454
|
-
rb_temporal_types << Time if defined?(Time)
|
3598
|
+
rb_temporal_types << ::Date if defined?(::Date)
|
3599
|
+
rb_temporal_types << ::DateTime if defined?(::DateTime)
|
3600
|
+
rb_temporal_types << ::Time if defined?(::Time)
|
3455
3601
|
|
3456
3602
|
value = _get_first_non_none(values)
|
3457
3603
|
|
@@ -3477,11 +3623,11 @@ module Polars
|
|
3477
3623
|
# dtype = rb_type_to_dtype(dtype)
|
3478
3624
|
# end
|
3479
3625
|
|
3480
|
-
if ruby_dtype == Date
|
3626
|
+
if ruby_dtype == ::Date
|
3481
3627
|
RbSeries.new_opt_date(name, values, strict)
|
3482
|
-
elsif ruby_dtype == Time
|
3628
|
+
elsif ruby_dtype == ::Time
|
3483
3629
|
RbSeries.new_opt_datetime(name, values, strict)
|
3484
|
-
elsif ruby_dtype == DateTime
|
3630
|
+
elsif ruby_dtype == ::DateTime
|
3485
3631
|
RbSeries.new_opt_datetime(name, values.map(&:to_time), strict)
|
3486
3632
|
else
|
3487
3633
|
raise Todo
|
@@ -3524,6 +3670,21 @@ module Polars
|
|
3524
3670
|
end
|
3525
3671
|
|
3526
3672
|
# Maps each polars dtype class to the RbSeries constructor method used to
# build a native series of that dtype. Frozen so the shared lookup table
# cannot be modified at runtime.
POLARS_TYPE_TO_CONSTRUCTOR = {
  Float32 => RbSeries.method(:new_opt_f32),
  Float64 => RbSeries.method(:new_opt_f64),
  Int8 => RbSeries.method(:new_opt_i8),
  Int16 => RbSeries.method(:new_opt_i16),
  Int32 => RbSeries.method(:new_opt_i32),
  Int64 => RbSeries.method(:new_opt_i64),
  UInt8 => RbSeries.method(:new_opt_u8),
  UInt16 => RbSeries.method(:new_opt_u16),
  UInt32 => RbSeries.method(:new_opt_u32),
  UInt64 => RbSeries.method(:new_opt_u64),
  Boolean => RbSeries.method(:new_opt_bool),
  Utf8 => RbSeries.method(:new_str)
}.freeze
|
3686
|
+
|
3687
|
+
SYM_TYPE_TO_CONSTRUCTOR = {
|
3527
3688
|
f32: RbSeries.method(:new_opt_f32),
|
3528
3689
|
f64: RbSeries.method(:new_opt_f64),
|
3529
3690
|
i8: RbSeries.method(:new_opt_i8),
|
@@ -3539,7 +3700,11 @@ module Polars
|
|
3539
3700
|
}
|
3540
3701
|
|
3541
3702
|
# Look up the RbSeries constructor for `dtype`.
#
# A DataType subclass is resolved through POLARS_TYPE_TO_CONSTRUCTOR; any
# other value is converted with `to_sym` and resolved through
# SYM_TYPE_TO_CONSTRUCTOR. A missing entry in either table is reported as
# an ArgumentError.
def polars_type_to_constructor(dtype)
  return POLARS_TYPE_TO_CONSTRUCTOR.fetch(dtype) if dtype.is_a?(Class) && dtype < DataType

  SYM_TYPE_TO_CONSTRUCTOR.fetch(dtype.to_sym)
rescue KeyError
  raise ArgumentError, "Cannot construct RbSeries for type #{dtype}."
end
|
data/lib/polars/string_expr.rb
CHANGED
@@ -64,24 +64,24 @@ module Polars
|
|
64
64
|
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
|
65
65
|
# # │ 2001-07-08 │
|
66
66
|
# # └────────────┘
|
67
|
-
def strptime(datatype, fmt = nil, strict: true, exact: true)
|
67
|
+
def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false)
|
68
68
|
if !Utils.is_polars_dtype(datatype)
|
69
69
|
raise ArgumentError, "expected: {DataType} got: #{datatype}"
|
70
70
|
end
|
71
71
|
|
72
72
|
if datatype == :date
|
73
|
-
Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact))
|
73
|
+
Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact, cache))
|
74
74
|
elsif datatype == :datetime
|
75
75
|
# TODO fix
|
76
76
|
tu = nil # datatype.tu
|
77
|
-
dtcol = Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, strict, exact))
|
77
|
+
dtcol = Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, strict, exact, cache, tz_aware))
|
78
78
|
if tu.nil?
|
79
79
|
dtcol
|
80
80
|
else
|
81
81
|
dtcol.dt.cast_time_unit(tu)
|
82
82
|
end
|
83
83
|
elsif datatype == :time
|
84
|
-
Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact))
|
84
|
+
Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact, cache))
|
85
85
|
else
|
86
86
|
raise ArgumentError, "dtype should be of type :date, :datetime, or :time"
|
87
87
|
end
|
@@ -725,7 +725,8 @@ module Polars
|
|
725
725
|
# # │ ["678", "910"] │
|
726
726
|
# # └────────────────┘
|
727
727
|
def extract_all(pattern)
  # Normalise the pattern through Utils.expr_to_lit_or_expr (with
  # str_to_lit: true) so the native call always receives an expression.
  pattern_expr = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
  matches = _rbexpr.str_extract_all(pattern_expr._rbexpr)
  Utils.wrap_expr(matches)
end
|
730
731
|
|
731
732
|
# Count all successive non-overlapping regex matches.
|
data/lib/polars/utils.rb
CHANGED
@@ -11,6 +11,10 @@ module Polars
|
|
11
11
|
DataFrame._from_rbdf(df)
|
12
12
|
end
|
13
13
|
|
14
|
+
def self.wrap_ldf(ldf)
|
15
|
+
LazyFrame._from_rbldf(ldf)
|
16
|
+
end
|
17
|
+
|
14
18
|
def self.wrap_expr(rbexpr)
|
15
19
|
Expr._from_rbexpr(rbexpr)
|
16
20
|
end
|
@@ -38,18 +42,18 @@ module Polars
|
|
38
42
|
end
|
39
43
|
|
40
44
|
def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
|
41
|
-
if dtype == :date
|
45
|
+
if dtype == :date || dtype == Date
|
42
46
|
# days to seconds
|
43
47
|
# important to create from utc. Not doing this leads
|
44
48
|
# to inconsistencies dependent on the timezone you are in.
|
45
|
-
Time.at(value * 86400).utc.to_date
|
49
|
+
::Time.at(value * 86400).utc.to_date
|
46
50
|
# TODO fix dtype
|
47
|
-
elsif dtype.to_s.start_with?("datetime[")
|
51
|
+
elsif dtype.to_s.start_with?("datetime[") || dtype.is_a?(Datetime)
|
48
52
|
if tz.nil? || tz == ""
|
49
53
|
if tu == "ns"
|
50
54
|
raise Todo
|
51
55
|
elsif tu == "us"
|
52
|
-
dt = Time.at(value / 1000000, value % 1000000, :usec).utc
|
56
|
+
dt = ::Time.at(value / 1000000, value % 1000000, :usec).utc
|
53
57
|
elsif tu == "ms"
|
54
58
|
raise Todo
|
55
59
|
else
|
@@ -95,7 +99,7 @@ module Polars
|
|
95
99
|
|
96
100
|
# TODO fix
|
97
101
|
# Report whether `data_type` is accepted as a polars dtype: a Symbol, a
# String, a DataType instance, or a class that inherits from DataType.
def self.is_polars_dtype(data_type)
  case data_type
  when Symbol, String, DataType
    true
  else
    data_type.is_a?(Class) && data_type < DataType
  end
end
|
100
104
|
|
101
105
|
RB_TYPE_TO_DTYPE = {
|
@@ -104,14 +108,15 @@ module Polars
|
|
104
108
|
String => :str,
|
105
109
|
TrueClass => :bool,
|
106
110
|
FalseClass => :bool,
|
107
|
-
Date => :date,
|
108
|
-
DateTime => :datetime
|
111
|
+
::Date => :date,
|
112
|
+
::DateTime => :datetime
|
109
113
|
}
|
110
114
|
|
111
115
|
# TODO fix
|
112
116
|
def self.rb_type_to_dtype(data_type)
|
113
117
|
if is_polars_dtype(data_type)
|
114
|
-
|
118
|
+
data_type = data_type.to_s if data_type.is_a?(Symbol)
|
119
|
+
return data_type
|
115
120
|
end
|
116
121
|
|
117
122
|
begin
|
@@ -171,5 +176,17 @@ module Polars
|
|
171
176
|
def self.bool?(value)
|
172
177
|
value == true || value == false
|
173
178
|
end
|
179
|
+
|
180
|
+
# Check whether every element of `val` is an instance of `eltype`.
#
# `val` must respond to `all?`; an empty collection yields true.
def self._is_iterable_of(val, eltype)
  val.all? { |x| x.is_a?(eltype) }
end

# Check whether `val` is a sequence of strings.
#
# @param val [Object]
#   Value to inspect.
# @param allow_str [Boolean]
#   Whether a single String counts as a string sequence. Previously this
#   flag had no effect, because a String never passed the Array check.
#
# @return [Boolean]
def self.is_str_sequence(val, allow_str: false)
  # A bare String is a string sequence only when the caller opts in.
  return (allow_str ? true : false) if val.is_a?(::String)

  # `::` keeps these pointing at the core classes, in the same way the
  # file already writes `::Date` and `::Time`.
  val.is_a?(::Array) && _is_iterable_of(val, ::String)
end
|
174
191
|
end
|
175
192
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -1,40 +1,49 @@
|
|
1
1
|
# ext
|
2
|
-
|
2
|
+
begin
|
3
|
+
require_relative "polars/#{RUBY_VERSION.to_f}/polars"
|
4
|
+
rescue LoadError
|
5
|
+
require_relative "polars/polars"
|
6
|
+
end
|
3
7
|
|
4
8
|
# stdlib
|
5
9
|
require "date"
|
6
10
|
|
7
11
|
# modules
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
12
|
+
require_relative "polars/expr_dispatch"
|
13
|
+
require_relative "polars/batched_csv_reader"
|
14
|
+
require_relative "polars/cat_expr"
|
15
|
+
require_relative "polars/cat_name_space"
|
16
|
+
require_relative "polars/convert"
|
17
|
+
require_relative "polars/data_frame"
|
18
|
+
require_relative "polars/data_types"
|
19
|
+
require_relative "polars/date_time_expr"
|
20
|
+
require_relative "polars/date_time_name_space"
|
21
|
+
require_relative "polars/dynamic_group_by"
|
22
|
+
require_relative "polars/exceptions"
|
23
|
+
require_relative "polars/expr"
|
24
|
+
require_relative "polars/functions"
|
25
|
+
require_relative "polars/group_by"
|
26
|
+
require_relative "polars/io"
|
27
|
+
require_relative "polars/lazy_frame"
|
28
|
+
require_relative "polars/lazy_functions"
|
29
|
+
require_relative "polars/lazy_group_by"
|
30
|
+
require_relative "polars/list_expr"
|
31
|
+
require_relative "polars/list_name_space"
|
32
|
+
require_relative "polars/meta_expr"
|
33
|
+
require_relative "polars/rolling_group_by"
|
34
|
+
require_relative "polars/series"
|
35
|
+
require_relative "polars/slice"
|
36
|
+
require_relative "polars/string_expr"
|
37
|
+
require_relative "polars/string_name_space"
|
38
|
+
require_relative "polars/struct_expr"
|
39
|
+
require_relative "polars/struct_name_space"
|
40
|
+
require_relative "polars/utils"
|
41
|
+
require_relative "polars/version"
|
42
|
+
require_relative "polars/when"
|
43
|
+
require_relative "polars/when_then"
|
36
44
|
|
37
45
|
module Polars
|
46
|
+
extend Convert
|
38
47
|
extend Functions
|
39
48
|
extend IO
|
40
49
|
extend LazyFunctions
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-01-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -39,6 +39,9 @@ files:
|
|
39
39
|
- README.md
|
40
40
|
- ext/polars/Cargo.toml
|
41
41
|
- ext/polars/extconf.rb
|
42
|
+
- ext/polars/src/apply/dataframe.rs
|
43
|
+
- ext/polars/src/apply/mod.rs
|
44
|
+
- ext/polars/src/apply/series.rs
|
42
45
|
- ext/polars/src/batched_csv.rs
|
43
46
|
- ext/polars/src/conversion.rs
|
44
47
|
- ext/polars/src/dataframe.rs
|
@@ -60,9 +63,12 @@ files:
|
|
60
63
|
- lib/polars/batched_csv_reader.rb
|
61
64
|
- lib/polars/cat_expr.rb
|
62
65
|
- lib/polars/cat_name_space.rb
|
66
|
+
- lib/polars/convert.rb
|
63
67
|
- lib/polars/data_frame.rb
|
68
|
+
- lib/polars/data_types.rb
|
64
69
|
- lib/polars/date_time_expr.rb
|
65
70
|
- lib/polars/date_time_name_space.rb
|
71
|
+
- lib/polars/dynamic_group_by.rb
|
66
72
|
- lib/polars/exceptions.rb
|
67
73
|
- lib/polars/expr.rb
|
68
74
|
- lib/polars/expr_dispatch.rb
|
@@ -75,6 +81,7 @@ files:
|
|
75
81
|
- lib/polars/list_expr.rb
|
76
82
|
- lib/polars/list_name_space.rb
|
77
83
|
- lib/polars/meta_expr.rb
|
84
|
+
- lib/polars/rolling_group_by.rb
|
78
85
|
- lib/polars/series.rb
|
79
86
|
- lib/polars/slice.rb
|
80
87
|
- lib/polars/string_expr.rb
|
@@ -97,14 +104,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
97
104
|
requirements:
|
98
105
|
- - ">="
|
99
106
|
- !ruby/object:Gem::Version
|
100
|
-
version: '
|
107
|
+
version: '3.0'
|
101
108
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
102
109
|
requirements:
|
103
110
|
- - ">="
|
104
111
|
- !ruby/object:Gem::Version
|
105
112
|
version: '0'
|
106
113
|
requirements: []
|
107
|
-
rubygems_version: 3.
|
114
|
+
rubygems_version: 3.4.1
|
108
115
|
signing_key:
|
109
116
|
specification_version: 4
|
110
117
|
summary: Blazingly fast DataFrames for Ruby
|