polars-df 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/Cargo.lock +430 -217
- data/Cargo.toml +2 -0
- data/LICENSE.txt +1 -1
- data/README.md +0 -2
- data/ext/polars/Cargo.toml +9 -3
- data/ext/polars/src/apply/dataframe.rs +303 -0
- data/ext/polars/src/apply/mod.rs +253 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +254 -35
- data/ext/polars/src/dataframe.rs +151 -6
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +80 -3
- data/ext/polars/src/lazy/dsl.rs +84 -10
- data/ext/polars/src/lib.rs +180 -8
- data/ext/polars/src/series.rs +328 -10
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1480 -77
- data/lib/polars/data_types.rb +122 -0
- data/lib/polars/date_time_expr.rb +10 -10
- data/lib/polars/date_time_name_space.rb +8 -8
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/expr.rb +262 -12
- data/lib/polars/functions.rb +194 -5
- data/lib/polars/group_by.rb +76 -36
- data/lib/polars/io.rb +19 -3
- data/lib/polars/lazy_frame.rb +798 -25
- data/lib/polars/lazy_functions.rb +569 -30
- data/lib/polars/list_expr.rb +1 -1
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +192 -27
- data/lib/polars/string_expr.rb +6 -5
- data/lib/polars/string_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +38 -29
- metadata +11 -4
@@ -0,0 +1,35 @@
|
|
1
|
+
module Polars
  # A rolling grouper.
  #
  # Stores a frame together with rolling-window settings and exposes `#agg`,
  # which runs polars expressions in that groupby context.
  class RollingGroupBy
    # @param df [Object]
    #   Frame to group; must respond to `lazy` (presumably a DataFrame).
    # @param index_column [Object]
    #   Forwarded as `index_column:` to `groupby_rolling`.
    # @param period [Object]
    #   Window length; passed through `Utils._timedelta_to_pl_duration`.
    # @param offset [Object]
    #   Window offset; passed through `Utils._timedelta_to_pl_duration`.
    # @param closed [Object]
    #   Forwarded as `closed:` to `groupby_rolling`.
    # @param by [Object]
    #   Forwarded as `by:` to `groupby_rolling`.
    def initialize(df, index_column, period, offset, closed, by)
      # Convert both durations up front (period first, then offset) so a
      # conversion failure aborts construction before any state is stored.
      pl_period = Utils._timedelta_to_pl_duration(period)
      pl_offset = Utils._timedelta_to_pl_duration(offset)

      @df = df
      @time_column = index_column
      @period = pl_period
      @offset = pl_offset
      @closed = closed
      @by = by
    end

    # Run the given aggregations over the rolling groups.
    #
    # The work is done through the lazy API and collected immediately with
    # optimizations and the string cache switched off.
    #
    # @param aggs [Object]
    #   Aggregation expression(s), handed to the lazy group-by's `agg`.
    #
    # @return [Object]
    #   Whatever `collect` returns (presumably a DataFrame).
    def agg(aggs)
      window = {
        index_column: @time_column,
        period: @period,
        offset: @offset,
        closed: @closed,
        by: @by
      }
      rolling = @df.lazy.groupby_rolling(**window)
      rolling.agg(aggs).collect(no_optimization: true, string_cache: false)
    end
  end
end
|
data/lib/polars/series.rb
CHANGED
@@ -23,9 +23,9 @@ module Polars
|
|
23
23
|
# @example Constructing a Series by specifying name and values positionally:
|
24
24
|
# s = Polars::Series.new("a", [1, 2, 3])
|
25
25
|
#
|
26
|
-
# @example Notice that the dtype is automatically inferred as a polars Int64
|
26
|
+
# @example Notice that the dtype is automatically inferred as a polars `Int64`:
|
27
27
|
# s.dtype
|
28
|
-
# # =>
|
28
|
+
# # => Polars::Int64
|
29
29
|
#
|
30
30
|
# @example Constructing a Series with a specific dtype:
|
31
31
|
# s2 = Polars::Series.new("a", [1, 2, 3], dtype: :f32)
|
@@ -45,6 +45,11 @@ module Polars
|
|
45
45
|
|
46
46
|
name = "" if name.nil?
|
47
47
|
|
48
|
+
# TODO improve
|
49
|
+
if values.is_a?(Range) && values.begin.is_a?(String)
|
50
|
+
values = values.to_a
|
51
|
+
end
|
52
|
+
|
48
53
|
if values.nil?
|
49
54
|
self._s = sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty)
|
50
55
|
elsif values.is_a?(Series)
|
@@ -1668,8 +1673,32 @@ module Polars
|
|
1668
1673
|
super
|
1669
1674
|
end
|
1670
1675
|
|
1671
|
-
#
|
1672
|
-
#
|
1676
|
+
# Cast to physical representation of the logical dtype.
|
1677
|
+
#
|
1678
|
+
# - `:date` -> `:i32`
|
1679
|
+
# - `:datetime` -> `:i64`
|
1680
|
+
# - `:time` -> `:i64`
|
1681
|
+
# - `:duration` -> `:i64`
|
1682
|
+
# - `:cat` -> `:u32`
|
1683
|
+
# - other data types will be left unchanged.
|
1684
|
+
#
|
1685
|
+
# @return [Series]
|
1686
|
+
#
|
1687
|
+
# @example
|
1688
|
+
# s = Polars::Series.new("values", ["a", nil, "x", "a"])
|
1689
|
+
# s.cast(:cat).to_physical
|
1690
|
+
# # =>
|
1691
|
+
# # shape: (4,)
|
1692
|
+
# # Series: 'values' [u32]
|
1693
|
+
# # [
|
1694
|
+
# # 0
|
1695
|
+
# # null
|
1696
|
+
# # 1
|
1697
|
+
# # 0
|
1698
|
+
# # ]
|
1699
|
+
def to_physical
|
1700
|
+
super
|
1701
|
+
end
|
1673
1702
|
|
1674
1703
|
# Convert this Series to a Ruby Array. This operation clones data.
|
1675
1704
|
#
|
@@ -1722,7 +1751,7 @@ module Polars
|
|
1722
1751
|
# s.is_numeric
|
1723
1752
|
# # => true
|
1724
1753
|
def is_numeric
|
1725
|
-
[
|
1754
|
+
[Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64].include?(dtype)
|
1726
1755
|
end
|
1727
1756
|
alias_method :numeric?, :is_numeric
|
1728
1757
|
|
@@ -1735,7 +1764,7 @@ module Polars
|
|
1735
1764
|
# s.is_datelike
|
1736
1765
|
# # => true
|
1737
1766
|
def is_datelike
|
1738
|
-
[
|
1767
|
+
[Date, Datetime, Duration, Time].include?(dtype)
|
1739
1768
|
end
|
1740
1769
|
|
1741
1770
|
# Check if this Series has floating point numbers.
|
@@ -1747,7 +1776,7 @@ module Polars
|
|
1747
1776
|
# s.is_float
|
1748
1777
|
# # => true
|
1749
1778
|
def is_float
|
1750
|
-
[
|
1779
|
+
[Float32, Float64].include?(dtype)
|
1751
1780
|
end
|
1752
1781
|
alias_method :float?, :is_float
|
1753
1782
|
|
@@ -1760,7 +1789,7 @@ module Polars
|
|
1760
1789
|
# s.is_boolean
|
1761
1790
|
# # => true
|
1762
1791
|
def is_boolean
|
1763
|
-
dtype ==
|
1792
|
+
dtype == Boolean
|
1764
1793
|
end
|
1765
1794
|
alias_method :boolean?, :is_boolean
|
1766
1795
|
alias_method :is_bool, :is_boolean
|
@@ -1775,7 +1804,7 @@ module Polars
|
|
1775
1804
|
# s.is_utf8
|
1776
1805
|
# # => true
|
1777
1806
|
def is_utf8
|
1778
|
-
dtype ==
|
1807
|
+
dtype == Utf8
|
1779
1808
|
end
|
1780
1809
|
alias_method :utf8?, :is_utf8
|
1781
1810
|
|
@@ -1785,8 +1814,34 @@ module Polars
|
|
1785
1814
|
# def to_numo
|
1786
1815
|
# end
|
1787
1816
|
|
1788
|
-
#
|
1789
|
-
#
|
1817
|
+
# Set masked values.
|
1818
|
+
#
|
1819
|
+
# @param filter [Series]
|
1820
|
+
# Boolean mask.
|
1821
|
+
# @param value [Object]
|
1822
|
+
# Value with which to replace the masked values.
|
1823
|
+
#
|
1824
|
+
# @return [Series]
|
1825
|
+
#
|
1826
|
+
# @note
|
1827
|
+
# Use of this function is frequently an anti-pattern, as it can
|
1828
|
+
# block optimization (predicate pushdown, etc). Consider using
|
1829
|
+
# `Polars.when(predicate).then(value).otherwise(self)` instead.
|
1830
|
+
#
|
1831
|
+
# @example
|
1832
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
1833
|
+
# s.set(s == 2, 10)
|
1834
|
+
# # =>
|
1835
|
+
# # shape: (3,)
|
1836
|
+
# # Series: 'a' [i64]
|
1837
|
+
# # [
|
1838
|
+
# # 1
|
1839
|
+
# # 10
|
1840
|
+
# # 3
|
1841
|
+
# # ]
|
1842
|
+
def set(filter, value)
  # The native setter is picked by name from this Series' dtype; an
  # unmapped dtype surfaces as a KeyError from `fetch`.
  ffi_suffix = DTYPE_TO_FFINAME.fetch(dtype)
  masked = _s.send("set_with_mask_#{ffi_suffix}", filter._s, value)
  Utils.wrap_s(masked)
end
|
1790
1845
|
|
1791
1846
|
# Set values at the index locations.
|
1792
1847
|
#
|
@@ -2286,8 +2341,41 @@ module Polars
|
|
2286
2341
|
super
|
2287
2342
|
end
|
2288
2343
|
|
2289
|
-
#
|
2290
|
-
#
|
2344
|
+
# Apply a custom/user-defined function (UDF) over elements in this Series and
|
2345
|
+
# return a new Series.
|
2346
|
+
#
|
2347
|
+
# If the function returns another datatype, the return_dtype arg should be set,
|
2348
|
+
# otherwise the method will fail.
|
2349
|
+
#
|
2350
|
+
# @param return_dtype [Symbol]
|
2351
|
+
# Output datatype. If none is given, the same datatype as this Series will be
|
2352
|
+
# used.
|
2353
|
+
# @param skip_nulls [Boolean]
|
2354
|
+
# Nulls will be skipped and not passed to the Ruby function.
|
2355
|
+
# This is faster because Ruby can be skipped and because we call
|
2356
|
+
# more specialized functions.
|
2357
|
+
#
|
2358
|
+
# @return [Series]
|
2359
|
+
#
|
2360
|
+
# @example
|
2361
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
2362
|
+
# s.apply { |x| x + 10 }
|
2363
|
+
# # =>
|
2364
|
+
# # shape: (3,)
|
2365
|
+
# # Series: 'a' [i64]
|
2366
|
+
# # [
|
2367
|
+
# # 11
|
2368
|
+
# # 12
|
2369
|
+
# # 13
|
2370
|
+
# # ]
|
2371
|
+
def apply(return_dtype: nil, skip_nulls: true, &func)
  # A nil return_dtype is forwarded untouched; any other value is
  # normalised through Utils.rb_type_to_dtype before reaching native code.
  pl_return_dtype = return_dtype.nil? ? nil : Utils.rb_type_to_dtype(return_dtype)
  applied = _s.apply_lambda(func, pl_return_dtype, skip_nulls)
  Utils.wrap_s(applied)
end
|
2291
2379
|
|
2292
2380
|
# Shift the values by a given period.
|
2293
2381
|
#
|
@@ -2953,8 +3041,35 @@ module Polars
|
|
2953
3041
|
end
|
2954
3042
|
end
|
2955
3043
|
|
2956
|
-
#
|
2957
|
-
#
|
3044
|
+
# Hash the Series.
|
3045
|
+
#
|
3046
|
+
# The hash value is of type `:u64`.
|
3047
|
+
#
|
3048
|
+
# @param seed [Integer]
|
3049
|
+
# Random seed parameter. Defaults to 0.
|
3050
|
+
# @param seed_1 [Integer]
|
3051
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3052
|
+
# @param seed_2 [Integer]
|
3053
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3054
|
+
# @param seed_3 [Integer]
|
3055
|
+
# Random seed parameter. Defaults to `seed` if not set.
|
3056
|
+
#
|
3057
|
+
# @return [Series]
|
3058
|
+
#
|
3059
|
+
# @example
|
3060
|
+
# s = Polars::Series.new("a", [1, 2, 3])
|
3061
|
+
# s._hash(42)
|
3062
|
+
# # =>
|
3063
|
+
# # shape: (3,)
|
3064
|
+
# # Series: 'a' [u64]
|
3065
|
+
# # [
|
3066
|
+
# # 2374023516666777365
|
3067
|
+
# # 10386026231460783898
|
3068
|
+
# # 17796317186427479491
|
3069
|
+
# # ]
|
3070
|
+
def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
|
3071
|
+
super
|
3072
|
+
end
|
2958
3073
|
|
2959
3074
|
# Reinterpret the underlying bits as a signed/unsigned integer.
|
2960
3075
|
#
|
@@ -2986,7 +3101,7 @@ module Polars
|
|
2986
3101
|
# # 4
|
2987
3102
|
# # 5
|
2988
3103
|
# # ]
|
2989
|
-
def interpolate
|
3104
|
+
def interpolate(method: "linear")
|
2990
3105
|
super
|
2991
3106
|
end
|
2992
3107
|
|
@@ -3410,10 +3525,10 @@ module Polars
|
|
3410
3525
|
return Utils.wrap_s(_s.send(op, other._s))
|
3411
3526
|
end
|
3412
3527
|
|
3413
|
-
if dtype ==
|
3528
|
+
if dtype == Utf8
|
3414
3529
|
raise Todo
|
3415
3530
|
end
|
3416
|
-
Utils.wrap_s(_s.send("#{op}_#{dtype}", other))
|
3531
|
+
Utils.wrap_s(_s.send("#{op}_#{DTYPE_TO_FFINAME.fetch(dtype)}", other))
|
3417
3532
|
end
|
3418
3533
|
|
3419
3534
|
def _arithmetic(other, op)
|
@@ -3424,8 +3539,39 @@ module Polars
|
|
3424
3539
|
return Utils.wrap_s(_s.send(op, other._s))
|
3425
3540
|
end
|
3426
3541
|
|
3427
|
-
|
3428
|
-
|
3542
|
+
if other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(String)
|
3543
|
+
raise Todo
|
3544
|
+
end
|
3545
|
+
if other.is_a?(Float) && !is_float
|
3546
|
+
raise Todo
|
3547
|
+
end
|
3548
|
+
|
3549
|
+
Utils.wrap_s(_s.send("#{op}_#{DTYPE_TO_FFINAME.fetch(dtype)}", other))
|
3550
|
+
end
|
3551
|
+
|
3552
|
+
DTYPE_TO_FFINAME = {
|
3553
|
+
Int8 => "i8",
|
3554
|
+
Int16 => "i16",
|
3555
|
+
Int32 => "i32",
|
3556
|
+
Int64 => "i64",
|
3557
|
+
UInt8 => "u8",
|
3558
|
+
UInt16 => "u16",
|
3559
|
+
UInt32 => "u32",
|
3560
|
+
UInt64 => "u64",
|
3561
|
+
Float32 => "f32",
|
3562
|
+
Float64 => "f64",
|
3563
|
+
Boolean => "bool",
|
3564
|
+
Utf8 => "str",
|
3565
|
+
List => "list",
|
3566
|
+
Date => "date",
|
3567
|
+
Datetime => "datetime",
|
3568
|
+
Duration => "duration",
|
3569
|
+
Time => "time",
|
3570
|
+
Object => "object",
|
3571
|
+
Categorical => "categorical",
|
3572
|
+
Struct => "struct",
|
3573
|
+
Binary => "binary"
|
3574
|
+
}
|
3429
3575
|
|
3430
3576
|
def series_to_rbseries(name, values)
|
3431
3577
|
# should not be in-place?
|
@@ -3449,9 +3595,9 @@ module Polars
|
|
3449
3595
|
end
|
3450
3596
|
|
3451
3597
|
rb_temporal_types = []
|
3452
|
-
rb_temporal_types << Date if defined?(Date)
|
3453
|
-
rb_temporal_types << DateTime if defined?(DateTime)
|
3454
|
-
rb_temporal_types << Time if defined?(Time)
|
3598
|
+
rb_temporal_types << ::Date if defined?(::Date)
|
3599
|
+
rb_temporal_types << ::DateTime if defined?(::DateTime)
|
3600
|
+
rb_temporal_types << ::Time if defined?(::Time)
|
3455
3601
|
|
3456
3602
|
value = _get_first_non_none(values)
|
3457
3603
|
|
@@ -3477,11 +3623,11 @@ module Polars
|
|
3477
3623
|
# dtype = rb_type_to_dtype(dtype)
|
3478
3624
|
# end
|
3479
3625
|
|
3480
|
-
if ruby_dtype == Date
|
3626
|
+
if ruby_dtype == ::Date
|
3481
3627
|
RbSeries.new_opt_date(name, values, strict)
|
3482
|
-
elsif ruby_dtype == Time
|
3628
|
+
elsif ruby_dtype == ::Time
|
3483
3629
|
RbSeries.new_opt_datetime(name, values, strict)
|
3484
|
-
elsif ruby_dtype == DateTime
|
3630
|
+
elsif ruby_dtype == ::DateTime
|
3485
3631
|
RbSeries.new_opt_datetime(name, values.map(&:to_time), strict)
|
3486
3632
|
else
|
3487
3633
|
raise Todo
|
@@ -3524,6 +3670,21 @@ module Polars
|
|
3524
3670
|
end
|
3525
3671
|
|
3526
3672
|
POLARS_TYPE_TO_CONSTRUCTOR = {
|
3673
|
+
Float32 => RbSeries.method(:new_opt_f32),
|
3674
|
+
Float64 => RbSeries.method(:new_opt_f64),
|
3675
|
+
Int8 => RbSeries.method(:new_opt_i8),
|
3676
|
+
Int16 => RbSeries.method(:new_opt_i16),
|
3677
|
+
Int32 => RbSeries.method(:new_opt_i32),
|
3678
|
+
Int64 => RbSeries.method(:new_opt_i64),
|
3679
|
+
UInt8 => RbSeries.method(:new_opt_u8),
|
3680
|
+
UInt16 => RbSeries.method(:new_opt_u16),
|
3681
|
+
UInt32 => RbSeries.method(:new_opt_u32),
|
3682
|
+
UInt64 => RbSeries.method(:new_opt_u64),
|
3683
|
+
Boolean => RbSeries.method(:new_opt_bool),
|
3684
|
+
Utf8 => RbSeries.method(:new_str)
|
3685
|
+
}
|
3686
|
+
|
3687
|
+
SYM_TYPE_TO_CONSTRUCTOR = {
|
3527
3688
|
f32: RbSeries.method(:new_opt_f32),
|
3528
3689
|
f64: RbSeries.method(:new_opt_f64),
|
3529
3690
|
i8: RbSeries.method(:new_opt_i8),
|
@@ -3539,7 +3700,11 @@ module Polars
|
|
3539
3700
|
}
|
3540
3701
|
|
3541
3702
|
def polars_type_to_constructor(dtype)
|
3542
|
-
|
3703
|
+
if dtype.is_a?(Class) && dtype < DataType
|
3704
|
+
POLARS_TYPE_TO_CONSTRUCTOR.fetch(dtype)
|
3705
|
+
else
|
3706
|
+
SYM_TYPE_TO_CONSTRUCTOR.fetch(dtype.to_sym)
|
3707
|
+
end
|
3543
3708
|
rescue KeyError
|
3544
3709
|
raise ArgumentError, "Cannot construct RbSeries for type #{dtype}."
|
3545
3710
|
end
|
data/lib/polars/string_expr.rb
CHANGED
@@ -64,24 +64,24 @@ module Polars
|
|
64
64
|
# # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
|
65
65
|
# # │ 2001-07-08 │
|
66
66
|
# # └────────────┘
|
67
|
-
def strptime(datatype, fmt = nil, strict: true, exact: true)
|
67
|
+
def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false)
|
68
68
|
if !Utils.is_polars_dtype(datatype)
|
69
69
|
raise ArgumentError, "expected: {DataType} got: #{datatype}"
|
70
70
|
end
|
71
71
|
|
72
72
|
if datatype == :date
|
73
|
-
Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact))
|
73
|
+
Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact, cache))
|
74
74
|
elsif datatype == :datetime
|
75
75
|
# TODO fix
|
76
76
|
tu = nil # datatype.tu
|
77
|
-
dtcol = Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, strict, exact))
|
77
|
+
dtcol = Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, strict, exact, cache, tz_aware))
|
78
78
|
if tu.nil?
|
79
79
|
dtcol
|
80
80
|
else
|
81
81
|
dtcol.dt.cast_time_unit(tu)
|
82
82
|
end
|
83
83
|
elsif datatype == :time
|
84
|
-
Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact))
|
84
|
+
Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact, cache))
|
85
85
|
else
|
86
86
|
raise ArgumentError, "dtype should be of type :date, :datetime, or :time"
|
87
87
|
end
|
@@ -725,7 +725,8 @@ module Polars
|
|
725
725
|
# # │ ["678", "910"] │
|
726
726
|
# # └────────────────┘
|
727
727
|
def extract_all(pattern)
|
728
|
-
Utils.
|
728
|
+
pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
|
729
|
+
Utils.wrap_expr(_rbexpr.str_extract_all(pattern._rbexpr))
|
729
730
|
end
|
730
731
|
|
731
732
|
# Count all successive non-overlapping regex matches.
|
data/lib/polars/utils.rb
CHANGED
@@ -11,6 +11,10 @@ module Polars
|
|
11
11
|
DataFrame._from_rbdf(df)
|
12
12
|
end
|
13
13
|
|
14
|
+
def self.wrap_ldf(ldf)
|
15
|
+
LazyFrame._from_rbldf(ldf)
|
16
|
+
end
|
17
|
+
|
14
18
|
def self.wrap_expr(rbexpr)
|
15
19
|
Expr._from_rbexpr(rbexpr)
|
16
20
|
end
|
@@ -38,18 +42,18 @@ module Polars
|
|
38
42
|
end
|
39
43
|
|
40
44
|
def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
|
41
|
-
if dtype == :date
|
45
|
+
if dtype == :date || dtype == Date
|
42
46
|
# days to seconds
|
43
47
|
# important to create from utc. Not doing this leads
|
44
48
|
# to inconsistencies dependent on the timezone you are in.
|
45
|
-
Time.at(value * 86400).utc.to_date
|
49
|
+
::Time.at(value * 86400).utc.to_date
|
46
50
|
# TODO fix dtype
|
47
|
-
elsif dtype.to_s.start_with?("datetime[")
|
51
|
+
elsif dtype.to_s.start_with?("datetime[") || dtype.is_a?(Datetime)
|
48
52
|
if tz.nil? || tz == ""
|
49
53
|
if tu == "ns"
|
50
54
|
raise Todo
|
51
55
|
elsif tu == "us"
|
52
|
-
dt = Time.at(value / 1000000, value % 1000000, :usec).utc
|
56
|
+
dt = ::Time.at(value / 1000000, value % 1000000, :usec).utc
|
53
57
|
elsif tu == "ms"
|
54
58
|
raise Todo
|
55
59
|
else
|
@@ -95,7 +99,7 @@ module Polars
|
|
95
99
|
|
96
100
|
# TODO fix
|
97
101
|
def self.is_polars_dtype(data_type)
|
98
|
-
data_type.is_a?(Symbol) || data_type.is_a?(String)
|
102
|
+
data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
|
99
103
|
end
|
100
104
|
|
101
105
|
RB_TYPE_TO_DTYPE = {
|
@@ -104,14 +108,15 @@ module Polars
|
|
104
108
|
String => :str,
|
105
109
|
TrueClass => :bool,
|
106
110
|
FalseClass => :bool,
|
107
|
-
Date => :date,
|
108
|
-
DateTime => :datetime
|
111
|
+
::Date => :date,
|
112
|
+
::DateTime => :datetime
|
109
113
|
}
|
110
114
|
|
111
115
|
# TODO fix
|
112
116
|
def self.rb_type_to_dtype(data_type)
|
113
117
|
if is_polars_dtype(data_type)
|
114
|
-
|
118
|
+
data_type = data_type.to_s if data_type.is_a?(Symbol)
|
119
|
+
return data_type
|
115
120
|
end
|
116
121
|
|
117
122
|
begin
|
@@ -171,5 +176,17 @@ module Polars
|
|
171
176
|
def self.bool?(value)
|
172
177
|
value == true || value == false
|
173
178
|
end
|
179
|
+
|
180
|
+
# Report whether every element yielded by `val` is an instance of `eltype`.
#
# @param val [Enumerable] collection to inspect
# @param eltype [Module] class or module each element must be an instance of
#
# @return [Boolean] true when no element fails the check (so also for an
#   empty collection)
def self._is_iterable_of(val, eltype)
  val.none? { |element| !element.is_a?(eltype) }
end
|
183
|
+
|
184
|
+
# Check whether `val` is a sequence of strings.
#
# A single String counts as a sequence of strings only when `allow_str` is
# truthy. Previously the keyword had no effect: a bare String fell through
# to the Array check and was always rejected.
#
# @param val [Object] value to inspect
# @param allow_str [Boolean] accept a bare String as a string sequence
#
# @return [Boolean] true for an Array whose elements are all Strings, or
#   for a bare String when `allow_str` is truthy; false otherwise
def self.is_str_sequence(val, allow_str: false)
  # A bare String is decided solely by the flag.
  return allow_str ? true : false if val.is_a?(String)

  val.is_a?(Array) && _is_iterable_of(val, String)
end
|
174
191
|
end
|
175
192
|
end
|
data/lib/polars/version.rb
CHANGED
data/lib/polars.rb
CHANGED
@@ -1,40 +1,49 @@
|
|
1
1
|
# ext
|
2
|
-
|
2
|
+
begin
|
3
|
+
require_relative "polars/#{RUBY_VERSION.to_f}/polars"
|
4
|
+
rescue LoadError
|
5
|
+
require_relative "polars/polars"
|
6
|
+
end
|
3
7
|
|
4
8
|
# stdlib
|
5
9
|
require "date"
|
6
10
|
|
7
11
|
# modules
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
12
|
+
require_relative "polars/expr_dispatch"
|
13
|
+
require_relative "polars/batched_csv_reader"
|
14
|
+
require_relative "polars/cat_expr"
|
15
|
+
require_relative "polars/cat_name_space"
|
16
|
+
require_relative "polars/convert"
|
17
|
+
require_relative "polars/data_frame"
|
18
|
+
require_relative "polars/data_types"
|
19
|
+
require_relative "polars/date_time_expr"
|
20
|
+
require_relative "polars/date_time_name_space"
|
21
|
+
require_relative "polars/dynamic_group_by"
|
22
|
+
require_relative "polars/exceptions"
|
23
|
+
require_relative "polars/expr"
|
24
|
+
require_relative "polars/functions"
|
25
|
+
require_relative "polars/group_by"
|
26
|
+
require_relative "polars/io"
|
27
|
+
require_relative "polars/lazy_frame"
|
28
|
+
require_relative "polars/lazy_functions"
|
29
|
+
require_relative "polars/lazy_group_by"
|
30
|
+
require_relative "polars/list_expr"
|
31
|
+
require_relative "polars/list_name_space"
|
32
|
+
require_relative "polars/meta_expr"
|
33
|
+
require_relative "polars/rolling_group_by"
|
34
|
+
require_relative "polars/series"
|
35
|
+
require_relative "polars/slice"
|
36
|
+
require_relative "polars/string_expr"
|
37
|
+
require_relative "polars/string_name_space"
|
38
|
+
require_relative "polars/struct_expr"
|
39
|
+
require_relative "polars/struct_name_space"
|
40
|
+
require_relative "polars/utils"
|
41
|
+
require_relative "polars/version"
|
42
|
+
require_relative "polars/when"
|
43
|
+
require_relative "polars/when_then"
|
36
44
|
|
37
45
|
module Polars
|
46
|
+
extend Convert
|
38
47
|
extend Functions
|
39
48
|
extend IO
|
40
49
|
extend LazyFunctions
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-01-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -39,6 +39,9 @@ files:
|
|
39
39
|
- README.md
|
40
40
|
- ext/polars/Cargo.toml
|
41
41
|
- ext/polars/extconf.rb
|
42
|
+
- ext/polars/src/apply/dataframe.rs
|
43
|
+
- ext/polars/src/apply/mod.rs
|
44
|
+
- ext/polars/src/apply/series.rs
|
42
45
|
- ext/polars/src/batched_csv.rs
|
43
46
|
- ext/polars/src/conversion.rs
|
44
47
|
- ext/polars/src/dataframe.rs
|
@@ -60,9 +63,12 @@ files:
|
|
60
63
|
- lib/polars/batched_csv_reader.rb
|
61
64
|
- lib/polars/cat_expr.rb
|
62
65
|
- lib/polars/cat_name_space.rb
|
66
|
+
- lib/polars/convert.rb
|
63
67
|
- lib/polars/data_frame.rb
|
68
|
+
- lib/polars/data_types.rb
|
64
69
|
- lib/polars/date_time_expr.rb
|
65
70
|
- lib/polars/date_time_name_space.rb
|
71
|
+
- lib/polars/dynamic_group_by.rb
|
66
72
|
- lib/polars/exceptions.rb
|
67
73
|
- lib/polars/expr.rb
|
68
74
|
- lib/polars/expr_dispatch.rb
|
@@ -75,6 +81,7 @@ files:
|
|
75
81
|
- lib/polars/list_expr.rb
|
76
82
|
- lib/polars/list_name_space.rb
|
77
83
|
- lib/polars/meta_expr.rb
|
84
|
+
- lib/polars/rolling_group_by.rb
|
78
85
|
- lib/polars/series.rb
|
79
86
|
- lib/polars/slice.rb
|
80
87
|
- lib/polars/string_expr.rb
|
@@ -97,14 +104,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
97
104
|
requirements:
|
98
105
|
- - ">="
|
99
106
|
- !ruby/object:Gem::Version
|
100
|
-
version: '
|
107
|
+
version: '3.0'
|
101
108
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
102
109
|
requirements:
|
103
110
|
- - ">="
|
104
111
|
- !ruby/object:Gem::Version
|
105
112
|
version: '0'
|
106
113
|
requirements: []
|
107
|
-
rubygems_version: 3.
|
114
|
+
rubygems_version: 3.4.1
|
108
115
|
signing_key:
|
109
116
|
specification_version: 4
|
110
117
|
summary: Blazingly fast DataFrames for Ruby
|