polars-df 0.1.5 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -198,7 +198,7 @@ module Polars
198
198
  # }
199
199
  # ).lazy
200
200
  # lf.dtypes
201
- # # => [:i64, :f64, :str]
201
+ # # => [Polars::Int64, Polars::Float64, Polars::Utf8]
202
202
  def dtypes
203
203
  _ldf.dtypes
204
204
  end
@@ -216,7 +216,7 @@ module Polars
216
216
  # }
217
217
  # ).lazy
218
218
  # lf.schema
219
- # # => {"foo"=>:i64, "bar"=>:f64, "ham"=>:str}
219
+ # # => {"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::Utf8}
220
220
  def schema
221
221
  _ldf.schema
222
222
  end
@@ -1173,7 +1173,8 @@ module Polars
1173
1173
  truncate: true,
1174
1174
  include_boundaries: false,
1175
1175
  closed: "left",
1176
- by: nil
1176
+ by: nil,
1177
+ start_by: "window"
1177
1178
  )
1178
1179
  if offset.nil?
1179
1180
  if period.nil?
@@ -1200,7 +1201,8 @@ module Polars
1200
1201
  truncate,
1201
1202
  include_boundaries,
1202
1203
  closed,
1203
- rbexprs_by
1204
+ rbexprs_by,
1205
+ start_by
1204
1206
  )
1205
1207
  LazyGroupBy.new(lgb, self.class)
1206
1208
  end
@@ -2165,7 +2167,8 @@ module Polars
2165
2167
  # # │ 3.0 ┆ 1.0 │
2166
2168
  # # └─────┴─────┘
2167
2169
  def quantile(quantile, interpolation: "nearest")
2168
- _from_rbldf(_ldf.quantile(quantile, interpolation))
2170
+ quantile = Utils.expr_to_lit_or_expr(quantile, str_to_lit: false)
2171
+ _from_rbldf(_ldf.quantile(quantile._rbexpr, interpolation))
2169
2172
  end
2170
2173
 
2171
2174
  # Explode lists to long format.
@@ -8,13 +8,18 @@ module Polars
8
8
  name = name.to_a
9
9
  end
10
10
 
11
- if name.is_a?(Array)
11
+ if name.is_a?(Class) && name < DataType
12
+ name = [name]
13
+ end
14
+
15
+ if name.is_a?(DataType)
16
+ Utils.wrap_expr(_dtype_cols([name]))
17
+ elsif name.is_a?(Array)
12
18
  if name.length == 0 || name[0].is_a?(String) || name[0].is_a?(Symbol)
13
19
  name = name.map { |v| v.is_a?(Symbol) ? v.to_s : v }
14
20
  Utils.wrap_expr(RbExpr.cols(name))
15
21
  elsif Utils.is_polars_dtype(name[0])
16
- raise Todo
17
- # Utils.wrap_expr(_dtype_cols(name))
22
+ Utils.wrap_expr(_dtype_cols(name))
18
23
  else
19
24
  raise ArgumentError, "Expected list values to be all `str` or all `DataType`"
20
25
  end
@@ -568,7 +568,7 @@ module Polars
568
568
  # # └────────────┘
569
569
  def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
570
570
  raise Todo if name_generator
571
- Utils.wrap_expr(_rbexpr.lst_to_struct(n_field_strategy, name_generator))
571
+ Utils.wrap_expr(_rbexpr.lst_to_struct(n_field_strategy, name_generator, 0))
572
572
  end
573
573
 
574
574
  # Run any polars expression against the lists' elements.
data/lib/polars/series.rb CHANGED
@@ -23,9 +23,9 @@ module Polars
23
23
  # @example Constructing a Series by specifying name and values positionally:
24
24
  # s = Polars::Series.new("a", [1, 2, 3])
25
25
  #
26
- # @example Notice that the dtype is automatically inferred as a polars `:i64`:
26
+ # @example Notice that the dtype is automatically inferred as a polars `Int64`:
27
27
  # s.dtype
28
- # # => :i64
28
+ # # => Polars::Int64
29
29
  #
30
30
  # @example Constructing a Series with a specific dtype:
31
31
  # s2 = Polars::Series.new("a", [1, 2, 3], dtype: :f32)
@@ -1751,7 +1751,7 @@ module Polars
1751
1751
  # s.is_numeric
1752
1752
  # # => true
1753
1753
  def is_numeric
1754
- [:i8, :i16, :i32, :i64, :u8, :u16, :u32, :u64, :f32, :f64].include?(dtype)
1754
+ [Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64].include?(dtype)
1755
1755
  end
1756
1756
  alias_method :numeric?, :is_numeric
1757
1757
 
@@ -1764,7 +1764,7 @@ module Polars
1764
1764
  # s.is_datelike
1765
1765
  # # => true
1766
1766
  def is_datelike
1767
- [:date, :datetime, :duration, :time].include?(dtype)
1767
+ [Date, Datetime, Duration, Time].include?(dtype)
1768
1768
  end
1769
1769
 
1770
1770
  # Check if this Series has floating point numbers.
@@ -1776,7 +1776,7 @@ module Polars
1776
1776
  # s.is_float
1777
1777
  # # => true
1778
1778
  def is_float
1779
- [:f32, :f64].include?(dtype)
1779
+ [Float32, Float64].include?(dtype)
1780
1780
  end
1781
1781
  alias_method :float?, :is_float
1782
1782
 
@@ -1789,7 +1789,7 @@ module Polars
1789
1789
  # s.is_boolean
1790
1790
  # # => true
1791
1791
  def is_boolean
1792
- dtype == :bool
1792
+ dtype == Boolean
1793
1793
  end
1794
1794
  alias_method :boolean?, :is_boolean
1795
1795
  alias_method :is_bool, :is_boolean
@@ -1804,7 +1804,7 @@ module Polars
1804
1804
  # s.is_utf8
1805
1805
  # # => true
1806
1806
  def is_utf8
1807
- dtype == :str
1807
+ dtype == Utf8
1808
1808
  end
1809
1809
  alias_method :utf8?, :is_utf8
1810
1810
 
@@ -1840,7 +1840,7 @@ module Polars
1840
1840
  # # 3
1841
1841
  # # ]
1842
1842
  def set(filter, value)
1843
- Utils.wrap_s(_s.send("set_with_mask_#{dtype}", filter._s, value))
1843
+ Utils.wrap_s(_s.send("set_with_mask_#{DTYPE_TO_FFINAME.fetch(dtype)}", filter._s, value))
1844
1844
  end
1845
1845
 
1846
1846
  # Set values at the index locations.
@@ -3101,7 +3101,7 @@ module Polars
3101
3101
  # # 4
3102
3102
  # # 5
3103
3103
  # # ]
3104
- def interpolate
3104
+ def interpolate(method: "linear")
3105
3105
  super
3106
3106
  end
3107
3107
 
@@ -3525,10 +3525,10 @@ module Polars
3525
3525
  return Utils.wrap_s(_s.send(op, other._s))
3526
3526
  end
3527
3527
 
3528
- if dtype == :str
3528
+ if dtype == Utf8
3529
3529
  raise Todo
3530
3530
  end
3531
- Utils.wrap_s(_s.send("#{op}_#{dtype}", other))
3531
+ Utils.wrap_s(_s.send("#{op}_#{DTYPE_TO_FFINAME.fetch(dtype)}", other))
3532
3532
  end
3533
3533
 
3534
3534
  def _arithmetic(other, op)
@@ -3539,15 +3539,39 @@ module Polars
3539
3539
  return Utils.wrap_s(_s.send(op, other._s))
3540
3540
  end
3541
3541
 
3542
- if other.is_a?(Date) || other.is_a?(DateTime) || other.is_a?(Time) || other.is_a?(String)
3542
+ if other.is_a?(::Date) || other.is_a?(::DateTime) || other.is_a?(::Time) || other.is_a?(String)
3543
3543
  raise Todo
3544
3544
  end
3545
3545
  if other.is_a?(Float) && !is_float
3546
3546
  raise Todo
3547
3547
  end
3548
3548
 
3549
- Utils.wrap_s(_s.send("#{op}_#{dtype}", other))
3550
- end
3549
+ Utils.wrap_s(_s.send("#{op}_#{DTYPE_TO_FFINAME.fetch(dtype)}", other))
3550
+ end
3551
+
3552
+ DTYPE_TO_FFINAME = {
3553
+ Int8 => "i8",
3554
+ Int16 => "i16",
3555
+ Int32 => "i32",
3556
+ Int64 => "i64",
3557
+ UInt8 => "u8",
3558
+ UInt16 => "u16",
3559
+ UInt32 => "u32",
3560
+ UInt64 => "u64",
3561
+ Float32 => "f32",
3562
+ Float64 => "f64",
3563
+ Boolean => "bool",
3564
+ Utf8 => "str",
3565
+ List => "list",
3566
+ Date => "date",
3567
+ Datetime => "datetime",
3568
+ Duration => "duration",
3569
+ Time => "time",
3570
+ Object => "object",
3571
+ Categorical => "categorical",
3572
+ Struct => "struct",
3573
+ Binary => "binary"
3574
+ }
3551
3575
 
3552
3576
  def series_to_rbseries(name, values)
3553
3577
  # should not be in-place?
@@ -3571,9 +3595,9 @@ module Polars
3571
3595
  end
3572
3596
 
3573
3597
  rb_temporal_types = []
3574
- rb_temporal_types << Date if defined?(Date)
3575
- rb_temporal_types << DateTime if defined?(DateTime)
3576
- rb_temporal_types << Time if defined?(Time)
3598
+ rb_temporal_types << ::Date if defined?(::Date)
3599
+ rb_temporal_types << ::DateTime if defined?(::DateTime)
3600
+ rb_temporal_types << ::Time if defined?(::Time)
3577
3601
 
3578
3602
  value = _get_first_non_none(values)
3579
3603
 
@@ -3599,11 +3623,11 @@ module Polars
3599
3623
  # dtype = rb_type_to_dtype(dtype)
3600
3624
  # end
3601
3625
 
3602
- if ruby_dtype == Date
3626
+ if ruby_dtype == ::Date
3603
3627
  RbSeries.new_opt_date(name, values, strict)
3604
- elsif ruby_dtype == Time
3628
+ elsif ruby_dtype == ::Time
3605
3629
  RbSeries.new_opt_datetime(name, values, strict)
3606
- elsif ruby_dtype == DateTime
3630
+ elsif ruby_dtype == ::DateTime
3607
3631
  RbSeries.new_opt_datetime(name, values.map(&:to_time), strict)
3608
3632
  else
3609
3633
  raise Todo
@@ -3646,6 +3670,21 @@ module Polars
3646
3670
  end
3647
3671
 
3648
3672
  POLARS_TYPE_TO_CONSTRUCTOR = {
3673
+ Float32 => RbSeries.method(:new_opt_f32),
3674
+ Float64 => RbSeries.method(:new_opt_f64),
3675
+ Int8 => RbSeries.method(:new_opt_i8),
3676
+ Int16 => RbSeries.method(:new_opt_i16),
3677
+ Int32 => RbSeries.method(:new_opt_i32),
3678
+ Int64 => RbSeries.method(:new_opt_i64),
3679
+ UInt8 => RbSeries.method(:new_opt_u8),
3680
+ UInt16 => RbSeries.method(:new_opt_u16),
3681
+ UInt32 => RbSeries.method(:new_opt_u32),
3682
+ UInt64 => RbSeries.method(:new_opt_u64),
3683
+ Boolean => RbSeries.method(:new_opt_bool),
3684
+ Utf8 => RbSeries.method(:new_str)
3685
+ }
3686
+
3687
+ SYM_TYPE_TO_CONSTRUCTOR = {
3649
3688
  f32: RbSeries.method(:new_opt_f32),
3650
3689
  f64: RbSeries.method(:new_opt_f64),
3651
3690
  i8: RbSeries.method(:new_opt_i8),
@@ -3661,7 +3700,11 @@ module Polars
3661
3700
  }
3662
3701
 
3663
3702
  def polars_type_to_constructor(dtype)
3664
- POLARS_TYPE_TO_CONSTRUCTOR.fetch(dtype.to_sym)
3703
+ if dtype.is_a?(Class) && dtype < DataType
3704
+ POLARS_TYPE_TO_CONSTRUCTOR.fetch(dtype)
3705
+ else
3706
+ SYM_TYPE_TO_CONSTRUCTOR.fetch(dtype.to_sym)
3707
+ end
3665
3708
  rescue KeyError
3666
3709
  raise ArgumentError, "Cannot construct RbSeries for type #{dtype}."
3667
3710
  end
@@ -64,24 +64,24 @@ module Polars
64
64
  # # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
65
65
  # # │ 2001-07-08 │
66
66
  # # └────────────┘
67
- def strptime(datatype, fmt = nil, strict: true, exact: true)
67
+ def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false)
68
68
  if !Utils.is_polars_dtype(datatype)
69
69
  raise ArgumentError, "expected: {DataType} got: #{datatype}"
70
70
  end
71
71
 
72
72
  if datatype == :date
73
- Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact))
73
+ Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact, cache))
74
74
  elsif datatype == :datetime
75
75
  # TODO fix
76
76
  tu = nil # datatype.tu
77
- dtcol = Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, strict, exact))
77
+ dtcol = Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, strict, exact, cache, tz_aware))
78
78
  if tu.nil?
79
79
  dtcol
80
80
  else
81
81
  dtcol.dt.cast_time_unit(tu)
82
82
  end
83
83
  elsif datatype == :time
84
- Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact))
84
+ Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact, cache))
85
85
  else
86
86
  raise ArgumentError, "dtype should be of type :date, :datetime, or :time"
87
87
  end
@@ -725,7 +725,8 @@ module Polars
725
725
  # # │ ["678", "910"] │
726
726
  # # └────────────────┘
727
727
  def extract_all(pattern)
728
- Utils.wrap_expr(_rbexpr.str_extract_all(pattern))
728
+ pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
729
+ Utils.wrap_expr(_rbexpr.str_extract_all(pattern._rbexpr))
729
730
  end
730
731
 
731
732
  # Count all successive non-overlapping regex matches.
@@ -60,7 +60,7 @@ module Polars
60
60
  # # ├╌╌╌╌╌╌╌╌╌╌╌╌┤
61
61
  # # │ 2001-07-08 │
62
62
  # # └────────────┘
63
- def strptime(datatype, fmt = nil, strict: true, exact: true)
63
+ def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false)
64
64
  super
65
65
  end
66
66
 
data/lib/polars/utils.rb CHANGED
@@ -42,18 +42,18 @@ module Polars
42
42
  end
43
43
 
44
44
  def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
45
- if dtype == :date
45
+ if dtype == :date || dtype == Date
46
46
  # days to seconds
47
47
  # important to create from utc. Not doing this leads
48
48
  # to inconsistencies dependent on the timezone you are in.
49
- Time.at(value * 86400).utc.to_date
49
+ ::Time.at(value * 86400).utc.to_date
50
50
  # TODO fix dtype
51
- elsif dtype.to_s.start_with?("datetime[")
51
+ elsif dtype.to_s.start_with?("datetime[") || dtype.is_a?(Datetime)
52
52
  if tz.nil? || tz == ""
53
53
  if tu == "ns"
54
54
  raise Todo
55
55
  elsif tu == "us"
56
- dt = Time.at(value / 1000000, value % 1000000, :usec).utc
56
+ dt = ::Time.at(value / 1000000, value % 1000000, :usec).utc
57
57
  elsif tu == "ms"
58
58
  raise Todo
59
59
  else
@@ -99,7 +99,7 @@ module Polars
99
99
 
100
100
  # TODO fix
101
101
  def self.is_polars_dtype(data_type)
102
- data_type.is_a?(Symbol) || data_type.is_a?(String)
102
+ data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
103
103
  end
104
104
 
105
105
  RB_TYPE_TO_DTYPE = {
@@ -108,14 +108,15 @@ module Polars
108
108
  String => :str,
109
109
  TrueClass => :bool,
110
110
  FalseClass => :bool,
111
- Date => :date,
112
- DateTime => :datetime
111
+ ::Date => :date,
112
+ ::DateTime => :datetime
113
113
  }
114
114
 
115
115
  # TODO fix
116
116
  def self.rb_type_to_dtype(data_type)
117
117
  if is_polars_dtype(data_type)
118
- return data_type.to_s
118
+ data_type = data_type.to_s if data_type.is_a?(Symbol)
119
+ return data_type
119
120
  end
120
121
 
121
122
  begin
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.1.5"
3
+ VERSION = "0.2.0"
4
4
  end
data/lib/polars.rb CHANGED
@@ -1,45 +1,46 @@
1
1
  # ext
2
2
  begin
3
- require "polars/#{RUBY_VERSION.to_f}/polars"
3
+ require_relative "polars/#{RUBY_VERSION.to_f}/polars"
4
4
  rescue LoadError
5
- require "polars/polars"
5
+ require_relative "polars/polars"
6
6
  end
7
7
 
8
8
  # stdlib
9
9
  require "date"
10
10
 
11
11
  # modules
12
- require "polars/expr_dispatch"
13
- require "polars/batched_csv_reader"
14
- require "polars/cat_expr"
15
- require "polars/cat_name_space"
16
- require "polars/convert"
17
- require "polars/data_frame"
18
- require "polars/date_time_expr"
19
- require "polars/date_time_name_space"
20
- require "polars/dynamic_group_by"
21
- require "polars/exceptions"
22
- require "polars/expr"
23
- require "polars/functions"
24
- require "polars/group_by"
25
- require "polars/io"
26
- require "polars/lazy_frame"
27
- require "polars/lazy_functions"
28
- require "polars/lazy_group_by"
29
- require "polars/list_expr"
30
- require "polars/list_name_space"
31
- require "polars/meta_expr"
32
- require "polars/rolling_group_by"
33
- require "polars/series"
34
- require "polars/slice"
35
- require "polars/string_expr"
36
- require "polars/string_name_space"
37
- require "polars/struct_expr"
38
- require "polars/struct_name_space"
39
- require "polars/utils"
40
- require "polars/version"
41
- require "polars/when"
42
- require "polars/when_then"
12
+ require_relative "polars/expr_dispatch"
13
+ require_relative "polars/batched_csv_reader"
14
+ require_relative "polars/cat_expr"
15
+ require_relative "polars/cat_name_space"
16
+ require_relative "polars/convert"
17
+ require_relative "polars/data_frame"
18
+ require_relative "polars/data_types"
19
+ require_relative "polars/date_time_expr"
20
+ require_relative "polars/date_time_name_space"
21
+ require_relative "polars/dynamic_group_by"
22
+ require_relative "polars/exceptions"
23
+ require_relative "polars/expr"
24
+ require_relative "polars/functions"
25
+ require_relative "polars/group_by"
26
+ require_relative "polars/io"
27
+ require_relative "polars/lazy_frame"
28
+ require_relative "polars/lazy_functions"
29
+ require_relative "polars/lazy_group_by"
30
+ require_relative "polars/list_expr"
31
+ require_relative "polars/list_name_space"
32
+ require_relative "polars/meta_expr"
33
+ require_relative "polars/rolling_group_by"
34
+ require_relative "polars/series"
35
+ require_relative "polars/slice"
36
+ require_relative "polars/string_expr"
37
+ require_relative "polars/string_name_space"
38
+ require_relative "polars/struct_expr"
39
+ require_relative "polars/struct_name_space"
40
+ require_relative "polars/utils"
41
+ require_relative "polars/version"
42
+ require_relative "polars/when"
43
+ require_relative "polars/when_then"
43
44
 
44
45
  module Polars
45
46
  extend Convert
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-12-22 00:00:00.000000000 Z
11
+ date: 2023-01-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -65,6 +65,7 @@ files:
65
65
  - lib/polars/cat_name_space.rb
66
66
  - lib/polars/convert.rb
67
67
  - lib/polars/data_frame.rb
68
+ - lib/polars/data_types.rb
68
69
  - lib/polars/date_time_expr.rb
69
70
  - lib/polars/date_time_name_space.rb
70
71
  - lib/polars/dynamic_group_by.rb
@@ -103,14 +104,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
103
104
  requirements:
104
105
  - - ">="
105
106
  - !ruby/object:Gem::Version
106
- version: '2.7'
107
+ version: '3.0'
107
108
  required_rubygems_version: !ruby/object:Gem::Requirement
108
109
  requirements:
109
110
  - - ">="
110
111
  - !ruby/object:Gem::Version
111
112
  version: '0'
112
113
  requirements: []
113
- rubygems_version: 3.3.26
114
+ rubygems_version: 3.4.1
114
115
  signing_key:
115
116
  specification_version: 4
116
117
  summary: Blazingly fast DataFrames for Ruby