polars-df 0.3.1-aarch64-linux → 0.5.0-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,8 +11,8 @@ module Polars
11
11
 
12
12
  # Parse a Utf8 expression to a Date/Datetime/Time type.
13
13
  #
14
- # @param datatype [Symbol]
15
- # `:date`, `:dateime`, or `:time`.
14
+ # @param dtype [Object]
15
+ # The data type to convert into. Can be either Date, Datetime, or Time.
16
16
  # @param fmt [String]
17
17
  # Format to use, refer to the
18
18
  # [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
@@ -33,57 +33,56 @@ module Polars
33
33
  # the format string, if given, eg: "%F %T%.3f" => Datetime("ms"). If
34
34
  # no fractional second component is found then the default is "us".
35
35
  #
36
- # @example
36
+ # @example Dealing with a consistent format:
37
+ # s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
38
+ # s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
39
+ # # =>
40
+ # # shape: (2,)
41
+ # # Series: '' [datetime[μs, +00:00]]
42
+ # # [
43
+ # # 2020-01-01 01:00:00 +00:00
44
+ # # 2020-01-01 02:00:00 +00:00
45
+ # # ]
46
+ #
47
+ # @example Dealing with different formats.
37
48
  # s = Polars::Series.new(
38
49
  # "date",
39
50
  # [
40
51
  # "2021-04-22",
41
52
  # "2022-01-04 00:00:00",
42
53
  # "01/31/22",
43
- # "Sun Jul 8 00:34:60 2001"
54
+ # "Sun Jul 8 00:34:60 2001",
44
55
  # ]
45
56
  # )
46
- # s.to_frame.with_column(
47
- # Polars.col("date")
48
- # .str.strptime(:date, "%F", strict: false)
49
- # .fill_null(
50
- # Polars.col("date").str.strptime(:date, "%F %T", strict: false)
51
- # )
52
- # .fill_null(Polars.col("date").str.strptime(:date, "%D", strict: false))
53
- # .fill_null(Polars.col("date").str.strptime(:date, "%c", strict: false))
54
- # )
57
+ # s.to_frame.select(
58
+ # Polars.coalesce(
59
+ # Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
60
+ # Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
61
+ # Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
62
+ # Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
63
+ # )
64
+ # ).to_series
55
65
  # # =>
56
- # # shape: (4, 1)
57
- # # ┌────────────┐
58
- # # │ date │
59
- # # │ --- │
60
- # # │ date │
61
- # # ╞════════════╡
62
- # # │ 2021-04-22 │
63
- # # │ 2022-01-04 │
64
- # # 2022-01-31
65
- # # 2001-07-08
66
- # # └────────────┘
67
- def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
68
- if !Utils.is_polars_dtype(datatype)
69
- raise ArgumentError, "expected: {DataType} got: #{datatype}"
70
- end
71
-
72
- if datatype == :date
66
+ # # shape: (4,)
67
+ # # Series: 'date' [date]
68
+ # # [
69
+ # # 2021-04-22
70
+ # # 2022-01-04
71
+ # # 2022-01-31
72
+ # # 2001-07-08
73
+ # # ]
74
+ def strptime(dtype, fmt = nil, strict: true, exact: true, cache: true, tz_aware: false, utc: false)
75
+ if dtype == Date
73
76
  Utils.wrap_expr(_rbexpr.str_parse_date(fmt, strict, exact, cache))
74
- elsif datatype == :datetime
75
- # TODO fix
76
- tu = nil # datatype.tu
77
- dtcol = Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, strict, exact, cache, tz_aware, utc))
78
- if tu.nil?
79
- dtcol
80
- else
81
- dtcol.dt.cast_time_unit(tu)
82
- end
83
- elsif datatype == :time
77
+ elsif dtype == Datetime || dtype.is_a?(Datetime)
78
+ dtype = Datetime.new if dtype == Datetime
79
+ time_unit = dtype.time_unit
80
+ time_zone = dtype.time_zone
81
+ Utils.wrap_expr(_rbexpr.str_parse_datetime(fmt, time_unit, time_zone, strict, exact, cache, tz_aware, utc))
82
+ elsif dtype == Time
84
83
  Utils.wrap_expr(_rbexpr.str_parse_time(fmt, strict, exact, cache))
85
84
  else
86
- raise ArgumentError, "dtype should be of type :date, :datetime, or :time"
85
+ raise ArgumentError, "dtype should be of type {Date, Datetime, Time}"
87
86
  end
88
87
  end
89
88
 
@@ -332,7 +331,7 @@ module Polars
332
331
  # # │ -0001 │
333
332
  # # │ 00000 │
334
333
  # # │ 00001 │
335
- # # │ ...
334
+ # # │
336
335
  # # │ 10000 │
337
336
  # # │ 100000 │
338
337
  # # │ 1000000 │
@@ -521,6 +520,40 @@ module Polars
521
520
  Utils.wrap_expr(_rbexpr.str_starts_with(sub))
522
521
  end
523
522
 
523
+ # Parse string values as JSON.
524
+ #
525
+ # Raises an error if an invalid JSON string is encountered.
526
+ #
527
+ # @param dtype [Object]
528
+ # The dtype to cast the extracted value to. If nil, the dtype will be
529
+ # inferred from the JSON value.
530
+ #
531
+ # @return [Expr]
532
+ #
533
+ # @example
534
+ # df = Polars::DataFrame.new(
535
+ # {"json" => ['{"a":1, "b": true}', nil, '{"a":2, "b": false}']}
536
+ # )
537
+ # dtype = Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)])
538
+ # df.select(Polars.col("json").str.json_extract(dtype))
539
+ # # =>
540
+ # # shape: (3, 1)
541
+ # # ┌─────────────┐
542
+ # # │ json │
543
+ # # │ --- │
544
+ # # │ struct[2] │
545
+ # # ╞═════════════╡
546
+ # # │ {1,true} │
547
+ # # │ {null,null} │
548
+ # # │ {2,false} │
549
+ # # └─────────────┘
550
+ def json_extract(dtype = nil)
551
+ if !dtype.nil?
552
+ dtype = Utils.rb_type_to_dtype(dtype)
553
+ end
554
+ Utils.wrap_expr(_rbexpr.str_json_extract(dtype))
555
+ end
556
+
524
557
  # Extract the first match of json string with provided JSONPath expression.
525
558
  #
526
559
  # Raises an error if an invalid JSON string is encountered.
@@ -846,10 +879,10 @@ module Polars
846
879
  # # │ 1 ┆ 123ABC │
847
880
  # # │ 2 ┆ abc456 │
848
881
  # # └─────┴────────┘
849
- def replace(pattern, value, literal: false)
882
+ def replace(pattern, value, literal: false, n: 1)
850
883
  pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
851
884
  value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
852
- Utils.wrap_expr(_rbexpr.str_replace(pattern._rbexpr, value._rbexpr, literal))
885
+ Utils.wrap_expr(_rbexpr.str_replace_n(pattern._rbexpr, value._rbexpr, literal, n))
853
886
  end
854
887
 
855
888
  # Replace all matching regex/literal substrings with a new string value.
@@ -912,5 +945,78 @@ module Polars
912
945
  def slice(offset, length = nil)
913
946
  Utils.wrap_expr(_rbexpr.str_slice(offset, length))
914
947
  end
948
+
949
+ # Returns a column with a separate row for every string character.
950
+ #
951
+ # @return [Expr]
952
+ #
953
+ # @example
954
+ # df = Polars::DataFrame.new({"a": ["foo", "bar"]})
955
+ # df.select(Polars.col("a").str.explode)
956
+ # # =>
957
+ # # shape: (6, 1)
958
+ # # ┌─────┐
959
+ # # │ a │
960
+ # # │ --- │
961
+ # # │ str │
962
+ # # ╞═════╡
963
+ # # │ f │
964
+ # # │ o │
965
+ # # │ o │
966
+ # # │ b │
967
+ # # │ a │
968
+ # # │ r │
969
+ # # └─────┘
970
+ def explode
971
+ Utils.wrap_expr(_rbexpr.explode)
972
+ end
973
+
974
+ # Parse integers with base radix from strings.
975
+ #
976
+ # The default radix is 2. With `strict: false`, values that fail to parse or that overflow become null.
977
+ #
978
+ # @param radix [Integer]
979
+ # Positive integer which is the base of the string we are parsing.
980
+ # Default: 2.
981
+ # @param strict [Boolean]
982
+ # If true (the default), raise a ComputeError on any parse error or overflow.
983
+ # If false, silently convert such values to null.
984
+ #
985
+ # @return [Expr]
986
+ #
987
+ # @example
988
+ # df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]})
989
+ # df.select(Polars.col("bin").str.parse_int(2, strict: false))
990
+ # # =>
991
+ # # shape: (4, 1)
992
+ # # ┌──────┐
993
+ # # │ bin │
994
+ # # │ --- │
995
+ # # │ i32 │
996
+ # # ╞══════╡
997
+ # # │ 6 │
998
+ # # │ 5 │
999
+ # # │ 2 │
1000
+ # # │ null │
1001
+ # # └──────┘
1002
+ #
1003
+ # @example
1004
+ # df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
1005
+ # df.select(Polars.col("hex").str.parse_int(16, strict: true))
1006
+ # # =>
1007
+ # # shape: (4, 1)
1008
+ # # ┌───────┐
1009
+ # # │ hex │
1010
+ # # │ --- │
1011
+ # # │ i32 │
1012
+ # # ╞═══════╡
1013
+ # # │ 64030 │
1014
+ # # │ 65280 │
1015
+ # # │ 51966 │
1016
+ # # │ null │
1017
+ # # └───────┘
1018
+ def parse_int(radix = 2, strict: true)
1019
+ Utils.wrap_expr(_rbexpr.str_parse_int(radix, strict))
1020
+ end
915
1021
  end
916
1022
  end
@@ -38,12 +38,12 @@ module Polars
38
38
  # )
39
39
  # s.to_frame.with_column(
40
40
  # Polars.col("date")
41
- # .str.strptime(:date, "%F", strict: false)
41
+ # .str.strptime(Polars::Date, "%F", strict: false)
42
42
  # .fill_null(
43
- # Polars.col("date").str.strptime(:date, "%F %T", strict: false)
43
+ # Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false)
44
44
  # )
45
- # .fill_null(Polars.col("date").str.strptime(:date, "%D", strict: false))
46
- # .fill_null(Polars.col("date").str.strptime(:date, "%c", strict: false))
45
+ # .fill_null(Polars.col("date").str.strptime(Polars::Date, "%D", strict: false))
46
+ # .fill_null(Polars.col("date").str.strptime(Polars::Date, "%c", strict: false))
47
47
  # )
48
48
  # # =>
49
49
  # # shape: (4, 1)
@@ -60,5 +60,37 @@ module Polars
60
60
  def rename_fields(names)
61
61
  super
62
62
  end
63
+
64
+ # Get the struct definition as a name/dtype schema hash.
65
+ #
66
+ # @return [Object]
67
+ def schema
68
+ if _s.nil?
69
+ {}
70
+ else
71
+ _s.dtype.to_schema
72
+ end
73
+ end
74
+
75
+ # Convert this struct Series to a DataFrame with a separate column for each field.
76
+ #
77
+ # @return [DataFrame]
78
+ #
79
+ # @example
80
+ # s = Polars::Series.new([{"a" => 1, "b" => 2}, {"a" => 3, "b" => 4}])
81
+ # s.struct.unnest
82
+ # # =>
83
+ # # shape: (2, 2)
84
+ # # ┌─────┬─────┐
85
+ # # │ a ┆ b │
86
+ # # │ --- ┆ --- │
87
+ # # │ i64 ┆ i64 │
88
+ # # ╞═════╪═════╡
89
+ # # │ 1 ┆ 2 │
90
+ # # │ 3 ┆ 4 │
91
+ # # └─────┴─────┘
92
+ def unnest
93
+ Utils.wrap_df(_s.struct_unnest)
94
+ end
63
95
  end
64
96
  end
data/lib/polars/utils.rb CHANGED
@@ -23,24 +23,42 @@ module Polars
23
23
  Polars.col(name)
24
24
  end
25
25
 
26
+ def self.arrlen(obj)
27
+ if obj.is_a?(Range)
28
+ # size only works for numeric ranges
29
+ obj.to_a.length
30
+ elsif obj.is_a?(String)
31
+ nil
32
+ else
33
+ obj.length
34
+ end
35
+ rescue
36
+ nil
37
+ end
38
+
26
39
  def self._timedelta_to_pl_duration(td)
27
40
  td
28
41
  end
29
42
 
30
43
  def self._datetime_to_pl_timestamp(dt, tu)
31
44
  if tu == "ns"
32
- (dt.to_datetime.utc.to_f * 1e9).to_i
45
+ (dt.to_datetime.to_time.to_f * 1e9).to_i
33
46
  elsif tu == "us"
34
- (dt.to_datetime.utc.to_f * 1e6).to_i
47
+ (dt.to_datetime.to_time.to_f * 1e6).to_i
35
48
  elsif tu == "ms"
36
- (dt.to_datetime.utc.to_f * 1e3).to_i
49
+ (dt.to_datetime.to_time.to_f * 1e3).to_i
37
50
  elsif tu.nil?
38
- (dt.to_datetime.utc.to_f * 1e6).to_i
51
+ (dt.to_datetime.to_time.to_f * 1e6).to_i
39
52
  else
40
53
  raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
41
54
  end
42
55
  end
43
56
 
57
+ def self._date_to_pl_date(d)
58
+ dt = d.to_datetime.to_time
59
+ dt.to_i / (3600 * 24)
60
+ end
61
+
44
62
  def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
45
63
  if dtype == :date || dtype == Date
46
64
  # days to seconds
@@ -69,6 +87,18 @@ module Polars
69
87
  end
70
88
  end
71
89
 
90
+ def self._to_ruby_duration(value, tu = "ns")
91
+ if tu == "ns"
92
+ value / 1e9
93
+ elsif tu == "us"
94
+ value / 1e6
95
+ elsif tu == "ms"
96
+ value / 1e3
97
+ else
98
+ raise ArgumentError, "tu must be one of {{'ns', 'us', 'ms'}}, got #{tu}"
99
+ end
100
+ end
101
+
72
102
  def self.selection_to_rbexpr_list(exprs)
73
103
  if exprs.is_a?(String) || exprs.is_a?(Symbol) || exprs.is_a?(Expr) || exprs.is_a?(Series)
74
104
  exprs = [exprs]
@@ -93,12 +123,19 @@ module Polars
93
123
  Polars.lit(value)
94
124
  end
95
125
 
96
- def self.format_path(path)
97
- File.expand_path(path)
126
+ def self.normalise_filepath(path, check_not_directory: true)
127
+ path = File.expand_path(path)
128
+ if check_not_directory && File.exist?(path) && Dir.exist?(path)
129
+ raise ArgumentError, "Expected a file path; #{path} is a directory"
130
+ end
131
+ path
98
132
  end
99
133
 
100
134
  # TODO fix
101
- def self.is_polars_dtype(data_type)
135
+ def self.is_polars_dtype(data_type, include_unknown: false)
136
+ if data_type == Unknown
137
+ return include_unknown
138
+ end
102
139
  data_type.is_a?(Symbol) || data_type.is_a?(String) || data_type.is_a?(DataType) || (data_type.is_a?(Class) && data_type < DataType)
103
140
  end
104
141
 
@@ -109,7 +146,8 @@ module Polars
109
146
  TrueClass => :bool,
110
147
  FalseClass => :bool,
111
148
  ::Date => :date,
112
- ::DateTime => :datetime
149
+ ::DateTime => :datetime,
150
+ ::Time => :datetime
113
151
  }
114
152
 
115
153
  # TODO fix
@@ -174,7 +212,7 @@ module Polars
174
212
  end
175
213
 
176
214
  def self.bool?(value)
177
- value == true || value == false
215
+ value.is_a?(TrueClass) || value.is_a?(FalseClass)
178
216
  end
179
217
 
180
218
  def self.strlike?(value)
@@ -216,5 +254,9 @@ module Polars
216
254
  val.is_a?(Array) && _is_iterable_of(val, String)
217
255
  end
218
256
  end
257
+
258
+ def self.local_file?(file)
259
+ Dir.glob(file).any?
260
+ end
219
261
  end
220
262
  end
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.3.1"
3
+ VERSION = "0.5.0"
4
4
  end
data/lib/polars.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  # ext
2
2
  begin
3
- require_relative "polars/#{RUBY_VERSION.to_f}/polars"
3
+ require "polars/#{RUBY_VERSION.to_f}/polars"
4
4
  rescue LoadError
5
- require_relative "polars/polars"
5
+ require "polars/polars"
6
6
  end
7
7
 
8
8
  # stdlib
@@ -12,6 +12,8 @@ require "stringio"
12
12
  # modules
13
13
  require_relative "polars/expr_dispatch"
14
14
  require_relative "polars/batched_csv_reader"
15
+ require_relative "polars/binary_expr"
16
+ require_relative "polars/binary_name_space"
15
17
  require_relative "polars/cat_expr"
16
18
  require_relative "polars/cat_name_space"
17
19
  require_relative "polars/convert"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.5.0
5
5
  platform: aarch64-linux
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-22 00:00:00.000000000 Z
11
+ date: 2023-05-16 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -29,6 +29,8 @@ files:
29
29
  - lib/polars/3.1/polars.so
30
30
  - lib/polars/3.2/polars.so
31
31
  - lib/polars/batched_csv_reader.rb
32
+ - lib/polars/binary_expr.rb
33
+ - lib/polars/binary_name_space.rb
32
34
  - lib/polars/cat_expr.rb
33
35
  - lib/polars/cat_name_space.rb
34
36
  - lib/polars/convert.rb