polars-df 0.17.0 → 0.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +4 -10
- data/README.md +2 -2
- data/ext/polars/Cargo.toml +3 -1
- data/ext/polars/src/conversion/any_value.rs +1 -1
- data/ext/polars/src/conversion/mod.rs +18 -0
- data/ext/polars/src/dataframe/general.rs +5 -5
- data/ext/polars/src/expr/general.rs +4 -0
- data/ext/polars/src/functions/lazy.rs +15 -0
- data/ext/polars/src/interop/numo/mod.rs +1 -0
- data/ext/polars/src/interop/numo/numo_rs.rs +52 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +69 -48
- data/ext/polars/src/lazyframe/general.rs +21 -22
- data/ext/polars/src/lib.rs +7 -2
- data/lib/polars/data_frame.rb +304 -6
- data/lib/polars/expr.rb +31 -0
- data/lib/polars/functions/eager.rb +145 -16
- data/lib/polars/io/database.rb +17 -0
- data/lib/polars/lazy_frame.rb +60 -7
- data/lib/polars/schema.rb +29 -0
- data/lib/polars/series.rb +25 -23
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +1 -0
- metadata +4 -2
data/lib/polars/data_frame.rb
CHANGED
@@ -495,6 +495,45 @@ module Polars
|
|
495
495
|
_df.arrow_c_stream
|
496
496
|
end
|
497
497
|
|
498
|
+
# Get an ordered mapping of column names to their data type.
|
499
|
+
#
|
500
|
+
# @return [Schema]
|
501
|
+
#
|
502
|
+
# @note
|
503
|
+
# This method is included to facilitate writing code that is generic for both
|
504
|
+
# DataFrame and LazyFrame.
|
505
|
+
#
|
506
|
+
# @example Determine the schema.
|
507
|
+
# df = Polars::DataFrame.new(
|
508
|
+
# {
|
509
|
+
# "foo" => [1, 2, 3],
|
510
|
+
# "bar" => [6.0, 7.0, 8.0],
|
511
|
+
# "ham" => ["a", "b", "c"]
|
512
|
+
# }
|
513
|
+
# )
|
514
|
+
# df.collect_schema
|
515
|
+
# # => Polars::Schema({"foo"=>Polars::Int64, "bar"=>Polars::Float64, "ham"=>Polars::String})
|
516
|
+
#
|
517
|
+
# @example Access various properties of the schema using the `Schema` object.
|
518
|
+
# schema = df.collect_schema
|
519
|
+
# schema["bar"]
|
520
|
+
# # => Polars::Float64
|
521
|
+
#
|
522
|
+
# @example
|
523
|
+
# schema.names
|
524
|
+
# # => ["foo", "bar", "ham"]
|
525
|
+
#
|
526
|
+
# @example
|
527
|
+
# schema.dtypes
|
528
|
+
# # => [Polars::Int64, Polars::Float64, Polars::String]
|
529
|
+
#
|
530
|
+
# @example
|
531
|
+
# schema.length
|
532
|
+
# # => 3
|
533
|
+
def collect_schema
|
534
|
+
Schema.new(columns.zip(dtypes), check_dtypes: false)
|
535
|
+
end
|
536
|
+
|
498
537
|
# Return the dataframe as a scalar.
|
499
538
|
#
|
500
539
|
# Equivalent to `df[0,0]`, with a check that the shape is (1,1).
|
@@ -961,6 +1000,139 @@ module Polars
|
|
961
1000
|
)
|
962
1001
|
end
|
963
1002
|
|
1003
|
+
# Write the data in a Polars DataFrame to a database.
|
1004
|
+
#
|
1005
|
+
# @param table_name [String]
|
1006
|
+
# Schema-qualified name of the table to create or append to in the target
|
1007
|
+
# SQL database.
|
1008
|
+
# @param connection [Object]
|
1009
|
+
# An existing Active Record connection against the target database.
|
1010
|
+
# @param if_table_exists ['append', 'replace', 'fail']
|
1011
|
+
# The insert mode:
|
1012
|
+
#
|
1013
|
+
# * 'replace' will create a new database table, overwriting an existing one.
|
1014
|
+
# * 'append' will append to an existing table.
|
1015
|
+
# * 'fail' will fail if table already exists.
|
1016
|
+
#
|
1017
|
+
# @return [Integer]
|
1018
|
+
#
|
1019
|
+
# @note
|
1020
|
+
# This functionality is experimental. It may be changed at any point without it being considered a breaking change.
|
1021
|
+
def write_database(table_name, connection = nil, if_table_exists: "fail")
|
1022
|
+
if !defined?(ActiveRecord)
|
1023
|
+
raise Error, "Active Record not available"
|
1024
|
+
elsif ActiveRecord::VERSION::MAJOR < 7
|
1025
|
+
raise Error, "Requires Active Record 7+"
|
1026
|
+
end
|
1027
|
+
|
1028
|
+
valid_write_modes = ["append", "replace", "fail"]
|
1029
|
+
if !valid_write_modes.include?(if_table_exists)
|
1030
|
+
msg = "write_database `if_table_exists` must be one of #{valid_write_modes.inspect}, got #{if_table_exists.inspect}"
|
1031
|
+
raise ArgumentError, msg
|
1032
|
+
end
|
1033
|
+
|
1034
|
+
with_connection(connection) do |connection|
|
1035
|
+
table_exists = connection.table_exists?(table_name)
|
1036
|
+
if table_exists && if_table_exists == "fail"
|
1037
|
+
raise ArgumentError, "Table already exists"
|
1038
|
+
end
|
1039
|
+
|
1040
|
+
create_table = !table_exists || if_table_exists == "replace"
|
1041
|
+
maybe_transaction(connection, create_table) do
|
1042
|
+
if create_table
|
1043
|
+
mysql = connection.adapter_name.match?(/mysql|trilogy/i)
|
1044
|
+
force = if_table_exists == "replace"
|
1045
|
+
connection.create_table(table_name, id: false, force: force) do |t|
|
1046
|
+
schema.each do |c, dtype|
|
1047
|
+
options = {}
|
1048
|
+
column_type =
|
1049
|
+
case dtype
|
1050
|
+
when Binary
|
1051
|
+
:binary
|
1052
|
+
when Boolean
|
1053
|
+
:boolean
|
1054
|
+
when Date
|
1055
|
+
:date
|
1056
|
+
when Datetime
|
1057
|
+
:datetime
|
1058
|
+
when Decimal
|
1059
|
+
if mysql
|
1060
|
+
options[:precision] = dtype.precision || 65
|
1061
|
+
options[:scale] = dtype.scale || 30
|
1062
|
+
end
|
1063
|
+
:decimal
|
1064
|
+
when Float32
|
1065
|
+
options[:limit] = 24
|
1066
|
+
:float
|
1067
|
+
when Float64
|
1068
|
+
options[:limit] = 53
|
1069
|
+
:float
|
1070
|
+
when Int8
|
1071
|
+
options[:limit] = 1
|
1072
|
+
:integer
|
1073
|
+
when Int16
|
1074
|
+
options[:limit] = 2
|
1075
|
+
:integer
|
1076
|
+
when Int32
|
1077
|
+
options[:limit] = 4
|
1078
|
+
:integer
|
1079
|
+
when Int64
|
1080
|
+
options[:limit] = 8
|
1081
|
+
:integer
|
1082
|
+
when UInt8
|
1083
|
+
if mysql
|
1084
|
+
options[:limit] = 1
|
1085
|
+
options[:unsigned] = true
|
1086
|
+
else
|
1087
|
+
options[:limit] = 2
|
1088
|
+
end
|
1089
|
+
:integer
|
1090
|
+
when UInt16
|
1091
|
+
if mysql
|
1092
|
+
options[:limit] = 2
|
1093
|
+
options[:unsigned] = true
|
1094
|
+
else
|
1095
|
+
options[:limit] = 4
|
1096
|
+
end
|
1097
|
+
:integer
|
1098
|
+
when UInt32
|
1099
|
+
if mysql
|
1100
|
+
options[:limit] = 4
|
1101
|
+
options[:unsigned] = true
|
1102
|
+
else
|
1103
|
+
options[:limit] = 8
|
1104
|
+
end
|
1105
|
+
:integer
|
1106
|
+
when UInt64
|
1107
|
+
if mysql
|
1108
|
+
options[:limit] = 8
|
1109
|
+
options[:unsigned] = true
|
1110
|
+
:integer
|
1111
|
+
else
|
1112
|
+
options[:precision] = 20
|
1113
|
+
options[:scale] = 0
|
1114
|
+
:decimal
|
1115
|
+
end
|
1116
|
+
when String
|
1117
|
+
:text
|
1118
|
+
when Time
|
1119
|
+
:time
|
1120
|
+
else
|
1121
|
+
raise ArgumentError, "column type not supported yet: #{dtype}"
|
1122
|
+
end
|
1123
|
+
t.column c, column_type, **options
|
1124
|
+
end
|
1125
|
+
end
|
1126
|
+
end
|
1127
|
+
|
1128
|
+
quoted_table = connection.quote_table_name(table_name)
|
1129
|
+
quoted_columns = columns.map { |c| connection.quote_column_name(c) }
|
1130
|
+
rows = cast({Polars::UInt64 => Polars::String}).rows(named: false).map { |row| "(#{row.map { |v| connection.quote(v) }.join(", ")})" }
|
1131
|
+
connection.exec_update("INSERT INTO #{quoted_table} (#{quoted_columns.join(", ")}) VALUES #{rows.join(", ")}")
|
1132
|
+
end
|
1133
|
+
end
|
1134
|
+
end
|
1135
|
+
|
964
1136
|
# Write DataFrame as delta table.
|
965
1137
|
#
|
966
1138
|
# @param target [Object]
|
@@ -2424,6 +2596,24 @@ module Polars
|
|
2424
2596
|
# - true: -> Always coalesce join columns.
|
2425
2597
|
# - false: -> Never coalesce join columns.
|
2426
2598
|
# Note that joining on any other expressions than `col` will turn off coalescing.
|
2599
|
+
# @param maintain_order ['none', 'left', 'right', 'left_right', 'right_left']
|
2600
|
+
# Which DataFrame row order to preserve, if any.
|
2601
|
+
# Do not rely on any observed ordering without explicitly
|
2602
|
+
# setting this parameter, as your code may break in a future release.
|
2603
|
+
# Not specifying any ordering can improve performance.
|
2604
|
+
# Supported for inner, left, right and full joins.
|
2605
|
+
#
|
2606
|
+
# * *none*
|
2607
|
+
# No specific ordering is desired. The ordering might differ across
|
2608
|
+
# Polars versions or even between different runs.
|
2609
|
+
# * *left*
|
2610
|
+
# Preserves the order of the left DataFrame.
|
2611
|
+
# * *right*
|
2612
|
+
# Preserves the order of the right DataFrame.
|
2613
|
+
# * *left_right*
|
2614
|
+
# First preserves the order of the left DataFrame, then the right.
|
2615
|
+
# * *right_left*
|
2616
|
+
# First preserves the order of the right DataFrame, then the left.
|
2427
2617
|
#
|
2428
2618
|
# @return [DataFrame]
|
2429
2619
|
#
|
@@ -2506,7 +2696,8 @@ module Polars
|
|
2506
2696
|
# # ╞═════╪═════╪═════╡
|
2507
2697
|
# # │ 3 ┆ 8.0 ┆ c │
|
2508
2698
|
# # └─────┴─────┴─────┘
|
2509
|
-
def join(
|
2699
|
+
def join(
|
2700
|
+
other,
|
2510
2701
|
left_on: nil,
|
2511
2702
|
right_on: nil,
|
2512
2703
|
on: nil,
|
@@ -2514,7 +2705,8 @@ module Polars
|
|
2514
2705
|
suffix: "_right",
|
2515
2706
|
validate: "m:m",
|
2516
2707
|
join_nulls: false,
|
2517
|
-
coalesce: nil
|
2708
|
+
coalesce: nil,
|
2709
|
+
maintain_order: nil
|
2518
2710
|
)
|
2519
2711
|
lazy
|
2520
2712
|
.join(
|
@@ -2526,7 +2718,8 @@ module Polars
|
|
2526
2718
|
suffix: suffix,
|
2527
2719
|
validate: validate,
|
2528
2720
|
join_nulls: join_nulls,
|
2529
|
-
coalesce: coalesce
|
2721
|
+
coalesce: coalesce,
|
2722
|
+
maintain_order: maintain_order
|
2530
2723
|
)
|
2531
2724
|
.collect(no_optimization: true)
|
2532
2725
|
end
|
@@ -4865,6 +5058,90 @@ module Polars
|
|
4865
5058
|
iter_rows(named: named, buffer_size: buffer_size, &block)
|
4866
5059
|
end
|
4867
5060
|
|
5061
|
+
# Returns an iterator over the columns of this DataFrame.
|
5062
|
+
#
|
5063
|
+
# @return [Object]
|
5064
|
+
#
|
5065
|
+
# @note
|
5066
|
+
# Consider whether you can use `all` instead.
|
5067
|
+
# If you can, it will be more efficient.
|
5068
|
+
#
|
5069
|
+
# @example
|
5070
|
+
# df = Polars::DataFrame.new(
|
5071
|
+
# {
|
5072
|
+
# "a" => [1, 3, 5],
|
5073
|
+
# "b" => [2, 4, 6]
|
5074
|
+
# }
|
5075
|
+
# )
|
5076
|
+
# df.iter_columns.map { |s| s.name }
|
5077
|
+
# # => ["a", "b"]
|
5078
|
+
#
|
5079
|
+
# @example Avoid using this to rebuild a dataframe with modified columns:
|
5080
|
+
# # Do NOT do this
|
5081
|
+
# Polars::DataFrame.new(df.iter_columns.map { |column| column * 2 })
|
5082
|
+
# # =>
|
5083
|
+
# # shape: (3, 2)
|
5084
|
+
# # ┌─────┬─────┐
|
5085
|
+
# # │ a ┆ b │
|
5086
|
+
# # │ --- ┆ --- │
|
5087
|
+
# # │ i64 ┆ i64 │
|
5088
|
+
# # ╞═════╪═════╡
|
5089
|
+
# # │ 2 ┆ 4 │
|
5090
|
+
# # │ 6 ┆ 8 │
|
5091
|
+
# # │ 10 ┆ 12 │
|
5092
|
+
# # └─────┴─────┘
|
5093
|
+
#
|
5094
|
+
# @example Prefer `all` when it can express the same operation:
|
5095
|
+
# df.select(Polars.all * 2)
|
5096
|
+
# # =>
|
5097
|
+
# # shape: (3, 2)
|
5098
|
+
# # ┌─────┬─────┐
|
5099
|
+
# # │ a ┆ b │
|
5100
|
+
# # │ --- ┆ --- │
|
5101
|
+
# # │ i64 ┆ i64 │
|
5102
|
+
# # ╞═════╪═════╡
|
5103
|
+
# # │ 2 ┆ 4 │
|
5104
|
+
# # │ 6 ┆ 8 │
|
5105
|
+
# # │ 10 ┆ 12 │
|
5106
|
+
# # └─────┴─────┘
|
5107
|
+
def iter_columns
|
5108
|
+
return to_enum(:iter_columns) unless block_given?
|
5109
|
+
|
5110
|
+
_df.get_columns.each do |s|
|
5111
|
+
yield Utils.wrap_s(s)
|
5112
|
+
end
|
5113
|
+
end
|
5114
|
+
|
5115
|
+
# Returns a non-copying iterator of slices over the underlying DataFrame.
|
5116
|
+
#
|
5117
|
+
# @param n_rows [Integer]
|
5118
|
+
# Determines the number of rows contained in each DataFrame slice.
|
5119
|
+
#
|
5120
|
+
# @return [Object]
|
5121
|
+
#
|
5122
|
+
# @example
|
5123
|
+
# df = Polars::DataFrame.new(
|
5124
|
+
# {
|
5125
|
+
# "a" => 0...17_500,
|
5126
|
+
# "b" => Date.new(2023, 1, 1),
|
5127
|
+
# "c" => "klmnoopqrstuvwxyz"
|
5128
|
+
# },
|
5129
|
+
# schema_overrides: {"a" => Polars::Int32}
|
5130
|
+
# )
|
5131
|
+
# df.iter_slices.map.with_index do |frame, idx|
|
5132
|
+
# "#{frame.class.name}:[#{idx}]:#{frame.length}"
|
5133
|
+
# end
|
5134
|
+
# # => ["Polars::DataFrame:[0]:10000", "Polars::DataFrame:[1]:7500"]
|
5135
|
+
def iter_slices(n_rows: 10_000)
|
5136
|
+
return to_enum(:iter_slices, n_rows: n_rows) unless block_given?
|
5137
|
+
|
5138
|
+
offset = 0
|
5139
|
+
while offset < height
|
5140
|
+
yield slice(offset, n_rows)
|
5141
|
+
offset += n_rows
|
5142
|
+
end
|
5143
|
+
end
|
5144
|
+
|
4868
5145
|
# Shrink DataFrame memory usage.
|
4869
5146
|
#
|
4870
5147
|
# Shrinks to fit the exact capacity needed to hold the data.
|
@@ -5101,12 +5378,17 @@ module Polars
|
|
5101
5378
|
lazy.merge_sorted(other.lazy, key).collect(_eager: true)
|
5102
5379
|
end
|
5103
5380
|
|
5104
|
-
#
|
5381
|
+
# Flag a column as sorted.
|
5382
|
+
#
|
5383
|
+
# This can speed up future operations.
|
5384
|
+
#
|
5385
|
+
# @note
|
5386
|
+
# This can lead to incorrect results if the data is NOT sorted! Use with care!
|
5105
5387
|
#
|
5106
5388
|
# @param column [Object]
|
5107
|
-
#
|
5389
|
+
# Column that is sorted.
|
5108
5390
|
# @param descending [Boolean]
|
5109
|
-
# Whether the
|
5391
|
+
# Whether the column is sorted in descending order.
|
5110
5392
|
#
|
5111
5393
|
# @return [DataFrame]
|
5112
5394
|
def set_sorted(
|
@@ -5538,5 +5820,21 @@ module Polars
|
|
5538
5820
|
end
|
5539
5821
|
other
|
5540
5822
|
end
|
5823
|
+
|
5824
|
+
def with_connection(connection, &block)
|
5825
|
+
if !connection.nil?
|
5826
|
+
yield connection
|
5827
|
+
else
|
5828
|
+
ActiveRecord::Base.connection_pool.with_connection(&block)
|
5829
|
+
end
|
5830
|
+
end
|
5831
|
+
|
5832
|
+
def maybe_transaction(connection, create_table, &block)
|
5833
|
+
if create_table && connection.adapter_name.match?(/postg|sqlite/i) && connection.open_transactions == 0
|
5834
|
+
connection.transaction(&block)
|
5835
|
+
else
|
5836
|
+
yield
|
5837
|
+
end
|
5838
|
+
end
|
5541
5839
|
end
|
5542
5840
|
end
|
data/lib/polars/expr.rb
CHANGED
@@ -3994,6 +3994,37 @@ module Polars
|
|
3994
3994
|
_from_rbexpr(_rbexpr.interpolate(method))
|
3995
3995
|
end
|
3996
3996
|
|
3997
|
+
# Fill null values using interpolation based on another column.
|
3998
|
+
#
|
3999
|
+
# @param by [Expr] Column to interpolate values based on.
|
4000
|
+
#
|
4001
|
+
# @return [Expr]
|
4002
|
+
#
|
4003
|
+
# @example Fill null values using linear interpolation.
|
4004
|
+
# df = Polars::DataFrame.new(
|
4005
|
+
# {
|
4006
|
+
# "a" => [1, nil, nil, 3],
|
4007
|
+
# "b" => [1, 2, 7, 8]
|
4008
|
+
# }
|
4009
|
+
# )
|
4010
|
+
# df.with_columns(a_interpolated: Polars.col("a").interpolate_by("b"))
|
4011
|
+
# # =>
|
4012
|
+
# # shape: (4, 3)
|
4013
|
+
# # ┌──────┬─────┬────────────────┐
|
4014
|
+
# # │ a ┆ b ┆ a_interpolated │
|
4015
|
+
# # │ --- ┆ --- ┆ --- │
|
4016
|
+
# # │ i64 ┆ i64 ┆ f64 │
|
4017
|
+
# # ╞══════╪═════╪════════════════╡
|
4018
|
+
# # │ 1 ┆ 1 ┆ 1.0 │
|
4019
|
+
# # │ null ┆ 2 ┆ 1.285714 │
|
4020
|
+
# # │ null ┆ 7 ┆ 2.714286 │
|
4021
|
+
# # │ 3 ┆ 8 ┆ 3.0 │
|
4022
|
+
# # └──────┴─────┴────────────────┘
|
4023
|
+
def interpolate_by(by)
|
4024
|
+
by = Utils.parse_into_expression(by)
|
4025
|
+
_from_rbexpr(_rbexpr.interpolate_by(by))
|
4026
|
+
end
|
4027
|
+
|
3997
4028
|
# Apply a rolling min based on another column.
|
3998
4029
|
#
|
3999
4030
|
# @param by [String]
|
@@ -6,8 +6,7 @@ module Polars
|
|
6
6
|
# DataFrames/Series/LazyFrames to concatenate.
|
7
7
|
# @param rechunk [Boolean]
|
8
8
|
# Make sure that all data is in contiguous memory.
|
9
|
-
# @param how ["vertical", "vertical_relaxed", "diagonal", "horizontal"]
|
10
|
-
# LazyFrames do not support the `horizontal` strategy.
|
9
|
+
# @param how ["vertical", "vertical_relaxed", "diagonal", "diagonal_relaxed", "horizontal", "align"]
|
11
10
|
#
|
12
11
|
# - Vertical: applies multiple `vstack` operations.
|
13
12
|
# - Diagonal: finds a union between the column schemas and fills missing column values with null.
|
@@ -21,7 +20,7 @@ module Polars
|
|
21
20
|
# @example
|
22
21
|
# df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
|
23
22
|
# df2 = Polars::DataFrame.new({"a" => [2], "b" => [4]})
|
24
|
-
# Polars.concat([df1, df2])
|
23
|
+
# Polars.concat([df1, df2]) # default is 'vertical' strategy
|
25
24
|
# # =>
|
26
25
|
# # shape: (2, 2)
|
27
26
|
# # ┌─────┬─────┐
|
@@ -32,38 +31,168 @@ module Polars
|
|
32
31
|
# # │ 1 ┆ 3 │
|
33
32
|
# # │ 2 ┆ 4 │
|
34
33
|
# # └─────┴─────┘
|
34
|
+
#
|
35
|
+
# @example
|
36
|
+
# df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
|
37
|
+
# df2 = Polars::DataFrame.new({"a" => [2.5], "b" => [4]})
|
38
|
+
# Polars.concat([df1, df2], how: "vertical_relaxed") # 'a' coerced into f64
|
39
|
+
# # =>
|
40
|
+
# # shape: (2, 2)
|
41
|
+
# # ┌─────┬─────┐
|
42
|
+
# # │ a ┆ b │
|
43
|
+
# # │ --- ┆ --- │
|
44
|
+
# # │ f64 ┆ i64 │
|
45
|
+
# # ╞═════╪═════╡
|
46
|
+
# # │ 1.0 ┆ 3 │
|
47
|
+
# # │ 2.5 ┆ 4 │
|
48
|
+
# # └─────┴─────┘
|
49
|
+
#
|
50
|
+
# @example
|
51
|
+
# df_h1 = Polars::DataFrame.new({"l1" => [1, 2], "l2" => [3, 4]})
|
52
|
+
# df_h2 = Polars::DataFrame.new({"r1" => [5, 6], "r2" => [7, 8], "r3" => [9, 10]})
|
53
|
+
# Polars.concat([df_h1, df_h2], how: "horizontal")
|
54
|
+
# # =>
|
55
|
+
# # shape: (2, 5)
|
56
|
+
# # ┌─────┬─────┬─────┬─────┬─────┐
|
57
|
+
# # │ l1 ┆ l2 ┆ r1 ┆ r2 ┆ r3 │
|
58
|
+
# # │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
59
|
+
# # │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
|
60
|
+
# # ╞═════╪═════╪═════╪═════╪═════╡
|
61
|
+
# # │ 1 ┆ 3 ┆ 5 ┆ 7 ┆ 9 │
|
62
|
+
# # │ 2 ┆ 4 ┆ 6 ┆ 8 ┆ 10 │
|
63
|
+
# # └─────┴─────┴─────┴─────┴─────┘
|
64
|
+
#
|
65
|
+
# @example
|
66
|
+
# df_d1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
|
67
|
+
# df_d2 = Polars::DataFrame.new({"a" => [2], "c" => [4]})
|
68
|
+
# Polars.concat([df_d1, df_d2], how: "diagonal")
|
69
|
+
# # =>
|
70
|
+
# # shape: (2, 3)
|
71
|
+
# # ┌─────┬──────┬──────┐
|
72
|
+
# # │ a ┆ b ┆ c │
|
73
|
+
# # │ --- ┆ --- ┆ --- │
|
74
|
+
# # │ i64 ┆ i64 ┆ i64 │
|
75
|
+
# # ╞═════╪══════╪══════╡
|
76
|
+
# # │ 1 ┆ 3 ┆ null │
|
77
|
+
# # │ 2 ┆ null ┆ 4 │
|
78
|
+
# # └─────┴──────┴──────┘
|
79
|
+
#
|
80
|
+
# @example
|
81
|
+
# df_a1 = Polars::DataFrame.new({"id" => [1, 2], "x" => [3, 4]})
|
82
|
+
# df_a2 = Polars::DataFrame.new({"id" => [2, 3], "y" => [5, 6]})
|
83
|
+
# df_a3 = Polars::DataFrame.new({"id" => [1, 3], "z" => [7, 8]})
|
84
|
+
# Polars.concat([df_a1, df_a2, df_a3], how: "align")
|
85
|
+
# # =>
|
86
|
+
# # shape: (3, 4)
|
87
|
+
# # ┌─────┬──────┬──────┬──────┐
|
88
|
+
# # │ id ┆ x ┆ y ┆ z │
|
89
|
+
# # │ --- ┆ --- ┆ --- ┆ --- │
|
90
|
+
# # │ i64 ┆ i64 ┆ i64 ┆ i64 │
|
91
|
+
# # ╞═════╪══════╪══════╪══════╡
|
92
|
+
# # │ 1 ┆ 3 ┆ null ┆ 7 │
|
93
|
+
# # │ 2 ┆ 4 ┆ 5 ┆ null │
|
94
|
+
# # │ 3 ┆ null ┆ 6 ┆ 8 │
|
95
|
+
# # └─────┴──────┴──────┴──────┘
|
35
96
|
def concat(items, rechunk: true, how: "vertical", parallel: true)
|
36
|
-
|
97
|
+
elems = items.to_a
|
98
|
+
|
99
|
+
if elems.empty?
|
37
100
|
raise ArgumentError, "cannot concat empty list"
|
38
101
|
end
|
39
102
|
|
40
|
-
|
103
|
+
if how == "align"
|
104
|
+
if !elems[0].is_a?(DataFrame) && !elems[0].is_a?(LazyFrame)
|
105
|
+
msg = "'align' strategy is not supported for #{elems[0].class.name}"
|
106
|
+
raise TypeError, msg
|
107
|
+
end
|
108
|
+
|
109
|
+
# establish common columns, maintaining the order in which they appear
|
110
|
+
all_columns = elems.flat_map { |e| e.collect_schema.names }
|
111
|
+
key = all_columns.uniq.map.with_index.to_h
|
112
|
+
common_cols = elems.map { |e| e.collect_schema.names }
|
113
|
+
.reduce { |x, y| Set.new(x) & Set.new(y) }
|
114
|
+
.sort_by { |k| key[k] }
|
115
|
+
# we require at least one key column for 'align'
|
116
|
+
if common_cols.empty?
|
117
|
+
msg = "'align' strategy requires at least one common column"
|
118
|
+
raise InvalidOperationError, msg
|
119
|
+
end
|
120
|
+
|
121
|
+
# align the frame data using a full outer join with no suffix-resolution
|
122
|
+
# (so we raise an error in case of column collision, like "horizontal")
|
123
|
+
lf = elems.map { |df| df.lazy }.reduce do |x, y|
|
124
|
+
x.join(
|
125
|
+
y,
|
126
|
+
how: "full",
|
127
|
+
on: common_cols,
|
128
|
+
suffix: "_PL_CONCAT_RIGHT",
|
129
|
+
maintain_order: "right_left"
|
130
|
+
)
|
131
|
+
# Coalesce full outer join columns
|
132
|
+
.with_columns(
|
133
|
+
common_cols.map { |name| F.coalesce([name, "#{name}_PL_CONCAT_RIGHT"]) }
|
134
|
+
)
|
135
|
+
.drop(common_cols.map { |name| "#{name}_PL_CONCAT_RIGHT" })
|
136
|
+
end.sort(common_cols)
|
137
|
+
|
138
|
+
eager = elems[0].is_a?(DataFrame)
|
139
|
+
return eager ? lf.collect : lf
|
140
|
+
end
|
141
|
+
|
142
|
+
first = elems[0]
|
143
|
+
|
41
144
|
if first.is_a?(DataFrame)
|
42
145
|
if how == "vertical"
|
43
|
-
out = Utils.wrap_df(Plr.concat_df(
|
146
|
+
out = Utils.wrap_df(Plr.concat_df(elems))
|
147
|
+
elsif how == "vertical_relaxed"
|
148
|
+
out = Utils.wrap_ldf(
|
149
|
+
Plr.concat_lf(
|
150
|
+
elems.map { |df| df.lazy },
|
151
|
+
rechunk,
|
152
|
+
parallel,
|
153
|
+
true
|
154
|
+
)
|
155
|
+
).collect(no_optimization: true)
|
44
156
|
elsif how == "diagonal"
|
45
|
-
out = Utils.wrap_df(Plr.concat_df_diagonal(
|
157
|
+
out = Utils.wrap_df(Plr.concat_df_diagonal(elems))
|
158
|
+
elsif how == "diagonal_relaxed"
|
159
|
+
out = Utils.wrap_ldf(
|
160
|
+
Plr.concat_lf_diagonal(
|
161
|
+
elems.map { |df| df.lazy },
|
162
|
+
rechunk,
|
163
|
+
parallel,
|
164
|
+
true
|
165
|
+
)
|
166
|
+
).collect(no_optimization: true)
|
46
167
|
elsif how == "horizontal"
|
47
|
-
out = Utils.wrap_df(Plr.concat_df_horizontal(
|
168
|
+
out = Utils.wrap_df(Plr.concat_df_horizontal(elems))
|
48
169
|
else
|
49
|
-
raise ArgumentError, "how must be one of {{'vertical', 'diagonal', 'horizontal'}}, got #{how}"
|
170
|
+
raise ArgumentError, "how must be one of {{'vertical', 'vertical_relaxed', 'diagonal', 'diagonal_relaxed', 'horizontal'}}, got #{how}"
|
50
171
|
end
|
51
172
|
elsif first.is_a?(LazyFrame)
|
52
173
|
if how == "vertical"
|
53
|
-
return Utils.wrap_ldf(Plr.concat_lf(
|
174
|
+
return Utils.wrap_ldf(Plr.concat_lf(elems, rechunk, parallel, false))
|
54
175
|
elsif how == "vertical_relaxed"
|
55
|
-
return Utils.wrap_ldf(Plr.concat_lf(
|
176
|
+
return Utils.wrap_ldf(Plr.concat_lf(elems, rechunk, parallel, true))
|
56
177
|
elsif how == "diagonal"
|
57
|
-
return Utils.wrap_ldf(Plr.concat_lf_diagonal(
|
178
|
+
return Utils.wrap_ldf(Plr.concat_lf_diagonal(elems, rechunk, parallel, false))
|
179
|
+
elsif how == "diagonal_relaxed"
|
180
|
+
return Utils.wrap_ldf(Plr.concat_lf_diagonal(elems, rechunk, parallel, true))
|
181
|
+
elsif how == "horizontal"
|
182
|
+
return Utils.wrap_ldf(Plr.concat_lf_horizontal(elems, parallel))
|
58
183
|
else
|
59
|
-
raise ArgumentError, "Lazy only allows 'vertical', 'vertical_relaxed', and '
|
184
|
+
raise ArgumentError, "Lazy only allows 'vertical', 'vertical_relaxed', 'diagonal', and 'diagonal_relaxed' concat strategy."
|
60
185
|
end
|
61
186
|
elsif first.is_a?(Series)
|
62
|
-
|
63
|
-
|
187
|
+
if how == "vertical"
|
188
|
+
out = Utils.wrap_s(Plr.concat_series(elems))
|
189
|
+
else
|
190
|
+
msg = "Series only supports 'vertical' concat strategy"
|
191
|
+
raise ArgumentError, msg
|
192
|
+
end
|
64
193
|
elsif first.is_a?(Expr)
|
65
194
|
out = first
|
66
|
-
|
195
|
+
elems[1..-1].each do |e|
|
67
196
|
out = out.append(e)
|
68
197
|
end
|
69
198
|
else
|
data/lib/polars/io/database.rb
CHANGED
@@ -51,8 +51,25 @@ module Polars
|
|
51
51
|
when :decimal
|
52
52
|
Decimal
|
53
53
|
when :float
|
54
|
+
# TODO uncomment in 0.18.0
|
55
|
+
# if column_type.limit && column_type.limit <= 24
|
56
|
+
# Float32
|
57
|
+
# else
|
58
|
+
# Float64
|
59
|
+
# end
|
54
60
|
Float64
|
55
61
|
when :integer
|
62
|
+
# TODO uncomment in 0.18.0
|
63
|
+
# case column_type.limit
|
64
|
+
# when 1
|
65
|
+
# Int8
|
66
|
+
# when 2
|
67
|
+
# Int16
|
68
|
+
# when 4
|
69
|
+
# Int32
|
70
|
+
# else
|
71
|
+
# Int64
|
72
|
+
# end
|
56
73
|
Int64
|
57
74
|
when :string, :text
|
58
75
|
String
|