polars-df 0.10.0-aarch64-linux → 0.11.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +90 -48
- data/LICENSE-THIRD-PARTY.txt +152 -79
- data/README.md +6 -6
- data/lib/polars/3.1/polars.so +0 -0
- data/lib/polars/3.2/polars.so +0 -0
- data/lib/polars/3.3/polars.so +0 -0
- data/lib/polars/batched_csv_reader.rb +9 -3
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +83 -302
- data/lib/polars/date_time_expr.rb +1 -0
- data/lib/polars/date_time_name_space.rb +5 -1
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/exceptions.rb +4 -0
- data/lib/polars/expr.rb +1134 -20
- data/lib/polars/functions/range/date_range.rb +92 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +296 -490
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +18 -0
- data/lib/polars/io/ndjson.rb +69 -0
- data/lib/polars/io/parquet.rb +226 -0
- data/lib/polars/lazy_frame.rb +23 -166
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +2 -2
- data/lib/polars/string_expr.rb +37 -36
- data/lib/polars/utils.rb +35 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -1
- metadata +12 -4
data/lib/polars/data_frame.rb
CHANGED
@@ -46,271 +46,6 @@ module Polars
|
|
46
46
|
df
|
47
47
|
end
|
48
48
|
|
49
|
-
# @private
|
50
|
-
def self._from_hashes(data, infer_schema_length: 100, schema: nil, schema_overrides: nil)
|
51
|
-
rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema, schema_overrides)
|
52
|
-
_from_rbdf(rbdf)
|
53
|
-
end
|
54
|
-
|
55
|
-
# @private
|
56
|
-
def self._from_hash(data, schema: nil, schema_overrides: nil)
|
57
|
-
_from_rbdf(hash_to_rbdf(data, schema: schema, schema_overrides: schema_overrides))
|
58
|
-
end
|
59
|
-
|
60
|
-
# def self._from_records
|
61
|
-
# end
|
62
|
-
|
63
|
-
# def self._from_numo
|
64
|
-
# end
|
65
|
-
|
66
|
-
# no self._from_arrow
|
67
|
-
|
68
|
-
# no self._from_pandas
|
69
|
-
|
70
|
-
# @private
|
71
|
-
def self._read_csv(
|
72
|
-
file,
|
73
|
-
has_header: true,
|
74
|
-
columns: nil,
|
75
|
-
sep: str = ",",
|
76
|
-
comment_char: nil,
|
77
|
-
quote_char: '"',
|
78
|
-
skip_rows: 0,
|
79
|
-
dtypes: nil,
|
80
|
-
null_values: nil,
|
81
|
-
ignore_errors: false,
|
82
|
-
parse_dates: false,
|
83
|
-
n_threads: nil,
|
84
|
-
infer_schema_length: 100,
|
85
|
-
batch_size: 8192,
|
86
|
-
n_rows: nil,
|
87
|
-
encoding: "utf8",
|
88
|
-
low_memory: false,
|
89
|
-
rechunk: true,
|
90
|
-
skip_rows_after_header: 0,
|
91
|
-
row_count_name: nil,
|
92
|
-
row_count_offset: 0,
|
93
|
-
sample_size: 1024,
|
94
|
-
eol_char: "\n",
|
95
|
-
truncate_ragged_lines: false
|
96
|
-
)
|
97
|
-
if Utils.pathlike?(file)
|
98
|
-
path = Utils.normalise_filepath(file)
|
99
|
-
else
|
100
|
-
path = nil
|
101
|
-
# if defined?(StringIO) && file.is_a?(StringIO)
|
102
|
-
# file = file.string
|
103
|
-
# end
|
104
|
-
end
|
105
|
-
|
106
|
-
dtype_list = nil
|
107
|
-
dtype_slice = nil
|
108
|
-
if !dtypes.nil?
|
109
|
-
if dtypes.is_a?(Hash)
|
110
|
-
dtype_list = []
|
111
|
-
dtypes.each do|k, v|
|
112
|
-
dtype_list << [k, Utils.rb_type_to_dtype(v)]
|
113
|
-
end
|
114
|
-
elsif dtypes.is_a?(::Array)
|
115
|
-
dtype_slice = dtypes
|
116
|
-
else
|
117
|
-
raise ArgumentError, "dtype arg should be list or dict"
|
118
|
-
end
|
119
|
-
end
|
120
|
-
|
121
|
-
processed_null_values = Utils._process_null_values(null_values)
|
122
|
-
|
123
|
-
if columns.is_a?(::String)
|
124
|
-
columns = [columns]
|
125
|
-
end
|
126
|
-
if file.is_a?(::String) && file.include?("*")
|
127
|
-
dtypes_dict = nil
|
128
|
-
if !dtype_list.nil?
|
129
|
-
dtypes_dict = dtype_list.to_h
|
130
|
-
end
|
131
|
-
if !dtype_slice.nil?
|
132
|
-
raise ArgumentError, "cannot use glob patterns and unnamed dtypes as `dtypes` argument; Use dtypes: Mapping[str, Type[DataType]"
|
133
|
-
end
|
134
|
-
scan = Polars.scan_csv(
|
135
|
-
file,
|
136
|
-
has_header: has_header,
|
137
|
-
sep: sep,
|
138
|
-
comment_char: comment_char,
|
139
|
-
quote_char: quote_char,
|
140
|
-
skip_rows: skip_rows,
|
141
|
-
dtypes: dtypes_dict,
|
142
|
-
null_values: null_values,
|
143
|
-
ignore_errors: ignore_errors,
|
144
|
-
infer_schema_length: infer_schema_length,
|
145
|
-
n_rows: n_rows,
|
146
|
-
low_memory: low_memory,
|
147
|
-
rechunk: rechunk,
|
148
|
-
skip_rows_after_header: skip_rows_after_header,
|
149
|
-
row_count_name: row_count_name,
|
150
|
-
row_count_offset: row_count_offset,
|
151
|
-
eol_char: eol_char,
|
152
|
-
truncate_ragged_lines: truncate_ragged_lines
|
153
|
-
)
|
154
|
-
if columns.nil?
|
155
|
-
return _from_rbdf(scan.collect._df)
|
156
|
-
elsif is_str_sequence(columns, allow_str: false)
|
157
|
-
return _from_rbdf(scan.select(columns).collect._df)
|
158
|
-
else
|
159
|
-
raise ArgumentError, "cannot use glob patterns and integer based projection as `columns` argument; Use columns: List[str]"
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
projection, columns = Utils.handle_projection_columns(columns)
|
164
|
-
|
165
|
-
_from_rbdf(
|
166
|
-
RbDataFrame.read_csv(
|
167
|
-
file,
|
168
|
-
infer_schema_length,
|
169
|
-
batch_size,
|
170
|
-
has_header,
|
171
|
-
ignore_errors,
|
172
|
-
n_rows,
|
173
|
-
skip_rows,
|
174
|
-
projection,
|
175
|
-
sep,
|
176
|
-
rechunk,
|
177
|
-
columns,
|
178
|
-
encoding,
|
179
|
-
n_threads,
|
180
|
-
path,
|
181
|
-
dtype_list,
|
182
|
-
dtype_slice,
|
183
|
-
low_memory,
|
184
|
-
comment_char,
|
185
|
-
quote_char,
|
186
|
-
processed_null_values,
|
187
|
-
parse_dates,
|
188
|
-
skip_rows_after_header,
|
189
|
-
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
190
|
-
sample_size,
|
191
|
-
eol_char,
|
192
|
-
truncate_ragged_lines
|
193
|
-
)
|
194
|
-
)
|
195
|
-
end
|
196
|
-
|
197
|
-
# @private
|
198
|
-
def self._read_parquet(
|
199
|
-
source,
|
200
|
-
columns: nil,
|
201
|
-
n_rows: nil,
|
202
|
-
parallel: "auto",
|
203
|
-
row_count_name: nil,
|
204
|
-
row_count_offset: 0,
|
205
|
-
low_memory: false,
|
206
|
-
use_statistics: true,
|
207
|
-
rechunk: true
|
208
|
-
)
|
209
|
-
if Utils.pathlike?(source)
|
210
|
-
source = Utils.normalise_filepath(source)
|
211
|
-
end
|
212
|
-
if columns.is_a?(::String)
|
213
|
-
columns = [columns]
|
214
|
-
end
|
215
|
-
|
216
|
-
if source.is_a?(::String) && source.include?("*") && Utils.local_file?(source)
|
217
|
-
scan =
|
218
|
-
Polars.scan_parquet(
|
219
|
-
source,
|
220
|
-
n_rows: n_rows,
|
221
|
-
rechunk: true,
|
222
|
-
parallel: parallel,
|
223
|
-
row_count_name: row_count_name,
|
224
|
-
row_count_offset: row_count_offset,
|
225
|
-
low_memory: low_memory
|
226
|
-
)
|
227
|
-
|
228
|
-
if columns.nil?
|
229
|
-
return self._from_rbdf(scan.collect._df)
|
230
|
-
elsif Utils.is_str_sequence(columns, allow_str: false)
|
231
|
-
return self._from_rbdf(scan.select(columns).collect._df)
|
232
|
-
else
|
233
|
-
raise ArgumentError, "cannot use glob patterns and integer based projection as `columns` argument; Use columns: Array[String]"
|
234
|
-
end
|
235
|
-
end
|
236
|
-
|
237
|
-
projection, columns = Utils.handle_projection_columns(columns)
|
238
|
-
_from_rbdf(
|
239
|
-
RbDataFrame.read_parquet(
|
240
|
-
source,
|
241
|
-
columns,
|
242
|
-
projection,
|
243
|
-
n_rows,
|
244
|
-
parallel,
|
245
|
-
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
246
|
-
low_memory,
|
247
|
-
use_statistics,
|
248
|
-
rechunk
|
249
|
-
)
|
250
|
-
)
|
251
|
-
end
|
252
|
-
|
253
|
-
# @private
|
254
|
-
def self._read_avro(file, columns: nil, n_rows: nil)
|
255
|
-
if Utils.pathlike?(file)
|
256
|
-
file = Utils.normalise_filepath(file)
|
257
|
-
end
|
258
|
-
projection, columns = Utils.handle_projection_columns(columns)
|
259
|
-
_from_rbdf(RbDataFrame.read_avro(file, columns, projection, n_rows))
|
260
|
-
end
|
261
|
-
|
262
|
-
# @private
|
263
|
-
def self._read_ipc(
|
264
|
-
file,
|
265
|
-
columns: nil,
|
266
|
-
n_rows: nil,
|
267
|
-
row_count_name: nil,
|
268
|
-
row_count_offset: 0,
|
269
|
-
rechunk: true,
|
270
|
-
memory_map: true
|
271
|
-
)
|
272
|
-
if Utils.pathlike?(file)
|
273
|
-
file = Utils.normalise_filepath(file)
|
274
|
-
end
|
275
|
-
if columns.is_a?(::String)
|
276
|
-
columns = [columns]
|
277
|
-
end
|
278
|
-
|
279
|
-
if file.is_a?(::String) && file.include?("*")
|
280
|
-
raise Todo
|
281
|
-
end
|
282
|
-
|
283
|
-
projection, columns = Utils.handle_projection_columns(columns)
|
284
|
-
_from_rbdf(
|
285
|
-
RbDataFrame.read_ipc(
|
286
|
-
file,
|
287
|
-
columns,
|
288
|
-
projection,
|
289
|
-
n_rows,
|
290
|
-
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
291
|
-
memory_map
|
292
|
-
)
|
293
|
-
)
|
294
|
-
end
|
295
|
-
|
296
|
-
# @private
|
297
|
-
def self._read_json(file)
|
298
|
-
if Utils.pathlike?(file)
|
299
|
-
file = Utils.normalise_filepath(file)
|
300
|
-
end
|
301
|
-
|
302
|
-
_from_rbdf(RbDataFrame.read_json(file))
|
303
|
-
end
|
304
|
-
|
305
|
-
# @private
|
306
|
-
def self._read_ndjson(file)
|
307
|
-
if Utils.pathlike?(file)
|
308
|
-
file = Utils.normalise_filepath(file)
|
309
|
-
end
|
310
|
-
|
311
|
-
_from_rbdf(RbDataFrame.read_ndjson(file))
|
312
|
-
end
|
313
|
-
|
314
49
|
# Get the shape of the DataFrame.
|
315
50
|
#
|
316
51
|
# @return [Array]
|
@@ -419,6 +154,13 @@ module Polars
|
|
419
154
|
_df.dtypes
|
420
155
|
end
|
421
156
|
|
157
|
+
# Get flags that are set on the columns of this DataFrame.
|
158
|
+
#
|
159
|
+
# @return [Hash]
|
160
|
+
def flags
|
161
|
+
columns.to_h { |name| [name, self[name].flags] }
|
162
|
+
end
|
163
|
+
|
422
164
|
# Get the schema.
|
423
165
|
#
|
424
166
|
# @return [Hash]
|
@@ -845,7 +587,7 @@ module Polars
|
|
845
587
|
row_oriented: false
|
846
588
|
)
|
847
589
|
if Utils.pathlike?(file)
|
848
|
-
file = Utils.normalise_filepath(file)
|
590
|
+
file = Utils.normalize_filepath(file)
|
849
591
|
end
|
850
592
|
to_string_io = !file.nil? && file.is_a?(StringIO)
|
851
593
|
if file.nil? || to_string_io
|
@@ -884,7 +626,7 @@ module Polars
|
|
884
626
|
# # => "{\"foo\":1,\"bar\":6}\n{\"foo\":2,\"bar\":7}\n{\"foo\":3,\"bar\":8}\n"
|
885
627
|
def write_ndjson(file = nil)
|
886
628
|
if Utils.pathlike?(file)
|
887
|
-
file = Utils.normalise_filepath(file)
|
629
|
+
file = Utils.normalize_filepath(file)
|
888
630
|
end
|
889
631
|
to_string_io = !file.nil? && file.is_a?(StringIO)
|
890
632
|
if file.nil? || to_string_io
|
@@ -991,7 +733,7 @@ module Polars
|
|
991
733
|
end
|
992
734
|
|
993
735
|
if Utils.pathlike?(file)
|
994
|
-
file = Utils.normalise_filepath(file)
|
736
|
+
file = Utils.normalize_filepath(file)
|
995
737
|
end
|
996
738
|
|
997
739
|
_df.write_csv(
|
@@ -1029,7 +771,7 @@ module Polars
|
|
1029
771
|
compression = "uncompressed"
|
1030
772
|
end
|
1031
773
|
if Utils.pathlike?(file)
|
1032
|
-
file = Utils.normalise_filepath(file)
|
774
|
+
file = Utils.normalize_filepath(file)
|
1033
775
|
end
|
1034
776
|
|
1035
777
|
_df.write_avro(file, compression)
|
@@ -1050,7 +792,7 @@ module Polars
|
|
1050
792
|
file.set_encoding(Encoding::BINARY)
|
1051
793
|
end
|
1052
794
|
if Utils.pathlike?(file)
|
1053
|
-
file = Utils.normalise_filepath(file)
|
795
|
+
file = Utils.normalize_filepath(file)
|
1054
796
|
end
|
1055
797
|
|
1056
798
|
if compression.nil?
|
@@ -1061,6 +803,47 @@ module Polars
|
|
1061
803
|
return_bytes ? file.string : nil
|
1062
804
|
end
|
1063
805
|
|
806
|
+
# Write to Arrow IPC record batch stream.
|
807
|
+
#
|
808
|
+
# See "Streaming format" in https://arrow.apache.org/docs/python/ipc.html.
|
809
|
+
#
|
810
|
+
# @param file [Object]
|
811
|
+
# Path or writable file-like object to which the IPC record batch data will
|
812
|
+
# be written. If set to `None`, the output is returned as a BytesIO object.
|
813
|
+
# @param compression ['uncompressed', 'lz4', 'zstd']
|
814
|
+
# Compression method. Defaults to "uncompressed".
|
815
|
+
#
|
816
|
+
# @return [Object]
|
817
|
+
#
|
818
|
+
# @example
|
819
|
+
# df = Polars::DataFrame.new(
|
820
|
+
# {
|
821
|
+
# "foo" => [1, 2, 3, 4, 5],
|
822
|
+
# "bar" => [6, 7, 8, 9, 10],
|
823
|
+
# "ham" => ["a", "b", "c", "d", "e"]
|
824
|
+
# }
|
825
|
+
# )
|
826
|
+
# df.write_ipc_stream("new_file.arrow")
|
827
|
+
def write_ipc_stream(
|
828
|
+
file,
|
829
|
+
compression: "uncompressed"
|
830
|
+
)
|
831
|
+
return_bytes = file.nil?
|
832
|
+
if return_bytes
|
833
|
+
file = StringIO.new
|
834
|
+
file.set_encoding(Encoding::BINARY)
|
835
|
+
elsif Utils.pathlike?(file)
|
836
|
+
file = Utils.normalize_filepath(file)
|
837
|
+
end
|
838
|
+
|
839
|
+
if compression.nil?
|
840
|
+
compression = "uncompressed"
|
841
|
+
end
|
842
|
+
|
843
|
+
_df.write_ipc_stream(file, compression)
|
844
|
+
return_bytes ? file.string : nil
|
845
|
+
end
|
846
|
+
|
1064
847
|
# Write to Apache Parquet file.
|
1065
848
|
#
|
1066
849
|
# @param file [String, Pathname, StringIO]
|
@@ -1097,7 +880,7 @@ module Polars
|
|
1097
880
|
compression = "uncompressed"
|
1098
881
|
end
|
1099
882
|
if Utils.pathlike?(file)
|
1100
|
-
file = Utils.normalise_filepath(file)
|
883
|
+
file = Utils.normalize_filepath(file)
|
1101
884
|
end
|
1102
885
|
|
1103
886
|
_df.write_parquet(
|
@@ -1773,10 +1556,7 @@ module Polars
|
|
1773
1556
|
# # │ 3 ┆ 8 ┆ c │
|
1774
1557
|
# # └─────┴─────┴─────┘
|
1775
1558
|
def drop_nulls(subset: nil)
|
1776
|
-
|
1777
|
-
subset = [subset]
|
1778
|
-
end
|
1779
|
-
_from_rbdf(_df.drop_nulls(subset))
|
1559
|
+
lazy.drop_nulls(subset: subset).collect(_eager: true)
|
1780
1560
|
end
|
1781
1561
|
|
1782
1562
|
# Offers a structured way to apply a sequence of user-defined functions (UDFs).
|
@@ -1838,16 +1618,16 @@ module Polars
|
|
1838
1618
|
# df.with_row_index
|
1839
1619
|
# # =>
|
1840
1620
|
# # shape: (3, 3)
|
1841
|
-
# #
|
1842
|
-
# # │
|
1843
|
-
# # │ ---
|
1844
|
-
# # │ u32
|
1845
|
-
# #
|
1846
|
-
# # │ 0
|
1847
|
-
# # │ 1
|
1848
|
-
# # │ 2
|
1849
|
-
# #
|
1850
|
-
def with_row_index(name: "
|
1621
|
+
# # ┌───────┬─────┬─────┐
|
1622
|
+
# # │ index ┆ a ┆ b │
|
1623
|
+
# # │ --- ┆ --- ┆ --- │
|
1624
|
+
# # │ u32 ┆ i64 ┆ i64 │
|
1625
|
+
# # ╞═══════╪═════╪═════╡
|
1626
|
+
# # │ 0 ┆ 1 ┆ 2 │
|
1627
|
+
# # │ 1 ┆ 3 ┆ 4 │
|
1628
|
+
# # │ 2 ┆ 5 ┆ 6 │
|
1629
|
+
# # └───────┴─────┴─────┘
|
1630
|
+
def with_row_index(name: "index", offset: 0)
|
1851
1631
|
_from_rbdf(_df.with_row_index(name, offset))
|
1852
1632
|
end
|
1853
1633
|
alias_method :with_row_count, :with_row_index
|
@@ -2136,16 +1916,16 @@ module Polars
|
|
2136
1916
|
# )
|
2137
1917
|
# # =>
|
2138
1918
|
# # shape: (4, 3)
|
2139
|
-
# #
|
2140
|
-
# # │ time ┆ time_count ┆ time_agg_list
|
2141
|
-
# # │ --- ┆ --- ┆ ---
|
2142
|
-
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]]
|
2143
|
-
# #
|
2144
|
-
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12
|
2145
|
-
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12
|
2146
|
-
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12
|
2147
|
-
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00]
|
2148
|
-
# #
|
1919
|
+
# # ┌─────────────────────┬────────────┬─────────────────────────────────┐
|
1920
|
+
# # │ time ┆ time_count ┆ time_agg_list │
|
1921
|
+
# # │ --- ┆ --- ┆ --- │
|
1922
|
+
# # │ datetime[μs] ┆ u32 ┆ list[datetime[μs]] │
|
1923
|
+
# # ╞═════════════════════╪════════════╪═════════════════════════════════╡
|
1924
|
+
# # │ 2021-12-16 00:00:00 ┆ 2 ┆ [2021-12-16 00:00:00, 2021-12-… │
|
1925
|
+
# # │ 2021-12-16 01:00:00 ┆ 2 ┆ [2021-12-16 01:00:00, 2021-12-… │
|
1926
|
+
# # │ 2021-12-16 02:00:00 ┆ 2 ┆ [2021-12-16 02:00:00, 2021-12-… │
|
1927
|
+
# # │ 2021-12-16 03:00:00 ┆ 1 ┆ [2021-12-16 03:00:00] │
|
1928
|
+
# # └─────────────────────┴────────────┴─────────────────────────────────┘
|
2149
1929
|
#
|
2150
1930
|
# @example When closed="both" the time values at the window boundaries belong to 2 groups.
|
2151
1931
|
# df.group_by_dynamic("time", every: "1h", closed: "both").agg(
|
@@ -2620,7 +2400,7 @@ module Polars
|
|
2620
2400
|
# df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [-1, 5, 8]})
|
2621
2401
|
#
|
2622
2402
|
# @example Return a DataFrame by mapping each row to a tuple:
|
2623
|
-
# df.apply { |t| [t[0] * 2, t[1] * 3] }
|
2403
|
+
# df.map_rows { |t| [t[0] * 2, t[1] * 3] }
|
2624
2404
|
# # =>
|
2625
2405
|
# # shape: (3, 2)
|
2626
2406
|
# # ┌──────────┬──────────┐
|
@@ -2634,7 +2414,7 @@ module Polars
|
|
2634
2414
|
# # └──────────┴──────────┘
|
2635
2415
|
#
|
2636
2416
|
# @example Return a Series by mapping each row to a scalar:
|
2637
|
-
# df.apply { |t| t[0] * 2 + t[1] }
|
2417
|
+
# df.map_rows { |t| t[0] * 2 + t[1] }
|
2638
2418
|
# # =>
|
2639
2419
|
# # shape: (3, 1)
|
2640
2420
|
# # ┌───────┐
|
@@ -2646,14 +2426,15 @@ module Polars
|
|
2646
2426
|
# # │ 9 │
|
2647
2427
|
# # │ 14 │
|
2648
2428
|
# # └───────┘
|
2649
|
-
def apply(return_dtype: nil, inference_size: 256, &f)
|
2650
|
-
out, is_df = _df.
|
2429
|
+
def map_rows(return_dtype: nil, inference_size: 256, &f)
|
2430
|
+
out, is_df = _df.map_rows(f, return_dtype, inference_size)
|
2651
2431
|
if is_df
|
2652
2432
|
_from_rbdf(out)
|
2653
2433
|
else
|
2654
2434
|
_from_rbdf(Utils.wrap_s(out).to_frame._df)
|
2655
2435
|
end
|
2656
2436
|
end
|
2437
|
+
alias_method :apply, :map_rows
|
2657
2438
|
|
2658
2439
|
# Return a new DataFrame with the column added or replaced.
|
2659
2440
|
#
|
@@ -3774,7 +3555,7 @@ module Polars
|
|
3774
3555
|
# # ┌─────────┐
|
3775
3556
|
# # │ literal │
|
3776
3557
|
# # │ --- │
|
3777
|
-
# # │
|
3558
|
+
# # │ i32 │
|
3778
3559
|
# # ╞═════════╡
|
3779
3560
|
# # │ 0 │
|
3780
3561
|
# # │ 0 │
|
@@ -5255,7 +5036,7 @@ module Polars
|
|
5255
5036
|
elsif data[0].is_a?(Hash)
|
5256
5037
|
column_names, dtypes = _unpack_schema(columns)
|
5257
5038
|
schema_overrides = dtypes ? include_unknowns(dtypes, column_names) : nil
|
5258
|
-
rbdf = RbDataFrame.
|
5039
|
+
rbdf = RbDataFrame.from_hashes(data, schema, schema_overrides, false, infer_schema_length)
|
5259
5040
|
if column_names
|
5260
5041
|
rbdf = _post_apply_columns(rbdf, column_names)
|
5261
5042
|
end
|
@@ -5289,7 +5070,7 @@ module Polars
|
|
5289
5070
|
if unpack_nested
|
5290
5071
|
raise Todo
|
5291
5072
|
else
|
5292
|
-
rbdf = RbDataFrame.
|
5073
|
+
rbdf = RbDataFrame.from_rows(
|
5293
5074
|
data,
|
5294
5075
|
infer_schema_length,
|
5295
5076
|
local_schema_override.any? ? local_schema_override : nil
|
@@ -66,6 +66,8 @@ module Polars
|
|
66
66
|
if !out.nil?
|
67
67
|
if s.dtype == Date
|
68
68
|
return Utils._to_ruby_date(out.to_i)
|
69
|
+
elsif [Datetime, Duration, Time].include?(s.dtype)
|
70
|
+
return out
|
69
71
|
else
|
70
72
|
return Utils._to_ruby_datetime(out.to_i, s.time_unit)
|
71
73
|
end
|
@@ -93,10 +95,12 @@ module Polars
|
|
93
95
|
# # => 2001-01-02 00:00:00 UTC
|
94
96
|
def mean
|
95
97
|
s = Utils.wrap_s(_s)
|
96
|
-
out = s.mean
|
98
|
+
out = s.mean
|
97
99
|
if !out.nil?
|
98
100
|
if s.dtype == Date
|
99
101
|
return Utils._to_ruby_date(out.to_i)
|
102
|
+
elsif [Datetime, Duration, Time].include?(s.dtype)
|
103
|
+
return out
|
100
104
|
else
|
101
105
|
return Utils._to_ruby_datetime(out.to_i, s.time_unit)
|
102
106
|
end
|
@@ -32,7 +32,7 @@ module Polars
|
|
32
32
|
@start_by = start_by
|
33
33
|
end
|
34
34
|
|
35
|
-
def agg(aggs)
|
35
|
+
def agg(*aggs, **named_aggs)
|
36
36
|
@df.lazy
|
37
37
|
.group_by_dynamic(
|
38
38
|
@time_column,
|
@@ -45,7 +45,7 @@ module Polars
|
|
45
45
|
by: @by,
|
46
46
|
start_by: @start_by
|
47
47
|
)
|
48
|
-
.agg(aggs)
|
48
|
+
.agg(*aggs, **named_aggs)
|
49
49
|
.collect(no_optimization: true, string_cache: false)
|
50
50
|
end
|
51
51
|
end
|
data/lib/polars/exceptions.rb
CHANGED
@@ -3,6 +3,10 @@ module Polars
|
|
3
3
|
# Base class for all Polars errors.
|
4
4
|
class Error < StandardError; end
|
5
5
|
|
6
|
+
# @private
|
7
|
+
# Exception raised when an operation is not allowed (or possible) against a given object or data structure.
|
8
|
+
class InvalidOperationError < Error; end
|
9
|
+
|
6
10
|
# @private
|
7
11
|
# Exception raised when an unsupported testing assert is made.
|
8
12
|
class InvalidAssert < Error; end
|