polars-df 0.6.0-x86_64-linux → 0.7.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -20,7 +20,7 @@ This library follows the [Polars Python API](https://pola-rs.github.io/polars/py
20
20
  Polars.read_csv("iris.csv")
21
21
  .lazy
22
22
  .filter(Polars.col("sepal_length") > 5)
23
- .groupby("species")
23
+ .group_by("species")
24
24
  .agg(Polars.all.sum)
25
25
  .collect
26
26
  ```
@@ -260,19 +260,19 @@ df["a"].var
260
260
  Group
261
261
 
262
262
  ```ruby
263
- df.groupby("a").count
263
+ df.group_by("a").count
264
264
  ```
265
265
 
266
266
  Works with all summary statistics
267
267
 
268
268
  ```ruby
269
- df.groupby("a").max
269
+ df.group_by("a").max
270
270
  ```
271
271
 
272
272
  Multiple groups
273
273
 
274
274
  ```ruby
275
- df.groupby(["a", "b"]).count
275
+ df.group_by(["a", "b"]).count
276
276
  ```
277
277
 
278
278
  ## Combining Data Frames
@@ -359,7 +359,8 @@ Supported types are:
359
359
  - unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
360
360
  - string - `Utf8`, `Binary`, `Categorical`
361
361
  - temporal - `Date`, `Datetime`, `Time`, `Duration`
362
- - other - `Object`, `List`, `Struct`, `Array` [unreleased]
362
+ - nested - `List`, `Struct`, `Array`
363
+ - other - `Object`, `Null`
363
364
 
364
365
  Get column types
365
366
 
@@ -402,13 +403,13 @@ df.plot("a", "b", type: "pie")
402
403
  Group data
403
404
 
404
405
  ```ruby
405
- df.groupby("c").plot("a", "b")
406
+ df.group_by("c").plot("a", "b")
406
407
  ```
407
408
 
408
409
  Stacked columns or bars
409
410
 
410
411
  ```ruby
411
- df.groupby("c").plot("a", "b", stacked: true)
412
+ df.group_by("c").plot("a", "b", stacked: true)
412
413
  ```
413
414
 
414
415
  ## History
Binary file
Binary file
Binary file
@@ -0,0 +1,530 @@
1
module Polars
  # Configure polars; offers options for table formatting and more.
  #
  # Most options are stored in process-wide environment variables (see
  # {POLARS_CFG_ENV_VARS}); a few are set directly through the native
  # extension (see {POLARS_CFG_DIRECT_VARS}).
  class Config
    # Environment variables managed by this class. `restore_defaults` deletes
    # all of them; `save` and `state` read them.
    POLARS_CFG_ENV_VARS = [
      "POLARS_ACTIVATE_DECIMAL",
      "POLARS_AUTO_STRUCTIFY",
      "POLARS_FMT_MAX_COLS",
      "POLARS_FMT_MAX_ROWS",
      "POLARS_FMT_STR_LEN",
      "POLARS_FMT_TABLE_CELL_ALIGNMENT",
      "POLARS_FMT_TABLE_DATAFRAME_SHAPE_BELOW",
      "POLARS_FMT_TABLE_FORMATTING",
      "POLARS_FMT_TABLE_HIDE_COLUMN_DATA_TYPES",
      "POLARS_FMT_TABLE_HIDE_COLUMN_NAMES",
      "POLARS_FMT_TABLE_HIDE_COLUMN_SEPARATOR",
      "POLARS_FMT_TABLE_HIDE_DATAFRAME_SHAPE_INFORMATION",
      "POLARS_FMT_TABLE_INLINE_COLUMN_DATA_TYPE",
      "POLARS_FMT_TABLE_ROUNDED_CORNERS",
      "POLARS_STREAMING_CHUNK_SIZE",
      "POLARS_TABLE_WIDTH",
      "POLARS_VERBOSE"
    ].freeze

    # Options that are not backed by an environment variable. Maps the name of
    # the setter method on this class to a callable returning the current value.
    POLARS_CFG_DIRECT_VARS = {"set_fmt_float" => Polars.method(:_get_float_fmt)}.freeze

    # Initialize a Config object instance for context manager usage.
    #
    # The current configuration is saved, the given options are applied, the
    # block is called with the `Config` class, and the saved configuration is
    # restored afterwards - even if the block (or one of the options) raises.
    #
    # @param restore_defaults [Boolean]
    #   reset all settings to their defaults before applying `options`.
    # @param options [Hash]
    #   option names (with or without the `set_` prefix) mapped to the value
    #   passed to the matching `Config.set_*` class method.
    #
    # @yieldparam config [Class] the `Config` class itself
    #
    # @raise [ArgumentError] if an option does not match a `Config` class method
    # @raise [LocalJumpError] if no block is given
    def initialize(restore_defaults: false, **options)
      @original_state = self.class.save

      begin
        self.class.restore_defaults if restore_defaults

        options.each do |opt, value|
          opt = "set_#{opt}" unless opt.to_s.start_with?("set_")
          unless self.class.respond_to?(opt)
            raise ArgumentError, "Config has no #{opt} option"
          end
          self.class.public_send(opt, value)
        end

        yield self.class
      ensure
        # The settings live in process-wide state (ENV), so they must be put
        # back on every exit path, not only after a normal return of the block.
        self.class.restore_defaults.load(@original_state)
        @original_state = ""
      end
    end

    # Load and set previously saved (or shared) Config options from a JSON string.
    #
    # Expects the format produced by {save}: an "environment" object that is
    # merged into `ENV`, and a "direct" object whose entries are applied through
    # the setter methods listed in {POLARS_CFG_DIRECT_VARS}. Either may be absent.
    #
    # @param cfg [String]
    #   JSON string, as returned by {save}.
    #
    # @return [Config]
    def self.load(cfg)
      options = JSON.parse(cfg)
      ENV.merge!(options.fetch("environment", {}))
      # `save` stores the directly-set options under the "direct" key.
      options.fetch("direct", {}).each do |cfg_methodname, value|
        # Only known setters are invoked; unknown keys are ignored.
        public_send(cfg_methodname, value) if POLARS_CFG_DIRECT_VARS.key?(cfg_methodname)
      end
      self
    end

    # Reset all polars Config settings to their default state.
    #
    # Deletes every variable in {POLARS_CFG_ENV_VARS} from `ENV` and resets the
    # float format to its default.
    #
    # @return [Config]
    def self.restore_defaults
      POLARS_CFG_ENV_VARS.each { |var| ENV.delete(var) }
      set_fmt_float
      self
    end

    # Save the current set of Config options as a JSON string.
    #
    # The result has an "environment" object (the Config environment variables
    # that are currently set) and a "direct" object (the directly-set options).
    #
    # @return [String]
    def self.save
      environment_vars = POLARS_CFG_ENV_VARS.sort.select { |k| ENV.key?(k) }.to_h { |k| [k, ENV[k]] }
      direct_vars = POLARS_CFG_DIRECT_VARS.to_h { |cfg_methodname, get_value| [cfg_methodname, get_value.call] }
      JSON.generate({environment: environment_vars, direct: direct_vars})
    end

    # Show the current state of all Config variables as a hash.
    #
    # @param if_set [Boolean]
    #   by default this will show the state of all `Config` environment variables.
    #   change this to `true` to restrict the returned hash to include only
    #   those that have been set to a specific value.
    # @param env_only [Boolean]
    #   include only Config environment variables in the output; some options (such
    #   as "set_fmt_float") are set directly, not via an environment variable.
    #
    # @return [Hash]
    def self.state(if_set: false, env_only: false)
      config_state = POLARS_CFG_ENV_VARS.sort
        .select { |var| !if_set || ENV.key?(var) }
        .to_h { |var| [var, ENV[var]] }

      unless env_only
        POLARS_CFG_DIRECT_VARS.each do |cfg_methodname, get_value|
          config_state[cfg_methodname] = get_value.call
        end
      end

      config_state
    end

    # Activate `Decimal` data types.
    #
    # This is a temporary setting that will be removed later once the
    # `Decimal` type stabilizes. This will happen without it being
    # considered a breaking change.
    #
    # Currently, `Decimal` types are in an alpha state.
    #
    # @param active [Boolean]
    #   when false, the setting is removed from the environment entirely.
    #
    # @return [Config]
    def self.activate_decimals(active = true)
      if active
        ENV["POLARS_ACTIVATE_DECIMAL"] = "1"
      else
        ENV.delete("POLARS_ACTIVATE_DECIMAL")
      end
      self
    end

    # Use ASCII characters to display table outlines (set false to revert to UTF8).
    #
    # @return [Config]
    #
    # @example
    #   df = Polars::DataFrame.new({"abc" => [1.0, 2.5, 5.0], "xyz" => [true, false, true]})
    #   Polars::Config.new(ascii_tables: true) do
    #     p df
    #   end
    #   # =>
    #   # shape: (3, 2)
    #   # +-----+-------+
    #   # | abc | xyz   |
    #   # | --- | ---   |
    #   # | f64 | bool  |
    #   # +=============+
    #   # | 1.0 | true  |
    #   # | 2.5 | false |
    #   # | 5.0 | true  |
    #   # +-----+-------+
    def self.set_ascii_tables(active = true)
      fmt = active ? "ASCII_FULL_CONDENSED" : "UTF8_FULL_CONDENSED"
      ENV["POLARS_FMT_TABLE_FORMATTING"] = fmt
      self
    end

    # Allow multi-output expressions to be automatically turned into Structs.
    #
    # @return [Config]
    def self.set_auto_structify(active = true)
      ENV["POLARS_AUTO_STRUCTIFY"] = active ? "1" : "0"
      self
    end

    # Control how floating point values are displayed.
    #
    # @param fmt ["mixed", "full"]
    #   How to format floating point numbers
    #
    # @return [Config]
    def self.set_fmt_float(fmt = "mixed")
      Polars._set_float_fmt(fmt)
      self
    end

    # Set the number of characters used to display string values.
    #
    # @param n [Integer]
    #   number of characters to display
    #
    # @return [Config]
    #
    # @raise [ArgumentError] if `n` is not greater than zero
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "txt" => [
    #         "Play it, Sam. Play 'As Time Goes By'.",
    #         "This is the beginning of a beautiful friendship.",
    #       ]
    #     }
    #   )
    #   Polars::Config.new(fmt_str_lengths: 50) do
    #     p df
    #   end
    #   # =>
    #   # shape: (2, 1)
    #   # ┌──────────────────────────────────────────────────┐
    #   # │ txt                                              │
    #   # │ ---                                              │
    #   # │ str                                              │
    #   # ╞══════════════════════════════════════════════════╡
    #   # │ Play it, Sam. Play 'As Time Goes By'.            │
    #   # │ This is the beginning of a beautiful friendship. │
    #   # └──────────────────────────────────────────────────┘
    def self.set_fmt_str_lengths(n)
      if n <= 0
        raise ArgumentError, "number of characters must be > 0"
      end

      ENV["POLARS_FMT_STR_LEN"] = n.to_s
      self
    end

    # Overwrite chunk size used in `streaming` engine.
    #
    # By default, the chunk size is determined by the schema
    # and size of the thread pool. For some datasets (esp.
    # when you have large string elements) this can be too
    # optimistic and lead to Out of Memory errors.
    #
    # @param size [Integer]
    #   Number of rows per chunk. Every thread will process chunks
    #   of this size.
    #
    # @return [Config]
    #
    # @raise [ArgumentError] if `size` is less than 1
    def self.set_streaming_chunk_size(size)
      if size < 1
        raise ArgumentError, "number of rows per chunk must be >= 1"
      end

      ENV["POLARS_STREAMING_CHUNK_SIZE"] = size.to_s
      self
    end

    # Set table cell alignment.
    #
    # @param format [String]
    #   * "LEFT": left aligned
    #   * "CENTER": center aligned
    #   * "RIGHT": right aligned
    #
    # @return [Config]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {"column_abc" => [1.0, 2.5, 5.0], "column_xyz" => [true, false, true]}
    #   )
    #   Polars::Config.new(tbl_cell_alignment: "RIGHT") do
    #     p df
    #   end
    #   # =>
    #   # shape: (3, 2)
    #   # ┌────────────┬────────────┐
    #   # │ column_abc ┆ column_xyz │
    #   # │        --- ┆        --- │
    #   # │        f64 ┆       bool │
    #   # ╞════════════╪════════════╡
    #   # │        1.0 ┆       true │
    #   # │        2.5 ┆      false │
    #   # │        5.0 ┆       true │
    #   # └────────────┴────────────┘
    def self.set_tbl_cell_alignment(format)
      ENV["POLARS_FMT_TABLE_CELL_ALIGNMENT"] = format
      self
    end

    # Set the number of columns that are visible when displaying tables.
    #
    # @param n [Integer]
    #   number of columns to display; if `n < 0` (eg: -1), display all columns.
    #
    # @return [Config]
    #
    # @example Set number of displayed columns to a low value:
    #   Polars::Config.new do |cfg|
    #     cfg.set_tbl_cols(5)
    #     df = Polars::DataFrame.new(100.times.to_h { |i| [i.to_s, [i]] })
    #     p df
    #   end
    #   # =>
    #   # shape: (1, 100)
    #   # ┌─────┬─────┬─────┬───┬─────┬─────┐
    #   # │ 0   ┆ 1   ┆ 2   ┆ … ┆ 98  ┆ 99  │
    #   # │ --- ┆ --- ┆ --- ┆   ┆ --- ┆ --- │
    #   # │ i64 ┆ i64 ┆ i64 ┆   ┆ i64 ┆ i64 │
    #   # ╞═════╪═════╪═════╪═══╪═════╪═════╡
    #   # │ 0   ┆ 1   ┆ 2   ┆ … ┆ 98  ┆ 99  │
    #   # └─────┴─────┴─────┴───┴─────┴─────┘
    def self.set_tbl_cols(n)
      ENV["POLARS_FMT_MAX_COLS"] = n.to_s
      self
    end

    # Moves the data type inline with the column name (to the right, in parentheses).
    #
    # @return [Config]
    #
    # @example
    #   df = Polars::DataFrame.new({"abc" => [1.0, 2.5, 5.0], "xyz" => [true, false, true]})
    #   Polars::Config.new(tbl_column_data_type_inline: true) do
    #     p df
    #   end
    #   # =>
    #   # shape: (3, 2)
    #   # ┌───────────┬────────────┐
    #   # │ abc (f64) ┆ xyz (bool) │
    #   # ╞═══════════╪════════════╡
    #   # │ 1.0       ┆ true       │
    #   # │ 2.5       ┆ false      │
    #   # │ 5.0       ┆ true       │
    #   # └───────────┴────────────┘
    def self.set_tbl_column_data_type_inline(active = true)
      ENV["POLARS_FMT_TABLE_INLINE_COLUMN_DATA_TYPE"] = active ? "1" : "0"
      self
    end

    # Print the dataframe shape below the dataframe when displaying tables.
    #
    # @return [Config]
    #
    # @example
    #   df = Polars::DataFrame.new({"abc" => [1.0, 2.5, 5.0], "xyz" => [true, false, true]})
    #   Polars::Config.new(tbl_dataframe_shape_below: true) do
    #     p df
    #   end
    #   # =>
    #   # ┌─────┬───────┐
    #   # │ abc ┆ xyz   │
    #   # │ --- ┆ ---   │
    #   # │ f64 ┆ bool  │
    #   # ╞═════╪═══════╡
    #   # │ 1.0 ┆ true  │
    #   # │ 2.5 ┆ false │
    #   # │ 5.0 ┆ true  │
    #   # └─────┴───────┘
    #   # shape: (3, 2)
    def self.set_tbl_dataframe_shape_below(active = true)
      ENV["POLARS_FMT_TABLE_DATAFRAME_SHAPE_BELOW"] = active ? "1" : "0"
      self
    end

    # Set table formatting style.
    #
    # @param format [String]
    #   * "ASCII_FULL": ASCII, with all borders and lines, including row dividers.
    #   * "ASCII_FULL_CONDENSED": Same as ASCII_FULL, but with dense row spacing.
    #   * "ASCII_NO_BORDERS": ASCII, no borders.
    #   * "ASCII_BORDERS_ONLY": ASCII, borders only.
    #   * "ASCII_BORDERS_ONLY_CONDENSED": ASCII, borders only, dense row spacing.
    #   * "ASCII_HORIZONTAL_ONLY": ASCII, horizontal lines only.
    #   * "ASCII_MARKDOWN": ASCII, Markdown compatible.
    #   * "UTF8_FULL": UTF8, with all borders and lines, including row dividers.
    #   * "UTF8_FULL_CONDENSED": Same as UTF8_FULL, but with dense row spacing.
    #   * "UTF8_NO_BORDERS": UTF8, no borders.
    #   * "UTF8_BORDERS_ONLY": UTF8, borders only.
    #   * "UTF8_HORIZONTAL_ONLY": UTF8, horizontal lines only.
    #   * "NOTHING": No borders or other lines.
    #
    #   When omitted (nil), the current formatting style is left unchanged.
    # @param rounded_corners [Boolean]
    #   apply rounded corners to UTF8-styled tables (no-op for ASCII formats).
    #
    # @note
    #   The UTF8 styles all use one or more of the semigraphic box-drawing characters
    #   found in the Unicode Box Drawing block, which are not ASCII compatible:
    #   https://en.wikipedia.org/wiki/Box-drawing_character#Box_Drawing
    #
    # @return [Config]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {"abc" => [-2.5, 5.0], "mno" => ["hello", "world"], "xyz" => [true, false]}
    #   )
    #   Polars::Config.new(
    #     tbl_formatting: "ASCII_MARKDOWN",
    #     tbl_hide_column_data_types: true,
    #     tbl_hide_dataframe_shape: true
    #   ) do
    #     p df
    #   end
    #   # =>
    #   # | abc  | mno   | xyz   |
    #   # |------|-------|-------|
    #   # | -2.5 | hello | true  |
    #   # | 5.0  | world | false |
    def self.set_tbl_formatting(format = nil, rounded_corners: false)
      ENV["POLARS_FMT_TABLE_FORMATTING"] = format if format
      ENV["POLARS_FMT_TABLE_ROUNDED_CORNERS"] = rounded_corners ? "1" : "0"
      self
    end

    # Hide table column data types (i64, f64, str etc.).
    #
    # @return [Config]
    #
    # @example
    #   df = Polars::DataFrame.new({"abc" => [1.0, 2.5, 5.0], "xyz" => [true, false, true]})
    #   Polars::Config.new(tbl_hide_column_data_types: true) do
    #     p df
    #   end
    #   # =>
    #   # shape: (3, 2)
    #   # ┌─────┬───────┐
    #   # │ abc ┆ xyz   │
    #   # ╞═════╪═══════╡
    #   # │ 1.0 ┆ true  │
    #   # │ 2.5 ┆ false │
    #   # │ 5.0 ┆ true  │
    #   # └─────┴───────┘
    def self.set_tbl_hide_column_data_types(active = true)
      ENV["POLARS_FMT_TABLE_HIDE_COLUMN_DATA_TYPES"] = active ? "1" : "0"
      self
    end

    # Hide table column names.
    #
    # @return [Config]
    #
    # @example
    #   df = Polars::DataFrame.new({"abc" => [1.0, 2.5, 5.0], "xyz" => [true, false, true]})
    #   Polars::Config.new(tbl_hide_column_names: true) do
    #     p df
    #   end
    #   # =>
    #   # shape: (3, 2)
    #   # ┌─────┬───────┐
    #   # │ f64 ┆ bool  │
    #   # ╞═════╪═══════╡
    #   # │ 1.0 ┆ true  │
    #   # │ 2.5 ┆ false │
    #   # │ 5.0 ┆ true  │
    #   # └─────┴───────┘
    def self.set_tbl_hide_column_names(active = true)
      ENV["POLARS_FMT_TABLE_HIDE_COLUMN_NAMES"] = active ? "1" : "0"
      self
    end

    # Hide the '---' separator between the column names and column types.
    #
    # @return [Config]
    #
    # @example
    #   df = Polars::DataFrame.new({"abc" => [1.0, 2.5, 5.0], "xyz" => [true, false, true]})
    #   Polars::Config.new(tbl_hide_dtype_separator: true) do
    #     p df
    #   end
    #   # =>
    #   # shape: (3, 2)
    #   # ┌─────┬───────┐
    #   # │ abc ┆ xyz   │
    #   # │ f64 ┆ bool  │
    #   # ╞═════╪═══════╡
    #   # │ 1.0 ┆ true  │
    #   # │ 2.5 ┆ false │
    #   # │ 5.0 ┆ true  │
    #   # └─────┴───────┘
    def self.set_tbl_hide_dtype_separator(active = true)
      ENV["POLARS_FMT_TABLE_HIDE_COLUMN_SEPARATOR"] = active ? "1" : "0"
      self
    end

    # Hide the shape information of the dataframe when displaying tables.
    #
    # @return [Config]
    #
    # @example
    #   df = Polars::DataFrame.new({"abc" => [1.0, 2.5, 5.0], "xyz" => [true, false, true]})
    #   Polars::Config.new(tbl_hide_dataframe_shape: true) do
    #     p df
    #   end
    #   # =>
    #   # ┌─────┬───────┐
    #   # │ abc ┆ xyz   │
    #   # │ --- ┆ ---   │
    #   # │ f64 ┆ bool  │
    #   # ╞═════╪═══════╡
    #   # │ 1.0 ┆ true  │
    #   # │ 2.5 ┆ false │
    #   # │ 5.0 ┆ true  │
    #   # └─────┴───────┘
    def self.set_tbl_hide_dataframe_shape(active = true)
      ENV["POLARS_FMT_TABLE_HIDE_DATAFRAME_SHAPE_INFORMATION"] = active ? "1" : "0"
      self
    end

    # Set the max number of rows used to draw the table (both DataFrame and Series).
    #
    # @param n [Integer]
    #   number of rows to display; if `n < 0` (eg: -1), display all
    #   rows (DataFrame) and all elements (Series).
    #
    # @return [Config]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {"abc" => [1.0, 2.5, 3.5, 5.0], "xyz" => [true, false, true, false]}
    #   )
    #   Polars::Config.new(tbl_rows: 2) do
    #     p df
    #   end
    #   # =>
    #   # shape: (4, 2)
    #   # ┌─────┬───────┐
    #   # │ abc ┆ xyz   │
    #   # │ --- ┆ ---   │
    #   # │ f64 ┆ bool  │
    #   # ╞═════╪═══════╡
    #   # │ 1.0 ┆ true  │
    #   # │ …   ┆ …     │
    #   # │ 5.0 ┆ false │
    #   # └─────┴───────┘
    def self.set_tbl_rows(n)
      ENV["POLARS_FMT_MAX_ROWS"] = n.to_s
      self
    end

    # Set the number of characters used to draw the table.
    #
    # @param width [Integer]
    #   number of chars
    #
    # @return [Config]
    def self.set_tbl_width_chars(width)
      ENV["POLARS_TABLE_WIDTH"] = width.to_s
      self
    end

    # Enable additional verbose/debug logging.
    #
    # @return [Config]
    def self.set_verbose(active = true)
      ENV["POLARS_VERBOSE"] = active ? "1" : "0"
      self
    end
  end
end