polars-df 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/Cargo.lock +468 -538
  4. data/Cargo.toml +1 -0
  5. data/README.md +8 -7
  6. data/ext/polars/Cargo.toml +17 -10
  7. data/ext/polars/src/batched_csv.rs +26 -26
  8. data/ext/polars/src/conversion.rs +121 -93
  9. data/ext/polars/src/dataframe.rs +116 -71
  10. data/ext/polars/src/error.rs +0 -5
  11. data/ext/polars/src/expr/binary.rs +18 -6
  12. data/ext/polars/src/expr/datetime.rs +10 -12
  13. data/ext/polars/src/expr/general.rs +68 -284
  14. data/ext/polars/src/expr/list.rs +17 -9
  15. data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
  16. data/ext/polars/src/expr/name.rs +44 -0
  17. data/ext/polars/src/expr/rolling.rs +196 -0
  18. data/ext/polars/src/expr/string.rs +85 -58
  19. data/ext/polars/src/file.rs +3 -3
  20. data/ext/polars/src/functions/aggregation.rs +35 -0
  21. data/ext/polars/src/functions/eager.rs +7 -31
  22. data/ext/polars/src/functions/io.rs +10 -10
  23. data/ext/polars/src/functions/lazy.rs +66 -41
  24. data/ext/polars/src/functions/meta.rs +30 -0
  25. data/ext/polars/src/functions/misc.rs +8 -0
  26. data/ext/polars/src/functions/mod.rs +5 -0
  27. data/ext/polars/src/functions/random.rs +6 -0
  28. data/ext/polars/src/functions/range.rs +46 -0
  29. data/ext/polars/src/functions/string_cache.rs +11 -0
  30. data/ext/polars/src/functions/whenthen.rs +7 -7
  31. data/ext/polars/src/lazyframe.rs +47 -42
  32. data/ext/polars/src/lib.rs +156 -72
  33. data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
  34. data/ext/polars/src/{apply → map}/mod.rs +3 -3
  35. data/ext/polars/src/{apply → map}/series.rs +12 -16
  36. data/ext/polars/src/object.rs +1 -1
  37. data/ext/polars/src/rb_modules.rs +22 -7
  38. data/ext/polars/src/series/construction.rs +4 -4
  39. data/ext/polars/src/series/export.rs +2 -2
  40. data/ext/polars/src/series/set_at_idx.rs +33 -17
  41. data/ext/polars/src/series.rs +7 -27
  42. data/ext/polars/src/sql.rs +46 -0
  43. data/lib/polars/config.rb +530 -0
  44. data/lib/polars/data_frame.rb +115 -82
  45. data/lib/polars/date_time_expr.rb +13 -18
  46. data/lib/polars/date_time_name_space.rb +5 -25
  47. data/lib/polars/dynamic_group_by.rb +2 -2
  48. data/lib/polars/expr.rb +177 -94
  49. data/lib/polars/functions.rb +29 -37
  50. data/lib/polars/group_by.rb +38 -55
  51. data/lib/polars/io.rb +37 -2
  52. data/lib/polars/lazy_frame.rb +93 -66
  53. data/lib/polars/lazy_functions.rb +36 -48
  54. data/lib/polars/lazy_group_by.rb +7 -8
  55. data/lib/polars/list_expr.rb +12 -8
  56. data/lib/polars/list_name_space.rb +2 -2
  57. data/lib/polars/name_expr.rb +198 -0
  58. data/lib/polars/rolling_group_by.rb +2 -2
  59. data/lib/polars/series.rb +26 -13
  60. data/lib/polars/sql_context.rb +194 -0
  61. data/lib/polars/string_expr.rb +114 -60
  62. data/lib/polars/string_name_space.rb +19 -4
  63. data/lib/polars/utils.rb +12 -0
  64. data/lib/polars/version.rb +1 -1
  65. data/lib/polars.rb +3 -0
  66. metadata +18 -7
  67. data/ext/polars/src/{apply → map}/lazy.rs +0 -0
data/Cargo.toml CHANGED
@@ -1,5 +1,6 @@
1
1
  [workspace]
2
2
  members = ["ext/polars"]
3
+ resolver = "2"
3
4
 
4
5
  [patch.crates-io]
5
6
  jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
data/README.md CHANGED
@@ -20,7 +20,7 @@ This library follows the [Polars Python API](https://pola-rs.github.io/polars/py
20
20
  Polars.read_csv("iris.csv")
21
21
  .lazy
22
22
  .filter(Polars.col("sepal_length") > 5)
23
- .groupby("species")
23
+ .group_by("species")
24
24
  .agg(Polars.all.sum)
25
25
  .collect
26
26
  ```
@@ -260,19 +260,19 @@ df["a"].var
260
260
  Group
261
261
 
262
262
  ```ruby
263
- df.groupby("a").count
263
+ df.group_by("a").count
264
264
  ```
265
265
 
266
266
  Works with all summary statistics
267
267
 
268
268
  ```ruby
269
- df.groupby("a").max
269
+ df.group_by("a").max
270
270
  ```
271
271
 
272
272
  Multiple groups
273
273
 
274
274
  ```ruby
275
- df.groupby(["a", "b"]).count
275
+ df.group_by(["a", "b"]).count
276
276
  ```
277
277
 
278
278
  ## Combining Data Frames
@@ -359,7 +359,8 @@ Supported types are:
359
359
  - unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
360
360
  - string - `Utf8`, `Binary`, `Categorical`
361
361
  - temporal - `Date`, `Datetime`, `Time`, `Duration`
362
- - other - `Object`, `List`, `Struct`, `Array` [unreleased]
362
+ - nested - `List`, `Struct`, `Array`
363
+ - other - `Object`, `Null`
363
364
 
364
365
  Get column types
365
366
 
@@ -402,13 +403,13 @@ df.plot("a", "b", type: "pie")
402
403
  Group data
403
404
 
404
405
  ```ruby
405
- df.groupby("c").plot("a", "b")
406
+ df.group_by("c").plot("a", "b")
406
407
  ```
407
408
 
408
409
  Stacked columns or bars
409
410
 
410
411
  ```ruby
411
- df.groupby("c").plot("a", "b", stacked: true)
412
+ df.group_by("c").plot("a", "b", stacked: true)
412
413
  ```
413
414
 
414
415
  ## History
data/ext/polars/Cargo.toml CHANGED
@@ -1,9 +1,10 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.6.0"
3
+ version = "0.7.0"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
7
+ rust-version = "1.73.0"
7
8
  publish = false
8
9
 
9
10
  [lib]
@@ -11,14 +12,16 @@ crate-type = ["cdylib"]
11
12
 
12
13
  [dependencies]
13
14
  ahash = "0.8"
14
- chrono = "=0.4.24"
15
- magnus = "0.5"
16
- polars-core = "0.31.1"
15
+ chrono = "0.4"
16
+ either = "1.8"
17
+ magnus = "0.6"
18
+ polars-core = "=0.35.2"
19
+ polars-parquet = "=0.35.2"
17
20
  serde_json = "1"
18
21
  smartstring = "1"
19
22
 
20
23
  [dependencies.polars]
21
- version = "0.31.1"
24
+ version = "=0.35.2"
22
25
  features = [
23
26
  "abs",
24
27
  "approx_unique",
@@ -27,6 +30,7 @@ features = [
27
30
  "avro",
28
31
  "binary_encoding",
29
32
  "concat_str",
33
+ "cov",
30
34
  "cse",
31
35
  "csv",
32
36
  "cum_agg",
@@ -37,22 +41,23 @@ features = [
37
41
  "diff",
38
42
  "dot_product",
39
43
  "dtype-full",
40
- "dynamic_groupby",
44
+ "dynamic_group_by",
41
45
  "ewma",
42
46
  "extract_jsonpath",
43
47
  "fmt",
44
48
  "horizontal_concat",
45
49
  "interpolate",
46
50
  "ipc",
47
- "is_first",
51
+ "is_first_distinct",
48
52
  "is_in",
53
+ "is_last_distinct",
49
54
  "is_unique",
50
55
  "json",
51
56
  "lazy",
52
57
  "lazy_regex",
53
58
  "list_count",
54
59
  "list_eval",
55
- "list_take",
60
+ "list_gather",
56
61
  "list_to_struct",
57
62
  "log",
58
63
  "meta",
@@ -62,6 +67,7 @@ features = [
62
67
  "parquet",
63
68
  "partition_by",
64
69
  "pct_change",
70
+ "peaks",
65
71
  "performant",
66
72
  "pivot",
67
73
  "product",
@@ -78,9 +84,10 @@ features = [
78
84
  "semi_anti_join",
79
85
  "serde-lazy",
80
86
  "sign",
87
+ "sql",
81
88
  "string_encoding",
82
- "string_from_radix",
83
- "string_justify",
89
+ "string_pad",
90
+ "string_to_integer",
84
91
  "strings",
85
92
  "timezones",
86
93
  "to_dummies",
data/ext/polars/src/batched_csv.rs CHANGED
@@ -1,4 +1,4 @@
1
- use magnus::{RArray, Value};
1
+ use magnus::{prelude::*, RArray, Value};
2
2
  use polars::io::mmap::MmapBytesReader;
3
3
  use polars::io::RowCount;
4
4
  use polars::prelude::read_impl::OwnedBatchedCsvReader;
@@ -24,31 +24,31 @@ impl RbBatchedCsv {
24
24
  pub fn new(arguments: &[Value]) -> RbResult<Self> {
25
25
  // start arguments
26
26
  // this pattern is needed for more than 16
27
- let infer_schema_length: Option<usize> = arguments[0].try_convert()?;
28
- let chunk_size: usize = arguments[1].try_convert()?;
29
- let has_header: bool = arguments[2].try_convert()?;
30
- let ignore_errors: bool = arguments[3].try_convert()?;
31
- let n_rows: Option<usize> = arguments[4].try_convert()?;
32
- let skip_rows: usize = arguments[5].try_convert()?;
33
- let projection: Option<Vec<usize>> = arguments[6].try_convert()?;
34
- let sep: String = arguments[7].try_convert()?;
35
- let rechunk: bool = arguments[8].try_convert()?;
36
- let columns: Option<Vec<String>> = arguments[9].try_convert()?;
37
- let encoding: Wrap<CsvEncoding> = arguments[10].try_convert()?;
38
- let n_threads: Option<usize> = arguments[11].try_convert()?;
39
- let path: PathBuf = arguments[12].try_convert()?;
40
- let overwrite_dtype: Option<Vec<(String, Wrap<DataType>)>> = arguments[13].try_convert()?;
27
+ let infer_schema_length = Option::<usize>::try_convert(arguments[0])?;
28
+ let chunk_size = usize::try_convert(arguments[1])?;
29
+ let has_header = bool::try_convert(arguments[2])?;
30
+ let ignore_errors = bool::try_convert(arguments[3])?;
31
+ let n_rows = Option::<usize>::try_convert(arguments[4])?;
32
+ let skip_rows = usize::try_convert(arguments[5])?;
33
+ let projection = Option::<Vec<usize>>::try_convert(arguments[6])?;
34
+ let separator = String::try_convert(arguments[7])?;
35
+ let rechunk = bool::try_convert(arguments[8])?;
36
+ let columns = Option::<Vec<String>>::try_convert(arguments[9])?;
37
+ let encoding = Wrap::<CsvEncoding>::try_convert(arguments[10])?;
38
+ let n_threads = Option::<usize>::try_convert(arguments[11])?;
39
+ let path = PathBuf::try_convert(arguments[12])?;
40
+ let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[13])?;
41
41
  // TODO fix
42
- let overwrite_dtype_slice: Option<Vec<Wrap<DataType>>> = None; // arguments[14].try_convert()?;
43
- let low_memory: bool = arguments[15].try_convert()?;
44
- let comment_char: Option<String> = arguments[16].try_convert()?;
45
- let quote_char: Option<String> = arguments[17].try_convert()?;
46
- let null_values: Option<Wrap<NullValues>> = arguments[18].try_convert()?;
47
- let try_parse_dates: bool = arguments[19].try_convert()?;
48
- let skip_rows_after_header: usize = arguments[20].try_convert()?;
49
- let row_count: Option<(String, IdxSize)> = arguments[21].try_convert()?;
50
- let sample_size: usize = arguments[22].try_convert()?;
51
- let eol_char: String = arguments[23].try_convert()?;
42
+ let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[14])?;
43
+ let low_memory = bool::try_convert(arguments[15])?;
44
+ let comment_char = Option::<String>::try_convert(arguments[16])?;
45
+ let quote_char = Option::<String>::try_convert(arguments[17])?;
46
+ let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[18])?;
47
+ let try_parse_dates = bool::try_convert(arguments[19])?;
48
+ let skip_rows_after_header = usize::try_convert(arguments[20])?;
49
+ let row_count = Option::<(String, IdxSize)>::try_convert(arguments[21])?;
50
+ let sample_size = usize::try_convert(arguments[22])?;
51
+ let eol_char = String::try_convert(arguments[23])?;
52
52
  // end arguments
53
53
 
54
54
  let null_values = null_values.map(|w| w.0);
@@ -90,7 +90,7 @@ impl RbBatchedCsv {
90
90
  .infer_schema(infer_schema_length)
91
91
  .has_header(has_header)
92
92
  .with_n_rows(n_rows)
93
- .with_delimiter(sep.as_bytes()[0])
93
+ .with_separator(separator.as_bytes()[0])
94
94
  .with_skip_rows(skip_rows)
95
95
  .with_ignore_errors(ignore_errors)
96
96
  .with_projection(projection)