polars-df 0.5.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -0
  3. data/Cargo.lock +595 -709
  4. data/Cargo.toml +1 -0
  5. data/README.md +11 -9
  6. data/ext/polars/Cargo.toml +18 -10
  7. data/ext/polars/src/batched_csv.rs +26 -26
  8. data/ext/polars/src/conversion.rs +272 -136
  9. data/ext/polars/src/dataframe.rs +135 -94
  10. data/ext/polars/src/error.rs +8 -5
  11. data/ext/polars/src/expr/array.rs +15 -0
  12. data/ext/polars/src/expr/binary.rs +18 -6
  13. data/ext/polars/src/expr/datetime.rs +10 -12
  14. data/ext/polars/src/expr/general.rs +78 -264
  15. data/ext/polars/src/expr/list.rs +41 -28
  16. data/ext/polars/src/{expr.rs → expr/mod.rs} +5 -2
  17. data/ext/polars/src/expr/name.rs +44 -0
  18. data/ext/polars/src/expr/rolling.rs +196 -0
  19. data/ext/polars/src/expr/string.rs +94 -66
  20. data/ext/polars/src/file.rs +3 -3
  21. data/ext/polars/src/functions/aggregation.rs +35 -0
  22. data/ext/polars/src/functions/eager.rs +7 -31
  23. data/ext/polars/src/functions/io.rs +10 -10
  24. data/ext/polars/src/functions/lazy.rs +119 -54
  25. data/ext/polars/src/functions/meta.rs +30 -0
  26. data/ext/polars/src/functions/misc.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/random.rs +6 -0
  29. data/ext/polars/src/functions/range.rs +46 -0
  30. data/ext/polars/src/functions/string_cache.rs +11 -0
  31. data/ext/polars/src/functions/whenthen.rs +7 -7
  32. data/ext/polars/src/lazyframe.rs +61 -44
  33. data/ext/polars/src/lib.rs +173 -84
  34. data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
  35. data/ext/polars/src/{apply → map}/mod.rs +10 -6
  36. data/ext/polars/src/{apply → map}/series.rs +12 -16
  37. data/ext/polars/src/object.rs +2 -2
  38. data/ext/polars/src/rb_modules.rs +25 -6
  39. data/ext/polars/src/series/construction.rs +32 -6
  40. data/ext/polars/src/series/export.rs +2 -2
  41. data/ext/polars/src/series/set_at_idx.rs +33 -17
  42. data/ext/polars/src/series.rs +62 -42
  43. data/ext/polars/src/sql.rs +46 -0
  44. data/lib/polars/array_expr.rb +84 -0
  45. data/lib/polars/array_name_space.rb +77 -0
  46. data/lib/polars/batched_csv_reader.rb +1 -1
  47. data/lib/polars/config.rb +530 -0
  48. data/lib/polars/data_frame.rb +206 -131
  49. data/lib/polars/data_types.rb +163 -29
  50. data/lib/polars/date_time_expr.rb +13 -18
  51. data/lib/polars/date_time_name_space.rb +22 -28
  52. data/lib/polars/dynamic_group_by.rb +2 -2
  53. data/lib/polars/expr.rb +241 -151
  54. data/lib/polars/functions.rb +29 -38
  55. data/lib/polars/group_by.rb +38 -76
  56. data/lib/polars/io.rb +37 -2
  57. data/lib/polars/lazy_frame.rb +174 -95
  58. data/lib/polars/lazy_functions.rb +87 -63
  59. data/lib/polars/lazy_group_by.rb +7 -8
  60. data/lib/polars/list_expr.rb +40 -36
  61. data/lib/polars/list_name_space.rb +15 -15
  62. data/lib/polars/name_expr.rb +198 -0
  63. data/lib/polars/rolling_group_by.rb +6 -4
  64. data/lib/polars/series.rb +95 -28
  65. data/lib/polars/sql_context.rb +194 -0
  66. data/lib/polars/string_expr.rb +249 -69
  67. data/lib/polars/string_name_space.rb +155 -25
  68. data/lib/polars/utils.rb +119 -57
  69. data/lib/polars/version.rb +1 -1
  70. data/lib/polars.rb +6 -0
  71. metadata +21 -7
  72. data/ext/polars/src/{apply → map}/lazy.rs +0 -0
data/Cargo.toml CHANGED
@@ -1,5 +1,6 @@
1
1
  [workspace]
2
2
  members = ["ext/polars"]
3
+ resolver = "2"
3
4
 
4
5
  [patch.crates-io]
5
6
  jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
data/README.md CHANGED
@@ -20,12 +20,12 @@ This library follows the [Polars Python API](https://pola-rs.github.io/polars/py
20
20
  Polars.read_csv("iris.csv")
21
21
  .lazy
22
22
  .filter(Polars.col("sepal_length") > 5)
23
- .groupby("species")
23
+ .group_by("species")
24
24
  .agg(Polars.all.sum)
25
25
  .collect
26
26
  ```
27
27
 
28
- You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
28
+ You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
29
29
 
30
30
  ## Reference
31
31
 
@@ -260,19 +260,19 @@ df["a"].var
260
260
  Group
261
261
 
262
262
  ```ruby
263
- df.groupby("a").count
263
+ df.group_by("a").count
264
264
  ```
265
265
 
266
266
  Works with all summary statistics
267
267
 
268
268
  ```ruby
269
- df.groupby("a").max
269
+ df.group_by("a").max
270
270
  ```
271
271
 
272
272
  Multiple groups
273
273
 
274
274
  ```ruby
275
- df.groupby(["a", "b"]).count
275
+ df.group_by(["a", "b"]).count
276
276
  ```
277
277
 
278
278
  ## Combining Data Frames
@@ -348,7 +348,7 @@ df.to_numo
348
348
  You can specify column types when creating a data frame
349
349
 
350
350
  ```ruby
351
- Polars::DataFrame.new(data, columns: {"a" => Polars::Int32, "b" => Polars::Float32})
351
+ Polars::DataFrame.new(data, schema: {"a" => Polars::Int32, "b" => Polars::Float32})
352
352
  ```
353
353
 
354
354
  Supported types are:
@@ -357,8 +357,10 @@ Supported types are:
357
357
  - float - `Float64`, `Float32`
358
358
  - integer - `Int64`, `Int32`, `Int16`, `Int8`
359
359
  - unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
360
- - string - `Utf8`, `Categorical`
360
+ - string - `Utf8`, `Binary`, `Categorical`
361
361
  - temporal - `Date`, `Datetime`, `Time`, `Duration`
362
+ - nested - `List`, `Struct`, `Array`
363
+ - other - `Object`, `Null`
362
364
 
363
365
  Get column types
364
366
 
@@ -401,13 +403,13 @@ df.plot("a", "b", type: "pie")
401
403
  Group data
402
404
 
403
405
  ```ruby
404
- df.groupby("c").plot("a", "b")
406
+ df.group_by("c").plot("a", "b")
405
407
  ```
406
408
 
407
409
  Stacked columns or bars
408
410
 
409
411
  ```ruby
410
- df.groupby("c").plot("a", "b", stacked: true)
412
+ df.group_by("c").plot("a", "b", stacked: true)
411
413
  ```
412
414
 
413
415
  ## History
data/ext/polars/Cargo.toml CHANGED
@@ -1,9 +1,10 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.5.0"
3
+ version = "0.7.0"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
7
+ rust-version = "1.73.0"
7
8
  publish = false
8
9
 
9
10
  [lib]
@@ -11,22 +12,25 @@ crate-type = ["cdylib"]
11
12
 
12
13
  [dependencies]
13
14
  ahash = "0.8"
14
- magnus = "0.5"
15
- polars-core = "0.29.0"
15
+ chrono = "0.4"
16
+ either = "1.8"
17
+ magnus = "0.6"
18
+ polars-core = "=0.35.2"
19
+ polars-parquet = "=0.35.2"
16
20
  serde_json = "1"
17
21
  smartstring = "1"
18
22
 
19
23
  [dependencies.polars]
20
- version = "0.29.0"
24
+ version = "=0.35.2"
21
25
  features = [
22
26
  "abs",
23
27
  "approx_unique",
24
- "arange",
25
28
  "arg_where",
26
29
  "asof_join",
27
30
  "avro",
28
31
  "binary_encoding",
29
32
  "concat_str",
33
+ "cov",
30
34
  "cse",
31
35
  "csv",
32
36
  "cum_agg",
@@ -37,22 +41,23 @@ features = [
37
41
  "diff",
38
42
  "dot_product",
39
43
  "dtype-full",
40
- "dynamic_groupby",
44
+ "dynamic_group_by",
41
45
  "ewma",
42
46
  "extract_jsonpath",
43
47
  "fmt",
44
48
  "horizontal_concat",
45
49
  "interpolate",
46
50
  "ipc",
47
- "is_first",
51
+ "is_first_distinct",
48
52
  "is_in",
53
+ "is_last_distinct",
49
54
  "is_unique",
50
55
  "json",
51
56
  "lazy",
52
57
  "lazy_regex",
53
58
  "list_count",
54
59
  "list_eval",
55
- "list_take",
60
+ "list_gather",
56
61
  "list_to_struct",
57
62
  "log",
58
63
  "meta",
@@ -62,12 +67,14 @@ features = [
62
67
  "parquet",
63
68
  "partition_by",
64
69
  "pct_change",
70
+ "peaks",
65
71
  "performant",
66
72
  "pivot",
67
73
  "product",
68
74
  "propagate_nans",
69
75
  "random",
70
76
  "rank",
77
+ "range",
71
78
  "reinterpret",
72
79
  "repeat_by",
73
80
  "rolling_window",
@@ -77,9 +84,10 @@ features = [
77
84
  "semi_anti_join",
78
85
  "serde-lazy",
79
86
  "sign",
87
+ "sql",
80
88
  "string_encoding",
81
- "string_from_radix",
82
- "string_justify",
89
+ "string_pad",
90
+ "string_to_integer",
83
91
  "strings",
84
92
  "timezones",
85
93
  "to_dummies",
data/ext/polars/src/batched_csv.rs CHANGED
@@ -1,4 +1,4 @@
1
- use magnus::{RArray, Value};
1
+ use magnus::{prelude::*, RArray, Value};
2
2
  use polars::io::mmap::MmapBytesReader;
3
3
  use polars::io::RowCount;
4
4
  use polars::prelude::read_impl::OwnedBatchedCsvReader;
@@ -24,31 +24,31 @@ impl RbBatchedCsv {
24
24
  pub fn new(arguments: &[Value]) -> RbResult<Self> {
25
25
  // start arguments
26
26
  // this pattern is needed for more than 16
27
- let infer_schema_length: Option<usize> = arguments[0].try_convert()?;
28
- let chunk_size: usize = arguments[1].try_convert()?;
29
- let has_header: bool = arguments[2].try_convert()?;
30
- let ignore_errors: bool = arguments[3].try_convert()?;
31
- let n_rows: Option<usize> = arguments[4].try_convert()?;
32
- let skip_rows: usize = arguments[5].try_convert()?;
33
- let projection: Option<Vec<usize>> = arguments[6].try_convert()?;
34
- let sep: String = arguments[7].try_convert()?;
35
- let rechunk: bool = arguments[8].try_convert()?;
36
- let columns: Option<Vec<String>> = arguments[9].try_convert()?;
37
- let encoding: Wrap<CsvEncoding> = arguments[10].try_convert()?;
38
- let n_threads: Option<usize> = arguments[11].try_convert()?;
39
- let path: PathBuf = arguments[12].try_convert()?;
40
- let overwrite_dtype: Option<Vec<(String, Wrap<DataType>)>> = arguments[13].try_convert()?;
27
+ let infer_schema_length = Option::<usize>::try_convert(arguments[0])?;
28
+ let chunk_size = usize::try_convert(arguments[1])?;
29
+ let has_header = bool::try_convert(arguments[2])?;
30
+ let ignore_errors = bool::try_convert(arguments[3])?;
31
+ let n_rows = Option::<usize>::try_convert(arguments[4])?;
32
+ let skip_rows = usize::try_convert(arguments[5])?;
33
+ let projection = Option::<Vec<usize>>::try_convert(arguments[6])?;
34
+ let separator = String::try_convert(arguments[7])?;
35
+ let rechunk = bool::try_convert(arguments[8])?;
36
+ let columns = Option::<Vec<String>>::try_convert(arguments[9])?;
37
+ let encoding = Wrap::<CsvEncoding>::try_convert(arguments[10])?;
38
+ let n_threads = Option::<usize>::try_convert(arguments[11])?;
39
+ let path = PathBuf::try_convert(arguments[12])?;
40
+ let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[13])?;
41
41
  // TODO fix
42
- let overwrite_dtype_slice: Option<Vec<Wrap<DataType>>> = None; // arguments[14].try_convert()?;
43
- let low_memory: bool = arguments[15].try_convert()?;
44
- let comment_char: Option<String> = arguments[16].try_convert()?;
45
- let quote_char: Option<String> = arguments[17].try_convert()?;
46
- let null_values: Option<Wrap<NullValues>> = arguments[18].try_convert()?;
47
- let try_parse_dates: bool = arguments[19].try_convert()?;
48
- let skip_rows_after_header: usize = arguments[20].try_convert()?;
49
- let row_count: Option<(String, IdxSize)> = arguments[21].try_convert()?;
50
- let sample_size: usize = arguments[22].try_convert()?;
51
- let eol_char: String = arguments[23].try_convert()?;
42
+ let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[14])?;
43
+ let low_memory = bool::try_convert(arguments[15])?;
44
+ let comment_char = Option::<String>::try_convert(arguments[16])?;
45
+ let quote_char = Option::<String>::try_convert(arguments[17])?;
46
+ let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[18])?;
47
+ let try_parse_dates = bool::try_convert(arguments[19])?;
48
+ let skip_rows_after_header = usize::try_convert(arguments[20])?;
49
+ let row_count = Option::<(String, IdxSize)>::try_convert(arguments[21])?;
50
+ let sample_size = usize::try_convert(arguments[22])?;
51
+ let eol_char = String::try_convert(arguments[23])?;
52
52
  // end arguments
53
53
 
54
54
  let null_values = null_values.map(|w| w.0);
@@ -90,7 +90,7 @@ impl RbBatchedCsv {
90
90
  .infer_schema(infer_schema_length)
91
91
  .has_header(has_header)
92
92
  .with_n_rows(n_rows)
93
- .with_delimiter(sep.as_bytes()[0])
93
+ .with_separator(separator.as_bytes()[0])
94
94
  .with_skip_rows(skip_rows)
95
95
  .with_ignore_errors(ignore_errors)
96
96
  .with_projection(projection)