polars-map 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- polars_map-0.1.0/.github/workflows/build.yml +31 -0
- polars_map-0.1.0/.github/workflows/release.yml +121 -0
- polars_map-0.1.0/.gitignore +13 -0
- polars_map-0.1.0/.python-version +1 -0
- polars_map-0.1.0/PKG-INFO +9 -0
- polars_map-0.1.0/README.md +86 -0
- polars_map-0.1.0/polars_map/__init__.py +7 -0
- polars_map-0.1.0/polars_map/_dtype.py +40 -0
- polars_map-0.1.0/polars_map/_expr.py +215 -0
- polars_map-0.1.0/polars_map/_series.py +184 -0
- polars_map-0.1.0/polars_map/_utils.py +43 -0
- polars_map-0.1.0/polars_map/py.typed +0 -0
- polars_map-0.1.0/pyproject.toml +28 -0
- polars_map-0.1.0/tests/__init__.py +1 -0
- polars_map-0.1.0/tests/conftest.py +43 -0
- polars_map-0.1.0/tests/test_construction.py +71 -0
- polars_map-0.1.0/tests/test_expr.py +241 -0
- polars_map-0.1.0/tests/test_series.py +222 -0
- polars_map-0.1.0/uv.lock +375 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# CI workflow: format check, lint, type-check and test on every push / PR to main.
name: build

on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.12"]
    env:
      # `uv run` honours the repository's `.python-version` file, which would
      # otherwise override the interpreter installed by setup-python below.
      # Pinning UV_PYTHON makes every `uv run` step use the matrix version.
      UV_PYTHON: ${{ matrix.python-version }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install uv
        run: python -m pip install uv
      - name: Check ruff format
        run: uv run ruff format --check
      - name: Check with ruff
        run: uv run ruff check
      - name: Run pyright
        run: uv run pyright
      - name: Test with pytest
        run: uv run pytest
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# Manually triggered release: gate (checks) -> prepare (compute version) ->
# build -> publish (PyPI) -> release (commit, tag, GitHub release).
name: release

on:
  workflow_dispatch:
    inputs:
      bump:
        description: "Version bump type"
        required: true
        type: choice
        options:
          - patch
          - minor
          - major

permissions:
  contents: write
  id-token: write

jobs:
  # Run the same checks as the build workflow before anything is published.
  gate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
        with:
          python-version: "3.12"
      - name: Install uv
        run: python -m pip install uv
      - name: Check ruff format
        run: uv run ruff format --check
      - name: Check with ruff
        run: uv run ruff check
      - name: Run pyright
        run: uv run pyright
      - name: Test with pytest
        run: uv run pytest

  # Derive the next version from the current one and the requested bump type.
  prepare:
    runs-on: ubuntu-latest
    needs: gate
    outputs:
      version: ${{ steps.bump.outputs.version }}
    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
        with:
          python-version: "3.12"
      - name: Install uv
        run: python -m pip install uv
      - name: Compute new version
        id: bump
        env:
          # Pass the input through the environment instead of interpolating
          # it into the script text.
          BUMP: ${{ inputs.bump }}
        run: |
          current=$(uv version | cut -d' ' -f2)
          IFS='.' read -r major minor patch <<< "$current"
          case "$BUMP" in
            major) major=$((major + 1)); minor=0; patch=0 ;;
            minor) minor=$((minor + 1)); patch=0 ;;
            patch) patch=$((patch + 1)) ;;
            # Fail loudly rather than re-releasing the current version.
            *) echo "Unknown bump type: ${BUMP}" >&2; exit 1 ;;
          esac
          version="${major}.${minor}.${patch}"
          echo "version=${version}" >> "$GITHUB_OUTPUT"
          echo "New version: ${version}"

  # Build the distributions with the new version and hand them to publish.
  build:
    needs: prepare
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
        with:
          python-version: "3.12"
      - name: Install uv
        run: python -m pip install uv
      - name: Set version
        env:
          VERSION: ${{ needs.prepare.outputs.version }}
        run: uv version "$VERSION" --frozen
      - name: Build
        run: uv build
      - uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/

  # Upload the built artifacts to PyPI from the `pypi` environment.
  publish:
    needs: build
    runs-on: ubuntu-latest
    environment: pypi
    steps:
      - uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/
      - uses: pypa/gh-action-pypi-publish@release/v1
        with:
          packages-dir: dist/

  # After a successful publish, commit the version bump, tag it and create
  # the GitHub release.
  release:
    needs: [prepare, publish]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
        with:
          python-version: "3.12"
      - name: Install uv
        run: python -m pip install uv
      - name: Set version and commit
        env:
          VERSION: ${{ needs.prepare.outputs.version }}
        run: |
          uv version "$VERSION" --frozen
          uv lock
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add pyproject.toml uv.lock
          git commit -m "v${VERSION}"
          git tag -a "v${VERSION}" -m "v${VERSION}"
          git push --follow-tags
      - name: Create GitHub release
        env:
          GH_TOKEN: ${{ github.token }}
          VERSION: ${{ needs.prepare.outputs.version }}
        run: gh release create "v${VERSION}" --generate-notes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.13
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: polars-map
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Polars plugin providing Map operations on List(Struct({key, value})) columns
|
|
5
|
+
Project-URL: Homepage, https://github.com/hafaio/polars-map
|
|
6
|
+
Project-URL: Repository, https://github.com/hafaio/polars-map
|
|
7
|
+
Project-URL: Issues, https://github.com/hafaio/polars-map/issues
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Requires-Dist: polars>=1.13.0
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# polars-map
|
|
2
|
+
|
|
3
|
+
[![build](https://github.com/hafaio/polars-map/actions/workflows/build.yml/badge.svg)](https://github.com/hafaio/polars-map/actions/workflows/build.yml)
|
|
4
|
+
[![pypi](https://img.shields.io/pypi/v/polars-map)](https://pypi.org/project/polars-map/)
|
|
5
|
+
|
|
6
|
+
Polars plugin providing a Map extension type and functions.
|
|
7
|
+
Maps represent a mapping from unique keys of any type to values, and are stored as `List(Struct({key, value}))` columns.
|
|
8
|
+
All functions in the `.map` namespace can be used on the extension type or on the
|
|
9
|
+
underlying list.
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install polars-map
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Supported operations (`.map.*`)
|
|
18
|
+
|
|
19
|
+
| Category | Methods |
|
|
20
|
+
| ---------- | ---------------------------------------------------------- |
|
|
21
|
+
| Accessors | `entries`, `keys`, `values`, `len`, `get`, `contains_key` |
|
|
22
|
+
| Filtering | `filter`, `filter_keys`, `filter_values` |
|
|
23
|
+
| Transform | `eval`, `eval_keys`, `eval_values` |
|
|
24
|
+
| Set ops | `merge`, `intersection`, `difference` |
|
|
25
|
+
| Conversion | `from_entries` |
|
|
26
|
+
| Iteration | `__iter__`, `to_list` (Series only) |
|
|
27
|
+
|
|
28
|
+
## Usage
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import polars as pl
|
|
32
|
+
from polars_map import Map
|
|
33
|
+
|
|
34
|
+
# Construction
|
|
35
|
+
ser = pl.Series(
|
|
36
|
+
"m",
|
|
37
|
+
[
|
|
38
|
+
[{"key": "a", "value": 1}, {"key": "b", "value": 2}],
|
|
39
|
+
[{"key": "x", "value": 10}],
|
|
40
|
+
],
|
|
41
|
+
dtype=Map(pl.String(), pl.Int64()),
|
|
42
|
+
)
|
|
43
|
+
df = pl.DataFrame([ser])
|
|
44
|
+
|
|
45
|
+
# Accessors
|
|
46
|
+
df.select(pl.col("m").map.keys()) # [["a", "b"], ["x"]]
|
|
47
|
+
df.select(pl.col("m").map.values()) # [[1, 2], [10]]
|
|
48
|
+
df.select(pl.col("m").map.len()) # [2, 1]
|
|
49
|
+
|
|
50
|
+
# Lookup
|
|
51
|
+
df.select(pl.col("m").map.get("a")) # [1, None]
|
|
52
|
+
df.select(pl.col("m").map.contains_key("a")) # [True, False]
|
|
53
|
+
|
|
54
|
+
# Filtering
|
|
55
|
+
df.select(pl.col("m").map.filter(pl.element().struct["value"] > 1))
|
|
56
|
+
df.select(pl.col("m").map.filter_keys(pl.element() > "a"))
|
|
57
|
+
df.select(pl.col("m").map.filter_values(pl.element() >= 2))
|
|
58
|
+
|
|
59
|
+
# Transform keys or values
|
|
60
|
+
df.select(pl.col("m").map.eval_keys(pl.element().str.to_uppercase()))
|
|
61
|
+
df.select(pl.col("m").map.eval_values(pl.element() * 2))
|
|
62
|
+
|
|
63
|
+
# Merge (right-side wins on key conflict)
|
|
64
|
+
left = pl.Series("l", [[{"key": "a", "value": 1}, {"key": "b", "value": 2}]], dtype=Map(pl.String(), pl.Int64()))
|
|
65
|
+
right = pl.Series("r", [[{"key": "a", "value": 99}, {"key": "c", "value": 3}]], dtype=Map(pl.String(), pl.Int64()))
|
|
66
|
+
pl.DataFrame([left, right]).select(pl.col("l").map.merge(pl.col("r")))
|
|
67
|
+
# [{"a": 99, "b": 2, "c": 3}]
|
|
68
|
+
|
|
69
|
+
# Set operations
|
|
70
|
+
pl.DataFrame([left, right]).select(pl.col("l").map.intersection(pl.col("r"))) # keys in both
|
|
71
|
+
pl.DataFrame([left, right]).select(pl.col("l").map.difference(pl.col("r"))) # keys only in left
|
|
72
|
+
|
|
73
|
+
# Convert to/from plain List(Struct)
|
|
74
|
+
df.select(pl.col("m").map.entries()) # strip Map type
|
|
75
|
+
df.select(pl.col("m").map.from_entries()) # wrap as Map (with deduplication)
|
|
76
|
+
|
|
77
|
+
# Series iteration yields Python dicts
|
|
78
|
+
for d in ser.map:
|
|
79
|
+
print(d) # {"a": 1, "b": 2}, {"x": 10}
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Caveats
|
|
83
|
+
|
|
84
|
+
- **Extension types** — used to wrap the underlying `List(Struct)` storage with a semantic `Map` dtype — are not yet stabilized and may change across Polars releases.
|
|
85
|
+
- **`pl.dtype_of`** — used to efficiently cast to the extension type after _some_ operations — is also unstable.
|
|
86
|
+
- **GIL** — the GIL is required to automatically wrap an expression as the extension type, so operations which could change the underlying key or value types will briefly lock the GIL to do the cast. This may also prevent the Polars engine from reasoning about the type.
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Map extension data type for Polars."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import polars as pl
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _ensure_instance(dt: pl.DataType | type[pl.DataType]) -> pl.DataType:
|
|
9
|
+
return dt() if isinstance(dt, type) else dt
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Map(pl.BaseExtension):
    """Map extension type backed by List(Struct({key, value})).

    Usage as a dtype for Series construction::

        dtype = Map(pl.String(), pl.Int64())
    """

    def __init__(self, key: pl.DataType, value: pl.DataType) -> None:
        """Build the ``List(Struct({"key", "value"}))`` storage and register it
        under the extension name ``"polars_map.map"``.
        """
        entry = pl.Struct({"key": key, "value": value})
        super().__init__("polars_map.map", pl.List(entry))

    @property
    def key(self) -> pl.DataType:
        """Key data type, normalised to an instance."""
        # Unpacking (rather than indexing) requires the storage struct to have
        # exactly two fields.
        key_field, _ = self.ext_storage().inner.fields  # pyright: ignore[reportAttributeAccessIssue,reportUnknownMemberType,reportUnknownVariableType]
        return _ensure_instance(key_field.dtype)  # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType,reportUnknownArgumentType]

    @property
    def value(self) -> pl.DataType:
        """Value data type, normalised to an instance."""
        _, value_field = self.ext_storage().inner.fields  # pyright: ignore[reportAttributeAccessIssue,reportUnknownMemberType,reportUnknownVariableType]
        return _ensure_instance(value_field.dtype)  # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType,reportUnknownArgumentType]

    def _string_repr(self) -> str:
        """Render as ``map[<key repr>,<value repr>]`` using the inner dtypes' own reprs."""
        key_repr = self.key._string_repr()  # pyright: ignore[reportUnknownMemberType]
        value_repr = self.value._string_repr()  # pyright: ignore[reportUnknownMemberType]
        return f"map[{key_repr},{value_repr}]"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Register Map under the same extension name that Map.__init__ passes to
# BaseExtension ("polars_map.map"); the two strings must stay in sync.
# NOTE(review): presumably this is what lets Polars resolve the name back to
# this class — confirm against the Polars extension-type docs.
pl.register_extension_type("polars_map.map", Map)
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
"""Expr namespace for Map operations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import functools
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
import polars as pl
|
|
9
|
+
|
|
10
|
+
from ._utils import expr_eval, infer_map, validate
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@pl.api.register_expr_namespace("map")
@dataclass(frozen=True)
class MapExpr:
    """Expression namespace for Map operations on List(Struct({key, value})) columns.

    Registered as the ``map`` expression namespace, so methods are reached as
    ``pl.col("m").map.<method>(...)``.
    """

    # The expression the namespace was accessed on.
    _expr: pl.Expr

    def _as_self(self, expr: pl.Expr) -> pl.Expr:
        """Wrap a List(Struct) result back as Map, preserving the original dtype."""
        return expr.ext.to(pl.dtype_of(self._expr))

    def from_entries(
        self,
        *,
        validate_fields: bool = True,
        deduplicate: bool = True,
        parallel: bool = False,
    ) -> pl.Expr:
        """Wrap a List(Struct({key, value})) expression as a Map extension type.

        Parameters
        ----------
        validate_fields
            Forwarded to the internal ``validate`` helper.
            NOTE(review): presumably checks the struct's key/value fields —
            confirm against ``_utils.validate``.
        deduplicate
            If True, deduplicate by key, keeping the first occurrence.
        parallel
            Run list evaluations in parallel.
        """
        return infer_map(
            self._expr.list.eval(
                validate(
                    pl.element(),
                    validate_fields=validate_fields,
                    deduplicate=deduplicate,
                ),
                parallel=parallel,
            )
        )

    @functools.cached_property
    def _entries(self) -> pl.Expr:
        """Underlying storage expression, computed once per namespace instance."""
        return self._expr.ext.storage()

    def entries(self) -> pl.Expr:
        """Strip the Map extension type, returning raw List(Struct({key, value}))."""
        return self._entries

    def keys(self) -> pl.Expr:
        """Extract all keys as a List column."""
        return self._entries.list.eval(pl.element().struct["key"])

    def values(self) -> pl.Expr:
        """Extract all values as a List column."""
        return self._entries.list.eval(pl.element().struct["value"])

    def len(self) -> pl.Expr:
        """Return the number of entries in the map."""
        return self._entries.list.len()

    def _get(self, key: object) -> pl.Expr:
        """Look up a single key; the result is aliased to ``str(key)``."""
        return (
            self._entries.list.eval(
                pl.element()
                .filter(pl.element().struct["key"] == key)
                .struct["value"]
                .first()
            )
            # The eval result is still a list, so take its first item to get
            # the per-row scalar.
            .list.first()
            .alias(str(key))
        )

    def get(self, key: object, *keys: object) -> pl.Expr:
        """Look up one or more values by key.

        With a single key, returns one scalar-per-row expression aliased to
        ``str(key)``. With additional keys, each lookup is packed into a struct
        that is then unnested, yielding one output per key.
        """
        if keys:
            return pl.struct(  # pyright: ignore[reportUnknownMemberType]
                self._get(key), *(self._get(k) for k in keys)
            ).struct.unnest()
        else:
            return self._get(key)

    def contains_key(self, key: object) -> pl.Expr:
        """Check if a key exists in the map."""
        return self._entries.list.eval(pl.element().struct["key"] == key).list.any()

    def eval(
        self,
        expr: pl.Expr,
        *,
        validate_fields: bool = True,
        deduplicate: bool = True,
        parallel: bool = False,
    ) -> pl.Expr:
        """Evaluate an expression on entries, returning a Map.

        The expression operates on the struct elements via ``pl.element()``.
        ``validate_fields`` and ``deduplicate`` are forwarded to the internal
        ``validate`` helper; ``parallel`` is forwarded to ``list.eval``.

        Example
        -------
        >>> col.map.eval(pl.element().struct.with_fields(pl.element().struct["value"] * 2))
        """
        inner = validate(expr, validate_fields=validate_fields, deduplicate=deduplicate)
        return infer_map(self._entries.list.eval(inner, parallel=parallel))

    def eval_keys(
        self, expr: pl.Expr, *, deduplicate: bool = True, parallel: bool = False
    ) -> pl.Expr:
        """Transform keys, returning a Map with new key type.

        The expression operates on each key via ``pl.element()``.

        Parameters
        ----------
        expr
            Expression applied to each key.
        deduplicate
            If True, drop entries whose *transformed* key repeats an earlier
            one, keeping the first occurrence.
        parallel
            Run list evaluations in parallel.

        Example
        -------
        >>> col.map.eval_keys(pl.element().str.to_uppercase())
        """
        inner: pl.Expr = pl.element().struct.with_fields(  # pyright: ignore[reportUnknownMemberType]
            key=expr_eval(pl.element().struct["key"], expr)
        )
        if deduplicate:
            # Deduplicate on the rewritten key. `pl.element().struct["key"]`
            # would refer to the original key, so collisions introduced by
            # `expr` would slip through.
            inner = inner.filter(inner.struct["key"].is_first_distinct())
        return infer_map(self._entries.list.eval(inner, parallel=parallel))

    def eval_values(self, expr: pl.Expr, *, parallel: bool = False) -> pl.Expr:
        """Transform values, returning a Map with new value type.

        The expression operates on each value via ``pl.element()``.

        Example
        -------
        >>> col.map.eval_values(pl.element() * 2)
        """
        inner = pl.element().struct.with_fields(  # pyright: ignore[reportUnknownMemberType]
            value=expr_eval(pl.element().struct["value"], expr)
        )
        return infer_map(self._entries.list.eval(inner, parallel=parallel))

    def filter(self, predicate: pl.Expr, *, parallel: bool = False) -> pl.Expr:
        """Filter entries by a predicate on the struct entry.

        Example
        -------
        >>> col.map.filter(pl.element().struct["key"] > "b")
        """
        return self._as_self(
            self._entries.list.eval(pl.element().filter(predicate), parallel=parallel)
        )

    def filter_keys(self, predicate: pl.Expr, *, parallel: bool = False) -> pl.Expr:
        """Filter entries where the key satisfies the predicate.

        Example
        -------
        >>> col.map.filter_keys(pl.element() > "b")
        """
        inner = pl.element().filter(
            expr_eval(pl.element().struct["key"], predicate)  # pyright: ignore[reportUnknownMemberType]
        )
        return self._as_self(self._entries.list.eval(inner, parallel=parallel))

    def filter_values(self, predicate: pl.Expr, *, parallel: bool = False) -> pl.Expr:
        """Filter entries where the value satisfies the predicate.

        Example
        -------
        >>> col.map.filter_values(pl.element() > 5)
        """
        inner = pl.element().filter(
            expr_eval(pl.element().struct["value"], predicate)  # pyright: ignore[reportUnknownMemberType]
        )
        return self._as_self(self._entries.list.eval(inner, parallel=parallel))

    def merge(self, other: pl.Expr, *, parallel: bool = False) -> pl.Expr:
        """Merge two maps. Right-side values win on key conflict."""
        combined = pl.concat_list([self._entries, other.map.entries()])  # pyright: ignore[reportUnknownMemberType,reportAttributeAccessIssue,reportUnknownVariableType]
        return self._as_self(
            combined.list.eval(
                # `other` is concatenated last, so keeping the last occurrence
                # of each key makes the right side win.
                pl.element().filter(pl.element().struct["key"].is_last_distinct()),
                parallel=parallel,
            )
        )

    def intersection(self, other: pl.Expr, *, parallel: bool = False) -> pl.Expr:
        """Keep entries from self where the key also exists in other."""
        combined = pl.concat_list([self._entries, other.map.entries()])  # pyright: ignore[reportUnknownMemberType,reportAttributeAccessIssue,reportUnknownVariableType]
        return self._as_self(
            combined.list.eval(
                # A key present on both sides is duplicated in the concatenated
                # list; the first occurrence is the one from self.
                pl.element().filter(
                    pl.element().struct["key"].is_duplicated()
                    & pl.element().struct["key"].is_first_distinct()
                ),
                parallel=parallel,
            )
        )

    def difference(self, other: pl.Expr, *, parallel: bool = False) -> pl.Expr:
        """Keep entries from self where the key does NOT exist in other."""
        other_entries = other.map.entries()  # pyright: ignore[reportUnknownMemberType,reportAttributeAccessIssue,reportUnknownVariableType]
        # `other` is appended twice so each of its keys is always duplicated;
        # only keys unique to self survive the filter below.
        combined = pl.concat_list([self._entries, other_entries, other_entries])  # pyright: ignore[reportUnknownMemberType]
        return self._as_self(
            combined.list.eval(
                pl.element().filter(~pl.element().struct["key"].is_duplicated()),
                parallel=parallel,
            )
        )
|