polars-map 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
# CI workflow: format check, lint, type-check and test on every push and
# pull request that targets main.
name: build

on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]

# The job only reads the repository, so restrict the token accordingly.
permissions:
  contents: read

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      # Let the other Python versions finish even when one of them fails.
      fail-fast: false
      matrix:
        # Quoted so that 3.10 is not parsed as the float 3.1.
        python-version: ["3.10", "3.12"]

    steps:
      # Same major version as the release workflow uses.
      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install uv
        run: python -m pip install uv
      - name: Check ruff format
        run: uv run ruff format --check
      - name: Check with ruff
        run: uv run ruff check
      - name: Run pyright
        run: uv run pyright
      - name: Test with pytest
        run: uv run pytest
@@ -0,0 +1,121 @@
1
# Manually triggered release pipeline:
#   gate -> prepare (compute version) -> build -> publish (PyPI) -> release
#   (commit + tag the bump, create the GitHub release).
name: release

on:
  workflow_dispatch:
    inputs:
      bump:
        description: "Version bump type"
        required: true
        type: choice
        options:
          - patch
          - minor
          - major

# Least-privilege default. Jobs that need more (publish, release) request it
# explicitly in their own `permissions` block.
permissions:
  contents: read

jobs:
  # Run the same checks as the build workflow before anything is released.
  gate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
        with:
          python-version: "3.12"
      - name: Install uv
        run: python -m pip install uv
      - name: Check ruff format
        run: uv run ruff format --check
      - name: Check with ruff
        run: uv run ruff check
      - name: Run pyright
        run: uv run pyright
      - name: Test with pytest
        run: uv run pytest

  # Compute the next version from the current one and the requested bump.
  prepare:
    runs-on: ubuntu-latest
    needs: gate
    outputs:
      version: ${{ steps.bump.outputs.version }}
    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
        with:
          python-version: "3.12"
      - name: Install uv
        run: python -m pip install uv
      - name: Compute new version
        id: bump
        env:
          # Passed through the environment instead of being spliced into the
          # script text, matching how the release job passes VERSION.
          BUMP: ${{ inputs.bump }}
        run: |
          # pipefail: a failing `uv version` must not be masked by `cut`.
          set -euo pipefail
          current=$(uv version | cut -d' ' -f2)
          # The arithmetic below only understands plain MAJOR.MINOR.PATCH.
          if [[ ! "$current" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
            echo "::error::Cannot bump '${current}': expected MAJOR.MINOR.PATCH"
            exit 1
          fi
          IFS='.' read -r major minor patch <<< "$current"
          case "$BUMP" in
            major) major=$((major + 1)); minor=0; patch=0 ;;
            minor) minor=$((minor + 1)); patch=0 ;;
            patch) patch=$((patch + 1)) ;;
            *)
              # Fail loudly instead of silently re-releasing the same version.
              echo "::error::Unknown bump type '${BUMP}'"
              exit 1
              ;;
          esac
          version="${major}.${minor}.${patch}"
          echo "version=${version}" >> "$GITHUB_OUTPUT"
          echo "New version: ${version}"

  # Build the distributions with the new version and hand them to publish.
  build:
    needs: prepare
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
        with:
          python-version: "3.12"
      - name: Install uv
        run: python -m pip install uv
      - name: Set version
        env:
          VERSION: ${{ needs.prepare.outputs.version }}
        run: uv version "$VERSION" --frozen
      - name: Build
        run: uv build
      - uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/

  # Upload the built distributions to PyPI.
  publish:
    needs: build
    runs-on: ubuntu-latest
    environment: pypi
    permissions:
      # OIDC token for the PyPI publish action; this job never checks out the
      # repository, so it needs no `contents` access.
      id-token: write
    steps:
      - uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/
      - uses: pypa/gh-action-pypi-publish@release/v1
        with:
          packages-dir: dist/

  # Record the release in the repository once the upload has succeeded.
  release:
    needs: [prepare, publish]
    runs-on: ubuntu-latest
    permissions:
      # Needed to push the version commit and tag and to create the release.
      contents: write
    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
        with:
          python-version: "3.12"
      - name: Install uv
        run: python -m pip install uv
      - name: Set version and commit
        env:
          VERSION: ${{ needs.prepare.outputs.version }}
        run: |
          uv version "$VERSION" --frozen
          uv lock
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add pyproject.toml uv.lock
          git commit -m "v${VERSION}"
          git tag -a "v${VERSION}" -m "v${VERSION}"
          git push --follow-tags
      - name: Create GitHub release
        env:
          GH_TOKEN: ${{ github.token }}
          VERSION: ${{ needs.prepare.outputs.version }}
        run: gh release create "v${VERSION}" --generate-notes
@@ -0,0 +1,13 @@
1
+ __pycache__/
2
+ .coverage
3
+ .venv
4
+ *.egg-info
5
+ *.pyo
6
+ *.pyc
7
+ *.so
8
+ *.dylib
9
+ build/
10
+ dist/
11
+ out/
12
+ target/
13
+ wheels/
@@ -0,0 +1 @@
1
+ 3.13
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: polars-map
3
+ Version: 0.1.0
4
+ Summary: Polars plugin providing Map operations on List(Struct({key, value})) columns
5
+ Project-URL: Homepage, https://github.com/hafaio/polars-map
6
+ Project-URL: Repository, https://github.com/hafaio/polars-map
7
+ Project-URL: Issues, https://github.com/hafaio/polars-map/issues
8
+ Requires-Python: >=3.10
9
+ Requires-Dist: polars>=1.13.0
@@ -0,0 +1,86 @@
1
+ # polars-map
2
+
3
+ [![build](https://github.com/hafaio/polars-map/actions/workflows/build.yml/badge.svg)](https://github.com/hafaio/polars-map/actions/workflows/build.yml)
4
+ [![pypi](https://img.shields.io/pypi/v/polars-map)](https://pypi.org/project/polars-map/)
5
+
6
+ Polars plugin providing a Map extension type and functions.
7
+ Maps represent a mapping from unique keys of any type to values, and are stored as `List(Struct({key, value}))` columns.
8
+ All functions in the `.map` namespace can be used on the extension type or on the
9
+ underlying list.
10
+
11
+ ## Installation
12
+
13
+ ```bash
14
+ pip install polars-map
15
+ ```
16
+
17
+ ## Supported operations (`.map.*`)
18
+
19
+ | Category | Methods |
20
+ | ---------- | ---------------------------------------------------------- |
21
+ | Accessors | `entries`, `keys`, `values`, `len`, `get`, `contains_key` |
22
+ | Filtering | `filter`, `filter_keys`, `filter_values` |
23
+ | Transform | `eval`, `eval_keys`, `eval_values` |
24
+ | Set ops | `merge`, `intersection`, `difference` |
25
+ | Conversion | `from_entries` |
26
+ | Iteration | `__iter__`, `to_list` (Series only) |
27
+
28
+ ## Usage
29
+
30
+ ```python
31
+ import polars as pl
32
+ from polars_map import Map
33
+
34
+ # Construction
35
+ ser = pl.Series(
36
+ "m",
37
+ [
38
+ [{"key": "a", "value": 1}, {"key": "b", "value": 2}],
39
+ [{"key": "x", "value": 10}],
40
+ ],
41
+ dtype=Map(pl.String(), pl.Int64()),
42
+ )
43
+ df = pl.DataFrame([ser])
44
+
45
+ # Accessors
46
+ df.select(pl.col("m").map.keys()) # [["a", "b"], ["x"]]
47
+ df.select(pl.col("m").map.values()) # [[1, 2], [10]]
48
+ df.select(pl.col("m").map.len()) # [2, 1]
49
+
50
+ # Lookup
51
+ df.select(pl.col("m").map.get("a")) # [1, None]
52
+ df.select(pl.col("m").map.contains_key("a")) # [True, False]
53
+
54
+ # Filtering
55
+ df.select(pl.col("m").map.filter(pl.element().struct["value"] > 1))
56
+ df.select(pl.col("m").map.filter_keys(pl.element() > "a"))
57
+ df.select(pl.col("m").map.filter_values(pl.element() >= 2))
58
+
59
+ # Transform keys or values
60
+ df.select(pl.col("m").map.eval_keys(pl.element().str.to_uppercase()))
61
+ df.select(pl.col("m").map.eval_values(pl.element() * 2))
62
+
63
+ # Merge (right-side wins on key conflict)
64
+ left = pl.Series("l", [[{"key": "a", "value": 1}, {"key": "b", "value": 2}]], dtype=Map(pl.String(), pl.Int64()))
65
+ right = pl.Series("r", [[{"key": "a", "value": 99}, {"key": "c", "value": 3}]], dtype=Map(pl.String(), pl.Int64()))
66
+ pl.DataFrame([left, right]).select(pl.col("l").map.merge(pl.col("r")))
67
+ # [{"a": 99, "b": 2, "c": 3}]
68
+
69
+ # Set operations
70
+ pl.DataFrame([left, right]).select(pl.col("l").map.intersection(pl.col("r"))) # keys in both
71
+ pl.DataFrame([left, right]).select(pl.col("l").map.difference(pl.col("r"))) # keys only in left
72
+
73
+ # Convert to/from plain List(Struct)
74
+ df.select(pl.col("m").map.entries()) # strip Map type
75
+ df.select(pl.col("m").map.from_entries()) # wrap as Map (with deduplication)
76
+
77
+ # Series iteration yields Python dicts
78
+ for d in ser.map:
79
+ print(d) # {"a": 1, "b": 2}, {"x": 10}
80
+ ```
81
+
82
+ ## Caveats
83
+
84
+ - **Extension types** — used to wrap the underlying `List(Struct)` storage with a semantic `Map` dtype — are not yet stabilized and may change across Polars releases.
85
+ - **`pl.dtype_of`** — used to efficiently cast to the extension type after _some_ operations — is also unstable.
86
+ - **GIL** — the GIL is required to automatically wrap an expression as the extension type, so operations that could change the underlying key or value types briefly hold the GIL to perform the cast. This may also prevent the Polars engine from reasoning about the type.
@@ -0,0 +1,7 @@
1
+ """Polars plugin providing Map operations on List(Struct({key, value})) columns."""
2
+
3
+ from polars_map._dtype import Map
4
+ from polars_map._expr import MapExpr
5
+ from polars_map._series import MapSeries
6
+
7
+ __all__ = ("Map", "MapExpr", "MapSeries")
@@ -0,0 +1,40 @@
1
+ """Map extension data type for Polars."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import polars as pl
6
+
7
+
8
+ def _ensure_instance(dt: pl.DataType | type[pl.DataType]) -> pl.DataType:
9
+ return dt() if isinstance(dt, type) else dt
10
+
11
+
12
class Map(pl.BaseExtension):
    """Map extension type backed by List(Struct({key, value})).

    The extension is named ``"polars_map.map"`` and its storage dtype is a
    list of structs with exactly two fields, ``key`` and ``value``.

    Usage as a dtype for Series construction::

        dtype = Map(pl.String(), pl.Int64())
    """

    def __init__(self, key: pl.DataType, value: pl.DataType) -> None:
        # One map entry is a {key, value} struct; a map is a list of entries.
        entry = pl.Struct({"key": key, "value": value})
        super().__init__("polars_map.map", pl.List(entry))

    @property
    def key(self) -> pl.DataType:
        """Key data type, read from the first field of the storage struct."""
        key_field, _ = self.ext_storage().inner.fields  # pyright: ignore[reportAttributeAccessIssue,reportUnknownMemberType,reportUnknownVariableType]
        return _ensure_instance(key_field.dtype)  # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType,reportUnknownArgumentType]

    @property
    def value(self) -> pl.DataType:
        """Value data type, read from the second field of the storage struct."""
        _, value_field = self.ext_storage().inner.fields  # pyright: ignore[reportAttributeAccessIssue,reportUnknownMemberType,reportUnknownVariableType]
        return _ensure_instance(value_field.dtype)  # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType,reportUnknownArgumentType]

    def _string_repr(self) -> str:
        # Rendered as ``map[<key repr>,<value repr>]``.
        key_repr = self.key._string_repr()  # pyright: ignore[reportUnknownMemberType]
        value_repr = self.value._string_repr()  # pyright: ignore[reportUnknownMemberType]
        return f"map[{key_repr},{value_repr}]"


# Register Map under the same extension name it passes to BaseExtension.
pl.register_extension_type("polars_map.map", Map)
@@ -0,0 +1,215 @@
1
+ """Expr namespace for Map operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import functools
6
+ from dataclasses import dataclass
7
+
8
+ import polars as pl
9
+
10
+ from ._utils import expr_eval, infer_map, validate
11
+
12
+
13
@pl.api.register_expr_namespace("map")
@dataclass(frozen=True)
class MapExpr:
    """Expression namespace for Map operations on List(Struct({key, value})) columns.

    Registered on ``pl.Expr`` under the name ``map``, so every method here is
    reached as ``pl.col("m").map.<method>(...)``.
    """

    # The expression this namespace was accessed on.
    _expr: pl.Expr

    def _as_self(self, expr: pl.Expr) -> pl.Expr:
        """Wrap a List(Struct) result back as Map, preserving the original dtype."""
        return expr.ext.to(pl.dtype_of(self._expr))

    def from_entries(
        self,
        *,
        validate_fields: bool = True,
        deduplicate: bool = True,
        parallel: bool = False,
    ) -> pl.Expr:
        """Wrap a List(Struct({key, value})) expression as a Map extension type.

        Parameters
        ----------
        validate_fields
            Forwarded to ``validate``; presumably checks that each entry has
            the expected ``key``/``value`` fields (see ``_utils.validate``).
        deduplicate
            If True, deduplicate by key, keeping the first occurrence.
        parallel
            Run list evaluations in parallel.
        """
        checked = validate(
            pl.element(),
            validate_fields=validate_fields,
            deduplicate=deduplicate,
        )
        return infer_map(self._expr.list.eval(checked, parallel=parallel))

    # cached_property stores its result in the instance ``__dict__`` directly,
    # so it works even though the dataclass is frozen.
    @functools.cached_property
    def _entries(self) -> pl.Expr:
        return self._expr.ext.storage()

    def entries(self) -> pl.Expr:
        """Strip the Map extension type, returning raw List(Struct({key, value}))."""
        return self._entries

    def keys(self) -> pl.Expr:
        """Extract all keys as a List column."""
        return self._entries.list.eval(pl.element().struct["key"])

    def values(self) -> pl.Expr:
        """Extract all values as a List column."""
        return self._entries.list.eval(pl.element().struct["value"])

    def len(self) -> pl.Expr:
        """Return the number of entries in the map."""
        return self._entries.list.len()

    def _get(self, key: object) -> pl.Expr:
        """Look up a single key; the result column is aliased ``str(key)``."""
        return (
            self._entries.list.eval(
                pl.element()
                .filter(pl.element().struct["key"] == key)
                .struct["value"]
                .first()
            )
            .list.first()
            .alias(str(key))
        )

    def get(self, key: object, *keys: object) -> pl.Expr:
        """Look up one or more keys.

        With a single ``key`` the result is one value per row, named
        ``str(key)``. When extra ``keys`` are given, every lookup is packed
        into a struct and unnested, which yields one column per key.
        """
        if not keys:
            return self._get(key)
        return pl.struct(  # pyright: ignore[reportUnknownMemberType]
            self._get(key), *(self._get(k) for k in keys)
        ).struct.unnest()

    def contains_key(self, key: object) -> pl.Expr:
        """Check if a key exists in the map."""
        return self._entries.list.eval(pl.element().struct["key"] == key).list.any()

    def eval(
        self,
        expr: pl.Expr,
        *,
        validate_fields: bool = True,
        deduplicate: bool = True,
        parallel: bool = False,
    ) -> pl.Expr:
        """Evaluate an expression on entries, returning a Map.

        The expression operates on the struct elements via ``pl.element()``.
        ``validate_fields`` and ``deduplicate`` are forwarded to ``validate``;
        ``parallel`` is forwarded to ``list.eval``.

        Example
        -------
        >>> col.map.eval(pl.element().struct.with_fields(pl.element().struct["value"] * 2))
        """
        inner = validate(expr, validate_fields=validate_fields, deduplicate=deduplicate)
        return infer_map(self._entries.list.eval(inner, parallel=parallel))

    def eval_keys(
        self, expr: pl.Expr, *, deduplicate: bool = True, parallel: bool = False
    ) -> pl.Expr:
        """Transform keys, returning a Map with new key type.

        The expression operates on each key via ``pl.element()``.

        Parameters
        ----------
        expr
            Expression applied to every key.
        deduplicate
            If True, keep only the first entry for each *transformed* key, so
            keys that collide after the transformation do not produce
            duplicates.
        parallel
            Run list evaluations in parallel.

        Example
        -------
        >>> col.map.eval_keys(pl.element().str.to_uppercase())
        """
        inner: pl.Expr = pl.element().struct.with_fields(  # pyright: ignore[reportUnknownMemberType]
            key=expr_eval(pl.element().struct["key"], expr)
        )
        if deduplicate:
            # Test the transformed key (``inner``), not ``pl.element()``.
            # Filtering on the original key would never drop new keys that
            # collide, e.g. "a" and "A" after upper-casing.
            inner = inner.filter(inner.struct["key"].is_first_distinct())
        return infer_map(self._entries.list.eval(inner, parallel=parallel))

    def eval_values(self, expr: pl.Expr, *, parallel: bool = False) -> pl.Expr:
        """Transform values, returning a Map with new value type.

        The expression operates on each value via ``pl.element()``.

        Example
        -------
        >>> col.map.eval_values(pl.element() * 2)
        """
        inner = pl.element().struct.with_fields(  # pyright: ignore[reportUnknownMemberType]
            value=expr_eval(pl.element().struct["value"], expr)
        )
        return infer_map(self._entries.list.eval(inner, parallel=parallel))

    def filter(self, predicate: pl.Expr, *, parallel: bool = False) -> pl.Expr:
        """Filter entries by a predicate on the struct entry.

        The result is cast back to the dtype of the original expression.

        Example
        -------
        >>> col.map.filter(pl.element().struct["key"] > "b")
        """
        return self._as_self(
            self._entries.list.eval(pl.element().filter(predicate), parallel=parallel)
        )

    def filter_keys(self, predicate: pl.Expr, *, parallel: bool = False) -> pl.Expr:
        """Filter entries where the key satisfies the predicate.

        Example
        -------
        >>> col.map.filter_keys(pl.element() > "b")
        """
        inner = pl.element().filter(
            expr_eval(pl.element().struct["key"], predicate)  # pyright: ignore[reportUnknownMemberType]
        )
        return self._as_self(self._entries.list.eval(inner, parallel=parallel))

    def filter_values(self, predicate: pl.Expr, *, parallel: bool = False) -> pl.Expr:
        """Filter entries where the value satisfies the predicate.

        Example
        -------
        >>> col.map.filter_values(pl.element() > 5)
        """
        inner = pl.element().filter(
            expr_eval(pl.element().struct["value"], predicate)  # pyright: ignore[reportUnknownMemberType]
        )
        return self._as_self(self._entries.list.eval(inner, parallel=parallel))

    def merge(self, other: pl.Expr, *, parallel: bool = False) -> pl.Expr:
        """Merge two maps. Right-side values win on key conflict."""
        combined = pl.concat_list([self._entries, other.map.entries()])  # pyright: ignore[reportUnknownMemberType,reportAttributeAccessIssue,reportUnknownVariableType]
        return self._as_self(
            combined.list.eval(
                # ``other`` is concatenated last, so keeping the last
                # occurrence of each key lets its value win.
                pl.element().filter(pl.element().struct["key"].is_last_distinct()),
                parallel=parallel,
            )
        )

    def intersection(self, other: pl.Expr, *, parallel: bool = False) -> pl.Expr:
        """Keep entries from self where the key also exists in other.

        Relies on keys being unique within each map: a key then occurs twice
        in the concatenation only if both maps contain it, and the first
        occurrence is the entry from ``self``.
        """
        combined = pl.concat_list([self._entries, other.map.entries()])  # pyright: ignore[reportUnknownMemberType,reportAttributeAccessIssue,reportUnknownVariableType]
        return self._as_self(
            combined.list.eval(
                pl.element().filter(
                    pl.element().struct["key"].is_duplicated()
                    & pl.element().struct["key"].is_first_distinct()
                ),
                parallel=parallel,
            )
        )

    def difference(self, other: pl.Expr, *, parallel: bool = False) -> pl.Expr:
        """Keep entries from self where the key does NOT exist in other."""
        other_entries = other.map.entries()  # pyright: ignore[reportUnknownMemberType,reportAttributeAccessIssue,reportUnknownVariableType]
        # ``other`` is appended twice so that each of its keys is duplicated;
        # the keys left non-duplicated are the ones found only in ``self``.
        combined = pl.concat_list([self._entries, other_entries, other_entries])  # pyright: ignore[reportUnknownMemberType]
        return self._as_self(
            combined.list.eval(
                pl.element().filter(~pl.element().struct["key"].is_duplicated()),
                parallel=parallel,
            )
        )