alap-python 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,96 @@
1
+ Metadata-Version: 2.4
2
+ Name: alap-python
3
+ Version: 0.1.0
4
+ Summary: Alap expression parser for Python — resolve link expressions server-side
5
+ Author: Daniel Smith
6
+ License-Expression: Apache-2.0
7
+ Project-URL: Homepage, https://alap.info
8
+ Project-URL: Repository, https://github.com/DanielSmith/alap-python
9
+ Keywords: alap,expression-parser,links,menu
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Software Development :: Libraries
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+
21
+ # Alap Expression Parser — Python
22
+
23
+ [Alap](https://github.com/DanielSmith/alap) is a JavaScript library that turns links into dynamic menus with multiple curated targets. This is the server-side Python port of the expression parser, enabling expression resolution in Python servers without a Node.js sidecar.
24
+
25
+ ## What's included
26
+
27
+ - **`expression_parser.py`** — Recursive descent parser for the Alap expression grammar, macro expansion, regex search, config merging
28
+ - **`validate_regex.py`** — ReDoS guard for user-supplied regex patterns
29
+
30
+ ## What's NOT included
31
+
32
+ This is the server-side subset of `alap/core`. It covers expression parsing, config merging, and regex validation — everything a server needs to resolve cherry-pick and query requests.
33
+
34
+ Browser-side concerns (DOM rendering, menu positioning, event handling, URL sanitization) are handled by the JavaScript client and are not ported here.
35
+
36
+ ## Supported expression syntax
37
+
38
+ ```
39
+ item1, item2 # item IDs (comma-separated)
40
+ .coffee # tag query
41
+ .nyc + .bridge # AND (intersection)
42
+ .nyc | .sf # OR (union)
43
+ .nyc - .tourist # WITHOUT (subtraction)
44
+ (.nyc | .sf) + .open # parenthesized grouping
45
+ @favorites # macro expansion
46
+ /mypattern/ # regex search (by pattern key)
47
+ /mypattern/lu # regex with field options
48
+ ```
49
+
50
+ ## Usage
51
+
52
+ ```python
53
+ from expression_parser import ExpressionParser, resolve_expression, cherry_pick_links, merge_configs
54
+
55
+ config = {
56
+ "allLinks": {
57
+ "item1": {"label": "Example", "url": "https://example.com", "tags": ["demo"]},
58
+ "item2": {"label": "Other", "url": "https://other.com", "tags": ["demo", "test"]},
59
+ },
60
+ "macros": {
61
+ "all": {"linkItems": ".demo"}
62
+ }
63
+ }
64
+
65
+ # Low-level: get matching IDs
66
+ parser = ExpressionParser(config)
67
+ ids = parser.query(".demo") # ["item1", "item2"]
68
+ ids = parser.query(".demo - .test") # ["item1"]
69
+
70
+ # Convenience: expression -> full link objects
71
+ results = resolve_expression(config, ".demo")
72
+ # [{"id": "item1", "label": "Example", ...}, {"id": "item2", ...}]
73
+
74
+ # Cherry-pick: expression -> { id: link } dict
75
+ subset = cherry_pick_links(config, ".test")
76
+ # {"item2": {"label": "Other", ...}}
77
+
78
+ # Merge multiple configs
79
+ merged = merge_configs(config1, config2)
80
+ ```
81
+
82
+ ## Installation
83
+
84
+ Copy `expression_parser.py` and `validate_regex.py` into your project, or install from PyPI:
85
+
86
+ ```bash
87
+ pip install alap-python
88
+ # or, with uv (recommended):
89
+ uv add alap-python
90
+ ```
91
+
92
+ ## Used by
93
+
94
+ - [flask-sqlite](https://github.com/DanielSmith/alap/tree/main/examples/servers/flask-sqlite) server
95
+ - [fastapi-postgres](https://github.com/DanielSmith/alap/tree/main/examples/servers/fastapi-postgres) server
96
+ - [django-sqlite](https://github.com/DanielSmith/alap/tree/main/examples/servers/django-sqlite) server
@@ -0,0 +1,76 @@
1
+ # Alap Expression Parser — Python
2
+
3
+ [Alap](https://github.com/DanielSmith/alap) is a JavaScript library that turns links into dynamic menus with multiple curated targets. This is the server-side Python port of the expression parser, enabling expression resolution in Python servers without a Node.js sidecar.
4
+
5
+ ## What's included
6
+
7
+ - **`expression_parser.py`** — Recursive descent parser for the Alap expression grammar, macro expansion, regex search, config merging
8
+ - **`validate_regex.py`** — ReDoS guard for user-supplied regex patterns
9
+
10
+ ## What's NOT included
11
+
12
+ This is the server-side subset of `alap/core`. It covers expression parsing, config merging, and regex validation — everything a server needs to resolve cherry-pick and query requests.
13
+
14
+ Browser-side concerns (DOM rendering, menu positioning, event handling, URL sanitization) are handled by the JavaScript client and are not ported here.
15
+
16
+ ## Supported expression syntax
17
+
18
+ ```
19
+ item1, item2 # item IDs (comma-separated)
20
+ .coffee # tag query
21
+ .nyc + .bridge # AND (intersection)
22
+ .nyc | .sf # OR (union)
23
+ .nyc - .tourist # WITHOUT (subtraction)
24
+ (.nyc | .sf) + .open # parenthesized grouping
25
+ @favorites # macro expansion
26
+ /mypattern/ # regex search (by pattern key)
27
+ /mypattern/lu # regex with field options
28
+ ```
29
+
30
+ ## Usage
31
+
32
+ ```python
33
+ from expression_parser import ExpressionParser, resolve_expression, cherry_pick_links, merge_configs
34
+
35
+ config = {
36
+ "allLinks": {
37
+ "item1": {"label": "Example", "url": "https://example.com", "tags": ["demo"]},
38
+ "item2": {"label": "Other", "url": "https://other.com", "tags": ["demo", "test"]},
39
+ },
40
+ "macros": {
41
+ "all": {"linkItems": ".demo"}
42
+ }
43
+ }
44
+
45
+ # Low-level: get matching IDs
46
+ parser = ExpressionParser(config)
47
+ ids = parser.query(".demo") # ["item1", "item2"]
48
+ ids = parser.query(".demo - .test") # ["item1"]
49
+
50
+ # Convenience: expression -> full link objects
51
+ results = resolve_expression(config, ".demo")
52
+ # [{"id": "item1", "label": "Example", ...}, {"id": "item2", ...}]
53
+
54
+ # Cherry-pick: expression -> { id: link } dict
55
+ subset = cherry_pick_links(config, ".test")
56
+ # {"item2": {"label": "Other", ...}}
57
+
58
+ # Merge multiple configs
59
+ merged = merge_configs(config1, config2)
60
+ ```
61
+
62
+ ## Installation
63
+
64
+ Copy `expression_parser.py` and `validate_regex.py` into your project, or install from PyPI:
65
+
66
+ ```bash
67
+ pip install alap-python
68
+ # or, with uv (recommended):
69
+ uv add alap-python
70
+ ```
71
+
72
+ ## Used by
73
+
74
+ - [flask-sqlite](https://github.com/DanielSmith/alap/tree/main/examples/servers/flask-sqlite) server
75
+ - [fastapi-postgres](https://github.com/DanielSmith/alap/tree/main/examples/servers/fastapi-postgres) server
76
+ - [django-sqlite](https://github.com/DanielSmith/alap/tree/main/examples/servers/django-sqlite) server
@@ -0,0 +1,96 @@
1
+ Metadata-Version: 2.4
2
+ Name: alap-python
3
+ Version: 0.1.0
4
+ Summary: Alap expression parser for Python — resolve link expressions server-side
5
+ Author: Daniel Smith
6
+ License-Expression: Apache-2.0
7
+ Project-URL: Homepage, https://alap.info
8
+ Project-URL: Repository, https://github.com/DanielSmith/alap-python
9
+ Keywords: alap,expression-parser,links,menu
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Software Development :: Libraries
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+
21
+ # Alap Expression Parser — Python
22
+
23
+ [Alap](https://github.com/DanielSmith/alap) is a JavaScript library that turns links into dynamic menus with multiple curated targets. This is the server-side Python port of the expression parser, enabling expression resolution in Python servers without a Node.js sidecar.
24
+
25
+ ## What's included
26
+
27
+ - **`expression_parser.py`** — Recursive descent parser for the Alap expression grammar, macro expansion, regex search, config merging
28
+ - **`validate_regex.py`** — ReDoS guard for user-supplied regex patterns
29
+
30
+ ## What's NOT included
31
+
32
+ This is the server-side subset of `alap/core`. It covers expression parsing, config merging, and regex validation — everything a server needs to resolve cherry-pick and query requests.
33
+
34
+ Browser-side concerns (DOM rendering, menu positioning, event handling, URL sanitization) are handled by the JavaScript client and are not ported here.
35
+
36
+ ## Supported expression syntax
37
+
38
+ ```
39
+ item1, item2 # item IDs (comma-separated)
40
+ .coffee # tag query
41
+ .nyc + .bridge # AND (intersection)
42
+ .nyc | .sf # OR (union)
43
+ .nyc - .tourist # WITHOUT (subtraction)
44
+ (.nyc | .sf) + .open # parenthesized grouping
45
+ @favorites # macro expansion
46
+ /mypattern/ # regex search (by pattern key)
47
+ /mypattern/lu # regex with field options
48
+ ```
49
+
50
+ ## Usage
51
+
52
+ ```python
53
+ from expression_parser import ExpressionParser, resolve_expression, cherry_pick_links, merge_configs
54
+
55
+ config = {
56
+ "allLinks": {
57
+ "item1": {"label": "Example", "url": "https://example.com", "tags": ["demo"]},
58
+ "item2": {"label": "Other", "url": "https://other.com", "tags": ["demo", "test"]},
59
+ },
60
+ "macros": {
61
+ "all": {"linkItems": ".demo"}
62
+ }
63
+ }
64
+
65
+ # Low-level: get matching IDs
66
+ parser = ExpressionParser(config)
67
+ ids = parser.query(".demo") # ["item1", "item2"]
68
+ ids = parser.query(".demo - .test") # ["item1"]
69
+
70
+ # Convenience: expression -> full link objects
71
+ results = resolve_expression(config, ".demo")
72
+ # [{"id": "item1", "label": "Example", ...}, {"id": "item2", ...}]
73
+
74
+ # Cherry-pick: expression -> { id: link } dict
75
+ subset = cherry_pick_links(config, ".test")
76
+ # {"item2": {"label": "Other", ...}}
77
+
78
+ # Merge multiple configs
79
+ merged = merge_configs(config1, config2)
80
+ ```
81
+
82
+ ## Installation
83
+
84
+ Copy `expression_parser.py` and `validate_regex.py` into your project, or install from PyPI:
85
+
86
+ ```bash
87
+ pip install alap-python
88
+ # or, with uv (recommended):
89
+ uv add alap-python
90
+ ```
91
+
92
+ ## Used by
93
+
94
+ - [flask-sqlite](https://github.com/DanielSmith/alap/tree/main/examples/servers/flask-sqlite) server
95
+ - [fastapi-postgres](https://github.com/DanielSmith/alap/tree/main/examples/servers/fastapi-postgres) server
96
+ - [django-sqlite](https://github.com/DanielSmith/alap/tree/main/examples/servers/django-sqlite) server
@@ -0,0 +1,10 @@
1
+ README.md
2
+ pyproject.toml
3
+ alap_python.egg-info/PKG-INFO
4
+ alap_python.egg-info/SOURCES.txt
5
+ alap_python.egg-info/dependency_links.txt
6
+ alap_python.egg-info/top_level.txt
7
+ tests/test_expression_parser.py
8
+ tests/test_ssrf_guard.py
9
+ tests/test_validate_config.py
10
+ tests/test_validate_regex.py
@@ -0,0 +1,37 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "alap-python"
7
+ version = "0.1.0"
8
+ description = "Alap expression parser for Python — resolve link expressions server-side"
9
+ readme = "README.md"
10
+ license = "Apache-2.0"
11
+ requires-python = ">=3.10"
12
+ authors = [{ name = "Daniel Smith" }]
13
+ keywords = ["alap", "expression-parser", "links", "menu"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: Developers",
17
+ "Programming Language :: Python :: 3",
18
+ "Programming Language :: Python :: 3.10",
19
+ "Programming Language :: Python :: 3.11",
20
+ "Programming Language :: Python :: 3.12",
21
+ "Programming Language :: Python :: 3.13",
22
+ "Topic :: Software Development :: Libraries",
23
+ ]
24
+
25
+ [project.urls]
26
+ Homepage = "https://alap.info"
27
+ Repository = "https://github.com/DanielSmith/alap-python"
28
+
29
+ [tool.setuptools.packages.find]
30
+ where = ["."]
31
+ include = ["alap*"]
32
+
33
+ [tool.setuptools.package-dir]
34
+ alap = "."
35
+
36
+ [tool.pytest.ini_options]
37
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,631 @@
1
+ # Copyright 2026 Daniel Smith
2
+ # Licensed under the Apache License, Version 2.0
3
+ # See https://www.apache.org/licenses/LICENSE-2.0
4
+
5
+ """Tests for the Python expression parser — mirrors the TS test tiers."""
6
+
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ import pytest
11
+
12
+ # Add parent to path so we can import the parser
13
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
14
+
15
+ from expression_parser import (
16
+ ExpressionParser,
17
+ cherry_pick_links,
18
+ merge_configs,
19
+ resolve_expression,
20
+ )
21
+ from sanitize_url import sanitize_url
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # Test config — mirrors tests/fixtures/links.ts
25
+ # ---------------------------------------------------------------------------
26
+
27
+ TEST_CONFIG = {
28
+ "settings": {"listType": "ul", "menuTimeout": 5000},
29
+ "macros": {
30
+ "cars": {"linkItems": "vwbug, bmwe36"},
31
+ "nycbridges": {"linkItems": ".nyc + .bridge"},
32
+ "everything": {"linkItems": ".nyc | .sf"},
33
+ },
34
+ "searchPatterns": {
35
+ "bridges": "bridge",
36
+ "germanCars": {
37
+ "pattern": "VW|BMW",
38
+ "options": {"fields": "l", "limit": 5},
39
+ },
40
+ },
41
+ "allLinks": {
42
+ "vwbug": {
43
+ "label": "VW Bug",
44
+ "url": "https://example.com/vwbug",
45
+ "tags": ["car", "vw", "germany"],
46
+ },
47
+ "bmwe36": {
48
+ "label": "BMW E36",
49
+ "url": "https://example.com/bmwe36",
50
+ "tags": ["car", "bmw", "germany"],
51
+ },
52
+ "miata": {
53
+ "label": "Mazda Miata",
54
+ "url": "https://example.com/miata",
55
+ "tags": ["car", "mazda", "japan"],
56
+ },
57
+ "brooklyn": {
58
+ "label": "Brooklyn Bridge",
59
+ "url": "https://example.com/brooklyn",
60
+ "tags": ["nyc", "bridge", "landmark"],
61
+ },
62
+ "manhattan": {
63
+ "label": "Manhattan Bridge",
64
+ "url": "https://example.com/manhattan",
65
+ "tags": ["nyc", "bridge"],
66
+ },
67
+ "highline": {
68
+ "label": "The High Line",
69
+ "url": "https://example.com/highline",
70
+ "tags": ["nyc", "park", "landmark"],
71
+ },
72
+ "centralpark": {
73
+ "label": "Central Park",
74
+ "url": "https://example.com/centralpark",
75
+ "tags": ["nyc", "park"],
76
+ },
77
+ "goldengate": {
78
+ "label": "Golden Gate",
79
+ "url": "https://example.com/goldengate",
80
+ "tags": ["sf", "bridge", "landmark"],
81
+ },
82
+ "dolores": {
83
+ "label": "Dolores Park",
84
+ "url": "https://example.com/dolores",
85
+ "tags": ["sf", "park"],
86
+ },
87
+ "towerbridge": {
88
+ "label": "Tower Bridge",
89
+ "url": "https://example.com/towerbridge",
90
+ "tags": ["london", "bridge", "landmark"],
91
+ },
92
+ "aqus": {
93
+ "label": "Aqus Cafe",
94
+ "url": "https://example.com/aqus",
95
+ "tags": ["coffee", "sf"],
96
+ },
97
+ "bluebottle": {
98
+ "label": "Blue Bottle",
99
+ "url": "https://example.com/bluebottle",
100
+ "tags": ["coffee", "sf", "nyc"],
101
+ },
102
+ "acre": {
103
+ "label": "Acre Coffee",
104
+ "url": "https://example.com/acre",
105
+ "tags": ["coffee"],
106
+ },
107
+ },
108
+ }
109
+
110
+
111
+ @pytest.fixture
112
+ def parser():
113
+ return ExpressionParser(TEST_CONFIG)
114
+
115
+
116
+ # ---------------------------------------------------------------------------
117
+ # Tier 1 — Operands
118
+ # ---------------------------------------------------------------------------
119
+
120
+
121
+ class TestOperands:
122
+ def test_single_item_id(self, parser):
123
+ assert parser.query("vwbug") == ["vwbug"]
124
+
125
+ def test_single_class(self, parser):
126
+ result = parser.query(".car")
127
+ assert sorted(result) == ["bmwe36", "miata", "vwbug"]
128
+
129
+ def test_nonexistent_item(self, parser):
130
+ assert parser.query("doesnotexist") == []
131
+
132
+ def test_nonexistent_class(self, parser):
133
+ assert parser.query(".doesnotexist") == []
134
+
135
+
136
+ # ---------------------------------------------------------------------------
137
+ # Tier 2 — Commas
138
+ # ---------------------------------------------------------------------------
139
+
140
+
141
+ class TestCommas:
142
+ def test_two_items(self, parser):
143
+ assert parser.query("vwbug, bmwe36") == ["vwbug", "bmwe36"]
144
+
145
+ def test_three_items(self, parser):
146
+ assert parser.query("vwbug, bmwe36, miata") == ["vwbug", "bmwe36", "miata"]
147
+
148
+ def test_item_and_class(self, parser):
149
+ result = parser.query("vwbug, .sf")
150
+ assert result[0] == "vwbug"
151
+ assert "goldengate" in result
152
+ assert "dolores" in result
153
+
154
+ def test_deduplication(self, parser):
155
+ result = parser.query("vwbug, vwbug")
156
+ assert result == ["vwbug"]
157
+
158
+
159
+ # ---------------------------------------------------------------------------
160
+ # Tier 3 — Operators
161
+ # ---------------------------------------------------------------------------
162
+
163
+
164
+ class TestOperators:
165
+ def test_intersection(self, parser):
166
+ result = parser.query(".nyc + .bridge")
167
+ assert sorted(result) == ["brooklyn", "manhattan"]
168
+
169
+ def test_union(self, parser):
170
+ result = parser.query(".nyc | .sf")
171
+ assert "brooklyn" in result
172
+ assert "goldengate" in result
173
+
174
+ def test_subtraction(self, parser):
175
+ result = parser.query(".nyc - .bridge")
176
+ assert "brooklyn" not in result
177
+ assert "manhattan" not in result
178
+ assert "highline" in result
179
+ assert "centralpark" in result
180
+
181
+
182
+ # ---------------------------------------------------------------------------
183
+ # Tier 4 — Chained operators
184
+ # ---------------------------------------------------------------------------
185
+
186
+
187
+ class TestChained:
188
+ def test_three_way_intersection(self, parser):
189
+ result = parser.query(".nyc + .bridge + .landmark")
190
+ assert result == ["brooklyn"]
191
+
192
+ def test_union_then_subtract(self, parser):
193
+ result = parser.query(".nyc | .sf - .bridge")
194
+ # Left-to-right: (.nyc | .sf) - .bridge
195
+ assert "brooklyn" not in result
196
+ assert "manhattan" not in result
197
+ assert "goldengate" not in result
198
+ assert "highline" in result
199
+
200
+
201
+ # ---------------------------------------------------------------------------
202
+ # Tier 5 — Mixed
203
+ # ---------------------------------------------------------------------------
204
+
205
+
206
+ class TestMixed:
207
+ def test_item_and_class_intersection(self, parser):
208
+ result = parser.query("brooklyn + .landmark")
209
+ assert result == ["brooklyn"]
210
+
211
+ def test_class_union_with_item(self, parser):
212
+ result = parser.query(".car | goldengate")
213
+ assert "vwbug" in result
214
+ assert "goldengate" in result
215
+
216
+
217
+ # ---------------------------------------------------------------------------
218
+ # Tier 6 — Macros
219
+ # ---------------------------------------------------------------------------
220
+
221
+
222
+ class TestMacros:
223
+ def test_named_macro(self, parser):
224
+ result = parser.query("@cars")
225
+ assert sorted(result) == ["bmwe36", "vwbug"]
226
+
227
+ def test_macro_with_operators(self, parser):
228
+ result = parser.query("@nycbridges")
229
+ assert sorted(result) == ["brooklyn", "manhattan"]
230
+
231
+ def test_unknown_macro(self, parser):
232
+ result = parser.query("@nonexistent")
233
+ assert result == []
234
+
235
+ def test_bare_macro_with_anchor(self, parser):
236
+ # Bare @ uses anchorId
237
+ config_with_macro = {
238
+ **TEST_CONFIG,
239
+ "macros": {**TEST_CONFIG["macros"], "myanchor": {"linkItems": "vwbug"}},
240
+ }
241
+ p = ExpressionParser(config_with_macro)
242
+ result = p.query("@", "myanchor")
243
+ assert result == ["vwbug"]
244
+
245
+
246
+ # ---------------------------------------------------------------------------
247
+ # Tier 7 — Parentheses
248
+ # ---------------------------------------------------------------------------
249
+
250
+
251
+ class TestParentheses:
252
+ def test_basic_grouping(self, parser):
253
+ # Without parens: .nyc | .sf + .bridge => (.nyc | .sf) + .bridge (left-to-right)
254
+ # With parens: .nyc | (.sf + .bridge) => .nyc union (sf bridges)
255
+ without = parser.query(".nyc | .sf + .bridge")
256
+ with_parens = parser.query(".nyc | (.sf + .bridge)")
257
+ # with_parens should include all NYC items + goldengate
258
+ assert "highline" in with_parens
259
+ assert "centralpark" in with_parens
260
+ assert "goldengate" in with_parens
261
+
262
+ def test_nested_parens(self, parser):
263
+ result = parser.query("((.nyc + .bridge) | (.sf + .bridge))")
264
+ assert sorted(result) == ["brooklyn", "goldengate", "manhattan"]
265
+
266
+ def test_parens_with_subtraction(self, parser):
267
+ result = parser.query("(.nyc | .sf) - .park")
268
+ assert "centralpark" not in result
269
+ assert "dolores" not in result
270
+ assert "brooklyn" in result
271
+
272
+
273
+ # ---------------------------------------------------------------------------
274
+ # Tier 8 — Edge cases
275
+ # ---------------------------------------------------------------------------
276
+
277
+
278
+ class TestEdgeCases:
279
+ def test_empty_string(self, parser):
280
+ assert parser.query("") == []
281
+
282
+ def test_whitespace_only(self, parser):
283
+ assert parser.query(" ") == []
284
+
285
+ def test_none_expression(self, parser):
286
+ assert parser.query(None) == []
287
+
288
+ def test_empty_config(self):
289
+ p = ExpressionParser({"allLinks": {}})
290
+ assert p.query(".car") == []
291
+
292
+ def test_no_alllinks(self):
293
+ p = ExpressionParser({})
294
+ assert p.query("vwbug") == []
295
+
296
+
297
+ # ---------------------------------------------------------------------------
298
+ # Convenience functions
299
+ # ---------------------------------------------------------------------------
300
+
301
+
302
+ class TestConvenience:
303
+ def test_resolve_expression(self):
304
+ results = resolve_expression(TEST_CONFIG, ".car + .germany")
305
+ ids = [r["id"] for r in results]
306
+ assert sorted(ids) == ["bmwe36", "vwbug"]
307
+ # Each result should have id, label, url, tags
308
+ for r in results:
309
+ assert "id" in r
310
+ assert "label" in r
311
+ assert "url" in r
312
+
313
+ def test_cherry_pick_links(self):
314
+ result = cherry_pick_links(TEST_CONFIG, "vwbug, miata")
315
+ assert "vwbug" in result
316
+ assert "miata" in result
317
+ assert "bmwe36" not in result
318
+
319
+ def test_merge_configs(self):
320
+ config1 = {
321
+ "allLinks": {"a": {"label": "A", "url": "https://a.com"}},
322
+ "macros": {"m1": {"linkItems": "a"}},
323
+ }
324
+ config2 = {
325
+ "allLinks": {"b": {"label": "B", "url": "https://b.com"}},
326
+ "macros": {"m2": {"linkItems": "b"}},
327
+ }
328
+ merged = merge_configs(config1, config2)
329
+ assert "a" in merged["allLinks"]
330
+ assert "b" in merged["allLinks"]
331
+ assert "m1" in merged["macros"]
332
+ assert "m2" in merged["macros"]
333
+
334
+ def test_merge_configs_later_wins(self):
335
+ config1 = {"allLinks": {"a": {"label": "Old", "url": "https://old.com"}}}
336
+ config2 = {"allLinks": {"a": {"label": "New", "url": "https://new.com"}}}
337
+ merged = merge_configs(config1, config2)
338
+ assert merged["allLinks"]["a"]["label"] == "New"
339
+
340
+
341
+ # ---------------------------------------------------------------------------
342
+ # URL sanitization
343
+ # ---------------------------------------------------------------------------
344
+
345
+
346
+ class TestSanitizeUrl:
347
+ def test_safe_urls(self):
348
+ assert sanitize_url("https://example.com") == "https://example.com"
349
+ assert sanitize_url("http://example.com") == "http://example.com"
350
+ assert sanitize_url("mailto:user@example.com") == "mailto:user@example.com"
351
+ assert sanitize_url("/relative/path") == "/relative/path"
352
+ assert sanitize_url("") == ""
353
+
354
+ def test_javascript_blocked(self):
355
+ assert sanitize_url("javascript:alert(1)") == "about:blank"
356
+ assert sanitize_url("JAVASCRIPT:alert(1)") == "about:blank"
357
+ assert sanitize_url("JavaScript:void(0)") == "about:blank"
358
+
359
+ def test_data_blocked(self):
360
+ assert sanitize_url("data:text/html,<h1>Hi</h1>") == "about:blank"
361
+
362
+ def test_vbscript_blocked(self):
363
+ assert sanitize_url("vbscript:MsgBox") == "about:blank"
364
+
365
+ def test_blob_blocked(self):
366
+ assert sanitize_url("blob:https://example.com/uuid") == "about:blank"
367
+
368
+ def test_control_chars_stripped(self):
369
+ assert sanitize_url("java\nscript:alert(1)") == "about:blank"
370
+ assert sanitize_url("java\tscript:alert(1)") == "about:blank"
371
+
372
+ def test_sanitize_in_resolve(self):
373
+ """Ensure resolve_expression sanitizes URLs."""
374
+ config = {
375
+ "allLinks": {
376
+ "bad": {
377
+ "label": "Evil",
378
+ "url": "javascript:alert(1)",
379
+ "tags": ["test"],
380
+ },
381
+ "good": {
382
+ "label": "Good",
383
+ "url": "https://example.com",
384
+ "tags": ["test"],
385
+ },
386
+ }
387
+ }
388
+ results = resolve_expression(config, ".test")
389
+ urls = {r["id"]: r["url"] for r in results}
390
+ assert urls["bad"] == "about:blank"
391
+ assert urls["good"] == "https://example.com"
392
+
393
+ def test_sanitize_in_cherry_pick(self):
394
+ """Ensure cherry_pick_links sanitizes URLs."""
395
+ config = {
396
+ "allLinks": {
397
+ "bad": {
398
+ "label": "Evil",
399
+ "url": "javascript:alert(1)",
400
+ "tags": ["test"],
401
+ },
402
+ }
403
+ }
404
+ result = cherry_pick_links(config, ".test")
405
+ assert result["bad"]["url"] == "about:blank"
406
+
407
+
408
+ # ---------------------------------------------------------------------------
409
+ # Protocol config for tests
410
+ # ---------------------------------------------------------------------------
411
+
412
+ def _tag_protocol(segments, link, item_id):
413
+ """Test protocol handler: checks if the link has a given tag."""
414
+ if not segments:
415
+ return False
416
+ tag = segments[0]
417
+ return tag in (link.get("tags") or [])
418
+
419
+
420
+ def _throwing_protocol(segments, link, item_id):
421
+ """Protocol handler that always throws."""
422
+ raise ValueError("boom")
423
+
424
+
425
+ PROTOCOL_CONFIG = {
426
+ **TEST_CONFIG,
427
+ "protocols": {
428
+ "hastag": {"handler": _tag_protocol},
429
+ "broken": {"handler": _throwing_protocol},
430
+ },
431
+ }
432
+
433
+
434
+ # ---------------------------------------------------------------------------
435
+ # Tier 9 — Protocols
436
+ # ---------------------------------------------------------------------------
437
+
438
+
439
+ class TestProtocols:
440
+ def test_protocol_tokenization(self):
441
+ """Protocol :name:arg: produces a PROTOCOL token."""
442
+ from expression_parser import ExpressionParser
443
+ tokens = ExpressionParser._tokenize(":time:7d:")
444
+ assert len(tokens) == 1
445
+ assert tokens[0].type == "PROTOCOL"
446
+ assert tokens[0].value == "time|7d"
447
+
448
+ def test_protocol_multi_arg_tokenization(self):
449
+ """Protocol :name:a:b: joins segments with |."""
450
+ from expression_parser import ExpressionParser
451
+ tokens = ExpressionParser._tokenize(":time:7d:newest:")
452
+ assert len(tokens) == 1
453
+ assert tokens[0].type == "PROTOCOL"
454
+ assert tokens[0].value == "time|7d|newest"
455
+
456
+ def test_protocol_resolution(self):
457
+ """Protocol resolves via handler predicate."""
458
+ parser = ExpressionParser(PROTOCOL_CONFIG)
459
+ result = parser.query(":hastag:coffee:")
460
+ assert sorted(result) == ["acre", "aqus", "bluebottle"]
461
+
462
+ def test_unknown_protocol(self):
463
+ """Unknown protocol warns and returns empty."""
464
+ parser = ExpressionParser(PROTOCOL_CONFIG)
465
+ with pytest.warns(UserWarning, match="Unknown protocol"):
466
+ result = parser.query(":nonexistent:arg:")
467
+ assert result == []
468
+
469
+ def test_protocol_handler_throws(self):
470
+ """Handler that throws skips that item with a warning."""
471
+ parser = ExpressionParser(PROTOCOL_CONFIG)
472
+ with pytest.warns(UserWarning, match="handler threw"):
473
+ result = parser.query(":broken:arg:")
474
+ assert result == []
475
+
476
+ def test_protocol_with_tag_intersection(self):
477
+ """Protocol composed with tag operator."""
478
+ parser = ExpressionParser(PROTOCOL_CONFIG)
479
+ result = parser.query(":hastag:coffee: + .sf")
480
+ assert sorted(result) == ["aqus", "bluebottle"]
481
+
482
+ def test_protocol_with_tag_union(self):
483
+ """Protocol composed with union."""
484
+ parser = ExpressionParser(PROTOCOL_CONFIG)
485
+ result = parser.query(":hastag:coffee: | .bridge")
486
+ # coffee items + bridge items
487
+ assert "acre" in result
488
+ assert "brooklyn" in result
489
+ assert "goldengate" in result
490
+
491
+ def test_protocol_no_config(self):
492
+ """Protocol with no protocols in config returns empty."""
493
+ parser = ExpressionParser(TEST_CONFIG)
494
+ with pytest.warns(UserWarning, match="Unknown protocol"):
495
+ result = parser.query(":hastag:coffee:")
496
+ assert result == []
497
+
498
+
499
+ # ---------------------------------------------------------------------------
500
+ # Tier 10 — Refiners
501
+ # ---------------------------------------------------------------------------
502
+
503
+
504
+ class TestRefiners:
505
+ def test_refiner_tokenization(self):
506
+ """Refiner *name* produces a REFINER token."""
507
+ from expression_parser import ExpressionParser
508
+ tokens = ExpressionParser._tokenize("*sort*")
509
+ assert len(tokens) == 1
510
+ assert tokens[0].type == "REFINER"
511
+ assert tokens[0].value == "sort"
512
+
513
+ def test_refiner_with_arg_tokenization(self):
514
+ """Refiner *name:arg* preserves arg."""
515
+ from expression_parser import ExpressionParser
516
+ tokens = ExpressionParser._tokenize("*sort:label*")
517
+ assert len(tokens) == 1
518
+ assert tokens[0].type == "REFINER"
519
+ assert tokens[0].value == "sort:label"
520
+
521
+ def test_sort_refiner_default(self):
522
+ """*sort* sorts by label (default)."""
523
+ parser = ExpressionParser(TEST_CONFIG)
524
+ result = parser.query(".car *sort*")
525
+ labels = [TEST_CONFIG["allLinks"][r]["label"] for r in result]
526
+ assert labels == sorted(labels, key=str.lower)
527
+
528
+ def test_sort_refiner_by_url(self):
529
+ """*sort:url* sorts by url field."""
530
+ parser = ExpressionParser(TEST_CONFIG)
531
+ result = parser.query(".car *sort:url*")
532
+ urls = [TEST_CONFIG["allLinks"][r]["url"] for r in result]
533
+ assert urls == sorted(urls, key=str.lower)
534
+
535
+ def test_reverse_refiner(self):
536
+ """*reverse* reverses the order."""
537
+ parser = ExpressionParser(TEST_CONFIG)
538
+ normal = parser.query(".car *sort*")
539
+ reversed_result = parser.query(".car *sort* *reverse*")
540
+ assert reversed_result == list(reversed(normal))
541
+
542
+ def test_limit_refiner(self):
543
+ """*limit:N* takes first N items."""
544
+ parser = ExpressionParser(TEST_CONFIG)
545
+ result = parser.query(".car *sort* *limit:2*")
546
+ assert len(result) == 2
547
+
548
+ def test_limit_zero(self):
549
+ """*limit:0* returns empty."""
550
+ parser = ExpressionParser(TEST_CONFIG)
551
+ result = parser.query(".car *limit:0*")
552
+ assert result == []
553
+
554
+ def test_skip_refiner(self):
555
+ """*skip:N* skips first N items."""
556
+ parser = ExpressionParser(TEST_CONFIG)
557
+ full = parser.query(".car *sort*")
558
+ skipped = parser.query(".car *sort* *skip:1*")
559
+ assert skipped == full[1:]
560
+
561
+ def test_shuffle_refiner(self):
562
+ """*shuffle* randomizes (just check it returns the same items)."""
563
+ parser = ExpressionParser(TEST_CONFIG)
564
+ result = parser.query(".car *shuffle*")
565
+ assert sorted(result) == sorted(["vwbug", "bmwe36", "miata"])
566
+
567
+ def test_unique_refiner(self):
568
+ """*unique:field* deduplicates by field."""
569
+ config = {
570
+ "allLinks": {
571
+ "a": {"label": "A", "url": "https://same.com", "tags": ["t"]},
572
+ "b": {"label": "B", "url": "https://same.com", "tags": ["t"]},
573
+ "c": {"label": "C", "url": "https://other.com", "tags": ["t"]},
574
+ }
575
+ }
576
+ parser = ExpressionParser(config)
577
+ result = parser.query(".t *unique:url*")
578
+ urls = [config["allLinks"][r]["url"] for r in result]
579
+ assert len(urls) == len(set(urls))
580
+ assert len(result) == 2
581
+
582
+ def test_unknown_refiner(self):
583
+ """Unknown refiner warns and skips."""
584
+ parser = ExpressionParser(TEST_CONFIG)
585
+ with pytest.warns(UserWarning, match="Unknown refiner"):
586
+ result = parser.query(".car *bogus*")
587
+ # Should still return the .car items, just unrefined
588
+ assert sorted(result) == ["bmwe36", "miata", "vwbug"]
589
+
590
+ def test_refiner_in_parenthesized_group(self):
591
+ """Refiners work inside parenthesized groups."""
592
+ parser = ExpressionParser(TEST_CONFIG)
593
+ result = parser.query("(.car *sort* *limit:1*), goldengate")
594
+ # First segment: sorted cars limited to 1, second segment: goldengate
595
+ assert len(result) == 2
596
+ assert "goldengate" in result
597
+
598
+ def test_refiner_chained_sort_limit(self):
599
+ """Sort then limit produces sorted subset."""
600
+ parser = ExpressionParser(TEST_CONFIG)
601
+ sorted_all = parser.query(".car *sort*")
602
+ sorted_limited = parser.query(".car *sort* *limit:2*")
603
+ assert sorted_limited == sorted_all[:2]
604
+
605
+
606
+ # ---------------------------------------------------------------------------
607
+ # Tier 11 — Hyphenated identifiers
608
+ # ---------------------------------------------------------------------------
609
+
610
+
611
+ class TestHyphenatedIdentifiers:
612
+ def test_hyphen_parsed_as_without(self):
613
+ """Hyphenated identifiers are parsed as id MINUS id."""
614
+ config = {
615
+ "allLinks": {
616
+ "my": {"label": "My", "url": "https://my.com", "tags": []},
617
+ "item": {"label": "Item", "url": "https://item.com", "tags": []},
618
+ }
619
+ }
620
+ parser = ExpressionParser(config)
621
+ # "my-item" should be parsed as "my" MINUS "item", not as a single ID
622
+ result = parser.query("my-item")
623
+ # "my" minus "item" = ["my"] (since "item" is removed)
624
+ assert result == ["my"]
625
+
626
+ def test_hyphen_in_class_context(self):
627
+ """Hyphen between bare words acts as subtraction."""
628
+ parser = ExpressionParser(TEST_CONFIG)
629
+ # vwbug-miata should be vwbug WITHOUT miata
630
+ result = parser.query("vwbug - miata")
631
+ assert result == ["vwbug"]
@@ -0,0 +1,95 @@
1
+ import sys, os
2
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
3
+ from ssrf_guard import is_private_host
4
+
5
+
6
# ---------------------------------------------------------------------------
# Public hosts (should return False)
# ---------------------------------------------------------------------------

def test_public_ip():
    """A well-known public IPv4 address is not treated as private."""
    assert is_private_host("https://8.8.8.8/path") is False


def test_public_domain():
    """An ordinary public domain name is not treated as private."""
    assert is_private_host("https://example.com") is False
16
+
17
+
18
# ---------------------------------------------------------------------------
# Localhost variants (should return True)
# ---------------------------------------------------------------------------

def test_localhost():
    """Plain localhost is private."""
    assert is_private_host("http://localhost/admin") is True


def test_localhost_with_port():
    """A port suffix does not hide localhost."""
    assert is_private_host("http://localhost:8080") is True


def test_subdomain_of_localhost():
    """Anything under the .localhost TLD is private too."""
    assert is_private_host("http://foo.localhost/bar") is True
32
+
33
+
34
# ---------------------------------------------------------------------------
# Private IPv4 ranges (should return True)
# ---------------------------------------------------------------------------

def test_loopback_127():
    """127.0.0.0/8 loopback is private."""
    assert is_private_host("http://127.0.0.1") is True


def test_private_10():
    """10.0.0.0/8 (RFC 1918) is private."""
    assert is_private_host("http://10.0.0.1") is True


def test_private_172_16():
    """172.16.0.0/12 (RFC 1918) is private."""
    assert is_private_host("http://172.16.0.1") is True


def test_private_192_168():
    """192.168.0.0/16 (RFC 1918) is private."""
    assert is_private_host("http://192.168.1.1") is True


def test_link_local_169_254():
    """169.254.0.0/16 link-local (cloud metadata endpoint) is private."""
    assert is_private_host("http://169.254.169.254/latest/meta-data") is True
56
+
57
+
58
# ---------------------------------------------------------------------------
# IPv6 (should return True)
# ---------------------------------------------------------------------------

def test_ipv6_loopback():
    """The IPv6 loopback address [::1] is private."""
    assert is_private_host("http://[::1]/admin") is True
64
+
65
+
66
# ---------------------------------------------------------------------------
# Malformed URL (fail closed → True)
# ---------------------------------------------------------------------------

def test_malformed_url():
    """Unparseable input fails closed: treated as private."""
    assert is_private_host("not a url at all") is True
72
+
73
+
74
# ---------------------------------------------------------------------------
# IPv4-mapped IPv6 addresses (should return True)
# ---------------------------------------------------------------------------

def test_ipv4_mapped_ipv6_loopback():
    """::ffff:127.0.0.1 must not bypass the loopback check."""
    assert is_private_host("http://[::ffff:127.0.0.1]") is True


def test_ipv4_mapped_ipv6_private_10():
    """::ffff:10.x.x.x must not bypass the RFC 1918 check."""
    assert is_private_host("http://[::ffff:10.0.0.1]") is True
84
+
85
+
86
# ---------------------------------------------------------------------------
# 0.0.0.0 bypass
# ---------------------------------------------------------------------------

def test_zero_address():
    """0.0.0.0 (routes to local interfaces on many stacks) is private."""
    assert is_private_host("http://0.0.0.0/") is True


def test_zero_address_with_port():
    """A port suffix does not hide 0.0.0.0."""
    assert is_private_host("http://0.0.0.0:8080/") is True
@@ -0,0 +1,242 @@
1
+ import sys, os
2
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
3
+ from expression_parser import validate_config
4
+ import pytest
5
+ import copy
6
+
7
+
8
+ # ---------------------------------------------------------------------------
9
+ # Helpers
10
+ # ---------------------------------------------------------------------------
11
+
12
+ def _minimal_config():
13
+ return {
14
+ "allLinks": {
15
+ "alpha": {"url": "https://example.com/alpha", "label": "Alpha"},
16
+ }
17
+ }
18
+
19
+
20
# ---------------------------------------------------------------------------
# Structural validation
# ---------------------------------------------------------------------------

def test_minimal_valid_config_passes():
    """A minimal well-formed config validates and keeps its link."""
    validated = validate_config(_minimal_config())
    assert "allLinks" in validated
    assert "alpha" in validated["allLinks"]


def test_preserves_settings():
    """The settings section passes through validation untouched."""
    cfg = _minimal_config()
    cfg["settings"] = {"listType": "ul", "menuTimeout": 5000}
    settings = validate_config(cfg)["settings"]
    assert settings["listType"] == "ul"
    assert settings["menuTimeout"] == 5000


def test_preserves_macros():
    """Well-formed macros survive validation."""
    cfg = _minimal_config()
    cfg["macros"] = {"fav": {"linkItems": "alpha"}}
    validated = validate_config(cfg)
    assert validated["macros"]["fav"]["linkItems"] == "alpha"


def test_preserves_search_patterns():
    """Safe search patterns survive validation."""
    cfg = _minimal_config()
    cfg["searchPatterns"] = {"bridge": "bridge"}
    validated = validate_config(cfg)
    assert validated["searchPatterns"]["bridge"] == "bridge"
50
+
51
+
52
# ---------------------------------------------------------------------------
# Non-dict inputs
# ---------------------------------------------------------------------------

def test_raises_on_none():
    """None is rejected with a descriptive ValueError."""
    with pytest.raises(ValueError, match="expected a dict"):
        validate_config(None)


def test_raises_on_string():
    """A bare string is rejected with a descriptive ValueError."""
    with pytest.raises(ValueError, match="expected a dict"):
        validate_config("string")


def test_raises_on_list():
    """A list is rejected with a descriptive ValueError."""
    with pytest.raises(ValueError, match="expected a dict"):
        validate_config([])
69
+
70
+
71
# ---------------------------------------------------------------------------
# allLinks validation
# ---------------------------------------------------------------------------

def test_raises_when_alllinks_missing():
    """A config without an allLinks section is rejected."""
    with pytest.raises(ValueError, match="allLinks"):
        validate_config({"settings": {}})


def test_raises_when_alllinks_is_list():
    """allLinks must be a mapping, not a list."""
    with pytest.raises(ValueError, match="allLinks"):
        validate_config({"allLinks": []})


def test_skips_links_with_missing_url():
    """A link entry with no url is silently dropped."""
    validated = validate_config({"allLinks": {"nourl": {"label": "No URL"}}})
    assert "nourl" not in validated["allLinks"]


def test_skips_non_dict_links():
    """Non-dict link entries are dropped while valid siblings survive."""
    cfg = {"allLinks": {"bad": "not a dict", "good": {"url": "https://ok.com"}}}
    links = validate_config(cfg)["allLinks"]
    assert "bad" not in links
    assert "good" in links
96
+
97
+
98
# ---------------------------------------------------------------------------
# URL sanitization
# ---------------------------------------------------------------------------

def test_sanitizes_javascript_url_to_about_blank():
    """A javascript: url is neutralized to about:blank."""
    cfg = {"allLinks": {"xss": {"url": "javascript:alert(1)", "label": "XSS"}}}
    validated = validate_config(cfg)
    assert validated["allLinks"]["xss"]["url"] == "about:blank"


def test_sanitizes_javascript_in_image_field():
    """The image field gets the same javascript: neutralization as url."""
    cfg = {"allLinks": {
        "img": {"url": "https://safe.com", "image": "javascript:alert(1)"},
    }}
    validated = validate_config(cfg)
    assert validated["allLinks"]["img"]["image"] == "about:blank"


def test_leaves_safe_https_url_unchanged():
    """An ordinary https url passes through unmodified."""
    validated = validate_config({"allLinks": {"safe": {"url": "https://example.com"}}})
    assert validated["allLinks"]["safe"]["url"] == "https://example.com"
120
+
121
+
122
# ---------------------------------------------------------------------------
# Tag validation
# ---------------------------------------------------------------------------

def test_filters_non_string_tags():
    """Only string entries survive in a tags list."""
    cfg = {"allLinks": {"a": {"url": "https://x.com", "tags": ["ok", 42, None]}}}
    validated = validate_config(cfg)
    assert validated["allLinks"]["a"]["tags"] == ["ok"]


def test_ignores_non_list_tags():
    """A non-list tags value is dropped entirely, not coerced."""
    cfg = {"allLinks": {"a": {"url": "https://x.com", "tags": "not-a-list"}}}
    validated = validate_config(cfg)
    # tags key should not be present when the source wasn't a list
    assert "tags" not in validated["allLinks"]["a"]
137
+
138
+
139
# ---------------------------------------------------------------------------
# Hyphen rejection
# ---------------------------------------------------------------------------
# Hyphens are the subtraction operator in expressions, so names that feed
# the expression grammar (item ids, macro names, pattern keys, tags) must
# not contain them; display-only fields may.

def test_skips_hyphenated_item_ids():
    """A link id containing a hyphen is dropped."""
    validated = validate_config({"allLinks": {"bad-id": {"url": "https://x.com"}}})
    assert "bad-id" not in validated["allLinks"]


def test_skips_hyphenated_macro_names():
    """A macro name containing a hyphen is dropped."""
    cfg = _minimal_config()
    cfg["macros"] = {"my-macro": {"linkItems": "alpha"}}
    validated = validate_config(cfg)
    assert "macros" not in validated or "my-macro" not in validated.get("macros", {})


def test_skips_hyphenated_search_pattern_keys():
    """A search-pattern key containing a hyphen is dropped."""
    cfg = _minimal_config()
    cfg["searchPatterns"] = {"my-pattern": "bridge"}
    validated = validate_config(cfg)
    assert "searchPatterns" not in validated or "my-pattern" not in validated.get("searchPatterns", {})


def test_strips_hyphenated_tags_but_keeps_link():
    """A hyphenated tag is stripped without losing the whole link."""
    cfg = {"allLinks": {"a": {"url": "https://x.com", "tags": ["good", "bad-tag"]}}}
    validated = validate_config(cfg)
    assert "a" in validated["allLinks"]
    assert validated["allLinks"]["a"]["tags"] == ["good"]


def test_allows_hyphens_in_non_expression_fields():
    """Display and URL fields keep their hyphens untouched."""
    cfg = {"allLinks": {"a": {
        "url": "https://my-site.com",
        "label": "My-Label",
        "cssClass": "my-class",
        "description": "some-thing",
    }}}
    link = validate_config(cfg)["allLinks"]["a"]
    assert link["url"] == "https://my-site.com"
    assert link["label"] == "My-Label"
    assert link["cssClass"] == "my-class"
    assert link["description"] == "some-thing"
183
+
184
+
185
# ---------------------------------------------------------------------------
# Dangerous regex removal
# ---------------------------------------------------------------------------

def test_removes_dangerous_regex_patterns():
    """A ReDoS-prone pattern is stripped from searchPatterns."""
    cfg = _minimal_config()
    cfg["searchPatterns"] = {"evil": "(a+)+"}
    validated = validate_config(cfg)
    assert "searchPatterns" not in validated or "evil" not in validated.get("searchPatterns", {})
194
+
195
+
196
# ---------------------------------------------------------------------------
# Prototype-pollution / dunder blocking
# ---------------------------------------------------------------------------

def test_drops_proto_keys_from_alllinks():
    """__proto__ keys are stripped; legitimate siblings survive."""
    cfg = {"allLinks": {
        "__proto__": {"url": "https://evil.com"},
        "safe": {"url": "https://safe.com"},
    }}
    links = validate_config(cfg)["allLinks"]
    assert "__proto__" not in links
    assert "safe" in links


def test_drops_class_dunder_keys():
    """__class__ keys are stripped; legitimate siblings survive."""
    cfg = {"allLinks": {
        "__class__": {"url": "https://evil.com"},
        "ok": {"url": "https://ok.com"},
    }}
    links = validate_config(cfg)["allLinks"]
    assert "__class__" not in links
    assert "ok" in links


def test_drops_bases_dunder_keys():
    """__bases__ keys are stripped; legitimate siblings survive."""
    cfg = {"allLinks": {
        "__bases__": {"url": "https://evil.com"},
        "ok": {"url": "https://ok.com"},
    }}
    links = validate_config(cfg)["allLinks"]
    assert "__bases__" not in links
    assert "ok" in links
228
+
229
+
230
# ---------------------------------------------------------------------------
# Input immutability
# ---------------------------------------------------------------------------

def test_does_not_mutate_input():
    """Validation sanitizes a copy and never alters the caller's config."""
    cfg = {
        "allLinks": {
            "a": {"url": "javascript:alert(1)", "tags": ["x", 42]},
        }
    }
    snapshot = copy.deepcopy(cfg)
    validate_config(cfg)
    assert cfg == snapshot
@@ -0,0 +1,54 @@
1
+ import sys, os
2
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
3
+ from validate_regex import validate_regex
4
+
5
+
6
# ---------------------------------------------------------------------------
# Valid patterns
# ---------------------------------------------------------------------------

def test_valid_simple_pattern():
    """A plain literal is safe."""
    assert validate_regex("bridge")["safe"] is True


def test_valid_anchored_pattern():
    """Anchors alone do not make a pattern dangerous."""
    assert validate_regex("^foo$")["safe"] is True


def test_valid_character_class():
    """A quantified character class is safe."""
    assert validate_regex("[a-z]+")["safe"] is True


def test_safe_quantified_group():
    """A quantified group with no inner quantifier is safe."""
    assert validate_regex("(abc)+")["safe"] is True


def test_safe_alternation_group():
    """A quantified alternation of plain literals is safe."""
    assert validate_regex("(a|b)*")["safe"] is True
28
+
29
+
30
# ---------------------------------------------------------------------------
# Invalid / dangerous patterns
# ---------------------------------------------------------------------------

def test_invalid_syntax_unclosed_bracket():
    """Syntactically broken patterns are reported unsafe."""
    verdict = validate_regex("[unclosed")
    assert verdict["safe"] is False


def test_nested_quantifier_a_plus_plus():
    """(a+)+ — the classic ReDoS shape — is rejected with a reason."""
    verdict = validate_regex("(a+)+")
    assert verdict["safe"] is False
    assert "Nested quantifier" in verdict["reason"]


def test_nested_quantifier_a_star_star_b():
    """(a*)*b is rejected as a nested quantifier."""
    verdict = validate_regex("(a*)*b")
    assert verdict["safe"] is False
    assert "Nested quantifier" in verdict["reason"]


def test_nested_quantifier_word_plus():
    """A quantified group of quantified \\w runs is rejected."""
    verdict = validate_regex(r"(\w+\w+)+")
    assert verdict["safe"] is False
    assert "Nested quantifier" in verdict["reason"]