codemap-jsp 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codemap_jsp-0.2.0/.gitignore +43 -0
- codemap_jsp-0.2.0/PKG-INFO +106 -0
- codemap_jsp-0.2.0/README.md +86 -0
- codemap_jsp-0.2.0/pyproject.toml +39 -0
- codemap_jsp-0.2.0/src/codemap_jsp/__init__.py +13 -0
- codemap_jsp-0.2.0/src/codemap_jsp/indexer.py +344 -0
- codemap_jsp-0.2.0/src/codemap_jsp/sfc.py +232 -0
- codemap_jsp-0.2.0/tests/__init__.py +0 -0
- codemap_jsp-0.2.0/tests/test_indexer.py +219 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# Build artifacts
|
|
7
|
+
build/
|
|
8
|
+
dist/
|
|
9
|
+
*.egg-info/
|
|
10
|
+
*.egg
|
|
11
|
+
.eggs/
|
|
12
|
+
|
|
13
|
+
# Test / coverage
|
|
14
|
+
.pytest_cache/
|
|
15
|
+
.coverage
|
|
16
|
+
.coverage.*
|
|
17
|
+
htmlcov/
|
|
18
|
+
coverage.xml
|
|
19
|
+
.tox/
|
|
20
|
+
.mypy_cache/
|
|
21
|
+
.ruff_cache/
|
|
22
|
+
.benchmarks/
|
|
23
|
+
|
|
24
|
+
# Virtualenv
|
|
25
|
+
.venv/
|
|
26
|
+
venv/
|
|
27
|
+
env/
|
|
28
|
+
|
|
29
|
+
# uv / pdm lockfiles (commit uv.lock once we settle)
|
|
30
|
+
# uv.lock
|
|
31
|
+
|
|
32
|
+
# IDE
|
|
33
|
+
.idea/
|
|
34
|
+
.vscode/
|
|
35
|
+
*.swp
|
|
36
|
+
*.swo
|
|
37
|
+
|
|
38
|
+
# OS
|
|
39
|
+
.DS_Store
|
|
40
|
+
Thumbs.db
|
|
41
|
+
|
|
42
|
+
# CodeMap own index when dogfooding
|
|
43
|
+
.codemap/
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codemap-jsp
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: JavaServer Pages (.jsp) indexer plugin for CodeMap (https://github.com/qxbyte/codemap)
|
|
5
|
+
Project-URL: Homepage, https://github.com/qxbyte/codemap
|
|
6
|
+
Author: CodeMap Contributors
|
|
7
|
+
License: MIT
|
|
8
|
+
Keywords: codemap,indexer,java,jsp,tree-sitter
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Programming Language :: Java
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Software Development
|
|
13
|
+
Requires-Python: >=3.11
|
|
14
|
+
Requires-Dist: codemap-core<0.3,>=0.2.0
|
|
15
|
+
Requires-Dist: tree-sitter-java>=0.23
|
|
16
|
+
Requires-Dist: tree-sitter>=0.25
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# codemap-jsp
|
|
22
|
+
|
|
23
|
+
> A JavaServer Pages (`.jsp`) indexer for
|
|
24
|
+
> [CodeMap](https://github.com/qxbyte/codemap), distributed as an
|
|
25
|
+
> independent PyPI package. Targets legacy Java web projects that
|
|
26
|
+
> still ship `.jsp` views.
|
|
27
|
+
|
|
28
|
+
## What it captures
|
|
29
|
+
|
|
30
|
+
JSP files are HTML containers with embedded Java. The indexer scans
|
|
31
|
+
five top-level constructs:
|
|
32
|
+
|
|
33
|
+
| JSP construct | Captured as |
|
|
34
|
+
|---|---|
|
|
35
|
+
| `<%@ page import="..." %>` | `extra.jsp_imports` on the page symbol |
|
|
36
|
+
| `<%@ include file="..." %>` | `extra.jsp_includes` on the page symbol |
|
|
37
|
+
| `<%! ... %>` (declaration block) | Java members parsed by `tree-sitter-java`; methods → `method`, fields → `variable`, nested classes → `class` |
|
|
38
|
+
| `<% ... %>` (scriptlet) | **Skipped** — scriptlet locals are per-request state, not a stable interface |
|
|
39
|
+
| `<form action="..." method="...">` and `<a href="...">` | `extra.http_client_calls` on the page symbol — consumed by the host's `http_route` bridge to link the page to its server controller |
|
|
40
|
+
|
|
41
|
+
Every JSP file produces **one page-level pseudo-class symbol** plus
|
|
42
|
+
zero or more member symbols from any `<%! ... %>` blocks. The page
|
|
43
|
+
name is derived from the file basename (`UserList.jsp` → `UserList`).
|
|
44
|
+
|
|
45
|
+
## Why this matters for AI agents on legacy projects
|
|
46
|
+
|
|
47
|
+
A typical Spring MVC + JSP project has:
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
src/main/java/com/example/UserController.java
|
|
51
|
+
@PostMapping("/api/users") void create(User u) { ... }
|
|
52
|
+
|
|
53
|
+
src/main/webapp/WEB-INF/views/users.jsp
|
|
54
|
+
<form action="/api/users" method="POST"> ... </form>
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
The `http_route` bridge ships in `codemap-core`. Once both files are
|
|
58
|
+
indexed, the bridge sees:
|
|
59
|
+
|
|
60
|
+
* the server route `(POST, /api/users)` exposed by `UserController.create`
|
|
61
|
+
* the client call `(POST, /api/users)` advertised by `users.jsp`
|
|
62
|
+
|
|
63
|
+
…and adds a `calls` edge from the page to the controller. AI agents
|
|
64
|
+
can now answer "what JSP pages submit to this controller?" with a
|
|
65
|
+
single `codemap callers` query — no grepping HTML for URLs.
|
|
66
|
+
|
|
67
|
+
## Install
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pip install codemap-jsp
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
`tree-sitter-java` is a hard dependency (used to parse declaration
|
|
74
|
+
blocks). No tree-sitter-jsp grammar is needed.
|
|
75
|
+
|
|
76
|
+
## SymbolID encoding
|
|
77
|
+
|
|
78
|
+
```
|
|
79
|
+
scip-jsp . . . src/main/webapp/views/UserList.jsp/UserList#tag().
|
|
80
|
+
└──────┘ └──────────────────────────────────────────────────┘
|
|
81
|
+
scheme path / page pseudo-class / member
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Tests
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
pip install -e ".[dev]"
|
|
88
|
+
pytest
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Limits / next steps
|
|
92
|
+
|
|
93
|
+
* Scriptlet (`<% ... %>`) bodies are not indexed for symbol extraction
|
|
94
|
+
— they would surface per-request locals as noise. A future v0.2.x
|
|
95
|
+
could optionally extract control-flow shape from scriptlets to
|
|
96
|
+
support cross-page navigation.
|
|
97
|
+
* JSTL tags (`<c:if>`, `<c:forEach>`) and EL expressions (`${...}`)
|
|
98
|
+
are not parsed. A `codemap-jstl` companion plugin could surface
|
|
99
|
+
declared `<c:set var="..."/>` variables if there's demand.
|
|
100
|
+
* Tag files (`.tag` / `.tagx`) are supported on the same code path as
|
|
101
|
+
pages but their unique `<%@ attribute name="..." %>` directives are
|
|
102
|
+
not yet surfaced as individual symbols.
|
|
103
|
+
|
|
104
|
+
## License
|
|
105
|
+
|
|
106
|
+
MIT — same as the host project.
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# codemap-jsp
|
|
2
|
+
|
|
3
|
+
> A JavaServer Pages (`.jsp`) indexer for
|
|
4
|
+
> [CodeMap](https://github.com/qxbyte/codemap), distributed as an
|
|
5
|
+
> independent PyPI package. Targets legacy Java web projects that
|
|
6
|
+
> still ship `.jsp` views.
|
|
7
|
+
|
|
8
|
+
## What it captures
|
|
9
|
+
|
|
10
|
+
JSP files are HTML containers with embedded Java. The indexer scans
|
|
11
|
+
five top-level constructs:
|
|
12
|
+
|
|
13
|
+
| JSP construct | Captured as |
|
|
14
|
+
|---|---|
|
|
15
|
+
| `<%@ page import="..." %>` | `extra.jsp_imports` on the page symbol |
|
|
16
|
+
| `<%@ include file="..." %>` | `extra.jsp_includes` on the page symbol |
|
|
17
|
+
| `<%! ... %>` (declaration block) | Java members parsed by `tree-sitter-java`; methods → `method`, fields → `variable`, nested classes → `class` |
|
|
18
|
+
| `<% ... %>` (scriptlet) | **Skipped** — scriptlet locals are per-request state, not a stable interface |
|
|
19
|
+
| `<form action="..." method="...">` and `<a href="...">` | `extra.http_client_calls` on the page symbol — consumed by the host's `http_route` bridge to link the page to its server controller |
|
|
20
|
+
|
|
21
|
+
Every JSP file produces **one page-level pseudo-class symbol** plus
|
|
22
|
+
zero or more member symbols from any `<%! ... %>` blocks. The page
|
|
23
|
+
name is derived from the file basename (`UserList.jsp` → `UserList`).
|
|
24
|
+
|
|
25
|
+
## Why this matters for AI agents on legacy projects
|
|
26
|
+
|
|
27
|
+
A typical Spring MVC + JSP project has:
|
|
28
|
+
|
|
29
|
+
```
|
|
30
|
+
src/main/java/com/example/UserController.java
|
|
31
|
+
@PostMapping("/api/users") void create(User u) { ... }
|
|
32
|
+
|
|
33
|
+
src/main/webapp/WEB-INF/views/users.jsp
|
|
34
|
+
<form action="/api/users" method="POST"> ... </form>
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
The `http_route` bridge ships in `codemap-core`. Once both files are
|
|
38
|
+
indexed, the bridge sees:
|
|
39
|
+
|
|
40
|
+
* the server route `(POST, /api/users)` exposed by `UserController.create`
|
|
41
|
+
* the client call `(POST, /api/users)` advertised by `users.jsp`
|
|
42
|
+
|
|
43
|
+
…and adds a `calls` edge from the page to the controller. AI agents
|
|
44
|
+
can now answer "what JSP pages submit to this controller?" with a
|
|
45
|
+
single `codemap callers` query — no grepping HTML for URLs.
|
|
46
|
+
|
|
47
|
+
## Install
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install codemap-jsp
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
`tree-sitter-java` is a hard dependency (used to parse declaration
|
|
54
|
+
blocks). No tree-sitter-jsp grammar is needed.
|
|
55
|
+
|
|
56
|
+
## SymbolID encoding
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
scip-jsp . . . src/main/webapp/views/UserList.jsp/UserList#tag().
|
|
60
|
+
└──────┘ └──────────────────────────────────────────────────┘
|
|
61
|
+
scheme path / page pseudo-class / member
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Tests
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
pip install -e ".[dev]"
|
|
68
|
+
pytest
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Limits / next steps
|
|
72
|
+
|
|
73
|
+
* Scriptlet (`<% ... %>`) bodies are not indexed for symbol extraction
|
|
74
|
+
— they would surface per-request locals as noise. A future v0.2.x
|
|
75
|
+
could optionally extract control-flow shape from scriptlets to
|
|
76
|
+
support cross-page navigation.
|
|
77
|
+
* JSTL tags (`<c:if>`, `<c:forEach>`) and EL expressions (`${...}`)
|
|
78
|
+
are not parsed. A `codemap-jstl` companion plugin could surface
|
|
79
|
+
declared `<c:set var="..."/>` variables if there's demand.
|
|
80
|
+
* Tag files (`.tag` / `.tagx`) are supported on the same code path as
|
|
81
|
+
pages but their unique `<%@ attribute name="..." %>` directives are
|
|
82
|
+
not yet surfaced as individual symbols.
|
|
83
|
+
|
|
84
|
+
## License
|
|
85
|
+
|
|
86
|
+
MIT — same as the host project.
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling>=1.21"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "codemap-jsp"
|
|
7
|
+
version = "0.2.0"
|
|
8
|
+
description = "JavaServer Pages (.jsp) indexer plugin for CodeMap (https://github.com/qxbyte/codemap)"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "CodeMap Contributors" }]
|
|
13
|
+
keywords = ["codemap", "jsp", "java", "indexer", "tree-sitter"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Programming Language :: Java",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Topic :: Software Development",
|
|
19
|
+
]
|
|
20
|
+
# tree-sitter-jsp is not on PyPI. The plugin scans top-level JSP
|
|
21
|
+
# constructs (page imports, scriptlets, declarations, includes, form
|
|
22
|
+
# actions) via regex and dispatches inner Java code to tree-sitter-java.
|
|
23
|
+
dependencies = [
|
|
24
|
+
"codemap-core>=0.2.0,<0.3",
|
|
25
|
+
"tree-sitter>=0.25",
|
|
26
|
+
"tree-sitter-java>=0.23",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[project.optional-dependencies]
|
|
30
|
+
dev = ["pytest>=8.0"]
|
|
31
|
+
|
|
32
|
+
[project.entry-points."codemap.indexers"]
|
|
33
|
+
jsp = "codemap_jsp:JspIndexer"
|
|
34
|
+
|
|
35
|
+
[project.urls]
|
|
36
|
+
Homepage = "https://github.com/qxbyte/codemap"
|
|
37
|
+
|
|
38
|
+
[tool.hatch.build.targets.wheel]
|
|
39
|
+
packages = ["src/codemap_jsp"]
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""JavaServer Pages (JSP) indexer plugin for CodeMap.
|
|
2
|
+
|
|
3
|
+
The entry-point group ``codemap.indexers`` discovers this class
|
|
4
|
+
automatically once ``codemap-jsp`` is installed alongside the host
|
|
5
|
+
CodeMap CLI.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from codemap_jsp.indexer import JspIndexer
|
|
11
|
+
|
|
12
|
+
__all__ = ["JspIndexer"]
|
|
13
|
+
# Keep in sync with ``version`` in pyproject.toml and ``JspIndexer.version``.
__version__ = "0.2.0"
|
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
"""JSP (.jsp / .jspx / .tag / .tagx) indexer.
|
|
2
|
+
|
|
3
|
+
Strategy
|
|
4
|
+
========
|
|
5
|
+
|
|
6
|
+
JSP files are HTML-shaped containers with embedded Java in
|
|
7
|
+
``<%! ... %>`` (declarations) and ``<% ... %>`` (scriptlets), plus
|
|
8
|
+
``<%@ page import="..." %>`` directives. We scan the file once for
|
|
9
|
+
those constructs (:mod:`codemap_jsp.sfc`), then:
|
|
10
|
+
|
|
11
|
+
* Parse each ``<%! ... %>`` block's Java with ``tree-sitter-java`` and
|
|
12
|
+
emit method / field / class symbols at the page level (these are the
|
|
13
|
+
generated servlet's instance members and are the only Java symbols
|
|
14
|
+
worth surfacing as a stable interface).
|
|
15
|
+
* Skip scriptlet bodies (``<% ... %>``) for symbol extraction — every
|
|
16
|
+
statement in there is per-request local state, not a stable
|
|
17
|
+
interface.
|
|
18
|
+
* Emit ``<form action="...">`` and ``<a href="...">`` URLs as
|
|
19
|
+
``http_client_calls`` (consumed by the host's ``http_route`` bridge
|
|
20
|
+
to link the JSP page to its server controller).
|
|
21
|
+
* Treat each ``<%@ page import="..." %>`` as a hint of dependency on
|
|
22
|
+
the cited Java type; recorded for future cross-asset bridging.
|
|
23
|
+
|
|
24
|
+
``<template>``-style tags, JSTL (``<c:if>``, ``<c:forEach>``), and EL
|
|
25
|
+
(``${expr}``) are ignored — they have no symbol value here.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
from pathlib import Path, PurePosixPath
|
|
31
|
+
from typing import ClassVar
|
|
32
|
+
|
|
33
|
+
import tree_sitter
|
|
34
|
+
import tree_sitter_java
|
|
35
|
+
|
|
36
|
+
from codemap.core.models import Diagnostic, IndexResult, Range, Symbol
|
|
37
|
+
from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
|
|
38
|
+
from codemap.indexers.base import IndexContext
|
|
39
|
+
from codemap_jsp.sfc import JspDeclaration, JspExtract, extract
|
|
40
|
+
|
|
41
|
+
SCHEME = "scip-jsp"
|
|
42
|
+
LANG = "jsp"
|
|
43
|
+
|
|
44
|
+
_JAVA_LANG = tree_sitter.Language(tree_sitter_java.language())
|
|
45
|
+
|
|
46
|
+
# Server-side JSP page is conceptually a class; the canonical name we
|
|
47
|
+
# use as the enclosing type for its declared methods/fields is the file
|
|
48
|
+
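# basename (the JSP servlet generator does the same). The key below names the entry in the page symbol's ``extra`` dict that holds its HTTP client calls.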
|
|
49
|
+
_JSP_HTTP_CLIENT_EXTRA_KEY = "http_client_calls"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class JspIndexer:
|
|
53
|
+
name: ClassVar[str] = "jsp"
|
|
54
|
+
version: ClassVar[str] = "0.2.0"
|
|
55
|
+
file_patterns: ClassVar[list[str]] = ["*.jsp", "*.jspx", "*.tag", "*.tagx"]
|
|
56
|
+
languages: ClassVar[list[str]] = [LANG]
|
|
57
|
+
|
|
58
|
+
def supports(self, path: Path) -> bool:
|
|
59
|
+
return path.suffix in {".jsp", ".jspx", ".tag", ".tagx"}
|
|
60
|
+
|
|
61
|
+
def index_file(
|
|
62
|
+
self,
|
|
63
|
+
path: Path,
|
|
64
|
+
source: bytes,
|
|
65
|
+
ctx: IndexContext,
|
|
66
|
+
) -> IndexResult:
|
|
67
|
+
try:
|
|
68
|
+
source.decode("utf-8")
|
|
69
|
+
except UnicodeDecodeError as exc:
|
|
70
|
+
return IndexResult(
|
|
71
|
+
diagnostics=[
|
|
72
|
+
Diagnostic(
|
|
73
|
+
severity="error",
|
|
74
|
+
file=ctx.relative_path,
|
|
75
|
+
code="JSP002",
|
|
76
|
+
message=f"not valid UTF-8: {exc}",
|
|
77
|
+
producer=self.name,
|
|
78
|
+
)
|
|
79
|
+
]
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
ext = extract(source)
|
|
83
|
+
page_symbol, page_kind_sids = _emit_page_symbol(ctx.relative_path, source, ext)
|
|
84
|
+
symbols: list[Symbol] = [page_symbol]
|
|
85
|
+
diagnostics: list[Diagnostic] = []
|
|
86
|
+
|
|
87
|
+
# Parse declaration blocks for member symbols.
|
|
88
|
+
for block in ext.declarations:
|
|
89
|
+
block_symbols, block_diags = _index_declaration(
|
|
90
|
+
ctx.relative_path, block, page_kind_sids
|
|
91
|
+
)
|
|
92
|
+
symbols.extend(block_symbols)
|
|
93
|
+
diagnostics.extend(block_diags)
|
|
94
|
+
|
|
95
|
+
return IndexResult(symbols=symbols, diagnostics=diagnostics)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _emit_page_symbol(
|
|
99
|
+
relative_path: PurePosixPath,
|
|
100
|
+
source: bytes,
|
|
101
|
+
ext: JspExtract,
|
|
102
|
+
) -> tuple[Symbol, list[Descriptor]]:
|
|
103
|
+
"""Emit the page-level pseudo-class symbol.
|
|
104
|
+
|
|
105
|
+
Returns ``(page_symbol, page_type_descriptors)`` where the
|
|
106
|
+
descriptors are reused when generating IDs for member symbols.
|
|
107
|
+
"""
|
|
108
|
+
page_name = _page_type_name(relative_path)
|
|
109
|
+
descriptors = [
|
|
110
|
+
*_path_namespaces(relative_path),
|
|
111
|
+
Descriptor(name=page_name, kind=DescriptorKind.TYPE),
|
|
112
|
+
]
|
|
113
|
+
page_id = SymbolID(scheme=SCHEME, descriptors=tuple(descriptors))
|
|
114
|
+
|
|
115
|
+
extra: dict[str, object] = {
|
|
116
|
+
"jsp_imports": [{"fqcn": i.fqcn, "line": i.line} for i in ext.imports],
|
|
117
|
+
"jsp_includes": [{"path": i.path, "line": i.line} for i in ext.includes],
|
|
118
|
+
}
|
|
119
|
+
client_calls: list[dict[str, object]] = [
|
|
120
|
+
{"method": fa.method, "url": fa.url, "line": fa.line} for fa in ext.form_actions
|
|
121
|
+
]
|
|
122
|
+
client_calls.extend({"method": "GET", "url": link.url, "line": link.line} for link in ext.links)
|
|
123
|
+
if client_calls:
|
|
124
|
+
extra[_JSP_HTTP_CLIENT_EXTRA_KEY] = client_calls
|
|
125
|
+
|
|
126
|
+
# Page range = full file
|
|
127
|
+
total_lines = source.count(b"\n") + 1
|
|
128
|
+
page_symbol = Symbol(
|
|
129
|
+
id=page_id,
|
|
130
|
+
kind="class", # canonical "page = pseudo-class" mapping
|
|
131
|
+
language=LANG,
|
|
132
|
+
file=relative_path,
|
|
133
|
+
range=Range(start_line=1, end_line=total_lines),
|
|
134
|
+
extra=extra,
|
|
135
|
+
)
|
|
136
|
+
return page_symbol, descriptors
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _index_declaration(
|
|
140
|
+
relative_path: PurePosixPath,
|
|
141
|
+
block: JspDeclaration,
|
|
142
|
+
page_type_descriptors: list[Descriptor],
|
|
143
|
+
) -> tuple[list[Symbol], list[Diagnostic]]:
|
|
144
|
+
"""Parse the Java member declarations in a ``<%! ... %>`` block.
|
|
145
|
+
|
|
146
|
+
JSP declaration blocks are conceptually class-body content of the
|
|
147
|
+
generated servlet, but tree-sitter-java's top-level grammar treats
|
|
148
|
+
a bare ``private int x = 0;`` as a ``local_variable_declaration``
|
|
149
|
+
rather than a ``field_declaration``. To get the right node kinds,
|
|
150
|
+
we wrap the block in a synthetic ``class _S { ... }`` frame and
|
|
151
|
+
walk inside that wrapper. The synthetic class itself contributes
|
|
152
|
+
one extra line at the top, which we subtract from every emitted
|
|
153
|
+
symbol's line number.
|
|
154
|
+
"""
|
|
155
|
+
parser = tree_sitter.Parser(_JAVA_LANG)
|
|
156
|
+
wrapped = b"class _S {\n" + block.content + b"\n}"
|
|
157
|
+
tree = parser.parse(wrapped)
|
|
158
|
+
diagnostics: list[Diagnostic] = []
|
|
159
|
+
if tree.root_node.has_error:
|
|
160
|
+
diagnostics.append(
|
|
161
|
+
Diagnostic(
|
|
162
|
+
severity="warning",
|
|
163
|
+
file=relative_path,
|
|
164
|
+
range=Range(start_line=block.content_start_line, end_line=block.content_start_line),
|
|
165
|
+
code="JSP001",
|
|
166
|
+
message="tree-sitter reported parse errors inside <%! ... %>; symbols may be incomplete",
|
|
167
|
+
producer=LANG,
|
|
168
|
+
)
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
# The wrapper class contributes one extra line ("class _S {") above
|
|
172
|
+
# the real content. Walk into the wrapper's body so we see field /
|
|
173
|
+
# method / nested class declarations as Java grammar nodes.
|
|
174
|
+
visitor = _DeclarationVisitor(
|
|
175
|
+
relative_path=relative_path,
|
|
176
|
+
# block_offset_line - 1 maps "row 0 in inner tree" to the first
|
|
177
|
+
# actual content line of the JSP file. The wrapper's "class _S {"
|
|
178
|
+
# is on row 0 of the parsed tree so its content begins on row 1,
|
|
179
|
+
# which corresponds to block.content_start_line. _node_range adds row + 1, so the offset must be content_start_line - 2.
|
|
180
|
+
block_offset_line=block.content_start_line - 1 - 1,
|
|
181
|
+
page_type_descriptors=page_type_descriptors,
|
|
182
|
+
synthetic_wrapper="_S",
|
|
183
|
+
)
|
|
184
|
+
visitor.visit(tree.root_node)
|
|
185
|
+
return visitor.symbols, diagnostics
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
# ---------------------------------------------------------------------------
|
|
189
|
+
# Java AST walking inside <%! ... %>
|
|
190
|
+
# ---------------------------------------------------------------------------
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
class _DeclarationVisitor:
|
|
194
|
+
"""Walk one <%! ... %> block's Java AST and emit member symbols."""
|
|
195
|
+
|
|
196
|
+
def __init__(
|
|
197
|
+
self,
|
|
198
|
+
relative_path: PurePosixPath,
|
|
199
|
+
block_offset_line: int,
|
|
200
|
+
page_type_descriptors: list[Descriptor],
|
|
201
|
+
synthetic_wrapper: str | None = None,
|
|
202
|
+
) -> None:
|
|
203
|
+
self.relative_path = relative_path
|
|
204
|
+
self.offset = block_offset_line
|
|
205
|
+
self.page_type_descriptors = page_type_descriptors
|
|
206
|
+
# When a synthetic ``class _S { ... }`` is used to coerce
|
|
207
|
+
# tree-sitter-java into emitting field_declaration nodes, we
|
|
208
|
+
# filter the wrapper itself out of the symbol stream.
|
|
209
|
+
self._synthetic_wrapper = synthetic_wrapper
|
|
210
|
+
self.symbols: list[Symbol] = []
|
|
211
|
+
self._class_stack: list[str] = []
|
|
212
|
+
|
|
213
|
+
def visit(self, node: tree_sitter.Node) -> None:
|
|
214
|
+
kind = node.type
|
|
215
|
+
if kind == "method_declaration":
|
|
216
|
+
self._visit_method(node)
|
|
217
|
+
return
|
|
218
|
+
if kind == "field_declaration":
|
|
219
|
+
self._visit_field(node)
|
|
220
|
+
return
|
|
221
|
+
if kind == "class_declaration":
|
|
222
|
+
name = _name_child_text(node)
|
|
223
|
+
if name == self._synthetic_wrapper:
|
|
224
|
+
# Walk into the wrapper's body but do not emit a symbol.
|
|
225
|
+
body = node.child_by_field_name("body")
|
|
226
|
+
if body is not None:
|
|
227
|
+
for child in body.children:
|
|
228
|
+
self.visit(child)
|
|
229
|
+
return
|
|
230
|
+
self._visit_class(node)
|
|
231
|
+
return
|
|
232
|
+
for child in node.children:
|
|
233
|
+
self.visit(child)
|
|
234
|
+
|
|
235
|
+
def _visit_method(self, node: tree_sitter.Node) -> None:
|
|
236
|
+
name = _name_child_text(node)
|
|
237
|
+
if name is None:
|
|
238
|
+
return
|
|
239
|
+
sid = self._make_id(name, descriptor_kind=DescriptorKind.METHOD)
|
|
240
|
+
signature = _function_signature(node, name)
|
|
241
|
+
self.symbols.append(
|
|
242
|
+
Symbol(
|
|
243
|
+
id=sid,
|
|
244
|
+
kind="method",
|
|
245
|
+
language=LANG,
|
|
246
|
+
file=self.relative_path,
|
|
247
|
+
range=self._node_range(node),
|
|
248
|
+
signature=signature,
|
|
249
|
+
)
|
|
250
|
+
)
|
|
251
|
+
|
|
252
|
+
def _visit_field(self, node: tree_sitter.Node) -> None:
|
|
253
|
+
# field_declaration: type + variable_declarator[+]
|
|
254
|
+
for child in node.children:
|
|
255
|
+
if child.type != "variable_declarator":
|
|
256
|
+
continue
|
|
257
|
+
name_node = child.child_by_field_name("name")
|
|
258
|
+
if name_node is None or name_node.text is None:
|
|
259
|
+
continue
|
|
260
|
+
name = name_node.text.decode("utf-8")
|
|
261
|
+
if not name:
|
|
262
|
+
continue
|
|
263
|
+
sid = self._make_id(name, descriptor_kind=DescriptorKind.TERM)
|
|
264
|
+
self.symbols.append(
|
|
265
|
+
Symbol(
|
|
266
|
+
id=sid,
|
|
267
|
+
kind="variable",
|
|
268
|
+
language=LANG,
|
|
269
|
+
file=self.relative_path,
|
|
270
|
+
range=self._node_range(child),
|
|
271
|
+
)
|
|
272
|
+
)
|
|
273
|
+
|
|
274
|
+
def _visit_class(self, node: tree_sitter.Node) -> None:
|
|
275
|
+
name = _name_child_text(node)
|
|
276
|
+
if name is None:
|
|
277
|
+
return
|
|
278
|
+
sid = self._make_id(name, descriptor_kind=DescriptorKind.TYPE)
|
|
279
|
+
self.symbols.append(
|
|
280
|
+
Symbol(
|
|
281
|
+
id=sid,
|
|
282
|
+
kind="class",
|
|
283
|
+
language=LANG,
|
|
284
|
+
file=self.relative_path,
|
|
285
|
+
range=self._node_range(node),
|
|
286
|
+
)
|
|
287
|
+
)
|
|
288
|
+
self._class_stack.append(name)
|
|
289
|
+
try:
|
|
290
|
+
body = node.child_by_field_name("body")
|
|
291
|
+
if body is not None:
|
|
292
|
+
for child in body.children:
|
|
293
|
+
self.visit(child)
|
|
294
|
+
finally:
|
|
295
|
+
self._class_stack.pop()
|
|
296
|
+
|
|
297
|
+
def _make_id(self, name: str, *, descriptor_kind: DescriptorKind) -> SymbolID:
|
|
298
|
+
descriptors = list(self.page_type_descriptors)
|
|
299
|
+
descriptors.extend(
|
|
300
|
+
Descriptor(name=cls, kind=DescriptorKind.TYPE) for cls in self._class_stack
|
|
301
|
+
)
|
|
302
|
+
descriptors.append(Descriptor(name=name, kind=descriptor_kind))
|
|
303
|
+
return SymbolID(scheme=SCHEME, descriptors=tuple(descriptors))
|
|
304
|
+
|
|
305
|
+
def _node_range(self, node: tree_sitter.Node) -> Range:
|
|
306
|
+
start_row, start_col = node.start_point
|
|
307
|
+
end_row, end_col = node.end_point
|
|
308
|
+
return Range(
|
|
309
|
+
start_line=start_row + 1 + self.offset,
|
|
310
|
+
start_col=start_col,
|
|
311
|
+
end_line=max(end_row + 1 + self.offset, start_row + 1 + self.offset),
|
|
312
|
+
end_col=end_col,
|
|
313
|
+
)
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
# ---------------------------------------------------------------------------
|
|
317
|
+
# Pure helpers
|
|
318
|
+
# ---------------------------------------------------------------------------
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def _path_namespaces(path: PurePosixPath) -> list[Descriptor]:
|
|
322
|
+
return [Descriptor(name=part, kind=DescriptorKind.NAMESPACE) for part in path.parts]
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def _page_type_name(path: PurePosixPath) -> str:
|
|
326
|
+
"""Map ``foo/bar/UserList.jsp`` → ``UserList``."""
|
|
327
|
+
stem = path.stem
|
|
328
|
+
return stem or path.name
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _name_child_text(node: tree_sitter.Node) -> str | None:
|
|
332
|
+
name_node = node.child_by_field_name("name")
|
|
333
|
+
if name_node is None or name_node.text is None:
|
|
334
|
+
return None
|
|
335
|
+
text = name_node.text.decode("utf-8").strip()
|
|
336
|
+
return text or None
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def _function_signature(node: tree_sitter.Node, name: str) -> str:
|
|
340
|
+
params = node.child_by_field_name("parameters")
|
|
341
|
+
params_text = ""
|
|
342
|
+
if params is not None and params.text is not None:
|
|
343
|
+
params_text = params.text.decode("utf-8")
|
|
344
|
+
return f"{name}{params_text}".strip()
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"""JSP top-level construct extractor.
|
|
2
|
+
|
|
3
|
+
``tree-sitter-jsp`` does not exist on PyPI. This module locates the
|
|
4
|
+
JSP-specific constructs that matter for indexing:
|
|
5
|
+
|
|
6
|
+
* ``<%@ page import="..." %>`` — type imports (one or more comma-separated)
|
|
7
|
+
* ``<%@ include file="..." %>`` — server-side file inclusion
|
|
8
|
+
* ``<%! ... %>`` — declaration blocks (Java member declarations:
|
|
9
|
+
fields, methods, classes that live at the *generated servlet* scope)
|
|
10
|
+
* ``<% ... %>`` — scriptlets (Java statements that live at servlet
|
|
11
|
+
``_jspService`` scope; locals here are private to one request and we
|
|
12
|
+
intentionally do not surface them as symbols)
|
|
13
|
+
* ``<form action="..." method="...">`` — HTML form submissions, useful
|
|
14
|
+
for the ``http_route`` bridge to link the page to a server controller
|
|
15
|
+
* ``<a href="...">`` — links to other pages or actions, same purpose
|
|
16
|
+
|
|
17
|
+
Anything else (``<c:forEach>``, EL ``${...}``, custom tags) is ignored.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import re
|
|
23
|
+
from dataclasses import dataclass
|
|
24
|
+
|
|
25
|
+
# JSP directive: <%@ page ... %> / <%@ include ... %> / <%@ taglib ... %>
|
|
26
|
+
_DIRECTIVE_RE = re.compile(
|
|
27
|
+
rb"<%@\s*(?P<name>\w+)\s+(?P<body>[^%]*?)%>",
|
|
28
|
+
re.DOTALL,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Declarations: <%! ... %>
|
|
32
|
+
_DECLARATION_RE = re.compile(rb"<%!\s*(?P<body>.*?)%>", re.DOTALL)
|
|
33
|
+
|
|
34
|
+
# Scriptlets: <% ... %> — excludes <%! ... %> and <%@ ... %> directives,
|
|
35
|
+
# captured separately above. We use a negative-lookahead on `!` and `@`.
|
|
36
|
+
_SCRIPTLET_RE = re.compile(rb"<%(?![@!])\s*(?P<body>.*?)%>", re.DOTALL)
|
|
37
|
+
|
|
38
|
+
# HTML form action / link href (case-insensitive). These let the
|
|
39
|
+
# http_route bridge match a JSP page to its server-side controller.
|
|
40
|
+
_FORM_ACTION_RE = re.compile(
|
|
41
|
+
rb"""<form\b[^>]*?\baction\s*=\s*(?P<q>["'])(?P<url>[^"']+)(?P=q)[^>]*?>""",
|
|
42
|
+
re.IGNORECASE,
|
|
43
|
+
)
|
|
44
|
+
_FORM_METHOD_RE = re.compile(
|
|
45
|
+
rb"""<form\b[^>]*?\bmethod\s*=\s*(?P<q>["'])(?P<method>[^"']+)(?P=q)""",
|
|
46
|
+
re.IGNORECASE,
|
|
47
|
+
)
|
|
48
|
+
_A_HREF_RE = re.compile(
|
|
49
|
+
rb"""<a\b[^>]*?\bhref\s*=\s*(?P<q>["'])(?P<url>[^"']+)(?P=q)""",
|
|
50
|
+
re.IGNORECASE,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass(frozen=True, slots=True)
|
|
55
|
+
class JspImport:
|
|
56
|
+
"""One ``<%@ page import="..." %>`` entry. Java FQCN."""
|
|
57
|
+
|
|
58
|
+
fqcn: str
|
|
59
|
+
line: int
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass(frozen=True, slots=True)
|
|
63
|
+
class JspInclude:
|
|
64
|
+
"""One ``<%@ include file="..." %>`` directive."""
|
|
65
|
+
|
|
66
|
+
path: str
|
|
67
|
+
line: int
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass(frozen=True, slots=True)
|
|
71
|
+
class JspDeclaration:
|
|
72
|
+
"""One ``<%! ... %>`` block — Java member declarations."""
|
|
73
|
+
|
|
74
|
+
content: bytes
|
|
75
|
+
content_start_offset: int
|
|
76
|
+
content_start_line: int
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@dataclass(frozen=True, slots=True)
|
|
80
|
+
class JspScriptlet:
|
|
81
|
+
"""One ``<% ... %>`` block — Java statements in _jspService."""
|
|
82
|
+
|
|
83
|
+
content: bytes
|
|
84
|
+
content_start_offset: int
|
|
85
|
+
content_start_line: int
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@dataclass(frozen=True, slots=True)
|
|
89
|
+
class JspFormAction:
|
|
90
|
+
"""One ``<form action="..." method="...">``."""
|
|
91
|
+
|
|
92
|
+
method: str
|
|
93
|
+
url: str
|
|
94
|
+
line: int
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@dataclass(frozen=True, slots=True)
|
|
98
|
+
class JspLink:
|
|
99
|
+
"""One ``<a href="...">``."""
|
|
100
|
+
|
|
101
|
+
url: str
|
|
102
|
+
line: int
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@dataclass(frozen=True, slots=True)
|
|
106
|
+
class JspExtract:
|
|
107
|
+
"""All JSP-relevant constructs found in one file."""
|
|
108
|
+
|
|
109
|
+
imports: list[JspImport]
|
|
110
|
+
includes: list[JspInclude]
|
|
111
|
+
declarations: list[JspDeclaration]
|
|
112
|
+
scriptlets: list[JspScriptlet]
|
|
113
|
+
form_actions: list[JspFormAction]
|
|
114
|
+
links: list[JspLink]
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def extract(source: bytes) -> JspExtract:
|
|
118
|
+
"""Scan a JSP file's bytes once and return everything we care about."""
|
|
119
|
+
return JspExtract(
|
|
120
|
+
imports=_extract_imports(source),
|
|
121
|
+
includes=_extract_includes(source),
|
|
122
|
+
declarations=_extract_declarations(source),
|
|
123
|
+
scriptlets=_extract_scriptlets(source),
|
|
124
|
+
form_actions=_extract_form_actions(source),
|
|
125
|
+
links=_extract_links(source),
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ---------------------------------------------------------------------------
|
|
130
|
+
# Implementation details
|
|
131
|
+
# ---------------------------------------------------------------------------
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _line_of(source: bytes, offset: int) -> int:
|
|
135
|
+
return source[:offset].count(b"\n") + 1
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _extract_imports(source: bytes) -> list[JspImport]:
    """Collect the classes imported via ``<%@ page import="..." %>`` directives.

    A single ``import`` attribute may list several comma-separated
    FQCNs (``import="java.util.List, java.util.Map"``); each one becomes
    its own :class:`JspImport`, all sharing the line of the directive.
    Directives other than ``page`` are ignored.
    """
    out: list[JspImport] = []
    for m in _DIRECTIVE_RE.finditer(source):
        name = m.group("name").decode("ascii", errors="replace").lower()
        if name != "page":
            continue
        body = m.group("body").decode("utf-8", errors="replace")

        # Split every import attribute on commas, dropping empty pieces.
        fqcns: list[str] = []
        for attr_value in _attr_values(body, "import"):
            fqcns.extend(filter(None, map(str.strip, attr_value.split(","))))
        if not fqcns:
            continue

        # The line is identical for every FQCN of this directive, so it is
        # computed once rather than once per imported class.
        line = _line_of(source, m.start())
        out.extend(JspImport(fqcn=fqcn, line=line) for fqcn in fqcns)
    return out
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _extract_includes(source: bytes) -> list[JspInclude]:
    """Collect the paths named by ``<%@ include file="..." %>`` directives.

    Directives other than ``include`` and empty ``file`` values are
    skipped. Each result carries the line on which its directive starts.
    """
    found: list[JspInclude] = []
    for directive in _DIRECTIVE_RE.finditer(source):
        kind = directive.group("name").decode("ascii", errors="replace").lower()
        if kind == "include":
            text = directive.group("body").decode("utf-8", errors="replace")
            for target in _attr_values(text, "file"):
                if target:
                    found.append(
                        JspInclude(path=target, line=_line_of(source, directive.start()))
                    )
    return found
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _extract_declarations(source: bytes) -> list[JspDeclaration]:
    """Collect every declaration block matched by ``_DECLARATION_RE``.

    For each match the raw body bytes are kept together with the byte
    offset at which the body starts and the line number of that offset.
    """
    return [
        JspDeclaration(
            content=found.group("body"),
            content_start_offset=found.start("body"),
            content_start_line=_line_of(source, found.start("body")),
        )
        for found in _DECLARATION_RE.finditer(source)
    ]
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _extract_scriptlets(source: bytes) -> list[JspScriptlet]:
    """Collect every scriptlet block matched by ``_SCRIPTLET_RE``.

    Each entry keeps the raw body bytes, the byte offset where the body
    begins, and the line number of that offset.
    """
    scriptlets: list[JspScriptlet] = []
    for found in _SCRIPTLET_RE.finditer(source):
        body_offset = found.start("body")
        scriptlet = JspScriptlet(
            content=found.group("body"),
            content_start_offset=body_offset,
            content_start_line=_line_of(source, body_offset),
        )
        scriptlets.append(scriptlet)
    return scriptlets
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _extract_form_actions(source: bytes) -> list[JspFormAction]:
    """Collect the action URL and HTTP method of every matched ``<form>``.

    The method is looked up inside the text matched by
    ``_FORM_ACTION_RE`` and upper-cased; when none is found it is
    reported as ``GET``.
    """
    actions: list[JspFormAction] = []
    for form in _FORM_ACTION_RE.finditer(source):
        target = form.group("url").decode("utf-8", errors="replace")
        verb = "GET"
        declared = _FORM_METHOD_RE.search(form.group(0))
        if declared is not None:
            verb = declared.group("method").decode("ascii", errors="replace").upper()
        actions.append(
            JspFormAction(method=verb, url=target, line=_line_of(source, form.start()))
        )
    return actions
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _extract_links(source: bytes) -> list[JspLink]:
    """Collect the targets of ``<a href="...">`` elements.

    Links that cannot correspond to a server route are skipped: pure
    fragments (``#...``) and the ``javascript:``, ``mailto:`` and
    ``tel:`` pseudo-protocols. URL schemes are case-insensitive and
    leading whitespace in an ``href`` is not significant, so the check
    is made on a left-stripped, lower-cased copy; the URL stored on the
    result is left exactly as written in the page.
    """
    skipped_prefixes = ("#", "javascript:", "mailto:", "tel:")
    out: list[JspLink] = []
    for m in _A_HREF_RE.finditer(source):
        url = m.group("url").decode("utf-8", errors="replace")
        # Normalise only for the comparison so `JavaScript:` / ` MAILTO:`
        # are filtered the same way as their lower-case spellings.
        if url.lstrip().lower().startswith(skipped_prefixes):
            continue
        out.append(JspLink(url=url, line=_line_of(source, m.start())))
    return out
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def _attr_values(body: str, attr_name: str) -> list[str]:
|
|
226
|
+
"""Pick attribute values from a directive body, e.g.
|
|
227
|
+
|
|
228
|
+
``import="java.util.List" contentType="text/html"`` →
|
|
229
|
+
``_attr_values(body, "import")`` returns ``["java.util.List"]``.
|
|
230
|
+
"""
|
|
231
|
+
pattern = rf"""\b{re.escape(attr_name)}\s*=\s*(?P<q>["'])(?P<value>[^"']*)(?P=q)"""
|
|
232
|
+
return [m.group("value") for m in re.finditer(pattern, body)]
|
|
File without changes
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
"""Unit tests for the JSP indexer plugin."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import textwrap
|
|
6
|
+
from pathlib import Path, PurePosixPath
|
|
7
|
+
|
|
8
|
+
from codemap_jsp import JspIndexer
|
|
9
|
+
from codemap_jsp.indexer import SCHEME
|
|
10
|
+
from codemap_jsp.sfc import extract
|
|
11
|
+
|
|
12
|
+
from codemap.core.models import IndexResult
|
|
13
|
+
from codemap.indexers.base import IndexContext
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _index(source: str, *, path: str = "src/pages/UserList.jsp") -> IndexResult:
    """Run ``JspIndexer`` over an inline JSP snippet and return its result.

    The snippet is dedented and stripped of leading newlines before it
    is UTF-8 encoded, so tests can pass indented triple-quoted strings.
    ``path`` is used both as the file path and as the project-relative
    path.
    """
    payload = textwrap.dedent(source).lstrip("\n").encode("utf-8")
    context = IndexContext(
        project_root=Path("/tmp/proj"),
        relative_path=PurePosixPath(path),
        language="jsp",
    )
    return JspIndexer().index_file(Path(path), payload, context)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# ---------------------------------------------------------------------------
|
|
30
|
+
# Scanner (no parser dependency)
|
|
31
|
+
# ---------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_extract_page_imports() -> None:
    """A comma-separated ``import`` attribute yields one entry per class."""
    directive = b'<%@ page contentType="text/html" import="java.util.List, java.util.Map" %>'
    parsed = extract(directive)
    assert [entry.fqcn for entry in parsed.imports] == ["java.util.List", "java.util.Map"]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_extract_includes() -> None:
    """An ``include`` directive reports the path from its ``file`` attribute."""
    parsed = extract(b'<%@ include file="header.jsp" %>')
    paths = [entry.path for entry in parsed.includes]
    assert paths == ["header.jsp"]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_extract_form_action_and_method() -> None:
    """Both the explicit method and the action URL of a form are captured."""
    form = extract(b'<form action="/save" method="POST">').form_actions[0]
    assert form.method == "POST"
    assert form.url == "/save"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_extract_form_action_default_method_is_get() -> None:
    """A form without a ``method`` attribute is reported as ``GET``."""
    form = extract(b'<form action="/login">').form_actions[0]
    assert form.method == "GET"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_extract_links() -> None:
    """Regular hrefs are collected while fragment-only hrefs are dropped."""
    parsed = extract(b'<a href="/admin">A</a> <a href="#section">B</a>')
    urls = [anchor.url for anchor in parsed.links]
    # `#section` is a pure in-page anchor and is deliberately left out.
    assert urls == ["/admin"]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_extract_scriptlets_vs_declarations() -> None:
    """``<%! %>`` and ``<% %>`` blocks land in separate result lists."""
    parsed = extract(b"<%! private int counter = 0; %>\n<% counter++; %>")

    declarations = parsed.declarations
    assert len(declarations) == 1
    assert b"counter = 0" in declarations[0].content

    scriptlets = parsed.scriptlets
    assert len(scriptlets) == 1
    assert b"counter++" in scriptlets[0].content
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
# Indexer metadata
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def test_indexer_metadata() -> None:
    """The indexer advertises its name, language and supported file patterns."""
    indexer = JspIndexer()
    assert indexer.name == "jsp"
    assert indexer.languages == ["jsp"]
    assert set(indexer.file_patterns) == {"*.jsp", "*.jspx", "*.tag", "*.tagx"}
    for accepted in ("a.jsp", "a.jspx", "a.tag"):
        assert indexer.supports(Path(accepted))
    assert not indexer.supports(Path("a.html"))
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# ---------------------------------------------------------------------------
|
|
88
|
+
# Indexer: page-level symbol
|
|
89
|
+
# ---------------------------------------------------------------------------
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def test_page_symbol_is_class_with_full_range() -> None:
    """The page is emitted as a ``class`` symbol whose range starts on line 1."""
    result = _index(
        """
        <%@ page contentType="text/html" %>
        <html>
        <body>Hello</body>
        </html>
        """
    )
    page_symbol = next(sym for sym in result.symbols if sym.kind == "class")
    assert "UserList" in str(page_symbol.id)
    assert page_symbol.range is not None
    assert page_symbol.range.start_line == 1
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def test_page_symbol_records_imports_in_extra() -> None:
    """Page imports are exposed under ``extra["jsp_imports"]`` of the page symbol."""
    result = _index('<%@ page import="java.util.List, com.example.Foo" %>')
    page_symbol = next(sym for sym in result.symbols if sym.kind == "class")
    recorded = {entry["fqcn"] for entry in page_symbol.extra["jsp_imports"]}
    assert recorded == {"java.util.List", "com.example.Foo"}
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def test_page_symbol_records_form_action_for_http_route_bridge() -> None:
    """A form submission is recorded in ``extra["http_client_calls"]``."""
    result = _index(
        """
        <form action="/api/users" method="POST">
        <input name="email" />
        <button type="submit">Go</button>
        </form>
        """
    )
    page_symbol = next(sym for sym in result.symbols if sym.kind == "class")
    http_calls = page_symbol.extra["http_client_calls"]
    assert any(
        call["method"] == "POST" and call["url"] == "/api/users" for call in http_calls
    )
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def test_page_symbol_records_a_href_link() -> None:
    """An ``<a href>`` is recorded as a ``GET`` in ``extra["http_client_calls"]``."""
    result = _index('<a href="/admin/panel">Admin</a>')
    page_symbol = next(sym for sym in result.symbols if sym.kind == "class")
    http_calls = page_symbol.extra["http_client_calls"]
    assert any(
        call["method"] == "GET" and call["url"] == "/admin/panel" for call in http_calls
    )
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
# ---------------------------------------------------------------------------
|
|
136
|
+
# Declaration block (Java <%! ... %>)
|
|
137
|
+
# ---------------------------------------------------------------------------
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def test_declaration_block_method_indexed() -> None:
    """A method inside ``<%! %>`` becomes a ``method`` symbol with a signature."""
    result = _index(
        """
        <%! public String greet(String name) { return "Hi " + name; } %>
        """
    )
    greet = next(sym for sym in result.symbols if sym.kind == "method")
    assert "greet" in str(greet.id)
    assert greet.signature is not None
    assert "greet" in greet.signature
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def test_declaration_block_field_indexed_as_variable() -> None:
    """A field inside ``<%! %>`` becomes a ``variable`` symbol."""
    result = _index("<%! private int counter = 0; %>")
    field = next(sym for sym in result.symbols if sym.kind == "variable")
    assert "counter" in str(field.id)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def test_declaration_block_nested_class_indexed() -> None:
    """A class nested in ``<%! %>`` is indexed together with its methods.

    The page itself is always emitted as a ``class`` symbol, so merely
    checking that some class exists would pass even if the nested class
    were dropped. The test therefore requires at least two class
    symbols: the page and ``Helper``.
    """
    r = _index(
        """
        <%!
        private class Helper {
        public int compute() { return 42; }
        }
        %>
        """
    )
    kinds = sorted(s.kind for s in r.symbols)
    assert kinds.count("class") >= 2  # page + nested
    assert "method" in kinds  # compute()
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# ---------------------------------------------------------------------------
|
|
174
|
+
# Symbol line translation
|
|
175
|
+
# ---------------------------------------------------------------------------
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def test_declaration_symbol_line_accounts_for_html_above() -> None:
    """Symbol lines are reported relative to the JSP file, not the ``<%! %>`` block."""
    result = _index(
        """
        <%@ page contentType="text/html" %>
        <html>
        <head><title>x</title></head>
        <body>
        <h1>Header</h1>
        <%! public String tag() { return "x"; } %>
        </body>
        </html>
        """
    )
    tag_method = next(sym for sym in result.symbols if sym.kind == "method")
    assert tag_method.range is not None
    # The <%! ... %> sits on line 6 of the dedented source, so the symbol
    # must not be reported on line 1 of the extracted Java snippet.
    assert tag_method.range.start_line >= 6
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
# ---------------------------------------------------------------------------
|
|
199
|
+
# Diagnostics
|
|
200
|
+
# ---------------------------------------------------------------------------
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def test_invalid_utf8_yields_error_diagnostic() -> None:
    """Undecodable bytes produce no symbols and a ``JSP002`` diagnostic."""
    context = IndexContext(
        project_root=Path("/tmp/proj"),
        relative_path=PurePosixPath("bad.jsp"),
        language="jsp",
    )
    result = JspIndexer().index_file(Path("bad.jsp"), b"\xff\xfe garbage", context)
    assert result.symbols == []
    assert result.diagnostics[0].code == "JSP002"
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def test_scheme_constant() -> None:
    """The symbol-id scheme exported by the indexer module stays ``scip-jsp``."""
    expected_scheme = "scip-jsp"
    assert SCHEME == expected_scheme
|