codemap-mybatis 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codemap_mybatis-0.3.0/.gitignore +43 -0
- codemap_mybatis-0.3.0/PKG-INFO +44 -0
- codemap_mybatis-0.3.0/README.md +27 -0
- codemap_mybatis-0.3.0/pyproject.toml +36 -0
- codemap_mybatis-0.3.0/src/codemap_mybatis/__init__.py +5 -0
- codemap_mybatis-0.3.0/src/codemap_mybatis/indexer.py +186 -0
- codemap_mybatis-0.3.0/src/codemap_mybatis/link.py +84 -0
- codemap_mybatis-0.3.0/src/codemap_mybatis/sql_tables.py +56 -0
- codemap_mybatis-0.3.0/tests/__init__.py +0 -0
- codemap_mybatis-0.3.0/tests/test_indexer.py +180 -0
- codemap_mybatis-0.3.0/tests/test_link.py +143 -0
- codemap_mybatis-0.3.0/tests/test_sql_tables.py +56 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# Build artifacts
|
|
7
|
+
build/
|
|
8
|
+
dist/
|
|
9
|
+
*.egg-info/
|
|
10
|
+
*.egg
|
|
11
|
+
.eggs/
|
|
12
|
+
|
|
13
|
+
# Test / coverage
|
|
14
|
+
.pytest_cache/
|
|
15
|
+
.coverage
|
|
16
|
+
.coverage.*
|
|
17
|
+
htmlcov/
|
|
18
|
+
coverage.xml
|
|
19
|
+
.tox/
|
|
20
|
+
.mypy_cache/
|
|
21
|
+
.ruff_cache/
|
|
22
|
+
.benchmarks/
|
|
23
|
+
|
|
24
|
+
# Virtualenv
|
|
25
|
+
.venv/
|
|
26
|
+
venv/
|
|
27
|
+
env/
|
|
28
|
+
|
|
29
|
+
# uv / pdm lockfiles (commit uv.lock once we settle)
|
|
30
|
+
# uv.lock
|
|
31
|
+
|
|
32
|
+
# IDE
|
|
33
|
+
.idea/
|
|
34
|
+
.vscode/
|
|
35
|
+
*.swp
|
|
36
|
+
*.swo
|
|
37
|
+
|
|
38
|
+
# OS
|
|
39
|
+
.DS_Store
|
|
40
|
+
Thumbs.db
|
|
41
|
+
|
|
42
|
+
# CodeMap own index when dogfooding
|
|
43
|
+
.codemap/
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codemap-mybatis
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: MyBatis Mapper XML indexer plugin for CodeMap
|
|
5
|
+
Project-URL: Homepage, https://github.com/qxbyte/codemap
|
|
6
|
+
Author: CodeMap Contributors
|
|
7
|
+
License: MIT
|
|
8
|
+
Keywords: codemap,indexer,mybatis
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Topic :: Software Development
|
|
12
|
+
Requires-Python: >=3.11
|
|
13
|
+
Requires-Dist: codemap-core<0.4,>=0.3.0
|
|
14
|
+
Provides-Extra: dev
|
|
15
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# codemap-mybatis
|
|
19
|
+
|
|
20
|
+
MyBatis Mapper XML indexer plugin for [CodeMap](https://github.com/qxbyte/codemap).
|
|
21
|
+
|
|
22
|
+
## What it does
|
|
23
|
+
|
|
24
|
+
Reads MyBatis mapper XML files (any `*.xml` file whose first 4 KiB contains a `<mapper` tag and a `namespace` attribute) and produces:
|
|
25
|
+
|
|
26
|
+
- `sql_mapping` symbols — one per `<select|insert|update|delete>` statement
|
|
27
|
+
- `table` symbols — one per referenced database table
|
|
28
|
+
- `accesses_table` edges — `sql_mapping → table`, with confidence graded
|
|
29
|
+
by SQL complexity (static SQL → `high`; contains `<if>`/`<foreach>` → `medium`;
|
|
30
|
+
uses `${}` substitution → `low`)
|
|
31
|
+
- `maps_to` edges — `java_method → sql_mapping`, linking the Java Mapper
|
|
32
|
+
interface methods (produced by `codemap-java`) to their backing XML
|
|
33
|
+
statements via `(namespace, stmt_id)`
|
|
34
|
+
|
|
35
|
+
## Scope
|
|
36
|
+
|
|
37
|
+
DDL (CREATE TABLE) lives in `codemap-sql`; this plugin handles the DML
|
|
38
|
+
side (FROM/JOIN/INSERT/UPDATE/DELETE) directly because it needs to grade
|
|
39
|
+
confidence by MyBatis dynamic-SQL constructs.
|
|
40
|
+
|
|
41
|
+
Dynamic SQL (`<if>`, `<foreach>`, `<where>`, `<set>`, `<choose>`, `<trim>`)
|
|
42
|
+
is extracted statically — the runtime-only branches are accepted as
|
|
43
|
+
medium-confidence edges. `${}` raw substitution (e.g. variable table
|
|
44
|
+
names) drops confidence to `low`.
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# codemap-mybatis
|
|
2
|
+
|
|
3
|
+
MyBatis Mapper XML indexer plugin for [CodeMap](https://github.com/qxbyte/codemap).
|
|
4
|
+
|
|
5
|
+
## What it does
|
|
6
|
+
|
|
7
|
+
Reads MyBatis mapper XML files (any `*.xml` file whose first 4 KiB contains a `<mapper` tag and a `namespace` attribute) and produces:
|
|
8
|
+
|
|
9
|
+
- `sql_mapping` symbols — one per `<select|insert|update|delete>` statement
|
|
10
|
+
- `table` symbols — one per referenced database table
|
|
11
|
+
- `accesses_table` edges — `sql_mapping → table`, with confidence graded
|
|
12
|
+
by SQL complexity (static SQL → `high`; contains `<if>`/`<foreach>` → `medium`;
|
|
13
|
+
uses `${}` substitution → `low`)
|
|
14
|
+
- `maps_to` edges — `java_method → sql_mapping`, linking the Java Mapper
|
|
15
|
+
interface methods (produced by `codemap-java`) to their backing XML
|
|
16
|
+
statements via `(namespace, stmt_id)`
|
|
17
|
+
|
|
18
|
+
## Scope
|
|
19
|
+
|
|
20
|
+
DDL (CREATE TABLE) lives in `codemap-sql`; this plugin handles the DML
|
|
21
|
+
side (FROM/JOIN/INSERT/UPDATE/DELETE) directly because it needs to grade
|
|
22
|
+
confidence by MyBatis dynamic-SQL constructs.
|
|
23
|
+
|
|
24
|
+
Dynamic SQL (`<if>`, `<foreach>`, `<where>`, `<set>`, `<choose>`, `<trim>`)
|
|
25
|
+
is extracted statically — the runtime-only branches are accepted as
|
|
26
|
+
medium-confidence edges. `${}` raw substitution (e.g. variable table
|
|
27
|
+
names) drops confidence to `low`.
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling>=1.21"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "codemap-mybatis"
|
|
7
|
+
version = "0.3.0"
|
|
8
|
+
description = "MyBatis Mapper XML indexer plugin for CodeMap"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "CodeMap Contributors" }]
|
|
13
|
+
keywords = ["codemap", "mybatis", "indexer"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Topic :: Software Development",
|
|
18
|
+
]
|
|
19
|
+
dependencies = [
|
|
20
|
+
"codemap-core>=0.3.0,<0.4",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.optional-dependencies]
|
|
24
|
+
dev = ["pytest>=8.0"]
|
|
25
|
+
|
|
26
|
+
[project.entry-points."codemap.indexers"]
|
|
27
|
+
mybatis = "codemap_mybatis:MyBatisIndexer"
|
|
28
|
+
|
|
29
|
+
[project.entry-points."codemap.bridges"]
|
|
30
|
+
mybatis_link = "codemap_mybatis.link:MyBatisLinkBridge"
|
|
31
|
+
|
|
32
|
+
[project.urls]
|
|
33
|
+
Homepage = "https://github.com/qxbyte/codemap"
|
|
34
|
+
|
|
35
|
+
[tool.hatch.build.targets.wheel]
|
|
36
|
+
packages = ["src/codemap_mybatis"]
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""MyBatis Mapper XML indexer.
|
|
2
|
+
|
|
3
|
+
Parses ``<mapper namespace="...">`` files and produces:
|
|
4
|
+
|
|
5
|
+
* one ``sql_mapping`` symbol per ``<select|insert|update|delete>``, keyed by
|
|
6
|
+
``(namespace, statement-id)``; carries ``extra["java_namespace"]`` and
|
|
7
|
+
``extra["java_method_name"]`` so the :class:`MyBatisLinkBridge` can later
|
|
8
|
+
cross-reference back to the Java Mapper interface method that owns it.
|
|
9
|
+
* one ``table`` symbol per referenced DB table (deduped per file).
|
|
10
|
+
* an ``accesses_table`` edge from each ``sql_mapping`` to every table it
|
|
11
|
+
references, with confidence graded by SQL complexity (static → ``high``;
|
|
12
|
+
contains dynamic tags → ``medium``; uses ``${}`` substitution → ``low``).
|
|
13
|
+
|
|
14
|
+
The Java ↔ XML cross-link (``maps_to`` edges) is the bridge's job, not this
|
|
15
|
+
per-file indexer — by design (ADR-0004 separation of indexers and bridges).
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import re
|
|
21
|
+
import xml.etree.ElementTree as ET
|
|
22
|
+
from pathlib import Path, PurePosixPath
|
|
23
|
+
from typing import ClassVar
|
|
24
|
+
|
|
25
|
+
from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
|
|
26
|
+
from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
|
|
27
|
+
from codemap.indexers.base import IndexContext
|
|
28
|
+
from codemap_mybatis.sql_tables import table_refs
|
|
29
|
+
|
|
30
|
+
__all__ = ["SCHEME_MAPPING", "SCHEME_TABLE", "MyBatisIndexer"]
|
|
31
|
+
|
|
32
|
+
SCHEME_MAPPING = "scip-mybatis"
|
|
33
|
+
SCHEME_TABLE = "scip-table"
|
|
34
|
+
LANG_MYBATIS = "mybatis"
|
|
35
|
+
LANG_SQL = "sql"
|
|
36
|
+
|
|
37
|
+
_STMT_TAGS = frozenset({"select", "insert", "update", "delete"})
|
|
38
|
+
_DYNAMIC_TAGS = frozenset({"if", "foreach", "choose", "where", "set", "trim", "when", "otherwise"})
|
|
39
|
+
_SUBSTITUTION_RE = re.compile(r"\$\{")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class MyBatisIndexer:
    """Index one MyBatis mapper XML file into symbols and edges.

    For a document whose root is ``<mapper namespace="...">`` this emits:

    * a ``sql_mapping`` symbol for every direct ``<select|insert|update|delete>``
      child that carries an ``id`` attribute;
    * a ``table`` symbol for every distinct table name found in the statement
      text (deduplicated within the file);
    * an ``accesses_table`` edge from each statement to each table it
      references, graded by :func:`_confidence_for`.
    """

    name: ClassVar[str] = "mybatis"
    version: ClassVar[str] = "0.1.0"
    file_patterns: ClassVar[list[str]] = ["*.xml"]
    languages: ClassVar[list[str]] = [LANG_MYBATIS]

    def supports(self, path: Path) -> bool:
        """Return True when *path* looks like a MyBatis mapper file.

        The check is a cheap sniff: the suffix must be ``.xml`` and the first
        4096 bytes must contain both ``<mapper`` and ``namespace``. An
        unreadable file is reported as unsupported rather than raising.
        """
        if path.suffix != ".xml":
            return False
        try:
            # Read only the sniffed prefix instead of loading the whole file.
            with path.open("rb") as handle:
                head = handle.read(4096)
        except OSError:
            return False
        return b"<mapper" in head and b"namespace" in head

    def _diagnostic_only(
        self, ctx: IndexContext, severity: str, code: str, message: str
    ) -> IndexResult:
        """Build an ``IndexResult`` that carries a single diagnostic and nothing else."""
        return IndexResult(
            diagnostics=[
                Diagnostic(
                    severity=severity,
                    file=ctx.relative_path,
                    code=code,
                    message=message,
                    producer=self.name,
                )
            ]
        )

    def index_file(self, path: Path, source: bytes, ctx: IndexContext) -> IndexResult:
        """Parse *source* and return the symbols and edges found in it.

        Args:
            path: Location of the file; not read here, *source* is used instead.
            source: Raw bytes of the XML document.
            ctx: Indexing context; ``ctx.relative_path`` is recorded on every
                symbol and diagnostic.

        Returns:
            An ``IndexResult`` with symbols and edges, or with a single
            diagnostic (``MB001`` on an XML parse error, ``MB002`` when the
            ``<mapper>`` root has no ``namespace``). A document whose root is
            not ``<mapper>`` yields an empty result.
        """
        # NOTE(review): the stdlib parser is not hardened against maliciously
        # constructed XML — confirm mapper files are trusted input.
        try:
            root = ET.fromstring(source)
        except ET.ParseError as exc:
            return self._diagnostic_only(ctx, "error", "MB001", f"XML parse error: {exc}")
        if root.tag != "mapper":
            return IndexResult()
        namespace = root.get("namespace", "")
        if not namespace:
            return self._diagnostic_only(
                ctx, "warning", "MB002", "mapper element missing namespace attribute"
            )

        symbols: list[Symbol] = []
        edges: list[Edge] = []
        emitted_tables: set[str] = set()

        for child in root:
            tag = child.tag.lower()
            if tag not in _STMT_TAGS:
                continue
            stmt_id = child.get("id")
            if not stmt_id:
                continue
            mapping_sid = _mapping_id(ctx.relative_path, namespace, stmt_id)
            symbols.append(
                Symbol(
                    id=mapping_sid,
                    kind="sql_mapping",
                    language=LANG_MYBATIS,
                    file=ctx.relative_path,
                    range=Range(start_line=1, end_line=1),
                    confidence="high",
                    extra={
                        "java_namespace": namespace,
                        "java_method_name": stmt_id,
                        "sql_verb": tag,
                    },
                )
            )

            # Join with a space so text on either side of a child tag cannot
            # fuse into one identifier: ``FROM t<where>id = 1</where>`` must
            # not be read as ``FROM tid = 1``.
            raw_sql = " ".join(child.itertext())
            conf = _confidence_for(raw_sql, child)
            for table_name, _access in table_refs(raw_sql):
                table_sid = _table_id(table_name)
                if table_name not in emitted_tables:
                    # One table symbol per file, however many statements hit it.
                    symbols.append(
                        Symbol(
                            id=table_sid,
                            kind="table",
                            language=LANG_SQL,
                            file=ctx.relative_path,
                            range=Range(start_line=1, end_line=1),
                            confidence="high",
                        )
                    )
                    emitted_tables.add(table_name)
                edges.append(
                    Edge(
                        source=mapping_sid,
                        target=table_sid,
                        kind="accesses_table",
                        confidence=conf,
                    )
                )

        return IndexResult(symbols=symbols, edges=edges)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# ---------------------------------------------------------------------------
|
|
145
|
+
# SymbolID builders
|
|
146
|
+
# ---------------------------------------------------------------------------
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _mapping_id(rel_path: PurePosixPath, namespace: str, stmt_id: str) -> SymbolID:
    """Build the ``SymbolID`` of one mapper statement.

    The descriptor chain is every segment of *rel_path* (as NAMESPACE
    descriptors), then *namespace* (TYPE), then *stmt_id* (TERM), e.g.
    ``scip-mybatis . . . src/mapper/CouponMapper.xml/com.example.CouponMapper#selectByUser.``
    """
    path_chain = tuple(
        Descriptor(name=segment, kind=DescriptorKind.NAMESPACE)
        for segment in rel_path.parts
    )
    statement_chain = (
        Descriptor(name=namespace, kind=DescriptorKind.TYPE),
        Descriptor(name=stmt_id, kind=DescriptorKind.TERM),
    )
    return SymbolID(scheme=SCHEME_MAPPING, descriptors=path_chain + statement_chain)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _table_id(table_name: str) -> SymbolID:
    """Build the ``SymbolID`` of a table: a single TYPE descriptor under ``SCHEME_TABLE``."""
    table_descriptor = Descriptor(name=table_name, kind=DescriptorKind.TYPE)
    return SymbolID(scheme=SCHEME_TABLE, descriptors=(table_descriptor,))
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _confidence_for(sql: str, stmt_node: ET.Element) -> str:
    """Return the confidence grade (``low`` / ``medium`` / ``high``) for a statement.

    ``${}`` substitution is checked first and wins, because the table name
    itself may be unknown; a dynamic tag anywhere below *stmt_node* gives
    ``medium``; everything else is ``high``.
    """
    if _SUBSTITUTION_RE.search(sql) is not None:
        return "low"
    return "medium" if any(_has_dynamic_child(stmt_node)) else "high"
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _has_dynamic_child(node: ET.Element):  # type: ignore[no-untyped-def]
    """Yield exactly one bool: whether any descendant of *node* is a dynamic tag.

    This is a generator, not a predicate — callers consume it with ``any()``.
    *node* itself is excluded from the scan; tag names are compared
    case-insensitively against ``_DYNAMIC_TAGS``.
    """
    yield any(
        descendant is not node and descendant.tag.lower() in _DYNAMIC_TAGS
        for descendant in node.iter()
    )
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""MyBatisLinkBridge — link sql_mapping symbols back to their Java Mapper methods.
|
|
2
|
+
|
|
3
|
+
Runs after both ``codemap-java`` and ``codemap-mybatis`` indexers have
|
|
4
|
+
populated the store. For every ``sql_mapping`` symbol carrying
|
|
5
|
+
``java_namespace`` + ``java_method_name`` in its ``extra``, look up the Java
|
|
6
|
+
method symbol whose owner class FQN matches the namespace and whose own
|
|
7
|
+
simple name matches the statement id. Emit a ``maps_to`` edge
|
|
8
|
+
``java_method → sql_mapping`` so call-graph queries can hop from caller
|
|
9
|
+
code through the XML statement to the table it accesses.
|
|
10
|
+
|
|
11
|
+
No fuzzy matching: namespace and method name must both be exact. Overloads
|
|
12
|
+
(same simple name, different arity) yield multiple edges — confidence
|
|
13
|
+
``medium`` for every hit because we cannot pick the Java method overload
|
|
14
|
+
that the mapper interface declares without parsing the interface itself
|
|
15
|
+
(which is also already in the store, but parameter-typed matching is out
|
|
16
|
+
of scope for v1).
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from collections import defaultdict
|
|
22
|
+
from typing import Any, ClassVar
|
|
23
|
+
|
|
24
|
+
from codemap.core.models import BridgeResult, Edge
|
|
25
|
+
from codemap.core.store import ReadOnlyStore
|
|
26
|
+
from codemap.core.symbol import DescriptorKind, SymbolID
|
|
27
|
+
|
|
28
|
+
__all__ = ["MyBatisLinkBridge"]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class MyBatisLinkBridge:
    """Bridge emitting ``maps_to`` edges from Java mapper methods to XML statements.

    A statement is linked to a Java method when the statement's
    ``java_namespace`` equals the fully-qualified name of an indexed Java
    class, the method's last descriptor name equals ``java_method_name``, and
    the method lives in the same file as that class. Every hit is emitted at
    ``medium`` confidence.
    """

    name: ClassVar[str] = "mybatis_link"
    version: ClassVar[str] = "0.1.0"
    requires: ClassVar[list[str]] = []

    def resolve(self, store: ReadOnlyStore) -> BridgeResult:
        """Scan *store* once, then pair each ``sql_mapping`` with its Java method(s)."""
        statements: list[Any] = []
        classes_by_fqn: dict[str, Any] = {}
        methods_by_owner: dict[str, list[Any]] = defaultdict(list)

        # Pass 1: bucket the symbols the linker cares about.
        for symbol in store.iter_symbols():
            if symbol.kind == "sql_mapping":
                if symbol.extra.get("java_namespace"):
                    statements.append(symbol)
                continue
            if symbol.language != "java":
                continue
            if symbol.kind == "class":
                # Only classes whose ``extra`` carries an "imports" key are registered.
                if "imports" in symbol.extra:
                    package = str(symbol.extra.get("package", ""))
                    simple_name = symbol.id.descriptors[-1].name
                    qualified = f"{package}.{simple_name}" if package else simple_name
                    classes_by_fqn[qualified] = symbol
            elif symbol.kind == "method":
                chain = symbol.id.descriptors
                if len(chain) >= 2 and chain[-2].kind is DescriptorKind.TYPE:
                    # Keyed by the owner's simple name; the file check below
                    # separates same-named owners.
                    methods_by_owner[chain[-2].name].append(symbol)

        # Pass 2: exact namespace + method-name match, no fuzzy lookup.
        edges: list[Edge] = []
        for statement in statements:
            namespace = str(statement.extra.get("java_namespace", ""))
            method_name = str(statement.extra.get("java_method_name", ""))
            if not (namespace and method_name):
                continue
            owner = classes_by_fqn.get(namespace)
            if owner is None:
                continue
            owner_simple = owner.id.descriptors[-1].name
            edges.extend(
                Edge(
                    source=candidate.id,
                    target=_to_sid(statement.id),
                    kind="maps_to",
                    confidence="medium",
                )
                for candidate in methods_by_owner.get(owner_simple, [])
                if candidate.id.descriptors[-1].name == method_name
                # a different owner class with the same simple name lives elsewhere
                and candidate.file == owner.file
            )
        return BridgeResult(edges=edges)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _to_sid(sid: SymbolID) -> SymbolID:
|
|
82
|
+
"""Re-tag identity helper, kept so the bridge module is the only place
|
|
83
|
+
that imports SymbolID in case the conversion ever needs a tweak."""
|
|
84
|
+
return sid
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""Extract referenced table names + access mode from a (static) SQL string.
|
|
2
|
+
|
|
3
|
+
`codemap-sql` only parses DDL (``CREATE TABLE`` / ``CREATE VIEW`` /
|
|
4
|
+
``CREATE INDEX``); this module covers the DML side that MyBatis statements
|
|
5
|
+
need. Conservative regex over the four standard SQL verbs — good enough to
|
|
6
|
+
grade confidence per the indexer's static / dynamic / ``${}`` rule.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import re
|
|
12
|
+
|
|
13
|
+
__all__ = ["table_refs"]
|
|
14
|
+
|
|
15
|
+
_IDENT = r"[A-Za-z_][A-Za-z0-9_$]*"
_QUALIFIED_IDENT = rf"`?{_IDENT}`?(?:\s*\.\s*`?{_IDENT}`?)?"
# The lookbehind skips the single-space ``DELETE FROM x`` form so the read
# regex does not fire on it. It is only a shortcut: other spacings still match
# here and are dropped later by the write-dominates-read filter in table_refs.
_READ_RE = re.compile(
    rf"(?<!DELETE\s)\b(?:FROM|JOIN)\s+({_QUALIFIED_IDENT})",
    re.IGNORECASE,
)
_WRITE_RE = re.compile(
    rf"\b(?:INSERT\s+INTO|UPDATE|DELETE\s+FROM)\s+({_QUALIFIED_IDENT})",
    re.IGNORECASE,
)
# Clauses in which UPDATE is not a statement verb and is therefore never
# followed by a table name: ``ON DUPLICATE KEY UPDATE col = ...``,
# ``SELECT ... FOR [NO KEY] UPDATE``, ``ON CONFLICT ... DO UPDATE SET ...``.
_CLAUSE_UPDATE_RE = re.compile(
    r"\b(?:ON\s+DUPLICATE\s+KEY|FOR(?:\s+NO\s+KEY)?|DO)\s+UPDATE\b",
    re.IGNORECASE,
)


def _clean(name: str) -> str:
    """Normalise a captured identifier: drop backticks and all whitespace.

    Removing the whitespace that the regex tolerates around the dot makes
    ``mydb . t`` and ``mydb.t`` the same name.
    """
    return "".join(name.replace("`", "").split())


def table_refs(sql: str) -> list[tuple[str, str]]:
    """Return ``(table_name, access)`` tuples, access ∈ {"read","write"}.

    Writes are listed first, then reads, each in order of first appearance
    and without duplicates. Writes dominate reads for the same table: if a
    table appears in both a write and a read context within the same
    statement (e.g. ``INSERT INTO t SELECT * FROM t``), only ``(t, "write")``
    is emitted so the caller never sees one logical table twice.

    ``UPDATE`` keywords that belong to a clause rather than a statement
    (``ON DUPLICATE KEY UPDATE``, ``FOR UPDATE``, ``DO UPDATE``) are blanked
    out first, so the token after them is not mistaken for a table.
    """
    statement = _CLAUSE_UPDATE_RE.sub(" ", sql)

    # dict.fromkeys de-duplicates while keeping first-seen order.
    writes = list(
        dict.fromkeys(_clean(m.group(1)) for m in _WRITE_RE.finditer(statement))
    )
    reads = dict.fromkeys(_clean(m.group(1)) for m in _READ_RE.finditer(statement))

    written = set(writes)
    out: list[tuple[str, str]] = [(name, "write") for name in writes]
    out.extend((name, "read") for name in reads if name not in written)
    return out
|
|
File without changes
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""Tests for ``codemap_mybatis.indexer.MyBatisIndexer``."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path, PurePosixPath
|
|
6
|
+
|
|
7
|
+
from codemap_mybatis import MyBatisIndexer
|
|
8
|
+
|
|
9
|
+
from codemap.core.models import IndexResult
|
|
10
|
+
from codemap.indexers.base import IndexContext
|
|
11
|
+
|
|
12
|
+
_XML_SIMPLE = b"""<?xml version="1.0"?>
|
|
13
|
+
<mapper namespace="com.example.CouponMapper">
|
|
14
|
+
<select id="selectByUser" resultType="x">
|
|
15
|
+
SELECT * FROM sf_coupon WHERE uid = #{id}
|
|
16
|
+
</select>
|
|
17
|
+
</mapper>
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
_XML_DYNAMIC = b"""<?xml version="1.0"?>
|
|
21
|
+
<mapper namespace="com.example.CouponMapper">
|
|
22
|
+
<select id="selectByUser" resultType="x">
|
|
23
|
+
SELECT * FROM sf_coupon
|
|
24
|
+
<if test="uid != null">WHERE uid = #{id}</if>
|
|
25
|
+
</select>
|
|
26
|
+
</mapper>
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
_XML_DOLLAR = b"""<?xml version="1.0"?>
|
|
30
|
+
<mapper namespace="com.example.M">
|
|
31
|
+
<select id="dyn" resultType="x">
|
|
32
|
+
SELECT * FROM ${tableName}
|
|
33
|
+
</select>
|
|
34
|
+
</mapper>
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
_XML_MIXED = b"""<?xml version="1.0"?>
|
|
38
|
+
<mapper namespace="com.example.M">
|
|
39
|
+
<select id="get" resultType="x">SELECT * FROM sf_coupon WHERE id=#{i}</select>
|
|
40
|
+
<update id="touch">UPDATE sf_coupon SET ts = NOW() WHERE id=#{i}</update>
|
|
41
|
+
<insert id="add">INSERT INTO sf_coupon (a) VALUES (#{a})</insert>
|
|
42
|
+
<delete id="del">DELETE FROM sf_coupon WHERE id=#{i}</delete>
|
|
43
|
+
</mapper>
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _index(source: bytes, *, rel: str = "src/mapper/CouponMapper.xml") -> IndexResult:
    """Run ``MyBatisIndexer`` over *source* as if it lived at ``/tmp/<rel>``."""
    indexer = MyBatisIndexer()
    absolute = Path(f"/tmp/{rel}")
    context = IndexContext(
        project_root=Path("/tmp"),
        relative_path=PurePosixPath(rel),
        language="mybatis",
    )
    return indexer.index_file(absolute, source, context)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# ---------------------------------------------------------------------------
|
|
61
|
+
# Indexer metadata
|
|
62
|
+
# ---------------------------------------------------------------------------
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def test_indexer_metadata() -> None:
    """The indexer advertises its plugin name and the XML glob."""
    indexer = MyBatisIndexer()
    patterns = indexer.file_patterns
    assert indexer.name == "mybatis"
    assert "*.xml" in patterns
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_supports_only_mapper_xml(tmp_path: Path) -> None:
    """An arbitrary XML file is rejected; a mapper XML file is accepted."""
    indexer = MyBatisIndexer()

    not_a_mapper = tmp_path / "plain.xml"
    not_a_mapper.write_bytes(b"<root/>")
    assert not indexer.supports(not_a_mapper)

    real_mapper = tmp_path / "M.xml"
    real_mapper.write_bytes(_XML_SIMPLE)
    assert indexer.supports(real_mapper)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def test_non_xml_file_not_supported(tmp_path: Path) -> None:
    """A file without the ``.xml`` suffix is rejected."""
    java_file = tmp_path / "a.java"
    java_file.write_bytes(b"")
    indexer = MyBatisIndexer()
    assert not indexer.supports(java_file)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# ---------------------------------------------------------------------------
|
|
88
|
+
# Symbols
|
|
89
|
+
# ---------------------------------------------------------------------------
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def test_sql_mapping_symbol_emitted() -> None:
    """One ``<select>`` yields one ``sql_mapping`` symbol with the linking metadata."""
    result = _index(_XML_SIMPLE)
    mappings = [sym for sym in result.symbols if sym.kind == "sql_mapping"]
    assert len(mappings) == 1
    mapping = mappings[0]
    assert mapping.id.descriptors[-1].name == "selectByUser"
    expected_extra = {
        "java_namespace": "com.example.CouponMapper",
        "java_method_name": "selectByUser",
        "sql_verb": "select",
    }
    assert mapping.extra == expected_extra
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def test_table_symbol_emitted() -> None:
    """The referenced table shows up as exactly one ``table`` symbol."""
    result = _index(_XML_SIMPLE)
    table_symbols = [sym for sym in result.symbols if sym.kind == "table"]
    assert len(table_symbols) == 1
    only_table = table_symbols[0]
    assert only_table.id.descriptors[-1].name == "sf_coupon"
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def test_accesses_table_edge_high_confidence_for_static_sql() -> None:
    """Static SQL produces a single ``accesses_table`` edge graded ``high``."""
    result = _index(_XML_SIMPLE)
    table_edges = [edge for edge in result.edges if edge.kind == "accesses_table"]
    assert len(table_edges) == 1
    assert table_edges[0].confidence == "high"
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def test_dynamic_sql_lowers_to_medium() -> None:
    """A statement containing ``<if>`` yields exactly one edge, graded ``medium``.

    The fixture has one statement touching one table, so the full list of
    confidences is pinned rather than merely checking that some edge is
    ``medium`` — a stray edge of another grade would otherwise go unnoticed.
    """
    r = _index(_XML_DYNAMIC)
    edges = [e for e in r.edges if e.kind == "accesses_table"]
    assert [e.confidence for e in edges] == ["medium"]
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def test_dollar_substitution_lowers_to_low() -> None:
    """``${}`` substitution never produces an edge stronger than ``low``.

    ``FROM ${tableName}`` matches no verb pattern today, so no table edge is
    emitted and the ``all()`` below holds over an empty list. If the regex
    ever evolves to capture the placeholder, the resulting edge must land at
    ``low``. The statement itself must still be indexed in both cases.
    """
    r = _index(_XML_DOLLAR)
    # The statement symbol is emitted even though its table is unknown.
    assert [s.kind for s in r.symbols if s.kind == "sql_mapping"] == ["sql_mapping"]
    edges = [e for e in r.edges if e.kind == "accesses_table"]
    # The previous membership check in {"low", "medium", "high"} could not fail.
    assert all(e.confidence == "low" for e in edges)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
# ---------------------------------------------------------------------------
|
|
136
|
+
# Multiple statements + dedup
|
|
137
|
+
# ---------------------------------------------------------------------------
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def test_all_four_verbs_produce_mappings() -> None:
    """select / update / insert / delete each contribute one mapping symbol."""
    result = _index(_XML_MIXED)
    statement_ids = set()
    for sym in result.symbols:
        if sym.kind == "sql_mapping":
            statement_ids.add(sym.id.descriptors[-1].name)
    assert statement_ids == {"get", "touch", "add", "del"}
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def test_same_table_dedup_in_one_file() -> None:
    """Four statements on ``sf_coupon`` still give one table symbol for the file."""
    result = _index(_XML_MIXED)
    table_count = sum(1 for sym in result.symbols if sym.kind == "table")
    assert table_count == 1
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def test_edges_count_matches_statements() -> None:
    """Each of the four statements contributes one ``accesses_table`` edge."""
    result = _index(_XML_MIXED)
    edge_count = sum(1 for edge in result.edges if edge.kind == "accesses_table")
    assert edge_count == 4
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# ---------------------------------------------------------------------------
|
|
161
|
+
# Malformed inputs
|
|
162
|
+
# ---------------------------------------------------------------------------
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def test_parse_error_yields_diagnostic_not_crash() -> None:
    """Truncated XML is reported as ``MB001`` and yields no symbols."""
    result = _index(b"<mapper namespace='x'><bad")
    codes = [diag.code for diag in result.diagnostics]
    assert result.symbols == []
    assert "MB001" in codes
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def test_missing_namespace_yields_diagnostic() -> None:
    """A ``<mapper>`` without ``namespace`` is reported as ``MB002`` and yields no symbols."""
    result = _index(b"<mapper><select id='a'>SELECT * FROM x</select></mapper>")
    codes = [diag.code for diag in result.diagnostics]
    assert result.symbols == []
    assert "MB002" in codes
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def test_non_mapper_root_silently_ignored() -> None:
    """XML whose root is not ``<mapper>`` produces neither symbols nor diagnostics."""
    result = _index(b"<root><x/></root>")
    assert (result.symbols, result.diagnostics) == ([], [])
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""Tests for ``MyBatisLinkBridge`` — link sql_mapping ↔ java method."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path, PurePosixPath
|
|
6
|
+
|
|
7
|
+
from codemap_java import JavaIndexer
|
|
8
|
+
from codemap_mybatis import MyBatisIndexer
|
|
9
|
+
from codemap_mybatis.link import MyBatisLinkBridge
|
|
10
|
+
|
|
11
|
+
from codemap.core.models import Symbol
|
|
12
|
+
from codemap.indexers.base import IndexContext
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class _StubStore:
|
|
16
|
+
def __init__(self, symbols: list[Symbol]) -> None:
|
|
17
|
+
self._symbols = symbols
|
|
18
|
+
|
|
19
|
+
def iter_symbols(self): # type: ignore[no-untyped-def]
|
|
20
|
+
return iter(self._symbols)
|
|
21
|
+
|
|
22
|
+
def get(self, sid): # pragma: no cover
|
|
23
|
+
return None
|
|
24
|
+
|
|
25
|
+
def iter_edges(self): # pragma: no cover
|
|
26
|
+
return iter([])
|
|
27
|
+
|
|
28
|
+
def callers(self, sid, *, depth=1): # pragma: no cover
|
|
29
|
+
return []
|
|
30
|
+
|
|
31
|
+
def callees(self, sid, *, depth=1): # pragma: no cover
|
|
32
|
+
return []
|
|
33
|
+
|
|
34
|
+
def search(self, query, *, limit=10): # pragma: no cover
|
|
35
|
+
return []
|
|
36
|
+
|
|
37
|
+
def manifest(self): # pragma: no cover
|
|
38
|
+
raise NotImplementedError
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _index_java(source: str, *, rel: str) -> list[Symbol]:
    """Index Java *source* as ``/tmp/<rel>`` and return the emitted symbols."""
    indexer = JavaIndexer()
    absolute = Path(f"/tmp/{rel}")
    payload = source.encode("utf-8")
    context = IndexContext(
        project_root=Path("/tmp"),
        relative_path=PurePosixPath(rel),
        language="java",
    )
    result = indexer.index_file(absolute, payload, context)
    return list(result.symbols)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _index_xml(source: bytes, *, rel: str) -> list[Symbol]:
    """Index mapper XML *source* as ``/tmp/<rel>`` and return the emitted symbols."""
    indexer = MyBatisIndexer()
    absolute = Path(f"/tmp/{rel}")
    context = IndexContext(
        project_root=Path("/tmp"),
        relative_path=PurePosixPath(rel),
        language="mybatis",
    )
    result = indexer.index_file(absolute, source, context)
    return list(result.symbols)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _has_maps_to(edges, src_simple: str, tgt_simple: str) -> bool:
|
|
74
|
+
return any(
|
|
75
|
+
e.kind == "maps_to"
|
|
76
|
+
and e.source.descriptors[-1].name == src_simple
|
|
77
|
+
and e.target.descriptors[-1].name == tgt_simple
|
|
78
|
+
for e in edges
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# ---------------------------------------------------------------------------
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def test_bridge_metadata() -> None:
    """The bridge advertises its registered plugin name."""
    bridge = MyBatisLinkBridge()
    assert bridge.name == "mybatis_link"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def test_maps_java_method_to_sql_mapping() -> None:
    """An interface method and the same-named statement in its namespace get linked."""
    java_source = """
package com.example;
public interface CouponMapper {
int selectByUser(long userId);
}
"""
    mapper_xml = b"""<?xml version='1.0'?>
<mapper namespace="com.example.CouponMapper">
<select id="selectByUser" resultType="x">SELECT * FROM sf_coupon</select>
</mapper>"""
    java_symbols = _index_java(java_source, rel="src/com/example/CouponMapper.java")
    xml_symbols = _index_xml(mapper_xml, rel="src/mapper/CouponMapper.xml")

    outcome = MyBatisLinkBridge().resolve(_StubStore(java_symbols + xml_symbols))

    assert _has_maps_to(outcome.edges, "selectByUser", "selectByUser")
    link_edges = [edge for edge in outcome.edges if edge.kind == "maps_to"]
    assert all(edge.confidence == "medium" for edge in link_edges)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def test_no_match_when_namespace_unknown() -> None:
    """Mapper XML points at a namespace that isn't indexed → no edge."""
    mapper_xml = b"""<?xml version='1.0'?>
<mapper namespace="com.example.NotIndexedMapper">
<select id="x">SELECT * FROM t</select>
</mapper>"""
    xml_symbols = _index_xml(mapper_xml, rel="src/mapper/x.xml")
    outcome = MyBatisLinkBridge().resolve(_StubStore(xml_symbols))
    assert outcome.edges == []
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def test_method_name_must_match_exactly() -> None:
    """A statement id that differs from every interface method name yields no edge."""
    java_source = """
package com.example;
public interface M {
int findOne(long id);
}
"""
    mapper_xml = b"""<?xml version='1.0'?>
<mapper namespace="com.example.M">
<select id="findTwo">SELECT * FROM t</select>
</mapper>"""
    java_symbols = _index_java(java_source, rel="src/com/example/M.java")
    xml_symbols = _index_xml(mapper_xml, rel="src/mapper/M.xml")
    outcome = MyBatisLinkBridge().resolve(_StubStore(java_symbols + xml_symbols))
    assert outcome.edges == []
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""Tests for DML table-reference extraction."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from codemap_mybatis.sql_tables import table_refs
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_simple_select_from() -> None:
    """A bare ``SELECT ... FROM`` reports the table as a read."""
    assert ("sf_coupon", "read") in table_refs("SELECT * FROM sf_coupon")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_join_marks_as_read() -> None:
    """Both the ``FROM`` table and the ``JOIN`` table are reported as reads."""
    found = table_refs("SELECT * FROM sf_coupon c JOIN sf_user u ON c.uid = u.id")
    for expected in (("sf_coupon", "read"), ("sf_user", "read")):
        assert expected in found
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_insert_into_marks_write() -> None:
    """``INSERT INTO`` reports its target as a write."""
    found = table_refs("INSERT INTO sf_coupon (a) VALUES (1)")
    assert ("sf_coupon", "write") in found
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_update_marks_write() -> None:
    """``UPDATE`` reports its target as a write."""
    found = table_refs("UPDATE sf_coupon SET n = 1 WHERE id = 2")
    assert ("sf_coupon", "write") in found
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_delete_marks_write() -> None:
    """``DELETE FROM`` reports its target as a write."""
    found = table_refs("DELETE FROM sf_coupon WHERE id = 1")
    assert ("sf_coupon", "write") in found
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_backticked_identifier_cleaned() -> None:
    """Backticks around a table name are stripped from the reported name."""
    assert ("sf_coupon", "read") in table_refs("SELECT * FROM `sf_coupon`")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_qualified_identifier_kept() -> None:
    """A ``schema.table`` reference is reported with its qualifier intact."""
    refs = table_refs("SELECT * FROM mydb.sf_coupon")
    # Qualified name preserved as-is; downstream consumers decide normalisation.
    # Pin the full name: a suffix check would also pass if the qualifier were
    # mangled or dropped.
    assert ("mydb.sf_coupon", "read") in refs
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def test_case_insensitive_verbs() -> None:
    """Lower-case SQL verbs are recognised just like upper-case ones."""
    read_refs = table_refs("select * from sf_x")
    write_refs = table_refs("update sf_x set a=1")
    assert ("sf_x", "read") in read_refs
    assert ("sf_x", "write") in write_refs
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_no_dml_returns_empty() -> None:
    """DDL and the empty string contain no DML table references."""
    for statement in ("CREATE TABLE x (id int)", ""):
        assert table_refs(statement) == []
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_write_dominates_over_read_for_same_table() -> None:
    """A table both written and read in one statement is reported once, as a write."""
    refs = table_refs("INSERT INTO sf_coupon SELECT * FROM sf_coupon")
    # Write dominates: the read of the same table is suppressed, so the
    # caller never sees one logical table under two access modes.
    assert ("sf_coupon", "write") in refs
    assert ("sf_coupon", "read") not in refs
|