codemap-mybatis 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codemap_mybatis/__init__.py +5 -0
- codemap_mybatis/indexer.py +186 -0
- codemap_mybatis/link.py +84 -0
- codemap_mybatis/sql_tables.py +56 -0
- codemap_mybatis-0.3.0.dist-info/METADATA +44 -0
- codemap_mybatis-0.3.0.dist-info/RECORD +8 -0
- codemap_mybatis-0.3.0.dist-info/WHEEL +4 -0
- codemap_mybatis-0.3.0.dist-info/entry_points.txt +5 -0
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""MyBatis Mapper XML indexer.
|
|
2
|
+
|
|
3
|
+
Parses ``<mapper namespace="...">`` files and produces:
|
|
4
|
+
|
|
5
|
+
* one ``sql_mapping`` symbol per ``<select|insert|update|delete>``, keyed by
|
|
6
|
+
``(namespace, statement-id)``; carries ``extra["java_namespace"]`` and
|
|
7
|
+
``extra["java_method_name"]`` so the :class:`MyBatisLinkBridge` can later
|
|
8
|
+
cross-reference back to the Java Mapper interface method that owns it.
|
|
9
|
+
* one ``table`` symbol per referenced DB table (deduped per file).
|
|
10
|
+
* an ``accesses_table`` edge from each ``sql_mapping`` to every table it
|
|
11
|
+
references, with confidence graded by SQL complexity (static → ``high``;
|
|
12
|
+
contains dynamic tags → ``medium``; uses ``${}`` substitution → ``low``).
|
|
13
|
+
|
|
14
|
+
The Java ↔ XML cross-link (``maps_to`` edges) is the bridge's job, not this
|
|
15
|
+
per-file indexer — by design (ADR-0004 separation of indexers and bridges).
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import re
|
|
21
|
+
import xml.etree.ElementTree as ET
|
|
22
|
+
from pathlib import Path, PurePosixPath
|
|
23
|
+
from typing import ClassVar
|
|
24
|
+
|
|
25
|
+
from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
|
|
26
|
+
from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
|
|
27
|
+
from codemap.indexers.base import IndexContext
|
|
28
|
+
from codemap_mybatis.sql_tables import table_refs
|
|
29
|
+
|
|
30
|
+
__all__ = ["SCHEME_MAPPING", "SCHEME_TABLE", "MyBatisIndexer"]
|
|
31
|
+
|
|
32
|
+
# SymbolID schemes: one for MyBatis statement symbols, one for DB table symbols.
SCHEME_MAPPING = "scip-mybatis"
SCHEME_TABLE = "scip-table"

# Language tags stamped on the symbols this module emits.
LANG_MYBATIS = "mybatis"
LANG_SQL = "sql"

# Child elements of <mapper> that are indexed as SQL statements.
_STMT_TAGS = frozenset(("select", "insert", "update", "delete"))

# Elements whose presence anywhere below a statement marks its SQL as dynamic.
_DYNAMIC_TAGS = frozenset(
    (
        "if",
        "foreach",
        "choose",
        "where",
        "set",
        "trim",
        "when",
        "otherwise",
    )
)

# A literal "${" in the statement text signals raw text substitution.
_SUBSTITUTION_RE = re.compile(r"\$\{")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class MyBatisIndexer:
    """Per-file indexer for MyBatis mapper XML.

    For each ``<select|insert|update|delete>`` that is a direct child of the
    ``<mapper>`` root and has an ``id``, emits one ``sql_mapping`` symbol,
    one ``table`` symbol per distinct referenced table (deduplicated per
    file) and one ``accesses_table`` edge per (statement, table) pair.
    """

    name: ClassVar[str] = "mybatis"
    version: ClassVar[str] = "0.1.0"
    file_patterns: ClassVar[list[str]] = ["*.xml"]
    languages: ClassVar[list[str]] = [LANG_MYBATIS]

    def supports(self, path: Path) -> bool:
        """Return ``True`` when *path* looks like a MyBatis mapper file.

        The check is a cheap sniff: the suffix must be ``.xml`` and the first
        4096 bytes must contain both ``<mapper`` and ``namespace``. Unreadable
        files are reported as unsupported rather than raising.
        """
        if path.suffix != ".xml":
            return False
        try:
            # Read only the sniffed prefix instead of loading the whole file.
            with path.open("rb") as handle:
                head = handle.read(4096)
        except OSError:
            return False
        return b"<mapper" in head and b"namespace" in head

    def index_file(self, path: Path, source: bytes, ctx: IndexContext) -> IndexResult:
        """Parse one mapper file and return its symbols, edges and diagnostics.

        Args:
            path: Location of the file (not read here; *source* is used).
            source: Raw XML bytes of the file.
            ctx: Index context; ``ctx.relative_path`` is used as the file of
                every emitted symbol and diagnostic.

        Returns:
            * an ``MB001`` error diagnostic when the XML cannot be parsed;
            * an empty result when the root element is not ``mapper``;
            * an ``MB002`` warning diagnostic when the namespace is missing
              or empty;
            * otherwise the collected symbols and edges.
        """
        try:
            root = ET.fromstring(source)
        except ET.ParseError as exc:
            return IndexResult(
                diagnostics=[
                    Diagnostic(
                        severity="error",
                        file=ctx.relative_path,
                        code="MB001",
                        message=f"XML parse error: {exc}",
                        producer=self.name,
                    )
                ]
            )
        if root.tag != "mapper":
            # Some other XML document that merely passed the byte sniff.
            return IndexResult()
        namespace = root.get("namespace", "")
        if not namespace:
            return IndexResult(
                diagnostics=[
                    Diagnostic(
                        severity="warning",
                        file=ctx.relative_path,
                        code="MB002",
                        message="mapper element missing namespace attribute",
                        producer=self.name,
                    )
                ]
            )

        symbols: list[Symbol] = []
        edges: list[Edge] = []
        # Table names already given a symbol for this file.
        emitted_tables: set[str] = set()

        for child in root:
            tag = child.tag.lower()
            if tag not in _STMT_TAGS:
                continue
            stmt_id = child.get("id")
            if not stmt_id:
                # Without an id there is nothing to key the symbol on.
                continue
            mapping_sid = _mapping_id(ctx.relative_path, namespace, stmt_id)
            symbols.append(
                Symbol(
                    id=mapping_sid,
                    kind="sql_mapping",
                    language=LANG_MYBATIS,
                    file=ctx.relative_path,
                    # Placeholder range: no source positions are taken from
                    # the parsed tree, so every symbol points at line 1.
                    range=Range(start_line=1, end_line=1),
                    confidence="high",
                    extra={
                        "java_namespace": namespace,
                        "java_method_name": stmt_id,
                        "sql_verb": tag,
                    },
                )
            )

            # Concatenate the text of the statement and all nested elements
            # so SQL inside dynamic tags is scanned for tables as well.
            raw_sql = "".join(child.itertext())
            conf = _confidence_for(raw_sql, child)
            for table_name, _access in table_refs(raw_sql):
                table_sid = _table_id(table_name)
                if table_name not in emitted_tables:
                    symbols.append(
                        Symbol(
                            id=table_sid,
                            kind="table",
                            language=LANG_SQL,
                            file=ctx.relative_path,
                            range=Range(start_line=1, end_line=1),
                            confidence="high",
                        )
                    )
                    emitted_tables.add(table_name)
                edges.append(
                    Edge(
                        source=mapping_sid,
                        target=table_sid,
                        kind="accesses_table",
                        confidence=conf,
                    )
                )

        return IndexResult(symbols=symbols, edges=edges)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# ---------------------------------------------------------------------------
|
|
145
|
+
# SymbolID builders
|
|
146
|
+
# ---------------------------------------------------------------------------
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _mapping_id(rel_path: PurePosixPath, namespace: str, stmt_id: str) -> SymbolID:
    """Build the SymbolID of one mapper statement.

    Every segment of *rel_path* becomes a NAMESPACE descriptor, followed by
    the mapper namespace as a TYPE descriptor and the statement id as a TERM
    descriptor, e.g.
    ``scip-mybatis . . . src/mapper/CouponMapper.xml/com.example.CouponMapper#selectByUser.``
    """
    path_descriptors = [
        Descriptor(name=segment, kind=DescriptorKind.NAMESPACE)
        for segment in rel_path.parts
    ]
    owner = Descriptor(name=namespace, kind=DescriptorKind.TYPE)
    statement = Descriptor(name=stmt_id, kind=DescriptorKind.TERM)
    return SymbolID(
        scheme=SCHEME_MAPPING,
        descriptors=(*path_descriptors, owner, statement),
    )
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _table_id(table_name: str) -> SymbolID:
    """Build the SymbolID of a DB table: a single TYPE descriptor named *table_name*."""
    table_descriptor = Descriptor(name=table_name, kind=DescriptorKind.TYPE)
    return SymbolID(scheme=SCHEME_TABLE, descriptors=(table_descriptor,))
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _confidence_for(sql: str, stmt_node: ET.Element) -> str:
    """Return the confidence grade for table references found in *sql*.

    ``"low"`` when the text contains a ``${`` substitution (checked first,
    since the table name itself may then be unknown), ``"medium"`` when the
    statement element has a dynamic-SQL descendant, ``"high"`` otherwise.
    """
    if _SUBSTITUTION_RE.search(sql) is not None:
        return "low"
    is_dynamic = any(_has_dynamic_child(stmt_node))
    return "medium" if is_dynamic else "high"
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _has_dynamic_child(node: ET.Element):  # type: ignore[no-untyped-def]
    """Yield exactly one bool: whether any descendant of *node* is a dynamic tag.

    This is a generator, not a predicate; callers must consume it (for
    example with ``any(...)``) rather than test the return value directly.
    *node* itself is excluded from the check.
    """
    yield any(
        descendant is not node and descendant.tag.lower() in _DYNAMIC_TAGS
        for descendant in node.iter()
    )
|
codemap_mybatis/link.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""MyBatisLinkBridge — link sql_mapping symbols back to their Java Mapper methods.
|
|
2
|
+
|
|
3
|
+
Runs after both ``codemap-java`` and ``codemap-mybatis`` indexers have
|
|
4
|
+
populated the store. For every ``sql_mapping`` symbol carrying
|
|
5
|
+
``java_namespace`` + ``java_method_name`` in its ``extra``, look up the Java
|
|
6
|
+
method symbol whose owner class FQN matches the namespace and whose own
|
|
7
|
+
simple name matches the statement id. Emit a ``maps_to`` edge
|
|
8
|
+
``java_method → sql_mapping`` so call-graph queries can hop from caller
|
|
9
|
+
code through the XML statement to the table it accesses.
|
|
10
|
+
|
|
11
|
+
No fuzzy matching: namespace and method name must both be exact. Overloads
|
|
12
|
+
(same simple name, different arity) yield multiple edges — confidence
|
|
13
|
+
``medium`` for every hit because we cannot pick the Java method overload
|
|
14
|
+
that the mapper interface declares without parsing the interface itself
|
|
15
|
+
(which is also already in the store, but parameter-typed matching is out
|
|
16
|
+
of scope for v1).
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from collections import defaultdict
|
|
22
|
+
from typing import Any, ClassVar
|
|
23
|
+
|
|
24
|
+
from codemap.core.models import BridgeResult, Edge
|
|
25
|
+
from codemap.core.store import ReadOnlyStore
|
|
26
|
+
from codemap.core.symbol import DescriptorKind, SymbolID
|
|
27
|
+
|
|
28
|
+
__all__ = ["MyBatisLinkBridge"]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class MyBatisLinkBridge:
    """Bridge that links Java mapper methods to their ``sql_mapping`` symbols."""

    name: ClassVar[str] = "mybatis_link"
    version: ClassVar[str] = "0.1.0"
    requires: ClassVar[list[str]] = []

    def resolve(self, store: ReadOnlyStore) -> BridgeResult:
        """Emit a ``maps_to`` edge for every Java method backing a statement.

        A method matches a ``sql_mapping`` when the mapping's
        ``java_namespace`` equals the FQN of a Java class symbol, the method
        is owned by a type with that class's simple name, the method's own
        name equals ``java_method_name``, and method and class live in the
        same file. Every match gets confidence ``medium``.
        """
        mappings: list[Any] = []
        classes_by_fqn: dict[str, Any] = {}
        methods_by_owner: dict[str, list[Any]] = defaultdict(list)

        # Pass 1: bucket the symbols the matching step needs.
        for symbol in store.iter_symbols():
            if symbol.kind == "sql_mapping" and symbol.extra.get("java_namespace"):
                mappings.append(symbol)
                continue
            if symbol.language != "java":
                continue
            if symbol.kind == "class":
                if "imports" in symbol.extra:
                    package = str(symbol.extra.get("package", ""))
                    simple_name = symbol.id.descriptors[-1].name
                    if package:
                        fqn = f"{package}.{simple_name}"
                    else:
                        fqn = simple_name
                    classes_by_fqn[fqn] = symbol
            elif symbol.kind == "method":
                path = symbol.id.descriptors
                if len(path) >= 2 and path[-2].kind is DescriptorKind.TYPE:
                    methods_by_owner[path[-2].name].append(symbol)

        # Pass 2: exact (namespace, method name) matching, no fuzzy fallback.
        edges: list[Edge] = []
        for mapping in mappings:
            namespace = str(mapping.extra.get("java_namespace", ""))
            method_name = str(mapping.extra.get("java_method_name", ""))
            if not (namespace and method_name):
                continue
            owner = classes_by_fqn.get(namespace)
            if owner is None:
                continue
            owner_simple = owner.id.descriptors[-1].name
            edges.extend(
                Edge(
                    source=candidate.id,
                    target=_to_sid(mapping.id),
                    kind="maps_to",
                    confidence="medium",
                )
                for candidate in methods_by_owner.get(owner_simple, [])
                # The file check rejects a different owner class that merely
                # shares the same simple name.
                if candidate.id.descriptors[-1].name == method_name
                and candidate.file == owner.file
            )
        return BridgeResult(edges=edges)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _to_sid(sid: SymbolID) -> SymbolID:
|
|
82
|
+
"""Re-tag identity helper, kept so the bridge module is the only place
|
|
83
|
+
that imports SymbolID in case the conversion ever needs a tweak."""
|
|
84
|
+
return sid
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""Extract referenced table names + access mode from a (static) SQL string.
|
|
2
|
+
|
|
3
|
+
``codemap-sql`` only parses DDL (``CREATE TABLE`` / ``CREATE VIEW`` /
|
|
4
|
+
``CREATE INDEX``); this module covers the DML side that MyBatis statements
|
|
5
|
+
need. Conservative regex over the four standard SQL verbs — good enough to
|
|
6
|
+
grade confidence per the indexer's static / dynamic / ``${}`` rule.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import re
|
|
12
|
+
|
|
13
|
+
__all__ = ["table_refs"]
|
|
14
|
+
|
|
15
|
+
_IDENT = r"[A-Za-z_][A-Za-z0-9_$]*"
_QUALIFIED_IDENT = rf"`?{_IDENT}`?(?:\s*\.\s*`?{_IDENT}`?)?"
# Negative-lookbehind keeps `DELETE FROM x` from matching the read regex too.
_READ_RE = re.compile(
    rf"(?<!DELETE\s)\b(?:FROM|JOIN)\s+({_QUALIFIED_IDENT})",
    re.IGNORECASE,
)
# ``UPDATE`` only names a written table when it starts a statement. The two
# lookbehinds reject ``ON DUPLICATE KEY UPDATE col = ...`` (the next word is a
# column) and ``SELECT ... FOR UPDATE SKIP LOCKED / NOWAIT / OF t`` (a row-lock
# clause), both of which would otherwise be reported as table writes.
_WRITE_RE = re.compile(
    rf"\b(?:INSERT\s+INTO|(?<!\bKEY\s)(?<!\bFOR\s)UPDATE|DELETE\s+FROM)"
    rf"\s+({_QUALIFIED_IDENT})",
    re.IGNORECASE,
)


def _clean(name: str) -> str:
    """Strip backtick quoting and surrounding whitespace from a matched name."""
    return name.replace("`", "").strip()


def table_refs(sql: str) -> list[tuple[str, str]]:
    """Return ``(table_name, access)`` tuples, access ∈ {"read","write"}.

    Writes are listed first, then reads, each in order of first appearance
    and without duplicates.

    Writes dominate reads for the same table: if a table appears in both a
    write and a read context within the same statement (e.g.
    ``INSERT INTO t SELECT * FROM t``), only ``(t, "write")`` is emitted so
    the caller never sees one logical table twice.

    ``UPDATE`` following ``KEY`` or ``FOR`` is not treated as a table write
    (``ON DUPLICATE KEY UPDATE``, ``SELECT ... FOR UPDATE``).
    """
    # The lookbehinds in the patterns are fixed-width (keyword + exactly one
    # whitespace character). Collapsing whitespace runs first makes them hold
    # for multi-line, indented SQL as found in mapper XML.
    flat_sql = " ".join(sql.split())

    # dict.fromkeys gives order-preserving de-duplication.
    writes = dict.fromkeys(_clean(m.group(1)) for m in _WRITE_RE.finditer(flat_sql))
    reads = dict.fromkeys(_clean(m.group(1)) for m in _READ_RE.finditer(flat_sql))

    out: list[tuple[str, str]] = [(name, "write") for name in writes]
    out.extend((name, "read") for name in reads if name not in writes)
    return out
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codemap-mybatis
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: MyBatis Mapper XML indexer plugin for CodeMap
|
|
5
|
+
Project-URL: Homepage, https://github.com/qxbyte/codemap
|
|
6
|
+
Author: CodeMap Contributors
|
|
7
|
+
License: MIT
|
|
8
|
+
Keywords: codemap,indexer,mybatis
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Topic :: Software Development
|
|
12
|
+
Requires-Python: >=3.11
|
|
13
|
+
Requires-Dist: codemap-core<0.4,>=0.3.0
|
|
14
|
+
Provides-Extra: dev
|
|
15
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# codemap-mybatis
|
|
19
|
+
|
|
20
|
+
MyBatis Mapper XML indexer plugin for [CodeMap](https://github.com/qxbyte/codemap).
|
|
21
|
+
|
|
22
|
+
## What it does
|
|
23
|
+
|
|
24
|
+
Reads MyBatis mapper XML files (any `*.xml` file containing a `<mapper namespace="...">` element) and produces:
|
|
25
|
+
|
|
26
|
+
- `sql_mapping` symbols — one per `<select|insert|update|delete>` statement
|
|
27
|
+
- `table` symbols — one per referenced database table
|
|
28
|
+
- `accesses_table` edges — `sql_mapping → table`, with confidence graded
|
|
29
|
+
by SQL complexity (static SQL → `high`; contains `<if>`/`<foreach>` → `medium`;
|
|
30
|
+
uses `${}` substitution → `low`)
|
|
31
|
+
- `maps_to` edges — `java_method → sql_mapping`, linking the Java Mapper
|
|
32
|
+
interface methods (produced by `codemap-java`) to their backing XML
|
|
33
|
+
statements via `(namespace, stmt_id)`
|
|
34
|
+
|
|
35
|
+
## Scope
|
|
36
|
+
|
|
37
|
+
DDL (CREATE TABLE) lives in `codemap-sql`; this plugin handles the DML
|
|
38
|
+
side (FROM/JOIN/INSERT/UPDATE/DELETE) directly because it needs to grade
|
|
39
|
+
confidence by MyBatis dynamic-SQL constructs.
|
|
40
|
+
|
|
41
|
+
Dynamic SQL (`<if>`, `<foreach>`, `<where>`, `<set>`, `<choose>`, `<trim>`)
|
|
42
|
+
is extracted statically — the runtime-only branches are accepted as
|
|
43
|
+
medium-confidence edges. `${}` raw substitution (e.g. variable table
|
|
44
|
+
names) drops confidence to `low`.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
codemap_mybatis/__init__.py,sha256=VKGyJK3oxRWguXqGno6yLzZCAY50BJAtV_cHB7pwTGQ,123
|
|
2
|
+
codemap_mybatis/indexer.py,sha256=fdAzytr4tQ_wq9fjbi6yWBVVL-zdOCbdvJnTpUR2HSI,6738
|
|
3
|
+
codemap_mybatis/link.py,sha256=sJF75pdwvg2iwb1TEaiBwejZ-sEPEnGAVXzDIYNq_iA,3608
|
|
4
|
+
codemap_mybatis/sql_tables.py,sha256=ZKXvEqdOmwID4ySBmxXAWIljNGGchisl5vv8EDmZzg0,1944
|
|
5
|
+
codemap_mybatis-0.3.0.dist-info/METADATA,sha256=3_TF8oH1w_v5Tb0VE1hZ-yOPmHGsCeMbidQQ0kiawZ0,1691
|
|
6
|
+
codemap_mybatis-0.3.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
7
|
+
codemap_mybatis-0.3.0.dist-info/entry_points.txt,sha256=XojIYn2AsNgKee1sQtURK4MsOJMxwxLt60it5ptEpQ0,133
|
|
8
|
+
codemap_mybatis-0.3.0.dist-info/RECORD,,
|