codemap-mybatis 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ """MyBatis Mapper XML indexer plugin."""
2
+
3
+ from codemap_mybatis.indexer import MyBatisIndexer
4
+
5
+ __all__ = ["MyBatisIndexer"]
@@ -0,0 +1,186 @@
1
+ """MyBatis Mapper XML indexer.
2
+
3
+ Parses ``<mapper namespace="...">`` files and produces:
4
+
5
+ * one ``sql_mapping`` symbol per ``<select|insert|update|delete>``, keyed by
6
+ ``(namespace, statement-id)``; carries ``extra["java_namespace"]`` and
7
+ ``extra["java_method_name"]`` so the :class:`MyBatisLinkBridge` can later
8
+ cross-reference back to the Java Mapper interface method that owns it.
9
+ * one ``table`` symbol per referenced DB table (deduped per file).
10
+ * an ``accesses_table`` edge from each ``sql_mapping`` to every table it
11
+ references, with confidence graded by SQL complexity (static → ``high``;
12
+ contains dynamic tags → ``medium``; uses ``${}`` substitution → ``low``).
13
+
14
+ The Java ↔ XML cross-link (``maps_to`` edges) is the bridge's job, not this
15
+ per-file indexer — by design (ADR-0004 separation of indexers and bridges).
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import re
21
+ import xml.etree.ElementTree as ET
22
+ from pathlib import Path, PurePosixPath
23
+ from typing import ClassVar
24
+
25
+ from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
26
+ from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
27
+ from codemap.indexers.base import IndexContext
28
+ from codemap_mybatis.sql_tables import table_refs
29
+
30
+ __all__ = ["SCHEME_MAPPING", "SCHEME_TABLE", "MyBatisIndexer"]
31
+
32
+ SCHEME_MAPPING = "scip-mybatis"
33
+ SCHEME_TABLE = "scip-table"
34
+ LANG_MYBATIS = "mybatis"
35
+ LANG_SQL = "sql"
36
+
37
+ _STMT_TAGS = frozenset({"select", "insert", "update", "delete"})
38
+ _DYNAMIC_TAGS = frozenset({"if", "foreach", "choose", "where", "set", "trim", "when", "otherwise"})
39
+ _SUBSTITUTION_RE = re.compile(r"\$\{")
40
+
41
+
42
class MyBatisIndexer:
    """Per-file indexer for MyBatis Mapper XML documents.

    For every ``<select|insert|update|delete>`` element directly under the
    ``<mapper>`` root it emits one ``sql_mapping`` symbol, one ``table``
    symbol per distinct table name seen in the file, and one
    ``accesses_table`` edge per (statement, table) pair.
    """

    name: ClassVar[str] = "mybatis"
    version: ClassVar[str] = "0.1.0"
    file_patterns: ClassVar[list[str]] = ["*.xml"]
    languages: ClassVar[list[str]] = [LANG_MYBATIS]

    def supports(self, path: Path) -> bool:
        """Return True when *path* looks like a MyBatis mapper file.

        The check is a cheap sniff: the suffix must be ``.xml`` and both
        ``<mapper`` and ``namespace`` must occur within the first 4096 bytes.
        Unreadable files are reported as unsupported rather than raising.
        """
        if path.suffix != ".xml":
            return False
        try:
            # Read only the head of the file; the whole document is not
            # needed to decide whether this indexer applies.
            with path.open("rb") as handle:
                head = handle.read(4096)
        except OSError:
            return False
        return b"<mapper" in head and b"namespace" in head

    def index_file(self, path: Path, source: bytes, ctx: IndexContext) -> IndexResult:
        """Parse one mapper document and return its symbols and edges.

        Args:
            path: Location of the file; not consulted here, *source* is used.
            source: Raw XML bytes of the mapper file.
            ctx: Index context; ``ctx.relative_path`` is stamped on every
                symbol and diagnostic.

        Returns:
            An ``IndexResult`` carrying either symbols + edges, a single
            ``MB001`` error diagnostic (XML does not parse), a single
            ``MB002`` warning diagnostic (``<mapper>`` without ``namespace``),
            or nothing at all when the root element is not ``<mapper>``.
        """
        try:
            root = ET.fromstring(source)
        except ET.ParseError as exc:
            return IndexResult(
                diagnostics=[
                    Diagnostic(
                        severity="error",
                        file=ctx.relative_path,
                        code="MB001",
                        message=f"XML parse error: {exc}",
                        producer=self.name,
                    )
                ]
            )
        if root.tag != "mapper":
            # Some other XML dialect slipped past supports(); not our file.
            return IndexResult()
        namespace = root.get("namespace", "")
        if not namespace:
            return IndexResult(
                diagnostics=[
                    Diagnostic(
                        severity="warning",
                        file=ctx.relative_path,
                        code="MB002",
                        message="mapper element missing namespace attribute",
                        producer=self.name,
                    )
                ]
            )

        symbols: list[Symbol] = []
        edges: list[Edge] = []
        # Table names that already have a symbol, so each table is emitted
        # once per file no matter how many statements touch it.
        emitted_tables: set[str] = set()

        for child in root:
            tag = child.tag.lower()
            if tag not in _STMT_TAGS:
                continue
            stmt_id = child.get("id")
            if not stmt_id:
                # A statement without an id cannot be keyed; skip it.
                continue
            mapping_sid = _mapping_id(ctx.relative_path, namespace, stmt_id)
            symbols.append(
                Symbol(
                    id=mapping_sid,
                    kind="sql_mapping",
                    language=LANG_MYBATIS,
                    file=ctx.relative_path,
                    # NOTE(review): every symbol is pinned to line 1; presumably
                    # because ElementTree exposes no line numbers -- confirm.
                    range=Range(start_line=1, end_line=1),
                    confidence="high",
                    extra={
                        "java_namespace": namespace,
                        "java_method_name": stmt_id,
                        "sql_verb": tag,
                    },
                )
            )

            # Flatten the statement (including text inside nested tags) into
            # one SQL string for table extraction and confidence grading.
            raw_sql = "".join(child.itertext())
            conf = _confidence_for(raw_sql, child)
            for table_name, _access in table_refs(raw_sql):
                table_sid = _table_id(table_name)
                if table_name not in emitted_tables:
                    symbols.append(
                        Symbol(
                            id=table_sid,
                            kind="table",
                            language=LANG_SQL,
                            file=ctx.relative_path,
                            range=Range(start_line=1, end_line=1),
                            confidence="high",
                        )
                    )
                    emitted_tables.add(table_name)
                edges.append(
                    Edge(
                        source=mapping_sid,
                        target=table_sid,
                        kind="accesses_table",
                        confidence=conf,
                    )
                )

        return IndexResult(symbols=symbols, edges=edges)
142
+
143
+
144
+ # ---------------------------------------------------------------------------
145
+ # SymbolID builders
146
+ # ---------------------------------------------------------------------------
147
+
148
+
149
def _mapping_id(rel_path: PurePosixPath, namespace: str, stmt_id: str) -> SymbolID:
    """Build the SymbolID of one mapped statement.

    Shape: ``scip-mybatis . . . src/mapper/CouponMapper.xml/com.example.CouponMapper#selectByUser.``
    Each path segment becomes a NAMESPACE descriptor, followed by the mapper
    namespace as a TYPE descriptor and the statement id as a TERM descriptor.
    """
    path_chain = tuple(
        Descriptor(name=segment, kind=DescriptorKind.NAMESPACE) for segment in rel_path.parts
    )
    owner = Descriptor(name=namespace, kind=DescriptorKind.TYPE)
    statement = Descriptor(name=stmt_id, kind=DescriptorKind.TERM)
    return SymbolID(scheme=SCHEME_MAPPING, descriptors=(*path_chain, owner, statement))
157
+
158
+
159
def _table_id(table_name: str) -> SymbolID:
    """Build the SymbolID of a table: one TYPE descriptor under ``SCHEME_TABLE``.

    The id carries no file path, so the same table name always maps to the
    same SymbolID.
    """
    table_descriptor = Descriptor(name=table_name, kind=DescriptorKind.TYPE)
    return SymbolID(scheme=SCHEME_TABLE, descriptors=(table_descriptor,))
164
+
165
+
166
def _confidence_for(sql: str, stmt_node: ET.Element) -> str:
    """Grade how trustworthy the extracted table references are.

    Returns ``"low"`` when the flattened SQL contains a ``${`` substitution,
    ``"medium"`` when the statement element has a dynamic-SQL descendant, and
    ``"high"`` otherwise. The substitution check runs first on purpose: with
    ``${}`` the table name itself may be unknown, whereas dynamic tags merely
    shave the WHERE clause.
    """
    uses_substitution = _SUBSTITUTION_RE.search(sql) is not None
    if uses_substitution:
        return "low"
    return "medium" if any(_has_dynamic_child(stmt_node)) else "high"
177
+
178
+
179
def _has_dynamic_child(node: ET.Element):  # type: ignore[no-untyped-def]
    """Yield exactly one bool: does *node* have a dynamic-SQL descendant?

    This is a generator, not a plain predicate; callers fold the single
    yielded value with ``any()``. *node* itself is excluded from the search,
    and tag names are compared case-insensitively against ``_DYNAMIC_TAGS``.
    """
    descendants = (element for element in node.iter() if element is not node)
    yield any(element.tag.lower() in _DYNAMIC_TAGS for element in descendants)
@@ -0,0 +1,84 @@
1
+ """MyBatisLinkBridge — link sql_mapping symbols back to their Java Mapper methods.
2
+
3
+ Runs after both ``codemap-java`` and ``codemap-mybatis`` indexers have
4
+ populated the store. For every ``sql_mapping`` symbol carrying
5
+ ``java_namespace`` + ``java_method_name`` in its ``extra``, look up the Java
6
+ method symbol whose owner class FQN matches the namespace and whose own
7
+ simple name matches the statement id. Emit a ``maps_to`` edge
8
+ ``java_method → sql_mapping`` so call-graph queries can hop from caller
9
+ code through the XML statement to the table it accesses.
10
+
11
+ No fuzzy matching: namespace and method name must both be exact. Overloads
12
+ (same simple name, different arity) yield multiple edges — confidence
13
+ ``medium`` for every hit because we cannot pick the Java method overload
14
+ that the mapper interface declares without parsing the interface itself
15
+ (which is also already in the store, but parameter-typed matching is out
16
+ of scope for v1).
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from collections import defaultdict
22
+ from typing import Any, ClassVar
23
+
24
+ from codemap.core.models import BridgeResult, Edge
25
+ from codemap.core.store import ReadOnlyStore
26
+ from codemap.core.symbol import DescriptorKind, SymbolID
27
+
28
+ __all__ = ["MyBatisLinkBridge"]
29
+
30
+
31
class MyBatisLinkBridge:
    """Bridge that emits ``maps_to`` edges from Java methods to ``sql_mapping`` symbols.

    A mapping is linked when its ``java_namespace`` equals the FQN of an
    indexed Java class and its ``java_method_name`` equals the simple name of
    a method that lives in the same file as that class.
    """

    name: ClassVar[str] = "mybatis_link"
    version: ClassVar[str] = "0.1.0"
    requires: ClassVar[list[str]] = []

    def resolve(self, store: ReadOnlyStore) -> BridgeResult:
        """Scan *store* once, then pair every sql_mapping with its Java method(s).

        Every hit is emitted with confidence ``"medium"``; methods sharing the
        same simple name each receive their own edge.
        """
        mappings: list[Any] = []
        methods_by_owner: dict[str, list[Any]] = defaultdict(list)
        classes_by_fqn: dict[str, Any] = {}

        # Pass 1: bucket the symbols this bridge cares about.
        for sym in store.iter_symbols():
            if sym.kind == "sql_mapping" and sym.extra.get("java_namespace"):
                mappings.append(sym)
                continue
            if sym.language != "java":
                continue
            if sym.kind == "class" and "imports" in sym.extra:
                package = str(sym.extra.get("package", ""))
                simple_name = sym.id.descriptors[-1].name
                qualified = f"{package}.{simple_name}" if package else simple_name
                classes_by_fqn[qualified] = sym
            elif sym.kind == "method":
                chain = sym.id.descriptors
                # The descriptor just before the method name must be its owner type.
                if len(chain) >= 2 and chain[-2].kind is DescriptorKind.TYPE:
                    methods_by_owner[chain[-2].name].append(sym)

        # Pass 2: exact (namespace, method name) matching, no fuzzy fallback.
        edges: list[Edge] = []
        for mapping in mappings:
            ns = str(mapping.extra.get("java_namespace", ""))
            method_name = str(mapping.extra.get("java_method_name", ""))
            if not (ns and method_name):
                continue
            owner = classes_by_fqn.get(ns)
            if owner is None:
                continue
            owner_simple = owner.id.descriptors[-1].name
            edges.extend(
                Edge(
                    source=candidate.id,
                    target=_to_sid(mapping.id),
                    kind="maps_to",
                    confidence="medium",
                )
                for candidate in methods_by_owner.get(owner_simple, [])
                # The file comparison rejects a different owner class that
                # merely shares the same simple name.
                if candidate.id.descriptors[-1].name == method_name
                and candidate.file == owner.file
            )
        return BridgeResult(edges=edges)
79
+
80
+
81
def _to_sid(sid: SymbolID) -> SymbolID:
    """Return *sid* unchanged.

    Identity hook: a single place to adjust the SymbolID of a mapping before
    it becomes an edge target, should that conversion ever need a tweak.
    """
    unchanged = sid
    return unchanged
@@ -0,0 +1,56 @@
1
+ """Extract referenced table names + access mode from a (static) SQL string.
2
+
3
+ `codemap-sql` only parses DDL (``CREATE TABLE`` / ``CREATE VIEW`` /
4
+ ``CREATE INDEX``); this module covers the DML side that MyBatis statements
5
+ need. Conservative regex over the four standard SQL verbs — good enough to
6
+ grade confidence per the indexer's static / dynamic / ``${}`` rule.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import re
12
+
13
+ __all__ = ["table_refs"]
14
+
15
_IDENT = r"[A-Za-z_][A-Za-z0-9_$]*"
_QUALIFIED_IDENT = rf"`?{_IDENT}`?(?:\s*\.\s*`?{_IDENT}`?)?"
# Negative-lookbehind keeps `DELETE FROM x` from matching the read regex too.
_READ_RE = re.compile(
    rf"(?<!DELETE\s)\b(?:FROM|JOIN)\s+(?P<table>{_QUALIFIED_IDENT})",
    re.IGNORECASE,
)
# The first alternative swallows UPDATE keywords that are clauses rather than
# statements (`ON DUPLICATE KEY UPDATE`, `ON CONFLICT ... DO UPDATE`,
# `SELECT ... FOR UPDATE`). It captures no table, so the word that follows
# (a column, `SET`, `NOWAIT`, ...) is never mistaken for a written table.
_WRITE_RE = re.compile(
    rf"\b(?:KEY|FOR|DO)\s+UPDATE\b"
    rf"|\b(?:INSERT\s+INTO|UPDATE|DELETE\s+FROM)\s+(?P<table>{_QUALIFIED_IDENT})",
    re.IGNORECASE,
)
_DOT_SPACING_RE = re.compile(r"\s*\.\s*")


def _clean(name: str) -> str:
    """Normalize a matched identifier: drop backticks and spacing around the dot.

    ``"`db` . `t`"`` and ``"db.t"`` both become ``"db.t"`` so the same table
    is never reported under two spellings.
    """
    return _DOT_SPACING_RE.sub(".", name.replace("`", "")).strip()


def _unique_tables(pattern: re.Pattern[str], sql: str) -> list[str]:
    """Return the cleaned ``table`` captures of *pattern* in first-seen order, deduplicated."""
    found: dict[str, None] = {}
    for match in pattern.finditer(sql):
        raw = match.group("table")
        if raw is None:
            # Matched a clause-level UPDATE; there is no table to record.
            continue
        found.setdefault(_clean(raw), None)
    return list(found)


def table_refs(sql: str) -> list[tuple[str, str]]:
    """Return ``(table_name, access)`` tuples, access ∈ {"read","write"}.

    Written tables come first, then read tables, each group in order of first
    appearance. Writes dominate reads for the same table: if a table appears
    in both a write and a read context within the same statement (e.g.
    ``INSERT INTO t SELECT * FROM t``), only ``(t, "write")`` is emitted so
    the caller never sees one logical table twice.

    Extraction is regex-based and does not parse SQL; only the identifier
    directly after ``FROM`` / ``JOIN`` / ``INSERT INTO`` / ``UPDATE`` /
    ``DELETE FROM`` is considered.
    """
    writes = _unique_tables(_WRITE_RE, sql)
    reads = _unique_tables(_READ_RE, sql)
    written = set(writes)
    out: list[tuple[str, str]] = [(table, "write") for table in writes]
    out.extend((table, "read") for table in reads if table not in written)
    return out
@@ -0,0 +1,44 @@
1
+ Metadata-Version: 2.4
2
+ Name: codemap-mybatis
3
+ Version: 0.3.0
4
+ Summary: MyBatis Mapper XML indexer plugin for CodeMap
5
+ Project-URL: Homepage, https://github.com/qxbyte/codemap
6
+ Author: CodeMap Contributors
7
+ License: MIT
8
+ Keywords: codemap,indexer,mybatis
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Topic :: Software Development
12
+ Requires-Python: >=3.11
13
+ Requires-Dist: codemap-core<0.4,>=0.3.0
14
+ Provides-Extra: dev
15
+ Requires-Dist: pytest>=8.0; extra == 'dev'
16
+ Description-Content-Type: text/markdown
17
+
18
+ # codemap-mybatis
19
+
20
+ MyBatis Mapper XML indexer plugin for [CodeMap](https://github.com/qxbyte/codemap).
21
+
22
+ ## What it does
23
+
24
+ Reads MyBatis `*Mapper.xml` files and produces:
25
+
26
+ - `sql_mapping` symbols — one per `<select|insert|update|delete>` statement
27
+ - `table` symbols — one per referenced database table
28
+ - `accesses_table` edges — `sql_mapping → table`, with confidence graded
29
+ by SQL complexity (static SQL → `high`; contains `<if>`/`<foreach>` → `medium`;
30
+ uses `${}` substitution → `low`)
31
+ - `maps_to` edges — `java_method → sql_mapping`, linking the Java Mapper
32
+ interface methods (produced by `codemap-java`) to their backing XML
33
+ statements via `(namespace, stmt_id)`
34
+
35
+ ## Scope
36
+
37
+ DDL (CREATE TABLE) lives in `codemap-sql`; this plugin handles the DML
38
+ side (FROM/JOIN/INSERT/UPDATE/DELETE) directly because it needs to grade
39
+ confidence by MyBatis dynamic-SQL constructs.
40
+
41
+ Dynamic SQL (`<if>`, `<foreach>`, `<where>`, `<set>`, `<choose>`, `<trim>`)
42
+ is extracted statically — the runtime-only branches are accepted as
43
+ medium-confidence edges. `${}` raw substitution (e.g. variable table
44
+ names) drops confidence to `low`.
@@ -0,0 +1,8 @@
1
+ codemap_mybatis/__init__.py,sha256=VKGyJK3oxRWguXqGno6yLzZCAY50BJAtV_cHB7pwTGQ,123
2
+ codemap_mybatis/indexer.py,sha256=fdAzytr4tQ_wq9fjbi6yWBVVL-zdOCbdvJnTpUR2HSI,6738
3
+ codemap_mybatis/link.py,sha256=sJF75pdwvg2iwb1TEaiBwejZ-sEPEnGAVXzDIYNq_iA,3608
4
+ codemap_mybatis/sql_tables.py,sha256=ZKXvEqdOmwID4ySBmxXAWIljNGGchisl5vv8EDmZzg0,1944
5
+ codemap_mybatis-0.3.0.dist-info/METADATA,sha256=3_TF8oH1w_v5Tb0VE1hZ-yOPmHGsCeMbidQQ0kiawZ0,1691
6
+ codemap_mybatis-0.3.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
7
+ codemap_mybatis-0.3.0.dist-info/entry_points.txt,sha256=XojIYn2AsNgKee1sQtURK4MsOJMxwxLt60it5ptEpQ0,133
8
+ codemap_mybatis-0.3.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,5 @@
1
+ [codemap.bridges]
2
+ mybatis_link = codemap_mybatis.link:MyBatisLinkBridge
3
+
4
+ [codemap.indexers]
5
+ mybatis = codemap_mybatis:MyBatisIndexer