codemap-mybatis 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,43 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Build artifacts
7
+ build/
8
+ dist/
9
+ *.egg-info/
10
+ *.egg
11
+ .eggs/
12
+
13
+ # Test / coverage
14
+ .pytest_cache/
15
+ .coverage
16
+ .coverage.*
17
+ htmlcov/
18
+ coverage.xml
19
+ .tox/
20
+ .mypy_cache/
21
+ .ruff_cache/
22
+ .benchmarks/
23
+
24
+ # Virtualenv
25
+ .venv/
26
+ venv/
27
+ env/
28
+
29
+ # uv / pdm lockfiles (commit uv.lock once we settle)
30
+ # uv.lock
31
+
32
+ # IDE
33
+ .idea/
34
+ .vscode/
35
+ *.swp
36
+ *.swo
37
+
38
+ # OS
39
+ .DS_Store
40
+ Thumbs.db
41
+
42
+ # CodeMap own index when dogfooding
43
+ .codemap/
@@ -0,0 +1,44 @@
1
+ Metadata-Version: 2.4
2
+ Name: codemap-mybatis
3
+ Version: 0.3.0
4
+ Summary: MyBatis Mapper XML indexer plugin for CodeMap
5
+ Project-URL: Homepage, https://github.com/qxbyte/codemap
6
+ Author: CodeMap Contributors
7
+ License: MIT
8
+ Keywords: codemap,indexer,mybatis
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Topic :: Software Development
12
+ Requires-Python: >=3.11
13
+ Requires-Dist: codemap-core<0.4,>=0.3.0
14
+ Provides-Extra: dev
15
+ Requires-Dist: pytest>=8.0; extra == 'dev'
16
+ Description-Content-Type: text/markdown
17
+
18
+ # codemap-mybatis
19
+
20
+ MyBatis Mapper XML indexer plugin for [CodeMap](https://github.com/qxbyte/codemap).
21
+
22
+ ## What it does
23
+
24
+ Reads MyBatis mapper XML files (any `*.xml` whose root element is `<mapper namespace="...">`) and produces:
25
+
26
+ - `sql_mapping` symbols — one per `<select|insert|update|delete>` statement
27
+ - `table` symbols — one per referenced database table
28
+ - `accesses_table` edges — `sql_mapping → table`, with confidence graded
29
+ by SQL complexity (static SQL → `high`; contains `<if>`/`<foreach>` → `medium`;
30
+ uses `${}` substitution → `low`)
31
+ - `maps_to` edges — `java_method → sql_mapping`, linking the Java Mapper
32
+ interface methods (produced by `codemap-java`) to their backing XML
33
+ statements via `(namespace, stmt_id)`
34
+
35
+ ## Scope
36
+
37
+ DDL (CREATE TABLE) lives in `codemap-sql`; this plugin handles the DML
38
+ side (FROM/JOIN/INSERT/UPDATE/DELETE) directly because it needs to grade
39
+ confidence by MyBatis dynamic-SQL constructs.
40
+
41
+ Dynamic SQL (`<if>`, `<foreach>`, `<where>`, `<set>`, `<choose>`, `<trim>`)
42
+ is extracted statically — the runtime-only branches are accepted as
43
+ medium-confidence edges. `${}` raw substitution (e.g. variable table
44
+ names) drops confidence to `low`.
@@ -0,0 +1,27 @@
1
+ # codemap-mybatis
2
+
3
+ MyBatis Mapper XML indexer plugin for [CodeMap](https://github.com/qxbyte/codemap).
4
+
5
+ ## What it does
6
+
7
+ Reads MyBatis mapper XML files (any `*.xml` whose root element is `<mapper namespace="...">`) and produces:
8
+
9
+ - `sql_mapping` symbols — one per `<select|insert|update|delete>` statement
10
+ - `table` symbols — one per referenced database table
11
+ - `accesses_table` edges — `sql_mapping → table`, with confidence graded
12
+ by SQL complexity (static SQL → `high`; contains `<if>`/`<foreach>` → `medium`;
13
+ uses `${}` substitution → `low`)
14
+ - `maps_to` edges — `java_method → sql_mapping`, linking the Java Mapper
15
+ interface methods (produced by `codemap-java`) to their backing XML
16
+ statements via `(namespace, stmt_id)`
17
+
18
+ ## Scope
19
+
20
+ DDL (CREATE TABLE) lives in `codemap-sql`; this plugin handles the DML
21
+ side (FROM/JOIN/INSERT/UPDATE/DELETE) directly because it needs to grade
22
+ confidence by MyBatis dynamic-SQL constructs.
23
+
24
+ Dynamic SQL (`<if>`, `<foreach>`, `<where>`, `<set>`, `<choose>`, `<trim>`)
25
+ is extracted statically — the runtime-only branches are accepted as
26
+ medium-confidence edges. `${}` raw substitution (e.g. variable table
27
+ names) drops confidence to `low`.
@@ -0,0 +1,36 @@
1
+ [build-system]
2
+ requires = ["hatchling>=1.21"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "codemap-mybatis"
7
+ version = "0.3.0"
8
+ description = "MyBatis Mapper XML indexer plugin for CodeMap"
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "CodeMap Contributors" }]
13
+ keywords = ["codemap", "mybatis", "indexer"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Programming Language :: Python :: 3",
17
+ "Topic :: Software Development",
18
+ ]
19
+ dependencies = [
20
+ "codemap-core>=0.3.0,<0.4",
21
+ ]
22
+
23
+ [project.optional-dependencies]
24
+ dev = ["pytest>=8.0"]
25
+
26
+ [project.entry-points."codemap.indexers"]
27
+ mybatis = "codemap_mybatis:MyBatisIndexer"
28
+
29
+ [project.entry-points."codemap.bridges"]
30
+ mybatis_link = "codemap_mybatis.link:MyBatisLinkBridge"
31
+
32
+ [project.urls]
33
+ Homepage = "https://github.com/qxbyte/codemap"
34
+
35
+ [tool.hatch.build.targets.wheel]
36
+ packages = ["src/codemap_mybatis"]
@@ -0,0 +1,5 @@
1
+ """MyBatis Mapper XML indexer plugin."""
2
+
3
+ from codemap_mybatis.indexer import MyBatisIndexer
4
+
5
+ __all__ = ["MyBatisIndexer"]
@@ -0,0 +1,186 @@
1
+ """MyBatis Mapper XML indexer.
2
+
3
+ Parses ``<mapper namespace="...">`` files and produces:
4
+
5
+ * one ``sql_mapping`` symbol per ``<select|insert|update|delete>``, keyed by
6
+ ``(namespace, statement-id)``; carries ``extra["java_namespace"]`` and
7
+ ``extra["java_method_name"]`` so the :class:`MyBatisLinkBridge` can later
8
+ cross-reference back to the Java Mapper interface method that owns it.
9
+ * one ``table`` symbol per referenced DB table (deduped per file).
10
+ * an ``accesses_table`` edge from each ``sql_mapping`` to every table it
11
+ references, with confidence graded by SQL complexity (static → ``high``;
12
+ contains dynamic tags → ``medium``; uses ``${}`` substitution → ``low``).
13
+
14
+ The Java ↔ XML cross-link (``maps_to`` edges) is the bridge's job, not this
15
+ per-file indexer — by design (ADR-0004 separation of indexers and bridges).
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import re
21
+ import xml.etree.ElementTree as ET
22
+ from pathlib import Path, PurePosixPath
23
+ from typing import ClassVar
24
+
25
+ from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
26
+ from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
27
+ from codemap.indexers.base import IndexContext
28
+ from codemap_mybatis.sql_tables import table_refs
29
+
30
+ __all__ = ["SCHEME_MAPPING", "SCHEME_TABLE", "MyBatisIndexer"]
31
+
32
+ SCHEME_MAPPING = "scip-mybatis"
33
+ SCHEME_TABLE = "scip-table"
34
+ LANG_MYBATIS = "mybatis"
35
+ LANG_SQL = "sql"
36
+
37
+ _STMT_TAGS = frozenset({"select", "insert", "update", "delete"})
38
+ _DYNAMIC_TAGS = frozenset({"if", "foreach", "choose", "where", "set", "trim", "when", "otherwise"})
39
+ _SUBSTITUTION_RE = re.compile(r"\$\{")
40
+
41
+
42
class MyBatisIndexer:
    """Per-file indexer that turns one MyBatis mapper XML into symbols and edges.

    For every ``<select|insert|update|delete>`` element directly under the
    root ``<mapper>`` it emits a ``sql_mapping`` symbol, one ``table`` symbol
    per distinct table name seen in the file, and an ``accesses_table`` edge
    from the statement to each table it references.
    """

    name: ClassVar[str] = "mybatis"
    # NOTE(review): the package metadata declares 0.3.0 — confirm whether this
    # indexer version is meant to track the distribution version.
    version: ClassVar[str] = "0.1.0"
    file_patterns: ClassVar[list[str]] = ["*.xml"]
    languages: ClassVar[list[str]] = [LANG_MYBATIS]

    def supports(self, path: Path) -> bool:
        """Return True when *path* looks like a MyBatis mapper file.

        The check is a cheap sniff, not a parse: the suffix must be ``.xml``
        and both ``<mapper`` and ``namespace`` must occur within the first
        4096 bytes. Unreadable files are reported as unsupported.
        """
        if path.suffix != ".xml":
            return False
        try:
            # Only the head of the file is inspected, so read just that much
            # instead of loading the whole file and slicing it.
            with path.open("rb") as fh:
                head = fh.read(4096)
        except OSError:
            return False
        return b"<mapper" in head and b"namespace" in head

    def _diagnostic_only(
        self, ctx: IndexContext, *, severity: str, code: str, message: str
    ) -> IndexResult:
        """Build an ``IndexResult`` that carries a single diagnostic and nothing else."""
        return IndexResult(
            diagnostics=[
                Diagnostic(
                    severity=severity,
                    file=ctx.relative_path,
                    code=code,
                    message=message,
                    producer=self.name,
                )
            ]
        )

    def index_file(self, path: Path, source: bytes, ctx: IndexContext) -> IndexResult:
        """Index one mapper XML document.

        Args:
            path: Location of the file; not read here, *source* is used instead.
            source: Raw bytes of the XML document.
            ctx: Indexing context; ``ctx.relative_path`` is recorded on every
                symbol and diagnostic.

        Returns:
            * a result with an ``MB001`` error diagnostic if the XML is malformed;
            * an empty result if the root element is not ``<mapper>``;
            * a result with an ``MB002`` warning if ``namespace`` is missing or empty;
            * otherwise the symbols and edges described on the class.
        """
        # NOTE(review): the standard-library XML parsers are documented as not
        # hardened against maliciously constructed documents — confirm mapper
        # files always come from a trusted checkout.
        try:
            root = ET.fromstring(source)
        except ET.ParseError as exc:
            return self._diagnostic_only(
                ctx,
                severity="error",
                code="MB001",
                message=f"XML parse error: {exc}",
            )
        if root.tag != "mapper":
            return IndexResult()
        namespace = root.get("namespace", "")
        if not namespace:
            return self._diagnostic_only(
                ctx,
                severity="warning",
                code="MB002",
                message="mapper element missing namespace attribute",
            )

        symbols: list[Symbol] = []
        edges: list[Edge] = []
        # Table names already given a symbol in this file (one symbol per name).
        emitted_tables: set[str] = set()

        for child in root:
            tag = child.tag.lower()
            if tag not in _STMT_TAGS:
                continue
            stmt_id = child.get("id")
            if not stmt_id:
                # A statement without an id cannot be keyed, so it is skipped.
                continue
            mapping_sid = _mapping_id(ctx.relative_path, namespace, stmt_id)
            symbols.append(
                Symbol(
                    id=mapping_sid,
                    kind="sql_mapping",
                    language=LANG_MYBATIS,
                    file=ctx.relative_path,
                    # No source position is tracked; every symbol is pinned to line 1.
                    range=Range(start_line=1, end_line=1),
                    confidence="high",
                    extra={
                        "java_namespace": namespace,
                        "java_method_name": stmt_id,
                        "sql_verb": tag,
                    },
                )
            )

            # Flatten the statement, including text nested inside dynamic tags,
            # into one SQL string. NOTE(review): <include refid="..."> fragments
            # are not expanded here, so tables named only in a <sql> fragment
            # are not seen.
            raw_sql = "".join(child.itertext())
            conf = _confidence_for(raw_sql, child)
            for table_name, _access in table_refs(raw_sql):
                table_sid = _table_id(table_name)
                if table_name not in emitted_tables:
                    symbols.append(
                        Symbol(
                            id=table_sid,
                            kind="table",
                            language=LANG_SQL,
                            file=ctx.relative_path,
                            range=Range(start_line=1, end_line=1),
                            confidence="high",
                        )
                    )
                    emitted_tables.add(table_name)
                edges.append(
                    Edge(
                        source=mapping_sid,
                        target=table_sid,
                        kind="accesses_table",
                        confidence=conf,
                    )
                )

        return IndexResult(symbols=symbols, edges=edges)
142
+
143
+
144
+ # ---------------------------------------------------------------------------
145
+ # SymbolID builders
146
+ # ---------------------------------------------------------------------------
147
+
148
+
149
def _mapping_id(rel_path: PurePosixPath, namespace: str, stmt_id: str) -> SymbolID:
    """Build the ``scip-mybatis`` SymbolID for one mapper statement.

    The descriptor chain is: one NAMESPACE descriptor per segment of
    *rel_path*, then *namespace* as a TYPE descriptor, then *stmt_id* as a
    TERM descriptor. Presumably this renders along the lines of
    ``scip-mybatis . . . src/mapper/CouponMapper.xml/com.example.CouponMapper#selectByUser.``
    — the exact textual form is decided by ``SymbolID``.
    """
    path_descriptors = tuple(
        Descriptor(name=segment, kind=DescriptorKind.NAMESPACE)
        for segment in rel_path.parts
    )
    owner = Descriptor(name=namespace, kind=DescriptorKind.TYPE)
    statement = Descriptor(name=stmt_id, kind=DescriptorKind.TERM)
    return SymbolID(
        scheme=SCHEME_MAPPING,
        descriptors=(*path_descriptors, owner, statement),
    )
157
+
158
+
159
def _table_id(table_name: str) -> SymbolID:
    """Build the ``scip-table`` SymbolID for *table_name*.

    The id consists of a single TYPE descriptor holding the table name and
    nothing else, so the same name always produces the same id regardless of
    which mapper file referenced it.
    """
    table_descriptor = Descriptor(name=table_name, kind=DescriptorKind.TYPE)
    return SymbolID(scheme=SCHEME_TABLE, descriptors=(table_descriptor,))
164
+
165
+
166
def _confidence_for(sql: str, stmt_node: ET.Element) -> str:
    """Return the confidence grade for table edges extracted from one statement.

    * ``"low"`` — the flattened SQL text contains a ``${`` substitution;
    * ``"medium"`` — no substitution, but the statement element contains a
      dynamic-SQL tag;
    * ``"high"`` — neither of the above.

    The substitution check is deliberately first: with ``${}`` the table name
    itself may be unknown, whereas dynamic tags only make parts of the
    statement conditional.
    """
    if _SUBSTITUTION_RE.search(sql) is not None:
        return "low"
    is_dynamic = any(_has_dynamic_child(stmt_node))
    return "medium" if is_dynamic else "high"
177
+
178
+
179
def _has_dynamic_child(node: ET.Element):  # type: ignore[no-untyped-def]
    """Yield a single bool: does *node* contain a dynamic-SQL element?

    Every element below *node* (at any depth, *node* itself excluded) is
    compared case-insensitively against ``_DYNAMIC_TAGS``. This is a
    generator that produces exactly one value, so callers consume it with
    ``any(...)`` or ``next(...)``.
    """
    yield any(
        descendant is not node and descendant.tag.lower() in _DYNAMIC_TAGS
        for descendant in node.iter()
    )
@@ -0,0 +1,84 @@
1
+ """MyBatisLinkBridge — link sql_mapping symbols back to their Java Mapper methods.
2
+
3
+ Runs after both ``codemap-java`` and ``codemap-mybatis`` indexers have
4
+ populated the store. For every ``sql_mapping`` symbol carrying
5
+ ``java_namespace`` + ``java_method_name`` in its ``extra``, look up the Java
6
+ method symbol whose owner class FQN matches the namespace and whose own
7
+ simple name matches the statement id. Emit a ``maps_to`` edge
8
+ ``java_method → sql_mapping`` so call-graph queries can hop from caller
9
+ code through the XML statement to the table it accesses.
10
+
11
+ No fuzzy matching: namespace and method name must both be exact. Overloads
12
+ (same simple name, different arity) yield multiple edges — confidence
13
+ ``medium`` for every hit because we cannot pick the Java method overload
14
+ that the mapper interface declares without parsing the interface itself
15
+ (which is also already in the store, but parameter-typed matching is out
16
+ of scope for v1).
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from collections import defaultdict
22
+ from typing import Any, ClassVar
23
+
24
+ from codemap.core.models import BridgeResult, Edge
25
+ from codemap.core.store import ReadOnlyStore
26
+ from codemap.core.symbol import DescriptorKind, SymbolID
27
+
28
+ __all__ = ["MyBatisLinkBridge"]
29
+
30
+
31
class MyBatisLinkBridge:
    """Bridge that emits ``maps_to`` edges from Java methods to ``sql_mapping`` symbols.

    A mapping is linked when its ``java_namespace`` equals the fully
    qualified name of an indexed Java class and its ``java_method_name``
    equals the simple name of a method owned by that class in the same file.
    Every edge is emitted with ``medium`` confidence.
    """

    name: ClassVar[str] = "mybatis_link"
    version: ClassVar[str] = "0.1.0"
    requires: ClassVar[list[str]] = []

    def resolve(self, store: ReadOnlyStore) -> BridgeResult:
        """Scan *store* once and return the ``maps_to`` edges it implies.

        Args:
            store: Read-only view of the indexed symbols; only
                ``iter_symbols()`` is used.

        Returns:
            A ``BridgeResult`` whose edges run ``java method -> sql_mapping``.
            Mappings whose namespace matches no indexed class, or whose
            statement id matches no method of that class, produce no edge.
        """
        mappings: list[Any] = []
        methods_by_owner: dict[str, list[Any]] = defaultdict(list)
        classes_by_fqn: dict[str, Any] = {}

        # Pass 1: bucket the symbols this bridge cares about.
        for sym in store.iter_symbols():
            if sym.kind == "sql_mapping" and sym.extra.get("java_namespace"):
                mappings.append(sym)
                continue
            if sym.language != "java":
                continue
            if sym.kind == "class" and "imports" in sym.extra:
                package = str(sym.extra.get("package", ""))
                simple_name = sym.id.descriptors[-1].name
                qualified = f"{package}.{simple_name}" if package else simple_name
                classes_by_fqn[qualified] = sym
            elif sym.kind == "method":
                chain = sym.id.descriptors
                # The owner is the descriptor just before the method, and it
                # must be a TYPE descriptor for the method to be indexed.
                if len(chain) >= 2 and chain[-2].kind is DescriptorKind.TYPE:
                    methods_by_owner[chain[-2].name].append(sym)

        # Pass 2: link each mapping to the matching method(s) of its owner class.
        edges: list[Edge] = []
        for mapping in mappings:
            ns = str(mapping.extra.get("java_namespace", ""))
            method_name = str(mapping.extra.get("java_method_name", ""))
            if not (ns and method_name):
                continue
            owner = classes_by_fqn.get(ns)
            if owner is None:
                continue
            candidates = methods_by_owner.get(owner.id.descriptors[-1].name, [])
            edges.extend(
                Edge(
                    source=candidate.id,
                    target=_to_sid(mapping.id),
                    kind="maps_to",
                    confidence="medium",
                )
                for candidate in candidates
                # Methods are bucketed by the owner's *simple* name, so the
                # file comparison rejects a different class that merely
                # shares that simple name.
                if candidate.id.descriptors[-1].name == method_name
                and candidate.file == owner.file
            )
        return BridgeResult(edges=edges)
79
+
80
+
81
def _to_sid(sid: SymbolID) -> SymbolID:
    """Return *sid* unchanged.

    Identity helper: every mapping id passes through here before it becomes
    an edge target, so a future id conversion has a single place to live.
    """
    return sid
@@ -0,0 +1,56 @@
1
+ """Extract referenced table names + access mode from a (static) SQL string.
2
+
3
+ `codemap-sql` only parses DDL (``CREATE TABLE`` / ``CREATE VIEW`` /
4
+ ``CREATE INDEX``); this module covers the DML side that MyBatis statements
5
+ need. Conservative regex over the four standard SQL verbs — good enough to
6
+ grade confidence per the indexer's static / dynamic / ``${}`` rule.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import re
12
+
13
+ __all__ = ["table_refs"]
14
+
15
# ``$`` is allowed inside an identifier but not as its first character, so a
# MyBatis ``${tableName}`` placeholder is never captured as a table name.
_IDENT = r"[A-Za-z_][A-Za-z0-9_$]*"
_QUALIFIED_IDENT = rf"`?{_IDENT}`?(?:\s*\.\s*`?{_IDENT}`?)?"
# Runs of whitespace are collapsed to one space before matching so that the
# fixed-width lookbehinds below always see exactly one separator character.
_WHITESPACE_RE = re.compile(r"\s+")
# Backticks, and whitespace around the qualifying dot, are not part of a name.
_NAME_NOISE_RE = re.compile(r"[`\s]+")
# Negative-lookbehind keeps `DELETE FROM x` from matching the read regex too.
_READ_RE = re.compile(
    rf"(?<!DELETE\s)\b(?:FROM|JOIN)\s+({_QUALIFIED_IDENT})",
    re.IGNORECASE,
)
# The lookbehinds on UPDATE reject the two places where the keyword is not a
# statement verb: `ON DUPLICATE KEY UPDATE col = ...` and the locking clause
# `SELECT ... FOR UPDATE [NOWAIT | SKIP LOCKED | OF ...]`.
_WRITE_RE = re.compile(
    r"\b(?:(?:INSERT\s+(?:IGNORE\s+)?|REPLACE\s+)INTO"
    r"|(?<!KEY\s)(?<!FOR\s)UPDATE"
    rf"|DELETE\s+FROM)\s+({_QUALIFIED_IDENT})",
    re.IGNORECASE,
)


def _clean(name: str) -> str:
    """Normalise a captured identifier: drop backticks and all whitespace.

    ``"`db` . `t`"`` and ``"db.t"`` therefore both become ``"db.t"``, which
    lets the de-duplication in :func:`table_refs` treat them as one table.
    """
    return _NAME_NOISE_RE.sub("", name)


def table_refs(sql: str) -> list[tuple[str, str]]:
    """Return ``(table_name, access)`` tuples, access ∈ {"read","write"}.

    Writes are ``INSERT [IGNORE] INTO``, ``REPLACE INTO``, ``UPDATE`` and
    ``DELETE FROM`` targets; reads are ``FROM`` / ``JOIN`` targets. Each list
    keeps first-occurrence order, and all writes precede all reads.

    Writes dominate reads for the same table: if a table appears in both a
    write and a read context within the same statement (e.g.
    ``INSERT INTO t SELECT * FROM t``), only ``(t, "write")`` is emitted so
    the caller never sees one logical table twice.

    Args:
        sql: SQL text; may span several lines and may be empty.

    Returns:
        The de-duplicated references, or ``[]`` when no DML target is found.
    """
    flat = _WHITESPACE_RE.sub(" ", sql)
    # dict.fromkeys de-duplicates while preserving first-occurrence order.
    writes = list(
        dict.fromkeys(_clean(m.group(1)) for m in _WRITE_RE.finditer(flat))
    )
    reads = dict.fromkeys(_clean(m.group(1)) for m in _READ_RE.finditer(flat))
    written = set(writes)
    out: list[tuple[str, str]] = [(n, "write") for n in writes]
    out.extend((n, "read") for n in reads if n not in written)
    return out
File without changes
@@ -0,0 +1,180 @@
1
+ """Tests for ``codemap_mybatis.indexer.MyBatisIndexer``."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path, PurePosixPath
6
+
7
+ from codemap_mybatis import MyBatisIndexer
8
+
9
+ from codemap.core.models import IndexResult
10
+ from codemap.indexers.base import IndexContext
11
+
12
+ _XML_SIMPLE = b"""<?xml version="1.0"?>
13
+ <mapper namespace="com.example.CouponMapper">
14
+ <select id="selectByUser" resultType="x">
15
+ SELECT * FROM sf_coupon WHERE uid = #{id}
16
+ </select>
17
+ </mapper>
18
+ """
19
+
20
+ _XML_DYNAMIC = b"""<?xml version="1.0"?>
21
+ <mapper namespace="com.example.CouponMapper">
22
+ <select id="selectByUser" resultType="x">
23
+ SELECT * FROM sf_coupon
24
+ <if test="uid != null">WHERE uid = #{id}</if>
25
+ </select>
26
+ </mapper>
27
+ """
28
+
29
+ _XML_DOLLAR = b"""<?xml version="1.0"?>
30
+ <mapper namespace="com.example.M">
31
+ <select id="dyn" resultType="x">
32
+ SELECT * FROM ${tableName}
33
+ </select>
34
+ </mapper>
35
+ """
36
+
37
+ _XML_MIXED = b"""<?xml version="1.0"?>
38
+ <mapper namespace="com.example.M">
39
+ <select id="get" resultType="x">SELECT * FROM sf_coupon WHERE id=#{i}</select>
40
+ <update id="touch">UPDATE sf_coupon SET ts = NOW() WHERE id=#{i}</update>
41
+ <insert id="add">INSERT INTO sf_coupon (a) VALUES (#{a})</insert>
42
+ <delete id="del">DELETE FROM sf_coupon WHERE id=#{i}</delete>
43
+ </mapper>
44
+ """
45
+
46
+
47
+ def _index(source: bytes, *, rel: str = "src/mapper/CouponMapper.xml") -> IndexResult:
48
+ ix = MyBatisIndexer()
49
+ return ix.index_file(
50
+ Path(f"/tmp/{rel}"),
51
+ source,
52
+ IndexContext(
53
+ project_root=Path("/tmp"),
54
+ relative_path=PurePosixPath(rel),
55
+ language="mybatis",
56
+ ),
57
+ )
58
+
59
+
60
+ # ---------------------------------------------------------------------------
61
+ # Indexer metadata
62
+ # ---------------------------------------------------------------------------
63
+
64
+
65
+ def test_indexer_metadata() -> None:
66
+ ix = MyBatisIndexer()
67
+ assert ix.name == "mybatis"
68
+ assert "*.xml" in ix.file_patterns
69
+
70
+
71
+ def test_supports_only_mapper_xml(tmp_path: Path) -> None:
72
+ ix = MyBatisIndexer()
73
+ plain = tmp_path / "plain.xml"
74
+ plain.write_bytes(b"<root/>")
75
+ assert not ix.supports(plain)
76
+ mapper = tmp_path / "M.xml"
77
+ mapper.write_bytes(_XML_SIMPLE)
78
+ assert ix.supports(mapper)
79
+
80
+
81
+ def test_non_xml_file_not_supported(tmp_path: Path) -> None:
82
+ f = tmp_path / "a.java"
83
+ f.write_bytes(b"")
84
+ assert not MyBatisIndexer().supports(f)
85
+
86
+
87
+ # ---------------------------------------------------------------------------
88
+ # Symbols
89
+ # ---------------------------------------------------------------------------
90
+
91
+
92
+ def test_sql_mapping_symbol_emitted() -> None:
93
+ r = _index(_XML_SIMPLE)
94
+ mappings = [s for s in r.symbols if s.kind == "sql_mapping"]
95
+ assert len(mappings) == 1
96
+ m = mappings[0]
97
+ assert m.id.descriptors[-1].name == "selectByUser"
98
+ assert m.extra == {
99
+ "java_namespace": "com.example.CouponMapper",
100
+ "java_method_name": "selectByUser",
101
+ "sql_verb": "select",
102
+ }
103
+
104
+
105
+ def test_table_symbol_emitted() -> None:
106
+ r = _index(_XML_SIMPLE)
107
+ tables = [s for s in r.symbols if s.kind == "table"]
108
+ assert len(tables) == 1
109
+ assert tables[0].id.descriptors[-1].name == "sf_coupon"
110
+
111
+
112
+ def test_accesses_table_edge_high_confidence_for_static_sql() -> None:
113
+ r = _index(_XML_SIMPLE)
114
+ edges = [e for e in r.edges if e.kind == "accesses_table"]
115
+ assert len(edges) == 1
116
+ assert edges[0].confidence == "high"
117
+
118
+
119
+ def test_dynamic_sql_lowers_to_medium() -> None:
120
+ r = _index(_XML_DYNAMIC)
121
+ edges = [e for e in r.edges if e.kind == "accesses_table"]
122
+ assert any(e.confidence == "medium" for e in edges)
123
+
124
+
125
def test_dollar_substitution_lowers_to_low() -> None:
    """A ``${}`` statement is still indexed, and any table edge it yields is ``low``.

    ``${tableName}`` cannot start an identifier, so today no table edge is
    produced at all. If the extraction ever starts capturing the placeholder,
    the edge must carry ``confidence="low"`` — so the check is "every emitted
    edge is low" rather than "any valid confidence", which could never fail.
    """
    r = _index(_XML_DOLLAR)
    mappings = [s for s in r.symbols if s.kind == "sql_mapping"]
    edges = [e for e in r.edges if e.kind == "accesses_table"]
    # The statement itself must survive even though its table is unknown.
    assert len(mappings) == 1
    assert all(e.confidence == "low" for e in edges)
133
+
134
+
135
+ # ---------------------------------------------------------------------------
136
+ # Multiple statements + dedup
137
+ # ---------------------------------------------------------------------------
138
+
139
+
140
+ def test_all_four_verbs_produce_mappings() -> None:
141
+ r = _index(_XML_MIXED)
142
+ names = {s.id.descriptors[-1].name for s in r.symbols if s.kind == "sql_mapping"}
143
+ assert names == {"get", "touch", "add", "del"}
144
+
145
+
146
+ def test_same_table_dedup_in_one_file() -> None:
147
+ r = _index(_XML_MIXED)
148
+ tables = [s for s in r.symbols if s.kind == "table"]
149
+ # all 4 statements hit sf_coupon → exactly one table symbol per file
150
+ assert len(tables) == 1
151
+
152
+
153
+ def test_edges_count_matches_statements() -> None:
154
+ r = _index(_XML_MIXED)
155
+ # each of 4 statements produces one accesses_table edge to sf_coupon
156
+ edges = [e for e in r.edges if e.kind == "accesses_table"]
157
+ assert len(edges) == 4
158
+
159
+
160
+ # ---------------------------------------------------------------------------
161
+ # Malformed inputs
162
+ # ---------------------------------------------------------------------------
163
+
164
+
165
+ def test_parse_error_yields_diagnostic_not_crash() -> None:
166
+ r = _index(b"<mapper namespace='x'><bad")
167
+ assert r.symbols == []
168
+ assert any(d.code == "MB001" for d in r.diagnostics)
169
+
170
+
171
+ def test_missing_namespace_yields_diagnostic() -> None:
172
+ r = _index(b"<mapper><select id='a'>SELECT * FROM x</select></mapper>")
173
+ assert r.symbols == []
174
+ assert any(d.code == "MB002" for d in r.diagnostics)
175
+
176
+
177
+ def test_non_mapper_root_silently_ignored() -> None:
178
+ r = _index(b"<root><x/></root>")
179
+ assert r.symbols == []
180
+ assert r.diagnostics == []
@@ -0,0 +1,143 @@
1
+ """Tests for ``MyBatisLinkBridge`` — link sql_mapping ↔ java method."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path, PurePosixPath
6
+
7
+ from codemap_java import JavaIndexer
8
+ from codemap_mybatis import MyBatisIndexer
9
+ from codemap_mybatis.link import MyBatisLinkBridge
10
+
11
+ from codemap.core.models import Symbol
12
+ from codemap.indexers.base import IndexContext
13
+
14
+
15
+ class _StubStore:
16
+ def __init__(self, symbols: list[Symbol]) -> None:
17
+ self._symbols = symbols
18
+
19
+ def iter_symbols(self): # type: ignore[no-untyped-def]
20
+ return iter(self._symbols)
21
+
22
+ def get(self, sid): # pragma: no cover
23
+ return None
24
+
25
+ def iter_edges(self): # pragma: no cover
26
+ return iter([])
27
+
28
+ def callers(self, sid, *, depth=1): # pragma: no cover
29
+ return []
30
+
31
+ def callees(self, sid, *, depth=1): # pragma: no cover
32
+ return []
33
+
34
+ def search(self, query, *, limit=10): # pragma: no cover
35
+ return []
36
+
37
+ def manifest(self): # pragma: no cover
38
+ raise NotImplementedError
39
+
40
+
41
+ def _index_java(source: str, *, rel: str) -> list[Symbol]:
42
+ return list(
43
+ JavaIndexer()
44
+ .index_file(
45
+ Path(f"/tmp/{rel}"),
46
+ source.encode("utf-8"),
47
+ IndexContext(
48
+ project_root=Path("/tmp"),
49
+ relative_path=PurePosixPath(rel),
50
+ language="java",
51
+ ),
52
+ )
53
+ .symbols
54
+ )
55
+
56
+
57
+ def _index_xml(source: bytes, *, rel: str) -> list[Symbol]:
58
+ return list(
59
+ MyBatisIndexer()
60
+ .index_file(
61
+ Path(f"/tmp/{rel}"),
62
+ source,
63
+ IndexContext(
64
+ project_root=Path("/tmp"),
65
+ relative_path=PurePosixPath(rel),
66
+ language="mybatis",
67
+ ),
68
+ )
69
+ .symbols
70
+ )
71
+
72
+
73
+ def _has_maps_to(edges, src_simple: str, tgt_simple: str) -> bool:
74
+ return any(
75
+ e.kind == "maps_to"
76
+ and e.source.descriptors[-1].name == src_simple
77
+ and e.target.descriptors[-1].name == tgt_simple
78
+ for e in edges
79
+ )
80
+
81
+
82
+ # ---------------------------------------------------------------------------
83
+
84
+
85
+ def test_bridge_metadata() -> None:
86
+ b = MyBatisLinkBridge()
87
+ assert b.name == "mybatis_link"
88
+
89
+
90
+ def test_maps_java_method_to_sql_mapping() -> None:
91
+ java = _index_java(
92
+ """
93
+ package com.example;
94
+ public interface CouponMapper {
95
+ int selectByUser(long userId);
96
+ }
97
+ """,
98
+ rel="src/com/example/CouponMapper.java",
99
+ )
100
+ xml = _index_xml(
101
+ b"""<?xml version='1.0'?>
102
+ <mapper namespace="com.example.CouponMapper">
103
+ <select id="selectByUser" resultType="x">SELECT * FROM sf_coupon</select>
104
+ </mapper>""",
105
+ rel="src/mapper/CouponMapper.xml",
106
+ )
107
+ result = MyBatisLinkBridge().resolve(_StubStore(java + xml))
108
+ assert _has_maps_to(result.edges, "selectByUser", "selectByUser")
109
+ assert all(e.confidence == "medium" for e in result.edges if e.kind == "maps_to")
110
+
111
+
112
+ def test_no_match_when_namespace_unknown() -> None:
113
+ """Mapper XML points at a namespace that isn't indexed → no edge."""
114
+ xml = _index_xml(
115
+ b"""<?xml version='1.0'?>
116
+ <mapper namespace="com.example.NotIndexedMapper">
117
+ <select id="x">SELECT * FROM t</select>
118
+ </mapper>""",
119
+ rel="src/mapper/x.xml",
120
+ )
121
+ result = MyBatisLinkBridge().resolve(_StubStore(xml))
122
+ assert result.edges == []
123
+
124
+
125
+ def test_method_name_must_match_exactly() -> None:
126
+ java = _index_java(
127
+ """
128
+ package com.example;
129
+ public interface M {
130
+ int findOne(long id);
131
+ }
132
+ """,
133
+ rel="src/com/example/M.java",
134
+ )
135
+ xml = _index_xml(
136
+ b"""<?xml version='1.0'?>
137
+ <mapper namespace="com.example.M">
138
+ <select id="findTwo">SELECT * FROM t</select>
139
+ </mapper>""",
140
+ rel="src/mapper/M.xml",
141
+ )
142
+ result = MyBatisLinkBridge().resolve(_StubStore(java + xml))
143
+ assert result.edges == []
@@ -0,0 +1,56 @@
1
+ """Tests for DML table-reference extraction."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from codemap_mybatis.sql_tables import table_refs
6
+
7
+
8
+ def test_simple_select_from() -> None:
9
+ refs = table_refs("SELECT * FROM sf_coupon")
10
+ assert ("sf_coupon", "read") in refs
11
+
12
+
13
+ def test_join_marks_as_read() -> None:
14
+ refs = table_refs("SELECT * FROM sf_coupon c JOIN sf_user u ON c.uid = u.id")
15
+ assert ("sf_coupon", "read") in refs
16
+ assert ("sf_user", "read") in refs
17
+
18
+
19
+ def test_insert_into_marks_write() -> None:
20
+ assert ("sf_coupon", "write") in table_refs("INSERT INTO sf_coupon (a) VALUES (1)")
21
+
22
+
23
+ def test_update_marks_write() -> None:
24
+ assert ("sf_coupon", "write") in table_refs("UPDATE sf_coupon SET n = 1 WHERE id = 2")
25
+
26
+
27
+ def test_delete_marks_write() -> None:
28
+ assert ("sf_coupon", "write") in table_refs("DELETE FROM sf_coupon WHERE id = 1")
29
+
30
+
31
+ def test_backticked_identifier_cleaned() -> None:
32
+ refs = table_refs("SELECT * FROM `sf_coupon`")
33
+ assert ("sf_coupon", "read") in refs
34
+
35
+
36
+ def test_qualified_identifier_kept() -> None:
37
+ refs = table_refs("SELECT * FROM mydb.sf_coupon")
38
+ # Qualified name preserved as-is; downstream consumers decide normalisation.
39
+ assert any(r[0].endswith("sf_coupon") for r in refs)
40
+
41
+
42
+ def test_case_insensitive_verbs() -> None:
43
+ assert ("sf_x", "read") in table_refs("select * from sf_x")
44
+ assert ("sf_x", "write") in table_refs("update sf_x set a=1")
45
+
46
+
47
+ def test_no_dml_returns_empty() -> None:
48
+ assert table_refs("CREATE TABLE x (id int)") == []
49
+ assert table_refs("") == []
50
+
51
+
52
def test_write_dominates_over_read_for_same_table() -> None:
    """A table that is both written and read is reported once, as a write."""
    refs = table_refs("INSERT INTO sf_coupon SELECT * FROM sf_coupon")
    # Exact equality also proves the ("sf_coupon", "read") entry is suppressed.
    assert refs == [("sf_coupon", "write")]