answer42 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mcp_1c/rag/service.py ADDED
@@ -0,0 +1,375 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import subprocess
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from .detect import detect_source_format
9
+ from .model import SourceInfo
10
+ from .parsers import navigation_for_object, normalize_query, parse_source
11
+ from .store import RagStore, json_dumps, row_to_dict
12
+
13
+ DEFAULT_RAG_DB = Path(os.environ.get("MCP_1C_RAG_DB", "build/rag/onec-rag.sqlite"))
14
+
15
+
16
+ class RagService:
17
def __init__(self, db_path: str | Path = DEFAULT_RAG_DB):
    """Bind the service to the store at ``db_path`` and make sure its schema exists."""
    # Attach the store first so it is reachable even if schema setup fails.
    self.store = backing_store = RagStore(db_path)
    backing_store.init()
20
+
21
@property
def db_path(self) -> str:
    """Location of the backing database, rendered as a plain string."""
    location = self.store.db_path
    return str(location)
24
+
25
def detect_source(self, path: str) -> dict[str, Any]:
    """Report the detected source format of ``path`` and whether the path exists."""
    detected = detect_source_format(path)
    present = Path(path).exists()
    return {"path": path, "format": detected, "exists": present}
27
+
28
def add_source(self, name: str, path: str, kind: str = "standalone", format: str = "auto") -> dict[str, Any]:
    """Register a source, or update the existing one with the same ``name``.

    Args:
        name: Unique source name; an existing row with this name is updated.
        path: Source location; stored in resolved (absolute) form.
        kind: Free-form source kind stored with the row.
        format: Source format, or ``"auto"`` to detect it from ``path``.

    Returns:
        The stored ``sources`` row as a dict, plus ``"detected_format"``
        holding the format that was written.
    """
    resolved = Path(path).resolve()
    fmt = detect_source_format(resolved) if format == "auto" else format
    commit = _git_commit(resolved)
    branch = _git_branch(resolved)
    con = self.store.connect()
    try:
        # ``with con`` only commits/rolls back; the connection itself is
        # closed explicitly in ``finally`` so no handle is leaked.
        with con:
            con.execute(
                """
                INSERT INTO sources(name,path,kind,format,branch,commit_hash)
                VALUES(?,?,?,?,?,?)
                ON CONFLICT(name) DO UPDATE SET path=excluded.path, kind=excluded.kind,
                format=excluded.format, branch=excluded.branch, commit_hash=excluded.commit_hash
                """,
                (name, str(resolved), kind, fmt, branch, commit),
            )
            row = con.execute("SELECT * FROM sources WHERE name=?", (name,)).fetchone()
    finally:
        con.close()
    return row_to_dict(row) | {"detected_format": fmt}
45
+
46
def list_sources(self) -> list[dict[str, Any]]:
    """Return every registered source as a dict, ordered by id."""
    con = self.store.connect()
    try:
        # The connection context manager only manages the transaction;
        # closing is done explicitly below.
        with con:
            return [dict(r) for r in con.execute("SELECT * FROM sources ORDER BY id")]
    finally:
        con.close()
49
+
50
+ def create_snapshot(self, name: str, base: str | None = None, extensions: list[str] | None = None, sources: list[str] | None = None) -> dict[str, Any]:
51
+ layers: list[tuple[str, str]] = []
52
+ if sources:
53
+ layers = [(s, "source") for s in sources]
54
+ else:
55
+ if base:
56
+ layers.append((base, "base"))
57
+ for ext in extensions or []:
58
+ layers.append((ext, "extension"))
59
+ if not layers:
60
+ raise ValueError("snapshot needs base/extensions or sources")
61
+ with self.store.connect() as con:
62
+ con.execute("INSERT INTO snapshots(name) VALUES(?) ON CONFLICT(name) DO NOTHING", (name,))
63
+ snap = con.execute("SELECT * FROM snapshots WHERE name=?", (name,)).fetchone()
64
+ snapshot_id = snap["id"]
65
+ con.execute("DELETE FROM snapshot_layers WHERE snapshot_id=?", (snapshot_id,))
66
+ result_layers = []
67
+ for order, (source_name, role) in enumerate(layers, start=1):
68
+ src = con.execute("SELECT * FROM sources WHERE name=?", (source_name,)).fetchone()
69
+ if src is None:
70
+ raise ValueError(f"source not found: {source_name}")
71
+ con.execute(
72
+ "INSERT INTO snapshot_layers(snapshot_id,source_id,layer_order,role) VALUES(?,?,?,?)",
73
+ (snapshot_id, src["id"], order, role),
74
+ )
75
+ result_layers.append({"source": source_name, "source_id": src["id"], "order": order, "role": role, "format": src["format"]})
76
+ return {"snapshot_id": snapshot_id, "name": name, "layers": result_layers}
77
+
78
def list_snapshots(self) -> list[dict[str, Any]]:
    """Return all snapshots ordered by id, each with its ``"layers"`` list.

    Every layer dict carries the layer order and role plus the name, path,
    kind, format and commit hash of the source it points at.
    """
    con = self.store.connect()
    try:
        # The context manager handles the transaction only; close explicitly.
        with con:
            snaps = [dict(r) for r in con.execute("SELECT * FROM snapshots ORDER BY id")]
            for snap in snaps:
                snap["layers"] = [
                    dict(r)
                    for r in con.execute(
                        """
                        SELECT sl.layer_order, sl.role, s.name, s.path, s.kind, s.format, s.commit_hash
                        FROM snapshot_layers sl JOIN sources s ON s.id=sl.source_id
                        WHERE sl.snapshot_id=? ORDER BY sl.layer_order
                        """,
                        (snap["id"],),
                    )
                ]
    finally:
        con.close()
    return snaps
94
+
95
+ def build_index(self, snapshot: str | int | None = None, source: str | None = None) -> dict[str, Any]:
96
+ with self.store.connect() as con:
97
+ if source:
98
+ source_rows = [con.execute("SELECT * FROM sources WHERE name=?", (source,)).fetchone()]
99
+ snapshot_id = None
100
+ elif snapshot is not None:
101
+ snap = _snapshot_row(con, snapshot)
102
+ if snap is None:
103
+ raise ValueError(f"snapshot not found: {snapshot}")
104
+ snapshot_id = snap["id"]
105
+ source_rows = [
106
+ r
107
+ for r in con.execute(
108
+ """
109
+ SELECT s.* FROM snapshot_layers sl JOIN sources s ON s.id=sl.source_id
110
+ WHERE sl.snapshot_id=? ORDER BY sl.layer_order
111
+ """,
112
+ (snapshot_id,),
113
+ )
114
+ ]
115
+ else:
116
+ snapshot_id = None
117
+ source_rows = [r for r in con.execute("SELECT * FROM sources ORDER BY id")]
118
+ source_rows = [r for r in source_rows if r is not None]
119
+ counts = {"sources": 0, "objects": 0, "attributes": 0, "table_parts": 0, "forms": 0, "form_elements": 0, "data_composition_fields": 0, "chunks": 0}
120
+ for src in source_rows:
121
+ info = SourceInfo(src["name"], Path(src["path"]), src["kind"], src["format"], src["repo_url"], src["branch"], src["commit_hash"])
122
+ parsed = parse_source(info)
123
+ self.store.clear_index_for_source(con, src["id"])
124
+ counts["sources"] += 1
125
+ object_ids: dict[str, int] = {}
126
+ table_part_ids: dict[tuple[str, str], int] = {}
127
+ form_ids: dict[tuple[str, str], int] = {}
128
+ for obj in parsed.objects:
129
+ cur = con.execute(
130
+ """
131
+ INSERT INTO metadata_objects(source_id, full_name, kind, name, synonym, source_path, source_format, is_hierarchical, owner_types_json, raw_json)
132
+ VALUES(?,?,?,?,?,?,?,?,?,?)
133
+ """,
134
+ (src["id"], obj.full_name, obj.kind, obj.name, obj.synonym, obj.source_path, obj.source_format, int(bool(obj.is_hierarchical)) if obj.is_hierarchical is not None else None, json_dumps(obj.owner_types), json_dumps(obj.raw)),
135
+ )
136
+ oid = cur.lastrowid
137
+ object_ids[obj.full_name] = oid
138
+ counts["objects"] += 1
139
+ nav = navigation_for_object(obj.full_name, obj.kind)
140
+ if nav:
141
+ con.execute("INSERT INTO navigation_links(object_id, kind, url) VALUES(?,?,?)", (oid, nav[0], nav[1]))
142
+ chunk_text = f"{obj.full_name} {obj.synonym or ''} {obj.kind} navigation {nav[1] if nav else ''}"
143
+ cid = _insert_chunk(con, snapshot_id, oid, "object", chunk_text, obj.source_path, {"source": src["name"]})
144
+ con.execute("INSERT INTO search_fts(text, object_full_name, kind, source_path, chunk_id) VALUES(?,?,?,?,?)", (chunk_text, obj.full_name, obj.kind, obj.source_path, cid))
145
+ counts["chunks"] += 1
146
+ if obj.synonym:
147
+ _insert_business_term(con, snapshot_id, obj.synonym, oid, 0.7, "object synonym")
148
+ for attr in parsed.attributes:
149
+ oid = object_ids.get(attr.object_full_name)
150
+ if not oid:
151
+ continue
152
+ con.execute("INSERT INTO attributes(object_id,name,type_name,synonym,required,source_path,role,raw_json) VALUES(?,?,?,?,?,?,?,?)", (oid, attr.name, attr.type_name, attr.synonym, attr.required, attr.source_path, attr.role, json_dumps(attr.raw)))
153
+ counts["attributes"] += 1
154
+ for tp in parsed.table_parts:
155
+ oid = object_ids.get(tp.object_full_name)
156
+ if not oid:
157
+ continue
158
+ cur = con.execute("INSERT INTO table_parts(object_id,name,synonym,source_path,raw_json) VALUES(?,?,?,?,?)", (oid, tp.name, tp.synonym, tp.source_path, json_dumps(tp.raw)))
159
+ table_part_ids[(tp.object_full_name, tp.name)] = cur.lastrowid
160
+ counts["table_parts"] += 1
161
+ for col in parsed.table_part_columns:
162
+ tpid = table_part_ids.get((col.object_full_name, col.table_part))
163
+ if tpid:
164
+ con.execute("INSERT INTO table_part_columns(table_part_id,name,type_name,synonym,required,source_path,raw_json) VALUES(?,?,?,?,?,?,?)", (tpid, col.name, col.type_name, col.synonym, col.required, col.source_path, json_dumps(col.raw)))
165
+ for form in parsed.forms:
166
+ oid = object_ids.get(form.object_full_name)
167
+ if not oid:
168
+ continue
169
+ cur = con.execute("INSERT INTO forms(object_id,name,kind,source_path,raw_json) VALUES(?,?,?,?,?)", (oid, form.name, form.kind, form.source_path, json_dumps(form.raw)))
170
+ form_ids[(form.object_full_name, form.name)] = cur.lastrowid
171
+ counts["forms"] += 1
172
+ for elem in parsed.form_elements:
173
+ fid = form_ids.get((elem.object_full_name, elem.form_name))
174
+ if not fid:
175
+ continue
176
+ con.execute("INSERT INTO form_elements(form_id,name,element_type,title,data_path,command_name,parent_name,source_path,raw_json) VALUES(?,?,?,?,?,?,?,?,?)", (fid, elem.name, elem.element_type, elem.title, elem.data_path, elem.command_name, elem.parent_name, elem.source_path, json_dumps(elem.raw)))
177
+ counts["form_elements"] += 1
178
+ for dcs_field in parsed.data_composition_fields:
179
+ oid = object_ids.get(dcs_field.object_full_name)
180
+ if not oid:
181
+ continue
182
+ con.execute(
183
+ "INSERT INTO data_composition_fields(object_id,schema_name,data_path,field,title,expression,dataset_name,field_type,source_path,raw_json) VALUES(?,?,?,?,?,?,?,?,?,?)",
184
+ (oid, dcs_field.schema_name, dcs_field.data_path, dcs_field.source_field, dcs_field.title, dcs_field.expression, dcs_field.dataset_name, dcs_field.field_type, dcs_field.source_path, json_dumps(dcs_field.raw)),
185
+ )
186
+ counts["data_composition_fields"] += 1
187
+ text_parts = [dcs_field.object_full_name, "СКД", dcs_field.schema_name, dcs_field.data_path, dcs_field.source_field or "", dcs_field.title or "", dcs_field.expression or ""]
188
+ chunk_text = " ".join(part for part in text_parts if part)
189
+ cid = _insert_chunk(con, snapshot_id, oid, "dcs_field", chunk_text, dcs_field.source_path, {"source": src["name"], "schema": dcs_field.schema_name})
190
+ con.execute("INSERT INTO search_fts(text, object_full_name, kind, source_path, chunk_id) VALUES(?,?,?,?,?)", (chunk_text, dcs_field.object_full_name, "СКД", dcs_field.source_path, cid))
191
+ counts["chunks"] += 1
192
+ return {"db_path": self.db_path, **counts}
193
+
194
+ def lookup_object(self, object: str, snapshot: str | int | None = None) -> dict[str, Any]:
195
+ with self.store.connect() as con:
196
+ source_filter = ""
197
+ params: list[Any] = []
198
+ if snapshot is not None:
199
+ snap = _snapshot_row(con, snapshot)
200
+ if snap is None:
201
+ raise ValueError(f"snapshot not found: {snapshot}")
202
+ source_filter = "AND mo.source_id IN (SELECT source_id FROM snapshot_layers WHERE snapshot_id=?)"
203
+ params.append(snap["id"])
204
+ rows = [dict(r) for r in con.execute(f"SELECT mo.*, s.name AS source_name, s.kind AS source_kind FROM metadata_objects mo JOIN sources s ON s.id=mo.source_id WHERE mo.full_name LIKE ? {source_filter} ORDER BY mo.source_id", [object, *params])]
205
+ if not rows and "." not in object:
206
+ rows = [dict(r) for r in con.execute(f"SELECT mo.*, s.name AS source_name, s.kind AS source_kind FROM metadata_objects mo JOIN sources s ON s.id=mo.source_id WHERE mo.name LIKE ? {source_filter} ORDER BY mo.source_id", [object, *params])]
207
+ for row in rows:
208
+ oid = row["id"]
209
+ row["navigation"] = [dict(r) for r in con.execute("SELECT kind,url FROM navigation_links WHERE object_id=?", (oid,))]
210
+ row["attributes"] = [dict(r) for r in con.execute("SELECT name,type_name,synonym,required,role FROM attributes WHERE object_id=? ORDER BY id", (oid,))]
211
+ row["table_parts"] = [dict(r) for r in con.execute("SELECT id,name,synonym FROM table_parts WHERE object_id=? ORDER BY id", (oid,))]
212
+ for tp in row["table_parts"]:
213
+ tp["columns"] = [dict(r) for r in con.execute("SELECT name,type_name,synonym,required FROM table_part_columns WHERE table_part_id=? ORDER BY id", (tp["id"],))]
214
+ row["forms"] = [dict(r) for r in con.execute("SELECT id,name,kind,source_path FROM forms WHERE object_id=? ORDER BY id", (oid,))]
215
+ row["data_composition_fields"] = [dict(r) for r in con.execute("SELECT schema_name,data_path,field,title,expression,dataset_name,field_type,source_path FROM data_composition_fields WHERE object_id=? ORDER BY id", (oid,))]
216
+ return {"query": object, "snapshot": snapshot, "matches": rows}
217
+
218
+ def lookup_form_element_types(self, snapshot: str | int | None = None) -> dict[str, Any]:
219
+ """Return a compact name/title/data-path index for UI-tree enrichment.
220
+
221
+ The 1C test-client API exposes runtime UI element names and captions, but
222
+ not always enough metadata typing. RAG already indexes form elements,
223
+ attributes and DCS fields from the configuration source, so this helper
224
+ builds a best-effort map usable by server-side MCP tools.
225
+
226
+ 1C can generate object/list/choice forms automatically from metadata.
227
+ For those generated forms runtime field names usually match metadata
228
+ attribute names/captions even when no explicit ``Form.xml`` exists.
229
+ Therefore the index also exposes synthetic form elements for attributes
230
+ and table-part columns. They let ``ui_tree`` infer field types on
231
+ auto-generated forms using metadata alone.
232
+ """
233
+ with self.store.connect() as con:
234
+ source_filter = ""
235
+ params: list[Any] = []
236
+ if snapshot is not None:
237
+ snap = _snapshot_row(con, snapshot)
238
+ if snap is None:
239
+ raise ValueError(f"snapshot not found: {snapshot}")
240
+ source_filter = "AND mo.source_id IN (SELECT source_id FROM snapshot_layers WHERE snapshot_id=?)"
241
+ params.append(snap["id"])
242
+ rows = [
243
+ dict(r)
244
+ for r in con.execute(
245
+ f"""
246
+ SELECT mo.full_name AS object_full_name, mo.kind AS object_kind,
247
+ f.name AS form_name, fe.name, fe.element_type, fe.title,
248
+ fe.data_path, fe.command_name, fe.parent_name, fe.source_path,
249
+ a.type_name AS attribute_type, a.role AS attribute_role,
250
+ dcf.field_type AS dcs_field_type
251
+ FROM form_elements fe
252
+ JOIN forms f ON f.id=fe.form_id
253
+ JOIN metadata_objects mo ON mo.id=f.object_id
254
+ LEFT JOIN attributes a ON a.object_id=mo.id AND (a.name=fe.data_path OR a.name=fe.name)
255
+ LEFT JOIN data_composition_fields dcf ON dcf.object_id=mo.id AND (dcf.data_path=fe.data_path OR dcf.field=fe.data_path OR dcf.title=fe.title)
256
+ WHERE 1=1 {source_filter}
257
+ ORDER BY mo.full_name, f.name, fe.id
258
+ """,
259
+ params,
260
+ )
261
+ ]
262
+ synthetic_rows = [
263
+ dict(r)
264
+ for r in con.execute(
265
+ f"""
266
+ SELECT mo.full_name AS object_full_name, mo.kind AS object_kind,
267
+ '<auto>' AS form_name,
268
+ a.name AS name,
269
+ 'AutoGeneratedField' AS element_type,
270
+ COALESCE(a.synonym, a.name) AS title,
271
+ a.name AS data_path,
272
+ NULL AS command_name,
273
+ NULL AS parent_name,
274
+ a.source_path AS source_path,
275
+ a.type_name AS attribute_type,
276
+ a.role AS attribute_role,
277
+ NULL AS dcs_field_type
278
+ FROM attributes a
279
+ JOIN metadata_objects mo ON mo.id=a.object_id
280
+ WHERE 1=1 {source_filter}
281
+ ORDER BY mo.full_name, a.id
282
+ """,
283
+ params,
284
+ )
285
+ ]
286
+ table_part_rows = [
287
+ dict(r)
288
+ for r in con.execute(
289
+ f"""
290
+ SELECT mo.full_name AS object_full_name, mo.kind AS object_kind,
291
+ '<auto>' AS form_name,
292
+ (tp.name || '.' || tpc.name) AS name,
293
+ 'AutoGeneratedTablePartField' AS element_type,
294
+ COALESCE(tpc.synonym, tpc.name) AS title,
295
+ (tp.name || '.' || tpc.name) AS data_path,
296
+ NULL AS command_name,
297
+ tp.name AS parent_name,
298
+ tpc.source_path AS source_path,
299
+ tpc.type_name AS attribute_type,
300
+ 'table_part_column' AS attribute_role,
301
+ NULL AS dcs_field_type
302
+ FROM table_part_columns tpc
303
+ JOIN table_parts tp ON tp.id=tpc.table_part_id
304
+ JOIN metadata_objects mo ON mo.id=tp.object_id
305
+ WHERE 1=1 {source_filter}
306
+ ORDER BY mo.full_name, tp.name, tpc.id
307
+ """,
308
+ params,
309
+ )
310
+ ]
311
+ items = rows + synthetic_rows + table_part_rows
312
+ return {"snapshot": snapshot, "count": len(items), "items": items}
313
+
314
def query(self, text: str, snapshot: str | int | None = None, limit: int = 10) -> dict[str, Any]:
    """Search business terms and the full-text index for ``text``.

    The text is normalized with ``normalize_query`` and then used for:

    * a substring (``LIKE``) match on ``business_terms.normalized_term``,
      best confidence first;
    * an FTS5 ``MATCH`` on ``search_fts`` ranked by ``bm25``; if that
      fails or yields fewer than ``limit`` rows, a substring match on the
      indexed text tops the list up, skipping rows whose
      ``(object_full_name, source_path)`` pair is already present.

    NOTE: ``snapshot`` is accepted for interface compatibility but does
    not currently restrict the results.

    Returns:
        ``{"query": text, "terms": [...], "fts": [...]}``. Rows added by
        the substring fallback carry no ``score`` key.
    """
    import sqlite3  # local import: only the exception type is needed here

    q = normalize_query(text)
    like_pattern = f"%{q}%"
    con = self.store.connect()
    try:
        # ``with con`` only scopes the transaction; close explicitly below.
        with con:
            term_rows = [
                dict(r)
                for r in con.execute(
                    """
                    SELECT bt.*, mo.full_name, mo.kind, s.name AS source_name
                    FROM business_terms bt
                    LEFT JOIN metadata_objects mo ON mo.id=bt.object_id
                    LEFT JOIN sources s ON s.id=mo.source_id
                    WHERE bt.normalized_term LIKE ?
                    ORDER BY bt.confidence DESC LIMIT ?
                    """,
                    (like_pattern, limit),
                )
            ]
            fts_query = " ".join(q.split()) or q
            try:
                fts_rows = [dict(r) for r in con.execute("SELECT text, object_full_name, kind, source_path, bm25(search_fts) AS score FROM search_fts WHERE search_fts MATCH ? ORDER BY score LIMIT ?", (fts_query, limit))]
            except sqlite3.Error:
                # User text is not guaranteed to be valid FTS5 query syntax;
                # treat a rejected query as "no FTS hits" and rely on LIKE below.
                fts_rows = []
            if len(fts_rows) < limit:
                seen = {(r.get("object_full_name"), r.get("source_path")) for r in fts_rows}
                for row in con.execute("SELECT text, object_full_name, kind, source_path FROM search_fts WHERE lower(text) LIKE ? LIMIT ?", (like_pattern, limit)):
                    item = dict(row)
                    key = (item.get("object_full_name"), item.get("source_path"))
                    if key not in seen:
                        fts_rows.append(item)
                        seen.add(key)
                    if len(fts_rows) >= limit:
                        break
    finally:
        con.close()
    return {"query": text, "terms": term_rows, "fts": fts_rows}
347
+
348
+
349
+ def _git_commit(path: Path) -> str | None:
350
+ try:
351
+ return subprocess.check_output(["git", "-C", str(path), "rev-parse", "HEAD"], text=True, stderr=subprocess.DEVNULL).strip()
352
+ except Exception:
353
+ return None
354
+
355
+
356
+ def _git_branch(path: Path) -> str | None:
357
+ try:
358
+ return subprocess.check_output(["git", "-C", str(path), "rev-parse", "--abbrev-ref", "HEAD"], text=True, stderr=subprocess.DEVNULL).strip()
359
+ except Exception:
360
+ return None
361
+
362
+
363
+ def _snapshot_row(con, snapshot: str | int):
364
+ if isinstance(snapshot, int) or str(snapshot).isdigit():
365
+ return con.execute("SELECT * FROM snapshots WHERE id=?", (int(snapshot),)).fetchone()
366
+ return con.execute("SELECT * FROM snapshots WHERE name=?", (snapshot,)).fetchone()
367
+
368
+
369
def _insert_chunk(con, snapshot_id, object_id, chunk_type, text, source_path, metadata) -> int:
    """Store one ``rag_chunks`` row (metadata serialized as JSON) and return its row id."""
    values = (snapshot_id, object_id, chunk_type, text, source_path, json_dumps(metadata))
    cursor = con.execute(
        "INSERT INTO rag_chunks(snapshot_id,object_id,chunk_type,text,source_path,metadata_json) VALUES(?,?,?,?,?,?)",
        values,
    )
    return cursor.lastrowid
372
+
373
+
374
def _insert_business_term(con, snapshot_id, term, object_id, confidence, evidence) -> None:
    """Store a business term together with its normalized form for later lookup."""
    normalized = normalize_query(term)
    values = (snapshot_id, term, normalized, object_id, confidence, evidence)
    con.execute(
        "INSERT INTO business_terms(snapshot_id,term,normalized_term,object_id,confidence,evidence) VALUES(?,?,?,?,?,?)",
        values,
    )
mcp_1c/rag/store.py ADDED
@@ -0,0 +1,228 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import sqlite3
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ SCHEMA_VERSION = 2
9
+
10
+
11
+ class RagStore:
12
+ def __init__(self, db_path: str | Path):
13
+ self.db_path = Path(db_path)
14
+ self.db_path.parent.mkdir(parents=True, exist_ok=True)
15
+
16
def connect(self) -> sqlite3.Connection:
    """Open a new connection with foreign keys enforced and rows addressable by column name."""
    connection = sqlite3.connect(self.db_path)
    # Foreign-key enforcement is a per-connection setting in SQLite.
    connection.execute("PRAGMA foreign_keys=ON")
    connection.row_factory = sqlite3.Row
    return connection
21
+
22
def init(self) -> None:
    """Create the schema if it does not exist and apply in-place migrations.

    Safe to call repeatedly: every table and index uses ``IF NOT EXISTS``
    and ``schema_info`` is rewritten to the current version each time.
    Databases created before the ``attributes.role`` column existed get the
    column added.
    """
    con = self.connect()
    try:
        # ``with con`` only scopes the transaction; the connection is
        # closed explicitly in ``finally`` so no handle is leaked.
        with con:
            # The version literal below must be kept in sync with SCHEMA_VERSION.
            con.executescript(
                """
                CREATE TABLE IF NOT EXISTS schema_info(version INTEGER NOT NULL);
                DELETE FROM schema_info;
                INSERT INTO schema_info(version) VALUES (2);

                CREATE TABLE IF NOT EXISTS sources(
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    name TEXT NOT NULL UNIQUE,
                    path TEXT NOT NULL,
                    kind TEXT NOT NULL,
                    format TEXT NOT NULL,
                    repo_url TEXT,
                    branch TEXT,
                    commit_hash TEXT
                );

                CREATE TABLE IF NOT EXISTS snapshots(
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    name TEXT NOT NULL UNIQUE,
                    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
                );

                CREATE TABLE IF NOT EXISTS snapshot_layers(
                    snapshot_id INTEGER NOT NULL,
                    source_id INTEGER NOT NULL,
                    layer_order INTEGER NOT NULL,
                    role TEXT NOT NULL,
                    PRIMARY KEY(snapshot_id, source_id),
                    FOREIGN KEY(snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
                    FOREIGN KEY(source_id) REFERENCES sources(id) ON DELETE CASCADE
                );

                CREATE TABLE IF NOT EXISTS metadata_objects(
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    source_id INTEGER NOT NULL,
                    full_name TEXT NOT NULL,
                    kind TEXT NOT NULL,
                    name TEXT NOT NULL,
                    synonym TEXT,
                    source_path TEXT,
                    source_format TEXT,
                    is_hierarchical INTEGER,
                    owner_types_json TEXT,
                    raw_json TEXT,
                    UNIQUE(source_id, full_name),
                    FOREIGN KEY(source_id) REFERENCES sources(id) ON DELETE CASCADE
                );

                CREATE TABLE IF NOT EXISTS attributes(
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    object_id INTEGER NOT NULL,
                    name TEXT NOT NULL,
                    type_name TEXT,
                    synonym TEXT,
                    required INTEGER,
                    source_path TEXT,
                    role TEXT NOT NULL DEFAULT 'attribute',
                    raw_json TEXT,
                    FOREIGN KEY(object_id) REFERENCES metadata_objects(id) ON DELETE CASCADE
                );

                CREATE TABLE IF NOT EXISTS table_parts(
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    object_id INTEGER NOT NULL,
                    name TEXT NOT NULL,
                    synonym TEXT,
                    source_path TEXT,
                    raw_json TEXT,
                    FOREIGN KEY(object_id) REFERENCES metadata_objects(id) ON DELETE CASCADE
                );

                CREATE TABLE IF NOT EXISTS table_part_columns(
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    table_part_id INTEGER NOT NULL,
                    name TEXT NOT NULL,
                    type_name TEXT,
                    synonym TEXT,
                    required INTEGER,
                    source_path TEXT,
                    raw_json TEXT,
                    FOREIGN KEY(table_part_id) REFERENCES table_parts(id) ON DELETE CASCADE
                );

                CREATE TABLE IF NOT EXISTS forms(
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    object_id INTEGER NOT NULL,
                    name TEXT NOT NULL,
                    kind TEXT,
                    source_path TEXT,
                    raw_json TEXT,
                    FOREIGN KEY(object_id) REFERENCES metadata_objects(id) ON DELETE CASCADE
                );

                CREATE TABLE IF NOT EXISTS form_elements(
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    form_id INTEGER NOT NULL,
                    name TEXT NOT NULL,
                    element_type TEXT,
                    title TEXT,
                    data_path TEXT,
                    command_name TEXT,
                    parent_name TEXT,
                    source_path TEXT,
                    raw_json TEXT,
                    FOREIGN KEY(form_id) REFERENCES forms(id) ON DELETE CASCADE
                );

                CREATE TABLE IF NOT EXISTS data_composition_fields(
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    object_id INTEGER NOT NULL,
                    schema_name TEXT NOT NULL,
                    data_path TEXT NOT NULL,
                    field TEXT,
                    title TEXT,
                    expression TEXT,
                    dataset_name TEXT,
                    field_type TEXT,
                    source_path TEXT,
                    raw_json TEXT,
                    FOREIGN KEY(object_id) REFERENCES metadata_objects(id) ON DELETE CASCADE
                );

                CREATE TABLE IF NOT EXISTS navigation_links(
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    object_id INTEGER NOT NULL,
                    kind TEXT NOT NULL,
                    url TEXT NOT NULL,
                    FOREIGN KEY(object_id) REFERENCES metadata_objects(id) ON DELETE CASCADE
                );

                CREATE TABLE IF NOT EXISTS business_terms(
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    snapshot_id INTEGER,
                    term TEXT NOT NULL,
                    normalized_term TEXT NOT NULL,
                    object_id INTEGER,
                    relation TEXT NOT NULL DEFAULT 'maps_to',
                    confidence REAL NOT NULL DEFAULT 0.5,
                    evidence TEXT,
                    FOREIGN KEY(snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
                    FOREIGN KEY(object_id) REFERENCES metadata_objects(id) ON DELETE CASCADE
                );

                CREATE TABLE IF NOT EXISTS rag_chunks(
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    snapshot_id INTEGER,
                    object_id INTEGER,
                    chunk_type TEXT NOT NULL,
                    text TEXT NOT NULL,
                    source_path TEXT,
                    metadata_json TEXT,
                    FOREIGN KEY(snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
                    FOREIGN KEY(object_id) REFERENCES metadata_objects(id) ON DELETE CASCADE
                );

                CREATE VIRTUAL TABLE IF NOT EXISTS search_fts USING fts5(
                    text,
                    object_full_name UNINDEXED,
                    kind UNINDEXED,
                    source_path UNINDEXED,
                    chunk_id UNINDEXED
                );

                CREATE INDEX IF NOT EXISTS idx_objects_source_name ON metadata_objects(source_id, full_name);
                CREATE INDEX IF NOT EXISTS idx_objects_kind ON metadata_objects(kind);
                CREATE INDEX IF NOT EXISTS idx_attrs_object ON attributes(object_id);
                CREATE INDEX IF NOT EXISTS idx_forms_object ON forms(object_id);
                CREATE INDEX IF NOT EXISTS idx_form_elements_name ON form_elements(form_id, name);
                CREATE INDEX IF NOT EXISTS idx_dcs_fields_object ON data_composition_fields(object_id, schema_name);
                CREATE INDEX IF NOT EXISTS idx_terms_norm ON business_terms(normalized_term);
                CREATE INDEX IF NOT EXISTS idx_layers_snapshot ON snapshot_layers(snapshot_id, layer_order);
                """
            )
            # Migration: older databases have an ``attributes`` table without ``role``.
            columns = {row["name"] for row in con.execute("PRAGMA table_info(attributes)")}
            if "role" not in columns:
                con.execute("ALTER TABLE attributes ADD COLUMN role TEXT NOT NULL DEFAULT 'attribute'")
    finally:
        con.close()
201
+
202
def clear_index_for_source(self, con: sqlite3.Connection, source_id: int) -> None:
    """Remove the full-text rows and metadata objects indexed for ``source_id``.

    Full-text rows go first because they are located through the chunks of
    the objects that the second statement deletes.
    """
    owner = (source_id,)
    purge_fts = """
        DELETE FROM search_fts
        WHERE chunk_id IN (
            SELECT rc.id
            FROM rag_chunks rc JOIN metadata_objects mo ON mo.id=rc.object_id
            WHERE mo.source_id=?
        )
        """
    con.execute(purge_fts, owner)
    con.execute("DELETE FROM metadata_objects WHERE source_id=?", owner)
215
+
216
+
217
+ def row_to_dict(row: sqlite3.Row | None) -> dict[str, Any] | None:
218
+ return dict(row) if row is not None else None
219
+
220
+
221
def json_dumps(value: Any) -> str:
    """Serialize ``value`` to JSON with sorted keys, keeping non-ASCII text unescaped."""
    options = {"ensure_ascii": False, "sort_keys": True}
    return json.dumps(value, **options)
223
+
224
+
225
+ def json_loads(value: str | None) -> Any:
226
+ if not value:
227
+ return None
228
+ return json.loads(value)