ba-agent-mcp 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ba_agent_mcp-0.1.0/MANIFEST.in +28 -0
- ba_agent_mcp-0.1.0/PKG-INFO +96 -0
- ba_agent_mcp-0.1.0/README_PYPI.md +75 -0
- ba_agent_mcp-0.1.0/ba_agent_mcp.egg-info/PKG-INFO +96 -0
- ba_agent_mcp-0.1.0/ba_agent_mcp.egg-info/SOURCES.txt +71 -0
- ba_agent_mcp-0.1.0/ba_agent_mcp.egg-info/dependency_links.txt +1 -0
- ba_agent_mcp-0.1.0/ba_agent_mcp.egg-info/entry_points.txt +2 -0
- ba_agent_mcp-0.1.0/ba_agent_mcp.egg-info/requires.txt +5 -0
- ba_agent_mcp-0.1.0/ba_agent_mcp.egg-info/top_level.txt +4 -0
- ba_agent_mcp-0.1.0/kb/__init__.py +0 -0
- ba_agent_mcp-0.1.0/kb/adopt_doc.py +177 -0
- ba_agent_mcp-0.1.0/kb/alerts.py +42 -0
- ba_agent_mcp-0.1.0/kb/backfill_embeddings.py +67 -0
- ba_agent_mcp-0.1.0/kb/chunker.py +254 -0
- ba_agent_mcp-0.1.0/kb/embeddings.py +149 -0
- ba_agent_mcp-0.1.0/kb/eval_kb.py +134 -0
- ba_agent_mcp-0.1.0/kb/extend_doc_types.py +39 -0
- ba_agent_mcp-0.1.0/kb/fast_metadata.py +95 -0
- ba_agent_mcp-0.1.0/kb/ingest.py +255 -0
- ba_agent_mcp-0.1.0/kb/instant_ingest.py +91 -0
- ba_agent_mcp-0.1.0/kb/kb_smoke.py +221 -0
- ba_agent_mcp-0.1.0/kb/reconcile.py +192 -0
- ba_agent_mcp-0.1.0/kb/reference_loader.py +275 -0
- ba_agent_mcp-0.1.0/kb/rerank.py +162 -0
- ba_agent_mcp-0.1.0/kb/retrieval_eval.py +72 -0
- ba_agent_mcp-0.1.0/kb/retrieve.py +393 -0
- ba_agent_mcp-0.1.0/kb/search_client.py +176 -0
- ba_agent_mcp-0.1.0/kb/supabase_rest.py +150 -0
- ba_agent_mcp-0.1.0/lib/__init__.py +0 -0
- ba_agent_mcp-0.1.0/lib/analysis_session.py +130 -0
- ba_agent_mcp-0.1.0/lib/badoc_schema.py +93 -0
- ba_agent_mcp-0.1.0/lib/baseline.py +168 -0
- ba_agent_mcp-0.1.0/lib/citations.py +144 -0
- ba_agent_mcp-0.1.0/lib/doc_lint.py +423 -0
- ba_agent_mcp-0.1.0/lib/docx_text.py +56 -0
- ba_agent_mcp-0.1.0/lib/draft.py +210 -0
- ba_agent_mcp-0.1.0/lib/e2e.py +450 -0
- ba_agent_mcp-0.1.0/lib/egress_audit.py +76 -0
- ba_agent_mcp-0.1.0/lib/figma_analyze.py +57 -0
- ba_agent_mcp-0.1.0/lib/figma_client.py +149 -0
- ba_agent_mcp-0.1.0/lib/figma_doc.py +68 -0
- ba_agent_mcp-0.1.0/lib/figma_extract.py +245 -0
- ba_agent_mcp-0.1.0/lib/figma_publish.py +99 -0
- ba_agent_mcp-0.1.0/lib/health.py +110 -0
- ba_agent_mcp-0.1.0/lib/ingest_hook.py +111 -0
- ba_agent_mcp-0.1.0/lib/migrate_to_theme_docs.py +174 -0
- ba_agent_mcp-0.1.0/lib/project_doc.py +98 -0
- ba_agent_mcp-0.1.0/lib/publish_prep.py +65 -0
- ba_agent_mcp-0.1.0/lib/readback.py +126 -0
- ba_agent_mcp-0.1.0/lib/reconcile_e2e.py +186 -0
- ba_agent_mcp-0.1.0/lib/reconcile_plan.py +162 -0
- ba_agent_mcp-0.1.0/lib/req_matcher.py +38 -0
- ba_agent_mcp-0.1.0/lib/requirement_id.py +28 -0
- ba_agent_mcp-0.1.0/lib/restore_lister.py +104 -0
- ba_agent_mcp-0.1.0/lib/serializer.py +323 -0
- ba_agent_mcp-0.1.0/lib/source_resolver.py +104 -0
- ba_agent_mcp-0.1.0/lib/suppa_adapter.py +218 -0
- ba_agent_mcp-0.1.0/lib/suppa_client.py +651 -0
- ba_agent_mcp-0.1.0/lib/token_gate.py +187 -0
- ba_agent_mcp-0.1.0/lib/transcript_store.py +323 -0
- ba_agent_mcp-0.1.0/lib/validate_doc.py +314 -0
- ba_agent_mcp-0.1.0/lib/write_sequencer.py +499 -0
- ba_agent_mcp-0.1.0/mcp_server/__init__.py +8 -0
- ba_agent_mcp-0.1.0/mcp_server/config.py +64 -0
- ba_agent_mcp-0.1.0/mcp_server/server.py +29 -0
- ba_agent_mcp-0.1.0/mcp_server/tools.py +115 -0
- ba_agent_mcp-0.1.0/pyproject.toml +36 -0
- ba_agent_mcp-0.1.0/setup.cfg +4 -0
- ba_agent_mcp-0.1.0/smoke/__init__.py +0 -0
- ba_agent_mcp-0.1.0/smoke/canonical.py +154 -0
- ba_agent_mcp-0.1.0/smoke/check_supabase.py +101 -0
- ba_agent_mcp-0.1.0/smoke/fixture_brd.py +73 -0
- ba_agent_mcp-0.1.0/smoke/phase0_smoke.py +512 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Keep the published sdist to the runtime package + the PyPI readme only.
|
|
2
|
+
# (The wheel already contains just the lib/kb/mcp_server/smoke packages.)
|
|
3
|
+
# The git-based sdist finder would otherwise sweep in tests/, the internal
|
|
4
|
+
# README, and other repo files — exclude them so nothing internal ships.
|
|
5
|
+
include README_PYPI.md
|
|
6
|
+
include pyproject.toml
|
|
7
|
+
|
|
8
|
+
exclude README.md
|
|
9
|
+
exclude RUNBOOK.md
|
|
10
|
+
exclude Makefile
|
|
11
|
+
exclude vendor.lock
|
|
12
|
+
exclude CLAUDE.md
|
|
13
|
+
exclude .env
|
|
14
|
+
exclude .env.example
|
|
15
|
+
|
|
16
|
+
prune tests
|
|
17
|
+
prune docs
|
|
18
|
+
prune evals
|
|
19
|
+
prune logs
|
|
20
|
+
prune skill
|
|
21
|
+
prune .codegraph
|
|
22
|
+
prune info?to?ingest
|
|
23
|
+
|
|
24
|
+
global-exclude *.sql
|
|
25
|
+
global-exclude .env
|
|
26
|
+
global-exclude *.pyc
|
|
27
|
+
global-exclude *.docx
|
|
28
|
+
global-exclude *.pdf
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ba-agent-mcp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MCP server exposing the Modern-Expo virtual Business Analyst Knowledge Base (read/derive tools).
|
|
5
|
+
Author: Modern-Expo
|
|
6
|
+
Keywords: mcp,model-context-protocol,business-analyst,knowledge-base,rag
|
|
7
|
+
Classifier: Development Status :: 4 - Beta
|
|
8
|
+
Classifier: Intended Audience :: Developers
|
|
9
|
+
Classifier: License :: Other/Proprietary License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
+
Classifier: Topic :: Software Development :: Documentation
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
Requires-Dist: mcp>=1.2
|
|
18
|
+
Requires-Dist: pypdf>=4
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: pytest>=8; extra == "dev"
|
|
21
|
+
|
|
22
|
+
# ba-agent-mcp
|
|
23
|
+
|
|
24
|
+
An [MCP](https://modelcontextprotocol.io) server that exposes the Modern-Expo
|
|
25
|
+
virtual **Business Analyst Knowledge Base** as a small set of **read / derive**
|
|
26
|
+
tools. It never writes to SUPPA Docs or the Knowledge Base and is deliberately
|
|
27
|
+
kept off the governed write path — authoring documents goes through the
|
|
28
|
+
human-gated `business-analyst` skill, not this server.
|
|
29
|
+
|
|
30
|
+
## Install
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
uvx ba-agent-mcp # run on demand in an isolated environment (recommended)
|
|
34
|
+
# or
|
|
35
|
+
pip install ba-agent-mcp # then run: ba-agent-mcp
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Credentials — you supply your own
|
|
39
|
+
|
|
40
|
+
**This package ships no credentials.** Each user provides their **own** keys and
|
|
41
|
+
their **own** SUPPA identity. No secret is ever read from the MCP client JSON
|
|
42
|
+
config; secrets are resolved from the environment, in this order:
|
|
43
|
+
|
|
44
|
+
1. real environment variables in the launching shell, then
|
|
45
|
+
2. an explicit env file at `$BA_AGENT_ENV_FILE`, then
|
|
46
|
+
3. `~/.ba-agent/.env`
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
# ~/.ba-agent/.env (or export these in your shell)
|
|
50
|
+
SUPABASE_URL=https://<your-project>.supabase.co
|
|
51
|
+
SUPABASE_SERVICE_ROLE_KEY=... # or SUPABASE_ANON_KEY for read-only
|
|
52
|
+
SUPPA_API_KEY=... # your own SUPPA identity (or SUPPA_TOKEN)
|
|
53
|
+
GEMINI_API_KEY=... # optional — enables hybrid (vector) search
|
|
54
|
+
FIGMA_TOKEN=... # optional — enables analyze_figma
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
> Never put any secret in the MCP client JSON. A tool whose credentials are
|
|
58
|
+
> absent returns a clear, structured error instead of failing the server, so you
|
|
59
|
+
> can add capabilities incrementally.
|
|
60
|
+
|
|
61
|
+
## Connect (any MCP client)
|
|
62
|
+
|
|
63
|
+
```json
|
|
64
|
+
{
|
|
65
|
+
"mcpServers": {
|
|
66
|
+
"ba-agent": {
|
|
67
|
+
"command": "uvx",
|
|
68
|
+
"args": ["ba-agent-mcp"],
|
|
69
|
+
"env": { "BA_AGENT_ENV_FILE": "/absolute/path/to/your/.env" }
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Tools
|
|
76
|
+
|
|
77
|
+
| Tool | Returns |
|
|
78
|
+
| --- | --- |
|
|
79
|
+
| `kb_search` | Ranked KB hits with provenance (title, breadcrumb, SUPPA anchor, `data_class`). |
|
|
80
|
+
| `kb_assemble_context` | A cited grounding block for a question + coverage + strictest `data_class`. |
|
|
81
|
+
| `kb_assemble_corpus` | The whole project corpus grouped by document, for cross-corpus analysis. |
|
|
82
|
+
| `kb_list_docs` | Inventory of governed documents in a project (reads SUPPA under your own identity). |
|
|
83
|
+
| `analyze_figma` | A structured design profile of a Figma file (requires `FIGMA_TOKEN`). |
|
|
84
|
+
|
|
85
|
+
## Data governance
|
|
86
|
+
|
|
87
|
+
The KB read tools connect with the Supabase service-role key, which bypasses
|
|
88
|
+
row-level security, so they **fail closed at the application layer**: any chunk
|
|
89
|
+
that is not explicitly `data_class = 'internal'` is excluded from results.
|
|
90
|
+
Consequence — **`client`-classed content is not readable over MCP**. The
|
|
91
|
+
append-only logs never contain document bodies, only ids, hashes, identity, and
|
|
92
|
+
HTTP status.
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
Proprietary — Modern-Expo internal tooling. All rights reserved.
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# ba-agent-mcp
|
|
2
|
+
|
|
3
|
+
An [MCP](https://modelcontextprotocol.io) server that exposes the Modern-Expo
|
|
4
|
+
virtual **Business Analyst Knowledge Base** as a small set of **read / derive**
|
|
5
|
+
tools. It never writes to SUPPA Docs or the Knowledge Base and is deliberately
|
|
6
|
+
kept off the governed write path — authoring documents goes through the
|
|
7
|
+
human-gated `business-analyst` skill, not this server.
|
|
8
|
+
|
|
9
|
+
## Install
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
uvx ba-agent-mcp # run on demand in an isolated environment (recommended)
|
|
13
|
+
# or
|
|
14
|
+
pip install ba-agent-mcp # then run: ba-agent-mcp
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Credentials — you supply your own
|
|
18
|
+
|
|
19
|
+
**This package ships no credentials.** Each user provides their **own** keys and
|
|
20
|
+
their **own** SUPPA identity. No secret is ever read from the MCP client JSON
|
|
21
|
+
config; secrets are resolved from the environment, in this order:
|
|
22
|
+
|
|
23
|
+
1. real environment variables in the launching shell, then
|
|
24
|
+
2. an explicit env file at `$BA_AGENT_ENV_FILE`, then
|
|
25
|
+
3. `~/.ba-agent/.env`
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
# ~/.ba-agent/.env (or export these in your shell)
|
|
29
|
+
SUPABASE_URL=https://<your-project>.supabase.co
|
|
30
|
+
SUPABASE_SERVICE_ROLE_KEY=... # or SUPABASE_ANON_KEY for read-only
|
|
31
|
+
SUPPA_API_KEY=... # your own SUPPA identity (or SUPPA_TOKEN)
|
|
32
|
+
GEMINI_API_KEY=... # optional — enables hybrid (vector) search
|
|
33
|
+
FIGMA_TOKEN=... # optional — enables analyze_figma
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
> Never put any secret in the MCP client JSON. A tool whose credentials are
|
|
37
|
+
> absent returns a clear, structured error instead of failing the server, so you
|
|
38
|
+
> can add capabilities incrementally.
|
|
39
|
+
|
|
40
|
+
## Connect (any MCP client)
|
|
41
|
+
|
|
42
|
+
```json
|
|
43
|
+
{
|
|
44
|
+
"mcpServers": {
|
|
45
|
+
"ba-agent": {
|
|
46
|
+
"command": "uvx",
|
|
47
|
+
"args": ["ba-agent-mcp"],
|
|
48
|
+
"env": { "BA_AGENT_ENV_FILE": "/absolute/path/to/your/.env" }
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Tools
|
|
55
|
+
|
|
56
|
+
| Tool | Returns |
|
|
57
|
+
| --- | --- |
|
|
58
|
+
| `kb_search` | Ranked KB hits with provenance (title, breadcrumb, SUPPA anchor, `data_class`). |
|
|
59
|
+
| `kb_assemble_context` | A cited grounding block for a question + coverage + strictest `data_class`. |
|
|
60
|
+
| `kb_assemble_corpus` | The whole project corpus grouped by document, for cross-corpus analysis. |
|
|
61
|
+
| `kb_list_docs` | Inventory of governed documents in a project (reads SUPPA under your own identity). |
|
|
62
|
+
| `analyze_figma` | A structured design profile of a Figma file (requires `FIGMA_TOKEN`). |
|
|
63
|
+
|
|
64
|
+
## Data governance
|
|
65
|
+
|
|
66
|
+
The KB read tools connect with the Supabase service-role key, which bypasses
|
|
67
|
+
row-level security, so they **fail closed at the application layer**: any chunk
|
|
68
|
+
that is not explicitly `data_class = 'internal'` is excluded from results.
|
|
69
|
+
Consequence — **`client`-classed content is not readable over MCP**. The
|
|
70
|
+
append-only logs never contain document bodies, only ids, hashes, identity, and
|
|
71
|
+
HTTP status.
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
Proprietary — Modern-Expo internal tooling. All rights reserved.
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ba-agent-mcp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MCP server exposing the Modern-Expo virtual Business Analyst Knowledge Base (read/derive tools).
|
|
5
|
+
Author: Modern-Expo
|
|
6
|
+
Keywords: mcp,model-context-protocol,business-analyst,knowledge-base,rag
|
|
7
|
+
Classifier: Development Status :: 4 - Beta
|
|
8
|
+
Classifier: Intended Audience :: Developers
|
|
9
|
+
Classifier: License :: Other/Proprietary License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
+
Classifier: Topic :: Software Development :: Documentation
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
Requires-Dist: mcp>=1.2
|
|
18
|
+
Requires-Dist: pypdf>=4
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: pytest>=8; extra == "dev"
|
|
21
|
+
|
|
22
|
+
# ba-agent-mcp
|
|
23
|
+
|
|
24
|
+
An [MCP](https://modelcontextprotocol.io) server that exposes the Modern-Expo
|
|
25
|
+
virtual **Business Analyst Knowledge Base** as a small set of **read / derive**
|
|
26
|
+
tools. It never writes to SUPPA Docs or the Knowledge Base and is deliberately
|
|
27
|
+
kept off the governed write path — authoring documents goes through the
|
|
28
|
+
human-gated `business-analyst` skill, not this server.
|
|
29
|
+
|
|
30
|
+
## Install
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
uvx ba-agent-mcp # run on demand in an isolated environment (recommended)
|
|
34
|
+
# or
|
|
35
|
+
pip install ba-agent-mcp # then run: ba-agent-mcp
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Credentials — you supply your own
|
|
39
|
+
|
|
40
|
+
**This package ships no credentials.** Each user provides their **own** keys and
|
|
41
|
+
their **own** SUPPA identity. No secret is ever read from the MCP client JSON
|
|
42
|
+
config; secrets are resolved from the environment, in this order:
|
|
43
|
+
|
|
44
|
+
1. real environment variables in the launching shell, then
|
|
45
|
+
2. an explicit env file at `$BA_AGENT_ENV_FILE`, then
|
|
46
|
+
3. `~/.ba-agent/.env`
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
# ~/.ba-agent/.env (or export these in your shell)
|
|
50
|
+
SUPABASE_URL=https://<your-project>.supabase.co
|
|
51
|
+
SUPABASE_SERVICE_ROLE_KEY=... # or SUPABASE_ANON_KEY for read-only
|
|
52
|
+
SUPPA_API_KEY=... # your own SUPPA identity (or SUPPA_TOKEN)
|
|
53
|
+
GEMINI_API_KEY=... # optional — enables hybrid (vector) search
|
|
54
|
+
FIGMA_TOKEN=... # optional — enables analyze_figma
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
> Never put any secret in the MCP client JSON. A tool whose credentials are
|
|
58
|
+
> absent returns a clear, structured error instead of failing the server, so you
|
|
59
|
+
> can add capabilities incrementally.
|
|
60
|
+
|
|
61
|
+
## Connect (any MCP client)
|
|
62
|
+
|
|
63
|
+
```json
|
|
64
|
+
{
|
|
65
|
+
"mcpServers": {
|
|
66
|
+
"ba-agent": {
|
|
67
|
+
"command": "uvx",
|
|
68
|
+
"args": ["ba-agent-mcp"],
|
|
69
|
+
"env": { "BA_AGENT_ENV_FILE": "/absolute/path/to/your/.env" }
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Tools
|
|
76
|
+
|
|
77
|
+
| Tool | Returns |
|
|
78
|
+
| --- | --- |
|
|
79
|
+
| `kb_search` | Ranked KB hits with provenance (title, breadcrumb, SUPPA anchor, `data_class`). |
|
|
80
|
+
| `kb_assemble_context` | A cited grounding block for a question + coverage + strictest `data_class`. |
|
|
81
|
+
| `kb_assemble_corpus` | The whole project corpus grouped by document, for cross-corpus analysis. |
|
|
82
|
+
| `kb_list_docs` | Inventory of governed documents in a project (reads SUPPA under your own identity). |
|
|
83
|
+
| `analyze_figma` | A structured design profile of a Figma file (requires `FIGMA_TOKEN`). |
|
|
84
|
+
|
|
85
|
+
## Data governance
|
|
86
|
+
|
|
87
|
+
The KB read tools connect with the Supabase service-role key, which bypasses
|
|
88
|
+
row-level security, so they **fail closed at the application layer**: any chunk
|
|
89
|
+
that is not explicitly `data_class = 'internal'` is excluded from results.
|
|
90
|
+
Consequence — **`client`-classed content is not readable over MCP**. The
|
|
91
|
+
append-only logs never contain document bodies, only ids, hashes, identity, and
|
|
92
|
+
HTTP status.
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
Proprietary — Modern-Expo internal tooling. All rights reserved.
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
MANIFEST.in
|
|
2
|
+
README_PYPI.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
ba_agent_mcp.egg-info/PKG-INFO
|
|
5
|
+
ba_agent_mcp.egg-info/SOURCES.txt
|
|
6
|
+
ba_agent_mcp.egg-info/dependency_links.txt
|
|
7
|
+
ba_agent_mcp.egg-info/entry_points.txt
|
|
8
|
+
ba_agent_mcp.egg-info/requires.txt
|
|
9
|
+
ba_agent_mcp.egg-info/top_level.txt
|
|
10
|
+
kb/__init__.py
|
|
11
|
+
kb/adopt_doc.py
|
|
12
|
+
kb/alerts.py
|
|
13
|
+
kb/backfill_embeddings.py
|
|
14
|
+
kb/chunker.py
|
|
15
|
+
kb/embeddings.py
|
|
16
|
+
kb/eval_kb.py
|
|
17
|
+
kb/extend_doc_types.py
|
|
18
|
+
kb/fast_metadata.py
|
|
19
|
+
kb/ingest.py
|
|
20
|
+
kb/instant_ingest.py
|
|
21
|
+
kb/kb_smoke.py
|
|
22
|
+
kb/reconcile.py
|
|
23
|
+
kb/reference_loader.py
|
|
24
|
+
kb/rerank.py
|
|
25
|
+
kb/retrieval_eval.py
|
|
26
|
+
kb/retrieve.py
|
|
27
|
+
kb/search_client.py
|
|
28
|
+
kb/supabase_rest.py
|
|
29
|
+
lib/__init__.py
|
|
30
|
+
lib/analysis_session.py
|
|
31
|
+
lib/badoc_schema.py
|
|
32
|
+
lib/baseline.py
|
|
33
|
+
lib/citations.py
|
|
34
|
+
lib/doc_lint.py
|
|
35
|
+
lib/docx_text.py
|
|
36
|
+
lib/draft.py
|
|
37
|
+
lib/e2e.py
|
|
38
|
+
lib/egress_audit.py
|
|
39
|
+
lib/figma_analyze.py
|
|
40
|
+
lib/figma_client.py
|
|
41
|
+
lib/figma_doc.py
|
|
42
|
+
lib/figma_extract.py
|
|
43
|
+
lib/figma_publish.py
|
|
44
|
+
lib/health.py
|
|
45
|
+
lib/ingest_hook.py
|
|
46
|
+
lib/migrate_to_theme_docs.py
|
|
47
|
+
lib/project_doc.py
|
|
48
|
+
lib/publish_prep.py
|
|
49
|
+
lib/readback.py
|
|
50
|
+
lib/reconcile_e2e.py
|
|
51
|
+
lib/reconcile_plan.py
|
|
52
|
+
lib/req_matcher.py
|
|
53
|
+
lib/requirement_id.py
|
|
54
|
+
lib/restore_lister.py
|
|
55
|
+
lib/serializer.py
|
|
56
|
+
lib/source_resolver.py
|
|
57
|
+
lib/suppa_adapter.py
|
|
58
|
+
lib/suppa_client.py
|
|
59
|
+
lib/token_gate.py
|
|
60
|
+
lib/transcript_store.py
|
|
61
|
+
lib/validate_doc.py
|
|
62
|
+
lib/write_sequencer.py
|
|
63
|
+
mcp_server/__init__.py
|
|
64
|
+
mcp_server/config.py
|
|
65
|
+
mcp_server/server.py
|
|
66
|
+
mcp_server/tools.py
|
|
67
|
+
smoke/__init__.py
|
|
68
|
+
smoke/canonical.py
|
|
69
|
+
smoke/check_supabase.py
|
|
70
|
+
smoke/fixture_brd.py
|
|
71
|
+
smoke/phase0_smoke.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
File without changes
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""Backfill: adopt an EXISTING SUPPA Doc into the BA governance layer + KB
|
|
2
|
+
(BUILD_PLAN 2.9). The inventory (docs/INVENTORY_2_9.md) found legacy Docs with
|
|
3
|
+
no BADoc rows; this turns one such Doc into a governed, retrievable document.
|
|
4
|
+
|
|
5
|
+
Distinct from write_sequencer (which authors NEW content): the content already
|
|
6
|
+
lives in SUPPA, so adoption does NOT write blocks. It:
|
|
7
|
+
1. verifies the Doc has live pages;
|
|
8
|
+
2. computes the live-content hash (kb.ingest.live_content_hash — sentinels
|
|
9
|
+
excluded, identical to ingest/reconcile);
|
|
10
|
+
3. registers a BADoc row pointing at the EXISTING Doc — inverted-order-lite:
|
|
11
|
+
insert status='writing' (external_id=doc_key:1, content_hash=live_hash,
|
|
12
|
+
no content write) -> flip 'published' + content_ref=doc_id -> read-back.
|
|
13
|
+
Idempotent on external_id (re-read on unique conflict); refuses to repoint
|
|
14
|
+
a doc_key already published against a DIFFERENT doc_id;
|
|
15
|
+
4. best-effort KB ingest (chunk + Gemini-embed + index). Ingest failure does
|
|
16
|
+
NOT fail adoption — the BADoc row stands and `make kb-reconcile` /
|
|
17
|
+
`kb-embed-backfill` heal the index later (same contract as instant ingest).
|
|
18
|
+
|
|
19
|
+
Metadata (project / doc_key / doc_type / data_class / feature) is supplied by
|
|
20
|
+
the operator — NOTHING is invented here. The BA Lead provides real values per
|
|
21
|
+
doc from the inventory verdicts.
|
|
22
|
+
|
|
23
|
+
CLI:
|
|
24
|
+
python -m kb.adopt_doc <doc_id> --project P --doc-key P/tech_doc/api \\
|
|
25
|
+
--doc-type tech_doc --data-class internal [--feature F] [--no-ingest]
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
from dataclasses import dataclass
|
|
31
|
+
|
|
32
|
+
from kb import ingest as kb_ingest
|
|
33
|
+
from kb.supabase_rest import KBRestError, SupabaseREST
|
|
34
|
+
from lib.draft import DATA_CLASSES
|
|
35
|
+
from lib.suppa_client import SuppaError, SuppaHTTPError
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class AdoptError(SuppaError):
    """Adoption of an existing SUPPA Doc was refused or could not be confirmed.

    Raised by this module for an unknown data_class, a Doc with no live pages,
    a doc_key already published against a different doc, and a failed publish
    read-back.
    """
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
class AdoptResult:
    """Outcome of one adopt_doc() call: the BADoc registration plus KB ingest."""

    # Registration outcome: "adopted" or "noop-already-adopted".
    status: str
    # SUPPA Doc that was adopted.
    doc_id: int
    # Id of the BADoc row that points at the Doc.
    badoc_id: int | None
    # "<doc_key>:1" — adopted docs are registered as version 1.
    external_id: str
    # Live-content hash computed at adoption time.
    content_hash: str
    # Stays "skipped" unless the best-effort ingest step ran.
    ingest_status: str = "skipped"
    # Chunk count reported by the ingest step.
    chunks: int = 0
    # Truncated error text when the ingest step raised.
    detail: str = ""
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def adopt_doc(doc_id: int, *, project: str, doc_key: str, doc_type: str,
              data_class: str, suppa, rest: SupabaseREST | None = None,
              feature: str | None = None, source: str = "backfill",
              ingest: bool = True, doc_title: str | None = None) -> AdoptResult:
    """Register an existing SUPPA Doc as a governed BADoc and optionally ingest it.

    Steps, in order: validate ``data_class``; fetch the Doc's pages and hash
    them; refuse if ``doc_key`` is already published against another doc;
    reuse an already-published ``<doc_key>:1`` row (refreshing its hash) or
    register a new one; finally run the best-effort KB ingest when ``ingest``
    is true.

    Raises:
        AdoptError: unknown ``data_class``, no live pages, or ``doc_key``
            already published against a different doc id.
    """
    if data_class not in DATA_CLASSES:
        raise AdoptError(f"data_class {data_class!r} not in {sorted(DATA_CLASSES)}")
    doc_id = int(doc_id)
    # adopted legacy docs are version 1 of their doc_key
    ext = f"{doc_key}:1"

    pages = kb_ingest.fetch_pages(suppa, doc_id)
    if not pages:
        raise AdoptError(f"doc {doc_id} has no live pages — nothing to adopt")
    live_hash = kb_ingest.live_content_hash(pages)

    # Conflict guard: a doc_key may only ever point at one doc.
    for row in suppa.search_badoc(doc_key=doc_key, status="published"):
        ref = row.get("content_ref")
        if ref and int(row["content_ref"]) != doc_id:
            raise AdoptError(
                f"doc_key {doc_key!r} already published against doc "
                f"{row['content_ref']} (not {doc_id}) — refusing to repoint")

    matches = suppa.search_badoc(external_id=ext)
    already_published = bool(matches) and matches[0].get("status") == "published"
    if not already_published:
        badoc_id = _register_badoc(suppa, ext, doc_id, live_hash, project=project,
                                   feature=feature, doc_type=doc_type,
                                   data_class=data_class, doc_key=doc_key, source=source)
        outcome = "adopted"
    else:
        current = matches[0]
        badoc_id = current.get("id")
        # Idempotent re-run: only refresh the hash if the legacy page changed.
        if current.get("content_hash") != live_hash:
            suppa.update_badoc(badoc_id, {"content_hash": live_hash})
        outcome = "noop-already-adopted"
    result = AdoptResult(outcome, doc_id, badoc_id, ext, live_hash)

    if not ingest:
        return result
    _ingest_best_effort(result, suppa, rest, project=project, feature=feature,
                        doc_type=doc_type, data_class=data_class, doc_id=doc_id,
                        content_hash=live_hash, doc_title=doc_title)
    return result
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _register_badoc(suppa, ext, doc_id, live_hash, *, project, feature, doc_type,
|
|
98
|
+
data_class, doc_key, source) -> int:
|
|
99
|
+
row = {
|
|
100
|
+
"project": project, "feature": feature, "doc_type": doc_type,
|
|
101
|
+
"status": "writing", "data_class": data_class, "source": source,
|
|
102
|
+
"doc_key": doc_key, "version": 1, "external_id": ext,
|
|
103
|
+
"content_hash": live_hash,
|
|
104
|
+
}
|
|
105
|
+
try:
|
|
106
|
+
badoc_id = suppa.insert_badoc(row)
|
|
107
|
+
except SuppaHTTPError:
|
|
108
|
+
# unique conflict on external_id -> re-read, never parse the body
|
|
109
|
+
rows = suppa.search_badoc(external_id=ext)
|
|
110
|
+
if len(rows) != 1:
|
|
111
|
+
raise
|
|
112
|
+
badoc_id = rows[0]["id"]
|
|
113
|
+
if rows[0].get("content_hash") != live_hash:
|
|
114
|
+
suppa.update_badoc(badoc_id, {"content_hash": live_hash})
|
|
115
|
+
# flip to published pointing at the EXISTING doc; read-back
|
|
116
|
+
suppa.update_badoc(badoc_id, {"status": "published", "content_ref": doc_id})
|
|
117
|
+
back = suppa.search_badoc(external_id=ext)
|
|
118
|
+
if not back or back[0].get("status") != "published":
|
|
119
|
+
raise AdoptError(f"publish read-back failed for {ext}")
|
|
120
|
+
return badoc_id
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _ingest_best_effort(result: AdoptResult, suppa, rest, *, project, feature,
                        doc_type, data_class, doc_id, content_hash,
                        doc_title=None) -> None:
    """Run KB ingest for an adopted doc and record the outcome on ``result``.

    Builds the published BADoc row from the arguments, calls
    ``kb_ingest.ingest_doc`` and copies its status and chunk count onto
    ``result``. A "failed" ingest status is written to the ingest error log.
    A KBRestError is never propagated: it sets ``ingest_status="error"`` and
    stores the first 200 characters of the message in ``result.detail``.

    ``rest`` may be None, in which case a SupabaseREST client is created here.
    """
    badoc_row = {
        "id": result.badoc_id, "external_id": result.external_id,
        "project": project, "feature": feature, "doc_type": doc_type,
        "status": "published", "data_class": data_class,
        "content_hash": content_hash, "content_ref": doc_id,
    }
    try:
        rest = rest or SupabaseREST()
        res = kb_ingest.ingest_doc(badoc_row, suppa, rest, doc_title=doc_title)
        result.ingest_status = res.status
        result.chunks = res.chunks
        if res.status == "failed":
            kb_ingest.log_ingest_error(rest, doc_id, res.detail)
    except KBRestError as e:
        result.ingest_status = "error"
        result.detail = str(e)[:200]
        # If building the client itself failed, `rest` is still None and there
        # is nothing to log through; calling the logger with None could raise a
        # non-KBRestError and fail the adoption this function must protect.
        if rest is not None:
            try:
                kb_ingest.log_ingest_error(rest, doc_id, str(e))
            except KBRestError:
                pass  # reconcile scans had_errors anyway
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def main(argv=None) -> int:
    """CLI entry point: adopt one existing SUPPA Doc and print a one-line summary.

    ``argv`` is the argument list to parse (argparse falls back to
    ``sys.argv[1:]`` when it is None). Returns 0 after a completed adoption;
    errors raised by adopt_doc propagate to the caller.
    """
    import argparse
    from lib.suppa_adapter import SuppaAdapter

    p = argparse.ArgumentParser(description="Adopt an existing SUPPA Doc (BUILD_PLAN 2.9)")
    p.add_argument("doc_id", type=int)
    p.add_argument("--project", required=True)
    p.add_argument("--doc-key", required=True, help="unique doc_key, e.g. PROJ/tech_doc/api")
    p.add_argument("--doc-type", required=True)
    p.add_argument("--data-class", required=True, choices=sorted(DATA_CLASSES))
    p.add_argument("--feature", default=None)
    p.add_argument("--source", default="backfill")
    p.add_argument("--no-ingest", action="store_true")
    # Optional: adopt_doc already accepts doc_title and forwards it to KB ingest.
    p.add_argument("--doc-title", default=None,
                   help="optional document title forwarded to KB ingest")
    args = p.parse_args(argv)

    res = adopt_doc(args.doc_id, project=args.project, doc_key=args.doc_key,
                    doc_type=args.doc_type, data_class=args.data_class,
                    suppa=SuppaAdapter(), feature=args.feature, source=args.source,
                    ingest=not args.no_ingest, doc_title=args.doc_title)
    print(f"{res.status}: badoc={res.badoc_id} doc={res.doc_id} ext={res.external_id} "
          f"hash={res.content_hash[:16]}… ingest={res.ingest_status} chunks={res.chunks}"
          + (f" detail={res.detail}" if res.detail else ""))
    return 0
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
if __name__ == "__main__":
    import sys

    # main() prints a non-ASCII ellipsis, so force UTF-8 on stdout first.
    sys.stdout.reconfigure(encoding="utf-8")
    sys.exit(main())
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""One alert sink for ALL hard failures (BUILD_PLAN 2.4 DoD): ingest retry
|
|
2
|
+
exhaustion, sequencer hard errors, reconcile manual-edit warnings.
|
|
3
|
+
|
|
4
|
+
Pilot transport: append-only logs/alerts.jsonl (ids/hashes only — never
|
|
5
|
+
bodies, PRR §3.8) + optional webhook POST when ALERT_WEBHOOK_URL is set
|
|
6
|
+
(channel decision — SUPPA task vs chat webhook — pending with the customer;
|
|
7
|
+
recorded in BUILD_PLAN). Alerting must never raise into the caller.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import datetime
|
|
13
|
+
import json
|
|
14
|
+
import os
|
|
15
|
+
import urllib.request
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
DEFAULT_DIR = Path(__file__).resolve().parent.parent / "logs"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def alert(kind: str, *, run_dir=None, **fields) -> dict:
    """Record one hard-failure alert and return the recorded entry.

    The entry is ``{"ts": <UTC ISO-8601 timestamp>, "alert": kind, **fields}``.
    It is appended as one JSON line to ``alerts.jsonl`` under ``run_dir``
    (``DEFAULT_DIR`` when ``run_dir`` is falsy) and, when the
    ``ALERT_WEBHOOK_URL`` environment variable is set, POSTed as JSON to that
    URL with a 10 second timeout.

    Both transports are best-effort: every failure is swallowed so alerting
    never raises into the caller.
    """
    entry = {
        "ts": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        "alert": kind, **fields,
    }
    try:
        # Serialize once so the file line and the webhook body are identical.
        # default=str keeps non-JSON-native field values from silently dropping
        # the webhook while the file write succeeds.
        payload = json.dumps(entry, ensure_ascii=False, default=str)
    except Exception:
        return entry  # nothing serializable to record; still never raise

    try:
        out_dir = Path(run_dir) if run_dir else DEFAULT_DIR
        out_dir.mkdir(parents=True, exist_ok=True)
        with open(out_dir / "alerts.jsonl", "a", encoding="utf-8") as f:
            f.write(payload + "\n")
    except Exception:
        pass

    url = os.environ.get("ALERT_WEBHOOK_URL", "")
    if url:
        try:
            req = urllib.request.Request(
                url, data=payload.encode("utf-8"),
                headers={"Content-Type": "application/json"}, method="POST")
            # Context manager closes the HTTP response instead of leaking it.
            with urllib.request.urlopen(req, timeout=10) as resp:
                resp.read()
        except Exception:
            pass  # the file entry above is the durable record
    return entry
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Embedding backfill (`make kb-embed-backfill`) — fills NULL embeddings
|
|
2
|
+
WITHOUT re-ingesting content (the cheap heal path after chunks were ingested
|
|
3
|
+
while GEMINI_API_KEY was absent or the API was down).
|
|
4
|
+
|
|
5
|
+
Loops kb_missing_embeddings -> Gemini embed_texts -> kb_set_embeddings until
|
|
6
|
+
none remain. ingestion_log had_errors rows are left to `make kb-reconcile`,
|
|
7
|
+
which force-reingests them once and then converges (embeddings now succeed).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
from kb import embeddings
|
|
15
|
+
from kb.supabase_rest import SupabaseREST
|
|
16
|
+
from lib import egress_audit
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def backfill(rest: SupabaseREST | None = None, *, project: str | None = None,
             batch: int = 100, sink=None) -> int:
    """Fill NULL embeddings batch by batch and return how many were written.

    Each round asks ``kb_missing_embeddings`` for up to ``batch`` rows
    (``project`` is passed through as ``p_project``), records the egress with
    ``egress_audit.check_and_record``, embeds the chunk contents and stores
    the vectors through ``kb_set_embeddings``. The loop ends when no rows are
    returned, when a short batch has been processed, or when the egress check
    raises EgressBlocked.

    ``rest`` defaults to a fresh SupabaseREST client; ``sink`` is forwarded
    to the egress audit call unchanged.
    """
    rest = rest or SupabaseREST()
    total = 0
    while True:
        rows = rest.rpc("kb_missing_embeddings",
                        {"p_project": project, "p_limit": batch})
        if not rows:
            return total
        texts = [r["content"] for r in rows]
        # Chunk CONTENT egresses to Gemini here — audit it (always) and honor the
        # fail-closed BLOCK switch (GEMINI_EGRESS_CLIENT_BLOCK=1). A blocked batch
        # raises EgressBlocked: skip it (chunks keep their NULL embeddings) and
        # stop — kb_missing_embeddings would just re-return the same rows, so
        # looping would spin forever. They re-embed once the block lifts
        # (best-effort contract, mirrors ingest/reconcile).
        #
        # Classify the batch fail-closed: use the shared class only when every
        # row agrees; a mixed batch, or any row without a class, counts as
        # "client". Taking the first row's class would let client chunks ride
        # along with an internal row past the block switch.
        classes = {r.get("data_class") or "client" for r in rows}
        data_class = classes.pop() if len(classes) == 1 else "client"
        try:
            egress_audit.check_and_record(
                data_class=data_class, path="embeddings",
                byte_count=sum(len(t.encode("utf-8")) for t in texts), sink=sink)
        except egress_audit.EgressBlocked:
            return total
        vectors = embeddings.embed_texts(texts)
        # C3: carry content_hash so kb_set_embeddings only writes the vector when
        # the chunk's content is still the one we embedded (refuses to overwrite a
        # row a concurrent re-ingest has since replaced — no silent vector/content skew).
        n = rest.rpc("kb_set_embeddings", {"p": {"items": [
            {"id": r["id"], "embedding": embeddings.vector_literal(v),
             "content_hash": r.get("content_hash")}
            for r, v in zip(rows, vectors)]}})
        total += int(n or 0)
        if len(rows) < batch:
            return total
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def main() -> int:
    """CLI entry point: run a full backfill and report the number of chunks.

    Returns 1 without doing any work when ``embeddings.available()`` is false,
    otherwise 0 after the backfill has finished.
    """
    if embeddings.available():
        written = backfill()
        print(f"backfilled embeddings for {written} chunks")
        return 0
    print("GEMINI_API_KEY is not set — nothing to backfill with")
    return 1
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
if __name__ == "__main__":
    # main() may print a non-ASCII dash, so force UTF-8 on stdout first.
    sys.stdout.reconfigure(encoding="utf-8")
    sys.exit(main())
|