cls-knowledge 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cls_knowledge-0.1.0.dist-info/METADATA +77 -0
- cls_knowledge-0.1.0.dist-info/RECORD +10 -0
- cls_knowledge-0.1.0.dist-info/WHEEL +4 -0
- cls_knowledge-0.1.0.dist-info/entry_points.txt +3 -0
- cumulus_knowledge/__init__.py +25 -0
- cumulus_knowledge/client.py +181 -0
- cumulus_knowledge/mcp_server.py +110 -0
- cumulus_knowledge/models.py +25 -0
- cumulus_knowledge/operations.py +175 -0
- cumulus_knowledge/tui.py +33 -0
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cls-knowledge
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python SDK and MCP server for Cumulus Knowledge
|
|
5
|
+
Project-URL: Homepage, https://cumulush.com
|
|
6
|
+
Project-URL: Repository, https://github.com/Cumulus-s/cumulus-create
|
|
7
|
+
Author: Cumulus Knowledge contributors
|
|
8
|
+
License-Expression: AGPL-3.0-only
|
|
9
|
+
Keywords: agents,cli,knowledge-graph,mcp,sdk
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: GNU Affero General Public License v3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Classifier: Topic :: Text Processing :: Indexing
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Provides-Extra: test
|
|
22
|
+
Requires-Dist: pytest>=9.0.0; extra == 'test'
|
|
23
|
+
Provides-Extra: tui
|
|
24
|
+
Requires-Dist: textual>=6.0.0; extra == 'tui'
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# cls-knowledge
|
|
28
|
+
|
|
29
|
+
Python SDK, operations ingestion helpers, graph QA helpers, MCP server, and ops-review TUI mode for Cumulus Knowledge.
|
|
30
|
+
|
|
31
|
+
The SDK uses the `cls-knowledge` binary by default. Set `CUMULUS_BIN` to point at another binary.
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
from cumulus_knowledge import CumulusKnowledge
|
|
35
|
+
|
|
36
|
+
knowledge = CumulusKnowledge(root="Documents/rune")
|
|
37
|
+
knowledge.index(profile="all")
|
|
38
|
+
hits = knowledge.query("auth flow", budget=800)
|
|
39
|
+
graph = knowledge.get_graph_view("risk")
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Operations helpers:
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from cumulus_knowledge import (
|
|
46
|
+
extract_operations_entities,
|
|
47
|
+
build_relationship_candidates,
|
|
48
|
+
score_graph_readability,
|
|
49
|
+
detect_missing_citations,
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
entities = extract_operations_entities("demo-project")
|
|
53
|
+
relationships = build_relationship_candidates(entities)
|
|
54
|
+
quality = score_graph_readability(graph.data)
|
|
55
|
+
missing = detect_missing_citations(graph.data)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Ops review TUI mode:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
python3 -m cumulus_knowledge.tui --ops-review ./demo-project
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
The Python package handles the heavier document and operations workflows: invoices, vendors, bank draws, shipments, schedule risks, missing citations, and batch graph quality checks.
|
|
65
|
+
|
|
66
|
+
Hosted-style API mode:
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from cumulus_knowledge import CumulusKnowledge
|
|
70
|
+
|
|
71
|
+
client = CumulusKnowledge(api_base_url="http://127.0.0.1:8787")
|
|
72
|
+
client.create_project({"name": "Demo Operations Project"})
|
|
73
|
+
client.upload_folder([{"path": "invoices/demo.md", "content": "Invoice DEMO-INV-001"}])
|
|
74
|
+
client.index_project()
|
|
75
|
+
graph = client.get_graph_view("finance")
|
|
76
|
+
html = client.export_html()
|
|
77
|
+
```
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
cumulus_knowledge/__init__.py,sha256=uSfZp-Fk0-LPl64ogW1MydbKR1-b685UbN23TAQXh2M,643
|
|
2
|
+
cumulus_knowledge/client.py,sha256=39xr6NKMdgixD9u-EdqzITuIwYsjtwecViTbKUbFre0,7096
|
|
3
|
+
cumulus_knowledge/mcp_server.py,sha256=xDdoh-7iKhj4k7hTgLpGzikjcq1DgX5trZaukgcP204,6316
|
|
4
|
+
cumulus_knowledge/models.py,sha256=QlcUqvYh-lNq4dvBKFn1V_mzNBnjSnryeJG7L5QE8qA,634
|
|
5
|
+
cumulus_knowledge/operations.py,sha256=cIiWwh5OylmGxzojmqwIQyXoJgNz-Z2MdmqpEb-fZxY,7019
|
|
6
|
+
cumulus_knowledge/tui.py,sha256=GVHib4sqPcE-q0sfJ5fRRUbovjh2fEqX8jMB2hRj7XI,1268
|
|
7
|
+
cls_knowledge-0.1.0.dist-info/METADATA,sha256=MpKm09KNR1BFIylwDqa8NTHz8YsWnevY8ZcVvwQvBxk,2604
|
|
8
|
+
cls_knowledge-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
9
|
+
cls_knowledge-0.1.0.dist-info/entry_points.txt,sha256=v8T9qqKRAFGTGjgqKtZhysSySw778P3IbrCNvSMRjXM,119
|
|
10
|
+
cls_knowledge-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from .client import CumulusKnowledge
|
|
2
|
+
from .models import AxiEnvelope
|
|
3
|
+
from .operations import (
|
|
4
|
+
build_relationship_candidates,
|
|
5
|
+
detect_missing_citations,
|
|
6
|
+
detect_schedule_shipping_risk,
|
|
7
|
+
compare_invoice_to_bank_draw,
|
|
8
|
+
extract_operations_entities,
|
|
9
|
+
run_agent_eval,
|
|
10
|
+
score_graph_readability,
|
|
11
|
+
upload_project,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
# Names exported by ``from cumulus_knowledge import *``: the envelope model,
# the client class, and the operations helper functions.
__all__ = [
    "AxiEnvelope",
    "CumulusKnowledge",
    "build_relationship_candidates",
    "compare_invoice_to_bank_draw",
    "detect_missing_citations",
    "detect_schedule_shipping_risk",
    "extract_operations_entities",
    "run_agent_eval",
    "score_graph_readability",
    "upload_project",
]
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import subprocess
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
from urllib import parse, request
|
|
9
|
+
|
|
10
|
+
from .models import AxiEnvelope
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class CumulusKnowledge:
    """Client for Cumulus Knowledge backed by the CLI binary or the HTTP API.

    Methods that support both modes call the HTTP API when ``api_base_url`` is
    set; otherwise they run the configured binary against ``root`` and parse
    its JSON output into an ``AxiEnvelope``.
    """

    def __init__(
        self,
        root: str | os.PathLike[str] = ".",
        bin: str | None = None,
        api_base_url: str | None = None,
        project_id: str | None = None,
    ) -> None:
        """Store the connection settings, falling back to environment variables.

        Args:
            root: Project directory passed to CLI commands.
            bin: Binary to execute; defaults to ``$CUMULUS_BIN`` or ``cls-knowledge``.
            api_base_url: Base URL of the HTTP API; defaults to ``$CUMULUS_API_URL``.
            project_id: Project used in API paths; defaults to
                ``$CUMULUS_PROJECT_ID`` or ``"local"``.
        """
        self.root = str(root)
        self.bin = bin or os.environ.get("CUMULUS_BIN", "cls-knowledge")
        self.api_base_url = api_base_url or os.environ.get("CUMULUS_API_URL")
        # The parameter default is None (not "local") so that the environment
        # variable is actually consulted when the caller passes nothing.
        self.project_id = project_id or os.environ.get("CUMULUS_PROJECT_ID", "local")

    def init(self, root: str | os.PathLike[str] | None = None) -> AxiEnvelope[Any]:
        """Run ``knowledge init`` for ``root`` (or the configured root)."""
        return self._run(["knowledge", "init", str(root or self.root)])

    def index(
        self,
        profile: str = "all",
        root: str | os.PathLike[str] | None = None,
        watch: bool = False,
    ) -> AxiEnvelope[Any]:
        """Run ``knowledge index`` with the given profile; ``watch`` adds ``--watch``."""
        args = ["knowledge", "index", str(root or self.root), "--profile", profile]
        if watch:
            args.append("--watch")
        return self._run(args)

    def query(self, text: str, budget: int = 1200, limit: int = 10) -> AxiEnvelope[Any]:
        """Search the project for ``text`` via the API when configured, else the CLI."""
        if self.api_base_url:
            return self._api_json(
                "POST",
                self._project_path("/query"),
                {"query": text, "budget": budget, "limit": limit},
            )
        return self._run(
            [
                "knowledge",
                "query",
                text,
                "--path",
                self.root,
                "--budget",
                str(budget),
                "--limit",
                str(limit),
            ]
        )

    def get_node(self, node_id: str) -> AxiEnvelope[Any]:
        """Fetch a single node by ID."""
        if self.api_base_url:
            return self.fetch_node(node_id)
        return self._run(["knowledge", "node", "get", node_id, "--path", self.root])

    def expand_neighbors(self, node_id: str, depth: int = 1) -> AxiEnvelope[Any]:
        """Expand the graph around ``node_id``; this always goes through the CLI."""
        return self._run(["knowledge", "graph", "expand", node_id, "--path", self.root, "--depth", str(depth)])

    def find_paths(self, from_id: str, to_id: str, max_depth: int = 6) -> AxiEnvelope[Any]:
        """Explain a path between two nodes, searching at most ``max_depth`` deep."""
        if self.api_base_url:
            # Forward max_depth so the API honours the caller's limit instead
            # of silently using explain_path's default.
            return self.explain_path(from_id, to_id, max_depth=max_depth)
        return self._run(
            [
                "knowledge",
                "path",
                "explain",
                from_id,
                to_id,
                "--path",
                self.root,
                "--max-depth",
                str(max_depth),
            ]
        )

    def index_status(self) -> AxiEnvelope[Any]:
        """Return the default job from the API, or ``knowledge doctor`` output from the CLI."""
        if self.api_base_url:
            return self.get_job()
        return self._run(["knowledge", "doctor", "--path", self.root])

    def get_graph_view(self, preset: str = "full") -> AxiEnvelope[Any]:
        """Return the graph view for ``preset``."""
        if self.api_base_url:
            query = parse.urlencode({"preset": preset})
            return self._api_json("GET", f"{self._project_path('/graph-view')}?{query}")
        return self._run(["knowledge", "graph", "view", "--path", self.root, "--preset", preset])

    def create_project(self, data: dict[str, Any] | None = None) -> AxiEnvelope[Any]:
        """POST ``data`` to ``/v1/projects`` (API only)."""
        return self._api_json("POST", "/v1/projects", data or {})

    def upload_folder(self, files: list[dict[str, str]]) -> AxiEnvelope[Any]:
        """POST a list of file dicts to the project's ``/uploads`` endpoint (API only)."""
        return self._api_json("POST", self._project_path("/uploads"), {"files": files})

    def index_project(self) -> AxiEnvelope[Any]:
        """POST to the project's ``/index`` endpoint (API only)."""
        return self._api_json("POST", self._project_path("/index"), {})

    def get_job(self, job_id: str = "local") -> AxiEnvelope[Any]:
        """GET ``/v1/jobs/<job_id>`` (API only)."""
        return self._api_json("GET", f"/v1/jobs/{parse.quote(job_id)}")

    def upload_project(self, path_or_zip: str | os.PathLike[str]) -> AxiEnvelope[Any]:
        """Run ``knowledge init`` on ``path_or_zip``; the path is passed through as given."""
        return self.init(path_or_zip)

    def fetch_node(self, node_id: str) -> AxiEnvelope[Any]:
        """Fetch a node from the API when configured, else via ``get_node``."""
        if self.api_base_url:
            return self._api_json("GET", self._project_path(f"/nodes/{parse.quote(node_id)}"))
        return self.get_node(node_id)

    def explain_path(self, from_id: str, to_id: str, max_depth: int = 6) -> AxiEnvelope[Any]:
        """Explain a path via the API when configured, else via ``find_paths``."""
        if self.api_base_url:
            query = parse.urlencode({"from": from_id, "to": to_id, "max_depth": max_depth})
            return self._api_json("GET", f"{self._project_path('/paths/explain')}?{query}")
        return self.find_paths(from_id, to_id, max_depth=max_depth)

    def source_trace(self, node_id: str, preset: str = "full") -> AxiEnvelope[Any]:
        """Return the evidence entries attached to ``node_id``.

        In CLI mode the evidence is filtered out of the graph view for
        ``preset`` and wrapped in a locally built envelope.
        """
        if self.api_base_url:
            query = parse.urlencode({"preset": preset})
            return self._api_json("GET", f"{self._project_path(f'/nodes/{parse.quote(node_id)}/source-trace')}?{query}")
        # An envelope without a data payload yields an empty trace, not a crash.
        view = self.get_graph_view(preset).data or {}
        return AxiEnvelope.from_dict({
            "ok": True,
            "data": {
                "node_id": node_id,
                "evidence": [item for item in view.get("evidence", []) if item.get("node_id") == node_id],
            },
            "meta": {"command": "knowledge.source_trace"},
            "links": [],
        })

    def export_html(self) -> str:
        """Return the body of the project's ``/exports/html`` endpoint (API only)."""
        return self._api_text("GET", self._project_path("/exports/html"))

    def export_graph(self, output: str | os.PathLike[str], format: str = "json") -> Path:
        """Run ``knowledge viz export`` writing to ``output`` and return that path."""
        out = Path(output)
        self._run_raw(["knowledge", "viz", "export", "--path", self.root, "--format", format, "--output", str(out)])
        return out

    def _run(self, args: list[str]) -> AxiEnvelope[Any]:
        """Run the binary with ``--format json`` appended and parse stdout."""
        completed = self._run_raw([*args, "--format", "json"])
        return AxiEnvelope.from_dict(json.loads(completed.stdout))

    def _run_raw(self, args: list[str]) -> subprocess.CompletedProcess[str]:
        """Run the binary with ``args``, capturing text output.

        Raises:
            subprocess.CalledProcessError: If the process exits non-zero.
        """
        return subprocess.run(
            [self.bin, *args],
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )

    def _api_json(self, method: str, path: str, body: dict[str, Any] | None = None) -> AxiEnvelope[Any]:
        """Send a JSON request to the API and parse the JSON response."""
        payload = None if body is None else json.dumps(body).encode("utf-8")
        req = request.Request(
            self._api_url(path),
            data=payload,
            method=method,
            headers={"content-type": "application/json"},
        )
        with request.urlopen(req) as response:
            return AxiEnvelope.from_dict(json.loads(response.read().decode("utf-8")))

    def _api_text(self, method: str, path: str) -> str:
        """Send a request to the API and return the UTF-8 decoded body."""
        req = request.Request(self._api_url(path), method=method)
        with request.urlopen(req) as response:
            return response.read().decode("utf-8")

    def _api_url(self, path: str) -> str:
        """Join ``path`` onto the API base URL.

        Raises:
            RuntimeError: If no API base URL is configured.
        """
        if not self.api_base_url:
            raise RuntimeError("api_base_url is required for API calls")
        # Normalise the slashes so the base URL's own path prefix is kept.
        return parse.urljoin(self.api_base_url.rstrip("/") + "/", path.lstrip("/"))

    def _project_path(self, suffix: str) -> str:
        """Return the API path for ``suffix`` under the configured project."""
        return f"/v1/projects/{parse.quote(self.project_id)}{suffix}"
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from .client import CumulusKnowledge
|
|
9
|
+
from .operations import (
|
|
10
|
+
build_relationship_candidates,
|
|
11
|
+
compare_invoice_to_bank_draw,
|
|
12
|
+
detect_missing_citations,
|
|
13
|
+
detect_schedule_shipping_risk,
|
|
14
|
+
extract_operations_entities,
|
|
15
|
+
run_agent_eval,
|
|
16
|
+
score_graph_readability,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Tool descriptors returned for "tools/list". Each inputSchema lists the
# arguments the matching branch of handle() reads; find_paths advertises
# max_depth because the handler accepts it.
TOOLS = [
    {"name": "graph_view", "description": "Return a semantic graph view with readable labels, legend, layout, filters, and evidence.", "inputSchema": {"type": "object", "properties": {"preset": {"type": "string"}}}},
    {"name": "search", "description": "Search indexed nodes and chunks.", "inputSchema": {"type": "object", "properties": {"query": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["query"]}},
    {"name": "fetch", "description": "Fetch a node by ID.", "inputSchema": {"type": "object", "properties": {"id": {"type": "string"}}, "required": ["id"]}},
    {"name": "expand_neighbors", "description": "Expand graph neighbors.", "inputSchema": {"type": "object", "properties": {"id": {"type": "string"}, "depth": {"type": "integer"}}, "required": ["id"]}},
    {"name": "find_paths", "description": "Find a graph path.", "inputSchema": {"type": "object", "properties": {"from_id": {"type": "string"}, "to_id": {"type": "string"}, "max_depth": {"type": "integer"}}, "required": ["from_id", "to_id"]}},
    {"name": "index_status", "description": "Return local index status.", "inputSchema": {"type": "object", "properties": {}}},
    {"name": "ingest", "description": "Index the configured root.", "inputSchema": {"type": "object", "properties": {"profile": {"type": "string"}}}},
    {"name": "source_trace", "description": "Return evidence links for a semantic node.", "inputSchema": {"type": "object", "properties": {"id": {"type": "string"}, "preset": {"type": "string"}}, "required": ["id"]}},
    {"name": "extract_entities", "description": "Extract operations entities from the configured root.", "inputSchema": {"type": "object", "properties": {}}},
    {"name": "audit_graph_quality", "description": "Score graph readability and citation health.", "inputSchema": {"type": "object", "properties": {"preset": {"type": "string"}}}},
    {"name": "detect_missing_citations", "description": "Find semantic nodes without evidence.", "inputSchema": {"type": "object", "properties": {"preset": {"type": "string"}}}},
    {"name": "compare_invoice_to_bank_draw", "description": "Detect invoice and bank draw mismatches from extracted entities.", "inputSchema": {"type": "object", "properties": {}}},
    {"name": "detect_schedule_shipping_risk", "description": "Detect shipment and schedule risk language.", "inputSchema": {"type": "object", "properties": {}}},
]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def main() -> None:
    """Serve JSON-RPC requests read line by line from stdin.

    Each non-blank line is parsed as one JSON message. Messages without an
    ``id`` are ignored; every other message gets exactly one response line
    written to stdout, either a ``result`` or an ``error`` object.
    """
    client = CumulusKnowledge(root=os.environ.get("CUMULUS_ROOT", os.getcwd()))
    for line in sys.stdin:
        if not line.strip():
            continue
        try:
            message = json.loads(line)
        except json.JSONDecodeError as exc:
            # The id of an unparseable message is unknown, so it must be null
            # rather than the id left over from an earlier request.
            write({"jsonrpc": "2.0", "id": None, "error": {"code": -32700, "message": str(exc)}})
            continue
        if not isinstance(message, dict):
            # Arrays and scalars have no usable id/method; answer instead of
            # crashing on the attribute access below.
            write({"jsonrpc": "2.0", "id": None, "error": {"code": -32600, "message": "request must be a JSON object"}})
            continue
        if "id" not in message:
            continue
        try:
            result = handle(client, message.get("method", ""), message.get("params") or {})
            write({"jsonrpc": "2.0", "id": message["id"], "result": result})
        except Exception as exc:
            # Top-level boundary: report any handler failure to the caller
            # and keep serving subsequent requests.
            write({"jsonrpc": "2.0", "id": message["id"], "error": {"code": -32603, "message": str(exc)}})
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def handle(client: CumulusKnowledge, method: str, params: dict[str, Any]) -> Any:
    """Produce the result payload for one JSON-RPC method.

    ``initialize``, ``tools/list`` and ``resources/list`` return fixed
    payloads, ``tools/call`` runs the named tool, and any other method
    returns an empty dict.

    Raises:
        ValueError: If ``tools/call`` names a tool that is not handled here.
    """
    if method == "initialize":
        return {
            "protocolVersion": "2025-06-18",
            "serverInfo": {"name": "cls-knowledge", "version": "0.1.0"},
            "capabilities": {"tools": {}, "resources": {}, "prompts": {}},
        }
    if method == "tools/list":
        return {"tools": TOOLS}
    if method == "resources/list":
        snapshot = {"uri": "cumulus://snapshot/current", "name": "Current Graph Snapshot", "mimeType": "application/json"}
        return {"resources": [snapshot]}
    if method != "tools/call":
        return {}

    name = params.get("name")
    args = params.get("arguments") or {}

    def graph_data() -> Any:
        # Graph view payload for the requested preset.
        return client.get_graph_view(args.get("preset", "full")).data

    def root_entities() -> Any:
        # Entities scanned from the client's configured root.
        return extract_operations_entities(client.root)

    def source_trace() -> Any:
        view = graph_data()
        return {"node_id": args["id"], "evidence": [item for item in view.get("evidence", []) if item.get("node_id") == args["id"]]}

    def extract_entities() -> Any:
        entities = root_entities()
        return {"entities": entities, "relationships": build_relationship_candidates(entities)}

    def audit_graph_quality() -> Any:
        view = graph_data()
        return {"readability": score_graph_readability(view), "eval": run_agent_eval("local", view)}

    # Every entry is a thunk so that only the selected tool does any work.
    producers = {
        "search": lambda: client.query(args["query"], limit=args.get("limit", 10)).data,
        "graph_view": graph_data,
        "fetch": lambda: client.get_node(args["id"]).data,
        "expand_neighbors": lambda: client.expand_neighbors(args["id"], args.get("depth", 1)).data,
        "find_paths": lambda: client.find_paths(args["from_id"], args["to_id"], args.get("max_depth", 6)).data,
        "index_status": lambda: client.index_status().data,
        "ingest": lambda: client.index(profile=args.get("profile", "all")).data,
        "source_trace": source_trace,
        "extract_entities": extract_entities,
        "audit_graph_quality": audit_graph_quality,
        "detect_missing_citations": lambda: detect_missing_citations(graph_data()),
        "compare_invoice_to_bank_draw": lambda: compare_invoice_to_bank_draw(root_entities()),
        "detect_schedule_shipping_risk": lambda: detect_schedule_shipping_risk(root_entities()),
    }
    # Only strings can name a tool; guarding here keeps an unhashable name on
    # the "unknown tool" path instead of failing inside the dict lookup.
    producer = producers.get(name) if isinstance(name, str) else None
    if producer is None:
        raise ValueError(f"unknown tool: {name}")
    return tool_result(producer())
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def tool_result(data: Any) -> dict[str, Any]:
    """Wrap ``data`` as a tool result with a text rendering and the raw value.

    The text entry is ``data`` serialised as JSON with a two-space indent;
    ``structuredContent`` carries ``data`` itself.
    """
    rendered = json.dumps(data, indent=2)
    text_entry = {"type": "text", "text": rendered}
    return {"content": [text_entry], "structuredContent": data}
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def write(value: dict[str, Any]) -> None:
    """Write ``value`` to stdout as one JSON line and flush immediately."""
    encoded = json.dumps(value)
    stream = sys.stdout
    stream.write(encoded + "\n")
    stream.flush()
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# Start the stdin/stdout request loop when this module is run as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Any, Generic, TypeVar
|
|
5
|
+
|
|
6
|
+
T = TypeVar("T")
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(slots=True)
|
|
10
|
+
class AxiEnvelope(Generic[T]):
|
|
11
|
+
ok: bool
|
|
12
|
+
data: T
|
|
13
|
+
meta: dict[str, Any]
|
|
14
|
+
links: list[dict[str, Any]]
|
|
15
|
+
error: dict[str, Any] | None = None
|
|
16
|
+
|
|
17
|
+
@classmethod
|
|
18
|
+
def from_dict(cls, value: dict[str, Any]) -> "AxiEnvelope[Any]":
|
|
19
|
+
return cls(
|
|
20
|
+
ok=bool(value.get("ok")),
|
|
21
|
+
data=value.get("data"),
|
|
22
|
+
meta=dict(value.get("meta") or {}),
|
|
23
|
+
links=list(value.get("links") or []),
|
|
24
|
+
error=value.get("error"),
|
|
25
|
+
)
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from .client import CumulusKnowledge
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Entity kind -> compiled, case-insensitive pattern searched against one line
# of text. Capture group 1 is the text that becomes the entity's label.
ENTITY_PATTERNS: dict[str, re.Pattern[str]] = {
    # "invoice"/"inv", separators, then at least three letters, digits or hyphens.
    "invoice": re.compile(r"\b(?:invoice|inv)[:#\s-]+([A-Z0-9-]{3,})", re.I),
    # The remaining patterns capture everything after the keyword and separators.
    "bank_draw": re.compile(r"\b(?:draw request|bank draw|draw)[:#\s-]+(.+)", re.I),
    "vendor": re.compile(r"\b(?:vendor|contractor)[:\s-]+(.+)", re.I),
    "supplier": re.compile(r"\bsupplier[:\s-]+(.+)", re.I),
    "client": re.compile(r"\b(?:client|owner)[:\s-]+(.+)", re.I),
    # Unlike the others, "bank"/"lender" must be followed by a literal colon.
    "bank": re.compile(r"\b(?:bank|lender)\s*:\s*(.+)", re.I),
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def upload_project(path_or_zip: str | Path, root: str | Path | None = None) -> dict[str, Any]:
    """Initialise a project directory and report the outcome.

    ``root`` takes precedence over ``path_or_zip`` when given. The chosen
    path is handed to the client as is; nothing here unpacks an archive.

    Returns:
        A dict with the resolved ``root`` as a string and the ``init``
        command's data payload.
    """
    project_root = Path(root or path_or_zip)
    envelope = CumulusKnowledge(root=project_root).init()
    return {"root": str(project_root), "init": envelope.data}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def extract_operations_entities(path: str | Path) -> list[dict[str, Any]]:
|
|
29
|
+
root = Path(path)
|
|
30
|
+
entities: list[dict[str, Any]] = []
|
|
31
|
+
for file in root.rglob("*"):
|
|
32
|
+
if not file.is_file() or ".cumulus" in file.parts:
|
|
33
|
+
continue
|
|
34
|
+
try:
|
|
35
|
+
text = file.read_text(encoding="utf-8")
|
|
36
|
+
except UnicodeDecodeError:
|
|
37
|
+
continue
|
|
38
|
+
for line_no, line in enumerate(text.splitlines(), start=1):
|
|
39
|
+
compact = line.strip().strip("-*# ")
|
|
40
|
+
if not compact:
|
|
41
|
+
continue
|
|
42
|
+
for kind, pattern in ENTITY_PATTERNS.items():
|
|
43
|
+
match = pattern.search(compact)
|
|
44
|
+
if match:
|
|
45
|
+
entities.append(entity(kind, match.group(1), file, line_no, compact))
|
|
46
|
+
lower = compact.lower()
|
|
47
|
+
if any(term in lower for term in ("shipment", "shipping", "delivery")):
|
|
48
|
+
entities.append(entity("shipment", compact, file, line_no, compact))
|
|
49
|
+
if any(term in lower for term in ("milestone", "inspection", "phase ")):
|
|
50
|
+
entities.append(entity("milestone", compact, file, line_no, compact))
|
|
51
|
+
if any(term in lower for term in ("risk", "delay", "overdue", "blocked")):
|
|
52
|
+
entities.append(entity("risk", compact, file, line_no, compact))
|
|
53
|
+
if any(term in lower for term in ("conflict", "mismatch", "does not match")):
|
|
54
|
+
entities.append(entity("conflict", compact, file, line_no, compact))
|
|
55
|
+
return entities
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def build_relationship_candidates(docs: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Propose a relationship for every ordered pair of differing-kind entities in a file.

    Entities are grouped by their ``path``; within each group every ordered
    pair whose kinds differ yields one candidate with a fixed confidence of
    0.54, so each unordered pair appears in both directions.
    """
    grouped: dict[str, list[dict[str, Any]]] = {}
    for doc in docs:
        key = str(doc.get("path"))
        if key not in grouped:
            grouped[key] = []
        grouped[key].append(doc)

    return [
        {
            "from": source["id"],
            "to": target["id"],
            "kind": relation_kind(source["kind"], target["kind"]),
            "path": doc_path,
            "confidence": 0.54,
        }
        for doc_path, members in grouped.items()
        for source in members
        for target in members
        if source is not target and source["kind"] != target["kind"]
    ]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def score_graph_readability(graph_view: dict[str, Any]) -> dict[str, Any]:
    """Score how readable a graph view's node labels and legend are.

    Starting from 1.0, the score loses 0.08 per label beginning with
    ``chunk_`` (capped at 0.4), 0.04 per label longer than 96 characters
    (capped at 0.25) and 0.2 when the legend lists no node kinds. The view
    passes when the score is at least 0.8 and no ``chunk_`` label exists.
    """
    nodes = graph_view.get("nodes", [])
    label_texts = [str(node.get("display_label", "")) for node in nodes]
    chunk_count = sum(1 for text in label_texts if text.startswith("chunk_"))
    long_count = sum(1 for text in label_texts if len(text) > 96)
    legend_count = len(graph_view.get("legend", {}).get("node_kinds", []))

    # Penalties are subtracted in a fixed order so float results are stable.
    score = 1.0 - min(0.4, chunk_count * 0.08) - min(0.25, long_count * 0.04)
    if not legend_count:
        score -= 0.2

    return {
        "score": round(max(0.0, score), 3),
        "node_count": len(nodes),
        "chunk_label_count": chunk_count,
        "long_label_count": long_count,
        "legend_count": legend_count,
        "passed": chunk_count == 0 and score >= 0.8,
    }
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def run_agent_eval(project_id: str, graph_view: dict[str, Any] | None = None) -> dict[str, Any]:
    """Summarise graph quality checks for ``project_id``.

    A missing or empty ``graph_view`` is scored as a view with no nodes and an
    empty legend. The overall ``passed`` and ``score`` mirror the readability
    result; ``citations_present`` is true when the view has any evidence.
    """
    empty_view = {"nodes": [], "legend": {"node_kinds": []}}
    readability = score_graph_readability(graph_view if graph_view else empty_view)
    has_citations = bool(graph_view.get("evidence")) if graph_view else False
    checks = {
        "readability": readability,
        "citations_present": has_citations,
    }
    return {
        "project_id": project_id,
        "passed": readability["passed"],
        "score": readability["score"],
        "checks": checks,
    }
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def detect_missing_citations(graph_view: dict[str, Any]) -> list[dict[str, Any]]:
    """List nodes that no evidence entry refers to.

    Nodes whose ``domain_kind`` is ``project`` or ``folder`` are exempt.
    """
    cited_ids = {item.get("node_id") for item in graph_view.get("evidence", [])}
    exempt_kinds = {"project", "folder"}
    findings: list[dict[str, Any]] = []
    for node in graph_view.get("nodes", []):
        if node.get("id") in cited_ids or node.get("domain_kind") in exempt_kinds:
            continue
        findings.append(
            {"node_id": node.get("id"), "label": node.get("display_label"), "issue": "missing citation"}
        )
    return findings
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def compare_invoice_to_bank_draw(entities: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Flag the case where invoices exist but no bank draw was extracted.

    Returns a single high-severity issue carrying the invoice count, or an
    empty list when there are no invoices or at least one bank draw.
    """
    invoice_count = sum(1 for item in entities if item["kind"] == "invoice")
    has_draw = any(item["kind"] == "bank_draw" for item in entities)
    if invoice_count == 0 or has_draw:
        return []
    issue = {
        "severity": "high",
        "issue": "invoice without bank draw evidence",
        "invoice_count": invoice_count,
    }
    return [issue]
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def detect_schedule_shipping_risk(entities: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Report shipments whose line mentions a delay, plus every risk entity.

    Shipment findings come first, followed by one finding per ``risk``
    entity; all findings have medium severity.
    """
    shipments = [item for item in entities if item["kind"] == "shipment"]
    risks = [item for item in entities if item["kind"] == "risk"]

    findings: list[dict[str, Any]] = []
    for shipment in shipments:
        lowered = shipment["line"].lower()
        if "delay" in lowered or "blocked" in lowered or "overdue" in lowered:
            findings.append(
                {"severity": "medium", "issue": "shipping item has risk language", "shipment": shipment["label"]}
            )
    for risk in risks:
        findings.append({"severity": "medium", "issue": "schedule risk found", "risk": risk["label"]})
    return findings
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def entity(kind: str, label: str, file: Path, line_no: int, line: str) -> dict[str, Any]:
    """Build the dict describing one extracted entity.

    The label is stripped of surrounding whitespace and ``:#-`` characters.
    The ``id`` combines the kind, the slug of the full cleaned label and the
    line number; the stored ``label`` is cut to 120 characters.
    """
    cleaned = label.strip().strip(":#- ")
    identifier = f"{kind}:{slug(cleaned)}:{line_no}"
    return dict(
        id=identifier,
        kind=kind,
        label=cleaned[:120],
        path=str(file),
        line_no=line_no,
        line=line,
    )
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def relation_kind(left: str, right: str) -> str:
    """Name the relationship between two entity kinds, ignoring their order.

    Rules are tried from most to least specific; the first whose required
    kinds are all present wins, and ``"mentions"`` is the fallback.
    """
    present = {left, right}
    rules = (
        ({"invoice", "vendor"}, "billed_by"),
        ({"bank_draw", "bank"}, "paid_by"),
        ({"shipment", "supplier"}, "ships"),
        ({"risk"}, "risks"),
    )
    for required, relation in rules:
        if required <= present:
            return relation
    return "mentions"
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def slug(value: str) -> str:
    """Lower-case ``value`` and reduce it to ``a-z0-9`` runs joined by single hyphens."""
    hyphenated = re.sub(r"[^a-z0-9]+", "-", value.lower())
    collapsed = re.sub(r"-+", "-", hyphenated)
    return collapsed.strip("-")
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def main() -> None:
    """Print the entities and relationship candidates found under a path as JSON.

    The directory to scan is the single positional command-line argument.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("path")
    target = parser.parse_args().path

    found = extract_operations_entities(target)
    report = {"entities": found, "relationships": build_relationship_candidates(found)}
    print(json.dumps(report, indent=2))
|
cumulus_knowledge/tui.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import subprocess
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
from .operations import (
|
|
7
|
+
compare_invoice_to_bank_draw,
|
|
8
|
+
detect_schedule_shipping_risk,
|
|
9
|
+
extract_operations_entities,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def main() -> None:
    """Run the ops-review summary or forward the arguments to the knowledge binary.

    With ``--ops-review [PATH]`` the entities under PATH (default ``.``) are
    extracted and a short summary plus up to twelve findings is printed.
    Without it, all arguments are forwarded to ``<binary> knowledge ...``,
    where the binary is ``$CUMULUS_BIN`` or ``cls-knowledge``, matching the
    default used by the SDK client.

    Raises:
        SystemExit: With status 1 when the binary is missing, or with the
            binary's own exit status when it fails.
    """
    # Imported locally because the module's top-level imports do not include os.
    import os

    if "--ops-review" in sys.argv:
        path = sys.argv[sys.argv.index("--ops-review") + 1] if sys.argv[-1] != "--ops-review" else "."
        entities = extract_operations_entities(path)
        invoice_issues = compare_invoice_to_bank_draw(entities)
        shipping_risks = detect_schedule_shipping_risk(entities)
        print("Cumulus Operations Review")
        print(f"entities: {len(entities)}")
        print(f"invoice/payment issues: {len(invoice_issues)}")
        print(f"shipping/schedule risks: {len(shipping_risks)}")
        for item in [*invoice_issues, *shipping_risks][:12]:
            print(f"- {item}")
        return

    # Resolve the binary the same way the client does, so that the name in the
    # error message below is the one that was actually executed.
    binary = os.environ.get("CUMULUS_BIN", "cls-knowledge")
    try:
        subprocess.run([binary, "knowledge", *sys.argv[1:]], check=True)
    except FileNotFoundError:
        print(f"{binary} binary was not found. Build it with `cargo build --release`.", file=sys.stderr)
        raise SystemExit(1)
    except subprocess.CalledProcessError as exc:
        print({"ok": False, "error": str(exc)}, file=sys.stderr)
        raise SystemExit(exc.returncode)


# Without this guard ``python -m cumulus_knowledge.tui`` only defines main()
# and exits without doing anything.
if __name__ == "__main__":
    main()
|