cls-knowledge 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
+ node_modules/
2
+ .agent/
3
+ dist/
4
+ .next/
5
+ .output/
6
+ .swc/
7
+ .vercel/
8
+ target/
9
+ __pycache__/
10
+ .pytest_cache/
11
+ .mypy_cache/
12
+ .ruff_cache/
13
+ .env
14
+ .env.local
15
+ .env.*.local
16
+ !.env.example
17
+ .DS_Store
18
+ * 2.ts
19
+ * 2.tsx
20
+ * 2.js
21
+ * 2.jsx
22
+ *.tsbuildinfo
23
+ .cumulus-db-data/
24
+ apps/cumulus-db/.cumulus-db-data/
25
+ .cumulus/
26
+ coverage/
27
+ tmp/
@@ -0,0 +1,77 @@
1
+ Metadata-Version: 2.4
2
+ Name: cls-knowledge
3
+ Version: 0.1.0
4
+ Summary: Python SDK and MCP server for Cumulus Knowledge
5
+ Project-URL: Homepage, https://cumulush.com
6
+ Project-URL: Repository, https://github.com/Cumulus-s/cumulus-create
7
+ Author: Cumulus Knowledge contributors
8
+ License-Expression: AGPL-3.0-only
9
+ Keywords: agents,cli,knowledge-graph,mcp,sdk
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: GNU Affero General Public License v3
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Classifier: Topic :: Text Processing :: Indexing
20
+ Requires-Python: >=3.10
21
+ Provides-Extra: test
22
+ Requires-Dist: pytest>=9.0.0; extra == 'test'
23
+ Provides-Extra: tui
24
+ Requires-Dist: textual>=6.0.0; extra == 'tui'
25
+ Description-Content-Type: text/markdown
26
+
27
+ # cls-knowledge
28
+
29
+ Python SDK, operations ingestion helpers, graph QA helpers, MCP server, and ops-review TUI mode for Cumulus Knowledge.
30
+
31
+ The SDK uses the `cls-knowledge` binary by default. Set `CUMULUS_BIN` to point at another binary.
32
+
33
+ ```python
34
+ from cumulus_knowledge import CumulusKnowledge
35
+
36
+ knowledge = CumulusKnowledge(root="Documents/rune")
37
+ knowledge.index(profile="all")
38
+ hits = knowledge.query("auth flow", budget=800)
39
+ graph = knowledge.get_graph_view("risk")
40
+ ```
41
+
42
+ Operations helpers:
43
+
44
+ ```python
45
+ from cumulus_knowledge import (
46
+ extract_operations_entities,
47
+ build_relationship_candidates,
48
+ score_graph_readability,
49
+ detect_missing_citations,
50
+ )
51
+
52
+ entities = extract_operations_entities("demo-project")
53
+ relationships = build_relationship_candidates(entities)
54
+ quality = score_graph_readability(graph.data)
55
+ missing = detect_missing_citations(graph.data)
56
+ ```
57
+
58
+ Ops review TUI mode:
59
+
60
+ ```bash
61
+ python3 -m cumulus_knowledge.tui --ops-review ./demo-project
62
+ ```
63
+
64
+ The Python package handles the heavier document and operations workflows: invoices, vendors, bank draws, shipments, schedule risks, missing citations, and batch graph quality checks.
65
+
66
+ Hosted-style API mode:
67
+
68
+ ```python
69
+ from cumulus_knowledge import CumulusKnowledge
70
+
71
+ client = CumulusKnowledge(api_base_url="http://127.0.0.1:8787")
72
+ client.create_project({"name": "Demo Operations Project"})
73
+ client.upload_folder([{"path": "invoices/demo.md", "content": "Invoice DEMO-INV-001"}])
74
+ client.index_project()
75
+ graph = client.get_graph_view("finance")
76
+ html = client.export_html()
77
+ ```
@@ -0,0 +1,51 @@
1
+ # cls-knowledge
2
+
3
+ Python SDK, operations ingestion helpers, graph QA helpers, MCP server, and ops-review TUI mode for Cumulus Knowledge.
4
+
5
+ The SDK uses the `cls-knowledge` binary by default. Set `CUMULUS_BIN` to point at another binary.
6
+
7
+ ```python
8
+ from cumulus_knowledge import CumulusKnowledge
9
+
10
+ knowledge = CumulusKnowledge(root="Documents/rune")
11
+ knowledge.index(profile="all")
12
+ hits = knowledge.query("auth flow", budget=800)
13
+ graph = knowledge.get_graph_view("risk")
14
+ ```
15
+
16
+ Operations helpers:
17
+
18
+ ```python
19
+ from cumulus_knowledge import (
20
+ extract_operations_entities,
21
+ build_relationship_candidates,
22
+ score_graph_readability,
23
+ detect_missing_citations,
24
+ )
25
+
26
+ entities = extract_operations_entities("demo-project")
27
+ relationships = build_relationship_candidates(entities)
28
+ quality = score_graph_readability(graph.data)
29
+ missing = detect_missing_citations(graph.data)
30
+ ```
31
+
32
+ Ops review TUI mode:
33
+
34
+ ```bash
35
+ python3 -m cumulus_knowledge.tui --ops-review ./demo-project
36
+ ```
37
+
38
+ The Python package handles the heavier document and operations workflows: invoices, vendors, bank draws, shipments, schedule risks, missing citations, and batch graph quality checks.
39
+
40
+ Hosted-style API mode:
41
+
42
+ ```python
43
+ from cumulus_knowledge import CumulusKnowledge
44
+
45
+ client = CumulusKnowledge(api_base_url="http://127.0.0.1:8787")
46
+ client.create_project({"name": "Demo Operations Project"})
47
+ client.upload_folder([{"path": "invoices/demo.md", "content": "Invoice DEMO-INV-001"}])
48
+ client.index_project()
49
+ graph = client.get_graph_view("finance")
50
+ html = client.export_html()
51
+ ```
@@ -0,0 +1,25 @@
1
+ from .client import CumulusKnowledge
2
+ from .models import AxiEnvelope
3
+ from .operations import (
4
+ build_relationship_candidates,
5
+ detect_missing_citations,
6
+ detect_schedule_shipping_risk,
7
+ compare_invoice_to_bank_draw,
8
+ extract_operations_entities,
9
+ run_agent_eval,
10
+ score_graph_readability,
11
+ upload_project,
12
+ )
13
+
14
# Public API of the package: every name imported above from .client, .models
# and .operations is re-exported here.
__all__ = [
    "AxiEnvelope",
    "CumulusKnowledge",
    "build_relationship_candidates",
    "compare_invoice_to_bank_draw",
    "detect_missing_citations",
    "detect_schedule_shipping_risk",
    "extract_operations_entities",
    "run_agent_eval",
    "score_graph_readability",
    "upload_project",
]
@@ -0,0 +1,181 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import subprocess
6
+ from pathlib import Path
7
+ from typing import Any
8
+ from urllib import parse, request
9
+
10
+ from .models import AxiEnvelope
11
+
12
+
13
class CumulusKnowledge:
    """Client that drives Cumulus Knowledge through its CLI binary or HTTP API.

    When ``api_base_url`` is set, methods that have an API equivalent send
    JSON requests to that server; otherwise they run the CLI binary
    (``self.bin``) as a subprocess and parse its JSON output.
    """

    def __init__(
        self,
        root: str | os.PathLike[str] = ".",
        bin: str | None = None,
        api_base_url: str | None = None,
        project_id: str | None = None,
    ) -> None:
        """Configure the client.

        Args:
            root: Project directory passed to CLI commands.
            bin: CLI binary; falls back to ``CUMULUS_BIN``, then
                ``"cls-knowledge"``.
            api_base_url: Base URL of the HTTP API; falls back to
                ``CUMULUS_API_URL``. ``None`` selects CLI mode.
            project_id: Project identifier used in API paths; falls back to
                ``CUMULUS_PROJECT_ID``, then ``"local"``.
        """
        self.root = str(root)
        self.bin = bin or os.environ.get("CUMULUS_BIN", "cls-knowledge")
        self.api_base_url = api_base_url or os.environ.get("CUMULUS_API_URL")
        # The parameter default must be falsy, otherwise the environment
        # variable could never be consulted.
        self.project_id = project_id or os.environ.get("CUMULUS_PROJECT_ID", "local")

    def init(self, root: str | os.PathLike[str] | None = None) -> AxiEnvelope[Any]:
        """Run ``knowledge init`` for *root* (defaults to the client's root)."""
        return self._run(["knowledge", "init", str(root or self.root)])

    def index(
        self,
        profile: str = "all",
        root: str | os.PathLike[str] | None = None,
        watch: bool = False,
    ) -> AxiEnvelope[Any]:
        """Run ``knowledge index`` with the given profile, optionally with ``--watch``."""
        args = ["knowledge", "index", str(root or self.root), "--profile", profile]
        if watch:
            args.append("--watch")
        return self._run(args)

    def query(self, text: str, budget: int = 1200, limit: int = 10) -> AxiEnvelope[Any]:
        """Search for *text* via the API when configured, else via the CLI."""
        if self.api_base_url:
            return self._api_json(
                "POST",
                self._project_path("/query"),
                {"query": text, "budget": budget, "limit": limit},
            )
        return self._run(
            [
                "knowledge",
                "query",
                text,
                "--path",
                self.root,
                "--budget",
                str(budget),
                "--limit",
                str(limit),
            ]
        )

    def get_node(self, node_id: str) -> AxiEnvelope[Any]:
        """Return one node by ID (API when configured, else CLI)."""
        if self.api_base_url:
            return self.fetch_node(node_id)
        return self._run(["knowledge", "node", "get", node_id, "--path", self.root])

    def expand_neighbors(self, node_id: str, depth: int = 1) -> AxiEnvelope[Any]:
        """Run ``knowledge graph expand`` for *node_id*; always uses the CLI."""
        return self._run(["knowledge", "graph", "expand", node_id, "--path", self.root, "--depth", str(depth)])

    def find_paths(self, from_id: str, to_id: str, max_depth: int = 6) -> AxiEnvelope[Any]:
        """Explain a path between two nodes, honouring *max_depth* in both modes."""
        if self.api_base_url:
            # Forward max_depth; omitting it silently fell back to the default of 6.
            return self.explain_path(from_id, to_id, max_depth=max_depth)
        return self._run(
            [
                "knowledge",
                "path",
                "explain",
                from_id,
                to_id,
                "--path",
                self.root,
                "--max-depth",
                str(max_depth),
            ]
        )

    def index_status(self) -> AxiEnvelope[Any]:
        """Return the API job status, or the output of ``knowledge doctor`` in CLI mode."""
        if self.api_base_url:
            return self.get_job()
        return self._run(["knowledge", "doctor", "--path", self.root])

    def get_graph_view(self, preset: str = "full") -> AxiEnvelope[Any]:
        """Return the graph view for *preset* (API when configured, else CLI)."""
        if self.api_base_url:
            query = parse.urlencode({"preset": preset})
            return self._api_json("GET", f"{self._project_path('/graph-view')}?{query}")
        return self._run(["knowledge", "graph", "view", "--path", self.root, "--preset", preset])

    def create_project(self, data: dict[str, Any] | None = None) -> AxiEnvelope[Any]:
        """POST *data* to ``/v1/projects``; requires ``api_base_url``."""
        return self._api_json("POST", "/v1/projects", data or {})

    def upload_folder(self, files: list[dict[str, str]]) -> AxiEnvelope[Any]:
        """POST *files* to the project's ``/uploads`` endpoint; requires ``api_base_url``."""
        return self._api_json("POST", self._project_path("/uploads"), {"files": files})

    def index_project(self) -> AxiEnvelope[Any]:
        """POST to the project's ``/index`` endpoint; requires ``api_base_url``."""
        return self._api_json("POST", self._project_path("/index"), {})

    def get_job(self, job_id: str = "local") -> AxiEnvelope[Any]:
        """GET ``/v1/jobs/<job_id>``; requires ``api_base_url``."""
        return self._api_json("GET", f"/v1/jobs/{self._quote_segment(job_id)}")

    def upload_project(self, path_or_zip: str | os.PathLike[str]) -> AxiEnvelope[Any]:
        """Run ``knowledge init`` on *path_or_zip* via :meth:`init`."""
        return self.init(path_or_zip)

    def fetch_node(self, node_id: str) -> AxiEnvelope[Any]:
        """Return one node by ID (API when configured, else :meth:`get_node`)."""
        if self.api_base_url:
            return self._api_json("GET", self._project_path(f"/nodes/{self._quote_segment(node_id)}"))
        return self.get_node(node_id)

    def explain_path(self, from_id: str, to_id: str, max_depth: int = 6) -> AxiEnvelope[Any]:
        """Explain a path between two nodes (API when configured, else :meth:`find_paths`)."""
        if self.api_base_url:
            query = parse.urlencode({"from": from_id, "to": to_id, "max_depth": max_depth})
            return self._api_json("GET", f"{self._project_path('/paths/explain')}?{query}")
        return self.find_paths(from_id, to_id, max_depth=max_depth)

    def source_trace(self, node_id: str, preset: str = "full") -> AxiEnvelope[Any]:
        """Return the evidence entries attached to *node_id*.

        In API mode this calls the node's ``source-trace`` endpoint. In CLI
        mode it loads the graph view for *preset* and keeps the evidence
        items whose ``node_id`` matches.
        """
        if self.api_base_url:
            query = parse.urlencode({"preset": preset})
            node_path = self._project_path(f"/nodes/{self._quote_segment(node_id)}/source-trace")
            return self._api_json("GET", f"{node_path}?{query}")
        view = self.get_graph_view(preset).data
        return AxiEnvelope.from_dict({
            "ok": True,
            "data": {
                "node_id": node_id,
                "evidence": [item for item in view.get("evidence", []) if item.get("node_id") == node_id],
            },
            "meta": {"command": "knowledge.source_trace"},
            "links": [],
        })

    def export_html(self) -> str:
        """GET the project's HTML export as text; requires ``api_base_url``."""
        return self._api_text("GET", self._project_path("/exports/html"))

    def export_graph(self, output: str | os.PathLike[str], format: str = "json") -> Path:
        """Run ``knowledge viz export`` writing to *output* and return that path."""
        out = Path(output)
        self._run_raw(["knowledge", "viz", "export", "--path", self.root, "--format", format, "--output", str(out)])
        return out

    def _run(self, args: list[str]) -> AxiEnvelope[Any]:
        """Run the CLI with ``--format json`` appended and parse stdout into an envelope."""
        completed = self._run_raw([*args, "--format", "json"])
        return AxiEnvelope.from_dict(json.loads(completed.stdout))

    def _run_raw(self, args: list[str]) -> subprocess.CompletedProcess[str]:
        """Run ``self.bin`` with *args*, capturing text output.

        Raises:
            subprocess.CalledProcessError: if the binary exits non-zero.
        """
        return subprocess.run(
            [self.bin, *args],
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )

    def _api_json(self, method: str, path: str, body: dict[str, Any] | None = None) -> AxiEnvelope[Any]:
        """Send a JSON request to the API and parse the JSON response into an envelope."""
        payload = None if body is None else json.dumps(body).encode("utf-8")
        req = request.Request(
            self._api_url(path),
            data=payload,
            method=method,
            headers={"content-type": "application/json"},
        )
        with request.urlopen(req) as response:
            return AxiEnvelope.from_dict(json.loads(response.read().decode("utf-8")))

    def _api_text(self, method: str, path: str) -> str:
        """Send a request to the API and return the response body decoded as UTF-8."""
        req = request.Request(self._api_url(path), method=method)
        with request.urlopen(req) as response:
            return response.read().decode("utf-8")

    def _api_url(self, path: str) -> str:
        """Join *path* onto ``api_base_url``.

        Raises:
            RuntimeError: if no API base URL is configured.
        """
        if not self.api_base_url:
            raise RuntimeError("api_base_url is required for API calls")
        return parse.urljoin(self.api_base_url.rstrip("/") + "/", path.lstrip("/"))

    def _project_path(self, suffix: str) -> str:
        """Return ``/v1/projects/<project_id>`` followed by *suffix*."""
        return f"/v1/projects/{self._quote_segment(self.project_id)}{suffix}"

    @staticmethod
    def _quote_segment(value: str) -> str:
        """Percent-encode *value* for use as a single URL path segment.

        ``safe=""`` also escapes ``/``, which ``parse.quote`` leaves alone by
        default and which would otherwise split the ID into extra segments.
        """
        return parse.quote(value, safe="")
@@ -0,0 +1,110 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import sys
6
+ from typing import Any
7
+
8
+ from .client import CumulusKnowledge
9
+ from .operations import (
10
+ build_relationship_candidates,
11
+ compare_invoice_to_bank_draw,
12
+ detect_missing_citations,
13
+ detect_schedule_shipping_risk,
14
+ extract_operations_entities,
15
+ run_agent_eval,
16
+ score_graph_readability,
17
+ )
18
+
19
+
20
# Tool descriptors returned by the "tools/list" method. Each entry's
# inputSchema lists the arguments that handle() reads for that tool.
TOOLS = [
    {"name": "graph_view", "description": "Return a semantic graph view with readable labels, legend, layout, filters, and evidence.", "inputSchema": {"type": "object", "properties": {"preset": {"type": "string"}}}},
    {"name": "search", "description": "Search indexed nodes and chunks.", "inputSchema": {"type": "object", "properties": {"query": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["query"]}},
    {"name": "fetch", "description": "Fetch a node by ID.", "inputSchema": {"type": "object", "properties": {"id": {"type": "string"}}, "required": ["id"]}},
    {"name": "expand_neighbors", "description": "Expand graph neighbors.", "inputSchema": {"type": "object", "properties": {"id": {"type": "string"}, "depth": {"type": "integer"}}, "required": ["id"]}},
    # max_depth is optional; handle() reads it with a default of 6, so it is advertised here.
    {"name": "find_paths", "description": "Find a graph path.", "inputSchema": {"type": "object", "properties": {"from_id": {"type": "string"}, "to_id": {"type": "string"}, "max_depth": {"type": "integer"}}, "required": ["from_id", "to_id"]}},
    {"name": "index_status", "description": "Return local index status.", "inputSchema": {"type": "object", "properties": {}}},
    {"name": "ingest", "description": "Index the configured root.", "inputSchema": {"type": "object", "properties": {"profile": {"type": "string"}}}},
    {"name": "source_trace", "description": "Return evidence links for a semantic node.", "inputSchema": {"type": "object", "properties": {"id": {"type": "string"}, "preset": {"type": "string"}}, "required": ["id"]}},
    {"name": "extract_entities", "description": "Extract operations entities from the configured root.", "inputSchema": {"type": "object", "properties": {}}},
    {"name": "audit_graph_quality", "description": "Score graph readability and citation health.", "inputSchema": {"type": "object", "properties": {"preset": {"type": "string"}}}},
    {"name": "detect_missing_citations", "description": "Find semantic nodes without evidence.", "inputSchema": {"type": "object", "properties": {"preset": {"type": "string"}}}},
    {"name": "compare_invoice_to_bank_draw", "description": "Detect invoice and bank draw mismatches from extracted entities.", "inputSchema": {"type": "object", "properties": {}}},
    {"name": "detect_schedule_shipping_risk", "description": "Detect shipment and schedule risk language.", "inputSchema": {"type": "object", "properties": {}}},
]
35
+
36
+
37
def main() -> None:
    """Serve newline-delimited JSON-RPC requests from stdin until EOF.

    Each non-blank line is parsed as one JSON request. Requests without an
    ``id`` are skipped without a reply. Every other request gets exactly one
    reply on stdout: a ``result`` from :func:`handle`, or an ``error`` object.
    The client root comes from ``CUMULUS_ROOT`` or the current directory.
    """
    client = CumulusKnowledge(root=os.environ.get("CUMULUS_ROOT", os.getcwd()))
    for line in sys.stdin:
        if not line.strip():
            continue
        try:
            request = json.loads(line)
        except json.JSONDecodeError as exc:
            # The id is unknown when the line cannot be parsed; never reuse
            # the id of a previously handled request.
            write({"jsonrpc": "2.0", "id": None, "error": {"code": -32700, "message": str(exc)}})
            continue
        if not isinstance(request, dict):
            write({"jsonrpc": "2.0", "id": None, "error": {"code": -32600, "message": "request must be a JSON object"}})
            continue
        if "id" not in request:
            continue
        request_id = request["id"]
        try:
            result = handle(client, request.get("method", ""), request.get("params") or {})
            write({"jsonrpc": "2.0", "id": request_id, "result": result})
        except Exception as exc:
            # Top-level boundary: report any tool failure to the caller
            # instead of terminating the server loop.
            write({"jsonrpc": "2.0", "id": request_id, "error": {"code": -32603, "message": str(exc)}})
50
+
51
+
52
+ def handle(client: CumulusKnowledge, method: str, params: dict[str, Any]) -> Any:
53
+ if method == "initialize":
54
+ return {
55
+ "protocolVersion": "2025-06-18",
56
+ "serverInfo": {"name": "cls-knowledge", "version": "0.1.0"},
57
+ "capabilities": {"tools": {}, "resources": {}, "prompts": {}},
58
+ }
59
+ if method == "tools/list":
60
+ return {"tools": TOOLS}
61
+ if method == "resources/list":
62
+ return {"resources": [{"uri": "cumulus://snapshot/current", "name": "Current Graph Snapshot", "mimeType": "application/json"}]}
63
+ if method != "tools/call":
64
+ return {}
65
+
66
+ name = params.get("name")
67
+ args = params.get("arguments") or {}
68
+ if name == "search":
69
+ return tool_result(client.query(args["query"], limit=args.get("limit", 10)).data)
70
+ if name == "graph_view":
71
+ return tool_result(client.get_graph_view(args.get("preset", "full")).data)
72
+ if name == "fetch":
73
+ return tool_result(client.get_node(args["id"]).data)
74
+ if name == "expand_neighbors":
75
+ return tool_result(client.expand_neighbors(args["id"], args.get("depth", 1)).data)
76
+ if name == "find_paths":
77
+ return tool_result(client.find_paths(args["from_id"], args["to_id"], args.get("max_depth", 6)).data)
78
+ if name == "index_status":
79
+ return tool_result(client.index_status().data)
80
+ if name == "ingest":
81
+ return tool_result(client.index(profile=args.get("profile", "all")).data)
82
+ if name == "source_trace":
83
+ view = client.get_graph_view(args.get("preset", "full")).data
84
+ return tool_result({"node_id": args["id"], "evidence": [item for item in view.get("evidence", []) if item.get("node_id") == args["id"]]})
85
+ if name == "extract_entities":
86
+ entities = extract_operations_entities(client.root)
87
+ return tool_result({"entities": entities, "relationships": build_relationship_candidates(entities)})
88
+ if name == "audit_graph_quality":
89
+ view = client.get_graph_view(args.get("preset", "full")).data
90
+ return tool_result({"readability": score_graph_readability(view), "eval": run_agent_eval("local", view)})
91
+ if name == "detect_missing_citations":
92
+ return tool_result(detect_missing_citations(client.get_graph_view(args.get("preset", "full")).data))
93
+ if name == "compare_invoice_to_bank_draw":
94
+ return tool_result(compare_invoice_to_bank_draw(extract_operations_entities(client.root)))
95
+ if name == "detect_schedule_shipping_risk":
96
+ return tool_result(detect_schedule_shipping_risk(extract_operations_entities(client.root)))
97
+ raise ValueError(f"unknown tool: {name}")
98
+
99
+
100
def tool_result(data: Any) -> dict[str, Any]:
    """Wrap *data* as a tool response.

    The response carries *data* twice: pretty-printed JSON text in a single
    ``content`` item, and the raw value under ``structuredContent``.
    """
    rendered = json.dumps(data, indent=2)
    text_item = {"type": "text", "text": rendered}
    return {"content": [text_item], "structuredContent": data}
102
+
103
+
104
def write(value: dict[str, Any]) -> None:
    """Emit *value* as one JSON line on stdout and flush immediately."""
    stream = sys.stdout
    stream.write(f"{json.dumps(value)}\n")
    stream.flush()
107
+
108
+
109
# Run the stdin/stdout server loop when this module is executed directly.
if __name__ == "__main__":
    main()
@@ -0,0 +1,25 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Generic, TypeVar
5
+
6
+ T = TypeVar("T")
7
+
8
+
9
+ @dataclass(slots=True)
10
+ class AxiEnvelope(Generic[T]):
11
+ ok: bool
12
+ data: T
13
+ meta: dict[str, Any]
14
+ links: list[dict[str, Any]]
15
+ error: dict[str, Any] | None = None
16
+
17
+ @classmethod
18
+ def from_dict(cls, value: dict[str, Any]) -> "AxiEnvelope[Any]":
19
+ return cls(
20
+ ok=bool(value.get("ok")),
21
+ data=value.get("data"),
22
+ meta=dict(value.get("meta") or {}),
23
+ links=list(value.get("links") or []),
24
+ error=value.get("error"),
25
+ )
@@ -0,0 +1,175 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from .client import CumulusKnowledge
9
+
10
+
11
# Maps an entity kind to the regex that recognises it on a single line.
# Every pattern is case-insensitive and captures the entity label in group 1.
ENTITY_PATTERNS: dict[str, re.Pattern[str]] = {
    # "invoice"/"inv", separator(s), then 3+ characters of letters, digits or hyphens.
    "invoice": re.compile(r"\b(?:invoice|inv)[:#\s-]+([A-Z0-9-]{3,})", re.I),
    # "draw request", "bank draw" or "draw", separator(s), then the rest of the line.
    "bank_draw": re.compile(r"\b(?:draw request|bank draw|draw)[:#\s-]+(.+)", re.I),
    "vendor": re.compile(r"\b(?:vendor|contractor)[:\s-]+(.+)", re.I),
    "supplier": re.compile(r"\bsupplier[:\s-]+(.+)", re.I),
    "client": re.compile(r"\b(?:client|owner)[:\s-]+(.+)", re.I),
    # Requires a colon right after "bank"/"lender" (optional whitespace), so
    # text such as "Bank Draw Request #2" does not match as a bank.
    "bank": re.compile(r"\b(?:bank|lender)\s*:\s*(.+)", re.I),
}
19
+
20
+
21
def upload_project(path_or_zip: str | Path, root: str | Path | None = None) -> dict[str, Any]:
    """Initialise a project directory through the client's ``init`` command.

    The target is *root* when given (and truthy), otherwise *path_or_zip*.
    Returns the target path as a string together with the init payload.
    """
    destination = Path(root) if root else Path(path_or_zip)
    envelope = CumulusKnowledge(root=destination).init()
    return {"root": str(destination), "init": envelope.data}
26
+
27
+
28
def extract_operations_entities(path: str | Path) -> list[dict[str, Any]]:
    """Scan every UTF-8 text file under *path* for operations entities.

    Each line is stripped of surrounding whitespace and of leading/trailing
    ``-``, ``*``, ``#`` and space characters, then matched against
    ``ENTITY_PATTERNS`` and against keyword lists for shipments, milestones,
    risks and conflicts. A line may yield several entities.

    Files are visited in sorted order so the result is deterministic. Files
    inside a ``.cumulus`` directory below *path* are skipped, as are files
    that cannot be read or decoded as UTF-8.

    Returns:
        Entity dicts as produced by ``entity``.
    """
    root = Path(path)
    # (entity kind, substrings that trigger it) — checked in this order.
    keyword_kinds = (
        ("shipment", ("shipment", "shipping", "delivery")),
        ("milestone", ("milestone", "inspection", "phase ")),
        ("risk", ("risk", "delay", "overdue", "blocked")),
        ("conflict", ("conflict", "mismatch", "does not match")),
    )
    entities: list[dict[str, Any]] = []
    for file in sorted(root.rglob("*")):
        # Check path parts relative to root so that a root which itself lives
        # under a ".cumulus" directory is still scanned.
        if ".cumulus" in file.relative_to(root).parts or not file.is_file():
            continue
        try:
            text = file.read_text(encoding="utf-8")
        except (UnicodeDecodeError, OSError):
            # Binary or unreadable file: skip it rather than abort the scan.
            continue
        for line_no, line in enumerate(text.splitlines(), start=1):
            compact = line.strip().strip("-*# ")
            if not compact:
                continue
            for kind, pattern in ENTITY_PATTERNS.items():
                match = pattern.search(compact)
                if match:
                    entities.append(entity(kind, match.group(1), file, line_no, compact))
            lower = compact.lower()
            for kind, terms in keyword_kinds:
                if any(term in lower for term in terms):
                    entities.append(entity(kind, compact, file, line_no, compact))
    return entities
56
+
57
+
58
def build_relationship_candidates(docs: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Propose relationships between entities found in the same file.

    Entities are grouped by their ``path``. Within a group, every ordered
    pair of distinct entities with different kinds yields one candidate with
    a fixed confidence of 0.54, so each unordered pair appears in both
    directions.
    """
    grouped: dict[str, list[dict[str, Any]]] = {}
    for doc in docs:
        key = str(doc.get("path"))
        if key not in grouped:
            grouped[key] = []
        grouped[key].append(doc)

    return [
        {
            "from": source["id"],
            "to": target["id"],
            "kind": relation_kind(source["kind"], target["kind"]),
            "path": file_path,
            "confidence": 0.54,
        }
        for file_path, members in grouped.items()
        for source in members
        for target in members
        if source is not target and source["kind"] != target["kind"]
    ]
76
+
77
+
78
def score_graph_readability(graph_view: dict[str, Any]) -> dict[str, Any]:
    """Score how readable a graph view's labels and legend are.

    Starting from 1.0, the score loses 0.08 per label beginning with
    ``chunk_`` (capped at 0.4), 0.04 per label longer than 96 characters
    (capped at 0.25), and 0.2 when the legend lists no node kinds. The view
    passes when the score is at least 0.8 and no ``chunk_`` labels exist.
    """
    nodes = graph_view.get("nodes", [])
    chunk_count = 0
    long_count = 0
    for node in nodes:
        text = str(node.get("display_label", ""))
        if text.startswith("chunk_"):
            chunk_count += 1
        if len(text) > 96:
            long_count += 1
    legend_count = len(graph_view.get("legend", {}).get("node_kinds", []))

    score = 1.0 - min(0.4, chunk_count * 0.08)
    score = score - min(0.25, long_count * 0.04)
    if legend_count == 0:
        score = score - 0.2

    passed = score >= 0.8 and chunk_count == 0
    return {
        "score": round(max(0.0, score), 3),
        "node_count": len(nodes),
        "chunk_label_count": chunk_count,
        "long_label_count": long_count,
        "legend_count": legend_count,
        "passed": passed,
    }
97
+
98
+
99
def run_agent_eval(project_id: str, graph_view: dict[str, Any] | None = None) -> dict[str, Any]:
    """Summarise readability and citation presence for a project's graph view.

    A missing or empty *graph_view* is scored as a view with no nodes and an
    empty legend. ``passed`` and ``score`` are taken from the readability
    result; ``citations_present`` is true when the view has any evidence.
    """
    view_for_scoring = graph_view or {"nodes": [], "legend": {"node_kinds": []}}
    readability = score_graph_readability(view_for_scoring)
    has_citations = bool((graph_view or {}).get("evidence"))
    checks = {"readability": readability, "citations_present": has_citations}
    return {
        "project_id": project_id,
        "passed": readability["passed"],
        "score": readability["score"],
        "checks": checks,
    }
110
+
111
+
112
def detect_missing_citations(graph_view: dict[str, Any]) -> list[dict[str, Any]]:
    """List nodes that have no evidence entry pointing at them.

    Nodes whose ``domain_kind`` is ``project`` or ``folder`` are exempt.
    """
    cited_ids = set()
    for record in graph_view.get("evidence", []):
        cited_ids.add(record.get("node_id"))

    exempt_kinds = {"project", "folder"}
    findings: list[dict[str, Any]] = []
    for node in graph_view.get("nodes", []):
        if node.get("id") in cited_ids or node.get("domain_kind") in exempt_kinds:
            continue
        findings.append({"node_id": node.get("id"), "label": node.get("display_label"), "issue": "missing citation"})
    return findings
119
+
120
+
121
def compare_invoice_to_bank_draw(entities: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Flag the case where invoices exist but no bank draw entity was found.

    Returns a single high-severity issue carrying the invoice count, or an
    empty list when there are no invoices or at least one bank draw.
    """
    invoice_count = sum(1 for record in entities if record["kind"] == "invoice")
    draw_count = sum(1 for record in entities if record["kind"] == "bank_draw")
    if invoice_count == 0 or draw_count > 0:
        return []
    return [{"severity": "high", "issue": "invoice without bank draw evidence", "invoice_count": invoice_count}]
127
+
128
+
129
def detect_schedule_shipping_risk(entities: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Report shipments whose line mentions a delay and every risk entity.

    Shipment findings (line contains "delay", "blocked" or "overdue") come
    first, followed by one finding per entity of kind ``risk``.
    """
    delay_terms = ("delay", "blocked", "overdue")
    shipment_findings: list[dict[str, Any]] = []
    risk_findings: list[dict[str, Any]] = []
    for record in entities:
        kind = record["kind"]
        if kind == "shipment":
            lowered = record["line"].lower()
            if any(term in lowered for term in delay_terms):
                shipment_findings.append(
                    {"severity": "medium", "issue": "shipping item has risk language", "shipment": record["label"]}
                )
        elif kind == "risk":
            risk_findings.append({"severity": "medium", "issue": "schedule risk found", "risk": record["label"]})
    return shipment_findings + risk_findings
137
+
138
+
139
def entity(kind: str, label: str, file: Path, line_no: int, line: str) -> dict[str, Any]:
    """Build the dict that describes one extracted entity.

    The label is trimmed of whitespace and of surrounding ``:``, ``#``, ``-``
    and space characters. The ID combines the kind, a slug of the full
    trimmed label and the line number; the stored label is cut to 120
    characters.
    """
    trimmed = label.strip().strip(":#- ")
    record: dict[str, Any] = {}
    record["id"] = f"{kind}:{slug(trimmed)}:{line_no}"
    record["kind"] = kind
    record["label"] = trimmed[:120]
    record["path"] = str(file)
    record["line_no"] = line_no
    record["line"] = line
    return record
149
+
150
+
151
def relation_kind(left: str, right: str) -> str:
    """Name the relationship between two entity kinds, regardless of order.

    Rules are tried from most to least specific; the first rule whose kinds
    are all present wins, and ``"mentions"`` is the fallback.
    """
    present = {left, right}
    rules = (
        ({"invoice", "vendor"}, "billed_by"),
        ({"bank_draw", "bank"}, "paid_by"),
        ({"shipment", "supplier"}, "ships"),
        ({"risk"}, "risks"),
    )
    for required, name in rules:
        if required <= present:
            return name
    return "mentions"
162
+
163
+
164
def slug(value: str) -> str:
    """Lower-case *value* and join its ``a-z0-9`` runs with single hyphens."""
    lowered = value.lower()
    # Each maximal run of other characters becomes exactly one hyphen, so no
    # second pass is needed to collapse repeated hyphens.
    hyphenated = re.sub(r"[^a-z0-9]+", "-", lowered)
    return hyphenated.strip("-")
166
+
167
+
168
def main() -> None:
    """Extract entities and relationship candidates from a path and print them as JSON."""
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("path")
    options = cli.parse_args()

    found = extract_operations_entities(options.path)
    report = {"entities": found, "relationships": build_relationship_candidates(found)}
    print(json.dumps(report, indent=2))
@@ -0,0 +1,33 @@
1
+ from __future__ import annotations
2
+
3
+ import subprocess
4
+ import sys
5
+
6
+ from .operations import (
7
+ compare_invoice_to_bank_draw,
8
+ detect_schedule_shipping_risk,
9
+ extract_operations_entities,
10
+ )
11
+
12
+
13
def main() -> None:
    """Entry point for the TUI command.

    With ``--ops-review [PATH]`` it prints a plain-text operations summary
    for PATH (default ``.``): entity count, invoice/payment issues and
    shipping/schedule risks, listing at most 12 findings. Otherwise all
    arguments are forwarded to ``<binary> knowledge ...``, where the binary
    is ``CUMULUS_BIN`` or ``cls-knowledge``.

    Raises:
        SystemExit: with status 1 when the binary is missing, or with the
            binary's exit status when it fails.
    """
    # Local import: this module's import block does not include os.
    import os

    argv = sys.argv
    if "--ops-review" in argv:
        flag_at = argv.index("--ops-review")
        # Use the argument after the flag when there is one, else the current directory.
        path = argv[flag_at + 1] if flag_at + 1 < len(argv) else "."
        entities = extract_operations_entities(path)
        invoice_issues = compare_invoice_to_bank_draw(entities)
        shipping_risks = detect_schedule_shipping_risk(entities)
        print("Cumulus Operations Review")
        print(f"entities: {len(entities)}")
        print(f"invoice/payment issues: {len(invoice_issues)}")
        print(f"shipping/schedule risks: {len(shipping_risks)}")
        for item in [*invoice_issues, *shipping_risks][:12]:
            print(f"- {item}")
        return

    # Resolve the binary with the same CUMULUS_BIN / "cls-knowledge" rule the
    # SDK client uses, so the error message below names what was actually run.
    binary = os.environ.get("CUMULUS_BIN", "cls-knowledge")
    try:
        subprocess.run([binary, "knowledge", *argv[1:]], check=True)
    except FileNotFoundError:
        print(f"{binary} binary was not found. Build it with `cargo build --release`.", file=sys.stderr)
        raise SystemExit(1)
    except subprocess.CalledProcessError as exc:
        print({"ok": False, "error": str(exc)}, file=sys.stderr)
        raise SystemExit(exc.returncode)
@@ -0,0 +1,44 @@
1
+ [project]
2
+ name = "cls-knowledge"
3
+ version = "0.1.0"
4
+ description = "Python SDK and MCP server for Cumulus Knowledge"
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ license = "AGPL-3.0-only"
8
+ authors = [{ name = "Cumulus Knowledge contributors" }]
9
+ dependencies = []
10
+ keywords = ["knowledge-graph", "mcp", "cli", "agents", "sdk"]
11
+ classifiers = [
12
+ "Development Status :: 3 - Alpha",
13
+ "Intended Audience :: Developers",
14
+ "License :: OSI Approved :: GNU Affero General Public License v3",
15
+ "Programming Language :: Python :: 3",
16
+ "Programming Language :: Python :: 3.10",
17
+ "Programming Language :: Python :: 3.11",
18
+ "Programming Language :: Python :: 3.12",
19
+ "Programming Language :: Python :: 3.13",
20
+ "Topic :: Software Development :: Libraries :: Python Modules",
21
+ "Topic :: Text Processing :: Indexing",
22
+ ]
23
+
24
+ [project.urls]
25
+ Homepage = "https://cumulush.com"
26
+ Repository = "https://github.com/Cumulus-s/cumulus-create"
27
+
28
+ [project.optional-dependencies]
29
+ tui = ["textual>=6.0.0"]
30
+ test = ["pytest>=9.0.0"]
31
+
32
+ [project.scripts]
33
+ cls-knowledge-mcp = "cumulus_knowledge.mcp_server:main"
34
+ cls-knowledge-tui = "cumulus_knowledge.tui:main"
35
+
36
+ [tool.pytest.ini_options]
37
+ testpaths = ["tests"]
38
+
39
+ [tool.hatch.build.targets.wheel]
40
+ packages = ["cumulus_knowledge"]
41
+
42
+ [build-system]
43
+ requires = ["hatchling>=1.26"]
44
+ build-backend = "hatchling.build"
@@ -0,0 +1,52 @@
1
+ import unittest
2
+
3
+ from cumulus_knowledge.models import AxiEnvelope
4
+ from cumulus_knowledge.operations import (
5
+ build_relationship_candidates,
6
+ detect_schedule_shipping_risk,
7
+ extract_operations_entities,
8
+ score_graph_readability,
9
+ )
10
+
11
+
12
class EnvelopeTests(unittest.TestCase):
    """Checks that AxiEnvelope.from_dict maps dict fields onto attributes."""

    def test_envelope_from_dict(self) -> None:
        payload = {
            "ok": True,
            "data": {"id": "node_1"},
            "meta": {"command": "x"},
            "links": [],
        }
        envelope = AxiEnvelope.from_dict(payload)
        self.assertTrue(envelope.ok)
        self.assertEqual(envelope.data["id"], "node_1")
        self.assertEqual(envelope.meta["command"], "x")
18
+
19
+
20
class OperationsTests(unittest.TestCase):
    """Exercises entity extraction, relationship building and readability scoring."""

    def test_extracts_operations_entities_and_scores_graph(self) -> None:
        with self.subTest("entities"):
            import tempfile
            from pathlib import Path

            sample_lines = [
                "Invoice DEMO-INV-001",
                "Bank: Atlas Demo Bank",
                "Draw Request: Bank Draw Request #2",
                "Vendor: BrightSteel Demo Supply",
                "Steel delivery delayed",
            ]
            with tempfile.TemporaryDirectory() as tmp:
                root = Path(tmp)
                (root / "finance.md").write_text("\n".join(sample_lines) + "\n", encoding="utf-8")
                entities = extract_operations_entities(root)

            kinds = {item["kind"] for item in entities}
            for expected_kind in ("invoice", "bank", "bank_draw", "vendor"):
                self.assertIn(expected_kind, kinds)
            self.assertTrue(any(item["label"] == "Bank Draw Request #2" for item in entities))
            # The draw-request line must not also be classified as a bank.
            self.assertFalse(any(item["kind"] == "bank" and "Draw Request" in item["label"] for item in entities))
            self.assertTrue(build_relationship_candidates(entities))
            self.assertTrue(detect_schedule_shipping_risk(entities))
        with self.subTest("readability"):
            graph_view = {
                "nodes": [{"display_label": "Invoice DEMO-INV-001"}],
                "legend": {"node_kinds": [{"kind": "invoice"}]},
                "evidence": [{"node_id": "n1"}],
            }
            score = score_graph_readability(graph_view)
            self.assertTrue(score["passed"])
49
+
50
+
51
# Allow running this test module directly with the unittest runner.
if __name__ == "__main__":
    unittest.main()