gdmcode 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. gdmcode-0.1.0.dist-info/METADATA +240 -0
  2. gdmcode-0.1.0.dist-info/RECORD +131 -0
  3. gdmcode-0.1.0.dist-info/WHEEL +4 -0
  4. gdmcode-0.1.0.dist-info/entry_points.txt +2 -0
  5. src/__init__.py +1 -0
  6. src/_internal/__init__.py +0 -0
  7. src/_internal/constants.py +244 -0
  8. src/_internal/domain_skills.py +339 -0
  9. src/agent/__init__.py +0 -0
  10. src/agent/commit_classifier.py +91 -0
  11. src/agent/context_budget.py +391 -0
  12. src/agent/daemon.py +681 -0
  13. src/agent/dag_validator.py +153 -0
  14. src/agent/debug_loop.py +473 -0
  15. src/agent/impact_analyzer.py +149 -0
  16. src/agent/impact_graph.py +117 -0
  17. src/agent/loop.py +1410 -0
  18. src/agent/orchestrator.py +141 -0
  19. src/agent/regression_guard.py +251 -0
  20. src/agent/review_gate.py +648 -0
  21. src/agent/risk_scorer.py +169 -0
  22. src/agent/self_healing.py +145 -0
  23. src/agent/smart_test_selector.py +89 -0
  24. src/agent/system_prompt.py +226 -0
  25. src/agent/task_tracker.py +320 -0
  26. src/agent/test_validator.py +210 -0
  27. src/agent/tool_orchestrator.py +402 -0
  28. src/agent/transcript.py +230 -0
  29. src/agent/verification_loop.py +133 -0
  30. src/agent/work_director.py +136 -0
  31. src/agent/worktree_manager.py +53 -0
  32. src/artifacts/__init__.py +16 -0
  33. src/artifacts/artifact_store.py +456 -0
  34. src/artifacts/verification_graph.py +75 -0
  35. src/auth.py +411 -0
  36. src/cli.py +1290 -0
  37. src/commands.py +1398 -0
  38. src/config.py +762 -0
  39. src/cost_tracker.py +348 -0
  40. src/db/__init__.py +4 -0
  41. src/db/migrations.py +337 -0
  42. src/enterprise/__init__.py +3 -0
  43. src/enterprise/audit_log.py +182 -0
  44. src/enterprise/identity.py +90 -0
  45. src/enterprise/rbac.py +100 -0
  46. src/enterprise/team_config.py +125 -0
  47. src/enterprise/usage_analytics.py +261 -0
  48. src/exceptions.py +207 -0
  49. src/git_workflow.py +651 -0
  50. src/integrations/__init__.py +6 -0
  51. src/integrations/github_actions.py +106 -0
  52. src/integrations/mcp_server.py +333 -0
  53. src/integrations/sentry_integration.py +100 -0
  54. src/integrations/sentry_server.py +82 -0
  55. src/integrations/webhook_security.py +19 -0
  56. src/main.py +27 -0
  57. src/memory/__init__.py +0 -0
  58. src/memory/code_index.py +376 -0
  59. src/memory/compressor.py +378 -0
  60. src/memory/context_memory.py +135 -0
  61. src/memory/continuous_memory.py +234 -0
  62. src/memory/conventions.py +495 -0
  63. src/memory/db.py +1119 -0
  64. src/memory/document_index.py +205 -0
  65. src/memory/file_cache.py +128 -0
  66. src/memory/project_scanner.py +178 -0
  67. src/memory/session_store.py +201 -0
  68. src/models/__init__.py +0 -0
  69. src/models/client.py +715 -0
  70. src/models/definitions.py +459 -0
  71. src/models/router.py +418 -0
  72. src/models/schemas.py +389 -0
  73. src/permissions.py +294 -0
  74. src/remote/__init__.py +5 -0
  75. src/remote/command_filter.py +33 -0
  76. src/remote/models.py +31 -0
  77. src/remote/permission_handler.py +79 -0
  78. src/remote/phone_ui.py +48 -0
  79. src/remote/protocol.py +59 -0
  80. src/remote/qr.py +65 -0
  81. src/remote/server.py +586 -0
  82. src/remote/token_manager.py +61 -0
  83. src/remote/tunnel.py +212 -0
  84. src/repl.py +475 -0
  85. src/runtime/__init__.py +1 -0
  86. src/runtime/branch_farm.py +372 -0
  87. src/runtime/replay.py +351 -0
  88. src/sandbox/__init__.py +2 -0
  89. src/sandbox/hermetic.py +214 -0
  90. src/sandbox/policy.py +44 -0
  91. src/sdk/__init__.py +3 -0
  92. src/sdk/plugin_base.py +39 -0
  93. src/sdk/plugin_host.py +100 -0
  94. src/sdk/plugin_loader.py +101 -0
  95. src/security.py +409 -0
  96. src/server/__init__.py +7 -0
  97. src/server/bridge.py +427 -0
  98. src/server/bridge_cli.py +103 -0
  99. src/server/bridge_client.py +170 -0
  100. src/server/protocol_version.py +103 -0
  101. src/session/__init__.py +10 -0
  102. src/session/event_fanout.py +46 -0
  103. src/session/input_broker.py +38 -0
  104. src/session/permission_bridge.py +100 -0
  105. src/tools/__init__.py +160 -0
  106. src/tools/_atomic.py +72 -0
  107. src/tools/agent_tools.py +423 -0
  108. src/tools/ask_user_tool.py +83 -0
  109. src/tools/bash_tool.py +384 -0
  110. src/tools/browser_tool.py +352 -0
  111. src/tools/browser_tools.py +179 -0
  112. src/tools/dep_tools.py +210 -0
  113. src/tools/document_reader.py +167 -0
  114. src/tools/document_tool.py +240 -0
  115. src/tools/document_writer.py +171 -0
  116. src/tools/impact_tools.py +240 -0
  117. src/tools/playwright_tool.py +172 -0
  118. src/tools/quality_tools.py +366 -0
  119. src/tools/read_tools.py +318 -0
  120. src/tools/result_cache.py +157 -0
  121. src/tools/search_tools.py +310 -0
  122. src/tools/shell_tools.py +311 -0
  123. src/tools/write_tools.py +337 -0
  124. src/voice/__init__.py +25 -0
  125. src/voice/audio_capture.py +92 -0
  126. src/voice/audio_playback.py +68 -0
  127. src/voice/errors.py +14 -0
  128. src/voice/models.py +35 -0
  129. src/voice/providers.py +143 -0
  130. src/voice/vad.py +55 -0
  131. src/voice/voice_loop.py +156 -0
src/tools/dep_tools.py ADDED
@@ -0,0 +1,210 @@
1
+ """Dependency guard tools — CVE scanning and version pinning.
2
+
3
+ DependencyGuardTool: runs pip-audit (preferred) or safety (fallback) to detect
4
+ known CVEs before any new package is installed.
5
+
6
+ PinDepsTool: generates a pinned requirements.lock from the live environment,
7
+ filtering development-only packages out by default.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ import os
13
+ import shutil
14
+ import subprocess
15
+ import tempfile
16
+ from pathlib import Path
17
+ from typing import Any, ClassVar
18
+
19
+ from src.tools import REGISTRY, ToolBase, ToolResult
20
+
21
# Names exported by ``from src.tools.dep_tools import *``.
__all__ = ["DependencyGuardTool", "PinDepsTool"]

log = logging.getLogger(__name__)

# Subprocess timeouts, in seconds (passed to ``subprocess.run(timeout=...)``).
_AUDIT_TIMEOUT: int = 60
_PIN_TIMEOUT: int = 30

# Packages that should be stripped from a production lock file
_DEV_PKG_PREFIXES: frozenset[str] = frozenset({
    "pytest", "pytest-cov", "pytest-xdist", "ruff", "mypy", "black",
    "isort", "pre-commit", "build", "twine", "wheel", "setuptools",
    "pip-audit", "safety", "coverage", "hypothesis", "faker",
    "flake8", "pylint", "bandit", "pipdeptree",
})
35
+
36
+
37
+ # ---------------------------------------------------------------------------
38
+ # DependencyGuardTool
39
+ # ---------------------------------------------------------------------------
40
+
41
class DependencyGuardTool(ToolBase):
    """Scan Python dependencies for known CVEs before installing packages.

    Runs ``pip-audit`` (preferred) or ``safety check`` (fallback), whichever is
    found on ``PATH`` first, and returns the scanner's raw (JSON) output.

    **Always call this before pip install <package>.**
    """

    name: ClassVar[str] = "check_deps"
    description: ClassVar[str] = (
        "Scan Python dependencies for known CVEs. "
        "Run before any 'pip install' to catch security vulnerabilities. "
        "Returns vulnerable packages with CVE IDs, or confirms clean."
    )
    input_schema: ClassVar[dict[str, Any]] = {
        "type": "object",
        "properties": {
            "packages": {
                "type": "array",
                "items": {"type": "string"},
                "description": (
                    "Specific packages to check (e.g. ['requests==2.28.0']). "
                    "Omit to scan all installed packages."
                ),
            },
            "requirements_file": {
                "type": "string",
                "description": "Path to a requirements file to scan (optional).",
            },
        },
    }

    def execute(self, args: dict[str, Any]) -> ToolResult:
        """Run the first available scanner over the requested inputs.

        Args:
            args: Tool arguments; reads the optional ``packages`` and
                ``requirements_file`` keys described in ``input_schema``.

        Returns:
            A ``ToolResult`` carrying the scanner output, or an ``error`` when
            the arguments are invalid or the scanner itself failed.
        """
        packages, problem = self._clean_packages(args.get("packages"))
        if problem:
            return ToolResult(output="", error=problem)
        req_file: str | None = args.get("requirements_file")

        if shutil.which("pip-audit") is not None:
            return self._run_pip_audit(packages, req_file)
        if shutil.which("safety") is not None:
            return self._run_safety(packages, req_file)

        return ToolResult(output=(
            "⚠ pip-audit and safety are not installed — cannot scan for CVEs.\n"
            "Install: pip install pip-audit\n"
            + (f"Packages requested: {', '.join(packages)}" if packages else "")
        ))

    # ------------------------------------------------------------------
    # pip-audit
    # ------------------------------------------------------------------

    def _run_pip_audit(self, packages: list[str], req_file: str | None) -> ToolResult:
        """Scan with ``pip-audit --format json``."""
        return self._scan(["pip-audit", "--format", "json"], packages, req_file)

    # ------------------------------------------------------------------
    # safety
    # ------------------------------------------------------------------

    def _run_safety(self, packages: list[str], req_file: str | None) -> ToolResult:
        """Scan with ``safety check --json``."""
        return self._scan(["safety", "check", "--json"], packages, req_file)

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _scan(
        self,
        base_cmd: list[str],
        packages: list[str],
        req_file: str | None,
    ) -> ToolResult:
        """Build the scanner command line and run it.

        Both the caller's requirements file and the explicit package list are
        scanned when both are supplied (each becomes its own ``-r`` argument);
        previously the package list was silently dropped in that case.
        """
        cmd = list(base_cmd)
        if req_file:
            cmd += ["-r", req_file]
        if packages:
            return self._with_temp_req(
                packages, lambda tmp: self._exec(cmd + ["-r", tmp])
            )
        return self._exec(cmd)

    @staticmethod
    def _clean_packages(raw: Any) -> tuple[list[str], str]:
        """Validate the ``packages`` argument.

        Returns:
            ``(specs, "")`` on success, or ``([], message)`` when an entry is
            unusable. Blank entries are ignored.
        """
        if not raw:
            return [], ""
        if isinstance(raw, str):
            # A bare string would otherwise be written one character per line.
            raw = [raw]
        if not isinstance(raw, (list, tuple)):
            return [], f"'packages' must be a list of strings, got {type(raw).__name__}"

        specs: list[str] = []
        for item in raw:
            if not isinstance(item, str):
                return [], f"Invalid package specifier (expected a string): {item!r}"
            spec = item.strip()
            if not spec:
                continue
            # Entries land verbatim in a requirements file; a leading '-' or an
            # embedded newline would let the caller smuggle in pip options
            # (e.g. --index-url, -r <other file>).
            if spec.startswith("-") or "\n" in spec or "\r" in spec:
                return [], f"Invalid package specifier: {item!r}"
            specs.append(spec)
        return specs, ""

    @staticmethod
    def _with_temp_req(
        packages: list[str],
        fn: Any,
    ) -> ToolResult:
        """Write a temporary requirements file, call fn(path), then delete it."""
        fd, tmp = tempfile.mkstemp(suffix=".txt")
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                f.write("\n".join(packages) + "\n")
            return fn(tmp)
        finally:
            try:
                os.unlink(tmp)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask the
                # scan result.
                pass

    @staticmethod
    def _exec(cmd: list[str]) -> ToolResult:
        """Run *cmd* and translate the outcome into a ``ToolResult``.

        Exit codes that mean "scan finished and found vulnerabilities" are
        reported as success so the findings reach the caller as output.
        """
        # pip-audit exits 1 when vulnerabilities are found — that is normal, not an error.
        # Safety's CLI documents exit code 64 for the same situation.
        ok_codes = (0, 1, 64) if cmd and cmd[0] == "safety" else (0, 1)
        try:
            res = subprocess.run(
                cmd, capture_output=True, text=True, timeout=_AUDIT_TIMEOUT
            )
        except subprocess.TimeoutExpired:
            return ToolResult(output="", error=f"Audit timed out after {_AUDIT_TIMEOUT}s")
        except Exception as exc:  # noqa: BLE001
            return ToolResult(output="", error=str(exc))

        output = res.stdout or res.stderr or "(no output)"
        if res.returncode in ok_codes:
            return ToolResult(output=output, error="")
        # Never report a failed scan with an empty error string: that would be
        # indistinguishable from a clean result.
        stderr = res.stderr or ""
        error = stderr if stderr.strip() else f"{cmd[0]} exited with code {res.returncode}"
        return ToolResult(output=output, error=error)
150
+
151
+
152
+ # ---------------------------------------------------------------------------
153
+ # PinDepsTool
154
+ # ---------------------------------------------------------------------------
155
+
156
class PinDepsTool(ToolBase):
    """Generate a pinned requirements.lock from the current Python environment.

    Uses ``pip freeze`` and filters out development-only packages by default.
    Call after installing new dependencies before committing changes.
    """

    name: ClassVar[str] = "pin_deps"
    description: ClassVar[str] = (
        "Generate a pinned requirements.lock from the current Python environment. "
        "Filters out dev/test packages. Use after adding new dependencies."
    )
    input_schema: ClassVar[dict[str, Any]] = {
        "type": "object",
        "properties": {
            "output_path": {
                "type": "string",
                "description": "File to write. Defaults to 'requirements.lock'.",
            },
            "exclude_dev": {
                "type": "boolean",
                "description": "Strip dev/test packages from output (default: true).",
            },
        },
    }

    @staticmethod
    def _requirement_name(line: str) -> str:
        """Return the normalized distribution name at the start of a freeze line.

        Handles ``name==1.0`` and ``name @ url`` forms. Lines that do not start
        with a name (``-e ...``, comments, blanks) yield ``""``. Runs of ``-``,
        ``_`` and ``.`` collapse to a single ``-`` and the result is lower-cased,
        so ``Pre_Commit`` and ``pre-commit`` compare equal.
        """
        import re  # local import: the module header does not import ``re``

        match = re.match(r"[A-Za-z0-9][A-Za-z0-9._-]*", line.strip())
        if match is None:
            return ""
        return re.sub(r"[-_.]+", "-", match.group(0)).lower()

    def execute(self, args: dict[str, Any]) -> ToolResult:
        """Run ``pip freeze`` and write the (optionally filtered) pins to disk.

        Args:
            args: Tool arguments; reads the optional ``output_path`` and
                ``exclude_dev`` keys described in ``input_schema``.

        Returns:
            A ``ToolResult`` summarising how many lines were written, or an
            ``error`` if ``pip freeze`` failed, timed out, or the file could
            not be written.
        """
        out_path = Path(args.get("output_path") or "requirements.lock")
        # An explicit null means "not specified", so the documented default applies.
        flag = args.get("exclude_dev")
        exclude_dev: bool = True if flag is None else bool(flag)

        try:
            res = subprocess.run(
                ["pip", "freeze"], capture_output=True, text=True, timeout=_PIN_TIMEOUT
            )
            if res.returncode != 0:
                return ToolResult(output="", error=res.stderr)

            lines = res.stdout.splitlines()
            if exclude_dev:
                # Compare whole normalized names. A raw startswith() test also
                # drops unrelated production packages such as "buildbot"
                # ("build"), "mypy-extensions" ("mypy") or "wheel-filename".
                dev_names = {self._requirement_name(p) for p in _DEV_PKG_PREFIXES}
                lines = [
                    ln for ln in lines
                    if self._requirement_name(ln) not in dev_names
                ]

            out_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
            return ToolResult(output=f"Pinned {len(lines)} packages → {out_path}")
        except subprocess.TimeoutExpired:
            return ToolResult(output="", error=f"pip freeze timed out after {_PIN_TIMEOUT}s")
        except Exception as exc:  # noqa: BLE001
            return ToolResult(output="", error=str(exc))
206
+
207
+
208
# Self-register: importing this module instantiates both tools and adds them
# to the shared REGISTRY.
REGISTRY.register(DependencyGuardTool())
REGISTRY.register(PinDepsTool())
@@ -0,0 +1,167 @@
1
+ """DocumentReader — unified reader for Word, Excel, PDF, and CSV files.
2
+
3
+ All document library imports are guarded with try/except so the module loads
4
+ even when optional deps are not installed. Install with:
5
+ pip install 'gdm-code[docs]'
6
+ """
7
+ from __future__ import annotations
8
+ from dataclasses import dataclass, field
9
+ from pathlib import Path
10
+ from typing import Optional
11
+ import csv, io, logging
12
+
13
# Module-level logger; used by DocumentReader.read to report failed reads.
log = logging.getLogger(__name__)

# Names exported by ``from ... import *``.
__all__ = ["DocumentReader", "DocumentContent", "SheetData"]
16
+
17
+
18
@dataclass
class SheetData:
    """A single table: a sheet name, one header row, and the remaining rows."""

    name: str
    headers: list[str]
    rows: list[list[str]]  # all values coerced to str

    def to_text(self) -> str:
        """Return the table as text: tab-separated cells, one row per line.

        The header row always comes first, even when it is empty.
        """
        header_line = "\t".join(self.headers)
        body_lines = ("\t".join(cells) for cells in self.rows)
        return "\n".join([header_line, *body_lines])
28
+
29
+
30
@dataclass
class DocumentContent:
    """Outcome of reading one document: extracted text plus optional metadata."""

    file_path: str
    # "docx" | "xlsx" | "xls" | "pdf" | "csv" | "txt"
    format: str
    # full plain-text representation
    text: str
    title: Optional[str] = None
    author: Optional[str] = None
    page_count: Optional[int] = None
    # non-empty for spreadsheets
    sheets: list[SheetData] = field(default_factory=list)
    # set if partial parse failure
    error: Optional[str] = None

    @property
    def success(self) -> bool:
        """True when no error has been recorded on this result."""
        failed = self.error is not None
        return not failed
44
+
45
+
46
class DocumentReader:
    """Read Word, Excel, PDF, CSV and plain-text files into ``DocumentContent``.

    Third-party parsers are imported lazily inside each ``_read_*`` method so
    this module can be imported without the optional dependencies installed.
    """

    def read(self, path: Path | str) -> DocumentContent:
        """Read *path*, picking the parser from its lower-cased file suffix.

        Does not raise for unsupported formats or parser failures: both are
        reported through ``DocumentContent.error`` with empty ``text``.
        """
        path = Path(path)
        suffix = path.suffix.lower().lstrip(".")
        dispatch = {
            "docx": self._read_docx,
            "xlsx": self._read_xlsx,
            "xls": self._read_xls,
            "pdf": self._read_pdf,
            "csv": self._read_csv,
            "txt": self._read_text,
            "md": self._read_text,
        }
        reader = dispatch.get(suffix)
        if reader is None:
            return DocumentContent(
                file_path=str(path), format=suffix, text="",
                error=f"Unsupported format: .{suffix}"
            )
        try:
            return reader(path)
        except Exception as exc:
            # Boundary handler: any parser/IO failure becomes an error result.
            log.warning("Document read failed for %s: %s", path, exc)
            return DocumentContent(
                file_path=str(path), format=suffix, text="",
                error=str(exc)
            )

    def _read_docx(self, path: Path) -> DocumentContent:
        """Extract non-blank paragraphs, then table rows (tab-separated)."""
        try:
            import docx
        except ImportError as exc:
            raise ImportError("python-docx required. pip install 'gdm-code[docs]'") from exc
        doc = docx.Document(str(path))
        paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
        for table in doc.tables:
            for row in table.rows:
                paragraphs.append("\t".join(c.text for c in row.cells))
        props = doc.core_properties
        return DocumentContent(
            file_path=str(path), format="docx",
            text="\n".join(paragraphs),
            title=props.title or None,
            author=props.author or None,
        )

    def _read_xlsx(self, path: Path) -> DocumentContent:
        """Read every worksheet; the first row of each sheet becomes its headers."""
        try:
            import openpyxl
        except ImportError as exc:
            raise ImportError("openpyxl required. pip install 'gdm-code[docs]'") from exc
        wb = openpyxl.load_workbook(str(path), read_only=True, data_only=True)
        sheets, all_text = [], []
        try:
            for ws in wb.worksheets:
                rows = [[str(c.value) if c.value is not None else "" for c in row]
                        for row in ws.iter_rows()]
                headers = rows[0] if rows else []
                data_rows = rows[1:] if len(rows) > 1 else []
                sd = SheetData(name=ws.title, headers=headers, rows=data_rows)
                sheets.append(sd)
                all_text.append(f"[Sheet: {ws.title}]\n{sd.to_text()}")
        finally:
            # Close the workbook even when a sheet fails to parse.
            wb.close()
        return DocumentContent(
            file_path=str(path), format="xlsx",
            text="\n\n".join(all_text), sheets=sheets,
        )

    def _read_xls(self, path: Path) -> DocumentContent:
        """Read every sheet of a legacy .xls workbook; row 0 becomes the headers."""
        try:
            import xlrd
        except ImportError as exc:
            raise ImportError("xlrd required. pip install 'gdm-code[docs]'") from exc
        wb = xlrd.open_workbook(str(path))
        sheets, all_text = [], []
        for ws in wb.sheets():
            rows = [[str(ws.cell_value(r, c)) for c in range(ws.ncols)]
                    for r in range(ws.nrows)]
            headers = rows[0] if rows else []
            data_rows = rows[1:] if len(rows) > 1 else []
            sd = SheetData(name=ws.name, headers=headers, rows=data_rows)
            sheets.append(sd)
            all_text.append(f"[Sheet: {ws.name}]\n{sd.to_text()}")
        return DocumentContent(
            file_path=str(path), format="xls",
            text="\n\n".join(all_text), sheets=sheets,
        )

    def _read_pdf(self, path: Path) -> DocumentContent:
        """Extract text page by page; pages with no text are skipped."""
        try:
            import pdfplumber
        except ImportError as exc:
            raise ImportError("pdfplumber required. pip install 'gdm-code[docs]'") from exc
        pages_text = []
        with pdfplumber.open(str(path)) as pdf:
            # page_count counts every page, including those without text.
            page_count = len(pdf.pages)
            for page in pdf.pages:
                t = page.extract_text() or ""
                if t.strip():
                    pages_text.append(t)
        return DocumentContent(
            file_path=str(path), format="pdf",
            text="\n\n".join(pages_text),
            page_count=page_count,
        )

    def _read_csv(self, path: Path) -> DocumentContent:
        """Parse a CSV file into a single sheet named "Sheet1"."""
        # utf-8-sig drops a leading BOM; undecodable bytes are replaced.
        text = path.read_text(encoding="utf-8-sig", errors="replace")
        reader = csv.reader(io.StringIO(text))
        rows = list(reader)
        headers = rows[0] if rows else []
        data_rows = rows[1:] if len(rows) > 1 else []
        sd = SheetData(name="Sheet1", headers=headers, rows=data_rows)
        return DocumentContent(
            file_path=str(path), format="csv",
            text=sd.to_text(), sheets=[sd],
        )

    def _read_text(self, path: Path) -> DocumentContent:
        """Read a plain-text (.txt/.md) file as UTF-8, replacing bad bytes."""
        text = path.read_text(encoding="utf-8", errors="replace")
        return DocumentContent(
            # Lower-cased so "NOTES.TXT" reports "txt", matching read().
            file_path=str(path), format=path.suffix.lower().lstrip("."), text=text,
        )
@@ -0,0 +1,240 @@
1
+ """Document tools — LLM-callable wrappers for document read/write/index/search.
2
+
3
+ Registered tools:
4
+ - read_document : read Word/Excel/PDF/CSV → text
5
+ - generate_document: create docx or xlsx from spec
6
+ - index_document : index a document into the FTS search index
7
+ - search_documents: full-text search across indexed documents
8
+ """
9
+ from __future__ import annotations
10
+ import json, logging
11
+ from pathlib import Path
12
+ from typing import Any, ClassVar
13
+
14
+ from src.tools import REGISTRY, ToolBase, ToolResult
15
+ from src.tools.document_reader import DocumentReader
16
+ from src.tools.document_writer import DocumentWriter, DocxSpec, XlsxSpec
17
+
18
log = logging.getLogger(__name__)

# Public API. Lists every public function, constant and tool class defined in
# this module, including the index/search helpers and the ToolBase subclasses.
__all__ = [
    "is_document_path",
    "read_document_tool",
    "generate_document_tool",
    "index_document_tool",
    "search_documents_tool",
    "DOCUMENT_EXTENSIONS",
    "READ_DOCUMENT_SCHEMA",
    "GENERATE_DOCUMENT_SCHEMA",
    "ReadDocumentTool",
    "GenerateDocumentTool",
    "IndexDocumentTool",
    "SearchDocumentsTool",
]

# Lower-case file suffixes (without the dot) that is_document_path() accepts.
DOCUMENT_EXTENSIONS = frozenset({"docx", "xlsx", "xls", "pdf", "csv"})
30
+
31
+
32
def is_document_path(path: str) -> bool:
    """Return True when *path*'s suffix (case-insensitive) is in DOCUMENT_EXTENSIONS."""
    extension = Path(path).suffix.lower().lstrip(".")
    return extension in DOCUMENT_EXTENSIONS
34
+
35
+
36
def read_document_tool(path: str, include_sheet_data: bool = False) -> str:
    """Read a document and return its text content. Never raises.

    Failures come back as an ``"Error reading <path>: ..."`` string. With
    ``include_sheet_data`` set and at least one sheet present, the result is
    a JSON object holding the text and the per-sheet data. Otherwise any
    available title/author/page-count lines are placed above the text.
    """
    doc = DocumentReader().read(Path(path))
    if not doc.success:
        return f"Error reading {path}: {doc.error}"

    if include_sheet_data and doc.sheets:
        payload = {
            "text": doc.text,
            "sheets": [
                {"name": sheet.name, "headers": sheet.headers, "rows": sheet.rows}
                for sheet in doc.sheets
            ],
        }
        return json.dumps(payload, indent=2)

    # Title and author are shown when non-empty; page count whenever it is set.
    meta_lines = [
        f"{label}: {value}"
        for label, value, present in (
            ("Title", doc.title, bool(doc.title)),
            ("Author", doc.author, bool(doc.author)),
            ("Pages", doc.page_count, doc.page_count is not None),
        )
        if present
    ]
    if not meta_lines:
        return doc.text
    header = "\n".join(meta_lines)
    return f"{header}\n\n{doc.text}".strip()
56
+
57
+
58
def generate_document_tool(format: str, output_path: str, spec: dict) -> str:
    """Create a docx or xlsx from a spec dict. Never raises.

    Args:
        format: ``"docx"`` or ``"xlsx"``.
        output_path: File path the document is written to.
        spec: For docx, reads ``title``, ``author`` and ``sections``; for
            xlsx, reads ``sheets``. Missing keys fall back to empty values.

    Returns:
        ``"Created <path> (<n> bytes)"`` on success, otherwise a message that
        starts with ``"Error"``.
    """
    # Validate before touching the writer so bad arguments are reported as
    # messages instead of escaping as exceptions.
    if format not in ("docx", "xlsx"):
        return f"Error: unsupported format '{format}'. Use 'docx' or 'xlsx'."
    if not isinstance(spec, dict):
        return (
            f"Error generating {format}: spec must be an object (dict), "
            f"got {type(spec).__name__}"
        )

    try:
        writer = DocumentWriter()
        out = Path(output_path)
        if format == "docx":
            doc_spec = DocxSpec(
                title=spec.get("title", ""),
                author=spec.get("author", ""),
                sections=spec.get("sections", []),
            )
            result = writer.create_docx(doc_spec, out)
        else:
            xl_spec = XlsxSpec(sheets=spec.get("sheets", []))
            result = writer.create_xlsx(xl_spec, out)
    except Exception as exc:  # noqa: BLE001 - the docstring promises "never raises"
        return f"Error generating {format}: {exc}"

    if not result.success:
        return f"Error generating {format}: {result.error}"
    return f"Created {result.path} ({result.bytes_written:,} bytes)"
77
+
78
+
79
def index_document_tool(path: str) -> str:
    """Index a document for search and describe the outcome as a message.

    A negative count from the index is reported as an error, zero as
    "already up to date", and a positive count as the number of chunks.
    """
    try:
        from src.memory.document_index import DocumentIndex
    except ImportError:
        return "Error: document_index module not available."

    chunk_count = DocumentIndex().index_document(path)
    if chunk_count < 0:
        return f"Error: could not index {path} (file not found or unreadable)"
    if chunk_count:
        return f"Indexed {path}: {chunk_count} chunks"
    return f"{path} already up to date"
89
+
90
+
91
def search_documents_tool(query: str, limit: int = 10) -> str:
    """Search the document index for *query* and format the hits as text.

    Each hit becomes a ``[file — source label]`` line followed by its
    snippet; hits are separated by blank lines.
    """
    try:
        from src.memory.document_index import DocumentIndex
    except ImportError:
        return "Error: document_index module not available."

    hits = DocumentIndex().search(query, limit=limit)
    if not hits:
        return f"No results for '{query}'"
    return "\n\n".join(
        f"[{hit.file_path} — {hit.source_label}]\n{hit.snippet}" for hit in hits
    )
104
+
105
+
106
+ # ---------------------------------------------------------------------------
107
+ # LLM function-call schemas
108
+ # ---------------------------------------------------------------------------
109
+
110
# Function-call schema for the read_document tool. Its "parameters" entry is
# reused as ReadDocumentTool.input_schema.
READ_DOCUMENT_SCHEMA = {
    "name": "read_document",
    "description": "Read a Word (.docx), Excel (.xlsx/.xls), PDF, or CSV file and return its text content.",
    "parameters": {
        "type": "object",
        "properties": {
            "path": {"type": "string", "description": "Absolute or relative path to the document"},
            "include_sheet_data": {
                "type": "boolean",
                "description": "For spreadsheets: return JSON with per-sheet structured data",
                "default": False,
            },
        },
        "required": ["path"],
    },
}

# Function-call schema for the generate_document tool. Its "parameters" entry
# is reused as GenerateDocumentTool.input_schema.
GENERATE_DOCUMENT_SCHEMA = {
    "name": "generate_document",
    "description": "Create a new Word (.docx) or Excel (.xlsx) document from structured content.",
    "parameters": {
        "type": "object",
        "properties": {
            "format": {"type": "string", "enum": ["docx", "xlsx"]},
            "output_path": {"type": "string", "description": "File path to write the document to"},
            "spec": {
                "type": "object",
                "description": "Document spec. For docx: {title, sections:[{heading, paragraphs, table}]}. For xlsx: {sheets:[{name, headers, rows}]}",
            },
        },
        "required": ["format", "output_path", "spec"],
    },
}
143
+
144
+
145
+ # ---------------------------------------------------------------------------
146
+ # ToolBase subclasses
147
+ # ---------------------------------------------------------------------------
148
+
149
class ReadDocumentTool(ToolBase):
    """Read a Word, Excel, PDF, or CSV file."""

    name: ClassVar[str] = "read_document"
    description: ClassVar[str] = (
        "Read a Word (.docx), Excel (.xlsx/.xls), PDF, or CSV file and return its text content. "
        "For spreadsheets returns tab-separated table text per sheet."
    )
    input_schema: ClassVar[dict[str, Any]] = READ_DOCUMENT_SCHEMA["parameters"]

    def execute(self, params: dict[str, Any]) -> ToolResult:
        """Read ``params["path"]`` and wrap the text in a ``ToolResult``.

        Args:
            params: Must contain ``path``; may contain ``include_sheet_data``.

        Returns:
            The document text as ``output`` (with the path in ``metadata``),
            or the reader's message as ``error`` when the read failed.
        """
        path = params["path"]
        include_sheet_data = params.get("include_sheet_data", False)
        text = read_document_tool(path, include_sheet_data=include_sheet_data)
        # Match the exact failure prefix produced by read_document_tool. A bare
        # startswith("Error") would also reject any document whose own content
        # begins with the word "Error".
        if text.startswith(f"Error reading {path}: "):
            return ToolResult(output="", error=text)
        return ToolResult(output=text, metadata={"path": path})
166
+
167
+
168
class GenerateDocumentTool(ToolBase):
    """Tool wrapper that builds a Word or Excel file via generate_document_tool."""

    name: ClassVar[str] = "generate_document"
    description: ClassVar[str] = (
        "Create a new Word (.docx) or Excel (.xlsx) document from structured content. "
        "Returns the output file path and byte count."
    )
    input_schema: ClassVar[dict[str, Any]] = GENERATE_DOCUMENT_SCHEMA["parameters"]

    def execute(self, params: dict[str, Any]) -> ToolResult:
        """Generate the document; messages starting with "Error" become ``error``."""
        message = generate_document_tool(
            params["format"],
            params["output_path"],
            params.get("spec", {}),
        )
        failed = message.startswith("Error")
        return ToolResult(output="", error=message) if failed else ToolResult(output=message)
186
+
187
+
188
class IndexDocumentTool(ToolBase):
    """Index a document for full-text search."""

    name: ClassVar[str] = "index_document"
    description: ClassVar[str] = (
        "Index a document (Word, Excel, PDF, CSV) into the local search index. "
        "Run this before searching for document content."
    )
    input_schema: ClassVar[dict[str, Any]] = {
        "type": "object",
        "properties": {
            "path": {"type": "string", "description": "Path to the document to index"},
        },
        "required": ["path"],
    }

    def execute(self, params: dict[str, Any]) -> ToolResult:
        """Index ``params["path"]`` and report the outcome.

        Returns:
            The status message as ``output``, or as ``error`` when indexing
            failed.
        """
        result = index_document_tool(params["path"])
        # index_document_tool prefixes failures with "Error: ". Its success
        # messages can begin with the path itself, so a bare
        # startswith("Error") would misreport e.g. "ErrorLog.pdf already up to date".
        if result.startswith("Error: "):
            return ToolResult(output="", error=result)
        return ToolResult(output=result)
209
+
210
+
211
class SearchDocumentsTool(ToolBase):
    """Full-text search across all indexed documents."""

    name: ClassVar[str] = "search_documents"
    description: ClassVar[str] = (
        "Search all indexed documents for a query. "
        "Returns ranked passages with source file and location."
    )
    input_schema: ClassVar[dict[str, Any]] = {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "Search query"},
            "limit": {"type": "integer", "description": "Max results (default 10)", "default": 10},
        },
        "required": ["query"],
    }

    def execute(self, params: dict[str, Any]) -> ToolResult:
        """Search for ``params["query"]`` and return the formatted passages.

        Returns:
            The formatted hits (or a "No results" message) as ``output``, or
            the helper's "Error: ..." message as ``error``.
        """
        limit = params.get("limit")
        if limit is None:
            # An explicit null means "not specified"; use the schema default.
            limit = 10
        result = search_documents_tool(params["query"], limit=limit)
        # Successful output starts with "[" or "No results for", so this
        # prefix only matches the helper's own failure message. Reported as an
        # error for consistency with IndexDocumentTool.
        if result.startswith("Error: "):
            return ToolResult(output="", error=result)
        return ToolResult(output=result)
231
+
232
+
233
+ # ---------------------------------------------------------------------------
234
+ # Auto-register
235
+ # ---------------------------------------------------------------------------
236
+
237
+ REGISTRY.register(ReadDocumentTool())
238
+ REGISTRY.register(GenerateDocumentTool())
239
+ REGISTRY.register(IndexDocumentTool())
240
+ REGISTRY.register(SearchDocumentsTool())