delimit-cli 3.3.0 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -42,53 +42,35 @@ def _call(pkg: str, factory_name: str, method: str, args: Dict, tool_label: str)
42
42
  return {"tool": tool_label, "error": str(e)}
43
43
 
44
44
 
45
- # ─── DesignSystem (custom classes, no BaseMCPServer) ───────────────────
46
- # designsystem uses DesignSystemGenerator, not the _tool_* pattern.
47
- # Provide graceful pass-through until refactored.
48
-
49
- def design_validate_responsive(project_path: str, check_types: Optional[List[str]] = None) -> Dict[str, Any]:
50
- return {"tool": "design.validate_responsive", "project_path": project_path, "status": "pass-through"}
51
-
52
-
53
- def design_extract_tokens(figma_file_key: str, token_types: Optional[List[str]] = None) -> Dict[str, Any]:
54
- return {"tool": "design.extract_tokens", "figma_file_key": figma_file_key, "status": "pass-through"}
55
-
56
-
57
- def design_generate_component(component_name: str, figma_node_id: Optional[str] = None, output_path: Optional[str] = None) -> Dict[str, Any]:
58
- return {"tool": "design.generate_component", "component_name": component_name, "status": "pass-through"}
59
-
60
-
61
- def design_generate_tailwind(figma_file_key: str, output_path: Optional[str] = None) -> Dict[str, Any]:
62
- return {"tool": "design.generate_tailwind", "figma_file_key": figma_file_key, "status": "pass-through"}
63
-
64
-
65
- def design_component_library(project_path: str, output_format: str = "json") -> Dict[str, Any]:
66
- return {"tool": "design.component_library", "project_path": project_path, "status": "pass-through"}
67
-
68
-
69
- # ─── Storybook (custom classes, no BaseMCPServer) ─────────────────────
70
-
71
- def story_generate(component_path: str, story_name: Optional[str] = None, variants: Optional[List[str]] = None) -> Dict[str, Any]:
72
- return {"tool": "story.generate", "component_path": component_path, "status": "pass-through"}
73
-
74
-
75
- def story_visual_test(url: str, project_path: Optional[str] = None, threshold: float = 0.05) -> Dict[str, Any]:
76
- return {"tool": "story.visual_test", "url": url, "status": "pass-through"}
45
+ # ─── DesignSystem (real implementations in tools_design.py) ────────────
46
+ from .tools_design import (
47
+ design_extract_tokens,
48
+ design_generate_component,
49
+ design_generate_tailwind,
50
+ design_validate_responsive,
51
+ design_component_library,
52
+ story_generate,
53
+ story_visual_test,
54
+ story_accessibility,
55
+ )
77
56
 
78
57
 
79
58
  def story_build(project_path: str, output_dir: Optional[str] = None) -> Dict[str, Any]:
80
- return {"tool": "story.build", "project_path": project_path, "status": "pass-through"}
59
+ """Story build remains a stub — requires Storybook installed."""
60
+ return {"tool": "story.build", "project_path": project_path, "status": "not_available",
61
+ "message": "Storybook build requires Storybook installed. Run: npx storybook init"}
81
62
 
82
63
 
83
64
  def story_accessibility_test(project_path: str, standards: str = "WCAG2AA") -> Dict[str, Any]:
84
- return {"tool": "story.accessibility_test", "project_path": project_path, "status": "pass-through"}
65
+ """Delegate to story_accessibility (renamed for backward compat)."""
66
+ return story_accessibility(project_path=project_path, standards=standards)
85
67
 
86
68
 
87
- # ─── TestSmith (BaseMCPServer pattern) ─────────────────────────────────
69
+ # ─── TestSmith (Real implementations — tools_real.py) ─────────────────
88
70
 
89
71
  def test_generate(project_path: str, source_files: Optional[List[str]] = None, framework: str = "jest") -> Dict[str, Any]:
90
- return _call("testsmith", "create_testsmith_server", "_tool_generate",
91
- {"project_path": project_path, "source_files": source_files, "framework": framework}, "test.generate")
72
+ from .tools_real import test_generate as _real_test_generate
73
+ return _real_test_generate(project_path=project_path, source_files=source_files, framework=framework)
92
74
 
93
75
 
94
76
  def test_coverage(project_path: str, threshold: int = 80) -> Dict[str, Any]:
@@ -97,22 +79,17 @@ def test_coverage(project_path: str, threshold: int = 80) -> Dict[str, Any]:
97
79
 
98
80
 
99
81
  def test_smoke(project_path: str, test_suite: Optional[str] = None) -> Dict[str, Any]:
100
- result = _call("testsmith", "create_testsmith_server", "_tool_smoke",
101
- {"project_path": project_path}, "test.smoke")
102
- # Guard against stub that says "passed" with 0 tests actually run
103
- if result.get("tests_run", -1) == 0 and result.get("passed") is True:
104
- return {"tool": "test.smoke", "status": "no_tests",
105
- "error": "No smoke tests configured. The test runner found 0 tests to execute."}
106
- return result
82
+ from .tools_real import test_smoke as _real_test_smoke
83
+ return _real_test_smoke(project_path=project_path, test_suite=test_suite)
107
84
 
108
85
 
109
- # ─── DocsWeaver (BaseMCPServer pattern) ────────────────────────────────
86
+ # ─── DocsWeaver (Real implementations — tools_real.py) ────────────────
110
87
 
111
88
  def docs_generate(target: str = ".", options: Optional[Dict] = None) -> Dict[str, Any]:
112
- return _call("docsweaver", "create_docsweaver_server", "_tool_generate",
113
- {"project_path": target, "doc_types": ["api", "readme"], **(options or {})}, "docs.generate")
89
+ from .tools_real import docs_generate as _real_docs_generate
90
+ return _real_docs_generate(target=target, options=options)
114
91
 
115
92
 
116
93
  def docs_validate(target: str = ".", options: Optional[Dict] = None) -> Dict[str, Any]:
117
- return _call("docsweaver", "create_docsweaver_server", "_tool_validate",
118
- {"docs_path": target, **(options or {})}, "docs.validate")
94
+ from .tools_real import docs_validate as _real_docs_validate
95
+ return _real_docs_validate(target=target, options=options)
@@ -0,0 +1,387 @@
1
+ """
2
+ Delimit Deliberation Engine — Multi-round consensus with real model-to-model debate.
3
+
4
+ Passes each model's EXACT raw response to the other models for counter-arguments.
5
+ Rounds continue until unanimous agreement or max rounds reached.
6
+
7
+ Models are configured via ~/.delimit/models.json — users choose which AI models
8
+ to include in deliberations. Supports any OpenAI-compatible API.
9
+ """
10
+
11
+ import json
12
+ import logging
13
+ import os
14
+ import shutil
15
+ import time
16
+ import urllib.request
17
+ import urllib.error
18
+ from pathlib import Path
19
+ from typing import Any, Dict, List, Optional
20
+
21
# Module-level logger with a fixed name (not derived from __name__).
logger = logging.getLogger("delimit.deliberation")

# Per-user locations under ~/.delimit: saved transcripts and the optional
# user-supplied model configuration file.
DELIBERATION_DIR = Path.home().joinpath(".delimit", "deliberations")
MODELS_CONFIG = Path.home().joinpath(".delimit", "models.json")

# Built-in model catalogue used when no models.json can be loaded.
# Every entry ships with enabled=False; get_models_config() decides at
# runtime which ones are actually usable.
DEFAULT_MODELS = dict(
    grok=dict(
        name="Grok 4",
        api_url="https://api.x.ai/v1/chat/completions",
        model="grok-4-0709",
        env_key="XAI_API_KEY",
        enabled=False,
    ),
    gemini=dict(
        name="Gemini 2.5 Flash",
        # "{project}" is substituted by _call_model before the request is sent.
        api_url=(
            "https://us-central1-aiplatform.googleapis.com/v1/projects/{project}"
            "/locations/us-central1/publishers/google/models/"
            "gemini-2.5-flash:generateContent"
        ),
        model="gemini-2.5-flash",
        env_key="GOOGLE_APPLICATION_CREDENTIALS",
        enabled=False,
        format="vertex_ai",
    ),
    codex=dict(
        name="Codex (GPT-5.4)",
        format="codex_cli",
        model="gpt-5.4",
        env_key="CODEX_CLI",
        enabled=False,
    ),
)
50
+
51
+
52
def get_models_config() -> Dict[str, Any]:
    """Return the model configuration used for deliberations.

    If ``MODELS_CONFIG`` exists and contains a JSON object, that object is
    returned unchanged (no environment auto-detection is applied to it).
    Otherwise a configuration is derived from ``DEFAULT_MODELS``:

    * ``codex_cli`` entries are enabled when a ``codex`` executable is found
      on PATH; the resolved path is stored under ``"codex_path"``.
    * every other entry is enabled when the environment variable named by
      its ``"env_key"`` is non-empty; the value is stored under ``"api_key"``.

    Returns:
        Mapping of model id to its configuration dict.
    """
    if MODELS_CONFIG.exists():
        try:
            loaded = json.loads(MODELS_CONFIG.read_text())
        except (OSError, ValueError) as exc:
            # JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
            # Fall back to auto-detection, but tell the user why their file
            # was ignored instead of swallowing the problem silently.
            logger.warning("Ignoring unreadable model config %s: %s", MODELS_CONFIG, exc)
        else:
            if isinstance(loaded, dict):
                return loaded
            # Callers iterate with .items(); anything but an object is unusable.
            logger.warning(
                "Ignoring model config %s: expected a JSON object, got %s",
                MODELS_CONFIG,
                type(loaded).__name__,
            )

    # Auto-detect from the environment.
    config: Dict[str, Any] = {}
    for model_id, defaults in DEFAULT_MODELS.items():
        if defaults.get("format") == "codex_cli":
            # Codex is driven through its CLI, so availability means "on PATH".
            codex_path = shutil.which("codex")
            config[model_id] = {
                **defaults,
                "enabled": codex_path is not None,
                "codex_path": codex_path or "",
            }
        else:
            key = os.environ.get(defaults["env_key"], "")
            config[model_id] = {
                **defaults,
                "api_key": key,
                "enabled": bool(key),
            }

    return config
81
+
82
+
83
def configure_models() -> Dict[str, Any]:
    """Summarise which deliberation models are usable and how to enable the rest.

    Returns:
        A dict with:
        ``configured_models`` - ids of entries whose ``"enabled"`` flag is truthy;
        ``missing_models``    - id -> one-line hint for each disabled entry;
        ``config_path``       - string path of the models.json file;
        ``note``              - general guidance text.
    """
    config = get_models_config()

    configured = []
    missing = {}
    for model_id, entry in config.items():
        if entry.get("enabled"):
            configured.append(model_id)
        elif entry.get("format") == "codex_cli":
            # Codex availability is decided by finding the CLI on PATH, not by
            # an environment variable, so an env-var hint would be misleading.
            missing[model_id] = "Install the codex CLI and make sure it is on PATH"
        else:
            # .get() so a hand-written models.json entry without "env_key"
            # yields a hint instead of raising KeyError.
            missing[model_id] = f"Set {entry.get('env_key', 'the API key')} environment variable"

    return {
        "configured_models": configured,
        "missing_models": missing,
        "config_path": str(MODELS_CONFIG),
        "note": "Add API keys to enable more models for deliberation. "
                "Set env vars or create ~/.delimit/models.json",
    }
96
+
97
+
98
+ def _call_codex(prompt: str, system_prompt: str = "") -> str:
99
+ """Call Codex via CLI subprocess."""
100
+ import subprocess
101
+ codex_path = shutil.which("codex")
102
+ if not codex_path:
103
+ return "[Codex unavailable — codex CLI not found in PATH]"
104
+
105
+ full_prompt = f"{system_prompt}\n\n{prompt}" if system_prompt else prompt
106
+ try:
107
+ result = subprocess.run(
108
+ [codex_path, "exec", "--dangerously-bypass-approvals-and-sandbox", full_prompt],
109
+ capture_output=True,
110
+ text=True,
111
+ timeout=120,
112
+ )
113
+ output = result.stdout.strip()
114
+ if not output and result.stderr:
115
+ return f"[Codex error: {result.stderr[:300]}]"
116
+ return output or "[Codex returned empty response]"
117
+ except subprocess.TimeoutExpired:
118
+ return "[Codex timed out after 120s]"
119
+ except Exception as e:
120
+ return f"[Codex error: {e}]"
121
+
122
+
123
+ def _call_model(model_id: str, config: Dict, prompt: str, system_prompt: str = "") -> str:
124
+ """Call any supported model — OpenAI-compatible API, Vertex AI, or Codex CLI."""
125
+ fmt = config.get("format", "openai")
126
+
127
+ # Codex uses CLI, not HTTP API
128
+ if fmt == "codex_cli":
129
+ return _call_codex(prompt, system_prompt)
130
+
131
+ api_key = config.get("api_key") or os.environ.get(config.get("env_key", ""), "")
132
+ # Vertex AI uses service account auth, not API key
133
+ if not api_key and fmt != "vertex_ai":
134
+ return f"[{config.get('name', model_id)} unavailable — {config.get('env_key')} not set]"
135
+
136
+ api_url = config["api_url"]
137
+ model = config.get("model", "")
138
+
139
+ try:
140
+ if fmt == "vertex_ai":
141
+ # Vertex AI format — use google-auth for access token
142
+ try:
143
+ import google.auth
144
+ import google.auth.transport.requests
145
+ # Explicitly set credentials path if not in env
146
+ creds_path = "/root/.config/gcloud/application_default_credentials.json"
147
+ if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") and os.path.exists(creds_path):
148
+ os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = creds_path
149
+ creds, project = google.auth.default()
150
+ creds.refresh(google.auth.transport.requests.Request())
151
+ actual_url = api_url.replace("{project}", project or os.environ.get("GOOGLE_CLOUD_PROJECT", "jamsons"))
152
+ data = json.dumps({
153
+ "contents": [{"role": "user", "parts": [{"text": f"{system_prompt}\n\n{prompt}" if system_prompt else prompt}]}],
154
+ "generationConfig": {"maxOutputTokens": 4096, "temperature": 0.7},
155
+ }).encode()
156
+ req = urllib.request.Request(
157
+ actual_url,
158
+ data=data,
159
+ headers={
160
+ "Authorization": f"Bearer {creds.token}",
161
+ "Content-Type": "application/json",
162
+ },
163
+ method="POST",
164
+ )
165
+ except ImportError:
166
+ return f"[Gemini unavailable — install google-auth: pip install google-auth]"
167
+ elif fmt == "google":
168
+ # Google Generative AI format (API key)
169
+ data = json.dumps({
170
+ "contents": [{"role": "user", "parts": [{"text": f"{system_prompt}\n\n{prompt}" if system_prompt else prompt}]}],
171
+ "generationConfig": {"maxOutputTokens": 4096, "temperature": 0.7},
172
+ }).encode()
173
+ req = urllib.request.Request(
174
+ f"{api_url}?key={api_key}",
175
+ data=data,
176
+ headers={"Content-Type": "application/json"},
177
+ method="POST",
178
+ )
179
+ else:
180
+ # OpenAI-compatible format (works for xAI, OpenAI, etc.)
181
+ messages = []
182
+ if system_prompt:
183
+ messages.append({"role": "system", "content": system_prompt})
184
+ messages.append({"role": "user", "content": prompt})
185
+
186
+ data = json.dumps({
187
+ "model": model,
188
+ "messages": messages,
189
+ "temperature": 0.7,
190
+ "max_tokens": 4096,
191
+ }).encode()
192
+ req = urllib.request.Request(
193
+ api_url,
194
+ data=data,
195
+ headers={
196
+ "Authorization": f"Bearer {api_key}",
197
+ "Content-Type": "application/json",
198
+ "User-Agent": "Delimit/3.3.0",
199
+ },
200
+ method="POST",
201
+ )
202
+
203
+ with urllib.request.urlopen(req, timeout=120) as resp:
204
+ result = json.loads(resp.read())
205
+
206
+ if fmt in ("google", "vertex_ai"):
207
+ return result["candidates"][0]["content"]["parts"][0]["text"]
208
+ else:
209
+ return result["choices"][0]["message"]["content"]
210
+
211
+ except Exception as e:
212
+ return f"[{config.get('name', model_id)} error: {e}]"
213
+
214
+
215
def _verdict_agrees(response: str) -> Optional[bool]:
    """Classify the last ``VERDICT:`` marker in *response* (case-insensitive).

    Returns True when the text after the final marker starts with AGREE
    (this includes "AGREE WITH MODIFICATIONS"), False for any other verdict
    text, and None when the response contains no marker.
    """
    upper = response.upper()
    if "VERDICT:" not in upper:
        return None
    # Replies are often Markdown ("**VERDICT:** AGREE"), so skip emphasis
    # characters and whitespace before reading the verdict word.
    tail = upper.rsplit("VERDICT:", 1)[-1].lstrip("*_`# \t\r\n")
    return tail.startswith("AGREE")


def deliberate(
    question: str,
    context: str = "",
    max_rounds: int = 3,
    mode: str = "dialogue",
    require_unanimous: bool = True,
    save_path: Optional[str] = None,
) -> Dict[str, Any]:
    """Run a multi-round deliberation across all enabled AI models.

    Round 1 asks every model independently. In each later round every model
    is shown the others' replies; rounds stop early once every model's reply
    in a round ends with an agreeing verdict.

    Modes:
    - "debate": each model sees the other models' full replies from the
      previous round.
    - "dialogue": short turns; each model sees the whole conversation thread
      so far. If ``max_rounds`` is left at 3 it is raised to 6.

    Args:
        question: The question put to the models.
        context: Optional background text placed before the question.
        max_rounds: Upper bound on rounds, including the independent round.
        mode: "dialogue" or "debate"; any other value behaves like "debate".
        require_unanimous: Accepted for interface compatibility; the current
            implementation always requires unanimity and does not read it.
        save_path: Where to write the JSON transcript. Defaults to a
            timestamped file in ``DELIBERATION_DIR``.

    Returns:
        The transcript dict (rounds, flat thread, verdict fields, and
        ``saved_to``), or an error dict when fewer than two models are enabled.
    """
    DELIBERATION_DIR.mkdir(parents=True, exist_ok=True)

    config = get_models_config()
    enabled_models = {k: v for k, v in config.items() if v.get("enabled")}

    if len(enabled_models) < 2:
        return {
            "error": "Need at least 2 AI models for deliberation.",
            "configured": list(enabled_models.keys()),
            "missing": {k: f"Set {v.get('env_key', 'key')}" for k, v in config.items() if not v.get("enabled")},
            "tip": "Set API key environment variables or create ~/.delimit/models.json",
        }

    model_ids = list(enabled_models.keys())

    def display_name(mid: str) -> str:
        # User-written config entries may omit "name"; fall back to the id.
        return enabled_models[mid].get("name", mid)

    def utc_stamp() -> str:
        # The format ends in "Z", so the time must really be UTC.
        return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    # Dialogue mode uses more rounds with shorter responses.
    if mode == "dialogue" and max_rounds == 3:
        max_rounds = 6

    transcript = {
        "question": question,
        "context": context,
        "mode": mode,
        "models": model_ids,
        "started_at": utc_stamp(),
        "rounds": [],
        "thread": [],  # flat conversation thread for dialogue mode
        "unanimous": False,
        "final_verdict": None,
    }

    if mode == "dialogue":
        system_prompt = (
            "You are in a group chat with other AI models. Keep responses to 2-4 sentences. "
            "Be direct and conversational — this is a discussion, not an essay. "
            "Build on what others said. Disagree specifically if you disagree. "
            "When you're ready to agree, say VERDICT: AGREE. "
            "If you disagree, say VERDICT: DISAGREE — [why in one sentence]."
        )
    else:
        system_prompt = (
            "You are participating in a structured multi-model deliberation with other AI models. "
            "You will see other models' exact responses and must engage with their specific arguments. "
            "At the END of your response, you MUST include exactly one of these lines:\n"
            "VERDICT: AGREE\n"
            "VERDICT: DISAGREE — [one sentence reason]\n"
            "VERDICT: AGREE WITH MODIFICATIONS — [one sentence modification]\n"
            "Do not hedge. Take a clear position."
        )

    def ask(mid: str, prompt_text: str) -> str:
        reply = _call_model(mid, enabled_models[mid], prompt_text, system_prompt)
        # Everything downstream (len, upper, JSON thread) assumes text.
        if not isinstance(reply, str):
            reply = f"[{display_name(mid)} returned no text]"
        return reply

    full_prompt = f"{context}\n\nQUESTION:\n{question}" if context else question

    # Round 1: independent responses.
    logger.info("Deliberation Round 1 (%s mode): Independent responses", mode)
    round1 = {"round": 1, "type": "independent", "responses": {}}
    if mode == "dialogue":
        # Shorter initial prompt for dialogue.
        r1_prompt = f"{full_prompt}\n\nGive your initial take in 2-4 sentences. Don't write an essay."
    else:
        r1_prompt = full_prompt

    for model_id in model_ids:
        response = ask(model_id, r1_prompt)
        round1["responses"][model_id] = response
        transcript["thread"].append({"model": model_id, "round": 1, "text": response})
        logger.info("  %s: %d chars", model_id, len(response))

    transcript["rounds"].append(round1)

    # Subsequent rounds: models see each other's responses.
    for round_num in range(2, max_rounds + 1):
        logger.info("Deliberation Round %d (%s)", round_num, mode)
        round_data = {"round": round_num, "type": "deliberation", "responses": {}}
        prev = transcript["rounds"][-1]["responses"]

        for model_id in model_ids:
            if mode == "dialogue":
                # Dialogue: show the full thread so far, which already
                # includes replies given earlier in this same round.
                parts = [f"Topic: {question}\n\nConversation so far:\n"]
                parts.extend(
                    f"\n[{display_name(entry['model'])}]: {entry['text']}\n"
                    for entry in transcript["thread"]
                )
                parts.append(
                    f"\nYour turn ({display_name(model_id)}). "
                    f"Respond in 2-4 sentences to the conversation above. "
                    f"If you agree with the emerging consensus, say VERDICT: AGREE. "
                    f"If not, push back specifically."
                )
                cross_prompt = "".join(parts)
            else:
                # Debate: show the other models' full replies from last round.
                others_text = "".join(
                    f"\n=== {display_name(other_id).upper()}'S EXACT RESPONSE "
                    f"(Round {round_num - 1}) ===\n"
                    f"{prev[other_id]}\n"
                    for other_id in model_ids
                    if other_id != model_id
                )
                cross_prompt = (
                    f"DELIBERATION ROUND {round_num}\n\n"
                    f"Original question: {question}\n"
                    f"{others_text}\n"
                    f"Respond to the other models' SPECIFIC arguments. "
                    f"Quote them directly if you disagree. "
                    f"End with VERDICT: AGREE / DISAGREE / AGREE WITH MODIFICATIONS."
                )

            response = ask(model_id, cross_prompt)
            round_data["responses"][model_id] = response
            transcript["thread"].append({"model": model_id, "round": round_num, "text": response})

        transcript["rounds"].append(round_data)

        # Unanimous only if every model gave a verdict and it agrees; a
        # missing verdict counts as no agreement.
        if all(_verdict_agrees(round_data["responses"][mid]) is True for mid in model_ids):
            transcript["unanimous"] = True
            transcript["final_verdict"] = "UNANIMOUS AGREEMENT"
            transcript["agreed_at_round"] = round_num
            break
    else:
        # Max rounds reached without agreement (also the path when
        # max_rounds < 2): record each model's last stated position.
        transcript["final_verdict"] = "MAX ROUNDS REACHED"
        last_responses = transcript["rounds"][-1]["responses"]
        for model_id in model_ids:
            agrees = _verdict_agrees(last_responses[model_id])
            if agrees is None:
                verdict = "unknown"
            else:
                verdict = "agree" if agrees else "disagree"
            transcript[f"{model_id}_final"] = verdict

    transcript["completed_at"] = utc_stamp()

    # Save transcript.
    save_to = save_path
    if not save_to:
        ts = time.strftime("%Y%m%d_%H%M%S")
        save_to = str(DELIBERATION_DIR / f"deliberation_{ts}.json")

    Path(save_to).parent.mkdir(parents=True, exist_ok=True)
    Path(save_to).write_text(json.dumps(transcript, indent=2))
    # Added after writing, so the saved file does not contain its own path.
    transcript["saved_to"] = save_to

    return transcript