delimit-cli 3.14.45 → 3.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # `</>` Delimit
2
2
 
3
- API governance that enforces itself. One workspace for Claude Code, Codex, Cursor, and Gemini CLI.
3
+ Stop re-explaining your codebase every session. Memory, tasks, and governance that persist across Claude Code, Codex, Cursor, and Gemini CLI.
4
4
 
5
5
  [![npm](https://img.shields.io/npm/v/delimit-cli)](https://www.npmjs.com/package/delimit-cli)
6
6
  [![GitHub Action](https://img.shields.io/badge/GitHub%20Action-v1.8.0-blue)](https://github.com/marketplace/actions/delimit-api-governance)
@@ -37,7 +37,7 @@ INBOX_ROUTING_FILE = Path.home() / ".delimit" / "inbox_routing.jsonl"
37
37
  IMAP_HOST = "mail.spacemail.com"
38
38
  IMAP_PORT = 993
39
39
  IMAP_USER = "pro@delimit.ai"
40
- FORWARD_TO = "configured-email@example.com"
40
+ FORWARD_TO = os.environ.get("DELIMIT_FORWARD_TO", "")
41
41
 
42
42
  # Domains/senders whose emails require owner action
43
43
  OWNER_ACTION_DOMAINS = {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "delimit-cli",
3
3
  "mcpName": "io.github.delimit-ai/delimit-mcp-server",
4
- "version": "3.14.45",
4
+ "version": "3.15.0",
5
5
  "description": "Unify Claude Code, Codex, Cursor, and Gemini CLI with persistent context, governance, and multi-model debate.",
6
6
  "main": "index.js",
7
7
  "files": [
@@ -13,6 +13,7 @@
13
13
  "!gateway/ai/social.py",
14
14
  "!gateway/ai/founding_users.py",
15
15
  "!gateway/ai/inbox_daemon.py",
16
+ "!gateway/ai/deliberation.py",
16
17
  "scripts/",
17
18
  "server.json",
18
19
  "README.md",
@@ -44,7 +44,7 @@ fi
44
44
 
45
45
  # 4. Proprietary files that shouldn't ship
46
46
  echo -n " Proprietary files... "
47
- PROPRIETARY="social_target\.py|social\.py|founding_users\.py|inbox_daemon\.py"
47
+ PROPRIETARY="social_target\.py|social\.py|founding_users\.py|inbox_daemon\.py|deliberation\.py"
48
48
  if find "$TMPDIR/package/" -name "*.py" | grep -Ei "$PROPRIETARY" 2>/dev/null; then
49
49
  echo "❌ PROPRIETARY FILES IN PACKAGE"
50
50
  FAIL=1
@@ -1,1033 +0,0 @@
1
- """
2
- Delimit Deliberation Engine — Multi-round consensus with real model-to-model debate.
3
-
4
- Passes each model's EXACT raw response to the other models for counter-arguments.
5
- Rounds continue until unanimous agreement or max rounds reached.
6
-
7
- Models are configured via ~/.delimit/models.json — users choose which AI models
8
- to include in deliberations. Supports any OpenAI-compatible API.
9
-
10
- ## Hosted (Free Tier) vs BYOK
11
- ##
12
- ## Users without their own API keys get up to 3 free deliberations using Delimit's
13
- ## hosted keys (STR-066). After that, they must configure their own keys (BYOK) for
14
- ## unlimited use. Hosted calls are content-moderated and rate-limited.
15
-
16
- ## Design: Tool-Augmented Deliberation (LED-069, in_progress)
17
- ##
18
- ## Goal: Allow models to call Delimit tools during debate rounds so deliberations
19
- ## are grounded in real data instead of guesses.
20
- ##
21
- ## Approach:
22
- ## 1. Before each deliberation round, run a "context gather" pass that executes
23
- ## relevant Delimit tools (ledger status, governance health, repo analysis)
24
- ## and includes the results in the prompt context.
25
- ## 2. Models receive a "Tool Results" section in their prompt showing real data:
26
- ## - Ledger: "70 open items, 12 blocked, 3 overdue"
27
- ## - Governance: "2 repos failing, policy score 87/100"
28
- ## - Spec diff: "5 breaking changes in v2.3.0"
29
- ## 3. This makes statements like "I checked the ledger and there are 70 open items"
30
- ## factual rather than hallucinated.
31
- ## 4. Implementation phases:
32
- ## a. Static context injection: pre-gather tool results, include in all prompts
33
- ## b. On-demand tool calls: models request specific tool calls via structured
34
- ## output, engine executes them between rounds
35
- ## c. Full function-calling: models use native tool_use/function_calling APIs
36
- ## where supported (OpenAI, Anthropic, Gemini)
37
- ## 5. Security: only whitelisted read-only tools are available during deliberation.
38
- ## No write operations (ledger_add, deploy, etc.) to prevent side effects.
39
- """
40
-
41
- import json
42
- import logging
43
- import os
44
- import re
45
- import shutil
46
- import time
47
- import urllib.request
48
- import urllib.error
49
- import uuid
50
- from datetime import datetime, timezone
51
- from pathlib import Path
52
- from typing import Any, Dict, List, Optional, Tuple
53
-
54
- logger = logging.getLogger("delimit.deliberation")
55
-
56
# --- On-disk locations (everything lives under the per-user ~/.delimit directory) ---

# Directory that deliberation artifacts are written into.
DELIBERATION_DIR = Path.home() / ".delimit" / "deliberations"
# User-supplied model configuration (BYOK).
MODELS_CONFIG = Path.home() / ".delimit" / "models.json"
# Keys for the hosted (free tier) models.
HOSTED_MODELS_CONFIG = Path.home() / ".delimit" / "secrets" / "hosted-models.json"
# Per-install usage counters, keyed by install ID.
HOSTED_USAGE_FILE = Path.home() / ".delimit" / "deliberation_usage.json"
# Single {"date", "count"} record backing the daily hosted cap.
HOSTED_DAILY_FILE = Path.home() / ".delimit" / "hosted_usage_daily.json"

# --- Hosted tier limits ---

# Number of hosted deliberations allowed per install.
HOSTED_MAX_PER_INSTALL = 3
# Daily hosted cap used when DELIMIT_HOSTED_DAILY_CAP is not set.
HOSTED_DAILY_CAP_DEFAULT = 100
64
-
65
# --- Content moderation keyword lists for hosted tier ---
# Matched as case-insensitive substrings of the question + context text.
# Deliberately broad: a false positive only means "use your own keys".

# Sexually explicit material.
_MODERATION_EXPLICIT = [
    "porn",
    "hentai",
    "nsfw",
    "xxx",
    "nude",
    "naked",
    "sex act",
    "erotic",
    "fetish",
    "orgasm",
    "genital",
    "masturbat",
]

# Requests for help causing physical harm.
_MODERATION_VIOLENCE = [
    "how to kill",
    "how to murder",
    "make a bomb",
    "build a weapon",
    "synthesize poison",
    "manufacture explosive",
    "how to harm",
    "torture method",
    "assassination",
    "mass shooting",
]

# Requests for help with illegal activity.
_MODERATION_ILLEGAL = [
    "how to hack into",
    "steal credit card",
    "forge identity",
    "launder money",
    "cook meth",
    "make drugs",
    "child exploit",
    "bypass security",
    "crack password",
    "phishing attack",
    "ddos attack",
    "ransomware",
]

# Prompt-injection / jailbreak phrasing.
_MODERATION_TOS = [
    "ignore previous instructions",
    "ignore your system prompt",
    "you are now",
    "jailbreak",
    "dan mode",
    "pretend you have no rules",
    "act as an unrestricted",
    "bypass your filters",
    "override safety",
    "disregard all instructions",
]
89
-
90
-
91
def _get_install_id() -> str:
    """Return the stable install ID used for usage tracking, creating it if needed.

    The ID is stored as plain text in ``~/.delimit/install_id``. A missing,
    unreadable, or blank file is treated as "no ID yet": a fresh UUID4 is
    generated and persisted on a best-effort basis.

    Returns:
        A non-empty identifier string. If the ID cannot be persisted, the
        freshly generated value is still returned (it will not be stable
        across calls in that case).
    """
    id_file = Path.home() / ".delimit" / "install_id"

    try:
        existing = id_file.read_text().strip()
    except (OSError, ValueError):
        # Missing/unreadable file, or bytes that do not decode as text.
        existing = ""
    if existing:
        return existing

    # A blank file would otherwise make "" the key every install shares.
    install_id = str(uuid.uuid4())
    try:
        id_file.parent.mkdir(parents=True, exist_ok=True)
        id_file.write_text(install_id)
    except OSError:
        # Best effort: a read-only home must not break the caller.
        pass
    return install_id
106
-
107
-
108
def _load_hosted_models() -> Dict[str, Any]:
    """Load hosted (Delimit-subsidized) model keys from the secrets file.

    Reads ``HOSTED_MODELS_CONFIG`` (a JSON object keyed by provider) and
    converts each recognised provider into a model config in the shape
    ``_call_model`` consumes. Recognised providers:

    * ``xai``    -> ``grok``   (API key required)
    * ``gemini`` -> ``gemini`` (API key, or ``vertex_ai: true`` for Vertex AI)
    * ``codex``  -> ``openai`` (API key required)

    Returns:
        Mapping of model ID to config. Empty when the file is missing,
        unreadable, or not a JSON object. Provider entries that are not
        objects, or that lack the required key, are skipped individually.
    """
    if not HOSTED_MODELS_CONFIG.exists():
        return {}
    try:
        raw = json.loads(HOSTED_MODELS_CONFIG.read_text())
    except (OSError, ValueError) as e:
        logger.warning("Failed to load hosted models: %s", e)
        return {}
    if not isinstance(raw, dict):
        logger.warning("Failed to load hosted models: %s", "top-level JSON value is not an object")
        return {}

    result: Dict[str, Any] = {}
    for provider, cfg in raw.items():
        if not isinstance(cfg, dict):
            # One malformed entry should not discard the other providers.
            continue
        api_key = cfg.get("api_key", "")
        model = cfg.get("model", "")

        if provider == "xai":
            if not api_key:
                continue
            result["grok"] = {
                "name": "Grok (hosted)",
                "api_url": "https://api.x.ai/v1/chat/completions",
                "model": model or "grok-4-0709",
                "api_key": api_key,
                "enabled": True,
                "backend": "api",
                "hosted": True,
            }
        elif provider == "gemini":
            gemini_model = model or "gemini-2.5-flash"
            if api_key:
                # Google Generative AI (API key) endpoint.
                result["gemini"] = {
                    "name": "Gemini (hosted)",
                    "api_url": (
                        "https://generativelanguage.googleapis.com/v1beta/models/"
                        + gemini_model
                        + ":generateContent"
                    ),
                    "model": gemini_model,
                    "api_key": api_key,
                    "enabled": True,
                    "backend": "api",
                    "format": "google",
                    "hosted": True,
                }
            elif cfg.get("vertex_ai"):
                # Vertex AI endpoint. "{project}" is left as a literal
                # placeholder for the caller to substitute. The URL now names
                # the configured model instead of always gemini-2.5-flash, so
                # "model" and the endpoint can no longer disagree.
                result["gemini"] = {
                    "name": "Gemini (hosted)",
                    "api_url": (
                        "https://us-central1-aiplatform.googleapis.com/v1/projects/{project}"
                        "/locations/us-central1/publishers/google/models/"
                        + gemini_model
                        + ":generateContent"
                    ),
                    "model": gemini_model,
                    "enabled": True,
                    "backend": "api",
                    "format": "vertex_ai",
                    "hosted": True,
                }
        elif provider == "codex":
            if not api_key:
                continue
            result["openai"] = {
                "name": "Codex (hosted)",
                "api_url": "https://api.openai.com/v1/chat/completions",
                "model": model or "codex-mini-latest",
                "api_key": api_key,
                "enabled": True,
                "backend": "api",
                "hosted": True,
            }
    return result
175
-
176
-
177
def _check_hosted_quota(install_id: str) -> Tuple[bool, int]:
    """Report whether *install_id* still has hosted deliberations left.

    Returns:
        ``(allowed, used_count)``. A missing or unparseable usage file counts
        as zero usage, so the install is allowed.
    """
    no_usage = (True, 0)

    if not HOSTED_USAGE_FILE.exists():
        return no_usage

    try:
        usage_by_install = json.loads(HOSTED_USAGE_FILE.read_text())
    except Exception:
        return no_usage

    record = usage_by_install.get(install_id, {})
    hosted_used = record.get("hosted_count", 0)
    under_limit = hosted_used < HOSTED_MAX_PER_INSTALL
    return under_limit, hosted_used
191
-
192
-
193
def _increment_hosted_usage(install_id: str) -> Dict[str, Any]:
    """Record one hosted deliberation for *install_id*.

    Bumps both ``hosted_count`` and ``total_count``, stamps ``last_used``
    with the current UTC time, and rewrites the usage file. A failed write
    is logged, not raised.

    Returns:
        The updated usage entry for this install.
    """
    HOSTED_USAGE_FILE.parent.mkdir(parents=True, exist_ok=True)

    # An unreadable or corrupt file is treated as "no usage recorded yet".
    usage_by_install = {}
    try:
        if HOSTED_USAGE_FILE.exists():
            usage_by_install = json.loads(HOSTED_USAGE_FILE.read_text())
    except Exception:
        usage_by_install = {}

    blank_record = {
        "install_id": install_id,
        "hosted_count": 0,
        "total_count": 0,
        "last_used": None,
    }
    record = usage_by_install.get(install_id, blank_record)
    for counter in ("hosted_count", "total_count"):
        record[counter] = record.get(counter, 0) + 1
    record["last_used"] = datetime.now(timezone.utc).isoformat()
    usage_by_install[install_id] = record

    serialized = None
    try:
        serialized = json.dumps(usage_by_install, indent=2)
        HOSTED_USAGE_FILE.write_text(serialized)
    except Exception as e:
        logger.warning("Failed to write hosted usage: %s", e)
    return record
217
-
218
-
219
def _increment_total_usage(install_id: str) -> None:
    """Record one BYOK deliberation for *install_id*.

    Only ``total_count`` and ``last_used`` change; ``hosted_count`` is left
    alone so BYOK calls never consume the free hosted quota. A failed write
    is logged, not raised.
    """
    HOSTED_USAGE_FILE.parent.mkdir(parents=True, exist_ok=True)

    # An unreadable or corrupt file is treated as "no usage recorded yet".
    usage_by_install = {}
    try:
        if HOSTED_USAGE_FILE.exists():
            usage_by_install = json.loads(HOSTED_USAGE_FILE.read_text())
    except Exception:
        usage_by_install = {}

    blank_record = {
        "install_id": install_id,
        "hosted_count": 0,
        "total_count": 0,
        "last_used": None,
    }
    record = usage_by_install.get(install_id, blank_record)
    record["total_count"] = 1 + record.get("total_count", 0)
    record["last_used"] = datetime.now(timezone.utc).isoformat()
    usage_by_install[install_id] = record

    try:
        payload = json.dumps(usage_by_install, indent=2)
        HOSTED_USAGE_FILE.write_text(payload)
    except Exception as e:
        logger.warning("Failed to write usage: %s", e)
241
-
242
-
243
def _check_global_daily_cap() -> Tuple[bool, int]:
    """Check whether the global daily hosted cap still has room.

    The cap comes from the ``DELIMIT_HOSTED_DAILY_CAP`` environment variable
    and falls back to ``HOSTED_DAILY_CAP_DEFAULT`` when the variable is
    unset, blank, or not an integer (a misconfigured variable previously
    raised ``ValueError`` out of this check).

    Returns:
        ``(allowed, used_today)``. When there is no usable counter for the
        current UTC date, usage is reported as 0 and ``allowed`` is simply
        whether the cap is positive.
    """
    raw_cap = os.environ.get("DELIMIT_HOSTED_DAILY_CAP", "").strip()
    try:
        cap = int(raw_cap) if raw_cap else HOSTED_DAILY_CAP_DEFAULT
    except ValueError:
        logger.warning(
            "Invalid DELIMIT_HOSTED_DAILY_CAP=%r; using default %d",
            raw_cap,
            HOSTED_DAILY_CAP_DEFAULT,
        )
        cap = HOSTED_DAILY_CAP_DEFAULT

    # Result whenever nothing has been counted for today.
    nothing_used = (0 < cap, 0)

    if not HOSTED_DAILY_FILE.exists():
        return nothing_used
    try:
        data = json.loads(HOSTED_DAILY_FILE.read_text())
    except Exception:
        return nothing_used
    if not isinstance(data, dict):
        # Valid JSON of the wrong shape is as unusable as a corrupt file.
        return nothing_used

    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    if data.get("date") != today:
        # Counter belongs to an earlier day; it resets on the next increment.
        return nothing_used

    used = data.get("count", 0)
    return used < cap, used
263
-
264
-
265
def _increment_global_daily() -> None:
    """Add one to the global hosted-usage counter for the current UTC date.

    The counter file holds a single ``{"date", "count"}`` record; when the
    stored date is not today the record is reset before incrementing.
    A failed write is logged, not raised.
    """
    HOSTED_DAILY_FILE.parent.mkdir(parents=True, exist_ok=True)
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    # An unreadable or corrupt file is treated as an empty record.
    counter = {}
    try:
        if HOSTED_DAILY_FILE.exists():
            counter = json.loads(HOSTED_DAILY_FILE.read_text())
    except Exception:
        counter = {}

    stale = counter.get("date") != today
    if stale:
        counter = {"date": today, "count": 0}
    counter["count"] = 1 + counter.get("count", 0)

    try:
        payload = json.dumps(counter, indent=2)
        HOSTED_DAILY_FILE.write_text(payload)
    except Exception as e:
        logger.warning("Failed to write daily cap: %s", e)
284
-
285
-
286
def _moderate_content(question: str, context: str = "") -> Optional[str]:
    """Screen a hosted deliberation prompt against the moderation keyword lists.

    The question and context are joined, lower-cased, and searched for each
    keyword as a plain substring.

    Returns:
        ``None`` when nothing matches, otherwise the user-facing rejection
        message (identical for every category).
    """
    haystack = f"{question} {context}".lower()

    keyword_groups = (
        _MODERATION_EXPLICIT,
        _MODERATION_VIOLENCE,
        _MODERATION_ILLEGAL,
        _MODERATION_TOS,
    )
    flagged = any(
        keyword in haystack
        for group in keyword_groups
        for keyword in group
    )
    if not flagged:
        return None

    return (
        "This prompt was flagged by content moderation. "
        "Hosted deliberations are for technical questions only. "
        "Configure your own API keys for unrestricted use."
    )
326
-
327
-
328
def get_deliberation_status() -> Dict[str, Any]:
    """Summarise deliberation mode and usage for the current install.

    Returns:
        Dict with ``mode`` (``"byok"``, ``"hosted"`` or ``"none"``), the
        install ID, hosted usage/remaining/limit, the total deliberation
        count, a human-readable ``note``, and, in BYOK mode only,
        ``byok_models``.
    """
    install_id = _get_install_id()

    # BYOK means at least two enabled models that are not hosted ones.
    byok_models = {
        model_id: model_cfg
        for model_id, model_cfg in get_models_config().items()
        if model_cfg.get("enabled") and not model_cfg.get("hosted")
    }
    hosted_available = len(_load_hosted_models()) >= 2

    _, hosted_used = _check_hosted_quota(install_id)
    hosted_remaining = max(0, HOSTED_MAX_PER_INSTALL - hosted_used)

    # Lifetime count (hosted + BYOK); unreadable data reads as zero.
    total_count = 0
    try:
        if HOSTED_USAGE_FILE.exists():
            usage_by_install = json.loads(HOSTED_USAGE_FILE.read_text())
            total_count = usage_by_install.get(install_id, {}).get("total_count", 0)
    except Exception:
        total_count = 0

    if len(byok_models) >= 2:
        mode = "byok"
    elif hosted_available:
        mode = "hosted"
    else:
        mode = "none"

    status = {
        "mode": mode,
        "install_id": install_id,
        "hosted_used": hosted_used,
        "hosted_remaining": hosted_remaining,
        "hosted_limit": HOSTED_MAX_PER_INSTALL,
        "total_deliberations": total_count,
    }

    if mode == "byok":
        status["note"] = "Using your own API keys. Unlimited deliberations."
        status["byok_models"] = list(byok_models.keys())
        return status

    if mode == "none":
        status["note"] = "No models available. Configure API keys in ~/.delimit/models.json or hosted keys will be used when available."
        return status

    # Hosted mode: the note depends on whether any free runs remain.
    if hosted_remaining > 0:
        status["note"] = f"{hosted_remaining} free deliberation(s) remaining. Configure ~/.delimit/models.json for unlimited."
    else:
        status["note"] = "Free deliberations used. Configure your own API keys in ~/.delimit/models.json for unlimited deliberations."
    return status
376
-
377
-
378
# Built-in model catalogue used when the user has no models.json.
# Every entry starts disabled; auto-detection enables a model when its CLI
# or its API key (the "env_key" environment variable) is found.
DEFAULT_MODELS = {
    # xAI: OpenAI-compatible chat completions, API key only.
    "grok": {
        "name": "Grok",
        "api_url": "https://api.x.ai/v1/chat/completions",
        "model": "grok-4-0709",
        "env_key": "XAI_API_KEY",
        "enabled": False,
    },
    # Google: gemini CLI when available (Ultra plan), otherwise Vertex AI.
    # "{project}" is a placeholder filled in at call time.
    "gemini": {
        "name": "Gemini",
        "api_url": "https://us-central1-aiplatform.googleapis.com/v1/projects/{project}/locations/us-central1/publishers/google/models/gemini-2.5-flash:generateContent",
        "model": "gemini-2.5-flash",
        "env_key": "GOOGLE_APPLICATION_CREDENTIALS",
        "enabled": False,
        "format": "vertex_ai",
        "prefer_cli": True,
        "cli_command": "gemini",
    },
    # OpenAI: Codex CLI when available, otherwise the API.
    # No "cli_command" here; consumers default it to "codex".
    "openai": {
        "name": "OpenAI",
        "api_url": "https://api.openai.com/v1/chat/completions",
        "model": "gpt-4o",
        "env_key": "OPENAI_API_KEY",
        "enabled": False,
        "prefer_cli": True,
    },
    # Anthropic: claude CLI when available (Pro/Max), otherwise the API.
    # NOTE(review): confirm this model ID against Anthropic's published model list.
    "anthropic": {
        "name": "Claude",
        "api_url": "https://api.anthropic.com/v1/messages",
        "model": "claude-sonnet-4-5-20250514",
        "env_key": "ANTHROPIC_API_KEY",
        "enabled": False,
        "format": "anthropic",
        "prefer_cli": True,
        "cli_command": "claude",
    },
}
415
-
416
-
417
def get_models_config(allow_hosted_fallback: bool = True) -> Dict[str, Any]:
    """Load the model configuration, auto-detecting CLIs and API keys.

    Resolution order:

    1. ``~/.delimit/models.json``: if it exists and holds a JSON object it is
       returned verbatim (explicit config always wins). An unreadable file or
       a non-object value is logged and ignored.
    2. Auto-detection from ``DEFAULT_MODELS``: a model marked ``prefer_cli``
       is enabled via its CLI when that is on PATH, else via its API key;
       other models are enabled when their API key variable is set. If this
       enables at least one model, the result is returned as is (with fewer
       than two, ``deliberate()`` reports the error).
    3. Hosted fallback (STR-066): only when auto-detection enabled *no*
       model, ``allow_hosted_fallback`` is true, and at least two hosted
       models are available.

    Args:
        allow_hosted_fallback: Permit step 3.

    Returns:
        Mapping of model ID to its config dict.
    """
    if MODELS_CONFIG.exists():
        try:
            explicit = json.loads(MODELS_CONFIG.read_text())
        except (OSError, ValueError) as e:
            logger.warning("Ignoring unreadable %s: %s", MODELS_CONFIG, e)
        else:
            if isinstance(explicit, dict):
                return explicit
            # Callers iterate .items(); anything but an object would crash them.
            logger.warning("Ignoring %s: top-level JSON value is not an object", MODELS_CONFIG)

    # Auto-detect from environment
    config: Dict[str, Any] = {}
    for model_id, defaults in DEFAULT_MODELS.items():
        key = os.environ.get(defaults.get("env_key", ""), "")

        if not defaults.get("prefer_cli"):
            config[model_id] = {
                **defaults,
                "api_key": key,
                "enabled": bool(key),
            }
            continue

        # Prefer CLI (uses existing subscription) over API (extra cost)
        cli_path = shutil.which(defaults.get("cli_command", "codex"))
        if cli_path:
            config[model_id] = {
                **defaults,
                "format": "codex_cli",
                "enabled": True,
                "codex_path": cli_path,
                "backend": "cli",
            }
        elif key:
            config[model_id] = {
                **defaults,
                "api_key": key,
                "enabled": True,
                "backend": "api",
            }
        else:
            config[model_id] = {**defaults, "enabled": False}

    # User has at least one model configured: respect their setup.
    if any(v.get("enabled") for v in config.values()):
        return config

    # No user models at all: fall back to hosted keys (free tier)
    if allow_hosted_fallback:
        hosted = _load_hosted_models()
        if len(hosted) >= 2:
            logger.info("No user API keys found. Using hosted (free tier) models.")
            return hosted

    return config
479
-
480
-
481
def _gather_tool_context() -> str:
    """LED-069: Collect live project data to ground a deliberation.

    Runs four independent read-only probes (ledger, governance files, model
    configuration, git). Each probe is best-effort: any exception simply
    drops that probe's output.

    Returns:
        The collected lines joined with newlines; empty when every probe
        produced nothing.
    """
    import subprocess

    lines = []

    # 1. Ledger status
    try:
        from ai.ledger_manager import get_context
        ledger_state = get_context()
        n_open = ledger_state.get("open_items", 0)
        if n_open > 0:
            upcoming = ledger_state.get("next_up", [])[:3]
            top_str = ", ".join(
                f"{item.get('id', '')} ({item.get('title', '')[:40]})" for item in upcoming
            )
            lines.append(f"**Ledger**: {n_open} open items. Top: {top_str}")
        else:
            lines.append("**Ledger**: No open items.")
    except Exception:
        pass

    # 2. Governance health: only inspects ./.delimit on disk; the import
    #    just gates this probe on the governance module being importable.
    try:
        from ai.governance import govern
        gov_root = Path(".") / ".delimit"
        if gov_root.is_dir():
            has_policies = (gov_root / "policies.yml").is_file()
            has_ledger = (gov_root / "ledger").is_dir()
            lines.append(
                f"**Governance**: initialized={gov_root.is_dir()}, policies={has_policies}, ledger={has_ledger}"
            )
    except Exception:
        pass

    # 3. Model configuration
    try:
        model_names = [
            cfg.get("name", model_id)
            for model_id, cfg in get_models_config().items()
            if cfg.get("enabled")
        ]
        lines.append(f"**Models**: {len(model_names)} enabled — {', '.join(model_names)}")
    except Exception:
        pass

    # 4. Git status (if in a repo)
    try:
        last_commit = subprocess.run(
            ["git", "log", "--oneline", "-1"], capture_output=True, text=True, timeout=5
        )
        if last_commit.returncode == 0:
            lines.append(f"**Git**: latest commit — {last_commit.stdout.strip()}")
        porcelain = subprocess.run(
            ["git", "status", "--porcelain"], capture_output=True, text=True, timeout=5
        )
        if porcelain.returncode == 0:
            n_changed = sum(
                1 for entry in porcelain.stdout.strip().splitlines() if entry.strip()
            )
            if n_changed:
                lines.append(f"**Working tree**: {n_changed} uncommitted change(s)")
    except Exception:
        pass

    return "\n".join(lines)
538
-
539
-
540
def configure_models() -> Dict[str, Any]:
    """Describe which models are usable and how to enable the rest.

    Returns:
        Dict with:

        * ``configured_models``: IDs of enabled models.
        * ``model_details``: per enabled model, its display name, backend
          (``"cli"`` or ``"api"``) and model identifier.
        * ``missing_models``: per disabled model, a hint on how to enable it.
        * ``config_path``: location of the user's models.json.
        * ``note``: explanation of CLI vs API backends.
    """
    config = get_models_config()
    available = {k: v for k, v in config.items() if v.get("enabled")}
    missing = {k: v for k, v in config.items() if not v.get("enabled")}

    model_details = {}
    for k, v in available.items():
        # CLI-format models are always reported as "cli", whatever "backend" says.
        backend = "cli" if v.get("format") == "codex_cli" else v.get("backend", "api")
        model_details[k] = {"name": v.get("name", k), "backend": backend, "model": v.get("model", "")}

    missing_hints = {}
    for k, v in missing.items():
        hint = f"Set {v.get('env_key', 'key')}"
        # Mention a CLI only when the model has one. "prefer_cli" models with
        # no explicit command fall back to codex, as in auto-detection.
        cli_name = v.get("cli_command") or ("codex" if v.get("prefer_cli") else "")
        if cli_name:
            hint += f" or install {cli_name} CLI"
        missing_hints[k] = hint

    return {
        "configured_models": list(available.keys()),
        "model_details": model_details,
        "missing_models": missing_hints,
        "config_path": str(MODELS_CONFIG),
        "note": "CLI backends use your existing subscription (no extra API cost). "
        "API backends require separate API keys.",
    }
561
-
562
-
563
def _call_cli(prompt: str, system_prompt: str = "", cli_path: str = "", cli_command: str = "codex") -> str:
    """Run an AI CLI (codex, claude or gemini) once and return its stdout.

    Uses the user's existing CLI subscription, so there is no API cost.

    Args:
        prompt: The user prompt.
        system_prompt: Optional preamble, prepended to the prompt with a
            blank line between them.
        cli_path: Executable to run; resolved from PATH via *cli_command*
            when empty.
        cli_command: CLI family name. Selects the argument style and is used
            in the bracketed diagnostic messages.

    Returns:
        The CLI's stripped stdout. Failures never raise: they are returned as
        a bracketed message (``[<cli> unavailable ...]``, ``[<cli> error:
        ...]``, ``[<cli> timed out ...]``, ``[<cli> returned empty
        response]``).
    """
    import subprocess

    if not cli_path:
        cli_path = shutil.which(cli_command) or ""
    if not cli_path:
        return f"[{cli_command} unavailable — CLI not found in PATH]"

    full_prompt = f"{system_prompt}\n\n{prompt}" if system_prompt else prompt

    # Build command based on which CLI.
    # NOTE(review): the claude and codex flags below switch off permission
    # prompts / sandboxing, while the prompt may embed other models' raw
    # output. Confirm this trust level is intended before widening use.
    if "claude" in cli_command:
        cmd = [cli_path, "--print", "--dangerously-skip-permissions", full_prompt]
    elif "gemini" in cli_command:
        # Gemini CLI has no "exec" subcommand; -p runs one non-interactive
        # prompt. Previously gemini was invoked with codex's arguments.
        cmd = [cli_path, "-p", full_prompt]
    else:
        # codex
        cmd = [cli_path, "exec", "--dangerously-bypass-approvals-and-sandbox", full_prompt]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
    except subprocess.TimeoutExpired:
        return f"[{cli_command} timed out after 300s]"
    except Exception as e:
        return f"[{cli_command} error: {e}]"

    output = result.stdout.strip()
    if not output and result.stderr:
        # Surface a bounded slice of stderr when nothing reached stdout.
        return f"[{cli_command} error: {result.stderr[:300]}]"
    return output or f"[{cli_command} returned empty response]"
591
-
592
-
593
def _call_model(model_id: str, config: Dict, prompt: str, system_prompt: str = "") -> str:
    """Send one prompt to a configured model and return its text reply.

    The wire format is chosen by ``config["format"]``:

    * ``codex_cli``: delegate to ``_call_cli`` (no HTTP request).
    * ``vertex_ai``: Vertex AI ``generateContent`` with a google-auth token.
    * ``google``: Google Generative AI ``generateContent`` with an API key.
    * ``anthropic``: Anthropic Messages API.
    * anything else: OpenAI-compatible chat completions (xAI, OpenAI, ...).

    Args:
        model_id: Fallback display name when ``config`` has no ``name``.
        config: Model config. Reads ``format``, ``name``, ``api_key``,
            ``env_key``, ``api_url``, ``model``, ``codex_path`` and
            ``cli_command``.
        prompt: The user prompt.
        system_prompt: Optional system instruction.

    Returns:
        The model's reply text. Missing credentials and request/parse
        failures are returned as a bracketed message, not raised.
    """
    fmt = config.get("format", "openai")
    display_name = config.get("name", model_id)

    # CLI-based models (codex, claude): uses existing subscription, no API cost
    if fmt == "codex_cli":
        return _call_cli(
            prompt,
            system_prompt,
            cli_path=config.get("codex_path", ""),
            cli_command=config.get("cli_command", "codex"),
        )

    # "or ''" guards env_key being present but None, which os.environ rejects.
    api_key = config.get("api_key") or os.environ.get(config.get("env_key") or "", "")
    # Vertex AI uses service account auth, not API key
    if not api_key and fmt != "vertex_ai":
        return f"[{display_name} unavailable — {config.get('env_key')} not set]"

    api_url = config["api_url"]
    model = config.get("model", "")

    # Both Gemini endpoints have no system role here, so the system prompt
    # is folded into the single user turn.
    merged_prompt = f"{system_prompt}\n\n{prompt}" if system_prompt else prompt
    gemini_payload = {
        "contents": [{"role": "user", "parts": [{"text": merged_prompt}]}],
        "generationConfig": {"maxOutputTokens": 4096, "temperature": 0.7},
    }

    try:
        if fmt == "vertex_ai":
            # Vertex AI format: use google-auth for access token
            try:
                import google.auth
                import google.auth.transport.requests
            except ImportError:
                return "[Gemini unavailable — install google-auth: pip install google-auth]"

            # Prefer application default credentials (gcloud auth login)
            # over service accounts; an already-set variable always wins.
            adc_path = str(Path.home() / ".config/gcloud/application_default_credentials.json")
            sa_path = str(Path.home() / ".delimit" / "secrets" / "gcp-delimit-sa.json")
            for candidate in [adc_path, sa_path]:
                if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") and os.path.exists(candidate):
                    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = candidate
                    break

            # Explicit scopes needed for service accounts; ADC also accepts them
            vertex_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
            creds, detected_project = google.auth.default(scopes=vertex_scopes)
            creds.refresh(google.auth.transport.requests.Request())

            # Use GOOGLE_CLOUD_PROJECT if set, then detected project
            project = os.environ.get("GOOGLE_CLOUD_PROJECT", "") or detected_project or ""
            if not project:
                # Last resort: read the project out of the credentials file.
                try:
                    creds_file = Path(os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", adc_path))
                    adc_data = json.loads(creds_file.read_text())
                    project = adc_data.get("quota_project_id", "") or adc_data.get("project_id", "")
                except Exception:
                    pass
            if not project:
                return "[Gemini unavailable — set GOOGLE_CLOUD_PROJECT env var]"

            req = urllib.request.Request(
                api_url.replace("{project}", project),
                data=json.dumps(gemini_payload).encode(),
                headers={
                    "Authorization": f"Bearer {creds.token}",
                    "Content-Type": "application/json",
                },
                method="POST",
            )
        elif fmt == "google":
            # Google Generative AI format (API key). The key goes in a header,
            # not "?key=" in the URL, so it cannot leak via URL logging.
            req = urllib.request.Request(
                api_url,
                data=json.dumps(gemini_payload).encode(),
                headers={
                    "x-goog-api-key": api_key,
                    "Content-Type": "application/json",
                },
                method="POST",
            )
        elif fmt == "anthropic":
            # Anthropic Messages API
            data = json.dumps({
                "model": model,
                "max_tokens": 4096,
                "system": system_prompt or "You are a helpful assistant participating in a multi-model deliberation.",
                "messages": [{"role": "user", "content": prompt}],
            }).encode()
            req = urllib.request.Request(
                api_url,
                data=data,
                headers={
                    "x-api-key": api_key,
                    "anthropic-version": "2023-06-01",
                    "Content-Type": "application/json",
                    "User-Agent": "Delimit/3.6.0",
                },
                method="POST",
            )
        else:
            # OpenAI-compatible format (works for xAI, OpenAI, etc.)
            messages = []
            if system_prompt:
                messages.append({"role": "system", "content": system_prompt})
            messages.append({"role": "user", "content": prompt})

            data = json.dumps({
                "model": model,
                "messages": messages,
                "temperature": 0.7,
                "max_tokens": 4096,
            }).encode()
            req = urllib.request.Request(
                api_url,
                data=data,
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                    "User-Agent": "Delimit/3.6.0",
                },
                method="POST",
            )

        with urllib.request.urlopen(req, timeout=45) as resp:
            result = json.loads(resp.read())

        # Each provider nests the reply text differently; a missing key or an
        # empty list falls through to the error message below.
        if fmt in ("google", "vertex_ai"):
            return result["candidates"][0]["content"]["parts"][0]["text"]
        if fmt == "anthropic":
            return result["content"][0]["text"]
        return result["choices"][0]["message"]["content"]

    except Exception as e:
        return f"[{display_name} error: {e}]"
723
-
724
-
725
- def deliberate(
726
- question: str,
727
- context: str = "",
728
- max_rounds: int = 3,
729
- mode: str = "dialogue",
730
- require_unanimous: bool = True,
731
- save_path: Optional[str] = None,
732
- ) -> Dict[str, Any]:
733
- """
734
- Run a multi-round deliberation across all configured AI models.
735
-
736
- Modes:
737
- - "debate": Long-form essays, models respond to each other's full arguments (3 rounds default)
738
- - "dialogue": Short conversational turns, models build on each other like a group chat (6 rounds default)
739
-
740
- Returns the full deliberation transcript + final verdict.
741
- """
742
- DELIBERATION_DIR.mkdir(parents=True, exist_ok=True)
743
-
744
- config = get_models_config()
745
- enabled_models = {k: v for k, v in config.items() if v.get("enabled")}
746
-
747
- if len(enabled_models) < 2:
748
- return {
749
- "error": "Need at least 2 AI models for deliberation.",
750
- "configured": list(enabled_models.keys()),
751
- "missing": {k: f"Set {v.get('env_key', 'key')}" for k, v in config.items() if not v.get("enabled")},
752
- "tip": "Set API key environment variables or create ~/.delimit/models.json. "
753
- "Or use the free tier (up to 3 deliberations) with hosted keys.",
754
- }
755
-
756
- # Determine if this is a hosted (free tier) deliberation
757
- is_hosted = any(v.get("hosted") for v in enabled_models.values())
758
- install_id = _get_install_id()
759
-
760
- if is_hosted:
761
- # Content moderation for hosted calls
762
- mod_result = _moderate_content(question, context)
763
- if mod_result:
764
- return {"error": mod_result, "mode": "hosted", "moderation": "flagged"}
765
-
766
- # Check per-install quota
767
- allowed, used = _check_hosted_quota(install_id)
768
- if not allowed:
769
- return {
770
- "error": "Free deliberations used. Configure your own API keys in "
771
- "~/.delimit/models.json for unlimited deliberations.",
772
- "mode": "hosted",
773
- "hosted_used": used,
774
- "hosted_limit": HOSTED_MAX_PER_INSTALL,
775
- }
776
-
777
- # Check global daily cap
778
- daily_allowed, daily_used = _check_global_daily_cap()
779
- if not daily_allowed:
780
- return {
781
- "error": "Daily hosted deliberation limit reached. Configure your own "
782
- "API keys for unlimited use.",
783
- "mode": "hosted",
784
- "daily_used": daily_used,
785
- }
786
-
787
- model_ids = list(enabled_models.keys())
788
-
789
- # Dialogue mode uses more rounds with shorter responses
790
- # Capped at 4 to stay within MCP timeout (LED-167)
791
- if mode == "dialogue" and max_rounds == 3:
792
- max_rounds = 4
793
-
794
- # LED-106: Estimate cost before deliberation starts
795
- # Rough token estimates per model call (prompt + completion)
796
- COST_PER_1K_TOKENS = {
797
- "grok": 0.005, # xAI Grok
798
- "gemini": 0.00, # Vertex AI (free tier / included in GCP)
799
- "openai": 0.005, # GPT-4o
800
- "anthropic": 0.003, # Claude Sonnet
801
- "codex": 0.00, # CLI-based, uses subscription
802
- }
803
- AVG_TOKENS_PER_CALL = {"debate": 2000, "dialogue": 800}
804
- est_tokens_per_call = AVG_TOKENS_PER_CALL.get(mode, 1500)
805
- est_total_calls = len(model_ids) * max_rounds
806
- est_total_tokens = est_tokens_per_call * est_total_calls
807
-
808
- cost_estimate = {}
809
- total_est_cost = 0.0
810
- for mid in model_ids:
811
- backend = enabled_models[mid].get("backend", "api")
812
- if backend == "cli":
813
- cost_estimate[mid] = {"backend": "cli", "cost": 0.0, "note": "Uses existing subscription"}
814
- else:
815
- rate = COST_PER_1K_TOKENS.get(mid, 0.005)
816
- model_cost = (est_tokens_per_call * max_rounds * rate) / 1000
817
- cost_estimate[mid] = {"backend": "api", "cost_usd": round(model_cost, 4), "rate_per_1k": rate}
818
- total_est_cost += model_cost
819
-
820
- start_time = time.time()
821
-
822
- # LED-069: Tool-augmented deliberation — gather real data before rounds
823
- tool_context = _gather_tool_context()
824
- if tool_context:
825
- context = f"{context}\n\n## Live Tool Results (gathered automatically)\n{tool_context}" if context else f"## Live Tool Results (gathered automatically)\n{tool_context}"
826
-
827
- transcript = {
828
- "question": question,
829
- "context": context,
830
- "mode": mode,
831
- "models": model_ids,
832
- "started_at": time.strftime("%Y-%m-%dT%H:%M:%SZ"),
833
- "rounds": [],
834
- "thread": [], # flat conversation thread for dialogue mode
835
- "unanimous": False,
836
- "final_verdict": None,
837
- "cost_estimate": {
838
- "models": cost_estimate,
839
- "total_estimated_usd": round(total_est_cost, 4),
840
- "estimated_calls": est_total_calls,
841
- "estimated_tokens": est_total_tokens,
842
- },
843
- "timing": {}, # populated during execution
844
- }
845
-
846
- if mode == "dialogue":
847
- system_prompt = (
848
- "You are in a group chat with other AI models. Keep responses to 2-4 sentences. "
849
- "Be direct and conversational — this is a discussion, not an essay. "
850
- "Build on what others said. Disagree specifically if you disagree. "
851
- "When you're ready to agree, say VERDICT: AGREE. "
852
- "If you disagree, say VERDICT: DISAGREE — [why in one sentence]."
853
- )
854
- else:
855
- system_prompt = (
856
- "You are participating in a structured multi-model deliberation with other AI models. "
857
- "You will see other models' exact responses and must engage with their specific arguments. "
858
- "At the END of your response, you MUST include exactly one of these lines:\n"
859
- "VERDICT: AGREE\n"
860
- "VERDICT: DISAGREE — [one sentence reason]\n"
861
- "VERDICT: AGREE WITH MODIFICATIONS — [one sentence modification]\n"
862
- "Do not hedge. Take a clear position."
863
- )
864
-
865
- full_prompt = f"{context}\n\nQUESTION:\n{question}" if context else question
866
-
867
- # Round 1: Independent responses — run ALL models in parallel (LED-167)
868
- logger.info(f"Deliberation Round 1 ({mode} mode): Independent responses (parallel)")
869
- round1 = {"round": 1, "type": "independent", "responses": {}}
870
-
871
- if mode == "dialogue":
872
- r1_prompt = f"{full_prompt}\n\nGive your initial take in 2-4 sentences. Don't write an essay."
873
- else:
874
- r1_prompt = full_prompt
875
-
876
- from concurrent.futures import ThreadPoolExecutor, as_completed
877
-
878
- def _call_r1(mid):
879
- t0 = time.time()
880
- resp = _call_model(mid, enabled_models[mid], r1_prompt, system_prompt)
881
- ms = int((time.time() - t0) * 1000)
882
- return mid, resp, ms
883
-
884
- with ThreadPoolExecutor(max_workers=len(model_ids)) as pool:
885
- futures = {pool.submit(_call_r1, mid): mid for mid in model_ids}
886
- for future in as_completed(futures):
887
- mid, response, call_ms = future.result()
888
- round1["responses"][mid] = response
889
- transcript["timing"].setdefault(mid, []).append({"round": 1, "ms": call_ms, "chars": len(response)})
890
- logger.info(f" {mid}: {len(response)} chars, {call_ms}ms")
891
-
892
- # Build thread in consistent model order
893
- for mid in model_ids:
894
- transcript["thread"].append({"model": mid, "round": 1, "text": round1["responses"][mid]})
895
-
896
- transcript["rounds"].append(round1)
897
-
898
- # Subsequent rounds: Models see each other's responses
899
- for round_num in range(2, max_rounds + 1):
900
- logger.info(f"Deliberation Round {round_num} ({mode})")
901
- round_data = {"round": round_num, "type": "deliberation", "responses": {}}
902
- prev = transcript["rounds"][-1]["responses"]
903
-
904
- for model_id in model_ids:
905
- if mode == "dialogue":
906
- # Dialogue: show the full conversation thread so far
907
- thread_text = f"Topic: {question}\n\nConversation so far:\n"
908
- for entry in transcript["thread"]:
909
- name = enabled_models.get(entry["model"], {}).get("name", entry["model"])
910
- thread_text += f"\n[{name}]: {entry['text']}\n"
911
- thread_text += (
912
- f"\nYour turn ({enabled_models[model_id]['name']}). "
913
- f"Respond in 2-4 sentences to the conversation above. "
914
- f"If you agree with the emerging consensus, say VERDICT: AGREE. "
915
- f"If not, push back specifically."
916
- )
917
- cross_prompt = thread_text
918
- else:
919
- # Debate: show other models' full responses from last round
920
- others_text = ""
921
- for other_id in model_ids:
922
- if other_id != model_id:
923
- others_text += (
924
- f"\n=== {enabled_models[other_id]['name'].upper()}'S EXACT RESPONSE "
925
- f"(Round {round_num - 1}) ===\n"
926
- f"{prev[other_id]}\n"
927
- )
928
- cross_prompt = (
929
- f"DELIBERATION ROUND {round_num}\n\n"
930
- f"Original question: {question}\n"
931
- f"{others_text}\n"
932
- f"Respond to the other models' SPECIFIC arguments. "
933
- f"Quote them directly if you disagree. "
934
- f"End with VERDICT: AGREE / DISAGREE / AGREE WITH MODIFICATIONS."
935
- )
936
-
937
- call_start = time.time()
938
- response = _call_model(model_id, enabled_models[model_id], cross_prompt, system_prompt)
939
- call_ms = int((time.time() - call_start) * 1000)
940
- round_data["responses"][model_id] = response
941
- transcript["thread"].append({"model": model_id, "round": round_num, "text": response})
942
- transcript["timing"].setdefault(model_id, []).append({"round": round_num, "ms": call_ms, "chars": len(response)})
943
-
944
- transcript["rounds"].append(round_data)
945
-
946
- # Check for unanimous agreement
947
- all_agree = True
948
- for model_id in model_ids:
949
- resp = round_data["responses"][model_id].upper()
950
- if "VERDICT:" in resp:
951
- verdict_part = resp.split("VERDICT:")[-1].strip()
952
- agrees = verdict_part.startswith("AGREE")
953
- if not agrees:
954
- all_agree = False
955
- else:
956
- all_agree = False # No verdict = no agreement
957
-
958
- if all_agree:
959
- transcript["unanimous"] = True
960
- transcript["final_verdict"] = "UNANIMOUS AGREEMENT"
961
- transcript["agreed_at_round"] = round_num
962
- break
963
- else:
964
- # Max rounds reached
965
- transcript["final_verdict"] = "MAX ROUNDS REACHED"
966
- for model_id in model_ids:
967
- resp = transcript["rounds"][-1]["responses"][model_id].upper()
968
- verdict = "unknown"
969
- if "VERDICT:" in resp:
970
- verdict_part = resp.split("VERDICT:")[-1].strip()
971
- verdict = "agree" if verdict_part.startswith("AGREE") else "disagree"
972
- transcript[f"{model_id}_final"] = verdict
973
-
974
- transcript["completed_at"] = time.strftime("%Y-%m-%dT%H:%M:%SZ")
975
- total_elapsed_ms = int((time.time() - start_time) * 1000)
976
-
977
- # LED-106: Compute actual cost and timing summary
978
- actual_calls = sum(len(calls) for calls in transcript["timing"].values())
979
- actual_chars = sum(c["chars"] for calls in transcript["timing"].values() for c in calls)
980
- model_summaries = {}
981
- for mid, calls in transcript["timing"].items():
982
- total_ms = sum(c["ms"] for c in calls)
983
- total_chars = sum(c["chars"] for c in calls)
984
- est_tokens = total_chars // 4 # rough char-to-token ratio
985
- rate = COST_PER_1K_TOKENS.get(mid, 0.005)
986
- backend = enabled_models.get(mid, {}).get("backend", "api")
987
- actual_cost = 0.0 if backend == "cli" else (est_tokens * rate) / 1000
988
- model_summaries[mid] = {
989
- "calls": len(calls),
990
- "total_ms": total_ms,
991
- "avg_ms": total_ms // max(len(calls), 1),
992
- "total_chars": total_chars,
993
- "est_tokens": est_tokens,
994
- "actual_cost_usd": round(actual_cost, 4),
995
- }
996
-
997
- total_actual_cost = sum(s["actual_cost_usd"] for s in model_summaries.values())
998
- transcript["cost_actual"] = {
999
- "models": model_summaries,
1000
- "total_actual_usd": round(total_actual_cost, 4),
1001
- "total_calls": actual_calls,
1002
- "total_chars": actual_chars,
1003
- "total_elapsed_ms": total_elapsed_ms,
1004
- }
1005
-
1006
- # Track usage
1007
- transcript["mode"] = "hosted" if is_hosted else "byok"
1008
- if is_hosted:
1009
- _increment_hosted_usage(install_id)
1010
- _increment_global_daily()
1011
- remaining = max(0, HOSTED_MAX_PER_INSTALL - (used + 1))
1012
- transcript["hosted_remaining"] = remaining
1013
- if remaining == 0:
1014
- transcript["hosted_note"] = (
1015
- "This was your last free deliberation. Configure your own API keys "
1016
- "in ~/.delimit/models.json for unlimited deliberations."
1017
- )
1018
- else:
1019
- transcript["hosted_note"] = f"{remaining} free deliberation(s) remaining."
1020
- else:
1021
- _increment_total_usage(install_id)
1022
-
1023
- # Save transcript
1024
- save_to = save_path
1025
- if not save_to:
1026
- ts = time.strftime("%Y%m%d_%H%M%S")
1027
- save_to = str(DELIBERATION_DIR / f"deliberation_{ts}.json")
1028
-
1029
- Path(save_to).parent.mkdir(parents=True, exist_ok=True)
1030
- Path(save_to).write_text(json.dumps(transcript, indent=2))
1031
- transcript["saved_to"] = save_to
1032
-
1033
- return transcript