codebase-cortex 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. codebase_cortex/__init__.py +3 -0
  2. codebase_cortex/agents/__init__.py +0 -0
  3. codebase_cortex/agents/base.py +69 -0
  4. codebase_cortex/agents/code_analyzer.py +122 -0
  5. codebase_cortex/agents/doc_writer.py +356 -0
  6. codebase_cortex/agents/semantic_finder.py +64 -0
  7. codebase_cortex/agents/sprint_reporter.py +152 -0
  8. codebase_cortex/agents/task_creator.py +138 -0
  9. codebase_cortex/auth/__init__.py +0 -0
  10. codebase_cortex/auth/callback_server.py +80 -0
  11. codebase_cortex/auth/oauth.py +173 -0
  12. codebase_cortex/auth/token_store.py +90 -0
  13. codebase_cortex/cli.py +855 -0
  14. codebase_cortex/config.py +150 -0
  15. codebase_cortex/embeddings/__init__.py +0 -0
  16. codebase_cortex/embeddings/clustering.py +140 -0
  17. codebase_cortex/embeddings/indexer.py +208 -0
  18. codebase_cortex/embeddings/store.py +126 -0
  19. codebase_cortex/git/__init__.py +0 -0
  20. codebase_cortex/git/diff_parser.py +185 -0
  21. codebase_cortex/git/github_client.py +46 -0
  22. codebase_cortex/graph.py +111 -0
  23. codebase_cortex/mcp_client.py +94 -0
  24. codebase_cortex/notion/__init__.py +0 -0
  25. codebase_cortex/notion/bootstrap.py +298 -0
  26. codebase_cortex/notion/page_cache.py +107 -0
  27. codebase_cortex/state.py +77 -0
  28. codebase_cortex/utils/__init__.py +0 -0
  29. codebase_cortex/utils/json_parsing.py +59 -0
  30. codebase_cortex/utils/logging.py +62 -0
  31. codebase_cortex/utils/rate_limiter.py +56 -0
  32. codebase_cortex/utils/section_parser.py +139 -0
  33. codebase_cortex-0.1.0.dist-info/METADATA +209 -0
  34. codebase_cortex-0.1.0.dist-info/RECORD +37 -0
  35. codebase_cortex-0.1.0.dist-info/WHEEL +4 -0
  36. codebase_cortex-0.1.0.dist-info/entry_points.txt +3 -0
  37. codebase_cortex-0.1.0.dist-info/licenses/LICENSE +21 -0
codebase_cortex/cli.py ADDED
@@ -0,0 +1,855 @@
1
+ """CLI commands for Codebase Cortex."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import secrets
7
+ import time
8
+ from pathlib import Path
9
+
10
+ import click
11
+ from rich.console import Console
12
+ from rich.panel import Panel
13
+
14
+ from codebase_cortex.config import Settings, CORTEX_DIR_NAME
15
+
16
# Shared Rich console used by every CLI command for styled terminal output.
console = Console()
17
+
18
+
19
@click.group()
@click.version_option(package_name="codebase-cortex")
def cli() -> None:
    """Codebase Cortex - Keep engineering docs in sync with code."""
    # Group entry point: subcommands (init, run, status, ...) attach here.
    # The docstring above doubles as the `--help` banner, so it is the body.
24
+
25
+
26
@cli.command()
def init() -> None:
    """Interactive setup wizard. Run this inside your project repo."""
    # Wizard steps: provider/model choice -> optional GitHub token ->
    # write .cortex/ config -> optional git hook -> Notion OAuth ->
    # bootstrap starter Notion pages. OAuth/bootstrap failures are
    # non-fatal; the user can re-run `cortex init`.
    cwd = Path.cwd()
    console.print(Panel(f"Codebase Cortex Setup — {cwd.name}", style="bold blue"))

    # Check if already initialized; offer to re-run the wizard.
    cortex_dir = cwd / CORTEX_DIR_NAME
    if cortex_dir.exists():
        if not click.confirm(f"{CORTEX_DIR_NAME}/ already exists. Re-initialize?", default=False):
            return

    # Step 1: LLM provider
    from codebase_cortex.config import RECOMMENDED_MODELS, DEFAULT_MODELS

    provider = click.prompt(
        "LLM provider",
        type=click.Choice(["google", "anthropic", "openrouter"]),
        default="google",
    )

    # Each provider maps to the env-var name written into .cortex/.env below.
    api_key = ""
    if provider == "google":
        api_key = click.prompt("Google API key (GOOGLE_API_KEY)")
        key_name = "GOOGLE_API_KEY"
    elif provider == "anthropic":
        api_key = click.prompt("Anthropic API key (ANTHROPIC_API_KEY)")
        key_name = "ANTHROPIC_API_KEY"
    else:
        api_key = click.prompt("OpenRouter API key (OPENROUTER_API_KEY)")
        key_name = "OPENROUTER_API_KEY"

    # Step 1b: Model selection — numbered menu of recommended models with a
    # final "custom" slot for a free-form model name.
    recommended = RECOMMENDED_MODELS.get(provider, [])
    default_model = DEFAULT_MODELS.get(provider, "")

    if recommended:
        console.print("\n[bold]Recommended models:[/bold]")
        for i, m in enumerate(recommended, 1):
            marker = " (default)" if m == default_model else ""
            console.print(f"  {i}. {m}{marker}")
        console.print(f"  {len(recommended) + 1}. Custom model name")

        model_choice = click.prompt(
            "Choose model",
            type=click.IntRange(1, len(recommended) + 1),
            default=1,
        )

        if model_choice <= len(recommended):
            llm_model = recommended[model_choice - 1]
        else:
            llm_model = click.prompt("Model name")
    else:
        llm_model = click.prompt("Model name")

    console.print(f"[green]Model:[/green] {llm_model}")

    # Step 2: GitHub token (optional) — try the gh CLI first, fall back to a
    # manual prompt when gh is missing or not authenticated.
    github_token = ""
    if click.confirm("Add a GitHub token? (only needed for private repos)", default=False):
        import subprocess

        try:
            result = subprocess.run(
                ["gh", "auth", "token"],
                capture_output=True,
                text=True,
                check=True,
            )
            github_token = result.stdout.strip()
            console.print("[green]GitHub token obtained from gh CLI[/green]")
        except (subprocess.CalledProcessError, FileNotFoundError):
            github_token = click.prompt("GitHub Personal Access Token")

    # Step 3: Create .cortex/ directory
    cortex_dir.mkdir(exist_ok=True)

    # Write .cortex/.env
    env_lines = [
        f"LLM_PROVIDER={provider}",
        f"LLM_MODEL={llm_model}",
        f"{key_name}={api_key}",
    ]
    if github_token:
        env_lines.append(f"GITHUB_TOKEN={github_token}")

    env_path = cortex_dir / ".env"
    env_path.write_text("\n".join(env_lines) + "\n")

    # Write .cortex/.gitignore (ignore everything inside)
    (cortex_dir / ".gitignore").write_text("*\n")

    # Add .cortex/ to repo's .gitignore if not already there
    repo_gitignore = cwd / ".gitignore"
    if repo_gitignore.exists():
        content = repo_gitignore.read_text()
        if CORTEX_DIR_NAME not in content:
            with open(repo_gitignore, "a") as f:
                f.write(f"\n# Codebase Cortex\n{CORTEX_DIR_NAME}/\n")
    else:
        repo_gitignore.write_text(f"# Codebase Cortex\n{CORTEX_DIR_NAME}/\n")

    console.print(f"[green]Created {cortex_dir}/ with config[/green]")

    # Step 4: Git hook — optional post-commit hook, in full or dry-run mode.
    git_dir = cwd / ".git"
    if git_dir.is_dir():
        if click.confirm("Auto-run Cortex after each git commit?", default=True):
            mode = click.prompt(
                "Hook mode",
                type=click.Choice(["full", "dry-run"]),
                default="full",
            )
            _install_git_hook(git_dir, mode)
    else:
        console.print("[yellow]Not a git repo — skipping git hook setup.[/yellow]")

    # Step 5: OAuth with Notion — any failure is reported and skipped so the
    # rest of setup still completes.
    console.print("\n[bold]Connecting to Notion...[/bold]")
    console.print("A browser window will open for Notion authorization.")

    notion_connected = False
    try:
        asyncio.run(_run_oauth(cwd))
        console.print("[green]Notion connected successfully![/green]")
        notion_connected = True
    except Exception as e:
        console.print(f"[yellow]Notion OAuth skipped: {e}[/yellow]")
        console.print("You can retry later with: cortex init")

    # Step 6: Bootstrap Notion pages (only when OAuth succeeded above).
    if notion_connected:
        console.print("\n[bold]Setting up Notion workspace...[/bold]")
        try:
            pages = asyncio.run(_bootstrap_pages(cwd))
            if pages:
                console.print(f"[green]Created {len(pages)} pages in Notion[/green]")
                for p in pages:
                    console.print(f"  - {p['title']}")
            else:
                console.print("[yellow]No pages created (may already exist)[/yellow]")
        except Exception as e:
            console.print(f"[yellow]Page bootstrap skipped: {e}[/yellow]")

    console.print("\n[bold]Setup complete![/bold]")
    console.print("Run [cyan]cortex status[/cyan] to verify the connection.")
    console.print("Run [cyan]cortex run --once[/cyan] to analyze your repo.")
174
+
175
+
176
# Sentinel line embedded in the generated post-commit hook; its presence in an
# existing hook file means the Cortex hook is already installed.
CORTEX_HOOK_MARKER = "# --- codebase-cortex post-commit hook ---"
177
+
178
+
179
def _install_git_hook(git_dir: Path, mode: str) -> None:
    """Install a post-commit git hook that runs Cortex automatically.

    Args:
        git_dir: Path to the repository's ``.git`` directory.
        mode: ``"full"`` to apply changes, ``"dry-run"`` to analyze only.

    Appends to an existing post-commit script if one is present; the
    CORTEX_HOOK_MARKER guard makes installation idempotent.
    """
    hooks_dir = git_dir / "hooks"
    hooks_dir.mkdir(exist_ok=True)
    hook_path = hooks_dir / "post-commit"

    dry_run_flag = " --dry-run" if mode == "dry-run" else ""
    # The hook runs Cortex in the background (note trailing &), logging to
    # .cortex/hook.log, and silently does nothing if `cortex` is not on PATH.
    hook_script = f"""{CORTEX_HOOK_MARKER}
# Runs Cortex in the background after each commit
if command -v cortex >/dev/null 2>&1; then
    cortex run --once --verbose{dry_run_flag} >> .cortex/hook.log 2>&1 &
fi
"""

    if hook_path.exists():
        existing = hook_path.read_text()
        if CORTEX_HOOK_MARKER in existing:
            console.print("[yellow]Git hook already installed — skipping.[/yellow]")
            return
        # Append to existing hook
        with open(hook_path, "a") as f:
            f.write("\n" + hook_script)
    else:
        hook_path.write_text("#!/bin/sh\n" + hook_script)

    hook_path.chmod(0o755)
    mode_label = "dry-run" if mode == "dry-run" else "full"
    console.print(f"[green]Installed post-commit hook ({mode_label} mode)[/green]")
207
+
208
+
209
async def _run_oauth(repo_path: Path) -> None:
    """Execute the OAuth flow: register client, open browser, wait for callback.

    Sequence: fetch server metadata, dynamically register a client, run an
    authorization-code + PKCE flow through the user's browser, verify the
    returned ``state``, exchange the code for tokens, and persist them.

    Args:
        repo_path: Repository root used to load settings and the token path.

    Raises:
        RuntimeError: if the callback ``state`` does not match the one sent
            (possible CSRF).
    """
    from codebase_cortex.auth.oauth import (
        generate_pkce_pair,
        fetch_oauth_metadata,
        register_client,
        build_authorization_url,
        exchange_code,
        open_browser,
    )
    from codebase_cortex.auth.callback_server import wait_for_callback
    from codebase_cortex.auth.token_store import TokenData, save_tokens

    settings = Settings.from_env(repo_path)
    port = settings.oauth_callback_port
    redirect_uri = f"http://localhost:{port}/callback"

    # Fetch server metadata
    metadata = await fetch_oauth_metadata()
    auth_endpoint = metadata.get("authorization_endpoint")
    token_endpoint = metadata.get("token_endpoint")
    registration_endpoint = metadata.get("registration_endpoint")

    # Dynamic client registration
    client_info = await register_client(redirect_uri, registration_endpoint=registration_endpoint)
    client_id = client_info["client_id"]
    client_secret = client_info["client_secret"]

    # PKCE verifier/challenge pair, plus a random state token for CSRF defense.
    verifier, challenge = generate_pkce_pair()
    state = secrets.token_urlsafe(32)

    # Open browser
    auth_url = build_authorization_url(
        client_id=client_id,
        redirect_uri=redirect_uri,
        code_challenge=challenge,
        state=state,
        authorization_endpoint=auth_endpoint,
    )
    open_browser(auth_url)

    # Wait for callback
    code, returned_state = await wait_for_callback(port=port)
    if returned_state != state:
        raise RuntimeError("OAuth state mismatch — possible CSRF attack")

    # Exchange code for tokens
    token_response = await exchange_code(
        code=code,
        client_id=client_id,
        client_secret=client_secret,
        redirect_uri=redirect_uri,
        code_verifier=verifier,
        token_endpoint=token_endpoint,
    )

    # Save tokens; a 3600s lifetime is assumed when the server omits
    # expires_in.
    token_data = TokenData(
        access_token=token_response["access_token"],
        refresh_token=token_response.get("refresh_token", ""),
        expires_at=time.time() + token_response.get("expires_in", 3600),
        client_id=client_id,
        client_secret=client_secret,
        token_endpoint=token_endpoint,
    )
    save_tokens(token_data, settings.notion_token_path)
276
+
277
+
278
async def _bootstrap_pages(repo_path: Path) -> list[dict]:
    """Bootstrap the starter Notion pages for this repository.

    Args:
        repo_path: Repository root used to load settings.

    Returns:
        The list of page descriptors produced by the bootstrap helper.
    """
    from codebase_cortex.notion.bootstrap import bootstrap_notion_pages

    return await bootstrap_notion_pages(Settings.from_env(repo_path))
284
+
285
+
286
@cli.command()
@click.option("--once", is_flag=True, help="Run once and exit (no watch mode).")
@click.option("--watch", is_flag=True, help="Watch for changes and run continuously.")
@click.option("--dry-run", is_flag=True, help="Analyze without writing to Notion.")
@click.option("--full", is_flag=True, help="Analyze entire codebase (not just recent diff).")
@click.option("--verbose", "-v", is_flag=True, help="Enable debug logging (LLM calls, MCP calls).")
def run(once: bool, watch: bool, dry_run: bool, full: bool, verbose: bool) -> None:
    """Run the Cortex pipeline on the current repo."""
    from codebase_cortex.graph import compile_graph
    from codebase_cortex.notion.page_cache import PageCache
    from codebase_cortex.utils.logging import setup_logging, get_logger

    if verbose:
        setup_logging(verbose=True)
    logger = get_logger()
    settings = Settings.from_env()

    if not settings.is_initialized:
        console.print("[red]Not initialized. Run 'cortex init' first.[/red]")
        return

    if not once and not watch:
        once = True  # Default to single run

    # Auto-detect: if doc pages have no real content yet, do a full scan
    if not full:
        cache = PageCache(cache_path=settings.page_cache_path)
        arch_page = cache.find_by_title("Architecture Overview")
        if arch_page and arch_page.content_hash == "":
            # Pages exist but were never written with real content
            # Check if this looks like a first run after init
            doc_pages = cache.find_all_doc_pages()
            all_empty = all(p.content_hash == "" for p in doc_pages)
            if all_empty:
                console.print("[cyan]First run detected — doing full codebase scan[/cyan]")
                full = True

    graph = compile_graph()

    # Seed state for the LangGraph-style pipeline; keys here are read by the
    # agents downstream (trigger/repo_path/dry_run/full_scan/errors).
    initial_state = {
        "trigger": "manual",
        "repo_path": str(settings.repo_path),
        "dry_run": dry_run,
        "full_scan": full,
        "errors": [],
    }

    if full:
        console.print("[cyan]Full codebase analysis mode[/cyan]")
    if dry_run:
        console.print("[yellow]Dry run mode — no Notion writes[/yellow]")

    async def _run_once():
        # Discover any new child pages under the parent (e.g. user-moved pages)
        from codebase_cortex.notion.bootstrap import discover_child_pages

        try:
            new_count = await discover_child_pages(settings)
            if new_count:
                console.print(f"[green]Discovered {new_count} new page(s) in Notion[/green]")
        except Exception as e:
            # Best-effort: discovery failure never blocks the pipeline run.
            logger.warning(f"Page discovery failed: {e}")

        result = await graph.ainvoke(initial_state)
        if result.get("errors"):
            for err in result["errors"]:
                logger.error(err)
        if result.get("analysis"):
            console.print(Panel(result["analysis"], title="Analysis", border_style="green"))
        else:
            # No analysis means nothing downstream to report either.
            console.print("[yellow]No analysis produced. Check if there are recent changes.[/yellow]")
            return

        if result.get("related_docs"):
            docs_text = "\n".join(
                f"- {d['title']} (similarity: {d['similarity']:.2f})"
                for d in result["related_docs"][:5]
            )
            console.print(Panel(docs_text, title="Related Docs", border_style="cyan"))

        if result.get("doc_updates"):
            updates_text = "\n".join(
                f"- {d['title']} ({d['action']})" for d in result["doc_updates"]
            )
            console.print(Panel(updates_text, title="Doc Updates", border_style="blue"))

        if result.get("tasks_created"):
            tasks_text = "\n".join(
                f"- [{t['priority']}] {t['title']}" for t in result["tasks_created"]
            )
            console.print(Panel(tasks_text, title="Tasks Created", border_style="yellow"))

        if result.get("sprint_summary"):
            console.print(Panel(result["sprint_summary"], title="Sprint Summary", border_style="magenta"))

    if once:
        asyncio.run(_run_once())
    elif watch:
        # --watch is accepted but not implemented yet; --once takes priority
        # when both flags are passed.
        console.print("[cyan]Watch mode not yet implemented. Use --once.[/cyan]")
385
+
386
+
387
@cli.command()
def status() -> None:
    """Show connection status and workspace info."""
    # Reports, in order: config location, LLM provider, repo path, Notion
    # token state, FAISS index presence, and (when a token file exists) a
    # live MCP connection test.
    settings = Settings.from_env()

    console.print(Panel(f"Codebase Cortex Status — {settings.repo_path.name}", style="bold blue"))

    # Check initialization
    if not settings.is_initialized:
        # Fix: this was an f-string with no placeholders; plain literal now.
        console.print("[red]Not initialized.[/red] Run 'cortex init' in this directory.")
        return

    console.print(f"[green]Config:[/green] {settings.env_path}")
    console.print(f"[green]LLM Provider:[/green] {settings.llm_provider}")
    console.print(f"[green]Repo:[/green] {settings.repo_path}")

    # Check Notion tokens: distinguish valid / expired / corrupt / missing.
    token_path = settings.notion_token_path
    if token_path.exists():
        from codebase_cortex.auth.token_store import load_tokens

        token_data = load_tokens(token_path)
        if token_data and not token_data.is_expired:
            console.print("[green]Notion:[/green] Connected (token valid)")
        elif token_data:
            console.print("[yellow]Notion:[/yellow] Token expired (will auto-refresh)")
        else:
            console.print("[red]Notion:[/red] Token file corrupt")
    else:
        console.print("[red]Notion:[/red] Not connected. Run 'cortex init'.")

    # Check FAISS index
    if settings.faiss_index_dir.exists():
        console.print(f"[green]Index:[/green] {settings.faiss_index_dir}")
    else:
        console.print("[yellow]Index:[/yellow] Not built. Run 'cortex embed'.")

    # Test MCP connection (only worth attempting when a token exists).
    if token_path.exists():
        console.print("\nTesting Notion MCP connection...")
        try:
            asyncio.run(_test_mcp(settings))
            console.print("[green]MCP:[/green] Connected to mcp.notion.com")
        except Exception as e:
            console.print(f"[red]MCP:[/red] {e}")
432
+
433
+
434
async def _test_mcp(settings: Settings) -> None:
    """Smoke-test the MCP connection by asking the server for its tool list."""
    from codebase_cortex.mcp_client import notion_mcp_session

    # A successful list_tools round-trip proves auth + transport both work.
    async with notion_mcp_session(settings) as session:
        result = await session.list_tools()
        console.print(f"    Available tools: {len(result.tools)}")
441
+
442
+
443
@cli.command()
def analyze() -> None:
    """One-shot diff analysis without Notion writes."""
    # Runs only the code-analyzer agent on the most recent git diff;
    # dry_run is forced so nothing is ever written to Notion.
    from codebase_cortex.git.diff_parser import get_recent_diff
    from codebase_cortex.agents.code_analyzer import CodeAnalyzerAgent
    from codebase_cortex.config import get_llm

    settings = Settings.from_env()

    # Consistency fix: guard on initialization like run/scan/prompt do,
    # instead of failing later with a missing-config error.
    if not settings.is_initialized:
        console.print("[red]Not initialized. Run 'cortex init' first.[/red]")
        return

    diff_text = get_recent_diff(str(settings.repo_path))
    if not diff_text:
        console.print("[yellow]No recent changes found.[/yellow]")
        return

    agent = CodeAnalyzerAgent(get_llm(settings))
    state = {
        "trigger": "manual",
        "repo_path": str(settings.repo_path),
        "diff_text": diff_text,
        "dry_run": True,  # analysis-only: never write to Notion
        "errors": [],
    }

    result = asyncio.run(agent.run(state))
    if result.get("analysis"):
        console.print(Panel(result["analysis"], title="Analysis", border_style="green"))
469
+
470
+
471
@cli.command()
def embed() -> None:
    """Rebuild the embedding index for the current repo."""
    # Pipeline: collect code chunks -> embed them -> build + persist FAISS.
    from codebase_cortex.embeddings.indexer import EmbeddingIndexer
    from codebase_cortex.embeddings.store import FAISSStore

    settings = Settings.from_env()
    repo_path = settings.repo_path
    index_dir = settings.faiss_index_dir

    console.print(f"Indexing [cyan]{repo_path}[/cyan]...")

    # Walk the repository and split indexable files into chunks.
    indexer = EmbeddingIndexer(repo_path=repo_path)
    chunks = indexer.collect_chunks()
    console.print(f"Found [green]{len(chunks)}[/green] code chunks")

    # Nothing to index — bail out before touching the embedding model.
    if not chunks:
        console.print("[yellow]No indexable files found.[/yellow]")
        return

    console.print("Generating embeddings...")
    embeddings = indexer.embed_chunks(chunks)

    # Build the vector store and write it to disk.
    store = FAISSStore(index_dir=index_dir)
    store.build(embeddings, chunks)
    store.save()
    console.print(f"Saved FAISS index with [green]{store.size}[/green] vectors to {index_dir}")
498
+
499
+
500
@cli.command()
@click.option("--query", default="", help="Search query to filter pages (default: scan all).")
@click.option("--link", multiple=True, help="Manually link a Notion page URL or ID to track.")
def scan(query: str, link: tuple[str, ...]) -> None:
    """Scan Notion workspace and link existing pages to Cortex.

    Use this when you have pre-existing documentation in Notion
    that you want Cortex to know about and update.

    Examples:
        cortex scan                      # Discover all pages
        cortex scan --query "API docs"   # Search for specific pages
        cortex scan --link <page-id>     # Link a specific page by ID
    """
    settings = Settings.from_env()

    # Guard clause: the wizard must have run before scanning makes sense.
    if not settings.is_initialized:
        console.print("[red]Not initialized. Run 'cortex init' first.[/red]")
        return

    # Explicit --link IDs take precedence over a workspace-wide search.
    coro = _link_pages(settings, list(link)) if link else _scan_workspace(settings, query)
    asyncio.run(coro)
524
+
525
+
526
async def _scan_workspace(settings: Settings, query: str) -> None:
    """Scan Notion workspace for existing pages and add them to cache.

    Args:
        settings: Loaded project settings.
        query: Search text; falls back to the repo directory name when empty.

    Searches via the `notion-search` MCP tool, extracts candidate page IDs
    (UUIDs) from the markdown response, fetches each page (capped at 20) to
    recover a title, and upserts the results into the local page cache.
    """
    from codebase_cortex.mcp_client import notion_mcp_session
    from codebase_cortex.notion.page_cache import PageCache
    import re

    # Fix: removed the unused `extract_page_id` import from
    # codebase_cortex.notion.bootstrap — it was never referenced here.

    cache = PageCache(cache_path=settings.page_cache_path)
    search_query = query or settings.repo_path.name

    console.print(f"Searching Notion for: [cyan]{search_query}[/cyan]")

    async with notion_mcp_session(settings) as session:
        result = await session.call_tool(
            "notion-search",
            arguments={"query": search_query},
        )

        if result.isError or not result.content:
            console.print("[yellow]No results found.[/yellow]")
            return

        response_text = result.content[0].text
        console.print(Panel(response_text[:2000], title="Search Results", border_style="cyan"))

        # Parse page IDs and titles from search results.
        # Notion search returns markdown with page references; IDs may appear
        # with or without dashes, hence the optional "-?" separators.
        uuid_pattern = r"[0-9a-f]{8}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{4}-?[0-9a-f]{12}"
        found_ids = re.findall(uuid_pattern, response_text, re.IGNORECASE)

        if not found_ids:
            console.print("[yellow]No page IDs found in results.[/yellow]")
            return

        console.print(f"\nFound [green]{len(found_ids)}[/green] pages. Fetching details...")

        for page_id in found_ids[:20]:  # Limit to 20 pages
            try:
                fetch_result = await session.call_tool(
                    "notion-fetch",
                    arguments={"id": page_id},
                )
                if not fetch_result.isError and fetch_result.content:
                    page_text = fetch_result.content[0].text
                    # Extract title from first heading or first line
                    lines = page_text.strip().split("\n")
                    title = lines[0].lstrip("# ").strip() if lines else "Untitled"
                    # Clean up title
                    title = title.replace("**", "").strip()
                    if title:
                        cache.upsert(page_id, title)
                        console.print(f"  [green]Linked:[/green] {title} ({page_id[:8]}...)")
            except Exception as e:
                # One bad page should not abort the whole scan.
                console.print(f"  [yellow]Failed to fetch {page_id[:8]}...: {e}[/yellow]")

    total = len(cache.pages)
    console.print(f"\n[bold]Cache now has {total} pages.[/bold]")
    console.print("Cortex will update these pages when relevant code changes are detected.")
584
+
585
+
586
@cli.command()
@click.argument("instruction")
@click.option("--page", "-p", multiple=True, help="Target page(s) to update. Repeatable. Auto-detects if omitted.")
@click.option("--dry-run", is_flag=True, help="Show planned changes without writing to Notion.")
@click.option("--verbose", "-v", is_flag=True, help="Enable debug logging.")
def prompt(instruction: str, page: tuple[str, ...], dry_run: bool, verbose: bool) -> None:
    """Send a natural language instruction to update Notion pages.

    Examples:
        cortex prompt "Make API docs more detailed with examples"
        cortex prompt "Add error handling section" --page "API Reference"
        cortex prompt "Update architecture diagram" -p "Architecture Overview" -p "API Reference"
    """
    from codebase_cortex.utils.logging import setup_logging

    # Switch on debug logging before any work happens.
    if verbose:
        setup_logging(verbose=True)

    settings = Settings.from_env()

    # Guard clause: refuse to run in a repo that has not been set up.
    if not settings.is_initialized:
        console.print("[red]Not initialized. Run 'cortex init' first.[/red]")
        return

    # All of the real work lives in the async implementation.
    asyncio.run(_run_prompt(settings, instruction, list(page), dry_run))
610
+
611
+
612
async def _run_prompt(
    settings: Settings,
    instruction: str,
    page_filters: list[str],
    dry_run: bool,
) -> None:
    """Execute a user-directed prompt against specific Notion pages.

    Pipeline: resolve target pages from the local page cache, fetch their
    current content over MCP, ask the LLM for section-level updates as JSON,
    merge those sections into the existing content, show a summary (stopping
    there when *dry_run*), confirm with the user, then write back to Notion
    and refresh the cache.

    Args:
        settings: Loaded project settings.
        instruction: The user's natural-language request.
        page_filters: Exact page titles to restrict the update to; empty
            means the LLM chooses the relevant pages itself.
        dry_run: When True, preview planned changes without writing.
    """
    from codebase_cortex.agents.doc_writer import strip_notion_metadata
    from codebase_cortex.config import get_llm
    from codebase_cortex.mcp_client import notion_mcp_session, rate_limiter
    from codebase_cortex.notion.page_cache import PageCache
    from codebase_cortex.utils.json_parsing import parse_json_array
    from codebase_cortex.utils.section_parser import merge_sections, parse_sections

    cache = PageCache(cache_path=settings.page_cache_path)
    doc_pages = cache.find_all_doc_pages()

    if not doc_pages:
        console.print("[red]No pages in cache. Run 'cortex run --once' first.[/red]")
        return

    # Resolve target pages
    if page_filters:
        targets = []
        for name in page_filters:
            found = cache.find_by_title(name)
            if found:
                targets.append(found)
            else:
                # Unknown title: list what's available, then bail out.
                console.print(f"[yellow]Page not found: '{name}'[/yellow]")
                console.print("Available pages:")
                for p in doc_pages:
                    console.print(f"  - {p.title}")
                return
    else:
        targets = None  # LLM will auto-select

    # Fetch content of target pages (or all pages if auto-selecting)
    pages_to_fetch = targets if targets else doc_pages
    existing: dict[str, str] = {}  # title -> stripped page content

    console.print(f"Fetching {len(pages_to_fetch)} page(s) from Notion...")
    try:
        async with notion_mcp_session(settings) as session:
            for cp in pages_to_fetch:
                await rate_limiter.acquire()
                try:
                    result = await session.call_tool(
                        "notion-fetch",
                        arguments={"id": cp.page_id},
                    )
                    if not result.isError and result.content:
                        content = strip_notion_metadata(result.content[0].text)
                        existing[cp.title] = content
                except Exception as e:
                    # One unfetchable page should not abort the whole run.
                    console.print(f"[yellow]Could not fetch {cp.title}: {e}[/yellow]")
    except Exception as e:
        console.print(f"[red]Failed to connect to Notion: {e}[/red]")
        return

    if not existing:
        console.print("[red]No page content fetched.[/red]")
        return

    # Build LLM prompt; each page is truncated to 4000 chars to bound context.
    page_contents = ""
    for title, content in existing.items():
        truncated = content[:4000] + ("..." if len(content) > 4000 else "")
        page_contents += f"\n### {title}\n```\n{truncated}\n```\n"

    page_list = "\n".join(f"- {t}" for t in existing.keys())

    if targets:
        scope_note = f"Update ONLY these pages: {', '.join(p.title for p in targets)}"
    else:
        scope_note = (
            "Choose which page(s) need updating based on the instruction. "
            "Only update pages that are relevant."
        )

    llm_prompt = f"""You are a technical documentation writer. A user wants to update their Notion documentation.

## User Instruction
{instruction}

## Scope
{scope_note}

## Current Page Contents
{page_contents}

## Available Pages
{page_list}

Generate updates as a JSON array. Each element has:
- "title": Exact page title (must match one of the available pages)
- "action": "update"
- "section_updates": Array of sections to change. Each has:
  - "heading": The exact markdown heading (e.g., "## API Endpoints")
  - "content": New content for that section
  - "action": "update" to replace existing section, or "create" to add new section

Only include sections that actually change. Unchanged sections are preserved automatically.
Respond with ONLY the JSON array."""

    # Call LLM
    from langchain_core.messages import HumanMessage, SystemMessage

    llm = get_llm(settings)
    console.print("Generating updates...")

    try:
        response = await llm.ainvoke([
            SystemMessage(content="You are a technical documentation writer. Output only valid JSON."),
            HumanMessage(content=llm_prompt),
        ])
        raw = response.content
        # Gemini 3 returns structured content blocks instead of plain strings
        if isinstance(raw, list):
            raw = "\n".join(
                part["text"] if isinstance(part, dict) else str(part)
                for part in raw
                if not isinstance(part, dict) or part.get("type") == "text"
            )
        updates_data = parse_json_array(raw)
    except Exception as e:
        console.print(f"[red]LLM call failed: {e}[/red]")
        return

    if not updates_data:
        console.print("[yellow]No updates suggested by LLM.[/yellow]")
        return

    # Build merged content and show summary
    planned: list[dict] = []
    for update in updates_data:
        title = update.get("title", "")
        if title not in existing:
            # The LLM invented a title that isn't in the workspace; skip it.
            console.print(f"[yellow]Skipping unknown page: {title}[/yellow]")
            continue

        section_updates = update.get("section_updates", [])
        if not section_updates:
            continue

        # Merge LLM-provided sections into the page's current sections;
        # unchanged sections are preserved by merge_sections.
        existing_sections = parse_sections(existing[title])
        merged_content = merge_sections(existing_sections, section_updates)

        # Find page_id
        cached = cache.find_by_title(title)
        if not cached:
            continue

        planned.append({
            "page_id": cached.page_id,
            "title": title,
            "content": merged_content,
            "section_updates": section_updates,
        })

    if not planned:
        console.print("[yellow]No applicable updates.[/yellow]")
        return

    # Show summary
    console.print(Panel("[bold]Planned Changes[/bold]", border_style="blue"))
    for item in planned:
        sections_desc = ", ".join(
            f"{s.get('heading', '?')} ({s.get('action', 'update')})"
            for s in item["section_updates"]
        )
        console.print(f"  [cyan]{item['title']}[/cyan]: {sections_desc}")

    if dry_run:
        console.print("\n[yellow]Dry run — no changes written.[/yellow]")
        for item in planned:
            console.print(Panel(
                item["content"][:2000] + ("..." if len(item["content"]) > 2000 else ""),
                title=f"Preview: {item['title']}",
                border_style="dim",
            ))
        return

    # Confirmation
    if not click.confirm("\nApply these changes?", default=True):
        console.print("[yellow]Cancelled.[/yellow]")
        return

    # Write to Notion
    import hashlib

    try:
        async with notion_mcp_session(settings) as session:
            for item in planned:
                await rate_limiter.acquire()
                await session.call_tool(
                    "notion-update-page",
                    arguments={
                        "page_id": item["page_id"],
                        "command": "replace_content",
                        "new_str": item["content"],
                    },
                )
                # md5 is a cheap change-detection fingerprint here, not security.
                content_hash = hashlib.md5(item["content"].encode()).hexdigest()[:8]
                cache.upsert(item["page_id"], item["title"], content_hash=content_hash)
                console.print(f"  [green]Updated:[/green] {item['title']}")
    except Exception as e:
        console.print(f"[red]Failed to write to Notion: {e}[/red]")
        return

    console.print(f"\n[bold green]Done! Updated {len(planned)} page(s).[/bold green]")
823
+
824
+
825
async def _link_pages(settings: Settings, page_ids: list[str]) -> None:
    """Manually link specific Notion pages by ID.

    Args:
        settings: Loaded project settings.
        page_ids: Raw user input — full Notion URLs, dashed UUIDs, or
            bare 32-char hex IDs are all accepted.

    Each page is fetched over MCP to recover its title, then upserted into
    the local page cache.
    """
    from codebase_cortex.mcp_client import notion_mcp_session
    from codebase_cortex.notion.page_cache import PageCache

    cache = PageCache(cache_path=settings.page_cache_path)

    async with notion_mcp_session(settings) as session:
        for page_id in page_ids:
            # Clean up the ID (remove URL prefix if pasted)
            clean_id = _normalize_page_id(page_id)

            try:
                result = await session.call_tool(
                    "notion-fetch",
                    arguments={"id": clean_id},
                )
                if not result.isError and result.content:
                    page_text = result.content[0].text
                    # First line (usually a markdown heading) becomes the title.
                    lines = page_text.strip().split("\n")
                    title = lines[0].lstrip("# ").strip() if lines else "Untitled"
                    title = title.replace("**", "").strip()
                    cache.upsert(clean_id, title)
                    console.print(f"[green]Linked:[/green] {title} ({clean_id[:8]}...)")
                else:
                    console.print(f"[red]Failed to fetch page {clean_id}[/red]")
            except Exception as e:
                console.print(f"[red]Error linking {clean_id}: {e}[/red]")


def _normalize_page_id(page_id: str) -> str:
    """Normalize a pasted Notion URL / UUID / hex ID into dashed-UUID form.

    Bug fix: the previous code applied ``.split("-")[-1]`` unconditionally,
    which mangled an already-dashed UUID (keeping only its last 12 hex
    chars). Dashed UUIDs are now recognized and returned unchanged.
    """
    import re

    # Strip URL path and query string if a full URL was pasted.
    tail = page_id.split("/")[-1].split("?")[0]

    # Already a dashed UUID? Use it as-is.
    if re.fullmatch(
        r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}",
        tail,
    ):
        return tail

    # Slug URLs end with "Page-Title-<32hex>"; keep the trailing segment.
    candidate = tail.split("-")[-1]
    if len(candidate) == 32:
        # Add dashes to raw 32-char hex
        return (
            f"{candidate[:8]}-{candidate[8:12]}-{candidate[12:16]}-"
            f"{candidate[16:20]}-{candidate[20:]}"
        )
    return candidate