okb 1.1.0a0__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
okb/cli.py CHANGED
@@ -4,6 +4,7 @@ from __future__ import annotations
4
4
 
5
5
  import importlib.resources
6
6
  import json
7
+ import os
7
8
  import shutil
8
9
  import subprocess
9
10
  import sys
@@ -44,7 +45,7 @@ def _check_docker() -> bool:
44
45
 
45
46
 
46
47
  def _get_container_status() -> str | None:
47
- """Get the status of the lkb container. Returns None if not found."""
48
+ """Get the status of the okb container. Returns None if not found."""
48
49
  try:
49
50
  result = subprocess.run(
50
51
  [
@@ -841,7 +842,7 @@ def sync_run(
841
842
  ):
842
843
  """Sync from API sources.
843
844
 
844
- Example: lkb sync run github --repo owner/repo
845
+ Example: okb sync run github --repo owner/repo
845
846
  """
846
847
  import psycopg
847
848
  from psycopg.rows import dict_row
@@ -1209,7 +1210,7 @@ def token_list(ctx, database: str | None):
1209
1210
  for t in tokens:
1210
1211
  desc = f" - {t.description}" if t.description else ""
1211
1212
  last_used = t.last_used_at.strftime("%Y-%m-%d %H:%M") if t.last_used_at else "never"
1212
- click.echo(f" [{t.permissions}] {t.token_hash[:12]}...{desc}")
1213
+ click.echo(f" ID {t.id} [{t.permissions}] {t.token_hash[:12]}...{desc}")
1213
1214
  created = t.created_at.strftime("%Y-%m-%d %H:%M")
1214
1215
  click.echo(f" Created: {created}, Last used: {last_used}")
1215
1216
  except Exception as e:
@@ -1218,26 +1219,43 @@ def token_list(ctx, database: str | None):
1218
1219
 
1219
1220
 
1220
1221
  @token.command("revoke")
1221
- @click.argument("token_value")
1222
+ @click.argument("token_value", required=False)
1223
+ @click.option("--id", "token_id", type=int, default=None, help="Token ID to revoke (from 'okb token list')")
1222
1224
  @click.option("--db", "database", default=None, help="Database to revoke token from")
1223
1225
  @click.pass_context
1224
- def token_revoke(ctx, token_value: str, database: str | None):
1226
+ def token_revoke(ctx, token_value: str | None, token_id: int | None, database: str | None):
1225
1227
  """Revoke (delete) an API token.
1226
1228
 
1227
- TOKEN_VALUE must be the full token string.
1229
+ Either provide the full TOKEN_VALUE or use --id with the token ID from 'okb token list'.
1228
1230
  """
1229
- from .tokens import delete_token
1231
+ from .tokens import delete_token, delete_token_by_id
1232
+
1233
+ if not token_value and not token_id:
1234
+ click.echo("Error: Provide either TOKEN_VALUE or --id", err=True)
1235
+ sys.exit(1)
1236
+
1237
+ if token_value and token_id:
1238
+ click.echo("Error: Provide either TOKEN_VALUE or --id, not both", err=True)
1239
+ sys.exit(1)
1230
1240
 
1231
1241
  db_name = database or ctx.obj.get("database")
1232
1242
  db_cfg = config.get_database(db_name)
1233
1243
 
1234
1244
  try:
1235
- deleted = delete_token(db_cfg.url, token_value)
1236
- if deleted:
1237
- click.echo("Token revoked.")
1245
+ if token_id:
1246
+ deleted = delete_token_by_id(db_cfg.url, token_id)
1247
+ if deleted:
1248
+ click.echo(f"Token ID {token_id} revoked.")
1249
+ else:
1250
+ click.echo(f"Token ID {token_id} not found.", err=True)
1251
+ sys.exit(1)
1238
1252
  else:
1239
- click.echo("Token not found. Make sure you're using the full token string.", err=True)
1240
- sys.exit(1)
1253
+ deleted = delete_token(db_cfg.url, token_value)
1254
+ if deleted:
1255
+ click.echo("Token revoked.")
1256
+ else:
1257
+ click.echo("Token not found. Use --id or provide the full token string.", err=True)
1258
+ sys.exit(1)
1241
1259
  except Exception as e:
1242
1260
  click.echo(f"Error revoking token: {e}", err=True)
1243
1261
  sys.exit(1)
@@ -1276,7 +1294,7 @@ def llm_status(ctx, database: str | None):
1276
1294
  click.echo(f"Cache responses: {config.llm_cache_responses}")
1277
1295
 
1278
1296
  if config.llm_provider == "modal":
1279
- click.echo("Backend: Modal GPU (deploy with: lkb llm deploy)")
1297
+ click.echo("Backend: Modal GPU (deploy with: okb llm deploy)")
1280
1298
  elif config.llm_use_bedrock:
1281
1299
  click.echo(f"Backend: AWS Bedrock (region: {config.llm_aws_region})")
1282
1300
  else:
@@ -1366,7 +1384,9 @@ def llm_clear_cache(ctx, database: str | None, days: int | None, yes: bool):
1366
1384
  def llm_deploy():
1367
1385
  """Deploy the Modal LLM app for open model inference.
1368
1386
 
1369
- This deploys a GPU-accelerated LLM service on Modal using Llama 3.2.
1387
+ Deploys a GPU-accelerated LLM service on Modal using the model from your config.
1388
+ Default: microsoft/Phi-3-mini-4k-instruct (no HuggingFace approval needed).
1389
+
1370
1390
  Required for using provider: modal in your config.
1371
1391
 
1372
1392
  Requires Modal CLI to be installed and authenticated:
@@ -1385,14 +1405,1061 @@ def llm_deploy():
1385
1405
  click.echo(f"Error: modal_llm.py not found at {llm_path}", err=True)
1386
1406
  sys.exit(1)
1387
1407
 
1388
- click.echo(f"Deploying {llm_path} to Modal...")
1389
- click.echo("Note: First deploy downloads the model (~2GB) and may take a few minutes.")
1408
+ # Get model and GPU from config
1409
+ model = config.llm_model or "microsoft/Phi-3-mini-4k-instruct"
1410
+ gpu = config.llm_modal_gpu or "L4"
1411
+ click.echo("Deploying Modal LLM:")
1412
+ click.echo(f" Model: {model}")
1413
+ click.echo(f" GPU: {gpu}")
1414
+ click.echo("Note: First deploy downloads the model and may take a few minutes.")
1415
+
1416
+ # Set model and GPU in environment for Modal to pick up
1417
+ env = os.environ.copy()
1418
+ env["OKB_LLM_MODEL"] = model
1419
+ env["OKB_MODAL_GPU"] = gpu
1420
+
1390
1421
  result = subprocess.run(
1391
1422
  ["modal", "deploy", str(llm_path)],
1392
1423
  cwd=llm_path.parent,
1424
+ env=env,
1393
1425
  )
1394
1426
  sys.exit(result.returncode)
1395
1427
 
1396
1428
 
1429
+ # =============================================================================
1430
+ # Enrich commands
1431
+ # =============================================================================
1432
+
1433
+
1434
@main.group()
def enrich():
    """LLM-based document enrichment (extract TODOs and entities)."""
    # Pure container group: the subcommands registered on it do the work.
1438
+
1439
+
1440
@enrich.command("run")
@click.option("--db", "database", default=None, help="Database to enrich")
@click.option("--source-type", default=None, help="Filter by source type")
@click.option("--project", default=None, help="Filter by project")
@click.option("--query", default=None, help="Semantic search query to filter documents")
@click.option("--path-pattern", default=None, help="SQL LIKE pattern for source_path")
@click.option(
    "--all", "enrich_all", is_flag=True, help="Re-enrich all documents (ignore enriched_at)"
)
@click.option("--dry-run", is_flag=True, help="Show what would be enriched without executing")
@click.option("--limit", default=100, help="Maximum documents to process")
@click.option(
    "--workers",
    default=None,
    # ThreadPoolExecutor raises ValueError when max_workers <= 0, so reject
    # such values at parse time with a regular click usage error.
    type=click.IntRange(min=1),
    help="Parallel workers (default: docs/5, min 1)",
)
@click.pass_context
def enrich_run(
    ctx,
    database: str | None,
    source_type: str | None,
    project: str | None,
    query: str | None,
    path_pattern: str | None,
    enrich_all: bool,
    dry_run: bool,
    limit: int,
    workers: int | None,
):
    """Run enrichment on documents to extract TODOs and entities.

    By default, only processes documents that haven't been enriched yet.
    Use --all to re-enrich all documents (e.g., after changing enrichment config).

    Examples:

    okb enrich run # Enrich un-enriched documents

    okb enrich run --dry-run # Show what would be enriched

    okb enrich run --all # Re-enrich everything

    okb enrich run --source-type markdown # Only markdown files

    okb enrich run --query "meeting notes" # Filter by semantic search

    okb enrich run --path-pattern '%myrepo%' # Filter by source path

    okb enrich run --workers 8 # Use 8 parallel workers
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed

    from .llm import get_llm
    from .llm.enrich import EnrichmentConfig, get_unenriched_documents, process_enrichment

    # Check LLM is configured before doing any work
    if get_llm() is None:
        click.echo("Error: No LLM provider configured.", err=True)
        click.echo("", err=True)
        click.echo("Enrichment requires an LLM to extract TODOs and entities.", err=True)
        click.echo("Set ANTHROPIC_API_KEY or configure in ~/.config/okb/config.yaml:", err=True)
        click.echo("", err=True)
        click.echo(" llm:", err=True)
        click.echo(" provider: claude", err=True)
        click.echo(" model: claude-haiku-4-5-20251001", err=True)
        click.echo("", err=True)
        click.echo("Run 'okb llm status' to check configuration.", err=True)
        ctx.exit(1)

    db_name = database or ctx.obj.get("database")
    db_cfg = config.get_database(db_name)

    # The configured enrichment version is only forwarded when --all is given;
    # otherwise None is passed to the document query.
    enrichment_version = config.enrichment_version if enrich_all else None

    click.echo(f"Scanning database '{db_cfg.name}' for documents to enrich...")
    if dry_run:
        click.echo("(dry run - no changes will be made)")

    docs = get_unenriched_documents(
        db_url=db_cfg.url,
        source_type=source_type,
        project=project,
        query=query,
        path_pattern=path_pattern,
        enrichment_version=enrichment_version,
        limit=limit,
    )

    if not docs:
        click.echo("No documents need enrichment.")
        return

    click.echo(f"Found {len(docs)} documents to enrich")

    if dry_run:
        # Preview at most 20 titles, then summarize the remainder.
        for doc in docs[:20]:
            click.echo(f" - {doc['title']} ({doc['source_type']})")
        if len(docs) > 20:
            click.echo(f" ... and {len(docs) - 20} more")
        return

    # Calculate workers if not specified: floor(docs/5), minimum 1
    if workers is None:
        workers = max(1, len(docs) // 5)

    # Build config
    enrich_config = EnrichmentConfig.from_config(
        {
            "enabled": config.enrichment_enabled,
            "version": config.enrichment_version,
            "extract_todos": config.enrichment_extract_todos,
            "extract_entities": config.enrichment_extract_entities,
            "auto_create_todos": config.enrichment_auto_create_todos,
            "auto_create_entities": config.enrichment_auto_create_entities,
            "min_confidence_todo": config.enrichment_min_confidence_todo,
            "min_confidence_entity": config.enrichment_min_confidence_entity,
        }
    )

    total_todos = 0
    total_entities_pending = 0
    total_entities_created = 0
    completed = 0
    errors = 0

    def enrich_one(doc: dict) -> tuple[dict, dict | None, str | None]:
        """Process a single document. Returns (doc, stats, error)."""
        proj = doc["metadata"].get("project") if doc["metadata"] else None
        try:
            stats = process_enrichment(
                document_id=str(doc["id"]),
                source_path=doc["source_path"],
                title=doc["title"],
                content=doc["content"],
                source_type=doc["source_type"],
                db_url=db_cfg.url,
                config=enrich_config,
                project=proj,
            )
            return doc, stats, None
        except Exception as e:
            # Some exceptions stringify to ""; fall back to the class name so
            # the failure is never reported as an empty message.
            return doc, None, str(e) or type(e).__name__

    click.echo(f"Processing with {workers} parallel workers...")

    with ThreadPoolExecutor(max_workers=workers) as executor:
        futures = {executor.submit(enrich_one, doc): doc for doc in docs}

        # Report each document as soon as its worker finishes.
        for future in as_completed(futures):
            doc, stats, error = future.result()
            completed += 1
            title = doc["title"][:40] if doc["title"] else "Untitled"

            # Compare against None: a truthiness test would treat an empty
            # error string as success and then index into stats=None.
            if error is not None:
                errors += 1
                click.echo(f"[{completed}/{len(docs)}] {title}... -> error: {error[:50]}")
                continue

            total_todos += stats["todos_created"]
            total_entities_pending += stats["entities_pending"]
            total_entities_created += stats["entities_created"]

            parts = []
            if stats["todos_created"]:
                parts.append(f"{stats['todos_created']} TODOs")
            if stats["entities_pending"]:
                parts.append(f"{stats['entities_pending']} pending")
            if stats["entities_created"]:
                parts.append(f"{stats['entities_created']} entities")
            if parts:
                click.echo(f"[{completed}/{len(docs)}] {title}... -> {', '.join(parts)}")
            else:
                click.echo(f"[{completed}/{len(docs)}] {title}... -> nothing extracted")

    click.echo("")
    click.echo("Summary:")
    click.echo(f" Documents processed: {len(docs)}")
    if errors:
        click.echo(f" Errors: {errors}")
    click.echo(f" TODOs created: {total_todos}")
    click.echo(f" Entities pending review: {total_entities_pending}")
    click.echo(f" Entities auto-created: {total_entities_created}")
1619
+
1620
+
1621
@enrich.command("pending")
@click.option("--db", "database", default=None, help="Database to check")
@click.option("--type", "entity_type", default=None, help="Filter by entity type")
@click.option("--limit", default=50, help="Maximum results")
@click.pass_context
def enrich_pending(ctx, database: str | None, entity_type: str | None, limit: int):
    """List pending entity suggestions awaiting review.

    Shows entities extracted from documents that need approval before
    becoming searchable. Use 'okb enrich approve' or 'okb enrich reject'
    to process them.
    """
    from .llm.enrich import list_pending_entities

    # An explicit --db wins over the database stored on the click context.
    db_cfg = config.get_database(database or ctx.obj.get("database"))
    pending = list_pending_entities(db_cfg.url, entity_type=entity_type, limit=limit)

    if not pending:
        click.echo("No pending entity suggestions.")
        return

    click.echo(f"Pending entities ({len(pending)}):\n")
    for entity in pending:
        # The percentage suffix is shown only for a truthy confidence value.
        score = entity.get("confidence", 0)
        suffix = ""
        if score:
            suffix = f" ({score:.0%})"
        click.echo(f" [{entity['entity_type']}] {entity['entity_name']}{suffix}")
        click.echo(f" ID: {entity['id']}")

        # Descriptions longer than 60 characters are cut and marked with "...".
        summary = entity.get("description")
        if summary:
            if len(summary) > 60:
                summary = summary[:60] + "..."
            click.echo(f" {summary}")

        # At most the first three aliases are listed.
        if entity.get("aliases"):
            shown_aliases = entity["aliases"][:3]
            click.echo(f" Aliases: {', '.join(shown_aliases)}")

        click.echo(f" Source: {entity['source_title']}")
        click.echo("")

    click.echo("Use 'okb enrich approve <id>' or 'okb enrich reject <id>' to process.")
1663
+
1664
+
1665
@enrich.command("approve")
@click.argument("pending_id")
@click.option("--db", "database", default=None, help="Database")
@click.option("--local", is_flag=True, help="Use local CPU embedding instead of Modal")
@click.pass_context
def enrich_approve(ctx, pending_id: str, database: str | None, local: bool):
    """Approve a pending entity, creating it as a searchable document."""
    from .llm.enrich import approve_entity

    # An explicit --db wins over the database stored on the click context.
    db_cfg = config.get_database(database or ctx.obj.get("database"))

    # --local turns Modal off for this call.
    created_path = approve_entity(db_cfg.url, pending_id, use_modal=not local)

    # Guard clause: a falsy result means nothing was approved.
    if not created_path:
        click.echo("Failed to approve entity. ID may be invalid or already processed.", err=True)
        sys.exit(1)

    click.echo(f"Entity approved and created: {created_path}")
1683
+
1684
+
1685
@enrich.command("reject")
@click.argument("pending_id")
@click.option("--db", "database", default=None, help="Database")
@click.pass_context
def enrich_reject(ctx, pending_id: str, database: str | None):
    """Reject a pending entity suggestion."""
    from .llm.enrich import reject_entity

    # An explicit --db wins over the database stored on the click context.
    db_cfg = config.get_database(database or ctx.obj.get("database"))

    rejected = reject_entity(db_cfg.url, pending_id)

    # Guard clause: a falsy result is reported on stderr with exit status 1.
    if not rejected:
        click.echo("Failed to reject entity. ID may be invalid or already processed.", err=True)
        sys.exit(1)

    click.echo("Entity rejected.")
1701
+
1702
+
1703
@enrich.command("analyze")
@click.option("--db", "database", default=None, help="Database to analyze")
@click.option("--project", default=None, help="Analyze specific project only")
@click.option("--sample-size", default=15, help="Number of documents to sample")
@click.option("--no-update", is_flag=True, help="Don't update database metadata")
@click.option("--stats-only", is_flag=True, help="Show stats without LLM analysis")
@click.pass_context
def enrich_analyze(
    ctx,
    database: str | None,
    project: str | None,
    sample_size: int,
    no_update: bool,
    stats_only: bool,
):
    """Analyze knowledge base and update description/topics.

    Uses entity aggregation and document sampling to understand the overall
    content and themes in the knowledge base. Generates a description and
    topic keywords using LLM analysis.

    Examples:

    okb enrich analyze # Analyze entire database

    okb enrich analyze --stats-only # Show stats without LLM call

    okb enrich analyze --project myproject # Analyze specific project

    okb enrich analyze --no-update # Analyze without updating metadata
    """
    from .llm.analyze import (
        analyze_database,
        get_content_stats,
        get_entity_summary,
    )

    # An explicit --db wins over the database stored on the click context.
    db_cfg = config.get_database(database or ctx.obj.get("database"))

    if project:
        scope = f"project '{project}'"
    else:
        scope = f"database '{db_cfg.name}'"
    click.echo(f"Analyzing {scope}...\n")

    # Statistics and the entity summary are gathered on every run.
    stats = get_content_stats(db_cfg.url, project)
    entities = get_entity_summary(db_cfg.url, project, limit=20)

    # --- Content statistics -------------------------------------------------
    click.echo("Content Statistics:")
    click.echo(f" Documents: {stats['total_documents']:,}")
    click.echo(f" Tokens: ~{stats['total_tokens']:,}")

    type_counts = stats["source_types"]
    if type_counts:
        # Largest count first.
        ranked = sorted(type_counts.items(), key=lambda item: -item[1])
        labels = [f"{kind}: {count}" for kind, count in ranked]
        if len(labels) <= 4:
            click.echo(f" Source types: {', '.join(labels)}")
        else:
            # More than four types: one per line.
            click.echo(" Source types:")
            for label in labels:
                click.echo(f" {label}")

    if stats["projects"]:
        click.echo(f" Projects: {', '.join(stats['projects'])}")

    date_range = stats["date_range"]
    if date_range["earliest"]:
        first_seen = date_range["earliest"]
        last_seen = date_range["latest"]
        click.echo(f" Date range: {first_seen} to {last_seen}")

    click.echo("")

    # --- Top entities -------------------------------------------------------
    if not entities:
        click.echo("No entities extracted yet.")
        click.echo("Run 'okb enrich run' to extract entities from documents.\n")
    else:
        click.echo("Top Entities (by mentions):")
        # Only the first ten of the fetched entities are printed.
        for rank, entity in enumerate(entities[:10], 1):
            name = entity["name"]
            etype = entity["type"]
            refs = entity["ref_count"]
            doc_total = entity["doc_count"]
            click.echo(f" {rank}. {name} ({etype}) - {refs} mentions in {doc_total} docs")
        click.echo("")

    if stats_only:
        return

    # --- LLM analysis -------------------------------------------------------
    from .llm import get_llm

    if get_llm() is None:
        # Every line of this notice goes to stderr, in this order.
        for line in (
            "Error: No LLM provider configured.",
            "",
            "Analysis requires an LLM to generate description and topics.",
            "Set ANTHROPIC_API_KEY or configure in ~/.config/okb/config.yaml:",
            "",
            " llm:",
            " provider: claude",
            "",
            "Use --stats-only to see statistics without LLM analysis.",
        ):
            click.echo(line, err=True)
        ctx.exit(1)

    click.echo(f"Sampling {sample_size} documents for analysis...")
    click.echo("Generating description and topics...")
    click.echo("")

    try:
        analysis = analyze_database(
            db_url=db_cfg.url,
            project=project,
            sample_size=sample_size,
            auto_update=not no_update,
        )

        click.echo("Analysis Complete:")
        click.echo(f" Description: {analysis.description}")
        click.echo(f" Topics: {', '.join(analysis.topics)}")

        # Both outcomes print a blank separator line first.
        click.echo("")
        if no_update:
            click.echo("(metadata not updated - use without --no-update to save)")
        else:
            click.echo("Updated database metadata.")

    except Exception as e:
        click.echo(f"Error during analysis: {e}", err=True)
        ctx.exit(1)
1829
+
1830
+
1831
@enrich.command("consolidate")
@click.option("--db", "database", default=None, help="Database to consolidate")
@click.option(
    "--duplicates/--no-duplicates",
    "detect_duplicates",
    default=True,
    help="Detect duplicate entities",
)
@click.option(
    "--cross-doc/--no-cross-doc",
    "detect_cross_doc",
    default=True,
    help="Detect cross-document entities",
)
@click.option(
    "--clusters/--no-clusters",
    "build_clusters",
    default=True,
    help="Build topic clusters",
)
@click.option(
    "--relationships/--no-relationships",
    "extract_relationships",
    default=True,
    help="Extract entity relationships",
)
@click.option("--dry-run", is_flag=True, help="Show what would be found without creating proposals")
@click.pass_context
def enrich_consolidate(
    ctx,
    database: str | None,
    detect_duplicates: bool,
    detect_cross_doc: bool,
    build_clusters: bool,
    extract_relationships: bool,
    dry_run: bool,
):
    """Run entity consolidation pipeline.

    Detects duplicate entities, cross-document mentions, builds topic clusters,
    and extracts entity relationships. Creates pending proposals for review
    rather than auto-applying changes.

    Examples:

    okb enrich consolidate # Run full consolidation

    okb enrich consolidate --dry-run # Show what would be found

    okb enrich consolidate --no-clusters # Skip clustering

    okb enrich consolidate --duplicates --no-cross-doc --no-clusters --no-relationships
    """
    from .llm import get_llm
    from .llm.consolidate import format_consolidation_result, run_consolidation

    # The LLM check runs unconditionally, before any database access.
    if get_llm() is None:
        for line in (
            "Error: No LLM provider configured.",
            "Consolidation requires an LLM for deduplication and clustering.",
            "Set ANTHROPIC_API_KEY or configure in ~/.config/okb/config.yaml",
        ):
            click.echo(line, err=True)
        ctx.exit(1)

    # An explicit --db wins over the database stored on the click context.
    db_cfg = config.get_database(database or ctx.obj.get("database"))

    click.echo(f"Running consolidation on database '{db_cfg.name}'...")
    if dry_run:
        click.echo("(dry run - no proposals will be created)")

    outcome = run_consolidation(
        db_url=db_cfg.url,
        detect_duplicates=detect_duplicates,
        detect_cross_doc=detect_cross_doc,
        build_clusters=build_clusters,
        extract_relationships=extract_relationships,
        dry_run=dry_run,
    )

    # Render the report before printing anything, then emit it after a blank line.
    report = format_consolidation_result(outcome)
    click.echo("")
    click.echo(report)

    # The follow-up hint applies only to real runs that found merge candidates.
    if dry_run:
        return
    if outcome.duplicates_found > 0 or outcome.cross_doc_candidates > 0:
        click.echo("")
        click.echo("Use 'okb enrich merge-proposals' to review pending merges.")
1902
+
1903
+
1904
@enrich.command("merge-proposals")
@click.option("--db", "database", default=None, help="Database to check")
@click.option("--limit", default=50, help="Maximum results")
@click.pass_context
def enrich_merge_proposals(ctx, database: str | None, limit: int):
    """List pending entity merge proposals.

    Shows duplicate entities and cross-document mentions awaiting review.
    Use 'okb enrich approve-merge' or 'okb enrich reject-merge' to process.
    """
    from .llm.extractors.dedup import list_pending_merges

    # An explicit --db wins over the database stored on the click context.
    db_cfg = config.get_database(database or ctx.obj.get("database"))
    proposals = list_pending_merges(db_cfg.url, limit=limit)

    if not proposals:
        click.echo("No pending merge proposals.")
        return

    click.echo(f"Pending merge proposals ({len(proposals)}):\n")
    for proposal in proposals:
        # The percentage suffix is shown only for a truthy confidence value.
        score = proposal.get("confidence", 0)
        suffix = ""
        if score:
            suffix = f" ({score:.0%})"
        click.echo(f" {proposal['canonical_name']} <- {proposal['duplicate_name']}{suffix}")
        click.echo(f" ID: {proposal['id']}")
        # A missing reason key is displayed as 'similarity'.
        click.echo(f" Reason: {proposal.get('reason', 'similarity')}")
        click.echo("")

    click.echo("Use 'okb enrich approve-merge <id>' or 'okb enrich reject-merge <id>' to process.")
1935
+
1936
+
1937
@enrich.command("approve-merge")
@click.argument("merge_id")
@click.option("--db", "database", default=None, help="Database")
@click.pass_context
def enrich_approve_merge(ctx, merge_id: str, database: str | None):
    """Approve a pending entity merge.

    Merges the duplicate entity into the canonical entity:
    - Redirects all entity references from duplicate to canonical
    - Adds duplicate's name as an alias for canonical
    - Deletes the duplicate entity document
    """
    from .llm.extractors.dedup import approve_merge

    # An explicit --db wins over the database stored on the click context.
    db_cfg = config.get_database(database or ctx.obj.get("database"))

    merged = approve_merge(db_cfg.url, merge_id)

    # Guard clause: a falsy result is reported on stderr with exit status 1.
    if not merged:
        click.echo("Failed to approve merge. ID may be invalid or already processed.", err=True)
        sys.exit(1)

    click.echo("Merge approved and executed.")
1959
+
1960
+
1961
@enrich.command("reject-merge")
@click.argument("merge_id")
@click.option("--db", "database", default=None, help="Database")
@click.pass_context
def enrich_reject_merge(ctx, merge_id: str, database: str | None):
    """Reject a pending entity merge proposal."""
    from .llm.extractors.dedup import reject_merge

    # An explicit --db wins over the database stored on the click context.
    db_cfg = config.get_database(database or ctx.obj.get("database"))

    rejected = reject_merge(db_cfg.url, merge_id)

    # Guard clause: a falsy result is reported on stderr with exit status 1.
    if not rejected:
        click.echo("Failed to reject merge. ID may be invalid or already processed.", err=True)
        sys.exit(1)

    click.echo("Merge rejected.")
1977
+
1978
+
1979
@enrich.command("clusters")
@click.option("--db", "database", default=None, help="Database to check")
@click.option("--limit", default=20, help="Maximum clusters to show")
@click.pass_context
def enrich_clusters(ctx, database: str | None, limit: int):
    """List topic clusters.

    Shows groups of related entities and documents organized by theme.
    """
    from .llm.consolidate import get_topic_clusters

    # An explicit --db wins over the database stored on the click context.
    db_cfg = config.get_database(database or ctx.obj.get("database"))
    found = get_topic_clusters(db_cfg.url, limit=limit)

    if not found:
        click.echo("No topic clusters found.")
        click.echo("Run 'okb enrich consolidate' to generate clusters.")
        return

    click.echo(f"Topic clusters ({len(found)}):\n")
    for cluster in found:
        click.echo(f" {cluster['name']}")

        # Descriptions longer than 70 characters are cut and marked with "...".
        if cluster.get("description"):
            blurb = cluster["description"]
            if len(blurb) > 70:
                blurb = blurb[:70] + "..."
            click.echo(f" {blurb}")

        click.echo(f" Members: {cluster['member_count']} entities/documents")

        # At most five sample members are listed.
        if cluster.get("sample_members"):
            examples = ", ".join(cluster["sample_members"][:5])
            click.echo(f" Examples: {examples}")

        click.echo("")
2011
+
2012
+
2013
@enrich.command("relationships")
@click.option("--db", "database", default=None, help="Database to check")
@click.option("--entity", "entity_name", default=None, help="Filter to specific entity")
@click.option(
    "--type",
    "relationship_type",
    default=None,
    help="Filter by relationship type (works_for, uses, belongs_to, related_to)",
)
@click.option("--limit", default=50, help="Maximum results")
@click.pass_context
def enrich_relationships(
    ctx,
    database: str | None,
    entity_name: str | None,
    relationship_type: str | None,
    limit: int,
):
    """List entity relationships.

    Shows connections between entities (person→org, tech→project, etc.).

    Examples:

    okb enrich relationships # All relationships

    okb enrich relationships --entity "Django" # Filter to one entity

    okb enrich relationships --type works_for # Filter by type
    """
    from .llm.consolidate import get_entity_relationships

    # An explicit --db wins over the database stored on the click context.
    db_cfg = config.get_database(database or ctx.obj.get("database"))

    links = get_entity_relationships(
        db_cfg.url,
        entity_name=entity_name,
        relationship_type=relationship_type,
        limit=limit,
    )

    if not links:
        # NOTE(review): the flattened source does not show whether the
        # consolidate hint belongs to the else branch only or follows the
        # if/else; it is kept in the else branch here - confirm against the
        # packaged cli.py.
        if entity_name:
            click.echo(f"No relationships found for entity '{entity_name}'.")
        else:
            click.echo("No relationships found.")
            click.echo("Run 'okb enrich consolidate' to extract relationships.")
        return

    click.echo(f"Entity relationships ({len(links)}):\n")
    for link in links:
        # The percentage suffix is shown only for a truthy confidence value.
        score = link.get("confidence", 0)
        suffix = ""
        if score:
            suffix = f" ({score:.0%})"
        click.echo(
            f" {link['source_name']} --[{link['relationship_type']}]--> {link['target_name']}{suffix}"
        )

        # Evidence longer than 60 characters is cut and marked with "...".
        if link.get("evidence"):
            snippet = link["evidence"]
            if len(snippet) > 60:
                snippet = snippet[:60] + "..."
            click.echo(f" Evidence: {snippet}")

        click.echo("")
2068
+
2069
+
2070
def _summarize_enrichment_stats(stats: dict) -> str:
    """Return a one-line, human-readable summary of one document's enrichment stats."""
    labelled_counters = (
        ("todos_created", "TODOs"),
        ("entities_pending", "pending"),
        ("entities_created", "entities"),
    )
    # Only non-zero counters are worth mentioning.
    parts = [f"{stats[key]} {label}" for key, label in labelled_counters if stats[key]]
    return ", ".join(parts) if parts else "nothing extracted"


@enrich.command("all")
@click.option("--db", "database", default=None, help="Database to enrich")
@click.option("--source-type", default=None, help="Filter by source type")
@click.option("--project", default=None, help="Filter by project")
@click.option("--query", default=None, help="Semantic search query to filter documents")
@click.option("--path-pattern", default=None, help="SQL LIKE pattern for source_path")
@click.option("--limit", default=100, help="Maximum documents to process")
@click.option(
    "--workers",
    default=None,
    type=int,
    help="Parallel workers (default: docs/5, min 1)",
)
@click.option("--dry-run", is_flag=True, help="Show what would be done without executing")
@click.option("--skip-consolidate", is_flag=True, help="Skip consolidation phase")
@click.option(
    "--duplicates/--no-duplicates",
    "detect_duplicates",
    default=True,
    help="Detect duplicate entities during consolidation",
)
@click.option(
    "--clusters/--no-clusters",
    "build_clusters",
    default=True,
    help="Build topic clusters during consolidation",
)
@click.option(
    "--relationships/--no-relationships",
    "extract_relationships",
    default=True,
    help="Extract entity relationships during consolidation",
)
@click.pass_context
def enrich_all(
    ctx,
    database: str | None,
    source_type: str | None,
    project: str | None,
    query: str | None,
    path_pattern: str | None,
    limit: int,
    workers: int | None,
    dry_run: bool,
    skip_consolidate: bool,
    detect_duplicates: bool,
    build_clusters: bool,
    extract_relationships: bool,
):
    """Run full enrichment pipeline: extraction + consolidation.

    Combines 'enrich run' and 'enrich consolidate' in one command for
    one-shot enrichment of documents.

    Examples:

        okb enrich all                         # Run full pipeline

        okb enrich all --dry-run               # Preview what would happen

        okb enrich all --skip-consolidate      # Run extraction only

        okb enrich all --source-type markdown  # Filter to markdown files

        okb enrich all --no-clusters           # Skip cluster building
    """
    # NOTE: the docstring above doubles as the `--help` text, so maintainer
    # notes live in comments instead.
    #
    # Flow:
    #   1. Fetch documents that still need enrichment (filtered by the options).
    #   2. Dry run: list up to 20 of them. Otherwise enrich them in a thread
    #      pool and print per-document results plus a summary.
    #   3. Unless --skip-consolidate is given, run consolidation (which also
    #      receives the dry_run flag) and print its formatted result.
    from concurrent.futures import ThreadPoolExecutor, as_completed

    from .llm import get_llm
    from .llm.consolidate import format_consolidation_result, run_consolidation
    from .llm.enrich import EnrichmentConfig, get_unenriched_documents, process_enrichment

    # Check LLM is configured
    if get_llm() is None:
        click.echo("Error: No LLM provider configured.", err=True)
        click.echo("Set ANTHROPIC_API_KEY or configure in ~/.config/okb/config.yaml", err=True)
        ctx.exit(1)

    db_name = database or ctx.obj.get("database")
    db_cfg = config.get_database(db_name)

    # Phase 1: Enrichment
    click.echo("=== Phase 1: Enrichment ===")
    click.echo(f"Scanning database '{db_cfg.name}' for documents to enrich...")
    if dry_run:
        click.echo("(dry run - no changes will be made)")

    docs = get_unenriched_documents(
        db_url=db_cfg.url,
        source_type=source_type,
        project=project,
        query=query,
        path_pattern=path_pattern,
        limit=limit,
    )

    total_todos = 0
    total_entities_pending = 0
    total_entities_created = 0

    if not docs:
        click.echo("No documents need enrichment.")
    else:
        click.echo(f"Found {len(docs)} documents to enrich")

        if dry_run:
            for doc in docs[:20]:
                # Same fallback as the real run so untitled docs never print "None".
                listed_title = doc["title"] or "Untitled"
                click.echo(f" - {listed_title} ({doc['source_type']})")
            if len(docs) > 20:
                click.echo(f" ... and {len(docs) - 20} more")
        else:
            # Build config
            enrich_config = EnrichmentConfig.from_config(
                {
                    "enabled": config.enrichment_enabled,
                    "version": config.enrichment_version,
                    "extract_todos": config.enrichment_extract_todos,
                    "extract_entities": config.enrichment_extract_entities,
                    "auto_create_todos": config.enrichment_auto_create_todos,
                    "auto_create_entities": config.enrichment_auto_create_entities,
                    "min_confidence_todo": config.enrichment_min_confidence_todo,
                    "min_confidence_entity": config.enrichment_min_confidence_entity,
                }
            )

            # Calculate workers: default to one worker per five documents.
            if workers is None:
                workers = len(docs) // 5
            # ThreadPoolExecutor raises ValueError for max_workers <= 0, so
            # clamp both the computed default and an explicit --workers value.
            workers = max(1, workers)

            completed = 0
            errors = 0

            def enrich_one(doc: dict) -> tuple[dict, dict | None, str | None]:
                """Enrich one document; return (doc, stats, None) or (doc, None, error)."""
                proj = doc["metadata"].get("project") if doc["metadata"] else None
                try:
                    stats = process_enrichment(
                        document_id=str(doc["id"]),
                        source_path=doc["source_path"],
                        title=doc["title"],
                        content=doc["content"],
                        source_type=doc["source_type"],
                        db_url=db_cfg.url,
                        config=enrich_config,
                        project=proj,
                    )
                    return doc, stats, None
                except Exception as e:
                    # Report the failure as data so one bad document does not
                    # abort the whole batch.
                    return doc, None, str(e)

            click.echo(f"Processing with {workers} parallel workers...")

            with ThreadPoolExecutor(max_workers=workers) as executor:
                futures = {executor.submit(enrich_one, doc): doc for doc in docs}

                # Results are reported in completion order, not submission order.
                for future in as_completed(futures):
                    doc, stats, error = future.result()
                    completed += 1
                    title = doc["title"][:40] if doc["title"] else "Untitled"
                    progress = f"[{completed}/{len(docs)}] {title}..."

                    if error:
                        errors += 1
                        click.echo(f"{progress} -> error: {error[:50]}")
                        continue

                    total_todos += stats["todos_created"]
                    total_entities_pending += stats["entities_pending"]
                    total_entities_created += stats["entities_created"]

                    click.echo(f"{progress} -> {_summarize_enrichment_stats(stats)}")

            click.echo("")
            click.echo("Enrichment summary:")
            click.echo(f" Documents processed: {len(docs)}")
            if errors:
                click.echo(f" Errors: {errors}")
            click.echo(f" TODOs created: {total_todos}")
            click.echo(f" Entities pending review: {total_entities_pending}")
            click.echo(f" Entities auto-created: {total_entities_created}")

    # Phase 2: Consolidation
    if skip_consolidate:
        click.echo("")
        click.echo("Skipping consolidation (--skip-consolidate)")
        return

    click.echo("")
    click.echo("=== Phase 2: Consolidation ===")

    result = run_consolidation(
        db_url=db_cfg.url,
        detect_duplicates=detect_duplicates,
        detect_cross_doc=True,
        build_clusters=build_clusters,
        extract_relationships=extract_relationships,
        dry_run=dry_run,
    )

    output = format_consolidation_result(result)
    click.echo(output)

    # Only point at the review command when something is actually waiting.
    if not dry_run and (result.duplicates_found > 0 or result.cross_doc_candidates > 0):
        click.echo("")
        click.echo("Use 'okb enrich review' to review pending entities and merges.")
2266
+
2267
+
2268
def _prompt_review_choice() -> str:
    """Prompt for an approve/reject/skip/quit decision; return it as an uppercase letter."""
    return click.prompt(
        "[A]pprove [R]eject [S]kip [Q]uit",
        type=click.Choice(["A", "R", "S", "Q", "a", "r", "s", "q"]),
        show_choices=False,
    ).upper()


@enrich.command("review")
@click.option("--db", "database", default=None, help="Database to review")
@click.option("--entities-only", is_flag=True, help="Only review pending entities")
@click.option("--merges-only", is_flag=True, help="Only review pending merges")
@click.option("--local", is_flag=True, help="Use local CPU embedding instead of Modal")
@click.option("--wait/--no-wait", default=True, help="Wait for embeddings to complete")
@click.pass_context
def enrich_review(
    ctx, database: str | None, entities_only: bool, merges_only: bool, local: bool, wait: bool
):
    """Interactive review of pending entities and merge proposals.

    Loops through pending items with approve/reject prompts.
    Press Q to quit early - remaining items stay pending for later.

    Entity approvals run asynchronously - you can continue reviewing while
    embeddings are generated. Use --no-wait to exit immediately after reviewing.

    Examples:

        okb enrich review                  # Review all pending items

        okb enrich review --entities-only  # Only review entities

        okb enrich review --merges-only    # Only review merges

        okb enrich review --local          # Use local CPU embedding

        okb enrich review --no-wait        # Don't wait for embeddings
    """
    # NOTE: the docstring above doubles as the `--help` text, so maintainer
    # notes live in comments instead.

    from .llm.enrich import (
        approve_entity_async,
        list_pending_entities,
        reject_entity,
        shutdown_executor,
    )
    from .llm.extractors.dedup import approve_merge, list_pending_merges, reject_merge

    db_name = database or ctx.obj.get("database")
    db_cfg = config.get_database(db_name)
    use_modal = not local

    # Get pending items. A list excluded by a flag is left empty, so the
    # corresponding review loop below simply does nothing.
    entities = [] if merges_only else list_pending_entities(db_cfg.url, limit=100)
    merges = [] if entities_only else list_pending_merges(db_cfg.url, limit=100)

    if not entities and not merges:
        click.echo("No pending items to review.")
        return

    click.echo(f"Pending: {len(entities)} entities, {len(merges)} merges")
    click.echo("")

    # Counters
    approved = 0
    rejected = 0
    skipped = 0

    # Track async approval futures
    pending_futures: list[tuple] = []  # (future, entity_name)

    # Set when the reviewer presses Q; also suppresses the merge review.
    quit_requested = False

    # Review entities
    for index, entity in enumerate(entities, 1):
        # Surface progress of approvals that were queued earlier.
        done_count = sum(1 for future, _ in pending_futures if future.done())
        if done_count > 0:
            total = len(pending_futures)
            click.echo(click.style(f" ({done_count}/{total} embeddings done)", dim=True))

        click.echo(click.style(f"=== Entity Review [{index}/{len(entities)}] ===", bold=True))
        click.echo(f"Name: {click.style(entity['entity_name'], fg='cyan')}")
        click.echo(f"Type: {entity['entity_type']}")
        confidence = entity.get("confidence", 0)
        if confidence:
            click.echo(f"Confidence: {confidence:.0%}")
        if entity.get("description"):
            description = entity["description"]
            # Truncate long descriptions so each item stays compact.
            shown = description[:80] + "..." if len(description) > 80 else description
            click.echo(f"Description: {shown}")
        if entity.get("aliases"):
            click.echo(f"Aliases: {', '.join(entity['aliases'][:5])}")
        click.echo(f"Source: {entity['source_title']}")
        click.echo("")

        choice = _prompt_review_choice()

        if choice == "Q":
            click.echo("Quitting review...")
            quit_requested = True
            break
        elif choice == "A":
            # Submit async approval; the outcome is collected after the review.
            future = approve_entity_async(db_cfg.url, str(entity["id"]), use_modal)
            pending_futures.append((future, entity["entity_name"]))
            click.echo(click.style("⏳ Queued for approval", fg="cyan"))
            approved += 1
        elif choice == "R":
            if reject_entity(db_cfg.url, str(entity["id"])):
                click.echo(click.style("✗ Rejected", fg="yellow"))
                rejected += 1
            else:
                click.echo(click.style("✗ Failed to reject", fg="red"))
        else:
            click.echo("Skipped")
            skipped += 1

        click.echo("")

    # Review merges (only if we didn't quit early)
    if not quit_requested:
        for index, merge in enumerate(merges, 1):
            click.echo(click.style(f"=== Merge Review [{index}/{len(merges)}] ===", bold=True))
            cname = click.style(merge["canonical_name"], fg="cyan")
            ctype = merge.get("canonical_type", "unknown")
            click.echo(f"Canonical: {cname} ({ctype})")
            dname = click.style(merge["duplicate_name"], fg="yellow")
            dtype = merge.get("duplicate_type", "unknown")
            click.echo(f"Duplicate: {dname} ({dtype})")
            confidence = merge.get("confidence", 0)
            if confidence:
                click.echo(f"Confidence: {confidence:.0%}")
            click.echo(f"Reason: {merge.get('reason', 'similarity')}")
            click.echo("")

            choice = _prompt_review_choice()

            if choice == "Q":
                click.echo("Quitting review...")
                break
            elif choice == "A":
                if approve_merge(db_cfg.url, str(merge["id"])):
                    click.echo(click.style("✓ Merged", fg="green"))
                    approved += 1
                else:
                    click.echo(click.style("✗ Failed to merge", fg="red"))
            elif choice == "R":
                if reject_merge(db_cfg.url, str(merge["id"])):
                    click.echo(click.style("✗ Rejected", fg="yellow"))
                    rejected += 1
                else:
                    click.echo(click.style("✗ Failed to reject", fg="red"))
            else:
                click.echo("Skipped")
                skipped += 1

            click.echo("")

    # Wait for pending approvals if requested
    if pending_futures:
        if wait:
            click.echo(f"Waiting for {len(pending_futures)} pending approvals...")
            succeeded = 0
            failed = 0
            for future, name in pending_futures:
                try:
                    result = future.result(timeout=120)
                except Exception as exc:
                    # A timeout has an empty message; fall back to the class
                    # name so the failure line is never blank after the colon.
                    detail = str(exc) or type(exc).__name__
                    click.echo(click.style(f" ✗ {name}: {detail}", fg="red"))
                    failed += 1
                else:
                    if result:
                        click.echo(click.style(f" ✓ {name}", fg="green"))
                        succeeded += 1
                    else:
                        click.echo(click.style(f" ✗ {name} failed", fg="red"))
                        failed += 1
            click.echo(f"Embeddings: {succeeded} succeeded, {failed} failed")
        else:
            done_count = sum(1 for future, _ in pending_futures if future.done())
            pending_count = len(pending_futures) - done_count
            if pending_count > 0:
                click.echo(f"{pending_count} embeddings still processing in background...")

    # Cleanup executor
    # NOTE(review): assumed to be safe when nothing was queued - confirm
    # against shutdown_executor in .llm.enrich.
    shutdown_executor(wait=wait)

    # Summary
    click.echo("")
    click.echo(click.style("Review complete:", bold=True))
    click.echo(f" {click.style(str(approved), fg='green')} approved")
    click.echo(f" {click.style(str(rejected), fg='yellow')} rejected")
    click.echo(f" {skipped} skipped")
2462
+
2463
+
1397
2464
# Script entry point: call main() only when this module is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()