okb 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- okb/cli.py +1209 -16
- okb/config.py +122 -4
- okb/http_server.py +208 -2
- okb/llm/analyze.py +524 -0
- okb/llm/consolidate.py +685 -0
- okb/llm/enrich.py +723 -0
- okb/llm/extractors/__init__.py +13 -0
- okb/llm/extractors/base.py +44 -0
- okb/llm/extractors/cross_doc.py +478 -0
- okb/llm/extractors/dedup.py +499 -0
- okb/llm/extractors/entity.py +369 -0
- okb/llm/extractors/todo.py +149 -0
- okb/llm/providers.py +9 -6
- okb/mcp_server.py +1279 -12
- okb/migrations/0008.enrichment.sql +46 -0
- okb/migrations/0009.entity-consolidation.sql +120 -0
- okb/migrations/0010.token-id.sql +7 -0
- okb/modal_llm.py +26 -8
- okb/plugins/sources/__init__.py +2 -1
- okb/plugins/sources/dropbox_paper.py +44 -9
- okb/plugins/sources/github.py +5 -5
- okb/plugins/sources/todoist.py +254 -0
- okb/tokens.py +25 -3
- {okb-1.0.0.dist-info → okb-1.1.0.dist-info}/METADATA +119 -68
- okb-1.1.0.dist-info/RECORD +49 -0
- {okb-1.0.0.dist-info → okb-1.1.0.dist-info}/entry_points.txt +1 -0
- okb-1.0.0.dist-info/RECORD +0 -36
- {okb-1.0.0.dist-info → okb-1.1.0.dist-info}/WHEEL +0 -0
okb/cli.py
CHANGED
|
@@ -4,6 +4,7 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import importlib.resources
|
|
6
6
|
import json
|
|
7
|
+
import os
|
|
7
8
|
import shutil
|
|
8
9
|
import subprocess
|
|
9
10
|
import sys
|
|
@@ -44,7 +45,7 @@ def _check_docker() -> bool:
|
|
|
44
45
|
|
|
45
46
|
|
|
46
47
|
def _get_container_status() -> str | None:
|
|
47
|
-
"""Get the status of the
|
|
48
|
+
"""Get the status of the okb container. Returns None if not found."""
|
|
48
49
|
try:
|
|
49
50
|
result = subprocess.run(
|
|
50
51
|
[
|
|
@@ -841,7 +842,7 @@ def sync_run(
|
|
|
841
842
|
):
|
|
842
843
|
"""Sync from API sources.
|
|
843
844
|
|
|
844
|
-
Example:
|
|
845
|
+
Example: okb sync run github --repo owner/repo
|
|
845
846
|
"""
|
|
846
847
|
import psycopg
|
|
847
848
|
from psycopg.rows import dict_row
|
|
@@ -968,6 +969,132 @@ def sync_list():
|
|
|
968
969
|
click.echo(f" {name}")
|
|
969
970
|
|
|
970
971
|
|
|
972
|
+
@sync.command("list-projects")
@click.argument("source")
def sync_list_projects(source: str):
    """List projects from an API source (for finding project IDs).

    Example: okb sync list-projects todoist
    """
    # NOTE: click renders the docstring above as the command's --help text.
    from .plugins.registry import PluginRegistry

    # Resolve the plugin registered under this source name.
    source_obj = PluginRegistry.get_source(source)
    if source_obj is None:
        click.echo(f"Error: Source '{source}' not found.", err=True)
        # Hints travel with the error on stderr so stdout only ever carries
        # the project listing (safe to pipe into other tools).
        click.echo(
            f"Installed sources: {', '.join(PluginRegistry.list_sources())}",
            err=True,
        )
        sys.exit(1)

    # Listing projects is an optional capability of a source plugin.
    if not hasattr(source_obj, "list_projects"):
        click.echo(f"Error: Source '{source}' does not support listing projects.", err=True)
        sys.exit(1)

    # The plugin must have an entry in the user's configuration.
    source_cfg = config.get_source_config(source)
    if source_cfg is None:
        click.echo(f"Error: Source '{source}' not configured.", err=True)
        click.echo("Add it to your config file under plugins.sources", err=True)
        sys.exit(1)

    try:
        source_obj.configure(source_cfg)
    except Exception as e:
        click.echo(f"Error configuring '{source}': {e}", err=True)
        sys.exit(1)

    # Iteration stays inside the try: the plugin may produce projects lazily,
    # in which case failures surface while looping rather than at call time.
    try:
        projects = source_obj.list_projects()
        if projects:
            click.echo(f"Projects in {source}:")
            for project_id, name in projects:
                click.echo(f" {project_id}: {name}")
        else:
            click.echo("No projects found.")
    except Exception as e:
        click.echo(f"Error listing projects: {e}", err=True)
        sys.exit(1)
|
|
1017
|
+
|
|
1018
|
+
|
|
1019
|
+
@sync.command("auth")
@click.argument("source")
def sync_auth(source: str):
    """Authenticate with an API source (get tokens).

    Currently supports: dropbox-paper

    Example: okb sync auth dropbox-paper
    """
    # NOTE: click renders the docstring above as the command's --help text.
    if source == "dropbox-paper":
        _auth_dropbox()
        return

    # Unknown source: report on stderr (error and hint together) and fail.
    click.echo(f"Error: Authentication helper not available for '{source}'", err=True)
    click.echo("Supported: dropbox-paper", err=True)
    sys.exit(1)
|
|
1034
|
+
|
|
1035
|
+
|
|
1036
|
+
def _auth_dropbox():
    """Interactive OAuth flow for Dropbox."""
    # The from-import alone is enough to detect a missing dropbox package.
    try:
        from dropbox import DropboxOAuth2FlowNoRedirect
    except ImportError:
        click.echo("Error: dropbox package not installed", err=True)
        click.echo("Install with: pip install dropbox", err=True)
        sys.exit(1)

    click.echo("Dropbox OAuth Setup")
    click.echo("=" * 50)
    click.echo("")
    click.echo("You'll need your Dropbox app credentials.")
    click.echo("Get them at: https://www.dropbox.com/developers/apps")
    click.echo("")

    app_key = click.prompt("App key")
    app_secret = click.prompt("App secret")

    # Start OAuth flow
    auth_flow = DropboxOAuth2FlowNoRedirect(
        app_key,
        app_secret,
        token_access_type="offline",  # This gives us a refresh token
    )

    authorize_url = auth_flow.start()
    click.echo("")
    click.echo("1. Go to this URL in your browser:")
    click.echo(f" {authorize_url}")
    click.echo("")
    click.echo("2. Click 'Allow' to authorize the app")
    click.echo("3. Copy the authorization code")
    click.echo("")

    auth_code = click.prompt("Enter the authorization code")

    # Exchange the pasted code for tokens; any failure ends the command.
    try:
        oauth_result = auth_flow.finish(auth_code.strip())
    except Exception as e:
        click.echo(f"Error: Failed to get tokens - {e}", err=True)
        sys.exit(1)

    click.echo("")
    click.echo("Success! Add these to your environment or config:")
    click.echo("")
    click.echo(f"DROPBOX_APP_KEY={app_key}")
    click.echo(f"DROPBOX_APP_SECRET={app_secret}")
    click.echo(f"DROPBOX_REFRESH_TOKEN={oauth_result.refresh_token}")
    click.echo("")
    click.echo("Config example (~/.config/okb/config.yaml):")
    click.echo("")
    # The example is printed with real YAML nesting so it can be pasted as-is:
    # plugins -> sources -> dropbox-paper -> settings.
    click.echo("plugins:")
    click.echo("  sources:")
    click.echo("    dropbox-paper:")
    click.echo("      enabled: true")
    click.echo("      app_key: ${DROPBOX_APP_KEY}")
    click.echo("      app_secret: ${DROPBOX_APP_SECRET}")
    click.echo("      refresh_token: ${DROPBOX_REFRESH_TOKEN}")
|
|
1096
|
+
|
|
1097
|
+
|
|
971
1098
|
@sync.command("status")
|
|
972
1099
|
@click.argument("source", required=False)
|
|
973
1100
|
@click.option("--db", "database", default=None, help="Database to check")
|
|
@@ -1083,7 +1210,7 @@ def token_list(ctx, database: str | None):
|
|
|
1083
1210
|
for t in tokens:
|
|
1084
1211
|
desc = f" - {t.description}" if t.description else ""
|
|
1085
1212
|
last_used = t.last_used_at.strftime("%Y-%m-%d %H:%M") if t.last_used_at else "never"
|
|
1086
|
-
click.echo(f" [{t.permissions}] {t.token_hash[:12]}...{desc}")
|
|
1213
|
+
click.echo(f" ID {t.id} [{t.permissions}] {t.token_hash[:12]}...{desc}")
|
|
1087
1214
|
created = t.created_at.strftime("%Y-%m-%d %H:%M")
|
|
1088
1215
|
click.echo(f" Created: {created}, Last used: {last_used}")
|
|
1089
1216
|
except Exception as e:
|
|
@@ -1092,26 +1219,43 @@ def token_list(ctx, database: str | None):
|
|
|
1092
1219
|
|
|
1093
1220
|
|
|
1094
1221
|
@token.command("revoke")
@click.argument("token_value", required=False)
@click.option(
    "--id",
    "token_id",
    type=int,
    default=None,
    help="Token ID to revoke (from 'okb token list')",
)
@click.option("--db", "database", default=None, help="Database to revoke token from")
@click.pass_context
def token_revoke(ctx, token_value: str | None, token_id: int | None, database: str | None):
    """Revoke (delete) an API token.

    Either provide the full TOKEN_VALUE or use --id with the token ID from 'okb token list'.
    """
    # NOTE: click renders the docstring above as the command's --help text.
    from .tokens import delete_token, delete_token_by_id

    # Compare the ID against None rather than by truthiness so that an
    # explicit "--id 0" counts as "an ID was given" instead of being ignored.
    has_id = token_id is not None

    # Exactly one selector must be supplied.
    if not token_value and not has_id:
        click.echo("Error: Provide either TOKEN_VALUE or --id", err=True)
        sys.exit(1)

    if token_value and has_id:
        click.echo("Error: Provide either TOKEN_VALUE or --id, not both", err=True)
        sys.exit(1)

    db_name = database or ctx.obj.get("database")
    db_cfg = config.get_database(db_name)

    # sys.exit raises SystemExit, which is not an Exception subclass, so the
    # "not found" exits below are not swallowed by the handler.
    try:
        if has_id:
            deleted = delete_token_by_id(db_cfg.url, token_id)
            if deleted:
                click.echo(f"Token ID {token_id} revoked.")
            else:
                click.echo(f"Token ID {token_id} not found.", err=True)
                sys.exit(1)
        else:
            deleted = delete_token(db_cfg.url, token_value)
            if deleted:
                click.echo("Token revoked.")
            else:
                click.echo("Token not found. Use --id or provide the full token string.", err=True)
                sys.exit(1)
    except Exception as e:
        click.echo(f"Error revoking token: {e}", err=True)
        sys.exit(1)
|
|
@@ -1150,7 +1294,7 @@ def llm_status(ctx, database: str | None):
|
|
|
1150
1294
|
click.echo(f"Cache responses: {config.llm_cache_responses}")
|
|
1151
1295
|
|
|
1152
1296
|
if config.llm_provider == "modal":
|
|
1153
|
-
click.echo("Backend: Modal GPU (deploy with:
|
|
1297
|
+
click.echo("Backend: Modal GPU (deploy with: okb llm deploy)")
|
|
1154
1298
|
elif config.llm_use_bedrock:
|
|
1155
1299
|
click.echo(f"Backend: AWS Bedrock (region: {config.llm_aws_region})")
|
|
1156
1300
|
else:
|
|
@@ -1240,7 +1384,9 @@ def llm_clear_cache(ctx, database: str | None, days: int | None, yes: bool):
|
|
|
1240
1384
|
def llm_deploy():
|
|
1241
1385
|
"""Deploy the Modal LLM app for open model inference.
|
|
1242
1386
|
|
|
1243
|
-
|
|
1387
|
+
Deploys a GPU-accelerated LLM service on Modal using the model from your config.
|
|
1388
|
+
Default: microsoft/Phi-3-mini-4k-instruct (no HuggingFace approval needed).
|
|
1389
|
+
|
|
1244
1390
|
Required for using provider: modal in your config.
|
|
1245
1391
|
|
|
1246
1392
|
Requires Modal CLI to be installed and authenticated:
|
|
@@ -1259,14 +1405,1061 @@ def llm_deploy():
|
|
|
1259
1405
|
click.echo(f"Error: modal_llm.py not found at {llm_path}", err=True)
|
|
1260
1406
|
sys.exit(1)
|
|
1261
1407
|
|
|
1262
|
-
|
|
1263
|
-
|
|
1408
|
+
# Get model and GPU from config
|
|
1409
|
+
model = config.llm_model or "microsoft/Phi-3-mini-4k-instruct"
|
|
1410
|
+
gpu = config.llm_modal_gpu or "L4"
|
|
1411
|
+
click.echo("Deploying Modal LLM:")
|
|
1412
|
+
click.echo(f" Model: {model}")
|
|
1413
|
+
click.echo(f" GPU: {gpu}")
|
|
1414
|
+
click.echo("Note: First deploy downloads the model and may take a few minutes.")
|
|
1415
|
+
|
|
1416
|
+
# Set model and GPU in environment for Modal to pick up
|
|
1417
|
+
env = os.environ.copy()
|
|
1418
|
+
env["OKB_LLM_MODEL"] = model
|
|
1419
|
+
env["OKB_MODAL_GPU"] = gpu
|
|
1420
|
+
|
|
1264
1421
|
result = subprocess.run(
|
|
1265
1422
|
["modal", "deploy", str(llm_path)],
|
|
1266
1423
|
cwd=llm_path.parent,
|
|
1424
|
+
env=env,
|
|
1267
1425
|
)
|
|
1268
1426
|
sys.exit(result.returncode)
|
|
1269
1427
|
|
|
1270
1428
|
|
|
1429
|
+
# =============================================================================
|
|
1430
|
+
# Enrich commands
|
|
1431
|
+
# =============================================================================
|
|
1432
|
+
|
|
1433
|
+
|
|
1434
|
+
@main.group()
def enrich():
    """LLM-based document enrichment (extract TODOs and entities)."""
    # Pure command group: the subcommands registered on it do the work.
|
|
1438
|
+
|
|
1439
|
+
|
|
1440
|
+
@enrich.command("run")
@click.option("--db", "database", default=None, help="Database to enrich")
@click.option("--source-type", default=None, help="Filter by source type")
@click.option("--project", default=None, help="Filter by project")
@click.option("--query", default=None, help="Semantic search query to filter documents")
@click.option("--path-pattern", default=None, help="SQL LIKE pattern for source_path")
@click.option(
    "--all", "enrich_all", is_flag=True, help="Re-enrich all documents (ignore enriched_at)"
)
@click.option("--dry-run", is_flag=True, help="Show what would be enriched without executing")
@click.option("--limit", default=100, help="Maximum documents to process")
@click.option(
    "--workers",
    default=None,
    # Reject 0/negative values up front; ThreadPoolExecutor raises ValueError
    # for max_workers <= 0.
    type=click.IntRange(min=1),
    help="Parallel workers (default: docs/5, min 1)",
)
@click.pass_context
def enrich_run(
    ctx,
    database: str | None,
    source_type: str | None,
    project: str | None,
    query: str | None,
    path_pattern: str | None,
    enrich_all: bool,
    dry_run: bool,
    limit: int,
    workers: int | None,
):
    """Run enrichment on documents to extract TODOs and entities.

    By default, only processes documents that haven't been enriched yet.
    Use --all to re-enrich all documents (e.g., after changing enrichment config).

    Examples:

    okb enrich run # Enrich un-enriched documents

    okb enrich run --dry-run # Show what would be enriched

    okb enrich run --all # Re-enrich everything

    okb enrich run --source-type markdown # Only markdown files

    okb enrich run --query "meeting notes" # Filter by semantic search

    okb enrich run --path-pattern '%myrepo%' # Filter by source path

    okb enrich run --workers 8 # Use 8 parallel workers
    """
    # NOTE: click renders the docstring above as the command's --help text.
    from concurrent.futures import ThreadPoolExecutor, as_completed

    from .llm import get_llm
    from .llm.enrich import EnrichmentConfig, get_unenriched_documents, process_enrichment

    # Check LLM is configured before doing any work
    if get_llm() is None:
        click.echo("Error: No LLM provider configured.", err=True)
        click.echo("", err=True)
        click.echo("Enrichment requires an LLM to extract TODOs and entities.", err=True)
        click.echo("Set ANTHROPIC_API_KEY or configure in ~/.config/okb/config.yaml:", err=True)
        click.echo("", err=True)
        # Printed with real YAML nesting so the snippet can be pasted as-is.
        click.echo("  llm:", err=True)
        click.echo("    provider: claude", err=True)
        click.echo("    model: claude-haiku-4-5-20251001", err=True)
        click.echo("", err=True)
        click.echo("Run 'okb llm status' to check configuration.", err=True)
        ctx.exit(1)

    db_name = database or ctx.obj.get("database")
    db_cfg = config.get_database(db_name)

    # The configured enrichment version is only passed to the document query
    # when --all was requested; otherwise None is passed.
    enrichment_version = config.enrichment_version if enrich_all else None

    click.echo(f"Scanning database '{db_cfg.name}' for documents to enrich...")
    if dry_run:
        click.echo("(dry run - no changes will be made)")

    docs = get_unenriched_documents(
        db_url=db_cfg.url,
        source_type=source_type,
        project=project,
        query=query,
        path_pattern=path_pattern,
        enrichment_version=enrichment_version,
        limit=limit,
    )

    if not docs:
        click.echo("No documents need enrichment.")
        return

    total = len(docs)
    click.echo(f"Found {total} documents to enrich")

    if dry_run:
        # Preview at most 20 titles, then summarize the remainder.
        for doc in docs[:20]:
            click.echo(f" - {doc['title']} ({doc['source_type']})")
        if total > 20:
            click.echo(f" ... and {total - 20} more")
        return

    # Calculate workers if not specified: floor(docs/5), minimum 1
    if workers is None:
        workers = max(1, total // 5)

    # Build config
    enrich_config = EnrichmentConfig.from_config(
        {
            "enabled": config.enrichment_enabled,
            "version": config.enrichment_version,
            "extract_todos": config.enrichment_extract_todos,
            "extract_entities": config.enrichment_extract_entities,
            "auto_create_todos": config.enrichment_auto_create_todos,
            "auto_create_entities": config.enrichment_auto_create_entities,
            "min_confidence_todo": config.enrichment_min_confidence_todo,
            "min_confidence_entity": config.enrichment_min_confidence_entity,
        }
    )

    total_todos = 0
    total_entities_pending = 0
    total_entities_created = 0
    completed = 0
    errors = 0

    def enrich_one(doc: dict) -> tuple[dict, dict | None, str | None]:
        """Process a single document. Returns (doc, stats, error)."""
        proj = doc["metadata"].get("project") if doc["metadata"] else None
        try:
            stats = process_enrichment(
                document_id=str(doc["id"]),
                source_path=doc["source_path"],
                title=doc["title"],
                content=doc["content"],
                source_type=doc["source_type"],
                db_url=db_cfg.url,
                config=enrich_config,
                project=proj,
            )
            return doc, stats, None
        except Exception as e:
            # Some exceptions stringify to ""; fall back to the class name so
            # a failure is never reported with an empty message.
            return doc, None, str(e) or type(e).__name__

    click.echo(f"Processing with {workers} parallel workers...")

    with ThreadPoolExecutor(max_workers=workers) as executor:
        futures = {executor.submit(enrich_one, doc): doc for doc in docs}

        # Results are reported in completion order, not submission order.
        for future in as_completed(futures):
            doc, stats, error = future.result()
            completed += 1
            title = doc["title"][:40] if doc["title"] else "Untitled"

            # Test against None, not truthiness: stats is None exactly when an
            # error was recorded, and indexing it below would raise TypeError.
            if error is not None:
                errors += 1
                click.echo(f"[{completed}/{total}] {title}... -> error: {error[:50]}")
                continue

            total_todos += stats["todos_created"]
            total_entities_pending += stats["entities_pending"]
            total_entities_created += stats["entities_created"]

            parts = []
            if stats["todos_created"]:
                parts.append(f"{stats['todos_created']} TODOs")
            if stats["entities_pending"]:
                parts.append(f"{stats['entities_pending']} pending")
            if stats["entities_created"]:
                parts.append(f"{stats['entities_created']} entities")
            if parts:
                click.echo(f"[{completed}/{total}] {title}... -> {', '.join(parts)}")
            else:
                click.echo(f"[{completed}/{total}] {title}... -> nothing extracted")

    click.echo("")
    click.echo("Summary:")
    click.echo(f" Documents processed: {total}")
    if errors:
        click.echo(f" Errors: {errors}")
    click.echo(f" TODOs created: {total_todos}")
    click.echo(f" Entities pending review: {total_entities_pending}")
    click.echo(f" Entities auto-created: {total_entities_created}")
|
|
1619
|
+
|
|
1620
|
+
|
|
1621
|
+
@enrich.command("pending")
@click.option("--db", "database", default=None, help="Database to check")
@click.option("--type", "entity_type", default=None, help="Filter by entity type")
@click.option("--limit", default=50, help="Maximum results")
@click.pass_context
def enrich_pending(ctx, database: str | None, entity_type: str | None, limit: int):
    """List pending entity suggestions awaiting review.

    Shows entities extracted from documents that need approval before
    becoming searchable. Use 'okb enrich approve' or 'okb enrich reject'
    to process them.
    """
    # NOTE: click renders the docstring above as the command's --help text.
    from .llm.enrich import list_pending_entities

    target_db = config.get_database(database or ctx.obj.get("database"))
    pending = list_pending_entities(target_db.url, entity_type=entity_type, limit=limit)

    if not pending:
        click.echo("No pending entity suggestions.")
        return

    click.echo(f"Pending entities ({len(pending)}):\n")
    for item in pending:
        # Confidence is shown as a percentage only when present and non-zero.
        score = item.get("confidence", 0)
        suffix = ""
        if score:
            suffix = f" ({score:.0%})"
        click.echo(f" [{item['entity_type']}] {item['entity_name']}{suffix}")
        click.echo(f" ID: {item['id']}")

        # Descriptions longer than 60 characters are cut and marked with "...".
        description = item.get("description")
        if description:
            if len(description) > 60:
                shown = description[:60] + "..."
            else:
                shown = description
            click.echo(f" {shown}")

        # At most three aliases are listed.
        aliases = item.get("aliases")
        if aliases:
            click.echo(f" Aliases: {', '.join(aliases[:3])}")

        click.echo(f" Source: {item['source_title']}")
        click.echo("")

    click.echo("Use 'okb enrich approve <id>' or 'okb enrich reject <id>' to process.")
|
|
1663
|
+
|
|
1664
|
+
|
|
1665
|
+
@enrich.command("approve")
@click.argument("pending_id")
@click.option("--db", "database", default=None, help="Database")
@click.option("--local", is_flag=True, help="Use local CPU embedding instead of Modal")
@click.pass_context
def enrich_approve(ctx, pending_id: str, database: str | None, local: bool):
    """Approve a pending entity, creating it as a searchable document."""
    from .llm.enrich import approve_entity

    target_db = config.get_database(database or ctx.obj.get("database"))

    # --local turns Modal off; approve_entity receives the inverted flag.
    created_path = approve_entity(target_db.url, pending_id, use_modal=not local)

    # A falsy result means nothing was approved.
    if not created_path:
        click.echo("Failed to approve entity. ID may be invalid or already processed.", err=True)
        sys.exit(1)

    click.echo(f"Entity approved and created: {created_path}")
|
|
1683
|
+
|
|
1684
|
+
|
|
1685
|
+
@enrich.command("reject")
@click.argument("pending_id")
@click.option("--db", "database", default=None, help="Database")
@click.pass_context
def enrich_reject(ctx, pending_id: str, database: str | None):
    """Reject a pending entity suggestion."""
    from .llm.enrich import reject_entity

    target_db = config.get_database(database or ctx.obj.get("database"))

    # A falsy result means nothing was rejected.
    rejected = reject_entity(target_db.url, pending_id)
    if not rejected:
        click.echo("Failed to reject entity. ID may be invalid or already processed.", err=True)
        sys.exit(1)

    click.echo("Entity rejected.")
|
|
1701
|
+
|
|
1702
|
+
|
|
1703
|
+
@enrich.command("analyze")
@click.option("--db", "database", default=None, help="Database to analyze")
@click.option("--project", default=None, help="Analyze specific project only")
@click.option("--sample-size", default=15, help="Number of documents to sample")
@click.option("--no-update", is_flag=True, help="Don't update database metadata")
@click.option("--stats-only", is_flag=True, help="Show stats without LLM analysis")
@click.pass_context
def enrich_analyze(
    ctx,
    database: str | None,
    project: str | None,
    sample_size: int,
    no_update: bool,
    stats_only: bool,
):
    """Analyze knowledge base and update description/topics.

    Uses entity aggregation and document sampling to understand the overall
    content and themes in the knowledge base. Generates a description and
    topic keywords using LLM analysis.

    Examples:

    okb enrich analyze # Analyze entire database

    okb enrich analyze --stats-only # Show stats without LLM call

    okb enrich analyze --project myproject # Analyze specific project

    okb enrich analyze --no-update # Analyze without updating metadata
    """
    # NOTE: click renders the docstring above as the command's --help text.
    from .llm.analyze import (
        analyze_database,
        get_content_stats,
        get_entity_summary,
    )

    db_name = database or ctx.obj.get("database")
    db_cfg = config.get_database(db_name)

    scope = f"project '{project}'" if project else f"database '{db_cfg.name}'"
    click.echo(f"Analyzing {scope}...\n")

    # Statistics and the entity summary are gathered before any LLM check,
    # so --stats-only works without a configured provider.
    stats = get_content_stats(db_cfg.url, project)
    entities = get_entity_summary(db_cfg.url, project, limit=20)

    click.echo("Content Statistics:")
    click.echo(f" Documents: {stats['total_documents']:,}")
    click.echo(f" Tokens: ~{stats['total_tokens']:,}")
    if stats["source_types"]:
        # Most frequent source type first.
        sorted_types = sorted(stats["source_types"].items(), key=lambda x: -x[1])
        types_parts = [f"{t}: {c}" for t, c in sorted_types]
        # Break into multiple lines if many types
        if len(types_parts) > 4:
            click.echo(" Source types:")
            for tp in types_parts:
                click.echo(f" {tp}")
        else:
            click.echo(f" Source types: {', '.join(types_parts)}")
    if stats["projects"]:
        click.echo(f" Projects: {', '.join(stats['projects'])}")
    if stats["date_range"]["earliest"]:
        earliest = stats["date_range"]["earliest"]
        latest = stats["date_range"]["latest"]
        click.echo(f" Date range: {earliest} to {latest}")

    click.echo("")

    # Up to 20 entities are fetched, but only the first 10 are printed.
    if entities:
        click.echo("Top Entities (by mentions):")
        for i, e in enumerate(entities[:10], 1):
            name, etype = e["name"], e["type"]
            refs, docs = e["ref_count"], e["doc_count"]
            click.echo(f" {i}. {name} ({etype}) - {refs} mentions in {docs} docs")
        click.echo("")
    else:
        click.echo("No entities extracted yet.")
        click.echo("Run 'okb enrich run' to extract entities from documents.\n")

    if stats_only:
        return

    # Check LLM is configured
    from .llm import get_llm

    if get_llm() is None:
        click.echo("Error: No LLM provider configured.", err=True)
        click.echo("", err=True)
        click.echo("Analysis requires an LLM to generate description and topics.", err=True)
        click.echo("Set ANTHROPIC_API_KEY or configure in ~/.config/okb/config.yaml:", err=True)
        click.echo("", err=True)
        # Printed with real YAML nesting so the snippet can be pasted as-is.
        click.echo("  llm:", err=True)
        click.echo("    provider: claude", err=True)
        click.echo("", err=True)
        click.echo("Use --stats-only to see statistics without LLM analysis.", err=True)
        ctx.exit(1)

    click.echo(f"Sampling {sample_size} documents for analysis...")
    click.echo("Generating description and topics...")
    click.echo("")

    try:
        result = analyze_database(
            db_url=db_cfg.url,
            project=project,
            sample_size=sample_size,
            auto_update=not no_update,
        )

        click.echo("Analysis Complete:")
        click.echo(f" Description: {result.description}")
        click.echo(f" Topics: {', '.join(result.topics)}")

        if not no_update:
            click.echo("")
            click.echo("Updated database metadata.")
        else:
            click.echo("")
            click.echo("(metadata not updated - use without --no-update to save)")

    except Exception as e:
        click.echo(f"Error during analysis: {e}", err=True)
        ctx.exit(1)
|
|
1829
|
+
|
|
1830
|
+
|
|
1831
|
+
@enrich.command("consolidate")
@click.option("--db", "database", default=None, help="Database to consolidate")
@click.option(
    "--duplicates/--no-duplicates",
    "detect_duplicates",
    default=True,
    help="Detect duplicate entities",
)
@click.option(
    "--cross-doc/--no-cross-doc",
    "detect_cross_doc",
    default=True,
    help="Detect cross-document entities",
)
@click.option(
    "--clusters/--no-clusters",
    "build_clusters",
    default=True,
    help="Build topic clusters",
)
@click.option(
    "--relationships/--no-relationships",
    "extract_relationships",
    default=True,
    help="Extract entity relationships",
)
@click.option("--dry-run", is_flag=True, help="Show what would be found without creating proposals")
@click.pass_context
def enrich_consolidate(
    ctx,
    database: str | None,
    detect_duplicates: bool,
    detect_cross_doc: bool,
    build_clusters: bool,
    extract_relationships: bool,
    dry_run: bool,
):
    """Run entity consolidation pipeline.

    Detects duplicate entities, cross-document mentions, builds topic clusters,
    and extracts entity relationships. Creates pending proposals for review
    rather than auto-applying changes.

    Examples:

    okb enrich consolidate # Run full consolidation

    okb enrich consolidate --dry-run # Show what would be found

    okb enrich consolidate --no-clusters # Skip clustering

    okb enrich consolidate --duplicates --no-cross-doc --no-clusters --no-relationships
    """
    # NOTE: click renders the docstring above as the command's --help text.
    from .llm import get_llm
    from .llm.consolidate import format_consolidation_result, run_consolidation

    # The provider check runs unconditionally, including for dry runs.
    if get_llm() is None:
        for message in (
            "Error: No LLM provider configured.",
            "Consolidation requires an LLM for deduplication and clustering.",
            "Set ANTHROPIC_API_KEY or configure in ~/.config/okb/config.yaml",
        ):
            click.echo(message, err=True)
        ctx.exit(1)

    target_db = config.get_database(database or ctx.obj.get("database"))

    click.echo(f"Running consolidation on database '{target_db.name}'...")
    if dry_run:
        click.echo("(dry run - no proposals will be created)")

    # Each stage is toggled by its own --x/--no-x flag.
    outcome = run_consolidation(
        db_url=target_db.url,
        detect_duplicates=detect_duplicates,
        detect_cross_doc=detect_cross_doc,
        build_clusters=build_clusters,
        extract_relationships=extract_relationships,
        dry_run=dry_run,
    )

    report = format_consolidation_result(outcome)
    click.echo("")
    click.echo(report)

    # Point at the review command only when this was not a dry run and
    # duplicates or cross-document candidates were found.
    if dry_run:
        return
    if outcome.duplicates_found > 0 or outcome.cross_doc_candidates > 0:
        click.echo("")
        click.echo("Use 'okb enrich merge-proposals' to review pending merges.")
|
|
1902
|
+
|
|
1903
|
+
|
|
1904
|
+
@enrich.command("merge-proposals")
@click.option("--db", "database", default=None, help="Database to check")
@click.option("--limit", default=50, help="Maximum results")
@click.pass_context
def enrich_merge_proposals(ctx, database: str | None, limit: int):
    """List pending entity merge proposals.

    Shows duplicate entities and cross-document mentions awaiting review.
    Use 'okb enrich approve-merge' or 'okb enrich reject-merge' to process.
    """
    # NOTE: the docstring above is rendered by click as the command's --help text.
    from .llm.extractors.dedup import list_pending_merges

    # An explicit --db wins over the database stored on the click context.
    target_db = config.get_database(database or ctx.obj.get("database"))
    proposals = list_pending_merges(target_db.url, limit=limit)

    if not proposals:
        click.echo("No pending merge proposals.")
        return

    click.echo(f"Pending merge proposals ({len(proposals)}):\n")
    for proposal in proposals:
        # Show the confidence suffix only when a non-zero score is present.
        score = proposal.get("confidence", 0)
        suffix = ""
        if score:
            suffix = f" ({score:.0%})"

        click.echo(f" {proposal['canonical_name']} <- {proposal['duplicate_name']}{suffix}")
        click.echo(f" ID: {proposal['id']}")
        click.echo(f" Reason: {proposal.get('reason', 'similarity')}")
        click.echo("")

    click.echo("Use 'okb enrich approve-merge <id>' or 'okb enrich reject-merge <id>' to process.")
|
|
1935
|
+
|
|
1936
|
+
|
|
1937
|
+
@enrich.command("approve-merge")
@click.argument("merge_id")
@click.option("--db", "database", default=None, help="Database")
@click.pass_context
def enrich_approve_merge(ctx, merge_id: str, database: str | None):
    """Approve a pending entity merge.

    Merges the duplicate entity into the canonical entity:
    - Redirects all entity references from duplicate to canonical
    - Adds duplicate's name as an alias for canonical
    - Deletes the duplicate entity document
    """
    # NOTE: the docstring above is rendered by click as the command's --help text.
    #
    # ctx:      click context; ctx.obj may carry the default database name.
    # merge_id: identifier of the pending merge proposal to approve.
    # database: optional database name overriding the context default.
    # Exits with status 1 when approve_merge() reports failure.
    from .llm.extractors.dedup import approve_merge

    db_name = database or ctx.obj.get("database")
    db_cfg = config.get_database(db_name)

    if approve_merge(db_cfg.url, merge_id):
        click.echo("Merge approved and executed.")
    else:
        click.echo("Failed to approve merge. ID may be invalid or already processed.", err=True)
        # Exit through the click context, like the other enrich commands do,
        # instead of calling sys.exit() directly.
        ctx.exit(1)
|
|
1959
|
+
|
|
1960
|
+
|
|
1961
|
+
@enrich.command("reject-merge")
@click.argument("merge_id")
@click.option("--db", "database", default=None, help="Database")
@click.pass_context
def enrich_reject_merge(ctx, merge_id: str, database: str | None):
    """Reject a pending entity merge proposal."""
    # NOTE: the docstring above is rendered by click as the command's --help text.
    #
    # ctx:      click context; ctx.obj may carry the default database name.
    # merge_id: identifier of the pending merge proposal to reject.
    # database: optional database name overriding the context default.
    # Exits with status 1 when reject_merge() reports failure.
    from .llm.extractors.dedup import reject_merge

    db_name = database or ctx.obj.get("database")
    db_cfg = config.get_database(db_name)

    if reject_merge(db_cfg.url, merge_id):
        click.echo("Merge rejected.")
    else:
        click.echo("Failed to reject merge. ID may be invalid or already processed.", err=True)
        # Exit through the click context, like the other enrich commands do,
        # instead of calling sys.exit() directly.
        ctx.exit(1)
|
|
1977
|
+
|
|
1978
|
+
|
|
1979
|
+
@enrich.command("clusters")
@click.option("--db", "database", default=None, help="Database to check")
@click.option("--limit", default=20, help="Maximum clusters to show")
@click.pass_context
def enrich_clusters(ctx, database: str | None, limit: int):
    """List topic clusters.

    Shows groups of related entities and documents organized by theme.
    """
    # NOTE: the docstring above is rendered by click as the command's --help text.
    from .llm.consolidate import get_topic_clusters

    # An explicit --db wins over the database stored on the click context.
    target_db = config.get_database(database or ctx.obj.get("database"))
    clusters = get_topic_clusters(target_db.url, limit=limit)

    if not clusters:
        click.echo("No topic clusters found.")
        click.echo("Run 'okb enrich consolidate' to generate clusters.")
        return

    click.echo(f"Topic clusters ({len(clusters)}):\n")
    for cluster in clusters:
        click.echo(f" {cluster['name']}")

        # Descriptions longer than 70 characters are cut and marked with "...".
        description = cluster.get("description")
        if description:
            if len(description) > 70:
                description = description[:70] + "..."
            click.echo(f" {description}")

        click.echo(f" Members: {cluster['member_count']} entities/documents")

        # Show at most five sample members per cluster.
        members = cluster.get("sample_members")
        if members:
            preview = ", ".join(members[:5])
            click.echo(f" Examples: {preview}")

        click.echo("")
|
|
2011
|
+
|
|
2012
|
+
|
|
2013
|
+
@enrich.command("relationships")
@click.option("--db", "database", default=None, help="Database to check")
@click.option("--entity", "entity_name", default=None, help="Filter to specific entity")
@click.option(
    "--type",
    "relationship_type",
    default=None,
    help="Filter by relationship type (works_for, uses, belongs_to, related_to)",
)
@click.option("--limit", default=50, help="Maximum results")
@click.pass_context
def enrich_relationships(
    ctx,
    database: str | None,
    entity_name: str | None,
    relationship_type: str | None,
    limit: int,
):
    """List entity relationships.

    Shows connections between entities (person→org, tech→project, etc.).

    Examples:

    okb enrich relationships # All relationships

    okb enrich relationships --entity "Django" # Filter to one entity

    okb enrich relationships --type works_for # Filter by type
    """
    # NOTE: the docstring above is rendered by click as the command's --help text.
    from .llm.consolidate import get_entity_relationships

    # An explicit --db wins over the database stored on the click context.
    target_db = config.get_database(database or ctx.obj.get("database"))

    rows = get_entity_relationships(
        target_db.url,
        entity_name=entity_name,
        relationship_type=relationship_type,
        limit=limit,
    )

    if not rows:
        # Tailor the empty-result message to whether an entity filter was given.
        if entity_name:
            click.echo(f"No relationships found for entity '{entity_name}'.")
        else:
            click.echo("No relationships found.")
            click.echo("Run 'okb enrich consolidate' to extract relationships.")
        return

    click.echo(f"Entity relationships ({len(rows)}):\n")
    for row in rows:
        # Show the confidence suffix only when a non-zero score is present.
        score = row.get("confidence", 0)
        suffix = ""
        if score:
            suffix = f" ({score:.0%})"
        click.echo(f" {row['source_name']} --[{row['relationship_type']}]--> {row['target_name']}{suffix}")

        # Evidence longer than 60 characters is cut and marked with "...".
        snippet = row.get("evidence")
        if snippet:
            if len(snippet) > 60:
                snippet = snippet[:60] + "..."
            click.echo(f" Evidence: {snippet}")

        click.echo("")
|
|
2068
|
+
|
|
2069
|
+
|
|
2070
|
+
@enrich.command("all")
@click.option("--db", "database", default=None, help="Database to enrich")
@click.option("--source-type", default=None, help="Filter by source type")
@click.option("--project", default=None, help="Filter by project")
@click.option("--query", default=None, help="Semantic search query to filter documents")
@click.option("--path-pattern", default=None, help="SQL LIKE pattern for source_path")
@click.option("--limit", default=100, help="Maximum documents to process")
@click.option("--workers", default=None, type=int, help="Parallel workers (default: docs/5, min 1)")
@click.option("--dry-run", is_flag=True, help="Show what would be done without executing")
@click.option("--skip-consolidate", is_flag=True, help="Skip consolidation phase")
@click.option("--duplicates/--no-duplicates", "detect_duplicates", default=True,
              help="Detect duplicate entities during consolidation")
@click.option("--clusters/--no-clusters", "build_clusters", default=True,
              help="Build topic clusters during consolidation")
@click.option("--relationships/--no-relationships", "extract_relationships", default=True,
              help="Extract entity relationships during consolidation")
@click.pass_context
def enrich_all(
    ctx,
    database: str | None,
    source_type: str | None,
    project: str | None,
    query: str | None,
    path_pattern: str | None,
    limit: int,
    workers: int | None,
    dry_run: bool,
    skip_consolidate: bool,
    detect_duplicates: bool,
    build_clusters: bool,
    extract_relationships: bool,
):
    """Run full enrichment pipeline: extraction + consolidation.

    Combines 'enrich run' and 'enrich consolidate' in one command for
    one-shot enrichment of documents.

    Examples:

    okb enrich all # Run full pipeline

    okb enrich all --dry-run # Preview what would happen

    okb enrich all --skip-consolidate # Run extraction only

    okb enrich all --source-type markdown # Filter to markdown files

    okb enrich all --no-clusters # Skip cluster building
    """
    # NOTE: the docstring above is rendered by click as the command's --help text.
    #
    # Phase 1 enriches each selected document in a thread pool; phase 2 runs
    # consolidation over the database unless --skip-consolidate is given.
    # Cross-document detection is always enabled in phase 2 (no CLI switch here).
    # Exits with status 1 when no LLM provider is configured.
    from concurrent.futures import ThreadPoolExecutor, as_completed

    from .llm import get_llm
    from .llm.consolidate import format_consolidation_result, run_consolidation
    from .llm.enrich import EnrichmentConfig, get_unenriched_documents, process_enrichment

    # Check LLM is configured
    if get_llm() is None:
        click.echo("Error: No LLM provider configured.", err=True)
        click.echo("Set ANTHROPIC_API_KEY or configure in ~/.config/okb/config.yaml", err=True)
        ctx.exit(1)

    db_name = database or ctx.obj.get("database")
    db_cfg = config.get_database(db_name)

    # Phase 1: Enrichment
    click.echo("=== Phase 1: Enrichment ===")
    click.echo(f"Scanning database '{db_cfg.name}' for documents to enrich...")
    if dry_run:
        click.echo("(dry run - no changes will be made)")

    docs = get_unenriched_documents(
        db_url=db_cfg.url,
        source_type=source_type,
        project=project,
        query=query,
        path_pattern=path_pattern,
        limit=limit,
    )

    total_todos = 0
    total_entities_pending = 0
    total_entities_created = 0

    if not docs:
        click.echo("No documents need enrichment.")
    else:
        doc_count = len(docs)
        click.echo(f"Found {doc_count} documents to enrich")

        if dry_run:
            # Preview at most 20 titles so the output stays readable.
            for doc in docs[:20]:
                click.echo(f" - {doc['title']} ({doc['source_type']})")
            if doc_count > 20:
                click.echo(f" ... and {doc_count - 20} more")
        else:
            # Build config
            enrich_config = EnrichmentConfig.from_config(
                {
                    "enabled": config.enrichment_enabled,
                    "version": config.enrichment_version,
                    "extract_todos": config.enrichment_extract_todos,
                    "extract_entities": config.enrichment_extract_entities,
                    "auto_create_todos": config.enrichment_auto_create_todos,
                    "auto_create_entities": config.enrichment_auto_create_entities,
                    "min_confidence_todo": config.enrichment_min_confidence_todo,
                    "min_confidence_entity": config.enrichment_min_confidence_entity,
                }
            )

            # Calculate workers. ThreadPoolExecutor raises ValueError for
            # max_workers <= 0, so a user-supplied 0 or negative value is
            # clamped to the documented minimum of 1 as well.
            if workers is None:
                workers = doc_count // 5
            workers = max(1, workers)

            completed = 0
            errors = 0

            def enrich_one(doc: dict) -> tuple[dict, dict | None, str | None]:
                """Enrich one document; return (doc, stats, error message)."""
                proj = doc["metadata"].get("project") if doc["metadata"] else None
                try:
                    stats = process_enrichment(
                        document_id=str(doc["id"]),
                        source_path=doc["source_path"],
                        title=doc["title"],
                        content=doc["content"],
                        source_type=doc["source_type"],
                        db_url=db_cfg.url,
                        config=enrich_config,
                        project=proj,
                    )
                    return doc, stats, None
                except Exception as e:
                    # Report the failure as data so one bad document does not
                    # abort the whole batch.
                    return doc, None, str(e)

            click.echo(f"Processing with {workers} parallel workers...")

            with ThreadPoolExecutor(max_workers=workers) as executor:
                futures = {executor.submit(enrich_one, doc): doc for doc in docs}

                # Counters are only updated here, in the main thread.
                for future in as_completed(futures):
                    doc, stats, error = future.result()
                    completed += 1
                    title = doc["title"][:40] if doc["title"] else "Untitled"

                    if error:
                        errors += 1
                        click.echo(f"[{completed}/{doc_count}] {title}... -> error: {error[:50]}")
                        continue

                    total_todos += stats["todos_created"]
                    total_entities_pending += stats["entities_pending"]
                    total_entities_created += stats["entities_created"]

                    parts = []
                    if stats["todos_created"]:
                        parts.append(f"{stats['todos_created']} TODOs")
                    if stats["entities_pending"]:
                        parts.append(f"{stats['entities_pending']} pending")
                    if stats["entities_created"]:
                        parts.append(f"{stats['entities_created']} entities")
                    if parts:
                        click.echo(f"[{completed}/{doc_count}] {title}... -> {', '.join(parts)}")
                    else:
                        click.echo(f"[{completed}/{doc_count}] {title}... -> nothing extracted")

            click.echo("")
            click.echo("Enrichment summary:")
            click.echo(f" Documents processed: {doc_count}")
            if errors:
                click.echo(f" Errors: {errors}")
            click.echo(f" TODOs created: {total_todos}")
            click.echo(f" Entities pending review: {total_entities_pending}")
            click.echo(f" Entities auto-created: {total_entities_created}")

    # Phase 2: Consolidation
    if skip_consolidate:
        click.echo("")
        click.echo("Skipping consolidation (--skip-consolidate)")
        return

    click.echo("")
    click.echo("=== Phase 2: Consolidation ===")

    result = run_consolidation(
        db_url=db_cfg.url,
        detect_duplicates=detect_duplicates,
        detect_cross_doc=True,
        build_clusters=build_clusters,
        extract_relationships=extract_relationships,
        dry_run=dry_run,
    )

    output = format_consolidation_result(result)
    click.echo(output)

    if not dry_run and (result.duplicates_found > 0 or result.cross_doc_candidates > 0):
        click.echo("")
        click.echo("Use 'okb enrich review' to review pending entities and merges.")
|
|
2266
|
+
|
|
2267
|
+
|
|
2268
|
+
@enrich.command("review")
@click.option("--db", "database", default=None, help="Database to review")
@click.option("--entities-only", is_flag=True, help="Only review pending entities")
@click.option("--merges-only", is_flag=True, help="Only review pending merges")
@click.option("--local", is_flag=True, help="Use local CPU embedding instead of Modal")
@click.option("--wait/--no-wait", default=True, help="Wait for embeddings to complete")
@click.pass_context
def enrich_review(
    ctx, database: str | None, entities_only: bool, merges_only: bool, local: bool, wait: bool
):
    """Interactive review of pending entities and merge proposals.

    Loops through pending items with approve/reject prompts.
    Press Q to quit early - remaining items stay pending for later.

    Entity approvals run asynchronously - you can continue reviewing while
    embeddings are generated. Use --no-wait to exit immediately after reviewing.

    Examples:

    okb enrich review # Review all pending items

    okb enrich review --entities-only # Only review entities

    okb enrich review --merges-only # Only review merges

    okb enrich review --local # Use local CPU embedding

    okb enrich review --no-wait # Don't wait for embeddings
    """
    # NOTE: the docstring above is rendered by click as the command's --help text.
    #
    # Raises click.UsageError when --entities-only and --merges-only are both
    # given: together they would filter out everything and the command would
    # misleadingly report that nothing is pending.

    from .llm.enrich import (
        approve_entity_async,
        list_pending_entities,
        reject_entity,
        shutdown_executor,
    )
    from .llm.extractors.dedup import approve_merge, list_pending_merges, reject_merge

    if entities_only and merges_only:
        raise click.UsageError(
            "--entities-only and --merges-only are mutually exclusive.", ctx=ctx
        )

    db_name = database or ctx.obj.get("database")
    db_cfg = config.get_database(db_name)
    use_modal = not local

    # Get pending items
    entities = [] if merges_only else list_pending_entities(db_cfg.url, limit=100)
    merges = [] if entities_only else list_pending_merges(db_cfg.url, limit=100)

    if not entities and not merges:
        click.echo("No pending items to review.")
        return

    click.echo(f"Pending: {len(entities)} entities, {len(merges)} merges")
    click.echo("")

    # Counters
    approved = 0
    rejected = 0
    skipped = 0

    # Track async approval futures
    pending_futures: list[tuple] = []  # (future, entity_name)

    def prompt_choice() -> str:
        """Ask for a review decision and return it as an upper-case letter."""
        return click.prompt(
            "[A]pprove [R]eject [S]kip [Q]uit",
            type=click.Choice(["A", "R", "S", "Q", "a", "r", "s", "q"]),
            show_choices=False,
        ).upper()

    # Set once the reviewer presses Q; skips everything that is still queued.
    quit_requested = False

    # Review entities
    for i, entity in enumerate(entities, 1):
        # Check for completed futures
        done_count = sum(1 for f, _ in pending_futures if f.done())
        if pending_futures and done_count > 0:
            total = len(pending_futures)
            click.echo(click.style(f" ({done_count}/{total} embeddings done)", dim=True))

        click.echo(click.style(f"=== Entity Review [{i}/{len(entities)}] ===", bold=True))
        click.echo(f"Name: {click.style(entity['entity_name'], fg='cyan')}")
        click.echo(f"Type: {entity['entity_type']}")
        confidence = entity.get("confidence", 0)
        if confidence:
            click.echo(f"Confidence: {confidence:.0%}")
        if entity.get("description"):
            d = entity["description"]
            desc = d[:80] + "..." if len(d) > 80 else d
            click.echo(f"Description: {desc}")
        if entity.get("aliases"):
            click.echo(f"Aliases: {', '.join(entity['aliases'][:5])}")
        click.echo(f"Source: {entity['source_title']}")
        click.echo("")

        choice = prompt_choice()

        if choice == "Q":
            click.echo("Quitting review...")
            quit_requested = True
            break
        elif choice == "A":
            # Submit async approval; the outcome is collected after the loops.
            future = approve_entity_async(db_cfg.url, str(entity["id"]), use_modal)
            pending_futures.append((future, entity["entity_name"]))
            click.echo(click.style("⏳ Queued for approval", fg="cyan"))
            approved += 1
        elif choice == "R":
            if reject_entity(db_cfg.url, str(entity["id"])):
                click.echo(click.style("✗ Rejected", fg="yellow"))
                rejected += 1
            else:
                click.echo(click.style("✗ Failed to reject", fg="red"))
        else:
            click.echo("Skipped")
            skipped += 1

        click.echo("")

    # Review merges (only if we didn't quit early)
    if not quit_requested:
        for i, m in enumerate(merges, 1):
            click.echo(click.style(f"=== Merge Review [{i}/{len(merges)}] ===", bold=True))
            cname = click.style(m["canonical_name"], fg="cyan")
            ctype = m.get("canonical_type", "unknown")
            click.echo(f"Canonical: {cname} ({ctype})")
            dname = click.style(m["duplicate_name"], fg="yellow")
            dtype = m.get("duplicate_type", "unknown")
            click.echo(f"Duplicate: {dname} ({dtype})")
            confidence = m.get("confidence", 0)
            if confidence:
                click.echo(f"Confidence: {confidence:.0%}")
            click.echo(f"Reason: {m.get('reason', 'similarity')}")
            click.echo("")

            choice = prompt_choice()

            if choice == "Q":
                click.echo("Quitting review...")
                break
            elif choice == "A":
                if approve_merge(db_cfg.url, str(m["id"])):
                    click.echo(click.style("✓ Merged", fg="green"))
                    approved += 1
                else:
                    click.echo(click.style("✗ Failed to merge", fg="red"))
            elif choice == "R":
                if reject_merge(db_cfg.url, str(m["id"])):
                    click.echo(click.style("✗ Rejected", fg="yellow"))
                    rejected += 1
                else:
                    click.echo(click.style("✗ Failed to reject", fg="red"))
            else:
                click.echo("Skipped")
                skipped += 1

            click.echo("")

    # Wait for pending approvals if requested
    if pending_futures:
        if wait:
            click.echo(f"Waiting for {len(pending_futures)} pending approvals...")
            succeeded = 0
            failed = 0
            for future, name in pending_futures:
                try:
                    # Each queued approval gets up to 120 seconds to finish.
                    result = future.result(timeout=120)
                    if result:
                        click.echo(click.style(f" ✓ {name}", fg="green"))
                        succeeded += 1
                    else:
                        click.echo(click.style(f" ✗ {name} failed", fg="red"))
                        failed += 1
                except Exception as exc:
                    click.echo(click.style(f" ✗ {name}: {exc}", fg="red"))
                    failed += 1
            click.echo(f"Embeddings: {succeeded} succeeded, {failed} failed")
        else:
            done_count = sum(1 for f, _ in pending_futures if f.done())
            pending_count = len(pending_futures) - done_count
            if pending_count > 0:
                click.echo(f"{pending_count} embeddings still processing in background...")

    # Cleanup executor
    shutdown_executor(wait=wait)

    # Summary
    click.echo("")
    click.echo(click.style("Review complete:", bold=True))
    click.echo(f" {click.style(str(approved), fg='green')} approved")
    click.echo(f" {click.style(str(rejected), fg='yellow')} rejected")
    click.echo(f" {skipped} skipped")
|
|
2462
|
+
|
|
2463
|
+
|
|
1271
2464
|
# Script entry point: call main() only when this module is executed directly.
if __name__ == "__main__":
    main()
|