npm - feed-the-machine - Versions diffs - 1.5.0 → 1.6.0 - Mend

feed-the-machine 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (224) hide show

package/LICENSE +21 -21
package/README.md +170 -170
package/bin/generate-manifest.mjs +463 -463
package/bin/install.mjs +491 -491
package/docs/HOOKS.md +243 -243
package/docs/INBOX.md +233 -233
package/ftm/SKILL.md +122 -122
package/ftm-audit/SKILL.md +623 -541
package/ftm-audit/references/protocols/PROJECT-PATTERNS.md +91 -91
package/ftm-audit/references/protocols/RUNTIME-WIRING.md +66 -66
package/ftm-audit/references/protocols/WIRING-CONTRACTS.md +135 -135
package/ftm-audit/references/strategies/AUTO-FIX-STRATEGIES.md +69 -69
package/ftm-audit/references/templates/REPORT-FORMAT.md +96 -96
package/ftm-audit/scripts/run-knip.sh +23 -23
package/ftm-audit.yml +2 -2
package/ftm-brainstorm/SKILL.md +498 -498
package/ftm-brainstorm/evals/evals.json +100 -100
package/ftm-brainstorm/evals/promptfoo.yaml +109 -109
package/ftm-brainstorm/references/agent-prompts.md +224 -224
package/ftm-brainstorm/references/plan-template.md +121 -121
package/ftm-brainstorm.yml +2 -2
package/ftm-browse/SKILL.md +454 -454
package/ftm-browse/daemon/browser-manager.ts +206 -206
package/ftm-browse/daemon/bun.lock +30 -30
package/ftm-browse/daemon/cli.ts +347 -347
package/ftm-browse/daemon/commands.ts +410 -410
package/ftm-browse/daemon/main.ts +357 -357
package/ftm-browse/daemon/package.json +17 -17
package/ftm-browse/daemon/server.ts +189 -189
package/ftm-browse/daemon/snapshot.ts +519 -519
package/ftm-browse/daemon/tsconfig.json +22 -22
package/ftm-browse.yml +4 -4
package/ftm-capture/SKILL.md +370 -370
package/ftm-capture.yml +4 -4
package/ftm-codex-gate/SKILL.md +361 -361
package/ftm-codex-gate.yml +2 -2
package/ftm-config/SKILL.md +345 -345
package/ftm-config.default.yml +82 -80
package/ftm-config.yml +2 -2
package/ftm-council/SKILL.md +416 -416
package/ftm-council/references/prompts/CLAUDE-INVESTIGATION.md +60 -60
package/ftm-council/references/prompts/CODEX-INVESTIGATION.md +58 -58
package/ftm-council/references/prompts/GEMINI-INVESTIGATION.md +58 -58
package/ftm-council/references/prompts/REBUTTAL-TEMPLATE.md +57 -57
package/ftm-council/references/protocols/PREREQUISITES.md +47 -47
package/ftm-council/references/protocols/STEP-0-FRAMING.md +46 -46
package/ftm-council.yml +2 -2
package/ftm-dashboard/SKILL.md +163 -163
package/ftm-dashboard.yml +4 -4
package/ftm-debug/SKILL.md +1037 -1037
package/ftm-debug/references/phases/PHASE-0-INTAKE.md +58 -58
package/ftm-debug/references/phases/PHASE-1-TRIAGE.md +46 -46
package/ftm-debug/references/phases/PHASE-2-WAR-ROOM-AGENTS.md +279 -279
package/ftm-debug/references/phases/PHASE-3-TO-6-EXECUTION.md +436 -436
package/ftm-debug/references/protocols/BLACKBOARD.md +86 -86
package/ftm-debug/references/protocols/EDGE-CASES.md +103 -103
package/ftm-debug.yml +2 -2
package/ftm-diagram/SKILL.md +277 -277
package/ftm-diagram.yml +2 -2
package/ftm-executor/SKILL.md +777 -767
package/ftm-executor/references/STYLE-TEMPLATE.md +73 -73
package/ftm-executor/references/phases/PHASE-0-VERIFICATION.md +62 -62
package/ftm-executor/references/phases/PHASE-2-AGENT-ASSEMBLY.md +34 -34
package/ftm-executor/references/phases/PHASE-3-WORKTREES.md +38 -38
package/ftm-executor/references/phases/PHASE-4-5-AUDIT.md +72 -72
package/ftm-executor/references/phases/PHASE-4-DISPATCH.md +66 -66
package/ftm-executor/references/phases/PHASE-5-5-CODEX-GATE.md +73 -73
package/ftm-executor/references/protocols/DOCUMENTATION-BOOTSTRAP.md +36 -36
package/ftm-executor/references/protocols/MODEL-PROFILE.md +59 -44
package/ftm-executor/references/protocols/PROGRESS-TRACKING.md +66 -66
package/ftm-executor/runtime/ftm-runtime.mjs +252 -252
package/ftm-executor/runtime/package.json +8 -8
package/ftm-executor.yml +2 -2
package/ftm-git/SKILL.md +441 -441
package/ftm-git/evals/evals.json +26 -26
package/ftm-git/evals/promptfoo.yaml +75 -75
package/ftm-git/hooks/post-commit-experience.sh +92 -92
package/ftm-git/references/patterns/SECRET-PATTERNS.md +104 -104
package/ftm-git/references/protocols/REMEDIATION.md +139 -139
package/ftm-git/scripts/pre-commit-secrets.sh +110 -110
package/ftm-git.yml +2 -2
package/ftm-inbox/backend/adapters/_retry.py +64 -64
package/ftm-inbox/backend/adapters/base.py +230 -230
package/ftm-inbox/backend/adapters/freshservice.py +104 -104
package/ftm-inbox/backend/adapters/gmail.py +125 -125
package/ftm-inbox/backend/adapters/jira.py +136 -136
package/ftm-inbox/backend/adapters/registry.py +192 -192
package/ftm-inbox/backend/adapters/slack.py +110 -110
package/ftm-inbox/backend/db/connection.py +54 -54
package/ftm-inbox/backend/db/schema.py +78 -78
package/ftm-inbox/backend/executor/__init__.py +7 -7
package/ftm-inbox/backend/executor/engine.py +149 -149
package/ftm-inbox/backend/executor/step_runner.py +98 -98
package/ftm-inbox/backend/main.py +103 -103
package/ftm-inbox/backend/models/__init__.py +1 -1
package/ftm-inbox/backend/models/unified_task.py +36 -36
package/ftm-inbox/backend/planner/__init__.py +6 -6
package/ftm-inbox/backend/planner/generator.py +127 -127
package/ftm-inbox/backend/planner/schema.py +34 -34
package/ftm-inbox/backend/requirements.txt +5 -5
package/ftm-inbox/backend/routes/execute.py +186 -186
package/ftm-inbox/backend/routes/health.py +52 -52
package/ftm-inbox/backend/routes/inbox.py +68 -68
package/ftm-inbox/backend/routes/plan.py +271 -271
package/ftm-inbox/bin/launchagent.mjs +91 -91
package/ftm-inbox/bin/setup.mjs +188 -188
package/ftm-inbox/bin/start.sh +10 -10
package/ftm-inbox/bin/status.sh +17 -17
package/ftm-inbox/bin/stop.sh +8 -8
package/ftm-inbox/config.example.yml +55 -55
package/ftm-inbox/package-lock.json +2898 -2898
package/ftm-inbox/package.json +26 -26
package/ftm-inbox/postcss.config.js +6 -6
package/ftm-inbox/src/app.css +199 -199
package/ftm-inbox/src/app.html +18 -18
package/ftm-inbox/src/lib/api.ts +166 -166
package/ftm-inbox/src/lib/components/ExecutionLog.svelte +81 -81
package/ftm-inbox/src/lib/components/InboxFeed.svelte +143 -143
package/ftm-inbox/src/lib/components/PlanStep.svelte +271 -271
package/ftm-inbox/src/lib/components/PlanView.svelte +206 -206
package/ftm-inbox/src/lib/components/StreamPanel.svelte +99 -99
package/ftm-inbox/src/lib/components/TaskCard.svelte +190 -190
package/ftm-inbox/src/lib/components/ui/EmptyState.svelte +63 -63
package/ftm-inbox/src/lib/components/ui/KawaiiCard.svelte +86 -86
package/ftm-inbox/src/lib/components/ui/PillButton.svelte +106 -106
package/ftm-inbox/src/lib/components/ui/StatusBadge.svelte +67 -67
package/ftm-inbox/src/lib/components/ui/StreamDrawer.svelte +149 -149
package/ftm-inbox/src/lib/components/ui/ThemeToggle.svelte +80 -80
package/ftm-inbox/src/lib/theme.ts +47 -47
package/ftm-inbox/src/routes/+layout.svelte +76 -76
package/ftm-inbox/src/routes/+page.svelte +401 -401
package/ftm-inbox/svelte.config.js +12 -12
package/ftm-inbox/tailwind.config.ts +63 -63
package/ftm-inbox/tsconfig.json +13 -13
package/ftm-inbox/vite.config.ts +6 -6
package/ftm-intent/SKILL.md +241 -241
package/ftm-intent.yml +2 -2
package/ftm-manifest.json +3794 -3794
package/ftm-map/SKILL.md +291 -291
package/ftm-map/scripts/db.py +712 -712
package/ftm-map/scripts/index.py +415 -415
package/ftm-map/scripts/parser.py +224 -224
package/ftm-map/scripts/queries/go-tags.scm +20 -20
package/ftm-map/scripts/queries/javascript-tags.scm +35 -35
package/ftm-map/scripts/queries/python-tags.scm +31 -31
package/ftm-map/scripts/queries/ruby-tags.scm +19 -19
package/ftm-map/scripts/queries/rust-tags.scm +37 -37
package/ftm-map/scripts/queries/typescript-tags.scm +41 -41
package/ftm-map/scripts/query.py +301 -301
package/ftm-map/scripts/ranker.py +377 -377
package/ftm-map/scripts/requirements.txt +5 -5
package/ftm-map/scripts/setup-hooks.sh +27 -27
package/ftm-map/scripts/setup.sh +56 -56
package/ftm-map/scripts/test_db.py +364 -364
package/ftm-map/scripts/test_parser.py +174 -174
package/ftm-map/scripts/test_query.py +183 -183
package/ftm-map/scripts/test_ranker.py +199 -199
package/ftm-map/scripts/views.py +591 -591
package/ftm-map.yml +2 -2
package/ftm-mind/SKILL.md +1943 -1943
package/ftm-mind/evals/promptfoo.yaml +142 -142
package/ftm-mind/references/blackboard-schema.md +328 -328
package/ftm-mind/references/complexity-guide.md +110 -110
package/ftm-mind/references/event-registry.md +319 -319
package/ftm-mind/references/mcp-inventory.md +296 -296
package/ftm-mind/references/protocols/COMPLEXITY-SIZING.md +72 -72
package/ftm-mind/references/protocols/MCP-HEURISTICS.md +32 -32
package/ftm-mind/references/protocols/PLAN-APPROVAL.md +80 -80
package/ftm-mind/references/reflexion-protocol.md +249 -249
package/ftm-mind/references/routing/SCENARIOS.md +22 -22
package/ftm-mind/references/routing-scenarios.md +35 -35
package/ftm-mind.yml +2 -2
package/ftm-pause/SKILL.md +395 -395
package/ftm-pause/references/protocols/SKILL-RESTORE-PROTOCOLS.md +186 -186
package/ftm-pause/references/protocols/VALIDATION.md +80 -80
package/ftm-pause.yml +2 -2
package/ftm-researcher/SKILL.md +275 -275
package/ftm-researcher/evals/agent-diversity.yaml +17 -17
package/ftm-researcher/evals/synthesis-quality.yaml +12 -12
package/ftm-researcher/evals/trigger-accuracy.yaml +39 -39
package/ftm-researcher/references/adaptive-search.md +116 -116
package/ftm-researcher/references/agent-prompts.md +193 -193
package/ftm-researcher/references/council-integration.md +193 -193
package/ftm-researcher/references/output-format.md +203 -203
package/ftm-researcher/references/synthesis-pipeline.md +165 -165
package/ftm-researcher/scripts/score_credibility.py +234 -234
package/ftm-researcher/scripts/validate_research.py +92 -92
package/ftm-researcher.yml +2 -2
package/ftm-resume/SKILL.md +518 -518
package/ftm-resume/references/protocols/VALIDATION.md +172 -172
package/ftm-resume.yml +2 -2
package/ftm-retro/SKILL.md +380 -380
package/ftm-retro/references/protocols/SCORING-RUBRICS.md +89 -89
package/ftm-retro/references/templates/REPORT-FORMAT.md +109 -109
package/ftm-retro.yml +2 -2
package/ftm-routine/SKILL.md +170 -170
package/ftm-routine.yml +4 -4
package/ftm-state/blackboard/capabilities.json +5 -5
package/ftm-state/blackboard/capabilities.schema.json +27 -27
package/ftm-state/blackboard/context.json +23 -23
package/ftm-state/blackboard/experiences/index.json +9 -9
package/ftm-state/blackboard/patterns.json +6 -6
package/ftm-state/schemas/context.schema.json +130 -130
package/ftm-state/schemas/experience-index.schema.json +77 -77
package/ftm-state/schemas/experience.schema.json +78 -78
package/ftm-state/schemas/patterns.schema.json +44 -44
package/ftm-upgrade/SKILL.md +194 -194
package/ftm-upgrade/scripts/check-version.sh +76 -76
package/ftm-upgrade/scripts/upgrade.sh +143 -143
package/ftm-upgrade.yml +2 -2
package/ftm-verify.yml +2 -2
package/ftm.yml +2 -2
package/hooks/ftm-blackboard-enforcer.sh +93 -93
package/hooks/ftm-discovery-reminder.sh +90 -90
package/hooks/ftm-drafts-gate.sh +61 -61
package/hooks/ftm-event-logger.mjs +107 -107
package/hooks/ftm-map-autodetect.sh +79 -79
package/hooks/ftm-pending-sync-check.sh +22 -22
package/hooks/ftm-plan-gate.sh +92 -92
package/hooks/ftm-post-commit-trigger.sh +57 -57
package/hooks/settings-template.json +81 -81
package/install.sh +363 -363
package/package.json +84 -84
package/uninstall.sh +25 -25

package/ftm-map/scripts/ranker.py CHANGED Viewed

@@ -1,377 +1,377 @@
-"""
-ranker.py -- PageRank-based context selection engine for ftm-map.
-Implements Aider-style personalized PageRank over the file-level dependency graph
-with task-aware personalization and token-budget binary search.
-"""
-import math
-import os
-import sys
-sys.path.insert(0, os.path.dirname(__file__))
-import numpy as np
-import scipy.sparse as sp
-# Try fast-pagerank first, fall back to scipy power iteration
-try:
-    from fast_pagerank import pagerank_power
-    HAS_FAST_PAGERANK = True
-except ImportError:
-    HAS_FAST_PAGERANK = False
-def build_adjacency_matrix(conn):
-    """Build undirected sparse adjacency matrix from file_edges.
-    Returns (matrix, file_id_to_idx, idx_to_file_id) where:
-    - matrix is a scipy CSR sparse matrix (undirected: A + A.T)
-    - file_id_to_idx maps file_id -> matrix index
-    - idx_to_file_id maps matrix index -> file_id
-    """
-    # Get all files
-    files = conn.execute("SELECT id FROM files ORDER BY id").fetchall()
-    if not files:
-        return None, {}, {}
-    file_ids = [row['id'] for row in files]
-    file_id_to_idx = {fid: i for i, fid in enumerate(file_ids)}
-    idx_to_file_id = {i: fid for i, fid in enumerate(file_ids)}
-    n = len(file_ids)
-    # Get edges
-    edges = conn.execute(
-        "SELECT source_file_id, target_file_id, weight FROM file_edges"
-    ).fetchall()
-    if not edges:
-        return sp.csr_matrix((n, n)), file_id_to_idx, idx_to_file_id
-    rows, cols, data = [], [], []
-    for edge in edges:
-        src_idx = file_id_to_idx.get(edge['source_file_id'])
-        tgt_idx = file_id_to_idx.get(edge['target_file_id'])
-        if src_idx is not None and tgt_idx is not None:
-            rows.append(src_idx)
-            cols.append(tgt_idx)
-            data.append(edge['weight'])
-    # Build directed matrix, then symmetrize for undirected PageRank
-    A = sp.csr_matrix((data, (rows, cols)), shape=(n, n))
-    A_undirected = A + A.T  # Symmetrize
-    return A_undirected, file_id_to_idx, idx_to_file_id
-def build_personalization(
-    conn, seed_files=None, seed_keywords=None, seed_symbols=None, file_id_to_idx=None
-):
-    """Build personalization vector for PageRank.
-    Three channels:
-    - seed_files: file paths get 100x weight
-    - seed_keywords: FTS5 matches get 30x weight
-    - seed_symbols: symbol name matches - defining file gets 80x, referencing files get 40x
-    Returns normalized numpy array (sums to 1.0).
-    """
-    n = len(file_id_to_idx)
-    if n == 0:
-        return None
-    pers = np.ones(n)  # Base: uniform weight of 1
-    # Channel 1: Seed files (100x)
-    if seed_files:
-        for fpath in seed_files:
-            file_row = conn.execute(
-                "SELECT id FROM files WHERE path=?", (fpath,)
-            ).fetchone()
-            if file_row and file_row['id'] in file_id_to_idx:
-                idx = file_id_to_idx[file_row['id']]
-                pers[idx] *= 100
-    # Channel 2: Seed keywords via FTS5 (30x)
-    if seed_keywords:
-        for kw in seed_keywords:
-            try:
-                fts_results = conn.execute(
-                    "SELECT s.file_id FROM symbols_fts fts "
-                    "JOIN symbols s ON s.id = fts.rowid "
-                    "WHERE symbols_fts MATCH ? LIMIT 50",
-                    (kw,),
-                ).fetchall()
-                for row in fts_results:
-                    if row['file_id'] in file_id_to_idx:
-                        pers[file_id_to_idx[row['file_id']]] *= 30
-            except Exception:
-                pass  # FTS query syntax errors are non-fatal
-    # Channel 3: Seed symbols (80x defining, 40x referencing)
-    if seed_symbols:
-        for sym_name in seed_symbols:
-            # Defining files get 80x
-            def_files = conn.execute(
-                "SELECT DISTINCT file_id FROM symbols WHERE name=?", (sym_name,)
-            ).fetchall()
-            for row in def_files:
-                if row['file_id'] in file_id_to_idx:
-                    pers[file_id_to_idx[row['file_id']]] *= 80
-            # Referencing files get 40x
-            ref_files = conn.execute(
-                "SELECT DISTINCT file_id FROM refs WHERE symbol_name=?", (sym_name,)
-            ).fetchall()
-            for row in ref_files:
-                if row['file_id'] in file_id_to_idx:
-                    pers[file_id_to_idx[row['file_id']]] *= 40
-    # Normalize to sum to 1
-    total = pers.sum()
-    if total > 0:
-        pers /= total
-    return pers
-def run_pagerank(adj_matrix, personalization=None, damping=0.85, max_iter=100, tol=1e-6):
-    """Run PageRank on the adjacency matrix.
-    Uses fast-pagerank if available, otherwise scipy power iteration.
-    Returns numpy array of scores indexed by matrix position.
-    """
-    n = adj_matrix.shape[0]
-    if n == 0:
-        return np.array([])
-    if HAS_FAST_PAGERANK and personalization is not None:
-        try:
-            scores = pagerank_power(
-                adj_matrix, p=damping, personalize=personalization, tol=tol
-            )
-            return scores
-        except Exception:
-            pass  # Fall through to scipy implementation
-    # Scipy power iteration fallback
-    # Normalize adjacency matrix columns (column-stochastic transition matrix)
-    col_sums = np.array(adj_matrix.sum(axis=0)).flatten()
-    col_sums[col_sums == 0] = 1  # Avoid division by zero for dangling nodes
-    # Transition matrix: M[i,j] = A[i,j] / col_sum[j]
-    D_inv = sp.diags(1.0 / col_sums)
-    M = adj_matrix @ D_inv
-    # Initialize personalization / teleport vector
-    if personalization is not None:
-        v = personalization.copy()
-    else:
-        v = np.ones(n) / n
-    scores = v.copy()
-    # Dangling nodes: columns with zero outgoing weight
-    dangling_mask = np.array(adj_matrix.sum(axis=0)).flatten() == 0
-    for _ in range(max_iter):
-        prev = scores.copy()
-        # PageRank iteration with dangling-node redistribution
-        dangling_sum = scores[dangling_mask].sum() if dangling_mask.any() else 0
-        scores = damping * (M @ scores) + damping * dangling_sum * v + (1 - damping) * v
-        # Check convergence via L1 norm
-        if np.abs(scores - prev).sum() < tol:
-            break
-    return scores
-def rank_files(conn, seed_files=None, seed_keywords=None, seed_symbols=None):
-    """Rank all files by structural importance with personalization.
-    Returns sorted list of (file_path, score) tuples, highest score first.
-    """
-    adj, fid_to_idx, idx_to_fid = build_adjacency_matrix(conn)
-    if adj is None or adj.shape[0] == 0:
-        return []
-    pers = build_personalization(
-        conn, seed_files, seed_keywords, seed_symbols, fid_to_idx
-    )
-    scores = run_pagerank(adj, pers)
-    # Map scores back to file paths
-    results = []
-    for idx, score in enumerate(scores):
-        file_id = idx_to_fid[idx]
-        file_row = conn.execute(
-            "SELECT path FROM files WHERE id=?", (file_id,)
-        ).fetchone()
-        if file_row:
-            results.append((file_row['path'], float(score)))
-    results.sort(key=lambda x: x[1], reverse=True)
-    return results
-def fit_to_budget(ranked_files, conn, token_budget):
-    """Select files + key symbols that fit within token budget.
-    Uses binary search with 15% tolerance (Aider's approach).
-    Token estimation: ~25 tokens per tag/symbol entry.
-    Returns (result_list, total_tokens) where result_list contains dicts:
-        [{path, score, symbols: [name, ...], tokens}]
-    """
-    if not ranked_files or token_budget <= 0:
-        return [], 0
-    def estimate_tokens(file_list):
-        """Estimate tokens for a list of files based on their symbol count."""
-        total = 0
-        for fpath, _ in file_list:
-            file_row = conn.execute(
-                "SELECT id, line_count FROM files WHERE path=?", (fpath,)
-            ).fetchone()
-            if not file_row:
-                continue
-            syms = conn.execute(
-                "SELECT name, signature FROM symbols WHERE file_id=? ORDER BY line_start",
-                (file_row['id'],),
-            ).fetchall()
-            for _sym in syms:
-                # ~25 tokens per tag entry (Aider's estimate)
-                total += 25
-        return total
-    # Binary search: find max number of files that fits within budget
-    lo, hi = 1, len(ranked_files)
-    best = 1
-    while lo <= hi:
-        mid = (lo + hi) // 2
-        tokens = estimate_tokens(ranked_files[:mid])
-        if tokens <= token_budget:
-            best = mid
-            lo = mid + 1
-        else:
-            hi = mid - 1
-    # Allow 15% tolerance -- greedily add more files if within tolerance
-    tolerance_budget = token_budget * 1.15
-    while best < len(ranked_files):
-        tokens = estimate_tokens(ranked_files[: best + 1])
-        if tokens <= tolerance_budget:
-            best += 1
-        else:
-            break
-    # Build output with symbols for each selected file
-    result = []
-    total_tokens = 0
-    for fpath, score in ranked_files[:best]:
-        file_row = conn.execute(
-            "SELECT id FROM files WHERE path=?", (fpath,)
-        ).fetchone()
-        if not file_row:
-            continue
-        syms = conn.execute(
-            "SELECT name FROM symbols WHERE file_id=? ORDER BY line_start",
-            (file_row['id'],),
-        ).fetchall()
-        sym_names = [s['name'] for s in syms]
-        entry_tokens = len(sym_names) * 25
-        total_tokens += entry_tokens
-        result.append({
-            "path": fpath,
-            "score": round(score, 6),
-            "symbols": sym_names,
-            "tokens": entry_tokens,
-        })
-    return result, total_tokens
-# ---------------------------------------------------------------------------
-# Smoke test
-# ---------------------------------------------------------------------------
-if __name__ == "__main__":
-    import tempfile
-    from db import (
-        get_connection,
-        add_file,
-        add_symbol,
-        add_reference,
-        rebuild_file_edges,
-        rebuild_symbol_edges,
-    )
-    print("Running ranker.py smoke tests ...")
-    with tempfile.TemporaryDirectory() as tmp:
-        conn = get_connection(tmp)
-        # Create a small graph: 3 files with cross-references
-        f1 = add_file(conn, "src/auth.py", "python", 1.0, line_count=50)
-        f2 = add_file(conn, "src/api.py", "python", 1.0, line_count=100)
-        f3 = add_file(conn, "src/utils.py", "python", 1.0, line_count=30)
-        # Symbols
-        add_symbol(
-            conn, f1, "authenticate", "definition", 1, 20,
-            signature="def authenticate(req)",
-        )
-        add_symbol(conn, f1, "verify_token", "definition", 25, 40)
-        add_symbol(
-            conn, f2, "handle_request", "definition", 1, 50,
-            signature="def handle_request(req)",
-        )
-        add_symbol(conn, f3, "format_date", "definition", 1, 10)
-        add_symbol(conn, f3, "parse_config", "definition", 15, 25)
-        # References: api.py references auth.py functions, and utils.py
-        add_reference(conn, f2, "authenticate", 10)
-        add_reference(conn, f2, "verify_token", 15)
-        add_reference(conn, f2, "format_date", 20)
-        add_reference(conn, f2, "parse_config", 25)
-        # auth.py also references utils
-        add_reference(conn, f1, "parse_config", 30)
-        # Materialize edges
-        rebuild_file_edges(conn)
-        conn.commit()
-        # Test 1: Uniform PageRank
-        results = rank_files(conn)
-        print(f"  Uniform PageRank: {len(results)} files ranked")
-        for path, score in results:
-            print(f"    {path}: {score:.6f}")
-        assert len(results) == 3
-        # Test 2: Personalized -- seed auth.py
-        results_pers = rank_files(conn, seed_files=["src/auth.py"])
-        print(f"  Personalized (seed auth.py): {len(results_pers)} files")
-        for path, score in results_pers:
-            print(f"    {path}: {score:.6f}")
-        # auth.py should be ranked higher with personalization
-        auth_score = next(s for p, s in results_pers if p == "src/auth.py")
-        auth_uniform = next(s for p, s in results if p == "src/auth.py")
-        print(f"  Auth personalized boost: {auth_score:.6f} vs {auth_uniform:.6f}")
-        # Test 3: Budget fitting
-        budget_result, total_tokens = fit_to_budget(results, conn, 200)
-        print(f"  Budget fit (200 tokens): {len(budget_result)} files, {total_tokens} tokens")
-        assert total_tokens <= 200 * 1.15  # 15% tolerance
-        # Test 4: Keyword personalization
-        results_kw = rank_files(conn, seed_keywords=["authenticate"])
-        print(f"  Keyword personalized: {len(results_kw)} files")
-        # Test 5: Symbol personalization
-        results_sym = rank_files(conn, seed_symbols=["authenticate"])
-        print(f"  Symbol personalized: {len(results_sym)} files")
-        print("\nAll ranker smoke tests passed.")
+"""
+ranker.py -- PageRank-based context selection engine for ftm-map.
+Implements Aider-style personalized PageRank over the file-level dependency graph
+with task-aware personalization and token-budget binary search.
+"""
+import math
+import os
+import sys
+sys.path.insert(0, os.path.dirname(__file__))
+import numpy as np
+import scipy.sparse as sp
+# Try fast-pagerank first, fall back to scipy power iteration
+try:
+    from fast_pagerank import pagerank_power
+    HAS_FAST_PAGERANK = True
+except ImportError:
+    HAS_FAST_PAGERANK = False
+def build_adjacency_matrix(conn):
+    """Build undirected sparse adjacency matrix from file_edges.
+    Returns (matrix, file_id_to_idx, idx_to_file_id) where:
+    - matrix is a scipy CSR sparse matrix (undirected: A + A.T)
+    - file_id_to_idx maps file_id -> matrix index
+    - idx_to_file_id maps matrix index -> file_id
+    """
+    # Get all files
+    files = conn.execute("SELECT id FROM files ORDER BY id").fetchall()
+    if not files:
+        return None, {}, {}
+    file_ids = [row['id'] for row in files]
+    file_id_to_idx = {fid: i for i, fid in enumerate(file_ids)}
+    idx_to_file_id = {i: fid for i, fid in enumerate(file_ids)}
+    n = len(file_ids)
+    # Get edges
+    edges = conn.execute(
+        "SELECT source_file_id, target_file_id, weight FROM file_edges"
+    ).fetchall()
+    if not edges:
+        return sp.csr_matrix((n, n)), file_id_to_idx, idx_to_file_id
+    rows, cols, data = [], [], []
+    for edge in edges:
+        src_idx = file_id_to_idx.get(edge['source_file_id'])
+        tgt_idx = file_id_to_idx.get(edge['target_file_id'])
+        if src_idx is not None and tgt_idx is not None:
+            rows.append(src_idx)
+            cols.append(tgt_idx)
+            data.append(edge['weight'])
+    # Build directed matrix, then symmetrize for undirected PageRank
+    A = sp.csr_matrix((data, (rows, cols)), shape=(n, n))
+    A_undirected = A + A.T  # Symmetrize
+    return A_undirected, file_id_to_idx, idx_to_file_id
+def build_personalization(
+    conn, seed_files=None, seed_keywords=None, seed_symbols=None, file_id_to_idx=None
+):
+    """Build personalization vector for PageRank.
+    Three channels:
+    - seed_files: file paths get 100x weight
+    - seed_keywords: FTS5 matches get 30x weight
+    - seed_symbols: symbol name matches - defining file gets 80x, referencing files get 40x
+    Returns normalized numpy array (sums to 1.0).
+    """
+    n = len(file_id_to_idx)
+    if n == 0:
+        return None
+    pers = np.ones(n)  # Base: uniform weight of 1
+    # Channel 1: Seed files (100x)
+    if seed_files:
+        for fpath in seed_files:
+            file_row = conn.execute(
+                "SELECT id FROM files WHERE path=?", (fpath,)
+            ).fetchone()
+            if file_row and file_row['id'] in file_id_to_idx:
+                idx = file_id_to_idx[file_row['id']]
+                pers[idx] *= 100
+    # Channel 2: Seed keywords via FTS5 (30x)
+    if seed_keywords:
+        for kw in seed_keywords:
+            try:
+                fts_results = conn.execute(
+                    "SELECT s.file_id FROM symbols_fts fts "
+                    "JOIN symbols s ON s.id = fts.rowid "
+                    "WHERE symbols_fts MATCH ? LIMIT 50",
+                    (kw,),
+                ).fetchall()
+                for row in fts_results:
+                    if row['file_id'] in file_id_to_idx:
+                        pers[file_id_to_idx[row['file_id']]] *= 30
+            except Exception:
+                pass  # FTS query syntax errors are non-fatal
+    # Channel 3: Seed symbols (80x defining, 40x referencing)
+    if seed_symbols:
+        for sym_name in seed_symbols:
+            # Defining files get 80x
+            def_files = conn.execute(
+                "SELECT DISTINCT file_id FROM symbols WHERE name=?", (sym_name,)
+            ).fetchall()
+            for row in def_files:
+                if row['file_id'] in file_id_to_idx:
+                    pers[file_id_to_idx[row['file_id']]] *= 80
+            # Referencing files get 40x
+            ref_files = conn.execute(
+                "SELECT DISTINCT file_id FROM refs WHERE symbol_name=?", (sym_name,)
+            ).fetchall()
+            for row in ref_files:
+                if row['file_id'] in file_id_to_idx:
+                    pers[file_id_to_idx[row['file_id']]] *= 40
+    # Normalize to sum to 1
+    total = pers.sum()
+    if total > 0:
+        pers /= total
+    return pers
+def run_pagerank(adj_matrix, personalization=None, damping=0.85, max_iter=100, tol=1e-6):
+    """Run PageRank on the adjacency matrix.
+    Uses fast-pagerank if available, otherwise scipy power iteration.
+    Returns numpy array of scores indexed by matrix position.
+    """
+    n = adj_matrix.shape[0]
+    if n == 0:
+        return np.array([])
+    if HAS_FAST_PAGERANK and personalization is not None:
+        try:
+            scores = pagerank_power(
+                adj_matrix, p=damping, personalize=personalization, tol=tol
+            )
+            return scores
+        except Exception:
+            pass  # Fall through to scipy implementation
+    # Scipy power iteration fallback
+    # Normalize adjacency matrix columns (column-stochastic transition matrix)
+    col_sums = np.array(adj_matrix.sum(axis=0)).flatten()
+    col_sums[col_sums == 0] = 1  # Avoid division by zero for dangling nodes
+    # Transition matrix: M[i,j] = A[i,j] / col_sum[j]
+    D_inv = sp.diags(1.0 / col_sums)
+    M = adj_matrix @ D_inv
+    # Initialize personalization / teleport vector
+    if personalization is not None:
+        v = personalization.copy()
+    else:
+        v = np.ones(n) / n
+    scores = v.copy()
+    # Dangling nodes: columns with zero outgoing weight
+    dangling_mask = np.array(adj_matrix.sum(axis=0)).flatten() == 0
+    for _ in range(max_iter):
+        prev = scores.copy()
+        # PageRank iteration with dangling-node redistribution
+        dangling_sum = scores[dangling_mask].sum() if dangling_mask.any() else 0
+        scores = damping * (M @ scores) + damping * dangling_sum * v + (1 - damping) * v
+        # Check convergence via L1 norm
+        if np.abs(scores - prev).sum() < tol:
+            break
+    return scores
def rank_files(conn, seed_files=None, seed_keywords=None, seed_symbols=None):
    """Rank all files by structural importance with personalization.

    Builds the file adjacency matrix, derives a personalization vector from
    the optional seeds, runs PageRank over the matrix and maps every score
    back to the path of the file it belongs to.

    Args:
        conn: Database connection; its rows must support access by column
            name (``row['path']``).
        seed_files: Optional seed forwarded to ``build_personalization``.
        seed_keywords: Optional seed forwarded to ``build_personalization``.
        seed_symbols: Optional seed forwarded to ``build_personalization``.

    Returns:
        Sorted list of ``(file_path, score)`` tuples, highest score first.
        Empty when no adjacency matrix could be built. Matrix entries whose
        file id has no row in the ``files`` table are left out.
    """
    adj, fid_to_idx, idx_to_fid = build_adjacency_matrix(conn)
    if adj is None or adj.shape[0] == 0:
        return []

    pers = build_personalization(
        conn, seed_files, seed_keywords, seed_symbols, fid_to_idx
    )
    scores = run_pagerank(adj, pers)

    # Resolve every file id to its path with one query instead of issuing a
    # separate SELECT for each matrix index.
    path_by_id = {
        row['id']: row['path']
        for row in conn.execute("SELECT id, path FROM files").fetchall()
    }

    # Map scores back to file paths, skipping ids unknown to the files table.
    results = []
    for idx, score in enumerate(scores):
        file_id = idx_to_fid[idx]
        if file_id in path_by_id:
            results.append((path_by_id[file_id], float(score)))

    # list.sort is stable, so equal scores keep their matrix-index order.
    results.sort(key=lambda item: item[1], reverse=True)
    return results
def fit_to_budget(ranked_files, conn, token_budget):
    """Select files + key symbols that fit within token budget.

    Takes the longest prefix of ``ranked_files`` whose estimated cost stays
    within the budget plus a 15% tolerance (Aider's approach).
    Token estimation: ~25 tokens per tag/symbol entry.

    The cumulative cost of a prefix never decreases, so the longest fitting
    prefix is found with a single forward pass that queries each file at
    most once; no prefix has to be re-estimated.

    Args:
        ranked_files: Sequence of ``(file_path, score)`` tuples, best first.
        conn: Database connection; its rows must support access by column
            name (``row['id']``, ``row['name']``).
        token_budget: Target number of tokens; values <= 0 select nothing.

    Returns:
        (result_list, total_tokens) where result_list contains dicts:
            [{path, score, symbols: [name, ...], tokens}]
        Paths with no row in the ``files`` table are skipped. The first
        entry of ``ranked_files`` is always kept, even when it alone exceeds
        the tolerance budget.
    """
    if not ranked_files or token_budget <= 0:
        return [], 0

    tokens_per_symbol = 25  # ~25 tokens per tag entry (Aider's estimate)
    tolerance_budget = token_budget * 1.15  # Allow 15% tolerance

    result = []
    total_tokens = 0
    for position, (fpath, score) in enumerate(ranked_files):
        file_row = conn.execute(
            "SELECT id FROM files WHERE path=?", (fpath,)
        ).fetchone()
        if not file_row:
            # Unknown path: costs nothing and produces no output entry.
            continue

        syms = conn.execute(
            "SELECT name FROM symbols WHERE file_id=? ORDER BY line_start",
            (file_row['id'],),
        ).fetchall()
        sym_names = [s['name'] for s in syms]
        entry_tokens = len(sym_names) * tokens_per_symbol

        # Costs only grow, so once one entry no longer fits, no later entry
        # can fit either. The very first ranked entry is exempt from the
        # check so that a non-empty ranking always yields its top file.
        fits = total_tokens + entry_tokens <= tolerance_budget
        if position > 0 and not fits:
            break

        total_tokens += entry_tokens
        result.append({
            "path": fpath,
            "score": round(score, 6),
            "symbols": sym_names,
            "tokens": entry_tokens,
        })

    return result, total_tokens
# ---------------------------------------------------------------------------
# Smoke test
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import tempfile

    from db import (
        get_connection,
        add_file,
        add_symbol,
        add_reference,
        rebuild_file_edges,
        rebuild_symbol_edges,
    )

    print("Running ranker.py smoke tests ...")

    with tempfile.TemporaryDirectory() as tmp:
        conn = get_connection(tmp)
        try:
            # Create a small graph: 3 files with cross-references
            f1 = add_file(conn, "src/auth.py", "python", 1.0, line_count=50)
            f2 = add_file(conn, "src/api.py", "python", 1.0, line_count=100)
            f3 = add_file(conn, "src/utils.py", "python", 1.0, line_count=30)

            # Symbols
            add_symbol(
                conn, f1, "authenticate", "definition", 1, 20,
                signature="def authenticate(req)",
            )
            add_symbol(conn, f1, "verify_token", "definition", 25, 40)
            add_symbol(
                conn, f2, "handle_request", "definition", 1, 50,
                signature="def handle_request(req)",
            )
            add_symbol(conn, f3, "format_date", "definition", 1, 10)
            add_symbol(conn, f3, "parse_config", "definition", 15, 25)

            # References: api.py references auth.py functions, and utils.py
            add_reference(conn, f2, "authenticate", 10)
            add_reference(conn, f2, "verify_token", 15)
            add_reference(conn, f2, "format_date", 20)
            add_reference(conn, f2, "parse_config", 25)
            # auth.py also references utils
            add_reference(conn, f1, "parse_config", 30)

            # Materialize edges
            rebuild_file_edges(conn)
            conn.commit()

            # Test 1: Uniform PageRank
            results = rank_files(conn)
            print(f"  Uniform PageRank: {len(results)} files ranked")
            for path, score in results:
                print(f"    {path}: {score:.6f}")
            assert len(results) == 3

            # Test 2: Personalized -- seed auth.py
            results_pers = rank_files(conn, seed_files=["src/auth.py"])
            print(f"  Personalized (seed auth.py): {len(results_pers)} files")
            for path, score in results_pers:
                print(f"    {path}: {score:.6f}")
            # The personalized and uniform scores of auth.py are printed for
            # manual comparison only; the boost itself is not asserted.
            auth_score = next(s for p, s in results_pers if p == "src/auth.py")
            auth_uniform = next(s for p, s in results if p == "src/auth.py")
            print(f"  Auth personalized boost: {auth_score:.6f} vs {auth_uniform:.6f}")

            # Test 3: Budget fitting
            budget_result, total_tokens = fit_to_budget(results, conn, 200)
            print(f"  Budget fit (200 tokens): {len(budget_result)} files, {total_tokens} tokens")
            assert total_tokens <= 200 * 1.15  # 15% tolerance

            # Test 4: Keyword personalization
            results_kw = rank_files(conn, seed_keywords=["authenticate"])
            print(f"  Keyword personalized: {len(results_kw)} files")

            # Test 5: Symbol personalization
            results_sym = rank_files(conn, seed_symbols=["authenticate"])
            print(f"  Symbol personalized: {len(results_sym)} files")

            print("\nAll ranker smoke tests passed.")
        finally:
            # Close the connection before the temporary directory is removed,
            # so the database file is no longer held open during cleanup
            # (an open file blocks directory removal on Windows).
            conn.close()