npm - @pmaddire/gcie - Versions diffs - 0.1.13 → 0.1.15 - Mend

@pmaddire/gcie 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8):

package/GCIE_USAGE.md +7 -2
package/README.md +121 -191
package/cli/app.py +42 -10
package/cli/commands/adaptation.py +72 -14
package/cli/commands/context.py +351 -145
package/llm_context/context_builder.py +83 -66
package/llm_context/snippet_selector.py +157 -26
package/package.json +1 -1

package/llm_context/context_builder.py (changed)

@@ -1,67 +1,84 @@
-"""Context builder for minimal LLM prompts."""
-from __future__ import annotations
-from dataclasses import dataclass
-from .snippet_selector import RankedSnippet, estimate_tokens, select_snippets
-@dataclass(frozen=True, slots=True)
-class ContextPayload:
-    query: str
-    snippets: tuple[RankedSnippet, ...]
-    total_tokens_estimate: int
-_INTENT_BASE = {
-    "edit": 300,
-    "refactor": 600,
-    "debug": 500,
-    "explore": 400,
-}
-def _detect_intent(query: str) -> str:
-    text = query.lower()
-    if any(word in text for word in ("refactor", "rewrite", "migrate", "restructure")):
-        return "refactor"
-    if any(word in text for word in ("debug", "why", "error", "fail", "bug", "trace")):
-        return "debug"
-    if any(word in text for word in ("add", "change", "update", "extend", "modify", "remove", "rename")):
-        return "edit"
-    return "explore"
-def _auto_budget(query: str, ranked_snippets: list[RankedSnippet], intent: str) -> int:
-    """Compute a context budget that scales with intent, query, and candidate size."""
-    q_tokens = estimate_tokens(query)
-    count = len(ranked_snippets)
-    base = _INTENT_BASE.get(intent, 400)
-    budget = base + min(300, q_tokens * 10) + min(400, count * 30)
-    return max(200, min(1600, budget))
-def build_context(
-    query: str,
-    ranked_snippets: list[RankedSnippet],
-    *,
-    token_budget: int | None = 800,
-    mandatory_node_ids: set[str] | None = None,
-    intent: str | None = None,
-) -> ContextPayload:
-    """Build minimal context payload for LLM usage."""
-    if token_budget is None:
-        detected_intent = _detect_intent(query) if intent is None else intent
-        token_budget = _auto_budget(query, ranked_snippets, detected_intent)
-    selected = select_snippets(
-        ranked_snippets,
-        token_budget=token_budget,
-        mandatory_node_ids=mandatory_node_ids,
+"""Context builder for minimal LLM prompts."""
+from __future__ import annotations
+from dataclasses import dataclass
+from .snippet_selector import (
+    RankedSnippet,
+    SelectionMetrics,
+    estimate_tokens,
+    select_snippets_with_diagnostics,
+)
+@dataclass(frozen=True, slots=True)
+class ContextPayload:
+    query: str
+    snippets: tuple[RankedSnippet, ...]
+    total_tokens_estimate: int
+    selection_confidence: float = 0.0
+    selection_metrics: SelectionMetrics | None = None
+_INTENT_BASE = {
+    "edit": 300,
+    "refactor": 600,
+    "debug": 500,
+    "explore": 400,
+}
+def _detect_intent(query: str) -> str:
+    text = query.lower()
+    if any(word in text for word in ("refactor", "rewrite", "migrate", "restructure")):
+        return "refactor"
+    if any(word in text for word in ("debug", "why", "error", "fail", "bug", "trace")):
+        return "debug"
+    if any(word in text for word in ("add", "change", "update", "extend", "modify", "remove", "rename")):
+        return "edit"
+    return "explore"
+def _auto_budget(query: str, ranked_snippets: list[RankedSnippet], intent: str) -> int:
+    """Compute a context budget that scales with intent, query, and candidate size."""
+    q_tokens = estimate_tokens(query)
+    count = len(ranked_snippets)
+    base = _INTENT_BASE.get(intent, 400)
+    budget = base + min(300, q_tokens * 10) + min(400, count * 30)
+    return max(200, min(1600, budget))
+def should_expand_context(payload: ContextPayload, *, minimum_confidence: float = 0.7) -> bool:
+    """Return True when the caller should consider widening retrieval."""
+    return payload.selection_confidence < minimum_confidence
+def build_context(
+    query: str,
+    ranked_snippets: list[RankedSnippet],
+    *,
+    token_budget: int | None = 800,
+    mandatory_node_ids: set[str] | None = None,
+    intent: str | None = None,
+) -> ContextPayload:
+    """Build minimal context payload for LLM usage."""
+    if token_budget is None:
+        detected_intent = _detect_intent(query) if intent is None else intent
+        token_budget = _auto_budget(query, ranked_snippets, detected_intent)
+    selection = select_snippets_with_diagnostics(
+        ranked_snippets,
+        token_budget=token_budget,
+        mandatory_node_ids=mandatory_node_ids,
+    )
+    total = sum(estimate_tokens(item.content) for item in selection.snippets)
+    return ContextPayload(
+        query=query,
+        snippets=selection.snippets,
+        total_tokens_estimate=total,
+        selection_confidence=selection.metrics.confidence,
+        selection_metrics=selection.metrics,
     )
-    total = sum(estimate_tokens(item.content) for item in selected)
-    return ContextPayload(query=query, snippets=selected, total_tokens_estimate=total)

package/llm_context/snippet_selector.py (changed)

@@ -12,46 +12,177 @@ class RankedSnippet:
     score: float
+@dataclass(frozen=True, slots=True)
+class SelectionMetrics:
+    token_budget: int
+    used_tokens: int
+    selected_count: int
+    mandatory_requested_count: int
+    mandatory_selected_count: int
+    mandatory_coverage_ratio: float
+    score_spread: float
+    confidence: float
+@dataclass(frozen=True, slots=True)
+class SelectionResult:
+    snippets: tuple[RankedSnippet, ...]
+    metrics: SelectionMetrics
 def estimate_tokens(text: str) -> int:
     """Cheap token estimate for budget management."""
     return max(1, len(text.split()))
-def select_snippets(
+def _candidate_sort_key(item: RankedSnippet) -> tuple[float, str, str]:
+    return (-item.score, item.node_id, item.content)
+def _dedupe_ranked_snippets(
+    ranked: list[RankedSnippet],
+    *,
+    mandatory_node_ids: set[str],
+) -> list[RankedSnippet]:
+    """Keep one snippet per content blob while preferring stronger candidates."""
+    best_by_content: dict[str, tuple[int, RankedSnippet]] = {}
+    for index, item in enumerate(ranked):
+        current = best_by_content.get(item.content)
+        if current is None:
+            best_by_content[item.content] = (index, item)
+            continue
+        current_index, current_item = current
+        current_is_mandatory = current_item.node_id in mandatory_node_ids
+        item_is_mandatory = item.node_id in mandatory_node_ids
+        replace = False
+        if item_is_mandatory and not current_is_mandatory:
+            replace = True
+        elif item.score > current_item.score:
+            replace = True
+        elif item.score == current_item.score and index < current_index:
+            replace = True
+        if replace:
+            best_by_content[item.content] = (index, item)
+    ordered = sorted(best_by_content.values(), key=lambda pair: pair[0])
+    return [item for _, item in ordered]
+def _score_spread(selected: tuple[RankedSnippet, ...]) -> float:
+    if not selected:
+        return 0.0
+    if len(selected) == 1:
+        return max(0.0, selected[0].score)
+    return max(0.0, selected[0].score - selected[-1].score)
+def _selection_confidence(
+    *,
+    selected_count: int,
+    total_candidates: int,
+    mandatory_requested_count: int,
+    mandatory_selected_count: int,
+    score_spread: float,
+) -> float:
+    if selected_count <= 0:
+        return 0.0
+    if mandatory_requested_count > 0:
+        mandatory_coverage = mandatory_selected_count / mandatory_requested_count
+    else:
+        mandatory_coverage = 1.0
+    spread_score = min(1.0, score_spread / 0.5)
+    selected_density = min(1.0, selected_count / max(3, total_candidates or 1))
+    confidence = (
+        0.65 * mandatory_coverage
+        + 0.2 * spread_score
+        + 0.15 * selected_density
+    )
+    if mandatory_requested_count > 0 and mandatory_coverage < 1.0:
+        confidence *= 0.9
+    return max(0.0, min(1.0, round(confidence, 3)))
+def select_snippets_with_diagnostics(
     ranked: list[RankedSnippet],
     *,
     token_budget: int,
     mandatory_node_ids: set[str] | None = None,
-) -> tuple[RankedSnippet, ...]:
-    """Select minimal high-value snippets under token budget."""
+) -> SelectionResult:
+    """Select snippets under budget and return selection confidence diagnostics."""
     mandatory_node_ids = mandatory_node_ids or set()
+    deduped = _dedupe_ranked_snippets(ranked, mandatory_node_ids=mandatory_node_ids)
+    mandatory = sorted(
+        [item for item in deduped if item.node_id in mandatory_node_ids],
+        key=_candidate_sort_key,
+    )
+    optional = sorted(
+        [item for item in deduped if item.node_id not in mandatory_node_ids],
+        key=_candidate_sort_key,
+    )
     selected: list[RankedSnippet] = []
     seen_contents: set[str] = set()
     used_tokens = 0
-    # First, include mandatory snippets if possible.
-    for item in ranked:
-        if item.node_id not in mandatory_node_ids:
-            continue
-        if item.content in seen_contents:
-            continue
-        t = estimate_tokens(item.content)
-        if used_tokens + t > token_budget:
-            continue
-        selected.append(item)
-        seen_contents.add(item.content)
-        used_tokens += t
+    # Mandatory snippets are chosen first, but still in score order so we keep the most relevant ones.
+    for pool in (mandatory, optional):
+        for item in pool:
+            if item.content in seen_contents:
+                continue
+            tokens = estimate_tokens(item.content)
+            if used_tokens + tokens > token_budget:
+                continue
+            selected.append(item)
+            seen_contents.add(item.content)
+            used_tokens += tokens
-    # Then fill with highest score snippets.
-    for item in sorted(ranked, key=lambda s: s.score, reverse=True):
-        if item.content in seen_contents:
-            continue
-        t = estimate_tokens(item.content)
-        if used_tokens + t > token_budget:
-            continue
-        selected.append(item)
-        seen_contents.add(item.content)
-        used_tokens += t
+    selected_tuple = tuple(selected)
+    mandatory_requested_count = len(mandatory)
+    mandatory_selected_count = sum(1 for item in selected_tuple if item.node_id in mandatory_node_ids)
+    score_spread = _score_spread(selected_tuple)
+    mandatory_coverage_ratio = (
+        mandatory_selected_count / mandatory_requested_count if mandatory_requested_count else 1.0
+    )
+    confidence = _selection_confidence(
+        selected_count=len(selected_tuple),
+        total_candidates=len(deduped),
+        mandatory_requested_count=mandatory_requested_count,
+        mandatory_selected_count=mandatory_selected_count,
+        score_spread=score_spread,
+    )
+    return SelectionResult(
+        snippets=selected_tuple,
+        metrics=SelectionMetrics(
+            token_budget=token_budget,
+            used_tokens=used_tokens,
+            selected_count=len(selected_tuple),
+            mandatory_requested_count=mandatory_requested_count,
+            mandatory_selected_count=mandatory_selected_count,
+            mandatory_coverage_ratio=mandatory_coverage_ratio,
+            score_spread=score_spread,
+            confidence=confidence,
+        ),
+    )
-    return tuple(selected)
+def select_snippets(
+    ranked: list[RankedSnippet],
+    *,
+    token_budget: int,
+    mandatory_node_ids: set[str] | None = None,
+) -> tuple[RankedSnippet, ...]:
+    """Select minimal high-value snippets under token budget."""
+    return select_snippets_with_diagnostics(
+        ranked,
+        token_budget=token_budget,
+        mandatory_node_ids=mandatory_node_ids,
+    ).snippets

package/package.json (changed)

@@ -1,6 +1,6 @@
 {
   "name": "@pmaddire/gcie",
-  "version": "0.1.13",
+  "version": "0.1.15",
   "description": "GraphCode Intelligence Engine one-command setup and context CLI",
   "bin": {
     "gcie": "bin/gcie.js",