contextguardrail 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contextguardrail-0.1.0/PKG-INFO +72 -0
- contextguardrail-0.1.0/README.md +61 -0
- contextguardrail-0.1.0/contextguardrail/__init__.py +1 -0
- contextguardrail-0.1.0/contextguardrail/budget.py +19 -0
- contextguardrail-0.1.0/contextguardrail/cache.py +57 -0
- contextguardrail-0.1.0/contextguardrail/cli.py +154 -0
- contextguardrail-0.1.0/contextguardrail/config.py +70 -0
- contextguardrail-0.1.0/contextguardrail/exporter.py +35 -0
- contextguardrail-0.1.0/contextguardrail/graph.py +108 -0
- contextguardrail-0.1.0/contextguardrail/scanner.py +80 -0
- contextguardrail-0.1.0/contextguardrail/selector.py +82 -0
- contextguardrail-0.1.0/contextguardrail/stats.py +31 -0
- contextguardrail-0.1.0/contextguardrail/storage.py +94 -0
- contextguardrail-0.1.0/contextguardrail.egg-info/PKG-INFO +72 -0
- contextguardrail-0.1.0/contextguardrail.egg-info/SOURCES.txt +20 -0
- contextguardrail-0.1.0/contextguardrail.egg-info/dependency_links.txt +1 -0
- contextguardrail-0.1.0/contextguardrail.egg-info/entry_points.txt +2 -0
- contextguardrail-0.1.0/contextguardrail.egg-info/requires.txt +4 -0
- contextguardrail-0.1.0/contextguardrail.egg-info/top_level.txt +1 -0
- contextguardrail-0.1.0/pyproject.toml +22 -0
- contextguardrail-0.1.0/setup.cfg +4 -0
- contextguardrail-0.1.0/tests/test_budget.py +5 -0
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: contextguardrail
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Local-first token firewall for AI coding agents
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: typer>=0.12
|
|
8
|
+
Requires-Dist: rich>=13.0
|
|
9
|
+
Requires-Dist: networkx>=3.0
|
|
10
|
+
Requires-Dist: tiktoken>=0.7
|
|
11
|
+
|
|
12
|
+
# ContextGuardrail
|
|
13
|
+
|
|
14
|
+
ContextGuardrail is a local-first MVP for reducing AI coding-agent context. It scans a repo, builds a lightweight code graph, selects relevant files for a prompt, prevents replaying already-sent files, caches repeated asks, and reports estimated token/cost savings.
|
|
15
|
+
|
|
16
|
+
## Install locally
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
cd contextguardrail  # path to your local checkout of this project
|
|
20
|
+
python -m venv .venv
|
|
21
|
+
source .venv/bin/activate
|
|
22
|
+
pip install -e .
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Use
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
contextguardrail init
|
|
29
|
+
contextguardrail index /path/to/repo
|
|
30
|
+
contextguardrail ask "Where is authentication handled?"
|
|
31
|
+
contextguardrail stats
|
|
32
|
+
contextguardrail export
|
|
33
|
+
contextguardrail clean
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
All state is stored in the indexed repo under `.contextguardrail/`.
|
|
37
|
+
|
|
38
|
+
## MVP Features
|
|
39
|
+
|
|
40
|
+
- Repo scanner with incremental hashing
|
|
41
|
+
- Python AST parser for imports, classes, functions, and summaries
|
|
42
|
+
- Lightweight dependency graph
|
|
43
|
+
- Context selector using prompt keywords and graph metadata
|
|
44
|
+
- Token counting with `tiktoken` when available, word-count fallback otherwise
|
|
45
|
+
- Semantic cache for repeated prompt and selected-file sets
|
|
46
|
+
- Replay prevention so already-sent files are skipped unless changed
|
|
47
|
+
- Context diffing via file hashes
|
|
48
|
+
- Cost observability through `contextguardrail stats`
|
|
49
|
+
|
|
50
|
+
This version intentionally skips dashboards, multi-user support, Neo4j, and agent orchestration.
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
## Project Layout
|
|
54
|
+
|
|
55
|
+
```
|
|
56
|
+
contextguardrail/
|
|
57
|
+
├── pyproject.toml # Package metadata, dependencies, CLI entrypoints
|
|
58
|
+
├── README.md # Project documentation and usage guide
|
|
59
|
+
├── contextguardrail/
|
|
60
|
+
│ ├── scanner.py # Scan repo and detect files, hashes, changes
|
|
61
|
+
│ ├── config.py # Global settings and configuration loading
|
|
62
|
+
│ ├── budget.py # Token estimation and budget enforcement
|
|
63
|
+
│ ├── exporter.py # Export graph, summaries, and reports
|
|
64
|
+
│ ├── graph.py # Build dependency graph from source code
|
|
65
|
+
│ ├── selector.py # Select most relevant context for a prompt
|
|
66
|
+
│ ├── cache.py # Semantic cache and replay prevention
|
|
67
|
+
│ ├── cli.py # Main CLI commands exposed to users
|
|
68
|
+
│ ├── stats.py # Usage metrics and cost-saving reports
|
|
69
|
+
│ └── storage.py # SQLite helpers and persistence layer
|
|
70
|
+
└── tests/
|
|
71
|
+
└── test_budget.py # Unit tests for token budgeting logic
|
|
72
|
+
```
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# ContextGuardrail
|
|
2
|
+
|
|
3
|
+
ContextGuardrail is a local-first MVP for reducing AI coding-agent context. It scans a repo, builds a lightweight code graph, selects relevant files for a prompt, prevents replaying already-sent files, caches repeated asks, and reports estimated token/cost savings.
|
|
4
|
+
|
|
5
|
+
## Install locally
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
cd contextguardrail  # path to your local checkout of this project
|
|
9
|
+
python -m venv .venv
|
|
10
|
+
source .venv/bin/activate
|
|
11
|
+
pip install -e .
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Use
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
contextguardrail init
|
|
18
|
+
contextguardrail index /path/to/repo
|
|
19
|
+
contextguardrail ask "Where is authentication handled?"
|
|
20
|
+
contextguardrail stats
|
|
21
|
+
contextguardrail export
|
|
22
|
+
contextguardrail clean
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
All state is stored in the indexed repo under `.contextguardrail/`.
|
|
26
|
+
|
|
27
|
+
## MVP Features
|
|
28
|
+
|
|
29
|
+
- Repo scanner with incremental hashing
|
|
30
|
+
- Python AST parser for imports, classes, functions, and summaries
|
|
31
|
+
- Lightweight dependency graph
|
|
32
|
+
- Context selector using prompt keywords and graph metadata
|
|
33
|
+
- Token counting with `tiktoken` when available, word-count fallback otherwise
|
|
34
|
+
- Semantic cache for repeated prompt and selected-file sets
|
|
35
|
+
- Replay prevention so already-sent files are skipped unless changed
|
|
36
|
+
- Context diffing via file hashes
|
|
37
|
+
- Cost observability through `contextguardrail stats`
|
|
38
|
+
|
|
39
|
+
This version intentionally skips dashboards, multi-user support, Neo4j, and agent orchestration.
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
## Project Layout
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
contextguardrail/
|
|
46
|
+
├── pyproject.toml # Package metadata, dependencies, CLI entrypoints
|
|
47
|
+
├── README.md # Project documentation and usage guide
|
|
48
|
+
├── contextguardrail/
|
|
49
|
+
│ ├── scanner.py # Scan repo and detect files, hashes, changes
|
|
50
|
+
│ ├── config.py # Global settings and configuration loading
|
|
51
|
+
│ ├── budget.py # Token estimation and budget enforcement
|
|
52
|
+
│ ├── exporter.py # Export graph, summaries, and reports
|
|
53
|
+
│ ├── graph.py # Build dependency graph from source code
|
|
54
|
+
│ ├── selector.py # Select most relevant context for a prompt
|
|
55
|
+
│ ├── cache.py # Semantic cache and replay prevention
|
|
56
|
+
│ ├── cli.py # Main CLI commands exposed to users
|
|
57
|
+
│ ├── stats.py # Usage metrics and cost-saving reports
|
|
58
|
+
│ └── storage.py # SQLite helpers and persistence layer
|
|
59
|
+
└── tests/
|
|
60
|
+
└── test_budget.py # Unit tests for token budgeting logic
|
|
61
|
+
```
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def estimate_tokens(text: str, model: str = "gpt-4o-mini") -> int:
    """Estimate how many tokens *text* occupies for *model*.

    ``tiktoken`` is used when it can be imported and can provide an encoding
    for *model*. Any failure on that path falls back to a heuristic of 1.3
    tokens per whitespace-separated word, truncated to an int.
    """
    try:
        import tiktoken

        token_ids = tiktoken.encoding_for_model(model).encode(text)
    except Exception:
        # Deliberate best-effort: token counting must never abort the caller.
        word_count = len(text.split())
        return int(word_count * 1.3)
    return len(token_ids)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def cost_usd(input_tokens: int, output_tokens: int = 0) -> float:
    """Return the estimated USD price for the given token counts.

    Rates are hard-coded per million tokens: 0.15 for input, 0.60 for output.
    """
    per_million = 1_000_000
    input_rate, output_rate = 0.15, 0.60
    input_cost = input_tokens / per_million * input_rate
    output_cost = output_tokens / per_million * output_rate
    return input_cost + output_cost
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from contextguardrail.storage import connect
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def cache_key(prompt: str, selected_hash: str, model: str) -> str:
    """Return the SHA-256 hex digest used as the response-cache key.

    The digest covers the prompt, the selected-files hash and the model,
    separated by newlines in that order.
    """
    material = "{}\n{}\n{}".format(prompt, selected_hash, model)
    return hashlib.sha256(material.encode()).hexdigest()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def selected_files_hash(files: list[dict]) -> str:
    """Return a SHA-256 hex digest summarising a selection of files.

    Each entry contributes ``"<path>:<hash>"``; entries are joined with ``|``
    in the order given, so the digest is sensitive to ordering.
    """
    entries = []
    for item in files:
        entries.append(f"{item['path']}:{item['hash']}")
    digest = hashlib.sha256("|".join(entries).encode())
    return digest.hexdigest()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_cache(repo: str | Path, key: str) -> str | None:
    """Look up the response stored under *key* in ``cache.db``.

    Returns the stored response text, or ``None`` when no row matches.
    """
    query = "SELECT response FROM cache WHERE key = ?"
    with connect(repo, "cache.db") as db:
        hit = db.execute(query, (key,)).fetchone()
    if not hit:
        return None
    return hit["response"]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def set_cache(repo: str | Path, key: str, response: str) -> None:
    """Store *response* under *key* in ``cache.db``, replacing any existing row."""
    statement = "INSERT OR REPLACE INTO cache(key, response) VALUES (?, ?)"
    params = (key, response)
    with connect(repo, "cache.db") as db:
        db.execute(statement, params)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def prompt_hash(prompt: str) -> str:
    """Return the SHA-256 hex digest of *prompt* after trimming and lower-casing."""
    normalized = prompt.strip().lower()
    return hashlib.sha256(normalized.encode()).hexdigest()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def already_sent(repo: str | Path, prompt: str) -> dict[str, str]:
    """Return the path -> hash mapping remembered for *prompt*.

    The mapping is read from the ``replay`` table of ``cache.db``; an empty
    dict is returned when nothing has been recorded for this prompt.
    """
    query = "SELECT file_hashes FROM replay WHERE prompt_hash = ?"
    digest = prompt_hash(prompt)
    with connect(repo, "cache.db") as db:
        remembered = db.execute(query, (digest,)).fetchone()
    if not remembered:
        return {}
    return json.loads(remembered["file_hashes"])
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def remember_sent(repo: str | Path, prompt: str, files: list[dict]) -> None:
    """Record that *files* have been sent for *prompt*.

    The new path -> hash pairs are merged into whatever is already remembered
    for the prompt instead of replacing it. Replacing the record would forget
    every file that was skipped this time because it was unchanged, so those
    files would be re-sent on the following request.

    Args:
        repo: Repository whose ``cache.db`` holds the replay table.
        prompt: The prompt text; it is keyed by ``prompt_hash(prompt)``.
        files: Dicts carrying at least ``"path"`` and ``"hash"``.
    """
    digest = prompt_hash(prompt)
    with connect(repo, "cache.db") as db:
        row = db.execute(
            "SELECT file_hashes FROM replay WHERE prompt_hash = ?", (digest,)
        ).fetchone()
        file_hashes = json.loads(row["file_hashes"]) if row else {}
        # Newer hashes win, so a changed file replaces its stale entry.
        file_hashes.update({item["path"]: item["hash"] for item in files})
        db.execute(
            "INSERT OR REPLACE INTO replay(prompt_hash, files, file_hashes) VALUES (?, ?, ?)",
            (digest, json.dumps(list(file_hashes)), json.dumps(file_hashes)),
        )
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def clean_cache(repo: str | Path) -> int:
    """Empty the ``cache`` and ``replay`` tables of ``cache.db``.

    Returns the number of rows that were in ``cache`` before deletion.
    """
    with connect(repo, "cache.db") as db:
        removed = db.execute("SELECT COUNT(*) AS count FROM cache").fetchone()["count"]
        for statement in ("DELETE FROM cache", "DELETE FROM replay"):
            db.execute(statement)
    return int(removed)
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from rich.table import Table
|
|
8
|
+
|
|
9
|
+
from contextguardrail.budget import cost_usd
|
|
10
|
+
from contextguardrail.cache import (
|
|
11
|
+
already_sent,
|
|
12
|
+
cache_key,
|
|
13
|
+
clean_cache,
|
|
14
|
+
get_cache,
|
|
15
|
+
remember_sent,
|
|
16
|
+
selected_files_hash,
|
|
17
|
+
set_cache,
|
|
18
|
+
)
|
|
19
|
+
from contextguardrail.config import DEFAULT_BUDGET, DEFAULT_MODEL, ensure_state, repo_root
|
|
20
|
+
from contextguardrail.exporter import export_repo
|
|
21
|
+
from contextguardrail.graph import graph_counts
|
|
22
|
+
from contextguardrail.scanner import index_repo
|
|
23
|
+
from contextguardrail.selector import select_context
|
|
24
|
+
from contextguardrail.stats import record_request, show_stats
|
|
25
|
+
from contextguardrail.storage import init_storage
|
|
26
|
+
|
|
27
|
+
app = typer.Typer(help="Local-first token firewall for AI coding agents.")
|
|
28
|
+
console = Console()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@app.command()
def init(path: str = typer.Argument(".")):
    """Create .contextguardrail storage for a repo."""
    root = repo_root(path)
    init_storage(root)
    # ensure_state returns the state directory, which is what gets reported.
    state_path = ensure_state(root)
    console.print(f"[green]Initialized[/green] {state_path}")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@app.command()
def index(
    path: str = typer.Argument("."),
    incremental: bool = typer.Option(False, "--incremental", "-i"),
):
    """Scan a repo and build the local code graph."""
    root = repo_root(path)
    summary = index_repo(root, incremental=incremental)
    # graph_counts returns (functions, classes), in that order.
    function_total, class_total = graph_counts(root)
    console.print(f"Files scanned: [bold]{summary['files_scanned']}[/bold]")
    if incremental:
        # The skipped count is only reported for incremental runs.
        console.print(f"Files skipped: [bold]{summary['files_skipped']}[/bold]")
    console.print(f"Functions: [bold]{function_total}[/bold]")
    console.print(f"Classes: [bold]{class_total}[/bold]")
    console.print("[green]Graph built.[/green]")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@app.command()
def ask(
    prompt: str,
    path: str = typer.Option(".", "--path", "-p"),
    model: str = typer.Option(DEFAULT_MODEL, "--model", "-m"),
    budget: int = typer.Option(DEFAULT_BUDGET, "--budget", "-b"),
    include_replay: bool = typer.Option(False, "--include-replay"),
):
    """Select optimized context for a coding prompt."""
    root = repo_root(path)

    # First pass ignores replay memory: its selection feeds the cache key, so
    # an identical prompt over identical files is answered from the cache.
    base_selected, raw_tokens = select_context(root, prompt, budget=budget)
    base_optimized = estimated_optimized_tokens(base_selected)
    key = cache_key(prompt, selected_files_hash(base_selected), model)
    cached = get_cache(root, key)
    if cached is not None:
        # markup=False: the report contains file paths, and a segment such as
        # "[id]" would otherwise be parsed as a Rich style tag and dropped.
        console.print(cached.replace("Cache: miss", "Cache: hit"), markup=False)
        record_request(root, raw_tokens, 0, cache_hit=True)
        return

    # Second pass drops files already sent for this prompt and still unchanged,
    # unless the caller asked for them to be included again.
    replay = {} if include_replay else already_sent(root, prompt)
    selected, raw_tokens = select_context(root, prompt, budget=budget, exclude_unchanged=replay)
    # Use the shared helper so this figure cannot drift from base_optimized.
    optimized_tokens = estimated_optimized_tokens(selected)

    lines = ["Files selected:", ""]
    if selected:
        lines.extend(item["path"] for item in selected)
    else:
        lines.append("No changed files since this prompt was last sent.")
    lines.extend(
        [
            "",
            f"Raw Tokens: {raw_tokens:,}",
            f"Optimized Tokens: {optimized_tokens:,}",
            f"Savings: {savings_percent(raw_tokens, optimized_tokens):.1f}%",
            f"Estimated Cost Saved: ${cost_usd(max(0, raw_tokens - optimized_tokens)):.4f}",
        ]
    )
    lines.append("Cache: miss")
    report = "\n".join(lines)

    # Only cache when replay filtering did not change the optimized total (or
    # no replay memory was used), since the key is built from the unfiltered
    # selection.
    if base_optimized == optimized_tokens or not replay:
        set_cache(root, key, report)
    remember_sent(root, prompt, selected)
    record_request(root, raw_tokens, optimized_tokens, cache_hit=False)
    console.print(report, markup=False)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@app.command()
def analyze(
    prompt: str,
    path: str = typer.Option(".", "--path", "-p"),
    budget: int = typer.Option(DEFAULT_BUDGET, "--budget", "-b"),
):
    """Killer-demo alias for ask with cost framing."""
    # ask() is invoked here as a plain function, so Typer never resolves its
    # typer.Option(...) defaults. Every option must be passed explicitly;
    # otherwise `model` and `include_replay` would be OptionInfo objects: the
    # truthy include_replay would disable replay prevention and the object
    # would be formatted into the cache key.
    ask(
        prompt=prompt,
        path=path,
        model=DEFAULT_MODEL,
        budget=budget,
        include_replay=False,
    )
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@app.command()
def stats(path: str = typer.Argument(".")):
    """Show token and cache savings."""
    data = show_stats(repo_root(path))
    table = Table(title="ContextGuardrail Stats")
    table.add_column("Metric")
    table.add_column("Value", justify="right")

    # Integer metrics share one thousands-separated format.
    count_metrics = (
        ("Requests", "requests"),
        ("Input tokens saved", "input_tokens_saved"),
        ("Output tokens saved", "output_tokens_saved"),
        ("Cache hits", "cache_hits"),
        ("Raw tokens observed", "raw_tokens"),
        ("Optimized tokens sent", "optimized_tokens"),
    )
    for label, field in count_metrics:
        table.add_row(label, f"{data[field]:,}")
    table.add_row("Estimated cost saved", f"${data['estimated_cost_saved']:.4f}")
    console.print(table)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@app.command("clean")
def clean(path: str = typer.Argument(".")):
    """Clean semantic cache and replay memory."""
    root = repo_root(path)
    # clean_cache returns how many cached responses were removed.
    removed = clean_cache(root)
    console.print(f"[green]Cleaned[/green] {removed} cached responses")
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@app.command("export")
def export_command(path: str = typer.Argument(".")):
    """Export repo-brain.json, code-dna.json, and ai-gossip.md."""
    root = repo_root(path)
    # Print each written file path on its own line.
    for exported in export_repo(root):
        console.print(exported)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def savings_percent(raw_tokens: int, optimized_tokens: int) -> float:
    """Return the percentage of *raw_tokens* saved, never below 0.0.

    A non-positive *raw_tokens* yields 0.0 instead of dividing by zero.
    """
    if raw_tokens > 0:
        percent = (raw_tokens - optimized_tokens) / raw_tokens * 100
        return max(0.0, percent)
    return 0.0
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def estimated_optimized_tokens(files: list[dict]) -> int:
    """Sum the estimated summary size of every entry in *files*.

    Each entry counts as a quarter of its ``"tokens"`` value (truncated), with
    a floor of 30.
    """
    total = 0
    for item in files:
        quarter = int(item["tokens"] * 0.25)
        # NOTE(review): for non-negative counts the min() never changes the
        # result, so files under 30 tokens still count as 30 - confirm intent.
        total += max(30, min(item["tokens"], quarter))
    return total
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
if __name__ == "__main__":
|
|
154
|
+
app()
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
STATE_DIR = ".contextguardrail"
|
|
7
|
+
DEFAULT_MODEL = "gpt-4o-mini"
|
|
8
|
+
DEFAULT_BUDGET = 8_000
|
|
9
|
+
|
|
10
|
+
SKIP_DIRS = {
|
|
11
|
+
".git",
|
|
12
|
+
".hg",
|
|
13
|
+
".svn",
|
|
14
|
+
".venv",
|
|
15
|
+
"venv",
|
|
16
|
+
"env",
|
|
17
|
+
"__pycache__",
|
|
18
|
+
"node_modules",
|
|
19
|
+
"dist",
|
|
20
|
+
"build",
|
|
21
|
+
".contextguardrail",
|
|
22
|
+
".pytest_cache",
|
|
23
|
+
".mypy_cache",
|
|
24
|
+
".ruff_cache",
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
CODE_EXTENSIONS = {
|
|
28
|
+
".py",
|
|
29
|
+
".js",
|
|
30
|
+
".jsx",
|
|
31
|
+
".ts",
|
|
32
|
+
".tsx",
|
|
33
|
+
".go",
|
|
34
|
+
".java",
|
|
35
|
+
".rs",
|
|
36
|
+
".rb",
|
|
37
|
+
".php",
|
|
38
|
+
".c",
|
|
39
|
+
".cc",
|
|
40
|
+
".cpp",
|
|
41
|
+
".h",
|
|
42
|
+
".hpp",
|
|
43
|
+
".cs",
|
|
44
|
+
".swift",
|
|
45
|
+
".kt",
|
|
46
|
+
".kts",
|
|
47
|
+
".scala",
|
|
48
|
+
".sh",
|
|
49
|
+
".sql",
|
|
50
|
+
".yaml",
|
|
51
|
+
".yml",
|
|
52
|
+
".json",
|
|
53
|
+
".toml",
|
|
54
|
+
".md",
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def repo_root(path: str | Path = ".") -> Path:
|
|
59
|
+
return Path(path).expanduser().resolve()
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def state_dir(path: str | Path = ".") -> Path:
    """Return the state directory (``STATE_DIR``) located under the repo root."""
    root = repo_root(path)
    return root / STATE_DIR
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def ensure_state(path: str | Path = ".") -> Path:
    """Create the state directory layout if needed and return its root.

    The ``cache`` and ``summaries`` subdirectories are created (with parents)
    and existing directories are left untouched.
    """
    root = state_dir(path)
    for subdir in ("cache", "summaries"):
        (root / subdir).mkdir(parents=True, exist_ok=True)
    return root
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from contextguardrail.config import ensure_state
|
|
7
|
+
from contextguardrail.graph import load_graph
|
|
8
|
+
from contextguardrail.storage import connect, load_stats
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def export_repo(repo: str | Path = ".") -> list[Path]:
    """Write the three export files into the state directory.

    * ``repo-brain.json`` - file rows, symbol rows, graph edges and stats.
    * ``code-dna.json`` - counts of files, symbols and edges.
    * ``ai-gossip.md`` - up to 20 nodes with the highest degree.

    Returns the three written paths in that order.
    """
    state = ensure_state(repo)
    with connect(repo, "hashes.db") as files_db, connect(repo, "graph.db") as graph_db:
        file_rows = files_db.execute("SELECT * FROM files ORDER BY path").fetchall()
        files = [dict(row) for row in file_rows]
        symbol_rows = graph_db.execute("SELECT * FROM symbols ORDER BY path").fetchall()
        symbols = [dict(row) for row in symbol_rows]
        graph = load_graph(repo)

    edges = []
    for source, target, attrs in graph.edges(data=True):
        edges.append({"source": source, "target": target, **attrs})
    brain = {
        "files": files,
        "symbols": symbols,
        "edges": edges,
        "stats": load_stats(repo),
    }
    dna = {"files": len(files), "symbols": len(symbols), "edges": graph.number_of_edges()}

    repo_brain = state / "repo-brain.json"
    code_dna = state / "code-dna.json"
    ai_gossip = state / "ai-gossip.md"

    repo_brain.write_text(json.dumps(brain, indent=2) + "\n", encoding="utf-8")
    code_dna.write_text(json.dumps(dna, indent=2) + "\n", encoding="utf-8")

    # Highest-degree nodes first; ties keep their original order (stable sort).
    ranked = sorted(graph.degree, key=lambda item: item[1], reverse=True)
    gossip = ["# AI Gossip", "", "Most connected files:"]
    gossip.extend(f"- {node}: {degree} links" for node, degree in ranked[:20])
    ai_gossip.write_text("\n".join(gossip) + "\n", encoding="utf-8")
    return [repo_brain, code_dna, ai_gossip]
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
import re
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import networkx as nx
|
|
9
|
+
|
|
10
|
+
from contextguardrail.storage import connect
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
WORD_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]+")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def parse_python(text: str) -> dict[str, list[str]]:
    """Collect import, class and function names from Python source.

    Every node of the parsed tree is visited, so nested definitions are
    included. ``from`` imports with no module name are skipped. Each list is
    de-duplicated and sorted.

    Raises:
        SyntaxError: propagated from ``ast.parse`` for invalid source.
    """
    imports: set[str] = set()
    classes: set[str] = set()
    functions: set[str] = set()
    for node in ast.walk(ast.parse(text)):
        if isinstance(node, ast.Import):
            imports.update(alias.name for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            if node.module:
                imports.add(node.module)
        elif isinstance(node, ast.ClassDef):
            classes.add(node.name)
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            functions.add(node.name)
    return {
        "imports": sorted(imports),
        "classes": sorted(classes),
        "functions": sorted(functions),
    }
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def parse_file(path: Path, rel_path: str, text: str) -> dict[str, Any]:
    """Extract symbols and keywords for one file.

    Python files (``.py`` suffix) are parsed with ``parse_python``; every other
    file, and any Python file that cannot be parsed, gets empty symbol lists.
    Keywords are the lower-cased, de-duplicated words longer than two
    characters found in *rel_path*, the first 8,000 characters of *text*, and
    the symbol names.

    Returns:
        A dict with ``imports``, ``classes``, ``functions`` and ``keywords``.
    """
    data: dict[str, Any] = {"imports": [], "classes": [], "functions": []}
    if path.suffix == ".py":
        try:
            data = parse_python(text)
        except (SyntaxError, ValueError, RecursionError):
            # Best-effort: an unparseable file is indexed by keywords only.
            # ValueError covers source with null bytes on interpreters that
            # do not report it as SyntaxError; RecursionError covers very
            # deeply nested source. Neither should abort a whole scan.
            pass
    words = WORD_RE.findall(rel_path + " " + text[:8_000])
    symbols = data["imports"] + data["classes"] + data["functions"]
    data["keywords"] = sorted({w.lower() for w in words + symbols if len(w) > 2})
    return data
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def summarize_file(rel_path: str, text: str, parsed: dict[str, Any]) -> str:
    """Build a short plain-text summary of a file.

    The summary names the file, lists up to 30 functions, 30 classes and 20
    imports (each only when non-empty), and ends with the first non-blank line
    of *text* stripped of surrounding ``#`` and spaces, capped at 240
    characters.
    """
    lines = [f"File: {rel_path}"]
    sections = (
        ("Functions: ", "functions", 30),
        ("Classes: ", "classes", 30),
        ("Imports: ", "imports", 20),
    )
    for label, field, limit in sections:
        names = parsed[field]
        if names:
            lines.append(label + ", ".join(names[:limit]))

    doc = ""
    for line in text.splitlines():
        if line.strip():
            # Only the first non-blank line is considered, even if stripping
            # the comment markers leaves it empty.
            doc = line.strip("# ").strip()
            break
    if doc:
        lines.append("First line: " + doc[:240])
    return "\n".join(lines)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def upsert_symbols(repo: str | Path, rel_path: str, parsed: dict[str, Any]) -> None:
    """Insert or update the ``symbols`` row for *rel_path* in ``graph.db``.

    Each of the import, class, function and keyword lists is stored as a
    newline-joined string.
    """
    statement = """
        INSERT INTO symbols(path, imports, classes, functions, keywords)
        VALUES (?, ?, ?, ?, ?)
        ON CONFLICT(path) DO UPDATE SET
        imports=excluded.imports,
        classes=excluded.classes,
        functions=excluded.functions,
        keywords=excluded.keywords
        """
    columns = ("imports", "classes", "functions", "keywords")
    params = (rel_path, *("\n".join(parsed[column]) for column in columns))
    with connect(repo, "graph.db") as db:
        db.execute(statement, params)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def load_graph(repo: str | Path) -> nx.DiGraph:
    """Build a directed import graph from the ``symbols`` table.

    Every indexed path becomes a node carrying its class and function names.
    An ``imports`` edge is added from a file to another indexed file when an
    imported module name equals that file's path with the suffix removed and
    ``/`` replaced by ``.``.
    """
    with connect(repo, "graph.db") as db:
        rows = db.execute("SELECT path, imports, classes, functions FROM symbols").fetchall()

    paths = {row["path"] for row in rows}
    module_to_path = {}
    for known in paths:
        dotted = Path(known).with_suffix("").as_posix().replace("/", ".")
        module_to_path[dotted] = known

    graph = nx.DiGraph()
    for row in rows:
        source = row["path"]
        graph.add_node(
            source,
            classes=row["classes"].splitlines(),
            functions=row["functions"].splitlines(),
        )
        for imported in row["imports"].splitlines():
            target = module_to_path.get(imported)
            if not target:
                # Import does not resolve to an indexed file.
                continue
            graph.add_edge(source, target, type="imports")
    return graph
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def graph_counts(repo: str | Path) -> tuple[int, int]:
    """Return ``(functions, classes)`` totals across all indexed files.

    Counts are the number of lines in each row's newline-joined ``functions``
    and ``classes`` columns; empty columns contribute nothing.
    """
    with connect(repo, "graph.db") as db:
        rows = db.execute("SELECT classes, functions FROM symbols").fetchall()

    class_total = 0
    function_total = 0
    for row in rows:
        if row["classes"]:
            class_total += len(row["classes"].splitlines())
        if row["functions"]:
            function_total += len(row["functions"].splitlines())
    return function_total, class_total
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from contextguardrail.budget import estimate_tokens
|
|
7
|
+
from contextguardrail.config import CODE_EXTENSIONS, SKIP_DIRS, ensure_state, repo_root
|
|
8
|
+
from contextguardrail.graph import parse_file, summarize_file, upsert_symbols
|
|
9
|
+
from contextguardrail.storage import connect, init_storage
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def file_hash(path: Path) -> str:
    """Return the SHA-256 hex digest of the file's bytes."""
    hasher = hashlib.sha256()
    hasher.update(path.read_bytes())
    return hasher.hexdigest()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def should_scan(path: Path, rel_path: Path) -> bool:
    """Decide whether *path* should be indexed.

    A path is rejected when any component of *rel_path* is in ``SKIP_DIRS``.
    Otherwise it must be a regular file whose lower-cased suffix is in
    ``CODE_EXTENSIONS``.
    """
    for part in rel_path.parts:
        if part in SKIP_DIRS:
            return False
    if not path.is_file():
        return False
    return path.suffix.lower() in CODE_EXTENSIONS
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def iter_files(root: Path):
    """Yield every file under *root* that ``should_scan`` accepts.

    Directories named in ``SKIP_DIRS`` are pruned before descending, so large
    trees such as ``node_modules`` or ``.git`` are never walked. Directory and
    file names are visited in sorted order to make the scan order stable.
    """
    import os  # local import keeps this block self-contained

    for current, dirnames, filenames in os.walk(root):
        # Assigning to the slice edits the list os.walk recurses into.
        dirnames[:] = sorted(name for name in dirnames if name not in SKIP_DIRS)
        current_path = Path(current)
        for name in sorted(filenames):
            path = current_path / name
            if should_scan(path, path.relative_to(root)):
                yield path
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def read_text(path: Path) -> str:
    """Read *path* as UTF-8 text, silently dropping undecodable bytes."""
    with path.open(encoding="utf-8", errors="ignore") as handle:
        return handle.read()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def index_repo(path: str | Path = ".", incremental: bool = False) -> dict[str, int]:
    """Scan a repository and refresh its file and symbol indexes.

    For each file from ``iter_files`` the content hash is computed. In
    incremental mode a file whose stored hash matches is counted as skipped.
    Every other file is tokenised, parsed and summarised; the summary is
    written under ``summaries/`` and the ``files`` and ``symbols`` rows are
    upserted.

    Returns:
        Counts under ``files_scanned``, ``files_changed``, ``files_skipped``
        and ``raw_tokens``. Skipped files are not included in
        ``files_scanned``.
    """
    root = repo_root(path)
    state = ensure_state(root)
    init_storage(root)

    lookup_sql = "SELECT hash FROM files WHERE path = ?"
    upsert_sql = """
        INSERT INTO files(path, hash, size, mtime, tokens, summary)
        VALUES (?, ?, ?, ?, ?, ?)
        ON CONFLICT(path) DO UPDATE SET
        hash=excluded.hash,
        size=excluded.size,
        mtime=excluded.mtime,
        tokens=excluded.tokens,
        summary=excluded.summary,
        updated_at=CURRENT_TIMESTAMP
        """
    counts = {
        "files_scanned": 0,
        "files_changed": 0,
        "files_skipped": 0,
        "raw_tokens": 0,
    }

    for file_path in iter_files(root):
        rel_path = file_path.relative_to(root).as_posix()
        digest = file_hash(file_path)
        stat = file_path.stat()
        with connect(root, "hashes.db") as db:
            existing = db.execute(lookup_sql, (rel_path,)).fetchone()
        previous = existing["hash"] if existing else None
        if incremental and previous == digest:
            counts["files_skipped"] += 1
            continue

        text = read_text(file_path)
        tokens = estimate_tokens(text)
        parsed = parse_file(file_path, rel_path, text)
        summary = summarize_file(rel_path, text, parsed)

        # Summary files are named after the hash of the relative path.
        summary_name = hashlib.sha256(rel_path.encode()).hexdigest()
        summary_path = state / "summaries" / f"{summary_name}.md"
        summary_path.write_text(summary + "\n", encoding="utf-8")

        row = (rel_path, digest, stat.st_size, stat.st_mtime, tokens, summary)
        with connect(root, "hashes.db") as db:
            db.execute(upsert_sql, row)
        upsert_symbols(root, rel_path, parsed)

        counts["files_scanned"] += 1
        counts["files_changed"] += 1
        counts["raw_tokens"] += tokens

    return counts
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from contextguardrail.config import DEFAULT_BUDGET
|
|
7
|
+
from contextguardrail.storage import connect
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
WORD_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]+")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def prompt_terms(prompt: str) -> set[str]:
    """Return the lower-cased ``WORD_RE`` matches in *prompt* longer than two characters."""
    terms: set[str] = set()
    for word in WORD_RE.findall(prompt):
        if len(word) > 2:
            terms.add(word.lower())
    return terms
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def score_row(row, terms: set[str]) -> int:
    """Score how well one file matches the prompt terms.

    Matching is by case-insensitive substring. Per term: +5 when it occurs in
    the path, +3 when it occurs in the classes or functions, +1 when it occurs
    anywhere in the path, summary, keywords, classes or functions. A further
    +1 is added once when the path contains any of a fixed list of hint words.
    """
    haystack = "\n".join(
        [row["path"], row["summary"], row["keywords"], row["classes"], row["functions"]]
    ).lower()
    path_lc = row["path"].lower()
    classes_lc = row["classes"].lower()
    functions_lc = row["functions"].lower()

    score = 0
    for term in terms:
        if term in path_lc:
            score += 5
        if term in classes_lc or term in functions_lc:
            score += 3
        if term in haystack:
            score += 1

    # Flat bonus, applied at most once, independent of the prompt.
    for hint in ("auth", "user", "api", "cache", "config", "setting"):
        if hint in path_lc:
            score += 1
            break
    return score
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def select_context(
    repo: str | Path,
    prompt: str,
    budget: int = DEFAULT_BUDGET,
    exclude_unchanged: dict[str, str] | None = None,
) -> tuple[list[dict], int]:
    """Pick the indexed files most relevant to *prompt* within a token budget.

    Args:
        repo: Repository whose ``hashes.db`` and ``graph.db`` are read.
        prompt: Free text; its terms drive the scoring.
        budget: Upper bound on the summed estimated summary tokens. The
            best-scoring candidate is always accepted, even if it alone
            exceeds the budget.
        exclude_unchanged: Optional path -> hash mapping; a file whose current
            hash equals the mapped hash is left out.

    Returns:
        ``(selected, raw_tokens)``, where ``selected`` is a list of dicts with
        ``path``, ``hash``, ``tokens``, ``summary``, ``classes``, ``functions``
        and ``keywords``, and ``raw_tokens`` is the token total of every
        indexed file, including excluded and unselected ones.
    """
    terms = prompt_terms(prompt)
    exclude_unchanged = exclude_unchanged or {}
    with connect(repo, "hashes.db") as files_db, connect(repo, "graph.db") as graph_db:
        rows = files_db.execute(
            "SELECT path, hash, tokens, summary FROM files ORDER BY path"
        ).fetchall()
        symbols = {
            row["path"]: row
            for row in graph_db.execute(
                "SELECT path, classes, functions, keywords FROM symbols"
            ).fetchall()
        }

    candidates = []
    raw_tokens = 0
    for row in rows:
        raw_tokens += int(row["tokens"])
        if exclude_unchanged.get(row["path"]) == row["hash"]:
            continue
        symbol = symbols.get(row["path"])
        merged = {
            "path": row["path"],
            "hash": row["hash"],
            "tokens": int(row["tokens"]),
            "summary": row["summary"],
            "classes": symbol["classes"] if symbol else "",
            "functions": symbol["functions"] if symbol else "",
            "keywords": symbol["keywords"] if symbol else "",
        }
        score = score_row(merged, terms)
        if score > 0:
            candidates.append((score, merged))

    # Best score first; ties go to the smaller file, then alphabetical path.
    candidates.sort(key=lambda item: (-item[0], item[1]["tokens"], item[1]["path"]))
    selected = []
    used = 0
    for _, item in candidates:
        summary_tokens = max(30, min(item["tokens"], int(item["tokens"] * 0.25)))
        # The `selected and` guard lets the first candidate through regardless
        # of budget; later ones that would overflow are skipped so a smaller
        # candidate further down can still fit.
        if selected and used + summary_tokens > budget:
            continue
        selected.append(item)
        used += summary_tokens
        if used >= budget:
            break

    # No fallback is needed: `selected` is empty only when `candidates` is
    # empty, because the first candidate is always accepted above.
    return selected, raw_tokens
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from contextguardrail.budget import cost_usd
|
|
6
|
+
from contextguardrail.storage import load_stats, save_stats
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def record_request(
    repo: str | Path,
    raw_tokens: int,
    optimized_tokens: int,
    cache_hit: bool = False,
) -> dict:
    """Add one request's token usage to the stats persisted for ``repo``.

    Args:
        repo: Repository whose stats are loaded via ``load_stats`` and
            written back via ``save_stats``.
        raw_tokens: Amount added to the ``raw_tokens`` total.
        optimized_tokens: Amount added to the ``optimized_tokens`` total.
        cache_hit: When true, ``cache_hits`` is incremented as well.

    Returns:
        The updated stats dict, i.e. the same object handed to ``save_stats``.
    """
    totals = load_stats(repo)
    # Clamp at zero: a request whose optimized size exceeds the raw size
    # contributes no savings rather than negative savings.
    tokens_saved = max(0, raw_tokens - optimized_tokens)

    for counter, amount in (
        ("requests", 1),
        ("raw_tokens", raw_tokens),
        ("optimized_tokens", optimized_tokens),
        ("input_tokens_saved", tokens_saved),
    ):
        totals[counter] += amount

    # The running cost is the only field read with a fallback, and it is
    # kept rounded to four decimal places after every update.
    cost_so_far = totals.get("estimated_cost_saved", 0.0)
    totals["estimated_cost_saved"] = round(cost_so_far + cost_usd(tokens_saved), 4)

    if cache_hit:
        totals["cache_hits"] += 1

    save_stats(repo, totals)
    return totals
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def show_stats(repo: str | Path = ".") -> dict:
    """Return whatever ``load_stats`` yields for ``repo`` (default: ``"."``)."""
    current = load_stats(repo)
    return current
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import sqlite3
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from contextguardrail.config import ensure_state
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def db_path(repo: str | Path, name: str) -> Path:
    """Return the path ``name`` joined onto the result of ``ensure_state(repo)``."""
    state_dir = ensure_state(repo)
    return state_dir / name
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def connect(repo: str | Path, name: str) -> sqlite3.Connection:
    """Open the SQLite database file ``name`` belonging to ``repo``.

    The returned connection uses ``sqlite3.Row`` as its row factory, so
    result columns can be read by name (``row["path"]``) as well as by index.
    """
    target = db_path(repo, name)
    conn = sqlite3.connect(target)
    conn.row_factory = sqlite3.Row
    return conn
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def init_storage(repo: str | Path = ".") -> None:
    """Create the SQLite tables and the stats file used for ``repo``.

    Three databases are touched: ``hashes.db`` (table ``files``),
    ``graph.db`` (table ``symbols``) and ``cache.db`` (tables ``cache`` and
    ``replay``). Every statement is ``CREATE TABLE IF NOT EXISTS``, so the
    call is safe to repeat on an already initialised repo. Finally the
    current stats (defaults when no stats file exists yet) are written back
    to the stats file.

    Args:
        repo: Repository to initialise; defaults to the current directory.
    """
    from contextlib import closing

    ensure_state(repo)

    schemas = (
        (
            "hashes.db",
            (
                """
                CREATE TABLE IF NOT EXISTS files(
                    path TEXT PRIMARY KEY,
                    hash TEXT NOT NULL,
                    size INTEGER NOT NULL,
                    mtime REAL NOT NULL,
                    tokens INTEGER NOT NULL,
                    summary TEXT NOT NULL,
                    updated_at TEXT DEFAULT CURRENT_TIMESTAMP
                )
                """,
            ),
        ),
        (
            "graph.db",
            (
                """
                CREATE TABLE IF NOT EXISTS symbols(
                    path TEXT PRIMARY KEY,
                    imports TEXT NOT NULL,
                    classes TEXT NOT NULL,
                    functions TEXT NOT NULL,
                    keywords TEXT NOT NULL
                )
                """,
            ),
        ),
        (
            "cache.db",
            (
                """
                CREATE TABLE IF NOT EXISTS cache(
                    key TEXT PRIMARY KEY,
                    response TEXT NOT NULL,
                    created_at TEXT DEFAULT CURRENT_TIMESTAMP
                )
                """,
                """
                CREATE TABLE IF NOT EXISTS replay(
                    prompt_hash TEXT PRIMARY KEY,
                    files TEXT NOT NULL,
                    file_hashes TEXT NOT NULL,
                    created_at TEXT DEFAULT CURRENT_TIMESTAMP
                )
                """,
            ),
        ),
    )

    for db_name, statements in schemas:
        # ``with connection:`` only commits or rolls back the transaction; it
        # never closes the handle. ``closing`` releases the connection once
        # the inner commit has run, so no database handle is leaked.
        with closing(connect(repo, db_name)) as db, db:
            for statement in statements:
                db.execute(statement)

    # Re-saving the loaded stats creates the file on first run and leaves
    # existing counters intact on later runs.
    save_stats(repo, load_stats(repo))
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def stats_file(repo: str | Path) -> Path:
    """Return the path of ``costs.json`` under the result of ``ensure_state(repo)``."""
    base = ensure_state(repo)
    return base / "costs.json"
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def load_stats(repo: str | Path) -> dict[str, Any]:
    """Load the usage stats stored for ``repo``.

    When the stats file does not exist, a dict of zeroed counters is
    returned. When it does exist, its values are layered over those same
    defaults, so a file that lacks some counters still yields every key
    that callers increment.

    Args:
        repo: Repository whose stats file is read.

    Returns:
        A dict containing at least ``requests``, ``input_tokens_saved``,
        ``output_tokens_saved``, ``cache_hits``, ``raw_tokens``,
        ``optimized_tokens`` and ``estimated_cost_saved``.

    Raises:
        json.JSONDecodeError: If the stats file is not valid JSON.
        TypeError: If the stats file's top-level JSON value is not an object.
    """
    defaults: dict[str, Any] = {
        "requests": 0,
        "input_tokens_saved": 0,
        "output_tokens_saved": 0,
        "cache_hits": 0,
        "raw_tokens": 0,
        "optimized_tokens": 0,
        "estimated_cost_saved": 0.0,
    }
    # Read first and handle absence afterwards: an exists() check followed by
    # a read can still fail if the file is removed in between.
    try:
        text = stats_file(repo).read_text(encoding="utf-8")
    except FileNotFoundError:
        return defaults
    # Stored values win; defaults only fill in keys the file does not have.
    return {**defaults, **json.loads(text)}
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def save_stats(repo: str | Path, stats: dict[str, Any]) -> None:
    """Write ``stats`` to the stats file of ``repo`` as indented JSON.

    The output is UTF-8 encoded, indented by two spaces and terminated with a
    single newline.
    """
    destination = stats_file(repo)
    payload = json.dumps(stats, indent=2) + "\n"
    destination.write_text(payload, encoding="utf-8")
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: contextguardrail
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Local-first token firewall for AI coding agents
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: typer>=0.12
|
|
8
|
+
Requires-Dist: rich>=13.0
|
|
9
|
+
Requires-Dist: networkx>=3.0
|
|
10
|
+
Requires-Dist: tiktoken>=0.7
|
|
11
|
+
|
|
12
|
+
# ContextGuardrail
|
|
13
|
+
|
|
14
|
+
ContextGuardrail is a local-first MVP for reducing AI coding-agent context. It scans a repo, builds a lightweight code graph, selects relevant files for a prompt, prevents replaying already-sent files, caches repeated asks, and reports estimated token/cost savings.
|
|
15
|
+
|
|
16
|
+
## Install locally
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
cd /path/to/contextguardrail
|
|
20
|
+
python -m venv .venv
|
|
21
|
+
source .venv/bin/activate
|
|
22
|
+
pip install -e .
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Use
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
contextguardrail init
|
|
29
|
+
contextguardrail index /path/to/repo
|
|
30
|
+
contextguardrail ask "Where is authentication handled?"
|
|
31
|
+
contextguardrail stats
|
|
32
|
+
contextguardrail export
|
|
33
|
+
contextguardrail clean
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
All state is stored in the indexed repo under `.contextguardrail/`.
|
|
37
|
+
|
|
38
|
+
## MVP Features
|
|
39
|
+
|
|
40
|
+
- Repo scanner with incremental hashing
|
|
41
|
+
- Python AST parser for imports, classes, functions, and summaries
|
|
42
|
+
- Lightweight dependency graph
|
|
43
|
+
- Context selector using prompt keywords and graph metadata
|
|
44
|
+
- Token counting with `tiktoken` when available, word-count fallback otherwise
|
|
45
|
+
- Semantic cache for repeated prompt and selected-file sets
|
|
46
|
+
- Replay prevention so already-sent files are skipped unless changed
|
|
47
|
+
- Context diffing via file hashes
|
|
48
|
+
- Cost observability through `contextguardrail stats`
|
|
49
|
+
|
|
50
|
+
This version intentionally skips dashboards, multi-user support, Neo4j, and agent orchestration.
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
## Project Layout
|
|
54
|
+
|
|
55
|
+
```
|
|
56
|
+
contextguardrail/
|
|
57
|
+
├── pyproject.toml # Package metadata, dependencies, CLI entrypoints
|
|
58
|
+
├── README.md # Project documentation and usage guide
|
|
59
|
+
├── contextguardrail/
|
|
60
|
+
│ ├── scanner.py # Scan repo and detect files, hashes, changes
|
|
61
|
+
│ ├── config.py # Global settings and configuration loading
|
|
62
|
+
│ ├── budget.py # Token estimation and budget enforcement
|
|
63
|
+
│ ├── exporter.py # Export graph, summaries, and reports
|
|
64
|
+
│ ├── graph.py # Build dependency graph from source code
|
|
65
|
+
│ ├── selector.py # Select most relevant context for a prompt
|
|
66
|
+
│ ├── cache.py # Semantic cache and replay prevention
|
|
67
|
+
│ ├── cli.py # Main CLI commands exposed to users
|
|
68
|
+
│ ├── stats.py # Usage metrics and cost-saving reports
|
|
69
|
+
│ └── storage.py # SQLite helpers and persistence layer
|
|
70
|
+
└── tests/
|
|
71
|
+
└── test_budget.py # Unit tests for token budgeting logic
|
|
72
|
+
```
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
contextguardrail/__init__.py
|
|
4
|
+
contextguardrail/budget.py
|
|
5
|
+
contextguardrail/cache.py
|
|
6
|
+
contextguardrail/cli.py
|
|
7
|
+
contextguardrail/config.py
|
|
8
|
+
contextguardrail/exporter.py
|
|
9
|
+
contextguardrail/graph.py
|
|
10
|
+
contextguardrail/scanner.py
|
|
11
|
+
contextguardrail/selector.py
|
|
12
|
+
contextguardrail/stats.py
|
|
13
|
+
contextguardrail/storage.py
|
|
14
|
+
contextguardrail.egg-info/PKG-INFO
|
|
15
|
+
contextguardrail.egg-info/SOURCES.txt
|
|
16
|
+
contextguardrail.egg-info/dependency_links.txt
|
|
17
|
+
contextguardrail.egg-info/entry_points.txt
|
|
18
|
+
contextguardrail.egg-info/requires.txt
|
|
19
|
+
contextguardrail.egg-info/top_level.txt
|
|
20
|
+
tests/test_budget.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
contextguardrail
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "contextguardrail"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Local-first token firewall for AI coding agents"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"typer>=0.12",
|
|
13
|
+
"rich>=13.0",
|
|
14
|
+
"networkx>=3.0",
|
|
15
|
+
"tiktoken>=0.7"
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[project.scripts]
|
|
19
|
+
contextguardrail = "contextguardrail.cli:app"
|
|
20
|
+
|
|
21
|
+
[tool.setuptools.packages.find]
|
|
22
|
+
include = ["contextguardrail*"]
|