insightforge 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- insightforge/__init__.py +7 -0
- insightforge/__main__.py +5 -0
- insightforge/analyzer.py +172 -0
- insightforge/cli.py +266 -0
- insightforge/config.py +92 -0
- insightforge/diffing.py +154 -0
- insightforge/migrations.py +98 -0
- insightforge/models.py +74 -0
- insightforge/policy.py +67 -0
- insightforge/providers.py +170 -0
- insightforge/redaction.py +55 -0
- insightforge/renderer.py +250 -0
- insightforge/store.py +134 -0
- insightforge/updater.py +82 -0
- insightforge-0.1.0.dist-info/METADATA +236 -0
- insightforge-0.1.0.dist-info/RECORD +19 -0
- insightforge-0.1.0.dist-info/WHEEL +5 -0
- insightforge-0.1.0.dist-info/entry_points.txt +2 -0
- insightforge-0.1.0.dist-info/top_level.txt +1 -0
insightforge/__init__.py
ADDED
insightforge/__main__.py
ADDED
insightforge/analyzer.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from collections.abc import Sequence
|
|
5
|
+
|
|
6
|
+
from .models import RiskFlag, TraceNode, TraceRecord
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Each tuple holds regular-expression source strings. build_trace counts their
# matches (case-insensitively, via _count_matches) in the captured output.

# Phrases treated as hedging language.
HEDGE_PATTERNS = tuple(
    rf"\b{phrase}\b"
    for phrase in ("maybe", "probably", "I think", "likely", "appears to", "seems")
)

# Signals that the output mentions a source; the URL scheme pattern is the only
# one that is not a whole-word match.
SOURCE_PATTERNS = (r"https?://",) + tuple(
    rf"\b{phrase}\b"
    for phrase in (
        "source",
        "citation",
        "reference",
        "documentation",
        "research paper",
        "study",
    )
)

# Absolute wording that build_trace reports as an overgeneralization risk.
BIAS_PATTERNS = tuple(
    rf"\b{phrase}\b"
    for phrase in ("always", "never", "obviously", "everyone", "no one")
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _count_matches(patterns: Sequence[str], text: str) -> int:
    """Return the total number of case-insensitive matches of all patterns in text.

    Args:
        patterns: Regular-expression source strings.
        text: The text to scan.

    Returns:
        The sum, over every pattern, of its non-overlapping matches in ``text``.
    """
    total = 0
    for expression in patterns:
        matcher = re.compile(expression, re.IGNORECASE)
        # finditer yields the same non-overlapping matches findall would list.
        total += sum(1 for _ in matcher.finditer(text))
    return total
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _build_summary(prompt: str, stdout: str, stderr: str, risk_count: int) -> str:
    """Pick the one-sentence summary for a trace.

    The rules are checked in priority order and the first that applies wins:
    non-blank stderr, any risk flags, blank stdout, non-blank prompt. A fixed
    fallback sentence is returned when none of them applies.

    Args:
        prompt: The prompt associated with the run.
        stdout: Captured standard output.
        stderr: Captured standard error.
        risk_count: Number of risk flags raised for the run.

    Returns:
        The summary sentence.
    """
    ordered_rules = (
        (
            bool(stderr.strip()),
            "The wrapped command produced stderr output; review execution details before trusting the answer.",
        ),
        (
            bool(risk_count),
            "InsightForge detected language patterns that correlate with weak grounding or overconfident claims.",
        ),
        (
            not stdout.strip(),
            "The wrapped command returned no stdout, so the trace is mostly execution metadata.",
        ),
        (
            bool(prompt.strip()),
            "The response completed without obvious risk markers, but the trace should still be reviewed for source quality.",
        ),
    )
    for applies, sentence in ordered_rules:
        if applies:
            return sentence
    return "Execution completed successfully with a low-risk heuristic profile."
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def build_trace(
    *,
    prompt: str,
    command: Sequence[str],
    model_hint: str,
    provider: str = "unknown",
    system_prompt: str = "",
    stdout: str,
    stderr: str,
    exit_code: int,
    metadata: dict[str, str] | None = None,
    provenance_notes: Sequence[str] | None = None,
) -> TraceRecord:
    """Score a captured run with regex heuristics and package it as a TraceRecord.

    stdout and stderr are scanned together for hedging, source and
    absolute-language patterns. The counts drive a confidence score (base 0.72,
    clamped to the range 0.05..0.99), the risk flags, the provenance notes and
    the trace nodes.

    Args:
        prompt: Prompt associated with the run.
        command: The command (or provider/model pair) that was executed.
        model_hint: Model name; stored as "unknown" when empty.
        provider: Provider name; stored as "unknown" when empty.
        system_prompt: Optional system prompt.
        stdout: Captured standard output.
        stderr: Captured standard error.
        exit_code: Exit status of the run.
        metadata: Optional extra key/value data; copied into the record.
        provenance_notes: Optional notes that seed the provenance list.

    Returns:
        The populated TraceRecord.
    """
    stderr_present = bool(stderr.strip())
    stdout_present = bool(stdout.strip())

    # Scan both streams as one text, leaving out whichever is empty.
    scanned_text = "\n".join(filter(None, (stdout, stderr)))
    hedge_hits = _count_matches(HEDGE_PATTERNS, scanned_text)
    source_hits = _count_matches(SOURCE_PATTERNS, scanned_text)
    bias_hits = _count_matches(BIAS_PATTERNS, scanned_text)

    # Every adjustment is capped so one signal cannot dominate the score.
    hedge_cost = min(0.24, hedge_hits * 0.04)
    bias_cost = min(0.20, bias_hits * 0.05)
    stderr_penalty = 0.2 if stderr_present else 0.0
    empty_penalty = 0.0 if stdout_present else 0.15
    source_bonus = min(0.18, source_hits * 0.06)
    raw_score = 0.72 - hedge_cost - bias_cost - (stderr_penalty + empty_penalty) + source_bonus
    confidence = max(0.05, min(0.99, round(raw_score, 2)))

    provenance: list[str] = [*(provenance_notes or ())]
    if source_hits:
        provenance.append("Sources or citations were mentioned in the output.")
    elif not provenance:
        # Only note the absence when the caller supplied no notes either.
        provenance.append("No explicit sources or citations were detected.")

    bias_flags: list[RiskFlag] = []
    if bias_hits:
        bias_flags.append(
            RiskFlag(
                code="OVERGENERALIZATION",
                title="Overgeneralized claim pattern",
                severity="medium",
                evidence="The output uses absolute language that can hide edge cases or demographic skew.",
                recommendation="Ask the model to qualify claims, state assumptions, and list known exceptions.",
            )
        )

    hallucination_flags: list[RiskFlag] = []
    if hedge_hits and not source_hits:
        hallucination_flags.append(
            RiskFlag(
                code="UNGROUNDED_HEDGING",
                title="Ungrounded uncertainty",
                severity="high",
                evidence="The output contains hedging language without nearby source signals.",
                recommendation="Request citations, intermediate evidence, or a narrower task boundary.",
            )
        )
    if stderr_present:
        hallucination_flags.append(
            RiskFlag(
                code="EXECUTION_ANOMALY",
                title="Execution anomaly",
                severity="medium",
                evidence="The wrapped command emitted stderr output, which may indicate tool failure or partial completion.",
                recommendation="Inspect stderr and rerun before relying on the result for audits or downstream actions.",
            )
        )

    prompt_node = TraceNode(
        id="prompt",
        label="Prompt",
        kind="input",
        detail=prompt or "No prompt recorded.",
    )
    system_node = TraceNode(
        id="system",
        label="System Prompt",
        kind="input",
        detail=system_prompt or "No system prompt recorded.",
    )
    execution_node = TraceNode(
        id="execution",
        label="Execution",
        kind="process",
        detail=" ".join(command) if command else "No command recorded.",
        score=1.0 if exit_code == 0 else 0.4,
    )
    analysis_node = TraceNode(
        id="analysis",
        label="Heuristic Analysis",
        kind="analysis",
        detail=f"Hedges={hedge_hits}, source signals={source_hits}, bias markers={bias_hits}",
        score=confidence,
    )
    output_node = TraceNode(
        id="output",
        label="Model Output",
        kind="output",
        # The node shows at most the first 1200 characters of the output.
        detail=(stdout or stderr or "No output captured.")[:1200],
        score=confidence,
    )

    total_flags = len(bias_flags) + len(hallucination_flags)
    return TraceRecord(
        model_hint=model_hint or "unknown",
        provider=provider or "unknown",
        prompt=prompt,
        system_prompt=system_prompt,
        command=list(command),
        exit_code=exit_code,
        stdout=stdout,
        stderr=stderr,
        metadata=dict(metadata or {}),
        confidence_score=confidence,
        bias_flags=bias_flags,
        hallucination_flags=hallucination_flags,
        provenance=provenance,
        nodes=[prompt_node, system_node, execution_node, analysis_node, output_node],
        summary=_build_summary(prompt, stdout, stderr, total_flags),
    )
|
insightforge/cli.py
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import shlex
|
|
5
|
+
import subprocess
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from . import __version__
|
|
9
|
+
from .analyzer import build_trace
|
|
10
|
+
from .config import load_config
|
|
11
|
+
from .diffing import build_diff, render_diff_text, write_diff_html
|
|
12
|
+
from .migrations import CURRENT_DB_SCHEMA_VERSION, get_schema_version, migrate_storage
|
|
13
|
+
from .policy import evaluate_policies
|
|
14
|
+
from .providers import ProviderError, get_provider
|
|
15
|
+
from .redaction import apply_redaction
|
|
16
|
+
from .renderer import write_html, write_json
|
|
17
|
+
from .store import index_trace, load_trace, load_registry
|
|
18
|
+
from .updater import maybe_get_update_message
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the ``insightforge`` argument parser with all of its sub-commands.

    The chosen sub-command is stored in ``command_name`` and is mandatory.
    Sub-commands: wrap, ask, list, diff, schema-version, migrate, version.

    Returns:
        The configured parser.
    """
    root = argparse.ArgumentParser(
        prog="insightforge",
        description="Wrap AI interactions and emit audit-friendly traces.",
    )
    commands = root.add_subparsers(dest="command_name", required=True)

    # wrap and ask share the same --out option.
    out_prefix_option = {
        "default": "traces/latest",
        "help": "Output prefix for generated files, without extension",
    }

    wrap_cmd = commands.add_parser("wrap", help="Run a command and generate JSON + HTML traces.")
    wrap_cmd.add_argument("model", help="Model or tool hint, e.g. claude, grok, local-llm")
    wrap_cmd.add_argument("prompt", help="The prompt or intent associated with the wrapped run")
    wrap_cmd.add_argument("--cmd", required=True, help="Shell command to execute, quoted as a single string")
    wrap_cmd.add_argument("--out", **out_prefix_option)

    ask_cmd = commands.add_parser("ask", help="Prompt a supported provider and generate JSON + HTML traces.")
    ask_cmd.add_argument("provider", help="Provider name: mock, openai, anthropic")
    ask_cmd.add_argument("model", help="Model name understood by the provider")
    ask_cmd.add_argument("prompt", help="User prompt to send to the provider")
    ask_cmd.add_argument("--system", default="", help="Optional system prompt or instruction block")
    ask_cmd.add_argument("--out", **out_prefix_option)

    list_cmd = commands.add_parser("list", help="List indexed traces from the local registry.")
    list_cmd.add_argument("--limit", type=int, default=10, help="Maximum number of traces to display")

    diff_cmd = commands.add_parser("diff", help="Compare two saved traces by path or trace id.")
    diff_cmd.add_argument("before", help="Older trace path or trace id")
    diff_cmd.add_argument("after", help="Newer trace path or trace id")
    diff_cmd.add_argument("--out", default="traces/diff-latest.html", help="HTML destination for the diff report")

    schema_cmd = commands.add_parser("schema-version", help="Show the current storage schema version.")
    schema_cmd.add_argument(
        "--expected",
        action="store_true",
        help="Also print the expected application schema version",
    )

    commands.add_parser("migrate", help="Run storage migrations to the latest schema version.")

    version_cmd = commands.add_parser("version", help="Show the installed CLI version.")
    version_cmd.add_argument(
        "--check-updates",
        action="store_true",
        help="Also check whether a newer package version is available",
    )
    return root
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _cap_output(value: str, limit: int) -> str:
    """Truncate ``value`` to at most ``limit`` characters before it is persisted.

    Args:
        value: The captured text.
        limit: Maximum number of characters to keep. Negative values are
            treated as 0.

    Returns:
        ``value`` unchanged when it fits; otherwise its first ``limit``
        characters followed by a truncation marker line.
    """
    # A negative limit would make value[:limit] slice from the END of the
    # string and keep nearly all of the text, so clamp it first.
    limit = max(0, limit)
    if len(value) <= limit:
        return value
    return value[:limit] + "\n[InsightForge truncated persisted output]"
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _finalize_trace(trace, app_config):
    """Attach policy results to ``trace``, then pass it through redaction.

    Args:
        trace: The trace to finalize; its ``policy_results`` and
            ``overall_status`` attributes are overwritten.
        app_config: Application config supplying ``policy`` and ``redaction``.

    Returns:
        Whatever ``apply_redaction`` returns for the updated trace.
    """
    verdicts, status = evaluate_policies(trace, app_config.policy)
    trace.policy_results = verdicts
    trace.overall_status = status
    return apply_redaction(trace, app_config.redaction)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _write_trace_artifacts(args: argparse.Namespace, trace, app_config) -> None:
    """Write the JSON and HTML artifacts for ``trace``, index it, and print a report.

    Args:
        args: Parsed CLI arguments; ``args.out`` is the output prefix.
        trace: The finalized trace to persist.
        app_config: Application config; its ``storage`` section is passed to
            the indexer.
    """
    out_prefix = Path(args.out)
    # Still accept a prefix that already ends in one of the generated extensions.
    if out_prefix.suffix.lower() in {".json", ".html"}:
        out_prefix = out_prefix.with_suffix("")
    out_prefix.parent.mkdir(parents=True, exist_ok=True)

    # Append the extension instead of calling with_suffix(): with_suffix()
    # replaces everything after the last dot, so a prefix such as
    # "traces/gpt-4.1-run" would be written to "traces/gpt-4.json".
    json_path = out_prefix.with_name(out_prefix.name + ".json")
    html_path = out_prefix.with_name(out_prefix.name + ".html")
    write_json(trace, json_path)
    write_html(trace, html_path)
    index_trace(trace, json_path, html_path, app_config.storage)

    print(f"InsightForge trace captured for {trace.model_hint}")
    print(f"Trace ID: {trace.trace_id}")
    print(f"Provider: {trace.provider}")
    print(f"Status: {trace.overall_status}")
    print(f"Confidence score: {trace.confidence_score:.2f}")
    print(f"JSON: {json_path}")
    print(f"HTML: {html_path}")

    # One line per risk flag, then one line per policy result.
    for flag in trace.bias_flags + trace.hallucination_flags:
        print(f"[{flag.severity}] {flag.title}: {flag.recommendation}")
    for result in trace.policy_results:
        print(f"[policy:{result.status}] {result.policy_id}: {result.message}")
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def run_wrap(args: argparse.Namespace) -> int:
    """Run the ``--cmd`` command, capture its output, and write a trace for it.

    Args:
        args: Parsed CLI arguments (``cmd``, ``prompt``, ``model``, ``out``).

    Returns:
        The wrapped command's exit status. Returns 2 when ``--cmd`` cannot be
        parsed or is empty (no trace is written). When the command cannot be
        started, a trace recording the error is still written and the result
        is 127 (executable not found) or 126 (any other launch failure),
        mirroring the POSIX shell convention.
    """
    import sys  # local import: only needed for the error messages below

    app_config = load_config()
    try:
        command = shlex.split(args.cmd)
    except ValueError as exc:
        # shlex rejects malformed input such as an unbalanced quote.
        print(f"insightforge: could not parse --cmd: {exc}", file=sys.stderr)
        return 2
    if not command:
        print("insightforge: --cmd must contain a command to run", file=sys.stderr)
        return 2

    try:
        completed = subprocess.run(command, capture_output=True, text=True, check=False)
    except OSError as exc:
        # Record the launch failure in the trace instead of crashing, the same
        # way run_ask records a provider failure.
        stdout, stderr = "", str(exc)
        exit_code = 127 if isinstance(exc, FileNotFoundError) else 126
    else:
        stdout, stderr = completed.stdout, completed.stderr
        exit_code = completed.returncode

    trace = build_trace(
        prompt=args.prompt,
        command=command,
        model_hint=args.model,
        provider="shell",
        stdout=_cap_output(stdout, app_config.policy.max_output_chars),
        stderr=_cap_output(stderr, app_config.policy.max_output_chars),
        exit_code=exit_code,
        metadata={"mode": "wrapped shell command"},
    )
    trace = _finalize_trace(trace, app_config)

    _write_trace_artifacts(args, trace, app_config)

    return exit_code
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def run_ask(args: argparse.Namespace) -> int:
    """Send the prompt to a provider, write a trace, and print the reply.

    A ``ProviderError`` is not re-raised: its message is recorded as stderr
    and the exit code becomes 1.

    Args:
        args: Parsed CLI arguments (``provider``, ``model``, ``prompt``,
            ``system``, ``out``).

    Returns:
        0 when the provider call succeeded, 1 when it raised ProviderError.
    """
    app_config = load_config()
    output_limit = app_config.policy.max_output_chars

    try:
        reply = get_provider(args.provider).generate(
            model=args.model,
            prompt=args.prompt,
            system_prompt=args.system,
        )
        stdout, stderr, exit_code = reply.output_text, "", 0
        metadata, provenance_notes = reply.metadata, reply.provenance
    except ProviderError as failure:
        stdout, stderr, exit_code = "", str(failure), 1
        metadata, provenance_notes = {"transport": "provider call failed"}, []

    trace = _finalize_trace(
        build_trace(
            prompt=args.prompt,
            system_prompt=args.system,
            command=[args.provider, args.model],
            model_hint=args.model,
            provider=args.provider,
            stdout=_cap_output(stdout, output_limit),
            stderr=_cap_output(stderr, output_limit),
            exit_code=exit_code,
            metadata=metadata,
            provenance_notes=provenance_notes,
        ),
        app_config,
    )

    _write_trace_artifacts(args, trace, app_config)
    # Echo the uncapped reply, or the error text when there is no reply.
    print(stdout or stderr)
    return exit_code
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def run_list(args: argparse.Namespace) -> int:
    """Print one line per indexed trace from the local registry.

    Args:
        args: Parsed CLI arguments; ``limit`` is raised to at least 1.

    Returns:
        Always 0.
    """
    storage = load_config().storage
    rows = load_registry(storage, max(1, args.limit))
    if not rows:
        print("No traces indexed yet.")
        return 0

    for row in rows:
        columns = [
            f"{row['trace_id']}",
            f"{row['captured_at']}",
            f"{row['provider']}",
            f"{row['model_hint']}",
            f"status={row['overall_status']}",
            f"score={row['confidence_score']:.2f}",
            f"{row['json_path']}",
        ]
        print(" | ".join(columns))
    return 0
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def run_diff(args: argparse.Namespace) -> int:
    """Compare two saved traces, write the HTML report, and print a text summary.

    Args:
        args: Parsed CLI arguments (``before``, ``after``, ``out``).

    Returns:
        Always 0.
    """
    storage = load_config().storage
    older = load_trace(args.before, storage)
    newer = load_trace(args.after, storage)
    comparison = build_diff(older, newer)

    report_path = Path(args.out)
    # Create the report's directory if it does not exist yet.
    report_path.parent.mkdir(parents=True, exist_ok=True)
    write_diff_html(comparison, report_path)

    print(render_diff_text(comparison))
    print(f"HTML: {report_path}")
    return 0
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def run_schema_version(args: argparse.Namespace) -> int:
    """Print the storage schema version, plus the expected one with ``--expected``.

    Args:
        args: Parsed CLI arguments; ``expected`` toggles the second line.

    Returns:
        Always 0.
    """
    storage = load_config().storage
    print(f"Current schema version: {get_schema_version(storage)}")
    if not args.expected:
        return 0
    print(f"Expected schema version: {CURRENT_DB_SCHEMA_VERSION}")
    return 0
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def run_migrate(_: argparse.Namespace) -> int:
    """Run the storage migrations and report the version change.

    Args:
        _: Parsed CLI arguments (unused).

    Returns:
        Always 0.
    """
    storage = load_config().storage
    old_version, new_version = migrate_storage(storage)
    print(f"Migrated storage schema: {old_version} -> {new_version}")
    print(f"SQLite: {storage.sqlite_path}")
    return 0
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def run_version(args: argparse.Namespace) -> int:
    """Print the installed version and, on request, the update-check result.

    Args:
        args: Parsed CLI arguments; ``check_updates`` enables the update check.

    Returns:
        Always 0.
    """
    app_config = load_config()
    print(f"InsightForge version: {__version__}")
    if not args.check_updates:
        return 0

    notice = maybe_get_update_message(__version__, app_config.updates)
    # An empty notice means either "up to date" or "could not check".
    print(notice or "InsightForge is up to date or update check is unavailable.")
    return 0
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def main() -> int:
    """Parse the command line and dispatch to the matching ``run_*`` handler.

    For every sub-command except ``version``, an update notice is printed
    first when one is available.

    Returns:
        The handler's exit status.
    """
    parser = build_parser()
    args = parser.parse_args()
    app_config = load_config()

    # "version" does its own update check on request, so skip it here.
    if args.command_name != "version":
        notice = maybe_get_update_message(__version__, app_config.updates)
        if notice:
            print(notice)

    handlers = {
        "wrap": run_wrap,
        "ask": run_ask,
        "list": run_list,
        "diff": run_diff,
        "schema-version": run_schema_version,
        "migrate": run_migrate,
        "version": run_version,
    }
    handler = handlers.get(args.command_name)
    if handler is not None:
        return handler(args)

    # parser.error() exits the process; the return only satisfies the signature.
    parser.error("Unknown command")
    return 2
|
insightforge/config.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import tomllib
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Relative name of the config file; load_config joins it onto the working
# directory (or onto the directory it is given).
DEFAULT_CONFIG_PATH = Path(".insightforge.toml")
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(slots=True)
|
|
12
|
+
class PolicyConfig:
|
|
13
|
+
min_confidence: float = 0.65
|
|
14
|
+
require_sources: bool = False
|
|
15
|
+
fail_on_stderr: bool = True
|
|
16
|
+
block_absolute_language: bool = False
|
|
17
|
+
max_output_chars: int = 20000
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(slots=True)
|
|
21
|
+
class RedactionConfig:
|
|
22
|
+
enabled: bool = True
|
|
23
|
+
mask: str = "[REDACTED]"
|
|
24
|
+
patterns: list[str] = field(
|
|
25
|
+
default_factory=lambda: [
|
|
26
|
+
r"sk-[A-Za-z0-9_\-]+",
|
|
27
|
+
r"AIza[0-9A-Za-z\-_]+",
|
|
28
|
+
r"ghp_[A-Za-z0-9]+",
|
|
29
|
+
r"\b[\w.\-]+@[\w.\-]+\.\w+\b",
|
|
30
|
+
r"Bearer\s+[A-Za-z0-9\-._~+/]+=*",
|
|
31
|
+
]
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(slots=True)
|
|
36
|
+
class StorageConfig:
|
|
37
|
+
sqlite_path: str = ".insightforge/traces.db"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass(slots=True)
|
|
41
|
+
class UpdateConfig:
|
|
42
|
+
enabled: bool = True
|
|
43
|
+
package_name: str = "insightforge"
|
|
44
|
+
check_interval_hours: int = 24
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass(slots=True)
class AppConfig:
    """Top-level configuration: one section object per config-file table."""

    # Each section is built by a factory so every AppConfig gets its own
    # default instances.
    policy: PolicyConfig = field(default_factory=PolicyConfig)
    redaction: RedactionConfig = field(default_factory=RedactionConfig)
    storage: StorageConfig = field(default_factory=StorageConfig)
    updates: UpdateConfig = field(default_factory=UpdateConfig)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def load_config(cwd: Path | None = None) -> AppConfig:
    """Load ``.insightforge.toml`` from ``cwd``, falling back to defaults.

    Args:
        cwd: Directory to look in; the current working directory when omitted.

    Returns:
        Pure defaults when the file does not exist. Otherwise an AppConfig in
        which every key present in the file overrides its default, coerced to
        the field's type.
    """
    defaults = AppConfig()
    config_file = (cwd or Path.cwd()) / DEFAULT_CONFIG_PATH
    if not config_file.exists():
        return defaults

    document = tomllib.loads(config_file.read_text(encoding="utf-8"))
    # A missing table behaves like an empty one, so its defaults apply.
    policy_tbl = document.get("policy", {})
    redaction_tbl = document.get("redaction", {})
    storage_tbl = document.get("storage", {})
    updates_tbl = document.get("updates", {})

    base_policy = defaults.policy
    base_redaction = defaults.redaction
    base_storage = defaults.storage
    base_updates = defaults.updates

    return AppConfig(
        policy=PolicyConfig(
            min_confidence=float(policy_tbl.get("min_confidence", base_policy.min_confidence)),
            require_sources=bool(policy_tbl.get("require_sources", base_policy.require_sources)),
            fail_on_stderr=bool(policy_tbl.get("fail_on_stderr", base_policy.fail_on_stderr)),
            block_absolute_language=bool(
                policy_tbl.get("block_absolute_language", base_policy.block_absolute_language)
            ),
            max_output_chars=int(policy_tbl.get("max_output_chars", base_policy.max_output_chars)),
        ),
        redaction=RedactionConfig(
            enabled=bool(redaction_tbl.get("enabled", base_redaction.enabled)),
            mask=str(redaction_tbl.get("mask", base_redaction.mask)),
            patterns=[str(item) for item in redaction_tbl.get("patterns", base_redaction.patterns)],
        ),
        storage=StorageConfig(
            sqlite_path=str(storage_tbl.get("sqlite_path", base_storage.sqlite_path)),
        ),
        updates=UpdateConfig(
            enabled=bool(updates_tbl.get("enabled", base_updates.enabled)),
            package_name=str(updates_tbl.get("package_name", base_updates.package_name)),
            check_interval_hours=int(
                updates_tbl.get("check_interval_hours", base_updates.check_interval_hours)
            ),
        ),
    )
|
insightforge/diffing.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import html
|
|
4
|
+
import json
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from .models import TraceRecord
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(slots=True)
|
|
12
|
+
class TraceDiff:
|
|
13
|
+
before: TraceRecord
|
|
14
|
+
after: TraceRecord
|
|
15
|
+
summary_lines: list[str]
|
|
16
|
+
score_delta: float
|
|
17
|
+
added_flags: list[str]
|
|
18
|
+
removed_flags: list[str]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def build_diff(before: TraceRecord, after: TraceRecord) -> TraceDiff:
    """Compare two traces and collect what changed.

    Args:
        before: The older trace.
        after: The newer trace.

    Returns:
        A TraceDiff holding both records, "old -> new" summary lines, the
        rounded confidence delta, and the sorted keys of flags that appeared
        or disappeared.
    """

    def flag_keys(record: TraceRecord) -> set[str]:
        # Bias and hallucination flags are pooled into one set of keys.
        return {_flag_key(item) for item in record.bias_flags + record.hallucination_flags}

    old_keys = flag_keys(before)
    new_keys = flag_keys(after)
    delta = round(after.confidence_score - before.confidence_score, 2)

    if before.prompt != after.prompt:
        prompt_line = "Prompt changed."
    else:
        prompt_line = "Prompt unchanged."
    # Only stdout is compared; a stderr-only difference reads as unchanged.
    if before.stdout != after.stdout:
        output_line = "Output changed."
    else:
        output_line = "Output unchanged."

    lines = [
        f"Provider: {before.provider} -> {after.provider}",
        f"Model: {before.model_hint} -> {after.model_hint}",
        f"Status: {before.overall_status} -> {after.overall_status}",
        f"Confidence: {before.confidence_score:.2f} -> {after.confidence_score:.2f} ({delta:+.2f})",
        f"Exit code: {before.exit_code} -> {after.exit_code}",
        prompt_line,
        output_line,
    ]

    return TraceDiff(
        before=before,
        after=after,
        summary_lines=lines,
        score_delta=delta,
        added_flags=sorted(new_keys - old_keys),
        removed_flags=sorted(old_keys - new_keys),
    )
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def write_diff_html(diff: TraceDiff, destination: Path) -> None:
    """Render ``diff`` as a standalone HTML page and write it to ``destination``.

    The page shows the summary lines, a chip per added or removed flag, and
    both records as pretty-printed JSON. All dynamic text is HTML-escaped.

    Args:
        diff: The comparison to render.
        destination: File to write (UTF-8). Its directory must already exist.
    """

    def escaped_json(record) -> str:
        return html.escape(json.dumps(record.to_dict(), indent=2))

    before_json = escaped_json(diff.before)
    after_json = escaped_json(diff.after)

    summary_html = "".join(f"<p>{html.escape(line)}</p>" for line in diff.summary_lines)
    # Each chip group falls back to a neutral chip when it would be empty.
    added_html = (
        "".join(f"<span class='chip bad'>Added flag: {html.escape(flag)}</span>" for flag in diff.added_flags)
        or "<span class='chip'>No new flags</span>"
    )
    removed_html = (
        "".join(f"<span class='chip good'>Removed flag: {html.escape(flag)}</span>" for flag in diff.removed_flags)
        or "<span class='chip'>No removed flags</span>"
    )

    page = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>InsightForge Diff</title>
<style>
:root {{
--bg: #f3f1ea;
--panel: #fffdf8;
--ink: #1d1d1b;
--muted: #666050;
--good: #2f6f4f;
--bad: #aa3d2a;
--line: #d7d0c2;
}}
body {{
margin: 0;
font-family: "IBM Plex Sans", "Avenir Next", sans-serif;
color: var(--ink);
background: linear-gradient(180deg, #ebe4d8 0%, var(--bg) 100%);
}}
main {{
max-width: 1240px;
margin: 0 auto;
padding: 28px 18px 64px;
}}
.panel {{
background: rgba(255,253,248,0.92);
border: 1px solid var(--line);
border-radius: 22px;
padding: 20px;
box-shadow: 0 20px 70px rgba(52, 42, 28, 0.08);
margin-bottom: 18px;
}}
.grid {{
display: grid;
grid-template-columns: 1fr 1fr;
gap: 18px;
}}
.chips {{
display: flex;
flex-wrap: wrap;
gap: 10px;
margin-top: 12px;
}}
.chip {{
border-radius: 999px;
padding: 8px 12px;
border: 1px solid var(--line);
background: var(--panel);
}}
.good {{ color: var(--good); }}
.bad {{ color: var(--bad); }}
pre {{
overflow: auto;
padding: 14px;
border-radius: 16px;
background: #1d1d1b;
color: #f7f0e8;
font-size: 13px;
}}
@media (max-width: 860px) {{
.grid {{ grid-template-columns: 1fr; }}
}}
</style>
</head>
<body>
<main>
<section class="panel">
<h1>Trace diff</h1>
{summary_html}
<div class="chips">
{added_html}
{removed_html}
</div>
</section>
<section class="grid">
<section class="panel">
<h2>Before</h2>
<pre>{before_json}</pre>
</section>
<section class="panel">
<h2>After</h2>
<pre>{after_json}</pre>
</section>
</section>
</main>
</body>
</html>
"""
    destination.write_text(page, encoding="utf-8")
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def render_diff_text(diff: TraceDiff) -> str:
    """Format ``diff`` as plain text for the terminal.

    Args:
        diff: The comparison to render.

    Returns:
        A title line, the summary lines, and one comma-separated line each
        for added and removed flags (omitted when the list is empty), joined
        with newlines.
    """
    report = ["InsightForge diff", *diff.summary_lines]
    flag_groups = (
        ("Added flags: ", diff.added_flags),
        ("Removed flags: ", diff.removed_flags),
    )
    for label, keys in flag_groups:
        if keys:
            report.append(label + ", ".join(keys))
    return "\n".join(report)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _flag_key(flag) -> str:
    """Return the ``code:severity:title`` string that identifies ``flag`` in a diff.

    Two flags are the same flag for diffing purposes exactly when all three
    attributes format to the same text.
    """
    return "{}:{}:{}".format(flag.code, flag.severity, flag.title)
|