insightforge 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,236 @@
Metadata-Version: 2.4
Name: insightforge
Version: 0.1.0
Summary: A transparency engine for AI interactions.
Author: InsightForge
License: MIT
Project-URL: Homepage, https://github.com/your-org/InsightForge
Project-URL: Repository, https://github.com/your-org/InsightForge
Keywords: ai,observability,auditability,cli,transparency
Classifier: Development Status :: 3 - Alpha
Classifier: Environment :: Console
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Requires-Python: >=3.11
Description-Content-Type: text/markdown

# InsightForge

AI without insight is blind ambition.

InsightForge is an open-source transparency engine for AI interactions. It wraps a model call or agent command, captures the prompt and execution context, scores the response for obvious risk signals, and renders a visual trace you can inspect instead of blindly trusting output.

This is not observability theater. It is the start of a forensic layer for AI:

- What did we ask?
- What came back?
- How confident should we be?
- Which parts look biased, weakly grounded, or potentially hallucinatory?
- What evidence would make this answer safer to trust?

## Why this exists

The current AI stack optimizes for output volume and benchmark swagger. What it usually does not give you is the "why" layer:

- Developers cannot easily debug why an agent drifted off task.
- Teams cannot prove traceability during audits.
- Users are asked to trust answers without seeing grounding quality.
- Compliance pressure is increasing faster than transparency tooling.

InsightForge is the neutral inspector that sits around model interactions and makes them legible.

## MVP Scope

This repository bootstraps the first local MVP:

- `insightforge wrap ...` runs any shell command that represents an AI interaction.
- `insightforge ask ...` talks to supported providers directly.
- `insightforge list` shows indexed traces from the local registry.
- `insightforge diff ...` compares two traces and renders a visual report.
- `insightforge schema-version` shows the current SQLite schema version.
- `insightforge migrate` upgrades local storage to the latest schema version.
- Evaluates production policies such as minimum confidence, stderr failures, source requirements, and blocked absolute language.
- Redacts common secrets and emails before traces are persisted.
- Stores trace metadata and payloads in SQLite for durable retrieval.
- Captures prompt, stdout, stderr, exit status, and basic provenance hints.
- Applies heuristic checks for weak grounding and overgeneralized language.
- Emits both a machine-readable JSON trace and a polished HTML "insight map".

It is intentionally simple. The point is to prove the workflow before building richer provider adapters, policy packs, and team-grade compliance pipelines.

## Quickstart

```bash
python3 -m venv .venv
source .venv/bin/activate
pip install -e .
cp .insightforge.toml.example .insightforge.toml
insightforge wrap claude "Explain why this migration failed" --cmd "python3 -c 'print(\"Maybe the issue is a missing foreign key\")'"
```

The command generates:

- `traces/latest.json`
- `traces/latest.html`

Open the HTML file in a browser to inspect the visual trace.
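The JSON trace also supports scripted triage. The sketch below assumes the serialized trace mirrors the `TraceRecord` fields defined in the analyzer (`confidence_score`, `bias_flags`, `hallucination_flags`, `summary`); the exact JSON layout and the sample payload are assumptions, not the project's documented schema.

```python
import json

# Hypothetical trace payload; field names follow the TraceRecord dataclass
# used by the analyzer, but the serialized layout is an assumption.
sample = """
{
  "provider": "mock",
  "model_hint": "demo-model",
  "confidence_score": 0.62,
  "bias_flags": [{"code": "OVERGENERALIZATION", "severity": "medium"}],
  "hallucination_flags": [],
  "summary": "Language patterns correlate with weak grounding."
}
"""

trace = json.loads(sample)

MIN_CONFIDENCE = 0.85  # mirrors the default policy.min_confidence

# Surface traces that fall below the policy threshold or carry risk flags.
flags = trace["bias_flags"] + trace["hallucination_flags"]
if trace["confidence_score"] < MIN_CONFIDENCE or flags:
    print(f"REVIEW: {trace['model_hint']} scored {trace['confidence_score']}")
    for flag in flags:
        print(f"  - {flag['code']} ({flag['severity']})")
```

In practice you would `json.load` the `traces/latest.json` file instead of the inline sample.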

Provider-native flow:

```bash
insightforge ask mock demo-model "Why did the recommendation change?" \
  --system "Explain assumptions and mention missing evidence." \
  --out traces/mock-provider
```

Supported providers today:

- `mock` for local demos and tests
- `openai` via `OPENAI_API_KEY`
- `anthropic` via `ANTHROPIC_API_KEY`

The provider adapters use only the Python standard library so the project stays dependency-light.
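As an illustration of what a standard-library-only adapter can look like, the sketch below builds a chat request with `urllib.request` against the public OpenAI REST endpoint. This is not the project's actual adapter code; the endpoint and header shapes are the ones the public API documents, and the model name is a placeholder.

```python
import json
import urllib.request

OPENAI_URL = "https://api.openai.com/v1/chat/completions"  # public REST endpoint


def build_chat_request(api_key: str, model: str, prompt: str, system: str = "") -> urllib.request.Request:
    """Build a stdlib-only POST request; no third-party SDK required."""
    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})
    body = json.dumps({"model": model, "messages": messages}).encode("utf-8")
    return urllib.request.Request(
        OPENAI_URL,
        data=body,
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
        method="POST",
    )


# The caller would pass the request to urllib.request.urlopen(...) and decode
# the JSON response; error handling and retries are omitted here.
req = build_chat_request("sk-test", "gpt-4o-mini", "Why did the recommendation change?")
```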

CLI install options for other developers:

```bash
pipx install git+https://github.com/<your-org>/InsightForge.git
```

Or from a published package:

```bash
pipx install insightforge
```

That gives developers a global `insightforge` command without needing a project-local virtualenv.

Users are automatically notified in the CLI when PyPI has a newer InsightForge release. They can also check manually with:

```bash
insightforge version --check-updates
```

Comparison workflow:

```bash
insightforge list
insightforge diff trace_id_one trace_id_two --out traces/compare.html
```

Every captured trace is indexed in `.insightforge/registry.json` so you can compare runs by path or by trace id.
The primary production store is SQLite at `.insightforge/traces.db`, with the JSON registry kept as a convenience export.

Storage maintenance:

```bash
insightforge schema-version --expected
insightforge migrate
```

CI/CD automation:

- `.github/workflows/ci.yml` runs compile, tests, and package builds on every push and pull request.
- `.github/workflows/release.yml` runs on `main`, re-tests the package, compares the local version to PyPI, and publishes only when the version is newer.
- Users then see the update prompt in the CLI and can upgrade with `pipx upgrade insightforge`.

Trusted Publishing setup:

1. Push this repository to GitHub.
2. In GitHub, keep Actions enabled for the repository.
3. In PyPI, add a Trusted Publisher for this project with:
   - Owner: your GitHub user or org
   - Repository: `InsightForge`
   - Workflow name: `release.yml`
   - Environment name: `pypi`
4. No `PYPI_API_TOKEN` secret is needed in GitHub; Trusted Publishing authenticates the workflow directly.
5. Bump the version in `pyproject.toml`, merge or push to `main`, and the release workflow will publish automatically if tests pass and the version is newer than the one on PyPI.

If you want a dry run before real publishing, point the same workflow at TestPyPI first and use a separate trusted publisher there.
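For orientation, a minimal `release.yml` compatible with the setup above might look roughly like this sketch. It is an illustrative assumption, not the repository's actual workflow: the `pypa/gh-action-pypi-publish` action and the `id-token: write` permission are the standard Trusted Publishing pieces, and the PyPI version-comparison step described above is omitted for brevity.

```yaml
name: release
on:
  push:
    branches: [main]

jobs:
  publish:
    runs-on: ubuntu-latest
    environment: pypi        # must match the environment name registered on PyPI
    permissions:
      id-token: write        # required for Trusted Publishing (OIDC)
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - run: pip install build && python -m build
      - uses: pypa/gh-action-pypi-publish@release/v1
```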

Policy and redaction config:

```bash
cp .insightforge.toml.example .insightforge.toml
```

The checked-in defaults are intentionally strict for factual audit demos:

- `policy.min_confidence = 0.85`
- `policy.require_sources = true`
- `policy.fail_on_stderr = true`
- `policy.block_absolute_language = true`

Other knobs:

- `policy.max_output_chars`
- `redaction.enabled`
- `storage.sqlite_path`
- `updates.enabled`
- `updates.check_interval_hours`

## Example

```bash
insightforge wrap local-llm "Review this answer for risk" \
  --cmd "python3 -c 'print(\"This obviously always works\")'" \
  --out traces/risky-demo
```

Expected outcome:

- Confidence score drops.
- The report flags overgeneralized language.
- The insight map shows prompt, execution, heuristic analysis, and captured output.
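The confidence drop can be reproduced from the heuristics in `src/insightforge/analyzer.py`: the demo output contains two absolute-language markers, and the arithmetic below mirrors that file for this input (no hedges, no source signals, stdout present, no stderr).

```python
import re

# Same bias patterns as analyzer.BIAS_PATTERNS.
BIAS_PATTERNS = (r"\balways\b", r"\bnever\b", r"\bobviously\b", r"\beveryone\b", r"\bno one\b")

output = "This obviously always works"

# Same counting approach as analyzer._count_matches.
bias_hits = sum(len(re.findall(p, output, flags=re.IGNORECASE)) for p in BIAS_PATTERNS)

# Same scoring arithmetic as analyzer.build_trace; the hedge, source,
# stderr, and empty-output terms are all zero for this input.
confidence = 0.72
confidence -= min(0.20, bias_hits * 0.05)
confidence = max(0.05, min(0.99, round(confidence, 2)))

print(bias_hits, confidence)  # → 2 0.62
```

A score of 0.62 falls below the default `policy.min_confidence = 0.85`, which is why this demo trips the policy check.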

## Product Direction

The wedge is developer trust. The moat is structured forensic data.

Short term:

- Wrap local CLIs and SDK calls.
- Add richer provider-specific adapters for OpenAI, Anthropic, and local models.
- Expand bias, hallucination, and provenance checks.
- Ship a VS Code extension and diff view for prompt iterations.

Long term:

- Team dashboards for audit review.
- Queryable trace stores.
- Policy enforcement before outputs reach end users.
- Explainability primitives for enterprise compliance.

## 90-Day Build Narrative

Weeks 1-2:

- Ship crude but usable.
- Publish the manifesto and demo.
- Capture raw traces and get developers trying it immediately.

Weeks 3-4:

- Talk to users who already complain about hallucinations and opacity.
- Turn every legitimate complaint into a fast release.

Weeks 5-8:

- Add richer visual maps and editor integrations.
- Make "gotcha" demos trivially shareable.

Weeks 9-12:

- Launch a hosted audit workflow for teams.
- Use real usage, not pitch decks, to pull in platform partners.

## Current Constraints

This MVP does not capture hidden chain-of-thought or privileged model internals. It captures the observable trail around the interaction and turns that into a usable audit artifact. That distinction matters.

It also uses heuristic analysis for confidence and risk flags. Those signals are useful for audit triage, but they are not a claim of true model introspection.

If you want trustworthy AI systems, you need tooling that treats every answer as inspectable infrastructure.
@@ -0,0 +1,34 @@
[build-system]
requires = ["setuptools>=68"]
build-backend = "setuptools.build_meta"

[project]
name = "insightforge"
version = "0.1.0"
description = "A transparency engine for AI interactions."
readme = "README.md"
requires-python = ">=3.11"
license = { text = "MIT" }
authors = [
    { name = "InsightForge" }
]
urls = { Homepage = "https://github.com/your-org/InsightForge", Repository = "https://github.com/your-org/InsightForge" }
keywords = ["ai", "observability", "auditability", "cli", "transparency"]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Environment :: Console",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
]

[project.scripts]
insightforge = "insightforge.cli:main"

[tool.setuptools]
package-dir = { "" = "src" }

[tool.setuptools.packages.find]
where = ["src"]
@@ -0,0 +1,4 @@
[egg_info]
tag_build =
tag_date = 0

@@ -0,0 +1,7 @@
"""InsightForge public package interface."""

from .analyzer import build_trace

__version__ = "0.1.0"

__all__ = ["__version__", "build_trace"]
@@ -0,0 +1,5 @@
from .cli import main


if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,172 @@
from __future__ import annotations

import re
from collections.abc import Sequence

from .models import RiskFlag, TraceNode, TraceRecord


HEDGE_PATTERNS = (
    r"\bmaybe\b",
    r"\bprobably\b",
    r"\bI think\b",
    r"\blikely\b",
    r"\bappears to\b",
    r"\bseems\b",
)

SOURCE_PATTERNS = (
    r"https?://",
    r"\bsource\b",
    r"\bcitation\b",
    r"\breference\b",
    r"\bdocumentation\b",
    r"\bresearch paper\b",
    r"\bstudy\b",
)

BIAS_PATTERNS = (
    r"\balways\b",
    r"\bnever\b",
    r"\bobviously\b",
    r"\beveryone\b",
    r"\bno one\b",
)


def _count_matches(patterns: Sequence[str], text: str) -> int:
    return sum(len(re.findall(pattern, text, flags=re.IGNORECASE)) for pattern in patterns)


def _build_summary(prompt: str, stdout: str, stderr: str, risk_count: int) -> str:
    if stderr.strip():
        return "The wrapped command produced stderr output; review execution details before trusting the answer."
    if risk_count:
        return "InsightForge detected language patterns that correlate with weak grounding or overconfident claims."
    if not stdout.strip():
        return "The wrapped command returned no stdout, so the trace is mostly execution metadata."
    if prompt.strip():
        return "The response completed without obvious risk markers, but the trace should still be reviewed for source quality."
    return "Execution completed successfully with a low-risk heuristic profile."


def build_trace(
    *,
    prompt: str,
    command: Sequence[str],
    model_hint: str,
    provider: str = "unknown",
    system_prompt: str = "",
    stdout: str,
    stderr: str,
    exit_code: int,
    metadata: dict[str, str] | None = None,
    provenance_notes: Sequence[str] | None = None,
) -> TraceRecord:
    output_blob = "\n".join(part for part in (stdout, stderr) if part)
    hedge_hits = _count_matches(HEDGE_PATTERNS, output_blob)
    source_hits = _count_matches(SOURCE_PATTERNS, output_blob)
    bias_hits = _count_matches(BIAS_PATTERNS, output_blob)
    stderr_penalty = 0.2 if stderr.strip() else 0.0
    empty_penalty = 0.15 if not stdout.strip() else 0.0

    confidence = 0.72
    confidence -= min(0.24, hedge_hits * 0.04)
    confidence -= min(0.20, bias_hits * 0.05)
    confidence -= stderr_penalty + empty_penalty
    confidence += min(0.18, source_hits * 0.06)
    confidence = max(0.05, min(0.99, round(confidence, 2)))

    bias_flags: list[RiskFlag] = []
    hallucination_flags: list[RiskFlag] = []
    provenance: list[str] = list(provenance_notes or [])

    if source_hits:
        provenance.append("Sources or citations were mentioned in the output.")
    elif not provenance:
        provenance.append("No explicit sources or citations were detected.")

    if bias_hits:
        bias_flags.append(
            RiskFlag(
                code="OVERGENERALIZATION",
                title="Overgeneralized claim pattern",
                severity="medium",
                evidence="The output uses absolute language that can hide edge cases or demographic skew.",
                recommendation="Ask the model to qualify claims, state assumptions, and list known exceptions.",
            )
        )

    if hedge_hits and not source_hits:
        hallucination_flags.append(
            RiskFlag(
                code="UNGROUNDED_HEDGING",
                title="Ungrounded uncertainty",
                severity="high",
                evidence="The output contains hedging language without nearby source signals.",
                recommendation="Request citations, intermediate evidence, or a narrower task boundary.",
            )
        )

    if stderr.strip():
        hallucination_flags.append(
            RiskFlag(
                code="EXECUTION_ANOMALY",
                title="Execution anomaly",
                severity="medium",
                evidence="The wrapped command emitted stderr output, which may indicate tool failure or partial completion.",
                recommendation="Inspect stderr and rerun before relying on the result for audits or downstream actions.",
            )
        )

    nodes = [
        TraceNode(id="prompt", label="Prompt", kind="input", detail=prompt or "No prompt recorded."),
        TraceNode(
            id="system",
            label="System Prompt",
            kind="input",
            detail=system_prompt or "No system prompt recorded.",
        ),
        TraceNode(
            id="execution",
            label="Execution",
            kind="process",
            detail=" ".join(command) if command else "No command recorded.",
            score=1.0 if exit_code == 0 else 0.4,
        ),
        TraceNode(
            id="analysis",
            label="Heuristic Analysis",
            kind="analysis",
            detail=f"Hedges={hedge_hits}, source signals={source_hits}, bias markers={bias_hits}",
            score=confidence,
        ),
        TraceNode(
            id="output",
            label="Model Output",
            kind="output",
            detail=(stdout or stderr or "No output captured.")[:1200],
            score=confidence,
        ),
    ]

    risk_count = len(bias_flags) + len(hallucination_flags)
    summary = _build_summary(prompt, stdout, stderr, risk_count)

    return TraceRecord(
        model_hint=model_hint or "unknown",
        provider=provider or "unknown",
        prompt=prompt,
        system_prompt=system_prompt,
        command=list(command),
        exit_code=exit_code,
        stdout=stdout,
        stderr=stderr,
        metadata=dict(metadata or {}),
        confidence_score=confidence,
        bias_flags=bias_flags,
        hallucination_flags=hallucination_flags,
        provenance=provenance,
        nodes=nodes,
        summary=summary,
    )