graphite-engine 0.3.0__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. graphite_engine-0.3.2/PKG-INFO +251 -0
  2. graphite_engine-0.3.2/README.md +213 -0
  3. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/pyproject.toml +9 -10
  4. graphite_engine-0.3.2/src/graphite/__init__.py +53 -0
  5. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/src/graphite/adapters/alphaearth.py +4 -1
  6. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/src/graphite/adapters/weathernext.py +1 -0
  7. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/src/graphite/cache.py +1 -0
  8. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/src/graphite/claim.py +160 -25
  9. graphite_engine-0.3.2/src/graphite/claim_store.py +321 -0
  10. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/src/graphite/confidence.py +101 -67
  11. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/src/graphite/domain.py +23 -13
  12. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/src/graphite/enums.py +13 -6
  13. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/src/graphite/evidence.py +19 -11
  14. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/src/graphite/llm.py +1 -0
  15. graphite_engine-0.3.2/src/graphite/pipeline/__init__.py +18 -0
  16. graphite_engine-0.3.2/src/graphite/pipeline/analyzer.py +107 -0
  17. graphite_engine-0.3.2/src/graphite/pipeline/extractor.py +81 -0
  18. graphite_engine-0.3.2/src/graphite/pipeline/report.py +82 -0
  19. graphite_engine-0.3.2/src/graphite/pipeline/retriever.py +81 -0
  20. graphite_engine-0.3.2/src/graphite/pipeline/verifier.py +132 -0
  21. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/src/graphite/rules.py +3 -0
  22. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/src/graphite/schemas.py +63 -15
  23. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/src/graphite/text.py +128 -33
  24. graphite_engine-0.3.2/src/graphite_engine.egg-info/PKG-INFO +251 -0
  25. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/src/graphite_engine.egg-info/SOURCES.txt +7 -17
  26. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/src/graphite_engine.egg-info/requires.txt +2 -7
  27. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/tests/test_claim.py +30 -11
  28. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/tests/test_confidence.py +21 -14
  29. graphite_engine-0.3.0/PKG-INFO +0 -205
  30. graphite_engine-0.3.0/README.md +0 -164
  31. graphite_engine-0.3.0/src/graphite/__init__.py +0 -47
  32. graphite_engine-0.3.0/src/graphite/assembler.py +0 -299
  33. graphite_engine-0.3.0/src/graphite/claim_store.py +0 -133
  34. graphite_engine-0.3.0/src/graphite/features/__init__.py +0 -1
  35. graphite_engine-0.3.0/src/graphite/features/alphaearth_enricher.py +0 -125
  36. graphite_engine-0.3.0/src/graphite/features/embedding_similarity.py +0 -193
  37. graphite_engine-0.3.0/src/graphite/geo_evidence/__init__.py +0 -1
  38. graphite_engine-0.3.0/src/graphite/geo_evidence/geo_foundation.py +0 -86
  39. graphite_engine-0.3.0/src/graphite/graph.py +0 -182
  40. graphite_engine-0.3.0/src/graphite/io.py +0 -194
  41. graphite_engine-0.3.0/src/graphite/scenario.py +0 -157
  42. graphite_engine-0.3.0/src/graphite/scenarios/__init__.py +0 -1
  43. graphite_engine-0.3.0/src/graphite/scenarios/weathernext_forecast.py +0 -140
  44. graphite_engine-0.3.0/src/graphite/simulate.py +0 -245
  45. graphite_engine-0.3.0/src/graphite_engine.egg-info/PKG-INFO +0 -205
  46. graphite_engine-0.3.0/tests/test_graph_store.py +0 -57
  47. graphite_engine-0.3.0/tests/test_pipeline_core.py +0 -460
  48. graphite_engine-0.3.0/tests/test_propagation.py +0 -248
  49. graphite_engine-0.3.0/tests/test_scenario.py +0 -196
  50. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/LICENSE +0 -0
  51. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/setup.cfg +0 -0
  52. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/src/graphite/adapters/__init__.py +0 -0
  53. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/src/graphite/py.typed +0 -0
  54. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/src/graphite_engine.egg-info/dependency_links.txt +0 -0
  55. {graphite_engine-0.3.0 → graphite_engine-0.3.2}/src/graphite_engine.egg-info/top_level.txt +0 -0
@@ -0,0 +1,251 @@
1
+ Metadata-Version: 2.4
2
+ Name: graphite-engine
3
+ Version: 0.3.2
4
+ Summary: Open-source claim verification engine for agent-generated assertions in high-stakes domains
5
+ Author: Min Jun Kim
6
+ License: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/minjun1/graphite-core
8
+ Project-URL: Documentation, https://github.com/minjun1/graphite-core#readme
9
+ Project-URL: Repository, https://github.com/minjun1/graphite-core
10
+ Project-URL: Issues, https://github.com/minjun1/graphite-core/issues
11
+ Keywords: verification,claims,evidence,provenance,trust,graph
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: Apache Software License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Scientific/Engineering
21
+ Requires-Python: >=3.10
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: networkx>=3.0
25
+ Requires-Dist: pydantic>=2.0
26
+ Provides-Extra: llm
27
+ Requires-Dist: openai>=1.0; extra == "llm"
28
+ Requires-Dist: google-genai>=1.0; extra == "llm"
29
+ Provides-Extra: geo
30
+ Requires-Dist: rasterio>=1.3; extra == "geo"
31
+ Requires-Dist: numpy>=1.24; extra == "geo"
32
+ Provides-Extra: all
33
+ Requires-Dist: graphite-engine[geo,llm]; extra == "all"
34
+ Provides-Extra: dev
35
+ Requires-Dist: pytest>=8.0; extra == "dev"
36
+ Requires-Dist: python-dotenv>=1.0; extra == "dev"
37
+ Dynamic: license-file
38
+
39
+ <div align="center">
40
+ <h1>⛏️ Graphite</h1>
41
+ <p><strong>Claim verification engine for AI agent outputs.</strong></p>
42
+ <p><em>LLMs judge. Graphs remember.</em></p>
43
+ <p>Graphite extracts claims from agent-generated text, retrieves evidence, verifies support and contradiction, flags unsupported reasoning leaps, and stores every verdict with a full provenance trail — building a verification memory that gets stronger with every review.</p>
44
+ <p>
45
+ <a href="LICENSE"><img src="https://img.shields.io/badge/license-Apache--2.0-blue.svg" alt="License"></a>
46
+ <a href="https://python.org"><img src="https://img.shields.io/badge/python-3.10%2B-brightgreen.svg" alt="Python"></a>
47
+ </p>
48
+ </div>
49
+
50
+ > ⚠️ **v0.3.x — Experimental**. Usable and tested, but API may change before 1.0. Pin your version.
51
+
52
+ ---
53
+
54
+ ### How it works
55
+
56
+ Graphite turns raw agent output into a structured verification report.
57
+
58
+ ```python
59
+ from graphite.pipeline import verify_agent_output
60
+
61
+ report = verify_agent_output(
62
+     text=agent_memo_markdown,
63
+     corpus=sec_filings_corpus,
64
+     model="gemini-2.5-flash" # any OpenAI-compatible model works
65
+ )
66
+
67
+ print(f"Supported: {report.supported_count} | Conflicted: {report.conflicted_count}")
68
+ print(f"Requires Human Review: {len(report.risky_claim_ids)} claims")
69
+ ```
70
+
71
+ This single API wraps a 5-step pipeline:
72
+
73
+ 1. **Extract**: Parses the document into atomic claims using LLMs.
74
+ 2. **Retrieve**: Finds candidate evidence spans across the corpus for each claim.
75
+ 3. **Verify**: Judges claims against the retrieved spans (Supported, Conflicted, Insufficient).
76
+ 4. **Analyze**: Flags argument-level reasoning leaps (`CONCLUSION_JUMP`).
77
+ 5. **Report**: Aggregates the findings into a `VerificationReport` with structured rationale, review flags, and full provenance.
78
+
79
+ ---
80
+
81
+ ### Handling Logic Leaps & Human Review
82
+
83
+ Downstream UI and review workflows can be built directly on top of the structured output.
84
+
85
+ ```python
86
+ from graphite.claim import ArgumentVerdictEnum
87
+
88
+ # Check for logic leaps (Argument-level verification)
89
+ for argument in report.argument_verdicts:
90
+     if argument.verdict == ArgumentVerdictEnum.CONCLUSION_JUMP:
91
+         print(f"⚠️ LOGIC LEAP: {argument.text}")
92
+
93
+ # Route high-risk factual claims to a Human-in-the-loop review queue
94
+ for claim_id in report.risky_claim_ids:
95
+     verdict = report.get_verdict(claim_id)
96
+     if verdict.needs_human_review:
97
+         print(f"🚨 REVIEW NEEDED: {verdict.claim_text}")
98
+         print(f" Reason: {verdict.rationale.missing_evidence_reason or verdict.rationale.contradiction_type}")
99
+ ```
100
+
101
+ ---
102
+
103
+ ### Quickstart
104
+
105
+ ```bash
106
+ pip install "graphite-engine[llm]"
107
+ export GEMINI_API_KEY="your-api-key-here"
108
+ python examples/quickstart_verification/run.py
109
+ ```
110
+
111
+ Or from source:
112
+
113
+ ```bash
114
+ git clone https://github.com/minjun1/graphite-core.git
115
+ cd graphite-core
116
+ pip install -e ".[llm]"
117
+ export GEMINI_API_KEY="your-api-key-here"
118
+ python examples/quickstart_verification/run.py
119
+ ```
120
+
121
+ Graphite defaults to Gemini via the OpenAI-compatible endpoint, so any OpenAI-compatible provider also works — including local models via Ollama or vLLM, or hosted endpoints like Together and Groq. Set `OPENAI_API_KEY` and `OPENAI_BASE_URL` to point at any compatible endpoint.
122
+
123
+ ---
124
+
125
+ ## Why a Graph?
126
+
127
+ Most verification tools run once and forget. Graphite anchors every judgment into a persistent graph — turning disposable LLM outputs into a living verification memory.
128
+
129
+ **Claims are first-class objects.** The same assertion can be identified, revisited, and re-evaluated across documents and time — not lost in prompt logs.
130
+
131
+ **Evidence accumulates, not overwrites.** When a second source confirms (or contradicts) a claim, Graphite appends the new evidence to the existing node instead of starting from scratch.
132
+
133
+ **Review history becomes lineage.** AI verdict → analyst override → re-evaluation with new data — every step is recorded as a relationship in the graph, not a flat log entry.
134
+
135
+ **Cross-document deduplication.** When the same claim appears in TSMC's 10-K and Nvidia's 10-K, Graphite recognizes it as one canonical claim backed by two independent sources.
136
+
137
+ **Reasoning structure, not just fact-checking.** Claims don't exist in isolation. Graphite can represent claim-to-conclusion relationships, enabling checks like `CONCLUSION_JUMP` when the logical link between premises and conclusion is unsupported.
138
+
139
+ ---
140
+
141
+ ## Stateful Verification Memory
142
+
143
+ Unlike stateless evaluators that produce a score and discard context, Graphite's `ClaimStore` builds a persistent fact base that strengthens over time.
144
+
145
+ ```
146
+ Run 1: Extract "TSMC supplies CoWoS to Nvidia" from TSMC 10-K
147
+ → 1 evidence source recorded
148
+
149
+ Run 2: Same claim found in Nvidia 10-K
150
+ → evidence accumulates → 2 independent sources
151
+
152
+ Run 3: Exact duplicate from same source
153
+ → deduplicated, no change
154
+
155
+ Run 4: Related claim "Nvidia depends on TSMC" extracted
156
+ → cross-claim linkage via shared entities
157
+ ```
158
+
159
+ What this looks like as a graph:
160
+
161
+ ```
162
+ Claim: "TSMC supplies CoWoS to Nvidia"
163
+ ├── supported_by → TSMC 10-K (cited span)
164
+ ├── supported_by → Nvidia 10-K (cited span)
165
+ ├── reviewed_as → SUPPORTED (model verdict)
166
+ └── contributes_to → "Nvidia depends on TSMC"
167
+ ```
168
+
169
+ Each claim is a deduplicated node. Evidence merges across extraction runs. Analyst overrides persist. The result is a verification memory where repeated reviews compound rather than repeat.
170
+
171
+ *Most verification tools forget. Graphite remembers — and gets stronger with every review.*
172
+
173
+ *(See `examples/evidence_accumulation/` for a runnable demo — no API keys required.)*
174
+
175
+ ---
176
+
177
+ ## Graphite vs. Existing Tools
178
+
179
+ *Evaluators grade your prompts. Graphite audits your agent's claims — and remembers every verdict.*
180
+
181
+ These tools solve adjacent but different problems:
182
+
183
+ | Dimension | Ragas / TruLens / DeepEval | Graphite |
184
+ |-----------|---------------------------|----------|
185
+ | **Purpose** | Prompt/model evaluation (CI/CD) | Production output verification (runtime) |
186
+ | **State** | Stateless — each run is independent | Stateful — evidence accumulates across runs |
187
+ | **Output** | Scores (faithfulness, relevance) | Structured `VerificationReport` with provenance |
188
+ | **Logic Leaps** | Not addressed | `CONCLUSION_JUMP` / `OVERSTATED` detection |
189
+ | **Human Review** | Manual review of score dashboards | `needs_human_review` routing with analyst override |
190
+ | **Audit Trail** | Execution logs | Every verdict links to exact `cited_span` with full lineage |
191
+
192
+ ---
193
+
194
+ ## Evaluation Snapshot
195
+
196
+ Representative verification cases from our golden test suite (current snapshot run on GPT-4o; `evals/verify_eval.py`):
197
+
198
+ | Test Case | Type | Expected | Graphite Output |
199
+ |-----------|------|----------|-----------------|
200
+ | Paraphrased contradiction | Semantic | CONFLICTED | CONFLICTED |
201
+ | Numeric mismatch (10× error) | Factual | CONFLICTED | CONFLICTED |
202
+ | Temporal mismatch (stale CEO) | Temporal | CONFLICTED | CONFLICTED |
203
+ | Unsupported revenue prediction | Reasoning Leap | CONCLUSION_JUMP | CONCLUSION_JUMP |
204
+
205
+ - Claim-level verdict: correct in 3/3 factual cases
206
+ - Argument-level verdict: correct in 1/1 reasoning case
207
+ - These cases are intended as regression checks for key failure modes, not as a broad accuracy benchmark.
208
+
209
+ > *This is a representative snapshot, not a comprehensive benchmark. See `evals/` for the full test suite and `examples/` for runnable demos. A larger-scale evaluation suite (100+ memos) is on the roadmap.*
210
+
211
+ ---
212
+
213
+ ## Core Primitives
214
+
215
+ | Object | What it does |
216
+ |-----------|-------------|
217
+ | `VerificationReport` | Top-level summary of the entire review, ready for product UI integrations |
218
+ | `Verdict` | Claim-level judgment (`SUPPORTED`, `CONFLICTED`, `INSUFFICIENT`) with structured rationale |
219
+ | `ArgumentVerdict` | Argument-level judgment (`GROUNDED`, `CONCLUSION_JUMP`, `OVERSTATED`) |
220
+ | `ClaimStore` | Persistent verification memory — deduplicates claims, merges evidence, and preserves review history across runs |
221
+
222
+ ---
223
+
224
+ ## Reference Applications
225
+
226
+ Graphite is designed as the verification engine for high-stakes workflows across multiple domains:
227
+
228
+ - **Compliance & Legal Review**: Checking internal policy documents or marketing copy against regulatory guidelines.
229
+ - **Healthcare & Scientific Fact-checking**: Cross-referencing generated medical or scientific summaries against peer-reviewed journals.
230
+ - **Investment & Research QA**: Verifying AI-generated analyst memos against SEC filings or earnings call transcripts.
231
+
232
+ *(See `examples/quickstart_verification/` for end-to-end verification, `examples/evidence_accumulation/` for stateful memory, and `examples/lineage_override_demo/` for analyst override workflows.)*
233
+
234
+ ---
235
+
236
+ ## Optional extras
237
+
238
+ **Core** (always included): `networkx` + `pydantic`
239
+
240
+ ```bash
241
+ pip install -e ".[llm]" # LLM support (OpenAI-compatible providers)
242
+ pip install -e ".[all]" # Everything
243
+ ```
244
+
245
+ > Set `GEMINI_API_KEY` to get started. To use other providers, set `OPENAI_API_KEY` and `OPENAI_BASE_URL`.
246
+
247
+ ---
248
+
249
+ ## License
250
+
251
+ Apache-2.0 — see [LICENSE](LICENSE).
@@ -0,0 +1,213 @@
1
+ <div align="center">
2
+ <h1>⛏️ Graphite</h1>
3
+ <p><strong>Claim verification engine for AI agent outputs.</strong></p>
4
+ <p><em>LLMs judge. Graphs remember.</em></p>
5
+ <p>Graphite extracts claims from agent-generated text, retrieves evidence, verifies support and contradiction, flags unsupported reasoning leaps, and stores every verdict with a full provenance trail — building a verification memory that gets stronger with every review.</p>
6
+ <p>
7
+ <a href="LICENSE"><img src="https://img.shields.io/badge/license-Apache--2.0-blue.svg" alt="License"></a>
8
+ <a href="https://python.org"><img src="https://img.shields.io/badge/python-3.10%2B-brightgreen.svg" alt="Python"></a>
9
+ </p>
10
+ </div>
11
+
12
+ > ⚠️ **v0.3.x — Experimental**. Usable and tested, but API may change before 1.0. Pin your version.
13
+
14
+ ---
15
+
16
+ ### How it works
17
+
18
+ Graphite turns raw agent output into a structured verification report.
19
+
20
+ ```python
21
+ from graphite.pipeline import verify_agent_output
22
+
23
+ report = verify_agent_output(
24
+     text=agent_memo_markdown,
25
+     corpus=sec_filings_corpus,
26
+     model="gemini-2.5-flash" # any OpenAI-compatible model works
27
+ )
28
+
29
+ print(f"Supported: {report.supported_count} | Conflicted: {report.conflicted_count}")
30
+ print(f"Requires Human Review: {len(report.risky_claim_ids)} claims")
31
+ ```
32
+
33
+ This single API wraps a 5-step pipeline:
34
+
35
+ 1. **Extract**: Parses the document into atomic claims using LLMs.
36
+ 2. **Retrieve**: Finds candidate evidence spans across the corpus for each claim.
37
+ 3. **Verify**: Judges claims against the retrieved spans (Supported, Conflicted, Insufficient).
38
+ 4. **Analyze**: Flags argument-level reasoning leaps (`CONCLUSION_JUMP`).
39
+ 5. **Report**: Aggregates the findings into a `VerificationReport` with structured rationale, review flags, and full provenance.
40
+
41
+ ---
42
+
43
+ ### Handling Logic Leaps & Human Review
44
+
45
+ Downstream UI and review workflows can be built directly on top of the structured output.
46
+
47
+ ```python
48
+ from graphite.claim import ArgumentVerdictEnum
49
+
50
+ # Check for logic leaps (Argument-level verification)
51
+ for argument in report.argument_verdicts:
52
+     if argument.verdict == ArgumentVerdictEnum.CONCLUSION_JUMP:
53
+         print(f"⚠️ LOGIC LEAP: {argument.text}")
54
+
55
+ # Route high-risk factual claims to a Human-in-the-loop review queue
56
+ for claim_id in report.risky_claim_ids:
57
+     verdict = report.get_verdict(claim_id)
58
+     if verdict.needs_human_review:
59
+         print(f"🚨 REVIEW NEEDED: {verdict.claim_text}")
60
+         print(f" Reason: {verdict.rationale.missing_evidence_reason or verdict.rationale.contradiction_type}")
61
+ ```
62
+
63
+ ---
64
+
65
+ ### Quickstart
66
+
67
+ ```bash
68
+ pip install "graphite-engine[llm]"
69
+ export GEMINI_API_KEY="your-api-key-here"
70
+ python examples/quickstart_verification/run.py
71
+ ```
72
+
73
+ Or from source:
74
+
75
+ ```bash
76
+ git clone https://github.com/minjun1/graphite-core.git
77
+ cd graphite-core
78
+ pip install -e ".[llm]"
79
+ export GEMINI_API_KEY="your-api-key-here"
80
+ python examples/quickstart_verification/run.py
81
+ ```
82
+
83
+ Graphite defaults to Gemini via the OpenAI-compatible endpoint, so any OpenAI-compatible provider also works — including local models via Ollama or vLLM, or hosted endpoints like Together and Groq. Set `OPENAI_API_KEY` and `OPENAI_BASE_URL` to point at any compatible endpoint.
84
+
85
+ ---
86
+
87
+ ## Why a Graph?
88
+
89
+ Most verification tools run once and forget. Graphite anchors every judgment into a persistent graph — turning disposable LLM outputs into a living verification memory.
90
+
91
+ **Claims are first-class objects.** The same assertion can be identified, revisited, and re-evaluated across documents and time — not lost in prompt logs.
92
+
93
+ **Evidence accumulates, not overwrites.** When a second source confirms (or contradicts) a claim, Graphite appends the new evidence to the existing node instead of starting from scratch.
94
+
95
+ **Review history becomes lineage.** AI verdict → analyst override → re-evaluation with new data — every step is recorded as a relationship in the graph, not a flat log entry.
96
+
97
+ **Cross-document deduplication.** When the same claim appears in TSMC's 10-K and Nvidia's 10-K, Graphite recognizes it as one canonical claim backed by two independent sources.
98
+
99
+ **Reasoning structure, not just fact-checking.** Claims don't exist in isolation. Graphite can represent claim-to-conclusion relationships, enabling checks like `CONCLUSION_JUMP` when the logical link between premises and conclusion is unsupported.
100
+
101
+ ---
102
+
103
+ ## Stateful Verification Memory
104
+
105
+ Unlike stateless evaluators that produce a score and discard context, Graphite's `ClaimStore` builds a persistent fact base that strengthens over time.
106
+
107
+ ```
108
+ Run 1: Extract "TSMC supplies CoWoS to Nvidia" from TSMC 10-K
109
+ → 1 evidence source recorded
110
+
111
+ Run 2: Same claim found in Nvidia 10-K
112
+ → evidence accumulates → 2 independent sources
113
+
114
+ Run 3: Exact duplicate from same source
115
+ → deduplicated, no change
116
+
117
+ Run 4: Related claim "Nvidia depends on TSMC" extracted
118
+ → cross-claim linkage via shared entities
119
+ ```
120
+
121
+ What this looks like as a graph:
122
+
123
+ ```
124
+ Claim: "TSMC supplies CoWoS to Nvidia"
125
+ ├── supported_by → TSMC 10-K (cited span)
126
+ ├── supported_by → Nvidia 10-K (cited span)
127
+ ├── reviewed_as → SUPPORTED (model verdict)
128
+ └── contributes_to → "Nvidia depends on TSMC"
129
+ ```
130
+
131
+ Each claim is a deduplicated node. Evidence merges across extraction runs. Analyst overrides persist. The result is a verification memory where repeated reviews compound rather than repeat.
132
+
133
+ *Most verification tools forget. Graphite remembers — and gets stronger with every review.*
134
+
135
+ *(See `examples/evidence_accumulation/` for a runnable demo — no API keys required.)*
136
+
137
+ ---
138
+
139
+ ## Graphite vs. Existing Tools
140
+
141
+ *Evaluators grade your prompts. Graphite audits your agent's claims — and remembers every verdict.*
142
+
143
+ These tools solve adjacent but different problems:
144
+
145
+ | Dimension | Ragas / TruLens / DeepEval | Graphite |
146
+ |-----------|---------------------------|----------|
147
+ | **Purpose** | Prompt/model evaluation (CI/CD) | Production output verification (runtime) |
148
+ | **State** | Stateless — each run is independent | Stateful — evidence accumulates across runs |
149
+ | **Output** | Scores (faithfulness, relevance) | Structured `VerificationReport` with provenance |
150
+ | **Logic Leaps** | Not addressed | `CONCLUSION_JUMP` / `OVERSTATED` detection |
151
+ | **Human Review** | Manual review of score dashboards | `needs_human_review` routing with analyst override |
152
+ | **Audit Trail** | Execution logs | Every verdict links to exact `cited_span` with full lineage |
153
+
154
+ ---
155
+
156
+ ## Evaluation Snapshot
157
+
158
+ Representative verification cases from our golden test suite (current snapshot run on GPT-4o; `evals/verify_eval.py`):
159
+
160
+ | Test Case | Type | Expected | Graphite Output |
161
+ |-----------|------|----------|-----------------|
162
+ | Paraphrased contradiction | Semantic | CONFLICTED | CONFLICTED |
163
+ | Numeric mismatch (10× error) | Factual | CONFLICTED | CONFLICTED |
164
+ | Temporal mismatch (stale CEO) | Temporal | CONFLICTED | CONFLICTED |
165
+ | Unsupported revenue prediction | Reasoning Leap | CONCLUSION_JUMP | CONCLUSION_JUMP |
166
+
167
+ - Claim-level verdict: correct in 3/3 factual cases
168
+ - Argument-level verdict: correct in 1/1 reasoning case
169
+ - These cases are intended as regression checks for key failure modes, not as a broad accuracy benchmark.
170
+
171
+ > *This is a representative snapshot, not a comprehensive benchmark. See `evals/` for the full test suite and `examples/` for runnable demos. A larger-scale evaluation suite (100+ memos) is on the roadmap.*
172
+
173
+ ---
174
+
175
+ ## Core Primitives
176
+
177
+ | Object | What it does |
178
+ |-----------|-------------|
179
+ | `VerificationReport` | Top-level summary of the entire review, ready for product UI integrations |
180
+ | `Verdict` | Claim-level judgment (`SUPPORTED`, `CONFLICTED`, `INSUFFICIENT`) with structured rationale |
181
+ | `ArgumentVerdict` | Argument-level judgment (`GROUNDED`, `CONCLUSION_JUMP`, `OVERSTATED`) |
182
+ | `ClaimStore` | Persistent verification memory — deduplicates claims, merges evidence, and preserves review history across runs |
183
+
184
+ ---
185
+
186
+ ## Reference Applications
187
+
188
+ Graphite is designed as the verification engine for high-stakes workflows across multiple domains:
189
+
190
+ - **Compliance & Legal Review**: Checking internal policy documents or marketing copy against regulatory guidelines.
191
+ - **Healthcare & Scientific Fact-checking**: Cross-referencing generated medical or scientific summaries against peer-reviewed journals.
192
+ - **Investment & Research QA**: Verifying AI-generated analyst memos against SEC filings or earnings call transcripts.
193
+
194
+ *(See `examples/quickstart_verification/` for end-to-end verification, `examples/evidence_accumulation/` for stateful memory, and `examples/lineage_override_demo/` for analyst override workflows.)*
195
+
196
+ ---
197
+
198
+ ## Optional extras
199
+
200
+ **Core** (always included): `networkx` + `pydantic`
201
+
202
+ ```bash
203
+ pip install -e ".[llm]" # LLM support (OpenAI-compatible providers)
204
+ pip install -e ".[all]" # Everything
205
+ ```
206
+
207
+ > Set `GEMINI_API_KEY` to get started. To use other providers, set `OPENAI_API_KEY` and `OPENAI_BASE_URL`.
208
+
209
+ ---
210
+
211
+ ## License
212
+
213
+ Apache-2.0 — see [LICENSE](LICENSE).
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "graphite-engine"
7
- version = "0.3.0"
8
- description = "Open-source claim verification engine for high-stakes decisions"
7
+ version = "0.3.2"
8
+ description = "Open-source claim verification engine for agent-generated assertions in high-stakes domains"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
11
11
  license = {text = "Apache-2.0"}
@@ -28,24 +28,23 @@ dependencies = [
28
28
  ]
29
29
 
30
30
  [project.optional-dependencies]
31
- neo4j = ["neo4j>=5.0"]
32
- llm = ["google-genai>=1.0"]
33
- pdf = ["pdfplumber>=0.9"]
31
+ llm = ["openai>=1.0", "google-genai>=1.0"]
34
32
  geo = ["rasterio>=1.3", "numpy>=1.24"]
35
- all = ["graphite-engine[neo4j,llm,pdf,geo]"]
33
+ all = ["graphite-engine[llm,geo]"]
36
34
  dev = [
37
35
  "pytest>=8.0",
38
36
  "python-dotenv>=1.0",
39
37
  ]
40
38
 
41
39
  [project.urls]
42
- Homepage = "https://github.com/graf-research/graphite"
43
- Documentation = "https://github.com/graf-research/graphite#readme"
44
- Repository = "https://github.com/graf-research/graphite"
45
- Issues = "https://github.com/graf-research/graphite/issues"
40
+ Homepage = "https://github.com/minjun1/graphite-core"
41
+ Documentation = "https://github.com/minjun1/graphite-core#readme"
42
+ Repository = "https://github.com/minjun1/graphite-core"
43
+ Issues = "https://github.com/minjun1/graphite-core/issues"
46
44
 
47
45
  [tool.setuptools.packages.find]
48
46
  where = ["src"]
47
+ exclude = ["graphite._archive", "graphite._archive.*"]
49
48
 
50
49
  [tool.setuptools.package-data]
51
50
  graphite = ["py.typed"]
@@ -0,0 +1,53 @@
1
+ """
2
+ Graphite — Open-source claim verification engine for agent-generated
3
+ assertions in high-stakes domains.
4
+
5
+ Core primitives:
6
+ - Claim: the atomic unit of trust — structured assertion with provenance
7
+ - ClaimStore: evidence-accumulating registry (claims dedupe, evidence merges)
8
+ - Provenance: first-class evidence source (document, quote, confidence)
9
+ - ConfidenceScorer: explainable confidence scoring with named factors
10
+
11
+ Pipeline:
12
+ Agent/Extractor → Claim[] → ClaimStore (accumulate) → Verify
13
+ """
14
+
15
+ # ── Core schemas ──
16
+ from .schemas import ExtractedEdge, NodeRef, Provenance, InferenceBasis, ExtractionError
17
+ from .enums import (
18
+ EdgeType,
19
+ NodeType,
20
+ SourceType,
21
+ ConfidenceLevel,
22
+ AssertionMode,
23
+ EvidenceType,
24
+ )
25
+ from .evidence import EvidencePacket, EvidenceData
26
+
27
+ # ── Trust engine primitives ──
28
+ from .claim import (
29
+ Claim,
30
+ ClaimType,
31
+ ClaimStatus,
32
+ ClaimGranularity,
33
+ ReviewState,
34
+ ClaimOrigin,
35
+ )
36
+ from .claim import ConfidenceFactor, ConfidenceResult
37
+ from .claim_store import ClaimStore
38
+ from .confidence import ConfidenceScorer
39
+
40
+ # ── Domain plugin contracts ──
41
+ from .domain import (
42
+ BaseFetcher,
43
+ BaseExtractor,
44
+ BasePipeline,
45
+ DocumentContext,
46
+ DomainSpec,
47
+ )
48
+ from .domain import register_domain, get_domain, list_domains
49
+
50
+ # ── Rules ──
51
+ from .rules import BaseRuleEngine, RuleResult, ScoreBreakdown
52
+
53
+ __version__ = "0.3.2"
@@ -12,6 +12,7 @@ AlphaEarth Foundations:
12
12
  - Earth Engine dataset: GOOGLE/SATELLITE_EMBEDDING/V1/ANNUAL
13
13
  - GCS bucket: gs://alphaearth_foundations (Requester Pays)
14
14
  """
15
+
15
16
  import json
16
17
  import os
17
18
  from pathlib import Path
@@ -122,7 +123,9 @@ class AlphaEarthAdapter:
122
123
  Returns:
123
124
  numpy array of shape (64,)
124
125
  """
125
- cache_key = node_id or f"bbox_{bbox[0]:.4f}_{bbox[1]:.4f}_{bbox[2]:.4f}_{bbox[3]:.4f}"
126
+ cache_key = (
127
+ node_id or f"bbox_{bbox[0]:.4f}_{bbox[1]:.4f}_{bbox[2]:.4f}_{bbox[3]:.4f}"
128
+ )
126
129
 
127
130
  cached = self._read_cache(cache_key, year)
128
131
  if cached is not None:
@@ -12,6 +12,7 @@ WeatherNext 2:
12
12
  - Access: EE/BigQuery (requires data request form)
13
13
  - Note: Experimental dataset, not validated for real-world use
14
14
  """
15
+
15
16
  import json
16
17
  from pathlib import Path
17
18
  from typing import Any, Dict, List, Optional
@@ -4,6 +4,7 @@ graphite/cache.py — Extraction result caching.
4
4
  5-part cache key prevents pollution when anything changes:
5
5
  (source_id, content_hash, extractor_version, prompt_version, model_name)
6
6
  """
7
+
7
8
  import hashlib
8
9
  import json
9
10
  import os