llm2graph 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm2graph-0.3.0/PKG-INFO +270 -0
- llm2graph-0.3.0/README.md +248 -0
- llm2graph-0.3.0/pyproject.toml +30 -0
- llm2graph-0.3.0/setup.cfg +4 -0
- llm2graph-0.3.0/src/llm2graph/__init__.py +5 -0
- llm2graph-0.3.0/src/llm2graph/alias.py +22 -0
- llm2graph-0.3.0/src/llm2graph/cli.py +70 -0
- llm2graph-0.3.0/src/llm2graph/evaluation.py +66 -0
- llm2graph-0.3.0/src/llm2graph/graph_builder.py +131 -0
- llm2graph-0.3.0/src/llm2graph/llm_client.py +111 -0
- llm2graph-0.3.0/src/llm2graph/metrics.py +65 -0
- llm2graph-0.3.0/src/llm2graph/query_generator.py +107 -0
- llm2graph-0.3.0/src/llm2graph/relevance.py +18 -0
- llm2graph-0.3.0/src/llm2graph/settings.py +10 -0
- llm2graph-0.3.0/src/llm2graph.egg-info/PKG-INFO +270 -0
- llm2graph-0.3.0/src/llm2graph.egg-info/SOURCES.txt +18 -0
- llm2graph-0.3.0/src/llm2graph.egg-info/dependency_links.txt +1 -0
- llm2graph-0.3.0/src/llm2graph.egg-info/entry_points.txt +2 -0
- llm2graph-0.3.0/src/llm2graph.egg-info/requires.txt +15 -0
- llm2graph-0.3.0/src/llm2graph.egg-info/top_level.txt +1 -0
llm2graph-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: llm2graph
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: LLM2Graph: Dynamic Knowledge Graph Construction via LLM-only elicitation
|
|
5
|
+
Author: Raj Sanjay Shah
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
Requires-Dist: pydantic>=2.7
|
|
10
|
+
Requires-Dist: tqdm>=4.66
|
|
11
|
+
Requires-Dist: networkx>=3.2
|
|
12
|
+
Requires-Dist: tenacity>=8.2
|
|
13
|
+
Requires-Dist: typer>=0.12
|
|
14
|
+
Requires-Dist: openai>=1.37
|
|
15
|
+
Provides-Extra: gemini
|
|
16
|
+
Requires-Dist: google-generativeai>=0.7; extra == "gemini"
|
|
17
|
+
Provides-Extra: hf-local
|
|
18
|
+
Requires-Dist: transformers>=4.44; extra == "hf-local"
|
|
19
|
+
Requires-Dist: accelerate>=0.33; extra == "hf-local"
|
|
20
|
+
Requires-Dist: sentencepiece>=0.2; extra == "hf-local"
|
|
21
|
+
Requires-Dist: einops>=0.7; extra == "hf-local"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# LLM2Graph - Dynamic Knowledge Graph Construction & Evaluation
|
|
25
|
+
|
|
26
|
+
This package implements the graph-based methodology from the COLM 2025 paper:
|
|
27
|
+
|
|
28
|
+
> **The Unlearning Mirage: A Dynamic Framework for Evaluating LLM Unlearning**
|
|
29
|
+
|
|
30
|
+
It provides an **LLM-only** pipeline (no heuristic fallbacks) for:
|
|
31
|
+
1. **Graph construction** via entity-centric elicitation and triple extraction.
|
|
32
|
+
2. **Query generation** with **multi-hop**, **alias-perturbed**, **paraphrased** questions, and optional **distractors**.
|
|
33
|
+
3. **Evaluation** of **pre** vs **post** (unlearned) models, including a **residual knowledge** analysis.
|
|
34
|
+
|
|
35
|
+
If any step returns an unexpected format, the package **raises** `LLMError`.
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## Quick Start (End-to-End)
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
# 0) Install (choose providers you need)
|
|
44
|
+
pip install -e .
|
|
45
|
+
# Optionals:
|
|
46
|
+
pip install -e '.[gemini]' # Gemini support
|
|
47
|
+
pip install -e '.[hf-local]' # HuggingFace local LLMs
|
|
48
|
+
|
|
49
|
+
# 1) Build a graph from an entity
|
|
50
|
+
export OPENAI_API_KEY=sk-...
|
|
51
|
+
llm2graph entity --seed "Stephen King" --max-depth 2 \
|
|
52
|
+
--provider openai --model gpt-4o-mini-2024-07-18 --out graph.json
|
|
53
|
+
|
|
54
|
+
# 2) Generate multi-hop queries with alias/paraphrase perturbations + distractors
|
|
55
|
+
llm2graph gen-queries --graph graph.json --target "Stephen King" \
|
|
56
|
+
--hops 2 --num-paths 50 --aliases 3 --paraphrases 2 --distractors 2 \
|
|
57
|
+
--provider openai --model gpt-4o-mini-2024-07-18 --out queries.json
|
|
58
|
+
|
|
59
|
+
# 3) Evaluate pre vs post models (optionally use a judge model for equivalence)
|
|
60
|
+
llm2graph eval --queries queries.json \
|
|
61
|
+
--pre-provider openai --pre-model gpt-4o-mini-2024-07-18 \
|
|
62
|
+
--post-provider openai --post-model gpt-4o-mini-2024-07-18 \
|
|
63
|
+
--judge-provider openai --judge-model gpt-4o-mini-2024-07-18 \
|
|
64
|
+
--out eval_report.json
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
The **evaluation report** includes accuracies by bucket (single/multi-hop, alias, paraphrase) and a **residual_rate** capturing when gold phrasing fails but a perturbation still succeeds.
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## Installation & Providers
|
|
73
|
+
|
|
74
|
+
### Base
|
|
75
|
+
```bash
|
|
76
|
+
pip install -e .
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### OpenAI (default)
|
|
80
|
+
```bash
|
|
81
|
+
export OPENAI_API_KEY=sk-... # required for provider=openai
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Gemini
|
|
85
|
+
```bash
|
|
86
|
+
pip install -e '.[gemini]'
|
|
87
|
+
export GEMINI_API_KEY=... # required for provider=gemini
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Local HuggingFace
|
|
91
|
+
```bash
|
|
92
|
+
pip install -e '.[hf-local]'
|
|
93
|
+
# Ensure PyTorch is installed and you have a compatible GPU (recommended).
|
|
94
|
+
# Example model:
|
|
95
|
+
llm2graph entity --seed "Ada Lovelace" --provider hf-local \
|
|
96
|
+
--model mistralai/Mistral-7B-Instruct-v0.3 --max-depth 1 --out graph.json
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
All providers share the same strict prompting/validation; non-conforming outputs raise `LLMError`.
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## 1) Graph Construction (Entity --> Graph)
|
|
105
|
+
|
|
106
|
+
**Command**
|
|
107
|
+
```bash
|
|
108
|
+
llm2graph entity \
|
|
109
|
+
--seed "Stephen King" \
|
|
110
|
+
--max-depth 2 \
|
|
111
|
+
--provider openai \
|
|
112
|
+
--model gpt-4o-mini-2024-07-18 \
|
|
113
|
+
--out graph.json
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
**What happens**
|
|
117
|
+
- **Elicitation**: LLM writes a compact factual paragraph about the node.
|
|
118
|
+
- **Triple extraction**: LLM returns strictly formatted triples: `(subject ; relation ; object)`.
|
|
119
|
+
- **Strict checks**: subject must equal the current node; malformed lines raise.
|
|
120
|
+
- **Expansion (BFS)**: Adds objects as next-depth nodes.
|
|
121
|
+
|
|
122
|
+
**Advanced (programmatic kwargs in `GraphBuilder`)**
|
|
123
|
+
- `use_relevance: bool` - LLM-scored 0-10; below threshold filtered.
|
|
124
|
+
- `relevance_threshold: float` - default 3.0.
|
|
125
|
+
- `decay: float in [0.1, 1.0]` - limits breadth as depth grows.
|
|
126
|
+
- `max_nodes_per_depth: Optional[int]` - hard cap per depth.
|
|
127
|
+
- `alias_merge: bool` - LLM-judged canonicalization of new nodes (YES/NO).
|
|
128
|
+
|
|
129
|
+
**Output format (`graph.json`)**
|
|
130
|
+
```jsonc
|
|
131
|
+
{
|
|
132
|
+
"seed": "Stephen King",
|
|
133
|
+
"nodes": ["Stephen King", "The Shining", "Maine", "..."],
|
|
134
|
+
"edges": [
|
|
135
|
+
{"subject": "Stephen King", "relation": "wrote", "object": "The Shining"},
|
|
136
|
+
{"subject": "Stephen King", "relation": "lives in", "object": "Maine"}
|
|
137
|
+
]
|
|
138
|
+
}
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## 2) Query Generation (Multi-hop, Aliases, Paraphrases, Distractors)
|
|
145
|
+
|
|
146
|
+
**Command**
|
|
147
|
+
```bash
|
|
148
|
+
llm2graph gen-queries \
|
|
149
|
+
--graph graph.json \
|
|
150
|
+
--target "Stephen King" \
|
|
151
|
+
--hops 2 \
|
|
152
|
+
--num-paths 50 \
|
|
153
|
+
--aliases 3 \
|
|
154
|
+
--paraphrases 2 \
|
|
155
|
+
--distractors 2 \
|
|
156
|
+
--provider openai \
|
|
157
|
+
--model gpt-4o-mini-2024-07-18 \
|
|
158
|
+
--out queries.json
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
**What happens**
|
|
162
|
+
- Samples `--hops`-length paths from the graph.
|
|
163
|
+
- Synthesizes a **single** question per path; the final node is the gold answer.
|
|
164
|
+
- Generates **paraphrases** and **alias-perturbed** variants.
|
|
165
|
+
- Optionally generates **distractors**.
|
|
166
|
+
|
|
167
|
+
**Output (`queries.json`)**
|
|
168
|
+
```jsonc
|
|
169
|
+
{
|
|
170
|
+
"meta": {"hops": 2, "num_paths": 50, "aliases": 3, "paraphrases": 2, "distractors": 2},
|
|
171
|
+
"queries": [{
|
|
172
|
+
"path": [{"s": "A", "r": "rel1", "o": "B"}, {"s": "B", "r": "rel2", "o": "C"}],
|
|
173
|
+
"q_gold": "Which work by the 'King of Horror' features ...?",
|
|
174
|
+
"q_variants": ["... paraphrase1", "... paraphrase2"],
|
|
175
|
+
"q_alias_variants": ["... alias-perturbed phrasing ..."],
|
|
176
|
+
"answer": "C",
|
|
177
|
+
"distractors": ["X","Y"]
|
|
178
|
+
}]
|
|
179
|
+
}
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
**Difficulty control**
|
|
183
|
+
- **Hop length** (`--hops`) raises reasoning depth.
|
|
184
|
+
- **Distractors** increase choice difficulty.
|
|
185
|
+
- **Aliases/Paraphrases** stress alias-robustness and surface-form robustness.
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
## 3) Evaluation (Pre vs Post, with Residual Knowledge)
|
|
191
|
+
|
|
192
|
+
**Command**
|
|
193
|
+
```bash
|
|
194
|
+
llm2graph eval \
|
|
195
|
+
--queries queries.json \
|
|
196
|
+
--pre-provider openai --pre-model gpt-4o-mini-2024-07-18 \
|
|
197
|
+
--post-provider openai --post-model gpt-4o-mini-2024-07-18 \
|
|
198
|
+
--judge-provider openai --judge-model gpt-4o-mini-2024-07-18 \
|
|
199
|
+
--out eval_report.json
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
**What happens**
|
|
203
|
+
- Asks **pre** and **post** models the gold question.
|
|
204
|
+
- Asks the **post** model every variant (paraphrase/alias).
|
|
205
|
+
- If `judge` is provided, equivalence is decided by strict `"YES"/"NO"` judgments; otherwise exact string equality is used.
|
|
206
|
+
|
|
207
|
+
**Residual Knowledge (paper-aligned)**
|
|
208
|
+
- An item is marked **residual** if **gold** is incorrect **post**, **but** any alias/paraphrase variant is correct.
|
|
209
|
+
- Summarized via `residual_rate` and `residual_count`.
|
|
210
|
+
|
|
211
|
+
**Output (`eval_report.json`)**
|
|
212
|
+
```jsonc
|
|
213
|
+
{
|
|
214
|
+
"summary": {
|
|
215
|
+
"all": {"total": N, "correct": k, "accuracy": 0.xx},
|
|
216
|
+
"single_hop": {"total": ..., ...},
|
|
217
|
+
"multi_hop": {"total": ..., ...},
|
|
218
|
+
"alias": {"total": ..., ...},
|
|
219
|
+
"paraphrase": {"total": ..., ...},
|
|
220
|
+
"residual_rate": 0.xx,
|
|
221
|
+
"residual_count": M,
|
|
222
|
+
"num_items": N_items
|
|
223
|
+
},
|
|
224
|
+
"items": [
|
|
225
|
+
{
|
|
226
|
+
"path": [...],
|
|
227
|
+
"predictions": [
|
|
228
|
+
{"variant": "gold", "type": "gold", "pre": "…", "post": "…", "correct": true/false},
|
|
229
|
+
{"variant": "paraphrase", "type": "paraphrase", "pre": null, "post": "…", "correct": ...},
|
|
230
|
+
{"variant": "alias", "type": "alias", "pre": null, "post": "…", "correct": ...}
|
|
231
|
+
],
|
|
232
|
+
"residual_flags": {
|
|
233
|
+
"residual": true/false,
|
|
234
|
+
"gold_correct": false,
|
|
235
|
+
"alias_any": true/false,
|
|
236
|
+
"para_any": true/false
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
]
|
|
240
|
+
}
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
---
|
|
245
|
+
|
|
246
|
+
## Implementation Notes
|
|
247
|
+
|
|
248
|
+
- **Strict parsing**: Triple lines must be exactly `(subject ; relation ; object)`; subject must equal the current node.
|
|
249
|
+
- **Alias canonicalization**: Node merging uses `canonical_same(a,b)` --> strict `"YES"/"NO"` from an LLM.
|
|
250
|
+
- **Relevance scoring**: 0-10 numeric, LLM-only; thresholded filtering (optional).
|
|
251
|
+
- **HF local chat templates**: If available, we use `.apply_chat_template`; else a minimal structured prompt is used.
|
|
252
|
+
- **No heuristic fallbacks**: Any format drift raises `LLMError`.
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
---
|
|
256
|
+
|
|
257
|
+
## Troubleshooting
|
|
258
|
+
|
|
259
|
+
- **LLMError**: The model did not follow the strict format. Retry with a different model or lower temperature.
|
|
260
|
+
- **Model access**: Ensure `OPENAI_API_KEY`/`GEMINI_API_KEY` is set; confirm the `--model` exists for that provider.
|
|
261
|
+
- **HF OOM**: Choose a smaller HF repo; reduce generation tokens; consider 4/8-bit loading (extend loader as needed).
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
---
|
|
265
|
+
|
|
266
|
+
## Citation
|
|
267
|
+
|
|
268
|
+
If you use this package, please cite:
|
|
269
|
+
|
|
270
|
+
Shah, Raj Sanjay, Jing Huang, Keerthiram Murugesan, Nathalie Baracaldo, and Diyi Yang. *The Unlearning Mirage: A Dynamic Framework for Evaluating LLM Unlearning.* Second Conference on Language Modeling. 2025.
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
|
|
2
|
+
# LLM2Graph - Dynamic Knowledge Graph Construction & Evaluation
|
|
3
|
+
|
|
4
|
+
This package implements the graph-based methodology from the COLM 2025 paper:
|
|
5
|
+
|
|
6
|
+
> **The Unlearning Mirage: A Dynamic Framework for Evaluating LLM Unlearning**
|
|
7
|
+
|
|
8
|
+
It provides an **LLM-only** pipeline (no heuristic fallbacks) for:
|
|
9
|
+
1. **Graph construction** via entity-centric elicitation and triple extraction.
|
|
10
|
+
2. **Query generation** with **multi-hop**, **alias-perturbed**, **paraphrased** questions, and optional **distractors**.
|
|
11
|
+
3. **Evaluation** of **pre** vs **post** (unlearned) models, including a **residual knowledge** analysis.
|
|
12
|
+
|
|
13
|
+
If any step returns an unexpected format, the package **raises** `LLMError`.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## Quick Start (End-to-End)
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
# 0) Install (choose providers you need)
|
|
22
|
+
pip install -e .
|
|
23
|
+
# Optionals:
|
|
24
|
+
pip install -e '.[gemini]' # Gemini support
|
|
25
|
+
pip install -e '.[hf-local]' # HuggingFace local LLMs
|
|
26
|
+
|
|
27
|
+
# 1) Build a graph from an entity
|
|
28
|
+
export OPENAI_API_KEY=sk-...
|
|
29
|
+
llm2graph entity --seed "Stephen King" --max-depth 2 \
|
|
30
|
+
--provider openai --model gpt-4o-mini-2024-07-18 --out graph.json
|
|
31
|
+
|
|
32
|
+
# 2) Generate multi-hop queries with alias/paraphrase perturbations + distractors
|
|
33
|
+
llm2graph gen-queries --graph graph.json --target "Stephen King" \
|
|
34
|
+
--hops 2 --num-paths 50 --aliases 3 --paraphrases 2 --distractors 2 \
|
|
35
|
+
--provider openai --model gpt-4o-mini-2024-07-18 --out queries.json
|
|
36
|
+
|
|
37
|
+
# 3) Evaluate pre vs post models (optionally use a judge model for equivalence)
|
|
38
|
+
llm2graph eval --queries queries.json \
|
|
39
|
+
--pre-provider openai --pre-model gpt-4o-mini-2024-07-18 \
|
|
40
|
+
--post-provider openai --post-model gpt-4o-mini-2024-07-18 \
|
|
41
|
+
--judge-provider openai --judge-model gpt-4o-mini-2024-07-18 \
|
|
42
|
+
--out eval_report.json
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
The **evaluation report** includes accuracies by bucket (single/multi-hop, alias, paraphrase) and a **residual_rate** capturing when gold phrasing fails but a perturbation still succeeds.
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Installation & Providers
|
|
51
|
+
|
|
52
|
+
### Base
|
|
53
|
+
```bash
|
|
54
|
+
pip install -e .
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### OpenAI (default)
|
|
58
|
+
```bash
|
|
59
|
+
export OPENAI_API_KEY=sk-... # required for provider=openai
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Gemini
|
|
63
|
+
```bash
|
|
64
|
+
pip install -e '.[gemini]'
|
|
65
|
+
export GEMINI_API_KEY=... # required for provider=gemini
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Local HuggingFace
|
|
69
|
+
```bash
|
|
70
|
+
pip install -e '.[hf-local]'
|
|
71
|
+
# Ensure PyTorch is installed and you have a compatible GPU (recommended).
|
|
72
|
+
# Example model:
|
|
73
|
+
llm2graph entity --seed "Ada Lovelace" --provider hf-local \
|
|
74
|
+
--model mistralai/Mistral-7B-Instruct-v0.3 --max-depth 1 --out graph.json
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
All providers share the same strict prompting/validation; non-conforming outputs raise `LLMError`.
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## 1) Graph Construction (Entity --> Graph)
|
|
83
|
+
|
|
84
|
+
**Command**
|
|
85
|
+
```bash
|
|
86
|
+
llm2graph entity \
|
|
87
|
+
--seed "Stephen King" \
|
|
88
|
+
--max-depth 2 \
|
|
89
|
+
--provider openai \
|
|
90
|
+
--model gpt-4o-mini-2024-07-18 \
|
|
91
|
+
--out graph.json
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
**What happens**
|
|
95
|
+
- **Elicitation**: LLM writes a compact factual paragraph about the node.
|
|
96
|
+
- **Triple extraction**: LLM returns strictly formatted triples: `(subject ; relation ; object)`.
|
|
97
|
+
- **Strict checks**: subject must equal the current node; malformed lines raise.
|
|
98
|
+
- **Expansion (BFS)**: Adds objects as next-depth nodes.
|
|
99
|
+
|
|
100
|
+
**Advanced (programmatic kwargs in `GraphBuilder`)**
|
|
101
|
+
- `use_relevance: bool` - LLM-scored 0-10; below threshold filtered.
|
|
102
|
+
- `relevance_threshold: float` - default 3.0.
|
|
103
|
+
- `decay: float in [0.1, 1.0]` - limits breadth as depth grows.
|
|
104
|
+
- `max_nodes_per_depth: Optional[int]` - hard cap per depth.
|
|
105
|
+
- `alias_merge: bool` - LLM-judged canonicalization of new nodes (YES/NO).
|
|
106
|
+
|
|
107
|
+
**Output format (`graph.json`)**
|
|
108
|
+
```jsonc
|
|
109
|
+
{
|
|
110
|
+
"seed": "Stephen King",
|
|
111
|
+
"nodes": ["Stephen King", "The Shining", "Maine", "..."],
|
|
112
|
+
"edges": [
|
|
113
|
+
{"subject": "Stephen King", "relation": "wrote", "object": "The Shining"},
|
|
114
|
+
{"subject": "Stephen King", "relation": "lives in", "object": "Maine"}
|
|
115
|
+
]
|
|
116
|
+
}
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## 2) Query Generation (Multi-hop, Aliases, Paraphrases, Distractors)
|
|
123
|
+
|
|
124
|
+
**Command**
|
|
125
|
+
```bash
|
|
126
|
+
llm2graph gen-queries \
|
|
127
|
+
--graph graph.json \
|
|
128
|
+
--target "Stephen King" \
|
|
129
|
+
--hops 2 \
|
|
130
|
+
--num-paths 50 \
|
|
131
|
+
--aliases 3 \
|
|
132
|
+
--paraphrases 2 \
|
|
133
|
+
--distractors 2 \
|
|
134
|
+
--provider openai \
|
|
135
|
+
--model gpt-4o-mini-2024-07-18 \
|
|
136
|
+
--out queries.json
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
**What happens**
|
|
140
|
+
- Samples `--hops`-length paths from the graph.
|
|
141
|
+
- Synthesizes a **single** question per path; the final node is the gold answer.
|
|
142
|
+
- Generates **paraphrases** and **alias-perturbed** variants.
|
|
143
|
+
- Optionally generates **distractors**.
|
|
144
|
+
|
|
145
|
+
**Output (`queries.json`)**
|
|
146
|
+
```jsonc
|
|
147
|
+
{
|
|
148
|
+
"meta": {"hops": 2, "num_paths": 50, "aliases": 3, "paraphrases": 2, "distractors": 2},
|
|
149
|
+
"queries": [{
|
|
150
|
+
"path": [{"s": "A", "r": "rel1", "o": "B"}, {"s": "B", "r": "rel2", "o": "C"}],
|
|
151
|
+
"q_gold": "Which work by the 'King of Horror' features ...?",
|
|
152
|
+
"q_variants": ["... paraphrase1", "... paraphrase2"],
|
|
153
|
+
"q_alias_variants": ["... alias-perturbed phrasing ..."],
|
|
154
|
+
"answer": "C",
|
|
155
|
+
"distractors": ["X","Y"]
|
|
156
|
+
}]
|
|
157
|
+
}
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
**Difficulty control**
|
|
161
|
+
- **Hop length** (`--hops`) raises reasoning depth.
|
|
162
|
+
- **Distractors** increase choice difficulty.
|
|
163
|
+
- **Aliases/Paraphrases** stress alias-robustness and surface-form robustness.
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
## 3) Evaluation (Pre vs Post, with Residual Knowledge)
|
|
169
|
+
|
|
170
|
+
**Command**
|
|
171
|
+
```bash
|
|
172
|
+
llm2graph eval \
|
|
173
|
+
--queries queries.json \
|
|
174
|
+
--pre-provider openai --pre-model gpt-4o-mini-2024-07-18 \
|
|
175
|
+
--post-provider openai --post-model gpt-4o-mini-2024-07-18 \
|
|
176
|
+
--judge-provider openai --judge-model gpt-4o-mini-2024-07-18 \
|
|
177
|
+
--out eval_report.json
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
**What happens**
|
|
181
|
+
- Asks **pre** and **post** models the gold question.
|
|
182
|
+
- Asks the **post** model every variant (paraphrase/alias).
|
|
183
|
+
- If `judge` is provided, equivalence is decided by strict `"YES"/"NO"` judgments; otherwise exact string equality is used.
|
|
184
|
+
|
|
185
|
+
**Residual Knowledge (paper-aligned)**
|
|
186
|
+
- An item is marked **residual** if **gold** is incorrect **post**, **but** any alias/paraphrase variant is correct.
|
|
187
|
+
- Summarized via `residual_rate` and `residual_count`.
|
|
188
|
+
|
|
189
|
+
**Output (`eval_report.json`)**
|
|
190
|
+
```jsonc
|
|
191
|
+
{
|
|
192
|
+
"summary": {
|
|
193
|
+
"all": {"total": N, "correct": k, "accuracy": 0.xx},
|
|
194
|
+
"single_hop": {"total": ..., ...},
|
|
195
|
+
"multi_hop": {"total": ..., ...},
|
|
196
|
+
"alias": {"total": ..., ...},
|
|
197
|
+
"paraphrase": {"total": ..., ...},
|
|
198
|
+
"residual_rate": 0.xx,
|
|
199
|
+
"residual_count": M,
|
|
200
|
+
"num_items": N_items
|
|
201
|
+
},
|
|
202
|
+
"items": [
|
|
203
|
+
{
|
|
204
|
+
"path": [...],
|
|
205
|
+
"predictions": [
|
|
206
|
+
{"variant": "gold", "type": "gold", "pre": "…", "post": "…", "correct": true/false},
|
|
207
|
+
{"variant": "paraphrase", "type": "paraphrase", "pre": null, "post": "…", "correct": ...},
|
|
208
|
+
{"variant": "alias", "type": "alias", "pre": null, "post": "…", "correct": ...}
|
|
209
|
+
],
|
|
210
|
+
"residual_flags": {
|
|
211
|
+
"residual": true/false,
|
|
212
|
+
"gold_correct": false,
|
|
213
|
+
"alias_any": true/false,
|
|
214
|
+
"para_any": true/false
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
]
|
|
218
|
+
}
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
## Implementation Notes
|
|
225
|
+
|
|
226
|
+
- **Strict parsing**: Triple lines must be exactly `(subject ; relation ; object)`; subject must equal the current node.
|
|
227
|
+
- **Alias canonicalization**: Node merging uses `canonical_same(a,b)` --> strict `"YES"/"NO"` from an LLM.
|
|
228
|
+
- **Relevance scoring**: 0-10 numeric, LLM-only; thresholded filtering (optional).
|
|
229
|
+
- **HF local chat templates**: If available, we use `.apply_chat_template`; else a minimal structured prompt is used.
|
|
230
|
+
- **No heuristic fallbacks**: Any format drift raises `LLMError`.
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
---
|
|
234
|
+
|
|
235
|
+
## Troubleshooting
|
|
236
|
+
|
|
237
|
+
- **LLMError**: The model did not follow the strict format. Retry with a different model or lower temperature.
|
|
238
|
+
- **Model access**: Ensure `OPENAI_API_KEY`/`GEMINI_API_KEY` is set; confirm the `--model` exists for that provider.
|
|
239
|
+
- **HF OOM**: Choose a smaller HF repo; reduce generation tokens; consider 4/8-bit loading (extend loader as needed).
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
---
|
|
243
|
+
|
|
244
|
+
## Citation
|
|
245
|
+
|
|
246
|
+
If you use this package, please cite:
|
|
247
|
+
|
|
248
|
+
Shah, Raj Sanjay, Jing Huang, Keerthiram Murugesan, Nathalie Baracaldo, and Diyi Yang. *The Unlearning Mirage: A Dynamic Framework for Evaluating LLM Unlearning.* Second Conference on Language Modeling. 2025.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "llm2graph"
|
|
7
|
+
version = "0.3.0"
|
|
8
|
+
description = "LLM2Graph: Dynamic Knowledge Graph Construction via LLM-only elicitation"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
authors = [{name = "Raj Sanjay Shah"}]
|
|
12
|
+
license = {text = "MIT"}
|
|
13
|
+
dependencies = [
|
|
14
|
+
"pydantic>=2.7",
|
|
15
|
+
"tqdm>=4.66",
|
|
16
|
+
"networkx>=3.2",
|
|
17
|
+
"tenacity>=8.2",
|
|
18
|
+
"typer>=0.12",
|
|
19
|
+
"openai>=1.37",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[project.optional-dependencies]
|
|
23
|
+
gemini = ["google-generativeai>=0.7"]
|
|
24
|
+
hf-local = ["transformers>=4.44", "accelerate>=0.33", "sentencepiece>=0.2", "einops>=0.7"]
|
|
25
|
+
|
|
26
|
+
[project.scripts]
|
|
27
|
+
llm2graph = "llm2graph.cli:app"
|
|
28
|
+
|
|
29
|
+
[tool.setuptools.packages.find]
|
|
30
|
+
where = ["src"]
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from .llm_client import LLMClient, LLMError
|
|
2
|
+
|
|
3
|
+
class AliasTools:
    """LLM-backed alias utilities: canonical-identity checks and alias generation.

    All decisions are delegated to the wrapped ``LLMClient``; any output that
    does not match the strict expected format raises ``LLMError`` (no
    heuristic fallbacks).
    """

    def __init__(self, llm: LLMClient):
        # Client used for every chat round-trip below.
        self.llm = llm

    def canonical_same(self, a: str, b: str) -> bool:
        """Return True iff the LLM judges *a* and *b* to name the same canonical entity.

        Raises ``LLMError`` when the model answers anything other than a
        (case-insensitive, whitespace-tolerant) YES or NO.
        """
        system = "Answer strictly 'YES' or 'NO'."
        user = f'Are these two names the **same canonical entity**?\nA = "{a}"\nB = "{b}"\nAnswer:'
        verdict = self.llm.chat(system, user).strip().upper()
        if verdict == "YES":
            return True
        if verdict == "NO":
            return False
        raise LLMError(f"Alias check returned invalid token: {verdict!r}")

    def generate_aliases(self, name: str, k: int) -> list[str]:
        """Return up to *k* alternative surface forms for *name*, one per LLM output line.

        Blank lines are dropped; raises ``LLMError`` when the model produces
        no usable lines at all.
        """
        system = "Output exactly k alternative surface forms (aliases) for the given entity; one per line; no numbering."
        user = f'Entity: "{name}"\nk = {k}\nConstraints: common nicknames/epithets/abbreviations; no hallucinated entities.'
        reply = self.llm.chat(system, user)
        stripped = (line.strip() for line in reply.splitlines())
        candidates = [line for line in stripped if line]
        if not candidates:
            raise LLMError("No aliases produced by LLM")
        return candidates[:k]
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import typer
|
|
3
|
+
from .settings import Config
|
|
4
|
+
from .graph_builder import GraphBuilder
|
|
5
|
+
|
|
6
|
+
app = typer.Typer(help="LLM2Graph: LLM-only knowledge graph construction")
|
|
7
|
+
|
|
8
|
+
@app.command("entity")
def entity_to_graph(seed: str = typer.Option(..., "--seed", help="Seed entity name"),
                    max_depth: int = typer.Option(2, "--max-depth", help="Max BFS depth (0..5)"),
                    model: str = typer.Option("gpt-4o-mini-2024-07-18", "--model", help="Model id or HF repo id"),
                    provider: str = typer.Option("openai", "--provider", help="Provider (openai | gemini | hf-local)"),
                    temperature: float = typer.Option(0.2, "--temperature"),
                    timeout_s: int = typer.Option(60, "--timeout-s"),
                    out: str = typer.Option("graph.json", "--out", help="Output JSON file path")):
    """Build a knowledge graph from a seed entity and write it to *out* as JSON.

    Expands the graph from ``seed`` via ``GraphBuilder`` (BFS up to
    ``max_depth``, per the option help text) and serializes the result with
    ``to_json()``.
    """
    config = Config(model=model, max_depth=max_depth, temperature=temperature,
                    timeout_s=timeout_s, provider=provider)
    graph = GraphBuilder(config).build(seed)
    with open(out, "w", encoding="utf-8") as handle:
        json.dump(graph.to_json(), handle, ensure_ascii=False, indent=2)
    typer.echo(f"Wrote {out}")
|
|
22
|
+
|
|
23
|
+
# Imports for the query-generation and evaluation commands.
# Optional is required for the nullable judge options below.
from typing import Optional

from .query_generator import QueryGenerator
from .evaluation import Evaluator
from .llm_client import LLMClient


@app.command("gen-queries")
def gen_queries(graph: str = typer.Option(..., "--graph", help="Graph JSON from entity builder"),
                target: str = typer.Option(..., "--target", help="Seed/target entity to start paths"),
                hops: int = typer.Option(1, "--hops"),
                num_paths: int = typer.Option(50, "--num-paths"),
                aliases: int = typer.Option(0, "--aliases"),
                paraphrases: int = typer.Option(0, "--paraphrases"),
                distractors: int = typer.Option(0, "--distractors"),
                provider: str = typer.Option("openai", "--provider"),
                model: str = typer.Option("gpt-4o-mini-2024-07-18", "--model"),
                out: str = typer.Option("queries.json", "--out")):
    """Generate questions from a built graph and write them to *out* as JSON.

    Loads the graph JSON produced by the ``entity`` command and delegates to
    ``QueryGenerator.generate`` with the requested hops, paths, aliases,
    paraphrases, and distractors.
    """
    with open(graph, "r", encoding="utf-8") as f:
        graph_json = json.load(f)  # module-level json; the local re-import was redundant
    llm = LLMClient(provider=provider, model=model)
    generator = QueryGenerator(llm)
    queries_json = generator.generate(graph_json, target=target, hops=hops,
                                      num_paths=num_paths, aliases=aliases,
                                      paraphrases=paraphrases, distractors=distractors)
    with open(out, "w", encoding="utf-8") as f:
        json.dump(queries_json, f, ensure_ascii=False, indent=2)
    typer.echo(f"Wrote {out}")


@app.command("eval")
def eval_models(queries: str = typer.Option(..., "--queries"),
                pre_provider: str = typer.Option("openai", "--pre-provider"),
                pre_model: str = typer.Option("gpt-4o-mini-2024-07-18", "--pre-model"),
                post_provider: str = typer.Option("openai", "--post-provider"),
                post_model: str = typer.Option("gpt-4o-mini-2024-07-18", "--post-model"),
                judge_provider: Optional[str] = typer.Option(None, "--judge-provider"),
                judge_model: Optional[str] = typer.Option(None, "--judge-model"),
                out: str = typer.Option("eval_report.json", "--out")):
    """Evaluate pre- vs post-unlearning models on generated queries; write a report to *out*.

    A judge client is only constructed when BOTH ``--judge-provider`` and
    ``--judge-model`` are supplied; otherwise ``Evaluator`` receives None
    (and presumably falls back to exact-match scoring — see Evaluator).
    """
    with open(queries, "r", encoding="utf-8") as f:
        queries_json = json.load(f)  # module-level json; the local re-import was redundant
    pre = LLMClient(provider=pre_provider, model=pre_model)
    post = LLMClient(provider=post_provider, model=post_model)
    judge = (LLMClient(provider=judge_provider, model=judge_model)
             if judge_provider and judge_model else None)
    report = Evaluator(pre, post, judge).run(queries_json)
    with open(out, "w", encoding="utf-8") as f:
        json.dump(report, f, ensure_ascii=False, indent=2)
    typer.echo(f"Wrote {out}")


# BUGFIX: the entry-point guard originally sat BEFORE the gen-queries/eval
# definitions, so running this module directly invoked app() (which exits via
# SystemExit) with only the "entity" command registered. It must come last,
# after every @app.command has run.
if __name__ == "__main__":
    app()
|