rust-kgdb 0.6.31 → 0.6.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +42 -0
- package/CLAUDE.md +50 -499
- package/HYPERMIND_BENCHMARK_REPORT.md +199 -41
- package/README.md +194 -171
- package/benchmark-frameworks.py +568 -0
- package/package.json +3 -1
- package/verified_benchmark_results.json +307 -0
|
@@ -1,28 +1,31 @@
|
|
|
1
1
|
# HyperMind Benchmark Report
|
|
2
2
|
|
|
3
|
-
##
|
|
3
|
+
## Verified Framework Comparison: Schema Injection Works
|
|
4
4
|
|
|
5
|
-
**Version**: 0.6.
|
|
5
|
+
**Version**: 0.6.32
|
|
6
6
|
**Date**: December 16, 2025
|
|
7
|
-
**SDK**: rust-kgdb@0.6.
|
|
7
|
+
**SDK**: rust-kgdb@0.6.32
|
|
8
8
|
|
|
9
9
|
---
|
|
10
10
|
|
|
11
|
-
##
|
|
11
|
+
## Executive Summary (Verified Results)
|
|
12
12
|
|
|
13
|
-
**
|
|
13
|
+
**Schema injection improves every framework tested, by +66.7 percentage points on average.**
|
|
14
14
|
|
|
15
|
-
|
|
|
16
|
-
|
|
17
|
-
| **
|
|
18
|
-
| **
|
|
19
|
-
| **
|
|
20
|
-
| **
|
|
21
|
-
|
|
15
|
+
| Framework | No Schema | With Schema | Improvement |
|
|
16
|
+
|-----------|-----------|-------------|-------------|
|
|
17
|
+
| **Vanilla OpenAI** | 0.0% | 71.4% | +71.4 pp |
|
|
18
|
+
| **LangChain** | 0.0% | 71.4% | +71.4 pp |
|
|
19
|
+
| **DSPy** | 14.3% | 71.4% | +57.1 pp |
|
|
20
|
+
| **Average** | 4.8% | **71.4%** | **+66.7 pp** |
|
|
21
|
+
|
|
22
|
+
*GPT-4o, 7 LUBM queries, real API calls, no mocking. See `verified_benchmark_results.json`.*
|
|
23
|
+
|
|
24
|
+
**Key Insight**: The value is in the ARCHITECTURE (schema injection, type contracts), not the specific framework.
|
|
22
25
|
|
|
23
26
|
---
|
|
24
27
|
|
|
25
|
-
## Why Vanilla LLMs Fail
|
|
28
|
+
## Why Vanilla LLMs Fail (Without Schema)
|
|
26
29
|
|
|
27
30
|
When you ask a vanilla LLM to query your database:
|
|
28
31
|
|
|
@@ -33,39 +36,194 @@ When you ask a vanilla LLM to query your database:
|
|
|
33
36
|
|
|
34
37
|
---
|
|
35
38
|
|
|
36
|
-
## How
|
|
39
|
+
## How Schema Injection Fixes This
|
|
37
40
|
|
|
38
|
-
HyperMind
|
|
41
|
+
The HyperMind approach (schema injection) is framework-agnostic; all three frameworks tested reached the same 71.4% accuracy with it:
|
|
39
42
|
|
|
40
43
|
1. **Schema injection** - LLM sees your real data structure (30 classes, 23 properties)
|
|
41
|
-
2. **
|
|
42
|
-
3. **
|
|
43
|
-
4. **Reproducible** - Same question = Same answer
|
|
44
|
+
2. **Output format** - Explicit instructions for raw SPARQL (no markdown)
|
|
45
|
+
3. **Type contracts** - Predicate constraints from actual schema
|
|
46
|
+
4. **Reproducible** - Same question = Same answer
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Benchmark Setup: Code for Each Framework
|
|
51
|
+
|
|
52
|
+
### Test Queries (Same for All Frameworks)
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
TEST_QUERIES = [
|
|
56
|
+
{"id": "A1", "question": "Find all teachers", "correct_predicate": "teacherOf"},
|
|
57
|
+
{"id": "A2", "question": "Get student emails", "correct_predicate": "emailAddress"},
|
|
58
|
+
{"id": "A3", "question": "Find faculty members", "correct_predicate": "Professor"},
|
|
59
|
+
{"id": "S1", "question": "Write a SPARQL query to count professors. Just give me the query."},
|
|
60
|
+
{"id": "S2", "question": "SPARQL only, no explanation: find graduate students"},
|
|
61
|
+
{"id": "M1", "question": "Find professors who work for departments"},
|
|
62
|
+
{"id": "E1", "question": "Find professors with no publications"}
|
|
63
|
+
]
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### LUBM Schema (Injected for "With Schema" Tests)
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
LUBM_SCHEMA = """LUBM (Lehigh University Benchmark) Schema:
|
|
70
|
+
|
|
71
|
+
PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#>
|
|
44
72
|
|
|
73
|
+
Classes: University, Department, Professor, AssociateProfessor, AssistantProfessor,
|
|
74
|
+
FullProfessor, Lecturer, GraduateStudent, UndergraduateStudent,
|
|
75
|
+
Course, GraduateCourse, Publication, Research, ResearchGroup
|
|
76
|
+
|
|
77
|
+
Properties:
|
|
78
|
+
- ub:worksFor (person → organization)
|
|
79
|
+
- ub:memberOf (person → organization)
|
|
80
|
+
- ub:advisor (student → professor)
|
|
81
|
+
- ub:takesCourse (student → course)
|
|
82
|
+
- ub:teacherOf (professor → course)
|
|
83
|
+
- ub:publicationAuthor (publication → person)
|
|
84
|
+
- ub:subOrganizationOf (organization → organization)
|
|
85
|
+
- ub:emailAddress (person → string)
|
|
86
|
+
|
|
87
|
+
IMPORTANT: Use ONLY these predicates. Do NOT use: teacher, email, faculty"""
|
|
45
88
|
```
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## Framework Code Comparison
|
|
93
|
+
|
|
94
|
+
### 1. Vanilla OpenAI (No Schema) - 0% Accuracy
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from openai import OpenAI
|
|
98
|
+
client = OpenAI(api_key=api_key)
|
|
99
|
+
|
|
100
|
+
response = client.chat.completions.create(
|
|
101
|
+
model="gpt-4o",
|
|
102
|
+
messages=[{"role": "user", "content": f"Generate a SPARQL query for: {question}"}],
|
|
103
|
+
max_tokens=500
|
|
104
|
+
)
|
|
105
|
+
sparql = response.choices[0].message.content
|
|
106
|
+
# Result: 0/7 passed - all wrapped in markdown
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### 2. Vanilla OpenAI (With Schema) - 71.4% Accuracy
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from openai import OpenAI
|
|
113
|
+
client = OpenAI(api_key=api_key)
|
|
114
|
+
|
|
115
|
+
prompt = f"""You are a SPARQL query generator.
|
|
116
|
+
|
|
117
|
+
{LUBM_SCHEMA}
|
|
118
|
+
|
|
119
|
+
TYPE CONTRACT:
|
|
120
|
+
- Input: natural language query
|
|
121
|
+
- Output: raw SPARQL (NO markdown, NO code blocks, NO explanation)
|
|
122
|
+
- Use ONLY predicates from the schema above
|
|
123
|
+
|
|
124
|
+
Query: {question}
|
|
125
|
+
|
|
126
|
+
Output raw SPARQL only:"""
|
|
127
|
+
|
|
128
|
+
response = client.chat.completions.create(
|
|
129
|
+
model="gpt-4o",
|
|
130
|
+
messages=[{"role": "user", "content": prompt}],
|
|
131
|
+
max_tokens=500
|
|
132
|
+
)
|
|
133
|
+
sparql = response.choices[0].message.content
|
|
134
|
+
# Result: 5/7 passed - schema prevents wrong predicates
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### 3. LangChain (No Schema) - 0% Accuracy
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
from langchain_openai import ChatOpenAI
|
|
141
|
+
from langchain_core.prompts import PromptTemplate
|
|
142
|
+
from langchain_core.output_parsers import StrOutputParser
|
|
143
|
+
|
|
144
|
+
llm = ChatOpenAI(model="gpt-4o", api_key=api_key)
|
|
145
|
+
parser = StrOutputParser()
|
|
146
|
+
|
|
147
|
+
template = PromptTemplate(
|
|
148
|
+
input_variables=["question"],
|
|
149
|
+
template="Generate a SPARQL query for: {question}"
|
|
150
|
+
)
|
|
151
|
+
chain = template | llm | parser
|
|
152
|
+
|
|
153
|
+
sparql = chain.invoke({"question": question})
|
|
154
|
+
# Result: 0/7 passed - all wrapped in markdown
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### 4. LangChain (With Schema) - 71.4% Accuracy
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
from langchain_openai import ChatOpenAI
|
|
161
|
+
from langchain_core.prompts import PromptTemplate
|
|
162
|
+
from langchain_core.output_parsers import StrOutputParser
|
|
163
|
+
|
|
164
|
+
llm = ChatOpenAI(model="gpt-4o", api_key=api_key)
|
|
165
|
+
parser = StrOutputParser()
|
|
166
|
+
|
|
167
|
+
template = PromptTemplate(
|
|
168
|
+
input_variables=["question", "schema"],
|
|
169
|
+
template="""You are a SPARQL query generator.
|
|
170
|
+
|
|
171
|
+
{schema}
|
|
172
|
+
|
|
173
|
+
TYPE CONTRACT:
|
|
174
|
+
- Input: natural language query
|
|
175
|
+
- Output: raw SPARQL (NO markdown, NO code blocks, NO explanation)
|
|
176
|
+
- Use ONLY predicates from the schema above
|
|
177
|
+
|
|
178
|
+
Query: {question}
|
|
179
|
+
|
|
180
|
+
Output raw SPARQL only:"""
|
|
181
|
+
)
|
|
182
|
+
chain = template | llm | parser
|
|
183
|
+
|
|
184
|
+
sparql = chain.invoke({"question": question, "schema": LUBM_SCHEMA})
|
|
185
|
+
# Result: 5/7 passed - same as vanilla with schema
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### 5. DSPy (No Schema) - 14.3% Accuracy
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
import dspy
|
|
192
|
+
from dspy import LM
|
|
193
|
+
|
|
194
|
+
lm = LM("openai/gpt-4o")
|
|
195
|
+
dspy.configure(lm=lm)
|
|
196
|
+
|
|
197
|
+
class SPARQLGenerator(dspy.Signature):
|
|
198
|
+
"""Generate SPARQL query from natural language."""
|
|
199
|
+
question = dspy.InputField(desc="Natural language question")
|
|
200
|
+
sparql = dspy.OutputField(desc="SPARQL query")
|
|
201
|
+
|
|
202
|
+
generator = dspy.Predict(SPARQLGenerator)
|
|
203
|
+
response = generator(question=question)
|
|
204
|
+
sparql = response.sparql
|
|
205
|
+
# Result: 1/7 passed - slightly better output formatting
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
### 6. DSPy (With Schema) - 71.4% Accuracy
|
|
209
|
+
|
|
210
|
+
```python
|
|
211
|
+
import dspy
|
|
212
|
+
from dspy import LM
|
|
213
|
+
|
|
214
|
+
lm = LM("openai/gpt-4o")
|
|
215
|
+
dspy.configure(lm=lm)
|
|
216
|
+
|
|
217
|
+
class SchemaSPARQLGenerator(dspy.Signature):
|
|
218
|
+
"""Generate SPARQL query using the provided schema. Output raw SPARQL only."""
|
|
219
|
+
schema = dspy.InputField(desc="Database schema with classes and properties")
|
|
220
|
+
question = dspy.InputField(desc="Natural language question")
|
|
221
|
+
sparql = dspy.OutputField(desc="Raw SPARQL query (no markdown, no explanation)")
|
|
222
|
+
|
|
223
|
+
generator = dspy.Predict(SchemaSPARQLGenerator)
|
|
224
|
+
response = generator(schema=LUBM_SCHEMA, question=question)
|
|
225
|
+
sparql = response.sparql
|
|
226
|
+
# Result: 5/7 passed - same as others with schema
|
|
69
227
|
```
|
|
70
228
|
|
|
71
229
|
---
|