hana-kgvector 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,449 @@
1
+ # hana-kgvector
2
+
3
+ A TypeScript framework for building **hybrid GraphRAG** applications using SAP HANA Cloud as the unified backend for knowledge graphs (RDF) and vector embeddings.
4
+
5
+ ## Features
6
+
7
+ - **Unified Storage**: SAP HANA Cloud for both RDF triples (Knowledge Graph Engine) and vector embeddings (Vector Engine)
8
+ - **PropertyGraphIndex**: LlamaIndex-inspired API for building and querying property graphs
9
+ - **Hybrid Retrieval**: Combine vector similarity search with graph traversal
10
+ - **Schema-Guided Extraction**: Extract entities and relations from documents using LLMs
11
+ - **Multi-Tenancy**: Isolate data into logical "Spaces" for different domains
12
+ - **LLM Agnostic**: Works with any LLM via LiteLLM proxy (OpenAI, Anthropic, Azure, etc.)
13
+
14
+ > 📚 **New to hana-kgvector?** Check out the [Step-by-Step Tutorial](./TUTORIAL.md) for a complete guide.
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ pnpm add hana-kgvector
20
+ # or
21
+ npm install hana-kgvector
22
+ ```
23
+
24
+ ## Quick Start
25
+
26
+ ### 1. Setup Environment
27
+
28
+ Create a `.env.local` file:
29
+
30
+ ```env
31
+ # SAP HANA Cloud
32
+ HANA_HOST=your-hana-instance.hanacloud.ondemand.com:443
33
+ HANA_USER=your_user
34
+ HANA_PASSWORD=your_password
35
+
36
+ # LiteLLM Proxy
37
+ LITELLM_PROXY_URL=http://localhost:4000
38
+ LITELLM_API_KEY=your_key
39
+
40
+ # Models
41
+ DEFAULT_LLM_MODEL=gpt-4o-mini
42
+ DEFAULT_EMBEDDING_MODEL=text-embedding-3-small
43
+ ```
44
+
45
+ ### 2. Create a PropertyGraphIndex
46
+
47
+ ```typescript
48
+ import {
49
+ loadEnv,
50
+ createHanaConnection,
51
+ HanaPropertyGraphStore,
52
+ PropertyGraphIndex,
53
+ SchemaLLMPathExtractor,
54
+ ImplicitPathExtractor,
55
+ } from "hana-kgvector";
56
+ import OpenAI from "openai";
57
+
58
+ // Load environment
59
+ loadEnv();
60
+
61
+ // Connect to HANA
62
+ const conn = await createHanaConnection({
63
+ host: process.env.HANA_HOST!,
64
+ user: process.env.HANA_USER!,
65
+ password: process.env.HANA_PASSWORD!,
66
+ });
67
+
68
+ // Create OpenAI client (via LiteLLM)
69
+ const openai = new OpenAI({
70
+ apiKey: process.env.LITELLM_API_KEY,
71
+ baseURL: process.env.LITELLM_PROXY_URL,
72
+ });
73
+
74
+ // Create embed model adapter
75
+ const embedModel = {
76
+ async getTextEmbedding(text: string) {
77
+ const res = await openai.embeddings.create({
78
+ model: process.env.DEFAULT_EMBEDDING_MODEL ?? "text-embedding-3-small",
79
+ input: text,
80
+ encoding_format: "base64", // Required for some LiteLLM proxy configurations
81
+ });
82
+ return res.data[0].embedding;
83
+ },
84
+ async getTextEmbeddingBatch(texts: string[]) {
85
+ if (texts.length === 0) return [];
86
+ const res = await openai.embeddings.create({
87
+ model: process.env.DEFAULT_EMBEDDING_MODEL ?? "text-embedding-3-small",
88
+ input: texts,
89
+ encoding_format: "base64",
90
+ });
91
+ return res.data.map((d) => d.embedding);
92
+ },
93
+ };
94
+
95
+ // Create LLM client adapter
96
+ const llmClient = {
97
+ async structuredPredict<T>(schema: any, prompt: string): Promise<T> {
98
+ const res = await openai.chat.completions.create({
99
+ model: process.env.DEFAULT_LLM_MODEL ?? "gpt-4o-mini",
100
+ messages: [{ role: "user", content: prompt }],
101
+ response_format: { type: "json_object" },
102
+ });
103
+ let content = res.choices[0]?.message?.content ?? "{}";
104
+ // Strip markdown code blocks if present (some LLMs wrap JSON in ```json...```)
105
+ content = content.replace(/^```(?:json)?\s*\n?/i, "").replace(/\n?```\s*$/i, "").trim();
106
+ return JSON.parse(content);
107
+ },
108
+ };
109
+
110
+ // Create HANA-backed graph store
111
+ const graphStore = new HanaPropertyGraphStore(conn, {
112
+ graphName: "my_knowledge_graph", // RDF named graph identifier
113
+ // vectorDimension is auto-detected from first embedding
114
+ });
115
+
116
+ // Create PropertyGraphIndex with extractors
117
+ const index = new PropertyGraphIndex({
118
+ propertyGraphStore: graphStore,
119
+ embedModel,
120
+ kgExtractors: [
121
+ new SchemaLLMPathExtractor({
122
+ llm: llmClient,
123
+ schema: {
124
+ entityTypes: ["PERSON", "ORGANIZATION", "LOCATION", "PRODUCT"],
125
+ relationTypes: ["WORKS_AT", "LOCATED_IN", "PRODUCES", "KNOWS"],
126
+ validationSchema: [
127
+ ["PERSON", "WORKS_AT", "ORGANIZATION"],
128
+ ["PERSON", "KNOWS", "PERSON"],
129
+ ["ORGANIZATION", "LOCATED_IN", "LOCATION"],
130
+ ["ORGANIZATION", "PRODUCES", "PRODUCT"],
131
+ ],
132
+ },
133
+ }),
134
+ new ImplicitPathExtractor(),
135
+ ],
136
+ embedKgNodes: true,
137
+ });
138
+ ```
139
+
140
+ ### 3. Insert Documents
141
+
142
+ ```typescript
143
+ await index.insert([
144
+ {
145
+ id: "doc_1",
146
+ text: "Alice works at SAP in Walldorf. She collaborates with Bob.",
147
+ metadata: { documentId: "company_info" },
148
+ },
149
+ {
150
+ id: "doc_2",
151
+ text: "SAP produces enterprise software and is headquartered in Germany.",
152
+ metadata: { documentId: "company_info" },
153
+ },
154
+ ]);
155
+ ```
156
+
157
+ ### 4. Query the Graph
158
+
159
+ ```typescript
160
+ // Simple query
161
+ const results = await index.query("Who works at SAP?");
162
+
163
+ for (const result of results) {
164
+ console.log(`[${result.score.toFixed(3)}] ${result.node.text}`);
165
+ }
166
+
167
+ // Advanced: Use retriever directly
168
+ import { VectorContextRetriever } from "hana-kgvector";
169
+
170
+ const retriever = new VectorContextRetriever({
171
+ graphStore,
172
+ embedModel,
173
+ similarityTopK: 5,
174
+ pathDepth: 2, // Traverse 2 hops from matched nodes
175
+ });
176
+
177
+ const nodes = await retriever.retrieve({ queryStr: "SAP employees" });
178
+ ```
179
+
180
+ ## Architecture
181
+
182
+ ```
183
+ ┌────────────────────────────────────────────────────────────────────┐
184
+ │ hana-kgvector │
185
+ ├────────────────────────────────────────────────────────────────────┤
186
+ │ │
187
+ │ ┌────────────────────┐ ┌──────────────────┐ ┌────────────────┐ │
188
+ │ │ PropertyGraphIndex │ │ Extractors │ │ Retrievers │ │
189
+ │ │ - insert() │ │ - SchemaLLM │ │ - Vector │ │
190
+ │ │ - query() │ │ - Implicit │ │ - PGRetriever │ │
191
+ │ └────────┬───────────┘ └──────────────────┘ └────────────────┘ │
192
+ │ │ │
193
+ │ ▼ │
194
+ │ ┌──────────────────────────────────────────────────────────┐ │
195
+ │ │ HanaPropertyGraphStore │ │
196
+ │ │ - upsertNodes() - vectorQuery() - getRelMap() │ │
197
+ │ └──────────────────────────────────────────────────────────┘ │
198
+ │ │ │
199
+ │ ▼ │
200
+ │ ┌──────────────────────┐ ┌─────────────────────┐ │
201
+ │ │ HANA Vector Engine │ │ HANA KG Engine │ │
202
+ │ │ (REAL_VECTOR) │ │ (SPARQL_EXECUTE) │ │
203
+ │ └──────────────────────┘ └─────────────────────┘ │
204
+ │ │
205
+ └────────────────────────────────────────────────────────────────────┘
206
+ ```
207
+
208
+ ## Core Components
209
+
210
+ ### PropertyGraphIndex
211
+
212
+ Main entry point for building and querying knowledge graphs.
213
+
214
+ ```typescript
215
+ const index = new PropertyGraphIndex({
216
+ propertyGraphStore: graphStore, // Required: HANA-backed store
217
+ embedModel, // Optional: for vector search
218
+ kgExtractors: [...], // Optional: extraction pipeline
219
+ embedKgNodes: true, // Embed entity nodes
220
+ });
221
+ ```
222
+
223
+ ### HanaPropertyGraphStore
224
+
225
+ HANA-backed implementation of PropertyGraphStore interface.
226
+
227
+ ```typescript
228
+ const store = new HanaPropertyGraphStore(conn, {
229
+ graphName: "my_graph", // RDF named graph identifier
230
+ vectorTableName: "MY_VECTORS", // Optional: custom table name
231
+ // vectorDimension auto-detected from embeddings (supports 1536, 3072, etc.)
232
+ });
233
+ ```
234
+
235
+ ### Extractors
236
+
237
+ Transform text nodes into entities and relations.
238
+
239
+ | Extractor | Description |
240
+ |-----------|-------------|
241
+ | `SchemaLLMPathExtractor` | Schema-guided extraction with LLM |
242
+ | `ImplicitPathExtractor` | Extract structure-based relations (CHUNK → DOCUMENT) |
243
+
244
+ ### Retrievers
245
+
246
+ Retrieve relevant context from the graph.
247
+
248
+ | Retriever | Description |
249
+ |-----------|-------------|
250
+ | `VectorContextRetriever` | Vector similarity → graph traversal |
251
+ | `PGRetriever` | Orchestrates multiple sub-retrievers |
252
+
253
+ ## Configuration Reference
254
+
255
+ ### HanaPropertyGraphStore Options
256
+
257
+ | Parameter | Type | Default | Description |
258
+ |-----------|------|---------|-------------|
259
+ | `graphName` | `string` | Required | RDF named graph identifier (e.g., `"my_knowledge_graph"`) |
260
+ | `vectorTableName` | `string` | Auto-generated | Custom table name for vector storage |
261
+ | `llamaNodesTableName` | `string` | Auto-generated | Custom table name for document nodes |
262
+ | `resetTables` | `boolean` | `false` | Drop and recreate tables on init (dev/test only) |
263
+
264
+ ### PropertyGraphIndex Options
265
+
266
+ | Parameter | Type | Default | Description |
267
+ |-----------|------|---------|-------------|
268
+ | `propertyGraphStore` | `PropertyGraphStore` | Required | HANA-backed graph store instance |
269
+ | `embedModel` | `EmbedModel` | - | Embedding model for vector search |
270
+ | `kgExtractors` | `TransformComponent[]` | `[ImplicitPathExtractor]` | Pipeline of entity/relation extractors |
271
+ | `embedKgNodes` | `boolean` | `true` | Generate embeddings for extracted entity nodes |
272
+ | `showProgress` | `boolean` | `false` | Log progress during extraction |
273
+
274
+ ### Query/Retrieval Options
275
+
276
+ These options can be passed to `index.query()` or `index.asRetriever()`:
277
+
278
+ | Parameter | Type | Default | Description |
279
+ |-----------|------|---------|-------------|
280
+ | `similarityTopK` | `number` | `4` | Number of top similar nodes to retrieve via vector search |
281
+ | `pathDepth` | `number` | `1` | Graph traversal depth (hops) from matched nodes |
282
+ | `limit` | `number` | `30` | Maximum triplets/results to return after graph expansion |
283
+ | `similarityScore` | `number` | - | Minimum similarity threshold (0.0-1.0) to filter results |
284
+ | `crossCheckBoost` | `boolean` | `true` | Enable cross-check boosting (see below) |
285
+ | `crossCheckBoostFactor` | `number` | `1.25` | Score multiplier for cross-check matches |
286
+
287
+ **Example:**
288
+
289
+ ```typescript
290
+ // Retrieve more results with deeper graph traversal
291
+ const results = await index.query("Tech companies in California", {
292
+ similarityTopK: 10, // More initial matches
293
+ pathDepth: 2, // Traverse 2 hops
294
+ limit: 50, // Return up to 50 results
295
+ similarityScore: 0.5, // Only results with score >= 0.5
296
+ crossCheckBoost: true, // Enable provenance-based boosting
297
+ });
298
+ ```
299
+
300
+ ### Cross-Check Boosting
301
+
302
+ Cross-check boosting is an advanced retrieval feature that improves result quality by combining vector similarity with graph provenance:
303
+
304
+ 1. **Vector search** finds semantically similar entity nodes
305
+ 2. **Graph traversal** expands to find related facts/triplets
306
+ 3. **Cross-check**: If a graph fact originated from the same document as a vector-matched entity, its score is boosted
307
+
308
+ This rewards results that are **both semantically relevant AND have explicit graph connections**, improving precision for complex queries.
309
+
310
+ ```typescript
311
+ // Disable cross-check boosting for raw vector scores
312
+ const results = await index.query("Apple CEO", {
313
+ crossCheckBoost: false,
314
+ });
315
+
316
+ // Increase boost factor for stronger provenance preference
317
+ const boostedResults = await index.query("Apple CEO", {
318
+ crossCheckBoostFactor: 1.5, // 50% boost instead of default 25%
319
+ });
320
+ ```
321
+
322
+ ### SchemaLLMPathExtractor Options
323
+
324
+ | Parameter | Type | Default | Description |
325
+ |-----------|------|---------|-------------|
326
+ | `llm` | `LLMClient` | Required | LLM client for entity extraction |
327
+ | `schema.entityTypes` | `string[]` | Required | Allowed entity types (e.g., `["PERSON", "ORG"]`) |
328
+ | `schema.relationTypes` | `string[]` | Required | Allowed relation types (e.g., `["WORKS_AT"]`) |
329
+ | `schema.validationSchema` | `[string,string,string][]` | - | Valid triplet patterns (e.g., `[["PERSON", "WORKS_AT", "ORG"]]`) |
330
+ | `maxTripletsPerChunk` | `number` | `10` | Max triplets (entity–relation–entity) to extract per text chunk |
331
+ | `strict` | `boolean` | `true` | Only allow relations defined in validationSchema |
332
+ | `extractPromptTemplate` | `string` | Built-in | Custom prompt template for extraction |
333
+
334
+ ### VectorContextRetriever Options
335
+
336
+ | Parameter | Type | Default | Description |
337
+ |-----------|------|---------|-------------|
338
+ | `graphStore` | `PropertyGraphStore` | Required | Graph store instance |
339
+ | `embedModel` | `EmbedModel` | Required | Embedding model for query embedding |
340
+ | `similarityTopK` | `number` | `4` | Number of top similar nodes |
341
+ | `pathDepth` | `number` | `1` | Graph traversal depth |
342
+ | `limit` | `number` | `30` | Max results after expansion |
343
+ | `similarityScore` | `number` | - | Minimum similarity threshold |
344
+ | `includeText` | `boolean` | `true` | Include source text in results |
345
+ | `crossCheckBoost` | `boolean` | `true` | Enable cross-check boosting |
346
+ | `crossCheckBoostFactor` | `number` | `1.25` | Score multiplier for provenance matches |
347
+
348
+ ## Multi-Tenancy
349
+
350
+ Isolate data for different domains using separate graph names:
351
+
352
+ ```typescript
353
+ // Tenant 1: Finance data
354
+ const financeStore = new HanaPropertyGraphStore(conn, {
355
+ graphName: "finance_contracts",
356
+ });
357
+ const financeIndex = new PropertyGraphIndex({
358
+ propertyGraphStore: financeStore,
359
+ embedModel,
360
+ kgExtractors: [...],
361
+ });
362
+
363
+ // Tenant 2: HR data (completely isolated)
364
+ const hrStore = new HanaPropertyGraphStore(conn, {
365
+ graphName: "hr_data",
366
+ });
367
+ const hrIndex = new PropertyGraphIndex({
368
+ propertyGraphStore: hrStore,
369
+ embedModel,
370
+ kgExtractors: [...],
371
+ });
372
+ ```
373
+
374
+ Each `graphName` creates:
375
+ - A separate RDF named graph for knowledge graph data
376
+ - A separate vector table for embeddings
377
+
378
+ ## Low-Level Access
379
+
380
+ ### Direct SPARQL Access
381
+
382
+ ```typescript
383
+ import { HanaSparqlStore } from "hana-kgvector";
384
+
385
+ const sparql = new HanaSparqlStore(conn);
386
+
387
+ // Execute SPARQL query
388
+ const result = await sparql.execute({
389
+ sparql: `SELECT ?s ?p ?o FROM <urn:hkv:my_graph> WHERE { ?s ?p ?o } LIMIT 10`,
390
+ });
391
+
392
+ // Load Turtle data
393
+ await sparql.loadTurtle({
394
+ turtle: `<urn:entity:1> <urn:rel:knows> <urn:entity:2> .`,
395
+ graphName: "urn:hkv:my_graph",
396
+ });
397
+ ```
398
+
399
+ ## Requirements
400
+
401
+ - **Node.js** 20+
402
+ - **SAP HANA Cloud** with:
403
+ - Vector Engine enabled (GA since Q1 2024)
404
+ - Knowledge Graph Engine enabled (GA since Q1 2025)
405
+ - Minimum 3 vCPUs / 48 GB memory
406
+ - **LiteLLM Proxy** (recommended) or direct LLM API access
407
+
408
+ ## Scripts
409
+
410
+ ```bash
411
+ # Build
412
+ pnpm run build
413
+
414
+ # Test
415
+ pnpm run test
416
+
417
+ # Validate HANA connection
418
+ pnpm run phase0:hana
419
+
420
+ # Validate LiteLLM connection
421
+ pnpm run phase0:litellm
422
+
423
+ # Run PropertyGraphIndex smoke test
424
+ pnpm run smoke:pg
425
+
426
+ # Run quality test suite (comprehensive testing)
427
+ pnpm exec tsx scripts/test-quality.ts
428
+ ```
429
+
430
+ ## Quality Test Results
431
+
432
+ The quality test suite validates entity extraction, vector retrieval, and graph traversal:
433
+
434
+ | Test | Score |
435
+ |------|-------|
436
+ | Entity Extraction (Organizations, People, Locations) | 100% |
437
+ | Relation Extraction | 100% |
438
+ | Vector Retrieval Relevance | 100% |
439
+ | Graph Traversal | 100% |
440
+ | Data Persistence (Vectors + RDF Triples) | 100% |
441
+ | **Overall** | **97.3%** |
442
+
443
+ ## License
444
+
445
+ MIT
446
+
447
+ ## Contributing
448
+
449
+ Contributions welcome! Please read the PRD.md for architectural decisions and design principles.
@@ -0,0 +1,6 @@
1
+ import {
2
+ require_Stream
3
+ } from "./chunk-I3F3SOHM.js";
4
+ import "./chunk-VUNV25KB.js";
5
+ export default require_Stream();
6
+ //# sourceMappingURL=Stream-JW2S2DUH.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}