endee-llamaindex 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +152 -0
- package/package.json +17 -0
- package/src/endeeClient.ts +148 -0
package/README.md
ADDED
@@ -0,0 +1,152 @@
# endee-llamaindex

A LlamaIndex vector store integration for [Endee](https://endee.io) — enabling seamless RAG (Retrieval-Augmented Generation) workflows with the Endee vector database.

## Installation

```bash
npm install endee-llamaindex
# or
pnpm add endee-llamaindex
# or
yarn add endee-llamaindex
```

## Prerequisites

- An Endee account and API token
- An existing index created in Endee
- Node.js 18+
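
The examples below read credentials from the `ENDEE_API_TOKEN` and `OPENAI_API_KEY` environment variables. A quick sanity check along these lines can catch a missing variable before any indexing work starts:

```typescript
// Fail fast if the environment variables used throughout this README are missing.
for (const name of ["ENDEE_API_TOKEN", "OPENAI_API_KEY"]) {
  if (!process.env[name]) {
    throw new Error(`Missing required environment variable: ${name}`);
  }
}
```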

## Quick Start

```typescript
import { EndeeVectorStore } from "endee-llamaindex";
import { Document, Settings, storageContextFromDefaults, VectorStoreIndex } from "llamaindex";
import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai";

// Configure your embedding model
Settings.embedModel = new OpenAIEmbedding({
  model: "text-embedding-3-small",
  apiKey: process.env.OPENAI_API_KEY,
});

// Configure your LLM
Settings.llm = new OpenAI({
  model: "gpt-4o",
  apiKey: process.env.OPENAI_API_KEY,
});

// Initialize the Endee vector store
const vectorStore = new EndeeVectorStore({
  indexName: "my-index",
  apiToken: process.env.ENDEE_API_TOKEN,
  chunkSize: 100,
});

// Create documents
const document = new Document({
  text: "Your document content here...",
  id_: "doc-1",
});

// Index documents with storage context
const storageContext = await storageContextFromDefaults({
  vectorStore: vectorStore,
});

const index = await VectorStoreIndex.fromDocuments([document], { storageContext });

// Query the index
const queryEngine = index.asQueryEngine();
const response = await queryEngine.query({
  query: "What is the main topic?",
});

console.log(response.toString());
```

## Configuration

### EndeeVectorStore Parameters

| Parameter   | Type     | Description                                          |
| ----------- | -------- | ---------------------------------------------------- |
| `indexName` | `string` | Name of your Endee index                             |
| `apiToken`  | `string` | Your Endee API token                                 |
| `chunkSize` | `number` | Batch size for upserting vectors (recommended: 100)  |
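
`chunkSize` is the upsert batch size: when documents are indexed, the embedded nodes are sliced into groups of `chunkSize` and sent to Endee one batch at a time. An illustrative sketch (not the library code) of how 250 nodes are split with the recommended value of 100:

```typescript
// Illustrative only: 250 nodes with chunkSize 100 produce three upsert batches.
const nodes = Array.from({ length: 250 }, (_, i) => ({ id: `node-${i}` }));
const chunkSize = 100;

const batches: { id: string }[][] = [];
for (let i = 0; i < nodes.length; i += chunkSize) {
  batches.push(nodes.slice(i, i + chunkSize));
}
console.log(batches.map((batch) => batch.length)); // [ 100, 100, 50 ]
```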

## Usage Examples

### Indexing Documents

```typescript
import fs from "node:fs/promises";

const vectorStore = new EndeeVectorStore({
  indexName: "knowledge-base",
  apiToken: process.env.ENDEE_API_TOKEN,
  chunkSize: 100,
});

const content = await fs.readFile("./documents/article.txt", "utf-8");
const document = new Document({ text: content, id_: "article-1" });

const storageContext = await storageContextFromDefaults({ vectorStore });
const index = await VectorStoreIndex.fromDocuments([document], { storageContext });
```

### Querying an Existing Index

```typescript
const vectorStore = new EndeeVectorStore({
  indexName: "knowledge-base",
  apiToken: process.env.ENDEE_API_TOKEN,
  chunkSize: 100,
});

// Load from existing vector store
const index = await VectorStoreIndex.fromVectorStore(vectorStore);

const queryEngine = index.asQueryEngine();
const response = await queryEngine.query({
  query: "Summarize the key points",
});
```
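
If you only need the matching chunks without LLM synthesis, the same index can be used as a retriever (a sketch assuming the standard LlamaIndex retriever API; `similarityTopK` controls how many chunks come back):

```typescript
const retriever = index.asRetriever({ similarityTopK: 5 });
const results = await retriever.retrieve({ query: "Summarize the key points" });

for (const { node, score } of results) {
  console.log(score, node.id_);
}
```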

### Deleting Documents

```typescript
const vectorStore = new EndeeVectorStore({
  indexName: "knowledge-base",
  apiToken: process.env.ENDEE_API_TOKEN,
  chunkSize: 100,
});

// Delete by reference document ID
await vectorStore.delete("doc-id-to-remove");
```

## API Reference

### `EndeeVectorStore`

#### Methods

| Method             | Description                              |
| ------------------ | ---------------------------------------- |
| `add(nodes)`       | Add embedding nodes to the vector store  |
| `query(query)`     | Query the vector store for similar nodes |
| `delete(refDocId)` | Delete vectors by reference document ID  |
| `client()`         | Get the underlying Endee client          |
| `index()`          | Get the Endee index instance             |
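
The store can also be used directly, without the `VectorStoreIndex` helpers. A sketch of a raw similarity query, assuming an embedding model is configured on `Settings` and that `VectorStoreQueryMode` is imported from `llamaindex`:

```typescript
import { Settings, VectorStoreQueryMode } from "llamaindex";

const queryEmbedding = await Settings.embedModel.getTextEmbedding("What is the main topic?");

const result = await vectorStore.query({
  queryEmbedding,
  similarityTopK: 5,
  mode: VectorStoreQueryMode.DEFAULT,
});

console.log(result.ids, result.similarities);
```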

## Dependencies

- [llamaindex](https://www.npmjs.com/package/llamaindex) - LlamaIndex TypeScript SDK
- [endee](https://www.npmjs.com/package/endee) - Endee vector database client

## License

ISC
package/package.json
ADDED
@@ -0,0 +1,17 @@
{
  "name": "endee-llamaindex",
  "version": "1.0.0",
  "description": "LlamaIndex integration with endee vector database",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "keywords": [],
  "author": "Pankaj Singh (Endee Labs)",
  "license": "ISC",
  "dependencies": {
    "@llamaindex/openai": "^0.4.22",
    "endee": "^1.0.0",
    "llamaindex": "^0.12.1"
  }
}
package/src/endeeClient.ts
ADDED
@@ -0,0 +1,148 @@
import { Endee } from "endee";
import {
  BaseNode,
  BaseVectorStore,
  Metadata,
  metadataDictToNode,
  nodeToMetadata,
  VectorStoreBaseParams,
  VectorStoreQuery,
  VectorStoreQueryResult,
} from "llamaindex";

type EndeeParams = {
  indexName: string;
  apiToken: string;
  chunkSize: number;
} & VectorStoreBaseParams;

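/**
 * LlamaIndex vector store backed by an Endee index.
 * Records are upserted to the index named by `indexName` in batches of
 * `chunkSize`, authenticated with `apiToken` via the `endee` client.
 */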
export class EndeeVectorStore extends BaseVectorStore {
  storesText: boolean = true;
  private apiToken: string;
  private indexName: string;
  private chunkSize: number;
  private db: any;

  constructor(params: EndeeParams) {
    super(params);
    this.apiToken = params.apiToken;
    this.indexName = params.indexName;
    this.chunkSize = params.chunkSize;
    this.db = new Endee(params.apiToken);
  }

  private getDB() {
    if (!this.db) {
      this.db = new Endee(this.apiToken);
    }
    return this.db;
  }

  client() {
    return this.getDB();
  }

  async index() {
    const db = this.getDB();
    const index = await db.getIndex(this.indexName);
    return index;
  }

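  /**
   * Converts embedded nodes to Endee records and upserts them in batches of
   * `chunkSize`. Record ids are prefixed with the node's ref_doc_id so chunks
   * can be located by their source document id.
   */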
  async add(embeddingResults: BaseNode<Metadata>[]): Promise<string[]> {
    if (embeddingResults.length == 0) {
      return [];
    }

    const idx: any = await this.index();
    const nodes = embeddingResults.map((node) => {
      const nodeRecord = this.nodeToRecord(node);

      if (nodeRecord.meta.ref_doc_id) {
        // adding refDoc id as prefix to the chunk to find them using refDoc id
        nodeRecord.id = `${nodeRecord.meta.ref_doc_id}_chunk_${nodeRecord.id}`;
      }
      return nodeRecord;
    });

    for (let i = 0; i < nodes.length; i += this.chunkSize) {
      const chunk = nodes.slice(i, i + this.chunkSize);
      const result = await this.saveChunk(idx, chunk);
      if (!result) {
        throw new Error("Failed to save chunk");
      }
    }
    return [];
  }

  protected async saveChunk(idx: any, chunk: any) {
    try {
      await idx.upsert(chunk);
      return true;
    } catch (err) {
      const msg = `${err}`;
      console.log(msg, err);
      return false;
    }
  }

  nodeToRecord(node: BaseNode<Metadata>) {
    const id = node.id_.length ? node.id_ : null;
    return {
      id: id!,
      vector: node.getEmbedding(),
      meta: nodeToMetadata(node),
    };
  }

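  /**
   * Runs a similarity search against the Endee index and converts each result
   * row back into a LlamaIndex node via `metadataDictToNode`.
   */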
  async query(
    query: VectorStoreQuery,
    _options?: object
  ): Promise<VectorStoreQueryResult> {
    const filter = "";
    const defaultOptions: any = {
      vector: query.queryEmbedding!,
      topK: query.similarityTopK,
      includeVectors: true,
    };

    if (filter) {
      defaultOptions.filter = filter;
    }

    const idx = await this.index();
    const results = await idx.query(defaultOptions);

    const idList = results.map((row: any) => row.id);
    if (idList.length == 0) {
      return { nodes: [], similarities: [], ids: [] };
    }

    const nodes = results.map((row: any) => {
      const node = metadataDictToNode(row.meta ?? {}, {
        fallback: {
          id: row.id,
          text: row._node_content,
          metadata: row.meta,
          embedding: row.vector,
        },
      });
      return node;
    });

    return {
      nodes: nodes,
      similarities: results.map((row: any) => row.similarity || 999),
      ids: results.map((row: any) => row.id),
    };
  }

  async delete(refDocId: string, deleteKwargs?: object): Promise<void> {
    const [idx, index] = await Promise.all([
      this.index(),
      // to get the information about the index
      this.db?.describeIndex(this.indexName),
    ]);

    await idx.deleteVector(refDocId);
  }
}