@mastra/rag 0.0.2-alpha.2 → 0.0.2-alpha.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +150 -0
- package/dist/astra-db/index.d.ts +53 -0
- package/dist/chroma/index.d.ts +20 -0
- package/dist/document/document.d.ts +35 -0
- package/dist/document/index.d.ts +2 -0
- package/dist/document/transformers/character.d.ts +44 -0
- package/dist/document/transformers/html.d.ts +23 -0
- package/dist/document/transformers/json.d.ts +56 -0
- package/dist/document/transformers/latex.d.ts +11 -0
- package/dist/document/transformers/markdown.d.ts +24 -0
- package/dist/document/transformers/text.d.ts +21 -0
- package/dist/document/transformers/token.d.ts +45 -0
- package/dist/document/transformers/transformer.d.ts +4 -0
- package/dist/document/types.d.ts +83 -0
- package/dist/embeddings/index.d.ts +3 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.js +8 -0
- package/dist/pg/index.d.ts +13 -0
- package/dist/pg/index_test.d.ts +1 -0
- package/dist/pinecone/index.d.ts +11 -0
- package/dist/qdrant/index.d.ts +11 -0
- package/dist/rag.cjs.development.js +3262 -0
- package/dist/rag.cjs.development.js.map +1 -0
- package/dist/rag.cjs.production.min.js +2 -0
- package/dist/rag.cjs.production.min.js.map +1 -0
- package/dist/rag.esm.js +3252 -0
- package/dist/rag.esm.js.map +1 -0
- package/dist/upstash/index.d.ts +19 -0
- package/docker-compose.yaml +4 -0
- package/package.json +9 -3
- package/src/astra-db/index.test.ts +201 -0
- package/src/astra-db/index.ts +146 -0
- package/src/chroma/index.test.ts +154 -0
- package/src/chroma/index.ts +135 -0
- package/src/document/document.test.ts +870 -0
- package/src/document/document.ts +283 -0
- package/src/document/index.ts +2 -129
- package/src/document/transformers/character.ts +278 -0
- package/src/document/transformers/html.ts +283 -0
- package/src/document/transformers/json.ts +265 -0
- package/src/document/transformers/latex.ts +19 -0
- package/src/document/transformers/markdown.ts +244 -0
- package/src/document/transformers/text.ts +136 -0
- package/src/document/transformers/token.ts +147 -0
- package/src/document/transformers/transformer.ts +5 -0
- package/src/document/types.ts +98 -0
- package/src/embeddings/index.ts +16 -0
- package/src/index.ts +4 -1
- package/src/upstash/index.test.ts +98 -0
- package/src/upstash/index.ts +92 -0
- package/src/document/index.test.ts +0 -229
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,155 @@
|
|
|
1
1
|
# @mastra/rag
|
|
2
2
|
|
|
3
|
+
## 0.0.2-alpha.22
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- f031a1f: expose embed from rag, and refactor embed
|
|
8
|
+
- Updated dependencies [f031a1f]
|
|
9
|
+
- @mastra/core@0.1.27-alpha.38
|
|
10
|
+
|
|
11
|
+
## 0.0.2-alpha.21
|
|
12
|
+
|
|
13
|
+
### Patch Changes
|
|
14
|
+
|
|
15
|
+
- 45fd5b8: rename MastraDocument to MDocument
|
|
16
|
+
- Updated dependencies [c872875]
|
|
17
|
+
- Updated dependencies [f6da688]
|
|
18
|
+
- Updated dependencies [b5393f1]
|
|
19
|
+
- @mastra/core@0.1.27-alpha.37
|
|
20
|
+
|
|
21
|
+
## 0.0.2-alpha.20
|
|
22
|
+
|
|
23
|
+
### Patch Changes
|
|
24
|
+
|
|
25
|
+
- Updated dependencies [f537e33]
|
|
26
|
+
- Updated dependencies [bc40916]
|
|
27
|
+
- Updated dependencies [f7d1131]
|
|
28
|
+
- Updated dependencies [75bf3f0]
|
|
29
|
+
- Updated dependencies [3c4488b]
|
|
30
|
+
- Updated dependencies [d38f7a6]
|
|
31
|
+
- @mastra/core@0.1.27-alpha.36
|
|
32
|
+
|
|
33
|
+
## 0.0.2-alpha.19
|
|
34
|
+
|
|
35
|
+
### Patch Changes
|
|
36
|
+
|
|
37
|
+
- 033eda6: More fixes for refactor
|
|
38
|
+
- Updated dependencies [033eda6]
|
|
39
|
+
- @mastra/core@0.1.27-alpha.35
|
|
40
|
+
|
|
41
|
+
## 0.0.2-alpha.18
|
|
42
|
+
|
|
43
|
+
### Patch Changes
|
|
44
|
+
|
|
45
|
+
- 837a288: MAJOR Revamp of tools, workflows, syncs.
|
|
46
|
+
- 5811de6: Updates spec-writer example to use new workflows constructs. Small improvements to workflow internals. Switch transformer tokenizer for js compatible one.
|
|
47
|
+
- Updated dependencies [837a288]
|
|
48
|
+
- Updated dependencies [5811de6]
|
|
49
|
+
- @mastra/core@0.1.27-alpha.34
|
|
50
|
+
|
|
51
|
+
## 0.0.2-alpha.17
|
|
52
|
+
|
|
53
|
+
### Patch Changes
|
|
54
|
+
|
|
55
|
+
- e1dd94a: update the api for embeddings
|
|
56
|
+
- Updated dependencies [e1dd94a]
|
|
57
|
+
- @mastra/core@0.1.27-alpha.33
|
|
58
|
+
|
|
59
|
+
## 0.0.2-alpha.16
|
|
60
|
+
|
|
61
|
+
### Patch Changes
|
|
62
|
+
|
|
63
|
+
- Updated dependencies [2712098]
|
|
64
|
+
- @mastra/core@0.1.27-alpha.32
|
|
65
|
+
|
|
66
|
+
## 0.0.2-alpha.15
|
|
67
|
+
|
|
68
|
+
### Patch Changes
|
|
69
|
+
|
|
70
|
+
- Updated dependencies [c2dd6b5]
|
|
71
|
+
- @mastra/core@0.1.27-alpha.31
|
|
72
|
+
|
|
73
|
+
## 0.0.2-alpha.14
|
|
74
|
+
|
|
75
|
+
### Patch Changes
|
|
76
|
+
|
|
77
|
+
- 1c3232a: ChromaDB
|
|
78
|
+
|
|
79
|
+
## 0.0.2-alpha.13
|
|
80
|
+
|
|
81
|
+
### Patch Changes
|
|
82
|
+
|
|
83
|
+
- Updated dependencies [963c15a]
|
|
84
|
+
- @mastra/core@0.1.27-alpha.30
|
|
85
|
+
|
|
86
|
+
## 0.0.2-alpha.12
|
|
87
|
+
|
|
88
|
+
### Patch Changes
|
|
89
|
+
|
|
90
|
+
- Updated dependencies [7d87a15]
|
|
91
|
+
- @mastra/core@0.1.27-alpha.29
|
|
92
|
+
|
|
93
|
+
## 0.0.2-alpha.11
|
|
94
|
+
|
|
95
|
+
### Patch Changes
|
|
96
|
+
|
|
97
|
+
- Updated dependencies [1ebd071]
|
|
98
|
+
- @mastra/core@0.1.27-alpha.28
|
|
99
|
+
|
|
100
|
+
## 0.0.2-alpha.10
|
|
101
|
+
|
|
102
|
+
### Patch Changes
|
|
103
|
+
|
|
104
|
+
- 779702b: Upstash vector
|
|
105
|
+
|
|
106
|
+
## 0.0.2-alpha.9
|
|
107
|
+
|
|
108
|
+
### Patch Changes
|
|
109
|
+
|
|
110
|
+
- Updated dependencies [cd02c56]
|
|
111
|
+
- @mastra/core@0.1.27-alpha.27
|
|
112
|
+
|
|
113
|
+
## 0.0.2-alpha.8
|
|
114
|
+
|
|
115
|
+
### Patch Changes
|
|
116
|
+
|
|
117
|
+
- Updated dependencies [d5e12de]
|
|
118
|
+
- @mastra/core@0.1.27-alpha.26
|
|
119
|
+
|
|
120
|
+
## 0.0.2-alpha.7
|
|
121
|
+
|
|
122
|
+
### Patch Changes
|
|
123
|
+
|
|
124
|
+
- 24fe87e: Change document semantics
|
|
125
|
+
|
|
126
|
+
## 0.0.2-alpha.6
|
|
127
|
+
|
|
128
|
+
### Patch Changes
|
|
129
|
+
|
|
130
|
+
- Updated dependencies [01502b0]
|
|
131
|
+
- @mastra/core@0.1.27-alpha.25
|
|
132
|
+
|
|
133
|
+
## 0.0.2-alpha.5
|
|
134
|
+
|
|
135
|
+
### Patch Changes
|
|
136
|
+
|
|
137
|
+
- 036ee5e: Add astra-db to rag
|
|
138
|
+
|
|
139
|
+
## 0.0.2-alpha.4
|
|
140
|
+
|
|
141
|
+
### Patch Changes
|
|
142
|
+
|
|
143
|
+
- Updated dependencies [836f4e3]
|
|
144
|
+
- @mastra/core@0.1.27-alpha.24
|
|
145
|
+
|
|
146
|
+
## 0.0.2-alpha.3
|
|
147
|
+
|
|
148
|
+
### Patch Changes
|
|
149
|
+
|
|
150
|
+
- Updated dependencies [0b826f6]
|
|
151
|
+
- @mastra/core@0.1.27-alpha.23
|
|
152
|
+
|
|
3
153
|
## 0.0.2-alpha.2
|
|
4
154
|
|
|
5
155
|
### Patch Changes
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { MastraVector, QueryResult, IndexStats } from '@mastra/core';
|
|
2
|
+
export interface AstraDbOptions {
|
|
3
|
+
token: string;
|
|
4
|
+
endpoint: string;
|
|
5
|
+
keyspace?: string;
|
|
6
|
+
}
|
|
7
|
+
export declare class AstraVector extends MastraVector {
|
|
8
|
+
#private;
|
|
9
|
+
constructor({ token, endpoint, keyspace }: AstraDbOptions);
|
|
10
|
+
/**
|
|
11
|
+
* Creates a new collection with the specified configuration.
|
|
12
|
+
*
|
|
13
|
+
* @param {string} indexName - The name of the collection to create.
|
|
14
|
+
* @param {number} dimension - The dimension of the vectors to be stored in the collection.
|
|
15
|
+
* @param {'cosine' | 'euclidean' | 'dotproduct'} [metric=cosine] - The metric to use to sort vectors in the collection.
|
|
16
|
+
* @returns {Promise<void>} A promise that resolves when the collection is created.
|
|
17
|
+
*/
|
|
18
|
+
createIndex(indexName: string, dimension: number, metric?: 'cosine' | 'euclidean' | 'dotproduct'): Promise<void>;
|
|
19
|
+
/**
|
|
20
|
+
* Inserts or updates vectors in the specified collection.
|
|
21
|
+
*
|
|
22
|
+
* @param {string} indexName - The name of the collection to upsert into.
|
|
23
|
+
* @param {number[][]} vectors - An array of vectors to upsert.
|
|
24
|
+
* @param {Record<string, any>[]} [metadata] - An optional array of metadata objects corresponding to each vector.
|
|
25
|
+
* @param {string[]} [ids] - An optional array of IDs corresponding to each vector. If not provided, new IDs will be generated.
|
|
26
|
+
* @returns {Promise<string[]>} A promise that resolves to an array of IDs of the upserted vectors.
|
|
27
|
+
*/
|
|
28
|
+
upsert(indexName: string, vectors: number[][], metadata?: Record<string, any>[], ids?: string[]): Promise<string[]>;
|
|
29
|
+
/**
|
|
30
|
+
* Queries the specified collection using a vector and optional filter.
|
|
31
|
+
*
|
|
32
|
+
* @param {string} indexName - The name of the collection to query.
|
|
33
|
+
* @param {number[]} queryVector - The vector to query with.
|
|
34
|
+
* @param {number} [topK] - The maximum number of results to return.
|
|
35
|
+
* @param {Record<string, any>} [filter] - An optional filter to apply to the query. For more on filters in Astra DB, see the filtering reference: https://docs.datastax.com/en/astra-db-serverless/api-reference/documents.html#operators
|
|
36
|
+
* @returns {Promise<QueryResult[]>} A promise that resolves to an array of query results.
|
|
37
|
+
*/
|
|
38
|
+
query(indexName: string, queryVector: number[], topK?: number, filter?: Record<string, any>): Promise<QueryResult[]>;
|
|
39
|
+
/**
|
|
40
|
+
* Lists all collections in the database.
|
|
41
|
+
*
|
|
42
|
+
* @returns {Promise<string[]>} A promise that resolves to an array of collection names.
|
|
43
|
+
*/
|
|
44
|
+
listIndexes(): Promise<string[]>;
|
|
45
|
+
describeIndex(indexName: string): Promise<IndexStats>;
|
|
46
|
+
/**
|
|
47
|
+
* Deletes the specified collection.
|
|
48
|
+
*
|
|
49
|
+
* @param {string} indexName - The name of the collection to delete.
|
|
50
|
+
* @returns {Promise<void>} A promise that resolves when the collection is deleted.
|
|
51
|
+
*/
|
|
52
|
+
deleteIndex(indexName: string): Promise<void>;
|
|
53
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { MastraVector, QueryResult, IndexStats } from '@mastra/core';
|
|
2
|
+
export declare class ChromaVector extends MastraVector {
|
|
3
|
+
private client;
|
|
4
|
+
private collections;
|
|
5
|
+
constructor({ path, auth, }: {
|
|
6
|
+
path: string;
|
|
7
|
+
auth?: {
|
|
8
|
+
provider: string;
|
|
9
|
+
credentials: string;
|
|
10
|
+
};
|
|
11
|
+
});
|
|
12
|
+
private getCollection;
|
|
13
|
+
private validateVectorDimensions;
|
|
14
|
+
upsert(indexName: string, vectors: number[][], metadata?: Record<string, any>[], ids?: string[]): Promise<string[]>;
|
|
15
|
+
createIndex(indexName: string, dimension: number, metric?: 'cosine' | 'euclidean' | 'dotproduct'): Promise<void>;
|
|
16
|
+
query(indexName: string, queryVector: number[], topK?: number, filter?: Record<string, any>): Promise<QueryResult[]>;
|
|
17
|
+
listIndexes(): Promise<string[]>;
|
|
18
|
+
describeIndex(indexName: string): Promise<IndexStats>;
|
|
19
|
+
deleteIndex(indexName: string): Promise<void>;
|
|
20
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { Document as Chunk } from 'llamaindex';
|
|
2
|
+
import { ChunkOptions, ChunkStrategy, ExtractParams } from './types';
|
|
3
|
+
export declare class MDocument {
|
|
4
|
+
private chunks;
|
|
5
|
+
private type;
|
|
6
|
+
constructor({ docs, type }: {
|
|
7
|
+
docs: {
|
|
8
|
+
text: string;
|
|
9
|
+
metadata?: Record<string, any>;
|
|
10
|
+
}[];
|
|
11
|
+
type: string;
|
|
12
|
+
});
|
|
13
|
+
private extract;
|
|
14
|
+
static fromText(text: string, metadata?: Record<string, any>): MDocument;
|
|
15
|
+
static fromHTML(html: string, metadata?: Record<string, any>): MDocument;
|
|
16
|
+
static fromMarkdown(markdown: string, metadata?: Record<string, any>): MDocument;
|
|
17
|
+
static fromJSON(jsonString: string, metadata?: Record<string, any>): MDocument;
|
|
18
|
+
private defaultStrategy;
|
|
19
|
+
private chunkBy;
|
|
20
|
+
chunkRecursive(options?: ChunkOptions): Promise<void>;
|
|
21
|
+
chunkCharacter(options?: ChunkOptions): Promise<void>;
|
|
22
|
+
chunkHTML(options?: ChunkOptions): Promise<void>;
|
|
23
|
+
chunkJSON(options?: ChunkOptions): Promise<void>;
|
|
24
|
+
chunkLatex(options?: ChunkOptions): Promise<void>;
|
|
25
|
+
chunkToken(options?: ChunkOptions): Promise<void>;
|
|
26
|
+
chunkMarkdown(options?: ChunkOptions): Promise<void>;
|
|
27
|
+
chunk(params?: {
|
|
28
|
+
strategy?: ChunkStrategy;
|
|
29
|
+
options?: ChunkOptions;
|
|
30
|
+
extract?: ExtractParams;
|
|
31
|
+
}): Promise<MDocument['chunks']>;
|
|
32
|
+
getDocs(): Chunk[];
|
|
33
|
+
getText(): string[];
|
|
34
|
+
getMetadata(): Record<string, any>[];
|
|
35
|
+
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import { ChunkOptions, Language } from '../types';
|
|
2
|
+
import { TextTransformer } from './text';
|
|
3
|
+
export declare class CharacterTransformer extends TextTransformer {
|
|
4
|
+
protected separator: string;
|
|
5
|
+
protected isSeparatorRegex: boolean;
|
|
6
|
+
constructor({ separator, isSeparatorRegex, options, }: {
|
|
7
|
+
separator?: string;
|
|
8
|
+
isSeparatorRegex?: boolean;
|
|
9
|
+
options?: {
|
|
10
|
+
chunkSize?: number;
|
|
11
|
+
chunkOverlap?: number;
|
|
12
|
+
lengthFunction?: (text: string) => number;
|
|
13
|
+
keepSeparator?: boolean | 'start' | 'end';
|
|
14
|
+
addStartIndex?: boolean;
|
|
15
|
+
stripWhitespace?: boolean;
|
|
16
|
+
};
|
|
17
|
+
});
|
|
18
|
+
splitText({ text }: {
|
|
19
|
+
text: string;
|
|
20
|
+
}): string[];
|
|
21
|
+
private __splitChunk;
|
|
22
|
+
}
|
|
23
|
+
export declare class RecursiveCharacterTransformer extends TextTransformer {
|
|
24
|
+
protected separators: string[];
|
|
25
|
+
protected isSeparatorRegex: boolean;
|
|
26
|
+
constructor({ separators, isSeparatorRegex, options, }: {
|
|
27
|
+
separators?: string[];
|
|
28
|
+
isSeparatorRegex?: boolean;
|
|
29
|
+
options?: ChunkOptions;
|
|
30
|
+
});
|
|
31
|
+
private _splitText;
|
|
32
|
+
splitText({ text }: {
|
|
33
|
+
text: string;
|
|
34
|
+
}): string[];
|
|
35
|
+
static fromLanguage(language: Language, options?: {
|
|
36
|
+
chunkSize?: number;
|
|
37
|
+
chunkOverlap?: number;
|
|
38
|
+
lengthFunction?: (text: string) => number;
|
|
39
|
+
keepSeparator?: boolean | 'start' | 'end';
|
|
40
|
+
addStartIndex?: boolean;
|
|
41
|
+
stripWhitespace?: boolean;
|
|
42
|
+
}): RecursiveCharacterTransformer;
|
|
43
|
+
static getSeparatorsForLanguage(language: Language): string[];
|
|
44
|
+
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { Document } from 'llamaindex';
|
|
2
|
+
export declare class HTMLHeaderTransformer {
|
|
3
|
+
private headersToSplitOn;
|
|
4
|
+
private returnEachElement;
|
|
5
|
+
constructor(headersToSplitOn: [string, string][], returnEachElement?: boolean);
|
|
6
|
+
splitText({ text }: {
|
|
7
|
+
text: string;
|
|
8
|
+
}): Document[];
|
|
9
|
+
private getXPath;
|
|
10
|
+
private aggregateElementsToChunks;
|
|
11
|
+
createDocuments(texts: string[], metadatas?: Record<string, any>[]): Document[];
|
|
12
|
+
transformDocuments(documents: Document[]): Document[];
|
|
13
|
+
}
|
|
14
|
+
export declare class HTMLSectionTransformer {
|
|
15
|
+
private headersToSplitOn;
|
|
16
|
+
private options;
|
|
17
|
+
constructor(headersToSplitOn: [string, string][], options?: Record<string, any>);
|
|
18
|
+
splitText(text: string): Document[];
|
|
19
|
+
splitDocuments(documents: Document[]): Promise<Document[]>;
|
|
20
|
+
createDocuments(texts: string[], metadatas?: Record<string, any>[]): Document[];
|
|
21
|
+
private splitHtmlByHeaders;
|
|
22
|
+
transformDocuments(documents: Document[]): Document[];
|
|
23
|
+
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { Document } from 'llamaindex';
|
|
2
|
+
export declare class RecursiveJsonTransformer {
|
|
3
|
+
private maxChunkSize;
|
|
4
|
+
private minChunkSize;
|
|
5
|
+
constructor({ maxChunkSize, minChunkSize }: {
|
|
6
|
+
maxChunkSize: number;
|
|
7
|
+
minChunkSize?: number;
|
|
8
|
+
});
|
|
9
|
+
private static jsonSize;
|
|
10
|
+
/**
|
|
11
|
+
* Transform JSON data while handling circular references
|
|
12
|
+
*/
|
|
13
|
+
transform(data: Record<string, any>): Record<string, any>;
|
|
14
|
+
/**
|
|
15
|
+
* Set a value in a nested dictionary based on the given path
|
|
16
|
+
*/
|
|
17
|
+
private static setNestedDict;
|
|
18
|
+
/**
|
|
19
|
+
* Convert lists in the JSON structure to dictionaries with index-based keys
|
|
20
|
+
*/
|
|
21
|
+
private listToDictPreprocessing;
|
|
22
|
+
/**
|
|
23
|
+
* Split json into maximum size dictionaries while preserving structure
|
|
24
|
+
*/
|
|
25
|
+
private jsonSplit;
|
|
26
|
+
/**
|
|
27
|
+
* Splits JSON into a list of JSON chunks
|
|
28
|
+
*/
|
|
29
|
+
splitJson({ jsonData, convertLists, }: {
|
|
30
|
+
jsonData: Record<string, any>;
|
|
31
|
+
convertLists?: boolean;
|
|
32
|
+
}): Record<string, any>[];
|
|
33
|
+
private escapeNonAscii;
|
|
34
|
+
/**
|
|
35
|
+
* Splits JSON into a list of JSON formatted strings
|
|
36
|
+
*/
|
|
37
|
+
splitText({ jsonData, convertLists, ensureAscii, }: {
|
|
38
|
+
jsonData: Record<string, any>;
|
|
39
|
+
convertLists?: boolean;
|
|
40
|
+
ensureAscii?: boolean;
|
|
41
|
+
}): string[];
|
|
42
|
+
/**
|
|
43
|
+
* Create documents from a list of json objects
|
|
44
|
+
*/
|
|
45
|
+
createDocuments({ texts, convertLists, ensureAscii, metadatas, }: {
|
|
46
|
+
texts: string[];
|
|
47
|
+
convertLists?: boolean;
|
|
48
|
+
ensureAscii?: boolean;
|
|
49
|
+
metadatas?: Record<string, any>[];
|
|
50
|
+
}): Document[];
|
|
51
|
+
transformDocuments({ ensureAscii, documents, convertLists, }: {
|
|
52
|
+
ensureAscii?: boolean;
|
|
53
|
+
convertLists?: boolean;
|
|
54
|
+
documents: Document[];
|
|
55
|
+
}): Document[];
|
|
56
|
+
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { RecursiveCharacterTransformer } from './character';
|
|
2
|
+
export declare class LatexTransformer extends RecursiveCharacterTransformer {
|
|
3
|
+
constructor(options?: {
|
|
4
|
+
chunkSize?: number;
|
|
5
|
+
chunkOverlap?: number;
|
|
6
|
+
lengthFunction?: (text: string) => number;
|
|
7
|
+
keepSeparator?: boolean | 'start' | 'end';
|
|
8
|
+
addStartIndex?: boolean;
|
|
9
|
+
stripWhitespace?: boolean;
|
|
10
|
+
});
|
|
11
|
+
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { Document } from 'llamaindex';
|
|
2
|
+
import { RecursiveCharacterTransformer } from './character';
|
|
3
|
+
export declare class MarkdownTransformer extends RecursiveCharacterTransformer {
|
|
4
|
+
constructor(options?: {
|
|
5
|
+
chunkSize?: number;
|
|
6
|
+
chunkOverlap?: number;
|
|
7
|
+
lengthFunction?: (text: string) => number;
|
|
8
|
+
keepSeparator?: boolean | 'start' | 'end';
|
|
9
|
+
addStartIndex?: boolean;
|
|
10
|
+
stripWhitespace?: boolean;
|
|
11
|
+
});
|
|
12
|
+
}
|
|
13
|
+
export declare class MarkdownHeaderTransformer {
|
|
14
|
+
private headersToSplitOn;
|
|
15
|
+
private returnEachLine;
|
|
16
|
+
private stripHeaders;
|
|
17
|
+
constructor(headersToSplitOn: [string, string][], returnEachLine?: boolean, stripHeaders?: boolean);
|
|
18
|
+
private aggregateLinesToChunks;
|
|
19
|
+
splitText({ text }: {
|
|
20
|
+
text: string;
|
|
21
|
+
}): Document[];
|
|
22
|
+
createDocuments(texts: string[], metadatas?: Record<string, any>[]): Document[];
|
|
23
|
+
transformDocuments(documents: Document[]): Document[];
|
|
24
|
+
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { Document } from 'llamaindex';
|
|
2
|
+
import { ChunkOptions } from '../types';
|
|
3
|
+
import { Transformer } from './transformer';
|
|
4
|
+
export declare abstract class TextTransformer implements Transformer {
|
|
5
|
+
protected chunkSize: number;
|
|
6
|
+
protected chunkOverlap: number;
|
|
7
|
+
protected lengthFunction: (text: string) => number;
|
|
8
|
+
protected keepSeparator: boolean | 'start' | 'end';
|
|
9
|
+
protected addStartIndex: boolean;
|
|
10
|
+
protected stripWhitespace: boolean;
|
|
11
|
+
constructor({ chunkSize, chunkOverlap, lengthFunction, keepSeparator, addStartIndex, stripWhitespace, }: ChunkOptions);
|
|
12
|
+
setAddStartIndex(value: boolean): void;
|
|
13
|
+
abstract splitText({ text }: {
|
|
14
|
+
text: string;
|
|
15
|
+
}): string[];
|
|
16
|
+
createDocuments(texts: string[], metadatas?: Record<string, any>[]): Document[];
|
|
17
|
+
splitDocuments(documents: Document[]): Document[];
|
|
18
|
+
transformDocuments(documents: Document[]): Document[];
|
|
19
|
+
protected joinDocs(docs: string[], separator: string): string | null;
|
|
20
|
+
protected mergeSplits(splits: string[], separator: string): string[];
|
|
21
|
+
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { TiktokenModel, TiktokenEncoding } from 'js-tiktoken';
|
|
2
|
+
import { TextTransformer } from './text';
|
|
3
|
+
interface Tokenizer {
|
|
4
|
+
chunkOverlap: number;
|
|
5
|
+
tokensPerChunk: number;
|
|
6
|
+
decode: (tokens: number[]) => string;
|
|
7
|
+
encode: (text: string) => number[];
|
|
8
|
+
}
|
|
9
|
+
export declare function splitTextOnTokens({ text, tokenizer }: {
|
|
10
|
+
text: string;
|
|
11
|
+
tokenizer: Tokenizer;
|
|
12
|
+
}): string[];
|
|
13
|
+
export declare class TokenTransformer extends TextTransformer {
|
|
14
|
+
private tokenizer;
|
|
15
|
+
private allowedSpecial;
|
|
16
|
+
private disallowedSpecial;
|
|
17
|
+
constructor({ encodingName, modelName, allowedSpecial, disallowedSpecial, options, }: {
|
|
18
|
+
encodingName: TiktokenEncoding;
|
|
19
|
+
modelName?: TiktokenModel;
|
|
20
|
+
allowedSpecial?: Set<string> | 'all';
|
|
21
|
+
disallowedSpecial?: Set<string> | 'all';
|
|
22
|
+
options: {
|
|
23
|
+
chunkSize?: number;
|
|
24
|
+
chunkOverlap?: number;
|
|
25
|
+
lengthFunction?: (text: string) => number;
|
|
26
|
+
keepSeparator?: boolean | 'start' | 'end';
|
|
27
|
+
addStartIndex?: boolean;
|
|
28
|
+
stripWhitespace?: boolean;
|
|
29
|
+
};
|
|
30
|
+
});
|
|
31
|
+
splitText({ text }: {
|
|
32
|
+
text: string;
|
|
33
|
+
}): string[];
|
|
34
|
+
static fromTikToken({ encodingName, modelName, options, }: {
|
|
35
|
+
encodingName?: TiktokenEncoding;
|
|
36
|
+
modelName?: TiktokenModel;
|
|
37
|
+
options?: {
|
|
38
|
+
chunkSize?: number;
|
|
39
|
+
chunkOverlap?: number;
|
|
40
|
+
allowedSpecial?: Set<string> | 'all';
|
|
41
|
+
disallowedSpecial?: Set<string> | 'all';
|
|
42
|
+
};
|
|
43
|
+
}): TokenTransformer;
|
|
44
|
+
}
|
|
45
|
+
export {};
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { TiktokenEncoding, TiktokenModel } from 'js-tiktoken';
|
|
2
|
+
import { LLM, TitleCombinePrompt, TitleExtractorPrompt, SummaryPrompt, QuestionExtractPrompt, KeywordExtractPrompt } from 'llamaindex';
|
|
3
|
+
export declare enum Language {
|
|
4
|
+
CPP = "cpp",
|
|
5
|
+
GO = "go",
|
|
6
|
+
JAVA = "java",
|
|
7
|
+
KOTLIN = "kotlin",
|
|
8
|
+
JS = "js",
|
|
9
|
+
TS = "ts",
|
|
10
|
+
PHP = "php",
|
|
11
|
+
PROTO = "proto",
|
|
12
|
+
PYTHON = "python",
|
|
13
|
+
RST = "rst",
|
|
14
|
+
RUBY = "ruby",
|
|
15
|
+
RUST = "rust",
|
|
16
|
+
SCALA = "scala",
|
|
17
|
+
SWIFT = "swift",
|
|
18
|
+
MARKDOWN = "markdown",
|
|
19
|
+
LATEX = "latex",
|
|
20
|
+
HTML = "html",
|
|
21
|
+
SOL = "sol",
|
|
22
|
+
CSHARP = "csharp",
|
|
23
|
+
COBOL = "cobol",
|
|
24
|
+
C = "c",
|
|
25
|
+
LUA = "lua",
|
|
26
|
+
PERL = "perl",
|
|
27
|
+
HASKELL = "haskell",
|
|
28
|
+
ELIXIR = "elixir",
|
|
29
|
+
POWERSHELL = "powershell"
|
|
30
|
+
}
|
|
31
|
+
export type ExtractParams = {
|
|
32
|
+
title?: TitleExtractorsArgs | boolean;
|
|
33
|
+
summary?: SummaryExtractArgs | boolean;
|
|
34
|
+
questions?: QuestionAnswerExtractArgs | boolean;
|
|
35
|
+
keywords?: boolean | Record<string, any>;
|
|
36
|
+
};
|
|
37
|
+
export type ChunkOptions = {
|
|
38
|
+
headers?: [string, string][];
|
|
39
|
+
returnEachLine?: boolean;
|
|
40
|
+
sections?: [string, string][];
|
|
41
|
+
separator?: string;
|
|
42
|
+
separators?: string[];
|
|
43
|
+
isSeparatorRegex?: boolean;
|
|
44
|
+
chunkSize?: number;
|
|
45
|
+
maxChunkSize?: number;
|
|
46
|
+
minChunkSize?: number;
|
|
47
|
+
chunkOverlap?: number;
|
|
48
|
+
lengthFunction?: (text: string) => number;
|
|
49
|
+
keepSeparator?: boolean | 'start' | 'end';
|
|
50
|
+
addStartIndex?: boolean;
|
|
51
|
+
stripWhitespace?: boolean;
|
|
52
|
+
language?: Language;
|
|
53
|
+
ensureAscii?: boolean;
|
|
54
|
+
convertLists?: boolean;
|
|
55
|
+
encodingName?: TiktokenEncoding;
|
|
56
|
+
modelName?: TiktokenModel;
|
|
57
|
+
allowedSpecial?: Set<string> | 'all';
|
|
58
|
+
disallowedSpecial?: Set<string> | 'all';
|
|
59
|
+
stripHeaders?: boolean;
|
|
60
|
+
};
|
|
61
|
+
export type TitleExtractorsArgs = {
|
|
62
|
+
llm?: LLM;
|
|
63
|
+
nodes?: number;
|
|
64
|
+
nodeTemplate?: TitleExtractorPrompt['template'];
|
|
65
|
+
combineTemplate?: TitleCombinePrompt['template'];
|
|
66
|
+
};
|
|
67
|
+
export type SummaryExtractArgs = {
|
|
68
|
+
llm?: LLM;
|
|
69
|
+
summaries?: string[];
|
|
70
|
+
promptTemplate?: SummaryPrompt['template'];
|
|
71
|
+
};
|
|
72
|
+
export type QuestionAnswerExtractArgs = {
|
|
73
|
+
llm?: LLM;
|
|
74
|
+
questions?: number;
|
|
75
|
+
promptTemplate?: QuestionExtractPrompt['template'];
|
|
76
|
+
embeddingOnly?: boolean;
|
|
77
|
+
};
|
|
78
|
+
export type KeywordExtractArgs = {
|
|
79
|
+
llm?: LLM;
|
|
80
|
+
keywords?: number;
|
|
81
|
+
promptTemplate?: KeywordExtractPrompt['template'];
|
|
82
|
+
};
|
|
83
|
+
export type ChunkStrategy = 'recursive' | 'character' | 'token' | 'markdown' | 'html' | 'json' | 'latex';
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import { EmbeddingOptions } from '@mastra/core';
|
|
2
|
+
import { Document as Chunk } from 'llamaindex';
|
|
3
|
+
export declare const embed: (chunk: Chunk | string | string[] | Chunk[], options: EmbeddingOptions) => Promise<import("ai").EmbedManyResult<string> | import("ai").EmbedResult<string>>;
|
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { IndexStats, QueryResult, MastraVector } from '@mastra/core';
|
|
2
|
+
export declare class PgVector extends MastraVector {
|
|
3
|
+
private pool;
|
|
4
|
+
constructor(connectionString: string);
|
|
5
|
+
query(indexName: string, queryVector: number[], topK?: number, filter?: Record<string, any>, minScore?: number): Promise<QueryResult[]>;
|
|
6
|
+
upsert(indexName: string, vectors: number[][], metadata?: Record<string, any>[], ids?: string[]): Promise<string[]>;
|
|
7
|
+
createIndex(indexName: string, dimension: number, metric?: 'cosine' | 'euclidean' | 'dotproduct'): Promise<void>;
|
|
8
|
+
listIndexes(): Promise<string[]>;
|
|
9
|
+
describeIndex(indexName: string): Promise<IndexStats>;
|
|
10
|
+
deleteIndex(indexName: string): Promise<void>;
|
|
11
|
+
truncateIndex(indexName: string): Promise<void>;
|
|
12
|
+
disconnect(): Promise<void>;
|
|
13
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { MastraVector, QueryResult, IndexStats } from '@mastra/core';
|
|
2
|
+
export declare class PineconeVector extends MastraVector {
|
|
3
|
+
private client;
|
|
4
|
+
constructor(apiKey: string, environment?: string);
|
|
5
|
+
createIndex(indexName: string, dimension: number, metric?: 'cosine' | 'euclidean' | 'dotproduct'): Promise<void>;
|
|
6
|
+
upsert(indexName: string, vectors: number[][], metadata?: Record<string, any>[], ids?: string[]): Promise<string[]>;
|
|
7
|
+
query(indexName: string, queryVector: number[], topK?: number, filter?: Record<string, any>): Promise<QueryResult[]>;
|
|
8
|
+
listIndexes(): Promise<string[]>;
|
|
9
|
+
describeIndex(indexName: string): Promise<IndexStats>;
|
|
10
|
+
deleteIndex(indexName: string): Promise<void>;
|
|
11
|
+
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { MastraVector, QueryResult, IndexStats } from '@mastra/core';
|
|
2
|
+
export declare class QdrantVector extends MastraVector {
|
|
3
|
+
private client;
|
|
4
|
+
constructor(url: string, apiKey?: string, https?: boolean);
|
|
5
|
+
upsert(indexName: string, vectors: number[][], metadata?: Record<string, any>[], ids?: string[]): Promise<string[]>;
|
|
6
|
+
createIndex(indexName: string, dimension: number, metric?: 'cosine' | 'euclidean' | 'dotproduct'): Promise<void>;
|
|
7
|
+
query(indexName: string, queryVector: number[], topK?: number, filter?: Record<string, any>): Promise<QueryResult[]>;
|
|
8
|
+
listIndexes(): Promise<string[]>;
|
|
9
|
+
describeIndex(indexName: string): Promise<IndexStats>;
|
|
10
|
+
deleteIndex(indexName: string): Promise<void>;
|
|
11
|
+
}
|