@mastra/rag 1.0.5 → 1.0.6-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +2 -21
- package/CHANGELOG.md +12 -0
- package/dist/document/document.d.ts +32 -0
- package/dist/document/document.d.ts.map +1 -0
- package/dist/document/extractors/base.d.ts +12 -0
- package/dist/document/extractors/base.d.ts.map +1 -0
- package/dist/document/extractors/index.d.ts +6 -0
- package/dist/document/extractors/index.d.ts.map +1 -0
- package/dist/document/extractors/keywords.d.ts +50 -0
- package/dist/document/extractors/keywords.d.ts.map +1 -0
- package/dist/document/extractors/questions.d.ts +42 -0
- package/dist/document/extractors/questions.d.ts.map +1 -0
- package/dist/document/extractors/summary.d.ts +39 -0
- package/dist/document/extractors/summary.d.ts.map +1 -0
- package/dist/document/extractors/title.d.ts +31 -0
- package/dist/document/extractors/title.d.ts.map +1 -0
- package/dist/document/extractors/types.d.ts +27 -0
- package/dist/document/extractors/types.d.ts.map +1 -0
- package/dist/document/index.d.ts +3 -0
- package/dist/document/index.d.ts.map +1 -0
- package/dist/document/prompts/base.d.ts +19 -0
- package/dist/document/prompts/base.d.ts.map +1 -0
- package/dist/document/prompts/format.d.ts +8 -0
- package/dist/document/prompts/format.d.ts.map +1 -0
- package/dist/document/prompts/index.d.ts +4 -0
- package/dist/document/prompts/index.d.ts.map +1 -0
- package/dist/document/prompts/prompt.d.ts +12 -0
- package/dist/document/prompts/prompt.d.ts.map +1 -0
- package/dist/document/prompts/types.d.ts +22 -0
- package/dist/document/prompts/types.d.ts.map +1 -0
- package/dist/document/schema/index.d.ts +4 -0
- package/dist/document/schema/index.d.ts.map +1 -0
- package/dist/document/schema/node.d.ts +53 -0
- package/dist/document/schema/node.d.ts.map +1 -0
- package/dist/document/schema/types.d.ts +35 -0
- package/dist/document/schema/types.d.ts.map +1 -0
- package/dist/document/transformers/character.d.ts +46 -0
- package/dist/document/transformers/character.d.ts.map +1 -0
- package/dist/document/transformers/html.d.ts +26 -0
- package/dist/document/transformers/html.d.ts.map +1 -0
- package/dist/document/transformers/json.d.ts +91 -0
- package/dist/document/transformers/json.d.ts.map +1 -0
- package/dist/document/transformers/latex.d.ts +12 -0
- package/dist/document/transformers/latex.d.ts.map +1 -0
- package/dist/document/transformers/markdown.d.ts +25 -0
- package/dist/document/transformers/markdown.d.ts.map +1 -0
- package/dist/document/transformers/text.d.ts +22 -0
- package/dist/document/transformers/text.d.ts.map +1 -0
- package/dist/document/transformers/token.d.ts +46 -0
- package/dist/document/transformers/token.d.ts.map +1 -0
- package/dist/document/transformers/transformer.d.ts +5 -0
- package/dist/document/transformers/transformer.d.ts.map +1 -0
- package/dist/document/types.d.ts +66 -0
- package/dist/document/types.d.ts.map +1 -0
- package/dist/graph-rag/index.d.ts +57 -0
- package/dist/graph-rag/index.d.ts.map +1 -0
- package/dist/index.cjs +2 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.ts +9 -33
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -0
- package/dist/rerank/index.d.ts +44 -0
- package/dist/rerank/index.d.ts.map +1 -0
- package/dist/rerank/relevance/cohere/index.d.ts +8 -0
- package/dist/rerank/relevance/cohere/index.d.ts.map +1 -0
- package/dist/rerank/relevance/index.d.ts +4 -0
- package/dist/rerank/relevance/index.d.ts.map +1 -0
- package/dist/rerank/relevance/mastra-agent/index.d.ts +8 -0
- package/dist/rerank/relevance/mastra-agent/index.d.ts.map +1 -0
- package/dist/rerank/relevance/zeroentropy/index.d.ts +8 -0
- package/dist/rerank/relevance/zeroentropy/index.d.ts.map +1 -0
- package/dist/tools/document-chunker.d.ts +7 -0
- package/dist/tools/document-chunker.d.ts.map +1 -0
- package/dist/tools/graph-rag.d.ts +26 -0
- package/dist/tools/graph-rag.d.ts.map +1 -0
- package/dist/tools/index.d.ts +4 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/types.d.ts +97 -0
- package/dist/tools/types.d.ts.map +1 -0
- package/dist/tools/vector-query.d.ts +26 -0
- package/dist/tools/vector-query.d.ts.map +1 -0
- package/dist/utils/convert-sources.d.ts +18 -0
- package/dist/utils/convert-sources.d.ts.map +1 -0
- package/dist/utils/default-settings.d.ts +6 -0
- package/dist/utils/default-settings.d.ts.map +1 -0
- package/dist/utils/index.d.ts +4 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/tool-schemas.d.ts +63 -0
- package/dist/utils/tool-schemas.d.ts.map +1 -0
- package/dist/utils/vector-prompts.d.ts +32 -0
- package/dist/utils/vector-prompts.d.ts.map +1 -0
- package/dist/utils/vector-search.d.ts +23 -0
- package/dist/utils/vector-search.d.ts.map +1 -0
- package/package.json +5 -5
- package/src/document/extractors/keywords.test.ts +15 -9
- package/tsconfig.build.json +9 -0
- package/tsconfig.json +1 -1
- package/tsup.config.ts +22 -0
- package/dist/_tsup-dts-rollup.d.cts +0 -1307
- package/dist/_tsup-dts-rollup.d.ts +0 -1307
- package/dist/index.d.cts +0 -33
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import { Document } from '../schema';
|
|
2
|
+
export declare class RecursiveJsonTransformer {
|
|
3
|
+
private maxSize;
|
|
4
|
+
private minSize;
|
|
5
|
+
constructor({ maxSize, minSize }: {
|
|
6
|
+
maxSize: number;
|
|
7
|
+
minSize?: number;
|
|
8
|
+
});
|
|
9
|
+
private static jsonSize;
|
|
10
|
+
/**
|
|
11
|
+
* Transform JSON data while handling circular references
|
|
12
|
+
*/
|
|
13
|
+
transform(data: Record<string, any>): Record<string, any>;
|
|
14
|
+
/**
|
|
15
|
+
* Set a value in a nested dictionary based on the given path
|
|
16
|
+
*/
|
|
17
|
+
private static setNestedDict;
|
|
18
|
+
/**
|
|
19
|
+
* Convert lists in the JSON structure to dictionaries with index-based keys
|
|
20
|
+
*/
|
|
21
|
+
private listToDictPreprocessing;
|
|
22
|
+
/**
|
|
23
|
+
* Handles primitive values (strings, numbers, etc) by either adding them to the current chunk
|
|
24
|
+
* or creating new chunks if they don't fit
|
|
25
|
+
*/
|
|
26
|
+
private handlePrimitiveValue;
|
|
27
|
+
/**
|
|
28
|
+
* Creates a nested dictionary chunk from a value and path
|
|
29
|
+
* e.g., path ['a', 'b'], value 'c' becomes { a: { b: 'c' } }
|
|
30
|
+
*/
|
|
31
|
+
private createChunk;
|
|
32
|
+
/**
|
|
33
|
+
* Checks if value is within size limits
|
|
34
|
+
*/
|
|
35
|
+
private isWithinSizeLimit;
|
|
36
|
+
/**
|
|
37
|
+
* Splits arrays into chunks based on size limits
|
|
38
|
+
* Handles nested objects by recursing into handleNestedObject
|
|
39
|
+
*/
|
|
40
|
+
private handleArray;
|
|
41
|
+
/**
|
|
42
|
+
* Splits objects into chunks based on size limits
|
|
43
|
+
* Handles nested arrays and objects by recursing into handleArray and handleNestedObject
|
|
44
|
+
*/
|
|
45
|
+
private handleNestedObject;
|
|
46
|
+
/**
|
|
47
|
+
* Splits long strings into smaller chunks at word boundaries
|
|
48
|
+
* Ensures each chunk is within maxSize limit
|
|
49
|
+
*/
|
|
50
|
+
private splitLongString;
|
|
51
|
+
/**
|
|
52
|
+
* Core chunking logic that processes JSON data recursively
|
|
53
|
+
* Handles arrays, objects, and primitive values while maintaining structure
|
|
54
|
+
*/
|
|
55
|
+
private jsonSplit;
|
|
56
|
+
/**
|
|
57
|
+
* Splits JSON into a list of JSON chunks
|
|
58
|
+
*/
|
|
59
|
+
splitJson({ jsonData, convertLists, }: {
|
|
60
|
+
jsonData: Record<string, any>;
|
|
61
|
+
convertLists?: boolean;
|
|
62
|
+
}): Record<string, any>[];
|
|
63
|
+
/**
|
|
64
|
+
* Converts Unicode characters to their escaped ASCII representation
|
|
65
|
+
* e.g., 'café' becomes 'caf\u00e9'
|
|
66
|
+
*/
|
|
67
|
+
private escapeNonAscii;
|
|
68
|
+
/**
|
|
69
|
+
* Splits JSON into a list of JSON formatted strings
|
|
70
|
+
*/
|
|
71
|
+
splitText({ jsonData, convertLists, ensureAscii, }: {
|
|
72
|
+
jsonData: Record<string, any>;
|
|
73
|
+
convertLists?: boolean;
|
|
74
|
+
ensureAscii?: boolean;
|
|
75
|
+
}): string[];
|
|
76
|
+
/**
|
|
77
|
+
* Create documents from a list of json objects
|
|
78
|
+
*/
|
|
79
|
+
createDocuments({ texts, convertLists, ensureAscii, metadatas, }: {
|
|
80
|
+
texts: string[];
|
|
81
|
+
convertLists?: boolean;
|
|
82
|
+
ensureAscii?: boolean;
|
|
83
|
+
metadatas?: Record<string, any>[];
|
|
84
|
+
}): Document[];
|
|
85
|
+
transformDocuments({ ensureAscii, documents, convertLists, }: {
|
|
86
|
+
ensureAscii?: boolean;
|
|
87
|
+
convertLists?: boolean;
|
|
88
|
+
documents: Document[];
|
|
89
|
+
}): Document[];
|
|
90
|
+
}
|
|
91
|
+
//# sourceMappingURL=json.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"json.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAErC,qBAAa,wBAAwB;IACnC,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,OAAO,CAAS;gBAEZ,EAAE,OAAc,EAAE,OAAO,EAAE,EAAE;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE;IAK9E,OAAO,CAAC,MAAM,CAAC,QAAQ;IAoCvB;;OAEG;IACI,SAAS,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAmChE;;OAEG;IACH,OAAO,CAAC,MAAM,CAAC,aAAa;IAS5B;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAU/B;;;OAGG;IACH,OAAO,CAAC,oBAAoB;IA0C5B;;;OAGG;IACH,OAAO,CAAC,WAAW;IAMnB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAOzB;;;OAGG;IACH,OAAO,CAAC,WAAW;IA8DnB;;;OAGG;IACH,OAAO,CAAC,kBAAkB;IAmE1B;;;OAGG;IACH,OAAO,CAAC,eAAe;IAuBvB;;;OAGG;IACH,OAAO,CAAC,SAAS;IAuDjB;;OAEG;IACH,SAAS,CAAC,EACR,QAAQ,EACR,YAAoB,GACrB,EAAE;QACD,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC9B,YAAY,CAAC,EAAE,OAAO,CAAC;KACxB,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE;IAYzB;;;OAGG;IACH,OAAO,CAAC,cAAc;IAiBtB;;OAEG;IACH,SAAS,CAAC,EACR,QAAQ,EACR,YAAoB,EACpB,WAAkB,GACnB,EAAE;QACD,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC9B,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,WAAW,CAAC,EAAE,OAAO,CAAC;KACvB,GAAG,MAAM,EAAE;IAoBZ;;OAEG;IACH,eAAe,CAAC,EACd,KAAK,EACL,YAAoB,EACpB,WAAkB,EAClB,SAAS,GACV,EAAE;QACD,KAAK,EAAE,MAAM,EAAE,CAAC;QAChB,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,CAAC;KACnC,GAAG,QAAQ,EAAE;IAoBd,kBAAkB,CAAC,EACjB,WAAW,EACX,SAAS,EACT,YAAY,GACb,EAAE;QACD,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,SAAS,EAAE,QAAQ,EAAE,CAAC;KACvB,GAAG,QAAQ,EAAE;CAiBf"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { RecursiveCharacterTransformer } from './character';
|
|
2
|
+
export declare class LatexTransformer extends RecursiveCharacterTransformer {
|
|
3
|
+
constructor(options?: {
|
|
4
|
+
size?: number;
|
|
5
|
+
overlap?: number;
|
|
6
|
+
lengthFunction?: (text: string) => number;
|
|
7
|
+
keepSeparator?: boolean | 'start' | 'end';
|
|
8
|
+
addStartIndex?: boolean;
|
|
9
|
+
stripWhitespace?: boolean;
|
|
10
|
+
});
|
|
11
|
+
}
|
|
12
|
+
//# sourceMappingURL=latex.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"latex.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/latex.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,6BAA6B,EAAE,MAAM,aAAa,CAAC;AAE5D,qBAAa,gBAAiB,SAAQ,6BAA6B;gBAE/D,OAAO,GAAE;QACP,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;QAC1C,aAAa,CAAC,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;QAC1C,aAAa,CAAC,EAAE,OAAO,CAAC;QACxB,eAAe,CAAC,EAAE,OAAO,CAAC;KACtB;CAKT"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { Document } from '../schema';
|
|
2
|
+
import { RecursiveCharacterTransformer } from './character';
|
|
3
|
+
export declare class MarkdownTransformer extends RecursiveCharacterTransformer {
|
|
4
|
+
constructor(options?: {
|
|
5
|
+
chunkSize?: number;
|
|
6
|
+
chunkOverlap?: number;
|
|
7
|
+
lengthFunction?: (text: string) => number;
|
|
8
|
+
keepSeparator?: boolean | 'start' | 'end';
|
|
9
|
+
addStartIndex?: boolean;
|
|
10
|
+
stripWhitespace?: boolean;
|
|
11
|
+
});
|
|
12
|
+
}
|
|
13
|
+
export declare class MarkdownHeaderTransformer {
|
|
14
|
+
private headersToSplitOn;
|
|
15
|
+
private returnEachLine;
|
|
16
|
+
private stripHeaders;
|
|
17
|
+
constructor(headersToSplitOn: [string, string][], returnEachLine?: boolean, stripHeaders?: boolean);
|
|
18
|
+
private aggregateLinesToChunks;
|
|
19
|
+
splitText({ text }: {
|
|
20
|
+
text: string;
|
|
21
|
+
}): Document[];
|
|
22
|
+
createDocuments(texts: string[], metadatas?: Record<string, any>[]): Document[];
|
|
23
|
+
transformDocuments(documents: Document[]): Document[];
|
|
24
|
+
}
|
|
25
|
+
//# sourceMappingURL=markdown.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAIrC,OAAO,EAAE,6BAA6B,EAAE,MAAM,aAAa,CAAC;AAa5D,qBAAa,mBAAoB,SAAQ,6BAA6B;gBAElE,OAAO,GAAE;QACP,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;QAC1C,aAAa,CAAC,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;QAC1C,aAAa,CAAC,EAAE,OAAO,CAAC;QACxB,eAAe,CAAC,EAAE,OAAO,CAAC;KACtB;CAKT;AAED,qBAAa,yBAAyB;IACpC,OAAO,CAAC,gBAAgB,CAAqB;IAC7C,OAAO,CAAC,cAAc,CAAU;IAChC,OAAO,CAAC,YAAY,CAAU;gBAElB,gBAAgB,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,EAAE,cAAc,GAAE,OAAe,EAAE,YAAY,GAAE,OAAc;IAM/G,OAAO,CAAC,sBAAsB;IAuD9B,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,QAAQ,EAAE;IAqHjD,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IAmB/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { Document } from '../schema';
|
|
2
|
+
import type { ChunkOptions } from '../types';
|
|
3
|
+
import type { Transformer } from './transformer';
|
|
4
|
+
export declare abstract class TextTransformer implements Transformer {
|
|
5
|
+
protected size: number;
|
|
6
|
+
protected overlap: number;
|
|
7
|
+
protected lengthFunction: (text: string) => number;
|
|
8
|
+
protected keepSeparator: boolean | 'start' | 'end';
|
|
9
|
+
protected addStartIndex: boolean;
|
|
10
|
+
protected stripWhitespace: boolean;
|
|
11
|
+
constructor({ size, overlap, lengthFunction, keepSeparator, addStartIndex, stripWhitespace, }: ChunkOptions);
|
|
12
|
+
setAddStartIndex(value: boolean): void;
|
|
13
|
+
abstract splitText({ text }: {
|
|
14
|
+
text: string;
|
|
15
|
+
}): string[];
|
|
16
|
+
createDocuments(texts: string[], metadatas?: Record<string, any>[]): Document[];
|
|
17
|
+
splitDocuments(documents: Document[]): Document[];
|
|
18
|
+
transformDocuments(documents: Document[]): Document[];
|
|
19
|
+
protected joinDocs(docs: string[], separator: string): string | null;
|
|
20
|
+
protected mergeSplits(splits: string[], separator: string): string[];
|
|
21
|
+
}
|
|
22
|
+
//# sourceMappingURL=text.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"text.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/text.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAErC,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAE7C,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAEjD,8BAAsB,eAAgB,YAAW,WAAW;IAC1D,SAAS,CAAC,IAAI,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,cAAc,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;IACnD,SAAS,CAAC,aAAa,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;IACnD,SAAS,CAAC,aAAa,EAAE,OAAO,CAAC;IACjC,SAAS,CAAC,eAAe,EAAE,OAAO,CAAC;gBAEvB,EACV,IAAW,EACX,OAAa,EACb,cAA8C,EAC9C,aAAqB,EACrB,aAAqB,EACrB,eAAsB,GACvB,EAAE,YAAY;IAYf,gBAAgB,CAAC,KAAK,EAAE,OAAO,GAAG,IAAI;IAItC,QAAQ,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IAExD,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA4B/E,cAAc,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;IAUjD,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;IAYrD,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IAQpE,SAAS,CAAC,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE;CA4DrE"}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import type { TiktokenModel, TiktokenEncoding } from 'js-tiktoken';
|
|
2
|
+
import { TextTransformer } from './text';
|
|
3
|
+
interface Tokenizer {
|
|
4
|
+
overlap: number;
|
|
5
|
+
tokensPerChunk: number;
|
|
6
|
+
decode: (tokens: number[]) => string;
|
|
7
|
+
encode: (text: string) => number[];
|
|
8
|
+
}
|
|
9
|
+
export declare function splitTextOnTokens({ text, tokenizer }: {
|
|
10
|
+
text: string;
|
|
11
|
+
tokenizer: Tokenizer;
|
|
12
|
+
}): string[];
|
|
13
|
+
export declare class TokenTransformer extends TextTransformer {
|
|
14
|
+
private tokenizer;
|
|
15
|
+
private allowedSpecial;
|
|
16
|
+
private disallowedSpecial;
|
|
17
|
+
constructor({ encodingName, modelName, allowedSpecial, disallowedSpecial, options, }: {
|
|
18
|
+
encodingName: TiktokenEncoding;
|
|
19
|
+
modelName?: TiktokenModel;
|
|
20
|
+
allowedSpecial?: Set<string> | 'all';
|
|
21
|
+
disallowedSpecial?: Set<string> | 'all';
|
|
22
|
+
options: {
|
|
23
|
+
size?: number;
|
|
24
|
+
overlap?: number;
|
|
25
|
+
lengthFunction?: (text: string) => number;
|
|
26
|
+
keepSeparator?: boolean | 'start' | 'end';
|
|
27
|
+
addStartIndex?: boolean;
|
|
28
|
+
stripWhitespace?: boolean;
|
|
29
|
+
};
|
|
30
|
+
});
|
|
31
|
+
splitText({ text }: {
|
|
32
|
+
text: string;
|
|
33
|
+
}): string[];
|
|
34
|
+
static fromTikToken({ encodingName, modelName, options, }: {
|
|
35
|
+
encodingName?: TiktokenEncoding;
|
|
36
|
+
modelName?: TiktokenModel;
|
|
37
|
+
options?: {
|
|
38
|
+
size?: number;
|
|
39
|
+
overlap?: number;
|
|
40
|
+
allowedSpecial?: Set<string> | 'all';
|
|
41
|
+
disallowedSpecial?: Set<string> | 'all';
|
|
42
|
+
};
|
|
43
|
+
}): TokenTransformer;
|
|
44
|
+
}
|
|
45
|
+
export {};
|
|
46
|
+
//# sourceMappingURL=token.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"token.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/token.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,gBAAgB,EAAY,MAAM,aAAa,CAAC;AAG7E,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAC;AAEzC,UAAU,SAAS;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,cAAc,EAAE,MAAM,CAAC;IACvB,MAAM,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,MAAM,CAAC;IACrC,MAAM,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;CACpC;AAED,wBAAgB,iBAAiB,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,SAAS,CAAA;CAAE,GAAG,MAAM,EAAE,CAkBvG;AAED,qBAAa,gBAAiB,SAAQ,eAAe;IACnD,OAAO,CAAC,SAAS,CAAW;IAC5B,OAAO,CAAC,cAAc,CAAsB;IAC5C,OAAO,CAAC,iBAAiB,CAAsB;gBAEnC,EACV,YAA4B,EAC5B,SAAS,EACT,cAA0B,EAC1B,iBAAyB,EACzB,OAAY,GACb,EAAE;QACD,YAAY,EAAE,gBAAgB,CAAC;QAC/B,SAAS,CAAC,EAAE,aAAa,CAAC;QAC1B,cAAc,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;QACrC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;QACxC,OAAO,EAAE;YACP,IAAI,CAAC,EAAE,MAAM,CAAC;YACd,OAAO,CAAC,EAAE,MAAM,CAAC;YACjB,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;YAC1C,aAAa,CAAC,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;YAC1C,aAAa,CAAC,EAAE,OAAO,CAAC;YACxB,eAAe,CAAC,EAAE,OAAO,CAAC;SAC3B,CAAC;KACH;IAaD,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IA0B/C,MAAM,CAAC,YAAY,CAAC,EAClB,YAA4B,EAC5B,SAAS,EACT,OAAY,GACb,EAAE;QACD,YAAY,CAAC,EAAE,gBAAgB,CAAC;QAChC,SAAS,CAAC,EAAE,aAAa,CAAC;QAC1B,OAAO,CAAC,EAAE;YACR,IAAI,CAAC,EAAE,MAAM,CAAC;YACd,OAAO,CAAC,EAAE,MAAM,CAAC;YACjB,cAAc,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;YACrC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;SACzC,CAAC;KACH,GAAG,gBAAgB;CAuCrB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"transformer.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/transformer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAE1C,MAAM,WAAW,WAAW;IAC1B,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE,CAAC;CACvD"}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import type { TiktokenEncoding, TiktokenModel } from 'js-tiktoken';
|
|
2
|
+
import type { TitleExtractorsArgs, SummaryExtractArgs, QuestionAnswerExtractArgs, KeywordExtractArgs } from './extractors';
|
|
3
|
+
export declare enum Language {
|
|
4
|
+
CPP = "cpp",
|
|
5
|
+
GO = "go",
|
|
6
|
+
JAVA = "java",
|
|
7
|
+
KOTLIN = "kotlin",
|
|
8
|
+
JS = "js",
|
|
9
|
+
TS = "ts",
|
|
10
|
+
PHP = "php",
|
|
11
|
+
PROTO = "proto",
|
|
12
|
+
PYTHON = "python",
|
|
13
|
+
RST = "rst",
|
|
14
|
+
RUBY = "ruby",
|
|
15
|
+
RUST = "rust",
|
|
16
|
+
SCALA = "scala",
|
|
17
|
+
SWIFT = "swift",
|
|
18
|
+
MARKDOWN = "markdown",
|
|
19
|
+
LATEX = "latex",
|
|
20
|
+
HTML = "html",
|
|
21
|
+
SOL = "sol",
|
|
22
|
+
CSHARP = "csharp",
|
|
23
|
+
COBOL = "cobol",
|
|
24
|
+
C = "c",
|
|
25
|
+
LUA = "lua",
|
|
26
|
+
PERL = "perl",
|
|
27
|
+
HASKELL = "haskell",
|
|
28
|
+
ELIXIR = "elixir",
|
|
29
|
+
POWERSHELL = "powershell"
|
|
30
|
+
}
|
|
31
|
+
export type ExtractParams = {
|
|
32
|
+
title?: TitleExtractorsArgs | boolean;
|
|
33
|
+
summary?: SummaryExtractArgs | boolean;
|
|
34
|
+
questions?: QuestionAnswerExtractArgs | boolean;
|
|
35
|
+
keywords?: KeywordExtractArgs | boolean;
|
|
36
|
+
};
|
|
37
|
+
export type ChunkOptions = {
|
|
38
|
+
headers?: [string, string][];
|
|
39
|
+
returnEachLine?: boolean;
|
|
40
|
+
sections?: [string, string][];
|
|
41
|
+
separator?: string;
|
|
42
|
+
separators?: string[];
|
|
43
|
+
isSeparatorRegex?: boolean;
|
|
44
|
+
size?: number;
|
|
45
|
+
maxSize?: number;
|
|
46
|
+
minSize?: number;
|
|
47
|
+
overlap?: number;
|
|
48
|
+
lengthFunction?: (text: string) => number;
|
|
49
|
+
keepSeparator?: boolean | 'start' | 'end';
|
|
50
|
+
addStartIndex?: boolean;
|
|
51
|
+
stripWhitespace?: boolean;
|
|
52
|
+
language?: Language;
|
|
53
|
+
ensureAscii?: boolean;
|
|
54
|
+
convertLists?: boolean;
|
|
55
|
+
encodingName?: TiktokenEncoding;
|
|
56
|
+
modelName?: TiktokenModel;
|
|
57
|
+
allowedSpecial?: Set<string> | 'all';
|
|
58
|
+
disallowedSpecial?: Set<string> | 'all';
|
|
59
|
+
stripHeaders?: boolean;
|
|
60
|
+
};
|
|
61
|
+
export type ChunkStrategy = 'recursive' | 'character' | 'token' | 'markdown' | 'html' | 'json' | 'latex';
|
|
62
|
+
export interface ChunkParams extends ChunkOptions {
|
|
63
|
+
strategy?: ChunkStrategy;
|
|
64
|
+
extract?: ExtractParams;
|
|
65
|
+
}
|
|
66
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/document/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACnE,OAAO,KAAK,EACV,mBAAmB,EACnB,kBAAkB,EAClB,yBAAyB,EACzB,kBAAkB,EACnB,MAAM,cAAc,CAAC;AAEtB,oBAAY,QAAQ;IAClB,GAAG,QAAQ;IACX,EAAE,OAAO;IACT,IAAI,SAAS;IACb,MAAM,WAAW;IACjB,EAAE,OAAO;IACT,EAAE,OAAO;IACT,GAAG,QAAQ;IACX,KAAK,UAAU;IACf,MAAM,WAAW;IACjB,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,IAAI,SAAS;IACb,KAAK,UAAU;IACf,KAAK,UAAU;IACf,QAAQ,aAAa;IACrB,KAAK,UAAU;IACf,IAAI,SAAS;IACb,GAAG,QAAQ;IACX,MAAM,WAAW;IACjB,KAAK,UAAU;IACf,CAAC,MAAM;IACP,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,OAAO,YAAY;IACnB,MAAM,WAAW;IACjB,UAAU,eAAe;CAC1B;AAED,MAAM,MAAM,aAAa,GAAG;IAC1B,KAAK,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC;IACtC,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;IACvC,SAAS,CAAC,EAAE,yBAAyB,GAAG,OAAO,CAAC;IAChD,QAAQ,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,YAAY,GAAG;IACzB,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,QAAQ,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;IAC1C,aAAa,CAAC,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;IAC1C,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,YAAY,CAAC,EAAE,gBAAgB,CAAC;IAChC,SAAS,CAAC,EAAE,aAAa,CAAC;IAC1B,cAAc,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;IACrC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;IACxC,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB,CAAC;AAEF,MAAM,MAAM,aAAa,GAAG,WAAW,GAAG,WAAW,GAAG,OAAO,GAAG,UAAU,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;AAEzG,MAAM,WAAW,WAAY,SAAQ,YAAY;IAC/C,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB"}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TODO: GraphRAG Enhancements
|
|
3
|
+
* - Add support for more edge types (sequential, hierarchical, citation, etc)
|
|
4
|
+
* - Allow for custom edge types
|
|
5
|
+
* - Utilize metadata for richer connections
|
|
6
|
+
* - Improve graph traversal and querying using types
|
|
7
|
+
*/
|
|
8
|
+
type SupportedEdgeType = 'semantic';
|
|
9
|
+
export interface GraphNode {
|
|
10
|
+
id: string;
|
|
11
|
+
content: string;
|
|
12
|
+
embedding?: number[];
|
|
13
|
+
metadata?: Record<string, any>;
|
|
14
|
+
}
|
|
15
|
+
export interface RankedNode extends GraphNode {
|
|
16
|
+
score: number;
|
|
17
|
+
}
|
|
18
|
+
export interface GraphEdge {
|
|
19
|
+
source: string;
|
|
20
|
+
target: string;
|
|
21
|
+
weight: number;
|
|
22
|
+
type: SupportedEdgeType;
|
|
23
|
+
}
|
|
24
|
+
export interface GraphChunk {
|
|
25
|
+
text: string;
|
|
26
|
+
metadata: Record<string, any>;
|
|
27
|
+
}
|
|
28
|
+
export interface GraphEmbedding {
|
|
29
|
+
vector: number[];
|
|
30
|
+
}
|
|
31
|
+
export declare class GraphRAG {
|
|
32
|
+
private nodes;
|
|
33
|
+
private edges;
|
|
34
|
+
private dimension;
|
|
35
|
+
private threshold;
|
|
36
|
+
constructor(dimension?: number, threshold?: number);
|
|
37
|
+
addNode(node: GraphNode): void;
|
|
38
|
+
addEdge(edge: GraphEdge): void;
|
|
39
|
+
getNodes(): GraphNode[];
|
|
40
|
+
getEdges(): GraphEdge[];
|
|
41
|
+
getEdgesByType(type: string): GraphEdge[];
|
|
42
|
+
clear(): void;
|
|
43
|
+
updateNodeContent(id: string, newContent: string): void;
|
|
44
|
+
private getNeighbors;
|
|
45
|
+
private cosineSimilarity;
|
|
46
|
+
createGraph(chunks: GraphChunk[], embeddings: GraphEmbedding[]): void;
|
|
47
|
+
private selectWeightedNeighbor;
|
|
48
|
+
private randomWalkWithRestart;
|
|
49
|
+
query({ query, topK, randomWalkSteps, restartProb, }: {
|
|
50
|
+
query: number[];
|
|
51
|
+
topK?: number;
|
|
52
|
+
randomWalkSteps?: number;
|
|
53
|
+
restartProb?: number;
|
|
54
|
+
}): RankedNode[];
|
|
55
|
+
}
|
|
56
|
+
export {};
|
|
57
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/graph-rag/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,KAAK,iBAAiB,GAAG,UAAU,CAAC;AAGpC,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAED,MAAM,WAAW,UAAW,SAAQ,SAAS;IAC3C,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,SAAS;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,iBAAiB,CAAC;CACzB;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAC/B;AAED,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,qBAAa,QAAQ;IACnB,OAAO,CAAC,KAAK,CAAyB;IACtC,OAAO,CAAC,KAAK,CAAc;IAC3B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAS;gBAEd,SAAS,GAAE,MAAa,EAAE,SAAS,GAAE,MAAY;IAQ7D,OAAO,CAAC,IAAI,EAAE,SAAS,GAAG,IAAI;IAW9B,OAAO,CAAC,IAAI,EAAE,SAAS,GAAG,IAAI;IAe9B,QAAQ,IAAI,SAAS,EAAE;IAKvB,QAAQ,IAAI,SAAS,EAAE;IAIvB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE;IAIzC,KAAK,IAAI,IAAI;IAKb,iBAAiB,CAAC,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,IAAI;IASvD,OAAO,CAAC,YAAY;IAWpB,OAAO,CAAC,gBAAgB;IAgCxB,WAAW,CAAC,MAAM,EAAE,UAAU,EAAE,EAAE,UAAU,EAAE,cAAc,EAAE;IAuC9D,OAAO,CAAC,sBAAsB;IAoB9B,OAAO,CAAC,qBAAqB;IAoC7B,KAAK,CAAC,EACJ,KAAK,EACL,IAAS,EACT,eAAqB,EACrB,WAAkB,GACnB,EAAE;QACD,KAAK,EAAE,MAAM,EAAE,CAAC;QAChB,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,GAAG,UAAU,EAAE;CAoDjB"}
|
package/dist/index.cjs
CHANGED
|
@@ -7630,3 +7630,5 @@ exports.queryTextDescription = queryTextDescription;
|
|
|
7630
7630
|
exports.rerank = rerank;
|
|
7631
7631
|
exports.rerankWithScorer = rerankWithScorer;
|
|
7632
7632
|
exports.topKDescription = topKDescription;
|
|
7633
|
+
//# sourceMappingURL=index.cjs.map
|
|
7634
|
+
//# sourceMappingURL=index.cjs.map
|