@mastra/rag 0.1.20-alpha.0 → 0.1.20-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +7 -7
- package/CHANGELOG.md +25 -0
- package/dist/_tsup-dts-rollup.d.cts +225 -86
- package/dist/_tsup-dts-rollup.d.ts +225 -86
- package/dist/index.cjs +417 -131
- package/dist/index.js +385 -99
- package/package.json +2 -3
- package/src/document/document.ts +6 -9
- package/src/document/extractors/base.ts +30 -0
- package/src/document/extractors/index.ts +1 -1
- package/src/document/extractors/keywords.test.ts +1 -1
- package/src/document/extractors/keywords.ts +7 -19
- package/src/document/extractors/questions.test.ts +1 -1
- package/src/document/extractors/questions.ts +7 -25
- package/src/document/extractors/summary.test.ts +1 -1
- package/src/document/extractors/summary.ts +7 -19
- package/src/document/extractors/title.test.ts +1 -1
- package/src/document/extractors/title.ts +7 -44
- package/src/document/extractors/types.ts +1 -1
- package/src/document/prompts/base.ts +77 -0
- package/src/document/prompts/format.ts +9 -0
- package/src/document/prompts/index.ts +15 -0
- package/src/document/prompts/prompt.ts +60 -0
- package/src/document/prompts/types.ts +29 -0
- package/src/document/schema/index.ts +3 -0
- package/src/document/schema/node.ts +187 -0
- package/src/document/schema/types.ts +40 -0
- package/src/document/transformers/html.ts +1 -1
- package/src/document/transformers/json.ts +1 -1
- package/src/document/transformers/markdown.ts +1 -1
- package/src/document/transformers/text.ts +1 -1
- package/src/document/transformers/transformer.ts +1 -1
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
import { createHash, randomUUID } from 'crypto';
|
|
2
|
+
import { NodeRelationship, ObjectType } from './types';
|
|
3
|
+
import type { Metadata, RelatedNodeInfo, RelatedNodeType, BaseNodeParams, TextNodeParams } from './types';
|
|
4
|
+
|
|
5
|
+
/**
 * Generic abstract class for retrievable nodes.
 *
 * Stores the node's identifier, metadata and relationship map, and exposes
 * typed getters over that map. Subclasses provide the node type, the rendered
 * content / metadata strings and the hash computation.
 *
 * @typeParam T - Shape of the metadata carried by the node.
 */
export abstract class BaseNode<T extends Metadata = Metadata> {
  /** Identifier of the node; a random UUID is generated when none is supplied. */
  id_: string;
  /** Metadata attached to the node; an empty object when none is supplied. */
  metadata: T;
  /** Related nodes, keyed by relationship kind. Absent keys mean "no such relationship". */
  relationships: Partial<Record<NodeRelationship, RelatedNodeType<T>>>;

  /**
   * Hash of the node. Initialised to the empty string; the `lazyInitHash`
   * decorator fills it from `generateHash()` on the first read while it is
   * still empty.
   */
  @lazyInitHash
  accessor hash: string = '';

  /**
   * @param init - Optional initial values. Only `id_`, `metadata` and
   *   `relationships` are read here.
   *   NOTE(review): `init.hash` is accepted by `BaseNodeParams` but is not
   *   consumed by this constructor — confirm whether that is intentional.
   */
  protected constructor(init?: BaseNodeParams<T>) {
    const { id_, metadata, relationships } = init || {};
    this.id_ = id_ ?? randomUUID();
    this.metadata = metadata ?? ({} as T);
    this.relationships = relationships ?? {};
  }

  /** Kind of object this node represents. */
  abstract get type(): ObjectType;

  /** Returns the node's content as a string. */
  abstract getContent(): string;

  /** Returns the node's metadata rendered as a string. */
  abstract getMetadataStr(): string;

  /**
   * The SOURCE relationship, or `undefined` when there is none.
   * @throws Error when the stored relationship is an array.
   */
  get sourceNode(): RelatedNodeInfo<T> | undefined {
    const relationship = this.relationships[NodeRelationship.SOURCE];

    if (Array.isArray(relationship)) {
      throw new Error('Source object must be a single RelatedNodeInfo object');
    }

    return relationship;
  }

  /**
   * The PREVIOUS relationship, or `undefined` when there is none.
   * @throws Error when the stored relationship is an array.
   */
  get prevNode(): RelatedNodeInfo<T> | undefined {
    const relationship = this.relationships[NodeRelationship.PREVIOUS];

    if (Array.isArray(relationship)) {
      throw new Error('Previous object must be a single RelatedNodeInfo object');
    }

    return relationship;
  }

  /**
   * The NEXT relationship, or `undefined` when there is none.
   * @throws Error when the stored relationship is an array.
   */
  get nextNode(): RelatedNodeInfo<T> | undefined {
    const relationship = this.relationships[NodeRelationship.NEXT];

    if (Array.isArray(relationship)) {
      throw new Error('Next object must be a single RelatedNodeInfo object');
    }

    return relationship;
  }

  /**
   * The PARENT relationship, or `undefined` when there is none.
   * @throws Error when the stored relationship is an array.
   */
  get parentNode(): RelatedNodeInfo<T> | undefined {
    const relationship = this.relationships[NodeRelationship.PARENT];

    if (Array.isArray(relationship)) {
      throw new Error('Parent object must be a single RelatedNodeInfo object');
    }

    return relationship;
  }

  /**
   * The CHILD relationships, or `undefined` when there are none.
   * @throws Error when a CHILD relationship is stored but is not an array.
   */
  get childNodes(): RelatedNodeInfo<T>[] | undefined {
    const relationship = this.relationships[NodeRelationship.CHILD];

    // A missing relationship is a valid state (the declared return type
    // includes `undefined`, and the single-node getters behave the same way);
    // only a present-but-malformed value is an error.
    if (relationship === undefined) {
      return undefined;
    }

    if (!Array.isArray(relationship)) {
      throw new Error('Child object must be a an array of RelatedNodeInfo objects');
    }

    return relationship;
  }

  /** Computes the hash for this node; used to populate `hash` lazily. */
  abstract generateHash(): string;
}
|
|
81
|
+
|
|
82
|
+
/**
 * TextNode is the default node type for text.
 *
 * Besides the base node fields it carries the text itself, optional start/end
 * character indices, and the separator used when rendering metadata.
 */
export class TextNode<T extends Metadata = Metadata> extends BaseNode<T> {
  text: string;

  startCharIdx?: number;
  endCharIdx?: number;
  metadataSeparator: string;

  /**
   * @param init - Optional initial values. `text` defaults to `''` and
   *   `metadataSeparator` to `'\n'`; the character indices are only assigned
   *   when a number is supplied.
   */
  constructor(init: TextNodeParams<T> = {}) {
    super(init);
    const { text, startCharIdx, endCharIdx, metadataSeparator } = init;
    this.text = text ?? '';
    // Check for a number rather than truthiness: 0 is a valid character index
    // and must not be dropped.
    if (typeof startCharIdx === 'number') {
      this.startCharIdx = startCharIdx;
    }
    if (typeof endCharIdx === 'number') {
      this.endCharIdx = endCharIdx;
    }
    this.metadataSeparator = metadataSeparator ?? '\n';
  }

  /**
   * Generate a hash of the text node.
   * The ID is not part of the hash as it can change independent of content.
   *
   * The hash covers the node type, the start/end character indices and the
   * result of `getContent()`.
   *
   * @returns The digest produced by the file's `createSHA256` helper.
   */
  generateHash() {
    const hashFunction = createSHA256();
    hashFunction.update(`type=${this.type}`);
    hashFunction.update(`startCharIdx=${this.startCharIdx} endCharIdx=${this.endCharIdx}`);
    hashFunction.update(this.getContent());
    return hashFunction.digest();
  }

  get type() {
    return ObjectType.TEXT;
  }

  /**
   * @returns The trimmed metadata string, a blank line, then the text — with
   *   the combined result trimmed again.
   */
  getContent(): string {
    const metadataStr = this.getMetadataStr().trim();
    return `${metadataStr}\n\n${this.text}`.trim();
  }

  /**
   * @returns One `key: value` entry per metadata key, in sorted key order,
   *   joined with `metadataSeparator`.
   */
  getMetadataStr(): string {
    // Object keys are already unique, so sorting the fresh array is enough.
    const sortedKeys = Object.keys(this.metadata).sort();

    return sortedKeys.map(key => `${key}: ${this.metadata[key]}`).join(this.metadataSeparator);
  }

  /** @returns The start/end character indices as `{ start, end }`. */
  getNodeInfo() {
    return { start: this.startCharIdx, end: this.endCharIdx };
  }

  /** @returns The raw text, without metadata. */
  getText() {
    return this.text;
  }
}
|
|
141
|
+
|
|
142
|
+
/**
 * A document is a text node that reports `ObjectType.DOCUMENT` as its type.
 * Everything else is inherited unchanged from `TextNode`.
 */
export class Document<T extends Metadata = Metadata> extends TextNode<T> {
  /**
   * @param init - Optional initial values, forwarded as-is to `TextNode`.
   */
  constructor(init?: TextNodeParams<T>) {
    super(init);
  }

  /** Overrides the inherited getter so the node is identified as a document. */
  get type() {
    return ObjectType.DOCUMENT;
  }
}
|
|
154
|
+
|
|
155
|
+
/**
 * Accessor decorator that fills a string accessor on first read.
 *
 * While the stored value is the empty string, reading the accessor calls
 * `generateHash()` on the instance, stores the result and returns it. Once a
 * non-empty value is stored (by that path or by an explicit assignment), reads
 * return it without calling `generateHash()` again.
 *
 * @param value - The original accessor's `get` / `set` pair.
 * @param _context - Decorator context; unused.
 * @returns Replacement `get` / `set` / `init` for the accessor.
 */
function lazyInitHash(
  value: ClassAccessorDecoratorTarget<BaseNode, string>,
  _context: ClassAccessorDecoratorContext,
): ClassAccessorDecoratorResult<BaseNode, string> {
  const { get: readStored, set: writeStored } = value;

  return {
    get() {
      // The empty string is the "not computed yet" marker.
      if (readStored.call(this) === '') {
        writeStored.call(this, this.generateHash());
      }
      return readStored.call(this);
    },
    set(newValue: string) {
      writeStored.call(this, newValue);
    },
    // Pass the initial value through untouched.
    init(initial: string): string {
      return initial;
    },
  };
}
|
|
176
|
+
|
|
177
|
+
/**
 * Creates a small wrapper around a SHA-256 hash object.
 *
 * @returns An object with `update(data)` to feed data into the hash and
 *   `digest()` to obtain the result encoded as base64.
 */
function createSHA256() {
  const hasher = createHash('sha256');

  const update = (data: string | Uint8Array): void => {
    hasher.update(data);
  };

  const digest = () => hasher.digest('base64');

  return { update, digest };
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
 * Keys under which a node's relationships to other nodes are stored.
 * Each member's value is the same string as its name.
 */
export enum NodeRelationship {
  SOURCE = 'SOURCE',
  PREVIOUS = 'PREVIOUS',
  NEXT = 'NEXT',
  PARENT = 'PARENT',
  CHILD = 'CHILD',
}
|
|
8
|
+
|
|
9
|
+
/**
 * Kinds of object a node can report as its type.
 * Each member's value is the same string as its name.
 */
export enum ObjectType {
  TEXT = 'TEXT',
  IMAGE = 'IMAGE',
  INDEX = 'INDEX',
  DOCUMENT = 'DOCUMENT',
  IMAGE_DOCUMENT = 'IMAGE_DOCUMENT',
}
|
|
16
|
+
|
|
17
|
+
/** Free-form metadata: string keys mapped to values of any type. */
export type Metadata = Record<string, any>;
|
|
18
|
+
|
|
19
|
+
/**
 * Reference to another node, as stored in a relationship map.
 *
 * @typeParam T - Shape of the referenced node's metadata.
 */
export interface RelatedNodeInfo<T extends Metadata = Metadata> {
  /** Identifier of the referenced node. */
  nodeId: string;
  /** Type of the referenced node, when known. */
  nodeType?: ObjectType;
  /** Metadata carried alongside the reference. */
  metadata: T;
  /** Hash of the referenced node, when known. */
  hash?: string;
}
|
|
25
|
+
|
|
26
|
+
/** Value of a relationship entry: either a single node reference or a list of them. */
export type RelatedNodeType<T extends Metadata = Metadata> = RelatedNodeInfo<T> | RelatedNodeInfo<T>[];
|
|
27
|
+
|
|
28
|
+
/**
 * Initial values accepted when constructing a node. Every field is optional
 * and may also be passed explicitly as `undefined`.
 *
 * @typeParam T - Shape of the node's metadata.
 */
export type BaseNodeParams<T extends Metadata = Metadata> = {
  /** Identifier for the node. */
  id_?: string | undefined;
  /** Metadata to attach to the node. */
  metadata?: T | undefined;
  /** Related nodes, keyed by relationship kind. */
  relationships?: Partial<Record<NodeRelationship, RelatedNodeType<T>>> | undefined;
  /** Hash value for the node. */
  hash?: string | undefined;
};
|
|
34
|
+
|
|
35
|
+
/**
 * Initial values accepted when constructing a text node: everything in
 * `BaseNodeParams` plus the text-specific fields below. Every field is
 * optional and may also be passed explicitly as `undefined`.
 *
 * @typeParam T - Shape of the node's metadata.
 */
export type TextNodeParams<T extends Metadata = Metadata> = BaseNodeParams<T> & {
  /** Text content of the node. */
  text?: string | undefined;
  /** Start character index associated with the text. */
  startCharIdx?: number | undefined;
  /** End character index associated with the text. */
  endCharIdx?: number | undefined;
  /** Separator placed between metadata entries when they are rendered. */
  metadataSeparator?: string | undefined;
};
|