@mastra/rag 1.0.6 → 1.0.7-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +25 -0
- package/dist/document/document.d.ts +10 -9
- package/dist/document/document.d.ts.map +1 -1
- package/dist/document/extractors/base.d.ts +1 -1
- package/dist/document/extractors/index.d.ts +5 -5
- package/dist/document/extractors/keywords.d.ts +4 -4
- package/dist/document/extractors/questions.d.ts +4 -4
- package/dist/document/extractors/summary.d.ts +4 -4
- package/dist/document/extractors/title.d.ts +4 -4
- package/dist/document/extractors/types.d.ts +1 -1
- package/dist/document/index.d.ts +2 -2
- package/dist/document/prompts/base.d.ts +1 -1
- package/dist/document/prompts/index.d.ts +3 -3
- package/dist/document/prompts/prompt.d.ts +1 -1
- package/dist/document/schema/index.d.ts +3 -3
- package/dist/document/schema/node.d.ts +2 -2
- package/dist/document/transformers/character.d.ts +6 -28
- package/dist/document/transformers/character.d.ts.map +1 -1
- package/dist/document/transformers/html.d.ts +9 -4
- package/dist/document/transformers/html.d.ts.map +1 -1
- package/dist/document/transformers/json.d.ts +5 -5
- package/dist/document/transformers/json.d.ts.map +1 -1
- package/dist/document/transformers/latex.d.ts +3 -9
- package/dist/document/transformers/latex.d.ts.map +1 -1
- package/dist/document/transformers/markdown.d.ts +4 -10
- package/dist/document/transformers/markdown.d.ts.map +1 -1
- package/dist/document/transformers/sentence.d.ts +31 -0
- package/dist/document/transformers/sentence.d.ts.map +1 -0
- package/dist/document/transformers/text.d.ts +5 -5
- package/dist/document/transformers/text.d.ts.map +1 -1
- package/dist/document/transformers/token.d.ts +5 -16
- package/dist/document/transformers/token.d.ts.map +1 -1
- package/dist/document/transformers/transformer.d.ts +1 -1
- package/dist/document/types.d.ts +86 -15
- package/dist/document/types.d.ts.map +1 -1
- package/dist/document/validation.d.ts +3 -0
- package/dist/document/validation.d.ts.map +1 -0
- package/dist/index.cjs +414 -80
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +8 -8
- package/dist/index.js +414 -80
- package/dist/index.js.map +1 -1
- package/dist/rerank/relevance/index.d.ts +3 -3
- package/dist/tools/document-chunker.d.ts +1 -1
- package/dist/tools/document-chunker.d.ts.map +1 -1
- package/dist/tools/graph-rag.d.ts +2 -2
- package/dist/tools/index.d.ts +3 -3
- package/dist/tools/types.d.ts +1 -1
- package/dist/tools/vector-query.d.ts +2 -2
- package/dist/utils/convert-sources.d.ts +2 -2
- package/dist/utils/index.d.ts +3 -3
- package/dist/utils/vector-search.d.ts +1 -1
- package/package.json +8 -7
- package/src/document/document.test.ts +294 -39
- package/src/document/document.ts +69 -41
- package/src/document/transformers/character.ts +15 -43
- package/src/document/transformers/html.ts +9 -9
- package/src/document/transformers/json.ts +8 -3
- package/src/document/transformers/latex.ts +3 -11
- package/src/document/transformers/markdown.ts +3 -11
- package/src/document/transformers/sentence.ts +314 -0
- package/src/document/transformers/text.ts +10 -10
- package/src/document/transformers/token.ts +6 -17
- package/src/document/types.ts +66 -15
- package/src/document/validation.ts +147 -0
- package/src/tools/document-chunker.ts +12 -8
- package/tsup.config.ts +2 -7
package/dist/document/types.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { TiktokenEncoding, TiktokenModel } from 'js-tiktoken';
|
|
2
|
-
import type { TitleExtractorsArgs, SummaryExtractArgs, QuestionAnswerExtractArgs, KeywordExtractArgs } from './extractors';
|
|
2
|
+
import type { TitleExtractorsArgs, SummaryExtractArgs, QuestionAnswerExtractArgs, KeywordExtractArgs } from './extractors/index.js';
|
|
3
3
|
export declare enum Language {
|
|
4
4
|
CPP = "cpp",
|
|
5
5
|
GO = "go",
|
|
@@ -34,33 +34,104 @@ export type ExtractParams = {
|
|
|
34
34
|
questions?: QuestionAnswerExtractArgs | boolean;
|
|
35
35
|
keywords?: KeywordExtractArgs | boolean;
|
|
36
36
|
};
|
|
37
|
-
export type
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
separator?: string;
|
|
42
|
-
separators?: string[];
|
|
43
|
-
isSeparatorRegex?: boolean;
|
|
37
|
+
export type BaseChunkOptions = {
|
|
38
|
+
/**
|
|
39
|
+
* @deprecated Use `maxSize` instead. Will be removed in next major version.
|
|
40
|
+
*/
|
|
44
41
|
size?: number;
|
|
45
42
|
maxSize?: number;
|
|
46
|
-
minSize?: number;
|
|
47
43
|
overlap?: number;
|
|
48
44
|
lengthFunction?: (text: string) => number;
|
|
49
45
|
keepSeparator?: boolean | 'start' | 'end';
|
|
50
46
|
addStartIndex?: boolean;
|
|
51
47
|
stripWhitespace?: boolean;
|
|
48
|
+
};
|
|
49
|
+
export type CharacterChunkOptions = BaseChunkOptions & {
|
|
50
|
+
separator?: string;
|
|
51
|
+
isSeparatorRegex?: boolean;
|
|
52
|
+
};
|
|
53
|
+
export type RecursiveChunkOptions = BaseChunkOptions & {
|
|
54
|
+
separators?: string[];
|
|
55
|
+
isSeparatorRegex?: boolean;
|
|
52
56
|
language?: Language;
|
|
53
|
-
|
|
54
|
-
|
|
57
|
+
};
|
|
58
|
+
export type TokenChunkOptions = BaseChunkOptions & {
|
|
55
59
|
encodingName?: TiktokenEncoding;
|
|
56
60
|
modelName?: TiktokenModel;
|
|
57
61
|
allowedSpecial?: Set<string> | 'all';
|
|
58
62
|
disallowedSpecial?: Set<string> | 'all';
|
|
63
|
+
};
|
|
64
|
+
export type MarkdownChunkOptions = BaseChunkOptions & {
|
|
65
|
+
headers?: [string, string][];
|
|
66
|
+
returnEachLine?: boolean;
|
|
59
67
|
stripHeaders?: boolean;
|
|
60
68
|
};
|
|
61
|
-
export type
|
|
62
|
-
|
|
63
|
-
|
|
69
|
+
export type HTMLChunkOptions = BaseChunkOptions & ({
|
|
70
|
+
headers: [string, string][];
|
|
71
|
+
sections?: never;
|
|
72
|
+
returnEachLine?: boolean;
|
|
73
|
+
} | {
|
|
74
|
+
sections: [string, string][];
|
|
75
|
+
headers?: never;
|
|
76
|
+
}) & {
|
|
77
|
+
returnEachLine?: boolean;
|
|
78
|
+
};
|
|
79
|
+
export type JsonChunkOptions = BaseChunkOptions & {
|
|
80
|
+
minSize?: number;
|
|
81
|
+
ensureAscii?: boolean;
|
|
82
|
+
convertLists?: boolean;
|
|
83
|
+
};
|
|
84
|
+
export type LatexChunkOptions = BaseChunkOptions & {};
|
|
85
|
+
export type SentenceChunkOptions = BaseChunkOptions & {
|
|
86
|
+
maxSize: number;
|
|
87
|
+
minSize?: number;
|
|
88
|
+
targetSize?: number;
|
|
89
|
+
sentenceEnders?: string[];
|
|
90
|
+
fallbackToWords?: boolean;
|
|
91
|
+
fallbackToCharacters?: boolean;
|
|
92
|
+
};
|
|
93
|
+
export type StrategyOptions = {
|
|
94
|
+
recursive: RecursiveChunkOptions;
|
|
95
|
+
character: CharacterChunkOptions;
|
|
96
|
+
token: TokenChunkOptions;
|
|
97
|
+
markdown: MarkdownChunkOptions;
|
|
98
|
+
html: HTMLChunkOptions;
|
|
99
|
+
json: JsonChunkOptions;
|
|
100
|
+
latex: LatexChunkOptions;
|
|
101
|
+
sentence: SentenceChunkOptions;
|
|
102
|
+
};
|
|
103
|
+
export type ChunkStrategy = 'recursive' | 'character' | 'token' | 'markdown' | 'html' | 'json' | 'latex' | 'sentence';
|
|
104
|
+
export type ChunkParams = ({
|
|
105
|
+
strategy?: 'character';
|
|
106
|
+
} & CharacterChunkOptions & {
|
|
64
107
|
extract?: ExtractParams;
|
|
65
|
-
}
|
|
108
|
+
}) | ({
|
|
109
|
+
strategy: 'recursive';
|
|
110
|
+
} & RecursiveChunkOptions & {
|
|
111
|
+
extract?: ExtractParams;
|
|
112
|
+
}) | ({
|
|
113
|
+
strategy: 'token';
|
|
114
|
+
} & TokenChunkOptions & {
|
|
115
|
+
extract?: ExtractParams;
|
|
116
|
+
}) | ({
|
|
117
|
+
strategy: 'markdown';
|
|
118
|
+
} & MarkdownChunkOptions & {
|
|
119
|
+
extract?: ExtractParams;
|
|
120
|
+
}) | ({
|
|
121
|
+
strategy: 'html';
|
|
122
|
+
} & HTMLChunkOptions & {
|
|
123
|
+
extract?: ExtractParams;
|
|
124
|
+
}) | ({
|
|
125
|
+
strategy: 'json';
|
|
126
|
+
} & JsonChunkOptions & {
|
|
127
|
+
extract?: ExtractParams;
|
|
128
|
+
}) | ({
|
|
129
|
+
strategy: 'latex';
|
|
130
|
+
} & LatexChunkOptions & {
|
|
131
|
+
extract?: ExtractParams;
|
|
132
|
+
}) | ({
|
|
133
|
+
strategy: 'sentence';
|
|
134
|
+
} & SentenceChunkOptions & {
|
|
135
|
+
extract?: ExtractParams;
|
|
136
|
+
});
|
|
66
137
|
//# sourceMappingURL=types.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/document/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACnE,OAAO,KAAK,EACV,mBAAmB,EACnB,kBAAkB,EAClB,yBAAyB,EACzB,kBAAkB,EACnB,MAAM,cAAc,CAAC;AAEtB,oBAAY,QAAQ;IAClB,GAAG,QAAQ;IACX,EAAE,OAAO;IACT,IAAI,SAAS;IACb,MAAM,WAAW;IACjB,EAAE,OAAO;IACT,EAAE,OAAO;IACT,GAAG,QAAQ;IACX,KAAK,UAAU;IACf,MAAM,WAAW;IACjB,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,IAAI,SAAS;IACb,KAAK,UAAU;IACf,KAAK,UAAU;IACf,QAAQ,aAAa;IACrB,KAAK,UAAU;IACf,IAAI,SAAS;IACb,GAAG,QAAQ;IACX,MAAM,WAAW;IACjB,KAAK,UAAU;IACf,CAAC,MAAM;IACP,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,OAAO,YAAY;IACnB,MAAM,WAAW;IACjB,UAAU,eAAe;CAC1B;AAED,MAAM,MAAM,aAAa,GAAG;IAC1B,KAAK,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC;IACtC,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;IACvC,SAAS,CAAC,EAAE,yBAAyB,GAAG,OAAO,CAAC;IAChD,QAAQ,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/document/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACnE,OAAO,KAAK,EACV,mBAAmB,EACnB,kBAAkB,EAClB,yBAAyB,EACzB,kBAAkB,EACnB,MAAM,cAAc,CAAC;AAEtB,oBAAY,QAAQ;IAClB,GAAG,QAAQ;IACX,EAAE,OAAO;IACT,IAAI,SAAS;IACb,MAAM,WAAW;IACjB,EAAE,OAAO;IACT,EAAE,OAAO;IACT,GAAG,QAAQ;IACX,KAAK,UAAU;IACf,MAAM,WAAW;IACjB,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,IAAI,SAAS;IACb,KAAK,UAAU;IACf,KAAK,UAAU;IACf,QAAQ,aAAa;IACrB,KAAK,UAAU;IACf,IAAI,SAAS;IACb,GAAG,QAAQ;IACX,MAAM,WAAW;IACjB,KAAK,UAAU;IACf,CAAC,MAAM;IACP,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,OAAO,YAAY;IACnB,MAAM,WAAW;IACjB,UAAU,eAAe;CAC1B;AAED,MAAM,MAAM,aAAa,GAAG;IAC1B,KAAK,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC;IACtC,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;IACvC,SAAS,CAAC,EAAE,yBAAyB,GAAG,OAAO,CAAC;IAChD,QAAQ,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG;IAC7B;;OAEG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;IAC1C,aAAa,CAAC,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;IAC1C,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B,CAAC;AAEF,MAAM,MAAM,qBAAqB,GAAG,gBAAgB,GAAG;IACrD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B,CAAC;AAEF,MAAM,MAAM,qBAAqB,GAAG,gBAAgB,GAAG;IACrD,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,QAAQ,CAAC,EAAE,QAAQ,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,iBAAiB,GAAG,gBAAgB,GAAG;IACjD,YAAY,CAAC,EAAE,gBAAgB,CAAC;IAChC,SAAS,CAAC,EAAE,aAAa,CAAC;IAC1B,cAAc,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;IACrC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,oBAAoB,GAAG,gBAAgB,GAAG;IACpD,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG,gBAAgB,GAC7C,CACI;IAAE,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAAC,QAAQ,CAAC,EAAE,KAAK,CAAC;IAAC,cAAc,CAAC,EAAE,OAAO,CAAA;CAAE,GAC3E;IAAE,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAAC,OAAO,CAAC,EAAE,KAAK,CAAA;CAAE,CACpD,GAAG;IAAE,cAAc,CAAC,EAAE,OAAO,CAAA;CAAE,CAAC;AAEnC,MAAM,MAAM,gBAAgB,GAAG,gBAAgB,GAAG;IAChD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB,CAAC;AAEF,MAAM,MAAM,iBAAiB,GAAG,gBAAgB,GAAG,EAAE,CAAC;AAEtD,MAAM,MAAM,oBAAoB,GAAG,gBAAgB,GAAG;IACpD,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,oBAAoB,CAAC,EAAE,OAAO,CAAC;CAChC,CAAC;AAEF,MAAM,MAAM,eAAe,GAAG;IAC5B,SAAS,EAAE,qBAAqB,CAAC;IACjC,SAAS,EAAE,qBAAqB,CAAC;IACjC,KAAK,EAAE,iBAAiB,CAAC;IACzB,QAAQ,EAAE,oBAAoB,CAAC;IAC/B,IAAI,EAAE,gBAAgB,CAAC;IACvB,IAAI,EAAE,gBAAgB,CAAC;IACvB,KAAK,EAAE,iBAAiB,CAAC;IACzB,QAAQ,EAAE,oBAAoB,CAAC;CAChC,CAAC;AAEF,MAAM,MAAM,aAAa,GAAG,WAAW,GAAG,WAAW,GAAG,OAAO,GAAG,UAAU,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,UAAU,CAAC;AAEtH,MAAM,MAAM,WAAW,GACnB,CAAC;IAAE,QAAQ,CAAC,EAAE,WAAW,CAAA;CAAE,GAAG,qBAAqB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GAClF,CAAC;IAAE,QAAQ,EAAE,WAAW,CAAA;CAAE,GAAG,qBAAqB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACjF,CAAC;IAAE,QAAQ,EAAE,OAAO,CAAA;CAAE,GAAG,iBAAiB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACzE,CAAC;IAAE,QAAQ,EAAE,UAAU,CAAA;CAAE,GAAG,oBAAoB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GAC/E,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAA;CAAE,GAAG,gBAAgB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACvE,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAA;CAAE,GAAG,gBAAgB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACvE,CAAC;IAAE,QAAQ,EAAE,OAAO,CAAA;CAAE,GAAG,iBAAiB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACzE,CAAC;IAAE,QAAQ,EAAE,UAAU,CAAA;CAAE,GAAG,oBAAoB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validation.d.ts","sourceRoot":"","sources":["../../src/document/validation.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AA2H7C,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,aAAa,EAAE,MAAM,EAAE,GAAG,GAAG,IAAI,CAsB9E"}
|