@mastra/rag 1.0.6 → 1.0.7-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/.turbo/turbo-build.log +1 -1
  2. package/CHANGELOG.md +25 -0
  3. package/dist/document/document.d.ts +10 -9
  4. package/dist/document/document.d.ts.map +1 -1
  5. package/dist/document/extractors/base.d.ts +1 -1
  6. package/dist/document/extractors/index.d.ts +5 -5
  7. package/dist/document/extractors/keywords.d.ts +4 -4
  8. package/dist/document/extractors/questions.d.ts +4 -4
  9. package/dist/document/extractors/summary.d.ts +4 -4
  10. package/dist/document/extractors/title.d.ts +4 -4
  11. package/dist/document/extractors/types.d.ts +1 -1
  12. package/dist/document/index.d.ts +2 -2
  13. package/dist/document/prompts/base.d.ts +1 -1
  14. package/dist/document/prompts/index.d.ts +3 -3
  15. package/dist/document/prompts/prompt.d.ts +1 -1
  16. package/dist/document/schema/index.d.ts +3 -3
  17. package/dist/document/schema/node.d.ts +2 -2
  18. package/dist/document/transformers/character.d.ts +6 -28
  19. package/dist/document/transformers/character.d.ts.map +1 -1
  20. package/dist/document/transformers/html.d.ts +9 -4
  21. package/dist/document/transformers/html.d.ts.map +1 -1
  22. package/dist/document/transformers/json.d.ts +5 -5
  23. package/dist/document/transformers/json.d.ts.map +1 -1
  24. package/dist/document/transformers/latex.d.ts +3 -9
  25. package/dist/document/transformers/latex.d.ts.map +1 -1
  26. package/dist/document/transformers/markdown.d.ts +4 -10
  27. package/dist/document/transformers/markdown.d.ts.map +1 -1
  28. package/dist/document/transformers/sentence.d.ts +31 -0
  29. package/dist/document/transformers/sentence.d.ts.map +1 -0
  30. package/dist/document/transformers/text.d.ts +5 -5
  31. package/dist/document/transformers/text.d.ts.map +1 -1
  32. package/dist/document/transformers/token.d.ts +5 -16
  33. package/dist/document/transformers/token.d.ts.map +1 -1
  34. package/dist/document/transformers/transformer.d.ts +1 -1
  35. package/dist/document/types.d.ts +86 -15
  36. package/dist/document/types.d.ts.map +1 -1
  37. package/dist/document/validation.d.ts +3 -0
  38. package/dist/document/validation.d.ts.map +1 -0
  39. package/dist/index.cjs +414 -80
  40. package/dist/index.cjs.map +1 -1
  41. package/dist/index.d.ts +8 -8
  42. package/dist/index.js +414 -80
  43. package/dist/index.js.map +1 -1
  44. package/dist/rerank/relevance/index.d.ts +3 -3
  45. package/dist/tools/document-chunker.d.ts +1 -1
  46. package/dist/tools/document-chunker.d.ts.map +1 -1
  47. package/dist/tools/graph-rag.d.ts +2 -2
  48. package/dist/tools/index.d.ts +3 -3
  49. package/dist/tools/types.d.ts +1 -1
  50. package/dist/tools/vector-query.d.ts +2 -2
  51. package/dist/utils/convert-sources.d.ts +2 -2
  52. package/dist/utils/index.d.ts +3 -3
  53. package/dist/utils/vector-search.d.ts +1 -1
  54. package/package.json +8 -7
  55. package/src/document/document.test.ts +294 -39
  56. package/src/document/document.ts +69 -41
  57. package/src/document/transformers/character.ts +15 -43
  58. package/src/document/transformers/html.ts +9 -9
  59. package/src/document/transformers/json.ts +8 -3
  60. package/src/document/transformers/latex.ts +3 -11
  61. package/src/document/transformers/markdown.ts +3 -11
  62. package/src/document/transformers/sentence.ts +314 -0
  63. package/src/document/transformers/text.ts +10 -10
  64. package/src/document/transformers/token.ts +6 -17
  65. package/src/document/types.ts +66 -15
  66. package/src/document/validation.ts +147 -0
  67. package/src/tools/document-chunker.ts +12 -8
  68. package/tsup.config.ts +2 -7
@@ -1,5 +1,5 @@
1
1
  import type { TiktokenEncoding, TiktokenModel } from 'js-tiktoken';
2
- import type { TitleExtractorsArgs, SummaryExtractArgs, QuestionAnswerExtractArgs, KeywordExtractArgs } from './extractors';
2
+ import type { TitleExtractorsArgs, SummaryExtractArgs, QuestionAnswerExtractArgs, KeywordExtractArgs } from './extractors/index.js';
3
3
  export declare enum Language {
4
4
  CPP = "cpp",
5
5
  GO = "go",
@@ -34,33 +34,104 @@ export type ExtractParams = {
34
34
  questions?: QuestionAnswerExtractArgs | boolean;
35
35
  keywords?: KeywordExtractArgs | boolean;
36
36
  };
37
- export type ChunkOptions = {
38
- headers?: [string, string][];
39
- returnEachLine?: boolean;
40
- sections?: [string, string][];
41
- separator?: string;
42
- separators?: string[];
43
- isSeparatorRegex?: boolean;
37
+ export type BaseChunkOptions = {
38
+ /**
39
+ * @deprecated Use `maxSize` instead. Will be removed in next major version.
40
+ */
44
41
  size?: number;
45
42
  maxSize?: number;
46
- minSize?: number;
47
43
  overlap?: number;
48
44
  lengthFunction?: (text: string) => number;
49
45
  keepSeparator?: boolean | 'start' | 'end';
50
46
  addStartIndex?: boolean;
51
47
  stripWhitespace?: boolean;
48
+ };
49
+ export type CharacterChunkOptions = BaseChunkOptions & {
50
+ separator?: string;
51
+ isSeparatorRegex?: boolean;
52
+ };
53
+ export type RecursiveChunkOptions = BaseChunkOptions & {
54
+ separators?: string[];
55
+ isSeparatorRegex?: boolean;
52
56
  language?: Language;
53
- ensureAscii?: boolean;
54
- convertLists?: boolean;
57
+ };
58
+ export type TokenChunkOptions = BaseChunkOptions & {
55
59
  encodingName?: TiktokenEncoding;
56
60
  modelName?: TiktokenModel;
57
61
  allowedSpecial?: Set<string> | 'all';
58
62
  disallowedSpecial?: Set<string> | 'all';
63
+ };
64
+ export type MarkdownChunkOptions = BaseChunkOptions & {
65
+ headers?: [string, string][];
66
+ returnEachLine?: boolean;
59
67
  stripHeaders?: boolean;
60
68
  };
61
- export type ChunkStrategy = 'recursive' | 'character' | 'token' | 'markdown' | 'html' | 'json' | 'latex';
62
- export interface ChunkParams extends ChunkOptions {
63
- strategy?: ChunkStrategy;
69
+ export type HTMLChunkOptions = BaseChunkOptions & ({
70
+ headers: [string, string][];
71
+ sections?: never;
72
+ returnEachLine?: boolean;
73
+ } | {
74
+ sections: [string, string][];
75
+ headers?: never;
76
+ }) & {
77
+ returnEachLine?: boolean;
78
+ };
79
+ export type JsonChunkOptions = BaseChunkOptions & {
80
+ minSize?: number;
81
+ ensureAscii?: boolean;
82
+ convertLists?: boolean;
83
+ };
84
+ export type LatexChunkOptions = BaseChunkOptions & {};
85
+ export type SentenceChunkOptions = BaseChunkOptions & {
86
+ maxSize: number;
87
+ minSize?: number;
88
+ targetSize?: number;
89
+ sentenceEnders?: string[];
90
+ fallbackToWords?: boolean;
91
+ fallbackToCharacters?: boolean;
92
+ };
93
+ export type StrategyOptions = {
94
+ recursive: RecursiveChunkOptions;
95
+ character: CharacterChunkOptions;
96
+ token: TokenChunkOptions;
97
+ markdown: MarkdownChunkOptions;
98
+ html: HTMLChunkOptions;
99
+ json: JsonChunkOptions;
100
+ latex: LatexChunkOptions;
101
+ sentence: SentenceChunkOptions;
102
+ };
103
+ export type ChunkStrategy = 'recursive' | 'character' | 'token' | 'markdown' | 'html' | 'json' | 'latex' | 'sentence';
104
+ export type ChunkParams = ({
105
+ strategy?: 'character';
106
+ } & CharacterChunkOptions & {
64
107
  extract?: ExtractParams;
65
- }
108
+ }) | ({
109
+ strategy: 'recursive';
110
+ } & RecursiveChunkOptions & {
111
+ extract?: ExtractParams;
112
+ }) | ({
113
+ strategy: 'token';
114
+ } & TokenChunkOptions & {
115
+ extract?: ExtractParams;
116
+ }) | ({
117
+ strategy: 'markdown';
118
+ } & MarkdownChunkOptions & {
119
+ extract?: ExtractParams;
120
+ }) | ({
121
+ strategy: 'html';
122
+ } & HTMLChunkOptions & {
123
+ extract?: ExtractParams;
124
+ }) | ({
125
+ strategy: 'json';
126
+ } & JsonChunkOptions & {
127
+ extract?: ExtractParams;
128
+ }) | ({
129
+ strategy: 'latex';
130
+ } & LatexChunkOptions & {
131
+ extract?: ExtractParams;
132
+ }) | ({
133
+ strategy: 'sentence';
134
+ } & SentenceChunkOptions & {
135
+ extract?: ExtractParams;
136
+ });
66
137
  //# sourceMappingURL=types.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/document/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACnE,OAAO,KAAK,EACV,mBAAmB,EACnB,kBAAkB,EAClB,yBAAyB,EACzB,kBAAkB,EACnB,MAAM,cAAc,CAAC;AAEtB,oBAAY,QAAQ;IAClB,GAAG,QAAQ;IACX,EAAE,OAAO;IACT,IAAI,SAAS;IACb,MAAM,WAAW;IACjB,EAAE,OAAO;IACT,EAAE,OAAO;IACT,GAAG,QAAQ;IACX,KAAK,UAAU;IACf,MAAM,WAAW;IACjB,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,IAAI,SAAS;IACb,KAAK,UAAU;IACf,KAAK,UAAU;IACf,QAAQ,aAAa;IACrB,KAAK,UAAU;IACf,IAAI,SAAS;IACb,GAAG,QAAQ;IACX,MAAM,WAAW;IACjB,KAAK,UAAU;IACf,CAAC,MAAM;IACP,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,OAAO,YAAY;IACnB,MAAM,WAAW;IACjB,UAAU,eAAe;CAC1B;AAED,MAAM,MAAM,aAAa,GAAG;IAC1B,KAAK,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC;IACtC,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;IACvC,SAAS,CAAC,EAAE,yBAAyB,GAAG,OAAO,CAAC;IAChD,QAAQ,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,YAAY,GAAG;IACzB,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,QAAQ,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;IAC1C,aAAa,CAAC,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;IAC1C,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,YAAY,CAAC,EAAE,gBAAgB,CAAC;IAChC,SAAS,CAAC,EAAE,aAAa,CAAC;IAC1B,cAAc,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;IACrC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;IACxC,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB,CAAC;AAEF,MAAM,MAAM,aAAa,GAAG,WAAW,GAAG,WAAW,GAAG,OAAO,GAAG,UAAU,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;AAEzG,MAAM,WAAW,WAAY,SAAQ,YAAY;IAC/C,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/document/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACnE,OAAO,KAAK,EACV,mBAAmB,EACnB,kBAAkB,EAClB,yBAAyB,EACzB,kBAAkB,EACnB,MAAM,cAAc,CAAC;AAEtB,oBAAY,QAAQ;IAClB,GAAG,QAAQ;IACX,EAAE,OAAO;IACT,IAAI,SAAS;IACb,MAAM,WAAW;IACjB,EAAE,OAAO;IACT,EAAE,OAAO;IACT,GAAG,QAAQ;IACX,KAAK,UAAU;IACf,MAAM,WAAW;IACjB,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,IAAI,SAAS;IACb,KAAK,UAAU;IACf,KAAK,UAAU;IACf,QAAQ,aAAa;IACrB,KAAK,UAAU;IACf,IAAI,SAAS;IACb,GAAG,QAAQ;IACX,MAAM,WAAW;IACjB,KAAK,UAAU;IACf,CAAC,MAAM;IACP,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,OAAO,YAAY;IACnB,MAAM,WAAW;IACjB,UAAU,eAAe;CAC1B;AAED,MAAM,MAAM,aAAa,GAAG;IAC1B,KAAK,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC;IACtC,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;IACvC,SAAS,CAAC,EAAE,yBAAyB,GAAG,OAAO,CAAC;IAChD,QAAQ,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG;IAC7B;;OAEG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;IAC1C,aAAa,CAAC,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;IAC1C,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B,CAAC;AAEF,MAAM,MAAM,qBAAqB,GAAG,gBAAgB,GAAG;IACrD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B,CAAC;AAEF,MAAM,MAAM,qBAAqB,GAAG,gBAAgB,GAAG;IACrD,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,QAAQ,CAAC,EAAE,QAAQ,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,iBAAiB,GAAG,gBAAgB,GAAG;IACjD,YAAY,CAAC,EAAE,gBAAgB,CAAC;IAChC,SAAS,CAAC,EAAE,aAAa,CAAC;IAC1B,cAAc,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;IACrC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,oBAAoB,GAAG,gBAAgB,GAAG;IACpD,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG,gBAAgB,GAC7C,CACI;IAAE,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAAC,QAAQ,CAAC,EAAE,KAAK,CAAC;IAAC,cAAc,CAAC,EAAE,OAAO,CAAA;CAAE,GAC3E;IAAE,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAAC,OAAO,CAAC,EAAE,KAAK,CAAA;CAAE,CACpD,GAAG;IAAE,cAAc,CAAC,EAAE,OAAO,CAAA;CAAE,CAAC;AAEnC,MAAM,MAAM,gBAAgB,GAAG,gBAAgB,GAAG;IAChD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB,CAAC;AAEF,MAAM,MAAM,iBAAiB,GAAG,gBAAgB,GAAG,EAAE,CAAC;AAEtD,MAAM,MAAM,oBAAoB,GAAG,gBAAgB,GAAG;IACpD,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,oBAAoB,CAAC,EAAE,OAAO,CAAC;CAChC,CAAC;AAEF,MAAM,MAAM,eAAe,GAAG;IAC5B,SAAS,EAAE,qBAAqB,CAAC;IACjC,SAAS,EAAE,qBAAqB,CAAC;IACjC,KAAK,EAAE,iBAAiB,CAAC;IACzB,QAAQ,EAAE,oBAAoB,CAAC;IAC/B,IAAI,EAAE,gBAAgB,CAAC;IACvB,IAAI,EAAE,gBAAgB,CAAC;IACvB,KAAK,EAAE,iBAAiB,CAAC;IACzB,QAAQ,EAAE,oBAAoB,CAAC;CAChC,CAAC;AAEF,MAAM,MAAM,aAAa,GAAG,WAAW,GAAG,WAAW,GAAG,OAAO,GAAG,UAAU,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,UAAU,CAAC;AAEtH,MAAM,MAAM,WAAW,GACnB,CAAC;IAAE,QAAQ,CAAC,EAAE,WAAW,CAAA;CAAE,GAAG,qBAAqB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GAClF,CAAC;IAAE,QAAQ,EAAE,WAAW,CAAA;CAAE,GAAG,qBAAqB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACjF,CAAC;IAAE,QAAQ,EAAE,OAAO,CAAA;CAAE,GAAG,iBAAiB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACzE,CAAC;IAAE,QAAQ,EAAE,UAAU,CAAA;CAAE,GAAG,oBAAoB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GAC/E,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAA;CAAE,GAAG,gBAAgB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACvE,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAA;CAAE,GAAG,gBAAgB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACvE,CAAC;IAAE,QAAQ,EAAE,OAAO,CAAA;CAAE,GAAG,iBAAiB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACzE,CAAC;IAAE,QAAQ,EAAE,UAAU,CAAA;CAAE,GAAG,oBAAoB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { ChunkStrategy } from './types.js';
2
+ export declare function validateChunkParams(strategy: ChunkStrategy, params: any): void;
3
+ //# sourceMappingURL=validation.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"validation.d.ts","sourceRoot":"","sources":["../../src/document/validation.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AA2H7C,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,aAAa,EAAE,MAAM,EAAE,GAAG,GAAG,IAAI,CAsB9E"}