@mastra/rag 1.0.6-alpha.0 → 1.0.7-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +25 -0
- package/dist/document/document.d.ts +9 -8
- package/dist/document/document.d.ts.map +1 -1
- package/dist/document/transformers/character.d.ts +4 -26
- package/dist/document/transformers/character.d.ts.map +1 -1
- package/dist/document/transformers/html.d.ts +8 -3
- package/dist/document/transformers/html.d.ts.map +1 -1
- package/dist/document/transformers/json.d.ts +4 -4
- package/dist/document/transformers/json.d.ts.map +1 -1
- package/dist/document/transformers/latex.d.ts +2 -8
- package/dist/document/transformers/latex.d.ts.map +1 -1
- package/dist/document/transformers/markdown.d.ts +2 -8
- package/dist/document/transformers/markdown.d.ts.map +1 -1
- package/dist/document/transformers/sentence.d.ts +31 -0
- package/dist/document/transformers/sentence.d.ts.map +1 -0
- package/dist/document/transformers/text.d.ts +3 -3
- package/dist/document/transformers/text.d.ts.map +1 -1
- package/dist/document/transformers/token.d.ts +4 -15
- package/dist/document/transformers/token.d.ts.map +1 -1
- package/dist/document/types.d.ts +85 -14
- package/dist/document/types.d.ts.map +1 -1
- package/dist/document/validation.d.ts +3 -0
- package/dist/document/validation.d.ts.map +1 -0
- package/dist/index.cjs +414 -80
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +414 -80
- package/dist/index.js.map +1 -1
- package/dist/tools/document-chunker.d.ts.map +1 -1
- package/package.json +5 -5
- package/src/document/document.test.ts +294 -39
- package/src/document/document.ts +69 -41
- package/src/document/transformers/character.ts +15 -43
- package/src/document/transformers/html.ts +9 -9
- package/src/document/transformers/json.ts +8 -3
- package/src/document/transformers/latex.ts +3 -11
- package/src/document/transformers/markdown.ts +3 -11
- package/src/document/transformers/sentence.ts +314 -0
- package/src/document/transformers/text.ts +10 -10
- package/src/document/transformers/token.ts +6 -17
- package/src/document/types.ts +66 -15
- package/src/document/validation.ts +147 -0
- package/src/tools/document-chunker.ts +12 -8
package/.turbo/turbo-build.log
CHANGED
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,30 @@
|
|
|
1
1
|
# @mastra/rag
|
|
2
2
|
|
|
3
|
+
## 1.0.7-alpha.0
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 351b36e: update evals and rag ai sdk package versions
|
|
8
|
+
- ccd519c: Add sentence chunking strategy and strategy-specific parameter validation for all existing strategies.
|
|
9
|
+
- Updated dependencies [ea0c5f2]
|
|
10
|
+
- Updated dependencies [b0e43c1]
|
|
11
|
+
- Updated dependencies [1fb812e]
|
|
12
|
+
- Updated dependencies [35c5798]
|
|
13
|
+
- @mastra/core@0.13.0-alpha.1
|
|
14
|
+
|
|
15
|
+
## 1.0.6
|
|
16
|
+
|
|
17
|
+
### Patch Changes
|
|
18
|
+
|
|
19
|
+
- 613eec3: dependencies updates:
|
|
20
|
+
- Updated dependency [`node-html-better-parser@^1.5.2` ↗︎](https://www.npmjs.com/package/node-html-better-parser/v/1.5.2) (from `^1.5.1`, in `dependencies`)
|
|
21
|
+
- Updated dependencies [33dcb07]
|
|
22
|
+
- Updated dependencies [d0d9500]
|
|
23
|
+
- Updated dependencies [d30b1a0]
|
|
24
|
+
- Updated dependencies [bff87f7]
|
|
25
|
+
- Updated dependencies [b4a8df0]
|
|
26
|
+
- @mastra/core@0.12.1
|
|
27
|
+
|
|
3
28
|
## 1.0.6-alpha.0
|
|
4
29
|
|
|
5
30
|
### Patch Changes
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { Document as Chunk } from './schema';
|
|
2
|
-
import type {
|
|
2
|
+
import type { ChunkParams, ExtractParams, HTMLChunkOptions, RecursiveChunkOptions, CharacterChunkOptions, TokenChunkOptions, MarkdownChunkOptions, JsonChunkOptions, LatexChunkOptions, SentenceChunkOptions } from './types';
|
|
3
3
|
export declare class MDocument {
|
|
4
4
|
private chunks;
|
|
5
5
|
private type;
|
|
@@ -17,13 +17,14 @@ export declare class MDocument {
|
|
|
17
17
|
static fromJSON(jsonString: string, metadata?: Record<string, any>): MDocument;
|
|
18
18
|
private defaultStrategy;
|
|
19
19
|
private chunkBy;
|
|
20
|
-
chunkRecursive(options?:
|
|
21
|
-
chunkCharacter(options?:
|
|
22
|
-
chunkHTML(options?:
|
|
23
|
-
chunkJSON(options?:
|
|
24
|
-
chunkLatex(options?:
|
|
25
|
-
chunkToken(options?:
|
|
26
|
-
chunkMarkdown(options?:
|
|
20
|
+
chunkRecursive(options?: RecursiveChunkOptions): Promise<void>;
|
|
21
|
+
chunkCharacter(options?: CharacterChunkOptions): Promise<void>;
|
|
22
|
+
chunkHTML(options?: HTMLChunkOptions): Promise<void>;
|
|
23
|
+
chunkJSON(options?: JsonChunkOptions): Promise<void>;
|
|
24
|
+
chunkLatex(options?: LatexChunkOptions): Promise<void>;
|
|
25
|
+
chunkToken(options?: TokenChunkOptions): Promise<void>;
|
|
26
|
+
chunkMarkdown(options?: MarkdownChunkOptions): Promise<void>;
|
|
27
|
+
chunkSentence(options?: SentenceChunkOptions): Promise<void>;
|
|
27
28
|
chunk(params?: ChunkParams): Promise<Chunk[]>;
|
|
28
29
|
getDocs(): Chunk[];
|
|
29
30
|
getText(): string[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"document.d.ts","sourceRoot":"","sources":["../../src/document/document.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,QAAQ,IAAI,KAAK,EAAgC,MAAM,UAAU,CAAC;
|
|
1
|
+
{"version":3,"file":"document.d.ts","sourceRoot":"","sources":["../../src/document/document.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,QAAQ,IAAI,KAAK,EAAgC,MAAM,UAAU,CAAC;AAS3E,OAAO,KAAK,EACV,WAAW,EAEX,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,oBAAoB,EACpB,gBAAgB,EAChB,iBAAiB,EACjB,oBAAoB,EAErB,MAAM,SAAS,CAAC;AAGjB,qBAAa,SAAS;IACpB,OAAO,CAAC,MAAM,CAAU;IACxB,OAAO,CAAC,IAAI,CAAS;gBAET,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;SAAE,EAAE,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE;IAOhG,eAAe,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE,aAAa,GAAG,OAAO,CAAC,SAAS,CAAC;IAmDjG,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYhF,MAAM,CAAC,QAAQ,CAAC,UAAU,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAY9E,OAAO,CAAC,eAAe;YAeT,OAAO;IAoBf,cAAc,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAa9D,cAAc,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAU9D,SAAS,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAoBpD,SAAS,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAmBpD,UAAU,CAAC,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC;IAMtD,UAAU,CAAC,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC;IAUtD,aAAa,CAAC,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAa5D,aAAa,CAAC,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAuB5D,KAAK,CAAC,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAiBnD,OAAO,IAAI,KAAK,EAAE;IAIlB,OAAO,IAAI,MAAM,EAAE;IAInB,WAAW,IAAI,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE;CAGrC"}
|
|
@@ -1,21 +1,10 @@
|
|
|
1
1
|
import { Language } from '../types';
|
|
2
|
-
import type {
|
|
2
|
+
import type { BaseChunkOptions, CharacterChunkOptions, RecursiveChunkOptions } from '../types';
|
|
3
3
|
import { TextTransformer } from './text';
|
|
4
4
|
export declare class CharacterTransformer extends TextTransformer {
|
|
5
5
|
protected separator: string;
|
|
6
6
|
protected isSeparatorRegex: boolean;
|
|
7
|
-
constructor({ separator, isSeparatorRegex,
|
|
8
|
-
separator?: string;
|
|
9
|
-
isSeparatorRegex?: boolean;
|
|
10
|
-
options?: {
|
|
11
|
-
size?: number;
|
|
12
|
-
overlap?: number;
|
|
13
|
-
lengthFunction?: (text: string) => number;
|
|
14
|
-
keepSeparator?: boolean | 'start' | 'end';
|
|
15
|
-
addStartIndex?: boolean;
|
|
16
|
-
stripWhitespace?: boolean;
|
|
17
|
-
};
|
|
18
|
-
});
|
|
7
|
+
constructor({ separator, isSeparatorRegex, ...baseOptions }?: CharacterChunkOptions);
|
|
19
8
|
splitText({ text }: {
|
|
20
9
|
text: string;
|
|
21
10
|
}): string[];
|
|
@@ -24,23 +13,12 @@ export declare class CharacterTransformer extends TextTransformer {
|
|
|
24
13
|
export declare class RecursiveCharacterTransformer extends TextTransformer {
|
|
25
14
|
protected separators: string[];
|
|
26
15
|
protected isSeparatorRegex: boolean;
|
|
27
|
-
constructor({ separators, isSeparatorRegex,
|
|
28
|
-
separators?: string[];
|
|
29
|
-
isSeparatorRegex?: boolean;
|
|
30
|
-
options?: ChunkOptions;
|
|
31
|
-
});
|
|
16
|
+
constructor({ separators, isSeparatorRegex, language, ...baseOptions }?: RecursiveChunkOptions);
|
|
32
17
|
private _splitText;
|
|
33
18
|
splitText({ text }: {
|
|
34
19
|
text: string;
|
|
35
20
|
}): string[];
|
|
36
|
-
static fromLanguage(language: Language, options?:
|
|
37
|
-
size?: number;
|
|
38
|
-
chunkOverlap?: number;
|
|
39
|
-
lengthFunction?: (text: string) => number;
|
|
40
|
-
keepSeparator?: boolean | 'start' | 'end';
|
|
41
|
-
addStartIndex?: boolean;
|
|
42
|
-
stripWhitespace?: boolean;
|
|
43
|
-
}): RecursiveCharacterTransformer;
|
|
21
|
+
static fromLanguage(language: Language, options?: BaseChunkOptions): RecursiveCharacterTransformer;
|
|
44
22
|
static getSeparatorsForLanguage(language: Language): string[];
|
|
45
23
|
}
|
|
46
24
|
//# sourceMappingURL=character.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"character.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/character.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AACpC,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"character.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/character.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AACpC,OAAO,KAAK,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAE/F,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAC;AA+CzC,qBAAa,oBAAqB,SAAQ,eAAe;IACvD,SAAS,CAAC,SAAS,EAAE,MAAM,CAAC;IAC5B,SAAS,CAAC,gBAAgB,EAAE,OAAO,CAAC;gBAExB,EAAE,SAAkB,EAAE,gBAAwB,EAAE,GAAG,WAAW,EAAE,GAAE,qBAA0B;IAMxG,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IAqB/C,OAAO,CAAC,YAAY;CAyBrB;AAED,qBAAa,6BAA8B,SAAQ,eAAe;IAChE,SAAS,CAAC,UAAU,EAAE,MAAM,EAAE,CAAC;IAC/B,SAAS,CAAC,gBAAgB,EAAE,OAAO,CAAC;gBAExB,EAAE,UAAU,EAAE,gBAAwB,EAAE,QAAQ,EAAE,GAAG,WAAW,EAAE,GAAE,qBAA0B;IAM1G,OAAO,CAAC,UAAU;IAuDlB,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IAI/C,MAAM,CAAC,YAAY,CAAC,QAAQ,EAAE,QAAQ,EAAE,OAAO,GAAE,gBAAqB,GAAG,6BAA6B;IAUtG,MAAM,CAAC,wBAAwB,CAAC,QAAQ,EAAE,QAAQ,GAAG,MAAM,EAAE;CA+E9D"}
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
import { Document } from '../schema';
|
|
2
|
+
import type { HTMLChunkOptions } from '../types';
|
|
2
3
|
export declare class HTMLHeaderTransformer {
|
|
3
4
|
private headersToSplitOn;
|
|
4
5
|
private returnEachElement;
|
|
5
|
-
constructor(
|
|
6
|
+
constructor(options: HTMLChunkOptions & {
|
|
7
|
+
headers: [string, string][];
|
|
8
|
+
});
|
|
6
9
|
splitText({ text }: {
|
|
7
10
|
text: string;
|
|
8
11
|
}): Document[];
|
|
@@ -14,8 +17,10 @@ export declare class HTMLHeaderTransformer {
|
|
|
14
17
|
}
|
|
15
18
|
export declare class HTMLSectionTransformer {
|
|
16
19
|
private headersToSplitOn;
|
|
17
|
-
private
|
|
18
|
-
constructor(
|
|
20
|
+
private textSplitter;
|
|
21
|
+
constructor(options: HTMLChunkOptions & {
|
|
22
|
+
sections: [string, string][];
|
|
23
|
+
});
|
|
19
24
|
splitText(text: string): Document[];
|
|
20
25
|
private getXPath;
|
|
21
26
|
private splitHtmlByHeaders;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"html.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/html.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"html.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/html.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AACrC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAWjD,qBAAa,qBAAqB;IAChC,OAAO,CAAC,gBAAgB,CAAqB;IAC7C,OAAO,CAAC,iBAAiB,CAAU;gBAEvB,OAAO,EAAE,gBAAgB,GAAG;QAAE,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAA;KAAE;IAKvE,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,QAAQ,EAAE;IAwDjD,OAAO,CAAC,QAAQ;IA2BhB,OAAO,CAAC,cAAc;IAuBtB,OAAO,CAAC,yBAAyB;IAyBjC,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA8B/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD;AAED,qBAAa,sBAAsB;IACjC,OAAO,CAAC,gBAAgB,CAAyB;IACjD,OAAO,CAAC,YAAY,CAAgC;gBAExC,OAAO,EAAE,gBAAgB,GAAG;QAAE,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAA;KAAE;IAKxE,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,QAAQ,EAAE;IAenC,OAAO,CAAC,QAAQ;IAwBhB,OAAO,CAAC,kBAAkB;IA8CpB,cAAc,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAahE,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA+B/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD"}
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import { Document } from '../schema';
|
|
2
|
+
import type { JsonChunkOptions } from '../types';
|
|
2
3
|
export declare class RecursiveJsonTransformer {
|
|
3
4
|
private maxSize;
|
|
4
5
|
private minSize;
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
});
|
|
6
|
+
private ensureAscii;
|
|
7
|
+
private convertLists;
|
|
8
|
+
constructor({ maxSize, minSize, ensureAscii, convertLists }: JsonChunkOptions);
|
|
9
9
|
private static jsonSize;
|
|
10
10
|
/**
|
|
11
11
|
* Transform JSON data while handling circular references
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"json.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"json.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AACrC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAEjD,qBAAa,wBAAwB;IACnC,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,WAAW,CAAU;IAC7B,OAAO,CAAC,YAAY,CAAU;gBAElB,EAAE,OAAc,EAAE,OAAO,EAAE,WAAmB,EAAE,YAAmB,EAAE,EAAE,gBAAgB;IAOnG,OAAO,CAAC,MAAM,CAAC,QAAQ;IAoCvB;;OAEG;IACI,SAAS,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAmChE;;OAEG;IACH,OAAO,CAAC,MAAM,CAAC,aAAa;IAS5B;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAU/B;;;OAGG;IACH,OAAO,CAAC,oBAAoB;IA0C5B;;;OAGG;IACH,OAAO,CAAC,WAAW;IAMnB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAOzB;;;OAGG;IACH,OAAO,CAAC,WAAW;IA8DnB;;;OAGG;IACH,OAAO,CAAC,kBAAkB;IAmE1B;;;OAGG;IACH,OAAO,CAAC,eAAe;IAuBvB;;;OAGG;IACH,OAAO,CAAC,SAAS;IAuDjB;;OAEG;IACH,SAAS,CAAC,EACR,QAAQ,EACR,YAAoB,GACrB,EAAE;QACD,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC9B,YAAY,CAAC,EAAE,OAAO,CAAC;KACxB,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE;IAYzB;;;OAGG;IACH,OAAO,CAAC,cAAc;IAiBtB;;OAEG;IACH,SAAS,CAAC,EACR,QAAQ,EACR,YAAoB,EACpB,WAAkB,GACnB,EAAE;QACD,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC9B,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,WAAW,CAAC,EAAE,OAAO,CAAC;KACvB,GAAG,MAAM,EAAE;IAoBZ;;OAEG;IACH,eAAe,CAAC,EACd,KAAK,EACL,YAAoB,EACpB,WAAkB,EAClB,SAAS,GACV,EAAE;QACD,KAAK,EAAE,MAAM,EAAE,CAAC;QAChB,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,CAAC;KACnC,GAAG,QAAQ,EAAE;IAoBd,kBAAkB,CAAC,EACjB,WAAW,EACX,SAAS,EACT,YAAY,GACb,EAAE;QACD,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,SAAS,EAAE,QAAQ,EAAE,CAAC;KACvB,GAAG,QAAQ,EAAE;CAiBf"}
|
|
@@ -1,12 +1,6 @@
|
|
|
1
|
+
import type { BaseChunkOptions } from '../types';
|
|
1
2
|
import { RecursiveCharacterTransformer } from './character';
|
|
2
3
|
export declare class LatexTransformer extends RecursiveCharacterTransformer {
|
|
3
|
-
constructor(options?:
|
|
4
|
-
size?: number;
|
|
5
|
-
overlap?: number;
|
|
6
|
-
lengthFunction?: (text: string) => number;
|
|
7
|
-
keepSeparator?: boolean | 'start' | 'end';
|
|
8
|
-
addStartIndex?: boolean;
|
|
9
|
-
stripWhitespace?: boolean;
|
|
10
|
-
});
|
|
4
|
+
constructor(options?: BaseChunkOptions);
|
|
11
5
|
}
|
|
12
6
|
//# sourceMappingURL=latex.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"latex.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/latex.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"latex.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/latex.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAEjD,OAAO,EAAE,6BAA6B,EAAE,MAAM,aAAa,CAAC;AAE5D,qBAAa,gBAAiB,SAAQ,6BAA6B;gBACrD,OAAO,GAAE,gBAAqB;CAI3C"}
|
|
@@ -1,14 +1,8 @@
|
|
|
1
1
|
import { Document } from '../schema';
|
|
2
|
+
import type { BaseChunkOptions } from '../types';
|
|
2
3
|
import { RecursiveCharacterTransformer } from './character';
|
|
3
4
|
export declare class MarkdownTransformer extends RecursiveCharacterTransformer {
|
|
4
|
-
constructor(options?:
|
|
5
|
-
chunkSize?: number;
|
|
6
|
-
chunkOverlap?: number;
|
|
7
|
-
lengthFunction?: (text: string) => number;
|
|
8
|
-
keepSeparator?: boolean | 'start' | 'end';
|
|
9
|
-
addStartIndex?: boolean;
|
|
10
|
-
stripWhitespace?: boolean;
|
|
11
|
-
});
|
|
5
|
+
constructor(options?: BaseChunkOptions);
|
|
12
6
|
}
|
|
13
7
|
export declare class MarkdownHeaderTransformer {
|
|
14
8
|
private headersToSplitOn;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAGrC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAEjD,OAAO,EAAE,6BAA6B,EAAE,MAAM,aAAa,CAAC;AAa5D,qBAAa,mBAAoB,SAAQ,6BAA6B;gBACxD,OAAO,GAAE,gBAAqB;CAI3C;AAED,qBAAa,yBAAyB;IACpC,OAAO,CAAC,gBAAgB,CAAqB;IAC7C,OAAO,CAAC,cAAc,CAAU;IAChC,OAAO,CAAC,YAAY,CAAU;gBAElB,gBAAgB,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,EAAE,cAAc,GAAE,OAAe,EAAE,YAAY,GAAE,OAAc;IAM/G,OAAO,CAAC,sBAAsB;IAuD9B,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,QAAQ,EAAE;IAqHjD,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IAmB/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type { SentenceChunkOptions } from '../types';
|
|
2
|
+
import { TextTransformer } from './text';
|
|
3
|
+
export declare class SentenceTransformer extends TextTransformer {
|
|
4
|
+
protected minSize: number;
|
|
5
|
+
protected maxSize: number;
|
|
6
|
+
protected targetSize: number;
|
|
7
|
+
protected sentenceEnders: string[];
|
|
8
|
+
protected fallbackToWords: boolean;
|
|
9
|
+
protected fallbackToCharacters: boolean;
|
|
10
|
+
protected keepSeparator: boolean | 'start' | 'end';
|
|
11
|
+
constructor(options: SentenceChunkOptions);
|
|
12
|
+
private detectSentenceBoundaries;
|
|
13
|
+
private isRealSentenceBoundary;
|
|
14
|
+
private isCommonAbbreviation;
|
|
15
|
+
/**
|
|
16
|
+
* Group sentences into chunks with integrated overlap processing
|
|
17
|
+
*/
|
|
18
|
+
private groupSentencesIntoChunks;
|
|
19
|
+
/**
|
|
20
|
+
* Handle oversized sentences with fallback strategies
|
|
21
|
+
*/
|
|
22
|
+
private handleOversizedSentence;
|
|
23
|
+
private splitSentenceIntoWords;
|
|
24
|
+
private splitSentenceIntoCharacters;
|
|
25
|
+
private calculateSentenceOverlap;
|
|
26
|
+
private calculateChunkSize;
|
|
27
|
+
splitText({ text }: {
|
|
28
|
+
text: string;
|
|
29
|
+
}): string[];
|
|
30
|
+
}
|
|
31
|
+
//# sourceMappingURL=sentence.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sentence.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/sentence.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,UAAU,CAAC;AACrD,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAC;AAEzC,qBAAa,mBAAoB,SAAQ,eAAe;IACtD,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,UAAU,EAAE,MAAM,CAAC;IAC7B,SAAS,CAAC,cAAc,EAAE,MAAM,EAAE,CAAC;IACnC,SAAS,CAAC,eAAe,EAAE,OAAO,CAAC;IACnC,SAAS,CAAC,oBAAoB,EAAE,OAAO,CAAC;IACxC,SAAS,CAAC,aAAa,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;gBAEvC,OAAO,EAAE,oBAAoB;IAuBzC,OAAO,CAAC,wBAAwB;IA+BhC,OAAO,CAAC,sBAAsB;IAqB9B,OAAO,CAAC,oBAAoB;IA8B5B;;OAEG;IACH,OAAO,CAAC,wBAAwB;IAsDhC;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAqB/B,OAAO,CAAC,sBAAsB;IAmC9B,OAAO,CAAC,2BAA2B;IAsBnC,OAAO,CAAC,wBAAwB;IA4BhC,OAAO,CAAC,kBAAkB;IAqB1B,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;CAShD"}
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
import { Document } from '../schema';
|
|
2
|
-
import type {
|
|
2
|
+
import type { BaseChunkOptions } from '../types';
|
|
3
3
|
import type { Transformer } from './transformer';
|
|
4
4
|
export declare abstract class TextTransformer implements Transformer {
|
|
5
|
-
protected
|
|
5
|
+
protected maxSize: number;
|
|
6
6
|
protected overlap: number;
|
|
7
7
|
protected lengthFunction: (text: string) => number;
|
|
8
8
|
protected keepSeparator: boolean | 'start' | 'end';
|
|
9
9
|
protected addStartIndex: boolean;
|
|
10
10
|
protected stripWhitespace: boolean;
|
|
11
|
-
constructor({
|
|
11
|
+
constructor({ maxSize, overlap, lengthFunction, keepSeparator, addStartIndex, stripWhitespace, }: BaseChunkOptions);
|
|
12
12
|
setAddStartIndex(value: boolean): void;
|
|
13
13
|
abstract splitText({ text }: {
|
|
14
14
|
text: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"text.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/text.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAErC,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"text.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/text.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAErC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAEjD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAEjD,8BAAsB,eAAgB,YAAW,WAAW;IAC1D,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,cAAc,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;IACnD,SAAS,CAAC,aAAa,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;IACnD,SAAS,CAAC,aAAa,EAAE,OAAO,CAAC;IACjC,SAAS,CAAC,eAAe,EAAE,OAAO,CAAC;gBAEvB,EACV,OAAc,EACd,OAAa,EACb,cAA8C,EAC9C,aAAqB,EACrB,aAAqB,EACrB,eAAsB,GACvB,EAAE,gBAAgB;IAYnB,gBAAgB,CAAC,KAAK,EAAE,OAAO,GAAG,IAAI;IAItC,QAAQ,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IAExD,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA4B/E,cAAc,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;IAUjD,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;IAYrD,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IAQpE,SAAS,CAAC,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE;CA4DrE"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { TiktokenModel, TiktokenEncoding } from 'js-tiktoken';
|
|
2
|
+
import type { TokenChunkOptions } from '../types';
|
|
2
3
|
import { TextTransformer } from './text';
|
|
3
4
|
interface Tokenizer {
|
|
4
5
|
overlap: number;
|
|
@@ -15,18 +16,11 @@ export declare class TokenTransformer extends TextTransformer {
|
|
|
15
16
|
private allowedSpecial;
|
|
16
17
|
private disallowedSpecial;
|
|
17
18
|
constructor({ encodingName, modelName, allowedSpecial, disallowedSpecial, options, }: {
|
|
18
|
-
encodingName
|
|
19
|
+
encodingName?: TiktokenEncoding;
|
|
19
20
|
modelName?: TiktokenModel;
|
|
20
21
|
allowedSpecial?: Set<string> | 'all';
|
|
21
22
|
disallowedSpecial?: Set<string> | 'all';
|
|
22
|
-
options:
|
|
23
|
-
size?: number;
|
|
24
|
-
overlap?: number;
|
|
25
|
-
lengthFunction?: (text: string) => number;
|
|
26
|
-
keepSeparator?: boolean | 'start' | 'end';
|
|
27
|
-
addStartIndex?: boolean;
|
|
28
|
-
stripWhitespace?: boolean;
|
|
29
|
-
};
|
|
23
|
+
options: TokenChunkOptions;
|
|
30
24
|
});
|
|
31
25
|
splitText({ text }: {
|
|
32
26
|
text: string;
|
|
@@ -34,12 +28,7 @@ export declare class TokenTransformer extends TextTransformer {
|
|
|
34
28
|
static fromTikToken({ encodingName, modelName, options, }: {
|
|
35
29
|
encodingName?: TiktokenEncoding;
|
|
36
30
|
modelName?: TiktokenModel;
|
|
37
|
-
options?:
|
|
38
|
-
size?: number;
|
|
39
|
-
overlap?: number;
|
|
40
|
-
allowedSpecial?: Set<string> | 'all';
|
|
41
|
-
disallowedSpecial?: Set<string> | 'all';
|
|
42
|
-
};
|
|
31
|
+
options?: TokenChunkOptions;
|
|
43
32
|
}): TokenTransformer;
|
|
44
33
|
}
|
|
45
34
|
export {};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"token.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/token.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,gBAAgB,EAAY,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"token.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/token.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,gBAAgB,EAAY,MAAM,aAAa,CAAC;AAE7E,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAC;AAElD,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAC;AAEzC,UAAU,SAAS;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,cAAc,EAAE,MAAM,CAAC;IACvB,MAAM,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,MAAM,CAAC;IACrC,MAAM,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;CACpC;AAED,wBAAgB,iBAAiB,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,SAAS,CAAA;CAAE,GAAG,MAAM,EAAE,CAkBvG;AAED,qBAAa,gBAAiB,SAAQ,eAAe;IACnD,OAAO,CAAC,SAAS,CAAW;IAC5B,OAAO,CAAC,cAAc,CAAsB;IAC5C,OAAO,CAAC,iBAAiB,CAAsB;gBAEnC,EACV,YAA4B,EAC5B,SAAS,EACT,cAA0B,EAC1B,iBAAyB,EACzB,OAAY,GACb,EAAE;QACD,YAAY,CAAC,EAAE,gBAAgB,CAAC;QAChC,SAAS,CAAC,EAAE,aAAa,CAAC;QAC1B,cAAc,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;QACrC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;QACxC,OAAO,EAAE,iBAAiB,CAAC;KAC5B;IAaD,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IA0B/C,MAAM,CAAC,YAAY,CAAC,EAClB,YAA4B,EAC5B,SAAS,EACT,OAAY,GACb,EAAE;QACD,YAAY,CAAC,EAAE,gBAAgB,CAAC;QAChC,SAAS,CAAC,EAAE,aAAa,CAAC;QAC1B,OAAO,CAAC,EAAE,iBAAiB,CAAC;KAC7B,GAAG,gBAAgB;CAuCrB"}
|
package/dist/document/types.d.ts
CHANGED
|
@@ -34,33 +34,104 @@ export type ExtractParams = {
|
|
|
34
34
|
questions?: QuestionAnswerExtractArgs | boolean;
|
|
35
35
|
keywords?: KeywordExtractArgs | boolean;
|
|
36
36
|
};
|
|
37
|
-
export type
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
separator?: string;
|
|
42
|
-
separators?: string[];
|
|
43
|
-
isSeparatorRegex?: boolean;
|
|
37
|
+
export type BaseChunkOptions = {
|
|
38
|
+
/**
|
|
39
|
+
* @deprecated Use `maxSize` instead. Will be removed in next major version.
|
|
40
|
+
*/
|
|
44
41
|
size?: number;
|
|
45
42
|
maxSize?: number;
|
|
46
|
-
minSize?: number;
|
|
47
43
|
overlap?: number;
|
|
48
44
|
lengthFunction?: (text: string) => number;
|
|
49
45
|
keepSeparator?: boolean | 'start' | 'end';
|
|
50
46
|
addStartIndex?: boolean;
|
|
51
47
|
stripWhitespace?: boolean;
|
|
48
|
+
};
|
|
49
|
+
export type CharacterChunkOptions = BaseChunkOptions & {
|
|
50
|
+
separator?: string;
|
|
51
|
+
isSeparatorRegex?: boolean;
|
|
52
|
+
};
|
|
53
|
+
export type RecursiveChunkOptions = BaseChunkOptions & {
|
|
54
|
+
separators?: string[];
|
|
55
|
+
isSeparatorRegex?: boolean;
|
|
52
56
|
language?: Language;
|
|
53
|
-
|
|
54
|
-
|
|
57
|
+
};
|
|
58
|
+
export type TokenChunkOptions = BaseChunkOptions & {
|
|
55
59
|
encodingName?: TiktokenEncoding;
|
|
56
60
|
modelName?: TiktokenModel;
|
|
57
61
|
allowedSpecial?: Set<string> | 'all';
|
|
58
62
|
disallowedSpecial?: Set<string> | 'all';
|
|
63
|
+
};
|
|
64
|
+
export type MarkdownChunkOptions = BaseChunkOptions & {
|
|
65
|
+
headers?: [string, string][];
|
|
66
|
+
returnEachLine?: boolean;
|
|
59
67
|
stripHeaders?: boolean;
|
|
60
68
|
};
|
|
61
|
-
export type
|
|
62
|
-
|
|
63
|
-
|
|
69
|
+
export type HTMLChunkOptions = BaseChunkOptions & ({
|
|
70
|
+
headers: [string, string][];
|
|
71
|
+
sections?: never;
|
|
72
|
+
returnEachLine?: boolean;
|
|
73
|
+
} | {
|
|
74
|
+
sections: [string, string][];
|
|
75
|
+
headers?: never;
|
|
76
|
+
}) & {
|
|
77
|
+
returnEachLine?: boolean;
|
|
78
|
+
};
|
|
79
|
+
export type JsonChunkOptions = BaseChunkOptions & {
|
|
80
|
+
minSize?: number;
|
|
81
|
+
ensureAscii?: boolean;
|
|
82
|
+
convertLists?: boolean;
|
|
83
|
+
};
|
|
84
|
+
export type LatexChunkOptions = BaseChunkOptions & {};
|
|
85
|
+
export type SentenceChunkOptions = BaseChunkOptions & {
|
|
86
|
+
maxSize: number;
|
|
87
|
+
minSize?: number;
|
|
88
|
+
targetSize?: number;
|
|
89
|
+
sentenceEnders?: string[];
|
|
90
|
+
fallbackToWords?: boolean;
|
|
91
|
+
fallbackToCharacters?: boolean;
|
|
92
|
+
};
|
|
93
|
+
export type StrategyOptions = {
|
|
94
|
+
recursive: RecursiveChunkOptions;
|
|
95
|
+
character: CharacterChunkOptions;
|
|
96
|
+
token: TokenChunkOptions;
|
|
97
|
+
markdown: MarkdownChunkOptions;
|
|
98
|
+
html: HTMLChunkOptions;
|
|
99
|
+
json: JsonChunkOptions;
|
|
100
|
+
latex: LatexChunkOptions;
|
|
101
|
+
sentence: SentenceChunkOptions;
|
|
102
|
+
};
|
|
103
|
+
export type ChunkStrategy = 'recursive' | 'character' | 'token' | 'markdown' | 'html' | 'json' | 'latex' | 'sentence';
|
|
104
|
+
export type ChunkParams = ({
|
|
105
|
+
strategy?: 'character';
|
|
106
|
+
} & CharacterChunkOptions & {
|
|
64
107
|
extract?: ExtractParams;
|
|
65
|
-
}
|
|
108
|
+
}) | ({
|
|
109
|
+
strategy: 'recursive';
|
|
110
|
+
} & RecursiveChunkOptions & {
|
|
111
|
+
extract?: ExtractParams;
|
|
112
|
+
}) | ({
|
|
113
|
+
strategy: 'token';
|
|
114
|
+
} & TokenChunkOptions & {
|
|
115
|
+
extract?: ExtractParams;
|
|
116
|
+
}) | ({
|
|
117
|
+
strategy: 'markdown';
|
|
118
|
+
} & MarkdownChunkOptions & {
|
|
119
|
+
extract?: ExtractParams;
|
|
120
|
+
}) | ({
|
|
121
|
+
strategy: 'html';
|
|
122
|
+
} & HTMLChunkOptions & {
|
|
123
|
+
extract?: ExtractParams;
|
|
124
|
+
}) | ({
|
|
125
|
+
strategy: 'json';
|
|
126
|
+
} & JsonChunkOptions & {
|
|
127
|
+
extract?: ExtractParams;
|
|
128
|
+
}) | ({
|
|
129
|
+
strategy: 'latex';
|
|
130
|
+
} & LatexChunkOptions & {
|
|
131
|
+
extract?: ExtractParams;
|
|
132
|
+
}) | ({
|
|
133
|
+
strategy: 'sentence';
|
|
134
|
+
} & SentenceChunkOptions & {
|
|
135
|
+
extract?: ExtractParams;
|
|
136
|
+
});
|
|
66
137
|
//# sourceMappingURL=types.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/document/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACnE,OAAO,KAAK,EACV,mBAAmB,EACnB,kBAAkB,EAClB,yBAAyB,EACzB,kBAAkB,EACnB,MAAM,cAAc,CAAC;AAEtB,oBAAY,QAAQ;IAClB,GAAG,QAAQ;IACX,EAAE,OAAO;IACT,IAAI,SAAS;IACb,MAAM,WAAW;IACjB,EAAE,OAAO;IACT,EAAE,OAAO;IACT,GAAG,QAAQ;IACX,KAAK,UAAU;IACf,MAAM,WAAW;IACjB,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,IAAI,SAAS;IACb,KAAK,UAAU;IACf,KAAK,UAAU;IACf,QAAQ,aAAa;IACrB,KAAK,UAAU;IACf,IAAI,SAAS;IACb,GAAG,QAAQ;IACX,MAAM,WAAW;IACjB,KAAK,UAAU;IACf,CAAC,MAAM;IACP,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,OAAO,YAAY;IACnB,MAAM,WAAW;IACjB,UAAU,eAAe;CAC1B;AAED,MAAM,MAAM,aAAa,GAAG;IAC1B,KAAK,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC;IACtC,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;IACvC,SAAS,CAAC,EAAE,yBAAyB,GAAG,OAAO,CAAC;IAChD,QAAQ,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/document/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACnE,OAAO,KAAK,EACV,mBAAmB,EACnB,kBAAkB,EAClB,yBAAyB,EACzB,kBAAkB,EACnB,MAAM,cAAc,CAAC;AAEtB,oBAAY,QAAQ;IAClB,GAAG,QAAQ;IACX,EAAE,OAAO;IACT,IAAI,SAAS;IACb,MAAM,WAAW;IACjB,EAAE,OAAO;IACT,EAAE,OAAO;IACT,GAAG,QAAQ;IACX,KAAK,UAAU;IACf,MAAM,WAAW;IACjB,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,IAAI,SAAS;IACb,KAAK,UAAU;IACf,KAAK,UAAU;IACf,QAAQ,aAAa;IACrB,KAAK,UAAU;IACf,IAAI,SAAS;IACb,GAAG,QAAQ;IACX,MAAM,WAAW;IACjB,KAAK,UAAU;IACf,CAAC,MAAM;IACP,GAAG,QAAQ;IACX,IAAI,SAAS;IACb,OAAO,YAAY;IACnB,MAAM,WAAW;IACjB,UAAU,eAAe;CAC1B;AAED,MAAM,MAAM,aAAa,GAAG;IAC1B,KAAK,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC;IACtC,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;IACvC,SAAS,CAAC,EAAE,yBAAyB,GAAG,OAAO,CAAC;IAChD,QAAQ,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG;IAC7B;;OAEG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;IAC1C,aAAa,CAAC,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;IAC1C,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B,CAAC;AAEF,MAAM,MAAM,qBAAqB,GAAG,gBAAgB,GAAG;IACrD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B,CAAC;AAEF,MAAM,MAAM,qBAAqB,GAAG,gBAAgB,GAAG;IACrD,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,QAAQ,CAAC,EAAE,QAAQ,CAAC;CACrB,CAAC;AAEF,MAAM,MAAM,iBAAiB,GAAG,gBAAgB,GAAG;IACjD,YAAY,CAAC,EAAE,gBAAgB,CAAC;IAChC,SAAS,CAAC,EAAE,aAAa,CAAC;IAC1B,cAAc,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;IACrC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;CACzC,CAAC;AAEF,MAAM,MAAM,oBAAoB,GAAG,gBAAgB,GAAG;IACpD,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB,CAAC;AAEF,MAAM,MAAM,gBAAgB,GAAG,gBAAgB,GAC7C,CACI;IAAE,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAAC,QAAQ,CAAC,EAAE,KAAK,CAAC;IAAC,cAAc,CAAC,EAAE,OAAO,CAAA;CAAE,GAC3E;IAAE,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAAC,OAAO,CAAC,EAAE,KAAK,CAAA;CAAE,CACpD,GAAG;IAAE,cAAc,CAAC,EAAE,OAAO,CAAA;CAAE,CAAC;AAEnC,MAAM,MAAM,gBAAgB,GAAG,gBAAgB,GAAG;IAChD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB,CAAC;AAEF,MAAM,MAAM,iBAAiB,GAAG,gBAAgB,GAAG,EAAE,CAAC;AAEtD,MAAM,MAAM,oBAAoB,GAAG,gBAAgB,GAAG;IACpD,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,oBAAoB,CAAC,EAAE,OAAO,CAAC;CAChC,CAAC;AAEF,MAAM,MAAM,eAAe,GAAG;IAC5B,SAAS,EAAE,qBAAqB,CAAC;IACjC,SAAS,EAAE,qBAAqB,CAAC;IACjC,KAAK,EAAE,iBAAiB,CAAC;IACzB,QAAQ,EAAE,oBAAoB,CAAC;IAC/B,IAAI,EAAE,gBAAgB,CAAC;IACvB,IAAI,EAAE,gBAAgB,CAAC;IACvB,KAAK,EAAE,iBAAiB,CAAC;IACzB,QAAQ,EAAE,oBAAoB,CAAC;CAChC,CAAC;AAEF,MAAM,MAAM,aAAa,GAAG,WAAW,GAAG,WAAW,GAAG,OAAO,GAAG,UAAU,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,UAAU,CAAC;AAEtH,MAAM,MAAM,WAAW,GACnB,CAAC;IAAE,QAAQ,CAAC,EAAE,WAAW,CAAA;CAAE,GAAG,qBAAqB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GAClF,CAAC;IAAE,QAAQ,EAAE,WAAW,CAAA;CAAE,GAAG,qBAAqB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACjF,CAAC;IAAE,QAAQ,EAAE,OAAO,CAAA;CAAE,GAAG,iBAAiB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACzE,CAAC;IAAE,QAAQ,EAAE,UAAU,CAAA;CAAE,GAAG,oBAAoB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GAC/E,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAA;CAAE,GAAG,gBAAgB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACvE,CAAC;IAAE,QAAQ,EAAE,MAAM,CAAA;CAAE,GAAG,gBAAgB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACvE,CAAC;IAAE,QAAQ,EAAE,OAAO,CAAA;CAAE,GAAG,iBAAiB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,GACzE,CAAC;IAAE,QAAQ,EAAE,UAAU,CAAA;CAAE,GAAG,oBAAoB,GAAG;IAAE,OAAO,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validation.d.ts","sourceRoot":"","sources":["../../src/document/validation.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AA2H7C,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,aAAa,EAAE,MAAM,EAAE,GAAG,GAAG,IAAI,CAsB9E"}
|