@mastra/rag 1.0.6 → 1.0.7-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +25 -0
- package/dist/document/document.d.ts +10 -9
- package/dist/document/document.d.ts.map +1 -1
- package/dist/document/extractors/base.d.ts +1 -1
- package/dist/document/extractors/index.d.ts +5 -5
- package/dist/document/extractors/keywords.d.ts +4 -4
- package/dist/document/extractors/questions.d.ts +4 -4
- package/dist/document/extractors/summary.d.ts +4 -4
- package/dist/document/extractors/title.d.ts +4 -4
- package/dist/document/extractors/types.d.ts +1 -1
- package/dist/document/index.d.ts +2 -2
- package/dist/document/prompts/base.d.ts +1 -1
- package/dist/document/prompts/index.d.ts +3 -3
- package/dist/document/prompts/prompt.d.ts +1 -1
- package/dist/document/schema/index.d.ts +3 -3
- package/dist/document/schema/node.d.ts +2 -2
- package/dist/document/transformers/character.d.ts +6 -28
- package/dist/document/transformers/character.d.ts.map +1 -1
- package/dist/document/transformers/html.d.ts +9 -4
- package/dist/document/transformers/html.d.ts.map +1 -1
- package/dist/document/transformers/json.d.ts +5 -5
- package/dist/document/transformers/json.d.ts.map +1 -1
- package/dist/document/transformers/latex.d.ts +3 -9
- package/dist/document/transformers/latex.d.ts.map +1 -1
- package/dist/document/transformers/markdown.d.ts +4 -10
- package/dist/document/transformers/markdown.d.ts.map +1 -1
- package/dist/document/transformers/sentence.d.ts +31 -0
- package/dist/document/transformers/sentence.d.ts.map +1 -0
- package/dist/document/transformers/text.d.ts +5 -5
- package/dist/document/transformers/text.d.ts.map +1 -1
- package/dist/document/transformers/token.d.ts +5 -16
- package/dist/document/transformers/token.d.ts.map +1 -1
- package/dist/document/transformers/transformer.d.ts +1 -1
- package/dist/document/types.d.ts +86 -15
- package/dist/document/types.d.ts.map +1 -1
- package/dist/document/validation.d.ts +3 -0
- package/dist/document/validation.d.ts.map +1 -0
- package/dist/index.cjs +414 -80
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +8 -8
- package/dist/index.js +414 -80
- package/dist/index.js.map +1 -1
- package/dist/rerank/relevance/index.d.ts +3 -3
- package/dist/tools/document-chunker.d.ts +1 -1
- package/dist/tools/document-chunker.d.ts.map +1 -1
- package/dist/tools/graph-rag.d.ts +2 -2
- package/dist/tools/index.d.ts +3 -3
- package/dist/tools/types.d.ts +1 -1
- package/dist/tools/vector-query.d.ts +2 -2
- package/dist/utils/convert-sources.d.ts +2 -2
- package/dist/utils/index.d.ts +3 -3
- package/dist/utils/vector-search.d.ts +1 -1
- package/package.json +8 -7
- package/src/document/document.test.ts +294 -39
- package/src/document/document.ts +69 -41
- package/src/document/transformers/character.ts +15 -43
- package/src/document/transformers/html.ts +9 -9
- package/src/document/transformers/json.ts +8 -3
- package/src/document/transformers/latex.ts +3 -11
- package/src/document/transformers/markdown.ts +3 -11
- package/src/document/transformers/sentence.ts +314 -0
- package/src/document/transformers/text.ts +10 -10
- package/src/document/transformers/token.ts +6 -17
- package/src/document/types.ts +66 -15
- package/src/document/validation.ts +147 -0
- package/src/tools/document-chunker.ts +12 -8
- package/tsup.config.ts +2 -7
package/.turbo/turbo-build.log
CHANGED
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,30 @@
|
|
|
1
1
|
# @mastra/rag
|
|
2
2
|
|
|
3
|
+
## 1.0.7-alpha.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 4a406ec: fixes TypeScript declaration file imports to ensure proper ESM compatibility
|
|
8
|
+
- Updated dependencies [cb36de0]
|
|
9
|
+
- Updated dependencies [a82b851]
|
|
10
|
+
- Updated dependencies [41a0a0e]
|
|
11
|
+
- Updated dependencies [2871020]
|
|
12
|
+
- Updated dependencies [4a406ec]
|
|
13
|
+
- Updated dependencies [5d377e5]
|
|
14
|
+
- @mastra/core@0.13.0-alpha.2
|
|
15
|
+
|
|
16
|
+
## 1.0.7-alpha.0
|
|
17
|
+
|
|
18
|
+
### Patch Changes
|
|
19
|
+
|
|
20
|
+
- 351b36e: update evals and rag ai sdk package versions
|
|
21
|
+
- ccd519c: Add sentence chunking strategy and strategy-specific parameter validation for all existing strategies.
|
|
22
|
+
- Updated dependencies [ea0c5f2]
|
|
23
|
+
- Updated dependencies [b0e43c1]
|
|
24
|
+
- Updated dependencies [1fb812e]
|
|
25
|
+
- Updated dependencies [35c5798]
|
|
26
|
+
- @mastra/core@0.13.0-alpha.1
|
|
27
|
+
|
|
3
28
|
## 1.0.6
|
|
4
29
|
|
|
5
30
|
### Patch Changes
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { Document as Chunk } from './schema';
|
|
2
|
-
import type {
|
|
1
|
+
import { Document as Chunk } from './schema/index.js';
|
|
2
|
+
import type { ChunkParams, ExtractParams, HTMLChunkOptions, RecursiveChunkOptions, CharacterChunkOptions, TokenChunkOptions, MarkdownChunkOptions, JsonChunkOptions, LatexChunkOptions, SentenceChunkOptions } from './types.js';
|
|
3
3
|
export declare class MDocument {
|
|
4
4
|
private chunks;
|
|
5
5
|
private type;
|
|
@@ -17,13 +17,14 @@ export declare class MDocument {
|
|
|
17
17
|
static fromJSON(jsonString: string, metadata?: Record<string, any>): MDocument;
|
|
18
18
|
private defaultStrategy;
|
|
19
19
|
private chunkBy;
|
|
20
|
-
chunkRecursive(options?:
|
|
21
|
-
chunkCharacter(options?:
|
|
22
|
-
chunkHTML(options?:
|
|
23
|
-
chunkJSON(options?:
|
|
24
|
-
chunkLatex(options?:
|
|
25
|
-
chunkToken(options?:
|
|
26
|
-
chunkMarkdown(options?:
|
|
20
|
+
chunkRecursive(options?: RecursiveChunkOptions): Promise<void>;
|
|
21
|
+
chunkCharacter(options?: CharacterChunkOptions): Promise<void>;
|
|
22
|
+
chunkHTML(options?: HTMLChunkOptions): Promise<void>;
|
|
23
|
+
chunkJSON(options?: JsonChunkOptions): Promise<void>;
|
|
24
|
+
chunkLatex(options?: LatexChunkOptions): Promise<void>;
|
|
25
|
+
chunkToken(options?: TokenChunkOptions): Promise<void>;
|
|
26
|
+
chunkMarkdown(options?: MarkdownChunkOptions): Promise<void>;
|
|
27
|
+
chunkSentence(options?: SentenceChunkOptions): Promise<void>;
|
|
27
28
|
chunk(params?: ChunkParams): Promise<Chunk[]>;
|
|
28
29
|
getDocs(): Chunk[];
|
|
29
30
|
getText(): string[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"document.d.ts","sourceRoot":"","sources":["../../src/document/document.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,QAAQ,IAAI,KAAK,EAAgC,MAAM,UAAU,CAAC;
|
|
1
|
+
{"version":3,"file":"document.d.ts","sourceRoot":"","sources":["../../src/document/document.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,QAAQ,IAAI,KAAK,EAAgC,MAAM,UAAU,CAAC;AAS3E,OAAO,KAAK,EACV,WAAW,EAEX,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,oBAAoB,EACpB,gBAAgB,EAChB,iBAAiB,EACjB,oBAAoB,EAErB,MAAM,SAAS,CAAC;AAGjB,qBAAa,SAAS;IACpB,OAAO,CAAC,MAAM,CAAU;IACxB,OAAO,CAAC,IAAI,CAAS;gBAET,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;SAAE,EAAE,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE;IAOhG,eAAe,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,EAAE,aAAa,GAAG,OAAO,CAAC,SAAS,CAAC;IAmDjG,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYxE,MAAM,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAYhF,MAAM,CAAC,QAAQ,CAAC,UAAU,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,SAAS;IAY9E,OAAO,CAAC,eAAe;YAeT,OAAO;IAoBf,cAAc,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAa9D,cAAc,CAAC,OAAO,CAAC,EAAE,qBAAqB,GAAG,OAAO,CAAC,IAAI,CAAC;IAU9D,SAAS,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAoBpD,SAAS,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAmBpD,UAAU,CAAC,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC;IAMtD,UAAU,CAAC,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,IAAI,CAAC;IAUtD,aAAa,CAAC,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAa5D,aAAa,CAAC,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAuB5D,KAAK,CAAC,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAiBnD,OAAO,IAAI,KAAK,EAAE;IAIlB,OAAO,IAAI,MAAM,EAAE;IAInB,WAAW,IAAI,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE;CAGrC"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
export { TitleExtractor } from './title';
|
|
2
|
-
export { SummaryExtractor } from './summary';
|
|
3
|
-
export { QuestionsAnsweredExtractor } from './questions';
|
|
4
|
-
export { KeywordExtractor } from './keywords';
|
|
5
|
-
export type { KeywordExtractArgs, QuestionAnswerExtractArgs, SummaryExtractArgs, TitleExtractorsArgs } from './types';
|
|
1
|
+
export { TitleExtractor } from './title.js';
|
|
2
|
+
export { SummaryExtractor } from './summary.js';
|
|
3
|
+
export { QuestionsAnsweredExtractor } from './questions.js';
|
|
4
|
+
export { KeywordExtractor } from './keywords.js';
|
|
5
|
+
export type { KeywordExtractArgs, QuestionAnswerExtractArgs, SummaryExtractArgs, TitleExtractorsArgs } from './types.js';
|
|
6
6
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import type { MastraLanguageModel } from '@mastra/core/agent';
|
|
2
|
-
import type { KeywordExtractPrompt } from '../prompts';
|
|
3
|
-
import type { BaseNode } from '../schema';
|
|
4
|
-
import { BaseExtractor } from './base';
|
|
5
|
-
import type { KeywordExtractArgs } from './types';
|
|
2
|
+
import type { KeywordExtractPrompt } from '../prompts/index.js';
|
|
3
|
+
import type { BaseNode } from '../schema/index.js';
|
|
4
|
+
import { BaseExtractor } from './base.js';
|
|
5
|
+
import type { KeywordExtractArgs } from './types.js';
|
|
6
6
|
type ExtractKeyword = {
|
|
7
7
|
/**
|
|
8
8
|
* Comma-separated keywords extracted from the node. May be empty if extraction fails.
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import type { MastraLanguageModel } from '@mastra/core/agent';
|
|
2
|
-
import type { QuestionExtractPrompt } from '../prompts';
|
|
3
|
-
import type { BaseNode } from '../schema';
|
|
4
|
-
import { BaseExtractor } from './base';
|
|
5
|
-
import type { QuestionAnswerExtractArgs } from './types';
|
|
2
|
+
import type { QuestionExtractPrompt } from '../prompts/index.js';
|
|
3
|
+
import type { BaseNode } from '../schema/index.js';
|
|
4
|
+
import { BaseExtractor } from './base.js';
|
|
5
|
+
import type { QuestionAnswerExtractArgs } from './types.js';
|
|
6
6
|
type ExtractQuestion = {
|
|
7
7
|
/**
|
|
8
8
|
* Questions extracted from the node as a string (may be empty if extraction fails).
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import type { SummaryPrompt } from '../prompts';
|
|
2
|
-
import type { BaseNode } from '../schema';
|
|
3
|
-
import { BaseExtractor } from './base';
|
|
4
|
-
import type { SummaryExtractArgs } from './types';
|
|
1
|
+
import type { SummaryPrompt } from '../prompts/index.js';
|
|
2
|
+
import type { BaseNode } from '../schema/index.js';
|
|
3
|
+
import { BaseExtractor } from './base.js';
|
|
4
|
+
import type { SummaryExtractArgs } from './types.js';
|
|
5
5
|
type ExtractSummary = {
|
|
6
6
|
sectionSummary?: string;
|
|
7
7
|
prevSectionSummary?: string;
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import type { MastraLanguageModel } from '@mastra/core/agent';
|
|
2
|
-
import type { TitleCombinePrompt, TitleExtractorPrompt } from '../prompts';
|
|
3
|
-
import type { BaseNode } from '../schema';
|
|
4
|
-
import { BaseExtractor } from './base';
|
|
5
|
-
import type { TitleExtractorsArgs } from './types';
|
|
2
|
+
import type { TitleCombinePrompt, TitleExtractorPrompt } from '../prompts/index.js';
|
|
3
|
+
import type { BaseNode } from '../schema/index.js';
|
|
4
|
+
import { BaseExtractor } from './base.js';
|
|
5
|
+
import type { TitleExtractorsArgs } from './types.js';
|
|
6
6
|
type ExtractTitle = {
|
|
7
7
|
documentTitle: string;
|
|
8
8
|
};
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { MastraLanguageModel } from '@mastra/core/agent';
|
|
2
|
-
import type { KeywordExtractPrompt, QuestionExtractPrompt, SummaryPrompt, TitleExtractorPrompt, TitleCombinePrompt } from '../prompts';
|
|
2
|
+
import type { KeywordExtractPrompt, QuestionExtractPrompt, SummaryPrompt, TitleExtractorPrompt, TitleCombinePrompt } from '../prompts/index.js';
|
|
3
3
|
export type KeywordExtractArgs = {
|
|
4
4
|
llm?: MastraLanguageModel;
|
|
5
5
|
keywords?: number;
|
package/dist/document/index.d.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export * from './document';
|
|
2
|
-
export * from './types';
|
|
1
|
+
export * from './document.js';
|
|
2
|
+
export * from './types.js';
|
|
3
3
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { BasePromptTemplateOptions, ChatMessage, PromptTemplateOptions } from './types';
|
|
1
|
+
import type { BasePromptTemplateOptions, ChatMessage, PromptTemplateOptions } from './types.js';
|
|
2
2
|
export declare abstract class BasePromptTemplate<const TemplatesVar extends readonly string[] = string[]> {
|
|
3
3
|
templateVars: Set<string>;
|
|
4
4
|
options: Partial<Record<TemplatesVar[number] | (string & {}), string>>;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export { BasePromptTemplate, PromptTemplate } from './base';
|
|
2
|
-
export { defaultKeywordExtractPrompt, defaultQuestionExtractPrompt, defaultSummaryPrompt, defaultTitleCombinePromptTemplate, defaultTitleExtractorPromptTemplate, } from './prompt';
|
|
3
|
-
export type { KeywordExtractPrompt, QuestionExtractPrompt, SummaryPrompt, TitleCombinePrompt, TitleExtractorPrompt, } from './prompt';
|
|
1
|
+
export { BasePromptTemplate, PromptTemplate } from './base.js';
|
|
2
|
+
export { defaultKeywordExtractPrompt, defaultQuestionExtractPrompt, defaultSummaryPrompt, defaultTitleCombinePromptTemplate, defaultTitleExtractorPromptTemplate, } from './prompt.js';
|
|
3
|
+
export type { KeywordExtractPrompt, QuestionExtractPrompt, SummaryPrompt, TitleCombinePrompt, TitleExtractorPrompt, } from './prompt.js';
|
|
4
4
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { PromptTemplate } from './base';
|
|
1
|
+
import { PromptTemplate } from './base.js';
|
|
2
2
|
export type SummaryPrompt = PromptTemplate<['context']>;
|
|
3
3
|
export type KeywordExtractPrompt = PromptTemplate<['context', 'maxKeywords']>;
|
|
4
4
|
export type QuestionExtractPrompt = PromptTemplate<['context', 'numQuestions']>;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export { BaseNode, Document, TextNode } from './node';
|
|
2
|
-
export { NodeRelationship, ObjectType } from './types';
|
|
3
|
-
export type { Metadata, RelatedNodeInfo, RelatedNodeType, BaseNodeParams, TextNodeParams } from './types';
|
|
1
|
+
export { BaseNode, Document, TextNode } from './node.js';
|
|
2
|
+
export { NodeRelationship, ObjectType } from './types.js';
|
|
3
|
+
export type { Metadata, RelatedNodeInfo, RelatedNodeType, BaseNodeParams, TextNodeParams } from './types.js';
|
|
4
4
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { NodeRelationship, ObjectType } from './types';
|
|
2
|
-
import type { Metadata, RelatedNodeInfo, RelatedNodeType, BaseNodeParams, TextNodeParams } from './types';
|
|
1
|
+
import { NodeRelationship, ObjectType } from './types.js';
|
|
2
|
+
import type { Metadata, RelatedNodeInfo, RelatedNodeType, BaseNodeParams, TextNodeParams } from './types.js';
|
|
3
3
|
/**
|
|
4
4
|
* Generic abstract class for retrievable nodes
|
|
5
5
|
*/
|
|
@@ -1,21 +1,10 @@
|
|
|
1
|
-
import { Language } from '../types';
|
|
2
|
-
import type {
|
|
3
|
-
import { TextTransformer } from './text';
|
|
1
|
+
import { Language } from '../types.js';
|
|
2
|
+
import type { BaseChunkOptions, CharacterChunkOptions, RecursiveChunkOptions } from '../types.js';
|
|
3
|
+
import { TextTransformer } from './text.js';
|
|
4
4
|
export declare class CharacterTransformer extends TextTransformer {
|
|
5
5
|
protected separator: string;
|
|
6
6
|
protected isSeparatorRegex: boolean;
|
|
7
|
-
constructor({ separator, isSeparatorRegex,
|
|
8
|
-
separator?: string;
|
|
9
|
-
isSeparatorRegex?: boolean;
|
|
10
|
-
options?: {
|
|
11
|
-
size?: number;
|
|
12
|
-
overlap?: number;
|
|
13
|
-
lengthFunction?: (text: string) => number;
|
|
14
|
-
keepSeparator?: boolean | 'start' | 'end';
|
|
15
|
-
addStartIndex?: boolean;
|
|
16
|
-
stripWhitespace?: boolean;
|
|
17
|
-
};
|
|
18
|
-
});
|
|
7
|
+
constructor({ separator, isSeparatorRegex, ...baseOptions }?: CharacterChunkOptions);
|
|
19
8
|
splitText({ text }: {
|
|
20
9
|
text: string;
|
|
21
10
|
}): string[];
|
|
@@ -24,23 +13,12 @@ export declare class CharacterTransformer extends TextTransformer {
|
|
|
24
13
|
export declare class RecursiveCharacterTransformer extends TextTransformer {
|
|
25
14
|
protected separators: string[];
|
|
26
15
|
protected isSeparatorRegex: boolean;
|
|
27
|
-
constructor({ separators, isSeparatorRegex,
|
|
28
|
-
separators?: string[];
|
|
29
|
-
isSeparatorRegex?: boolean;
|
|
30
|
-
options?: ChunkOptions;
|
|
31
|
-
});
|
|
16
|
+
constructor({ separators, isSeparatorRegex, language, ...baseOptions }?: RecursiveChunkOptions);
|
|
32
17
|
private _splitText;
|
|
33
18
|
splitText({ text }: {
|
|
34
19
|
text: string;
|
|
35
20
|
}): string[];
|
|
36
|
-
static fromLanguage(language: Language, options?:
|
|
37
|
-
size?: number;
|
|
38
|
-
chunkOverlap?: number;
|
|
39
|
-
lengthFunction?: (text: string) => number;
|
|
40
|
-
keepSeparator?: boolean | 'start' | 'end';
|
|
41
|
-
addStartIndex?: boolean;
|
|
42
|
-
stripWhitespace?: boolean;
|
|
43
|
-
}): RecursiveCharacterTransformer;
|
|
21
|
+
static fromLanguage(language: Language, options?: BaseChunkOptions): RecursiveCharacterTransformer;
|
|
44
22
|
static getSeparatorsForLanguage(language: Language): string[];
|
|
45
23
|
}
|
|
46
24
|
//# sourceMappingURL=character.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"character.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/character.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AACpC,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"character.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/character.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AACpC,OAAO,KAAK,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAE/F,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAC;AA+CzC,qBAAa,oBAAqB,SAAQ,eAAe;IACvD,SAAS,CAAC,SAAS,EAAE,MAAM,CAAC;IAC5B,SAAS,CAAC,gBAAgB,EAAE,OAAO,CAAC;gBAExB,EAAE,SAAkB,EAAE,gBAAwB,EAAE,GAAG,WAAW,EAAE,GAAE,qBAA0B;IAMxG,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IAqB/C,OAAO,CAAC,YAAY;CAyBrB;AAED,qBAAa,6BAA8B,SAAQ,eAAe;IAChE,SAAS,CAAC,UAAU,EAAE,MAAM,EAAE,CAAC;IAC/B,SAAS,CAAC,gBAAgB,EAAE,OAAO,CAAC;gBAExB,EAAE,UAAU,EAAE,gBAAwB,EAAE,QAAQ,EAAE,GAAG,WAAW,EAAE,GAAE,qBAA0B;IAM1G,OAAO,CAAC,UAAU;IAuDlB,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IAI/C,MAAM,CAAC,YAAY,CAAC,QAAQ,EAAE,QAAQ,EAAE,OAAO,GAAE,gBAAqB,GAAG,6BAA6B;IAUtG,MAAM,CAAC,wBAAwB,CAAC,QAAQ,EAAE,QAAQ,GAAG,MAAM,EAAE;CA+E9D"}
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
-
import { Document } from '../schema';
|
|
1
|
+
import { Document } from '../schema/index.js';
|
|
2
|
+
import type { HTMLChunkOptions } from '../types.js';
|
|
2
3
|
export declare class HTMLHeaderTransformer {
|
|
3
4
|
private headersToSplitOn;
|
|
4
5
|
private returnEachElement;
|
|
5
|
-
constructor(
|
|
6
|
+
constructor(options: HTMLChunkOptions & {
|
|
7
|
+
headers: [string, string][];
|
|
8
|
+
});
|
|
6
9
|
splitText({ text }: {
|
|
7
10
|
text: string;
|
|
8
11
|
}): Document[];
|
|
@@ -14,8 +17,10 @@ export declare class HTMLHeaderTransformer {
|
|
|
14
17
|
}
|
|
15
18
|
export declare class HTMLSectionTransformer {
|
|
16
19
|
private headersToSplitOn;
|
|
17
|
-
private
|
|
18
|
-
constructor(
|
|
20
|
+
private textSplitter;
|
|
21
|
+
constructor(options: HTMLChunkOptions & {
|
|
22
|
+
sections: [string, string][];
|
|
23
|
+
});
|
|
19
24
|
splitText(text: string): Document[];
|
|
20
25
|
private getXPath;
|
|
21
26
|
private splitHtmlByHeaders;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"html.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/html.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"html.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/html.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AACrC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAWjD,qBAAa,qBAAqB;IAChC,OAAO,CAAC,gBAAgB,CAAqB;IAC7C,OAAO,CAAC,iBAAiB,CAAU;gBAEvB,OAAO,EAAE,gBAAgB,GAAG;QAAE,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAA;KAAE;IAKvE,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,QAAQ,EAAE;IAwDjD,OAAO,CAAC,QAAQ;IA2BhB,OAAO,CAAC,cAAc;IAuBtB,OAAO,CAAC,yBAAyB;IAyBjC,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA8B/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD;AAED,qBAAa,sBAAsB;IACjC,OAAO,CAAC,gBAAgB,CAAyB;IACjD,OAAO,CAAC,YAAY,CAAgC;gBAExC,OAAO,EAAE,gBAAgB,GAAG;QAAE,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAA;KAAE;IAKxE,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,QAAQ,EAAE;IAenC,OAAO,CAAC,QAAQ;IAwBhB,OAAO,CAAC,kBAAkB;IA8CpB,cAAc,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAahE,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA+B/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD"}
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import { Document } from '../schema';
|
|
1
|
+
import { Document } from '../schema/index.js';
|
|
2
|
+
import type { JsonChunkOptions } from '../types.js';
|
|
2
3
|
export declare class RecursiveJsonTransformer {
|
|
3
4
|
private maxSize;
|
|
4
5
|
private minSize;
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
});
|
|
6
|
+
private ensureAscii;
|
|
7
|
+
private convertLists;
|
|
8
|
+
constructor({ maxSize, minSize, ensureAscii, convertLists }: JsonChunkOptions);
|
|
9
9
|
private static jsonSize;
|
|
10
10
|
/**
|
|
11
11
|
* Transform JSON data while handling circular references
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"json.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"json.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AACrC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAEjD,qBAAa,wBAAwB;IACnC,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,WAAW,CAAU;IAC7B,OAAO,CAAC,YAAY,CAAU;gBAElB,EAAE,OAAc,EAAE,OAAO,EAAE,WAAmB,EAAE,YAAmB,EAAE,EAAE,gBAAgB;IAOnG,OAAO,CAAC,MAAM,CAAC,QAAQ;IAoCvB;;OAEG;IACI,SAAS,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAmChE;;OAEG;IACH,OAAO,CAAC,MAAM,CAAC,aAAa;IAS5B;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAU/B;;;OAGG;IACH,OAAO,CAAC,oBAAoB;IA0C5B;;;OAGG;IACH,OAAO,CAAC,WAAW;IAMnB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAOzB;;;OAGG;IACH,OAAO,CAAC,WAAW;IA8DnB;;;OAGG;IACH,OAAO,CAAC,kBAAkB;IAmE1B;;;OAGG;IACH,OAAO,CAAC,eAAe;IAuBvB;;;OAGG;IACH,OAAO,CAAC,SAAS;IAuDjB;;OAEG;IACH,SAAS,CAAC,EACR,QAAQ,EACR,YAAoB,GACrB,EAAE;QACD,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC9B,YAAY,CAAC,EAAE,OAAO,CAAC;KACxB,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE;IAYzB;;;OAGG;IACH,OAAO,CAAC,cAAc;IAiBtB;;OAEG;IACH,SAAS,CAAC,EACR,QAAQ,EACR,YAAoB,EACpB,WAAkB,GACnB,EAAE;QACD,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC9B,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,WAAW,CAAC,EAAE,OAAO,CAAC;KACvB,GAAG,MAAM,EAAE;IAoBZ;;OAEG;IACH,eAAe,CAAC,EACd,KAAK,EACL,YAAoB,EACpB,WAAkB,EAClB,SAAS,GACV,EAAE;QACD,KAAK,EAAE,MAAM,EAAE,CAAC;QAChB,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,CAAC;KACnC,GAAG,QAAQ,EAAE;IAoBd,kBAAkB,CAAC,EACjB,WAAW,EACX,SAAS,EACT,YAAY,GACb,EAAE;QACD,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,SAAS,EAAE,QAAQ,EAAE,CAAC;KACvB,GAAG,QAAQ,EAAE;CAiBf"}
|
|
@@ -1,12 +1,6 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import type { BaseChunkOptions } from '../types.js';
|
|
2
|
+
import { RecursiveCharacterTransformer } from './character.js';
|
|
2
3
|
export declare class LatexTransformer extends RecursiveCharacterTransformer {
|
|
3
|
-
constructor(options?:
|
|
4
|
-
size?: number;
|
|
5
|
-
overlap?: number;
|
|
6
|
-
lengthFunction?: (text: string) => number;
|
|
7
|
-
keepSeparator?: boolean | 'start' | 'end';
|
|
8
|
-
addStartIndex?: boolean;
|
|
9
|
-
stripWhitespace?: boolean;
|
|
10
|
-
});
|
|
4
|
+
constructor(options?: BaseChunkOptions);
|
|
11
5
|
}
|
|
12
6
|
//# sourceMappingURL=latex.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"latex.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/latex.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"latex.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/latex.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAEjD,OAAO,EAAE,6BAA6B,EAAE,MAAM,aAAa,CAAC;AAE5D,qBAAa,gBAAiB,SAAQ,6BAA6B;gBACrD,OAAO,GAAE,gBAAqB;CAI3C"}
|
|
@@ -1,14 +1,8 @@
|
|
|
1
|
-
import { Document } from '../schema';
|
|
2
|
-
import {
|
|
1
|
+
import { Document } from '../schema/index.js';
|
|
2
|
+
import type { BaseChunkOptions } from '../types.js';
|
|
3
|
+
import { RecursiveCharacterTransformer } from './character.js';
|
|
3
4
|
export declare class MarkdownTransformer extends RecursiveCharacterTransformer {
|
|
4
|
-
constructor(options?:
|
|
5
|
-
chunkSize?: number;
|
|
6
|
-
chunkOverlap?: number;
|
|
7
|
-
lengthFunction?: (text: string) => number;
|
|
8
|
-
keepSeparator?: boolean | 'start' | 'end';
|
|
9
|
-
addStartIndex?: boolean;
|
|
10
|
-
stripWhitespace?: boolean;
|
|
11
|
-
});
|
|
5
|
+
constructor(options?: BaseChunkOptions);
|
|
12
6
|
}
|
|
13
7
|
export declare class MarkdownHeaderTransformer {
|
|
14
8
|
private headersToSplitOn;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAGrC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAEjD,OAAO,EAAE,6BAA6B,EAAE,MAAM,aAAa,CAAC;AAa5D,qBAAa,mBAAoB,SAAQ,6BAA6B;gBACxD,OAAO,GAAE,gBAAqB;CAI3C;AAED,qBAAa,yBAAyB;IACpC,OAAO,CAAC,gBAAgB,CAAqB;IAC7C,OAAO,CAAC,cAAc,CAAU;IAChC,OAAO,CAAC,YAAY,CAAU;gBAElB,gBAAgB,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,EAAE,cAAc,GAAE,OAAe,EAAE,YAAY,GAAE,OAAc;IAM/G,OAAO,CAAC,sBAAsB;IAuD9B,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,QAAQ,EAAE;IAqHjD,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IAmB/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type { SentenceChunkOptions } from '../types.js';
|
|
2
|
+
import { TextTransformer } from './text.js';
|
|
3
|
+
export declare class SentenceTransformer extends TextTransformer {
|
|
4
|
+
protected minSize: number;
|
|
5
|
+
protected maxSize: number;
|
|
6
|
+
protected targetSize: number;
|
|
7
|
+
protected sentenceEnders: string[];
|
|
8
|
+
protected fallbackToWords: boolean;
|
|
9
|
+
protected fallbackToCharacters: boolean;
|
|
10
|
+
protected keepSeparator: boolean | 'start' | 'end';
|
|
11
|
+
constructor(options: SentenceChunkOptions);
|
|
12
|
+
private detectSentenceBoundaries;
|
|
13
|
+
private isRealSentenceBoundary;
|
|
14
|
+
private isCommonAbbreviation;
|
|
15
|
+
/**
|
|
16
|
+
* Group sentences into chunks with integrated overlap processing
|
|
17
|
+
*/
|
|
18
|
+
private groupSentencesIntoChunks;
|
|
19
|
+
/**
|
|
20
|
+
* Handle oversized sentences with fallback strategies
|
|
21
|
+
*/
|
|
22
|
+
private handleOversizedSentence;
|
|
23
|
+
private splitSentenceIntoWords;
|
|
24
|
+
private splitSentenceIntoCharacters;
|
|
25
|
+
private calculateSentenceOverlap;
|
|
26
|
+
private calculateChunkSize;
|
|
27
|
+
splitText({ text }: {
|
|
28
|
+
text: string;
|
|
29
|
+
}): string[];
|
|
30
|
+
}
|
|
31
|
+
//# sourceMappingURL=sentence.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sentence.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/sentence.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,UAAU,CAAC;AACrD,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAC;AAEzC,qBAAa,mBAAoB,SAAQ,eAAe;IACtD,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,UAAU,EAAE,MAAM,CAAC;IAC7B,SAAS,CAAC,cAAc,EAAE,MAAM,EAAE,CAAC;IACnC,SAAS,CAAC,eAAe,EAAE,OAAO,CAAC;IACnC,SAAS,CAAC,oBAAoB,EAAE,OAAO,CAAC;IACxC,SAAS,CAAC,aAAa,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;gBAEvC,OAAO,EAAE,oBAAoB;IAuBzC,OAAO,CAAC,wBAAwB;IA+BhC,OAAO,CAAC,sBAAsB;IAqB9B,OAAO,CAAC,oBAAoB;IA8B5B;;OAEG;IACH,OAAO,CAAC,wBAAwB;IAsDhC;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAqB/B,OAAO,CAAC,sBAAsB;IAmC9B,OAAO,CAAC,2BAA2B;IAsBnC,OAAO,CAAC,wBAAwB;IA4BhC,OAAO,CAAC,kBAAkB;IAqB1B,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;CAShD"}
|
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
import { Document } from '../schema';
|
|
2
|
-
import type {
|
|
3
|
-
import type { Transformer } from './transformer';
|
|
1
|
+
import { Document } from '../schema/index.js';
|
|
2
|
+
import type { BaseChunkOptions } from '../types.js';
|
|
3
|
+
import type { Transformer } from './transformer.js';
|
|
4
4
|
export declare abstract class TextTransformer implements Transformer {
|
|
5
|
-
protected
|
|
5
|
+
protected maxSize: number;
|
|
6
6
|
protected overlap: number;
|
|
7
7
|
protected lengthFunction: (text: string) => number;
|
|
8
8
|
protected keepSeparator: boolean | 'start' | 'end';
|
|
9
9
|
protected addStartIndex: boolean;
|
|
10
10
|
protected stripWhitespace: boolean;
|
|
11
|
-
constructor({
|
|
11
|
+
constructor({ maxSize, overlap, lengthFunction, keepSeparator, addStartIndex, stripWhitespace, }: BaseChunkOptions);
|
|
12
12
|
setAddStartIndex(value: boolean): void;
|
|
13
13
|
abstract splitText({ text }: {
|
|
14
14
|
text: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"text.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/text.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAErC,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"text.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/text.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAErC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAEjD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAEjD,8BAAsB,eAAgB,YAAW,WAAW;IAC1D,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,OAAO,EAAE,MAAM,CAAC;IAC1B,SAAS,CAAC,cAAc,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;IACnD,SAAS,CAAC,aAAa,EAAE,OAAO,GAAG,OAAO,GAAG,KAAK,CAAC;IACnD,SAAS,CAAC,aAAa,EAAE,OAAO,CAAC;IACjC,SAAS,CAAC,eAAe,EAAE,OAAO,CAAC;gBAEvB,EACV,OAAc,EACd,OAAa,EACb,cAA8C,EAC9C,aAAqB,EACrB,aAAqB,EACrB,eAAsB,GACvB,EAAE,gBAAgB;IAYnB,gBAAgB,CAAC,KAAK,EAAE,OAAO,GAAG,IAAI;IAItC,QAAQ,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IAExD,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IA4B/E,cAAc,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;IAUjD,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;IAYrD,SAAS,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IAQpE,SAAS,CAAC,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE;CA4DrE"}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { TiktokenModel, TiktokenEncoding } from 'js-tiktoken';
|
|
2
|
-
import {
|
|
2
|
+
import type { TokenChunkOptions } from '../types.js';
|
|
3
|
+
import { TextTransformer } from './text.js';
|
|
3
4
|
interface Tokenizer {
|
|
4
5
|
overlap: number;
|
|
5
6
|
tokensPerChunk: number;
|
|
@@ -15,18 +16,11 @@ export declare class TokenTransformer extends TextTransformer {
|
|
|
15
16
|
private allowedSpecial;
|
|
16
17
|
private disallowedSpecial;
|
|
17
18
|
constructor({ encodingName, modelName, allowedSpecial, disallowedSpecial, options, }: {
|
|
18
|
-
encodingName
|
|
19
|
+
encodingName?: TiktokenEncoding;
|
|
19
20
|
modelName?: TiktokenModel;
|
|
20
21
|
allowedSpecial?: Set<string> | 'all';
|
|
21
22
|
disallowedSpecial?: Set<string> | 'all';
|
|
22
|
-
options:
|
|
23
|
-
size?: number;
|
|
24
|
-
overlap?: number;
|
|
25
|
-
lengthFunction?: (text: string) => number;
|
|
26
|
-
keepSeparator?: boolean | 'start' | 'end';
|
|
27
|
-
addStartIndex?: boolean;
|
|
28
|
-
stripWhitespace?: boolean;
|
|
29
|
-
};
|
|
23
|
+
options: TokenChunkOptions;
|
|
30
24
|
});
|
|
31
25
|
splitText({ text }: {
|
|
32
26
|
text: string;
|
|
@@ -34,12 +28,7 @@ export declare class TokenTransformer extends TextTransformer {
|
|
|
34
28
|
static fromTikToken({ encodingName, modelName, options, }: {
|
|
35
29
|
encodingName?: TiktokenEncoding;
|
|
36
30
|
modelName?: TiktokenModel;
|
|
37
|
-
options?:
|
|
38
|
-
size?: number;
|
|
39
|
-
overlap?: number;
|
|
40
|
-
allowedSpecial?: Set<string> | 'all';
|
|
41
|
-
disallowedSpecial?: Set<string> | 'all';
|
|
42
|
-
};
|
|
31
|
+
options?: TokenChunkOptions;
|
|
43
32
|
}): TokenTransformer;
|
|
44
33
|
}
|
|
45
34
|
export {};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"token.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/token.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,gBAAgB,EAAY,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"token.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/token.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,gBAAgB,EAAY,MAAM,aAAa,CAAC;AAE7E,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAC;AAElD,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAC;AAEzC,UAAU,SAAS;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,cAAc,EAAE,MAAM,CAAC;IACvB,MAAM,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,MAAM,CAAC;IACrC,MAAM,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;CACpC;AAED,wBAAgB,iBAAiB,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,SAAS,CAAA;CAAE,GAAG,MAAM,EAAE,CAkBvG;AAED,qBAAa,gBAAiB,SAAQ,eAAe;IACnD,OAAO,CAAC,SAAS,CAAW;IAC5B,OAAO,CAAC,cAAc,CAAsB;IAC5C,OAAO,CAAC,iBAAiB,CAAsB;gBAEnC,EACV,YAA4B,EAC5B,SAAS,EACT,cAA0B,EAC1B,iBAAyB,EACzB,OAAY,GACb,EAAE;QACD,YAAY,CAAC,EAAE,gBAAgB,CAAC;QAChC,SAAS,CAAC,EAAE,aAAa,CAAC;QAC1B,cAAc,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;QACrC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;QACxC,OAAO,EAAE,iBAAiB,CAAC;KAC5B;IAaD,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IA0B/C,MAAM,CAAC,YAAY,CAAC,EAClB,YAA4B,EAC5B,SAAS,EACT,OAAY,GACb,EAAE;QACD,YAAY,CAAC,EAAE,gBAAgB,CAAC;QAChC,SAAS,CAAC,EAAE,aAAa,CAAC;QAC1B,OAAO,CAAC,EAAE,iBAAiB,CAAC;KAC7B,GAAG,gBAAgB;CAuCrB"}
|