@agentionai/agents 0.12.0-beta → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/Agent.d.ts +9 -3
- package/dist/agents/Agent.js +4 -0
- package/dist/agents/AgentConfig.d.ts +12 -2
- package/dist/agents/model-types.d.ts +7 -1
- package/dist/agents/ollama/OllamaAgent.d.ts +69 -0
- package/dist/agents/ollama/OllamaAgent.js +304 -0
- package/dist/chunkers/index.d.ts +0 -1
- package/dist/chunkers/index.js +1 -3
- package/dist/history/transformers.d.ts +36 -0
- package/dist/history/transformers.js +78 -1
- package/dist/history/types.d.ts +8 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.js +4 -1
- package/dist/ingestion/IngestionPipeline.d.ts +1 -73
- package/dist/ingestion/IngestionPipeline.js +1 -110
- package/dist/ollama.d.ts +4 -0
- package/dist/ollama.js +24 -0
- package/dist/viz/types.d.ts +1 -1
- package/package.json +6 -42
- package/dist/chunkers/ElementChunker.d.ts +0 -100
- package/dist/chunkers/ElementChunker.js +0 -242
- package/dist/parsers/DocumentParser.d.ts +0 -36
- package/dist/parsers/DocumentParser.js +0 -35
- package/dist/parsers/LlamaIndexParser.d.ts +0 -58
- package/dist/parsers/LlamaIndexParser.js +0 -71
- package/dist/parsers/OllamaOCRParser.d.ts +0 -98
- package/dist/parsers/OllamaOCRParser.js +0 -203
- package/dist/parsers/UnstructuredAPIParser.d.ts +0 -57
- package/dist/parsers/UnstructuredAPIParser.js +0 -131
- package/dist/parsers/UnstructuredLocalParser.d.ts +0 -42
- package/dist/parsers/UnstructuredLocalParser.js +0 -118
- package/dist/parsers/index.d.ts +0 -3
- package/dist/parsers/index.js +0 -6
- package/dist/parsers/types.d.ts +0 -50
- package/dist/parsers/types.js +0 -3
package/dist/index.d.ts
CHANGED
|
@@ -3,13 +3,14 @@ export * from "./agents/anthropic/ClaudeAgent";
|
|
|
3
3
|
export { OpenAiAgent } from "./agents/openai/OpenAiAgent";
|
|
4
4
|
export { MistralAgent } from "./agents/mistral/MistralAgent";
|
|
5
5
|
export { GeminiAgent } from "./agents/google/GeminiAgent";
|
|
6
|
+
export { OllamaAgent } from "./agents/ollama/OllamaAgent";
|
|
6
7
|
export * from "./agents/model-types";
|
|
7
8
|
export * from "./agents/AgentConfig";
|
|
8
9
|
export * from "./agents/AgentEvent";
|
|
9
10
|
export * from "./agents/errors/AgentError";
|
|
10
11
|
export * from "./history/History";
|
|
11
12
|
export * from "./history/types";
|
|
12
|
-
export { anthropicTransformer, openAiTransformer, mistralTransformer, geminiTransformer, } from "./history/transformers";
|
|
13
|
+
export { anthropicTransformer, openAiTransformer, mistralTransformer, geminiTransformer, ollamaTransformer, } from "./history/transformers";
|
|
13
14
|
export * from "./graph/AgentGraph";
|
|
14
15
|
export * from "./tools/Tool";
|
|
15
16
|
export * from "./mcp";
|
package/dist/index.js
CHANGED
|
@@ -22,7 +22,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
22
22
|
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
23
23
|
};
|
|
24
24
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
25
|
-
exports.geminiTransformer = exports.mistralTransformer = exports.openAiTransformer = exports.anthropicTransformer = exports.GeminiAgent = exports.MistralAgent = exports.OpenAiAgent = void 0;
|
|
25
|
+
exports.ollamaTransformer = exports.geminiTransformer = exports.mistralTransformer = exports.openAiTransformer = exports.anthropicTransformer = exports.OllamaAgent = exports.GeminiAgent = exports.MistralAgent = exports.OpenAiAgent = void 0;
|
|
26
26
|
// Agents
|
|
27
27
|
__exportStar(require("./agents/BaseAgent"), exports);
|
|
28
28
|
__exportStar(require("./agents/anthropic/ClaudeAgent"), exports);
|
|
@@ -32,6 +32,8 @@ var MistralAgent_1 = require("./agents/mistral/MistralAgent");
|
|
|
32
32
|
Object.defineProperty(exports, "MistralAgent", { enumerable: true, get: function () { return MistralAgent_1.MistralAgent; } });
|
|
33
33
|
var GeminiAgent_1 = require("./agents/google/GeminiAgent");
|
|
34
34
|
Object.defineProperty(exports, "GeminiAgent", { enumerable: true, get: function () { return GeminiAgent_1.GeminiAgent; } });
|
|
35
|
+
var OllamaAgent_1 = require("./agents/ollama/OllamaAgent");
|
|
36
|
+
Object.defineProperty(exports, "OllamaAgent", { enumerable: true, get: function () { return OllamaAgent_1.OllamaAgent; } });
|
|
35
37
|
__exportStar(require("./agents/model-types"), exports);
|
|
36
38
|
__exportStar(require("./agents/AgentConfig"), exports);
|
|
37
39
|
__exportStar(require("./agents/AgentEvent"), exports);
|
|
@@ -44,6 +46,7 @@ Object.defineProperty(exports, "anthropicTransformer", { enumerable: true, get:
|
|
|
44
46
|
Object.defineProperty(exports, "openAiTransformer", { enumerable: true, get: function () { return transformers_1.openAiTransformer; } });
|
|
45
47
|
Object.defineProperty(exports, "mistralTransformer", { enumerable: true, get: function () { return transformers_1.mistralTransformer; } });
|
|
46
48
|
Object.defineProperty(exports, "geminiTransformer", { enumerable: true, get: function () { return transformers_1.geminiTransformer; } });
|
|
49
|
+
Object.defineProperty(exports, "ollamaTransformer", { enumerable: true, get: function () { return transformers_1.ollamaTransformer; } });
|
|
47
50
|
// Graph
|
|
48
51
|
__exportStar(require("./graph/AgentGraph"), exports);
|
|
49
52
|
// Tools
|
|
package/dist/ingestion/IngestionPipeline.d.ts
CHANGED
|
@@ -2,8 +2,6 @@ import { Chunk, ChunkOptions } from "../chunkers/types";
|
|
|
2
2
|
import { Chunker } from "../chunkers/Chunker";
|
|
3
3
|
import { Embeddings } from "../embeddings/Embeddings";
|
|
4
4
|
import { VectorStore } from "../vectorstore/VectorStore";
|
|
5
|
-
import { DocumentParser } from "../parsers/DocumentParser";
|
|
6
|
-
import { ParseOptions } from "../parsers/types";
|
|
7
5
|
import { IngestionOptions, IngestionResult, DocumentInput } from "./types";
|
|
8
6
|
/**
|
|
9
7
|
* Pipeline for ingesting documents into a vector store.
|
|
@@ -33,15 +31,7 @@ export declare class IngestionPipeline {
|
|
|
33
31
|
private chunker;
|
|
34
32
|
private embeddings;
|
|
35
33
|
private store;
|
|
36
|
-
|
|
37
|
-
/**
|
|
38
|
-
* @param chunker - Chunker to split parsed/raw text into chunks
|
|
39
|
-
* @param embeddings - Embeddings provider
|
|
40
|
-
* @param store - Vector store for persistence
|
|
41
|
-
* @param parser - Optional default parser used by {@link ingestFile} and
|
|
42
|
-
* {@link ingestFiles} when no parser is passed at call time
|
|
43
|
-
*/
|
|
44
|
-
constructor(chunker: Chunker, embeddings: Embeddings, store: VectorStore, parser?: DocumentParser);
|
|
34
|
+
constructor(chunker: Chunker, embeddings: Embeddings, store: VectorStore);
|
|
45
35
|
/**
|
|
46
36
|
* Ingest a single document into the vector store.
|
|
47
37
|
*
|
|
@@ -58,64 +48,6 @@ export declare class IngestionPipeline {
|
|
|
58
48
|
* @returns Aggregated result of all ingestions
|
|
59
49
|
*/
|
|
60
50
|
ingestMany(documents: DocumentInput[], options?: IngestionOptions): Promise<IngestionResult>;
|
|
61
|
-
/**
|
|
62
|
-
* Parse a file and ingest it into the vector store.
|
|
63
|
-
*
|
|
64
|
-
* Combines parsing + chunking + embedding + storing in a single call.
|
|
65
|
-
* When the pipeline's chunker is an {@link ElementChunker} and the parser
|
|
66
|
-
* returns structured elements, chunking is done on element boundaries
|
|
67
|
-
* instead of raw text.
|
|
68
|
-
*
|
|
69
|
-
* The `parser` argument is optional when one was configured on the pipeline
|
|
70
|
-
* constructor; it is required otherwise.
|
|
71
|
-
*
|
|
72
|
-
* @example Using a pipeline-level parser:
|
|
73
|
-
* ```typescript
|
|
74
|
-
* const pipeline = new IngestionPipeline(
|
|
75
|
-
* new ElementChunker({ chunkSize: 1000 }),
|
|
76
|
-
* embeddings,
|
|
77
|
-
* store,
|
|
78
|
-
* new UnstructuredLocalParser(),
|
|
79
|
-
* );
|
|
80
|
-
* await pipeline.ingestFile("/docs/report.pdf", { strategy: "hi_res" });
|
|
81
|
-
* ```
|
|
82
|
-
*
|
|
83
|
-
* @example Passing a parser per call:
|
|
84
|
-
* ```typescript
|
|
85
|
-
* await pipeline.ingestFile("/docs/report.pdf", new UnstructuredLocalParser(), {
|
|
86
|
-
* strategy: "hi_res",
|
|
87
|
-
* sourceId: "report-2024",
|
|
88
|
-
* });
|
|
89
|
-
* ```
|
|
90
|
-
*/
|
|
91
|
-
ingestFile(filePath: string, options?: ParseOptions & ChunkOptions & IngestionOptions): Promise<IngestionResult>;
|
|
92
|
-
ingestFile(filePath: string, parser: DocumentParser, options?: ParseOptions & ChunkOptions & IngestionOptions): Promise<IngestionResult>;
|
|
93
|
-
/**
|
|
94
|
-
* Parse and ingest multiple files.
|
|
95
|
-
*
|
|
96
|
-
* Files are parsed sequentially; all chunks are batched together for
|
|
97
|
-
* embedding and storage. When the pipeline uses an {@link ElementChunker}
|
|
98
|
-
* and the parser returns structured elements, element-aware chunking is
|
|
99
|
-
* applied per file (preserving `element_types` and `page` metadata).
|
|
100
|
-
* The `parser` argument is optional when one was set on the pipeline
|
|
101
|
-
* constructor.
|
|
102
|
-
*
|
|
103
|
-
* @example Using a pipeline-level parser:
|
|
104
|
-
* ```typescript
|
|
105
|
-
* await pipeline.ingestFiles(["/a.pdf", "/b.docx"], { skipDuplicates: true });
|
|
106
|
-
* ```
|
|
107
|
-
*
|
|
108
|
-
* @example Passing a parser per call:
|
|
109
|
-
* ```typescript
|
|
110
|
-
* await pipeline.ingestFiles(
|
|
111
|
-
* ["/docs/a.pdf", "/docs/b.docx"],
|
|
112
|
-
* new UnstructuredAPIParser({ serverUrl: "http://localhost:8000" }),
|
|
113
|
-
* { strategy: "auto", skipDuplicates: true }
|
|
114
|
-
* );
|
|
115
|
-
* ```
|
|
116
|
-
*/
|
|
117
|
-
ingestFiles(filePaths: string[], options?: ParseOptions & ChunkOptions & IngestionOptions): Promise<IngestionResult>;
|
|
118
|
-
ingestFiles(filePaths: string[], parser: DocumentParser, options?: ParseOptions & ChunkOptions & IngestionOptions): Promise<IngestionResult>;
|
|
119
51
|
/**
|
|
120
52
|
* Ingest pre-chunked data into the vector store.
|
|
121
53
|
* Useful when chunking is done separately.
|
|
@@ -150,9 +82,5 @@ export declare class IngestionPipeline {
|
|
|
150
82
|
* Get the vector store used by this pipeline.
|
|
151
83
|
*/
|
|
152
84
|
getStore(): VectorStore;
|
|
153
|
-
/**
|
|
154
|
-
* Get the default parser configured on this pipeline, if any.
|
|
155
|
-
*/
|
|
156
|
-
getParser(): DocumentParser | undefined;
|
|
157
85
|
}
|
|
158
86
|
//# sourceMappingURL=IngestionPipeline.d.ts.map
|
|
package/dist/ingestion/IngestionPipeline.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.IngestionPipeline = void 0;
|
|
4
|
-
const ElementChunker_1 = require("../chunkers/ElementChunker");
|
|
5
4
|
/**
|
|
6
5
|
* Pipeline for ingesting documents into a vector store.
|
|
7
6
|
* Orchestrates the flow: chunk → batch embed → store
|
|
@@ -27,18 +26,10 @@ const ElementChunker_1 = require("../chunkers/ElementChunker");
|
|
|
27
26
|
* ```
|
|
28
27
|
*/
|
|
29
28
|
class IngestionPipeline {
|
|
30
|
-
|
|
31
|
-
* @param chunker - Chunker to split parsed/raw text into chunks
|
|
32
|
-
* @param embeddings - Embeddings provider
|
|
33
|
-
* @param store - Vector store for persistence
|
|
34
|
-
* @param parser - Optional default parser used by {@link ingestFile} and
|
|
35
|
-
* {@link ingestFiles} when no parser is passed at call time
|
|
36
|
-
*/
|
|
37
|
-
constructor(chunker, embeddings, store, parser) {
|
|
29
|
+
constructor(chunker, embeddings, store) {
|
|
38
30
|
this.chunker = chunker;
|
|
39
31
|
this.embeddings = embeddings;
|
|
40
32
|
this.store = store;
|
|
41
|
-
this.parser = parser;
|
|
42
33
|
}
|
|
43
34
|
/**
|
|
44
35
|
* Ingest a single document into the vector store.
|
|
@@ -104,100 +95,6 @@ class IngestionPipeline {
|
|
|
104
95
|
// Process all chunks together
|
|
105
96
|
return this.processChunks(allChunks, options ?? {}, startTime);
|
|
106
97
|
}
|
|
107
|
-
async ingestFile(filePath, parserOrOptions, options) {
|
|
108
|
-
let parser;
|
|
109
|
-
let opts;
|
|
110
|
-
if (parserOrOptions != null && typeof parserOrOptions.parse === "function") {
|
|
111
|
-
parser = parserOrOptions;
|
|
112
|
-
opts = options;
|
|
113
|
-
}
|
|
114
|
-
else {
|
|
115
|
-
parser = this.parser;
|
|
116
|
-
opts = parserOrOptions;
|
|
117
|
-
}
|
|
118
|
-
if (!parser) {
|
|
119
|
-
throw new Error("No parser provided. Pass a DocumentParser to ingestFile() or set one in the IngestionPipeline constructor.");
|
|
120
|
-
}
|
|
121
|
-
const parseOptions = {
|
|
122
|
-
strategy: opts?.strategy,
|
|
123
|
-
languages: opts?.languages,
|
|
124
|
-
};
|
|
125
|
-
const parsed = await parser.parse(filePath, parseOptions);
|
|
126
|
-
const chunkOptions = {
|
|
127
|
-
sourceId: opts?.sourceId,
|
|
128
|
-
sourcePath: opts?.sourcePath ?? filePath,
|
|
129
|
-
metadata: opts?.metadata,
|
|
130
|
-
};
|
|
131
|
-
const ingestionOptions = {
|
|
132
|
-
batchSize: opts?.batchSize,
|
|
133
|
-
onProgress: opts?.onProgress,
|
|
134
|
-
onError: opts?.onError,
|
|
135
|
-
skipDuplicates: opts?.skipDuplicates,
|
|
136
|
-
};
|
|
137
|
-
// When the pipeline uses an ElementChunker and the parser returned
|
|
138
|
-
// structured elements, chunk on element boundaries instead of raw text.
|
|
139
|
-
if (this.chunker instanceof ElementChunker_1.ElementChunker && parsed.elements?.length) {
|
|
140
|
-
const startTime = Date.now();
|
|
141
|
-
const chunks = await this.chunker.chunkElements(parsed.elements, chunkOptions);
|
|
142
|
-
return this.processChunks(chunks, ingestionOptions, startTime);
|
|
143
|
-
}
|
|
144
|
-
return this.ingest(parsed.text, { ...chunkOptions, ...ingestionOptions });
|
|
145
|
-
}
|
|
146
|
-
async ingestFiles(filePaths, parserOrOptions, options) {
|
|
147
|
-
let parser;
|
|
148
|
-
let opts;
|
|
149
|
-
if (parserOrOptions != null && typeof parserOrOptions.parse === "function") {
|
|
150
|
-
parser = parserOrOptions;
|
|
151
|
-
opts = options;
|
|
152
|
-
}
|
|
153
|
-
else {
|
|
154
|
-
parser = this.parser;
|
|
155
|
-
opts = parserOrOptions;
|
|
156
|
-
}
|
|
157
|
-
if (!parser) {
|
|
158
|
-
throw new Error("No parser provided. Pass a DocumentParser to ingestFiles() or set one in the IngestionPipeline constructor.");
|
|
159
|
-
}
|
|
160
|
-
const parseOptions = {
|
|
161
|
-
strategy: opts?.strategy,
|
|
162
|
-
languages: opts?.languages,
|
|
163
|
-
};
|
|
164
|
-
const ingestionOptions = {
|
|
165
|
-
batchSize: opts?.batchSize,
|
|
166
|
-
onProgress: opts?.onProgress,
|
|
167
|
-
onError: opts?.onError,
|
|
168
|
-
skipDuplicates: opts?.skipDuplicates,
|
|
169
|
-
};
|
|
170
|
-
const startTime = Date.now();
|
|
171
|
-
const allChunks = [];
|
|
172
|
-
this.emitProgress(ingestionOptions.onProgress, {
|
|
173
|
-
phase: "chunking",
|
|
174
|
-
processed: 0,
|
|
175
|
-
total: filePaths.length,
|
|
176
|
-
});
|
|
177
|
-
for (let i = 0; i < filePaths.length; i++) {
|
|
178
|
-
const filePath = filePaths[i];
|
|
179
|
-
const parsed = await parser.parse(filePath, parseOptions);
|
|
180
|
-
const chunkOptions = {
|
|
181
|
-
sourceId: opts?.sourceId,
|
|
182
|
-
sourcePath: filePath,
|
|
183
|
-
metadata: opts?.metadata,
|
|
184
|
-
};
|
|
185
|
-
let fileChunks;
|
|
186
|
-
if (this.chunker instanceof ElementChunker_1.ElementChunker && parsed.elements?.length) {
|
|
187
|
-
fileChunks = await this.chunker.chunkElements(parsed.elements, chunkOptions);
|
|
188
|
-
}
|
|
189
|
-
else {
|
|
190
|
-
fileChunks = await this.chunker.chunk(parsed.text, chunkOptions);
|
|
191
|
-
}
|
|
192
|
-
allChunks.push(...fileChunks);
|
|
193
|
-
this.emitProgress(ingestionOptions.onProgress, {
|
|
194
|
-
phase: "chunking",
|
|
195
|
-
processed: i + 1,
|
|
196
|
-
total: filePaths.length,
|
|
197
|
-
});
|
|
198
|
-
}
|
|
199
|
-
return this.processChunks(allChunks, ingestionOptions, startTime);
|
|
200
|
-
}
|
|
201
98
|
/**
|
|
202
99
|
* Ingest pre-chunked data into the vector store.
|
|
203
100
|
* Useful when chunking is done separately.
|
|
@@ -364,12 +261,6 @@ class IngestionPipeline {
|
|
|
364
261
|
getStore() {
|
|
365
262
|
return this.store;
|
|
366
263
|
}
|
|
367
|
-
/**
|
|
368
|
-
* Get the default parser configured on this pipeline, if any.
|
|
369
|
-
*/
|
|
370
|
-
getParser() {
|
|
371
|
-
return this.parser;
|
|
372
|
-
}
|
|
373
264
|
}
|
|
374
265
|
exports.IngestionPipeline = IngestionPipeline;
|
|
375
266
|
//# sourceMappingURL=IngestionPipeline.js.map
|
package/dist/ollama.d.ts
ADDED
package/dist/ollama.js
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
14
|
+
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
|
+
};
|
|
16
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
+
exports.ollamaTransformer = exports.OllamaAgent = void 0;
|
|
18
|
+
// Ollama Agent Entry Point
|
|
19
|
+
__exportStar(require("./core"), exports);
|
|
20
|
+
var OllamaAgent_1 = require("./agents/ollama/OllamaAgent");
|
|
21
|
+
Object.defineProperty(exports, "OllamaAgent", { enumerable: true, get: function () { return OllamaAgent_1.OllamaAgent; } });
|
|
22
|
+
var transformers_1 = require("./history/transformers");
|
|
23
|
+
Object.defineProperty(exports, "ollamaTransformer", { enumerable: true, get: function () { return transformers_1.ollamaTransformer; } });
|
|
24
|
+
//# sourceMappingURL=ollama.js.map
|
package/dist/viz/types.d.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Visualization event types and interfaces for agent monitoring.
|
|
3
3
|
* These types define the contract between agention-lib and @agention/viz.
|
|
4
4
|
*/
|
|
5
|
-
export type VizVendor = "anthropic" | "openai" | "mistral" | "gemini";
|
|
5
|
+
export type VizVendor = "anthropic" | "openai" | "mistral" | "gemini" | "ollama";
|
|
6
6
|
export type VizEventType = "session.start" | "session.end" | "pipeline.start" | "pipeline.end" | "executor.start" | "executor.end" | "agent.start" | "agent.complete" | "agent.error" | "tool.start" | "tool.complete" | "tool.error" | "message.user" | "message.assistant";
|
|
7
7
|
export type VizExecutorType = "sequential" | "parallel" | "map" | "voting" | "router";
|
|
8
8
|
export type VizStopReason = "end_turn" | "tool_use" | "max_tokens" | "stop_sequence" | "error";
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agentionai/agents",
|
|
3
3
|
"author": "Laurent Zuijdwijk",
|
|
4
|
-
"version": "0.12.0-beta",
|
|
4
|
+
"version": "0.12.0",
|
|
5
5
|
"description": "Agent Library",
|
|
6
6
|
"main": "dist/index.js",
|
|
7
7
|
"types": "dist/index.d.ts",
|
|
@@ -31,8 +31,8 @@
|
|
|
31
31
|
"default": "./dist/gemini.js"
|
|
32
32
|
},
|
|
33
33
|
"./ollama": {
|
|
34
|
-
"types": "./dist/
|
|
35
|
-
"default": "./dist/
|
|
34
|
+
"types": "./dist/ollama.d.ts",
|
|
35
|
+
"default": "./dist/ollama.js"
|
|
36
36
|
},
|
|
37
37
|
"./embeddings": {
|
|
38
38
|
"types": "./dist/embeddings/index.d.ts",
|
|
@@ -65,26 +65,6 @@
|
|
|
65
65
|
"./history/plugins": {
|
|
66
66
|
"types": "./dist/history/plugins/index.d.ts",
|
|
67
67
|
"default": "./dist/history/plugins/index.js"
|
|
68
|
-
},
|
|
69
|
-
"./parsers": {
|
|
70
|
-
"types": "./dist/parsers/index.d.ts",
|
|
71
|
-
"default": "./dist/parsers/index.js"
|
|
72
|
-
},
|
|
73
|
-
"./parsers/unstructured-local": {
|
|
74
|
-
"types": "./dist/parsers/UnstructuredLocalParser.d.ts",
|
|
75
|
-
"default": "./dist/parsers/UnstructuredLocalParser.js"
|
|
76
|
-
},
|
|
77
|
-
"./parsers/unstructured-api": {
|
|
78
|
-
"types": "./dist/parsers/UnstructuredAPIParser.d.ts",
|
|
79
|
-
"default": "./dist/parsers/UnstructuredAPIParser.js"
|
|
80
|
-
},
|
|
81
|
-
"./parsers/llamaindex": {
|
|
82
|
-
"types": "./dist/parsers/LlamaIndexParser.d.ts",
|
|
83
|
-
"default": "./dist/parsers/LlamaIndexParser.js"
|
|
84
|
-
},
|
|
85
|
-
"./parsers/ollama-ocr": {
|
|
86
|
-
"types": "./dist/parsers/OllamaOCRParser.d.ts",
|
|
87
|
-
"default": "./dist/parsers/OllamaOCRParser.js"
|
|
88
68
|
}
|
|
89
69
|
},
|
|
90
70
|
"files": [
|
|
@@ -164,13 +144,9 @@
|
|
|
164
144
|
"@mistralai/mistralai": "^1.13.0",
|
|
165
145
|
"@modelcontextprotocol/sdk": "^1.26.0",
|
|
166
146
|
"apache-arrow": "^18.0.0",
|
|
147
|
+
"ollama": "^0.5.18",
|
|
167
148
|
"openai": "^6.16.0",
|
|
168
|
-
"voyageai": "^0.0.3",
|
|
169
|
-
"@epilogo/unstructured-io-node": "*",
|
|
170
|
-
"unstructured-client": "*",
|
|
171
|
-
"llamaindex": "*",
|
|
172
|
-
"@llamaindex/readers": "*",
|
|
173
|
-
"pdf-to-img": "*"
|
|
149
|
+
"voyageai": "^0.0.3"
|
|
174
150
|
},
|
|
175
151
|
"peerDependenciesMeta": {
|
|
176
152
|
"@lancedb/lancedb": {
|
|
@@ -200,19 +176,7 @@
|
|
|
200
176
|
"@opensearch-project/opensearch": {
|
|
201
177
|
"optional": true
|
|
202
178
|
},
|
|
203
|
-
"@epilogo/unstructured-io-node": {
|
|
204
|
-
"optional": true
|
|
205
|
-
},
|
|
206
|
-
"unstructured-client": {
|
|
207
|
-
"optional": true
|
|
208
|
-
},
|
|
209
|
-
"llamaindex": {
|
|
210
|
-
"optional": true
|
|
211
|
-
},
|
|
212
|
-
"@llamaindex/readers": {
|
|
213
|
-
"optional": true
|
|
214
|
-
},
|
|
215
|
-
"pdf-to-img": {
|
|
179
|
+
"ollama": {
|
|
216
180
|
"optional": true
|
|
217
181
|
}
|
|
218
182
|
},
|
|
@@ -1,100 +0,0 @@
|
|
|
1
|
-
import { Chunker } from "./Chunker";
|
|
2
|
-
import { Chunk, ChunkerConfig, ChunkOptions } from "./types";
|
|
3
|
-
import { ParsedElement } from "../parsers/types";
|
|
4
|
-
/**
|
|
5
|
-
* Configuration for {@link ElementChunker}.
|
|
6
|
-
*/
|
|
7
|
-
export interface ElementChunkerConfig extends ChunkerConfig {
|
|
8
|
-
/**
|
|
9
|
-
* Element types to skip entirely.
|
|
10
|
-
* Useful for dropping decorative or non-content elements.
|
|
11
|
-
* @example ["Image", "PageBreak", "Header", "Footer"]
|
|
12
|
-
*/
|
|
13
|
-
excludeTypes?: string[];
|
|
14
|
-
/**
|
|
15
|
-
* Element types that always start a new chunk, even if there is room
|
|
16
|
-
* in the current one. Use this to keep headings at the top of their
|
|
17
|
-
* section's chunk.
|
|
18
|
-
* @default ["Title"]
|
|
19
|
-
*/
|
|
20
|
-
breakOnTypes?: string[];
|
|
21
|
-
}
|
|
22
|
-
/**
|
|
23
|
-
* Chunks a document by grouping its **structured elements** rather than
|
|
24
|
-
* splitting raw text. Designed for use with parsers that return element
|
|
25
|
-
* lists (e.g. {@link UnstructuredLocalParser}, {@link UnstructuredAPIParser}).
|
|
26
|
-
*
|
|
27
|
-
* **How it works:**
|
|
28
|
-
* 1. Adjacent elements are merged into a single chunk until the combined
|
|
29
|
-
* character count would exceed `chunkSize`.
|
|
30
|
-
* 2. A `breakOnTypes` element (default: `"Title"`) always starts a fresh
|
|
31
|
-
* chunk so that headings introduce their section's content.
|
|
32
|
-
* 3. A single element whose text exceeds `chunkSize` is split recursively
|
|
33
|
-
* using separator heuristics (paragraphs → sentences → words → characters).
|
|
34
|
-
* 4. Element types are stored in `chunk.metadata.element_types`; page number
|
|
35
|
-
* is stored in `chunk.metadata.page` when available.
|
|
36
|
-
*
|
|
37
|
-
* Use via {@link IngestionPipeline.ingestFile} — the pipeline automatically
|
|
38
|
-
* calls `chunkElements()` instead of `chunk()` when this chunker is used and
|
|
39
|
-
* the parser returns a structured element list.
|
|
40
|
-
*
|
|
41
|
-
* @example
|
|
42
|
-
* ```typescript
|
|
43
|
-
* import { ElementChunker } from '@agentionai/agents/chunkers';
|
|
44
|
-
* import { UnstructuredLocalParser } from '@agentionai/agents/parsers/unstructured-local';
|
|
45
|
-
*
|
|
46
|
-
* const pipeline = new IngestionPipeline(
|
|
47
|
-
* new ElementChunker({ chunkSize: 1000 }),
|
|
48
|
-
* embeddings,
|
|
49
|
-
* store,
|
|
50
|
-
* );
|
|
51
|
-
*
|
|
52
|
-
* await pipeline.ingestFile('/docs/report.pdf', new UnstructuredLocalParser(), {
|
|
53
|
-
* strategy: 'hi_res',
|
|
54
|
-
* });
|
|
55
|
-
* ```
|
|
56
|
-
*/
|
|
57
|
-
export declare class ElementChunker extends Chunker {
|
|
58
|
-
readonly name = "ElementChunker";
|
|
59
|
-
private readonly excludeTypes;
|
|
60
|
-
private readonly breakOnTypes;
|
|
61
|
-
constructor(config: ElementChunkerConfig);
|
|
62
|
-
/**
|
|
63
|
-
* Chunk a list of structured elements into {@link Chunk} objects.
|
|
64
|
-
*
|
|
65
|
-
* This is the primary entry point when using this chunker with a parser.
|
|
66
|
-
* Called automatically by {@link IngestionPipeline.ingestFile} when
|
|
67
|
-
* the parsed document has an `elements` array.
|
|
68
|
-
*
|
|
69
|
-
* @param elements - Parsed elements from a {@link DocumentParser}
|
|
70
|
-
* @param options - Source tracking and custom metadata
|
|
71
|
-
*/
|
|
72
|
-
chunkElements(elements: ParsedElement[], options?: ChunkOptions): Promise<Chunk[]>;
|
|
73
|
-
/**
|
|
74
|
-
* Fallback text splitting used when {@link Chunker.chunk} is called directly
|
|
75
|
-
* (i.e. without a structured element list). Splits on double newlines first,
|
|
76
|
-
* then sentences, then words.
|
|
77
|
-
*/
|
|
78
|
-
protected splitText(text: string): string[];
|
|
79
|
-
/**
|
|
80
|
-
* Build a {@link Chunk} from a group of elements.
|
|
81
|
-
*/
|
|
82
|
-
private buildChunk;
|
|
83
|
-
/**
|
|
84
|
-
* Split a single large element text using separator heuristics.
|
|
85
|
-
*/
|
|
86
|
-
private splitLargeText;
|
|
87
|
-
/**
|
|
88
|
-
* Greedily merge string parts into windows of at most `maxSize` characters.
|
|
89
|
-
*/
|
|
90
|
-
private mergeToSize;
|
|
91
|
-
/**
|
|
92
|
-
* Hard character-count split when no separator works.
|
|
93
|
-
*/
|
|
94
|
-
private forceSplit;
|
|
95
|
-
/**
|
|
96
|
-
* Apply character-level overlap between already-split strings.
|
|
97
|
-
*/
|
|
98
|
-
private applyCharOverlap;
|
|
99
|
-
}
|
|
100
|
-
//# sourceMappingURL=ElementChunker.d.ts.map
|