@agentionai/agents 0.10.2 → 0.12.0-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunkers/ElementChunker.d.ts +100 -0
- package/dist/chunkers/ElementChunker.js +242 -0
- package/dist/chunkers/index.d.ts +1 -0
- package/dist/chunkers/index.js +3 -1
- package/dist/ingestion/IngestionPipeline.d.ts +73 -1
- package/dist/ingestion/IngestionPipeline.js +110 -1
- package/dist/parsers/DocumentParser.d.ts +36 -0
- package/dist/parsers/DocumentParser.js +35 -0
- package/dist/parsers/LlamaIndexParser.d.ts +58 -0
- package/dist/parsers/LlamaIndexParser.js +71 -0
- package/dist/parsers/OllamaOCRParser.d.ts +98 -0
- package/dist/parsers/OllamaOCRParser.js +203 -0
- package/dist/parsers/UnstructuredAPIParser.d.ts +57 -0
- package/dist/parsers/UnstructuredAPIParser.js +131 -0
- package/dist/parsers/UnstructuredLocalParser.d.ts +42 -0
- package/dist/parsers/UnstructuredLocalParser.js +118 -0
- package/dist/parsers/index.d.ts +3 -0
- package/dist/parsers/index.js +6 -0
- package/dist/parsers/types.d.ts +50 -0
- package/dist/parsers/types.js +3 -0
- package/dist/vectorstore/LanceDBVectorStore.d.ts +1 -16
- package/dist/vectorstore/OpenSearchVectorStore.d.ts +259 -0
- package/dist/vectorstore/OpenSearchVectorStore.js +481 -0
- package/dist/vectorstore/VectorStore.d.ts +25 -0
- package/dist/vectorstore/index.d.ts +3 -2
- package/dist/vectorstore/index.js +3 -1
- package/package.json +50 -2
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.UnstructuredLocalParser = void 0;
|
|
37
|
+
const DocumentParser_1 = require("./DocumentParser");
|
|
38
|
+
/**
|
|
39
|
+
* Document parser that uses the **local** (open-source Python) version of
|
|
40
|
+
* Unstructured via the `@epilogo/unstructured-io-node` npm bridge.
|
|
41
|
+
*
|
|
42
|
+
* The bridge spawns a Python virtual environment and calls the Python
|
|
43
|
+
* `unstructured` library directly — no API key required, but Python 3.8+
|
|
44
|
+
* and system dependencies (poppler, tesseract, etc.) must be available.
|
|
45
|
+
*
|
|
46
|
+
* **Peer dependency:** `@epilogo/unstructured-io-node`
|
|
47
|
+
*
|
|
48
|
+
* @example
|
|
49
|
+
* ```typescript
|
|
50
|
+
* import { UnstructuredLocalParser } from "@agentionai/agents/parsers";
|
|
51
|
+
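* // NOTE(review): the 6-line parsers index.js hunk in this diff re-exports only DocumentParser - confirm this import path resolves.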
|
|
52
|
+
* const parser = new UnstructuredLocalParser();
|
|
53
|
+
* const doc = await parser.parse("/path/to/report.pdf", {
|
|
54
|
+
* strategy: "hi_res",
|
|
55
|
+
* languages: ["eng"],
|
|
56
|
+
* });
|
|
57
|
+
* console.log(doc.elements?.length, "elements");
|
|
58
|
+
*
|
|
59
|
+
* // Use with IngestionPipeline
|
|
60
|
+
* await pipeline.ingestFile("/path/to/report.pdf", parser);
|
|
61
|
+
* ```
|
|
62
|
+
*/
|
|
63
|
+
class UnstructuredLocalParser extends DocumentParser_1.DocumentParser {
|
|
64
|
+
constructor() {
|
|
65
|
+
super(...arguments);
|
|
66
|
+
this.name = "unstructured-local";
|
|
67
|
+
}
|
|
68
|
+
/**
|
|
69
|
+
* Parse a file using the local Python Unstructured library.
|
|
70
|
+
*
|
|
71
|
+
* On first call, `ensureEnvironmentSetup()` is invoked to download the
|
|
72
|
+
* Python venv if it does not already exist (one-time, slow operation).
|
|
73
|
+
*
|
|
74
|
+
* @param filePath - Path to the document to parse
|
|
75
|
+
* @param options - Strategy, languages, and any other unstructured kwargs
|
|
76
|
+
*/
|
|
77
|
+
async parse(filePath, options) {
|
|
78
|
+
const pkg = "@epilogo/unstructured-io-node";
|
|
79
|
+
let UnstructuredIO;
|
|
80
|
+
try {
|
|
81
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
82
|
+
({ UnstructuredIO } = await Promise.resolve(`${pkg}`).then(s => __importStar(require(s))));
|
|
83
|
+
}
|
|
84
|
+
catch {
|
|
85
|
+
throw new Error("UnstructuredLocalParser requires '@epilogo/unstructured-io-node'. " +
|
|
86
|
+
"Install it with: npm install @epilogo/unstructured-io-node");
|
|
87
|
+
}
|
|
88
|
+
await UnstructuredIO.ensureEnvironmentSetup();
|
|
89
|
+
const { strategy, languages, ...rest } = options ?? {};
|
|
90
|
+
const rawElements = await UnstructuredIO.partition({
|
|
91
|
+
filename: filePath,
|
|
92
|
+
strategy: strategy ?? "auto",
|
|
93
|
+
...(languages ? { languages } : {}),
|
|
94
|
+
...rest,
|
|
95
|
+
});
|
|
96
|
+
const elements = this.mapRawElements(rawElements);
|
|
97
|
+
return {
|
|
98
|
+
text: this.elementsToText(elements),
|
|
99
|
+
elements,
|
|
100
|
+
};
|
|
101
|
+
}
|
|
102
|
+
mapRawElements(raw) {
|
|
103
|
+
return raw.map((el) => {
|
|
104
|
+
const e = el;
|
|
105
|
+
return {
|
|
106
|
+
type: typeof e["type"] === "string" ? e["type"] : "unknown",
|
|
107
|
+
text: typeof e["text"] === "string" ? e["text"] : "",
|
|
108
|
+
metadata: e["metadata"] != null &&
|
|
109
|
+
typeof e["metadata"] === "object" &&
|
|
110
|
+
!Array.isArray(e["metadata"])
|
|
111
|
+
? e["metadata"]
|
|
112
|
+
: undefined,
|
|
113
|
+
};
|
|
114
|
+
});
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
exports.UnstructuredLocalParser = UnstructuredLocalParser;
|
|
118
|
+
//# sourceMappingURL=UnstructuredLocalParser.js.map
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.DocumentParser = void 0;
|
|
4
|
+
var DocumentParser_1 = require("./DocumentParser");
|
|
5
|
+
Object.defineProperty(exports, "DocumentParser", { enumerable: true, get: function () { return DocumentParser_1.DocumentParser; } });
|
|
6
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* A single structured element extracted from a document.
|
|
3
|
+
* Matches the element format returned by Unstructured and similar parsers.
|
|
4
|
+
*/
|
|
5
|
+
export interface ParsedElement {
|
|
6
|
+
/**
|
|
7
|
+
* Element type — e.g. "Title", "NarrativeText", "Table", "Image",
|
|
8
|
+
* "ListItem", "Header", "Footer", "Document", etc.
|
|
9
|
+
*/
|
|
10
|
+
type: string;
|
|
11
|
+
/** Text content of this element */
|
|
12
|
+
text: string;
|
|
13
|
+
/**
|
|
14
|
+
* Parser-provided metadata — e.g. page_number, coordinates, languages,
|
|
15
|
+
* file_directory, filename, filetype, etc.
|
|
16
|
+
*/
|
|
17
|
+
metadata?: Record<string, unknown>;
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* The result of parsing a document file.
|
|
21
|
+
*/
|
|
22
|
+
export interface ParsedDocument {
|
|
23
|
+
/** Full plain-text content (elements joined by double newlines) */
|
|
24
|
+
text: string;
|
|
25
|
+
/**
|
|
26
|
+
* Structured elements if the parser supports them.
|
|
27
|
+
* Absent when the parser only returns plain text.
|
|
28
|
+
*/
|
|
29
|
+
elements?: ParsedElement[];
|
|
30
|
+
/** File-level metadata from the parser, when available */
|
|
31
|
+
metadata?: Record<string, unknown>;
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* Options shared across all document parsers.
|
|
35
|
+
*/
|
|
36
|
+
export interface ParseOptions {
|
|
37
|
+
/**
|
|
38
|
+
* Parsing strategy.
|
|
39
|
+
* - `"auto"`: Let the parser decide (default)
|
|
40
|
+
* - `"fast"`: Text extraction only, no OCR
|
|
41
|
+
* - `"hi_res"`: High-resolution layout analysis with OCR
|
|
42
|
+
* - `"ocr_only"`: Force OCR on every page
|
|
43
|
+
*/
|
|
44
|
+
strategy?: "auto" | "fast" | "hi_res" | "ocr_only";
|
|
45
|
+
/** Languages to use for OCR (three-letter ISO 639-2/T codes as used by Tesseract, e.g. `["eng", "fra"]`; not two-letter ISO 639-1 codes) */
|
|
46
|
+
languages?: string[];
|
|
47
|
+
/** Pass-through options specific to the underlying parser */
|
|
48
|
+
[key: string]: unknown;
|
|
49
|
+
}
|
|
50
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -8,23 +8,8 @@
|
|
|
8
8
|
* @requires apache-arrow - Install with: npm install apache-arrow
|
|
9
9
|
*/
|
|
10
10
|
import type { Connection, Table, ConnectionOptions } from "@lancedb/lancedb";
|
|
11
|
-
import { VectorStore, Document, EmbeddedDocument, SearchResult, AddDocumentsOptions, SearchOptions, DeleteOptions } from "./VectorStore";
|
|
11
|
+
import { VectorStore, Document, EmbeddedDocument, SearchResult, AddDocumentsOptions, SearchOptions, DeleteOptions, MetadataFieldDefinition } from "./VectorStore";
|
|
12
12
|
import { Embeddings } from "../embeddings/Embeddings";
|
|
13
|
-
/**
|
|
14
|
-
* Supported types for metadata fields.
|
|
15
|
-
*/
|
|
16
|
-
export type MetadataFieldType = "string" | "number" | "boolean";
|
|
17
|
-
/**
|
|
18
|
-
* Definition for a metadata field that will be stored as a separate column.
|
|
19
|
-
*/
|
|
20
|
-
export interface MetadataFieldDefinition {
|
|
21
|
-
/** Name of the metadata field. Use snake_case (e.g. `tenant_id`) to avoid SQL filter issues. */
|
|
22
|
-
name: string;
|
|
23
|
-
/** Data type for the field */
|
|
24
|
-
type: MetadataFieldType;
|
|
25
|
-
/** Whether the field can be null (default: true) */
|
|
26
|
-
nullable?: boolean;
|
|
27
|
-
}
|
|
28
13
|
/**
|
|
29
14
|
* Configuration for LanceDBVectorStore.
|
|
30
15
|
*/
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenSearch implementation of the VectorStore interface.
|
|
3
|
+
*
|
|
4
|
+
* Uses the OpenSearch k-NN plugin for approximate nearest-neighbour search
|
|
5
|
+
* via HNSW indexing. Supports cosine similarity, L2, and inner product
|
|
6
|
+
* space types.
|
|
7
|
+
*
|
|
8
|
+
* @requires @opensearch-project/opensearch - Install with: npm install @opensearch-project/opensearch
|
|
9
|
+
*/
|
|
10
|
+
import { VectorStore, Document, EmbeddedDocument, SearchResult, AddDocumentsOptions, SearchOptions, DeleteOptions, MetadataFieldDefinition } from "./VectorStore";
|
|
11
|
+
import { Embeddings } from "../embeddings/Embeddings";
|
|
12
|
+
interface OpenSearchHit<T> {
|
|
13
|
+
_id: string;
|
|
14
|
+
_score: number;
|
|
15
|
+
_source: T;
|
|
16
|
+
}
|
|
17
|
+
interface OpenSearchClient {
|
|
18
|
+
indices: {
|
|
19
|
+
exists(params: {
|
|
20
|
+
index: string;
|
|
21
|
+
}): Promise<{
|
|
22
|
+
body: boolean;
|
|
23
|
+
}>;
|
|
24
|
+
create(params: {
|
|
25
|
+
index: string;
|
|
26
|
+
body: unknown;
|
|
27
|
+
}): Promise<unknown>;
|
|
28
|
+
delete(params: {
|
|
29
|
+
index: string;
|
|
30
|
+
}): Promise<unknown>;
|
|
31
|
+
};
|
|
32
|
+
bulk(params: {
|
|
33
|
+
body: unknown[];
|
|
34
|
+
refresh?: boolean | string;
|
|
35
|
+
}): Promise<{
|
|
36
|
+
body: {
|
|
37
|
+
items?: Array<{
|
|
38
|
+
delete?: {
|
|
39
|
+
result?: string;
|
|
40
|
+
};
|
|
41
|
+
index?: {
|
|
42
|
+
result?: string;
|
|
43
|
+
};
|
|
44
|
+
}>;
|
|
45
|
+
errors?: boolean;
|
|
46
|
+
};
|
|
47
|
+
}>;
|
|
48
|
+
search<T>(params: {
|
|
49
|
+
index: string;
|
|
50
|
+
body: unknown;
|
|
51
|
+
}): Promise<{
|
|
52
|
+
body: {
|
|
53
|
+
hits?: {
|
|
54
|
+
hits?: OpenSearchHit<T>[];
|
|
55
|
+
};
|
|
56
|
+
};
|
|
57
|
+
}>;
|
|
58
|
+
get<T>(params: {
|
|
59
|
+
index: string;
|
|
60
|
+
id: string;
|
|
61
|
+
}): Promise<{
|
|
62
|
+
body: {
|
|
63
|
+
found: boolean;
|
|
64
|
+
_source: T;
|
|
65
|
+
};
|
|
66
|
+
}>;
|
|
67
|
+
deleteByQuery(params: {
|
|
68
|
+
index: string;
|
|
69
|
+
body: unknown;
|
|
70
|
+
refresh?: boolean | string;
|
|
71
|
+
}): Promise<unknown>;
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* k-NN vector space type used by the OpenSearch k-NN plugin.
|
|
75
|
+
* - `cosinesimil` — cosine similarity (default, normalised vectors recommended)
|
|
76
|
+
* - `l2` — Euclidean L2 distance
|
|
77
|
+
* - `innerproduct` — inner / dot product
|
|
78
|
+
*/
|
|
79
|
+
export type OpenSearchSpaceType = "cosinesimil" | "l2" | "innerproduct";
|
|
80
|
+
/**
|
|
81
|
+
* k-NN engine used by the OpenSearch k-NN plugin.
|
|
82
|
+
* - `lucene` — native Lucene ANN (cosinesimil and l2 only); default since OpenSearch 3.x
|
|
83
|
+
* - `faiss` — high-throughput GPU-accelerated (l2 and innerproduct only)
|
|
84
|
+
* - `nmslib` — deprecated and removed in OpenSearch 3.0; do not use
|
|
85
|
+
*/
|
|
86
|
+
export type OpenSearchKnnEngine = "lucene" | "faiss" | "nmslib";
|
|
87
|
+
/**
|
|
88
|
+
* Configuration for OpenSearchVectorStore.
|
|
89
|
+
*/
|
|
90
|
+
export interface OpenSearchVectorStoreConfig {
|
|
91
|
+
/** Name identifier for this store instance */
|
|
92
|
+
name: string;
|
|
93
|
+
/** OpenSearch node URL (e.g. `https://localhost:9200`) */
|
|
94
|
+
node: string;
|
|
95
|
+
/** Basic-auth credentials */
|
|
96
|
+
auth?: {
|
|
97
|
+
username: string;
|
|
98
|
+
password: string;
|
|
99
|
+
};
|
|
100
|
+
/** SSL options — set `rejectUnauthorized: false` for self-signed certs */
|
|
101
|
+
ssl?: {
|
|
102
|
+
rejectUnauthorized?: boolean;
|
|
103
|
+
};
|
|
104
|
+
/** OpenSearch index name to use for document storage */
|
|
105
|
+
indexName: string;
|
|
106
|
+
/** Embeddings provider for automatic embedding generation */
|
|
107
|
+
embeddings?: Embeddings;
|
|
108
|
+
/**
|
|
109
|
+
* Vector dimensions.
|
|
110
|
+
* Defaults to `embeddings.dimensions` when an embeddings provider is given,
|
|
111
|
+
* otherwise falls back to `1536`.
|
|
112
|
+
*/
|
|
113
|
+
dimensions?: number;
|
|
114
|
+
/**
|
|
115
|
+
* k-NN vector space type (default: `"cosinesimil"`).
|
|
116
|
+
* Must match the space type the embeddings model was trained for.
|
|
117
|
+
*/
|
|
118
|
+
spaceType?: OpenSearchSpaceType;
|
|
119
|
+
/**
|
|
120
|
+
* k-NN engine (default: `"lucene"`).
|
|
121
|
+
* `nmslib` was removed in OpenSearch 3.0 and cannot be used for new indices.
|
|
122
|
+
*/
|
|
123
|
+
engine?: OpenSearchKnnEngine;
|
|
124
|
+
/**
|
|
125
|
+
* HNSW `ef_search` parameter — controls recall vs. latency at query time.
|
|
126
|
+
* Higher values improve recall at the cost of latency. Default: `512`.
|
|
127
|
+
*/
|
|
128
|
+
efSearch?: number;
|
|
129
|
+
/**
|
|
130
|
+
* HNSW `ef_construction` parameter — controls graph quality at index time.
|
|
131
|
+
* Higher values improve recall at the cost of indexing speed. Default: `512`.
|
|
132
|
+
*/
|
|
133
|
+
efConstruction?: number;
|
|
134
|
+
/**
|
|
135
|
+
* HNSW `M` parameter — number of bidirectional links per node.
|
|
136
|
+
* Higher values improve recall but increase memory usage. Default: `16`.
|
|
137
|
+
*/
|
|
138
|
+
m?: number;
|
|
139
|
+
/**
|
|
140
|
+
* Optional user-defined metadata field definitions.
|
|
141
|
+
*
|
|
142
|
+
* When provided, these fields are declared in the index mapping with proper
|
|
143
|
+
* types (`keyword` for strings, `double` for numbers, `boolean` for booleans),
|
|
144
|
+
* which enables reliable exact-match filtering via `SearchOptions.filter`.
|
|
145
|
+
*
|
|
146
|
+
* Without this option, OpenSearch uses dynamic mapping for the `metadata`
|
|
147
|
+
* object. String fields are mapped as `text` with a `.keyword` sub-field —
|
|
148
|
+
* the store handles this automatically by appending `.keyword` to undeclared
|
|
149
|
+
* string filter values at query time.
|
|
150
|
+
*
|
|
151
|
+
* Chunk metadata fields produced by the library's chunkers (`hash`,
|
|
152
|
+
* `prev_id`, `next_id`, etc.) are always declared explicitly — you do not
|
|
153
|
+
* need to list them here.
|
|
154
|
+
*
|
|
155
|
+
* @example
|
|
156
|
+
* ```typescript
|
|
157
|
+
* metadataFields: [
|
|
158
|
+
* { name: "source", type: "string" },
|
|
159
|
+
* { name: "page", type: "number" },
|
|
160
|
+
* ]
|
|
161
|
+
* ```
|
|
162
|
+
*/
|
|
163
|
+
metadataFields?: MetadataFieldDefinition[];
|
|
164
|
+
}
|
|
165
|
+
export declare class OpenSearchVectorStore extends VectorStore {
|
|
166
|
+
readonly name: string;
|
|
167
|
+
private client;
|
|
168
|
+
private indexName;
|
|
169
|
+
private embeddings?;
|
|
170
|
+
private dimensions;
|
|
171
|
+
private spaceType;
|
|
172
|
+
private engine;
|
|
173
|
+
private efSearch;
|
|
174
|
+
private efConstruction;
|
|
175
|
+
private m;
|
|
176
|
+
private metadataFields?;
|
|
177
|
+
/** Set of metadata field names declared as keyword (string) type. */
|
|
178
|
+
private keywordFields;
|
|
179
|
+
private constructor();
|
|
180
|
+
/**
|
|
181
|
+
* Create a new OpenSearchVectorStore instance.
|
|
182
|
+
*
|
|
183
|
+
* Connects to the given OpenSearch node and creates the index (with k-NN
|
|
184
|
+
* mapping) if it does not already exist.
|
|
185
|
+
*
|
|
186
|
+
* @param config - Store configuration
|
|
187
|
+
* @returns A ready-to-use OpenSearchVectorStore instance
|
|
188
|
+
* @throws Error if `@opensearch-project/opensearch` is not installed
|
|
189
|
+
*/
|
|
190
|
+
static create(config: OpenSearchVectorStoreConfig): Promise<OpenSearchVectorStore>;
|
|
191
|
+
/**
|
|
192
|
+
* Create the k-NN index if it does not already exist.
|
|
193
|
+
*
|
|
194
|
+
* The `metadata` object always includes explicit mappings for chunk metadata
|
|
195
|
+
* fields (hash, prev_id, etc.) so they work correctly in term queries.
|
|
196
|
+
* Any user-declared `metadataFields` are also mapped with proper types.
|
|
197
|
+
* All other metadata fields fall back to dynamic mapping.
|
|
198
|
+
*/
|
|
199
|
+
private ensureIndex;
|
|
200
|
+
/**
|
|
201
|
+
* Add documents to the store.
|
|
202
|
+
* Embeddings are generated automatically using the configured provider.
|
|
203
|
+
*/
|
|
204
|
+
addDocuments(documents: Document[], options?: AddDocumentsOptions): Promise<string[]>;
|
|
205
|
+
/**
|
|
206
|
+
* Add documents with pre-computed embeddings.
|
|
207
|
+
* Uses OpenSearch bulk API for efficiency.
|
|
208
|
+
*/
|
|
209
|
+
addEmbeddedDocuments(documents: EmbeddedDocument[], options?: AddDocumentsOptions): Promise<string[]>;
|
|
210
|
+
/**
|
|
211
|
+
* Search for documents similar to the query text.
|
|
212
|
+
* The query is embedded automatically using the configured embeddings provider.
|
|
213
|
+
*/
|
|
214
|
+
search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
|
|
215
|
+
/**
|
|
216
|
+
* Search using a pre-computed embedding vector.
|
|
217
|
+
* Executes a k-NN query against the OpenSearch index.
|
|
218
|
+
*/
|
|
219
|
+
searchByVector(embedding: number[], options?: SearchOptions): Promise<SearchResult[]>;
|
|
220
|
+
/**
|
|
221
|
+
* Delete documents by their IDs.
|
|
222
|
+
* @returns Number of documents actually deleted.
|
|
223
|
+
*/
|
|
224
|
+
delete(ids: string[], _options?: DeleteOptions): Promise<number>;
|
|
225
|
+
/**
|
|
226
|
+
* Delete all documents, optionally scoped to a namespace.
|
|
227
|
+
*/
|
|
228
|
+
clear(options?: DeleteOptions): Promise<void>;
|
|
229
|
+
/**
|
|
230
|
+
* Retrieve a document by its ID.
|
|
231
|
+
* @returns The document, or `null` if not found.
|
|
232
|
+
*/
|
|
233
|
+
getById(id: string, _options?: DeleteOptions): Promise<Document | null>;
|
|
234
|
+
/**
|
|
235
|
+
* Get existing documents by their content hashes.
|
|
236
|
+
* Used by the ingestion pipeline for deduplication.
|
|
237
|
+
*
|
|
238
|
+
* Requires that documents were stored with chunk metadata containing a
|
|
239
|
+
* `hash` field (automatically set by chunkers in this library).
|
|
240
|
+
*
|
|
241
|
+
* @returns Map of hash → document ID for hashes that already exist.
|
|
242
|
+
*/
|
|
243
|
+
getByHashes(hashes: string[], _options?: DeleteOptions): Promise<Map<string, string>>;
|
|
244
|
+
/**
|
|
245
|
+
* Delete the entire OpenSearch index.
|
|
246
|
+
* WARNING: This permanently removes all indexed documents and the mapping.
|
|
247
|
+
*/
|
|
248
|
+
deleteIndex(): Promise<void>;
|
|
249
|
+
/** The OpenSearch index name used by this store. */
|
|
250
|
+
getIndexName(): string;
|
|
251
|
+
/** The configured vector dimensions. */
|
|
252
|
+
getDimensions(): number;
|
|
253
|
+
/** The configured embeddings provider, if any. */
|
|
254
|
+
getEmbeddings(): Embeddings | undefined;
|
|
255
|
+
/** The underlying OpenSearch client instance. */
|
|
256
|
+
getClient(): OpenSearchClient;
|
|
257
|
+
}
|
|
258
|
+
export {};
|
|
259
|
+
//# sourceMappingURL=OpenSearchVectorStore.d.ts.map
|