@magda/semantic-indexer-sdk 6.0.0-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/dist/index.d.ts +238 -0
  2. package/dist/index.js +131845 -0
  3. package/package.json +53 -0
@@ -0,0 +1,238 @@
1
+ import { ResponseBody } from '@opensearch-project/opensearch/api/_types/_core.search.js';
2
+ import urijs from 'urijs';
3
+
4
+ declare interface ApiClientBaseConfig { // Options common to all API client configs; extended by ApiKeyBasedConfigOptions and JWTBasedConfigOptions.
5
+ baseApiUrl?: string; // Optional; presumably the root URL of the target API (confirm in implementation).
6
+ }
7
+
8
+ declare interface ApiKeyBasedConfigOptions extends ApiClientBaseConfig { // Adds optional API-key credentials on top of the base options.
9
+ apiKeyId?: string; // Optional identifier of the API key.
10
+ apiKey?: string; // Optional API key value.
11
+ }
12
+
13
+ declare abstract class BaseApiClient { // Abstract base class for API clients; holds the auth-related settings declared below.
14
+ readonly authMode: "jwtToken" | "apiKey" | "noAuth"; // One of three literal modes; how it is selected is not visible in this declaration file.
15
+ protected baseApiUrl: string;
16
+ private readonly baseApiUri; // No type shown: declaration output omits the types of private members.
17
+ protected jwtSecret: string;
18
+ readonly apiKeyId: string;
19
+ protected apiKey: string;
20
+ readonly userId: string;
21
+ constructor(options: BaseApiClientConfig); // Options may carry JWT fields, API-key fields, or both; every field is optional.
22
+ protected getBaseApiUri(): urijs; // Returns a urijs instance; presumably derived from baseApiUrl (confirm).
23
+ protected setHeader(headers: HeadersInit | undefined | null, headerName: string, headerValue: string): Record<string, string> | [string, string][] | Headers; // Accepts absent headers (undefined/null); the result is a plain record, a list of pairs, or a Headers object.
24
+ protected addAuthHeader(config?: RequestInit): { // Returns an object of optional request options; presumably an expanded copy of RequestInit with auth headers applied (confirm).
25
+ body?: BodyInit;
26
+ cache?: RequestCache;
27
+ credentials?: RequestCredentials;
28
+ headers?: HeadersInit;
29
+ integrity?: string;
30
+ keepalive?: boolean;
31
+ method?: string;
32
+ mode?: RequestMode;
33
+ redirect?: RequestRedirect;
34
+ referrer?: string;
35
+ referrerPolicy?: ReferrerPolicy;
36
+ signal?: AbortSignal;
37
+ window?: null;
38
+ };
39
+ }
40
+
41
+ declare type BaseApiClientConfig = JWTBasedConfigOptions & ApiKeyBasedConfigOptions; // Intersection of both option sets; since every member of both is optional, any subset may be supplied.
42
+
43
+ declare class Chunker { // Splits text into chunks using the ChunkStrategy passed to the constructor.
44
+ private strategy;
45
+ constructor(strategy: ChunkStrategy);
46
+ chunk(text: string): Promise<ChunkResult[]>; // Asynchronous; presumably delegates to strategy.chunk (implementation not visible here).
47
+ }
48
+
49
+ export declare interface ChunkResult { // One chunk produced by a chunking function.
50
+ text: string; // The chunk's text.
51
+ position: number; // Presumably the chunk's offset within the source text; units are not specified here (TODO confirm).
52
+ length: number; // Presumably the chunk's length; units are not specified here (TODO confirm).
53
+ overlap: number; // Presumably how much this chunk overlaps a neighbouring chunk (TODO confirm).
54
+ }
55
+
56
+ declare interface ChunkStrategy { // Object wrapper around a chunking function; this is what the Chunker constructor takes.
57
+ chunk: ChunkStrategyType; // The chunking function itself.
58
+ }
59
+
60
+ export declare type ChunkStrategyType = (text: string) => Promise<ChunkResult[]>; // Async chunking function: takes the full text and resolves to the list of chunks.
61
+
62
+ export declare function commonYargs(defaultPort: number, defaultInternalUrl: string): SemanticIndexerArguments; // Returns the argument object consumed via SemanticIndexerOptions.argv; the name suggests yargs-based CLI parsing with these defaults (confirm in implementation).
63
+
64
+ export declare type CreateEmbeddingText = (params: CreateEmbeddingTextParams) => Promise<EmbeddingText>; // Async callback type for the required SemanticIndexerOptions.createEmbeddingText field; produces the text to embed.
65
+
66
+ export declare type CreateEmbeddingTextParams = { // Input handed to a CreateEmbeddingText callback.
67
+ record: Record_2; // The registry record class declared in this file (exported as `Record`).
68
+ format: string | null; // Nullable; presumably the file format when one applies (TODO confirm).
69
+ filePath: string | null; // Nullable; presumably a local path to a downloaded file (TODO confirm).
70
+ url: string | null; // Nullable; presumably the item's source URL (TODO confirm).
71
+ };
72
+
73
+ declare class EmbeddingApiClient extends BaseApiClient { // API client that returns numeric embedding vectors for text.
74
+ private taskSize;
75
+ constructor(options: EmbeddingApiClientConfig);
76
+ private testConnection;
77
+ get(text: string): Promise<number[]>; // Overload: a single string resolves to a single vector.
78
+ get(textList: string[]): Promise<number[][]>; // Overload: an array of strings resolves to an array of vectors.
79
+ }
80
+
81
+ declare interface EmbeddingApiClientConfig extends BaseApiClientConfig { // Adds no members of its own; currently identical in shape to BaseApiClientConfig.
82
+ }
83
+
84
+ declare interface EmbeddingApiConfig { // Embedding API settings; used as SemanticIndexerConfig.semanticIndexer.embeddingApi.
85
+ baseUrl: string; // Presumably the embedding service's base URL (TODO confirm).
86
+ bulkEmbeddingsSize: number; // Presumably the number of texts sent per embedding request (TODO confirm).
87
+ }
88
+
89
+ export declare type EmbeddingText = { // Result of a CreateEmbeddingText callback: a main text plus optional per-sub-object texts.
90
+ text: string; // Required main text.
91
+ subObjects?: Array<{ // Optional list; each entry needs `text`, while its id and type are optional.
92
+ subObjectId?: string;
93
+ subObjectType?: string;
94
+ text: string;
95
+ }>;
96
+ };
97
+
98
+ export declare function indexEmbeddingText(options: SemanticIndexerOptions, EmbeddingText: EmbeddingText, meta: { // NOTE(review): the second parameter is named `EmbeddingText`, the same as its type; legal, but easy to misread.
99
+ recordId: string; // Required id of the record the text belongs to.
100
+ fileFormat?: string; // Optional file format.
101
+ }, chunker: Chunker, embeddingApiClient: EmbeddingApiClient, opensearchApiClient: OpensearchApiClient): Promise<void>; // Takes a chunker, an embedding client and an OpenSearch client; presumably chunks, embeds, then indexes (confirm). Resolves with no value.
102
+
103
+ export declare type ItemType = "registryRecord" | "storageObject"; // The two kinds of item an indexer can be configured for (see SemanticIndexerOptions.itemType).
104
+
105
+ declare interface JWTBasedConfigOptions extends ApiClientBaseConfig { // Adds optional JWT credentials on top of the base options.
106
+ jwtSecret?: string; // Optional JWT secret.
107
+ userId?: string; // Optional user id.
108
+ }
109
+
110
+ declare interface MinioConfig { // Object-storage connection settings; used as SemanticIndexerArguments.minioConfig. All fields are required.
111
+ endPoint: string;
112
+ port: number;
113
+ useSSL: boolean;
114
+ accessKey: string;
115
+ secretKey: string;
116
+ region: string;
117
+ defaultDatasetBucket: string; // Presumably the bucket used when none is specified (TODO confirm).
118
+ }
119
+
120
+ declare type MinionArguments = { // Base argument set that SemanticIndexerArguments extends.
121
+ listenPort: string | number; // Typed as string or number, so consumers should not assume it is numeric.
122
+ internalUrl: string;
123
+ jwtSecret: string;
124
+ userId: string;
125
+ registryUrl: string;
126
+ enableMultiTenant: boolean;
127
+ tenantUrl: string;
128
+ retries: string | number; // Typed as string or number, so consumers should not assume it is numeric.
129
+ crawlerRecordFetchNumber?: string | number; // Optional; also typed as string or number.
130
+ };
131
+
132
+ declare class OpensearchApiClient { // Wrapper around a private OpenSearch client exposing index and document operations.
133
+ private client;
134
+ constructor(config: OpensearchConfig_2); // Takes OpensearchConfig_2 (optional `url` only), not the larger OpensearchConfig.
135
+ static getOpensearchApiClient(openSearchConfig: OpensearchConfig_2): Promise<OpensearchApiClient>; // Async factory; whether instances are cached or shared is not visible here.
136
+ createIndex(indexDefinition: { // Only indexName is required; settings and mappings are untyped (`any`).
137
+ indexName: string;
138
+ settings?: any;
139
+ mappings?: any;
140
+ }): Promise<void>;
141
+ deleteIndex(indexName: string): Promise<void>;
142
+ indexExists(indexName: string): Promise<boolean>;
143
+ indexDocument(indexName: string, document: any): Promise<void>;
144
+ bulkIndexDocument(indexName: string, documents: any[]): Promise<void>;
145
+ searchDocuments(indexName: string, query: string): Promise<ResponseBody>; // Resolves to the OpenSearch search ResponseBody type imported at the top of the file.
146
+ deleteDocument(indexName: string, documentId: string): Promise<void>;
147
+ }
148
+
149
+ declare interface OpensearchConfig { // OpenSearch settings; used as SemanticIndexerConfig.semanticIndexer.opensearch.
150
+ serverUrl: string;
151
+ bulkIndexSize: number; // Presumably the number of documents per bulk-index request (TODO confirm).
152
+ indices: {
153
+ semanticIndex: { // Settings for the single index declared here.
154
+ indexName: string;
155
+ indexVersion: number;
156
+ numberOfShards: number;
157
+ numberOfReplicas: number;
158
+ knnVectorFieldConfig: { // NOTE(review): field names resemble OpenSearch k-NN vector field parameters; confirm against the index mapping code.
159
+ mode: string;
160
+ dimension: number;
161
+ spaceType: string;
162
+ efConstruction: number;
163
+ efSearch: number;
164
+ m: number;
165
+ encoder?: { // Optional; when present, all three members are required.
166
+ name: string;
167
+ type: string;
168
+ clip: boolean;
169
+ };
170
+ compressionLevel?: string;
171
+ };
172
+ };
173
+ };
174
+ }
175
+
176
+ declare interface OpensearchConfig_2 { // Separate type from OpensearchConfig; the `_2` suffix was presumably added by the declaration bundler to resolve a name clash. Used by OpensearchApiClient.
177
+ url?: string; // Optional server URL.
178
+ }
179
+
180
+ /**
181
+ * A record in the registry, usually including data for one or more aspects, unique for a tenant.
182
+ */
183
+ declare class Record_2 {
184
+ /**
185
+ * The identifier of the record
186
+ */
187
+ "id": string;
188
+ /**
189
+ * The name of the record
190
+ */
191
+ "name": string;
192
+ /**
193
+ * The aspects included in this record
194
+ */
195
+ "aspects": any;
196
+ /**
197
+ * A tag representing the action by the source of this record (e.g. an id for an individual crawl of a data portal).
198
+ */
199
+ "sourceTag": string;
200
+ /**
201
+ * The identifier of a tenant
202
+ */
203
+ "tenantId": number;
204
+ }
205
+ export { Record_2 as Record } // Publicly exported as `Record`; importers using that name will shadow TypeScript's built-in `Record<K, V>` utility type in their module.
206
+
207
+ declare function semanticIndexer(userConfig: SemanticIndexerOptions): Promise<void>; // Entry point taking the full indexer options; resolves with no value.
208
+ export default semanticIndexer; // This is the package's default export.
209
+
210
+ declare interface SemanticIndexerArguments extends MinionArguments { // MinionArguments plus two required config objects; this is the return type of commonYargs.
211
+ semanticIndexerConfig: SemanticIndexerConfig;
212
+ minioConfig: MinioConfig;
213
+ }
214
+
215
+ declare interface SemanticIndexerConfig { // All settings sit under a single `semanticIndexer` key.
216
+ semanticIndexer: {
217
+ chunkSizeLimit: number; // Required here; SemanticIndexerOptions has an optional field of the same name, presumably a per-indexer override (TODO confirm).
218
+ overlap: number; // Required here; SemanticIndexerOptions has an optional field of the same name, presumably a per-indexer override (TODO confirm).
219
+ opensearch: OpensearchConfig;
220
+ embeddingApi: EmbeddingApiConfig;
221
+ };
222
+ }
223
+
224
+ export declare interface SemanticIndexerOptions { // Options accepted by semanticIndexer() and indexEmbeddingText().
225
+ argv: SemanticIndexerArguments; // Required; the type returned by commonYargs().
226
+ id: string; // Required identifier; presumably of this indexer instance (TODO confirm).
227
+ itemType: ItemType; // Required: "registryRecord" or "storageObject".
228
+ aspects?: string[];
229
+ optionalAspects?: string[];
230
+ formatTypes?: string[];
231
+ createEmbeddingText: CreateEmbeddingText; // Required callback that produces the text to embed.
232
+ chunkStrategy?: ChunkStrategyType; // Optional custom chunking function.
233
+ chunkSizeLimit?: number;
234
+ overlap?: number;
235
+ autoDownloadFile?: boolean; // Optional; the default when omitted is not visible in this declaration file.
236
+ }
237
+
238
+ export { }