scrapex 1.0.0-alpha.1 → 1.0.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +164 -5
- package/dist/embeddings/index.cjs +52 -0
- package/dist/embeddings/index.d.cts +3 -0
- package/dist/embeddings/index.d.mts +3 -0
- package/dist/embeddings/index.mjs +4 -0
- package/dist/embeddings-BjNTQSG9.cjs +1455 -0
- package/dist/embeddings-BjNTQSG9.cjs.map +1 -0
- package/dist/embeddings-Bsymy_jA.mjs +1215 -0
- package/dist/embeddings-Bsymy_jA.mjs.map +1 -0
- package/dist/{enhancer-oM4BhYYS.cjs → enhancer-Cs_WyWtJ.cjs} +2 -51
- package/dist/enhancer-Cs_WyWtJ.cjs.map +1 -0
- package/dist/{enhancer-Q6CSc1gA.mjs → enhancer-INx5NlgO.mjs} +2 -45
- package/dist/enhancer-INx5NlgO.mjs.map +1 -0
- package/dist/http-base-CHLf-Tco.cjs +684 -0
- package/dist/http-base-CHLf-Tco.cjs.map +1 -0
- package/dist/http-base-DM7YNo6X.mjs +618 -0
- package/dist/http-base-DM7YNo6X.mjs.map +1 -0
- package/dist/index-Bvseqli-.d.cts +268 -0
- package/dist/index-Bvseqli-.d.cts.map +1 -0
- package/dist/index-CIFjNySr.d.mts +268 -0
- package/dist/index-CIFjNySr.d.mts.map +1 -0
- package/dist/index-D6qfjmZQ.d.mts +401 -0
- package/dist/index-D6qfjmZQ.d.mts.map +1 -0
- package/dist/index-RFSpP5g8.d.cts +401 -0
- package/dist/index-RFSpP5g8.d.cts.map +1 -0
- package/dist/index.cjs +171 -51
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +61 -2
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.mts +61 -2
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +129 -6
- package/dist/index.mjs.map +1 -1
- package/dist/llm/index.cjs +252 -233
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +132 -85
- package/dist/llm/index.d.cts.map +1 -1
- package/dist/llm/index.d.mts +132 -85
- package/dist/llm/index.d.mts.map +1 -1
- package/dist/llm/index.mjs +244 -236
- package/dist/llm/index.mjs.map +1 -1
- package/dist/parsers/index.cjs +10 -199
- package/dist/parsers/index.d.cts +2 -133
- package/dist/parsers/index.d.mts +2 -133
- package/dist/parsers/index.mjs +2 -191
- package/dist/parsers-Bneuws8x.cjs +569 -0
- package/dist/parsers-Bneuws8x.cjs.map +1 -0
- package/dist/parsers-DsawHeo0.mjs +482 -0
- package/dist/parsers-DsawHeo0.mjs.map +1 -0
- package/dist/types-BOcHQU9s.d.mts +831 -0
- package/dist/types-BOcHQU9s.d.mts.map +1 -0
- package/dist/types-DutdBpqd.d.cts +831 -0
- package/dist/types-DutdBpqd.d.cts.map +1 -0
- package/package.json +15 -16
- package/dist/enhancer-Q6CSc1gA.mjs.map +0 -1
- package/dist/enhancer-oM4BhYYS.cjs.map +0 -1
- package/dist/parsers/index.cjs.map +0 -1
- package/dist/parsers/index.d.cts.map +0 -1
- package/dist/parsers/index.d.mts.map +0 -1
- package/dist/parsers/index.mjs.map +0 -1
- package/dist/types-CNQZVW36.d.mts +0 -150
- package/dist/types-CNQZVW36.d.mts.map +0 -1
- package/dist/types-D0HYR95H.d.cts +0 -150
- package/dist/types-D0HYR95H.d.cts.map +0 -1
|
@@ -0,0 +1,401 @@
|
|
|
1
|
+
import { D as SafetyConfig, O as TextChunk, S as PiiRedactionConfig, c as EmbeddingCache, g as EmbeddingResult, h as EmbeddingProviderConfig, m as EmbeddingProvider, p as EmbeddingOptions, rt as ScrapedData, s as EmbeddingAggregation, t as ChunkingConfig, u as EmbeddingInputConfig } from "./types-BOcHQU9s.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/embeddings/aggregation.d.ts
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Aggregate multiple embedding vectors into a single vector or return all.
|
|
7
|
+
*
|
|
8
|
+
* @param vectors - Array of embedding vectors (must all have same dimensions)
|
|
9
|
+
* @param strategy - Aggregation strategy
|
|
10
|
+
* @returns Aggregated result based on strategy
|
|
11
|
+
*/
|
|
12
|
+
declare function aggregateVectors(vectors: number[][], strategy?: EmbeddingAggregation): AggregationResult;
|
|
13
|
+
/**
|
|
14
|
+
* Result of vector aggregation.
|
|
15
|
+
*/
|
|
16
|
+
type AggregationResult = {
|
|
17
|
+
type: 'single';
|
|
18
|
+
vector: number[];
|
|
19
|
+
dimensions: number;
|
|
20
|
+
} | {
|
|
21
|
+
type: 'multiple';
|
|
22
|
+
vectors: number[][];
|
|
23
|
+
dimensions: number;
|
|
24
|
+
};
|
|
25
|
+
/**
|
|
26
|
+
* Normalize a vector to unit length (L2 normalization).
|
|
27
|
+
*/
|
|
28
|
+
declare function normalizeVector(vector: number[]): number[];
|
|
29
|
+
/**
|
|
30
|
+
* Compute cosine similarity between two vectors.
|
|
31
|
+
* Both vectors should be normalized for accurate results.
|
|
32
|
+
*/
|
|
33
|
+
declare function cosineSimilarity(a: number[], b: number[]): number;
|
|
34
|
+
/**
|
|
35
|
+
* Compute euclidean distance between two vectors.
|
|
36
|
+
*/
|
|
37
|
+
declare function euclideanDistance(a: number[], b: number[]): number;
|
|
38
|
+
/**
|
|
39
|
+
* Compute dot product of two vectors.
|
|
40
|
+
*/
|
|
41
|
+
declare function dotProduct(a: number[], b: number[]): number;
|
|
42
|
+
/**
|
|
43
|
+
* Get the dimensions of a vector or set of vectors.
|
|
44
|
+
*/
|
|
45
|
+
declare function getDimensions(vectors: number[] | number[][]): number;
|
|
46
|
+
//#endregion
|
|
47
|
+
//#region src/embeddings/cache.d.ts
|
|
48
|
+
interface CacheKeyParams {
|
|
49
|
+
providerKey: string;
|
|
50
|
+
/** Model identifier (may be undefined for custom providers) */
|
|
51
|
+
model?: string;
|
|
52
|
+
dimensions?: number;
|
|
53
|
+
aggregation?: EmbeddingAggregation;
|
|
54
|
+
input?: EmbeddingInputConfig;
|
|
55
|
+
chunking?: ChunkingConfig;
|
|
56
|
+
safety?: SafetyConfig;
|
|
57
|
+
cacheKeySalt?: string;
|
|
58
|
+
content: string;
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* Generate a content-addressable cache key.
|
|
62
|
+
* Key is based on content hash and embedding configuration.
|
|
63
|
+
* Note: custom RegExp patterns are serialized by source+flags; different
|
|
64
|
+
* constructions can yield different cache keys even if equivalent.
|
|
65
|
+
*/
|
|
66
|
+
declare function generateCacheKey(params: CacheKeyParams): string;
|
|
67
|
+
/**
|
|
68
|
+
* Generate a checksum for content verification.
|
|
69
|
+
*/
|
|
70
|
+
declare function generateChecksum(content: string): string;
|
|
71
|
+
/**
|
|
72
|
+
* In-memory LRU cache with TTL support.
|
|
73
|
+
* Content-addressable: uses content hash as key, not URL.
|
|
74
|
+
*/
|
|
75
|
+
declare class InMemoryEmbeddingCache implements EmbeddingCache {
|
|
76
|
+
private cache;
|
|
77
|
+
private readonly maxEntries;
|
|
78
|
+
private readonly defaultTtlMs;
|
|
79
|
+
constructor(options?: {
|
|
80
|
+
maxEntries?: number;
|
|
81
|
+
ttlMs?: number;
|
|
82
|
+
});
|
|
83
|
+
get(key: string): Promise<EmbeddingResult | undefined>;
|
|
84
|
+
set(key: string, value: EmbeddingResult, options?: {
|
|
85
|
+
ttlMs?: number;
|
|
86
|
+
}): Promise<void>;
|
|
87
|
+
delete(key: string): Promise<boolean>;
|
|
88
|
+
clear(): Promise<void>;
|
|
89
|
+
/**
|
|
90
|
+
* Get cache statistics.
|
|
91
|
+
*/
|
|
92
|
+
getStats(): CacheStats;
|
|
93
|
+
/**
|
|
94
|
+
* Evict expired entries.
|
|
95
|
+
*/
|
|
96
|
+
cleanup(): number;
|
|
97
|
+
/**
|
|
98
|
+
* Evict least recently used entry.
|
|
99
|
+
*/
|
|
100
|
+
private evictLRU;
|
|
101
|
+
}
|
|
102
|
+
/**
|
|
103
|
+
* Cache statistics.
|
|
104
|
+
*/
|
|
105
|
+
interface CacheStats {
|
|
106
|
+
/** Current number of entries */
|
|
107
|
+
size: number;
|
|
108
|
+
/** Maximum allowed entries */
|
|
109
|
+
maxEntries: number;
|
|
110
|
+
/** Number of expired entries (not yet cleaned up) */
|
|
111
|
+
expired: number;
|
|
112
|
+
/** Cache utilization (0-1) */
|
|
113
|
+
utilization: number;
|
|
114
|
+
}
|
|
115
|
+
/**
|
|
116
|
+
* Validate that a cached result matches expected parameters.
|
|
117
|
+
*/
|
|
118
|
+
declare function validateCachedResult(result: EmbeddingResult, expectedDimensions?: number): boolean;
|
|
119
|
+
/**
|
|
120
|
+
* Create a no-op cache that never stores anything.
|
|
121
|
+
* Useful for disabling caching while maintaining interface compatibility.
|
|
122
|
+
*/
|
|
123
|
+
declare function createNoOpCache(): EmbeddingCache;
|
|
124
|
+
/**
|
|
125
|
+
* Get or create the default cache instance.
|
|
126
|
+
*/
|
|
127
|
+
declare function getDefaultCache(): InMemoryEmbeddingCache;
|
|
128
|
+
/**
|
|
129
|
+
* Reset the default cache (mainly for testing).
|
|
130
|
+
*/
|
|
131
|
+
declare function resetDefaultCache(): Promise<void>;
|
|
132
|
+
//#endregion
|
|
133
|
+
//#region src/embeddings/chunking.d.ts
|
|
134
|
+
/**
|
|
135
|
+
* Heuristic token counting: approximately 4 characters per token.
|
|
136
|
+
* This is a reasonable approximation for English text.
|
|
137
|
+
*/
|
|
138
|
+
declare function heuristicTokenCount(text: string): number;
|
|
139
|
+
/**
|
|
140
|
+
* Create a tokenizer function based on configuration.
|
|
141
|
+
*/
|
|
142
|
+
declare function createTokenizer(config?: ChunkingConfig['tokenizer']): (text: string) => number;
|
|
143
|
+
/**
|
|
144
|
+
* Split text into overlapping chunks optimized for embedding.
|
|
145
|
+
* Respects sentence boundaries when possible.
|
|
146
|
+
*/
|
|
147
|
+
declare function chunkText(text: string, config?: ChunkingConfig): TextChunk[];
|
|
148
|
+
/**
|
|
149
|
+
* Estimate total tokens for a text without chunking.
|
|
150
|
+
*/
|
|
151
|
+
declare function estimateTokens(text: string, tokenizer?: ChunkingConfig['tokenizer']): number;
|
|
152
|
+
/**
|
|
153
|
+
* Check if text needs chunking based on token count.
|
|
154
|
+
*/
|
|
155
|
+
declare function needsChunking(text: string, maxTokens?: number, tokenizer?: ChunkingConfig['tokenizer']): boolean;
|
|
156
|
+
/**
|
|
157
|
+
* Get statistics about potential chunking.
|
|
158
|
+
*/
|
|
159
|
+
declare function getChunkingStats(text: string, config?: ChunkingConfig): {
|
|
160
|
+
inputLength: number;
|
|
161
|
+
estimatedTokens: number;
|
|
162
|
+
estimatedChunks: number;
|
|
163
|
+
willTruncate: boolean;
|
|
164
|
+
};
|
|
165
|
+
//#endregion
|
|
166
|
+
//#region src/embeddings/input.d.ts
|
|
167
|
+
/**
|
|
168
|
+
* Select and prepare input text for embedding based on configuration.
|
|
169
|
+
*
|
|
170
|
+
* @param data - Scraped data to extract input from
|
|
171
|
+
* @param config - Input configuration
|
|
172
|
+
* @returns Selected and prepared text, or undefined if no valid input
|
|
173
|
+
*/
|
|
174
|
+
declare function selectInput(data: Partial<ScrapedData>, config?: EmbeddingInputConfig): string | undefined;
|
|
175
|
+
/**
|
|
176
|
+
* Check if the selected input meets minimum requirements.
|
|
177
|
+
*/
|
|
178
|
+
declare function validateInput(text: string | undefined, minLength?: number): InputValidation;
|
|
179
|
+
/**
|
|
180
|
+
* Result of input validation.
|
|
181
|
+
*/
|
|
182
|
+
type InputValidation = {
|
|
183
|
+
valid: false;
|
|
184
|
+
reason: string;
|
|
185
|
+
} | {
|
|
186
|
+
valid: true;
|
|
187
|
+
text: string;
|
|
188
|
+
wordCount: number;
|
|
189
|
+
charCount: number;
|
|
190
|
+
};
|
|
191
|
+
/**
|
|
192
|
+
* Get a preview of what input would be selected.
|
|
193
|
+
* Useful for debugging and testing.
|
|
194
|
+
*/
|
|
195
|
+
declare function previewInput(data: Partial<ScrapedData>, config?: EmbeddingInputConfig, maxLength?: number): string;
|
|
196
|
+
//#endregion
|
|
197
|
+
//#region src/embeddings/pipeline.d.ts
|
|
198
|
+
/**
|
|
199
|
+
* Generate embeddings for scraped data.
|
|
200
|
+
* This is the main entry point for the embedding pipeline.
|
|
201
|
+
*/
|
|
202
|
+
declare function generateEmbeddings(data: Partial<ScrapedData>, options: EmbeddingOptions): Promise<EmbeddingResult>;
|
|
203
|
+
/**
|
|
204
|
+
* Embed arbitrary text directly.
|
|
205
|
+
* Standalone function for embedding text outside of scrape().
|
|
206
|
+
*/
|
|
207
|
+
declare function embed(text: string, options: EmbeddingOptions): Promise<EmbeddingResult>;
|
|
208
|
+
/**
|
|
209
|
+
* Embed from existing ScrapedData.
|
|
210
|
+
* Useful when you've already scraped and want to add embeddings later.
|
|
211
|
+
*/
|
|
212
|
+
declare function embedScrapedData(data: ScrapedData, options: EmbeddingOptions): Promise<EmbeddingResult>;
|
|
213
|
+
//#endregion
|
|
214
|
+
//#region src/embeddings/providers/base.d.ts
|
|
215
|
+
/**
|
|
216
|
+
* Get default model for a provider type.
|
|
217
|
+
*/
|
|
218
|
+
declare function getDefaultModel(providerType: string): string;
|
|
219
|
+
//#endregion
|
|
220
|
+
//#region src/embeddings/providers/presets.d.ts
|
|
221
|
+
/**
|
|
222
|
+
* Create an OpenAI embedding provider.
|
|
223
|
+
*
|
|
224
|
+
* @example
|
|
225
|
+
* ```ts
|
|
226
|
+
* const provider = createOpenAIEmbedding({ apiKey: 'sk-...' });
|
|
227
|
+
* const { embeddings } = await provider.embed(['Hello'], { model: 'text-embedding-3-small' });
|
|
228
|
+
* ```
|
|
229
|
+
*/
|
|
230
|
+
declare function createOpenAIEmbedding(options?: {
|
|
231
|
+
apiKey?: string;
|
|
232
|
+
model?: string;
|
|
233
|
+
baseUrl?: string;
|
|
234
|
+
organization?: string;
|
|
235
|
+
}): EmbeddingProvider;
|
|
236
|
+
/**
|
|
237
|
+
* Create an Azure OpenAI embedding provider.
|
|
238
|
+
*
|
|
239
|
+
* @example
|
|
240
|
+
* ```ts
|
|
241
|
+
* const provider = createAzureEmbedding({
|
|
242
|
+
* endpoint: 'https://my-resource.openai.azure.com',
|
|
243
|
+
* deploymentName: 'text-embedding-ada-002',
|
|
244
|
+
* apiVersion: '2023-05-15',
|
|
245
|
+
* });
|
|
246
|
+
* ```
|
|
247
|
+
*/
|
|
248
|
+
declare function createAzureEmbedding(options: {
|
|
249
|
+
endpoint: string;
|
|
250
|
+
deploymentName: string;
|
|
251
|
+
apiVersion: string;
|
|
252
|
+
apiKey?: string;
|
|
253
|
+
}): EmbeddingProvider;
|
|
254
|
+
/**
|
|
255
|
+
* Create an Ollama embedding provider for local models.
|
|
256
|
+
*
|
|
257
|
+
* LIMITATION: Ollama's /api/embeddings endpoint processes one text at a time,
|
|
258
|
+
* not batches. When multiple chunks are embedded, each chunk triggers a
|
|
259
|
+
* separate HTTP request. This is handled transparently by the pipeline's
|
|
260
|
+
* sequential chunk processing, but may be slower than batch-capable providers.
|
|
261
|
+
* For high-throughput scenarios, consider using OpenAI, Cohere, or HuggingFace
|
|
262
|
+
* which support batch embedding in a single request.
|
|
263
|
+
*
|
|
264
|
+
* @example
|
|
265
|
+
* ```ts
|
|
266
|
+
* const provider = createOllamaEmbedding({ model: 'nomic-embed-text' });
|
|
267
|
+
* ```
|
|
268
|
+
*/
|
|
269
|
+
declare function createOllamaEmbedding(options?: {
|
|
270
|
+
baseUrl?: string;
|
|
271
|
+
model?: string;
|
|
272
|
+
}): EmbeddingProvider;
|
|
273
|
+
/**
|
|
274
|
+
* Create a HuggingFace Inference API embedding provider.
|
|
275
|
+
*
|
|
276
|
+
* @example
|
|
277
|
+
* ```ts
|
|
278
|
+
* const provider = createHuggingFaceEmbedding({
|
|
279
|
+
* model: 'sentence-transformers/all-MiniLM-L6-v2',
|
|
280
|
+
* });
|
|
281
|
+
* ```
|
|
282
|
+
*/
|
|
283
|
+
declare function createHuggingFaceEmbedding(options: {
|
|
284
|
+
model: string;
|
|
285
|
+
apiKey?: string;
|
|
286
|
+
}): EmbeddingProvider;
|
|
287
|
+
/**
|
|
288
|
+
* Create a Cohere embedding provider.
|
|
289
|
+
*
|
|
290
|
+
* @example
|
|
291
|
+
* ```ts
|
|
292
|
+
* const provider = createCohereEmbedding({ model: 'embed-english-v3.0' });
|
|
293
|
+
* ```
|
|
294
|
+
*/
|
|
295
|
+
declare function createCohereEmbedding(options?: {
|
|
296
|
+
apiKey?: string;
|
|
297
|
+
model?: string;
|
|
298
|
+
/** Input type for embeddings. Use 'search_query' for queries, 'search_document' for documents */
|
|
299
|
+
inputType?: 'search_document' | 'search_query' | 'classification' | 'clustering';
|
|
300
|
+
}): EmbeddingProvider;
|
|
301
|
+
/**
|
|
302
|
+
* Feature extraction pipeline type for Transformers.js
|
|
303
|
+
*/
|
|
304
|
+
type FeatureExtractionPipeline = (text: string, options?: {
|
|
305
|
+
pooling?: 'mean' | 'cls' | 'max';
|
|
306
|
+
normalize?: boolean;
|
|
307
|
+
}) => Promise<{
|
|
308
|
+
data: Float32Array;
|
|
309
|
+
}>;
|
|
310
|
+
/**
|
|
311
|
+
* Transformers.js module interface for dependency injection.
|
|
312
|
+
*/
|
|
313
|
+
interface TransformersModule {
|
|
314
|
+
pipeline: (task: 'feature-extraction', model: string, options?: {
|
|
315
|
+
quantized?: boolean;
|
|
316
|
+
}) => Promise<FeatureExtractionPipeline>;
|
|
317
|
+
env?: {
|
|
318
|
+
cacheDir?: string;
|
|
319
|
+
};
|
|
320
|
+
}
|
|
321
|
+
/**
|
|
322
|
+
* Create a local Transformers.js embedding provider.
|
|
323
|
+
* Uses dependency injection - user provides the imported transformers module.
|
|
324
|
+
*
|
|
325
|
+
* @example
|
|
326
|
+
* ```typescript
|
|
327
|
+
* import * as transformers from '@huggingface/transformers';
|
|
328
|
+
* import { createTransformersEmbedding } from 'scrapex/embeddings';
|
|
329
|
+
*
|
|
330
|
+
* const provider = createTransformersEmbedding(transformers, {
|
|
331
|
+
* model: 'Xenova/all-MiniLM-L6-v2',
|
|
332
|
+
* });
|
|
333
|
+
* ```
|
|
334
|
+
*
|
|
335
|
+
* Required Node.js dependencies:
|
|
336
|
+
* ```
|
|
337
|
+
* npm install @huggingface/transformers onnxruntime-node
|
|
338
|
+
* ```
|
|
339
|
+
*/
|
|
340
|
+
declare function createTransformersEmbedding(transformers: TransformersModule, options?: {
|
|
341
|
+
model?: string;
|
|
342
|
+
quantized?: boolean;
|
|
343
|
+
pooling?: 'mean' | 'cls' | 'max';
|
|
344
|
+
normalize?: boolean;
|
|
345
|
+
cacheDir?: string;
|
|
346
|
+
}): EmbeddingProvider;
|
|
347
|
+
/** Recommended models for Transformers.js */
|
|
348
|
+
declare const TRANSFORMERS_MODELS: {
|
|
349
|
+
/** Default - Fast, general purpose (384 dimensions, ~23MB) */
|
|
350
|
+
readonly DEFAULT: "Xenova/all-MiniLM-L6-v2";
|
|
351
|
+
/** Higher quality, more resources (768 dimensions, ~110MB) */
|
|
352
|
+
readonly QUALITY: "Xenova/all-mpnet-base-v2";
|
|
353
|
+
/** Optimized for retrieval (384 dimensions, ~33MB) */
|
|
354
|
+
readonly RETRIEVAL: "Xenova/bge-small-en-v1.5";
|
|
355
|
+
/** Multi-language support (384 dimensions, ~118MB) */
|
|
356
|
+
readonly MULTILINGUAL: "Xenova/multilingual-e5-small";
|
|
357
|
+
};
|
|
358
|
+
//#endregion
|
|
359
|
+
//#region src/embeddings/providers/index.d.ts
|
|
360
|
+
/**
|
|
361
|
+
* Create an embedding provider from configuration.
|
|
362
|
+
* This is the main factory function for creating providers.
|
|
363
|
+
*/
|
|
364
|
+
declare function createEmbeddingProvider(config: EmbeddingProviderConfig): EmbeddingProvider;
|
|
365
|
+
/**
|
|
366
|
+
* Type guard to check if a value is an EmbeddingProvider.
|
|
367
|
+
*/
|
|
368
|
+
declare function isEmbeddingProvider(value: unknown): value is EmbeddingProvider;
|
|
369
|
+
//#endregion
|
|
370
|
+
//#region src/embeddings/safety.d.ts
|
|
371
|
+
/**
|
|
372
|
+
* Create a redaction function based on configuration.
|
|
373
|
+
* Returns a function that applies all configured PII patterns.
|
|
374
|
+
*/
|
|
375
|
+
declare function createPiiRedactor(config: PiiRedactionConfig): (text: string) => RedactionResult;
|
|
376
|
+
/**
|
|
377
|
+
* Result of PII redaction operation.
|
|
378
|
+
*/
|
|
379
|
+
interface RedactionResult {
|
|
380
|
+
/** Redacted text */
|
|
381
|
+
text: string;
|
|
382
|
+
/** Whether any redactions were made */
|
|
383
|
+
redacted: boolean;
|
|
384
|
+
/** Total number of redactions */
|
|
385
|
+
redactionCount: number;
|
|
386
|
+
/** Count by redaction type */
|
|
387
|
+
redactionsByType: Record<string, number>;
|
|
388
|
+
}
|
|
389
|
+
/**
|
|
390
|
+
* Simple redaction that applies all default patterns.
|
|
391
|
+
* Use createPiiRedactor() for fine-grained control.
|
|
392
|
+
*/
|
|
393
|
+
declare function redactPii(text: string): RedactionResult;
|
|
394
|
+
/**
|
|
395
|
+
* Check if text contains any PII.
|
|
396
|
+
* Useful for validation before sending to external APIs.
|
|
397
|
+
*/
|
|
398
|
+
declare function containsPii(text: string, config?: Partial<PiiRedactionConfig>): boolean;
|
|
399
|
+
//#endregion
|
|
400
|
+
export { createNoOpCache as A, euclideanDistance as B, createTokenizer as C, needsChunking as D, heuristicTokenCount as E, validateCachedResult as F, normalizeVector as H, AggregationResult as I, aggregateVectors as L, generateChecksum as M, getDefaultCache as N, CacheStats as O, resetDefaultCache as P, cosineSimilarity as R, chunkText as S, getChunkingStats as T, getDimensions as V, generateEmbeddings as _, createEmbeddingProvider as a, selectInput as b, createAzureEmbedding as c, createOllamaEmbedding as d, createOpenAIEmbedding as f, embedScrapedData as g, embed as h, redactPii as i, generateCacheKey as j, InMemoryEmbeddingCache as k, createCohereEmbedding as l, getDefaultModel as m, containsPii as n, isEmbeddingProvider as o, createTransformersEmbedding as p, createPiiRedactor as r, TRANSFORMERS_MODELS as s, RedactionResult as t, createHuggingFaceEmbedding as u, InputValidation as v, estimateTokens as w, validateInput as x, previewInput as y, dotProduct as z };
|
|
401
|
+
//# sourceMappingURL=index-D6qfjmZQ.d.mts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index-D6qfjmZQ.d.mts","names":[],"sources":["../src/embeddings/aggregation.ts","../src/embeddings/cache.ts","../src/embeddings/chunking.ts","../src/embeddings/input.ts","../src/embeddings/pipeline.ts","../src/embeddings/providers/base.ts","../src/embeddings/providers/presets.ts","../src/embeddings/providers/index.ts","../src/embeddings/safety.ts"],"sourcesContent":[],"mappings":";;;;;;AASA;AAgEA;AAkEA;AAcA;AA6BA;AAmBgB,iBAhMA,gBAAA,CAgMU,OAAA,EAAA,MAAA,EAAA,EAAA,EAAA,QAAA,CAAA,EA9Ld,oBA8Lc,CAAA,EA7LvB,iBA6LuB;AAkB1B;;;KAlJY,iBAAA;ECpDK,IAAA,EAAA,QAAA;EAKD,MAAA,EAAA,MAAA,EAAA;EACN,UAAA,EAAA,MAAA;CACG,GAAA;EACF,IAAA,EAAA,UAAA;EAAY,OAAA,EAAA,MAAA,EAAA,EAAA;EAWP,UAAA,EAAA,MAAA;AAwBhB,CAAA;AAyIA;;;AAgCgC,iBD9FhB,eAAA,CC8FgB,MAAA,EAAA,MAAA,EAAA,CAAA,EAAA,MAAA,EAAA;;;;;AAhCe,iBDhD/B,gBAAA,CCgD+B,CAAA,EAAA,MAAA,EAAA,EAAA,CAAA,EAAA,MAAA,EAAA,CAAA,EAAA,MAAA;;AAsH/C;AAcA;AA2BgB,iBDlLA,iBAAA,CCkLmB,CAAA,EAAA,MAAc,EAAA,EAAA,CAAA,EAAA,MAAA,EAAA,CAAA,EAAA,MAAA;AA0BjD;AAUA;;iBDnMgB,UAAA;;AEpLhB;AAcA;AAgEgB,iBFwHA,aAAA,CExHiC,OAAiB,EAAA,MAAA,EAAS,GAAA,MAAA,EAAA,EAAA,CAAA,EAAA,MAAA;;;UD9E1D,cAAA;;EDZD;EAgEJ,KAAA,CAAA,EAAA,MAAA;EAkEI,UAAA,CAAA,EAAA,MAAe;EAcf,WAAA,CAAA,EC/HA,oBD+HgB;EA6BhB,KAAA,CAAA,EC3JN,oBD2JuB;EAmBjB,QAAA,CAAA,EC7KH,cD6Ka;EAkBV,MAAA,CAAA,EC9LL,YD8LkB;;;;ACtM7B;;;;;;AAmBgB,iBAAA,gBAAA,CAAyB,MAAA,EAAA,cAAc,CAAA,EAAA,MAAA;AAwBvD;AAyIA;;AAW0B,iBApJV,gBAAA,CAoJU,OAAA,EAAA,MAAA,CAAA,EAAA,MAAA;;;;;AAiDZ,cA5DD,sBAAA,YAAkC,cA4DjC,CAAA;EA5DiC,QAAA,KAAA;EAAc,iBAAA,UAAA;EAsH5C,iBAAU,YAAA;EAcX,WAAA,CAAA,OA+DM,CA/DN,EAAA;IA2BA,UAAA,CAAA,EAAA,MAAe;IA0Bf,KAAA,CAAA,EAAA,MAAe;EAUT,CAAA;oBAxLI,QAAQ;0BAqBF,wBCpNhB;;EAAA,CAAA,CAAA,EDoNgE,OCpNhE,CAAA,IAAA,CAAA;EAcA,MAAA,CAAA,GAAA,EAAA,MAAe,CAAA,EDuNF,OCvNE,CAAA,OAAU,CAAA;EAgEzB,KAAA,CAAA,CAAA,ED2JC,OC3JQ,CAAA,IAAA,CAAA;EAiFT;AAQhB;AAYA;cD6Dc;;;AE3Pd;EACgB,OAAA,CAAA,CAAA,EAAA,MAAA;EAAR;;;EAwJQ,QAAA,QAAa;AAmC7B;AAQA;;;AAEW,UF+GM,UAAA,CE/GN;EAAoB;;;;EChKT;EACN,OAAA,EAAA,MAAA;EAAR;EACG,WAAA,EAAA,MAAA;;;;AA4PX;AAAmD,iBH+BnC,oBAAA,CG/BmC,MAAA,EHgCzC,eGhCyC,EAAA,kBAAA,CAAA,EAAA,MAAA,CAAA,EAAA,OAAA;;;;AAsBnD;AACQ,iBHmCQ,eAAA,CAAA,CGnCR,EHmC2B,cGnC3B;;;;AAEE,iBH2DM,eAAA,CAAA,CG3DN,EH2DyB,sBG3DzB;;;;ACxMM,iBJ6QM,iBAAA,CAAA,CI7QS,EJ6QY,OI7QZ,CAAA,IAAA,CAAA;;;;;ALtH/B;AAgEA;AAkEgB,iBEtHA,mBAAA,CFsHe,IAAA,EAAA,MAAA,CAAA,EAAA,MAAA;AAc/B;AA6BA;AAmBA;AAkBgB,iBExLA,eAAA,CFwLa,MAAA,CAAA,EExLY,cFwLZ,CAAA,WAAA,CAAA,CAAA,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ACtM7B;AAKgB,iBCyEA,SAAA,CDzEA,IAAA,EAAA,MAAA,EAAA,MAAA,CAAA,ECyEiC,cDzEjC,CAAA,ECyEkD,SDzElD,EAAA;;;;AAGO,iBCuJP,cAAA,CDvJO,IAAA,EAAA,MAAA,EAAA,SAAA,CAAA,ECuJkC,cDvJlC,CAAA,WAAA,CAAA,CAAA,EAAA,MAAA;AAWvB;AAwBA;AAyIA;AAWkC,iBCxBlB,aAAA,CDwBkB,IAAA,EAAA,MAAA,EAAA,SAAA,CAAA,EAAA,MAAA,EAAA,SAAA,CAAA,ECrBpB,cDqBoB,CAAA,WAAA,CAAA,CAAA,EAAA,OAAA;;;;AAsCL,iBClDb,gBAAA,CDkDa,IAAA,EAAA,MAAA,EAAA,MAAA,CAAA,EChDlB,cDgDkB,CAAA,EAAA;EAIZ,WAAA,EAAA,MAAA;EAOH,eAAA,EAAA,MAAA;EA5DiC,eAAA,EAAA,MAAA;EAAc,YAAA,EAAA,OAAA;AAsH7D,CAAA;;;;ADtTA;AAgEA;AAkEA;AAcA;AA6BA;AAmBA;AAkBgB,iBGjNA,WAAA,CHiNa,IAAA,EGhNrB,OHgNqB,CGhNb,WHgNa,CAAA,EAAA,MAAA,CAAA,EG/MlB,oBH+MkB,CAAA,EAAA,MAAA,GAAA,SAAA;;;;ACtMZ,iBE8ID,aAAA,CF9Ie,IAAA,EAAA,MAAA,GAAA,SAAA,EAAA,SAAA,CAAA,EAAA,MAAA,CAAA,EE8I0C,eF9I1C;;;;AAQpB,KEyKC,eAAA,GFzKD;EAAY,KAAA,EAAA,KAAA;EAWP,MAAA,EAAA,MAAA;AAwBhB,CAAA,GAAgB;EAyIH,KAAA,EAAA,IAAA;EAWqB,IAAA,EAAA,MAAA;EAAR,SAAA,EAAA,MAAA;EAqBM,SAAA,EAAA,MAAA;CAAgD;;;;;AAhCnB,iBEK7C,YAAA,CFL6C,IAAA,EEMrD,OFNqD,CEM7C,WFN6C,CAAA,EAAA,MAAA,CAAA,EEOlD,oBFPkD,EAAA,SAAA,CAAA,EAAA,MAAA,CAAA,EAAA,MAAA;;;;ADhM7D;AAgEA;AAkEA;AAcgB,iBIzGM,kBAAA,CJyGU,IAAA,EIxGxB,OJwGwB,CIxGhB,WJwGgB,CAAA,EAAA,OAAA,EIvGrB,gBJuGqB,CAAA,EItG7B,OJsG6B,CItGrB,eJsGqB,CAAA;AA6BhC;AAmBA;AAkBA;;iBImFsB,KAAA,wBAA6B,mBAAmB,QAAQ;;AHzR9E;;;AAOa,iBGwSS,gBAAA,CHxST,IAAA,EGySL,WHzSK,EAAA,OAAA,EG0SF,gBH1SE,CAAA,EG2SV,OH3SU,CG2SF,eH3SE,CAAA;;;AAPb;;;AAOa,iBImGG,eAAA,CJnGH,YAAA,EAAA,MAAA,CAAA,EAAA,MAAA;;;AD+Gb;AAcA;AA6BA;AAmBA;AAkBA;;;;ACtMA;AAKgB,iBKMA,qBAAA,CLNA,OAchB,CAdgB,EAAA;EACN,MAAA,CAAA,EAAA,MAAA;EACG,KAAA,CAAA,EAAA,MAAA;EACF,OAAA,CAAA,EAAA,MAAA;EAAY,YAAA,CAAA,EAAA,MAAA;AAWvB,CAAA,CAAA,EKHI,iBLGY;AAwBhB;AAyIA;;;;;;;;;;AAsHA;AAcgB,iBKlQA,oBAAA,CLmQN,OAAA,EAAA;EA0BM,QAAA,EAAA,MAAA;EA0BA,cAAA,EAAA,MAAe;EAUT,UAAA,EAAA,MAAA;;IK5TlB;;AJ3DJ;AAcA;AAgEA;AAiFA;AAQA;AAYA;;;;AC9LA;;;;;AAyJgB,iBGtCA,qBAAA,CHsCyD,OA4CzD,CA5CwE,EAAA;EAmC5E,OAAA,CAAA,EAAA,MAAA;EAQI,KAAA,CAAA,EAAA,MAAA;CACA,CAAA,EG/EZ,iBH+EY;;;;;;;AC/JhB;;;;AAGW,iBEsGK,0BAAA,CFtGL,OAAA,EAAA;EAAR,KAAA,EAAA,MAAA;EAAO,MAAA,CAAA,EAAA,MAAA;AA2PV,CAAA,CAAA,EElJI,iBFkJuB;;;;;AAsB3B;;;;AAGG,iBE5Ha,qBAAA,CF4Hb,OCxMH,CDwMG,EAAA;EAAO,MAAA,CAAA,EAAA,MAAA;;;;ACxMV,CAAA,CAAA,ECiFI,iBDjFY;;;;AC/FhB,KA0MK,yBAAA,GA1MgC,CAAA,IAAA,EAAA,MAKjC,EAAA,OAoHJ,CApHI,EAAA;EAsCY,OAAA,CAAA,EAAA,MAAA,GAAA,KAAoB,GAAA,KAAA;EAkDpB,SAAA,CAAA,EAAA,OAAA;AA4BhB,CAAA,EAAA,GAoFK,OApFW,CAAA;EAkDA,IAAA,EAkCK,YAlCL;AAsBf,CAAA,CAAA;AAYW;AAiCZ;AAkEA;UA9FU,kBAAA;+DCtNM;;EAAA,CAAA,EAAA,GD2NT,OC3NS,CD2ND,yBC3NiC,CAAA;EAyBhC,GAAA,CAAA,EAAA;;;;ACpBhB;AAkEA;AAeA;AAeA;;;;;;;;;;;;;;;;iBF6IgB,2BAAA,eACA;;;;;;IAQb;;cAyDU;;;;;;;;;;;;ANrMb;AAcA;AA6BA;AAmBA;AAkBgB,iBO/LA,uBAAA,CP+La,MAAA,EO/LmB,uBP+LnB,CAAA,EO/L6C,iBP+L7C;;;;ACtMZ,iBMgCD,mBAAA,CNhCe,KAAA,EAAA,OAAA,CAAA,EAAA,KAAA,IMgC+B,iBNhC/B;;;;;ADZ/B;AAgEA;AAkEgB,iBQ1GA,iBAAA,CR0Ge,MAAA,EQ1GW,kBR0GX,CAAA,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GQ1GkD,eR0GlD;AAc/B;AA6BA;AAmBA;AAkBgB,UQxHC,eAAA,CRwHY;;;;ECtMZ,QAAA,EAAA,OAAc;EAKf;EACN,cAAA,EAAA,MAAA;EACG;EACF,gBAAA,EO8ES,MP9ET,CAAA,MAAA,EAAA,MAAA,CAAA;;AAWX;AAwBA;AAyIA;;AAW0B,iBOlGV,SAAA,CPkGU,IAAA,EAAA,MAAA,CAAA,EOlGe,ePkGf;;;;;AAiDZ,iBOpIE,WAAA,CPoIF,IAAA,EAAA,MAAA,EAAA,MAAA,CAAA,EOpIqC,OPoIrC,COpI6C,kBPoI7C,CAAA,CAAA,EAAA,OAAA"}
|