scrapex 1.0.0-alpha.1 → 1.0.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/README.md +164 -5
  2. package/dist/embeddings/index.cjs +52 -0
  3. package/dist/embeddings/index.d.cts +3 -0
  4. package/dist/embeddings/index.d.mts +3 -0
  5. package/dist/embeddings/index.mjs +4 -0
  6. package/dist/embeddings-BjNTQSG9.cjs +1455 -0
  7. package/dist/embeddings-BjNTQSG9.cjs.map +1 -0
  8. package/dist/embeddings-Bsymy_jA.mjs +1215 -0
  9. package/dist/embeddings-Bsymy_jA.mjs.map +1 -0
  10. package/dist/{enhancer-oM4BhYYS.cjs → enhancer-Cs_WyWtJ.cjs} +2 -51
  11. package/dist/enhancer-Cs_WyWtJ.cjs.map +1 -0
  12. package/dist/{enhancer-Q6CSc1gA.mjs → enhancer-INx5NlgO.mjs} +2 -45
  13. package/dist/enhancer-INx5NlgO.mjs.map +1 -0
  14. package/dist/http-base-CHLf-Tco.cjs +684 -0
  15. package/dist/http-base-CHLf-Tco.cjs.map +1 -0
  16. package/dist/http-base-DM7YNo6X.mjs +618 -0
  17. package/dist/http-base-DM7YNo6X.mjs.map +1 -0
  18. package/dist/index-Bvseqli-.d.cts +268 -0
  19. package/dist/index-Bvseqli-.d.cts.map +1 -0
  20. package/dist/index-CIFjNySr.d.mts +268 -0
  21. package/dist/index-CIFjNySr.d.mts.map +1 -0
  22. package/dist/index-D6qfjmZQ.d.mts +401 -0
  23. package/dist/index-D6qfjmZQ.d.mts.map +1 -0
  24. package/dist/index-RFSpP5g8.d.cts +401 -0
  25. package/dist/index-RFSpP5g8.d.cts.map +1 -0
  26. package/dist/index.cjs +171 -51
  27. package/dist/index.cjs.map +1 -1
  28. package/dist/index.d.cts +61 -2
  29. package/dist/index.d.cts.map +1 -1
  30. package/dist/index.d.mts +61 -2
  31. package/dist/index.d.mts.map +1 -1
  32. package/dist/index.mjs +129 -6
  33. package/dist/index.mjs.map +1 -1
  34. package/dist/llm/index.cjs +252 -233
  35. package/dist/llm/index.cjs.map +1 -1
  36. package/dist/llm/index.d.cts +132 -85
  37. package/dist/llm/index.d.cts.map +1 -1
  38. package/dist/llm/index.d.mts +132 -85
  39. package/dist/llm/index.d.mts.map +1 -1
  40. package/dist/llm/index.mjs +244 -236
  41. package/dist/llm/index.mjs.map +1 -1
  42. package/dist/parsers/index.cjs +10 -199
  43. package/dist/parsers/index.d.cts +2 -133
  44. package/dist/parsers/index.d.mts +2 -133
  45. package/dist/parsers/index.mjs +2 -191
  46. package/dist/parsers-Bneuws8x.cjs +569 -0
  47. package/dist/parsers-Bneuws8x.cjs.map +1 -0
  48. package/dist/parsers-DsawHeo0.mjs +482 -0
  49. package/dist/parsers-DsawHeo0.mjs.map +1 -0
  50. package/dist/types-BOcHQU9s.d.mts +831 -0
  51. package/dist/types-BOcHQU9s.d.mts.map +1 -0
  52. package/dist/types-DutdBpqd.d.cts +831 -0
  53. package/dist/types-DutdBpqd.d.cts.map +1 -0
  54. package/package.json +15 -16
  55. package/dist/enhancer-Q6CSc1gA.mjs.map +0 -1
  56. package/dist/enhancer-oM4BhYYS.cjs.map +0 -1
  57. package/dist/parsers/index.cjs.map +0 -1
  58. package/dist/parsers/index.d.cts.map +0 -1
  59. package/dist/parsers/index.d.mts.map +0 -1
  60. package/dist/parsers/index.mjs.map +0 -1
  61. package/dist/types-CNQZVW36.d.mts +0 -150
  62. package/dist/types-CNQZVW36.d.mts.map +0 -1
  63. package/dist/types-D0HYR95H.d.cts +0 -150
  64. package/dist/types-D0HYR95H.d.cts.map +0 -1
package/README.md CHANGED
@@ -2,22 +2,24 @@
2
2
 
3
3
  Modern web scraper with LLM-enhanced extraction, extensible pipeline, and pluggable parsers.
4
4
 
5
- > **Alpha Release**: v1.0.0 is currently in alpha. The API may change before the stable release.
5
+ > **Beta Release**: v1.0.0 is currently in beta. The API is largely stable, but minor changes may occur before the final release.
6
6
 
7
7
  ## Features
8
8
 
9
9
  - **LLM-Ready Output** - Content extracted as Markdown, optimized for AI/LLM consumption
10
10
  - **Provider-Agnostic LLM** - Works with OpenAI, Anthropic, Ollama, LM Studio, or any OpenAI-compatible API
11
+ - **Vector Embeddings** - Generate embeddings with OpenAI, Azure, Cohere, HuggingFace, Ollama, or local Transformers.js
11
12
  - **Extensible Pipeline** - Pluggable extractors with priority-based execution
12
13
  - **Smart Extraction** - Uses Mozilla Readability for content, Cheerio for metadata
13
14
  - **Markdown Parsing** - Parse markdown content, awesome lists, and GitHub repos
15
+ - **RSS/Atom Feeds** - Parse RSS 2.0, RSS 1.0 (RDF), and Atom feeds with pagination support
14
16
  - **TypeScript First** - Full type safety with comprehensive type exports
15
17
  - **Dual Format** - ESM and CommonJS builds
16
18
 
17
19
  ## Installation
18
20
 
19
21
  ```bash
20
- npm install scrapex@alpha
22
+ npm install scrapex@beta
21
23
  ```
22
24
 
23
25
  ### Optional Peer Dependencies
@@ -142,14 +144,48 @@ console.log(result.suggestedTags); // ['javascript', 'web', ...]
142
144
  console.log(result.entities); // { people: [], organizations: [], ... }
143
145
  ```
144
146
 
147
+ ### Embeddings
148
+
149
+ Generate vector embeddings from scraped content for semantic search, RAG, and similarity matching:
150
+
151
+ ```typescript
152
+ import { scrape } from 'scrapex';
153
+ import { createOpenAIEmbedding } from 'scrapex/embeddings';
154
+
155
+ const result = await scrape('https://example.com/article', {
156
+ embeddings: {
157
+ provider: { type: 'custom', provider: createOpenAIEmbedding() },
158
+ model: 'text-embedding-3-small',
159
+ },
160
+ });
161
+
162
+ if (result.embeddings?.status === 'success') {
163
+ console.log(result.embeddings.vector); // [0.023, -0.041, ...]
164
+ }
165
+ ```
166
+
167
+ Features include:
168
+ - **Multiple providers** - OpenAI, Azure, Cohere, HuggingFace, Ollama, Transformers.js
169
+ - **PII redaction** - Automatically redact emails, phones, SSNs before sending to APIs
170
+ - **Smart chunking** - Split long content with configurable overlap
171
+ - **Caching** - Content-addressable cache to avoid redundant API calls
172
+ - **Resilience** - Retry, circuit breaker, rate limiting
173
+
174
+ See the [Embeddings Guide](https://scrapex.dev/guides/embeddings) for full documentation.
175
+
176
+ ## Breaking Changes (Beta)
177
+
178
+ - LLM provider classes (e.g., `AnthropicProvider`) were removed. Use preset factories like
179
+ `createOpenAI`, `createAnthropic`, `createOllama`, and `createLMStudio` instead.
180
+
145
181
  ### Using Anthropic Claude
146
182
 
147
183
  ```typescript
148
- import { AnthropicProvider } from 'scrapex/llm';
184
+ import { createAnthropic } from 'scrapex/llm';
149
185
 
150
- const llm = new AnthropicProvider({
186
+ const llm = createAnthropic({
151
187
  apiKey: process.env.ANTHROPIC_API_KEY,
152
- model: 'claude-3-5-haiku-20241022', // or 'claude-sonnet-4-20250514'
188
+ model: 'claude-3-5-haiku-20241022', // or 'claude-3-5-sonnet-20241022'
153
189
  });
154
190
 
155
191
  const result = await scrape(url, { llm, enhance: ['summarize'] });
@@ -279,6 +315,129 @@ toRawUrl('https://github.com/owner/repo');
279
315
  // 'https://raw.githubusercontent.com/owner/repo/main/README.md'
280
316
  ```
281
317
 
318
+ ## RSS/Atom Feed Parsing
319
+
320
+ Parse RSS 2.0, RSS 1.0 (RDF), and Atom 1.0 feeds:
321
+
322
+ ```typescript
323
+ import { RSSParser } from 'scrapex';
324
+
325
+ const parser = new RSSParser();
326
+ const result = parser.parse(feedXml, 'https://example.com/feed.xml');
327
+
328
+ console.log(result.data.format); // 'rss2' | 'rss1' | 'atom'
329
+ console.log(result.data.title); // Feed title
330
+ console.log(result.data.items); // Array of feed items
331
+ ```
332
+
333
+ **Supported formats:**
334
+ - `rss2` - RSS 2.0 (most common format)
335
+ - `rss1` - RSS 1.0 (RDF-based, older format)
336
+ - `atom` - Atom 1.0 (modern format with better semantics)
337
+
338
+ ### Feed Item Structure
339
+
340
+ ```typescript
341
+ interface FeedItem {
342
+ id: string;
343
+ title: string;
344
+ link: string;
345
+ description?: string;
346
+ content?: string;
347
+ author?: string;
348
+ publishedAt?: string; // ISO 8601
349
+ rawPublishedAt?: string; // Original date string
350
+ updatedAt?: string; // Atom only
351
+ categories: string[];
352
+ enclosure?: FeedEnclosure; // Podcast/media attachments
353
+ customFields?: Record<string, string>;
354
+ }
355
+ ```
356
+
357
+ ### Fetching and Parsing Feeds
358
+
359
+ ```typescript
360
+ import { fetchFeed, paginateFeed } from 'scrapex';
361
+
362
+ // Fetch and parse in one call
363
+ const result = await fetchFeed('https://example.com/feed.xml');
364
+ console.log(result.data.items);
365
+
366
+ // Paginate through feeds with rel="next" links (Atom)
367
+ for await (const page of paginateFeed('https://example.com/atom')) {
368
+ console.log(`Page with ${page.data.items.length} items`);
369
+ }
370
+ ```
371
+
372
+ ### Discovering Feeds in HTML
373
+
374
+ ```typescript
375
+ import { discoverFeeds } from 'scrapex';
376
+
377
+ const html = await fetch('https://example.com').then(r => r.text());
378
+ const feedUrls = discoverFeeds(html, 'https://example.com');
379
+ // ['https://example.com/feed.xml', 'https://example.com/atom.xml']
380
+ ```
381
+
382
+ ### Filtering by Date
383
+
384
+ ```typescript
385
+ import { RSSParser, filterByDate } from 'scrapex';
386
+
387
+ const parser = new RSSParser();
388
+ const result = parser.parse(feedXml);
389
+
390
+ const recentItems = filterByDate(result.data.items, {
391
+ after: new Date('2024-01-01'),
392
+ before: new Date('2024-12-31'),
393
+ includeUndated: false,
394
+ });
395
+ ```
396
+
397
+ ### Converting to Markdown/Text
398
+
399
+ ```typescript
400
+ import { RSSParser, feedToMarkdown, feedToText } from 'scrapex';
401
+
402
+ const parser = new RSSParser();
403
+ const result = parser.parse(feedXml);
404
+
405
+ // Convert to markdown (great for LLM consumption)
406
+ const markdown = feedToMarkdown(result.data, { maxItems: 10 });
407
+
408
+ // Convert to plain text
409
+ const text = feedToText(result.data);
410
+ ```
411
+
412
+ ### Custom Fields (Podcast/Media)
413
+
414
+ Extract custom namespace fields like iTunes podcast tags:
415
+
416
+ ```typescript
417
+ const parser = new RSSParser({
418
+ customFields: {
419
+ duration: 'itunes\\:duration',
420
+ explicit: 'itunes\\:explicit',
421
+ rating: 'media\\:rating',
422
+ },
423
+ });
424
+
425
+ const result = parser.parse(podcastXml);
426
+ const item = result.data.items[0];
427
+
428
+ console.log(item.customFields?.duration); // '10:00'
429
+ console.log(item.customFields?.explicit); // 'no'
430
+ ```
431
+
432
+ ### Security
433
+
434
+ The RSS parser enforces strict URL security:
435
+
436
+ - **HTTPS-only URLs (RSS parser only)**: The RSS/Atom parser (`RSSParser`) accepts only HTTPS links when resolving URLs. Non-HTTPS URLs (e.g., `http:`, `javascript:`, `data:`, `file:`) are rejected and returned as empty strings. This restriction is specific to feed parsing and is intended to prevent malicious links in untrusted feeds.
437
+ - **XML Mode**: Feeds are parsed with Cheerio's `{ xml: true }` mode, which disables HTML entity processing and prevents XSS vectors.
438
+
439
+ > **Note**: The public URL utilities (`resolveUrl`, `isValidUrl`, etc.) accept both `http:` and `https:` URLs. Protocol-relative URLs (e.g., `//example.com/path`) are resolved against the base URL's protocol by the standard `URL` constructor.
440
+
282
441
  ## URL Utilities
283
442
 
284
443
  ```typescript
@@ -0,0 +1,52 @@
1
+ const require_http_base = require('../http-base-CHLf-Tco.cjs');
2
+ const require_embeddings = require('../embeddings-BjNTQSG9.cjs');
3
+
4
+ exports.CircuitBreaker = require_http_base.CircuitBreaker;
5
+ exports.CircuitOpenError = require_http_base.CircuitOpenError;
6
+ exports.HttpEmbeddingProvider = require_embeddings.HttpEmbeddingProvider;
7
+ exports.InMemoryEmbeddingCache = require_embeddings.InMemoryEmbeddingCache;
8
+ exports.RateLimiter = require_http_base.RateLimiter;
9
+ exports.Semaphore = require_http_base.Semaphore;
10
+ exports.TRANSFORMERS_MODELS = require_embeddings.TRANSFORMERS_MODELS;
11
+ exports.aggregateVectors = require_embeddings.aggregateVectors;
12
+ exports.chunkText = require_embeddings.chunkText;
13
+ exports.containsPii = require_embeddings.containsPii;
14
+ exports.cosineSimilarity = require_embeddings.cosineSimilarity;
15
+ exports.createAzureEmbedding = require_embeddings.createAzureEmbedding;
16
+ exports.createCohereEmbedding = require_embeddings.createCohereEmbedding;
17
+ exports.createEmbeddingProvider = require_embeddings.createEmbeddingProvider;
18
+ exports.createHttpEmbedding = require_embeddings.createHttpEmbedding;
19
+ exports.createHuggingFaceEmbedding = require_embeddings.createHuggingFaceEmbedding;
20
+ exports.createNoOpCache = require_embeddings.createNoOpCache;
21
+ exports.createOllamaEmbedding = require_embeddings.createOllamaEmbedding;
22
+ exports.createOpenAIEmbedding = require_embeddings.createOpenAIEmbedding;
23
+ exports.createPiiRedactor = require_embeddings.createPiiRedactor;
24
+ exports.createTimeoutSignal = require_http_base.createTimeoutSignal;
25
+ exports.createTokenizer = require_embeddings.createTokenizer;
26
+ exports.createTransformersEmbedding = require_embeddings.createTransformersEmbedding;
27
+ exports.dotProduct = require_embeddings.dotProduct;
28
+ exports.embed = require_embeddings.embed;
29
+ exports.embedScrapedData = require_embeddings.embedScrapedData;
30
+ exports.estimateTokens = require_embeddings.estimateTokens;
31
+ exports.euclideanDistance = require_embeddings.euclideanDistance;
32
+ exports.generateCacheKey = require_embeddings.generateCacheKey;
33
+ exports.generateChecksum = require_embeddings.generateChecksum;
34
+ exports.generateEmbeddings = require_embeddings.generateEmbeddings;
35
+ exports.getChunkingStats = require_embeddings.getChunkingStats;
36
+ exports.getDefaultCache = require_embeddings.getDefaultCache;
37
+ exports.getDefaultModel = require_embeddings.getDefaultModel;
38
+ exports.getDimensions = require_embeddings.getDimensions;
39
+ exports.heuristicTokenCount = require_embeddings.heuristicTokenCount;
40
+ exports.isEmbeddingProvider = require_embeddings.isEmbeddingProvider;
41
+ exports.isRetryableError = require_http_base.isRetryableError;
42
+ exports.needsChunking = require_embeddings.needsChunking;
43
+ exports.normalizeVector = require_embeddings.normalizeVector;
44
+ exports.previewInput = require_embeddings.previewInput;
45
+ exports.redactPii = require_embeddings.redactPii;
46
+ exports.resetDefaultCache = require_embeddings.resetDefaultCache;
47
+ exports.selectInput = require_embeddings.selectInput;
48
+ exports.validateCachedResult = require_embeddings.validateCachedResult;
49
+ exports.validateInput = require_embeddings.validateInput;
50
+ exports.withResilience = require_http_base.withResilience;
51
+ exports.withRetry = require_http_base.withRetry;
52
+ exports.withTimeout = require_http_base.withTimeout;
@@ -0,0 +1,3 @@
1
+ import { A as HttpEmbeddingProvider, B as withResilience, C as RateLimitConfig, D as SafetyConfig, E as RetryConfig, F as CircuitOpenError, H as withTimeout, I as RateLimiter, L as Semaphore, O as TextChunk, P as CircuitBreaker, R as createTimeoutSignal, S as PiiRedactionConfig, T as ResilienceState, V as withRetry, _ as EmbeddingSkipped, a as EmbedRequest, b as EmbeddingSuccessSingle, c as EmbeddingCache, d as EmbeddingInputType, f as EmbeddingMetrics, g as EmbeddingResult, h as EmbeddingProviderConfig, i as CircuitState, j as createHttpEmbedding, k as HttpEmbeddingConfig, l as EmbeddingCacheConfig, m as EmbeddingProvider, n as CircuitBreakerConfig, o as EmbedResponse, p as EmbeddingOptions, r as CircuitBreakerState, s as EmbeddingAggregation, t as ChunkingConfig, u as EmbeddingInputConfig, v as EmbeddingSource, w as ResilienceConfig, x as OutputConfig, y as EmbeddingSuccessMultiple, z as isRetryableError } from "../types-DutdBpqd.cjs";
2
+ import { A as createNoOpCache, B as euclideanDistance, C as createTokenizer, D as needsChunking, E as heuristicTokenCount, F as validateCachedResult, H as normalizeVector, I as AggregationResult, L as aggregateVectors, M as generateChecksum, N as getDefaultCache, O as CacheStats, P as resetDefaultCache, R as cosineSimilarity, S as chunkText, T as getChunkingStats, V as getDimensions, _ as generateEmbeddings, a as createEmbeddingProvider, b as selectInput, c as createAzureEmbedding, d as createOllamaEmbedding, f as createOpenAIEmbedding, g as embedScrapedData, h as embed, i as redactPii, j as generateCacheKey, k as InMemoryEmbeddingCache, l as createCohereEmbedding, m as getDefaultModel, n as containsPii, o as isEmbeddingProvider, p as createTransformersEmbedding, r as createPiiRedactor, s as TRANSFORMERS_MODELS, t as RedactionResult, u as createHuggingFaceEmbedding, v as InputValidation, w as estimateTokens, x as validateInput, y as previewInput, z as dotProduct } from "../index-RFSpP5g8.cjs";
3
+ export { AggregationResult, CacheStats, ChunkingConfig, CircuitBreaker, CircuitBreakerConfig, CircuitBreakerState, CircuitOpenError, CircuitState, EmbedRequest, EmbedResponse, EmbeddingAggregation, EmbeddingCache, EmbeddingCacheConfig, EmbeddingInputConfig, EmbeddingInputType, EmbeddingMetrics, EmbeddingOptions, EmbeddingProvider, EmbeddingProviderConfig, EmbeddingResult, EmbeddingSkipped, EmbeddingSource, EmbeddingSuccessMultiple, EmbeddingSuccessSingle, HttpEmbeddingConfig, HttpEmbeddingProvider, InMemoryEmbeddingCache, InputValidation, OutputConfig, PiiRedactionConfig, RateLimitConfig, RateLimiter, RedactionResult, ResilienceConfig, ResilienceState, RetryConfig, SafetyConfig, Semaphore, TRANSFORMERS_MODELS, TextChunk, aggregateVectors, chunkText, containsPii, cosineSimilarity, createAzureEmbedding, createCohereEmbedding, createEmbeddingProvider, createHttpEmbedding, createHuggingFaceEmbedding, createNoOpCache, createOllamaEmbedding, createOpenAIEmbedding, createPiiRedactor, createTimeoutSignal, createTokenizer, createTransformersEmbedding, dotProduct, embed, embedScrapedData, estimateTokens, euclideanDistance, generateCacheKey, generateChecksum, generateEmbeddings, getChunkingStats, getDefaultCache, getDefaultModel, getDimensions, heuristicTokenCount, isEmbeddingProvider, isRetryableError, needsChunking, normalizeVector, previewInput, redactPii, resetDefaultCache, selectInput, validateCachedResult, validateInput, withResilience, withRetry, withTimeout };
@@ -0,0 +1,3 @@
1
+ import { A as HttpEmbeddingProvider, B as withResilience, C as RateLimitConfig, D as SafetyConfig, E as RetryConfig, F as CircuitOpenError, H as withTimeout, I as RateLimiter, L as Semaphore, O as TextChunk, P as CircuitBreaker, R as createTimeoutSignal, S as PiiRedactionConfig, T as ResilienceState, V as withRetry, _ as EmbeddingSkipped, a as EmbedRequest, b as EmbeddingSuccessSingle, c as EmbeddingCache, d as EmbeddingInputType, f as EmbeddingMetrics, g as EmbeddingResult, h as EmbeddingProviderConfig, i as CircuitState, j as createHttpEmbedding, k as HttpEmbeddingConfig, l as EmbeddingCacheConfig, m as EmbeddingProvider, n as CircuitBreakerConfig, o as EmbedResponse, p as EmbeddingOptions, r as CircuitBreakerState, s as EmbeddingAggregation, t as ChunkingConfig, u as EmbeddingInputConfig, v as EmbeddingSource, w as ResilienceConfig, x as OutputConfig, y as EmbeddingSuccessMultiple, z as isRetryableError } from "../types-BOcHQU9s.mjs";
2
+ import { A as createNoOpCache, B as euclideanDistance, C as createTokenizer, D as needsChunking, E as heuristicTokenCount, F as validateCachedResult, H as normalizeVector, I as AggregationResult, L as aggregateVectors, M as generateChecksum, N as getDefaultCache, O as CacheStats, P as resetDefaultCache, R as cosineSimilarity, S as chunkText, T as getChunkingStats, V as getDimensions, _ as generateEmbeddings, a as createEmbeddingProvider, b as selectInput, c as createAzureEmbedding, d as createOllamaEmbedding, f as createOpenAIEmbedding, g as embedScrapedData, h as embed, i as redactPii, j as generateCacheKey, k as InMemoryEmbeddingCache, l as createCohereEmbedding, m as getDefaultModel, n as containsPii, o as isEmbeddingProvider, p as createTransformersEmbedding, r as createPiiRedactor, s as TRANSFORMERS_MODELS, t as RedactionResult, u as createHuggingFaceEmbedding, v as InputValidation, w as estimateTokens, x as validateInput, y as previewInput, z as dotProduct } from "../index-D6qfjmZQ.mjs";
3
+ export { AggregationResult, CacheStats, ChunkingConfig, CircuitBreaker, CircuitBreakerConfig, CircuitBreakerState, CircuitOpenError, CircuitState, EmbedRequest, EmbedResponse, EmbeddingAggregation, EmbeddingCache, EmbeddingCacheConfig, EmbeddingInputConfig, EmbeddingInputType, EmbeddingMetrics, EmbeddingOptions, EmbeddingProvider, EmbeddingProviderConfig, EmbeddingResult, EmbeddingSkipped, EmbeddingSource, EmbeddingSuccessMultiple, EmbeddingSuccessSingle, HttpEmbeddingConfig, HttpEmbeddingProvider, InMemoryEmbeddingCache, InputValidation, OutputConfig, PiiRedactionConfig, RateLimitConfig, RateLimiter, RedactionResult, ResilienceConfig, ResilienceState, RetryConfig, SafetyConfig, Semaphore, TRANSFORMERS_MODELS, TextChunk, aggregateVectors, chunkText, containsPii, cosineSimilarity, createAzureEmbedding, createCohereEmbedding, createEmbeddingProvider, createHttpEmbedding, createHuggingFaceEmbedding, createNoOpCache, createOllamaEmbedding, createOpenAIEmbedding, createPiiRedactor, createTimeoutSignal, createTokenizer, createTransformersEmbedding, dotProduct, embed, embedScrapedData, estimateTokens, euclideanDistance, generateCacheKey, generateChecksum, generateEmbeddings, getChunkingStats, getDefaultCache, getDefaultModel, getDimensions, heuristicTokenCount, isEmbeddingProvider, isRetryableError, needsChunking, normalizeVector, previewInput, redactPii, resetDefaultCache, selectInput, validateCachedResult, validateInput, withResilience, withRetry, withTimeout };
@@ -0,0 +1,4 @@
1
+ import { a as Semaphore, c as withResilience, i as RateLimiter, l as withRetry, n as CircuitBreaker, o as createTimeoutSignal, r as CircuitOpenError, s as isRetryableError, u as withTimeout } from "../http-base-DM7YNo6X.mjs";
2
+ import { A as generateCacheKey, B as normalizeVector, C as createTokenizer, D as needsChunking, E as heuristicTokenCount, F as aggregateVectors, I as cosineSimilarity, L as dotProduct, M as getDefaultCache, N as resetDefaultCache, O as InMemoryEmbeddingCache, P as validateCachedResult, R as euclideanDistance, S as chunkText, T as getChunkingStats, _ as createHttpEmbedding, a as createPiiRedactor, b as selectInput, c as isEmbeddingProvider, d as createCohereEmbedding, f as createHuggingFaceEmbedding, g as HttpEmbeddingProvider, h as createTransformersEmbedding, i as containsPii, j as generateChecksum, k as createNoOpCache, l as TRANSFORMERS_MODELS, m as createOpenAIEmbedding, n as embedScrapedData, o as redactPii, p as createOllamaEmbedding, r as generateEmbeddings, s as createEmbeddingProvider, t as embed, u as createAzureEmbedding, v as getDefaultModel, w as estimateTokens, x as validateInput, y as previewInput, z as getDimensions } from "../embeddings-Bsymy_jA.mjs";
3
+
4
+ export { CircuitBreaker, CircuitOpenError, HttpEmbeddingProvider, InMemoryEmbeddingCache, RateLimiter, Semaphore, TRANSFORMERS_MODELS, aggregateVectors, chunkText, containsPii, cosineSimilarity, createAzureEmbedding, createCohereEmbedding, createEmbeddingProvider, createHttpEmbedding, createHuggingFaceEmbedding, createNoOpCache, createOllamaEmbedding, createOpenAIEmbedding, createPiiRedactor, createTimeoutSignal, createTokenizer, createTransformersEmbedding, dotProduct, embed, embedScrapedData, estimateTokens, euclideanDistance, generateCacheKey, generateChecksum, generateEmbeddings, getChunkingStats, getDefaultCache, getDefaultModel, getDimensions, heuristicTokenCount, isEmbeddingProvider, isRetryableError, needsChunking, normalizeVector, previewInput, redactPii, resetDefaultCache, selectInput, validateCachedResult, validateInput, withResilience, withRetry, withTimeout };