scrapex 1.0.0-beta.4 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/README.md +164 -3
  2. package/dist/embeddings/index.cjs +2 -2
  3. package/dist/embeddings/index.d.cts +2 -2
  4. package/dist/embeddings/index.d.mts +2 -2
  5. package/dist/embeddings/index.mjs +2 -2
  6. package/dist/{embeddings-DHVu52VF.cjs → embeddings-BzRSRX9t.cjs} +3 -3
  7. package/dist/{embeddings-DHVu52VF.cjs.map → embeddings-BzRSRX9t.cjs.map} +1 -1
  8. package/dist/{embeddings-Dama_IQl.mjs → embeddings-CukTWZVJ.mjs} +2 -2
  9. package/dist/{embeddings-Dama_IQl.mjs.map → embeddings-CukTWZVJ.mjs.map} +1 -1
  10. package/dist/{enhancer-Cs_WyWtJ.cjs → enhancer-18LeSP31.cjs} +2 -2
  11. package/dist/{enhancer-Cs_WyWtJ.cjs.map → enhancer-18LeSP31.cjs.map} +1 -1
  12. package/dist/{enhancer-INx5NlgO.mjs → enhancer-CIFsqRhc.mjs} +1 -1
  13. package/dist/{enhancer-INx5NlgO.mjs.map → enhancer-CIFsqRhc.mjs.map} +1 -1
  14. package/dist/{http-base-CHLf-Tco.cjs → http-base-B5EXbNwR.cjs} +2 -2
  15. package/dist/{http-base-CHLf-Tco.cjs.map → http-base-B5EXbNwR.cjs.map} +1 -1
  16. package/dist/{http-base-DM7YNo6X.mjs → http-base-B8qhcRit.mjs} +1 -1
  17. package/dist/{http-base-DM7YNo6X.mjs.map → http-base-B8qhcRit.mjs.map} +1 -1
  18. package/dist/{index-Bvseqli-.d.cts → index-B87QDy6g.d.mts} +39 -2
  19. package/dist/index-B87QDy6g.d.mts.map +1 -0
  20. package/dist/{index-CliKPY5N.d.cts → index-BKGgfF5C.d.cts} +2 -2
  21. package/dist/{index-CliKPY5N.d.cts.map → index-BKGgfF5C.d.cts.map} +1 -1
  22. package/dist/{index-REk4CMxV.d.mts → index-DoA7xuDF.d.mts} +2 -2
  23. package/dist/{index-REk4CMxV.d.mts.map → index-DoA7xuDF.d.mts.map} +1 -1
  24. package/dist/{index-CIFjNySr.d.mts → index-qCDikize.d.cts} +39 -2
  25. package/dist/index-qCDikize.d.cts.map +1 -0
  26. package/dist/index.cjs +10 -331
  27. package/dist/index.cjs.map +1 -1
  28. package/dist/index.d.cts +66 -9
  29. package/dist/index.d.cts.map +1 -1
  30. package/dist/index.d.mts +66 -9
  31. package/dist/index.d.mts.map +1 -1
  32. package/dist/index.mjs +5 -327
  33. package/dist/index.mjs.map +1 -1
  34. package/dist/llm/index.cjs +3 -3
  35. package/dist/llm/index.d.cts +1 -1
  36. package/dist/llm/index.d.mts +1 -1
  37. package/dist/llm/index.mjs +2 -2
  38. package/dist/parsers/index.cjs +2 -1
  39. package/dist/parsers/index.d.cts +2 -2
  40. package/dist/parsers/index.d.mts +2 -2
  41. package/dist/parsers/index.mjs +2 -2
  42. package/dist/parsers-C-AnBv0B.mjs +946 -0
  43. package/dist/parsers-C-AnBv0B.mjs.map +1 -0
  44. package/dist/{parsers-Bneuws8x.cjs → parsers-DsqevSf6.cjs} +496 -2
  45. package/dist/parsers-DsqevSf6.cjs.map +1 -0
  46. package/dist/{types-BdlvuOtp.d.mts → types--8EgNAtb.d.cts} +4 -134
  47. package/dist/types--8EgNAtb.d.cts.map +1 -0
  48. package/dist/types-BeKod8eQ.d.cts +134 -0
  49. package/dist/types-BeKod8eQ.d.cts.map +1 -0
  50. package/dist/{types-BYsD9u71.d.cts → types-dyXBh3Xy.d.mts} +4 -134
  51. package/dist/types-dyXBh3Xy.d.mts.map +1 -0
  52. package/dist/types-tpb9d2vq.d.mts +134 -0
  53. package/dist/types-tpb9d2vq.d.mts.map +1 -0
  54. package/package.json +1 -1
  55. package/dist/index-Bvseqli-.d.cts.map +0 -1
  56. package/dist/index-CIFjNySr.d.mts.map +0 -1
  57. package/dist/parsers-Bneuws8x.cjs.map +0 -1
  58. package/dist/parsers-DsawHeo0.mjs +0 -482
  59. package/dist/parsers-DsawHeo0.mjs.map +0 -1
  60. package/dist/types-BYsD9u71.d.cts.map +0 -1
  61. package/dist/types-BdlvuOtp.d.mts.map +0 -1
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Modern web scraper with LLM-enhanced extraction, extensible pipeline, and pluggable parsers.
4
4
 
5
- > **Beta Release**: v1.0.0 is currently in beta. The API is stable but minor changes may occur before the stable release.
5
+ > **Release**: v1.0.0 (API stable; breaking changes documented below).
6
6
 
7
7
  ## Features
8
8
 
@@ -13,13 +13,14 @@ Modern web scraper with LLM-enhanced extraction, extensible pipeline, and plugga
13
13
  - **Smart Extraction** - Uses Mozilla Readability for content, Cheerio for metadata
14
14
  - **Markdown Parsing** - Parse markdown content, awesome lists, and GitHub repos
15
15
  - **RSS/Atom Feeds** - Parse RSS 2.0, RSS 1.0 (RDF), and Atom feeds with pagination support
16
+ - **Content Normalization** - Clean, embedding-ready text with boilerplate removal
16
17
  - **TypeScript First** - Full type safety with comprehensive type exports
17
18
  - **Dual Format** - ESM and CommonJS builds
18
19
 
19
20
  ## Installation
20
21
 
21
22
  ```bash
22
- npm install scrapex@beta
23
+ npm install scrapex
23
24
  ```
24
25
 
25
26
  ### Optional Peer Dependencies
@@ -29,6 +30,9 @@ npm install scrapex@beta
29
30
  npm install openai # OpenAI/Ollama/LM Studio
30
31
  npm install @anthropic-ai/sdk # Anthropic Claude
31
32
 
33
+ # For local embeddings (zero API cost)
34
+ npm install @huggingface/transformers onnxruntime-node
35
+
32
36
  # For JavaScript-rendered pages
33
37
  npm install puppeteer
34
38
  ```
@@ -60,6 +64,10 @@ const result = await scrape('https://example.com', {
60
64
  userAgent: 'MyBot/1.0',
61
65
  extractContent: true,
62
66
  maxContentLength: 50000,
67
+ normalize: {
68
+ mode: 'full',
69
+ removeBoilerplate: true,
70
+ },
63
71
  respectRobots: false,
64
72
  });
65
73
  ```
@@ -117,6 +125,17 @@ interface ScrapedData {
117
125
  entities?: ExtractedEntities;
118
126
  extracted?: Record<string, unknown>;
119
127
 
128
+ // Custom extractor results
129
+ custom?: Record<string, unknown>;
130
+
131
+ // Embeddings (when enabled)
132
+ embeddings?: EmbeddingResult;
133
+
134
+ // Normalized text (when enabled)
135
+ normalizedText?: string;
136
+ normalizationMeta?: NormalizationMeta;
137
+ normalizedBlocks?: ContentBlock[];
138
+
120
139
  // Meta
121
140
  scrapedAt: string;
122
141
  scrapeTimeMs: number;
@@ -173,7 +192,84 @@ Features include:
173
192
 
174
193
  See the [Embeddings Guide](https://scrapex.dev/guides/embeddings) for full documentation.
175
194
 
176
- ## Breaking Changes (Beta)
195
+ ## Content Normalization
196
+
197
+ Produce clean, embedding-ready text with boilerplate removal and block classification:
198
+
199
+ ```typescript
200
+ const result = await scrape(url, {
201
+ normalize: {
202
+ mode: 'full', // or 'summary' for score-ranked blocks
203
+ removeBoilerplate: true, // filter nav, footer, promos
204
+ maxChars: 5000, // truncate at sentence boundary
205
+ },
206
+ });
207
+
208
+ console.log(result.normalizedText); // Clean text ready for embedding
209
+ console.log(result.normalizationMeta); // { charCount, tokenEstimate, hash, ... }
210
+ ```
211
+
212
+ ### Standalone Normalization
213
+
214
+ Use normalization without scraping:
215
+
216
+ ```typescript
217
+ import { parseBlocks, normalizeText } from 'scrapex';
218
+ import { load } from 'cheerio';
219
+
220
+ const $ = load(html);
221
+ const blocks = parseBlocks($);
222
+ const result = await normalizeText(blocks, { mode: 'full' });
223
+
224
+ console.log(result.text);
225
+ console.log(result.meta);
226
+ ```
227
+
228
+ ### Custom Classifiers
229
+
230
+ Filter blocks with custom logic:
231
+
232
+ ```typescript
233
+ import { combineClassifiers, defaultBlockClassifier } from 'scrapex';
234
+
235
+ const myClassifier = combineClassifiers(
236
+ defaultBlockClassifier,
237
+ (block) => {
238
+ if (block.text.includes('Advertisement')) {
239
+ return { accept: false, label: 'ad' };
240
+ }
241
+ return { accept: true };
242
+ }
243
+ );
244
+
245
+ const result = await scrape(url, {
246
+ normalize: { blockClassifier: myClassifier },
247
+ });
248
+ ```
249
+
250
+ ### Normalization Options
251
+
252
+ ```typescript
253
+ interface NormalizeOptions {
254
+ mode?: 'summary' | 'full'; // Output mode (default: 'full')
255
+ maxChars?: number; // Max characters in output
256
+ minChars?: number; // Minimum characters required
257
+ maxBlocks?: number; // Max blocks to process (default: 2000)
258
+ truncate?: 'sentence' | 'word' | 'char'; // Truncation strategy
259
+ dropSelectors?: string[]; // Selectors to drop before parsing
260
+ removeBoilerplate?: boolean; // Filter nav/footer/promos (default: true)
261
+ decodeEntities?: boolean; // Decode HTML entities (default: true)
262
+ normalizeUnicode?: boolean; // Normalize Unicode to NFC (default: true)
263
+ preserveLineBreaks?: boolean; // Preserve paragraph breaks (default: true)
264
+ stripLinks?: boolean; // Strip Markdown links (default: true)
265
+ includeHtml?: boolean; // Include raw HTML in blocks (default: false)
266
+ languageHint?: string; // Language hint for metadata
267
+ blockClassifier?: ContentBlockClassifier; // Custom classifier
268
+ debug?: boolean; // Include blocks in output
269
+ }
270
+ ```
271
+
272
+ ## Breaking Changes
177
273
 
178
274
  - LLM provider classes (e.g., `AnthropicProvider`) were removed. Use preset factories like
179
275
  `createOpenAI`, `createAnthropic`, `createOllama`, and `createLMStudio` instead.
@@ -409,6 +505,27 @@ const markdown = feedToMarkdown(result.data, { maxItems: 10 });
409
505
  const text = feedToText(result.data);
410
506
  ```
411
507
 
508
+ ### Normalizing Feed Items
509
+
510
+ Convert feed item content to clean, embedding-ready text:
511
+
512
+ ```typescript
513
+ import { RSSParser, normalizeFeedItem } from 'scrapex';
514
+
515
+ const parser = new RSSParser();
516
+ const result = parser.parse(feedXml);
517
+
518
+ for (const item of result.data.items) {
519
+ const normalized = await normalizeFeedItem(item, {
520
+ mode: 'full',
521
+ removeBoilerplate: true,
522
+ });
523
+
524
+ console.log(normalized.text); // Clean text from content/description
525
+ console.log(normalized.meta); // { charCount, tokenEstimate, hash, ... }
526
+ }
527
+ ```
528
+
412
529
  ### Custom Fields (Podcast/Media)
413
530
 
414
531
  Extract custom namespace fields like iTunes podcast tags:
@@ -429,6 +546,25 @@ console.log(item.customFields?.duration); // '10:00'
429
546
  console.log(item.customFields?.explicit); // 'no'
430
547
  ```
431
548
 
549
+ #### Attribute Extraction
550
+
551
+ Use `selector@attr` syntax to extract XML attribute values:
552
+
553
+ ```typescript
554
+ const parser = new RSSParser({
555
+ customFields: {
556
+ // Extract url attribute from media:thumbnail element
557
+ thumbnail: 'media\\:thumbnail@url',
558
+ // Extract url attribute from media:content element
559
+ mediaUrl: 'media\\:content@url',
560
+ },
561
+ });
562
+
563
+ const result = parser.parse(mediaRssFeed);
564
+ console.log(result.data.items[0]?.customFields?.thumbnail);
565
+ // => "https://example.com/images/thumbnail.jpg"
566
+ ```
567
+
432
568
  ### Security
433
569
 
434
570
  The RSS parser enforces strict URL security:
@@ -536,6 +672,8 @@ interface ScrapeOptions {
536
672
  llm?: LLMProvider; // LLM provider
537
673
  enhance?: EnhancementType[]; // LLM enhancements
538
674
  extract?: ExtractionSchema; // Structured extraction
675
+ embeddings?: EmbeddingOptions; // Vector embeddings
676
+ normalize?: NormalizeOptions; // Content normalization
539
677
  }
540
678
  ```
541
679
 
@@ -554,6 +692,29 @@ type EnhancementType =
554
692
  - Node.js 20+
555
693
  - TypeScript 5.0+ (for type imports)
556
694
 
695
+ ## Get Help
696
+
697
+ - [Documentation](https://scrapex.dev) - Guides and API reference
698
+ - [GitHub Issues](https://github.com/developer-rakeshpaul/scrapex/issues) - Bug reports and feature requests
699
+ - [GitHub Discussions](https://github.com/developer-rakeshpaul/scrapex/discussions) - Questions and ideas
700
+ - [Stack Overflow](https://stackoverflow.com/questions/tagged/scrapex) - Community Q&A
701
+
702
+ ## Contributing
703
+
704
+ Contributions are welcome, whether as bug reports, feature requests, or pull requests.
705
+
706
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on:
707
+ - Reporting bugs
708
+ - Suggesting features
709
+ - Submitting pull requests
710
+ - Development setup
711
+
712
+ ## Support
713
+
714
+ If you find scrapex useful, consider supporting its development:
715
+
716
+ [![Buy Me A Coffee](https://img.shields.io/badge/Buy%20Me%20A%20Coffee-support-yellow?style=flat&logo=buy-me-a-coffee)](https://buymeacoffee.com/binaryroute)
717
+
557
718
  ## License
558
719
 
559
720
  MIT
@@ -1,5 +1,5 @@
1
- const require_http_base = require('../http-base-CHLf-Tco.cjs');
2
- const require_embeddings = require('../embeddings-DHVu52VF.cjs');
1
+ const require_http_base = require('../http-base-B5EXbNwR.cjs');
2
+ const require_embeddings = require('../embeddings-BzRSRX9t.cjs');
3
3
 
4
4
  exports.CircuitBreaker = require_http_base.CircuitBreaker;
5
5
  exports.CircuitOpenError = require_http_base.CircuitOpenError;
@@ -1,3 +1,3 @@
1
- import { A as HttpEmbeddingProvider, B as withResilience, C as RateLimitConfig, D as SafetyConfig, E as RetryConfig, F as CircuitOpenError, H as withTimeout, I as RateLimiter, L as Semaphore, O as TextChunk, P as CircuitBreaker, R as createTimeoutSignal, S as PiiRedactionConfig, T as ResilienceState, V as withRetry, _ as EmbeddingSkipped, a as EmbedRequest, b as EmbeddingSuccessSingle, c as EmbeddingCache, d as EmbeddingInputType, f as EmbeddingMetrics, g as EmbeddingResult, h as EmbeddingProviderConfig, i as CircuitState, j as createHttpEmbedding, k as HttpEmbeddingConfig, l as EmbeddingCacheConfig, m as EmbeddingProvider, n as CircuitBreakerConfig, o as EmbedResponse, p as EmbeddingOptions, r as CircuitBreakerState, s as EmbeddingAggregation, t as ChunkingConfig, u as EmbeddingInputConfig, v as EmbeddingSource, w as ResilienceConfig, x as OutputConfig, y as EmbeddingSuccessMultiple, z as isRetryableError } from "../types-BYsD9u71.cjs";
2
- import { A as createNoOpCache, B as euclideanDistance, C as createTokenizer, D as needsChunking, E as heuristicTokenCount, F as validateCachedResult, H as normalizeVector, I as AggregationResult, L as aggregateVectors, M as generateChecksum, N as getDefaultCache, O as CacheStats, P as resetDefaultCache, R as cosineSimilarity, S as chunkText, T as getChunkingStats, V as getDimensions, _ as generateEmbeddings, a as createEmbeddingProvider, b as selectInput, c as createAzureEmbedding, d as createOllamaEmbedding, f as createOpenAIEmbedding, g as embedScrapedData, h as embed, i as redactPii, j as generateCacheKey, k as InMemoryEmbeddingCache, l as createCohereEmbedding, m as getDefaultModel, n as containsPii, o as isEmbeddingProvider, p as createTransformersEmbedding, r as createPiiRedactor, s as TRANSFORMERS_MODELS, t as RedactionResult, u as createHuggingFaceEmbedding, v as InputValidation, w as estimateTokens, x as validateInput, y as previewInput, z as dotProduct } from "../index-CliKPY5N.cjs";
1
+ import { A as HttpEmbeddingProvider, B as withResilience, C as RateLimitConfig, D as SafetyConfig, E as RetryConfig, F as CircuitOpenError, H as withTimeout, I as RateLimiter, L as Semaphore, O as TextChunk, P as CircuitBreaker, R as createTimeoutSignal, S as PiiRedactionConfig, T as ResilienceState, V as withRetry, _ as EmbeddingSkipped, a as EmbedRequest, b as EmbeddingSuccessSingle, c as EmbeddingCache, d as EmbeddingInputType, f as EmbeddingMetrics, g as EmbeddingResult, h as EmbeddingProviderConfig, i as CircuitState, j as createHttpEmbedding, k as HttpEmbeddingConfig, l as EmbeddingCacheConfig, m as EmbeddingProvider, n as CircuitBreakerConfig, o as EmbedResponse, p as EmbeddingOptions, r as CircuitBreakerState, s as EmbeddingAggregation, t as ChunkingConfig, u as EmbeddingInputConfig, v as EmbeddingSource, w as ResilienceConfig, x as OutputConfig, y as EmbeddingSuccessMultiple, z as isRetryableError } from "../types--8EgNAtb.cjs";
2
+ import { A as createNoOpCache, B as euclideanDistance, C as createTokenizer, D as needsChunking, E as heuristicTokenCount, F as validateCachedResult, H as normalizeVector, I as AggregationResult, L as aggregateVectors, M as generateChecksum, N as getDefaultCache, O as CacheStats, P as resetDefaultCache, R as cosineSimilarity, S as chunkText, T as getChunkingStats, V as getDimensions, _ as generateEmbeddings, a as createEmbeddingProvider, b as selectInput, c as createAzureEmbedding, d as createOllamaEmbedding, f as createOpenAIEmbedding, g as embedScrapedData, h as embed, i as redactPii, j as generateCacheKey, k as InMemoryEmbeddingCache, l as createCohereEmbedding, m as getDefaultModel, n as containsPii, o as isEmbeddingProvider, p as createTransformersEmbedding, r as createPiiRedactor, s as TRANSFORMERS_MODELS, t as RedactionResult, u as createHuggingFaceEmbedding, v as InputValidation, w as estimateTokens, x as validateInput, y as previewInput, z as dotProduct } from "../index-BKGgfF5C.cjs";
3
3
  export { AggregationResult, CacheStats, ChunkingConfig, CircuitBreaker, CircuitBreakerConfig, CircuitBreakerState, CircuitOpenError, CircuitState, EmbedRequest, EmbedResponse, EmbeddingAggregation, EmbeddingCache, EmbeddingCacheConfig, EmbeddingInputConfig, EmbeddingInputType, EmbeddingMetrics, EmbeddingOptions, EmbeddingProvider, EmbeddingProviderConfig, EmbeddingResult, EmbeddingSkipped, EmbeddingSource, EmbeddingSuccessMultiple, EmbeddingSuccessSingle, HttpEmbeddingConfig, HttpEmbeddingProvider, InMemoryEmbeddingCache, InputValidation, OutputConfig, PiiRedactionConfig, RateLimitConfig, RateLimiter, RedactionResult, ResilienceConfig, ResilienceState, RetryConfig, SafetyConfig, Semaphore, TRANSFORMERS_MODELS, TextChunk, aggregateVectors, chunkText, containsPii, cosineSimilarity, createAzureEmbedding, createCohereEmbedding, createEmbeddingProvider, createHttpEmbedding, createHuggingFaceEmbedding, createNoOpCache, createOllamaEmbedding, createOpenAIEmbedding, createPiiRedactor, createTimeoutSignal, createTokenizer, createTransformersEmbedding, dotProduct, embed, embedScrapedData, estimateTokens, euclideanDistance, generateCacheKey, generateChecksum, generateEmbeddings, getChunkingStats, getDefaultCache, getDefaultModel, getDimensions, heuristicTokenCount, isEmbeddingProvider, isRetryableError, needsChunking, normalizeVector, previewInput, redactPii, resetDefaultCache, selectInput, validateCachedResult, validateInput, withResilience, withRetry, withTimeout };
@@ -1,3 +1,3 @@
1
- import { A as HttpEmbeddingProvider, B as withResilience, C as RateLimitConfig, D as SafetyConfig, E as RetryConfig, F as CircuitOpenError, H as withTimeout, I as RateLimiter, L as Semaphore, O as TextChunk, P as CircuitBreaker, R as createTimeoutSignal, S as PiiRedactionConfig, T as ResilienceState, V as withRetry, _ as EmbeddingSkipped, a as EmbedRequest, b as EmbeddingSuccessSingle, c as EmbeddingCache, d as EmbeddingInputType, f as EmbeddingMetrics, g as EmbeddingResult, h as EmbeddingProviderConfig, i as CircuitState, j as createHttpEmbedding, k as HttpEmbeddingConfig, l as EmbeddingCacheConfig, m as EmbeddingProvider, n as CircuitBreakerConfig, o as EmbedResponse, p as EmbeddingOptions, r as CircuitBreakerState, s as EmbeddingAggregation, t as ChunkingConfig, u as EmbeddingInputConfig, v as EmbeddingSource, w as ResilienceConfig, x as OutputConfig, y as EmbeddingSuccessMultiple, z as isRetryableError } from "../types-BdlvuOtp.mjs";
2
- import { A as createNoOpCache, B as euclideanDistance, C as createTokenizer, D as needsChunking, E as heuristicTokenCount, F as validateCachedResult, H as normalizeVector, I as AggregationResult, L as aggregateVectors, M as generateChecksum, N as getDefaultCache, O as CacheStats, P as resetDefaultCache, R as cosineSimilarity, S as chunkText, T as getChunkingStats, V as getDimensions, _ as generateEmbeddings, a as createEmbeddingProvider, b as selectInput, c as createAzureEmbedding, d as createOllamaEmbedding, f as createOpenAIEmbedding, g as embedScrapedData, h as embed, i as redactPii, j as generateCacheKey, k as InMemoryEmbeddingCache, l as createCohereEmbedding, m as getDefaultModel, n as containsPii, o as isEmbeddingProvider, p as createTransformersEmbedding, r as createPiiRedactor, s as TRANSFORMERS_MODELS, t as RedactionResult, u as createHuggingFaceEmbedding, v as InputValidation, w as estimateTokens, x as validateInput, y as previewInput, z as dotProduct } from "../index-REk4CMxV.mjs";
1
+ import { A as HttpEmbeddingProvider, B as withResilience, C as RateLimitConfig, D as SafetyConfig, E as RetryConfig, F as CircuitOpenError, H as withTimeout, I as RateLimiter, L as Semaphore, O as TextChunk, P as CircuitBreaker, R as createTimeoutSignal, S as PiiRedactionConfig, T as ResilienceState, V as withRetry, _ as EmbeddingSkipped, a as EmbedRequest, b as EmbeddingSuccessSingle, c as EmbeddingCache, d as EmbeddingInputType, f as EmbeddingMetrics, g as EmbeddingResult, h as EmbeddingProviderConfig, i as CircuitState, j as createHttpEmbedding, k as HttpEmbeddingConfig, l as EmbeddingCacheConfig, m as EmbeddingProvider, n as CircuitBreakerConfig, o as EmbedResponse, p as EmbeddingOptions, r as CircuitBreakerState, s as EmbeddingAggregation, t as ChunkingConfig, u as EmbeddingInputConfig, v as EmbeddingSource, w as ResilienceConfig, x as OutputConfig, y as EmbeddingSuccessMultiple, z as isRetryableError } from "../types-dyXBh3Xy.mjs";
2
+ import { A as createNoOpCache, B as euclideanDistance, C as createTokenizer, D as needsChunking, E as heuristicTokenCount, F as validateCachedResult, H as normalizeVector, I as AggregationResult, L as aggregateVectors, M as generateChecksum, N as getDefaultCache, O as CacheStats, P as resetDefaultCache, R as cosineSimilarity, S as chunkText, T as getChunkingStats, V as getDimensions, _ as generateEmbeddings, a as createEmbeddingProvider, b as selectInput, c as createAzureEmbedding, d as createOllamaEmbedding, f as createOpenAIEmbedding, g as embedScrapedData, h as embed, i as redactPii, j as generateCacheKey, k as InMemoryEmbeddingCache, l as createCohereEmbedding, m as getDefaultModel, n as containsPii, o as isEmbeddingProvider, p as createTransformersEmbedding, r as createPiiRedactor, s as TRANSFORMERS_MODELS, t as RedactionResult, u as createHuggingFaceEmbedding, v as InputValidation, w as estimateTokens, x as validateInput, y as previewInput, z as dotProduct } from "../index-DoA7xuDF.mjs";
3
3
  export { AggregationResult, CacheStats, ChunkingConfig, CircuitBreaker, CircuitBreakerConfig, CircuitBreakerState, CircuitOpenError, CircuitState, EmbedRequest, EmbedResponse, EmbeddingAggregation, EmbeddingCache, EmbeddingCacheConfig, EmbeddingInputConfig, EmbeddingInputType, EmbeddingMetrics, EmbeddingOptions, EmbeddingProvider, EmbeddingProviderConfig, EmbeddingResult, EmbeddingSkipped, EmbeddingSource, EmbeddingSuccessMultiple, EmbeddingSuccessSingle, HttpEmbeddingConfig, HttpEmbeddingProvider, InMemoryEmbeddingCache, InputValidation, OutputConfig, PiiRedactionConfig, RateLimitConfig, RateLimiter, RedactionResult, ResilienceConfig, ResilienceState, RetryConfig, SafetyConfig, Semaphore, TRANSFORMERS_MODELS, TextChunk, aggregateVectors, chunkText, containsPii, cosineSimilarity, createAzureEmbedding, createCohereEmbedding, createEmbeddingProvider, createHttpEmbedding, createHuggingFaceEmbedding, createNoOpCache, createOllamaEmbedding, createOpenAIEmbedding, createPiiRedactor, createTimeoutSignal, createTokenizer, createTransformersEmbedding, dotProduct, embed, embedScrapedData, estimateTokens, euclideanDistance, generateCacheKey, generateChecksum, generateEmbeddings, getChunkingStats, getDefaultCache, getDefaultModel, getDimensions, heuristicTokenCount, isEmbeddingProvider, isRetryableError, needsChunking, normalizeVector, previewInput, redactPii, resetDefaultCache, selectInput, validateCachedResult, validateInput, withResilience, withRetry, withTimeout };
@@ -1,4 +1,4 @@
1
- import { a as Semaphore, c as withResilience, i as RateLimiter, l as withRetry, n as CircuitBreaker, o as createTimeoutSignal, r as CircuitOpenError, s as isRetryableError, u as withTimeout } from "../http-base-DM7YNo6X.mjs";
2
- import { A as generateCacheKey, B as normalizeVector, C as createTokenizer, D as needsChunking, E as heuristicTokenCount, F as aggregateVectors, I as cosineSimilarity, L as dotProduct, M as getDefaultCache, N as resetDefaultCache, O as InMemoryEmbeddingCache, P as validateCachedResult, R as euclideanDistance, S as chunkText, T as getChunkingStats, _ as createHttpEmbedding, a as createPiiRedactor, b as selectInput, c as isEmbeddingProvider, d as createCohereEmbedding, f as createHuggingFaceEmbedding, g as HttpEmbeddingProvider, h as createTransformersEmbedding, i as containsPii, j as generateChecksum, k as createNoOpCache, l as TRANSFORMERS_MODELS, m as createOpenAIEmbedding, n as embedScrapedData, o as redactPii, p as createOllamaEmbedding, r as generateEmbeddings, s as createEmbeddingProvider, t as embed, u as createAzureEmbedding, v as getDefaultModel, w as estimateTokens, x as validateInput, y as previewInput, z as getDimensions } from "../embeddings-Dama_IQl.mjs";
1
+ import { a as Semaphore, c as withResilience, i as RateLimiter, l as withRetry, n as CircuitBreaker, o as createTimeoutSignal, r as CircuitOpenError, s as isRetryableError, u as withTimeout } from "../http-base-B8qhcRit.mjs";
2
+ import { A as generateCacheKey, B as normalizeVector, C as createTokenizer, D as needsChunking, E as heuristicTokenCount, F as aggregateVectors, I as cosineSimilarity, L as dotProduct, M as getDefaultCache, N as resetDefaultCache, O as InMemoryEmbeddingCache, P as validateCachedResult, R as euclideanDistance, S as chunkText, T as getChunkingStats, _ as createHttpEmbedding, a as createPiiRedactor, b as selectInput, c as isEmbeddingProvider, d as createCohereEmbedding, f as createHuggingFaceEmbedding, g as HttpEmbeddingProvider, h as createTransformersEmbedding, i as containsPii, j as generateChecksum, k as createNoOpCache, l as TRANSFORMERS_MODELS, m as createOpenAIEmbedding, n as embedScrapedData, o as redactPii, p as createOllamaEmbedding, r as generateEmbeddings, s as createEmbeddingProvider, t as embed, u as createAzureEmbedding, v as getDefaultModel, w as estimateTokens, x as validateInput, y as previewInput, z as getDimensions } from "../embeddings-CukTWZVJ.mjs";
3
3
 
4
4
  export { CircuitBreaker, CircuitOpenError, HttpEmbeddingProvider, InMemoryEmbeddingCache, RateLimiter, Semaphore, TRANSFORMERS_MODELS, aggregateVectors, chunkText, containsPii, cosineSimilarity, createAzureEmbedding, createCohereEmbedding, createEmbeddingProvider, createHttpEmbedding, createHuggingFaceEmbedding, createNoOpCache, createOllamaEmbedding, createOpenAIEmbedding, createPiiRedactor, createTimeoutSignal, createTokenizer, createTransformersEmbedding, dotProduct, embed, embedScrapedData, estimateTokens, euclideanDistance, generateCacheKey, generateChecksum, generateEmbeddings, getChunkingStats, getDefaultCache, getDefaultModel, getDimensions, heuristicTokenCount, isEmbeddingProvider, isRetryableError, needsChunking, normalizeVector, previewInput, redactPii, resetDefaultCache, selectInput, validateCachedResult, validateInput, withResilience, withRetry, withTimeout };
@@ -1,5 +1,5 @@
1
- const require_parsers = require('./parsers-Bneuws8x.cjs');
2
- const require_http_base = require('./http-base-CHLf-Tco.cjs');
1
+ const require_parsers = require('./parsers-DsqevSf6.cjs');
2
+ const require_http_base = require('./http-base-B5EXbNwR.cjs');
3
3
  let node_crypto = require("node:crypto");
4
4
 
5
5
  //#region src/embeddings/aggregation.ts
@@ -1453,4 +1453,4 @@ Object.defineProperty(exports, 'validateInput', {
1453
1453
  return validateInput;
1454
1454
  }
1455
1455
  });
1456
- //# sourceMappingURL=embeddings-DHVu52VF.cjs.map
1456
+ //# sourceMappingURL=embeddings-BzRSRX9t.cjs.map