scrapex 1.0.0-alpha.1 → 1.0.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +164 -5
- package/dist/embeddings/index.cjs +52 -0
- package/dist/embeddings/index.d.cts +3 -0
- package/dist/embeddings/index.d.mts +3 -0
- package/dist/embeddings/index.mjs +4 -0
- package/dist/embeddings-BjNTQSG9.cjs +1455 -0
- package/dist/embeddings-BjNTQSG9.cjs.map +1 -0
- package/dist/embeddings-Bsymy_jA.mjs +1215 -0
- package/dist/embeddings-Bsymy_jA.mjs.map +1 -0
- package/dist/{enhancer-oM4BhYYS.cjs → enhancer-Cs_WyWtJ.cjs} +2 -51
- package/dist/enhancer-Cs_WyWtJ.cjs.map +1 -0
- package/dist/{enhancer-Q6CSc1gA.mjs → enhancer-INx5NlgO.mjs} +2 -45
- package/dist/enhancer-INx5NlgO.mjs.map +1 -0
- package/dist/http-base-CHLf-Tco.cjs +684 -0
- package/dist/http-base-CHLf-Tco.cjs.map +1 -0
- package/dist/http-base-DM7YNo6X.mjs +618 -0
- package/dist/http-base-DM7YNo6X.mjs.map +1 -0
- package/dist/index-Bvseqli-.d.cts +268 -0
- package/dist/index-Bvseqli-.d.cts.map +1 -0
- package/dist/index-CIFjNySr.d.mts +268 -0
- package/dist/index-CIFjNySr.d.mts.map +1 -0
- package/dist/index-D6qfjmZQ.d.mts +401 -0
- package/dist/index-D6qfjmZQ.d.mts.map +1 -0
- package/dist/index-RFSpP5g8.d.cts +401 -0
- package/dist/index-RFSpP5g8.d.cts.map +1 -0
- package/dist/index.cjs +171 -51
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +61 -2
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.mts +61 -2
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +129 -6
- package/dist/index.mjs.map +1 -1
- package/dist/llm/index.cjs +252 -233
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +132 -85
- package/dist/llm/index.d.cts.map +1 -1
- package/dist/llm/index.d.mts +132 -85
- package/dist/llm/index.d.mts.map +1 -1
- package/dist/llm/index.mjs +244 -236
- package/dist/llm/index.mjs.map +1 -1
- package/dist/parsers/index.cjs +10 -199
- package/dist/parsers/index.d.cts +2 -133
- package/dist/parsers/index.d.mts +2 -133
- package/dist/parsers/index.mjs +2 -191
- package/dist/parsers-Bneuws8x.cjs +569 -0
- package/dist/parsers-Bneuws8x.cjs.map +1 -0
- package/dist/parsers-DsawHeo0.mjs +482 -0
- package/dist/parsers-DsawHeo0.mjs.map +1 -0
- package/dist/types-BOcHQU9s.d.mts +831 -0
- package/dist/types-BOcHQU9s.d.mts.map +1 -0
- package/dist/types-DutdBpqd.d.cts +831 -0
- package/dist/types-DutdBpqd.d.cts.map +1 -0
- package/package.json +15 -16
- package/dist/enhancer-Q6CSc1gA.mjs.map +0 -1
- package/dist/enhancer-oM4BhYYS.cjs.map +0 -1
- package/dist/parsers/index.cjs.map +0 -1
- package/dist/parsers/index.d.cts.map +0 -1
- package/dist/parsers/index.d.mts.map +0 -1
- package/dist/parsers/index.mjs.map +0 -1
- package/dist/types-CNQZVW36.d.mts +0 -150
- package/dist/types-CNQZVW36.d.mts.map +0 -1
- package/dist/types-D0HYR95H.d.cts +0 -150
- package/dist/types-D0HYR95H.d.cts.map +0 -1
package/README.md
CHANGED
|
@@ -2,22 +2,24 @@
|
|
|
2
2
|
|
|
3
3
|
Modern web scraper with LLM-enhanced extraction, extensible pipeline, and pluggable parsers.
|
|
4
4
|
|
|
5
|
-
> **
|
|
5
|
+
> **Beta Release**: v1.0.0 is currently in beta. The API is stable, but minor changes may occur before the stable release.
|
|
6
6
|
|
|
7
7
|
## Features
|
|
8
8
|
|
|
9
9
|
- **LLM-Ready Output** - Content extracted as Markdown, optimized for AI/LLM consumption
|
|
10
10
|
- **Provider-Agnostic LLM** - Works with OpenAI, Anthropic, Ollama, LM Studio, or any OpenAI-compatible API
|
|
11
|
+
- **Vector Embeddings** - Generate embeddings with OpenAI, Azure, Cohere, HuggingFace, Ollama, or local Transformers.js
|
|
11
12
|
- **Extensible Pipeline** - Pluggable extractors with priority-based execution
|
|
12
13
|
- **Smart Extraction** - Uses Mozilla Readability for content, Cheerio for metadata
|
|
13
14
|
- **Markdown Parsing** - Parse markdown content, awesome lists, and GitHub repos
|
|
15
|
+
- **RSS/Atom Feeds** - Parse RSS 2.0, RSS 1.0 (RDF), and Atom feeds with pagination support
|
|
14
16
|
- **TypeScript First** - Full type safety with comprehensive type exports
|
|
15
17
|
- **Dual Format** - ESM and CommonJS builds
|
|
16
18
|
|
|
17
19
|
## Installation
|
|
18
20
|
|
|
19
21
|
```bash
|
|
20
|
-
npm install scrapex@
|
|
22
|
+
npm install scrapex@beta
|
|
21
23
|
```
|
|
22
24
|
|
|
23
25
|
### Optional Peer Dependencies
|
|
@@ -142,14 +144,48 @@ console.log(result.suggestedTags); // ['javascript', 'web', ...]
|
|
|
142
144
|
console.log(result.entities); // { people: [], organizations: [], ... }
|
|
143
145
|
```
|
|
144
146
|
|
|
147
|
+
### Embeddings
|
|
148
|
+
|
|
149
|
+
Generate vector embeddings from scraped content for semantic search, RAG, and similarity matching:
|
|
150
|
+
|
|
151
|
+
```typescript
|
|
152
|
+
import { scrape } from 'scrapex';
|
|
153
|
+
import { createOpenAIEmbedding } from 'scrapex/embeddings';
|
|
154
|
+
|
|
155
|
+
const result = await scrape('https://example.com/article', {
|
|
156
|
+
embeddings: {
|
|
157
|
+
provider: { type: 'custom', provider: createOpenAIEmbedding() },
|
|
158
|
+
model: 'text-embedding-3-small',
|
|
159
|
+
},
|
|
160
|
+
});
|
|
161
|
+
|
|
162
|
+
if (result.embeddings?.status === 'success') {
|
|
163
|
+
console.log(result.embeddings.vector); // [0.023, -0.041, ...]
|
|
164
|
+
}
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
Features include:
|
|
168
|
+
- **Multiple providers** - OpenAI, Azure, Cohere, HuggingFace, Ollama, Transformers.js
|
|
169
|
+
- **PII redaction** - Automatically redact emails, phones, SSNs before sending to APIs
|
|
170
|
+
- **Smart chunking** - Split long content with configurable overlap
|
|
171
|
+
- **Caching** - Content-addressable cache to avoid redundant API calls
|
|
172
|
+
- **Resilience** - Retry, circuit breaker, rate limiting
|
|
173
|
+
|
|
174
|
+
See the [Embeddings Guide](https://scrapex.dev/guides/embeddings) for full documentation.
|
|
175
|
+
|
|
176
|
+
## Breaking Changes (Beta)
|
|
177
|
+
|
|
178
|
+
- LLM provider classes (e.g., `AnthropicProvider`) were removed. Use preset factories like
|
|
179
|
+
`createOpenAI`, `createAnthropic`, `createOllama`, and `createLMStudio` instead.
|
|
180
|
+
|
|
145
181
|
### Using Anthropic Claude
|
|
146
182
|
|
|
147
183
|
```typescript
|
|
148
|
-
import {
|
|
184
|
+
import { createAnthropic } from 'scrapex/llm';
|
|
149
185
|
|
|
150
|
-
const llm =
|
|
186
|
+
const llm = createAnthropic({
|
|
151
187
|
apiKey: process.env.ANTHROPIC_API_KEY,
|
|
152
|
-
model: 'claude-3-5-haiku-20241022', // or 'claude-sonnet-
|
|
188
|
+
model: 'claude-3-5-haiku-20241022', // or 'claude-3-5-sonnet-20241022'
|
|
153
189
|
});
|
|
154
190
|
|
|
155
191
|
const result = await scrape(url, { llm, enhance: ['summarize'] });
|
|
@@ -279,6 +315,129 @@ toRawUrl('https://github.com/owner/repo');
|
|
|
279
315
|
// 'https://raw.githubusercontent.com/owner/repo/main/README.md'
|
|
280
316
|
```
|
|
281
317
|
|
|
318
|
+
## RSS/Atom Feed Parsing
|
|
319
|
+
|
|
320
|
+
Parse RSS 2.0, RSS 1.0 (RDF), and Atom 1.0 feeds:
|
|
321
|
+
|
|
322
|
+
```typescript
|
|
323
|
+
import { RSSParser } from 'scrapex';
|
|
324
|
+
|
|
325
|
+
const parser = new RSSParser();
|
|
326
|
+
const result = parser.parse(feedXml, 'https://example.com/feed.xml');
|
|
327
|
+
|
|
328
|
+
console.log(result.data.format); // 'rss2' | 'rss1' | 'atom'
|
|
329
|
+
console.log(result.data.title); // Feed title
|
|
330
|
+
console.log(result.data.items); // Array of feed items
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
**Supported formats:**
|
|
334
|
+
- `rss2` - RSS 2.0 (most common format)
|
|
335
|
+
- `rss1` - RSS 1.0 (RDF-based, older format)
|
|
336
|
+
- `atom` - Atom 1.0 (modern format with better semantics)
|
|
337
|
+
|
|
338
|
+
### Feed Item Structure
|
|
339
|
+
|
|
340
|
+
```typescript
|
|
341
|
+
interface FeedItem {
|
|
342
|
+
id: string;
|
|
343
|
+
title: string;
|
|
344
|
+
link: string;
|
|
345
|
+
description?: string;
|
|
346
|
+
content?: string;
|
|
347
|
+
author?: string;
|
|
348
|
+
publishedAt?: string; // ISO 8601
|
|
349
|
+
rawPublishedAt?: string; // Original date string
|
|
350
|
+
updatedAt?: string; // Atom only
|
|
351
|
+
categories: string[];
|
|
352
|
+
enclosure?: FeedEnclosure; // Podcast/media attachments
|
|
353
|
+
customFields?: Record<string, string>;
|
|
354
|
+
}
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
### Fetching and Parsing Feeds
|
|
358
|
+
|
|
359
|
+
```typescript
|
|
360
|
+
import { fetchFeed, paginateFeed } from 'scrapex';
|
|
361
|
+
|
|
362
|
+
// Fetch and parse in one call
|
|
363
|
+
const result = await fetchFeed('https://example.com/feed.xml');
|
|
364
|
+
console.log(result.data.items);
|
|
365
|
+
|
|
366
|
+
// Paginate through feeds with rel="next" links (Atom)
|
|
367
|
+
for await (const page of paginateFeed('https://example.com/atom')) {
|
|
368
|
+
console.log(`Page with ${page.data.items.length} items`);
|
|
369
|
+
}
|
|
370
|
+
```
|
|
371
|
+
|
|
372
|
+
### Discovering Feeds in HTML
|
|
373
|
+
|
|
374
|
+
```typescript
|
|
375
|
+
import { discoverFeeds } from 'scrapex';
|
|
376
|
+
|
|
377
|
+
const html = await fetch('https://example.com').then(r => r.text());
|
|
378
|
+
const feedUrls = discoverFeeds(html, 'https://example.com');
|
|
379
|
+
// ['https://example.com/feed.xml', 'https://example.com/atom.xml']
|
|
380
|
+
```
|
|
381
|
+
|
|
382
|
+
### Filtering by Date
|
|
383
|
+
|
|
384
|
+
```typescript
|
|
385
|
+
import { RSSParser, filterByDate } from 'scrapex';
|
|
386
|
+
|
|
387
|
+
const parser = new RSSParser();
|
|
388
|
+
const result = parser.parse(feedXml);
|
|
389
|
+
|
|
390
|
+
const recentItems = filterByDate(result.data.items, {
|
|
391
|
+
after: new Date('2024-01-01'),
|
|
392
|
+
before: new Date('2024-12-31'),
|
|
393
|
+
includeUndated: false,
|
|
394
|
+
});
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
### Converting to Markdown/Text
|
|
398
|
+
|
|
399
|
+
```typescript
|
|
400
|
+
import { RSSParser, feedToMarkdown, feedToText } from 'scrapex';
|
|
401
|
+
|
|
402
|
+
const parser = new RSSParser();
|
|
403
|
+
const result = parser.parse(feedXml);
|
|
404
|
+
|
|
405
|
+
// Convert to markdown (great for LLM consumption)
|
|
406
|
+
const markdown = feedToMarkdown(result.data, { maxItems: 10 });
|
|
407
|
+
|
|
408
|
+
// Convert to plain text
|
|
409
|
+
const text = feedToText(result.data);
|
|
410
|
+
```
|
|
411
|
+
|
|
412
|
+
### Custom Fields (Podcast/Media)
|
|
413
|
+
|
|
414
|
+
Extract custom namespace fields like iTunes podcast tags:
|
|
415
|
+
|
|
416
|
+
```typescript
|
|
417
|
+
const parser = new RSSParser({
|
|
418
|
+
customFields: {
|
|
419
|
+
duration: 'itunes\\:duration',
|
|
420
|
+
explicit: 'itunes\\:explicit',
|
|
421
|
+
rating: 'media\\:rating',
|
|
422
|
+
},
|
|
423
|
+
});
|
|
424
|
+
|
|
425
|
+
const result = parser.parse(podcastXml);
|
|
426
|
+
const item = result.data.items[0];
|
|
427
|
+
|
|
428
|
+
console.log(item.customFields?.duration); // '10:00'
|
|
429
|
+
console.log(item.customFields?.explicit); // 'no'
|
|
430
|
+
```
|
|
431
|
+
|
|
432
|
+
### Security
|
|
433
|
+
|
|
434
|
+
The RSS parser enforces strict URL security:
|
|
435
|
+
|
|
436
|
+
- **HTTPS-only URLs (RSS parser only)**: The RSS/Atom parser (`RSSParser`) accepts only links that resolve to HTTPS URLs. Non-HTTPS URLs (http, javascript, data, file) are rejected and replaced with empty strings. This restriction is specific to feed parsing and exists to prevent malicious links in untrusted feeds.
|
|
437
|
+
- **XML Mode**: Feeds are parsed with Cheerio's `{ xml: true }` mode, which disables HTML entity processing and prevents XSS vectors.
|
|
438
|
+
|
|
439
|
+
> **Note**: The public URL utilities (`resolveUrl`, `isValidUrl`, etc.) accept both `http:` and `https:` URLs. Protocol-relative URLs (e.g., `//example.com/path`) are resolved against the base URL's protocol by the standard `URL` constructor.
|
|
440
|
+
|
|
282
441
|
## URL Utilities
|
|
283
442
|
|
|
284
443
|
```typescript
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
const require_http_base = require('../http-base-CHLf-Tco.cjs');
|
|
2
|
+
const require_embeddings = require('../embeddings-BjNTQSG9.cjs');
|
|
3
|
+
|
|
4
|
+
exports.CircuitBreaker = require_http_base.CircuitBreaker;
|
|
5
|
+
exports.CircuitOpenError = require_http_base.CircuitOpenError;
|
|
6
|
+
exports.HttpEmbeddingProvider = require_embeddings.HttpEmbeddingProvider;
|
|
7
|
+
exports.InMemoryEmbeddingCache = require_embeddings.InMemoryEmbeddingCache;
|
|
8
|
+
exports.RateLimiter = require_http_base.RateLimiter;
|
|
9
|
+
exports.Semaphore = require_http_base.Semaphore;
|
|
10
|
+
exports.TRANSFORMERS_MODELS = require_embeddings.TRANSFORMERS_MODELS;
|
|
11
|
+
exports.aggregateVectors = require_embeddings.aggregateVectors;
|
|
12
|
+
exports.chunkText = require_embeddings.chunkText;
|
|
13
|
+
exports.containsPii = require_embeddings.containsPii;
|
|
14
|
+
exports.cosineSimilarity = require_embeddings.cosineSimilarity;
|
|
15
|
+
exports.createAzureEmbedding = require_embeddings.createAzureEmbedding;
|
|
16
|
+
exports.createCohereEmbedding = require_embeddings.createCohereEmbedding;
|
|
17
|
+
exports.createEmbeddingProvider = require_embeddings.createEmbeddingProvider;
|
|
18
|
+
exports.createHttpEmbedding = require_embeddings.createHttpEmbedding;
|
|
19
|
+
exports.createHuggingFaceEmbedding = require_embeddings.createHuggingFaceEmbedding;
|
|
20
|
+
exports.createNoOpCache = require_embeddings.createNoOpCache;
|
|
21
|
+
exports.createOllamaEmbedding = require_embeddings.createOllamaEmbedding;
|
|
22
|
+
exports.createOpenAIEmbedding = require_embeddings.createOpenAIEmbedding;
|
|
23
|
+
exports.createPiiRedactor = require_embeddings.createPiiRedactor;
|
|
24
|
+
exports.createTimeoutSignal = require_http_base.createTimeoutSignal;
|
|
25
|
+
exports.createTokenizer = require_embeddings.createTokenizer;
|
|
26
|
+
exports.createTransformersEmbedding = require_embeddings.createTransformersEmbedding;
|
|
27
|
+
exports.dotProduct = require_embeddings.dotProduct;
|
|
28
|
+
exports.embed = require_embeddings.embed;
|
|
29
|
+
exports.embedScrapedData = require_embeddings.embedScrapedData;
|
|
30
|
+
exports.estimateTokens = require_embeddings.estimateTokens;
|
|
31
|
+
exports.euclideanDistance = require_embeddings.euclideanDistance;
|
|
32
|
+
exports.generateCacheKey = require_embeddings.generateCacheKey;
|
|
33
|
+
exports.generateChecksum = require_embeddings.generateChecksum;
|
|
34
|
+
exports.generateEmbeddings = require_embeddings.generateEmbeddings;
|
|
35
|
+
exports.getChunkingStats = require_embeddings.getChunkingStats;
|
|
36
|
+
exports.getDefaultCache = require_embeddings.getDefaultCache;
|
|
37
|
+
exports.getDefaultModel = require_embeddings.getDefaultModel;
|
|
38
|
+
exports.getDimensions = require_embeddings.getDimensions;
|
|
39
|
+
exports.heuristicTokenCount = require_embeddings.heuristicTokenCount;
|
|
40
|
+
exports.isEmbeddingProvider = require_embeddings.isEmbeddingProvider;
|
|
41
|
+
exports.isRetryableError = require_http_base.isRetryableError;
|
|
42
|
+
exports.needsChunking = require_embeddings.needsChunking;
|
|
43
|
+
exports.normalizeVector = require_embeddings.normalizeVector;
|
|
44
|
+
exports.previewInput = require_embeddings.previewInput;
|
|
45
|
+
exports.redactPii = require_embeddings.redactPii;
|
|
46
|
+
exports.resetDefaultCache = require_embeddings.resetDefaultCache;
|
|
47
|
+
exports.selectInput = require_embeddings.selectInput;
|
|
48
|
+
exports.validateCachedResult = require_embeddings.validateCachedResult;
|
|
49
|
+
exports.validateInput = require_embeddings.validateInput;
|
|
50
|
+
exports.withResilience = require_http_base.withResilience;
|
|
51
|
+
exports.withRetry = require_http_base.withRetry;
|
|
52
|
+
exports.withTimeout = require_http_base.withTimeout;
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import { A as HttpEmbeddingProvider, B as withResilience, C as RateLimitConfig, D as SafetyConfig, E as RetryConfig, F as CircuitOpenError, H as withTimeout, I as RateLimiter, L as Semaphore, O as TextChunk, P as CircuitBreaker, R as createTimeoutSignal, S as PiiRedactionConfig, T as ResilienceState, V as withRetry, _ as EmbeddingSkipped, a as EmbedRequest, b as EmbeddingSuccessSingle, c as EmbeddingCache, d as EmbeddingInputType, f as EmbeddingMetrics, g as EmbeddingResult, h as EmbeddingProviderConfig, i as CircuitState, j as createHttpEmbedding, k as HttpEmbeddingConfig, l as EmbeddingCacheConfig, m as EmbeddingProvider, n as CircuitBreakerConfig, o as EmbedResponse, p as EmbeddingOptions, r as CircuitBreakerState, s as EmbeddingAggregation, t as ChunkingConfig, u as EmbeddingInputConfig, v as EmbeddingSource, w as ResilienceConfig, x as OutputConfig, y as EmbeddingSuccessMultiple, z as isRetryableError } from "../types-DutdBpqd.cjs";
|
|
2
|
+
import { A as createNoOpCache, B as euclideanDistance, C as createTokenizer, D as needsChunking, E as heuristicTokenCount, F as validateCachedResult, H as normalizeVector, I as AggregationResult, L as aggregateVectors, M as generateChecksum, N as getDefaultCache, O as CacheStats, P as resetDefaultCache, R as cosineSimilarity, S as chunkText, T as getChunkingStats, V as getDimensions, _ as generateEmbeddings, a as createEmbeddingProvider, b as selectInput, c as createAzureEmbedding, d as createOllamaEmbedding, f as createOpenAIEmbedding, g as embedScrapedData, h as embed, i as redactPii, j as generateCacheKey, k as InMemoryEmbeddingCache, l as createCohereEmbedding, m as getDefaultModel, n as containsPii, o as isEmbeddingProvider, p as createTransformersEmbedding, r as createPiiRedactor, s as TRANSFORMERS_MODELS, t as RedactionResult, u as createHuggingFaceEmbedding, v as InputValidation, w as estimateTokens, x as validateInput, y as previewInput, z as dotProduct } from "../index-RFSpP5g8.cjs";
|
|
3
|
+
export { AggregationResult, CacheStats, ChunkingConfig, CircuitBreaker, CircuitBreakerConfig, CircuitBreakerState, CircuitOpenError, CircuitState, EmbedRequest, EmbedResponse, EmbeddingAggregation, EmbeddingCache, EmbeddingCacheConfig, EmbeddingInputConfig, EmbeddingInputType, EmbeddingMetrics, EmbeddingOptions, EmbeddingProvider, EmbeddingProviderConfig, EmbeddingResult, EmbeddingSkipped, EmbeddingSource, EmbeddingSuccessMultiple, EmbeddingSuccessSingle, HttpEmbeddingConfig, HttpEmbeddingProvider, InMemoryEmbeddingCache, InputValidation, OutputConfig, PiiRedactionConfig, RateLimitConfig, RateLimiter, RedactionResult, ResilienceConfig, ResilienceState, RetryConfig, SafetyConfig, Semaphore, TRANSFORMERS_MODELS, TextChunk, aggregateVectors, chunkText, containsPii, cosineSimilarity, createAzureEmbedding, createCohereEmbedding, createEmbeddingProvider, createHttpEmbedding, createHuggingFaceEmbedding, createNoOpCache, createOllamaEmbedding, createOpenAIEmbedding, createPiiRedactor, createTimeoutSignal, createTokenizer, createTransformersEmbedding, dotProduct, embed, embedScrapedData, estimateTokens, euclideanDistance, generateCacheKey, generateChecksum, generateEmbeddings, getChunkingStats, getDefaultCache, getDefaultModel, getDimensions, heuristicTokenCount, isEmbeddingProvider, isRetryableError, needsChunking, normalizeVector, previewInput, redactPii, resetDefaultCache, selectInput, validateCachedResult, validateInput, withResilience, withRetry, withTimeout };
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import { A as HttpEmbeddingProvider, B as withResilience, C as RateLimitConfig, D as SafetyConfig, E as RetryConfig, F as CircuitOpenError, H as withTimeout, I as RateLimiter, L as Semaphore, O as TextChunk, P as CircuitBreaker, R as createTimeoutSignal, S as PiiRedactionConfig, T as ResilienceState, V as withRetry, _ as EmbeddingSkipped, a as EmbedRequest, b as EmbeddingSuccessSingle, c as EmbeddingCache, d as EmbeddingInputType, f as EmbeddingMetrics, g as EmbeddingResult, h as EmbeddingProviderConfig, i as CircuitState, j as createHttpEmbedding, k as HttpEmbeddingConfig, l as EmbeddingCacheConfig, m as EmbeddingProvider, n as CircuitBreakerConfig, o as EmbedResponse, p as EmbeddingOptions, r as CircuitBreakerState, s as EmbeddingAggregation, t as ChunkingConfig, u as EmbeddingInputConfig, v as EmbeddingSource, w as ResilienceConfig, x as OutputConfig, y as EmbeddingSuccessMultiple, z as isRetryableError } from "../types-BOcHQU9s.mjs";
|
|
2
|
+
import { A as createNoOpCache, B as euclideanDistance, C as createTokenizer, D as needsChunking, E as heuristicTokenCount, F as validateCachedResult, H as normalizeVector, I as AggregationResult, L as aggregateVectors, M as generateChecksum, N as getDefaultCache, O as CacheStats, P as resetDefaultCache, R as cosineSimilarity, S as chunkText, T as getChunkingStats, V as getDimensions, _ as generateEmbeddings, a as createEmbeddingProvider, b as selectInput, c as createAzureEmbedding, d as createOllamaEmbedding, f as createOpenAIEmbedding, g as embedScrapedData, h as embed, i as redactPii, j as generateCacheKey, k as InMemoryEmbeddingCache, l as createCohereEmbedding, m as getDefaultModel, n as containsPii, o as isEmbeddingProvider, p as createTransformersEmbedding, r as createPiiRedactor, s as TRANSFORMERS_MODELS, t as RedactionResult, u as createHuggingFaceEmbedding, v as InputValidation, w as estimateTokens, x as validateInput, y as previewInput, z as dotProduct } from "../index-D6qfjmZQ.mjs";
|
|
3
|
+
export { AggregationResult, CacheStats, ChunkingConfig, CircuitBreaker, CircuitBreakerConfig, CircuitBreakerState, CircuitOpenError, CircuitState, EmbedRequest, EmbedResponse, EmbeddingAggregation, EmbeddingCache, EmbeddingCacheConfig, EmbeddingInputConfig, EmbeddingInputType, EmbeddingMetrics, EmbeddingOptions, EmbeddingProvider, EmbeddingProviderConfig, EmbeddingResult, EmbeddingSkipped, EmbeddingSource, EmbeddingSuccessMultiple, EmbeddingSuccessSingle, HttpEmbeddingConfig, HttpEmbeddingProvider, InMemoryEmbeddingCache, InputValidation, OutputConfig, PiiRedactionConfig, RateLimitConfig, RateLimiter, RedactionResult, ResilienceConfig, ResilienceState, RetryConfig, SafetyConfig, Semaphore, TRANSFORMERS_MODELS, TextChunk, aggregateVectors, chunkText, containsPii, cosineSimilarity, createAzureEmbedding, createCohereEmbedding, createEmbeddingProvider, createHttpEmbedding, createHuggingFaceEmbedding, createNoOpCache, createOllamaEmbedding, createOpenAIEmbedding, createPiiRedactor, createTimeoutSignal, createTokenizer, createTransformersEmbedding, dotProduct, embed, embedScrapedData, estimateTokens, euclideanDistance, generateCacheKey, generateChecksum, generateEmbeddings, getChunkingStats, getDefaultCache, getDefaultModel, getDimensions, heuristicTokenCount, isEmbeddingProvider, isRetryableError, needsChunking, normalizeVector, previewInput, redactPii, resetDefaultCache, selectInput, validateCachedResult, validateInput, withResilience, withRetry, withTimeout };
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import { a as Semaphore, c as withResilience, i as RateLimiter, l as withRetry, n as CircuitBreaker, o as createTimeoutSignal, r as CircuitOpenError, s as isRetryableError, u as withTimeout } from "../http-base-DM7YNo6X.mjs";
|
|
2
|
+
import { A as generateCacheKey, B as normalizeVector, C as createTokenizer, D as needsChunking, E as heuristicTokenCount, F as aggregateVectors, I as cosineSimilarity, L as dotProduct, M as getDefaultCache, N as resetDefaultCache, O as InMemoryEmbeddingCache, P as validateCachedResult, R as euclideanDistance, S as chunkText, T as getChunkingStats, _ as createHttpEmbedding, a as createPiiRedactor, b as selectInput, c as isEmbeddingProvider, d as createCohereEmbedding, f as createHuggingFaceEmbedding, g as HttpEmbeddingProvider, h as createTransformersEmbedding, i as containsPii, j as generateChecksum, k as createNoOpCache, l as TRANSFORMERS_MODELS, m as createOpenAIEmbedding, n as embedScrapedData, o as redactPii, p as createOllamaEmbedding, r as generateEmbeddings, s as createEmbeddingProvider, t as embed, u as createAzureEmbedding, v as getDefaultModel, w as estimateTokens, x as validateInput, y as previewInput, z as getDimensions } from "../embeddings-Bsymy_jA.mjs";
|
|
3
|
+
|
|
4
|
+
export { CircuitBreaker, CircuitOpenError, HttpEmbeddingProvider, InMemoryEmbeddingCache, RateLimiter, Semaphore, TRANSFORMERS_MODELS, aggregateVectors, chunkText, containsPii, cosineSimilarity, createAzureEmbedding, createCohereEmbedding, createEmbeddingProvider, createHttpEmbedding, createHuggingFaceEmbedding, createNoOpCache, createOllamaEmbedding, createOpenAIEmbedding, createPiiRedactor, createTimeoutSignal, createTokenizer, createTransformersEmbedding, dotProduct, embed, embedScrapedData, estimateTokens, euclideanDistance, generateCacheKey, generateChecksum, generateEmbeddings, getChunkingStats, getDefaultCache, getDefaultModel, getDimensions, heuristicTokenCount, isEmbeddingProvider, isRetryableError, needsChunking, normalizeVector, previewInput, redactPii, resetDefaultCache, selectInput, validateCachedResult, validateInput, withResilience, withRetry, withTimeout };
|