scrapex 1.0.0-alpha.1 → 1.0.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/README.md +164 -5
  2. package/dist/embeddings/index.cjs +52 -0
  3. package/dist/embeddings/index.d.cts +3 -0
  4. package/dist/embeddings/index.d.mts +3 -0
  5. package/dist/embeddings/index.mjs +4 -0
  6. package/dist/embeddings-BjNTQSG9.cjs +1455 -0
  7. package/dist/embeddings-BjNTQSG9.cjs.map +1 -0
  8. package/dist/embeddings-Bsymy_jA.mjs +1215 -0
  9. package/dist/embeddings-Bsymy_jA.mjs.map +1 -0
  10. package/dist/{enhancer-oM4BhYYS.cjs → enhancer-Cs_WyWtJ.cjs} +2 -51
  11. package/dist/enhancer-Cs_WyWtJ.cjs.map +1 -0
  12. package/dist/{enhancer-Q6CSc1gA.mjs → enhancer-INx5NlgO.mjs} +2 -45
  13. package/dist/enhancer-INx5NlgO.mjs.map +1 -0
  14. package/dist/http-base-CHLf-Tco.cjs +684 -0
  15. package/dist/http-base-CHLf-Tco.cjs.map +1 -0
  16. package/dist/http-base-DM7YNo6X.mjs +618 -0
  17. package/dist/http-base-DM7YNo6X.mjs.map +1 -0
  18. package/dist/index-Bvseqli-.d.cts +268 -0
  19. package/dist/index-Bvseqli-.d.cts.map +1 -0
  20. package/dist/index-CIFjNySr.d.mts +268 -0
  21. package/dist/index-CIFjNySr.d.mts.map +1 -0
  22. package/dist/index-D6qfjmZQ.d.mts +401 -0
  23. package/dist/index-D6qfjmZQ.d.mts.map +1 -0
  24. package/dist/index-RFSpP5g8.d.cts +401 -0
  25. package/dist/index-RFSpP5g8.d.cts.map +1 -0
  26. package/dist/index.cjs +171 -51
  27. package/dist/index.cjs.map +1 -1
  28. package/dist/index.d.cts +61 -2
  29. package/dist/index.d.cts.map +1 -1
  30. package/dist/index.d.mts +61 -2
  31. package/dist/index.d.mts.map +1 -1
  32. package/dist/index.mjs +129 -6
  33. package/dist/index.mjs.map +1 -1
  34. package/dist/llm/index.cjs +252 -233
  35. package/dist/llm/index.cjs.map +1 -1
  36. package/dist/llm/index.d.cts +132 -85
  37. package/dist/llm/index.d.cts.map +1 -1
  38. package/dist/llm/index.d.mts +132 -85
  39. package/dist/llm/index.d.mts.map +1 -1
  40. package/dist/llm/index.mjs +244 -236
  41. package/dist/llm/index.mjs.map +1 -1
  42. package/dist/parsers/index.cjs +10 -199
  43. package/dist/parsers/index.d.cts +2 -133
  44. package/dist/parsers/index.d.mts +2 -133
  45. package/dist/parsers/index.mjs +2 -191
  46. package/dist/parsers-Bneuws8x.cjs +569 -0
  47. package/dist/parsers-Bneuws8x.cjs.map +1 -0
  48. package/dist/parsers-DsawHeo0.mjs +482 -0
  49. package/dist/parsers-DsawHeo0.mjs.map +1 -0
  50. package/dist/types-BOcHQU9s.d.mts +831 -0
  51. package/dist/types-BOcHQU9s.d.mts.map +1 -0
  52. package/dist/types-DutdBpqd.d.cts +831 -0
  53. package/dist/types-DutdBpqd.d.cts.map +1 -0
  54. package/package.json +15 -16
  55. package/dist/enhancer-Q6CSc1gA.mjs.map +0 -1
  56. package/dist/enhancer-oM4BhYYS.cjs.map +0 -1
  57. package/dist/parsers/index.cjs.map +0 -1
  58. package/dist/parsers/index.d.cts.map +0 -1
  59. package/dist/parsers/index.d.mts.map +0 -1
  60. package/dist/parsers/index.mjs.map +0 -1
  61. package/dist/types-CNQZVW36.d.mts +0 -150
  62. package/dist/types-CNQZVW36.d.mts.map +0 -1
  63. package/dist/types-D0HYR95H.d.cts +0 -150
  64. package/dist/types-D0HYR95H.d.cts.map +0 -1
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embeddings-BjNTQSG9.cjs","names":["_exhaustive: never","result: number[]","normalized: Record<string, unknown>","oldestKey: string | null","defaultCache: InMemoryEmbeddingCache | null","DEFAULT_CHUNK_SIZE","chunks: TextChunk[]","chunkText","_exhaustive: never","parts: string[]","_exhaustive: never","BaseHttpProvider","ScrapeError","headers: Record<string, string>","pipeline: FeatureExtractionPipeline | null","currentModel: string | null","embeddings: number[][]","ScrapeError","patterns: Array<{ name: string; pattern: RegExp }>","redactionsByType: Record<string, number>","fullConfig: PiiRedactionConfig","patterns: RegExp[]","RateLimiter","CircuitBreaker","Semaphore","embeddings: number[][]","withResilience","result","source: EmbeddingSource","result: EmbeddingResult","metrics: EmbeddingMetrics","ScrapeError"],"sources":["../src/embeddings/aggregation.ts","../src/embeddings/cache.ts","../src/embeddings/chunking.ts","../src/embeddings/input.ts","../src/embeddings/providers/base.ts","../src/embeddings/providers/http.ts","../src/embeddings/providers/presets.ts","../src/embeddings/providers/index.ts","../src/embeddings/safety.ts","../src/embeddings/pipeline.ts"],"sourcesContent":["import type { EmbeddingAggregation } from './types.js';\n\n/**\n * Aggregate multiple embedding vectors into a single vector or return all.\n *\n * @param vectors - Array of embedding vectors (must all have same dimensions)\n * @param strategy - Aggregation strategy\n * @returns Aggregated result based on strategy\n */\nexport function aggregateVectors(\n vectors: number[][],\n strategy: EmbeddingAggregation = 'average'\n): AggregationResult {\n if (vectors.length === 0) {\n throw new Error('Cannot aggregate empty vector array');\n }\n\n // Validate all vectors have same dimensions\n const firstVector = vectors[0];\n if (!firstVector) {\n throw new Error('Cannot aggregate empty vector array');\n }\n\n const dimensions = firstVector.length;\n for (let i = 1; i < vectors.length; i++) {\n const vec = vectors[i];\n if (!vec || vec.length !== dimensions) {\n throw new Error(\n `Vector dimension mismatch: expected ${dimensions}, got ${vec?.length ?? 0} at index ${i}`\n );\n }\n }\n\n switch (strategy) {\n case 'average':\n return {\n type: 'single',\n vector: averageVectors(vectors),\n dimensions,\n };\n\n case 'max':\n return {\n type: 'single',\n vector: maxPoolVectors(vectors),\n dimensions,\n };\n\n case 'first':\n return {\n type: 'single',\n vector: firstVector,\n dimensions,\n };\n\n case 'all':\n return {\n type: 'multiple',\n vectors,\n dimensions,\n };\n\n default: {\n // Exhaustive check\n const _exhaustive: never = strategy;\n throw new Error(`Unknown aggregation strategy: ${_exhaustive}`);\n }\n }\n}\n\n/**\n * Result of vector aggregation.\n */\nexport type AggregationResult =\n | { type: 'single'; vector: number[]; dimensions: number }\n | { type: 'multiple'; vectors: number[][]; dimensions: number };\n\n/**\n * Compute element-wise average of vectors.\n */\nfunction averageVectors(vectors: number[][]): number[] {\n const first = vectors[0];\n if (!first || vectors.length === 1) {\n return first ?? [];\n }\n\n const dimensions = first.length;\n const count = vectors.length;\n const result: number[] = new Array<number>(dimensions).fill(0);\n\n for (const vector of vectors) {\n for (let i = 0; i < dimensions; i++) {\n const val = result[i];\n if (val !== undefined) {\n result[i] = val + (vector[i] ?? 0);\n }\n }\n }\n\n for (let i = 0; i < dimensions; i++) {\n const val = result[i];\n if (val !== undefined) {\n result[i] = val / count;\n }\n }\n\n return result;\n}\n\n/**\n * Compute element-wise maximum of vectors (max pooling).\n */\nfunction maxPoolVectors(vectors: number[][]): number[] {\n const first = vectors[0];\n if (!first || vectors.length === 1) {\n return first ?? [];\n }\n\n const dimensions = first.length;\n const result = [...first]; // Start with copy of first vector\n\n for (let v = 1; v < vectors.length; v++) {\n const vec = vectors[v];\n if (!vec) continue;\n for (let i = 0; i < dimensions; i++) {\n const val = vec[i] ?? 0;\n const curr = result[i] ?? 0;\n if (val > curr) {\n result[i] = val;\n }\n }\n }\n\n return result;\n}\n\n/**\n * Normalize a vector to unit length (L2 normalization).\n */\nexport function normalizeVector(vector: number[]): number[] {\n const magnitude = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));\n\n if (magnitude === 0) {\n return vector;\n }\n\n return vector.map((val) => val / magnitude);\n}\n\n/**\n * Compute cosine similarity between two vectors.\n * Both vectors should be normalized for accurate results.\n */\nexport function cosineSimilarity(a: number[], b: number[]): number {\n if (a.length !== b.length) {\n throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);\n }\n\n let dot = 0;\n let magnitudeA = 0;\n let magnitudeB = 0;\n\n for (let i = 0; i < a.length; i++) {\n const aVal = a[i] ?? 0;\n const bVal = b[i] ?? 0;\n dot += aVal * bVal;\n magnitudeA += aVal * aVal;\n magnitudeB += bVal * bVal;\n }\n\n const magnitude = Math.sqrt(magnitudeA) * Math.sqrt(magnitudeB);\n\n if (magnitude === 0) {\n return 0;\n }\n\n return dot / magnitude;\n}\n\n/**\n * Compute euclidean distance between two vectors.\n */\nexport function euclideanDistance(a: number[], b: number[]): number {\n if (a.length !== b.length) {\n throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);\n }\n\n let sum = 0;\n for (let i = 0; i < a.length; i++) {\n const aVal = a[i] ?? 0;\n const bVal = b[i] ?? 0;\n const diff = aVal - bVal;\n sum += diff * diff;\n }\n\n return Math.sqrt(sum);\n}\n\n/**\n * Compute dot product of two vectors.\n */\nexport function dotProduct(a: number[], b: number[]): number {\n if (a.length !== b.length) {\n throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);\n }\n\n let result = 0;\n for (let i = 0; i < a.length; i++) {\n const aVal = a[i] ?? 0;\n const bVal = b[i] ?? 0;\n result += aVal * bVal;\n }\n\n return result;\n}\n\n/**\n * Get the dimensions of a vector or set of vectors.\n */\nexport function getDimensions(vectors: number[] | number[][]): number {\n if (vectors.length === 0) {\n return 0;\n }\n\n const first = vectors[0];\n\n // Check if it's a single vector or array of vectors\n if (typeof first === 'number') {\n return vectors.length;\n }\n\n return first?.length ?? 0;\n}\n","import { createHash } from 'node:crypto';\nimport type {\n ChunkingConfig,\n EmbeddingAggregation,\n EmbeddingCache,\n EmbeddingInputConfig,\n EmbeddingResult,\n PiiRedactionConfig,\n SafetyConfig,\n} from './types.js';\n\n/**\n * Default maximum cache entries.\n */\nconst DEFAULT_MAX_ENTRIES = 1000;\n\n/**\n * Default TTL in milliseconds (1 hour).\n */\nconst DEFAULT_TTL_MS = 60 * 60 * 1000;\n\nexport interface CacheKeyParams {\n providerKey: string;\n /** Model identifier (may be undefined for custom providers) */\n model?: string;\n dimensions?: number;\n aggregation?: EmbeddingAggregation;\n input?: EmbeddingInputConfig;\n chunking?: ChunkingConfig;\n safety?: SafetyConfig;\n cacheKeySalt?: string;\n content: string;\n}\n\n/**\n * Generate a content-addressable cache key.\n * Key is based on content hash and embedding configuration.\n * Note: custom RegExp patterns are serialized by source+flags; different\n * constructions can yield different cache keys even if equivalent.\n */\nexport function generateCacheKey(params: CacheKeyParams): string {\n const hash = createHash('sha256');\n\n const fingerprint = stableStringify({\n providerKey: params.providerKey,\n model: params.model ?? 'provider-default',\n dimensions: params.dimensions ?? 'default',\n aggregation: params.aggregation ?? 'average',\n input: serializeInputConfig(params.input),\n chunking: serializeChunkingConfig(params.chunking),\n safety: serializeSafetyConfig(params.safety),\n cacheKeySalt: params.cacheKeySalt,\n });\n\n hash.update(fingerprint);\n hash.update('\\0'); // Separator\n hash.update(params.content);\n\n return hash.digest('hex');\n}\n\n/**\n * Generate a checksum for content verification.\n */\nexport function generateChecksum(content: string): string {\n return createHash('sha256').update(content).digest('hex').slice(0, 16);\n}\n\nfunction serializeInputConfig(config?: EmbeddingInputConfig): Record<string, unknown> | undefined {\n if (!config) return undefined;\n\n return normalizeObject({\n type: config.type ?? 'textContent',\n hasTransform: Boolean(config.transform),\n hasCustomText: Boolean(config.customText),\n });\n}\n\nfunction serializeChunkingConfig(config?: ChunkingConfig): Record<string, unknown> | undefined {\n if (!config) return undefined;\n\n return normalizeObject({\n size: config.size,\n overlap: config.overlap,\n tokenizer: getTokenizerId(config.tokenizer),\n maxInputLength: config.maxInputLength,\n });\n}\n\nfunction serializeSafetyConfig(config?: SafetyConfig): Record<string, unknown> | undefined {\n if (!config) return undefined;\n\n return normalizeObject({\n piiRedaction: serializePiiConfig(config.piiRedaction),\n minTextLength: config.minTextLength,\n maxTokens: config.maxTokens,\n });\n}\n\nfunction serializePiiConfig(config?: PiiRedactionConfig): Record<string, unknown> | undefined {\n if (!config) return undefined;\n\n return normalizeObject({\n email: config.email ?? false,\n phone: config.phone ?? false,\n creditCard: config.creditCard ?? false,\n ssn: config.ssn ?? false,\n ipAddress: config.ipAddress ?? false,\n customPatterns: config.customPatterns?.map((pattern) => `${pattern.source}/${pattern.flags}`),\n });\n}\n\nfunction getTokenizerId(tokenizer: ChunkingConfig['tokenizer']): string {\n if (!tokenizer || tokenizer === 'heuristic') {\n return 'heuristic';\n }\n\n if (tokenizer === 'tiktoken') {\n return 'tiktoken';\n }\n\n return 'custom';\n}\n\nfunction stableStringify(value: unknown): string {\n const normalized = normalizeValue(value);\n return stringifyNormalized(normalized);\n}\n\nfunction normalizeValue(value: unknown): unknown {\n if (value === undefined) return undefined;\n if (value === null) return null;\n\n if (Array.isArray(value)) {\n const normalized = value\n .map((entry) => normalizeValue(entry))\n .filter((entry) => entry !== undefined);\n return normalized;\n }\n\n if (typeof value === 'object') {\n return normalizeObject(value as Record<string, unknown>);\n }\n\n return value;\n}\n\nfunction normalizeObject(value: Record<string, unknown>): Record<string, unknown> {\n const normalized: Record<string, unknown> = {};\n\n for (const key of Object.keys(value).sort()) {\n const entry = normalizeValue(value[key]);\n if (entry !== undefined) {\n normalized[key] = entry;\n }\n }\n\n return normalized;\n}\n\nfunction stringifyNormalized(value: unknown): string {\n if (value === undefined) return 'undefined';\n if (value === null) return 'null';\n\n if (typeof value === 'string') {\n return JSON.stringify(value);\n }\n\n if (typeof value === 'number' || typeof value === 'boolean') {\n return String(value);\n }\n\n if (Array.isArray(value)) {\n return `[${value.map((entry) => stringifyNormalized(entry)).join(',')}]`;\n }\n\n if (typeof value === 'object') {\n const obj = value as Record<string, unknown>;\n const entries = Object.keys(obj)\n .sort()\n .map((key) => `${JSON.stringify(key)}:${stringifyNormalized(obj[key])}`);\n return `{${entries.join(',')}}`;\n }\n\n return JSON.stringify(value);\n}\n\n/**\n * Cache entry with metadata for LRU and TTL management.\n */\ninterface CacheEntry {\n value: EmbeddingResult;\n createdAt: number;\n expiresAt: number;\n accessedAt: number;\n}\n\n/**\n * In-memory LRU cache with TTL support.\n * Content-addressable: uses content hash as key, not URL.\n */\nexport class InMemoryEmbeddingCache implements EmbeddingCache {\n private cache: Map<string, CacheEntry>;\n private readonly maxEntries: number;\n private readonly defaultTtlMs: number;\n\n constructor(options?: { maxEntries?: number; ttlMs?: number }) {\n this.cache = new Map();\n this.maxEntries = options?.maxEntries ?? DEFAULT_MAX_ENTRIES;\n this.defaultTtlMs = options?.ttlMs ?? DEFAULT_TTL_MS;\n }\n\n async get(key: string): Promise<EmbeddingResult | undefined> {\n const entry = this.cache.get(key);\n\n if (!entry) {\n return undefined;\n }\n\n const now = Date.now();\n\n // Check TTL expiration\n if (now > entry.expiresAt) {\n this.cache.delete(key);\n return undefined;\n }\n\n // Update access time for LRU\n entry.accessedAt = now;\n\n return entry.value;\n }\n\n async set(key: string, value: EmbeddingResult, options?: { ttlMs?: number }): Promise<void> {\n const now = Date.now();\n const ttl = options?.ttlMs ?? this.defaultTtlMs;\n\n // Enforce max entries with LRU eviction\n if (this.cache.size >= this.maxEntries && !this.cache.has(key)) {\n this.evictLRU();\n }\n\n this.cache.set(key, {\n value,\n createdAt: now,\n expiresAt: now + ttl,\n accessedAt: now,\n });\n }\n\n async delete(key: string): Promise<boolean> {\n return this.cache.delete(key);\n }\n\n async clear(): Promise<void> {\n this.cache.clear();\n }\n\n /**\n * Get cache statistics.\n */\n getStats(): CacheStats {\n const now = Date.now();\n let expired = 0;\n\n for (const entry of this.cache.values()) {\n if (now > entry.expiresAt) {\n expired++;\n }\n }\n\n return {\n size: this.cache.size,\n maxEntries: this.maxEntries,\n expired,\n utilization: this.cache.size / this.maxEntries,\n };\n }\n\n /**\n * Evict expired entries.\n */\n cleanup(): number {\n const now = Date.now();\n let evicted = 0;\n\n for (const [key, entry] of this.cache.entries()) {\n if (now > entry.expiresAt) {\n this.cache.delete(key);\n evicted++;\n }\n }\n\n return evicted;\n }\n\n /**\n * Evict least recently used entry.\n */\n private evictLRU(): void {\n let oldestKey: string | null = null;\n let oldestAccess = Number.POSITIVE_INFINITY;\n\n for (const [key, entry] of this.cache.entries()) {\n if (entry.accessedAt < oldestAccess) {\n oldestAccess = entry.accessedAt;\n oldestKey = key;\n }\n }\n\n if (oldestKey) {\n this.cache.delete(oldestKey);\n }\n }\n}\n\n/**\n * Cache statistics.\n */\nexport interface CacheStats {\n /** Current number of entries */\n size: number;\n /** Maximum allowed entries */\n maxEntries: number;\n /** Number of expired entries (not yet cleaned up) */\n expired: number;\n /** Cache utilization (0-1) */\n utilization: number;\n}\n\n/**\n * Validate that a cached result matches expected parameters.\n */\nexport function validateCachedResult(\n result: EmbeddingResult,\n expectedDimensions?: number\n): boolean {\n if (result.status !== 'success') {\n return true; // Skipped results are always valid\n }\n\n if (!expectedDimensions) {\n return true; // No dimension check required\n }\n\n if (result.aggregation === 'all') {\n const firstVec = result.vectors[0];\n if (!firstVec) {\n return false;\n }\n return firstVec.length === expectedDimensions;\n }\n\n return result.vector.length === expectedDimensions;\n}\n\n/**\n * Create a no-op cache that never stores anything.\n * Useful for disabling caching while maintaining interface compatibility.\n */\nexport function createNoOpCache(): EmbeddingCache {\n return {\n async get(): Promise<undefined> {\n return undefined;\n },\n async set(): Promise<void> {\n // No-op\n },\n async delete(): Promise<boolean> {\n return false;\n },\n async clear(): Promise<void> {\n // No-op\n },\n };\n}\n\n/**\n * Default in-memory cache instance.\n * Optimized for moderate cache sizes (default 1000 entries).\n */\nlet defaultCache: InMemoryEmbeddingCache | null = null;\n\n/**\n * Get or create the default cache instance.\n */\nexport function getDefaultCache(): InMemoryEmbeddingCache {\n if (!defaultCache) {\n defaultCache = new InMemoryEmbeddingCache();\n }\n return defaultCache;\n}\n\n/**\n * Reset the default cache (mainly for testing).\n */\nexport async function resetDefaultCache(): Promise<void> {\n if (defaultCache) {\n await defaultCache.clear();\n }\n defaultCache = null;\n}\n","import type { ChunkingConfig, TextChunk } from './types.js';\n\n/**\n * Default chunk size in tokens.\n */\nconst DEFAULT_CHUNK_SIZE = 500;\n\n/**\n * Default overlap in tokens.\n */\nconst DEFAULT_OVERLAP = 50;\n\n/**\n * Default maximum input length in characters.\n */\nconst DEFAULT_MAX_INPUT_LENGTH = 100_000;\n\n/**\n * Heuristic token counting: approximately 4 characters per token.\n * This is a reasonable approximation for English text.\n */\nexport function heuristicTokenCount(text: string): number {\n return Math.ceil(text.length / 4);\n}\n\n/**\n * Convert token count to approximate character count.\n */\nfunction tokensToChars(tokens: number): number {\n return tokens * 4;\n}\n\n/**\n * Create a tokenizer function based on configuration.\n */\nexport function createTokenizer(config?: ChunkingConfig['tokenizer']): (text: string) => number {\n if (!config || config === 'heuristic') {\n return heuristicTokenCount;\n }\n\n if (config === 'tiktoken') {\n // LIMITATION: tiktoken requires async initialization which is incompatible\n // with the synchronous tokenizer interface. The API accepts 'tiktoken' as\n // a value for future compatibility, but currently falls back to heuristic.\n //\n // For accurate OpenAI token counting, users should provide a custom\n // tokenizer function that wraps a pre-initialized tiktoken encoder:\n //\n // ```ts\n // import { get_encoding } from 'tiktoken';\n // const encoder = get_encoding('cl100k_base');\n // const tokenizer = (text: string) => encoder.encode(text).length;\n // ```\n return heuristicTokenCount;\n }\n\n // Custom tokenizer function\n return config;\n}\n\n/**\n * Find a natural break point in text (sentence or word boundary).\n * Prefers common sentence boundaries (Latin + CJK), falls back to word boundaries.\n */\nfunction findBreakPoint(text: string, targetIndex: number): number {\n // Look for sentence boundary within 20% of target\n const searchStart = Math.max(0, targetIndex - Math.floor(targetIndex * 0.2));\n const searchEnd = Math.min(text.length, targetIndex + Math.floor(targetIndex * 0.2));\n const searchText = text.slice(searchStart, searchEnd);\n\n // Find last sentence boundary before target\n const sentenceMatch = /[.!?。!?]\\s*/g;\n let lastSentenceEnd = -1;\n\n for (const match of searchText.matchAll(sentenceMatch)) {\n const absolutePos = searchStart + match.index + match[0].length;\n if (absolutePos <= targetIndex) {\n lastSentenceEnd = absolutePos;\n }\n }\n\n if (lastSentenceEnd !== -1) {\n return lastSentenceEnd;\n }\n\n // Fall back to word boundary\n const wordBoundary = text.lastIndexOf(' ', targetIndex);\n if (wordBoundary > searchStart) {\n return wordBoundary + 1; // Include the space in previous chunk\n }\n\n // No good break point found, use target\n return targetIndex;\n}\n\n/**\n * Split text into overlapping chunks optimized for embedding.\n * Respects sentence boundaries when possible.\n */\nexport function chunkText(text: string, config?: ChunkingConfig): TextChunk[] {\n const chunkSize = config?.size ?? DEFAULT_CHUNK_SIZE;\n const rawOverlap = config?.overlap ?? DEFAULT_OVERLAP;\n const safeOverlap = Math.max(0, rawOverlap);\n const overlap = Math.min(safeOverlap, Math.max(0, chunkSize - 1)); // Ensure overlap < size\n const maxInputLength = config?.maxInputLength ?? DEFAULT_MAX_INPUT_LENGTH;\n const tokenizer = createTokenizer(config?.tokenizer);\n\n // Truncate if exceeding max input length\n const processedText = text.length > maxInputLength ? text.slice(0, maxInputLength) : text;\n\n // Normalize whitespace\n const normalizedText = processedText.replace(/\\s+/g, ' ').trim();\n\n if (!normalizedText) {\n return [];\n }\n\n const totalTokens = tokenizer(normalizedText);\n\n // If text fits in one chunk, return as single chunk\n if (totalTokens <= chunkSize) {\n return [\n {\n text: normalizedText,\n startIndex: 0,\n endIndex: normalizedText.length,\n tokens: totalTokens,\n },\n ];\n }\n\n const chunks: TextChunk[] = [];\n const chunkSizeChars = tokensToChars(chunkSize);\n const overlapChars = tokensToChars(overlap);\n\n let startIndex = 0;\n\n while (startIndex < normalizedText.length) {\n // Calculate target end position\n const targetEnd = Math.min(startIndex + chunkSizeChars, normalizedText.length);\n\n // Find natural break point if not at end\n const endIndex =\n targetEnd < normalizedText.length ? findBreakPoint(normalizedText, targetEnd) : targetEnd;\n\n const chunkText = normalizedText.slice(startIndex, endIndex).trim();\n\n if (chunkText) {\n chunks.push({\n text: chunkText,\n startIndex,\n endIndex,\n tokens: tokenizer(chunkText),\n });\n }\n\n // Move start position with overlap\n if (endIndex >= normalizedText.length) {\n break;\n }\n\n // Calculate next start with overlap\n const nextStart = endIndex - overlapChars;\n startIndex = Math.max(nextStart, startIndex + 1);\n\n // Find word boundary for overlap start\n if (startIndex < normalizedText.length) {\n const spaceIndex = normalizedText.indexOf(' ', startIndex);\n if (spaceIndex !== -1 && spaceIndex < startIndex + overlapChars) {\n startIndex = spaceIndex + 1;\n }\n }\n }\n\n return chunks;\n}\n\n/**\n * Estimate total tokens for a text without chunking.\n */\nexport function estimateTokens(text: string, tokenizer?: ChunkingConfig['tokenizer']): number {\n const count = createTokenizer(tokenizer);\n return count(text);\n}\n\n/**\n * Check if text needs chunking based on token count.\n */\nexport function needsChunking(\n text: string,\n maxTokens = DEFAULT_CHUNK_SIZE,\n tokenizer?: ChunkingConfig['tokenizer']\n): boolean {\n const count = createTokenizer(tokenizer);\n return count(text) > maxTokens;\n}\n\n/**\n * Get statistics about potential chunking.\n */\nexport function getChunkingStats(\n text: string,\n config?: ChunkingConfig\n): {\n inputLength: number;\n estimatedTokens: number;\n estimatedChunks: number;\n willTruncate: boolean;\n} {\n const maxInputLength = config?.maxInputLength ?? DEFAULT_MAX_INPUT_LENGTH;\n const chunkSize = config?.size ?? DEFAULT_CHUNK_SIZE;\n const overlap = config?.overlap ?? DEFAULT_OVERLAP;\n const tokenizer = createTokenizer(config?.tokenizer);\n\n const inputLength = text.length;\n const willTruncate = inputLength > maxInputLength;\n const processedLength = willTruncate ? maxInputLength : inputLength;\n\n // Normalize for accurate token estimation\n const normalized = text.slice(0, processedLength).replace(/\\s+/g, ' ').trim();\n const estimatedTokens = tokenizer(normalized);\n\n // Calculate estimated chunks\n let estimatedChunks = 1;\n if (estimatedTokens > chunkSize) {\n const clampedOverlap = Math.min(overlap, Math.max(0, chunkSize - 1));\n const effectiveChunkSize = Math.max(1, chunkSize - clampedOverlap);\n estimatedChunks = Math.ceil((estimatedTokens - clampedOverlap) / effectiveChunkSize);\n }\n\n return {\n inputLength,\n estimatedTokens,\n estimatedChunks,\n willTruncate,\n };\n}\n","import type { ScrapedData } from '../core/types.js';\nimport type { EmbeddingInputConfig } from './types.js';\n\n/**\n * Select and prepare input text for embedding based on configuration.\n *\n * @param data - Scraped data to extract input from\n * @param config - Input configuration\n * @returns Selected and prepared text, or undefined if no valid input\n */\nexport function selectInput(\n data: Partial<ScrapedData>,\n config?: EmbeddingInputConfig\n): string | undefined {\n // If transform function is provided, use it directly\n if (config?.transform) {\n const transformed = config.transform(data);\n return normalizeText(transformed);\n }\n\n // If custom text is provided and type is 'custom', use it\n if (config?.type === 'custom' && config.customText) {\n return normalizeText(config.customText);\n }\n\n // Select based on type\n const type = config?.type ?? 'textContent';\n\n switch (type) {\n case 'textContent':\n return selectTextContent(data);\n\n case 'title+summary':\n return selectTitleSummary(data);\n\n case 'custom':\n // Custom without customText - fall back to textContent\n return selectTextContent(data);\n\n default: {\n // Exhaustive check\n const _exhaustive: never = type;\n throw new Error(`Unknown input type: ${_exhaustive}`);\n }\n }\n}\n\n/**\n * Select textContent as input.\n */\nfunction selectTextContent(data: Partial<ScrapedData>): string | undefined {\n if (data.textContent) {\n return normalizeText(data.textContent);\n }\n\n // Fallback chain: content (markdown) -> excerpt -> description\n if (data.content) {\n return normalizeText(stripMarkdown(data.content));\n }\n\n if (data.excerpt) {\n return normalizeText(data.excerpt);\n }\n\n if (data.description) {\n return normalizeText(data.description);\n }\n\n return undefined;\n}\n\n/**\n * Select title + summary (or fallbacks) as input.\n * Optimized for semantic search and classification.\n */\nfunction selectTitleSummary(data: Partial<ScrapedData>): string | undefined {\n const parts: string[] = [];\n\n // Title is always included if available\n if (data.title) {\n parts.push(data.title);\n }\n\n // Prefer summary, fall back to excerpt or description\n if (data.summary) {\n parts.push(data.summary);\n } else if (data.excerpt) {\n parts.push(data.excerpt);\n } else if (data.description) {\n parts.push(data.description);\n }\n\n if (parts.length === 0) {\n return undefined;\n }\n\n return normalizeText(parts.join('\\n\\n'));\n}\n\n/**\n * Normalize text for embedding:\n * - Collapse whitespace\n * - Trim leading/trailing whitespace\n * - Remove control characters\n */\nfunction normalizeText(text: string): string {\n if (!text) {\n return '';\n }\n\n // biome-ignore lint/suspicious/noControlCharactersInRegex: Intentionally removing control characters for text sanitization\n const controlCharRegex = /[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F]/g;\n return (\n text\n // Remove control characters (except newlines and tabs)\n .replace(controlCharRegex, '')\n // Collapse multiple spaces/tabs to single space\n .replace(/[ \\t]+/g, ' ')\n // Collapse multiple newlines to double newline (paragraph break)\n .replace(/\\n{3,}/g, '\\n\\n')\n // Trim each line\n .split('\\n')\n .map((line) => line.trim())\n .join('\\n')\n // Trim entire text\n .trim()\n );\n}\n\n/**\n * Basic markdown stripping for when we need plain text from content.\n * Not comprehensive, but handles common cases.\n */\nfunction stripMarkdown(markdown: string): string {\n return (\n markdown\n // Remove code blocks\n .replace(/```[\\s\\S]*?```/g, '')\n .replace(/`[^`]+`/g, '')\n // Remove links but keep text\n .replace(/\\[([^\\]]+)\\]\\([^)]+\\)/g, '$1')\n // Remove images\n .replace(/!\\[([^\\]]*)\\]\\([^)]+\\)/g, '$1')\n // Remove headers\n .replace(/^#{1,6}\\s+/gm, '')\n // Remove emphasis\n .replace(/\\*\\*([^*]+)\\*\\*/g, '$1')\n .replace(/\\*([^*]+)\\*/g, '$1')\n .replace(/__([^_]+)__/g, '$1')\n .replace(/_([^_]+)_/g, '$1')\n // Remove blockquotes\n .replace(/^>\\s+/gm, '')\n // Remove horizontal rules\n .replace(/^[-*_]{3,}$/gm, '')\n // Remove list markers\n .replace(/^[\\s]*[-*+]\\s+/gm, '')\n .replace(/^[\\s]*\\d+\\.\\s+/gm, '')\n );\n}\n\n/**\n * Check if the selected input meets minimum requirements.\n */\nexport function validateInput(text: string | undefined, minLength = 10): InputValidation {\n if (!text) {\n return {\n valid: false,\n reason: 'No input text available',\n };\n }\n\n if (text.length < minLength) {\n return {\n valid: false,\n reason: `Input too short (${text.length} < ${minLength} characters)`,\n };\n }\n\n // Check for meaningful content (not just whitespace/punctuation)\n const wordCount = text.split(/\\s+/).filter((w) => w.length > 1).length;\n if (wordCount < 3) {\n return {\n valid: false,\n reason: `Input has too few words (${wordCount} < 3)`,\n };\n }\n\n return {\n valid: true,\n text,\n wordCount,\n charCount: text.length,\n };\n}\n\n/**\n * Result of input validation.\n */\nexport type InputValidation =\n | { valid: false; reason: string }\n | { valid: true; text: string; wordCount: number; charCount: number };\n\n/**\n * Get a preview of what input would be selected.\n * Useful for debugging and testing.\n */\nexport function previewInput(\n data: Partial<ScrapedData>,\n config?: EmbeddingInputConfig,\n maxLength = 200\n): string {\n const input = selectInput(data, config);\n\n if (!input) {\n return '[No input available]';\n }\n\n if (input.length <= maxLength) {\n return input;\n }\n\n return `${input.slice(0, maxLength)}...`;\n}\n","/**\n * Embedding provider base utilities.\n * SSRF protection and HTTP utilities are now in src/common/http-base.ts\n */\n\nimport { ScrapeError } from '../../core/errors.js';\nimport type { EmbeddingProviderConfig, EmbedResponse } from '../types.js';\n\n/**\n * Generate a stable cache key identifier for provider configuration.\n */\nexport function getProviderCacheKey(config: EmbeddingProviderConfig): string {\n switch (config.type) {\n case 'http': {\n const baseUrl = config.config.baseUrl.replace(/\\/$/, '');\n return `http:${baseUrl}:${config.config.model}`;\n }\n case 'custom':\n return `custom:${config.provider.name}`;\n default: {\n const _exhaustive: never = config;\n return String(_exhaustive);\n }\n }\n}\n\n/**\n * Validate embedding response structure.\n */\nexport function validateEmbedResponse(response: unknown, expectedCount: number): EmbedResponse {\n if (!response || typeof response !== 'object') {\n throw new ScrapeError('Invalid embedding response: expected object', 'VALIDATION_ERROR');\n }\n\n const resp = response as Record<string, unknown>;\n\n // Check for embeddings array\n let embeddings: number[][];\n\n if (Array.isArray(resp.embeddings)) {\n embeddings = resp.embeddings;\n } else if (Array.isArray(resp.data)) {\n // OpenAI format: { data: [{ embedding: [...] }] }\n embeddings = resp.data.map((item: unknown) => {\n if (typeof item !== 'object' || item === null) {\n throw new ScrapeError(\n 'Invalid embedding response: data item is not an object',\n 'VALIDATION_ERROR'\n );\n }\n const embedding = (item as Record<string, unknown>).embedding;\n if (!Array.isArray(embedding)) {\n throw new ScrapeError(\n 'Invalid embedding response: missing embedding in data item',\n 'VALIDATION_ERROR'\n );\n }\n return embedding as number[];\n });\n } else if (Array.isArray(resp.embedding)) {\n // Single embedding format: { embedding: [...] }\n embeddings = [resp.embedding];\n } else {\n throw new ScrapeError(\n 'Invalid embedding response: missing embeddings array',\n 'VALIDATION_ERROR'\n );\n }\n\n // Validate count\n if (embeddings.length !== expectedCount) {\n throw new ScrapeError(\n `Embedding count mismatch: expected ${expectedCount}, got ${embeddings.length}`,\n 'VALIDATION_ERROR'\n );\n }\n\n // Validate dimensions consistency\n if (embeddings.length > 0) {\n const firstEmbed = embeddings[0];\n if (!firstEmbed) {\n throw new ScrapeError(\n 'Invalid embedding response: empty first embedding',\n 'VALIDATION_ERROR'\n );\n }\n const dimensions = firstEmbed.length;\n for (let i = 1; i < embeddings.length; i++) {\n const embed = embeddings[i];\n if (!embed || embed.length !== dimensions) {\n throw new ScrapeError(\n `Embedding dimension mismatch at index ${i}: expected ${dimensions}, got ${embed?.length ?? 0}`,\n 'VALIDATION_ERROR'\n );\n }\n }\n\n // Validate values are numbers\n for (const embedding of embeddings) {\n for (const value of embedding) {\n if (typeof value !== 'number' || !Number.isFinite(value)) {\n throw new ScrapeError(\n 'Invalid embedding value: expected finite number',\n 'VALIDATION_ERROR'\n );\n }\n }\n }\n }\n\n // Extract usage if present\n const usage = resp.usage as { prompt_tokens?: number; total_tokens?: number } | undefined;\n\n return {\n embeddings,\n usage: usage\n ? {\n promptTokens: usage.prompt_tokens ?? 0,\n totalTokens: usage.total_tokens ?? 0,\n }\n : undefined,\n };\n}\n\n/**\n * Get default model for a provider type.\n */\nexport function getDefaultModel(providerType: string): string {\n switch (providerType) {\n case 'openai':\n return 'text-embedding-3-small';\n case 'azure':\n return 'text-embedding-ada-002';\n case 'transformers':\n return 'Xenova/all-MiniLM-L6-v2';\n default:\n return 'default';\n }\n}\n\n// Note: createHeaders is available from 'src/common/http-base.ts' if needed\n\n/**\n * Handle common API errors and convert to ScrapeError.\n */\nexport function handleApiError(error: unknown, providerName: string): never {\n if (error instanceof ScrapeError) {\n throw error;\n }\n\n const statusCode =\n typeof (error as { statusCode?: number }).statusCode === 'number'\n ? (error as { statusCode: number }).statusCode\n : typeof (error as { status?: number }).status === 'number'\n ? (error as { status: number }).status\n : undefined;\n\n if (statusCode) {\n if (statusCode === 429) {\n throw new ScrapeError(`${providerName} rate limit exceeded`, 'BLOCKED', statusCode);\n }\n if (statusCode === 401 || statusCode === 403) {\n throw new ScrapeError(`${providerName} authentication failed`, 'BLOCKED', statusCode);\n }\n if (statusCode === 408 || statusCode === 504) {\n throw new ScrapeError(`${providerName} request timed out`, 'TIMEOUT', statusCode);\n }\n if (statusCode >= 500) {\n throw new ScrapeError(`${providerName} server error`, 'FETCH_FAILED', statusCode);\n }\n }\n\n if (error instanceof Error) {\n const lowerMessage = error.message.toLowerCase();\n\n // Check for rate limiting\n if (lowerMessage.includes('rate limit') || lowerMessage.includes('429')) {\n throw new ScrapeError(`${providerName} rate limit exceeded: ${error.message}`, 'BLOCKED');\n }\n\n // Check for auth errors\n if (lowerMessage.includes('401') || lowerMessage.includes('unauthorized')) {\n throw new ScrapeError(`${providerName} authentication failed: ${error.message}`, 'BLOCKED');\n }\n\n // Check for timeout\n if (lowerMessage.includes('timeout') || error.name === 'AbortError') {\n throw new ScrapeError(`${providerName} request timed out: ${error.message}`, 'TIMEOUT');\n }\n\n throw new ScrapeError(`${providerName} embedding failed: ${error.message}`, 'LLM_ERROR');\n }\n\n throw new ScrapeError(`${providerName} embedding failed: ${String(error)}`, 'LLM_ERROR');\n}\n\n/**\n * Parse error response from API.\n */\nexport async function parseErrorResponse(response: Response): Promise<string> {\n try {\n const text = await response.text();\n try {\n const json = JSON.parse(text);\n return json.error?.message || json.message || text;\n } catch {\n return text;\n }\n } catch {\n return `HTTP ${response.status} ${response.statusText}`;\n }\n}\n","/**\n * HTTP-based Embedding Provider using native fetch.\n * Provides a unified interface for any REST-based embedding API.\n */\n\nimport { type BaseHttpConfig, BaseHttpProvider } from '../../common/http-base.js';\nimport { ScrapeError } from '../../core/errors.js';\nimport type { EmbeddingProvider, EmbedRequest, EmbedResponse } from '../types.js';\n\n// ─────────────────────────────────────────────────────────────\n// Types\n// ─────────────────────────────────────────────────────────────\n\n/**\n * HTTP embedding provider configuration.\n */\nexport interface HttpEmbeddingConfig<TRequest = unknown, TResponse = unknown, TError = unknown>\n extends BaseHttpConfig<TError> {\n /**\n * Build request body from input texts.\n * @default { input: texts, model }\n */\n requestBuilder?: (texts: string[], model: string) => TRequest;\n /**\n * Extract embeddings array from response.\n * @default (res) => res.data.map(d => d.embedding)\n */\n responseMapper?: (response: TResponse) => number[][];\n}\n\n// ─────────────────────────────────────────────────────────────\n// HTTP Embedding Provider\n// ─────────────────────────────────────────────────────────────\n\n/**\n * HTTP-based embedding provider.\n * Works with any REST API using native fetch.\n */\nexport class HttpEmbeddingProvider<TRequest = unknown, TResponse = unknown, TError = unknown>\n extends BaseHttpProvider<TError>\n implements EmbeddingProvider\n{\n readonly name = 'http-embedding';\n\n private readonly requestBuilder: (texts: string[], model: string) => TRequest;\n private readonly responseMapper: (response: TResponse) => number[][];\n\n constructor(config: HttpEmbeddingConfig<TRequest, TResponse, TError>) {\n super(config);\n\n // Default request builder: OpenAI-compatible format\n this.requestBuilder =\n config.requestBuilder ??\n ((texts: string[], model: string) =>\n ({\n input: texts,\n model,\n }) as TRequest);\n\n // Default response mapper: OpenAI-compatible format\n this.responseMapper =\n config.responseMapper ??\n ((response: TResponse) => {\n const resp = response as Record<string, unknown>;\n\n // OpenAI format: { data: [{ embedding: [...] }] }\n if (Array.isArray(resp.data)) {\n return resp.data.map((item: { embedding: number[] }) => item.embedding);\n }\n\n // Simple format: { embeddings: [[...]] }\n if (Array.isArray(resp.embeddings)) {\n return resp.embeddings as number[][];\n }\n\n // Ollama format: { embedding: [...] }\n if (Array.isArray(resp.embedding)) {\n return [resp.embedding as number[]];\n }\n\n // HuggingFace format: [[...]]\n if (Array.isArray(response)) {\n return response as number[][];\n }\n\n throw new ScrapeError(\n 'Unable to parse embedding response. Provide a custom responseMapper.',\n 'VALIDATION_ERROR'\n );\n });\n }\n\n /**\n * Generate embeddings for one or more texts.\n */\n async embed(texts: string[], options: EmbedRequest): Promise<EmbedResponse> {\n const model = options.model || this.model;\n const body = this.requestBuilder(texts, model);\n\n const { data } = await this.fetch<TResponse>(this.baseUrl, {\n body,\n signal: options.signal,\n });\n\n const embeddings = this.responseMapper(data);\n\n // Validate embeddings count\n if (embeddings.length !== texts.length) {\n throw new ScrapeError(\n `Embedding count mismatch: expected ${texts.length}, got ${embeddings.length}`,\n 'VALIDATION_ERROR'\n );\n }\n\n return { embeddings };\n }\n}\n\n/**\n * Create a generic HTTP embedding provider.\n */\nexport function createHttpEmbedding<TRequest = unknown, TResponse = unknown, TError = unknown>(\n config: HttpEmbeddingConfig<TRequest, TResponse, TError>\n): EmbeddingProvider {\n return new HttpEmbeddingProvider(config);\n}\n","/**\n * Preset factory functions for common embedding providers.\n * All presets use the HttpEmbeddingProvider with appropriate configuration.\n */\n\nimport type { EmbeddingProvider, EmbedRequest, EmbedResponse } from '../types.js';\nimport { HttpEmbeddingProvider } from './http.js';\n\n// ─────────────────────────────────────────────────────────────\n// OpenAI\n// ─────────────────────────────────────────────────────────────\n\n/**\n * OpenAI API embedding response shape.\n */\ninterface OpenAIEmbeddingResponse {\n data: Array<{ embedding: number[] }>;\n usage?: {\n prompt_tokens: number;\n total_tokens: number;\n };\n}\n\n/**\n * Create an OpenAI embedding provider.\n *\n * @example\n * ```ts\n * const provider = createOpenAIEmbedding({ apiKey: 'sk-...' });\n * const { embeddings } = await provider.embed(['Hello'], { model: 'text-embedding-3-small' });\n * ```\n */\nexport function createOpenAIEmbedding(options?: {\n apiKey?: string;\n model?: string;\n baseUrl?: string;\n organization?: string;\n}): EmbeddingProvider {\n const apiKey = options?.apiKey ?? process.env.OPENAI_API_KEY;\n if (!apiKey) {\n throw new Error('OpenAI API key required. Set OPENAI_API_KEY env var or pass apiKey option.');\n }\n\n const headers: Record<string, string> = {\n Authorization: `Bearer ${apiKey}`,\n };\n if (options?.organization) {\n headers['OpenAI-Organization'] = options.organization;\n }\n\n return new HttpEmbeddingProvider<unknown, OpenAIEmbeddingResponse>({\n baseUrl: options?.baseUrl ?? 'https://api.openai.com/v1/embeddings',\n model: options?.model ?? 'text-embedding-3-small',\n headers,\n requestBuilder: (texts, model) => ({ input: texts, model }),\n responseMapper: (res) => res.data.map((item) => item.embedding),\n });\n}\n\n// ─────────────────────────────────────────────────────────────\n// Azure OpenAI\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Create an Azure OpenAI embedding provider.\n *\n * @example\n * ```ts\n * const provider = createAzureEmbedding({\n * endpoint: 'https://my-resource.openai.azure.com',\n * deploymentName: 'text-embedding-ada-002',\n * apiVersion: '2023-05-15',\n * });\n * ```\n */\nexport function createAzureEmbedding(options: {\n endpoint: string;\n deploymentName: string;\n apiVersion: string;\n apiKey?: string;\n}): EmbeddingProvider {\n const apiKey = options.apiKey ?? process.env.AZURE_OPENAI_API_KEY;\n if (!apiKey) {\n throw new Error(\n 'Azure OpenAI API key required. Set AZURE_OPENAI_API_KEY env var or pass apiKey option.'\n );\n }\n\n const baseUrl = `${options.endpoint.replace(/\\/$/, '')}/openai/deployments/${options.deploymentName}/embeddings?api-version=${options.apiVersion}`;\n\n return new HttpEmbeddingProvider<unknown, OpenAIEmbeddingResponse>({\n baseUrl,\n model: options.deploymentName,\n headers: { 'api-key': apiKey },\n requestBuilder: (texts) => ({ input: texts }),\n responseMapper: (res) => res.data.map((item) => item.embedding),\n });\n}\n\n// ─────────────────────────────────────────────────────────────\n// Ollama (Local)\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Ollama embedding response shape.\n */\ninterface OllamaEmbeddingResponse {\n embedding: number[];\n}\n\n/**\n * Create an Ollama embedding provider for local models.\n *\n * LIMITATION: Ollama's /api/embeddings endpoint processes one text at a time,\n * not batches. When multiple chunks are embedded, each chunk triggers a\n * separate HTTP request. This is handled transparently by the pipeline's\n * sequential chunk processing, but may be slower than batch-capable providers.\n * For high-throughput scenarios, consider using OpenAI, Cohere, or HuggingFace\n * which support batch embedding in a single request.\n *\n * @example\n * ```ts\n * const provider = createOllamaEmbedding({ model: 'nomic-embed-text' });\n * ```\n */\nexport function createOllamaEmbedding(options?: {\n baseUrl?: string;\n model?: string;\n}): EmbeddingProvider {\n return new HttpEmbeddingProvider<unknown, OllamaEmbeddingResponse>({\n baseUrl: options?.baseUrl ?? 'http://localhost:11434/api/embeddings',\n model: options?.model ?? 'nomic-embed-text',\n requireHttps: false,\n allowPrivate: true,\n requestBuilder: (texts, model) => ({ model, prompt: texts[0] }),\n responseMapper: (res) => [res.embedding],\n });\n}\n\n// ─────────────────────────────────────────────────────────────\n// HuggingFace Inference\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Create a HuggingFace Inference API embedding provider.\n *\n * @example\n * ```ts\n * const provider = createHuggingFaceEmbedding({\n * model: 'sentence-transformers/all-MiniLM-L6-v2',\n * });\n * ```\n */\nexport function createHuggingFaceEmbedding(options: {\n model: string;\n apiKey?: string;\n}): EmbeddingProvider {\n const apiKey = options.apiKey ?? process.env.HF_TOKEN ?? process.env.HUGGINGFACE_API_KEY;\n\n const headers: Record<string, string> = {};\n if (apiKey) {\n headers.Authorization = `Bearer ${apiKey}`;\n }\n\n return new HttpEmbeddingProvider<{ inputs: string[] }, number[][]>({\n baseUrl: `https://api-inference.huggingface.co/models/${options.model}`,\n model: options.model,\n headers,\n requestBuilder: (texts) => ({ inputs: texts }),\n responseMapper: (response) => {\n // HuggingFace returns embeddings directly as array\n if (Array.isArray(response)) {\n // Check if it's a single embedding or array of embeddings\n if (Array.isArray(response[0]) && typeof response[0][0] === 'number') {\n return response;\n }\n // Single text input returns single embedding\n return [response as unknown as number[]];\n }\n throw new Error('Unexpected HuggingFace response format');\n },\n });\n}\n\n// ─────────────────────────────────────────────────────────────\n// Cohere\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Cohere embedding response shape.\n */\ninterface CohereEmbeddingResponse {\n embeddings: number[][];\n}\n\n/**\n * Create a Cohere embedding provider.\n *\n * @example\n * ```ts\n * const provider = createCohereEmbedding({ model: 'embed-english-v3.0' });\n * ```\n */\nexport function createCohereEmbedding(options?: {\n apiKey?: string;\n model?: string;\n /** Input type for embeddings. Use 'search_query' for queries, 'search_document' for documents */\n inputType?: 'search_document' | 'search_query' | 'classification' | 'clustering';\n}): EmbeddingProvider {\n const apiKey = options?.apiKey ?? process.env.COHERE_API_KEY;\n if (!apiKey) {\n throw new Error('Cohere API key required. Set COHERE_API_KEY env var or pass apiKey option.');\n }\n\n return new HttpEmbeddingProvider<unknown, CohereEmbeddingResponse>({\n baseUrl: 'https://api.cohere.ai/v1/embed',\n model: options?.model ?? 'embed-english-v3.0',\n headers: { Authorization: `Bearer ${apiKey}` },\n requestBuilder: (texts, model) => ({\n texts,\n model,\n input_type: options?.inputType ?? 'search_document',\n }),\n responseMapper: (res) => res.embeddings,\n });\n}\n\n// ─────────────────────────────────────────────────────────────\n// Transformers.js (Dependency Injection)\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Feature extraction pipeline type for Transformers.js\n */\ntype FeatureExtractionPipeline = (\n text: string,\n options?: { pooling?: 'mean' | 'cls' | 'max'; normalize?: boolean }\n) => Promise<{ data: Float32Array }>;\n\n/**\n * Transformers.js module interface for dependency injection.\n */\ninterface TransformersModule {\n pipeline: (\n task: 'feature-extraction',\n model: string,\n options?: { quantized?: boolean }\n ) => Promise<FeatureExtractionPipeline>;\n env?: { cacheDir?: string };\n}\n\n/**\n * Create a local Transformers.js embedding provider.\n * Uses dependency injection - user provides the imported transformers module.\n *\n * @example\n * ```typescript\n * import * as transformers from '@huggingface/transformers';\n * import { createTransformersEmbedding } from 'scrapex/embeddings';\n *\n * const provider = createTransformersEmbedding(transformers, {\n * model: 'Xenova/all-MiniLM-L6-v2',\n * });\n * ```\n *\n * Required Node.js dependencies:\n * ```\n * npm install @huggingface/transformers onnxruntime-node\n * ```\n */\nexport function createTransformersEmbedding(\n transformers: TransformersModule,\n options?: {\n model?: string;\n quantized?: boolean;\n pooling?: 'mean' | 'cls' | 'max';\n normalize?: boolean;\n cacheDir?: string;\n }\n): EmbeddingProvider {\n let pipeline: FeatureExtractionPipeline | null = null;\n let currentModel: string | null = null;\n\n const config = {\n model: options?.model ?? 'Xenova/all-MiniLM-L6-v2',\n quantized: options?.quantized ?? true,\n pooling: options?.pooling ?? 'mean',\n normalize: options?.normalize ?? true,\n };\n\n return {\n name: 'transformers',\n async embed(texts: string[], request: EmbedRequest): Promise<EmbedResponse> {\n const model = request.model || config.model;\n\n // Lazy-load pipeline (only on first use or model change)\n if (!pipeline || currentModel !== model) {\n const cacheDir = options?.cacheDir;\n const env = transformers.env as { cacheDir?: string } | undefined;\n const priorCacheDir = env?.cacheDir;\n\n if (cacheDir && env) {\n env.cacheDir = cacheDir;\n }\n\n try {\n pipeline = await transformers.pipeline('feature-extraction', model, {\n quantized: config.quantized,\n });\n } finally {\n if (cacheDir && env) {\n if (priorCacheDir === undefined) {\n delete env.cacheDir;\n } else {\n env.cacheDir = priorCacheDir;\n }\n }\n }\n currentModel = model;\n }\n\n const embeddings: number[][] = [];\n for (const text of texts) {\n const output = await pipeline(text, {\n pooling: config.pooling,\n normalize: config.normalize,\n });\n embeddings.push(Array.from(output.data));\n }\n\n return { embeddings };\n },\n };\n}\n\n/** Recommended models for Transformers.js */\nexport const TRANSFORMERS_MODELS = {\n /** Default - Fast, general purpose (384 dimensions, ~23MB) */\n DEFAULT: 'Xenova/all-MiniLM-L6-v2',\n /** Higher quality, more resources (768 dimensions, ~110MB) */\n QUALITY: 'Xenova/all-mpnet-base-v2',\n /** Optimized for retrieval (384 dimensions, ~33MB) */\n RETRIEVAL: 'Xenova/bge-small-en-v1.5',\n /** Multi-language support (384 dimensions, ~118MB) */\n MULTILINGUAL: 'Xenova/multilingual-e5-small',\n} as const;\n","import { ScrapeError } from '../../core/errors.js';\nimport type { EmbeddingProvider, EmbeddingProviderConfig } from '../types.js';\nimport { createHttpEmbedding } from './http.js';\n\n// Re-export base utilities that are still used\nexport {\n getDefaultModel,\n getProviderCacheKey,\n validateEmbedResponse,\n} from './base.js';\nexport type { HttpEmbeddingConfig } from './http.js';\n// HTTP Provider (provider-agnostic)\nexport { createHttpEmbedding, HttpEmbeddingProvider } from './http.js';\n// Preset factory functions\nexport {\n createAzureEmbedding,\n createCohereEmbedding,\n createHuggingFaceEmbedding,\n createOllamaEmbedding,\n createOpenAIEmbedding,\n createTransformersEmbedding,\n TRANSFORMERS_MODELS,\n} from './presets.js';\n\n/**\n * Create an embedding provider from configuration.\n * This is the main factory function for creating providers.\n */\nexport function createEmbeddingProvider(config: EmbeddingProviderConfig): EmbeddingProvider {\n switch (config.type) {\n case 'http': {\n // Use static import - already imported at top of file\n return createHttpEmbedding(config.config);\n }\n\n case 'custom': {\n return config.provider;\n }\n\n default: {\n // Exhaustive check\n const _exhaustive: never = config;\n throw new ScrapeError(\n `Unknown embedding provider type: ${(_exhaustive as { type: string }).type}`,\n 'VALIDATION_ERROR'\n );\n }\n }\n}\n\n/**\n * Type guard to check if a value is an EmbeddingProvider.\n */\nexport function isEmbeddingProvider(value: unknown): value is EmbeddingProvider {\n return (\n typeof value === 'object' &&\n value !== null &&\n 'name' in value &&\n typeof (value as EmbeddingProvider).name === 'string' &&\n 'embed' in value &&\n typeof (value as EmbeddingProvider).embed === 'function'\n );\n}\n","import type { PiiRedactionConfig } from './types.js';\n\n/**\n * PII redaction patterns with high precision to minimize false positives.\n * Patterns are designed to match common formats while avoiding over-matching.\n */\n\n// Email: matches user@domain.tld format\nconst EMAIL_PATTERN = /\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b/g;\n\n// Phone: matches various formats including international\n// Covers: +1-234-567-8901, (234) 567-8901, 234.567.8901, 234-567-8901, etc.\nconst PHONE_PATTERN = /(?:\\+?1[-.\\s]?)?\\(?[0-9]{3}\\)?[-.\\s]?[0-9]{3}[-.\\s]?[0-9]{4}\\b/g;\n\n// Credit Card: matches major card formats (Visa, MC, Amex, Discover)\n// 13-19 digits with optional spaces or dashes\nconst CREDIT_CARD_PATTERN =\n /\\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13}|6(?:011|5[0-9]{2})[0-9]{12}|(?:[0-9]{4}[-\\s]){3}[0-9]{4}|[0-9]{13,19})\\b/g;\n\n// SSN: matches XXX-XX-XXXX format (US Social Security Number)\nconst SSN_PATTERN = /\\b[0-9]{3}-[0-9]{2}-[0-9]{4}\\b/g;\n\n// IP Address: matches both IPv4 and common IPv6 formats\nconst IPV4_PATTERN =\n /\\b(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\b/g;\n\n// Redaction placeholder\nconst REDACTED = '[REDACTED]';\n\n/**\n * Create a redaction function based on configuration.\n * Returns a function that applies all configured PII patterns.\n */\nexport function createPiiRedactor(config: PiiRedactionConfig): (text: string) => RedactionResult {\n const patterns: Array<{ name: string; pattern: RegExp }> = [];\n\n // Process credit card BEFORE phone to prevent partial matches\n // (phone pattern can match 10-digit substrings of credit card numbers)\n if (config.creditCard) {\n patterns.push({ name: 'creditCard', pattern: CREDIT_CARD_PATTERN });\n }\n if (config.email) {\n patterns.push({ name: 'email', pattern: EMAIL_PATTERN });\n }\n if (config.phone) {\n patterns.push({ name: 'phone', pattern: PHONE_PATTERN });\n }\n if (config.ssn) {\n patterns.push({ name: 'ssn', pattern: SSN_PATTERN });\n }\n if (config.ipAddress) {\n patterns.push({ name: 'ipAddress', pattern: IPV4_PATTERN });\n }\n\n // Add custom patterns\n if (config.customPatterns) {\n for (let i = 0; i < config.customPatterns.length; i++) {\n const customPattern = config.customPatterns[i];\n if (customPattern) {\n patterns.push({\n name: `custom_${i}`,\n pattern: customPattern,\n });\n }\n }\n }\n\n return (text: string): RedactionResult => {\n let redactedText = text;\n let totalRedactions = 0;\n const redactionsByType: Record<string, number> = {};\n\n for (const { name, pattern } of patterns) {\n // Reset pattern state for global patterns\n pattern.lastIndex = 0;\n\n // Count matches before replacing\n const matches = text.match(pattern);\n const matchCount = matches?.length ?? 0;\n\n if (matchCount > 0) {\n redactedText = redactedText.replace(pattern, REDACTED);\n totalRedactions += matchCount;\n redactionsByType[name] = (redactionsByType[name] ?? 0) + matchCount;\n }\n }\n\n return {\n text: redactedText,\n redacted: totalRedactions > 0,\n redactionCount: totalRedactions,\n redactionsByType,\n };\n };\n}\n\n/**\n * Result of PII redaction operation.\n */\nexport interface RedactionResult {\n /** Redacted text */\n text: string;\n /** Whether any redactions were made */\n redacted: boolean;\n /** Total number of redactions */\n redactionCount: number;\n /** Count by redaction type */\n redactionsByType: Record<string, number>;\n}\n\n/**\n * Simple redaction that applies all default patterns.\n * Use createPiiRedactor() for fine-grained control.\n */\nexport function redactPii(text: string): RedactionResult {\n const redactor = createPiiRedactor({\n email: true,\n phone: true,\n creditCard: true,\n ssn: true,\n ipAddress: true,\n });\n return redactor(text);\n}\n\n/**\n * Check if text contains any PII.\n * Useful for validation before sending to external APIs.\n */\nexport function containsPii(text: string, config?: Partial<PiiRedactionConfig>): boolean {\n const fullConfig: PiiRedactionConfig = {\n email: config?.email ?? true,\n phone: config?.phone ?? true,\n creditCard: config?.creditCard ?? true,\n ssn: config?.ssn ?? true,\n ipAddress: config?.ipAddress ?? true,\n customPatterns: config?.customPatterns,\n };\n\n const patterns: RegExp[] = [];\n\n if (fullConfig.email) patterns.push(EMAIL_PATTERN);\n if (fullConfig.phone) patterns.push(PHONE_PATTERN);\n if (fullConfig.creditCard) patterns.push(CREDIT_CARD_PATTERN);\n if (fullConfig.ssn) patterns.push(SSN_PATTERN);\n if (fullConfig.ipAddress) patterns.push(IPV4_PATTERN);\n if (fullConfig.customPatterns) patterns.push(...fullConfig.customPatterns);\n\n for (const pattern of patterns) {\n pattern.lastIndex = 0;\n if (pattern.test(text)) {\n return true;\n }\n }\n\n return false;\n}\n","import type { ScrapedData } from '../core/types.js';\nimport { ScrapeError } from '../core/errors.js';\nimport { aggregateVectors, getDimensions } from './aggregation.js';\nimport { generateCacheKey, generateChecksum, getDefaultCache } from './cache.js';\nimport { chunkText, estimateTokens } from './chunking.js';\nimport { selectInput, validateInput } from './input.js';\nimport { getProviderCacheKey } from './providers/base.js';\nimport { createEmbeddingProvider } from './providers/index.js';\nimport { CircuitBreaker, RateLimiter, Semaphore, withResilience } from './resilience.js';\nimport { createPiiRedactor } from './safety.js';\nimport type {\n EmbeddingMetrics,\n EmbeddingOptions,\n EmbeddingProviderConfig,\n EmbeddingResult,\n EmbeddingSkipped,\n EmbeddingSource,\n EmbedResponse,\n} from './types.js';\n\nconst DEFAULT_CHUNK_SIZE = 500;\n\n/**\n * Get the effective model for embedding.\n * Prioritizes: explicit options.model > provider config model\n */\nfunction getEffectiveModel(\n providerConfig: EmbeddingProviderConfig,\n explicitModel?: string\n): string | undefined {\n // Explicit model from options always wins\n if (explicitModel) {\n return explicitModel;\n }\n\n // For HTTP providers, use the model from config\n if (providerConfig.type === 'http') {\n return providerConfig.config.model;\n }\n\n // For custom providers, return undefined to let provider fully control its default\n return undefined;\n}\n\n/**\n * Generate embeddings for scraped data.\n * This is the main entry point for the embedding pipeline.\n */\nexport async function generateEmbeddings(\n data: Partial<ScrapedData>,\n options: EmbeddingOptions\n): Promise<EmbeddingResult> {\n const startTime = Date.now();\n\n try {\n // Step 1: Create or get provider\n const provider = createEmbeddingProvider(options.provider);\n const model = getEffectiveModel(options.provider, options.model);\n\n // Step 2: Select input text\n const rawInput = selectInput(data, options.input);\n const validation = validateInput(rawInput, options.safety?.minTextLength ?? 10);\n\n if (!validation.valid) {\n return createSkippedResult(validation.reason, { model });\n }\n\n const originalInput = validation.text;\n let inputText = validation.text;\n\n // Step 3: Apply PII redaction if configured\n let piiRedacted = false;\n if (options.safety?.piiRedaction) {\n const redactor = createPiiRedactor(options.safety.piiRedaction);\n const redactionResult = redactor(inputText);\n inputText = redactionResult.text;\n piiRedacted = redactionResult.redacted;\n }\n\n // Step 4: Check cache\n const effectiveChunking = applyMaxTokensToChunking(options.chunking, options.safety?.maxTokens);\n\n const cacheKey = generateCacheKey({\n providerKey: getProviderCacheKey(options.provider),\n model,\n dimensions: options.output?.dimensions,\n aggregation: options.output?.aggregation,\n input: options.input,\n chunking: effectiveChunking,\n safety: options.safety,\n cacheKeySalt: options.cache?.cacheKeySalt,\n content: inputText,\n });\n\n const cache = options.cache?.store ?? getDefaultCache();\n const cachedResult = await cache.get(cacheKey);\n\n if (cachedResult && cachedResult.status === 'success') {\n // Emit metrics for cache hit\n if (options.onMetrics) {\n options.onMetrics({\n provider: provider.name,\n model,\n inputTokens: estimateTokens(inputText),\n outputDimensions: getDimensions(\n cachedResult.aggregation === 'all' ? cachedResult.vectors : cachedResult.vector\n ),\n chunks: cachedResult.source.chunks,\n latencyMs: Date.now() - startTime,\n cached: true,\n retries: 0,\n piiRedacted,\n });\n }\n\n return {\n ...cachedResult,\n source: { ...cachedResult.source, cached: true },\n };\n }\n\n // Step 5: Chunk text\n const chunks = chunkText(inputText, effectiveChunking);\n const callbackChunks =\n options.onChunk && options.safety?.allowSensitiveCallbacks\n ? chunkText(originalInput, effectiveChunking)\n : null;\n\n if (chunks.length === 0) {\n return createSkippedResult('No content after chunking', { model });\n }\n\n // Step 6: Apply rate limiting and resilience\n const sharedState = options.resilience?.state;\n const rateLimiter =\n sharedState?.rateLimiter ??\n (options.resilience?.rateLimit ? new RateLimiter(options.resilience.rateLimit) : null);\n\n const circuitBreaker =\n sharedState?.circuitBreaker ??\n (options.resilience?.circuitBreaker\n ? new CircuitBreaker(options.resilience.circuitBreaker)\n : null);\n\n const concurrency = options.resilience?.concurrency ?? 1;\n const semaphore = sharedState?.semaphore ?? new Semaphore(concurrency);\n\n // Step 7: Embed chunks\n const embeddings: number[][] = [];\n let totalTokens = 0;\n let retryCount = 0;\n\n for (let i = 0; i < chunks.length; i++) {\n const chunk = chunks[i];\n if (!chunk) continue;\n\n // Wait for rate limiter\n if (rateLimiter) {\n await rateLimiter.acquire();\n }\n\n // Check circuit breaker\n if (circuitBreaker?.isOpen()) {\n return createSkippedResult('Circuit breaker is open', {\n model,\n chunks: i,\n });\n }\n\n // Process chunk with semaphore for concurrency control\n await semaphore.execute(async () => {\n const { result } = await withResilience<EmbedResponse>(\n async (signal) => {\n return provider.embed([chunk.text], {\n model,\n dimensions: options.output?.dimensions,\n signal,\n });\n },\n options.resilience,\n { circuitBreaker: circuitBreaker ?? undefined, rateLimiter: undefined, semaphore: undefined },\n {\n onRetry: () => {\n retryCount++;\n },\n }\n );\n\n // Track tokens\n if (result.usage) {\n totalTokens += result.usage.totalTokens;\n } else {\n totalTokens += chunk.tokens;\n }\n\n // Store embedding\n const embedding = result.embeddings[0];\n if (embedding) {\n embeddings.push(embedding);\n\n // Callback for progress tracking\n if (options.onChunk) {\n const callbackText = callbackChunks?.[i]?.text ?? chunk.text;\n options.onChunk(callbackText, embedding);\n }\n }\n });\n }\n\n // Step 8: Aggregate embeddings\n const aggregation = options.output?.aggregation ?? 'average';\n const aggregated = aggregateVectors(embeddings, aggregation);\n\n // Step 9: Build source metadata\n const source: EmbeddingSource = {\n model,\n chunks: chunks.length,\n tokens: totalTokens || estimateTokens(inputText),\n checksum: generateChecksum(inputText),\n cached: false,\n latencyMs: Date.now() - startTime,\n };\n\n // Step 10: Build result\n let result: EmbeddingResult;\n\n if (aggregated.type === 'single') {\n result = {\n status: 'success',\n aggregation: aggregation as 'average' | 'max' | 'first',\n vector: aggregated.vector,\n source,\n };\n } else {\n result = {\n status: 'success',\n aggregation: 'all',\n vectors: aggregated.vectors,\n source,\n };\n }\n\n // Step 11: Cache result\n await cache.set(cacheKey, result, {\n ttlMs: options.cache?.ttlMs,\n });\n\n // Step 12: Emit metrics\n if (options.onMetrics) {\n const metrics: EmbeddingMetrics = {\n provider: provider.name,\n model,\n inputTokens: source.tokens,\n outputDimensions: aggregated.dimensions,\n chunks: chunks.length,\n latencyMs: source.latencyMs,\n cached: false,\n retries: retryCount,\n piiRedacted,\n };\n options.onMetrics(metrics);\n }\n\n return result;\n } catch (error) {\n const reason = error instanceof Error ? error.message : String(error);\n if (error instanceof ScrapeError && ['INVALID_URL', 'BLOCKED'].includes(error.code)) {\n throw error;\n }\n return createSkippedResult(reason, {\n latencyMs: Date.now() - startTime,\n });\n }\n}\n\nfunction applyMaxTokensToChunking(\n chunking: EmbeddingOptions['chunking'],\n maxTokens?: number\n): EmbeddingOptions['chunking'] {\n if (!maxTokens || maxTokens <= 0) {\n return chunking;\n }\n\n const baseSize = chunking?.size ?? DEFAULT_CHUNK_SIZE;\n const baseOverlap = chunking?.overlap ?? 50;\n const clampedSize = Math.min(baseSize, maxTokens);\n\n // Clamp overlap to be less than the new size to prevent negative effective chunk sizes\n // Ensure at least 1 token of effective chunk size (size - overlap >= 1)\n const clampedOverlap = Math.min(baseOverlap, Math.max(0, clampedSize - 1));\n\n return {\n ...chunking,\n size: clampedSize,\n overlap: clampedOverlap,\n };\n}\n\n/**\n * Embed arbitrary text directly.\n * Standalone function for embedding text outside of scrape().\n */\nexport async function embed(text: string, options: EmbeddingOptions): Promise<EmbeddingResult> {\n // Create a minimal ScrapedData-like object\n const data: Partial<ScrapedData> = {\n textContent: text,\n };\n\n // Force textContent input type\n const optionsWithInput: EmbeddingOptions = {\n ...options,\n input: {\n ...options.input,\n type: 'textContent',\n },\n };\n\n return generateEmbeddings(data, optionsWithInput);\n}\n\n/**\n * Embed from existing ScrapedData.\n * Useful when you've already scraped and want to add embeddings later.\n */\nexport async function embedScrapedData(\n data: ScrapedData,\n options: EmbeddingOptions\n): Promise<EmbeddingResult> {\n return generateEmbeddings(data, options);\n}\n\n/**\n * Create a skipped result with reason.\n */\nfunction createSkippedResult(\n reason: string,\n partialSource?: Partial<EmbeddingSource>\n): EmbeddingSkipped {\n return {\n status: 'skipped',\n reason,\n source: partialSource ?? {},\n };\n}\n"],"mappings":";;;;;;;;;;;;AASA,SAAgB,iBACd,SACA,WAAiC,WACd;AACnB,KAAI,QAAQ,WAAW,EACrB,OAAM,IAAI,MAAM,sCAAsC;CAIxD,MAAM,cAAc,QAAQ;AAC5B,KAAI,CAAC,YACH,OAAM,IAAI,MAAM,sCAAsC;CAGxD,MAAM,aAAa,YAAY;AAC/B,MAAK,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;EACvC,MAAM,MAAM,QAAQ;AACpB,MAAI,CAAC,OAAO,IAAI,WAAW,WACzB,OAAM,IAAI,MACR,uCAAuC,WAAW,QAAQ,KAAK,UAAU,EAAE,YAAY,IACxF;;AAIL,SAAQ,UAAR;EACE,KAAK,UACH,QAAO;GACL,MAAM;GACN,QAAQ,eAAe,QAAQ;GAC/B;GACD;EAEH,KAAK,MACH,QAAO;GACL,MAAM;GACN,QAAQ,eAAe,QAAQ;GAC/B;GACD;EAEH,KAAK,QACH,QAAO;GACL,MAAM;GACN,QAAQ;GACR;GACD;EAEH,KAAK,MACH,QAAO;GACL,MAAM;GACN;GACA;GACD;EAEH,SAAS;GAEP,MAAMA,cAAqB;AAC3B,SAAM,IAAI,MAAM,iCAAiC,cAAc;;;;;;;AAerE,SAAS,eAAe,SAA+B;CACrD,MAAM,QAAQ,QAAQ;AACtB,KAAI,CAAC,SAAS,QAAQ,WAAW,EAC/B,QAAO,SAAS,EAAE;CAGpB,MAAM,aAAa,MAAM;CACzB,MAAM,QAAQ,QAAQ;CACtB,MAAMC,SAAmB,IAAI,MAAc,WAAW,CAAC,KAAK,EAAE;AAE9D,MAAK,MAAM,UAAU,QACnB,MAAK,IAAI,IAAI,GAAG,IAAI,YAAY,KAAK;EACnC,MAAM,MAAM,OAAO;AACnB,MAAI,QAAQ,OACV,QAAO,KAAK,OAAO,OAAO,MAAM;;AAKtC,MAAK,IAAI,IAAI,GAAG,IAAI,YAAY,KAAK;EACnC,MAAM,MAAM,OAAO;AACnB,MAAI,QAAQ,OACV,QAAO,KAAK,MAAM;;AAItB,QAAO;;;;;AAMT,SAAS,eAAe,SAA+B;CACrD,MAAM,QAAQ,QAAQ;AACtB,KAAI,CAAC,SAAS,QAAQ,WAAW,EAC/B,QAAO,SAAS,EAAE;CAGpB,MAAM,aAAa,MAAM;CACzB,MAAM,SAAS,CAAC,GAAG,MAAM;AAEzB,MAAK,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;EACvC,MAAM,MAAM,QAAQ;AACpB,MAAI,CAAC,IAAK;AACV,OAAK,IAAI,IAAI,GAAG,IAAI,YAAY,KAAK;GACnC,MAAM,MAAM,IAAI,MAAM;AAEtB,OAAI,OADS,OAAO,MAAM,GAExB,QAAO,KAAK;;;AAKlB,QAAO;;;;;AAMT,SAAgB,gBAAgB,QAA4B;CAC1D,MAAM,YAAY,KAAK,KAAK,OAAO,QAAQ,KAAK,QAAQ,MAAM,MAAM,KAAK,EAAE,CAAC;AAE5E,KAAI,cAAc,EAChB,QAAO;AAGT,QAAO,OAAO,KAAK,QAAQ,MAAM,UAAU;;;;;;AAO7C,SAAgB,iBAAiB,GAAa,GAAqB;AACjE,KAAI,EAAE,WAAW,EAAE,OACjB,OAAM,IAAI,MAAM,8BAA8B,EAAE,OAAO,MAAM,EAAE,SAAS;CAG1E,IAAI,MAAM;CACV,IAAI,aAAa;CACjB,IAAI,aAAa;AAEjB,MAAK,IAAI,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK;EACjC,MAAM,OAAO,EAAE,MAAM;EACrB,MAAM,OAAO,EAAE,MAAM;AACrB,SAAO,OAAO;AACd,gBAAc,OAAO;AACrB,gBAAc,OAAO;;CAGvB,MAAM,YAAY,KAAK,KAAK,WAAW,GAAG,KAAK,KAAK,WAAW;AAE/D,KAAI,cAAc,EAChB,QAAO;AAGT,QAAO,MAAM;;;;;AAMf,SAAgB,kBAAkB,GAAa,GAAqB;AAClE,KAAI,EAAE,WAAW,EAAE,OACjB,OAAM,IAAI,MAAM,8BAA8B,EAAE,OAAO,MAAM,EAAE,SAAS;CAG1E,IAAI,MAAM;AACV,MAAK,IAAI,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK;EAGjC,MAAM,QAFO,EAAE,MAAM,MACR,EAAE,MAAM;AAErB,SAAO,OAAO;;AAGhB,QAAO,KAAK,KAAK,IAAI;;;;;AAMvB,SAAgB,WAAW,GAAa,GAAqB;AAC3D,KAAI,EAAE,WAAW,EAAE,OACjB,OAAM,IAAI,MAAM,8BAA8B,EAAE,OAAO,MAAM,EAAE,SAAS;CAG1E,IAAI,SAAS;AACb,MAAK,IAAI,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK;EACjC,MAAM,OAAO,EAAE,MAAM;EACrB,MAAM,OAAO,EAAE,MAAM;AACrB,YAAU,OAAO;;AAGnB,QAAO;;;;;AAMT,SAAgB,cAAc,SAAwC;AACpE,KAAI,QAAQ,WAAW,EACrB,QAAO;CAGT,MAAM,QAAQ,QAAQ;AAGtB,KAAI,OAAO,UAAU,SACnB,QAAO,QAAQ;AAGjB,QAAO,OAAO,UAAU;;;;;;;;ACzN1B,MAAM,sBAAsB;;;;AAK5B,MAAM,iBAAiB,OAAU;;;;;;;AAqBjC,SAAgB,iBAAiB,QAAgC;CAC/D,MAAM,mCAAkB,SAAS;CAEjC,MAAM,cAAc,gBAAgB;EAClC,aAAa,OAAO;EACpB,OAAO,OAAO,SAAS;EACvB,YAAY,OAAO,cAAc;EACjC,aAAa,OAAO,eAAe;EACnC,OAAO,qBAAqB,OAAO,MAAM;EACzC,UAAU,wBAAwB,OAAO,SAAS;EAClD,QAAQ,sBAAsB,OAAO,OAAO;EAC5C,cAAc,OAAO;EACtB,CAAC;AAEF,MAAK,OAAO,YAAY;AACxB,MAAK,OAAO,KAAK;AACjB,MAAK,OAAO,OAAO,QAAQ;AAE3B,QAAO,KAAK,OAAO,MAAM;;;;;AAM3B,SAAgB,iBAAiB,SAAyB;AACxD,oCAAkB,SAAS,CAAC,OAAO,QAAQ,CAAC,OAAO,MAAM,CAAC,MAAM,GAAG,GAAG;;AAGxE,SAAS,qBAAqB,QAAoE;AAChG,KAAI,CAAC,OAAQ,QAAO;AAEpB,QAAO,gBAAgB;EACrB,MAAM,OAAO,QAAQ;EACrB,cAAc,QAAQ,OAAO,UAAU;EACvC,eAAe,QAAQ,OAAO,WAAW;EAC1C,CAAC;;AAGJ,SAAS,wBAAwB,QAA8D;AAC7F,KAAI,CAAC,OAAQ,QAAO;AAEpB,QAAO,gBAAgB;EACrB,MAAM,OAAO;EACb,SAAS,OAAO;EAChB,WAAW,eAAe,OAAO,UAAU;EAC3C,gBAAgB,OAAO;EACxB,CAAC;;AAGJ,SAAS,sBAAsB,QAA4D;AACzF,KAAI,CAAC,OAAQ,QAAO;AAEpB,QAAO,gBAAgB;EACrB,cAAc,mBAAmB,OAAO,aAAa;EACrD,eAAe,OAAO;EACtB,WAAW,OAAO;EACnB,CAAC;;AAGJ,SAAS,mBAAmB,QAAkE;AAC5F,KAAI,CAAC,OAAQ,QAAO;AAEpB,QAAO,gBAAgB;EACrB,OAAO,OAAO,SAAS;EACvB,OAAO,OAAO,SAAS;EACvB,YAAY,OAAO,cAAc;EACjC,KAAK,OAAO,OAAO;EACnB,WAAW,OAAO,aAAa;EAC/B,gBAAgB,OAAO,gBAAgB,KAAK,YAAY,GAAG,QAAQ,OAAO,GAAG,QAAQ,QAAQ;EAC9F,CAAC;;AAGJ,SAAS,eAAe,WAAgD;AACtE,KAAI,CAAC,aAAa,cAAc,YAC9B,QAAO;AAGT,KAAI,cAAc,WAChB,QAAO;AAGT,QAAO;;AAGT,SAAS,gBAAgB,OAAwB;AAE/C,QAAO,oBADY,eAAe,MAAM,CACF;;AAGxC,SAAS,eAAe,OAAyB;AAC/C,KAAI,UAAU,OAAW,QAAO;AAChC,KAAI,UAAU,KAAM,QAAO;AAE3B,KAAI,MAAM,QAAQ,MAAM,CAItB,QAHmB,MAChB,KAAK,UAAU,eAAe,MAAM,CAAC,CACrC,QAAQ,UAAU,UAAU,OAAU;AAI3C,KAAI,OAAO,UAAU,SACnB,QAAO,gBAAgB,MAAiC;AAG1D,QAAO;;AAGT,SAAS,gBAAgB,OAAyD;CAChF,MAAMC,aAAsC,EAAE;AAE9C,MAAK,MAAM,OAAO,OAAO,KAAK,MAAM,CAAC,MAAM,EAAE;EAC3C,MAAM,QAAQ,eAAe,MAAM,KAAK;AACxC,MAAI,UAAU,OACZ,YAAW,OAAO;;AAItB,QAAO;;AAGT,SAAS,oBAAoB,OAAwB;AACnD,KAAI,UAAU,OAAW,QAAO;AAChC,KAAI,UAAU,KAAM,QAAO;AAE3B,KAAI,OAAO,UAAU,SACnB,QAAO,KAAK,UAAU,MAAM;AAG9B,KAAI,OAAO,UAAU,YAAY,OAAO,UAAU,UAChD,QAAO,OAAO,MAAM;AAGtB,KAAI,MAAM,QAAQ,MAAM,CACtB,QAAO,IAAI,MAAM,KAAK,UAAU,oBAAoB,MAAM,CAAC,CAAC,KAAK,IAAI,CAAC;AAGxE,KAAI,OAAO,UAAU,UAAU;EAC7B,MAAM,MAAM;AAIZ,SAAO,IAHS,OAAO,KAAK,IAAI,CAC7B,MAAM,CACN,KAAK,QAAQ,GAAG,KAAK,UAAU,IAAI,CAAC,GAAG,oBAAoB,IAAI,KAAK,GAAG,CACvD,KAAK,IAAI,CAAC;;AAG/B,QAAO,KAAK,UAAU,MAAM;;;;;;AAiB9B,IAAa,yBAAb,MAA8D;CAC5D,AAAQ;CACR,AAAiB;CACjB,AAAiB;CAEjB,YAAY,SAAmD;AAC7D,OAAK,wBAAQ,IAAI,KAAK;AACtB,OAAK,aAAa,SAAS,cAAc;AACzC,OAAK,eAAe,SAAS,SAAS;;CAGxC,MAAM,IAAI,KAAmD;EAC3D,MAAM,QAAQ,KAAK,MAAM,IAAI,IAAI;AAEjC,MAAI,CAAC,MACH;EAGF,MAAM,MAAM,KAAK,KAAK;AAGtB,MAAI,MAAM,MAAM,WAAW;AACzB,QAAK,MAAM,OAAO,IAAI;AACtB;;AAIF,QAAM,aAAa;AAEnB,SAAO,MAAM;;CAGf,MAAM,IAAI,KAAa,OAAwB,SAA6C;EAC1F,MAAM,MAAM,KAAK,KAAK;EACtB,MAAM,MAAM,SAAS,SAAS,KAAK;AAGnC,MAAI,KAAK,MAAM,QAAQ,KAAK,cAAc,CAAC,KAAK,MAAM,IAAI,IAAI,CAC5D,MAAK,UAAU;AAGjB,OAAK,MAAM,IAAI,KAAK;GAClB;GACA,WAAW;GACX,WAAW,MAAM;GACjB,YAAY;GACb,CAAC;;CAGJ,MAAM,OAAO,KAA+B;AAC1C,SAAO,KAAK,MAAM,OAAO,IAAI;;CAG/B,MAAM,QAAuB;AAC3B,OAAK,MAAM,OAAO;;;;;CAMpB,WAAuB;EACrB,MAAM,MAAM,KAAK,KAAK;EACtB,IAAI,UAAU;AAEd,OAAK,MAAM,SAAS,KAAK,MAAM,QAAQ,CACrC,KAAI,MAAM,MAAM,UACd;AAIJ,SAAO;GACL,MAAM,KAAK,MAAM;GACjB,YAAY,KAAK;GACjB;GACA,aAAa,KAAK,MAAM,OAAO,KAAK;GACrC;;;;;CAMH,UAAkB;EAChB,MAAM,MAAM,KAAK,KAAK;EACtB,IAAI,UAAU;AAEd,OAAK,MAAM,CAAC,KAAK,UAAU,KAAK,MAAM,SAAS,CAC7C,KAAI,MAAM,MAAM,WAAW;AACzB,QAAK,MAAM,OAAO,IAAI;AACtB;;AAIJ,SAAO;;;;;CAMT,AAAQ,WAAiB;EACvB,IAAIC,YAA2B;EAC/B,IAAI,eAAe,OAAO;AAE1B,OAAK,MAAM,CAAC,KAAK,UAAU,KAAK,MAAM,SAAS,CAC7C,KAAI,MAAM,aAAa,cAAc;AACnC,kBAAe,MAAM;AACrB,eAAY;;AAIhB,MAAI,UACF,MAAK,MAAM,OAAO,UAAU;;;;;;AAsBlC,SAAgB,qBACd,QACA,oBACS;AACT,KAAI,OAAO,WAAW,UACpB,QAAO;AAGT,KAAI,CAAC,mBACH,QAAO;AAGT,KAAI,OAAO,gBAAgB,OAAO;EAChC,MAAM,WAAW,OAAO,QAAQ;AAChC,MAAI,CAAC,SACH,QAAO;AAET,SAAO,SAAS,WAAW;;AAG7B,QAAO,OAAO,OAAO,WAAW;;;;;;AAOlC,SAAgB,kBAAkC;AAChD,QAAO;EACL,MAAM,MAA0B;EAGhC,MAAM,MAAqB;EAG3B,MAAM,SAA2B;AAC/B,UAAO;;EAET,MAAM,QAAuB;EAG9B;;;;;;AAOH,IAAIC,eAA8C;;;;AAKlD,SAAgB,kBAA0C;AACxD,KAAI,CAAC,aACH,gBAAe,IAAI,wBAAwB;AAE7C,QAAO;;;;;AAMT,eAAsB,oBAAmC;AACvD,KAAI,aACF,OAAM,aAAa,OAAO;AAE5B,gBAAe;;;;;;;;AC3YjB,MAAMC,uBAAqB;;;;AAK3B,MAAM,kBAAkB;;;;AAKxB,MAAM,2BAA2B;;;;;AAMjC,SAAgB,oBAAoB,MAAsB;AACxD,QAAO,KAAK,KAAK,KAAK,SAAS,EAAE;;;;;AAMnC,SAAS,cAAc,QAAwB;AAC7C,QAAO,SAAS;;;;;AAMlB,SAAgB,gBAAgB,QAAgE;AAC9F,KAAI,CAAC,UAAU,WAAW,YACxB,QAAO;AAGT,KAAI,WAAW,WAab,QAAO;AAIT,QAAO;;;;;;AAOT,SAAS,eAAe,MAAc,aAA6B;CAEjE,MAAM,cAAc,KAAK,IAAI,GAAG,cAAc,KAAK,MAAM,cAAc,GAAI,CAAC;CAC5E,MAAM,YAAY,KAAK,IAAI,KAAK,QAAQ,cAAc,KAAK,MAAM,cAAc,GAAI,CAAC;CACpF,MAAM,aAAa,KAAK,MAAM,aAAa,UAAU;CAGrD,MAAM,gBAAgB;CACtB,IAAI,kBAAkB;AAEtB,MAAK,MAAM,SAAS,WAAW,SAAS,cAAc,EAAE;EACtD,MAAM,cAAc,cAAc,MAAM,QAAQ,MAAM,GAAG;AACzD,MAAI,eAAe,YACjB,mBAAkB;;AAItB,KAAI,oBAAoB,GACtB,QAAO;CAIT,MAAM,eAAe,KAAK,YAAY,KAAK,YAAY;AACvD,KAAI,eAAe,YACjB,QAAO,eAAe;AAIxB,QAAO;;;;;;AAOT,SAAgB,UAAU,MAAc,QAAsC;CAC5E,MAAM,YAAY,QAAQ,QAAQA;CAClC,MAAM,aAAa,QAAQ,WAAW;CACtC,MAAM,cAAc,KAAK,IAAI,GAAG,WAAW;CAC3C,MAAM,UAAU,KAAK,IAAI,aAAa,KAAK,IAAI,GAAG,YAAY,EAAE,CAAC;CACjE,MAAM,iBAAiB,QAAQ,kBAAkB;CACjD,MAAM,YAAY,gBAAgB,QAAQ,UAAU;CAMpD,MAAM,kBAHgB,KAAK,SAAS,iBAAiB,KAAK,MAAM,GAAG,eAAe,GAAG,MAGhD,QAAQ,QAAQ,IAAI,CAAC,MAAM;AAEhE,KAAI,CAAC,eACH,QAAO,EAAE;CAGX,MAAM,cAAc,UAAU,eAAe;AAG7C,KAAI,eAAe,UACjB,QAAO,CACL;EACE,MAAM;EACN,YAAY;EACZ,UAAU,eAAe;EACzB,QAAQ;EACT,CACF;CAGH,MAAMC,SAAsB,EAAE;CAC9B,MAAM,iBAAiB,cAAc,UAAU;CAC/C,MAAM,eAAe,cAAc,QAAQ;CAE3C,IAAI,aAAa;AAEjB,QAAO,aAAa,eAAe,QAAQ;EAEzC,MAAM,YAAY,KAAK,IAAI,aAAa,gBAAgB,eAAe,OAAO;EAG9E,MAAM,WACJ,YAAY,eAAe,SAAS,eAAe,gBAAgB,UAAU,GAAG;EAElF,MAAMC,cAAY,eAAe,MAAM,YAAY,SAAS,CAAC,MAAM;AAEnE,MAAIA,YACF,QAAO,KAAK;GACV,MAAMA;GACN;GACA;GACA,QAAQ,UAAUA,YAAU;GAC7B,CAAC;AAIJ,MAAI,YAAY,eAAe,OAC7B;EAIF,MAAM,YAAY,WAAW;AAC7B,eAAa,KAAK,IAAI,WAAW,aAAa,EAAE;AAGhD,MAAI,aAAa,eAAe,QAAQ;GACtC,MAAM,aAAa,eAAe,QAAQ,KAAK,WAAW;AAC1D,OAAI,eAAe,MAAM,aAAa,aAAa,aACjD,cAAa,aAAa;;;AAKhC,QAAO;;;;;AAMT,SAAgB,eAAe,MAAc,WAAiD;AAE5F,QADc,gBAAgB,UAAU,CAC3B,KAAK;;;;;AAMpB,SAAgB,cACd,MACA,YAAYF,sBACZ,WACS;AAET,QADc,gBAAgB,UAAU,CAC3B,KAAK,GAAG;;;;;AAMvB,SAAgB,iBACd,MACA,QAMA;CACA,MAAM,iBAAiB,QAAQ,kBAAkB;CACjD,MAAM,YAAY,QAAQ,QAAQA;CAClC,MAAM,UAAU,QAAQ,WAAW;CACnC,MAAM,YAAY,gBAAgB,QAAQ,UAAU;CAEpD,MAAM,cAAc,KAAK;CACzB,MAAM,eAAe,cAAc;CACnC,MAAM,kBAAkB,eAAe,iBAAiB;CAIxD,MAAM,kBAAkB,UADL,KAAK,MAAM,GAAG,gBAAgB,CAAC,QAAQ,QAAQ,IAAI,CAAC,MAAM,CAChC;CAG7C,IAAI,kBAAkB;AACtB,KAAI,kBAAkB,WAAW;EAC/B,MAAM,iBAAiB,KAAK,IAAI,SAAS,KAAK,IAAI,GAAG,YAAY,EAAE,CAAC;EACpE,MAAM,qBAAqB,KAAK,IAAI,GAAG,YAAY,eAAe;AAClE,oBAAkB,KAAK,MAAM,kBAAkB,kBAAkB,mBAAmB;;AAGtF,QAAO;EACL;EACA;EACA;EACA;EACD;;;;;;;;;;;;ACjOH,SAAgB,YACd,MACA,QACoB;AAEpB,KAAI,QAAQ,UAEV,QAAO,cADa,OAAO,UAAU,KAAK,CACT;AAInC,KAAI,QAAQ,SAAS,YAAY,OAAO,WACtC,QAAO,cAAc,OAAO,WAAW;CAIzC,MAAM,OAAO,QAAQ,QAAQ;AAE7B,SAAQ,MAAR;EACE,KAAK,cACH,QAAO,kBAAkB,KAAK;EAEhC,KAAK,gBACH,QAAO,mBAAmB,KAAK;EAEjC,KAAK,SAEH,QAAO,kBAAkB,KAAK;EAEhC,SAAS;GAEP,MAAMG,cAAqB;AAC3B,SAAM,IAAI,MAAM,uBAAuB,cAAc;;;;;;;AAQ3D,SAAS,kBAAkB,MAAgD;AACzE,KAAI,KAAK,YACP,QAAO,cAAc,KAAK,YAAY;AAIxC,KAAI,KAAK,QACP,QAAO,cAAc,cAAc,KAAK,QAAQ,CAAC;AAGnD,KAAI,KAAK,QACP,QAAO,cAAc,KAAK,QAAQ;AAGpC,KAAI,KAAK,YACP,QAAO,cAAc,KAAK,YAAY;;;;;;AAU1C,SAAS,mBAAmB,MAAgD;CAC1E,MAAMC,QAAkB,EAAE;AAG1B,KAAI,KAAK,MACP,OAAM,KAAK,KAAK,MAAM;AAIxB,KAAI,KAAK,QACP,OAAM,KAAK,KAAK,QAAQ;UACf,KAAK,QACd,OAAM,KAAK,KAAK,QAAQ;UACf,KAAK,YACd,OAAM,KAAK,KAAK,YAAY;AAG9B,KAAI,MAAM,WAAW,EACnB;AAGF,QAAO,cAAc,MAAM,KAAK,OAAO,CAAC;;;;;;;;AAS1C,SAAS,cAAc,MAAsB;AAC3C,KAAI,CAAC,KACH,QAAO;AAKT,QACE,KAEG,QAJoB,qCAIM,GAAG,CAE7B,QAAQ,WAAW,IAAI,CAEvB,QAAQ,WAAW,OAAO,CAE1B,MAAM,KAAK,CACX,KAAK,SAAS,KAAK,MAAM,CAAC,CAC1B,KAAK,KAAK,CAEV,MAAM;;;;;;AAQb,SAAS,cAAc,UAA0B;AAC/C,QACE,SAEG,QAAQ,mBAAmB,GAAG,CAC9B,QAAQ,YAAY,GAAG,CAEvB,QAAQ,0BAA0B,KAAK,CAEvC,QAAQ,2BAA2B,KAAK,CAExC,QAAQ,gBAAgB,GAAG,CAE3B,QAAQ,oBAAoB,KAAK,CACjC,QAAQ,gBAAgB,KAAK,CAC7B,QAAQ,gBAAgB,KAAK,CAC7B,QAAQ,cAAc,KAAK,CAE3B,QAAQ,WAAW,GAAG,CAEtB,QAAQ,iBAAiB,GAAG,CAE5B,QAAQ,oBAAoB,GAAG,CAC/B,QAAQ,oBAAoB,GAAG;;;;;AAOtC,SAAgB,cAAc,MAA0B,YAAY,IAAqB;AACvF,KAAI,CAAC,KACH,QAAO;EACL,OAAO;EACP,QAAQ;EACT;AAGH,KAAI,KAAK,SAAS,UAChB,QAAO;EACL,OAAO;EACP,QAAQ,oBAAoB,KAAK,OAAO,KAAK,UAAU;EACxD;CAIH,MAAM,YAAY,KAAK,MAAM,MAAM,CAAC,QAAQ,MAAM,EAAE,SAAS,EAAE,CAAC;AAChE,KAAI,YAAY,EACd,QAAO;EACL,OAAO;EACP,QAAQ,4BAA4B,UAAU;EAC/C;AAGH,QAAO;EACL,OAAO;EACP;EACA;EACA,WAAW,KAAK;EACjB;;;;;;AAcH,SAAgB,aACd,MACA,QACA,YAAY,KACJ;CACR,MAAM,QAAQ,YAAY,MAAM,OAAO;AAEvC,KAAI,CAAC,MACH,QAAO;AAGT,KAAI,MAAM,UAAU,UAClB,QAAO;AAGT,QAAO,GAAG,MAAM,MAAM,GAAG,UAAU,CAAC;;;;;;;;AClNtC,SAAgB,oBAAoB,QAAyC;AAC3E,SAAQ,OAAO,MAAf;EACE,KAAK,OAEH,QAAO,QADS,OAAO,OAAO,QAAQ,QAAQ,OAAO,GAAG,CACjC,GAAG,OAAO,OAAO;EAE1C,KAAK,SACH,QAAO,UAAU,OAAO,SAAS;EACnC,SAAS;GACP,MAAMC,cAAqB;AAC3B,UAAO,OAAO,YAAY;;;;;;;AA0GhC,SAAgB,gBAAgB,cAA8B;AAC5D,SAAQ,cAAR;EACE,KAAK,SACH,QAAO;EACT,KAAK,QACH,QAAO;EACT,KAAK,eACH,QAAO;EACT,QACE,QAAO;;;;;;;;;;;;;;AClGb,IAAa,wBAAb,cACUC,mCAEV;CACE,AAAS,OAAO;CAEhB,AAAiB;CACjB,AAAiB;CAEjB,YAAY,QAA0D;AACpE,QAAM,OAAO;AAGb,OAAK,iBACH,OAAO,oBACL,OAAiB,WAChB;GACC,OAAO;GACP;GACD;AAGL,OAAK,iBACH,OAAO,oBACL,aAAwB;GACxB,MAAM,OAAO;AAGb,OAAI,MAAM,QAAQ,KAAK,KAAK,CAC1B,QAAO,KAAK,KAAK,KAAK,SAAkC,KAAK,UAAU;AAIzE,OAAI,MAAM,QAAQ,KAAK,WAAW,CAChC,QAAO,KAAK;AAId,OAAI,MAAM,QAAQ,KAAK,UAAU,CAC/B,QAAO,CAAC,KAAK,UAAsB;AAIrC,OAAI,MAAM,QAAQ,SAAS,CACzB,QAAO;AAGT,SAAM,IAAIC,8BACR,wEACA,mBACD;;;;;;CAOP,MAAM,MAAM,OAAiB,SAA+C;EAC1E,MAAM,QAAQ,QAAQ,SAAS,KAAK;EACpC,MAAM,OAAO,KAAK,eAAe,OAAO,MAAM;EAE9C,MAAM,EAAE,SAAS,MAAM,KAAK,MAAiB,KAAK,SAAS;GACzD;GACA,QAAQ,QAAQ;GACjB,CAAC;EAEF,MAAM,aAAa,KAAK,eAAe,KAAK;AAG5C,MAAI,WAAW,WAAW,MAAM,OAC9B,OAAM,IAAIA,8BACR,sCAAsC,MAAM,OAAO,QAAQ,WAAW,UACtE,mBACD;AAGH,SAAO,EAAE,YAAY;;;;;;AAOzB,SAAgB,oBACd,QACmB;AACnB,QAAO,IAAI,sBAAsB,OAAO;;;;;;;;;;;;;;AC5F1C,SAAgB,sBAAsB,SAKhB;CACpB,MAAM,SAAS,SAAS,UAAU,QAAQ,IAAI;AAC9C,KAAI,CAAC,OACH,OAAM,IAAI,MAAM,6EAA6E;CAG/F,MAAMC,UAAkC,EACtC,eAAe,UAAU,UAC1B;AACD,KAAI,SAAS,aACX,SAAQ,yBAAyB,QAAQ;AAG3C,QAAO,IAAI,sBAAwD;EACjE,SAAS,SAAS,WAAW;EAC7B,OAAO,SAAS,SAAS;EACzB;EACA,iBAAiB,OAAO,WAAW;GAAE,OAAO;GAAO;GAAO;EAC1D,iBAAiB,QAAQ,IAAI,KAAK,KAAK,SAAS,KAAK,UAAU;EAChE,CAAC;;;;;;;;;;;;;;AAmBJ,SAAgB,qBAAqB,SAKf;CACpB,MAAM,SAAS,QAAQ,UAAU,QAAQ,IAAI;AAC7C,KAAI,CAAC,OACH,OAAM,IAAI,MACR,yFACD;AAKH,QAAO,IAAI,sBAAwD;EACjE,SAHc,GAAG,QAAQ,SAAS,QAAQ,OAAO,GAAG,CAAC,sBAAsB,QAAQ,eAAe,0BAA0B,QAAQ;EAIpI,OAAO,QAAQ;EACf,SAAS,EAAE,WAAW,QAAQ;EAC9B,iBAAiB,WAAW,EAAE,OAAO,OAAO;EAC5C,iBAAiB,QAAQ,IAAI,KAAK,KAAK,SAAS,KAAK,UAAU;EAChE,CAAC;;;;;;;;;;;;;;;;;AA6BJ,SAAgB,sBAAsB,SAGhB;AACpB,QAAO,IAAI,sBAAwD;EACjE,SAAS,SAAS,WAAW;EAC7B,OAAO,SAAS,SAAS;EACzB,cAAc;EACd,cAAc;EACd,iBAAiB,OAAO,WAAW;GAAE;GAAO,QAAQ,MAAM;GAAI;EAC9D,iBAAiB,QAAQ,CAAC,IAAI,UAAU;EACzC,CAAC;;;;;;;;;;;;AAiBJ,SAAgB,2BAA2B,SAGrB;CACpB,MAAM,SAAS,QAAQ,UAAU,QAAQ,IAAI,YAAY,QAAQ,IAAI;CAErE,MAAMA,UAAkC,EAAE;AAC1C,KAAI,OACF,SAAQ,gBAAgB,UAAU;AAGpC,QAAO,IAAI,sBAAwD;EACjE,SAAS,+CAA+C,QAAQ;EAChE,OAAO,QAAQ;EACf;EACA,iBAAiB,WAAW,EAAE,QAAQ,OAAO;EAC7C,iBAAiB,aAAa;AAE5B,OAAI,MAAM,QAAQ,SAAS,EAAE;AAE3B,QAAI,MAAM,QAAQ,SAAS,GAAG,IAAI,OAAO,SAAS,GAAG,OAAO,SAC1D,QAAO;AAGT,WAAO,CAAC,SAAgC;;AAE1C,SAAM,IAAI,MAAM,yCAAyC;;EAE5D,CAAC;;;;;;;;;;AAsBJ,SAAgB,sBAAsB,SAKhB;CACpB,MAAM,SAAS,SAAS,UAAU,QAAQ,IAAI;AAC9C,KAAI,CAAC,OACH,OAAM,IAAI,MAAM,6EAA6E;AAG/F,QAAO,IAAI,sBAAwD;EACjE,SAAS;EACT,OAAO,SAAS,SAAS;EACzB,SAAS,EAAE,eAAe,UAAU,UAAU;EAC9C,iBAAiB,OAAO,WAAW;GACjC;GACA;GACA,YAAY,SAAS,aAAa;GACnC;EACD,iBAAiB,QAAQ,IAAI;EAC9B,CAAC;;;;;;;;;;;;;;;;;;;;;AA8CJ,SAAgB,4BACd,cACA,SAOmB;CACnB,IAAIC,WAA6C;CACjD,IAAIC,eAA8B;CAElC,MAAM,SAAS;EACb,OAAO,SAAS,SAAS;EACzB,WAAW,SAAS,aAAa;EACjC,SAAS,SAAS,WAAW;EAC7B,WAAW,SAAS,aAAa;EAClC;AAED,QAAO;EACL,MAAM;EACN,MAAM,MAAM,OAAiB,SAA+C;GAC1E,MAAM,QAAQ,QAAQ,SAAS,OAAO;AAGtC,OAAI,CAAC,YAAY,iBAAiB,OAAO;IACvC,MAAM,WAAW,SAAS;IAC1B,MAAM,MAAM,aAAa;IACzB,MAAM,gBAAgB,KAAK;AAE3B,QAAI,YAAY,IACd,KAAI,WAAW;AAGjB,QAAI;AACF,gBAAW,MAAM,aAAa,SAAS,sBAAsB,OAAO,EAClE,WAAW,OAAO,WACnB,CAAC;cACM;AACR,SAAI,YAAY,IACd,KAAI,kBAAkB,OACpB,QAAO,IAAI;SAEX,KAAI,WAAW;;AAIrB,mBAAe;;GAGjB,MAAMC,aAAyB,EAAE;AACjC,QAAK,MAAM,QAAQ,OAAO;IACxB,MAAM,SAAS,MAAM,SAAS,MAAM;KAClC,SAAS,OAAO;KAChB,WAAW,OAAO;KACnB,CAAC;AACF,eAAW,KAAK,MAAM,KAAK,OAAO,KAAK,CAAC;;AAG1C,UAAO,EAAE,YAAY;;EAExB;;;AAIH,MAAa,sBAAsB;CAEjC,SAAS;CAET,SAAS;CAET,WAAW;CAEX,cAAc;CACf;;;;;;;;AC7TD,SAAgB,wBAAwB,QAAoD;AAC1F,SAAQ,OAAO,MAAf;EACE,KAAK,OAEH,QAAO,oBAAoB,OAAO,OAAO;EAG3C,KAAK,SACH,QAAO,OAAO;EAGhB,QAGE,OAAM,IAAIC,8BACR,oCAFyB,OAE6C,QACtE,mBACD;;;;;;AAQP,SAAgB,oBAAoB,OAA4C;AAC9E,QACE,OAAO,UAAU,YACjB,UAAU,QACV,UAAU,SACV,OAAQ,MAA4B,SAAS,YAC7C,WAAW,SACX,OAAQ,MAA4B,UAAU;;;;;;;;;ACpDlD,MAAM,gBAAgB;AAItB,MAAM,gBAAgB;AAItB,MAAM,sBACJ;AAGF,MAAM,cAAc;AAGpB,MAAM,eACJ;AAGF,MAAM,WAAW;;;;;AAMjB,SAAgB,kBAAkB,QAA+D;CAC/F,MAAMC,WAAqD,EAAE;AAI7D,KAAI,OAAO,WACT,UAAS,KAAK;EAAE,MAAM;EAAc,SAAS;EAAqB,CAAC;AAErE,KAAI,OAAO,MACT,UAAS,KAAK;EAAE,MAAM;EAAS,SAAS;EAAe,CAAC;AAE1D,KAAI,OAAO,MACT,UAAS,KAAK;EAAE,MAAM;EAAS,SAAS;EAAe,CAAC;AAE1D,KAAI,OAAO,IACT,UAAS,KAAK;EAAE,MAAM;EAAO,SAAS;EAAa,CAAC;AAEtD,KAAI,OAAO,UACT,UAAS,KAAK;EAAE,MAAM;EAAa,SAAS;EAAc,CAAC;AAI7D,KAAI,OAAO,eACT,MAAK,IAAI,IAAI,GAAG,IAAI,OAAO,eAAe,QAAQ,KAAK;EACrD,MAAM,gBAAgB,OAAO,eAAe;AAC5C,MAAI,cACF,UAAS,KAAK;GACZ,MAAM,UAAU;GAChB,SAAS;GACV,CAAC;;AAKR,SAAQ,SAAkC;EACxC,IAAI,eAAe;EACnB,IAAI,kBAAkB;EACtB,MAAMC,mBAA2C,EAAE;AAEnD,OAAK,MAAM,EAAE,MAAM,aAAa,UAAU;AAExC,WAAQ,YAAY;GAIpB,MAAM,aADU,KAAK,MAAM,QAAQ,EACP,UAAU;AAEtC,OAAI,aAAa,GAAG;AAClB,mBAAe,aAAa,QAAQ,SAAS,SAAS;AACtD,uBAAmB;AACnB,qBAAiB,SAAS,iBAAiB,SAAS,KAAK;;;AAI7D,SAAO;GACL,MAAM;GACN,UAAU,kBAAkB;GAC5B,gBAAgB;GAChB;GACD;;;;;;;AAsBL,SAAgB,UAAU,MAA+B;AAQvD,QAPiB,kBAAkB;EACjC,OAAO;EACP,OAAO;EACP,YAAY;EACZ,KAAK;EACL,WAAW;EACZ,CAAC,CACc,KAAK;;;;;;AAOvB,SAAgB,YAAY,MAAc,QAA+C;CACvF,MAAMC,aAAiC;EACrC,OAAO,QAAQ,SAAS;EACxB,OAAO,QAAQ,SAAS;EACxB,YAAY,QAAQ,cAAc;EAClC,KAAK,QAAQ,OAAO;EACpB,WAAW,QAAQ,aAAa;EAChC,gBAAgB,QAAQ;EACzB;CAED,MAAMC,WAAqB,EAAE;AAE7B,KAAI,WAAW,MAAO,UAAS,KAAK,cAAc;AAClD,KAAI,WAAW,MAAO,UAAS,KAAK,cAAc;AAClD,KAAI,WAAW,WAAY,UAAS,KAAK,oBAAoB;AAC7D,KAAI,WAAW,IAAK,UAAS,KAAK,YAAY;AAC9C,KAAI,WAAW,UAAW,UAAS,KAAK,aAAa;AACrD,KAAI,WAAW,eAAgB,UAAS,KAAK,GAAG,WAAW,eAAe;AAE1E,MAAK,MAAM,WAAW,UAAU;AAC9B,UAAQ,YAAY;AACpB,MAAI,QAAQ,KAAK,KAAK,CACpB,QAAO;;AAIX,QAAO;;;;;ACvIT,MAAM,qBAAqB;;;;;AAM3B,SAAS,kBACP,gBACA,eACoB;AAEpB,KAAI,cACF,QAAO;AAIT,KAAI,eAAe,SAAS,OAC1B,QAAO,eAAe,OAAO;;;;;;AAWjC,eAAsB,mBACpB,MACA,SAC0B;CAC1B,MAAM,YAAY,KAAK,KAAK;AAE5B,KAAI;EAEF,MAAM,WAAW,wBAAwB,QAAQ,SAAS;EAC1D,MAAM,QAAQ,kBAAkB,QAAQ,UAAU,QAAQ,MAAM;EAIhE,MAAM,aAAa,cADF,YAAY,MAAM,QAAQ,MAAM,EACN,QAAQ,QAAQ,iBAAiB,GAAG;AAE/E,MAAI,CAAC,WAAW,MACd,QAAO,oBAAoB,WAAW,QAAQ,EAAE,OAAO,CAAC;EAG1D,MAAM,gBAAgB,WAAW;EACjC,IAAI,YAAY,WAAW;EAG3B,IAAI,cAAc;AAClB,MAAI,QAAQ,QAAQ,cAAc;GAEhC,MAAM,kBADW,kBAAkB,QAAQ,OAAO,aAAa,CAC9B,UAAU;AAC3C,eAAY,gBAAgB;AAC5B,iBAAc,gBAAgB;;EAIhC,MAAM,oBAAoB,yBAAyB,QAAQ,UAAU,QAAQ,QAAQ,UAAU;EAE/F,MAAM,WAAW,iBAAiB;GAChC,aAAa,oBAAoB,QAAQ,SAAS;GAClD;GACA,YAAY,QAAQ,QAAQ;GAC5B,aAAa,QAAQ,QAAQ;GAC7B,OAAO,QAAQ;GACf,UAAU;GACV,QAAQ,QAAQ;GAChB,cAAc,QAAQ,OAAO;GAC7B,SAAS;GACV,CAAC;EAEF,MAAM,QAAQ,QAAQ,OAAO,SAAS,iBAAiB;EACvD,MAAM,eAAe,MAAM,MAAM,IAAI,SAAS;AAE9C,MAAI,gBAAgB,aAAa,WAAW,WAAW;AAErD,OAAI,QAAQ,UACV,SAAQ,UAAU;IAChB,UAAU,SAAS;IACnB;IACA,aAAa,eAAe,UAAU;IACtC,kBAAkB,cAChB,aAAa,gBAAgB,QAAQ,aAAa,UAAU,aAAa,OAC1E;IACD,QAAQ,aAAa,OAAO;IAC5B,WAAW,KAAK,KAAK,GAAG;IACxB,QAAQ;IACR,SAAS;IACT;IACD,CAAC;AAGJ,UAAO;IACL,GAAG;IACH,QAAQ;KAAE,GAAG,aAAa;KAAQ,QAAQ;KAAM;IACjD;;EAIH,MAAM,SAAS,UAAU,WAAW,kBAAkB;EACtD,MAAM,iBACJ,QAAQ,WAAW,QAAQ,QAAQ,0BAC/B,UAAU,eAAe,kBAAkB,GAC3C;AAEN,MAAI,OAAO,WAAW,EACpB,QAAO,oBAAoB,6BAA6B,EAAE,OAAO,CAAC;EAIpE,MAAM,cAAc,QAAQ,YAAY;EACxC,MAAM,cACJ,aAAa,gBACZ,QAAQ,YAAY,YAAY,IAAIC,8BAAY,QAAQ,WAAW,UAAU,GAAG;EAEnF,MAAM,iBACJ,aAAa,mBACZ,QAAQ,YAAY,iBACjB,IAAIC,iCAAe,QAAQ,WAAW,eAAe,GACrD;EAEN,MAAM,cAAc,QAAQ,YAAY,eAAe;EACvD,MAAM,YAAY,aAAa,aAAa,IAAIC,4BAAU,YAAY;EAGtE,MAAMC,aAAyB,EAAE;EACjC,IAAI,cAAc;EAClB,IAAI,aAAa;AAEjB,OAAK,IAAI,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;GACtC,MAAM,QAAQ,OAAO;AACrB,OAAI,CAAC,MAAO;AAGZ,OAAI,YACF,OAAM,YAAY,SAAS;AAI7B,OAAI,gBAAgB,QAAQ,CAC1B,QAAO,oBAAoB,2BAA2B;IACpD;IACA,QAAQ;IACT,CAAC;AAIJ,SAAM,UAAU,QAAQ,YAAY;IAClC,MAAM,EAAE,qBAAW,MAAMC,iCACvB,OAAO,WAAW;AAChB,YAAO,SAAS,MAAM,CAAC,MAAM,KAAK,EAAE;MAClC;MACA,YAAY,QAAQ,QAAQ;MAC5B;MACD,CAAC;OAEJ,QAAQ,YACR;KAAE,gBAAgB,kBAAkB;KAAW,aAAa;KAAW,WAAW;KAAW,EAC7F,EACE,eAAe;AACb;OAEH,CACF;AAGD,QAAIC,SAAO,MACT,gBAAeA,SAAO,MAAM;QAE5B,gBAAe,MAAM;IAIvB,MAAM,YAAYA,SAAO,WAAW;AACpC,QAAI,WAAW;AACb,gBAAW,KAAK,UAAU;AAG1B,SAAI,QAAQ,SAAS;MACnB,MAAM,eAAe,iBAAiB,IAAI,QAAQ,MAAM;AACxD,cAAQ,QAAQ,cAAc,UAAU;;;KAG5C;;EAIJ,MAAM,cAAc,QAAQ,QAAQ,eAAe;EACnD,MAAM,aAAa,iBAAiB,YAAY,YAAY;EAG5D,MAAMC,SAA0B;GAC9B;GACA,QAAQ,OAAO;GACf,QAAQ,eAAe,eAAe,UAAU;GAChD,UAAU,iBAAiB,UAAU;GACrC,QAAQ;GACR,WAAW,KAAK,KAAK,GAAG;GACzB;EAGD,IAAIC;AAEJ,MAAI,WAAW,SAAS,SACtB,UAAS;GACP,QAAQ;GACK;GACb,QAAQ,WAAW;GACnB;GACD;MAED,UAAS;GACP,QAAQ;GACR,aAAa;GACb,SAAS,WAAW;GACpB;GACD;AAIH,QAAM,MAAM,IAAI,UAAU,QAAQ,EAChC,OAAO,QAAQ,OAAO,OACvB,CAAC;AAGF,MAAI,QAAQ,WAAW;GACrB,MAAMC,UAA4B;IAChC,UAAU,SAAS;IACnB;IACA,aAAa,OAAO;IACpB,kBAAkB,WAAW;IAC7B,QAAQ,OAAO;IACf,WAAW,OAAO;IAClB,QAAQ;IACR,SAAS;IACT;IACD;AACD,WAAQ,UAAU,QAAQ;;AAG5B,SAAO;UACA,OAAO;EACd,MAAM,SAAS,iBAAiB,QAAQ,MAAM,UAAU,OAAO,MAAM;AACrE,MAAI,iBAAiBC,iCAAe,CAAC,eAAe,UAAU,CAAC,SAAS,MAAM,KAAK,CACjF,OAAM;AAER,SAAO,oBAAoB,QAAQ,EACjC,WAAW,KAAK,KAAK,GAAG,WACzB,CAAC;;;AAIN,SAAS,yBACP,UACA,WAC8B;AAC9B,KAAI,CAAC,aAAa,aAAa,EAC7B,QAAO;CAGT,MAAM,WAAW,UAAU,QAAQ;CACnC,MAAM,cAAc,UAAU,WAAW;CACzC,MAAM,cAAc,KAAK,IAAI,UAAU,UAAU;CAIjD,MAAM,iBAAiB,KAAK,IAAI,aAAa,KAAK,IAAI,GAAG,cAAc,EAAE,CAAC;AAE1E,QAAO;EACL,GAAG;EACH,MAAM;EACN,SAAS;EACV;;;;;;AAOH,eAAsB,MAAM,MAAc,SAAqD;AAe7F,QAAO,mBAb4B,EACjC,aAAa,MACd,EAG0C;EACzC,GAAG;EACH,OAAO;GACL,GAAG,QAAQ;GACX,MAAM;GACP;EACF,CAEgD;;;;;;AAOnD,eAAsB,iBACpB,MACA,SAC0B;AAC1B,QAAO,mBAAmB,MAAM,QAAQ;;;;;AAM1C,SAAS,oBACP,QACA,eACkB;AAClB,QAAO;EACL,QAAQ;EACR;EACA,QAAQ,iBAAiB,EAAE;EAC5B"}