scrapex 0.5.3 → 1.0.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +551 -145
- package/dist/enhancer-ByjRD-t5.mjs +769 -0
- package/dist/enhancer-ByjRD-t5.mjs.map +1 -0
- package/dist/enhancer-j0xqKDJm.cjs +847 -0
- package/dist/enhancer-j0xqKDJm.cjs.map +1 -0
- package/dist/index-CDgcRnig.d.cts +268 -0
- package/dist/index-CDgcRnig.d.cts.map +1 -0
- package/dist/index-piS5wtki.d.mts +268 -0
- package/dist/index-piS5wtki.d.mts.map +1 -0
- package/dist/index.cjs +2007 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +580 -0
- package/dist/index.d.cts.map +1 -0
- package/dist/index.d.mts +580 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.mjs +1956 -0
- package/dist/index.mjs.map +1 -0
- package/dist/llm/index.cjs +334 -0
- package/dist/llm/index.cjs.map +1 -0
- package/dist/llm/index.d.cts +258 -0
- package/dist/llm/index.d.cts.map +1 -0
- package/dist/llm/index.d.mts +258 -0
- package/dist/llm/index.d.mts.map +1 -0
- package/dist/llm/index.mjs +317 -0
- package/dist/llm/index.mjs.map +1 -0
- package/dist/parsers/index.cjs +11 -0
- package/dist/parsers/index.d.cts +2 -0
- package/dist/parsers/index.d.mts +2 -0
- package/dist/parsers/index.mjs +3 -0
- package/dist/parsers-Bneuws8x.cjs +569 -0
- package/dist/parsers-Bneuws8x.cjs.map +1 -0
- package/dist/parsers-CwkYnyWY.mjs +482 -0
- package/dist/parsers-CwkYnyWY.mjs.map +1 -0
- package/dist/types-CadAXrme.d.mts +674 -0
- package/dist/types-CadAXrme.d.mts.map +1 -0
- package/dist/types-DPEtPihB.d.cts +674 -0
- package/dist/types-DPEtPihB.d.cts.map +1 -0
- package/package.json +79 -100
- package/dist/index.d.ts +0 -45
- package/dist/index.js +0 -8
- package/dist/scrapex.cjs.development.js +0 -1130
- package/dist/scrapex.cjs.development.js.map +0 -1
- package/dist/scrapex.cjs.production.min.js +0 -2
- package/dist/scrapex.cjs.production.min.js.map +0 -1
- package/dist/scrapex.esm.js +0 -1122
- package/dist/scrapex.esm.js.map +0 -1
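The dist layout above replaces the old single-bundle output (scrapex.cjs.*, scrapex.esm.js) with separate ESM (.mjs) and CJS (.cjs) builds, per-format type declarations (.d.mts / .d.cts), and split llm and parsers entry points. A minimal consumer sketch, assuming package.json wires these up as subpath exports; the specifiers and re-exported names below are assumptions based on the file list, not confirmed by this diff:

```ts
// Hypothetical imports against the 1.0.0-beta.1 layout.
// Root entry -> dist/index.mjs (ESM) or dist/index.cjs (CJS).
import { ScrapeError } from 'scrapex';
// LLM helpers -> dist/llm/index.mjs / dist/llm/index.cjs (assumed subpath export "./llm").
import { enhance } from 'scrapex/llm';
// Source parsers -> dist/parsers/index.mjs / dist/parsers/index.cjs (assumed subpath export "./parsers").
import { MarkdownParser } from 'scrapex/parsers';
```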
@@ -0,0 +1 @@
{"version":3,"file":"enhancer-j0xqKDJm.cjs","names":["message: string","DEFAULT_RETRY: Required<RetryConfig>","lastError: Error | undefined","DEFAULT_CIRCUIT_BREAKER: Required<CircuitBreakerConfig>","parsed: URL","dns","securityOptions: HttpSecurityOptions","result: FetchResult<T>","z","results: Partial<ScrapedData>","promises: Promise<void>[]","zodShape: Record<string, z.ZodTypeAny>","zodType: z.ZodTypeAny","z","prompt: string"],"sources":["../src/core/errors.ts","../src/common/errors.ts","../src/common/resilience.ts","../src/common/http-base.ts","../src/llm/types.ts","../src/llm/enhancer.ts"],"sourcesContent":["/**\n * Error codes for scraping failures\n */\nexport type ScrapeErrorCode =\n | 'FETCH_FAILED'\n | 'TIMEOUT'\n | 'INVALID_URL'\n | 'BLOCKED'\n | 'NOT_FOUND'\n | 'ROBOTS_BLOCKED'\n | 'PARSE_ERROR'\n | 'LLM_ERROR'\n | 'VALIDATION_ERROR';\n\n/**\n * Custom error class for scraping failures with structured error codes\n */\nexport class ScrapeError extends Error {\n public readonly code: ScrapeErrorCode;\n public readonly statusCode?: number;\n\n constructor(message: string, code: ScrapeErrorCode, statusCode?: number, cause?: Error) {\n super(message, { cause });\n this.name = 'ScrapeError';\n this.code = code;\n this.statusCode = statusCode;\n\n // Maintains proper stack trace in V8 environments\n if (Error.captureStackTrace) {\n Error.captureStackTrace(this, ScrapeError);\n }\n }\n\n /**\n * Create a ScrapeError from an unknown error\n */\n static from(error: unknown, code: ScrapeErrorCode = 'FETCH_FAILED'): ScrapeError {\n if (error instanceof ScrapeError) {\n return error;\n }\n\n if (error instanceof Error) {\n return new ScrapeError(error.message, code, undefined, error);\n }\n\n return new ScrapeError(String(error), code);\n }\n\n /**\n * Check if error is retryable (network issues, timeouts)\n */\n isRetryable(): boolean {\n return this.code === 'FETCH_FAILED' || this.code === 'TIMEOUT';\n }\n\n /**\n * Convert to a plain object for serialization\n */\n toJSON(): Record<string, unknown> {\n return {\n name: this.name,\n message: this.message,\n code: this.code,\n statusCode: this.statusCode,\n stack: this.stack,\n };\n }\n}\n","/**\n * Error normalization utilities for HTTP providers.\n * Maps HTTP status codes to consistent ScrapeError codes.\n */\n\nimport { ScrapeError, type ScrapeErrorCode } from '../core/errors.js';\n\n/**\n * HTTP status code to ScrapeError code mapping.\n */\nexport function getErrorCodeFromStatus(status: number): ScrapeErrorCode {\n if (status === 401 || status === 403) {\n return 'BLOCKED';\n }\n if (status === 404) {\n return 'NOT_FOUND';\n }\n if (status === 429) {\n return 'BLOCKED';\n }\n if (status === 408) {\n return 'TIMEOUT';\n }\n if (status >= 500) {\n return 'LLM_ERROR';\n }\n return 'FETCH_FAILED';\n}\n\n/**\n * Retryable HTTP status codes.\n */\nexport const RETRYABLE_STATUS_CODES = [\n 408, // Request Timeout\n 429, // Too Many Requests\n 500, // Internal Server Error\n 502, // Bad Gateway\n 503, // Service Unavailable\n 504, // Gateway Timeout\n] as const;\n\n/**\n * Check if an HTTP status code is retryable.\n */\nexport function isRetryableStatus(status: number): boolean {\n return RETRYABLE_STATUS_CODES.includes(status as (typeof RETRYABLE_STATUS_CODES)[number]);\n}\n\n/**\n * Parse error message from API response body.\n */\nexport async function parseErrorBody(response: Response): Promise<string> {\n try {\n const text = await response.text();\n try {\n const json = JSON.parse(text) as Record<string, unknown>;\n // Common error 
formats\n if (typeof json.error === 'object' && json.error !== null) {\n const error = json.error as Record<string, unknown>;\n return String(error.message ?? error.msg ?? JSON.stringify(error));\n }\n if (typeof json.error === 'string') {\n return json.error;\n }\n if (typeof json.message === 'string') {\n return json.message;\n }\n if (typeof json.detail === 'string') {\n return json.detail;\n }\n return text;\n } catch {\n return text || `HTTP ${response.status} ${response.statusText}`;\n }\n } catch {\n return `HTTP ${response.status} ${response.statusText}`;\n }\n}\n\n/**\n * Create a ScrapeError from an HTTP response.\n */\nexport async function createHttpError(\n response: Response,\n providerName: string,\n errorMapper?: (body: unknown) => string\n): Promise<ScrapeError> {\n const code = getErrorCodeFromStatus(response.status);\n\n let message: string;\n if (errorMapper) {\n try {\n const body = await response.json();\n message = errorMapper(body);\n } catch {\n message = await parseErrorBody(response);\n }\n } else {\n message = await parseErrorBody(response);\n }\n\n return new ScrapeError(\n `${providerName} API error (${response.status}): ${message}`,\n code,\n response.status\n );\n}\n\n/**\n * Normalize any error to ScrapeError.\n */\nexport function normalizeError(\n error: unknown,\n providerName: string,\n defaultCode: ScrapeErrorCode = 'LLM_ERROR'\n): ScrapeError {\n if (error instanceof ScrapeError) {\n return error;\n }\n\n if (error instanceof Error) {\n // Check for abort/timeout\n if (error.name === 'AbortError' || error.message.includes('timeout')) {\n return new ScrapeError(`${providerName} request timed out`, 'TIMEOUT', undefined, error);\n }\n\n // Check for network errors\n const code = (error as NodeJS.ErrnoException).code;\n if (code === 'ECONNREFUSED' || code === 'ENOTFOUND' || code === 'ENETUNREACH') {\n return new ScrapeError(\n `${providerName} connection failed: ${error.message}`,\n 'FETCH_FAILED',\n undefined,\n error\n );\n }\n\n return new ScrapeError(\n `${providerName} error: ${error.message}`,\n defaultCode,\n undefined,\n error\n );\n }\n\n return new ScrapeError(`${providerName} error: ${String(error)}`, defaultCode);\n}\n","/**\n * Shared resilience utilities for HTTP providers.\n * Provides retry, circuit breaker, rate limiting, timeout, and concurrency control.\n */\n\n// ─────────────────────────────────────────────────────────────\n// Types\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Retry configuration.\n */\nexport interface RetryConfig {\n /** Maximum retry attempts. @default 3 */\n maxAttempts?: number;\n /** Initial backoff delay in ms. @default 1000 */\n backoffMs?: number;\n /** Backoff multiplier. @default 2 */\n backoffMultiplier?: number;\n /** HTTP status codes to retry. @default [408, 429, 500, 502, 503, 504] */\n retryableStatuses?: number[];\n}\n\n/**\n * Circuit breaker configuration.\n */\nexport interface CircuitBreakerConfig {\n /** Failures before opening circuit. @default 5 */\n failureThreshold?: number;\n /** Time before attempting to close circuit. 
@default 30000 */\n resetTimeoutMs?: number;\n}\n\n/**\n * Rate limit configuration.\n */\nexport interface RateLimitConfig {\n /** Max requests per minute */\n requestsPerMinute?: number;\n /** Max tokens per minute (for LLM providers) */\n tokensPerMinute?: number;\n}\n\n/**\n * Circuit breaker state.\n */\nexport type CircuitState = 'closed' | 'open' | 'half-open';\n\n/**\n * Internal circuit breaker state.\n */\nexport interface CircuitBreakerState {\n state: CircuitState;\n failures: number;\n lastFailureTime?: number;\n nextAttemptTime?: number;\n}\n\n/**\n * Resilience configuration for HTTP providers.\n */\nexport interface ResilienceConfig {\n retry?: RetryConfig;\n circuitBreaker?: CircuitBreakerConfig;\n rateLimit?: RateLimitConfig;\n /** Request timeout in ms. @default 30000 */\n timeoutMs?: number;\n /** Max concurrent requests. @default 1 */\n concurrency?: number;\n /** Optional shared state for circuit breaker / rate limiter / semaphore */\n state?: ResilienceState;\n}\n\n/**\n * Shared resilience state for persistence across calls.\n */\nexport interface ResilienceState {\n circuitBreaker?: {\n isOpen(): boolean;\n recordSuccess(): void;\n recordFailure(): void;\n getState?(): CircuitState;\n };\n rateLimiter?: {\n acquire(): Promise<void>;\n };\n semaphore?: {\n execute<T>(fn: () => Promise<T>): Promise<T>;\n };\n}\n\n// ─────────────────────────────────────────────────────────────\n// Retry Logic\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Default retry configuration.\n */\nconst DEFAULT_RETRY: Required<RetryConfig> = {\n maxAttempts: 3,\n backoffMs: 1000,\n backoffMultiplier: 2,\n retryableStatuses: [408, 429, 500, 502, 503, 504],\n};\n\n/**\n * Errors that should be retried (transient failures).\n */\nconst RETRYABLE_ERROR_CODES = [\n 'ECONNRESET',\n 'ETIMEDOUT',\n 'ECONNREFUSED',\n 'EPIPE',\n 'ENOTFOUND',\n 'ENETUNREACH',\n 'EAI_AGAIN',\n];\n\n/**\n * Check if an error is retryable.\n */\nexport function isRetryableError(\n error: unknown,\n retryableStatuses: number[] = DEFAULT_RETRY.retryableStatuses\n): boolean {\n if (error instanceof Error) {\n // Check for network errors\n const code = (error as NodeJS.ErrnoException).code;\n if (code && RETRYABLE_ERROR_CODES.includes(code)) {\n return true;\n }\n\n // Check for HTTP status codes on error object\n if ('statusCode' in error && typeof error.statusCode === 'number') {\n return retryableStatuses.includes(error.statusCode);\n }\n if ('status' in error && typeof error.status === 'number') {\n return retryableStatuses.includes(error.status);\n }\n\n // Check for timeout/fetch errors by code\n if ('code' in error) {\n const errCode = error.code as string;\n if (errCode === 'TIMEOUT' || errCode === 'FETCH_FAILED') {\n return true;\n }\n }\n\n // Check message for common retryable patterns\n const message = error.message.toLowerCase();\n if (\n message.includes('timeout') ||\n message.includes('rate limit') ||\n message.includes('too many requests') ||\n message.includes('temporarily unavailable')\n ) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Sleep for specified milliseconds.\n */\nexport function sleep(ms: number): Promise<void> {\n return new Promise((resolve) => setTimeout(resolve, ms));\n}\n\n/**\n * Execute a function with retry logic.\n */\nexport async function withRetry<T>(\n fn: () => Promise<T>,\n config?: RetryConfig,\n onRetry?: (attempt: number, error: Error, delayMs: number) => void\n): Promise<{ result: T; attempts: number }> {\n const maxAttempts = 
config?.maxAttempts ?? DEFAULT_RETRY.maxAttempts;\n const backoffMs = config?.backoffMs ?? DEFAULT_RETRY.backoffMs;\n const multiplier = config?.backoffMultiplier ?? DEFAULT_RETRY.backoffMultiplier;\n const retryableStatuses = config?.retryableStatuses ?? DEFAULT_RETRY.retryableStatuses;\n\n let lastError: Error | undefined;\n\n for (let attempt = 1; attempt <= maxAttempts; attempt++) {\n try {\n const result = await fn();\n return { result, attempts: attempt };\n } catch (error) {\n lastError = error instanceof Error ? error : new Error(String(error));\n\n // Don't retry on last attempt or non-retryable errors\n if (attempt === maxAttempts || !isRetryableError(error, retryableStatuses)) {\n throw lastError;\n }\n\n // Calculate delay with exponential backoff\n const delay = backoffMs * multiplier ** (attempt - 1);\n\n // Add jitter (±10%)\n const jitter = delay * (0.9 + Math.random() * 0.2);\n\n onRetry?.(attempt, lastError, jitter);\n\n await sleep(jitter);\n }\n }\n\n throw lastError ?? new Error('Retry failed');\n}\n\n// ─────────────────────────────────────────────────────────────\n// Timeout\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Execute a function with timeout.\n */\nexport async function withTimeout<T>(\n fn: (signal: AbortSignal) => Promise<T>,\n timeoutMs: number\n): Promise<T> {\n const controller = new AbortController();\n const timeoutId = setTimeout(() => controller.abort(), timeoutMs);\n\n try {\n return await fn(controller.signal);\n } finally {\n clearTimeout(timeoutId);\n }\n}\n\n/**\n * Create an AbortSignal that times out after specified milliseconds.\n * If parentSignal is provided, this signal will abort when the parent aborts.\n */\nexport function createTimeoutSignal(timeoutMs: number, parentSignal?: AbortSignal): AbortSignal {\n const controller = new AbortController();\n const timeoutId = setTimeout(() => controller.abort(), timeoutMs);\n timeoutId.unref?.();\n\n const clear = () => clearTimeout(timeoutId);\n controller.signal.addEventListener('abort', clear, { once: true });\n\n if (parentSignal) {\n if (parentSignal.aborted) {\n clear();\n controller.abort(parentSignal.reason);\n return controller.signal;\n }\n\n parentSignal.addEventListener(\n 'abort',\n () => {\n clear();\n controller.abort(parentSignal.reason);\n },\n { once: true }\n );\n }\n return controller.signal;\n}\n\n// ─────────────────────────────────────────────────────────────\n// Circuit Breaker\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Default circuit breaker configuration.\n */\nconst DEFAULT_CIRCUIT_BREAKER: Required<CircuitBreakerConfig> = {\n failureThreshold: 5,\n resetTimeoutMs: 30000,\n};\n\n/**\n * Error thrown when circuit breaker is open.\n */\nexport class CircuitOpenError extends Error {\n readonly isCircuitOpen = true;\n\n constructor(message: string) {\n super(message);\n this.name = 'CircuitOpenError';\n }\n}\n\n/**\n * Circuit breaker implementation.\n * Prevents cascade failures by stopping requests when failure rate is high.\n */\nexport class CircuitBreaker {\n private state: CircuitBreakerState;\n private readonly failureThreshold: number;\n private readonly resetTimeoutMs: number;\n\n constructor(config?: CircuitBreakerConfig) {\n this.failureThreshold = config?.failureThreshold ?? DEFAULT_CIRCUIT_BREAKER.failureThreshold;\n this.resetTimeoutMs = config?.resetTimeoutMs ?? 
DEFAULT_CIRCUIT_BREAKER.resetTimeoutMs;\n this.state = {\n state: 'closed',\n failures: 0,\n };\n }\n\n /**\n * Check if requests are blocked.\n */\n isOpen(): boolean {\n this.updateState();\n return this.state.state === 'open';\n }\n\n /**\n * Get current circuit state.\n */\n getState(): CircuitState {\n this.updateState();\n return this.state.state;\n }\n\n /**\n * Record a successful request.\n */\n recordSuccess(): void {\n this.state.failures = 0;\n this.state.state = 'closed';\n this.state.lastFailureTime = undefined;\n this.state.nextAttemptTime = undefined;\n }\n\n /**\n * Record a failed request.\n */\n recordFailure(): void {\n this.state.failures++;\n this.state.lastFailureTime = Date.now();\n\n if (this.state.failures >= this.failureThreshold) {\n this.state.state = 'open';\n this.state.nextAttemptTime = Date.now() + this.resetTimeoutMs;\n }\n }\n\n /**\n * Execute a function with circuit breaker protection.\n */\n async execute<T>(fn: () => Promise<T>): Promise<T> {\n if (this.isOpen()) {\n const nextAttempt = this.state.nextAttemptTime\n ? new Date(this.state.nextAttemptTime).toISOString()\n : 'unknown';\n throw new CircuitOpenError(`Circuit breaker is open. Next attempt at ${nextAttempt}`);\n }\n\n try {\n const result = await fn();\n this.recordSuccess();\n return result;\n } catch (error) {\n this.recordFailure();\n throw error;\n }\n }\n\n /**\n * Reset the circuit breaker.\n */\n reset(): void {\n this.state = {\n state: 'closed',\n failures: 0,\n };\n }\n\n /**\n * Update state based on time (open -> half-open transition).\n */\n private updateState(): void {\n if (\n this.state.state === 'open' &&\n this.state.nextAttemptTime &&\n Date.now() >= this.state.nextAttemptTime\n ) {\n this.state.state = 'half-open';\n }\n }\n}\n\n// ─────────────────────────────────────────────────────────────\n// Rate Limiting\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Token bucket rate limiter.\n */\nexport class RateLimiter {\n private tokens: number;\n private lastRefill: number;\n private readonly maxTokens: number;\n private readonly refillRate: number; // tokens per second\n\n constructor(config: RateLimitConfig) {\n // Convert requests per minute to tokens per second\n const requestsPerSecond = (config.requestsPerMinute ?? 
60) / 60;\n\n this.maxTokens = Math.max(1, Math.ceil(requestsPerSecond * 10)); // 10 second burst\n this.refillRate = requestsPerSecond;\n this.tokens = this.maxTokens;\n this.lastRefill = Date.now();\n }\n\n /**\n * Check if a request is allowed without consuming tokens.\n */\n canProceed(): boolean {\n this.refill();\n return this.tokens >= 1;\n }\n\n /**\n * Attempt to acquire tokens for a request.\n * Returns true if allowed, false if rate limited.\n */\n tryAcquire(tokens = 1): boolean {\n this.refill();\n\n if (this.tokens >= tokens) {\n this.tokens -= tokens;\n return true;\n }\n\n return false;\n }\n\n /**\n * Wait until tokens are available, then acquire.\n */\n async acquire(tokens = 1): Promise<void> {\n if (this.tryAcquire(tokens)) {\n return;\n }\n\n // Calculate precise wait time for required tokens\n this.refill();\n const tokensNeeded = tokens - this.tokens;\n const waitMs = Math.ceil((tokensNeeded / this.refillRate) * 1000);\n\n if (waitMs > 0) {\n await sleep(waitMs);\n }\n\n // After waiting, acquire should succeed (but retry if timing drift)\n while (!this.tryAcquire(tokens)) {\n await sleep(Math.ceil((1 / this.refillRate) * 1000));\n }\n }\n\n /**\n * Get time until next token is available (in milliseconds).\n */\n getWaitTime(): number {\n this.refill();\n\n if (this.tokens >= 1) {\n return 0;\n }\n\n return Math.ceil((1 / this.refillRate) * 1000);\n }\n\n /**\n * Refill tokens based on elapsed time.\n */\n private refill(): void {\n const now = Date.now();\n const elapsed = (now - this.lastRefill) / 1000; // seconds\n const newTokens = elapsed * this.refillRate;\n\n this.tokens = Math.min(this.maxTokens, this.tokens + newTokens);\n this.lastRefill = now;\n }\n}\n\n// ─────────────────────────────────────────────────────────────\n// Concurrency Control\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Semaphore for limiting concurrent operations.\n */\nexport class Semaphore {\n private permits: number;\n private waiting: Array<() => void> = [];\n\n constructor(permits: number) {\n this.permits = permits;\n }\n\n /**\n * Acquire a permit, waiting if necessary.\n */\n async acquire(): Promise<void> {\n if (this.permits > 0) {\n this.permits--;\n return;\n }\n\n return new Promise<void>((resolve) => {\n this.waiting.push(resolve);\n });\n }\n\n /**\n * Release a permit.\n */\n release(): void {\n const next = this.waiting.shift();\n if (next) {\n next();\n } else {\n this.permits++;\n }\n }\n\n /**\n * Execute function with semaphore protection.\n */\n async execute<T>(fn: () => Promise<T>): Promise<T> {\n await this.acquire();\n try {\n return await fn();\n } finally {\n this.release();\n }\n }\n}\n\n// ─────────────────────────────────────────────────────────────\n// Combined Resilience Wrapper\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Execute a function with all resilience features.\n *\n * @param fn - The async function to execute with resilience\n * @param config - Configuration for retry and timeout behavior\n * @param state - Pre-instantiated resilience primitives for stateful features.\n * Circuit breaker, rate limiter, and semaphore must be instantiated by the caller\n * and passed via state to enable those features. 
This allows sharing state across\n * multiple calls for proper circuit breaker tracking and rate limiting.\n * The config parameter is only used for retry and timeout settings.\n * @param callbacks - Optional callbacks for retry events\n */\nexport async function withResilience<T>(\n fn: (signal: AbortSignal) => Promise<T>,\n config?: ResilienceConfig,\n state?: ResilienceState,\n callbacks?: {\n onRetry?: (attempt: number, error: Error, delayMs: number) => void;\n }\n): Promise<{ result: T; attempts: number }> {\n const timeoutMs = config?.timeoutMs ?? 30000;\n\n // Check circuit breaker\n if (state?.circuitBreaker?.isOpen()) {\n throw new CircuitOpenError('Circuit breaker is open');\n }\n\n // Acquire rate limit token\n if (state?.rateLimiter) {\n await state.rateLimiter.acquire();\n }\n\n // Wrap with concurrency control\n const executeWithConcurrency = async (): Promise<{ result: T; attempts: number }> => {\n // Wrap with timeout\n const withTimeoutFn = () => withTimeout(fn, timeoutMs);\n\n // Wrap with retry\n try {\n const retryResult = await withRetry(withTimeoutFn, config?.retry, callbacks?.onRetry);\n state?.circuitBreaker?.recordSuccess();\n return retryResult;\n } catch (error) {\n state?.circuitBreaker?.recordFailure();\n throw error;\n }\n };\n\n // Apply semaphore if available\n if (state?.semaphore) {\n return state.semaphore.execute(executeWithConcurrency);\n }\n\n return executeWithConcurrency();\n}\n","/**\n * Shared HTTP provider infrastructure for LLM and Embedding providers.\n * Provides SSRF protection, resilience, and error normalization.\n */\nimport { promises as dns } from 'node:dns';\nimport { isIP } from 'node:net';\nimport { ScrapeError } from '../core/errors.js';\nimport { createHttpError } from './errors.js';\nimport type { ResilienceConfig, ResilienceState, RetryConfig } from './resilience.js';\nimport {\n CircuitBreaker,\n CircuitOpenError,\n RateLimiter,\n Semaphore,\n withRetry,\n withTimeout,\n} from './resilience.js';\n\nexport type {\n CircuitBreakerConfig,\n CircuitState,\n RateLimitConfig,\n ResilienceConfig,\n ResilienceState,\n RetryConfig,\n} from './resilience.js';\n// Re-export resilience utilities and types for convenience\nexport {\n CircuitBreaker,\n CircuitOpenError,\n isRetryableError,\n RateLimiter,\n Semaphore,\n sleep,\n withResilience,\n withRetry,\n withTimeout,\n} from './resilience.js';\n\n// ─────────────────────────────────────────────────────────────\n// Types\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Security options for HTTP providers.\n */\nexport interface HttpSecurityOptions {\n /** Require HTTPS protocol. @default true */\n requireHttps?: boolean;\n /** Allow private/internal IP addresses. @default false */\n allowPrivate?: boolean;\n /** Resolve DNS and validate IPs before request. @default true */\n resolveDns?: boolean;\n /** Allow HTTP redirects. 
@default false */\n allowRedirects?: boolean;\n}\n\n/**\n * Base configuration for HTTP providers.\n */\nexport interface BaseHttpConfig<TError = unknown> {\n /** Base URL for the API endpoint */\n baseUrl: string;\n /** Model identifier */\n model: string;\n /** Additional headers */\n headers?: Record<string, string>;\n /** Extract error message from failed response */\n errorMapper?: (response: TError) => string;\n /** Security options */\n requireHttps?: boolean;\n allowPrivate?: boolean;\n resolveDns?: boolean;\n allowRedirects?: boolean;\n /** Resilience options */\n resilience?: ResilienceConfig;\n}\n\n// ─────────────────────────────────────────────────────────────\n// SSRF Protection\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Private IP ranges blocked for SSRF protection.\n */\nconst PRIVATE_IP_PATTERNS = [\n /^10\\./, // 10.0.0.0/8\n /^172\\.(1[6-9]|2\\d|3[01])\\./, // 172.16.0.0/12\n /^192\\.168\\./, // 192.168.0.0/16\n /^127\\./, // 127.0.0.0/8\n /^0\\./, // 0.0.0.0/8\n /^169\\.254\\./, // Link-local\n /^100\\.(6[4-9]|[7-9]\\d|1[01]\\d|12[0-7])\\./, // Carrier-grade NAT 100.64.0.0/10 (RFC 6598)\n /^::1$/, // IPv6 loopback\n /^(fc|fd)[0-9a-f]{2}:/i, // IPv6 private (ULA)\n /^fe80:/i, // IPv6 link-local\n /^fec0:/i, // IPv6 site-local (deprecated but may exist)\n /^::ffff:(10\\.|172\\.(1[6-9]|2\\d|3[01])\\.|192\\.168\\.|127\\.|0\\.)/i, // IPv4-mapped IPv6\n /^localhost$/i,\n];\n\n/**\n * Check if a hostname/IP is private.\n */\nexport function isPrivateHost(hostname: string): boolean {\n return PRIVATE_IP_PATTERNS.some((pattern) => pattern.test(hostname));\n}\n\n/**\n * Validate a URL for security.\n */\nexport function validateUrl(url: string, options: HttpSecurityOptions = {}): URL {\n const requireHttps = options.requireHttps ?? true;\n const allowPrivate = options.allowPrivate ?? false;\n\n let parsed: URL;\n try {\n parsed = new URL(url);\n } catch {\n throw new ScrapeError(`Invalid URL: ${url}`, 'INVALID_URL');\n }\n\n // HTTPS enforcement\n if (requireHttps && parsed.protocol !== 'https:') {\n throw new ScrapeError(`HTTPS required. Got: ${parsed.protocol}`, 'VALIDATION_ERROR');\n }\n\n // Private IP check (before DNS resolution)\n if (!allowPrivate && isPrivateHost(parsed.hostname)) {\n throw new ScrapeError(\n `Private/internal addresses not allowed: ${parsed.hostname}`,\n 'VALIDATION_ERROR'\n );\n }\n\n return parsed;\n}\n\n/**\n * Validate URL and resolve DNS to check for private IPs.\n */\nexport async function validateUrlWithDns(\n url: string,\n options: HttpSecurityOptions = {}\n): Promise<void> {\n const parsed = validateUrl(url, options);\n const resolveDns = options.resolveDns ?? true;\n const allowPrivate = options.allowPrivate ?? false;\n\n if (!resolveDns || allowPrivate) {\n return;\n }\n\n const host = parsed.hostname;\n\n // Skip if already an IP address\n if (isIP(host)) {\n return;\n }\n\n // Resolve DNS and check all addresses\n try {\n const addresses = await dns.lookup(host, { all: true });\n for (const addr of addresses) {\n if (isPrivateHost(addr.address)) {\n throw new ScrapeError(\n `DNS resolved to private address: ${host} -> ${addr.address}`,\n 'VALIDATION_ERROR'\n );\n }\n }\n } catch (error) {\n if (error instanceof ScrapeError) {\n throw error;\n }\n const message = error instanceof Error ? 
error.message : String(error);\n throw new ScrapeError(`Failed to resolve hostname: ${host} (${message})`, 'FETCH_FAILED');\n }\n}\n\n// ─────────────────────────────────────────────────────────────\n// Base HTTP Provider\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Fetch request options for base provider.\n */\nexport interface FetchOptions {\n method?: 'GET' | 'POST';\n body?: unknown;\n headers?: Record<string, string>;\n signal?: AbortSignal;\n}\n\n/**\n * Result of a fetch request.\n */\nexport interface FetchResult<T> {\n data: T;\n status: number;\n headers: Headers;\n}\n\n/**\n * Base HTTP provider with shared security and resilience.\n */\nexport abstract class BaseHttpProvider<TError = unknown> {\n protected readonly baseUrl: string;\n protected readonly model: string;\n protected readonly headers: Record<string, string>;\n protected readonly errorMapper?: (response: TError) => string;\n\n // Security options\n protected readonly requireHttps: boolean;\n protected readonly allowPrivate: boolean;\n protected readonly resolveDns: boolean;\n protected readonly allowRedirects: boolean;\n\n // Resilience options\n protected readonly timeoutMs: number;\n protected readonly retryConfig?: RetryConfig;\n protected readonly concurrency: number;\n\n // Resilience state (initialized lazily or passed in)\n private circuitBreaker?: ResilienceState['circuitBreaker'];\n private rateLimiter?: ResilienceState['rateLimiter'];\n private semaphore?: ResilienceState['semaphore'];\n\n constructor(config: BaseHttpConfig<TError>) {\n this.baseUrl = config.baseUrl.replace(/\\/$/, '');\n this.model = config.model;\n this.headers = {\n 'Content-Type': 'application/json',\n ...config.headers,\n };\n this.errorMapper = config.errorMapper;\n\n // Security defaults\n this.requireHttps = config.requireHttps ?? true;\n this.allowPrivate = config.allowPrivate ?? false;\n this.resolveDns = config.resolveDns ?? true;\n this.allowRedirects = config.allowRedirects ?? false;\n\n // Resilience defaults\n this.timeoutMs = config.resilience?.timeoutMs ?? 30000;\n this.retryConfig = config.resilience?.retry;\n this.concurrency = config.resilience?.concurrency ?? 1;\n\n const sharedState = config.resilience?.state;\n\n // Initialize resilience components if configured or provided\n this.circuitBreaker =\n sharedState?.circuitBreaker ??\n (config.resilience?.circuitBreaker\n ? new CircuitBreaker(config.resilience.circuitBreaker)\n : undefined);\n this.rateLimiter =\n sharedState?.rateLimiter ??\n (config.resilience?.rateLimit ? new RateLimiter(config.resilience.rateLimit) : undefined);\n this.semaphore = sharedState?.semaphore ?? 
new Semaphore(this.concurrency);\n\n // Validate URL on construction\n validateUrl(this.baseUrl, {\n requireHttps: this.requireHttps,\n allowPrivate: this.allowPrivate,\n });\n }\n\n /**\n * Get the current resilience state for persistence across calls.\n */\n getResilienceState(): ResilienceState {\n return {\n circuitBreaker: this.circuitBreaker,\n rateLimiter: this.rateLimiter,\n semaphore: this.semaphore,\n };\n }\n\n /**\n * Make an HTTP request with security and resilience.\n */\n protected async fetch<T>(url: string, options: FetchOptions = {}): Promise<FetchResult<T>> {\n const securityOptions: HttpSecurityOptions = {\n requireHttps: this.requireHttps,\n allowPrivate: this.allowPrivate,\n resolveDns: this.resolveDns,\n allowRedirects: this.allowRedirects,\n };\n\n // Validate URL with DNS resolution\n await validateUrlWithDns(url, securityOptions);\n\n // Check circuit breaker before proceeding\n if (this.circuitBreaker?.isOpen()) {\n throw new CircuitOpenError('Circuit breaker is open. Too many recent failures.');\n }\n\n // Acquire rate limit token if configured\n if (this.rateLimiter) {\n await this.rateLimiter.acquire();\n }\n\n // The actual fetch operation\n const doFetch = async (signal: AbortSignal): Promise<FetchResult<T>> => {\n const composedSignal = options.signal ? AbortSignal.any([options.signal, signal]) : signal;\n const response = await fetch(url, {\n method: options.method ?? 'POST',\n headers: { ...this.headers, ...options.headers },\n body: options.body ? JSON.stringify(options.body) : undefined,\n signal: composedSignal,\n redirect: this.allowRedirects ? 'follow' : 'error',\n });\n\n // Validate redirect target if followed\n if (this.allowRedirects && response.redirected) {\n await validateUrlWithDns(response.url, securityOptions);\n }\n\n // Handle errors\n if (!response.ok) {\n // Cast is safe: errorMapper receives the parsed JSON body which matches TError at runtime\n throw await createHttpError(\n response,\n this.constructor.name,\n this.errorMapper as ((body: unknown) => string) | undefined\n );\n }\n\n const data = (await response.json()) as T;\n\n return {\n data,\n status: response.status,\n headers: response.headers,\n };\n };\n\n // Wrap with concurrency control (semaphore)\n const executeWithConcurrency = async (): Promise<FetchResult<T>> => {\n if (!this.semaphore) {\n throw new ScrapeError('Semaphore not initialized', 'VALIDATION_ERROR');\n }\n return this.semaphore.execute(async () => {\n // Apply timeout\n const fetchWithTimeout = async (): Promise<FetchResult<T>> => {\n return withTimeout((signal) => doFetch(signal), this.timeoutMs);\n };\n\n // Apply retry if configured\n try {\n let result: FetchResult<T>;\n if (this.retryConfig) {\n const retryResult = await withRetry(fetchWithTimeout, this.retryConfig);\n result = retryResult.result;\n } else {\n result = await fetchWithTimeout();\n }\n\n // Record success for circuit breaker\n this.circuitBreaker?.recordSuccess();\n return result;\n } catch (error) {\n // Record failure for circuit breaker\n this.circuitBreaker?.recordFailure();\n throw error;\n }\n });\n };\n\n return executeWithConcurrency();\n }\n}\n\n// ─────────────────────────────────────────────────────────────\n// Utility Exports\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Create standard headers for API requests.\n */\nexport function createHeaders(\n apiKey?: string,\n additionalHeaders?: Record<string, string>\n): Record<string, string> {\n const headers: Record<string, string> = {\n 
'Content-Type': 'application/json',\n };\n\n if (apiKey) {\n headers.Authorization = `Bearer ${apiKey}`;\n }\n\n if (additionalHeaders) {\n Object.assign(headers, additionalHeaders);\n }\n\n return headers;\n}\n","import { z } from 'zod';\n\n/**\n * LLM completion options\n */\nexport interface CompletionOptions {\n maxTokens?: number;\n temperature?: number;\n systemPrompt?: string;\n}\n\n/**\n * LLM Provider interface - implemented by all providers\n */\nexport interface LLMProvider {\n readonly name: string;\n\n /**\n * Generate a text completion\n */\n complete(prompt: string, options?: CompletionOptions): Promise<string>;\n\n /**\n * Generate a structured JSON completion with Zod validation\n */\n completeJSON<T>(prompt: string, schema: z.ZodType<T>, options?: CompletionOptions): Promise<T>;\n}\n\n/**\n * Enhancement result types\n */\nexport interface SummaryResult {\n summary: string;\n}\n\nexport interface TagsResult {\n tags: string[];\n}\n\nexport interface EntitiesResult {\n people: string[];\n organizations: string[];\n technologies: string[];\n locations: string[];\n concepts: string[];\n}\n\nexport interface ClassifyResult {\n contentType: string;\n confidence: number;\n}\n\n/**\n * Zod schemas for LLM outputs\n */\nexport const SummarySchema = z.object({\n summary: z.string().describe('A concise 2-3 sentence summary of the content'),\n});\n\nexport const TagsSchema = z.object({\n tags: z.array(z.string()).describe('5-10 relevant tags/keywords'),\n});\n\nexport const EntitiesSchema = z.object({\n people: z.array(z.string()).describe('People mentioned'),\n organizations: z.array(z.string()).describe('Organizations/companies'),\n technologies: z.array(z.string()).describe('Technologies/tools/frameworks'),\n locations: z.array(z.string()).describe('Locations/places'),\n concepts: z.array(z.string()).describe('Key concepts/topics'),\n});\n\nexport const ClassifySchema = z.object({\n contentType: z\n .enum(['article', 'repo', 'docs', 'package', 'video', 'tool', 'product', 'unknown'])\n .describe('The type of content'),\n confidence: z.number().min(0).max(1).describe('Confidence score 0-1'),\n});\n","import { z } from 'zod';\nimport type {\n EnhancementType,\n ExtractedEntities,\n ExtractionSchema,\n ScrapedData,\n} from '@/core/types.js';\nimport type { LLMProvider } from './types.js';\nimport { ClassifySchema, EntitiesSchema, SummarySchema, TagsSchema } from './types.js';\n\n/**\n * Enhance scraped data with LLM-powered features\n */\nexport async function enhance(\n data: ScrapedData,\n provider: LLMProvider,\n types: EnhancementType[]\n): Promise<Partial<ScrapedData>> {\n const results: Partial<ScrapedData> = {};\n\n // Prepare content for LLM (use excerpt/textContent to save tokens)\n const content = data.excerpt || data.textContent.slice(0, 10000);\n const context = `Title: ${data.title}\\nURL: ${data.url}\\n\\nContent:\\n${content}`;\n\n // Run enhancements in parallel\n const promises: Promise<void>[] = [];\n\n if (types.includes('summarize')) {\n promises.push(\n summarize(context, provider).then((summary) => {\n results.summary = summary;\n })\n );\n }\n\n if (types.includes('tags')) {\n promises.push(\n extractTags(context, provider).then((tags) => {\n results.suggestedTags = tags;\n })\n );\n }\n\n if (types.includes('entities')) {\n promises.push(\n extractEntities(context, provider).then((entities) => {\n results.entities = entities;\n })\n );\n }\n\n if (types.includes('classify')) {\n promises.push(\n classify(context, provider).then((classification) => {\n if 
(classification.confidence > 0.7) {\n results.contentType = classification.contentType as ScrapedData['contentType'];\n }\n })\n );\n }\n\n await Promise.all(promises);\n\n return results;\n}\n\n/**\n * Options for the ask() function\n */\nexport interface AskOptions {\n /** Key to store the result under in custom field */\n key?: string;\n /** Schema for structured response */\n schema?: ExtractionSchema;\n}\n\n/**\n * Ask a custom question about the scraped content\n * Results are stored in the `custom` field of ScrapedData\n */\nexport async function ask(\n data: ScrapedData,\n provider: LLMProvider,\n prompt: string,\n options?: AskOptions\n): Promise<Partial<ScrapedData>> {\n const key = options?.key || 'response';\n const content = data.excerpt || data.textContent.slice(0, 10000);\n\n // Apply placeholder replacements\n const processedPrompt = applyPlaceholders(prompt, data, content);\n\n if (options?.schema) {\n // Use structured extraction\n const result = await extract(data, provider, options.schema, processedPrompt);\n return { custom: { [key]: result } };\n }\n\n // Simple string response\n const fullPrompt = prompt.includes('{{content}}')\n ? processedPrompt\n : `${processedPrompt}\\n\\nTitle: ${data.title}\\nURL: ${data.url}\\n\\nContent:\\n${content}`;\n\n const response = await provider.complete(fullPrompt);\n return { custom: { [key]: response } };\n}\n\n/**\n * Apply placeholder replacements to a prompt template\n */\nfunction applyPlaceholders(prompt: string, data: ScrapedData, content: string): string {\n const domain = (() => {\n try {\n return new URL(data.url).hostname;\n } catch {\n return '';\n }\n })();\n\n return prompt\n .replace(/\\{\\{title\\}\\}/g, data.title)\n .replace(/\\{\\{url\\}\\}/g, data.url)\n .replace(/\\{\\{content\\}\\}/g, content)\n .replace(/\\{\\{description\\}\\}/g, data.description || '')\n .replace(/\\{\\{excerpt\\}\\}/g, data.excerpt || '')\n .replace(/\\{\\{domain\\}\\}/g, domain);\n}\n\n/**\n * Extract structured data using LLM and a custom schema\n */\nexport async function extract<T>(\n data: ScrapedData,\n provider: LLMProvider,\n schema: ExtractionSchema,\n promptTemplate?: string\n): Promise<T> {\n // Convert simple schema to Zod schema\n const zodShape: Record<string, z.ZodTypeAny> = {};\n\n for (const [key, type] of Object.entries(schema)) {\n const isOptional = type.endsWith('?');\n const baseType = isOptional ? type.slice(0, -1) : type;\n\n let zodType: z.ZodTypeAny;\n switch (baseType) {\n case 'string':\n zodType = z.string();\n break;\n case 'number':\n zodType = z.number();\n break;\n case 'boolean':\n zodType = z.boolean();\n break;\n case 'string[]':\n zodType = z.array(z.string());\n break;\n case 'number[]':\n zodType = z.array(z.number());\n break;\n default:\n zodType = z.string();\n }\n\n zodShape[key] = isOptional ? 
zodType.optional() : zodType;\n }\n\n const zodSchema = z.object(zodShape) as unknown as z.ZodType<T>;\n\n const content = data.textContent.slice(0, 4000);\n\n let prompt: string;\n\n if (promptTemplate) {\n // Apply all placeholder replacements\n prompt = applyPlaceholders(promptTemplate, data, content);\n\n // If content wasn't included via placeholder, append it\n if (!promptTemplate.includes('{{content}}')) {\n prompt += `\\n\\nContext:\\n${content}`;\n }\n } else {\n prompt = `Extract the following information from this content:\n\nTitle: ${data.title}\nURL: ${data.url}\n\nContent:\n${content}\n\nExtract these fields:\n${Object.entries(schema)\n .map(([key, type]) => `- ${key} (${type})`)\n .join('\\n')}`;\n }\n\n return provider.completeJSON<T>(prompt, zodSchema as z.ZodType<T>);\n}\n\n/**\n * Generate a summary of the content\n */\nasync function summarize(context: string, provider: LLMProvider): Promise<string> {\n const prompt = `Summarize the following content in 2-3 concise sentences:\n\n${context}`;\n\n const result = await provider.completeJSON(prompt, SummarySchema);\n return result.summary;\n}\n\n/**\n * Extract relevant tags/keywords\n */\nasync function extractTags(context: string, provider: LLMProvider): Promise<string[]> {\n const prompt = `Extract 5-10 relevant tags or keywords from the following content. Focus on technologies, concepts, and topics mentioned:\n\n${context}`;\n\n const result = await provider.completeJSON(prompt, TagsSchema);\n return result.tags;\n}\n\n/**\n * Extract named entities from content\n */\nasync function extractEntities(context: string, provider: LLMProvider): Promise<ExtractedEntities> {\n const prompt = `Extract named entities from the following content. Identify people, organizations, technologies, locations, and key concepts:\n\n${context}`;\n\n return provider.completeJSON(prompt, EntitiesSchema);\n}\n\n/**\n * Classify content type using LLM\n */\nasync function classify(\n context: string,\n provider: LLMProvider\n): Promise<{ contentType: string; confidence: number }> {\n const prompt = `Classify the following content into one of these categories:\n- article: Blog post, news article, essay\n- repo: Code repository, open source project\n- docs: Documentation, API reference, guides\n- package: npm/pip package page\n- video: Video content, YouTube\n- tool: Software tool, web application\n- product: Commercial product, e-commerce\n\n${context}`;\n\n return provider.completeJSON(prompt, 
ClassifySchema);\n}\n"],"mappings":";;;;;;;;;AAiBA,IAAa,cAAb,MAAa,oBAAoB,MAAM;CACrC,AAAgB;CAChB,AAAgB;CAEhB,YAAY,SAAiB,MAAuB,YAAqB,OAAe;AACtF,QAAM,SAAS,EAAE,OAAO,CAAC;AACzB,OAAK,OAAO;AACZ,OAAK,OAAO;AACZ,OAAK,aAAa;AAGlB,MAAI,MAAM,kBACR,OAAM,kBAAkB,MAAM,YAAY;;;;;CAO9C,OAAO,KAAK,OAAgB,OAAwB,gBAA6B;AAC/E,MAAI,iBAAiB,YACnB,QAAO;AAGT,MAAI,iBAAiB,MACnB,QAAO,IAAI,YAAY,MAAM,SAAS,MAAM,QAAW,MAAM;AAG/D,SAAO,IAAI,YAAY,OAAO,MAAM,EAAE,KAAK;;;;;CAM7C,cAAuB;AACrB,SAAO,KAAK,SAAS,kBAAkB,KAAK,SAAS;;;;;CAMvD,SAAkC;AAChC,SAAO;GACL,MAAM,KAAK;GACX,SAAS,KAAK;GACd,MAAM,KAAK;GACX,YAAY,KAAK;GACjB,OAAO,KAAK;GACb;;;;;;;;;;;;;ACvDL,SAAgB,uBAAuB,QAAiC;AACtE,KAAI,WAAW,OAAO,WAAW,IAC/B,QAAO;AAET,KAAI,WAAW,IACb,QAAO;AAET,KAAI,WAAW,IACb,QAAO;AAET,KAAI,WAAW,IACb,QAAO;AAET,KAAI,UAAU,IACZ,QAAO;AAET,QAAO;;;;;AAyBT,eAAsB,eAAe,UAAqC;AACxE,KAAI;EACF,MAAM,OAAO,MAAM,SAAS,MAAM;AAClC,MAAI;GACF,MAAM,OAAO,KAAK,MAAM,KAAK;AAE7B,OAAI,OAAO,KAAK,UAAU,YAAY,KAAK,UAAU,MAAM;IACzD,MAAM,QAAQ,KAAK;AACnB,WAAO,OAAO,MAAM,WAAW,MAAM,OAAO,KAAK,UAAU,MAAM,CAAC;;AAEpE,OAAI,OAAO,KAAK,UAAU,SACxB,QAAO,KAAK;AAEd,OAAI,OAAO,KAAK,YAAY,SAC1B,QAAO,KAAK;AAEd,OAAI,OAAO,KAAK,WAAW,SACzB,QAAO,KAAK;AAEd,UAAO;UACD;AACN,UAAO,QAAQ,QAAQ,SAAS,OAAO,GAAG,SAAS;;SAE/C;AACN,SAAO,QAAQ,SAAS,OAAO,GAAG,SAAS;;;;;;AAO/C,eAAsB,gBACpB,UACA,cACA,aACsB;CACtB,MAAM,OAAO,uBAAuB,SAAS,OAAO;CAEpD,IAAIA;AACJ,KAAI,YACF,KAAI;AAEF,YAAU,YADG,MAAM,SAAS,MAAM,CACP;SACrB;AACN,YAAU,MAAM,eAAe,SAAS;;KAG1C,WAAU,MAAM,eAAe,SAAS;AAG1C,QAAO,IAAI,YACT,GAAG,aAAa,cAAc,SAAS,OAAO,KAAK,WACnD,MACA,SAAS,OACV;;;;;;;;ACPH,MAAMC,gBAAuC;CAC3C,aAAa;CACb,WAAW;CACX,mBAAmB;CACnB,mBAAmB;EAAC;EAAK;EAAK;EAAK;EAAK;EAAK;EAAI;CAClD;;;;AAKD,MAAM,wBAAwB;CAC5B;CACA;CACA;CACA;CACA;CACA;CACA;CACD;;;;AAKD,SAAgB,iBACd,OACA,oBAA8B,cAAc,mBACnC;AACT,KAAI,iBAAiB,OAAO;EAE1B,MAAM,OAAQ,MAAgC;AAC9C,MAAI,QAAQ,sBAAsB,SAAS,KAAK,CAC9C,QAAO;AAIT,MAAI,gBAAgB,SAAS,OAAO,MAAM,eAAe,SACvD,QAAO,kBAAkB,SAAS,MAAM,WAAW;AAErD,MAAI,YAAY,SAAS,OAAO,MAAM,WAAW,SAC/C,QAAO,kBAAkB,SAAS,MAAM,OAAO;AAIjD,MAAI,UAAU,OAAO;GACnB,MAAM,UAAU,MAAM;AACtB,OAAI,YAAY,aAAa,YAAY,eACvC,QAAO;;EAKX,MAAM,UAAU,MAAM,QAAQ,aAAa;AAC3C,MACE,QAAQ,SAAS,UAAU,IAC3B,QAAQ,SAAS,aAAa,IAC9B,QAAQ,SAAS,oBAAoB,IACrC,QAAQ,SAAS,0BAA0B,CAE3C,QAAO;;AAIX,QAAO;;;;;AAMT,SAAgB,MAAM,IAA2B;AAC/C,QAAO,IAAI,SAAS,YAAY,WAAW,SAAS,GAAG,CAAC;;;;;AAM1D,eAAsB,UACpB,IACA,QACA,SAC0C;CAC1C,MAAM,cAAc,QAAQ,eAAe,cAAc;CACzD,MAAM,YAAY,QAAQ,aAAa,cAAc;CACrD,MAAM,aAAa,QAAQ,qBAAqB,cAAc;CAC9D,MAAM,oBAAoB,QAAQ,qBAAqB,cAAc;CAErE,IAAIC;AAEJ,MAAK,IAAI,UAAU,GAAG,WAAW,aAAa,UAC5C,KAAI;AAEF,SAAO;GAAE,QADM,MAAM,IAAI;GACR,UAAU;GAAS;UAC7B,OAAO;AACd,cAAY,iBAAiB,QAAQ,QAAQ,IAAI,MAAM,OAAO,MAAM,CAAC;AAGrE,MAAI,YAAY,eAAe,CAAC,iBAAiB,OAAO,kBAAkB,CACxE,OAAM;EAOR,MAAM,SAHQ,YAAY,eAAe,UAAU,MAG3B,KAAM,KAAK,QAAQ,GAAG;AAE9C,YAAU,SAAS,WAAW,OAAO;AAErC,QAAM,MAAM,OAAO;;AAIvB,OAAM,6BAAa,IAAI,MAAM,eAAe;;;;;AAU9C,eAAsB,YACpB,IACA,WACY;CACZ,MAAM,aAAa,IAAI,iBAAiB;CACxC,MAAM,YAAY,iBAAiB,WAAW,OAAO,EAAE,UAAU;AAEjE,KAAI;AACF,SAAO,MAAM,GAAG,WAAW,OAAO;WAC1B;AACR,eAAa,UAAU;;;;;;AA0C3B,MAAMC,0BAA0D;CAC9D,kBAAkB;CAClB,gBAAgB;CACjB;;;;AAKD,IAAa,mBAAb,cAAsC,MAAM;CAC1C,AAAS,gBAAgB;CAEzB,YAAY,SAAiB;AAC3B,QAAM,QAAQ;AACd,OAAK,OAAO;;;;;;;AAQhB,IAAa,iBAAb,MAA4B;CAC1B,AAAQ;CACR,AAAiB;CACjB,AAAiB;CAEjB,YAAY,QAA+B;AACzC,OAAK,mBAAmB,QAAQ,oBAAoB,wBAAwB;AAC5E,OAAK,iBAAiB,QAAQ,kBAAkB,wBAAwB;AACxE,OAAK,QAAQ;GACX,OAAO;GACP,UAAU;GACX;;;;;CAMH,SAAkB;AAChB,OAAK,aAAa;AAClB,SAAO,KAAK,MAAM,UAAU;;;;;CAM9B,WAAyB;AACvB,OAAK,aAAa;AAClB,SAAO,KAAK,MAAM;;;;;CAMpB,gBAAsB;AACpB,OAAK,MAAM,WAAW;AACtB,OAAK,MAAM,QAAQ;AACnB,OAAK,MAAM,kBAAkB;AAC7B,OAAK,MAAM,kBAAkB;;;;;CAM/B,gBAAsB;AACpB,OAAK,MAAM;AACX,OAAK,MAAM,kBAAkB,KAAK,KA
AK;AAEvC,MAAI,KAAK,MAAM,YAAY,KAAK,kBAAkB;AAChD,QAAK,MAAM,QAAQ;AACnB,QAAK,MAAM,kBAAkB,KAAK,KAAK,GAAG,KAAK;;;;;;CAOnD,MAAM,QAAW,IAAkC;AACjD,MAAI,KAAK,QAAQ,CAIf,OAAM,IAAI,iBAAiB,4CAHP,KAAK,MAAM,kBAC3B,IAAI,KAAK,KAAK,MAAM,gBAAgB,CAAC,aAAa,GAClD,YACiF;AAGvF,MAAI;GACF,MAAM,SAAS,MAAM,IAAI;AACzB,QAAK,eAAe;AACpB,UAAO;WACA,OAAO;AACd,QAAK,eAAe;AACpB,SAAM;;;;;;CAOV,QAAc;AACZ,OAAK,QAAQ;GACX,OAAO;GACP,UAAU;GACX;;;;;CAMH,AAAQ,cAAoB;AAC1B,MACE,KAAK,MAAM,UAAU,UACrB,KAAK,MAAM,mBACX,KAAK,KAAK,IAAI,KAAK,MAAM,gBAEzB,MAAK,MAAM,QAAQ;;;;;;AAYzB,IAAa,cAAb,MAAyB;CACvB,AAAQ;CACR,AAAQ;CACR,AAAiB;CACjB,AAAiB;CAEjB,YAAY,QAAyB;EAEnC,MAAM,qBAAqB,OAAO,qBAAqB,MAAM;AAE7D,OAAK,YAAY,KAAK,IAAI,GAAG,KAAK,KAAK,oBAAoB,GAAG,CAAC;AAC/D,OAAK,aAAa;AAClB,OAAK,SAAS,KAAK;AACnB,OAAK,aAAa,KAAK,KAAK;;;;;CAM9B,aAAsB;AACpB,OAAK,QAAQ;AACb,SAAO,KAAK,UAAU;;;;;;CAOxB,WAAW,SAAS,GAAY;AAC9B,OAAK,QAAQ;AAEb,MAAI,KAAK,UAAU,QAAQ;AACzB,QAAK,UAAU;AACf,UAAO;;AAGT,SAAO;;;;;CAMT,MAAM,QAAQ,SAAS,GAAkB;AACvC,MAAI,KAAK,WAAW,OAAO,CACzB;AAIF,OAAK,QAAQ;EACb,MAAM,eAAe,SAAS,KAAK;EACnC,MAAM,SAAS,KAAK,KAAM,eAAe,KAAK,aAAc,IAAK;AAEjE,MAAI,SAAS,EACX,OAAM,MAAM,OAAO;AAIrB,SAAO,CAAC,KAAK,WAAW,OAAO,CAC7B,OAAM,MAAM,KAAK,KAAM,IAAI,KAAK,aAAc,IAAK,CAAC;;;;;CAOxD,cAAsB;AACpB,OAAK,QAAQ;AAEb,MAAI,KAAK,UAAU,EACjB,QAAO;AAGT,SAAO,KAAK,KAAM,IAAI,KAAK,aAAc,IAAK;;;;;CAMhD,AAAQ,SAAe;EACrB,MAAM,MAAM,KAAK,KAAK;EAEtB,MAAM,aADW,MAAM,KAAK,cAAc,MACd,KAAK;AAEjC,OAAK,SAAS,KAAK,IAAI,KAAK,WAAW,KAAK,SAAS,UAAU;AAC/D,OAAK,aAAa;;;;;;AAWtB,IAAa,YAAb,MAAuB;CACrB,AAAQ;CACR,AAAQ,UAA6B,EAAE;CAEvC,YAAY,SAAiB;AAC3B,OAAK,UAAU;;;;;CAMjB,MAAM,UAAyB;AAC7B,MAAI,KAAK,UAAU,GAAG;AACpB,QAAK;AACL;;AAGF,SAAO,IAAI,SAAe,YAAY;AACpC,QAAK,QAAQ,KAAK,QAAQ;IAC1B;;;;;CAMJ,UAAgB;EACd,MAAM,OAAO,KAAK,QAAQ,OAAO;AACjC,MAAI,KACF,OAAM;MAEN,MAAK;;;;;CAOT,MAAM,QAAW,IAAkC;AACjD,QAAM,KAAK,SAAS;AACpB,MAAI;AACF,UAAO,MAAM,IAAI;YACT;AACR,QAAK,SAAS;;;;;;;;;;;;;;;;AAqBpB,eAAsB,eACpB,IACA,QACA,OACA,WAG0C;CAC1C,MAAM,YAAY,QAAQ,aAAa;AAGvC,KAAI,OAAO,gBAAgB,QAAQ,CACjC,OAAM,IAAI,iBAAiB,0BAA0B;AAIvD,KAAI,OAAO,YACT,OAAM,MAAM,YAAY,SAAS;CAInC,MAAM,yBAAyB,YAAsD;EAEnF,MAAM,sBAAsB,YAAY,IAAI,UAAU;AAGtD,MAAI;GACF,MAAM,cAAc,MAAM,UAAU,eAAe,QAAQ,OAAO,WAAW,QAAQ;AACrF,UAAO,gBAAgB,eAAe;AACtC,UAAO;WACA,OAAO;AACd,UAAO,gBAAgB,eAAe;AACtC,SAAM;;;AAKV,KAAI,OAAO,UACT,QAAO,MAAM,UAAU,QAAQ,uBAAuB;AAGxD,QAAO,wBAAwB;;;;;;;;;;;;AC/fjC,MAAM,sBAAsB;CAC1B;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACD;;;;AAKD,SAAgB,cAAc,UAA2B;AACvD,QAAO,oBAAoB,MAAM,YAAY,QAAQ,KAAK,SAAS,CAAC;;;;;AAMtE,SAAgB,YAAY,KAAa,UAA+B,EAAE,EAAO;CAC/E,MAAM,eAAe,QAAQ,gBAAgB;CAC7C,MAAM,eAAe,QAAQ,gBAAgB;CAE7C,IAAIC;AACJ,KAAI;AACF,WAAS,IAAI,IAAI,IAAI;SACf;AACN,QAAM,IAAI,YAAY,gBAAgB,OAAO,cAAc;;AAI7D,KAAI,gBAAgB,OAAO,aAAa,SACtC,OAAM,IAAI,YAAY,wBAAwB,OAAO,YAAY,mBAAmB;AAItF,KAAI,CAAC,gBAAgB,cAAc,OAAO,SAAS,CACjD,OAAM,IAAI,YACR,2CAA2C,OAAO,YAClD,mBACD;AAGH,QAAO;;;;;AAMT,eAAsB,mBACpB,KACA,UAA+B,EAAE,EAClB;CACf,MAAM,SAAS,YAAY,KAAK,QAAQ;CACxC,MAAM,aAAa,QAAQ,cAAc;CACzC,MAAM,eAAe,QAAQ,gBAAgB;AAE7C,KAAI,CAAC,cAAc,aACjB;CAGF,MAAM,OAAO,OAAO;AAGpB,wBAAS,KAAK,CACZ;AAIF,KAAI;EACF,MAAM,YAAY,MAAMC,kBAAI,OAAO,MAAM,EAAE,KAAK,MAAM,CAAC;AACvD,OAAK,MAAM,QAAQ,UACjB,KAAI,cAAc,KAAK,QAAQ,CAC7B,OAAM,IAAI,YACR,oCAAoC,KAAK,MAAM,KAAK,WACpD,mBACD;UAGE,OAAO;AACd,MAAI,iBAAiB,YACnB,OAAM;AAGR,QAAM,IAAI,YAAY,+BAA+B,KAAK,IAD1C,iBAAiB,QAAQ,MAAM,UAAU,OAAO,MAAM,CACA,IAAI,eAAe;;;;;;AA8B7F,IAAsB,mBAAtB,MAAyD;CACvD,AAAmB;CACnB,AAAmB;CACnB,AAAmB;CACnB,AAAmB;CAGnB,AAAmB;CACnB,AAAmB;CACnB,AAAmB;CACnB,AAAmB;CAGnB,AAAmB;CACnB,AAAmB;CACnB,AAAmB;CAGnB,AAAQ;CACR,AAAQ;CACR,AAAQ;CAER,YAAY,QAAgC;AAC1C,OAAK,UAAU,OAAO,QAAQ,QAAQ,OAAO,GAAG;AAChD,OAAK,QAAQ,OAAO
;AACpB,OAAK,UAAU;GACb,gBAAgB;GAChB,GAAG,OAAO;GACX;AACD,OAAK,cAAc,OAAO;AAG1B,OAAK,eAAe,OAAO,gBAAgB;AAC3C,OAAK,eAAe,OAAO,gBAAgB;AAC3C,OAAK,aAAa,OAAO,cAAc;AACvC,OAAK,iBAAiB,OAAO,kBAAkB;AAG/C,OAAK,YAAY,OAAO,YAAY,aAAa;AACjD,OAAK,cAAc,OAAO,YAAY;AACtC,OAAK,cAAc,OAAO,YAAY,eAAe;EAErD,MAAM,cAAc,OAAO,YAAY;AAGvC,OAAK,iBACH,aAAa,mBACZ,OAAO,YAAY,iBAChB,IAAI,eAAe,OAAO,WAAW,eAAe,GACpD;AACN,OAAK,cACH,aAAa,gBACZ,OAAO,YAAY,YAAY,IAAI,YAAY,OAAO,WAAW,UAAU,GAAG;AACjF,OAAK,YAAY,aAAa,aAAa,IAAI,UAAU,KAAK,YAAY;AAG1E,cAAY,KAAK,SAAS;GACxB,cAAc,KAAK;GACnB,cAAc,KAAK;GACpB,CAAC;;;;;CAMJ,qBAAsC;AACpC,SAAO;GACL,gBAAgB,KAAK;GACrB,aAAa,KAAK;GAClB,WAAW,KAAK;GACjB;;;;;CAMH,MAAgB,MAAS,KAAa,UAAwB,EAAE,EAA2B;EACzF,MAAMC,kBAAuC;GAC3C,cAAc,KAAK;GACnB,cAAc,KAAK;GACnB,YAAY,KAAK;GACjB,gBAAgB,KAAK;GACtB;AAGD,QAAM,mBAAmB,KAAK,gBAAgB;AAG9C,MAAI,KAAK,gBAAgB,QAAQ,CAC/B,OAAM,IAAI,iBAAiB,qDAAqD;AAIlF,MAAI,KAAK,YACP,OAAM,KAAK,YAAY,SAAS;EAIlC,MAAM,UAAU,OAAO,WAAiD;GACtE,MAAM,iBAAiB,QAAQ,SAAS,YAAY,IAAI,CAAC,QAAQ,QAAQ,OAAO,CAAC,GAAG;GACpF,MAAM,WAAW,MAAM,MAAM,KAAK;IAChC,QAAQ,QAAQ,UAAU;IAC1B,SAAS;KAAE,GAAG,KAAK;KAAS,GAAG,QAAQ;KAAS;IAChD,MAAM,QAAQ,OAAO,KAAK,UAAU,QAAQ,KAAK,GAAG;IACpD,QAAQ;IACR,UAAU,KAAK,iBAAiB,WAAW;IAC5C,CAAC;AAGF,OAAI,KAAK,kBAAkB,SAAS,WAClC,OAAM,mBAAmB,SAAS,KAAK,gBAAgB;AAIzD,OAAI,CAAC,SAAS,GAEZ,OAAM,MAAM,gBACV,UACA,KAAK,YAAY,MACjB,KAAK,YACN;AAKH,UAAO;IACL,MAHY,MAAM,SAAS,MAAM;IAIjC,QAAQ,SAAS;IACjB,SAAS,SAAS;IACnB;;EAIH,MAAM,yBAAyB,YAAqC;AAClE,OAAI,CAAC,KAAK,UACR,OAAM,IAAI,YAAY,6BAA6B,mBAAmB;AAExE,UAAO,KAAK,UAAU,QAAQ,YAAY;IAExC,MAAM,mBAAmB,YAAqC;AAC5D,YAAO,aAAa,WAAW,QAAQ,OAAO,EAAE,KAAK,UAAU;;AAIjE,QAAI;KACF,IAAIC;AACJ,SAAI,KAAK,YAEP,WADoB,MAAM,UAAU,kBAAkB,KAAK,YAAY,EAClD;SAErB,UAAS,MAAM,kBAAkB;AAInC,UAAK,gBAAgB,eAAe;AACpC,YAAO;aACA,OAAO;AAEd,UAAK,gBAAgB,eAAe;AACpC,WAAM;;KAER;;AAGJ,SAAO,wBAAwB;;;;;;;;;AC3TnC,MAAa,gBAAgBC,MAAE,OAAO,EACpC,SAASA,MAAE,QAAQ,CAAC,SAAS,gDAAgD,EAC9E,CAAC;AAEF,MAAa,aAAaA,MAAE,OAAO,EACjC,MAAMA,MAAE,MAAMA,MAAE,QAAQ,CAAC,CAAC,SAAS,8BAA8B,EAClE,CAAC;AAEF,MAAa,iBAAiBA,MAAE,OAAO;CACrC,QAAQA,MAAE,MAAMA,MAAE,QAAQ,CAAC,CAAC,SAAS,mBAAmB;CACxD,eAAeA,MAAE,MAAMA,MAAE,QAAQ,CAAC,CAAC,SAAS,0BAA0B;CACtE,cAAcA,MAAE,MAAMA,MAAE,QAAQ,CAAC,CAAC,SAAS,gCAAgC;CAC3E,WAAWA,MAAE,MAAMA,MAAE,QAAQ,CAAC,CAAC,SAAS,mBAAmB;CAC3D,UAAUA,MAAE,MAAMA,MAAE,QAAQ,CAAC,CAAC,SAAS,sBAAsB;CAC9D,CAAC;AAEF,MAAa,iBAAiBA,MAAE,OAAO;CACrC,aAAaA,MACV,KAAK;EAAC;EAAW;EAAQ;EAAQ;EAAW;EAAS;EAAQ;EAAW;EAAU,CAAC,CACnF,SAAS,sBAAsB;CAClC,YAAYA,MAAE,QAAQ,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,SAAS,uBAAuB;CACtE,CAAC;;;;;;;AC/DF,eAAsB,QACpB,MACA,UACA,OAC+B;CAC/B,MAAMC,UAAgC,EAAE;CAGxC,MAAM,UAAU,KAAK,WAAW,KAAK,YAAY,MAAM,GAAG,IAAM;CAChE,MAAM,UAAU,UAAU,KAAK,MAAM,SAAS,KAAK,IAAI,gBAAgB;CAGvE,MAAMC,WAA4B,EAAE;AAEpC,KAAI,MAAM,SAAS,YAAY,CAC7B,UAAS,KACP,UAAU,SAAS,SAAS,CAAC,MAAM,YAAY;AAC7C,UAAQ,UAAU;GAClB,CACH;AAGH,KAAI,MAAM,SAAS,OAAO,CACxB,UAAS,KACP,YAAY,SAAS,SAAS,CAAC,MAAM,SAAS;AAC5C,UAAQ,gBAAgB;GACxB,CACH;AAGH,KAAI,MAAM,SAAS,WAAW,CAC5B,UAAS,KACP,gBAAgB,SAAS,SAAS,CAAC,MAAM,aAAa;AACpD,UAAQ,WAAW;GACnB,CACH;AAGH,KAAI,MAAM,SAAS,WAAW,CAC5B,UAAS,KACP,SAAS,SAAS,SAAS,CAAC,MAAM,mBAAmB;AACnD,MAAI,eAAe,aAAa,GAC9B,SAAQ,cAAc,eAAe;GAEvC,CACH;AAGH,OAAM,QAAQ,IAAI,SAAS;AAE3B,QAAO;;;;;;AAiBT,eAAsB,IACpB,MACA,UACA,QACA,SAC+B;CAC/B,MAAM,MAAM,SAAS,OAAO;CAC5B,MAAM,UAAU,KAAK,WAAW,KAAK,YAAY,MAAM,GAAG,IAAM;CAGhE,MAAM,kBAAkB,kBAAkB,QAAQ,MAAM,QAAQ;AAEhE,KAAI,SAAS,QAAQ;EAEnB,MAAM,SAAS,MAAM,QAAQ,MAAM,UAAU,QAAQ,QAAQ,gBAAgB;AAC7E,SAAO,EAAE,QAAQ,GAAG,MAAM,QAAQ,EAAE;;CAItC,MAAM,aAAa,OAAO,SAAS,cAAc,GAC7C,kBACA,GAAG,gBAAgB,aAAa,KAAK,MAAM,SAAS,KAAK,IAAI,gBAAgB;CAEjF,MAAM,WAAW,MAAM,SAAS,SAAS,WAAW;AACpD,QAAO,EAAE,QAAQ,GAAG,MAA
M,UAAU,EAAE;;;;;AAMxC,SAAS,kBAAkB,QAAgB,MAAmB,SAAyB;CACrF,MAAM,gBAAgB;AACpB,MAAI;AACF,UAAO,IAAI,IAAI,KAAK,IAAI,CAAC;UACnB;AACN,UAAO;;KAEP;AAEJ,QAAO,OACJ,QAAQ,kBAAkB,KAAK,MAAM,CACrC,QAAQ,gBAAgB,KAAK,IAAI,CACjC,QAAQ,oBAAoB,QAAQ,CACpC,QAAQ,wBAAwB,KAAK,eAAe,GAAG,CACvD,QAAQ,oBAAoB,KAAK,WAAW,GAAG,CAC/C,QAAQ,mBAAmB,OAAO;;;;;AAMvC,eAAsB,QACpB,MACA,UACA,QACA,gBACY;CAEZ,MAAMC,WAAyC,EAAE;AAEjD,MAAK,MAAM,CAAC,KAAK,SAAS,OAAO,QAAQ,OAAO,EAAE;EAChD,MAAM,aAAa,KAAK,SAAS,IAAI;EACrC,MAAM,WAAW,aAAa,KAAK,MAAM,GAAG,GAAG,GAAG;EAElD,IAAIC;AACJ,UAAQ,UAAR;GACE,KAAK;AACH,cAAUC,MAAE,QAAQ;AACpB;GACF,KAAK;AACH,cAAUA,MAAE,QAAQ;AACpB;GACF,KAAK;AACH,cAAUA,MAAE,SAAS;AACrB;GACF,KAAK;AACH,cAAUA,MAAE,MAAMA,MAAE,QAAQ,CAAC;AAC7B;GACF,KAAK;AACH,cAAUA,MAAE,MAAMA,MAAE,QAAQ,CAAC;AAC7B;GACF,QACE,WAAUA,MAAE,QAAQ;;AAGxB,WAAS,OAAO,aAAa,QAAQ,UAAU,GAAG;;CAGpD,MAAM,YAAYA,MAAE,OAAO,SAAS;CAEpC,MAAM,UAAU,KAAK,YAAY,MAAM,GAAG,IAAK;CAE/C,IAAIC;AAEJ,KAAI,gBAAgB;AAElB,WAAS,kBAAkB,gBAAgB,MAAM,QAAQ;AAGzD,MAAI,CAAC,eAAe,SAAS,cAAc,CACzC,WAAU,iBAAiB;OAG7B,UAAS;;SAEJ,KAAK,MAAM;OACb,KAAK,IAAI;;;EAGd,QAAQ;;;EAGR,OAAO,QAAQ,OAAO,CACrB,KAAK,CAAC,KAAK,UAAU,KAAK,IAAI,IAAI,KAAK,GAAG,CAC1C,KAAK,KAAK;AAGX,QAAO,SAAS,aAAgB,QAAQ,UAA0B;;;;;AAMpE,eAAe,UAAU,SAAiB,UAAwC;CAChF,MAAM,SAAS;;EAEf;AAGA,SADe,MAAM,SAAS,aAAa,QAAQ,cAAc,EACnD;;;;;AAMhB,eAAe,YAAY,SAAiB,UAA0C;CACpF,MAAM,SAAS;;EAEf;AAGA,SADe,MAAM,SAAS,aAAa,QAAQ,WAAW,EAChD;;;;;AAMhB,eAAe,gBAAgB,SAAiB,UAAmD;CACjG,MAAM,SAAS;;EAEf;AAEA,QAAO,SAAS,aAAa,QAAQ,eAAe;;;;;AAMtD,eAAe,SACb,SACA,UACsD;CACtD,MAAM,SAAS;;;;;;;;;EASf;AAEA,QAAO,SAAS,aAAa,QAAQ,eAAe"}
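The sourcesContent embedded in this map shows the new shared resilience layer (src/common/resilience.ts): withRetry with exponential backoff and jitter, CircuitBreaker, RateLimiter, Semaphore, and a combined withResilience wrapper, plus the SSRF-guarded HTTP base provider in src/common/http-base.ts. A small sketch of how the two main primitives compose, based on the signatures visible above; the 'scrapex' import path is an assumption, since this diff does not show which of these utilities the public entry points re-export:

```ts
// Sketch only: withRetry(fn, config?, onRetry?) resolves to { result, attempts },
// and CircuitBreaker.execute() records success/failure around the wrapped call.
import { withRetry, CircuitBreaker } from 'scrapex'; // import path is an assumption

const breaker = new CircuitBreaker({ failureThreshold: 5, resetTimeoutMs: 30_000 });

async function fetchTextWithGuards(url: string): Promise<string> {
  return breaker.execute(async () => {
    const { result } = await withRetry(
      async () => {
        const res = await fetch(url);
        if (!res.ok) {
          // Attach the status so isRetryableError() can match it against
          // the retryable status list (408, 429, 500, 502, 503, 504).
          throw Object.assign(new Error(`HTTP ${res.status}`), { status: res.status });
        }
        return res.text();
      },
      { maxAttempts: 3, backoffMs: 1000 },
      (attempt, error, delayMs) =>
        console.warn(`attempt ${attempt} failed: ${error.message}; retrying in ${Math.round(delayMs)}ms`)
    );
    return result;
  });
}
```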
@@ -0,0 +1,268 @@
//#region src/parsers/types.d.ts
/**
 * Generic source parser interface.
 * Parsers transform raw content into structured data with metadata.
 *
 * @template TData - The main data type (e.g., array of links)
 * @template TMeta - Optional metadata type
 */
interface SourceParser<TData, TMeta = unknown> {
  readonly name: string;
  /**
   * Check if this parser can handle the given content
   */
  canParse(content: string, url?: string): boolean;
  /**
   * Parse the content and extract structured data
   */
  parse(content: string, url?: string): ParserResult<TData, TMeta>;
}
/**
 * Result from a parser
 */
interface ParserResult<TData, TMeta = unknown> {
  data: TData;
  meta?: TMeta;
}
/**
 * RSS/Atom feed item
 */
interface FeedItem {
  id: string;
  title: string;
  link: string;
  description?: string;
  content?: string;
  author?: string;
  publishedAt?: string;
  rawPublishedAt?: string;
  updatedAt?: string;
  categories: string[];
  enclosure?: FeedEnclosure;
  customFields?: Record<string, string>;
}
/**
 * Media enclosure (podcasts, videos)
 */
interface FeedEnclosure {
  url: string;
  type?: string;
  length?: number;
}
/**
 * Parsed feed structure
 */
interface ParsedFeed {
  format: 'rss2' | 'rss1' | 'atom';
  title: string;
  description?: string;
  link: string;
  next?: string;
  language?: string;
  lastBuildDate?: string;
  copyright?: string;
  items: FeedItem[];
  customFields?: Record<string, string>;
}
/**
 * Feed metadata
 */
interface FeedMeta {
  generator?: string;
  ttl?: number;
  image?: {
    url: string;
    title?: string;
    link?: string;
  };
  categories?: string[];
}
/**
 * Markdown link extracted from content
 */
interface MarkdownLink {
  url: string;
  text: string;
  title?: string;
  context?: string;
}
/**
 * Markdown section (heading + content)
 */
interface MarkdownSection {
  level: number;
  title: string;
  content: string;
  links: MarkdownLink[];
}
/**
 * Parsed markdown structure
 */
interface ParsedMarkdown {
  title?: string;
  description?: string;
  sections: MarkdownSection[];
  links: MarkdownLink[];
  codeBlocks: CodeBlock[];
  frontmatter?: Record<string, unknown>;
}
/**
 * Code block from markdown
 */
interface CodeBlock {
  language?: string;
  code: string;
  meta?: string;
}
/**
 * GitHub repository metadata
 */
interface GitHubMeta {
  repoOwner?: string;
  repoName?: string;
  stars?: number;
  lastUpdated?: string;
}
//#endregion
//#region src/parsers/github.d.ts
/**
 * GitHub-specific utilities for parsing repositories.
 */
/**
 * Check if a URL is a GitHub repository
 */
declare function isGitHubRepo(url: string): boolean;
/**
 * Extract GitHub repo info from URL
 */
declare function parseGitHubUrl(url: string): {
  owner: string;
  repo: string;
} | null;
/**
 * Convert a GitHub repo URL to raw content URL
 */
declare function toRawUrl(url: string, branch?: string, file?: string): string;
/**
 * Fetch GitHub API metadata for a repository
 * Note: This is a placeholder - actual implementation would need GitHub API access
 */
declare function fetchRepoMeta(owner: string, repo: string, _token?: string): Promise<GitHubMeta>;
/**
 * Group links by their category/section
 */
declare function groupByCategory(links: MarkdownLink[]): Map<string, MarkdownLink[]>;
//#endregion
//#region src/parsers/markdown.d.ts
/**
 * Generic Markdown parser.
 * Extracts structure, links, and code blocks from markdown content.
 *
 * @example
 * ```ts
 * const parser = new MarkdownParser();
 * const result = parser.parse(markdownContent);
 * console.log(result.data.sections);
 * console.log(result.data.links);
 * ```
 */
declare class MarkdownParser implements SourceParser<ParsedMarkdown> {
|
|
170
|
+
readonly name = "markdown";
|
|
171
|
+
canParse(content: string): boolean;
|
|
172
|
+
parse(content: string): ParserResult<ParsedMarkdown>;
|
|
173
|
+
private parseFrontmatter;
|
|
174
|
+
private extractDescription;
|
|
175
|
+
}
|
|
176
|
+
/**
|
|
177
|
+
* Extract links from a list-based markdown structure (like awesome lists)
|
|
178
|
+
*/
|
|
179
|
+
declare function extractListLinks(markdown: string): MarkdownLink[];
|
|
180
|
+
/**
|
|
181
|
+
* Parse markdown into sections by heading level
|
|
182
|
+
*/
|
|
183
|
+
declare function parseByHeadings(markdown: string, minLevel?: number): MarkdownSection[];
|
|
184
|
+
//#endregion
|
|
185
|
+
//#region src/parsers/rss.d.ts
|
|
186
|
+
interface RSSParserOptions {
|
|
187
|
+
/**
|
|
188
|
+
* Map of custom field names to CSS selectors or XML tag names.
|
|
189
|
+
* Useful for extracting podcast/media namespace fields.
|
|
190
|
+
* @example { duration: "itunes\:duration", rating: "media\:rating" }
|
|
191
|
+
*/
|
|
192
|
+
customFields?: Record<string, string>;
|
|
193
|
+
}
|
|
194
|
+
/**
|
|
195
|
+
* RSS/Atom feed parser.
|
|
196
|
+
* Supports RSS 2.0, RSS 1.0 (RDF), and Atom 1.0 formats.
|
|
197
|
+
*
|
|
198
|
+
* @example
|
|
199
|
+
* ```ts
|
|
200
|
+
* import { RSSParser } from 'scrapex/parsers';
|
|
201
|
+
*
|
|
202
|
+
* const parser = new RSSParser();
|
|
203
|
+
* const result = parser.parse(feedXml, 'https://example.com/feed.xml');
|
|
204
|
+
*
|
|
205
|
+
* console.log(result.data.title);
|
|
206
|
+
* console.log(result.data.items);
|
|
207
|
+
* ```
|
|
208
|
+
*/
|
|
209
|
+
declare class RSSParser implements SourceParser<ParsedFeed, FeedMeta> {
|
|
210
|
+
readonly name = "rss";
|
|
211
|
+
private customFields;
|
|
212
|
+
constructor(options?: RSSParserOptions);
|
|
213
|
+
canParse(content: string): boolean;
|
|
214
|
+
parse(content: string, url?: string): ParserResult<ParsedFeed, FeedMeta>;
|
|
215
|
+
private parseRSS2;
|
|
216
|
+
private parseAtom;
|
|
217
|
+
private parseRSS1;
|
|
218
|
+
/**
|
|
219
|
+
* Extract custom fields from an element using configured selectors.
|
|
220
|
+
*/
|
|
221
|
+
private extractCustomFields;
|
|
222
|
+
/**
|
|
223
|
+
* Parse date string to ISO 8601 format.
|
|
224
|
+
* Returns undefined if parsing fails (never returns raw strings).
|
|
225
|
+
*/
|
|
226
|
+
private parseDate;
|
|
227
|
+
/**
|
|
228
|
+
* Parse element text content, returning undefined if empty.
|
|
229
|
+
*/
|
|
230
|
+
private parseText;
|
|
231
|
+
/**
|
|
232
|
+
* Parse content:encoded, stripping HTML tags from CDATA content.
|
|
233
|
+
*/
|
|
234
|
+
private parseContentEncoded;
|
|
235
|
+
/**
|
|
236
|
+
* Parse categories/tags, filtering out empty strings.
|
|
237
|
+
*/
|
|
238
|
+
private parseCategories;
|
|
239
|
+
/**
|
|
240
|
+
* Resolve a URL against a base URL.
|
|
241
|
+
* Only allows https scheme; all other schemes are rejected.
|
|
242
|
+
*/
|
|
243
|
+
private resolveUrl;
|
|
244
|
+
/**
|
|
245
|
+
* Resolve link with fallback to id/guid if primary link is empty and id is a URL.
|
|
246
|
+
* Only allows https scheme; all other schemes are rejected.
|
|
247
|
+
*/
|
|
248
|
+
private resolveLink;
|
|
249
|
+
/**
|
|
250
|
+
* Parse enclosure element with URL resolution.
|
|
251
|
+
*/
|
|
252
|
+
private parseEnclosure;
|
|
253
|
+
/**
|
|
254
|
+
* Parse RSS image element with URL resolution.
|
|
255
|
+
*/
|
|
256
|
+
private parseImage;
|
|
257
|
+
/**
|
|
258
|
+
* Parse Atom logo/icon element with URL resolution.
|
|
259
|
+
*/
|
|
260
|
+
private parseAtomImage;
|
|
261
|
+
/**
|
|
262
|
+
* Parse a string to number, returning undefined if invalid.
|
|
263
|
+
*/
|
|
264
|
+
private parseNumber;
|
|
265
|
+
}
|
|
266
|
+
//#endregion
|
|
267
|
+
export { MarkdownSection as _, parseByHeadings as a, ParserResult as b, isGitHubRepo as c, CodeBlock as d, FeedEnclosure as f, MarkdownLink as g, GitHubMeta as h, extractListLinks as i, parseGitHubUrl as l, FeedMeta as m, RSSParserOptions as n, fetchRepoMeta as o, FeedItem as p, MarkdownParser as r, groupByCategory as s, RSSParser as t, toRawUrl as u, ParsedFeed as v, SourceParser as x, ParsedMarkdown as y };
|
|
268
|
+
//# sourceMappingURL=index-CDgcRnig.d.cts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index-CDgcRnig.d.cts","names":[],"sources":["../src/parsers/types.ts","../src/parsers/github.ts","../src/parsers/markdown.ts","../src/parsers/rss.ts"],"sourcesContent":[],"mappings":";;AAOA;;;;;AAiBA;AAQiB,UAzBA,YAoCH,CAAA,KAAA,EAAA,QACG,OAAM,CAAA,CAAA;EAMN,SAAA,IAAA,EAAA,MAAa;EASb;AAgBjB;AAcA;EAUiB,QAAA,CAAA,OAAA,EAAe,MAAA,EAAA,GAIvB,CAAA,EAAA,MAAA,CAAA,EAAA,OAAY;EAMJ;;;EAKH,KAAA,CAAA,OAAA,EAAA,MAAA,EAAA,GAAA,CAAA,EAAA,MAAA,CAAA,EAhG0B,YAgG1B,CAhGuC,KAgGvC,EAhG8C,KAgG9C,CAAA;;;AAOd;AASA;UA1GiB;QACT;SACC;ACjBT;AAOA;AAYA;AAUA;AAgBgB,UDtBC,QAAA,CCsBc;EAAQ,EAAA,EAAA,MAAA;EAA6B,KAAA,EAAA,MAAA;EAAZ,IAAA,EAAA,MAAA;EAAG,WAAA,CAAA,EAAA,MAAA;;;;EC7B9C,cAAA,CAAA,EAAe,MAAA;EAAwB,SAAA,CAAA,EAAA,MAAA;EAcb,UAAA,EAAA,MAAA,EAAA;EAAb,SAAA,CAAA,EFIZ,aEJY;EAda,YAAA,CAAA,EFmBtB,MEnBsB,CAAA,MAAA,EAAA,MAAA,CAAA;;AAqJvC;AA+BA;;UF3JiB,aAAA;;EGvCA,IAAA,CAAA,EAAA,MAAA;EAwBJ,MAAA,CAAA,EAAA,MAAU;;;;;AAa0C,UHWhD,UAAA,CGXgD;EAAzB,MAAA,EAAA,MAAA,GAAA,MAAA,GAAA,MAAA;EAbN,KAAA,EAAA,MAAA;EAAY,WAAA,CAAA,EAAA,MAAA;;;;;;SHiCrC;iBACQ;;;;;UAMA,QAAA;;;;;;;;;;;;;UAcA,YAAA;;;;;;;;;UAUA,eAAA;;;;SAIR;;;;;UAMQ,cAAA;;;YAGL;SACH;cACK;gBACE;;;;;UAMC,SAAA;;;;;;;;UASA,UAAA;;;;;;;;AA3HjB;;;;;AAiBA;AAQiB,iBCvBD,YAAA,CDkCF,GAAA,EACG,MAAA,CAAM,EAAA,OAAA;AAMvB;AASA;AAgBA;AAciB,iBCzED,cAAA,CDyEa,GAAA,EAAA,MAAA,CAAA,EAAA;EAUZ,KAAA,EAAA,MAAA;EAUA,IAAA,EAAA,MAAA;CAGL,GAAA,IAAA;;;;AAGU,iBCvFN,QAAA,CDuFM,GAAA,EAAA,MAAA,EAAA,MAAA,CAAA,EAAA,MAAA,EAAA,IAAA,CAAA,EAAA,MAAA,CAAA,EAAA,MAAA;AAMtB;AASA;;;iBC5FsB,aAAA,gDAInB,QAAQ;AAjCX;AAOA;AAYA;AAUsB,iBAgBN,eAAA,CAZL,KAAR,EAYoC,YAZ7B,EAAA,CAAA,EAY8C,GAZ9C,CAAA,MAAA,EAY0D,YAZ1D,EAAA,CAAA;;;ADnCV;;;;;AAiBA;AAQA;AAkBA;AASA;AAgBA;AAcA;AAUA;AAUiB,cEpFJ,cAAA,YAA0B,YFoFR,CEpFqB,cFoFrB,CAAA,CAAA;EAGnB,SAAA,IAAA,GAAA,UAAA;EACH,QAAA,CAAA,OAAA,EAAA,MAAA,CAAA,EAAA,OAAA;EACK,KAAA,CAAA,OAAA,EAAA,MAAA,CAAA,EE3EY,YF2EZ,CE3EyB,cF2EzB,CAAA;EACE,QAAA,gBAAA;EAAM,QAAA,kBAAA;AAMtB;AASA;;;iBE4CgB,gBAAA,oBAAoC;ADrKpD;AAOA;AAYA;AAUsB,iBCuKN,eAAA,CDnKL,QAAR,EAAA,MAAO,EAAA,QAAA,CAAA,EAAA,MAAA,CAAA,ECmKuD,eDnKvD,EAAA;;;ADnCO,UGIA,gBAAA,CHJY;EAWwB;;;;AAMrD;EAQiB,YAAQ,CAAA,EGfR,MHeQ,CAAA,MAWX,EAAA,MAAA,CAAA;AAOd;AASA;AAgBA;AAcA;AAUA;AAUA;;;;;;AAYA;AASA;;;;ACzHgB,cE0BH,SAAA,YAAqB,YF1BN,CE0BmB,UF1BnB,EE0B+B,QF1B/B,CAAA,CAAA;EAOZ,SAAA,IAAA,GAAA,KAAc;EAYd,QAAA,YAAQ;EAUF,WAAA,CAAA,OAIX,CAJwB,EECX,gBFGb;EAYK,QAAA,CAAA,OAAA,EAAe,MAAA,CAAA,EAAA,OAAA;EAAQ,KAAA,CAAA,OAAA,EAAA,MAAA,EAAA,GAAA,CAAA,EAAA,MAAA,CAAA,EENC,YFMD,CENc,UFMd,EEN0B,QFM1B,CAAA;EAA6B,QAAA,SAAA;EAAZ,QAAA,SAAA;EAAG,QAAA,SAAA;;;;EC7B9C,QAAA,mBAAe;EAAwB;;;;EAAD,QAAA,SAAA;EAqJnC;AA+BhB;;;;AClMA;AAwBA;EAA+C,QAAA,mBAAA;EAAY;;;EAaM,QAAA,eAAA;EAAzB"}
@@ -0,0 +1,268 @@
//#region src/parsers/types.d.ts
/**
 * Generic source parser interface.
 * Parsers transform raw content into structured data with metadata.
 *
 * @template TData - The main data type (e.g., array of links)
 * @template TMeta - Optional metadata type
 */
interface SourceParser<TData, TMeta = unknown> {
  readonly name: string;
  /**
   * Check if this parser can handle the given content
   */
  canParse(content: string, url?: string): boolean;
  /**
   * Parse the content and extract structured data
   */
  parse(content: string, url?: string): ParserResult<TData, TMeta>;
}
/**
 * Result from a parser
 */
interface ParserResult<TData, TMeta = unknown> {
  data: TData;
  meta?: TMeta;
}
/**
 * RSS/Atom feed item
 */
interface FeedItem {
  id: string;
  title: string;
  link: string;
  description?: string;
  content?: string;
  author?: string;
  publishedAt?: string;
  rawPublishedAt?: string;
  updatedAt?: string;
  categories: string[];
  enclosure?: FeedEnclosure;
  customFields?: Record<string, string>;
}
/**
 * Media enclosure (podcasts, videos)
 */
interface FeedEnclosure {
  url: string;
  type?: string;
  length?: number;
}
/**
 * Parsed feed structure
 */
interface ParsedFeed {
  format: 'rss2' | 'rss1' | 'atom';
  title: string;
  description?: string;
  link: string;
  next?: string;
  language?: string;
  lastBuildDate?: string;
  copyright?: string;
  items: FeedItem[];
  customFields?: Record<string, string>;
}
/**
 * Feed metadata
 */
interface FeedMeta {
  generator?: string;
  ttl?: number;
  image?: {
    url: string;
    title?: string;
    link?: string;
  };
  categories?: string[];
}
/**
 * Markdown link extracted from content
 */
interface MarkdownLink {
  url: string;
  text: string;
  title?: string;
  context?: string;
}
/**
 * Markdown section (heading + content)
 */
interface MarkdownSection {
  level: number;
  title: string;
  content: string;
  links: MarkdownLink[];
}
/**
 * Parsed markdown structure
 */
interface ParsedMarkdown {
  title?: string;
  description?: string;
  sections: MarkdownSection[];
  links: MarkdownLink[];
  codeBlocks: CodeBlock[];
  frontmatter?: Record<string, unknown>;
}
/**
 * Code block from markdown
 */
interface CodeBlock {
  language?: string;
  code: string;
  meta?: string;
}
/**
 * GitHub repository metadata
 */
interface GitHubMeta {
  repoOwner?: string;
  repoName?: string;
  stars?: number;
  lastUpdated?: string;
}
//#endregion
//#region src/parsers/github.d.ts
/**
 * GitHub-specific utilities for parsing repositories.
 */
/**
 * Check if a URL is a GitHub repository
 */
declare function isGitHubRepo(url: string): boolean;
/**
 * Extract GitHub repo info from URL
 */
declare function parseGitHubUrl(url: string): {
  owner: string;
  repo: string;
} | null;
/**
 * Convert a GitHub repo URL to raw content URL
 */
declare function toRawUrl(url: string, branch?: string, file?: string): string;
/**
 * Fetch GitHub API metadata for a repository
 * Note: This is a placeholder - actual implementation would need GitHub API access
 */
declare function fetchRepoMeta(owner: string, repo: string, _token?: string): Promise<GitHubMeta>;
/**
 * Group links by their category/section
 */
declare function groupByCategory(links: MarkdownLink[]): Map<string, MarkdownLink[]>;
//#endregion
//#region src/parsers/markdown.d.ts
/**
 * Generic Markdown parser.
 * Extracts structure, links, and code blocks from markdown content.
 *
 * @example
 * ```ts
 * const parser = new MarkdownParser();
 * const result = parser.parse(markdownContent);
 * console.log(result.data.sections);
 * console.log(result.data.links);
 * ```
 */
declare class MarkdownParser implements SourceParser<ParsedMarkdown> {
  readonly name = "markdown";
  canParse(content: string): boolean;
  parse(content: string): ParserResult<ParsedMarkdown>;
  private parseFrontmatter;
  private extractDescription;
}
/**
 * Extract links from a list-based markdown structure (like awesome lists)
 */
declare function extractListLinks(markdown: string): MarkdownLink[];
/**
 * Parse markdown into sections by heading level
 */
declare function parseByHeadings(markdown: string, minLevel?: number): MarkdownSection[];
//#endregion
//#region src/parsers/rss.d.ts
interface RSSParserOptions {
  /**
   * Map of custom field names to CSS selectors or XML tag names.
   * Useful for extracting podcast/media namespace fields.
   * @example { duration: "itunes\:duration", rating: "media\:rating" }
   */
  customFields?: Record<string, string>;
}
/**
 * RSS/Atom feed parser.
 * Supports RSS 2.0, RSS 1.0 (RDF), and Atom 1.0 formats.
 *
 * @example
 * ```ts
 * import { RSSParser } from 'scrapex/parsers';
 *
 * const parser = new RSSParser();
 * const result = parser.parse(feedXml, 'https://example.com/feed.xml');
 *
 * console.log(result.data.title);
 * console.log(result.data.items);
 * ```
 */
declare class RSSParser implements SourceParser<ParsedFeed, FeedMeta> {
  readonly name = "rss";
  private customFields;
  constructor(options?: RSSParserOptions);
  canParse(content: string): boolean;
  parse(content: string, url?: string): ParserResult<ParsedFeed, FeedMeta>;
  private parseRSS2;
  private parseAtom;
  private parseRSS1;
  /**
   * Extract custom fields from an element using configured selectors.
   */
  private extractCustomFields;
  /**
   * Parse date string to ISO 8601 format.
   * Returns undefined if parsing fails (never returns raw strings).
   */
  private parseDate;
  /**
   * Parse element text content, returning undefined if empty.
   */
  private parseText;
  /**
   * Parse content:encoded, stripping HTML tags from CDATA content.
   */
  private parseContentEncoded;
  /**
   * Parse categories/tags, filtering out empty strings.
   */
  private parseCategories;
  /**
   * Resolve a URL against a base URL.
   * Only allows https scheme; all other schemes are rejected.
   */
  private resolveUrl;
  /**
   * Resolve link with fallback to id/guid if primary link is empty and id is a URL.
   * Only allows https scheme; all other schemes are rejected.
   */
  private resolveLink;
  /**
   * Parse enclosure element with URL resolution.
   */
  private parseEnclosure;
  /**
   * Parse RSS image element with URL resolution.
   */
  private parseImage;
  /**
   * Parse Atom logo/icon element with URL resolution.
   */
  private parseAtomImage;
  /**
   * Parse a string to number, returning undefined if invalid.
   */
  private parseNumber;
}
//#endregion
export { MarkdownSection as _, parseByHeadings as a, ParserResult as b, isGitHubRepo as c, CodeBlock as d, FeedEnclosure as f, MarkdownLink as g, GitHubMeta as h, extractListLinks as i, parseGitHubUrl as l, FeedMeta as m, RSSParserOptions as n, fetchRepoMeta as o, FeedItem as p, MarkdownParser as r, groupByCategory as s, RSSParser as t, toRawUrl as u, ParsedFeed as v, SourceParser as x, ParsedMarkdown as y };
//# sourceMappingURL=index-piS5wtki.d.mts.map
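The GitHub and markdown helpers declared in this chunk compose naturally: parseGitHubUrl/toRawUrl locate a repository's raw README, and extractListLinks plus groupByCategory turn awesome-list style markdown into grouped links. A small sketch against the declared signatures only; the 'scrapex/parsers' import path, the sample URL, and the sample markdown are assumptions, and the exact grouping keys produced by groupByCategory depend on the implementation, not on anything shown in the declarations.

```ts
import {
  isGitHubRepo,
  parseGitHubUrl,
  toRawUrl,
  extractListLinks,
  groupByCategory,
} from 'scrapex/parsers';

const repoUrl = 'https://github.com/example-org/awesome-example';

if (isGitHubRepo(repoUrl)) {
  // Declared return type: { owner: string; repo: string } | null.
  const info = parseGitHubUrl(repoUrl);
  console.log(info?.owner, info?.repo);

  // Raw-content URL for a branch/file; both extra arguments are optional.
  console.log(toRawUrl(repoUrl, 'main', 'README.md'));
}

// Awesome-list style markdown: pull out list links, then bucket them by section.
const markdown = [
  '## Tools',
  '- [scrapex](https://example.com/scrapex) - Scraping toolkit.',
  '## Reading',
  '- [Spec](https://example.com/spec) - Background reading.',
].join('\n');

const links = extractListLinks(markdown);
const grouped = groupByCategory(links);
for (const [category, categoryLinks] of grouped) {
  console.log(category, categoryLinks.length);
}
```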