@jambudipa/spider 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -16
- package/dist/browser/BrowserManager.d.ts +63 -0
- package/dist/browser/BrowserManager.d.ts.map +1 -0
- package/dist/browser/PlaywrightAdapter.d.ts +166 -0
- package/dist/browser/PlaywrightAdapter.d.ts.map +1 -0
- package/dist/examples/01-basic-crawl-working.d.ts +13 -0
- package/dist/examples/01-basic-crawl-working.d.ts.map +1 -0
- package/dist/examples/02-multiple-urls-working.d.ts +13 -0
- package/dist/examples/02-multiple-urls-working.d.ts.map +1 -0
- package/dist/examples/03-url-filtering.d.ts +13 -0
- package/dist/examples/03-url-filtering.d.ts.map +1 -0
- package/dist/examples/04-robots-compliance.d.ts +14 -0
- package/dist/examples/04-robots-compliance.d.ts.map +1 -0
- package/dist/examples/05-link-extraction-selectors.d.ts +14 -0
- package/dist/examples/05-link-extraction-selectors.d.ts.map +1 -0
- package/dist/examples/06-custom-middleware.d.ts +18 -0
- package/dist/examples/06-custom-middleware.d.ts.map +1 -0
- package/dist/examples/07-resumability-demo.d.ts +14 -0
- package/dist/examples/07-resumability-demo.d.ts.map +1 -0
- package/dist/examples/08-worker-monitoring.d.ts +15 -0
- package/dist/examples/08-worker-monitoring.d.ts.map +1 -0
- package/dist/examples/09-error-handling-recovery.d.ts +15 -0
- package/dist/examples/09-error-handling-recovery.d.ts.map +1 -0
- package/dist/index.d.ts +33 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +2891 -1456
- package/dist/index.js.map +1 -1
- package/dist/lib/BrowserEngine/BrowserEngine.service.d.ts +107 -0
- package/dist/lib/BrowserEngine/BrowserEngine.service.d.ts.map +1 -0
- package/dist/lib/Config/SpiderConfig.service.d.ts +256 -0
- package/dist/lib/Config/SpiderConfig.service.d.ts.map +1 -0
- package/dist/lib/HttpClient/CookieManager.d.ts +58 -0
- package/dist/lib/HttpClient/CookieManager.d.ts.map +1 -0
- package/dist/lib/HttpClient/EnhancedHttpClient.d.ts +63 -0
- package/dist/lib/HttpClient/EnhancedHttpClient.d.ts.map +1 -0
- package/dist/lib/HttpClient/SessionStore.d.ts +114 -0
- package/dist/lib/HttpClient/SessionStore.d.ts.map +1 -0
- package/dist/lib/HttpClient/TokenExtractor.d.ts +83 -0
- package/dist/lib/HttpClient/TokenExtractor.d.ts.map +1 -0
- package/dist/lib/HttpClient/index.d.ts +8 -0
- package/dist/lib/HttpClient/index.d.ts.map +1 -0
- package/dist/lib/LinkExtractor/LinkExtractor.service.d.ts +166 -0
- package/dist/lib/LinkExtractor/LinkExtractor.service.d.ts.map +1 -0
- package/dist/lib/LinkExtractor/index.d.ts +37 -0
- package/dist/lib/LinkExtractor/index.d.ts.map +1 -0
- package/dist/lib/Logging/FetchLogger.d.ts +24 -0
- package/dist/lib/Logging/FetchLogger.d.ts.map +1 -0
- package/dist/lib/Logging/SpiderLogger.service.d.ts +37 -0
- package/dist/lib/Logging/SpiderLogger.service.d.ts.map +1 -0
- package/dist/lib/Middleware/SpiderMiddleware.d.ts +239 -0
- package/dist/lib/Middleware/SpiderMiddleware.d.ts.map +1 -0
- package/dist/lib/Middleware/types.d.ts +99 -0
- package/dist/lib/Middleware/types.d.ts.map +1 -0
- package/dist/lib/PageData/PageData.d.ts +28 -0
- package/dist/lib/PageData/PageData.d.ts.map +1 -0
- package/dist/lib/Resumability/Resumability.service.d.ts +178 -0
- package/dist/lib/Resumability/Resumability.service.d.ts.map +1 -0
- package/dist/lib/Resumability/backends/FileStorageBackend.d.ts +47 -0
- package/dist/lib/Resumability/backends/FileStorageBackend.d.ts.map +1 -0
- package/dist/lib/Resumability/backends/PostgresStorageBackend.d.ts +95 -0
- package/dist/lib/Resumability/backends/PostgresStorageBackend.d.ts.map +1 -0
- package/dist/lib/Resumability/backends/RedisStorageBackend.d.ts +92 -0
- package/dist/lib/Resumability/backends/RedisStorageBackend.d.ts.map +1 -0
- package/dist/lib/Resumability/index.d.ts +51 -0
- package/dist/lib/Resumability/index.d.ts.map +1 -0
- package/dist/lib/Resumability/strategies.d.ts +76 -0
- package/dist/lib/Resumability/strategies.d.ts.map +1 -0
- package/dist/lib/Resumability/types.d.ts +201 -0
- package/dist/lib/Resumability/types.d.ts.map +1 -0
- package/dist/lib/Robots/Robots.service.d.ts +78 -0
- package/dist/lib/Robots/Robots.service.d.ts.map +1 -0
- package/dist/lib/Scheduler/SpiderScheduler.service.d.ts +211 -0
- package/dist/lib/Scheduler/SpiderScheduler.service.d.ts.map +1 -0
- package/dist/lib/Scraper/Scraper.service.d.ts +123 -0
- package/dist/lib/Scraper/Scraper.service.d.ts.map +1 -0
- package/dist/lib/Spider/Spider.service.d.ts +249 -0
- package/dist/lib/Spider/Spider.service.d.ts.map +1 -0
- package/dist/lib/StateManager/StateManager.service.d.ts +107 -0
- package/dist/lib/StateManager/StateManager.service.d.ts.map +1 -0
- package/dist/lib/StateManager/index.d.ts +5 -0
- package/dist/lib/StateManager/index.d.ts.map +1 -0
- package/dist/lib/UrlDeduplicator/UrlDeduplicator.service.d.ts +58 -0
- package/dist/lib/UrlDeduplicator/UrlDeduplicator.service.d.ts.map +1 -0
- package/dist/lib/WebScrapingEngine/WebScrapingEngine.service.d.ts +110 -0
- package/dist/lib/WebScrapingEngine/WebScrapingEngine.service.d.ts.map +1 -0
- package/dist/lib/WebScrapingEngine/index.d.ts +5 -0
- package/dist/lib/WebScrapingEngine/index.d.ts.map +1 -0
- package/dist/lib/WorkerHealth/WorkerHealthMonitor.service.d.ts +39 -0
- package/dist/lib/WorkerHealth/WorkerHealthMonitor.service.d.ts.map +1 -0
- package/dist/lib/api-facades.d.ts +313 -0
- package/dist/lib/api-facades.d.ts.map +1 -0
- package/dist/lib/errors/effect-errors.d.ts +179 -0
- package/dist/lib/errors/effect-errors.d.ts.map +1 -0
- package/dist/lib/errors.d.ts +172 -0
- package/dist/lib/errors.d.ts.map +1 -0
- package/dist/lib/utils/FileUtils.d.ts +284 -0
- package/dist/lib/utils/FileUtils.d.ts.map +1 -0
- package/dist/lib/utils/JsonUtils.d.ts +196 -0
- package/dist/lib/utils/JsonUtils.d.ts.map +1 -0
- package/dist/lib/utils/RegexUtils.d.ts +257 -0
- package/dist/lib/utils/RegexUtils.d.ts.map +1 -0
- package/dist/lib/utils/SchemaUtils.d.ts +251 -0
- package/dist/lib/utils/SchemaUtils.d.ts.map +1 -0
- package/dist/lib/utils/UrlUtils.d.ts +223 -0
- package/dist/lib/utils/UrlUtils.d.ts.map +1 -0
- package/dist/lib/utils/effect-migration.d.ts +31 -0
- package/dist/lib/utils/effect-migration.d.ts.map +1 -0
- package/dist/lib/utils/index.d.ts +15 -0
- package/dist/lib/utils/index.d.ts.map +1 -0
- package/dist/lib/utils/url-deduplication.d.ts +108 -0
- package/dist/lib/utils/url-deduplication.d.ts.map +1 -0
- package/dist/lib/utils/url-deduplication.test.d.ts +5 -0
- package/dist/lib/utils/url-deduplication.test.d.ts.map +1 -0
- package/dist/test/infrastructure/EffectTestUtils.d.ts +167 -0
- package/dist/test/infrastructure/EffectTestUtils.d.ts.map +1 -0
- package/package.json +21 -9
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Web Scraping Engine Service
|
|
3
|
+
* Orchestrates all scraping capabilities including authentication, token management, and session handling
|
|
4
|
+
*/
|
|
5
|
+
import { Context, DateTime, Effect, HashMap, Layer } from 'effect';
|
|
6
|
+
import { EnhancedHttpClient, type HttpResponse } from '../HttpClient/EnhancedHttpClient.js';
|
|
7
|
+
import { CookieManager } from '../HttpClient/CookieManager.js';
|
|
8
|
+
import { SessionStore, SessionError } from '../HttpClient/SessionStore.js';
|
|
9
|
+
import { TokenExtractor } from '../HttpClient/TokenExtractor.js';
|
|
10
|
+
import { StateManager, TokenType } from '../StateManager/StateManager.service.js';
|
|
11
|
+
import { SpiderLogger } from '../Logging/SpiderLogger.service.js';
|
|
12
|
+
import { NetworkError } from '../errors.js';
|
|
13
|
+
import { ParseError, TimeoutError } from '../errors/effect-errors.js';
|
|
14
|
+
import { JsonStringifyError } from '../utils/JsonUtils.js';
|
|
15
|
+
declare const LoginError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
|
|
16
|
+
readonly _tag: "LoginError";
|
|
17
|
+
} & Readonly<A>;
|
|
18
|
+
export declare class LoginError extends LoginError_base<{
|
|
19
|
+
readonly status: number;
|
|
20
|
+
readonly message: string;
|
|
21
|
+
}> {
|
|
22
|
+
}
|
|
23
|
+
declare const SessionNotValidError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
|
|
24
|
+
readonly _tag: "SessionNotValidError";
|
|
25
|
+
} & Readonly<A>;
|
|
26
|
+
export declare class SessionNotValidError extends SessionNotValidError_base<{
|
|
27
|
+
readonly message: string;
|
|
28
|
+
}> {
|
|
29
|
+
}
|
|
30
|
+
declare const SessionLoadError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
|
|
31
|
+
readonly _tag: "SessionLoadError";
|
|
32
|
+
} & Readonly<A>;
|
|
33
|
+
export declare class SessionLoadError extends SessionLoadError_base<{
|
|
34
|
+
readonly message: string;
|
|
35
|
+
}> {
|
|
36
|
+
}
|
|
37
|
+
export type WebScrapingEngineError = LoginError | SessionNotValidError | SessionLoadError;
|
|
38
|
+
/**
|
|
39
|
+
* Combined error types for HTTP operations
|
|
40
|
+
*/
|
|
41
|
+
export type HttpOperationError = NetworkError | ParseError | TimeoutError;
|
|
42
|
+
/**
|
|
43
|
+
* Combined error types for POST operations
|
|
44
|
+
*/
|
|
45
|
+
export type HttpPostOperationError = HttpOperationError | JsonStringifyError;
|
|
46
|
+
export interface LoginCredentials {
|
|
47
|
+
username: string;
|
|
48
|
+
password: string;
|
|
49
|
+
loginUrl: string;
|
|
50
|
+
usernameField?: string;
|
|
51
|
+
passwordField?: string;
|
|
52
|
+
additionalFields?: Record<string, string>;
|
|
53
|
+
}
|
|
54
|
+
export interface ScrapingSession {
|
|
55
|
+
id: string;
|
|
56
|
+
authenticated: boolean;
|
|
57
|
+
tokens: HashMap.HashMap<TokenType, string>;
|
|
58
|
+
startTime: DateTime.Utc;
|
|
59
|
+
}
|
|
60
|
+
export interface WebScrapingEngineService {
|
|
61
|
+
/**
|
|
62
|
+
* Perform login with form submission
|
|
63
|
+
*/
|
|
64
|
+
login: (_credentials: LoginCredentials) => Effect.Effect<ScrapingSession, HttpOperationError | SessionError | LoginError>;
|
|
65
|
+
/**
|
|
66
|
+
* Fetch authenticated content
|
|
67
|
+
*/
|
|
68
|
+
fetchAuthenticated: (_url: string) => Effect.Effect<HttpResponse, HttpOperationError | SessionNotValidError>;
|
|
69
|
+
/**
|
|
70
|
+
* Submit form with CSRF protection
|
|
71
|
+
*/
|
|
72
|
+
submitFormWithCSRF: (_url: string, _formData: Record<string, string>, _csrfUrl?: string) => Effect.Effect<HttpResponse, HttpOperationError>;
|
|
73
|
+
/**
|
|
74
|
+
* Make API request with token
|
|
75
|
+
*/
|
|
76
|
+
makeAPIRequest: (_url: string, _method?: 'GET' | 'POST' | 'PUT' | 'DELETE', _data?: Record<string, unknown>) => Effect.Effect<HttpResponse, HttpPostOperationError>;
|
|
77
|
+
/**
|
|
78
|
+
* Create and save a scraping session
|
|
79
|
+
*/
|
|
80
|
+
createSession: (_id?: string) => Effect.Effect<ScrapingSession>;
|
|
81
|
+
/**
|
|
82
|
+
* Load existing session
|
|
83
|
+
*/
|
|
84
|
+
loadSession: (_id: string) => Effect.Effect<ScrapingSession, SessionError | SessionLoadError>;
|
|
85
|
+
/**
|
|
86
|
+
* Export session for persistence
|
|
87
|
+
*/
|
|
88
|
+
exportSession: () => Effect.Effect<string, SessionError>;
|
|
89
|
+
/**
|
|
90
|
+
* Import session from persistence
|
|
91
|
+
*/
|
|
92
|
+
importSession: (_data: string) => Effect.Effect<void, SessionError>;
|
|
93
|
+
/**
|
|
94
|
+
* Clear all state and sessions
|
|
95
|
+
*/
|
|
96
|
+
clearAll: () => Effect.Effect<void>;
|
|
97
|
+
}
|
|
98
|
+
declare const WebScrapingEngine_base: Context.TagClass<WebScrapingEngine, "WebScrapingEngine", WebScrapingEngineService>;
|
|
99
|
+
export declare class WebScrapingEngine extends WebScrapingEngine_base {
|
|
100
|
+
}
|
|
101
|
+
/**
|
|
102
|
+
* Create a WebScrapingEngine service implementation
|
|
103
|
+
*/
|
|
104
|
+
export declare const makeWebScrapingEngine: Effect.Effect<WebScrapingEngineService, never, SpiderLogger | CookieManager | EnhancedHttpClient | StateManager | SessionStore | TokenExtractor>;
|
|
105
|
+
/**
|
|
106
|
+
* WebScrapingEngine Layer with all dependencies
|
|
107
|
+
*/
|
|
108
|
+
export declare const WebScrapingEngineLive: Layer.Layer<WebScrapingEngine, never, SpiderLogger | CookieManager | EnhancedHttpClient | StateManager | SessionStore | TokenExtractor>;
|
|
109
|
+
export {};
|
|
110
|
+
//# sourceMappingURL=WebScrapingEngine.service.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"WebScrapingEngine.service.d.ts","sourceRoot":"","sources":["../../../src/lib/WebScrapingEngine/WebScrapingEngine.service.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAQ,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAU,MAAM,QAAQ,CAAC;AACjF,OAAO,EACL,kBAAkB,EAClB,KAAK,YAAY,EAClB,MAAM,qCAAqC,CAAC;AAC7C,OAAO,EAAE,aAAa,EAAE,MAAM,gCAAgC,CAAC;AAC/D,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,+BAA+B,CAAC;AAC3E,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC;AACjE,OAAO,EACL,YAAY,EACZ,SAAS,EACV,MAAM,yCAAyC,CAAC;AACjD,OAAO,EAAE,YAAY,EAAE,MAAM,oCAAoC,CAAC;AAClE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AACtE,OAAO,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;;;;AAM3D,qBAAa,UAAW,SAAQ,gBAA+B;IAC7D,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B,CAAC;CAAG;;;;AAEL,qBAAa,oBAAqB,SAAQ,0BAAyC;IACjF,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B,CAAC;CAAG;;;;AAEL,qBAAa,gBAAiB,SAAQ,sBAAqC;IACzE,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B,CAAC;CAAG;AAEL,MAAM,MAAM,sBAAsB,GAAG,UAAU,GAAG,oBAAoB,GAAG,gBAAgB,CAAC;AAE1F;;GAEG;AACH,MAAM,MAAM,kBAAkB,GAAG,YAAY,GAAG,UAAU,GAAG,YAAY,CAAC;AAE1E;;GAEG;AACH,MAAM,MAAM,sBAAsB,GAAG,kBAAkB,GAAG,kBAAkB,CAAC;AAE7E,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC3C;AAED,MAAM,WAAW,eAAe;IAC9B,EAAE,EAAE,MAAM,CAAC;IACX,aAAa,EAAE,OAAO,CAAC;IACvB,MAAM,EAAE,OAAO,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAC3C,SAAS,EAAE,QAAQ,CAAC,GAAG,CAAC;CACzB;AAED,MAAM,WAAW,wBAAwB;IACvC;;OAEG;IACH,KAAK,EAAE,CACL,YAAY,EAAE,gBAAgB,KAC3B,MAAM,CAAC,MAAM,CAAC,eAAe,EAAE,kBAAkB,GAAG,YAAY,GAAG,UAAU,CAAC,CAAC;IAEpF;;OAEG;IACH,kBAAkB,EAAE,CAClB,IAAI,EAAE,MAAM,KACT,MAAM,CAAC,MAAM,CAAC,YAAY,EAAE,kBAAkB,GAAG,oBAAoB,CAAC,CAAC;IAE5E;;OAEG;IACH,kBAAkB,EAAE,CAClB,IAAI,EAAE,MAAM,EACZ,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EACjC,QAAQ,CAAC,EAAE,MAAM,KACd,MAAM,CAAC,MAAM,CAAC,YAAY,EAAE,kBAAkB,CAAC,CAAC;IAErD;;OAEG;IACH,cAAc,EAAE,CACd,IAAI,EAAE,MAAM,EACZ,OAAO,CAAC,EAAE,KAAK,GAAG,MAAM,GAAG,KAAK,GAAG,QAAQ,EAC3C,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAC5B,MAAM,CAAC,MAAM,CAAC,YAAY,EAAE,sBAAsB,CAAC,CAAC;IAEzD;;OAEG;IACH,aAAa,EAAE,CAAC,GAAG,CAAC,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;IAEhE;;OAEG;IACH,WAAW,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC,eAAe,EAAE,YAAY,GAAG,gBAAgB,CAAC,CAAC;IAE9F;;OAEG;IACH,aAAa,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;IAEzD;;OAEG;IACH,aAAa,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;IAEpE;;OAEG;IACH,QAAQ,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;CACrC;;AAED,qBAAa,iBAAkB,SAAQ,sBAGpC;CAAG;AAEN;;GAEG;AACH,eAAO,MAAM,qBAAqB,kJA+RhC,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,qBAAqB,yIAGjC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/lib/WebScrapingEngine/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,cAAc,gCAAgC,CAAC"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { DateTime, Effect } from 'effect';
|
|
2
|
+
import { SpiderLogger } from '../Logging/SpiderLogger.service.js';
|
|
3
|
+
interface WorkerStatus {
|
|
4
|
+
workerId: string;
|
|
5
|
+
domain: string;
|
|
6
|
+
currentUrl?: string;
|
|
7
|
+
lastActivity: DateTime.Utc;
|
|
8
|
+
fetchStartTime?: DateTime.Utc;
|
|
9
|
+
}
|
|
10
|
+
declare const WorkerHealthMonitor_base: Effect.Service.Class<WorkerHealthMonitor, "@jambudipa.io/WorkerHealthMonitor", {
|
|
11
|
+
readonly effect: Effect.Effect<{
|
|
12
|
+
/**
|
|
13
|
+
* Register a worker's activity
|
|
14
|
+
*/
|
|
15
|
+
recordActivity: (workerId: string, domain: string, activity: {
|
|
16
|
+
url?: string;
|
|
17
|
+
fetchStart?: boolean;
|
|
18
|
+
}) => Effect.Effect<void, never, never>;
|
|
19
|
+
/**
|
|
20
|
+
* Remove a worker from monitoring
|
|
21
|
+
*/
|
|
22
|
+
removeWorker: (workerId: string) => Effect.Effect<void, never, never>;
|
|
23
|
+
/**
|
|
24
|
+
* Get stuck workers
|
|
25
|
+
*/
|
|
26
|
+
getStuckWorkers: Effect.Effect<WorkerStatus[], never, never>;
|
|
27
|
+
/**
|
|
28
|
+
* Monitor workers and log stuck ones
|
|
29
|
+
*/
|
|
30
|
+
startMonitoring: Effect.Effect<void, never, never>;
|
|
31
|
+
}, never, SpiderLogger>;
|
|
32
|
+
}>;
|
|
33
|
+
/**
|
|
34
|
+
* Monitors worker health and kills stuck workers
|
|
35
|
+
*/
|
|
36
|
+
export declare class WorkerHealthMonitor extends WorkerHealthMonitor_base {
|
|
37
|
+
}
|
|
38
|
+
export {};
|
|
39
|
+
//# sourceMappingURL=WorkerHealthMonitor.service.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"WorkerHealthMonitor.service.d.ts","sourceRoot":"","sources":["../../../src/lib/WorkerHealth/WorkerHealthMonitor.service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAY,MAAM,EAAkC,MAAM,QAAQ,CAAC;AACpF,OAAO,EAAE,YAAY,EAAE,MAAM,oCAAoC,CAAC;AAElE,UAAU,YAAY;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,QAAQ,CAAC,GAAG,CAAC;IAC3B,cAAc,CAAC,EAAE,QAAQ,CAAC,GAAG,CAAC;CAC/B;;;QAcO;;WAEG;mCAES,MAAM,UACR,MAAM,YACJ;YAAE,GAAG,CAAC,EAAE,MAAM,CAAC;YAAC,UAAU,CAAC,EAAE,OAAO,CAAA;SAAE;QA4BlD;;WAEG;iCACsB,MAAM;QAG/B;;WAEG;;QAgBH;;WAEG;;;;AAxEX;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,wBAwHxC;CAAG"}
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Clean API facades that hide Effect.Service implementation details.
|
|
3
|
+
*
|
|
4
|
+
* These interfaces provide clean documentation without exposing
|
|
5
|
+
* internal Effect service machinery.
|
|
6
|
+
*
|
|
7
|
+
* @group Services
|
|
8
|
+
*/
|
|
9
|
+
import { Effect, Sink } from 'effect';
|
|
10
|
+
import { CrawlResult, CrawlTask } from './Spider/Spider.service.js';
|
|
11
|
+
import { PriorityRequest, SpiderState, SpiderStateKey, StatePersistence } from './Scheduler/SpiderScheduler.service.js';
|
|
12
|
+
import { SpiderMiddleware, SpiderRequest, SpiderResponse } from './Middleware/SpiderMiddleware.js';
|
|
13
|
+
import { MiddlewareError } from './errors.js';
|
|
14
|
+
/**
|
|
15
|
+
* The main Spider service interface for web crawling.
|
|
16
|
+
*
|
|
17
|
+
* Orchestrates the entire crawling process including URL validation,
|
|
18
|
+
* robots.txt checking, concurrent processing, and result streaming.
|
|
19
|
+
*
|
|
20
|
+
* @example
|
|
21
|
+
* ```typescript
|
|
22
|
+
* const program = Effect.gen(function* () {
|
|
23
|
+
* const spider = yield* Spider;
|
|
24
|
+
* const collectSink = Sink.forEach<CrawlResult>(result =>
|
|
25
|
+
* Effect.sync(() => console.log(result.pageData.url))
|
|
26
|
+
* );
|
|
27
|
+
*
|
|
28
|
+
* const stats = yield* spider.crawl('https://example.com', collectSink);
|
|
29
|
+
* console.log(`Crawled ${stats.totalPages} pages`);
|
|
30
|
+
* });
|
|
31
|
+
* ```
|
|
32
|
+
*
|
|
33
|
+
* @group Services
|
|
34
|
+
* @public
|
|
35
|
+
*/
|
|
36
|
+
export interface ISpider {
|
|
37
|
+
/**
|
|
38
|
+
* Starts crawling from the specified URL and processes results through the provided sink.
|
|
39
|
+
*
|
|
40
|
+
* @param urlString - The starting URL for crawling
|
|
41
|
+
* @param sink - Sink to process crawl results as they're produced
|
|
42
|
+
* @returns Effect containing crawl statistics (total pages, completion status)
|
|
43
|
+
*/
|
|
44
|
+
crawl<A, E, R>(_urlString: string, _sink: Sink.Sink<A, CrawlResult, E, R>): Effect.Effect<{
|
|
45
|
+
totalPages: number;
|
|
46
|
+
completed: boolean;
|
|
47
|
+
}, Error>;
|
|
48
|
+
/**
|
|
49
|
+
* Returns the list of URLs that have been visited during crawling.
|
|
50
|
+
*
|
|
51
|
+
* @returns Effect containing array of visited URLs
|
|
52
|
+
*/
|
|
53
|
+
getVisitedUrls(): Effect.Effect<string[]>;
|
|
54
|
+
}
|
|
55
|
+
/**
|
|
56
|
+
* The SpiderSchedulerService service interface for request scheduling and persistence.
|
|
57
|
+
*
|
|
58
|
+
* Manages request queuing, prioritization, and state persistence for
|
|
59
|
+
* resumable crawling operations.
|
|
60
|
+
*
|
|
61
|
+
* @example
|
|
62
|
+
* ```typescript
|
|
63
|
+
* const program = Effect.gen(function* () {
|
|
64
|
+
* const scheduler = yield* SpiderSchedulerService;
|
|
65
|
+
*
|
|
66
|
+
* // Configure persistence
|
|
67
|
+
* const stateKey = new SpiderStateKey({
|
|
68
|
+
* id: 'my-crawl',
|
|
69
|
+
* timestamp: new Date(),
|
|
70
|
+
* name: 'Example Crawl'
|
|
71
|
+
* });
|
|
72
|
+
*
|
|
73
|
+
* yield* scheduler.configurePersistence(persistence, stateKey);
|
|
74
|
+
*
|
|
75
|
+
* // Queue requests with priority
|
|
76
|
+
* yield* scheduler.enqueue({ url: 'https://example.com', depth: 0 }, 10);
|
|
77
|
+
*
|
|
78
|
+
* // Process requests
|
|
79
|
+
* const request = yield* scheduler.dequeue();
|
|
80
|
+
* console.log(`Processing: ${request.request.url}`);
|
|
81
|
+
* });
|
|
82
|
+
* ```
|
|
83
|
+
*
|
|
84
|
+
* @group Services
|
|
85
|
+
* @public
|
|
86
|
+
*/
|
|
87
|
+
export interface ISpiderScheduler {
|
|
88
|
+
/**
|
|
89
|
+
* Configures the scheduler to use a specific persistence layer with a state key.
|
|
90
|
+
*
|
|
91
|
+
* @param persistence - Implementation of StatePersistence interface
|
|
92
|
+
* @param stateKey - Unique identifier for the crawl session
|
|
93
|
+
*/
|
|
94
|
+
configurePersistence(_persistence: StatePersistence, _stateKey: SpiderStateKey): Effect.Effect<void>;
|
|
95
|
+
/**
|
|
96
|
+
* Removes persistence configuration, disabling state saving.
|
|
97
|
+
*/
|
|
98
|
+
clearPersistence(): Effect.Effect<void>;
|
|
99
|
+
/**
|
|
100
|
+
* Adds a crawl task to the processing queue with optional priority.
|
|
101
|
+
*
|
|
102
|
+
* @param request - Crawl task containing URL and depth
|
|
103
|
+
* @param priority - Optional priority (higher numbers = higher priority, default: 0)
|
|
104
|
+
* @returns Effect containing boolean indicating if task was added (false if duplicate)
|
|
105
|
+
*/
|
|
106
|
+
enqueue(_request: CrawlTask, _priority?: number): Effect.Effect<boolean>;
|
|
107
|
+
/**
|
|
108
|
+
* Retrieves the next highest-priority task from the queue.
|
|
109
|
+
*
|
|
110
|
+
* @returns Effect containing the next priority request
|
|
111
|
+
*/
|
|
112
|
+
dequeue(): Effect.Effect<PriorityRequest>;
|
|
113
|
+
/**
|
|
114
|
+
* Returns the current number of tasks in the queue.
|
|
115
|
+
*/
|
|
116
|
+
size(): Effect.Effect<number>;
|
|
117
|
+
/**
|
|
118
|
+
* Checks if the queue is empty.
|
|
119
|
+
*/
|
|
120
|
+
isEmpty(): Effect.Effect<boolean>;
|
|
121
|
+
/**
|
|
122
|
+
* Returns the current scheduler state for persistence.
|
|
123
|
+
*/
|
|
124
|
+
getState(): Effect.Effect<SpiderState>;
|
|
125
|
+
/**
|
|
126
|
+
* Restores the scheduler from a previously saved state.
|
|
127
|
+
*
|
|
128
|
+
* @param state - Complete state to restore from
|
|
129
|
+
*/
|
|
130
|
+
restoreFromState(_state: SpiderState): Effect.Effect<void>;
|
|
131
|
+
/**
|
|
132
|
+
* Attempts to restore state from a persistence layer.
|
|
133
|
+
*
|
|
134
|
+
* @param persistence - Persistence layer to load from
|
|
135
|
+
* @param stateKey - State key to restore
|
|
136
|
+
* @returns Effect containing boolean indicating if state was successfully restored
|
|
137
|
+
*/
|
|
138
|
+
restore(_persistence: StatePersistence, _stateKey: SpiderStateKey): Effect.Effect<boolean>;
|
|
139
|
+
}
|
|
140
|
+
/**
|
|
141
|
+
* The MiddlewareManager service interface for pipeline processing.
|
|
142
|
+
*
|
|
143
|
+
* Orchestrates the execution of middleware in the correct order for
|
|
144
|
+
* request processing, response handling, and error recovery.
|
|
145
|
+
*
|
|
146
|
+
* @example
|
|
147
|
+
* ```typescript
|
|
148
|
+
* const program = Effect.gen(function* () {
|
|
149
|
+
* const manager = yield* MiddlewareManager;
|
|
150
|
+
*
|
|
151
|
+
* const middleware = [
|
|
152
|
+
* rateLimitMiddleware,
|
|
153
|
+
* loggingMiddleware,
|
|
154
|
+
* userAgentMiddleware
|
|
155
|
+
* ];
|
|
156
|
+
*
|
|
157
|
+
* const processedRequest = yield* manager.processRequest(request, middleware);
|
|
158
|
+
* console.log('Request processed through middleware pipeline');
|
|
159
|
+
* });
|
|
160
|
+
* ```
|
|
161
|
+
*
|
|
162
|
+
* @group Services
|
|
163
|
+
* @public
|
|
164
|
+
*/
|
|
165
|
+
export interface IMiddlewareManager {
|
|
166
|
+
/**
|
|
167
|
+
* Processes a request through the middleware pipeline.
|
|
168
|
+
*
|
|
169
|
+
* @param request - The initial request to process
|
|
170
|
+
* @param middlewares - Array of middleware to apply
|
|
171
|
+
* @returns Effect containing the processed request
|
|
172
|
+
*/
|
|
173
|
+
processRequest(_request: SpiderRequest, _middlewares: SpiderMiddleware[]): Effect.Effect<SpiderRequest, MiddlewareError>;
|
|
174
|
+
/**
|
|
175
|
+
* Processes a response through the middleware pipeline in reverse order.
|
|
176
|
+
*
|
|
177
|
+
* @param response - The response to process
|
|
178
|
+
* @param request - The original request (for context)
|
|
179
|
+
* @param middlewares - Array of middleware to apply
|
|
180
|
+
* @returns Effect containing the processed response
|
|
181
|
+
*/
|
|
182
|
+
processResponse(_response: SpiderResponse, _request: SpiderRequest, _middlewares: SpiderMiddleware[]): Effect.Effect<SpiderResponse, MiddlewareError>;
|
|
183
|
+
/**
|
|
184
|
+
* Processes an exception through the middleware pipeline in reverse order.
|
|
185
|
+
*
|
|
186
|
+
* @param error - The error that occurred
|
|
187
|
+
* @param request - The request that caused the error
|
|
188
|
+
* @param middlewares - Array of middleware to apply
|
|
189
|
+
* @returns Effect containing a recovered response or null
|
|
190
|
+
*/
|
|
191
|
+
processException(_error: Error, _request: SpiderRequest, _middlewares: SpiderMiddleware[]): Effect.Effect<SpiderResponse | null, MiddlewareError>;
|
|
192
|
+
}
|
|
193
|
+
/**
|
|
194
|
+
* Rate limiting middleware service interface.
|
|
195
|
+
*
|
|
196
|
+
* Provides rate limiting functionality for respectful crawling,
|
|
197
|
+
* controlling request frequency at both global and per-domain levels.
|
|
198
|
+
*
|
|
199
|
+
* @group Middleware
|
|
200
|
+
* @public
|
|
201
|
+
*/
|
|
202
|
+
export interface IRateLimitMiddleware {
|
|
203
|
+
/**
|
|
204
|
+
* Creates a rate limiting middleware with the specified configuration.
|
|
205
|
+
*
|
|
206
|
+
* @param config - Rate limiting configuration options
|
|
207
|
+
* @returns Configured middleware instance
|
|
208
|
+
*
|
|
209
|
+
* @example
|
|
210
|
+
* ```typescript
|
|
211
|
+
* const rateLimiter = yield* RateLimitMiddleware;
|
|
212
|
+
* const middleware = rateLimiter.create({
|
|
213
|
+
* maxConcurrentRequests: 5,
|
|
214
|
+
* maxRequestsPerSecondPerDomain: 2,
|
|
215
|
+
* requestDelayMs: 250
|
|
216
|
+
* });
|
|
217
|
+
* ```
|
|
218
|
+
*/
|
|
219
|
+
create(_config: {
|
|
220
|
+
maxConcurrentRequests: number;
|
|
221
|
+
maxRequestsPerSecondPerDomain: number;
|
|
222
|
+
requestDelayMs?: number;
|
|
223
|
+
}): SpiderMiddleware;
|
|
224
|
+
}
|
|
225
|
+
/**
|
|
226
|
+
* Logging middleware service interface.
|
|
227
|
+
*
|
|
228
|
+
* Provides logging functionality using Effect.Logger for debugging
|
|
229
|
+
* and monitoring crawling operations.
|
|
230
|
+
*
|
|
231
|
+
* @group Middleware
|
|
232
|
+
* @public
|
|
233
|
+
*/
|
|
234
|
+
export interface ILoggingMiddleware {
|
|
235
|
+
/**
|
|
236
|
+
* Creates a logging middleware with optional configuration.
|
|
237
|
+
*
|
|
238
|
+
* @param config - Optional logging configuration
|
|
239
|
+
* @returns Configured middleware instance
|
|
240
|
+
*
|
|
241
|
+
* @example
|
|
242
|
+
* ```typescript
|
|
243
|
+
* const logger = yield* LoggingMiddleware;
|
|
244
|
+
* const middleware = logger.create({
|
|
245
|
+
* logRequests: true,
|
|
246
|
+
* logResponses: true,
|
|
247
|
+
* logLevel: 'info'
|
|
248
|
+
* });
|
|
249
|
+
* ```
|
|
250
|
+
*/
|
|
251
|
+
create(_config?: {
|
|
252
|
+
logRequests?: boolean;
|
|
253
|
+
logResponses?: boolean;
|
|
254
|
+
logErrors?: boolean;
|
|
255
|
+
logLevel?: 'debug' | 'info' | 'warn' | 'error';
|
|
256
|
+
}): SpiderMiddleware;
|
|
257
|
+
}
|
|
258
|
+
/**
|
|
259
|
+
* User agent middleware service interface.
|
|
260
|
+
*
|
|
261
|
+
* Adds consistent User-Agent headers to all requests for
|
|
262
|
+
* proper identification of your crawler.
|
|
263
|
+
*
|
|
264
|
+
* @group Middleware
|
|
265
|
+
* @public
|
|
266
|
+
*/
|
|
267
|
+
export interface IUserAgentMiddleware {
|
|
268
|
+
/**
|
|
269
|
+
* Creates a User-Agent middleware with the specified user agent string.
|
|
270
|
+
*
|
|
271
|
+
* @param userAgent - User agent string to add to requests
|
|
272
|
+
* @returns Configured middleware instance
|
|
273
|
+
*
|
|
274
|
+
* @example
|
|
275
|
+
* ```typescript
|
|
276
|
+
* const userAgent = yield* UserAgentMiddleware;
|
|
277
|
+
* const middleware = userAgent.create('MyBot/1.0 (+https://example.com)');
|
|
278
|
+
* ```
|
|
279
|
+
*/
|
|
280
|
+
create(_userAgent: string): SpiderMiddleware;
|
|
281
|
+
}
|
|
282
|
+
/**
|
|
283
|
+
* Statistics middleware service interface.
|
|
284
|
+
*
|
|
285
|
+
* Collects comprehensive metrics about crawling activity including
|
|
286
|
+
* request counts, response codes, and performance statistics.
|
|
287
|
+
*
|
|
288
|
+
* @group Middleware
|
|
289
|
+
* @public
|
|
290
|
+
*/
|
|
291
|
+
export interface IStatsMiddleware {
|
|
292
|
+
/**
|
|
293
|
+
* Creates a statistics middleware and returns both the middleware and a stats getter.
|
|
294
|
+
*
|
|
295
|
+
* @returns Object containing the middleware instance and statistics retrieval function
|
|
296
|
+
*
|
|
297
|
+
* @example
|
|
298
|
+
* ```typescript
|
|
299
|
+
* const statsService = yield* StatsMiddleware;
|
|
300
|
+
* const { middleware, getStats } = statsService.create();
|
|
301
|
+
*
|
|
302
|
+
* // Use middleware in your pipeline
|
|
303
|
+
* // Later get statistics
|
|
304
|
+
* const stats = yield* getStats();
|
|
305
|
+
* console.log(`Processed ${stats.requests_processed} requests`);
|
|
306
|
+
* ```
|
|
307
|
+
*/
|
|
308
|
+
create(): {
|
|
309
|
+
middleware: SpiderMiddleware;
|
|
310
|
+
getStats: () => Effect.Effect<Record<string, number>>;
|
|
311
|
+
};
|
|
312
|
+
}
|
|
313
|
+
//# sourceMappingURL=api-facades.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"api-facades.d.ts","sourceRoot":"","sources":["../../src/lib/api-facades.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAC;AACtC,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,4BAA4B,CAAC;AACpE,OAAO,EACL,eAAe,EACf,WAAW,EACX,cAAc,EACd,gBAAgB,EACjB,MAAM,wCAAwC,CAAC;AAChD,OAAO,EACL,gBAAgB,EAChB,aAAa,EACb,cAAc,EACf,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,MAAM,WAAW,OAAO;IACtB;;;;;;OAMG;IACH,KAAK,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EACX,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC,CAAC,GACrC,MAAM,CAAC,MAAM,CAAC;QAAE,UAAU,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,OAAO,CAAA;KAAE,EAAE,KAAK,CAAC,CAAC;IAEpE;;;;OAIG;IACH,cAAc,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;CAC3C;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,MAAM,WAAW,gBAAgB;IAC/B;;;;;OAKG;IACH,oBAAoB,CAClB,YAAY,EAAE,gBAAgB,EAC9B,SAAS,EAAE,cAAc,GACxB,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAEvB;;OAEG;IACH,gBAAgB,IAAI,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAExC;;;;;;OAMG;IACH,OAAO,CAAC,QAAQ,EAAE,SAAS,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAEzE;;;;OAIG;IACH,OAAO,IAAI,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;IAE1C;;OAEG;IACH,IAAI,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IAE9B;;OAEG;IACH,OAAO,IAAI,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAElC;;OAEG;IACH,QAAQ,IAAI,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;IAEvC;;;;OAIG;IACH,gBAAgB,CAAC,MAAM,EAAE,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAE3D;;;;;;OAMG;IACH,OAAO,CACL,YAAY,EAAE,gBAAgB,EAC9B,SAAS,EAAE,cAAc,GACxB,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;CAC3B;AAED;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,MAAM,WAAW,kBAAkB;IACjC;;;;;;OAMG;IACH,cAAc,CACZ,QAAQ,EAAE,aAAa,EACvB,YAAY,EAAE,gBAAgB,EAAE,GAC/B,MAAM,CAAC,MAAM,CAAC,aAAa,EAAE,eAAe,CAAC,CAAC;IAEjD;;;;;;;OAOG;IACH,eAAe,CACb,SAAS,EAAE,cAAc,EACzB,QAAQ,EAAE,aAAa,EACvB,YAAY,EAAE,gBAAgB,EAAE,GAC/B,MAAM,CAAC,MAAM,CAAC,cAAc,EAAE,eAAe,CAAC,CAAC;IAElD;;;;;;;OAOG;IACH,gBAAgB,CACd,MAAM,EAAE,KAAK,EACb,QAAQ,EAAE,aAAa,EACvB,YAAY,EAAE,gBAAgB,EAAE,GAC/B,MAAM,CAAC,MAAM,CAAC,cAAc,GAAG,IAAI,EAAE,eAAe,CAAC,CAAC;CAC1D;AAED;;;;;;;;GAQG;AACH,MAAM,WAAW,oBAAoB;IACnC;;;;;;;;;;;;;;;OAeG;IACH,MAAM,CAAC,OAAO,EAAE;QACd,qBAAqB,EAAE,MAAM,CAAC;QAC9B,6BAA6B,EAAE,MAAM,CAAC;QACtC,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,GAAG,gBAAgB,CAAC;CACtB;AAED;;;;;;;;GAQG;AACH,MAAM,WAAW,kBAAkB;IACjC;;;;;;;;;;;;;;;OAeG;IACH,MAAM,CAAC,OAAO,CAAC,EAAE;QACf,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,SAAS,CAAC,EAAE,OAAO,CAAC;QACpB,QAAQ,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;KAChD,GAAG,gBAAgB,CAAC;CACtB;AAED;;;;;;;;GAQG;AACH,MAAM,WAAW,oBAAoB;IACnC;;;;;;;;;;;OAWG;IACH,MAAM,CAAC,UAAU,EAAE,MAAM,GAAG,gBAAgB,CAAC;CAC9C;AAED;;;;;;;;GAQG;AACH,MAAM,WAAW,gBAAgB;IAC/B;;;;;;;;;;;;;;;OAeG;IACH,MAAM,IAAI;QACR,UAAU,EAAE,gBAAgB,CAAC;QAC7B,QAAQ,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;KACvD,CAAC;CACH"}
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Effect-based Error Types
|
|
3
|
+
* Comprehensive error hierarchy using Data.TaggedError for type-safe error handling
|
|
4
|
+
*/
|
|
5
|
+
declare const SpiderError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
|
|
6
|
+
readonly _tag: "SpiderError";
|
|
7
|
+
} & Readonly<A>;
|
|
8
|
+
/**
|
|
9
|
+
* Base error class for all Spider errors
|
|
10
|
+
*/
|
|
11
|
+
export declare class SpiderError extends SpiderError_base<{
|
|
12
|
+
readonly operation: string;
|
|
13
|
+
readonly details?: unknown;
|
|
14
|
+
readonly cause?: unknown;
|
|
15
|
+
}> {
|
|
16
|
+
get message(): string;
|
|
17
|
+
}
|
|
18
|
+
declare const NetworkError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
|
|
19
|
+
readonly _tag: "NetworkError";
|
|
20
|
+
} & Readonly<A>;
|
|
21
|
+
export declare class NetworkError extends NetworkError_base<{
|
|
22
|
+
readonly url: string;
|
|
23
|
+
readonly statusCode?: number;
|
|
24
|
+
readonly method?: string;
|
|
25
|
+
readonly cause?: unknown;
|
|
26
|
+
}> {
|
|
27
|
+
get message(): string;
|
|
28
|
+
static fromResponse(url: string, response: Response): NetworkError;
|
|
29
|
+
static fromCause(url: string, cause: unknown): NetworkError;
|
|
30
|
+
}
|
|
31
|
+
declare const TimeoutError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
|
|
32
|
+
readonly _tag: "TimeoutError";
|
|
33
|
+
} & Readonly<A>;
|
|
34
|
+
export declare class TimeoutError extends TimeoutError_base<{
|
|
35
|
+
readonly url: string;
|
|
36
|
+
readonly timeoutMs: number;
|
|
37
|
+
readonly operation: string;
|
|
38
|
+
}> {
|
|
39
|
+
get message(): string;
|
|
40
|
+
}
|
|
41
|
+
declare const ParseError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
|
|
42
|
+
readonly _tag: "ParseError";
|
|
43
|
+
} & Readonly<A>;
|
|
44
|
+
export declare class ParseError extends ParseError_base<{
|
|
45
|
+
readonly input?: string;
|
|
46
|
+
readonly expected: string;
|
|
47
|
+
readonly cause?: unknown;
|
|
48
|
+
}> {
|
|
49
|
+
get message(): string;
|
|
50
|
+
static json(input: string, cause?: unknown): ParseError;
|
|
51
|
+
static html(input: string, cause?: unknown): ParseError;
|
|
52
|
+
}
|
|
53
|
+
declare const ValidationError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
|
|
54
|
+
readonly _tag: "ValidationError";
|
|
55
|
+
} & Readonly<A>;
|
|
56
|
+
export declare class ValidationError extends ValidationError_base<{
|
|
57
|
+
readonly field: string;
|
|
58
|
+
readonly value?: unknown;
|
|
59
|
+
readonly constraint: string;
|
|
60
|
+
}> {
|
|
61
|
+
get message(): string;
|
|
62
|
+
static url(url: string): ValidationError;
|
|
63
|
+
}
|
|
64
|
+
declare const BrowserError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
|
|
65
|
+
readonly _tag: "BrowserError";
|
|
66
|
+
} & Readonly<A>;
|
|
67
|
+
export declare class BrowserError extends BrowserError_base<{
|
|
68
|
+
readonly operation: string;
|
|
69
|
+
readonly browserId?: string;
|
|
70
|
+
readonly cause?: unknown;
|
|
71
|
+
}> {
|
|
72
|
+
get message(): string;
|
|
73
|
+
static notLaunched(): BrowserError;
|
|
74
|
+
static launchFailed(cause: unknown): BrowserError;
|
|
75
|
+
}
|
|
76
|
+
declare const PageError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
|
|
77
|
+
readonly _tag: "PageError";
|
|
78
|
+
} & Readonly<A>;
|
|
79
|
+
export declare class PageError extends PageError_base<{
|
|
80
|
+
readonly url: string;
|
|
81
|
+
readonly operation: string;
|
|
82
|
+
readonly selector?: string;
|
|
83
|
+
readonly cause?: unknown;
|
|
84
|
+
}> {
|
|
85
|
+
get message(): string;
|
|
86
|
+
}
|
|
87
|
+
declare const StateError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
|
|
88
|
+
readonly _tag: "StateError";
|
|
89
|
+
} & Readonly<A>;
|
|
90
|
+
export declare class StateError extends StateError_base<{
|
|
91
|
+
readonly operation: 'save' | 'load' | 'delete' | 'update';
|
|
92
|
+
readonly stateKey?: string;
|
|
93
|
+
readonly cause?: unknown;
|
|
94
|
+
}> {
|
|
95
|
+
get message(): string;
|
|
96
|
+
}
|
|
97
|
+
declare const SessionError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
|
|
98
|
+
readonly _tag: "SessionError";
|
|
99
|
+
} & Readonly<A>;
|
|
100
|
+
export declare class SessionError extends SessionError_base<{
|
|
101
|
+
readonly sessionId?: string;
|
|
102
|
+
readonly operation: string;
|
|
103
|
+
readonly cause?: unknown;
|
|
104
|
+
}> {
|
|
105
|
+
get message(): string;
|
|
106
|
+
static noActiveSession(): SessionError;
|
|
107
|
+
}
|
|
108
|
+
declare const FileSystemError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
|
|
109
|
+
readonly _tag: "FileSystemError";
|
|
110
|
+
} & Readonly<A>;
|
|
111
|
+
export declare class FileSystemError extends FileSystemError_base<{
|
|
112
|
+
readonly path: string;
|
|
113
|
+
readonly operation: 'read' | 'write' | 'delete' | 'create';
|
|
114
|
+
readonly cause?: unknown;
|
|
115
|
+
}> {
|
|
116
|
+
get message(): string;
|
|
117
|
+
}
|
|
118
|
+
declare const CrawlError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
|
|
119
|
+
readonly _tag: "CrawlError";
|
|
120
|
+
} & Readonly<A>;
|
|
121
|
+
export declare class CrawlError extends CrawlError_base<{
|
|
122
|
+
readonly url: string;
|
|
123
|
+
readonly depth: number;
|
|
124
|
+
readonly reason: string;
|
|
125
|
+
readonly cause?: unknown;
|
|
126
|
+
}> {
|
|
127
|
+
get message(): string;
|
|
128
|
+
static maxDepthReached(url: string, depth: number): CrawlError;
|
|
129
|
+
static robotsBlocked(url: string): CrawlError;
|
|
130
|
+
}
|
|
131
|
+
declare const QueueError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
|
|
132
|
+
readonly _tag: "QueueError";
|
|
133
|
+
} & Readonly<A>;
|
|
134
|
+
export declare class QueueError extends QueueError_base<{
|
|
135
|
+
readonly operation: 'enqueue' | 'dequeue' | 'peek';
|
|
136
|
+
readonly queueSize?: number;
|
|
137
|
+
readonly cause?: unknown;
|
|
138
|
+
}> {
|
|
139
|
+
get message(): string;
|
|
140
|
+
}
|
|
141
|
+
declare const ConfigError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
|
|
142
|
+
readonly _tag: "ConfigError";
|
|
143
|
+
} & Readonly<A>;
|
|
144
|
+
export declare class ConfigError extends ConfigError_base<{
|
|
145
|
+
readonly field: string;
|
|
146
|
+
readonly value?: unknown;
|
|
147
|
+
readonly reason: string;
|
|
148
|
+
}> {
|
|
149
|
+
get message(): string;
|
|
150
|
+
static invalid(field: string, value: unknown, expected: string): ConfigError;
|
|
151
|
+
}
|
|
152
|
+
declare const MiddlewareError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
|
|
153
|
+
readonly _tag: "MiddlewareError";
|
|
154
|
+
} & Readonly<A>;
|
|
155
|
+
export declare class MiddlewareError extends MiddlewareError_base<{
|
|
156
|
+
readonly middlewareName: string;
|
|
157
|
+
readonly phase: 'request' | 'response' | 'error';
|
|
158
|
+
readonly cause?: unknown;
|
|
159
|
+
}> {
|
|
160
|
+
get message(): string;
|
|
161
|
+
}
|
|
162
|
+
/**
|
|
163
|
+
* Type guard for Spider errors
|
|
164
|
+
*/
|
|
165
|
+
export declare const isSpiderError: (error: unknown) => error is SpiderError;
|
|
166
|
+
/**
|
|
167
|
+
* Type guard for network-related errors
|
|
168
|
+
*/
|
|
169
|
+
export declare const isNetworkError: (error: unknown) => error is NetworkError | TimeoutError;
|
|
170
|
+
/**
|
|
171
|
+
* Type guard for browser-related errors
|
|
172
|
+
*/
|
|
173
|
+
export declare const isBrowserError: (error: unknown) => error is BrowserError | PageError;
|
|
174
|
+
/**
|
|
175
|
+
* Union type of all Spider errors
|
|
176
|
+
*/
|
|
177
|
+
export type AllSpiderErrors = SpiderError | NetworkError | TimeoutError | ParseError | ValidationError | BrowserError | PageError | StateError | SessionError | FileSystemError | CrawlError | QueueError | ConfigError | MiddlewareError;
|
|
178
|
+
export {};
|
|
179
|
+
//# sourceMappingURL=effect-errors.d.ts.map
|