@jambudipa/spider 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -16
- package/dist/browser/BrowserManager.d.ts +63 -0
- package/dist/browser/BrowserManager.d.ts.map +1 -0
- package/dist/browser/PlaywrightAdapter.d.ts +166 -0
- package/dist/browser/PlaywrightAdapter.d.ts.map +1 -0
- package/dist/examples/01-basic-crawl-working.d.ts +13 -0
- package/dist/examples/01-basic-crawl-working.d.ts.map +1 -0
- package/dist/examples/02-multiple-urls-working.d.ts +13 -0
- package/dist/examples/02-multiple-urls-working.d.ts.map +1 -0
- package/dist/examples/03-url-filtering.d.ts +13 -0
- package/dist/examples/03-url-filtering.d.ts.map +1 -0
- package/dist/examples/04-robots-compliance.d.ts +14 -0
- package/dist/examples/04-robots-compliance.d.ts.map +1 -0
- package/dist/examples/05-link-extraction-selectors.d.ts +14 -0
- package/dist/examples/05-link-extraction-selectors.d.ts.map +1 -0
- package/dist/examples/06-custom-middleware.d.ts +18 -0
- package/dist/examples/06-custom-middleware.d.ts.map +1 -0
- package/dist/examples/07-resumability-demo.d.ts +14 -0
- package/dist/examples/07-resumability-demo.d.ts.map +1 -0
- package/dist/examples/08-worker-monitoring.d.ts +15 -0
- package/dist/examples/08-worker-monitoring.d.ts.map +1 -0
- package/dist/examples/09-error-handling-recovery.d.ts +15 -0
- package/dist/examples/09-error-handling-recovery.d.ts.map +1 -0
- package/dist/index.d.ts +33 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +2891 -1456
- package/dist/index.js.map +1 -1
- package/dist/lib/BrowserEngine/BrowserEngine.service.d.ts +107 -0
- package/dist/lib/BrowserEngine/BrowserEngine.service.d.ts.map +1 -0
- package/dist/lib/Config/SpiderConfig.service.d.ts +256 -0
- package/dist/lib/Config/SpiderConfig.service.d.ts.map +1 -0
- package/dist/lib/HttpClient/CookieManager.d.ts +58 -0
- package/dist/lib/HttpClient/CookieManager.d.ts.map +1 -0
- package/dist/lib/HttpClient/EnhancedHttpClient.d.ts +63 -0
- package/dist/lib/HttpClient/EnhancedHttpClient.d.ts.map +1 -0
- package/dist/lib/HttpClient/SessionStore.d.ts +114 -0
- package/dist/lib/HttpClient/SessionStore.d.ts.map +1 -0
- package/dist/lib/HttpClient/TokenExtractor.d.ts +83 -0
- package/dist/lib/HttpClient/TokenExtractor.d.ts.map +1 -0
- package/dist/lib/HttpClient/index.d.ts +8 -0
- package/dist/lib/HttpClient/index.d.ts.map +1 -0
- package/dist/lib/LinkExtractor/LinkExtractor.service.d.ts +166 -0
- package/dist/lib/LinkExtractor/LinkExtractor.service.d.ts.map +1 -0
- package/dist/lib/LinkExtractor/index.d.ts +37 -0
- package/dist/lib/LinkExtractor/index.d.ts.map +1 -0
- package/dist/lib/Logging/FetchLogger.d.ts +24 -0
- package/dist/lib/Logging/FetchLogger.d.ts.map +1 -0
- package/dist/lib/Logging/SpiderLogger.service.d.ts +37 -0
- package/dist/lib/Logging/SpiderLogger.service.d.ts.map +1 -0
- package/dist/lib/Middleware/SpiderMiddleware.d.ts +239 -0
- package/dist/lib/Middleware/SpiderMiddleware.d.ts.map +1 -0
- package/dist/lib/Middleware/types.d.ts +99 -0
- package/dist/lib/Middleware/types.d.ts.map +1 -0
- package/dist/lib/PageData/PageData.d.ts +28 -0
- package/dist/lib/PageData/PageData.d.ts.map +1 -0
- package/dist/lib/Resumability/Resumability.service.d.ts +178 -0
- package/dist/lib/Resumability/Resumability.service.d.ts.map +1 -0
- package/dist/lib/Resumability/backends/FileStorageBackend.d.ts +47 -0
- package/dist/lib/Resumability/backends/FileStorageBackend.d.ts.map +1 -0
- package/dist/lib/Resumability/backends/PostgresStorageBackend.d.ts +95 -0
- package/dist/lib/Resumability/backends/PostgresStorageBackend.d.ts.map +1 -0
- package/dist/lib/Resumability/backends/RedisStorageBackend.d.ts +92 -0
- package/dist/lib/Resumability/backends/RedisStorageBackend.d.ts.map +1 -0
- package/dist/lib/Resumability/index.d.ts +51 -0
- package/dist/lib/Resumability/index.d.ts.map +1 -0
- package/dist/lib/Resumability/strategies.d.ts +76 -0
- package/dist/lib/Resumability/strategies.d.ts.map +1 -0
- package/dist/lib/Resumability/types.d.ts +201 -0
- package/dist/lib/Resumability/types.d.ts.map +1 -0
- package/dist/lib/Robots/Robots.service.d.ts +78 -0
- package/dist/lib/Robots/Robots.service.d.ts.map +1 -0
- package/dist/lib/Scheduler/SpiderScheduler.service.d.ts +211 -0
- package/dist/lib/Scheduler/SpiderScheduler.service.d.ts.map +1 -0
- package/dist/lib/Scraper/Scraper.service.d.ts +123 -0
- package/dist/lib/Scraper/Scraper.service.d.ts.map +1 -0
- package/dist/lib/Spider/Spider.service.d.ts +249 -0
- package/dist/lib/Spider/Spider.service.d.ts.map +1 -0
- package/dist/lib/StateManager/StateManager.service.d.ts +107 -0
- package/dist/lib/StateManager/StateManager.service.d.ts.map +1 -0
- package/dist/lib/StateManager/index.d.ts +5 -0
- package/dist/lib/StateManager/index.d.ts.map +1 -0
- package/dist/lib/UrlDeduplicator/UrlDeduplicator.service.d.ts +58 -0
- package/dist/lib/UrlDeduplicator/UrlDeduplicator.service.d.ts.map +1 -0
- package/dist/lib/WebScrapingEngine/WebScrapingEngine.service.d.ts +110 -0
- package/dist/lib/WebScrapingEngine/WebScrapingEngine.service.d.ts.map +1 -0
- package/dist/lib/WebScrapingEngine/index.d.ts +5 -0
- package/dist/lib/WebScrapingEngine/index.d.ts.map +1 -0
- package/dist/lib/WorkerHealth/WorkerHealthMonitor.service.d.ts +39 -0
- package/dist/lib/WorkerHealth/WorkerHealthMonitor.service.d.ts.map +1 -0
- package/dist/lib/api-facades.d.ts +313 -0
- package/dist/lib/api-facades.d.ts.map +1 -0
- package/dist/lib/errors/effect-errors.d.ts +179 -0
- package/dist/lib/errors/effect-errors.d.ts.map +1 -0
- package/dist/lib/errors.d.ts +172 -0
- package/dist/lib/errors.d.ts.map +1 -0
- package/dist/lib/utils/FileUtils.d.ts +284 -0
- package/dist/lib/utils/FileUtils.d.ts.map +1 -0
- package/dist/lib/utils/JsonUtils.d.ts +196 -0
- package/dist/lib/utils/JsonUtils.d.ts.map +1 -0
- package/dist/lib/utils/RegexUtils.d.ts +257 -0
- package/dist/lib/utils/RegexUtils.d.ts.map +1 -0
- package/dist/lib/utils/SchemaUtils.d.ts +251 -0
- package/dist/lib/utils/SchemaUtils.d.ts.map +1 -0
- package/dist/lib/utils/UrlUtils.d.ts +223 -0
- package/dist/lib/utils/UrlUtils.d.ts.map +1 -0
- package/dist/lib/utils/effect-migration.d.ts +31 -0
- package/dist/lib/utils/effect-migration.d.ts.map +1 -0
- package/dist/lib/utils/index.d.ts +15 -0
- package/dist/lib/utils/index.d.ts.map +1 -0
- package/dist/lib/utils/url-deduplication.d.ts +108 -0
- package/dist/lib/utils/url-deduplication.d.ts.map +1 -0
- package/dist/lib/utils/url-deduplication.test.d.ts +5 -0
- package/dist/lib/utils/url-deduplication.test.d.ts.map +1 -0
- package/dist/test/infrastructure/EffectTestUtils.d.ts +167 -0
- package/dist/test/infrastructure/EffectTestUtils.d.ts.map +1 -0
- package/package.json +21 -9
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Data type definitions for Spider Middleware
|
|
3
|
+
* Using Effect's Data.Class for immutability and built-in equality
|
|
4
|
+
*/
|
|
5
|
+
import { Data, Option } from 'effect';
|
|
6
|
+
import { PageData } from '../PageData/PageData.js';
|
|
7
|
+
/**
|
|
8
|
+
* Represents a single crawling task with URL and depth information.
|
|
9
|
+
* Used internally by the Spider service for task management.
|
|
10
|
+
*/
|
|
11
|
+
export interface CrawlTask {
|
|
12
|
+
/** The URL to be crawled */
|
|
13
|
+
url: string;
|
|
14
|
+
/** The depth level of this URL relative to the starting URL */
|
|
15
|
+
depth: number;
|
|
16
|
+
/** The URL from which this URL was discovered (optional) */
|
|
17
|
+
fromUrl?: string;
|
|
18
|
+
/** Optional metadata to be passed through to the result */
|
|
19
|
+
metadata?: Record<string, unknown>;
|
|
20
|
+
/** Optional data extraction configuration */
|
|
21
|
+
extractData?: Record<string, unknown>;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Request object used in the middleware pipeline.
|
|
25
|
+
*
|
|
26
|
+
* Contains the crawl task along with optional headers and metadata
|
|
27
|
+
* that can be modified by middleware during processing.
|
|
28
|
+
*
|
|
29
|
+
* Uses Data.Class for:
|
|
30
|
+
* - Built-in equality checking
|
|
31
|
+
* - Immutability by default
|
|
32
|
+
* - Better pattern matching support
|
|
33
|
+
*
|
|
34
|
+
* @group Data Types
|
|
35
|
+
* @public
|
|
36
|
+
*/
|
|
37
|
+
export declare class SpiderRequest extends Data.Class<{
|
|
38
|
+
/** The crawl task containing URL and depth information */
|
|
39
|
+
readonly task: CrawlTask;
|
|
40
|
+
/** HTTP headers to include with the request */
|
|
41
|
+
readonly headers: Option.Option<Record<string, string>>;
|
|
42
|
+
/** Additional metadata that can be used by middleware */
|
|
43
|
+
readonly meta: Option.Option<Record<string, unknown>>;
|
|
44
|
+
}> {
|
|
45
|
+
/**
|
|
46
|
+
* Create a SpiderRequest from a CrawlTask
|
|
47
|
+
*/
|
|
48
|
+
static fromTask(task: CrawlTask, headers?: Record<string, string>, meta?: Record<string, unknown>): SpiderRequest;
|
|
49
|
+
/**
|
|
50
|
+
* Add or update headers
|
|
51
|
+
*/
|
|
52
|
+
withHeaders(headers: Record<string, string>): SpiderRequest;
|
|
53
|
+
/**
|
|
54
|
+
* Add or update metadata
|
|
55
|
+
*/
|
|
56
|
+
withMeta(meta: Record<string, unknown>): SpiderRequest;
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* Response object used in the middleware pipeline.
|
|
60
|
+
*
|
|
61
|
+
* Contains the extracted page data along with optional HTTP response
|
|
62
|
+
* information and metadata from middleware processing.
|
|
63
|
+
*
|
|
64
|
+
* Uses Data.Class for:
|
|
65
|
+
* - Built-in equality checking
|
|
66
|
+
* - Immutability by default
|
|
67
|
+
* - Better pattern matching support
|
|
68
|
+
*
|
|
69
|
+
* @group Data Types
|
|
70
|
+
* @public
|
|
71
|
+
*/
|
|
72
|
+
export declare class SpiderResponse extends Data.Class<{
|
|
73
|
+
/** The extracted page data including content, links, and metadata */
|
|
74
|
+
readonly pageData: PageData;
|
|
75
|
+
/** HTTP status code of the response */
|
|
76
|
+
readonly statusCode: Option.Option<number>;
|
|
77
|
+
/** HTTP response headers */
|
|
78
|
+
readonly headers: Option.Option<Record<string, string>>;
|
|
79
|
+
/** Additional metadata from middleware processing */
|
|
80
|
+
readonly meta: Option.Option<Record<string, unknown>>;
|
|
81
|
+
}> {
|
|
82
|
+
/**
|
|
83
|
+
* Create a SpiderResponse from PageData
|
|
84
|
+
*/
|
|
85
|
+
static fromPageData(pageData: PageData, statusCode?: number, headers?: Record<string, string>, meta?: Record<string, unknown>): SpiderResponse;
|
|
86
|
+
/**
|
|
87
|
+
* Update the page data
|
|
88
|
+
*/
|
|
89
|
+
withPageData(pageData: PageData): SpiderResponse;
|
|
90
|
+
/**
|
|
91
|
+
* Add or update metadata
|
|
92
|
+
*/
|
|
93
|
+
withMeta(meta: Record<string, unknown>): SpiderResponse;
|
|
94
|
+
/**
|
|
95
|
+
* Check if the response was successful (2xx status code)
|
|
96
|
+
*/
|
|
97
|
+
isSuccessful(): boolean;
|
|
98
|
+
}
|
|
99
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/lib/Middleware/types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AACtC,OAAO,EAAE,QAAQ,EAAE,MAAM,yBAAyB,CAAC;AAEnD;;;GAGG;AACH,MAAM,WAAW,SAAS;IACxB,4BAA4B;IAC5B,GAAG,EAAE,MAAM,CAAC;IACZ,+DAA+D;IAC/D,KAAK,EAAE,MAAM,CAAC;IACd,4DAA4D;IAC5D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,2DAA2D;IAC3D,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnC,6CAA6C;IAC7C,WAAW,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACvC;AAED;;;;;;;;;;;;;GAaG;AACH,qBAAa,aAAc,SAAQ,IAAI,CAAC,KAAK,CAAC;IAC5C,0DAA0D;IAC1D,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAC;IACzB,+CAA+C;IAC/C,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACxD,yDAAyD;IACzD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;CACvD,CAAC;IACA;;OAEG;IACH,MAAM,CAAC,QAAQ,CACb,IAAI,EAAE,SAAS,EACf,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EAChC,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC7B,aAAa;IAQhB;;OAEG;IACH,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,aAAa;IAQ3D;;OAEG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,aAAa;CAOvD;AAED;;;;;;;;;;;;;GAaG;AACH,qBAAa,cAAe,SAAQ,IAAI,CAAC,KAAK,CAAC;IAC7C,qEAAqE;IACrE,QAAQ,CAAC,QAAQ,EAAE,QAAQ,CAAC;IAC5B,uCAAuC;IACvC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IAC3C,4BAA4B;IAC5B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACxD,qDAAqD;IACrD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;CACvD,CAAC;IACA;;OAEG;IACH,MAAM,CAAC,YAAY,CACjB,QAAQ,EAAE,QAAQ,EAClB,UAAU,CAAC,EAAE,MAAM,EACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EAChC,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC7B,cAAc;IASjB;;OAEG;IACH,YAAY,CAAC,QAAQ,EAAE,QAAQ,GAAG,cAAc;IAOhD;;OAEG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,cAAc;IAQvD;;OAEG;IACH,YAAY,IAAI,OAAO;CAMxB"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { Schema } from 'effect';
|
|
2
|
+
export declare const PageDataSchema: Schema.Struct<{
|
|
3
|
+
url: Schema.filter<typeof Schema.String>;
|
|
4
|
+
html: typeof Schema.String;
|
|
5
|
+
title: Schema.optional<typeof Schema.String>;
|
|
6
|
+
/** All available metadata from meta tags */
|
|
7
|
+
metadata: Schema.Record$<typeof Schema.String, typeof Schema.String>;
|
|
8
|
+
/** Commonly used metadata fields for convenience */
|
|
9
|
+
commonMetadata: Schema.optional<Schema.Struct<{
|
|
10
|
+
description: Schema.optional<typeof Schema.String>;
|
|
11
|
+
keywords: Schema.optional<typeof Schema.String>;
|
|
12
|
+
author: Schema.optional<typeof Schema.String>;
|
|
13
|
+
robots: Schema.optional<typeof Schema.String>;
|
|
14
|
+
}>>;
|
|
15
|
+
statusCode: Schema.filter<Schema.filter<typeof Schema.Number>>;
|
|
16
|
+
/** All response headers */
|
|
17
|
+
headers: Schema.Record$<typeof Schema.String, typeof Schema.String>;
|
|
18
|
+
/** When the fetch operation started */
|
|
19
|
+
fetchedAt: typeof Schema.DateFromSelf;
|
|
20
|
+
/** How long the entire fetch and parse operation took in milliseconds */
|
|
21
|
+
scrapeDurationMs: typeof Schema.Number;
|
|
22
|
+
/** The crawl depth (number of hops from the starting URL) */
|
|
23
|
+
depth: Schema.filter<Schema.filter<typeof Schema.Number>>;
|
|
24
|
+
/** Optional extracted data from the page */
|
|
25
|
+
extractedData: Schema.optional<Schema.Record$<typeof Schema.String, typeof Schema.Unknown>>;
|
|
26
|
+
}>;
|
|
27
|
+
export type PageData = Schema.Schema.Type<typeof PageDataSchema>;
|
|
28
|
+
//# sourceMappingURL=PageData.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PageData.d.ts","sourceRoot":"","sources":["../../../src/lib/PageData/PageData.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAEhC,eAAO,MAAM,cAAc;;;;IAQzB,4CAA4C;;IAE5C,oDAAoD;;;;;;;;IAUpD,2BAA2B;;IAE3B,uCAAuC;;IAEvC,yEAAyE;;IAEzE,6DAA6D;;IAE7D,4CAA4C;;EAI5C,CAAC;AAEH,MAAM,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,cAAc,CAAC,CAAC"}
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
import { Effect, Option } from 'effect';
|
|
2
|
+
import { SpiderState, SpiderStateKey } from '../Scheduler/SpiderScheduler.service.js';
|
|
3
|
+
import { HybridPersistenceConfig, PersistenceError, StateDelta, StateOperation, StorageBackend } from './types.js';
|
|
4
|
+
import { type RedisClientInterface } from './backends/RedisStorageBackend.js';
|
|
5
|
+
import { type DatabaseClientInterface, type PostgresStorageConfig } from './backends/PostgresStorageBackend.js';
|
|
6
|
+
/**
|
|
7
|
+
* Configuration for the ResumabilityService.
|
|
8
|
+
*
|
|
9
|
+
* Allows choosing between different persistence strategies and
|
|
10
|
+
* configuring their behavior based on use case requirements.
|
|
11
|
+
*
|
|
12
|
+
* @group Configuration
|
|
13
|
+
* @public
|
|
14
|
+
*/
|
|
15
|
+
export interface ResumabilityConfig {
|
|
16
|
+
/** Persistence strategy to use */
|
|
17
|
+
strategy: 'full-state' | 'delta' | 'hybrid' | 'auto';
|
|
18
|
+
/** Storage backend implementation */
|
|
19
|
+
backend: StorageBackend;
|
|
20
|
+
/** Configuration for hybrid strategy (only used when strategy is 'hybrid') */
|
|
21
|
+
hybridConfig?: HybridPersistenceConfig;
|
|
22
|
+
}
|
|
23
|
+
declare const ResumabilityService_base: Effect.Service.Class<ResumabilityService, "@jambudipa.io/ResumabilityService", {
|
|
24
|
+
readonly effect: Effect.Effect<{
|
|
25
|
+
/**
|
|
26
|
+
* Configure the resumability service with a specific strategy and backend.
|
|
27
|
+
*
|
|
28
|
+
* This method initializes the storage backend and creates the appropriate
|
|
29
|
+
* persistence strategy based on the configuration.
|
|
30
|
+
*
|
|
31
|
+
* @param config - Resumability configuration
|
|
32
|
+
* @returns Effect that completes when configuration is applied
|
|
33
|
+
*/
|
|
34
|
+
configure: (config: ResumabilityConfig) => Effect.Effect<void, PersistenceError, never>;
|
|
35
|
+
/**
|
|
36
|
+
* Persist a state operation using the configured strategy.
|
|
37
|
+
*
|
|
38
|
+
* @param operation - State operation to persist
|
|
39
|
+
* @returns Effect that completes when operation is persisted
|
|
40
|
+
*/
|
|
41
|
+
persistOperation: (operation: StateOperation) => Effect.Effect<undefined, PersistenceError, never>;
|
|
42
|
+
/**
|
|
43
|
+
* Restore spider state from persistent storage.
|
|
44
|
+
*
|
|
45
|
+
* @param key - State key identifying the session to restore
|
|
46
|
+
* @returns Effect containing `Option.some(state)` with the restored state, or `Option.none()` if not found
|
|
47
|
+
*/
|
|
48
|
+
restore: (key: SpiderStateKey) => Effect.Effect<Option.Option<SpiderState>, PersistenceError, never>;
|
|
49
|
+
/**
|
|
50
|
+
* Clean up old state data for a session.
|
|
51
|
+
*
|
|
52
|
+
* @param key - State key identifying the session to clean up
|
|
53
|
+
* @returns Effect that completes when cleanup is finished
|
|
54
|
+
*/
|
|
55
|
+
cleanup: (key: SpiderStateKey) => Effect.Effect<undefined, PersistenceError, never>;
|
|
56
|
+
/**
|
|
57
|
+
* List all available sessions in storage.
|
|
58
|
+
*
|
|
59
|
+
* @returns Effect containing array of session keys
|
|
60
|
+
*/
|
|
61
|
+
listSessions: () => Effect.Effect<readonly SpiderStateKey[], PersistenceError, never>;
|
|
62
|
+
/**
|
|
63
|
+
* Get information about the current configuration.
|
|
64
|
+
*
|
|
65
|
+
* @returns Effect containing information about the configured strategy and backend
|
|
66
|
+
*/
|
|
67
|
+
getInfo: () => Effect.Effect<{
|
|
68
|
+
strategy: {
|
|
69
|
+
readonly name: string;
|
|
70
|
+
readonly description: string;
|
|
71
|
+
readonly capabilities: string[];
|
|
72
|
+
};
|
|
73
|
+
backend: {
|
|
74
|
+
name: string;
|
|
75
|
+
capabilities: import("./types.js").StorageCapabilities;
|
|
76
|
+
};
|
|
77
|
+
}, PersistenceError, never>;
|
|
78
|
+
/**
|
|
79
|
+
* Reconfigure the service with new settings.
|
|
80
|
+
*
|
|
81
|
+
* This will clean up the current backend and reinitialize with new config.
|
|
82
|
+
*
|
|
83
|
+
* @param config - New configuration
|
|
84
|
+
* @returns Effect that completes when reconfiguration is finished
|
|
85
|
+
*/
|
|
86
|
+
reconfigure: (config: ResumabilityConfig) => Effect.Effect<void, PersistenceError, never>;
|
|
87
|
+
}, never, never>;
|
|
88
|
+
}>;
|
|
89
|
+
/**
|
|
90
|
+
* Service for resumable spider crawling with configurable persistence strategies.
|
|
91
|
+
*
|
|
92
|
+
* Provides a unified interface for different persistence approaches:
|
|
93
|
+
* - Full state: Simple, saves complete state on every change
|
|
94
|
+
* - Delta: Efficient, saves only incremental changes
|
|
95
|
+
* - Hybrid: Best of both worlds, deltas + periodic snapshots
|
|
96
|
+
* - Auto: Automatically chooses best strategy based on backend capabilities
|
|
97
|
+
*
|
|
98
|
+
* @example
|
|
99
|
+
* ```typescript
|
|
100
|
+
* // File-based full state persistence
|
|
101
|
+
* const resumabilityLayer = ResumabilityService.fromConfig({
|
|
102
|
+
* strategy: 'full-state',
|
|
103
|
+
* backend: new FileStorageBackend('./spider-state')
|
|
104
|
+
* });
|
|
105
|
+
*
|
|
106
|
+
* // Redis-based hybrid persistence
|
|
107
|
+
* const resumabilityLayer = ResumabilityService.fromConfig({
|
|
108
|
+
* strategy: 'hybrid',
|
|
109
|
+
* backend: new RedisStorageBackend(redisClient),
|
|
110
|
+
* hybridConfig: {
|
|
111
|
+
* snapshotInterval: 1000,
|
|
112
|
+
* maxDeltasBeforeSnapshot: 500
|
|
113
|
+
* }
|
|
114
|
+
* });
|
|
115
|
+
*
|
|
116
|
+
* // Auto-selected strategy based on backend
|
|
117
|
+
* const resumabilityLayer = ResumabilityService.fromConfig({
|
|
118
|
+
* strategy: 'auto',
|
|
119
|
+
* backend: new PostgresStorageBackend(pgClient)
|
|
120
|
+
* });
|
|
121
|
+
* ```
|
|
122
|
+
*
|
|
123
|
+
* @group Services
|
|
124
|
+
* @public
|
|
125
|
+
*/
|
|
126
|
+
export declare class ResumabilityService extends ResumabilityService_base {
|
|
127
|
+
/**
|
|
128
|
+
* Create a ResumabilityService layer from configuration.
|
|
129
|
+
*
|
|
130
|
+
* This is the primary way to create and configure the ResumabilityService.
|
|
131
|
+
*
|
|
132
|
+
* @param config - Resumability configuration
|
|
133
|
+
* @returns Effect that yields the configured ResumabilityService, or fails with a PersistenceError
|
|
134
|
+
*/
|
|
135
|
+
static fromConfig: (config: ResumabilityConfig) => Effect.Effect<ResumabilityService, PersistenceError, never>;
|
|
136
|
+
}
|
|
137
|
+
/**
|
|
138
|
+
* Utility function to create a state operation.
|
|
139
|
+
*
|
|
140
|
+
* @param delta - The delta operation
|
|
141
|
+
* @param resultingState - The complete state after applying the delta
|
|
142
|
+
* @param shouldSnapshot - Whether this operation should trigger a snapshot
|
|
143
|
+
* @returns StateOperation object
|
|
144
|
+
*/
|
|
145
|
+
export declare const createStateOperation: (delta: StateDelta, resultingState: SpiderState, shouldSnapshot?: boolean) => StateOperation;
|
|
146
|
+
/**
|
|
147
|
+
* Factory functions for creating common resumability configurations.
|
|
148
|
+
*/
|
|
149
|
+
export declare const ResumabilityConfigs: {
|
|
150
|
+
/**
|
|
151
|
+
* Create a file-based configuration.
|
|
152
|
+
*
|
|
153
|
+
* @param baseDir - Directory to store state files
|
|
154
|
+
* @param strategy - Persistence strategy (defaults to 'auto')
|
|
155
|
+
* @returns ResumabilityConfig
|
|
156
|
+
*/
|
|
157
|
+
file: (baseDir: string, strategy?: "full-state" | "delta" | "hybrid" | "auto") => ResumabilityConfig;
|
|
158
|
+
/**
|
|
159
|
+
* Create a Redis-based configuration.
|
|
160
|
+
*
|
|
161
|
+
* @param redisClient - Redis client instance
|
|
162
|
+
* @param strategy - Persistence strategy (defaults to 'hybrid')
|
|
163
|
+
* @param keyPrefix - Redis key prefix (defaults to 'spider')
|
|
164
|
+
* @returns ResumabilityConfig
|
|
165
|
+
*/
|
|
166
|
+
redis: (redisClient: RedisClientInterface, strategy?: "full-state" | "delta" | "hybrid" | "auto", keyPrefix?: string) => ResumabilityConfig;
|
|
167
|
+
/**
|
|
168
|
+
* Create a PostgreSQL-based configuration.
|
|
169
|
+
*
|
|
170
|
+
* @param dbClient - Database client instance
|
|
171
|
+
* @param strategy - Persistence strategy (defaults to 'hybrid')
|
|
172
|
+
* @param config - PostgreSQL configuration
|
|
173
|
+
* @returns ResumabilityConfig
|
|
174
|
+
*/
|
|
175
|
+
postgres: (dbClient: DatabaseClientInterface, strategy?: "full-state" | "delta" | "hybrid" | "auto", config?: PostgresStorageConfig) => ResumabilityConfig;
|
|
176
|
+
};
|
|
177
|
+
export {};
|
|
178
|
+
//# sourceMappingURL=Resumability.service.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Resumability.service.d.ts","sourceRoot":"","sources":["../../../src/lib/Resumability/Resumability.service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AACxC,OAAO,EACL,WAAW,EACX,cAAc,EACf,MAAM,yCAAyC,CAAC;AACjD,OAAO,EAEL,uBAAuB,EACvB,gBAAgB,EAEhB,UAAU,EACV,cAAc,EACd,cAAc,EACf,MAAM,YAAY,CAAC;AAOpB,OAAO,EAEL,KAAK,oBAAoB,EAC1B,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAEL,KAAK,uBAAuB,EAC5B,KAAK,qBAAqB,EAC3B,MAAM,sCAAsC,CAAC;AAE9C;;;;;;;;GAQG;AACH,MAAM,WAAW,kBAAkB;IACjC,kCAAkC;IAClC,QAAQ,EAAE,YAAY,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,CAAC;IACrD,qCAAqC;IACrC,OAAO,EAAE,cAAc,CAAC;IACxB,8EAA8E;IAC9E,YAAY,CAAC,EAAE,uBAAuB,CAAC;CACxC;;;QAmDO;;;;;;;;WAQG;4BACiB,kBAAkB;QAWtC;;;;;WAKG;sCAC2B,cAAc;QAe5C;;;;;WAKG;uBACY,cAAc;QAe7B;;;;;WAKG;uBACY,cAAc;QAe7B;;;;WAIG;;QA0BH;;;;WAIG;;;;;;;;;;;;QAwBH;;;;;;;WAOG;8BACmB,kBAAkB;;;AAtMhD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoCG;AACH,qBAAa,mBAAoB,SAAQ,wBAgLxC;IACC;;;;;;;OAOG;IACH,MAAM,CAAC,UAAU,GAAI,QAAQ,kBAAkB,iEAKQ;CACxD;AAwDD;;;;;;;GAOG;AACH,eAAO,MAAM,oBAAoB,GAC/B,OAAO,UAAU,EACjB,gBAAgB,WAAW,EAC3B,wBAAsB,KACrB,cAID,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,mBAAmB;IAC9B;;;;;;OAMG;oBAEQ,MAAM,aACL,YAAY,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,KACnD,kBAAkB;IAKrB;;;;;;;OAOG;yBAEY,oBAAoB,aACvB,YAAY,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,yBAEnD,kBAAkB;IAKrB;;;;;;;OAOG;yBAES,uBAAuB,aACvB,YAAY,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,WAC3C,qBAAqB,KAC7B,kBAAkB;CAItB,CAAC"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { Effect, Option } from 'effect';
|
|
2
|
+
import { PersistenceError, SpiderState, SpiderStateKey, StateDelta, StorageBackend, StorageCapabilities } from '../types.js';
|
|
3
|
+
/**
|
|
4
|
+
* File system storage backend for spider state persistence.
|
|
5
|
+
*
|
|
6
|
+
* Stores state and deltas as JSON files in a directory structure.
|
|
7
|
+
* Good for development, testing, and single-machine deployments.
|
|
8
|
+
*
|
|
9
|
+
* Directory structure:
|
|
10
|
+
* ```
|
|
11
|
+
* baseDir/
|
|
12
|
+
* sessions/
|
|
13
|
+
* sessionId/
|
|
14
|
+
* state.json # Full state
|
|
15
|
+
* snapshot.json # Latest snapshot
|
|
16
|
+
* deltas/
|
|
17
|
+
* 0001.json # Delta files
|
|
18
|
+
* 0002.json
|
|
19
|
+
* ...
|
|
20
|
+
* ```
|
|
21
|
+
*
|
|
22
|
+
* @group Backends
|
|
23
|
+
* @public
|
|
24
|
+
*/
|
|
25
|
+
export declare class FileStorageBackend implements StorageBackend {
|
|
26
|
+
readonly capabilities: StorageCapabilities;
|
|
27
|
+
readonly name = "FileStorageBackend";
|
|
28
|
+
private readonly storageDir;
|
|
29
|
+
constructor(baseDir: string);
|
|
30
|
+
initialize: () => Effect.Effect<void, PersistenceError>;
|
|
31
|
+
cleanup: () => Effect.Effect<void, PersistenceError>;
|
|
32
|
+
saveState: (key: SpiderStateKey, state: SpiderState) => Effect.Effect<void, PersistenceError>;
|
|
33
|
+
loadState: (key: SpiderStateKey) => Effect.Effect<Option.Option<SpiderState>, PersistenceError>;
|
|
34
|
+
deleteState: (key: SpiderStateKey) => Effect.Effect<void, PersistenceError>;
|
|
35
|
+
saveDelta: (delta: StateDelta) => Effect.Effect<void, PersistenceError>;
|
|
36
|
+
saveDeltas: (deltas: StateDelta[]) => Effect.Effect<void, PersistenceError>;
|
|
37
|
+
loadDeltas: (key: SpiderStateKey, fromSequence?: number) => Effect.Effect<StateDelta[], PersistenceError>;
|
|
38
|
+
saveSnapshot: (key: SpiderStateKey, state: SpiderState, sequence: number) => Effect.Effect<void, PersistenceError>;
|
|
39
|
+
loadLatestSnapshot: (key: SpiderStateKey) => Effect.Effect<Option.Option<{
|
|
40
|
+
state: SpiderState;
|
|
41
|
+
sequence: number;
|
|
42
|
+
}>, PersistenceError>;
|
|
43
|
+
compactDeltas: (key: SpiderStateKey, beforeSequence: number) => Effect.Effect<void, PersistenceError>;
|
|
44
|
+
listSessions: () => Effect.Effect<SpiderStateKey[], PersistenceError>;
|
|
45
|
+
private getSessionDir;
|
|
46
|
+
}
|
|
47
|
+
//# sourceMappingURL=FileStorageBackend.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"FileStorageBackend.d.ts","sourceRoot":"","sources":["../../../../src/lib/Resumability/backends/FileStorageBackend.ts"],"names":[],"mappings":"AAAA,OAAO,EAAmB,MAAM,EAAE,MAAM,EAAU,MAAM,QAAQ,CAAC;AAGjE,OAAO,EACL,gBAAgB,EAChB,WAAW,EACX,cAAc,EACd,UAAU,EACV,cAAc,EACd,mBAAmB,EACpB,MAAM,aAAa,CAAC;AA4BrB;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,qBAAa,kBAAmB,YAAW,cAAc;IACvD,QAAQ,CAAC,YAAY,EAAE,mBAAmB,CAMxC;IAEF,QAAQ,CAAC,IAAI,wBAAwB;IAErC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;gBAExB,OAAO,EAAE,MAAM;IAI3B,UAAU,QAAO,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAuBpD;IAEF,OAAO,QAAO,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAAgB;IAGnE,SAAS,GACP,KAAK,cAAc,EACnB,OAAO,WAAW,KACjB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAqCtC;IAEF,SAAS,GACP,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE,gBAAgB,CAAC,CA0C5D;IAEF,WAAW,GACT,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAetC;IAGF,SAAS,GAAI,OAAO,UAAU,KAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA6CpE;IAEF,UAAU,GACR,QAAQ,UAAU,EAAE,KACnB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAQtC;IAEF,UAAU,GACR,KAAK,cAAc,EACnB,qBAAgB,KACf,MAAM,CAAC,MAAM,CAAC,UAAU,EAAE,EAAE,gBAAgB,CAAC,CA6D9C;IAGF,YAAY,GACV,KAAK,cAAc,EACnB,OAAO,WAAW,EAClB,UAAU,MAAM,KACf,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA0CtC;IAEF,kBAAkB,GAChB,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CACd,MAAM,CAAC,MAAM,CAAC;QAAE,KAAK,EAAE,WAAW,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC,EACvD,gBAAgB,CACjB,CA6CC;IAGF,aAAa,GACX,KAAK,cAAc,EACnB,gBAAgB,MAAM,KACrB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA6CtC;IAEF,YAAY,QAAO,MAAM,CAAC,MAAM,CAAC,cAAc,EAAE,EAAE,gBAAgB,CAAC,CA6DlE;IAEF,OAAO,CAAC,aAAa,CAEnB;CACH"}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import { Effect, Option } from 'effect';
|
|
2
|
+
import { SpiderState, SpiderStateKey } from '../../Scheduler/SpiderScheduler.service.js';
|
|
3
|
+
import { PersistenceError, StateDelta, StorageBackend, StorageCapabilities } from '../types.js';
|
|
4
|
+
/**
|
|
5
|
+
* Database client interface for dependency injection.
|
|
6
|
+
*
|
|
7
|
+
* This allows users to provide their own database client implementation
|
|
8
|
+
* (node-postgres, i.e. the `pg` package, Prisma, Drizzle, etc.) without tight coupling.
|
|
9
|
+
*
|
|
10
|
+
* @group Backends
|
|
11
|
+
* @public
|
|
12
|
+
*/
|
|
13
|
+
export interface DatabaseClientInterface {
|
|
14
|
+
query<T = unknown>(sql: string, params?: readonly unknown[]): Promise<{
|
|
15
|
+
rows: readonly T[];
|
|
16
|
+
rowCount: number;
|
|
17
|
+
}>;
|
|
18
|
+
transaction?<T>(callback: (client: DatabaseClientInterface) => Promise<T>): Promise<T>;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Configuration for PostgreSQL storage backend.
|
|
22
|
+
*/
|
|
23
|
+
export interface PostgresStorageConfig {
|
|
24
|
+
/** Table prefix for spider tables */
|
|
25
|
+
tablePrefix?: string;
|
|
26
|
+
/** Schema name (defaults to 'public') */
|
|
27
|
+
schema?: string;
|
|
28
|
+
/** Whether to auto-create tables */
|
|
29
|
+
autoCreateTables?: boolean;
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* PostgreSQL storage backend for spider state persistence.
|
|
33
|
+
*
|
|
34
|
+
* Uses PostgreSQL for robust, ACID-compliant state persistence with
|
|
35
|
+
* excellent support for concurrent access and complex queries.
|
|
36
|
+
*
|
|
37
|
+
* Database schema:
|
|
38
|
+
* ```sql
|
|
39
|
+
* CREATE TABLE spider_sessions (
|
|
40
|
+
* id VARCHAR(255) PRIMARY KEY,
|
|
41
|
+
* name VARCHAR(255) NOT NULL,
|
|
42
|
+
* created_at TIMESTAMP NOT NULL,
|
|
43
|
+
* state_data JSONB,
|
|
44
|
+
* updated_at TIMESTAMP DEFAULT NOW()
|
|
45
|
+
* );
|
|
46
|
+
*
|
|
47
|
+
* CREATE TABLE spider_deltas (
|
|
48
|
+
* id SERIAL PRIMARY KEY,
|
|
49
|
+
* session_id VARCHAR(255) NOT NULL REFERENCES spider_sessions(id),
|
|
50
|
+
* sequence_number BIGINT NOT NULL,
|
|
51
|
+
* operation_type VARCHAR(50) NOT NULL,
|
|
52
|
+
* operation_data JSONB NOT NULL,
|
|
53
|
+
* created_at TIMESTAMP DEFAULT NOW(),
|
|
54
|
+
* UNIQUE(session_id, sequence_number)
|
|
55
|
+
* );
|
|
56
|
+
*
|
|
57
|
+
* CREATE TABLE spider_snapshots (
|
|
58
|
+
* id SERIAL PRIMARY KEY,
|
|
59
|
+
* session_id VARCHAR(255) NOT NULL REFERENCES spider_sessions(id),
|
|
60
|
+
* sequence_number BIGINT NOT NULL,
|
|
61
|
+
* state_data JSONB NOT NULL,
|
|
62
|
+
* created_at TIMESTAMP DEFAULT NOW()
|
|
63
|
+
* );
|
|
64
|
+
* ```
|
|
65
|
+
*
|
|
66
|
+
* @group Backends
|
|
67
|
+
* @public
|
|
68
|
+
*/
|
|
69
|
+
export declare class PostgresStorageBackend implements StorageBackend {
|
|
70
|
+
readonly db: DatabaseClientInterface;
|
|
71
|
+
readonly capabilities: StorageCapabilities;
|
|
72
|
+
readonly name = "PostgresStorageBackend";
|
|
73
|
+
private readonly tablePrefix;
|
|
74
|
+
private readonly schema;
|
|
75
|
+
private readonly autoCreateTables;
|
|
76
|
+
constructor(db: DatabaseClientInterface, config?: PostgresStorageConfig);
|
|
77
|
+
initialize: () => Effect.Effect<void, PersistenceError>;
|
|
78
|
+
cleanup: () => Effect.Effect<void, PersistenceError>;
|
|
79
|
+
saveState: (key: SpiderStateKey, state: SpiderState) => Effect.Effect<void, PersistenceError>;
|
|
80
|
+
loadState: (key: SpiderStateKey) => Effect.Effect<Option.Option<SpiderState>, PersistenceError>;
|
|
81
|
+
deleteState: (key: SpiderStateKey) => Effect.Effect<void, PersistenceError>;
|
|
82
|
+
saveDelta: (delta: StateDelta) => Effect.Effect<void, PersistenceError>;
|
|
83
|
+
saveDeltas: (deltas: readonly StateDelta[]) => Effect.Effect<void, PersistenceError>;
|
|
84
|
+
loadDeltas: (key: SpiderStateKey, fromSequence?: number) => Effect.Effect<StateDelta[], PersistenceError>;
|
|
85
|
+
saveSnapshot: (key: SpiderStateKey, state: SpiderState, sequence: number) => Effect.Effect<void, PersistenceError>;
|
|
86
|
+
loadLatestSnapshot: (key: SpiderStateKey) => Effect.Effect<Option.Option<{
|
|
87
|
+
state: SpiderState;
|
|
88
|
+
sequence: number;
|
|
89
|
+
}>, PersistenceError>;
|
|
90
|
+
compactDeltas: (key: SpiderStateKey, beforeSequence: number) => Effect.Effect<void, PersistenceError>;
|
|
91
|
+
listSessions: () => Effect.Effect<SpiderStateKey[], PersistenceError>;
|
|
92
|
+
private createTables;
|
|
93
|
+
private getTableName;
|
|
94
|
+
}
|
|
95
|
+
//# sourceMappingURL=PostgresStorageBackend.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PostgresStorageBackend.d.ts","sourceRoot":"","sources":["../../../../src/lib/Resumability/backends/PostgresStorageBackend.ts"],"names":[],"mappings":"AAAA,OAAO,EAAmB,MAAM,EAAE,MAAM,EAAU,MAAM,QAAQ,CAAC;AACjE,OAAO,EACL,WAAW,EACX,cAAc,EACf,MAAM,4CAA4C,CAAC;AACpD,OAAO,EACL,gBAAgB,EAChB,UAAU,EACV,cAAc,EACd,mBAAmB,EACpB,MAAM,aAAa,CAAC;AAQrB;;;;;;;;GAQG;AACH,MAAM,WAAW,uBAAuB;IACtC,KAAK,CAAC,CAAC,GAAG,OAAO,EACf,GAAG,EAAE,MAAM,EACX,MAAM,CAAC,EAAE,SAAS,OAAO,EAAE,GAC1B,OAAO,CAAC;QAAE,IAAI,EAAE,SAAS,CAAC,EAAE,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IACrD,WAAW,CAAC,CAAC,CAAC,EACZ,QAAQ,EAAE,CAAC,MAAM,EAAE,uBAAuB,KAAK,OAAO,CAAC,CAAC,CAAC,GACxD,OAAO,CAAC,CAAC,CAAC,CAAC;CACf;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,qCAAqC;IACrC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yCAAyC;IACzC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,oCAAoC;IACpC,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AACH,qBAAa,sBAAuB,YAAW,cAAc;IAgBzD,QAAQ,CAAC,EAAE,EAAE,uBAAuB;IAftC,QAAQ,CAAC,YAAY,EAAE,mBAAmB,CAMxC;IAEF,QAAQ,CAAC,IAAI,4BAA4B;IAEzC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAU;gBAGhC,EAAE,EAAE,uBAAuB,EACpC,MAAM,CAAC,EAAE,qBAAqB;IAOhC,UAAU,QAAO,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAOpD;IAEF,OAAO,QAAO,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAAgB;IAGnE,SAAS,GACP,KAAK,cAAc,EACnB,OAAO,WAAW,KACjB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAyCtC;IAEF,SAAS,GACP,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE,gBAAgB,CAAC,CAoC5D;IAEF,WAAW,GACT,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAmFtC;IAGF,SAAS,GAAI,OAAO,UAAU,KAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAoCpE;IAEF,UAAU,GACR,QAAQ,SAAS,UAAU,EAAE,KAC5B,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA+DtC;IAEF,UAAU,GACR,KAAK,cAAc,EACnB,qBAAgB,KACf,MAAM,CAAC,MAAM,CAAC,UAAU,EAAE,EAAE,gBAAgB,CAAC,CA6C9C;IAGF,YAAY,GACV,KAAK,cAAc,EACnB,OAAO,WAAW,EAClB,UAAU,MAAM,KACf,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA+BtC;IAEF,kBAAkB
,GAChB,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CACd,MAAM,CAAC,MAAM,CAAC;QAAE,KAAK,EAAE,WAAW,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC,EACvD,gBAAgB,CACjB,CA4CC;IAGF,aAAa,GACX,KAAK,cAAc,EACnB,gBAAgB,MAAM,KACrB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAkBtC;IAEF,YAAY,QAAO,MAAM,CAAC,MAAM,CAAC,cAAc,EAAE,EAAE,gBAAgB,CAAC,CAgClE;IAGF,OAAO,CAAC,YAAY,CAoFlB;IAEF,OAAO,CAAC,YAAY,CAElB;CACH"}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { Effect, Option } from 'effect';
|
|
2
|
+
import { SpiderState, SpiderStateKey } from '../../Scheduler/SpiderScheduler.service.js';
|
|
3
|
+
import { PersistenceError, StateDelta, StorageBackend, StorageCapabilities } from '../types.js';
|
|
4
|
+
/**
|
|
5
|
+
* Redis client interface for dependency injection.
|
|
6
|
+
*
|
|
7
|
+
* This allows users to provide their own Redis client implementation
|
|
8
|
+
* (node_redis, ioredis, etc.) without tight coupling.
|
|
9
|
+
*
|
|
10
|
+
* @group Backends
|
|
11
|
+
* @public
|
|
12
|
+
*/
|
|
13
|
+
export interface RedisClientInterface {
|
|
14
|
+
get(_key: string): Promise<string | null>;
|
|
15
|
+
set(_key: string, _value: string): Promise<void>;
|
|
16
|
+
del(_key: string): Promise<void>;
|
|
17
|
+
exists(_key: string): Promise<boolean>;
|
|
18
|
+
hget(_key: string, _field: string): Promise<string | null>;
|
|
19
|
+
hset(_key: string, _field: string, _value: string): Promise<void>;
|
|
20
|
+
hdel(_key: string, _field: string): Promise<void>;
|
|
21
|
+
hgetall(_key: string): Promise<Record<string, string>>;
|
|
22
|
+
zadd(_key: string, _score: number, _member: string): Promise<void>;
|
|
23
|
+
zrange(_key: string, _start: number, _stop: number): Promise<string[]>;
|
|
24
|
+
zrangebyscore(_key: string, _min: number | string, _max: number | string): Promise<string[]>;
|
|
25
|
+
zrem(_key: string, _member: string): Promise<void>;
|
|
26
|
+
zremrangebyscore(_key: string, _min: number | string, _max: number | string): Promise<void>;
|
|
27
|
+
keys(_pattern: string): Promise<string[]>;
|
|
28
|
+
pipeline?(): RedisPipeline;
|
|
29
|
+
multi?(): RedisMulti;
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Redis pipeline interface for batch operations.
|
|
33
|
+
*/
|
|
34
|
+
export interface RedisPipeline {
|
|
35
|
+
zadd(_key: string, _score: number, _member: string): RedisPipeline;
|
|
36
|
+
exec(): Promise<unknown[]>;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Redis multi/transaction interface.
|
|
40
|
+
*/
|
|
41
|
+
export interface RedisMulti {
|
|
42
|
+
zadd(_key: string, _score: number, _member: string): RedisMulti;
|
|
43
|
+
exec(): Promise<unknown[]>;
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Redis storage backend for spider state persistence.
|
|
47
|
+
*
|
|
48
|
+
* Uses Redis data structures for efficient storage:
|
|
49
|
+
* - Hashes for full state and snapshots
|
|
50
|
+
* - Sorted sets for deltas (ordered by sequence number)
|
|
51
|
+
* - TTL support for automatic cleanup
|
|
52
|
+
*
|
|
53
|
+
* Redis key structure:
|
|
54
|
+
* ```
|
|
55
|
+
* spider:state:{sessionId} # Hash: full state
|
|
56
|
+
* spider:snapshot:{sessionId} # Hash: latest snapshot + sequence
|
|
57
|
+
* spider:deltas:{sessionId} # Sorted set: sequence -> delta JSON
|
|
58
|
+
* spider:sessions # Set: all session IDs
|
|
59
|
+
* ```
|
|
60
|
+
*
|
|
61
|
+
* @group Backends
|
|
62
|
+
* @public
|
|
63
|
+
*/
|
|
64
|
+
export declare class RedisStorageBackend implements StorageBackend {
|
|
65
|
+
readonly capabilities: StorageCapabilities;
|
|
66
|
+
readonly name = "RedisStorageBackend";
|
|
67
|
+
private readonly redis;
|
|
68
|
+
private readonly keyPrefix;
|
|
69
|
+
constructor(redis: RedisClientInterface, keyPrefix?: string);
|
|
70
|
+
initialize: () => Effect.Effect<void, PersistenceError>;
|
|
71
|
+
cleanup: () => Effect.Effect<void, PersistenceError>;
|
|
72
|
+
saveState: (key: SpiderStateKey, state: SpiderState) => Effect.Effect<void, PersistenceError>;
|
|
73
|
+
loadState: (key: SpiderStateKey) => Effect.Effect<Option.Option<SpiderState>, PersistenceError>;
|
|
74
|
+
deleteState: (key: SpiderStateKey) => Effect.Effect<void, PersistenceError>;
|
|
75
|
+
saveDelta: (delta: StateDelta) => Effect.Effect<void, PersistenceError>;
|
|
76
|
+
saveDeltas: (deltas: StateDelta[]) => Effect.Effect<void, PersistenceError>;
|
|
77
|
+
loadDeltas: (key: SpiderStateKey, fromSequence?: number) => Effect.Effect<StateDelta[], PersistenceError>;
|
|
78
|
+
saveSnapshot: (key: SpiderStateKey, state: SpiderState, sequence: number) => Effect.Effect<void, PersistenceError>;
|
|
79
|
+
loadLatestSnapshot: (key: SpiderStateKey) => Effect.Effect<Option.Option<{
|
|
80
|
+
state: SpiderState;
|
|
81
|
+
sequence: number;
|
|
82
|
+
}>, PersistenceError>;
|
|
83
|
+
compactDeltas: (key: SpiderStateKey, beforeSequence: number) => Effect.Effect<void, PersistenceError>;
|
|
84
|
+
listSessions: () => Effect.Effect<SpiderStateKey[], PersistenceError>;
|
|
85
|
+
private getStateKey;
|
|
86
|
+
private getSnapshotKey;
|
|
87
|
+
private getDeltasKey;
|
|
88
|
+
private getSessionsKey;
|
|
89
|
+
private addToSessionsList;
|
|
90
|
+
private removeFromSessionsList;
|
|
91
|
+
}
|
|
92
|
+
//# sourceMappingURL=RedisStorageBackend.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"RedisStorageBackend.d.ts","sourceRoot":"","sources":["../../../../src/lib/Resumability/backends/RedisStorageBackend.ts"],"names":[],"mappings":"AAAA,OAAO,EAAmB,MAAM,EAAW,MAAM,EAAU,MAAM,QAAQ,CAAC;AAC1E,OAAO,EACL,WAAW,EACX,cAAc,EACf,MAAM,4CAA4C,CAAC;AACpD,OAAO,EACL,gBAAgB,EAChB,UAAU,EACV,cAAc,EACd,mBAAmB,EACpB,MAAM,aAAa,CAAC;AAarB;;;;;;;;GAQG;AACH,MAAM,WAAW,oBAAoB;IACnC,GAAG,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;IAC1C,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACjD,GAAG,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACjC,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IACvC,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;IAC3D,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAClE,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAClD,OAAO,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACvD,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACnE,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACvE,aAAa,CACX,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,MAAM,GAAG,MAAM,EACrB,IAAI,EAAE,MAAM,GAAG,MAAM,GACpB,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACrB,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACnD,gBAAgB,CACd,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,MAAM,GAAG,MAAM,EACrB,IAAI,EAAE,MAAM,GAAG,MAAM,GACpB,OAAO,CAAC,IAAI,CAAC,CAAC;IACjB,IAAI,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAC1C,QAAQ,CAAC,IAAI,aAAa,CAAC;IAC3B,KAAK,CAAC,IAAI,UAAU,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,aAAa,CAAC;IACnE,IAAI,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,UAAU,CAAC;IAChE,IAAI,IAAI,OAAO
,CAAC,OAAO,EAAE,CAAC,CAAC;CAC5B;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,qBAAa,mBAAoB,YAAW,cAAc;IACxD,QAAQ,CAAC,YAAY,EAAE,mBAAmB,CAMxC;IAEF,QAAQ,CAAC,IAAI,yBAAyB;IAEtC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAuB;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;gBAEvB,KAAK,EAAE,oBAAoB,EAAE,SAAS,SAAW;IAK7D,UAAU,QAAO,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CACxC;IAEd,OAAO,QAAO,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CACrC;IAGd,SAAS,GACP,KAAK,cAAc,EACnB,OAAO,WAAW,KACjB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA4BtC;IAEF,SAAS,GACP,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE,gBAAgB,CAAC,CAgC5D;IAEF,WAAW,GACT,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAoCtC;IAGF,SAAS,GAAI,OAAO,UAAU,KAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAoCpE;IAEF,UAAU,GACR,QAAQ,UAAU,EAAE,KACnB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAqEtC;IAEF,UAAU,GACR,KAAK,cAAc,EACnB,qBAAgB,KACf,MAAM,CAAC,MAAM,CAAC,UAAU,EAAE,EAAE,gBAAgB,CAAC,CAkC9C;IAGF,YAAY,GACV,KAAK,cAAc,EACnB,OAAO,WAAW,EAClB,UAAU,MAAM,KACf,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAmCtC;IAEF,kBAAkB,GAChB,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CACd,MAAM,CAAC,MAAM,CAAC;QAAE,KAAK,EAAE,WAAW,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC,EACvD,gBAAgB,CACjB,CAwCC;IAGF,aAAa,GACX,KAAK,cAAc,EACnB,gBAAgB,MAAM,KACrB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAetC;IAEF,YAAY,QAAO,MAAM,CAAC,MAAM,CAAC,cAAc,EAAE,EAAE,gBAAgB,CAAC,CA2ClE;IAGF,OAAO,CAAC,WAAW,CACmB;IAEtC,OAAO,CAAC,cAAc,CACmB;IAEzC,OAAO,CAAC,YAAY,CACmB;IAEvC,OAAO,CAAC,cAAc,CAA8C;IAEpE,OAAO,CAAC,iBAAiB,CAkBvB;IAEF,OAAO,CAAC,sBAAsB,CAgB5B;CACH"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Resumable spider crawling with configurable persistence strategies.
|
|
3
|
+
*
|
|
4
|
+
* This module provides a complete solution for resumable web crawling with
|
|
5
|
+
* support for different persistence strategies and storage backends.
|
|
6
|
+
*
|
|
7
|
+
* ## Key Features
|
|
8
|
+
*
|
|
9
|
+
* - **Multiple Strategies**: Full state, delta, hybrid, and auto-selection
|
|
10
|
+
* - **Multiple Backends**: File system, Redis, PostgreSQL with extensible interface
|
|
11
|
+
* - **Effect Native**: Full integration with Effect ecosystem
|
|
12
|
+
* - **Type Safe**: Complete TypeScript support with runtime validation
|
|
13
|
+
* - **Production Ready**: Handles concurrency, errors, and edge cases
|
|
14
|
+
*
|
|
15
|
+
* ## Quick Start
|
|
16
|
+
*
|
|
17
|
+
* ```typescript
|
|
18
|
+
* import { ResumabilityService, ResumabilityConfigs } from '@jambudipa/spider/resumability';
|
|
19
|
+
*
|
|
20
|
+
* // File-based resumability
|
|
21
|
+
* const resumabilityLayer = ResumabilityService.fromConfig(
|
|
22
|
+
* ResumabilityConfigs.file('./spider-state', 'hybrid')
|
|
23
|
+
* );
|
|
24
|
+
*
|
|
25
|
+
* // Use with Spider
|
|
26
|
+
* const program = Effect.gen(function* () {
|
|
27
|
+
* const spider = yield* Spider;
|
|
28
|
+
* const resumability = yield* ResumabilityService;
|
|
29
|
+
*
|
|
30
|
+
* // Configure resumable crawling...
|
|
31
|
+
* });
|
|
32
|
+
*
|
|
33
|
+
* Effect.runPromise(
|
|
34
|
+
* program.pipe(
|
|
35
|
+
* Effect.provide(Spider.Default),
|
|
36
|
+
* Effect.provide(resumabilityLayer)
|
|
37
|
+
* )
|
|
38
|
+
* );
|
|
39
|
+
* ```
|
|
40
|
+
*
|
|
41
|
+
* @group Resumability
|
|
42
|
+
* @public
|
|
43
|
+
*/
|
|
44
|
+
export type { StorageBackend, StorageCapabilities, PersistenceStrategy, StateOperation, HybridPersistenceConfig, } from './types.js';
|
|
45
|
+
export { StateDelta, PersistenceError, DEFAULT_HYBRID_CONFIG, } from './types.js';
|
|
46
|
+
export { FullStatePersistence, DeltaPersistence, HybridPersistence, } from './strategies.js';
|
|
47
|
+
export { FileStorageBackend } from './backends/FileStorageBackend.js';
|
|
48
|
+
export { RedisStorageBackend, type RedisClientInterface, type RedisPipeline, type RedisMulti, } from './backends/RedisStorageBackend.js';
|
|
49
|
+
export { PostgresStorageBackend, type DatabaseClientInterface, type PostgresStorageConfig, } from './backends/PostgresStorageBackend.js';
|
|
50
|
+
export { ResumabilityService, ResumabilityConfigs, createStateOperation, type ResumabilityConfig, } from './Resumability.service.js';
|
|
51
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/lib/Resumability/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AAGH,YAAY,EACV,cAAc,EACd,mBAAmB,EACnB,mBAAmB,EACnB,cAAc,EACd,uBAAuB,GACxB,MAAM,YAAY,CAAC;AAEpB,OAAO,EACL,UAAU,EACV,gBAAgB,EAChB,qBAAqB,GACtB,MAAM,YAAY,CAAC;AAGpB,OAAO,EACL,oBAAoB,EACpB,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,iBAAiB,CAAC;AAGzB,OAAO,EAAE,kBAAkB,EAAE,MAAM,kCAAkC,CAAC;AACtE,OAAO,EACL,mBAAmB,EACnB,KAAK,oBAAoB,EACzB,KAAK,aAAa,EAClB,KAAK,UAAU,GAChB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EACL,sBAAsB,EACtB,KAAK,uBAAuB,EAC5B,KAAK,qBAAqB,GAC3B,MAAM,sCAAsC,CAAC;AAG9C,OAAO,EACL,mBAAmB,EACnB,mBAAmB,EACnB,oBAAoB,EACpB,KAAK,kBAAkB,GACxB,MAAM,2BAA2B,CAAC"}
|