@jambudipa/spider 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +426 -0
- package/dist/index.d.ts +33 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +4681 -0
- package/dist/index.js.map +1 -0
- package/dist/lib/BrowserEngine/BrowserEngine.service.d.ts +57 -0
- package/dist/lib/BrowserEngine/BrowserEngine.service.d.ts.map +1 -0
- package/dist/lib/Config/SpiderConfig.service.d.ts +256 -0
- package/dist/lib/Config/SpiderConfig.service.d.ts.map +1 -0
- package/dist/lib/HttpClient/CookieManager.d.ts +44 -0
- package/dist/lib/HttpClient/CookieManager.d.ts.map +1 -0
- package/dist/lib/HttpClient/EnhancedHttpClient.d.ts +88 -0
- package/dist/lib/HttpClient/EnhancedHttpClient.d.ts.map +1 -0
- package/dist/lib/HttpClient/SessionStore.d.ts +82 -0
- package/dist/lib/HttpClient/SessionStore.d.ts.map +1 -0
- package/dist/lib/HttpClient/TokenExtractor.d.ts +58 -0
- package/dist/lib/HttpClient/TokenExtractor.d.ts.map +1 -0
- package/dist/lib/HttpClient/index.d.ts +8 -0
- package/dist/lib/HttpClient/index.d.ts.map +1 -0
- package/dist/lib/LinkExtractor/LinkExtractor.service.d.ts +166 -0
- package/dist/lib/LinkExtractor/LinkExtractor.service.d.ts.map +1 -0
- package/dist/lib/LinkExtractor/index.d.ts +37 -0
- package/dist/lib/LinkExtractor/index.d.ts.map +1 -0
- package/dist/lib/Logging/FetchLogger.d.ts +8 -0
- package/dist/lib/Logging/FetchLogger.d.ts.map +1 -0
- package/dist/lib/Logging/SpiderLogger.service.d.ts +34 -0
- package/dist/lib/Logging/SpiderLogger.service.d.ts.map +1 -0
- package/dist/lib/Middleware/SpiderMiddleware.d.ts +276 -0
- package/dist/lib/Middleware/SpiderMiddleware.d.ts.map +1 -0
- package/dist/lib/PageData/PageData.d.ts +28 -0
- package/dist/lib/PageData/PageData.d.ts.map +1 -0
- package/dist/lib/Resumability/Resumability.service.d.ts +176 -0
- package/dist/lib/Resumability/Resumability.service.d.ts.map +1 -0
- package/dist/lib/Resumability/backends/FileStorageBackend.d.ts +47 -0
- package/dist/lib/Resumability/backends/FileStorageBackend.d.ts.map +1 -0
- package/dist/lib/Resumability/backends/PostgresStorageBackend.d.ts +95 -0
- package/dist/lib/Resumability/backends/PostgresStorageBackend.d.ts.map +1 -0
- package/dist/lib/Resumability/backends/RedisStorageBackend.d.ts +92 -0
- package/dist/lib/Resumability/backends/RedisStorageBackend.d.ts.map +1 -0
- package/dist/lib/Resumability/index.d.ts +51 -0
- package/dist/lib/Resumability/index.d.ts.map +1 -0
- package/dist/lib/Resumability/strategies.d.ts +76 -0
- package/dist/lib/Resumability/strategies.d.ts.map +1 -0
- package/dist/lib/Resumability/types.d.ts +201 -0
- package/dist/lib/Resumability/types.d.ts.map +1 -0
- package/dist/lib/Robots/Robots.service.d.ts +78 -0
- package/dist/lib/Robots/Robots.service.d.ts.map +1 -0
- package/dist/lib/Scheduler/SpiderScheduler.service.d.ts +211 -0
- package/dist/lib/Scheduler/SpiderScheduler.service.d.ts.map +1 -0
- package/dist/lib/Scraper/Scraper.service.d.ts +123 -0
- package/dist/lib/Scraper/Scraper.service.d.ts.map +1 -0
- package/dist/lib/Spider/Spider.service.d.ts +194 -0
- package/dist/lib/Spider/Spider.service.d.ts.map +1 -0
- package/dist/lib/StateManager/StateManager.service.d.ts +68 -0
- package/dist/lib/StateManager/StateManager.service.d.ts.map +1 -0
- package/dist/lib/StateManager/index.d.ts +5 -0
- package/dist/lib/StateManager/index.d.ts.map +1 -0
- package/dist/lib/UrlDeduplicator/UrlDeduplicator.service.d.ts +58 -0
- package/dist/lib/UrlDeduplicator/UrlDeduplicator.service.d.ts.map +1 -0
- package/dist/lib/WebScrapingEngine/WebScrapingEngine.service.d.ts +77 -0
- package/dist/lib/WebScrapingEngine/WebScrapingEngine.service.d.ts.map +1 -0
- package/dist/lib/WebScrapingEngine/index.d.ts +5 -0
- package/dist/lib/WebScrapingEngine/index.d.ts.map +1 -0
- package/dist/lib/WorkerHealth/WorkerHealthMonitor.service.d.ts +39 -0
- package/dist/lib/WorkerHealth/WorkerHealthMonitor.service.d.ts.map +1 -0
- package/dist/lib/api-facades.d.ts +313 -0
- package/dist/lib/api-facades.d.ts.map +1 -0
- package/dist/lib/errors.d.ts +99 -0
- package/dist/lib/errors.d.ts.map +1 -0
- package/package.json +108 -0
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
import { Effect } from 'effect';
|
|
2
|
+
import { SpiderState, SpiderStateKey } from '../Scheduler/SpiderScheduler.service.js';
|
|
3
|
+
import { HybridPersistenceConfig, PersistenceError, StateDelta, StateOperation, StorageBackend } from './types.js';
|
|
4
|
+
/**
 * Configuration for the ResumabilityService.
 *
 * Selects the persistence strategy and the storage backend it runs against,
 * so persistence behavior can be tuned to the use case.
 *
 * @group Configuration
 * @public
 */
export interface ResumabilityConfig {
    /** Persistence strategy to use */
    strategy: 'full-state' | 'delta' | 'hybrid' | 'auto';
    /** Storage backend implementation */
    backend: StorageBackend;
    /** Configuration for hybrid strategy (only used when strategy is 'hybrid') */
    hybridConfig?: HybridPersistenceConfig;
}
|
|
21
|
+
/**
 * Base class declaration that {@link ResumabilityService} extends.
 *
 * It is an `Effect.Service` class registered under the identifier
 * "@jambudipa.io/ResumabilityService"; its `effect` yields the object of
 * service operations documented below.
 */
declare const ResumabilityService_base: Effect.Service.Class<ResumabilityService, "@jambudipa.io/ResumabilityService", {
    readonly effect: Effect.Effect<{
        /**
         * Configure the resumability service with a specific strategy and backend.
         *
         * This method initializes the storage backend and creates the appropriate
         * persistence strategy based on the configuration.
         *
         * @param config - Resumability configuration
         * @returns Effect that completes when configuration is applied
         */
        configure: (config: ResumabilityConfig) => Effect.Effect<void, PersistenceError, never>;
        /**
         * Persist a state operation using the configured strategy.
         *
         * @param operation - State operation to persist
         * @returns Effect that completes when operation is persisted
         */
        persistOperation: (operation: StateOperation) => Effect.Effect<undefined, PersistenceError, never>;
        /**
         * Restore spider state from persistent storage.
         *
         * @param key - State key identifying the session to restore
         * @returns Effect containing the restored state, or null if not found
         */
        restore: (key: SpiderStateKey) => Effect.Effect<SpiderState | null, PersistenceError, never>;
        /**
         * Clean up old state data for a session.
         *
         * @param key - State key identifying the session to clean up
         * @returns Effect that completes when cleanup is finished
         */
        cleanup: (key: SpiderStateKey) => Effect.Effect<undefined, PersistenceError, never>;
        /**
         * List all available sessions in storage.
         *
         * @returns Effect containing array of session keys
         */
        listSessions: () => Effect.Effect<SpiderStateKey[], PersistenceError, never>;
        /**
         * Get information about the current configuration.
         *
         * @returns Information about strategy and backend
         */
        getInfo: () => Effect.Effect<{
            strategy: {
                readonly name: string;
                readonly description: string;
                readonly capabilities: string[];
            };
            backend: {
                name: string;
                capabilities: import("./types.js").StorageCapabilities;
            };
        }, PersistenceError, never>;
        /**
         * Reconfigure the service with new settings.
         *
         * This will clean up the current backend and reinitialize with new config.
         *
         * @param config - New configuration
         * @returns Effect that completes when reconfiguration is finished
         */
        reconfigure: (config: ResumabilityConfig) => Effect.Effect<void, PersistenceError, never>;
    }, never, never>;
}>;
|
|
87
|
+
/**
 * Service for resumable spider crawling with configurable persistence strategies.
 *
 * Provides a unified interface for different persistence approaches:
 * - Full state: Simple, saves complete state on every change
 * - Delta: Efficient, saves only incremental changes
 * - Hybrid: Best of both worlds, deltas + periodic snapshots
 * - Auto: Automatically chooses best strategy based on backend capabilities
 *
 * @example
 * ```typescript
 * // File-based full state persistence
 * const fileResumability = ResumabilityService.fromConfig({
 *   strategy: 'full-state',
 *   backend: new FileStorageBackend('./spider-state')
 * });
 *
 * // Redis-based hybrid persistence
 * const redisResumability = ResumabilityService.fromConfig({
 *   strategy: 'hybrid',
 *   backend: new RedisStorageBackend(redisClient),
 *   hybridConfig: {
 *     snapshotInterval: 1000,
 *     maxDeltasBeforeSnapshot: 500
 *   }
 * });
 *
 * // Auto-selected strategy based on backend
 * const autoResumability = ResumabilityService.fromConfig({
 *   strategy: 'auto',
 *   backend: new PostgresStorageBackend(pgClient)
 * });
 * ```
 *
 * @group Services
 * @public
 */
export declare class ResumabilityService extends ResumabilityService_base {
    /**
     * Create a configured ResumabilityService from configuration.
     *
     * This is the primary way to create and configure the ResumabilityService.
     *
     * NOTE(review): earlier documentation described the result as a "layer",
     * but the declared type is an `Effect` that yields the service and may
     * fail with `PersistenceError` - confirm the intended usage with callers.
     *
     * @param config - Resumability configuration
     * @returns Effect yielding the configured ResumabilityService
     */
    static fromConfig: (config: ResumabilityConfig) => Effect.Effect<ResumabilityService, PersistenceError, never>;
}
|
|
135
|
+
/**
 * Utility function to create a state operation.
 *
 * @param delta - The delta operation
 * @param resultingState - The complete state after applying the delta
 * @param shouldSnapshot - Optional flag: whether this operation should trigger a snapshot
 * @returns StateOperation object
 */
export declare const createStateOperation: (delta: StateDelta, resultingState: SpiderState, shouldSnapshot?: boolean) => StateOperation;
|
|
144
|
+
/**
 * Factory functions for creating common resumability configurations.
 *
 * Each factory returns a {@link ResumabilityConfig}; the `strategy` argument
 * is optional in every factory.
 */
export declare const ResumabilityConfigs: {
    /**
     * Create a file-based configuration.
     *
     * @param baseDir - Directory to store state files
     * @param strategy - Persistence strategy (defaults to 'auto')
     * @returns ResumabilityConfig
     */
    file: (baseDir: string, strategy?: "full-state" | "delta" | "hybrid" | "auto") => ResumabilityConfig;
    /**
     * Create a Redis-based configuration.
     *
     * @param redisClient - Redis client instance
     * @param strategy - Persistence strategy (defaults to 'hybrid')
     * @param keyPrefix - Redis key prefix (defaults to 'spider')
     * @returns ResumabilityConfig
     */
    redis: (redisClient: import("./backends/RedisStorageBackend.js").RedisClientInterface, strategy?: "full-state" | "delta" | "hybrid" | "auto", keyPrefix?: string) => ResumabilityConfig;
    /**
     * Create a PostgreSQL-based configuration.
     *
     * @param dbClient - Database client instance
     * @param strategy - Persistence strategy (defaults to 'hybrid')
     * @param config - PostgreSQL configuration
     * @returns ResumabilityConfig
     */
    postgres: (dbClient: import("./backends/PostgresStorageBackend.js").DatabaseClientInterface, strategy?: "full-state" | "delta" | "hybrid" | "auto", config?: import("./backends/PostgresStorageBackend.js").PostgresStorageConfig) => ResumabilityConfig;
};
|
|
175
|
+
export {};
|
|
176
|
+
//# sourceMappingURL=Resumability.service.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Resumability.service.d.ts","sourceRoot":"","sources":["../../../src/lib/Resumability/Resumability.service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAChC,OAAO,EACL,WAAW,EACX,cAAc,EACf,MAAM,yCAAyC,CAAC;AACjD,OAAO,EAEL,uBAAuB,EACvB,gBAAgB,EAEhB,UAAU,EACV,cAAc,EACd,cAAc,EACf,MAAM,YAAY,CAAC;AAOpB;;;;;;;;GAQG;AACH,MAAM,WAAW,kBAAkB;IACjC,kCAAkC;IAClC,QAAQ,EAAE,YAAY,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,CAAC;IACrD,qCAAqC;IACrC,OAAO,EAAE,cAAc,CAAC;IACxB,8EAA8E;IAC9E,YAAY,CAAC,EAAE,uBAAuB,CAAC;CACxC;;;QAgDO;;;;;;;;WAQG;4BACiB,kBAAkB;QAWtC;;;;;WAKG;sCAC2B,cAAc;QAe5C;;;;;WAKG;uBACY,cAAc;QAe7B;;;;;WAKG;uBACY,cAAc;QAe7B;;;;WAIG;;QAyBH;;;;WAIG;;;;;;;;;;;;QAsBH;;;;;;;WAOG;8BACmB,kBAAkB;;;AAhMhD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoCG;AACH,qBAAa,mBAAoB,SAAQ,wBA0KxC;IACC;;;;;;;OAOG;IACH,MAAM,CAAC,UAAU,GAAI,QAAQ,kBAAkB,iEAKQ;CACxD;AAuDD;;;;;;;GAOG;AACH,eAAO,MAAM,oBAAoB,GAC/B,OAAO,UAAU,EACjB,gBAAgB,WAAW,EAC3B,wBAAsB,KACrB,cAID,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,mBAAmB;IAC9B;;;;;;OAMG;oBAEQ,MAAM,aACL,YAAY,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,KACnD,kBAAkB;IAQrB;;;;;;;OAOG;yBAEY,OAAO,mCAAmC,EAAE,oBAAoB,aACnE,YAAY,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,yBAEnD,kBAAkB;IASrB;;;;;;;OAOG;yBAES,OAAO,sCAAsC,EAAE,uBAAuB,aACtE,YAAY,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,WAC3C,OAAO,sCAAsC,EAAE,qBAAqB,KAC5E,kBAAkB;CAQtB,CAAC"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { Effect } from 'effect';
|
|
2
|
+
import { PersistenceError, SpiderState, SpiderStateKey, StateDelta, StorageBackend, StorageCapabilities } from '../types.js';
|
|
3
|
+
/**
 * File system storage backend for spider state persistence.
 *
 * Stores state and deltas as JSON files in a directory structure.
 * Good for development, testing, and single-machine deployments.
 *
 * Directory structure (nesting reconstructed from the flattened listing -
 * NOTE(review): confirm against the implementation):
 * ```
 * baseDir/
 *   sessions/
 *     sessionId/
 *       state.json       # Full state
 *       snapshot.json    # Latest snapshot
 *       deltas/
 *         0001.json      # Delta files
 *         0002.json
 *         ...
 * ```
 *
 * @group Backends
 * @public
 */
export declare class FileStorageBackend implements StorageBackend {
    private readonly baseDir;
    readonly capabilities: StorageCapabilities;
    readonly name = "FileStorageBackend";
    constructor(baseDir: string);
    initialize: () => Effect.Effect<void, PersistenceError>;
    cleanup: () => Effect.Effect<void, PersistenceError>;
    saveState: (key: SpiderStateKey, state: SpiderState) => Effect.Effect<void, PersistenceError, never>;
    loadState: (key: SpiderStateKey) => Effect.Effect<SpiderState | null, PersistenceError, never>;
    deleteState: (key: SpiderStateKey) => Effect.Effect<void, PersistenceError, never>;
    saveDelta: (delta: StateDelta) => Effect.Effect<void, PersistenceError>;
    saveDeltas: (deltas: StateDelta[]) => Effect.Effect<void, PersistenceError>;
    loadDeltas: (key: SpiderStateKey, fromSequence?: number) => Effect.Effect<StateDelta[], PersistenceError>;
    saveSnapshot: (key: SpiderStateKey, state: SpiderState, sequence: number) => Effect.Effect<void, PersistenceError>;
    loadLatestSnapshot: (key: SpiderStateKey) => Effect.Effect<{
        state: SpiderState;
        sequence: number;
    } | null, PersistenceError>;
    compactDeltas: (key: SpiderStateKey, beforeSequence: number) => Effect.Effect<void, PersistenceError>;
    listSessions: () => Effect.Effect<SpiderStateKey[], PersistenceError>;
    private getSessionDir;
}
|
|
47
|
+
//# sourceMappingURL=FileStorageBackend.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"FileStorageBackend.d.ts","sourceRoot":"","sources":["../../../../src/lib/Resumability/backends/FileStorageBackend.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAU,MAAM,QAAQ,CAAC;AAGxC,OAAO,EACL,gBAAgB,EAChB,WAAW,EACX,cAAc,EACd,UAAU,EACV,cAAc,EACd,mBAAmB,EACpB,MAAM,aAAa,CAAC;AAErB;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,qBAAa,kBAAmB,YAAW,cAAc;IAW3C,OAAO,CAAC,QAAQ,CAAC,OAAO;IAVpC,QAAQ,CAAC,YAAY,EAAE,mBAAmB,CAMxC;IAEF,QAAQ,CAAC,IAAI,wBAAwB;gBAER,OAAO,EAAE,MAAM;IAE5C,UAAU,QAAO,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAuBpD;IAEF,OAAO,QAAO,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CACvB;IAG5B,SAAS,GACP,KAAK,cAAc,EACnB,OAAO,WAAW,KACjB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,EAAE,KAAK,CAAC,CA2B7C;IAEF,SAAS,GACP,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,WAAW,GAAG,IAAI,EAAE,gBAAgB,EAAE,KAAK,CAAC,CAyC3D;IAEF,WAAW,GACT,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,EAAE,KAAK,CAAC,CAe7C;IAGF,SAAS,GAAI,OAAO,UAAU,KAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA+BpE;IAEF,UAAU,GACR,QAAQ,UAAU,EAAE,KACnB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAQtC;IAEF,UAAU,GACR,KAAK,cAAc,EACnB,qBAAgB,KACf,MAAM,CAAC,MAAM,CAAC,UAAU,EAAE,EAAE,gBAAgB,CAAC,CA+D9C;IAGF,YAAY,GACV,KAAK,cAAc,EACnB,OAAO,WAAW,EAClB,UAAU,MAAM,KACf,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAmCtC;IAEF,kBAAkB,GAChB,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CACd;QAAE,KAAK,EAAE,WAAW,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,EAC/C,gBAAgB,CACjB,CA4CC;IAGF,aAAa,GACX,KAAK,cAAc,EACnB,gBAAgB,MAAM,KACrB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA6CtC;IAEF,YAAY,QAAO,MAAM,CAAC,MAAM,CAAC,cAAc,EAAE,EAAE,gBAAgB,CAAC,CAmDlE;IAEF,OAAO,CAAC,aAAa,CAEnB;CACH"}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import { Effect } from 'effect';
|
|
2
|
+
import { SpiderState, SpiderStateKey } from '../../Scheduler/SpiderScheduler.service.js';
|
|
3
|
+
import { PersistenceError, StateDelta, StorageBackend, StorageCapabilities } from '../types.js';
|
|
4
|
+
/**
 * Database client interface for dependency injection.
 *
 * This allows users to provide their own database client implementation
 * (pg/node-postgres, prisma, drizzle, etc.) without tight coupling.
 *
 * Only `query` is required; `transaction` is optional, so callers must check
 * for its presence before using it.
 *
 * @group Backends
 * @public
 */
export interface DatabaseClientInterface {
    query<T = unknown>(sql: string, params?: unknown[]): Promise<{
        rows: T[];
        rowCount: number;
    }>;
    transaction?<T>(callback: (client: DatabaseClientInterface) => Promise<T>): Promise<T>;
}
|
|
20
|
+
/**
 * Configuration for PostgreSQL storage backend.
 *
 * Every field is optional.
 */
export interface PostgresStorageConfig {
    /** Table prefix for spider tables */
    tablePrefix?: string;
    /** Schema name (defaults to 'public') */
    schema?: string;
    /** Whether to auto-create tables */
    autoCreateTables?: boolean;
}
|
|
31
|
+
/**
 * PostgreSQL storage backend for spider state persistence.
 *
 * Uses PostgreSQL for robust, ACID-compliant state persistence with
 * excellent support for concurrent access and complex queries.
 *
 * Database schema:
 * ```sql
 * CREATE TABLE spider_sessions (
 *   id VARCHAR(255) PRIMARY KEY,
 *   name VARCHAR(255) NOT NULL,
 *   created_at TIMESTAMP NOT NULL,
 *   state_data JSONB,
 *   updated_at TIMESTAMP DEFAULT NOW()
 * );
 *
 * CREATE TABLE spider_deltas (
 *   id SERIAL PRIMARY KEY,
 *   session_id VARCHAR(255) NOT NULL REFERENCES spider_sessions(id),
 *   sequence_number BIGINT NOT NULL,
 *   operation_type VARCHAR(50) NOT NULL,
 *   operation_data JSONB NOT NULL,
 *   created_at TIMESTAMP DEFAULT NOW(),
 *   UNIQUE(session_id, sequence_number)
 * );
 *
 * CREATE TABLE spider_snapshots (
 *   id SERIAL PRIMARY KEY,
 *   session_id VARCHAR(255) NOT NULL REFERENCES spider_sessions(id),
 *   sequence_number BIGINT NOT NULL,
 *   state_data JSONB NOT NULL,
 *   created_at TIMESTAMP DEFAULT NOW()
 * );
 * ```
 *
 * @group Backends
 * @public
 */
export declare class PostgresStorageBackend implements StorageBackend {
    private readonly db;
    readonly capabilities: StorageCapabilities;
    readonly name = "PostgresStorageBackend";
    private readonly tablePrefix;
    private readonly schema;
    private readonly autoCreateTables;
    constructor(db: DatabaseClientInterface, config?: PostgresStorageConfig);
    initialize: () => Effect.Effect<void, PersistenceError>;
    cleanup: () => Effect.Effect<void, PersistenceError>;
    saveState: (key: SpiderStateKey, state: SpiderState) => Effect.Effect<void, PersistenceError>;
    loadState: (key: SpiderStateKey) => Effect.Effect<SpiderState | null, PersistenceError>;
    deleteState: (key: SpiderStateKey) => Effect.Effect<void, PersistenceError>;
    saveDelta: (delta: StateDelta) => Effect.Effect<void, PersistenceError>;
    saveDeltas: (deltas: StateDelta[]) => Effect.Effect<void, PersistenceError>;
    loadDeltas: (key: SpiderStateKey, fromSequence?: number) => Effect.Effect<StateDelta[], PersistenceError>;
    saveSnapshot: (key: SpiderStateKey, state: SpiderState, sequence: number) => Effect.Effect<void, PersistenceError>;
    loadLatestSnapshot: (key: SpiderStateKey) => Effect.Effect<{
        state: SpiderState;
        sequence: number;
    } | null, PersistenceError>;
    compactDeltas: (key: SpiderStateKey, beforeSequence: number) => Effect.Effect<void, PersistenceError>;
    listSessions: () => Effect.Effect<SpiderStateKey[], PersistenceError>;
    private createTables;
    private getTableName;
}
|
|
95
|
+
//# sourceMappingURL=PostgresStorageBackend.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PostgresStorageBackend.d.ts","sourceRoot":"","sources":["../../../../src/lib/Resumability/backends/PostgresStorageBackend.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAU,MAAM,QAAQ,CAAC;AACxC,OAAO,EACL,WAAW,EACX,cAAc,EACf,MAAM,4CAA4C,CAAC;AACpD,OAAO,EACL,gBAAgB,EAChB,UAAU,EACV,cAAc,EACd,mBAAmB,EACpB,MAAM,aAAa,CAAC;AAErB;;;;;;;;GAQG;AACH,MAAM,WAAW,uBAAuB;IACtC,KAAK,CAAC,CAAC,GAAG,OAAO,EACf,GAAG,EAAE,MAAM,EACX,MAAM,CAAC,EAAE,OAAO,EAAE,GACjB,OAAO,CAAC;QAAE,IAAI,EAAE,CAAC,EAAE,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC5C,WAAW,CAAC,CAAC,CAAC,EACZ,QAAQ,EAAE,CAAC,MAAM,EAAE,uBAAuB,KAAK,OAAO,CAAC,CAAC,CAAC,GACxD,OAAO,CAAC,CAAC,CAAC,CAAC;CACf;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,qCAAqC;IACrC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yCAAyC;IACzC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,oCAAoC;IACpC,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AACH,qBAAa,sBAAuB,YAAW,cAAc;IAgBzD,OAAO,CAAC,QAAQ,CAAC,EAAE;IAfrB,QAAQ,CAAC,YAAY,EAAE,mBAAmB,CAMxC;IAEF,QAAQ,CAAC,IAAI,4BAA4B;IAEzC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAU;gBAGxB,EAAE,EAAE,uBAAuB,EAC5C,MAAM,CAAC,EAAE,qBAAqB;IAOhC,UAAU,QAAO,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAOpD;IAEF,OAAO,QAAO,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CACvB;IAG5B,SAAS,GACP,KAAK,cAAc,EACnB,OAAO,WAAW,KACjB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAsCtC;IAEF,SAAS,GACP,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,WAAW,GAAG,IAAI,EAAE,gBAAgB,CAAC,CAoCpD;IAEF,WAAW,GACT,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAwEtC;IAGF,SAAS,GAAI,OAAO,UAAU,KAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAmCpE;IAEF,UAAU,GACR,QAAQ,UAAU,EAAE,KACnB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAiDtC;IAEF,UAAU,GACR,KAAK,cAAc,EACnB,qBAAgB,KACf,MAAM,CAAC,MAAM,CAAC,UAAU,EAAE,EAAE,gBAAgB,CAAC,CAwC9C;IAGF,YAAY,GACV,KAAK,cAAc,EACnB,OAAO,WAAW,EAClB,UAAU,MAAM,KACf,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA6BtC;IAEF,kBAAkB,GAChB,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CACd;
QAAE,KAAK,EAAE,WAAW,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,EAC/C,gBAAgB,CACjB,CA4CC;IAGF,aAAa,GACX,KAAK,cAAc,EACnB,gBAAgB,MAAM,KACrB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAkBtC;IAEF,YAAY,QAAO,MAAM,CAAC,MAAM,CAAC,cAAc,EAAE,EAAE,gBAAgB,CAAC,CA+BlE;IAGF,OAAO,CAAC,YAAY,CAoFlB;IAEF,OAAO,CAAC,YAAY,CAElB;CACH"}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { Effect } from 'effect';
|
|
2
|
+
import { SpiderState, SpiderStateKey } from '../../Scheduler/SpiderScheduler.service.js';
|
|
3
|
+
import { PersistenceError, StateDelta, StorageBackend, StorageCapabilities } from '../types.js';
|
|
4
|
+
/**
 * Redis client interface for dependency injection.
 *
 * This allows users to provide their own Redis client implementation
 * (node_redis, ioredis, etc.) without tight coupling.
 *
 * All commands are promise-based. `pipeline` and `multi` are optional, so
 * callers must check for their presence before using them.
 *
 * @group Backends
 * @public
 */
export interface RedisClientInterface {
    get(key: string): Promise<string | null>;
    set(key: string, value: string): Promise<void>;
    del(key: string): Promise<void>;
    exists(key: string): Promise<boolean>;
    hget(key: string, field: string): Promise<string | null>;
    hset(key: string, field: string, value: string): Promise<void>;
    hdel(key: string, field: string): Promise<void>;
    hgetall(key: string): Promise<Record<string, string>>;
    zadd(key: string, score: number, member: string): Promise<void>;
    zrange(key: string, start: number, stop: number): Promise<string[]>;
    zrangebyscore(key: string, min: number | string, max: number | string): Promise<string[]>;
    zrem(key: string, member: string): Promise<void>;
    zremrangebyscore(key: string, min: number | string, max: number | string): Promise<void>;
    keys(pattern: string): Promise<string[]>;
    pipeline?(): RedisPipeline;
    multi?(): RedisMulti;
}
|
|
31
|
+
/**
 * Redis pipeline interface for batch operations.
 *
 * `zadd` returns a pipeline so calls can be chained; `exec` resolves with an
 * array whose elements are typed `any`, so callers should validate them.
 */
export interface RedisPipeline {
    zadd(key: string, score: number, member: string): RedisPipeline;
    exec(): Promise<any[]>;
}
|
|
38
|
+
/**
 * Redis multi/transaction interface.
 *
 * `zadd` returns a multi so calls can be chained; `exec` resolves with an
 * array whose elements are typed `any`, so callers should validate them.
 */
export interface RedisMulti {
    zadd(key: string, score: number, member: string): RedisMulti;
    exec(): Promise<any[]>;
}
|
|
45
|
+
/**
 * Redis storage backend for spider state persistence.
 *
 * Uses Redis data structures for efficient storage:
 * - Hashes for full state and snapshots
 * - Sorted sets for deltas (ordered by sequence number)
 * - TTL support for automatic cleanup
 *
 * Redis key structure:
 * ```
 * spider:state:{sessionId}      # Hash: full state
 * spider:snapshot:{sessionId}   # Hash: latest snapshot + sequence
 * spider:deltas:{sessionId}     # Sorted set: sequence -> delta JSON
 * spider:sessions               # Set: all session IDs
 * ```
 *
 * NOTE(review): RedisClientInterface declares no set commands and no expiry
 * commands, so the "Set" and "TTL" statements above cannot be confirmed from
 * the declarations alone - verify against the implementation.
 *
 * @group Backends
 * @public
 */
export declare class RedisStorageBackend implements StorageBackend {
    private readonly redis;
    private readonly keyPrefix;
    readonly capabilities: StorageCapabilities;
    readonly name = "RedisStorageBackend";
    constructor(redis: RedisClientInterface, keyPrefix?: string);
    initialize: () => Effect.Effect<void, PersistenceError>;
    cleanup: () => Effect.Effect<void, PersistenceError>;
    saveState: (key: SpiderStateKey, state: SpiderState) => Effect.Effect<void, PersistenceError>;
    loadState: (key: SpiderStateKey) => Effect.Effect<SpiderState | null, PersistenceError>;
    deleteState: (key: SpiderStateKey) => Effect.Effect<void, PersistenceError>;
    saveDelta: (delta: StateDelta) => Effect.Effect<void, PersistenceError>;
    saveDeltas: (deltas: StateDelta[]) => Effect.Effect<void, PersistenceError>;
    loadDeltas: (key: SpiderStateKey, fromSequence?: number) => Effect.Effect<StateDelta[], PersistenceError>;
    saveSnapshot: (key: SpiderStateKey, state: SpiderState, sequence: number) => Effect.Effect<void, PersistenceError>;
    loadLatestSnapshot: (key: SpiderStateKey) => Effect.Effect<{
        state: SpiderState;
        sequence: number;
    } | null, PersistenceError>;
    compactDeltas: (key: SpiderStateKey, beforeSequence: number) => Effect.Effect<void, PersistenceError>;
    listSessions: () => Effect.Effect<SpiderStateKey[], PersistenceError>;
    private getStateKey;
    private getSnapshotKey;
    private getDeltasKey;
    private getSessionsKey;
    private addToSessionsList;
    private removeFromSessionsList;
}
|
|
92
|
+
//# sourceMappingURL=RedisStorageBackend.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"RedisStorageBackend.d.ts","sourceRoot":"","sources":["../../../../src/lib/Resumability/backends/RedisStorageBackend.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAU,MAAM,QAAQ,CAAC;AACxC,OAAO,EACL,WAAW,EACX,cAAc,EACf,MAAM,4CAA4C,CAAC;AACpD,OAAO,EACL,gBAAgB,EAChB,UAAU,EACV,cAAc,EACd,mBAAmB,EACpB,MAAM,aAAa,CAAC;AAErB;;;;;;;;GAQG;AACH,MAAM,WAAW,oBAAoB;IACnC,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;IACzC,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC/C,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAChC,MAAM,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IACtC,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;IACzD,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC/D,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAChD,OAAO,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACtD,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAChE,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACpE,aAAa,CACX,GAAG,EAAE,MAAM,EACX,GAAG,EAAE,MAAM,GAAG,MAAM,EACpB,GAAG,EAAE,MAAM,GAAG,MAAM,GACnB,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACrB,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACjD,gBAAgB,CACd,GAAG,EAAE,MAAM,EACX,GAAG,EAAE,MAAM,GAAG,MAAM,EACpB,GAAG,EAAE,MAAM,GAAG,MAAM,GACnB,OAAO,CAAC,IAAI,CAAC,CAAC;IACjB,IAAI,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACzC,QAAQ,CAAC,IAAI,aAAa,CAAC;IAC3B,KAAK,CAAC,IAAI,UAAU,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,aAAa,CAAC;IAChE,IAAI,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,UAAU,CAAC;IAC7D,IAAI,IAAI,OAAO,CAAC,GAAG,
EAAE,CAAC,CAAC;CACxB;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,qBAAa,mBAAoB,YAAW,cAAc;IAYtD,OAAO,CAAC,QAAQ,CAAC,KAAK;IACtB,OAAO,CAAC,QAAQ,CAAC,SAAS;IAZ5B,QAAQ,CAAC,YAAY,EAAE,mBAAmB,CAMxC;IAEF,QAAQ,CAAC,IAAI,yBAAyB;gBAGnB,KAAK,EAAE,oBAAoB,EAC3B,SAAS,SAAW;IAGvC,UAAU,QAAO,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAC1B;IAE5B,OAAO,QAAO,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CACvB;IAG5B,SAAS,GACP,KAAK,cAAc,EACnB,OAAO,WAAW,KACjB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA0BtC;IAEF,SAAS,GACP,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,WAAW,GAAG,IAAI,EAAE,gBAAgB,CAAC,CAwCpD;IAEF,WAAW,GACT,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAoCtC;IAGF,SAAS,GAAI,OAAO,UAAU,KAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA6BpE;IAEF,UAAU,GACR,QAAQ,UAAU,EAAE,KACnB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA0DtC;IAEF,UAAU,GACR,KAAK,cAAc,EACnB,qBAAgB,KACf,MAAM,CAAC,MAAM,CAAC,UAAU,EAAE,EAAE,gBAAgB,CAAC,CAyC9C;IAGF,YAAY,GACV,KAAK,cAAc,EACnB,OAAO,WAAW,EAClB,UAAU,MAAM,KACf,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAiCtC;IAEF,kBAAkB,GAChB,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CACd;QAAE,KAAK,EAAE,WAAW,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,EAC/C,gBAAgB,CACjB,CA2CC;IAGF,aAAa,GACX,KAAK,cAAc,EACnB,gBAAgB,MAAM,KACrB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAetC;IAEF,YAAY,QAAO,MAAM,CAAC,MAAM,CAAC,cAAc,EAAE,EAAE,gBAAgB,CAAC,CAwClE;IAGF,OAAO,CAAC,WAAW,CACmB;IAEtC,OAAO,CAAC,cAAc,CACmB;IAEzC,OAAO,CAAC,YAAY,CACmB;IAEvC,OAAO,CAAC,cAAc,CAA8C;IAEpE,OAAO,CAAC,iBAAiB,CAgBvB;IAEF,OAAO,CAAC,sBAAsB,CAgB5B;CACH"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Resumable spider crawling with configurable persistence strategies.
|
|
3
|
+
*
|
|
4
|
+
* This module provides a complete solution for resumable web crawling with
|
|
5
|
+
* support for different persistence strategies and storage backends.
|
|
6
|
+
*
|
|
7
|
+
* ## Key Features
|
|
8
|
+
*
|
|
9
|
+
* - **Multiple Strategies**: Full state, delta, hybrid, and auto-selection
|
|
10
|
+
* - **Multiple Backends**: File system, Redis, PostgreSQL with extensible interface
|
|
11
|
+
* - **Effect.js Native**: Full integration with Effect.js ecosystem
|
|
12
|
+
* - **Type Safe**: Complete TypeScript support with runtime validation
|
|
13
|
+
* - **Production Ready**: Handles concurrency, errors, and edge cases
|
|
14
|
+
*
|
|
15
|
+
* ## Quick Start
|
|
16
|
+
*
|
|
17
|
+
* ```typescript
|
|
18
|
+
* import { ResumabilityService, ResumabilityConfigs } from '@jambudipa/spider/resumability';
|
|
19
|
+
*
|
|
20
|
+
* // File-based resumability
|
|
21
|
+
* const resumabilityLayer = ResumabilityService.fromConfig(
|
|
22
|
+
* ResumabilityConfigs.file('./spider-state', 'hybrid')
|
|
23
|
+
* );
|
|
24
|
+
*
|
|
25
|
+
* // Use with Spider
|
|
26
|
+
* const program = Effect.gen(function* () {
|
|
27
|
+
* const spider = yield* Spider;
|
|
28
|
+
* const resumability = yield* ResumabilityService;
|
|
29
|
+
*
|
|
30
|
+
* // Configure resumable crawling...
|
|
31
|
+
* });
|
|
32
|
+
*
|
|
33
|
+
* Effect.runPromise(
|
|
34
|
+
* program.pipe(
|
|
35
|
+
* Effect.provide(Spider.Default),
|
|
36
|
+
* Effect.provide(resumabilityLayer)
|
|
37
|
+
* )
|
|
38
|
+
* );
|
|
39
|
+
* ```
|
|
40
|
+
*
|
|
41
|
+
* @group Resumability
|
|
42
|
+
* @public
|
|
43
|
+
*/
|
|
44
|
+
export type { StorageBackend, StorageCapabilities, PersistenceStrategy, StateOperation, HybridPersistenceConfig, } from './types.js';
|
|
45
|
+
export { StateDelta, PersistenceError, DEFAULT_HYBRID_CONFIG, } from './types.js';
|
|
46
|
+
export { FullStatePersistence, DeltaPersistence, HybridPersistence, } from './strategies.js';
|
|
47
|
+
export { FileStorageBackend } from './backends/FileStorageBackend.js';
|
|
48
|
+
export { RedisStorageBackend, type RedisClientInterface, type RedisPipeline, type RedisMulti, } from './backends/RedisStorageBackend.js';
|
|
49
|
+
export { PostgresStorageBackend, type DatabaseClientInterface, type PostgresStorageConfig, } from './backends/PostgresStorageBackend.js';
|
|
50
|
+
export { ResumabilityService, ResumabilityConfigs, createStateOperation, type ResumabilityConfig, } from './Resumability.service.js';
|
|
51
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/lib/Resumability/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AAGH,YAAY,EACV,cAAc,EACd,mBAAmB,EACnB,mBAAmB,EACnB,cAAc,EACd,uBAAuB,GACxB,MAAM,YAAY,CAAC;AAEpB,OAAO,EACL,UAAU,EACV,gBAAgB,EAChB,qBAAqB,GACtB,MAAM,YAAY,CAAC;AAGpB,OAAO,EACL,oBAAoB,EACpB,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,iBAAiB,CAAC;AAGzB,OAAO,EAAE,kBAAkB,EAAE,MAAM,kCAAkC,CAAC;AACtE,OAAO,EACL,mBAAmB,EACnB,KAAK,oBAAoB,EACzB,KAAK,aAAa,EAClB,KAAK,UAAU,GAChB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EACL,sBAAsB,EACtB,KAAK,uBAAuB,EAC5B,KAAK,qBAAqB,GAC3B,MAAM,sCAAsC,CAAC;AAG9C,OAAO,EACL,mBAAmB,EACnB,mBAAmB,EACnB,oBAAoB,EACpB,KAAK,kBAAkB,GACxB,MAAM,2BAA2B,CAAC"}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { Effect } from 'effect';
|
|
2
|
+
import { SpiderState, SpiderStateKey } from '../Scheduler/SpiderScheduler.service.js';
|
|
3
|
+
import { HybridPersistenceConfig, PersistenceError, PersistenceStrategy, StateOperation, StorageBackend } from './types.js';
|
|
4
|
+
/**
|
|
5
|
+
* Full state persistence strategy.
|
|
6
|
+
*
|
|
7
|
+
* Saves the complete spider state on every operation. Simple and reliable,
|
|
8
|
+
* but can be inefficient for large crawls with many URLs.
|
|
9
|
+
*
|
|
10
|
+
* @group Strategies
|
|
11
|
+
* @public
|
|
12
|
+
*/
|
|
13
|
+
export declare class FullStatePersistence implements PersistenceStrategy {
|
|
14
|
+
/** NOTE(review): private members are emitted without a type in declaration files; presumably holds the constructor's `backend` argument — confirm in strategies.ts. */
private readonly backend;
|
|
15
|
+
/** @param backend - the storage backend handed to this strategy. */
constructor(backend: StorageBackend);
|
|
16
|
+
/** Takes one state operation; the returned Effect succeeds with no value or fails with a PersistenceError. */
persist: (operation: StateOperation) => Effect.Effect<void, PersistenceError>;
|
|
17
|
+
/** Takes a state key; the returned Effect succeeds with a SpiderState or `null` (presumably `null` means nothing was stored for the key — confirm), or fails with a PersistenceError. */
restore: (key: SpiderStateKey) => Effect.Effect<SpiderState | null, PersistenceError>;
|
|
18
|
+
/** Takes a state key; the returned Effect succeeds with no value or fails with a PersistenceError. Presumably removes what was persisted for the key — confirm. */
cleanup: (key: SpiderStateKey) => Effect.Effect<void, PersistenceError>;
|
|
19
|
+
/** Plain (non-Effect) function returning a name, a description and a list of capability strings. */
getInfo: () => {
|
|
20
|
+
name: string;
|
|
21
|
+
description: string;
|
|
22
|
+
capabilities: string[];
|
|
23
|
+
};
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Delta persistence strategy.
|
|
27
|
+
*
|
|
28
|
+
* Saves only incremental changes (deltas) instead of the full state.
|
|
29
|
+
* Much more efficient for large crawls, but requires delta replay for restoration.
|
|
30
|
+
*
|
|
31
|
+
* @group Strategies
|
|
32
|
+
* @public
|
|
33
|
+
*/
|
|
34
|
+
export declare class DeltaPersistence implements PersistenceStrategy {
|
|
35
|
+
/** NOTE(review): private members are emitted without a type in declaration files; presumably holds the constructor's `backend` argument — confirm in strategies.ts. */
private readonly backend;
|
|
36
|
+
/** @param backend - the storage backend handed to this strategy. */
constructor(backend: StorageBackend);
|
|
37
|
+
/** Takes one state operation; the returned Effect succeeds with no value or fails with a PersistenceError. */
persist: (operation: StateOperation) => Effect.Effect<void, PersistenceError>;
|
|
38
|
+
/** Takes a state key; the returned Effect succeeds with a SpiderState or `null` (presumably `null` means nothing was stored for the key — confirm), or fails with a PersistenceError. */
restore: (key: SpiderStateKey) => Effect.Effect<SpiderState | null, PersistenceError>;
|
|
39
|
+
/** Takes a state key; the returned Effect succeeds with no value or fails with a PersistenceError. Presumably removes what was persisted for the key — confirm. */
cleanup: (key: SpiderStateKey) => Effect.Effect<void, PersistenceError>;
|
|
40
|
+
/** Takes a state key and a read-only array of StateDelta values; the returned Effect succeeds with a SpiderState (never `null`) or fails with a PersistenceError. Public, unlike the delta helpers on HybridPersistence. */
reconstructStateFromDeltas: (key: SpiderStateKey, deltas: ReadonlyArray<import("./types.js").StateDelta>) => Effect.Effect<SpiderState, PersistenceError>;
|
|
41
|
+
/** Plain (non-Effect) function returning a name, a description and a list of capability strings. */
getInfo: () => {
|
|
42
|
+
name: string;
|
|
43
|
+
description: string;
|
|
44
|
+
capabilities: string[];
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Hybrid persistence strategy.
|
|
49
|
+
*
|
|
50
|
+
* Combines delta and full state approaches for optimal performance.
|
|
51
|
+
* Saves deltas for efficiency, with periodic snapshots for fast recovery.
|
|
52
|
+
*
|
|
53
|
+
* @group Strategies
|
|
54
|
+
* @public
|
|
55
|
+
*/
|
|
56
|
+
export declare class HybridPersistence implements PersistenceStrategy {
|
|
57
|
+
/** NOTE(review): private members below are emitted without types in declaration files; their shapes and roles must be confirmed in strategies.ts. */
private readonly backend;
|
|
58
|
+
/** Presumably holds the constructor's `config` argument — confirm. */
private readonly config;
|
|
59
|
+
/** Mutable (not readonly) private state; presumably a count of persisted operations — confirm. */
private operationCount;
|
|
60
|
+
/** Mutable private state; presumably the sequence number of the latest snapshot — confirm. */
private lastSnapshotSequence;
|
|
61
|
+
/** Mutable private state; presumably deltas buffered before being written — confirm. */
private pendingDeltas;
|
|
62
|
+
/**
 * @param backend - the storage backend handed to this strategy.
 * @param config - optional hybrid settings; the default used when omitted is not visible in this declaration (presumably DEFAULT_HYBRID_CONFIG — confirm).
 */
constructor(backend: StorageBackend, config?: HybridPersistenceConfig);
|
|
63
|
+
/** Takes one state operation; the returned Effect succeeds with no value or fails with a PersistenceError. */
persist: (operation: StateOperation) => Effect.Effect<void, PersistenceError>;
|
|
64
|
+
/** Private helper; signature not emitted. Presumably writes a full snapshot — confirm. */
private saveSnapshot;
|
|
65
|
+
/** Private helper; signature not emitted. Presumably writes a single delta — confirm. */
private saveDelta;
|
|
66
|
+
/** Private helper; signature not emitted. Presumably writes out `pendingDeltas` — confirm. */
private flushPendingDeltas;
|
|
67
|
+
/** Takes a state key; the returned Effect succeeds with a SpiderState or `null` (presumably `null` means nothing was stored for the key — confirm), or fails with a PersistenceError. */
restore: (key: SpiderStateKey) => Effect.Effect<SpiderState | null, PersistenceError>;
|
|
68
|
+
/** Private helper; signature not emitted. Presumably replays deltas on top of a restored snapshot — confirm. */
private applyDeltasToState;
|
|
69
|
+
/** Takes a state key; the returned Effect succeeds with no value or fails with a PersistenceError. Presumably removes what was persisted for the key — confirm. */
cleanup: (key: SpiderStateKey) => Effect.Effect<void, PersistenceError>;
|
|
70
|
+
/** Plain (non-Effect) function returning a name, a description and a list of capability strings. */
getInfo: () => {
|
|
71
|
+
name: string;
|
|
72
|
+
description: string;
|
|
73
|
+
capabilities: string[];
|
|
74
|
+
};
|
|
75
|
+
}
|
|
76
|
+
//# sourceMappingURL=strategies.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"strategies.d.ts","sourceRoot":"","sources":["../../../src/lib/Resumability/strategies.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAChC,OAAO,EACL,WAAW,EACX,cAAc,EACf,MAAM,yCAAyC,CAAC;AACjD,OAAO,EAEL,uBAAuB,EACvB,gBAAgB,EAChB,mBAAmB,EACnB,cAAc,EACd,cAAc,EACf,MAAM,YAAY,CAAC;AAEpB;;;;;;;;GAQG;AACH,qBAAa,oBAAqB,YAAW,mBAAmB;IAClD,OAAO,CAAC,QAAQ,CAAC,OAAO;gBAAP,OAAO,EAAE,cAAc;IAEpD,OAAO,GACL,WAAW,cAAc,KACxB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAiBtC;IAEF,OAAO,GACL,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,WAAW,GAAG,IAAI,EAAE,gBAAgB,CAAC,CAcpD;IAEF,OAAO,GAAI,KAAK,cAAc,KAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAcpE;IAEF,OAAO;;;;MAKJ;CACJ;AAED;;;;;;;;GAQG;AACH,qBAAa,gBAAiB,YAAW,mBAAmB;IAC9C,OAAO,CAAC,QAAQ,CAAC,OAAO;gBAAP,OAAO,EAAE,cAAc;IAEpD,OAAO,GACL,WAAW,cAAc,KACxB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CActC;IAEF,OAAO,GACL,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,WAAW,GAAG,IAAI,EAAE,gBAAgB,CAAC,CAoBpD;IAEF,OAAO,GAAI,KAAK,cAAc,KAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAmBpE;IAEF,0BAA0B,GACxB,KAAK,cAAc,EACnB,QAAQ,aAAa,CAAC,OAAO,YAAY,EAAE,UAAU,CAAC,KACrD,MAAM,CAAC,MAAM,CAAC,WAAW,EAAE,gBAAgB,CAAC,CA+D1C;IAEL,OAAO;;;;MAKJ;CACJ;AAED;;;;;;;;GAQG;AACH,qBAAa,iBAAkB,YAAW,mBAAmB;IAMzD,OAAO,CAAC,QAAQ,CAAC,OAAO;IACxB,OAAO,CAAC,QAAQ,CAAC,MAAM;IANzB,OAAO,CAAC,cAAc,CAAK;IAC3B,OAAO,CAAC,oBAAoB,CAAK;IACjC,OAAO,CAAC,aAAa,CAAyC;gBAG3C,OAAO,EAAE,cAAc,EACvB,MAAM,GAAE,uBAA+C;IAG1E,OAAO,GACL,WAAW,cAAc,KACxB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA+BtC;IAEF,OAAO,CAAC,YAAY,CAkClB;IAEF,OAAO,CAAC,SAAS,CAmBf;IAEF,OAAO,CAAC,kBAAkB,CAwBxB;IAEF,OAAO,GACL,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,WAAW,GAAG,IAAI,EAAE,gBAAgB,CAAC,CAyCpD;IAEF,OAAO,CAAC,kBAAkB,CAwExB;IAEF,OAAO,GAAI,KAAK,cAAc,KAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAepE;IAEF,OAAO;;;;MAWJ;CACJ"}
|