@jambudipa/spider 0.2.0 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. package/README.md +75 -35
  2. package/dist/browser/BrowserManager.d.ts +63 -0
  3. package/dist/browser/BrowserManager.d.ts.map +1 -0
  4. package/dist/browser/PlaywrightAdapter.d.ts +166 -0
  5. package/dist/browser/PlaywrightAdapter.d.ts.map +1 -0
  6. package/dist/examples/01-basic-crawl-working.d.ts +13 -0
  7. package/dist/examples/01-basic-crawl-working.d.ts.map +1 -0
  8. package/dist/examples/02-multiple-urls-working.d.ts +13 -0
  9. package/dist/examples/02-multiple-urls-working.d.ts.map +1 -0
  10. package/dist/examples/03-url-filtering.d.ts +13 -0
  11. package/dist/examples/03-url-filtering.d.ts.map +1 -0
  12. package/dist/examples/04-robots-compliance.d.ts +14 -0
  13. package/dist/examples/04-robots-compliance.d.ts.map +1 -0
  14. package/dist/examples/05-link-extraction-selectors.d.ts +14 -0
  15. package/dist/examples/05-link-extraction-selectors.d.ts.map +1 -0
  16. package/dist/examples/06-custom-middleware.d.ts +18 -0
  17. package/dist/examples/06-custom-middleware.d.ts.map +1 -0
  18. package/dist/examples/07-resumability-demo.d.ts +14 -0
  19. package/dist/examples/07-resumability-demo.d.ts.map +1 -0
  20. package/dist/examples/08-worker-monitoring.d.ts +15 -0
  21. package/dist/examples/08-worker-monitoring.d.ts.map +1 -0
  22. package/dist/examples/09-error-handling-recovery.d.ts +15 -0
  23. package/dist/examples/09-error-handling-recovery.d.ts.map +1 -0
  24. package/dist/index.d.ts +33 -0
  25. package/dist/index.d.ts.map +1 -0
  26. package/dist/index.js +3596 -1440
  27. package/dist/index.js.map +1 -1
  28. package/dist/lib/BrowserEngine/BrowserEngine.service.d.ts +107 -0
  29. package/dist/lib/BrowserEngine/BrowserEngine.service.d.ts.map +1 -0
  30. package/dist/lib/Config/SpiderConfig.service.d.ts +256 -0
  31. package/dist/lib/Config/SpiderConfig.service.d.ts.map +1 -0
  32. package/dist/lib/HttpClient/CookieManager.d.ts +58 -0
  33. package/dist/lib/HttpClient/CookieManager.d.ts.map +1 -0
  34. package/dist/lib/HttpClient/EnhancedHttpClient.d.ts +63 -0
  35. package/dist/lib/HttpClient/EnhancedHttpClient.d.ts.map +1 -0
  36. package/dist/lib/HttpClient/SessionStore.d.ts +114 -0
  37. package/dist/lib/HttpClient/SessionStore.d.ts.map +1 -0
  38. package/dist/lib/HttpClient/TokenExtractor.d.ts +83 -0
  39. package/dist/lib/HttpClient/TokenExtractor.d.ts.map +1 -0
  40. package/dist/lib/HttpClient/index.d.ts +8 -0
  41. package/dist/lib/HttpClient/index.d.ts.map +1 -0
  42. package/dist/lib/LinkExtractor/LinkExtractor.service.d.ts +166 -0
  43. package/dist/lib/LinkExtractor/LinkExtractor.service.d.ts.map +1 -0
  44. package/dist/lib/LinkExtractor/index.d.ts +37 -0
  45. package/dist/lib/LinkExtractor/index.d.ts.map +1 -0
  46. package/dist/lib/Logging/FetchLogger.d.ts +24 -0
  47. package/dist/lib/Logging/FetchLogger.d.ts.map +1 -0
  48. package/dist/lib/Logging/SpiderLogger.service.d.ts +37 -0
  49. package/dist/lib/Logging/SpiderLogger.service.d.ts.map +1 -0
  50. package/dist/lib/Middleware/SpiderMiddleware.d.ts +239 -0
  51. package/dist/lib/Middleware/SpiderMiddleware.d.ts.map +1 -0
  52. package/dist/lib/Middleware/types.d.ts +99 -0
  53. package/dist/lib/Middleware/types.d.ts.map +1 -0
  54. package/dist/lib/PageData/PageData.d.ts +28 -0
  55. package/dist/lib/PageData/PageData.d.ts.map +1 -0
  56. package/dist/lib/Resumability/Resumability.service.d.ts +178 -0
  57. package/dist/lib/Resumability/Resumability.service.d.ts.map +1 -0
  58. package/dist/lib/Resumability/backends/FileStorageBackend.d.ts +47 -0
  59. package/dist/lib/Resumability/backends/FileStorageBackend.d.ts.map +1 -0
  60. package/dist/lib/Resumability/backends/PostgresStorageBackend.d.ts +95 -0
  61. package/dist/lib/Resumability/backends/PostgresStorageBackend.d.ts.map +1 -0
  62. package/dist/lib/Resumability/backends/RedisStorageBackend.d.ts +92 -0
  63. package/dist/lib/Resumability/backends/RedisStorageBackend.d.ts.map +1 -0
  64. package/dist/lib/Resumability/index.d.ts +51 -0
  65. package/dist/lib/Resumability/index.d.ts.map +1 -0
  66. package/dist/lib/Resumability/strategies.d.ts +76 -0
  67. package/dist/lib/Resumability/strategies.d.ts.map +1 -0
  68. package/dist/lib/Resumability/types.d.ts +201 -0
  69. package/dist/lib/Resumability/types.d.ts.map +1 -0
  70. package/dist/lib/Robots/Robots.service.d.ts +78 -0
  71. package/dist/lib/Robots/Robots.service.d.ts.map +1 -0
  72. package/dist/lib/Scheduler/SpiderScheduler.service.d.ts +211 -0
  73. package/dist/lib/Scheduler/SpiderScheduler.service.d.ts.map +1 -0
  74. package/dist/lib/Scraper/Scraper.service.d.ts +123 -0
  75. package/dist/lib/Scraper/Scraper.service.d.ts.map +1 -0
  76. package/dist/lib/Spider/Spider.service.d.ts +249 -0
  77. package/dist/lib/Spider/Spider.service.d.ts.map +1 -0
  78. package/dist/lib/StateManager/StateManager.service.d.ts +107 -0
  79. package/dist/lib/StateManager/StateManager.service.d.ts.map +1 -0
  80. package/dist/lib/StateManager/index.d.ts +5 -0
  81. package/dist/lib/StateManager/index.d.ts.map +1 -0
  82. package/dist/lib/UrlDeduplicator/UrlDeduplicator.service.d.ts +58 -0
  83. package/dist/lib/UrlDeduplicator/UrlDeduplicator.service.d.ts.map +1 -0
  84. package/dist/lib/WebScrapingEngine/WebScrapingEngine.service.d.ts +110 -0
  85. package/dist/lib/WebScrapingEngine/WebScrapingEngine.service.d.ts.map +1 -0
  86. package/dist/lib/WebScrapingEngine/index.d.ts +5 -0
  87. package/dist/lib/WebScrapingEngine/index.d.ts.map +1 -0
  88. package/dist/lib/WorkerHealth/WorkerHealthMonitor.service.d.ts +39 -0
  89. package/dist/lib/WorkerHealth/WorkerHealthMonitor.service.d.ts.map +1 -0
  90. package/dist/lib/api-facades.d.ts +313 -0
  91. package/dist/lib/api-facades.d.ts.map +1 -0
  92. package/dist/lib/errors/effect-errors.d.ts +179 -0
  93. package/dist/lib/errors/effect-errors.d.ts.map +1 -0
  94. package/dist/lib/errors.d.ts +172 -0
  95. package/dist/lib/errors.d.ts.map +1 -0
  96. package/dist/lib/utils/FileUtils.d.ts +284 -0
  97. package/dist/lib/utils/FileUtils.d.ts.map +1 -0
  98. package/dist/lib/utils/JsonUtils.d.ts +196 -0
  99. package/dist/lib/utils/JsonUtils.d.ts.map +1 -0
  100. package/dist/lib/utils/RegexUtils.d.ts +257 -0
  101. package/dist/lib/utils/RegexUtils.d.ts.map +1 -0
  102. package/dist/lib/utils/SchemaUtils.d.ts +251 -0
  103. package/dist/lib/utils/SchemaUtils.d.ts.map +1 -0
  104. package/dist/lib/utils/UrlUtils.d.ts +223 -0
  105. package/dist/lib/utils/UrlUtils.d.ts.map +1 -0
  106. package/dist/lib/utils/effect-migration.d.ts +31 -0
  107. package/dist/lib/utils/effect-migration.d.ts.map +1 -0
  108. package/dist/lib/utils/index.d.ts +15 -0
  109. package/dist/lib/utils/index.d.ts.map +1 -0
  110. package/dist/lib/utils/url-deduplication.d.ts +108 -0
  111. package/dist/lib/utils/url-deduplication.d.ts.map +1 -0
  112. package/dist/lib/utils/url-deduplication.test.d.ts +5 -0
  113. package/dist/lib/utils/url-deduplication.test.d.ts.map +1 -0
  114. package/dist/test/infrastructure/EffectTestUtils.d.ts +167 -0
  115. package/dist/test/infrastructure/EffectTestUtils.d.ts.map +1 -0
  116. package/package.json +23 -9
@@ -0,0 +1,99 @@
1
+ /**
2
+ * Data type definitions for Spider Middleware
3
+ * Using Effect's Data.Class for immutability and built-in equality
4
+ */
5
+ import { Data, Option } from 'effect';
6
+ import { PageData } from '../PageData/PageData.js';
7
+ /**
8
+ * Represents a single crawling task with URL and depth information.
9
+ * Used internally by the Spider service for task management.
10
+ */
11
+ export interface CrawlTask {
12
+ /** The URL to be crawled */
13
+ url: string;
14
+ /** The depth level of this URL relative to the starting URL */
15
+ depth: number;
16
+ /** The URL from which this URL was discovered (optional) */
17
+ fromUrl?: string;
18
+ /** Optional metadata to be passed through to the result */
19
+ metadata?: Record<string, unknown>;
20
+ /** Optional data extraction configuration */
21
+ extractData?: Record<string, unknown>;
22
+ }
23
+ /**
24
+ * Request object used in the middleware pipeline.
25
+ *
26
+ * Contains the crawl task along with optional headers and metadata
27
+ * that can be modified by middleware during processing.
28
+ *
29
+ * Uses Data.Class for:
30
+ * - Built-in equality checking
31
+ * - Immutability by default
32
+ * - Better pattern matching support
33
+ *
34
+ * @group Data Types
35
+ * @public
36
+ */
37
+ export declare class SpiderRequest extends Data.Class<{
38
+ /** The crawl task containing URL and depth information */
39
+ readonly task: CrawlTask;
40
+ /** HTTP headers to include with the request */
41
+ readonly headers: Option.Option<Record<string, string>>;
42
+ /** Additional metadata that can be used by middleware */
43
+ readonly meta: Option.Option<Record<string, unknown>>;
44
+ }> {
45
+ /**
46
+ * Create a SpiderRequest from a CrawlTask
47
+ */
48
+ static fromTask(task: CrawlTask, headers?: Record<string, string>, meta?: Record<string, unknown>): SpiderRequest;
49
+ /**
50
+ * Add or update headers
51
+ */
52
+ withHeaders(headers: Record<string, string>): SpiderRequest;
53
+ /**
54
+ * Add or update metadata
55
+ */
56
+ withMeta(meta: Record<string, unknown>): SpiderRequest;
57
+ }
58
+ /**
59
+ * Response object used in the middleware pipeline.
60
+ *
61
+ * Contains the extracted page data along with optional HTTP response
62
+ * information and metadata from middleware processing.
63
+ *
64
+ * Uses Data.Class for:
65
+ * - Built-in equality checking
66
+ * - Immutability by default
67
+ * - Better pattern matching support
68
+ *
69
+ * @group Data Types
70
+ * @public
71
+ */
72
+ export declare class SpiderResponse extends Data.Class<{
73
+ /** The extracted page data including content, links, and metadata */
74
+ readonly pageData: PageData;
75
+ /** HTTP status code of the response */
76
+ readonly statusCode: Option.Option<number>;
77
+ /** HTTP response headers */
78
+ readonly headers: Option.Option<Record<string, string>>;
79
+ /** Additional metadata from middleware processing */
80
+ readonly meta: Option.Option<Record<string, unknown>>;
81
+ }> {
82
+ /**
83
+ * Create a SpiderResponse from PageData
84
+ */
85
+ static fromPageData(pageData: PageData, statusCode?: number, headers?: Record<string, string>, meta?: Record<string, unknown>): SpiderResponse;
86
+ /**
87
+ * Update the page data
88
+ */
89
+ withPageData(pageData: PageData): SpiderResponse;
90
+ /**
91
+ * Add or update metadata
92
+ */
93
+ withMeta(meta: Record<string, unknown>): SpiderResponse;
94
+ /**
95
+ * Check if the response was successful (2xx status code)
96
+ */
97
+ isSuccessful(): boolean;
98
+ }
99
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/lib/Middleware/types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AACtC,OAAO,EAAE,QAAQ,EAAE,MAAM,yBAAyB,CAAC;AAEnD;;;GAGG;AACH,MAAM,WAAW,SAAS;IACxB,4BAA4B;IAC5B,GAAG,EAAE,MAAM,CAAC;IACZ,+DAA+D;IAC/D,KAAK,EAAE,MAAM,CAAC;IACd,4DAA4D;IAC5D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,2DAA2D;IAC3D,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnC,6CAA6C;IAC7C,WAAW,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACvC;AAED;;;;;;;;;;;;;GAaG;AACH,qBAAa,aAAc,SAAQ,IAAI,CAAC,KAAK,CAAC;IAC5C,0DAA0D;IAC1D,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAC;IACzB,+CAA+C;IAC/C,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACxD,yDAAyD;IACzD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;CACvD,CAAC;IACA;;OAEG;IACH,MAAM,CAAC,QAAQ,CACb,IAAI,EAAE,SAAS,EACf,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EAChC,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC7B,aAAa;IAQhB;;OAEG;IACH,WAAW,CAAC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,aAAa;IAQ3D;;OAEG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,aAAa;CAOvD;AAED;;;;;;;;;;;;;GAaG;AACH,qBAAa,cAAe,SAAQ,IAAI,CAAC,KAAK,CAAC;IAC7C,qEAAqE;IACrE,QAAQ,CAAC,QAAQ,EAAE,QAAQ,CAAC;IAC5B,uCAAuC;IACvC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IAC3C,4BAA4B;IAC5B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACxD,qDAAqD;IACrD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;CACvD,CAAC;IACA;;OAEG;IACH,MAAM,CAAC,YAAY,CACjB,QAAQ,EAAE,QAAQ,EAClB,UAAU,CAAC,EAAE,MAAM,EACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EAChC,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAC7B,cAAc;IASjB;;OAEG;IACH,YAAY,CAAC,QAAQ,EAAE,QAAQ,GAAG,cAAc;IAOhD;;OAEG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,cAAc;IAQvD;;OAEG;IACH,YAAY,IAAI,OAAO;CAMxB"}
@@ -0,0 +1,28 @@
1
+ import { Schema } from 'effect';
2
+ export declare const PageDataSchema: Schema.Struct<{
3
+ url: Schema.filter<typeof Schema.String>;
4
+ html: typeof Schema.String;
5
+ title: Schema.optional<typeof Schema.String>;
6
+ /** All available metadata from meta tags */
7
+ metadata: Schema.Record$<typeof Schema.String, typeof Schema.String>;
8
+ /** Commonly used metadata fields for convenience */
9
+ commonMetadata: Schema.optional<Schema.Struct<{
10
+ description: Schema.optional<typeof Schema.String>;
11
+ keywords: Schema.optional<typeof Schema.String>;
12
+ author: Schema.optional<typeof Schema.String>;
13
+ robots: Schema.optional<typeof Schema.String>;
14
+ }>>;
15
+ statusCode: Schema.filter<Schema.filter<typeof Schema.Number>>;
16
+ /** All response headers */
17
+ headers: Schema.Record$<typeof Schema.String, typeof Schema.String>;
18
+ /** When the fetch operation started */
19
+ fetchedAt: typeof Schema.DateFromSelf;
20
+ /** How long the entire fetch and parse operation took in milliseconds */
21
+ scrapeDurationMs: typeof Schema.Number;
22
+ /** The crawl depth (number of hops from the starting URL) */
23
+ depth: Schema.filter<Schema.filter<typeof Schema.Number>>;
24
+ /** Optional extracted data from the page */
25
+ extractedData: Schema.optional<Schema.Record$<typeof Schema.String, typeof Schema.Unknown>>;
26
+ }>;
27
+ export type PageData = Schema.Schema.Type<typeof PageDataSchema>;
28
+ //# sourceMappingURL=PageData.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"PageData.d.ts","sourceRoot":"","sources":["../../../src/lib/PageData/PageData.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAEhC,eAAO,MAAM,cAAc;;;;IAQzB,4CAA4C;;IAE5C,oDAAoD;;;;;;;;IAUpD,2BAA2B;;IAE3B,uCAAuC;;IAEvC,yEAAyE;;IAEzE,6DAA6D;;IAE7D,4CAA4C;;EAI5C,CAAC;AAEH,MAAM,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,cAAc,CAAC,CAAC"}
@@ -0,0 +1,178 @@
1
+ import { Effect, Option } from 'effect';
2
+ import { SpiderState, SpiderStateKey } from '../Scheduler/SpiderScheduler.service.js';
3
+ import { HybridPersistenceConfig, PersistenceError, StateDelta, StateOperation, StorageBackend } from './types.js';
4
+ import { type RedisClientInterface } from './backends/RedisStorageBackend.js';
5
+ import { type DatabaseClientInterface, type PostgresStorageConfig } from './backends/PostgresStorageBackend.js';
6
+ /**
7
+ * Configuration for the ResumabilityService.
8
+ *
9
+ * Allows choosing between different persistence strategies and
10
+ * configuring their behavior based on use case requirements.
11
+ *
12
+ * @group Configuration
13
+ * @public
14
+ */
15
+ export interface ResumabilityConfig {
16
+ /** Persistence strategy to use */
17
+ strategy: 'full-state' | 'delta' | 'hybrid' | 'auto';
18
+ /** Storage backend implementation */
19
+ backend: StorageBackend;
20
+ /** Configuration for hybrid strategy (only used when strategy is 'hybrid') */
21
+ hybridConfig?: HybridPersistenceConfig;
22
+ }
23
+ declare const ResumabilityService_base: Effect.Service.Class<ResumabilityService, "@jambudipa.io/ResumabilityService", {
24
+ readonly effect: Effect.Effect<{
25
+ /**
26
+ * Configure the resumability service with a specific strategy and backend.
27
+ *
28
+ * This method initializes the storage backend and creates the appropriate
29
+ * persistence strategy based on the configuration.
30
+ *
31
+ * @param config - Resumability configuration
32
+ * @returns Effect that completes when configuration is applied
33
+ */
34
+ configure: (config: ResumabilityConfig) => Effect.Effect<void, PersistenceError, never>;
35
+ /**
36
+ * Persist a state operation using the configured strategy.
37
+ *
38
+ * @param operation - State operation to persist
39
+ * @returns Effect that completes when operation is persisted
40
+ */
41
+ persistOperation: (operation: StateOperation) => Effect.Effect<undefined, PersistenceError, never>;
42
+ /**
43
+ * Restore spider state from persistent storage.
44
+ *
45
+ * @param key - State key identifying the session to restore
46
+ * @returns Effect containing `Option.some` with the restored state, or `Option.none` if no state is found
47
+ */
48
+ restore: (key: SpiderStateKey) => Effect.Effect<Option.Option<SpiderState>, PersistenceError, never>;
49
+ /**
50
+ * Clean up old state data for a session.
51
+ *
52
+ * @param key - State key identifying the session to clean up
53
+ * @returns Effect that completes when cleanup is finished
54
+ */
55
+ cleanup: (key: SpiderStateKey) => Effect.Effect<undefined, PersistenceError, never>;
56
+ /**
57
+ * List all available sessions in storage.
58
+ *
59
+ * @returns Effect containing array of session keys
60
+ */
61
+ listSessions: () => Effect.Effect<readonly SpiderStateKey[], PersistenceError, never>;
62
+ /**
63
+ * Get information about the current configuration.
64
+ *
65
+ * @returns Effect containing information about the configured strategy and backend
66
+ */
67
+ getInfo: () => Effect.Effect<{
68
+ strategy: {
69
+ readonly name: string;
70
+ readonly description: string;
71
+ readonly capabilities: string[];
72
+ };
73
+ backend: {
74
+ name: string;
75
+ capabilities: import("./types.js").StorageCapabilities;
76
+ };
77
+ }, PersistenceError, never>;
78
+ /**
79
+ * Reconfigure the service with new settings.
80
+ *
81
+ * This will clean up the current backend and reinitialize with new config.
82
+ *
83
+ * @param config - New configuration
84
+ * @returns Effect that completes when reconfiguration is finished
85
+ */
86
+ reconfigure: (config: ResumabilityConfig) => Effect.Effect<void, PersistenceError, never>;
87
+ }, never, never>;
88
+ }>;
89
+ /**
90
+ * Service for resumable spider crawling with configurable persistence strategies.
91
+ *
92
+ * Provides a unified interface for different persistence approaches:
93
+ * - Full state: Simple, saves complete state on every change
94
+ * - Delta: Efficient, saves only incremental changes
95
+ * - Hybrid: Best of both worlds, deltas + periodic snapshots
96
+ * - Auto: Automatically chooses best strategy based on backend capabilities
97
+ *
98
+ * @example
99
+ * ```typescript
100
+ * // File-based full state persistence
101
+ * const resumabilityLayer = ResumabilityService.fromConfig({
102
+ * strategy: 'full-state',
103
+ * backend: new FileStorageBackend('./spider-state')
104
+ * });
105
+ *
106
+ * // Redis-based hybrid persistence
107
+ * const resumabilityLayer = ResumabilityService.fromConfig({
108
+ * strategy: 'hybrid',
109
+ * backend: new RedisStorageBackend(redisClient),
110
+ * hybridConfig: {
111
+ * snapshotInterval: 1000,
112
+ * maxDeltasBeforeSnapshot: 500
113
+ * }
114
+ * });
115
+ *
116
+ * // Auto-selected strategy based on backend
117
+ * const resumabilityLayer = ResumabilityService.fromConfig({
118
+ * strategy: 'auto',
119
+ * backend: new PostgresStorageBackend(pgClient)
120
+ * });
121
+ * ```
122
+ *
123
+ * @group Services
124
+ * @public
125
+ */
126
+ export declare class ResumabilityService extends ResumabilityService_base {
127
+ /**
128
+ * Create a ResumabilityService layer from configuration.
129
+ *
130
+ * This is the primary way to create and configure the ResumabilityService.
131
+ *
132
+ * @param config - Resumability configuration
133
+ * @returns Effect that yields the configured ResumabilityService, or fails with a PersistenceError
134
+ */
135
+ static fromConfig: (config: ResumabilityConfig) => Effect.Effect<ResumabilityService, PersistenceError, never>;
136
+ }
137
+ /**
138
+ * Utility function to create a state operation.
139
+ *
140
+ * @param delta - The delta operation
141
+ * @param resultingState - The complete state after applying the delta
142
+ * @param shouldSnapshot - Whether this operation should trigger a snapshot
143
+ * @returns StateOperation object
144
+ */
145
+ export declare const createStateOperation: (delta: StateDelta, resultingState: SpiderState, shouldSnapshot?: boolean) => StateOperation;
146
+ /**
147
+ * Factory functions for creating common resumability configurations.
148
+ */
149
+ export declare const ResumabilityConfigs: {
150
+ /**
151
+ * Create a file-based configuration.
152
+ *
153
+ * @param baseDir - Directory to store state files
154
+ * @param strategy - Persistence strategy (defaults to 'auto')
155
+ * @returns ResumabilityConfig
156
+ */
157
+ file: (baseDir: string, strategy?: "full-state" | "delta" | "hybrid" | "auto") => ResumabilityConfig;
158
+ /**
159
+ * Create a Redis-based configuration.
160
+ *
161
+ * @param redisClient - Redis client instance
162
+ * @param strategy - Persistence strategy (defaults to 'hybrid')
163
+ * @param keyPrefix - Redis key prefix (defaults to 'spider')
164
+ * @returns ResumabilityConfig
165
+ */
166
+ redis: (redisClient: RedisClientInterface, strategy?: "full-state" | "delta" | "hybrid" | "auto", keyPrefix?: string) => ResumabilityConfig;
167
+ /**
168
+ * Create a PostgreSQL-based configuration.
169
+ *
170
+ * @param dbClient - Database client instance
171
+ * @param strategy - Persistence strategy (defaults to 'hybrid')
172
+ * @param config - PostgreSQL configuration
173
+ * @returns ResumabilityConfig
174
+ */
175
+ postgres: (dbClient: DatabaseClientInterface, strategy?: "full-state" | "delta" | "hybrid" | "auto", config?: PostgresStorageConfig) => ResumabilityConfig;
176
+ };
177
+ export {};
178
+ //# sourceMappingURL=Resumability.service.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"Resumability.service.d.ts","sourceRoot":"","sources":["../../../src/lib/Resumability/Resumability.service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AACxC,OAAO,EACL,WAAW,EACX,cAAc,EACf,MAAM,yCAAyC,CAAC;AACjD,OAAO,EAEL,uBAAuB,EACvB,gBAAgB,EAEhB,UAAU,EACV,cAAc,EACd,cAAc,EACf,MAAM,YAAY,CAAC;AAOpB,OAAO,EAEL,KAAK,oBAAoB,EAC1B,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAEL,KAAK,uBAAuB,EAC5B,KAAK,qBAAqB,EAC3B,MAAM,sCAAsC,CAAC;AAE9C;;;;;;;;GAQG;AACH,MAAM,WAAW,kBAAkB;IACjC,kCAAkC;IAClC,QAAQ,EAAE,YAAY,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,CAAC;IACrD,qCAAqC;IACrC,OAAO,EAAE,cAAc,CAAC;IACxB,8EAA8E;IAC9E,YAAY,CAAC,EAAE,uBAAuB,CAAC;CACxC;;;QAmDO;;;;;;;;WAQG;4BACiB,kBAAkB;QAWtC;;;;;WAKG;sCAC2B,cAAc;QAe5C;;;;;WAKG;uBACY,cAAc;QAe7B;;;;;WAKG;uBACY,cAAc;QAe7B;;;;WAIG;;QA0BH;;;;WAIG;;;;;;;;;;;;QAwBH;;;;;;;WAOG;8BACmB,kBAAkB;;;AAtMhD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoCG;AACH,qBAAa,mBAAoB,SAAQ,wBAgLxC;IACC;;;;;;;OAOG;IACH,MAAM,CAAC,UAAU,GAAI,QAAQ,kBAAkB,iEAKQ;CACxD;AAwDD;;;;;;;GAOG;AACH,eAAO,MAAM,oBAAoB,GAC/B,OAAO,UAAU,EACjB,gBAAgB,WAAW,EAC3B,wBAAsB,KACrB,cAID,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,mBAAmB;IAC9B;;;;;;OAMG;oBAEQ,MAAM,aACL,YAAY,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,KACnD,kBAAkB;IAKrB;;;;;;;OAOG;yBAEY,oBAAoB,aACvB,YAAY,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,yBAEnD,kBAAkB;IAKrB;;;;;;;OAOG;yBAES,uBAAuB,aACvB,YAAY,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,WAC3C,qBAAqB,KAC7B,kBAAkB;CAItB,CAAC"}
@@ -0,0 +1,47 @@
1
+ import { Effect, Option } from 'effect';
2
+ import { PersistenceError, SpiderState, SpiderStateKey, StateDelta, StorageBackend, StorageCapabilities } from '../types.js';
3
+ /**
4
+ * File system storage backend for spider state persistence.
5
+ *
6
+ * Stores state and deltas as JSON files in a directory structure.
7
+ * Good for development, testing, and single-machine deployments.
8
+ *
9
+ * Directory structure:
10
+ * ```
11
+ * baseDir/
12
+ * sessions/
13
+ * sessionId/
14
+ * state.json # Full state
15
+ * snapshot.json # Latest snapshot
16
+ * deltas/
17
+ * 0001.json # Delta files
18
+ * 0002.json
19
+ * ...
20
+ * ```
21
+ *
22
+ * @group Backends
23
+ * @public
24
+ */
25
+ export declare class FileStorageBackend implements StorageBackend {
26
+ readonly capabilities: StorageCapabilities;
27
+ readonly name = "FileStorageBackend";
28
+ private readonly storageDir;
29
+ constructor(baseDir: string);
30
+ initialize: () => Effect.Effect<void, PersistenceError>;
31
+ cleanup: () => Effect.Effect<void, PersistenceError>;
32
+ saveState: (key: SpiderStateKey, state: SpiderState) => Effect.Effect<void, PersistenceError>;
33
+ loadState: (key: SpiderStateKey) => Effect.Effect<Option.Option<SpiderState>, PersistenceError>;
34
+ deleteState: (key: SpiderStateKey) => Effect.Effect<void, PersistenceError>;
35
+ saveDelta: (delta: StateDelta) => Effect.Effect<void, PersistenceError>;
36
+ saveDeltas: (deltas: StateDelta[]) => Effect.Effect<void, PersistenceError>;
37
+ loadDeltas: (key: SpiderStateKey, fromSequence?: number) => Effect.Effect<StateDelta[], PersistenceError>;
38
+ saveSnapshot: (key: SpiderStateKey, state: SpiderState, sequence: number) => Effect.Effect<void, PersistenceError>;
39
+ loadLatestSnapshot: (key: SpiderStateKey) => Effect.Effect<Option.Option<{
40
+ state: SpiderState;
41
+ sequence: number;
42
+ }>, PersistenceError>;
43
+ compactDeltas: (key: SpiderStateKey, beforeSequence: number) => Effect.Effect<void, PersistenceError>;
44
+ listSessions: () => Effect.Effect<SpiderStateKey[], PersistenceError>;
45
+ private getSessionDir;
46
+ }
47
+ //# sourceMappingURL=FileStorageBackend.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"FileStorageBackend.d.ts","sourceRoot":"","sources":["../../../../src/lib/Resumability/backends/FileStorageBackend.ts"],"names":[],"mappings":"AAAA,OAAO,EAAmB,MAAM,EAAE,MAAM,EAAU,MAAM,QAAQ,CAAC;AAGjE,OAAO,EACL,gBAAgB,EAChB,WAAW,EACX,cAAc,EACd,UAAU,EACV,cAAc,EACd,mBAAmB,EACpB,MAAM,aAAa,CAAC;AA4BrB;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,qBAAa,kBAAmB,YAAW,cAAc;IACvD,QAAQ,CAAC,YAAY,EAAE,mBAAmB,CAMxC;IAEF,QAAQ,CAAC,IAAI,wBAAwB;IAErC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;gBAExB,OAAO,EAAE,MAAM;IAI3B,UAAU,QAAO,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAuBpD;IAEF,OAAO,QAAO,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAAgB;IAGnE,SAAS,GACP,KAAK,cAAc,EACnB,OAAO,WAAW,KACjB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAqCtC;IAEF,SAAS,GACP,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE,gBAAgB,CAAC,CA0C5D;IAEF,WAAW,GACT,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAetC;IAGF,SAAS,GAAI,OAAO,UAAU,KAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA6CpE;IAEF,UAAU,GACR,QAAQ,UAAU,EAAE,KACnB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAQtC;IAEF,UAAU,GACR,KAAK,cAAc,EACnB,qBAAgB,KACf,MAAM,CAAC,MAAM,CAAC,UAAU,EAAE,EAAE,gBAAgB,CAAC,CA6D9C;IAGF,YAAY,GACV,KAAK,cAAc,EACnB,OAAO,WAAW,EAClB,UAAU,MAAM,KACf,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA0CtC;IAEF,kBAAkB,GAChB,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CACd,MAAM,CAAC,MAAM,CAAC;QAAE,KAAK,EAAE,WAAW,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC,EACvD,gBAAgB,CACjB,CA6CC;IAGF,aAAa,GACX,KAAK,cAAc,EACnB,gBAAgB,MAAM,KACrB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA6CtC;IAEF,YAAY,QAAO,MAAM,CAAC,MAAM,CAAC,cAAc,EAAE,EAAE,gBAAgB,CAAC,CA6DlE;IAEF,OAAO,CAAC,aAAa,CAEnB;CACH"}
@@ -0,0 +1,95 @@
1
+ import { Effect, Option } from 'effect';
2
+ import { SpiderState, SpiderStateKey } from '../../Scheduler/SpiderScheduler.service.js';
3
+ import { PersistenceError, StateDelta, StorageBackend, StorageCapabilities } from '../types.js';
4
+ /**
5
+ * Database client interface for dependency injection.
6
+ *
7
+ * This allows users to provide their own database client implementation
8
+ * (node-postgres/pg, prisma, drizzle, etc.) without tight coupling.
9
+ *
10
+ * @group Backends
11
+ * @public
12
+ */
13
+ export interface DatabaseClientInterface {
14
+ query<T = unknown>(sql: string, params?: readonly unknown[]): Promise<{
15
+ rows: readonly T[];
16
+ rowCount: number;
17
+ }>;
18
+ transaction?<T>(callback: (client: DatabaseClientInterface) => Promise<T>): Promise<T>;
19
+ }
20
+ /**
21
+ * Configuration for PostgreSQL storage backend.
22
+ */
23
+ export interface PostgresStorageConfig {
24
+ /** Table prefix for spider tables */
25
+ tablePrefix?: string;
26
+ /** Schema name (defaults to 'public') */
27
+ schema?: string;
28
+ /** Whether to auto-create tables */
29
+ autoCreateTables?: boolean;
30
+ }
31
+ /**
32
+ * PostgreSQL storage backend for spider state persistence.
33
+ *
34
+ * Uses PostgreSQL for robust, ACID-compliant state persistence with
35
+ * excellent support for concurrent access and complex queries.
36
+ *
37
+ * Database schema:
38
+ * ```sql
39
+ * CREATE TABLE spider_sessions (
40
+ * id VARCHAR(255) PRIMARY KEY,
41
+ * name VARCHAR(255) NOT NULL,
42
+ * created_at TIMESTAMP NOT NULL,
43
+ * state_data JSONB,
44
+ * updated_at TIMESTAMP DEFAULT NOW()
45
+ * );
46
+ *
47
+ * CREATE TABLE spider_deltas (
48
+ * id SERIAL PRIMARY KEY,
49
+ * session_id VARCHAR(255) NOT NULL REFERENCES spider_sessions(id),
50
+ * sequence_number BIGINT NOT NULL,
51
+ * operation_type VARCHAR(50) NOT NULL,
52
+ * operation_data JSONB NOT NULL,
53
+ * created_at TIMESTAMP DEFAULT NOW(),
54
+ * UNIQUE(session_id, sequence_number)
55
+ * );
56
+ *
57
+ * CREATE TABLE spider_snapshots (
58
+ * id SERIAL PRIMARY KEY,
59
+ * session_id VARCHAR(255) NOT NULL REFERENCES spider_sessions(id),
60
+ * sequence_number BIGINT NOT NULL,
61
+ * state_data JSONB NOT NULL,
62
+ * created_at TIMESTAMP DEFAULT NOW()
63
+ * );
64
+ * ```
65
+ *
66
+ * @group Backends
67
+ * @public
68
+ */
69
+ export declare class PostgresStorageBackend implements StorageBackend {
70
+ readonly db: DatabaseClientInterface;
71
+ readonly capabilities: StorageCapabilities;
72
+ readonly name = "PostgresStorageBackend";
73
+ private readonly tablePrefix;
74
+ private readonly schema;
75
+ private readonly autoCreateTables;
76
+ constructor(db: DatabaseClientInterface, config?: PostgresStorageConfig);
77
+ initialize: () => Effect.Effect<void, PersistenceError>;
78
+ cleanup: () => Effect.Effect<void, PersistenceError>;
79
+ saveState: (key: SpiderStateKey, state: SpiderState) => Effect.Effect<void, PersistenceError>;
80
+ loadState: (key: SpiderStateKey) => Effect.Effect<Option.Option<SpiderState>, PersistenceError>;
81
+ deleteState: (key: SpiderStateKey) => Effect.Effect<void, PersistenceError>;
82
+ saveDelta: (delta: StateDelta) => Effect.Effect<void, PersistenceError>;
83
+ saveDeltas: (deltas: readonly StateDelta[]) => Effect.Effect<void, PersistenceError>;
84
+ loadDeltas: (key: SpiderStateKey, fromSequence?: number) => Effect.Effect<StateDelta[], PersistenceError>;
85
+ saveSnapshot: (key: SpiderStateKey, state: SpiderState, sequence: number) => Effect.Effect<void, PersistenceError>;
86
+ loadLatestSnapshot: (key: SpiderStateKey) => Effect.Effect<Option.Option<{
87
+ state: SpiderState;
88
+ sequence: number;
89
+ }>, PersistenceError>;
90
+ compactDeltas: (key: SpiderStateKey, beforeSequence: number) => Effect.Effect<void, PersistenceError>;
91
+ listSessions: () => Effect.Effect<SpiderStateKey[], PersistenceError>;
92
+ private createTables;
93
+ private getTableName;
94
+ }
95
+ //# sourceMappingURL=PostgresStorageBackend.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"PostgresStorageBackend.d.ts","sourceRoot":"","sources":["../../../../src/lib/Resumability/backends/PostgresStorageBackend.ts"],"names":[],"mappings":"AAAA,OAAO,EAAmB,MAAM,EAAE,MAAM,EAAU,MAAM,QAAQ,CAAC;AACjE,OAAO,EACL,WAAW,EACX,cAAc,EACf,MAAM,4CAA4C,CAAC;AACpD,OAAO,EACL,gBAAgB,EAChB,UAAU,EACV,cAAc,EACd,mBAAmB,EACpB,MAAM,aAAa,CAAC;AAQrB;;;;;;;;GAQG;AACH,MAAM,WAAW,uBAAuB;IACtC,KAAK,CAAC,CAAC,GAAG,OAAO,EACf,GAAG,EAAE,MAAM,EACX,MAAM,CAAC,EAAE,SAAS,OAAO,EAAE,GAC1B,OAAO,CAAC;QAAE,IAAI,EAAE,SAAS,CAAC,EAAE,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IACrD,WAAW,CAAC,CAAC,CAAC,EACZ,QAAQ,EAAE,CAAC,MAAM,EAAE,uBAAuB,KAAK,OAAO,CAAC,CAAC,CAAC,GACxD,OAAO,CAAC,CAAC,CAAC,CAAC;CACf;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,qCAAqC;IACrC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yCAAyC;IACzC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,oCAAoC;IACpC,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AACH,qBAAa,sBAAuB,YAAW,cAAc;IAgBzD,QAAQ,CAAC,EAAE,EAAE,uBAAuB;IAftC,QAAQ,CAAC,YAAY,EAAE,mBAAmB,CAMxC;IAEF,QAAQ,CAAC,IAAI,4BAA4B;IAEzC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAU;gBAGhC,EAAE,EAAE,uBAAuB,EACpC,MAAM,CAAC,EAAE,qBAAqB;IAOhC,UAAU,QAAO,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAOpD;IAEF,OAAO,QAAO,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAAgB;IAGnE,SAAS,GACP,KAAK,cAAc,EACnB,OAAO,WAAW,KACjB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAyCtC;IAEF,SAAS,GACP,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE,gBAAgB,CAAC,CAoC5D;IAEF,WAAW,GACT,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAmFtC;IAGF,SAAS,GAAI,OAAO,UAAU,KAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAoCpE;IAEF,UAAU,GACR,QAAQ,SAAS,UAAU,EAAE,KAC5B,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA+DtC;IAEF,UAAU,GACR,KAAK,cAAc,EACnB,qBAAgB,KACf,MAAM,CAAC,MAAM,CAAC,UAAU,EAAE,EAAE,gBAAgB,CAAC,CA6C9C;IAGF,YAAY,GACV,KAAK,cAAc,EACnB,OAAO,WAAW,EAClB,UAAU,MAAM,KACf,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA+BtC;IAEF,kBAA
kB,GAChB,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CACd,MAAM,CAAC,MAAM,CAAC;QAAE,KAAK,EAAE,WAAW,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC,EACvD,gBAAgB,CACjB,CA4CC;IAGF,aAAa,GACX,KAAK,cAAc,EACnB,gBAAgB,MAAM,KACrB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAkBtC;IAEF,YAAY,QAAO,MAAM,CAAC,MAAM,CAAC,cAAc,EAAE,EAAE,gBAAgB,CAAC,CAgClE;IAGF,OAAO,CAAC,YAAY,CAoFlB;IAEF,OAAO,CAAC,YAAY,CAElB;CACH"}
@@ -0,0 +1,92 @@
1
+ import { Effect, Option } from 'effect';
2
+ import { SpiderState, SpiderStateKey } from '../../Scheduler/SpiderScheduler.service.js';
3
+ import { PersistenceError, StateDelta, StorageBackend, StorageCapabilities } from '../types.js';
4
+ /**
5
+ * Redis client interface for dependency injection.
6
+ *
7
+ * This allows users to provide their own Redis client implementation
8
+ * (node_redis, ioredis, etc.) without tight coupling.
9
+ * Every command returns a Promise; `pipeline` and `multi` are optional members that return their builder synchronously.
10
+ * @group Backends
11
+ * @public
12
+ */
13
+ export interface RedisClientInterface {
14
+ get(_key: string): Promise<string | null>; // string value or null (presumably null when the key is missing — confirm)
15
+ set(_key: string, _value: string): Promise<void>; // no reply value is surfaced to the caller
16
+ del(_key: string): Promise<void>; // no reply value is surfaced to the caller
17
+ exists(_key: string): Promise<boolean>; // NOTE(review): declared as boolean; node_redis/ioredis reply to EXISTS with a count, so presumably an adapter is expected — confirm
18
+ hget(_key: string, _field: string): Promise<string | null>; // one hash field; string or null
19
+ hset(_key: string, _field: string, _value: string): Promise<void>; // one field/value pair per call
20
+ hdel(_key: string, _field: string): Promise<void>; // one field per call
21
+ hgetall(_key: string): Promise<Record<string, string>>; // all fields as a plain string-to-string record
22
+ zadd(_key: string, _score: number, _member: string): Promise<void>; // one scored member per call
23
+ zrange(_key: string, _start: number, _stop: number): Promise<string[]>; // members only; the declared return type carries no scores
24
+ zrangebyscore(_key: string, _min: number | string, _max: number | string): Promise<string[]>; // bounds accept number or string (presumably for forms like '-inf'/'+inf' — confirm)
25
+ zrem(_key: string, _member: string): Promise<void>; // one member per call
26
+ zremrangebyscore(_key: string, _min: number | string, _max: number | string): Promise<void>; // same bound types as zrangebyscore
27
+ keys(_pattern: string): Promise<string[]>; // key names matching the given pattern
28
+ pipeline?(): RedisPipeline; // optional; implementations may omit it
29
+ multi?(): RedisMulti; // optional; implementations may omit it
30
+ }
31
+ /**
32
+ * Redis pipeline interface for batch operations. Only `zadd` can be queued through this interface.
33
+ */
34
+ export interface RedisPipeline {
35
+ zadd(_key: string, _score: number, _member: string): RedisPipeline; // returns a RedisPipeline, so calls can be chained
36
+ exec(): Promise<unknown[]>; // resolves to an untyped array (presumably one entry per queued command — confirm)
37
+ }
38
+ /**
39
+ * Redis multi/transaction interface. Only `zadd` can be queued through this interface.
40
+ */
41
+ export interface RedisMulti {
42
+ zadd(_key: string, _score: number, _member: string): RedisMulti; // returns a RedisMulti, so calls can be chained
43
+ exec(): Promise<unknown[]>; // resolves to an untyped array (presumably one entry per queued command — confirm)
44
+ }
45
+ /**
46
+ * Redis storage backend for spider state persistence.
47
+ * Every public operation is declared as returning an `Effect` with `PersistenceError` as its error type.
48
+ * Uses Redis data structures for efficient storage:
49
+ * - Hashes for full state and snapshots
50
+ * - Sorted sets for deltas (ordered by sequence number)
51
+ * - TTL support for automatic cleanup
52
+ *
53
+ * Redis key structure (shown with the `spider` prefix; NOTE(review): presumably the default of the optional `keyPrefix` constructor argument — confirm):
54
+ * ```
55
+ * spider:state:{sessionId} # Hash: full state
56
+ * spider:snapshot:{sessionId} # Hash: latest snapshot + sequence
57
+ * spider:deltas:{sessionId} # Sorted set: sequence -> delta JSON
58
+ * spider:sessions # Set: all session IDs
59
+ * ```
60
+ * NOTE(review): RedisClientInterface declares no set or expiry commands — confirm how the `spider:sessions` set and the TTL cleanup described above are implemented.
61
+ * @group Backends
62
+ * @public
63
+ */
64
+ export declare class RedisStorageBackend implements StorageBackend {
65
+ readonly capabilities: StorageCapabilities; // concrete values are not visible in this declaration
66
+ readonly name = "RedisStorageBackend"; // literal-typed backend name
67
+ private readonly redis;
68
+ private readonly keyPrefix;
69
+ constructor(redis: RedisClientInterface, keyPrefix?: string); // keyPrefix is optional; its default is not visible in this declaration
70
+ initialize: () => Effect.Effect<void, PersistenceError>; // takes no arguments
71
+ cleanup: () => Effect.Effect<void, PersistenceError>; // takes no arguments
72
+ saveState: (key: SpiderStateKey, state: SpiderState) => Effect.Effect<void, PersistenceError>; // full-state write for one key
73
+ loadState: (key: SpiderStateKey) => Effect.Effect<Option.Option<SpiderState>, PersistenceError>; // result is an Option (presumably None when nothing is stored — confirm)
74
+ deleteState: (key: SpiderStateKey) => Effect.Effect<void, PersistenceError>; // NOTE(review): whether deltas/snapshots are removed too is not visible here
75
+ saveDelta: (delta: StateDelta) => Effect.Effect<void, PersistenceError>; // single delta; no separate key argument
76
+ saveDeltas: (deltas: StateDelta[]) => Effect.Effect<void, PersistenceError>; // batch form of saveDelta
77
+ loadDeltas: (key: SpiderStateKey, fromSequence?: number) => Effect.Effect<StateDelta[], PersistenceError>; // fromSequence is optional; NOTE(review): inclusivity of the bound is not visible here
78
+ saveSnapshot: (key: SpiderStateKey, state: SpiderState, sequence: number) => Effect.Effect<void, PersistenceError>; // stores a state together with a sequence number
79
+ loadLatestSnapshot: (key: SpiderStateKey) => Effect.Effect<Option.Option<{ // Option of the state/sequence pair below
80
+ state: SpiderState;
81
+ sequence: number;
82
+ }>, PersistenceError>;
83
+ compactDeltas: (key: SpiderStateKey, beforeSequence: number) => Effect.Effect<void, PersistenceError>; // presumably drops deltas older than beforeSequence — confirm inclusivity
84
+ listSessions: () => Effect.Effect<SpiderStateKey[], PersistenceError>; // yields an array of session keys
85
+ private getStateKey; // private members are emitted without types in declaration files
86
+ private getSnapshotKey;
87
+ private getDeltasKey;
88
+ private getSessionsKey;
89
+ private addToSessionsList;
90
+ private removeFromSessionsList;
91
+ }
92
+ //# sourceMappingURL=RedisStorageBackend.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"RedisStorageBackend.d.ts","sourceRoot":"","sources":["../../../../src/lib/Resumability/backends/RedisStorageBackend.ts"],"names":[],"mappings":"AAAA,OAAO,EAAmB,MAAM,EAAW,MAAM,EAAU,MAAM,QAAQ,CAAC;AAC1E,OAAO,EACL,WAAW,EACX,cAAc,EACf,MAAM,4CAA4C,CAAC;AACpD,OAAO,EACL,gBAAgB,EAChB,UAAU,EACV,cAAc,EACd,mBAAmB,EACpB,MAAM,aAAa,CAAC;AAarB;;;;;;;;GAQG;AACH,MAAM,WAAW,oBAAoB;IACnC,GAAG,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;IAC1C,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACjD,GAAG,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACjC,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IACvC,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;IAC3D,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAClE,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAClD,OAAO,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACvD,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACnE,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACvE,aAAa,CACX,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,MAAM,GAAG,MAAM,EACrB,IAAI,EAAE,MAAM,GAAG,MAAM,GACpB,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACrB,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACnD,gBAAgB,CACd,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,MAAM,GAAG,MAAM,EACrB,IAAI,EAAE,MAAM,GAAG,MAAM,GACpB,OAAO,CAAC,IAAI,CAAC,CAAC;IACjB,IAAI,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAC1C,QAAQ,CAAC,IAAI,aAAa,CAAC;IAC3B,KAAK,CAAC,IAAI,UAAU,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,aAAa,CAAC;IACnE,IAAI,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,UAAU,CAAC;IAChE,IAAI,IAAI,OA
AO,CAAC,OAAO,EAAE,CAAC,CAAC;CAC5B;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,qBAAa,mBAAoB,YAAW,cAAc;IACxD,QAAQ,CAAC,YAAY,EAAE,mBAAmB,CAMxC;IAEF,QAAQ,CAAC,IAAI,yBAAyB;IAEtC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAuB;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;gBAEvB,KAAK,EAAE,oBAAoB,EAAE,SAAS,SAAW;IAK7D,UAAU,QAAO,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CACxC;IAEd,OAAO,QAAO,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CACrC;IAGd,SAAS,GACP,KAAK,cAAc,EACnB,OAAO,WAAW,KACjB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CA4BtC;IAEF,SAAS,GACP,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE,gBAAgB,CAAC,CAgC5D;IAEF,WAAW,GACT,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAoCtC;IAGF,SAAS,GAAI,OAAO,UAAU,KAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAoCpE;IAEF,UAAU,GACR,QAAQ,UAAU,EAAE,KACnB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAqEtC;IAEF,UAAU,GACR,KAAK,cAAc,EACnB,qBAAgB,KACf,MAAM,CAAC,MAAM,CAAC,UAAU,EAAE,EAAE,gBAAgB,CAAC,CAkC9C;IAGF,YAAY,GACV,KAAK,cAAc,EACnB,OAAO,WAAW,EAClB,UAAU,MAAM,KACf,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAmCtC;IAEF,kBAAkB,GAChB,KAAK,cAAc,KAClB,MAAM,CAAC,MAAM,CACd,MAAM,CAAC,MAAM,CAAC;QAAE,KAAK,EAAE,WAAW,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC,EACvD,gBAAgB,CACjB,CAwCC;IAGF,aAAa,GACX,KAAK,cAAc,EACnB,gBAAgB,MAAM,KACrB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAetC;IAEF,YAAY,QAAO,MAAM,CAAC,MAAM,CAAC,cAAc,EAAE,EAAE,gBAAgB,CAAC,CA2ClE;IAGF,OAAO,CAAC,WAAW,CACmB;IAEtC,OAAO,CAAC,cAAc,CACmB;IAEzC,OAAO,CAAC,YAAY,CACmB;IAEvC,OAAO,CAAC,cAAc,CAA8C;IAEpE,OAAO,CAAC,iBAAiB,CAkBvB;IAEF,OAAO,CAAC,sBAAsB,CAgB5B;CACH"}
@@ -0,0 +1,51 @@
1
+ /**
2
+ * Resumable spider crawling with configurable persistence strategies.
3
+ *
4
+ * This module provides a complete solution for resumable web crawling with
5
+ * support for different persistence strategies and storage backends.
6
+ *
7
+ * ## Key Features
8
+ *
9
+ * - **Multiple Strategies**: Full state, delta, hybrid, and auto-selection
10
+ * - **Multiple Backends**: File system, Redis, and PostgreSQL, with an extensible interface
11
+ * - **Effect Native**: Full integration with the Effect ecosystem
12
+ * - **Type Safe**: Complete TypeScript support with runtime validation
13
+ * - **Production Ready**: Handles concurrency, errors, and edge cases
14
+ *
15
+ * ## Quick Start
16
+ *
17
+ * ```typescript
18
+ * import { ResumabilityService, ResumabilityConfigs } from '@jambudipa/spider/resumability';
19
+ *
20
+ * // File-based resumability
21
+ * const resumabilityLayer = ResumabilityService.fromConfig(
22
+ * ResumabilityConfigs.file('./spider-state', 'hybrid')
23
+ * );
24
+ *
25
+ * // Use with Spider
26
+ * const program = Effect.gen(function* () {
27
+ * const spider = yield* Spider;
28
+ * const resumability = yield* ResumabilityService;
29
+ *
30
+ * // Configure resumable crawling...
31
+ * });
32
+ *
33
+ * Effect.runPromise(
34
+ * program.pipe(
35
+ * Effect.provide(Spider.Default),
36
+ * Effect.provide(resumabilityLayer)
37
+ * )
38
+ * );
39
+ * ```
40
+ *
41
+ * @group Resumability
42
+ * @public
43
+ */
44
+ export type { StorageBackend, StorageCapabilities, PersistenceStrategy, StateOperation, HybridPersistenceConfig, } from './types.js'; // type-only re-exports
45
+ export { StateDelta, PersistenceError, DEFAULT_HYBRID_CONFIG, } from './types.js'; // re-exported as values, not type-only
46
+ export { FullStatePersistence, DeltaPersistence, HybridPersistence, } from './strategies.js'; // persistence strategies
47
+ export { FileStorageBackend } from './backends/FileStorageBackend.js'; // file storage backend
48
+ export { RedisStorageBackend, type RedisClientInterface, type RedisPipeline, type RedisMulti, } from './backends/RedisStorageBackend.js'; // Redis backend; the three client interfaces are type-only
49
+ export { PostgresStorageBackend, type DatabaseClientInterface, type PostgresStorageConfig, } from './backends/PostgresStorageBackend.js'; // Postgres backend; client and config types are type-only
50
+ export { ResumabilityService, ResumabilityConfigs, createStateOperation, type ResumabilityConfig, } from './Resumability.service.js'; // service entry points; ResumabilityConfig is type-only
51
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/lib/Resumability/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AAGH,YAAY,EACV,cAAc,EACd,mBAAmB,EACnB,mBAAmB,EACnB,cAAc,EACd,uBAAuB,GACxB,MAAM,YAAY,CAAC;AAEpB,OAAO,EACL,UAAU,EACV,gBAAgB,EAChB,qBAAqB,GACtB,MAAM,YAAY,CAAC;AAGpB,OAAO,EACL,oBAAoB,EACpB,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,iBAAiB,CAAC;AAGzB,OAAO,EAAE,kBAAkB,EAAE,MAAM,kCAAkC,CAAC;AACtE,OAAO,EACL,mBAAmB,EACnB,KAAK,oBAAoB,EACzB,KAAK,aAAa,EAClB,KAAK,UAAU,GAChB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EACL,sBAAsB,EACtB,KAAK,uBAAuB,EAC5B,KAAK,qBAAqB,GAC3B,MAAM,sCAAsC,CAAC;AAG9C,OAAO,EACL,mBAAmB,EACnB,mBAAmB,EACnB,oBAAoB,EACpB,KAAK,kBAAkB,GACxB,MAAM,2BAA2B,CAAC"}