@jambudipa/spider 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +426 -0
- package/dist/index.d.ts +33 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +4681 -0
- package/dist/index.js.map +1 -0
- package/dist/lib/BrowserEngine/BrowserEngine.service.d.ts +57 -0
- package/dist/lib/BrowserEngine/BrowserEngine.service.d.ts.map +1 -0
- package/dist/lib/Config/SpiderConfig.service.d.ts +256 -0
- package/dist/lib/Config/SpiderConfig.service.d.ts.map +1 -0
- package/dist/lib/HttpClient/CookieManager.d.ts +44 -0
- package/dist/lib/HttpClient/CookieManager.d.ts.map +1 -0
- package/dist/lib/HttpClient/EnhancedHttpClient.d.ts +88 -0
- package/dist/lib/HttpClient/EnhancedHttpClient.d.ts.map +1 -0
- package/dist/lib/HttpClient/SessionStore.d.ts +82 -0
- package/dist/lib/HttpClient/SessionStore.d.ts.map +1 -0
- package/dist/lib/HttpClient/TokenExtractor.d.ts +58 -0
- package/dist/lib/HttpClient/TokenExtractor.d.ts.map +1 -0
- package/dist/lib/HttpClient/index.d.ts +8 -0
- package/dist/lib/HttpClient/index.d.ts.map +1 -0
- package/dist/lib/LinkExtractor/LinkExtractor.service.d.ts +166 -0
- package/dist/lib/LinkExtractor/LinkExtractor.service.d.ts.map +1 -0
- package/dist/lib/LinkExtractor/index.d.ts +37 -0
- package/dist/lib/LinkExtractor/index.d.ts.map +1 -0
- package/dist/lib/Logging/FetchLogger.d.ts +8 -0
- package/dist/lib/Logging/FetchLogger.d.ts.map +1 -0
- package/dist/lib/Logging/SpiderLogger.service.d.ts +34 -0
- package/dist/lib/Logging/SpiderLogger.service.d.ts.map +1 -0
- package/dist/lib/Middleware/SpiderMiddleware.d.ts +276 -0
- package/dist/lib/Middleware/SpiderMiddleware.d.ts.map +1 -0
- package/dist/lib/PageData/PageData.d.ts +28 -0
- package/dist/lib/PageData/PageData.d.ts.map +1 -0
- package/dist/lib/Resumability/Resumability.service.d.ts +176 -0
- package/dist/lib/Resumability/Resumability.service.d.ts.map +1 -0
- package/dist/lib/Resumability/backends/FileStorageBackend.d.ts +47 -0
- package/dist/lib/Resumability/backends/FileStorageBackend.d.ts.map +1 -0
- package/dist/lib/Resumability/backends/PostgresStorageBackend.d.ts +95 -0
- package/dist/lib/Resumability/backends/PostgresStorageBackend.d.ts.map +1 -0
- package/dist/lib/Resumability/backends/RedisStorageBackend.d.ts +92 -0
- package/dist/lib/Resumability/backends/RedisStorageBackend.d.ts.map +1 -0
- package/dist/lib/Resumability/index.d.ts +51 -0
- package/dist/lib/Resumability/index.d.ts.map +1 -0
- package/dist/lib/Resumability/strategies.d.ts +76 -0
- package/dist/lib/Resumability/strategies.d.ts.map +1 -0
- package/dist/lib/Resumability/types.d.ts +201 -0
- package/dist/lib/Resumability/types.d.ts.map +1 -0
- package/dist/lib/Robots/Robots.service.d.ts +78 -0
- package/dist/lib/Robots/Robots.service.d.ts.map +1 -0
- package/dist/lib/Scheduler/SpiderScheduler.service.d.ts +211 -0
- package/dist/lib/Scheduler/SpiderScheduler.service.d.ts.map +1 -0
- package/dist/lib/Scraper/Scraper.service.d.ts +123 -0
- package/dist/lib/Scraper/Scraper.service.d.ts.map +1 -0
- package/dist/lib/Spider/Spider.service.d.ts +194 -0
- package/dist/lib/Spider/Spider.service.d.ts.map +1 -0
- package/dist/lib/StateManager/StateManager.service.d.ts +68 -0
- package/dist/lib/StateManager/StateManager.service.d.ts.map +1 -0
- package/dist/lib/StateManager/index.d.ts +5 -0
- package/dist/lib/StateManager/index.d.ts.map +1 -0
- package/dist/lib/UrlDeduplicator/UrlDeduplicator.service.d.ts +58 -0
- package/dist/lib/UrlDeduplicator/UrlDeduplicator.service.d.ts.map +1 -0
- package/dist/lib/WebScrapingEngine/WebScrapingEngine.service.d.ts +77 -0
- package/dist/lib/WebScrapingEngine/WebScrapingEngine.service.d.ts.map +1 -0
- package/dist/lib/WebScrapingEngine/index.d.ts +5 -0
- package/dist/lib/WebScrapingEngine/index.d.ts.map +1 -0
- package/dist/lib/WorkerHealth/WorkerHealthMonitor.service.d.ts +39 -0
- package/dist/lib/WorkerHealth/WorkerHealthMonitor.service.d.ts.map +1 -0
- package/dist/lib/api-facades.d.ts +313 -0
- package/dist/lib/api-facades.d.ts.map +1 -0
- package/dist/lib/errors.d.ts +99 -0
- package/dist/lib/errors.d.ts.map +1 -0
- package/package.json +108 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Browser Engine Service
|
|
3
|
+
* Provides browser automation capabilities using Playwright
|
|
4
|
+
*/
|
|
5
|
+
import { Context, Effect } from 'effect';
|
|
6
|
+
/**
 * Describes a page element by its selector, with optional text and attributes.
 *
 * NOTE(review): no member of BrowserEngineService in this declaration file
 * takes or returns a PageElement — confirm where it is consumed.
 */
export interface PageElement {
|
|
7
|
+
/** Selector string identifying the element (presumably a CSS selector — confirm). */
selector: string;
|
|
8
|
+
/** Optional text for the element. */
text?: string;
|
|
9
|
+
/** Optional map of attribute name to attribute value. */
attributes?: Record<string, string>;
|
|
10
|
+
}
|
|
11
|
+
/**
 * Operations for driving a browser page, each returned as an Effect.
 *
 * Every member has `never` as its requirements type, so the returned effects
 * need no additional services to run. `scroll` and `closePage` also have
 * `never` as their error type; all other members may fail with `Error`.
 */
export interface BrowserEngineService {
|
|
12
|
+
/**
|
|
13
|
+
* Create a new browser page
|
|
14
|
+
*/
|
|
15
|
+
createPage: () => Effect.Effect<void, Error, never>;
|
|
16
|
+
/**
|
|
17
|
+
* Navigate to a URL
|
|
18
|
+
*/
|
|
19
|
+
navigateTo: (url: string) => Effect.Effect<void, Error, never>;
|
|
20
|
+
/**
|
|
21
|
+
* Wait for a selector to appear
|
|
22
|
+
*/
|
|
23
|
+
// `timeout` is optional; its unit is not stated here (presumably milliseconds — confirm).
waitForSelector: (selector: string, timeout?: number) => Effect.Effect<void, Error, never>;
|
|
24
|
+
/**
|
|
25
|
+
* Click an element
|
|
26
|
+
*/
|
|
27
|
+
click: (selector: string) => Effect.Effect<void, Error, never>;
|
|
28
|
+
/**
|
|
29
|
+
* Fill a form field
|
|
30
|
+
*/
|
|
31
|
+
fill: (selector: string, value: string) => Effect.Effect<void, Error, never>;
|
|
32
|
+
/**
|
|
33
|
+
* Scroll the page
|
|
34
|
+
*/
|
|
35
|
+
// Error type is `never`: the signature exposes no failure to the caller.
// The unit and direction of `distance` are not stated here (presumably pixels — confirm).
scroll: (distance: number) => Effect.Effect<void, never, never>;
|
|
36
|
+
/**
|
|
37
|
+
* Execute JavaScript in the page
|
|
38
|
+
*/
|
|
39
|
+
// NOTE(review): T occurs only in the result type, so this signature does not
// relate it to what the script actually returns — callers choose T themselves.
// `Function` accepts any callable without constraining its parameters or result.
evaluate: <T>(script: string | Function) => Effect.Effect<T, Error, never>;
|
|
40
|
+
/**
|
|
41
|
+
* Get page HTML
|
|
42
|
+
*/
|
|
43
|
+
getHTML: () => Effect.Effect<string, Error, never>;
|
|
44
|
+
/**
|
|
45
|
+
* Take a screenshot
|
|
46
|
+
*/
|
|
47
|
+
// Succeeds with a Buffer; `path` is optional (presumably a file to also write to — confirm).
screenshot: (path?: string) => Effect.Effect<Buffer, Error, never>;
|
|
48
|
+
/**
|
|
49
|
+
* Close the page
|
|
50
|
+
*/
|
|
51
|
+
// Error type is `never`: the signature exposes no failure to the caller.
closePage: () => Effect.Effect<void, never, never>;
|
|
52
|
+
}
|
|
53
|
+
// Compiler-emitted superclass for the tag class below (not exported).
// Type arguments: the tag class itself, the identifier string "BrowserEngine",
// and the service shape BrowserEngineService.
declare const BrowserEngine_base: Context.TagClass<BrowserEngine, "BrowserEngine", BrowserEngineService>;
|
|
54
|
+
/**
 * Context tag for the browser engine service.
 *
 * This declaration file exports no Layer or factory for the tag, so an
 * implementation of BrowserEngineService has to be supplied from elsewhere.
 */
export declare class BrowserEngine extends BrowserEngine_base {
|
|
55
|
+
}
|
|
56
|
+
export {};
|
|
57
|
+
//# sourceMappingURL=BrowserEngine.service.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"BrowserEngine.service.d.ts","sourceRoot":"","sources":["../../../src/lib/BrowserEngine/BrowserEngine.service.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAEzC,MAAM,WAAW,WAAW;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACrC;AAED,MAAM,WAAW,oBAAoB;IACnC;;OAEG;IACH,UAAU,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAEpD;;OAEG;IACH,UAAU,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAE/D;;OAEG;IACH,eAAe,EAAE,CACf,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,MAAM,KACb,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAEvC;;OAEG;IACH,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAE/D;;OAEG;IACH,IAAI,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAE7E;;OAEG;IACH,MAAM,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAEhE;;OAEG;IACH,QAAQ,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,GAAG,QAAQ,KAAK,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAE3E;;OAEG;IACH,OAAO,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAEnD;;OAEG;IACH,UAAU,EAAE,CAAC,IAAI,CAAC,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAEnE;;OAEG;IACH,SAAS,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;CACpD;;AAED,qBAAa,aAAc,SAAQ,kBAGhC;CAAG"}
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
import { Effect, Layer } from 'effect';
|
|
2
|
+
/**
|
|
3
|
+
* File extension filter categories based on Scrapy's IGNORED_EXTENSIONS.
|
|
4
|
+
* Each category can be individually enabled/disabled for flexible filtering.
|
|
5
|
+
*
|
|
6
|
+
* @group Configuration
|
|
7
|
+
* @public
|
|
8
|
+
*/
|
|
9
|
+
export interface FileExtensionFilters {
|
|
10
|
+
/** Archive files: 7z, 7zip, bz2, rar, tar, tar.gz, xz, zip (default: true) */
|
|
11
|
+
readonly filterArchives: boolean;
|
|
12
|
+
/** Image files: jpg, png, gif, svg, webp, etc. (default: true) */
|
|
13
|
+
readonly filterImages: boolean;
|
|
14
|
+
/** Audio files: mp3, wav, ogg, aac, etc. (default: true) */
|
|
15
|
+
readonly filterAudio: boolean;
|
|
16
|
+
/** Video files: mp4, avi, mov, webm, etc. (default: true) */
|
|
17
|
+
readonly filterVideo: boolean;
|
|
18
|
+
/** Office documents: pdf, doc, xls, ppt, odt, etc. (default: true) */
|
|
19
|
+
readonly filterOfficeDocuments: boolean;
|
|
20
|
+
/** Other files: css, js, exe, bin, rss, etc. (default: true) */
|
|
21
|
+
readonly filterOther: boolean;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Technical URL filtering options based on Scrapy's validation rules.
|
|
25
|
+
* These filters help ensure only valid, crawlable URLs are processed.
|
|
26
|
+
*
|
|
27
|
+
* @group Configuration
|
|
28
|
+
* @public
|
|
29
|
+
*/
|
|
30
|
+
export interface TechnicalFilters {
|
|
31
|
+
/**
 * Filter URLs with unsupported schemes (default: true - only http/https/file/ftp allowed).
 *
 * NOTE(review): SpiderConfigOptions.allowedProtocols documents a default of
 * ['http:', 'https:'], which is narrower than the scheme list above — confirm
 * how the two settings interact for file and ftp URLs.
 */
|
|
32
|
+
readonly filterUnsupportedSchemes: boolean;
|
|
33
|
+
/** Filter URLs exceeding maximum length (default: true - 2083 chars like Scrapy) */
|
|
34
|
+
readonly filterLongUrls: boolean;
|
|
35
|
+
/** Maximum URL length in characters (default: 2083) */
|
|
36
|
+
readonly maxUrlLength: number;
|
|
37
|
+
/** Filter malformed/invalid URLs (default: true) */
|
|
38
|
+
readonly filterMalformedUrls: boolean;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Configuration options for spider behavior and limits.
|
|
42
|
+
*
|
|
43
|
+
* Controls all aspects of crawling including rate limits, filtering rules,
|
|
44
|
+
* and behavioral settings. All options have sensible defaults based on Scrapy.
|
|
45
|
+
*
|
|
46
|
+
* @group Configuration
|
|
47
|
+
* @public
|
|
48
|
+
*/
|
|
49
|
+
export interface SpiderConfigOptions {
|
|
50
|
+
/** Whether to ignore robots.txt files (default: false) */
|
|
51
|
+
readonly ignoreRobotsTxt: boolean;
|
|
52
|
+
/** Maximum number of concurrent worker fibers (default: 5) */
|
|
53
|
+
readonly maxConcurrentWorkers: number;
|
|
54
|
+
/** Concurrency level for crawling multiple starting URLs (default: 4) */
|
|
55
|
+
readonly concurrency: number | 'unbounded' | 'inherit';
|
|
56
|
+
/** Base delay between requests in milliseconds (default: 1000) */
|
|
57
|
+
readonly requestDelayMs: number;
|
|
58
|
+
/** Maximum crawl delay from robots.txt in milliseconds (default: 10000 - 10 seconds) */
|
|
59
|
+
readonly maxRobotsCrawlDelayMs: number;
|
|
60
|
+
/** User agent string to send with requests (default: 'JambudipaSpider/1.0') */
|
|
61
|
+
readonly userAgent: string;
|
|
62
|
+
/** Maximum crawl depth, undefined for unlimited (default: undefined) */
|
|
63
|
+
readonly maxDepth?: number;
|
|
64
|
+
/** Maximum pages to crawl, undefined for unlimited (default: undefined) */
|
|
65
|
+
readonly maxPages?: number;
|
|
66
|
+
/** Domains to restrict crawling to (default: undefined - all domains) */
|
|
67
|
+
readonly allowedDomains?: string[];
|
|
68
|
+
/** Domains to exclude from crawling (default: undefined - no blocks) */
|
|
69
|
+
readonly blockedDomains?: string[];
|
|
70
|
+
/** Allowed URL protocols (default: ['http:', 'https:']) */
|
|
71
|
+
readonly allowedProtocols: string[];
|
|
72
|
+
/** Whether to follow HTTP redirects (default: true) */
|
|
73
|
+
readonly followRedirects: boolean;
|
|
74
|
+
/** Whether to respect rel="nofollow" attributes (default: true) */
|
|
75
|
+
readonly respectNoFollow: boolean;
|
|
76
|
+
/**
|
|
77
|
+
* File extension filtering configuration.
|
|
78
|
+
* When undefined, uses default Scrapy-equivalent filtering (all categories enabled).
|
|
79
|
+
* Set to override default behavior for each category.
|
|
80
|
+
*
|
|
81
|
+
* @example
|
|
82
|
+
* ```typescript
|
|
83
|
+
* // Allow images but filter everything else
|
|
84
|
+
* fileExtensionFilters: {
|
|
85
|
+
* filterArchives: true,
|
|
86
|
+
* filterImages: false, // Allow images
|
|
87
|
+
* filterAudio: true,
|
|
88
|
+
* filterVideo: true,
|
|
89
|
+
* filterOfficeDocuments: true,
|
|
90
|
+
* filterOther: true
|
|
91
|
+
* }
|
|
92
|
+
* ```
|
|
93
|
+
*/
|
|
94
|
+
readonly fileExtensionFilters?: FileExtensionFilters;
|
|
95
|
+
/**
|
|
96
|
+
* Technical URL filtering configuration.
|
|
97
|
+
* When undefined, uses default Scrapy-equivalent filtering (all enabled).
|
|
98
|
+
*
|
|
99
|
+
* @example
|
|
100
|
+
* ```typescript
|
|
101
|
+
* // Disable URL length filtering for special cases
|
|
102
|
+
* technicalFilters: {
|
|
103
|
+
* filterUnsupportedSchemes: true,
|
|
104
|
+
* filterLongUrls: false, // Allow long URLs
|
|
105
|
+
* maxUrlLength: 2083,
|
|
106
|
+
* filterMalformedUrls: true
|
|
107
|
+
* }
|
|
108
|
+
* ```
|
|
109
|
+
*/
|
|
110
|
+
readonly technicalFilters?: TechnicalFilters;
|
|
111
|
+
/**
|
|
112
|
+
* Custom file extensions to skip (legacy support).
|
|
113
|
+
* When specified, overrides fileExtensionFilters completely.
|
|
114
|
+
* Use fileExtensionFilters for more granular control.
|
|
115
|
+
*/
|
|
116
|
+
readonly skipFileExtensions?: string[];
|
|
117
|
+
/** Maximum concurrent requests across all domains (default: 10) */
|
|
118
|
+
readonly maxConcurrentRequests: number;
|
|
119
|
+
/** Maximum requests per second per domain (default: 2) */
|
|
120
|
+
readonly maxRequestsPerSecondPerDomain: number;
|
|
121
|
+
/**
|
|
122
|
+
* Whether to normalize URLs for deduplication (default: true).
|
|
123
|
+
* When enabled, URLs are normalized before checking for duplicates:
|
|
124
|
+
* - Trailing slashes are removed (example.com/path/ becomes example.com/path)
|
|
125
|
+
* - Fragment identifiers are removed (example.com#section becomes example.com)
|
|
126
|
+
* - Default ports are removed (http://example.com:80 becomes http://example.com)
|
|
127
|
+
* - Query parameters are sorted alphabetically
|
|
128
|
+
*
|
|
129
|
+
* This prevents crawling the same content multiple times when URLs differ only
|
|
130
|
+
* in formatting. Set to false if you need to treat these variations as distinct URLs.
|
|
131
|
+
*
|
|
132
|
+
* @default true
|
|
133
|
+
*/
|
|
134
|
+
readonly normalizeUrlsForDeduplication: boolean;
|
|
135
|
+
/**
|
|
136
|
+
* Custom URL filter patterns to exclude from crawling.
|
|
137
|
+
* Provides regex patterns that will be tested against URLs to determine if they should be skipped.
|
|
138
|
+
* This is useful for filtering out admin areas, utility pages, or other unwanted URL patterns.
|
|
139
|
+
*
|
|
140
|
+
* @example
|
|
141
|
+
* ```typescript
|
|
142
|
+
* customUrlFilters: [
|
|
143
|
+
* /\/wp-admin\//i,
|
|
144
|
+
* /\/wp-content\/uploads\//i,
|
|
145
|
+
* /\/api\//i
|
|
146
|
+
* ]
|
|
147
|
+
* ```
|
|
148
|
+
*
|
|
149
|
+
* @default undefined
|
|
150
|
+
*/
|
|
151
|
+
readonly customUrlFilters?: RegExp[];
|
|
152
|
+
/**
|
|
153
|
+
* Whether to enable resumable crawling support (default: false).
|
|
154
|
+
* When enabled, the spider can save its state and resume interrupted crawls.
|
|
155
|
+
* Requires configuring a StatePersistence implementation.
|
|
156
|
+
*
|
|
157
|
+
* @default false
|
|
158
|
+
*/
|
|
159
|
+
readonly enableResumability: boolean;
|
|
160
|
+
}
|
|
161
|
+
/**
|
|
162
|
+
* Service interface for accessing spider configuration.
|
|
163
|
+
*
|
|
164
|
+
* Provides Effect-wrapped access to all configuration options with
|
|
165
|
+
* validation and computed properties. Used throughout the framework
|
|
166
|
+
* to access settings in a composable way.
|
|
167
|
+
*
|
|
168
|
+
* @group Configuration
|
|
169
|
+
* @public
|
|
170
|
+
*/
|
|
171
|
+
export interface SpiderConfigService {
|
|
172
|
+
/** Get the complete configuration options */
|
|
173
|
+
getOptions: () => Effect.Effect<SpiderConfigOptions>;
|
|
174
|
+
/** Check if a URL should be followed based on configured rules */
|
|
175
|
+
shouldFollowUrl: (urlString: string, fromUrl?: string, restrictToStartingDomain?: string) => Effect.Effect<{
|
|
176
|
+
follow: boolean;
|
|
177
|
+
reason?: string;
|
|
178
|
+
}>;
|
|
179
|
+
/** Get the configured user agent string */
|
|
180
|
+
getUserAgent: () => Effect.Effect<string>;
|
|
181
|
+
/** Get the request delay in milliseconds */
|
|
182
|
+
getRequestDelay: () => Effect.Effect<number>;
|
|
183
|
+
/** Get the maximum crawl delay from robots.txt in milliseconds */
|
|
184
|
+
getMaxRobotsCrawlDelay: () => Effect.Effect<number>;
|
|
185
|
+
/** Check if robots.txt should be ignored */
|
|
186
|
+
shouldIgnoreRobotsTxt: () => Effect.Effect<boolean>;
|
|
187
|
+
/** Get maximum concurrent workers */
|
|
188
|
+
getMaxConcurrentWorkers: () => Effect.Effect<number>;
|
|
189
|
+
/**
|
|
190
|
+
* Get maximum crawl depth (undefined if unlimited).
|
|
191
|
+
*
|
|
192
|
+
* Crawl depth is the number of link hops from the starting URL(s).
|
|
193
|
+
* For example:
|
|
194
|
+
* - Depth 0: Only the initial URL(s) are crawled
|
|
195
|
+
* - Depth 1: The initial URL(s) + all links found on those pages
|
|
196
|
+
* - Depth 2: Everything from depth 1 + all links found on the depth 1 pages
|
|
197
|
+
*
|
|
198
|
+
* Cross-domain behavior: Depth counting applies only within allowed domains.
|
|
199
|
+
* If `allowedDomains` is configured, links to external domains are not followed
|
|
200
|
+
* regardless of depth. If no domain restrictions are set, depth applies across
|
|
201
|
+
* all domains encountered.
|
|
202
|
+
*/
|
|
203
|
+
getMaxDepth: () => Effect.Effect<number | undefined>;
|
|
204
|
+
/** Get maximum pages to crawl (undefined if unlimited) */
|
|
205
|
+
getMaxPages: () => Effect.Effect<number | undefined>;
|
|
206
|
+
/** Check if redirects should be followed */
|
|
207
|
+
shouldFollowRedirects: () => Effect.Effect<boolean>;
|
|
208
|
+
/** Check if nofollow attributes should be respected */
|
|
209
|
+
shouldRespectNoFollow: () => Effect.Effect<boolean>;
|
|
210
|
+
/** Get file extensions to skip */
|
|
211
|
+
getSkipFileExtensions: () => Effect.Effect<string[]>;
|
|
212
|
+
/** Get maximum concurrent requests across all domains */
|
|
213
|
+
getMaxConcurrentRequests: () => Effect.Effect<number>;
|
|
214
|
+
/** Get maximum requests per second per domain */
|
|
215
|
+
getMaxRequestsPerSecondPerDomain: () => Effect.Effect<number>;
|
|
216
|
+
/** Check if URLs should be normalized for deduplication */
|
|
217
|
+
shouldNormalizeUrlsForDeduplication: () => Effect.Effect<boolean>;
|
|
218
|
+
/** Get the concurrency level for crawling multiple starting URLs */
|
|
219
|
+
getConcurrency: () => Effect.Effect<number | 'unbounded' | 'inherit'>;
|
|
220
|
+
/** Check if resumable crawling is enabled */
|
|
221
|
+
isResumabilityEnabled: () => Effect.Effect<boolean>;
|
|
222
|
+
}
|
|
223
|
+
// Compiler-emitted base for the SpiderConfig class (not exported).
// It is an Effect.Service class over SpiderConfigService with the identifier
// string "@jambudipa.io/SpiderConfig".
declare const SpiderConfig_base: Effect.Service.Class<SpiderConfigService, "@jambudipa.io/SpiderConfig", {
|
|
224
|
+
// The constructing effect has `never` for both error and requirements types.
readonly effect: Effect.Effect<SpiderConfigService, never, never>;
|
|
225
|
+
}>;
|
|
226
|
+
/**
|
|
227
|
+
* The main SpiderConfig service for dependency injection.
|
|
228
|
+
*
|
|
229
|
+
* Provides default configuration that can be overridden using layers.
|
|
230
|
+
*
|
|
231
|
+
* @example
|
|
232
|
+
* ```typescript
|
|
233
|
+
* const program = Effect.gen(function* () {
|
|
234
|
+
* const config = yield* SpiderConfig;
|
|
235
|
+
* const userAgent = yield* config.getUserAgent();
|
|
236
|
+
* console.log(`Using: ${userAgent}`);
|
|
237
|
+
* });
|
|
238
|
+
*
|
|
239
|
+
* await Effect.runPromise(
|
|
240
|
+
* program.pipe(Effect.provide(SpiderConfig.Default))
|
|
241
|
+
* );
|
|
242
|
+
* ```
|
|
243
|
+
*
|
|
244
|
+
* @group Configuration
|
|
245
|
+
* @public
|
|
246
|
+
*/
|
|
247
|
+
export declare class SpiderConfig extends SpiderConfig_base {
|
|
248
|
+
/**
|
|
249
|
+
* Creates a Layer that provides SpiderConfig with custom options
|
|
250
|
+
* @param config - The configuration options or a pre-made SpiderConfigService
|
|
251
|
+
*/
|
|
252
|
+
static Live: (config: Partial<SpiderConfigOptions> | SpiderConfigService) => Layer.Layer<SpiderConfigService, never, never>;
|
|
253
|
+
}
|
|
254
|
+
/**
 * Builds a SpiderConfigService from an optional, partial set of options.
 *
 * The service is returned directly rather than wrapped in an Effect or Layer.
 * NOTE(review): omitted options presumably fall back to the defaults documented
 * on SpiderConfigOptions — confirm against the implementation.
 *
 * @param options - Any subset of SpiderConfigOptions; may be omitted entirely
 * @returns A SpiderConfigService
 */
export declare const makeSpiderConfig: (options?: Partial<SpiderConfigOptions>) => SpiderConfigService;
|
|
255
|
+
export {};
|
|
256
|
+
//# sourceMappingURL=SpiderConfig.service.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"SpiderConfig.service.d.ts","sourceRoot":"","sources":["../../../src/lib/Config/SpiderConfig.service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,QAAQ,CAAC;AAEvC;;;;;;GAMG;AACH,MAAM,WAAW,oBAAoB;IACnC,8EAA8E;IAC9E,QAAQ,CAAC,cAAc,EAAE,OAAO,CAAC;IACjC,kEAAkE;IAClE,QAAQ,CAAC,YAAY,EAAE,OAAO,CAAC;IAC/B,4DAA4D;IAC5D,QAAQ,CAAC,WAAW,EAAE,OAAO,CAAC;IAC9B,6DAA6D;IAC7D,QAAQ,CAAC,WAAW,EAAE,OAAO,CAAC;IAC9B,sEAAsE;IACtE,QAAQ,CAAC,qBAAqB,EAAE,OAAO,CAAC;IACxC,gEAAgE;IAChE,QAAQ,CAAC,WAAW,EAAE,OAAO,CAAC;CAC/B;AAED;;;;;;GAMG;AACH,MAAM,WAAW,gBAAgB;IAC/B,8FAA8F;IAC9F,QAAQ,CAAC,wBAAwB,EAAE,OAAO,CAAC;IAC3C,oFAAoF;IACpF,QAAQ,CAAC,cAAc,EAAE,OAAO,CAAC;IACjC,uDAAuD;IACvD,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,oDAAoD;IACpD,QAAQ,CAAC,mBAAmB,EAAE,OAAO,CAAC;CACvC;AAED;;;;;;;;GAQG;AACH,MAAM,WAAW,mBAAmB;IAClC,0DAA0D;IAC1D,QAAQ,CAAC,eAAe,EAAE,OAAO,CAAC;IAClC,8DAA8D;IAC9D,QAAQ,CAAC,oBAAoB,EAAE,MAAM,CAAC;IACtC,yEAAyE;IACzE,QAAQ,CAAC,WAAW,EAAE,MAAM,GAAG,WAAW,GAAG,SAAS,CAAC;IACvD,kEAAkE;IAClE,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,wFAAwF;IACxF,QAAQ,CAAC,qBAAqB,EAAE,MAAM,CAAC;IACvC,+EAA+E;IAC/E,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,wEAAwE;IACxE,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,2EAA2E;IAC3E,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,yEAAyE;IACzE,QAAQ,CAAC,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IACnC,wEAAwE;IACxE,QAAQ,CAAC,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IACnC,2DAA2D;IAC3D,QAAQ,CAAC,gBAAgB,EAAE,MAAM,EAAE,CAAC;IACpC,uDAAuD;IACvD,QAAQ,CAAC,eAAe,EAAE,OAAO,CAAC;IAClC,mEAAmE;IACnE,QAAQ,CAAC,eAAe,EAAE,OAAO,CAAC;IAClC;;;;;;;;;;;;;;;;;OAiBG;IACH,QAAQ,CAAC,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IACrD;;;;;;;;;;;;;;OAcG;IACH,QAAQ,CAAC,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;IAC7C;;;;OAIG;IACH,QAAQ,CAAC,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAC;IACvC,mEAAmE;IACnE,QAAQ,CAAC,qBAAqB,EAAE,MAAM,CAAC;IACvC,0DAA0D;IAC1D,QAAQ,CAAC,6BAA6B,EAAE,MAAM,CAAC;IAC/C;;;;;;;;;;;;OAYG;IACH,QAAQ,CAAC,6BAA6B,EAAE,OAAO,CAAC;IAChD;;;;;;;;;;;;;;;OAeG;IACH,QAAQ,CAAC,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IACrC;;;;;;OAMG;IACH,QAAQ,CAAC,kBAAkB,EAAE,OAAO
,CAAC;CACtC;AAED;;;;;;;;;GASG;AACH,MAAM,WAAW,mBAAmB;IAClC,6CAA6C;IAC7C,UAAU,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAC;IACrD,kEAAkE;IAClE,eAAe,EAAE,CACf,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,MAAM,EAChB,wBAAwB,CAAC,EAAE,MAAM,KAC9B,MAAM,CAAC,MAAM,CAAC;QAAE,MAAM,EAAE,OAAO,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IACzD,2CAA2C;IAC3C,YAAY,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IAC1C,4CAA4C;IAC5C,eAAe,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IAC7C,kEAAkE;IAClE,sBAAsB,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IACpD,4CAA4C;IAC5C,qBAAqB,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACpD,qCAAqC;IACrC,uBAAuB,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IACrD;;;;;;;;;;;;;OAaG;IACH,WAAW,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,SAAS,CAAC,CAAC;IACrD,0DAA0D;IAC1D,WAAW,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,SAAS,CAAC,CAAC;IACrD,4CAA4C;IAC5C,qBAAqB,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACpD,uDAAuD;IACvD,qBAAqB,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACpD,kCAAkC;IAClC,qBAAqB,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IACrD,yDAAyD;IACzD,wBAAwB,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IACtD,iDAAiD;IACjD,gCAAgC,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IAC9D,2DAA2D;IAC3D,mCAAmC,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAClE,oEAAoE;IACpE,cAAc,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,WAAW,GAAG,SAAS,CAAC,CAAC;IACtE,6CAA6C;IAC7C,qBAAqB,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;CACrD;;;;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,qBAAa,YAAa,SAAQ,iBAKjC;IACC;;;OAGG;IACH,MAAM,CAAC,IAAI,GAAI,QAAQ,OAAO,CAAC,mBAAmB,CAAC,GAAG,mBAAmB,oDAIrE;CACL;AAgKD,eAAO,MAAM,gBAAgB,GAC3B,UAAS,OAAO,CAAC,mBAAmB,CAAM,KACzC,mBA6QF,CAAC"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cookie Manager Service
|
|
3
|
+
* Manages HTTP cookies for session persistence across requests
|
|
4
|
+
*/
|
|
5
|
+
import { Context, Effect, Layer } from 'effect';
|
|
6
|
+
/**
 * Operations for storing and retrieving HTTP cookies, each returned as an Effect.
 *
 * Every member has `never` as its requirements type. Only `setCookie` and
 * `deserialize` declare an `Error` failure; the other members have `never`
 * as their error type.
 */
export interface CookieManagerService {
|
|
7
|
+
/**
|
|
8
|
+
* Set a cookie for a URL
|
|
9
|
+
*/
|
|
10
|
+
// `cookieString` is presumably a raw Set-Cookie header value — confirm against the implementation.
setCookie: (cookieString: string, url: string) => Effect.Effect<void, Error, never>;
|
|
11
|
+
/**
|
|
12
|
+
* Get all cookies for a URL
|
|
13
|
+
*/
|
|
14
|
+
getCookies: (url: string) => Effect.Effect<string[], never, never>;
|
|
15
|
+
/**
|
|
16
|
+
* Get cookie header string for a URL
|
|
17
|
+
*/
|
|
18
|
+
// Succeeds with `null` rather than failing (presumably when no cookie applies to the URL — confirm).
getCookieHeader: (url: string) => Effect.Effect<string | null, never, never>;
|
|
19
|
+
/**
|
|
20
|
+
* Clear all cookies
|
|
21
|
+
*/
|
|
22
|
+
clearCookies: () => Effect.Effect<void, never, never>;
|
|
23
|
+
/**
|
|
24
|
+
* Serialize cookies for storage
|
|
25
|
+
*/
|
|
26
|
+
// The string format is not specified here; it is presumably what `deserialize` accepts — confirm.
serialize: () => Effect.Effect<string, never, never>;
|
|
27
|
+
/**
|
|
28
|
+
* Load cookies from serialized string
|
|
29
|
+
*/
|
|
30
|
+
deserialize: (data: string) => Effect.Effect<void, Error, never>;
|
|
31
|
+
}
|
|
32
|
+
// Compiler-emitted superclass for the tag class below (not exported).
// Type arguments: the tag class itself, the identifier string "CookieManager",
// and the service shape CookieManagerService.
declare const CookieManager_base: Context.TagClass<CookieManager, "CookieManager", CookieManagerService>;
|
|
33
|
+
/**
 * Context tag for the cookie manager service (service shape: CookieManagerService).
 */
export declare class CookieManager extends CookieManager_base {
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Create a CookieManager service implementation.
*
* Calling the function yields an Effect that succeeds with a
* CookieManagerService; its error and requirements types are both `never`.
|
|
37
|
+
*/
|
|
38
|
+
export declare const makeCookieManager: () => Effect.Effect<CookieManagerService, never, never>;
|
|
39
|
+
/**
|
|
40
|
+
* CookieManager Layer.
*
* Provides the CookieManager tag; the layer's error and requirements types
* are both `never`, so it can be provided without further dependencies.
|
|
41
|
+
*/
|
|
42
|
+
export declare const CookieManagerLive: Layer.Layer<CookieManager, never, never>;
|
|
43
|
+
export {};
|
|
44
|
+
//# sourceMappingURL=CookieManager.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"CookieManager.d.ts","sourceRoot":"","sources":["../../../src/lib/HttpClient/CookieManager.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAO,MAAM,QAAQ,CAAC;AAGrD,MAAM,WAAW,oBAAoB;IACnC;;OAEG;IACH,SAAS,EAAE,CACT,YAAY,EAAE,MAAM,EACpB,GAAG,EAAE,MAAM,KACR,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAEvC;;OAEG;IACH,UAAU,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAEnE;;OAEG;IACH,eAAe,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAE7E;;OAEG;IACH,YAAY,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAEtD;;OAEG;IACH,SAAS,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAErD;;OAEG;IACH,WAAW,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;CAClE;;AAED,qBAAa,aAAc,SAAQ,kBAGhC;CAAG;AAEN;;GAEG;AACH,eAAO,MAAM,iBAAiB,QAAO,MAAM,CAAC,MAAM,CAChD,oBAAoB,EACpB,KAAK,EACL,KAAK,CAmGH,CAAC;AAEL;;GAEG;AACH,eAAO,MAAM,iBAAiB,0CAG7B,CAAC"}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Enhanced HTTP Client
|
|
3
|
+
* Provides advanced HTTP capabilities including POST requests, cookie management, and session handling
|
|
4
|
+
*/
|
|
5
|
+
import { Context, Effect, Layer } from 'effect';
|
|
6
|
+
import { NetworkError, ResponseError } from '../errors.js';
|
|
7
|
+
import { SpiderLogger } from '../Logging/SpiderLogger.service.js';
|
|
8
|
+
import { CookieManager } from './CookieManager.js';
|
|
9
|
+
/**
 * Per-request options accepted by the EnhancedHttpClientService methods.
 * Every field is optional; defaults are not stated in this declaration.
 */
export interface HttpRequestOptions {
|
|
10
|
+
/** HTTP method, restricted to the five listed verbs. */
method?: 'GET' | 'POST' | 'PUT' | 'DELETE' | 'PATCH';
|
|
11
|
+
/** Request headers as a name-to-value map. */
headers?: Record<string, string>;
|
|
12
|
+
/** Request body: a string, FormData or URLSearchParams. */
body?: string | FormData | URLSearchParams;
|
|
13
|
+
/** Request timeout (unit not stated here; presumably milliseconds — confirm). */
timeout?: number;
|
|
14
|
+
/** Whether redirects are followed. */
followRedirects?: boolean;
|
|
15
|
+
/** Credentials mode (the values match fetch's RequestCredentials — confirm it is passed through). */
credentials?: 'omit' | 'same-origin' | 'include';
|
|
16
|
+
}
|
|
17
|
+
/**
 * Response value produced by the EnhancedHttpClientService methods.
 * The body is exposed as a string; `cookies` is the only optional field.
 */
export interface HttpResponse {
|
|
18
|
+
/** URL of the response (presumably the final URL after redirects — confirm). */
url: string;
|
|
19
|
+
/** Numeric HTTP status code. */
status: number;
|
|
20
|
+
/** HTTP status text. */
statusText: string;
|
|
21
|
+
/** Response headers as a name-to-value map. */
headers: Record<string, string>;
|
|
22
|
+
/** Response body as text. */
body: string;
|
|
23
|
+
/** Cookie strings associated with the response (presumably its Set-Cookie values — confirm). */
cookies?: string[];
|
|
24
|
+
}
|
|
25
|
+
/**
 * HTTP operations exposed by the enhanced client.
 *
 * All four members return an Effect that succeeds with an HttpResponse, may
 * fail with NetworkError or ResponseError, and has `never` as its
 * requirements type.
 */
export interface EnhancedHttpClientService {
|
|
26
|
+
/**
|
|
27
|
+
* Make a GET request
|
|
28
|
+
*/
|
|
29
|
+
get: (url: string, options?: HttpRequestOptions) => Effect.Effect<HttpResponse, NetworkError | ResponseError, never>;
|
|
30
|
+
/**
|
|
31
|
+
* Make a POST request
|
|
32
|
+
*/
|
|
33
|
+
// `data` is typed `any`, so how it is encoded into the request body is not visible here — confirm in the implementation.
post: (url: string, data?: any, options?: HttpRequestOptions) => Effect.Effect<HttpResponse, NetworkError | ResponseError, never>;
|
|
34
|
+
/**
|
|
35
|
+
* Make a request with any method
|
|
36
|
+
*/
|
|
37
|
+
// The method comes from `options.method`, which is limited to GET, POST, PUT, DELETE and PATCH.
request: (url: string, options?: HttpRequestOptions) => Effect.Effect<HttpResponse, NetworkError | ResponseError, never>;
|
|
38
|
+
/**
|
|
39
|
+
* Submit a form
|
|
40
|
+
*/
|
|
41
|
+
// `formData` is a flat map of field name to string value.
submitForm: (url: string, formData: Record<string, string>, options?: HttpRequestOptions) => Effect.Effect<HttpResponse, NetworkError | ResponseError, never>;
|
|
42
|
+
}
|
|
43
|
+
// Compiler-emitted superclass for the tag class below (not exported).
// Type arguments: the tag class itself, the identifier string "EnhancedHttpClient",
// and the service shape EnhancedHttpClientService.
declare const EnhancedHttpClient_base: Context.TagClass<EnhancedHttpClient, "EnhancedHttpClient", EnhancedHttpClientService>;
|
|
44
|
+
/**
 * Context tag for the enhanced HTTP client (service shape: EnhancedHttpClientService).
 */
export declare class EnhancedHttpClient extends EnhancedHttpClient_base {
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Create an EnhancedHttpClient service
*
* This is an Effect value, not a function: running it requires SpiderLogger
* and CookieManager, and its error type is `never`. It succeeds with an object
* exposing get, post, request and submitForm. The response type is inferred
* inline for each method; it has the same fields as HttpResponse except that
* `cookies` is always present (`string[]`, not optional).
|
|
48
|
+
*/
|
|
49
|
+
export declare const makeEnhancedHttpClient: Effect.Effect<{
|
|
50
|
+
get: (url: string, options?: HttpRequestOptions) => Effect.Effect<{
|
|
51
|
+
url: string;
|
|
52
|
+
status: number;
|
|
53
|
+
statusText: string;
|
|
54
|
+
headers: Record<string, string>;
|
|
55
|
+
body: string;
|
|
56
|
+
cookies: string[];
|
|
57
|
+
}, NetworkError | ResponseError, never>;
|
|
58
|
+
post: (url: string, data?: any, options?: HttpRequestOptions) => Effect.Effect<{
|
|
59
|
+
url: string;
|
|
60
|
+
status: number;
|
|
61
|
+
statusText: string;
|
|
62
|
+
headers: Record<string, string>;
|
|
63
|
+
body: string;
|
|
64
|
+
cookies: string[];
|
|
65
|
+
}, NetworkError | ResponseError, never>;
|
|
66
|
+
request: (url: string, options?: HttpRequestOptions) => Effect.Effect<{
|
|
67
|
+
url: string;
|
|
68
|
+
status: number;
|
|
69
|
+
statusText: string;
|
|
70
|
+
headers: Record<string, string>;
|
|
71
|
+
body: string;
|
|
72
|
+
cookies: string[];
|
|
73
|
+
}, NetworkError | ResponseError, never>;
|
|
74
|
+
submitForm: (url: string, formData: Record<string, string>, options?: HttpRequestOptions) => Effect.Effect<{
|
|
75
|
+
url: string;
|
|
76
|
+
status: number;
|
|
77
|
+
statusText: string;
|
|
78
|
+
headers: Record<string, string>;
|
|
79
|
+
body: string;
|
|
80
|
+
cookies: string[];
|
|
81
|
+
}, NetworkError | ResponseError, never>;
|
|
82
|
+
}, never, SpiderLogger | CookieManager>;
|
|
83
|
+
/**
|
|
84
|
+
* EnhancedHttpClient Layer with dependencies.
*
* Provides the EnhancedHttpClient tag and requires SpiderLogger and
* CookieManager from the environment; its error type is `never`.
|
|
85
|
+
*/
|
|
86
|
+
export declare const EnhancedHttpClientLive: Layer.Layer<EnhancedHttpClient, never, SpiderLogger | CookieManager>;
|
|
87
|
+
export {};
|
|
88
|
+
//# sourceMappingURL=EnhancedHttpClient.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"EnhancedHttpClient.d.ts","sourceRoot":"","sources":["../../../src/lib/HttpClient/EnhancedHttpClient.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,QAAQ,CAAC;AAChD,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC3D,OAAO,EAAE,YAAY,EAAE,MAAM,oCAAoC,CAAC;AAClE,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAEnD,MAAM,WAAW,kBAAkB;IACjC,MAAM,CAAC,EAAE,KAAK,GAAG,MAAM,GAAG,KAAK,GAAG,QAAQ,GAAG,OAAO,CAAC;IACrD,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,IAAI,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,eAAe,CAAC;IAC3C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,WAAW,CAAC,EAAE,MAAM,GAAG,aAAa,GAAG,SAAS,CAAC;CAClD;AAED,MAAM,WAAW,YAAY;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,MAAM,WAAW,yBAAyB;IACxC;;OAEG;IACH,GAAG,EAAE,CACH,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE,kBAAkB,KACzB,MAAM,CAAC,MAAM,CAAC,YAAY,EAAE,YAAY,GAAG,aAAa,EAAE,KAAK,CAAC,CAAC;IAEtE;;OAEG;IACH,IAAI,EAAE,CACJ,GAAG,EAAE,MAAM,EACX,IAAI,CAAC,EAAE,GAAG,EACV,OAAO,CAAC,EAAE,kBAAkB,KACzB,MAAM,CAAC,MAAM,CAAC,YAAY,EAAE,YAAY,GAAG,aAAa,EAAE,KAAK,CAAC,CAAC;IAEtE;;OAEG;IACH,OAAO,EAAE,CACP,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE,kBAAkB,KACzB,MAAM,CAAC,MAAM,CAAC,YAAY,EAAE,YAAY,GAAG,aAAa,EAAE,KAAK,CAAC,CAAC;IAEtE;;OAEG;IACH,UAAU,EAAE,CACV,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EAChC,OAAO,CAAC,EAAE,kBAAkB,KACzB,MAAM,CAAC,MAAM,CAAC,YAAY,EAAE,YAAY,GAAG,aAAa,EAAE,KAAK,CAAC,CAAC;CACvE;;AAED,qBAAa,kBAAmB,SAAQ,uBAGrC;CAAG;AAEN;;GAEG;AACH,eAAO,MAAM,sBAAsB;eAuHpB,MAAM,YAAY,kBAAkB;;;;;;;;gBAGnC,MAAM,SAAS,GAAG,YAAY,kBAAkB;;;;;;;;mBAtHpC,MAAM,YAAW,kBAAkB;;;;;;;;sBA6IpD,MAAM,YACD,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,YACtB,kBAAkB;;;;;;;;uCAmBhC,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,sBAAsB,sEAGlC,CAAC"}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Session Store Service
|
|
3
|
+
* Manages user sessions including cookies, tokens, and authentication state
|
|
4
|
+
*/
|
|
5
|
+
import { Context, Effect, Layer, Option } from 'effect';
|
|
6
|
+
import { CookieManager } from './CookieManager.js';
|
|
7
|
+
import { TokenType } from '../StateManager/StateManager.service.js';
|
|
8
|
+
export interface Session {
|
|
9
|
+
id: string;
|
|
10
|
+
cookies: string;
|
|
11
|
+
tokens: Map<TokenType, string>;
|
|
12
|
+
userData?: Record<string, any>;
|
|
13
|
+
createdAt: Date;
|
|
14
|
+
lastUsedAt: Date;
|
|
15
|
+
expiresAt?: Date;
|
|
16
|
+
}
|
|
17
|
+
export interface Credentials {
|
|
18
|
+
username: string;
|
|
19
|
+
password: string;
|
|
20
|
+
[key: string]: any;
|
|
21
|
+
}
|
|
22
|
+
export interface SessionStoreService {
|
|
23
|
+
/**
|
|
24
|
+
* Create a new session
|
|
25
|
+
*/
|
|
26
|
+
createSession: (id?: string) => Effect.Effect<Session, never, never>;
|
|
27
|
+
/**
|
|
28
|
+
* Get current session
|
|
29
|
+
*/
|
|
30
|
+
getCurrentSession: () => Effect.Effect<Option.Option<Session>, never, never>;
|
|
31
|
+
/**
|
|
32
|
+
* Load a session by ID
|
|
33
|
+
*/
|
|
34
|
+
loadSession: (id: string) => Effect.Effect<void, Error, never>;
|
|
35
|
+
/**
|
|
36
|
+
* Save current session
|
|
37
|
+
*/
|
|
38
|
+
saveSession: () => Effect.Effect<string, Error, never>;
|
|
39
|
+
/**
|
|
40
|
+
* Clear current session
|
|
41
|
+
*/
|
|
42
|
+
clearSession: () => Effect.Effect<void, never, never>;
|
|
43
|
+
/**
|
|
44
|
+
* Check if session is valid (not expired)
|
|
45
|
+
*/
|
|
46
|
+
isSessionValid: () => Effect.Effect<boolean, never, never>;
|
|
47
|
+
/**
|
|
48
|
+
* Update session data
|
|
49
|
+
*/
|
|
50
|
+
updateSessionData: (data: Record<string, any>) => Effect.Effect<void, Error, never>;
|
|
51
|
+
/**
|
|
52
|
+
* Export session for persistence
|
|
53
|
+
*/
|
|
54
|
+
exportSession: () => Effect.Effect<string, Error, never>;
|
|
55
|
+
/**
|
|
56
|
+
* Import session from persistence
|
|
57
|
+
*/
|
|
58
|
+
importSession: (data: string) => Effect.Effect<void, Error, never>;
|
|
59
|
+
}
|
|
60
|
+
declare const SessionStore_base: Context.TagClass<SessionStore, "SessionStore", SessionStoreService>;
|
|
61
|
+
export declare class SessionStore extends SessionStore_base {
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Create a SessionStore service implementation
|
|
65
|
+
*/
|
|
66
|
+
export declare const makeSessionStore: Effect.Effect<{
|
|
67
|
+
createSession: (id?: string) => Effect.Effect<Session, never, never>;
|
|
68
|
+
getCurrentSession: () => Effect.Effect<Option.None<Session> | Option.Some<Session>, never, never>;
|
|
69
|
+
loadSession: (id: string) => Effect.Effect<undefined, Error, never>;
|
|
70
|
+
saveSession: () => Effect.Effect<string, Error, never>;
|
|
71
|
+
clearSession: () => Effect.Effect<void, never, never>;
|
|
72
|
+
isSessionValid: () => Effect.Effect<boolean, never, never>;
|
|
73
|
+
updateSessionData: (data: Record<string, any>) => Effect.Effect<undefined, Error, never>;
|
|
74
|
+
exportSession: () => Effect.Effect<string, Error, never>;
|
|
75
|
+
importSession: (data: string) => Effect.Effect<void, Error, never>;
|
|
76
|
+
}, never, CookieManager>;
|
|
77
|
+
/**
|
|
78
|
+
* SessionStore Layer with dependencies
|
|
79
|
+
*/
|
|
80
|
+
export declare const SessionStoreLive: Layer.Layer<SessionStore, never, CookieManager>;
|
|
81
|
+
export {};
|
|
82
|
+
//# sourceMappingURL=SessionStore.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"SessionStore.d.ts","sourceRoot":"","sources":["../../../src/lib/HttpClient/SessionStore.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAO,MAAM,QAAQ,CAAC;AAC7D,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AAEpE,MAAM,WAAW,OAAO;IACtB,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAC/B,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC/B,SAAS,EAAE,IAAI,CAAC;IAChB,UAAU,EAAE,IAAI,CAAC;IACjB,SAAS,CAAC,EAAE,IAAI,CAAC;CAClB;AAED,MAAM,WAAW,WAAW;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAC;CACpB;AAED,MAAM,WAAW,mBAAmB;IAClC;;OAEG;IACH,aAAa,EAAE,CAAC,EAAE,CAAC,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAErE;;OAEG;IACH,iBAAiB,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAE7E;;OAEG;IACH,WAAW,EAAE,CAAC,EAAE,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAE/D;;OAEG;IACH,WAAW,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAEvD;;OAEG;IACH,YAAY,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAEtD;;OAEG;IACH,cAAc,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAE3D;;OAEG;IACH,iBAAiB,EAAE,CACjB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,KACtB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAEvC;;OAEG;IACH,aAAa,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAEzD;;OAEG;IACH,aAAa,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;CACpE;;AAED,qBAAa,YAAa,SAAQ,iBAG/B;CAAG;AAEN;;GAEG;AACH,eAAO,MAAM,gBAAgB;yBAWJ,MAAM;;sBA6CT,MAAM;;;;8BAoGE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;;0BA6CvB,MAAM;wBA+B9B,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,gBAAgB,iDAA+C,CAAC"}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token Extractor Service
|
|
3
|
+
* Extracts and manages various types of tokens from HTTP responses
|
|
4
|
+
*/
|
|
5
|
+
import { Context, Effect, Layer } from 'effect';
|
|
6
|
+
import { StateManager, TokenType } from '../StateManager/StateManager.service.js';
|
|
7
|
+
import { EnhancedHttpClient, type HttpResponse } from './EnhancedHttpClient.js';
|
|
8
|
+
import { SpiderLogger } from '../Logging/SpiderLogger.service.js';
|
|
9
|
+
export interface TokenInfo {
|
|
10
|
+
type: TokenType;
|
|
11
|
+
value: string;
|
|
12
|
+
source: 'html' | 'header' | 'script' | 'json';
|
|
13
|
+
selector?: string;
|
|
14
|
+
pattern?: string;
|
|
15
|
+
}
|
|
16
|
+
export interface TokenExtractorService {
|
|
17
|
+
/**
|
|
18
|
+
* Extract all tokens from an HTTP response
|
|
19
|
+
*/
|
|
20
|
+
extractTokensFromResponse: (response: HttpResponse) => Effect.Effect<TokenInfo[], Error, never>;
|
|
21
|
+
/**
|
|
22
|
+
* Extract CSRF token from response
|
|
23
|
+
*/
|
|
24
|
+
extractCSRFFromResponse: (response: HttpResponse) => Effect.Effect<string | null, Error, never>;
|
|
25
|
+
/**
|
|
26
|
+
* Extract API token from response
|
|
27
|
+
*/
|
|
28
|
+
extractAPIFromResponse: (response: HttpResponse) => Effect.Effect<string | null, Error, never>;
|
|
29
|
+
/**
|
|
30
|
+
* Make authenticated request with automatic token injection
|
|
31
|
+
*/
|
|
32
|
+
authenticatedRequest: (url: string, options?: {
|
|
33
|
+
requireCSRF?: boolean;
|
|
34
|
+
requireAPI?: boolean;
|
|
35
|
+
customHeaders?: Record<string, string>;
|
|
36
|
+
}) => Effect.Effect<HttpResponse, Error, never>;
|
|
37
|
+
/**
|
|
38
|
+
* Detect and handle token rotation
|
|
39
|
+
*/
|
|
40
|
+
detectTokenRotation: (oldToken: string, response: HttpResponse, type: TokenType) => Effect.Effect<boolean, Error, never>;
|
|
41
|
+
/**
|
|
42
|
+
* Refresh expired tokens
|
|
43
|
+
*/
|
|
44
|
+
refreshToken: (type: TokenType, refreshUrl?: string) => Effect.Effect<string, Error, never>;
|
|
45
|
+
}
|
|
46
|
+
declare const TokenExtractor_base: Context.TagClass<TokenExtractor, "TokenExtractor", TokenExtractorService>;
|
|
47
|
+
export declare class TokenExtractor extends TokenExtractor_base {
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Create a TokenExtractor service implementation
|
|
51
|
+
*/
|
|
52
|
+
export declare const makeTokenExtractor: Effect.Effect<TokenExtractorService, never, SpiderLogger | EnhancedHttpClient | StateManager>;
|
|
53
|
+
/**
|
|
54
|
+
* TokenExtractor Layer with dependencies
|
|
55
|
+
*/
|
|
56
|
+
export declare const TokenExtractorLive: Layer.Layer<TokenExtractor, never, SpiderLogger | EnhancedHttpClient | StateManager>;
|
|
57
|
+
export {};
|
|
58
|
+
//# sourceMappingURL=TokenExtractor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"TokenExtractor.d.ts","sourceRoot":"","sources":["../../../src/lib/HttpClient/TokenExtractor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,QAAQ,CAAC;AAEhD,OAAO,EACL,YAAY,EACZ,SAAS,EACV,MAAM,yCAAyC,CAAC;AACjD,OAAO,EAAE,kBAAkB,EAAE,KAAK,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAChF,OAAO,EAAE,YAAY,EAAE,MAAM,oCAAoC,CAAC;AAElE,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,SAAS,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,GAAG,QAAQ,GAAG,QAAQ,GAAG,MAAM,CAAC;IAC9C,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,qBAAqB;IACpC;;OAEG;IACH,yBAAyB,EAAE,CACzB,QAAQ,EAAE,YAAY,KACnB,MAAM,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAE9C;;OAEG;IACH,uBAAuB,EAAE,CACvB,QAAQ,EAAE,YAAY,KACnB,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAEhD;;OAEG;IACH,sBAAsB,EAAE,CACtB,QAAQ,EAAE,YAAY,KACnB,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAEhD;;OAEG;IACH,oBAAoB,EAAE,CACpB,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE;QACR,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,UAAU,CAAC,EAAE,OAAO,CAAC;QACrB,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KACxC,KACE,MAAM,CAAC,MAAM,CAAC,YAAY,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAE/C;;OAEG;IACH,mBAAmB,EAAE,CACnB,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,YAAY,EACtB,IAAI,EAAE,SAAS,KACZ,MAAM,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IAE1C;;OAEG;IACH,YAAY,EAAE,CACZ,IAAI,EAAE,SAAS,EACf,UAAU,CAAC,EAAE,MAAM,KAChB,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;CAC1C;;AAED,qBAAa,cAAe,SAAQ,mBAGjC;CAAG;AAEN;;GAEG;AACH,eAAO,MAAM,kBAAkB,+FA+b7B,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,kBAAkB,sFAG9B,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/lib/HttpClient/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,cAAc,oBAAoB,CAAC;AACnC,cAAc,yBAAyB,CAAC;AACxC,cAAc,mBAAmB,CAAC;AAClC,cAAc,qBAAqB,CAAC"}
|