@jambudipa/spider 0.2.0 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/README.md +75 -35
  2. package/dist/browser/BrowserManager.d.ts +63 -0
  3. package/dist/browser/BrowserManager.d.ts.map +1 -0
  4. package/dist/browser/PlaywrightAdapter.d.ts +166 -0
  5. package/dist/browser/PlaywrightAdapter.d.ts.map +1 -0
  6. package/dist/examples/01-basic-crawl-working.d.ts +13 -0
  7. package/dist/examples/01-basic-crawl-working.d.ts.map +1 -0
  8. package/dist/examples/02-multiple-urls-working.d.ts +13 -0
  9. package/dist/examples/02-multiple-urls-working.d.ts.map +1 -0
  10. package/dist/examples/03-url-filtering.d.ts +13 -0
  11. package/dist/examples/03-url-filtering.d.ts.map +1 -0
  12. package/dist/examples/04-robots-compliance.d.ts +14 -0
  13. package/dist/examples/04-robots-compliance.d.ts.map +1 -0
  14. package/dist/examples/05-link-extraction-selectors.d.ts +14 -0
  15. package/dist/examples/05-link-extraction-selectors.d.ts.map +1 -0
  16. package/dist/examples/06-custom-middleware.d.ts +18 -0
  17. package/dist/examples/06-custom-middleware.d.ts.map +1 -0
  18. package/dist/examples/07-resumability-demo.d.ts +14 -0
  19. package/dist/examples/07-resumability-demo.d.ts.map +1 -0
  20. package/dist/examples/08-worker-monitoring.d.ts +15 -0
  21. package/dist/examples/08-worker-monitoring.d.ts.map +1 -0
  22. package/dist/examples/09-error-handling-recovery.d.ts +15 -0
  23. package/dist/examples/09-error-handling-recovery.d.ts.map +1 -0
  24. package/dist/index.d.ts +33 -0
  25. package/dist/index.d.ts.map +1 -0
  26. package/dist/index.js +3596 -1440
  27. package/dist/index.js.map +1 -1
  28. package/dist/lib/BrowserEngine/BrowserEngine.service.d.ts +107 -0
  29. package/dist/lib/BrowserEngine/BrowserEngine.service.d.ts.map +1 -0
  30. package/dist/lib/Config/SpiderConfig.service.d.ts +256 -0
  31. package/dist/lib/Config/SpiderConfig.service.d.ts.map +1 -0
  32. package/dist/lib/HttpClient/CookieManager.d.ts +58 -0
  33. package/dist/lib/HttpClient/CookieManager.d.ts.map +1 -0
  34. package/dist/lib/HttpClient/EnhancedHttpClient.d.ts +63 -0
  35. package/dist/lib/HttpClient/EnhancedHttpClient.d.ts.map +1 -0
  36. package/dist/lib/HttpClient/SessionStore.d.ts +114 -0
  37. package/dist/lib/HttpClient/SessionStore.d.ts.map +1 -0
  38. package/dist/lib/HttpClient/TokenExtractor.d.ts +83 -0
  39. package/dist/lib/HttpClient/TokenExtractor.d.ts.map +1 -0
  40. package/dist/lib/HttpClient/index.d.ts +8 -0
  41. package/dist/lib/HttpClient/index.d.ts.map +1 -0
  42. package/dist/lib/LinkExtractor/LinkExtractor.service.d.ts +166 -0
  43. package/dist/lib/LinkExtractor/LinkExtractor.service.d.ts.map +1 -0
  44. package/dist/lib/LinkExtractor/index.d.ts +37 -0
  45. package/dist/lib/LinkExtractor/index.d.ts.map +1 -0
  46. package/dist/lib/Logging/FetchLogger.d.ts +24 -0
  47. package/dist/lib/Logging/FetchLogger.d.ts.map +1 -0
  48. package/dist/lib/Logging/SpiderLogger.service.d.ts +37 -0
  49. package/dist/lib/Logging/SpiderLogger.service.d.ts.map +1 -0
  50. package/dist/lib/Middleware/SpiderMiddleware.d.ts +239 -0
  51. package/dist/lib/Middleware/SpiderMiddleware.d.ts.map +1 -0
  52. package/dist/lib/Middleware/types.d.ts +99 -0
  53. package/dist/lib/Middleware/types.d.ts.map +1 -0
  54. package/dist/lib/PageData/PageData.d.ts +28 -0
  55. package/dist/lib/PageData/PageData.d.ts.map +1 -0
  56. package/dist/lib/Resumability/Resumability.service.d.ts +178 -0
  57. package/dist/lib/Resumability/Resumability.service.d.ts.map +1 -0
  58. package/dist/lib/Resumability/backends/FileStorageBackend.d.ts +47 -0
  59. package/dist/lib/Resumability/backends/FileStorageBackend.d.ts.map +1 -0
  60. package/dist/lib/Resumability/backends/PostgresStorageBackend.d.ts +95 -0
  61. package/dist/lib/Resumability/backends/PostgresStorageBackend.d.ts.map +1 -0
  62. package/dist/lib/Resumability/backends/RedisStorageBackend.d.ts +92 -0
  63. package/dist/lib/Resumability/backends/RedisStorageBackend.d.ts.map +1 -0
  64. package/dist/lib/Resumability/index.d.ts +51 -0
  65. package/dist/lib/Resumability/index.d.ts.map +1 -0
  66. package/dist/lib/Resumability/strategies.d.ts +76 -0
  67. package/dist/lib/Resumability/strategies.d.ts.map +1 -0
  68. package/dist/lib/Resumability/types.d.ts +201 -0
  69. package/dist/lib/Resumability/types.d.ts.map +1 -0
  70. package/dist/lib/Robots/Robots.service.d.ts +78 -0
  71. package/dist/lib/Robots/Robots.service.d.ts.map +1 -0
  72. package/dist/lib/Scheduler/SpiderScheduler.service.d.ts +211 -0
  73. package/dist/lib/Scheduler/SpiderScheduler.service.d.ts.map +1 -0
  74. package/dist/lib/Scraper/Scraper.service.d.ts +123 -0
  75. package/dist/lib/Scraper/Scraper.service.d.ts.map +1 -0
  76. package/dist/lib/Spider/Spider.service.d.ts +249 -0
  77. package/dist/lib/Spider/Spider.service.d.ts.map +1 -0
  78. package/dist/lib/StateManager/StateManager.service.d.ts +107 -0
  79. package/dist/lib/StateManager/StateManager.service.d.ts.map +1 -0
  80. package/dist/lib/StateManager/index.d.ts +5 -0
  81. package/dist/lib/StateManager/index.d.ts.map +1 -0
  82. package/dist/lib/UrlDeduplicator/UrlDeduplicator.service.d.ts +58 -0
  83. package/dist/lib/UrlDeduplicator/UrlDeduplicator.service.d.ts.map +1 -0
  84. package/dist/lib/WebScrapingEngine/WebScrapingEngine.service.d.ts +110 -0
  85. package/dist/lib/WebScrapingEngine/WebScrapingEngine.service.d.ts.map +1 -0
  86. package/dist/lib/WebScrapingEngine/index.d.ts +5 -0
  87. package/dist/lib/WebScrapingEngine/index.d.ts.map +1 -0
  88. package/dist/lib/WorkerHealth/WorkerHealthMonitor.service.d.ts +39 -0
  89. package/dist/lib/WorkerHealth/WorkerHealthMonitor.service.d.ts.map +1 -0
  90. package/dist/lib/api-facades.d.ts +313 -0
  91. package/dist/lib/api-facades.d.ts.map +1 -0
  92. package/dist/lib/errors/effect-errors.d.ts +179 -0
  93. package/dist/lib/errors/effect-errors.d.ts.map +1 -0
  94. package/dist/lib/errors.d.ts +172 -0
  95. package/dist/lib/errors.d.ts.map +1 -0
  96. package/dist/lib/utils/FileUtils.d.ts +284 -0
  97. package/dist/lib/utils/FileUtils.d.ts.map +1 -0
  98. package/dist/lib/utils/JsonUtils.d.ts +196 -0
  99. package/dist/lib/utils/JsonUtils.d.ts.map +1 -0
  100. package/dist/lib/utils/RegexUtils.d.ts +257 -0
  101. package/dist/lib/utils/RegexUtils.d.ts.map +1 -0
  102. package/dist/lib/utils/SchemaUtils.d.ts +251 -0
  103. package/dist/lib/utils/SchemaUtils.d.ts.map +1 -0
  104. package/dist/lib/utils/UrlUtils.d.ts +223 -0
  105. package/dist/lib/utils/UrlUtils.d.ts.map +1 -0
  106. package/dist/lib/utils/effect-migration.d.ts +31 -0
  107. package/dist/lib/utils/effect-migration.d.ts.map +1 -0
  108. package/dist/lib/utils/index.d.ts +15 -0
  109. package/dist/lib/utils/index.d.ts.map +1 -0
  110. package/dist/lib/utils/url-deduplication.d.ts +108 -0
  111. package/dist/lib/utils/url-deduplication.d.ts.map +1 -0
  112. package/dist/lib/utils/url-deduplication.test.d.ts +5 -0
  113. package/dist/lib/utils/url-deduplication.test.d.ts.map +1 -0
  114. package/dist/test/infrastructure/EffectTestUtils.d.ts +167 -0
  115. package/dist/test/infrastructure/EffectTestUtils.d.ts.map +1 -0
  116. package/package.json +23 -9
@@ -0,0 +1,83 @@
1
+ /**
2
+ * Token Extractor Service
3
+ * Extracts and manages various types of tokens from HTTP responses
4
+ */
5
+ import { Context, Effect, Layer, Option } from 'effect';
6
+ import { StateManager, TokenType } from '../StateManager/StateManager.service.js';
7
+ import { EnhancedHttpClient, type HttpResponse } from './EnhancedHttpClient.js';
8
+ import { SpiderLogger } from '../Logging/SpiderLogger.service.js';
9
+ import { NetworkError, ParseError, TimeoutError } from '../errors/effect-errors.js';
10
+ declare const TokenNotAvailableError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
11
+ readonly _tag: "TokenNotAvailableError";
12
+ } & Readonly<A>;
13
+ export declare class TokenNotAvailableError extends TokenNotAvailableError_base<{
14
+ readonly message: string;
15
+ }> {
16
+ }
17
+ declare const TokenRefreshError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
18
+ readonly _tag: "TokenRefreshError";
19
+ } & Readonly<A>;
20
+ export declare class TokenRefreshError extends TokenRefreshError_base<{
21
+ readonly message: string;
22
+ readonly tokenType: TokenType;
23
+ }> {
24
+ }
25
+ declare const NoRefreshUrlError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
26
+ readonly _tag: "NoRefreshUrlError";
27
+ } & Readonly<A>;
28
+ export declare class NoRefreshUrlError extends NoRefreshUrlError_base<{
29
+ readonly message: string;
30
+ }> {
31
+ }
32
+ export interface TokenInfo {
33
+ type: TokenType;
34
+ value: string;
35
+ source: 'html' | 'header' | 'script' | 'json';
36
+ selector?: string;
37
+ pattern?: string;
38
+ }
39
+ type HttpRequestError = NetworkError | ParseError | TimeoutError;
40
+ type TokenExtractorError = HttpRequestError | Error | TokenNotAvailableError | TokenRefreshError | NoRefreshUrlError;
41
+ export interface TokenExtractorService {
42
+ /**
43
+ * Extract all tokens from an HTTP response
44
+ */
45
+ extractTokensFromResponse: (response: HttpResponse) => Effect.Effect<TokenInfo[]>;
46
+ /**
47
+ * Extract CSRF token from response
48
+ */
49
+ extractCSRFFromResponse: (response: HttpResponse) => Effect.Effect<Option.Option<string>>;
50
+ /**
51
+ * Extract API token from response
52
+ */
53
+ extractAPIFromResponse: (response: HttpResponse) => Effect.Effect<Option.Option<string>>;
54
+ /**
55
+ * Make authenticated request with automatic token injection
56
+ */
57
+ authenticatedRequest: (url: string, options?: {
58
+ requireCSRF?: boolean;
59
+ requireAPI?: boolean;
60
+ customHeaders?: Record<string, string>;
61
+ }) => Effect.Effect<HttpResponse, TokenExtractorError>;
62
+ /**
63
+ * Detect and handle token rotation
64
+ */
65
+ detectTokenRotation: (oldToken: string, response: HttpResponse, type: TokenType) => Effect.Effect<boolean>;
66
+ /**
67
+ * Refresh expired tokens
68
+ */
69
+ refreshToken: (type: TokenType, refreshUrl?: string) => Effect.Effect<string, TokenExtractorError>;
70
+ }
71
+ export type { TokenExtractorError };
72
+ declare const TokenExtractor_base: Context.TagClass<TokenExtractor, "TokenExtractor", TokenExtractorService>;
73
+ export declare class TokenExtractor extends TokenExtractor_base {
74
+ }
75
+ /**
76
+ * Create a TokenExtractor service implementation
77
+ */
78
+ export declare const makeTokenExtractor: Effect.Effect<TokenExtractorService, never, SpiderLogger | EnhancedHttpClient | StateManager>;
79
+ /**
80
+ * TokenExtractor Layer with dependencies
81
+ */
82
+ export declare const TokenExtractorLive: Layer.Layer<TokenExtractor, never, SpiderLogger | EnhancedHttpClient | StateManager>;
83
+ //# sourceMappingURL=TokenExtractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TokenExtractor.d.ts","sourceRoot":"","sources":["../../../src/lib/HttpClient/TokenExtractor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAW,MAAM,EAAkB,MAAM,QAAQ,CAAC;AAEjF,OAAO,EACL,YAAY,EACZ,SAAS,EACV,MAAM,yCAAyC,CAAC;AACjD,OAAO,EAAE,kBAAkB,EAAE,KAAK,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAChF,OAAO,EAAE,YAAY,EAAE,MAAM,oCAAoC,CAAC;AAClE,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;;;;AAGpF,qBAAa,sBAAuB,SAAQ,4BAA2C;IACrF,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B,CAAC;CAAG;;;;AAEL,qBAAa,iBAAkB,SAAQ,uBAAsC;IAC3E,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,SAAS,EAAE,SAAS,CAAC;CAC/B,CAAC;CAAG;;;;AAEL,qBAAa,iBAAkB,SAAQ,uBAAsC;IAC3E,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B,CAAC;CAAG;AAEL,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,SAAS,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,GAAG,QAAQ,GAAG,QAAQ,GAAG,MAAM,CAAC;IAC9C,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAGD,KAAK,gBAAgB,GAAG,YAAY,GAAG,UAAU,GAAG,YAAY,CAAC;AAGjE,KAAK,mBAAmB,GAAG,gBAAgB,GAAG,KAAK,GAAG,sBAAsB,GAAG,iBAAiB,GAAG,iBAAiB,CAAC;AAErH,MAAM,WAAW,qBAAqB;IACpC;;OAEG;IACH,yBAAyB,EAAE,CACzB,QAAQ,EAAE,YAAY,KACnB,MAAM,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;IAEhC;;OAEG;IACH,uBAAuB,EAAE,CACvB,QAAQ,EAAE,YAAY,KACnB,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;IAE1C;;OAEG;IACH,sBAAsB,EAAE,CACtB,QAAQ,EAAE,YAAY,KACnB,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;IAE1C;;OAEG;IACH,oBAAoB,EAAE,CACpB,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE;QACR,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,UAAU,CAAC,EAAE,OAAO,CAAC;QACrB,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KACxC,KACE,MAAM,CAAC,MAAM,CAAC,YAAY,EAAE,mBAAmB,CAAC,CAAC;IAEtD;;OAEG;IACH,mBAAmB,EAAE,CACnB,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,YAAY,EACtB,IAAI,EAAE,SAAS,KACZ,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAE5B;;OAEG;IACH,YAAY,EAAE,CACZ,IAAI,EAAE,SAAS,EACf,UAAU,CAAC,EAAE,MAAM,KAChB,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,mBAAmB,CAAC,CAAC;CACjD;AAED,YAAY,EAAE,mBAAmB,EAAE,CAAC;;AAEpC,qBAAa,cAAe,SAAQ,mBAGjC;CAAG;AAEN;;GAE
G;AACH,eAAO,MAAM,kBAAkB,+FAkd7B,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,kBAAkB,sFAG9B,CAAC"}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * HTTP Client module exports
3
+ */
4
+ export * from './CookieManager.js';
5
+ export * from './EnhancedHttpClient.js';
6
+ export * from './SessionStore.js';
7
+ export * from './TokenExtractor.js';
8
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/lib/HttpClient/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,cAAc,oBAAoB,CAAC;AACnC,cAAc,yBAAyB,CAAC;AACxC,cAAc,mBAAmB,CAAC;AAClC,cAAc,qBAAqB,CAAC"}
@@ -0,0 +1,166 @@
1
+ import { Effect } from 'effect';
2
+ /**
3
+ * Configuration for link extraction behavior.
4
+ *
5
+ * Focuses purely on HOW to extract links from HTML documents,
6
+ * not on processing or validating the extracted URLs.
7
+ *
8
+ * @example
9
+ * ```typescript
10
+ * // Extract from specific CSS selectors
11
+ * const config: LinkExtractorConfig = {
12
+ * restrictCss: ['a.product-link', 'form[action]'],
13
+ * tags: ['a', 'form'],
14
+ * attrs: ['href', 'action']
15
+ * };
16
+ *
17
+ * // Extract from all standard elements
18
+ * const config: LinkExtractorConfig = {
19
+ * tags: ['a', 'area', 'form', 'frame', 'iframe'],
20
+ * attrs: ['href', 'action', 'src']
21
+ * };
22
+ * ```
23
+ *
24
+ * @group LinkExtractor
25
+ * @public
26
+ */
27
+ export interface LinkExtractorConfig {
28
+ /**
29
+ * CSS selectors to restrict extraction to specific elements.
30
+ * If specified, only elements matching these selectors will be processed.
31
+ *
32
+ * @example
33
+ * ```typescript
34
+ * restrictCss: [
35
+ * 'a.product-link', // Only product links
36
+ * '.content a', // Links within content area
37
+ * 'form[method="post"]' // POST forms only
38
+ * ]
39
+ * ```
40
+ */
41
+ readonly restrictCss?: string[];
42
+ /**
43
+ * HTML tag names to extract links from.
44
+ * Defaults to common link-containing elements.
45
+ *
46
+ * @example ['a', 'area', 'form', 'frame', 'iframe', 'link']
47
+ */
48
+ readonly tags?: string[];
49
+ /**
50
+ * HTML attributes to extract URLs from.
51
+ * Defaults to common URL-containing attributes.
52
+ *
53
+ * @example ['href', 'action', 'src', 'data-url']
54
+ */
55
+ readonly attrs?: string[];
56
+ /**
57
+ * Whether to extract URLs from form input elements.
58
+ * Looks for hidden inputs with URL-like names/values.
59
+ *
60
+ * @default false
61
+ */
62
+ readonly extractFromInputs?: boolean;
63
+ }
64
+ /**
65
+ * Result of link extraction from an HTML document.
66
+ *
67
+ * Contains the raw extracted URLs without any processing or validation.
68
+ *
69
+ * @group LinkExtractor
70
+ * @public
71
+ */
72
+ export interface LinkExtractionResult {
73
+ /**
74
+ * Raw URLs extracted from the HTML document.
75
+ * These are unprocessed and may be relative URLs, fragments, etc.
76
+ */
77
+ readonly links: string[];
78
+ /**
79
+ * Total number of potential URL-containing elements found.
80
+ * Includes elements that didn't yield valid URLs.
81
+ */
82
+ readonly totalElementsProcessed: number;
83
+ /**
84
+ * Breakdown of extraction by element type.
85
+ * Maps element types to the number of URLs extracted from them.
86
+ */
87
+ readonly extractionBreakdown: Record<string, number>;
88
+ }
89
+ declare const LinkExtractionError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
90
+ readonly _tag: "LinkExtractionError";
91
+ } & Readonly<A>;
92
+ /**
93
+ * Error that can occur during link extraction.
94
+ *
95
+ * @group Errors
96
+ * @public
97
+ */
98
+ export declare class LinkExtractionError extends LinkExtractionError_base<{
99
+ readonly message: string;
100
+ readonly cause?: unknown;
101
+ }> {
102
+ }
103
+ /**
104
+ * Service interface for extracting links from HTML documents.
105
+ *
106
+ * This service focuses purely on extraction - it does not process,
107
+ * validate, or filter the extracted URLs in any way.
108
+ *
109
+ * @group Services
110
+ * @public
111
+ */
112
+ export interface LinkExtractorServiceInterface {
113
+ /**
114
+ * Extracts all URLs from an HTML document based on configuration.
115
+ *
116
+ * This method only extracts URLs from the HTML - it does not:
117
+ * - Validate URLs
118
+ * - Resolve relative URLs to absolute URLs
119
+ * - Apply domain or pattern filtering
120
+ * - Canonicalize URLs
121
+ *
122
+ * URL processing should be handled separately by the consumer.
123
+ *
124
+ * @param html - The HTML content to extract links from
125
+ * @param config - Configuration for extraction behavior
126
+ * @returns Effect containing the extraction result
127
+ *
128
+ * @example
129
+ * ```typescript
130
+ * const extractor = yield* LinkExtractorService;
131
+ * const result = yield* extractor.extractLinks(htmlContent, {
132
+ * tags: ['a', 'form'],
133
+ * attrs: ['href', 'action'],
134
+ * restrictCss: ['.content a']
135
+ * });
136
+ *
137
+ * console.log(`Found ${result.links.length} raw URLs`);
138
+ * // URLs may be relative, absolute, fragments, etc.
139
+ * ```
140
+ */
141
+ extractLinks: (html: string, config?: LinkExtractorConfig) => Effect.Effect<LinkExtractionResult, LinkExtractionError>;
142
+ }
143
+ declare const LinkExtractorService_base: Effect.Service.Class<LinkExtractorService, "@jambudipa.io/LinkExtractorService", {
144
+ readonly effect: Effect.Effect<{
145
+ extractLinks: (html: string, config?: LinkExtractorConfig) => Effect.Effect<LinkExtractionResult, LinkExtractionError, never>;
146
+ }, never, never>;
147
+ }>;
148
+ /**
149
+ * Implementation of the LinkExtractorService.
150
+ *
151
+ * Provides pure HTML link extraction without any URL processing.
152
+ *
153
+ * @group Services
154
+ * @public
155
+ */
156
+ export declare class LinkExtractorService extends LinkExtractorService_base {
157
+ }
158
+ /**
159
+ * Default layer for LinkExtractorService.
160
+ *
161
+ * @group Layers
162
+ * @public
163
+ */
164
+ export declare const LinkExtractorServiceLayer: import("effect/Layer").Layer<LinkExtractorService, never, never>;
165
+ export {};
166
+ //# sourceMappingURL=LinkExtractor.service.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"LinkExtractor.service.d.ts","sourceRoot":"","sources":["../../../src/lib/LinkExtractor/LinkExtractor.service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAe,MAAM,EAAU,MAAM,QAAQ,CAAC;AAIrD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,MAAM,WAAW,mBAAmB;IAClC;;;;;;;;;;;;OAYG;IACH,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IAEhC;;;;;OAKG;IACH,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAEzB;;;;;OAKG;IACH,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IAE1B;;;;;OAKG;IACH,QAAQ,CAAC,iBAAiB,CAAC,EAAE,OAAO,CAAC;CACtC;AAED;;;;;;;GAOG;AACH,MAAM,WAAW,oBAAoB;IACnC;;;OAGG;IACH,QAAQ,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC;IAEzB;;;OAGG;IACH,QAAQ,CAAC,sBAAsB,EAAE,MAAM,CAAC;IAExC;;;OAGG;IACH,QAAQ,CAAC,mBAAmB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACtD;;;;AAED;;;;;GAKG;AACH,qBAAa,mBAAoB,SAAQ,yBAEvC;IACA,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,KAAK,CAAC,EAAE,OAAO,CAAC;CAC1B,CAAC;CAAG;AAEL;;;;;;;;GAQG;AACH,MAAM,WAAW,6BAA6B;IAC5C;;;;;;;;;;;;;;;;;;;;;;;;;;;OA2BG;IACH,YAAY,EAAE,CACZ,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,mBAAmB,KACzB,MAAM,CAAC,MAAM,CAAC,oBAAoB,EAAE,mBAAmB,CAAC,CAAC;CAC/D;;;6BAyB0B,MAAM,WAAW,mBAAmB;;;AAZ/D;;;;;;;GAOG;AACH,qBAAa,oBAAqB,SAAQ,yBAoBzC;CAAG;AAEJ;;;;;GAKG;AACH,eAAO,MAAM,yBAAyB,kEAA+B,CAAC"}
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Advanced link extraction functionality for the Spider framework.
3
+ *
4
+ * This module provides Scrapy-equivalent link extraction capabilities with support for:
5
+ * - CSS selector-based extraction
6
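+ * - Tag and attribute selection (`tags`, `attrs`)
7
+ * - Optional extraction from form inputs (`extractFromInputs`)
8
+ * - Raw URL output: pattern/domain filtering, canonicalization and
9
+ * relative-URL resolution are left to the consumer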
10
+ * - Comprehensive extraction statistics
11
+ *
12
+ * @example
13
+ * ```typescript
14
+ * import { LinkExtractorService, type LinkExtractorConfig } from '@jambudipa/spider/LinkExtractor';
15
+ *
16
+ * const program = Effect.gen(function* () {
17
+ * const extractor = yield* LinkExtractorService;
18
+ *
19
+ * const result = yield* extractor.extractLinks(
20
+ * htmlContent,
21
+ * {
22
+ * tags: ['a', 'area'],
23
+ * attrs: ['href'],
24
+ * restrictCss: ['.content a'],
25
+ * extractFromInputs: false
26
+ * }
27
+ * );
28
+ *
29
+ * console.log(`Extracted ${result.links.length} links`);
30
+ * });
31
+ * ```
32
+ *
33
+ * @group LinkExtractor
34
+ * @public
35
+ */
36
+ export { LinkExtractorService, LinkExtractorServiceLayer, type LinkExtractorConfig, type LinkExtractionResult, type LinkExtractorServiceInterface, LinkExtractionError, } from './LinkExtractor.service.js';
37
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/lib/LinkExtractor/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AAEH,OAAO,EACL,oBAAoB,EACpB,yBAAyB,EACzB,KAAK,mBAAmB,EACxB,KAAK,oBAAoB,EACzB,KAAK,6BAA6B,EAClC,mBAAmB,GACpB,MAAM,4BAA4B,CAAC"}
@@ -0,0 +1,24 @@
1
+ import { Context, Effect } from 'effect';
2
+ import { SpiderLogger } from './SpiderLogger.service.js';
3
+ declare const FetchError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
4
+ readonly _tag: "FetchError";
5
+ } & Readonly<A>;
6
+ /**
7
+ * Tagged error for fetch operations
8
+ */
9
+ export declare class FetchError extends FetchError_base<{
10
+ readonly url: string;
11
+ readonly reason: 'timeout' | 'network' | 'unknown';
12
+ readonly durationMs: number;
13
+ readonly cause?: unknown;
14
+ }> {
15
+ get message(): string;
16
+ }
17
+ /**
18
+ * Wrapper for fetch that adds comprehensive logging
19
+ */
20
+ export declare const makeLoggingFetch: Effect.Effect<(url: string, options?: RequestInit) => Effect.Effect<Response, FetchError>, never, SpiderLogger>;
21
+ export type LoggingFetchFn = (url: string, options?: RequestInit) => Effect.Effect<Response, FetchError>;
22
+ export declare const LoggingFetch: Context.Tag<LoggingFetchFn, LoggingFetchFn>;
23
+ export {};
24
+ //# sourceMappingURL=FetchLogger.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"FetchLogger.d.ts","sourceRoot":"","sources":["../../../src/lib/Logging/FetchLogger.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAA4B,MAAM,EAAU,MAAM,QAAQ,CAAC;AAC3E,OAAO,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;;;;AAEzD;;GAEG;AACH,qBAAa,UAAW,SAAQ,gBAA+B;IAC7D,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,MAAM,EAAE,SAAS,GAAG,SAAS,GAAG,SAAS,CAAC;IACnD,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,KAAK,CAAC,EAAE,OAAO,CAAC;CAC1B,CAAC;IACA,IAAI,OAAO,IAAI,MAAM,CAEpB;CACF;AAED;;GAEG;AACH,eAAO,MAAM,gBAAgB,sBAGd,MAAM,YAAY,WAAW,KAAG,MAAM,CAAC,MAAM,CAAC,QAAQ,EAAE,UAAU,CAAC,sBA+HhF,CAAC;AAEH,MAAM,MAAM,cAAc,GAAG,CAC3B,GAAG,EAAE,MAAM,EACX,OAAO,CAAC,EAAE,WAAW,KAClB,MAAM,CAAC,MAAM,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;AAEzC,eAAO,MAAM,YAAY,6CAAqD,CAAC"}
@@ -0,0 +1,37 @@
1
+ import { Context, Effect, Layer } from 'effect';
2
+ export interface SpiderLogEvent {
3
+ timestamp: string;
4
+ type: 'domain_start' | 'domain_complete' | 'domain_error' | 'page_scraped' | 'queue_status' | 'worker_status' | 'rate_limit' | 'spider_lifecycle' | 'worker_lifecycle' | 'worker_state' | 'completion_monitor' | 'edge_case' | 'crawl_delay_capped';
5
+ domain?: string;
6
+ url?: string;
7
+ workerId?: string;
8
+ fiberId?: string;
9
+ message: string;
10
+ details?: Record<string, unknown>;
11
+ }
12
+ export interface SpiderLoggerService {
13
+ readonly logEvent: (event: Omit<SpiderLogEvent, 'timestamp'>) => Effect.Effect<void>;
14
+ readonly logDomainStart: (domain: string, startUrl: string) => Effect.Effect<void>;
15
+ readonly logDomainComplete: (domain: string, pagesScraped: number, reason: 'max_pages' | 'queue_empty' | 'error') => Effect.Effect<void>;
16
+ readonly logPageScraped: (url: string, domain: string, pageNumber: number) => Effect.Effect<void>;
17
+ readonly logQueueStatus: (domain: string, queueSize: number, activeWorkers: number) => Effect.Effect<void>;
18
+ readonly logRateLimit: (domain: string, requestsInWindow: number) => Effect.Effect<void>;
19
+ readonly logSpiderLifecycle: (event: 'start' | 'complete' | 'error', details?: Record<string, unknown>) => Effect.Effect<void>;
20
+ readonly logWorkerLifecycle: (workerId: string, domain: string, event: 'created' | 'entering_loop' | 'exiting_loop', reason?: string, details?: Record<string, unknown>) => Effect.Effect<void>;
21
+ readonly logWorkerState: (workerId: string, domain: string, event: 'taking_task' | 'marked_active' | 'marked_idle' | 'task_completed', details?: Record<string, unknown>) => Effect.Effect<void>;
22
+ readonly logCompletionMonitor: (domain: string, checkCount: number, queueSize: number, activeWorkers: number, stableCount: number, maxPagesReached: boolean, decision: string) => Effect.Effect<void>;
23
+ readonly logEdgeCase: (domain: string, caseType: string, details?: Record<string, unknown>) => Effect.Effect<void>;
24
+ readonly logDomainStatus: (domain: string, status: {
25
+ pagesScraped: number;
26
+ queueSize: number;
27
+ activeWorkers: number;
28
+ maxWorkers: number;
29
+ }) => Effect.Effect<void>;
30
+ }
31
+ declare const SpiderLogger_base: Context.TagClass<SpiderLogger, "SpiderLogger", SpiderLoggerService>;
32
+ export declare class SpiderLogger extends SpiderLogger_base {
33
+ }
34
+ export declare const makeSpiderLogger: (logDir?: string) => SpiderLoggerService;
35
+ export declare const SpiderLoggerLive: Layer.Layer<SpiderLogger, never, never>;
36
+ export {};
37
+ //# sourceMappingURL=SpiderLogger.service.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"SpiderLogger.service.d.ts","sourceRoot":"","sources":["../../../src/lib/Logging/SpiderLogger.service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAW,OAAO,EAAY,MAAM,EAAE,KAAK,EAAU,MAAM,QAAQ,CAAC;AAI3E,MAAM,WAAW,cAAc;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EACA,cAAc,GACd,iBAAiB,GACjB,cAAc,GACd,cAAc,GACd,cAAc,GACd,eAAe,GACf,YAAY,GACZ,kBAAkB,GAClB,kBAAkB,GAClB,cAAc,GACd,oBAAoB,GACpB,WAAW,GACX,oBAAoB,CAAC;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACnC;AAED,MAAM,WAAW,mBAAmB;IAClC,QAAQ,CAAC,QAAQ,EAAE,CACjB,KAAK,EAAE,IAAI,CAAC,cAAc,EAAE,WAAW,CAAC,KACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACzB,QAAQ,CAAC,cAAc,EAAE,CACvB,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,MAAM,KACb,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACzB,QAAQ,CAAC,iBAAiB,EAAE,CAC1B,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,EACpB,MAAM,EAAE,WAAW,GAAG,aAAa,GAAG,OAAO,KAC1C,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACzB,QAAQ,CAAC,cAAc,EAAE,CACvB,GAAG,EAAE,MAAM,EACX,MAAM,EAAE,MAAM,EACd,UAAU,EAAE,MAAM,KACf,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACzB,QAAQ,CAAC,cAAc,EAAE,CACvB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,aAAa,EAAE,MAAM,KAClB,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACzB,QAAQ,CAAC,YAAY,EAAE,CACrB,MAAM,EAAE,MAAM,EACd,gBAAgB,EAAE,MAAM,KACrB,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACzB,QAAQ,CAAC,kBAAkB,EAAE,CAC3B,KAAK,EAAE,OAAO,GAAG,UAAU,GAAG,OAAO,EACrC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAC9B,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAGzB,QAAQ,CAAC,kBAAkB,EAAE,CAC3B,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,SAAS,GAAG,eAAe,GAAG,cAAc,EACnD,MAAM,CAAC,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAC9B,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACzB,QAAQ,CAAC,cAAc,EAAE,CACvB,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,aAAa,GAAG,eAAe,GAAG,aAAa,GAAG,gBAAgB,EACzE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAC9B,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACzB,QAAQ,
CAAC,oBAAoB,EAAE,CAC7B,MAAM,EAAE,MAAM,EACd,UAAU,EAAE,MAAM,EAClB,SAAS,EAAE,MAAM,EACjB,aAAa,EAAE,MAAM,EACrB,WAAW,EAAE,MAAM,EACnB,eAAe,EAAE,OAAO,EACxB,QAAQ,EAAE,MAAM,KACb,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACzB,QAAQ,CAAC,WAAW,EAAE,CACpB,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAC9B,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACzB,QAAQ,CAAC,eAAe,EAAE,CACxB,MAAM,EAAE,MAAM,EACd,MAAM,EAAE;QACN,YAAY,EAAE,MAAM,CAAC;QACrB,SAAS,EAAE,MAAM,CAAC;QAClB,aAAa,EAAE,MAAM,CAAC;QACtB,UAAU,EAAE,MAAM,CAAC;KACpB,KACE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;CAC1B;;AAED,qBAAa,YAAa,SAAQ,iBAG/B;CAAG;AAiCN,eAAO,MAAM,gBAAgB,GAAI,eAAwB,KAAG,mBA2U3D,CAAC;AAEF,eAAO,MAAM,gBAAgB,yCAAkD,CAAC"}
@@ -0,0 +1,239 @@
1
+ import { Effect, Option } from 'effect';
2
+ import { MiddlewareError } from '../errors.js';
3
+ import { SpiderRequest, SpiderResponse } from './types.js';
4
+ export { SpiderRequest, SpiderResponse } from './types.js';
5
+ /**
6
+ * Interface for implementing custom middleware components.
7
+ *
8
+ * Middleware can intercept and modify requests before they're sent,
9
+ * responses after they're received, and handle exceptions that occur
10
+ * during processing. All methods are optional.
11
+ *
12
+ * @example
13
+ * ```typescript
14
+ * const loggingMiddleware: SpiderMiddleware = {
15
+ * processRequest: (request) => Effect.gen(function* () {
16
+ * console.log(`Requesting: ${request.task.url}`);
17
+ * return request;
18
+ * }),
19
+ *
20
+ * processResponse: (response, request) => Effect.gen(function* () {
21
+ * console.log(`Response: ${response.statusCode} for ${request.task.url}`);
22
+ * return response;
23
+ * }),
24
+ *
25
+ * processException: (error, request) => Effect.gen(function* () {
26
+ * console.error(`Error processing ${request.task.url}: ${error.message}`);
27
+ * return Option.none(); // Let the error propagate
28
+ * })
29
+ * };
30
+ * ```
31
+ *
32
+ * @group Interfaces
33
+ * @public
34
+ */
35
+ export interface SpiderMiddleware {
36
+ /**
37
+ * Process a request before it's sent to the target server.
38
+ * Can modify headers, metadata, or reject the request entirely.
39
+ */
40
+ processRequest?: (_request: SpiderRequest) => Effect.Effect<SpiderRequest, MiddlewareError>;
41
+ /**
42
+ * Process a response after it's received from the target server.
43
+ * Can modify the response data or metadata.
44
+ */
45
+ processResponse?: (_response: SpiderResponse, _request: SpiderRequest) => Effect.Effect<SpiderResponse, MiddlewareError>;
46
+ /**
47
+ * Handle exceptions that occur during request processing.
48
+ * Can attempt recovery by returning Option.some(response), or return Option.none() to propagate the error.
49
+ */
50
+ processException?: (_error: Error, _request: SpiderRequest) => Effect.Effect<Option.Option<SpiderResponse>, MiddlewareError>;
51
+ }
52
+ declare const MiddlewareManager_base: Effect.Service.Class<MiddlewareManager, "@jambudipa.io/MiddlewareManager", {
53
+ readonly effect: Effect.Effect<{
54
+ /**
55
+ * Processes a request through the middleware pipeline.
56
+ *
57
+ * Middleware are executed in order from first to last, with each middleware
58
+ * receiving the output of the previous middleware as input.
59
+ *
60
+ * @param request - The initial request to process
61
+ * @param middlewares - Array of middleware to apply
62
+ * @returns Effect containing the processed request
63
+ */
64
+ processRequest: (request: SpiderRequest, middlewares: SpiderMiddleware[]) => Effect.Effect<SpiderRequest, MiddlewareError, never>;
65
+ /**
66
+ * Processes a response through the middleware pipeline in reverse order.
67
+ *
68
+ * Middleware are executed in reverse order (last to first) to provide
69
+ * proper nesting of response processing.
70
+ *
71
+ * @param response - The response to process
72
+ * @param request - The original request (for context)
73
+ * @param middlewares - Array of middleware to apply
74
+ * @returns Effect containing the processed response
75
+ */
76
+ processResponse: (response: SpiderResponse, request: SpiderRequest, middlewares: SpiderMiddleware[]) => Effect.Effect<SpiderResponse, MiddlewareError, never>;
77
+ /**
78
+ * Processes an exception through the middleware pipeline in reverse order.
79
+ *
80
+ * Middleware are given a chance to handle or recover from exceptions.
81
+ * If a middleware returns Option.some(SpiderResponse), it indicates successful recovery.
82
+ * If it returns Option.none(), the exception continues to propagate.
83
+ *
84
+ * @param error - The error that occurred
85
+ * @param request - The request that caused the error
86
+ * @param middlewares - Array of middleware to apply
87
+ * @returns Effect containing a recovered response wrapped in Option
88
+ */
89
+ processException: (error: Error, request: SpiderRequest, middlewares: SpiderMiddleware[]) => Effect.Effect<Option.Option<SpiderResponse>, MiddlewareError, never>;
90
+ }, never, never>;
91
+ }>;
92
+ /**
93
+ * Manages the middleware pipeline for request and response processing.
94
+ *
95
+ * The MiddlewareManager orchestrates the execution of middleware in the correct order:
96
+ * - Requests are processed forward through the middleware array
97
+ * - Responses are processed in reverse order (last middleware first)
98
+ * - Exceptions are processed in reverse order for proper error handling
99
+ *
100
+ * @example
101
+ * ```typescript
102
+ * const program = Effect.gen(function* () {
103
+ * const manager = yield* MiddlewareManager;
104
+ *
105
+ * const middleware = [
106
+ * rateLimitMiddleware,
107
+ * loggingMiddleware,
108
+ * userAgentMiddleware
109
+ * ];
110
+ *
111
+ * const request: SpiderRequest = {
112
+ * task: { url: 'https://example.com', depth: 0 },
113
+ * headers: {}
114
+ * };
115
+ *
116
+ * const processedRequest = yield* manager.processRequest(request, middleware);
117
+ * console.log('Request processed through middleware pipeline');
118
+ * });
119
+ * ```
120
+ *
121
+ * @group Services
122
+ * @public
123
+ */
124
+ export declare class MiddlewareManager extends MiddlewareManager_base {
125
+ }
126
+ declare const RateLimitMiddleware_base: Effect.Service.Class<RateLimitMiddleware, "@jambudipa.io/RateLimitMiddleware", {
127
+ readonly effect: Effect.Effect<{
128
+ create: (config: {
129
+ maxConcurrentRequests: number;
130
+ maxRequestsPerSecondPerDomain: number;
131
+ requestDelayMs?: number;
132
+ }) => SpiderMiddleware;
133
+ }, never, never>;
134
+ }>;
135
+ /**
136
+ * Provides rate limiting functionality for respectful crawling.
137
+ *
138
+ * Controls request frequency at both global and per-domain levels to prevent
139
+ * overwhelming target servers and avoid being blocked.
140
+ *
141
+ * @example
142
+ * ```typescript
143
+ * const rateLimiter = yield* RateLimitMiddleware;
144
+ * const middleware = rateLimiter.create({
145
+ * maxConcurrentRequests: 5,
146
+ * maxRequestsPerSecondPerDomain: 2,
147
+ * requestDelayMs: 250
148
+ * });
149
+ * ```
150
+ *
151
+ * @group Middleware
152
+ * @public
153
+ */
154
+ export declare class RateLimitMiddleware extends RateLimitMiddleware_base {
155
+ }
156
+ declare const LoggingMiddleware_base: Effect.Service.Class<LoggingMiddleware, "@jambudipa.io/LoggingMiddleware", {
157
+ readonly effect: Effect.Effect<{
158
+ create: (config?: {
159
+ logRequests?: boolean;
160
+ logResponses?: boolean;
161
+ logErrors?: boolean;
162
+ logLevel?: "debug" | "info" | "warn" | "error";
163
+ }) => SpiderMiddleware;
164
+ }, never, never>;
165
+ }>;
166
+ /**
167
+ * Provides logging functionality using Effect.Logger.
168
+ *
169
+ * Logs requests, responses, and errors at configurable levels for debugging
170
+ * and monitoring purposes.
171
+ *
172
+ * @example
173
+ * ```typescript
174
+ * const logger = yield* LoggingMiddleware;
175
+ * const middleware = logger.create({
176
+ * logRequests: true,
177
+ * logResponses: true,
178
+ * logLevel: 'info'
179
+ * });
180
+ * ```
181
+ *
182
+ * @group Middleware
183
+ * @public
184
+ */
185
+ export declare class LoggingMiddleware extends LoggingMiddleware_base {
186
+ }
187
+ declare const UserAgentMiddleware_base: Effect.Service.Class<UserAgentMiddleware, "@jambudipa.io/UserAgentMiddleware", {
188
+ readonly effect: Effect.Effect<{
189
+ create: (userAgent: string) => SpiderMiddleware;
190
+ }, never, never>;
191
+ }>;
192
+ /**
193
+ * Adds User-Agent headers to requests.
194
+ *
195
+ * Sets a consistent User-Agent string for all requests to identify
196
+ * your crawler to web servers.
197
+ *
198
+ * @example
199
+ * ```typescript
200
+ * const userAgent = yield* UserAgentMiddleware;
201
+ * const middleware = userAgent.create('MyBot/1.0 (+https://example.com)');
202
+ * ```
203
+ *
204
+ * @group Middleware
205
+ * @public
206
+ */
207
+ export declare class UserAgentMiddleware extends UserAgentMiddleware_base {
208
+ }
209
+ declare const StatsMiddleware_base: Effect.Service.Class<StatsMiddleware, "@jambudipa.io/StatsMiddleware", {
210
+ readonly effect: Effect.Effect<{
211
+ create: () => {
212
+ middleware: SpiderMiddleware;
213
+ getStats: () => Effect.Effect<Record<string, number>>;
214
+ };
215
+ }, never, never>;
216
+ }>;
217
+ /**
218
+ * Collects statistics about crawling activity.
219
+ *
220
+ * Tracks various metrics including requests processed, response codes,
221
+ * bytes downloaded, and processing times for monitoring and optimization.
222
+ *
223
+ * @example
224
+ * ```typescript
225
+ * const statsService = yield* StatsMiddleware;
226
+ * const { middleware, getStats } = statsService.create();
227
+ *
228
+ * // Use middleware in your pipeline
229
+ * // Later get statistics
230
+ * const stats = yield* getStats();
231
+ * console.log(`Processed ${stats.requests_processed} requests`);
232
+ * ```
233
+ *
234
+ * @group Middleware
235
+ * @public
236
+ */
237
+ export declare class StatsMiddleware extends StatsMiddleware_base {
238
+ }
239
+ //# sourceMappingURL=SpiderMiddleware.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"SpiderMiddleware.d.ts","sourceRoot":"","sources":["../../../src/lib/Middleware/SpiderMiddleware.ts"],"names":[],"mappings":"AAAA,OAAO,EAAY,MAAM,EAAkB,MAAM,EAAE,MAAM,QAAQ,CAAC;AAClE,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAC/C,OAAO,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAE3D,OAAO,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAE3D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACH,MAAM,WAAW,gBAAgB;IAC/B;;;OAGG;IACH,cAAc,CAAC,EAAE,CACf,QAAQ,EAAE,aAAa,KACpB,MAAM,CAAC,MAAM,CAAC,aAAa,EAAE,eAAe,CAAC,CAAC;IAEnD;;;OAGG;IACH,eAAe,CAAC,EAAE,CAChB,SAAS,EAAE,cAAc,EACzB,QAAQ,EAAE,aAAa,KACpB,MAAM,CAAC,MAAM,CAAC,cAAc,EAAE,eAAe,CAAC,CAAC;IAEpD;;;OAGG;IACH,gBAAgB,CAAC,EAAE,CACjB,MAAM,EAAE,KAAK,EACb,QAAQ,EAAE,aAAa,KACpB,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,EAAE,eAAe,CAAC,CAAC;CACpE;;;QAsCK;;;;;;;;;WASG;kCAEQ,aAAa,eACT,gBAAgB,EAAE;QAQjC;;;;;;;;;;WAUG;oCAES,cAAc,WACf,aAAa,eACT,gBAAgB,EAAE;QAWjC;;;;;;;;;;;WAWG;kCAEM,KAAK,WACH,aAAa,eACT,gBAAgB,EAAE;;;AAhGvC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,qBAAa,iBAAkB,SAAQ,sBA4EtC;CAAG;;;yBA8BqB;YACf,qBAAqB,EAAE,MAAM,CAAC;YAC9B,6BAA6B,EAAE,MAAM,CAAC;YACtC,cAAc,CAAC,EAAE,MAAM,CAAC;SACzB,KAAG,gBAAgB;;;AAhC5B;;;;;;;;;;;;;;;;;;GAkBG;AACH,qBAAa,mBAAoB,SAAQ,wBAqExC;CAAG;;;0BA0BY;YACN,WAAW,CAAC,EAAE,OAAO,CAAC;YACtB,YAAY,CAAC,EAAE,OAAO,CAAC;YACvB,SAAS,CAAC,EAAE,OAAO,CAAC;YACpB,QAAQ,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;SAChD,KACA,gBAAgB;;;AA9BzB;;;;;;;;;;;;;;;;;;GAkBG;AACH,qBAAa,iBAAkB,SAAQ,sBA4EtC;CAAG;;;4BAqBsB,MAAM,KAAG,gBAAgB;;;AAnBnD;;;;;;;;;;;;;;GAcG;AACH,qBAAa,mBAAoB,SAAQ,wBAYxC;CAAG;;;sBA6BgB;YACV,UAAU,EAAE,gBAAgB,CAAC;YAC7B,QAAQ,EAAE,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;SACvD;;;AA9BT;;;;;;;;;;;;;;;;;;;GAmBG;AACH,qBAAa,eAAgB,SAAQ,oBAqEpC;CAAG"}