@jambudipa/spider 0.2.0 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. package/README.md +75 -35
  2. package/dist/browser/BrowserManager.d.ts +63 -0
  3. package/dist/browser/BrowserManager.d.ts.map +1 -0
  4. package/dist/browser/PlaywrightAdapter.d.ts +166 -0
  5. package/dist/browser/PlaywrightAdapter.d.ts.map +1 -0
  6. package/dist/examples/01-basic-crawl-working.d.ts +13 -0
  7. package/dist/examples/01-basic-crawl-working.d.ts.map +1 -0
  8. package/dist/examples/02-multiple-urls-working.d.ts +13 -0
  9. package/dist/examples/02-multiple-urls-working.d.ts.map +1 -0
  10. package/dist/examples/03-url-filtering.d.ts +13 -0
  11. package/dist/examples/03-url-filtering.d.ts.map +1 -0
  12. package/dist/examples/04-robots-compliance.d.ts +14 -0
  13. package/dist/examples/04-robots-compliance.d.ts.map +1 -0
  14. package/dist/examples/05-link-extraction-selectors.d.ts +14 -0
  15. package/dist/examples/05-link-extraction-selectors.d.ts.map +1 -0
  16. package/dist/examples/06-custom-middleware.d.ts +18 -0
  17. package/dist/examples/06-custom-middleware.d.ts.map +1 -0
  18. package/dist/examples/07-resumability-demo.d.ts +14 -0
  19. package/dist/examples/07-resumability-demo.d.ts.map +1 -0
  20. package/dist/examples/08-worker-monitoring.d.ts +15 -0
  21. package/dist/examples/08-worker-monitoring.d.ts.map +1 -0
  22. package/dist/examples/09-error-handling-recovery.d.ts +15 -0
  23. package/dist/examples/09-error-handling-recovery.d.ts.map +1 -0
  24. package/dist/index.d.ts +33 -0
  25. package/dist/index.d.ts.map +1 -0
  26. package/dist/index.js +3596 -1440
  27. package/dist/index.js.map +1 -1
  28. package/dist/lib/BrowserEngine/BrowserEngine.service.d.ts +107 -0
  29. package/dist/lib/BrowserEngine/BrowserEngine.service.d.ts.map +1 -0
  30. package/dist/lib/Config/SpiderConfig.service.d.ts +256 -0
  31. package/dist/lib/Config/SpiderConfig.service.d.ts.map +1 -0
  32. package/dist/lib/HttpClient/CookieManager.d.ts +58 -0
  33. package/dist/lib/HttpClient/CookieManager.d.ts.map +1 -0
  34. package/dist/lib/HttpClient/EnhancedHttpClient.d.ts +63 -0
  35. package/dist/lib/HttpClient/EnhancedHttpClient.d.ts.map +1 -0
  36. package/dist/lib/HttpClient/SessionStore.d.ts +114 -0
  37. package/dist/lib/HttpClient/SessionStore.d.ts.map +1 -0
  38. package/dist/lib/HttpClient/TokenExtractor.d.ts +83 -0
  39. package/dist/lib/HttpClient/TokenExtractor.d.ts.map +1 -0
  40. package/dist/lib/HttpClient/index.d.ts +8 -0
  41. package/dist/lib/HttpClient/index.d.ts.map +1 -0
  42. package/dist/lib/LinkExtractor/LinkExtractor.service.d.ts +166 -0
  43. package/dist/lib/LinkExtractor/LinkExtractor.service.d.ts.map +1 -0
  44. package/dist/lib/LinkExtractor/index.d.ts +37 -0
  45. package/dist/lib/LinkExtractor/index.d.ts.map +1 -0
  46. package/dist/lib/Logging/FetchLogger.d.ts +24 -0
  47. package/dist/lib/Logging/FetchLogger.d.ts.map +1 -0
  48. package/dist/lib/Logging/SpiderLogger.service.d.ts +37 -0
  49. package/dist/lib/Logging/SpiderLogger.service.d.ts.map +1 -0
  50. package/dist/lib/Middleware/SpiderMiddleware.d.ts +239 -0
  51. package/dist/lib/Middleware/SpiderMiddleware.d.ts.map +1 -0
  52. package/dist/lib/Middleware/types.d.ts +99 -0
  53. package/dist/lib/Middleware/types.d.ts.map +1 -0
  54. package/dist/lib/PageData/PageData.d.ts +28 -0
  55. package/dist/lib/PageData/PageData.d.ts.map +1 -0
  56. package/dist/lib/Resumability/Resumability.service.d.ts +178 -0
  57. package/dist/lib/Resumability/Resumability.service.d.ts.map +1 -0
  58. package/dist/lib/Resumability/backends/FileStorageBackend.d.ts +47 -0
  59. package/dist/lib/Resumability/backends/FileStorageBackend.d.ts.map +1 -0
  60. package/dist/lib/Resumability/backends/PostgresStorageBackend.d.ts +95 -0
  61. package/dist/lib/Resumability/backends/PostgresStorageBackend.d.ts.map +1 -0
  62. package/dist/lib/Resumability/backends/RedisStorageBackend.d.ts +92 -0
  63. package/dist/lib/Resumability/backends/RedisStorageBackend.d.ts.map +1 -0
  64. package/dist/lib/Resumability/index.d.ts +51 -0
  65. package/dist/lib/Resumability/index.d.ts.map +1 -0
  66. package/dist/lib/Resumability/strategies.d.ts +76 -0
  67. package/dist/lib/Resumability/strategies.d.ts.map +1 -0
  68. package/dist/lib/Resumability/types.d.ts +201 -0
  69. package/dist/lib/Resumability/types.d.ts.map +1 -0
  70. package/dist/lib/Robots/Robots.service.d.ts +78 -0
  71. package/dist/lib/Robots/Robots.service.d.ts.map +1 -0
  72. package/dist/lib/Scheduler/SpiderScheduler.service.d.ts +211 -0
  73. package/dist/lib/Scheduler/SpiderScheduler.service.d.ts.map +1 -0
  74. package/dist/lib/Scraper/Scraper.service.d.ts +123 -0
  75. package/dist/lib/Scraper/Scraper.service.d.ts.map +1 -0
  76. package/dist/lib/Spider/Spider.service.d.ts +249 -0
  77. package/dist/lib/Spider/Spider.service.d.ts.map +1 -0
  78. package/dist/lib/StateManager/StateManager.service.d.ts +107 -0
  79. package/dist/lib/StateManager/StateManager.service.d.ts.map +1 -0
  80. package/dist/lib/StateManager/index.d.ts +5 -0
  81. package/dist/lib/StateManager/index.d.ts.map +1 -0
  82. package/dist/lib/UrlDeduplicator/UrlDeduplicator.service.d.ts +58 -0
  83. package/dist/lib/UrlDeduplicator/UrlDeduplicator.service.d.ts.map +1 -0
  84. package/dist/lib/WebScrapingEngine/WebScrapingEngine.service.d.ts +110 -0
  85. package/dist/lib/WebScrapingEngine/WebScrapingEngine.service.d.ts.map +1 -0
  86. package/dist/lib/WebScrapingEngine/index.d.ts +5 -0
  87. package/dist/lib/WebScrapingEngine/index.d.ts.map +1 -0
  88. package/dist/lib/WorkerHealth/WorkerHealthMonitor.service.d.ts +39 -0
  89. package/dist/lib/WorkerHealth/WorkerHealthMonitor.service.d.ts.map +1 -0
  90. package/dist/lib/api-facades.d.ts +313 -0
  91. package/dist/lib/api-facades.d.ts.map +1 -0
  92. package/dist/lib/errors/effect-errors.d.ts +179 -0
  93. package/dist/lib/errors/effect-errors.d.ts.map +1 -0
  94. package/dist/lib/errors.d.ts +172 -0
  95. package/dist/lib/errors.d.ts.map +1 -0
  96. package/dist/lib/utils/FileUtils.d.ts +284 -0
  97. package/dist/lib/utils/FileUtils.d.ts.map +1 -0
  98. package/dist/lib/utils/JsonUtils.d.ts +196 -0
  99. package/dist/lib/utils/JsonUtils.d.ts.map +1 -0
  100. package/dist/lib/utils/RegexUtils.d.ts +257 -0
  101. package/dist/lib/utils/RegexUtils.d.ts.map +1 -0
  102. package/dist/lib/utils/SchemaUtils.d.ts +251 -0
  103. package/dist/lib/utils/SchemaUtils.d.ts.map +1 -0
  104. package/dist/lib/utils/UrlUtils.d.ts +223 -0
  105. package/dist/lib/utils/UrlUtils.d.ts.map +1 -0
  106. package/dist/lib/utils/effect-migration.d.ts +31 -0
  107. package/dist/lib/utils/effect-migration.d.ts.map +1 -0
  108. package/dist/lib/utils/index.d.ts +15 -0
  109. package/dist/lib/utils/index.d.ts.map +1 -0
  110. package/dist/lib/utils/url-deduplication.d.ts +108 -0
  111. package/dist/lib/utils/url-deduplication.d.ts.map +1 -0
  112. package/dist/lib/utils/url-deduplication.test.d.ts +5 -0
  113. package/dist/lib/utils/url-deduplication.test.d.ts.map +1 -0
  114. package/dist/test/infrastructure/EffectTestUtils.d.ts +167 -0
  115. package/dist/test/infrastructure/EffectTestUtils.d.ts.map +1 -0
  116. package/package.json +23 -9
package/README.md CHANGED
@@ -1,6 +1,14 @@
1
1
  # @jambudipa/spider
2
2
 
3
- A powerful, Effect.js-based web crawling framework for modern TypeScript applications. Built for type safety, composability, and enterprise-scale crawling operations.
3
+ [![CI Status](https://github.com/jambudipa/spider/workflows/Spider%20Scenario%20Tests/badge.svg)](https://github.com/jambudipa/spider/actions)
4
+ [![Coverage](https://codecov.io/gh/jambudipa/spider/branch/main/graph/badge.svg)](https://codecov.io/gh/jambudipa/spider)
5
+ [![npm version](https://badge.fury.io/js/@jambudipa%2Fspider.svg)](https://badge.fury.io/js/@jambudipa%2Fspider)
6
+ [![Node.js Version](https://img.shields.io/node/v/@jambudipa/spider.svg)](https://nodejs.org/)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
8
+
9
+ A powerful, Effect-based web crawling framework for modern TypeScript applications. Built for type safety, composability, and enterprise-scale crawling operations.
10
+
11
+ > **⚠️ Pre-Release API**: Spider is currently in pre-release development (v0.x.x). The API may change frequently as we refine the library towards a stable v1.0.0 release. Consider this when using Spider in production environments and expect potential breaking changes in minor version updates.
4
12
 
5
13
  ## 🏆 **Battle-Tested Against Real-World Scenarios**
6
14
 
@@ -25,11 +33,20 @@ A powerful, Effect.js-based web crawling framework for modern TypeScript applica
25
33
  | **Invalid Referer Blocking** | Header-based access control | Anti-Block |
26
34
  | **Persistent Cookie Blocking** | Long-term blocking mechanisms | Anti-Block |
27
35
 
28
- 🎯 **[View Live Test Results](https://github.com/jambudipa/spider/actions)** | 📊 **100% Test Pass Rate** | 🚀 **Production Ready**
36
+ 🎯 **[View Live Test Results](https://github.com/jambudipa/spider/actions/workflows/ci.yml)** | 📊 **All Scenario Tests Passing** | 🚀 **Production Ready**
37
+
38
+ > **Live Testing**: Our CI pipeline runs all 16 web scraping scenarios against real websites daily, ensuring Spider remains robust against changing web technologies.
39
+
40
+ ### 🔍 **Current Status** (Updated: Jan 2026)
41
+ - ✅ **Core Functionality**: All web scraping scenarios working
42
+ - ✅ **Type Safety**: Full TypeScript compilation without errors
43
+ - ✅ **Build System**: Package builds successfully for distribution
44
+ - ✅ **Test Suite**: 243 tests passing against live websites (25 test files)
45
+ - ✅ **Code Quality**: Clean - only 3 linting warnings (skipped test suites)
29
46
 
30
47
  ## ✨ Key Features
31
48
 
32
- - **🔥 Effect.js Foundation**: Type-safe, functional composition with robust error handling
49
+ - **🔥 Effect Foundation**: Type-safe, functional composition with robust error handling
33
50
  - **⚡ High Performance**: Concurrent crawling with intelligent worker pool management
34
51
  - **🤖 Robots.txt Compliant**: Automatic robots.txt parsing and compliance checking
35
52
  - **🔄 Resumable Crawls**: State persistence and crash recovery capabilities
@@ -71,22 +88,30 @@ Effect.runPromise(program.pipe(
71
88
  ))
72
89
  ```
73
90
 
74
- ## 🎯 What's Next?
91
+ ## 📚 Documentation
92
+
93
+ **Comprehensive documentation is now available**, organized according to the [Diátaxis framework](https://diataxis.fr/) for easier learning and reference:
94
+
95
+ ### 🎓 New to Spider?
96
+ Start with our **[Tutorial](./docs/tutorial/getting-started.md)** - a hands-on guide that takes you from installation to building advanced scrapers.
97
+
98
+ ### 📋 Need to solve a specific problem?
99
+ Check our **[How-to Guides](./docs/how-to/)** for targeted solutions:
100
+ - **[Authentication](./docs/how-to/authentication.md)** - Handle logins, sessions, and auth flows
101
+ - **[Data Extraction](./docs/how-to/data-extraction.md)** - Extract structured data from HTML
102
+ - **[Resumable Operations](./docs/how-to/resumable-operations.md)** - Build fault-tolerant crawlers
75
103
 
76
- ### 🆕 New to Spider?
77
- - **[Getting Started Guide](./docs/guides/getting-started.md)** - Complete setup and first crawl
78
- - **[Examples](./docs/examples/)** - Working examples to get you started
79
- - **[Basic Configuration](./docs/guides/configuration.md)** - Configuration options
104
+ ### 📚 Need technical details?
105
+ See our **[Reference Documentation](./docs/reference/)**:
106
+ - **[API Reference](./docs/reference/api-reference.md)** - Complete API documentation
107
+ - **[Configuration](./docs/reference/configuration.md)** - All configuration options
80
108
 
81
- ### 🔄 Advanced Usage
82
- - **[Browser Automation](./docs/guides/browser-automation.md)** - Handle dynamic content
83
- - **[Anti-Bot Protection](./docs/guides/anti-bot.md)** - Bypass blocking mechanisms
84
- - **[Security Handling](./docs/guides/security.md)** - Authentication and sessions
109
+ ### 🧠 Want to understand the design?
110
+ Read our **[Explanations](./docs/explanation/)**:
111
+ - **[Architecture](./docs/explanation/architecture.md)** - System design and philosophy
112
+ - **[Web Scraping Concepts](./docs/explanation/web-scraping-concepts.md)** - Core principles
85
113
 
86
- ### 🏭 Building Production Systems?
87
- - **[Performance Guide](./docs/guides/performance.md)** - Scale your crawling operations
88
- - **[API Reference](./docs/api/)** - Complete technical documentation
89
- - **[Enterprise Patterns](./docs/examples/enterprise-patterns.md)** - Production-ready patterns
114
+ **📖 [Browse All Documentation →](./docs/README.md)**
90
115
 
91
116
  ## 🛠️ Quick Configuration
92
117
 
@@ -382,46 +407,61 @@ npm install
382
407
  # Build the package
383
408
  npm run build
384
409
 
385
- # Run tests
410
+ # Run tests (all scenarios)
386
411
  npm test
387
412
 
388
413
  # Run tests with coverage
389
414
  npm run test:coverage
390
415
 
391
- # Type checking
416
+ # Type checking (must pass)
392
417
  npm run typecheck
393
418
 
394
- # Linting
419
+ # Validate CI setup locally
420
+ npm run ci:validate
421
+
422
+ # Code quality
423
+ npm run lint # Shows 3 warnings (skipped tests)
424
+ npm run format # Formats code consistently
425
+ ```
426
+
427
+ ### 🛠️ Contributing & Code Quality
428
+
429
+ **Current State**: The codebase is fully functional, with comprehensive test coverage and linting that is clean apart from 3 known warnings (skipped tests).
430
+
431
+ - ✅ **Functional Changes**: All PRs must pass scenario tests
432
+ - ✅ **Type Safety**: TypeScript compilation must succeed
433
+ - ✅ **Build System**: Package must build without errors
434
+ - ✅ **Code Style**: ESLint configured with Effect-idiomatic rules
435
+
436
+ **Code Quality Commands**:
437
+ ```bash
438
+ # Check for linting issues
395
439
  npm run lint
396
440
 
397
- # Format code
398
- npm run format
441
+ # Fix auto-fixable issues
442
+ npm run lint:fix
399
443
  ```
400
444
 
401
445
  ## License
402
446
 
403
447
  MIT License - see [LICENSE](LICENSE) file for details.
404
448
 
405
- ## 📚 Documentation
449
+ ## 📚 Complete Documentation
406
450
 
407
- Comprehensive documentation is available in the [`/docs`](./docs) directory:
451
+ All documentation is organized in the [`/docs`](./docs/) directory following the [Diátaxis framework](https://diataxis.fr/):
408
452
 
409
- ### 🚀 Quick Links
410
- - **[Getting Started Guide](./docs/guides/getting-started.md)** - Installation, setup, and first crawl
411
- - **[API Reference](./docs/api/)** - Complete API documentation
412
- - **[Examples](./docs/examples/)** - Working examples for common use cases
453
+ - **🎓 [Tutorial](./docs/tutorial/)** - Learning-oriented lessons for getting started
454
+ - **📋 [How-to Guides](./docs/how-to/)** - Problem-solving guides for specific tasks
455
+ - **📚 [Reference](./docs/reference/)** - Technical reference and API documentation
456
+ - **🧠 [Explanation](./docs/explanation/)** - Understanding-oriented documentation
413
457
 
414
- ### 📖 Complete Documentation
415
- - **[Documentation Index](./docs/README.md)** - Overview of all available documentation
416
- - **[User Guides](./docs/guides/)** - Step-by-step tutorials and best practices
417
- - **[Feature Documentation](./docs/features/)** - Deep dives into key capabilities
418
- - **[Advanced Examples](./docs/examples/)** - Real-world usage patterns
458
+ **📖 [Start with the Documentation Index →](./docs/README.md)**
419
459
 
420
460
  ## Support
421
461
 
422
- - [GitHub Issues](https://github.com/jambudipa/spider/issues)
423
- - [Complete Documentation](./docs/)
424
- - [Working Examples](./docs/examples/)
462
+ - [GitHub Issues](https://github.com/jambudipa/spider/issues) - Bug reports and feature requests
463
+ - [Documentation](./docs/) - Comprehensive guides and reference material
464
+ - [Tutorial](./docs/tutorial/getting-started.md) - Step-by-step learning guide
425
465
 
426
466
  ---
427
467
 
@@ -0,0 +1,63 @@
1
+ /**
2
+ * Browser Manager for Playwright Integration
3
+ * Handles browser lifecycle, pooling, and resource management
4
+ */
5
+ import { Effect } from 'effect';
6
+ import { BrowserContext, Page, BrowserContextOptions } from 'playwright';
7
+ import { BrowserError } from '../lib/errors';
8
+ export interface BrowserConfig {
9
+ headless?: boolean;
10
+ timeout?: number;
11
+ poolSize?: number;
12
+ viewport?: {
13
+ width: number;
14
+ height: number;
15
+ };
16
+ userAgent?: string;
17
+ locale?: string;
18
+ extraHTTPHeaders?: Record<string, string>;
19
+ }
20
+ export declare class BrowserManager {
21
+ private browsers;
22
+ private contexts;
23
+ private config;
24
+ private isInitialised;
25
+ constructor(config?: BrowserConfig);
26
+ /**
27
+ * Initialise browser pool
28
+ */
29
+ initialise(): Effect.Effect<void, BrowserError>;
30
+ /**
31
+ * Launch a new browser instance
32
+ */
33
+ private launchBrowser;
34
+ /**
35
+ * Get or create a browser context
36
+ */
37
+ getContext(id: string, options?: BrowserContextOptions): Effect.Effect<BrowserContext, BrowserError>;
38
+ /**
39
+ * Create a new page in a context
40
+ */
41
+ createPage(contextId: string): Effect.Effect<Page, BrowserError>;
42
+ /**
43
+ * Get the browser with the fewest contexts
44
+ */
45
+ private getLeastLoadedBrowser;
46
+ /**
47
+ * Close a specific context
48
+ */
49
+ closeContext(id: string): Effect.Effect<void, BrowserError>;
50
+ /**
51
+ * Close all resources
52
+ */
53
+ close(): Effect.Effect<void>;
54
+ /**
55
+ * Get statistics about browser pool
56
+ */
57
+ getStats(): {
58
+ browsers: number;
59
+ contexts: number;
60
+ pages: number;
61
+ };
62
+ }
63
+ //# sourceMappingURL=BrowserManager.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"BrowserManager.d.ts","sourceRoot":"","sources":["../../src/browser/BrowserManager.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,MAAM,EAAkC,MAAM,QAAQ,CAAC;AAChE,OAAO,EAAW,cAAc,EAAE,IAAI,EAAY,qBAAqB,EAAE,MAAM,YAAY,CAAC;AAC5F,OAAO,EAAuB,YAAY,EAAE,MAAM,eAAe,CAAC;AAElE,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAC7C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC3C;AAED,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAiB;IACjC,OAAO,CAAC,QAAQ,CAAiF;IACjG,OAAO,CAAC,MAAM,CAA0B;IACxC,OAAO,CAAC,aAAa,CAAS;gBAElB,MAAM,GAAE,aAAkB;IAYtC;;OAEG;IACH,UAAU,IAAI,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,YAAY,CAAC;IAc/C;;OAEG;IACH,OAAO,CAAC,aAAa;IAmBrB;;OAEG;IACH,UAAU,CAAC,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,qBAAqB,GAAG,MAAM,CAAC,MAAM,CAAC,cAAc,EAAE,YAAY,CAAC;IA+BpG;;OAEG;IACH,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,YAAY,CAAC;IAahE;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAe7B;;OAEG;IACH,YAAY,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,YAAY,CAAC;IAc3D;;OAEG;IACH,KAAK,IAAI,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC;IAiD5B;;OAEG;IACH,QAAQ,IAAI;QACV,QAAQ,EAAE,MAAM,CAAC;QACjB,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,EAAE,MAAM,CAAC;KACf;CAYF"}
@@ -0,0 +1,166 @@
1
+ /**
2
+ * Playwright Adapter for Spider Integration
3
+ * Provides high-level browser automation capabilities
4
+ */
5
+ import { Effect, Option } from 'effect';
6
+ import { Page, Response as PlaywrightResponse, Route, Cookie, Request as PlaywrightRequest } from 'playwright';
7
+ import { BrowserManager } from './BrowserManager';
8
+ import { AdapterNotInitialisedError } from '../lib/errors';
9
+ export type RequestHandler = (request: PlaywrightRequest) => void;
10
+ export type ResponseHandler = (response: PlaywrightResponse) => void;
11
+ export interface WaitOptions {
12
+ timeout?: number;
13
+ state?: 'load' | 'domcontentloaded' | 'networkidle';
14
+ }
15
+ export interface ScrollOptions {
16
+ delay?: number;
17
+ maxScrolls?: number;
18
+ scrollDistance?: number;
19
+ }
20
+ declare const PageClosedError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
21
+ readonly _tag: "PageClosedError";
22
+ } & Readonly<A>;
23
+ /**
24
+ * Error thrown when page is closed during an operation
25
+ */
26
+ export declare class PageClosedError extends PageClosedError_base<{
27
+ readonly operation: string;
28
+ readonly message: string;
29
+ }> {
30
+ static create(operation: string): PageClosedError;
31
+ }
32
+ declare const StreamReadError_base: new <A extends Record<string, any> = {}>(args: import("effect/Types").Equals<A, {}> extends true ? void : { readonly [P in keyof A as P extends "_tag" ? never : P]: A[P]; }) => import("effect/Cause").YieldableError & {
33
+ readonly _tag: "StreamReadError";
34
+ } & Readonly<A>;
35
+ /**
36
+ * Error thrown when stream reading fails
37
+ */
38
+ export declare class StreamReadError extends StreamReadError_base<{
39
+ readonly cause: unknown;
40
+ readonly message: string;
41
+ }> {
42
+ static fromCause(cause: unknown): StreamReadError;
43
+ }
44
+ export declare class PlaywrightAdapter {
45
+ private browserManager;
46
+ private page;
47
+ private contextId;
48
+ private requestHandlers;
49
+ private responseHandlers;
50
+ constructor(browserManager: BrowserManager, contextId: string);
51
+ /**
52
+ * Initialise the adapter with a new page
53
+ */
54
+ initialise(): Effect.Effect<Page, AdapterNotInitialisedError>;
55
+ /**
56
+ * Get the current page instance (Effect)
57
+ */
58
+ getPageEffect(): Effect.Effect<Page, AdapterNotInitialisedError>;
59
+ /**
60
+ * Get the current page instance (direct)
61
+ * Returns Option for type-safe handling
62
+ */
63
+ getPage(): Option.Option<Page>;
64
+ /**
65
+ * Internal helper to get page or fail
66
+ */
67
+ private requirePage;
68
+ /**
69
+ * Navigate to a URL
70
+ */
71
+ goto(url: string, options?: WaitOptions): Effect.Effect<Option.Option<PlaywrightResponse>, AdapterNotInitialisedError>;
72
+ /**
73
+ * Wait for dynamic content to load
74
+ */
75
+ waitForDynamicContent(selector: string, options?: WaitOptions): Effect.Effect<void, AdapterNotInitialisedError>;
76
+ /**
77
+ * Scroll to bottom progressively
78
+ */
79
+ scrollToBottom(options?: ScrollOptions): Effect.Effect<void, AdapterNotInitialisedError>;
80
+ /**
81
+ * Click an element and wait for navigation or content
82
+ */
83
+ clickAndWait(selector: string, waitFor?: string | WaitOptions): Effect.Effect<void, AdapterNotInitialisedError>;
84
+ /**
85
+ * Intercept requests
86
+ */
87
+ interceptRequests(handler: RequestHandler): Effect.Effect<void>;
88
+ /**
89
+ * Intercept responses
90
+ */
91
+ interceptResponses(handler: ResponseHandler): Effect.Effect<void>;
92
+ /**
93
+ * Route specific URLs
94
+ */
95
+ route(pattern: string | RegExp, handler: (route: Route) => void): Effect.Effect<void, AdapterNotInitialisedError>;
96
+ /**
97
+ * Execute JavaScript in page context
98
+ */
99
+ evaluate<T>(fn: () => T): Effect.Effect<T, AdapterNotInitialisedError>;
100
+ /**
101
+ * Take a screenshot
102
+ */
103
+ screenshot(path: string): Effect.Effect<void, AdapterNotInitialisedError>;
104
+ /**
105
+ * Get page content
106
+ */
107
+ content(): Effect.Effect<string, AdapterNotInitialisedError>;
108
+ /**
109
+ * Fill a form field
110
+ */
111
+ fill(selector: string, value: string): Effect.Effect<void, AdapterNotInitialisedError>;
112
+ /**
113
+ * Select an option
114
+ */
115
+ select(selector: string, value: string): Effect.Effect<void, AdapterNotInitialisedError>;
116
+ /**
117
+ * Check if element exists
118
+ */
119
+ exists(selector: string): Effect.Effect<boolean, AdapterNotInitialisedError>;
120
+ /**
121
+ * Wait for network idle
122
+ */
123
+ waitForNetworkIdle(options?: WaitOptions): Effect.Effect<void, AdapterNotInitialisedError>;
124
+ /**
125
+ * Handle new tabs/windows
126
+ */
127
+ handleNewTab(callback: (page: Page) => Effect.Effect<void>): Effect.Effect<void, AdapterNotInitialisedError>;
128
+ /**
129
+ * Get cookies
130
+ */
131
+ getCookies(): Effect.Effect<readonly Cookie[], AdapterNotInitialisedError>;
132
+ /**
133
+ * Set cookies
134
+ */
135
+ setCookies(cookies: readonly Cookie[]): Effect.Effect<void, AdapterNotInitialisedError>;
136
+ /**
137
+ * Clear cookies
138
+ */
139
+ clearCookies(): Effect.Effect<void, AdapterNotInitialisedError>;
140
+ /**
141
+ * Helper to read a Node stream as a Buffer using Effect Stream
142
+ */
143
+ private readStreamAsBuffer;
144
+ /**
145
+ * Download file from URL
146
+ */
147
+ downloadFile(url: string, filename?: string): Effect.Effect<{
148
+ buffer: Buffer;
149
+ filename: string;
150
+ mimeType: string;
151
+ }, AdapterNotInitialisedError | PageClosedError | StreamReadError>;
152
+ /**
153
+ * Trigger download by clicking element
154
+ */
155
+ downloadFromClick(selector: string): Effect.Effect<{
156
+ buffer: Buffer;
157
+ filename: string;
158
+ mimeType: string;
159
+ }, AdapterNotInitialisedError | PageClosedError | StreamReadError>;
160
+ /**
161
+ * Close the page
162
+ */
163
+ close(): Effect.Effect<void>;
164
+ }
165
+ export {};
166
+ //# sourceMappingURL=PlaywrightAdapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"PlaywrightAdapter.d.ts","sourceRoot":"","sources":["../../src/browser/PlaywrightAdapter.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAe,MAAM,EAAE,MAAM,EAAU,MAAM,QAAQ,CAAC;AAE7D,OAAO,EAAE,IAAI,EAAE,QAAQ,IAAI,kBAAkB,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,IAAI,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAC/G,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAClD,OAAO,EAAE,0BAA0B,EAAE,MAAM,eAAe,CAAC;AAE3D,MAAM,MAAM,cAAc,GAAG,CAAC,OAAO,EAAE,iBAAiB,KAAK,IAAI,CAAC;AAClE,MAAM,MAAM,eAAe,GAAG,CAAC,QAAQ,EAAE,kBAAkB,KAAK,IAAI,CAAC;AAErE,MAAM,WAAW,WAAW;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,GAAG,kBAAkB,GAAG,aAAa,CAAC;CACrD;AAED,MAAM,WAAW,aAAa;IAC5B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;;;;AAED;;GAEG;AACH,qBAAa,eAAgB,SAAQ,qBAAoC;IACvE,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B,CAAC;IACA,MAAM,CAAC,MAAM,CAAC,SAAS,EAAE,MAAM,GAAG,eAAe;CAMlD;;;;AAED;;GAEG;AACH,qBAAa,eAAgB,SAAQ,qBAAoC;IACvE,QAAQ,CAAC,KAAK,EAAE,OAAO,CAAC;IACxB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B,CAAC;IACA,MAAM,CAAC,SAAS,CAAC,KAAK,EAAE,OAAO,GAAG,eAAe;CAMlD;AAED,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,cAAc,CAAiB;IACvC,OAAO,CAAC,IAAI,CAAsC;IAClD,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,eAAe,CAA8C;IACrE,OAAO,CAAC,gBAAgB,CAA+C;gBAE3D,cAAc,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM;IAK7D;;OAEG;IACH,UAAU,IAAI,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,0BAA0B,CAAC;IAyB7D;;OAEG;IACH,aAAa,IAAI,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,0BAA0B,CAAC;IAShE;;;OAGG;IACH,OAAO,IAAI,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC;IAI9B;;OAEG;IACH,OAAO,CAAC,WAAW;IAOnB;;OAEG;IACH,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,kBAAkB,CAAC,EAAE,0BAA0B,CAAC;IAetH;;OAEG;IACH,qBAAqB,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,0BAA0B,CAAC;IAc/G;;OAEG;IACH,cAAc,CAAC,OAAO,CAAC,EAAE,aAAa,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,0BAA0B,CAAC;IA+CxF;;OAEG;IACH,YAAY,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,
0BAA0B,CAAC;IA0C/G;;OAEG;IACH,iBAAiB,CAAC,OAAO,EAAE,cAAc,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC;IAM/D;;OAEG;IACH,kBAAkB,CAAC,OAAO,EAAE,eAAe,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC;IAMjE;;OAEG;IACH,KAAK,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,OAAO,EAAE,CAAC,KAAK,EAAE,KAAK,KAAK,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,0BAA0B,CAAC;IAWjH;;OAEG;IACH,QAAQ,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,0BAA0B,CAAC;IAWtE;;OAEG;IACH,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,0BAA0B,CAAC;IAWzE;;OAEG;IACH,OAAO,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,0BAA0B,CAAC;IAW5D;;OAEG;IACH,IAAI,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,0BAA0B,CAAC;IAWtF;;OAEG;IACH,MAAM,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,0BAA0B,CAAC;IAWxF;;OAEG;IACH,MAAM,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,OAAO,EAAE,0BAA0B,CAAC;IAY5E;;OAEG;IACH,kBAAkB,CAAC,OAAO,CAAC,EAAE,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,0BAA0B,CAAC;IAa1F;;OAEG;IACH,YAAY,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,IAAI,KAAK,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,0BAA0B,CAAC;IAmB5G;;OAEG;IACH,UAAU,IAAI,MAAM,CAAC,MAAM,CAAC,SAAS,MAAM,EAAE,EAAE,0BAA0B,CAAC;IAW1E;;OAEG;IACH,UAAU,CAAC,OAAO,EAAE,SAAS,MAAM,EAAE,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,0BAA0B,CAAC;IAWvF;;OAEG;IACH,YAAY,IAAI,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,0BAA0B,CAAC;IAW/D;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAY1B;;OAEG;IACH,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC1D,MAAM,EAAE,MAAM,CAAC;QACf,QAAQ,EAAE,MAAM,CAAC;QACjB,QAAQ,EAAE,MAAM,CAAC;KAClB,EAAE,0BAA0B,GAAG,eAAe,GAAG,eAAe,CAAC;IAiElE;;OAEG;IACH,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QACjD,MAAM,EAAE,MAAM,CAAC;QACf,QAAQ,EAAE,MAAM,CAAC;QACjB,QAAQ,EAAE,MAAM,CAAC;KAClB,EAAE,0BAA0B,GAAG,eAAe,GAAG,eAAe,CAAC;IA2DlE;;OAEG;IACH,KAAK,IAAI,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC;CAsB7B"}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Example 01: Basic Web Crawling
3
+ *
4
+ * This example demonstrates:
5
+ * - Basic Spider setup and configuration
6
+ * - Single URL crawling with depth control
7
+ * - Result collection and processing
8
+ * - Error handling
9
+ *
10
+ * Tests against: web-scraping.dev (static content)
11
+ */
12
+ export {};
13
+ //# sourceMappingURL=01-basic-crawl-working.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"01-basic-crawl-working.d.ts","sourceRoot":"","sources":["../../src/examples/01-basic-crawl-working.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG"}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Example 02: Multiple Starting URLs and Domain Isolation
3
+ *
4
+ * This example demonstrates:
5
+ * - Crawling multiple domains concurrently
6
+ * - Domain isolation and deduplication
7
+ * - Metadata passing through crawl
8
+ * - Concurrent request handling
9
+ *
10
+ * Tests against: Multiple sections of web-scraping.dev
11
+ */
12
+ export {};
13
+ //# sourceMappingURL=02-multiple-urls-working.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"02-multiple-urls-working.d.ts","sourceRoot":"","sources":["../../src/examples/02-multiple-urls-working.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG"}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Example 03: URL Filtering and Domain Restrictions
3
+ *
4
+ * This example demonstrates:
5
+ * - Custom URL filtering with regex patterns
6
+ * - File extension filtering
7
+ * - Domain restrictions
8
+ * - Technical URL filtering (malformed, long URLs, etc.)
9
+ *
10
+ * Tests against: web-scraping.dev with various filters
11
+ */
12
+ export {};
13
+ //# sourceMappingURL=03-url-filtering.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"03-url-filtering.d.ts","sourceRoot":"","sources":["../../src/examples/03-url-filtering.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG"}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Example 04: Robots.txt Compliance and Rate Limiting
3
+ *
4
+ * This example demonstrates:
5
+ * - Robots.txt compliance checking
6
+ * - Crawl delay respect from robots.txt
7
+ * - Rate limiting per domain
8
+ * - Request delay configuration
9
+ * - Respectful crawling practices
10
+ *
11
+ * Tests against: web-scraping.dev robots.txt rules
12
+ */
13
+ export {};
14
+ //# sourceMappingURL=04-robots-compliance.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"04-robots-compliance.d.ts","sourceRoot":"","sources":["../../src/examples/04-robots-compliance.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG"}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Example 05: Advanced Link Extraction with CSS Selectors
3
+ *
4
+ * This example demonstrates:
5
+ * - Custom CSS selectors for targeted link extraction
6
+ * - Link extraction from different HTML elements
7
+ * - Form action URL extraction
8
+ * - Custom attribute extraction
9
+ * - Link extraction statistics and analysis
10
+ *
11
+ * Tests against: web-scraping.dev with targeted selectors
12
+ */
13
+ export {};
14
+ //# sourceMappingURL=05-link-extraction-selectors.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"05-link-extraction-selectors.d.ts","sourceRoot":"","sources":["../../src/examples/05-link-extraction-selectors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG"}
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Example 06: Custom Middleware Usage
3
+ *
4
+ * This example demonstrates:
5
+ * - Creating custom middleware for request/response processing
6
+ * - Using built-in middleware (RateLimit, Logging, UserAgent, Stats)
7
+ * - Middleware error handling
8
+ * - Request preprocessing and response postprocessing
9
+ * - Statistics collection through middleware
10
+ *
11
+ * Tests against: web-scraping.dev with custom middleware pipeline
12
+ */
13
+ import { SpiderMiddleware } from '../index.js';
14
+ declare const TimingMiddleware: SpiderMiddleware;
15
+ declare const ContentAnalysisMiddleware: SpiderMiddleware;
16
+ declare const PatternDetectionMiddleware: SpiderMiddleware;
17
+ export { TimingMiddleware, ContentAnalysisMiddleware, PatternDetectionMiddleware };
18
+ //# sourceMappingURL=06-custom-middleware.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"06-custom-middleware.d.ts","sourceRoot":"","sources":["../../src/examples/06-custom-middleware.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAGH,OAAO,EAKL,gBAAgB,EAIjB,MAAM,aAAa,CAAC;AAGrB,QAAA,MAAM,gBAAgB,EAAE,gBAyCvB,CAAC;AAGF,QAAA,MAAM,yBAAyB,EAAE,gBAiBhC,CAAC;AAGF,QAAA,MAAM,0BAA0B,EAAE,gBAiBjC,CAAC;AAGF,OAAO,EAAE,gBAAgB,EAAE,yBAAyB,EAAE,0BAA0B,EAAE,CAAC"}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Example 07: Resumability and State Persistence
3
+ *
4
+ * This example demonstrates:
5
+ * - Resumable crawling with state persistence
6
+ * - Multiple storage backends (File, Redis, Postgres)
7
+ * - Session management and restoration
8
+ * - State persistence strategies (Full, Delta, Hybrid)
9
+ * - Graceful recovery from interruptions
10
+ *
11
+ * Tests against: web-scraping.dev with resumable sessions
12
+ */
13
+ export {};
14
+ //# sourceMappingURL=07-resumability-demo.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"07-resumability-demo.d.ts","sourceRoot":"","sources":["../../src/examples/07-resumability-demo.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG"}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Example 08: Worker Health Monitoring and Performance Analysis
3
+ *
4
+ * This example demonstrates:
5
+ * - Worker health monitoring and stuck worker detection
6
+ * - Memory usage monitoring and limits
7
+ * - Performance metrics collection
8
+ * - Concurrent worker management
9
+ * - Worker lifecycle tracking
10
+ * - Domain failure detection and recovery
11
+ *
12
+ * Tests against: web-scraping.dev with worker monitoring
13
+ */
14
+ export {};
15
+ //# sourceMappingURL=08-worker-monitoring.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"08-worker-monitoring.d.ts","sourceRoot":"","sources":["../../src/examples/08-worker-monitoring.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG"}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Example 09: Error Handling and Recovery Strategies
3
+ *
4
+ * This example demonstrates:
5
+ * - Comprehensive error handling for network failures
6
+ * - Recovery strategies for failed requests
7
+ * - Timeout handling and request cancellation
8
+ * - Domain failure detection and recovery
9
+ * - Graceful degradation under adverse conditions
10
+ * - Error classification and reporting
11
+ *
12
+ * Tests against: web-scraping.dev with simulated failures
13
+ */
14
+ export {};
15
+ //# sourceMappingURL=09-error-handling-recovery.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"09-error-handling-recovery.d.ts","sourceRoot":"","sources":["../../src/examples/09-error-handling-recovery.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG"}
@@ -0,0 +1,33 @@
1
+ export type { ISpider, ISpiderScheduler, IMiddlewareManager, IRateLimitMiddleware, ILoggingMiddleware, IUserAgentMiddleware, IStatsMiddleware, } from './lib/api-facades.js';
2
+ export * from './lib/Spider/Spider.service.js';
3
+ export * from './lib/Robots/Robots.service.js';
4
+ export * from './lib/Scraper/Scraper.service.js';
5
+ export * from './lib/PageData/PageData.js';
6
+ export type { SpiderConfigOptions, SpiderConfigService, } from './lib/Config/SpiderConfig.service.js';
7
+ export { SpiderConfig, makeSpiderConfig, } from './lib/Config/SpiderConfig.service.js';
8
+ export type { IUrlDeduplicator } from './lib/UrlDeduplicator/UrlDeduplicator.service.js';
9
+ export { UrlDeduplicatorService } from './lib/UrlDeduplicator/UrlDeduplicator.service.js';
10
+ export type { StatePersistence } from './lib/Scheduler/SpiderScheduler.service.js';
11
+ export { SpiderSchedulerService, SpiderStateKey, PriorityRequest, SpiderState, } from './lib/Scheduler/SpiderScheduler.service.js';
12
+ export type { SpiderMiddleware, SpiderRequest, SpiderResponse, } from './lib/Middleware/SpiderMiddleware.js';
13
+ export { MiddlewareManager, RateLimitMiddleware, LoggingMiddleware, UserAgentMiddleware, StatsMiddleware, } from './lib/Middleware/SpiderMiddleware.js';
14
+ export type { LinkExtractorConfig, LinkExtractionResult, LinkExtractorServiceInterface, } from './lib/LinkExtractor/LinkExtractor.service.js';
15
+ export { LinkExtractorService, LinkExtractorServiceLayer, LinkExtractionError, } from './lib/LinkExtractor/LinkExtractor.service.js';
16
+ export type { CrawlResult, CrawlTask, SpiderLinkExtractionOptions, } from './lib/Spider/Spider.service.js';
17
+ export type { PersistenceStrategy, StateOperation, StorageBackend, StorageCapabilities, HybridPersistenceConfig, } from './lib/Resumability/types.js';
18
+ export type { ResumabilityConfig } from './lib/Resumability/Resumability.service.js';
19
+ export { StateDelta, PersistenceError as ResumabilityError, DEFAULT_HYBRID_CONFIG, } from './lib/Resumability/types.js';
20
+ export { ResumabilityService, ResumabilityConfigs, createStateOperation, } from './lib/Resumability/Resumability.service.js';
21
+ export { FullStatePersistence, DeltaPersistence, HybridPersistence, } from './lib/Resumability/strategies.js';
22
+ export { FileStorageBackend } from './lib/Resumability/backends/FileStorageBackend.js';
23
+ export { NetworkError, ResponseError, RobotsTxtError, ConfigurationError, MiddlewareError, FileSystemError, PersistenceError, } from './lib/errors.js';
24
+ export type { SpiderError } from './lib/errors.js';
25
+ export type { SpiderLogEvent, SpiderLogger, } from './lib/Logging/SpiderLogger.service.js';
26
+ export { SpiderLogger as SpiderLoggerTag, makeSpiderLogger, SpiderLoggerLive, } from './lib/Logging/SpiderLogger.service.js';
27
+ export type { CookieManagerService, EnhancedHttpClientService, HttpRequestOptions, HttpResponse, Session, Credentials, SessionStoreService, TokenInfo, TokenExtractorService, } from './lib/HttpClient/index.js';
28
+ export { CookieManager, makeCookieManager, CookieManagerLive, EnhancedHttpClient, makeEnhancedHttpClient, EnhancedHttpClientLive, SessionStore, makeSessionStore, SessionStoreLive, TokenExtractor, makeTokenExtractor, TokenExtractorLive, } from './lib/HttpClient/index.js';
29
+ export type { Token, StateManagerService } from './lib/StateManager/index.js';
30
+ export { TokenType, StateManager, makeStateManager, StateManagerLive, } from './lib/StateManager/index.js';
31
+ export type { LoginCredentials, ScrapingSession, WebScrapingEngineService, } from './lib/WebScrapingEngine/index.js';
32
+ export { WebScrapingEngine, makeWebScrapingEngine, WebScrapingEngineLive, } from './lib/WebScrapingEngine/index.js';
33
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,YAAY,EACV,OAAO,EACP,gBAAgB,EAChB,kBAAkB,EAClB,oBAAoB,EACpB,kBAAkB,EAClB,oBAAoB,EACpB,gBAAgB,GACjB,MAAM,sBAAsB,CAAC;AAG9B,cAAc,gCAAgC,CAAC;AAC/C,cAAc,gCAAgC,CAAC;AAC/C,cAAc,kCAAkC,CAAC;AACjD,cAAc,4BAA4B,CAAC;AAG3C,YAAY,EACV,mBAAmB,EACnB,mBAAmB,GACpB,MAAM,sCAAsC,CAAC;AAC9C,OAAO,EACL,YAAY,EACZ,gBAAgB,GACjB,MAAM,sCAAsC,CAAC;AAG9C,YAAY,EAAE,gBAAgB,EAAE,MAAM,kDAAkD,CAAC;AACzF,OAAO,EAAE,sBAAsB,EAAE,MAAM,kDAAkD,CAAC;AAG1F,YAAY,EAAE,gBAAgB,EAAE,MAAM,4CAA4C,CAAC;AACnF,OAAO,EACL,sBAAsB,EACtB,cAAc,EACd,eAAe,EACf,WAAW,GACZ,MAAM,4CAA4C,CAAC;AAGpD,YAAY,EACV,gBAAgB,EAChB,aAAa,EACb,cAAc,GACf,MAAM,sCAAsC,CAAC;AAC9C,OAAO,EACL,iBAAiB,EACjB,mBAAmB,EACnB,iBAAiB,EACjB,mBAAmB,EACnB,eAAe,GAChB,MAAM,sCAAsC,CAAC;AAG9C,YAAY,EACV,mBAAmB,EACnB,oBAAoB,EACpB,6BAA6B,GAC9B,MAAM,8CAA8C,CAAC;AACtD,OAAO,EACL,oBAAoB,EACpB,yBAAyB,EACzB,mBAAmB,GACpB,MAAM,8CAA8C,CAAC;AAGtD,YAAY,EACV,WAAW,EACX,SAAS,EACT,2BAA2B,GAC5B,MAAM,gCAAgC,CAAC;AAGxC,YAAY,EACV,mBAAmB,EACnB,cAAc,EACd,cAAc,EACd,mBAAmB,EACnB,uBAAuB,GACxB,MAAM,6BAA6B,CAAC;AACrC,YAAY,EAAE,kBAAkB,EAAE,MAAM,4CAA4C,CAAC;AACrF,OAAO,EACL,UAAU,EACV,gBAAgB,IAAI,iBAAiB,EACrC,qBAAqB,GACtB,MAAM,6BAA6B,CAAC;AACrC,OAAO,EACL,mBAAmB,EACnB,mBAAmB,EACnB,oBAAoB,GACrB,MAAM,4CAA4C,CAAC;AACpD,OAAO,EACL,oBAAoB,EACpB,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAAE,kBAAkB,EAAE,MAAM,mDAAmD,CAAC;AAGvF,OAAO,EACL,YAAY,EACZ,aAAa,EACb,cAAc,EACd,kBAAkB,EAClB,eAAe,EACf,eAAe,EACf,gBAAgB,GACjB,MAAM,iBAAiB,CAAC;AACzB,YAAY,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAGnD,YAAY,EACV,cAAc,EACd,YAAY,GACb,MAAM,uCAAuC,CAAC;AAC/C,OAAO,EACL,YAAY,IAAI,eAAe,EAC/B,gBAAgB,EAChB,gBAAgB,GACjB,MAAM,uCAAuC,CAAC;AAG/C,YAAY,EACV,oBAAoB,EACpB,yBAAyB,EACzB,kBAAkB,EAClB,YAAY,EACZ,OAAO,EACP,WAAW,EACX,mBAAmB,EACnB,SAAS,EACT,qBAAqB,GACtB,MAAM,2BAA2B,CAAC;AACnC,OAAO,EACL,aAAa,EACb,iBAAiB,EACjB,iBAAiB,EACjB,kBAAkB,EAClB,sBAAsB,EACtB,sBAAsB,EACtB,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,cAAc,EACd,kBAAkB,EAClB,kBAAkB
,GACnB,MAAM,2BAA2B,CAAC;AAGnC,YAAY,EAAE,KAAK,EAAE,mBAAmB,EAAE,MAAM,6BAA6B,CAAC;AAC9E,OAAO,EACL,SAAS,EACT,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,GACjB,MAAM,6BAA6B,CAAC;AAGrC,YAAY,EACV,gBAAgB,EAChB,eAAe,EACf,wBAAwB,GACzB,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,GACtB,MAAM,kCAAkC,CAAC"}