@jambudipa/spider 0.2.1 → 0.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (90)
  1. package/README.md +10 -16
  2. package/dist/index.d.ts +33 -0
  3. package/dist/index.d.ts.map +1 -0
  4. package/dist/index.js +3091 -1657
  5. package/dist/index.js.map +1 -1
  6. package/dist/lib/BrowserEngine/BrowserEngine.service.d.ts +107 -0
  7. package/dist/lib/BrowserEngine/BrowserEngine.service.d.ts.map +1 -0
  8. package/dist/lib/Config/SpiderConfig.service.d.ts +256 -0
  9. package/dist/lib/Config/SpiderConfig.service.d.ts.map +1 -0
  10. package/dist/lib/HttpClient/CookieManager.d.ts +58 -0
  11. package/dist/lib/HttpClient/CookieManager.d.ts.map +1 -0
  12. package/dist/lib/HttpClient/EnhancedHttpClient.d.ts +63 -0
  13. package/dist/lib/HttpClient/EnhancedHttpClient.d.ts.map +1 -0
  14. package/dist/lib/HttpClient/SessionStore.d.ts +114 -0
  15. package/dist/lib/HttpClient/SessionStore.d.ts.map +1 -0
  16. package/dist/lib/HttpClient/TokenExtractor.d.ts +83 -0
  17. package/dist/lib/HttpClient/TokenExtractor.d.ts.map +1 -0
  18. package/dist/lib/HttpClient/index.d.ts +8 -0
  19. package/dist/lib/HttpClient/index.d.ts.map +1 -0
  20. package/dist/lib/LinkExtractor/LinkExtractor.service.d.ts +166 -0
  21. package/dist/lib/LinkExtractor/LinkExtractor.service.d.ts.map +1 -0
  22. package/dist/lib/LinkExtractor/index.d.ts +37 -0
  23. package/dist/lib/LinkExtractor/index.d.ts.map +1 -0
  24. package/dist/lib/Logging/FetchLogger.d.ts +24 -0
  25. package/dist/lib/Logging/FetchLogger.d.ts.map +1 -0
  26. package/dist/lib/Logging/SpiderLogger.service.d.ts +37 -0
  27. package/dist/lib/Logging/SpiderLogger.service.d.ts.map +1 -0
  28. package/dist/lib/Middleware/SpiderMiddleware.d.ts +239 -0
  29. package/dist/lib/Middleware/SpiderMiddleware.d.ts.map +1 -0
  30. package/dist/lib/Middleware/types.d.ts +99 -0
  31. package/dist/lib/Middleware/types.d.ts.map +1 -0
  32. package/dist/lib/PageData/PageData.d.ts +28 -0
  33. package/dist/lib/PageData/PageData.d.ts.map +1 -0
  34. package/dist/lib/Resumability/Resumability.service.d.ts +178 -0
  35. package/dist/lib/Resumability/Resumability.service.d.ts.map +1 -0
  36. package/dist/lib/Resumability/backends/FileStorageBackend.d.ts +47 -0
  37. package/dist/lib/Resumability/backends/FileStorageBackend.d.ts.map +1 -0
  38. package/dist/lib/Resumability/backends/PostgresStorageBackend.d.ts +95 -0
  39. package/dist/lib/Resumability/backends/PostgresStorageBackend.d.ts.map +1 -0
  40. package/dist/lib/Resumability/backends/RedisStorageBackend.d.ts +92 -0
  41. package/dist/lib/Resumability/backends/RedisStorageBackend.d.ts.map +1 -0
  42. package/dist/lib/Resumability/index.d.ts +51 -0
  43. package/dist/lib/Resumability/index.d.ts.map +1 -0
  44. package/dist/lib/Resumability/strategies.d.ts +76 -0
  45. package/dist/lib/Resumability/strategies.d.ts.map +1 -0
  46. package/dist/lib/Resumability/types.d.ts +201 -0
  47. package/dist/lib/Resumability/types.d.ts.map +1 -0
  48. package/dist/lib/Robots/Robots.service.d.ts +78 -0
  49. package/dist/lib/Robots/Robots.service.d.ts.map +1 -0
  50. package/dist/lib/Scheduler/SpiderScheduler.service.d.ts +211 -0
  51. package/dist/lib/Scheduler/SpiderScheduler.service.d.ts.map +1 -0
  52. package/dist/lib/Scraper/Scraper.service.d.ts +123 -0
  53. package/dist/lib/Scraper/Scraper.service.d.ts.map +1 -0
  54. package/dist/lib/Spider/Spider.defaults.d.ts +24 -0
  55. package/dist/lib/Spider/Spider.defaults.d.ts.map +1 -0
  56. package/dist/lib/Spider/Spider.service.d.ts +239 -0
  57. package/dist/lib/Spider/Spider.service.d.ts.map +1 -0
  58. package/dist/lib/StateManager/StateManager.service.d.ts +107 -0
  59. package/dist/lib/StateManager/StateManager.service.d.ts.map +1 -0
  60. package/dist/lib/StateManager/index.d.ts +5 -0
  61. package/dist/lib/StateManager/index.d.ts.map +1 -0
  62. package/dist/lib/UrlDeduplicator/UrlDeduplicator.service.d.ts +58 -0
  63. package/dist/lib/UrlDeduplicator/UrlDeduplicator.service.d.ts.map +1 -0
  64. package/dist/lib/WebScrapingEngine/WebScrapingEngine.service.d.ts +109 -0
  65. package/dist/lib/WebScrapingEngine/WebScrapingEngine.service.d.ts.map +1 -0
  66. package/dist/lib/WebScrapingEngine/index.d.ts +5 -0
  67. package/dist/lib/WebScrapingEngine/index.d.ts.map +1 -0
  68. package/dist/lib/WorkerHealth/WorkerHealthMonitor.service.d.ts +39 -0
  69. package/dist/lib/WorkerHealth/WorkerHealthMonitor.service.d.ts.map +1 -0
  70. package/dist/lib/api-facades.d.ts +313 -0
  71. package/dist/lib/api-facades.d.ts.map +1 -0
  72. package/dist/lib/errors/effect-errors.d.ts +312 -0
  73. package/dist/lib/errors/effect-errors.d.ts.map +1 -0
  74. package/dist/lib/utils/FileUtils.d.ts +284 -0
  75. package/dist/lib/utils/FileUtils.d.ts.map +1 -0
  76. package/dist/lib/utils/JsonUtils.d.ts +196 -0
  77. package/dist/lib/utils/JsonUtils.d.ts.map +1 -0
  78. package/dist/lib/utils/RegexUtils.d.ts +257 -0
  79. package/dist/lib/utils/RegexUtils.d.ts.map +1 -0
  80. package/dist/lib/utils/SchemaUtils.d.ts +251 -0
  81. package/dist/lib/utils/SchemaUtils.d.ts.map +1 -0
  82. package/dist/lib/utils/UrlUtils.d.ts +223 -0
  83. package/dist/lib/utils/UrlUtils.d.ts.map +1 -0
  84. package/dist/lib/utils/effect-migration.d.ts +31 -0
  85. package/dist/lib/utils/effect-migration.d.ts.map +1 -0
  86. package/dist/lib/utils/index.d.ts +15 -0
  87. package/dist/lib/utils/index.d.ts.map +1 -0
  88. package/dist/lib/utils/url-deduplication.d.ts +108 -0
  89. package/dist/lib/utils/url-deduplication.d.ts.map +1 -0
  90. package/package.json +22 -13
package/README.md CHANGED
@@ -37,12 +37,12 @@ A powerful, Effect-based web crawling framework for modern TypeScript applicatio
37
37
 
38
38
  > **Live Testing**: Our CI pipeline runs all 16 web scraping scenarios against real websites daily, ensuring Spider remains robust against changing web technologies.
39
39
 
40
- ### 🔍 **Current Status** (Updated: Aug 2025)
40
+ ### 🔍 **Current Status** (Updated: Jan 2026)
41
41
  - ✅ **Core Functionality**: All web scraping scenarios working
42
42
  - ✅ **Type Safety**: Full TypeScript compilation without errors
43
43
  - ✅ **Build System**: Package builds successfully for distribution
44
- - ✅ **Test Suite**: 92+ scenario tests passing against live websites
45
- - ⚠️ **Code Quality**: 1,163 linting issues identified (technical debt - does not affect functionality)
44
+ - ✅ **Test Suite**: 243 tests passing against live websites (25 test files)
45
+ - **Code Quality**: Clean - only 3 linting warnings (skipped test suites)
46
46
 
47
47
  ## ✨ Key Features
48
48
 
@@ -419,33 +419,27 @@ npm run typecheck
419
419
  # Validate CI setup locally
420
420
  npm run ci:validate
421
421
 
422
- # Code quality (has known issues)
423
- npm run lint # Shows 1,163 issues
422
+ # Code quality
423
+ npm run lint # Shows 3 warnings (skipped tests)
424
424
  npm run format # Formats code consistently
425
425
  ```
426
426
 
427
427
  ### 🛠️ Contributing & Code Quality
428
428
 
429
- **Current State**: The codebase is fully functional with comprehensive test coverage, but has technical debt in code style consistency.
429
+ **Current State**: The codebase is fully functional with comprehensive test coverage and clean linting.
430
430
 
431
431
  - ✅ **Functional Changes**: All PRs must pass scenario tests
432
- - ✅ **Type Safety**: TypeScript compilation must succeed
432
+ - ✅ **Type Safety**: TypeScript compilation must succeed
433
433
  - ✅ **Build System**: Package must build without errors
434
- - 🔄 **Code Style**: Help wanted fixing linting issues (great first contribution!)
434
+ - **Code Style**: ESLint configured with Effect-idiomatic rules
435
435
 
436
- **Contributing to Code Quality**:
436
+ **Code Quality Commands**:
437
437
  ```bash
438
- # See specific linting issues
438
+ # Check for linting issues
439
439
  npm run lint
440
440
 
441
441
  # Fix auto-fixable issues
442
442
  npm run lint:fix
443
-
444
- # Focus areas for improvement:
445
- # - Unused variable cleanup (877 issues)
446
- # - Return type annotations (286 issues)
447
- # - Nullish coalescing operators
448
- # - Console.log removal in production code
449
443
  ```
450
444
 
451
445
  ## License
@@ -0,0 +1,33 @@
1
+ export type { ISpider, ISpiderScheduler, IMiddlewareManager, IRateLimitMiddleware, ILoggingMiddleware, IUserAgentMiddleware, IStatsMiddleware, } from './lib/api-facades.js';
2
+ export * from './lib/Spider/Spider.service.js';
3
+ export * from './lib/Robots/Robots.service.js';
4
+ export * from './lib/Scraper/Scraper.service.js';
5
+ export * from './lib/PageData/PageData.js';
6
+ export type { SpiderConfigOptions, SpiderConfigService, } from './lib/Config/SpiderConfig.service.js';
7
+ export { SpiderConfig, makeSpiderConfig, } from './lib/Config/SpiderConfig.service.js';
8
+ export type { IUrlDeduplicator } from './lib/UrlDeduplicator/UrlDeduplicator.service.js';
9
+ export { UrlDeduplicatorService } from './lib/UrlDeduplicator/UrlDeduplicator.service.js';
10
+ export type { StatePersistence } from './lib/Scheduler/SpiderScheduler.service.js';
11
+ export { SpiderSchedulerService, SpiderStateKey, PriorityRequest, SpiderState, } from './lib/Scheduler/SpiderScheduler.service.js';
12
+ export type { SpiderMiddleware, SpiderRequest, SpiderResponse, } from './lib/Middleware/SpiderMiddleware.js';
13
+ export { MiddlewareManager, RateLimitMiddleware, LoggingMiddleware, UserAgentMiddleware, StatsMiddleware, } from './lib/Middleware/SpiderMiddleware.js';
14
+ export type { LinkExtractorConfig, LinkExtractionResult, LinkExtractorServiceInterface, } from './lib/LinkExtractor/LinkExtractor.service.js';
15
+ export { LinkExtractorService, LinkExtractorServiceLayer, LinkExtractionError, } from './lib/LinkExtractor/LinkExtractor.service.js';
16
+ export type { CrawlResult, CrawlTask, SpiderLinkExtractionOptions, } from './lib/Spider/Spider.service.js';
17
+ export type { PersistenceStrategy, StateOperation, StorageBackend, StorageCapabilities, HybridPersistenceConfig, } from './lib/Resumability/types.js';
18
+ export type { ResumabilityConfig } from './lib/Resumability/Resumability.service.js';
19
+ export { StateDelta, PersistenceError as ResumabilityError, DEFAULT_HYBRID_CONFIG, } from './lib/Resumability/types.js';
20
+ export { ResumabilityService, ResumabilityConfigs, createStateOperation, } from './lib/Resumability/Resumability.service.js';
21
+ export { FullStatePersistence, DeltaPersistence, HybridPersistence, } from './lib/Resumability/strategies.js';
22
+ export { FileStorageBackend } from './lib/Resumability/backends/FileStorageBackend.js';
23
+ export { NetworkError, ResponseError, RobotsTxtError, ConfigurationError, MiddlewareError, FileSystemError, PersistenceError, ContentTypeError, RequestAbortError, AdapterNotInitialisedError, BrowserError, BrowserCleanupError, TimeoutError, ParseError, ValidationError, PageError, StateError, SessionError, CrawlError, QueueError, ConfigError, isSpiderError, isNetworkError, isBrowserError, } from './lib/errors/effect-errors.js';
24
+ export type { SpiderError, AllSpiderErrors } from './lib/errors/effect-errors.js';
25
+ export type { SpiderLogEvent, SpiderLogger, } from './lib/Logging/SpiderLogger.service.js';
26
+ export { SpiderLogger as SpiderLoggerTag, makeSpiderLogger, SpiderLoggerLive, } from './lib/Logging/SpiderLogger.service.js';
27
+ export type { CookieManagerService, EnhancedHttpClientService, HttpRequestOptions, HttpResponse, Session, Credentials, SessionStoreService, TokenInfo, TokenExtractorService, } from './lib/HttpClient/index.js';
28
+ export { CookieManager, makeCookieManager, CookieManagerLive, EnhancedHttpClient, makeEnhancedHttpClient, EnhancedHttpClientLive, SessionStore, makeSessionStore, SessionStoreLive, TokenExtractor, makeTokenExtractor, TokenExtractorLive, } from './lib/HttpClient/index.js';
29
+ export type { Token, StateManagerService } from './lib/StateManager/index.js';
30
+ export { TokenType, StateManager, makeStateManager, StateManagerLive, } from './lib/StateManager/index.js';
31
+ export type { LoginCredentials, ScrapingSession, WebScrapingEngineService, } from './lib/WebScrapingEngine/index.js';
32
+ export { WebScrapingEngine, makeWebScrapingEngine, WebScrapingEngineLive, } from './lib/WebScrapingEngine/index.js';
33
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,YAAY,EACV,OAAO,EACP,gBAAgB,EAChB,kBAAkB,EAClB,oBAAoB,EACpB,kBAAkB,EAClB,oBAAoB,EACpB,gBAAgB,GACjB,MAAM,sBAAsB,CAAC;AAG9B,cAAc,gCAAgC,CAAC;AAC/C,cAAc,gCAAgC,CAAC;AAC/C,cAAc,kCAAkC,CAAC;AACjD,cAAc,4BAA4B,CAAC;AAG3C,YAAY,EACV,mBAAmB,EACnB,mBAAmB,GACpB,MAAM,sCAAsC,CAAC;AAC9C,OAAO,EACL,YAAY,EACZ,gBAAgB,GACjB,MAAM,sCAAsC,CAAC;AAG9C,YAAY,EAAE,gBAAgB,EAAE,MAAM,kDAAkD,CAAC;AACzF,OAAO,EAAE,sBAAsB,EAAE,MAAM,kDAAkD,CAAC;AAG1F,YAAY,EAAE,gBAAgB,EAAE,MAAM,4CAA4C,CAAC;AACnF,OAAO,EACL,sBAAsB,EACtB,cAAc,EACd,eAAe,EACf,WAAW,GACZ,MAAM,4CAA4C,CAAC;AAGpD,YAAY,EACV,gBAAgB,EAChB,aAAa,EACb,cAAc,GACf,MAAM,sCAAsC,CAAC;AAC9C,OAAO,EACL,iBAAiB,EACjB,mBAAmB,EACnB,iBAAiB,EACjB,mBAAmB,EACnB,eAAe,GAChB,MAAM,sCAAsC,CAAC;AAG9C,YAAY,EACV,mBAAmB,EACnB,oBAAoB,EACpB,6BAA6B,GAC9B,MAAM,8CAA8C,CAAC;AACtD,OAAO,EACL,oBAAoB,EACpB,yBAAyB,EACzB,mBAAmB,GACpB,MAAM,8CAA8C,CAAC;AAGtD,YAAY,EACV,WAAW,EACX,SAAS,EACT,2BAA2B,GAC5B,MAAM,gCAAgC,CAAC;AAGxC,YAAY,EACV,mBAAmB,EACnB,cAAc,EACd,cAAc,EACd,mBAAmB,EACnB,uBAAuB,GACxB,MAAM,6BAA6B,CAAC;AACrC,YAAY,EAAE,kBAAkB,EAAE,MAAM,4CAA4C,CAAC;AACrF,OAAO,EACL,UAAU,EACV,gBAAgB,IAAI,iBAAiB,EACrC,qBAAqB,GACtB,MAAM,6BAA6B,CAAC;AACrC,OAAO,EACL,mBAAmB,EACnB,mBAAmB,EACnB,oBAAoB,GACrB,MAAM,4CAA4C,CAAC;AACpD,OAAO,EACL,oBAAoB,EACpB,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAAE,kBAAkB,EAAE,MAAM,mDAAmD,CAAC;AAGvF,OAAO,EACL,YAAY,EACZ,aAAa,EACb,cAAc,EACd,kBAAkB,EAClB,eAAe,EACf,eAAe,EACf,gBAAgB,EAChB,gBAAgB,EAChB,iBAAiB,EACjB,0BAA0B,EAC1B,YAAY,EACZ,mBAAmB,EACnB,YAAY,EACZ,UAAU,EACV,eAAe,EACf,SAAS,EACT,UAAU,EACV,YAAY,EACZ,UAAU,EACV,UAAU,EACV,WAAW,EACX,aAAa,EACb,cAAc,EACd,cAAc,GACf,MAAM,+BAA+B,CAAC;AACvC,YAAY,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,+BAA+B,CAAC;AAGlF,YAAY,EACV,cAAc,EACd,YAAY,GACb,MAAM,uCAAuC,CAAC;AAC/C,OAAO,EACL,YAAY,IAAI,eAAe,EAC/B,gBAAgB,EAChB,gBAAgB,GACjB,MAAM,uCAAuC,CAAC;AAG/C,YAAY,EACV,oBAAoB,EACpB,yBAAyB,EACzB,kBAAkB,EAClB,YAAY,EACZ,OAAO,EACP,WAAW,EACX,mBAAmB,EACnB,SAAS
,EACT,qBAAqB,GACtB,MAAM,2BAA2B,CAAC;AACnC,OAAO,EACL,aAAa,EACb,iBAAiB,EACjB,iBAAiB,EACjB,kBAAkB,EAClB,sBAAsB,EACtB,sBAAsB,EACtB,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,cAAc,EACd,kBAAkB,EAClB,kBAAkB,GACnB,MAAM,2BAA2B,CAAC;AAGnC,YAAY,EAAE,KAAK,EAAE,mBAAmB,EAAE,MAAM,6BAA6B,CAAC;AAC9E,OAAO,EACL,SAAS,EACT,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,GACjB,MAAM,6BAA6B,CAAC;AAGrC,YAAY,EACV,gBAAgB,EAChB,eAAe,EACf,wBAAwB,GACzB,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,GACtB,MAAM,kCAAkC,CAAC"}