@jambudipa/spider 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -16
- package/dist/index.d.ts +33 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +3091 -1657
- package/dist/index.js.map +1 -1
- package/dist/lib/BrowserEngine/BrowserEngine.service.d.ts +107 -0
- package/dist/lib/BrowserEngine/BrowserEngine.service.d.ts.map +1 -0
- package/dist/lib/Config/SpiderConfig.service.d.ts +256 -0
- package/dist/lib/Config/SpiderConfig.service.d.ts.map +1 -0
- package/dist/lib/HttpClient/CookieManager.d.ts +58 -0
- package/dist/lib/HttpClient/CookieManager.d.ts.map +1 -0
- package/dist/lib/HttpClient/EnhancedHttpClient.d.ts +63 -0
- package/dist/lib/HttpClient/EnhancedHttpClient.d.ts.map +1 -0
- package/dist/lib/HttpClient/SessionStore.d.ts +114 -0
- package/dist/lib/HttpClient/SessionStore.d.ts.map +1 -0
- package/dist/lib/HttpClient/TokenExtractor.d.ts +83 -0
- package/dist/lib/HttpClient/TokenExtractor.d.ts.map +1 -0
- package/dist/lib/HttpClient/index.d.ts +8 -0
- package/dist/lib/HttpClient/index.d.ts.map +1 -0
- package/dist/lib/LinkExtractor/LinkExtractor.service.d.ts +166 -0
- package/dist/lib/LinkExtractor/LinkExtractor.service.d.ts.map +1 -0
- package/dist/lib/LinkExtractor/index.d.ts +37 -0
- package/dist/lib/LinkExtractor/index.d.ts.map +1 -0
- package/dist/lib/Logging/FetchLogger.d.ts +24 -0
- package/dist/lib/Logging/FetchLogger.d.ts.map +1 -0
- package/dist/lib/Logging/SpiderLogger.service.d.ts +37 -0
- package/dist/lib/Logging/SpiderLogger.service.d.ts.map +1 -0
- package/dist/lib/Middleware/SpiderMiddleware.d.ts +239 -0
- package/dist/lib/Middleware/SpiderMiddleware.d.ts.map +1 -0
- package/dist/lib/Middleware/types.d.ts +99 -0
- package/dist/lib/Middleware/types.d.ts.map +1 -0
- package/dist/lib/PageData/PageData.d.ts +28 -0
- package/dist/lib/PageData/PageData.d.ts.map +1 -0
- package/dist/lib/Resumability/Resumability.service.d.ts +178 -0
- package/dist/lib/Resumability/Resumability.service.d.ts.map +1 -0
- package/dist/lib/Resumability/backends/FileStorageBackend.d.ts +47 -0
- package/dist/lib/Resumability/backends/FileStorageBackend.d.ts.map +1 -0
- package/dist/lib/Resumability/backends/PostgresStorageBackend.d.ts +95 -0
- package/dist/lib/Resumability/backends/PostgresStorageBackend.d.ts.map +1 -0
- package/dist/lib/Resumability/backends/RedisStorageBackend.d.ts +92 -0
- package/dist/lib/Resumability/backends/RedisStorageBackend.d.ts.map +1 -0
- package/dist/lib/Resumability/index.d.ts +51 -0
- package/dist/lib/Resumability/index.d.ts.map +1 -0
- package/dist/lib/Resumability/strategies.d.ts +76 -0
- package/dist/lib/Resumability/strategies.d.ts.map +1 -0
- package/dist/lib/Resumability/types.d.ts +201 -0
- package/dist/lib/Resumability/types.d.ts.map +1 -0
- package/dist/lib/Robots/Robots.service.d.ts +78 -0
- package/dist/lib/Robots/Robots.service.d.ts.map +1 -0
- package/dist/lib/Scheduler/SpiderScheduler.service.d.ts +211 -0
- package/dist/lib/Scheduler/SpiderScheduler.service.d.ts.map +1 -0
- package/dist/lib/Scraper/Scraper.service.d.ts +123 -0
- package/dist/lib/Scraper/Scraper.service.d.ts.map +1 -0
- package/dist/lib/Spider/Spider.defaults.d.ts +24 -0
- package/dist/lib/Spider/Spider.defaults.d.ts.map +1 -0
- package/dist/lib/Spider/Spider.service.d.ts +239 -0
- package/dist/lib/Spider/Spider.service.d.ts.map +1 -0
- package/dist/lib/StateManager/StateManager.service.d.ts +107 -0
- package/dist/lib/StateManager/StateManager.service.d.ts.map +1 -0
- package/dist/lib/StateManager/index.d.ts +5 -0
- package/dist/lib/StateManager/index.d.ts.map +1 -0
- package/dist/lib/UrlDeduplicator/UrlDeduplicator.service.d.ts +58 -0
- package/dist/lib/UrlDeduplicator/UrlDeduplicator.service.d.ts.map +1 -0
- package/dist/lib/WebScrapingEngine/WebScrapingEngine.service.d.ts +109 -0
- package/dist/lib/WebScrapingEngine/WebScrapingEngine.service.d.ts.map +1 -0
- package/dist/lib/WebScrapingEngine/index.d.ts +5 -0
- package/dist/lib/WebScrapingEngine/index.d.ts.map +1 -0
- package/dist/lib/WorkerHealth/WorkerHealthMonitor.service.d.ts +39 -0
- package/dist/lib/WorkerHealth/WorkerHealthMonitor.service.d.ts.map +1 -0
- package/dist/lib/api-facades.d.ts +313 -0
- package/dist/lib/api-facades.d.ts.map +1 -0
- package/dist/lib/errors/effect-errors.d.ts +312 -0
- package/dist/lib/errors/effect-errors.d.ts.map +1 -0
- package/dist/lib/utils/FileUtils.d.ts +284 -0
- package/dist/lib/utils/FileUtils.d.ts.map +1 -0
- package/dist/lib/utils/JsonUtils.d.ts +196 -0
- package/dist/lib/utils/JsonUtils.d.ts.map +1 -0
- package/dist/lib/utils/RegexUtils.d.ts +257 -0
- package/dist/lib/utils/RegexUtils.d.ts.map +1 -0
- package/dist/lib/utils/SchemaUtils.d.ts +251 -0
- package/dist/lib/utils/SchemaUtils.d.ts.map +1 -0
- package/dist/lib/utils/UrlUtils.d.ts +223 -0
- package/dist/lib/utils/UrlUtils.d.ts.map +1 -0
- package/dist/lib/utils/effect-migration.d.ts +31 -0
- package/dist/lib/utils/effect-migration.d.ts.map +1 -0
- package/dist/lib/utils/index.d.ts +15 -0
- package/dist/lib/utils/index.d.ts.map +1 -0
- package/dist/lib/utils/url-deduplication.d.ts +108 -0
- package/dist/lib/utils/url-deduplication.d.ts.map +1 -0
- package/package.json +22 -13
package/README.md
CHANGED
|
@@ -37,12 +37,12 @@ A powerful, Effect-based web crawling framework for modern TypeScript applicatio
|
|
|
37
37
|
|
|
38
38
|
> **Live Testing**: Our CI pipeline runs all 16 web scraping scenarios against real websites daily, ensuring Spider remains robust against changing web technologies.
|
|
39
39
|
|
|
40
|
-
### 🔍 **Current Status** (Updated:
|
|
40
|
+
### 🔍 **Current Status** (Updated: Jan 2026)
|
|
41
41
|
- ✅ **Core Functionality**: All web scraping scenarios working
|
|
42
42
|
- ✅ **Type Safety**: Full TypeScript compilation without errors
|
|
43
43
|
- ✅ **Build System**: Package builds successfully for distribution
|
|
44
|
-
- ✅ **Test Suite**:
|
|
45
|
-
-
|
|
44
|
+
- ✅ **Test Suite**: 243 tests passing against live websites (25 test files)
|
|
45
|
+
- ✅ **Code Quality**: Clean - only 3 linting warnings (skipped test suites)
|
|
46
46
|
|
|
47
47
|
## ✨ Key Features
|
|
48
48
|
|
|
@@ -419,33 +419,27 @@ npm run typecheck
|
|
|
419
419
|
# Validate CI setup locally
|
|
420
420
|
npm run ci:validate
|
|
421
421
|
|
|
422
|
-
# Code quality
|
|
423
|
-
npm run lint # Shows
|
|
422
|
+
# Code quality
|
|
423
|
+
npm run lint # Shows 3 warnings (skipped tests)
|
|
424
424
|
npm run format # Formats code consistently
|
|
425
425
|
```
|
|
426
426
|
|
|
427
427
|
### 🛠️ Contributing & Code Quality
|
|
428
428
|
|
|
429
|
-
**Current State**: The codebase is fully functional with comprehensive test coverage
|
|
429
|
+
**Current State**: The codebase is fully functional with comprehensive test coverage and clean linting.
|
|
430
430
|
|
|
431
431
|
- ✅ **Functional Changes**: All PRs must pass scenario tests
|
|
432
|
-
- ✅ **Type Safety**: TypeScript compilation must succeed
|
|
432
|
+
- ✅ **Type Safety**: TypeScript compilation must succeed
|
|
433
433
|
- ✅ **Build System**: Package must build without errors
|
|
434
|
-
-
|
|
434
|
+
- ✅ **Code Style**: ESLint configured with Effect-idiomatic rules
|
|
435
435
|
|
|
436
|
-
**
|
|
436
|
+
**Code Quality Commands**:
|
|
437
437
|
```bash
|
|
438
|
-
#
|
|
438
|
+
# Check for linting issues
|
|
439
439
|
npm run lint
|
|
440
440
|
|
|
441
441
|
# Fix auto-fixable issues
|
|
442
442
|
npm run lint:fix
|
|
443
|
-
|
|
444
|
-
# Focus areas for improvement:
|
|
445
|
-
# - Unused variable cleanup (877 issues)
|
|
446
|
-
# - Return type annotations (286 issues)
|
|
447
|
-
# - Nullish coalescing operators
|
|
448
|
-
# - Console.log removal in production code
|
|
449
443
|
```
|
|
450
444
|
|
|
451
445
|
## License
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
export type { ISpider, ISpiderScheduler, IMiddlewareManager, IRateLimitMiddleware, ILoggingMiddleware, IUserAgentMiddleware, IStatsMiddleware, } from './lib/api-facades.js';
|
|
2
|
+
export * from './lib/Spider/Spider.service.js';
|
|
3
|
+
export * from './lib/Robots/Robots.service.js';
|
|
4
|
+
export * from './lib/Scraper/Scraper.service.js';
|
|
5
|
+
export * from './lib/PageData/PageData.js';
|
|
6
|
+
export type { SpiderConfigOptions, SpiderConfigService, } from './lib/Config/SpiderConfig.service.js';
|
|
7
|
+
export { SpiderConfig, makeSpiderConfig, } from './lib/Config/SpiderConfig.service.js';
|
|
8
|
+
export type { IUrlDeduplicator } from './lib/UrlDeduplicator/UrlDeduplicator.service.js';
|
|
9
|
+
export { UrlDeduplicatorService } from './lib/UrlDeduplicator/UrlDeduplicator.service.js';
|
|
10
|
+
export type { StatePersistence } from './lib/Scheduler/SpiderScheduler.service.js';
|
|
11
|
+
export { SpiderSchedulerService, SpiderStateKey, PriorityRequest, SpiderState, } from './lib/Scheduler/SpiderScheduler.service.js';
|
|
12
|
+
export type { SpiderMiddleware, SpiderRequest, SpiderResponse, } from './lib/Middleware/SpiderMiddleware.js';
|
|
13
|
+
export { MiddlewareManager, RateLimitMiddleware, LoggingMiddleware, UserAgentMiddleware, StatsMiddleware, } from './lib/Middleware/SpiderMiddleware.js';
|
|
14
|
+
export type { LinkExtractorConfig, LinkExtractionResult, LinkExtractorServiceInterface, } from './lib/LinkExtractor/LinkExtractor.service.js';
|
|
15
|
+
export { LinkExtractorService, LinkExtractorServiceLayer, LinkExtractionError, } from './lib/LinkExtractor/LinkExtractor.service.js';
|
|
16
|
+
export type { CrawlResult, CrawlTask, SpiderLinkExtractionOptions, } from './lib/Spider/Spider.service.js';
|
|
17
|
+
export type { PersistenceStrategy, StateOperation, StorageBackend, StorageCapabilities, HybridPersistenceConfig, } from './lib/Resumability/types.js';
|
|
18
|
+
export type { ResumabilityConfig } from './lib/Resumability/Resumability.service.js';
|
|
19
|
+
export { StateDelta, PersistenceError as ResumabilityError, DEFAULT_HYBRID_CONFIG, } from './lib/Resumability/types.js';
|
|
20
|
+
export { ResumabilityService, ResumabilityConfigs, createStateOperation, } from './lib/Resumability/Resumability.service.js';
|
|
21
|
+
export { FullStatePersistence, DeltaPersistence, HybridPersistence, } from './lib/Resumability/strategies.js';
|
|
22
|
+
export { FileStorageBackend } from './lib/Resumability/backends/FileStorageBackend.js';
|
|
23
|
+
export { NetworkError, ResponseError, RobotsTxtError, ConfigurationError, MiddlewareError, FileSystemError, PersistenceError, ContentTypeError, RequestAbortError, AdapterNotInitialisedError, BrowserError, BrowserCleanupError, TimeoutError, ParseError, ValidationError, PageError, StateError, SessionError, CrawlError, QueueError, ConfigError, isSpiderError, isNetworkError, isBrowserError, } from './lib/errors/effect-errors.js';
|
|
24
|
+
export type { SpiderError, AllSpiderErrors } from './lib/errors/effect-errors.js';
|
|
25
|
+
export type { SpiderLogEvent, SpiderLogger, } from './lib/Logging/SpiderLogger.service.js';
|
|
26
|
+
export { SpiderLogger as SpiderLoggerTag, makeSpiderLogger, SpiderLoggerLive, } from './lib/Logging/SpiderLogger.service.js';
|
|
27
|
+
export type { CookieManagerService, EnhancedHttpClientService, HttpRequestOptions, HttpResponse, Session, Credentials, SessionStoreService, TokenInfo, TokenExtractorService, } from './lib/HttpClient/index.js';
|
|
28
|
+
export { CookieManager, makeCookieManager, CookieManagerLive, EnhancedHttpClient, makeEnhancedHttpClient, EnhancedHttpClientLive, SessionStore, makeSessionStore, SessionStoreLive, TokenExtractor, makeTokenExtractor, TokenExtractorLive, } from './lib/HttpClient/index.js';
|
|
29
|
+
export type { Token, StateManagerService } from './lib/StateManager/index.js';
|
|
30
|
+
export { TokenType, StateManager, makeStateManager, StateManagerLive, } from './lib/StateManager/index.js';
|
|
31
|
+
export type { LoginCredentials, ScrapingSession, WebScrapingEngineService, } from './lib/WebScrapingEngine/index.js';
|
|
32
|
+
export { WebScrapingEngine, makeWebScrapingEngine, WebScrapingEngineLive, } from './lib/WebScrapingEngine/index.js';
|
|
33
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,YAAY,EACV,OAAO,EACP,gBAAgB,EAChB,kBAAkB,EAClB,oBAAoB,EACpB,kBAAkB,EAClB,oBAAoB,EACpB,gBAAgB,GACjB,MAAM,sBAAsB,CAAC;AAG9B,cAAc,gCAAgC,CAAC;AAC/C,cAAc,gCAAgC,CAAC;AAC/C,cAAc,kCAAkC,CAAC;AACjD,cAAc,4BAA4B,CAAC;AAG3C,YAAY,EACV,mBAAmB,EACnB,mBAAmB,GACpB,MAAM,sCAAsC,CAAC;AAC9C,OAAO,EACL,YAAY,EACZ,gBAAgB,GACjB,MAAM,sCAAsC,CAAC;AAG9C,YAAY,EAAE,gBAAgB,EAAE,MAAM,kDAAkD,CAAC;AACzF,OAAO,EAAE,sBAAsB,EAAE,MAAM,kDAAkD,CAAC;AAG1F,YAAY,EAAE,gBAAgB,EAAE,MAAM,4CAA4C,CAAC;AACnF,OAAO,EACL,sBAAsB,EACtB,cAAc,EACd,eAAe,EACf,WAAW,GACZ,MAAM,4CAA4C,CAAC;AAGpD,YAAY,EACV,gBAAgB,EAChB,aAAa,EACb,cAAc,GACf,MAAM,sCAAsC,CAAC;AAC9C,OAAO,EACL,iBAAiB,EACjB,mBAAmB,EACnB,iBAAiB,EACjB,mBAAmB,EACnB,eAAe,GAChB,MAAM,sCAAsC,CAAC;AAG9C,YAAY,EACV,mBAAmB,EACnB,oBAAoB,EACpB,6BAA6B,GAC9B,MAAM,8CAA8C,CAAC;AACtD,OAAO,EACL,oBAAoB,EACpB,yBAAyB,EACzB,mBAAmB,GACpB,MAAM,8CAA8C,CAAC;AAGtD,YAAY,EACV,WAAW,EACX,SAAS,EACT,2BAA2B,GAC5B,MAAM,gCAAgC,CAAC;AAGxC,YAAY,EACV,mBAAmB,EACnB,cAAc,EACd,cAAc,EACd,mBAAmB,EACnB,uBAAuB,GACxB,MAAM,6BAA6B,CAAC;AACrC,YAAY,EAAE,kBAAkB,EAAE,MAAM,4CAA4C,CAAC;AACrF,OAAO,EACL,UAAU,EACV,gBAAgB,IAAI,iBAAiB,EACrC,qBAAqB,GACtB,MAAM,6BAA6B,CAAC;AACrC,OAAO,EACL,mBAAmB,EACnB,mBAAmB,EACnB,oBAAoB,GACrB,MAAM,4CAA4C,CAAC;AACpD,OAAO,EACL,oBAAoB,EACpB,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAAE,kBAAkB,EAAE,MAAM,mDAAmD,CAAC;AAGvF,OAAO,EACL,YAAY,EACZ,aAAa,EACb,cAAc,EACd,kBAAkB,EAClB,eAAe,EACf,eAAe,EACf,gBAAgB,EAChB,gBAAgB,EAChB,iBAAiB,EACjB,0BAA0B,EAC1B,YAAY,EACZ,mBAAmB,EACnB,YAAY,EACZ,UAAU,EACV,eAAe,EACf,SAAS,EACT,UAAU,EACV,YAAY,EACZ,UAAU,EACV,UAAU,EACV,WAAW,EACX,aAAa,EACb,cAAc,EACd,cAAc,GACf,MAAM,+BAA+B,CAAC;AACvC,YAAY,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,+BAA+B,CAAC;AAGlF,YAAY,EACV,cAAc,EACd,YAAY,GACb,MAAM,uCAAuC,CAAC;AAC/C,OAAO,EACL,YAAY,IAAI,eAAe,EAC/B,gBAAgB,EAChB,gBAAgB,GACjB,MAAM,uCAAuC,CAAC;AAG/C,YAAY,EACV,oBAAoB,EACpB,yBAAyB,EACzB,kBAAkB,EAClB,YAAY,EACZ,OAAO,EACP,WAAW,EACX,mBAAmB,EACnB,SAAS,EACT,qBAAqB,GACtB,MAAM,2BAA2B,CAAC;AACnC,OAAO,EACL,aAAa,EACb,iBAAiB,EACjB,iBAAiB,EACjB,kBAAkB,EAClB,sBAAsB,EACtB,sBAAsB,EACtB,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,cAAc,EACd,kBAAkB,EAClB,kBAAkB,GACnB,MAAM,2BAA2B,CAAC;AAGnC,YAAY,EAAE,KAAK,EAAE,mBAAmB,EAAE,MAAM,6BAA6B,CAAC;AAC9E,OAAO,EACL,SAAS,EACT,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,GACjB,MAAM,6BAA6B,CAAC;AAGrC,YAAY,EACV,gBAAgB,EAChB,eAAe,EACf,wBAAwB,GACzB,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,GACtB,MAAM,kCAAkC,CAAC"}
|