@jambudipa/spider 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
1
- import { Effect, Layer, Option, Chunk, MutableHashSet, Schema, Data, pipe, Context, DateTime, Console, Duration, MutableHashMap, Queue, HashMap, PubSub, MutableRef, Schedule, Stream, Fiber, Random, Struct, Ref } from "effect";
1
+ import { Effect, Layer, Option, Chunk, MutableHashSet, Schema, Data, pipe, Context, DateTime, Console, Duration, MutableHashMap, Queue, HashMap, Ref, HashSet, PubSub, MutableRef, Schedule, Stream, Fiber, Random, Struct } from "effect";
2
2
  import * as cheerio from "cheerio";
3
3
  import * as fs from "fs";
4
4
  import * as path from "path";
@@ -1747,6 +1747,62 @@ const deduplicateUrls = (urls, strategy = DEFAULT_DEDUPLICATION_STRATEGY) => Eff
1747
1747
  }
1748
1748
  };
1749
1749
  });
1750
/**
 * Builds a stateful URL deduplicator backed by two `Ref`s: the set of
 * normalized URLs seen so far and a running counter object.
 *
 * URLs are compared by the `normalized` field of the value produced by
 * `normalizeUrl(url, strategy)`.
 *
 * @param strategy - Strategy forwarded to `normalizeUrl`; defaults to
 *   `DEFAULT_DEDUPLICATION_STRATEGY`.
 * @returns An Effect yielding `{ hasSeenUrl, markUrlSeen, getStats, reset }`.
 */
const createUrlDeduplicator = (strategy = DEFAULT_DEDUPLICATION_STRATEGY) => Effect.gen(function* () {
  const emptyStats = () => ({
    processed: 0,
    unique: 0,
    duplicates: 0
  });
  const seenUrls = yield* Ref.make(HashSet.empty());
  const urlStats = yield* Ref.make(emptyStats());
  return {
    /**
     * Check if a URL has been seen (after normalization).
     * Read-only: neither the seen set nor the statistics are touched.
     */
    hasSeenUrl: (url) => Effect.gen(function* () {
      const { normalized } = yield* normalizeUrl(url, strategy);
      const seen = yield* Ref.get(seenUrls);
      return HashSet.has(seen, normalized);
    }),
    /**
     * Add a URL to the seen set.
     *
     * @returns `true` when the normalized URL was new, `false` when it was
     *   already present (counted as a duplicate).
     */
    markUrlSeen: (url) => Effect.gen(function* () {
      const { normalized } = yield* normalizeUrl(url, strategy);
      // Test-and-insert in one atomic Ref.modify so that two fibers marking
      // the same URL concurrently cannot both observe it as "new".
      const isNew = yield* Ref.modify(
        seenUrls,
        (seen) => HashSet.has(seen, normalized) ? [false, seen] : [true, HashSet.add(seen, normalized)]
      );
      yield* Ref.update(urlStats, (stats) => ({
        ...stats,
        processed: stats.processed + 1,
        unique: stats.unique + (isNew ? 1 : 0),
        duplicates: stats.duplicates + (isNew ? 0 : 1)
      }));
      return isNew;
    }),
    /**
     * Get deduplication statistics (`processed`, `unique`, `duplicates`).
     */
    getStats: () => Ref.get(urlStats),
    /**
     * Reset the deduplicator: empties the seen set and zeroes the counters.
     */
    reset: () => Effect.gen(function* () {
      yield* Ref.set(seenUrls, HashSet.empty());
      yield* Ref.set(urlStats, emptyStats());
    })
  };
});
1750
1806
  const SPIDER_DEFAULTS = Object.freeze({
1751
1807
  /** Threshold in ms after which a worker is considered stale (60s) */
1752
1808
  STALE_WORKER_THRESHOLD_MS: 6e4,
@@ -5080,6 +5136,120 @@ const ResumabilityConfigs = {
5080
5136
  backend: new PostgresStorageBackend(dbClient, config)
5081
5137
  })
5082
5138
  };
5139
/**
 * Tagged error (`_tag: "FetchError"`) describing a failed fetch.
 * The message is derived from the `url`, `reason` and `durationMs` fields
 * supplied at construction time.
 */
class FetchError extends Data.TaggedError("FetchError") {
  get message() {
    const { url, reason, durationMs } = this;
    return `Fetch failed for ${url}: ${reason} after ${durationMs}ms`;
  }
}
5144
/**
 * Builds a `fetch` wrapper that reports start, success, failure and timeout
 * of every request through `SpiderLogger.logEvent`.
 *
 * The returned function has the shape `(url, options) => Effect<Response>`:
 * - it delegates to `globalThis.fetch(url, options)`;
 * - it gives up after 30 seconds and fails with `FetchError` (`reason: "timeout"`);
 * - any rejection of the underlying promise fails with `FetchError`
 *   (`reason: "network"`), carrying the measured duration and the original
 *   rejection as `cause`.
 *
 * Requires `SpiderLogger` in the Effect context.
 */
const makeLoggingFetch = Effect.gen(function* () {
  const logger = yield* SpiderLogger;
  const timeoutDuration = Duration.seconds(30);
  // The hostname only labels log events, so an unparsable input must not
  // abort the request before fetch itself has had a chance to reject it.
  const hostnameOf = (url) => {
    try {
      return new URL(url).hostname;
    } catch {
      return "unknown";
    }
  };
  // Honour both a caller-supplied signal and the interruption signal handed
  // out by Effect.tryPromise. Where AbortSignal.any is unavailable the
  // caller's signal wins, which matches the previous behaviour.
  const mergeSignals = (callerSignal, interruptSignal) => {
    if (callerSignal == null) {
      return interruptSignal;
    }
    return typeof AbortSignal.any === "function" ? AbortSignal.any([callerSignal, interruptSignal]) : callerSignal;
  };
  return (url, options) => Effect.gen(function* () {
    const startTime = yield* DateTime.now;
    const startMs = DateTime.toEpochMillis(startTime);
    // Milliseconds elapsed since the request started, evaluated on demand.
    const elapsedMs = Effect.map(DateTime.now, (now) => DateTime.toEpochMillis(now) - startMs);
    const domain = hostnameOf(url);
    yield* logger.logEvent({
      type: "edge_case",
      domain,
      url,
      message: "[FETCH_START] Starting fetch request",
      details: {
        case: "fetch_start",
        url,
        timestamp: DateTime.formatIso(startTime),
        // Only header names are logged, never header values.
        options: options == null ? undefined : {
          method: options.method,
          headers: Object.keys(options.headers ?? {})
        }
      }
    });
    const fetchEffect = Effect.tryPromise({
      // Forwarding the signal makes the timeout below actually cancel the
      // in-flight HTTP request instead of leaving it running.
      try: (signal) => globalThis.fetch(url, {
        ...options,
        signal: mergeSignals(options?.signal, signal)
      }),
      catch: (error) => new FetchError({
        url,
        reason: "network",
        // Placeholder; replaced with the measured duration in the error handler below.
        durationMs: 0,
        cause: error
      })
    });
    const fetchWithTimeout = fetchEffect.pipe(
      Effect.timeoutOption(timeoutDuration),
      Effect.flatMap(
        (maybeResponse) => Option.match(maybeResponse, {
          onNone: () => Effect.gen(function* () {
            const durationMs = yield* elapsedMs;
            yield* logger.logEvent({
              type: "edge_case",
              domain,
              url,
              message: `[FETCH_ABORT] Aborting fetch after ${durationMs}ms`,
              details: {
                case: "fetch_abort",
                url,
                durationMs,
                reason: "timeout"
              }
            });
            return yield* Effect.fail(
              new FetchError({
                url,
                reason: "timeout",
                durationMs
              })
            );
          }),
          onSome: (response) => Effect.succeed(response)
        })
      )
    );
    const response = yield* fetchWithTimeout.pipe(
      Effect.catchAll(
        (error) => Effect.gen(function* () {
          const durationMs = yield* elapsedMs;
          // Network errors are created before the duration is known; rebuild
          // them with the real figure. Timeout errors already carry it, and
          // anything that is not a FetchError is passed through untouched.
          const needsDuration = error._tag === "FetchError" && error.reason === "network";
          const finalError = needsDuration ? new FetchError({
            url: error.url,
            reason: error.reason,
            durationMs,
            cause: error.cause
          }) : error;
          yield* logger.logEvent({
            type: "edge_case",
            domain,
            url,
            message: `[FETCH_ERROR] Failed after ${durationMs}ms`,
            details: {
              case: "fetch_failed",
              url,
              durationMs,
              error: finalError._tag,
              message: finalError.message,
              isAborted: finalError.reason === "timeout"
            }
          });
          return yield* Effect.fail(finalError);
        })
      )
    );
    const durationMs = yield* elapsedMs;
    yield* logger.logEvent({
      type: "edge_case",
      domain,
      url,
      message: `[FETCH_SUCCESS] Got response in ${durationMs}ms`,
      details: {
        case: "fetch_success",
        url,
        durationMs,
        status: response.status,
        statusText: response.statusText,
        contentType: response.headers.get("content-type")
      }
    });
    return response;
  });
});
/**
 * Context tag with the key "LoggingFetch".
 * NOTE(review): presumably the key under which the function built by
 * `makeLoggingFetch` is provided — confirm against the layer wiring.
 */
const LoggingFetch = Context.GenericTag("LoggingFetch");
5083
5253
  class JsonParseError extends Data.TaggedError("JsonParseError") {
5084
5254
  get message() {
5085
5255
  const preview = this.input.length > 100 ? `${this.input.substring(0, 100)}...` : this.input;
@@ -6527,6 +6697,251 @@ const TokenExtractorLive = Layer.effect(
6527
6697
  TokenExtractor,
6528
6698
  makeTokenExtractor
6529
6699
  );
6700
/**
 * Effect service wrapping one Playwright Chromium browser, one browser
 * context and one "current" page. Each of the three is created on demand and
 * cached in a `Ref`; `close` releases whatever has been created.
 *
 * Page-level operations act on the page most recently created by
 * `createPage` and fail with `PageError` when there is none. Playwright is
 * loaded through a dynamic `import("playwright")` on first browser launch.
 */
class BrowserEngineService extends Effect.Service()(
  "@jambudipa.io/BrowserEngine",
  {
    effect: Effect.gen(function* () {
      const browserRef = yield* Ref.make(Option.none());
      const contextRef = yield* Ref.make(Option.none());
      const pageRef = yield* Ref.make(Option.none());
      const configRef = yield* Ref.make({
        headless: true,
        timeout: 3e4,
        viewport: { width: 1920, height: 1080 },
        userAgent: "Mozilla/5.0 (compatible; Spider/1.0)",
        locale: "en-GB"
      });
      // Returns the cached browser, launching Chromium on first use.
      const ensureBrowser = () => Effect.gen(function* () {
        const browserOpt = yield* Ref.get(browserRef);
        if (Option.isSome(browserOpt)) {
          return browserOpt.value;
        }
        const { chromium } = yield* Effect.tryPromise({
          try: () => import("playwright"),
          catch: () => BrowserError.launchFailed("Playwright not installed")
        });
        const config = yield* Ref.get(configRef);
        const browser = yield* Effect.tryPromise({
          try: () => chromium.launch({
            headless: config.headless,
            timeout: config.timeout
          }),
          catch: (error) => BrowserError.launchFailed(error)
        });
        yield* Ref.set(browserRef, Option.some(browser));
        return browser;
      });
      // Returns the cached browser context, creating it (and the browser) on first use.
      const ensureContext = () => Effect.gen(function* () {
        const contextOpt = yield* Ref.get(contextRef);
        if (Option.isSome(contextOpt)) {
          return contextOpt.value;
        }
        const browser = yield* ensureBrowser();
        const config = yield* Ref.get(configRef);
        const context = yield* Effect.tryPromise({
          try: () => browser.newContext({
            viewport: config.viewport,
            userAgent: config.userAgent,
            locale: config.locale
          }),
          catch: (error) => new BrowserError({
            operation: "newContext",
            cause: error
          })
        });
        yield* Ref.set(contextRef, Option.some(context));
        return context;
      });
      // Yields the current page or fails with PageError when none is active.
      const getCurrentPage = () => Effect.gen(function* () {
        const pageOpt = yield* Ref.get(pageRef);
        return yield* Option.match(pageOpt, {
          onNone: () => Effect.fail(new PageError({
            url: "unknown",
            operation: "getCurrentPage",
            cause: "No active page"
          })),
          onSome: (page) => Effect.succeed(page)
        });
      });
      // Closes the resource cached in `ref`, if any, and clears the ref.
      // Best-effort: a failing close() is logged at debug level instead of
      // being propagated, so cleanup never fails. Yields whether something
      // was cached.
      const closeIfOpen = (label, ref) => Effect.gen(function* () {
        const resourceOpt = yield* Ref.get(ref);
        if (Option.isNone(resourceOpt)) {
          return false;
        }
        yield* Effect.tryPromise({
          try: () => resourceOpt.value.close(),
          catch: (error) => error
        }).pipe(
          Effect.catchAll((error) => Effect.logDebug(`Failed to close ${label}: ${String(error)}`))
        );
        yield* Ref.set(ref, Option.none());
        return true;
      });
      return {
        /** Launches the browser if it is not running yet. */
        launch: () => Effect.gen(function* () {
          yield* ensureBrowser();
          yield* Effect.log("Browser launched successfully");
        }),
        /** Opens a new page in the shared context and makes it the current page. */
        createPage: () => Effect.gen(function* () {
          const context = yield* ensureContext();
          const page = yield* Effect.tryPromise({
            try: () => context.newPage(),
            catch: (error) => new BrowserError({
              operation: "newPage",
              cause: error
            })
          });
          yield* Ref.set(pageRef, Option.some(page));
          yield* Effect.log("New page created");
          return page;
        }),
        /** Navigates the current page to `url`, waiting for the "networkidle" state. */
        navigateTo: (url) => Effect.gen(function* () {
          const page = yield* getCurrentPage();
          yield* Effect.tryPromise({
            try: () => page.goto(url, { waitUntil: "networkidle" }),
            catch: (error) => new PageError({
              url,
              operation: "navigate",
              cause: error
            })
          });
          yield* Effect.logDebug(`Navigated to ${url}`);
        }),
        /** Waits for `selector`; `timeout` falls back to the configured timeout. */
        waitForSelector: (selector, timeout) => Effect.gen(function* () {
          const page = yield* getCurrentPage();
          const config = yield* Ref.get(configRef);
          yield* Effect.tryPromise({
            try: () => page.waitForSelector(selector, {
              timeout: timeout ?? config.timeout
            }),
            catch: (error) => new PageError({
              url: page.url(),
              operation: "waitForSelector",
              selector,
              cause: error
            })
          });
        }),
        /** Clicks the element matching `selector` on the current page. */
        click: (selector) => Effect.gen(function* () {
          const page = yield* getCurrentPage();
          yield* Effect.tryPromise({
            try: () => page.click(selector),
            catch: (error) => new PageError({
              url: page.url(),
              operation: "click",
              selector,
              cause: error
            })
          });
          yield* Effect.logDebug(`Clicked element: ${selector}`);
        }),
        /** Fills the element matching `selector`; the value itself is not logged. */
        fill: (selector, value) => Effect.gen(function* () {
          const page = yield* getCurrentPage();
          yield* Effect.tryPromise({
            try: () => page.fill(selector, value),
            catch: (error) => new PageError({
              url: page.url(),
              operation: "fill",
              selector,
              cause: error
            })
          });
          yield* Effect.logDebug(`Filled ${selector} with value`);
        }),
        /**
         * Scrolls the current page vertically by `distance` pixels.
         * Best-effort: a failing evaluate call is logged, not propagated.
         */
        scroll: (distance) => Effect.gen(function* () {
          const page = yield* getCurrentPage();
          yield* Effect.tryPromise({
            try: () => page.evaluate((d) => {
              window.scrollBy(0, d);
            }, distance),
            catch: (error) => error
          }).pipe(
            // Report success only when the scroll really ran.
            Effect.matchEffect({
              onFailure: (error) => Effect.logDebug(`Scroll by ${distance}px failed: ${String(error)}`),
              onSuccess: () => Effect.logDebug(`Scrolled ${distance}px`)
            })
          );
        }),
        /** Evaluates `script` in the current page and yields its result. */
        evaluate: (script) => Effect.gen(function* () {
          const page = yield* getCurrentPage();
          return yield* Effect.tryPromise({
            try: () => page.evaluate(script),
            catch: (error) => new PageError({
              url: page.url(),
              operation: "evaluate",
              cause: error
            })
          });
        }),
        /** Yields the HTML content of the current page. */
        getHTML: () => Effect.gen(function* () {
          const page = yield* getCurrentPage();
          return yield* Effect.tryPromise({
            try: () => page.content(),
            catch: (error) => new PageError({
              url: page.url(),
              operation: "getHTML",
              cause: error
            })
          });
        }),
        /** Takes a full-page screenshot, passing `path2` to Playwright as the output path. */
        screenshot: (path2) => Effect.gen(function* () {
          const page = yield* getCurrentPage();
          const buffer = yield* Effect.tryPromise({
            try: () => page.screenshot({ path: path2, fullPage: true }),
            catch: (error) => new PageError({
              url: page.url(),
              operation: "screenshot",
              cause: error
            })
          });
          yield* Effect.log(`Screenshot taken${path2 ? ` and saved to ${path2}` : ""}`);
          return buffer;
        }),
        /** Closes the current page, if any. */
        closePage: () => Effect.gen(function* () {
          const closed = yield* closeIfOpen("page", pageRef);
          if (closed) {
            yield* Effect.log("Page closed");
          }
        }),
        /** Closes page, context and browser (in that order) and clears all cached handles. */
        close: () => Effect.gen(function* () {
          yield* closeIfOpen("page", pageRef);
          yield* closeIfOpen("context", contextRef);
          yield* closeIfOpen("browser", browserRef);
          yield* Effect.log("Browser engine closed");
        })
      };
    })
  }
) {
}
6935
/** Layer providing `BrowserEngineService` — the service's own `Default` layer. */
const BrowserEngineLive = BrowserEngineService.Default;
/**
 * Layer factory kept for API symmetry. The `_config` argument is accepted
 * but not read: the same default layer is returned for every input.
 */
const BrowserEngineWithConfig = (_config) => BrowserEngineLive;
6937
/**
 * Runs `operation` with the `BrowserEngineService` taken from the Effect
 * context and calls `close()` on that engine once the operation has
 * finished, whatever its outcome.
 *
 * @param operation - Function receiving the engine and returning an Effect.
 * @returns An Effect yielding the operation's result.
 */
const withBrowser = (operation) => Effect.flatMap(
  BrowserEngineService,
  (engine) => Effect.acquireUseRelease(
    Effect.succeed(engine),
    operation,
    (acquired) => acquired.close()
  )
);
6530
6945
  class LoginError extends Data.TaggedError("LoginError") {
6531
6946
  }
6532
6947
  class SessionNotValidError extends Data.TaggedError("SessionNotValidError") {
@@ -6735,20 +7150,153 @@ const WebScrapingEngineLive = Layer.effect(
6735
7150
  WebScrapingEngine,
6736
7151
  makeWebScrapingEngine
6737
7152
  );
7153
/**
 * Effect service that tracks the last activity of each worker in a
 * `Ref<HashMap>` keyed by worker id and reports workers that have been
 * inactive for more than 60 seconds.
 *
 * Requires `SpiderLogger` in the Effect context.
 */
class WorkerHealthMonitor extends Effect.Service()(
  "@jambudipa.io/WorkerHealthMonitor",
  {
    effect: Effect.gen(function* () {
      const logger = yield* SpiderLogger;
      const workers = yield* Ref.make(HashMap.empty());
      const stuckThresholdMs = 6e4;
      // Milliseconds between a worker's last recorded activity and `now`.
      const inactiveMillis = (status, now) => DateTime.toEpochMillis(now) - DateTime.toEpochMillis(status.lastActivity);
      // Single definition shared by the public getter and the monitoring loop.
      const getStuckWorkers = Effect.gen(function* () {
        const now = yield* DateTime.now;
        const workerMap = yield* Ref.get(workers);
        return Array.from(HashMap.values(workerMap)).filter(
          (status) => inactiveMillis(status, now) > stuckThresholdMs
        );
      });
      // One monitoring pass: log an edge case for every stuck worker.
      const reportStuckWorkers = Effect.gen(function* () {
        const stuck = yield* getStuckWorkers;
        for (const worker of stuck) {
          const now = yield* DateTime.now;
          yield* logger.logEdgeCase(
            worker.domain,
            "worker_stuck_detected",
            {
              workerId: worker.workerId,
              currentUrl: worker.currentUrl,
              lastActivity: DateTime.formatIso(worker.lastActivity),
              inactiveMs: inactiveMillis(worker, now),
              fetchStartTime: Option.fromNullable(worker.fetchStartTime).pipe(
                Option.map(DateTime.formatIso),
                Option.getOrElse(() => "N/A")
              )
            }
          );
        }
      });
      return {
        /**
         * Register a worker's activity.
         *
         * @param workerId - Key of the worker entry.
         * @param domain - Domain stored on the entry (overwrites the previous one).
         * @param activity - Optional `{ url, fetchStart }`; `url` replaces the
         *   stored current URL when present, and a truthy `fetchStart` stamps
         *   the fetch start time with the current time.
         */
        recordActivity: (workerId, domain, activity = {}) => Effect.gen(function* () {
          const now = yield* DateTime.now;
          yield* Ref.update(workers, (map) => {
            const current = Option.getOrElse(
              HashMap.get(map, workerId),
              () => ({ workerId, domain, lastActivity: now })
            );
            return HashMap.set(map, workerId, {
              ...current,
              domain,
              lastActivity: now,
              currentUrl: activity.url ?? current.currentUrl,
              fetchStartTime: activity.fetchStart ? now : current.fetchStartTime
            });
          });
        }),
        /**
         * Remove a worker from monitoring
         */
        removeWorker: (workerId) => Ref.update(workers, (map) => HashMap.remove(map, workerId)),
        /**
         * Get stuck workers: entries inactive for longer than the threshold.
         */
        getStuckWorkers,
        /**
         * Monitor workers and log stuck ones. Runs one pass immediately and
         * then repeats on a fixed 30-second schedule.
         */
        startMonitoring: Effect.asVoid(
          Effect.repeat(reportStuckWorkers, Schedule.fixed(Duration.seconds(30)))
        )
      };
    })
  }
) {
}
7253
/**
 * Decodes `data` with `Schema.parseJson(Schema.Unknown)` and maps any decode
 * failure through `onError`.
 *
 * @param data - Value to decode.
 * @param onError - Converts the decode error into the caller's error type.
 * @returns An Effect yielding the parsed value.
 */
const safeJsonParse = (data, onError) => {
  const decodeJson = Schema.decodeUnknown(Schema.parseJson(Schema.Unknown));
  return Effect.mapError(decodeJson(data), onError);
};
7256
/**
 * Wraps a nullable value in an `Option`, returned inside an Effect.
 * When the value is null/undefined and `logContext` is truthy, a debug log
 * line naming the context is emitted first.
 *
 * @param value - Possibly null/undefined value.
 * @param logContext - Optional label included in the debug message.
 * @returns An Effect yielding `Option.fromNullable(value)`.
 */
const toOption = (value, logContext) => {
  const wrapped = Option.fromNullable(value);
  const shouldLog = Boolean(logContext) && Option.isNone(wrapped);
  if (!shouldLog) {
    return Effect.succeed(wrapped);
  }
  return Effect.as(
    Effect.logDebug(`[Migration] Null value encountered: ${logContext}`),
    wrapped
  );
};
7265
/**
 * Lifts a promise-returning function into an Effect via `Effect.tryPromise`.
 *
 * @param promise - Function producing the promise; passed as the `try` option.
 * @param onError - Maps a rejection to the Effect's error; passed as `catch`.
 */
const fromPromise = (promise, onError) => {
  const spec = { try: promise, catch: onError };
  return Effect.tryPromise(spec);
};
7269
/**
 * Runs the `cleanup` function of every resource and collects each outcome
 * (`Effect.all` in "either" mode), so one failing cleanup does not prevent
 * the others from running.
 *
 * @param resources - Array of `{ id, cleanup, onError }`; `onError(id, error)`
 *   converts a rejected cleanup into that resource's error value.
 */
const cleanupResources = (resources) => {
  const toCleanupTask = (resource) => {
    const { id, cleanup, onError } = resource;
    return Effect.tryPromise({
      try: cleanup,
      catch: (error) => onError(id, error)
    });
  };
  const tasks = resources.map((resource) => toCleanupTask(resource));
  return Effect.all(tasks, { mode: "either" });
};
7278
/**
 * Positional-argument convenience over `Option.match`.
 *
 * @param option - The Option to inspect.
 * @param onNone - Called when the option is empty.
 * @param onSome - Called with the contained value otherwise.
 */
const matchOption = (option, onNone, onSome) => pipe(
  option,
  Option.match({ onNone, onSome })
);
6738
7279
  export {
7280
+ APITokenNotFoundError,
6739
7281
  AdapterNotInitialisedError,
6740
7282
  BrowserCleanupError,
7283
+ BrowserEngineLive,
7284
+ BrowserEngineService,
7285
+ BrowserEngineWithConfig,
6741
7286
  BrowserError,
7287
+ CSRFTokenNotFoundError,
6742
7288
  ConfigError,
6743
7289
  ConfigurationError,
6744
7290
  ContentTypeError,
6745
7291
  CookieManager,
6746
7292
  CookieManagerLive,
6747
7293
  CrawlError,
7294
+ DEFAULT_DEDUPLICATION_STRATEGY,
6748
7295
  DEFAULT_HYBRID_CONFIG,
6749
7296
  DeltaPersistence,
6750
7297
  EnhancedHttpClient,
6751
7298
  EnhancedHttpClientLive,
7299
+ FetchError,
6752
7300
  FileStorageBackend,
6753
7301
  FileSystemError,
6754
7302
  FullStatePersistence,
@@ -6756,7 +7304,9 @@ export {
6756
7304
  LinkExtractionError,
6757
7305
  LinkExtractorService,
6758
7306
  LinkExtractorServiceLayer,
7307
+ LoggingFetch,
6759
7308
  LoggingMiddleware,
7309
+ LoginError,
6760
7310
  MiddlewareError,
6761
7311
  MiddlewareManager,
6762
7312
  NetworkError,
@@ -6764,6 +7314,7 @@ export {
6764
7314
  PageError,
6765
7315
  ParseError,
6766
7316
  PersistenceError$1 as PersistenceError,
7317
+ PostgresStorageBackend,
6767
7318
  PriorityRequest,
6768
7319
  QueueError,
6769
7320
  RateLimitMiddleware,
@@ -6774,8 +7325,11 @@ export {
6774
7325
  ResumabilityService,
6775
7326
  RobotsService,
6776
7327
  RobotsTxtError,
7328
+ SPIDER_DEFAULTS,
6777
7329
  ScraperService,
6778
7330
  SessionError$1 as SessionError,
7331
+ SessionLoadError,
7332
+ SessionNotValidError,
6779
7333
  SessionStore,
6780
7334
  SessionStoreLive,
6781
7335
  SpiderConfig,
@@ -6790,26 +7344,41 @@ export {
6790
7344
  StateManager,
6791
7345
  StateManagerLive,
6792
7346
  StatsMiddleware,
7347
+ StorageKeyNotFoundError,
6793
7348
  TimeoutError,
7349
+ TokenExpiredError,
6794
7350
  TokenExtractor,
6795
7351
  TokenExtractorLive,
7352
+ TokenNotFoundError,
6796
7353
  TokenType,
6797
7354
  UrlDeduplicatorService,
6798
7355
  UserAgentMiddleware,
6799
7356
  ValidationError,
6800
7357
  WebScrapingEngine,
6801
7358
  WebScrapingEngineLive,
7359
+ WorkerHealthMonitor,
7360
+ cleanupResources,
6802
7361
  createStateOperation,
7362
+ createUrlDeduplicator,
7363
+ deduplicateUrls,
7364
+ fromPromise,
6803
7365
  isBrowserError,
6804
7366
  isNetworkError,
6805
7367
  isSpiderError,
6806
7368
  makeCookieManager,
6807
7369
  makeEnhancedHttpClient,
7370
+ makeLoggingFetch,
6808
7371
  makeSessionStore,
6809
7372
  makeSpiderConfig,
6810
7373
  makeSpiderLogger,
6811
7374
  makeStateManager,
6812
7375
  makeTokenExtractor,
6813
- makeWebScrapingEngine
7376
+ makeWebScrapingEngine,
7377
+ matchOption,
7378
+ normalizeUrl,
7379
+ parseUrl,
7380
+ safeJsonParse,
7381
+ toOption,
7382
+ withBrowser
6814
7383
  };
6815
7384
  //# sourceMappingURL=index.js.map