@jambudipa/spider 0.3.0 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +289 -61
- package/dist/index.js.map +1 -1
- package/dist/lib/utils/url-deduplication.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -28,6 +28,8 @@ export type { CookieManagerService, EnhancedHttpClientService, HttpRequestOption
|
|
|
28
28
|
export { CookieManager, makeCookieManager, CookieManagerLive, EnhancedHttpClient, makeEnhancedHttpClient, EnhancedHttpClientLive, SessionStore, makeSessionStore, SessionStoreLive, TokenExtractor, makeTokenExtractor, TokenExtractorLive, } from './lib/HttpClient/index.js';
|
|
29
29
|
export type { Token, StateManagerService } from './lib/StateManager/index.js';
|
|
30
30
|
export { TokenType, StateManager, makeStateManager, StateManagerLive, } from './lib/StateManager/index.js';
|
|
31
|
+
export type { BrowserEngineConfig, BrowserEngineServiceInterface, PageElement, } from './lib/BrowserEngine/BrowserEngine.service.js';
|
|
32
|
+
export { BrowserEngineService, BrowserEngineLive, BrowserEngineWithConfig, withBrowser, } from './lib/BrowserEngine/BrowserEngine.service.js';
|
|
31
33
|
export type { LoginCredentials, ScrapingSession, WebScrapingEngineService, } from './lib/WebScrapingEngine/index.js';
|
|
32
34
|
export { WebScrapingEngine, makeWebScrapingEngine, WebScrapingEngineLive, } from './lib/WebScrapingEngine/index.js';
|
|
33
35
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,YAAY,EACV,OAAO,EACP,gBAAgB,EAChB,kBAAkB,EAClB,oBAAoB,EACpB,kBAAkB,EAClB,oBAAoB,EACpB,gBAAgB,GACjB,MAAM,sBAAsB,CAAC;AAG9B,cAAc,gCAAgC,CAAC;AAC/C,cAAc,gCAAgC,CAAC;AAC/C,cAAc,kCAAkC,CAAC;AACjD,cAAc,4BAA4B,CAAC;AAG3C,YAAY,EACV,mBAAmB,EACnB,mBAAmB,GACpB,MAAM,sCAAsC,CAAC;AAC9C,OAAO,EACL,YAAY,EACZ,gBAAgB,GACjB,MAAM,sCAAsC,CAAC;AAG9C,YAAY,EAAE,gBAAgB,EAAE,MAAM,kDAAkD,CAAC;AACzF,OAAO,EAAE,sBAAsB,EAAE,MAAM,kDAAkD,CAAC;AAG1F,YAAY,EAAE,gBAAgB,EAAE,MAAM,4CAA4C,CAAC;AACnF,OAAO,EACL,sBAAsB,EACtB,cAAc,EACd,eAAe,EACf,WAAW,GACZ,MAAM,4CAA4C,CAAC;AAGpD,YAAY,EACV,gBAAgB,EAChB,aAAa,EACb,cAAc,GACf,MAAM,sCAAsC,CAAC;AAC9C,OAAO,EACL,iBAAiB,EACjB,mBAAmB,EACnB,iBAAiB,EACjB,mBAAmB,EACnB,eAAe,GAChB,MAAM,sCAAsC,CAAC;AAG9C,YAAY,EACV,mBAAmB,EACnB,oBAAoB,EACpB,6BAA6B,GAC9B,MAAM,8CAA8C,CAAC;AACtD,OAAO,EACL,oBAAoB,EACpB,yBAAyB,EACzB,mBAAmB,GACpB,MAAM,8CAA8C,CAAC;AAGtD,YAAY,EACV,WAAW,EACX,SAAS,EACT,2BAA2B,GAC5B,MAAM,gCAAgC,CAAC;AAGxC,YAAY,EACV,mBAAmB,EACnB,cAAc,EACd,cAAc,EACd,mBAAmB,EACnB,uBAAuB,GACxB,MAAM,6BAA6B,CAAC;AACrC,YAAY,EAAE,kBAAkB,EAAE,MAAM,4CAA4C,CAAC;AACrF,OAAO,EACL,UAAU,EACV,gBAAgB,IAAI,iBAAiB,EACrC,qBAAqB,GACtB,MAAM,6BAA6B,CAAC;AACrC,OAAO,EACL,mBAAmB,EACnB,mBAAmB,EACnB,oBAAoB,GACrB,MAAM,4CAA4C,CAAC;AACpD,OAAO,EACL,oBAAoB,EACpB,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAAE,kBAAkB,EAAE,MAAM,mDAAmD,CAAC;AAGvF,OAAO,EACL,YAAY,EACZ,aAAa,EACb,cAAc,EACd,kBAAkB,EAClB,eAAe,EACf,eAAe,EACf,gBAAgB,EAChB,gBAAgB,EAChB,iBAAiB,EACjB,0BAA0B,EAC1B,YAAY,EACZ,mBAAmB,EACnB,YAAY,EACZ,UAAU,EACV,eAAe,EACf,SAAS,EACT,UAAU,EACV,YAAY,EACZ,UAAU,EACV,UAAU,EACV,WAAW,EACX,aAAa,EACb,cAAc,EACd,cAAc,GACf,MAAM,+BAA+B,CAAC;AACvC,YAAY,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,+BAA+B,CAAC;AAGlF,YAAY,EACV,cAAc,EACd,YAAY,GACb,MAAM,uCAAuC,CAAC;AAC/C,OAAO,EACL,YAAY,IAAI,eAAe,EAC/B,gBAAgB,EAChB,gBAAgB,GACjB,MAAM,uCAAuC,CAAC;AAG/C,YAAY,EACV,oBAAoB,EACpB,yBAAyB,EACzB,kBAAkB,EAClB,YAAY,EACZ,OAAO,EACP,WAAW,EACX,mBAAmB,EACnB,SAAS,EACT,qBAAqB,GACtB,MAAM,2BAA2B,CAAC;AACnC,OAAO,EACL,aAAa,EACb,iBAAiB,EACjB,iBAAiB,EACjB,kBAAkB,EAClB,sBAAsB,EACtB,sBAAsB,EACtB,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,cAAc,EACd,kBAAkB,EAClB,kBAAkB,GACnB,MAAM,2BAA2B,CAAC;AAGnC,YAAY,EAAE,KAAK,EAAE,mBAAmB,EAAE,MAAM,6BAA6B,CAAC;AAC9E,OAAO,EACL,SAAS,EACT,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,GACjB,MAAM,6BAA6B,CAAC;AAGrC,YAAY,EACV,gBAAgB,EAChB,eAAe,EACf,wBAAwB,GACzB,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,GACtB,MAAM,kCAAkC,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,YAAY,EACV,OAAO,EACP,gBAAgB,EAChB,kBAAkB,EAClB,oBAAoB,EACpB,kBAAkB,EAClB,oBAAoB,EACpB,gBAAgB,GACjB,MAAM,sBAAsB,CAAC;AAG9B,cAAc,gCAAgC,CAAC;AAC/C,cAAc,gCAAgC,CAAC;AAC/C,cAAc,kCAAkC,CAAC;AACjD,cAAc,4BAA4B,CAAC;AAG3C,YAAY,EACV,mBAAmB,EACnB,mBAAmB,GACpB,MAAM,sCAAsC,CAAC;AAC9C,OAAO,EACL,YAAY,EACZ,gBAAgB,GACjB,MAAM,sCAAsC,CAAC;AAG9C,YAAY,EAAE,gBAAgB,EAAE,MAAM,kDAAkD,CAAC;AACzF,OAAO,EAAE,sBAAsB,EAAE,MAAM,kDAAkD,CAAC;AAG1F,YAAY,EAAE,gBAAgB,EAAE,MAAM,4CAA4C,CAAC;AACnF,OAAO,EACL,sBAAsB,EACtB,cAAc,EACd,eAAe,EACf,WAAW,GACZ,MAAM,4CAA4C,CAAC;AAGpD,YAAY,EACV,gBAAgB,EAChB,aAAa,EACb,cAAc,GACf,MAAM,sCAAsC,CAAC;AAC9C,OAAO,EACL,iBAAiB,EACjB,mBAAmB,EACnB,iBAAiB,EACjB,mBAAmB,EACnB,eAAe,GAChB,MAAM,sCAAsC,CAAC;AAG9C,YAAY,EACV,mBAAmB,EACnB,oBAAoB,EACpB,6BAA6B,GAC9B,MAAM,8CAA8C,CAAC;AACtD,OAAO,EACL,oBAAoB,EACpB,yBAAyB,EACzB,mBAAmB,GACpB,MAAM,8CAA8C,CAAC;AAGtD,YAAY,EACV,WAAW,EACX,SAAS,EACT,2BAA2B,GAC5B,MAAM,gCAAgC,CAAC;AAGxC,YAAY,EACV,mBAAmB,EACnB,cAAc,EACd,cAAc,EACd,mBAAmB,EACnB,uBAAuB,GACxB,MAAM,6BAA6B,CAAC;AACrC,YAAY,EAAE,kBAAkB,EAAE,MAAM,4CAA4C,CAAC;AACrF,OAAO,EACL,UAAU,EACV,gBAAgB,IAAI,iBAAiB,EACrC,qBAAqB,GACtB,MAAM,6BAA6B,CAAC;AACrC,OAAO,EACL,mBAAmB,EACnB,mBAAmB,EACnB,oBAAoB,GACrB,MAAM,4CAA4C,CAAC;AACpD,OAAO,EACL,oBAAoB,EACpB,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EAAE,kBAAkB,EAAE,MAAM,mDAAmD,CAAC;AAGvF,OAAO,EACL,YAAY,EACZ,aAAa,EACb,cAAc,EACd,kBAAkB,EAClB,eAAe,EACf,eAAe,EACf,gBAAgB,EAChB,gBAAgB,EAChB,iBAAiB,EACjB,0BAA0B,EAC1B,YAAY,EACZ,mBAAmB,EACnB,YAAY,EACZ,UAAU,EACV,eAAe,EACf,SAAS,EACT,UAAU,EACV,YAAY,EACZ,UAAU,EACV,UAAU,EACV,WAAW,EACX,aAAa,EACb,cAAc,EACd,cAAc,GACf,MAAM,+BAA+B,CAAC;AACvC,YAAY,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,+BAA+B,CAAC;AAGlF,YAAY,EACV,cAAc,EACd,YAAY,GACb,MAAM,uCAAuC,CAAC;AAC/C,OAAO,EACL,YAAY,IAAI,eAAe,EAC/B,gBAAgB,EAChB,gBAAgB,GACjB,MAAM,uCAAuC,CAAC;AAG/C,YAAY,EACV,oBAAoB,EACpB,yBAAyB,EACzB,kBAAkB,EAClB,YAAY,EACZ,OAAO,EACP,WAAW,EACX,mBAAmB,EACnB,SAAS,EACT,qBAAqB,GACtB,MAAM,2BAA2B,CAAC;AACnC,OAAO,EACL,aAAa,EACb,iBAAiB,EACjB,iBAAiB,EACjB,kBAAkB,EAClB,sBAAsB,EACtB,sBAAsB,EACtB,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,cAAc,EACd,kBAAkB,EAClB,kBAAkB,GACnB,MAAM,2BAA2B,CAAC;AAGnC,YAAY,EAAE,KAAK,EAAE,mBAAmB,EAAE,MAAM,6BAA6B,CAAC;AAC9E,OAAO,EACL,SAAS,EACT,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,GACjB,MAAM,6BAA6B,CAAC;AAGrC,YAAY,EACV,mBAAmB,EACnB,6BAA6B,EAC7B,WAAW,GACZ,MAAM,8CAA8C,CAAC;AACtD,OAAO,EACL,oBAAoB,EACpB,iBAAiB,EACjB,uBAAuB,EACvB,WAAW,GACZ,MAAM,8CAA8C,CAAC;AAGtD,YAAY,EACV,gBAAgB,EAChB,eAAe,EACf,wBAAwB,GACzB,MAAM,kCAAkC,CAAC;AAC1C,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,GACtB,MAAM,kCAAkC,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Effect, Layer, Option, Chunk, MutableHashSet, Schema, Data, pipe, Context, DateTime, Console, Duration, MutableHashMap, Queue,
|
|
1
|
+
import { Effect, Layer, Option, Chunk, MutableHashSet, Schema, Data, pipe, Context, DateTime, Console, Duration, MutableHashMap, Queue, HashMap, PubSub, MutableRef, Schedule, Stream, Fiber, Random, Struct, Ref } from "effect";
|
|
2
2
|
import * as cheerio from "cheerio";
|
|
3
3
|
import * as fs from "fs";
|
|
4
4
|
import * as path from "path";
|
|
@@ -1699,71 +1699,50 @@ const normalizeUrl = (url, strategy = DEFAULT_DEDUPLICATION_STRATEGY) => Effect.
|
|
|
1699
1699
|
};
|
|
1700
1700
|
});
|
|
1701
1701
|
const deduplicateUrls = (urls, strategy = DEFAULT_DEDUPLICATION_STRATEGY) => Effect.gen(function* () {
|
|
1702
|
-
|
|
1703
|
-
const skipped =
|
|
1702
|
+
let domainMap = HashMap.empty();
|
|
1703
|
+
const skipped = [];
|
|
1704
1704
|
let invalidCount = 0;
|
|
1705
|
-
|
|
1706
|
-
|
|
1707
|
-
|
|
1708
|
-
|
|
1709
|
-
|
|
1710
|
-
|
|
1711
|
-
|
|
1712
|
-
|
|
1713
|
-
|
|
1714
|
-
|
|
1715
|
-
|
|
1716
|
-
|
|
1717
|
-
|
|
1718
|
-
|
|
1719
|
-
|
|
1720
|
-
|
|
1721
|
-
|
|
1722
|
-
|
|
1723
|
-
|
|
1724
|
-
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
}
|
|
1740
|
-
}
|
|
1741
|
-
})
|
|
1742
|
-
),
|
|
1743
|
-
Effect.catchAll(
|
|
1744
|
-
(error) => Effect.gen(function* () {
|
|
1745
|
-
invalidCount++;
|
|
1746
|
-
yield* Ref.update(skipped, (arr) => [
|
|
1747
|
-
...arr,
|
|
1748
|
-
{ url: urlObj.url, reason: `Invalid URL: ${error.message}` }
|
|
1749
|
-
]);
|
|
1750
|
-
yield* Effect.logWarning(`Invalid URL skipped: ${urlObj.url}`);
|
|
1751
|
-
})
|
|
1752
|
-
)
|
|
1753
|
-
)
|
|
1754
|
-
),
|
|
1755
|
-
{ concurrency: "unbounded" }
|
|
1756
|
-
);
|
|
1757
|
-
const finalMap = yield* Ref.get(domainMap);
|
|
1758
|
-
const finalSkipped = yield* Ref.get(skipped);
|
|
1759
|
-
const deduplicated = Array.from(HashMap.values(finalMap));
|
|
1705
|
+
for (const urlObj of urls) {
|
|
1706
|
+
const normalizeResult = yield* Effect.either(normalizeUrl(urlObj.url, strategy));
|
|
1707
|
+
if (normalizeResult._tag === "Left") {
|
|
1708
|
+
invalidCount++;
|
|
1709
|
+
skipped.push({ url: urlObj.url, reason: `Invalid URL: ${normalizeResult.left.message}` });
|
|
1710
|
+
yield* Effect.logWarning(`Invalid URL skipped: ${urlObj.url}`);
|
|
1711
|
+
continue;
|
|
1712
|
+
}
|
|
1713
|
+
const normalized = normalizeResult.right;
|
|
1714
|
+
const key = strategy.wwwHandling === "preserve" ? normalized.normalized : normalized.domain;
|
|
1715
|
+
const existingOption = HashMap.get(domainMap, key);
|
|
1716
|
+
if (Option.isNone(existingOption)) {
|
|
1717
|
+
domainMap = HashMap.set(domainMap, key, urlObj);
|
|
1718
|
+
} else {
|
|
1719
|
+
const existing = existingOption.value;
|
|
1720
|
+
let shouldReplace = false;
|
|
1721
|
+
if (strategy.wwwHandling === "prefer-www") {
|
|
1722
|
+
const existingHasWww = existing.url.includes("://www.");
|
|
1723
|
+
const newHasWww = urlObj.url.includes("://www.");
|
|
1724
|
+
shouldReplace = !existingHasWww && newHasWww;
|
|
1725
|
+
} else if (strategy.wwwHandling === "prefer-non-www") {
|
|
1726
|
+
const existingHasWww = existing.url.includes("://www.");
|
|
1727
|
+
const newHasWww = urlObj.url.includes("://www.");
|
|
1728
|
+
shouldReplace = existingHasWww && !newHasWww;
|
|
1729
|
+
}
|
|
1730
|
+
if (shouldReplace) {
|
|
1731
|
+
domainMap = HashMap.set(domainMap, key, urlObj);
|
|
1732
|
+
skipped.push({ url: existing.url, reason: `Replaced by preferred variant: ${urlObj.url}` });
|
|
1733
|
+
} else {
|
|
1734
|
+
skipped.push({ url: urlObj.url, reason: `Duplicate of: ${existing.url}` });
|
|
1735
|
+
}
|
|
1736
|
+
}
|
|
1737
|
+
}
|
|
1738
|
+
const deduplicated = Array.from(HashMap.values(domainMap));
|
|
1760
1739
|
return {
|
|
1761
1740
|
deduplicated,
|
|
1762
|
-
skipped
|
|
1741
|
+
skipped,
|
|
1763
1742
|
stats: {
|
|
1764
1743
|
total: urls.length,
|
|
1765
1744
|
unique: deduplicated.length,
|
|
1766
|
-
duplicates:
|
|
1745
|
+
duplicates: skipped.filter((s) => s.reason.startsWith("Duplicate")).length,
|
|
1767
1746
|
invalid: invalidCount
|
|
1768
1747
|
}
|
|
1769
1748
|
};
|
|
@@ -6548,6 +6527,251 @@ const TokenExtractorLive = Layer.effect(
|
|
|
6548
6527
|
TokenExtractor,
|
|
6549
6528
|
makeTokenExtractor
|
|
6550
6529
|
);
|
|
6530
|
+
class BrowserEngineService extends Effect.Service()(
|
|
6531
|
+
"@jambudipa.io/BrowserEngine",
|
|
6532
|
+
{
|
|
6533
|
+
effect: Effect.gen(function* () {
|
|
6534
|
+
const browserRef = yield* Ref.make(Option.none());
|
|
6535
|
+
const contextRef = yield* Ref.make(Option.none());
|
|
6536
|
+
const pageRef = yield* Ref.make(Option.none());
|
|
6537
|
+
const configRef = yield* Ref.make({
|
|
6538
|
+
headless: true,
|
|
6539
|
+
timeout: 3e4,
|
|
6540
|
+
viewport: { width: 1920, height: 1080 },
|
|
6541
|
+
userAgent: "Mozilla/5.0 (compatible; Spider/1.0)",
|
|
6542
|
+
locale: "en-GB"
|
|
6543
|
+
});
|
|
6544
|
+
const ensureBrowser = () => Effect.gen(function* () {
|
|
6545
|
+
const browserOpt = yield* Ref.get(browserRef);
|
|
6546
|
+
if (Option.isSome(browserOpt)) {
|
|
6547
|
+
return browserOpt.value;
|
|
6548
|
+
}
|
|
6549
|
+
const { chromium } = yield* Effect.tryPromise({
|
|
6550
|
+
try: () => import("playwright"),
|
|
6551
|
+
catch: () => BrowserError.launchFailed("Playwright not installed")
|
|
6552
|
+
});
|
|
6553
|
+
const config = yield* Ref.get(configRef);
|
|
6554
|
+
const browser = yield* Effect.tryPromise({
|
|
6555
|
+
try: () => chromium.launch({
|
|
6556
|
+
headless: config.headless,
|
|
6557
|
+
timeout: config.timeout
|
|
6558
|
+
}),
|
|
6559
|
+
catch: (error) => BrowserError.launchFailed(error)
|
|
6560
|
+
});
|
|
6561
|
+
yield* Ref.set(browserRef, Option.some(browser));
|
|
6562
|
+
return browser;
|
|
6563
|
+
});
|
|
6564
|
+
const ensureContext = () => Effect.gen(function* () {
|
|
6565
|
+
const contextOpt = yield* Ref.get(contextRef);
|
|
6566
|
+
if (Option.isSome(contextOpt)) {
|
|
6567
|
+
return contextOpt.value;
|
|
6568
|
+
}
|
|
6569
|
+
const browser = yield* ensureBrowser();
|
|
6570
|
+
const config = yield* Ref.get(configRef);
|
|
6571
|
+
const context = yield* Effect.tryPromise({
|
|
6572
|
+
try: () => browser.newContext({
|
|
6573
|
+
viewport: config.viewport,
|
|
6574
|
+
userAgent: config.userAgent,
|
|
6575
|
+
locale: config.locale
|
|
6576
|
+
}),
|
|
6577
|
+
catch: (error) => new BrowserError({
|
|
6578
|
+
operation: "newContext",
|
|
6579
|
+
cause: error
|
|
6580
|
+
})
|
|
6581
|
+
});
|
|
6582
|
+
yield* Ref.set(contextRef, Option.some(context));
|
|
6583
|
+
return context;
|
|
6584
|
+
});
|
|
6585
|
+
const getCurrentPage = () => Effect.gen(function* () {
|
|
6586
|
+
const pageOpt = yield* Ref.get(pageRef);
|
|
6587
|
+
return yield* Option.match(pageOpt, {
|
|
6588
|
+
onNone: () => Effect.fail(new PageError({
|
|
6589
|
+
url: "unknown",
|
|
6590
|
+
operation: "getCurrentPage",
|
|
6591
|
+
cause: "No active page"
|
|
6592
|
+
})),
|
|
6593
|
+
onSome: (page) => Effect.succeed(page)
|
|
6594
|
+
});
|
|
6595
|
+
});
|
|
6596
|
+
return {
|
|
6597
|
+
launch: () => Effect.gen(function* () {
|
|
6598
|
+
yield* ensureBrowser();
|
|
6599
|
+
yield* Effect.log("Browser launched successfully");
|
|
6600
|
+
}),
|
|
6601
|
+
createPage: () => Effect.gen(function* () {
|
|
6602
|
+
const context = yield* ensureContext();
|
|
6603
|
+
const page = yield* Effect.tryPromise({
|
|
6604
|
+
try: () => context.newPage(),
|
|
6605
|
+
catch: (error) => new BrowserError({
|
|
6606
|
+
operation: "newPage",
|
|
6607
|
+
cause: error
|
|
6608
|
+
})
|
|
6609
|
+
});
|
|
6610
|
+
yield* Ref.set(pageRef, Option.some(page));
|
|
6611
|
+
yield* Effect.log("New page created");
|
|
6612
|
+
return page;
|
|
6613
|
+
}),
|
|
6614
|
+
navigateTo: (url) => Effect.gen(function* () {
|
|
6615
|
+
const page = yield* getCurrentPage();
|
|
6616
|
+
yield* Effect.tryPromise({
|
|
6617
|
+
try: () => page.goto(url, { waitUntil: "networkidle" }),
|
|
6618
|
+
catch: (error) => new PageError({
|
|
6619
|
+
url,
|
|
6620
|
+
operation: "navigate",
|
|
6621
|
+
cause: error
|
|
6622
|
+
})
|
|
6623
|
+
});
|
|
6624
|
+
yield* Effect.logDebug(`Navigated to ${url}`);
|
|
6625
|
+
}),
|
|
6626
|
+
waitForSelector: (selector, timeout) => Effect.gen(function* () {
|
|
6627
|
+
const page = yield* getCurrentPage();
|
|
6628
|
+
const config = yield* Ref.get(configRef);
|
|
6629
|
+
yield* Effect.tryPromise({
|
|
6630
|
+
try: () => page.waitForSelector(selector, {
|
|
6631
|
+
timeout: timeout ?? config.timeout
|
|
6632
|
+
}),
|
|
6633
|
+
catch: (error) => new PageError({
|
|
6634
|
+
url: page.url(),
|
|
6635
|
+
operation: "waitForSelector",
|
|
6636
|
+
selector,
|
|
6637
|
+
cause: error
|
|
6638
|
+
})
|
|
6639
|
+
});
|
|
6640
|
+
}),
|
|
6641
|
+
click: (selector) => Effect.gen(function* () {
|
|
6642
|
+
const page = yield* getCurrentPage();
|
|
6643
|
+
yield* Effect.tryPromise({
|
|
6644
|
+
try: () => page.click(selector),
|
|
6645
|
+
catch: (error) => new PageError({
|
|
6646
|
+
url: page.url(),
|
|
6647
|
+
operation: "click",
|
|
6648
|
+
selector,
|
|
6649
|
+
cause: error
|
|
6650
|
+
})
|
|
6651
|
+
});
|
|
6652
|
+
yield* Effect.logDebug(`Clicked element: ${selector}`);
|
|
6653
|
+
}),
|
|
6654
|
+
fill: (selector, value) => Effect.gen(function* () {
|
|
6655
|
+
const page = yield* getCurrentPage();
|
|
6656
|
+
yield* Effect.tryPromise({
|
|
6657
|
+
try: () => page.fill(selector, value),
|
|
6658
|
+
catch: (error) => new PageError({
|
|
6659
|
+
url: page.url(),
|
|
6660
|
+
operation: "fill",
|
|
6661
|
+
selector,
|
|
6662
|
+
cause: error
|
|
6663
|
+
})
|
|
6664
|
+
});
|
|
6665
|
+
yield* Effect.logDebug(`Filled ${selector} with value`);
|
|
6666
|
+
}),
|
|
6667
|
+
scroll: (distance) => Effect.gen(function* () {
|
|
6668
|
+
const page = yield* getCurrentPage();
|
|
6669
|
+
yield* Effect.ignore(
|
|
6670
|
+
Effect.tryPromise({
|
|
6671
|
+
try: () => page.evaluate((d) => {
|
|
6672
|
+
window.scrollBy(0, d);
|
|
6673
|
+
}, distance),
|
|
6674
|
+
catch: (error) => error
|
|
6675
|
+
})
|
|
6676
|
+
);
|
|
6677
|
+
yield* Effect.logDebug(`Scrolled ${distance}px`);
|
|
6678
|
+
}),
|
|
6679
|
+
evaluate: (script) => Effect.gen(function* () {
|
|
6680
|
+
const page = yield* getCurrentPage();
|
|
6681
|
+
return yield* Effect.tryPromise({
|
|
6682
|
+
try: () => page.evaluate(script),
|
|
6683
|
+
catch: (error) => new PageError({
|
|
6684
|
+
url: page.url(),
|
|
6685
|
+
operation: "evaluate",
|
|
6686
|
+
cause: error
|
|
6687
|
+
})
|
|
6688
|
+
});
|
|
6689
|
+
}),
|
|
6690
|
+
getHTML: () => Effect.gen(function* () {
|
|
6691
|
+
const page = yield* getCurrentPage();
|
|
6692
|
+
return yield* Effect.tryPromise({
|
|
6693
|
+
try: () => page.content(),
|
|
6694
|
+
catch: (error) => new PageError({
|
|
6695
|
+
url: page.url(),
|
|
6696
|
+
operation: "getHTML",
|
|
6697
|
+
cause: error
|
|
6698
|
+
})
|
|
6699
|
+
});
|
|
6700
|
+
}),
|
|
6701
|
+
screenshot: (path2) => Effect.gen(function* () {
|
|
6702
|
+
const page = yield* getCurrentPage();
|
|
6703
|
+
const buffer = yield* Effect.tryPromise({
|
|
6704
|
+
try: () => page.screenshot({ path: path2, fullPage: true }),
|
|
6705
|
+
catch: (error) => new PageError({
|
|
6706
|
+
url: page.url(),
|
|
6707
|
+
operation: "screenshot",
|
|
6708
|
+
cause: error
|
|
6709
|
+
})
|
|
6710
|
+
});
|
|
6711
|
+
yield* Effect.log(`Screenshot taken${path2 ? ` and saved to ${path2}` : ""}`);
|
|
6712
|
+
return buffer;
|
|
6713
|
+
}),
|
|
6714
|
+
closePage: () => Effect.gen(function* () {
|
|
6715
|
+
const pageOpt = yield* Ref.get(pageRef);
|
|
6716
|
+
if (Option.isSome(pageOpt)) {
|
|
6717
|
+
yield* Effect.ignore(
|
|
6718
|
+
Effect.tryPromise({
|
|
6719
|
+
try: () => pageOpt.value.close(),
|
|
6720
|
+
catch: (error) => error
|
|
6721
|
+
})
|
|
6722
|
+
);
|
|
6723
|
+
yield* Ref.set(pageRef, Option.none());
|
|
6724
|
+
yield* Effect.log("Page closed");
|
|
6725
|
+
}
|
|
6726
|
+
}),
|
|
6727
|
+
close: () => Effect.gen(function* () {
|
|
6728
|
+
const pageOpt = yield* Ref.get(pageRef);
|
|
6729
|
+
if (Option.isSome(pageOpt)) {
|
|
6730
|
+
yield* Effect.ignore(
|
|
6731
|
+
Effect.tryPromise({
|
|
6732
|
+
try: () => pageOpt.value.close(),
|
|
6733
|
+
catch: (error) => error
|
|
6734
|
+
})
|
|
6735
|
+
);
|
|
6736
|
+
}
|
|
6737
|
+
const contextOpt = yield* Ref.get(contextRef);
|
|
6738
|
+
if (Option.isSome(contextOpt)) {
|
|
6739
|
+
yield* Effect.ignore(
|
|
6740
|
+
Effect.tryPromise({
|
|
6741
|
+
try: () => contextOpt.value.close(),
|
|
6742
|
+
catch: (error) => error
|
|
6743
|
+
})
|
|
6744
|
+
);
|
|
6745
|
+
}
|
|
6746
|
+
const browserOpt = yield* Ref.get(browserRef);
|
|
6747
|
+
if (Option.isSome(browserOpt)) {
|
|
6748
|
+
yield* Effect.ignore(
|
|
6749
|
+
Effect.tryPromise({
|
|
6750
|
+
try: () => browserOpt.value.close(),
|
|
6751
|
+
catch: (error) => error
|
|
6752
|
+
})
|
|
6753
|
+
);
|
|
6754
|
+
}
|
|
6755
|
+
yield* Ref.set(pageRef, Option.none());
|
|
6756
|
+
yield* Ref.set(contextRef, Option.none());
|
|
6757
|
+
yield* Ref.set(browserRef, Option.none());
|
|
6758
|
+
yield* Effect.log("Browser engine closed");
|
|
6759
|
+
})
|
|
6760
|
+
};
|
|
6761
|
+
})
|
|
6762
|
+
}
|
|
6763
|
+
) {
|
|
6764
|
+
}
|
|
6765
|
+
const BrowserEngineLive = BrowserEngineService.Default;
|
|
6766
|
+
const BrowserEngineWithConfig = (_config) => BrowserEngineService.Default;
|
|
6767
|
+
const withBrowser = (operation) => Effect.gen(function* () {
|
|
6768
|
+
const engine = yield* BrowserEngineService;
|
|
6769
|
+
return yield* Effect.acquireUseRelease(
|
|
6770
|
+
Effect.succeed(engine),
|
|
6771
|
+
operation,
|
|
6772
|
+
(engine2) => engine2.close()
|
|
6773
|
+
);
|
|
6774
|
+
});
|
|
6551
6775
|
class LoginError extends Data.TaggedError("LoginError") {
|
|
6552
6776
|
}
|
|
6553
6777
|
class SessionNotValidError extends Data.TaggedError("SessionNotValidError") {
|
|
@@ -6759,6 +6983,9 @@ const WebScrapingEngineLive = Layer.effect(
|
|
|
6759
6983
|
export {
|
|
6760
6984
|
AdapterNotInitialisedError,
|
|
6761
6985
|
BrowserCleanupError,
|
|
6986
|
+
BrowserEngineLive,
|
|
6987
|
+
BrowserEngineService,
|
|
6988
|
+
BrowserEngineWithConfig,
|
|
6762
6989
|
BrowserError,
|
|
6763
6990
|
ConfigError,
|
|
6764
6991
|
ConfigurationError,
|
|
@@ -6831,6 +7058,7 @@ export {
|
|
|
6831
7058
|
makeSpiderLogger,
|
|
6832
7059
|
makeStateManager,
|
|
6833
7060
|
makeTokenExtractor,
|
|
6834
|
-
makeWebScrapingEngine
|
|
7061
|
+
makeWebScrapingEngine,
|
|
7062
|
+
withBrowser
|
|
6835
7063
|
};
|
|
6836
7064
|
//# sourceMappingURL=index.js.map
|