mcp-scraper 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/api-server.cjs +388 -75
- package/dist/bin/api-server.cjs.map +1 -1
- package/dist/bin/api-server.js +2 -2
- package/dist/bin/mcp-stdio-server.cjs +243 -11
- package/dist/bin/mcp-stdio-server.cjs.map +1 -1
- package/dist/bin/mcp-stdio-server.js +1 -1
- package/dist/bin/paa-harvest.cjs +14 -4
- package/dist/bin/paa-harvest.cjs.map +1 -1
- package/dist/bin/paa-harvest.js +4 -3
- package/dist/bin/paa-harvest.js.map +1 -1
- package/dist/{chunk-3OIRNUF5.js → chunk-RE6HCRYC.js} +244 -12
- package/dist/chunk-RE6HCRYC.js.map +1 -0
- package/dist/{chunk-LUBDFS67.js → chunk-TM22BLWP.js} +15 -3
- package/dist/chunk-TM22BLWP.js.map +1 -0
- package/dist/index.cjs +12 -2
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +1 -1
- package/dist/{server-YNJHP5PU.js → server-QXVVTKJP.js} +80 -30
- package/dist/server-QXVVTKJP.js.map +1 -0
- package/dist/{worker-PBG6LGET.js → worker-AUCXFHEL.js} +4 -3
- package/dist/worker-AUCXFHEL.js.map +1 -0
- package/package.json +1 -1
- package/dist/chunk-3OIRNUF5.js.map +0 -1
- package/dist/chunk-LUBDFS67.js.map +0 -1
- package/dist/server-YNJHP5PU.js.map +0 -1
- package/dist/worker-PBG6LGET.js.map +0 -1
package/dist/bin/paa-harvest.cjs
CHANGED
|
@@ -26,6 +26,16 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
26
26
|
// src/cli.ts
|
|
27
27
|
var import_commander = require("commander");
|
|
28
28
|
|
|
29
|
+
// src/lib/browser-service-env.ts
|
|
30
|
+
/**
 * Resolve the browser-service API key from the environment.
 *
 * Checks `BROWSER_SERVICE_API_KEY` first and then `KERNEL_API_KEY`. Each
 * candidate is trimmed before it is considered, so a blank or whitespace-only
 * `BROWSER_SERVICE_API_KEY` (for example an empty entry in a `.env` file)
 * does not hide a usable `KERNEL_API_KEY`.
 *
 * @returns {string | undefined} The trimmed key, or `undefined` when neither
 *   variable holds a non-blank value.
 */
function browserServiceApiKey() {
  const candidates = [process.env.BROWSER_SERVICE_API_KEY, process.env.KERNEL_API_KEY];
  for (const candidate of candidates) {
    const value = candidate?.trim();
    // Skip unset and blank values so the next variable gets a chance.
    if (value) {
      return value;
    }
  }
  return void 0;
}
|
|
34
|
+
/**
 * Resolve the browser-service proxy id from the environment.
 *
 * Checks `BROWSER_SERVICE_PROXY_ID` first and then `KERNEL_PROXY_ID`. Each
 * candidate is trimmed before it is considered, so a blank or whitespace-only
 * `BROWSER_SERVICE_PROXY_ID` does not hide a usable `KERNEL_PROXY_ID`.
 *
 * @returns {string | undefined} The trimmed proxy id, or `undefined` when
 *   neither variable holds a non-blank value.
 */
function browserServiceProxyId() {
  const candidates = [process.env.BROWSER_SERVICE_PROXY_ID, process.env.KERNEL_PROXY_ID];
  for (const candidate of candidates) {
    const value = candidate?.trim();
    // Skip unset and blank values so the next variable gets a chance.
    if (value) {
      return value;
    }
  }
  return void 0;
}
|
|
38
|
+
|
|
29
39
|
// src/schemas.ts
|
|
30
40
|
var import_zod = require("zod");
|
|
31
41
|
var HarvestOptionsSchema = import_zod.z.object({
|
|
@@ -2614,8 +2624,8 @@ async function harvest(rawOptions) {
|
|
|
2614
2624
|
const onAttemptEvent = getAttemptLogSink(rawOptions);
|
|
2615
2625
|
const requestedProxyMode = raw.proxyMode;
|
|
2616
2626
|
const proxyMode = requestedProxyMode === "none" ? "none" : requestedProxyMode === "configured" ? "configured" : "location";
|
|
2617
|
-
const kernelApiKey = typeof raw.kernelApiKey === "string" ? raw.kernelApiKey.trim() :
|
|
2618
|
-
const configuredKernelProxyId = typeof raw.kernelProxyId === "string" ? raw.kernelProxyId.trim() :
|
|
2627
|
+
const kernelApiKey = typeof raw.kernelApiKey === "string" ? raw.kernelApiKey.trim() : browserServiceApiKey();
|
|
2628
|
+
const configuredKernelProxyId = typeof raw.kernelProxyId === "string" ? raw.kernelProxyId.trim() : browserServiceProxyId();
|
|
2619
2629
|
const proxyOpts = {
|
|
2620
2630
|
kernelApiKey,
|
|
2621
2631
|
proxyMode,
|
|
@@ -2800,7 +2810,7 @@ async function harvest(rawOptions) {
|
|
|
2800
2810
|
|
|
2801
2811
|
// src/cli.ts
|
|
2802
2812
|
var program = new import_commander.Command();
|
|
2803
|
-
program.name("paa-harvest").description("Recursively extract Google People Also Ask questions").requiredOption("-q, --query <query>", "Seed query").option("-l, --location <location>", 'Location name (e.g. "austin" or "Austin,Texas,United States")').option("--gl <gl>", "Google country code", "us").option("--hl <hl>", "Google language code", "en").option("-d, --depth <depth>", "BFS depth (1-30)", "3").option("-m, --max-questions <n>", "Max questions to harvest", "100").option("-o, --output <dir>", "Output directory", "./paa-output").option("-f, --format <format>", "Output format: json, csv, or both", "both").option("--headless", "Run browser in headless mode", false).option("--profile <dir>", "Persistent browser profile directory").option("--proxy <url>", "Proxy server URL").option("--kernel-api-key <key>", "
|
|
2813
|
+
program.name("paa-harvest").description("Recursively extract Google People Also Ask questions").requiredOption("-q, --query <query>", "Seed query").option("-l, --location <location>", 'Location name (e.g. "austin" or "Austin,Texas,United States")').option("--gl <gl>", "Google country code", "us").option("--hl <hl>", "Google language code", "en").option("-d, --depth <depth>", "BFS depth (1-30)", "3").option("-m, --max-questions <n>", "Max questions to harvest", "100").option("-o, --output <dir>", "Output directory", "./paa-output").option("-f, --format <format>", "Output format: json, csv, or both", "both").option("--headless", "Run browser in headless mode", false).option("--profile <dir>", "Persistent browser profile directory").option("--proxy <url>", "Proxy server URL").option("--kernel-api-key <key>", "Browser service API key (or set BROWSER_SERVICE_API_KEY env var)").action(async (opts) => {
|
|
2804
2814
|
try {
|
|
2805
2815
|
const result = await harvest({
|
|
2806
2816
|
query: opts.query,
|
|
@@ -2814,7 +2824,7 @@ program.name("paa-harvest").description("Recursively extract Google People Also
|
|
|
2814
2824
|
headless: opts.headless,
|
|
2815
2825
|
profileDir: opts.profile,
|
|
2816
2826
|
proxy: opts.proxy,
|
|
2817
|
-
kernelApiKey: opts.kernelApiKey ??
|
|
2827
|
+
kernelApiKey: opts.kernelApiKey ?? browserServiceApiKey()
|
|
2818
2828
|
});
|
|
2819
2829
|
console.log(JSON.stringify({ totalQuestions: result.totalQuestions, outputDir: result.stats.seed }));
|
|
2820
2830
|
} catch (err) {
|