mcp-scraper 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -8
- package/dist/bin/api-server.cjs +4691 -3614
- package/dist/bin/api-server.cjs.map +1 -1
- package/dist/bin/api-server.js +2 -2
- package/dist/bin/browser-agent-stdio-server.cjs +85 -8
- package/dist/bin/browser-agent-stdio-server.cjs.map +1 -1
- package/dist/bin/browser-agent-stdio-server.js +83 -6
- package/dist/bin/browser-agent-stdio-server.js.map +1 -1
- package/dist/bin/mcp-stdio-server.cjs +170 -12
- package/dist/bin/mcp-stdio-server.cjs.map +1 -1
- package/dist/bin/mcp-stdio-server.js +3 -3
- package/dist/bin/paa-harvest.cjs +223 -74
- package/dist/bin/paa-harvest.cjs.map +1 -1
- package/dist/bin/paa-harvest.js +2 -2
- package/dist/{chunk-GXBT5CDU.js → chunk-IQOCZGJJ.js} +39 -2
- package/dist/chunk-IQOCZGJJ.js.map +1 -0
- package/dist/{chunk-ZMOWIBMK.js → chunk-M2S27J6Z.js} +9 -2
- package/dist/{chunk-ZMOWIBMK.js.map → chunk-M2S27J6Z.js.map} +1 -1
- package/dist/{chunk-TM22BLWP.js → chunk-MY3S7EX7.js} +221 -76
- package/dist/chunk-MY3S7EX7.js.map +1 -0
- package/dist/{chunk-BMVQB3WN.js → chunk-OR7DLLH2.js} +173 -14
- package/dist/chunk-OR7DLLH2.js.map +1 -0
- package/dist/chunk-XR65SANX.js +7 -0
- package/dist/chunk-XR65SANX.js.map +1 -0
- package/dist/index.cjs +223 -74
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +2 -2
- package/dist/{server-ASCMKUQ5.js → server-CJMX2QUM.js} +880 -181
- package/dist/server-CJMX2QUM.js.map +1 -0
- package/dist/{worker-KJ4A7WIR.js → worker-NAKGTIF5.js} +4 -4
- package/package.json +1 -1
- package/dist/chunk-2BS7BUEE.js +0 -7
- package/dist/chunk-2BS7BUEE.js.map +0 -1
- package/dist/chunk-BMVQB3WN.js.map +0 -1
- package/dist/chunk-GXBT5CDU.js.map +0 -1
- package/dist/chunk-TM22BLWP.js.map +0 -1
- package/dist/server-ASCMKUQ5.js.map +0 -1
- /package/dist/{worker-KJ4A7WIR.js.map → worker-NAKGTIF5.js.map} +0 -0
|
@@ -5,9 +5,10 @@ import {
|
|
|
5
5
|
buildPaaExtractorMcpServer,
|
|
6
6
|
configureReportSaving,
|
|
7
7
|
harvestTimeoutBudget,
|
|
8
|
-
liveWebToolAnnotations
|
|
9
|
-
|
|
10
|
-
|
|
8
|
+
liveWebToolAnnotations,
|
|
9
|
+
outputBaseDir
|
|
10
|
+
} from "./chunk-OR7DLLH2.js";
|
|
11
|
+
import "./chunk-XR65SANX.js";
|
|
11
12
|
import {
|
|
12
13
|
BALANCE_PACK_LABELS,
|
|
13
14
|
BALANCE_PRICE_IDS,
|
|
@@ -25,7 +26,7 @@ import {
|
|
|
25
26
|
harvestProblemResponse,
|
|
26
27
|
insufficientBalanceResponse,
|
|
27
28
|
serializeHarvestProblem
|
|
28
|
-
} from "./chunk-
|
|
29
|
+
} from "./chunk-IQOCZGJJ.js";
|
|
29
30
|
import {
|
|
30
31
|
BrowserDriver,
|
|
31
32
|
MapsPlaceOptionsSchema,
|
|
@@ -38,14 +39,15 @@ import {
|
|
|
38
39
|
browserServiceApiKey,
|
|
39
40
|
browserServiceProxyId,
|
|
40
41
|
buildYouTubeChannelVideosUrl,
|
|
42
|
+
deleteKernelProxyId,
|
|
41
43
|
harvest,
|
|
42
44
|
resolveKernelProxyId
|
|
43
|
-
} from "./chunk-
|
|
45
|
+
} from "./chunk-MY3S7EX7.js";
|
|
44
46
|
import {
|
|
45
47
|
CaptchaError,
|
|
46
48
|
RECAPTCHA_INSTRUCTIONS,
|
|
47
49
|
sanitizeVendorName
|
|
48
|
-
} from "./chunk-
|
|
50
|
+
} from "./chunk-M2S27J6Z.js";
|
|
49
51
|
import {
|
|
50
52
|
SiteAuditJobRowSchema,
|
|
51
53
|
cancelJob,
|
|
@@ -4846,7 +4848,7 @@ async function extractSite(opts) {
|
|
|
4846
4848
|
}
|
|
4847
4849
|
|
|
4848
4850
|
// src/api/server.ts
|
|
4849
|
-
import { Hono as
|
|
4851
|
+
import { Hono as Hono11 } from "hono";
|
|
4850
4852
|
import { serve as serveInngest } from "inngest/hono";
|
|
4851
4853
|
|
|
4852
4854
|
// src/inngest/client.ts
|
|
@@ -9744,8 +9746,11 @@ var MapsSearchExtractor = class {
|
|
|
9744
9746
|
headless: options.headless,
|
|
9745
9747
|
kernelApiKey: options.kernelApiKey,
|
|
9746
9748
|
kernelProxyId: options.kernelProxyId,
|
|
9749
|
+
kernelProxyResolution: options.kernelProxyResolution,
|
|
9750
|
+
proxyMode: options.proxyMode,
|
|
9747
9751
|
viewport: { width: 1280, height: 900 },
|
|
9748
|
-
locale: `${options.hl}-${options.gl.toUpperCase()}
|
|
9752
|
+
locale: `${options.hl}-${options.gl.toUpperCase()}`,
|
|
9753
|
+
debug: options.debug
|
|
9749
9754
|
};
|
|
9750
9755
|
try {
|
|
9751
9756
|
await this.driver.launch(config);
|
|
@@ -9836,6 +9841,9 @@ var MapsSearchExtractor = class {
|
|
|
9836
9841
|
const value = parts.find((part) => pattern.test(part));
|
|
9837
9842
|
return value ?? null;
|
|
9838
9843
|
}
|
|
9844
|
+
/**
 * Build a set of lower-cased strings from `values`, skipping falsy entries
 * (null, undefined, empty string).
 * @param {Array<string|null|undefined>} values
 * @returns {Set<string>}
 */
function normalizedSet(values) {
  const lowered = /* @__PURE__ */ new Set();
  for (const entry of values) {
    if (entry) lowered.add(entry.toLowerCase());
  }
  return lowered;
}
|
|
9839
9847
|
const out = [];
|
|
9840
9848
|
const seen = /* @__PURE__ */ new Set();
|
|
9841
9849
|
const anchors = Array.from(document.querySelectorAll('a[href*="/maps/place/"]'));
|
|
@@ -9851,11 +9859,17 @@ var MapsSearchExtractor = class {
|
|
|
9851
9859
|
const name = aria ?? heading ?? parts[0] ?? stableUrl;
|
|
9852
9860
|
const links = Array.from(card?.querySelectorAll("a[href]") ?? []);
|
|
9853
9861
|
const websiteUrl = links.find((link) => link.href.startsWith("http") && !link.href.includes("google."))?.href ?? null;
|
|
9854
|
-
const directionsUrl = links.find((link) => /google\.[^/]+\/maps\/dir|\/dir\//i.test(link.href))?.href ?? null;
|
|
9855
9862
|
const rating = firstMatching(parts, /^\d(?:\.\d)?$/);
|
|
9856
|
-
const reviewCountRaw = firstMatching(parts, /^\(?[\d,]+\)?$/);
|
|
9857
|
-
const
|
|
9863
|
+
const reviewCountRaw = firstMatching(parts, /^\(?[\d,]+\)?(?:\s+reviews?)?$/i);
|
|
9864
|
+
const phone = firstMatching(parts, /(?:\+?1[\s.-]?)?\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}/);
|
|
9865
|
+
const hoursStatus = parts.find((part) => /^(open|closed|closes|opens)\b|^·\s*(opens|closes)\b/i.test(part)) ?? null;
|
|
9858
9866
|
const address = parts.find((part) => /\b[A-Z]{2}\s+\d{5}\b|\b(?:St|Street|Ave|Avenue|Rd|Road|Blvd|Drive|Dr)\b/i.test(part)) ?? null;
|
|
9867
|
+
const directionsUrl = links.find((link) => /google\.[^/]+\/maps\/dir|\/dir\//i.test(link.href))?.href ?? `https://www.google.com/maps/dir/?api=1&destination=${encodeURIComponent([name, address].filter(Boolean).join(", ") || name)}`;
|
|
9868
|
+
const excluded = normalizedSet([name, rating, reviewCountRaw, phone, hoursStatus, address, "Website", "Directions"]);
|
|
9869
|
+
const category = parts.find((part) => {
|
|
9870
|
+
const normalized = part.toLowerCase();
|
|
9871
|
+
return !excluded.has(normalized) && !/^\d(?:\.\d)?$|^\(?[\d,]+\)?(?:\s+reviews?)?$/i.test(part) && !/(?:\+?1[\s.-]?)?\(?\d{3}\)?[\s.-]\d{3}[\s.-]\d{4}/.test(part) && !/\b[A-Z]{2}\s+\d{5}\b|\b(?:St|Street|Ave|Avenue|Rd|Road|Blvd|Drive|Dr)\b/i.test(part) && !/^(open|closed|closes|opens)\b|^·\s*(opens|closes)\b|directions|website|book online|sponsored|visit site|financing/i.test(part);
|
|
9872
|
+
}) ?? null;
|
|
9859
9873
|
const { cid, cidDecimal } = cidFromUrl(placeUrl);
|
|
9860
9874
|
out.push({
|
|
9861
9875
|
position: out.length + 1,
|
|
@@ -9867,6 +9881,8 @@ var MapsSearchExtractor = class {
|
|
|
9867
9881
|
reviewCount: reviewCountRaw ? reviewCountRaw.replace(/[()]/g, "") : null,
|
|
9868
9882
|
category,
|
|
9869
9883
|
address,
|
|
9884
|
+
phone,
|
|
9885
|
+
hoursStatus,
|
|
9870
9886
|
websiteUrl,
|
|
9871
9887
|
directionsUrl,
|
|
9872
9888
|
metadata: parts.slice(0, 20)
|
|
@@ -9886,12 +9902,22 @@ function mapsErrorResponse(c, msg, errorCode) {
|
|
|
9886
9902
|
retryable: blocked
|
|
9887
9903
|
}, blocked ? 503 : 500);
|
|
9888
9904
|
}
|
|
9905
|
+
/**
 * Best-effort deletion of a disposable proxy created for one Maps search.
 *
 * Does nothing when either argument is missing. Any failure of
 * `deleteKernelProxyId` — a rejected promise or a synchronous throw — is
 * logged as a JSON warning and swallowed. This helper is awaited inside a
 * `finally` block ahead of `driver.close()`, so it must never throw.
 *
 * @param {string|undefined} kernelApiKey API key passed to `deleteKernelProxyId`.
 * @param {string|undefined} proxyId Id of the proxy to delete.
 * @returns {Promise<void>}
 */
async function cleanupDisposableProxy(kernelApiKey, proxyId) {
  if (!kernelApiKey || !proxyId) return;
  try {
    await deleteKernelProxyId(kernelApiKey, proxyId);
  } catch (err) {
    console.warn(JSON.stringify({
      event: "maps_search_proxy_delete_failed",
      // Log only the tail of the id so the full identifier stays out of logs.
      proxy_id_suffix: proxyId.slice(-6),
      message: err instanceof Error ? err.message : String(err)
    }));
  }
}
|
|
9889
9915
|
var mapsApp = new Hono5();
|
|
9890
9916
|
mapsApp.post("/search", createApiKeyAuth(), async (c) => {
|
|
9891
9917
|
const user = c.get("user");
|
|
9892
9918
|
const body = await c.req.json().catch(() => ({}));
|
|
9893
9919
|
const parsed = MapsSearchOptionsSchema.safeParse({
|
|
9894
|
-
kernelApiKey:
|
|
9920
|
+
kernelApiKey: browserServiceApiKey(),
|
|
9895
9921
|
...body
|
|
9896
9922
|
});
|
|
9897
9923
|
if (!parsed.success) {
|
|
@@ -9906,8 +9932,23 @@ mapsApp.post("/search", createApiKeyAuth(), async (c) => {
|
|
|
9906
9932
|
if (!ok) return c.json(insufficientBalanceResponse(balance_mc, MC_COSTS.maps_search), 402);
|
|
9907
9933
|
const driver = new BrowserDriver();
|
|
9908
9934
|
const extractor = new MapsSearchExtractor(driver);
|
|
9935
|
+
let disposableProxyId;
|
|
9909
9936
|
try {
|
|
9910
|
-
const
|
|
9937
|
+
const resolution = await resolveKernelProxyId({
|
|
9938
|
+
kernelApiKey: parsed.data.kernelApiKey,
|
|
9939
|
+
proxyMode: parsed.data.proxyMode,
|
|
9940
|
+
configuredKernelProxyId: browserServiceProxyId(),
|
|
9941
|
+
location: parsed.data.location,
|
|
9942
|
+
proxyZip: parsed.data.proxyZip,
|
|
9943
|
+
gl: parsed.data.gl,
|
|
9944
|
+
fresh: parsed.data.proxyMode === "location"
|
|
9945
|
+
});
|
|
9946
|
+
disposableProxyId = resolution.disposableProxyId;
|
|
9947
|
+
const result = await extractor.extract({
|
|
9948
|
+
...parsed.data,
|
|
9949
|
+
kernelProxyId: parsed.data.proxyMode === "none" ? void 0 : resolution.kernelProxyId,
|
|
9950
|
+
kernelProxyResolution: resolution.resolution
|
|
9951
|
+
});
|
|
9911
9952
|
await logRequestEvent({
|
|
9912
9953
|
userId: user.id,
|
|
9913
9954
|
source: "maps_search",
|
|
@@ -9931,6 +9972,7 @@ mapsApp.post("/search", createApiKeyAuth(), async (c) => {
|
|
|
9931
9972
|
});
|
|
9932
9973
|
return mapsErrorResponse(c, msg, "maps_search_failed");
|
|
9933
9974
|
} finally {
|
|
9975
|
+
await cleanupDisposableProxy(parsed.data.kernelApiKey, disposableProxyId);
|
|
9934
9976
|
await driver.close();
|
|
9935
9977
|
}
|
|
9936
9978
|
});
|
|
@@ -9938,7 +9980,7 @@ mapsApp.post("/place", createApiKeyAuth(), async (c) => {
|
|
|
9938
9980
|
const user = c.get("user");
|
|
9939
9981
|
const body = await c.req.json().catch(() => ({}));
|
|
9940
9982
|
const parsed = MapsPlaceOptionsSchema.safeParse({
|
|
9941
|
-
kernelApiKey:
|
|
9983
|
+
kernelApiKey: browserServiceApiKey(),
|
|
9942
9984
|
...body
|
|
9943
9985
|
});
|
|
9944
9986
|
if (!parsed.success) {
|
|
@@ -10006,9 +10048,593 @@ mapsApp.post("/place", createApiKeyAuth(), async (c) => {
|
|
|
10006
10048
|
}
|
|
10007
10049
|
});
|
|
10008
10050
|
|
|
10009
|
-
// src/api/
|
|
10051
|
+
// src/api/directory-routes.ts
|
|
10010
10052
|
import { Hono as Hono6 } from "hono";
|
|
10011
10053
|
|
|
10054
|
+
// src/directory/directory-workflow.ts
|
|
10055
|
+
import { mkdir as mkdir2, writeFile } from "fs/promises";
|
|
10056
|
+
import { join as join4 } from "path";
|
|
10057
|
+
import { z as z15 } from "zod";
|
|
10058
|
+
|
|
10059
|
+
// src/directory/csv.ts
|
|
10060
|
+
/**
 * Parse CSV text into an array of rows, each row an array of string cells.
 *
 * Rules implemented here:
 * - `"` toggles quoted mode; inside quotes `""` yields a literal quote and
 *   commas / newlines are kept as data.
 * - Outside quotes `,` ends a cell, `\n` ends a row, and `\r` is dropped.
 * - A final row without a trailing newline is still emitted when it holds
 *   any cell or any pending text.
 *
 * @param {string} text Raw CSV content.
 * @returns {string[][]}
 */
function parseCsv(text) {
  const table = [];
  let current = [];
  let cell = "";
  let insideQuotes = false;
  let pos = 0;
  while (pos < text.length) {
    const c = text[pos];
    if (insideQuotes) {
      if (c !== '"') {
        cell += c;
      } else if (text[pos + 1] === '"') {
        // Escaped quote: emit one quote and skip its partner.
        cell += '"';
        pos += 1;
      } else {
        insideQuotes = false;
      }
    } else {
      switch (c) {
        case '"':
          insideQuotes = true;
          break;
        case ",":
          current.push(cell);
          cell = "";
          break;
        case "\n":
          current.push(cell);
          table.push(current);
          current = [];
          cell = "";
          break;
        case "\r":
          // Carriage returns outside quotes are ignored (CRLF input).
          break;
        default:
          cell += c;
      }
    }
    pos += 1;
  }
  if (cell.length > 0 || current.length > 0) {
    current.push(cell);
    table.push(current);
  }
  return table;
}
|
|
10099
|
+
/**
 * Parse CSV text into plain-object records keyed by the header row.
 *
 * Rows whose cells are all blank are discarded before the header is taken.
 * Header names are trimmed; data values are kept as-is, and a missing cell
 * becomes "".
 *
 * @param {string} text Raw CSV content (first non-blank row is the header).
 * @returns {Array<Object<string, string>>}
 */
function csvRecords(text) {
  const nonBlankRows = parseCsv(text).filter((cells) => cells.some((cell) => cell.trim() !== ""));
  const [headerRow, ...dataRows] = nonBlankRows;
  const columns = headerRow?.map((cell) => cell.trim()) ?? [];
  return dataRows.map((cells) => {
    const record = {};
    columns.forEach((column, position) => {
      record[column] = cells[position] ?? "";
    });
    return record;
  });
}
|
|
10110
|
+
/**
 * Render one value as a CSV cell.
 *
 * null / undefined become an empty cell. Anything else is stringified and,
 * when it contains a quote, comma, LF or CR, wrapped in double quotes with
 * embedded quotes doubled.
 *
 * @param {*} value
 * @returns {string}
 */
function csvCell(value) {
  if (value == null) return "";
  const raw = String(value);
  const needsQuoting = /[",\n\r]/.test(raw);
  if (!needsQuoting) return raw;
  return `"${raw.replace(/"/g, '""')}"`;
}
|
|
10115
|
+
/**
 * Serialize records to CSV text with a header line and a trailing newline.
 *
 * Header names go through `csvCell` just like data cells, so a header that
 * contains a comma, quote or newline cannot corrupt the header row. Values
 * are still looked up on each row by the raw (unescaped) header name.
 *
 * @param {string[]} headers Column names, in output order.
 * @param {Array<Object<string, *>>} rows Records to serialize.
 * @returns {string} Lines joined with "\n", ending in "\n".
 */
function rowsToCsv(headers, rows) {
  const lines = [headers.map((header) => csvCell(header)).join(",")];
  for (const row of rows) {
    lines.push(headers.map((header) => csvCell(row[header])).join(","));
  }
  return `${lines.join("\n")}\n`;
}
|
|
10121
|
+
|
|
10122
|
+
// src/directory/location-db.ts
|
|
10123
|
+
import { access, readFile } from "fs/promises";
|
|
10124
|
+
var POPULATION_YEARS = [2020, 2021, 2022, 2023, 2024, 2025];
|
|
10125
|
+
var STATE_META = {
|
|
10126
|
+
AL: { abbr: "AL", fips: "01", name: "Alabama" },
|
|
10127
|
+
AK: { abbr: "AK", fips: "02", name: "Alaska" },
|
|
10128
|
+
AZ: { abbr: "AZ", fips: "04", name: "Arizona" },
|
|
10129
|
+
AR: { abbr: "AR", fips: "05", name: "Arkansas" },
|
|
10130
|
+
CA: { abbr: "CA", fips: "06", name: "California" },
|
|
10131
|
+
CO: { abbr: "CO", fips: "08", name: "Colorado" },
|
|
10132
|
+
CT: { abbr: "CT", fips: "09", name: "Connecticut" },
|
|
10133
|
+
DE: { abbr: "DE", fips: "10", name: "Delaware" },
|
|
10134
|
+
DC: { abbr: "DC", fips: "11", name: "District of Columbia" },
|
|
10135
|
+
FL: { abbr: "FL", fips: "12", name: "Florida" },
|
|
10136
|
+
GA: { abbr: "GA", fips: "13", name: "Georgia" },
|
|
10137
|
+
HI: { abbr: "HI", fips: "15", name: "Hawaii" },
|
|
10138
|
+
ID: { abbr: "ID", fips: "16", name: "Idaho" },
|
|
10139
|
+
IL: { abbr: "IL", fips: "17", name: "Illinois" },
|
|
10140
|
+
IN: { abbr: "IN", fips: "18", name: "Indiana" },
|
|
10141
|
+
IA: { abbr: "IA", fips: "19", name: "Iowa" },
|
|
10142
|
+
KS: { abbr: "KS", fips: "20", name: "Kansas" },
|
|
10143
|
+
KY: { abbr: "KY", fips: "21", name: "Kentucky" },
|
|
10144
|
+
LA: { abbr: "LA", fips: "22", name: "Louisiana" },
|
|
10145
|
+
ME: { abbr: "ME", fips: "23", name: "Maine" },
|
|
10146
|
+
MD: { abbr: "MD", fips: "24", name: "Maryland" },
|
|
10147
|
+
MA: { abbr: "MA", fips: "25", name: "Massachusetts" },
|
|
10148
|
+
MI: { abbr: "MI", fips: "26", name: "Michigan" },
|
|
10149
|
+
MN: { abbr: "MN", fips: "27", name: "Minnesota" },
|
|
10150
|
+
MS: { abbr: "MS", fips: "28", name: "Mississippi" },
|
|
10151
|
+
MO: { abbr: "MO", fips: "29", name: "Missouri" },
|
|
10152
|
+
MT: { abbr: "MT", fips: "30", name: "Montana" },
|
|
10153
|
+
NE: { abbr: "NE", fips: "31", name: "Nebraska" },
|
|
10154
|
+
NV: { abbr: "NV", fips: "32", name: "Nevada" },
|
|
10155
|
+
NH: { abbr: "NH", fips: "33", name: "New Hampshire" },
|
|
10156
|
+
NJ: { abbr: "NJ", fips: "34", name: "New Jersey" },
|
|
10157
|
+
NM: { abbr: "NM", fips: "35", name: "New Mexico" },
|
|
10158
|
+
NY: { abbr: "NY", fips: "36", name: "New York" },
|
|
10159
|
+
NC: { abbr: "NC", fips: "37", name: "North Carolina" },
|
|
10160
|
+
ND: { abbr: "ND", fips: "38", name: "North Dakota" },
|
|
10161
|
+
OH: { abbr: "OH", fips: "39", name: "Ohio" },
|
|
10162
|
+
OK: { abbr: "OK", fips: "40", name: "Oklahoma" },
|
|
10163
|
+
OR: { abbr: "OR", fips: "41", name: "Oregon" },
|
|
10164
|
+
PA: { abbr: "PA", fips: "42", name: "Pennsylvania" },
|
|
10165
|
+
RI: { abbr: "RI", fips: "44", name: "Rhode Island" },
|
|
10166
|
+
SC: { abbr: "SC", fips: "45", name: "South Carolina" },
|
|
10167
|
+
SD: { abbr: "SD", fips: "46", name: "South Dakota" },
|
|
10168
|
+
TN: { abbr: "TN", fips: "47", name: "Tennessee" },
|
|
10169
|
+
TX: { abbr: "TX", fips: "48", name: "Texas" },
|
|
10170
|
+
UT: { abbr: "UT", fips: "49", name: "Utah" },
|
|
10171
|
+
VT: { abbr: "VT", fips: "50", name: "Vermont" },
|
|
10172
|
+
VA: { abbr: "VA", fips: "51", name: "Virginia" },
|
|
10173
|
+
WA: { abbr: "WA", fips: "53", name: "Washington" },
|
|
10174
|
+
WV: { abbr: "WV", fips: "54", name: "West Virginia" },
|
|
10175
|
+
WI: { abbr: "WI", fips: "55", name: "Wisconsin" },
|
|
10176
|
+
WY: { abbr: "WY", fips: "56", name: "Wyoming" }
|
|
10177
|
+
};
|
|
10178
|
+
var STATE_BY_NAME = new Map(Object.values(STATE_META).map((s) => [s.name.toLowerCase(), s]));
|
|
10179
|
+
/**
 * Resolve user input to a `STATE_META` entry.
 *
 * The trimmed input is tried first as an abbreviation (upper-cased key of
 * `STATE_META`), then as a full name (lower-cased key of `STATE_BY_NAME`).
 *
 * @param {string} input Abbreviation or full state name, any case.
 * @returns {{abbr: string, fips: string, name: string}}
 * @throws {Error} When the input matches neither lookup.
 */
function normalizeState(input) {
  const candidate = input.trim();
  const match = STATE_META[candidate.toUpperCase()] || STATE_BY_NAME.get(candidate.toLowerCase());
  if (!match) {
    throw new Error(`Unsupported state "${input}". Use a US state abbreviation such as TN.`);
  }
  return match;
}
|
|
10187
|
+
/**
 * Build the URL of the per-state sub-county population CSV on www2.census.gov.
 * @param {string} fips Two-digit state FIPS code, e.g. "47".
 * @returns {string}
 */
function censusStateUrl(fips) {
  const datasetDir = "https://www2.census.gov/programs-surveys/popest/datasets/2020-2025/cities/totals";
  return `${datasetDir}/sub-est2025_${fips}.csv`;
}
|
|
10190
|
+
/**
 * Reduce a city name to a comparison key: diacritics folded to their base
 * letters, lower-cased, every run of non-alphanumerics collapsed to a single
 * space, and trimmed.
 *
 * Folding accents first means "Cañon City" and "Canon City" share the key
 * "canon city". Without it the accented letter would become a space and the
 * two spellings would never match.
 *
 * @param {string} value City name.
 * @returns {string}
 */
function normalizeCityKey(value) {
  return value
    .normalize("NFD")
    // Drop the combining marks that NFD split off from their base letters.
    .replace(/[\u0300-\u036f]/g, "")
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, " ")
    .trim();
}
|
|
10193
|
+
/**
 * Turn a Census place name into the city name used for display and search.
 *
 * - Names starting with "Nashville-Davidson metropolitan government" map to
 *   "Nashville".
 * - A trailing "(balance)" marker is removed before suffix handling, so
 *   "Indianapolis city (balance)" becomes "Indianapolis" rather than keeping
 *   both the type word and the marker.
 * - A trailing place-type word (city, town, village, municipality, borough)
 *   is removed.
 *
 * @param {string} name Census NAME value.
 * @returns {string}
 */
function displayCityFromCensus(name) {
  if (/^Nashville-Davidson metropolitan government/i.test(name)) return "Nashville";
  const withoutBalance = name.replace(/\s*\(balance\)\s*$/i, "");
  return withoutBalance.replace(/\s+(city|town|village|municipality|borough)$/i, "").trim();
}
|
|
10197
|
+
/**
 * Convert a CSV cell to a finite number, or null when that is not possible.
 *
 * null, undefined, blank strings and anything `Number()` cannot turn into a
 * finite value all yield null. Non-string inputs are stringified first, so
 * the function never throws on them.
 *
 * @param {*} value Usually a string cell; may be missing.
 * @returns {number|null}
 */
function numberOrNull(value) {
  if (value == null) return null;
  const text = String(value).trim();
  if (text === "") return null;
  const parsed = Number(text);
  return Number.isFinite(parsed) ? parsed : null;
}
|
|
10202
|
+
/**
 * Decide whether caller-supplied local file paths are accepted.
 *
 * `MCP_SCRAPER_ALLOW_LOCAL_LOCATION_FILES === "true"` always allows them.
 * Otherwise they are refused when `VERCEL === "1"` or
 * `NODE_ENV === "production"`, and allowed in every other environment.
 *
 * @returns {boolean}
 */
function localLocationFileAllowed() {
  const {
    MCP_SCRAPER_ALLOW_LOCAL_LOCATION_FILES: override,
    VERCEL: vercel,
    NODE_ENV: nodeEnv
  } = process.env;
  if (override === "true") return true;
  const deployed = vercel === "1" || nodeEnv === "production";
  return !deployed;
}
|
|
10207
|
+
/**
 * Return the trimmed path after confirming it is accessible.
 *
 * @param {string|null|undefined} value Candidate path.
 * @returns {Promise<string|null>} The trimmed path, or null when the input
 *   is missing or blank.
 * @throws Whatever `access()` rejects with when the path cannot be accessed.
 */
async function existingPath(value) {
  const candidate = value?.trim();
  if (candidate) {
    await access(candidate);
    return candidate;
  }
  return null;
}
|
|
10213
|
+
/**
 * Pick the US ZIPS CSV path to use: the caller's path when given, otherwise
 * `MCP_SCRAPER_USZIPS_CSV_PATH`.
 *
 * @param {string|undefined} requestedPath Path supplied with the request.
 * @returns {Promise<string|null>} Result of `existingPath()` for the chosen
 *   source, or null when no source is configured.
 * @throws {Error} When a request path is given but
 *   `localLocationFileAllowed()` returns false.
 */
async function resolveUsZipsPath(requestedPath) {
  if (requestedPath && !localLocationFileAllowed()) {
    throw new Error("usZipsCsvPath is only accepted in local/test mode. Set MCP_SCRAPER_USZIPS_CSV_PATH on the server for deployed use.");
  }
  const candidate = requestedPath ?? process.env.MCP_SCRAPER_USZIPS_CSV_PATH;
  return candidate ? existingPath(candidate) : null;
}
|
|
10222
|
+
/**
 * Load the US ZIPS CSV and group its ZIP codes and counties by city for one
 * state.
 *
 * Returns an empty result when neither a request path nor
 * `MCP_SCRAPER_USZIPS_CSV_PATH` is set, or when `resolveUsZipsPath()`
 * yields no path. Rows are kept only when their state column equals
 * `stateAbbr` and both ZIP and city are non-empty. Groups are keyed by
 * `normalizeCityKey(city)`.
 *
 * @param {string} stateAbbr Upper-case state abbreviation to keep.
 * @param {string|undefined} requestedPath Optional caller-supplied CSV path.
 * @param {string[]} warnings Receives a message when no rows matched.
 * @returns {Promise<{path: string|null, groups: Map<string, {zips: Set<string>, counties: Set<string>}>}>}
 */
async function loadZipGroups(stateAbbr, requestedPath, warnings) {
  const emptyResult = () => ({ path: null, groups: /* @__PURE__ */ new Map() });
  const hasSource = Boolean(requestedPath) || Boolean(process.env.MCP_SCRAPER_USZIPS_CSV_PATH);
  if (!hasSource) return emptyResult();
  const csvPath = await resolveUsZipsPath(requestedPath);
  if (!csvPath) return emptyResult();
  const rows = csvRecords(await readFile(csvPath, "utf8"));
  const groups = /* @__PURE__ */ new Map();
  for (const row of rows) {
    // Accept the alternative column names different ZIP datasets use.
    const rowState = (row.state_abbr ?? row.state ?? "").trim().toUpperCase();
    const zip = (row.zipcode ?? row.zip ?? row.zip_code ?? "").trim();
    const city = (row.city ?? "").trim();
    const county = (row.county ?? "").trim();
    if (rowState !== stateAbbr || !zip || !city) continue;
    const key = normalizeCityKey(city);
    let bucket = groups.get(key);
    if (!bucket) {
      bucket = { zips: /* @__PURE__ */ new Set(), counties: /* @__PURE__ */ new Set() };
      groups.set(key, bucket);
    }
    bucket.zips.add(zip);
    if (county) bucket.counties.add(county);
  }
  if (groups.size === 0) warnings.push(`No ${stateAbbr} ZIP groups found in ${csvPath}`);
  return { path: csvPath, groups };
}
|
|
10245
|
+
/**
 * Build the list of city "markets" for a state from the Census population
 * CSV, optionally enriched with ZIP / county groups.
 *
 * Steps: normalize the state, fetch and parse the Census CSV, optionally
 * load ZIP groups, keep rows with SUMLEV "162" whose population for the
 * requested year is at least `minPopulation`, sort by population
 * (descending) then city name, and keep the first `maxCities`.
 *
 * @param {object} options
 * @param {string} options.state Abbreviation or full state name.
 * @param {number} options.populationYear Year selecting the POPESTIMATE column.
 * @param {number} options.minPopulation Minimum population to include.
 * @param {number} options.maxCities Maximum number of markets returned.
 * @param {boolean} options.includeZipGroups Whether to load ZIP groups.
 * @param {string} [options.usZipsCsvPath] Optional ZIP CSV path.
 * @returns {Promise<{markets: object[], censusSourceUrl: string, usZipsSourcePath: string|null, warnings: string[]}>}
 * @throws {Error} When the Census request returns a non-OK status.
 */
async function resolveDirectoryMarkets(options) {
  const stateInfo = normalizeState(options.state);
  const censusSourceUrl = censusStateUrl(stateInfo.fips);
  const warnings = [];
  const reply = await fetch(censusSourceUrl);
  if (!reply.ok) {
    throw new Error(`Census location dataset request failed: ${reply.status} ${reply.statusText}`);
  }
  const rows = csvRecords(await reply.text());
  const populationColumn = `POPESTIMATE${options.populationYear}`;
  let zipData = { path: null, groups: /* @__PURE__ */ new Map() };
  if (options.includeZipGroups) {
    zipData = await loadZipGroups(stateInfo.abbr, options.usZipsCsvPath, warnings);
  }
  const candidates = [];
  for (const row of rows) {
    if (row.SUMLEV !== "162") continue;
    const population = numberOrNull(row[populationColumn]);
    if (population === null || population < options.minPopulation) continue;
    const censusName = row.NAME?.trim() ?? "";
    if (!censusName) continue;
    const city = displayCityFromCensus(censusName);
    const zipGroup = zipData.groups.get(normalizeCityKey(city));
    candidates.push({
      city,
      state: stateInfo.abbr,
      location: `${city}, ${stateInfo.abbr}`,
      cityKey: `${city}|${stateInfo.abbr}`,
      censusName,
      population,
      populationYear: options.populationYear,
      estimatesBase2020: numberOrNull(row.ESTIMATESBASE2020),
      zips: zipGroup ? [...zipGroup.zips].sort() : [],
      counties: zipGroup ? [...zipGroup.counties].sort() : []
    });
  }
  // Largest first; ties broken alphabetically by city.
  candidates.sort((a, b) => b.population - a.population || a.city.localeCompare(b.city));
  const markets = candidates.slice(0, options.maxCities);
  const hasUnmatchedCity = markets.some((m) => m.zips.length === 0);
  if (options.includeZipGroups && zipData.path && hasUnmatchedCity) {
    warnings.push("Some Census places did not match the configured US ZIPS city names.");
  }
  return { markets, censusSourceUrl, usZipsSourcePath: zipData.path, warnings };
}
|
|
10279
|
+
|
|
10280
|
+
// src/directory/directory-workflow.ts
|
|
10281
|
+
var DIRECTORY_MAX_ATTEMPTS = 3;
|
|
10282
|
+
var DIRECTORY_LOCATION_PROXY_MAX_ATTEMPTS = 5;
|
|
10283
|
+
var DirectoryWorkflowOptionsSchema = z15.object({
|
|
10284
|
+
query: z15.string().min(1),
|
|
10285
|
+
state: z15.string().min(2).default("TN"),
|
|
10286
|
+
minPopulation: z15.number().int().min(0).default(1e5),
|
|
10287
|
+
populationYear: z15.union(POPULATION_YEARS.map((year) => z15.literal(year))).default(2025),
|
|
10288
|
+
maxCities: z15.number().int().min(1).max(100).default(25),
|
|
10289
|
+
maxResultsPerCity: z15.number().int().min(1).max(50).default(50),
|
|
10290
|
+
concurrency: z15.number().int().min(1).max(5).default(5),
|
|
10291
|
+
includeZipGroups: z15.boolean().default(true),
|
|
10292
|
+
usZipsCsvPath: z15.string().optional(),
|
|
10293
|
+
saveCsv: z15.boolean().default(true),
|
|
10294
|
+
gl: z15.string().length(2).default("us"),
|
|
10295
|
+
hl: z15.string().length(2).default("en"),
|
|
10296
|
+
proxyMode: z15.enum(["location", "configured", "none"]).default("location"),
|
|
10297
|
+
proxyZip: z15.string().regex(/^\d{5}$/).optional(),
|
|
10298
|
+
debug: z15.boolean().default(false),
|
|
10299
|
+
headless: z15.boolean().default(true),
|
|
10300
|
+
kernelApiKey: z15.string().optional()
|
|
10301
|
+
});
|
|
10302
|
+
/**
 * Best-effort deletion of a disposable proxy used by the directory workflow.
 *
 * Skipped when either argument is missing. A failing delete is reported as
 * a JSON warning and never propagates to the caller.
 *
 * @param {string|undefined} kernelApiKey API key passed to `deleteKernelProxyId`.
 * @param {string|undefined} proxyId Id of the proxy to delete.
 * @returns {Promise<void>}
 */
async function cleanupDisposableProxy2(kernelApiKey, proxyId) {
  const canDelete = Boolean(kernelApiKey) && Boolean(proxyId);
  if (canDelete) {
    try {
      await deleteKernelProxyId(kernelApiKey, proxyId);
    } catch (failure) {
      const message = failure instanceof Error ? failure.message : String(failure);
      const payload = {
        event: "directory_workflow_proxy_delete_failed",
        proxy_id_suffix: proxyId.slice(-6),
        message
      };
      console.warn(JSON.stringify(payload));
    }
  }
}
|
|
10314
|
+
/**
 * Number of attempts allowed per city for the given proxy mode.
 * "location" uses `DIRECTORY_LOCATION_PROXY_MAX_ATTEMPTS`; every other mode
 * uses `DIRECTORY_MAX_ATTEMPTS`.
 * @param {string} proxyMode
 * @returns {number}
 */
function maxAttemptsForProxyMode(proxyMode) {
  if (proxyMode === "location") {
    return DIRECTORY_LOCATION_PROXY_MAX_ATTEMPTS;
  }
  return DIRECTORY_MAX_ATTEMPTS;
}
|
|
10317
|
+
/**
 * Extract a printable message from any thrown value: `err.message` for
 * Error instances, `String(err)` for everything else.
 * @param {*} err
 * @returns {string}
 */
function errorMessage(err) {
  if (err instanceof Error) {
    return err.message;
  }
  return String(err);
}
|
|
10320
|
+
/**
 * Report whether an error message matches one of the known proxy / tunnel
 * connection failure phrases (case-insensitive).
 * @param {string} message
 * @returns {boolean}
 */
function looksLikeProxyTunnelFailure(message) {
  const tunnelFailurePattern = /ERR_TUNNEL_CONNECTION_FAILED|ERR_PROXY_CONNECTION_FAILED|ERR_SOCKS_CONNECTION_FAILED|tunnel connection failed|proxy connection failed|transport error: proxy/i;
  return tunnelFailurePattern.test(message);
}
|
|
10323
|
+
/**
 * Report whether an error message matches one of the known
 * "proxy could not be obtained" phrases (case-insensitive).
 * @param {string} message
 * @returns {boolean}
 */
function looksLikeProxyUnavailable(message) {
  const unavailablePattern = /proxy unavailable|proxy_unavailable|connection_test_failed|did not return a proxy id|configured fallback/i;
  return unavailablePattern.test(message);
}
|
|
10326
|
+
/**
 * Decide whether a failed city search is worth retrying.
 *
 * Retryable: any `CaptchaError`; any message mentioning a timeout or
 * deadline; and, only in "location" proxy mode, messages that look like a
 * proxy tunnel failure or an unavailable proxy.
 *
 * @param {*} err Thrown value from the attempt.
 * @param {string} proxyMode Proxy mode of the workflow.
 * @returns {boolean}
 */
function retryableCitySearchError(err, proxyMode) {
  if (err instanceof CaptchaError) return true;
  const text = errorMessage(err);
  const timedOut = /timeout|timed out|Timeout \d+ms exceeded|deadline/i.test(text);
  if (timedOut) return true;
  if (proxyMode !== "location") return false;
  return looksLikeProxyTunnelFailure(text) || looksLikeProxyUnavailable(text);
}
|
|
10332
|
+
/**
 * Choose the ZIP code used to place the proxy for one attempt.
 *
 * An explicit `options.proxyZip` always wins. Otherwise the market's ZIPs
 * are cycled by attempt index, so each retry uses the next ZIP in the list.
 *
 * @param {{proxyZip?: string}} options Workflow options.
 * @param {{zips: string[]}} market Market being searched.
 * @param {number} attemptIndex Zero-based attempt number.
 * @returns {string|undefined} undefined when no ZIP is available.
 */
function proxyZipForAttempt(options, market, attemptIndex) {
  const { proxyZip } = options;
  if (proxyZip) return proxyZip;
  const { zips } = market;
  return zips.length ? zips[attemptIndex % zips.length] : void 0;
}
|
|
10337
|
+
/**
 * Map `items` through an async function with at most `limit` calls in
 * flight, returning results in input order.
 *
 * The worker count is clamped to at least one. A limit of 0, a negative
 * number, NaN or undefined therefore still processes every item (serially)
 * rather than resolving to an array of unprocessed holes.
 *
 * @param {Array} items Inputs to process.
 * @param {number} limit Maximum number of concurrent `fn` calls.
 * @param {(item: *) => *} fn Mapper; may return a promise. A rejection
 *   rejects the returned promise.
 * @returns {Promise<Array>} Results positioned to match `items`.
 */
async function mapLimit(items, limit, fn) {
  const results = new Array(items.length);
  // `|| 1` covers NaN and 0; Math.max covers negatives and an empty input.
  const workerCount = Math.max(1, Math.min(Math.floor(limit) || 1, items.length));
  let cursor = 0;
  async function worker() {
    while (cursor < items.length) {
      // Claim the index before awaiting so no two workers take the same item.
      const index = cursor;
      cursor += 1;
      results[index] = await fn(items[index]);
    }
  }
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
|
|
10350
|
+
/**
 * Run one Maps search attempt for a market with a fresh browser driver.
 *
 * Resolves a proxy for the attempt, runs `MapsSearchExtractor.extract`, and
 * returns a per-city result record with status "ok" or "empty". Errors from
 * proxy resolution or extraction propagate to the caller, which decides
 * whether to retry.
 *
 * Cleanup always runs: the disposable proxy (if any) is deleted and the
 * driver is closed, mirroring the `finally` block of the `/search` route.
 * A failure while closing the driver is logged instead of thrown so that it
 * cannot mask the attempt's own result or error.
 * NOTE(review): `extract()` may already close the driver internally; the
 * route handler calls `driver.close()` regardless, so a second close is
 * presumed harmless — confirm against BrowserDriver.
 *
 * @param {object} options Parsed directory workflow options.
 * @param {object} market Market record (city, state, location, zips, ...).
 * @param {number} attemptIndex Zero-based attempt number; also drives ZIP rotation.
 * @returns {Promise<object>} City result with `status`, `resultCount`,
 *   `durationMs` and `results` taken from the extractor.
 */
async function searchCityAttempt(options, market, attemptIndex) {
  const driver = new BrowserDriver();
  const extractor = new MapsSearchExtractor(driver);
  let disposableProxyId;
  try {
    const proxyZip = proxyZipForAttempt(options, market, attemptIndex);
    const resolution = await resolveKernelProxyId({
      kernelApiKey: options.kernelApiKey,
      proxyMode: options.proxyMode,
      configuredKernelProxyId: browserServiceProxyId(),
      location: market.location,
      proxyZip,
      gl: options.gl,
      attemptIndex,
      fresh: options.proxyMode === "location"
    });
    // Remember the id immediately so the proxy is deleted even if extraction fails.
    disposableProxyId = resolution.disposableProxyId;
    const result = await extractor.extract({
      query: options.query,
      location: market.location,
      gl: options.gl,
      hl: options.hl,
      maxResults: options.maxResultsPerCity,
      headless: options.headless,
      kernelApiKey: options.kernelApiKey,
      kernelProxyId: options.proxyMode === "none" ? void 0 : resolution.kernelProxyId,
      kernelProxyResolution: resolution.resolution,
      proxyMode: options.proxyMode,
      proxyZip,
      debug: options.debug
    });
    return {
      city: market.city,
      state: market.state,
      location: market.location,
      cityKey: market.cityKey,
      censusName: market.censusName,
      population: market.population,
      populationYear: market.populationYear,
      zips: market.zips,
      counties: market.counties,
      status: result.results.length ? "ok" : "empty",
      error: null,
      resultCount: result.resultCount,
      durationMs: result.durationMs,
      results: result.results
    };
  } finally {
    await cleanupDisposableProxy2(options.kernelApiKey, disposableProxyId);
    try {
      await driver.close();
    } catch (closeErr) {
      console.warn(JSON.stringify({
        event: "directory_workflow_driver_close_failed",
        city: market.city,
        state: market.state,
        message: closeErr instanceof Error ? closeErr.message : String(closeErr)
      }));
    }
  }
}
|
|
10402
|
+
/**
 * Search one market, retrying failed attempts up to the limit for the
 * configured proxy mode.
 *
 * Each failed attempt is logged as a JSON warning. Retrying stops at the
 * attempt limit or at the first error that `retryableCitySearchError`
 * rejects. When no attempt succeeds, a "failed" city record carrying the
 * last error message is returned — this function does not throw for
 * attempt failures.
 *
 * @param {object} options Parsed directory workflow options.
 * @param {object} market Market record to search.
 * @returns {Promise<object>} The successful attempt's record, or a record
 *   with `status: "failed"` and empty `results`.
 */
async function searchCity(options, market) {
  const startedAt = Date.now();
  const attemptBudget = maxAttemptsForProxyMode(options.proxyMode);
  let failure = null;
  let attempt = 0;
  while (attempt < attemptBudget) {
    try {
      return await searchCityAttempt(options, market, attempt);
    } catch (err) {
      failure = err;
      const isLastAttempt = attempt >= attemptBudget - 1;
      const willRetry = !isLastAttempt && retryableCitySearchError(err, options.proxyMode);
      const logEntry = {
        event: "directory_workflow_city_attempt_failed",
        city: market.city,
        state: market.state,
        attempt_number: attempt + 1,
        max_attempts: attemptBudget,
        will_retry: willRetry,
        message: errorMessage(err)
      };
      console.warn(JSON.stringify(logEntry));
      if (!willRetry) break;
    }
    attempt += 1;
  }
  const { city, state, location, cityKey, censusName, population, populationYear, zips, counties } = market;
  return {
    city,
    state,
    location,
    cityKey,
    censusName,
    population,
    populationYear,
    zips,
    counties,
    status: "failed",
    error: failure ? errorMessage(failure) : "City Maps search failed",
    resultCount: 0,
    durationMs: Date.now() - startedAt,
    results: []
  };
}
|
|
10441
|
+
/**
 * Flattens a directory-workflow result into one plain object per CSV row.
 *
 * A city with no results contributes a single placeholder row whose business
 * columns are all null, so failed or empty cities still appear in the export.
 * A city with results contributes one row per business.
 *
 * A business without a metadata array yields metadata: null (the same value
 * the placeholder row uses) instead of throwing and aborting the whole
 * export. A city whose results is not an array is treated as having none.
 *
 * @param result Workflow result; reads query, extractedAt and cities.
 * @returns Array of row objects keyed by CSV column name.
 */
function csvRowsFor(result) {
  const rows = [];
  for (const city of result.cities) {
    // Columns that are identical on every row emitted for this city.
    const cityColumns = {
      source_query: result.query,
      source_location: city.location,
      city: city.city,
      state: city.state,
      city_key: city.cityKey,
      census_name: city.censusName,
      population: city.population,
      population_year: city.populationYear,
      zip_count: city.zips.length,
      zips: city.zips.join(" "),
      counties: city.counties.join(" | ")
    };
    const statusColumns = {
      result_status: city.status,
      error: city.error,
      extracted_at: result.extractedAt,
      duration_ms: city.durationMs
    };
    const businesses = Array.isArray(city.results) ? city.results : [];
    if (businesses.length === 0) {
      rows.push({
        ...cityColumns,
        result_position: null,
        business_name: null,
        review_stars: null,
        category: null,
        address: null,
        phone: null,
        hours_status: null,
        website_url: null,
        directions_url: null,
        place_url: null,
        cid: null,
        cid_decimal: null,
        metadata: null,
        ...statusColumns
      });
      continue;
    }
    for (const business of businesses) {
      rows.push({
        ...cityColumns,
        result_position: business.position,
        business_name: business.name,
        review_stars: business.rating,
        category: business.category,
        address: business.address,
        phone: business.phone,
        hours_status: business.hoursStatus,
        website_url: business.websiteUrl,
        directions_url: business.directionsUrl,
        place_url: business.placeUrl,
        cid: business.cid,
        cid_decimal: business.cidDecimal,
        // A listing without a metadata list must not abort the export.
        metadata: Array.isArray(business.metadata) ? business.metadata.join(" | ") : null,
        ...statusColumns
      });
    }
  }
  return rows;
}
|
|
10512
|
+
/**
 * Writes the workflow result to a CSV file and returns the file path.
 *
 * The file is placed in the "directory-workflows" folder under
 * outputBaseDir(), which is created if missing. Its name combines the
 * extraction timestamp (":" and "." replaced by "-"), a slug built from state
 * and query (lower-cased, non-alphanumerics collapsed to "-", at most 80
 * characters) and the suffix "-directory-workflow.csv".
 *
 * @param result Workflow result; reads extractedAt, state and query, and is
 *   passed to csvRowsFor for the row data.
 * @returns Path of the written CSV file.
 */
async function saveDirectoryCsv(result) {
  const targetDir = join4(outputBaseDir(), "directory-workflows");
  await mkdir2(targetDir, { recursive: true });

  const timestamp = result.extractedAt.replace(/[:.]/g, "-");
  const label = `${result.state}-${result.query}`.toLowerCase();
  const slug = label
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "")
    .slice(0, 80);
  const csvFile = join4(targetDir, `${timestamp}-${slug}-directory-workflow.csv`);

  // Column order of the exported file.
  const columns = [
    "source_query", "source_location",
    "city", "state", "city_key", "census_name",
    "population", "population_year",
    "zip_count", "zips", "counties",
    "result_position", "business_name", "review_stars", "category",
    "address", "phone", "hours_status",
    "website_url", "directions_url", "place_url",
    "cid", "cid_decimal", "metadata",
    "result_status", "error", "extracted_at", "duration_ms"
  ];

  const csvText = rowsToCsv(columns, csvRowsFor(result));
  await writeFile(csvFile, csvText, "utf8");
  return csvFile;
}
|
|
10551
|
+
/**
 * Executes the Maps search for every market in a resolved plan and assembles
 * the workflow summary.
 *
 * Markets are processed through mapLimit with options.concurrency, one
 * searchCity call per market. When options.saveCsv is truthy the summary is
 * also written with saveDirectoryCsv and the resulting path is returned as
 * csvPath; otherwise csvPath is null.
 *
 * @param options Validated workflow options.
 * @param plan Resolved plan; reads markets, censusSourceUrl,
 *   usZipsSourcePath and warnings.
 * @returns The summary object plus csvPath.
 */
async function runDirectoryWorkflowFromPlan(options, plan) {
  const t0 = Date.now();
  const extractedAt = (/* @__PURE__ */ new Date()).toISOString();
  const runCity = (market) => searchCity(options, market);
  const cities = await mapLimit(plan.markets, options.concurrency, runCity);

  let totalResultCount = 0;
  for (const city of cities) {
    totalResultCount += city.resultCount;
  }

  // Prefer the state as it appears on the first planned market; fall back to the request value.
  const firstMarket = plan.markets[0];
  const summary = {
    query: options.query,
    state: firstMarket?.state ?? options.state.toUpperCase(),
    minPopulation: options.minPopulation,
    populationYear: options.populationYear,
    maxResultsPerCity: options.maxResultsPerCity,
    concurrency: options.concurrency,
    censusSourceUrl: plan.censusSourceUrl,
    usZipsSourcePath: plan.usZipsSourcePath,
    warnings: plan.warnings,
    extractedAt,
    selectedCityCount: plan.markets.length,
    totalResultCount,
    cities,
    durationMs: Date.now() - t0
  };

  let csvPath = null;
  if (options.saveCsv) {
    csvPath = await saveDirectoryCsv(summary);
  }
  return { ...summary, csvPath };
}
|
|
10574
|
+
|
|
10575
|
+
// src/api/directory-routes.ts
|
|
10576
|
+
var directoryApp = new Hono6();
// POST /run: validates the request, debits one maps_search cost per planned
// market up front, runs the workflow, refunds the cost of failed cities, logs
// the request and returns the workflow result as JSON.
//
// Responses: 400 on schema validation failure, 503 when no browser-service key
// is configured and proxyMode is not "none", 402 when the debit is rejected,
// 500 (with a refund) when the workflow or its bookkeeping throws.
directoryApp.post("/run", createApiKeyAuth(), async (c) => {
  const user = c.get("user");
  // An unreadable JSON body becomes {} so schema validation produces the 400.
  const body = await c.req.json().catch(() => ({}));
  const kernelApiKey = browserServiceApiKey();
  // kernelApiKey is spread last so a caller-supplied value can never override it.
  const parsed = DirectoryWorkflowOptionsSchema.safeParse({
    ...body,
    kernelApiKey
  });
  if (!parsed.success) {
    return c.json({ error: parsed.error.issues[0]?.message ?? "Invalid request" }, 400);
  }
  if (!kernelApiKey && parsed.data.proxyMode !== "none") {
    return c.json({ error: "Browser service API key is required for directory workflow Maps searches unless proxyMode is none" }, 503);
  }
  const plan = await resolveDirectoryMarkets(parsed.data);
  const requiredMc = plan.markets.length * MC_COSTS.maps_search;
  if (requiredMc > 0) {
    const debit = await debitMc(
      user.id,
      requiredMc,
      LedgerOperation.MAPS_SEARCH,
      `directory_workflow ${parsed.data.query} ${parsed.data.state} ${plan.markets.length} cities`
    );
    if (!debit.ok) return c.json(insufficientBalanceResponse(debit.balance_mc, requiredMc), 402);
  }
  // Amount already credited back for failed cities. The catch block refunds
  // only the remainder, so a failure after the partial refund (for example in
  // logRequestEvent) can never credit more than was debited.
  let refundedMc = 0;
  try {
    const result = await runDirectoryWorkflowFromPlan(parsed.data, plan);
    const failedCities = result.cities.filter((city) => city.status === "failed").length;
    if (failedCities > 0) {
      const failedCitiesMc = failedCities * MC_COSTS.maps_search;
      await creditMc(user.id, failedCitiesMc, LedgerOperation.REFUND, "failed directory_workflow city maps searches");
      refundedMc = failedCitiesMc;
    }
    await logRequestEvent({
      userId: user.id,
      source: "directory_workflow",
      // The request counts as failed only when every selected city failed.
      status: failedCities === result.cities.length && result.cities.length > 0 ? "failed" : "done",
      query: result.query,
      location: result.state,
      resultCount: result.totalResultCount,
      result
    });
    return c.json(result);
  } catch (err) {
    const outstandingMc = requiredMc - refundedMc;
    if (outstandingMc > 0) {
      await creditMc(user.id, outstandingMc, LedgerOperation.REFUND, "failed directory_workflow call");
    }
    const message = err instanceof Error ? err.message : String(err);
    await logRequestEvent({
      userId: user.id,
      source: "directory_workflow",
      status: "failed",
      query: parsed.data.query,
      location: parsed.data.state,
      error: message
    });
    return c.json({ error: message, error_code: "directory_workflow_failed", retryable: true }, 500);
  }
});
|
|
10634
|
+
|
|
10635
|
+
// src/api/serp-intelligence-routes.ts
|
|
10636
|
+
import { Hono as Hono7 } from "hono";
|
|
10637
|
+
|
|
10012
10638
|
// src/serp-intelligence/page-snapshot-extractor.ts
|
|
10013
10639
|
import { createHash } from "crypto";
|
|
10014
10640
|
import pLimit3 from "p-limit";
|
|
@@ -10319,7 +10945,7 @@ async function capturePageSnapshots(targets, options = {}) {
|
|
|
10319
10945
|
}
|
|
10320
10946
|
|
|
10321
10947
|
// src/serp-intelligence/schemas.ts
|
|
10322
|
-
import { z as
|
|
10948
|
+
import { z as z16 } from "zod";
|
|
10323
10949
|
var SerpIntelligenceDeviceValues = ["desktop", "mobile"];
|
|
10324
10950
|
var SerpIntelligenceProxyModeValues = ["location", "configured", "none"];
|
|
10325
10951
|
var SerpIntelligenceAttemptOutcomeValues = [
|
|
@@ -10331,6 +10957,8 @@ var SerpIntelligenceAttemptOutcomeValues = [
|
|
|
10331
10957
|
"request_aborted",
|
|
10332
10958
|
"timeout",
|
|
10333
10959
|
"location_mismatch",
|
|
10960
|
+
"proxy_tunnel_failed",
|
|
10961
|
+
"proxy_unavailable",
|
|
10334
10962
|
"mcp_unavailable",
|
|
10335
10963
|
"error"
|
|
10336
10964
|
];
|
|
@@ -10379,171 +11007,171 @@ function isPublicHttpUrl(value) {
|
|
|
10379
11007
|
return false;
|
|
10380
11008
|
}
|
|
10381
11009
|
}
|
|
10382
|
-
var SerpIntelligencePublicHttpUrlSchema =
|
|
10383
|
-
var SerpIntelligenceCaptureBodySchema =
|
|
10384
|
-
query:
|
|
10385
|
-
location:
|
|
10386
|
-
gl:
|
|
10387
|
-
hl:
|
|
10388
|
-
device:
|
|
10389
|
-
proxyMode:
|
|
10390
|
-
proxyZip:
|
|
10391
|
-
pages:
|
|
10392
|
-
debug:
|
|
10393
|
-
includePageSnapshots:
|
|
10394
|
-
pageSnapshotLimit:
|
|
11010
|
+
var SerpIntelligencePublicHttpUrlSchema = z16.string().url().refine(isPublicHttpUrl, "url must be a public HTTP or HTTPS URL");
|
|
11011
|
+
var SerpIntelligenceCaptureBodySchema = z16.object({
|
|
11012
|
+
query: z16.string().trim().min(1, "query is required"),
|
|
11013
|
+
location: z16.string().trim().min(1).optional(),
|
|
11014
|
+
gl: z16.string().trim().length(2).default("us"),
|
|
11015
|
+
hl: z16.string().trim().length(2).default("en"),
|
|
11016
|
+
device: z16.enum(SerpIntelligenceDeviceValues).default("desktop"),
|
|
11017
|
+
proxyMode: z16.enum(SerpIntelligenceProxyModeValues).default("location"),
|
|
11018
|
+
proxyZip: z16.string().regex(/^\d{5}$/).optional(),
|
|
11019
|
+
pages: z16.number().int().min(1).max(2).default(1),
|
|
11020
|
+
debug: z16.boolean().default(false),
|
|
11021
|
+
includePageSnapshots: z16.boolean().default(false),
|
|
11022
|
+
pageSnapshotLimit: z16.number().int().min(0).max(10).default(0)
|
|
10395
11023
|
}).strict();
|
|
10396
|
-
var SerpIntelligencePageSnapshotRequestSchema =
|
|
11024
|
+
var SerpIntelligencePageSnapshotRequestSchema = z16.object({
|
|
10397
11025
|
url: SerpIntelligencePublicHttpUrlSchema,
|
|
10398
|
-
sourceKind:
|
|
10399
|
-
sourcePosition:
|
|
11026
|
+
sourceKind: z16.enum(SerpPageSnapshotSourceKindValues).default("configured_target"),
|
|
11027
|
+
sourcePosition: z16.number().int().min(1).optional()
|
|
10400
11028
|
}).strict();
|
|
10401
|
-
var SerpIntelligencePageSnapshotsBodySchema =
|
|
10402
|
-
urls:
|
|
10403
|
-
targets:
|
|
10404
|
-
maxConcurrency:
|
|
10405
|
-
timeoutMs:
|
|
10406
|
-
debug:
|
|
11029
|
+
var SerpIntelligencePageSnapshotsBodySchema = z16.object({
|
|
11030
|
+
urls: z16.array(SerpIntelligencePublicHttpUrlSchema).min(1).max(25),
|
|
11031
|
+
targets: z16.array(SerpIntelligencePageSnapshotRequestSchema).min(1).max(25).optional(),
|
|
11032
|
+
maxConcurrency: z16.number().int().min(1).max(5).default(2),
|
|
11033
|
+
timeoutMs: z16.number().int().min(1e3).max(6e4).default(15e3),
|
|
11034
|
+
debug: z16.boolean().default(false)
|
|
10407
11035
|
}).strict();
|
|
10408
|
-
var SerpIntelligenceAICitationSchema =
|
|
10409
|
-
text:
|
|
10410
|
-
href:
|
|
11036
|
+
var SerpIntelligenceAICitationSchema = z16.object({
|
|
11037
|
+
text: z16.string(),
|
|
11038
|
+
href: z16.string()
|
|
10411
11039
|
}).strict();
|
|
10412
|
-
var SerpIntelligenceOrganicResultSchema =
|
|
10413
|
-
position:
|
|
10414
|
-
title:
|
|
10415
|
-
url:
|
|
10416
|
-
domain:
|
|
10417
|
-
cite:
|
|
10418
|
-
snippet:
|
|
10419
|
-
isRedditStyle:
|
|
10420
|
-
inlineRating:
|
|
10421
|
-
value:
|
|
10422
|
-
count:
|
|
11040
|
+
var SerpIntelligenceOrganicResultSchema = z16.object({
|
|
11041
|
+
position: z16.number().int().min(1),
|
|
11042
|
+
title: z16.string(),
|
|
11043
|
+
url: z16.string(),
|
|
11044
|
+
domain: z16.string(),
|
|
11045
|
+
cite: z16.string().nullable(),
|
|
11046
|
+
snippet: z16.string().nullable(),
|
|
11047
|
+
isRedditStyle: z16.boolean(),
|
|
11048
|
+
inlineRating: z16.object({
|
|
11049
|
+
value: z16.string(),
|
|
11050
|
+
count: z16.string()
|
|
10423
11051
|
}).strict().nullable()
|
|
10424
11052
|
}).strict();
|
|
10425
|
-
var SerpIntelligenceLocationEvidenceSchema =
|
|
10426
|
-
status:
|
|
10427
|
-
expected:
|
|
10428
|
-
city:
|
|
10429
|
-
regionCode:
|
|
10430
|
-
canonicalLocation:
|
|
11053
|
+
var SerpIntelligenceLocationEvidenceSchema = z16.object({
|
|
11054
|
+
status: z16.enum(SerpIntelligenceLocalizationStatusValues),
|
|
11055
|
+
expected: z16.object({
|
|
11056
|
+
city: z16.string(),
|
|
11057
|
+
regionCode: z16.string().nullable(),
|
|
11058
|
+
canonicalLocation: z16.string()
|
|
10431
11059
|
}).strict().nullable(),
|
|
10432
|
-
candidates:
|
|
10433
|
-
city:
|
|
10434
|
-
regionCode:
|
|
10435
|
-
count:
|
|
10436
|
-
examples:
|
|
11060
|
+
candidates: z16.array(z16.object({
|
|
11061
|
+
city: z16.string(),
|
|
11062
|
+
regionCode: z16.string(),
|
|
11063
|
+
count: z16.number().int().min(0),
|
|
11064
|
+
examples: z16.array(z16.string())
|
|
10437
11065
|
}).strict())
|
|
10438
11066
|
}).strict();
|
|
10439
|
-
var SerpIntelligenceHarvestResultSchema =
|
|
10440
|
-
seed:
|
|
10441
|
-
location:
|
|
10442
|
-
extractedAt:
|
|
10443
|
-
totalQuestions:
|
|
10444
|
-
surface:
|
|
10445
|
-
aiOverview:
|
|
10446
|
-
detected:
|
|
10447
|
-
text:
|
|
10448
|
-
citations:
|
|
10449
|
-
expanded:
|
|
10450
|
-
fullyExpanded:
|
|
10451
|
-
sections:
|
|
11067
|
+
var SerpIntelligenceHarvestResultSchema = z16.object({
|
|
11068
|
+
seed: z16.string(),
|
|
11069
|
+
location: z16.string().nullable(),
|
|
11070
|
+
extractedAt: z16.string(),
|
|
11071
|
+
totalQuestions: z16.number().int().min(0),
|
|
11072
|
+
surface: z16.enum(["web", "aim", "unknown"]),
|
|
11073
|
+
aiOverview: z16.object({
|
|
11074
|
+
detected: z16.boolean(),
|
|
11075
|
+
text: z16.string().nullable(),
|
|
11076
|
+
citations: z16.array(SerpIntelligenceAICitationSchema),
|
|
11077
|
+
expanded: z16.boolean().optional(),
|
|
11078
|
+
fullyExpanded: z16.boolean().optional(),
|
|
11079
|
+
sections: z16.array(z16.string()).optional()
|
|
10452
11080
|
}).strict(),
|
|
10453
|
-
aiMode:
|
|
10454
|
-
detected:
|
|
10455
|
-
text:
|
|
10456
|
-
citations:
|
|
11081
|
+
aiMode: z16.object({
|
|
11082
|
+
detected: z16.boolean(),
|
|
11083
|
+
text: z16.string().nullable(),
|
|
11084
|
+
citations: z16.array(SerpIntelligenceAICitationSchema)
|
|
10457
11085
|
}).strict(),
|
|
10458
|
-
tree:
|
|
10459
|
-
flat:
|
|
10460
|
-
videos:
|
|
10461
|
-
forums:
|
|
10462
|
-
organicResults:
|
|
10463
|
-
localPack:
|
|
10464
|
-
entityIds:
|
|
10465
|
-
entities:
|
|
10466
|
-
name:
|
|
10467
|
-
kgId:
|
|
10468
|
-
cid:
|
|
10469
|
-
gcid:
|
|
11086
|
+
tree: z16.array(z16.unknown()),
|
|
11087
|
+
flat: z16.array(z16.unknown()),
|
|
11088
|
+
videos: z16.array(z16.unknown()),
|
|
11089
|
+
forums: z16.array(z16.unknown()),
|
|
11090
|
+
organicResults: z16.array(SerpIntelligenceOrganicResultSchema),
|
|
11091
|
+
localPack: z16.array(z16.unknown()),
|
|
11092
|
+
entityIds: z16.object({
|
|
11093
|
+
entities: z16.array(z16.object({
|
|
11094
|
+
name: z16.string(),
|
|
11095
|
+
kgId: z16.string().nullable(),
|
|
11096
|
+
cid: z16.string().nullable(),
|
|
11097
|
+
gcid: z16.string().nullable()
|
|
10470
11098
|
}).strict()),
|
|
10471
|
-
kgIds:
|
|
10472
|
-
cids:
|
|
10473
|
-
gcids:
|
|
11099
|
+
kgIds: z16.array(z16.string()),
|
|
11100
|
+
cids: z16.array(z16.string()),
|
|
11101
|
+
gcids: z16.array(z16.string())
|
|
10474
11102
|
}).strict(),
|
|
10475
|
-
stats:
|
|
10476
|
-
seed:
|
|
10477
|
-
totalQuestions:
|
|
10478
|
-
maxDepthReached:
|
|
10479
|
-
durationMs:
|
|
10480
|
-
errorCount:
|
|
11103
|
+
stats: z16.object({
|
|
11104
|
+
seed: z16.string(),
|
|
11105
|
+
totalQuestions: z16.number().int().min(0),
|
|
11106
|
+
maxDepthReached: z16.number().int().min(0),
|
|
11107
|
+
durationMs: z16.number().min(0),
|
|
11108
|
+
errorCount: z16.number().int().min(0)
|
|
10481
11109
|
}).strict(),
|
|
10482
|
-
diagnostics:
|
|
10483
|
-
completionStatus:
|
|
10484
|
-
problem:
|
|
10485
|
-
warnings:
|
|
10486
|
-
debug:
|
|
11110
|
+
diagnostics: z16.object({
|
|
11111
|
+
completionStatus: z16.enum(["paa_found", "no_paa", "serp_only"]),
|
|
11112
|
+
problem: z16.null(),
|
|
11113
|
+
warnings: z16.array(z16.unknown()).optional(),
|
|
11114
|
+
debug: z16.object({
|
|
10487
11115
|
locationEvidence: SerpIntelligenceLocationEvidenceSchema.optional()
|
|
10488
11116
|
}).passthrough().optional()
|
|
10489
11117
|
}).passthrough(),
|
|
10490
|
-
whatPeopleSaying:
|
|
11118
|
+
whatPeopleSaying: z16.array(z16.unknown())
|
|
10491
11119
|
}).strict();
|
|
10492
|
-
var SerpIntelligenceCaptureAttemptSchema =
|
|
10493
|
-
attemptNumber:
|
|
10494
|
-
outcome:
|
|
10495
|
-
startedAt:
|
|
10496
|
-
completedAt:
|
|
10497
|
-
durationMs:
|
|
10498
|
-
problemCode:
|
|
10499
|
-
message:
|
|
10500
|
-
kernelSessionId:
|
|
10501
|
-
cleanupSucceeded:
|
|
11120
|
+
var SerpIntelligenceCaptureAttemptSchema = z16.object({
|
|
11121
|
+
attemptNumber: z16.number().int().min(1),
|
|
11122
|
+
outcome: z16.enum(SerpIntelligenceAttemptOutcomeValues),
|
|
11123
|
+
startedAt: z16.string().optional(),
|
|
11124
|
+
completedAt: z16.string().optional(),
|
|
11125
|
+
durationMs: z16.number().min(0).optional(),
|
|
11126
|
+
problemCode: z16.string().optional(),
|
|
11127
|
+
message: z16.string().optional(),
|
|
11128
|
+
kernelSessionId: z16.string().nullable().optional(),
|
|
11129
|
+
cleanupSucceeded: z16.boolean().nullable().optional()
|
|
10502
11130
|
}).strict();
|
|
10503
|
-
var SerpPageSnapshotCaptureSchema =
|
|
11131
|
+
var SerpPageSnapshotCaptureSchema = z16.object({
|
|
10504
11132
|
url: SerpIntelligencePublicHttpUrlSchema,
|
|
10505
11133
|
requestedUrl: SerpIntelligencePublicHttpUrlSchema,
|
|
10506
11134
|
finalUrl: SerpIntelligencePublicHttpUrlSchema.nullable(),
|
|
10507
|
-
sourceKind:
|
|
10508
|
-
sourcePosition:
|
|
10509
|
-
status:
|
|
10510
|
-
fetchedVia:
|
|
10511
|
-
httpStatus:
|
|
10512
|
-
contentType:
|
|
10513
|
-
title:
|
|
11135
|
+
sourceKind: z16.enum(SerpPageSnapshotSourceKindValues),
|
|
11136
|
+
sourcePosition: z16.number().int().min(1).nullable(),
|
|
11137
|
+
status: z16.enum(SerpPageFetchStatusValues),
|
|
11138
|
+
fetchedVia: z16.enum(SerpPageFetchedViaValues).nullable(),
|
|
11139
|
+
httpStatus: z16.number().int().min(100).max(599).nullable(),
|
|
11140
|
+
contentType: z16.string().nullable(),
|
|
11141
|
+
title: z16.string().nullable(),
|
|
10514
11142
|
canonicalUrl: SerpIntelligencePublicHttpUrlSchema.nullable(),
|
|
10515
|
-
metaDescription:
|
|
10516
|
-
headings:
|
|
10517
|
-
level:
|
|
10518
|
-
text:
|
|
11143
|
+
metaDescription: z16.string().nullable(),
|
|
11144
|
+
headings: z16.array(z16.object({
|
|
11145
|
+
level: z16.number().int().min(1).max(6),
|
|
11146
|
+
text: z16.string()
|
|
10519
11147
|
}).strict()).default([]),
|
|
10520
|
-
artifact:
|
|
10521
|
-
htmlBlobUrl:
|
|
10522
|
-
textBlobUrl:
|
|
10523
|
-
markdownBlobUrl:
|
|
10524
|
-
screenshotBlobUrl:
|
|
10525
|
-
contentSha256:
|
|
10526
|
-
capturedAt:
|
|
11148
|
+
artifact: z16.object({
|
|
11149
|
+
htmlBlobUrl: z16.string().url().nullable(),
|
|
11150
|
+
textBlobUrl: z16.string().url().nullable(),
|
|
11151
|
+
markdownBlobUrl: z16.string().url().nullable(),
|
|
11152
|
+
screenshotBlobUrl: z16.string().url().nullable(),
|
|
11153
|
+
contentSha256: z16.string().nullable(),
|
|
11154
|
+
capturedAt: z16.string().nullable()
|
|
10527
11155
|
}).strict(),
|
|
10528
|
-
error:
|
|
10529
|
-
code:
|
|
10530
|
-
message:
|
|
11156
|
+
error: z16.object({
|
|
11157
|
+
code: z16.string(),
|
|
11158
|
+
message: z16.string()
|
|
10531
11159
|
}).strict().nullable()
|
|
10532
11160
|
}).strict();
|
|
10533
|
-
var SerpIntelligenceCaptureResponseSchema =
|
|
11161
|
+
var SerpIntelligenceCaptureResponseSchema = z16.object({
|
|
10534
11162
|
harvestResult: SerpIntelligenceHarvestResultSchema,
|
|
10535
|
-
attempts:
|
|
11163
|
+
attempts: z16.array(SerpIntelligenceCaptureAttemptSchema),
|
|
10536
11164
|
locationEvidence: SerpIntelligenceLocationEvidenceSchema.nullable(),
|
|
10537
|
-
pageSnapshotArtifacts:
|
|
10538
|
-
billing:
|
|
10539
|
-
creditsUsed:
|
|
10540
|
-
requestId:
|
|
10541
|
-
jobId:
|
|
11165
|
+
pageSnapshotArtifacts: z16.array(SerpPageSnapshotCaptureSchema),
|
|
11166
|
+
billing: z16.object({
|
|
11167
|
+
creditsUsed: z16.number().min(0).optional(),
|
|
11168
|
+
requestId: z16.string().optional(),
|
|
11169
|
+
jobId: z16.string().optional()
|
|
10542
11170
|
}).strict().optional()
|
|
10543
11171
|
}).strict();
|
|
10544
|
-
var SerpIntelligencePageSnapshotsResponseSchema =
|
|
10545
|
-
pageSnapshotArtifacts:
|
|
10546
|
-
attempts:
|
|
11172
|
+
var SerpIntelligencePageSnapshotsResponseSchema = z16.object({
|
|
11173
|
+
pageSnapshotArtifacts: z16.array(SerpPageSnapshotCaptureSchema),
|
|
11174
|
+
attempts: z16.array(SerpIntelligenceCaptureAttemptSchema).default([])
|
|
10547
11175
|
}).strict();
|
|
10548
11176
|
|
|
10549
11177
|
// src/serp-intelligence/serp-capture-service.ts
|
|
@@ -10715,7 +11343,7 @@ var SERP_INTELLIGENCE_RATE_LIMIT = 60;
|
|
|
10715
11343
|
var SERP_INTELLIGENCE_RATE_WINDOW_SECONDS = 60;
|
|
10716
11344
|
var POST_CAPTURE_ROUTE_LABEL = "POST /capture";
|
|
10717
11345
|
var POST_PAGE_SNAPSHOTS_ROUTE_LABEL = "POST /page-snapshots";
|
|
10718
|
-
var serpIntelligenceApp = new
|
|
11346
|
+
var serpIntelligenceApp = new Hono7();
|
|
10719
11347
|
serpIntelligenceApp.use("*", createApiKeyAuth());
|
|
10720
11348
|
function structuredError(input) {
|
|
10721
11349
|
return {
|
|
@@ -10890,7 +11518,7 @@ serpIntelligenceApp.post("/page-snapshots", async (c) => {
|
|
|
10890
11518
|
});
|
|
10891
11519
|
|
|
10892
11520
|
// src/mcp/mcp-routes.ts
|
|
10893
|
-
import { Hono as
|
|
11521
|
+
import { Hono as Hono8 } from "hono";
|
|
10894
11522
|
import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
|
|
10895
11523
|
configureReportSaving(false);
|
|
10896
11524
|
function mcpAuthError() {
|
|
@@ -10920,11 +11548,11 @@ async function requireMcpCallerKey(c) {
|
|
|
10920
11548
|
if (!user) return mcpAuthError();
|
|
10921
11549
|
return callerKey;
|
|
10922
11550
|
}
|
|
10923
|
-
var mcpApp = new
|
|
11551
|
+
var mcpApp = new Hono8();
|
|
10924
11552
|
function registerSerpIntelligenceCaptureTools(server, executor) {
|
|
10925
11553
|
server.registerTool("capture_serp_snapshot", {
|
|
10926
11554
|
title: "SERP Intelligence Snapshot",
|
|
10927
|
-
description: "Capture a structured SERP Intelligence Google snapshot through POST /serp-intelligence/capture, the same product capture path used by Phoenix. Split query from location, infer gl/hl, use proxyMode location for localized residential proxy evidence
|
|
11555
|
+
description: "Capture a structured SERP Intelligence Google snapshot through POST /serp-intelligence/capture, the same product capture path used by Phoenix. Split query from location, infer gl/hl, use proxyMode location for localized US residential evidence; location mode creates fresh proxy IDs across retries and rejects wrong-location evidence before returning. Use configured only for the static residential proxy, and none only for direct-network debugging. Set debug true when investigating location evidence, proxy behavior, CAPTCHA, or capture reliability.",
|
|
10928
11556
|
inputSchema: CaptureSerpSnapshotInputSchema,
|
|
10929
11557
|
annotations: liveWebToolAnnotations("SERP Intelligence Snapshot")
|
|
10930
11558
|
}, async (input) => executor.captureSerpSnapshot(input));
|
|
@@ -10956,7 +11584,7 @@ mcpApp.all("/", async (c) => {
|
|
|
10956
11584
|
});
|
|
10957
11585
|
|
|
10958
11586
|
// src/api/browser-agent-routes.ts
|
|
10959
|
-
import { Hono as
|
|
11587
|
+
import { Hono as Hono9 } from "hono";
|
|
10960
11588
|
|
|
10961
11589
|
// src/api/browser-agent-db.ts
|
|
10962
11590
|
import { randomUUID } from "crypto";
|
|
@@ -11233,6 +11861,10 @@ async function replayStop(runtimeSessionId, replayId) {
|
|
|
11233
11861
|
const k = client();
|
|
11234
11862
|
await k.browsers.replays.stop(replayId, { id: runtimeSessionId });
|
|
11235
11863
|
}
|
|
11864
|
+
/**
 * Requests the download of a recorded replay from the browser service.
 *
 * @param runtimeSessionId Runtime session id, passed to the client as `id`.
 * @param replayId Id of the replay to download.
 * @returns Whatever the client's browsers.replays.download call resolves to.
 */
async function replayDownload(runtimeSessionId, replayId) {
  const request = { id: runtimeSessionId };
  return client().browsers.replays.download(replayId, request);
}
|
|
11236
11868
|
async function replayList(runtimeSessionId) {
|
|
11237
11869
|
const k = client();
|
|
11238
11870
|
const res = await k.browsers.replays.list(runtimeSessionId);
|
|
@@ -11261,6 +11893,7 @@ function publicSession(row) {
|
|
|
11261
11893
|
session_id: row.id,
|
|
11262
11894
|
status: row.status,
|
|
11263
11895
|
label: row.label,
|
|
11896
|
+
live_view_url: row.live_view_url,
|
|
11264
11897
|
created_at: row.created_at,
|
|
11265
11898
|
last_action_at: row.last_action_at,
|
|
11266
11899
|
closed_at: row.closed_at,
|
|
@@ -11272,6 +11905,14 @@ function failure(err) {
|
|
|
11272
11905
|
const msg = err instanceof Error ? err.message : String(err);
|
|
11273
11906
|
return { error: sanitizeVendorName(msg) };
|
|
11274
11907
|
}
|
|
11908
|
+
/**
 * Builds the relative API path from which a replay can be downloaded.
 * Both ids are percent-encoded before being placed in the path.
 *
 * @param sessionId Session id.
 * @param replayId Replay id.
 * @returns Path of the form /agent/sessions/<session>/replays/<replay>/download.
 */
function replayDownloadUrl(sessionId, replayId) {
  const sessionSegment = encodeURIComponent(sessionId);
  const replaySegment = encodeURIComponent(replayId);
  return "/agent/sessions/" + sessionSegment + "/replays/" + replaySegment + "/download";
}
|
|
11911
|
+
/**
 * Derives a download filename for a replay.
 *
 * Every character other than ASCII letters, digits, "_" and "-" is replaced
 * by "-". The session part is capped at 80 characters and the replay part at
 * 120; the result always ends in ".mp4".
 *
 * @param sessionId Session id.
 * @param replayId Replay id.
 * @returns "<session>-<replay>.mp4" built from the sanitized ids.
 */
function replayFilename(sessionId, replayId) {
  const sanitize = (value, maxLength) => value.replace(/[^a-zA-Z0-9_-]/g, "-").slice(0, maxLength);
  const sessionPart = sanitize(sessionId, 80);
  const replayPart = sanitize(replayId, 120);
  return `${sessionPart}-${replayPart}.mp4`;
}
|
|
11275
11916
|
async function loadOpenSession(id, userId) {
|
|
11276
11917
|
const row = await getSessionRow(id);
|
|
11277
11918
|
if (!row) return null;
|
|
@@ -11279,7 +11920,7 @@ async function loadOpenSession(id, userId) {
|
|
|
11279
11920
|
return row;
|
|
11280
11921
|
}
|
|
11281
11922
|
function buildBrowserAgentRoutes() {
|
|
11282
|
-
const app2 = new
|
|
11923
|
+
const app2 = new Hono9();
|
|
11283
11924
|
app2.use("*", async (c, next) => {
|
|
11284
11925
|
await migrateBrowserAgent();
|
|
11285
11926
|
return next();
|
|
@@ -11495,7 +12136,11 @@ function buildBrowserAgentRoutes() {
|
|
|
11495
12136
|
viewUrl: started.viewUrl,
|
|
11496
12137
|
label: typeof body.label === "string" ? body.label : null
|
|
11497
12138
|
});
|
|
11498
|
-
return c.json({
|
|
12139
|
+
return c.json({
|
|
12140
|
+
replay_id: started.replayId,
|
|
12141
|
+
view_url: started.viewUrl,
|
|
12142
|
+
download_url: replayDownloadUrl(row.id, started.replayId)
|
|
12143
|
+
});
|
|
11499
12144
|
} catch (err) {
|
|
11500
12145
|
return c.json(failure(err), 502);
|
|
11501
12146
|
}
|
|
@@ -11517,7 +12162,12 @@ function buildBrowserAgentRoutes() {
|
|
|
11517
12162
|
viewUrl = null;
|
|
11518
12163
|
}
|
|
11519
12164
|
await recordReplayStop(replayId, viewUrl);
|
|
11520
|
-
return c.json({
|
|
12165
|
+
return c.json({
|
|
12166
|
+
ok: true,
|
|
12167
|
+
replay_id: replayId,
|
|
12168
|
+
view_url: viewUrl,
|
|
12169
|
+
download_url: replayDownloadUrl(row.id, replayId)
|
|
12170
|
+
});
|
|
11521
12171
|
} catch (err) {
|
|
11522
12172
|
return c.json(failure(err), 502);
|
|
11523
12173
|
}
|
|
@@ -11531,12 +12181,35 @@ function buildBrowserAgentRoutes() {
|
|
|
11531
12181
|
replays: rows.map((r) => ({
|
|
11532
12182
|
replay_id: r.replay_id,
|
|
11533
12183
|
view_url: r.view_url,
|
|
12184
|
+
download_url: replayDownloadUrl(row.id, r.replay_id),
|
|
11534
12185
|
label: r.label,
|
|
11535
12186
|
started_at: r.started_at,
|
|
11536
12187
|
stopped_at: r.stopped_at
|
|
11537
12188
|
}))
|
|
11538
12189
|
});
|
|
11539
12190
|
});
|
|
12191
|
+
// GET /sessions/:id/replays/:replayId/download
// Streams a recorded replay back to the caller as a file attachment.
// Answers 404 when loadOpenSession yields no row for this user or when the
// replay id is not among the rows listed for the session, relays the upstream
// status when the download response is not ok, and answers 502 when the
// download call throws.
app2.get("/sessions/:id/replays/:replayId/download", async (c) => {
  const user = c.get("user");
  const row = await loadOpenSession(c.req.param("id"), user.id);
  if (!row) {
    return c.json({ error: "not found" }, 404);
  }
  const replayId = c.req.param("replayId");
  const replayRows = await listReplayRows(row.id);
  const belongsToSession = replayRows.some((entry) => entry.replay_id === replayId);
  if (!belongsToSession) {
    return c.json({ error: "replay not found" }, 404);
  }
  try {
    const upstream = await replayDownload(row.runtime_session_id, replayId);
    if (!upstream.ok) {
      return c.json({ error: `replay download failed (${upstream.status})` }, upstream.status);
    }
    // Fall back to video/mp4 when the upstream response does not declare a content type.
    const headers = {
      "Content-Type": upstream.headers.get("content-type") ?? "video/mp4",
      "Content-Disposition": `attachment; filename="${replayFilename(row.id, replayId)}"`,
      "Cache-Control": "private, max-age=300"
    };
    return new Response(upstream.body, { status: 200, headers });
  } catch (err) {
    return c.json(failure(err), 502);
  }
});
|
|
11540
12213
|
return app2;
|
|
11541
12214
|
}
|
|
11542
12215
|
|
|
@@ -11560,6 +12233,7 @@ function renderConsoleHtml(initialSessionId) {
|
|
|
11560
12233
|
input[type=text], input[type=password], input[type=url] { background: #141925; border: 1px solid #232b3a; color: #e6eaf2; border-radius: 7px; padding: 7px 10px; }
|
|
11561
12234
|
button { background: #2b6cff; border: 0; color: #fff; border-radius: 7px; padding: 7px 12px; cursor: pointer; font-weight: 500; }
|
|
11562
12235
|
button.ghost { background: #1a2030; color: #cdd5e4; border: 1px solid #28303f; }
|
|
12236
|
+
button.linkish { background: transparent; color: #7aa2ff; border: 0; padding: 0; font-size: 13px; font-weight: 500; }
|
|
11563
12237
|
button:disabled { opacity: .5; cursor: default; }
|
|
11564
12238
|
.layout { display: grid; grid-template-columns: 280px 1fr; height: calc(100vh - 53px); }
|
|
11565
12239
|
aside { border-right: 1px solid #1c2230; overflow-y: auto; padding: 12px; }
|
|
@@ -11632,6 +12306,26 @@ async function closeCurrent() {
|
|
|
11632
12306
|
await refreshSessions();
|
|
11633
12307
|
}
|
|
11634
12308
|
|
|
12309
|
+
// Downloads a replay of the currently selected session as an mp4 file.
// The file is fetched with the console API key and handed to the browser
// through a temporary object URL and a synthetic link click.
// Does nothing when no session is selected or replayId is empty.
// Any failure (HTTP error status, network error, unreadable body) is reported
// with alert() instead of surfacing as an unhandled promise rejection.
async function downloadReplay(replayId) {
  // Capture the session id up front so the filename matches the request even
  // if the selection changes while the download is in flight.
  const sessionId = state.current;
  if (!sessionId || !replayId) return;
  const endpoint = '/agent/sessions/' + encodeURIComponent(sessionId) + '/replays/' + encodeURIComponent(replayId) + '/download';
  let blob;
  try {
    const res = await fetch(endpoint, {
      headers: { 'x-api-key': state.key },
    });
    if (!res.ok) {
      alert('Replay download failed: ' + await res.text());
      return;
    }
    blob = await res.blob();
  } catch (err) {
    alert('Replay download failed: ' + (err && err.message ? err.message : String(err)));
    return;
  }
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = sessionId + '-' + replayId + '.mp4';
  document.body.appendChild(a);
  a.click();
  a.remove();
  URL.revokeObjectURL(url);
}
|
|
12328
|
+
|
|
11635
12329
|
function frameSrc() {
|
|
11636
12330
|
if (!state.liveUrl) return null;
|
|
11637
12331
|
const sep = state.liveUrl.includes('?') ? '&' : '?';
|
|
@@ -11686,9 +12380,10 @@ function render() {
|
|
|
11686
12380
|
const rep = h('<div class="replays"><h3>Replays</h3></div>');
|
|
11687
12381
|
if (!state.replays.length) rep.appendChild(h('<div class="muted">No replays recorded.</div>'));
|
|
11688
12382
|
for (const r of state.replays) {
|
|
11689
|
-
const status = r.stopped_at ? 'ready' : 'recording
|
|
12383
|
+
const status = r.stopped_at ? 'ready' : 'recording...';
|
|
11690
12384
|
const link = r.view_url ? '<a href="' + esc(r.view_url) + '" target="_blank" rel="noopener">view mp4</a>' : '<span class="muted">' + status + '</span>';
|
|
11691
|
-
|
|
12385
|
+
const download = r.stopped_at ? '<button class="linkish replay-download" data-rid="' + esc(r.replay_id) + '">download mp4</button>' : '';
|
|
12386
|
+
rep.appendChild(h('<div class="replay"><span class="muted">' + esc(r.started_at || '') + '</span><span class="spacer"></span>' + link + download + '</div>'));
|
|
11692
12387
|
}
|
|
11693
12388
|
main.appendChild(rep);
|
|
11694
12389
|
|
|
@@ -11702,6 +12397,9 @@ function render() {
|
|
|
11702
12397
|
if (reload) reload.onclick = () => selectSession(state.current);
|
|
11703
12398
|
const close = document.getElementById('close');
|
|
11704
12399
|
if (close) close.onclick = closeCurrent;
|
|
12400
|
+
document.querySelectorAll('.replay-download').forEach(btn => {
|
|
12401
|
+
btn.onclick = () => downloadReplay(btn.getAttribute('data-rid'));
|
|
12402
|
+
});
|
|
11705
12403
|
}
|
|
11706
12404
|
|
|
11707
12405
|
render();
|
|
@@ -11713,9 +12411,9 @@ if (state.key) { refreshSessions(); if (state.current) selectSession(state.curre
|
|
|
11713
12411
|
|
|
11714
12412
|
// src/api/stripe-routes.ts
|
|
11715
12413
|
import Stripe from "stripe";
|
|
11716
|
-
import { Hono as
|
|
12414
|
+
import { Hono as Hono10 } from "hono";
|
|
11717
12415
|
var stripe = new Stripe(process.env.STRIPE_SECRET_KEY, { apiVersion: "2026-02-25.clover" });
|
|
11718
|
-
var stripeApp = new
|
|
12416
|
+
var stripeApp = new Hono10();
|
|
11719
12417
|
stripeApp.post("/webhooks", async (c) => {
|
|
11720
12418
|
const sig = c.req.header("stripe-signature");
|
|
11721
12419
|
const body = await c.req.text();
|
|
@@ -11810,27 +12508,27 @@ import { getCookie, setCookie, deleteCookie } from "hono/cookie";
|
|
|
11810
12508
|
import Stripe2 from "stripe";
|
|
11811
12509
|
|
|
11812
12510
|
// src/api/billing-schemas.ts
|
|
11813
|
-
import { z as
|
|
11814
|
-
var BillingCheckoutBodySchema =
|
|
11815
|
-
priceId:
|
|
12511
|
+
import { z as z17 } from "zod";
|
|
12512
|
+
var BillingCheckoutBodySchema = z17.object({
|
|
12513
|
+
priceId: z17.string().min(1)
|
|
11816
12514
|
});
|
|
11817
|
-
var FreeCreditBreakdownSchema =
|
|
11818
|
-
signup_grant_mc:
|
|
11819
|
-
monthly_refresh_mc:
|
|
11820
|
-
total_free_mc:
|
|
11821
|
-
signup_grant_credits:
|
|
11822
|
-
monthly_refresh_credits:
|
|
11823
|
-
total_free_credits:
|
|
12515
|
+
var FreeCreditBreakdownSchema = z17.object({
|
|
12516
|
+
signup_grant_mc: z17.number().int().nonnegative(),
|
|
12517
|
+
monthly_refresh_mc: z17.number().int().nonnegative(),
|
|
12518
|
+
total_free_mc: z17.number().int().nonnegative(),
|
|
12519
|
+
signup_grant_credits: z17.number().nonnegative(),
|
|
12520
|
+
monthly_refresh_credits: z17.number().nonnegative(),
|
|
12521
|
+
total_free_credits: z17.number().nonnegative()
|
|
11824
12522
|
});
|
|
11825
|
-
var BillingBalanceResponseSchema =
|
|
11826
|
-
balance_mc:
|
|
11827
|
-
balance_credits:
|
|
12523
|
+
var BillingBalanceResponseSchema = z17.object({
|
|
12524
|
+
balance_mc: z17.number().int().nonnegative(),
|
|
12525
|
+
balance_credits: z17.number().nonnegative(),
|
|
11828
12526
|
free_credits: FreeCreditBreakdownSchema,
|
|
11829
|
-
ledger:
|
|
12527
|
+
ledger: z17.array(z17.any())
|
|
11830
12528
|
});
|
|
11831
|
-
var MonthlyRefreshSweepResultSchema =
|
|
11832
|
-
usersRefreshed:
|
|
11833
|
-
totalMcGranted:
|
|
12529
|
+
var MonthlyRefreshSweepResultSchema = z17.object({
|
|
12530
|
+
usersRefreshed: z17.number().int().nonnegative(),
|
|
12531
|
+
totalMcGranted: z17.number().int().nonnegative()
|
|
11834
12532
|
});
|
|
11835
12533
|
|
|
11836
12534
|
// src/api/credit-operations.ts
|
|
@@ -12041,7 +12739,7 @@ var sessionAuth = createMiddleware3(async (c, next) => {
|
|
|
12041
12739
|
c.set("sessionUser", { ...refreshed, balance_mc: balanceMc });
|
|
12042
12740
|
return next();
|
|
12043
12741
|
});
|
|
12044
|
-
var app = new
|
|
12742
|
+
var app = new Hono11();
|
|
12045
12743
|
var STRIPE_API_VERSION = "2026-02-25.clover";
|
|
12046
12744
|
function requireStripeSecret() {
|
|
12047
12745
|
const secret2 = process.env.STRIPE_SECRET_KEY?.trim();
|
|
@@ -12687,7 +13385,7 @@ app.get("/cron/tick", async (c) => {
|
|
|
12687
13385
|
if (!process.env.CRON_SECRET || secret2 !== `Bearer ${process.env.CRON_SECRET}`) {
|
|
12688
13386
|
return c.json({ error: "Unauthorized" }, 401);
|
|
12689
13387
|
}
|
|
12690
|
-
const { drainQueue } = await import("./worker-
|
|
13388
|
+
const { drainQueue } = await import("./worker-NAKGTIF5.js");
|
|
12691
13389
|
const budget = { maxJobs: 10, deadlineMs: Date.now() + 28e4 };
|
|
12692
13390
|
const [results, sweepResult] = await Promise.all([
|
|
12693
13391
|
drainQueue(budget),
|
|
@@ -12701,6 +13399,7 @@ app.route("/youtube", youtubeApp);
|
|
|
12701
13399
|
app.route("/screenshot", screenshotApp);
|
|
12702
13400
|
app.route("/facebook", facebookAdApp);
|
|
12703
13401
|
app.route("/maps", mapsApp);
|
|
13402
|
+
app.route("/directory", directoryApp);
|
|
12704
13403
|
app.route("/serp-intelligence", serpIntelligenceApp);
|
|
12705
13404
|
app.route("/mcp", mcpApp);
|
|
12706
13405
|
app.route("/agent", buildBrowserAgentRoutes());
|
|
@@ -12812,4 +13511,4 @@ app.get("/blog/:slug/", (c) => {
|
|
|
12812
13511
|
export {
|
|
12813
13512
|
app
|
|
12814
13513
|
};
|
|
12815
|
-
//# sourceMappingURL=server-
|
|
13514
|
+
//# sourceMappingURL=server-CJMX2QUM.js.map
|