mcp-scraper 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -2
- package/dist/bin/api-server.cjs +573 -172
- package/dist/bin/api-server.cjs.map +1 -1
- package/dist/bin/api-server.js +2 -2
- package/dist/bin/mcp-stdio-server.cjs +300 -150
- package/dist/bin/mcp-stdio-server.cjs.map +1 -1
- package/dist/bin/mcp-stdio-server.js +2 -1
- package/dist/bin/mcp-stdio-server.js.map +1 -1
- package/dist/bin/paa-harvest.cjs +22 -1
- package/dist/bin/paa-harvest.cjs.map +1 -1
- package/dist/bin/paa-harvest.js +2 -1
- package/dist/bin/paa-harvest.js.map +1 -1
- package/dist/{chunk-4OHPDEZM.js → chunk-3OIRNUF5.js} +303 -151
- package/dist/chunk-3OIRNUF5.js.map +1 -0
- package/dist/{chunk-W4P2U5VF.js → chunk-LUBDFS67.js} +32 -32
- package/dist/chunk-LUBDFS67.js.map +1 -0
- package/dist/{chunk-7HB7NDOY.js → chunk-ZK456YXN.js} +12 -2
- package/dist/chunk-ZK456YXN.js.map +1 -0
- package/dist/chunk-ZMOWIBMK.js +36 -0
- package/dist/chunk-ZMOWIBMK.js.map +1 -0
- package/dist/index.cjs +22 -1
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/{server-V5XMVRYE.js → server-YNJHP5PU.js} +235 -22
- package/dist/server-YNJHP5PU.js.map +1 -0
- package/dist/{worker-UT4ZQU2T.js → worker-PBG6LGET.js} +4 -3
- package/dist/{worker-UT4ZQU2T.js.map → worker-PBG6LGET.js.map} +1 -1
- package/docs/adr/0001-in-page-graphql-interception-for-anti-bot-scraping.md +58 -0
- package/docs/adr/README.md +11 -0
- package/docs/mcp-tool-quality-spec.md +238 -0
- package/package.json +5 -4
- package/dist/chunk-4OHPDEZM.js.map +0 -1
- package/dist/chunk-7HB7NDOY.js.map +0 -1
- package/dist/chunk-W4P2U5VF.js.map +0 -1
- package/dist/server-V5XMVRYE.js.map +0 -1
package/dist/bin/paa-harvest.cjs
CHANGED
|
@@ -61,6 +61,16 @@ var MapsPlaceOptionsSchema = import_zod.z.object({
|
|
|
61
61
|
kernelProxyId: import_zod.z.string().optional(),
|
|
62
62
|
headless: import_zod.z.boolean().default(true)
|
|
63
63
|
});
|
|
64
|
+
/**
 * Zod schema for the options object of a Maps search.
 *
 * Fields, in declaration order:
 * - query: required, non-empty string.
 * - location: optional string.
 * - gl / hl: exactly two-character strings defaulting to "us" / "en"
 *   (presumably country and language codes; confirm against the caller).
 * - maxResults: integer in [1, 50], defaulting to 10.
 * - kernelApiKey / kernelProxyId: optional strings.
 * - headless: boolean defaulting to true.
 */
var MapsSearchOptionsSchema = (() => {
  const { z } = import_zod;
  // Small builders for the field shapes that repeat within this schema.
  const optionalString = () => z.string().optional();
  const twoCharCode = (fallback) => z.string().length(2).default(fallback);
  return z.object({
    query: z.string().min(1),
    location: optionalString(),
    gl: twoCharCode("us"),
    hl: twoCharCode("en"),
    maxResults: z.number().int().min(1).max(50).default(10),
    kernelApiKey: optionalString(),
    kernelProxyId: optionalString(),
    headless: z.boolean().default(true)
  });
})();
|
|
64
74
|
var RawPAAItemSchema = import_zod.z.object({
|
|
65
75
|
question: import_zod.z.string().min(1),
|
|
66
76
|
answer: import_zod.z.string().optional(),
|
|
@@ -924,8 +934,19 @@ function addCandidate(candidates, city, region, example) {
|
|
|
924
934
|
}
|
|
925
935
|
candidates.set(key, { city: normalizedCity, regionCode, count: 1, examples: [example] });
|
|
926
936
|
}
|
|
937
|
+
/**
 * Best-effort percent-decoding of text scraped from a SERP.
 *
 * Never throws for string input: anything that cannot be decoded is kept
 * verbatim rather than failing the whole string.
 *
 * @param {string} text - Possibly percent-encoded text.
 * @returns {string} The text with every decodable `%XX` escape run decoded.
 *   Stray `%` characters and escape runs that are not valid UTF-8 are left
 *   untouched.
 */
function decodeSerpText(text) {
  try {
    // Fast path: the whole string is well-formed.
    return decodeURIComponent(text);
  } catch {
    // Malformed somewhere; fall through to piecewise decoding.
  }
  if (typeof text !== "string") {
    // Nothing to salvage from a value that cannot be scanned as a string.
    return text;
  }
  // Decode each maximal run of %XX escapes on its own so that one bad run
  // (e.g. "%FF") does not prevent the valid ones (e.g. "%2C%20") from being
  // decoded. A "%" not followed by two hex digits is never matched and so
  // stays a literal percent sign.
  return text.replace(/(?:%[0-9a-fA-F]{2})+/g, (run) => {
    try {
      return decodeURIComponent(run);
    } catch {
      return run;
    }
  });
}
|
|
927
948
|
function scanText(candidates, text) {
|
|
928
|
-
const normalized =
|
|
949
|
+
const normalized = decodeSerpText(text).replace(/[+/|_-]+/g, " ");
|
|
929
950
|
for (const match of normalized.matchAll(CITY_STATE_RE)) {
|
|
930
951
|
addCandidate(candidates, match[1] ?? "", match[2] ?? "", normalized.slice(0, 180));
|
|
931
952
|
}
|