crawlio-browser 1.5.5 → 1.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +384 -56
- package/dist/mcp-server/{chunk-A4EQCKHH.js → chunk-YEKQAHYW.js} +1 -1
- package/dist/mcp-server/index.js +888 -23
- package/dist/mcp-server/{init-77AO6DDJ.js → init-ZLXCKEQB.js} +2 -2
- package/package.json +2 -4
- package/skills/browser-automation/SKILL.md +15 -3
- package/skills/web-research/SKILL.md +1 -0
- package/.claude-plugin/plugin.json +0 -10
- package/assets/AppIcon.icns +0 -0
package/dist/mcp-server/index.js
CHANGED
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
WS_PORT_MAX,
|
|
10
10
|
WS_RECONNECT_GRACE,
|
|
11
11
|
WS_STALE_THRESHOLD
|
|
12
|
-
} from "./chunk-A4EQCKHH.js";
|
|
12
|
+
} from "./chunk-YEKQAHYW.js";
|
|
13
13
|
|
|
14
14
|
// src/mcp-server/index.ts
|
|
15
15
|
import { randomBytes as randomBytes2 } from "crypto";
|
|
@@ -1408,7 +1408,11 @@ var TOOL_TIMEOUTS = {
|
|
|
1408
1408
|
stop_recording: 1e4,
|
|
1409
1409
|
get_recording_status: 5e3,
|
|
1410
1410
|
compile_recording: 5e3,
|
|
1411
|
-
ocr_screenshot: 3e4
|
|
1411
|
+
ocr_screenshot: 3e4,
|
|
1412
|
+
wait_for_network_idle: 35e3,
|
|
1413
|
+
detect_tables: 15e3,
|
|
1414
|
+
extract_table: 15e3,
|
|
1415
|
+
extract_data: 3e4
|
|
1412
1416
|
};
|
|
1413
1417
|
function toolSuccess(content) {
|
|
1414
1418
|
return { content: [{ type: "text", text: JSON.stringify(content ?? {}) }], isError: false };
|
|
@@ -1418,6 +1422,38 @@ function toolError(message) {
|
|
|
1418
1422
|
}
|
|
1419
1423
|
// Backoff schedule for actionability polling.
// NOTE(review): presumably per-attempt delays in milliseconds — confirm against pollActionability.
var ACTIONABILITY_BACKOFF = [0, 20, 100, 100, 500];
// Starts out empty (null).
// NOTE(review): looks like a memo for the object produced by buildSmartObject — confirm.
var smartObjectCache = null;
// Module-level accumulators shared by every tool call in this process.
// NOTE(review): sessionFindings presumably collects the records created by smart.finding — confirm.
var sessionFindings = [];
// smart.finding searches this list for an entry whose `dimension` matches the
// finding and whose `reducesConfidence` flag is set.
var sessionGaps = [];
|
|
1427
|
+
/**
 * Condense a raw accessibility-tree payload into a few headline numbers.
 *
 * The tree is read from `raw.tree`, falling back to `raw.nodes`. When it is a
 * non-empty array it is walked depth-first in document (pre-order) order and:
 *   - every node object is counted (`nodeCount`),
 *   - nodes whose role is one of the landmark roles are counted (`landmarkCount`),
 *   - "img" / "image" nodes with an empty name are counted (`imagesWithoutAlt`),
 *   - headings with a numeric level > 0 are collected as `{ level, text }`.
 * When no tree is available, `nodeCount` falls back to a numeric
 * `raw.nodeCount` (otherwise 0) and the remaining counters stay at zero.
 *
 * @param {object|null|undefined} raw Payload carrying `tree`/`nodes` and optionally `nodeCount`.
 * @returns {{nodeCount: number, landmarkCount: number, imagesWithoutAlt: number,
 *            headingStructure: Array<{level: number, text: string}>}}
 *          `headingStructure` holds at most 30 entries; each text is cut to 120 characters.
 */
function summarizeAccessibility(raw) {
  // A missing payload is summarised as "nothing found" instead of throwing.
  const payload = raw ?? {};
  const candidateTree = payload.tree ?? payload.nodes ?? [];
  // Only real arrays are walked; a string or other value would otherwise be
  // iterated element by element and inflate the node count.
  const roots = Array.isArray(candidateTree) ? candidateTree : [];

  let nodeCount = typeof payload.nodeCount === "number" ? payload.nodeCount : 0;
  let landmarkCount = 0;
  let imagesWithoutAlt = 0;
  const headingStructure = [];
  const landmarkRoles = /* @__PURE__ */ new Set(["banner", "navigation", "main", "contentinfo", "complementary", "search", "form", "region"]);

  if (roots.length > 0) {
    // A walked tree is authoritative: discard the reported count and recount.
    nodeCount = 0;
    // Explicit stack instead of recursion so very deep trees cannot overflow
    // the call stack. Entries are pushed in reverse to keep pre-order.
    const pending = [];
    for (let i = roots.length - 1; i >= 0; i--) pending.push(roots[i]);
    while (pending.length > 0) {
      const node = pending.pop();
      // Skip holes / primitives rather than crashing on `node.role`.
      if (node === null || typeof node !== "object") continue;
      nodeCount++;
      const role = String(node.role ?? "");
      if (landmarkRoles.has(role)) landmarkCount++;
      if (role === "img" || role === "image") {
        const name = String(node.name ?? "");
        if (!name) imagesWithoutAlt++;
      }
      if (role === "heading") {
        const level = typeof node.level === "number" ? node.level : 0;
        const text = String(node.name ?? "").substring(0, 120);
        if (level > 0) headingStructure.push({ level, text });
      }
      const kids = node.children;
      if (Array.isArray(kids)) {
        for (let i = kids.length - 1; i >= 0; i--) pending.push(kids[i]);
      }
    }
  }

  return { nodeCount, landmarkCount, imagesWithoutAlt, headingStructure: headingStructure.slice(0, 30) };
}
|
|
1421
1457
|
async function checkActionability(bridge2, selector) {
|
|
1422
1458
|
const result = await bridge2.send({
|
|
1423
1459
|
type: "browser_evaluate",
|
|
@@ -1444,7 +1480,7 @@ async function checkActionability(bridge2, selector) {
|
|
|
1444
1480
|
return { actionable: true };
|
|
1445
1481
|
})()`
|
|
1446
1482
|
}, 5e3);
|
|
1447
|
-
return result;
|
|
1483
|
+
return result.result ?? result;
|
|
1448
1484
|
}
|
|
1449
1485
|
async function pollActionability(bridge2, selector, timeoutMs = 3e3) {
|
|
1450
1486
|
const start = Date.now();
|
|
@@ -2497,7 +2533,7 @@ function createTools(bridge2, crawlio2) {
|
|
|
2497
2533
|
return toolSuccess(data);
|
|
2498
2534
|
}
|
|
2499
2535
|
},
|
|
2500
|
-
// Network replay
|
|
2536
|
+
// Network replay
|
|
2501
2537
|
{
|
|
2502
2538
|
name: "replay_request",
|
|
2503
2539
|
description: "Re-fire a previously captured network request with optional modifications. Requires active network capture. Specify the request by URL. Optionally override headers, body, or method. Returns the new response status, headers, and body. Useful for API testing, auth token replay, and form submission testing.",
|
|
@@ -3706,17 +3742,531 @@ function createTools(bridge2, crawlio2) {
|
|
|
3706
3742
|
});
|
|
3707
3743
|
}
|
|
3708
3744
|
}
|
|
3745
|
+
},
|
|
3746
|
+
// --- Network idle detection (Phase 11) ---
|
|
3747
|
+
{
|
|
3748
|
+
name: "wait_for_network_idle",
|
|
3749
|
+
description: "Wait for all network requests to complete (idle detection). Uses CDP Network domain event tracking \u2014 catches ALL request types (fetch, XHR, images, CSS, fonts, scripts). Returns when no requests are pending for the specified idle window, or on timeout.",
|
|
3750
|
+
inputSchema: {
|
|
3751
|
+
type: "object",
|
|
3752
|
+
properties: {
|
|
3753
|
+
timeout: { type: "number", description: "Max wait in ms (default 15000, max 30000)" },
|
|
3754
|
+
idleTime: { type: "number", description: "Quiet window before declaring idle in ms (default 500, max 5000)" }
|
|
3755
|
+
}
|
|
3756
|
+
},
|
|
3757
|
+
handler: async (args) => {
|
|
3758
|
+
const schema = z.object({
|
|
3759
|
+
timeout: z.number().int().min(100).max(3e4).default(15e3),
|
|
3760
|
+
idleTime: z.number().int().min(100).max(5e3).default(500)
|
|
3761
|
+
});
|
|
3762
|
+
const parsed = schema.parse(args);
|
|
3763
|
+
const data = await bridge2.send({
|
|
3764
|
+
type: "wait_for_network_idle",
|
|
3765
|
+
timeout: parsed.timeout,
|
|
3766
|
+
idleTime: parsed.idleTime
|
|
3767
|
+
}, TOOL_TIMEOUTS.wait_for_network_idle);
|
|
3768
|
+
return toolSuccess(data);
|
|
3769
|
+
}
|
|
3770
|
+
},
|
|
3771
|
+
// --- Structured data extraction (Phase 11 — full-mode tools) ---
|
|
3772
|
+
{
|
|
3773
|
+
name: "detect_tables",
|
|
3774
|
+
description: "Detect repeating data patterns (tables, lists, grids, card layouts) on the page using class-frequency scoring. Returns ranked candidates with selectors, row counts, and sample text. Use extract_table to extract data from a candidate.",
|
|
3775
|
+
inputSchema: {
|
|
3776
|
+
type: "object",
|
|
3777
|
+
properties: {
|
|
3778
|
+
maxCandidates: { type: "number", description: "Maximum candidates to return (default 5, max 20)" }
|
|
3779
|
+
}
|
|
3780
|
+
},
|
|
3781
|
+
handler: async (args) => {
|
|
3782
|
+
const schema = z.object({
|
|
3783
|
+
maxCandidates: z.number().int().min(1).max(20).default(5)
|
|
3784
|
+
});
|
|
3785
|
+
const parsed = schema.parse(args);
|
|
3786
|
+
const maxCandidates = parsed.maxCandidates;
|
|
3787
|
+
const result = await bridge2.send({
|
|
3788
|
+
type: "browser_evaluate",
|
|
3789
|
+
expression: `(() => {
|
|
3790
|
+
function getClasses(el) {
|
|
3791
|
+
return (el.className || '').toString().trim().split(/\\s+/).filter(c => c && !c.match(/\\d/));
|
|
3792
|
+
}
|
|
3793
|
+
function getMatchingChildren(parent) {
|
|
3794
|
+
const children = [...parent.children].filter(c =>
|
|
3795
|
+
!['SCRIPT','IMG','STYLE','SVG','NOSCRIPT'].includes(c.tagName) &&
|
|
3796
|
+
c.textContent.trim().length > 0
|
|
3797
|
+
);
|
|
3798
|
+
if (children.length < 2) return [];
|
|
3799
|
+
const freq = {};
|
|
3800
|
+
children.forEach(c => {
|
|
3801
|
+
const key = getClasses(c).sort().join(' ');
|
|
3802
|
+
freq[key] = (freq[key] || 0) + 1;
|
|
3803
|
+
});
|
|
3804
|
+
const threshold = children.length / 2 - 2;
|
|
3805
|
+
let patterns = Object.keys(freq).filter(k => k && freq[k] >= threshold);
|
|
3806
|
+
if (!patterns.length) {
|
|
3807
|
+
const indiv = {};
|
|
3808
|
+
children.forEach(c => getClasses(c).forEach(cls => { indiv[cls] = (indiv[cls] || 0) + 1; }));
|
|
3809
|
+
patterns = Object.keys(indiv).filter(k => indiv[k] >= threshold);
|
|
3810
|
+
}
|
|
3811
|
+
return children.filter(c => {
|
|
3812
|
+
const cls = getClasses(c);
|
|
3813
|
+
return patterns.some(p => p.split(' ').every(pc => !pc || cls.includes(pc)));
|
|
3814
|
+
});
|
|
3815
|
+
}
|
|
3816
|
+
function buildSelector(el) {
|
|
3817
|
+
const parts = [];
|
|
3818
|
+
let node = el;
|
|
3819
|
+
while (node && node !== document.body && node !== document.documentElement) {
|
|
3820
|
+
let tag = node.tagName.toLowerCase();
|
|
3821
|
+
if (node.id && !/\\d/.test(node.id)) tag += '#' + CSS.escape(node.id);
|
|
3822
|
+
else if (node.className && typeof node.className === 'string') {
|
|
3823
|
+
const cls = node.className.trim().split(/\\s+/).filter(Boolean).slice(0, 3);
|
|
3824
|
+
if (cls.length) tag += '.' + cls.map(c => CSS.escape(c)).join('.');
|
|
3825
|
+
}
|
|
3826
|
+
parts.unshift(tag);
|
|
3827
|
+
node = node.parentElement;
|
|
3828
|
+
}
|
|
3829
|
+
return parts.join(' > ');
|
|
3830
|
+
}
|
|
3831
|
+
const candidates = [];
|
|
3832
|
+
document.querySelectorAll('body *').forEach(el => {
|
|
3833
|
+
const w = el.offsetWidth, h = el.offsetHeight;
|
|
3834
|
+
if (w < 100 || h < 50) return;
|
|
3835
|
+
const matching = getMatchingChildren(el);
|
|
3836
|
+
if (matching.length < 2) return;
|
|
3837
|
+
const score = w * h * matching.length * matching.length;
|
|
3838
|
+
const text = matching.map(c => c.textContent.trim()).join(' ').substring(0, 200);
|
|
3839
|
+
candidates.push({ selector: buildSelector(el), score, rowCount: matching.length, sampleText: text, area: w * h });
|
|
3840
|
+
});
|
|
3841
|
+
candidates.sort((a, b) => b.score - a.score);
|
|
3842
|
+
return candidates.slice(0, ${maxCandidates});
|
|
3843
|
+
})()`
|
|
3844
|
+
}, TOOL_TIMEOUTS.detect_tables);
|
|
3845
|
+
const data = result;
|
|
3846
|
+
return toolSuccess(data?.result ?? []);
|
|
3847
|
+
}
|
|
3848
|
+
},
|
|
3849
|
+
{
|
|
3850
|
+
name: "extract_table",
|
|
3851
|
+
description: "Extract structured data from a container element using IDS-inspired recursive path-keyed extraction. Pass a selector from detect_tables. Returns columns (with fill rates) and rows as key-value objects.",
|
|
3852
|
+
inputSchema: {
|
|
3853
|
+
type: "object",
|
|
3854
|
+
properties: {
|
|
3855
|
+
selector: { type: "string", description: "CSS selector of the container element" },
|
|
3856
|
+
maxRows: { type: "number", description: "Maximum rows to extract (default 200, max 1000)" }
|
|
3857
|
+
},
|
|
3858
|
+
required: ["selector"]
|
|
3859
|
+
},
|
|
3860
|
+
handler: async (args) => {
|
|
3861
|
+
const schema = z.object({
|
|
3862
|
+
selector: z.string().min(1),
|
|
3863
|
+
maxRows: z.number().int().min(1).max(1e3).default(200)
|
|
3864
|
+
});
|
|
3865
|
+
const parsed = schema.parse(args);
|
|
3866
|
+
const selector = parsed.selector;
|
|
3867
|
+
const maxRows = parsed.maxRows;
|
|
3868
|
+
const result = await bridge2.send({
|
|
3869
|
+
type: "browser_evaluate",
|
|
3870
|
+
expression: `(() => {
|
|
3871
|
+
function getClasses(el) {
|
|
3872
|
+
return (el.className || '').toString().trim().split(/\\s+/).filter(c => c && !c.match(/\\d/));
|
|
3873
|
+
}
|
|
3874
|
+
function getMatchingChildren(parent) {
|
|
3875
|
+
const children = [...parent.children].filter(c =>
|
|
3876
|
+
!['SCRIPT','IMG','STYLE','SVG','NOSCRIPT'].includes(c.tagName) &&
|
|
3877
|
+
c.textContent.trim().length > 0
|
|
3878
|
+
);
|
|
3879
|
+
if (children.length < 2) return [];
|
|
3880
|
+
const freq = {};
|
|
3881
|
+
children.forEach(c => {
|
|
3882
|
+
const key = getClasses(c).sort().join(' ');
|
|
3883
|
+
freq[key] = (freq[key] || 0) + 1;
|
|
3884
|
+
});
|
|
3885
|
+
const threshold = children.length / 2 - 2;
|
|
3886
|
+
let patterns = Object.keys(freq).filter(k => k && freq[k] >= threshold);
|
|
3887
|
+
if (!patterns.length) {
|
|
3888
|
+
const indiv = {};
|
|
3889
|
+
children.forEach(c => getClasses(c).forEach(cls => { indiv[cls] = (indiv[cls] || 0) + 1; }));
|
|
3890
|
+
patterns = Object.keys(indiv).filter(k => indiv[k] >= threshold);
|
|
3891
|
+
}
|
|
3892
|
+
return children.filter(c => {
|
|
3893
|
+
const cls = getClasses(c);
|
|
3894
|
+
return patterns.some(p => p.split(' ').every(pc => !pc || cls.includes(pc)));
|
|
3895
|
+
});
|
|
3896
|
+
}
|
|
3897
|
+
function directText(el) {
|
|
3898
|
+
let text = '';
|
|
3899
|
+
for (const n of el.childNodes) {
|
|
3900
|
+
if (n.nodeType === 3) text += n.textContent;
|
|
3901
|
+
}
|
|
3902
|
+
return text.trim();
|
|
3903
|
+
}
|
|
3904
|
+
function extractRow(el, prefix) {
|
|
3905
|
+
const data = {};
|
|
3906
|
+
const tag = el.tagName.toLowerCase();
|
|
3907
|
+
const cls = getClasses(el).slice(0, 2).join('.');
|
|
3908
|
+
const key = prefix + '/' + (cls ? tag + '.' + cls : tag);
|
|
3909
|
+
const dt = directText(el);
|
|
3910
|
+
if (dt) data[key] = dt;
|
|
3911
|
+
if (el.href) data[key + ' href'] = el.href;
|
|
3912
|
+
if (el.src) data[key + ' src'] = el.src;
|
|
3913
|
+
for (const child of el.children) {
|
|
3914
|
+
Object.assign(data, extractRow(child, key));
|
|
3915
|
+
}
|
|
3916
|
+
return data;
|
|
3917
|
+
}
|
|
3918
|
+
const container = document.querySelector(${JSON.stringify(selector)});
|
|
3919
|
+
if (!container) return { selector: ${JSON.stringify(selector)}, columns: [], rows: [], totalRows: 0, truncated: false };
|
|
3920
|
+
const matching = getMatchingChildren(container);
|
|
3921
|
+
const totalRows = matching.length;
|
|
3922
|
+
const limited = matching.slice(0, ${maxRows});
|
|
3923
|
+
const rawRows = limited.map(el => extractRow(el, ''));
|
|
3924
|
+
const allKeys = new Set();
|
|
3925
|
+
rawRows.forEach(r => Object.keys(r).forEach(k => allKeys.add(k)));
|
|
3926
|
+
const columns = [];
|
|
3927
|
+
const keyToName = {};
|
|
3928
|
+
for (const key of allKeys) {
|
|
3929
|
+
const values = rawRows.map(r => r[key] || '');
|
|
3930
|
+
const filled = values.filter(v => v).length;
|
|
3931
|
+
const fillRate = filled / rawRows.length;
|
|
3932
|
+
if (fillRate < 0.2) continue;
|
|
3933
|
+
const unique = new Set(values.filter(v => v));
|
|
3934
|
+
if (unique.size <= 1 && rawRows.length > 2) continue;
|
|
3935
|
+
const parts = key.split('/').filter(Boolean);
|
|
3936
|
+
const last = parts[parts.length - 1] || key;
|
|
3937
|
+
let name = last.replace(/^[a-z]+\\./, '').replace(/ (href|src)$/, ' $1').replace(/\\./g, ' ').trim() || last;
|
|
3938
|
+
keyToName[key] = name;
|
|
3939
|
+
columns.push({ name, path: key, fillRate: Math.round(fillRate * 100) / 100 });
|
|
3940
|
+
}
|
|
3941
|
+
const nameCount = {};
|
|
3942
|
+
columns.forEach(c => { nameCount[c.name] = (nameCount[c.name] || 0) + 1; });
|
|
3943
|
+
const nameSeen = {};
|
|
3944
|
+
columns.forEach(c => {
|
|
3945
|
+
if (nameCount[c.name] > 1) {
|
|
3946
|
+
nameSeen[c.name] = (nameSeen[c.name] || 0) + 1;
|
|
3947
|
+
if (nameSeen[c.name] > 1) c.name = c.name + ' ' + nameSeen[c.name];
|
|
3948
|
+
}
|
|
3949
|
+
});
|
|
3950
|
+
const rows = rawRows.map(raw => {
|
|
3951
|
+
const row = {};
|
|
3952
|
+
columns.forEach(c => { row[c.name] = raw[c.path] || ''; });
|
|
3953
|
+
return row;
|
|
3954
|
+
});
|
|
3955
|
+
return { selector: ${JSON.stringify(selector)}, columns, rows, totalRows, truncated: totalRows > ${maxRows} };
|
|
3956
|
+
})()`
|
|
3957
|
+
}, TOOL_TIMEOUTS.extract_table);
|
|
3958
|
+
const data = result;
|
|
3959
|
+
return toolSuccess(data?.result ?? { selector, columns: [], rows: [], totalRows: 0, truncated: false });
|
|
3960
|
+
}
|
|
3961
|
+
},
|
|
3962
|
+
{
|
|
3963
|
+
name: "extract_data",
|
|
3964
|
+
description: "Compound extraction: detect tables + extract each + collect JSON-LD structured data from the page. Returns all tables with columns/rows plus any schema.org/JSON-LD data. Combines detect_tables, extract_table, and JSON-LD extraction in one call.",
|
|
3965
|
+
inputSchema: {
|
|
3966
|
+
type: "object",
|
|
3967
|
+
properties: {
|
|
3968
|
+
maxTables: { type: "number", description: "Maximum tables to extract (default 3, max 10)" },
|
|
3969
|
+
maxRowsPerTable: { type: "number", description: "Maximum rows per table (default 200, max 1000)" }
|
|
3970
|
+
}
|
|
3971
|
+
},
|
|
3972
|
+
handler: async (args) => {
|
|
3973
|
+
const schema = z.object({
|
|
3974
|
+
maxTables: z.number().int().min(1).max(10).default(3),
|
|
3975
|
+
maxRowsPerTable: z.number().int().min(1).max(1e3).default(200)
|
|
3976
|
+
});
|
|
3977
|
+
const parsed = schema.parse(args);
|
|
3978
|
+
const maxTables = parsed.maxTables;
|
|
3979
|
+
const maxRowsPerTable = parsed.maxRowsPerTable;
|
|
3980
|
+
const status = await bridge2.send({ type: "get_connection_status" }, 5e3);
|
|
3981
|
+
const url = status?.connectedTab?.url || "";
|
|
3982
|
+
const detectResult = await bridge2.send({
|
|
3983
|
+
type: "browser_evaluate",
|
|
3984
|
+
expression: `(() => {
|
|
3985
|
+
function getClasses(el) {
|
|
3986
|
+
return (el.className || '').toString().trim().split(/\\s+/).filter(c => c && !c.match(/\\d/));
|
|
3987
|
+
}
|
|
3988
|
+
function getMatchingChildren(parent) {
|
|
3989
|
+
const children = [...parent.children].filter(c =>
|
|
3990
|
+
!['SCRIPT','IMG','STYLE','SVG','NOSCRIPT'].includes(c.tagName) &&
|
|
3991
|
+
c.textContent.trim().length > 0
|
|
3992
|
+
);
|
|
3993
|
+
if (children.length < 2) return [];
|
|
3994
|
+
const freq = {};
|
|
3995
|
+
children.forEach(c => {
|
|
3996
|
+
const key = getClasses(c).sort().join(' ');
|
|
3997
|
+
freq[key] = (freq[key] || 0) + 1;
|
|
3998
|
+
});
|
|
3999
|
+
const threshold = children.length / 2 - 2;
|
|
4000
|
+
let patterns = Object.keys(freq).filter(k => k && freq[k] >= threshold);
|
|
4001
|
+
if (!patterns.length) {
|
|
4002
|
+
const indiv = {};
|
|
4003
|
+
children.forEach(c => getClasses(c).forEach(cls => { indiv[cls] = (indiv[cls] || 0) + 1; }));
|
|
4004
|
+
patterns = Object.keys(indiv).filter(k => indiv[k] >= threshold);
|
|
4005
|
+
}
|
|
4006
|
+
return children.filter(c => {
|
|
4007
|
+
const cls = getClasses(c);
|
|
4008
|
+
return patterns.some(p => p.split(' ').every(pc => !pc || cls.includes(pc)));
|
|
4009
|
+
});
|
|
4010
|
+
}
|
|
4011
|
+
function buildSelector(el) {
|
|
4012
|
+
const parts = [];
|
|
4013
|
+
let node = el;
|
|
4014
|
+
while (node && node !== document.body && node !== document.documentElement) {
|
|
4015
|
+
let tag = node.tagName.toLowerCase();
|
|
4016
|
+
if (node.id && !/\\d/.test(node.id)) tag += '#' + CSS.escape(node.id);
|
|
4017
|
+
else if (node.className && typeof node.className === 'string') {
|
|
4018
|
+
const cls = node.className.trim().split(/\\s+/).filter(Boolean).slice(0, 3);
|
|
4019
|
+
if (cls.length) tag += '.' + cls.map(c => CSS.escape(c)).join('.');
|
|
4020
|
+
}
|
|
4021
|
+
parts.unshift(tag);
|
|
4022
|
+
node = node.parentElement;
|
|
4023
|
+
}
|
|
4024
|
+
return parts.join(' > ');
|
|
4025
|
+
}
|
|
4026
|
+
const candidates = [];
|
|
4027
|
+
document.querySelectorAll('body *').forEach(el => {
|
|
4028
|
+
const w = el.offsetWidth, h = el.offsetHeight;
|
|
4029
|
+
if (w < 100 || h < 50) return;
|
|
4030
|
+
const matching = getMatchingChildren(el);
|
|
4031
|
+
if (matching.length < 2) return;
|
|
4032
|
+
const score = w * h * matching.length * matching.length;
|
|
4033
|
+
const text = matching.map(c => c.textContent.trim()).join(' ').substring(0, 200);
|
|
4034
|
+
candidates.push({ selector: buildSelector(el), score, rowCount: matching.length, sampleText: text, area: w * h });
|
|
4035
|
+
});
|
|
4036
|
+
candidates.sort((a, b) => b.score - a.score);
|
|
4037
|
+
return candidates.slice(0, ${maxTables});
|
|
4038
|
+
})()`
|
|
4039
|
+
}, TOOL_TIMEOUTS.detect_tables);
|
|
4040
|
+
const candidates = detectResult?.result ?? [];
|
|
4041
|
+
const tables = [];
|
|
4042
|
+
for (const candidate of candidates) {
|
|
4043
|
+
const sel = candidate.selector;
|
|
4044
|
+
const extractResult = await bridge2.send({
|
|
4045
|
+
type: "browser_evaluate",
|
|
4046
|
+
expression: `(() => {
|
|
4047
|
+
function getClasses(el) {
|
|
4048
|
+
return (el.className || '').toString().trim().split(/\\s+/).filter(c => c && !c.match(/\\d/));
|
|
4049
|
+
}
|
|
4050
|
+
function getMatchingChildren(parent) {
|
|
4051
|
+
const children = [...parent.children].filter(c =>
|
|
4052
|
+
!['SCRIPT','IMG','STYLE','SVG','NOSCRIPT'].includes(c.tagName) &&
|
|
4053
|
+
c.textContent.trim().length > 0
|
|
4054
|
+
);
|
|
4055
|
+
if (children.length < 2) return [];
|
|
4056
|
+
const freq = {};
|
|
4057
|
+
children.forEach(c => {
|
|
4058
|
+
const key = getClasses(c).sort().join(' ');
|
|
4059
|
+
freq[key] = (freq[key] || 0) + 1;
|
|
4060
|
+
});
|
|
4061
|
+
const threshold = children.length / 2 - 2;
|
|
4062
|
+
let patterns = Object.keys(freq).filter(k => k && freq[k] >= threshold);
|
|
4063
|
+
if (!patterns.length) {
|
|
4064
|
+
const indiv = {};
|
|
4065
|
+
children.forEach(c => getClasses(c).forEach(cls => { indiv[cls] = (indiv[cls] || 0) + 1; }));
|
|
4066
|
+
patterns = Object.keys(indiv).filter(k => indiv[k] >= threshold);
|
|
4067
|
+
}
|
|
4068
|
+
return children.filter(c => {
|
|
4069
|
+
const cls = getClasses(c);
|
|
4070
|
+
return patterns.some(p => p.split(' ').every(pc => !pc || cls.includes(pc)));
|
|
4071
|
+
});
|
|
4072
|
+
}
|
|
4073
|
+
function directText(el) {
|
|
4074
|
+
let text = '';
|
|
4075
|
+
for (const n of el.childNodes) {
|
|
4076
|
+
if (n.nodeType === 3) text += n.textContent;
|
|
4077
|
+
}
|
|
4078
|
+
return text.trim();
|
|
4079
|
+
}
|
|
4080
|
+
function extractRow(el, prefix) {
|
|
4081
|
+
const data = {};
|
|
4082
|
+
const tag = el.tagName.toLowerCase();
|
|
4083
|
+
const cls = getClasses(el).slice(0, 2).join('.');
|
|
4084
|
+
const key = prefix + '/' + (cls ? tag + '.' + cls : tag);
|
|
4085
|
+
const dt = directText(el);
|
|
4086
|
+
if (dt) data[key] = dt;
|
|
4087
|
+
if (el.href) data[key + ' href'] = el.href;
|
|
4088
|
+
if (el.src) data[key + ' src'] = el.src;
|
|
4089
|
+
for (const child of el.children) {
|
|
4090
|
+
Object.assign(data, extractRow(child, key));
|
|
4091
|
+
}
|
|
4092
|
+
return data;
|
|
4093
|
+
}
|
|
4094
|
+
const container = document.querySelector(${JSON.stringify(sel)});
|
|
4095
|
+
if (!container) return { selector: ${JSON.stringify(sel)}, columns: [], rows: [], totalRows: 0, truncated: false };
|
|
4096
|
+
const matching = getMatchingChildren(container);
|
|
4097
|
+
const totalRows = matching.length;
|
|
4098
|
+
const limited = matching.slice(0, ${maxRowsPerTable});
|
|
4099
|
+
const rawRows = limited.map(el => extractRow(el, ''));
|
|
4100
|
+
const allKeys = new Set();
|
|
4101
|
+
rawRows.forEach(r => Object.keys(r).forEach(k => allKeys.add(k)));
|
|
4102
|
+
const columns = [];
|
|
4103
|
+
for (const key of allKeys) {
|
|
4104
|
+
const values = rawRows.map(r => r[key] || '');
|
|
4105
|
+
const filled = values.filter(v => v).length;
|
|
4106
|
+
const fillRate = filled / rawRows.length;
|
|
4107
|
+
if (fillRate < 0.2) continue;
|
|
4108
|
+
const unique = new Set(values.filter(v => v));
|
|
4109
|
+
if (unique.size <= 1 && rawRows.length > 2) continue;
|
|
4110
|
+
const parts = key.split('/').filter(Boolean);
|
|
4111
|
+
const last = parts[parts.length - 1] || key;
|
|
4112
|
+
let name = last.replace(/^[a-z]+\\./, '').replace(/ (href|src)$/, ' $1').replace(/\\./g, ' ').trim() || last;
|
|
4113
|
+
columns.push({ name, path: key, fillRate: Math.round(fillRate * 100) / 100 });
|
|
4114
|
+
}
|
|
4115
|
+
const nameCount = {};
|
|
4116
|
+
columns.forEach(c => { nameCount[c.name] = (nameCount[c.name] || 0) + 1; });
|
|
4117
|
+
const nameSeen = {};
|
|
4118
|
+
columns.forEach(c => {
|
|
4119
|
+
if (nameCount[c.name] > 1) {
|
|
4120
|
+
nameSeen[c.name] = (nameSeen[c.name] || 0) + 1;
|
|
4121
|
+
if (nameSeen[c.name] > 1) c.name = c.name + ' ' + nameSeen[c.name];
|
|
4122
|
+
}
|
|
4123
|
+
});
|
|
4124
|
+
const rows = rawRows.map(raw => {
|
|
4125
|
+
const row = {};
|
|
4126
|
+
columns.forEach(c => { row[c.name] = raw[c.path] || ''; });
|
|
4127
|
+
return row;
|
|
4128
|
+
});
|
|
4129
|
+
return { selector: ${JSON.stringify(sel)}, columns, rows, totalRows, truncated: totalRows > ${maxRowsPerTable} };
|
|
4130
|
+
})()`
|
|
4131
|
+
}, TOOL_TIMEOUTS.extract_table);
|
|
4132
|
+
const extraction = extractResult?.result;
|
|
4133
|
+
if (extraction && (extraction.columns?.length ?? 0) >= 2 && (extraction.rows?.length ?? 0) >= 2) {
|
|
4134
|
+
tables.push(extraction);
|
|
4135
|
+
}
|
|
4136
|
+
}
|
|
4137
|
+
const jsonLdResult = await bridge2.send({
|
|
4138
|
+
type: "browser_evaluate",
|
|
4139
|
+
expression: `(() => {
|
|
4140
|
+
const scripts = document.querySelectorAll('script[type="application/ld+json"]');
|
|
4141
|
+
const result = [];
|
|
4142
|
+
scripts.forEach(s => { try { result.push(JSON.parse(s.textContent)); } catch {} });
|
|
4143
|
+
return result;
|
|
4144
|
+
})()`
|
|
4145
|
+
}, 5e3);
|
|
4146
|
+
return toolSuccess({
|
|
4147
|
+
tables,
|
|
4148
|
+
structuredData: jsonLdResult?.result ?? [],
|
|
4149
|
+
url
|
|
4150
|
+
});
|
|
4151
|
+
}
|
|
3709
4152
|
}
|
|
3710
4153
|
];
|
|
3711
4154
|
}
|
|
4155
|
+
// Ordered list of the dimension names that buildComparisonScaffold evaluates
// for each site; observeField has one case per entry.
var COMPARISON_DIMENSIONS = [
  "framework", "performance", "security", "seo",
  "accessibility", "error-surface", "third-party-load", "architecture",
  "content-delivery", "mobile-readiness", "data-structure"
];
|
|
4168
|
+
/**
 * Report what a site's collected data says about one comparison dimension.
 *
 * @param {object} data Collected site data; the fields read are `capture`
 *   (framework / console / network), `performance`, `security`, `meta`,
 *   `accessibility`, `mobileReadiness` and `gaps`.
 * @param {string} dimension Dimension name.
 * @returns {object} `{ type, dimension, value }` when data exists (`type` is
 *   "present", or "degraded" for mobile readiness without a viewport meta),
 *   otherwise `{ type: "absent", dimension }`, with a `gap` attached for the
 *   framework, performance, security, accessibility and mobile-readiness
 *   dimensions. Unknown dimensions are reported as absent.
 */
function observeField(data, dimension) {
  const found = (value) => ({ type: "present", dimension, value });
  const missing = () => ({ type: "absent", dimension });
  const defaultGap = (reason) => ({ dimension, reason, impact: "data-absent", reducesConfidence: false });
  // Prefer a gap already recorded for this dimension over the generic one.
  const missingWithGap = (reason) => ({
    type: "absent",
    dimension,
    gap: data.gaps?.find((g) => g.dimension === dimension) ?? defaultGap(reason)
  });

  if (dimension === "framework") {
    const framework = data.capture?.framework;
    if (framework) return found(framework);
    // Framework never consults recorded gaps; it always uses the generic one.
    return { type: "absent", dimension, gap: defaultGap("No framework detected") };
  }
  if (dimension === "performance") {
    return data.performance ? found(data.performance) : missingWithGap("Performance data unavailable");
  }
  if (dimension === "security") {
    return data.security ? found(data.security) : missingWithGap("Security data unavailable");
  }
  if (dimension === "seo") {
    const title = data.meta?._title;
    const canonical = data.meta?._canonical;
    const structuredData = data.meta?._structuredData?.length ?? 0;
    if (title || canonical || structuredData > 0) {
      return found({ title, canonical, structuredData });
    }
    return missing();
  }
  if (dimension === "accessibility") {
    return data.accessibility ? found(data.accessibility) : missingWithGap("Accessibility data unavailable");
  }
  if (dimension === "error-surface") {
    const consoleLogs = data.capture?.console;
    return consoleLogs ? found(consoleLogs) : missing();
  }
  if (dimension === "third-party-load") {
    const network = data.capture?.network;
    return network ? found(network) : missing();
  }
  if (dimension === "architecture") {
    const framework = data.capture?.framework;
    return framework ? found(framework) : missing();
  }
  if (dimension === "content-delivery") {
    return data.security ? found(data.security) : missing();
  }
  if (dimension === "mobile-readiness") {
    const readiness = data.mobileReadiness;
    if (!readiness) return missingWithGap("Mobile readiness data unavailable");
    return { type: readiness.hasViewportMeta ? "present" : "degraded", dimension, value: readiness };
  }
  if (dimension === "data-structure") {
    const structuredDataCount = data.meta?._structuredData?.length ?? 0;
    return structuredDataCount > 0 ? found({ structuredDataCount }) : missing();
  }
  return missing();
}
|
|
4202
|
+
/**
 * Flatten the numeric measurements scattered through a site's collected data
 * into a single `name -> number` map.
 *
 * Sources, in the order their keys are inserted:
 *   - `performance.webVitals`: lcp -> LCP, cls -> CLS, fid -> FID
 *   - `performance.timing`: domContentLoaded, load -> loadTime, firstByte -> TTFB
 *   - `performance.chrome`: taskDuration, scriptDuration, jsHeapUsedSize
 *   - `performance.metrics`: FCP, plus LCP / CLS only as a fallback when
 *     webVitals did not already provide them
 *   - `capture.network.total` -> networkRequests
 *   - `capture.dom.nodeCount` -> domNodeCount
 * Values that are not of type number are ignored.
 *
 * @param {object} data Collected site data (`performance` and `capture` are optional).
 * @returns {Object<string, number>} The metrics found; empty when there are none.
 */
function extractMetrics(data) {
  const metrics = {};
  // Store `value` under `name` when it is a number. With `onlyIfMissing`, an
  // existing entry wins — checked by presence, not truthiness, so a measured
  // 0 (e.g. a perfect CLS) is not replaced by the fallback source.
  const record = (name, value, onlyIfMissing = false) => {
    if (typeof value !== "number") return;
    if (onlyIfMissing && metrics[name] !== undefined) return;
    metrics[name] = value;
  };

  const perf = data.performance;
  if (perf) {
    const wv = perf.webVitals;
    if (wv) {
      record("LCP", wv.lcp);
      record("CLS", wv.cls);
      record("FID", wv.fid);
    }
    const timing = perf.timing;
    if (timing) {
      record("domContentLoaded", timing.domContentLoaded);
      record("loadTime", timing.load);
      record("TTFB", timing.firstByte);
    }
    const chrome = perf.chrome;
    if (chrome) {
      record("taskDuration", chrome.taskDuration);
      record("scriptDuration", chrome.scriptDuration);
      record("jsHeapUsedSize", chrome.jsHeapUsedSize);
    }
    const m = perf.metrics;
    if (m) {
      record("LCP", m.LCP, true);
      record("FCP", m.FCP);
      record("CLS", m.CLS, true);
    }
  }

  const net = data.capture?.network;
  if (net) record("networkRequests", net.total);
  const dom = data.capture?.dom;
  if (dom) record("domNodeCount", dom.nodeCount);
  return metrics;
}
|
|
4237
|
+
/**
 * Build the side-by-side skeleton used to compare two sites.
 *
 * @param {object} a Collected data for site A.
 * @param {object} b Collected data for site B.
 * @returns {{dimensions: Array, sharedFields: string[],
 *            missingFields: {siteA: string[], siteB: string[]}, metrics: Array}}
 *   - `dimensions`: one entry per COMPARISON_DIMENSIONS name with both sites'
 *     observations; `comparable` is true only when both are "present".
 *   - `sharedFields`: `capture` keys that exist on both sites.
 *   - `missingFields.siteA`: `capture` keys only B has; `.siteB`: keys only A has.
 *   - `metrics`: the union of both sites' extracted metrics, `null` where a
 *     site has no value.
 */
function buildComparisonScaffold(a, b) {
  const dimensions = COMPARISON_DIMENSIONS.map((name) => {
    const obsA = observeField(a, name);
    const obsB = observeField(b, name);
    return { name, siteA: obsA, siteB: obsB, comparable: obsA.type === "present" && obsB.type === "present" };
  });

  // `capture` is optional everywhere else (observeField / extractMetrics use
  // `capture?.`), so a site without it contributes no fields rather than
  // making Object.keys throw.
  const fieldsA = new Set(Object.keys(a.capture ?? {}));
  const fieldsB = new Set(Object.keys(b.capture ?? {}));
  const sharedFields = [...fieldsA].filter((f) => fieldsB.has(f));
  const missingFields = {
    siteA: [...fieldsB].filter((f) => !fieldsA.has(f)),
    siteB: [...fieldsA].filter((f) => !fieldsB.has(f))
  };

  const metricsA = extractMetrics(a);
  const metricsB = extractMetrics(b);
  const allMetricKeys = /* @__PURE__ */ new Set([...Object.keys(metricsA), ...Object.keys(metricsB)]);
  const metrics = [...allMetricKeys].map((name) => ({
    name,
    siteA: metricsA[name] ?? null,
    siteB: metricsB[name] ?? null
  }));

  return { dimensions, sharedFields, missingFields, metrics };
}
|
|
3712
4260
|
/**
 * Pull the reference id out of a snapshot-style selector.
 *
 * @param {string} selector Selector text; surrounding whitespace is ignored.
 * @returns {string|null} The alphanumeric id when the whole selector has the
 *   form `[ref=<id>]`, otherwise null.
 */
function parseSnapshotRef(selector) {
  const hit = selector.trim().match(/^\[ref=([a-zA-Z0-9]+)\]$/);
  if (hit === null) return null;
  return hit[1];
}
|
|
3716
4264
|
async function buildSmartObject(bridge2) {
|
|
3717
4265
|
// Send a script to the page through the bridge's "browser_evaluate" message
// (5 s timeout) and return the bridge's promise.
// A script that contains a `return` statement and does not already begin with
// "(" is wrapped in an async IIFE so the `return` is legal; anything else
// (plain expressions, scripts that are already parenthesised) is sent as-is.
const evaluate = (expression) => {
  const startsWithParen = expression.trim().startsWith("(");
  const containsReturn = /(?:^|[;\n{])\s*return\s/m.test(expression);
  let script = expression;
  if (containsReturn && !startsWithParen) {
    script = `(async () => { ${expression} })()`;
  }
  return bridge2.send({ type: "browser_evaluate", expression: script }, 5e3);
};
|
|
3722
4272
|
const smart = {
|
|
@@ -3894,6 +4444,245 @@ async function buildSmartObject(bridge2) {
|
|
|
3894
4444
|
getVersion: () => evaluate(`window.jQuery?.fn?.jquery`)
|
|
3895
4445
|
};
|
|
3896
4446
|
}
|
|
4447
|
+
/**
 * Validate a finding, apply the session confidence cap, store it and return it.
 *
 * @param {object} input
 * @param {string} input.claim - Non-empty statement being asserted.
 * @param {string[]} input.evidence - Non-empty list of evidence strings.
 * @param {string} input.sourceUrl - Non-empty URL the finding is based on.
 * @param {"high"|"medium"|"low"} input.confidence
 * @param {string} input.method - Non-empty description of how it was obtained.
 * @param {string} [input.dimension] - When a session gap with the same
 *   dimension has `reducesConfidence` set, confidence is lowered one step
 *   (high -> medium, medium -> low; low stays low and is not marked capped).
 * @returns {object} The stored finding. `confidenceCapped` and `cappedBy` are
 *   present only when the confidence was actually lowered.
 * @throws {Error} When input is not an object or any field fails validation.
 */
smart.finding = (input) => {
  // Fail with a readable message instead of a destructuring TypeError.
  if (input === null || typeof input !== "object") throw new Error("finding input is required (object)");
  const { claim, evidence, sourceUrl, confidence, method, dimension } = input;
  if (typeof claim !== "string" || !claim) throw new Error("finding.claim is required (string)");
  if (!Array.isArray(evidence) || evidence.length === 0) throw new Error("finding.evidence is required (non-empty string[])");
  if (typeof sourceUrl !== "string" || !sourceUrl) throw new Error("finding.sourceUrl is required (string)");
  if (!["high", "medium", "low"].includes(confidence)) throw new Error("finding.confidence must be 'high' | 'medium' | 'low'");
  if (typeof method !== "string" || !method) throw new Error("finding.method is required (string)");
  if (evidence.some((item) => typeof item !== "string")) throw new Error("finding.evidence[] must be strings");

  const hasDimension = typeof dimension === "string";
  const gap = hasDimension
    ? sessionGaps.find((g) => g.dimension === dimension && g.reducesConfidence)
    : void 0;
  const capMap = { high: "medium", medium: "low", low: "low" };
  const finalConfidence = gap ? capMap[confidence] : confidence;

  const finding = {
    claim,
    // Copy so later mutation of the caller's array cannot rewrite the
    // evidence already recorded in the session.
    evidence: [...evidence],
    sourceUrl,
    confidence: finalConfidence,
    method,
    dimension: hasDimension ? dimension : void 0
  };
  if (finalConfidence !== confidence) {
    finding.confidenceCapped = true;
    finding.cappedBy = gap.dimension;
  }
  sessionFindings.push(finding);
  return finding;
};
|
|
4487
|
+
// Return the findings recorded so far as a fresh array, so pushing to or
// splicing the returned array does not touch the session store. The finding
// objects themselves are shared, not cloned.
smart.findings = () => sessionFindings.slice();
// Forget all recorded findings and gaps by rebinding both session stores to
// new empty arrays; arrays handed out earlier are left untouched.
smart.clearFindings = () => {
  sessionGaps = [];
  sessionFindings = [];
};
|
|
4492
|
+
/**
 * Find containers in the current page whose children repeat a class pattern.
 *
 * The scan runs inside the page via `evaluate`. Every element under <body>
 * that is at least 100px wide and 50px tall is checked; its non-empty children
 * (excluding script/img/style/svg/noscript) are grouped by class names, where
 * class names containing a digit are ignored. A container with at least two
 * pattern-matching children becomes a candidate scored as
 * area * matchingChildren^2.
 *
 * @param {{maxCandidates?: number}} [opts] - Number of top-scoring candidates
 *   to return (default 5). Coerced to a non-negative integer; a value that is
 *   not a finite number falls back to 5.
 * @returns {Promise<Array<{selector: string, score: number, rowCount: number, sampleText: string, area: number}>>}
 *   Candidates sorted by descending score, or [] when the evaluation yields
 *   no result.
 */
smart.detectTables = async (opts) => {
  // The limit is spliced into the page script, so only a plain integer may
  // ever reach the template below.
  const requested = Number(opts?.maxCandidates ?? 5);
  const maxCandidates = Number.isFinite(requested) ? Math.max(0, Math.trunc(requested)) : 5;
  const result = await evaluate(`(() => {
    function getClasses(el) {
      // Read the attribute: className is an SVGAnimatedString object on SVG elements.
      return (el.getAttribute('class') || '').trim().split(/\\s+/).filter(c => c && !c.match(/\\d/));
    }
    function getMatchingChildren(parent) {
      const children = [...parent.children].filter(c =>
        // tagName keeps its lowercase form for inline SVG in HTML documents.
        !['SCRIPT','IMG','STYLE','SVG','NOSCRIPT'].includes(c.tagName.toUpperCase()) &&
        c.textContent.trim().length > 0
      );
      if (children.length < 2) return [];
      const freq = {};
      children.forEach(c => {
        const key = getClasses(c).sort().join(' ');
        freq[key] = (freq[key] || 0) + 1;
      });
      const threshold = children.length / 2 - 2;
      let patterns = Object.keys(freq).filter(k => k && freq[k] >= threshold);
      if (!patterns.length) {
        const indiv = {};
        children.forEach(c => getClasses(c).forEach(cls => { indiv[cls] = (indiv[cls] || 0) + 1; }));
        patterns = Object.keys(indiv).filter(k => indiv[k] >= threshold);
      }
      return children.filter(c => {
        const cls = getClasses(c);
        return patterns.some(p => p.split(' ').every(pc => !pc || cls.includes(pc)));
      });
    }
    function buildSelector(el) {
      const parts = [];
      let node = el;
      while (node && node !== document.body && node !== document.documentElement) {
        let tag = node.tagName.toLowerCase();
        if (node.id && !/\\d/.test(node.id)) tag += '#' + CSS.escape(node.id);
        else if (node.className && typeof node.className === 'string') {
          const cls = node.className.trim().split(/\\s+/).filter(Boolean).slice(0, 3);
          if (cls.length) tag += '.' + cls.map(c => CSS.escape(c)).join('.');
        }
        parts.unshift(tag);
        node = node.parentElement;
      }
      return parts.join(' > ');
    }
    const candidates = [];
    document.querySelectorAll('body *').forEach(el => {
      const w = el.offsetWidth, h = el.offsetHeight;
      if (w < 100 || h < 50) return;
      const matching = getMatchingChildren(el);
      if (matching.length < 2) return;
      const score = w * h * matching.length * matching.length;
      const text = matching.map(c => c.textContent.trim()).join(' ').substring(0, 200);
      candidates.push({ selector: buildSelector(el), score, rowCount: matching.length, sampleText: text, area: w * h });
    });
    candidates.sort((a, b) => b.score - a.score);
    return candidates.slice(0, ${maxCandidates});
  })()`);
  return result.result || [];
};
|
|
4551
|
+
/**
 * Extract tabular data from the repeating children of one container.
 *
 * Runs inside the page via `evaluate`. Rows are the container's children that
 * share a class pattern (class names containing a digit are ignored). Each row
 * is flattened into `path -> text` pairs, where a path is the chain of
 * `tag.class` segments from the row down to the element, and `href` / `src`
 * values are stored under `path + ' href'` / `path + ' src'`. Siblings that
 * would produce the same segment get a `[2]`, `[3]`, ... suffix so they stay
 * separate columns instead of overwriting one another.
 *
 * Columns filled in fewer than 20% of rows are dropped, as are columns whose
 * non-empty values are all identical when there are more than two rows.
 * Duplicate column names get a numeric suffix (" 2", " 3", ...).
 *
 * @param {string} selector - CSS selector of the container, passed to
 *   `document.querySelector` (embedded via JSON.stringify).
 * @param {{maxRows?: number}} [opts] - Row limit (default 200). Coerced to a
 *   non-negative integer; a value that is not a finite number falls back to 200.
 * @returns {Promise<{selector: string, columns: Array<{name: string, path: string, fillRate: number}>, rows: object[], totalRows: number, truncated: boolean}>}
 *   An empty table when the container is not found or the evaluation yields
 *   no result.
 */
smart.extractTable = async (selector, opts) => {
  // Only a plain integer and a JSON-quoted selector are spliced into the
  // page script.
  const requested = Number(opts?.maxRows ?? 200);
  const maxRows = Number.isFinite(requested) ? Math.max(0, Math.trunc(requested)) : 200;
  const selectorLiteral = JSON.stringify(selector);
  const result = await evaluate(`(() => {
    function getClasses(el) {
      // Read the attribute: className is an SVGAnimatedString object on SVG elements.
      return (el.getAttribute('class') || '').trim().split(/\\s+/).filter(c => c && !c.match(/\\d/));
    }
    function getMatchingChildren(parent) {
      const children = [...parent.children].filter(c =>
        // tagName keeps its lowercase form for inline SVG in HTML documents.
        !['SCRIPT','IMG','STYLE','SVG','NOSCRIPT'].includes(c.tagName.toUpperCase()) &&
        c.textContent.trim().length > 0
      );
      if (children.length < 2) return [];
      const freq = {};
      children.forEach(c => {
        const key = getClasses(c).sort().join(' ');
        freq[key] = (freq[key] || 0) + 1;
      });
      const threshold = children.length / 2 - 2;
      let patterns = Object.keys(freq).filter(k => k && freq[k] >= threshold);
      if (!patterns.length) {
        const indiv = {};
        children.forEach(c => getClasses(c).forEach(cls => { indiv[cls] = (indiv[cls] || 0) + 1; }));
        patterns = Object.keys(indiv).filter(k => indiv[k] >= threshold);
      }
      return children.filter(c => {
        const cls = getClasses(c);
        return patterns.some(p => p.split(' ').every(pc => !pc || cls.includes(pc)));
      });
    }
    function directText(el) {
      let text = '';
      for (const n of el.childNodes) {
        if (n.nodeType === 3) text += n.textContent;
      }
      return text.trim();
    }
    function segmentOf(el) {
      const tag = el.tagName.toLowerCase();
      const cls = getClasses(el).slice(0, 2).join('.');
      return cls ? tag + '.' + cls : tag;
    }
    function extractRow(el, prefix, suffix) {
      const data = {};
      const key = prefix + '/' + segmentOf(el) + (suffix || '');
      const dt = directText(el);
      if (dt) data[key] = dt;
      if (el.href) data[key + ' href'] = el.href;
      if (el.src) data[key + ' src'] = el.src;
      // Number repeated sibling segments so same-shaped cells keep their own key.
      const seen = new Map();
      for (const child of el.children) {
        const seg = segmentOf(child);
        const nth = (seen.get(seg) || 0) + 1;
        seen.set(seg, nth);
        Object.assign(data, extractRow(child, key, nth > 1 ? '[' + nth + ']' : ''));
      }
      return data;
    }
    const container = document.querySelector(${selectorLiteral});
    if (!container) return { selector: ${selectorLiteral}, columns: [], rows: [], totalRows: 0, truncated: false };
    const matching = getMatchingChildren(container);
    const totalRows = matching.length;
    const limited = matching.slice(0, ${maxRows});
    const rawRows = limited.map(el => extractRow(el, ''));
    const allKeys = new Set();
    rawRows.forEach(r => Object.keys(r).forEach(k => allKeys.add(k)));
    const columns = [];
    for (const key of allKeys) {
      const values = rawRows.map(r => r[key] || '');
      const filled = values.filter(v => v).length;
      const fillRate = filled / rawRows.length;
      if (fillRate < 0.2) continue;
      const unique = new Set(values.filter(v => v));
      if (unique.size <= 1 && rawRows.length > 2) continue;
      const parts = key.split('/').filter(Boolean);
      const last = parts[parts.length - 1] || key;
      const name = last.replace(/^[a-z]+\\./, '').replace(/\\./g, ' ').trim() || last;
      columns.push({ name, path: key, fillRate: Math.round(fillRate * 100) / 100 });
    }
    const nameCount = {};
    columns.forEach(c => { nameCount[c.name] = (nameCount[c.name] || 0) + 1; });
    const nameSeen = {};
    columns.forEach(c => {
      if (nameCount[c.name] > 1) {
        nameSeen[c.name] = (nameSeen[c.name] || 0) + 1;
        if (nameSeen[c.name] > 1) c.name = c.name + ' ' + nameSeen[c.name];
      }
    });
    const rows = rawRows.map(raw => {
      const row = {};
      columns.forEach(c => { row[c.name] = raw[c.path] || ''; });
      return row;
    });
    return { selector: ${selectorLiteral}, columns, rows, totalRows, truncated: totalRows > ${maxRows} };
  })()`);
  return result.result || { selector, columns: [], rows: [], totalRows: 0, truncated: false };
};
|
|
4648
|
+
// Ask the bridge to wait until network activity settles.
//   opts.timeout  - overall wait in ms, default 15000, capped at 30000
//   opts.idleTime - required quiet window in ms, default 500, capped at 5000
// The bridge call itself is allowed 5 s longer than the requested timeout.
// Resolves with whatever the bridge returns for "wait_for_network_idle".
smart.waitForNetworkIdle = async (opts) => {
  const requestedTimeout = opts?.timeout ?? 15e3;
  const requestedIdleTime = opts?.idleTime ?? 500;
  const message = {
    type: "wait_for_network_idle",
    timeout: Math.min(requestedTimeout, 3e4),
    idleTime: Math.min(requestedIdleTime, 5e3)
  };
  return await bridge2.send(message, message.timeout + 5e3);
};
|
|
4658
|
+
// Compound extraction for the current page: detected tables plus JSON-LD.
//   opts.maxTables       - how many table candidates to try (default 3)
//   opts.maxRowsPerTable - row limit handed to extractTable (default 200)
// Resolves to { tables, structuredData, url }:
//   tables         - extractions that ended up with at least 2 columns and 2 rows
//   structuredData - parsed contents of every application/ld+json script;
//                    scripts that fail to parse are skipped
//   url            - connectedTab.url from the bridge's connection status, or ""
smart.extractData = async (opts) => {
  const tableLimit = opts?.maxTables ?? 3;
  const rowLimit = opts?.maxRowsPerTable ?? 200;
  const connection = await bridge2.send({ type: "get_connection_status" }, 5e3);
  const url = connection?.connectedTab?.url || "";
  const detected = await smart.detectTables({ maxCandidates: tableLimit });
  const tables = [];
  // Candidates are extracted one at a time, in detection order.
  for (const { selector } of detected) {
    const table = await smart.extractTable(selector, { maxRows: rowLimit });
    const isUsable = table.columns.length >= 2 && table.rows.length >= 2;
    if (isUsable) tables.push(table);
  }
  const jsonLd = await evaluate(`(() => {
const scripts = document.querySelectorAll('script[type="application/ld+json"]');
const result = [];
scripts.forEach(s => { try { result.push(JSON.parse(s.textContent)); } catch {} });
return result;
})()`);
  return { tables, structuredData: jsonLd.result || [], url };
};
|
|
3897
4686
|
smart.scrollCapture = async (opts) => {
|
|
3898
4687
|
const max = opts?.maxSections ?? 10;
|
|
3899
4688
|
const px = opts?.pixelsPerScroll ?? 800;
|
|
@@ -3916,7 +4705,7 @@ async function buildSmartObject(bridge2) {
|
|
|
3916
4705
|
smart.waitForIdle = async (timeout) => {
|
|
3917
4706
|
const ms = Math.min(timeout ?? 5e3, 15e3);
|
|
3918
4707
|
const result = await evaluate(`
|
|
3919
|
-
|
|
4708
|
+
new Promise((resolve) => {
|
|
3920
4709
|
let timer;
|
|
3921
4710
|
const reset = () => { clearTimeout(timer); timer = setTimeout(() => resolve('idle'), 500); };
|
|
3922
4711
|
const observer = new MutationObserver(reset);
|
|
@@ -3927,13 +4716,38 @@ async function buildSmartObject(bridge2) {
|
|
|
3927
4716
|
`);
|
|
3928
4717
|
return { status: result.result };
|
|
3929
4718
|
};
|
|
3930
|
-
smart.extractPage = async () => {
|
|
3931
|
-
const
|
|
3932
|
-
|
|
3933
|
-
|
|
3934
|
-
|
|
3935
|
-
|
|
3936
|
-
|
|
4719
|
+
smart.extractPage = async (opts) => {
|
|
4720
|
+
const gaps = [];
|
|
4721
|
+
const tracing = opts?.trace === true;
|
|
4722
|
+
const traceSteps = [];
|
|
4723
|
+
const traceStart = tracing ? Date.now() : 0;
|
|
4724
|
+
const traced = async (name, fn) => {
|
|
4725
|
+
if (!tracing) return fn();
|
|
4726
|
+
const s = Date.now();
|
|
4727
|
+
try {
|
|
4728
|
+
const r = await fn();
|
|
4729
|
+
traceSteps.push({ name, elapsed: Date.now() - s, success: true });
|
|
4730
|
+
return r;
|
|
4731
|
+
} catch (e) {
|
|
4732
|
+
traceSteps.push({ name, elapsed: Date.now() - s, success: false });
|
|
4733
|
+
throw e;
|
|
4734
|
+
}
|
|
4735
|
+
};
|
|
4736
|
+
const [capture, perf, security, fonts, meta, rawA11y, rawMobile] = await Promise.all([
|
|
4737
|
+
traced("capture_page", () => bridge2.send({ type: "capture_page" }, 6e4)),
|
|
4738
|
+
traced("get_performance_metrics", () => bridge2.send({ type: "get_performance_metrics" }, 5e3)).catch((e) => {
|
|
4739
|
+
gaps.push({ dimension: "performance", reason: e instanceof Error ? e.message : String(e), impact: "method-failed", reducesConfidence: true });
|
|
4740
|
+
return null;
|
|
4741
|
+
}),
|
|
4742
|
+
traced("get_security_state", () => bridge2.send({ type: "get_security_state" }, 5e3)).catch((e) => {
|
|
4743
|
+
gaps.push({ dimension: "security", reason: e instanceof Error ? e.message : String(e), impact: "method-failed", reducesConfidence: true });
|
|
4744
|
+
return null;
|
|
4745
|
+
}),
|
|
4746
|
+
traced("detect_fonts", () => bridge2.send({ type: "detect_fonts" }, 1e4)).catch((e) => {
|
|
4747
|
+
gaps.push({ dimension: "fonts", reason: e instanceof Error ? e.message : String(e), impact: "method-failed", reducesConfidence: false });
|
|
4748
|
+
return null;
|
|
4749
|
+
}),
|
|
4750
|
+
traced("meta_extraction", () => evaluate(`(() => {
|
|
3937
4751
|
const m = {}; document.querySelectorAll('meta').forEach(el => {
|
|
3938
4752
|
const k = el.getAttribute('property') || el.getAttribute('name');
|
|
3939
4753
|
if (k) m[k] = el.getAttribute('content');
|
|
@@ -3944,22 +4758,66 @@ async function buildSmartObject(bridge2) {
|
|
|
3944
4758
|
m._headings = [...document.querySelectorAll('h1,h2,h3')].slice(0, 30).map(h => ({ level: h.tagName, text: h.textContent?.trim()?.substring(0, 120) }));
|
|
3945
4759
|
m._nav = [...new Set([...document.querySelectorAll('nav a, header a')].map(a => a.href))].slice(0, 50);
|
|
3946
4760
|
return m;
|
|
3947
|
-
})()`)
|
|
4761
|
+
})()`)),
|
|
4762
|
+
traced("accessibility_tree", () => bridge2.send({ type: "get_accessibility_tree", depth: 3 }, 1e4)).catch((e) => {
|
|
4763
|
+
gaps.push({ dimension: "accessibility", reason: e instanceof Error ? e.message : String(e), impact: "method-failed", reducesConfidence: false });
|
|
4764
|
+
return null;
|
|
4765
|
+
}),
|
|
4766
|
+
traced("mobile_readiness", () => evaluate(`(() => {
|
|
4767
|
+
const vp = document.querySelector('meta[name="viewport"]');
|
|
4768
|
+
let mediaQueryCount = 0;
|
|
4769
|
+
try { [...document.styleSheets].forEach(s => { try { [...s.cssRules].forEach(r => { if (r instanceof CSSMediaRule) mediaQueryCount++; }); } catch {} }); } catch {}
|
|
4770
|
+
return {
|
|
4771
|
+
hasViewportMeta: !!vp,
|
|
4772
|
+
viewportContent: vp?.getAttribute('content') || null,
|
|
4773
|
+
mediaQueryCount,
|
|
4774
|
+
bodyScrollWidth: document.body?.scrollWidth ?? 0,
|
|
4775
|
+
windowInnerWidth: window.innerWidth,
|
|
4776
|
+
isOverflowing: (document.body?.scrollWidth ?? 0) > window.innerWidth,
|
|
4777
|
+
};
|
|
4778
|
+
})()`)).catch((e) => {
|
|
4779
|
+
gaps.push({ dimension: "mobile-readiness", reason: e instanceof Error ? e.message : String(e), impact: "method-failed", reducesConfidence: false });
|
|
4780
|
+
return null;
|
|
4781
|
+
})
|
|
3948
4782
|
]);
|
|
3949
|
-
|
|
4783
|
+
const a11ySummary = rawA11y ? summarizeAccessibility(rawA11y) : null;
|
|
4784
|
+
const mobileResult = rawMobile;
|
|
4785
|
+
const mobileReadiness = mobileResult?.result ? { hasViewportMeta: !!mobileResult.result.hasViewportMeta, viewportContent: mobileResult.result.viewportContent ?? null, mediaQueryCount: mobileResult.result.mediaQueryCount ?? 0, isOverflowing: !!mobileResult.result.isOverflowing } : null;
|
|
4786
|
+
for (const gap of gaps) {
|
|
4787
|
+
if (!sessionGaps.some((g) => g.dimension === gap.dimension)) sessionGaps.push(gap);
|
|
4788
|
+
}
|
|
4789
|
+
const evidence = {
|
|
3950
4790
|
capture,
|
|
3951
4791
|
performance: perf,
|
|
3952
4792
|
security,
|
|
3953
4793
|
fonts,
|
|
3954
|
-
meta: meta?.result || null
|
|
4794
|
+
meta: meta?.result || null,
|
|
4795
|
+
accessibility: a11ySummary,
|
|
4796
|
+
mobileReadiness,
|
|
4797
|
+
gaps
|
|
3955
4798
|
};
|
|
4799
|
+
if (tracing) {
|
|
4800
|
+
evidence._trace = {
|
|
4801
|
+
method: "extractPage",
|
|
4802
|
+
startedAt: traceStart,
|
|
4803
|
+
elapsed: Date.now() - traceStart,
|
|
4804
|
+
steps: traceSteps,
|
|
4805
|
+
outcome: gaps.length === 0 ? "success" : "partial"
|
|
4806
|
+
};
|
|
4807
|
+
}
|
|
4808
|
+
return evidence;
|
|
3956
4809
|
};
|
|
3957
|
-
smart.comparePages = async (urlA, urlB) => {
|
|
4810
|
+
// Capture two URLs one after the other and return them with a comparison
// scaffold.
//   urlA, urlB - pages to visit, in that order
//   opts       - forwarded unchanged to both extractPage() calls
// Resolves to { siteA, siteB, scaffold }: each site object is the extractPage()
// result with a leading `url` key (a `url` key in the result itself would take
// precedence), and scaffold is buildComparisonScaffold() of the two results.
smart.comparePages = async (urlA, urlB, opts) => {
  // The second navigation must not start until the first capture has finished.
  const capture = async (url) => {
    await smart.navigate(url);
    return smart.extractPage(opts);
  };
  const a = await capture(urlA);
  const b = await capture(urlB);
  const scaffold = buildComparisonScaffold(a, b);
  const siteA = { url: urlA, ...a };
  const siteB = { url: urlB, ...b };
  return { siteA, siteB, scaffold };
};
|
|
3964
4822
|
return smart;
|
|
3965
4823
|
}
|
|
@@ -4102,8 +4960,15 @@ function createCodeModeTools(bridge2, crawlio2) {
|
|
|
4102
4960
|
" smart.snapshot() \u2014 capture accessibility snapshot",
|
|
4103
4961
|
" smart.scrollCapture(opts?) \u2014 state-aware page scroll with screenshots, stops at page bottom",
|
|
4104
4962
|
" smart.waitForIdle(timeout?) \u2014 wait for DOM mutations to settle (500ms quiet window)",
|
|
4105
|
-
" smart.extractPage() \u2014 capture_page + perf
|
|
4106
|
-
" smart.comparePages(urlA, urlB) \u2014 navigate to each URL, run extractPage(), return
|
|
4963
|
+
" smart.extractPage(opts?) \u2014 capture_page + perf + security + fonts + meta + accessibility + mobileReadiness. Returns { capture, performance, security, fonts, meta, accessibility, mobileReadiness, gaps[] }. opts: { trace: true } adds _trace.",
|
|
4964
|
+
" smart.comparePages(urlA, urlB, opts?) \u2014 navigate to each URL, run extractPage(), return { siteA, siteB, scaffold }. scaffold has dimensions[], sharedFields, missingFields, metrics.",
|
|
4965
|
+
" smart.finding({ claim, evidence, sourceUrl, confidence, method, dimension? }) \u2014 create validated Finding, accumulate in session. Confidence auto-capped if dimension has active gap with reducesConfidence.",
|
|
4966
|
+
" smart.findings() \u2014 return all accumulated Finding[] from current session.",
|
|
4967
|
+
" smart.clearFindings() \u2014 reset accumulated findings and session gaps.",
|
|
4968
|
+
" smart.detectTables(opts?) \u2014 find repeating data patterns in the page. Returns TableCandidate[] (selector, score, rowCount, sampleText). Uses class-frequency scoring.",
|
|
4969
|
+
" smart.extractTable(selector, opts?) \u2014 extract structured data from a container. Returns { columns, rows, totalRows, truncated }. opts: { maxRows: 200 }.",
|
|
4970
|
+
" smart.waitForNetworkIdle(opts?) \u2014 wait for all network requests to settle (CDP-level, catches fetch/XHR/images/CSS/fonts). Returns { status, elapsed }. opts: { timeout: 15000, idleTime: 500 }.",
|
|
4971
|
+
" smart.extractData(opts?) \u2014 compound: detectTables + extractTable + JSON-LD. Returns { tables, structuredData, url }.",
|
|
4107
4972
|
" Framework namespaces (injected based on detected framework):",
|
|
4108
4973
|
" smart.react.{getVersion,getRootCount,hasProfiler,isHookInstalled}",
|
|
4109
4974
|
" smart.vue.{getVersion,getAppCount,getConfig,isDevMode}",
|
|
@@ -4211,7 +5076,7 @@ function createCodeModeTools(bridge2, crawlio2) {
|
|
|
4211
5076
|
process.title = "Crawlio Agent";
|
|
4212
5077
|
var initMode = process.argv.includes("init") || process.argv.includes("--setup") || process.argv.includes("setup");
|
|
4213
5078
|
if (initMode) {
|
|
4214
|
-
const { runInit } = await import("./init-
|
|
5079
|
+
const { runInit } = await import("./init-ZLXCKEQB.js");
|
|
4215
5080
|
await runInit(process.argv.slice(2));
|
|
4216
5081
|
process.exit(0);
|
|
4217
5082
|
}
|