@vulcn/driver-browser 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +389 -4
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +36 -2
- package/dist/index.d.ts +36 -2
- package/dist/index.js +388 -4
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.cjs
CHANGED
|
@@ -26,6 +26,7 @@ __export(index_exports, {
|
|
|
26
26
|
BrowserStepSchema: () => BrowserStepSchema,
|
|
27
27
|
checkBrowsers: () => checkBrowsers,
|
|
28
28
|
configSchema: () => configSchema,
|
|
29
|
+
crawlAndBuildSessions: () => crawlAndBuildSessions,
|
|
29
30
|
default: () => index_default,
|
|
30
31
|
installBrowsers: () => installBrowsers,
|
|
31
32
|
launchBrowser: () => launchBrowser
|
|
@@ -434,6 +435,7 @@ var BrowserRunner = class _BrowserRunner {
|
|
|
434
435
|
});
|
|
435
436
|
const context = await browser.newContext({ viewport });
|
|
436
437
|
const page = await context.newPage();
|
|
438
|
+
await ctx.options.onPageReady?.(page);
|
|
437
439
|
const eventFindings = [];
|
|
438
440
|
let currentPayloadInfo = null;
|
|
439
441
|
const dialogHandler = async (dialog) => {
|
|
@@ -491,13 +493,24 @@ var BrowserRunner = class _BrowserRunner {
|
|
|
491
493
|
(step) => step.type === "browser.input" && step.injectable !== false
|
|
492
494
|
);
|
|
493
495
|
const allPayloads = [];
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
496
|
+
const payloadsByCategory = payloads.map(
|
|
497
|
+
(ps) => ps.payloads.map((value) => ({ payloadSet: ps, value }))
|
|
498
|
+
);
|
|
499
|
+
const maxLen = Math.max(...payloadsByCategory.map((c) => c.length));
|
|
500
|
+
for (let i = 0; i < maxLen; i++) {
|
|
501
|
+
for (const category of payloadsByCategory) {
|
|
502
|
+
if (i < category.length) {
|
|
503
|
+
allPayloads.push(category[i]);
|
|
504
|
+
}
|
|
497
505
|
}
|
|
498
506
|
}
|
|
507
|
+
const confirmedTypes = /* @__PURE__ */ new Set();
|
|
499
508
|
for (const injectableStep of injectableSteps) {
|
|
500
509
|
for (const { payloadSet, value } of allPayloads) {
|
|
510
|
+
const stepTypeKey = `${injectableStep.id}::${payloadSet.category}`;
|
|
511
|
+
if (confirmedTypes.has(stepTypeKey)) {
|
|
512
|
+
continue;
|
|
513
|
+
}
|
|
501
514
|
try {
|
|
502
515
|
currentPayloadInfo = {
|
|
503
516
|
stepId: injectableStep.id,
|
|
@@ -521,8 +534,16 @@ var BrowserRunner = class _BrowserRunner {
|
|
|
521
534
|
if (reflectionFinding) {
|
|
522
535
|
allFindings.push(reflectionFinding);
|
|
523
536
|
}
|
|
537
|
+
const seenKeys = /* @__PURE__ */ new Set();
|
|
524
538
|
for (const finding of allFindings) {
|
|
525
|
-
|
|
539
|
+
const dedupKey = `${finding.type}::${finding.stepId}::${finding.title}`;
|
|
540
|
+
if (!seenKeys.has(dedupKey)) {
|
|
541
|
+
seenKeys.add(dedupKey);
|
|
542
|
+
ctx.addFinding(finding);
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
if (allFindings.length > 0) {
|
|
546
|
+
confirmedTypes.add(stepTypeKey);
|
|
526
547
|
}
|
|
527
548
|
eventFindings.length = 0;
|
|
528
549
|
payloadsTested++;
|
|
@@ -536,6 +557,7 @@ var BrowserRunner = class _BrowserRunner {
|
|
|
536
557
|
page.off("dialog", dialogHandler);
|
|
537
558
|
page.off("console", consoleHandler);
|
|
538
559
|
currentPayloadInfo = null;
|
|
560
|
+
await ctx.options.onBeforeClose?.(page);
|
|
539
561
|
await browser.close();
|
|
540
562
|
}
|
|
541
563
|
return {
|
|
@@ -677,6 +699,356 @@ var BrowserRunner = class _BrowserRunner {
|
|
|
677
699
|
}
|
|
678
700
|
};
|
|
679
701
|
|
|
702
|
+
// src/crawler.ts
|
|
703
|
+
/**
 * Field types the crawler considers injectable (free-form text entry).
 * Form discovery lower-cases a field's type and looks it up here.
 * The empty string is listed as well, although discovery substitutes "text"
 * when an element has no `type` attribute.
 */
var INJECTABLE_INPUT_TYPES = /* @__PURE__ */ new Set(
  ["text", "search", "url", "email", "tel", "password", "textarea", ""]
);
|
|
713
|
+
/** Fallback crawl settings; caller-supplied options are spread over these. */
var CRAWL_DEFAULTS = {
  // Links are only followed from pages whose depth is below this value.
  maxDepth: 2,
  // The crawl loop stops once this many URLs have been visited.
  maxPages: 20,
  // Passed as the `timeout` option of page.goto.
  pageTimeout: 10000,
  // When true, links whose origin differs from the start URL are dropped.
  sameOrigin: true,
};
|
|
719
|
+
/**
 * Crawl a site breadth-first from `config.startUrl`, collect the forms found
 * on every visited page and convert them into sessions.
 *
 * @param {object} config - Crawl target and browser settings.
 * @param {string} config.startUrl - Absolute URL the crawl starts from.
 * @param {string} [config.browser] - Forwarded to launchBrowser; defaults to "chromium".
 * @param {boolean} [config.headless] - Forwarded to launchBrowser; defaults to true.
 * @param {{width: number, height: number}} [config.viewport] - Defaults to 1280x720.
 * @param {object} [options] - Overrides for CRAWL_DEFAULTS (`maxDepth`,
 *   `maxPages`, `pageTimeout`, `sameOrigin`) plus an optional
 *   `onPageCrawled(url, formCount)` callback invoked after each page.
 * @returns {Promise<object[]>} Whatever buildSessions returns for the collected forms.
 * @throws {Error} "Invalid URL: …" when `config.startUrl` cannot be parsed.
 */
async function crawlAndBuildSessions(config, options = {}) {
  const opts = { ...CRAWL_DEFAULTS, ...options };
  const startUrl = config.startUrl;
  let normalizedUrl;
  try {
    normalizedUrl = new URL(startUrl);
  } catch {
    throw new Error(`Invalid URL: ${startUrl}`);
  }
  const origin = normalizedUrl.origin;
  const visited = /* @__PURE__ */ new Set();
  // URLs that have already been put on the queue. Without this the queue
  // collects one entry per occurrence of a link, which grows quickly on sites
  // that repeat the same navigation on every page. Visit order is unaffected
  // because only the first occurrence of a URL was ever crawled.
  const enqueued = /* @__PURE__ */ new Set([normalizeUrl(normalizedUrl.href)]);
  const allForms = [];
  const queue = [[normalizedUrl.href, 0]];
  const { browser } = await launchBrowser({
    browser: config.browser ?? "chromium",
    headless: config.headless ?? true
  });
  try {
    // Created inside the try block so the browser is still closed when
    // context creation itself fails.
    const context = await browser.newContext({
      viewport: config.viewport ?? { width: 1280, height: 720 }
    });
    while (queue.length > 0 && visited.size < opts.maxPages) {
      const [url, depth] = queue.shift();
      const normalizedPageUrl = normalizeUrl(url);
      if (visited.has(normalizedPageUrl)) continue;
      visited.add(normalizedPageUrl);
      console.log(`[crawler] [depth=${depth}] Crawling: ${normalizedPageUrl}`);
      const page = await context.newPage();
      try {
        await page.goto(normalizedPageUrl, {
          waitUntil: "domcontentloaded",
          timeout: opts.pageTimeout
        });
        // Fixed pause after DOMContentLoaded before the DOM is inspected.
        await page.waitForTimeout(1e3);
        const forms = await discoverForms(page, normalizedPageUrl);
        allForms.push(...forms);
        const injectableCount = forms.reduce(
          (s, f) => s + f.inputs.filter((i) => i.injectable).length,
          0
        );
        console.log(
          `[crawler] Found ${forms.length} form(s), ${injectableCount} injectable input(s)`
        );
        opts.onPageCrawled?.(normalizedPageUrl, forms.length);
        if (depth < opts.maxDepth) {
          const links = await discoverLinks(page, origin, opts.sameOrigin);
          for (const link of links) {
            const normalizedLink = normalizeUrl(link);
            if (visited.has(normalizedLink) || enqueued.has(normalizedLink)) continue;
            enqueued.add(normalizedLink);
            queue.push([normalizedLink, depth + 1]);
          }
          console.log(`[crawler] Found ${links.length} link(s) to follow`);
        }
      } catch (err) {
        // A page that fails to load or inspect is reported and skipped; the
        // crawl continues with the next queued URL.
        console.warn(
          `[crawler] Failed: ${err instanceof Error ? err.message : String(err)}`
        );
      } finally {
        await page.close();
      }
    }
  } finally {
    await browser.close();
  }
  console.log(
    `[crawler] Complete: ${visited.size} page(s), ${allForms.length} form(s)`
  );
  return buildSessions(allForms);
}
|
|
789
|
+
/**
 * Inspect the page's DOM and describe every form-like entry point on it.
 *
 * Two kinds of entries are produced:
 *  - one per `<form>` element that contains at least one input, textarea or
 *    `contenteditable="true"` element;
 *  - one per injectable field that is not inside any `<form>` ("standalone").
 *
 * @param {object} page - Page object exposing `evaluate(fn)`; `fn` runs in the page.
 * @param {string} pageUrl - URL recorded on every returned entry.
 * @returns {Promise<object[]>} Entries of the shape
 *   `{ pageUrl, formSelector, action, method, inputs, submitSelector }`, where
 *   each input is `{ selector, type, name, injectable, placeholder }`.
 */
async function discoverForms(page, pageUrl) {
  const { explicitForms, standaloneInputs } = await page.evaluate(() => {
    // Everything in this callback executes inside the page, so only DOM
    // globals are available and helpers have to be defined locally.

    // Structural selector for one specific element. `:nth-of-type` counts
    // among siblings, so each path segment is numbered relative to its own
    // parent; the walk stops early at the nearest ancestor carrying an id.
    const cssPath = (node) => {
      const segments = [];
      let current = node;
      while (current && current.nodeType === 1) {
        if (current.id) {
          segments.unshift(`#${CSS.escape(current.id)}`);
          break;
        }
        const tag = current.tagName.toLowerCase();
        if (!current.parentElement) {
          segments.unshift(tag);
          break;
        }
        let position = 1;
        for (let sib = current.previousElementSibling; sib; sib = sib.previousElementSibling) {
          if (sib.tagName === current.tagName) position += 1;
        }
        segments.unshift(`${tag}:nth-of-type(${position})`);
        current = current.parentElement;
      }
      return segments.join(" > ");
    };

    const fieldType = (el) =>
      el.tagName.toLowerCase() === "textarea" ? "textarea" : el.getAttribute("type") || "text";

    // A control named "action" or "method" shadows the form property of the
    // same name and yields an element instead of a string; fall back to the
    // raw attribute in that case rather than throwing.
    const formProp = (form, prop, fallback) => {
      const value = form[prop];
      return (typeof value === "string" && value) || form.getAttribute(prop) || fallback;
    };

    const explicit = Array.from(document.querySelectorAll("form"), (form, formIndex) => {
      const formSelector = cssPath(form);
      const inputs = Array.from(
        form.querySelectorAll('input, textarea, [contenteditable="true"]'),
        (el, inputIndex) => {
          let selector;
          if (el.id) {
            selector = `#${CSS.escape(el.id)}`;
          } else if (el.name) {
            selector = `${formSelector} [name="${CSS.escape(el.name)}"]`;
          } else {
            selector = cssPath(el);
          }
          return {
            selector,
            type: fieldType(el),
            name: el.name || el.id || `input-${inputIndex}`,
            placeholder: el.placeholder || ""
          };
        }
      );
      // Preference order: explicit submit control, then a type-less button,
      // then any button.
      const submitBtn = form.querySelector('button[type="submit"], input[type="submit"]') || form.querySelector("button:not([type])") || form.querySelector('button, input[type="button"]');
      return {
        formIndex,
        formSelector,
        action: formProp(form, "action", ""),
        method: formProp(form, "method", "GET").toUpperCase(),
        inputs,
        // Path to the exact element chosen above, not to the first element
        // that happens to share its tag and type.
        submitSelector: submitBtn ? cssPath(submitBtn) : null
      };
    });

    const standalone = Array.from(
      document.querySelectorAll(
        'input:not(form input), textarea:not(form textarea), [contenteditable="true"]:not(form [contenteditable])'
      ),
      (el) => {
        let selector;
        if (el.id) {
          selector = `#${CSS.escape(el.id)}`;
        } else if (el.name) {
          selector = `[name="${CSS.escape(el.name)}"]`;
        } else {
          // A `tag[type="…"]` selector matches nothing for elements without
          // a type attribute (type-less inputs, contenteditable elements).
          selector = cssPath(el);
        }
        // Only a button in the same parent that has an id is used; without
        // one the entry carries a null submit selector.
        let nearbyButtonSelector = null;
        const parent = el.parentElement;
        if (parent) {
          const btn = parent.querySelector("button") || parent.querySelector('input[type="submit"]') || parent.querySelector('input[type="button"]');
          if (btn && btn.id) {
            nearbyButtonSelector = `#${CSS.escape(btn.id)}`;
          }
        }
        return {
          selector,
          type: fieldType(el),
          name: el.name || el.id || "",
          placeholder: el.placeholder || "",
          nearbyButtonSelector
        };
      }
    );

    return { explicitForms: explicit, standaloneInputs: standalone };
  });

  const forms = [];
  for (const form of explicitForms) {
    if (form.inputs.length === 0) continue;
    forms.push({
      pageUrl,
      formSelector: form.formSelector,
      action: form.action,
      method: form.method,
      inputs: form.inputs.map((input) => ({
        selector: input.selector,
        type: input.type,
        name: input.name,
        injectable: INJECTABLE_INPUT_TYPES.has(input.type.toLowerCase()),
        placeholder: input.placeholder || void 0
      })),
      submitSelector: form.submitSelector
    });
  }
  for (const input of standaloneInputs) {
    // Standalone fields are only reported when they are injectable.
    if (!INJECTABLE_INPUT_TYPES.has(input.type.toLowerCase())) continue;
    forms.push({
      pageUrl,
      formSelector: "(standalone)",
      action: pageUrl,
      method: "GET",
      inputs: [
        {
          selector: input.selector,
          type: input.type,
          name: input.name,
          injectable: true,
          placeholder: input.placeholder || void 0
        }
      ],
      submitSelector: input.nearbyButtonSelector
    });
  }
  return forms;
}
|
|
920
|
+
/** Query-parameter names (compared lower-cased) treated as redirect targets. */
var REDIRECT_PARAMS = /* @__PURE__ */ new Set([
  "to",
  "url",
  "redirect",
  "redirect_uri",
  "redirect_url",
  "return",
  "return_url",
  "returnto",
  "next",
  "goto",
  "dest",
  "destination",
  "continue",
  "target",
  "rurl",
  "out",
  "link",
  "forward"
]);

/**
 * Tell whether a same-origin link carries a redirect parameter whose value
 * points at a different origin.
 *
 * @param {string} link - Absolute URL of the link.
 * @param {string} origin - Origin the crawl is restricted to.
 * @returns {boolean} true when `link` is on `origin` and one of its
 *   REDIRECT_PARAMS values resolves to another origin; false otherwise,
 *   including when `link` cannot be parsed.
 */
function isExternalRedirectLink(link, origin) {
  let parsed;
  try {
    parsed = new URL(link);
  } catch {
    return false;
  }
  if (parsed.origin !== origin) return false;
  for (const [key, value] of parsed.searchParams) {
    if (!REDIRECT_PARAMS.has(key.toLowerCase())) continue;
    let target;
    try {
      // Resolved against the link so that protocol-relative values such as
      // "//other.example/x" are recognised as leaving the origin. Plain
      // relative paths resolve to the link's own origin and stay internal.
      target = new URL(value, parsed.href);
    } catch {
      // Value is not URL-like at all; it cannot be an external target.
      continue;
    }
    if (target.origin !== origin) return true;
  }
  return false;
}
|
|
958
|
+
/**
 * Collect the links on the page that the crawler is allowed to follow.
 *
 * @param {object} page - Page object exposing `evaluate(fn)`; `fn` runs in the page.
 * @param {string} origin - Origin of the crawl's start URL.
 * @param {boolean} sameOrigin - When true, links on any other origin are dropped.
 * @returns {Promise<string[]>} Absolute link URLs starting with "http" that
 *   pass the origin check and are not flagged by isExternalRedirectLink.
 */
async function discoverLinks(page, origin, sameOrigin) {
  const links = await page.evaluate(() => {
    return Array.from(document.querySelectorAll("a[href]"), (a) => a.href).filter(
      // `a[href]` also matches SVG <a> elements, whose `href` property is an
      // SVGAnimatedString object rather than a string. Calling startsWith on
      // it would throw and abort link discovery for the whole page.
      (href) => typeof href === "string" && href.startsWith("http")
    );
  });
  return links.filter((link) => {
    try {
      const linkOrigin = new URL(link).origin;
      if (sameOrigin && linkOrigin !== origin) return false;
      if (isExternalRedirectLink(link, origin)) return false;
      return true;
    } catch {
      // Unparsable link: nothing to follow.
      return false;
    }
  });
}
|
|
973
|
+
/**
 * Build one session per form that has at least one injectable input.
 * Forms without injectable inputs are skipped and do not consume an index.
 *
 * @param {object[]} forms - Discovered forms, each with an `inputs` array.
 * @returns {object[]} Sessions in the same relative order as the kept forms.
 */
function buildSessions(forms) {
  const sessions = [];
  for (const form of forms) {
    const hasInjectableInput = form.inputs.some((input) => input.injectable);
    if (!hasInjectableInput) continue;
    // The index handed on is the position among kept forms, i.e. the number
    // of sessions built so far.
    sessions.push(buildSessionForForm(form, sessions.length));
  }
  return sessions;
}
|
|
977
|
+
/**
 * Turn one discovered form into a session: navigate to the page, fill every
 * injectable input with "test", then submit.
 *
 * @param {object} form - Discovered form (`pageUrl`, `inputs`, `submitSelector`,
 *   `action`, `method`).
 * @param {number} index - Zero-based position of the form; shown one-based in
 *   the session name.
 * @returns {object} Session with `name`, `driver`, `driverConfig`, `steps`
 *   and `metadata`.
 */
function buildSessionForForm(form, index) {
  let issued = 0;
  const nextId = () => `step-${++issued}`;
  // Every step after the navigation is stamped 100 per sequence slot past
  // "now"; the slot is one more than the number of the step just issued.
  const staggeredTimestamp = () => Date.now() + (issued + 1) * 100;

  const targets = form.inputs.filter((field) => field.injectable);

  const steps = [
    {
      id: nextId(),
      type: "browser.navigate",
      url: form.pageUrl,
      timestamp: Date.now()
    },
    ...targets.map((field) => ({
      id: nextId(),
      type: "browser.input",
      selector: field.selector,
      value: "test",
      injectable: true,
      timestamp: staggeredTimestamp()
    }))
  ];

  // Submit by clicking the known control, or by pressing Enter when the form
  // has no submit selector.
  steps.push(
    form.submitSelector
      ? {
          id: nextId(),
          type: "browser.click",
          selector: form.submitSelector,
          timestamp: staggeredTimestamp()
        }
      : {
          id: nextId(),
          type: "browser.keypress",
          key: "Enter",
          timestamp: staggeredTimestamp()
        }
  );

  // Label uses the path only; an unparsable page URL is shown verbatim.
  let pagePath = form.pageUrl;
  try {
    pagePath = new URL(form.pageUrl).pathname;
  } catch {
    // keep the raw value assigned above
  }
  const inputNames = targets.map((field) => field.name || field.type).join(", ");

  return {
    name: `Crawl: ${pagePath} \u2014 form ${index + 1} (${inputNames})`,
    driver: "browser",
    driverConfig: {
      startUrl: form.pageUrl,
      browser: "chromium",
      headless: true,
      viewport: { width: 1280, height: 720 }
    },
    steps,
    metadata: {
      recordedAt: (/* @__PURE__ */ new Date()).toISOString(),
      version: "0.3.0",
      source: "crawler",
      formAction: form.action,
      formMethod: form.method
    }
  };
}
|
|
1039
|
+
/**
 * Canonicalise a URL for visited-set comparisons: the fragment is removed and
 * a single trailing slash is stripped from any path other than the root.
 *
 * @param {string} url - URL to normalise.
 * @returns {string} The normalised href, or `url` unchanged when it cannot
 *   be parsed.
 */
function normalizeUrl(url) {
  let target;
  try {
    target = new URL(url);
  } catch {
    return url;
  }
  target.hash = "";
  const { pathname } = target;
  const hasTrailingSlash = pathname.length > 1 && pathname.endsWith("/");
  if (hasTrailingSlash) {
    target.pathname = pathname.slice(0, -1);
  }
  return target.href;
}
|
|
1051
|
+
|
|
680
1052
|
// src/index.ts
|
|
681
1053
|
var configSchema = import_zod.z.object({
|
|
682
1054
|
/** Starting URL for recording */
|
|
@@ -746,6 +1118,18 @@ var recorderDriver = {
|
|
|
746
1118
|
async start(config, options) {
|
|
747
1119
|
const parsedConfig = configSchema.parse(config);
|
|
748
1120
|
return BrowserRecorder.start(parsedConfig, options);
|
|
1121
|
+
},
|
|
1122
|
+
async crawl(config, options) {
|
|
1123
|
+
const parsedConfig = configSchema.parse(config);
|
|
1124
|
+
return crawlAndBuildSessions(
|
|
1125
|
+
{
|
|
1126
|
+
startUrl: parsedConfig.startUrl ?? "",
|
|
1127
|
+
browser: parsedConfig.browser,
|
|
1128
|
+
headless: parsedConfig.headless,
|
|
1129
|
+
viewport: parsedConfig.viewport
|
|
1130
|
+
},
|
|
1131
|
+
options
|
|
1132
|
+
);
|
|
749
1133
|
}
|
|
750
1134
|
};
|
|
751
1135
|
var runnerDriver = {
|
|
@@ -772,6 +1156,7 @@ var index_default = browserDriver;
|
|
|
772
1156
|
BrowserStepSchema,
|
|
773
1157
|
checkBrowsers,
|
|
774
1158
|
configSchema,
|
|
1159
|
+
crawlAndBuildSessions,
|
|
775
1160
|
installBrowsers,
|
|
776
1161
|
launchBrowser
|
|
777
1162
|
});
|