@xbrowser/cli 1.7.2 → 1.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +56 -12
- package/dist/daemon-main.js +56 -12
- package/dist/index.js +56 -12
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -2795,22 +2795,66 @@ async function extractLinks(page, origin) {
|
|
|
2795
2795
|
}, origin);
|
|
2796
2796
|
}
|
|
2797
2797
|
async function detectSpaRoutes(page, origin) {
|
|
2798
|
-
|
|
2799
|
-
|
|
2798
|
+
const routeSet = /* @__PURE__ */ new Set();
|
|
2799
|
+
const pathRegex = /['"`](\/[a-zA-Z0-9_\-/]+)['"`]/g;
|
|
2800
|
+
const isParamRoute = (p) => p.includes(":") || p.includes("*");
|
|
2801
|
+
function extractPaths(source) {
|
|
2802
|
+
let match;
|
|
2803
|
+
while ((match = pathRegex.exec(source)) !== null) {
|
|
2804
|
+
const path3 = match[1];
|
|
2805
|
+
if (!isParamRoute(path3)) routeSet.add(path3);
|
|
2806
|
+
}
|
|
2807
|
+
}
|
|
2808
|
+
const scriptData = await page.evaluate(() => {
|
|
2809
|
+
const scripts = Array.from(document.querySelectorAll("script"));
|
|
2810
|
+
return {
|
|
2811
|
+
inlineContent: scripts.map((s) => s.textContent || "").join("\n"),
|
|
2812
|
+
externalUrls: scripts.map((s) => s.src).filter((src) => src && !src.includes("analytics") && !src.includes("google") && !src.includes("baidu"))
|
|
2813
|
+
};
|
|
2814
|
+
});
|
|
2815
|
+
const { inlineContent, externalUrls } = scriptData;
|
|
2816
|
+
extractPaths(inlineContent);
|
|
2817
|
+
for (const src of externalUrls) {
|
|
2800
2818
|
try {
|
|
2801
|
-
const
|
|
2802
|
-
const
|
|
2803
|
-
|
|
2804
|
-
|
|
2805
|
-
|
|
2806
|
-
const path3 = match[1];
|
|
2807
|
-
if (path3.includes(":") || path3.includes("*") || routeSet.has(path3)) continue;
|
|
2808
|
-
routeSet.add(path3);
|
|
2819
|
+
const absoluteSrc = src.startsWith("http") ? src : new URL(src, page.url()).href;
|
|
2820
|
+
const resp = await fetch(absoluteSrc, { signal: AbortSignal.timeout(5e3) });
|
|
2821
|
+
if (resp.ok) {
|
|
2822
|
+
const text = await resp.text();
|
|
2823
|
+
extractPaths(text);
|
|
2809
2824
|
}
|
|
2810
2825
|
} catch {
|
|
2811
2826
|
}
|
|
2812
|
-
|
|
2813
|
-
|
|
2827
|
+
}
|
|
2828
|
+
try {
|
|
2829
|
+
const vueRoutes = await page.evaluate((evalOrigin) => {
|
|
2830
|
+
const routes2 = [];
|
|
2831
|
+
const win = window;
|
|
2832
|
+
const vue3App = win.__vue_app__;
|
|
2833
|
+
const vue3Router = vue3App?.config?.globalProperties?.$router;
|
|
2834
|
+
let router = vue3Router;
|
|
2835
|
+
if (!router) {
|
|
2836
|
+
const el = document.querySelector("#app") || document.querySelector("[__vue__]");
|
|
2837
|
+
const vm = el ? el.__vue__ : void 0;
|
|
2838
|
+
const vmRouter = vm?.$router;
|
|
2839
|
+
const vmRootRouter = vm?.$root?.$router;
|
|
2840
|
+
if (!router) router = vmRouter || vmRootRouter;
|
|
2841
|
+
}
|
|
2842
|
+
const routeList = router?.options?.routes;
|
|
2843
|
+
if (routeList) {
|
|
2844
|
+
for (const r of routeList) {
|
|
2845
|
+
if (r.path && !r.path.includes(":") && r.path !== "/" && r.path !== "") {
|
|
2846
|
+
routes2.push(`${evalOrigin.replace(/\/$/, "")}/#${r.path}`);
|
|
2847
|
+
}
|
|
2848
|
+
}
|
|
2849
|
+
}
|
|
2850
|
+
return routes2;
|
|
2851
|
+
}, origin);
|
|
2852
|
+
for (const r of vueRoutes) routeSet.add(r);
|
|
2853
|
+
} catch {
|
|
2854
|
+
}
|
|
2855
|
+
return Array.from(routeSet).map(
|
|
2856
|
+
(p) => p.startsWith("http") ? p : `${origin.replace(/\/$/, "")}${p.startsWith("/") ? "" : "/"}${p}`
|
|
2857
|
+
);
|
|
2814
2858
|
}
|
|
2815
2859
|
function parseRobotsTxt(text) {
|
|
2816
2860
|
const rules = [];
|
package/dist/daemon-main.js
CHANGED
|
@@ -2762,22 +2762,66 @@ async function extractLinks(page, origin) {
|
|
|
2762
2762
|
}, origin);
|
|
2763
2763
|
}
|
|
2764
2764
|
async function detectSpaRoutes(page, origin) {
|
|
2765
|
-
|
|
2766
|
-
|
|
2765
|
+
const routeSet = /* @__PURE__ */ new Set();
|
|
2766
|
+
const pathRegex = /['"`](\/[a-zA-Z0-9_\-/]+)['"`]/g;
|
|
2767
|
+
const isParamRoute = (p) => p.includes(":") || p.includes("*");
|
|
2768
|
+
function extractPaths(source) {
|
|
2769
|
+
let match;
|
|
2770
|
+
while ((match = pathRegex.exec(source)) !== null) {
|
|
2771
|
+
const path2 = match[1];
|
|
2772
|
+
if (!isParamRoute(path2)) routeSet.add(path2);
|
|
2773
|
+
}
|
|
2774
|
+
}
|
|
2775
|
+
const scriptData = await page.evaluate(() => {
|
|
2776
|
+
const scripts = Array.from(document.querySelectorAll("script"));
|
|
2777
|
+
return {
|
|
2778
|
+
inlineContent: scripts.map((s) => s.textContent || "").join("\n"),
|
|
2779
|
+
externalUrls: scripts.map((s) => s.src).filter((src) => src && !src.includes("analytics") && !src.includes("google") && !src.includes("baidu"))
|
|
2780
|
+
};
|
|
2781
|
+
});
|
|
2782
|
+
const { inlineContent, externalUrls } = scriptData;
|
|
2783
|
+
extractPaths(inlineContent);
|
|
2784
|
+
for (const src of externalUrls) {
|
|
2767
2785
|
try {
|
|
2768
|
-
const
|
|
2769
|
-
const
|
|
2770
|
-
|
|
2771
|
-
|
|
2772
|
-
|
|
2773
|
-
const path2 = match[1];
|
|
2774
|
-
if (path2.includes(":") || path2.includes("*") || routeSet.has(path2)) continue;
|
|
2775
|
-
routeSet.add(path2);
|
|
2786
|
+
const absoluteSrc = src.startsWith("http") ? src : new URL(src, page.url()).href;
|
|
2787
|
+
const resp = await fetch(absoluteSrc, { signal: AbortSignal.timeout(5e3) });
|
|
2788
|
+
if (resp.ok) {
|
|
2789
|
+
const text = await resp.text();
|
|
2790
|
+
extractPaths(text);
|
|
2776
2791
|
}
|
|
2777
2792
|
} catch {
|
|
2778
2793
|
}
|
|
2779
|
-
|
|
2780
|
-
|
|
2794
|
+
}
|
|
2795
|
+
try {
|
|
2796
|
+
const vueRoutes = await page.evaluate((evalOrigin) => {
|
|
2797
|
+
const routes = [];
|
|
2798
|
+
const win = window;
|
|
2799
|
+
const vue3App = win.__vue_app__;
|
|
2800
|
+
const vue3Router = vue3App?.config?.globalProperties?.$router;
|
|
2801
|
+
let router = vue3Router;
|
|
2802
|
+
if (!router) {
|
|
2803
|
+
const el = document.querySelector("#app") || document.querySelector("[__vue__]");
|
|
2804
|
+
const vm = el ? el.__vue__ : void 0;
|
|
2805
|
+
const vmRouter = vm?.$router;
|
|
2806
|
+
const vmRootRouter = vm?.$root?.$router;
|
|
2807
|
+
if (!router) router = vmRouter || vmRootRouter;
|
|
2808
|
+
}
|
|
2809
|
+
const routeList = router?.options?.routes;
|
|
2810
|
+
if (routeList) {
|
|
2811
|
+
for (const r of routeList) {
|
|
2812
|
+
if (r.path && !r.path.includes(":") && r.path !== "/" && r.path !== "") {
|
|
2813
|
+
routes.push(`${evalOrigin.replace(/\/$/, "")}/#${r.path}`);
|
|
2814
|
+
}
|
|
2815
|
+
}
|
|
2816
|
+
}
|
|
2817
|
+
return routes;
|
|
2818
|
+
}, origin);
|
|
2819
|
+
for (const r of vueRoutes) routeSet.add(r);
|
|
2820
|
+
} catch {
|
|
2821
|
+
}
|
|
2822
|
+
return Array.from(routeSet).map(
|
|
2823
|
+
(p) => p.startsWith("http") ? p : `${origin.replace(/\/$/, "")}${p.startsWith("/") ? "" : "/"}${p}`
|
|
2824
|
+
);
|
|
2781
2825
|
}
|
|
2782
2826
|
function parseRobotsTxt(text) {
|
|
2783
2827
|
const rules = [];
|
package/dist/index.js
CHANGED
|
@@ -2835,22 +2835,66 @@ async function extractLinks(page, origin) {
|
|
|
2835
2835
|
}, origin);
|
|
2836
2836
|
}
|
|
2837
2837
|
async function detectSpaRoutes(page, origin) {
|
|
2838
|
-
|
|
2839
|
-
|
|
2838
|
+
const routeSet = /* @__PURE__ */ new Set();
|
|
2839
|
+
const pathRegex = /['"`](\/[a-zA-Z0-9_\-/]+)['"`]/g;
|
|
2840
|
+
const isParamRoute = (p) => p.includes(":") || p.includes("*");
|
|
2841
|
+
function extractPaths(source) {
|
|
2842
|
+
let match;
|
|
2843
|
+
while ((match = pathRegex.exec(source)) !== null) {
|
|
2844
|
+
const path5 = match[1];
|
|
2845
|
+
if (!isParamRoute(path5)) routeSet.add(path5);
|
|
2846
|
+
}
|
|
2847
|
+
}
|
|
2848
|
+
const scriptData = await page.evaluate(() => {
|
|
2849
|
+
const scripts = Array.from(document.querySelectorAll("script"));
|
|
2850
|
+
return {
|
|
2851
|
+
inlineContent: scripts.map((s) => s.textContent || "").join("\n"),
|
|
2852
|
+
externalUrls: scripts.map((s) => s.src).filter((src) => src && !src.includes("analytics") && !src.includes("google") && !src.includes("baidu"))
|
|
2853
|
+
};
|
|
2854
|
+
});
|
|
2855
|
+
const { inlineContent, externalUrls } = scriptData;
|
|
2856
|
+
extractPaths(inlineContent);
|
|
2857
|
+
for (const src of externalUrls) {
|
|
2840
2858
|
try {
|
|
2841
|
-
const
|
|
2842
|
-
const
|
|
2843
|
-
|
|
2844
|
-
|
|
2845
|
-
|
|
2846
|
-
const path5 = match[1];
|
|
2847
|
-
if (path5.includes(":") || path5.includes("*") || routeSet.has(path5)) continue;
|
|
2848
|
-
routeSet.add(path5);
|
|
2859
|
+
const absoluteSrc = src.startsWith("http") ? src : new URL(src, page.url()).href;
|
|
2860
|
+
const resp = await fetch(absoluteSrc, { signal: AbortSignal.timeout(5e3) });
|
|
2861
|
+
if (resp.ok) {
|
|
2862
|
+
const text = await resp.text();
|
|
2863
|
+
extractPaths(text);
|
|
2849
2864
|
}
|
|
2850
2865
|
} catch {
|
|
2851
2866
|
}
|
|
2852
|
-
|
|
2853
|
-
|
|
2867
|
+
}
|
|
2868
|
+
try {
|
|
2869
|
+
const vueRoutes = await page.evaluate((evalOrigin) => {
|
|
2870
|
+
const routes2 = [];
|
|
2871
|
+
const win = window;
|
|
2872
|
+
const vue3App = win.__vue_app__;
|
|
2873
|
+
const vue3Router = vue3App?.config?.globalProperties?.$router;
|
|
2874
|
+
let router = vue3Router;
|
|
2875
|
+
if (!router) {
|
|
2876
|
+
const el = document.querySelector("#app") || document.querySelector("[__vue__]");
|
|
2877
|
+
const vm = el ? el.__vue__ : void 0;
|
|
2878
|
+
const vmRouter = vm?.$router;
|
|
2879
|
+
const vmRootRouter = vm?.$root?.$router;
|
|
2880
|
+
if (!router) router = vmRouter || vmRootRouter;
|
|
2881
|
+
}
|
|
2882
|
+
const routeList = router?.options?.routes;
|
|
2883
|
+
if (routeList) {
|
|
2884
|
+
for (const r of routeList) {
|
|
2885
|
+
if (r.path && !r.path.includes(":") && r.path !== "/" && r.path !== "") {
|
|
2886
|
+
routes2.push(`${evalOrigin.replace(/\/$/, "")}/#${r.path}`);
|
|
2887
|
+
}
|
|
2888
|
+
}
|
|
2889
|
+
}
|
|
2890
|
+
return routes2;
|
|
2891
|
+
}, origin);
|
|
2892
|
+
for (const r of vueRoutes) routeSet.add(r);
|
|
2893
|
+
} catch {
|
|
2894
|
+
}
|
|
2895
|
+
return Array.from(routeSet).map(
|
|
2896
|
+
(p) => p.startsWith("http") ? p : `${origin.replace(/\/$/, "")}${p.startsWith("/") ? "" : "/"}${p}`
|
|
2897
|
+
);
|
|
2854
2898
|
}
|
|
2855
2899
|
function parseRobotsTxt(text) {
|
|
2856
2900
|
const rules = [];
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@xbrowser/cli",
|
|
3
|
-
"version": "1.7.2",
|
|
3
|
+
"version": "1.7.4",
|
|
4
4
|
"description": "Browser automation CLI for web scraping, headless browsing, SEO analysis, and AI agent workflows. A command-line alternative to Playwright, Puppeteer, and Selenium.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|