@xbrowser/cli 1.7.1 → 1.7.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -2795,22 +2795,59 @@ async function extractLinks(page, origin) {
2795
2795
  }, origin);
2796
2796
  }
2797
2797
  async function detectSpaRoutes(page, origin) {
2798
- return page.evaluate((evalOrigin) => {
2799
- const routeSet = /* @__PURE__ */ new Set();
2798
+ const routeSet = /* @__PURE__ */ new Set();
2799
+ const pathRegex = /['"`](\/[a-zA-Z0-9_\-/]+)['"`]/g;
2800
+ const isParamRoute = (p) => p.includes(":") || p.includes("*");
2801
+ function extractPaths(source) {
2802
+ let match;
2803
+ while ((match = pathRegex.exec(source)) !== null) {
2804
+ const path3 = match[1];
2805
+ if (!isParamRoute(path3)) routeSet.add(path3);
2806
+ }
2807
+ }
2808
+ const scriptData = await page.evaluate(() => {
2809
+ const scripts = Array.from(document.querySelectorAll("script"));
2810
+ return {
2811
+ inlineContent: scripts.map((s) => s.textContent || "").join("\n"),
2812
+ externalUrls: scripts.map((s) => s.src).filter((src) => src && !src.includes("analytics") && !src.includes("google") && !src.includes("baidu"))
2813
+ };
2814
+ });
2815
+ const { inlineContent, externalUrls } = scriptData;
2816
+ extractPaths(inlineContent);
2817
+ for (const src of externalUrls) {
2800
2818
  try {
2801
- const scripts = document.querySelectorAll("script");
2802
- const allContent = Array.from(scripts).map((s) => s.textContent || "").join("\n");
2803
- const pathRegex = /['"`](\/[a-zA-Z0-9_\-/]+)['"`]/g;
2804
- let match;
2805
- while ((match = pathRegex.exec(allContent)) !== null) {
2806
- const path3 = match[1];
2807
- if (path3.includes(":") || path3.includes("*") || routeSet.has(path3)) continue;
2808
- routeSet.add(path3);
2819
+ const absoluteSrc = src.startsWith("http") ? src : new URL(src, page.url()).href;
2820
+ const resp = await fetch(absoluteSrc, { signal: AbortSignal.timeout(5e3) });
2821
+ if (resp.ok) {
2822
+ const text = await resp.text();
2823
+ extractPaths(text);
2809
2824
  }
2810
2825
  } catch {
2811
2826
  }
2812
- return Array.from(routeSet).map((path3) => `${evalOrigin.replace(/\/$/, "")}${path3}`);
2813
- }, origin);
2827
+ }
2828
+ try {
2829
+ const vueRoutes = await page.evaluate((evalOrigin) => {
2830
+ const routes2 = [];
2831
+ const win = window;
2832
+ const vueApp = win.__vue_app__;
2833
+ const gp = vueApp?.config?.globalProperties;
2834
+ const router = gp?.$router;
2835
+ const routeList = router?.options?.routes;
2836
+ if (routeList) {
2837
+ for (const r of routeList) {
2838
+ if (r.path && !r.path.includes(":") && r.path !== "/" && r.path !== "") {
2839
+ routes2.push(`${evalOrigin.replace(/\/$/, "")}/#${r.path}`);
2840
+ }
2841
+ }
2842
+ }
2843
+ return routes2;
2844
+ }, origin);
2845
+ for (const r of vueRoutes) routeSet.add(r);
2846
+ } catch {
2847
+ }
2848
+ return Array.from(routeSet).map(
2849
+ (p) => p.startsWith("http") ? p : `${origin.replace(/\/$/, "")}${p.startsWith("/") ? "" : "/"}${p}`
2850
+ );
2814
2851
  }
2815
2852
  function parseRobotsTxt(text) {
2816
2853
  const rules = [];
@@ -2951,7 +2988,7 @@ var crawlCommand = registerCommand({
2951
2988
  allowSubdomains: z17.boolean().default(false),
2952
2989
  allowExternalLinks: z17.boolean().default(false),
2953
2990
  allowBackwardCrawling: z17.boolean().default(false),
2954
- enableSpa: z17.boolean().default(false).describe("Detect SPA (Vue/React) routes from router config"),
2991
+ enableSpa: z17.boolean().default(true).describe("Disable to skip SPA route detection"),
2955
2992
  format: z17.enum(["markdown", "html"]).default("markdown"),
2956
2993
  onlyMainContent: z17.boolean().default(true),
2957
2994
  concurrency: z17.number().default(3),
@@ -2762,22 +2762,59 @@ async function extractLinks(page, origin) {
2762
2762
  }, origin);
2763
2763
  }
2764
2764
  async function detectSpaRoutes(page, origin) {
2765
- return page.evaluate((evalOrigin) => {
2766
- const routeSet = /* @__PURE__ */ new Set();
2765
+ const routeSet = /* @__PURE__ */ new Set();
2766
+ const pathRegex = /['"`](\/[a-zA-Z0-9_\-/]+)['"`]/g;
2767
+ const isParamRoute = (p) => p.includes(":") || p.includes("*");
2768
+ function extractPaths(source) {
2769
+ let match;
2770
+ while ((match = pathRegex.exec(source)) !== null) {
2771
+ const path2 = match[1];
2772
+ if (!isParamRoute(path2)) routeSet.add(path2);
2773
+ }
2774
+ }
2775
+ const scriptData = await page.evaluate(() => {
2776
+ const scripts = Array.from(document.querySelectorAll("script"));
2777
+ return {
2778
+ inlineContent: scripts.map((s) => s.textContent || "").join("\n"),
2779
+ externalUrls: scripts.map((s) => s.src).filter((src) => src && !src.includes("analytics") && !src.includes("google") && !src.includes("baidu"))
2780
+ };
2781
+ });
2782
+ const { inlineContent, externalUrls } = scriptData;
2783
+ extractPaths(inlineContent);
2784
+ for (const src of externalUrls) {
2767
2785
  try {
2768
- const scripts = document.querySelectorAll("script");
2769
- const allContent = Array.from(scripts).map((s) => s.textContent || "").join("\n");
2770
- const pathRegex = /['"`](\/[a-zA-Z0-9_\-/]+)['"`]/g;
2771
- let match;
2772
- while ((match = pathRegex.exec(allContent)) !== null) {
2773
- const path2 = match[1];
2774
- if (path2.includes(":") || path2.includes("*") || routeSet.has(path2)) continue;
2775
- routeSet.add(path2);
2786
+ const absoluteSrc = src.startsWith("http") ? src : new URL(src, page.url()).href;
2787
+ const resp = await fetch(absoluteSrc, { signal: AbortSignal.timeout(5e3) });
2788
+ if (resp.ok) {
2789
+ const text = await resp.text();
2790
+ extractPaths(text);
2776
2791
  }
2777
2792
  } catch {
2778
2793
  }
2779
- return Array.from(routeSet).map((path2) => `${evalOrigin.replace(/\/$/, "")}${path2}`);
2780
- }, origin);
2794
+ }
2795
+ try {
2796
+ const vueRoutes = await page.evaluate((evalOrigin) => {
2797
+ const routes = [];
2798
+ const win = window;
2799
+ const vueApp = win.__vue_app__;
2800
+ const gp = vueApp?.config?.globalProperties;
2801
+ const router = gp?.$router;
2802
+ const routeList = router?.options?.routes;
2803
+ if (routeList) {
2804
+ for (const r of routeList) {
2805
+ if (r.path && !r.path.includes(":") && r.path !== "/" && r.path !== "") {
2806
+ routes.push(`${evalOrigin.replace(/\/$/, "")}/#${r.path}`);
2807
+ }
2808
+ }
2809
+ }
2810
+ return routes;
2811
+ }, origin);
2812
+ for (const r of vueRoutes) routeSet.add(r);
2813
+ } catch {
2814
+ }
2815
+ return Array.from(routeSet).map(
2816
+ (p) => p.startsWith("http") ? p : `${origin.replace(/\/$/, "")}${p.startsWith("/") ? "" : "/"}${p}`
2817
+ );
2781
2818
  }
2782
2819
  function parseRobotsTxt(text) {
2783
2820
  const rules = [];
@@ -2918,7 +2955,7 @@ var crawlCommand = registerCommand({
2918
2955
  allowSubdomains: z17.boolean().default(false),
2919
2956
  allowExternalLinks: z17.boolean().default(false),
2920
2957
  allowBackwardCrawling: z17.boolean().default(false),
2921
- enableSpa: z17.boolean().default(false).describe("Detect SPA (Vue/React) routes from router config"),
2958
+ enableSpa: z17.boolean().default(true).describe("Disable to skip SPA route detection"),
2922
2959
  format: z17.enum(["markdown", "html"]).default("markdown"),
2923
2960
  onlyMainContent: z17.boolean().default(true),
2924
2961
  concurrency: z17.number().default(3),
package/dist/index.js CHANGED
@@ -2835,22 +2835,59 @@ async function extractLinks(page, origin) {
2835
2835
  }, origin);
2836
2836
  }
2837
2837
  async function detectSpaRoutes(page, origin) {
2838
- return page.evaluate((evalOrigin) => {
2839
- const routeSet = /* @__PURE__ */ new Set();
2838
+ const routeSet = /* @__PURE__ */ new Set();
2839
+ const pathRegex = /['"`](\/[a-zA-Z0-9_\-/]+)['"`]/g;
2840
+ const isParamRoute = (p) => p.includes(":") || p.includes("*");
2841
+ function extractPaths(source) {
2842
+ let match;
2843
+ while ((match = pathRegex.exec(source)) !== null) {
2844
+ const path5 = match[1];
2845
+ if (!isParamRoute(path5)) routeSet.add(path5);
2846
+ }
2847
+ }
2848
+ const scriptData = await page.evaluate(() => {
2849
+ const scripts = Array.from(document.querySelectorAll("script"));
2850
+ return {
2851
+ inlineContent: scripts.map((s) => s.textContent || "").join("\n"),
2852
+ externalUrls: scripts.map((s) => s.src).filter((src) => src && !src.includes("analytics") && !src.includes("google") && !src.includes("baidu"))
2853
+ };
2854
+ });
2855
+ const { inlineContent, externalUrls } = scriptData;
2856
+ extractPaths(inlineContent);
2857
+ for (const src of externalUrls) {
2840
2858
  try {
2841
- const scripts = document.querySelectorAll("script");
2842
- const allContent = Array.from(scripts).map((s) => s.textContent || "").join("\n");
2843
- const pathRegex = /['"`](\/[a-zA-Z0-9_\-/]+)['"`]/g;
2844
- let match;
2845
- while ((match = pathRegex.exec(allContent)) !== null) {
2846
- const path5 = match[1];
2847
- if (path5.includes(":") || path5.includes("*") || routeSet.has(path5)) continue;
2848
- routeSet.add(path5);
2859
+ const absoluteSrc = src.startsWith("http") ? src : new URL(src, page.url()).href;
2860
+ const resp = await fetch(absoluteSrc, { signal: AbortSignal.timeout(5e3) });
2861
+ if (resp.ok) {
2862
+ const text = await resp.text();
2863
+ extractPaths(text);
2849
2864
  }
2850
2865
  } catch {
2851
2866
  }
2852
- return Array.from(routeSet).map((path5) => `${evalOrigin.replace(/\/$/, "")}${path5}`);
2853
- }, origin);
2867
+ }
2868
+ try {
2869
+ const vueRoutes = await page.evaluate((evalOrigin) => {
2870
+ const routes2 = [];
2871
+ const win = window;
2872
+ const vueApp = win.__vue_app__;
2873
+ const gp = vueApp?.config?.globalProperties;
2874
+ const router = gp?.$router;
2875
+ const routeList = router?.options?.routes;
2876
+ if (routeList) {
2877
+ for (const r of routeList) {
2878
+ if (r.path && !r.path.includes(":") && r.path !== "/" && r.path !== "") {
2879
+ routes2.push(`${evalOrigin.replace(/\/$/, "")}/#${r.path}`);
2880
+ }
2881
+ }
2882
+ }
2883
+ return routes2;
2884
+ }, origin);
2885
+ for (const r of vueRoutes) routeSet.add(r);
2886
+ } catch {
2887
+ }
2888
+ return Array.from(routeSet).map(
2889
+ (p) => p.startsWith("http") ? p : `${origin.replace(/\/$/, "")}${p.startsWith("/") ? "" : "/"}${p}`
2890
+ );
2854
2891
  }
2855
2892
  function parseRobotsTxt(text) {
2856
2893
  const rules = [];
@@ -2991,7 +3028,7 @@ var crawlCommand = registerCommand({
2991
3028
  allowSubdomains: z17.boolean().default(false),
2992
3029
  allowExternalLinks: z17.boolean().default(false),
2993
3030
  allowBackwardCrawling: z17.boolean().default(false),
2994
- enableSpa: z17.boolean().default(false).describe("Detect SPA (Vue/React) routes from router config"),
3031
+ enableSpa: z17.boolean().default(true).describe("Disable to skip SPA route detection"),
2995
3032
  format: z17.enum(["markdown", "html"]).default("markdown"),
2996
3033
  onlyMainContent: z17.boolean().default(true),
2997
3034
  concurrency: z17.number().default(3),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@xbrowser/cli",
3
- "version": "1.7.1",
3
+ "version": "1.7.3",
4
4
  "description": "Browser automation CLI for web scraping, headless browsing, SEO analysis, and AI agent workflows. A command-line alternative to Playwright, Puppeteer, and Selenium.",
5
5
  "type": "module",
6
6
  "bin": {