cbrowser 10.7.1 → 10.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/browser.js CHANGED
@@ -779,6 +779,11 @@ export class CBrowser {
779
779
  // =========================================================================
780
780
  /**
781
781
  * Navigate to a URL.
782
+ *
783
+ * v10.10.0: Uses progressive loading strategy to avoid hangs on SPAs:
784
+ * 1. Try networkidle with short timeout (10s)
785
+ * 2. Fall back to domcontentloaded + stability check
786
+ * 3. Always succeeds if page loads at all
782
787
  */
783
788
  async navigate(url) {
784
789
  // Skip session restore since we're explicitly navigating to a new URL
@@ -797,10 +802,36 @@ export class CBrowser {
797
802
  warnings.push(msg.text());
798
803
  }
799
804
  });
800
- await page.goto(url, {
801
- waitUntil: "networkidle",
802
- timeout: this.config.timeout,
803
- });
805
+ // Progressive loading strategy (v10.10.0)
806
+ // Many SPAs (GitHub, NYT, etc.) never reach networkidle
807
+ const networkIdleTimeout = Math.min(10000, this.config.timeout || 30000);
808
+ try {
809
+ // Try networkidle first with short timeout
810
+ await page.goto(url, {
811
+ waitUntil: "networkidle",
812
+ timeout: networkIdleTimeout,
813
+ });
814
+ }
815
+ catch (e) {
816
+ const error = e;
817
+ if (error.message?.includes("Timeout") || error.message?.includes("timeout")) {
818
+ // Fallback: Use domcontentloaded + manual stability check
819
+ if (this.config.verbose) {
820
+ console.log(`⚠️ networkidle timeout, falling back to domcontentloaded...`);
821
+ }
822
+ // v10.10.0: Fallback to domcontentloaded (rethrows on failure)
823
+ await page.goto(url, {
824
+ waitUntil: "domcontentloaded",
825
+ timeout: this.config.timeout || 30000,
826
+ });
827
+ // Wait for page to stabilize (no major DOM changes for 500ms)
828
+ await this.waitForStability(page, 2000);
829
+ }
830
+ else {
831
+ // Non-timeout error, rethrow
832
+ throw e;
833
+ }
834
+ }
804
835
  const loadTime = Date.now() - startTime;
805
836
  const screenshot = await this.screenshot();
806
837
  return {
@@ -812,6 +843,44 @@ export class CBrowser {
812
843
  loadTime,
813
844
  };
814
845
  }
846
+ /**
847
+ * Wait for page to stabilize (minimal DOM mutations).
848
+ * Used as fallback when networkidle times out on SPAs.
849
+ * @internal
850
+ */
851
+ async waitForStability(page, maxWaitMs = 2000) {
852
+ const checkInterval = 200;
853
+ const minStableChecks = 2; // Require 2 consecutive stable checks
854
+ let stableChecks = 0;
855
+ let elapsed = 0;
856
+ while (elapsed < maxWaitMs && stableChecks < minStableChecks) {
857
+ const mutationCount = await page.evaluate(() => {
858
+ return new Promise((resolve) => {
859
+ let mutations = 0;
860
+ const observer = new MutationObserver((records) => {
861
+ mutations += records.length;
862
+ });
863
+ observer.observe(document.body || document.documentElement, {
864
+ childList: true,
865
+ subtree: true,
866
+ attributes: true,
867
+ });
868
+ setTimeout(() => {
869
+ observer.disconnect();
870
+ resolve(mutations);
871
+ }, 150);
872
+ });
873
+ });
874
+ if (mutationCount < 3) {
875
+ stableChecks++;
876
+ }
877
+ else {
878
+ stableChecks = 0;
879
+ }
880
+ await page.waitForTimeout(checkInterval);
881
+ elapsed += checkInterval + 150;
882
+ }
883
+ }
815
884
  // =========================================================================
816
885
  // Interaction
817
886
  // =========================================================================
@@ -1903,39 +1972,78 @@ export class CBrowser {
1903
1972
  async findAlternativeSelectors(originalSelector) {
1904
1973
  const page = await this.getPage();
1905
1974
  const alternatives = [];
1975
+ // v10.10.0: Tokenize selector for better word-level matching
1976
+ const selectorLower = originalSelector.toLowerCase();
1977
+ const selectorWords = selectorLower
1978
+ .replace(/[^\w\s]/g, " ")
1979
+ .split(/\s+/)
1980
+ .filter(w => w.length > 2 && !["the", "a", "an", "to", "for", "of", "in", "on"].includes(w));
1906
1981
  try {
1907
1982
  // Try to find elements with similar text
1908
1983
  const elements = await page.$$('button, a, [role="button"], input[type="submit"]');
1909
- for (const el of elements.slice(0, 10)) {
1984
+ for (const el of elements.slice(0, 20)) {
1910
1985
  const text = await el.textContent().catch(() => "");
1911
1986
  const ariaLabel = await el.getAttribute("aria-label").catch(() => "");
1912
- const _title = await el.getAttribute("title").catch(() => "");
1987
+ const title = await el.getAttribute("title").catch(() => "");
1913
1988
  const id = await el.getAttribute("id").catch(() => "");
1914
- const _className = await el.getAttribute("class").catch(() => "");
1915
- // Check if text matches original selector
1916
- if (text && originalSelector.toLowerCase().includes(text.toLowerCase().trim().substring(0, 20))) {
1917
- alternatives.push({
1918
- selector: `text="${text.trim()}"`,
1919
- confidence: 0.8,
1920
- reason: `Text match: "${text.trim()}"`,
1921
- });
1989
+ const href = await el.getAttribute("href").catch(() => "");
1990
+ const textLower = (text || "").toLowerCase().trim();
1991
+ const ariaLower = (ariaLabel || "").toLowerCase();
1992
+ // v10.10.0: Bidirectional word-level matching for text
1993
+ if (textLower) {
1994
+ const textWords = textLower.replace(/[^\w\s]/g, " ").split(/\s+/).filter(w => w.length > 2);
1995
+ const matchingWords = selectorWords.filter(sw => textWords.some(tw => tw.includes(sw) || sw.includes(tw)));
1996
+ const matchRatio = selectorWords.length > 0 ? matchingWords.length / selectorWords.length : 0;
1997
+ // Also check if text IS a word in the selector (e.g., "submit" in "submit a story")
1998
+ const exactWordMatch = selectorWords.includes(textLower);
1999
+ if (matchRatio >= 0.5 || exactWordMatch || selectorLower.includes(textLower)) {
2000
+ // v10.10.0: Use plain text (not text="...") since findElement uses getByText
2001
+ const textTrimmed = text?.trim() || "";
2002
+ alternatives.push({
2003
+ selector: textTrimmed,
2004
+ confidence: exactWordMatch ? 0.9 : matchRatio >= 0.7 ? 0.85 : 0.75,
2005
+ reason: `Text match: "${textTrimmed}"`,
2006
+ });
2007
+ }
2008
+ }
2009
+ // Check aria-label with word matching
2010
+ if (ariaLower) {
2011
+ const ariaWords = ariaLower.replace(/[^\w\s]/g, " ").split(/\s+/).filter(w => w.length > 2);
2012
+ const matchingWords = selectorWords.filter(sw => ariaWords.some(aw => aw.includes(sw) || sw.includes(aw)));
2013
+ const matchRatio = selectorWords.length > 0 ? matchingWords.length / selectorWords.length : 0;
2014
+ if (matchRatio >= 0.5 || selectorLower.includes(ariaLower)) {
2015
+ // CSS selector works with findElement's Strategy 7
2016
+ alternatives.push({
2017
+ selector: `css:[aria-label="${ariaLabel}"]`,
2018
+ confidence: 0.9,
2019
+ reason: `Aria-label: "${ariaLabel}"`,
2020
+ });
2021
+ }
1922
2022
  }
1923
- // Check aria-label
1924
- if (ariaLabel && originalSelector.toLowerCase().includes(ariaLabel.toLowerCase().substring(0, 20))) {
2023
+ // Check title attribute
2024
+ if (title && selectorLower.includes(title.toLowerCase())) {
1925
2025
  alternatives.push({
1926
- selector: `[aria-label="${ariaLabel}"]`,
1927
- confidence: 0.9,
1928
- reason: `Aria-label: "${ariaLabel}"`,
2026
+ selector: `css:[title="${title}"]`,
2027
+ confidence: 0.85,
2028
+ reason: `Title: "${title}"`,
1929
2029
  });
1930
2030
  }
1931
2031
  // Check id
1932
- if (id && originalSelector.toLowerCase().includes(id.toLowerCase())) {
2032
+ if (id && selectorLower.includes(id.toLowerCase())) {
1933
2033
  alternatives.push({
1934
- selector: `#${id}`,
2034
+ selector: `css:#${id}`,
1935
2035
  confidence: 0.95,
1936
2036
  reason: `ID match: #${id}`,
1937
2037
  });
1938
2038
  }
2039
+ // v10.10.0: Check href for link selectors
2040
+ if (href && selectorWords.some(w => href.toLowerCase().includes(w))) {
2041
+ alternatives.push({
2042
+ selector: `css:a[href*="${href.slice(0, 50)}"]`,
2043
+ confidence: 0.7,
2044
+ reason: `Href match: ${href.slice(0, 30)}...`,
2045
+ });
2046
+ }
1939
2047
  }
1940
2048
  // Sort by confidence
1941
2049
  alternatives.sort((a, b) => b.confidence - a.confidence);