mx-cloud 0.0.12 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -360,18 +360,169 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
360
360
  window.scrapeList = function (_a) {
361
361
  return __awaiter(this, arguments, void 0, function* ({ listSelector, fields, limit = 10 }) {
362
362
  // XPath evaluation functions
363
- const evaluateXPath = (rootElement, xpath) => {
363
// Resolves one XPath step against a context node: collects every descendant
// whose tag matches the step's tag name and keeps those that satisfy all of
// the step's predicates. Any failure is logged and yields an empty list.
const queryInsideContext = (context, part) => {
    try {
        const parsedStep = parseXPathPart(part);
        const accepted = [];
        for (const candidate of Array.from(context.querySelectorAll(parsedStep.tagName))) {
            if (elementMatchesConditions(candidate, parsedStep.conditions)) {
                accepted.push(candidate);
            }
        }
        return accepted;
    }
    catch (err) {
        console.error("Error in queryInsideContext:", err);
        return [];
    }
};
380
// Helper function to parse one XPath location step (e.g. `div[@id="x"][2]`).
// Returns `{ tagName, conditions }` where `tagName` is the leading element
// name ("*" when the step does not start with a name) and `conditions` holds
// the text of each top-level `[...]` predicate, without the brackets.
const parseXPathPart = (part) => {
    // Underscore is accepted in addition to letters, digits and "-" so that
    // names such as `my_widget` are not cut short.
    const tagMatch = part.match(/^([a-zA-Z0-9_-]+)/);
    const tagName = tagMatch ? tagMatch[1] : "*";
    const conditions = [];
    let depth = 0; // nesting level of "[" ... "]"
    let quote = null; // quote character currently open inside a predicate
    let start = -1; // index just after the "[" that opened the predicate
    for (let i = 0; i < part.length; i++) {
        const ch = part[i];
        if (quote !== null) {
            // Inside a quoted value brackets are plain text.
            if (ch === quote) {
                quote = null;
            }
            continue;
        }
        if (depth > 0 && (ch === '"' || ch === "'")) {
            quote = ch;
        }
        else if (ch === "[") {
            if (depth === 0) {
                start = i + 1;
            }
            depth++;
        }
        else if (ch === "]" && depth > 0) {
            depth--;
            if (depth === 0 && i > start) {
                // Empty predicates ("[]") are skipped.
                conditions.push(part.slice(start, i));
            }
        }
    }
    return { tagName, conditions };
};
390
// An element passes only when every predicate in `conditions` accepts it;
// an empty predicate list therefore accepts any element.
const elementMatchesConditions = (element, conditions) => {
    return conditions.every((singleCondition) => elementMatchesCondition(element, singleCondition));
};
399
// Helper function to check if element matches a single XPath predicate.
//
// Supported predicate forms:
//   N                          positional index (always accepted here)
//   @attr="value"              exact attribute match (empty value allowed)
//   contains(@attr, 'value')   substring match on the attribute, @class included
//   text()="value"             trimmed textContent equals value
//   contains(text(), 'value')  trimmed textContent contains value
//   count(*)=N                 element has exactly N element children
// A quoted value may contain the other quote character (e.g. "it's").
// Anything else is treated as "matches" so an unsupported predicate never
// filters elements out.
const elementMatchesCondition = (element, condition) => {
    condition = condition.trim();
    // Bare numbers are positions, not filters.
    if (/^\d+$/.test(condition)) {
        return true;
    }
    // Handle @attribute="value"
    const attrMatch = condition.match(/^@([^=\s]+)\s*=\s*(["'])((?:(?!\2).)*)\2$/);
    if (attrMatch) {
        return element.getAttribute(attrMatch[1]) === attrMatch[3];
    }
    // Handle contains(@attribute, 'value'). XPath contains() is a substring
    // test, so @class is handled like any other attribute rather than by
    // whole-token lookup.
    const attrContainsMatch = condition.match(/^contains\(\s*@([^,\s]+)\s*,\s*(["'])((?:(?!\2).)*)\2\s*\)$/);
    if (attrContainsMatch) {
        const elementValue = element.getAttribute(attrContainsMatch[1]) || "";
        return elementValue.includes(attrContainsMatch[3]);
    }
    const textContent = element.textContent;
    const elementText = (textContent === null || textContent === undefined ? "" : textContent.trim()) || "";
    // Handle text()="value"
    const textMatch = condition.match(/^text\(\)\s*=\s*(["'])((?:(?!\2).)*)\2$/);
    if (textMatch) {
        return elementText === textMatch[2];
    }
    // Handle contains(text(), 'value')
    const textContainsMatch = condition.match(/^contains\(\s*text\(\)\s*,\s*(["'])((?:(?!\2).)*)\2\s*\)$/);
    if (textContainsMatch) {
        return elementText.includes(textContainsMatch[2]);
    }
    // Handle count(*)=N, including N = 0 (element has no children)
    const countMatch = condition.match(/^count\(\*\)\s*=\s*(\d+)$/);
    if (countMatch) {
        return element.children.length === parseInt(countMatch[1], 10);
    }
    // Unsupported predicate: accept the element.
    return true;
};
452
// Resolves `xpath` to a single node.
//
// - isShadow = false: delegates to the native `document.evaluate` and returns
//   the first matching node, or null.
// - isShadow = true: walks the path step by step with `queryInsideContext`,
//   descending into every `shadowRoot` met on the way. An outer
//   `(path)[n]` wrapper selects the n-th (1-based) node of the final result
//   set, and a trailing `[n]` on a step selects the n-th match of that step
//   within each context, also when other predicates precede it.
//
// Returns null when nothing matches, when an index is out of range, or when
// evaluation throws (the error is logged).
const evaluateXPath = (document, xpath, isShadow = false) => {
    // Splits a path into location steps at "/" separators that lie outside
    // quoted strings, predicates and function-call parentheses, so a value
    // such as @href="/a/b" stays inside its step.
    const splitLocationSteps = (path) => {
        const steps = [];
        let current = "";
        let depth = 0;
        let quote = null;
        for (const ch of path) {
            if (quote !== null) {
                if (ch === quote) {
                    quote = null;
                }
                current += ch;
                continue;
            }
            if (ch === '"' || ch === "'") {
                quote = ch;
            }
            else if (ch === "[" || ch === "(") {
                depth++;
            }
            else if ((ch === "]" || ch === ")") && depth > 0) {
                depth--;
            }
            else if (ch === "/" && depth === 0) {
                steps.push(current);
                current = "";
                continue;
            }
            current += ch;
        }
        steps.push(current);
        return steps.map((p) => p.trim()).filter((p) => p.length > 0);
    };
    try {
        if (!isShadow) {
            // Native evaluation is only needed (and only used) outside shadow mode.
            return document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
        }
        // Unwrap "(path)[n]rest" into "path" + "rest"; n is applied at the end.
        const indexedMatch = xpath.match(/^\((.*?)\)\[(\d+)\](.*)$/);
        const cleanPath = indexedMatch ? indexedMatch[1] + indexedMatch[3] : xpath;
        const pathParts = splitLocationSteps(cleanPath);
        let currentContexts = [document];
        for (const part of pathParts) {
            // A trailing "[n]" is a position; everything before it is the step
            // itself, including any other predicates.
            const positionalMatch = part.match(/^(.+)\[(\d+)\]$/);
            const partWithoutPosition = positionalMatch ? positionalMatch[1] : part;
            const requestedPosition = positionalMatch ? parseInt(positionalMatch[2], 10) : null;
            const nextContexts = [];
            for (const ctx of currentContexts) {
                const matched = queryInsideContext(ctx, partWithoutPosition);
                let elementsToAdd = matched;
                if (requestedPosition !== null) {
                    const index = requestedPosition - 1; // XPath is 1-based, arrays are 0-based
                    if (index >= 0 && index < matched.length) {
                        elementsToAdd = [matched[index]];
                    }
                    else {
                        console.warn(`Position ${requestedPosition} out of range (${matched.length} elements found)`);
                        elementsToAdd = [];
                    }
                }
                elementsToAdd.forEach((el) => {
                    nextContexts.push(el);
                    // Also search inside the element's shadow tree on the next step.
                    if (el.shadowRoot) {
                        nextContexts.push(el.shadowRoot);
                    }
                });
            }
            if (nextContexts.length === 0) {
                return null;
            }
            currentContexts = nextContexts;
        }
        if (currentContexts.length === 0) {
            return null;
        }
        if (indexedMatch) {
            const requestedIndex = parseInt(indexedMatch[2], 10) - 1;
            if (requestedIndex >= 0 && requestedIndex < currentContexts.length) {
                return currentContexts[requestedIndex];
            }
            console.warn(`Requested index ${requestedIndex + 1} out of range (${currentContexts.length} elements found)`);
            return null;
        }
        return currentContexts[0];
    }
    catch (err) {
        console.error("Critical XPath failure:", xpath, err);
        return null;
    }
};
@@ -834,7 +985,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
834
985
  if (isXPathSelector(field.selector)) {
835
986
  // Create indexed absolute XPath
836
987
  const indexedSelector = createIndexedXPath(field.selector, listSelector, containerIndex + 1);
837
- element = evaluateXPath(document, indexedSelector);
988
+ element = evaluateXPath(document, indexedSelector, field.isShadow);
838
989
  }
839
990
  else {
840
991
  // Fallback for CSS selectors within XPath containers
@@ -11,12 +11,14 @@ declare global {
11
11
  selector: string;
12
12
  tag: string;
13
13
  attribute: string;
14
+ listFallbackSelector?: string;
14
15
  }>) => Record<string, any>;
15
16
  scrapeList: (config: {
16
17
  listSelector: string;
17
18
  fields: any;
18
19
  limit?: number;
19
20
  pagination: any;
21
+ listFallbackSelector?: string;
20
22
  }) => Record<string, any>[];
21
23
  scrapeListAuto: (listSelector: string) => {
22
24
  selector: string;
@@ -95,6 +97,44 @@ export default class Interpreter extends EventEmitter {
95
97
  private generatePageNodeInformation;
96
98
  private detectElementChanges;
97
99
  private validateWorkflowAction;
100
+ /**
101
+ * Test if a selector is working on the current page
102
+ * @param {Page} page - Playwright page object
103
+ * @param {string} selector - Selector to test
104
+ * @param {boolean} isListSelector - Whether this should find multiple elements
105
+ * @returns {Promise<boolean>} - Whether the selector works
106
+ */
107
+ private testSelectorWorks;
108
+ /**
109
+ * Generate new selector from fallback selector
110
+ * @param {Page} page - Playwright page object
111
+ * @param {string} fallbackSelector - Fallback selector to use
112
+ * @param {boolean} isListSelector - Whether this is a list selector
113
+ * @param {string} listContext - List selector context for field selectors
114
+ * @returns {Promise<string|null>} - New selector or null if failed
115
+ */
116
+ private generateSelectorFromFallback;
117
+ /**
118
+ * Validate and fix scrapeList action selectors
119
+ * @param {Object} scrapeListConfig - ScrapeList configuration object
120
+ * @param {Page} page - Playwright page object
121
+ * @returns {Promise<boolean>} - Whether any changes were made
122
+ */
123
+ private validateScrapeListAction;
124
+ /**
125
+ * Validate and fix scrapeSchema action selectors
126
+ * @param {Object} scrapeSchemaConfig - ScrapeSchema configuration object
127
+ * @param {Page} page - Playwright page object
128
+ * @returns {Promise<boolean>} - Whether any changes were made
129
+ */
130
+ private validateScrapeSchemaAction;
131
+ /**
132
+ * Validate and fix selectors for a workflow action just before execution
133
+ * @param {Page} page - Playwright page object
134
+ * @param {WhereWhatPair} action - The action to validate
135
+ * @returns {Promise<WhereWhatPair>} - The potentially modified action
136
+ */
137
+ private validateAndFixSelectors;
98
138
  private runLoop;
99
139
  private ensureScriptsLoaded;
100
140
  /**
@@ -559,7 +559,43 @@ class Interpreter extends events_1.EventEmitter {
559
559
  }
560
560
  return false;
561
561
  };
562
- // Enhanced button finder with retry mechanism
562
// Helper function to detect if a selector is XPath.
// A selector is treated as XPath when it starts with "/", "./" or a
// parenthesised path such as "(//a)[2]", or when it contains one of the
// XPath-only fragments checked below. Non-string input is never XPath.
// NOTE(review): the " and " / " or " checks also fire for CSS attribute
// values containing those words (e.g. [title="terms and conditions"]);
// kept as-is for compatibility.
const isXPathSelector = (selector) => {
    if (typeof selector !== 'string') {
        return false;
    }
    const candidate = selector.trim();
    return candidate.startsWith('/') ||
        candidate.startsWith('./') ||
        /^\(+\s*\.?\//.test(candidate) ||
        candidate.includes('contains(@') ||
        candidate.includes('[count(') ||
        candidate.includes('@class=') ||
        candidate.includes('@id=') ||
        candidate.includes(' and ') ||
        candidate.includes(' or ');
};
574
// Helper function to wait for selector (CSS or XPath).
// Waits until an element matching `selector` is attached and resolves to its
// handle, or to null when the wait fails or times out. `options.timeout`
// defaults to 10000 ms when omitted; an explicit 0 is passed through.
const waitForSelectorUniversal = (selector_2, ...args_1) => __awaiter(this, [selector_2, ...args_1], void 0, function* (selector, options = {}) {
    try {
        const timeout = options.timeout === undefined || options.timeout === null
            ? 10000
            : options.timeout;
        if (isXPathSelector(selector)) {
            // Locators are strict: waitFor()/elementHandle() reject when the
            // XPath matches more than one element. `.first()` gives the same
            // "take the first match" behaviour as the CSS branch below.
            const locator = page.locator(`xpath=${selector}`).first();
            yield locator.waitFor({
                state: 'attached',
                timeout
            });
            return yield locator.elementHandle();
        }
        // Use CSS selector
        return yield page.waitForSelector(selector, {
            state: 'attached',
            timeout
        });
    }
    catch (error) {
        // Any failure (timeout, invalid selector, ...) is reported as "not found".
        return null;
    }
});
598
+ // Enhanced button finder with retry mechanism for both CSS and XPath selectors
563
599
  const findWorkingButton = (selectors) => __awaiter(this, void 0, void 0, function* () {
564
600
  let updatedSelectors = [...selectors];
565
601
  for (let i = 0; i < selectors.length; i++) {
@@ -568,10 +604,7 @@ class Interpreter extends events_1.EventEmitter {
568
604
  let selectorSuccess = false;
569
605
  while (retryCount < MAX_RETRIES && !selectorSuccess) {
570
606
  try {
571
- const button = yield page.waitForSelector(selector, {
572
- state: 'attached',
573
- timeout: 10000
574
- });
607
+ const button = yield waitForSelectorUniversal(selector, { timeout: 10000 });
575
608
  if (button) {
576
609
  debugLog('Found working selector:', selector);
577
610
  return {
@@ -829,9 +862,9 @@ class Interpreter extends events_1.EventEmitter {
829
862
  if (checkLimit())
830
863
  return allResults;
831
864
  let loadMoreCounter = 0;
832
- let previousResultCount = allResults.length;
833
- let noNewItemsCounter = 0;
834
- const MAX_NO_NEW_ITEMS = 2;
865
+ // let previousResultCount = allResults.length;
866
+ // let noNewItemsCounter = 0;
867
+ // const MAX_NO_NEW_ITEMS = 2;
835
868
  while (true) {
836
869
  // Find working button with retry mechanism
837
870
  const { button: loadMoreButton, workingSelector, updatedSelectors } = yield findWorkingButton(availableSelectors);
@@ -888,20 +921,19 @@ class Interpreter extends events_1.EventEmitter {
888
921
  const heightChanged = currentHeight !== previousHeight;
889
922
  previousHeight = currentHeight;
890
923
  yield scrapeCurrentPage();
891
- const currentResultCount = allResults.length;
892
- const newItemsAdded = currentResultCount > previousResultCount;
893
- if (!newItemsAdded) {
894
- noNewItemsCounter++;
895
- debugLog(`No new items added after click (${noNewItemsCounter}/${MAX_NO_NEW_ITEMS})`);
896
- if (noNewItemsCounter >= MAX_NO_NEW_ITEMS) {
897
- debugLog(`Stopping after ${MAX_NO_NEW_ITEMS} clicks with no new items`);
898
- return allResults;
899
- }
900
- }
901
- else {
902
- noNewItemsCounter = 0;
903
- previousResultCount = currentResultCount;
904
- }
924
+ // const currentResultCount = allResults.length;
925
+ // const newItemsAdded = currentResultCount > previousResultCount;
926
+ // if (!newItemsAdded) {
927
+ // noNewItemsCounter++;
928
+ // debugLog(`No new items added after click (${noNewItemsCounter}/${MAX_NO_NEW_ITEMS})`);
929
+ // if (noNewItemsCounter >= MAX_NO_NEW_ITEMS) {
930
+ // debugLog(`Stopping after ${MAX_NO_NEW_ITEMS} clicks with no new items`);
931
+ // return allResults;
932
+ // }
933
+ // } else {
934
+ // noNewItemsCounter = 0;
935
+ // previousResultCount = currentResultCount;
936
+ // }
905
937
  if (checkLimit())
906
938
  return allResults;
907
939
  if (!heightChanged) {
@@ -1564,6 +1596,266 @@ class Interpreter extends events_1.EventEmitter {
1564
1596
  return modifiedAction;
1565
1597
  });
1566
1598
  }
1599
/**
 * Test if a selector is working on the current page.
 *
 * XPath selectors (leading "/", "./", a parenthesised path such as
 * "(//a)[2]", or containing `contains(@`, `@class=` or `@id=`) are counted
 * through a Playwright `xpath=` locator; everything else is counted with
 * `page.$$`. Any error while querying is reported as "not working".
 *
 * @param {Page} page - Playwright page object
 * @param {string} selector - Selector to test
 * @param {boolean} isListSelector - Whether this should find multiple elements
 * @returns {Promise<boolean>} - true when at least one element matches
 *   (at least two when isListSelector is true)
 */
testSelectorWorks(page_1, selector_2) {
    return __awaiter(this, arguments, void 0, function* (page, selector, isListSelector = false) {
        try {
            if (typeof selector !== 'string' || selector.trim() === '') {
                return false;
            }
            const trimmed = selector.trim();
            const isXPath = trimmed.startsWith('/') ||
                trimmed.startsWith('./') ||
                /^\(+\s*\.?\//.test(trimmed) ||
                selector.includes('contains(@') ||
                selector.includes('@class=') ||
                selector.includes('@id=');
            let count = 0;
            if (isXPath) {
                count = yield page.locator(`xpath=${selector}`).count();
            }
            else {
                const elements = yield page.$$(selector);
                count = elements ? elements.length : 0;
            }
            // A list needs at least two items; a field needs at least one element.
            return isListSelector ? count >= 2 : count >= 1;
        }
        catch (error) {
            return false;
        }
    });
}
1638
/**
 * Generate new selector from fallback selector.
 *
 * The fallback is first checked with `testSelectorWorks`; when it does not
 * match, or no element can be resolved from it, null is returned. Otherwise
 * the selector is produced by:
 *  - `generateListSelectorFromFallback` when isListSelector is true,
 *  - `generateListFieldSelectorFromFallback` when listContext is non-empty,
 *  - `generateFieldSelectorFromFallback` otherwise, whose result is either
 *    joined into a comma-separated chain (isPagination) or reduced with
 *    `getBestSelector`.
 *
 * @param {Page} page - Playwright page object
 * @param {string} fallbackSelector - Fallback selector to use
 * @param {boolean} isListSelector - Whether this is a list selector
 * @param {string} listContext - List selector context for field selectors
 * @param {boolean} isPagination - Whether to build a chained pagination selector
 * @returns {Promise<string|null>} - New selector or null if failed
 */
generateSelectorFromFallback(page_1, fallbackSelector_1) {
    return __awaiter(this, arguments, void 0, function* (page, fallbackSelector, isListSelector = false, listContext = '', isPagination = false) {
        var _a, _b;
        try {
            // First check if fallback selector works
            const fallbackWorks = yield this.testSelectorWorks(page, fallbackSelector, isListSelector);
            if (!fallbackWorks) {
                return null;
            }
            // Same XPath markers as testSelectorWorks, so a selector that was
            // just validated as XPath is not re-queried as CSS here.
            const isXPath = /^\s*\(*\s*\.?\//.test(fallbackSelector) ||
                fallbackSelector.includes('contains(@') ||
                fallbackSelector.includes('@class=') ||
                fallbackSelector.includes('@id=');
            // Get element using fallback selector
            const element = isXPath
                ? yield page.locator(`xpath=${fallbackSelector}`).first().elementHandle()
                : yield page.$(fallbackSelector);
            if (!element) {
                return null;
            }
            if (isListSelector) {
                return yield (0, selector_1.generateListSelectorFromFallback)(page, fallbackSelector);
            }
            if (listContext) {
                return yield (0, selector_1.generateListFieldSelectorFromFallback)(page, fallbackSelector, listContext);
            }
            const newSelectors = yield (0, selector_1.generateFieldSelectorFromFallback)(page, fallbackSelector);
            if (isPagination) {
                // For pagination, chain selectors in priority order
                const chainedSelectors = [
                    (_a = newSelectors === null || newSelectors === void 0 ? void 0 : newSelectors.iframeSelector) === null || _a === void 0 ? void 0 : _a.full,
                    (_b = newSelectors === null || newSelectors === void 0 ? void 0 : newSelectors.shadowSelector) === null || _b === void 0 ? void 0 : _b.full,
                    newSelectors === null || newSelectors === void 0 ? void 0 : newSelectors.testIdSelector,
                    newSelectors === null || newSelectors === void 0 ? void 0 : newSelectors.id,
                    newSelectors === null || newSelectors === void 0 ? void 0 : newSelectors.hrefSelector,
                    newSelectors === null || newSelectors === void 0 ? void 0 : newSelectors.accessibilitySelector,
                    newSelectors === null || newSelectors === void 0 ? void 0 : newSelectors.attrSelector,
                ]
                    .filter(selector => selector !== null && selector !== undefined && selector !== '')
                    .join(',');
                // Nothing usable was generated: report failure as null, as documented.
                return chainedSelectors.length > 0 ? chainedSelectors : null;
            }
            // For non-pagination, use getBestSelector
            const tagName = yield element.evaluate(el => el.tagName.toLowerCase());
            return yield (0, utils_1.getBestSelector)({
                selectors: newSelectors,
                tagName: tagName
            });
        }
        catch (error) {
            console.error(`Failed to generate selector from fallback: ${error.message}`);
            return null;
        }
    });
}
1710
/**
 * Validate and fix scrapeList action selectors.
 *
 * Checks, in order, the list selector, every field selector and the
 * pagination selector with `testSelectorWorks`. A selector that does not
 * work and has a fallback is regenerated through
 * `generateSelectorFromFallback`, and the config object is updated in place.
 * A pagination selector may be a comma-separated chain; the first working
 * entry replaces the chain.
 *
 * @param {Object} scrapeListConfig - ScrapeList configuration object (mutated)
 * @param {Page} page - Playwright page object
 * @returns {Promise<boolean>} - Whether any changes were made
 */
validateScrapeListAction(scrapeListConfig, page) {
    return __awaiter(this, void 0, void 0, function* () {
        // Splits a selector chain on commas that are outside quotes, "(...)"
        // and "[...]", so `contains(@class, 'x')` or `:is(a, b)` stay whole.
        const splitSelectorChain = (chain) => {
            const pieces = [];
            let current = '';
            let depth = 0;
            let quote = null;
            for (const ch of chain) {
                if (quote !== null) {
                    if (ch === quote) {
                        quote = null;
                    }
                }
                else if (ch === '"' || ch === "'") {
                    quote = ch;
                }
                else if (ch === '(' || ch === '[') {
                    depth++;
                }
                else if ((ch === ')' || ch === ']') && depth > 0) {
                    depth--;
                }
                else if (ch === ',' && depth === 0) {
                    pieces.push(current);
                    current = '';
                    continue;
                }
                current += ch;
            }
            pieces.push(current);
            return pieces.map(s => s.trim()).filter(s => s.length > 0);
        };
        let hasChanges = false;
        try {
            // Validate listSelector
            const listSelectorWorks = yield this.testSelectorWorks(page, scrapeListConfig.listSelector, true);
            if (!listSelectorWorks && scrapeListConfig.listFallbackSelector) {
                console.log(`ListSelector "${scrapeListConfig.listSelector}" not working, trying fallback...`);
                const newListSelector = yield this.generateSelectorFromFallback(page, scrapeListConfig.listFallbackSelector, true);
                if (newListSelector) {
                    console.log(`Updated listSelector: ${scrapeListConfig.listSelector} -> ${newListSelector}`);
                    scrapeListConfig.listSelector = newListSelector;
                    hasChanges = true;
                }
            }
            // Validate field selectors
            if (scrapeListConfig.fields) {
                for (const [fieldName, fieldConfig] of Object.entries(scrapeListConfig.fields)) {
                    // A null or non-object entry must not abort the remaining checks.
                    if (!fieldConfig || typeof fieldConfig !== 'object') {
                        continue;
                    }
                    const fieldSelectorWorks = yield this.testSelectorWorks(page, fieldConfig.selector, false);
                    if (!fieldSelectorWorks && fieldConfig.fallbackSelector) {
                        console.log(`Field selector "${fieldConfig.selector}" for ${fieldName} not working, trying fallback...`);
                        const newFieldSelector = yield this.generateSelectorFromFallback(page, fieldConfig.fallbackSelector, false, scrapeListConfig.listSelector);
                        if (newFieldSelector) {
                            console.log(`Updated field selector for ${fieldName}: ${fieldConfig.selector} -> ${newFieldSelector}`);
                            fieldConfig.selector = newFieldSelector;
                            hasChanges = true;
                        }
                    }
                }
            }
            // Validate pagination selector if it exists and is not empty
            const pagination = scrapeListConfig.pagination;
            if (pagination &&
                pagination.selector &&
                pagination.selector.trim() !== '') {
                // Handle comma-separated pagination selectors
                let workingSelector = null;
                for (const selector of splitSelectorChain(pagination.selector)) {
                    const works = yield this.testSelectorWorks(page, selector, false);
                    if (works) {
                        workingSelector = selector;
                        break;
                    }
                }
                if (!workingSelector && pagination.fallbackSelector) {
                    console.log(`Pagination selector not working, trying fallback...`);
                    const newPaginationSelector = yield this.generateSelectorFromFallback(page, pagination.fallbackSelector, false, '', true);
                    if (newPaginationSelector) {
                        console.log(`Updated pagination selector: ${pagination.selector} -> ${newPaginationSelector}`);
                        pagination.selector = newPaginationSelector;
                        hasChanges = true;
                    }
                }
                else if (workingSelector && workingSelector !== pagination.selector) {
                    // Narrow the chain down to the entry that matched.
                    pagination.selector = workingSelector;
                    hasChanges = true;
                }
            }
        }
        catch (error) {
            console.error(`Error validating scrapeList action: ${error.message}`);
        }
        return hasChanges;
    });
}
1781
/**
 * Validate and fix scrapeSchema action selectors.
 *
 * Every entry that has a selector is checked with `testSelectorWorks`; an
 * entry whose selector fails and that carries a fallbackSelector gets a
 * regenerated selector written back in place. Errors are logged and stop
 * the pass without throwing.
 *
 * @param {Object} scrapeSchemaConfig - ScrapeSchema configuration object (mutated)
 * @param {Page} page - Playwright page object
 * @returns {Promise<boolean>} - Whether any changes were made
 */
validateScrapeSchemaAction(scrapeSchemaConfig, page) {
    return __awaiter(this, void 0, void 0, function* () {
        let changed = false;
        try {
            for (const [name, entry] of Object.entries(scrapeSchemaConfig)) {
                if (!entry.selector) {
                    continue;
                }
                const stillWorks = yield this.testSelectorWorks(page, entry.selector, false);
                if (stillWorks || !entry.fallbackSelector) {
                    continue;
                }
                console.log(`Schema field selector "${entry.selector}" for ${name} not working, trying fallback...`);
                const replacement = yield this.generateSelectorFromFallback(page, entry.fallbackSelector, false);
                if (!replacement) {
                    continue;
                }
                console.log(`Updated schema field selector for ${name}: ${entry.selector} -> ${replacement}`);
                entry.selector = replacement;
                changed = true;
            }
        }
        catch (error) {
            console.error(`Error validating scrapeSchema action: ${error.message}`);
        }
        return changed;
    });
}
1812
/**
 * Validate and fix selectors for a workflow action just before execution.
 *
 * Works on a JSON round-trip copy of the action, so the caller's object is
 * left untouched. Each `scrapeList` / `scrapeSchema` step with a first
 * argument is handed to its validator; failures are logged and reported via
 * `trackAutohealFailure`, and the copy is returned regardless.
 *
 * @param {Page} page - Playwright page object
 * @param {WhereWhatPair} action - The action to validate
 * @returns {Promise<WhereWhatPair>} - The potentially modified action
 */
validateAndFixSelectors(page, action) {
    return __awaiter(this, void 0, void 0, function* () {
        const actionCopy = JSON.parse(JSON.stringify(action));
        // Checked in this order for every step.
        const validators = [
            ['scrapeList', (config) => this.validateScrapeListAction(config, page)],
            ['scrapeSchema', (config) => this.validateScrapeSchemaAction(config, page)],
        ];
        let modifiedCount = 0;
        try {
            // Process each action in the 'what' array
            for (let stepIndex = 0; stepIndex < actionCopy.what.length; stepIndex++) {
                const step = actionCopy.what[stepIndex];
                for (const [actionName, validate] of validators) {
                    if (step.action === actionName && step.args && step.args[0]) {
                        console.log(`Validating ${actionName} action...`);
                        const fixed = yield validate(step.args[0]);
                        if (fixed) {
                            modifiedCount++;
                            console.log(`Fixed ${actionName} selectors`);
                        }
                    }
                }
            }
            console.log(modifiedCount > 0
                ? `Selector validation completed: ${modifiedCount} actions modified`
                : `Selector validation completed: No changes needed`);
        }
        catch (error) {
            console.error(`Error in selector validation: ${error.message}`);
            this.trackAutohealFailure(`Selector validation failed: ${error.message}`);
        }
        return actionCopy;
    });
}
1567
1859
  runLoop(p, workflow) {
1568
1860
  return __awaiter(this, void 0, void 0, function* () {
1569
1861
  var _a, _b;
@@ -1661,8 +1953,9 @@ class Interpreter extends events_1.EventEmitter {
1661
1953
  }
1662
1954
  lastAction = action;
1663
1955
  try {
1664
- console.log("Carrying out:", action.what);
1665
- yield this.carryOutSteps(p, action.what);
1956
+ const validatedAction = yield this.validateAndFixSelectors(p, action);
1957
+ console.log("Carrying out:", validatedAction.what);
1958
+ yield this.carryOutSteps(p, validatedAction.what);
1666
1959
  usedActions.push((_b = action.id) !== null && _b !== void 0 ? _b : 'undefined');
1667
1960
  workflowCopy.splice(actionId, 1);
1668
1961
  console.log(`Action with ID ${action.id} removed from the workflow copy.`);
@@ -47,7 +47,7 @@ class Preprocessor {
47
47
  */
48
48
  static getParams(workflow) {
49
49
  const getParamsRecurse = (object) => {
50
- if (typeof object === 'object') {
50
+ if (typeof object === 'object' && object !== null) {
51
51
  // Recursion base case
52
52
  if (object.$param) {
53
53
  return [object.$param];
@@ -123,13 +123,26 @@ class Preprocessor {
123
123
  const out = object;
124
124
  // for every key (child) of the object
125
125
  Object.keys(object).forEach((key) => {
126
- // if the field has only one key, which is `k`
127
- if (Object.keys(object[key]).length === 1 && object[key][k]) {
128
- // process the current special tag (init param, hydrate regex...)
129
- out[key] = f(object[key][k]);
126
+ const childValue = object[key];
127
+ // Skip if childValue is null, undefined, or not an object
128
+ if (!childValue || typeof childValue !== 'object') {
129
+ return; // Continue to next iteration
130
130
  }
131
- else {
132
- initSpecialRecurse(object[key], k, f);
131
+ try {
132
+ const childKeys = Object.keys(childValue);
133
+ // if the field has only one key, which is `k`
134
+ if (childKeys.length === 1 && childValue[k]) {
135
+ // process the current special tag (init param, hydrate regex...)
136
+ out[key] = f(childValue[k]);
137
+ }
138
+ else {
139
+ // Recursively process the child object
140
+ initSpecialRecurse(childValue, k, f);
141
+ }
142
+ }
143
+ catch (error) {
144
+ // If Object.keys fails or any other error, just continue
145
+ console.warn(`Error processing key "${key}" in initSpecialRecurse:`, error);
133
146
  }
134
147
  });
135
148
  return out;
@@ -29,4 +29,31 @@ interface SelectorResult {
29
29
  * @returns {Promise<Selectors|null|undefined>}
30
30
  */
31
31
  export declare const generateNonUniqueSelectors: (page: Page, elementHandle: ElementHandle, listSelector?: string) => Promise<SelectorResult>;
32
+ /**
33
+ * Generate a new list selector from the elements matched by a fallback selector
34
+ * @param page - Playwright page object
35
+ * @param fallbackSelector - Fallback selector to use
36
+ * @returns New list selector or null if failed
37
+ */
38
+ export declare const generateListSelectorFromFallback: (page: Page, fallbackSelector: string) => Promise<string | null>;
39
+ /**
40
+ * Generate new field selector from fallback selector (one field at a time)
41
+ * @param page - Playwright page object
42
+ * @param fallbackSelector - Fallback selector to use
43
+ * @param listSelector - The list selector context
44
+ * @returns New field selector or null if failed
45
+ */
46
+ export declare const generateListFieldSelectorFromFallback: (page: Page, fallbackSelector: string, listSelector: string) => Promise<string | null>;
47
+ export declare const generateFieldSelectorFromFallback: (page: Page, fallbackSelector: string) => Promise<{
48
+ id: string | null;
49
+ generalSelector: string | null;
50
+ attrSelector: string | null;
51
+ testIdSelector: string | null;
52
+ text: string;
53
+ href?: string;
54
+ hrefSelector: string | null;
55
+ accessibilitySelector: string | null;
56
+ formSelector: string | null;
57
+ relSelector: string | null;
58
+ } | null>;
32
59
  export {};
package/build/selector.js CHANGED
@@ -9,7 +9,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
9
9
  });
10
10
  };
11
11
  Object.defineProperty(exports, "__esModule", { value: true });
12
- exports.generateNonUniqueSelectors = exports.generateSelectors = void 0;
12
+ exports.generateFieldSelectorFromFallback = exports.generateListFieldSelectorFromFallback = exports.generateListSelectorFromFallback = exports.generateNonUniqueSelectors = exports.generateSelectors = void 0;
13
13
  const generateSelectors = (page, elementHandle) => __awaiter(void 0, void 0, void 0, function* () {
14
14
  try {
15
15
  const selectors = yield elementHandle.evaluate((element) => {
@@ -848,3 +848,487 @@ const generateNonUniqueSelectors = (page_1, elementHandle_1, ...args_1) => __awa
848
848
  }
849
849
  });
850
850
  exports.generateNonUniqueSelectors = generateNonUniqueSelectors;
851
+ /**
852
+ * Generate new list selector from fallback element (based on your reference implementation)
853
+ * @param page - Playwright page object
854
+ * @param fallbackSelector - Fallback selector to use
855
+ * @returns New list selector or null if failed
856
+ */
857
+ const generateListSelectorFromFallback = (page, fallbackSelector) => __awaiter(void 0, void 0, void 0, function* () {
858
+ try {
859
+ // Execute selector generation within the page context
860
+ const newSelector = yield page.evaluate((selector) => {
861
+ try {
862
+ // Check if selector is XPath
863
+ const isXPath = selector.startsWith('//') ||
864
+ selector.startsWith('/') ||
865
+ selector.includes('contains(@');
866
+ let elements;
867
+ if (isXPath) {
868
+ // Use XPath evaluation
869
+ const xpathResult = document.evaluate(selector, document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
870
+ elements = [];
871
+ let node = xpathResult.iterateNext();
872
+ while (node && elements.length < 5) { // Limit to 5 elements for performance
873
+ if (node.nodeType === Node.ELEMENT_NODE) {
874
+ elements.push(node);
875
+ }
876
+ node = xpathResult.iterateNext();
877
+ }
878
+ }
879
+ else {
880
+ // Use CSS selector
881
+ const nodeList = document.querySelectorAll(selector);
882
+ elements = Array.from(nodeList).slice(0, 5); // Limit to 5 elements
883
+ }
884
+ if (!elements || elements.length === 0) {
885
+ return null;
886
+ }
887
+ // Extract element data for analysis
888
+ const elementData = elements.map((el) => ({
889
+ tagName: el.tagName.toLowerCase(),
890
+ className: el.getAttribute('class') || '',
891
+ attributes: Array.from(el.attributes).reduce((attrs, attr) => {
892
+ if (!['id', 'style', 'data-mx-id'].includes(attr.name)) {
893
+ attrs[attr.name] = attr.value;
894
+ }
895
+ return attrs;
896
+ }, {}),
897
+ childrenCount: el.children.length
898
+ }));
899
+ if (elementData.length === 0) {
900
+ return null;
901
+ }
902
+ const firstElement = elementData[0];
903
+ const tagName = firstElement.tagName;
904
+ // Check if all elements have the same tag name
905
+ const allSameTag = elementData.every((el) => el.tagName === tagName);
906
+ if (!allSameTag) {
907
+ console.warn("Inconsistent tag names in group, using first element's tag");
908
+ }
909
+ // Start building XPath - ALWAYS generate primary XPath
910
+ let xpath = `//${tagName}`;
911
+ const predicates = [];
912
+ // Get common classes
913
+ const allClasses = elementData.map((el) => el.className.split(/\s+/).filter(Boolean));
914
+ if (allClasses.length > 0 && allClasses[0].length > 0) {
915
+ // Find classes that appear in most elements (at least 60%)
916
+ const classFrequency = new Map();
917
+ allClasses.forEach((classes) => {
918
+ classes.forEach((cls) => {
919
+ classFrequency.set(cls, (classFrequency.get(cls) || 0) + 1);
920
+ });
921
+ });
922
+ const minFrequency = Math.ceil(allClasses.length * 0.6);
923
+ const commonClasses = Array.from(classFrequency.entries())
924
+ .filter(([_, count]) => count >= minFrequency)
925
+ .map(([cls, _]) => cls);
926
+ if (commonClasses.length > 0) {
927
+ predicates.push(...commonClasses.map((cls) => `contains(@class, '${cls}')`));
928
+ }
929
+ }
930
+ // Get common attributes (excluding id, style, data-mx-id)
931
+ if (elementData.length > 1) {
932
+ const commonAttributes = {};
933
+ const firstAttrs = firstElement.attributes;
934
+ for (const [attr, value] of Object.entries(firstAttrs)) {
935
+ const isCommon = elementData.every((el) => el.attributes[attr] === value);
936
+ if (isCommon) {
937
+ commonAttributes[attr] = value;
938
+ }
939
+ }
940
+ for (const [attr, value] of Object.entries(commonAttributes)) {
941
+ predicates.push(`@${attr}='${value}'`);
942
+ }
943
+ }
944
+ // Optional: Common child count (only if consistent across most elements)
945
+ const childCountFrequency = new Map();
946
+ elementData.forEach((el) => {
947
+ childCountFrequency.set(el.childrenCount, (childCountFrequency.get(el.childrenCount) || 0) + 1);
948
+ });
949
+ const mostCommonChildCount = Array.from(childCountFrequency.entries())
950
+ .sort((a, b) => b[1] - a[1])[0];
951
+ if (mostCommonChildCount && mostCommonChildCount[1] >= Math.ceil(elementData.length * 0.8)) {
952
+ predicates.push(`count(*)=${mostCommonChildCount[0]}`);
953
+ }
954
+ // Build final XPath
955
+ if (predicates.length > 0) {
956
+ xpath += `[${predicates.join(' and ')}]`;
957
+ }
958
+ console.log(`Generated list selector: ${xpath} from fallback: ${selector}`);
959
+ return xpath;
960
+ }
961
+ catch (error) {
962
+ const errorMessage = error instanceof Error ? error.message : 'Unknown error';
963
+ console.error(`Failed to generate list selector from fallback: ${errorMessage}`);
964
+ return null;
965
+ }
966
+ }, fallbackSelector);
967
+ return newSelector;
968
+ }
969
+ catch (error) {
970
+ const errorMessage = error instanceof Error ? error.message : 'Unknown error';
971
+ console.error(`Failed to execute selector generation: ${errorMessage}`);
972
+ return null;
973
+ }
974
+ });
975
+ exports.generateListSelectorFromFallback = generateListSelectorFromFallback;
976
+ /**
977
+ * Generate new field selector from fallback selector (one field at a time)
978
+ * @param page - Playwright page object
979
+ * @param fallbackSelector - Fallback selector to use
980
+ * @param listSelector - The list selector context
981
+ * @returns New field selector or null if failed
982
+ */
983
+ const generateListFieldSelectorFromFallback = (page, fallbackSelector, listSelector) => __awaiter(void 0, void 0, void 0, function* () {
984
+ try {
985
+ // Execute field selector generation within the page context
986
+ const newSelector = yield page.evaluate(({ fallbackSel, listSel }) => {
987
+ // Helper function to check if selector is XPath
988
+ const isXPathSelector = (selector) => {
989
+ return selector.startsWith('//') ||
990
+ selector.startsWith('/') ||
991
+ selector.includes('contains(@') ||
992
+ selector.includes('@class=') ||
993
+ selector.includes('@id=');
994
+ };
995
+ // Helper function to evaluate XPath
996
+ const evaluateXPath = (xpath) => {
997
+ try {
998
+ const result = document.evaluate(xpath, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
999
+ const elements = [];
1000
+ for (let i = 0; i < result.snapshotLength; i++) {
1001
+ const node = result.snapshotItem(i);
1002
+ if (node && node.nodeType === Node.ELEMENT_NODE) {
1003
+ elements.push(node);
1004
+ }
1005
+ }
1006
+ return elements;
1007
+ }
1008
+ catch (error) {
1009
+ return [];
1010
+ }
1011
+ };
1012
+ // Helper function to get sibling position
1013
+ const getSiblingPosition = (element, parent) => {
1014
+ const siblings = Array.from(parent.children || [])
1015
+ .filter((child) => child.tagName === element.tagName);
1016
+ return siblings.indexOf(element) + 1;
1017
+ };
1018
+ // Generate optimized structural step
1019
+ const generateOptimizedStructuralStep = (element, rootElement) => {
1020
+ const tagName = element.tagName.toLowerCase();
1021
+ const parent = element.parentElement;
1022
+ if (!parent) {
1023
+ return tagName;
1024
+ }
1025
+ // Use classes first
1026
+ const classes = Array.from(element.classList);
1027
+ if (classes.length > 0) {
1028
+ const classSelector = classes
1029
+ .map((cls) => `contains(@class, '${cls}')`)
1030
+ .join(" and ");
1031
+ return `${tagName}[${classSelector}]`;
1032
+ }
1033
+ // Try meaningful attributes
1034
+ const meaningfulAttrs = ["role", "type", "name", "src", "aria-label"];
1035
+ for (const attrName of meaningfulAttrs) {
1036
+ if (element.hasAttribute(attrName)) {
1037
+ const value = element.getAttribute(attrName).replace(/'/g, "\\'");
1038
+ return `${tagName}[@${attrName}='${value}']`;
1039
+ }
1040
+ }
1041
+ // Try test ID
1042
+ const testId = element.getAttribute("data-testid");
1043
+ if (testId) {
1044
+ return `${tagName}[@data-testid='${testId}']`;
1045
+ }
1046
+ // Try ID
1047
+ if (element.id && !element.id.match(/^\d/)) {
1048
+ return `${tagName}[@id='${element.id}']`;
1049
+ }
1050
+ // Try other data attributes
1051
+ for (const attr of Array.from(element.attributes)) {
1052
+ if (attr.name.startsWith("data-") &&
1053
+ attr.name !== "data-testid" &&
1054
+ attr.name !== "data-mx-id" &&
1055
+ attr.value) {
1056
+ return `${tagName}[@${attr.name}='${attr.value}']`;
1057
+ }
1058
+ }
1059
+ // Fallback to position
1060
+ const position = getSiblingPosition(element, parent);
1061
+ return `${tagName}[${position}]`;
1062
+ };
1063
+ // Get optimized structural path
1064
+ const getOptimizedStructuralPath = (targetElement, rootElement) => {
1065
+ if (!rootElement.contains(targetElement) || targetElement === rootElement) {
1066
+ return null;
1067
+ }
1068
+ const pathParts = [];
1069
+ let current = targetElement;
1070
+ // Build path from target up to root
1071
+ while (current && current !== rootElement) {
1072
+ const pathPart = generateOptimizedStructuralStep(current, rootElement);
1073
+ if (pathPart) {
1074
+ pathParts.unshift(pathPart);
1075
+ }
1076
+ current = current.parentElement;
1077
+ if (!current)
1078
+ break;
1079
+ }
1080
+ return pathParts.length > 0 ? "/" + pathParts.join("/") : null;
1081
+ };
1082
+ try {
1083
+ // Get the first element from fallback selector
1084
+ let targetElement = null;
1085
+ if (isXPathSelector(fallbackSel)) {
1086
+ const elements = evaluateXPath(fallbackSel);
1087
+ targetElement = elements[0] || null;
1088
+ }
1089
+ else {
1090
+ targetElement = document.querySelector(fallbackSel);
1091
+ }
1092
+ if (!targetElement) {
1093
+ return null;
1094
+ }
1095
+ // Get the list container elements
1096
+ const parentElements = evaluateXPath(listSel);
1097
+ let containingParent = null;
1098
+ for (const parent of parentElements) {
1099
+ if (parent.contains(targetElement)) {
1100
+ containingParent = parent;
1101
+ break;
1102
+ }
1103
+ }
1104
+ if (!containingParent) {
1105
+ return null;
1106
+ }
1107
+ // Build structural path
1108
+ const structuralPath = getOptimizedStructuralPath(targetElement, containingParent);
1109
+ if (!structuralPath) {
1110
+ return null;
1111
+ }
1112
+ // Combine list selector with structural path
1113
+ const newSelector = listSel + structuralPath;
1114
+ console.log(`Generated field selector: ${newSelector} from fallback: ${fallbackSel}`);
1115
+ return newSelector;
1116
+ }
1117
+ catch (error) {
1118
+ console.error("Error generating field selector:", error);
1119
+ return null;
1120
+ }
1121
+ }, { fallbackSel: fallbackSelector, listSel: listSelector });
1122
+ return newSelector;
1123
+ }
1124
+ catch (error) {
1125
+ const errorMessage = error instanceof Error ? error.message : 'Unknown error';
1126
+ console.error(`Failed to generate field selector: ${errorMessage}`);
1127
+ return null;
1128
+ }
1129
+ });
1130
+ exports.generateListFieldSelectorFromFallback = generateListFieldSelectorFromFallback;
1131
+ /* Generate all schema field selectors from fallback selector (CSS to CSS)
1132
+ * @param page - Playwright page object
1133
+ * @param fallbackSelector - CSS fallback selector to use
1134
+ * @returns Array of all generated CSS selectors or null if failed
1135
+ */
1136
+ const generateFieldSelectorFromFallback = (page, fallbackSelector) => __awaiter(void 0, void 0, void 0, function* () {
1137
+ try {
1138
+ // Execute schema field selector generation within the page context
1139
+ const selectors = yield page.evaluate((fallbackSel) => {
1140
+ // CSS escape function (simplified version from your reference)
1141
+ function cssesc(string, options = {}) {
1142
+ const { isIdentifier = false } = options;
1143
+ let output = '';
1144
+ for (let i = 0; i < string.length; i++) {
1145
+ const char = string.charAt(i);
1146
+ const code = char.charCodeAt(0);
1147
+ if (code < 0x20 || code > 0x7e) {
1148
+ output += '\\' + code.toString(16).toUpperCase() + ' ';
1149
+ }
1150
+ else if (/[\t\n\f\r\x0B]/.test(char)) {
1151
+ output += '\\' + code.toString(16).toUpperCase() + ' ';
1152
+ }
1153
+ else if (char === '\\' || (isIdentifier && /[ -,\.\/:-@\[\]\^`\{-~]/.test(char))) {
1154
+ output += '\\' + char;
1155
+ }
1156
+ else {
1157
+ output += char;
1158
+ }
1159
+ }
1160
+ if (isIdentifier && /\d/.test(string.charAt(0))) {
1161
+ output = '\\3' + string.charAt(0) + ' ' + output.slice(1);
1162
+ }
1163
+ return output;
1164
+ }
1165
+ // Main finder function (simplified version from your reference)
1166
+ function finder(input, options = {}) {
1167
+ if (input.nodeType !== Node.ELEMENT_NODE) {
1168
+ throw new Error("Can't generate CSS selector for non-element node type.");
1169
+ }
1170
+ if (input.tagName.toLowerCase() === 'html') {
1171
+ return 'html';
1172
+ }
1173
+ // If attr function is provided, use it to filter attributes
1174
+ if (options.attr) {
1175
+ const attrs = Array.from(input.attributes).filter(attr => options.attr(attr.name) && attr.name !== 'data-mx-id');
1176
+ if (attrs.length > 0) {
1177
+ const attr = attrs[0];
1178
+ return `[${cssesc(attr.name, { isIdentifier: true })}="${cssesc(attr.value)}"]`;
1179
+ }
1180
+ }
1181
+ // Try ID first
1182
+ const elementId = input.getAttribute('id');
1183
+ if (elementId && !elementId.match(/^\d/)) {
1184
+ return '#' + cssesc(elementId, { isIdentifier: true });
1185
+ }
1186
+ // Try classes
1187
+ const classes = Array.from(input.classList);
1188
+ if (classes.length > 0) {
1189
+ const classSelector = classes.map(cls => '.' + cssesc(cls, { isIdentifier: true })).join('');
1190
+ const tagName = input.tagName.toLowerCase();
1191
+ return tagName + classSelector;
1192
+ }
1193
+ // Try attributes
1194
+ const meaningfulAttrs = ['data-testid', 'data-test-id', 'data-testing', 'data-test', 'data-qa', 'data-cy', 'name', 'aria-label', 'alt', 'title', 'href', 'role', 'type'];
1195
+ for (const attrName of meaningfulAttrs) {
1196
+ if (input.hasAttribute(attrName) && attrName !== 'data-mx-id') {
1197
+ const value = input.getAttribute(attrName);
1198
+ if (value) {
1199
+ return `[${cssesc(attrName, { isIdentifier: true })}="${cssesc(value)}"]`;
1200
+ }
1201
+ }
1202
+ }
1203
+ // Fallback to tag name with nth-child if needed
1204
+ const tagName = input.tagName.toLowerCase();
1205
+ const parent = input.parentElement;
1206
+ if (parent) {
1207
+ const siblings = Array.from(parent.children).filter(child => child.tagName === input.tagName);
1208
+ if (siblings.length > 1) {
1209
+ const index = siblings.indexOf(input) + 1;
1210
+ return `${tagName}:nth-child(${index})`;
1211
+ }
1212
+ }
1213
+ return tagName;
1214
+ }
1215
+ // Generate selectors for attributes
1216
+ function genSelectorForAttributes(element, attributes) {
1217
+ try {
1218
+ for (const attr of attributes) {
1219
+ if (element.hasAttribute(attr)) {
1220
+ const value = element.getAttribute(attr);
1221
+ if (value && value.length > 0) {
1222
+ if (attr === 'rel') {
1223
+ return `[rel="${value}"]`;
1224
+ }
1225
+ return `[${cssesc(attr, { isIdentifier: true })}="${cssesc(value)}"]`;
1226
+ }
1227
+ }
1228
+ }
1229
+ return null;
1230
+ }
1231
+ catch (e) {
1232
+ return null;
1233
+ }
1234
+ }
1235
+ // Check if character is number
1236
+ function isCharacterNumber(char) {
1237
+ return char && char.length === 1 && /[0-9]/.test(char);
1238
+ }
1239
+ // Generate attribute set
1240
+ function genAttributeSet(element, attributes) {
1241
+ return new Set(attributes.filter((attr) => {
1242
+ const attrValue = element.getAttribute(attr);
1243
+ return attrValue != null && attrValue.length > 0;
1244
+ }));
1245
+ }
1246
+ // Check if attributes are defined
1247
+ function isAttributesDefined(element, attributes) {
1248
+ return genAttributeSet(element, attributes).size > 0;
1249
+ }
1250
+ // Generate valid attribute filter
1251
+ function genValidAttributeFilter(element, attributes) {
1252
+ const attrSet = genAttributeSet(element, attributes);
1253
+ return (name) => attrSet.has(name);
1254
+ }
1255
+ // Main selector generation function (based on genSelectors from your reference)
1256
+ function genSelectors(element) {
1257
+ var _a;
1258
+ const href = element.getAttribute('href');
1259
+ let generalSelector = null;
1260
+ try {
1261
+ generalSelector = finder(element);
1262
+ }
1263
+ catch (e) {
1264
+ console.warn('Error generating general selector:', e);
1265
+ }
1266
+ let attrSelector = null;
1267
+ try {
1268
+ attrSelector = finder(element, { attr: () => true });
1269
+ }
1270
+ catch (e) {
1271
+ console.warn('Error generating attr selector:', e);
1272
+ }
1273
+ const relSelector = genSelectorForAttributes(element, ['rel']);
1274
+ const hrefSelector = genSelectorForAttributes(element, ['href']);
1275
+ const formSelector = genSelectorForAttributes(element, ['name', 'placeholder', 'for']);
1276
+ const accessibilitySelector = genSelectorForAttributes(element, ['aria-label', 'alt', 'title']);
1277
+ const testIdSelector = genSelectorForAttributes(element, [
1278
+ 'data-testid', 'data-test-id', 'data-testing', 'data-test', 'data-qa', 'data-cy'
1279
+ ]);
1280
+ // We won't use an id selector if the id is invalid (starts with a number)
1281
+ let idSelector = null;
1282
+ try {
1283
+ idSelector = isAttributesDefined(element, ['id']) &&
1284
+ !isCharacterNumber((_a = element.id) === null || _a === void 0 ? void 0 : _a[0])
1285
+ ? finder(element, {
1286
+ attr: (name) => name === 'id',
1287
+ })
1288
+ : null;
1289
+ }
1290
+ catch (e) {
1291
+ console.warn('Error generating id selector:', e);
1292
+ }
1293
+ return {
1294
+ id: idSelector,
1295
+ generalSelector,
1296
+ attrSelector,
1297
+ testIdSelector,
1298
+ text: element.innerText,
1299
+ href: href !== null && href !== void 0 ? href : undefined,
1300
+ hrefSelector,
1301
+ accessibilitySelector,
1302
+ formSelector,
1303
+ relSelector,
1304
+ };
1305
+ }
1306
+ try {
1307
+ // Get the target element from CSS fallback selector
1308
+ const targetElement = document.querySelector(fallbackSel);
1309
+ if (!targetElement) {
1310
+ console.warn('Target element not found with CSS fallback selector:', fallbackSel);
1311
+ return null;
1312
+ }
1313
+ // Prioritize Link logic (from your reference)
1314
+ const { parentElement } = targetElement;
1315
+ const element = (parentElement === null || parentElement === void 0 ? void 0 : parentElement.tagName) === 'A' ? parentElement : targetElement;
1316
+ // Generate all selectors using the same logic as your reference
1317
+ const generatedSelectors = genSelectors(element);
1318
+ console.log('Generated schema field CSS selectors:', generatedSelectors);
1319
+ return generatedSelectors;
1320
+ }
1321
+ catch (error) {
1322
+ console.error('Error in schema field CSS selector generation:', error);
1323
+ return null;
1324
+ }
1325
+ }, fallbackSelector);
1326
+ return selectors;
1327
+ }
1328
+ catch (error) {
1329
+ const errorMessage = error instanceof Error ? error.message : 'Unknown error';
1330
+ console.error(`Failed to generate schema field CSS selectors: ${errorMessage}`);
1331
+ return null;
1332
+ }
1333
+ });
1334
+ exports.generateFieldSelectorFromFallback = generateFieldSelectorFromFallback;
@@ -83,6 +83,7 @@ export interface SchemaConfig {
83
83
  nodeInfo: SerializedNode;
84
84
  selector: string;
85
85
  attribute: string;
86
+ fallbackSelector?: string;
86
87
  coordinates: Coordinates;
87
88
  elementMetadata: {
88
89
  classList: string[];
@@ -93,6 +94,7 @@ export interface SchemaConfig {
93
94
  }
94
95
  export interface ScrapeListSchema {
95
96
  listSelector: string;
97
+ listFallbackSelector?: string;
96
98
  listSelectorInfo: {
97
99
  nodeInfo: SerializedNode;
98
100
  coordinates: Coordinates;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mx-cloud",
3
- "version": "0.0.12",
3
+ "version": "0.0.14",
4
4
  "description": "mx cloud",
5
5
  "main": "build/index.js",
6
6
  "typings": "build/index.d.ts",