mx-cloud 0.0.25 → 0.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -38,12 +38,13 @@ interface InterpreterOptions {
38
38
  serializableCallback: (output: any) => (void | Promise<void>);
39
39
  binaryCallback: (output: any, mimeType: string) => (void | Promise<void>);
40
40
  debug: boolean;
41
- robotType?: 'extract' | 'scrape' | 'deep-extract';
41
+ robotType?: 'extract' | 'scrape' | 'crawl' | 'search' | 'deep-extract';
42
42
  debugChannel: Partial<{
43
43
  activeId: (id: number) => void;
44
44
  debugMessage: (msg: string) => void;
45
45
  setActionType: (type: string) => void;
46
46
  incrementScrapeListIndex: () => void;
47
+ progressUpdate: (current: number, total: number, percentage: number) => void;
47
48
  }>;
48
49
  }
49
50
  /**
@@ -66,28 +67,14 @@ export default class Interpreter extends EventEmitter {
66
67
  private serializableDataByType;
67
68
  private pendingDeepExtraction;
68
69
  private isInDeepExtractionPhase;
70
+ private deepExtractionStats;
71
+ private totalActions;
72
+ private executedActions;
69
73
  constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>);
70
74
  trackAutohealFailure(error: string): void;
71
75
  private applyAdBlocker;
72
76
  private disableAdBlocker;
73
- private getSelectors;
74
- /**
75
- * Returns the context object from given Page and the current workflow.\
76
- * \
77
- * `workflow` is used for selector extraction - function searches for used selectors to
78
- * look for later in the page's context.
79
- * @param page Playwright Page object
80
- * @param workflow Current **initialized** workflow (array of where-what pairs).
81
- * @returns {PageState} State of the current page.
82
- */
83
- private getState;
84
- /**
85
- * Tests if the given action is applicable with the given context.
86
- * @param where Tested *where* condition
87
- * @param context Current browser context.
88
- * @returns True if `where` is applicable in the given context, false otherwise
89
- */
90
- private applicable;
77
+ private callWithTimeout;
91
78
  /**
92
79
  * Sets the abort flag to immediately stop all operations
93
80
  */
@@ -107,12 +94,7 @@ export default class Interpreter extends EventEmitter {
107
94
  */
108
95
  private carryOutSteps;
109
96
  private handlePagination;
110
- private getMatchingActionId;
111
- private removeShadowSelectors;
112
97
  private removeSpecialSelectors;
113
- private generatePageNodeInformation;
114
- private detectElementChanges;
115
- private validateWorkflowAction;
116
98
  /**
117
99
  * Test if a selector is working on the current page
118
100
  * @param {Page} page - Playwright page object
@@ -189,6 +171,16 @@ export default class Interpreter extends EventEmitter {
189
171
  /**
190
172
  * Helper function to check if a URL matches a goto pattern.
191
173
  */
174
+ /**
175
+ * Generic pattern matching for deep extraction URLs.
176
+ * Works across any website by analyzing URL structure rather than relying on keywords.
177
+ *
178
+ * Strategy:
179
+ * 1. Match URLs with same origin and path length
180
+ * 2. Identify "structural" segments (numbers, short words, etc.) that should match exactly
181
+ * 3. Allow other segments to vary (dynamic content like IDs, slugs, names)
182
+ * 4. Skip exact matches to avoid duplicates
183
+ */
192
184
  private matchesGotoPattern;
193
185
  /**
194
186
  * Executes hierarchical deep extraction by processing each level recursively.