mx-cloud 0.0.25 → 0.0.26
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/build/interpret.d.ts +15 -23
- package/build/interpret.js +1006 -1052
- package/build/selector.d.ts +1 -32
- package/build/selector.js +1 -839
- package/build/types/workflow.d.ts +1 -1
- package/build/utils/utils.d.ts +0 -4
- package/build/utils/utils.js +0 -7
- package/package.json +1 -1
package/build/interpret.d.ts
CHANGED
|
@@ -44,6 +44,7 @@ interface InterpreterOptions {
|
|
|
44
44
|
debugMessage: (msg: string) => void;
|
|
45
45
|
setActionType: (type: string) => void;
|
|
46
46
|
incrementScrapeListIndex: () => void;
|
|
47
|
+
progressUpdate: (current: number, total: number, percentage: number) => void;
|
|
47
48
|
}>;
|
|
48
49
|
}
|
|
49
50
|
/**
|
|
@@ -66,28 +67,14 @@ export default class Interpreter extends EventEmitter {
|
|
|
66
67
|
private serializableDataByType;
|
|
67
68
|
private pendingDeepExtraction;
|
|
68
69
|
private isInDeepExtractionPhase;
|
|
70
|
+
private deepExtractionStats;
|
|
71
|
+
private totalActions;
|
|
72
|
+
private executedActions;
|
|
69
73
|
constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>);
|
|
70
74
|
trackAutohealFailure(error: string): void;
|
|
71
75
|
private applyAdBlocker;
|
|
72
76
|
private disableAdBlocker;
|
|
73
|
-
private
|
|
74
|
-
/**
|
|
75
|
-
* Returns the context object from given Page and the current workflow.\
|
|
76
|
-
* \
|
|
77
|
-
* `workflow` is used for selector extraction - function searches for used selectors to
|
|
78
|
-
* look for later in the page's context.
|
|
79
|
-
* @param page Playwright Page object
|
|
80
|
-
* @param workflow Current **initialized** workflow (array of where-what pairs).
|
|
81
|
-
* @returns {PageState} State of the current page.
|
|
82
|
-
*/
|
|
83
|
-
private getState;
|
|
84
|
-
/**
|
|
85
|
-
* Tests if the given action is applicable with the given context.
|
|
86
|
-
* @param where Tested *where* condition
|
|
87
|
-
* @param context Current browser context.
|
|
88
|
-
* @returns True if `where` is applicable in the given context, false otherwise
|
|
89
|
-
*/
|
|
90
|
-
private applicable;
|
|
77
|
+
private callWithTimeout;
|
|
91
78
|
/**
|
|
92
79
|
* Sets the abort flag to immediately stop all operations
|
|
93
80
|
*/
|
|
@@ -107,12 +94,7 @@ export default class Interpreter extends EventEmitter {
|
|
|
107
94
|
*/
|
|
108
95
|
private carryOutSteps;
|
|
109
96
|
private handlePagination;
|
|
110
|
-
private getMatchingActionId;
|
|
111
|
-
private removeShadowSelectors;
|
|
112
97
|
private removeSpecialSelectors;
|
|
113
|
-
private generatePageNodeInformation;
|
|
114
|
-
private detectElementChanges;
|
|
115
|
-
private validateWorkflowAction;
|
|
116
98
|
/**
|
|
117
99
|
* Test if a selector is working on the current page
|
|
118
100
|
* @param {Page} page - Playwright page object
|
|
@@ -189,6 +171,16 @@ export default class Interpreter extends EventEmitter {
|
|
|
189
171
|
/**
|
|
190
172
|
* Helper function to check if a URL matches a goto pattern.
|
|
191
173
|
*/
|
|
174
|
+
/**
|
|
175
|
+
* Generic pattern matching for deep extraction URLs.
|
|
176
|
+
* Works across any website by analyzing URL structure rather than relying on keywords.
|
|
177
|
+
*
|
|
178
|
+
* Strategy:
|
|
179
|
+
* 1. Match URLs with same origin and path length
|
|
180
|
+
* 2. Identify "structural" segments (numbers, short words, etc.) that should match exactly
|
|
181
|
+
* 3. Allow other segments to vary (dynamic content like IDs, slugs, names)
|
|
182
|
+
* 4. Skip exact matches to avoid duplicates
|
|
183
|
+
*/
|
|
192
184
|
private matchesGotoPattern;
|
|
193
185
|
/**
|
|
194
186
|
* Executes hierarchical deep extraction by processing each level recursively.
|