maxun-core 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,555 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || function (mod) {
19
+ if (mod && mod.__esModule) return mod;
20
+ var result = {};
21
+ if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
22
+ __setModuleDefault(result, mod);
23
+ return result;
24
+ };
25
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
26
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
27
+ return new (P || (P = Promise))(function (resolve, reject) {
28
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
29
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
30
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
31
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
32
+ });
33
+ };
34
+ var __importDefault = (this && this.__importDefault) || function (mod) {
35
+ return (mod && mod.__esModule) ? mod : { "default": mod };
36
+ };
37
+ Object.defineProperty(exports, "__esModule", { value: true });
38
+ const adblocker_playwright_1 = require("@cliqz/adblocker-playwright");
39
+ const cross_fetch_1 = __importDefault(require("cross-fetch"));
40
+ const path_1 = __importDefault(require("path"));
41
+ const events_1 = require("events");
42
+ const logic_1 = require("./types/logic");
43
+ const utils_1 = require("./utils/utils");
44
+ const concurrency_1 = __importDefault(require("./utils/concurrency"));
45
+ const preprocessor_1 = __importDefault(require("./preprocessor"));
46
+ const logger_1 = __importStar(require("./utils/logger"));
47
+ /**
48
+ * Class for running the Smart Workflows.
49
+ */
50
+ class Interpreter extends events_1.EventEmitter {
51
+ constructor(workflow, options) {
52
+ var _a;
53
+ super();
54
+ this.stopper = null;
55
+ this.blocker = null;
56
+ this.workflow = workflow.workflow;
57
+ this.initializedWorkflow = null;
58
+ this.options = Object.assign({ maxRepeats: 5, maxConcurrency: 5, serializableCallback: (data) => { (0, logger_1.default)(JSON.stringify(data), logger_1.Level.WARN); }, binaryCallback: () => { (0, logger_1.default)('Received binary data, thrashing them.', logger_1.Level.WARN); }, debug: false, debugChannel: {} }, options);
59
+ this.concurrency = new concurrency_1.default(this.options.maxConcurrency);
60
+ this.log = (...args) => (0, logger_1.default)(...args);
61
+ const error = preprocessor_1.default.validateWorkflow(workflow);
62
+ if (error) {
63
+ throw (error);
64
+ }
65
+ if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.debugMessage) {
66
+ const oldLog = this.log;
67
+ // @ts-ignore
68
+ this.log = (...args) => {
69
+ if (args[1] !== logger_1.Level.LOG) {
70
+ this.options.debugChannel.debugMessage(typeof args[0] === 'string' ? args[0] : args[0].message);
71
+ }
72
+ oldLog(...args);
73
+ };
74
+ }
75
+ adblocker_playwright_1.PlaywrightBlocker.fromPrebuiltAdsAndTracking(cross_fetch_1.default).then(blocker => {
76
+ this.blocker = blocker;
77
+ }).catch(err => {
78
+ this.log(`Failed to initialize ad-blocker:`, logger_1.Level.ERROR);
79
+ });
80
+ }
81
+ applyAdBlocker(page) {
82
+ return __awaiter(this, void 0, void 0, function* () {
83
+ if (this.blocker) {
84
+ yield this.blocker.enableBlockingInPage(page);
85
+ }
86
+ });
87
+ }
88
+ disableAdBlocker(page) {
89
+ return __awaiter(this, void 0, void 0, function* () {
90
+ if (this.blocker) {
91
+ yield this.blocker.disableBlockingInPage(page);
92
+ }
93
+ });
94
+ }
95
+ /**
96
+ * Returns the context object from given Page and the current workflow.\
97
+ * \
98
+ * `workflow` is used for selector extraction - function searches for used selectors to
99
+ * look for later in the page's context.
100
+ * @param page Playwright Page object
101
+ * @param workflow Current **initialized** workflow (array of where-what pairs).
102
+ * @returns {PageState} State of the current page.
103
+ */
104
+ getState(page, workflow) {
105
+ return __awaiter(this, void 0, void 0, function* () {
106
+ yield page.setViewportSize({ width: 900, height: 400 });
107
+ /**
108
+ * All the selectors present in the current Workflow
109
+ */
110
+ const selectors = preprocessor_1.default.extractSelectors(workflow);
111
+ /**
112
+ * Determines whether the element targetted by the selector is [actionable](https://playwright.dev/docs/actionability).
113
+ * @param selector Selector to be queried
114
+ * @returns True if the targetted element is actionable, false otherwise.
115
+ */
116
+ const actionable = (selector) => __awaiter(this, void 0, void 0, function* () {
117
+ try {
118
+ const proms = [
119
+ page.isEnabled(selector, { timeout: 500 }),
120
+ page.isVisible(selector, { timeout: 500 }),
121
+ ];
122
+ return yield Promise.all(proms).then((bools) => bools.every((x) => x));
123
+ }
124
+ catch (e) {
125
+ // log(<Error>e, Level.ERROR);
126
+ return false;
127
+ }
128
+ });
129
+ /**
130
+ * Object of selectors present in the current page.
131
+ */
132
+ const presentSelectors = yield Promise.all(selectors.map((selector) => __awaiter(this, void 0, void 0, function* () {
133
+ if (yield actionable(selector)) {
134
+ return [selector];
135
+ }
136
+ return [];
137
+ }))).then((x) => x.flat());
138
+ return {
139
+ url: page.url(),
140
+ cookies: (yield page.context().cookies([page.url()]))
141
+ .reduce((p, cookie) => (Object.assign(Object.assign({}, p), { [cookie.name]: cookie.value })), {}),
142
+ selectors: presentSelectors,
143
+ };
144
+ });
145
+ }
146
+ /**
147
+ * Tests if the given action is applicable with the given context.
148
+ * @param where Tested *where* condition
149
+ * @param context Current browser context.
150
+ * @returns True if `where` is applicable in the given context, false otherwise
151
+ */
152
+ applicable(where, context, usedActions = []) {
153
+ /**
154
+ * Given two arbitrary objects, determines whether `subset` is a subset of `superset`.\
155
+ * \
156
+ * For every key in `subset`, there must be a corresponding key with equal scalar
157
+ * value in `superset`, or `inclusive(subset[key], superset[key])` must hold.
158
+ * @param subset Arbitrary non-cyclic JS object (where clause)
159
+ * @param superset Arbitrary non-cyclic JS object (browser context)
160
+ * @returns `true` if `subset <= superset`, `false` otherwise.
161
+ */
162
+ const inclusive = (subset, superset) => (Object.entries(subset).every(([key, value]) => {
163
+ /**
164
+ * Arrays are compared without order (are transformed into objects before comparison).
165
+ */
166
+ const parsedValue = Array.isArray(value) ? (0, utils_1.arrayToObject)(value) : value;
167
+ const parsedSuperset = {};
168
+ parsedSuperset[key] = Array.isArray(superset[key])
169
+ ? (0, utils_1.arrayToObject)(superset[key])
170
+ : superset[key];
171
+ // Every `subset` key must exist in the `superset` and
172
+ // have the same value (strict equality), or subset[key] <= superset[key]
173
+ return parsedSuperset[key]
174
+ && ((parsedSuperset[key] === parsedValue)
175
+ || ((parsedValue).constructor.name === 'RegExp' && parsedValue.test(parsedSuperset[key]))
176
+ || ((parsedValue).constructor.name !== 'RegExp'
177
+ && typeof parsedValue === 'object' && inclusive(parsedValue, parsedSuperset[key])));
178
+ }));
179
+ // Every value in the "where" object should be compliant to the current state.
180
+ return Object.entries(where).every(([key, value]) => {
181
+ if (logic_1.operators.includes(key)) {
182
+ const array = Array.isArray(value)
183
+ ? value
184
+ : Object.entries(value).map((a) => Object.fromEntries([a]));
185
+ // every condition is treated as a single context
186
+ switch (key) {
187
+ case '$and':
188
+ return array === null || array === void 0 ? void 0 : array.every((x) => this.applicable(x, context));
189
+ case '$or':
190
+ return array === null || array === void 0 ? void 0 : array.some((x) => this.applicable(x, context));
191
+ case '$not':
192
+ return !this.applicable(value, context); // $not should be a unary operator
193
+ default:
194
+ throw new Error('Undefined logic operator.');
195
+ }
196
+ }
197
+ else if (logic_1.meta.includes(key)) {
198
+ const testRegexString = (x) => {
199
+ if (typeof value === 'string') {
200
+ return x === value;
201
+ }
202
+ return value.test(x);
203
+ };
204
+ switch (key) {
205
+ case '$before':
206
+ return !usedActions.find(testRegexString);
207
+ case '$after':
208
+ return !!usedActions.find(testRegexString);
209
+ default:
210
+ throw new Error('Undefined meta operator.');
211
+ }
212
+ }
213
+ else {
214
+ // Current key is a base condition (url, cookies, selectors)
215
+ return inclusive({ [key]: value }, context);
216
+ }
217
+ });
218
+ }
219
+ /**
220
+ * Given a Playwright's page object and a "declarative" list of actions, this function
221
+ * calls all mentioned functions on the Page object.\
222
+ * \
223
+ * Manipulates the iterator indexes (experimental feature, likely to be removed in
224
+ * the following versions of maxun-core)
225
+ * @param page Playwright Page object
226
+ * @param steps Array of actions.
227
+ */
228
+ carryOutSteps(page, steps) {
229
+ var _a;
230
+ return __awaiter(this, void 0, void 0, function* () {
231
+ /**
232
+ * Defines overloaded (or added) methods/actions usable in the workflow.
233
+ * If a method overloads any existing method of the Page class, it accepts the same set
234
+ * of parameters *(but can override some!)*\
235
+ * \
236
+ * Also, following piece of code defines functions to be run in the browser's context.
237
+ * Beware of false linter errors - here, we know better!
238
+ */
239
+ const wawActions = {
240
+ screenshot: (params) => __awaiter(this, void 0, void 0, function* () {
241
+ const screenshotBuffer = yield page.screenshot(Object.assign(Object.assign({}, params), { path: undefined }));
242
+ yield this.options.binaryCallback(screenshotBuffer, 'image/png');
243
+ }),
244
+ enqueueLinks: (selector) => __awaiter(this, void 0, void 0, function* () {
245
+ const links = yield page.locator(selector)
246
+ .evaluateAll(
247
+ // @ts-ignore
248
+ (elements) => elements.map((a) => a.href).filter((x) => x));
249
+ const context = page.context();
250
+ for (const link of links) {
251
+ // eslint-disable-next-line
252
+ this.concurrency.addJob(() => __awaiter(this, void 0, void 0, function* () {
253
+ try {
254
+ const newPage = yield context.newPage();
255
+ yield newPage.setViewportSize({ width: 900, height: 400 });
256
+ yield newPage.goto(link);
257
+ yield newPage.waitForLoadState('networkidle');
258
+ yield this.runLoop(newPage, this.initializedWorkflow);
259
+ }
260
+ catch (e) {
261
+ // `runLoop` uses soft mode, so it recovers from it's own exceptions
262
+ // but newPage(), goto() and waitForLoadState() don't (and will kill
263
+ // the interpreter by throwing).
264
+ this.log(e, logger_1.Level.ERROR);
265
+ }
266
+ }));
267
+ }
268
+ yield page.close();
269
+ }),
270
+ scrape: (selector) => __awaiter(this, void 0, void 0, function* () {
271
+ yield this.ensureScriptsLoaded(page);
272
+ const scrapeResults = yield page.evaluate((s) => window.scrape(s !== null && s !== void 0 ? s : null), selector);
273
+ yield this.options.serializableCallback(scrapeResults);
274
+ }),
275
+ scrapeSchema: (schema) => __awaiter(this, void 0, void 0, function* () {
276
+ yield this.ensureScriptsLoaded(page);
277
+ const scrapeResult = yield page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema);
278
+ yield this.options.serializableCallback(scrapeResult);
279
+ }),
280
+ scrapeList: (config) => __awaiter(this, void 0, void 0, function* () {
281
+ yield this.ensureScriptsLoaded(page);
282
+ if (!config.pagination) {
283
+ const scrapeResults = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
284
+ yield this.options.serializableCallback(scrapeResults);
285
+ }
286
+ else {
287
+ const scrapeResults = yield this.handlePagination(page, config);
288
+ yield this.options.serializableCallback(scrapeResults);
289
+ }
290
+ }),
291
+ scrapeListAuto: (config) => __awaiter(this, void 0, void 0, function* () {
292
+ yield this.ensureScriptsLoaded(page);
293
+ const scrapeResults = yield page.evaluate((listSelector) => {
294
+ return window.scrapeListAuto(listSelector);
295
+ }, config.listSelector);
296
+ yield this.options.serializableCallback(scrapeResults);
297
+ }),
298
+ scroll: (pages) => __awaiter(this, void 0, void 0, function* () {
299
+ yield page.evaluate((pagesInternal) => __awaiter(this, void 0, void 0, function* () {
300
+ for (let i = 1; i <= (pagesInternal !== null && pagesInternal !== void 0 ? pagesInternal : 1); i += 1) {
301
+ // @ts-ignore
302
+ window.scrollTo(0, window.scrollY + window.innerHeight);
303
+ }
304
+ }), pages !== null && pages !== void 0 ? pages : 1);
305
+ }),
306
+ script: (code) => __awaiter(this, void 0, void 0, function* () {
307
+ const AsyncFunction = Object.getPrototypeOf(() => __awaiter(this, void 0, void 0, function* () { })).constructor;
308
+ const x = new AsyncFunction('page', 'log', code);
309
+ yield x(page, this.log);
310
+ }),
311
+ flag: () => __awaiter(this, void 0, void 0, function* () {
312
+ return new Promise((res) => {
313
+ this.emit('flag', page, res);
314
+ });
315
+ }),
316
+ };
317
+ for (const step of steps) {
318
+ this.log(`Launching ${step.action}`, logger_1.Level.LOG);
319
+ if (step.action in wawActions) {
320
+ // "Arrayifying" here should not be needed (TS + syntax checker - only arrays; but why not)
321
+ const params = !step.args || Array.isArray(step.args) ? step.args : [step.args];
322
+ yield wawActions[step.action](...(params !== null && params !== void 0 ? params : []));
323
+ }
324
+ else {
325
+ // Implements the dot notation for the "method name" in the workflow
326
+ const levels = step.action.split('.');
327
+ const methodName = levels[levels.length - 1];
328
+ let invokee = page;
329
+ for (const level of levels.splice(0, levels.length - 1)) {
330
+ invokee = invokee[level];
331
+ }
332
+ if (!step.args || Array.isArray(step.args)) {
333
+ yield invokee[methodName](...((_a = step.args) !== null && _a !== void 0 ? _a : []));
334
+ }
335
+ else {
336
+ yield invokee[methodName](step.args);
337
+ }
338
+ }
339
+ yield new Promise((res) => { setTimeout(res, 500); });
340
+ }
341
+ });
342
+ }
343
+ handlePagination(page, config) {
344
+ return __awaiter(this, void 0, void 0, function* () {
345
+ let allResults = [];
346
+ let previousHeight = 0;
347
+ // track unique items per page to avoid re-scraping
348
+ let scrapedItems = new Set();
349
+ while (true) {
350
+ switch (config.pagination.type) {
351
+ case 'scrollDown':
352
+ yield page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
353
+ yield page.waitForTimeout(2000);
354
+ const currentHeight = yield page.evaluate(() => document.body.scrollHeight);
355
+ if (currentHeight === previousHeight) {
356
+ const finalResults = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
357
+ allResults = allResults.concat(finalResults);
358
+ return allResults;
359
+ }
360
+ previousHeight = currentHeight;
361
+ break;
362
+ case 'scrollUp':
363
+ yield page.evaluate(() => window.scrollTo(0, 0));
364
+ yield page.waitForTimeout(2000);
365
+ const currentTopHeight = yield page.evaluate(() => document.documentElement.scrollTop);
366
+ if (currentTopHeight === 0) {
367
+ const finalResults = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
368
+ allResults = allResults.concat(finalResults);
369
+ return allResults;
370
+ }
371
+ previousHeight = currentTopHeight;
372
+ break;
373
+ case 'clickNext':
374
+ const pageResults = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
375
+ // Filter out already scraped items
376
+ const newResults = pageResults.filter(item => {
377
+ const uniqueKey = JSON.stringify(item);
378
+ if (scrapedItems.has(uniqueKey))
379
+ return false; // Ignore if already scraped
380
+ scrapedItems.add(uniqueKey); // Mark as scraped
381
+ return true;
382
+ });
383
+ allResults = allResults.concat(newResults);
384
+ if (config.limit && allResults.length >= config.limit) {
385
+ return allResults.slice(0, config.limit);
386
+ }
387
+ const nextButton = yield page.$(config.pagination.selector);
388
+ if (!nextButton) {
389
+ return allResults; // No more pages to scrape
390
+ }
391
+ yield Promise.all([
392
+ nextButton.click(),
393
+ page.waitForNavigation({ waitUntil: 'networkidle' })
394
+ ]);
395
+ yield page.waitForTimeout(1000);
396
+ break;
397
+ case 'clickLoadMore':
398
+ while (true) {
399
+ const loadMoreButton = yield page.$(config.pagination.selector);
400
+ if (!loadMoreButton) {
401
+ // No more "Load More" button, so scrape the remaining items
402
+ const finalResults = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
403
+ allResults = allResults.concat(finalResults);
404
+ return allResults;
405
+ }
406
+ // Click the 'Load More' button to load additional items
407
+ yield loadMoreButton.click();
408
+ yield page.waitForTimeout(2000); // Wait for new items to load
409
+ // After clicking 'Load More', scroll down to load more items
410
+ yield page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
411
+ yield page.waitForTimeout(2000);
412
+ // Check if more items are available
413
+ const currentHeight = yield page.evaluate(() => document.body.scrollHeight);
414
+ if (currentHeight === previousHeight) {
415
+ // No more items loaded, return the scraped results
416
+ const finalResults = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
417
+ allResults = allResults.concat(finalResults);
418
+ return allResults;
419
+ }
420
+ previousHeight = currentHeight;
421
+ if (config.limit && allResults.length >= config.limit) {
422
+ // If limit is set and reached, return the limited results
423
+ allResults = allResults.slice(0, config.limit);
424
+ break;
425
+ }
426
+ }
427
+ break;
428
+ default:
429
+ const results = yield page.evaluate((cfg) => window.scrapeList(cfg), config);
430
+ allResults = allResults.concat(results);
431
+ return allResults;
432
+ }
433
+ if (config.limit && allResults.length >= config.limit) {
434
+ allResults = allResults.slice(0, config.limit);
435
+ break;
436
+ }
437
+ }
438
+ return allResults;
439
+ });
440
+ }
441
+ runLoop(p, workflow) {
442
+ var _a, _b;
443
+ return __awaiter(this, void 0, void 0, function* () {
444
+ // apply ad-blocker to the current page
445
+ yield this.applyAdBlocker(p);
446
+ const usedActions = [];
447
+ let lastAction = null;
448
+ let repeatCount = 0;
449
+ /**
450
+ * Enables the interpreter functionality for popup windows.
451
+ * User-requested concurrency should be entirely managed by the concurrency manager,
452
+ * e.g. via `enqueueLinks`.
453
+ */
454
+ p.on('popup', (popup) => {
455
+ this.concurrency.addJob(() => this.runLoop(popup, workflow));
456
+ });
457
+ /* eslint no-constant-condition: ["warn", { "checkLoops": false }] */
458
+ while (true) {
459
+ // Checks whether the page was closed from outside,
460
+ // or the workflow execution has been stopped via `interpreter.stop()`
461
+ if (p.isClosed() || !this.stopper) {
462
+ return;
463
+ }
464
+ try {
465
+ yield p.waitForLoadState();
466
+ }
467
+ catch (e) {
468
+ yield p.close();
469
+ return;
470
+ }
471
+ let pageState = {};
472
+ try {
473
+ pageState = yield this.getState(p, workflow);
474
+ }
475
+ catch (e) {
476
+ this.log('The browser has been closed.');
477
+ return;
478
+ }
479
+ if (this.options.debug) {
480
+ this.log(`Current state is: \n${JSON.stringify(pageState, null, 2)}`, logger_1.Level.WARN);
481
+ }
482
+ const actionId = workflow.findIndex((step) => this.applicable(step.where, pageState, usedActions));
483
+ const action = workflow[actionId];
484
+ this.log(`Matched ${JSON.stringify(action === null || action === void 0 ? void 0 : action.where)}`, logger_1.Level.LOG);
485
+ if (action) { // action is matched
486
+ if ((_a = this.options.debugChannel) === null || _a === void 0 ? void 0 : _a.activeId) {
487
+ this.options.debugChannel.activeId(actionId);
488
+ }
489
+ repeatCount = action === lastAction ? repeatCount + 1 : 0;
490
+ if (this.options.maxRepeats && repeatCount >= this.options.maxRepeats) {
491
+ return;
492
+ }
493
+ lastAction = action;
494
+ try {
495
+ yield this.carryOutSteps(p, action.what);
496
+ usedActions.push((_b = action.id) !== null && _b !== void 0 ? _b : 'undefined');
497
+ }
498
+ catch (e) {
499
+ this.log(e, logger_1.Level.ERROR);
500
+ }
501
+ }
502
+ else {
503
+ //await this.disableAdBlocker(p);
504
+ return;
505
+ }
506
+ }
507
+ });
508
+ }
509
+ ensureScriptsLoaded(page) {
510
+ return __awaiter(this, void 0, void 0, function* () {
511
+ const isScriptLoaded = yield page.evaluate(() => typeof window.scrape === 'function' && typeof window.scrapeSchema === 'function' && typeof window.scrapeList === 'function' && typeof window.scrapeListAuto === 'function' && typeof window.scrollDown === 'function' && typeof window.scrollUp === 'function');
512
+ if (!isScriptLoaded) {
513
+ yield page.addInitScript({ path: path_1.default.join(__dirname, 'browserSide', 'scraper.js') });
514
+ }
515
+ });
516
+ }
517
+ /**
518
+ * Spawns a browser context and runs given workflow.
519
+ * \
520
+ * Resolves after the playback is finished.
521
+ * @param {Page} [page] Page to run the workflow on.
522
+ * @param {ParamType} params Workflow specific, set of parameters
523
+ * for the `{$param: nameofparam}` fields.
524
+ */
525
+ run(page, params) {
526
+ return __awaiter(this, void 0, void 0, function* () {
527
+ if (this.stopper) {
528
+ throw new Error('This Interpreter is already running a workflow. To run another workflow, please, spawn another Interpreter.');
529
+ }
530
+ /**
531
+ * `this.workflow` with the parameters initialized.
532
+ */
533
+ this.initializedWorkflow = preprocessor_1.default.initWorkflow(this.workflow, params);
534
+ yield this.ensureScriptsLoaded(page);
535
+ this.stopper = () => {
536
+ this.stopper = null;
537
+ };
538
+ this.concurrency.addJob(() => this.runLoop(page, this.initializedWorkflow));
539
+ yield this.concurrency.waitForCompletion();
540
+ this.stopper = null;
541
+ });
542
+ }
543
+ stop() {
544
+ return __awaiter(this, void 0, void 0, function* () {
545
+ if (this.stopper) {
546
+ yield this.stopper();
547
+ this.stopper = null;
548
+ }
549
+ else {
550
+ throw new Error('Cannot stop, there is no running workflow!');
551
+ }
552
+ });
553
+ }
554
+ }
555
+ exports.default = Interpreter;
@@ -0,0 +1,24 @@
1
+ import { Workflow, WorkflowFile, ParamType, SelectorArray } from './types/workflow';
2
+ /**
3
+ * Class for static processing of workflow files/objects. It declares only static methods.
4
+ */
5
+ export default class Preprocessor {
6
+ static validateWorkflow(workflow: WorkflowFile): any; // Validates the workflow file; the Interpreter constructor throws the returned value when it is truthy.
7
+ /**
8
+ * Extracts parameter names from the workflow.
9
+ * @param {WorkflowFile} workflow The given workflow
10
+ * @returns {string[]} List of parameters' names.
11
+ */
12
+ static getParams(workflow: WorkflowFile): string[];
13
+ /**
14
+ * Lists all the selectors used in the given workflow (only the literal "selector"
15
+ * field in WHERE clauses so far).
16
+ */
17
+ static extractSelectors(workflow: Workflow): SelectorArray;
18
+ /**
19
+ * Recursively crawls the workflow and initializes params - replaces the `{$param : paramName}` objects
20
+ * with the defined value.
21
+ * @returns {Workflow} Copy of the given workflow, modified (the initial workflow is left untouched).
22
+ */
23
+ static initWorkflow(workflow: Workflow, params?: ParamType): Workflow;
24
+ }