maxun-core 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@
2
2
  import { Page } from 'playwright';
3
3
  import { EventEmitter } from 'events';
4
4
  import { WorkflowFile, ParamType } from './types/workflow';
5
+ import { ProxyConfig } from './proxy';
5
6
  /**
6
7
  * Defines optional interpreter options (passed in constructor)
7
8
  */
@@ -15,6 +16,8 @@ interface InterpreterOptions {
15
16
  activeId: Function;
16
17
  debugMessage: Function;
17
18
  }>;
19
+ proxy?: ProxyConfig | null;
20
+ onProxyError?: (error: Error, proxy: ProxyConfig) => Promise<ProxyConfig | null>;
18
21
  }
19
22
  /**
20
23
  * Class for running the Smart Workflows.
@@ -27,7 +30,13 @@ export default class Interpreter extends EventEmitter {
27
30
  private stopper;
28
31
  private log;
29
32
  private blocker;
33
+ private browser;
34
+ private contexts;
35
+ private currentProxy;
30
36
  constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>);
37
+ updateProxy(proxyConfig: ProxyConfig | null): void;
38
+ private createProxyContext;
39
+ private createProxyPage;
31
40
  private applyAdBlocker;
32
41
  private disableAdBlocker;
33
42
  /**
@@ -53,9 +53,16 @@ class Interpreter extends events_1.EventEmitter {
53
53
  super();
54
54
  this.stopper = null;
55
55
  this.blocker = null;
56
+ this.browser = null;
57
+ this.contexts = [];
58
+ this.currentProxy = null;
56
59
  this.workflow = workflow.workflow;
57
60
  this.initializedWorkflow = null;
58
- this.options = Object.assign({ maxRepeats: 5, maxConcurrency: 5, serializableCallback: (data) => { (0, logger_1.default)(JSON.stringify(data), logger_1.Level.WARN); }, binaryCallback: () => { (0, logger_1.default)('Received binary data, thrashing them.', logger_1.Level.WARN); }, debug: false, debugChannel: {} }, options);
61
+ this.options = Object.assign({ maxRepeats: 5, maxConcurrency: 5, serializableCallback: (data) => { (0, logger_1.default)(JSON.stringify(data), logger_1.Level.WARN); }, binaryCallback: () => { (0, logger_1.default)('Received binary data, thrashing them.', logger_1.Level.WARN); }, debug: false, debugChannel: {}, proxy: null, onProxyError: (error, proxy) => __awaiter(this, void 0, void 0, function* () {
62
+ this.log(`Proxy error: ${error.message}`, logger_1.Level.ERROR);
63
+ return null;
64
+ }) }, options);
65
+ this.currentProxy = this.options.proxy;
59
66
  this.concurrency = new concurrency_1.default(this.options.maxConcurrency);
60
67
  this.log = (...args) => (0, logger_1.default)(...args);
61
68
  const error = preprocessor_1.default.validateWorkflow(workflow);
@@ -78,6 +85,42 @@ class Interpreter extends events_1.EventEmitter {
78
85
  this.log(`Failed to initialize ad-blocker:`, logger_1.Level.ERROR);
79
86
  });
80
87
  }
88
+ updateProxy(proxyConfig) {
89
+ this.currentProxy = proxyConfig;
90
+ this.log(`Proxy configuration updated`, logger_1.Level.LOG);
91
+ }
92
+ createProxyContext(browser) {
93
+ return __awaiter(this, void 0, void 0, function* () {
94
+ if (!this.currentProxy) {
95
+ return browser.newContext();
96
+ }
97
+ try {
98
+ const context = yield browser.newContext({
99
+ proxy: this.currentProxy
100
+ });
101
+ this.contexts.push(context);
102
+ return context;
103
+ }
104
+ catch (error) {
105
+ if (this.options.onProxyError) {
106
+ const newProxy = yield this.options.onProxyError(error, this.currentProxy);
107
+ if (newProxy) {
108
+ this.currentProxy = newProxy;
109
+ return this.createProxyContext(browser);
110
+ }
111
+ }
112
+ throw error;
113
+ }
114
+ });
115
+ }
116
+ // create a new page with proxy
117
+ createProxyPage(context) {
118
+ return __awaiter(this, void 0, void 0, function* () {
119
+ const page = yield context.newPage();
120
+ yield page.setViewportSize({ width: 900, height: 400 });
121
+ return page;
122
+ });
123
+ }
81
124
  applyAdBlocker(page) {
82
125
  return __awaiter(this, void 0, void 0, function* () {
83
126
  if (this.blocker) {
@@ -246,12 +289,12 @@ class Interpreter extends events_1.EventEmitter {
246
289
  .evaluateAll(
247
290
  // @ts-ignore
248
291
  (elements) => elements.map((a) => a.href).filter((x) => x));
249
- const context = page.context();
292
+ const context = yield this.createProxyContext(page.context().browser());
250
293
  for (const link of links) {
251
294
  // eslint-disable-next-line
252
295
  this.concurrency.addJob(() => __awaiter(this, void 0, void 0, function* () {
253
296
  try {
254
- const newPage = yield context.newPage();
297
+ const newPage = yield this.createProxyPage(context);
255
298
  yield newPage.setViewportSize({ width: 900, height: 400 });
256
299
  yield newPage.goto(link);
257
300
  yield newPage.waitForLoadState('networkidle');
@@ -531,12 +574,20 @@ class Interpreter extends events_1.EventEmitter {
531
574
  * `this.workflow` with the parameters initialized.
532
575
  */
533
576
  this.initializedWorkflow = preprocessor_1.default.initWorkflow(this.workflow, params);
534
- yield this.ensureScriptsLoaded(page);
577
+ // Create a new context with proxy configuration
578
+ const context = yield this.createProxyContext(page.context().browser());
579
+ // Create a new page with proxy
580
+ const proxyPage = yield this.createProxyPage(context);
581
+ // Navigate the new page to the current page's URL (only the URL is carried over, not cookies or other page state)
582
+ yield proxyPage.goto(page.url());
583
+ yield this.ensureScriptsLoaded(proxyPage);
535
584
  this.stopper = () => {
536
585
  this.stopper = null;
537
586
  };
538
- this.concurrency.addJob(() => this.runLoop(page, this.initializedWorkflow));
587
+ this.concurrency.addJob(() => this.runLoop(proxyPage, this.initializedWorkflow));
539
588
  yield this.concurrency.waitForCompletion();
589
+ yield Promise.all(this.contexts.map(ctx => ctx.close()));
590
+ this.contexts = [];
540
591
  this.stopper = null;
541
592
  });
542
593
  }
@@ -0,0 +1,5 @@
1
/**
 * Proxy settings accepted by the interpreter.
 */
export interface ProxyConfig {
    /** Address of the proxy server. */
    server: string;
    /** Optional user name; presumably for proxies that require authentication. */
    username?: string;
    /** Optional password; presumably paired with `username`. */
    password?: string;
}
package/build/proxy.js ADDED
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "maxun-core",
3
- "version": "0.0.1",
3
+ "version": "0.0.2",
4
4
  "description": "Core package for Maxun, responsible for data extraction",
5
5
  "main": "build/index.js",
6
6
  "typings": "build/index.d.ts",