header-generator 1.1.3 → 1.2.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -134,6 +134,7 @@ Returns a new object that contains ordered headers.
134
134
  | Param | Type | Description |
135
135
  | --- | --- | --- |
136
136
  | browsers | <code>Array.&lt;(BrowserSpecification\|string)&gt;</code> | List of BrowserSpecifications to generate the headers for, or one of `chrome`, `firefox` and `safari`. |
137
+ | browserListQuery | <code>string</code> | Browserslist query that selects the browsers (and their versions) to generate headers for, based on real-world usage data. For more info see the [query docs](https://github.com/browserslist/browserslist#full-list). If `browserListQuery` is passed, the `browsers` array is ignored. |
137
138
  | operatingSystems | <code>Array.&lt;string&gt;</code> | List of operating systems to generate the headers for. The options are `windows`, `macos`, `linux`, `android` and `ios`. |
138
139
  | devices | <code>Array.&lt;string&gt;</code> | List of devices to generate the headers for. Options are `desktop` and `mobile`. |
139
140
  | locales | <code>Array.&lt;string&gt;</code> | List of at most 10 languages to include in the [Accept-Language](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language) request header in the language format accepted by that header, for example `en`, `en-US` or `de`. |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "header-generator",
3
- "version": "1.1.3",
3
+ "version": "1.2.0-beta.0",
4
4
  "description": "NodeJs package for generating browser-like headers.",
5
5
  "author": {
6
6
  "name": "Apify",
@@ -14,6 +14,7 @@
14
14
  "src"
15
15
  ],
16
16
  "dependencies": {
17
+ "browserslist": "^4.19.1",
17
18
  "generative-bayesian-network": "0.1.0-beta.1",
18
19
  "ow": "^0.23.0"
19
20
  },
@@ -0,0 +1,5 @@
1
// Browser names recognised by this package; entries for any other browser are skipped
// when a browserslist query result is processed.
exports.SUPPORTED_BROWSERS = ['chrome', 'firefox', 'safari'];
@@ -11,7 +11,8 @@ const headerNetworkDefinition = require('./data_files/header-network-definition.
11
11
  const inputNetworkDefinition = require('./data_files/input-network-definition.json');
12
12
  const headersOrder = require('./data_files/headers-order.json');
13
13
  const uniqueBrowserStrings = require('./data_files/browser-helper-file.json');
14
- const { getBrowser, getUserAgent } = require('./utils');
14
+ const { getBrowser, getUserAgent, getBrowsersFromQuery } = require('./utils');
15
+ const { SUPPORTED_BROWSERS } = require('./constants');
15
16
 
16
17
  const uniqueBrowsers = [];
17
18
  for (const browserString of uniqueBrowserStrings) {
@@ -104,6 +105,7 @@ const headerGeneratorOptionsShape = {
104
105
  devices: ow.optional.array.ofType(ow.string),
105
106
  locales: ow.optional.array.ofType(ow.string),
106
107
  httpVersion: ow.optional.string,
108
+ browserListQuery: ow.optional.string,
107
109
  };
108
110
 
109
111
  /**
@@ -130,10 +132,14 @@ function getOrderFromUserAgent(headers) {
130
132
  * @param {string} httpVersion - Http version to be used to generate headers (the headers differ depending on the version).
131
133
  * Either 1 or 2. If none specified the httpVersion specified in `HeaderGeneratorOptions` is used.
132
134
  */
135
+
133
136
  /**
134
137
  * @typedef HeaderGeneratorOptions
135
138
  * @param {Array<BrowserSpecification|string>} browsers - List of BrowserSpecifications to generate the headers for,
136
139
  * or one of `chrome`, `firefox` and `safari`.
140
+ * @param {string} browserListQuery - Browserslist query that selects the browsers (and their versions) to generate headers for, based on real-world usage data.
141
+ * For more info see the [query docs](https://github.com/browserslist/browserslist#full-list).
142
+ * If `browserListQuery` is passed the `browsers` array is ignored.
137
143
  * @param {Array<string>} operatingSystems - List of operating systems to generate the headers for.
138
144
  * The options are `windows`, `macos`, `linux`, `android` and `ios`.
139
145
  * @param {Array<string>} devices - List of devices to generate the headers for. Options are `desktop` and `mobile`.
@@ -153,30 +159,23 @@ class HeaderGenerator {
153
159
  */
154
160
  constructor(options = {}) {
155
161
  ow(options, 'HeaderGeneratorOptions', ow.object.exactShape(headerGeneratorOptionsShape));
156
- this.defaultOptions = JSON.parse(JSON.stringify(options));
157
162
  // Use a default setup when the necessary values are not provided
158
- if (!this.defaultOptions.locales) {
159
- this.defaultOptions.locales = ['en-US'];
160
- }
161
- if (!this.defaultOptions.httpVersion) {
162
- this.defaultOptions.httpVersion = '2';
163
- }
164
- if (!this.defaultOptions.browsers) {
165
- this.defaultOptions.browsers = [
166
- { name: 'chrome' },
167
- { name: 'firefox' },
168
- { name: 'safari' },
169
- ];
170
- }
171
- if (!this.defaultOptions.operatingSystems) {
172
- this.defaultOptions.operatingSystems = [
173
- 'windows',
174
- 'macos',
175
- 'linux',
176
- 'android',
177
- 'ios',
178
- ];
179
- }
163
+ const {
164
+ browsers = SUPPORTED_BROWSERS,
165
+ operatingSystems = ['windows', 'macos', 'linux', 'android', 'ios'],
166
+ devices = ['desktop'],
167
+ locales = ['en-US'],
168
+ httpVersion = '2',
169
+ browserListQuery,
170
+ } = options;
171
+ this.browserListQuery = browserListQuery;
172
+ this.globalOptions = {
173
+ browsers: this._prepareBrowsersConfig(browsers, browserListQuery, httpVersion),
174
+ operatingSystems,
175
+ devices,
176
+ locales,
177
+ httpVersion,
178
+ };
180
179
 
181
180
  this.inputGeneratorNetwork = new BayesianNetwork(inputNetworkDefinition);
182
181
  this.headerGeneratorNetwork = new BayesianNetwork(headerNetworkDefinition);
@@ -190,17 +189,8 @@ class HeaderGenerator {
190
189
  */
191
190
  getHeaders(options = {}, requestDependentHeaders = {}) {
192
191
  ow(options, 'HeaderGeneratorOptions', ow.object.exactShape(headerGeneratorOptionsShape));
193
- const headerOptions = JSON.parse(JSON.stringify({ ...this.defaultOptions, ...options }));
194
- headerOptions.browsers = headerOptions.browsers.map((browserObject) => {
195
- if (typeof browserObject === 'string') {
196
- browserObject = { name: browserObject };
197
- }
198
-
199
- if (!browserObject.httpVersion) {
200
- browserObject.httpVersion = headerOptions.httpVersion;
201
- }
202
- return browserObject;
203
- });
192
+ const headerOptions = JSON.parse(JSON.stringify({ ...this.globalOptions, ...options }));
193
+ headerOptions.browsers = this._prepareBrowsersConfig(headerOptions.browsers, headerOptions.browserListQuery, headerOptions.httpVersion);
204
194
 
205
195
  const possibleAttributeValues = {};
206
196
 
@@ -329,6 +319,23 @@ class HeaderGenerator {
329
319
 
330
320
  return orderedSample;
331
321
  }
322
+
323
+ _prepareBrowsersConfig(browsers, browserListQuery, httpVersion) {
324
+ let finalBrowsers = browsers;
325
+
326
+ if (browserListQuery) {
327
+ finalBrowsers = getBrowsersFromQuery(browserListQuery);
328
+ }
329
+
330
+ return finalBrowsers.map((browser) => {
331
+ if (typeof browser === 'string') {
332
+ return { name: browser, httpVersion };
333
+ }
334
+
335
+ browser.httpVersion = httpVersion;
336
+ return browser;
337
+ });
338
+ }
332
339
  }
333
340
 
334
341
  module.exports = HeaderGenerator;
package/src/utils.js CHANGED
@@ -1,3 +1,6 @@
1
+ const browsersList = require('browserslist');
2
+ const { SUPPORTED_BROWSERS } = require('./constants');
3
+
1
4
  const getUserAgent = (headers) => {
2
5
  let userAgent;
3
6
  for (const [header, value] of Object.entries(headers)) {
@@ -27,7 +30,57 @@ const getBrowser = (userAgent) => {
27
30
  return browser;
28
31
  };
29
32
 
33
/**
 * Groups browserslist entries by browser name.
 *
 * Each entry is split on a single space into a browser name and a version; entries whose
 * name is not listed in `SUPPORTED_BROWSERS` are skipped.
 *
 * @param {Array<string>} browserList - Entries in the `"<browser> <version>"` form.
 * @returns {Object<string, Array<string>>} Versions keyed by browser name, in encounter order.
 */
const getBrowsersWithVersions = (browserList) => {
    const versionsByBrowser = {};

    for (const entry of browserList) {
        const [name, version] = entry.split(' ');

        if (SUPPORTED_BROWSERS.includes(name)) {
            const knownVersions = versionsByBrowser[name];
            if (knownVersions) {
                knownVersions.push(version);
            } else {
                versionsByBrowser[name] = [version];
            }
        }
    }

    return versionsByBrowser;
};
51
/**
 * Collapses the versions of every browser into contiguous `{ name, minVersion, maxVersion }` ranges.
 *
 * Numeric versions are ordered ascending and a new range is started whenever two neighbouring
 * versions differ by more than 1. Versions that cannot be read as a number (for example `TP`)
 * cannot be ordered, so each of them is emitted as its own single-version range after the
 * numeric ranges of the same browser.
 *
 * The input arrays are not modified, and version values are passed through unchanged
 * (strings stay strings).
 *
 * @param {Object<string, Array<string>>} browsersWithVersions - Versions keyed by browser name.
 * @returns {Array<{name: string, minVersion: string, maxVersion: string}>} Version ranges, grouped per browser.
 */
const getOptimizedVersionDistribution = (browsersWithVersions) => {
    const isNumericVersion = (version) => {
        if (typeof version === 'number') {
            return Number.isFinite(version);
        }
        return typeof version === 'string' && version.trim() !== '' && Number.isFinite(Number(version));
    };

    const finalOptimizedBrowsers = [];

    for (const [browser, versions] of Object.entries(browsersWithVersions)) {
        // `filter` returns a fresh array, so the in-place `sort` never touches the caller's data.
        const numericVersions = versions.filter(isNumericVersion).sort((a, b) => a - b);
        const unorderedVersions = versions.filter((version) => !isNumericVersion(version));

        let lowestVersionSoFar = numericVersions[0];
        numericVersions.forEach((version, index) => {
            const isLast = index === numericVersions.length - 1;
            const nextVersion = numericVersions[index + 1];
            const isNextVersionGap = !isLast && nextVersion - version > 1;

            if (isNextVersionGap || isLast) {
                finalOptimizedBrowsers.push({
                    name: browser,
                    minVersion: lowestVersionSoFar,
                    maxVersion: version,
                });
                lowestVersionSoFar = nextVersion;
            }
        });

        // Non-numeric versions would turn the comparator and the gap check into NaN arithmetic,
        // so they are kept apart and reported one by one instead of being merged into a range.
        for (const version of unorderedVersions) {
            finalOptimizedBrowsers.push({
                name: browser,
                minVersion: version,
                maxVersion: version,
            });
        }
    }

    return finalOptimizedBrowsers;
};
75
+
76
/**
 * Resolves a browserslist query into browser version ranges.
 *
 * The query is evaluated by `browserslist`, the result is grouped per supported browser by
 * `getBrowsersWithVersions`, and the versions are then merged into ranges by
 * `getOptimizedVersionDistribution`.
 *
 * @param {string} browserListQuery - Query passed to `browserslist`.
 * @returns {Array<{name: string, minVersion: string, maxVersion: string}>} Browser version ranges.
 */
const getBrowsersFromQuery = (browserListQuery) => getOptimizedVersionDistribution(
    getBrowsersWithVersions(browsersList(browserListQuery)),
);
81
+
30
82
  module.exports = {
31
83
  getUserAgent,
32
84
  getBrowser,
85
+ getBrowsersFromQuery,
33
86
  };