es6-crawler-detect 4.0.0-beta.1 → 4.0.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/dist/index.d.ts +5 -0
  2. package/dist/index.d.ts.map +1 -0
  3. package/dist/index.js +18 -0
  4. package/dist/index.js.map +1 -0
  5. package/dist/lib/crawler/crawlers.d.ts +5 -0
  6. package/dist/lib/crawler/crawlers.d.ts.map +1 -0
  7. package/dist/lib/crawler/crawlers.js +1400 -0
  8. package/dist/lib/crawler/crawlers.js.map +1 -0
  9. package/dist/lib/crawler/exclusions.d.ts +5 -0
  10. package/dist/lib/crawler/exclusions.d.ts.map +1 -0
  11. package/dist/lib/crawler/exclusions.js +59 -0
  12. package/dist/lib/crawler/exclusions.js.map +1 -0
  13. package/dist/lib/crawler/headers.d.ts +5 -0
  14. package/dist/lib/crawler/headers.d.ts.map +1 -0
  15. package/dist/lib/crawler/headers.js +21 -0
  16. package/dist/lib/crawler/headers.js.map +1 -0
  17. package/dist/lib/crawler/provider.d.ts +4 -0
  18. package/dist/lib/crawler/provider.d.ts.map +1 -0
  19. package/dist/lib/crawler/provider.js +3 -0
  20. package/dist/lib/crawler/provider.js.map +1 -0
  21. package/dist/lib/crawler.d.ts +21 -0
  22. package/dist/lib/crawler.d.ts.map +1 -0
  23. package/dist/lib/crawler.js +98 -0
  24. package/dist/lib/crawler.js.map +1 -0
  25. package/dist/types.d.ts +9 -0
  26. package/dist/types.d.ts.map +1 -0
  27. package/dist/types.js +3 -0
  28. package/dist/types.js.map +1 -0
  29. package/package.json +3 -2
  30. package/tsconfig.json +13 -1
  31. package/dist/main.bundle.js +0 -1
  32. package/example/node/dist/crawler.min.js +0 -1
  33. package/example/node/server.js +0 -38
  34. package/src/index.js +0 -22
  35. package/src/lib/crawler/crawlers.js +0 -1399
  36. package/src/lib/crawler/exclusions.js +0 -58
  37. package/src/lib/crawler/headers.js +0 -20
  38. package/src/lib/crawler/provider.js +0 -2
  39. package/src/lib/crawler.js +0 -98
  40. package/src/types.js +0 -2
  41. package/test/lib/crawler.spec.js +0 -149
  42. package/webpack.common.js +0 -20
  43. package/webpack.dev.js +0 -7
  44. package/webpack.prod.js +0 -14
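Taken together, the listing shows the substance of this beta: the hand-maintained src/ tree, the webpack bundle (dist/main.bundle.js), the webpack configs, and the bundled Node example are removed, replaced by compiled TypeScript output under dist/ with .d.ts declarations and source maps. Consumers that go through the package entry point should see the same API. A minimal usage sketch, assuming the new dist entry still exports the Crawler class the way the deleted src/index.js did:

// Hypothetical consumer of es6-crawler-detect 4.0.0-beta.2; assumes the
// package entry resolves to dist/index.js and re-exports Crawler.
const { Crawler } = require('es6-crawler-detect');

const detector = new Crawler();
console.log(detector.isCrawler('Googlebot/2.1 (+http://www.google.com/bot.html)')); // expected: true
console.log(detector.getMatches()); // expected: the matched substring, e.g. 'Googlebot'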
package/src/lib/crawler/exclusions.js DELETED
@@ -1,58 +0,0 @@
- 'use strict';
- Object.defineProperty(exports, '__esModule', { value: true });
- exports.Exclusions = void 0;
- class Exclusions {
-   getAll() {
-     return [
-       'Safari.[\\d\\.]*',
-       'Firefox.[\\d\\.]*',
-       ' Chrome.[\\d\\.]*',
-       'Chromium.[\\d\\.]*',
-       'MSIE.[\\d\\.]',
-       'Opera\\/[\\d\\.]*',
-       'Mozilla.[\\d\\.]*',
-       'AppleWebKit.[\\d\\.]*',
-       'Trident.[\\d\\.]*',
-       'Windows NT.[\\d\\.]*',
-       'Android [\\d\\.]*',
-       'Macintosh.',
-       'Ubuntu',
-       'Linux',
-       '[ ]Intel',
-       'Mac OS X [\\d_]*',
-       '(like )?Gecko(.[\\d\\.]*)?',
-       'KHTML,',
-       'CriOS.[\\d\\.]*',
-       'CPU iPhone OS ([0-9_])* like Mac OS X',
-       'CPU OS ([0-9_])* like Mac OS X',
-       'iPod',
-       'compatible',
-       'x86_..',
-       'i686',
-       'x64',
-       'X11',
-       'rv:[\\d\\.]*',
-       'Version.[\\d\\.]*',
-       'WOW64',
-       'Win64',
-       'Dalvik.[\\d\\.]*',
-       ' \\.NET CLR [\\d\\.]*',
-       'Presto.[\\d\\.]*',
-       'Media Center PC',
-       'BlackBerry',
-       'Build',
-       'Opera Mini\\/\\d{1,2}\\.\\d{1,2}\\.[\\d\\.]*\\/\\d{1,2}\\.',
-       'Opera',
-       ' \\.NET[\\d\\.]*',
-       'cubot',
-       '; M bot',
-       '; CRONO',
-       '; B bot',
-       '; IDbot',
-       '; ID bot',
-       '; POWER BOT',
-       'OCTOPUS-CORE',
-     ];
-   }
- }
- exports.Exclusions = Exclusions;
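These are not crawler signatures. As the deleted crawler.js further down shows, they are compiled into a single regex with the 'gi' flags and erased from the user agent before the crawler list is consulted, so browser boilerplate can neither trigger nor mask a match. A self-contained sketch of that stripping step, using a hypothetical subset of the list above:

// Sketch: exclusion stripping as done in the deleted crawler.js
// (compileRegex plus the replace() call in isCrawler). Subset chosen for brevity.
const browserTokens = [
  'Mozilla.[\\d\\.]*',
  'Macintosh.',
  '[ ]Intel',
  'Mac OS X [\\d_]*',
  'AppleWebKit.[\\d\\.]*',
];
const compiledExclusions = new RegExp(browserTokens.join('|'), 'gi');

const ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) AppleWebKit/537.36';
// Only punctuation survives, so no crawler pattern can match afterwards.
console.log(ua.replace(compiledExclusions, '').trim()); // '( )'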
package/src/lib/crawler/headers.js DELETED
@@ -1,20 +0,0 @@
- 'use strict';
- Object.defineProperty(exports, '__esModule', { value: true });
- exports.Headers = void 0;
- class Headers {
-   getAll() {
-     return [
-       'USER-AGENT',
-       'X-OPERAMINI-PHONE-UA',
-       'X-DEVICE-USER-AGENT',
-       'X-ORIGINAL-USER-AGENT',
-       'X-SKYFIRE-PHONE',
-       'X-BOLT-PHONE-UA',
-       'DEVICE-STOCK-UA',
-       'X-UCBROWSER-DEVICE-UA',
-       'FROM',
-       'X-SCANNER',
-     ];
-   }
- }
- exports.Headers = Headers;
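These header names are the fallback sources for the user agent: when no explicit UA is given, the deleted crawler.js walks this list (lowercased) and concatenates any matching request-header values into the string it tests. A sketch of that behavior; the asserted outcome assumes 'Googlebot' is in the crawler pattern list, which this diff does not show:

// Sketch: UA detection fed by an alternative header from the list above
// (mirrors setUserAgent in the deleted crawler.js).
const { Crawler } = require('es6-crawler-detect');

const detector = new Crawler({
  headers: { 'x-operamini-phone-ua': 'Googlebot/2.1', accept: '*/*' },
});
// No argument: the UA is assembled from X-OPERAMINI-PHONE-UA.
console.log(detector.isCrawler()); // expected: true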
package/src/lib/crawler/provider.js DELETED
@@ -1,2 +0,0 @@
- 'use strict';
- Object.defineProperty(exports, '__esModule', { value: true });
package/src/lib/crawler.js DELETED
@@ -1,98 +0,0 @@
- 'use strict';
- Object.defineProperty(exports, '__esModule', { value: true });
- exports.Crawler = void 0;
- const crawlers_1 = require('./crawler/crawlers');
- const exclusions_1 = require('./crawler/exclusions');
- const headers_1 = require('./crawler/headers');
- class Crawler {
-   crawlers;
-   headers;
-   exclusions;
-   request;
-   compiledRegexList;
-   compiledExclusions;
-   httpHeaders;
-   userAgent;
-   matches;
-   constructor(request, headers, userAgent) {
-     this.crawlers = new crawlers_1.Crawlers();
-     this.headers = new headers_1.Headers();
-     this.exclusions = new exclusions_1.Exclusions();
-     this.request = request ?? {};
-     // The regex-list must not be used with g-flag!
-     // See: https://stackoverflow.com/questions/1520800/why-does-a-regexp-with-global-flag-give-wrong-results
-     this.compiledRegexList = this.compileRegex(this.crawlers.getAll(), 'i');
-     // The exclusions should be used with g-flag in order to remove each value.
-     this.compiledExclusions = this.compileRegex(this.exclusions.getAll(), 'gi');
-     this.httpHeaders = this.setHttpHeaders(headers);
-     this.userAgent = this.setUserAgent(userAgent);
-   }
-   compileRegex(patterns, flags) {
-     return new RegExp(patterns.join('|'), flags);
-   }
-   setHttpHeaders(headers) {
-     // Use the Request headers if httpHeaders is not defined
-     if (!headers || Object.keys(headers).length === 0) {
-       if (Object.keys(this.request).length) {
-         if (this.request.headers) {
-           return this.request.headers;
-         }
-       }
-       return '';
-     }
-     // Save the headers.
-     return headers;
-   }
-   setUserAgent(userAgent) {
-     if (!userAgent?.length) {
-       userAgent = '';
-       for (const header of this.getUaHttpHeaders()) {
-         if (
-           typeof this.httpHeaders === 'object' &&
-           !Array.isArray(this.httpHeaders)
-         ) {
-           if (Object.hasOwn(this.httpHeaders, header.toLowerCase())) {
-             const headerValue = this.httpHeaders[header.toLowerCase()];
-             if (typeof headerValue === 'string') {
-               const separator = userAgent.length > 0 ? ' ' : '';
-               userAgent += separator + headerValue;
-             } else if (Array.isArray(headerValue)) {
-               const separator = userAgent.length > 0 ? ' ' : '';
-               userAgent += separator + headerValue.join(' ');
-             }
-           }
-         }
-       }
-     }
-     return userAgent;
-   }
-   getUaHttpHeaders() {
-     return this.headers.getAll();
-   }
-   getMatches() {
-     if (this.matches !== undefined) {
-       if (this.matches?.length) {
-         return this.matches[0];
-       }
-       return null;
-     }
-     return {};
-   }
-   isCrawler(userAgent) {
-     if (Buffer.byteLength(userAgent ?? '', 'utf8') > 4096) {
-       return false;
-     }
-     let agent = userAgent ?? this.userAgent;
-     // test on compiled regx
-     agent = agent.replace(this.compiledExclusions, '');
-     if (agent.trim().length === 0) {
-       return false;
-     }
-     const matches = this.compiledRegexList.exec(agent);
-     if (matches) {
-       this.matches = matches;
-     }
-     return matches !== null && matches.length > 0;
-   }
- }
- exports.Crawler = Crawler;
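This class is the whole detection pipeline: the constructor joins the crawler and exclusion lists into two alternation regexes ('i' alone for matching, since a 'g' flag would make exec() resume from lastIndex and skip hits; 'gi' for stripping), and isCrawler() rejects user agents over 4096 bytes, erases browser tokens, then runs a single exec() whose result is cached for getMatches(). A condensed, self-contained sketch of that flow with toy pattern lists (not the real database):

// Toy stand-ins for Crawlers.getAll() / Exclusions.getAll().
const crawlerPatterns = ['Googlebot', 'bingbot', 'b0t'];
const exclusionPatterns = ['Mozilla.[\\d\\.]*', 'AppleWebKit.[\\d\\.]*'];

const compiledRegexList = new RegExp(crawlerPatterns.join('|'), 'i'); // no 'g': exec() must restart at index 0
const compiledExclusions = new RegExp(exclusionPatterns.join('|'), 'gi'); // 'g': erase every browser token

function isCrawler(userAgent) {
  if (Buffer.byteLength(userAgent, 'utf8') > 4096) return false; // oversized UA: bail out early
  const agent = userAgent.replace(compiledExclusions, '');
  if (agent.trim().length === 0) return false; // nothing but browser boilerplate
  return compiledRegexList.exec(agent) !== null;
}

console.log(isCrawler('Mozilla/5.0 Googlebot/2.1')); // true: 'Googlebot' survives the stripping
console.log(isCrawler('Mozilla/5.0 AppleWebKit/537.36')); // false: everything is stripped away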
package/src/types.js DELETED
@@ -1,2 +0,0 @@
- 'use strict';
- Object.defineProperty(exports, '__esModule', { value: true });
package/test/lib/crawler.spec.js DELETED
@@ -1,149 +0,0 @@
- 'use strict';
- var __createBinding =
-   (this && this.__createBinding) ||
-   (Object.create
-     ? function (o, m, k, k2) {
-         if (k2 === undefined) k2 = k;
-         var desc = Object.getOwnPropertyDescriptor(m, k);
-         if (
-           !desc ||
-           ('get' in desc ? !m.__esModule : desc.writable || desc.configurable)
-         ) {
-           desc = {
-             enumerable: true,
-             get: function () {
-               return m[k];
-             },
-           };
-         }
-         Object.defineProperty(o, k2, desc);
-       }
-     : function (o, m, k, k2) {
-         if (k2 === undefined) k2 = k;
-         o[k2] = m[k];
-       });
- var __setModuleDefault =
-   (this && this.__setModuleDefault) ||
-   (Object.create
-     ? function (o, v) {
-         Object.defineProperty(o, 'default', { enumerable: true, value: v });
-       }
-     : function (o, v) {
-         o['default'] = v;
-       });
- var __importStar =
-   (this && this.__importStar) ||
-   function (mod) {
-     if (mod && mod.__esModule) return mod;
-     var result = {};
-     if (mod != null)
-       for (var k in mod)
-         if (k !== 'default' && Object.prototype.hasOwnProperty.call(mod, k))
-           __createBinding(result, mod, k);
-     __setModuleDefault(result, mod);
-     return result;
-   };
- var __importDefault =
-   (this && this.__importDefault) ||
-   function (mod) {
-     return mod && mod.__esModule ? mod : { default: mod };
-   };
- Object.defineProperty(exports, '__esModule', { value: true });
- const fs = __importStar(require('fs'));
- const readline = __importStar(require('readline'));
- const assert_1 = __importDefault(require('assert'));
- const src_1 = require('../../src');
- let crawler = new src_1.Crawler();
- describe('regex-compilation', () => {
-   it('will join list of patterns with pipes', () => {
-     assert_1.default.strictEqual(
-       crawler.compileRegex(['some', 'patterns']).source,
-       'some|patterns'
-     );
-     assert_1.default.strictEqual(
-       crawler.compileRegex(['single']).source,
-       'single'
-     );
-   });
-   it('keeps the whitespace', () => {
-     assert_1.default.strictEqual(
-       crawler.compileRegex([' keep-whitespaces ']).source,
-       ' keep-whitespaces '
-     );
-   });
-   it('will accept regex-flags for compilation', () => {
-     const patterns = ['some', 'patterns'];
-     assert_1.default.strictEqual(
-       crawler.compileRegex(patterns, 'g').flags,
-       'g'
-     );
-     assert_1.default.strictEqual(
-       crawler.compileRegex(patterns, 'i').flags,
-       'i'
-     );
-   });
-   it('should be case insensitive', () => {
-     assert_1.default.strictEqual(crawler.isCrawler('Facebot\\1.0'), true);
-     assert_1.default.strictEqual(
-       crawler.getMatches(),
-       'Facebot',
-       'Crawler was not able to identify crawler correctly'
-     );
-   });
- });
- describe('crawler-identification', () => {
-   it('should be able to identify crawlers', async () => {
-     const rl = readline.createInterface({
-       input: fs.createReadStream('./test/lib/database/crawlers.txt'),
-       crlfDelay: Infinity,
-     });
-     for await (const line of rl) {
-       assert_1.default.strictEqual(
-         crawler.isCrawler(line),
-         true,
-         `${line} is not a crawler`
-       );
-     }
-     rl.close();
-   });
-   it('should be able to identify devices', async () => {
-     const rl = readline.createInterface({
-       input: fs.createReadStream('./test/lib/database/devices.txt'),
-       crlfDelay: Infinity,
-     });
-     for await (const line of rl) {
-       assert_1.default.strictEqual(
-         crawler.isCrawler(line),
-         false,
-         `${line} is not a device`
-       );
-     }
-     rl.close();
-   });
-   it('should identify the crawler from given headers', async () => {
-     crawler = new src_1.Crawler(undefined, {
-       host: '127.0.0.1:3000',
-       'user-agent': 'curl/7.73.0',
-       accept: '*/*',
-     });
-     assert_1.default.strictEqual(crawler.isCrawler(), true);
-   });
-   it('should identify the crawler from request headers', async () => {
-     crawler = new src_1.Crawler({
-       headers: { 'user-agent': 'curl/7.73.0', accept: '*/*' },
-     });
-     assert_1.default.strictEqual(crawler.isCrawler(), true);
-   });
-   it('should identify the crawler from request headers with exact pattern', async () => {
-     crawler = new src_1.Crawler({
-       headers: { 'user-agent': 'b0t', accept: '*/*' },
-     });
-     assert_1.default.strictEqual(crawler.isCrawler(), true);
-   });
-   it('should not throw an exception on empty request header', async () => {
-     crawler = new src_1.Crawler({
-       headers: { accept: '*/*' },
-     });
-     assert_1.default.doesNotThrow(() => crawler.isCrawler());
-   });
- });
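The deleted spec doubles as API documentation: compileRegex() joins patterns verbatim (whitespace preserved) with '|', detection is case-insensitive, and a Crawler can take its user agent from an explicit argument, a headers object, or request.headers. It also pins down getMatches(): an empty object before any successful check, the matched substring afterwards. A condensed sketch of that last contract, assuming the package entry exports Crawler:

// Sketch of the getMatches() contract exercised in the spec above.
const { Crawler } = require('es6-crawler-detect');

const detector = new Crawler();
console.log(detector.getMatches()); // {} — no match recorded yet
detector.isCrawler('Facebot\\1.0'); // true, per the spec above
console.log(detector.getMatches()); // 'Facebot' — the match from the last hit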
package/webpack.common.js DELETED
@@ -1,20 +0,0 @@
- const path = require('path');
-
- module.exports = {
-   context: path.resolve(__dirname, 'src'),
-   entry: './index.js',
-   output: {
-     path: path.resolve(__dirname, 'dist'),
-     filename: '[name].bundle.js',
-     library: 'Crawler',
-   },
-   module: {
-     rules: [
-       {
-         test: /\.js$/,
-         use: 'babel-loader',
-         exclude: /node_modules/,
-       },
-     ],
-   },
- };
package/webpack.dev.js DELETED
@@ -1,7 +0,0 @@
- const { merge } = require('webpack-merge');
- const common = require('./webpack.common');
-
- module.exports = merge(common, {
-   mode: 'development',
-   devtool: 'inline-source-map',
- });
package/webpack.prod.js DELETED
@@ -1,14 +0,0 @@
- const { merge } = require('webpack-merge');
- const TerserPlugin = require('terser-webpack-plugin');
- const common = require('./webpack.common');
-
- module.exports = merge(common, {
-   mode: 'production',
-   optimization: {
-     minimizer: [
-       new TerserPlugin({
-         parallel: true,
-       }),
-     ],
-   },
- });