wappalyzer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/js/driver.js ADDED
@@ -0,0 +1,46 @@
1
var w = wappalyzer;

/**
 * Driver that feeds a single page to Wappalyzer and returns the result as JSON.
 *
 * The caller is expected to populate `w.driver.data` (`host`, `url`, `html`,
 * `headers`) before calling `w.driver.init()`. Logging relies on a global
 * `print` function being available in the host environment.
 */
w.driver = {
    // When truthy, log() prints messages through the global `print`
    debug: false,
    // Input for init(): { host, url, html, headers }
    data: {},
    // Not read anywhere in this file
    timeout: 5000,

    /**
     * Log messages to console
     *
     * @param {Object} args - message descriptor with `type` and `message` properties
     */
    log: function(args) {
        if ( w.driver.debug ) {
            print(args.type + ': ' + args.message + "\n");
        }
    },

    /**
     * Initialize
     *
     * Analyzes the page described by `w.driver.data` and collects, for every
     * detected application, its categories, total confidence and version.
     *
     * @returns {string} JSON object keyed by application name
     */
    init: function() {
        var
            app,
            detected,
            apps = {},
            url = w.driver.data.url;

        w.analyze(w.driver.data.host, url, {
            html: w.driver.data.html,
            headers: w.driver.data.headers
        });

        // w.analyze() removes the fragment from the URL before using it as the
        // key in w.detected, so the lookup has to use the same normalized URL
        if ( typeof url === 'string' ) {
            url = url.split('#')[0];
        }

        detected = w.detected[url] || {};

        for ( app in detected ) {
            apps[app] = {
                categories: [],
                confidence: detected[app].confidenceTotal,
                version: detected[app].version
            };

            w.apps[app].cats.forEach(function(cat) {
                apps[app].categories.push(w.categories[cat]);
            });
        }

        return JSON.stringify(apps);
    },

    /**
     * Dummy
     *
     * Called by the core after every analysis; this driver has nothing to display.
     */
    displayApps: function() {
    }
};
@@ -0,0 +1,530 @@
1
+ /**
2
+ * Wappalyzer v2
3
+ *
4
+ * Created by Elbert Alias <elbert@alias.io>
5
+ *
6
+ * License: GPLv3 http://www.gnu.org/licenses/gpl-3.0.txt
7
+ */
8
+
9
+ var wappalyzer = (function() {
10
+ //'use strict';
11
+
12
/**
 * Application class
 *
 * Tracks the detection state of one application: which patterns matched
 * (with their confidence) and which version strings were extracted.
 *
 * @param {string} app - application name
 * @param {boolean} [detected] - initial detection state (coerced to boolean)
 */
var Application = function(app, detected) {
    this.app = app;
    this.confidence = {};
    this.confidenceTotal = 0;
    this.detected = Boolean(detected);
    this.excludes = [];
    this.version = '';
    this.versions = [];
};

Application.prototype = {
    /**
     * Calculate confidence total
     *
     * Sums all recorded confidence values, capped at 100, and stores the result
     * in `confidenceTotal`. Values are coerced to numbers first because pattern
     * attributes arrive as strings (e.g. "50"); adding them uncoerced would
     * concatenate instead of sum.
     *
     * @returns {number} total confidence between 0 and 100
     */
    getConfidence: function() {
        var total = 0, id, value;

        for ( id in this.confidence ) {
            value = Number(this.confidence[id]);

            if ( isFinite(value) ) {
                total += value;
            }
        }

        this.confidenceTotal = Math.min(total, 100);

        return this.confidenceTotal;
    },

    /**
     * Resolve version number (find the longest version number that contains all shorter detected version numbers)
     *
     * Sorts `versions` by length (in place) and stores the result in `version`.
     *
     * @returns {string|undefined} resolved version, or undefined when no version was detected
     */
    getVersion: function() {
        var i, resolved;

        if ( !this.versions.length ) {
            return;
        }

        this.versions.sort(function(a, b) {
            return a.length - b.length;
        });

        resolved = this.versions[0];

        for ( i = 1; i < this.versions.length; i++ ) {
            // Stop at the first longer version that does not contain the current one
            if ( this.versions[i].indexOf(resolved) === -1 ) {
                break;
            }

            resolved = this.versions[i];
        }

        this.version = resolved;

        return this.version;
    },

    /**
     * Mark the application as detected by a pattern
     *
     * @param {Object} pattern - parsed pattern with `regex` and optional `confidence` / `version`
     * @param {string} type - pattern type, used in the confidence key
     * @param {string} value - the text the pattern matched against
     * @param {string} [key] - optional sub key (e.g. header or meta name), used in the confidence key
     */
    setDetected: function(pattern, type, value, key) {
        var version, matches, confidence;

        this.detected = true;

        // Set confidence level (numeric; falls back to 100 when absent or not a number)
        confidence = pattern.confidence ? Number(pattern.confidence) : 100;

        this.confidence[type + ' ' + ( key ? key + ' ' : '' ) + pattern.regex] = isFinite(confidence) ? confidence : 100;

        // Detect version number
        if ( !pattern.version ) {
            return;
        }

        version = pattern.version;
        matches = pattern.regex.exec(value);

        if ( !matches ) {
            return;
        }

        matches.forEach(function(match, i) {
            // Parse ternary operator
            var ternary = new RegExp('\\\\' + i + '\\?([^:]+):(.*)$').exec(version);

            if ( ternary && ternary.length === 3 ) {
                w.log({ match: match, i: i, ternary: ternary });

                // Function replacers keep "$" sequences in the inserted text literal
                version = version.replace(ternary[0], function() {
                    return match ? ternary[1] : ternary[2];
                });

                w.log({ version: version });
            }

            // Replace back references
            version = version.replace(new RegExp('\\\\' + i, 'g'), function() {
                return match ? match : '';
            });
        });

        if ( version && this.versions.indexOf(version) < 0 ) {
            this.versions.push(version);
        }

        this.getVersion();
    }
};
104
+
105
/**
 * Profiler
 *
 * Counts evaluated regular expressions, remembers the slowest one and flags
 * when the time budget has been exceeded.
 *
 * @param {number} [timeout=1000] - time budget in milliseconds
 */
var Profiler = function(timeout) {
    var now = new Date().getTime();

    this.regexCount = 0;
    this.startTime = now;
    this.lastTime = now;
    // `regex` is what checkPoint() records; `pattern` is kept from the original shape
    this.slowest = { duration: null, app: '', type: '', pattern: '', regex: '' };
    this.timeout = typeof timeout === 'number' ? timeout : 1000;
    this.timedOut = false;
};

Profiler.prototype = {
    /**
     * Record that a regular expression has been evaluated
     *
     * The time since the previous check point is attributed to this regex.
     *
     * @param {string} app - application the pattern belongs to
     * @param {string} type - pattern type
     * @param {RegExp} regex - the expression that was evaluated
     */
    checkPoint: function(app, type, regex) {
        var
            now = new Date().getTime(),
            duration = now - this.lastTime;

        // Compare against null explicitly so a recorded 0 ms is not treated as "unset"
        if ( this.slowest.duration === null || duration > this.slowest.duration ) {
            this.slowest.duration = duration;
            this.slowest.app = app;
            this.slowest.type = type;
            this.slowest.regex = regex;
        }

        this.regexCount++;

        this.lastTime = now;

        this.timedOut = now - this.startTime > this.timeout;
    }
};
131
+
132
/**
 * Call driver functions
 *
 * Dispatches to `w.driver[func]` when it is implemented.
 *
 * @param {string} func - name of the driver function
 * @param {*} [args] - single argument passed through to the driver function
 * @returns {*} the driver function's return value, or undefined when it is not implemented
 */
var driver = function(func, args) {
    if ( !w.driver || typeof w.driver[func] !== 'function' ) {
        // w.log() is itself routed through driver('log'); warning about a
        // missing logger would recurse without end, so stay silent for it
        if ( func !== 'log' ) {
            w.log('not implemented: w.driver.' + func, 'warn');
        }

        return;
    }

    if ( func !== 'log' ) {
        w.log('w.driver.' + func);
    }

    return w.driver[func](args);
};
148
+
149
/**
 * Parse apps.json patterns
 *
 * A pattern is a regular expression source optionally followed by `\;key:value`
 * attributes. Each pattern is turned into an object with `string` (the
 * source), `regex` (case-insensitive RegExp) and one string property per
 * attribute.
 *
 * @param {string|string[]} patterns - a single pattern or a list of patterns
 * @returns {Object[]} parsed patterns; empty when `patterns` is neither a string nor an array
 */
var parse = function(patterns) {
    var parsed = [];

    // Convert single patterns to an array
    if ( typeof patterns === 'string' ) {
        patterns = [ patterns ];
    }

    if ( !Array.isArray(patterns) ) {
        return parsed;
    }

    patterns.forEach(function(pattern) {
        var attrs = {};

        if ( typeof pattern !== 'string' ) {
            return;
        }

        pattern.split('\\;').forEach(function(attr, i) {
            var pair;

            if ( i ) {
                // Key value pairs (the value may itself contain colons)
                pair = attr.split(':');

                if ( pair.length > 1 ) {
                    attrs[pair.shift()] = pair.join(':');
                }

                return;
            }

            attrs.string = attr;

            try {
                // The RegExp constructor takes the source as is; slashes need no escaping
                attrs.regex = new RegExp(attr, 'i');
            } catch (e) {
                // An invalid pattern must never match; an empty RegExp would match everything
                attrs.regex = /(?!)/;

                w.log(e + ': ' + attr, 'error');
            }
        });

        parsed.push(attrs);
    });

    return parsed;
};
191
+
192
/**
 * Main script
 *
 * Holds the application definitions, the detection history per URL and the
 * data collected per hostname, and exposes `log`, `init` and `analyze`.
 */
var w = {
    // Application definitions keyed by name; analyze() reads the pattern types
    // `url`, `html`, `script`, `meta`, `headers`, `env` plus `implies` / `excludes`
    apps: {},
    // Not read in this file; analyze() checks `w.categories` instead
    cats: null,
    // Data collected per hostname, handed to the driver's `ping`
    ping: { hostnames: {} },
    adCache: [],
    // Detected applications keyed by URL (without fragment), then by application name
    detected: {},

    config: {
        websiteURL: 'https://wappalyzer.com/',
        twitterURL: 'https://twitter.com/Wappalyzer',
        githubURL: 'https://github.com/AliasIO/Wappalyzer'
    },

    /**
     * Log messages to console
     *
     * @param {string|Object} message - objects are serialized with JSON.stringify
     * @param {string} [type='debug'] - message type, passed on to the driver
     */
    log: function(message, type) {
        if ( type === undefined ) {
            type = 'debug';
        }

        if ( typeof message === 'object' ) {
            message = JSON.stringify(message);
        }

        // Without a driver logger there is nowhere to send the message; bail out
        // here so that logging can never throw or loop through driver('log')
        if ( !w.driver || typeof w.driver.log !== 'function' ) {
            return;
        }

        driver('log', { message: '[wappalyzer ' + type + '] ' + message, type: type });
    },

    /**
     * Initialize
     *
     * Calls the driver's `init` function, if a driver has been attached.
     */
    init: function() {
        w.log('w.init');

        // Checks
        if ( w.driver === undefined ) {
            w.log('no driver, exiting');

            return;
        }

        // Initialize driver
        driver('init');
    },

    /**
     * Analyze the request
     *
     * Tests every application pattern against the supplied data, applies
     * `excludes` and `implies`, stores the result in `w.detected[url]`,
     * updates `w.ping.hostnames` and finally calls the driver's `displayApps`.
     *
     * @param {string} hostname - hostname of the page
     * @param {string} url - URL of the page; the fragment is removed
     * @param {Object} [data] - optional `html` (string), `headers` (object with
     *     lower-case keys) and `env` (object); `data.url` is set to the URL without fragment
     */
    analyze: function(hostname, url, data) {
        var
            i, key, app, confidence, type, match, content, meta, header, headerValue, version, id,
            regexScript = /<script[^>]+src=("|')([^"']+)/ig,
            regexMeta = /<meta[^>]+>/ig,
            profiler = new Profiler(),
            apps = {},
            excludes = [],
            scripts = [],
            metaTags = [],
            hasHtml = false,
            checkImplies = true;

        w.log('w.analyze');

        if ( data === undefined || data === null ) {
            data = {};
        }

        // Remove hash from URL
        data.url = url = url.split('#')[0];

        if ( w.apps === undefined || w.categories === undefined ) {
            w.log('apps.json not loaded, check for syntax errors');

            return;
        }

        if ( w.detected[url] === undefined ) {
            w.detected[url] = {};
        }

        hasHtml = typeof data.html === 'string' && data.html !== '';

        // Scan the document for script sources and meta tags once, instead of
        // once per application and pattern
        if ( hasHtml ) {
            while ( ( match = regexScript.exec(data.html) ) !== null ) {
                scripts.push(match[2]);
            }

            while ( ( match = regexMeta.exec(data.html) ) !== null ) {
                metaTags.push(match[0]);
            }
        }

        for ( app in w.apps ) {
            // Exit loop after one second to prevent CPU hogging
            // Remaining patterns will not be evaluated
            if ( profiler.timedOut ) {
                w.log('Timeout, exiting loop');

                break;
            }

            apps[app] = w.detected[url][app] ? w.detected[url][app] : new Application(app);

            for ( type in w.apps[app] ) {
                switch ( type ) {
                    case 'url':
                        parse(w.apps[app][type]).forEach(function(pattern) {
                            if ( pattern.regex.test(url) ) {
                                apps[app].setDetected(pattern, type, url);
                            }

                            profiler.checkPoint(app, type, pattern.regex);
                        });

                        break;
                    case 'html':
                        if ( !hasHtml ) {
                            break;
                        }

                        parse(w.apps[app][type]).forEach(function(pattern) {
                            if ( pattern.regex.test(data.html) ) {
                                apps[app].setDetected(pattern, type, data.html);
                            }

                            profiler.checkPoint(app, type, pattern.regex);
                        });

                        break;
                    case 'script':
                        if ( !hasHtml ) {
                            break;
                        }

                        parse(w.apps[app][type]).forEach(function(pattern) {
                            scripts.forEach(function(src) {
                                if ( pattern.regex.test(src) ) {
                                    apps[app].setDetected(pattern, type, src);
                                }
                            });

                            profiler.checkPoint(app, type, pattern.regex);
                        });

                        break;
                    case 'meta':
                        if ( !hasHtml ) {
                            break;
                        }

                        for ( i = 0; i < metaTags.length; i++ ) {
                            for ( meta in w.apps[app][type] ) {
                                profiler.checkPoint(app, type, regexMeta);

                                if ( new RegExp('name=["\']' + meta + '["\']', 'i').test(metaTags[i]) ) {
                                    content = metaTags[i].match(/content=("|')([^"']+)("|')/i);

                                    parse(w.apps[app].meta[meta]).forEach(function(pattern) {
                                        if ( content && content.length === 4 && pattern.regex.test(content[2]) ) {
                                            apps[app].setDetected(pattern, type, content[2], meta);
                                        }

                                        profiler.checkPoint(app, type, pattern.regex);
                                    });
                                }
                            }
                        }

                        break;
                    case 'headers':
                        if ( typeof data[type] !== 'object' || !data[type] ) {
                            break;
                        }

                        for ( header in w.apps[app].headers ) {
                            // Header names in the data are looked up in lower case
                            headerValue = data.headers[header.toLowerCase()];

                            parse(w.apps[app].headers[header]).forEach(function(pattern) {
                                if ( typeof headerValue === 'string' && pattern.regex.test(headerValue) ) {
                                    apps[app].setDetected(pattern, type, headerValue, header);
                                }

                                profiler.checkPoint(app, type, pattern.regex);
                            });
                        }

                        break;
                    case 'env':
                        if ( typeof data[type] !== 'object' || !data[type] ) {
                            break;
                        }

                        parse(w.apps[app][type]).forEach(function(pattern) {
                            for ( key in data.env ) {
                                if ( pattern.regex.test(data.env[key]) ) {
                                    apps[app].setDetected(pattern, type, data.env[key]);
                                }
                            }

                            profiler.checkPoint(app, type, pattern.regex);
                        });

                        break;
                }
            }
        }

        w.log('[ profiler ] Tested ' + profiler.regexCount + ' regular expressions in ' + ( (new Date().getTime() - profiler.startTime) / 1000 ) + 's');
        w.log('[ profiler ] Slowest pattern took ' + ( profiler.slowest.duration / 1000 ) + 's: ' + profiler.slowest.app + ' | ' + profiler.slowest.type + ' | ' + profiler.slowest.regex);

        // Keep detected apps only
        for ( app in apps ) {
            if ( !apps[app].detected ) {
                delete apps[app];
            }
        }

        // Exclude app in detected apps only
        for ( app in apps ) {
            if ( w.apps[app].excludes ) {
                if ( typeof w.apps[app].excludes === 'string' ) {
                    w.apps[app].excludes = [ w.apps[app].excludes ];
                }

                w.apps[app].excludes.forEach(function(excluded) {
                    excludes.push(excluded);
                });
            }
        }

        // Remove excluded applications
        for ( app in apps ) {
            if ( excludes.indexOf(app) !== -1 ) {
                delete apps[app];
            }
        }

        // Implied applications
        // Run several passes as implied apps may imply other apps
        while ( checkImplies ) {
            checkImplies = false;

            for ( app in apps ) {
                confidence = apps[app].confidence;

                if ( w.apps[app] && w.apps[app].implies ) {
                    // Cast strings to an array
                    if ( typeof w.apps[app].implies === 'string' ) {
                        w.apps[app].implies = [ w.apps[app].implies ];
                    }

                    w.apps[app].implies.forEach(function(implied) {
                        implied = parse(implied)[0];

                        if ( !w.apps[implied.string] ) {
                            w.log('Implied application ' + implied.string + ' does not exist', 'warn');

                            return;
                        }

                        if ( !apps.hasOwnProperty(implied.string) ) {
                            apps[implied.string] = w.detected[url] && w.detected[url][implied.string] ? w.detected[url][implied.string] : new Application(implied.string, true);

                            checkImplies = true;
                        }

                        // Apply app confidence to implied app
                        for ( id in confidence ) {
                            apps[implied.string].confidence[id + ' implied by ' + app] = confidence[id] * ( implied.confidence ? implied.confidence / 100 : 1 );
                        }
                    });
                }
            }
        }

        w.log(Object.keys(apps).length + ' apps detected: ' + Object.keys(apps).join(', ') + ' on ' + url);

        // Keep history of detected apps
        for ( app in apps ) {
            version = apps[app].version;

            // Per URL
            w.detected[url][app] = apps[app];

            if ( w.detected[url][app].getConfidence() >= 100 ) {
                // Per hostname
                if ( /(www.)?((.+?)\.(([a-z]{2,3}\.)?[a-z]{2,6}))$/.test(hostname) && !/((local|dev(elopment)?|stag(e|ing)?|test(ing)?|demo(shop)?|admin|google|cache)\.|\/admin|\.local)/.test(url) ) {
                    if ( !w.ping.hostnames.hasOwnProperty(hostname) ) {
                        w.ping.hostnames[hostname] = { applications: {}, meta: {} };
                    }

                    if ( !w.ping.hostnames[hostname].applications.hasOwnProperty(app) ) {
                        w.ping.hostnames[hostname].applications[app] = { hits: 0 };
                    }

                    w.ping.hostnames[hostname].applications[app].hits ++;

                    if ( version ) {
                        w.ping.hostnames[hostname].applications[app].version = version;
                    }
                } else {
                    w.log('Ignoring hostname "' + hostname + '"');
                }
            }
        }

        // Additional information
        if ( w.ping.hostnames.hasOwnProperty(hostname) && hasHtml ) {
            match = data.html.match(/<html[^>]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"/i);

            if ( match && match.length ) {
                w.ping.hostnames[hostname].meta.language = match[1];
            }

            metaTags.forEach(function(tag) {
                var info = tag.match(/name="(author|copyright|country|description|keywords)"[^>]*content="([^"]+)"/i);

                if ( info && info.length === 3 ) {
                    w.ping.hostnames[hostname].meta[info[1]] = info[2];
                }
            });
        }

        if ( Object.keys(w.ping.hostnames).length >= 20 || w.adCache.length >= 40 ) {
            driver('ping');
        }

        driver('displayApps');
    }
};
522
+
523
+ return w;
524
+ })();
525
+
526
// Expose the library as a CommonJS package when an `exports` object is available
// See http://wiki.commonjs.org/wiki/CommonJS
if ( 'object' === typeof exports ) {
    exports['wappalyzer'] = wappalyzer;
}