user-agents 1.0.1444 → 1.1.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,260 +1,144 @@
1
1
  /* eslint-disable import/no-extraneous-dependencies */
2
- import fs from 'fs';
3
- import { gzipSync } from 'zlib';
2
+ import fs from "fs";
3
+ import { gzipSync } from "zlib";
4
4
 
5
- import jsonStableStringify from 'json-stable-stringify';
6
- import gaApi from 'ga-api';
7
- import moment from 'moment';
8
- import random from 'random';
5
+ import * as dynamoose from "dynamoose";
6
+ import stableStringify from "fast-json-stable-stringify";
7
+ import isbot from "isbot";
8
+ import random from "random";
9
+ import UAParser from "ua-parser-js";
9
10
 
10
-
11
- // Custom dimensions, see: https://intoli.com/blog/user-agents/
12
- const customDimensionMap = {
13
- 'ga:dimension1': 'appName',
14
- 'ga:dimension2': 'connection',
15
- 'ga:dimension3': 'cpuClass',
16
- 'ga:dimension5': 'oscpu',
17
- 'ga:dimension6': 'platform',
18
- 'ga:dimension7': 'pluginsLength',
19
- 'ga:dimension8': 'vendor',
20
- 'ga:dimension9': 'userAgent',
21
- };
22
- // And the special timestamp session ID that we'll use for joining data.
23
- const sessionIdDimension = 'ga:dimension10';
24
-
25
- // Standard dimensions used by Google Analytics.
26
- const standardDimensionMap = {
27
- 'ga:browserSize': 'browserSize',
28
- 'ga:deviceCategory': 'deviceCategory',
29
- 'ga:screenResolution': 'screenResolution',
30
- };
31
-
32
-
33
- // These primarily help map missing data to `null`/`undefined` properly.
34
- const parseCustomDimension = (value, json = false) => {
35
- if (value === 'null') {
36
- return null;
37
- }
38
- if (value === 'undefined') {
39
- return undefined;
40
- }
41
- if (json && value) {
42
- try {
43
- return parseCustomDimension(JSON.parse(value));
44
- } catch (error) {
45
- console.error(`Error parsing "${value}" as JSON.`, error);
46
- return null;
47
- }
48
- }
49
- if (typeof value === 'object' && value !== null) {
50
- const parsedObject = {};
51
- Object.entries(value).forEach(([key, childValue]) => {
52
- parsedObject[key] = parseCustomDimension(childValue);
53
- });
54
- return parsedObject;
55
- }
56
- return value;
57
- };
58
-
59
- const parseStandardDimension = value => (
60
- value === '(not set)' ? null : value
61
- );
62
-
63
-
64
- const fetchAnalyticsRows = (dimensions, page = 0) => new Promise((resolve, reject) => {
65
- // Fetch session data from the last 24-48 hours.
66
- const maximumAgeInDays = parseInt(process.env.MAXIMUM_AGE || 1, 10);
67
- const endDate = moment().format('YYYY-MM-DD');
68
- const startDate = moment().subtract(maximumAgeInDays, 'days').format('YYYY-MM-DD');
69
-
70
- // This is the maximum value allowed by the API.
71
- const maxResults = 10000;
72
- const startIndex = 1 + (page * maxResults);
73
-
74
- gaApi({
75
- // Credential details.
76
- clientId: 'user-agents-npm-package-update.apps.googleusercontent.com',
77
- email: 'user-agents-npm-package-update@user-agents-npm-package.iam.gserviceaccount.com',
78
- key: 'google-analytics-credentials.json',
79
- ids: 'ga:115995502',
80
- // Request details.
81
- endDate,
82
- dimensions: dimensions.join(','),
83
- maxResults,
84
- metrics: 'ga:sessions',
85
- sort: sessionIdDimension,
86
- startDate,
87
- startIndex,
88
- }, (error, data) => {
89
- if (error) {
90
- return reject(error);
91
- }
92
- return resolve(data.rows);
93
- }, { cacheDir: '.' });
11
+ const ddb = new dynamoose.aws.ddb.DynamoDB({
12
+ region: "us-east-2",
94
13
  });
14
+ dynamoose.aws.ddb.set(ddb);
15
+
16
+ const SubmissionModel = dynamoose.model(
17
+ "userAgentsAnalyticsSubmissionTable",
18
+ new dynamoose.Schema(
19
+ {
20
+ id: {
21
+ type: String,
22
+ hashKey: true,
23
+ },
24
+ ip: String,
25
+ profile: Object,
26
+ },
27
+ {
28
+ saveUnknown: ["profile.**"],
29
+ timestamps: { createdAt: "timestamp", updatedAt: undefined },
30
+ },
31
+ ),
32
+ { create: false, update: false },
33
+ );
95
34
 
35
+ const getUserAgentTable = async (limit = 1e4) => {
36
+ const minimumTimestamp = Date.now() - 1 * 24 * 60 * 60 * 1000;
96
37
 
97
- const getRawSessions = async () => {
98
- // We can request a maximum of 7 dimensions at once, so we need to break these up into groups
99
- // of 6 + 1 (the 1 being the session ID). We can then join these together into higher dimensional
100
- // objects based on the common session IDs.
101
- const maximumDimensionsPerRequest = 7;
102
- const dimensions = Object.keys(customDimensionMap).concat(Object.keys(standardDimensionMap));
103
- const dimensionGroupCount = Math.ceil(dimensions.length / (maximumDimensionsPerRequest - 1));
104
- const dimensionGroups = [];
105
- for (let i = 0; i < dimensionGroupCount; i += 1) {
106
- const startIndex = (maximumDimensionsPerRequest - 1) * i;
107
- const endIndex = (startIndex + maximumDimensionsPerRequest) - 1;
108
- dimensionGroups.push([sessionIdDimension].concat(dimensions.slice(startIndex, endIndex)));
109
- }
110
-
111
- // Now we loop through and paginate the results, joining the dimensions by session ID as we go.
112
- const sessions = {};
113
- const groupCounts = {};
114
- let page = 0;
115
- let newRowCount;
38
+ // Scan through all recent profiles keeping track of the count of each.
39
+ let lastKey = null;
40
+ const countsByProfile = {};
41
+ let totalCount = 0;
42
+ let uniqueCount = 0;
43
+ let ipAddressAlreadySeen = {};
116
44
  do {
117
- newRowCount = 0;
118
- for (let groupIndex = 0; groupIndex < dimensionGroupCount; groupIndex += 1) {
119
- const dimensionGroup = dimensionGroups[groupIndex];
120
- const rows = (await fetchAnalyticsRows(dimensionGroup, page)) || [];
121
- newRowCount = Math.max(newRowCount, rows.length);
122
- rows.forEach((row) => {
123
- const sessionId = row[0];
124
- groupCounts[sessionId] = (groupCounts[sessionId] || 0) + 1;
125
-
126
- sessions[sessionId] = sessions[sessionId] || {};
127
- // Exclude the session ID (first) and the session count metric (last).
128
- row.slice(1, -1).forEach((value, index) => {
129
- sessions[sessionId][dimensionGroup[index + 1]] = value;
130
- });
131
- });
45
+ const scan = SubmissionModel.scan(
46
+ new dynamoose.Condition().filter("timestamp").gt(minimumTimestamp),
47
+ );
48
+ if (lastKey) {
49
+ scan.startAt(lastKey);
132
50
  }
133
51
 
134
- // Move on to the next page of requests if necessary.
135
- page += 1;
136
- } while (newRowCount > 0);
52
+ const response = await scan.exec();
53
+ response.forEach(({ ip, profile }) => {
54
+ // Only count one profile per IP address.
55
+ if (ipAddressAlreadySeen[ip]) return;
56
+ ipAddressAlreadySeen[ip] = true;
137
57
 
138
- // Delete any partial data.
139
- Object.keys(sessions).forEach((sessionId) => {
140
- if (groupCounts[sessionId] !== dimensionGroupCount) {
141
- delete sessions[sessionId];
142
- }
143
- });
144
-
145
- return sessions;
146
- };
147
-
148
-
149
- const parseSessions = (rawSessions) => {
150
- const sessions = {};
151
- Object.entries(rawSessions).forEach(([sessionId, rawSession]) => {
152
- const session = {
153
- timestamp: parseInt(sessionId.split('-')[0], 10),
154
- };
155
-
156
- Object.entries(customDimensionMap).forEach(([rawDimension, dimension]) => {
157
- const json = dimension === 'connection';
158
- session[dimension] = parseCustomDimension(rawSession[rawDimension], json);
159
- if (dimension === 'connection' && session[dimension]) {
160
- if (session[dimension].rtt) {
161
- session[dimension].rtt = parseInt(session[dimension].rtt, 10);
162
- }
163
- if (session[dimension].downlink) {
164
- session[dimension].downlink = parseFloat(session[dimension].downlink);
165
- }
166
- if (session[dimension].downlinkMax) {
167
- session[dimension].downlinkMax = parseFloat(session[dimension].downlinkMax);
168
- }
169
- }
58
+ // Filter out bots like Googlebot and YandexBot.
59
+ if (isbot(profile.userAgent)) return;
170
60
 
171
- if (dimension === 'pluginsLength') {
172
- session[dimension] = parseInt(session[dimension], 10);
61
+ // Track the counts for this exact profile.
62
+ const stringifiedProfile = stableStringify(profile);
63
+ if (!countsByProfile[stringifiedProfile]) {
64
+ countsByProfile[stringifiedProfile] = 0;
65
+ uniqueCount += 1;
173
66
  }
67
+ countsByProfile[stringifiedProfile] += 1;
68
+ totalCount += 1;
174
69
  });
175
70
 
176
- Object.entries(standardDimensionMap).forEach(([rawDimension, dimension]) => {
177
- const value = parseStandardDimension(rawSession[rawDimension]);
178
- if (dimension === 'browserSize' || dimension === 'screenResolution') {
179
- let height = null;
180
- let width = null;
181
- if (/\d+x\d+/.test(value)) {
182
- [width, height] = value.split('x').map(pixels => parseInt(pixels, 10));
183
- }
184
- const dimensionPrefix = dimension === 'browserSize' ? 'viewport' : 'screen';
185
- session[`${dimensionPrefix}Height`] = height;
186
- session[`${dimensionPrefix}Width`] = width;
187
- } else {
188
- session[dimension] = value;
189
- }
190
- });
71
+ lastKey = response.lastKey;
72
+ } while (lastKey);
191
73
 
192
- sessions[sessionId] = session;
74
+ // Add some noise to the counts/weights.
75
+ const n = () => random.normal();
76
+ Object.entries(countsByProfile).forEach(([stringifiedProfile, count]) => {
77
+ const unnormalizedWeight =
78
+ Array(2 * count)
79
+ .fill()
80
+ .reduce((sum) => sum + n()() ** 2, 0) / 2;
81
+ countsByProfile[stringifiedProfile] = unnormalizedWeight;
193
82
  });
194
83
 
195
- return sessions;
196
- };
197
-
198
-
199
- const getUserAgentTable = async () => {
200
- // Fetch the sessions and process them into parsed objects.
201
- const rawSessions = await getRawSessions();
202
- const sessions = parseSessions(rawSessions);
203
-
204
- // Calculate the number of unique occurrences of each fingerprint.
205
- const uniqueSessions = {};
206
- Object.values(sessions).forEach((session) => {
207
- // Exclude headless browser user agents.
208
- if (/headless/i.test(session.userAgent)) {
209
- return;
210
- }
211
-
212
- const uniqueKey = jsonStableStringify(session);
213
- if (!uniqueSessions[uniqueKey]) {
214
- uniqueSessions[uniqueKey] = {
215
- ...session,
216
- weight: 0,
217
- };
218
- delete uniqueSessions[uniqueKey].timestamp;
84
+ // Accumulate the profiles and add/remove a few properties to match the historical format.
85
+ const profiles = [];
86
+ for (let stringifiedProfile in countsByProfile) {
87
+ if (countsByProfile.hasOwnProperty(stringifiedProfile)) {
88
+ const profile = JSON.parse(stringifiedProfile);
89
+ profile.weight = countsByProfile[stringifiedProfile];
90
+ delete profile.sessionId;
91
+
92
+ // Deleting these because they weren't in the old format, but we should leave them in...
93
+ delete profile.language;
94
+ delete profile.oscpu;
95
+
96
+ // Find the device category.
97
+ const parser = new UAParser(profile.userAgent);
98
+ const device = parser.getDevice();
99
+ // Sketchy, but I validated this on historical data and it is a 100% match.
100
+ profile.deviceCategory =
101
+ { mobile: "mobile", tablet: "tablet", undefined: "desktop" }[
102
+ `${device.type}`
103
+ ] ?? "desktop";
104
+
105
+ profiles.push(profile);
106
+ delete countsByProfile[stringifiedProfile];
219
107
  }
220
- uniqueSessions[uniqueKey].weight += 1;
221
- });
222
-
223
- // Normalize the weights to 1.
224
- let totalWeight = 0;
108
+ }
225
109
 
226
- const n = () => random.normal();
227
- Object.values(uniqueSessions).forEach((session) => {
228
- // eslint-disable-next-line no-param-reassign
229
- session.weight = Array(2 * session.weight).fill().reduce(sum => sum + (n()() ** 2), 0) / 2;
230
- totalWeight += session.weight;
231
- });
232
- Object.values(uniqueSessions).forEach((session) => {
233
- // eslint-disable-next-line no-param-reassign
234
- session.weight /= totalWeight;
110
+ // Sort by descending weight.
111
+ profiles.sort((a, b) => b.weight - a.weight);
112
+
113
+ // Apply the count limit and normalize the weights.
114
+ profiles.splice(limit);
115
+ const totalWeight = profiles.reduce(
116
+ (total, profile) => total + profile.weight,
117
+ 0,
118
+ );
119
+ profiles.forEach((profile) => {
120
+ profile.weight /= totalWeight;
235
121
  });
236
122
 
237
- // Sort them by descreasing weight.
238
- const sessionList = Object.values(uniqueSessions);
239
- sessionList.sort((a, b) => b.weight - a.weight);
240
-
241
- return sessionList;
123
+ return profiles;
242
124
  };
243
125
 
244
-
245
126
  if (!module.parent) {
246
127
  const filename = process.argv[2];
247
128
  if (!filename) {
248
- throw new Error('An output filename must be passed as an argument to the command.');
129
+ throw new Error(
130
+ "An output filename must be passed as an argument to the command.",
131
+ );
249
132
  }
250
- getUserAgentTable().then(async (userAgents) => {
251
- const stringifiedUserAgents = JSON.stringify(userAgents, null, 2);
252
- // Compress the content if the extension ends with `.gz`.
253
- const content = filename.endsWith('.gz')
254
- ? gzipSync(stringifiedUserAgents)
255
- : stringifiedUserAgents;
256
- fs.writeFileSync(filename, content);
257
- })
133
+ getUserAgentTable()
134
+ .then(async (userAgents) => {
135
+ const stringifiedUserAgents = JSON.stringify(userAgents, null, 2);
136
+ // Compress the content if the extension ends with `.gz`.
137
+ const content = filename.endsWith(".gz")
138
+ ? gzipSync(stringifiedUserAgents)
139
+ : stringifiedUserAgents;
140
+ fs.writeFileSync(filename, content);
141
+ })
258
142
  .catch((error) => {
259
143
  // eslint-disable-next-line no-console
260
144
  console.error(error);
@@ -262,5 +146,4 @@ if (!module.parent) {
262
146
  });
263
147
  }
264
148
 
265
-
266
149
  export default getUserAgentTable;
Binary file