user-agents 1.0.1443 → 1.1.0

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -1,260 +1,138 @@
1
1
  /* eslint-disable import/no-extraneous-dependencies */
2
- import fs from 'fs';
3
- import { gzipSync } from 'zlib';
2
+ import fs from "fs";
3
+ import { gzipSync } from "zlib";
4
4
 
5
- import jsonStableStringify from 'json-stable-stringify';
6
- import gaApi from 'ga-api';
7
- import moment from 'moment';
8
- import random from 'random';
5
+ import * as dynamoose from "dynamoose";
6
+ import stableStringify from "fast-json-stable-stringify";
7
+ import random from "random";
8
+ import UAParser from "ua-parser-js";
9
9
 
10
-
11
- // Custom dimensions, see: https://intoli.com/blog/user-agents/
12
- const customDimensionMap = {
13
- 'ga:dimension1': 'appName',
14
- 'ga:dimension2': 'connection',
15
- 'ga:dimension3': 'cpuClass',
16
- 'ga:dimension5': 'oscpu',
17
- 'ga:dimension6': 'platform',
18
- 'ga:dimension7': 'pluginsLength',
19
- 'ga:dimension8': 'vendor',
20
- 'ga:dimension9': 'userAgent',
21
- };
22
- // And the special timestamp session ID that we'll use for joining data.
23
- const sessionIdDimension = 'ga:dimension10';
24
-
25
- // Standard dimensions used by Google Analytics.
26
- const standardDimensionMap = {
27
- 'ga:browserSize': 'browserSize',
28
- 'ga:deviceCategory': 'deviceCategory',
29
- 'ga:screenResolution': 'screenResolution',
30
- };
31
-
32
-
33
- // These primarily help map missing data to `null`/`undefined` properly.
34
- const parseCustomDimension = (value, json = false) => {
35
- if (value === 'null') {
36
- return null;
37
- }
38
- if (value === 'undefined') {
39
- return undefined;
40
- }
41
- if (json && value) {
42
- try {
43
- return parseCustomDimension(JSON.parse(value));
44
- } catch (error) {
45
- console.error(`Error parsing "${value}" as JSON.`, error);
46
- return null;
47
- }
48
- }
49
- if (typeof value === 'object' && value !== null) {
50
- const parsedObject = {};
51
- Object.entries(value).forEach(([key, childValue]) => {
52
- parsedObject[key] = parseCustomDimension(childValue);
53
- });
54
- return parsedObject;
55
- }
56
- return value;
57
- };
58
-
59
- const parseStandardDimension = value => (
60
- value === '(not set)' ? null : value
61
- );
62
-
63
-
64
- const fetchAnalyticsRows = (dimensions, page = 0) => new Promise((resolve, reject) => {
65
- // Fetch session data from the last 24-48 hours.
66
- const maximumAgeInDays = parseInt(process.env.MAXIMUM_AGE || 1, 10);
67
- const endDate = moment().format('YYYY-MM-DD');
68
- const startDate = moment().subtract(maximumAgeInDays, 'days').format('YYYY-MM-DD');
69
-
70
- // This is the maximum value allowed by the API.
71
- const maxResults = 10000;
72
- const startIndex = 1 + (page * maxResults);
73
-
74
- gaApi({
75
- // Credential details.
76
- clientId: 'user-agents-npm-package-update.apps.googleusercontent.com',
77
- email: 'user-agents-npm-package-update@user-agents-npm-package.iam.gserviceaccount.com',
78
- key: 'google-analytics-credentials.json',
79
- ids: 'ga:115995502',
80
- // Request details.
81
- endDate,
82
- dimensions: dimensions.join(','),
83
- maxResults,
84
- metrics: 'ga:sessions',
85
- sort: sessionIdDimension,
86
- startDate,
87
- startIndex,
88
- }, (error, data) => {
89
- if (error) {
90
- return reject(error);
91
- }
92
- return resolve(data.rows);
93
- }, { cacheDir: '.' });
10
+ const ddb = new dynamoose.aws.ddb.DynamoDB({
11
+ region: "us-east-2",
94
12
  });
13
+ dynamoose.aws.ddb.set(ddb);
14
+
15
+ const SubmissionModel = dynamoose.model(
16
+ "userAgentsAnalyticsSubmissionTable",
17
+ new dynamoose.Schema(
18
+ {
19
+ id: {
20
+ type: String,
21
+ hashKey: true,
22
+ },
23
+ ip: String,
24
+ profile: Object,
25
+ },
26
+ {
27
+ saveUnknown: ["profile.**"],
28
+ timestamps: { createdAt: "timestamp", updatedAt: undefined },
29
+ },
30
+ ),
31
+ { create: false, update: false },
32
+ );
95
33
 
34
+ const getUserAgentTable = async (limit = 1e4) => {
35
+ const minimumTimestamp = Date.now() - 1 * 24 * 60 * 60 * 1000;
96
36
 
97
- const getRawSessions = async () => {
98
- // We can request a maximum of 7 dimensions at once, so we need to break these up into groups
99
- // of 6 + 1 (the 1 being the session ID). We can then join these together into higher dimensional
100
- // objects based on the common session IDs.
101
- const maximumDimensionsPerRequest = 7;
102
- const dimensions = Object.keys(customDimensionMap).concat(Object.keys(standardDimensionMap));
103
- const dimensionGroupCount = Math.ceil(dimensions.length / (maximumDimensionsPerRequest - 1));
104
- const dimensionGroups = [];
105
- for (let i = 0; i < dimensionGroupCount; i += 1) {
106
- const startIndex = (maximumDimensionsPerRequest - 1) * i;
107
- const endIndex = (startIndex + maximumDimensionsPerRequest) - 1;
108
- dimensionGroups.push([sessionIdDimension].concat(dimensions.slice(startIndex, endIndex)));
109
- }
110
-
111
- // Now we loop through and paginate the results, joining the dimensions by session ID as we go.
112
- const sessions = {};
113
- const groupCounts = {};
114
- let page = 0;
115
- let newRowCount;
37
+ // Scan through all recent profiles keeping track of the count of each.
38
+ let lastKey = null;
39
+ const countsByProfile = {};
40
+ let totalCount = 0;
41
+ let uniqueCount = 0;
42
+ let ipAddressAlreadySeen = {};
116
43
  do {
117
- newRowCount = 0;
118
- for (let groupIndex = 0; groupIndex < dimensionGroupCount; groupIndex += 1) {
119
- const dimensionGroup = dimensionGroups[groupIndex];
120
- const rows = (await fetchAnalyticsRows(dimensionGroup, page)) || [];
121
- newRowCount = Math.max(newRowCount, rows.length);
122
- rows.forEach((row) => {
123
- const sessionId = row[0];
124
- groupCounts[sessionId] = (groupCounts[sessionId] || 0) + 1;
125
-
126
- sessions[sessionId] = sessions[sessionId] || {};
127
- // Exclude the session ID (first) and the session count metric (last).
128
- row.slice(1, -1).forEach((value, index) => {
129
- sessions[sessionId][dimensionGroup[index + 1]] = value;
130
- });
131
- });
132
- }
133
-
134
- // Move on to the next page of requests if necessary.
135
- page += 1;
136
- } while (newRowCount > 0);
137
-
138
- // Delete any partial data.
139
- Object.keys(sessions).forEach((sessionId) => {
140
- if (groupCounts[sessionId] !== dimensionGroupCount) {
141
- delete sessions[sessionId];
44
+ const scan = SubmissionModel.scan(
45
+ new dynamoose.Condition().filter("timestamp").gt(minimumTimestamp),
46
+ );
47
+ if (lastKey) {
48
+ scan.startAt(lastKey);
142
49
  }
143
- });
144
50
 
145
- return sessions;
146
- };
51
+ const response = await scan.exec();
52
+ response.forEach(({ ip, profile }) => {
53
+ if (ipAddressAlreadySeen[ip]) return;
54
+ ipAddressAlreadySeen[ip] = true;
147
55
 
148
-
149
- const parseSessions = (rawSessions) => {
150
- const sessions = {};
151
- Object.entries(rawSessions).forEach(([sessionId, rawSession]) => {
152
- const session = {
153
- timestamp: parseInt(sessionId.split('-')[0], 10),
154
- };
155
-
156
- Object.entries(customDimensionMap).forEach(([rawDimension, dimension]) => {
157
- const json = dimension === 'connection';
158
- session[dimension] = parseCustomDimension(rawSession[rawDimension], json);
159
- if (dimension === 'connection' && session[dimension]) {
160
- if (session[dimension].rtt) {
161
- session[dimension].rtt = parseInt(session[dimension].rtt, 10);
162
- }
163
- if (session[dimension].downlink) {
164
- session[dimension].downlink = parseFloat(session[dimension].downlink);
165
- }
166
- if (session[dimension].downlinkMax) {
167
- session[dimension].downlinkMax = parseFloat(session[dimension].downlinkMax);
168
- }
169
- }
170
-
171
- if (dimension === 'pluginsLength') {
172
- session[dimension] = parseInt(session[dimension], 10);
56
+ const stringifiedProfile = stableStringify(profile);
57
+ if (!countsByProfile[stringifiedProfile]) {
58
+ countsByProfile[stringifiedProfile] = 0;
59
+ uniqueCount += 1;
173
60
  }
61
+ countsByProfile[stringifiedProfile] += 1;
62
+ totalCount += 1;
174
63
  });
175
64
 
176
- Object.entries(standardDimensionMap).forEach(([rawDimension, dimension]) => {
177
- const value = parseStandardDimension(rawSession[rawDimension]);
178
- if (dimension === 'browserSize' || dimension === 'screenResolution') {
179
- let height = null;
180
- let width = null;
181
- if (/\d+x\d+/.test(value)) {
182
- [width, height] = value.split('x').map(pixels => parseInt(pixels, 10));
183
- }
184
- const dimensionPrefix = dimension === 'browserSize' ? 'viewport' : 'screen';
185
- session[`${dimensionPrefix}Height`] = height;
186
- session[`${dimensionPrefix}Width`] = width;
187
- } else {
188
- session[dimension] = value;
189
- }
190
- });
65
+ lastKey = response.lastKey;
66
+ } while (lastKey);
191
67
 
192
- sessions[sessionId] = session;
68
+ // Add some noise to the counts/weights.
69
+ const n = () => random.normal();
70
+ Object.entries(countsByProfile).forEach(([stringifiedProfile, count]) => {
71
+ const unnormalizedWeight =
72
+ Array(2 * count)
73
+ .fill()
74
+ .reduce((sum) => sum + n()() ** 2, 0) / 2;
75
+ countsByProfile[stringifiedProfile] = unnormalizedWeight;
193
76
  });
194
77
 
195
- return sessions;
196
- };
197
-
198
-
199
- const getUserAgentTable = async () => {
200
- // Fetch the sessions and process them into parsed objects.
201
- const rawSessions = await getRawSessions();
202
- const sessions = parseSessions(rawSessions);
203
-
204
- // Calculate the number of unique occurrences of each fingerprint.
205
- const uniqueSessions = {};
206
- Object.values(sessions).forEach((session) => {
207
- // Exclude headless browser user agents.
208
- if (/headless/i.test(session.userAgent)) {
209
- return;
210
- }
211
-
212
- const uniqueKey = jsonStableStringify(session);
213
- if (!uniqueSessions[uniqueKey]) {
214
- uniqueSessions[uniqueKey] = {
215
- ...session,
216
- weight: 0,
217
- };
218
- delete uniqueSessions[uniqueKey].timestamp;
78
+ // Accumulate the profiles and add/remove a few properties to match the historical format.
79
+ const profiles = [];
80
+ for (let stringifiedProfile in countsByProfile) {
81
+ if (countsByProfile.hasOwnProperty(stringifiedProfile)) {
82
+ const profile = JSON.parse(stringifiedProfile);
83
+ profile.weight = countsByProfile[stringifiedProfile];
84
+ delete profile.sessionId;
85
+
86
+ // Deleting these because they weren't in the old format, but we should leave them in...
87
+ delete profile.language;
88
+ delete profile.oscpu;
89
+
90
+ // Find the device category.
91
+ const parser = new UAParser(profile.userAgent);
92
+ const device = parser.getDevice();
93
+ // Sketchy, but I validated this on historical data and it is a 100% match.
94
+ profile.deviceCategory =
95
+ { mobile: "mobile", tablet: "tablet", undefined: "desktop" }[
96
+ `${device.type}`
97
+ ] ?? "desktop";
98
+
99
+ profiles.push(profile);
100
+ delete countsByProfile[stringifiedProfile];
219
101
  }
220
- uniqueSessions[uniqueKey].weight += 1;
221
- });
222
-
223
- // Normalize the weights to 1.
224
- let totalWeight = 0;
102
+ }
225
103
 
226
- const n = () => random.normal();
227
- Object.values(uniqueSessions).forEach((session) => {
228
- // eslint-disable-next-line no-param-reassign
229
- session.weight = Array(2 * session.weight).fill().reduce(sum => sum + (n()() ** 2), 0) / 2;
230
- totalWeight += session.weight;
231
- });
232
- Object.values(uniqueSessions).forEach((session) => {
233
- // eslint-disable-next-line no-param-reassign
234
- session.weight /= totalWeight;
104
+ // Sort by descending weight.
105
+ profiles.sort((a, b) => b.weight - a.weight);
106
+
107
+ // Apply the count limit and normalize the weights.
108
+ profiles.splice(limit);
109
+ const totalWeight = profiles.reduce(
110
+ (total, profile) => total + profile.weight,
111
+ 0,
112
+ );
113
+ profiles.forEach((profile) => {
114
+ profile.weight /= totalWeight;
235
115
  });
236
116
 
237
- // Sort them by descreasing weight.
238
- const sessionList = Object.values(uniqueSessions);
239
- sessionList.sort((a, b) => b.weight - a.weight);
240
-
241
- return sessionList;
117
+ return profiles;
242
118
  };
243
119
 
244
-
245
120
  if (!module.parent) {
246
121
  const filename = process.argv[2];
247
122
  if (!filename) {
248
- throw new Error('An output filename must be passed as an argument to the command.');
123
+ throw new Error(
124
+ "An output filename must be passed as an argument to the command.",
125
+ );
249
126
  }
250
- getUserAgentTable().then(async (userAgents) => {
251
- const stringifiedUserAgents = JSON.stringify(userAgents, null, 2);
252
- // Compress the content if the extension ends with `.gz`.
253
- const content = filename.endsWith('.gz')
254
- ? gzipSync(stringifiedUserAgents)
255
- : stringifiedUserAgents;
256
- fs.writeFileSync(filename, content);
257
- })
127
+ getUserAgentTable()
128
+ .then(async (userAgents) => {
129
+ const stringifiedUserAgents = JSON.stringify(userAgents, null, 2);
130
+ // Compress the content if the extension ends with `.gz`.
131
+ const content = filename.endsWith(".gz")
132
+ ? gzipSync(stringifiedUserAgents)
133
+ : stringifiedUserAgents;
134
+ fs.writeFileSync(filename, content);
135
+ })
258
136
  .catch((error) => {
259
137
  // eslint-disable-next-line no-console
260
138
  console.error(error);
@@ -262,5 +140,4 @@ if (!module.parent) {
262
140
  });
263
141
  }
264
142
 
265
-
266
143
  export default getUserAgentTable;
Binary file