user-agents 1.0.1443 → 1.1.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/.circleci/config.yml +3 -2
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/package.json +4 -4
- package/src/update-data.js +109 -232
- package/src/user-agents.json.gz +0 -0
package/src/update-data.js
CHANGED
|
@@ -1,260 +1,138 @@
|
|
|
1
1
|
/* eslint-disable import/no-extraneous-dependencies */
|
|
2
|
-
import fs from
|
|
3
|
-
import { gzipSync } from
|
|
2
|
+
import fs from "fs";
|
|
3
|
+
import { gzipSync } from "zlib";
|
|
4
4
|
|
|
5
|
-
import
|
|
6
|
-
import
|
|
7
|
-
import
|
|
8
|
-
import
|
|
5
|
+
import * as dynamoose from "dynamoose";
|
|
6
|
+
import stableStringify from "fast-json-stable-stringify";
|
|
7
|
+
import random from "random";
|
|
8
|
+
import UAParser from "ua-parser-js";
|
|
9
9
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
const customDimensionMap = {
|
|
13
|
-
'ga:dimension1': 'appName',
|
|
14
|
-
'ga:dimension2': 'connection',
|
|
15
|
-
'ga:dimension3': 'cpuClass',
|
|
16
|
-
'ga:dimension5': 'oscpu',
|
|
17
|
-
'ga:dimension6': 'platform',
|
|
18
|
-
'ga:dimension7': 'pluginsLength',
|
|
19
|
-
'ga:dimension8': 'vendor',
|
|
20
|
-
'ga:dimension9': 'userAgent',
|
|
21
|
-
};
|
|
22
|
-
// And the special timestamp session ID that we'll use for joining data.
|
|
23
|
-
const sessionIdDimension = 'ga:dimension10';
|
|
24
|
-
|
|
25
|
-
// Standard dimensions used by Google Analytics.
|
|
26
|
-
const standardDimensionMap = {
|
|
27
|
-
'ga:browserSize': 'browserSize',
|
|
28
|
-
'ga:deviceCategory': 'deviceCategory',
|
|
29
|
-
'ga:screenResolution': 'screenResolution',
|
|
30
|
-
};
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
// These primarily help map missing data to `null`/`undefined` properly.
|
|
34
|
-
const parseCustomDimension = (value, json = false) => {
|
|
35
|
-
if (value === 'null') {
|
|
36
|
-
return null;
|
|
37
|
-
}
|
|
38
|
-
if (value === 'undefined') {
|
|
39
|
-
return undefined;
|
|
40
|
-
}
|
|
41
|
-
if (json && value) {
|
|
42
|
-
try {
|
|
43
|
-
return parseCustomDimension(JSON.parse(value));
|
|
44
|
-
} catch (error) {
|
|
45
|
-
console.error(`Error parsing "${value}" as JSON.`, error);
|
|
46
|
-
return null;
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
if (typeof value === 'object' && value !== null) {
|
|
50
|
-
const parsedObject = {};
|
|
51
|
-
Object.entries(value).forEach(([key, childValue]) => {
|
|
52
|
-
parsedObject[key] = parseCustomDimension(childValue);
|
|
53
|
-
});
|
|
54
|
-
return parsedObject;
|
|
55
|
-
}
|
|
56
|
-
return value;
|
|
57
|
-
};
|
|
58
|
-
|
|
59
|
-
const parseStandardDimension = value => (
|
|
60
|
-
value === '(not set)' ? null : value
|
|
61
|
-
);
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
const fetchAnalyticsRows = (dimensions, page = 0) => new Promise((resolve, reject) => {
|
|
65
|
-
// Fetch session data from the last 24-48 hours.
|
|
66
|
-
const maximumAgeInDays = parseInt(process.env.MAXIMUM_AGE || 1, 10);
|
|
67
|
-
const endDate = moment().format('YYYY-MM-DD');
|
|
68
|
-
const startDate = moment().subtract(maximumAgeInDays, 'days').format('YYYY-MM-DD');
|
|
69
|
-
|
|
70
|
-
// This is the maximum value allowed by the API.
|
|
71
|
-
const maxResults = 10000;
|
|
72
|
-
const startIndex = 1 + (page * maxResults);
|
|
73
|
-
|
|
74
|
-
gaApi({
|
|
75
|
-
// Credential details.
|
|
76
|
-
clientId: 'user-agents-npm-package-update.apps.googleusercontent.com',
|
|
77
|
-
email: 'user-agents-npm-package-update@user-agents-npm-package.iam.gserviceaccount.com',
|
|
78
|
-
key: 'google-analytics-credentials.json',
|
|
79
|
-
ids: 'ga:115995502',
|
|
80
|
-
// Request details.
|
|
81
|
-
endDate,
|
|
82
|
-
dimensions: dimensions.join(','),
|
|
83
|
-
maxResults,
|
|
84
|
-
metrics: 'ga:sessions',
|
|
85
|
-
sort: sessionIdDimension,
|
|
86
|
-
startDate,
|
|
87
|
-
startIndex,
|
|
88
|
-
}, (error, data) => {
|
|
89
|
-
if (error) {
|
|
90
|
-
return reject(error);
|
|
91
|
-
}
|
|
92
|
-
return resolve(data.rows);
|
|
93
|
-
}, { cacheDir: '.' });
|
|
10
|
+
const ddb = new dynamoose.aws.ddb.DynamoDB({
|
|
11
|
+
region: "us-east-2",
|
|
94
12
|
});
|
|
13
|
+
dynamoose.aws.ddb.set(ddb);
|
|
14
|
+
|
|
15
|
+
const SubmissionModel = dynamoose.model(
|
|
16
|
+
"userAgentsAnalyticsSubmissionTable",
|
|
17
|
+
new dynamoose.Schema(
|
|
18
|
+
{
|
|
19
|
+
id: {
|
|
20
|
+
type: String,
|
|
21
|
+
hashKey: true,
|
|
22
|
+
},
|
|
23
|
+
ip: String,
|
|
24
|
+
profile: Object,
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
saveUnknown: ["profile.**"],
|
|
28
|
+
timestamps: { createdAt: "timestamp", updatedAt: undefined },
|
|
29
|
+
},
|
|
30
|
+
),
|
|
31
|
+
{ create: false, update: false },
|
|
32
|
+
);
|
|
95
33
|
|
|
34
|
+
const getUserAgentTable = async (limit = 1e4) => {
|
|
35
|
+
const minimumTimestamp = Date.now() - 1 * 24 * 60 * 60 * 1000;
|
|
96
36
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
const dimensionGroupCount = Math.ceil(dimensions.length / (maximumDimensionsPerRequest - 1));
|
|
104
|
-
const dimensionGroups = [];
|
|
105
|
-
for (let i = 0; i < dimensionGroupCount; i += 1) {
|
|
106
|
-
const startIndex = (maximumDimensionsPerRequest - 1) * i;
|
|
107
|
-
const endIndex = (startIndex + maximumDimensionsPerRequest) - 1;
|
|
108
|
-
dimensionGroups.push([sessionIdDimension].concat(dimensions.slice(startIndex, endIndex)));
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
// Now we loop through and paginate the results, joining the dimensions by session ID as we go.
|
|
112
|
-
const sessions = {};
|
|
113
|
-
const groupCounts = {};
|
|
114
|
-
let page = 0;
|
|
115
|
-
let newRowCount;
|
|
37
|
+
// Scan through all recent profiles keeping track of the count of each.
|
|
38
|
+
let lastKey = null;
|
|
39
|
+
const countsByProfile = {};
|
|
40
|
+
let totalCount = 0;
|
|
41
|
+
let uniqueCount = 0;
|
|
42
|
+
let ipAddressAlreadySeen = {};
|
|
116
43
|
do {
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
rows.forEach((row) => {
|
|
123
|
-
const sessionId = row[0];
|
|
124
|
-
groupCounts[sessionId] = (groupCounts[sessionId] || 0) + 1;
|
|
125
|
-
|
|
126
|
-
sessions[sessionId] = sessions[sessionId] || {};
|
|
127
|
-
// Exclude the session ID (first) and the session count metric (last).
|
|
128
|
-
row.slice(1, -1).forEach((value, index) => {
|
|
129
|
-
sessions[sessionId][dimensionGroup[index + 1]] = value;
|
|
130
|
-
});
|
|
131
|
-
});
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
// Move on to the next page of requests if necessary.
|
|
135
|
-
page += 1;
|
|
136
|
-
} while (newRowCount > 0);
|
|
137
|
-
|
|
138
|
-
// Delete any partial data.
|
|
139
|
-
Object.keys(sessions).forEach((sessionId) => {
|
|
140
|
-
if (groupCounts[sessionId] !== dimensionGroupCount) {
|
|
141
|
-
delete sessions[sessionId];
|
|
44
|
+
const scan = SubmissionModel.scan(
|
|
45
|
+
new dynamoose.Condition().filter("timestamp").gt(minimumTimestamp),
|
|
46
|
+
);
|
|
47
|
+
if (lastKey) {
|
|
48
|
+
scan.startAt(lastKey);
|
|
142
49
|
}
|
|
143
|
-
});
|
|
144
50
|
|
|
145
|
-
|
|
146
|
-
}
|
|
51
|
+
const response = await scan.exec();
|
|
52
|
+
response.forEach(({ ip, profile }) => {
|
|
53
|
+
if (ipAddressAlreadySeen[ip]) return;
|
|
54
|
+
ipAddressAlreadySeen[ip] = true;
|
|
147
55
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
const session = {
|
|
153
|
-
timestamp: parseInt(sessionId.split('-')[0], 10),
|
|
154
|
-
};
|
|
155
|
-
|
|
156
|
-
Object.entries(customDimensionMap).forEach(([rawDimension, dimension]) => {
|
|
157
|
-
const json = dimension === 'connection';
|
|
158
|
-
session[dimension] = parseCustomDimension(rawSession[rawDimension], json);
|
|
159
|
-
if (dimension === 'connection' && session[dimension]) {
|
|
160
|
-
if (session[dimension].rtt) {
|
|
161
|
-
session[dimension].rtt = parseInt(session[dimension].rtt, 10);
|
|
162
|
-
}
|
|
163
|
-
if (session[dimension].downlink) {
|
|
164
|
-
session[dimension].downlink = parseFloat(session[dimension].downlink);
|
|
165
|
-
}
|
|
166
|
-
if (session[dimension].downlinkMax) {
|
|
167
|
-
session[dimension].downlinkMax = parseFloat(session[dimension].downlinkMax);
|
|
168
|
-
}
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
if (dimension === 'pluginsLength') {
|
|
172
|
-
session[dimension] = parseInt(session[dimension], 10);
|
|
56
|
+
const stringifiedProfile = stableStringify(profile);
|
|
57
|
+
if (!countsByProfile[stringifiedProfile]) {
|
|
58
|
+
countsByProfile[stringifiedProfile] = 0;
|
|
59
|
+
uniqueCount += 1;
|
|
173
60
|
}
|
|
61
|
+
countsByProfile[stringifiedProfile] += 1;
|
|
62
|
+
totalCount += 1;
|
|
174
63
|
});
|
|
175
64
|
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
if (dimension === 'browserSize' || dimension === 'screenResolution') {
|
|
179
|
-
let height = null;
|
|
180
|
-
let width = null;
|
|
181
|
-
if (/\d+x\d+/.test(value)) {
|
|
182
|
-
[width, height] = value.split('x').map(pixels => parseInt(pixels, 10));
|
|
183
|
-
}
|
|
184
|
-
const dimensionPrefix = dimension === 'browserSize' ? 'viewport' : 'screen';
|
|
185
|
-
session[`${dimensionPrefix}Height`] = height;
|
|
186
|
-
session[`${dimensionPrefix}Width`] = width;
|
|
187
|
-
} else {
|
|
188
|
-
session[dimension] = value;
|
|
189
|
-
}
|
|
190
|
-
});
|
|
65
|
+
lastKey = response.lastKey;
|
|
66
|
+
} while (lastKey);
|
|
191
67
|
|
|
192
|
-
|
|
68
|
+
// Add some noise to the counts/weights.
|
|
69
|
+
const n = () => random.normal();
|
|
70
|
+
Object.entries(countsByProfile).forEach(([stringifiedProfile, count]) => {
|
|
71
|
+
const unnormalizedWeight =
|
|
72
|
+
Array(2 * count)
|
|
73
|
+
.fill()
|
|
74
|
+
.reduce((sum) => sum + n()() ** 2, 0) / 2;
|
|
75
|
+
countsByProfile[stringifiedProfile] = unnormalizedWeight;
|
|
193
76
|
});
|
|
194
77
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
const
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
delete uniqueSessions[uniqueKey].timestamp;
|
|
78
|
+
// Accumulate the profiles and add/remove a few properties to match the historical format.
|
|
79
|
+
const profiles = [];
|
|
80
|
+
for (let stringifiedProfile in countsByProfile) {
|
|
81
|
+
if (countsByProfile.hasOwnProperty(stringifiedProfile)) {
|
|
82
|
+
const profile = JSON.parse(stringifiedProfile);
|
|
83
|
+
profile.weight = countsByProfile[stringifiedProfile];
|
|
84
|
+
delete profile.sessionId;
|
|
85
|
+
|
|
86
|
+
// Deleting these because they weren't in the old format, but we should leave them in...
|
|
87
|
+
delete profile.language;
|
|
88
|
+
delete profile.oscpu;
|
|
89
|
+
|
|
90
|
+
// Find the device category.
|
|
91
|
+
const parser = new UAParser(profile.userAgent);
|
|
92
|
+
const device = parser.getDevice();
|
|
93
|
+
// Sketchy, but I validated this on historical data and it is a 100% match.
|
|
94
|
+
profile.deviceCategory =
|
|
95
|
+
{ mobile: "mobile", tablet: "tablet", undefined: "desktop" }[
|
|
96
|
+
`${device.type}`
|
|
97
|
+
] ?? "desktop";
|
|
98
|
+
|
|
99
|
+
profiles.push(profile);
|
|
100
|
+
delete countsByProfile[stringifiedProfile];
|
|
219
101
|
}
|
|
220
|
-
|
|
221
|
-
});
|
|
222
|
-
|
|
223
|
-
// Normalize the weights to 1.
|
|
224
|
-
let totalWeight = 0;
|
|
102
|
+
}
|
|
225
103
|
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
104
|
+
// Sort by descending weight.
|
|
105
|
+
profiles.sort((a, b) => b.weight - a.weight);
|
|
106
|
+
|
|
107
|
+
// Apply the count limit and normalize the weights.
|
|
108
|
+
profiles.splice(limit);
|
|
109
|
+
const totalWeight = profiles.reduce(
|
|
110
|
+
(total, profile) => total + profile.weight,
|
|
111
|
+
0,
|
|
112
|
+
);
|
|
113
|
+
profiles.forEach((profile) => {
|
|
114
|
+
profile.weight /= totalWeight;
|
|
235
115
|
});
|
|
236
116
|
|
|
237
|
-
|
|
238
|
-
const sessionList = Object.values(uniqueSessions);
|
|
239
|
-
sessionList.sort((a, b) => b.weight - a.weight);
|
|
240
|
-
|
|
241
|
-
return sessionList;
|
|
117
|
+
return profiles;
|
|
242
118
|
};
|
|
243
119
|
|
|
244
|
-
|
|
245
120
|
if (!module.parent) {
|
|
246
121
|
const filename = process.argv[2];
|
|
247
122
|
if (!filename) {
|
|
248
|
-
throw new Error(
|
|
123
|
+
throw new Error(
|
|
124
|
+
"An output filename must be passed as an argument to the command.",
|
|
125
|
+
);
|
|
249
126
|
}
|
|
250
|
-
getUserAgentTable()
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
127
|
+
getUserAgentTable()
|
|
128
|
+
.then(async (userAgents) => {
|
|
129
|
+
const stringifiedUserAgents = JSON.stringify(userAgents, null, 2);
|
|
130
|
+
// Compress the content if the extension ends with `.gz`.
|
|
131
|
+
const content = filename.endsWith(".gz")
|
|
132
|
+
? gzipSync(stringifiedUserAgents)
|
|
133
|
+
: stringifiedUserAgents;
|
|
134
|
+
fs.writeFileSync(filename, content);
|
|
135
|
+
})
|
|
258
136
|
.catch((error) => {
|
|
259
137
|
// eslint-disable-next-line no-console
|
|
260
138
|
console.error(error);
|
|
@@ -262,5 +140,4 @@ if (!module.parent) {
|
|
|
262
140
|
});
|
|
263
141
|
}
|
|
264
142
|
|
|
265
|
-
|
|
266
143
|
export default getUserAgentTable;
|
package/src/user-agents.json.gz
CHANGED
|
Binary file
|