user-agents 1.0.1444 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.circleci/config.yml +3 -2
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/package.json +5 -4
- package/src/update-data.js +114 -231
- package/src/user-agents.json.gz +0 -0
package/src/update-data.js
CHANGED
|
@@ -1,260 +1,144 @@
|
|
|
1
1
|
/* eslint-disable import/no-extraneous-dependencies */
|
|
2
|
-
import fs from
|
|
3
|
-
import { gzipSync } from
|
|
2
|
+
import fs from "fs";
|
|
3
|
+
import { gzipSync } from "zlib";
|
|
4
4
|
|
|
5
|
-
import
|
|
6
|
-
import
|
|
7
|
-
import
|
|
8
|
-
import random from
|
|
5
|
+
import * as dynamoose from "dynamoose";
|
|
6
|
+
import stableStringify from "fast-json-stable-stringify";
|
|
7
|
+
import isbot from "isbot";
|
|
8
|
+
import random from "random";
|
|
9
|
+
import UAParser from "ua-parser-js";
|
|
9
10
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
const customDimensionMap = {
|
|
13
|
-
'ga:dimension1': 'appName',
|
|
14
|
-
'ga:dimension2': 'connection',
|
|
15
|
-
'ga:dimension3': 'cpuClass',
|
|
16
|
-
'ga:dimension5': 'oscpu',
|
|
17
|
-
'ga:dimension6': 'platform',
|
|
18
|
-
'ga:dimension7': 'pluginsLength',
|
|
19
|
-
'ga:dimension8': 'vendor',
|
|
20
|
-
'ga:dimension9': 'userAgent',
|
|
21
|
-
};
|
|
22
|
-
// And the special timestamp session ID that we'll use for joining data.
|
|
23
|
-
const sessionIdDimension = 'ga:dimension10';
|
|
24
|
-
|
|
25
|
-
// Standard dimensions used by Google Analytics.
|
|
26
|
-
const standardDimensionMap = {
|
|
27
|
-
'ga:browserSize': 'browserSize',
|
|
28
|
-
'ga:deviceCategory': 'deviceCategory',
|
|
29
|
-
'ga:screenResolution': 'screenResolution',
|
|
30
|
-
};
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
// These primarily help map missing data to `null`/`undefined` properly.
|
|
34
|
-
const parseCustomDimension = (value, json = false) => {
|
|
35
|
-
if (value === 'null') {
|
|
36
|
-
return null;
|
|
37
|
-
}
|
|
38
|
-
if (value === 'undefined') {
|
|
39
|
-
return undefined;
|
|
40
|
-
}
|
|
41
|
-
if (json && value) {
|
|
42
|
-
try {
|
|
43
|
-
return parseCustomDimension(JSON.parse(value));
|
|
44
|
-
} catch (error) {
|
|
45
|
-
console.error(`Error parsing "${value}" as JSON.`, error);
|
|
46
|
-
return null;
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
if (typeof value === 'object' && value !== null) {
|
|
50
|
-
const parsedObject = {};
|
|
51
|
-
Object.entries(value).forEach(([key, childValue]) => {
|
|
52
|
-
parsedObject[key] = parseCustomDimension(childValue);
|
|
53
|
-
});
|
|
54
|
-
return parsedObject;
|
|
55
|
-
}
|
|
56
|
-
return value;
|
|
57
|
-
};
|
|
58
|
-
|
|
59
|
-
const parseStandardDimension = value => (
|
|
60
|
-
value === '(not set)' ? null : value
|
|
61
|
-
);
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
const fetchAnalyticsRows = (dimensions, page = 0) => new Promise((resolve, reject) => {
|
|
65
|
-
// Fetch session data from the last 24-48 hours.
|
|
66
|
-
const maximumAgeInDays = parseInt(process.env.MAXIMUM_AGE || 1, 10);
|
|
67
|
-
const endDate = moment().format('YYYY-MM-DD');
|
|
68
|
-
const startDate = moment().subtract(maximumAgeInDays, 'days').format('YYYY-MM-DD');
|
|
69
|
-
|
|
70
|
-
// This is the maximum value allowed by the API.
|
|
71
|
-
const maxResults = 10000;
|
|
72
|
-
const startIndex = 1 + (page * maxResults);
|
|
73
|
-
|
|
74
|
-
gaApi({
|
|
75
|
-
// Credential details.
|
|
76
|
-
clientId: 'user-agents-npm-package-update.apps.googleusercontent.com',
|
|
77
|
-
email: 'user-agents-npm-package-update@user-agents-npm-package.iam.gserviceaccount.com',
|
|
78
|
-
key: 'google-analytics-credentials.json',
|
|
79
|
-
ids: 'ga:115995502',
|
|
80
|
-
// Request details.
|
|
81
|
-
endDate,
|
|
82
|
-
dimensions: dimensions.join(','),
|
|
83
|
-
maxResults,
|
|
84
|
-
metrics: 'ga:sessions',
|
|
85
|
-
sort: sessionIdDimension,
|
|
86
|
-
startDate,
|
|
87
|
-
startIndex,
|
|
88
|
-
}, (error, data) => {
|
|
89
|
-
if (error) {
|
|
90
|
-
return reject(error);
|
|
91
|
-
}
|
|
92
|
-
return resolve(data.rows);
|
|
93
|
-
}, { cacheDir: '.' });
|
|
11
|
+
const ddb = new dynamoose.aws.ddb.DynamoDB({
|
|
12
|
+
region: "us-east-2",
|
|
94
13
|
});
|
|
14
|
+
dynamoose.aws.ddb.set(ddb);
|
|
15
|
+
|
|
16
|
+
const SubmissionModel = dynamoose.model(
|
|
17
|
+
"userAgentsAnalyticsSubmissionTable",
|
|
18
|
+
new dynamoose.Schema(
|
|
19
|
+
{
|
|
20
|
+
id: {
|
|
21
|
+
type: String,
|
|
22
|
+
hashKey: true,
|
|
23
|
+
},
|
|
24
|
+
ip: String,
|
|
25
|
+
profile: Object,
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
saveUnknown: ["profile.**"],
|
|
29
|
+
timestamps: { createdAt: "timestamp", updatedAt: undefined },
|
|
30
|
+
},
|
|
31
|
+
),
|
|
32
|
+
{ create: false, update: false },
|
|
33
|
+
);
|
|
95
34
|
|
|
35
|
+
const getUserAgentTable = async (limit = 1e4) => {
|
|
36
|
+
const minimumTimestamp = Date.now() - 1 * 24 * 60 * 60 * 1000;
|
|
96
37
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
const dimensionGroupCount = Math.ceil(dimensions.length / (maximumDimensionsPerRequest - 1));
|
|
104
|
-
const dimensionGroups = [];
|
|
105
|
-
for (let i = 0; i < dimensionGroupCount; i += 1) {
|
|
106
|
-
const startIndex = (maximumDimensionsPerRequest - 1) * i;
|
|
107
|
-
const endIndex = (startIndex + maximumDimensionsPerRequest) - 1;
|
|
108
|
-
dimensionGroups.push([sessionIdDimension].concat(dimensions.slice(startIndex, endIndex)));
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
// Now we loop through and paginate the results, joining the dimensions by session ID as we go.
|
|
112
|
-
const sessions = {};
|
|
113
|
-
const groupCounts = {};
|
|
114
|
-
let page = 0;
|
|
115
|
-
let newRowCount;
|
|
38
|
+
// Scan through all recent profiles keeping track of the count of each.
|
|
39
|
+
let lastKey = null;
|
|
40
|
+
const countsByProfile = {};
|
|
41
|
+
let totalCount = 0;
|
|
42
|
+
let uniqueCount = 0;
|
|
43
|
+
let ipAddressAlreadySeen = {};
|
|
116
44
|
do {
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
rows.forEach((row) => {
|
|
123
|
-
const sessionId = row[0];
|
|
124
|
-
groupCounts[sessionId] = (groupCounts[sessionId] || 0) + 1;
|
|
125
|
-
|
|
126
|
-
sessions[sessionId] = sessions[sessionId] || {};
|
|
127
|
-
// Exclude the session ID (first) and the session count metric (last).
|
|
128
|
-
row.slice(1, -1).forEach((value, index) => {
|
|
129
|
-
sessions[sessionId][dimensionGroup[index + 1]] = value;
|
|
130
|
-
});
|
|
131
|
-
});
|
|
45
|
+
const scan = SubmissionModel.scan(
|
|
46
|
+
new dynamoose.Condition().filter("timestamp").gt(minimumTimestamp),
|
|
47
|
+
);
|
|
48
|
+
if (lastKey) {
|
|
49
|
+
scan.startAt(lastKey);
|
|
132
50
|
}
|
|
133
51
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
52
|
+
const response = await scan.exec();
|
|
53
|
+
response.forEach(({ ip, profile }) => {
|
|
54
|
+
// Only count one profile per IP address.
|
|
55
|
+
if (ipAddressAlreadySeen[ip]) return;
|
|
56
|
+
ipAddressAlreadySeen[ip] = true;
|
|
137
57
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
if (groupCounts[sessionId] !== dimensionGroupCount) {
|
|
141
|
-
delete sessions[sessionId];
|
|
142
|
-
}
|
|
143
|
-
});
|
|
144
|
-
|
|
145
|
-
return sessions;
|
|
146
|
-
};
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
const parseSessions = (rawSessions) => {
|
|
150
|
-
const sessions = {};
|
|
151
|
-
Object.entries(rawSessions).forEach(([sessionId, rawSession]) => {
|
|
152
|
-
const session = {
|
|
153
|
-
timestamp: parseInt(sessionId.split('-')[0], 10),
|
|
154
|
-
};
|
|
155
|
-
|
|
156
|
-
Object.entries(customDimensionMap).forEach(([rawDimension, dimension]) => {
|
|
157
|
-
const json = dimension === 'connection';
|
|
158
|
-
session[dimension] = parseCustomDimension(rawSession[rawDimension], json);
|
|
159
|
-
if (dimension === 'connection' && session[dimension]) {
|
|
160
|
-
if (session[dimension].rtt) {
|
|
161
|
-
session[dimension].rtt = parseInt(session[dimension].rtt, 10);
|
|
162
|
-
}
|
|
163
|
-
if (session[dimension].downlink) {
|
|
164
|
-
session[dimension].downlink = parseFloat(session[dimension].downlink);
|
|
165
|
-
}
|
|
166
|
-
if (session[dimension].downlinkMax) {
|
|
167
|
-
session[dimension].downlinkMax = parseFloat(session[dimension].downlinkMax);
|
|
168
|
-
}
|
|
169
|
-
}
|
|
58
|
+
// Filter out bots like Googlebot and YandexBot.
|
|
59
|
+
if (isbot(profile.userAgent)) return;
|
|
170
60
|
|
|
171
|
-
|
|
172
|
-
|
|
61
|
+
// Track the counts for this exact profile.
|
|
62
|
+
const stringifiedProfile = stableStringify(profile);
|
|
63
|
+
if (!countsByProfile[stringifiedProfile]) {
|
|
64
|
+
countsByProfile[stringifiedProfile] = 0;
|
|
65
|
+
uniqueCount += 1;
|
|
173
66
|
}
|
|
67
|
+
countsByProfile[stringifiedProfile] += 1;
|
|
68
|
+
totalCount += 1;
|
|
174
69
|
});
|
|
175
70
|
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
if (dimension === 'browserSize' || dimension === 'screenResolution') {
|
|
179
|
-
let height = null;
|
|
180
|
-
let width = null;
|
|
181
|
-
if (/\d+x\d+/.test(value)) {
|
|
182
|
-
[width, height] = value.split('x').map(pixels => parseInt(pixels, 10));
|
|
183
|
-
}
|
|
184
|
-
const dimensionPrefix = dimension === 'browserSize' ? 'viewport' : 'screen';
|
|
185
|
-
session[`${dimensionPrefix}Height`] = height;
|
|
186
|
-
session[`${dimensionPrefix}Width`] = width;
|
|
187
|
-
} else {
|
|
188
|
-
session[dimension] = value;
|
|
189
|
-
}
|
|
190
|
-
});
|
|
71
|
+
lastKey = response.lastKey;
|
|
72
|
+
} while (lastKey);
|
|
191
73
|
|
|
192
|
-
|
|
74
|
+
// Add some noise to the counts/weights.
|
|
75
|
+
const n = () => random.normal();
|
|
76
|
+
Object.entries(countsByProfile).forEach(([stringifiedProfile, count]) => {
|
|
77
|
+
const unnormalizedWeight =
|
|
78
|
+
Array(2 * count)
|
|
79
|
+
.fill()
|
|
80
|
+
.reduce((sum) => sum + n()() ** 2, 0) / 2;
|
|
81
|
+
countsByProfile[stringifiedProfile] = unnormalizedWeight;
|
|
193
82
|
});
|
|
194
83
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
const
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
delete uniqueSessions[uniqueKey].timestamp;
|
|
84
|
+
// Accumulate the profiles and add/remove a few properties to match the historical format.
|
|
85
|
+
const profiles = [];
|
|
86
|
+
for (let stringifiedProfile in countsByProfile) {
|
|
87
|
+
if (countsByProfile.hasOwnProperty(stringifiedProfile)) {
|
|
88
|
+
const profile = JSON.parse(stringifiedProfile);
|
|
89
|
+
profile.weight = countsByProfile[stringifiedProfile];
|
|
90
|
+
delete profile.sessionId;
|
|
91
|
+
|
|
92
|
+
// Deleting these because they weren't in the old format, but we should leave them in...
|
|
93
|
+
delete profile.language;
|
|
94
|
+
delete profile.oscpu;
|
|
95
|
+
|
|
96
|
+
// Find the device category.
|
|
97
|
+
const parser = new UAParser(profile.userAgent);
|
|
98
|
+
const device = parser.getDevice();
|
|
99
|
+
// Sketchy, but I validated this on historical data and it is a 100% match.
|
|
100
|
+
profile.deviceCategory =
|
|
101
|
+
{ mobile: "mobile", tablet: "tablet", undefined: "desktop" }[
|
|
102
|
+
`${device.type}`
|
|
103
|
+
] ?? "desktop";
|
|
104
|
+
|
|
105
|
+
profiles.push(profile);
|
|
106
|
+
delete countsByProfile[stringifiedProfile];
|
|
219
107
|
}
|
|
220
|
-
|
|
221
|
-
});
|
|
222
|
-
|
|
223
|
-
// Normalize the weights to 1.
|
|
224
|
-
let totalWeight = 0;
|
|
108
|
+
}
|
|
225
109
|
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
110
|
+
// Sort by descending weight.
|
|
111
|
+
profiles.sort((a, b) => b.weight - a.weight);
|
|
112
|
+
|
|
113
|
+
// Apply the count limit and normalize the weights.
|
|
114
|
+
profiles.splice(limit);
|
|
115
|
+
const totalWeight = profiles.reduce(
|
|
116
|
+
(total, profile) => total + profile.weight,
|
|
117
|
+
0,
|
|
118
|
+
);
|
|
119
|
+
profiles.forEach((profile) => {
|
|
120
|
+
profile.weight /= totalWeight;
|
|
235
121
|
});
|
|
236
122
|
|
|
237
|
-
|
|
238
|
-
const sessionList = Object.values(uniqueSessions);
|
|
239
|
-
sessionList.sort((a, b) => b.weight - a.weight);
|
|
240
|
-
|
|
241
|
-
return sessionList;
|
|
123
|
+
return profiles;
|
|
242
124
|
};
|
|
243
125
|
|
|
244
|
-
|
|
245
126
|
if (!module.parent) {
|
|
246
127
|
const filename = process.argv[2];
|
|
247
128
|
if (!filename) {
|
|
248
|
-
throw new Error(
|
|
129
|
+
throw new Error(
|
|
130
|
+
"An output filename must be passed as an argument to the command.",
|
|
131
|
+
);
|
|
249
132
|
}
|
|
250
|
-
getUserAgentTable()
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
133
|
+
getUserAgentTable()
|
|
134
|
+
.then(async (userAgents) => {
|
|
135
|
+
const stringifiedUserAgents = JSON.stringify(userAgents, null, 2);
|
|
136
|
+
// Compress the content if the extension ends with `.gz`.
|
|
137
|
+
const content = filename.endsWith(".gz")
|
|
138
|
+
? gzipSync(stringifiedUserAgents)
|
|
139
|
+
: stringifiedUserAgents;
|
|
140
|
+
fs.writeFileSync(filename, content);
|
|
141
|
+
})
|
|
258
142
|
.catch((error) => {
|
|
259
143
|
// eslint-disable-next-line no-console
|
|
260
144
|
console.error(error);
|
|
@@ -262,5 +146,4 @@ if (!module.parent) {
|
|
|
262
146
|
});
|
|
263
147
|
}
|
|
264
148
|
|
|
265
|
-
|
|
266
149
|
export default getUserAgentTable;
|
package/src/user-agents.json.gz
CHANGED
|
Binary file
|