@redthreadlabs/tracelog 1.9.0 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,12 +11,22 @@ const os = require('os');
11
11
  const path = require('path');
12
12
  const zlib = require('zlib');
13
13
  const { createGzip } = require('zlib');
14
- const { pipeline } = require('stream');
14
+ const { pipeline, Transform, Writable } = require('stream');
15
+ const { StringDecoder } = require('string_decoder');
15
16
  const {
16
17
  S3Client,
17
18
  PutObjectCommand,
18
19
  DeleteObjectCommand,
19
20
  } = require('@aws-sdk/client-s3');
21
+ // The S3 key layout, the sidecar shape, and the histogram-deriving
22
+ // MetaAccumulator are the shared contract — owned by tracelog-schema so the
23
+ // agent (writer) and the viewer (reader) can never drift.
24
+ const {
25
+ buildKey,
26
+ normalizeHost,
27
+ MetaAccumulator,
28
+ sidecarKey,
29
+ } = require('@redthreadlabs/tracelog-schema');
20
30
 
21
31
  // S3 key layout is FIXED (not configurable): it is the contract between
22
32
  // tracelog and the in-browser log viewer, which scans the bucket with
@@ -38,6 +48,22 @@ const {
38
48
  // > 0, then the literal 'current' for the live file. A host that died
39
49
  // mid-interval leaves its final '_current' upload in place, interval intact.
40
50
 
51
+ // Every log object gets a tiny JSON sidecar at `<logkey>.meta.json` carrying
52
+ // the facts the gzipped body hides: uncompressed size, record count, and an
53
+ // hourly interval×kind histogram of the records inside. The histogram matters
54
+ // because buffered remote clients (tracelog-client) can land records from a
55
+ // past day in today's file — so a file's nominal interval is a filing label,
56
+ // not a truthful description of its contents. The viewer reads these into its
57
+ // size ledger for deterministic memory/cache accounting and factual rollups,
58
+ // and falls back to estimation for files written before sidecars existed.
59
/**
 * Size of the file at `p` in bytes, or 0 when it cannot be stat'ed.
 *
 * Never throws: any error from `fs.statSync` (missing file, permissions, …)
 * is reported as a size of 0.
 *
 * @param {string} p - path of the file to measure
 * @returns {number} the file size in bytes, or 0 on any stat failure
 */
function _safeSize(p) {
  let size = 0;
  try {
    ({ size } = fs.statSync(p));
  } catch (e) {
    // stat failed — keep the 0 default
  }
  return size;
}
66
+
41
67
  class S3Uploader {
42
68
  /**
43
69
  * @param {Object} opts
@@ -86,6 +112,12 @@ class S3Uploader {
86
112
  }
87
113
 
88
114
  this._pendingUploads = 0;
115
+
116
+ // Incremental sidecar accumulators for in-progress current files, keyed by
117
+ // their S3 key. Each periodic uploadCurrent parses only the newly-appended
118
+ // bytes; a restart drops this map, so the next upload re-parses the file
119
+ // from byte 0 once and resumes incremental — the file always wins.
120
+ this._currentAccs = new Map();
89
121
  }
90
122
 
91
123
  /**
@@ -111,16 +143,28 @@ class S3Uploader {
111
143
 
112
144
  this._pendingUploads++;
113
145
 
146
+ // Count records while the bytes stream through to gzip — one read pass,
147
+ // no extra memory. StringDecoder keeps multi-byte chars whole across chunk
148
+ // boundaries.
149
+ const acc = new MetaAccumulator();
150
+ const decoder = new StringDecoder('utf8');
114
151
  const readStream = fs.createReadStream(filePath);
152
+ const counter = new Transform({
153
+ transform(chunk, enc, cb) {
154
+ try { acc.addChunk(decoder.write(chunk)); } catch (e) { /* never break upload */ }
155
+ cb(null, chunk);
156
+ },
157
+ });
115
158
  const gzip = createGzip();
116
159
  const writeStream = fs.createWriteStream(gzPath);
117
160
 
118
- pipeline(readStream, gzip, writeStream, (err) => {
161
+ pipeline(readStream, counter, gzip, writeStream, (err) => {
119
162
  if (err) {
120
163
  this._logError('Failed to gzip %s: %s', filePath, err.message);
121
164
  this._pendingUploads--;
122
165
  return;
123
166
  }
167
+ try { acc.addChunk(decoder.end()); acc.flushPartial(); } catch (e) { /* ignore */ }
124
168
 
125
169
  const body = fs.createReadStream(gzPath);
126
170
  const command = new PutObjectCommand({
@@ -137,6 +181,7 @@ class S3Uploader {
137
181
  if (this._log) {
138
182
  this._log.debug('Uploaded completed log to s3://%s/%s', this._bucket, key);
139
183
  }
184
+ this._uploadSidecar(key, vars.interval, _safeSize(filePath), _safeSize(gzPath), acc);
140
185
  try { fs.unlinkSync(filePath); } catch (e) { /* ignore */ }
141
186
  try { fs.unlinkSync(gzPath); } catch (e) { /* ignore */ }
142
187
  this._deleteStaleCurrent(vars);
@@ -160,33 +205,54 @@ class S3Uploader {
160
205
 
161
206
  this._pendingUploads++;
162
207
 
163
- const body = fs.createReadStream(filePath);
164
- const command = new PutObjectCommand({
165
- Bucket: this._bucket,
166
- Key: key,
167
- Body: body,
168
- ContentType: 'application/x-ndjson',
208
+ // Count records in a streaming pass (no gzip here), then upload the file.
209
+ const acc = new MetaAccumulator();
210
+ const decoder = new StringDecoder('utf8');
211
+ const sink = new Writable({
212
+ write(chunk, enc, cb) {
213
+ try { acc.addChunk(decoder.write(chunk)); } catch (e) { /* ignore */ }
214
+ cb();
215
+ },
169
216
  });
170
217
 
171
- this._s3
172
- .send(command)
173
- .then(() => {
174
- if (this._log) {
175
- this._log.debug('Uploaded completed log to s3://%s/%s', this._bucket, key);
176
- }
177
- try { fs.unlinkSync(filePath); } catch (e) { /* ignore */ }
178
- this._deleteStaleCurrent(vars);
179
- })
180
- .catch((uploadErr) => {
181
- this._logError(
182
- 'Failed to upload %s to S3: %s',
183
- filePath,
184
- uploadErr.message,
185
- );
186
- })
187
- .finally(() => {
218
+ pipeline(fs.createReadStream(filePath), sink, (err) => {
219
+ if (err) {
220
+ this._logError('Failed to read %s: %s', filePath, err.message);
188
221
  this._pendingUploads--;
222
+ return;
223
+ }
224
+ try { acc.addChunk(decoder.end()); acc.flushPartial(); } catch (e) { /* ignore */ }
225
+
226
+ const bytes = _safeSize(filePath);
227
+ const body = fs.createReadStream(filePath);
228
+ const command = new PutObjectCommand({
229
+ Bucket: this._bucket,
230
+ Key: key,
231
+ Body: body,
232
+ ContentType: 'application/x-ndjson',
189
233
  });
234
+
235
+ this._s3
236
+ .send(command)
237
+ .then(() => {
238
+ if (this._log) {
239
+ this._log.debug('Uploaded completed log to s3://%s/%s', this._bucket, key);
240
+ }
241
+ this._uploadSidecar(key, vars.interval, bytes, bytes, acc);
242
+ try { fs.unlinkSync(filePath); } catch (e) { /* ignore */ }
243
+ this._deleteStaleCurrent(vars);
244
+ })
245
+ .catch((uploadErr) => {
246
+ this._logError(
247
+ 'Failed to upload %s to S3: %s',
248
+ filePath,
249
+ uploadErr.message,
250
+ );
251
+ })
252
+ .finally(() => {
253
+ this._pendingUploads--;
254
+ });
255
+ });
190
256
  }
191
257
 
192
258
  /**
@@ -200,18 +266,24 @@ class S3Uploader {
200
266
  key += '.gz';
201
267
  }
202
268
 
203
- this._pendingUploads++;
204
- this._s3
205
- .send(new DeleteObjectCommand({ Bucket: this._bucket, Key: key }))
206
- .then(() => {
207
- if (this._log) {
208
- this._log.debug('Deleted stale current log s3://%s/%s', this._bucket, key);
209
- }
210
- })
211
- .catch(() => { /* best-effort */ })
212
- .finally(() => {
213
- this._pendingUploads--;
214
- });
269
+ // The current snapshot is finalized — stop tracking its incremental meta.
270
+ this._currentAccs.delete(key);
271
+
272
+ // Delete the snapshot object and its sidecar (best-effort, both).
273
+ for (const k of [key, sidecarKey(key)]) {
274
+ this._pendingUploads++;
275
+ this._s3
276
+ .send(new DeleteObjectCommand({ Bucket: this._bucket, Key: k }))
277
+ .then(() => {
278
+ if (this._log) {
279
+ this._log.debug('Deleted stale current log s3://%s/%s', this._bucket, k);
280
+ }
281
+ })
282
+ .catch(() => { /* best-effort */ })
283
+ .finally(() => {
284
+ this._pendingUploads--;
285
+ });
286
+ }
215
287
  }
216
288
 
217
289
  /**
@@ -245,6 +317,7 @@ class S3Uploader {
245
317
 
246
318
  _uploadCurrentGzipped(filePath, rawBody, vars, cb) {
247
319
  const key = this._buildKey(vars) + '.gz';
320
+ const acc = this._currentAcc(key, rawBody);
248
321
 
249
322
  this._pendingUploads++;
250
323
 
@@ -270,6 +343,7 @@ class S3Uploader {
270
343
  if (this._log) {
271
344
  this._log.debug('Uploaded current log to s3://%s/%s', this._bucket, key);
272
345
  }
346
+ this._uploadSidecar(key, vars.interval, rawBody.length, compressed.length, acc);
273
347
  })
274
348
  .catch((uploadErr) => {
275
349
  this._logError(
@@ -286,6 +360,7 @@ class S3Uploader {
286
360
 
287
361
  _uploadCurrentRaw(filePath, rawBody, vars, cb) {
288
362
  const key = this._buildKey(vars);
363
+ const acc = this._currentAcc(key, rawBody);
289
364
 
290
365
  this._pendingUploads++;
291
366
 
@@ -302,6 +377,7 @@ class S3Uploader {
302
377
  if (this._log) {
303
378
  this._log.debug('Uploaded current log to s3://%s/%s', this._bucket, key);
304
379
  }
380
+ this._uploadSidecar(key, vars.interval, rawBody.length, rawBody.length, acc);
305
381
  })
306
382
  .catch((uploadErr) => {
307
383
  this._logError(
@@ -315,6 +391,64 @@ class S3Uploader {
315
391
  });
316
392
  }
317
393
 
394
+ /**
395
+ * The incremental sidecar accumulator for an in-progress current file. Parses
396
+ * only bytes appended since the last upload; re-derives from byte 0 when this
397
+ * key is first seen (or after a restart drops the map, or if the file ever
398
+ * shrank). Offsets land on newline boundaries, so tail slices never split a
399
+ * line or a multi-byte char.
400
+ * @param {string} key - the current snapshot's S3 key
401
+ * @param {Buffer} rawBody - the full current file contents
402
+ */
403
+ _currentAcc(key, rawBody) {
404
+ let acc = this._currentAccs.get(key);
405
+ if (!acc || rawBody.length < acc.offset) {
406
+ acc = new MetaAccumulator();
407
+ this._currentAccs.set(key, acc);
408
+ }
409
+ if (rawBody.length > acc.offset) {
410
+ acc.addChunk(rawBody.toString('utf8', acc.offset));
411
+ acc.offset = rawBody.length;
412
+ }
413
+ return acc;
414
+ }
415
+
416
+ /**
417
+ * Upload the metadata sidecar for a just-uploaded log object. Best-effort and
418
+ * fully decoupled: a failure here never affects the log upload — the viewer
419
+ * just falls back to estimating that file's size.
420
+ */
421
+ _uploadSidecar(objectKey, interval, bytes, compressed, acc) {
422
+ let body;
423
+ try {
424
+ body = JSON.stringify(acc.toMeta(interval, bytes, compressed));
425
+ } catch (e) {
426
+ return; // never let sidecar serialization affect the run
427
+ }
428
+
429
+ this._pendingUploads++;
430
+ this._s3
431
+ .send(new PutObjectCommand({
432
+ Bucket: this._bucket,
433
+ Key: sidecarKey(objectKey),
434
+ Body: body,
435
+ ContentType: 'application/json',
436
+ }))
437
+ .then(() => {
438
+ if (this._log) {
439
+ this._log.debug('Uploaded sidecar s3://%s/%s', this._bucket, sidecarKey(objectKey));
440
+ }
441
+ })
442
+ .catch((err) => {
443
+ if (this._log) {
444
+ this._log.debug('Sidecar upload failed for %s: %s', objectKey, err.message);
445
+ }
446
+ })
447
+ .finally(() => {
448
+ this._pendingUploads--;
449
+ });
450
+ }
451
+
318
452
  /**
319
453
  * Build the S3 key for a log file (see the layout contract above).
320
454
  *
@@ -325,14 +459,8 @@ class S3Uploader {
325
459
  * - {boolean} [current] - True for the live (incomplete) file snapshot
326
460
  */
327
461
  _buildKey(vars) {
328
- let basename = this._host;
329
- if (vars.seq > 0) {
330
- basename += `_${vars.seq}`;
331
- }
332
- if (vars.current) {
333
- basename += '_current';
334
- }
335
- return `${vars.channel}/${vars.interval}/${basename}.jsonl`;
462
+ // the layout is the shared contract; host comes from this uploader
463
+ return buildKey({ ...vars, host: this._host });
336
464
  }
337
465
 
338
466
  _logError(fmt, ...args) {
@@ -342,18 +470,6 @@ class S3Uploader {
342
470
  }
343
471
  }
344
472
 
345
- /**
346
- * Normalize a hostname into the host label used in S3 keys. EC2 internal
347
- * hostnames (ip-A-B-C-D or ip-A-B-C-D.ec2.internal etc.) become the dotted
348
- * IP address, which avoids embedding hyphens in the basename; any other
349
- * hostname is used as-is.
350
- */
351
- function normalizeHost(hostname) {
352
- const m = /^ip-(\d{1,3})-(\d{1,3})-(\d{1,3})-(\d{1,3})(\..*)?$/.exec(hostname);
353
- if (m) {
354
- return `${m[1]}.${m[2]}.${m[3]}.${m[4]}`;
355
- }
356
- return hostname;
357
- }
358
-
359
- module.exports = { S3Uploader, normalizeHost };
473
+ // normalizeHost + MetaAccumulator are re-exported from tracelog-schema (the
474
+ // shared contract) so existing importers keep working.
475
+ module.exports = { S3Uploader, normalizeHost, MetaAccumulator };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@redthreadlabs/tracelog",
3
- "version": "1.9.0",
3
+ "version": "1.11.0",
4
4
  "description": "Node.js APM instrumentation that writes traces to JSONL files",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -50,6 +50,7 @@
50
50
  "dependencies": {
51
51
  "@aws-sdk/client-s3": "^3.0.0",
52
52
  "@elastic/ecs-pino-format": "^1.5.0",
53
+ "@redthreadlabs/tracelog-schema": "^0.1.0",
53
54
  "after-all-results": "^2.0.0",
54
55
  "async-value-promise": "^1.1.1",
55
56
  "basic-auth": "^2.0.1",