@redthreadlabs/tracelog 1.9.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,7 +11,8 @@ const os = require('os');
11
11
  const path = require('path');
12
12
  const zlib = require('zlib');
13
13
  const { createGzip } = require('zlib');
14
- const { pipeline } = require('stream');
14
+ const { pipeline, Transform, Writable } = require('stream');
15
+ const { StringDecoder } = require('string_decoder');
15
16
  const {
16
17
  S3Client,
17
18
  PutObjectCommand,
@@ -38,6 +39,139 @@ const {
38
39
  // > 0, then the literal 'current' for the live file. A host that died
39
40
  // mid-interval leaves its final '_current' upload in place, interval intact.
40
41
 
42
+ // Every log object gets a tiny JSON sidecar at `<logkey>.meta.json` carrying
43
+ // the facts the gzipped body hides: uncompressed size, record count, and an
44
+ // hourly interval×kind histogram of the records inside. The histogram matters
45
+ // because buffered remote clients (tracelog-client) can land records from a
46
+ // past day in today's file — so a file's nominal interval is a filing label,
47
+ // not a truthful description of its contents. The viewer reads these into its
48
+ // size ledger for deterministic memory/cache accounting and factual rollups,
49
+ // and falls back to estimation for files written before sidecars existed.
50
const SIDECAR_VERSION = 1;
const SIDECAR_SUFFIX = '.meta.json';

// Largest epoch-ms value a JS Date can represent (ECMAScript time range is
// ±8.64e15 ms). Anything beyond it yields an Invalid Date, i.e. no real hour.
const MAX_DATE_MS = 8.64e15;

/** Zero-pads a value to at least two characters ('7' → '07'). */
function _pad2(n) {
  return String(n).padStart(2, '0');
}

/** epoch-ms → UTC hour-bucket label 'YYYY-MM-DDTHH' (matches the viewer). */
function _hourBucket(ms) {
  const d = new Date(ms);
  return (
    `${d.getUTCFullYear()}-${_pad2(d.getUTCMonth() + 1)}-${_pad2(d.getUTCDate())}` +
    `T${_pad2(d.getUTCHours())}`
  );
}

/**
 * Size of the file at `p` in bytes, or 0 when it cannot be stat'ed.
 * Best-effort on purpose: a sidecar with a zero size is preferable to a
 * failed upload path.
 */
function _safeSize(p) {
  try {
    return fs.statSync(p).size;
  } catch (e) {
    return 0;
  }
}

/**
 * Derives a log file's sidecar histogram by parsing its NDJSON lines. The file
 * on disk is the source of truth: counts come from the exact bytes being
 * uploaded, so they cannot drift from the object, and a restart (which wipes
 * any in-memory write-time counters) or an orphaned file from a crashed run is
 * handled for free — we just re-derive from the file.
 *
 * Tolerant by design: an unparseable line is skipped (not a record); a record
 * with a missing/garbage timestamp — including one that is numeric but falls
 * outside the range a Date can represent — is counted as `malformed` rather
 * than forced into an interval. addChunk may be fed arbitrary successive
 * slices of the text: an unterminated tail is held back in `_partial` until
 * its newline arrives (or flushPartial() is called at end of file).
 */
class MetaAccumulator {
  constructor() {
    this.offset = 0; // bytes consumed so far (for incremental current parsing)
    this.records = 0;
    this.malformed = 0;
    this.intervals = Object.create(null); // { 'YYYY-MM-DDTHH': { kind: count } }
    this._partial = '';
  }

  /**
   * Feed the next slice of decoded text. Every complete (newline-terminated)
   * line is counted; the trailing unterminated remainder is carried over.
   * @param {string} text
   */
  addChunk(text) {
    if (!text) return;
    const s = this._partial + text;
    let start = 0;
    let nl;
    while ((nl = s.indexOf('\n', start)) !== -1) {
      this._addLine(s.slice(start, nl));
      start = nl + 1;
    }
    this._partial = s.slice(start);
  }

  /** Count the held-back tail as a final line (file without trailing newline). */
  flushPartial() {
    if (this._partial) {
      this._addLine(this._partial);
      this._partial = '';
    }
  }

  /**
   * Classify one NDJSON line. A record is an object whose first key names its
   * kind; the file's own `metadata` line is not a record.
   * @param {string} line
   */
  _addLine(line) {
    const t = line.trim();
    if (!t) return;
    let obj;
    try {
      obj = JSON.parse(t);
    } catch (e) {
      return; // corrupt line — not a countable record (the viewer skips it too)
    }
    if (!obj || typeof obj !== 'object') return;
    const kind = Object.keys(obj)[0];
    if (!kind || kind === 'metadata') return; // the file's metadata line
    this.records++;

    const body = obj[kind];
    const tsUs = body ? body.timestamp : undefined;
    // Serialized timestamps are epoch-µs. NaN marks "no usable timestamp".
    const ms =
      typeof tsUs === 'number' && Number.isFinite(tsUs) && tsUs > 0
        ? tsUs / 1000
        : NaN;
    // `!(ms <= MAX)` is true for NaN as well as for out-of-range values, so a
    // timestamp no Date can represent never becomes a 'NaN-NaN-NaNTNaN' bucket.
    if (!(ms <= MAX_DATE_MS)) {
      this.malformed++;
      return;
    }

    const bucket = _hourBucket(ms);
    const byKind =
      this.intervals[bucket] || (this.intervals[bucket] = Object.create(null));
    byKind[kind] = (byKind[kind] || 0) + 1;
  }

  /**
   * The sidecar object for this file. records === malformed + Σ(intervals).
   *
   * Keys are emitted in a fixed, sorted order at every level — top-level fields
   * in schema order, interval buckets and their kinds sorted lexically — so the
   * same contents always serialize to byte-identical JSON regardless of the
   * order records arrived in. That makes a sidecar's ETag a reliable
   * sameness check.
   *
   * @param {string} interval - the file's nominal interval label
   * @param {number} bytes - uncompressed size of the log object
   * @param {number} compressed - stored (possibly gzipped) size of the object
   * @returns {Object} plain JSON-serializable sidecar
   */
  toMeta(interval, bytes, compressed) {
    const intervals = Object.create(null);
    for (const hour of Object.keys(this.intervals).sort()) {
      const src = this.intervals[hour];
      const sorted = Object.create(null);
      for (const kind of Object.keys(src).sort()) sorted[kind] = src[kind];
      intervals[hour] = sorted;
    }
    return {
      v: SIDECAR_VERSION,
      interval,
      bytes,
      compressed,
      records: this.records,
      malformed: this.malformed,
      intervals,
    };
  }
}
174
+
41
175
  class S3Uploader {
42
176
  /**
43
177
  * @param {Object} opts
@@ -86,6 +220,12 @@ class S3Uploader {
86
220
  }
87
221
 
88
222
  this._pendingUploads = 0;
223
+
224
+ // Incremental sidecar accumulators for in-progress current files, keyed by
225
+ // their S3 key. Each periodic uploadCurrent parses only the newly-appended
226
+ // bytes; a restart drops this map, so the next upload re-parses the file
227
+ // from byte 0 once and resumes incremental — the file always wins.
228
+ this._currentAccs = new Map();
89
229
  }
90
230
 
91
231
  /**
@@ -111,16 +251,28 @@ class S3Uploader {
111
251
 
112
252
  this._pendingUploads++;
113
253
 
254
+ // Count records while the bytes stream through to gzip — one read pass,
255
+ // no extra memory. StringDecoder keeps multi-byte chars whole across chunk
256
+ // boundaries.
257
+ const acc = new MetaAccumulator();
258
+ const decoder = new StringDecoder('utf8');
114
259
  const readStream = fs.createReadStream(filePath);
260
+ const counter = new Transform({
261
+ transform(chunk, enc, cb) {
262
+ try { acc.addChunk(decoder.write(chunk)); } catch (e) { /* never break upload */ }
263
+ cb(null, chunk);
264
+ },
265
+ });
115
266
  const gzip = createGzip();
116
267
  const writeStream = fs.createWriteStream(gzPath);
117
268
 
118
- pipeline(readStream, gzip, writeStream, (err) => {
269
+ pipeline(readStream, counter, gzip, writeStream, (err) => {
119
270
  if (err) {
120
271
  this._logError('Failed to gzip %s: %s', filePath, err.message);
121
272
  this._pendingUploads--;
122
273
  return;
123
274
  }
275
+ try { acc.addChunk(decoder.end()); acc.flushPartial(); } catch (e) { /* ignore */ }
124
276
 
125
277
  const body = fs.createReadStream(gzPath);
126
278
  const command = new PutObjectCommand({
@@ -137,6 +289,7 @@ class S3Uploader {
137
289
  if (this._log) {
138
290
  this._log.debug('Uploaded completed log to s3://%s/%s', this._bucket, key);
139
291
  }
292
+ this._uploadSidecar(key, vars.interval, _safeSize(filePath), _safeSize(gzPath), acc);
140
293
  try { fs.unlinkSync(filePath); } catch (e) { /* ignore */ }
141
294
  try { fs.unlinkSync(gzPath); } catch (e) { /* ignore */ }
142
295
  this._deleteStaleCurrent(vars);
@@ -160,33 +313,54 @@ class S3Uploader {
160
313
 
161
314
  this._pendingUploads++;
162
315
 
163
- const body = fs.createReadStream(filePath);
164
- const command = new PutObjectCommand({
165
- Bucket: this._bucket,
166
- Key: key,
167
- Body: body,
168
- ContentType: 'application/x-ndjson',
316
+ // Count records in a streaming pass (no gzip here), then upload the file.
317
+ const acc = new MetaAccumulator();
318
+ const decoder = new StringDecoder('utf8');
319
+ const sink = new Writable({
320
+ write(chunk, enc, cb) {
321
+ try { acc.addChunk(decoder.write(chunk)); } catch (e) { /* ignore */ }
322
+ cb();
323
+ },
169
324
  });
170
325
 
171
- this._s3
172
- .send(command)
173
- .then(() => {
174
- if (this._log) {
175
- this._log.debug('Uploaded completed log to s3://%s/%s', this._bucket, key);
176
- }
177
- try { fs.unlinkSync(filePath); } catch (e) { /* ignore */ }
178
- this._deleteStaleCurrent(vars);
179
- })
180
- .catch((uploadErr) => {
181
- this._logError(
182
- 'Failed to upload %s to S3: %s',
183
- filePath,
184
- uploadErr.message,
185
- );
186
- })
187
- .finally(() => {
326
+ pipeline(fs.createReadStream(filePath), sink, (err) => {
327
+ if (err) {
328
+ this._logError('Failed to read %s: %s', filePath, err.message);
188
329
  this._pendingUploads--;
330
+ return;
331
+ }
332
+ try { acc.addChunk(decoder.end()); acc.flushPartial(); } catch (e) { /* ignore */ }
333
+
334
+ const bytes = _safeSize(filePath);
335
+ const body = fs.createReadStream(filePath);
336
+ const command = new PutObjectCommand({
337
+ Bucket: this._bucket,
338
+ Key: key,
339
+ Body: body,
340
+ ContentType: 'application/x-ndjson',
189
341
  });
342
+
343
+ this._s3
344
+ .send(command)
345
+ .then(() => {
346
+ if (this._log) {
347
+ this._log.debug('Uploaded completed log to s3://%s/%s', this._bucket, key);
348
+ }
349
+ this._uploadSidecar(key, vars.interval, bytes, bytes, acc);
350
+ try { fs.unlinkSync(filePath); } catch (e) { /* ignore */ }
351
+ this._deleteStaleCurrent(vars);
352
+ })
353
+ .catch((uploadErr) => {
354
+ this._logError(
355
+ 'Failed to upload %s to S3: %s',
356
+ filePath,
357
+ uploadErr.message,
358
+ );
359
+ })
360
+ .finally(() => {
361
+ this._pendingUploads--;
362
+ });
363
+ });
190
364
  }
191
365
 
192
366
  /**
@@ -200,18 +374,24 @@ class S3Uploader {
200
374
  key += '.gz';
201
375
  }
202
376
 
203
- this._pendingUploads++;
204
- this._s3
205
- .send(new DeleteObjectCommand({ Bucket: this._bucket, Key: key }))
206
- .then(() => {
207
- if (this._log) {
208
- this._log.debug('Deleted stale current log s3://%s/%s', this._bucket, key);
209
- }
210
- })
211
- .catch(() => { /* best-effort */ })
212
- .finally(() => {
213
- this._pendingUploads--;
214
- });
377
+ // The current snapshot is finalized — stop tracking its incremental meta.
378
+ this._currentAccs.delete(key);
379
+
380
+ // Delete the snapshot object and its sidecar (best-effort, both).
381
+ for (const k of [key, key + SIDECAR_SUFFIX]) {
382
+ this._pendingUploads++;
383
+ this._s3
384
+ .send(new DeleteObjectCommand({ Bucket: this._bucket, Key: k }))
385
+ .then(() => {
386
+ if (this._log) {
387
+ this._log.debug('Deleted stale current log s3://%s/%s', this._bucket, k);
388
+ }
389
+ })
390
+ .catch(() => { /* best-effort */ })
391
+ .finally(() => {
392
+ this._pendingUploads--;
393
+ });
394
+ }
215
395
  }
216
396
 
217
397
  /**
@@ -245,6 +425,7 @@ class S3Uploader {
245
425
 
246
426
  _uploadCurrentGzipped(filePath, rawBody, vars, cb) {
247
427
  const key = this._buildKey(vars) + '.gz';
428
+ const acc = this._currentAcc(key, rawBody);
248
429
 
249
430
  this._pendingUploads++;
250
431
 
@@ -270,6 +451,7 @@ class S3Uploader {
270
451
  if (this._log) {
271
452
  this._log.debug('Uploaded current log to s3://%s/%s', this._bucket, key);
272
453
  }
454
+ this._uploadSidecar(key, vars.interval, rawBody.length, compressed.length, acc);
273
455
  })
274
456
  .catch((uploadErr) => {
275
457
  this._logError(
@@ -286,6 +468,7 @@ class S3Uploader {
286
468
 
287
469
  _uploadCurrentRaw(filePath, rawBody, vars, cb) {
288
470
  const key = this._buildKey(vars);
471
+ const acc = this._currentAcc(key, rawBody);
289
472
 
290
473
  this._pendingUploads++;
291
474
 
@@ -302,6 +485,7 @@ class S3Uploader {
302
485
  if (this._log) {
303
486
  this._log.debug('Uploaded current log to s3://%s/%s', this._bucket, key);
304
487
  }
488
+ this._uploadSidecar(key, vars.interval, rawBody.length, rawBody.length, acc);
305
489
  })
306
490
  .catch((uploadErr) => {
307
491
  this._logError(
@@ -315,6 +499,64 @@ class S3Uploader {
315
499
  });
316
500
  }
317
501
 
502
+ /**
503
+ * The incremental sidecar accumulator for an in-progress current file. Parses
504
+ * only bytes appended since the last upload; re-derives from byte 0 when this
505
+ * key is first seen (or after a restart drops the map, or if the file ever
506
+ * shrank). Offsets land on newline boundaries, so tail slices never split a
507
+ * line or a multi-byte char.
508
+ * @param {string} key - the current snapshot's S3 key
509
+ * @param {Buffer} rawBody - the full current file contents
510
+ */
511
+ _currentAcc(key, rawBody) {
512
+ let acc = this._currentAccs.get(key);
513
+ if (!acc || rawBody.length < acc.offset) {
514
+ acc = new MetaAccumulator();
515
+ this._currentAccs.set(key, acc);
516
+ }
517
+ if (rawBody.length > acc.offset) {
518
+ acc.addChunk(rawBody.toString('utf8', acc.offset));
519
+ acc.offset = rawBody.length;
520
+ }
521
+ return acc;
522
+ }
523
+
524
+ /**
525
+ * Upload the metadata sidecar for a just-uploaded log object. Best-effort and
526
+ * fully decoupled: a failure here never affects the log upload — the viewer
527
+ * just falls back to estimating that file's size.
528
+ */
529
+ _uploadSidecar(objectKey, interval, bytes, compressed, acc) {
530
+ let body;
531
+ try {
532
+ body = JSON.stringify(acc.toMeta(interval, bytes, compressed));
533
+ } catch (e) {
534
+ return; // never let sidecar serialization affect the run
535
+ }
536
+
537
+ this._pendingUploads++;
538
+ this._s3
539
+ .send(new PutObjectCommand({
540
+ Bucket: this._bucket,
541
+ Key: objectKey + SIDECAR_SUFFIX,
542
+ Body: body,
543
+ ContentType: 'application/json',
544
+ }))
545
+ .then(() => {
546
+ if (this._log) {
547
+ this._log.debug('Uploaded sidecar s3://%s/%s%s', this._bucket, objectKey, SIDECAR_SUFFIX);
548
+ }
549
+ })
550
+ .catch((err) => {
551
+ if (this._log) {
552
+ this._log.debug('Sidecar upload failed for %s: %s', objectKey, err.message);
553
+ }
554
+ })
555
+ .finally(() => {
556
+ this._pendingUploads--;
557
+ });
558
+ }
559
+
318
560
  /**
319
561
  * Build the S3 key for a log file (see the layout contract above).
320
562
  *
@@ -356,4 +598,4 @@ function normalizeHost(hostname) {
356
598
  return hostname;
357
599
  }
358
600
 
359
- module.exports = { S3Uploader, normalizeHost };
601
+ module.exports = { S3Uploader, normalizeHost, MetaAccumulator };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@redthreadlabs/tracelog",
3
- "version": "1.9.0",
3
+ "version": "1.10.0",
4
4
  "description": "Node.js APM instrumentation that writes traces to JSONL files",
5
5
  "publishConfig": {
6
6
  "access": "public"