pond-ts 0.8.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,3 @@
1
- var _a;
2
1
  import { BoundedSequence } from './BoundedSequence.js';
3
2
  import { parseTimestampString } from './calendar.js';
4
3
  import { Interval } from './Interval.js';
@@ -6,6 +5,7 @@ import { Time } from './Time.js';
6
5
  import { TimeRange } from './TimeRange.js';
7
6
  import { compareEventKeys } from './temporal.js';
8
7
  import { Event } from './Event.js';
8
+ import { PartitionedTimeSeries } from './PartitionedTimeSeries.js';
9
9
  import { Sequence } from './Sequence.js';
10
10
  import { validateAndNormalize } from './validate.js';
11
11
  import { parseDuration } from './utils/duration.js';
@@ -553,7 +553,7 @@ export class TimeSeries {
553
553
  * the supplied `parse.timeZone`, which defaults to `UTC`.
554
554
  */
555
555
  static fromJSON(input) {
556
- return new _a({
556
+ return new TimeSeries({
557
557
  name: input.name,
558
558
  schema: input.schema,
559
559
  rows: parseJsonRows(input.schema, input.rows, input.parse),
@@ -593,7 +593,7 @@ export class TimeSeries {
593
593
  */
594
594
  static fromEvents(events, options) {
595
595
  const sorted = [...events].sort((a, b) => compareEventKeys(a.key(), b.key()));
596
- return _a.#fromTrustedEvents(options.name, options.schema, sorted);
596
+ return TimeSeries.#fromTrustedEvents(options.name, options.schema, sorted);
597
597
  }
598
598
  /**
599
599
  * Example: `TimeSeries.concat([s1, s2, s3])`.
@@ -659,7 +659,7 @@ export class TimeSeries {
659
659
  allEvents.push(event);
660
660
  }
661
661
  allEvents.sort((a, b) => compareEventKeys(a.key(), b.key()));
662
- return _a.#fromTrustedEvents(head.name, head.schema, allEvents);
662
+ return TimeSeries.#fromTrustedEvents(head.name, head.schema, allEvents);
663
663
  }
664
664
  /** Example: `new TimeSeries({ name, schema, rows })`. Creates an immutable time series from a schema and row-oriented input data. */
665
665
  constructor(input) {
@@ -717,7 +717,7 @@ export class TimeSeries {
717
717
  * order and normalized key invariants.
718
718
  */
719
719
  static #fromTrustedEvents(name, schema, events) {
720
- const series = Object.create(_a.prototype);
720
+ const series = Object.create(TimeSeries.prototype);
721
721
  series.name = name;
722
722
  series.schema = Object.freeze(schema.slice());
723
723
  series.events = Object.freeze(events.slice());
@@ -756,7 +756,7 @@ export class TimeSeries {
756
756
  /** Example: `series.map(nextSchema, event => event)`. Maps each event into a new typed schema and returns a new series. */
757
757
  map(schema, mapper) {
758
758
  const mappedEvents = this.events.map((event, index) => mapper(event, index));
759
- return new _a({
759
+ return new TimeSeries({
760
760
  name: this.name,
761
761
  schema,
762
762
  rows: toRows(schema, mappedEvents),
@@ -770,9 +770,9 @@ export class TimeSeries {
770
770
  ]);
771
771
  const resultEvents = this.events.map((event) => event.asTime(options));
772
772
  if ((options.at ?? 'begin') === 'begin') {
773
- return _a.#fromTrustedEvents(this.name, schema, resultEvents);
773
+ return TimeSeries.#fromTrustedEvents(this.name, schema, resultEvents);
774
774
  }
775
- return new _a({
775
+ return new TimeSeries({
776
776
  name: this.name,
777
777
  schema,
778
778
  rows: toRows(schema, resultEvents),
@@ -785,7 +785,7 @@ export class TimeSeries {
785
785
  ...this.schema.slice(1),
786
786
  ]);
787
787
  const resultEvents = this.events.map((event) => event.asTimeRange());
788
- return _a.#fromTrustedEvents(this.name, schema, resultEvents);
788
+ return TimeSeries.#fromTrustedEvents(this.name, schema, resultEvents);
789
789
  }
790
790
  asInterval(value) {
791
791
  const schema = Object.freeze([
@@ -797,7 +797,7 @@ export class TimeSeries {
797
797
  ? event.asInterval(() => value(event, index))
798
798
  : event.asInterval(value);
799
799
  });
800
- return _a.#fromTrustedEvents(this.name, schema, nextEvents);
800
+ return TimeSeries.#fromTrustedEvents(this.name, schema, nextEvents);
801
801
  }
802
802
  join(other, options = {}) {
803
803
  const [left, right] = prepareSeriesForJoin([
@@ -856,7 +856,7 @@ export class TimeSeries {
856
856
  rightIndex += 1;
857
857
  }
858
858
  }
859
- return _a.#fromTrustedEvents(left.name, resultSchema, joinedEvents);
859
+ return TimeSeries.#fromTrustedEvents(left.name, resultSchema, joinedEvents);
860
860
  }
861
861
  /**
862
862
  * Example: `series.align(Sequence.every("1m"))`.
@@ -881,6 +881,13 @@ export class TimeSeries {
881
881
  * - `Sequence.every("1m")` defines an epoch-anchored minute grid
882
882
  * - `series.align(Sequence.every("1m"))` aligns onto the slice of that minute grid spanning the
883
883
  * current series extent
884
+ *
885
+ * **Multi-entity series:** alignment samples cross entity boundaries —
886
+ * `host-A`'s aligned bucket would interpolate or hold against
887
+ * `host-B`'s value. On a series carrying multiple entities (host,
888
+ * region, device id), use
889
+ * `series.partitionBy(col).align(...).collect()` to scope per entity.
890
+ * See {@link TimeSeries.partitionBy}.
884
891
  */
885
892
  align(sequence, options = {}) {
886
893
  const method = options.method ?? 'hold';
@@ -888,7 +895,7 @@ export class TimeSeries {
888
895
  const range = options.range ?? this.timeRange();
889
896
  const resultSchema = makeAlignedSchema(this.schema);
890
897
  if (!range) {
891
- return new _a({
898
+ return new TimeSeries({
892
899
  name: this.name,
893
900
  schema: resultSchema,
894
901
  rows: [],
@@ -928,7 +935,7 @@ export class TimeSeries {
928
935
  .map((column) => data[column.name]),
929
936
  ]);
930
937
  });
931
- return new _a({
938
+ return new TimeSeries({
932
939
  name: this.name,
933
940
  schema: resultSchema,
934
941
  rows: alignedRows,
@@ -968,7 +975,7 @@ export class TimeSeries {
968
975
  }
969
976
  bucket.push(event);
970
977
  }
971
- const buildGroup = (events) => new _a({
978
+ const buildGroup = (events) => new TimeSeries({
972
979
  name: this.name,
973
980
  schema: this.schema,
974
981
  rows: toRows(this.schema, events),
@@ -986,6 +993,46 @@ export class TimeSeries {
986
993
  }
987
994
  return result;
988
995
  }
996
+ /**
997
+ * Example: `series.partitionBy('host').fill({ cpu: 'linear' })`.
998
+ * Returns a {@link PartitionedTimeSeries} view that scopes stateful
999
+ * transforms to within each partition. Most stateful operators
1000
+ * (`fill`, `align`, `rolling`, `smooth`, `baseline`, `outliers`,
1001
+ * `diff`, `rate`, `pctChange`, `cumulative`, `shift`, `aggregate`)
1002
+ * read neighboring events when computing each output and silently
1003
+ * cross entity boundaries on multi-entity series — `partitionBy`
1004
+ * fixes that by running the op independently per partition and
1005
+ * reassembling.
1006
+ *
1007
+ * Composite partitioning by multiple columns is supported by passing
1008
+ * an array: `series.partitionBy(['host', 'region'])`.
1009
+ *
1010
+ * The return shape is always `TimeSeries`, not
1011
+ * `PartitionedTimeSeries` — each operation is a single step. To
1012
+ * chain another partitioned op, re-`partitionBy` after.
1013
+ *
1014
+ * Coming from pondjs / pandas: this is roughly the equivalent of
1015
+ * `df.groupby(col)` returning an object whose methods auto-apply
1016
+ * per group, but the return type is the regrouped frame, not the
1017
+ * grouped view.
1018
+ *
1019
+ * @example
1020
+ * ```ts
1021
+ * // Per-host fill — no cross-host interpolation
1022
+ * series.partitionBy('host').fill({ cpu: 'linear' });
1023
+ *
1024
+ * // Composite partitioning
1025
+ * series.partitionBy(['host', 'region']).rolling('5m', { cpu: 'avg' });
1026
+ *
1027
+ * // Arbitrary composition via .apply()
1028
+ * series.partitionBy('host').apply(g =>
1029
+ * g.fill({ cpu: 'linear' }).rolling('5m', { cpu: 'avg' }),
1030
+ * );
1031
+ * ```
1032
+ */
1033
+ partitionBy(by) {
1034
+ return new PartitionedTimeSeries(this, by);
1035
+ }
989
1036
  pivotByGroup(groupCol, valueCol, options = {}) {
990
1037
  if (this.schema[0].kind !== 'time') {
991
1038
  throw new TypeError(`pivotByGroup requires a time-keyed series; got ${this.schema[0].kind}`);
@@ -1076,7 +1123,7 @@ export class TimeSeries {
1076
1123
  }
1077
1124
  outputRows.push(row);
1078
1125
  }
1079
- return new _a({
1126
+ return new TimeSeries({
1080
1127
  name: this.name,
1081
1128
  schema: outputSchema,
1082
1129
  rows: outputRows,
@@ -1094,9 +1141,16 @@ export class TimeSeries {
1094
1141
  *
1095
1142
  * Example: `series.diff("requests", { drop: true })`.
1096
1143
  * Drops the first event instead of keeping it with undefined values.
1144
+ *
1145
+ * **Multi-entity series:** the "previous event" may belong to a
1146
+ * different entity, producing meaningless deltas across entity
1147
+ * boundaries. On a series carrying multiple entities (host, region,
1148
+ * device id), use
1149
+ * `series.partitionBy(col).diff(...).collect()` to scope per entity.
1150
+ * See {@link TimeSeries.partitionBy}.
1097
1151
  */
1098
1152
  diff(columns, options) {
1099
- return this.#diffOrRate('diff', columns, options);
1153
+ return TimeSeries.#diffOrRate(this, 'diff', columns, options);
1100
1154
  }
1101
1155
  /**
1102
1156
  * Example: `series.rate("requests")`.
@@ -1110,9 +1164,16 @@ export class TimeSeries {
1110
1164
  *
1111
1165
  * Example: `series.rate("requests", { drop: true })`.
1112
1166
  * Drops the first event instead of keeping it with undefined values.
1167
+ *
1168
+ * **Multi-entity series:** the "previous event" may belong to a
1169
+ * different entity, producing meaningless rates across entity
1170
+ * boundaries. On a series carrying multiple entities (host, region,
1171
+ * device id), use
1172
+ * `series.partitionBy(col).rate(...).collect()` to scope per entity.
1173
+ * See {@link TimeSeries.partitionBy}.
1113
1174
  */
1114
1175
  rate(columns, options) {
1115
- return this.#diffOrRate('rate', columns, options);
1176
+ return TimeSeries.#diffOrRate(this, 'rate', columns, options);
1116
1177
  }
1117
1178
  /**
1118
1179
  * Example: `series.pctChange("requests")`.
@@ -1120,18 +1181,31 @@ export class TimeSeries {
1120
1181
  * numeric columns. Non-specified columns pass through unchanged. The first
1121
1182
  * event gets `undefined` in affected columns unless `{ drop: true }` is
1122
1183
  * passed.
1184
+ *
1185
+ * **Multi-entity series:** the "previous event" may belong to a
1186
+ * different entity, producing meaningless percentages across entity
1187
+ * boundaries. On a series carrying multiple entities (host, region,
1188
+ * device id), use
1189
+ * `series.partitionBy(col).pctChange(...).collect()` to scope per
1190
+ * entity. See {@link TimeSeries.partitionBy}.
1123
1191
  */
1124
1192
  pctChange(columns, options) {
1125
- return this.#diffOrRate('pctChange', columns, options);
1126
- }
1127
- #diffOrRate(mode, columns, options) {
1193
+ return TimeSeries.#diffOrRate(this, 'pctChange', columns, options);
1194
+ }
1195
+ // Static private — the brand check is on the class itself, which
1196
+ // exists regardless of how individual instances were constructed.
1197
+ // This keeps the impl runtime-private (not reachable via
1198
+ // `series.diffOrRateImpl(...)` like a TS-only `private` field would
1199
+ // have been) while still working on instances built via
1200
+ // `#fromTrustedEvents`.
1201
+ static #diffOrRate(series, mode, columns, options) {
1128
1202
  const cols = typeof columns === 'string' ? [columns] : columns;
1129
1203
  const drop = options?.drop === true;
1130
1204
  if (cols.length === 0) {
1131
1205
  throw new Error(`${mode}() requires at least one column name`);
1132
1206
  }
1133
1207
  const targetSet = new Set(cols);
1134
- const outSchema = Object.freeze(this.schema.map((col, i) => {
1208
+ const outSchema = Object.freeze(series.schema.map((col, i) => {
1135
1209
  if (i === 0)
1136
1210
  return col;
1137
1211
  if (targetSet.has(col.name)) {
@@ -1139,9 +1213,9 @@ export class TimeSeries {
1139
1213
  }
1140
1214
  return col;
1141
1215
  }));
1142
- const events = this.events;
1216
+ const events = series.events;
1143
1217
  if (events.length === 0) {
1144
- return _a.#fromTrustedEvents(this.name, outSchema, []);
1218
+ return TimeSeries.#fromTrustedEvents(series.name, outSchema, []);
1145
1219
  }
1146
1220
  const resultEvents = [];
1147
1221
  if (!drop) {
@@ -1177,7 +1251,7 @@ export class TimeSeries {
1177
1251
  }
1178
1252
  resultEvents.push(new Event(curr.key(), data));
1179
1253
  }
1180
- return _a.#fromTrustedEvents(this.name, outSchema, resultEvents);
1254
+ return TimeSeries.#fromTrustedEvents(series.name, outSchema, resultEvents);
1181
1255
  }
1182
1256
  /**
1183
1257
  * Example: `series.cumulative({ requests: "sum" })`.
@@ -1186,6 +1260,13 @@ export class TimeSeries {
1186
1260
  *
1187
1261
  * Built-in accumulators: `"sum"`, `"max"`, `"min"`, `"count"`.
1188
1262
  * Custom accumulators: `(acc: number, value: number) => number`.
1263
+ *
1264
+ * **Multi-entity series:** the running accumulation interleaves
1265
+ * across entities — `host-A`'s next event sums on top of
1266
+ * `host-B`'s last value rather than `host-A`'s. On a series carrying
1267
+ * multiple entities (host, region, device id), use
1268
+ * `series.partitionBy(col).cumulative(...).collect()` to scope per
1269
+ * entity. See {@link TimeSeries.partitionBy}.
1189
1270
  */
1190
1271
  cumulative(spec) {
1191
1272
  const entries = Object.entries(spec);
@@ -1203,7 +1284,7 @@ export class TimeSeries {
1203
1284
  }));
1204
1285
  const events = this.events;
1205
1286
  if (events.length === 0) {
1206
- return _a.#fromTrustedEvents(this.name, outSchema, []);
1287
+ return TimeSeries.#fromTrustedEvents(this.name, outSchema, []);
1207
1288
  }
1208
1289
  const state = new Map();
1209
1290
  for (const [name, reducer] of entries) {
@@ -1255,12 +1336,19 @@ export class TimeSeries {
1255
1336
  }
1256
1337
  resultEvents.push(new Event(event.key(), data));
1257
1338
  }
1258
- return _a.#fromTrustedEvents(this.name, outSchema, resultEvents);
1339
+ return TimeSeries.#fromTrustedEvents(this.name, outSchema, resultEvents);
1259
1340
  }
1260
1341
  /**
1261
1342
  * Example: `series.shift("value", 1)`.
1262
1343
  * Lags column values by N events (positive N) or leads them (negative N).
1263
1344
  * Vacated positions get `undefined`.
1345
+ *
1346
+ * **Multi-entity series:** the value pulled in from N positions away
1347
+ * may belong to a different entity, producing meaningless lagged
1348
+ * values across entity boundaries. On a series carrying multiple
1349
+ * entities (host, region, device id), use
1350
+ * `series.partitionBy(col).shift(...).collect()` to scope per entity.
1351
+ * See {@link TimeSeries.partitionBy}.
1264
1352
  */
1265
1353
  shift(columns, n) {
1266
1354
  const cols = typeof columns === 'string' ? [columns] : columns;
@@ -1281,7 +1369,7 @@ export class TimeSeries {
1281
1369
  }));
1282
1370
  const events = this.events;
1283
1371
  if (events.length === 0) {
1284
- return _a.#fromTrustedEvents(this.name, outSchema, []);
1372
+ return TimeSeries.#fromTrustedEvents(this.name, outSchema, []);
1285
1373
  }
1286
1374
  const resultEvents = [];
1287
1375
  for (let i = 0; i < events.length; i++) {
@@ -1297,7 +1385,7 @@ export class TimeSeries {
1297
1385
  }
1298
1386
  resultEvents.push(new Event(events[i].key(), data));
1299
1387
  }
1300
- return _a.#fromTrustedEvents(this.name, outSchema, resultEvents);
1388
+ return TimeSeries.#fromTrustedEvents(this.name, outSchema, resultEvents);
1301
1389
  }
1302
1390
  /**
1303
1391
  * Example: `series.fill("hold")`.
@@ -1305,15 +1393,38 @@ export class TimeSeries {
1305
1393
  *
1306
1394
  * Example: `series.fill({ cpu: "linear", host: "hold" })`.
1307
1395
  * Per-column fill strategies. Unmentioned columns are left as-is.
1308
- * Strategy names: `"hold"` (forward fill), `"linear"` (time-interpolated),
1309
- * `"zero"` (fill with 0). A non-string value is used as a literal fill value.
1396
+ * Strategy names: `"hold"` (forward fill), `"bfill"` (backward fill),
1397
+ * `"linear"` (time-interpolated), `"zero"` (fill with 0). A non-string
1398
+ * value is used as a literal fill value.
1399
+ *
1400
+ * **Gap semantics — all-or-nothing.** A "gap" is a run of consecutive
1401
+ * `undefined` cells in one column. For each gap:
1402
+ * - With no options: fill the whole gap (existing default).
1403
+ * - With `{ limit: N }`: fill only if the gap length is at most N
1404
+ * cells. Otherwise leave the gap fully unfilled.
1405
+ * - With `{ maxGap: '3m' }`: fill only if the gap's *temporal* span
1406
+ * (from the prior known value to the next known value) is at most
1407
+ * the duration. Otherwise leave the gap fully unfilled.
1408
+ * - With both: fill only if both caps are met.
1409
+ *
1410
+ * The all-or-nothing semantic is the v0.9.0 default. Earlier
1411
+ * versions partially filled (`limit: 3` on a 5-cell gap filled 3,
1412
+ * left 2 unfilled). The new semantic avoids fabricating data
1413
+ * across what's actually a long outage — partial fills propagate
1414
+ * stale values past their useful lifetime.
1310
1415
  *
1311
- * Example: `series.fill("hold", { limit: 3 })`.
1312
- * Caps consecutive fills per column. After `limit` consecutive fills, further
1313
- * `undefined` values are left as-is until a real value resets the counter.
1416
+ * `"linear"` requires known values on both sides of a gap; leading
1417
+ * and trailing gaps are unfilled. `"hold"` fills any internal or
1418
+ * trailing gap (leading has no prior value). `"bfill"` fills any
1419
+ * internal or leading gap (trailing has no next value). `"zero"`
1420
+ * and literal fills work on any gap that fits the size caps.
1314
1421
  *
1315
- * `"linear"` requires known values on both sides of a gap to interpolate.
1316
- * Leading and trailing `undefined` runs are left unfilled.
1422
+ * **Multi-entity series:** fill walks one chronological event
1423
+ * sequence — `host-A`'s missing cell would `linear`-interpolate or
1424
+ * `hold`-carry against `host-B`'s neighboring value. On a series
1425
+ * carrying multiple entities (host, region, device id), use
1426
+ * `series.partitionBy(col).fill(...).collect()` to scope per entity.
1427
+ * See {@link TimeSeries.partitionBy}.
1317
1428
  */
1318
1429
  fill(strategy, options) {
1319
1430
  if (this.events.length === 0) {
@@ -1343,6 +1454,7 @@ export class TimeSeries {
1343
1454
  }
1344
1455
  }
1345
1456
  const limit = options?.limit;
1457
+ const maxGapMs = options?.maxGap === undefined ? undefined : parseDuration(options.maxGap);
1346
1458
  const n = this.events.length;
1347
1459
  const columns = {};
1348
1460
  for (const name of colNames) {
@@ -1358,106 +1470,110 @@ export class TimeSeries {
1358
1470
  for (let i = 0; i < n; i++) {
1359
1471
  times[i] = this.events[i].begin();
1360
1472
  }
1473
+ // Walk each column and apply per-strategy fill on a per-gap basis,
1474
+ // with all-or-nothing limit / maxGap checks.
1361
1475
  for (const [name, spec] of specs) {
1362
1476
  const col = columns[name];
1363
1477
  if (!col)
1364
1478
  continue;
1365
- switch (spec.mode) {
1366
- case 'hold': {
1367
- let last;
1368
- let consecutive = 0;
1369
- for (let i = 0; i < n; i++) {
1370
- if (col[i] !== undefined) {
1371
- last = col[i];
1372
- consecutive = 0;
1373
- }
1374
- else if (last !== undefined) {
1375
- consecutive++;
1376
- if (limit === undefined || consecutive <= limit) {
1377
- col[i] = last;
1378
- }
1379
- }
1380
- }
1381
- break;
1479
+ let i = 0;
1480
+ while (i < n) {
1481
+ if (col[i] !== undefined) {
1482
+ i += 1;
1483
+ continue;
1382
1484
  }
1383
- case 'bfill': {
1384
- let next;
1385
- let consecutive = 0;
1386
- for (let i = n - 1; i >= 0; i--) {
1387
- if (col[i] !== undefined) {
1388
- next = col[i];
1389
- consecutive = 0;
1390
- }
1391
- else if (next !== undefined) {
1392
- consecutive++;
1393
- if (limit === undefined || consecutive <= limit) {
1394
- col[i] = next;
1395
- }
1396
- }
1397
- }
1398
- break;
1485
+ // Found the start of a gap.
1486
+ const start = i;
1487
+ while (i < n && col[i] === undefined)
1488
+ i += 1;
1489
+ const end = i; // exclusive
1490
+ const length = end - start;
1491
+ const hasPrev = start > 0;
1492
+ const hasNext = end < n;
1493
+ // Strategy-level fillability: do we have the neighbors required?
1494
+ let strategyOk;
1495
+ switch (spec.mode) {
1496
+ case 'linear':
1497
+ strategyOk = hasPrev && hasNext;
1498
+ break;
1499
+ case 'hold':
1500
+ strategyOk = hasPrev;
1501
+ break;
1502
+ case 'bfill':
1503
+ strategyOk = hasNext;
1504
+ break;
1505
+ default:
1506
+ strategyOk = true; // zero, literal — no neighbor needed
1399
1507
  }
1400
- case 'zero': {
1401
- let consecutive = 0;
1402
- for (let i = 0; i < n; i++) {
1403
- if (col[i] !== undefined) {
1404
- consecutive = 0;
1405
- }
1406
- else {
1407
- consecutive++;
1408
- if (limit === undefined || consecutive <= limit) {
1409
- col[i] = 0;
1410
- }
1411
- }
1508
+ if (!strategyOk)
1509
+ continue;
1510
+ // Size caps: count and temporal span.
1511
+ if (limit !== undefined && length > limit)
1512
+ continue;
1513
+ if (maxGapMs !== undefined) {
1514
+ // Span = time from the last known value to the next known
1515
+ // value. For internal gaps this uses both neighbors; for
1516
+ // edge-only gaps (hold trailing, bfill leading), use the
1517
+ // available neighbor and the gap's own first/last timestamp
1518
+ // as the other end so maxGap caps the carry-forward distance.
1519
+ let span;
1520
+ if (hasPrev && hasNext) {
1521
+ span = times[end] - times[start - 1];
1412
1522
  }
1413
- break;
1414
- }
1415
- case 'literal': {
1416
- let consecutive = 0;
1417
- for (let i = 0; i < n; i++) {
1418
- if (col[i] !== undefined) {
1419
- consecutive = 0;
1420
- }
1421
- else {
1422
- consecutive++;
1423
- if (limit === undefined || consecutive <= limit) {
1424
- col[i] = spec.value;
1425
- }
1426
- }
1523
+ else if (hasPrev) {
1524
+ // trailing gap (hold): cap distance from prev known to last gap cell
1525
+ span = times[end - 1] - times[start - 1];
1427
1526
  }
1428
- break;
1527
+ else if (hasNext) {
1528
+ // leading gap (bfill): cap distance from first gap cell to next known
1529
+ span = times[end] - times[start];
1530
+ }
1531
+ else {
1532
+ span = 0; // no known neighbor on either side: zero/literal fill of an all-undefined column, so maxGap never blocks it
1533
+ }
1534
+ if (span > maxGapMs)
1535
+ continue;
1429
1536
  }
1430
- case 'linear': {
1431
- let gapStart = -1;
1432
- for (let i = 0; i < n; i++) {
1433
- if (col[i] !== undefined) {
1434
- if (gapStart >= 0 && gapStart > 0) {
1435
- const before = col[gapStart - 1];
1436
- const after = col[i];
1437
- const t0 = times[gapStart - 1];
1438
- const t1 = times[i];
1439
- const span = t1 - t0;
1440
- const gapLen = i - gapStart;
1441
- for (let j = gapStart; j < i; j++) {
1442
- const fillIndex = j - gapStart + 1;
1443
- if (limit !== undefined && fillIndex > limit)
1444
- break;
1445
- if (span === 0) {
1446
- col[j] = before;
1447
- }
1448
- else {
1449
- const ratio = (times[j] - t0) / span;
1450
- col[j] = before + (after - before) * ratio;
1451
- }
1452
- }
1537
+ // Fill the gap per strategy.
1538
+ switch (spec.mode) {
1539
+ case 'hold': {
1540
+ const v = col[start - 1];
1541
+ for (let j = start; j < end; j++)
1542
+ col[j] = v;
1543
+ break;
1544
+ }
1545
+ case 'bfill': {
1546
+ const v = col[end];
1547
+ for (let j = start; j < end; j++)
1548
+ col[j] = v;
1549
+ break;
1550
+ }
1551
+ case 'zero': {
1552
+ for (let j = start; j < end; j++)
1553
+ col[j] = 0;
1554
+ break;
1555
+ }
1556
+ case 'literal': {
1557
+ for (let j = start; j < end; j++)
1558
+ col[j] = spec.value;
1559
+ break;
1560
+ }
1561
+ case 'linear': {
1562
+ const before = col[start - 1];
1563
+ const after = col[end];
1564
+ const t0 = times[start - 1];
1565
+ const t1 = times[end];
1566
+ const tspan = t1 - t0;
1567
+ for (let j = start; j < end; j++) {
1568
+ if (tspan === 0) {
1569
+ col[j] = before;
1570
+ }
1571
+ else {
1572
+ col[j] = before + (after - before) * ((times[j] - t0) / tspan);
1453
1573
  }
1454
- gapStart = -1;
1455
- }
1456
- else if (gapStart < 0) {
1457
- gapStart = i;
1458
1574
  }
1575
+ break;
1459
1576
  }
1460
- break;
1461
1577
  }
1462
1578
  }
1463
1579
  }
@@ -1469,7 +1585,156 @@ export class TimeSeries {
1469
1585
  }
1470
1586
  resultEvents.push(new Event(this.events[i].key(), data));
1471
1587
  }
1472
- return _a.#fromTrustedEvents(this.name, this.schema, resultEvents);
1588
+ return TimeSeries.#fromTrustedEvents(this.name, this.schema, resultEvents);
1589
+ }
1590
+ /**
1591
+ * Example: `series.dedupe()`.
1592
+ * Collapses events that share a key. The default key is the full
1593
+ * event key — `begin()` for time-keyed series, `begin()`+`end()` for
1594
+ * time-range, and `begin()`+`end()`+`value` for interval-keyed
1595
+ * series. Two events with the same full key are treated as
1596
+ * duplicates. By default the last occurrence wins (`keep: 'last'`).
1597
+ *
1598
+ * **Multi-entity series:** events from different entities at the
1599
+ * same key collapse as if they were duplicates of each other —
1600
+ * `host-A`@t and `host-B`@t collide on the timestamp alone. On a
1601
+ * series carrying multiple entities (host, region, device id), use
1602
+ * `series.partitionBy(col).dedupe(...).collect()` so the partition
1603
+ * column is part of the duplicate identity. See
1604
+ * {@link TimeSeries.partitionBy}.
1605
+ *
1606
+ * ```ts
1607
+ * // Per-host dedupe — same time AND same host is the duplicate key.
1608
+ * series.partitionBy('host').dedupe({ keep: 'last' }).collect();
1609
+ * ```
1610
+ *
1611
+ * The `keep` option chooses the resolution policy:
1612
+ *
1613
+ * - `'first'` — keep the first occurrence at each key.
1614
+ * - `'last'` — keep the last occurrence (default; matches WebSocket
1615
+ * replay semantics).
1616
+ * - `'error'` — throw on the first duplicate seen. Useful for
1617
+ * ingestion paths that want to fail loudly on shape violations.
1618
+ * - `'drop'` — discard *every* event at any duplicate key.
1619
+ * Conservative; the value of "1.5 events at this timestamp" is
1620
+ * rarely defensible.
1621
+ * - `{ min: col }` / `{ max: col }` — keep the event with the
1622
+ * smallest / largest value at the named numeric column. Ties keep
1623
+ * the earliest tied event. Events with `undefined` at that column
1624
+ * lose to any event with a defined value.
1625
+ * - `(events) => Event` — custom resolver. Receives all duplicates
1626
+ * at a single key (length ≥ 2) and returns one. The cleanest
1627
+ * pattern is to start from one of the input events and use
1628
+ * `event.set(field, value)` so the type stays narrow:
1629
+ *
1630
+ * ```ts
1631
+ * series.dedupe({
1632
+ * keep: (events) => {
1633
+ * const last = events[events.length - 1];
1634
+ * const avg =
1635
+ * events.reduce((a, e) => a + (e.get('cpu') ?? 0), 0) /
1636
+ * events.length;
1637
+ * return last.set('cpu', avg);
1638
+ * },
1639
+ * });
1640
+ * ```
1641
+ *
1642
+ * Real-world ingest produces duplicates: WebSocket replays, Kafka
1643
+ * at-least-once, retried HTTP fetches, polling overlaps. `dedupe()`
1644
+ * is the post-ingest cleanup primitive.
1645
+ */
1646
+ dedupe(options = {}) {
1647
+ const keep = options.keep ?? 'last';
1648
+ if (this.events.length === 0) {
1649
+ return this;
1650
+ }
1651
+ // Bucket key encoder. For time-keyed series, `begin()` alone fully
1652
+ // identifies an event key; for time-range, both `begin()` and
1653
+ // `end()` matter; for interval-keyed, the labeled `value` is part
1654
+ // of identity too. A naive `begin()`-only key would silently
1655
+ // collapse semantically distinct interval/timeRange events.
1656
+ const firstKind = this.schema[0].kind;
1657
+ const keyOf = (event) => {
1658
+ if (firstKind === 'time') {
1659
+ return `${event.begin()}`;
1660
+ }
1661
+ if (firstKind === 'timeRange') {
1662
+ return `${event.begin()}:${event.end()}`;
1663
+ }
1664
+ // interval
1665
+ const k = event.key();
1666
+ return `${event.begin()}:${event.end()}:${String(k.value)}`;
1667
+ };
1668
+ // Single-pass bucket by full event key. Map iteration is insertion-
1669
+ // order; since the input events are already sorted by key, each
1670
+ // bucket corresponds to a unique key and the buckets traverse in
1671
+ // input order. No re-sort needed.
1672
+ const buckets = new Map();
1673
+ for (const event of this.events) {
1674
+ const k = keyOf(event);
1675
+ let bucket = buckets.get(k);
1676
+ if (!bucket) {
1677
+ bucket = [];
1678
+ buckets.set(k, bucket);
1679
+ }
1680
+ bucket.push(event);
1681
+ }
1682
+ const resolved = [];
1683
+ for (const [keyStr, bucket] of buckets) {
1684
+ if (bucket.length === 1) {
1685
+ resolved.push(bucket[0]);
1686
+ continue;
1687
+ }
1688
+ // Multiple events sharing the same key — apply the policy.
1689
+ if (typeof keep === 'function') {
1690
+ resolved.push(keep(bucket));
1691
+ continue;
1692
+ }
1693
+ if (keep === 'first') {
1694
+ resolved.push(bucket[0]);
1695
+ continue;
1696
+ }
1697
+ if (keep === 'last') {
1698
+ resolved.push(bucket[bucket.length - 1]);
1699
+ continue;
1700
+ }
1701
+ if (keep === 'error') {
1702
+ // Use the first event's begin() for the human-readable timestamp.
1703
+ // For interval/timeRange-keyed series, also include the full
1704
+ // encoded key so the failure mode names the exact collision.
1705
+ const t = bucket[0].begin();
1706
+ const detail = firstKind === 'time'
1707
+ ? `${new Date(t).toISOString()} (${t})`
1708
+ : `key "${keyStr}"`;
1709
+ throw new Error(`dedupe: ${bucket.length} events at ${detail}. ` +
1710
+ `Specify a different 'keep' policy or fix upstream.`);
1711
+ }
1712
+ if (keep === 'drop') {
1713
+ continue;
1714
+ }
1715
+ if ('min' in keep || 'max' in keep) {
1716
+ const isMin = 'min' in keep;
1717
+ const col = (isMin ? keep.min : keep.max);
1718
+ let best = bucket[0];
1719
+ let bestVal = best.get(col);
1720
+ for (let i = 1; i < bucket.length; i += 1) {
1721
+ const candidate = bucket[i];
1722
+ const v = candidate.get(col);
1723
+ if (v === undefined)
1724
+ continue;
1725
+ if (bestVal === undefined || (isMin ? v < bestVal : v > bestVal)) {
1726
+ best = candidate;
1727
+ bestVal = v;
1728
+ }
1729
+ }
1730
+ resolved.push(best);
1731
+ continue;
1732
+ }
1733
+ // Defensive fallthrough: unrecognized keep shape.
1734
+ throw new TypeError(`dedupe: invalid keep option ${JSON.stringify(keep)}. ` +
1735
+ `Expected 'first' | 'last' | 'error' | 'drop' | { min: col } | { max: col } | (events) => Event.`);
1736
+ }
1737
+ return TimeSeries.#fromTrustedEvents(this.name, this.schema, resolved);
1473
1738
  }
1474
1739
  rolling(sequenceOrWindow, windowOrMapping, mappingOrOptions, maybeOptions = {}) {
1475
1740
  let mapping;
@@ -1531,7 +1796,7 @@ export class TimeSeries {
1531
1796
  ...resultColumnDefs,
1532
1797
  ]);
1533
1798
  if (!range) {
1534
- return new _a({
1799
+ return new TimeSeries({
1535
1800
  name: this.name,
1536
1801
  schema: resultSchema,
1537
1802
  rows: [],
@@ -1550,7 +1815,7 @@ export class TimeSeries {
1550
1815
  });
1551
1816
  return Object.freeze([bucket, ...aggregated]);
1552
1817
  });
1553
- return new _a({
1818
+ return new TimeSeries({
1554
1819
  name: this.name,
1555
1820
  schema: resultSchema,
1556
1821
  rows: resultRows,
@@ -1691,7 +1956,7 @@ export class TimeSeries {
1691
1956
  groupStart = groupEnd;
1692
1957
  }
1693
1958
  }
1694
- return new _a({
1959
+ return new TimeSeries({
1695
1960
  name: this.name,
1696
1961
  schema: resultSchema,
1697
1962
  rows: resultRows,
@@ -1713,6 +1978,13 @@ export class TimeSeries {
1713
1978
  *
1714
1979
  * When `output` is omitted, the smoothed values replace the target column. When `output` is
1715
1980
  * supplied, the smoothed values are appended as a new optional numeric column.
1981
+ *
1982
+ * **Multi-entity series:** the smoothing window pulls values from
1983
+ * every entity into each smoothed point — `host-A`'s smoothed value
1984
+ * is blended with `host-B`'s and `host-C`'s. On a series carrying
1985
+ * multiple entities (host, region, device id), use
1986
+ * `series.partitionBy(col).smooth(...).collect()` to scope per
1987
+ * entity. See {@link TimeSeries.partitionBy}.
1716
1988
  */
1717
1989
  smooth(column, method, options) {
1718
1990
  const output = options.output;
@@ -1767,7 +2039,7 @@ export class TimeSeries {
1767
2039
  ]);
1768
2040
  });
1769
2041
  const keptRows = warmup > 0 ? resultRows.slice(warmup) : resultRows;
1770
- return new _a({
2042
+ return new TimeSeries({
1771
2043
  name: this.name,
1772
2044
  schema: resultSchema,
1773
2045
  rows: keptRows,
@@ -1805,7 +2077,7 @@ export class TimeSeries {
1805
2077
  .map((nextColumn) => nextEvent.data()[nextColumn.name]),
1806
2078
  ]);
1807
2079
  });
1808
- return new _a({
2080
+ return new TimeSeries({
1809
2081
  name: this.name,
1810
2082
  schema: resultSchema,
1811
2083
  rows: resultRows,
@@ -1899,7 +2171,7 @@ export class TimeSeries {
1899
2171
  .map((nextColumn) => nextEvent.data()[nextColumn.name]),
1900
2172
  ]);
1901
2173
  });
1902
- return new _a({
2174
+ return new TimeSeries({
1903
2175
  name: this.name,
1904
2176
  schema: resultSchema,
1905
2177
  rows: resultRows,
@@ -1907,11 +2179,11 @@ export class TimeSeries {
1907
2179
  }
1908
2180
  /** Example: `series.slice(0, 10)`. Returns a positional half-open slice of the series. */
1909
2181
  slice(beginIndex, endIndex) {
1910
- return _a.#fromTrustedEvents(this.name, this.schema, this.events.slice(beginIndex, endIndex));
2182
+ return TimeSeries.#fromTrustedEvents(this.name, this.schema, this.events.slice(beginIndex, endIndex));
1911
2183
  }
1912
2184
  /** Example: `series.filter(event => event.get("active"))`. Returns a new series containing only events that match the predicate. */
1913
2185
  filter(predicate) {
1914
- return _a.#fromTrustedEvents(this.name, this.schema, this.events.filter((event, index) => predicate(event, index)));
2186
+ return TimeSeries.#fromTrustedEvents(this.name, this.schema, this.events.filter((event, index) => predicate(event, index)));
1915
2187
  }
1916
2188
  /** Example: `series.find(event => event.get("value") > 0)`. Returns the first event that matches the predicate, if any. */
1917
2189
  find(predicate) {
@@ -2032,7 +2304,7 @@ export class TimeSeries {
2032
2304
  const trimmedEvents = this.events
2033
2305
  .map((event) => event.trim(range))
2034
2306
  .filter((event) => event !== undefined);
2035
- return _a.#fromTrustedEvents(this.name, this.schema, trimmedEvents);
2307
+ return TimeSeries.#fromTrustedEvents(this.name, this.schema, trimmedEvents);
2036
2308
  }
2037
2309
  /** Example: `series.before(Date.now())`. Returns the events ending strictly before the supplied temporal boundary. */
2038
2310
  before(boundary) {
@@ -2090,7 +2362,7 @@ export class TimeSeries {
2090
2362
  const selectedEvent = event.select(...keys);
2091
2363
  return selectedEvent;
2092
2364
  });
2093
- return _a.#fromTrustedEvents(this.name, resultSchema, resultEvents);
2365
+ return TimeSeries.#fromTrustedEvents(this.name, resultSchema, resultEvents);
2094
2366
  }
2095
2367
  /** Example: `series.rename({ cpu: "usage" })`. Returns a new series with payload field names renamed according to the supplied mapping. */
2096
2368
  rename(mapping) {
@@ -2108,7 +2380,7 @@ export class TimeSeries {
2108
2380
  const renamedEvent = event.rename(mapping);
2109
2381
  return renamedEvent;
2110
2382
  });
2111
- return _a.#fromTrustedEvents(this.name, resultSchema, resultEvents);
2383
+ return TimeSeries.#fromTrustedEvents(this.name, resultSchema, resultEvents);
2112
2384
  }
2113
2385
  collapse(keys, output, reducer, options) {
2114
2386
  const nextEvents = this.events.map((event) => {
@@ -2136,7 +2408,7 @@ export class TimeSeries {
2136
2408
  : 'string',
2137
2409
  },
2138
2410
  ]);
2139
- return _a.#fromTrustedEvents(this.name, resultSchema, nextEvents);
2411
+ return TimeSeries.#fromTrustedEvents(this.name, resultSchema, nextEvents);
2140
2412
  }
2141
2413
  /**
2142
2414
  * Example: `series.arrayContains("tags", "critical")`.
@@ -2146,7 +2418,7 @@ export class TimeSeries {
2146
2418
  * carries a list of scalars.
2147
2419
  */
2148
2420
  arrayContains(col, value) {
2149
- return _a.#fromTrustedEvents(this.name, this.schema, this.events.filter((event) => {
2421
+ return TimeSeries.#fromTrustedEvents(this.name, this.schema, this.events.filter((event) => {
2150
2422
  const data = event.data();
2151
2423
  const arr = data[col];
2152
2424
  return Array.isArray(arr) && arr.includes(value);
@@ -2160,7 +2432,7 @@ export class TimeSeries {
2160
2432
  * array are dropped.
2161
2433
  */
2162
2434
  arrayContainsAll(col, values) {
2163
- return _a.#fromTrustedEvents(this.name, this.schema, this.events.filter((event) => {
2435
+ return TimeSeries.#fromTrustedEvents(this.name, this.schema, this.events.filter((event) => {
2164
2436
  const data = event.data();
2165
2437
  const arr = data[col];
2166
2438
  if (!Array.isArray(arr))
@@ -2179,7 +2451,7 @@ export class TimeSeries {
2179
2451
  * an empty series. Events with an `undefined` array are dropped.
2180
2452
  */
2181
2453
  arrayContainsAny(col, values) {
2182
- return _a.#fromTrustedEvents(this.name, this.schema, this.events.filter((event) => {
2454
+ return TimeSeries.#fromTrustedEvents(this.name, this.schema, this.events.filter((event) => {
2183
2455
  const data = event.data();
2184
2456
  const arr = data[col];
2185
2457
  if (!Array.isArray(arr))
@@ -2219,7 +2491,7 @@ export class TimeSeries {
2219
2491
  return data[column.name];
2220
2492
  }));
2221
2493
  });
2222
- return new _a({
2494
+ return new TimeSeries({
2223
2495
  name: this.name,
2224
2496
  schema: resultSchema,
2225
2497
  rows: resultRows,
@@ -2255,7 +2527,7 @@ export class TimeSeries {
2255
2527
  }));
2256
2528
  }
2257
2529
  }
2258
- return new _a({
2530
+ return new TimeSeries({
2259
2531
  name: this.name,
2260
2532
  schema: resultSchema,
2261
2533
  rows: resultRows,
@@ -2356,6 +2628,15 @@ export class TimeSeries {
2356
2628
  *
2357
2629
  * Internally a single `rolling(window, { avg, sd })` pass over the
2358
2630
  * source; band edges are derived arithmetically per event.
2631
+ *
2632
+ * **Multi-entity series:** the baseline window aggregates across
2633
+ * every entity, so `host-A`'s `avg`/`sd` reflect the cross-entity
2634
+ * mean/spread rather than `host-A`'s own. Anomaly detection on a
2635
+ * multi-entity baseline flags events relative to the wrong
2636
+ * population. On a series carrying multiple entities (host, region,
2637
+ * device id), use
2638
+ * `series.partitionBy(col).baseline(...).collect()` to scope per
2639
+ * entity. See {@link TimeSeries.partitionBy}.
2359
2640
  */
2360
2641
  baseline(col, options) {
2361
2642
  const { window, sigma, alignment } = options;
@@ -2410,7 +2691,7 @@ export class TimeSeries {
2410
2691
  lowerNum,
2411
2692
  ]);
2412
2693
  });
2413
- return new _a({
2694
+ return new TimeSeries({
2414
2695
  name: this.name,
2415
2696
  schema: resultSchema,
2416
2697
  rows: resultRows,
@@ -2439,6 +2720,14 @@ export class TimeSeries {
2439
2720
  * Internally: computes `rolling(window, { avg, sd })` using the
2440
2721
  * output-map form, zips with the source events by index, and keeps
2441
2722
  * events where `|value - avg| > sigma * sd`.
2723
+ *
2724
+ * **Multi-entity series:** the rolling baseline aggregates across
2725
+ * every entity, so the deviation threshold reflects the wrong
2726
+ * population — `host-A`'s "outlier" status is decided against the
2727
+ * cross-entity mean rather than `host-A`'s own. On a series carrying
2728
+ * multiple entities (host, region, device id), use
2729
+ * `series.partitionBy(col).outliers(...).collect()` to scope per
2730
+ * entity. See {@link TimeSeries.partitionBy}.
2442
2731
  */
2443
2732
  outliers(col, options) {
2444
2733
  const { window, sigma, alignment } = options;
@@ -2475,7 +2764,7 @@ export class TimeSeries {
2475
2764
  kept.push(src);
2476
2765
  }
2477
2766
  }
2478
- return _a.#fromTrustedEvents(this.name, this.schema, kept);
2767
+ return TimeSeries.#fromTrustedEvents(this.name, this.schema, kept);
2479
2768
  }
2480
2769
  /**
2481
2770
  * Example: `TimeSeries.fromPoints(pts, { schema: [...] })`.
@@ -2500,7 +2789,7 @@ export class TimeSeries {
2500
2789
  throw new TypeError(`TimeSeries.fromPoints requires a time-keyed schema; got first column kind '${schema[0].kind}'`);
2501
2790
  }
2502
2791
  const valueCols = schema.slice(1);
2503
- return new _a({
2792
+ return new TimeSeries({
2504
2793
  name: options.name ?? 'points',
2505
2794
  schema,
2506
2795
  rows: points.map((p) => [
@@ -2510,7 +2799,6 @@ export class TimeSeries {
2510
2799
  });
2511
2800
  }
2512
2801
  }
2513
- _a = TimeSeries;
2514
2802
  function aggregateInternal(series, sequence, mapping, options = {}) {
2515
2803
  const range = options.range ?? series.timeRange();
2516
2804
  const aggregateColumns = normalizeAggregateColumns(series.schema, mapping);