@mux-magic/tools 0.1.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/applyRenameRegex.test.ts +49 -0
- package/src/applyRenameRegex.ts +22 -0
- package/src/index.ts +42 -0
- package/src/logMessage.test.ts +112 -9
- package/src/logMessage.ts +29 -0
- package/src/logging/context.test.ts +57 -0
- package/src/logging/context.ts +23 -0
- package/src/logging/lineSink.test.ts +135 -0
- package/src/logging/lineSink.ts +74 -0
- package/src/logging/logger.test.ts +154 -0
- package/src/logging/logger.ts +96 -0
- package/src/logging/mode.test.ts +36 -0
- package/src/logging/mode.ts +29 -0
- package/src/logging/startSpan.test.ts +150 -0
- package/src/logging/startSpan.ts +51 -0
- package/src/sourcePath.test.ts +16 -0
- package/src/sourcePath.ts +17 -0
- package/src/taskScheduler.injection.test.ts +72 -0
- package/src/taskScheduler.test.ts +673 -0
- package/src/taskScheduler.ts +414 -0
|
@@ -0,0 +1,414 @@
|
|
|
1
|
+
import {
|
|
2
|
+
finalize,
|
|
3
|
+
ignoreElements,
|
|
4
|
+
mergeMap,
|
|
5
|
+
Observable,
|
|
6
|
+
type OperatorFunction,
|
|
7
|
+
Subject,
|
|
8
|
+
type Subscriber,
|
|
9
|
+
type Subscription,
|
|
10
|
+
tap,
|
|
11
|
+
} from "rxjs"
|
|
12
|
+
|
|
13
|
+
// The scheduler historically read the active job id from server-only
// AsyncLocalStorage. To keep @mux-magic/tools free of server imports,
// the provider is injected at init time. CLI passes nothing (constant
// null); server passes its real getActiveJobId from logCapture.
//
// Returns the id of the job the current async context belongs to, or
// null/undefined when no job is active.
type GetActiveJobId = () => string | null | undefined

// Default provider: no active job. Used until (and unless) a real
// provider is injected via initTaskScheduler options.
const nullJobIdProvider: GetActiveJobId = () => null

// Module-level injection point; reassigned by initTaskScheduler and
// restored by __resetTaskSchedulerForTests.
let getActiveJobId: GetActiveJobId = nullJobIdProvider
|
|
22
|
+
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
// Process-wide Task scheduler
|
|
25
|
+
//
|
|
26
|
+
// A "Task" is a unit of heavy work (per-file copy, ffmpeg invocation, etc.)
|
|
27
|
+
// that's part of a Job. Tasks compete for a fixed pool of `concurrency`
|
|
28
|
+
// slots under TWO coupled constraints:
|
|
29
|
+
// 1. inflight-global < MAX_THREADS (unchanged from original design)
|
|
30
|
+
// 2. inflight-for-job < job.claim (new — per-job quota)
|
|
31
|
+
//
|
|
32
|
+
// The per-job claim is registered before a job's tasks are enqueued via
|
|
33
|
+
// `registerJobClaim(jobId, claim)` and torn down via `unregisterJobClaim`
|
|
34
|
+
// once the job finishes. Tasks without a jobId (null) are gated only by
|
|
35
|
+
// the global cap.
|
|
36
|
+
//
|
|
37
|
+
// Wiring: a single inbox Subject carries `{ bridge$, jobId }` pairs through
|
|
38
|
+
// a custom scheduler operator. Each `runTask(work$)` pushes a gated inner
|
|
39
|
+
// Observable onto the inbox; when the operator grants a slot (both constraints
|
|
40
|
+
// satisfied), it subscribes to bridge$ which then starts work$ and forwards
|
|
41
|
+
// values to the caller. Slot is held for as long as the bridge$ subscription
|
|
42
|
+
// is alive, then released on complete/error or on caller unsubscribe.
|
|
43
|
+
//
|
|
44
|
+
// Fair scheduling: when the front of the queue can't be admitted (per-job
|
|
45
|
+
// cap full), the operator scans forward to find any task from a different
|
|
46
|
+
// job that can run — preventing one job's saturated claim from blocking
|
|
47
|
+
// other jobs' tasks.
|
|
48
|
+
//
|
|
49
|
+
// Composition rule: operators that already route through `runTask` /
|
|
50
|
+
// `runTasks` MUST NOT be nested inside another finite-concurrency
|
|
51
|
+
// `mergeMap(..., n)` operating over scheduled work. Use unbounded
|
|
52
|
+
// `mergeAll()` upstream and let the scheduler do the bounding.
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
|
|
55
|
+
// One queued unit of work as seen by the scheduler operator.
// bridge$ — gated inner Observable; subscribing to it starts the work
//           and holds the slot until it completes or errors.
// jobId   — owning job for per-job claim accounting, or null for
//           tasks gated only by the global cap.
type ScheduledTask = {
  bridge$: Observable<never>
  jobId: string | null
}

// Process-wide singleton state: the configured concurrency (null until
// initTaskScheduler runs) and the inbox Subject tasks are pushed onto.
let concurrency: number | null = null
let inbox: Subject<ScheduledTask> | null = null

// Per-job thread claim registry — populated by registerJobClaim /
// unregisterJobClaim outside the scheduler operator so callers can
// register before enqueueing tasks.
const claimByJob = new Map<string, number>()
|
|
67
|
+
|
|
68
|
+
const ensureInbox = (): Subject<ScheduledTask> => {
|
|
69
|
+
if (inbox === null) {
|
|
70
|
+
throw new Error(
|
|
71
|
+
"Task scheduler not initialized. Call initTaskScheduler() at process startup.",
|
|
72
|
+
)
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
return inbox
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
// Custom scheduler operator: replaces the former `mergeAll(concurrency)`.
// Enforces the global cap AND the per-job claim on every admission decision.
// Returns an Observable<never> — the scheduler emits nothing itself; values
// flow to callers through each task's bridge$ side channel (see runTask).
const buildScheduler =
  (
    maxConcurrency: number,
  ): ((
    source: Observable<ScheduledTask>,
  ) => Observable<never>) =>
  (source$) =>
    new Observable<never>((outerSub) => {
      // Global inflight count, per-job inflight counts, and the FIFO
      // wait queue. All local to this subscription.
      let inflight = 0
      const inflightByJob = new Map<string, number>()
      const queue: ScheduledTask[] = []

      // True when the task may start now: global cap has room AND, for
      // job-owned tasks, the job is under its claim. Unregistered jobs
      // fall back to the global cap (claim defaults to maxConcurrency).
      const canAdmit = ({
        jobId,
      }: ScheduledTask): boolean => {
        if (inflight >= maxConcurrency) return false
        if (jobId === null) return true
        const claim =
          claimByJob.get(jobId) ?? maxConcurrency
        return (inflightByJob.get(jobId) ?? 0) < claim
      }

      // Releases the slot held by a finished (or errored) task, then
      // tries to admit queued work that may now fit. Per-job entries are
      // deleted at zero so the map doesn't accumulate finished jobs.
      const onComplete = (jobId: string | null): void => {
        inflight -= 1
        if (jobId !== null) {
          const count = (inflightByJob.get(jobId) ?? 0) - 1
          if (count <= 0) {
            inflightByJob.delete(jobId)
          } else {
            inflightByJob.set(jobId, count)
          }
        }
        admitFromQueue()
      }

      // Removes queue[index], books the slot, and subscribes the bridge —
      // which is what actually starts the task's work (see runTask).
      // complete and error both release the slot via onComplete.
      const admit = (index: number): void => {
        const task = queue.splice(index, 1)[0]
        if (!task) return
        inflight += 1
        if (task.jobId !== null) {
          inflightByJob.set(
            task.jobId,
            (inflightByJob.get(task.jobId) ?? 0) + 1,
          )
        }
        task.bridge$.subscribe({
          complete: () => onComplete(task.jobId),
          error: () => onComplete(task.jobId),
        })
      }

      // Loop: each admit() removes one task from the queue and may open
      // room for another (e.g. a per-job cap was the constraint, not the
      // global cap). Stop when no admissible task remains. findIndex
      // scans past a blocked head-of-queue, giving the fair scheduling
      // described in the module header.
      // NOTE: admit() can re-enter this loop synchronously via
      // onComplete when a bridge completes immediately (cancelled task);
      // the queue/counters are updated before subscribe, so that is safe.
      const admitFromQueue = (): void => {
        while (true) {
          const index = queue.findIndex(canAdmit)
          if (index < 0) break
          admit(index)
        }
      }

      // Inbound tasks queue up and are admitted as capacity allows.
      // Upstream error/complete propagate to the scheduler's subscriber.
      const subscription = source$.subscribe({
        next: (task) => {
          queue.push(task)
          admitFromQueue()
        },
        error: (error) => outerSub.error(error),
        complete: () => outerSub.complete(),
      })

      return () => subscription.unsubscribe()
    })
|
|
153
|
+
|
|
154
|
+
// Registers a per-job thread-count claim. Call this before enqueueing
|
|
155
|
+
// any tasks for the job; the scheduler reads the claim at admission time.
|
|
156
|
+
// If the jobId already has a claim, the new value overwrites it.
|
|
157
|
+
export const registerJobClaim = (
|
|
158
|
+
jobId: string,
|
|
159
|
+
claim: number,
|
|
160
|
+
): void => {
|
|
161
|
+
claimByJob.set(jobId, claim)
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
// Removes the per-job claim after the job finishes. Safe to call even
|
|
165
|
+
// if the job had no registered claim (no-op).
|
|
166
|
+
export const unregisterJobClaim = (jobId: string): void => {
|
|
167
|
+
claimByJob.delete(jobId)
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
// Init once at process startup. CLI passes 1 (sequential, equivalent to
|
|
171
|
+
// the historical concatMap behavior). API passes Number(MAX_THREADS) ||
|
|
172
|
+
// cpus().length. Idempotent on repeat calls with the same value; throws
|
|
173
|
+
// on conflicting re-init so a stray import path doesn't silently
|
|
174
|
+
// downgrade concurrency.
|
|
175
|
+
export const initTaskScheduler = (
|
|
176
|
+
newConcurrency: number,
|
|
177
|
+
options?: {
|
|
178
|
+
getActiveJobId?: GetActiveJobId
|
|
179
|
+
},
|
|
180
|
+
): void => {
|
|
181
|
+
if (
|
|
182
|
+
concurrency !== null &&
|
|
183
|
+
concurrency === newConcurrency
|
|
184
|
+
) {
|
|
185
|
+
if (options?.getActiveJobId) {
|
|
186
|
+
getActiveJobId = options.getActiveJobId
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
return
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
if (concurrency !== null) {
|
|
193
|
+
throw new Error(
|
|
194
|
+
`Task scheduler already initialized at concurrency=${concurrency}; refusing to re-init at ${newConcurrency}`,
|
|
195
|
+
)
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
concurrency = newConcurrency
|
|
199
|
+
|
|
200
|
+
if (options?.getActiveJobId) {
|
|
201
|
+
getActiveJobId = options.getActiveJobId
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
const newInbox = new Subject<ScheduledTask>()
|
|
205
|
+
|
|
206
|
+
newInbox.pipe(buildScheduler(newConcurrency)).subscribe()
|
|
207
|
+
|
|
208
|
+
inbox = newInbox
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
// Wraps work$ as a Task. The returned Observable is cold — subscribing
// enqueues the work; unsubscribing releases the slot (whether queued or
// running). Values from work$ mirror through to the caller. If work$
// errors, the caller sees the error.
//
// explicitJobId — pass a string or null in tests to bypass the injected
// provider lookup. Omit in production; the scheduler reads the current
// job id from the injected getActiveJobId provider (AsyncLocalStorage-
// backed on the server — see the injection note at the top of this
// file) at subscribe time, inside the Observable factory, so the
// context is always live.
export const runTask = <T>(
  work$: Observable<T>,
  explicitJobId?: string | null,
): Observable<T> =>
  new Observable<T>((subscriber) => {
    // undefined means "not passed" — an explicit null is a deliberate
    // "no job", which must NOT fall through to the provider.
    const jobId =
      explicitJobId !== undefined
        ? explicitJobId
        : (getActiveJobId() ?? null)
    const queue = ensureInbox()

    // Cancellation state shared between the caller-facing teardown and
    // the bridge factory below.
    let isCancelled = false
    let bridgeSubscriber: Subscriber<never> | null = null
    let innerSubscription: Subscription | null = null

    // Gated inner Observable. The scheduler subscribes to this when
    // a slot opens; we then start work$ and forward values to the caller.
    // Slot stays held for as long as this Observable is "alive" — it
    // completes when work$ ends naturally OR when the caller unsubscribes.
    const bridge$ = new Observable<never>((bridgeSub) => {
      // Caller unsubscribed while we were still queued: complete
      // immediately so the scheduler frees the slot it just granted.
      if (isCancelled) {
        bridgeSub.complete()

        return
      }

      bridgeSubscriber = bridgeSub

      // Start the real work; mirror next/error/complete to the caller
      // and complete the bridge on terminal events to release the slot.
      innerSubscription = work$.subscribe({
        next: (value) => {
          subscriber.next(value)
        },
        error: (error) => {
          subscriber.error(error)

          bridgeSub.complete()
        },
        complete: () => {
          subscriber.complete()

          bridgeSub.complete()
        },
      })

      return () => {
        innerSubscription?.unsubscribe()
      }
    })

    // Enqueue. Admission may happen synchronously (free slot) or later.
    queue.next({ bridge$, jobId })

    // Caller-side teardown (explicit unsubscribe or terminal event).
    return () => {
      isCancelled = true

      innerSubscription?.unsubscribe()

      // If the bridge has already been picked up by the scheduler,
      // explicitly complete it so the slot is freed. If still queued,
      // bridgeSubscriber is null and the isCancelled flag short-circuits
      // when its slot eventually opens.
      bridgeSubscriber?.complete()
    }
  })
|
|
283
|
+
|
|
284
|
+
// Pipeable form. Each upstream emission becomes a Task. Equivalent to
|
|
285
|
+
// `mergeMap(value => runTask(project(value, index)))` with unbounded
|
|
286
|
+
// outer concurrency — the scheduler is the actual cap.
|
|
287
|
+
export const runTasks = <T, R>(
|
|
288
|
+
project: (value: T, index: number) => Observable<R>,
|
|
289
|
+
): OperatorFunction<T, R> =>
|
|
290
|
+
mergeMap((value: T, index: number) =>
|
|
291
|
+
runTask(project(value, index)),
|
|
292
|
+
)
|
|
293
|
+
|
|
294
|
+
// Pipeable form preserving input order on output. Each upstream value
// is projected via mergeMap (parallel by default), but emissions are
// released downstream in input-index order — file 5 is held back until
// files 1-4 have emitted, even if file 5 finishes first.
//
// Does NOT route the projected work through `runTask`. Use this when
// the heavy work is already wrapped (e.g. the projector body uses
// `runTasks(...)` over a sub-stream), or when the iteration is plain
// orchestration that shouldn't compete for scheduler slots — e.g.
// iterating over a `groupBy`'s GroupedObservables when each group's
// inner per-file work is what actually does IO.
//
// Why "not via runTask": if both the outer iteration AND the inner
// per-element work occupy scheduler slots, MAX_THREADS outer slots
// can starve inner work (deadlock). Keep one layer scheduled.
//
// Memory: out-of-order results buffer in a Map keyed by index until
// the head-of-queue completes. For commands that emit thousands of
// large values per element, the buffer grows with the slowest-element
// lag — fine for the per-file summary use case (one or a few small
// values per element); revisit if a future caller streams large
// payloads.
export const mergeMapOrdered =
  <T, R>(
    project: (value: T, index: number) => Observable<R>,
  ): OperatorFunction<T, R> =>
  (source) =>
    new Observable<R>((subscriber) => {
      // Index of the next upstream element whose results may be
      // emitted; everything below it has already been flushed.
      let nextEmitIndex = 0
      // Results captured per input index, awaiting their turn.
      const buffered = new Map<number, R[]>()
      // Input indices whose inner Observable has terminated.
      const completed = new Set<number>()
      let isUpstreamComplete = false
      let inflightCount = 0

      // Releases buffered results downstream in input-index order. Walks
      // forward from `nextEmitIndex` while the next slot is marked
      // completed; stops at the first gap. Called on every inner
      // completion AND on upstream complete.
      const tryFlush = (): void => {
        while (completed.has(nextEmitIndex)) {
          const items = buffered.get(nextEmitIndex) ?? []
          items.forEach((item) => {
            subscriber.next(item)
          })
          buffered.delete(nextEmitIndex)
          completed.delete(nextEmitIndex)
          nextEmitIndex += 1
        }

        // Terminal check: upstream is done and no inner work remains.
        if (isUpstreamComplete && inflightCount === 0) {
          subscriber.complete()
        }
      }

      const upstreamSubscription = source
        .pipe(
          mergeMap((value: T, index: number) => {
            inflightCount += 1

            return project(value, index).pipe(
              // Capture each result into the index's buffer slot.
              tap((result) => {
                const arr = buffered.get(index) ?? []
                arr.push(result)
                buffered.set(index, arr)
              }),
              // finalize fires on complete, error, AND unsubscribe —
              // so inflight accounting stays balanced in every path.
              finalize(() => {
                inflightCount -= 1
                completed.add(index)
                tryFlush()
              }),
              // Values were already captured by the tap above; suppress
              // them here so the outer mergeMap doesn't re-emit them
              // out of order.
              ignoreElements(),
            )
          }),
        )
        .subscribe({
          error: (error) => {
            subscriber.error(error)
          },
          complete: () => {
            isUpstreamComplete = true
            tryFlush()
          },
        })

      return () => {
        upstreamSubscription.unsubscribe()
      }
    })
|
|
385
|
+
|
|
386
|
+
// Pipeable form: each upstream value runs as a Task in parallel
|
|
387
|
+
// (capped by the scheduler), with emissions released in input-index
|
|
388
|
+
// order. Thin wrapper over `mergeMapOrdered` that wraps the projector
|
|
389
|
+
// in `runTask` for callers whose per-element work is the unit of
|
|
390
|
+
// scheduled IO/CPU (e.g. one network call + processing per file).
|
|
391
|
+
//
|
|
392
|
+
// Do NOT use this as the OUTER operator over a stream whose inner
|
|
393
|
+
// work also goes through the scheduler — both layers would compete
|
|
394
|
+
// for the same MAX_THREADS pool and risk deadlock. Use plain
|
|
395
|
+
// `mergeMapOrdered` for such orchestration and reserve the runTask
|
|
396
|
+
// wrapping for the deepest per-IO layer.
|
|
397
|
+
export const runTasksOrdered = <T, R>(
|
|
398
|
+
project: (value: T, index: number) => Observable<R>,
|
|
399
|
+
): OperatorFunction<T, R> =>
|
|
400
|
+
mergeMapOrdered((value: T, index: number) =>
|
|
401
|
+
runTask(project(value, index)),
|
|
402
|
+
)
|
|
403
|
+
|
|
404
|
+
// Test-only — reset singleton between vitest runs so tests can re-init at
|
|
405
|
+
// a different concurrency.
|
|
406
|
+
export const __resetTaskSchedulerForTests = (): void => {
|
|
407
|
+
concurrency = null
|
|
408
|
+
claimByJob.clear()
|
|
409
|
+
getActiveJobId = nullJobIdProvider
|
|
410
|
+
|
|
411
|
+
inbox?.complete()
|
|
412
|
+
|
|
413
|
+
inbox = null
|
|
414
|
+
}
|