phantomllm 0.2.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,38 +1,6 @@
1
- import { GenericContainer, Wait } from 'testcontainers';
2
-
3
- // src/driver/container.config.ts
4
- var DEFAULT_IMAGE = "phantomllm-server:latest";
5
- var DEFAULT_PORT = 8080;
6
- function resolveContainerConfig(options) {
7
- return {
8
- image: options?.image ?? process.env["PHANTOMLLM_IMAGE"] ?? DEFAULT_IMAGE,
9
- containerPort: options?.containerPort ?? DEFAULT_PORT,
10
- reuse: options?.reuse ?? true,
11
- startupTimeout: options?.startupTimeout ?? 3e4
12
- };
13
- }
14
- var ContainerManager = class {
15
- constructor(config) {
16
- this.config = config;
17
- }
18
- container = null;
19
- async start() {
20
- const container = new GenericContainer(this.config.image).withExposedPorts(this.config.containerPort).withWaitStrategy(
21
- Wait.forHttp("/_admin/health", this.config.containerPort).forStatusCode(200)
22
- ).withStartupTimeout(this.config.startupTimeout).withLabels({ "com.phantomllm": "true" });
23
- this.container = await container.start();
24
- return {
25
- host: this.container.getHost(),
26
- port: this.container.getMappedPort(this.config.containerPort)
27
- };
28
- }
29
- async stop() {
30
- if (this.container) {
31
- await this.container.stop();
32
- this.container = null;
33
- }
34
- }
35
- };
1
+ import Fastify from 'fastify';
2
+ import fp from 'fastify-plugin';
3
+ import { randomUUID, randomBytes } from 'crypto';
36
4
 
37
5
  // src/errors/base.ts
38
6
  var MockLLMError = class extends Error {
@@ -81,13 +49,24 @@ var AdminClient = class {
81
49
  this.baseUrl = baseUrl;
82
50
  }
83
51
  pendingStubs = [];
52
+ pendingConfigs = [];
84
53
  enqueueStub(stub) {
85
54
  this.pendingStubs.push(stub);
86
55
  }
56
+ enqueueConfig(config) {
57
+ this.pendingConfigs.push(config);
58
+ }
87
59
  async flush() {
88
- if (this.pendingStubs.length === 0) return;
89
- const stubs = this.pendingStubs.splice(0);
90
- await this.post("/_admin/stubs/batch", { stubs });
60
+ if (this.pendingConfigs.length > 0) {
61
+ const configs = this.pendingConfigs.splice(0);
62
+ for (const config of configs) {
63
+ await this.post("/_admin/config", config);
64
+ }
65
+ }
66
+ if (this.pendingStubs.length > 0) {
67
+ const stubs = this.pendingStubs.splice(0);
68
+ await this.post("/_admin/stubs/batch", { stubs });
69
+ }
91
70
  }
92
71
  async clearStubs() {
93
72
  await this.flush();
@@ -240,6 +219,16 @@ var GivenStubs = class {
240
219
  }
241
220
  };
242
221
 
222
// src/stubs/expect.ts
// Fluent "expect" surface for the driver: queues server-config changes on the
// AdminClient, which flushes them (before pending stubs) to /_admin/config.
var ExpectConditions = class {
  constructor(adminClient) {
    this.adminClient = adminClient;
  }
  /** Require requests to carry this API key as a Bearer token. */
  apiKey(key) {
    this.adminClient.enqueueConfig({ apiKey: key });
  }
};
231
+
243
232
  // src/errors/lifecycle.errors.ts
244
233
  var ContainerNotStartedError = class extends MockLLMError {
245
234
  code = "CONTAINER_NOT_STARTED";
@@ -251,18 +240,483 @@ var ContainerNotStartedError = class extends MockLLMError {
251
240
  }
252
241
  };
253
242
 
243
// src/server/stubs/stub.matcher.ts
// Predicates deciding whether a registered stub matches an incoming request.
// Every matcher field is optional; an absent field matches anything.

/** True when the matcher has no model constraint or the models are equal. */
function matchModel(matcher, model) {
  return matcher.model === void 0 || matcher.model === model;
}

/** True when any user message contains the matcher's content (case-insensitive). */
function matchContent(matcher, messages) {
  if (matcher.content === void 0) return true;
  const needle = matcher.content.toLowerCase();
  return messages.some((m) => {
    if (m.role !== "user" || m.content === null) return false;
    return m.content.toLowerCase().includes(needle);
  });
}

/** True when any input string contains the matcher's input (case-insensitive). */
function matchInput(matcher, input) {
  if (matcher.input === void 0) return true;
  const needle = matcher.input.toLowerCase();
  const items = Array.isArray(input) ? input : [input];
  return items.some((item) => item.toLowerCase().includes(needle));
}

/** Full match: endpoint, model, content and input constraints must all pass. */
function stubMatches(entry, endpoint, model, messages, input) {
  const { matcher } = entry;
  if (matcher.endpoint !== void 0 && matcher.endpoint !== endpoint) {
    return false;
  }
  if (!matchModel(matcher, model)) return false;
  if (messages !== void 0 && !matchContent(matcher, messages)) return false;
  if (input !== void 0 && !matchInput(matcher, input)) return false;
  return true;
}

/** Number of constraints a matcher carries; used to rank competing stubs. */
function specificity(matcher) {
  return ["model", "content", "input"].filter((key) => matcher[key] !== void 0)
    .length;
}
+
279
+ // src/server/stubs/stub.registry.ts
280
+ var StubRegistry = class {
281
+ stubs = [];
282
+ requests = [];
283
+ register(matcher, response, delay2) {
284
+ const entry = {
285
+ id: randomUUID(),
286
+ createdAt: Date.now(),
287
+ matcher,
288
+ response,
289
+ delay: delay2 ?? 0,
290
+ callCount: 0
291
+ };
292
+ this.stubs.push(entry);
293
+ return entry;
294
+ }
295
+ findMatch(endpoint, model, messages, input) {
296
+ const matches = this.stubs.filter(
297
+ (s) => stubMatches(s, endpoint, model, messages, input)
298
+ );
299
+ if (matches.length === 0) return void 0;
300
+ matches.sort((a, b) => {
301
+ const specDiff = specificity(b.matcher) - specificity(a.matcher);
302
+ if (specDiff !== 0) return specDiff;
303
+ return a.createdAt - b.createdAt;
304
+ });
305
+ const match = matches[0];
306
+ match.callCount++;
307
+ return match;
308
+ }
309
+ clear() {
310
+ const count = this.stubs.length;
311
+ this.stubs = [];
312
+ return count;
313
+ }
314
+ getAll() {
315
+ return this.stubs;
316
+ }
317
+ recordRequest(request) {
318
+ this.requests.push(request);
319
+ }
320
+ getRequests() {
321
+ return this.requests;
322
+ }
323
+ clearRequests() {
324
+ this.requests = [];
325
+ }
326
+ };
327
+
328
// src/server/plugins/registry.plugin.ts
// Decorates the fastify instance with one shared StubRegistry.
var registryPlugin = async (fastify) => {
  fastify.decorate("stubRegistry", new StubRegistry());
};
var stubRegistryPlugin = fp(registryPlugin, {
  name: "stub-registry",
});

// Optional bearer-token auth. The key is seeded from PHANTOMLLM_API_KEY and
// can be changed at runtime via POST /_admin/config; admin routes are exempt.
var authPlugin = async (fastify) => {
  fastify.decorate("authConfig", {
    apiKey: process.env["PHANTOMLLM_API_KEY"] || void 0,
  });
  fastify.addHook("onRequest", async (request, reply) => {
    // Admin plane is never authenticated; auth is a no-op with no key set.
    if (request.url.startsWith("/_admin")) return;
    if (!fastify.authConfig.apiKey) return;
    const header = request.headers.authorization;
    if (!header) {
      return reply.status(401).send({
        error: {
          message: "Missing Authorization header. Expected: Bearer <api-key>",
          type: "authentication_error",
          param: null,
          code: "missing_api_key",
        },
      });
    }
    const token = header.replace(/^Bearer\s+/i, "");
    if (token !== fastify.authConfig.apiKey) {
      return reply.status(401).send({
        error: {
          message: "Invalid API key provided.",
          type: "authentication_error",
          param: null,
          code: "invalid_api_key",
        },
      });
    }
  });
};
var apiKeyAuthPlugin = fp(authPlugin, { name: "api-key-auth" });
369
+
370
// src/server/admin/admin.routes.ts
// Control-plane routes used by the test driver: stub CRUD, request log,
// health, and runtime config (API key).
var adminRoutes = async (fastify) => {
  // Register a single stub.
  fastify.post("/_admin/stubs", async (request, reply) => {
    const { matcher, response, delay: delayMs } = request.body;
    const stub = fastify.stubRegistry.register(matcher, response, delayMs);
    return reply.status(201).send({ id: stub.id, stub });
  });
  // Register many stubs in one round trip.
  fastify.post("/_admin/stubs/batch", async (request, reply) => {
    const { stubs: definitions } = request.body;
    const created = definitions.map((def) =>
      fastify.stubRegistry.register(def.matcher, def.response, def.delay)
    );
    return reply.status(201).send(created);
  });
  // Drop all stubs; reports how many were removed.
  fastify.delete("/_admin/stubs", async (_request, reply) => {
    return reply.send({ cleared: fastify.stubRegistry.clear() });
  });
  // Liveness probe plus a couple of useful counters.
  fastify.get("/_admin/health", async (_request, reply) => {
    return reply.send({
      status: "ok",
      stubCount: fastify.stubRegistry.getAll().length,
      uptime: process.uptime(),
    });
  });
  // Inspect and reset the recorded-request log.
  fastify.get("/_admin/requests", async (_request, reply) => {
    return reply.send({ requests: fastify.stubRegistry.getRequests() });
  });
  fastify.delete("/_admin/requests", async (_request, reply) => {
    fastify.stubRegistry.clearRequests();
    return reply.status(204).send();
  });
  // Update runtime config; apiKey: null clears the key (disables auth).
  fastify.post("/_admin/config", async (request, reply) => {
    const { apiKey } = request.body;
    if (apiKey !== void 0) {
      fastify.authConfig.apiKey = apiKey ?? void 0;
    }
    return reply.send({ ok: true });
  });
};
421
/** Build an OpenAI-style completion id: "chatcmpl-" + 24 url-safe random chars. */
function generateChatCompletionId() {
  const suffix = randomBytes(18).toString("base64url").slice(0, 24);
  return `chatcmpl-${suffix}`;
}
424
+
425
// src/server/utils/token.counter.ts
// Crude token estimates (~4 characters per token) — only used to fill in
// plausible usage numbers on mock responses.

/** Estimate the token count of a string; always at least 1. */
function estimateTokens(text) {
  return Math.max(1, Math.ceil(text.length / 4));
}

/** Estimate prompt tokens: 2 base + (4 overhead + content tokens) per message. */
function estimatePromptTokens(messages) {
  return messages.reduce(
    (total, msg) => total + 4 + estimateTokens(msg.content ?? ""),
    2
  );
}
437
+
438
// src/server/responses/chat.response.ts
/**
 * Build a non-streaming OpenAI chat.completion payload around the stubbed
 * content. Usage figures are estimates; prompt_tokens is 0 when no
 * messages are supplied.
 */
function buildChatCompletion(content, model, messages) {
  const completionTokens = estimateTokens(content);
  const promptTokens = messages ? estimatePromptTokens(messages) : 0;
  const choice = {
    index: 0,
    message: { role: "assistant", content },
    finish_reason: "stop",
    logprobs: null,
  };
  return {
    id: generateChatCompletionId(),
    object: "chat.completion",
    created: Math.floor(Date.now() / 1000),
    model,
    system_fingerprint: "fp_mock",
    choices: [choice],
    usage: {
      prompt_tokens: promptTokens,
      completion_tokens: completionTokens,
      total_tokens: promptTokens + completionTokens,
    },
  };
}
463
+
464
// src/server/responses/error.response.ts
/**
 * OpenAI-style error envelope. The status argument is currently unused
 * (kept for call-site symmetry); type defaults to "invalid_request_error".
 */
function buildErrorResponse(_status, message, type) {
  return {
    error: {
      message,
      type: type ?? "invalid_request_error",
      param: null,
      code: null,
    },
  };
}

/**
 * 418 payload returned when no registered stub matches a request; the
 * message echoes the request details so test failures are easy to diagnose.
 */
function buildNoStubMatchResponse(method, path, model, messages) {
  const msgSummary =
    messages !== void 0 ? ` messages=${JSON.stringify(messages)}` : "";
  const message = `No stub matched request: ${method} ${path} model=${model}${msgSummary}`;
  return {
    status: 418,
    body: buildErrorResponse(418, message, "stub_not_found"),
  };
}
486
+
487
// src/server/chat/chat.streaming.ts
/**
 * One chat.completion.chunk. A null finishReason marks a content delta;
 * any other value produces a terminal chunk with an empty delta.
 */
function buildChunk(id, model, content, finishReason, usage) {
  const delta = finishReason === null ? { role: "assistant", content } : {};
  return {
    id,
    object: "chat.completion.chunk",
    created: Math.floor(Date.now() / 1000),
    model,
    system_fingerprint: "fp_mock",
    choices: [
      { index: 0, delta, finish_reason: finishReason, logprobs: null },
    ],
    usage: usage ?? null,
  };
}

/** Write one chunk in SSE framing: "data: <json>\n\n". */
function writeSSE(reply, chunk) {
  reply.raw.write(`data: ${JSON.stringify(chunk)}\n\n`);
}

/**
 * Stream the stubbed chunks as an SSE response: one delta chunk per entry,
 * then a terminal "stop" chunk (carrying usage when requested) and the
 * [DONE] sentinel. Hijacks the reply so fastify doesn't also send a body.
 */
function streamChunks(reply, model, chunks, messages, includeUsage) {
  reply.raw.writeHead(200, {
    "content-type": "text/event-stream",
    "cache-control": "no-cache",
    connection: "keep-alive",
  });
  reply.hijack();
  const id = generateChatCompletionId();
  for (const word of chunks) {
    writeSSE(reply, buildChunk(id, model, word, null));
  }
  let usage;
  if (includeUsage) {
    const completionTokens = estimateTokens(chunks.join(""));
    const promptTokens = estimatePromptTokens(messages);
    usage = {
      prompt_tokens: promptTokens,
      completion_tokens: completionTokens,
      total_tokens: promptTokens + completionTokens,
    };
  }
  writeSSE(reply, buildChunk(id, model, "", "stop", usage));
  reply.raw.write("data: [DONE]\n\n");
  reply.raw.end();
}
534
+
535
// src/server/chat/chat.handler.ts
/** Resolve after ms milliseconds (used to simulate model latency). */
function delay(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}

/**
 * POST /v1/chat/completions handler.
 *
 * Records the request in the registry, finds the best-matching stub, and
 * replies with (in order): the stub's error; an SSE stream when stream=true
 * (a plain "chat" stub is split on whitespace into chunks); or a regular
 * chat completion. A configured stub delay is awaited before responding.
 * No match yields a 418 diagnostic.
 *
 * Bug fix vs. previous version: a matched stub whose response type is not
 * servable here (e.g. an "embedding" or "models" stub registered without an
 * endpoint matcher) used to fall off the end without replying, hanging the
 * request; it now returns a 500 with an explanatory error.
 */
async function handleChatCompletion(request, reply) {
  const { model, messages, stream, stream_options } = request.body;
  const registry = request.server.stubRegistry;
  registry.recordRequest({
    timestamp: Date.now(),
    method: request.method,
    path: request.url,
    headers: request.headers,
    body: request.body,
  });
  const stub = registry.findMatch("chat", model, messages);
  if (!stub) {
    const noMatch = buildNoStubMatchResponse(
      request.method,
      request.url,
      model,
      messages
    );
    return reply.status(noMatch.status).send(noMatch.body);
  }
  if (stub.response.type === "error") {
    return reply.status(stub.response.status).send({
      error: {
        message: stub.response.error.message,
        type: stub.response.error.type,
        param: null,
        code: stub.response.error.code,
      },
    });
  }
  if (stub.delay > 0) {
    await delay(stub.delay);
  }
  if (stream === true) {
    const includeUsage = stream_options?.include_usage === true;
    // "chat" stubs are chunked word-by-word (whitespace runs preserved as chunks).
    const chunks =
      stub.response.type === "streaming-chat"
        ? stub.response.chunks
        : stub.response.type === "chat"
          ? stub.response.body.split(/(\s+)/).filter((s) => s.length > 0)
          : [];
    streamChunks(reply, model, chunks, messages, includeUsage);
    return;
  }
  if (stub.response.type === "chat") {
    return reply.send(buildChatCompletion(stub.response.body, model, messages));
  }
  if (stub.response.type === "streaming-chat") {
    const fullContent = stub.response.chunks.join("");
    return reply.send(buildChatCompletion(fullContent, model, messages));
  }
  // Previously an unhandled fall-through (request hung with no reply).
  return reply.status(500).send({
    error: {
      message: `Matched stub has response type "${stub.response.type}", which /chat/completions cannot serve.`,
      type: "invalid_stub_error",
      param: null,
      code: "unsupported_stub_response",
    },
  });
}
589
+
590
// src/server/chat/chat.routes.ts
// Chat-completions surface; mounted under /v1 by buildApp.
var chatRoutes = async (fastify) => {
  fastify.post("/chat/completions", handleChatCompletion);
};
594
+
595
// src/server/responses/embedding.response.ts
/**
 * Build an OpenAI embeddings "list" response from stubbed vectors.
 * prompt_tokens is estimated from the input string(s); embeddings have no
 * completion tokens, so total_tokens equals prompt_tokens.
 */
function buildEmbeddingResponse(vectors, model, input) {
  const inputs = Array.isArray(input) ? input : [input];
  let promptTokens = 0;
  for (const text of inputs) {
    promptTokens += estimateTokens(text);
  }
  const data = vectors.map((embedding, index) => ({
    object: "embedding",
    index,
    embedding,
  }));
  return {
    object: "list",
    data,
    model,
    usage: {
      prompt_tokens: promptTokens,
      total_tokens: promptTokens,
    },
  };
}
616
+
617
// src/server/embeddings/embeddings.handler.ts
// Vector width used when no embedding stub matches.
var DEFAULT_DIMENSION = 1536;

/** An all-zero embedding vector of the given dimension. */
function zeroVector(dim) {
  return new Array(dim).fill(0);
}

/**
 * POST /v1/embeddings handler. Records the request, then serves a matching
 * error or embedding stub; without a usable stub it falls back to one
 * DEFAULT_DIMENSION zero vector per input (never a 418, unlike chat).
 */
async function handleEmbeddings(request, reply) {
  const { model, input } = request.body;
  const registry = request.server.stubRegistry;
  registry.recordRequest({
    timestamp: Date.now(),
    method: request.method,
    path: request.url,
    headers: request.headers,
    body: request.body,
  });
  const stub = registry.findMatch("embeddings", model, void 0, input);
  if (stub && stub.response.type === "error") {
    return reply.status(stub.response.status).send({
      error: {
        message: stub.response.error.message,
        type: stub.response.error.type,
        param: null,
        code: stub.response.error.code,
      },
    });
  }
  if (stub && stub.response.type === "embedding") {
    return reply.send(
      buildEmbeddingResponse(stub.response.vectors, model, input)
    );
  }
  const inputs = Array.isArray(input) ? input : [input];
  const vectors = inputs.map(() => zeroVector(DEFAULT_DIMENSION));
  return reply.send(buildEmbeddingResponse(vectors, model, input));
}
653
+
654
// src/server/embeddings/embeddings.routes.ts
// Embeddings surface; mounted under /v1 by buildApp.
var embeddingsRoutes = async (fastify) => {
  fastify.post("/embeddings", handleEmbeddings);
};
658
+
659
// src/server/models/models.routes.ts
// Catalogue returned by GET /models when no "models" stub is registered.
var DEFAULT_MODELS = [
  { id: "gpt-4", object: "model", created: 1687882411, owned_by: "openai" },
  { id: "gpt-4o", object: "model", created: 1715367049, owned_by: "openai" },
  {
    id: "gpt-3.5-turbo",
    object: "model",
    created: 1677610602,
    owned_by: "openai",
  },
  {
    id: "text-embedding-3-small",
    object: "model",
    created: 1705948997,
    owned_by: "openai",
  },
];

// GET /models: serve the first registered "models" stub if present
// (counting the call against it), otherwise the built-in catalogue.
var modelsRoutes = async (fastify) => {
  fastify.get("/models", async (_request, reply) => {
    const modelsStub = fastify.stubRegistry
      .getAll()
      .find((s) => s.response.type === "models");
    if (!modelsStub) {
      return reply.send({ object: "list", data: DEFAULT_MODELS });
    }
    modelsStub.callCount++;
    const config = modelsStub.response;
    if (config.type !== "models") return; // unreachable; guards the shape
    const data = config.models.map((m) => ({
      id: m.id,
      object: "model",
      created: Math.floor(Date.now() / 1000),
      owned_by: m.ownedBy ?? "phantomllm",
    }));
    return reply.send({ object: "list", data });
  });
};
698
+
699
// src/server/app.ts
/**
 * Assemble the in-process mock server: shared stub registry, optional
 * API-key auth, admin control plane at the root, and the OpenAI-compatible
 * surface (chat, embeddings, models) mounted under /v1.
 */
async function buildApp(opts = {}) {
  const app = Fastify(opts);
  await app.register(stubRegistryPlugin);
  await app.register(apiKeyAuthPlugin);
  await app.register(adminRoutes);
  for (const routes of [chatRoutes, embeddingsRoutes, modelsRoutes]) {
    await app.register(routes, { prefix: "/v1" });
  }
  return app;
}
710
+
254
711
  // src/driver/mock-llm.ts
255
712
  var MockLLM = class {
256
713
  state = "idle";
257
714
  startPromise = null;
258
- containerManager;
715
+ app = null;
259
716
  adminClient = null;
260
717
  _given = null;
718
+ _expect = null;
261
719
  _baseUrl = null;
262
- constructor(options) {
263
- const config = resolveContainerConfig(options);
264
- this.containerManager = new ContainerManager(config);
265
- }
266
720
  async start() {
267
721
  if (this.state === "running") return;
268
722
  if (this.state === "starting" && this.startPromise) return this.startPromise;
@@ -271,21 +725,29 @@ var MockLLM = class {
271
725
  return this.startPromise;
272
726
  }
273
727
  async doStart() {
274
- const { host, port } = await this.containerManager.start();
275
- this._baseUrl = `http://${host}:${port}`;
728
+ this.app = await buildApp({ logger: false });
729
+ await this.app.listen({ port: 0, host: "127.0.0.1" });
730
+ const address = this.app.server.address();
731
+ const port = typeof address === "object" && address ? address.port : 0;
732
+ this._baseUrl = `http://127.0.0.1:${port}`;
276
733
  this.adminClient = new AdminClient(this._baseUrl);
277
734
  this._given = new GivenStubs(this.adminClient);
735
+ this._expect = new ExpectConditions(this.adminClient);
278
736
  this.state = "running";
279
737
  }
280
738
  async stop() {
281
739
  if (this.state === "stopped" || this.state === "idle") return;
282
740
  this.state = "stopping";
283
741
  try {
284
- await this.containerManager.stop();
742
+ if (this.app) {
743
+ await this.app.close();
744
+ }
285
745
  } finally {
286
746
  this.state = "stopped";
747
+ this.app = null;
287
748
  this.adminClient = null;
288
749
  this._given = null;
750
+ this._expect = null;
289
751
  this._baseUrl = null;
290
752
  }
291
753
  }
@@ -300,6 +762,10 @@ var MockLLM = class {
300
762
  this.assertRunning();
301
763
  return this._given;
302
764
  }
765
+ get expect() {
766
+ this.assertRunning();
767
+ return this._expect;
768
+ }
303
769
  async clear() {
304
770
  this.assertRunning();
305
771
  await this.adminClient.clearStubs();