phantomllm 0.2.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1,40 +1,13 @@
1
1
  'use strict';
2
2
 
3
- var testcontainers = require('testcontainers');
3
+ var Fastify = require('fastify');
4
+ var fp = require('fastify-plugin');
5
+ var crypto = require('crypto');
4
6
 
5
- // src/driver/container.config.ts
6
- var DEFAULT_IMAGE = "phantomllm-server:latest";
7
- var DEFAULT_PORT = 8080;
8
- function resolveContainerConfig(options) {
9
- return {
10
- image: options?.image ?? process.env["PHANTOMLLM_IMAGE"] ?? DEFAULT_IMAGE,
11
- containerPort: options?.containerPort ?? DEFAULT_PORT,
12
- reuse: options?.reuse ?? true,
13
- startupTimeout: options?.startupTimeout ?? 3e4
14
- };
15
- }
16
- var ContainerManager = class {
17
- constructor(config) {
18
- this.config = config;
19
- }
20
- container = null;
21
- async start() {
22
- const container = new testcontainers.GenericContainer(this.config.image).withExposedPorts(this.config.containerPort).withWaitStrategy(
23
- testcontainers.Wait.forHttp("/_admin/health", this.config.containerPort).forStatusCode(200)
24
- ).withStartupTimeout(this.config.startupTimeout).withLabels({ "com.phantomllm": "true" });
25
- this.container = await container.start();
26
- return {
27
- host: this.container.getHost(),
28
- port: this.container.getMappedPort(this.config.containerPort)
29
- };
30
- }
31
- async stop() {
32
- if (this.container) {
33
- await this.container.stop();
34
- this.container = null;
35
- }
36
- }
37
- };
7
/**
 * Gives a required module a `.default` property.
 *
 * @param {*} e - The value returned by `require`.
 * @returns {*} `e` itself when it is truthy and flagged `__esModule`,
 *   otherwise a wrapper object `{ default: e }`.
 */
function _interopDefault(e) {
  if (e && e.__esModule) {
    return e;
  }
  return { default: e };
}
8
+
9
+ var Fastify__default = /*#__PURE__*/_interopDefault(Fastify);
10
+ var fp__default = /*#__PURE__*/_interopDefault(fp);
38
11
 
39
12
  // src/errors/base.ts
40
13
  var MockLLMError = class extends Error {
@@ -83,13 +56,24 @@ var AdminClient = class {
83
56
  this.baseUrl = baseUrl;
84
57
  }
85
58
  pendingStubs = [];
59
+ pendingConfigs = [];
86
60
  enqueueStub(stub) {
87
61
  this.pendingStubs.push(stub);
88
62
  }
63
+ enqueueConfig(config) {
64
+ this.pendingConfigs.push(config);
65
+ }
89
66
  async flush() {
90
- if (this.pendingStubs.length === 0) return;
91
- const stubs = this.pendingStubs.splice(0);
92
- await this.post("/_admin/stubs/batch", { stubs });
67
+ if (this.pendingConfigs.length > 0) {
68
+ const configs = this.pendingConfigs.splice(0);
69
+ for (const config of configs) {
70
+ await this.post("/_admin/config", config);
71
+ }
72
+ }
73
+ if (this.pendingStubs.length > 0) {
74
+ const stubs = this.pendingStubs.splice(0);
75
+ await this.post("/_admin/stubs/batch", { stubs });
76
+ }
93
77
  }
94
78
  async clearStubs() {
95
79
  await this.flush();
@@ -242,6 +226,16 @@ var GivenStubs = class {
242
226
  }
243
227
  };
244
228
 
229
+ // src/stubs/expect.ts
230
/**
 * Fluent entry point for server-side expectations.
 * Each call queues a configuration object on the admin client.
 */
var ExpectConditions = class {
  /**
   * @param {object} adminClient - Object exposing `enqueueConfig(config)`.
   */
  constructor(adminClient) {
    this.adminClient = adminClient;
  }

  /**
   * Queues a configuration update carrying the given API key.
   *
   * @param {*} key - Value stored under `apiKey` in the queued config.
   * @returns {void}
   */
  apiKey(key) {
    const update = { apiKey: key };
    this.adminClient.enqueueConfig(update);
  }
};
238
+
245
239
  // src/errors/lifecycle.errors.ts
246
240
  var ContainerNotStartedError = class extends MockLLMError {
247
241
  code = "CONTAINER_NOT_STARTED";
@@ -253,18 +247,483 @@ var ContainerNotStartedError = class extends MockLLMError {
253
247
  }
254
248
  };
255
249
 
250
+ // src/server/stubs/stub.matcher.ts
251
/**
 * Checks the matcher's optional `model` constraint.
 *
 * @param {{model?: string}} matcher - Stub matcher.
 * @param {*} model - Model named by the request.
 * @returns {boolean} True when the matcher has no `model` or it is strictly
 *   equal to `model`.
 */
function matchModel(matcher, model) {
  const expected = matcher.model;
  return expected === void 0 || expected === model;
}
255
/**
 * Extracts the searchable text of a message `content` value.
 *
 * @param {*} content - A string, an array of content parts, or anything else.
 * @returns {string|null} The string itself; for an array, the string items
 *   and the string `text` fields of its parts joined by newlines; `null` for
 *   every other value (including `null` and `undefined`).
 */
function extractMessageText(content) {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return null;
  const texts = [];
  for (const part of content) {
    if (typeof part === "string") {
      texts.push(part);
    } else if (part && typeof part.text === "string") {
      texts.push(part.text);
    }
  }
  return texts.join("\n");
}

/**
 * Checks the matcher's optional `content` constraint against the user
 * messages of a request (case-insensitive substring search).
 *
 * Messages whose content is missing or not text are treated as non-matching
 * instead of throwing, and multi-part (array) content is searched through its
 * text parts.
 *
 * @param {{content?: string}} matcher - Stub matcher.
 * @param {Array<{role: string, content?: *}>} messages - Request messages.
 * @returns {boolean} True when the matcher has no `content`, or at least one
 *   message with role "user" contains it.
 */
function matchContent(matcher, messages) {
  if (matcher.content === void 0) return true;
  const needle = matcher.content.toLowerCase();
  return messages.some((m) => {
    if (m.role !== "user") return false;
    const text = extractMessageText(m.content);
    return text !== null && text.toLowerCase().includes(needle);
  });
}
262
/**
 * Checks the matcher's optional `input` constraint against an embeddings
 * input (case-insensitive substring search).
 *
 * Items that are not strings (for example token arrays, numbers or `null`)
 * never match; they are skipped rather than causing a TypeError.
 *
 * @param {{input?: string}} matcher - Stub matcher.
 * @param {*} input - A single input or an array of inputs.
 * @returns {boolean} True when the matcher has no `input`, or at least one
 *   string item contains it.
 */
function matchInput(matcher, input) {
  if (matcher.input === void 0) return true;
  const needle = matcher.input.toLowerCase();
  const items = Array.isArray(input) ? input : [input];
  return items.some(
    (item) => typeof item === "string" && item.toLowerCase().includes(needle)
  );
}
268
/**
 * Decides whether a registered stub applies to a request.
 *
 * Checks run in this order and stop at the first failure: endpoint, model,
 * message content (only when `messages` is provided), input (only when
 * `input` is provided).
 *
 * @param {{matcher: object}} entry - Registered stub entry.
 * @param {string} endpoint - Endpoint name of the request.
 * @param {*} model - Model named by the request.
 * @param {Array|undefined} messages - Chat messages, if any.
 * @param {*} input - Embeddings input, if any.
 * @returns {boolean} True when every applicable check passes.
 */
function stubMatches(entry, endpoint, model, messages, input) {
  const matcher = entry.matcher;
  const endpointOk = matcher.endpoint === void 0 || matcher.endpoint === endpoint;
  if (!endpointOk) return false;
  if (!matchModel(matcher, model)) return false;
  const contentOk = messages === void 0 || matchContent(matcher, messages);
  if (!contentOk) return false;
  return input === void 0 || matchInput(matcher, input);
}
278
/**
 * Counts how many of the `model`, `content` and `input` constraints a matcher
 * defines.
 *
 * @param {{model?: *, content?: *, input?: *}} matcher - Stub matcher.
 * @returns {number} A value from 0 to 3.
 */
function specificity(matcher) {
  const constraints = [matcher.model, matcher.content, matcher.input];
  return constraints.filter((value) => value !== void 0).length;
}
285
+
286
+ // src/server/stubs/stub.registry.ts
287
/**
 * In-memory store of registered stubs and of the requests recorded so far.
 */
var StubRegistry = class {
  stubs = [];
  requests = [];

  /**
   * Adds a stub.
   *
   * @param {object} matcher - Conditions the stub applies to.
   * @param {object} response - Response definition to serve.
   * @param {number} [delay2] - Delay stored on the entry; 0 when nullish.
   * @returns {object} The stored entry (with generated `id`, `createdAt`
   *   and a `callCount` of 0).
   */
  register(matcher, response, delay2) {
    const entry = {
      id: crypto.randomUUID(),
      createdAt: Date.now(),
      matcher,
      response,
      delay: delay2 ?? 0,
      callCount: 0
    };
    this.stubs.push(entry);
    return entry;
  }

  /**
   * Picks the stub to serve for a request and increments its `callCount`.
   * Candidates are ordered by descending specificity, then by ascending
   * `createdAt`.
   *
   * @returns {object|undefined} The chosen entry, or undefined when nothing
   *   matches.
   */
  findMatch(endpoint, model, messages, input) {
    const candidates = this.stubs.filter(
      (stub) => stubMatches(stub, endpoint, model, messages, input)
    );
    if (candidates.length === 0) return void 0;
    candidates.sort(
      (a, b) => specificity(b.matcher) - specificity(a.matcher) || a.createdAt - b.createdAt
    );
    const [winner] = candidates;
    winner.callCount++;
    return winner;
  }

  /**
   * Removes every stub.
   *
   * @returns {number} How many stubs were registered before clearing.
   */
  clear() {
    const removed = this.stubs.length;
    this.stubs = [];
    return removed;
  }

  /** @returns {object[]} The internal stub array itself (not a copy). */
  getAll() {
    return this.stubs;
  }

  /** Appends a request record to the log. */
  recordRequest(request) {
    this.requests.push(request);
  }

  /** @returns {object[]} The internal request log itself (not a copy). */
  getRequests() {
    return this.requests;
  }

  /** Empties the request log. */
  clearRequests() {
    this.requests = [];
  }
};
334
+
335
+ // src/server/plugins/registry.plugin.ts
336
/**
 * Fastify plugin body: creates one StubRegistry and decorates the instance
 * with it under the name "stubRegistry".
 *
 * @param {object} fastify - Fastify instance being configured.
 */
var registryPlugin = async (fastify) => {
  fastify.decorate("stubRegistry", new StubRegistry());
};
340
+ var stubRegistryPlugin = fp__default.default(registryPlugin, {
341
+ name: "stub-registry"
342
+ });
343
/**
 * Compares a presented token with the configured API key without
 * short-circuiting on the first differing character.
 *
 * @param {string} provided - Token taken from the Authorization header.
 * @param {*} expected - Configured API key.
 * @returns {boolean} True only when `expected` is a string with the same
 *   bytes as `provided`.
 */
function apiKeysEqual(provided, expected) {
  if (typeof expected !== "string") return false;
  const providedBytes = Buffer.from(provided, "utf8");
  const expectedBytes = Buffer.from(expected, "utf8");
  // timingSafeEqual throws on length mismatch, so lengths are checked first.
  return providedBytes.length === expectedBytes.length && crypto.timingSafeEqual(providedBytes, expectedBytes);
}

/**
 * Fastify plugin body that enforces an optional API key.
 *
 * Decorates the instance with `authConfig` (`apiKey` is seeded from the
 * PHANTOMLLM_API_KEY environment variable, empty meaning "not set") and adds
 * an `onRequest` hook. The hook lets through every URL starting with
 * "/_admin" and every request while no key is configured; otherwise it
 * replies 401 when the Authorization header is missing or its token (after
 * removing a leading "Bearer " prefix, case-insensitively) differs from the
 * configured key.
 *
 * @param {object} fastify - Fastify instance being configured.
 */
var authPlugin = async (fastify) => {
  const config = {
    apiKey: process.env["PHANTOMLLM_API_KEY"] || void 0
  };
  fastify.decorate("authConfig", config);
  fastify.addHook("onRequest", async (request, reply) => {
    if (request.url.startsWith("/_admin")) return;
    // Read on every request so a key changed at runtime takes effect.
    const expected = fastify.authConfig.apiKey;
    if (!expected) return;
    const header = request.headers.authorization;
    if (!header) {
      return reply.status(401).send({
        error: {
          message: "Missing Authorization header. Expected: Bearer <api-key>",
          type: "authentication_error",
          param: null,
          code: "missing_api_key"
        }
      });
    }
    const token = header.replace(/^Bearer\s+/i, "");
    if (!apiKeysEqual(token, expected)) {
      return reply.status(401).send({
        error: {
          message: "Invalid API key provided.",
          type: "authentication_error",
          param: null,
          code: "invalid_api_key"
        }
      });
    }
  });
};
375
+ var apiKeyAuthPlugin = fp__default.default(authPlugin, { name: "api-key-auth" });
376
+
377
+ // src/server/admin/admin.routes.ts
378
/**
 * Fastify plugin body that registers the `/_admin` control routes:
 * stub creation (single and batch), stub clearing, health, request log
 * access/clearing, and runtime configuration of the API key.
 *
 * @param {object} fastify - Fastify instance exposing `stubRegistry` and
 *   `authConfig`.
 */
var adminRoutes = async (fastify) => {
  // Looked up per request, the same moment the handlers need it.
  const registry = () => fastify.stubRegistry;

  // Register one stub; replies 201 with the stored entry.
  fastify.post("/_admin/stubs", async (request, reply) => {
    const definition = request.body;
    const stub = registry().register(
      definition.matcher,
      definition.response,
      definition.delay
    );
    return reply.status(201).send({ id: stub.id, stub });
  });

  // Register several stubs; replies 201 with the stored entries.
  fastify.post("/_admin/stubs/batch", async (request, reply) => {
    const created = request.body.stubs.map(
      (def) => registry().register(def.matcher, def.response, def.delay)
    );
    return reply.status(201).send(created);
  });

  // Remove all stubs and report how many were removed.
  fastify.delete("/_admin/stubs", async (_request, reply) => {
    const cleared = registry().clear();
    return reply.send({ cleared });
  });

  fastify.get("/_admin/health", async (_request, reply) => {
    return reply.send({
      status: "ok",
      stubCount: registry().getAll().length,
      uptime: process.uptime()
    });
  });

  fastify.get("/_admin/requests", async (_request, reply) => {
    const requests = registry().getRequests();
    return reply.send({ requests });
  });

  fastify.delete("/_admin/requests", async (_request, reply) => {
    registry().clearRequests();
    return reply.status(204).send();
  });

  // An absent `apiKey` leaves the setting alone; `null` clears it.
  fastify.post("/_admin/config", async (request, reply) => {
    const apiKey = request.body.apiKey;
    if (apiKey !== void 0) {
      fastify.authConfig.apiKey = apiKey ?? void 0;
    }
    return reply.send({ ok: true });
  });
};
428
/**
 * Creates a random chat completion identifier.
 *
 * @returns {string} "chatcmpl-" followed by up to 24 base64url characters
 *   derived from 18 random bytes.
 */
function generateChatCompletionId() {
  const suffix = crypto.randomBytes(18).toString("base64url").slice(0, 24);
  return `chatcmpl-${suffix}`;
}
431
+
432
+ // src/server/utils/token.counter.ts
433
/**
 * Rough token estimate for a piece of text: its length divided by four,
 * rounded up, and never less than 1.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokens(text) {
  const quarters = Math.ceil(text.length / 4);
  return Math.max(1, quarters);
}
436
/**
 * Rough prompt token estimate: a base of 2, plus 4 per message, plus the
 * estimate for each message's content (nullish content counts as "").
 *
 * @param {Array<{content?: string|null}>} messages - Request messages.
 * @returns {number} Estimated prompt token count.
 */
function estimatePromptTokens(messages) {
  let total = 2;
  for (const { content } of messages) {
    total += 4;
    total += estimateTokens(content ?? "");
  }
  return total;
}
444
+
445
+ // src/server/responses/chat.response.ts
446
/**
 * Builds a non-streaming "chat.completion" response body with a single
 * assistant choice and estimated token usage.
 *
 * @param {string} content - Assistant message text.
 * @param {*} model - Model name echoed in the response.
 * @param {Array} [messages] - Request messages; prompt tokens are 0 when
 *   this is falsy.
 * @returns {object} The response body.
 */
function buildChatCompletion(content, model, messages) {
  const completionTokens = estimateTokens(content);
  let promptTokens = 0;
  if (messages) {
    promptTokens = estimatePromptTokens(messages);
  }
  const choice = {
    index: 0,
    message: { role: "assistant", content },
    finish_reason: "stop",
    logprobs: null
  };
  const usage = {
    prompt_tokens: promptTokens,
    completion_tokens: completionTokens,
    total_tokens: promptTokens + completionTokens
  };
  return {
    id: generateChatCompletionId(),
    object: "chat.completion",
    created: Math.floor(Date.now() / 1e3),
    model,
    system_fingerprint: "fp_mock",
    choices: [choice],
    usage
  };
}
470
+
471
+ // src/server/responses/error.response.ts
472
/**
 * Builds an error body of the shape `{ error: { message, type, param, code } }`.
 *
 * @param {number} _status - Unused by this function.
 * @param {string} message - Error message.
 * @param {string} [type] - Error type; "invalid_request_error" when nullish.
 * @returns {object} The error body, with `param` and `code` set to null.
 */
function buildErrorResponse(_status, message, type) {
  const error = {
    message,
    type: type ?? "invalid_request_error",
    param: null,
    code: null
  };
  return { error };
}
482
/**
 * Builds the reply used when no stub matches a request: status 418 with a
 * "stub_not_found" error describing the request.
 *
 * @param {string} method - HTTP method of the request.
 * @param {string} path - Request path.
 * @param {*} model - Model named by the request.
 * @param {Array} [messages] - Request messages; appended to the message as
 *   JSON when provided.
 * @returns {{status: number, body: object}} Status code and error body.
 */
function buildNoStubMatchResponse(method, path, model, messages) {
  let msgSummary = "";
  if (messages !== void 0) {
    msgSummary = ` messages=${JSON.stringify(messages)}`;
  }
  const description = `No stub matched request: ${method} ${path} model=${model}${msgSummary}`;
  const status = 418;
  return {
    status,
    body: buildErrorResponse(status, description, "stub_not_found")
  };
}
493
+
494
+ // src/server/chat/chat.streaming.ts
495
/**
 * Builds one "chat.completion.chunk" object for a streamed response.
 *
 * @param {string} id - Completion id shared by all chunks of the stream.
 * @param {*} model - Model name echoed in the chunk.
 * @param {string} content - Delta text; only used when `finishReason` is null.
 * @param {string|null} finishReason - null for content chunks; any other
 *   value produces an empty delta.
 * @param {object} [usage] - Usage block; `null` is emitted when nullish.
 * @returns {object} The chunk.
 */
function buildChunk(id, model, content, finishReason, usage) {
  const isContentChunk = finishReason === null;
  const choice = {
    index: 0,
    delta: isContentChunk ? { role: "assistant", content } : {},
    finish_reason: finishReason,
    logprobs: null
  };
  return {
    id,
    object: "chat.completion.chunk",
    created: Math.floor(Date.now() / 1e3),
    model,
    system_fingerprint: "fp_mock",
    choices: [choice],
    usage: usage ?? null
  };
}
513
/**
 * Writes one server-sent event to the raw response: "data: " followed by the
 * JSON-encoded chunk and a blank line.
 *
 * @param {{raw: {write: Function}}} reply - Reply whose raw stream is written.
 * @param {*} chunk - Value to JSON-encode.
 */
function writeSSE(reply, chunk) {
  const payload = JSON.stringify(chunk);
  reply.raw.write("data: " + payload + "\n\n");
}
518
/**
 * Streams a chat completion as server-sent events on the raw response:
 * one content chunk per entry of `chunks`, a final "stop" chunk (carrying
 * usage only when requested), then the "[DONE]" marker, and ends the stream.
 *
 * @param {object} reply - Fastify reply; its raw response is written directly
 *   and the reply is hijacked.
 * @param {*} model - Model name echoed in every chunk.
 * @param {string[]} chunks - Content pieces, sent in order.
 * @param {Array} messages - Request messages, used for the prompt estimate.
 * @param {boolean} includeUsage - Whether the final chunk carries usage.
 */
function streamChunks(reply, model, chunks, messages, includeUsage) {
  const sseHeaders = {
    "content-type": "text/event-stream",
    "cache-control": "no-cache",
    connection: "keep-alive"
  };
  reply.raw.writeHead(200, sseHeaders);
  reply.hijack();

  const id = generateChatCompletionId();
  for (const piece of chunks) {
    const contentChunk = buildChunk(id, model, piece, null);
    writeSSE(reply, contentChunk);
  }

  // Both estimates are computed whether or not usage is reported.
  const completionTokens = estimateTokens(chunks.join(""));
  const promptTokens = estimatePromptTokens(messages);
  let usage;
  if (includeUsage) {
    usage = {
      prompt_tokens: promptTokens,
      completion_tokens: completionTokens,
      total_tokens: promptTokens + completionTokens
    };
  }

  writeSSE(reply, buildChunk(id, model, "", "stop", usage));
  reply.raw.write("data: [DONE]\n\n");
  reply.raw.end();
}
541
+
542
+ // src/server/chat/chat.handler.ts
543
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Timer duration passed to setTimeout.
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise((done) => setTimeout(done, ms));
}
548
/**
 * Route handler for chat completions.
 *
 * Records the request, looks up the matching stub for endpoint "chat" and
 * replies according to the stub's response type:
 *  - no stub: the "no stub matched" reply;
 *  - "error": the configured status and error body (sent before any delay);
 *  - streaming request (`stream === true`): server-sent events built from the
 *    stub's chunks, or from the whitespace-split body of a "chat" stub;
 *  - "chat" / "streaming-chat": a regular chat completion body.
 *
 * A matched stub with any other response type on a non-streaming request is
 * answered with an explicit 500 error. Without that reply the handler would
 * finish without sending anything, leaving the framework to produce either
 * an empty response or an unfulfilled-promise error.
 *
 * @param {object} request - Fastify request; `request.server.stubRegistry`
 *   must provide `recordRequest` and `findMatch`.
 * @param {object} reply - Fastify reply.
 * @returns {Promise<*>} Whatever `reply.send` returns, or undefined after a
 *   streamed response.
 */
async function handleChatCompletion(request, reply) {
  const { model, messages, stream, stream_options } = request.body;
  const registry = request.server.stubRegistry;
  const recorded = {
    timestamp: Date.now(),
    method: request.method,
    path: request.url,
    headers: request.headers,
    body: request.body
  };
  registry.recordRequest(recorded);
  const stub = registry.findMatch("chat", model, messages);
  if (!stub) {
    const noMatch = buildNoStubMatchResponse(
      request.method,
      request.url,
      model,
      messages
    );
    return reply.status(noMatch.status).send(noMatch.body);
  }
  if (stub.response.type === "error") {
    return reply.status(stub.response.status).send({
      error: {
        message: stub.response.error.message,
        type: stub.response.error.type,
        param: null,
        code: stub.response.error.code
      }
    });
  }
  if (stub.delay > 0) {
    await delay(stub.delay);
  }
  if (stream === true) {
    const includeUsage = stream_options?.include_usage === true;
    // A plain "chat" body is split on whitespace, keeping the separators, so
    // the streamed pieces join back into the original text.
    const chunks = stub.response.type === "streaming-chat" ? stub.response.chunks : stub.response.type === "chat" ? stub.response.body.split(/(\s+)/).filter((s) => s.length > 0) : [];
    streamChunks(reply, model, chunks, messages, includeUsage);
    return;
  }
  if (stub.response.type === "chat") {
    return reply.send(buildChatCompletion(stub.response.body, model, messages));
  }
  if (stub.response.type === "streaming-chat") {
    const fullContent = stub.response.chunks.join("");
    return reply.send(buildChatCompletion(fullContent, model, messages));
  }
  // The matched stub carries a response this endpoint cannot render.
  return reply.status(500).send({
    error: {
      message: `Matched stub has response type "${stub.response.type}", which the chat completions endpoint cannot serve`,
      type: "stub_type_mismatch",
      param: null,
      code: null
    }
  });
}
596
+
597
+ // src/server/chat/chat.routes.ts
598
/**
 * Fastify plugin body that registers the chat completions POST route.
 *
 * @param {object} fastify - Fastify instance being configured.
 */
var chatRoutes = async (fastify) => {
  const path = "/chat/completions";
  fastify.post(path, handleChatCompletion);
};
601
+
602
+ // src/server/responses/embedding.response.ts
603
/**
 * Builds an embeddings response body: one "embedding" item per vector, in
 * order, plus a usage block estimated from the input text(s).
 *
 * @param {Array<number[]>} vectors - Embedding vectors to return.
 * @param {*} model - Model name echoed in the response.
 * @param {*} input - A single input or an array of inputs.
 * @returns {object} The response body.
 */
function buildEmbeddingResponse(vectors, model, input) {
  const texts = Array.isArray(input) ? input : [input];
  let promptTokens = 0;
  for (const text of texts) {
    promptTokens = promptTokens + estimateTokens(text);
  }
  const data = vectors.map((embedding, index) => {
    return { object: "embedding", index, embedding };
  });
  const usage = { prompt_tokens: promptTokens, total_tokens: promptTokens };
  return { object: "list", data, model, usage };
}
623
+
624
+ // src/server/embeddings/embeddings.handler.ts
625
// Length of the vectors produced when no embedding stub applies.
var DEFAULT_DIMENSION = 1536;

/**
 * Creates an array of `dim` zeros.
 *
 * @param {number} dim - Desired length.
 * @returns {number[]} A new array filled with 0.
 */
function zeroVector(dim) {
  const vector = new Array(dim);
  vector.fill(0);
  return vector;
}
629
/**
 * Route handler for embeddings.
 *
 * Records the request, looks up the matching stub for endpoint "embeddings"
 * and replies with: the configured error for an "error" stub, the stub's
 * vectors for an "embedding" stub, or otherwise one zero vector of the
 * default dimension per input.
 *
 * @param {object} request - Fastify request; `request.server.stubRegistry`
 *   must provide `recordRequest` and `findMatch`.
 * @param {object} reply - Fastify reply.
 * @returns {Promise<*>} Whatever `reply.send` returns.
 */
async function handleEmbeddings(request, reply) {
  const payload = request.body;
  const model = payload.model;
  const input = payload.input;
  const registry = request.server.stubRegistry;

  registry.recordRequest({
    timestamp: Date.now(),
    method: request.method,
    path: request.url,
    headers: request.headers,
    body: request.body
  });

  const stub = registry.findMatch("embeddings", model, void 0, input);
  const kind = stub ? stub.response.type : void 0;

  if (kind === "error") {
    const { status, error } = stub.response;
    return reply.status(status).send({
      error: {
        message: error.message,
        type: error.type,
        param: null,
        code: error.code
      }
    });
  }

  if (kind === "embedding") {
    const stubbed = buildEmbeddingResponse(stub.response.vectors, model, input);
    return reply.send(stubbed);
  }

  // No usable stub: answer with placeholder vectors, one per input.
  const inputs = Array.isArray(input) ? input : [input];
  const vectors = inputs.map(() => zeroVector(DEFAULT_DIMENSION));
  return reply.send(buildEmbeddingResponse(vectors, model, input));
}
660
+
661
+ // src/server/embeddings/embeddings.routes.ts
662
+ var embeddingsRoutes = async (fastify) => {
663
+ fastify.post("/embeddings", handleEmbeddings);
664
+ };
665
+
666
+ // src/server/models/models.routes.ts
667
// Model list served when no "models" stub is registered.
var DEFAULT_MODELS = [
  { id: "gpt-4", object: "model", created: 1687882411, owned_by: "openai" },
  { id: "gpt-4o", object: "model", created: 1715367049, owned_by: "openai" },
  { id: "gpt-3.5-turbo", object: "model", created: 1677610602, owned_by: "openai" },
  { id: "text-embedding-3-small", object: "model", created: 1705948997, owned_by: "openai" }
];

/**
 * Fastify plugin body that registers the model listing GET route.
 *
 * The first registered stub whose response type is "models" supplies the
 * list (and has its `callCount` incremented); otherwise DEFAULT_MODELS is
 * returned.
 *
 * @param {object} fastify - Fastify instance exposing `stubRegistry`.
 */
var modelsRoutes = async (fastify) => {
  fastify.get("/models", async (_request, reply) => {
    const stubs = fastify.stubRegistry.getAll();
    const modelsStub = stubs.find((s) => s.response.type === "models");

    if (!modelsStub) {
      return reply.send({ object: "list", data: DEFAULT_MODELS });
    }

    modelsStub.callCount++;
    const data = [];
    for (const m of modelsStub.response.models) {
      data.push({
        id: m.id,
        object: "model",
        created: Math.floor(Date.now() / 1e3),
        owned_by: m.ownedBy ?? "phantomllm"
      });
    }
    return reply.send({ object: "list", data });
  });
};
705
+
706
+ // src/server/app.ts
707
/**
 * Creates the Fastify application and registers, in order: the stub registry
 * plugin, the API key plugin, the admin routes, and the chat, embeddings and
 * models routes under the "/v1" prefix.
 *
 * @param {object} [opts={}] - Options passed to the Fastify factory.
 * @returns {Promise<object>} The configured Fastify instance.
 */
async function buildApp(opts = {}) {
  const app = Fastify__default.default(opts);
  for (const plugin of [stubRegistryPlugin, apiKeyAuthPlugin, adminRoutes]) {
    await app.register(plugin);
  }
  for (const routes of [chatRoutes, embeddingsRoutes, modelsRoutes]) {
    await app.register(routes, { prefix: "/v1" });
  }
  return app;
}
717
+
256
718
  // src/driver/mock-llm.ts
257
719
  var MockLLM = class {
258
720
  state = "idle";
259
721
  startPromise = null;
260
- containerManager;
722
+ app = null;
261
723
  adminClient = null;
262
724
  _given = null;
725
+ _expect = null;
263
726
  _baseUrl = null;
264
- constructor(options) {
265
- const config = resolveContainerConfig(options);
266
- this.containerManager = new ContainerManager(config);
267
- }
268
727
  async start() {
269
728
  if (this.state === "running") return;
270
729
  if (this.state === "starting" && this.startPromise) return this.startPromise;
@@ -273,21 +732,29 @@ var MockLLM = class {
273
732
  return this.startPromise;
274
733
  }
275
734
  async doStart() {
276
- const { host, port } = await this.containerManager.start();
277
- this._baseUrl = `http://${host}:${port}`;
735
+ this.app = await buildApp({ logger: false });
736
+ await this.app.listen({ port: 0, host: "127.0.0.1" });
737
+ const address = this.app.server.address();
738
+ const port = typeof address === "object" && address ? address.port : 0;
739
+ this._baseUrl = `http://127.0.0.1:${port}`;
278
740
  this.adminClient = new AdminClient(this._baseUrl);
279
741
  this._given = new GivenStubs(this.adminClient);
742
+ this._expect = new ExpectConditions(this.adminClient);
280
743
  this.state = "running";
281
744
  }
282
745
  async stop() {
283
746
  if (this.state === "stopped" || this.state === "idle") return;
284
747
  this.state = "stopping";
285
748
  try {
286
- await this.containerManager.stop();
749
+ if (this.app) {
750
+ await this.app.close();
751
+ }
287
752
  } finally {
288
753
  this.state = "stopped";
754
+ this.app = null;
289
755
  this.adminClient = null;
290
756
  this._given = null;
757
+ this._expect = null;
291
758
  this._baseUrl = null;
292
759
  }
293
760
  }
@@ -302,6 +769,10 @@ var MockLLM = class {
302
769
  this.assertRunning();
303
770
  return this._given;
304
771
  }
772
+ get expect() {
773
+ this.assertRunning();
774
+ return this._expect;
775
+ }
305
776
  async clear() {
306
777
  this.assertRunning();
307
778
  await this.adminClient.clearStubs();