@langwatch/mcp-server 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/CHANGELOG.md +44 -0
  2. package/dist/archive-scenario-YFD5THOR.js +19 -0
  3. package/dist/archive-scenario-YFD5THOR.js.map +1 -0
  4. package/dist/chunk-5UOPNRXW.js +37 -0
  5. package/dist/chunk-5UOPNRXW.js.map +1 -0
  6. package/dist/chunk-6U4TCGFC.js +40 -0
  7. package/dist/chunk-6U4TCGFC.js.map +1 -0
  8. package/dist/chunk-IX6QJKAD.js +22 -0
  9. package/dist/chunk-IX6QJKAD.js.map +1 -0
  10. package/dist/{chunk-HOPTUDCZ.js → chunk-LLRQIF52.js} +5 -12
  11. package/dist/chunk-LLRQIF52.js.map +1 -0
  12. package/dist/create-evaluator-E5X5ZP3B.js +27 -0
  13. package/dist/create-evaluator-E5X5ZP3B.js.map +1 -0
  14. package/dist/create-prompt-7Z35MIL6.js +36 -0
  15. package/dist/create-prompt-7Z35MIL6.js.map +1 -0
  16. package/dist/create-scenario-DIMPJRPY.js +26 -0
  17. package/dist/create-scenario-DIMPJRPY.js.map +1 -0
  18. package/dist/discover-evaluator-schema-H23XCLNE.js +1402 -0
  19. package/dist/discover-evaluator-schema-H23XCLNE.js.map +1 -0
  20. package/dist/discover-scenario-schema-MEEEVND7.js +65 -0
  21. package/dist/discover-scenario-schema-MEEEVND7.js.map +1 -0
  22. package/dist/{get-analytics-3IFTN6MY.js → get-analytics-4YJW4S5L.js} +2 -2
  23. package/dist/get-evaluator-WDEH2F7M.js +47 -0
  24. package/dist/get-evaluator-WDEH2F7M.js.map +1 -0
  25. package/dist/{get-prompt-2ZB5B3QC.js → get-prompt-F6PDVC76.js} +2 -5
  26. package/dist/get-prompt-F6PDVC76.js.map +1 -0
  27. package/dist/get-scenario-H24ZYNT5.js +33 -0
  28. package/dist/get-scenario-H24ZYNT5.js.map +1 -0
  29. package/dist/{get-trace-7IXKKCJJ.js → get-trace-27USKGO7.js} +2 -2
  30. package/dist/index.js +27066 -8845
  31. package/dist/index.js.map +1 -1
  32. package/dist/list-evaluators-KRGI72EH.js +34 -0
  33. package/dist/list-evaluators-KRGI72EH.js.map +1 -0
  34. package/dist/list-model-providers-A5YCFTPI.js +35 -0
  35. package/dist/list-model-providers-A5YCFTPI.js.map +1 -0
  36. package/dist/{list-prompts-J72LTP7Z.js → list-prompts-LKJSE7XN.js} +6 -7
  37. package/dist/list-prompts-LKJSE7XN.js.map +1 -0
  38. package/dist/list-scenarios-ZK5CMGC4.js +40 -0
  39. package/dist/list-scenarios-ZK5CMGC4.js.map +1 -0
  40. package/dist/{search-traces-RW2NDHN5.js → search-traces-SOKAAMAR.js} +2 -2
  41. package/dist/set-model-provider-7MGULZDH.js +33 -0
  42. package/dist/set-model-provider-7MGULZDH.js.map +1 -0
  43. package/dist/update-evaluator-A3XINFLJ.js +24 -0
  44. package/dist/update-evaluator-A3XINFLJ.js.map +1 -0
  45. package/dist/update-prompt-IW7X2UQM.js +22 -0
  46. package/dist/update-prompt-IW7X2UQM.js.map +1 -0
  47. package/dist/update-scenario-ZT7TOBFR.js +27 -0
  48. package/dist/update-scenario-ZT7TOBFR.js.map +1 -0
  49. package/package.json +11 -11
  50. package/src/__tests__/all-tools.integration.test.ts +1337 -0
  51. package/src/__tests__/discover-evaluator-schema.unit.test.ts +89 -0
  52. package/src/__tests__/evaluator-tools.unit.test.ts +262 -0
  53. package/src/__tests__/integration.integration.test.ts +9 -34
  54. package/src/__tests__/langwatch-api.unit.test.ts +4 -32
  55. package/src/__tests__/model-provider-tools.unit.test.ts +190 -0
  56. package/src/__tests__/scenario-tools.integration.test.ts +286 -0
  57. package/src/__tests__/scenario-tools.unit.test.ts +185 -0
  58. package/src/__tests__/tools.unit.test.ts +59 -65
  59. package/src/index.ts +338 -48
  60. package/src/langwatch-api-evaluators.ts +70 -0
  61. package/src/langwatch-api-model-providers.ts +41 -0
  62. package/src/langwatch-api-scenarios.ts +67 -0
  63. package/src/langwatch-api.ts +6 -30
  64. package/src/tools/archive-scenario.ts +19 -0
  65. package/src/tools/create-evaluator.ts +33 -0
  66. package/src/tools/create-prompt.ts +30 -5
  67. package/src/tools/create-scenario.ts +30 -0
  68. package/src/tools/discover-evaluator-schema.ts +143 -0
  69. package/src/tools/discover-scenario-schema.ts +71 -0
  70. package/src/tools/get-evaluator.ts +53 -0
  71. package/src/tools/get-prompt.ts +1 -4
  72. package/src/tools/get-scenario.ts +36 -0
  73. package/src/tools/list-evaluators.ts +37 -0
  74. package/src/tools/list-model-providers.ts +40 -0
  75. package/src/tools/list-prompts.ts +5 -6
  76. package/src/tools/list-scenarios.ts +47 -0
  77. package/src/tools/set-model-provider.ts +46 -0
  78. package/src/tools/update-evaluator.ts +30 -0
  79. package/src/tools/update-prompt.ts +9 -25
  80. package/src/tools/update-scenario.ts +32 -0
  81. package/uv.lock +1788 -1322
  82. package/dist/chunk-HOPTUDCZ.js.map +0 -1
  83. package/dist/create-prompt-UBC537BJ.js +0 -22
  84. package/dist/create-prompt-UBC537BJ.js.map +0 -1
  85. package/dist/get-prompt-2ZB5B3QC.js.map +0 -1
  86. package/dist/list-prompts-J72LTP7Z.js.map +0 -1
  87. package/dist/update-prompt-G6HHZSUM.js +0 -31
  88. package/dist/update-prompt-G6HHZSUM.js.map +0 -1
  89. package/dist/{get-analytics-3IFTN6MY.js.map → get-analytics-4YJW4S5L.js.map} +0 -0
  90. package/dist/{get-trace-7IXKKCJJ.js.map → get-trace-27USKGO7.js.map} +0 -0
  91. package/dist/{search-traces-RW2NDHN5.js.map → search-traces-SOKAAMAR.js.map} +0 -0
@@ -8,7 +8,6 @@ vi.mock("../langwatch-api.js", () => ({
8
8
  getPrompt: vi.fn(),
9
9
  createPrompt: vi.fn(),
10
10
  updatePrompt: vi.fn(),
11
- createPromptVersion: vi.fn(),
12
11
  }));
13
12
 
14
13
  import {
@@ -19,7 +18,6 @@ import {
19
18
  getPrompt,
20
19
  createPrompt,
21
20
  updatePrompt,
22
- createPromptVersion,
23
21
  type PromptSummary,
24
22
  } from "../langwatch-api.js";
25
23
 
@@ -38,7 +36,6 @@ const mockListPrompts = vi.mocked(listPrompts);
38
36
  const mockGetPrompt = vi.mocked(getPrompt);
39
37
  const mockCreatePrompt = vi.mocked(createPrompt);
40
38
  const mockUpdatePrompt = vi.mocked(updatePrompt);
41
- const mockCreatePromptVersion = vi.mocked(createPromptVersion);
42
39
 
43
40
  beforeEach(() => {
44
41
  vi.clearAllMocks();
@@ -449,22 +446,20 @@ describe("handleListPrompts()", () => {
449
446
  handle: "greeting",
450
447
  name: "Greeting Prompt",
451
448
  latestVersionNumber: 3,
452
- description: "A friendly greeting prompt",
453
449
  },
454
450
  {
455
451
  id: "p2",
456
452
  name: "Summary",
457
453
  version: 1,
458
- description: "",
459
454
  },
460
455
  ]);
461
456
 
462
457
  const result = await handleListPrompts();
463
458
 
464
459
  expect(result).toContain("# Prompts (2 total)");
465
- expect(result).toContain("| Handle | Name | Latest Version | Description |");
466
- expect(result).toContain("| greeting | Greeting Prompt | v3 | A friendly greeting prompt |");
467
- expect(result).toContain("| p2 | Summary | v1 | |");
460
+ expect(result).toContain("| Handle | Name | Latest Version |");
461
+ expect(result).toContain("| greeting | Greeting Prompt | v3 |");
462
+ expect(result).toContain("| p2 | Summary | v1 |");
468
463
  });
469
464
  });
470
465
 
@@ -488,14 +483,14 @@ describe("handleListPrompts()", () => {
488
483
  });
489
484
  });
490
485
 
491
- it("includes usage tip about get_prompt", async () => {
486
+ it("includes usage tip about platform_get_prompt", async () => {
492
487
  mockListPrompts.mockResolvedValue([
493
488
  { handle: "test", name: "Test", latestVersionNumber: 1 },
494
489
  ]);
495
490
 
496
491
  const result = await handleListPrompts();
497
492
 
498
- expect(result).toContain("get_prompt");
493
+ expect(result).toContain("platform_get_prompt");
499
494
  });
500
495
  });
501
496
 
@@ -506,13 +501,11 @@ describe("handleGetPrompt()", () => {
506
501
  id: "p1",
507
502
  handle: "greeting",
508
503
  name: "Greeting Prompt",
509
- description: "A greeting",
510
504
  latestVersionNumber: 2,
511
505
  versions: [
512
506
  {
513
507
  version: 2,
514
- model: "gpt-4o",
515
- modelProvider: "openai",
508
+ model: "openai/gpt-4o",
516
509
  messages: [
517
510
  { role: "system", content: "You are a greeter." },
518
511
  { role: "user", content: "Hello!" },
@@ -531,10 +524,9 @@ describe("handleGetPrompt()", () => {
531
524
  expect(result).toContain("# Prompt: Greeting Prompt");
532
525
  expect(result).toContain("**Handle**: greeting");
533
526
  expect(result).toContain("**ID**: p1");
534
- expect(result).toContain("**Description**: A greeting");
535
527
  expect(result).toContain("**Latest Version**: v2");
536
- expect(result).toContain("**Model**: gpt-4o");
537
- expect(result).toContain("**Provider**: openai");
528
+ expect(result).toContain("**Model**: openai/gpt-4o");
529
+ expect(result).not.toContain("**Provider**");
538
530
  });
539
531
 
540
532
  it("formats messages", async () => {
@@ -598,15 +590,14 @@ describe("handleGetPrompt()", () => {
598
590
  it("uses prompt-level model config", async () => {
599
591
  mockGetPrompt.mockResolvedValue({
600
592
  name: "Simple",
601
- model: "gpt-3.5-turbo",
602
- modelProvider: "openai",
593
+ model: "openai/gpt-3.5-turbo",
603
594
  messages: [{ role: "system", content: "Be brief." }],
604
595
  });
605
596
 
606
597
  const result = await handleGetPrompt({ idOrHandle: "simple" });
607
598
 
608
- expect(result).toContain("**Model**: gpt-3.5-turbo");
609
- expect(result).toContain("**Provider**: openai");
599
+ expect(result).toContain("**Model**: openai/gpt-3.5-turbo");
600
+ expect(result).not.toContain("**Provider**");
610
601
  expect(result).toContain("### system\nBe brief.");
611
602
  });
612
603
  });
@@ -626,17 +617,55 @@ describe("handleCreatePrompt()", () => {
626
617
  name: "My Prompt",
627
618
  handle: "my-prompt",
628
619
  messages: [{ role: "system", content: "You are helpful." }],
629
- model: "gpt-4o",
630
- modelProvider: "openai",
620
+ model: "openai/gpt-4o",
631
621
  });
632
622
 
633
623
  expect(result).toContain("Prompt created successfully!");
634
624
  expect(result).toContain("**ID**: new-id-123");
635
625
  expect(result).toContain("**Handle**: my-prompt");
636
626
  expect(result).toContain("**Name**: My Prompt");
637
- expect(result).toContain("**Model**: gpt-4o (openai)");
627
+ expect(result).toContain("**Model**: openai/gpt-4o");
638
628
  expect(result).toContain("**Version**: v1");
639
629
  });
630
+
631
+ it("generates handle from name when handle is not provided", async () => {
632
+ mockCreatePrompt.mockResolvedValue({
633
+ id: "new-id",
634
+ handle: "my-prompt",
635
+ });
636
+
637
+ await handleCreatePrompt({
638
+ name: "My Prompt!",
639
+ messages: [{ role: "system", content: "test" }],
640
+ model: "openai/gpt-4o",
641
+ });
642
+
643
+ expect(mockCreatePrompt).toHaveBeenCalledWith({
644
+ handle: "my-prompt",
645
+ messages: [{ role: "system", content: "test" }],
646
+ model: "openai/gpt-4o",
647
+ });
648
+ });
649
+
650
+ it("uses provided handle instead of generating one", async () => {
651
+ mockCreatePrompt.mockResolvedValue({
652
+ id: "new-id",
653
+ handle: "custom-handle",
654
+ });
655
+
656
+ await handleCreatePrompt({
657
+ name: "My Prompt",
658
+ handle: "custom-handle",
659
+ messages: [{ role: "system", content: "test" }],
660
+ model: "openai/gpt-4o",
661
+ });
662
+
663
+ expect(mockCreatePrompt).toHaveBeenCalledWith({
664
+ handle: "custom-handle",
665
+ messages: [{ role: "system", content: "test" }],
666
+ model: "openai/gpt-4o",
667
+ });
668
+ });
640
669
  });
641
670
 
642
671
  describe("when API returns no name", () => {
@@ -646,8 +675,7 @@ describe("handleCreatePrompt()", () => {
646
675
  const result = await handleCreatePrompt({
647
676
  name: "Fallback Name",
648
677
  messages: [{ role: "system", content: "test" }],
649
- model: "gpt-4o",
650
- modelProvider: "openai",
678
+ model: "openai/gpt-4o",
651
679
  });
652
680
 
653
681
  expect(result).toContain("**Name**: Fallback Name");
@@ -656,7 +684,7 @@ describe("handleCreatePrompt()", () => {
656
684
  });
657
685
 
658
686
  describe("handleUpdatePrompt()", () => {
659
- describe("when updating in place", () => {
687
+ describe("when updating a prompt", () => {
660
688
  it("formats an update success message", async () => {
661
689
  mockUpdatePrompt.mockResolvedValue({
662
690
  id: "p1",
@@ -677,53 +705,19 @@ describe("handleUpdatePrompt()", () => {
677
705
  expect(result).toContain("**Commit**: Update system prompt");
678
706
  });
679
707
 
680
- it("calls updatePrompt API", async () => {
708
+ it("calls updatePrompt API with correct params", async () => {
681
709
  mockUpdatePrompt.mockResolvedValue({});
682
710
 
683
711
  await handleUpdatePrompt({
684
712
  idOrHandle: "greeting",
685
- model: "gpt-4o",
713
+ model: "openai/gpt-4o",
714
+ commitMessage: "Switch model",
686
715
  });
687
716
 
688
717
  expect(mockUpdatePrompt).toHaveBeenCalledWith("greeting", {
689
- model: "gpt-4o",
690
- });
691
- expect(mockCreatePromptVersion).not.toHaveBeenCalled();
692
- });
693
- });
694
-
695
- describe("when creating a new version", () => {
696
- it("formats a version creation success message", async () => {
697
- mockCreatePromptVersion.mockResolvedValue({
698
- id: "p1",
699
- latestVersionNumber: 3,
700
- });
701
-
702
- const result = await handleUpdatePrompt({
703
- idOrHandle: "greeting",
704
- messages: [{ role: "system", content: "New version" }],
705
- createVersion: true,
706
- commitMessage: "v3",
707
- });
708
-
709
- expect(result).toContain("New version created successfully!");
710
- expect(result).toContain("**Version**: v3");
711
- expect(result).toContain("**Commit**: v3");
712
- });
713
-
714
- it("calls createPromptVersion API", async () => {
715
- mockCreatePromptVersion.mockResolvedValue({});
716
-
717
- await handleUpdatePrompt({
718
- idOrHandle: "greeting",
719
- messages: [{ role: "system", content: "New" }],
720
- createVersion: true,
721
- });
722
-
723
- expect(mockCreatePromptVersion).toHaveBeenCalledWith("greeting", {
724
- messages: [{ role: "system", content: "New" }],
718
+ model: "openai/gpt-4o",
719
+ commitMessage: "Switch model",
725
720
  });
726
- expect(mockUpdatePrompt).not.toHaveBeenCalled();
727
721
  });
728
722
  });
729
723
  });