@sourcepress/ai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. package/.omc/state/last-tool-error.json +7 -0
  2. package/.turbo/turbo-build.log +4 -0
  3. package/.turbo/turbo-test.log +24 -0
  4. package/LICENSE +21 -0
  5. package/dist/__tests__/budget.test.d.ts +2 -0
  6. package/dist/__tests__/budget.test.d.ts.map +1 -0
  7. package/dist/__tests__/budget.test.js +96 -0
  8. package/dist/__tests__/budget.test.js.map +1 -0
  9. package/dist/__tests__/classify.test.d.ts +2 -0
  10. package/dist/__tests__/classify.test.d.ts.map +1 -0
  11. package/dist/__tests__/classify.test.js +72 -0
  12. package/dist/__tests__/classify.test.js.map +1 -0
  13. package/dist/__tests__/eval-runner.test.d.ts +2 -0
  14. package/dist/__tests__/eval-runner.test.d.ts.map +1 -0
  15. package/dist/__tests__/eval-runner.test.js +171 -0
  16. package/dist/__tests__/eval-runner.test.js.map +1 -0
  17. package/dist/__tests__/extract.test.d.ts +2 -0
  18. package/dist/__tests__/extract.test.d.ts.map +1 -0
  19. package/dist/__tests__/extract.test.js +79 -0
  20. package/dist/__tests__/extract.test.js.map +1 -0
  21. package/dist/__tests__/find-gaps.test.d.ts +2 -0
  22. package/dist/__tests__/find-gaps.test.d.ts.map +1 -0
  23. package/dist/__tests__/find-gaps.test.js +82 -0
  24. package/dist/__tests__/find-gaps.test.js.map +1 -0
  25. package/dist/__tests__/generate.test.d.ts +2 -0
  26. package/dist/__tests__/generate.test.d.ts.map +1 -0
  27. package/dist/__tests__/generate.test.js +68 -0
  28. package/dist/__tests__/generate.test.js.map +1 -0
  29. package/dist/__tests__/improve-prompt.test.d.ts +2 -0
  30. package/dist/__tests__/improve-prompt.test.d.ts.map +1 -0
  31. package/dist/__tests__/improve-prompt.test.js +32 -0
  32. package/dist/__tests__/improve-prompt.test.js.map +1 -0
  33. package/dist/__tests__/intent-impact.test.d.ts +2 -0
  34. package/dist/__tests__/intent-impact.test.d.ts.map +1 -0
  35. package/dist/__tests__/intent-impact.test.js +51 -0
  36. package/dist/__tests__/intent-impact.test.js.map +1 -0
  37. package/dist/__tests__/judge.test.d.ts +2 -0
  38. package/dist/__tests__/judge.test.d.ts.map +1 -0
  39. package/dist/__tests__/judge.test.js +61 -0
  40. package/dist/__tests__/judge.test.js.map +1 -0
  41. package/dist/__tests__/score.test.d.ts +2 -0
  42. package/dist/__tests__/score.test.d.ts.map +1 -0
  43. package/dist/__tests__/score.test.js +50 -0
  44. package/dist/__tests__/score.test.js.map +1 -0
  45. package/dist/__tests__/staleness.test.d.ts +2 -0
  46. package/dist/__tests__/staleness.test.d.ts.map +1 -0
  47. package/dist/__tests__/staleness.test.js +66 -0
  48. package/dist/__tests__/staleness.test.js.map +1 -0
  49. package/dist/budget.d.ts +13 -0
  50. package/dist/budget.d.ts.map +1 -0
  51. package/dist/budget.js +40 -0
  52. package/dist/budget.js.map +1 -0
  53. package/dist/eval/runner.d.ts +34 -0
  54. package/dist/eval/runner.d.ts.map +1 -0
  55. package/dist/eval/runner.js +128 -0
  56. package/dist/eval/runner.js.map +1 -0
  57. package/dist/functions/classify.d.ts +5 -0
  58. package/dist/functions/classify.d.ts.map +1 -0
  59. package/dist/functions/classify.js +43 -0
  60. package/dist/functions/classify.js.map +1 -0
  61. package/dist/functions/extract.d.ts +5 -0
  62. package/dist/functions/extract.d.ts.map +1 -0
  63. package/dist/functions/extract.js +57 -0
  64. package/dist/functions/extract.js.map +1 -0
  65. package/dist/functions/find-gaps.d.ts +5 -0
  66. package/dist/functions/find-gaps.d.ts.map +1 -0
  67. package/dist/functions/find-gaps.js +51 -0
  68. package/dist/functions/find-gaps.js.map +1 -0
  69. package/dist/functions/generate.d.ts +5 -0
  70. package/dist/functions/generate.d.ts.map +1 -0
  71. package/dist/functions/generate.js +39 -0
  72. package/dist/functions/generate.js.map +1 -0
  73. package/dist/functions/improve-prompt.d.ts +5 -0
  74. package/dist/functions/improve-prompt.d.ts.map +1 -0
  75. package/dist/functions/improve-prompt.js +38 -0
  76. package/dist/functions/improve-prompt.js.map +1 -0
  77. package/dist/functions/index.d.ts +11 -0
  78. package/dist/functions/index.d.ts.map +1 -0
  79. package/dist/functions/index.js +11 -0
  80. package/dist/functions/index.js.map +1 -0
  81. package/dist/functions/intent-impact.d.ts +5 -0
  82. package/dist/functions/intent-impact.d.ts.map +1 -0
  83. package/dist/functions/intent-impact.js +45 -0
  84. package/dist/functions/intent-impact.js.map +1 -0
  85. package/dist/functions/judge.d.ts +5 -0
  86. package/dist/functions/judge.d.ts.map +1 -0
  87. package/dist/functions/judge.js +32 -0
  88. package/dist/functions/judge.js.map +1 -0
  89. package/dist/functions/model-factory.d.ts +4 -0
  90. package/dist/functions/model-factory.d.ts.map +1 -0
  91. package/dist/functions/model-factory.js +52 -0
  92. package/dist/functions/model-factory.js.map +1 -0
  93. package/dist/functions/score.d.ts +5 -0
  94. package/dist/functions/score.d.ts.map +1 -0
  95. package/dist/functions/score.js +47 -0
  96. package/dist/functions/score.js.map +1 -0
  97. package/dist/functions/staleness.d.ts +5 -0
  98. package/dist/functions/staleness.d.ts.map +1 -0
  99. package/dist/functions/staleness.js +45 -0
  100. package/dist/functions/staleness.js.map +1 -0
  101. package/dist/functions/usage.d.ts +8 -0
  102. package/dist/functions/usage.d.ts.map +1 -0
  103. package/dist/functions/usage.js +13 -0
  104. package/dist/functions/usage.js.map +1 -0
  105. package/dist/index.d.ts +8 -0
  106. package/dist/index.d.ts.map +1 -0
  107. package/dist/index.js +6 -0
  108. package/dist/index.js.map +1 -0
  109. package/dist/provider.d.ts +10 -0
  110. package/dist/provider.d.ts.map +1 -0
  111. package/dist/provider.js +32 -0
  112. package/dist/provider.js.map +1 -0
  113. package/dist/types.d.ts +207 -0
  114. package/dist/types.d.ts.map +1 -0
  115. package/dist/types.js +2 -0
  116. package/dist/types.js.map +1 -0
  117. package/package.json +41 -0
  118. package/src/__tests__/budget.test.ts +103 -0
  119. package/src/__tests__/classify.test.ts +90 -0
  120. package/src/__tests__/eval-runner.test.ts +199 -0
  121. package/src/__tests__/extract.test.ts +92 -0
  122. package/src/__tests__/find-gaps.test.ts +93 -0
  123. package/src/__tests__/generate.test.ts +92 -0
  124. package/src/__tests__/improve-prompt.test.ts +42 -0
  125. package/src/__tests__/intent-impact.test.ts +62 -0
  126. package/src/__tests__/judge.test.ts +78 -0
  127. package/src/__tests__/score.test.ts +61 -0
  128. package/src/__tests__/staleness.test.ts +77 -0
  129. package/src/budget.ts +47 -0
  130. package/src/eval/runner.ts +163 -0
  131. package/src/functions/classify.ts +54 -0
  132. package/src/functions/extract.ts +72 -0
  133. package/src/functions/find-gaps.ts +65 -0
  134. package/src/functions/generate.ts +51 -0
  135. package/src/functions/improve-prompt.ts +48 -0
  136. package/src/functions/index.ts +10 -0
  137. package/src/functions/intent-impact.ts +56 -0
  138. package/src/functions/judge.ts +41 -0
  139. package/src/functions/model-factory.ts +60 -0
  140. package/src/functions/score.ts +56 -0
  141. package/src/functions/staleness.ts +54 -0
  142. package/src/functions/usage.ts +25 -0
  143. package/src/index.ts +47 -0
  144. package/src/provider.ts +41 -0
  145. package/src/types.ts +225 -0
  146. package/tsconfig.json +5 -0
  147. package/vitest.config.ts +2 -0
@@ -0,0 +1,103 @@
1
+ import { beforeEach, describe, expect, it } from "vitest";
2
+ import { BudgetTracker } from "../budget.js";
3
+
4
/**
 * Unit tests for BudgetTracker: initial state, spend accounting, warning and
 * over-limit flags, pre-flight spend checks, usage history and daily reset.
 *
 * Fractional dollar amounts are compared with `toBeCloseTo` instead of `toBe`
 * so the assertions do not depend on exact binary floating-point results.
 */
describe("BudgetTracker", () => {
  let tracker: BudgetTracker;

  // Every test starts from a fresh tracker: 5 USD daily limit, warning at 3 USD.
  beforeEach(() => {
    tracker = new BudgetTracker({ daily_limit_usd: 5.0, warn_at_usd: 3.0 });
  });

  it("starts with zero spent", () => {
    const status = tracker.getStatus();
    expect(status.spent_today_usd).toBe(0);
    expect(status.remaining_usd).toBe(5.0);
    expect(status.is_over_limit).toBe(false);
    expect(status.is_warned).toBe(false);
  });

  it("tracks token usage", () => {
    tracker.record({
      input_tokens: 1000,
      output_tokens: 500,
      estimated_cost_usd: 0.01,
      function_name: "classify",
      timestamp: new Date().toISOString(),
    });
    const status = tracker.getStatus();
    // 0.01 and 5 - 0.01 are not exactly representable as doubles; compare with tolerance.
    expect(status.spent_today_usd).toBeCloseTo(0.01, 10);
    expect(status.remaining_usd).toBeCloseTo(4.99, 10);
  });

  it("warns when approaching limit", () => {
    // 3.5 USD is above the 3 USD warning level but below the 5 USD limit.
    tracker.record({
      input_tokens: 100000,
      output_tokens: 50000,
      estimated_cost_usd: 3.5,
      function_name: "extract",
      timestamp: new Date().toISOString(),
    });
    const status = tracker.getStatus();
    expect(status.is_warned).toBe(true);
    expect(status.is_over_limit).toBe(false);
  });

  it("detects over-limit", () => {
    // 5.5 USD exceeds the 5 USD daily limit.
    tracker.record({
      input_tokens: 200000,
      output_tokens: 100000,
      estimated_cost_usd: 5.5,
      function_name: "judge",
      timestamp: new Date().toISOString(),
    });
    expect(tracker.getStatus().is_over_limit).toBe(true);
  });

  it("can check if operation is allowed", () => {
    expect(tracker.canSpend(4.0)).toBe(true);
    expect(tracker.canSpend(6.0)).toBe(false);
    tracker.record({
      input_tokens: 100000,
      output_tokens: 50000,
      estimated_cost_usd: 4.0,
      function_name: "score",
      timestamp: new Date().toISOString(),
    });
    // With 4 USD already spent, 0.5 USD still fits under the limit and 2 USD does not.
    expect(tracker.canSpend(0.5)).toBe(true);
    expect(tracker.canSpend(2.0)).toBe(false);
  });

  it("returns usage history", () => {
    tracker.record({
      input_tokens: 1000,
      output_tokens: 500,
      estimated_cost_usd: 0.01,
      function_name: "classify",
      timestamp: new Date().toISOString(),
    });
    tracker.record({
      input_tokens: 2000,
      output_tokens: 1000,
      estimated_cost_usd: 0.02,
      function_name: "extract",
      timestamp: new Date().toISOString(),
    });
    const history = tracker.getHistory();
    // History is expected in insertion order.
    expect(history).toHaveLength(2);
    expect(history[0].function_name).toBe("classify");
    expect(history[1].function_name).toBe("extract");
  });

  it("resets daily spending", () => {
    tracker.record({
      input_tokens: 100000,
      output_tokens: 50000,
      estimated_cost_usd: 3.0,
      function_name: "judge",
      timestamp: new Date().toISOString(),
    });
    tracker.resetDaily();
    const status = tracker.getStatus();
    expect(status.spent_today_usd).toBe(0);
    expect(status.remaining_usd).toBe(5.0);
  });
});
@@ -0,0 +1,90 @@
1
+ import { describe, expect, it, vi } from "vitest";
2
+ import { BudgetTracker } from "../budget.js";
3
+ import { classify } from "../functions/classify.js";
4
+ import type { ResolvedProvider } from "../provider.js";
5
+
6
+ vi.mock("ai", () => ({ generateObject: vi.fn() }));
7
+ import { generateObject } from "ai";
8
+
9
const mockProvider: ResolvedProvider = {
  provider: "anthropic",
  model: "claude-sonnet-4-5-20250514",
};

/**
 * Queues a single canned `generateObject` resolution carrying the given
 * structured object and token counts.
 */
function queueModelReply(object: unknown, promptTokens: number, completionTokens: number): void {
  vi.mocked(generateObject).mockResolvedValueOnce({
    object,
    usage: { promptTokens, completionTokens },
    // biome-ignore lint/suspicious/noExplicitAny: partial mock of generateObject return type
  } as any);
}

/** Creates a fresh tracker with a 5 USD daily limit. */
const freshBudget = (): BudgetTracker => new BudgetTracker({ daily_limit_usd: 5.0 });

/** Tests for `classify` against a mocked `generateObject`. */
describe("classify", () => {
  it("classifies structured text correctly", async () => {
    queueModelReply(
      {
        quality: "structured",
        quality_score: 8,
        type: "project-notes",
        reasoning: "Well-organized document with clear sections",
      },
      500,
      100,
    );
    const tracker = freshBudget();

    const outcome = await classify(
      { text: "Meeting with Acme Corp 2026-04-01. Discussed Next.js migration timeline." },
      mockProvider,
      tracker,
    );

    expect(outcome.quality).toBe("structured");
    expect(outcome.quality_score).toBe(8);
    expect(outcome.type).toBe("project-notes");
    expect(outcome.usage.function_name).toBe("classify");
  });

  it("classifies draft-quality text", async () => {
    queueModelReply(
      { quality: "draft", quality_score: 5, type: "brainstorm", reasoning: "Rough notes" },
      300,
      80,
    );
    const tracker = freshBudget();

    const outcome = await classify(
      { text: "maybe we should try... react? or vue?" },
      mockProvider,
      tracker,
    );

    expect(outcome.quality).toBe("draft");
    expect(outcome.quality_score).toBeLessThanOrEqual(6);
  });

  it("respects available_types constraint", async () => {
    queueModelReply(
      {
        quality: "structured",
        quality_score: 7,
        type: "meeting-notes",
        reasoning: "Typed as meeting-notes",
      },
      400,
      90,
    );
    const tracker = freshBudget();

    const outcome = await classify(
      {
        text: "Team standup 2026-04-04",
        available_types: ["meeting-notes", "project-notes", "transcript"],
      },
      mockProvider,
      tracker,
    );

    expect(outcome.type).toBe("meeting-notes");
  });

  it("records budget usage", async () => {
    queueModelReply(
      { quality: "structured", quality_score: 8, type: "notes", reasoning: "OK" },
      500,
      100,
    );
    const tracker = freshBudget();

    await classify({ text: "test" }, mockProvider, tracker);

    // The single call should leave exactly one entry, tagged with the function name.
    const entries = tracker.getHistory();
    expect(entries).toHaveLength(1);
    expect(entries[0].function_name).toBe("classify");
    expect(entries[0].input_tokens).toBe(500);
  });
});
@@ -0,0 +1,199 @@
1
+ import { describe, expect, it, vi } from "vitest";
2
+ import { BudgetTracker } from "../budget.js";
3
+ import { EvalRunner } from "../eval/runner.js";
4
+ import type { ResolvedProvider } from "../provider.js";
5
+
6
+ vi.mock("ai", () => ({ generateObject: vi.fn() }));
7
+ import { generateObject } from "ai";
8
+
9
const mockProvider: ResolvedProvider = {
  provider: "anthropic",
  model: "claude-sonnet-4-5-20250514",
};

// Shared run configuration: threshold 70, at most 3 generate/judge iterations.
const baseConfig = {
  content_type: "case-study",
  knowledge_context:
    "Acme Corp migrated 12 microservices to Next.js in 8 weeks. Load time improved by 40%.",
  gold_standard:
    "# Perfect Case Study\n\nDetailed migration with 12 services, 8 weeks, 40% improvement.",
  judge_prompt: "Score this content 0-100 against the gold standard.",
  generation_prompt: "Write a compelling case study.",
  threshold: 70,
  max_iterations: 3,
};

/**
 * Queues a single canned `generateObject` resolution. Replies are consumed in
 * the order they are queued, so call order below mirrors the expected
 * generate / judge / improve-prompt sequence.
 */
function queueModelReply(object: unknown, promptTokens: number, completionTokens: number): void {
  vi.mocked(generateObject).mockResolvedValueOnce({
    object,
    usage: { promptTokens, completionTokens },
    // biome-ignore lint/suspicious/noExplicitAny: partial mock of generateObject return type
  } as any);
}

/** Builds a runner backed by a fresh tracker with a 10 USD daily limit. */
function makeRunner(): EvalRunner {
  const tracker = new BudgetTracker({ daily_limit_usd: 10.0 });
  return new EvalRunner(mockProvider, tracker);
}

/** Tests for the EvalRunner generate → judge → improve loop with a mocked model. */
describe("EvalRunner", () => {
  it("keeps content when score meets threshold on first iteration", async () => {
    // Generate
    queueModelReply(
      {
        frontmatter: { title: "Acme Corp Case Study" },
        body: "# Acme Corp\n\nWe migrated 12 services in 8 weeks, improving load time by 40%.",
      },
      3000,
      500,
    );
    // Judge — score above threshold
    queueModelReply(
      { score: 85, reasoning: "Excellent match. Specific metrics included." },
      2000,
      200,
    );

    const outcome = await makeRunner().run(baseConfig);

    expect(outcome.final_status).toBe("keep");
    expect(outcome.final_score).toBe(85);
    expect(outcome.iterations).toHaveLength(1);
    expect(outcome.final_content).toBeTruthy();
    expect(outcome.prompt_improved).toBe(false);
  });

  it("improves prompt and retries when score is below threshold", async () => {
    // Iteration 1: Generate
    queueModelReply({ frontmatter: { title: "Acme" }, body: "We helped Acme." }, 2000, 200);
    // Iteration 1: Judge — below threshold
    queueModelReply({ score: 40, reasoning: "Too generic, no metrics." }, 1500, 150);
    // Iteration 1: Improve prompt
    queueModelReply(
      {
        improved_prompt: "Write a case study with specific metrics and client quotes.",
        changes_summary: "Added metrics requirement.",
      },
      1000,
      200,
    );
    // Iteration 2: Generate with improved prompt
    queueModelReply(
      {
        frontmatter: { title: "Acme Corp — Migration to Next.js" },
        body: "# Acme Corp\n\n12 services migrated in 8 weeks. 40% load time improvement.",
      },
      2500,
      400,
    );
    // Iteration 2: Judge — above threshold
    queueModelReply(
      {
        score: 78,
        reasoning: "Good metrics, clear structure. Matches gold standard well.",
      },
      2000,
      180,
    );

    const outcome = await makeRunner().run(baseConfig);

    expect(outcome.final_status).toBe("keep");
    expect(outcome.final_score).toBe(78);
    expect(outcome.iterations).toHaveLength(2);
    expect(outcome.prompt_improved).toBe(true);
    expect(outcome.final_prompt).toContain("metrics");
  });

  it("discards after max iterations if threshold never met", async () => {
    // Mock 3 iterations, all below threshold
    const lastAttempt = 2;
    for (let attempt = 0; attempt <= lastAttempt; attempt += 1) {
      // Generate
      queueModelReply(
        { frontmatter: { title: "Attempt" }, body: "Generic content." },
        2000,
        200,
      );
      // Judge — below threshold
      queueModelReply(
        { score: 30 + attempt * 10, reasoning: "Still not good enough." },
        1500,
        150,
      );
      // Improve prompt (not on last iteration)
      if (attempt === lastAttempt) {
        continue;
      }
      queueModelReply(
        {
          improved_prompt: `Improved prompt v${attempt + 2}.`,
          changes_summary: "Tweaked instructions.",
        },
        1000,
        200,
      );
    }

    const outcome = await makeRunner().run(baseConfig);

    expect(outcome.final_status).toBe("discard");
    expect(outcome.iterations).toHaveLength(3);
    expect(outcome.final_content).toBeUndefined();
  });

  it("judgeOnly scores existing content without loop", async () => {
    queueModelReply({ score: 72, reasoning: "Decent quality, matches intent." }, 1500, 100);

    const verdict = await makeRunner().judgeOnly({
      draft: "# Existing content\n\nSome published text.",
      gold_standard: "# Perfect example.",
      judge_prompt: "Score 0-100.",
    });

    expect(verdict.score).toBe(72);
    expect(verdict.reasoning).toBeTruthy();
  });

  it("accumulates total usage across iterations", async () => {
    // Generate
    queueModelReply({ frontmatter: { title: "T" }, body: "B" }, 1000, 100);
    // Judge — keep
    queueModelReply({ score: 90, reasoning: "Great." }, 500, 50);

    const outcome = await makeRunner().run(baseConfig);

    // Totals are the sums of the two queued usages: 1000 + 500 and 100 + 50.
    const totals = outcome.total_usage;
    expect(totals.input_tokens).toBe(1500);
    expect(totals.output_tokens).toBe(150);
    expect(totals.estimated_cost_usd).toBeGreaterThan(0);
  });
});
@@ -0,0 +1,92 @@
1
+ import { describe, expect, it, vi } from "vitest";
2
+ import { BudgetTracker } from "../budget.js";
3
+ import { extract } from "../functions/extract.js";
4
+ import type { ResolvedProvider } from "../provider.js";
5
+
6
+ vi.mock("ai", () => ({ generateObject: vi.fn() }));
7
+ import { generateObject } from "ai";
8
+
9
const mockProvider: ResolvedProvider = {
  provider: "anthropic",
  model: "claude-sonnet-4-5-20250514",
};

/**
 * Queues a single canned `generateObject` resolution carrying the given
 * structured object and token counts.
 */
function queueModelReply(object: unknown, promptTokens: number, completionTokens: number): void {
  vi.mocked(generateObject).mockResolvedValueOnce({
    object,
    usage: { promptTokens, completionTokens },
    // biome-ignore lint/suspicious/noExplicitAny: partial mock of generateObject return type
  } as any);
}

/** Creates a fresh tracker with a 5 USD daily limit. */
const freshBudget = (): BudgetTracker => new BudgetTracker({ daily_limit_usd: 5.0 });

/** Tests for `extract` against a mocked `generateObject`. */
describe("extract", () => {
  it("extracts entities and relations from text", async () => {
    const entities = [
      { type: "client", name: "Acme Corp", aliases: ["Acme", "ACME"], confidence: 0.95 },
      { type: "technology", name: "Next.js", aliases: ["NextJS"], confidence: 0.99 },
    ];
    const relations = [
      {
        from_entity: "Acme Corp",
        to_entity: "Next.js",
        relation_type: "uses",
        confidence: 0.9,
        evidence: "Acme Corp is migrating to Next.js",
      },
    ];
    queueModelReply({ entities, relations }, 800, 200);
    const tracker = freshBudget();

    const outcome = await extract(
      {
        text: "Meeting with Acme Corp. They want to migrate to Next.js.",
        file_path: "knowledge/clients/acme.md",
      },
      mockProvider,
      tracker,
    );

    expect(outcome.entities).toHaveLength(2);
    expect(outcome.entities[0].name).toBe("Acme Corp");
    expect(outcome.relations).toHaveLength(1);
    expect(outcome.usage.function_name).toBe("extract");
  });

  it("includes existing entities as context", async () => {
    queueModelReply(
      {
        entities: [{ type: "technology", name: "React", aliases: [], confidence: 0.95 }],
        relations: [
          {
            from_entity: "React",
            to_entity: "Next.js",
            relation_type: "used_by",
            confidence: 0.98,
            evidence: "React is the foundation of Next.js",
          },
        ],
      },
      1000,
      150,
    );
    const tracker = freshBudget();

    const outcome = await extract(
      {
        text: "React is great for building UIs.",
        file_path: "knowledge/tech/react.md",
        existing_entities: [{ type: "technology", name: "Next.js" }],
      },
      mockProvider,
      tracker,
    );

    expect(outcome.entities).toHaveLength(1);
    expect(outcome.relations).toHaveLength(1);
  });

  it("records budget usage", async () => {
    queueModelReply({ entities: [], relations: [] }, 400, 50);
    const tracker = freshBudget();

    await extract({ text: "Empty text", file_path: "test.md" }, mockProvider, tracker);

    expect(tracker.getHistory()).toHaveLength(1);
    expect(tracker.getHistory()[0].function_name).toBe("extract");
  });
});
@@ -0,0 +1,93 @@
1
+ import { describe, expect, it, vi } from "vitest";
2
+ import { BudgetTracker } from "../budget.js";
3
+ import { findGaps } from "../functions/find-gaps.js";
4
+ import type { ResolvedProvider } from "../provider.js";
5
+
6
+ vi.mock("ai", () => ({ generateObject: vi.fn() }));
7
+ import { generateObject } from "ai";
8
const mockProvider: ResolvedProvider = {
  provider: "anthropic",
  model: "claude-sonnet-4-5-20250514",
};

/**
 * Queues a single canned `generateObject` resolution whose object is
 * `{ gaps }`, with the given token counts.
 */
function queueGaps(gaps: unknown[], promptTokens: number, completionTokens: number): void {
  vi.mocked(generateObject).mockResolvedValueOnce({
    object: { gaps },
    usage: { promptTokens, completionTokens },
    // biome-ignore lint/suspicious/noExplicitAny: partial mock of generateObject return type
  } as any);
}

/** Creates a fresh tracker with a 5 USD daily limit. */
const freshBudget = (): BudgetTracker => new BudgetTracker({ daily_limit_usd: 5.0 });

/** Tests for `findGaps` against a mocked `generateObject`. */
describe("findGaps", () => {
  it("identifies knowledge without corresponding content", async () => {
    queueGaps(
      [
        {
          entity_name: "React",
          entity_type: "technology",
          suggested_content_type: "blog-post",
          priority: "high",
          reason: "Core tech with no content page.",
        },
        {
          entity_name: "TypeScript",
          entity_type: "technology",
          suggested_content_type: "service-page",
          priority: "medium",
          reason: "Appears in projects but no content.",
        },
      ],
      2500,
      300,
    );
    const tracker = freshBudget();

    const outcome = await findGaps(
      {
        entities: [
          { type: "technology", name: "React" },
          { type: "technology", name: "TypeScript" },
          { type: "technology", name: "Next.js" },
        ],
        relations: [{ from: "Next.js", to: "React", type: "built_on" }],
        existing_content: [
          {
            path: "content/cases/acme.mdx",
            title: "Acme Corp — Next.js Migration",
            summary: "Case study about Next.js",
          },
        ],
      },
      mockProvider,
      tracker,
    );

    const [firstGap] = outcome.gaps;
    expect(outcome.gaps).toHaveLength(2);
    expect(firstGap.entity_name).toBe("React");
    expect(firstGap.priority).toBe("high");
    expect(outcome.usage.function_name).toBe("findGaps");
  });

  it("uses business context for prioritization", async () => {
    queueGaps(
      [
        {
          entity_name: "AI Strategy",
          entity_type: "service",
          suggested_content_type: "service-page",
          priority: "high",
          reason: "Core business offering.",
        },
      ],
      2000,
      150,
    );
    const tracker = freshBudget();

    const outcome = await findGaps(
      {
        entities: [{ type: "service", name: "AI Strategy" }],
        relations: [],
        existing_content: [],
        business_context: "We are a digital agency focused on AI strategy consulting.",
      },
      mockProvider,
      tracker,
    );

    expect(outcome.gaps).toHaveLength(1);
    expect(outcome.gaps[0].priority).toBe("high");
  });
});
@@ -0,0 +1,92 @@
1
+ import { describe, expect, it, vi } from "vitest";
2
+ import { BudgetTracker } from "../budget.js";
3
+ import { generate } from "../functions/generate.js";
4
+ import type { ResolvedProvider } from "../provider.js";
5
+
6
+ vi.mock("ai", () => ({ generateObject: vi.fn() }));
7
+ import { generateObject } from "ai";
8
+
9
const mockProvider: ResolvedProvider = {
  provider: "anthropic",
  model: "claude-sonnet-4-5-20250514",
};

/**
 * Queues a single canned `generateObject` resolution whose object is
 * `{ frontmatter, body }`, with the given token counts.
 */
function queueDraft(
  frontmatter: Record<string, unknown>,
  body: string,
  promptTokens: number,
  completionTokens: number,
): void {
  vi.mocked(generateObject).mockResolvedValueOnce({
    object: { frontmatter, body },
    usage: { promptTokens, completionTokens },
    // biome-ignore lint/suspicious/noExplicitAny: partial mock of generateObject return type
  } as any);
}

/** Creates a fresh tracker with a 5 USD daily limit. */
const freshBudget = (): BudgetTracker => new BudgetTracker({ daily_limit_usd: 5.0 });

/** Tests for `generate` against a mocked `generateObject`. */
describe("generate", () => {
  it("generates content from knowledge context", async () => {
    queueDraft(
      { title: "Acme Corp Case Study", client: "acme-corp" },
      "# Acme Corp\n\nWe migrated 12 services in 8 weeks.",
      3000,
      500,
    );
    const tracker = freshBudget();

    const outcome = await generate(
      {
        content_type: "case-study",
        generation_prompt: "Write a compelling case study with specific metrics.",
        knowledge_context:
          "Acme Corp migrated 12 microservices to Next.js in 8 weeks. Load time improved by 40%.",
      },
      mockProvider,
      tracker,
    );

    expect(outcome.content).toContain("Acme Corp");
    expect(outcome.frontmatter.title).toBe("Acme Corp Case Study");
    expect(outcome.usage.function_name).toBe("generate");
  });

  it("includes intent in generation when provided", async () => {
    queueDraft({ title: "Test" }, "Vi hjälpte kunden att migrera.", 3500, 400);
    const tracker = freshBudget();

    const outcome = await generate(
      {
        content_type: "case-study",
        generation_prompt: "Write a case study.",
        knowledge_context: "Acme Corp project data.",
        intent: 'Always use "vi" form. No buzzwords.',
      },
      mockProvider,
      tracker,
    );

    expect(outcome.content).toBeTruthy();
    expect(outcome.usage.function_name).toBe("generate");
  });

  it("tracks budget usage", async () => {
    queueDraft({ title: "Test" }, "Content body.", 2000, 300);
    const tracker = freshBudget();

    await generate(
      {
        content_type: "blog-post",
        generation_prompt: "Write a blog post.",
        knowledge_context: "Blog knowledge.",
      },
      mockProvider,
      tracker,
    );

    // Any recorded call should leave a positive spend on the tracker.
    const { spent_today_usd } = tracker.getStatus();
    expect(spent_today_usd).toBeGreaterThan(0);
  });
});