@sourcepress/ai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147):
  1. package/.omc/state/last-tool-error.json +7 -0
  2. package/.turbo/turbo-build.log +4 -0
  3. package/.turbo/turbo-test.log +24 -0
  4. package/LICENSE +21 -0
  5. package/dist/__tests__/budget.test.d.ts +2 -0
  6. package/dist/__tests__/budget.test.d.ts.map +1 -0
  7. package/dist/__tests__/budget.test.js +96 -0
  8. package/dist/__tests__/budget.test.js.map +1 -0
  9. package/dist/__tests__/classify.test.d.ts +2 -0
  10. package/dist/__tests__/classify.test.d.ts.map +1 -0
  11. package/dist/__tests__/classify.test.js +72 -0
  12. package/dist/__tests__/classify.test.js.map +1 -0
  13. package/dist/__tests__/eval-runner.test.d.ts +2 -0
  14. package/dist/__tests__/eval-runner.test.d.ts.map +1 -0
  15. package/dist/__tests__/eval-runner.test.js +171 -0
  16. package/dist/__tests__/eval-runner.test.js.map +1 -0
  17. package/dist/__tests__/extract.test.d.ts +2 -0
  18. package/dist/__tests__/extract.test.d.ts.map +1 -0
  19. package/dist/__tests__/extract.test.js +79 -0
  20. package/dist/__tests__/extract.test.js.map +1 -0
  21. package/dist/__tests__/find-gaps.test.d.ts +2 -0
  22. package/dist/__tests__/find-gaps.test.d.ts.map +1 -0
  23. package/dist/__tests__/find-gaps.test.js +82 -0
  24. package/dist/__tests__/find-gaps.test.js.map +1 -0
  25. package/dist/__tests__/generate.test.d.ts +2 -0
  26. package/dist/__tests__/generate.test.d.ts.map +1 -0
  27. package/dist/__tests__/generate.test.js +68 -0
  28. package/dist/__tests__/generate.test.js.map +1 -0
  29. package/dist/__tests__/improve-prompt.test.d.ts +2 -0
  30. package/dist/__tests__/improve-prompt.test.d.ts.map +1 -0
  31. package/dist/__tests__/improve-prompt.test.js +32 -0
  32. package/dist/__tests__/improve-prompt.test.js.map +1 -0
  33. package/dist/__tests__/intent-impact.test.d.ts +2 -0
  34. package/dist/__tests__/intent-impact.test.d.ts.map +1 -0
  35. package/dist/__tests__/intent-impact.test.js +51 -0
  36. package/dist/__tests__/intent-impact.test.js.map +1 -0
  37. package/dist/__tests__/judge.test.d.ts +2 -0
  38. package/dist/__tests__/judge.test.d.ts.map +1 -0
  39. package/dist/__tests__/judge.test.js +61 -0
  40. package/dist/__tests__/judge.test.js.map +1 -0
  41. package/dist/__tests__/score.test.d.ts +2 -0
  42. package/dist/__tests__/score.test.d.ts.map +1 -0
  43. package/dist/__tests__/score.test.js +50 -0
  44. package/dist/__tests__/score.test.js.map +1 -0
  45. package/dist/__tests__/staleness.test.d.ts +2 -0
  46. package/dist/__tests__/staleness.test.d.ts.map +1 -0
  47. package/dist/__tests__/staleness.test.js +66 -0
  48. package/dist/__tests__/staleness.test.js.map +1 -0
  49. package/dist/budget.d.ts +13 -0
  50. package/dist/budget.d.ts.map +1 -0
  51. package/dist/budget.js +40 -0
  52. package/dist/budget.js.map +1 -0
  53. package/dist/eval/runner.d.ts +34 -0
  54. package/dist/eval/runner.d.ts.map +1 -0
  55. package/dist/eval/runner.js +128 -0
  56. package/dist/eval/runner.js.map +1 -0
  57. package/dist/functions/classify.d.ts +5 -0
  58. package/dist/functions/classify.d.ts.map +1 -0
  59. package/dist/functions/classify.js +43 -0
  60. package/dist/functions/classify.js.map +1 -0
  61. package/dist/functions/extract.d.ts +5 -0
  62. package/dist/functions/extract.d.ts.map +1 -0
  63. package/dist/functions/extract.js +57 -0
  64. package/dist/functions/extract.js.map +1 -0
  65. package/dist/functions/find-gaps.d.ts +5 -0
  66. package/dist/functions/find-gaps.d.ts.map +1 -0
  67. package/dist/functions/find-gaps.js +51 -0
  68. package/dist/functions/find-gaps.js.map +1 -0
  69. package/dist/functions/generate.d.ts +5 -0
  70. package/dist/functions/generate.d.ts.map +1 -0
  71. package/dist/functions/generate.js +39 -0
  72. package/dist/functions/generate.js.map +1 -0
  73. package/dist/functions/improve-prompt.d.ts +5 -0
  74. package/dist/functions/improve-prompt.d.ts.map +1 -0
  75. package/dist/functions/improve-prompt.js +38 -0
  76. package/dist/functions/improve-prompt.js.map +1 -0
  77. package/dist/functions/index.d.ts +11 -0
  78. package/dist/functions/index.d.ts.map +1 -0
  79. package/dist/functions/index.js +11 -0
  80. package/dist/functions/index.js.map +1 -0
  81. package/dist/functions/intent-impact.d.ts +5 -0
  82. package/dist/functions/intent-impact.d.ts.map +1 -0
  83. package/dist/functions/intent-impact.js +45 -0
  84. package/dist/functions/intent-impact.js.map +1 -0
  85. package/dist/functions/judge.d.ts +5 -0
  86. package/dist/functions/judge.d.ts.map +1 -0
  87. package/dist/functions/judge.js +32 -0
  88. package/dist/functions/judge.js.map +1 -0
  89. package/dist/functions/model-factory.d.ts +4 -0
  90. package/dist/functions/model-factory.d.ts.map +1 -0
  91. package/dist/functions/model-factory.js +52 -0
  92. package/dist/functions/model-factory.js.map +1 -0
  93. package/dist/functions/score.d.ts +5 -0
  94. package/dist/functions/score.d.ts.map +1 -0
  95. package/dist/functions/score.js +47 -0
  96. package/dist/functions/score.js.map +1 -0
  97. package/dist/functions/staleness.d.ts +5 -0
  98. package/dist/functions/staleness.d.ts.map +1 -0
  99. package/dist/functions/staleness.js +45 -0
  100. package/dist/functions/staleness.js.map +1 -0
  101. package/dist/functions/usage.d.ts +8 -0
  102. package/dist/functions/usage.d.ts.map +1 -0
  103. package/dist/functions/usage.js +13 -0
  104. package/dist/functions/usage.js.map +1 -0
  105. package/dist/index.d.ts +8 -0
  106. package/dist/index.d.ts.map +1 -0
  107. package/dist/index.js +6 -0
  108. package/dist/index.js.map +1 -0
  109. package/dist/provider.d.ts +10 -0
  110. package/dist/provider.d.ts.map +1 -0
  111. package/dist/provider.js +32 -0
  112. package/dist/provider.js.map +1 -0
  113. package/dist/types.d.ts +207 -0
  114. package/dist/types.d.ts.map +1 -0
  115. package/dist/types.js +2 -0
  116. package/dist/types.js.map +1 -0
  117. package/package.json +41 -0
  118. package/src/__tests__/budget.test.ts +103 -0
  119. package/src/__tests__/classify.test.ts +90 -0
  120. package/src/__tests__/eval-runner.test.ts +199 -0
  121. package/src/__tests__/extract.test.ts +92 -0
  122. package/src/__tests__/find-gaps.test.ts +93 -0
  123. package/src/__tests__/generate.test.ts +92 -0
  124. package/src/__tests__/improve-prompt.test.ts +42 -0
  125. package/src/__tests__/intent-impact.test.ts +62 -0
  126. package/src/__tests__/judge.test.ts +78 -0
  127. package/src/__tests__/score.test.ts +61 -0
  128. package/src/__tests__/staleness.test.ts +77 -0
  129. package/src/budget.ts +47 -0
  130. package/src/eval/runner.ts +163 -0
  131. package/src/functions/classify.ts +54 -0
  132. package/src/functions/extract.ts +72 -0
  133. package/src/functions/find-gaps.ts +65 -0
  134. package/src/functions/generate.ts +51 -0
  135. package/src/functions/improve-prompt.ts +48 -0
  136. package/src/functions/index.ts +10 -0
  137. package/src/functions/intent-impact.ts +56 -0
  138. package/src/functions/judge.ts +41 -0
  139. package/src/functions/model-factory.ts +60 -0
  140. package/src/functions/score.ts +56 -0
  141. package/src/functions/staleness.ts +54 -0
  142. package/src/functions/usage.ts +25 -0
  143. package/src/index.ts +47 -0
  144. package/src/provider.ts +41 -0
  145. package/src/types.ts +225 -0
  146. package/tsconfig.json +5 -0
  147. package/vitest.config.ts +2 -0
@@ -0,0 +1,42 @@
1
+ import { describe, expect, it, vi } from "vitest";
2
+ import { BudgetTracker } from "../budget.js";
3
+ import { improvePrompt } from "../functions/improve-prompt.js";
4
+ import type { ResolvedProvider } from "../provider.js";
5
+
6
+ vi.mock("ai", () => ({ generateObject: vi.fn() }));
7
+ import { generateObject } from "ai";
8
+
9
// Provider descriptor passed to the function under test. The "ai" module is
// mocked in this file, so generateObject only returns what the test queues.
const mockProvider: ResolvedProvider = {
  provider: "anthropic",
  model: "claude-sonnet-4-5-20250514",
};

describe("improvePrompt", () => {
  it("improves a generation prompt based on judge feedback", async () => {
    // Canned payload the mocked generateObject resolves with for one call.
    const cannedReply = {
      object: {
        improved_prompt:
          "Write a case study with specific metrics, client quotes, and a clear before/after comparison.",
        changes_summary: "Added requirements for metrics and quotes based on judge feedback.",
      },
      usage: { promptTokens: 1500, completionTokens: 300 },
    };
    // biome-ignore lint/suspicious/noExplicitAny: partial mock of generateObject return type
    vi.mocked(generateObject).mockResolvedValueOnce(cannedReply as any);

    const tracker = new BudgetTracker({ daily_limit_usd: 5.0 });
    const { improved_prompt, changes_summary, usage } = await improvePrompt(
      {
        current_prompt: "Write a case study.",
        judge_reasoning: "Draft is generic, lacks specific metrics and client quotes.",
        judge_score: 45,
        content_type: "case-study",
      },
      mockProvider,
      tracker,
    );

    // The result should surface the mocked object fields and tag the usage
    // record with the function's own name.
    expect(improved_prompt).toContain("metrics");
    expect(changes_summary).toBeTruthy();
    expect(usage.function_name).toBe("improvePrompt");
  });
});
@@ -0,0 +1,62 @@
1
+ import { describe, expect, it, vi } from "vitest";
2
+ import { BudgetTracker } from "../budget.js";
3
+ import { analyzeIntentImpact } from "../functions/intent-impact.js";
4
+ import type { ResolvedProvider } from "../provider.js";
5
+
6
+ vi.mock("ai", () => ({ generateObject: vi.fn() }));
7
+ import { generateObject } from "ai";
8
// Provider descriptor passed to the function under test. The "ai" module is
// mocked in this file, so generateObject only returns what the tests queue.
const mockProvider: ResolvedProvider = {
  provider: "anthropic",
  model: "claude-sonnet-4-5-20250514",
};

/** Queues a single resolved value on the mocked generateObject. */
function queueImpactReply(
  object: unknown,
  usage: { promptTokens: number; completionTokens: number },
): void {
  // biome-ignore lint/suspicious/noExplicitAny: partial mock of generateObject return type
  vi.mocked(generateObject).mockResolvedValueOnce({ object, usage } as any);
}

describe("analyzeIntentImpact", () => {
  it("identifies pages affected by intent change", async () => {
    queueImpactReply(
      {
        affected_pages: [
          { path: "content/cases/acme.mdx", impact: "high", reason: "Uses forbidden word" },
          { path: "content/posts/tech-update.mdx", impact: "low", reason: "Minor tone adjustment" },
        ],
        summary: "2 pages affected",
      },
      { promptTokens: 3000, completionTokens: 200 },
    );
    const tracker = new BudgetTracker({ daily_limit_usd: 5.0 });

    const outcome = await analyzeIntentImpact(
      {
        changed_intent: 'Forbidden words: "revolutionerande", "synergier"',
        previous_intent: 'Forbidden words: "synergier"',
        content_pages: [
          { path: "content/cases/acme.mdx", text: "En revolutionerande migration till Next.js." },
          { path: "content/posts/tech-update.mdx", text: "Vi uppdaterade vår tech-stack." },
        ],
      },
      mockProvider,
      tracker,
    );

    expect(outcome.affected_pages).toHaveLength(2);
    expect(outcome.affected_pages[0].impact).toBe("high");
    expect(outcome.usage.function_name).toBe("analyzeIntentImpact");
  });

  it("returns empty when no pages affected", async () => {
    queueImpactReply(
      { affected_pages: [], summary: "No pages affected." },
      { promptTokens: 2000, completionTokens: 80 },
    );
    const tracker = new BudgetTracker({ daily_limit_usd: 5.0 });

    // Same intent before and after: the mocked reply lists no pages.
    const outcome = await analyzeIntentImpact(
      {
        changed_intent: "Use we-form.",
        previous_intent: "Use we-form.",
        content_pages: [{ path: "content/about.mdx", text: "Vi hjälper företag att växa." }],
      },
      mockProvider,
      tracker,
    );

    expect(outcome.affected_pages).toHaveLength(0);
  });
});
@@ -0,0 +1,78 @@
1
+ import { describe, expect, it, vi } from "vitest";
2
+ import { BudgetTracker } from "../budget.js";
3
+ import { judge } from "../functions/judge.js";
4
+ import type { ResolvedProvider } from "../provider.js";
5
+
6
+ vi.mock("ai", () => ({ generateObject: vi.fn() }));
7
+ import { generateObject } from "ai";
8
// Provider descriptor passed to the function under test. The "ai" module is
// mocked in this file, so generateObject only returns what the tests queue.
const mockProvider: ResolvedProvider = {
  provider: "anthropic",
  model: "claude-sonnet-4-5-20250514",
};

/** Queues a single resolved value on the mocked generateObject. */
function queueJudgeReply(
  object: { score: number; reasoning: string },
  usage: { promptTokens: number; completionTokens: number },
): void {
  // biome-ignore lint/suspicious/noExplicitAny: partial mock of generateObject return type
  vi.mocked(generateObject).mockResolvedValueOnce({ object, usage } as any);
}

describe("judge", () => {
  it("judges a draft against gold standard", async () => {
    queueJudgeReply(
      {
        score: 82,
        reasoning: "Draft matches gold standard structure. Tone is correct. Missing one metric.",
      },
      { promptTokens: 2000, completionTokens: 200 },
    );
    const tracker = new BudgetTracker({ daily_limit_usd: 5.0 });

    const verdict = await judge(
      {
        draft: "# Acme Case Study\n\nWe helped Acme migrate.",
        gold_standard:
          "# Perfect Case Study\n\nWe migrated 12 services in 8 weeks, reducing load time by 40%.",
        judge_prompt: "Score 0-100.",
      },
      mockProvider,
      tracker,
    );

    expect(verdict.score).toBe(82);
    expect(verdict.reasoning).toBeTruthy();
    expect(verdict.usage.function_name).toBe("judge");
  });

  it("gives low score for poor match", async () => {
    queueJudgeReply(
      { score: 35, reasoning: "Draft is generic, lacks specifics, wrong tone." },
      { promptTokens: 1500, completionTokens: 100 },
    );
    const tracker = new BudgetTracker({ daily_limit_usd: 5.0 });

    const verdict = await judge(
      {
        draft: "We are a great company.",
        gold_standard: "# Perfect Case Study\n\nDetailed metrics.",
        judge_prompt: "Score 0-100.",
      },
      mockProvider,
      tracker,
    );

    expect(verdict.score).toBeLessThan(50);
  });

  it("includes intent in judgment when provided", async () => {
    queueJudgeReply(
      { score: 70, reasoning: "Matches intent tone but not all structural requirements." },
      { promptTokens: 2500, completionTokens: 180 },
    );
    const tracker = new BudgetTracker({ daily_limit_usd: 5.0 });

    // Same call shape as above, with the optional `intent` field supplied.
    const verdict = await judge(
      {
        draft: "Case study draft.",
        gold_standard: "Perfect case study.",
        judge_prompt: "Score 0-100.",
        intent: 'Always use "we" form. No buzzwords.',
      },
      mockProvider,
      tracker,
    );

    expect(verdict.score).toBe(70);
    expect(verdict.usage.function_name).toBe("judge");
  });
});
@@ -0,0 +1,61 @@
1
+ import { describe, expect, it, vi } from "vitest";
2
+ import { BudgetTracker } from "../budget.js";
3
+ import { score } from "../functions/score.js";
4
+ import type { ResolvedProvider } from "../provider.js";
5
+
6
+ vi.mock("ai", () => ({ generateObject: vi.fn() }));
7
+ import { generateObject } from "ai";
8
// Provider descriptor passed to the function under test. The "ai" module is
// mocked in this file, so generateObject only returns what the tests queue.
const mockProvider: ResolvedProvider = {
  provider: "anthropic",
  model: "claude-sonnet-4-5-20250514",
};

/** Queues a single resolved value on the mocked generateObject. */
function queueScoreReply(
  object: { score: number; issues: string[]; strengths: string[] },
  usage: { promptTokens: number; completionTokens: number },
): void {
  // biome-ignore lint/suspicious/noExplicitAny: partial mock of generateObject return type
  vi.mocked(generateObject).mockResolvedValueOnce({ object, usage } as any);
}

describe("score", () => {
  it("scores content against intent", async () => {
    queueScoreReply(
      {
        score: 74,
        issues: ["Generic heading", "Missing specific results"],
        strengths: ["Correct tone of voice", "Good structure"],
      },
      { promptTokens: 1000, completionTokens: 150 },
    );
    const tracker = new BudgetTracker({ daily_limit_usd: 5.0 });

    const scored = await score(
      {
        content: "# Acme Corp Case Study\n\nWe helped Acme Corp modernize their tech stack.",
        intent: "Use concrete results with numbers.",
      },
      mockProvider,
      tracker,
    );

    expect(scored.score).toBe(74);
    expect(scored.issues).toContain("Generic heading");
    expect(scored.strengths.length).toBeGreaterThan(0);
    expect(scored.usage.function_name).toBe("score");
  });

  it("returns high score for well-matched content", async () => {
    queueScoreReply(
      {
        score: 92,
        issues: [],
        strengths: ["Specific metrics", "Perfect tone", "Clear CTA"],
      },
      { promptTokens: 800, completionTokens: 100 },
    );
    const tracker = new BudgetTracker({ daily_limit_usd: 5.0 });

    const scored = await score(
      {
        content: "# Acme Corp: 40% Faster with Next.js\n\nWe migrated 12 services in 8 weeks.",
        intent: "Use concrete results with numbers.",
      },
      mockProvider,
      tracker,
    );

    expect(scored.score).toBeGreaterThanOrEqual(90);
    expect(scored.issues).toHaveLength(0);
  });
});
@@ -0,0 +1,77 @@
1
+ import { describe, expect, it, vi } from "vitest";
2
+ import { BudgetTracker } from "../budget.js";
3
+ import { detectStaleness } from "../functions/staleness.js";
4
+ import type { ResolvedProvider } from "../provider.js";
5
+
6
+ vi.mock("ai", () => ({ generateObject: vi.fn() }));
7
+ import { generateObject } from "ai";
8
// Provider descriptor passed to the function under test. The "ai" module is
// mocked in this file, so generateObject only returns what the tests queue.
const mockProvider: ResolvedProvider = {
  provider: "anthropic",
  model: "claude-sonnet-4-5-20250514",
};

/** Queues a single resolved value on the mocked generateObject. */
function queueStalenessReply(
  object: { stale: boolean; reason: string; stale_sources: string[]; confidence: number },
  usage: { promptTokens: number; completionTokens: number },
): void {
  // biome-ignore lint/suspicious/noExplicitAny: partial mock of generateObject return type
  vi.mocked(generateObject).mockResolvedValueOnce({ object, usage } as any);
}

describe("detectStaleness", () => {
  it("detects stale content when sources have changed", async () => {
    queueStalenessReply(
      {
        stale: true,
        reason: "Source file has new information about project completion.",
        stale_sources: ["knowledge/clients/acme.md"],
        confidence: 0.92,
      },
      { promptTokens: 3000, completionTokens: 150 },
    );
    const tracker = new BudgetTracker({ daily_limit_usd: 5.0 });

    const report = await detectStaleness(
      {
        content: "Acme Corp is currently migrating to Next.js.",
        content_generated_at: "2026-03-01T10:00:00Z",
        source_texts: [
          {
            path: "knowledge/clients/acme.md",
            text: "Acme Corp completed their Next.js migration on 2026-03-15.",
            updated_at: "2026-03-16T10:00:00Z",
          },
        ],
      },
      mockProvider,
      tracker,
    );

    expect(report.stale).toBe(true);
    expect(report.stale_sources).toContain("knowledge/clients/acme.md");
    expect(report.confidence).toBeGreaterThan(0.8);
    expect(report.usage.function_name).toBe("detectStaleness");
  });

  it("reports fresh content when no meaningful changes", async () => {
    queueStalenessReply(
      {
        stale: false,
        reason: "Source changes are minor formatting edits.",
        stale_sources: [],
        confidence: 0.88,
      },
      { promptTokens: 2500, completionTokens: 100 },
    );
    const tracker = new BudgetTracker({ daily_limit_usd: 5.0 });

    const report = await detectStaleness(
      {
        content: "Acme Corp case study content.",
        content_generated_at: "2026-04-01T10:00:00Z",
        source_texts: [
          {
            path: "knowledge/clients/acme.md",
            text: "Acme Corp case study notes (reformatted).",
            updated_at: "2026-04-02T10:00:00Z",
          },
        ],
      },
      mockProvider,
      tracker,
    );

    expect(report.stale).toBe(false);
    expect(report.stale_sources).toHaveLength(0);
  });
});
package/src/budget.ts ADDED
@@ -0,0 +1,47 @@
1
+ import type { BudgetConfig, BudgetStatus, TokenUsage } from "./types.js";
2
+
3
/**
 * In-memory tracker of estimated AI spend against a configured daily USD limit.
 *
 * Every recorded usage entry is kept in a history list and its estimated cost
 * is added to a running total. Nothing in this class resets the total on its
 * own: `getStatus().reset_at` only reports the next UTC midnight, and callers
 * must invoke `resetDaily()` themselves.
 */
export class BudgetTracker {
  /**
   * Tolerance (one nano-dollar) applied to limit comparisons. Summing decimal
   * costs in binary floating point drifts (0.1 + 0.2 > 0.3), which would
   * otherwise reject a spend that lands exactly on the limit.
   */
  private static readonly EPSILON_USD = 1e-9;

  private config: BudgetConfig;
  private usage: TokenUsage[] = [];
  private dailySpent = 0;

  /** @param config Daily limit and optional warning threshold, both in USD. */
  constructor(config: BudgetConfig) {
    this.config = config;
  }

  /**
   * Appends a usage entry to the history and adds its cost to the running total.
   *
   * A non-finite cost (NaN / ±Infinity) is still kept in the history but is not
   * added to the total: a single NaN would otherwise poison `dailySpent`, making
   * `canSpend` permanently false while `is_over_limit` could never become true.
   */
  record(usage: TokenUsage): void {
    this.usage.push(usage);
    const cost = usage.estimated_cost_usd;
    if (Number.isFinite(cost)) {
      this.dailySpent += cost;
    }
  }

  /**
   * Builds a snapshot of the current budget state.
   *
   * @returns Spent and remaining amounts rounded to three decimals, the limit,
   *   the warning threshold (defaults to 80% of the limit when `warn_at_usd` is
   *   not configured), the over-limit / warned flags, and the next UTC midnight
   *   as an ISO-8601 string.
   */
  getStatus(): BudgetStatus {
    const limit = this.config.daily_limit_usd;
    const remaining = Math.max(0, limit - this.dailySpent);
    const warnAt = this.config.warn_at_usd ?? limit * 0.8;

    // Next UTC midnight: advance one calendar day, then zero the time of day.
    const now = new Date();
    const resetAt = new Date(now);
    resetAt.setUTCDate(resetAt.getUTCDate() + 1);
    resetAt.setUTCHours(0, 0, 0, 0);

    return {
      spent_today_usd: Math.round(this.dailySpent * 1000) / 1000,
      limit_usd: limit,
      warn_at_usd: warnAt,
      remaining_usd: Math.round(remaining * 1000) / 1000,
      is_over_limit: this.dailySpent >= limit - BudgetTracker.EPSILON_USD,
      is_warned: this.dailySpent >= warnAt - BudgetTracker.EPSILON_USD,
      reset_at: resetAt.toISOString(),
    };
  }

  /**
   * Reports whether an additional spend fits within the daily limit.
   *
   * @param estimated_cost_usd Cost of the prospective call in USD.
   * @returns `true` when the running total plus the estimate does not exceed
   *   the limit (within floating-point tolerance); `false` otherwise, including
   *   when the estimate is NaN.
   */
  canSpend(estimated_cost_usd: number): boolean {
    const projected = this.dailySpent + estimated_cost_usd;
    return projected <= this.config.daily_limit_usd + BudgetTracker.EPSILON_USD;
  }

  /** @returns A shallow copy of the recorded entries, so callers cannot mutate the internal list. */
  getHistory(): TokenUsage[] {
    return [...this.usage];
  }

  /** Clears the history and zeroes the running total. */
  resetDaily(): void {
    this.usage = [];
    this.dailySpent = 0;
  }
}
@@ -0,0 +1,163 @@
1
+ import type { BudgetTracker } from "../budget.js";
2
+ import { generate } from "../functions/generate.js";
3
+ import { improvePrompt } from "../functions/improve-prompt.js";
4
+ import { judge } from "../functions/judge.js";
5
+ import type { ResolvedProvider } from "../provider.js";
6
+ import type { EvalResult, EvalRunConfig, EvalRunResult, TokenUsage } from "../types.js";
7
+
8
/**
 * Adds two usage records field by field.
 *
 * The result is always labelled "eval-run" and stamped with the current time;
 * the `function_name` and `timestamp` of both inputs are discarded.
 */
function mergeUsage(a: TokenUsage, b: TokenUsage): TokenUsage {
  const inputTotal = a.input_tokens + b.input_tokens;
  const outputTotal = a.output_tokens + b.output_tokens;
  const costTotal = a.estimated_cost_usd + b.estimated_cost_usd;
  const mergedAt = new Date();

  const merged: TokenUsage = {
    input_tokens: inputTotal,
    output_tokens: outputTotal,
    estimated_cost_usd: costTotal,
    function_name: "eval-run",
    timestamp: mergedAt.toISOString(),
  };
  return merged;
}
17
+
18
/**
 * Creates a zeroed usage record labelled "eval-run" and stamped with the
 * current time; used as the starting value when accumulating usage.
 */
function emptyUsage(): TokenUsage {
  const createdAt = new Date();
  const zeroed: TokenUsage = {
    input_tokens: 0,
    output_tokens: 0,
    estimated_cost_usd: 0,
    function_name: "eval-run",
    timestamp: createdAt.toISOString(),
  };
  return zeroed;
}
27
+
28
/**
 * Runs the generate → judge → improve evaluation loop using one provider and
 * one budget tracker, both of which are passed through to every underlying call.
 */
export class EvalRunner {
  private provider: ResolvedProvider;
  private budget: BudgetTracker;

  constructor(provider: ResolvedProvider, budget: BudgetTracker) {
    this.provider = provider;
    this.budget = budget;
  }

  /**
   * Run the generate-judge-decide-improve loop.
   *
   * Per iteration:
   * 1. GENERATE — content is produced from the current generation prompt
   * 2. JUDGE — the draft is scored with the (unchanging) judge prompt
   * 3. DECIDE — score >= threshold keeps the draft and stops the loop
   * 4. IMPROVE — otherwise, unless this was the last allowed iteration, the
   *    generation prompt is rewritten from the judge's reasoning
   * 5. LOG — an EvalResult is appended for every iteration
   * 6. REPEAT — until the threshold is met or `max_iterations` is reached
   *
   * @returns Every iteration's record, the last score, the final status, the
   *   final draft (only when it was kept), the last prompt in use, whether the
   *   prompt was ever rewritten, and the usage summed across all calls.
   */
  async run(config: EvalRunConfig): Promise<EvalRunResult> {
    const { provider, budget } = this;
    const history: EvalResult[] = [];

    let activePrompt = config.generation_prompt;
    let usageSoFar = emptyUsage();
    let lastDraft: string | undefined;
    let lastScore = 0;
    let accepted = false;
    let promptWasRewritten = false;

    for (let idx = 0; idx < config.max_iterations; idx++) {
      const attempt = idx + 1;

      // Step 1: generate content with the prompt currently in effect.
      const generated = await generate(
        {
          content_type: config.content_type,
          generation_prompt: activePrompt,
          knowledge_context: config.knowledge_context,
          intent: config.intent,
          collection_schema: config.collection_schema,
        },
        provider,
        budget,
      );
      usageSoFar = mergeUsage(usageSoFar, generated.usage);

      // The judge sees frontmatter and body together; each frontmatter value is
      // JSON-encoded on its own `key: value` line between `---` fences.
      const frontmatterLines = Object.entries(generated.frontmatter).map(
        ([key, value]) => `${key}: ${JSON.stringify(value)}`,
      );
      const draft = `---\n${frontmatterLines.join("\n")}\n---\n\n${generated.content}`;

      // Step 2: score the draft.
      const verdict = await judge(
        {
          draft,
          gold_standard: config.gold_standard,
          judge_prompt: config.judge_prompt,
          intent: config.intent,
        },
        provider,
        budget,
      );
      usageSoFar = mergeUsage(usageSoFar, verdict.usage);

      // Step 3: decide, and log this iteration.
      const passed = verdict.score >= config.threshold;
      const record: EvalResult = {
        id: `eval-${config.content_type}-${Date.now()}-${idx}`,
        content_type: config.content_type,
        prompt_version: `v${attempt}`,
        score: verdict.score,
        status: passed ? "keep" : "discard",
        reasoning: verdict.reasoning,
        iteration: attempt,
        generated_content: draft,
        timestamp: new Date().toISOString(),
        // Snapshot taken before any improve call for this iteration.
        total_usage: { ...usageSoFar },
      };
      history.push(record);
      lastScore = verdict.score;
      lastDraft = draft;

      if (passed) {
        accepted = true;
        break;
      }

      // Step 4: rewrite the prompt, but only if another iteration will use it.
      if (idx < config.max_iterations - 1) {
        const rewrite = await improvePrompt(
          {
            current_prompt: activePrompt,
            judge_reasoning: verdict.reasoning,
            judge_score: verdict.score,
            content_type: config.content_type,
          },
          provider,
          budget,
        );
        usageSoFar = mergeUsage(usageSoFar, rewrite.usage);
        activePrompt = rewrite.improved_prompt;
        promptWasRewritten = true;

        // The record already pushed to `history` is the same object, so this
        // relabels the logged iteration from "discard" to "improved".
        record.status = "improved";
      }
    }

    return {
      iterations: history,
      final_score: lastScore,
      final_status: accepted ? "keep" : "discard",
      final_content: accepted ? lastDraft : undefined,
      final_prompt: activePrompt,
      prompt_improved: promptWasRewritten,
      total_usage: usageSoFar,
    };
  }

  /**
   * Single judge pass — score existing content without the improve loop.
   * Used for quality scoring of already-published content.
   *
   * @returns Only the score, reasoning, and usage taken from the judge result.
   */
  async judgeOnly(input: {
    draft: string;
    gold_standard: string;
    judge_prompt: string;
    intent?: string;
  }): Promise<{ score: number; reasoning: string; usage: TokenUsage }> {
    const { score, reasoning, usage } = await judge(input, this.provider, this.budget);
    return { score, reasoning, usage };
  }
}
@@ -0,0 +1,54 @@
1
+ import { generateObject } from "ai";
2
+ import { z } from "zod";
3
+ import type { BudgetTracker } from "../budget.js";
4
+ import type { ResolvedProvider } from "../provider.js";
5
+ import type { ClassifyInput, ClassifyResult } from "../types.js";
6
+ import { createModel } from "./model-factory.js";
7
+ import { recordUsage } from "./usage.js";
8
+
9
/**
 * Structured output requested from the model: a quality bucket, a 1-10 quality
 * score, a free-form type label, and the model's reasoning.
 */
const classifySchema = z.object({
  quality: z.enum(["structured", "draft", "thoughts"]),
  quality_score: z.number().min(1).max(10),
  type: z.string(),
  reasoning: z.string(),
});

/**
 * Asks the model to classify a piece of text by quality and type.
 *
 * @param input Text to classify, plus an optional list of allowed type labels.
 * @param provider Provider/model selection handed to `createModel`.
 * @param budget Tracker handed to `recordUsage` together with the call's usage.
 * @returns The model's classification fields plus the token usage record
 *   returned by `recordUsage`.
 */
export async function classify(
  input: ClassifyInput,
  provider: ResolvedProvider,
  budget: BudgetTracker,
): Promise<ClassifyResult> {
  // Only constrain the type when there is at least one option. An empty array
  // is truthy and would otherwise yield "Choose the type from these options: "
  // with nothing to choose from, so it is treated the same as no list.
  const typeConstraint = input.available_types?.length
    ? `Choose the type from these options: ${input.available_types.join(", ")}`
    : "Infer the most appropriate type (e.g. project-notes, meeting-notes, transcript, brainstorm, research, documentation)";

  const { object, usage } = await generateObject({
    model: await createModel(provider),
    schema: classifySchema,
    prompt: `You are a knowledge classifier for a content engine. Analyze the following text and classify it.

Quality levels:
- "structured": Well-organized, has clear sections, headings, or data points. Score 7-10.
- "draft": Has useful information but needs editing, incomplete. Score 4-6.
- "thoughts": Raw brainstorm, bullet points, rough ideas. Score 1-3.

${typeConstraint}

Text to classify:
---
${input.text}
---

Respond with quality, quality_score (1-10), type, and reasoning.`,
  });

  const tokenUsage = recordUsage(budget, provider, usage, "classify");

  return {
    quality: object.quality,
    quality_score: object.quality_score,
    type: object.type,
    reasoning: object.reasoning,
    usage: tokenUsage,
  };
}
@@ -0,0 +1,72 @@
1
+ import { generateObject } from "ai";
2
+ import { z } from "zod";
3
+ import type { BudgetTracker } from "../budget.js";
4
+ import type { ResolvedProvider } from "../provider.js";
5
+ import type { ExtractInput, ExtractResult } from "../types.js";
6
+ import { createModel } from "./model-factory.js";
7
+ import { recordUsage } from "./usage.js";
8
+
9
/** One extracted entity: category, canonical name, aliases, and a 0-1 confidence. */
const entitySchema = z.object({
  type: z.string(),
  name: z.string(),
  aliases: z.array(z.string()),
  confidence: z.number().min(0).max(1),
});

/** One extracted relation between two entity names, with a 0-1 confidence and supporting evidence. */
const relationSchema = z.object({
  from_entity: z.string(),
  to_entity: z.string(),
  relation_type: z.string(),
  confidence: z.number().min(0).max(1),
  evidence: z.string(),
});

/** Structured output requested from the model: a list of entities and a list of relations. */
const extractSchema = z.object({
  entities: z.array(entitySchema),
  relations: z.array(relationSchema),
});

/**
 * Asks the model to extract named entities and their relations from a text.
 *
 * @param input Text and source file path, plus optional already-known entities
 *   that are listed in the prompt so relations may point at them.
 * @param provider Provider/model selection handed to `createModel`.
 * @param budget Tracker handed to `recordUsage` together with the call's usage.
 * @returns The extracted entities and relations plus the token usage record
 *   returned by `recordUsage`.
 */
export async function extract(
  input: ExtractInput,
  provider: ResolvedProvider,
  budget: BudgetTracker,
): Promise<ExtractResult> {
  // Only mention existing entities when the list is present and non-empty.
  let existingContext = "";
  if (input.existing_entities?.length) {
    const knownEntityLines = input.existing_entities
      .map((known) => `- ${known.name} (${known.type})`)
      .join("\n");
    existingContext = `\nExisting entities in the knowledge graph (you may create relations to these):\n${knownEntityLines}`;
  }

  const prompt = `You are an entity and relation extractor for a knowledge graph. Extract the most important named entities and their relationships from the following text.

Focus on concrete, named entities — not generic concepts. Limit to the 30 most significant entities.

Entity types to look for: client, technology, service, person, organization, concept, project, location, event.

For each entity provide:
- type: the entity category
- name: the canonical name
- aliases: alternative names or abbreviations
- confidence: 0.0-1.0 how confident you are this is a real entity

For each relation provide:
- from_entity / to_entity: entity names (must match extracted entities or existing ones)
- relation_type: e.g. "uses", "built_on", "employs", "located_in", "needs", "provides"
- confidence: 0.0-1.0
- evidence: the text snippet supporting this relation
${existingContext}

Source file: ${input.file_path}

Text to analyze:
---
${input.text}
---`;

  const response = await generateObject({
    model: await createModel(provider),
    schema: extractSchema,
    maxTokens: 16384,
    prompt,
  });

  const tokenUsage = recordUsage(budget, provider, response.usage, "extract");
  const { entities, relations } = response.object;

  return { entities, relations, usage: tokenUsage };
}
+ }