@indexnetwork/protocol 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Asks an LLM judge whether `output` meets `criteria` and rejects when it does not.
 *
 * The judge model is taken from the SMARTEST_VERIFIER_MODEL environment variable
 * (default: google/gemini-2.5-flash). Requests are authenticated with
 * OPENROUTER_API_KEY and sent to OPENROUTER_BASE_URL
 * (default: https://openrouter.ai/api/v1).
 *
 * @param output - Value produced by the system under test; it is shown to the judge as JSON.
 * @param criteria - Plain-language statement of what the output has to satisfy.
 * @returns A promise that resolves with no value when the judge reports a pass.
 * @throws Error whose message embeds the judge's reasoning when the judge reports a failure.
 */
export declare function assertLLM(output: unknown, criteria: string): Promise<void>;
|
|
11
|
+
//# sourceMappingURL=llm-assert.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llm-assert.d.ts","sourceRoot":"","sources":["../../../src/support/tests/llm-assert.ts"],"names":[],"mappings":"AAeA;;;;;;;;GAQG;AACH,wBAAsB,SAAS,CAAC,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAyBhF"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { ChatOpenAI } from "@langchain/openai";
|
|
2
|
+
import { HumanMessage, SystemMessage } from "@langchain/core/messages";
|
|
3
|
+
import { z } from "zod";
|
|
4
|
+
/**
 * System prompt sent to the judge model: it casts the model as a pass/fail
 * test oracle and names the two JSON fields it has to return.
 */
const JUDGE_SYSTEM_PROMPT = [
    "You are a test oracle for an AI system. Given the output of a system under test and evaluation criteria, determine whether the output passes or fails.",
    "",
    "Return JSON with two fields:",
    "- pass: true if the output satisfies the criteria, false otherwise",
    "- reasoning: concise explanation of your judgment (1-3 sentences)",
].join("\n");
/** Fields of the judge's verdict: the boolean outcome and its short justification. */
const judgeVerdictFields = {
    pass: z.boolean(),
    reasoning: z.string(),
};
/** Zod schema used to request and validate the judge's structured reply. */
const judgeOutputSchema = z.object(judgeVerdictFields);
|
|
13
|
+
/**
 * Serialize the value under test into the pretty-printed JSON shown to the judge.
 *
 * A bare `JSON.stringify` throws a TypeError on BigInt values and on circular
 * references, which would abort the assertion before the judge is consulted.
 * Here a BigInt is rendered as its decimal digits followed by `n`, and a
 * reference back to an enclosing object is rendered as "[Circular]". Any value
 * that `JSON.stringify` already handled is serialized exactly as before.
 *
 * @param value - Arbitrary value produced by the system under test.
 * @returns Two-space-indented JSON text, or `undefined` when the top-level
 *   value has no JSON representation (for example `undefined` or a function).
 */
function serializeJudgeInput(value) {
    // Objects currently being serialized, outermost first.
    const ancestors = [];
    return JSON.stringify(value, function (_key, current) {
        if (typeof current === "bigint") {
            return `${current}n`;
        }
        if (typeof current !== "object" || current === null) {
            return current;
        }
        // `this` is the object that holds `current`. Unwind the stack back to it
        // so that siblings sharing one reference are not mistaken for a cycle.
        while (ancestors.length > 0 && ancestors[ancestors.length - 1] !== this) {
            ancestors.pop();
        }
        if (ancestors.includes(current)) {
            return "[Circular]";
        }
        ancestors.push(current);
        return current;
    }, 2);
}
/**
 * Assert that `output` satisfies the given `criteria` according to an LLM judge.
 * Throws an error (with reasoning embedded) if the assertion fails.
 *
 * The judge model comes from the SMARTEST_VERIFIER_MODEL env var
 * (default: google/gemini-2.5-flash). Requests use OPENROUTER_API_KEY and are
 * sent to OPENROUTER_BASE_URL (default: https://openrouter.ai/api/v1).
 *
 * @param output - The value produced by the system under test. BigInt values and
 *   circular references are tolerated when it is serialized for the judge.
 * @param criteria - Natural language description of what the output must satisfy.
 * @returns Resolves with no value when the judge reports a pass.
 * @throws {Error} If the LLM judge determines the output does not meet the criteria.
 */
export async function assertLLM(output, criteria) {
    const modelId = process.env.SMARTEST_VERIFIER_MODEL ?? "google/gemini-2.5-flash";
    const model = new ChatOpenAI({
        model: modelId,
        apiKey: process.env.OPENROUTER_API_KEY,
        configuration: {
            baseURL: process.env.OPENROUTER_BASE_URL ?? "https://openrouter.ai/api/v1",
        },
        // Temperature 0 and a small token budget: the verdict is a boolean plus
        // a 1-3 sentence justification.
        temperature: 0,
        maxTokens: 512,
    });
    // The reply is requested in the shape of judgeOutputSchema.
    const structured = model.withStructuredOutput(judgeOutputSchema, { name: "llm_judge" });
    const userMessage = `Output:\n${serializeJudgeInput(output)}\n\nCriteria:\n${criteria}`;
    const result = await structured.invoke([
        new SystemMessage(JUDGE_SYSTEM_PROMPT),
        new HumanMessage(userMessage),
    ]);
    if (!result.pass) {
        throw new Error(`LLM assertion failed: ${result.reasoning}`);
    }
}
|
|
43
|
+
//# sourceMappingURL=llm-assert.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llm-assert.js","sourceRoot":"","sources":["../../../src/support/tests/llm-assert.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACvE,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,MAAM,mBAAmB,GAAG;;;;kEAIsC,CAAC;AAEnE,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;IACjC,IAAI,EAAE,CAAC,CAAC,OAAO,EAAE;IACjB,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE;CACtB,CAAC,CAAC;AAEH;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,MAAe,EAAE,QAAgB;IAC/D,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,uBAAuB,IAAI,yBAAyB,CAAC;IAEjF,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC;QAC3B,KAAK,EAAE,OAAO;QACd,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,kBAAmB;QACvC,aAAa,EAAE;YACb,OAAO,EAAE,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,8BAA8B;SAC3E;QACD,WAAW,EAAE,CAAC;QACd,SAAS,EAAE,GAAG;KACf,CAAC,CAAC;IAEH,MAAM,UAAU,GAAG,KAAK,CAAC,oBAAoB,CAAC,iBAAiB,EAAE,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC,CAAC;IAExF,MAAM,WAAW,GAAG,YAAY,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,kBAAkB,QAAQ,EAAE,CAAC;IAE5F,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,MAAM,CAAC;QACrC,IAAI,aAAa,CAAC,mBAAmB,CAAC;QACtC,IAAI,YAAY,CAAC,WAAW,CAAC;KAC9B,CAAC,CAAC;IAEH,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QACjB,MAAM,IAAI,KAAK,CAAC,yBAAyB,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;IAC/D,CAAC;AACH,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@indexnetwork/protocol",
|
|
3
|
-
"version": "0.3.1",
|
|
3
|
+
"version": "0.3.2",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"types": "./dist/index.d.ts",
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
"scripts": {
|
|
15
15
|
"build": "tsc",
|
|
16
16
|
"dev": "tsc --watch",
|
|
17
|
+
"test": "bun test",
|
|
17
18
|
"prepublishOnly": "bun run build"
|
|
18
19
|
},
|
|
19
20
|
"dependencies": {
|