promptfoo 0.72.1 → 0.73.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/package.json +14 -8
- package/dist/src/accounts.d.ts.map +1 -1
- package/dist/src/accounts.js +2 -1
- package/dist/src/accounts.js.map +1 -1
- package/dist/src/assertions/validateAssertions.d.ts +1 -1
- package/dist/src/assertions/validateAssertions.d.ts.map +1 -1
- package/dist/src/assertions/validateAssertions.js +7 -7
- package/dist/src/assertions/validateAssertions.js.map +1 -1
- package/dist/src/assertions.d.ts.map +1 -1
- package/dist/src/assertions.js +2 -1
- package/dist/src/assertions.js.map +1 -1
- package/dist/src/cache.js +2 -2
- package/dist/src/cache.js.map +1 -1
- package/dist/src/commands/eval.d.ts.map +1 -1
- package/dist/src/commands/eval.js +10 -0
- package/dist/src/commands/eval.js.map +1 -1
- package/dist/src/commands/export.d.ts.map +1 -1
- package/dist/src/commands/export.js +15 -14
- package/dist/src/commands/export.js.map +1 -1
- package/dist/src/commands/generate/redteam.d.ts +1 -46
- package/dist/src/commands/generate/redteam.d.ts.map +1 -1
- package/dist/src/commands/generate/redteam.js +70 -26
- package/dist/src/commands/generate/redteam.js.map +1 -1
- package/dist/src/commands/import.d.ts.map +1 -1
- package/dist/src/commands/import.js +2 -1
- package/dist/src/commands/import.js.map +1 -1
- package/dist/src/commands/init.d.ts.map +1 -1
- package/dist/src/commands/init.js +12 -0
- package/dist/src/commands/init.js.map +1 -1
- package/dist/src/commands/redteam.d.ts.map +1 -1
- package/dist/src/commands/redteam.js +72 -18
- package/dist/src/commands/redteam.js.map +1 -1
- package/dist/src/commands/view.d.ts.map +1 -1
- package/dist/src/commands/view.js +2 -1
- package/dist/src/commands/view.js.map +1 -1
- package/dist/src/config.d.ts.map +1 -1
- package/dist/src/config.js +2 -20
- package/dist/src/config.js.map +1 -1
- package/dist/src/configTypes.d.ts +7 -0
- package/dist/src/configTypes.d.ts.map +1 -0
- package/dist/src/configTypes.js +3 -0
- package/dist/src/configTypes.js.map +1 -0
- package/dist/src/database/index.d.ts +4 -0
- package/dist/src/database/index.d.ts.map +1 -0
- package/dist/src/database/index.js +50 -0
- package/dist/src/database/index.js.map +1 -0
- package/dist/src/{database.d.ts → database/operations.d.ts} +173 -115
- package/dist/src/{database.d.ts.map → database/operations.d.ts.map} +1 -1
- package/dist/src/{database.js → database/operations.js} +1 -25
- package/dist/src/database/operations.js.map +1 -0
- package/dist/src/evaluator.js +2 -2
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/globalConfig.d.ts +1 -7
- package/dist/src/globalConfig.d.ts.map +1 -1
- package/dist/src/globalConfig.js +3 -3
- package/dist/src/globalConfig.js.map +1 -1
- package/dist/src/matchers.d.ts.map +1 -1
- package/dist/src/matchers.js +6 -2
- package/dist/src/matchers.js.map +1 -1
- package/dist/src/prompts/grading.js +2 -2
- package/dist/src/prompts/processors/python.js +4 -4
- package/dist/src/prompts/processors/python.js.map +1 -1
- package/dist/src/providers/anthropic.d.ts +1 -1
- package/dist/src/providers/anthropic.d.ts.map +1 -1
- package/dist/src/providers/bam.d.ts +1 -1
- package/dist/src/providers/bam.d.ts.map +1 -1
- package/dist/src/providers/bedrock.d.ts +1 -1
- package/dist/src/providers/bedrock.d.ts.map +1 -1
- package/dist/src/providers/cohere.d.ts.map +1 -1
- package/dist/src/providers/cohere.js +4 -1
- package/dist/src/providers/cohere.js.map +1 -1
- package/dist/src/providers/http.d.ts +1 -1
- package/dist/src/providers/http.d.ts.map +1 -1
- package/dist/src/providers/http.js +3 -3
- package/dist/src/providers/http.js.map +1 -1
- package/dist/src/providers/llama.d.ts +1 -1
- package/dist/src/providers/llama.d.ts.map +1 -1
- package/dist/src/providers/localai.d.ts +1 -1
- package/dist/src/providers/localai.d.ts.map +1 -1
- package/dist/src/providers/ollama.d.ts +1 -1
- package/dist/src/providers/ollama.d.ts.map +1 -1
- package/dist/src/providers/openai.d.ts +1 -1
- package/dist/src/providers/openai.d.ts.map +1 -1
- package/dist/src/providers/openai.js +48 -50
- package/dist/src/providers/openai.js.map +1 -1
- package/dist/src/providers/palm.d.ts +1 -1
- package/dist/src/providers/palm.d.ts.map +1 -1
- package/dist/src/providers/pythonCompletion.d.ts.map +1 -1
- package/dist/src/providers/pythonCompletion.js +6 -5
- package/dist/src/providers/pythonCompletion.js.map +1 -1
- package/dist/src/providers/replicate.js +2 -2
- package/dist/src/providers/replicate.js.map +1 -1
- package/dist/src/providers/scriptCompletion.js +3 -3
- package/dist/src/providers/scriptCompletion.js.map +1 -1
- package/dist/src/providers/vertex.d.ts +1 -1
- package/dist/src/providers/vertex.d.ts.map +1 -1
- package/dist/src/providers/vertex.js +23 -10
- package/dist/src/providers/vertex.js.map +1 -1
- package/dist/src/providers/vertexUtil.d.ts +2 -1
- package/dist/src/providers/vertexUtil.d.ts.map +1 -1
- package/dist/src/providers/vertexUtil.js.map +1 -1
- package/dist/src/providers/webhook.d.ts +1 -1
- package/dist/src/providers/webhook.d.ts.map +1 -1
- package/dist/src/providers.d.ts.map +1 -1
- package/dist/src/providers.js +5 -2
- package/dist/src/providers.js.map +1 -1
- package/dist/src/python/pythonUtils.d.ts +4 -0
- package/dist/src/python/pythonUtils.d.ts.map +1 -0
- package/dist/src/python/pythonUtils.js +51 -0
- package/dist/src/python/pythonUtils.js.map +1 -0
- package/dist/src/python/wrapper.d.ts +0 -10
- package/dist/src/python/wrapper.d.ts.map +1 -1
- package/dist/src/python/wrapper.js +2 -49
- package/dist/src/python/wrapper.js.map +1 -1
- package/dist/src/redteam/constants.d.ts +60 -3
- package/dist/src/redteam/constants.d.ts.map +1 -1
- package/dist/src/redteam/constants.js +71 -10
- package/dist/src/redteam/constants.js.map +1 -1
- package/dist/src/redteam/index.d.ts +15 -4
- package/dist/src/redteam/index.d.ts.map +1 -1
- package/dist/src/redteam/index.js +110 -33
- package/dist/src/redteam/index.js.map +1 -1
- package/dist/src/redteam/plugins/base.d.ts +10 -13
- package/dist/src/redteam/plugins/base.d.ts.map +1 -1
- package/dist/src/redteam/plugins/base.js +36 -20
- package/dist/src/redteam/plugins/base.js.map +1 -1
- package/dist/src/redteam/plugins/debugInterface.d.ts +7 -0
- package/dist/src/redteam/plugins/debugInterface.d.ts.map +1 -0
- package/dist/src/redteam/plugins/debugInterface.js +49 -0
- package/dist/src/redteam/plugins/debugInterface.js.map +1 -0
- package/dist/src/redteam/plugins/harmful.d.ts +1 -54
- package/dist/src/redteam/plugins/harmful.d.ts.map +1 -1
- package/dist/src/redteam/plugins/harmful.js +8 -51
- package/dist/src/redteam/plugins/harmful.js.map +1 -1
- package/dist/src/redteam/plugins/pii.d.ts +9 -10
- package/dist/src/redteam/plugins/pii.d.ts.map +1 -1
- package/dist/src/redteam/plugins/pii.js +33 -53
- package/dist/src/redteam/plugins/pii.js.map +1 -1
- package/dist/src/redteam/plugins/rbac.d.ts +7 -0
- package/dist/src/redteam/plugins/rbac.d.ts.map +1 -0
- package/dist/src/redteam/plugins/rbac.js +49 -0
- package/dist/src/redteam/plugins/rbac.js.map +1 -0
- package/dist/src/redteam/plugins/shellInjection.d.ts +7 -0
- package/dist/src/redteam/plugins/shellInjection.d.ts.map +1 -0
- package/dist/src/redteam/plugins/shellInjection.js +54 -0
- package/dist/src/redteam/plugins/shellInjection.js.map +1 -0
- package/dist/src/redteam/plugins/sqlInjection.d.ts +7 -0
- package/dist/src/redteam/plugins/sqlInjection.d.ts.map +1 -0
- package/dist/src/redteam/plugins/sqlInjection.js +44 -0
- package/dist/src/redteam/plugins/sqlInjection.js.map +1 -0
- package/dist/src/redteam/providers/iterative.d.ts.map +1 -1
- package/dist/src/redteam/providers/iterative.js +2 -1
- package/dist/src/redteam/providers/iterative.js.map +1 -1
- package/dist/src/redteam/strategies/injections.d.ts.map +1 -0
- package/dist/src/redteam/{methods → strategies}/injections.js +0 -12
- package/dist/src/redteam/strategies/injections.js.map +1 -0
- package/dist/src/redteam/strategies/iterative.d.ts.map +1 -0
- package/dist/src/redteam/strategies/iterative.js.map +1 -0
- package/dist/src/redteam/types.d.ts +196 -0
- package/dist/src/redteam/types.d.ts.map +1 -0
- package/dist/src/redteam/types.js +124 -0
- package/dist/src/redteam/types.js.map +1 -0
- package/dist/src/redteam/util.d.ts +22 -0
- package/dist/src/redteam/util.d.ts.map +1 -0
- package/dist/src/redteam/util.js +56 -0
- package/dist/src/redteam/util.js.map +1 -0
- package/dist/src/testCases.d.ts +1 -2
- package/dist/src/testCases.d.ts.map +1 -1
- package/dist/src/testCases.js.map +1 -1
- package/dist/src/types/databaseTypes.d.ts +17 -0
- package/dist/src/types/databaseTypes.d.ts.map +1 -0
- package/dist/src/types/databaseTypes.js +3 -0
- package/dist/src/types/databaseTypes.js.map +1 -0
- package/dist/src/{types.d.ts → types/index.d.ts} +4296 -1425
- package/dist/src/{types.d.ts.map → types/index.d.ts.map} +1 -1
- package/dist/src/{types.js → types/index.js} +38 -9
- package/dist/src/types/index.js.map +1 -0
- package/dist/src/util/config.d.ts +3 -0
- package/dist/src/util/config.d.ts.map +1 -0
- package/dist/src/util/config.js +42 -0
- package/dist/src/util/config.js.map +1 -0
- package/dist/src/util/index.d.ts +41 -25
- package/dist/src/util/index.d.ts.map +1 -1
- package/dist/src/util/index.js +81 -119
- package/dist/src/util/index.js.map +1 -1
- package/dist/src/util/json.d.ts +3 -0
- package/dist/src/util/json.d.ts.map +1 -0
- package/dist/src/util/json.js +27 -0
- package/dist/src/util/json.js.map +1 -0
- package/dist/src/web/nextui/404/index.html +1 -1
- package/dist/src/web/nextui/404.html +1 -1
- package/dist/src/web/nextui/_next/static/chunks/155-4e319e68476266ee.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/163-93bab94bc04ae71b.js +6 -0
- package/dist/src/web/nextui/_next/static/chunks/{258-4acb452fe85cff6f.js → 258-b6257e5de9d0e2c7.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/281-64d9f1415a301ee5.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/304-cf667fe8f06238b4.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/322-b47b6cc3f5b5fdb8.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/325-0d36870ade5e5263.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/339-79124d204fa988a3.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/378-c135e497df1864cb.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/476-0afb5eb266c57b7f.js +17 -0
- package/dist/src/web/nextui/_next/static/chunks/493-ebd12f00541c4969.js +2 -0
- package/dist/src/web/nextui/_next/static/chunks/497-e280a5610a3d7d42.js +32 -0
- package/dist/src/web/nextui/_next/static/chunks/515-e06d044f12d8a1bd.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/69-5e4d5e60859a86d2.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/737-32d5472455807d7a.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/903-7cc440d9f9e9f95d.js +97 -0
- package/dist/src/web/nextui/_next/static/chunks/905-78cd666f27891ee6.js +28 -0
- package/dist/src/web/nextui/_next/static/chunks/916-b92fea2ab6136411.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/969-6ab6c8f4158da970.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/auth/login/page-7247b17e8f179a46.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/auth/signup/page-8560afaf494f9882.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/datasets/page-6e6ec3c778ccd110.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-fe10d5df88bc44ef.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/{page-0a39d3450aa09dda.js → page-057e5eb83e0f614b.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/page-88fa70fc5bf755bc.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/layout-15abf0d1049cb47c.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/{page-5650318e57510b6c.js → page-122e9cfa52eb218a.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/progress/page-a62ca531a4bb4149.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/prompts/page-10bb2e571670139c.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/report/page-a8b83960f08e2bb2.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/setup/page-6ea3e180c3116fb2.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/{main-app-929a26b3c8cd3f7a.js → main-app-7a1376166cb8b72e.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/{webpack-c9f728822666f852.js → webpack-ee33d95b3e219985.js} +1 -1
- package/dist/src/web/nextui/_next/static/css/13f881e4290b6d59.css +1 -0
- package/dist/src/web/nextui/_next/static/css/87b5e6e04efd27e5.css +1 -0
- package/dist/src/web/nextui/_next/static/css/e17fdafd8599f69b.css +1 -0
- package/dist/src/web/nextui/_next/static/css/eb523daddb89dfc1.css +1 -0
- package/dist/src/web/nextui/auth/login/index.html +1 -1
- package/dist/src/web/nextui/auth/login/index.txt +6 -6
- package/dist/src/web/nextui/auth/signup/index.html +1 -1
- package/dist/src/web/nextui/auth/signup/index.txt +6 -6
- package/dist/src/web/nextui/datasets/index.html +1 -1
- package/dist/src/web/nextui/datasets/index.txt +6 -6
- package/dist/src/web/nextui/eval/index.html +1 -1
- package/dist/src/web/nextui/eval/index.txt +8 -8
- package/dist/src/web/nextui/index.html +1 -1
- package/dist/src/web/nextui/index.txt +5 -5
- package/dist/src/web/nextui/logo-panda.svg +91 -0
- package/dist/src/web/nextui/progress/index.html +1 -1
- package/dist/src/web/nextui/progress/index.txt +6 -6
- package/dist/src/web/nextui/prompts/index.html +1 -1
- package/dist/src/web/nextui/prompts/index.txt +6 -6
- package/dist/src/web/nextui/report/index.html +1 -1
- package/dist/src/web/nextui/report/index.txt +8 -8
- package/dist/src/web/nextui/setup/index.html +1 -1
- package/dist/src/web/nextui/setup/index.txt +7 -7
- package/dist/test/__mocks__/tempCustomModule.d.ts +13 -0
- package/dist/test/__mocks__/tempCustomModule.d.ts.map +1 -0
- package/dist/test/__mocks__/tempCustomModule.js +15 -0
- package/dist/test/__mocks__/tempCustomModule.js.map +1 -0
- package/dist/test/account.test.d.ts +2 -0
- package/dist/test/account.test.d.ts.map +1 -0
- package/dist/test/account.test.js +46 -0
- package/dist/test/account.test.js.map +1 -0
- package/dist/test/assertions/AssertionResult.test.d.ts +2 -0
- package/dist/test/assertions/AssertionResult.test.d.ts.map +1 -0
- package/dist/test/assertions/AssertionResult.test.js +184 -0
- package/dist/test/assertions/AssertionResult.test.js.map +1 -0
- package/dist/test/assertions/validateAssertions.test.d.ts +2 -0
- package/dist/test/assertions/validateAssertions.test.d.ts.map +1 -0
- package/dist/test/assertions/validateAssertions.test.js +40 -0
- package/dist/test/assertions/validateAssertions.test.js.map +1 -0
- package/dist/test/assertions.test.d.ts +2 -0
- package/dist/test/assertions.test.d.ts.map +1 -0
- package/dist/test/assertions.test.js +3162 -0
- package/dist/test/assertions.test.js.map +1 -0
- package/dist/test/cache.test.d.ts +2 -0
- package/dist/test/cache.test.d.ts.map +1 -0
- package/dist/test/cache.test.js +90 -0
- package/dist/test/cache.test.js.map +1 -0
- package/dist/test/checkNodeVersion.test.d.ts +2 -0
- package/dist/test/checkNodeVersion.test.d.ts.map +1 -0
- package/dist/test/checkNodeVersion.test.js +85 -0
- package/dist/test/checkNodeVersion.test.js.map +1 -0
- package/dist/test/commands/eval/filterFailingTests.test.d.ts +2 -0
- package/dist/test/commands/eval/filterFailingTests.test.d.ts.map +1 -0
- package/dist/test/commands/eval/filterFailingTests.test.js +84 -0
- package/dist/test/commands/eval/filterFailingTests.test.js.map +1 -0
- package/dist/test/commands/eval/filterProviders.test.d.ts +2 -0
- package/dist/test/commands/eval/filterProviders.test.d.ts.map +1 -0
- package/dist/test/commands/eval/filterProviders.test.js +50 -0
- package/dist/test/commands/eval/filterProviders.test.js.map +1 -0
- package/dist/test/commands/eval/filterTests.test.d.ts +2 -0
- package/dist/test/commands/eval/filterTests.test.d.ts.map +1 -0
- package/dist/test/commands/eval/filterTests.test.js +51 -0
- package/dist/test/commands/eval/filterTests.test.js.map +1 -0
- package/dist/test/config.test.d.ts +2 -0
- package/dist/test/config.test.d.ts.map +1 -0
- package/dist/test/config.test.js +404 -0
- package/dist/test/config.test.js.map +1 -0
- package/dist/test/csv.test.d.ts +2 -0
- package/dist/test/csv.test.d.ts.map +1 -0
- package/dist/test/csv.test.js +239 -0
- package/dist/test/csv.test.js.map +1 -0
- package/dist/test/evaluator.test.d.ts +2 -0
- package/dist/test/evaluator.test.d.ts.map +1 -0
- package/dist/test/evaluator.test.js +922 -0
- package/dist/test/evaluator.test.js.map +1 -0
- package/dist/test/globalConfig.test.d.ts +2 -0
- package/dist/test/globalConfig.test.d.ts.map +1 -0
- package/dist/test/globalConfig.test.js +91 -0
- package/dist/test/globalConfig.test.js.map +1 -0
- package/dist/test/is-sql-tests/node-sql-parser.test.d.ts +2 -0
- package/dist/test/is-sql-tests/node-sql-parser.test.d.ts.map +1 -0
- package/dist/test/is-sql-tests/node-sql-parser.test.js +179 -0
- package/dist/test/is-sql-tests/node-sql-parser.test.js.map +1 -0
- package/dist/test/matchers.test.d.ts +2 -0
- package/dist/test/matchers.test.d.ts.map +1 -0
- package/dist/test/matchers.test.js +807 -0
- package/dist/test/matchers.test.js.map +1 -0
- package/dist/test/prompts.processors.javascript.test.d.ts +2 -0
- package/dist/test/prompts.processors.javascript.test.d.ts.map +1 -0
- package/dist/test/prompts.processors.javascript.test.js +93 -0
- package/dist/test/prompts.processors.javascript.test.js.map +1 -0
- package/dist/test/prompts.processors.json.test.d.ts +2 -0
- package/dist/test/prompts.processors.json.test.d.ts.map +1 -0
- package/dist/test/prompts.processors.json.test.js +67 -0
- package/dist/test/prompts.processors.json.test.js.map +1 -0
- package/dist/test/prompts.processors.jsonl.test.d.ts +2 -0
- package/dist/test/prompts.processors.jsonl.test.d.ts.map +1 -0
- package/dist/test/prompts.processors.jsonl.test.js +99 -0
- package/dist/test/prompts.processors.jsonl.test.js.map +1 -0
- package/dist/test/prompts.processors.python.test.d.ts +2 -0
- package/dist/test/prompts.processors.python.test.d.ts.map +1 -0
- package/dist/test/prompts.processors.python.test.js +100 -0
- package/dist/test/prompts.processors.python.test.js.map +1 -0
- package/dist/test/prompts.processors.python.utils.test.d.ts +2 -0
- package/dist/test/prompts.processors.python.utils.test.d.ts.map +1 -0
- package/dist/test/prompts.processors.python.utils.test.js +68 -0
- package/dist/test/prompts.processors.python.utils.test.js.map +1 -0
- package/dist/test/prompts.processors.string.test.d.ts +2 -0
- package/dist/test/prompts.processors.string.test.d.ts.map +1 -0
- package/dist/test/prompts.processors.string.test.js +24 -0
- package/dist/test/prompts.processors.string.test.js.map +1 -0
- package/dist/test/prompts.processors.text.test.d.ts +2 -0
- package/dist/test/prompts.processors.text.test.d.ts.map +1 -0
- package/dist/test/prompts.processors.text.test.js +109 -0
- package/dist/test/prompts.processors.text.test.js.map +1 -0
- package/dist/test/prompts.processors.yaml.test.d.ts +2 -0
- package/dist/test/prompts.processors.yaml.test.d.ts.map +1 -0
- package/dist/test/prompts.processors.yaml.test.js +76 -0
- package/dist/test/prompts.processors.yaml.test.js.map +1 -0
- package/dist/test/prompts.test.d.ts +2 -0
- package/dist/test/prompts.test.d.ts.map +1 -0
- package/dist/test/prompts.test.js +562 -0
- package/dist/test/prompts.test.js.map +1 -0
- package/dist/test/prompts.utils.test.d.ts +2 -0
- package/dist/test/prompts.utils.test.d.ts.map +1 -0
- package/dist/test/prompts.utils.test.js +123 -0
- package/dist/test/prompts.utils.test.js.map +1 -0
- package/dist/test/providers.anthropic.test.d.ts +2 -0
- package/dist/test/providers.anthropic.test.d.ts.map +1 -0
- package/dist/test/providers.anthropic.test.js +520 -0
- package/dist/test/providers.anthropic.test.js.map +1 -0
- package/dist/test/providers.azure.test.d.ts +2 -0
- package/dist/test/providers.azure.test.d.ts.map +1 -0
- package/dist/test/providers.azure.test.js +96 -0
- package/dist/test/providers.azure.test.js.map +1 -0
- package/dist/test/providers.bedrock.test.d.ts +2 -0
- package/dist/test/providers.bedrock.test.d.ts.map +1 -0
- package/dist/test/providers.bedrock.test.js +349 -0
- package/dist/test/providers.bedrock.test.js.map +1 -0
- package/dist/test/providers.http.test.d.ts +2 -0
- package/dist/test/providers.http.test.d.ts.map +1 -0
- package/dist/test/providers.http.test.js +130 -0
- package/dist/test/providers.http.test.js.map +1 -0
- package/dist/test/providers.llama.test.d.ts +2 -0
- package/dist/test/providers.llama.test.d.ts.map +1 -0
- package/dist/test/providers.llama.test.js +101 -0
- package/dist/test/providers.llama.test.js.map +1 -0
- package/dist/test/providers.pythonCompletion.test.d.ts +2 -0
- package/dist/test/providers.pythonCompletion.test.d.ts.map +1 -0
- package/dist/test/providers.pythonCompletion.test.js +149 -0
- package/dist/test/providers.pythonCompletion.test.js.map +1 -0
- package/dist/test/providers.test.d.ts +2 -0
- package/dist/test/providers.test.d.ts.map +1 -0
- package/dist/test/providers.test.js +883 -0
- package/dist/test/providers.test.js.map +1 -0
- package/dist/test/providers.vertex.test.d.ts +2 -0
- package/dist/test/providers.vertex.test.d.ts.map +1 -0
- package/dist/test/providers.vertex.test.js +153 -0
- package/dist/test/providers.vertex.test.js.map +1 -0
- package/dist/test/pythonWrapper.test.d.ts +2 -0
- package/dist/test/pythonWrapper.test.d.ts.map +1 -0
- package/dist/test/pythonWrapper.test.js +65 -0
- package/dist/test/pythonWrapper.test.js.map +1 -0
- package/dist/test/redteam/plugins/base.test.d.ts +2 -0
- package/dist/test/redteam/plugins/base.test.d.ts.map +1 -0
- package/dist/test/redteam/plugins/base.test.js +149 -0
- package/dist/test/redteam/plugins/base.test.js.map +1 -0
- package/dist/test/redteam/purpose.test.d.ts +2 -0
- package/dist/test/redteam/purpose.test.d.ts.map +1 -0
- package/dist/test/redteam/purpose.test.js +37 -0
- package/dist/test/redteam/purpose.test.js.map +1 -0
- package/dist/test/redteam/types.test.d.ts +2 -0
- package/dist/test/redteam/types.test.d.ts.map +1 -0
- package/dist/test/redteam/types.test.js +325 -0
- package/dist/test/redteam/types.test.js.map +1 -0
- package/dist/test/redteam/util.test.d.ts +2 -0
- package/dist/test/redteam/util.test.d.ts.map +1 -0
- package/dist/test/redteam/util.test.js +99 -0
- package/dist/test/redteam/util.test.js.map +1 -0
- package/dist/test/telemetry.test.d.ts +2 -0
- package/dist/test/telemetry.test.d.ts.map +1 -0
- package/dist/test/telemetry.test.js +68 -0
- package/dist/test/telemetry.test.js.map +1 -0
- package/dist/test/testCases.test.d.ts +2 -0
- package/dist/test/testCases.test.d.ts.map +1 -0
- package/dist/test/testCases.test.js +362 -0
- package/dist/test/testCases.test.js.map +1 -0
- package/dist/test/updates.test.d.ts +2 -0
- package/dist/test/updates.test.d.ts.map +1 -0
- package/dist/test/updates.test.js +63 -0
- package/dist/test/updates.test.js.map +1 -0
- package/dist/test/util.templates.test.d.ts +2 -0
- package/dist/test/util.templates.test.d.ts.map +1 -0
- package/dist/test/util.templates.test.js +75 -0
- package/dist/test/util.templates.test.js.map +1 -0
- package/dist/test/util.test.d.ts +2 -0
- package/dist/test/util.test.d.ts.map +1 -0
- package/dist/test/util.test.js +661 -0
- package/dist/test/util.test.js.map +1 -0
- package/dist/test/utils.d.ts +6 -0
- package/dist/test/utils.d.ts.map +1 -0
- package/dist/test/utils.js +16 -0
- package/dist/test/utils.js.map +1 -0
- package/package.json +14 -8
- package/dist/src/database.js.map +0 -1
- package/dist/src/redteam/methods/injections.d.ts.map +0 -1
- package/dist/src/redteam/methods/injections.js.map +0 -1
- package/dist/src/redteam/methods/iterative.d.ts.map +0 -1
- package/dist/src/redteam/methods/iterative.js.map +0 -1
- package/dist/src/types.js.map +0 -1
- package/dist/src/web/nextui/_next/static/chunks/163-e65e0f7f442a0c72.js +0 -6
- package/dist/src/web/nextui/_next/static/chunks/180-46db19289d856800.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/304-9d57a2251034b801.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/322-4a0fc9b0508f47e8.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/339-db5bd05e24dde905.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/374-d40afe599198abd3.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/378-e22c8aeb8e031fad.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/448-4da3bf74eae4996a.js +0 -97
- package/dist/src/web/nextui/_next/static/chunks/53-fae6e50ace6c83a1.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/578-8efe57f906f7573c.js +0 -44
- package/dist/src/web/nextui/_next/static/chunks/620-7a5a7bf6e1fe49e5.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/737-538f50dc31cc8c49.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/905-0da344cee75690e7.js +0 -32
- package/dist/src/web/nextui/_next/static/chunks/932-75585b3be8ce494d.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/973-ffe3726c956d08ef.js +0 -2
- package/dist/src/web/nextui/_next/static/chunks/995-803c74b81e7bf6cd.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/auth/login/page-2e2c0c725127ea2d.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/auth/signup/page-ccdb1f6890601666.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/datasets/page-642db4f4a8e8ba40.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-ce320e6d1e6d1d23.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/page-32eaa14d2384c5b0.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/layout-4282b1d33566e258.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/progress/page-462526776efd6fd6.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/prompts/page-0ae78bc0bf7c56bc.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/report/page-e4f7851d41eb0f92.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/setup/page-6cde750ec428cd75.js +0 -1
- package/dist/src/web/nextui/_next/static/css/35ab7ce8025438b0.css +0 -1
- package/dist/src/web/nextui/_next/static/css/5bd2f45de1f3ba83.css +0 -1
- package/dist/src/web/nextui/_next/static/css/94b771e23fc5d5f5.css +0 -1
- package/dist/src/web/nextui/_next/static/css/dab5d695b3657d59.css +0 -1
- /package/dist/src/redteam/{methods → strategies}/injections.d.ts +0 -0
- /package/dist/src/redteam/{methods → strategies}/iterative.d.ts +0 -0
- /package/dist/src/redteam/{methods → strategies}/iterative.js +0 -0
- /package/dist/src/web/nextui/_next/static/{8E4q0Tmu22G1wENZyXEq0 → L-tpppAe26U-euPv62afH}/_buildManifest.js +0 -0
- /package/dist/src/web/nextui/_next/static/{8E4q0Tmu22G1wENZyXEq0 → L-tpppAe26U-euPv62afH}/_ssgManifest.js +0 -0
|
@@ -0,0 +1,3162 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * Compiler-emitted helper: re-exposes property `k` of module `m` on object `o`
 * under the name `k2` (defaults to `k`).
 *
 * If `this` already carries a `__createBinding` it is reused; otherwise one of
 * two implementations is picked depending on whether `Object.create` exists.
 *
 * @param {object} o  - target object that receives the binding
 * @param {object} m  - source module object
 * @param {string} k  - property name to read from `m`
 * @param {string} [k2] - property name to define on `o`; defaults to `k`
 */
// `var` is kept on purpose: it tolerates redeclaration if the helper is emitted more than once.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function (o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    // Reuse the source descriptor only when it is an accessor on an ES module,
    // or a data property that is neither writable nor configurable; in every
    // other case install a getter so `o[k2]` always reflects the current `m[k]`.
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function () { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function (o, m, k, k2) {
    // Fallback without property descriptors: a one-time copy of the value.
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
|
|
13
|
+
/**
 * Compiler-emitted helper: stores `v` as the `default` member of namespace
 * object `o`.
 *
 * If `this` already carries a `__setModuleDefault` it is reused. When
 * `Object.create` exists the property is defined through a descriptor
 * (enumerable, value only); otherwise it is a plain assignment.
 *
 * @param {object} o - namespace object being built
 * @param {*} v      - value to expose as `o.default`
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function (o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function (o, v) {
    o["default"] = v;
});
|
|
18
|
+
/**
 * Compiler-emitted helper backing `import * as ns from "..."`.
 *
 * If `this` already carries an `__importStar` it is reused.
 *
 * @param {*} mod - the value returned by `require(...)`
 * @returns {object} `mod` itself when it is flagged `__esModule`; otherwise a
 *   new object that has a binding for every own enumerable key of `mod`
 *   except "default", plus a `default` member set to `mod`.
 */
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) return mod;
    var result = {};
    // `mod != null` deliberately lets both null and undefined skip the copy loop.
    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
    __setModuleDefault(result, mod);
    return result;
};
|
|
25
|
+
/**
 * Compiler-emitted helper backing `import x from "..."`.
 *
 * If `this` already carries an `__importDefault` it is reused.
 *
 * @param {*} mod - the value returned by `require(...)`
 * @returns {object} `mod` itself when it is truthy and flagged `__esModule`;
 *   otherwise a wrapper `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
|
|
28
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
29
|
+
const dedent_1 = __importDefault(require("dedent"));
|
|
30
|
+
const fs = __importStar(require("fs"));
|
|
31
|
+
const node_fetch_1 = require("node-fetch");
|
|
32
|
+
const path = __importStar(require("path"));
|
|
33
|
+
const assertions_1 = require("../src/assertions");
|
|
34
|
+
const fetch_1 = require("../src/fetch");
|
|
35
|
+
const openai_1 = require("../src/providers/openai");
|
|
36
|
+
const replicate_1 = require("../src/providers/replicate");
|
|
37
|
+
const pythonUtils_1 = require("../src/python/pythonUtils");
|
|
38
|
+
const wrapper_1 = require("../src/python/wrapper");
|
|
39
|
+
const utils_1 = require("./utils");
|
|
40
|
+
// ---------------------------------------------------------------------------
// Module mocks for this test file.
// ---------------------------------------------------------------------------

// `ProxyAgent` becomes a mock whose implementation returns an empty object.
jest.mock('proxy-agent', () => ({
    ProxyAgent: jest.fn().mockImplementation(() => ({})),
}));

// Partial mock: every real export of ../src/fetch is kept, but
// `fetchWithRetries` is wrapped in jest.fn with the real function as its
// implementation.
jest.mock('../src/fetch', () => {
    const actual = jest.requireActual('../src/fetch');
    return {
        ...actual,
        fetchWithRetries: jest.fn(actual.fetchWithRetries),
    };
});

// Same partial-mock pattern for `runPythonCode`.
jest.mock('../src/python/wrapper', () => {
    const actual = jest.requireActual('../src/python/wrapper');
    return {
        ...actual,
        runPythonCode: jest.fn(actual.runPythonCode),
    };
});

// Same partial-mock pattern for `runPython`.
jest.mock('../src/python/pythonUtils', () => {
    const actual = jest.requireActual('../src/python/pythonUtils');
    return {
        ...actual,
        runPython: jest.fn(actual.runPython),
    };
});

// `glob` is replaced entirely; only `globSync` exists on the mock.
jest.mock('glob', () => ({
    globSync: jest.fn(),
}));

// `fs` is replaced entirely; only `readFileSync` and `promises.readFile`
// exist on the mock, and both return undefined until a test configures them.
jest.mock('fs', () => ({
    readFileSync: jest.fn(),
    promises: {
        readFile: jest.fn(),
    },
}));

// No factory given, so Jest generates the mock for this module itself.
jest.mock('../src/esm');

// The database module exposes only a stubbed `getDb`.
jest.mock('../src/database', () => ({
    getDb: jest.fn(),
}));

// cliState is replaced by a fixed object with basePath '/config_path'.
jest.mock('../src/cliState', () => ({
    basePath: '/config_path',
}));
|
|
80
|
+
// Single TestGrader instance created once at module load.
// NOTE(review): TestGrader is defined in ./utils, outside this view — presumably a stub grading provider for the tests; confirm there.
const Grader = new utils_1.TestGrader();
|
|
81
|
+
/**
 * Test suite for `runAssertions`.
 *
 * Covers: pass/fail reporting for a single `equals` assertion, weighted
 * aggregate scores compared against a test-level threshold, nested
 * `assert-set` groups (plain, thresholded, with a metric, with a weight), and
 * a test that mixes an assertion-level provider with the default grader.
 */
describe('runAssertions', () => {
    const SOME_PROMPT = 'Some prompt';
    const EXPECTED_OUTPUT = 'Expected output';
    const ALL_PASSED = { pass: true, reason: 'All assertions passed' };

    // Builds a new provider on every call; each test that needs one calls this itself.
    const makeGpt4 = () => new openai_1.OpenAiChatCompletionProvider('gpt-4');

    // Looks up `runAssertions` on the module object at call time and invokes it
    // without a `this` binding.
    const grade = (args) => (0, assertions_1.runAssertions)(args);

    // An `equals` check with weight 2 plus a `contains` check with weight 1.
    const weightedChecks = (needle) => [
        { type: 'equals', value: 'Hello world', weight: 2 },
        { type: 'contains', value: needle, weight: 1 },
    ];

    // Single test case object shared by the first three tests.
    const equalsCase = {
        assert: [{ type: 'equals', value: EXPECTED_OUTPUT }],
    };

    beforeEach(() => {
        jest.resetModules();
    });

    afterEach(() => {
        jest.clearAllMocks();
    });

    const equalityScenarios = [
        {
            title: 'should pass when all assertions pass',
            output: EXPECTED_OUTPUT,
            expected: ALL_PASSED,
        },
        {
            title: 'should fail when any assertion fails',
            output: 'Different output',
            expected: {
                pass: false,
                reason: 'Expected output "Expected output" to equal "Different output"',
            },
        },
        {
            title: 'should handle output as an object',
            output: { key: 'value' },
            expected: {
                pass: false,
                reason: 'Expected output "Expected output" to equal "{"key":"value"}"',
            },
        },
    ];
    for (const { title, output, expected } of equalityScenarios) {
        it(title, async () => {
            const outcome = await grade({
                prompt: SOME_PROMPT,
                provider: makeGpt4(),
                test: equalsCase,
                providerResponse: { output },
            });
            expect(outcome).toMatchObject(expected);
        });
    }

    const thresholdScenarios = [
        {
            title: 'should fail when combined score is less than threshold',
            threshold: 0.5,
            expected: { pass: false, reason: 'Aggregate score 0.33 < 0.5 threshold' },
        },
        {
            title: 'should pass when combined score is greater than threshold',
            threshold: 0.25,
            expected: { pass: true, reason: 'Aggregate score 0.33 ≥ 0.25 threshold' },
        },
    ];
    for (const { title, threshold, expected } of thresholdScenarios) {
        it(title, async () => {
            const outcome = await grade({
                prompt: SOME_PROMPT,
                provider: makeGpt4(),
                test: { threshold, assert: weightedChecks('world') },
                providerResponse: { output: 'Hi there world' },
            });
            expect(outcome).toMatchObject(expected);
        });
    }

    describe('assert-set', () => {
        const prompt = 'Some prompt';
        // One provider instance, created when the suite is collected, is reused by all tests here.
        const provider = makeGpt4();

        // Builds an `assert-set` entry; `extras` are placed between `type` and `assert`.
        const assertSet = (extras, nested) => ({ type: 'assert-set', ...extras, assert: nested });

        // Grades `output` against a test case made of the given top-level assertions.
        const gradeWith = (assertions, output) => grade({
            prompt,
            provider,
            test: { assert: assertions },
            providerResponse: { output },
        });

        const groupScenarios = [
            {
                title: 'assert-set success',
                build: () => assertSet({}, [{ type: 'equals', value: EXPECTED_OUTPUT }]),
                expected: ALL_PASSED,
            },
            {
                title: 'assert-set failure',
                build: () => assertSet({}, [{ type: 'equals', value: 'Something different' }]),
                expected: {
                    pass: false,
                    reason: 'Expected output "Something different" to equal "Expected output"',
                },
            },
            {
                title: 'assert-set threshold success',
                build: () => assertSet({ threshold: 0.25 }, weightedChecks('Expected')),
                expected: ALL_PASSED,
            },
            {
                title: 'assert-set threshold failure',
                build: () => assertSet({ threshold: 0.5 }, weightedChecks('Expected')),
                expected: { pass: false, reason: 'Aggregate score 0.33 < 0.5 threshold' },
            },
        ];
        for (const { title, build, expected } of groupScenarios) {
            it(title, async () => {
                const outcome = await gradeWith([build()], EXPECTED_OUTPUT);
                expect(outcome).toMatchObject(expected);
            });
        }

        it('assert-set with metric', async () => {
            const metric = 'The best metric';
            const group = assertSet({ metric, threshold: 0.5 }, [
                { type: 'equals', value: 'Hello world' },
                { type: 'contains', value: 'Expected' },
            ]);
            const outcome = await gradeWith([group], EXPECTED_OUTPUT);
            expect(outcome.namedScores).toStrictEqual({
                [metric]: 0.5,
            });
        });

        it('uses assert-set weight', async () => {
            const failingCheck = { type: 'equals', value: 'Nope', weight: 10 };
            const passingGroup = assertSet({ weight: 90 }, [{ type: 'equals', value: 'Expected' }]);
            const outcome = await gradeWith([failingCheck, passingGroup], 'Expected');
            expect(outcome.score).toBe(0.9);
        });
    });

    it('preserves default provider', async () => {
        const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
        const output = 'Expected output';
        const moderationCheck = {
            type: 'moderation',
            provider: 'replicate:moderation:foo/bar',
        };
        const rubricCheck = {
            type: 'llm-rubric',
            value: 'insert rubric here',
        };
        // Both spies are stubbed with canned replies.
        const callApiSpy = jest.spyOn(openai_1.DefaultGradingJsonProvider, 'callApi');
        callApiSpy.mockResolvedValue({
            output: JSON.stringify({ pass: true, score: 1.0, reason: 'I love you' }),
        });
        const callModerationApiSpy = jest.spyOn(replicate_1.ReplicateModerationProvider.prototype, 'callModerationApi');
        callModerationApiSpy.mockResolvedValue({ flags: [] });
        const outcome = await grade({
            prompt: 'foobar',
            provider,
            test: { assert: [moderationCheck, rubricCheck] },
            providerResponse: { output },
        });
        expect(outcome.pass).toBeTruthy();
        // Each stubbed entry point must have been called exactly once.
        expect(callApiSpy).toHaveBeenCalledTimes(1);
        expect(callModerationApiSpy).toHaveBeenCalledTimes(1);
    });
});
|
|
402
|
+
describe('runAssertion', () => {
|
|
403
|
+
beforeEach(() => {
|
|
404
|
+
jest.resetModules();
|
|
405
|
+
});
|
|
406
|
+
afterEach(() => {
|
|
407
|
+
jest.clearAllMocks();
|
|
408
|
+
});
|
|
409
|
+
const equalityAssertion = {
|
|
410
|
+
type: 'equals',
|
|
411
|
+
value: 'Expected output',
|
|
412
|
+
};
|
|
413
|
+
const equalityAssertionWithObject = {
|
|
414
|
+
type: 'equals',
|
|
415
|
+
value: { key: 'value' },
|
|
416
|
+
};
|
|
417
|
+
const isJsonAssertion = {
|
|
418
|
+
type: 'is-json',
|
|
419
|
+
};
|
|
420
|
+
const isJsonAssertionWithSchema = {
|
|
421
|
+
type: 'is-json',
|
|
422
|
+
value: {
|
|
423
|
+
required: ['latitude', 'longitude'],
|
|
424
|
+
type: 'object',
|
|
425
|
+
properties: {
|
|
426
|
+
latitude: {
|
|
427
|
+
type: 'number',
|
|
428
|
+
minimum: -90,
|
|
429
|
+
maximum: 90,
|
|
430
|
+
},
|
|
431
|
+
longitude: {
|
|
432
|
+
type: 'number',
|
|
433
|
+
minimum: -180,
|
|
434
|
+
maximum: 180,
|
|
435
|
+
},
|
|
436
|
+
},
|
|
437
|
+
},
|
|
438
|
+
};
|
|
439
|
+
const isJsonAssertionWithSchemaYamlString = {
|
|
440
|
+
type: 'is-json',
|
|
441
|
+
value: `
|
|
442
|
+
required: ["latitude", "longitude"]
|
|
443
|
+
type: object
|
|
444
|
+
properties:
|
|
445
|
+
latitude:
|
|
446
|
+
type: number
|
|
447
|
+
minimum: -90
|
|
448
|
+
maximum: 90
|
|
449
|
+
longitude:
|
|
450
|
+
type: number
|
|
451
|
+
minimum: -180
|
|
452
|
+
maximum: 180
|
|
453
|
+
`,
|
|
454
|
+
};
|
|
455
|
+
const isSqlAssertion = {
|
|
456
|
+
type: 'is-sql',
|
|
457
|
+
};
|
|
458
|
+
const notIsSqlAssertion = {
|
|
459
|
+
type: 'not-is-sql',
|
|
460
|
+
};
|
|
461
|
+
const isSqlAssertionWithDatabase = {
|
|
462
|
+
type: 'is-sql',
|
|
463
|
+
value: {
|
|
464
|
+
databaseType: 'MySQL',
|
|
465
|
+
},
|
|
466
|
+
};
|
|
467
|
+
const isSqlAssertionWithDatabaseAndWhiteTableList = {
|
|
468
|
+
type: 'is-sql',
|
|
469
|
+
value: {
|
|
470
|
+
databaseType: 'MySQL',
|
|
471
|
+
allowedTables: ['(select|update|insert|delete)::null::departments'],
|
|
472
|
+
},
|
|
473
|
+
};
|
|
474
|
+
const isSqlAssertionWithDatabaseAndWhiteColumnList = {
|
|
475
|
+
type: 'is-sql',
|
|
476
|
+
value: {
|
|
477
|
+
databaseType: 'MySQL',
|
|
478
|
+
allowedColumns: ['select::null::name', 'update::null::id'],
|
|
479
|
+
},
|
|
480
|
+
};
|
|
481
|
+
const isSqlAssertionWithDatabaseAndBothList = {
|
|
482
|
+
type: 'is-sql',
|
|
483
|
+
value: {
|
|
484
|
+
databaseType: 'MySQL',
|
|
485
|
+
allowedTables: ['(select|update|insert|delete)::null::departments'],
|
|
486
|
+
allowedColumns: ['select::null::name', 'update::null::id'],
|
|
487
|
+
},
|
|
488
|
+
};
|
|
489
|
+
const containsJsonAssertion = {
|
|
490
|
+
type: 'contains-json',
|
|
491
|
+
};
|
|
492
|
+
const containsJsonAssertionWithSchema = {
|
|
493
|
+
type: 'contains-json',
|
|
494
|
+
value: {
|
|
495
|
+
required: ['latitude', 'longitude'],
|
|
496
|
+
type: 'object',
|
|
497
|
+
properties: {
|
|
498
|
+
latitude: {
|
|
499
|
+
type: 'number',
|
|
500
|
+
minimum: -90,
|
|
501
|
+
maximum: 90,
|
|
502
|
+
},
|
|
503
|
+
longitude: {
|
|
504
|
+
type: 'number',
|
|
505
|
+
minimum: -180,
|
|
506
|
+
maximum: 180,
|
|
507
|
+
},
|
|
508
|
+
},
|
|
509
|
+
},
|
|
510
|
+
};
|
|
511
|
+
const javascriptStringAssertion = {
|
|
512
|
+
type: 'javascript',
|
|
513
|
+
value: 'output === "Expected output"',
|
|
514
|
+
};
|
|
515
|
+
const javascriptMultilineStringAssertion = {
|
|
516
|
+
type: 'javascript',
|
|
517
|
+
value: `
|
|
518
|
+
if (output === "Expected output") {
|
|
519
|
+
return {
|
|
520
|
+
pass: true,
|
|
521
|
+
score: 0.5,
|
|
522
|
+
reason: 'Assertion passed',
|
|
523
|
+
};
|
|
524
|
+
}
|
|
525
|
+
return {
|
|
526
|
+
pass: false,
|
|
527
|
+
score: 0,
|
|
528
|
+
reason: 'Assertion failed',
|
|
529
|
+
};`,
|
|
530
|
+
};
|
|
531
|
+
const javascriptStringAssertionWithNumber = {
|
|
532
|
+
type: 'javascript',
|
|
533
|
+
value: 'output.length * 10',
|
|
534
|
+
};
|
|
535
|
+
const javascriptStringAssertionWithNumberAndThreshold = {
|
|
536
|
+
type: 'javascript',
|
|
537
|
+
value: 'output.length * 10',
|
|
538
|
+
threshold: 0.5,
|
|
539
|
+
};
|
|
540
|
+
const javascriptFunctionAssertion = {
|
|
541
|
+
type: 'javascript',
|
|
542
|
+
value: async (output) => ({
|
|
543
|
+
pass: true,
|
|
544
|
+
score: 0.5,
|
|
545
|
+
reason: 'Assertion passed',
|
|
546
|
+
assertion: null,
|
|
547
|
+
}),
|
|
548
|
+
};
|
|
549
|
+
const javascriptFunctionFailAssertion = {
|
|
550
|
+
type: 'javascript',
|
|
551
|
+
value: async (output) => ({
|
|
552
|
+
pass: false,
|
|
553
|
+
score: 0.5,
|
|
554
|
+
reason: 'Assertion failed',
|
|
555
|
+
assertion: null,
|
|
556
|
+
}),
|
|
557
|
+
};
|
|
558
|
+
it('should pass when the equality assertion passes', async () => {
|
|
559
|
+
const output = 'Expected output';
|
|
560
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
561
|
+
prompt: 'Some prompt',
|
|
562
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
563
|
+
assertion: equalityAssertion,
|
|
564
|
+
test: {},
|
|
565
|
+
providerResponse: { output },
|
|
566
|
+
});
|
|
567
|
+
expect(result).toMatchObject({
|
|
568
|
+
pass: true,
|
|
569
|
+
reason: 'Assertion passed',
|
|
570
|
+
});
|
|
571
|
+
});
|
|
572
|
+
it('should fail when the equality assertion fails', async () => {
|
|
573
|
+
const output = 'Different output';
|
|
574
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
575
|
+
prompt: 'Some prompt',
|
|
576
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
577
|
+
assertion: equalityAssertion,
|
|
578
|
+
test: {},
|
|
579
|
+
providerResponse: { output },
|
|
580
|
+
});
|
|
581
|
+
expect(result).toMatchObject({
|
|
582
|
+
pass: false,
|
|
583
|
+
reason: 'Expected output "Expected output" to equal "Different output"',
|
|
584
|
+
});
|
|
585
|
+
});
|
|
586
|
+
const notEqualsAssertion = {
|
|
587
|
+
type: 'not-equals',
|
|
588
|
+
value: 'Unexpected output',
|
|
589
|
+
};
|
|
590
|
+
it('should pass when the not-equals assertion passes', async () => {
|
|
591
|
+
const output = 'Expected output';
|
|
592
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
593
|
+
prompt: 'Some prompt',
|
|
594
|
+
assertion: notEqualsAssertion,
|
|
595
|
+
test: {},
|
|
596
|
+
providerResponse: { output },
|
|
597
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
598
|
+
});
|
|
599
|
+
expect(result).toMatchObject({
|
|
600
|
+
pass: true,
|
|
601
|
+
reason: 'Assertion passed',
|
|
602
|
+
});
|
|
603
|
+
});
|
|
604
|
+
it('should fail when the not-equals assertion fails', async () => {
|
|
605
|
+
const output = 'Unexpected output';
|
|
606
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
607
|
+
prompt: 'Some prompt',
|
|
608
|
+
assertion: notEqualsAssertion,
|
|
609
|
+
test: {},
|
|
610
|
+
providerResponse: { output },
|
|
611
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
612
|
+
});
|
|
613
|
+
expect(result).toMatchObject({
|
|
614
|
+
pass: false,
|
|
615
|
+
reason: 'Expected output "Unexpected output" to not equal "Unexpected output"',
|
|
616
|
+
});
|
|
617
|
+
});
|
|
618
|
+
it('should handle output as an object', async () => {
|
|
619
|
+
const output = { key: 'value' };
|
|
620
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
621
|
+
prompt: 'Some prompt',
|
|
622
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
623
|
+
assertion: equalityAssertion,
|
|
624
|
+
test: {},
|
|
625
|
+
providerResponse: { output },
|
|
626
|
+
});
|
|
627
|
+
expect(result).toMatchObject({
|
|
628
|
+
pass: false,
|
|
629
|
+
reason: 'Expected output "Expected output" to equal "{"key":"value"}"',
|
|
630
|
+
});
|
|
631
|
+
});
|
|
632
|
+
it('should pass when the equality assertion with object passes', async () => {
|
|
633
|
+
const output = { key: 'value' };
|
|
634
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
635
|
+
prompt: 'Some prompt',
|
|
636
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
637
|
+
assertion: equalityAssertionWithObject,
|
|
638
|
+
test: {},
|
|
639
|
+
providerResponse: { output },
|
|
640
|
+
});
|
|
641
|
+
expect(result).toMatchObject({
|
|
642
|
+
pass: true,
|
|
643
|
+
reason: 'Assertion passed',
|
|
644
|
+
});
|
|
645
|
+
});
|
|
646
|
+
it('should fail when the equality assertion with object fails', async () => {
|
|
647
|
+
const output = { key: 'not value' };
|
|
648
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
649
|
+
prompt: 'Some prompt',
|
|
650
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
651
|
+
assertion: equalityAssertionWithObject,
|
|
652
|
+
test: {},
|
|
653
|
+
providerResponse: { output },
|
|
654
|
+
});
|
|
655
|
+
expect(result).toMatchObject({
|
|
656
|
+
pass: false,
|
|
657
|
+
reason: 'Expected output "{"key":"value"}" to equal "{"key":"not value"}"',
|
|
658
|
+
});
|
|
659
|
+
});
|
|
660
|
+
it('should pass when the equality assertion with object passes with external json', async () => {
|
|
661
|
+
const assertion = {
|
|
662
|
+
type: 'equals',
|
|
663
|
+
value: 'file:///output.json',
|
|
664
|
+
};
|
|
665
|
+
jest.mocked(fs.readFileSync).mockReturnValue(JSON.stringify({ key: 'value' }));
|
|
666
|
+
const output = '{"key": "value"}';
|
|
667
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
668
|
+
prompt: 'Some prompt',
|
|
669
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
670
|
+
assertion,
|
|
671
|
+
test: {},
|
|
672
|
+
providerResponse: { output },
|
|
673
|
+
});
|
|
674
|
+
expect(fs.readFileSync).toHaveBeenCalledWith(path.resolve('/output.json'), 'utf8');
|
|
675
|
+
expect(result).toMatchObject({
|
|
676
|
+
pass: true,
|
|
677
|
+
reason: 'Assertion passed',
|
|
678
|
+
});
|
|
679
|
+
});
|
|
680
|
+
it('should fail when the equality assertion with object fails with external object', async () => {
|
|
681
|
+
const assertion = {
|
|
682
|
+
type: 'equals',
|
|
683
|
+
value: 'file:///output.json',
|
|
684
|
+
};
|
|
685
|
+
jest.mocked(fs.readFileSync).mockReturnValue(JSON.stringify({ key: 'value' }));
|
|
686
|
+
const output = '{"key": "not value"}';
|
|
687
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
688
|
+
prompt: 'Some prompt',
|
|
689
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
690
|
+
assertion,
|
|
691
|
+
test: {},
|
|
692
|
+
providerResponse: { output },
|
|
693
|
+
});
|
|
694
|
+
expect(fs.readFileSync).toHaveBeenCalledWith(path.resolve('/output.json'), 'utf8');
|
|
695
|
+
expect(result).toMatchObject({
|
|
696
|
+
pass: false,
|
|
697
|
+
reason: 'Expected output "{"key":"value"}" to equal "{"key": "not value"}"',
|
|
698
|
+
});
|
|
699
|
+
});
|
|
700
|
+
it('should pass when the is-json assertion passes', async () => {
|
|
701
|
+
const output = '{"key": "value"}';
|
|
702
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
703
|
+
prompt: 'Some prompt',
|
|
704
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
705
|
+
assertion: isJsonAssertion,
|
|
706
|
+
test: {},
|
|
707
|
+
providerResponse: { output },
|
|
708
|
+
});
|
|
709
|
+
expect(result).toMatchObject({
|
|
710
|
+
pass: true,
|
|
711
|
+
reason: 'Assertion passed',
|
|
712
|
+
});
|
|
713
|
+
});
|
|
714
|
+
it('should fail when the is-json assertion fails', async () => {
|
|
715
|
+
const output = 'Not valid JSON';
|
|
716
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
717
|
+
prompt: 'Some prompt',
|
|
718
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
719
|
+
assertion: isJsonAssertion,
|
|
720
|
+
test: {},
|
|
721
|
+
providerResponse: { output },
|
|
722
|
+
});
|
|
723
|
+
expect(result).toMatchObject({
|
|
724
|
+
pass: false,
|
|
725
|
+
reason: 'Expected output to be valid JSON',
|
|
726
|
+
});
|
|
727
|
+
});
|
|
728
|
+
it('should pass when the is-json assertion passes with schema', async () => {
|
|
729
|
+
const output = '{"latitude": 80.123, "longitude": -1}';
|
|
730
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
731
|
+
prompt: 'Some prompt',
|
|
732
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
733
|
+
assertion: isJsonAssertionWithSchema,
|
|
734
|
+
test: {},
|
|
735
|
+
providerResponse: { output },
|
|
736
|
+
});
|
|
737
|
+
expect(result).toMatchObject({
|
|
738
|
+
pass: true,
|
|
739
|
+
reason: 'Assertion passed',
|
|
740
|
+
});
|
|
741
|
+
});
|
|
742
|
+
it('should fail when the is-json assertion fails with schema', async () => {
|
|
743
|
+
const output = '{"latitude": "high", "longitude": [-1]}';
|
|
744
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
745
|
+
prompt: 'Some prompt',
|
|
746
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
747
|
+
assertion: isJsonAssertionWithSchema,
|
|
748
|
+
test: {},
|
|
749
|
+
providerResponse: { output },
|
|
750
|
+
});
|
|
751
|
+
expect(result).toMatchObject({
|
|
752
|
+
pass: false,
|
|
753
|
+
reason: 'JSON does not conform to the provided schema. Errors: data/latitude must be number',
|
|
754
|
+
});
|
|
755
|
+
});
|
|
756
|
+
it('should pass when the is-json assertion passes with schema YAML string', async () => {
|
|
757
|
+
const output = '{"latitude": 80.123, "longitude": -1}';
|
|
758
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
759
|
+
prompt: 'Some prompt',
|
|
760
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
761
|
+
assertion: isJsonAssertionWithSchemaYamlString,
|
|
762
|
+
test: {},
|
|
763
|
+
providerResponse: { output },
|
|
764
|
+
});
|
|
765
|
+
expect(result).toMatchObject({
|
|
766
|
+
pass: true,
|
|
767
|
+
reason: 'Assertion passed',
|
|
768
|
+
});
|
|
769
|
+
});
|
|
770
|
+
it('should fail when the is-json assertion fails with schema YAML string', async () => {
|
|
771
|
+
const output = '{"latitude": "high", "longitude": [-1]}';
|
|
772
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
773
|
+
prompt: 'Some prompt',
|
|
774
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
775
|
+
assertion: isJsonAssertionWithSchemaYamlString,
|
|
776
|
+
test: {},
|
|
777
|
+
providerResponse: { output },
|
|
778
|
+
});
|
|
779
|
+
expect(result).toMatchObject({
|
|
780
|
+
pass: false,
|
|
781
|
+
reason: 'JSON does not conform to the provided schema. Errors: data/latitude must be number',
|
|
782
|
+
});
|
|
783
|
+
});
|
|
784
|
+
it('should validate JSON with formats using ajv-formats', async () => {
|
|
785
|
+
const output = '{"date": "2021-08-29"}';
|
|
786
|
+
const schemaWithFormat = {
|
|
787
|
+
type: 'object',
|
|
788
|
+
properties: {
|
|
789
|
+
date: {
|
|
790
|
+
type: 'string',
|
|
791
|
+
format: 'date',
|
|
792
|
+
},
|
|
793
|
+
},
|
|
794
|
+
required: ['date'],
|
|
795
|
+
};
|
|
796
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
797
|
+
prompt: 'Some prompt',
|
|
798
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
799
|
+
assertion: { type: 'is-json', value: schemaWithFormat },
|
|
800
|
+
test: {},
|
|
801
|
+
providerResponse: { output },
|
|
802
|
+
});
|
|
803
|
+
expect(result).toMatchObject({
|
|
804
|
+
pass: true,
|
|
805
|
+
reason: 'Assertion passed',
|
|
806
|
+
});
|
|
807
|
+
});
|
|
808
|
+
it('should validate JSON with formats using ajv-formats - failure', async () => {
|
|
809
|
+
const output = '{"date": "not a date"}';
|
|
810
|
+
const schemaWithFormat = {
|
|
811
|
+
type: 'object',
|
|
812
|
+
properties: {
|
|
813
|
+
date: {
|
|
814
|
+
type: 'string',
|
|
815
|
+
format: 'date',
|
|
816
|
+
},
|
|
817
|
+
},
|
|
818
|
+
required: ['date'],
|
|
819
|
+
};
|
|
820
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
821
|
+
prompt: 'Some prompt',
|
|
822
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
823
|
+
assertion: { type: 'is-json', value: schemaWithFormat },
|
|
824
|
+
test: {},
|
|
825
|
+
providerResponse: { output },
|
|
826
|
+
});
|
|
827
|
+
expect(result).toMatchObject({
|
|
828
|
+
pass: false,
|
|
829
|
+
reason: 'JSON does not conform to the provided schema. Errors: data/date must match format "date"',
|
|
830
|
+
});
|
|
831
|
+
});
|
|
832
|
+
it('should pass when the is-json assertion passes with external schema', async () => {
|
|
833
|
+
const assertion = {
|
|
834
|
+
type: 'is-json',
|
|
835
|
+
value: 'file:///schema.json',
|
|
836
|
+
};
|
|
837
|
+
jest.mocked(fs.readFileSync).mockReturnValue(JSON.stringify({
|
|
838
|
+
required: ['latitude', 'longitude'],
|
|
839
|
+
type: 'object',
|
|
840
|
+
properties: {
|
|
841
|
+
latitude: {
|
|
842
|
+
type: 'number',
|
|
843
|
+
minimum: -90,
|
|
844
|
+
maximum: 90,
|
|
845
|
+
},
|
|
846
|
+
longitude: {
|
|
847
|
+
type: 'number',
|
|
848
|
+
minimum: -180,
|
|
849
|
+
maximum: 180,
|
|
850
|
+
},
|
|
851
|
+
},
|
|
852
|
+
}));
|
|
853
|
+
const output = '{"latitude": 80.123, "longitude": -1}';
|
|
854
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
855
|
+
prompt: 'Some prompt',
|
|
856
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
857
|
+
assertion,
|
|
858
|
+
test: {},
|
|
859
|
+
providerResponse: { output },
|
|
860
|
+
});
|
|
861
|
+
expect(fs.readFileSync).toHaveBeenCalledWith(path.resolve('/schema.json'), 'utf8');
|
|
862
|
+
expect(result).toMatchObject({
|
|
863
|
+
pass: true,
|
|
864
|
+
reason: 'Assertion passed',
|
|
865
|
+
});
|
|
866
|
+
});
|
|
867
|
+
it('should fail when the is-json assertion fails with external schema', async () => {
|
|
868
|
+
const assertion = {
|
|
869
|
+
type: 'is-json',
|
|
870
|
+
value: 'file:///schema.json',
|
|
871
|
+
};
|
|
872
|
+
jest.mocked(fs.readFileSync).mockReturnValue(JSON.stringify({
|
|
873
|
+
required: ['latitude', 'longitude'],
|
|
874
|
+
type: 'object',
|
|
875
|
+
properties: {
|
|
876
|
+
latitude: {
|
|
877
|
+
type: 'number',
|
|
878
|
+
minimum: -90,
|
|
879
|
+
maximum: 90,
|
|
880
|
+
},
|
|
881
|
+
longitude: {
|
|
882
|
+
type: 'number',
|
|
883
|
+
minimum: -180,
|
|
884
|
+
maximum: 180,
|
|
885
|
+
},
|
|
886
|
+
},
|
|
887
|
+
}));
|
|
888
|
+
const output = '{"latitude": "high", "longitude": [-1]}';
|
|
889
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
890
|
+
prompt: 'Some prompt',
|
|
891
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
892
|
+
assertion,
|
|
893
|
+
test: {},
|
|
894
|
+
providerResponse: { output },
|
|
895
|
+
});
|
|
896
|
+
expect(fs.readFileSync).toHaveBeenCalledWith(path.resolve('/schema.json'), 'utf8');
|
|
897
|
+
expect(result).toMatchObject({
|
|
898
|
+
pass: false,
|
|
899
|
+
reason: 'JSON does not conform to the provided schema. Errors: data/latitude must be number',
|
|
900
|
+
});
|
|
901
|
+
});
|
|
902
|
+
it('should pass when the is-sql assertion passes', async () => {
|
|
903
|
+
const output = 'SELECT id, name FROM users';
|
|
904
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
905
|
+
prompt: 'Some prompt',
|
|
906
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
907
|
+
assertion: isSqlAssertion,
|
|
908
|
+
test: {},
|
|
909
|
+
providerResponse: { output },
|
|
910
|
+
});
|
|
911
|
+
expect(result).toMatchObject({
|
|
912
|
+
pass: true,
|
|
913
|
+
reason: 'Assertion passed',
|
|
914
|
+
});
|
|
915
|
+
});
|
|
916
|
+
it('should fail when the is-sql assertion fails', async () => {
|
|
917
|
+
const output = 'SELECT * FROM orders ORDERY BY order_date';
|
|
918
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
919
|
+
prompt: 'Some prompt',
|
|
920
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
921
|
+
assertion: isSqlAssertion,
|
|
922
|
+
test: {},
|
|
923
|
+
providerResponse: { output },
|
|
924
|
+
});
|
|
925
|
+
expect(result).toMatchObject({
|
|
926
|
+
pass: false,
|
|
927
|
+
reason: 'SQL statement does not conform to the provided MySQL database syntax.',
|
|
928
|
+
});
|
|
929
|
+
});
|
|
930
|
+
it('should pass when the not-is-sql assertion passes', async () => {
|
|
931
|
+
const output = 'SELECT * FROM orders ORDERY BY order_date';
|
|
932
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
933
|
+
prompt: 'Some prompt',
|
|
934
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
935
|
+
assertion: notIsSqlAssertion,
|
|
936
|
+
test: {},
|
|
937
|
+
providerResponse: { output },
|
|
938
|
+
});
|
|
939
|
+
expect(result).toMatchObject({
|
|
940
|
+
pass: true,
|
|
941
|
+
reason: 'Assertion passed',
|
|
942
|
+
});
|
|
943
|
+
});
|
|
944
|
+
it('should fail when the not-is-sql assertion fails', async () => {
|
|
945
|
+
const output = 'SELECT id, name FROM users';
|
|
946
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
947
|
+
prompt: 'Some prompt',
|
|
948
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
949
|
+
assertion: notIsSqlAssertion,
|
|
950
|
+
test: {},
|
|
951
|
+
providerResponse: { output },
|
|
952
|
+
});
|
|
953
|
+
expect(result).toMatchObject({
|
|
954
|
+
pass: false,
|
|
955
|
+
reason: 'The output SQL statement is valid',
|
|
956
|
+
});
|
|
957
|
+
});
|
|
958
|
+
it('should pass when the is-sql assertion passes given MySQL Database syntax', async () => {
|
|
959
|
+
const output = 'SELECT id, name FROM users';
|
|
960
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
961
|
+
prompt: 'Some prompt',
|
|
962
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
963
|
+
assertion: isSqlAssertionWithDatabase,
|
|
964
|
+
test: {},
|
|
965
|
+
providerResponse: { output },
|
|
966
|
+
});
|
|
967
|
+
expect(result).toMatchObject({
|
|
968
|
+
pass: true,
|
|
969
|
+
reason: 'Assertion passed',
|
|
970
|
+
});
|
|
971
|
+
});
|
|
972
|
+
it('should fail when the is-sql assertion fails given MySQL Database syntax', async () => {
|
|
973
|
+
const output = `SELECT first_name, last_name FROM employees WHERE first_name ILIKE 'john%'`;
|
|
974
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
975
|
+
prompt: 'Some prompt',
|
|
976
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
977
|
+
assertion: isSqlAssertionWithDatabase,
|
|
978
|
+
test: {},
|
|
979
|
+
providerResponse: { output },
|
|
980
|
+
});
|
|
981
|
+
expect(result).toMatchObject({
|
|
982
|
+
pass: false,
|
|
983
|
+
reason: 'SQL statement does not conform to the provided MySQL database syntax.',
|
|
984
|
+
});
|
|
985
|
+
});
|
|
986
|
+
it('should pass when the is-sql assertion passes given MySQL Database syntax and allowedTables', async () => {
|
|
987
|
+
const output = 'SELECT * FROM departments WHERE department_id = 1';
|
|
988
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
989
|
+
prompt: 'Some prompt',
|
|
990
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
991
|
+
assertion: isSqlAssertionWithDatabaseAndWhiteTableList,
|
|
992
|
+
test: {},
|
|
993
|
+
providerResponse: { output },
|
|
994
|
+
});
|
|
995
|
+
expect(result).toMatchObject({
|
|
996
|
+
pass: true,
|
|
997
|
+
reason: 'Assertion passed',
|
|
998
|
+
});
|
|
999
|
+
});
|
|
1000
|
+
it('should fail when the is-sql assertion fails given MySQL Database syntax and allowedTables', async () => {
|
|
1001
|
+
const output = 'UPDATE employees SET department_id = 2 WHERE employee_id = 1';
|
|
1002
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1003
|
+
prompt: 'Some prompt',
|
|
1004
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1005
|
+
assertion: isSqlAssertionWithDatabaseAndWhiteTableList,
|
|
1006
|
+
test: {},
|
|
1007
|
+
providerResponse: { output },
|
|
1008
|
+
});
|
|
1009
|
+
expect(result).toMatchObject({
|
|
1010
|
+
pass: false,
|
|
1011
|
+
reason: `SQL validation failed: authority = 'update::null::employees' is required in table whiteList to execute SQL = 'UPDATE employees SET department_id = 2 WHERE employee_id = 1'.`,
|
|
1012
|
+
});
|
|
1013
|
+
});
|
|
1014
|
+
it('should pass when the is-sql assertion passes given MySQL Database syntax and allowedColumns', async () => {
|
|
1015
|
+
const output = 'SELECT name FROM t';
|
|
1016
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1017
|
+
prompt: 'Some prompt',
|
|
1018
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1019
|
+
assertion: isSqlAssertionWithDatabaseAndWhiteColumnList,
|
|
1020
|
+
test: {},
|
|
1021
|
+
providerResponse: { output },
|
|
1022
|
+
});
|
|
1023
|
+
expect(result).toMatchObject({
|
|
1024
|
+
pass: true,
|
|
1025
|
+
reason: 'Assertion passed',
|
|
1026
|
+
});
|
|
1027
|
+
});
|
|
1028
|
+
it('should fail when the is-sql assertion fails given MySQL Database syntax and allowedColumns', async () => {
|
|
1029
|
+
const output = 'SELECT age FROM a WHERE id = 1';
|
|
1030
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1031
|
+
prompt: 'Some prompt',
|
|
1032
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1033
|
+
assertion: isSqlAssertionWithDatabaseAndWhiteColumnList,
|
|
1034
|
+
test: {},
|
|
1035
|
+
providerResponse: { output },
|
|
1036
|
+
});
|
|
1037
|
+
expect(result).toMatchObject({
|
|
1038
|
+
pass: false,
|
|
1039
|
+
reason: `SQL validation failed: authority = 'select::null::age' is required in column whiteList to execute SQL = 'SELECT age FROM a WHERE id = 1'.`,
|
|
1040
|
+
});
|
|
1041
|
+
});
|
|
1042
|
+
it('should pass when the is-sql assertion passes given MySQL Database syntax, allowedTables, and allowedColumns', async () => {
|
|
1043
|
+
const output = 'SELECT name FROM departments';
|
|
1044
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1045
|
+
prompt: 'Some prompt',
|
|
1046
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1047
|
+
assertion: isSqlAssertionWithDatabaseAndBothList,
|
|
1048
|
+
test: {},
|
|
1049
|
+
providerResponse: { output },
|
|
1050
|
+
});
|
|
1051
|
+
expect(result).toMatchObject({
|
|
1052
|
+
pass: true,
|
|
1053
|
+
reason: 'Assertion passed',
|
|
1054
|
+
});
|
|
1055
|
+
});
|
|
1056
|
+
it('should fail when the is-sql assertion fails given MySQL Database syntax, allowedTables, and allowedColumns', async () => {
|
|
1057
|
+
const output = `INSERT INTO departments (name) VALUES ('HR')`;
|
|
1058
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1059
|
+
prompt: 'Some prompt',
|
|
1060
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1061
|
+
assertion: isSqlAssertionWithDatabaseAndBothList,
|
|
1062
|
+
test: {},
|
|
1063
|
+
providerResponse: { output },
|
|
1064
|
+
});
|
|
1065
|
+
expect(result).toMatchObject({
|
|
1066
|
+
pass: false,
|
|
1067
|
+
reason: `SQL validation failed: authority = 'insert::departments::name' is required in column whiteList to execute SQL = 'INSERT INTO departments (name) VALUES ('HR')'.`,
|
|
1068
|
+
});
|
|
1069
|
+
});
|
|
1070
|
+
it('should fail when the is-sql assertion fails due to missing table authority for MySQL Database syntax', async () => {
|
|
1071
|
+
const output = 'UPDATE a SET id = 1';
|
|
1072
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1073
|
+
prompt: 'Some prompt',
|
|
1074
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1075
|
+
assertion: isSqlAssertionWithDatabaseAndBothList,
|
|
1076
|
+
test: {},
|
|
1077
|
+
providerResponse: { output },
|
|
1078
|
+
});
|
|
1079
|
+
expect(result).toMatchObject({
|
|
1080
|
+
pass: false,
|
|
1081
|
+
reason: `SQL validation failed: authority = 'update::null::a' is required in table whiteList to execute SQL = 'UPDATE a SET id = 1'.`,
|
|
1082
|
+
});
|
|
1083
|
+
});
|
|
1084
|
+
it('should fail when the is-sql assertion fails due to missing authorities for DELETE statement in MySQL Database syntax', async () => {
|
|
1085
|
+
const output = `DELETE FROM employees;`;
|
|
1086
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1087
|
+
prompt: 'Some prompt',
|
|
1088
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1089
|
+
assertion: isSqlAssertionWithDatabaseAndBothList,
|
|
1090
|
+
test: {},
|
|
1091
|
+
providerResponse: { output },
|
|
1092
|
+
});
|
|
1093
|
+
expect(result).toMatchObject({
|
|
1094
|
+
pass: false,
|
|
1095
|
+
reason: `SQL validation failed: authority = 'delete::null::employees' is required in table whiteList to execute SQL = 'DELETE FROM employees;'. SQL validation failed: authority = 'delete::employees::(.*)' is required in column whiteList to execute SQL = 'DELETE FROM employees;'.`,
|
|
1096
|
+
});
|
|
1097
|
+
});
|
|
1098
|
+
it('should pass when the contains-sql assertion passes', async () => {
|
|
1099
|
+
const output = 'wassup\n```\nSELECT id, name FROM users\n```\nyolo';
|
|
1100
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1101
|
+
prompt: 'Some prompt',
|
|
1102
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1103
|
+
assertion: {
|
|
1104
|
+
type: 'contains-sql',
|
|
1105
|
+
},
|
|
1106
|
+
test: {},
|
|
1107
|
+
providerResponse: { output },
|
|
1108
|
+
});
|
|
1109
|
+
expect(result).toMatchObject({
|
|
1110
|
+
pass: true,
|
|
1111
|
+
reason: 'Assertion passed',
|
|
1112
|
+
});
|
|
1113
|
+
});
|
|
1114
|
+
it('should pass when the contains-sql assertion sees `sql` in code block', async () => {
|
|
1115
|
+
const output = 'wassup\n```sql\nSELECT id, name FROM users\n```\nyolo';
|
|
1116
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1117
|
+
prompt: 'Some prompt',
|
|
1118
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1119
|
+
assertion: {
|
|
1120
|
+
type: 'contains-sql',
|
|
1121
|
+
},
|
|
1122
|
+
test: {},
|
|
1123
|
+
providerResponse: { output },
|
|
1124
|
+
});
|
|
1125
|
+
expect(result).toMatchObject({
|
|
1126
|
+
pass: true,
|
|
1127
|
+
reason: 'Assertion passed',
|
|
1128
|
+
});
|
|
1129
|
+
});
|
|
1130
|
+
it('should pass when the contains-sql assertion sees sql without code block', async () => {
|
|
1131
|
+
const output = 'SELECT id, name FROM users';
|
|
1132
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1133
|
+
prompt: 'Some prompt',
|
|
1134
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1135
|
+
assertion: {
|
|
1136
|
+
type: 'contains-sql',
|
|
1137
|
+
},
|
|
1138
|
+
test: {},
|
|
1139
|
+
providerResponse: { output },
|
|
1140
|
+
});
|
|
1141
|
+
expect(result).toMatchObject({
|
|
1142
|
+
pass: true,
|
|
1143
|
+
reason: 'Assertion passed',
|
|
1144
|
+
});
|
|
1145
|
+
});
|
|
1146
|
+
it('should fail when the contains-sql does not contain code block', async () => {
|
|
1147
|
+
const output = 'nothin';
|
|
1148
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1149
|
+
prompt: 'Some prompt',
|
|
1150
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1151
|
+
assertion: {
|
|
1152
|
+
type: 'contains-sql',
|
|
1153
|
+
},
|
|
1154
|
+
test: {},
|
|
1155
|
+
providerResponse: { output },
|
|
1156
|
+
});
|
|
1157
|
+
expect(result).toMatchObject({
|
|
1158
|
+
pass: false,
|
|
1159
|
+
});
|
|
1160
|
+
});
|
|
1161
|
+
it('should fail when the contains-sql does not contain sql in code block', async () => {
|
|
1162
|
+
const output = '```python\nprint("Hello, World!")\n```';
|
|
1163
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1164
|
+
prompt: 'Some prompt',
|
|
1165
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1166
|
+
assertion: {
|
|
1167
|
+
type: 'contains-sql',
|
|
1168
|
+
},
|
|
1169
|
+
test: {},
|
|
1170
|
+
providerResponse: { output },
|
|
1171
|
+
});
|
|
1172
|
+
expect(result).toMatchObject({
|
|
1173
|
+
pass: false,
|
|
1174
|
+
});
|
|
1175
|
+
});
|
|
1176
|
+
it('should pass when the contains-json assertion passes', async () => {
|
|
1177
|
+
const output = 'this is some other stuff \n\n {"key": "value", "key2": {"key3": "value2", "key4": ["value3", "value4"]}} \n\n blah blah';
|
|
1178
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1179
|
+
prompt: 'Some prompt',
|
|
1180
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1181
|
+
assertion: containsJsonAssertion,
|
|
1182
|
+
test: {},
|
|
1183
|
+
providerResponse: { output },
|
|
1184
|
+
});
|
|
1185
|
+
expect(result).toMatchObject({
|
|
1186
|
+
pass: true,
|
|
1187
|
+
reason: 'Assertion passed',
|
|
1188
|
+
});
|
|
1189
|
+
});
|
|
1190
|
+
it('should pass when the contains-json assertion passes with multiple json values', async () => {
|
|
1191
|
+
const output = 'this is some other stuff \n\n {"key": "value", "key2": {"key3": "value2", "key4": ["value3", "value4"]}} another {"key": "value", "key2": {"key3": "value2", "key4": ["value3", "value4"]}}\n\n blah blah';
|
|
1192
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1193
|
+
prompt: 'Some prompt',
|
|
1194
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1195
|
+
assertion: containsJsonAssertion,
|
|
1196
|
+
test: {},
|
|
1197
|
+
providerResponse: { output },
|
|
1198
|
+
});
|
|
1199
|
+
expect(result).toMatchObject({
|
|
1200
|
+
pass: true,
|
|
1201
|
+
reason: 'Assertion passed',
|
|
1202
|
+
});
|
|
1203
|
+
});
|
|
1204
|
+
it('should pass when the contains-json assertion passes with valid and invalid json', async () => {
|
|
1205
|
+
const output = 'There is an extra opening bracket \n\n { {"key": "value"} \n\n blah blah';
|
|
1206
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1207
|
+
prompt: 'Some prompt',
|
|
1208
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1209
|
+
assertion: containsJsonAssertion,
|
|
1210
|
+
test: {},
|
|
1211
|
+
providerResponse: { output },
|
|
1212
|
+
});
|
|
1213
|
+
expect(result).toMatchObject({
|
|
1214
|
+
pass: true,
|
|
1215
|
+
reason: 'Assertion passed',
|
|
1216
|
+
});
|
|
1217
|
+
});
|
|
1218
|
+
it('should fail when the contains-json assertion fails', async () => {
|
|
1219
|
+
const output = 'Not valid JSON';
|
|
1220
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1221
|
+
prompt: 'Some prompt',
|
|
1222
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1223
|
+
assertion: containsJsonAssertion,
|
|
1224
|
+
test: {},
|
|
1225
|
+
providerResponse: { output },
|
|
1226
|
+
});
|
|
1227
|
+
expect(result).toMatchObject({
|
|
1228
|
+
pass: false,
|
|
1229
|
+
reason: 'Expected output to contain valid JSON',
|
|
1230
|
+
});
|
|
1231
|
+
});
|
|
1232
|
+
it('should pass when the contains-json assertion passes with schema', async () => {
|
|
1233
|
+
const output = 'here is the answer\n\n```{"latitude": 80.123, "longitude": -1}```';
|
|
1234
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1235
|
+
prompt: 'Some prompt',
|
|
1236
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1237
|
+
assertion: containsJsonAssertionWithSchema,
|
|
1238
|
+
test: {},
|
|
1239
|
+
providerResponse: { output },
|
|
1240
|
+
});
|
|
1241
|
+
expect(result).toMatchObject({
|
|
1242
|
+
pass: true,
|
|
1243
|
+
reason: 'Assertion passed',
|
|
1244
|
+
});
|
|
1245
|
+
});
|
|
1246
|
+
it('should pass when the contains-json assertion passes with schema with YAML string', async () => {
|
|
1247
|
+
const output = 'here is the answer\n\n```{"latitude": 80.123, "longitude": -1}```';
|
|
1248
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1249
|
+
prompt: 'Some prompt',
|
|
1250
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1251
|
+
assertion: containsJsonAssertionWithSchema,
|
|
1252
|
+
test: {},
|
|
1253
|
+
providerResponse: { output },
|
|
1254
|
+
});
|
|
1255
|
+
expect(result).toMatchObject({
|
|
1256
|
+
pass: true,
|
|
1257
|
+
reason: 'Assertion passed',
|
|
1258
|
+
});
|
|
1259
|
+
});
|
|
1260
|
+
it('should pass when the contains-json assertion passes with external schema', async () => {
|
|
1261
|
+
const assertion = {
|
|
1262
|
+
type: 'contains-json',
|
|
1263
|
+
value: 'file:///schema.json',
|
|
1264
|
+
};
|
|
1265
|
+
jest.mocked(fs.readFileSync).mockReturnValue(JSON.stringify({
|
|
1266
|
+
required: ['latitude', 'longitude'],
|
|
1267
|
+
type: 'object',
|
|
1268
|
+
properties: {
|
|
1269
|
+
latitude: {
|
|
1270
|
+
type: 'number',
|
|
1271
|
+
minimum: -90,
|
|
1272
|
+
maximum: 90,
|
|
1273
|
+
},
|
|
1274
|
+
longitude: {
|
|
1275
|
+
type: 'number',
|
|
1276
|
+
minimum: -180,
|
|
1277
|
+
maximum: 180,
|
|
1278
|
+
},
|
|
1279
|
+
},
|
|
1280
|
+
}));
|
|
1281
|
+
const output = 'here is the answer\n\n```{"latitude": 80.123, "longitude": -1}```';
|
|
1282
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1283
|
+
prompt: 'Some prompt',
|
|
1284
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1285
|
+
assertion,
|
|
1286
|
+
test: {},
|
|
1287
|
+
providerResponse: { output },
|
|
1288
|
+
});
|
|
1289
|
+
expect(fs.readFileSync).toHaveBeenCalledWith(path.resolve('/schema.json'), 'utf8');
|
|
1290
|
+
expect(result).toMatchObject({
|
|
1291
|
+
pass: true,
|
|
1292
|
+
reason: 'Assertion passed',
|
|
1293
|
+
});
|
|
1294
|
+
});
|
|
1295
|
+
it('should fail contains-json assertion with invalid data against external schema', async () => {
|
|
1296
|
+
const assertion = {
|
|
1297
|
+
type: 'contains-json',
|
|
1298
|
+
value: 'file:///schema.json',
|
|
1299
|
+
};
|
|
1300
|
+
jest.mocked(fs.readFileSync).mockReturnValue(JSON.stringify({
|
|
1301
|
+
required: ['latitude', 'longitude'],
|
|
1302
|
+
type: 'object',
|
|
1303
|
+
properties: {
|
|
1304
|
+
latitude: {
|
|
1305
|
+
type: 'number',
|
|
1306
|
+
minimum: -90,
|
|
1307
|
+
maximum: 90,
|
|
1308
|
+
},
|
|
1309
|
+
longitude: {
|
|
1310
|
+
type: 'number',
|
|
1311
|
+
minimum: -180,
|
|
1312
|
+
maximum: 180,
|
|
1313
|
+
},
|
|
1314
|
+
},
|
|
1315
|
+
}));
|
|
1316
|
+
const output = 'here is the answer\n\n```{"latitude": "medium", "longitude": -1}```';
|
|
1317
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1318
|
+
prompt: 'Some prompt',
|
|
1319
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1320
|
+
assertion,
|
|
1321
|
+
test: {},
|
|
1322
|
+
providerResponse: { output },
|
|
1323
|
+
});
|
|
1324
|
+
expect(fs.readFileSync).toHaveBeenCalledWith(path.resolve('/schema.json'), 'utf8');
|
|
1325
|
+
expect(result).toMatchObject({
|
|
1326
|
+
pass: false,
|
|
1327
|
+
reason: 'JSON does not conform to the provided schema. Errors: data/latitude must be number',
|
|
1328
|
+
});
|
|
1329
|
+
});
|
|
1330
|
+
it('should fail contains-json assertion with predefined schema and invalid data', async () => {
|
|
1331
|
+
const output = 'here is the answer\n\n```{"latitude": "medium", "longitude": -1}```';
|
|
1332
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1333
|
+
prompt: 'Some prompt',
|
|
1334
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1335
|
+
assertion: containsJsonAssertionWithSchema,
|
|
1336
|
+
test: {},
|
|
1337
|
+
providerResponse: { output },
|
|
1338
|
+
});
|
|
1339
|
+
expect(result).toEqual(expect.objectContaining({
|
|
1340
|
+
pass: false,
|
|
1341
|
+
reason: 'JSON does not conform to the provided schema. Errors: data/latitude must be number',
|
|
1342
|
+
}));
|
|
1343
|
+
});
|
|
1344
|
+
it('should pass when the javascript assertion passes', async () => {
|
|
1345
|
+
const output = 'Expected output';
|
|
1346
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1347
|
+
prompt: 'Some prompt',
|
|
1348
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1349
|
+
assertion: javascriptStringAssertion,
|
|
1350
|
+
test: {},
|
|
1351
|
+
providerResponse: { output },
|
|
1352
|
+
});
|
|
1353
|
+
expect(result).toMatchObject({
|
|
1354
|
+
pass: true,
|
|
1355
|
+
reason: 'Assertion passed',
|
|
1356
|
+
});
|
|
1357
|
+
});
|
|
1358
|
+
it('should pass a score through when the javascript returns a number', async () => {
|
|
1359
|
+
const output = 'Expected output';
|
|
1360
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1361
|
+
prompt: 'Some prompt',
|
|
1362
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1363
|
+
assertion: javascriptStringAssertionWithNumber,
|
|
1364
|
+
test: {},
|
|
1365
|
+
providerResponse: { output },
|
|
1366
|
+
});
|
|
1367
|
+
expect(result).toMatchObject({
|
|
1368
|
+
pass: true,
|
|
1369
|
+
score: output.length * 10,
|
|
1370
|
+
reason: 'Assertion passed',
|
|
1371
|
+
});
|
|
1372
|
+
});
|
|
1373
|
+
it('should pass when javascript returns a number above threshold', async () => {
|
|
1374
|
+
const output = 'Expected output';
|
|
1375
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1376
|
+
prompt: 'Some prompt',
|
|
1377
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1378
|
+
assertion: javascriptStringAssertionWithNumberAndThreshold,
|
|
1379
|
+
test: {},
|
|
1380
|
+
providerResponse: { output },
|
|
1381
|
+
});
|
|
1382
|
+
expect(result).toMatchObject({
|
|
1383
|
+
pass: true,
|
|
1384
|
+
score: output.length * 10,
|
|
1385
|
+
reason: 'Assertion passed',
|
|
1386
|
+
});
|
|
1387
|
+
});
|
|
1388
|
+
it('should fail when javascript returns a number below threshold', async () => {
|
|
1389
|
+
const output = '';
|
|
1390
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1391
|
+
prompt: 'Some prompt',
|
|
1392
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1393
|
+
assertion: javascriptStringAssertionWithNumberAndThreshold,
|
|
1394
|
+
test: {},
|
|
1395
|
+
providerResponse: { output },
|
|
1396
|
+
});
|
|
1397
|
+
expect(result).toMatchObject({
|
|
1398
|
+
pass: false,
|
|
1399
|
+
score: output.length * 10,
|
|
1400
|
+
reason: expect.stringContaining('Custom function returned false'),
|
|
1401
|
+
});
|
|
1402
|
+
});
|
|
1403
|
+
it('should set score when javascript returns false', async () => {
|
|
1404
|
+
const output = 'Test output';
|
|
1405
|
+
const assertion = {
|
|
1406
|
+
type: 'javascript',
|
|
1407
|
+
value: 'output.length < 1',
|
|
1408
|
+
};
|
|
1409
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1410
|
+
prompt: 'Some prompt',
|
|
1411
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1412
|
+
assertion,
|
|
1413
|
+
test: {},
|
|
1414
|
+
providerResponse: { output },
|
|
1415
|
+
});
|
|
1416
|
+
expect(result).toMatchObject({
|
|
1417
|
+
pass: false,
|
|
1418
|
+
score: 0,
|
|
1419
|
+
reason: expect.stringContaining('Custom function returned false'),
|
|
1420
|
+
});
|
|
1421
|
+
});
|
|
1422
|
+
it('should fail when the javascript assertion fails', async () => {
|
|
1423
|
+
const output = 'Different output';
|
|
1424
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1425
|
+
prompt: 'Some prompt',
|
|
1426
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1427
|
+
assertion: javascriptStringAssertion,
|
|
1428
|
+
test: {},
|
|
1429
|
+
providerResponse: { output },
|
|
1430
|
+
});
|
|
1431
|
+
expect(result).toMatchObject({
|
|
1432
|
+
pass: false,
|
|
1433
|
+
reason: 'Custom function returned false\noutput === "Expected output"',
|
|
1434
|
+
});
|
|
1435
|
+
});
|
|
1436
|
+
it('should pass when assertion passes - with vars', async () => {
|
|
1437
|
+
const output = 'Expected output';
|
|
1438
|
+
const assertion = {
|
|
1439
|
+
type: 'equals',
|
|
1440
|
+
value: '{{ foo }}',
|
|
1441
|
+
};
|
|
1442
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1443
|
+
prompt: 'variable value',
|
|
1444
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1445
|
+
assertion,
|
|
1446
|
+
test: { vars: { foo: 'Expected output' } },
|
|
1447
|
+
providerResponse: { output },
|
|
1448
|
+
});
|
|
1449
|
+
expect(result).toMatchObject({
|
|
1450
|
+
pass: true,
|
|
1451
|
+
reason: 'Assertion passed',
|
|
1452
|
+
});
|
|
1453
|
+
});
|
|
1454
|
+
it('should pass when javascript function assertion passes - with vars', async () => {
|
|
1455
|
+
const output = 'Expected output';
|
|
1456
|
+
const javascriptStringAssertionWithVars = {
|
|
1457
|
+
type: 'javascript',
|
|
1458
|
+
value: 'output === "Expected output" && context.vars.foo === "bar"',
|
|
1459
|
+
};
|
|
1460
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1461
|
+
prompt: 'Some prompt',
|
|
1462
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1463
|
+
assertion: javascriptStringAssertionWithVars,
|
|
1464
|
+
test: { vars: { foo: 'bar' } },
|
|
1465
|
+
providerResponse: { output },
|
|
1466
|
+
});
|
|
1467
|
+
expect(result).toMatchObject({
|
|
1468
|
+
pass: true,
|
|
1469
|
+
reason: 'Assertion passed',
|
|
1470
|
+
});
|
|
1471
|
+
});
|
|
1472
|
+
it('should fail when the javascript does not match vars', async () => {
|
|
1473
|
+
const output = 'Expected output';
|
|
1474
|
+
const javascriptStringAssertionWithVars = {
|
|
1475
|
+
type: 'javascript',
|
|
1476
|
+
value: 'output === "Expected output" && context.vars.foo === "something else"',
|
|
1477
|
+
};
|
|
1478
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1479
|
+
prompt: 'Some prompt',
|
|
1480
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1481
|
+
assertion: javascriptStringAssertionWithVars,
|
|
1482
|
+
test: { vars: { foo: 'bar' } },
|
|
1483
|
+
providerResponse: { output },
|
|
1484
|
+
});
|
|
1485
|
+
expect(result).toMatchObject({
|
|
1486
|
+
pass: false,
|
|
1487
|
+
reason: 'Custom function returned false\noutput === "Expected output" && context.vars.foo === "something else"',
|
|
1488
|
+
});
|
|
1489
|
+
});
|
|
1490
|
+
it('should pass when the function returns pass', async () => {
|
|
1491
|
+
const output = 'Expected output';
|
|
1492
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1493
|
+
prompt: 'Some prompt',
|
|
1494
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1495
|
+
assertion: javascriptFunctionAssertion,
|
|
1496
|
+
test: {},
|
|
1497
|
+
providerResponse: { output },
|
|
1498
|
+
});
|
|
1499
|
+
expect(result).toMatchObject({
|
|
1500
|
+
pass: true,
|
|
1501
|
+
score: 0.5,
|
|
1502
|
+
reason: 'Assertion passed',
|
|
1503
|
+
});
|
|
1504
|
+
});
|
|
1505
|
+
it('should fail when the function returns fail', async () => {
|
|
1506
|
+
const output = 'Expected output';
|
|
1507
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1508
|
+
prompt: 'Some prompt',
|
|
1509
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1510
|
+
assertion: javascriptFunctionFailAssertion,
|
|
1511
|
+
test: {},
|
|
1512
|
+
providerResponse: { output },
|
|
1513
|
+
});
|
|
1514
|
+
expect(result).toMatchObject({
|
|
1515
|
+
pass: false,
|
|
1516
|
+
score: 0.5,
|
|
1517
|
+
reason: 'Assertion failed',
|
|
1518
|
+
});
|
|
1519
|
+
});
|
|
1520
|
+
it('should pass when the multiline javascript assertion passes', async () => {
|
|
1521
|
+
const output = 'Expected output';
|
|
1522
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1523
|
+
prompt: 'Some prompt',
|
|
1524
|
+
assertion: javascriptMultilineStringAssertion,
|
|
1525
|
+
test: {},
|
|
1526
|
+
providerResponse: { output },
|
|
1527
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1528
|
+
});
|
|
1529
|
+
expect(result).toMatchObject({
|
|
1530
|
+
pass: true,
|
|
1531
|
+
reason: 'Assertion passed',
|
|
1532
|
+
});
|
|
1533
|
+
});
|
|
1534
|
+
it('should pass when the multiline javascript assertion fails', async () => {
|
|
1535
|
+
const output = 'Not the expected output';
|
|
1536
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1537
|
+
prompt: 'Some prompt',
|
|
1538
|
+
assertion: javascriptMultilineStringAssertion,
|
|
1539
|
+
test: {},
|
|
1540
|
+
providerResponse: { output },
|
|
1541
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1542
|
+
});
|
|
1543
|
+
expect(result).toMatchObject({
|
|
1544
|
+
pass: false,
|
|
1545
|
+
reason: 'Assertion failed',
|
|
1546
|
+
});
|
|
1547
|
+
});
|
|
1548
|
+
const notContainsAssertion = {
|
|
1549
|
+
type: 'not-contains',
|
|
1550
|
+
value: 'Unexpected output',
|
|
1551
|
+
};
|
|
1552
|
+
it('should pass when the not-contains assertion passes', async () => {
|
|
1553
|
+
const output = 'Expected output';
|
|
1554
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1555
|
+
prompt: 'Some prompt',
|
|
1556
|
+
assertion: notContainsAssertion,
|
|
1557
|
+
test: {},
|
|
1558
|
+
providerResponse: { output },
|
|
1559
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1560
|
+
});
|
|
1561
|
+
expect(result).toMatchObject({
|
|
1562
|
+
pass: true,
|
|
1563
|
+
reason: 'Assertion passed',
|
|
1564
|
+
});
|
|
1565
|
+
});
|
|
1566
|
+
it('should fail when the not-contains assertion fails', async () => {
|
|
1567
|
+
const output = 'Unexpected output';
|
|
1568
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1569
|
+
prompt: 'Some prompt',
|
|
1570
|
+
assertion: notContainsAssertion,
|
|
1571
|
+
test: {},
|
|
1572
|
+
providerResponse: { output },
|
|
1573
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1574
|
+
});
|
|
1575
|
+
expect(result).toMatchObject({
|
|
1576
|
+
pass: false,
|
|
1577
|
+
reason: 'Expected output to not contain "Unexpected output"',
|
|
1578
|
+
});
|
|
1579
|
+
});
|
|
1580
|
+
// Test for icontains assertion
|
|
1581
|
+
const containsLowerAssertion = {
|
|
1582
|
+
type: 'icontains',
|
|
1583
|
+
value: 'expected output',
|
|
1584
|
+
};
|
|
1585
|
+
it('should pass when the icontains assertion passes', async () => {
|
|
1586
|
+
const output = 'EXPECTED OUTPUT';
|
|
1587
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1588
|
+
prompt: 'Some prompt',
|
|
1589
|
+
assertion: containsLowerAssertion,
|
|
1590
|
+
test: {},
|
|
1591
|
+
providerResponse: { output },
|
|
1592
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1593
|
+
});
|
|
1594
|
+
expect(result).toMatchObject({
|
|
1595
|
+
pass: true,
|
|
1596
|
+
reason: 'Assertion passed',
|
|
1597
|
+
});
|
|
1598
|
+
});
|
|
1599
|
+
it('should fail when the icontains assertion fails', async () => {
|
|
1600
|
+
const output = 'Different output';
|
|
1601
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1602
|
+
prompt: 'Some prompt',
|
|
1603
|
+
assertion: containsLowerAssertion,
|
|
1604
|
+
test: {},
|
|
1605
|
+
providerResponse: { output },
|
|
1606
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1607
|
+
});
|
|
1608
|
+
expect(result).toMatchObject({
|
|
1609
|
+
pass: false,
|
|
1610
|
+
reason: 'Expected output to contain "expected output"',
|
|
1611
|
+
});
|
|
1612
|
+
});
|
|
1613
|
+
// Test for not-icontains assertion
|
|
1614
|
+
const notContainsLowerAssertion = {
|
|
1615
|
+
type: 'not-icontains',
|
|
1616
|
+
value: 'unexpected output',
|
|
1617
|
+
};
|
|
1618
|
+
it('should pass when the not-icontains assertion passes', async () => {
|
|
1619
|
+
const output = 'Expected output';
|
|
1620
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1621
|
+
prompt: 'Some prompt',
|
|
1622
|
+
assertion: notContainsLowerAssertion,
|
|
1623
|
+
test: {},
|
|
1624
|
+
providerResponse: { output },
|
|
1625
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1626
|
+
});
|
|
1627
|
+
expect(result).toMatchObject({
|
|
1628
|
+
pass: true,
|
|
1629
|
+
reason: 'Assertion passed',
|
|
1630
|
+
});
|
|
1631
|
+
});
|
|
1632
|
+
it('should fail when the not-icontains assertion fails', async () => {
|
|
1633
|
+
const output = 'UNEXPECTED OUTPUT';
|
|
1634
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1635
|
+
prompt: 'Some prompt',
|
|
1636
|
+
assertion: notContainsLowerAssertion,
|
|
1637
|
+
test: {},
|
|
1638
|
+
providerResponse: { output },
|
|
1639
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1640
|
+
});
|
|
1641
|
+
expect(result).toMatchObject({
|
|
1642
|
+
pass: false,
|
|
1643
|
+
reason: 'Expected output to not contain "unexpected output"',
|
|
1644
|
+
});
|
|
1645
|
+
});
|
|
1646
|
+
// Test for contains-any assertion
|
|
1647
|
+
const containsAnyAssertion = {
|
|
1648
|
+
type: 'contains-any',
|
|
1649
|
+
value: ['option1', 'option2', 'option3'],
|
|
1650
|
+
};
|
|
1651
|
+
it('should pass when the contains-any assertion passes', async () => {
|
|
1652
|
+
const output = 'This output contains option1';
|
|
1653
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1654
|
+
prompt: 'Some prompt',
|
|
1655
|
+
assertion: containsAnyAssertion,
|
|
1656
|
+
test: {},
|
|
1657
|
+
providerResponse: { output },
|
|
1658
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1659
|
+
});
|
|
1660
|
+
expect(result).toMatchObject({
|
|
1661
|
+
pass: true,
|
|
1662
|
+
reason: 'Assertion passed',
|
|
1663
|
+
});
|
|
1664
|
+
});
|
|
1665
|
+
it('should fail when the contains-any assertion fails', async () => {
|
|
1666
|
+
const output = 'This output does not contain any option';
|
|
1667
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1668
|
+
prompt: 'Some prompt',
|
|
1669
|
+
assertion: containsAnyAssertion,
|
|
1670
|
+
test: {},
|
|
1671
|
+
providerResponse: { output },
|
|
1672
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1673
|
+
});
|
|
1674
|
+
expect(result).toMatchObject({
|
|
1675
|
+
pass: false,
|
|
1676
|
+
reason: 'Expected output to contain one of "option1, option2, option3"',
|
|
1677
|
+
});
|
|
1678
|
+
});
|
|
1679
|
+
it('should pass when the icontains-any assertion passes', async () => {
|
|
1680
|
+
const output = 'This output contains OPTION1';
|
|
1681
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1682
|
+
prompt: 'Some prompt',
|
|
1683
|
+
assertion: {
|
|
1684
|
+
type: 'icontains-any',
|
|
1685
|
+
value: ['option1', 'option2', 'option3'],
|
|
1686
|
+
},
|
|
1687
|
+
test: {},
|
|
1688
|
+
providerResponse: { output },
|
|
1689
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1690
|
+
});
|
|
1691
|
+
expect(result).toMatchObject({
|
|
1692
|
+
pass: true,
|
|
1693
|
+
reason: 'Assertion passed',
|
|
1694
|
+
});
|
|
1695
|
+
});
|
|
1696
|
+
it('should fail when the icontains-any assertion fails', async () => {
|
|
1697
|
+
const output = 'This output does not contain any option';
|
|
1698
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1699
|
+
prompt: 'Some prompt',
|
|
1700
|
+
assertion: {
|
|
1701
|
+
type: 'icontains-any',
|
|
1702
|
+
value: ['option1', 'option2', 'option3'],
|
|
1703
|
+
},
|
|
1704
|
+
test: {},
|
|
1705
|
+
providerResponse: { output },
|
|
1706
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1707
|
+
});
|
|
1708
|
+
expect(result).toMatchObject({
|
|
1709
|
+
pass: false,
|
|
1710
|
+
reason: 'Expected output to contain one of "option1, option2, option3"',
|
|
1711
|
+
});
|
|
1712
|
+
});
|
|
1713
|
+
// Test for contains-all assertion
|
|
1714
|
+
const containsAllAssertion = {
|
|
1715
|
+
type: 'contains-all',
|
|
1716
|
+
value: ['option1', 'option2', 'option3'],
|
|
1717
|
+
};
|
|
1718
|
+
it('should pass when the contains-all assertion passes', async () => {
|
|
1719
|
+
const output = 'This output contains option1, option2, and option3';
|
|
1720
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1721
|
+
prompt: 'Some prompt',
|
|
1722
|
+
assertion: containsAllAssertion,
|
|
1723
|
+
test: {},
|
|
1724
|
+
providerResponse: { output },
|
|
1725
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1726
|
+
});
|
|
1727
|
+
expect(result).toMatchObject({
|
|
1728
|
+
pass: true,
|
|
1729
|
+
reason: 'Assertion passed',
|
|
1730
|
+
});
|
|
1731
|
+
});
|
|
1732
|
+
it('should fail when the contains-all assertion fails', async () => {
|
|
1733
|
+
const output = 'This output contains only option1 and option2';
|
|
1734
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1735
|
+
prompt: 'Some prompt',
|
|
1736
|
+
assertion: containsAllAssertion,
|
|
1737
|
+
test: {},
|
|
1738
|
+
providerResponse: { output },
|
|
1739
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1740
|
+
});
|
|
1741
|
+
expect(result).toMatchObject({
|
|
1742
|
+
pass: false,
|
|
1743
|
+
reason: 'Expected output to contain all of "option1, option2, option3"',
|
|
1744
|
+
});
|
|
1745
|
+
});
|
|
1746
|
+
it('should pass when the icontains-all assertion passes', async () => {
|
|
1747
|
+
const output = 'This output contains OPTION1, option2, and opTiOn3';
|
|
1748
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1749
|
+
prompt: 'Some prompt',
|
|
1750
|
+
assertion: {
|
|
1751
|
+
type: 'icontains-all',
|
|
1752
|
+
value: ['option1', 'option2', 'option3'],
|
|
1753
|
+
},
|
|
1754
|
+
test: {},
|
|
1755
|
+
providerResponse: { output },
|
|
1756
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1757
|
+
});
|
|
1758
|
+
expect(result).toMatchObject({
|
|
1759
|
+
pass: true,
|
|
1760
|
+
reason: 'Assertion passed',
|
|
1761
|
+
});
|
|
1762
|
+
});
|
|
1763
|
+
it('should fail when the icontains-all assertion fails', async () => {
|
|
1764
|
+
const output = 'This output contains OPTION1, option2, and opTiOn3';
|
|
1765
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1766
|
+
prompt: 'Some prompt',
|
|
1767
|
+
assertion: {
|
|
1768
|
+
type: 'icontains-all',
|
|
1769
|
+
value: ['option1', 'option2', 'option3', 'option4'],
|
|
1770
|
+
},
|
|
1771
|
+
test: {},
|
|
1772
|
+
providerResponse: { output },
|
|
1773
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1774
|
+
});
|
|
1775
|
+
expect(result).toMatchObject({
|
|
1776
|
+
pass: false,
|
|
1777
|
+
reason: 'Expected output to contain all of "option1, option2, option3, option4"',
|
|
1778
|
+
});
|
|
1779
|
+
});
|
|
1780
|
+
// Test for regex assertion
|
|
1781
|
+
const containsRegexAssertion = {
|
|
1782
|
+
type: 'regex',
|
|
1783
|
+
value: '\\d{3}-\\d{2}-\\d{4}',
|
|
1784
|
+
};
|
|
1785
|
+
it('should pass when the regex assertion passes', async () => {
|
|
1786
|
+
const output = 'This output contains 123-45-6789';
|
|
1787
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1788
|
+
prompt: 'Some prompt',
|
|
1789
|
+
assertion: containsRegexAssertion,
|
|
1790
|
+
test: {},
|
|
1791
|
+
providerResponse: { output },
|
|
1792
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1793
|
+
});
|
|
1794
|
+
expect(result).toMatchObject({
|
|
1795
|
+
pass: true,
|
|
1796
|
+
reason: 'Assertion passed',
|
|
1797
|
+
});
|
|
1798
|
+
});
|
|
1799
|
+
it('should fail when the regex assertion fails', async () => {
|
|
1800
|
+
const output = 'This output does not contain the pattern';
|
|
1801
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1802
|
+
prompt: 'Some prompt',
|
|
1803
|
+
assertion: containsRegexAssertion,
|
|
1804
|
+
test: {},
|
|
1805
|
+
providerResponse: { output },
|
|
1806
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1807
|
+
});
|
|
1808
|
+
expect(result).toMatchObject({
|
|
1809
|
+
pass: false,
|
|
1810
|
+
reason: 'Expected output to match regex "\\d{3}-\\d{2}-\\d{4}"',
|
|
1811
|
+
});
|
|
1812
|
+
});
|
|
1813
|
+
// Test for not-regex assertion
|
|
1814
|
+
const notContainsRegexAssertion = {
|
|
1815
|
+
type: 'not-regex',
|
|
1816
|
+
value: '\\d{3}-\\d{2}-\\d{4}',
|
|
1817
|
+
};
|
|
1818
|
+
it('should pass when the not-regex assertion passes', async () => {
|
|
1819
|
+
const output = 'This output does not contain the pattern';
|
|
1820
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1821
|
+
prompt: 'Some prompt',
|
|
1822
|
+
assertion: notContainsRegexAssertion,
|
|
1823
|
+
test: {},
|
|
1824
|
+
providerResponse: { output },
|
|
1825
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1826
|
+
});
|
|
1827
|
+
expect(result).toMatchObject({
|
|
1828
|
+
pass: true,
|
|
1829
|
+
reason: 'Assertion passed',
|
|
1830
|
+
});
|
|
1831
|
+
});
|
|
1832
|
+
it('should fail when the not-regex assertion fails', async () => {
|
|
1833
|
+
const output = 'This output contains 123-45-6789';
|
|
1834
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1835
|
+
prompt: 'Some prompt',
|
|
1836
|
+
assertion: notContainsRegexAssertion,
|
|
1837
|
+
test: {},
|
|
1838
|
+
providerResponse: { output },
|
|
1839
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1840
|
+
});
|
|
1841
|
+
expect(result).toMatchObject({
|
|
1842
|
+
pass: false,
|
|
1843
|
+
reason: 'Expected output to not match regex "\\d{3}-\\d{2}-\\d{4}"',
|
|
1844
|
+
});
|
|
1845
|
+
});
|
|
1846
|
+
// Tests for webhook assertion
|
|
1847
|
+
const webhookAssertion = {
|
|
1848
|
+
type: 'webhook',
|
|
1849
|
+
value: 'https://example.com/webhook',
|
|
1850
|
+
};
|
|
1851
|
+
it('should pass when the webhook assertion passes', async () => {
|
|
1852
|
+
const output = 'Expected output';
|
|
1853
|
+
jest
|
|
1854
|
+
.mocked(fetch_1.fetchWithRetries)
|
|
1855
|
+
.mockImplementation(() => Promise.resolve(new node_fetch_1.Response(JSON.stringify({ pass: true }), { status: 200 })));
|
|
1856
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1857
|
+
prompt: 'Some prompt',
|
|
1858
|
+
assertion: webhookAssertion,
|
|
1859
|
+
test: {},
|
|
1860
|
+
providerResponse: { output },
|
|
1861
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1862
|
+
});
|
|
1863
|
+
expect(result).toMatchObject({
|
|
1864
|
+
pass: true,
|
|
1865
|
+
reason: 'Assertion passed',
|
|
1866
|
+
});
|
|
1867
|
+
});
|
|
1868
|
+
it('should fail when the webhook assertion fails', async () => {
|
|
1869
|
+
const output = 'Different output';
|
|
1870
|
+
jest
|
|
1871
|
+
.mocked(fetch_1.fetchWithRetries)
|
|
1872
|
+
.mockImplementation(() => Promise.resolve(new node_fetch_1.Response(JSON.stringify({ pass: false }), { status: 200 })));
|
|
1873
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1874
|
+
prompt: 'Some prompt',
|
|
1875
|
+
assertion: webhookAssertion,
|
|
1876
|
+
test: {},
|
|
1877
|
+
providerResponse: { output },
|
|
1878
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1879
|
+
});
|
|
1880
|
+
expect(result).toMatchObject({
|
|
1881
|
+
pass: false,
|
|
1882
|
+
reason: 'Webhook returned false',
|
|
1883
|
+
});
|
|
1884
|
+
});
|
|
1885
|
+
it('should fail when the webhook returns an error', async () => {
|
|
1886
|
+
const output = 'Expected output';
|
|
1887
|
+
jest
|
|
1888
|
+
.mocked(fetch_1.fetchWithRetries)
|
|
1889
|
+
.mockImplementation(() => Promise.resolve(new node_fetch_1.Response('', { status: 500 })));
|
|
1890
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1891
|
+
prompt: 'Some prompt',
|
|
1892
|
+
assertion: webhookAssertion,
|
|
1893
|
+
test: {},
|
|
1894
|
+
providerResponse: { output },
|
|
1895
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1896
|
+
});
|
|
1897
|
+
expect(result).toMatchObject({
|
|
1898
|
+
pass: false,
|
|
1899
|
+
reason: 'Webhook error: Webhook response status: 500',
|
|
1900
|
+
});
|
|
1901
|
+
});
|
|
1902
|
+
// Test for rouge-n assertion
|
|
1903
|
+
const rougeNAssertion = {
|
|
1904
|
+
type: 'rouge-n',
|
|
1905
|
+
value: 'This is the expected output.',
|
|
1906
|
+
threshold: 0.75,
|
|
1907
|
+
};
|
|
1908
|
+
it('should pass when the rouge-n assertion passes', async () => {
|
|
1909
|
+
const output = 'This is the expected output.';
|
|
1910
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1911
|
+
prompt: 'Some prompt',
|
|
1912
|
+
assertion: rougeNAssertion,
|
|
1913
|
+
test: {},
|
|
1914
|
+
providerResponse: { output },
|
|
1915
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1916
|
+
});
|
|
1917
|
+
expect(result).toMatchObject({
|
|
1918
|
+
pass: true,
|
|
1919
|
+
reason: 'ROUGE-N score 1.00 is greater than or equal to threshold 0.75',
|
|
1920
|
+
});
|
|
1921
|
+
});
|
|
1922
|
+
it('should fail when the rouge-n assertion fails', async () => {
|
|
1923
|
+
const output = 'some different output';
|
|
1924
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1925
|
+
prompt: 'Some prompt',
|
|
1926
|
+
assertion: rougeNAssertion,
|
|
1927
|
+
test: {},
|
|
1928
|
+
providerResponse: { output },
|
|
1929
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1930
|
+
});
|
|
1931
|
+
expect(result).toMatchObject({
|
|
1932
|
+
pass: false,
|
|
1933
|
+
reason: 'ROUGE-N score 0.17 is less than threshold 0.75',
|
|
1934
|
+
});
|
|
1935
|
+
});
|
|
1936
|
+
// Test for starts-with assertion
|
|
1937
|
+
const startsWithAssertion = {
|
|
1938
|
+
type: 'starts-with',
|
|
1939
|
+
value: 'Expected',
|
|
1940
|
+
};
|
|
1941
|
+
it('should pass when the starts-with assertion passes', async () => {
|
|
1942
|
+
const output = 'Expected output';
|
|
1943
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1944
|
+
prompt: 'Some prompt',
|
|
1945
|
+
assertion: startsWithAssertion,
|
|
1946
|
+
test: {},
|
|
1947
|
+
providerResponse: { output },
|
|
1948
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1949
|
+
});
|
|
1950
|
+
expect(result).toMatchObject({
|
|
1951
|
+
pass: true,
|
|
1952
|
+
reason: 'Assertion passed',
|
|
1953
|
+
});
|
|
1954
|
+
});
|
|
1955
|
+
it('should fail when the starts-with assertion fails', async () => {
|
|
1956
|
+
const output = 'Different output';
|
|
1957
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1958
|
+
prompt: 'Some prompt',
|
|
1959
|
+
assertion: startsWithAssertion,
|
|
1960
|
+
test: {},
|
|
1961
|
+
providerResponse: { output },
|
|
1962
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1963
|
+
});
|
|
1964
|
+
expect(result).toMatchObject({
|
|
1965
|
+
pass: false,
|
|
1966
|
+
reason: 'Expected output to start with "Expected"',
|
|
1967
|
+
});
|
|
1968
|
+
});
|
|
1969
|
+
it('should use the provider from the assertion if it exists', async () => {
|
|
1970
|
+
// Assertion grader passes
|
|
1971
|
+
const output = 'Expected output';
|
|
1972
|
+
const assertion = {
|
|
1973
|
+
type: 'llm-rubric',
|
|
1974
|
+
value: 'Expected output',
|
|
1975
|
+
provider: Grader,
|
|
1976
|
+
};
|
|
1977
|
+
// Test grader fails
|
|
1978
|
+
const BogusGrader = {
|
|
1979
|
+
id() {
|
|
1980
|
+
return 'BogusGrader';
|
|
1981
|
+
},
|
|
1982
|
+
async callApi() {
|
|
1983
|
+
throw new Error('Should not be called');
|
|
1984
|
+
},
|
|
1985
|
+
};
|
|
1986
|
+
const test = {
|
|
1987
|
+
assert: [assertion],
|
|
1988
|
+
options: {
|
|
1989
|
+
provider: BogusGrader,
|
|
1990
|
+
},
|
|
1991
|
+
};
|
|
1992
|
+
// Expect test to pass because assertion grader takes priority
|
|
1993
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1994
|
+
prompt: 'Some prompt',
|
|
1995
|
+
assertion: assertion,
|
|
1996
|
+
test: test,
|
|
1997
|
+
providerResponse: { output },
|
|
1998
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
1999
|
+
});
|
|
2000
|
+
expect(result).toMatchObject({
|
|
2001
|
+
pass: true,
|
|
2002
|
+
reason: 'Test grading output',
|
|
2003
|
+
});
|
|
2004
|
+
});
|
|
2005
|
+
// Test for levenshtein assertion
|
|
2006
|
+
const levenshteinAssertion = {
|
|
2007
|
+
type: 'levenshtein',
|
|
2008
|
+
value: 'Expected output',
|
|
2009
|
+
threshold: 5,
|
|
2010
|
+
};
|
|
2011
|
+
it('should pass when the levenshtein assertion passes', async () => {
|
|
2012
|
+
const output = 'Expected output';
|
|
2013
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2014
|
+
prompt: 'Some prompt',
|
|
2015
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
2016
|
+
assertion: levenshteinAssertion,
|
|
2017
|
+
test: {},
|
|
2018
|
+
providerResponse: { output },
|
|
2019
|
+
});
|
|
2020
|
+
expect(result).toMatchObject({
|
|
2021
|
+
pass: true,
|
|
2022
|
+
reason: 'Assertion passed',
|
|
2023
|
+
});
|
|
2024
|
+
});
|
|
2025
|
+
it('should fail when the levenshtein assertion fails', async () => {
|
|
2026
|
+
const output = 'Different output';
|
|
2027
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2028
|
+
prompt: 'Some prompt',
|
|
2029
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
2030
|
+
assertion: levenshteinAssertion,
|
|
2031
|
+
test: {},
|
|
2032
|
+
providerResponse: { output },
|
|
2033
|
+
});
|
|
2034
|
+
expect(result).toMatchObject({
|
|
2035
|
+
pass: false,
|
|
2036
|
+
reason: 'Levenshtein distance 8 is greater than threshold 5',
|
|
2037
|
+
});
|
|
2038
|
+
});
|
|
2039
|
+
it.each([
|
|
2040
|
+
[
|
|
2041
|
+
'boolean',
|
|
2042
|
+
jest.fn((output) => output === 'Expected output'),
|
|
2043
|
+
true,
|
|
2044
|
+
'Assertion passed',
|
|
2045
|
+
],
|
|
2046
|
+
['number', jest.fn((output) => output.length), true, 'Assertion passed'],
|
|
2047
|
+
[
|
|
2048
|
+
'GradingResult',
|
|
2049
|
+
jest.fn((output) => ({ pass: true, score: 1, reason: 'Custom reason' })),
|
|
2050
|
+
true,
|
|
2051
|
+
'Custom reason',
|
|
2052
|
+
],
|
|
2053
|
+
[
|
|
2054
|
+
'boolean',
|
|
2055
|
+
jest.fn((output) => output !== 'Expected output'),
|
|
2056
|
+
false,
|
|
2057
|
+
'Custom function returned false',
|
|
2058
|
+
],
|
|
2059
|
+
['number', jest.fn((output) => 0), false, 'Custom function returned false'],
|
|
2060
|
+
[
|
|
2061
|
+
'GradingResult',
|
|
2062
|
+
jest.fn((output) => ({ pass: false, score: 0.1, reason: 'Custom reason' })),
|
|
2063
|
+
false,
|
|
2064
|
+
'Custom reason',
|
|
2065
|
+
],
|
|
2066
|
+
[
|
|
2067
|
+
'boolean Promise',
|
|
2068
|
+
jest.fn((output) => Promise.resolve(true)),
|
|
2069
|
+
true,
|
|
2070
|
+
'Assertion passed',
|
|
2071
|
+
],
|
|
2072
|
+
])('should pass when the file:// assertion with .js file returns a %s', async (type, mockFn, expectedPass, expectedReason) => {
|
|
2073
|
+
const output = 'Expected output';
|
|
2074
|
+
jest.doMock(path.resolve('/path/to/assert.js'), () => mockFn, { virtual: true });
|
|
2075
|
+
const fileAssertion = {
|
|
2076
|
+
type: 'javascript',
|
|
2077
|
+
value: 'file:///path/to/assert.js',
|
|
2078
|
+
};
|
|
2079
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2080
|
+
prompt: 'Some prompt',
|
|
2081
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
2082
|
+
assertion: fileAssertion,
|
|
2083
|
+
test: {},
|
|
2084
|
+
providerResponse: { output },
|
|
2085
|
+
});
|
|
2086
|
+
expect(mockFn).toHaveBeenCalledWith('Expected output', {
|
|
2087
|
+
prompt: 'Some prompt',
|
|
2088
|
+
vars: {},
|
|
2089
|
+
test: {},
|
|
2090
|
+
});
|
|
2091
|
+
expect(result).toMatchObject({
|
|
2092
|
+
pass: expectedPass,
|
|
2093
|
+
reason: expect.stringContaining(expectedReason),
|
|
2094
|
+
});
|
|
2095
|
+
});
|
|
2096
|
+
it('should resolve js paths relative to the configuration file', async () => {
|
|
2097
|
+
const output = 'Expected output';
|
|
2098
|
+
const mockFn = jest.fn((output) => output === 'Expected output');
|
|
2099
|
+
jest.doMock(path.resolve('/config_path/path/to/assert.js'), () => mockFn, { virtual: true });
|
|
2100
|
+
const fileAssertion = {
|
|
2101
|
+
type: 'javascript',
|
|
2102
|
+
value: 'file://./path/to/assert.js',
|
|
2103
|
+
};
|
|
2104
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2105
|
+
prompt: 'Some prompt',
|
|
2106
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
2107
|
+
assertion: fileAssertion,
|
|
2108
|
+
test: {},
|
|
2109
|
+
providerResponse: { output },
|
|
2110
|
+
});
|
|
2111
|
+
expect(mockFn).toHaveBeenCalledWith('Expected output', {
|
|
2112
|
+
prompt: 'Some prompt',
|
|
2113
|
+
vars: {},
|
|
2114
|
+
test: {},
|
|
2115
|
+
});
|
|
2116
|
+
expect(result).toMatchObject({
|
|
2117
|
+
pass: true,
|
|
2118
|
+
reason: 'Assertion passed',
|
|
2119
|
+
});
|
|
2120
|
+
});
|
|
2121
|
+
it('should handle output strings with both single and double quotes correctly in python assertion', async () => {
|
|
2122
|
+
const expectedPythonValue = '0.5';
|
|
2123
|
+
jest.mocked(wrapper_1.runPythonCode).mockResolvedValueOnce(expectedPythonValue);
|
|
2124
|
+
const output = 'This is a string with "double quotes"\n and \'single quotes\' \n\n and some \n\t newlines.';
|
|
2125
|
+
const pythonAssertion = {
|
|
2126
|
+
type: 'python',
|
|
2127
|
+
value: expectedPythonValue,
|
|
2128
|
+
};
|
|
2129
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2130
|
+
prompt: 'Some prompt',
|
|
2131
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
2132
|
+
assertion: pythonAssertion,
|
|
2133
|
+
test: {},
|
|
2134
|
+
providerResponse: { output },
|
|
2135
|
+
});
|
|
2136
|
+
expect(wrapper_1.runPythonCode).toHaveBeenCalledTimes(1);
|
|
2137
|
+
expect(wrapper_1.runPythonCode).toHaveBeenCalledWith(expect.anything(), 'main', [
|
|
2138
|
+
output,
|
|
2139
|
+
{ prompt: 'Some prompt', test: {}, vars: {} },
|
|
2140
|
+
]);
|
|
2141
|
+
expect(result).toMatchObject({
|
|
2142
|
+
pass: true,
|
|
2143
|
+
reason: 'Assertion passed',
|
|
2144
|
+
score: Number(expectedPythonValue),
|
|
2145
|
+
});
|
|
2146
|
+
});
|
|
2147
|
+
it.each([
|
|
2148
|
+
['boolean', false, 0, 'Python code returned false', false, undefined],
|
|
2149
|
+
['number', 0, 0, 'Python code returned false', false, undefined],
|
|
2150
|
+
[
|
|
2151
|
+
'GradingResult',
|
|
2152
|
+
`{"pass": false, "score": 0, "reason": "Custom error"}`,
|
|
2153
|
+
0,
|
|
2154
|
+
'Custom error',
|
|
2155
|
+
false,
|
|
2156
|
+
undefined,
|
|
2157
|
+
],
|
|
2158
|
+
['boolean', true, 1, 'Assertion passed', true, undefined],
|
|
2159
|
+
['number', 1, 1, 'Assertion passed', true, undefined],
|
|
2160
|
+
[
|
|
2161
|
+
'GradingResult',
|
|
2162
|
+
`{"pass": true, "score": 1, "reason": "Custom success"}`,
|
|
2163
|
+
1,
|
|
2164
|
+
'Custom success',
|
|
2165
|
+
true,
|
|
2166
|
+
undefined,
|
|
2167
|
+
],
|
|
2168
|
+
[
|
|
2169
|
+
'GradingResult',
|
|
2170
|
+
// This score is less than the assertion threshold in the test
|
|
2171
|
+
`{"pass": true, "score": 0.4, "reason": "Foo bar"}`,
|
|
2172
|
+
0.4,
|
|
2173
|
+
'Python score 0.4 is less than threshold 0.5',
|
|
2174
|
+
false,
|
|
2175
|
+
0.5,
|
|
2176
|
+
],
|
|
2177
|
+
])('should handle inline return type %s with return value: %p', async (type, returnValue, expectedScore, expectedReason, expectedPass, threshold) => {
|
|
2178
|
+
const output = 'This is a string with "double quotes"\n and \'single quotes\' \n\n and some \n\t newlines.';
|
|
2179
|
+
let resolvedValue;
|
|
2180
|
+
if (type === 'GradingResult') {
|
|
2181
|
+
resolvedValue = JSON.parse(returnValue);
|
|
2182
|
+
}
|
|
2183
|
+
else {
|
|
2184
|
+
resolvedValue = returnValue;
|
|
2185
|
+
}
|
|
2186
|
+
const pythonAssertion = {
|
|
2187
|
+
type: 'python',
|
|
2188
|
+
value: returnValue.toString(),
|
|
2189
|
+
threshold,
|
|
2190
|
+
};
|
|
2191
|
+
jest.mocked(wrapper_1.runPythonCode).mockResolvedValueOnce(resolvedValue);
|
|
2192
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2193
|
+
prompt: 'Some prompt',
|
|
2194
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
2195
|
+
assertion: pythonAssertion,
|
|
2196
|
+
test: {},
|
|
2197
|
+
providerResponse: { output },
|
|
2198
|
+
});
|
|
2199
|
+
expect(wrapper_1.runPythonCode).toHaveBeenCalledTimes(1);
|
|
2200
|
+
expect(wrapper_1.runPythonCode).toHaveBeenCalledWith(expect.anything(), 'main', [
|
|
2201
|
+
output,
|
|
2202
|
+
{ prompt: 'Some prompt', test: {}, vars: {} },
|
|
2203
|
+
]);
|
|
2204
|
+
expect(result).toMatchObject({
|
|
2205
|
+
pass: expectedPass,
|
|
2206
|
+
reason: expect.stringMatching(expectedReason),
|
|
2207
|
+
score: expectedScore,
|
|
2208
|
+
});
|
|
2209
|
+
});
|
|
2210
|
+
it.each([
|
|
2211
|
+
['boolean', 'True', true, 'Assertion passed'],
|
|
2212
|
+
['number', '0.5', true, 'Assertion passed'],
|
|
2213
|
+
['boolean', true, true, 'Assertion passed'],
|
|
2214
|
+
['number', 0.5, true, 'Assertion passed'],
|
|
2215
|
+
[
|
|
2216
|
+
'GradingResult',
|
|
2217
|
+
'{"pass": true, "score": 1, "reason": "Custom reason"}',
|
|
2218
|
+
true,
|
|
2219
|
+
'Custom reason',
|
|
2220
|
+
],
|
|
2221
|
+
['boolean', 'False', false, 'Python code returned false'],
|
|
2222
|
+
['number', '0', false, 'Python code returned false'],
|
|
2223
|
+
[
|
|
2224
|
+
'GradingResult',
|
|
2225
|
+
'{"pass": false, "score": 0, "reason": "Custom reason"}',
|
|
2226
|
+
false,
|
|
2227
|
+
'Custom reason',
|
|
2228
|
+
],
|
|
2229
|
+
])('should handle when the file:// assertion with .py file returns a %s', async (type, pythonOutput, expectedPass, expectedReason) => {
|
|
2230
|
+
const output = 'Expected output';
|
|
2231
|
+
jest.mocked(pythonUtils_1.runPython).mockResolvedValueOnce(pythonOutput);
|
|
2232
|
+
const fileAssertion = {
|
|
2233
|
+
type: 'python',
|
|
2234
|
+
value: 'file:///path/to/assert.py',
|
|
2235
|
+
};
|
|
2236
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2237
|
+
prompt: 'Some prompt that includes "double quotes" and \'single quotes\'',
|
|
2238
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
2239
|
+
assertion: fileAssertion,
|
|
2240
|
+
test: {},
|
|
2241
|
+
providerResponse: { output },
|
|
2242
|
+
});
|
|
2243
|
+
expect(pythonUtils_1.runPython).toHaveBeenCalledWith(path.resolve('/path/to/assert.py'), 'get_assert', [
|
|
2244
|
+
output,
|
|
2245
|
+
{
|
|
2246
|
+
prompt: 'Some prompt that includes "double quotes" and \'single quotes\'',
|
|
2247
|
+
vars: {},
|
|
2248
|
+
test: {},
|
|
2249
|
+
},
|
|
2250
|
+
]);
|
|
2251
|
+
expect(result).toMatchObject({
|
|
2252
|
+
pass: expectedPass,
|
|
2253
|
+
reason: expect.stringContaining(expectedReason),
|
|
2254
|
+
});
|
|
2255
|
+
expect(pythonUtils_1.runPython).toHaveBeenCalledTimes(1);
|
|
2256
|
+
});
|
|
2257
|
+
it('should handle when python file assertions throw an error', async () => {
|
|
2258
|
+
const output = 'Expected output';
|
|
2259
|
+
jest
|
|
2260
|
+
.mocked(pythonUtils_1.runPython)
|
|
2261
|
+
.mockRejectedValue(new Error('The Python script `call_api` function must return a dict with an `output`'));
|
|
2262
|
+
const fileAssertion = {
|
|
2263
|
+
type: 'python',
|
|
2264
|
+
value: 'file:///path/to/assert.py',
|
|
2265
|
+
};
|
|
2266
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2267
|
+
prompt: 'Some prompt that includes "double quotes" and \'single quotes\'',
|
|
2268
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
2269
|
+
assertion: fileAssertion,
|
|
2270
|
+
test: {},
|
|
2271
|
+
providerResponse: { output },
|
|
2272
|
+
});
|
|
2273
|
+
expect(pythonUtils_1.runPython).toHaveBeenCalledTimes(1);
|
|
2274
|
+
expect(result).toEqual({
|
|
2275
|
+
assertion: {
|
|
2276
|
+
type: 'python',
|
|
2277
|
+
value: 'file:///path/to/assert.py',
|
|
2278
|
+
},
|
|
2279
|
+
pass: false,
|
|
2280
|
+
reason: 'The Python script `call_api` function must return a dict with an `output`',
|
|
2281
|
+
score: 0,
|
|
2282
|
+
});
|
|
2283
|
+
});
|
|
2284
|
+
describe('latency assertion', () => {
|
|
2285
|
+
it('should pass when the latency assertion passes', async () => {
|
|
2286
|
+
const output = 'Expected output';
|
|
2287
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2288
|
+
prompt: 'Some prompt',
|
|
2289
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
2290
|
+
assertion: {
|
|
2291
|
+
type: 'latency',
|
|
2292
|
+
threshold: 100,
|
|
2293
|
+
},
|
|
2294
|
+
latencyMs: 50,
|
|
2295
|
+
test: {},
|
|
2296
|
+
providerResponse: { output },
|
|
2297
|
+
});
|
|
2298
|
+
expect(result).toMatchObject({
|
|
2299
|
+
pass: true,
|
|
2300
|
+
reason: 'Assertion passed',
|
|
2301
|
+
});
|
|
2302
|
+
});
|
|
2303
|
+
it('should fail when the latency assertion fails', async () => {
|
|
2304
|
+
const output = 'Expected output';
|
|
2305
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2306
|
+
prompt: 'Some prompt',
|
|
2307
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
2308
|
+
assertion: {
|
|
2309
|
+
type: 'latency',
|
|
2310
|
+
threshold: 100,
|
|
2311
|
+
},
|
|
2312
|
+
latencyMs: 1000,
|
|
2313
|
+
test: {},
|
|
2314
|
+
providerResponse: { output },
|
|
2315
|
+
});
|
|
2316
|
+
expect(result).toMatchObject({
|
|
2317
|
+
pass: false,
|
|
2318
|
+
reason: 'Latency 1000ms is greater than threshold 100ms',
|
|
2319
|
+
});
|
|
2320
|
+
});
|
|
2321
|
+
it('should throw an error when grading result is missing latencyMs', async () => {
|
|
2322
|
+
const output = 'Expected output';
|
|
2323
|
+
await expect((0, assertions_1.runAssertion)({
|
|
2324
|
+
prompt: 'Some prompt',
|
|
2325
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4'),
|
|
2326
|
+
assertion: {
|
|
2327
|
+
type: 'latency',
|
|
2328
|
+
threshold: 100,
|
|
2329
|
+
},
|
|
2330
|
+
test: {},
|
|
2331
|
+
providerResponse: { output },
|
|
2332
|
+
})).rejects.toThrow('Latency assertion does not support cached results. Rerun the eval with --no-cache');
|
|
2333
|
+
});
|
|
2334
|
+
});
|
|
2335
|
+
describe('perplexity assertion', () => {
|
|
2336
|
+
it('should pass when the perplexity assertion passes', async () => {
|
|
2337
|
+
const logProbs = [-0.2, -0.4, -0.1, -0.3]; // Dummy logProbs for testing
|
|
2338
|
+
const provider = {
|
|
2339
|
+
callApi: jest.fn().mockResolvedValue({ logProbs }),
|
|
2340
|
+
};
|
|
2341
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2342
|
+
prompt: 'Some prompt',
|
|
2343
|
+
provider,
|
|
2344
|
+
assertion: {
|
|
2345
|
+
type: 'perplexity',
|
|
2346
|
+
threshold: 2,
|
|
2347
|
+
},
|
|
2348
|
+
test: {},
|
|
2349
|
+
providerResponse: { output: 'Some output', logProbs },
|
|
2350
|
+
});
|
|
2351
|
+
expect(result).toMatchObject({
|
|
2352
|
+
pass: true,
|
|
2353
|
+
reason: 'Assertion passed',
|
|
2354
|
+
});
|
|
2355
|
+
});
|
|
2356
|
+
it('should fail when the perplexity assertion fails', async () => {
|
|
2357
|
+
const logProbs = [-0.2, -0.4, -0.1, -0.3]; // Dummy logProbs for testing
|
|
2358
|
+
const provider = {
|
|
2359
|
+
callApi: jest.fn().mockResolvedValue({ logProbs }),
|
|
2360
|
+
};
|
|
2361
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2362
|
+
prompt: 'Some prompt',
|
|
2363
|
+
provider,
|
|
2364
|
+
assertion: {
|
|
2365
|
+
type: 'perplexity',
|
|
2366
|
+
threshold: 0.2,
|
|
2367
|
+
},
|
|
2368
|
+
test: {},
|
|
2369
|
+
providerResponse: { output: 'Some output', logProbs },
|
|
2370
|
+
});
|
|
2371
|
+
expect(result).toMatchObject({
|
|
2372
|
+
pass: false,
|
|
2373
|
+
reason: 'Perplexity 1.28 is greater than threshold 0.2',
|
|
2374
|
+
});
|
|
2375
|
+
});
|
|
2376
|
+
});
|
|
2377
|
+
describe('perplexity-score assertion', () => {
|
|
2378
|
+
it('should pass when the perplexity-score assertion passes', async () => {
|
|
2379
|
+
const logProbs = [-0.2, -0.4, -0.1, -0.3];
|
|
2380
|
+
const provider = {
|
|
2381
|
+
callApi: jest.fn().mockResolvedValue({ logProbs }),
|
|
2382
|
+
};
|
|
2383
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2384
|
+
prompt: 'Some prompt',
|
|
2385
|
+
provider,
|
|
2386
|
+
assertion: {
|
|
2387
|
+
type: 'perplexity-score',
|
|
2388
|
+
threshold: 0.25,
|
|
2389
|
+
},
|
|
2390
|
+
test: {},
|
|
2391
|
+
providerResponse: { output: 'Some output', logProbs },
|
|
2392
|
+
});
|
|
2393
|
+
expect(result).toMatchObject({
|
|
2394
|
+
pass: true,
|
|
2395
|
+
reason: 'Assertion passed',
|
|
2396
|
+
});
|
|
2397
|
+
});
|
|
2398
|
+
it('should fail when the perplexity-score assertion fails', async () => {
|
|
2399
|
+
const logProbs = [-0.2, -0.4, -0.1, -0.3];
|
|
2400
|
+
const provider = {
|
|
2401
|
+
callApi: jest.fn().mockResolvedValue({ logProbs }),
|
|
2402
|
+
};
|
|
2403
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2404
|
+
prompt: 'Some prompt',
|
|
2405
|
+
provider,
|
|
2406
|
+
assertion: {
|
|
2407
|
+
type: 'perplexity-score',
|
|
2408
|
+
threshold: 0.5,
|
|
2409
|
+
},
|
|
2410
|
+
test: {},
|
|
2411
|
+
providerResponse: { output: 'Some output', logProbs },
|
|
2412
|
+
});
|
|
2413
|
+
expect(result).toMatchObject({
|
|
2414
|
+
pass: false,
|
|
2415
|
+
reason: 'Perplexity score 0.44 is less than threshold 0.5',
|
|
2416
|
+
});
|
|
2417
|
+
});
|
|
2418
|
+
});
|
|
2419
|
+
describe('cost assertion', () => {
|
|
2420
|
+
it('should pass when the cost is below the threshold', async () => {
|
|
2421
|
+
const cost = 0.0005;
|
|
2422
|
+
const provider = {
|
|
2423
|
+
callApi: jest.fn().mockResolvedValue({ cost }),
|
|
2424
|
+
};
|
|
2425
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2426
|
+
prompt: 'Some prompt',
|
|
2427
|
+
provider,
|
|
2428
|
+
assertion: {
|
|
2429
|
+
type: 'cost',
|
|
2430
|
+
threshold: 0.001,
|
|
2431
|
+
},
|
|
2432
|
+
test: {},
|
|
2433
|
+
providerResponse: { output: 'Some output', cost },
|
|
2434
|
+
});
|
|
2435
|
+
expect(result).toMatchObject({
|
|
2436
|
+
pass: true,
|
|
2437
|
+
reason: 'Assertion passed',
|
|
2438
|
+
});
|
|
2439
|
+
});
|
|
2440
|
+
it('should fail when the cost exceeds the threshold', async () => {
|
|
2441
|
+
const cost = 0.002;
|
|
2442
|
+
const provider = {
|
|
2443
|
+
callApi: jest.fn().mockResolvedValue({ cost }),
|
|
2444
|
+
};
|
|
2445
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2446
|
+
prompt: 'Some prompt',
|
|
2447
|
+
provider,
|
|
2448
|
+
assertion: {
|
|
2449
|
+
type: 'cost',
|
|
2450
|
+
threshold: 0.001,
|
|
2451
|
+
},
|
|
2452
|
+
test: {},
|
|
2453
|
+
providerResponse: { output: 'Some output', cost },
|
|
2454
|
+
});
|
|
2455
|
+
expect(result).toMatchObject({
|
|
2456
|
+
pass: false,
|
|
2457
|
+
reason: 'Cost 0.0020 is greater than threshold 0.001',
|
|
2458
|
+
});
|
|
2459
|
+
});
|
|
2460
|
+
});
|
|
2461
|
+
describe('is-valid-openai-function-call assertion', () => {
|
|
2462
|
+
it('should pass for a valid function call with correct arguments', async () => {
|
|
2463
|
+
const output = { arguments: '{"x": 10, "y": 20}', name: 'add' };
|
|
2464
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2465
|
+
prompt: 'Some prompt',
|
|
2466
|
+
provider: new openai_1.OpenAiChatCompletionProvider('foo', {
|
|
2467
|
+
config: {
|
|
2468
|
+
functions: [
|
|
2469
|
+
{
|
|
2470
|
+
name: 'add',
|
|
2471
|
+
parameters: {
|
|
2472
|
+
type: 'object',
|
|
2473
|
+
properties: {
|
|
2474
|
+
x: { type: 'number' },
|
|
2475
|
+
y: { type: 'number' },
|
|
2476
|
+
},
|
|
2477
|
+
required: ['x', 'y'],
|
|
2478
|
+
},
|
|
2479
|
+
},
|
|
2480
|
+
],
|
|
2481
|
+
},
|
|
2482
|
+
}),
|
|
2483
|
+
assertion: {
|
|
2484
|
+
type: 'is-valid-openai-function-call',
|
|
2485
|
+
},
|
|
2486
|
+
test: {},
|
|
2487
|
+
providerResponse: { output },
|
|
2488
|
+
});
|
|
2489
|
+
expect(result).toMatchObject({
|
|
2490
|
+
pass: true,
|
|
2491
|
+
reason: 'Assertion passed',
|
|
2492
|
+
});
|
|
2493
|
+
});
|
|
2494
|
+
it('should fail for an invalid function call with incorrect arguments', async () => {
|
|
2495
|
+
const output = { arguments: '{"x": "10", "y": 20}', name: 'add' };
|
|
2496
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2497
|
+
prompt: 'Some prompt',
|
|
2498
|
+
provider: new openai_1.OpenAiChatCompletionProvider('foo', {
|
|
2499
|
+
config: {
|
|
2500
|
+
functions: [
|
|
2501
|
+
{
|
|
2502
|
+
name: 'add',
|
|
2503
|
+
parameters: {
|
|
2504
|
+
type: 'object',
|
|
2505
|
+
properties: {
|
|
2506
|
+
x: { type: 'number' },
|
|
2507
|
+
y: { type: 'number' },
|
|
2508
|
+
},
|
|
2509
|
+
required: ['x', 'y'],
|
|
2510
|
+
},
|
|
2511
|
+
},
|
|
2512
|
+
],
|
|
2513
|
+
},
|
|
2514
|
+
}),
|
|
2515
|
+
assertion: {
|
|
2516
|
+
type: 'is-valid-openai-function-call',
|
|
2517
|
+
},
|
|
2518
|
+
test: {},
|
|
2519
|
+
providerResponse: { output },
|
|
2520
|
+
});
|
|
2521
|
+
expect(result).toMatchObject({
|
|
2522
|
+
pass: false,
|
|
2523
|
+
reason: expect.stringContaining('Call to "add" does not match schema'),
|
|
2524
|
+
});
|
|
2525
|
+
});
|
|
2526
|
+
});
|
|
2527
|
+
describe('is-valid-openai-tools-call assertion', () => {
|
|
2528
|
+
it('should pass for a valid tools call with correct arguments', async () => {
|
|
2529
|
+
const output = [
|
|
2530
|
+
{ type: 'function', function: { arguments: '{"x": 10, "y": 20}', name: 'add' } },
|
|
2531
|
+
];
|
|
2532
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2533
|
+
prompt: 'Some prompt',
|
|
2534
|
+
provider: new openai_1.OpenAiChatCompletionProvider('foo', {
|
|
2535
|
+
config: {
|
|
2536
|
+
tools: [
|
|
2537
|
+
{
|
|
2538
|
+
type: 'function',
|
|
2539
|
+
function: {
|
|
2540
|
+
name: 'add',
|
|
2541
|
+
parameters: {
|
|
2542
|
+
type: 'object',
|
|
2543
|
+
properties: {
|
|
2544
|
+
x: { type: 'number' },
|
|
2545
|
+
y: { type: 'number' },
|
|
2546
|
+
},
|
|
2547
|
+
required: ['x', 'y'],
|
|
2548
|
+
},
|
|
2549
|
+
},
|
|
2550
|
+
},
|
|
2551
|
+
],
|
|
2552
|
+
},
|
|
2553
|
+
}),
|
|
2554
|
+
assertion: {
|
|
2555
|
+
type: 'is-valid-openai-tools-call',
|
|
2556
|
+
},
|
|
2557
|
+
test: {},
|
|
2558
|
+
providerResponse: { output },
|
|
2559
|
+
});
|
|
2560
|
+
expect(result).toMatchObject({
|
|
2561
|
+
pass: true,
|
|
2562
|
+
reason: 'Assertion passed',
|
|
2563
|
+
});
|
|
2564
|
+
});
|
|
2565
|
+
it('should fail for an invalid tools call with incorrect arguments', async () => {
|
|
2566
|
+
const output = [
|
|
2567
|
+
{ type: 'function', function: { arguments: '{"x": "foobar", "y": 20}', name: 'add' } },
|
|
2568
|
+
];
|
|
2569
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2570
|
+
prompt: 'Some prompt',
|
|
2571
|
+
provider: new openai_1.OpenAiChatCompletionProvider('foo', {
|
|
2572
|
+
config: {
|
|
2573
|
+
tools: [
|
|
2574
|
+
{
|
|
2575
|
+
type: 'function',
|
|
2576
|
+
function: {
|
|
2577
|
+
name: 'add',
|
|
2578
|
+
parameters: {
|
|
2579
|
+
type: 'object',
|
|
2580
|
+
properties: {
|
|
2581
|
+
x: { type: 'number' },
|
|
2582
|
+
y: { type: 'number' },
|
|
2583
|
+
},
|
|
2584
|
+
required: ['x', 'y'],
|
|
2585
|
+
},
|
|
2586
|
+
},
|
|
2587
|
+
},
|
|
2588
|
+
],
|
|
2589
|
+
},
|
|
2590
|
+
}),
|
|
2591
|
+
assertion: {
|
|
2592
|
+
type: 'is-valid-openai-tools-call',
|
|
2593
|
+
},
|
|
2594
|
+
test: {},
|
|
2595
|
+
providerResponse: { output },
|
|
2596
|
+
});
|
|
2597
|
+
expect(result).toMatchObject({
|
|
2598
|
+
pass: false,
|
|
2599
|
+
reason: expect.stringContaining('Call to "add" does not match schema'),
|
|
2600
|
+
});
|
|
2601
|
+
});
|
|
2602
|
+
});
|
|
2603
|
+
describe('Similarity assertion', () => {
|
|
2604
|
+
beforeEach(() => {
|
|
2605
|
+
jest.spyOn(openai_1.DefaultEmbeddingProvider, 'callEmbeddingApi').mockImplementation((text) => {
|
|
2606
|
+
if (text === 'Test output' || text.startsWith('Similar output')) {
|
|
2607
|
+
return Promise.resolve({
|
|
2608
|
+
embedding: [1, 0, 0],
|
|
2609
|
+
tokenUsage: { total: 5, prompt: 2, completion: 3 },
|
|
2610
|
+
});
|
|
2611
|
+
}
|
|
2612
|
+
else if (text.startsWith('Different output')) {
|
|
2613
|
+
return Promise.resolve({
|
|
2614
|
+
embedding: [0, 1, 0],
|
|
2615
|
+
tokenUsage: { total: 5, prompt: 2, completion: 3 },
|
|
2616
|
+
});
|
|
2617
|
+
}
|
|
2618
|
+
return Promise.reject(new Error('Unexpected input'));
|
|
2619
|
+
});
|
|
2620
|
+
});
|
|
2621
|
+
afterEach(() => {
|
|
2622
|
+
jest.restoreAllMocks();
|
|
2623
|
+
});
|
|
2624
|
+
it('should pass for a similar assertion with a string value', async () => {
|
|
2625
|
+
const output = 'Test output';
|
|
2626
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2627
|
+
prompt: 'Some prompt',
|
|
2628
|
+
assertion: {
|
|
2629
|
+
type: 'similar',
|
|
2630
|
+
value: 'Similar output',
|
|
2631
|
+
},
|
|
2632
|
+
test: {},
|
|
2633
|
+
providerResponse: { output },
|
|
2634
|
+
});
|
|
2635
|
+
expect(result).toMatchObject({
|
|
2636
|
+
pass: true,
|
|
2637
|
+
reason: 'Similarity 1.00 is greater than threshold 0.75',
|
|
2638
|
+
});
|
|
2639
|
+
});
|
|
2640
|
+
it('should fail for a similar assertion with a string value', async () => {
|
|
2641
|
+
const output = 'Test output';
|
|
2642
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2643
|
+
prompt: 'Some prompt',
|
|
2644
|
+
assertion: {
|
|
2645
|
+
type: 'similar',
|
|
2646
|
+
value: 'Different output',
|
|
2647
|
+
},
|
|
2648
|
+
test: {},
|
|
2649
|
+
providerResponse: { output },
|
|
2650
|
+
});
|
|
2651
|
+
expect(result).toMatchObject({
|
|
2652
|
+
pass: false,
|
|
2653
|
+
reason: 'Similarity 0.00 is less than threshold 0.75',
|
|
2654
|
+
});
|
|
2655
|
+
});
|
|
2656
|
+
it('should pass for a similar assertion with an array of string values', async () => {
|
|
2657
|
+
const output = 'Test output';
|
|
2658
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2659
|
+
prompt: 'Some prompt',
|
|
2660
|
+
assertion: {
|
|
2661
|
+
type: 'similar',
|
|
2662
|
+
value: ['Similar output 1', 'Different output 1'],
|
|
2663
|
+
},
|
|
2664
|
+
test: {},
|
|
2665
|
+
providerResponse: { output },
|
|
2666
|
+
});
|
|
2667
|
+
expect(result).toMatchObject({
|
|
2668
|
+
pass: true,
|
|
2669
|
+
reason: 'Similarity 1.00 is greater than threshold 0.75',
|
|
2670
|
+
});
|
|
2671
|
+
});
|
|
2672
|
+
it('should fail for a similar assertion with an array of string values', async () => {
|
|
2673
|
+
const output = 'Test output';
|
|
2674
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2675
|
+
prompt: 'Some prompt',
|
|
2676
|
+
assertion: {
|
|
2677
|
+
type: 'similar',
|
|
2678
|
+
value: ['Different output 1', 'Different output 2'],
|
|
2679
|
+
},
|
|
2680
|
+
test: {},
|
|
2681
|
+
providerResponse: { output },
|
|
2682
|
+
});
|
|
2683
|
+
expect(result).toMatchObject({
|
|
2684
|
+
pass: false,
|
|
2685
|
+
reason: 'None of the provided values met the similarity threshold',
|
|
2686
|
+
});
|
|
2687
|
+
});
|
|
2688
|
+
});
|
|
2689
|
+
describe('is-xml', () => {
|
|
2690
|
+
const provider = {
|
|
2691
|
+
callApi: jest.fn().mockResolvedValue({ cost: 0.001 }),
|
|
2692
|
+
};
|
|
2693
|
+
it('should pass when the output is valid XML', async () => {
|
|
2694
|
+
const output = '<root><child>Content</child></root>';
|
|
2695
|
+
const assertion = { type: 'is-xml' };
|
|
2696
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2697
|
+
prompt: 'Generate XML',
|
|
2698
|
+
provider,
|
|
2699
|
+
assertion,
|
|
2700
|
+
test: {},
|
|
2701
|
+
providerResponse: { output },
|
|
2702
|
+
});
|
|
2703
|
+
expect(result).toEqual({
|
|
2704
|
+
pass: true,
|
|
2705
|
+
score: 1,
|
|
2706
|
+
reason: 'Assertion passed',
|
|
2707
|
+
assertion,
|
|
2708
|
+
});
|
|
2709
|
+
});
|
|
2710
|
+
it('should fail when the output is not valid XML', async () => {
|
|
2711
|
+
const output = '<root><child>Content</child></root';
|
|
2712
|
+
const assertion = { type: 'is-xml' };
|
|
2713
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2714
|
+
prompt: 'Generate XML',
|
|
2715
|
+
provider,
|
|
2716
|
+
assertion,
|
|
2717
|
+
test: {},
|
|
2718
|
+
providerResponse: { output },
|
|
2719
|
+
});
|
|
2720
|
+
expect(result).toMatchObject({
|
|
2721
|
+
pass: false,
|
|
2722
|
+
score: 0,
|
|
2723
|
+
reason: expect.stringMatching(/XML parsing failed/),
|
|
2724
|
+
assertion: assertion,
|
|
2725
|
+
});
|
|
2726
|
+
});
|
|
2727
|
+
it('should pass when required elements are present', async () => {
|
|
2728
|
+
const output = '<analysis><classification>T-shirt</classification><color>Red</color></analysis>';
|
|
2729
|
+
const assertion = {
|
|
2730
|
+
type: 'is-xml',
|
|
2731
|
+
value: 'analysis.classification,analysis.color',
|
|
2732
|
+
};
|
|
2733
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2734
|
+
prompt: 'Generate XML',
|
|
2735
|
+
provider,
|
|
2736
|
+
assertion,
|
|
2737
|
+
test: {},
|
|
2738
|
+
providerResponse: { output },
|
|
2739
|
+
});
|
|
2740
|
+
expect(result).toEqual({
|
|
2741
|
+
pass: true,
|
|
2742
|
+
score: 1,
|
|
2743
|
+
reason: 'Assertion passed',
|
|
2744
|
+
assertion,
|
|
2745
|
+
});
|
|
2746
|
+
});
|
|
2747
|
+
it('should fail when required elements are missing', async () => {
|
|
2748
|
+
const output = '<analysis><classification>T-shirt</classification></analysis>';
|
|
2749
|
+
const assertion = {
|
|
2750
|
+
type: 'is-xml',
|
|
2751
|
+
value: 'analysis.classification,analysis.color',
|
|
2752
|
+
};
|
|
2753
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2754
|
+
prompt: 'Generate XML',
|
|
2755
|
+
provider,
|
|
2756
|
+
assertion,
|
|
2757
|
+
test: {},
|
|
2758
|
+
providerResponse: { output },
|
|
2759
|
+
});
|
|
2760
|
+
expect(result).toEqual({
|
|
2761
|
+
pass: false,
|
|
2762
|
+
score: 0,
|
|
2763
|
+
reason: 'XML is missing required elements: analysis.color',
|
|
2764
|
+
assertion: assertion,
|
|
2765
|
+
});
|
|
2766
|
+
});
|
|
2767
|
+
it('should pass when nested elements are present', async () => {
|
|
2768
|
+
const output = '<root><parent><child><grandchild>Content</grandchild></child></parent></root>';
|
|
2769
|
+
const assertion = {
|
|
2770
|
+
type: 'is-xml',
|
|
2771
|
+
value: 'root.parent.child.grandchild',
|
|
2772
|
+
};
|
|
2773
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2774
|
+
prompt: 'Generate XML',
|
|
2775
|
+
provider,
|
|
2776
|
+
assertion,
|
|
2777
|
+
test: {},
|
|
2778
|
+
providerResponse: { output },
|
|
2779
|
+
});
|
|
2780
|
+
expect(result).toEqual({
|
|
2781
|
+
pass: true,
|
|
2782
|
+
score: 1,
|
|
2783
|
+
reason: 'Assertion passed',
|
|
2784
|
+
assertion,
|
|
2785
|
+
});
|
|
2786
|
+
});
|
|
2787
|
+
it('should handle inverse assertion correctly', async () => {
|
|
2788
|
+
const output = 'This is not XML';
|
|
2789
|
+
const assertion = { type: 'not-is-xml' };
|
|
2790
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2791
|
+
prompt: 'Generate non-XML',
|
|
2792
|
+
provider,
|
|
2793
|
+
assertion,
|
|
2794
|
+
test: {},
|
|
2795
|
+
providerResponse: { output },
|
|
2796
|
+
});
|
|
2797
|
+
expect(result).toEqual({
|
|
2798
|
+
pass: true,
|
|
2799
|
+
score: 1,
|
|
2800
|
+
reason: 'Assertion passed',
|
|
2801
|
+
assertion,
|
|
2802
|
+
});
|
|
2803
|
+
});
|
|
2804
|
+
it('should pass when required elements are specified as an array', async () => {
|
|
2805
|
+
const output = '<root><element1>Content1</element1><element2>Content2</element2></root>';
|
|
2806
|
+
const assertion = {
|
|
2807
|
+
type: 'is-xml',
|
|
2808
|
+
value: ['root.element1', 'root.element2'],
|
|
2809
|
+
};
|
|
2810
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2811
|
+
prompt: 'Generate XML',
|
|
2812
|
+
provider,
|
|
2813
|
+
assertion,
|
|
2814
|
+
test: {},
|
|
2815
|
+
providerResponse: { output },
|
|
2816
|
+
});
|
|
2817
|
+
expect(result).toEqual({
|
|
2818
|
+
pass: true,
|
|
2819
|
+
score: 1,
|
|
2820
|
+
reason: 'Assertion passed',
|
|
2821
|
+
assertion,
|
|
2822
|
+
});
|
|
2823
|
+
});
|
|
2824
|
+
it('should pass when required elements are specified as an object', async () => {
|
|
2825
|
+
const output = '<root><element1>Content1</element1><element2>Content2</element2></root>';
|
|
2826
|
+
const assertion = {
|
|
2827
|
+
type: 'contains-xml',
|
|
2828
|
+
value: {
|
|
2829
|
+
requiredElements: ['root.element1', 'root.element2'],
|
|
2830
|
+
},
|
|
2831
|
+
};
|
|
2832
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2833
|
+
prompt: 'Generate XML',
|
|
2834
|
+
provider,
|
|
2835
|
+
assertion,
|
|
2836
|
+
test: {},
|
|
2837
|
+
providerResponse: { output },
|
|
2838
|
+
});
|
|
2839
|
+
expect(result).toEqual({
|
|
2840
|
+
pass: true,
|
|
2841
|
+
score: 1,
|
|
2842
|
+
reason: 'Assertion passed',
|
|
2843
|
+
assertion,
|
|
2844
|
+
});
|
|
2845
|
+
});
|
|
2846
|
+
it('should throw an error when xml assertion value is invalid', async () => {
|
|
2847
|
+
const output = '<root><element1>Content1</element1><element2>Content2</element2></root>';
|
|
2848
|
+
const assertion = {
|
|
2849
|
+
type: 'is-xml',
|
|
2850
|
+
value: { invalidKey: ['root.element1', 'root.element2'] },
|
|
2851
|
+
};
|
|
2852
|
+
await expect((0, assertions_1.runAssertion)({
|
|
2853
|
+
prompt: 'Generate XML',
|
|
2854
|
+
provider,
|
|
2855
|
+
assertion,
|
|
2856
|
+
test: {},
|
|
2857
|
+
providerResponse: { output },
|
|
2858
|
+
})).rejects.toThrow('xml assertion must contain a string, array value, or no value');
|
|
2859
|
+
});
|
|
2860
|
+
it('should handle multiple XML blocks in contains-xml assertion', async () => {
|
|
2861
|
+
const output = 'Some text <xml1>content1</xml1> more text <xml2>content2</xml2>';
|
|
2862
|
+
const assertion = {
|
|
2863
|
+
type: 'contains-xml',
|
|
2864
|
+
value: ['xml1', 'xml2'],
|
|
2865
|
+
};
|
|
2866
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2867
|
+
prompt: 'Generate text with multiple XML blocks',
|
|
2868
|
+
provider,
|
|
2869
|
+
assertion,
|
|
2870
|
+
test: {},
|
|
2871
|
+
providerResponse: { output },
|
|
2872
|
+
});
|
|
2873
|
+
expect(result).toEqual({
|
|
2874
|
+
pass: true,
|
|
2875
|
+
score: 1,
|
|
2876
|
+
reason: 'Assertion passed',
|
|
2877
|
+
assertion,
|
|
2878
|
+
});
|
|
2879
|
+
});
|
|
2880
|
+
});
|
|
2881
|
+
describe('contains-xml', () => {
|
|
2882
|
+
const provider = {
|
|
2883
|
+
callApi: jest.fn().mockResolvedValue({ cost: 0.001 }),
|
|
2884
|
+
};
|
|
2885
|
+
it('should pass when the output contains valid XML', async () => {
|
|
2886
|
+
const output = 'Some text before <root><child>Content</child></root> and after';
|
|
2887
|
+
const assertion = { type: 'contains-xml' };
|
|
2888
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2889
|
+
prompt: 'Generate text with XML',
|
|
2890
|
+
provider,
|
|
2891
|
+
assertion,
|
|
2892
|
+
test: {},
|
|
2893
|
+
providerResponse: { output },
|
|
2894
|
+
});
|
|
2895
|
+
expect(result).toEqual({
|
|
2896
|
+
pass: true,
|
|
2897
|
+
score: 1,
|
|
2898
|
+
reason: 'Assertion passed',
|
|
2899
|
+
assertion,
|
|
2900
|
+
});
|
|
2901
|
+
});
|
|
2902
|
+
it('should fail when the output does not contain valid XML', async () => {
|
|
2903
|
+
const output = 'This is just plain text without any XML';
|
|
2904
|
+
const assertion = { type: 'contains-xml' };
|
|
2905
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2906
|
+
prompt: 'Generate text without XML',
|
|
2907
|
+
provider,
|
|
2908
|
+
assertion,
|
|
2909
|
+
test: {},
|
|
2910
|
+
providerResponse: { output },
|
|
2911
|
+
});
|
|
2912
|
+
expect(result).toEqual({
|
|
2913
|
+
pass: false,
|
|
2914
|
+
score: 0,
|
|
2915
|
+
reason: 'No XML content found in the output',
|
|
2916
|
+
assertion: assertion,
|
|
2917
|
+
});
|
|
2918
|
+
});
|
|
2919
|
+
it('should pass when required elements are present in the XML', async () => {
|
|
2920
|
+
const output = 'Before <analysis><classification>T-shirt</classification><color>Red</color></analysis> After';
|
|
2921
|
+
const assertion = {
|
|
2922
|
+
type: 'contains-xml',
|
|
2923
|
+
value: 'analysis.classification,analysis.color',
|
|
2924
|
+
};
|
|
2925
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2926
|
+
prompt: 'Generate text with specific XML',
|
|
2927
|
+
provider,
|
|
2928
|
+
assertion,
|
|
2929
|
+
test: {},
|
|
2930
|
+
providerResponse: { output },
|
|
2931
|
+
});
|
|
2932
|
+
expect(result).toEqual({
|
|
2933
|
+
pass: true,
|
|
2934
|
+
score: 1,
|
|
2935
|
+
reason: 'Assertion passed',
|
|
2936
|
+
assertion,
|
|
2937
|
+
});
|
|
2938
|
+
});
|
|
2939
|
+
it('should fail when required elements are missing in the XML', async () => {
|
|
2940
|
+
const output = 'Before <analysis><classification>T-shirt</classification></analysis> After';
|
|
2941
|
+
const assertion = {
|
|
2942
|
+
type: 'contains-xml',
|
|
2943
|
+
value: 'analysis.classification,analysis.color',
|
|
2944
|
+
};
|
|
2945
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2946
|
+
prompt: 'Generate text with specific XML',
|
|
2947
|
+
provider,
|
|
2948
|
+
assertion,
|
|
2949
|
+
test: {},
|
|
2950
|
+
providerResponse: { output },
|
|
2951
|
+
});
|
|
2952
|
+
expect(result).toEqual({
|
|
2953
|
+
pass: false,
|
|
2954
|
+
score: 0,
|
|
2955
|
+
reason: 'No valid XML content found matching the requirements',
|
|
2956
|
+
assertion: assertion,
|
|
2957
|
+
});
|
|
2958
|
+
});
|
|
2959
|
+
it('should pass when nested elements are present in the XML', async () => {
|
|
2960
|
+
const output = 'Start <root><parent><child><grandchild>Content</grandchild></child></parent></root> End';
|
|
2961
|
+
const assertion = {
|
|
2962
|
+
type: 'contains-xml',
|
|
2963
|
+
value: 'root.parent.child.grandchild',
|
|
2964
|
+
};
|
|
2965
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2966
|
+
prompt: 'Generate text with nested XML',
|
|
2967
|
+
provider,
|
|
2968
|
+
assertion,
|
|
2969
|
+
test: {},
|
|
2970
|
+
providerResponse: { output },
|
|
2971
|
+
});
|
|
2972
|
+
expect(result).toEqual({
|
|
2973
|
+
pass: true,
|
|
2974
|
+
score: 1,
|
|
2975
|
+
reason: 'Assertion passed',
|
|
2976
|
+
assertion,
|
|
2977
|
+
});
|
|
2978
|
+
});
|
|
2979
|
+
it('should handle inverse assertion correctly', async () => {
|
|
2980
|
+
const output = 'This is just plain text without any XML';
|
|
2981
|
+
const assertion = { type: 'not-contains-xml' };
|
|
2982
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
2983
|
+
prompt: 'Generate text without XML',
|
|
2984
|
+
provider,
|
|
2985
|
+
assertion,
|
|
2986
|
+
test: {},
|
|
2987
|
+
providerResponse: { output },
|
|
2988
|
+
});
|
|
2989
|
+
expect(result).toEqual({
|
|
2990
|
+
pass: true,
|
|
2991
|
+
score: 1,
|
|
2992
|
+
reason: 'Assertion passed',
|
|
2993
|
+
assertion,
|
|
2994
|
+
});
|
|
2995
|
+
});
|
|
2996
|
+
it('should fail inverse assertion when XML is present', async () => {
|
|
2997
|
+
const output = 'Some text with <xml>content</xml> in it';
|
|
2998
|
+
const assertion = { type: 'not-contains-xml' };
|
|
2999
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
3000
|
+
prompt: 'Generate text without XML',
|
|
3001
|
+
provider,
|
|
3002
|
+
assertion,
|
|
3003
|
+
test: {},
|
|
3004
|
+
providerResponse: { output },
|
|
3005
|
+
});
|
|
3006
|
+
expect(result).toEqual({
|
|
3007
|
+
pass: false,
|
|
3008
|
+
score: 0,
|
|
3009
|
+
reason: 'XML is valid and contains all required elements',
|
|
3010
|
+
assertion,
|
|
3011
|
+
});
|
|
3012
|
+
});
|
|
3013
|
+
});
|
|
3014
|
+
});
|
|
3015
|
+
describe('validateXml', () => {
|
|
3016
|
+
it('should validate a simple valid XML string', () => {
|
|
3017
|
+
expect((0, assertions_1.validateXml)('<root><child>Content</child></root>')).toEqual({
|
|
3018
|
+
isValid: true,
|
|
3019
|
+
reason: 'XML is valid and contains all required elements',
|
|
3020
|
+
});
|
|
3021
|
+
});
|
|
3022
|
+
it('should invalidate a malformed XML string', () => {
|
|
3023
|
+
expect((0, assertions_1.validateXml)('<root><child>Content</child></root')).toEqual({
|
|
3024
|
+
isValid: false,
|
|
3025
|
+
reason: expect.stringContaining('XML parsing failed'),
|
|
3026
|
+
});
|
|
3027
|
+
});
|
|
3028
|
+
it('should validate XML with attributes', () => {
|
|
3029
|
+
expect((0, assertions_1.validateXml)('<root><child id="1">Content</child></root>')).toEqual({
|
|
3030
|
+
isValid: true,
|
|
3031
|
+
reason: 'XML is valid and contains all required elements',
|
|
3032
|
+
});
|
|
3033
|
+
});
|
|
3034
|
+
it('should validate XML with namespaces', () => {
|
|
3035
|
+
expect((0, assertions_1.validateXml)('<root xmlns:ns="http://example.com"><ns:child>Content</ns:child></root>')).toEqual({
|
|
3036
|
+
isValid: true,
|
|
3037
|
+
reason: 'XML is valid and contains all required elements',
|
|
3038
|
+
});
|
|
3039
|
+
});
|
|
3040
|
+
it('should validate when all required elements are present', () => {
|
|
3041
|
+
expect((0, assertions_1.validateXml)('<analysis><classification>T-shirt</classification><color>Red</color></analysis>', ['analysis.classification', 'analysis.color'])).toEqual({
|
|
3042
|
+
isValid: true,
|
|
3043
|
+
reason: 'XML is valid and contains all required elements',
|
|
3044
|
+
});
|
|
3045
|
+
});
|
|
3046
|
+
it('should invalidate when a required element is missing', () => {
|
|
3047
|
+
expect((0, assertions_1.validateXml)('<analysis><classification>T-shirt</classification></analysis>', [
|
|
3048
|
+
'analysis.classification',
|
|
3049
|
+
'analysis.color',
|
|
3050
|
+
])).toEqual({
|
|
3051
|
+
isValid: false,
|
|
3052
|
+
reason: 'XML is missing required elements: analysis.color',
|
|
3053
|
+
});
|
|
3054
|
+
});
|
|
3055
|
+
it('should validate nested elements correctly', () => {
|
|
3056
|
+
expect((0, assertions_1.validateXml)('<root><parent><child><grandchild>Content</grandchild></child></parent></root>', [
|
|
3057
|
+
'root.parent.child.grandchild',
|
|
3058
|
+
])).toEqual({
|
|
3059
|
+
isValid: true,
|
|
3060
|
+
reason: 'XML is valid and contains all required elements',
|
|
3061
|
+
});
|
|
3062
|
+
});
|
|
3063
|
+
it('should invalidate when a nested required element is missing', () => {
|
|
3064
|
+
expect((0, assertions_1.validateXml)('<root><parent><child></child></parent></root>', [
|
|
3065
|
+
'root.parent.child.grandchild',
|
|
3066
|
+
])).toEqual({
|
|
3067
|
+
isValid: false,
|
|
3068
|
+
reason: 'XML is missing required elements: root.parent.child.grandchild',
|
|
3069
|
+
});
|
|
3070
|
+
});
|
|
3071
|
+
it('should handle empty elements correctly', () => {
|
|
3072
|
+
expect((0, assertions_1.validateXml)('<root><emptyChild></emptyChild><nonEmptyChild>Content</nonEmptyChild></root>', [
|
|
3073
|
+
'root.emptyChild',
|
|
3074
|
+
'root.nonEmptyChild',
|
|
3075
|
+
])).toEqual({
|
|
3076
|
+
isValid: true,
|
|
3077
|
+
reason: 'XML is valid and contains all required elements',
|
|
3078
|
+
});
|
|
3079
|
+
});
|
|
3080
|
+
it('should validate XML with multiple siblings', () => {
|
|
3081
|
+
expect((0, assertions_1.validateXml)('<root><child>Content1</child><child>Content2</child></root>', ['root.child'])).toEqual({
|
|
3082
|
+
isValid: true,
|
|
3083
|
+
reason: 'XML is valid and contains all required elements',
|
|
3084
|
+
});
|
|
3085
|
+
});
|
|
3086
|
+
it('should handle XML with CDATA sections', () => {
|
|
3087
|
+
expect((0, assertions_1.validateXml)('<root><child><![CDATA[<p>This is CDATA content</p>]]></child></root>', [
|
|
3088
|
+
'root.child',
|
|
3089
|
+
])).toEqual({
|
|
3090
|
+
isValid: true,
|
|
3091
|
+
reason: 'XML is valid and contains all required elements',
|
|
3092
|
+
});
|
|
3093
|
+
});
|
|
3094
|
+
it('should validate XML with processing instructions', () => {
|
|
3095
|
+
const xml = '<?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl" href="style.xsl"?><root><child>Content</child></root>';
|
|
3096
|
+
expect((0, assertions_1.validateXml)(xml, ['root.child'])).toEqual({
|
|
3097
|
+
isValid: true,
|
|
3098
|
+
reason: 'XML is valid and contains all required elements',
|
|
3099
|
+
});
|
|
3100
|
+
});
|
|
3101
|
+
it('should handle XML with comments', () => {
|
|
3102
|
+
expect((0, assertions_1.validateXml)('<root><!-- This is a comment --><child>Content</child></root>', ['root.child'])).toEqual({
|
|
3103
|
+
isValid: true,
|
|
3104
|
+
reason: 'XML is valid and contains all required elements',
|
|
3105
|
+
});
|
|
3106
|
+
});
|
|
3107
|
+
it('should validate the example XML structure', () => {
|
|
3108
|
+
const xml = (0, dedent_1.default) `
|
|
3109
|
+
<analysis>
|
|
3110
|
+
<classification>T-shirt/top</classification>
|
|
3111
|
+
<color>White with black print</color>
|
|
3112
|
+
<features>Large circular graphic design on the front, resembling a smiley face or emoji</features>
|
|
3113
|
+
<style>Modern, casual streetwear</style>
|
|
3114
|
+
<confidence>9</confidence>
|
|
3115
|
+
<reasoning>The image clearly shows a short-sleeved garment with a round neckline, which is characteristic of a T-shirt. The large circular graphic on the front is distinctive and appears to be a stylized smiley face or emoji design, which is popular in contemporary casual fashion. The stark contrast between the white fabric and black print is very clear, leaving little room for misinterpretation. The style is unmistakably modern and aligned with current trends in graphic tees. My confidence is high (9) because all elements of the image are clear and consistent with a typical graphic T-shirt design.</reasoning>
|
|
3116
|
+
</analysis>
|
|
3117
|
+
`;
|
|
3118
|
+
expect((0, assertions_1.validateXml)(xml, [
|
|
3119
|
+
'analysis.classification',
|
|
3120
|
+
'analysis.color',
|
|
3121
|
+
'analysis.features',
|
|
3122
|
+
'analysis.style',
|
|
3123
|
+
'analysis.confidence',
|
|
3124
|
+
'analysis.reasoning',
|
|
3125
|
+
])).toEqual({
|
|
3126
|
+
isValid: true,
|
|
3127
|
+
reason: 'XML is valid and contains all required elements',
|
|
3128
|
+
});
|
|
3129
|
+
});
|
|
3130
|
+
});
|
|
3131
|
+
describe('containsXml', () => {
|
|
3132
|
+
it('should return true when valid XML is present', () => {
|
|
3133
|
+
const input = 'Some text <root><child>Content</child></root> more text';
|
|
3134
|
+
const result = (0, assertions_1.containsXml)(input);
|
|
3135
|
+
expect(result.isValid).toBe(true);
|
|
3136
|
+
});
|
|
3137
|
+
it('should return false when no XML is present', () => {
|
|
3138
|
+
const input = 'This is just plain text';
|
|
3139
|
+
expect((0, assertions_1.containsXml)(input)).toEqual({
|
|
3140
|
+
isValid: false,
|
|
3141
|
+
reason: 'No XML content found in the output',
|
|
3142
|
+
});
|
|
3143
|
+
});
|
|
3144
|
+
it('should validate required elements', () => {
|
|
3145
|
+
const input = 'Text <root><child>Content</child></root> more';
|
|
3146
|
+
const result = (0, assertions_1.containsXml)(input, ['root.child']);
|
|
3147
|
+
expect(result.isValid).toBe(true);
|
|
3148
|
+
});
|
|
3149
|
+
it('should return false when required elements are missing', () => {
|
|
3150
|
+
const input = 'Text <root><child>Content</child></root> more';
|
|
3151
|
+
expect((0, assertions_1.containsXml)(input, ['root.missing'])).toEqual({
|
|
3152
|
+
isValid: false,
|
|
3153
|
+
reason: 'No valid XML content found matching the requirements',
|
|
3154
|
+
});
|
|
3155
|
+
});
|
|
3156
|
+
it('should handle multiple XML fragments', () => {
|
|
3157
|
+
const input = '<root1>Content</root1> text <root2><child>More</child></root2>';
|
|
3158
|
+
const result = (0, assertions_1.containsXml)(input, ['root2.child']);
|
|
3159
|
+
expect(result.isValid).toBe(true);
|
|
3160
|
+
});
|
|
3161
|
+
});
|
|
3162
|
+
//# sourceMappingURL=assertions.test.js.map
|