promptfoo 0.102.3 → 0.103.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/dist/package.json +1 -1
  2. package/dist/src/app/assets/{index-isMhDyh1.js → index-hVGk-Oul.js} +276 -277
  3. package/dist/src/app/assets/{index.es-BNF9PUeN.js → index.es-CcK3JjZn.js} +1 -1
  4. package/dist/src/app/assets/{sync-DaNyhPy-.js → sync-BaigR5eq.js} +1 -1
  5. package/dist/src/app/index.html +1 -1
  6. package/dist/src/assertions/geval.d.ts +3 -0
  7. package/dist/src/assertions/geval.d.ts.map +1 -0
  8. package/dist/src/assertions/geval.js +39 -0
  9. package/dist/src/assertions/geval.js.map +1 -0
  10. package/dist/src/assertions/index.d.ts +1 -1
  11. package/dist/src/assertions/index.d.ts.map +1 -1
  12. package/dist/src/assertions/index.js +2 -0
  13. package/dist/src/assertions/index.js.map +1 -1
  14. package/dist/src/assertions/utils.d.ts +2 -2
  15. package/dist/src/database/tables.d.ts +12 -12
  16. package/dist/src/googleSheets.d.ts +7 -0
  17. package/dist/src/googleSheets.d.ts.map +1 -1
  18. package/dist/src/googleSheets.js +32 -1
  19. package/dist/src/googleSheets.js.map +1 -1
  20. package/dist/src/integrations/huggingfaceDatasets.d.ts +3 -0
  21. package/dist/src/integrations/huggingfaceDatasets.d.ts.map +1 -0
  22. package/dist/src/integrations/huggingfaceDatasets.js +87 -0
  23. package/dist/src/integrations/huggingfaceDatasets.js.map +1 -0
  24. package/dist/src/matchers.d.ts +1 -0
  25. package/dist/src/matchers.d.ts.map +1 -1
  26. package/dist/src/matchers.js +74 -0
  27. package/dist/src/matchers.js.map +1 -1
  28. package/dist/src/models/evalResult.d.ts.map +1 -1
  29. package/dist/src/models/evalResult.js +8 -1
  30. package/dist/src/models/evalResult.js.map +1 -1
  31. package/dist/src/providers/bedrock.d.ts.map +1 -1
  32. package/dist/src/providers/bedrock.js +10 -3
  33. package/dist/src/providers/bedrock.js.map +1 -1
  34. package/dist/src/providers/simulatedUser.d.ts.map +1 -1
  35. package/dist/src/providers/simulatedUser.js +5 -0
  36. package/dist/src/providers/simulatedUser.js.map +1 -1
  37. package/dist/src/providers.d.ts.map +1 -1
  38. package/dist/src/providers.js +4 -0
  39. package/dist/src/providers.js.map +1 -1
  40. package/dist/src/redteam/constants.d.ts +7 -3
  41. package/dist/src/redteam/constants.d.ts.map +1 -1
  42. package/dist/src/redteam/constants.js +27 -1
  43. package/dist/src/redteam/constants.js.map +1 -1
  44. package/dist/src/redteam/graders.d.ts.map +1 -1
  45. package/dist/src/redteam/graders.js +2 -0
  46. package/dist/src/redteam/graders.js.map +1 -1
  47. package/dist/src/redteam/index.d.ts +1 -1
  48. package/dist/src/redteam/index.d.ts.map +1 -1
  49. package/dist/src/redteam/index.js +10 -2
  50. package/dist/src/redteam/index.js.map +1 -1
  51. package/dist/src/redteam/plugins/beavertails.d.ts +17 -0
  52. package/dist/src/redteam/plugins/beavertails.d.ts.map +1 -0
  53. package/dist/src/redteam/plugins/beavertails.js +104 -0
  54. package/dist/src/redteam/plugins/beavertails.js.map +1 -0
  55. package/dist/src/redteam/plugins/index.d.ts.map +1 -1
  56. package/dist/src/redteam/plugins/index.js +2 -0
  57. package/dist/src/redteam/plugins/index.js.map +1 -1
  58. package/dist/src/redteam/plugins/intent.d.ts +6 -3
  59. package/dist/src/redteam/plugins/intent.d.ts.map +1 -1
  60. package/dist/src/redteam/plugins/intent.js +32 -13
  61. package/dist/src/redteam/plugins/intent.js.map +1 -1
  62. package/dist/src/redteam/plugins/pliny.d.ts.map +1 -1
  63. package/dist/src/redteam/plugins/pliny.js +1 -1
  64. package/dist/src/redteam/plugins/pliny.js.map +1 -1
  65. package/dist/src/redteam/providers/bestOfN.d.ts +16 -0
  66. package/dist/src/redteam/providers/bestOfN.d.ts.map +1 -0
  67. package/dist/src/redteam/providers/bestOfN.js +103 -0
  68. package/dist/src/redteam/providers/bestOfN.js.map +1 -0
  69. package/dist/src/redteam/providers/crescendo/index.d.ts.map +1 -1
  70. package/dist/src/redteam/providers/crescendo/index.js +20 -0
  71. package/dist/src/redteam/providers/crescendo/index.js.map +1 -1
  72. package/dist/src/redteam/providers/goat.d.ts.map +1 -1
  73. package/dist/src/redteam/providers/goat.js +7 -0
  74. package/dist/src/redteam/providers/goat.js.map +1 -1
  75. package/dist/src/redteam/providers/iterative.d.ts +3 -0
  76. package/dist/src/redteam/providers/iterative.d.ts.map +1 -1
  77. package/dist/src/redteam/providers/iterative.js +63 -26
  78. package/dist/src/redteam/providers/iterative.js.map +1 -1
  79. package/dist/src/redteam/providers/iterativeImage.d.ts.map +1 -1
  80. package/dist/src/redteam/providers/iterativeImage.js +17 -0
  81. package/dist/src/redteam/providers/iterativeImage.js.map +1 -1
  82. package/dist/src/redteam/providers/iterativeTree.d.ts +7 -17
  83. package/dist/src/redteam/providers/iterativeTree.d.ts.map +1 -1
  84. package/dist/src/redteam/providers/iterativeTree.js +59 -9
  85. package/dist/src/redteam/providers/iterativeTree.js.map +1 -1
  86. package/dist/src/redteam/providers/prompts.d.ts.map +1 -1
  87. package/dist/src/redteam/providers/prompts.js +25 -3
  88. package/dist/src/redteam/providers/prompts.js.map +1 -1
  89. package/dist/src/redteam/providers/shared.d.ts +1 -0
  90. package/dist/src/redteam/providers/shared.d.ts.map +1 -1
  91. package/dist/src/redteam/providers/shared.js +8 -0
  92. package/dist/src/redteam/providers/shared.js.map +1 -1
  93. package/dist/src/redteam/shared.d.ts.map +1 -1
  94. package/dist/src/redteam/shared.js +1 -0
  95. package/dist/src/redteam/shared.js.map +1 -1
  96. package/dist/src/redteam/strategies/bestOfN.d.ts +3 -0
  97. package/dist/src/redteam/strategies/bestOfN.d.ts.map +1 -0
  98. package/dist/src/redteam/strategies/bestOfN.js +35 -0
  99. package/dist/src/redteam/strategies/bestOfN.js.map +1 -0
  100. package/dist/src/redteam/strategies/index.d.ts.map +1 -1
  101. package/dist/src/redteam/strategies/index.js +22 -12
  102. package/dist/src/redteam/strategies/index.js.map +1 -1
  103. package/dist/src/redteam/util.d.ts.map +1 -1
  104. package/dist/src/redteam/util.js +14 -3
  105. package/dist/src/redteam/util.js.map +1 -1
  106. package/dist/src/server/routes/redteam.js +2 -1
  107. package/dist/src/server/routes/redteam.js.map +1 -1
  108. package/dist/src/telemetry.d.ts +6 -0
  109. package/dist/src/telemetry.d.ts.map +1 -1
  110. package/dist/src/telemetry.js +6 -0
  111. package/dist/src/telemetry.js.map +1 -1
  112. package/dist/src/testCases.d.ts.map +1 -1
  113. package/dist/src/testCases.js +7 -0
  114. package/dist/src/testCases.js.map +1 -1
  115. package/dist/src/types/index.d.ts +343 -343
  116. package/dist/src/types/index.d.ts.map +1 -1
  117. package/dist/src/types/index.js +1 -0
  118. package/dist/src/types/index.js.map +1 -1
  119. package/dist/src/util/index.d.ts +4 -4
  120. package/dist/src/validators/redteam.js +2 -2
  121. package/dist/src/validators/redteam.js.map +1 -1
  122. package/dist/test/factories/evalFactory.d.ts +8 -8
  123. package/dist/test/googleSheets.test.d.ts +2 -0
  124. package/dist/test/googleSheets.test.d.ts.map +1 -0
  125. package/dist/test/googleSheets.test.js +240 -0
  126. package/dist/test/googleSheets.test.js.map +1 -0
  127. package/dist/test/integrations/huggingfaceDatasets.test.d.ts +2 -0
  128. package/dist/test/integrations/huggingfaceDatasets.test.d.ts.map +1 -0
  129. package/dist/test/integrations/huggingfaceDatasets.test.js +147 -0
  130. package/dist/test/integrations/huggingfaceDatasets.test.js.map +1 -0
  131. package/dist/test/models/evalResult.test.d.ts +2 -0
  132. package/dist/test/models/evalResult.test.d.ts.map +1 -0
  133. package/dist/test/models/evalResult.test.js +217 -0
  134. package/dist/test/models/evalResult.test.js.map +1 -0
  135. package/dist/test/providers/bedrock.test.js +110 -0
  136. package/dist/test/providers/bedrock.test.js.map +1 -1
  137. package/dist/test/redteam/plugins/intent.test.d.ts +2 -0
  138. package/dist/test/redteam/plugins/intent.test.d.ts.map +1 -0
  139. package/dist/test/redteam/plugins/intent.test.js +172 -0
  140. package/dist/test/redteam/plugins/intent.test.js.map +1 -0
  141. package/dist/test/redteam/providers/iterative.test.d.ts +2 -0
  142. package/dist/test/redteam/providers/iterative.test.d.ts.map +1 -0
  143. package/dist/test/redteam/providers/iterative.test.js +61 -0
  144. package/dist/test/redteam/providers/iterative.test.js.map +1 -0
  145. package/dist/test/redteam/providers/iterativeTree.test.js +46 -8
  146. package/dist/test/redteam/providers/iterativeTree.test.js.map +1 -1
  147. package/dist/test/redteam/validators.test.js +2 -0
  148. package/dist/test/redteam/validators.test.js.map +1 -1
  149. package/dist/tsconfig.tsbuildinfo +1 -1
  150. package/package.json +1 -1
@@ -539,7 +539,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
539
539
  description?: string | undefined;
540
540
  providerOutput?: string | {} | undefined;
541
541
  assert?: ({
542
- type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
542
+ type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
543
543
  value?: import("../types").AssertionValue | undefined;
544
544
  config?: Record<string, any> | undefined;
545
545
  provider?: any;
@@ -554,7 +554,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
554
554
  } | {
555
555
  type: "assert-set";
556
556
  assert: {
557
- type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
557
+ type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
558
558
  value?: import("../types").AssertionValue | undefined;
559
559
  config?: Record<string, any> | undefined;
560
560
  provider?: any;
@@ -681,7 +681,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
681
681
  description?: string | undefined;
682
682
  providerOutput?: string | {} | undefined;
683
683
  assert?: ({
684
- type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
684
+ type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
685
685
  value?: import("../types").AssertionValue | undefined;
686
686
  config?: Record<string, any> | undefined;
687
687
  provider?: any;
@@ -696,7 +696,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
696
696
  } | {
697
697
  type: "assert-set";
698
698
  assert: {
699
- type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
699
+ type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
700
700
  value?: import("../types").AssertionValue | undefined;
701
701
  config?: Record<string, any> | undefined;
702
702
  provider?: any;
@@ -820,7 +820,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
820
820
  description?: string | undefined;
821
821
  providerOutput?: string | {} | undefined;
822
822
  assert?: ({
823
- type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
823
+ type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
824
824
  value?: import("../types").AssertionValue | undefined;
825
825
  config?: Record<string, any> | undefined;
826
826
  provider?: any;
@@ -835,7 +835,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
835
835
  } | {
836
836
  type: "assert-set";
837
837
  assert: {
838
- type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
838
+ type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
839
839
  value?: import("../types").AssertionValue | undefined;
840
840
  config?: Record<string, any> | undefined;
841
841
  provider?: any;
@@ -960,7 +960,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
960
960
  }) | undefined;
961
961
  providerOutput?: string | {} | undefined;
962
962
  assert?: ({
963
- type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
963
+ type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
964
964
  value?: import("../types").AssertionValue | undefined;
965
965
  config?: Record<string, any> | undefined;
966
966
  provider?: any;
@@ -975,7 +975,7 @@ export declare const evalsTable: import("drizzle-orm/sqlite-core").SQLiteTableWi
975
975
  } | {
976
976
  type: "assert-set";
977
977
  assert: {
978
- type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
978
+ type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
979
979
  value?: import("../types").AssertionValue | undefined;
980
980
  config?: Record<string, any> | undefined;
981
981
  provider?: any;
@@ -1493,7 +1493,7 @@ export declare const evalResultsTable: import("drizzle-orm/sqlite-core").SQLiteT
1493
1493
  description?: string | undefined;
1494
1494
  providerOutput?: string | {} | undefined;
1495
1495
  assert?: ({
1496
- type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
1496
+ type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
1497
1497
  value?: import("../types").AssertionValue | undefined;
1498
1498
  config?: Record<string, any> | undefined;
1499
1499
  provider?: any;
@@ -1508,7 +1508,7 @@ export declare const evalResultsTable: import("drizzle-orm/sqlite-core").SQLiteT
1508
1508
  } | {
1509
1509
  type: "assert-set";
1510
1510
  assert: {
1511
- type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
1511
+ type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
1512
1512
  value?: import("../types").AssertionValue | undefined;
1513
1513
  config?: Record<string, any> | undefined;
1514
1514
  provider?: any;
@@ -1966,7 +1966,7 @@ export declare const datasetsTable: import("drizzle-orm/sqlite-core").SQLiteTabl
1966
1966
  description?: string | undefined;
1967
1967
  providerOutput?: string | {} | undefined;
1968
1968
  assert?: ({
1969
- type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
1969
+ type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
1970
1970
  value?: import("../types").AssertionValue | undefined;
1971
1971
  config?: Record<string, any> | undefined;
1972
1972
  provider?: any;
@@ -1981,7 +1981,7 @@ export declare const datasetsTable: import("drizzle-orm/sqlite-core").SQLiteTabl
1981
1981
  } | {
1982
1982
  type: "assert-set";
1983
1983
  assert: {
1984
- type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
1984
+ type: "cost" | "moderation" | `promptfoo:redteam:${string}` | "factuality" | "answer-relevance" | "bleu" | "classifier" | "contains-all" | "contains-any" | "contains-json" | "contains-sql" | "contains-xml" | "contains" | "context-faithfulness" | "context-recall" | "context-relevance" | "equals" | "g-eval" | "icontains-all" | "icontains-any" | "icontains" | "is-json" | "is-refusal" | "is-sql" | "is-valid-openai-function-call" | "is-valid-openai-tools-call" | "is-xml" | "javascript" | "latency" | "levenshtein" | "llm-rubric" | "model-graded-closedqa" | "model-graded-factuality" | "perplexity-score" | "perplexity" | "python" | "regex" | "rouge-n" | "similar" | "starts-with" | "webhook" | "not-cost" | "not-moderation" | "not-factuality" | "not-answer-relevance" | "not-bleu" | "not-classifier" | "not-contains-all" | "not-contains-any" | "not-contains-json" | "not-contains-sql" | "not-contains-xml" | "not-contains" | "not-context-faithfulness" | "not-context-recall" | "not-context-relevance" | "not-equals" | "not-g-eval" | "not-icontains-all" | "not-icontains-any" | "not-icontains" | "not-is-json" | "not-is-refusal" | "not-is-sql" | "not-is-valid-openai-function-call" | "not-is-valid-openai-tools-call" | "not-is-xml" | "not-javascript" | "not-latency" | "not-levenshtein" | "not-llm-rubric" | "not-model-graded-closedqa" | "not-model-graded-factuality" | "not-perplexity-score" | "not-perplexity" | "not-python" | "not-regex" | "not-rouge-n" | "not-similar" | "not-starts-with" | "not-webhook" | "select-best" | "human";
1985
1985
  value?: import("../types").AssertionValue | undefined;
1986
1986
  config?: Record<string, any> | undefined;
1987
1987
  provider?: any;
@@ -1,4 +1,11 @@
1
1
  import type { CsvRow } from './types';
2
+ export declare function checkGoogleSheetAccess(url: string): Promise<{
3
+ public: boolean;
4
+ status: number;
5
+ } | {
6
+ public: boolean;
7
+ status?: undefined;
8
+ }>;
2
9
  export declare function fetchCsvFromGoogleSheetUnauthenticated(url: string): Promise<CsvRow[]>;
3
10
  export declare function fetchCsvFromGoogleSheetAuthenticated(url: string): Promise<CsvRow[]>;
4
11
  export declare function fetchCsvFromGoogleSheet(url: string): Promise<CsvRow[]>;
@@ -1 +1 @@
1
- {"version":3,"file":"googleSheets.d.ts","sourceRoot":"","sources":["../../src/googleSheets.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAgBtC,wBAAsB,sCAAsC,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAa3F;AAED,wBAAsB,oCAAoC,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CA0CzF;AAED,wBAAsB,uBAAuB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAO5E;AAED,wBAAsB,qBAAqB,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CA+BtF"}
1
+ {"version":3,"file":"googleSheets.d.ts","sourceRoot":"","sources":["../../src/googleSheets.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAEtC,wBAAsB,sBAAsB,CAAC,GAAG,EAAE,MAAM;;;;;;GAYvD;AAED,wBAAsB,sCAAsC,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAa3F;AAED,wBAAsB,oCAAoC,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CA0CzF;AAED,wBAAsB,uBAAuB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAO5E;AAED,wBAAsB,qBAAqB,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CA6DtF"}
@@ -36,6 +36,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
36
36
  return (mod && mod.__esModule) ? mod : { "default": mod };
37
37
  };
38
38
  Object.defineProperty(exports, "__esModule", { value: true });
39
+ exports.checkGoogleSheetAccess = checkGoogleSheetAccess;
39
40
  exports.fetchCsvFromGoogleSheetUnauthenticated = fetchCsvFromGoogleSheetUnauthenticated;
40
41
  exports.fetchCsvFromGoogleSheetAuthenticated = fetchCsvFromGoogleSheetAuthenticated;
41
42
  exports.fetchCsvFromGoogleSheet = fetchCsvFromGoogleSheet;
@@ -125,7 +126,37 @@ async function writeCsvToGoogleSheet(rows, url) {
125
126
  throw new Error(`Invalid Google Sheets URL: ${url}`);
126
127
  }
127
128
  const spreadsheetId = match[1];
128
- const range = 'A1:ZZZ';
129
+ let range = 'A1:ZZZ';
130
+ const gid = Number(new URL(url).searchParams.get('gid'));
131
+ if (gid) {
132
+ const spreadsheet = await sheets.spreadsheets.get({ spreadsheetId, auth });
133
+ const sheetName = spreadsheet.data.sheets?.find((sheet) => sheet.properties?.sheetId === gid)
134
+ ?.properties?.title;
135
+ if (!sheetName) {
136
+ throw new Error(`Sheet not found for gid: ${gid}`);
137
+ }
138
+ range = `${sheetName}!${range}`;
139
+ }
140
+ else {
141
+ // Create a new sheet if no gid is provided
142
+ const newSheetTitle = `Sheet${Date.now()}`;
143
+ await sheets.spreadsheets.batchUpdate({
144
+ spreadsheetId,
145
+ auth,
146
+ requestBody: {
147
+ requests: [
148
+ {
149
+ addSheet: {
150
+ properties: {
151
+ title: newSheetTitle,
152
+ },
153
+ },
154
+ },
155
+ ],
156
+ },
157
+ });
158
+ range = `${newSheetTitle}!${range}`;
159
+ }
129
160
  // Extract headers from the first row
130
161
  const headers = Object.keys(rows[0]);
131
162
  // Convert rows to a 2D array
@@ -1 +1 @@
1
- {"version":3,"file":"googleSheets.js","sourceRoot":"","sources":["../../src/googleSheets.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAiBA,wFAaC;AAED,oFA0CC;AAED,0DAOC;AAED,sDA+BC;AApHD,sDAA8B;AAG9B,KAAK,UAAU,sBAAsB,CAAC,GAAW;IAC/C,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,QAAQ,CAAC,EAAE,EAAE,CAAC;YAChB,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,CAAC;QACnD,CAAC;aAAM,CAAC;YACN,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,CAAC;QACpD,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,gBAAM,CAAC,KAAK,CAAC,8BAA8B,EAAE,KAAK,CAAC,CAAC;QACpD,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;IAC3B,CAAC;AACH,CAAC;AAEM,KAAK,UAAU,sCAAsC,CAAC,GAAW;IACtE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,wDAAa,gBAAgB,GAAC,CAAC;IAC3D,MAAM,EAAE,cAAc,EAAE,GAAG,wDAAa,SAAS,GAAC,CAAC;IAEnD,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IACjD,MAAM,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,SAAS,CAAC,cAAc,GAAG,CAAC,CAAC,CAAC,QAAQ,GAAG,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;IAE9F,MAAM,QAAQ,GAAG,MAAM,cAAc,CAAC,MAAM,CAAC,CAAC;IAC9C,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;QAC5B,MAAM,IAAI,KAAK,CAAC,+CAA+C,GAAG,EAAE,CAAC,CAAC;IACxE,CAAC;IACD,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IACtC,OAAO,QAAQ,CAAC,OAAO,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;AAC9C,CAAC;AAEM,KAAK,UAAU,oCAAoC,CAAC,GAAW;IACpE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,wDAAa,oBAAoB,GAAC,CAAC;IACtF,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC;QACrC,MAAM,EAAE,CAAC,uDAAuD,CAAC;KAClE,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAElC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;IACxC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,8BAA8B,GAAG,EAAE,CAAC,CAAC;IACvD,CAAC;IACD,MAAM,aAAa,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IAE/B,IAAI,KAAK,GAAG,QAAQ,CAAC;IACrB,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;IACzD,IAAI,GAAG,EAAE,CAAC;QACR,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,EAA
E,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QAC3E,MAAM,SAAS,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,EAAE,OAAO,KAAK,GAAG,CAAC;YAC3F,EAAE,UAAU,EAAE,KAAK,CAAC;QACtB,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,4BAA4B,GAAG,EAAE,CAAC,CAAC;QACrD,CAAC;QACD,KAAK,GAAG,GAAG,SAAS,IAAI,KAAK,EAAE,CAAC;IAClC,CAAC;IACD,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,aAAa,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IAEtF,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC;IAClC,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,CAAC;QAClB,MAAM,IAAI,KAAK,CAAC,uCAAuC,GAAG,EAAE,CAAC,CAAC;IAChE,CAAC;IAED,0CAA0C;IAC1C,MAAM,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IACxB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAE/B,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QAC1B,MAAM,MAAM,GAAW,EAAE,CAAC;QAC1B,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE;YAChC,MAAM,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC;QAC9B,CAAC,CAAC,CAAC;QACH,OAAO,MAAM,CAAC;IAChB,CAAC,CAAC,CAAC;AACL,CAAC;AAEM,KAAK,UAAU,uBAAuB,CAAC,GAAW;IACvD,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,sBAAsB,CAAC,GAAG,CAAC,CAAC;IAC/D,gBAAM,CAAC,KAAK,CAAC,sBAAsB,GAAG,eAAe,QAAQ,EAAE,CAAC,CAAC;IACjE,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO,sCAAsC,CAAC,GAAG,CAAC,CAAC;IACrD,CAAC;IACD,OAAO,oCAAoC,CAAC,GAAG,CAAC,CAAC;AACnD,CAAC;AAEM,KAAK,UAAU,qBAAqB,CAAC,IAAc,EAAE,GAAW;IACrE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,wDAAa,oBAAoB,GAAC,CAAC;IACtF,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC;QACrC,MAAM,EAAE,CAAC,8CAA8C,CAAC;KACzD,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAElC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;IACxC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,8BAA8B,GAAG,EAAE,CAAC,CAAC;IACvD,CAAC;IACD,MAAM,aAAa,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IAC/B,MAAM,KAAK,GAAG,QAAQ,CAAC;IAEvB,qCAAqC;IACrC,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IAErC,6BAA6B;IAC7B,MAAM,MAAM,GAAG,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,GAAG
,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAErF,0BAA0B;IAC1B,gBAAM,CAAC,KAAK,CAAC,qCAAqC,GAAG,SAAS,MAAM,CAAC,MAAM,OAAO,CAAC,CAAC;IACpF,MAAM,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,MAAM,CAAC;QACtC,aAAa;QACb,KAAK;QACL,gBAAgB,EAAE,cAAc;QAChC,IAAI;QACJ,WAAW,EAAE;YACX,MAAM;SACP;KACF,CAAC,CAAC;AACL,CAAC"}
1
+ {"version":3,"file":"googleSheets.js","sourceRoot":"","sources":["../../src/googleSheets.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAGA,wDAYC;AAED,wFAaC;AAED,oFA0CC;AAED,0DAOC;AAED,sDA6DC;AAlJD,sDAA8B;AAGvB,KAAK,UAAU,sBAAsB,CAAC,GAAW;IACtD,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,QAAQ,CAAC,EAAE,EAAE,CAAC;YAChB,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,CAAC;QACnD,CAAC;aAAM,CAAC;YACN,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,CAAC;QACpD,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,gBAAM,CAAC,KAAK,CAAC,8BAA8B,EAAE,KAAK,CAAC,CAAC;QACpD,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;IAC3B,CAAC;AACH,CAAC;AAEM,KAAK,UAAU,sCAAsC,CAAC,GAAW;IACtE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,GAAG,wDAAa,gBAAgB,GAAC,CAAC;IAC3D,MAAM,EAAE,cAAc,EAAE,GAAG,wDAAa,SAAS,GAAC,CAAC;IAEnD,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IACjD,MAAM,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,SAAS,CAAC,cAAc,GAAG,CAAC,CAAC,CAAC,QAAQ,GAAG,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;IAE9F,MAAM,QAAQ,GAAG,MAAM,cAAc,CAAC,MAAM,CAAC,CAAC;IAC9C,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;QAC5B,MAAM,IAAI,KAAK,CAAC,+CAA+C,GAAG,EAAE,CAAC,CAAC;IACxE,CAAC;IACD,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IACtC,OAAO,QAAQ,CAAC,OAAO,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;AAC9C,CAAC;AAEM,KAAK,UAAU,oCAAoC,CAAC,GAAW;IACpE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,wDAAa,oBAAoB,GAAC,CAAC;IACtF,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC;QACrC,MAAM,EAAE,CAAC,uDAAuD,CAAC;KAClE,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAElC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;IACxC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,8BAA8B,GAAG,EAAE,CAAC,CAAC;IACvD,CAAC;IACD,MAAM,aAAa,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IAE/B,IAAI,KAAK,GAAG,QAAQ,CAAC;IACrB,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;IACzD,IAAI,GAAG,EAAE,CAAC;QACR,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,GAA
G,CAAC,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QAC3E,MAAM,SAAS,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,EAAE,OAAO,KAAK,GAAG,CAAC;YAC3F,EAAE,UAAU,EAAE,KAAK,CAAC;QACtB,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,4BAA4B,GAAG,EAAE,CAAC,CAAC;QACrD,CAAC;QACD,KAAK,GAAG,GAAG,SAAS,IAAI,KAAK,EAAE,CAAC;IAClC,CAAC;IACD,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,aAAa,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IAEtF,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC;IAClC,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,CAAC;QAClB,MAAM,IAAI,KAAK,CAAC,uCAAuC,GAAG,EAAE,CAAC,CAAC;IAChE,CAAC;IAED,0CAA0C;IAC1C,MAAM,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IACxB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAE/B,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QAC1B,MAAM,MAAM,GAAW,EAAE,CAAC;QAC1B,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE;YAChC,MAAM,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC;QAC9B,CAAC,CAAC,CAAC;QACH,OAAO,MAAM,CAAC;IAChB,CAAC,CAAC,CAAC;AACL,CAAC;AAEM,KAAK,UAAU,uBAAuB,CAAC,GAAW;IACvD,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,sBAAsB,CAAC,GAAG,CAAC,CAAC;IAC/D,gBAAM,CAAC,KAAK,CAAC,sBAAsB,GAAG,eAAe,QAAQ,EAAE,CAAC,CAAC;IACjE,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO,sCAAsC,CAAC,GAAG,CAAC,CAAC;IACrD,CAAC;IACD,OAAO,oCAAoC,CAAC,GAAG,CAAC,CAAC;AACnD,CAAC;AAEM,KAAK,UAAU,qBAAqB,CAAC,IAAc,EAAE,GAAW;IACrE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,wDAAa,oBAAoB,GAAC,CAAC;IACtF,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,UAAU,CAAC;QACrC,MAAM,EAAE,CAAC,8CAA8C,CAAC;KACzD,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAElC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;IACxC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,8BAA8B,GAAG,EAAE,CAAC,CAAC;IACvD,CAAC;IACD,MAAM,aAAa,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IAE/B,IAAI,KAAK,GAAG,QAAQ,CAAC;IACrB,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;IACzD,IAAI,GAAG,EAAE,CAAC;QACR,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QAC3E
,MAAM,SAAS,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,EAAE,OAAO,KAAK,GAAG,CAAC;YAC3F,EAAE,UAAU,EAAE,KAAK,CAAC;QACtB,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,4BAA4B,GAAG,EAAE,CAAC,CAAC;QACrD,CAAC;QACD,KAAK,GAAG,GAAG,SAAS,IAAI,KAAK,EAAE,CAAC;IAClC,CAAC;SAAM,CAAC;QACN,2CAA2C;QAC3C,MAAM,aAAa,GAAG,QAAQ,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;QAC3C,MAAM,MAAM,CAAC,YAAY,CAAC,WAAW,CAAC;YACpC,aAAa;YACb,IAAI;YACJ,WAAW,EAAE;gBACX,QAAQ,EAAE;oBACR;wBACE,QAAQ,EAAE;4BACR,UAAU,EAAE;gCACV,KAAK,EAAE,aAAa;6BACrB;yBACF;qBACF;iBACF;aACF;SACF,CAAC,CAAC;QACH,KAAK,GAAG,GAAG,aAAa,IAAI,KAAK,EAAE,CAAC;IACtC,CAAC;IAED,qCAAqC;IACrC,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IAErC,6BAA6B;IAC7B,MAAM,MAAM,GAAG,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAErF,0BAA0B;IAC1B,gBAAM,CAAC,KAAK,CAAC,qCAAqC,GAAG,SAAS,MAAM,CAAC,MAAM,OAAO,CAAC,CAAC;IACpF,MAAM,MAAM,CAAC,YAAY,CAAC,MAAM,CAAC,MAAM,CAAC;QACtC,aAAa;QACb,KAAK;QACL,gBAAgB,EAAE,cAAc;QAChC,IAAI;QACJ,WAAW,EAAE;YACX,MAAM;SACP;KACF,CAAC,CAAC;AACL,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { TestCase } from '../types';
2
+ export declare function fetchHuggingFaceDataset(datasetPath: string, limit?: number): Promise<TestCase[]>;
3
+ //# sourceMappingURL=huggingfaceDatasets.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"huggingfaceDatasets.d.ts","sourceRoot":"","sources":["../../../src/integrations/huggingfaceDatasets.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AA+CzC,wBAAsB,uBAAuB,CAC3C,WAAW,EAAE,MAAM,EACnB,KAAK,CAAC,EAAE,MAAM,GACb,OAAO,CAAC,QAAQ,EAAE,CAAC,CA6ErB"}
@@ -0,0 +1,87 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.fetchHuggingFaceDataset = fetchHuggingFaceDataset;
7
+ const fetch_1 = require("../fetch");
8
+ const logger_1 = __importDefault(require("../logger"));
9
/**
 * Break a HuggingFace dataset reference into owner, repository and query parameters.
 *
 * The `huggingface://datasets/` marker is removed, the remainder is divided at `?`
 * into a path and a query string, and the path is divided at `/`; the first two
 * segments become `owner` and `repo`.
 *
 * The returned query parameters start from the defaults `split=test` and
 * `config=default`; any parameter present in the reference replaces or extends them.
 *
 * @param {string} path - Dataset reference, e.g. `huggingface://datasets/owner/repo?split=train`.
 * @returns {{owner: string, repo: string, queryParams: URLSearchParams}} Parsed parts.
 */
function parseDatasetPath(path) {
    const withoutScheme = path.replace('huggingface://datasets/', '');
    const [pathPart, queryPart] = withoutScheme.split('?');
    const segments = pathPart.split('/');
    const owner = segments[0];
    const repo = segments[1];
    // Seed with the defaults, then let every user-supplied parameter win.
    const queryParams = new URLSearchParams({
        split: 'test',
        config: 'default',
    });
    new URLSearchParams(queryPart || '').forEach((value, key) => {
        queryParams.set(key, value);
    });
    return { owner, repo, queryParams };
}
30
/**
 * Download rows of a HuggingFace dataset through the datasets-server `rows`
 * endpoint and turn every row into a test case of the form `{ vars: { ...row } }`.
 *
 * Rows are requested page by page (at most 100 per request) until one of the
 * following happens: the requested limit is reached, the `num_rows_total`
 * reported by the server has been consumed, or the server returns an empty page.
 *
 * @param {string} datasetPath - Dataset reference accepted by `parseDatasetPath`,
 *   e.g. `huggingface://datasets/owner/repo?split=train`.
 * @param {number} [limit] - Maximum number of test cases to return. When omitted,
 *   the `limit` query parameter of `datasetPath` is used. A value that is not a
 *   positive finite number means "no limit".
 * @returns {Promise<Array<{vars: object}>>} The collected test cases.
 * @throws {Error} When a page request is answered with a non-OK response.
 */
async function fetchHuggingFaceDataset(datasetPath, limit) {
    const baseUrl = 'https://datasets-server.huggingface.co/rows';
    const pageSize = 100; // Number of rows per request
    const { owner, repo, queryParams } = parseDatasetPath(datasetPath);
    logger_1.default.info(`[Huggingface Dataset] Fetching dataset: ${owner}/${repo} ...`);
    const queryParamLimit = queryParams.get('limit');
    const requestedLimit = Number(limit ?? (queryParamLimit ? Number.parseInt(queryParamLimit, 10) : undefined));
    // Zero, negative and NaN limits would otherwise produce a non-positive page
    // length and a negative slice bound; treat them uniformly as "no limit".
    const userLimit = Number.isFinite(requestedLimit) && requestedLimit > 0 ? requestedLimit : undefined;
    const tests = [];
    let offset = 0;
    while (true) {
        // Create a new URLSearchParams for this request
        const requestParams = new URLSearchParams(queryParams);
        const length = userLimit === undefined ? pageSize : Math.min(pageSize, userLimit - offset);
        requestParams.set('offset', offset.toString());
        requestParams.set('length', length.toString());
        const url = `${baseUrl}?dataset=${encodeURIComponent(`${owner}/${repo}`)}&${requestParams.toString()}`;
        logger_1.default.debug(`[Huggingface Dataset] Fetching page from ${url}`);
        const response = await (0, fetch_1.fetchWithProxy)(url);
        if (!response.ok) {
            const error = `[Huggingface Dataset] Failed to fetch dataset: ${response.statusText}.\nFetched ${url}`;
            logger_1.default.error(error);
            throw new Error(error);
        }
        const data = await response.json();
        const rows = data.rows;
        logger_1.default.debug(`[Huggingface Dataset] Received ${rows.length} rows (total: ${data.num_rows_total})`);
        if (offset === 0) {
            // Log schema information on first request
            logger_1.default.debug('[Huggingface Dataset] Dataset features:', data.features);
            logger_1.default.debug('[Huggingface Dataset] Using query parameters:', Object.fromEntries(queryParams));
        }
        // Convert HuggingFace rows to test cases
        for (const { row } of rows) {
            tests.push({ vars: { ...row } });
        }
        logger_1.default.debug(`[Huggingface Dataset] Processed ${tests.length} total test cases so far`);
        // Check if we've reached user's limit or end of dataset
        if (userLimit !== undefined && tests.length >= userLimit) {
            logger_1.default.debug(`[Huggingface Dataset] Reached user-specified limit of ${userLimit}`);
            break;
        }
        // Check if we've fetched all rows
        if (offset + rows.length >= data.num_rows_total) {
            logger_1.default.debug('[Huggingface Dataset] Finished fetching all rows');
            break;
        }
        // An empty page cannot advance the offset; stop instead of requesting
        // the same page forever.
        if (rows.length === 0) {
            logger_1.default.debug('[Huggingface Dataset] Received an empty page, stopping');
            break;
        }
        offset += rows.length;
        logger_1.default.debug(`[Huggingface Dataset] Fetching next page starting at offset ${offset}`);
    }
    // If user specified a limit, ensure we don't return more than that
    const finalTests = userLimit === undefined ? tests : tests.slice(0, userLimit);
    logger_1.default.debug(`[Huggingface Dataset] Successfully loaded ${finalTests.length} test cases`);
    return finalTests;
}
87
+ //# sourceMappingURL=huggingfaceDatasets.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"huggingfaceDatasets.js","sourceRoot":"","sources":["../../../src/integrations/huggingfaceDatasets.ts"],"names":[],"mappings":";;;;;AAiDA,0DAgFC;AAjID,oCAA0C;AAC1C,uDAA+B;AAkB/B,SAAS,gBAAgB,CAAC,IAAY;IAKpC,0EAA0E;IAC1E,MAAM,CAAC,QAAQ,EAAE,SAAS,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,yBAAyB,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACrF,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAE1C,gCAAgC;IAChC,MAAM,aAAa,GAAG,IAAI,eAAe,CAAC;QACxC,KAAK,EAAE,MAAM;QACb,MAAM,EAAE,SAAS;KAClB,CAAC,CAAC;IAEH,8BAA8B;IAC9B,MAAM,UAAU,GAAG,IAAI,eAAe,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC;IAExD,kEAAkE;IAClE,MAAM,WAAW,GAAG,IAAI,eAAe,EAAE,CAAC;IAC1C,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,aAAa,EAAE,CAAC;QACzC,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAC9B,CAAC;IACD,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,UAAU,EAAE,CAAC;QACtC,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAC9B,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC;AACtC,CAAC;AAEM,KAAK,UAAU,uBAAuB,CAC3C,WAAmB,EACnB,KAAc;IAEd,MAAM,OAAO,GAAG,6CAA6C,CAAC;IAC9D,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,GAAG,gBAAgB,CAAC,WAAW,CAAC,CAAC;IAEnE,gBAAM,CAAC,IAAI,CAAC,2CAA2C,KAAK,IAAI,IAAI,MAAM,CAAC,CAAC;IAE5E,MAAM,KAAK,GAAe,EAAE,CAAC;IAC7B,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,MAAM,QAAQ,GAAG,GAAG,CAAC,CAAC,6BAA6B;IACnD,MAAM,eAAe,GAAG,WAAW,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IACjD,MAAM,SAAS,GAAG,KAAK,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;IAChG,OAAO,IAAI,EAAE,CAAC;QACZ,gDAAgD;QAChD,MAAM,aAAa,GAAG,IAAI,eAAe,CAAC,WAAW,CAAC,CAAC;QACvD,aAAa,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;QAC/C,aAAa,CAAC,GAAG,CACf,QAAQ,EACR,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC,CAAC,SAAS,GAAG,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,QAAQ,EAAE,CACzE,CAAC;QAEF,MAAM,GAAG,GAAG,GAAG,OAAO,YAAY,kBAAkB,CAAC,GAAG,KAAK,IAAI,IAAI,EAAE,CAAC,IAAI,aAAa,CAAC,QAAQ,EAAE,EAAE,CAAC;QACvG,gBAAM,CAAC,KAAK,CAAC,4CAA4C,GAAG,EAAE,CAAC,CAAC;QAEhE,MAAM,QAAQ,GAAG,MAAM,IAAA,sBAAc,EAAC,GAAG,CAAC,CAAC;QA
C3C,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,KAAK,GAAG,kDAAkD,QAAQ,CAAC,UAAU,cAAc,GAAG,EAAE,CAAC;YACvG,gBAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,KAAK,CAAC,CAAC;QACzB,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAwB,CAAC;QAC5D,gBAAM,CAAC,KAAK,CACV,kCAAkC,IAAI,CAAC,IAAI,CAAC,MAAM,iBAAiB,IAAI,CAAC,cAAc,GAAG,CAC1F,CAAC;QAEF,IAAI,MAAM,KAAK,CAAC,EAAE,CAAC;YACjB,0CAA0C;YAC1C,gBAAM,CAAC,KAAK,CAAC,yCAAyC,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;YACvE,gBAAM,CAAC,KAAK,CACV,+CAA+C,EAC/C,MAAM,CAAC,WAAW,CAAC,WAAW,CAAC,CAChC,CAAC;QACJ,CAAC;QAED,yCAAyC;QACzC,KAAK,MAAM,EAAE,GAAG,EAAE,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YAChC,MAAM,IAAI,GAAa;gBACrB,IAAI,EAAE;oBACJ,GAAG,GAAG;iBACP;aACF,CAAC;YACF,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnB,CAAC;QAED,gBAAM,CAAC,KAAK,CAAC,mCAAmC,KAAK,CAAC,MAAM,0BAA0B,CAAC,CAAC;QAExF,wDAAwD;QACxD,IAAI,SAAS,IAAI,KAAK,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;YAC3C,gBAAM,CAAC,KAAK,CAAC,yDAAyD,SAAS,EAAE,CAAC,CAAC;YACnF,MAAM;QACR,CAAC;QAED,kCAAkC;QAClC,IAAI,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACrD,gBAAM,CAAC,KAAK,CAAC,kDAAkD,CAAC,CAAC;YACjE,MAAM;QACR,CAAC;QAED,MAAM,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;QAC3B,gBAAM,CAAC,KAAK,CAAC,+DAA+D,MAAM,EAAE,CAAC,CAAC;IACxF,CAAC;IAED,mEAAmE;IACnE,MAAM,UAAU,GAAG,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;IAEjE,gBAAM,CAAC,KAAK,CAAC,6CAA6C,UAAU,CAAC,MAAM,aAAa,CAAC,CAAC;IAC1F,OAAO,UAAU,CAAC;AACpB,CAAC"}
@@ -15,6 +15,7 @@ export declare function renderLlmRubricPrompt(rubric: string, llmOutput: string,
15
15
  export declare function matchesLlmRubric(rubric: string, llmOutput: string, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
16
16
  export declare function matchesFactuality(input: string, expected: string, output: string, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
17
17
  export declare function matchesClosedQa(input: string, expected: string, output: string, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
18
+ export declare function matchesGEval(criteria: string, input: string, output: string, threshold: number, grading?: GradingConfig): Promise<Omit<GradingResult, 'assertion'>>;
18
19
  export declare function matchesAnswerRelevance(input: string, output: string, threshold: number, grading?: GradingConfig): Promise<Omit<GradingResult, 'assertion'>>;
19
20
  export declare function matchesContextRecall(context: string, groundTruth: string, threshold: number, grading?: GradingConfig, vars?: Record<string, string | object>): Promise<Omit<GradingResult, 'assertion'>>;
20
21
  export declare function matchesContextRelevance(question: string, context: string, threshold: number, grading?: GradingConfig): Promise<Omit<GradingResult, 'assertion'>>;
@@ -1 +1 @@
1
- {"version":3,"file":"matchers.d.ts","sourceRoot":"","sources":["../../src/matchers.ts"],"names":[],"mappings":"AAqBA,OAAO,KAAK,EAGV,WAAW,EAEX,aAAa,EACb,aAAa,EAIb,YAAY,EAEb,MAAM,SAAS,CAAC;AAiDjB,wBAAsB,kBAAkB,CACtC,IAAI,EAAE,YAAY,EAClB,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,EACnC,eAAe,EAAE,WAAW,GAAG,IAAI,GAClC,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,CAqC7B;AAED,wBAAsB,mBAAmB,CACvC,IAAI,EAAE,YAAY,EAClB,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,EACnC,eAAe,EAAE,WAAW,GAAG,IAAI,EACnC,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,WAAW,CAAC,CAmCtB;AAgBD,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,OAAe,EACxB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA8F3C;AAED;;;;;;;GAOG;AACH,wBAAsB,qBAAqB,CACzC,QAAQ,EAAE,MAAM,GAAG,SAAS,EAC5B,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAoC3C;AAED,wBAAgB,qBAAqB,CACnC,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,UAavC;AAED,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAyD3C;AAED,wBAAsB,iBAAiB,CACrC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAwF3C;AAED,wBAAsB,eAAe,CACnC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAoD3C;AAED,wBAAsB,sBAAsB,CAC1C,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA6F3C;AAED,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,MAAM,EACnB,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MA
AM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA0C3C;AAED,wBAAsB,uBAAuB,CAC3C,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAyC3C;AAED,wBAAsB,0BAA0B,CAC9C,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA0E3C;AAED,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,EAAE,CAAC,CAyD7C;AAED,UAAU,sBAAsB;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;CACvB;AAED,wBAAsB,iBAAiB,CACrC,EAAE,UAAU,EAAE,iBAAiB,EAAE,UAAe,EAAE,EAAE,sBAAsB,EAC1E,OAAO,CAAC,EAAE,aAAa;;;;GAwDxB"}
1
+ {"version":3,"file":"matchers.d.ts","sourceRoot":"","sources":["../../src/matchers.ts"],"names":[],"mappings":"AAsBA,OAAO,KAAK,EAGV,WAAW,EAEX,aAAa,EACb,aAAa,EAIb,YAAY,EAEb,MAAM,SAAS,CAAC;AAiDjB,wBAAsB,kBAAkB,CACtC,IAAI,EAAE,YAAY,EAClB,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,EACnC,eAAe,EAAE,WAAW,GAAG,IAAI,GAClC,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,CAqC7B;AAED,wBAAsB,mBAAmB,CACvC,IAAI,EAAE,YAAY,EAClB,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,EACnC,eAAe,EAAE,WAAW,GAAG,IAAI,EACnC,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,WAAW,CAAC,CAmCtB;AAgBD,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,OAAe,EACxB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA8F3C;AAED;;;;;;;GAOG;AACH,wBAAsB,qBAAqB,CACzC,QAAQ,EAAE,MAAM,GAAG,SAAS,EAC5B,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAoC3C;AAED,wBAAgB,qBAAqB,CACnC,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,UAavC;AAED,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAyD3C;AAED,wBAAsB,iBAAiB,CACrC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAwF3C;AAED,wBAAsB,eAAe,CACnC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAoD3C;AAED,wBAAsB,YAAY,CAChC,QAAQ,EAAE,MAAM,EAChB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAmF3C;AAED,wBAAsB,sBAAsB,CAC1C,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC
,aAAa,EAAE,WAAW,CAAC,CAAC,CA6F3C;AAED,wBAAsB,oBAAoB,CACxC,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,MAAM,EACnB,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA0C3C;AAED,wBAAsB,uBAAuB,CAC3C,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,GACtB,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CAyC3C;AAED,wBAAsB,0BAA0B,CAC9C,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC,CA0E3C;AAED,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,CAAC,EAAE,aAAa,EACvB,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,GACrC,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE,WAAW,CAAC,EAAE,CAAC,CAyD7C;AAED,UAAU,sBAAsB;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;CACvB;AAED,wBAAsB,iBAAiB,CACrC,EAAE,UAAU,EAAE,iBAAiB,EAAE,UAAe,EAAE,EAAE,sBAAsB,EAC1E,OAAO,CAAC,EAAE,aAAa;;;;GAwDxB"}
@@ -11,12 +11,14 @@ exports.renderLlmRubricPrompt = renderLlmRubricPrompt;
11
11
  exports.matchesLlmRubric = matchesLlmRubric;
12
12
  exports.matchesFactuality = matchesFactuality;
13
13
  exports.matchesClosedQa = matchesClosedQa;
14
+ exports.matchesGEval = matchesGEval;
14
15
  exports.matchesAnswerRelevance = matchesAnswerRelevance;
15
16
  exports.matchesContextRecall = matchesContextRecall;
16
17
  exports.matchesContextRelevance = matchesContextRelevance;
17
18
  exports.matchesContextFaithfulness = matchesContextFaithfulness;
18
19
  exports.matchesSelectBest = matchesSelectBest;
19
20
  exports.matchesModeration = matchesModeration;
21
+ const dedent_1 = __importDefault(require("dedent"));
20
22
  const cliState_1 = __importDefault(require("./cliState"));
21
23
  const envars_1 = require("./envars");
22
24
  const logger_1 = __importDefault(require("./logger"));
@@ -428,6 +430,78 @@ async function matchesClosedQa(input, expected, output, grading, vars) {
428
430
  return fail(`Error parsing output: ${err.message}`, resp.tokenUsage);
429
431
  }
430
432
  }
433
/**
 * Grade `output` against free-form `criteria` with a two-stage LLM procedure.
 *
 * Stage 1 asks the grading provider to derive evaluation steps from `criteria`.
 * Stage 2 asks it to rate `output` (the "Reply") for `input` (the "Source Text")
 * on a 0-10 scale following those steps. The rating is divided by 10 and
 * compared with `threshold`.
 *
 * @param {string} criteria - Description of how the reply should be judged.
 * @param {string} input - Source text the reply responds to; must be non-empty.
 * @param {string} output - Reply being graded.
 * @param {number} threshold - Minimum normalized score (score / 10) required to pass.
 * @param {object} [grading] - Grading configuration; `grading.provider` selects the provider.
 * @returns {Promise<{pass: boolean, score: number, reason: string}>} Grading result,
 *   or the value of `fail(...)` when the provider's answers cannot be used.
 * @throws {Error} When `input` is empty.
 */
async function matchesGEval(criteria, input, output, threshold, grading) {
    if (!input) {
        throw new Error('No source text to estimate reply');
    }
    const maxScore = 10;
    // LLMs often wrap JSON in markdown fences or pretty-print it over several
    // lines, so locate the object with a pattern that spans newlines before parsing.
    const parseJsonObject = (text, pattern) => {
        const found = typeof text === 'string' ? text.match(pattern) : null;
        if (!found) {
            throw new Error('No JSON object found in LLM output');
        }
        return JSON.parse(found[0]);
    };
    const textProvider = await getAndCheckProvider('text', grading?.provider, (await (0, defaults_1.getDefaultProviders)()).gradingProvider, 'reply geval check');
    const promptSteps = (0, dedent_1.default) `
        Given an evaluation criteria which outlines how you should judge some text, generate 3-4 concise evaluation steps for any text based on the criteria below.

        Evaluation Criteria:
        ${criteria}

        **
        IMPORTANT: Please make sure to only return in minified JSON format, with the "steps" key as a list of strings. No additional words, explanation or formatting is needed.
        Example JSON:
        {"steps": <list_of_strings>}
        **

        JSON:
        `;
    const respSteps = await textProvider.callApi(promptSteps);
    let steps;
    try {
        steps = parseJsonObject(respSteps.output, /\{\s*"steps"[\s\S]*\}/).steps;
    }
    catch {
        return fail(`LLM-proposed evaluation steps are not in JSON format: ${respSteps.output}`);
    }
    // Anything other than a non-empty array cannot be rendered as a step list.
    if (!Array.isArray(steps) || steps.length === 0) {
        return fail('LLM does not propose any evaluation step');
    }
    const promptText = (0, dedent_1.default) `
        You will be given one Reply for a Source Text below. Your task is to rate the Reply on one metric.
        Please make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.

        Evaluation Criteria:
        ${criteria}

        Evaluation Steps:
        - ${steps.join('\n- ')}
        - Given the evaluation steps, return a JSON with two keys: 1) a "score" key ranging from 0 - ${maxScore}, with ${maxScore} being that it follows the Evaluation Criteria outlined in the Evaluation Steps and 0 being that it does not; 2) a "reason" key, a reason for the given score, but DO NOT QUOTE THE SCORE in your reason. Please mention specific information from Source Text and Reply in your reason, but be very concise with it!

        Source Text:
        ${input}

        Reply:
        ${output}

        **
        IMPORTANT: Please make sure to only return in minified JSON format, with the "score" and "reason" key. No additional words, explanation or formatting is needed.

        Example JSON:
        {"score":0,"reason":"The text does not follow the evaluation steps provided."}
        **

        JSON:
        `;
    const resp = await textProvider.callApi(promptText);
    let result;
    try {
        result = parseJsonObject(resp.output, /\{[\s\S]+\}/);
    }
    catch {
        return fail(`LLM-proposed evaluation result is not in JSON format: ${resp.output}`);
    }
    // Accept numeric strings such as "8", but reject anything that would turn
    // the normalized score into NaN.
    const rawScore = typeof result.score === 'string' ? Number.parseFloat(result.score) : result.score;
    if (typeof rawScore !== 'number' || !Number.isFinite(rawScore)) {
        return fail(`LLM-proposed evaluation result has no numeric "score": ${resp.output}`);
    }
    const score = rawScore / maxScore;
    return {
        pass: score >= threshold,
        score,
        reason: result.reason,
    };
}
431
505
  async function matchesAnswerRelevance(input, output, threshold, grading) {
432
506
  const embeddingProvider = await getAndCheckProvider('embedding', grading?.provider, (await (0, defaults_1.getDefaultProviders)()).embeddingProvider, 'answer relevancy check');
433
507
  const textProvider = await getAndCheckProvider('text', grading?.provider, (await (0, defaults_1.getDefaultProviders)()).gradingProvider, 'answer relevancy check');