@huggingface/tasks 0.20.7 → 0.20.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -52,5 +52,10 @@ export declare const EVALUATION_FRAMEWORKS: {
52
52
  readonly description: "NeMo Evaluator is an open-source platform for robust, reproducible, and scalable evaluation of Large Language Models across 100+ benchmarks.";
53
53
  readonly url: "https://github.com/NVIDIA-NeMo/Evaluator";
54
54
  };
55
+ readonly "yc-bench": {
56
+ readonly name: "yc-bench";
57
+ readonly description: "YC Bench is a long-horizon deterministic benchmark for LLM agents. The agent plays CEO of an AI startup over a simulated 1–3 year run.";
58
+ readonly url: "https://github.com/collinear-ai/yc-bench";
59
+ };
55
60
  };
56
61
  //# sourceMappingURL=eval.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../src/eval.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,eAAO,MAAM,qBAAqB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsDxB,CAAC"}
1
+ {"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../src/eval.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,eAAO,MAAM,qBAAqB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA4DxB,CAAC"}
@@ -55,4 +55,9 @@ exports.EVALUATION_FRAMEWORKS = {
55
55
  description: "NeMo Evaluator is an open-source platform for robust, reproducible, and scalable evaluation of Large Language Models across 100+ benchmarks.",
56
56
  url: "https://github.com/NVIDIA-NeMo/Evaluator",
57
57
  },
58
+ "yc-bench": {
59
+ name: "yc-bench",
60
+ description: "YC Bench is a long-horizon deterministic benchmark for LLM agents. The agent plays CEO of an AI startup over a simulated 1–3 year run.",
61
+ url: "https://github.com/collinear-ai/yc-bench",
62
+ },
58
63
  };
@@ -52,5 +52,10 @@ export declare const EVALUATION_FRAMEWORKS: {
52
52
  readonly description: "NeMo Evaluator is an open-source platform for robust, reproducible, and scalable evaluation of Large Language Models across 100+ benchmarks.";
53
53
  readonly url: "https://github.com/NVIDIA-NeMo/Evaluator";
54
54
  };
55
+ readonly "yc-bench": {
56
+ readonly name: "yc-bench";
57
+ readonly description: "YC Bench is a long-horizon deterministic benchmark for LLM agents. The agent plays CEO of an AI startup over a simulated 1–3 year run.";
58
+ readonly url: "https://github.com/collinear-ai/yc-bench";
59
+ };
55
60
  };
56
61
  //# sourceMappingURL=eval.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../src/eval.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,eAAO,MAAM,qBAAqB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsDxB,CAAC"}
1
+ {"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../src/eval.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,eAAO,MAAM,qBAAqB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA4DxB,CAAC"}
package/dist/esm/eval.js CHANGED
@@ -52,4 +52,9 @@ export const EVALUATION_FRAMEWORKS = {
52
52
  description: "NeMo Evaluator is an open-source platform for robust, reproducible, and scalable evaluation of Large Language Models across 100+ benchmarks.",
53
53
  url: "https://github.com/NVIDIA-NeMo/Evaluator",
54
54
  },
55
+ "yc-bench": {
56
+ name: "yc-bench",
57
+ description: "YC Bench is a long-horizon deterministic benchmark for LLM agents. The agent plays CEO of an AI startup over a simulated 1–3 year run.",
58
+ url: "https://github.com/collinear-ai/yc-bench",
59
+ },
55
60
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@huggingface/tasks",
3
- "version": "0.20.7",
3
+ "version": "0.20.8",
4
4
  "description": "List of ML tasks for huggingface.co/tasks",
5
5
  "keywords": [
6
6
  "hub",
package/src/eval.ts CHANGED
@@ -55,4 +55,10 @@ export const EVALUATION_FRAMEWORKS = {
55
55
  "NeMo Evaluator is an open-source platform for robust, reproducible, and scalable evaluation of Large Language Models across 100+ benchmarks.",
56
56
  url: "https://github.com/NVIDIA-NeMo/Evaluator",
57
57
  },
58
+ "yc-bench": {
59
+ name: "yc-bench",
60
+ description:
61
+ "YC Bench is a long-horizon deterministic benchmark for LLM agents. The agent plays CEO of an AI startup over a simulated 1–3 year run.",
62
+ url: "https://github.com/collinear-ai/yc-bench",
63
+ },
58
64
  } as const;