@huggingface/tasks 0.20.1 → 0.20.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,6 +32,11 @@ export declare const EVALUATION_FRAMEWORKS: {
32
32
  readonly description: "Archipelago is a system for running and evaluating AI agents against MCP applications.";
33
33
  readonly url: "https://github.com/Mercor-Intelligence/archipelago";
34
34
  };
35
+ readonly "screenspot-pro": {
36
+ readonly name: "screenspot-pro";
37
+ readonly description: "ScreenSpot-Pro is a GUI grounding benchmark designed to evaluate how well AI agents can locate and identify UI elements across professional software applications in high-resolution screenshots, covering 1,585 annotated images from 26 professional tools.";
38
+ readonly url: "https://github.com/likaixin2000/ScreenSpot-Pro-GUI-Grounding";
39
+ };
35
40
  readonly "swe-bench": {
36
41
  readonly name: "swe-bench";
37
42
  readonly description: "SWE Bench is a framework for evaluating the performance of LLMs on software engineering tasks.";
@@ -1 +1 @@
1
- {"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../src/eval.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,eAAO,MAAM,qBAAqB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAgDxB,CAAC"}
1
+ {"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../src/eval.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,eAAO,MAAM,qBAAqB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsDxB,CAAC"}
@@ -35,6 +35,11 @@ exports.EVALUATION_FRAMEWORKS = {
35
35
  description: "Archipelago is a system for running and evaluating AI agents against MCP applications.",
36
36
  url: "https://github.com/Mercor-Intelligence/archipelago",
37
37
  },
38
+ "screenspot-pro": {
39
+ name: "screenspot-pro",
40
+ description: "ScreenSpot-Pro is a GUI grounding benchmark designed to evaluate how well AI agents can locate and identify UI elements across professional software applications in high-resolution screenshots, covering 1,585 annotated images from 26 professional tools.",
41
+ url: "https://github.com/likaixin2000/ScreenSpot-Pro-GUI-Grounding",
42
+ },
38
43
  "swe-bench": {
39
44
  name: "swe-bench",
40
45
  description: "SWE Bench is a framework for evaluating the performance of LLMs on software engineering tasks.",
@@ -32,6 +32,11 @@ export declare const EVALUATION_FRAMEWORKS: {
32
32
  readonly description: "Archipelago is a system for running and evaluating AI agents against MCP applications.";
33
33
  readonly url: "https://github.com/Mercor-Intelligence/archipelago";
34
34
  };
35
+ readonly "screenspot-pro": {
36
+ readonly name: "screenspot-pro";
37
+ readonly description: "ScreenSpot-Pro is a GUI grounding benchmark designed to evaluate how well AI agents can locate and identify UI elements across professional software applications in high-resolution screenshots, covering 1,585 annotated images from 26 professional tools.";
38
+ readonly url: "https://github.com/likaixin2000/ScreenSpot-Pro-GUI-Grounding";
39
+ };
35
40
  readonly "swe-bench": {
36
41
  readonly name: "swe-bench";
37
42
  readonly description: "SWE Bench is a framework for evaluating the performance of LLMs on software engineering tasks.";
@@ -1 +1 @@
1
- {"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../src/eval.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,eAAO,MAAM,qBAAqB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAgDxB,CAAC"}
1
+ {"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../src/eval.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,eAAO,MAAM,qBAAqB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsDxB,CAAC"}
package/dist/esm/eval.js CHANGED
@@ -32,6 +32,11 @@ export const EVALUATION_FRAMEWORKS = {
32
32
  description: "Archipelago is a system for running and evaluating AI agents against MCP applications.",
33
33
  url: "https://github.com/Mercor-Intelligence/archipelago",
34
34
  },
35
+ "screenspot-pro": {
36
+ name: "screenspot-pro",
37
+ description: "ScreenSpot-Pro is a GUI grounding benchmark designed to evaluate how well AI agents can locate and identify UI elements across professional software applications in high-resolution screenshots, covering 1,585 annotated images from 26 professional tools.",
38
+ url: "https://github.com/likaixin2000/ScreenSpot-Pro-GUI-Grounding",
39
+ },
35
40
  "swe-bench": {
36
41
  name: "swe-bench",
37
42
  description: "SWE Bench is a framework for evaluating the performance of LLMs on software engineering tasks.",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@huggingface/tasks",
3
- "version": "0.20.1",
3
+ "version": "0.20.2",
4
4
  "description": "List of ML tasks for huggingface.co/tasks",
5
5
  "keywords": [
6
6
  "hub",
package/src/eval.ts CHANGED
@@ -32,6 +32,12 @@ export const EVALUATION_FRAMEWORKS = {
32
32
  description: "Archipelago is a system for running and evaluating AI agents against MCP applications.",
33
33
  url: "https://github.com/Mercor-Intelligence/archipelago",
34
34
  },
35
+ "screenspot-pro": {
36
+ name: "screenspot-pro",
37
+ description:
38
+ "ScreenSpot-Pro is a GUI grounding benchmark designed to evaluate how well AI agents can locate and identify UI elements across professional software applications in high-resolution screenshots, covering 1,585 annotated images from 26 professional tools.",
39
+ url: "https://github.com/likaixin2000/ScreenSpot-Pro-GUI-Grounding",
40
+ },
35
41
  "swe-bench": {
36
42
  name: "swe-bench",
37
43
  description: "SWE Bench is a framework for evaluating the performance of LLMs on software engineering tasks.",