npm - @huggingface/tasks - Versions diffs - 0.20.1 → 0.20.2 - Mend

@huggingface/tasks 0.20.1 → 0.20.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/dist/commonjs/eval.d.ts +5 -0
package/dist/commonjs/eval.d.ts.map +1 -1
package/dist/commonjs/eval.js +5 -0
package/dist/esm/eval.d.ts +5 -0
package/dist/esm/eval.d.ts.map +1 -1
package/dist/esm/eval.js +5 -0
package/package.json +1 -1
package/src/eval.ts +6 -0

package/dist/commonjs/eval.d.ts CHANGED Viewed

@@ -32,6 +32,11 @@ export declare const EVALUATION_FRAMEWORKS: {
         readonly description: "Archipelago is a system for running and evaluating AI agents against MCP applications.";
         readonly url: "https://github.com/Mercor-Intelligence/archipelago";
     };
+    readonly "screenspot-pro": {
+        readonly name: "screenspot-pro";
+        readonly description: "ScreenSpot-Pro is a GUI grounding benchmark designed to evaluate how well AI agents can locate and identify UI elements across professional software applications in high-resolution screenshots, covering 1,585 annotated images from 26 professional tools.";
+        readonly url: "https://github.com/likaixin2000/ScreenSpot-Pro-GUI-Grounding";
+    };
     readonly "swe-bench": {
         readonly name: "swe-bench";
         readonly description: "SWE Bench is a framework for evaluating the performance of LLMs on software engineering tasks.";

package/dist/commonjs/eval.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../src/eval.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,eAAO,MAAM,qBAAqB~~;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAgDxB~~,CAAC"}
1	+ {"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../src/eval.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,eAAO,MAAM,qBAAqB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsDxB,CAAC"}

package/dist/commonjs/eval.js CHANGED Viewed

@@ -35,6 +35,11 @@ exports.EVALUATION_FRAMEWORKS = {
         description: "Archipelago is a system for running and evaluating AI agents against MCP applications.",
         url: "https://github.com/Mercor-Intelligence/archipelago",
     },
+    "screenspot-pro": {
+        name: "screenspot-pro",
+        description: "ScreenSpot-Pro is a GUI grounding benchmark designed to evaluate how well AI agents can locate and identify UI elements across professional software applications in high-resolution screenshots, covering 1,585 annotated images from 26 professional tools.",
+        url: "https://github.com/likaixin2000/ScreenSpot-Pro-GUI-Grounding",
+    },
     "swe-bench": {
         name: "swe-bench",
         description: "SWE Bench is a framework for evaluating the performance of LLMs on software engineering tasks.",

package/dist/esm/eval.d.ts CHANGED Viewed

@@ -32,6 +32,11 @@ export declare const EVALUATION_FRAMEWORKS: {
         readonly description: "Archipelago is a system for running and evaluating AI agents against MCP applications.";
         readonly url: "https://github.com/Mercor-Intelligence/archipelago";
     };
+    readonly "screenspot-pro": {
+        readonly name: "screenspot-pro";
+        readonly description: "ScreenSpot-Pro is a GUI grounding benchmark designed to evaluate how well AI agents can locate and identify UI elements across professional software applications in high-resolution screenshots, covering 1,585 annotated images from 26 professional tools.";
+        readonly url: "https://github.com/likaixin2000/ScreenSpot-Pro-GUI-Grounding";
+    };
     readonly "swe-bench": {
         readonly name: "swe-bench";
         readonly description: "SWE Bench is a framework for evaluating the performance of LLMs on software engineering tasks.";

package/dist/esm/eval.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../src/eval.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,eAAO,MAAM,qBAAqB~~;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAgDxB~~,CAAC"}
1	+ {"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../src/eval.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,eAAO,MAAM,qBAAqB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsDxB,CAAC"}

package/dist/esm/eval.js CHANGED Viewed

@@ -32,6 +32,11 @@ export const EVALUATION_FRAMEWORKS = {
         description: "Archipelago is a system for running and evaluating AI agents against MCP applications.",
         url: "https://github.com/Mercor-Intelligence/archipelago",
     },
+    "screenspot-pro": {
+        name: "screenspot-pro",
+        description: "ScreenSpot-Pro is a GUI grounding benchmark designed to evaluate how well AI agents can locate and identify UI elements across professional software applications in high-resolution screenshots, covering 1,585 annotated images from 26 professional tools.",
+        url: "https://github.com/likaixin2000/ScreenSpot-Pro-GUI-Grounding",
+    },
     "swe-bench": {
         name: "swe-bench",
         description: "SWE Bench is a framework for evaluating the performance of LLMs on software engineering tasks.",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@huggingface/tasks",
-  "version": "0.20.1",
+  "version": "0.20.2",
   "description": "List of ML tasks for huggingface.co/tasks",
   "keywords": [
     "hub",

package/src/eval.ts CHANGED Viewed

@@ -32,6 +32,12 @@ export const EVALUATION_FRAMEWORKS = {
 		description: "Archipelago is a system for running and evaluating AI agents against MCP applications.",
 		url: "https://github.com/Mercor-Intelligence/archipelago",
 	},
+	"screenspot-pro": {
+		name: "screenspot-pro",
+		description:
+			"ScreenSpot-Pro is a GUI grounding benchmark designed to evaluate how well AI agents can locate and identify UI elements across professional software applications in high-resolution screenshots, covering 1,585 annotated images from 26 professional tools.",
+		url: "https://github.com/likaixin2000/ScreenSpot-Pro-GUI-Grounding",
+	},
 	"swe-bench": {
 		name: "swe-bench",
 		description: "SWE Bench is a framework for evaluating the performance of LLMs on software engineering tasks.",