PyPI - crfm-helm - Versions diffs - 0.5.2__py3-none-any.whl → 0.5.4__py3-none-any.whl - Mend - Supply Chain Defender

crfm-helm 0.5.2__py3-none-any.whl → 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of crfm-helm might be problematic. Click here for more details.

Files changed (209) hide show

helm/benchmark/static/{schema_image2structure.yaml → schema_image2struct.yaml} RENAMED Viewed

@@ -84,9 +84,19 @@ metrics:
     description: Fraction of instances where the generated code compiles successfully.
     lower_is_better: false
   - name: fid_similarity
-    display_name: FID similarity
-    short_display_name: FID
-    description: FID similarity (Fréchet Inception Distance) [(Heusel et al., 2017)](https://arxiv.org/abs/1706.08500) between an image generated by the model and the target image.
+    display_name: CIS
+    short_display_name: CIS
+    description: The cosine similarity between the Inception feature vectors.
+    lower_is_better: false
+  - name: lpips_similarity
+    display_name: LPIPS
+    short_display_name: LPIPS
+    description: The LPIPS similarity between the generated image and the target image.
+    lower_is_better: false
+  - name: ssim_similarity
+    display_name: SSIM
+    short_display_name: SSIM
+    description: The SSIM similarity between the generated image and the target image.
     lower_is_better: false
   # Accuracy metrics:
@@ -165,6 +175,10 @@ metric_groups:
         split: ${main_split}
       - name: earth_mover_similarity
         split: ${main_split}
+      - name: lpips_similarity
+        split: ${main_split}
+      - name: ssim_similarity
+        split: ${main_split}
   - name: generation_text
     display_name: Generation (text)
@@ -175,7 +189,7 @@ metric_groups:
 ############################################################
 run_groups:
   - name: core_scenarios
-    display_name: Image2Structure
+    display_name: Image2Struct
     description: Scenarios for evaluating the ability of Vision-Language models to generate structured outputs from images.
     category: All scenarios
     subgroups:
@@ -183,13 +197,13 @@ run_groups:
       - image2webpage
       - image2musicsheet
-  - name: image2structure_real
-    display_name: Image2Structure (Wild)
+  - name: image2struct_wild
+    display_name: Image2Struct (Wild)
     description: Scenarios for evaluating the ability of Vision-Language models to generate structured outputs from images. These scenarios contain images that do not have a ground truth.
     category: All scenarios
     subgroups:
-      - image2latex_real
-      - image2webpage_real
+      - image2latex_wild
+      - image2webpage_wild
   - name: image2latex
     display_name: Image2LaTeX
@@ -209,9 +223,9 @@ run_groups:
       when: "2024"
       language: English
-  - name: image2latex_easy
-    display_name: I2LaTeX (Easy)
-    description: The 1/3 easiest examples of the Image2LaTeX benchmark according to a simple heuristic counting the number of characters in the ground truth LaTeX code.
+  - name: image2latex_equation
+    display_name: I2LaTeX (Equation)
+    description: The Image2LaTeX benchmark subset for converting images of mathematical equations to LaTeX.
     metric_groups:
       - accuracy_simple
       - compilation
@@ -223,14 +237,14 @@ run_groups:
       main_split: valid
     taxonomy:
       task: image-to-text
-      what: mathematical equations, tables, algorithms, tikz
+      what: mathematical equations
       who: dataset authors
       when: "2024"
       language: English
-  - name: image2latex_medium
-    display_name: I2LaTeX (Medium)
-    description: The 1/3 examples with medium difficulty of the Image2LaTeX benchmark according to a simple heuristic counting the number of characters in the ground truth LaTeX code.
+  - name: image2latex_table
+    display_name: I2LaTeX (Table)
+    description: The Image2LaTeX benchmark subset for converting images of tables to LaTeX.
     metric_groups:
       - accuracy_simple
       - compilation
@@ -242,14 +256,13 @@ run_groups:
       main_split: valid
     taxonomy:
       task: image-to-text
-      what: mathematical equations, tables, algorithms, tikz
+      what: tables
       who: dataset authors
       when: "2024"
-      language: English
-  - name: image2latex_hard
-    display_name: I2LaTeX (Hard)
-    description: The 1/3 hardest examples of the Image2LaTeX benchmark according to a simple heuristic counting the number of characters in the ground truth LaTeX code.
+  - name: image2latex_algorithm
+    display_name: I2LaTeX (Algorithm)
+    description: The Image2LaTeX benchmark subset for converting images of algorithms to LaTeX.
     metric_groups:
       - accuracy_simple
       - compilation
@@ -261,12 +274,86 @@ run_groups:
       main_split: valid
     taxonomy:
       task: image-to-text
-      what: mathematical equations, tables, algorithms, tikz
+      what: algorithms
       who: dataset authors
       when: "2024"
-      language: English
-  - name: image2latex_real
+  - name: image2latex_plot
+    display_name: I2LaTeX (Tikz)
+    description: The Image2LaTeX benchmark subset for converting images of tikz to LaTeX.
+    metric_groups:
+      - accuracy_simple
+      - compilation
+      - generation_image
+      - generation_text
+      - general_information
+    environment:
+      main_name: earth_mover_similarity
+      main_split: valid
+    taxonomy:
+      task: image-to-text
+      what: tikz (plots)
+      who: dataset authors
+      when: "2024"
+  # - name: image2latex_easy
+  #   display_name: I2LaTeX (Easy)
+  #   description: The 1/3 easiest examples of the Image2LaTeX benchmark according to a simple heuristic counting the number of characters in the ground truth LaTeX code.
+  #   metric_groups:
+  #     - accuracy_simple
+  #     - compilation
+  #     - generation_image
+  #     - generation_text
+  #     - general_information
+  #   environment:
+  #     main_name: earth_mover_similarity
+  #     main_split: valid
+  #   taxonomy:
+  #     task: image-to-text
+  #     what: mathematical equations, tables, algorithms, tikz
+  #     who: dataset authors
+  #     when: "2024"
+  #     language: English
+  # - name: image2latex_medium
+  #   display_name: I2LaTeX (Medium)
+  #   description: The 1/3 examples with medium difficulty of the Image2LaTeX benchmark according to a simple heuristic counting the number of characters in the ground truth LaTeX code.
+  #   metric_groups:
+  #     - accuracy_simple
+  #     - compilation
+  #     - generation_image
+  #     - generation_text
+  #     - general_information
+  #   environment:
+  #     main_name: earth_mover_similarity
+  #     main_split: valid
+  #   taxonomy:
+  #     task: image-to-text
+  #     what: mathematical equations, tables, algorithms, tikz
+  #     who: dataset authors
+  #     when: "2024"
+  #     language: English
+  # - name: image2latex_hard
+  #   display_name: I2LaTeX (Hard)
+  #   description: The 1/3 hardest examples of the Image2LaTeX benchmark according to a simple heuristic counting the number of characters in the ground truth LaTeX code.
+  #   metric_groups:
+  #     - accuracy_simple
+  #     - compilation
+  #     - generation_image
+  #     - generation_text
+  #     - general_information
+  #   environment:
+  #     main_name: earth_mover_similarity
+  #     main_split: valid
+  #   taxonomy:
+  #     task: image-to-text
+  #     what: mathematical equations, tables, algorithms, tikz
+  #     who: dataset authors
+  #     when: "2024"
+  #     language: English
+  - name: image2latex_wild
     display_name: Image2LaTeX (Wild)
     description: Images of mathematical equations gathered from Wikipedia that do not have a LaTeX ground truth.
     metric_groups:
@@ -301,9 +388,9 @@ run_groups:
       when: "2024"
       language: English
-  - name: image2webpage_easy
-    display_name: I2webpage (Easy)
-    description: The 1/3 easiest examples of the Image2webpage benchmark according to a simple heuristic counting the number of characters in the ground truth HTML/CSS/Javascript code.
+  - name: image2webpage_css
+    display_name: I2webpage (CSS)
+    description: The Image2webpage benchmark subset for converting images of webpages to code repo containing mostly CSS.
     metric_groups:
       - accuracy_simple
       - compilation
@@ -315,14 +402,13 @@ run_groups:
       main_split: valid
     taxonomy:
       task: image-to-text
-      what: css, html, javascript
+      what: code (mostly CSS)
       who: dataset authors
       when: "2024"
-      language: English
-  - name: image2webpage_medium
-    display_name: I2webpage (Medium)
-    description: The 1/3 examples with medium difficulty of the Image2webpage benchmark according to a simple heuristic counting the number of characters in the ground truth HTML/CSS/Javascript code.
+  - name: image2webpage_html
+    display_name: I2webpage (HTML)
+    description: The Image2webpage benchmark subset for converting images of webpages to code repo containing mostly HTML.
     metric_groups:
       - accuracy_simple
       - compilation
@@ -334,14 +420,13 @@ run_groups:
       main_split: valid
     taxonomy:
       task: image-to-text
-      what: css, html, javascript
+      what: code (mostly HTML)
       who: dataset authors
       when: "2024"
-      language: English
-  - name: image2webpage_hard
-    display_name: I2webpage (Hard)
-    description: The 1/3 hardest examples of the Image2webpage benchmark according to a simple heuristic counting the number of characters in the ground truth HTML/CSS/Javascript code.
+  - name: image2webpage_javascript
+    display_name: I2webpage (Javascript)
+    description: The Image2webpage benchmark subset for converting images of webpages to code repo containing mostly Javascript.
     metric_groups:
       - accuracy_simple
       - compilation
@@ -353,12 +438,68 @@ run_groups:
       main_split: valid
     taxonomy:
       task: image-to-text
-      what: css, html, javascript
+      what: code (mostly Javascript)
       who: dataset authors
       when: "2024"
-      language: English
-  - name: image2webpage_real
+  # - name: image2webpage_easy
+  #   display_name: I2webpage (Easy)
+  #   description: The 1/3 easiest examples of the Image2webpage benchmark according to a simple heuristic counting the number of characters in the ground truth HTML/CSS/Javascript code.
+  #   metric_groups:
+  #     - accuracy_simple
+  #     - compilation
+  #     - generation_image
+  #     - generation_text
+  #     - general_information
+  #   environment:
+  #     main_name: earth_mover_similarity
+  #     main_split: valid
+  #   taxonomy:
+  #     task: image-to-text
+  #     what: css, html, javascript
+  #     who: dataset authors
+  #     when: "2024"
+  #     language: English
+  # - name: image2webpage_medium
+  #   display_name: I2webpage (Medium)
+  #   description: The 1/3 examples with medium difficulty of the Image2webpage benchmark according to a simple heuristic counting the number of characters in the ground truth HTML/CSS/Javascript code.
+  #   metric_groups:
+  #     - accuracy_simple
+  #     - compilation
+  #     - generation_image
+  #     - generation_text
+  #     - general_information
+  #   environment:
+  #     main_name: earth_mover_similarity
+  #     main_split: valid
+  #   taxonomy:
+  #     task: image-to-text
+  #     what: css, html, javascript
+  #     who: dataset authors
+  #     when: "2024"
+  #     language: English
+  # - name: image2webpage_hard
+  #   display_name: I2webpage (Hard)
+  #   description: The 1/3 hardest examples of the Image2webpage benchmark according to a simple heuristic counting the number of characters in the ground truth HTML/CSS/Javascript code.
+  #   metric_groups:
+  #     - accuracy_simple
+  #     - compilation
+  #     - generation_image
+  #     - generation_text
+  #     - general_information
+  #   environment:
+  #     main_name: earth_mover_similarity
+  #     main_split: valid
+  #   taxonomy:
+  #     task: image-to-text
+  #     what: css, html, javascript
+  #     who: dataset authors
+  #     when: "2024"
+  #     language: English
+  - name: image2webpage_wild
     display_name: Image2webpage (Wild)
     description: Images of webpages gathered from the internet by taking screenshots and so on that do not have an HTML/CSS/Javascript ground truth.
     metric_groups:
@@ -392,56 +533,56 @@ run_groups:
       when: "2024"
       language: English
-  - name: image2musicsheet_easy
-    display_name: I2musicsheet (Easy)
-    description: The 1/3 easiest examples of the Image2musicsheet benchmark according to a simple heuristic counting the number of black pixels in the target image.
-    metric_groups:
-      - accuracy_simple
-      - compilation
-      - generation_image
-      - general_information
-    environment:
-      main_name: earth_mover_similarity
-      main_split: valid
-    taxonomy:
-      task: image-to-text
-      what: music sheets
-      who: dataset authors
-      when: "2024"
-      language: English
+  # - name: image2musicsheet_easy
+  #   display_name: I2musicsheet (Easy)
+  #   description: The 1/3 easiest examples of the Image2musicsheet benchmark according to a simple heuristic counting the number of black pixels in the target image.
+  #   metric_groups:
+  #     - accuracy_simple
+  #     - compilation
+  #     - generation_image
+  #     - general_information
+  #   environment:
+  #     main_name: earth_mover_similarity
+  #     main_split: valid
+  #   taxonomy:
+  #     task: image-to-text
+  #     what: music sheets
+  #     who: dataset authors
+  #     when: "2024"
+  #     language: English
-  - name: image2musicsheet_medium
-    display_name: I2musicsheet (Medium)
-    description: The 1/3 examples with medium difficulty of the Image2musicsheet benchmark according to a simple heuristic counting the number of black pixels in the target image.
-    metric_groups:
-      - accuracy_simple
-      - compilation
-      - generation_image
-      - general_information
-    environment:
-      main_name: earth_mover_similarity
-      main_split: valid
-    taxonomy:
-      task: image-to-text
-      what: music sheets
-      who: dataset authors
-      when: "2024"
-      language: English
+  # - name: image2musicsheet_medium
+  #   display_name: I2musicsheet (Medium)
+  #   description: The 1/3 examples with medium difficulty of the Image2musicsheet benchmark according to a simple heuristic counting the number of black pixels in the target image.
+  #   metric_groups:
+  #     - accuracy_simple
+  #     - compilation
+  #     - generation_image
+  #     - general_information
+  #   environment:
+  #     main_name: earth_mover_similarity
+  #     main_split: valid
+  #   taxonomy:
+  #     task: image-to-text
+  #     what: music sheets
+  #     who: dataset authors
+  #     when: "2024"
+  #     language: English
-  - name: image2musicsheet_hard
-    display_name: I2musicsheet (Hard)
-    description: The 1/3 hardest examples of the Image2musicsheet benchmark according to a simple heuristic counting the number of black pixels in the target image.
-    metric_groups:
-      - accuracy_simple
-      - compilation
-      - generation_image
-      - general_information
-    environment:
-      main_name: earth_mover_similarity
-      main_split: valid
-    taxonomy:
-      task: image-to-text
-      what: music sheets
-      who: dataset authors
-      when: "2024"
-      language: English
+  # - name: image2musicsheet_hard
+  #   display_name: I2musicsheet (Hard)
+  #   description: The 1/3 hardest examples of the Image2musicsheet benchmark according to a simple heuristic counting the number of black pixels in the target image.
+  #   metric_groups:
+  #     - accuracy_simple
+  #     - compilation
+  #     - generation_image
+  #     - general_information
+  #   environment:
+  #     main_name: earth_mover_similarity
+  #     main_split: valid
+  #   taxonomy:
+  #     task: image-to-text
+  #     what: music sheets
+  #     who: dataset authors
+  #     when: "2024"
+  #     language: English