PyPI - commonlid - Versions diffs - 0.2.2__tar.gz → 0.2.4__tar.gz - Mend

commonlid 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (109) hide show

{commonlid-0.2.2 → commonlid-0.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: commonlid
-Version: 0.2.2
+Version: 0.2.4
 Summary: Evaluate language identification models on CommonLID and other benchmarks.
 Project-URL: Homepage, https://huggingface.co/datasets/commoncrawl/CommonLID
 Project-URL: Paper, https://arxiv.org/abs/2601.18026
@@ -247,6 +247,8 @@ Requires-Dist: torch>=2.4; extra == 'all'
 Requires-Dist: transformers<5,>=4.46; extra == 'all'
 Provides-Extra: cld3
 Requires-Dist: cld3-py>=3.1; extra == 'cld3'
+Provides-Extra: commonlingua
+Requires-Dist: torch>=2.4; extra == 'commonlingua'
 Provides-Extra: dev
 Requires-Dist: azure-identity>=1.17; extra == 'dev'
 Requires-Dist: botocore>=1.35; extra == 'dev'
@@ -315,6 +317,7 @@ From PyPI:
 pip install commonlid                      # core deps + classical LID models
 pip install "commonlid[llm]"               # + DSPy-based LLM evaluation
 pip install "commonlid[afrolid]"           # + torch/transformers for AfroLID
+pip install "commonlid[commonlingua]"      # + torch for the CommonLingua byte-level model
 pip install "commonlid[notebooks]"         # + jupyterlab + matplotlib for paper_tables.ipynb
 pip install "commonlid[all]"               # everything runtime-facing
 ```
@@ -468,7 +471,7 @@ from commonlid import list_models, list_datasets
 assert list_models() == [
     "AfroLID", "GlotLID", "OpenLID-v2", "cld2", "cld3",
-    "fasttext", "funlangid", "pyfranc",
+    "commonlingua", "fasttext", "funlangid", "pyfranc",
 ]
 assert list_datasets() == [
     "bibles_300", "bibles_300_nano",
@@ -574,6 +577,7 @@ for line in preds_path.read_text().splitlines():
 | `fasttext` | [facebook/fasttext-language-identification](https://huggingface.co/facebook/fasttext-language-identification) | fasttext |
 | `pyfranc` | [pyfranc](https://pypi.org/project/pyfranc/) | Pure Python |
 | `AfroLID` | [UBC-NLP/afrolid_1.5](https://huggingface.co/UBC-NLP/afrolid_1.5) | Requires `[afrolid]` extra |
+| `commonlingua` | [PleIAs/CommonLingua](https://huggingface.co/PleIAs/CommonLingua) | 2.35M-param byte-level model, 334 languages; requires `[commonlingua]` extra |
 | `funlangid` | Vendored in `src/commonlid/vendor/fun_langid.py` | Simple char-4gram baseline |
 LLM models are instantiated dynamically (`DSPyLLMModel`) and not
@@ -704,6 +708,8 @@ exclude the `und` bucket by default (toggle with `include_und=True`).
 ## Adding a new model
+A guide for adding a new model can be found [here](docs/contributing/adding_a_model.md).
 <!-- readme-test: fast; id=add-model (registers into an isolated registry) -->
 ```python
 # src/commonlid/models/my_model.py

{commonlid-0.2.2 → commonlid-0.2.4}/README.md RENAMED Viewed

@@ -39,6 +39,7 @@ From PyPI:
 pip install commonlid                      # core deps + classical LID models
 pip install "commonlid[llm]"               # + DSPy-based LLM evaluation
 pip install "commonlid[afrolid]"           # + torch/transformers for AfroLID
+pip install "commonlid[commonlingua]"      # + torch for the CommonLingua byte-level model
 pip install "commonlid[notebooks]"         # + jupyterlab + matplotlib for paper_tables.ipynb
 pip install "commonlid[all]"               # everything runtime-facing
 ```
@@ -192,7 +193,7 @@ from commonlid import list_models, list_datasets
 assert list_models() == [
     "AfroLID", "GlotLID", "OpenLID-v2", "cld2", "cld3",
-    "fasttext", "funlangid", "pyfranc",
+    "commonlingua", "fasttext", "funlangid", "pyfranc",
 ]
 assert list_datasets() == [
     "bibles_300", "bibles_300_nano",
@@ -298,6 +299,7 @@ for line in preds_path.read_text().splitlines():
 | `fasttext` | [facebook/fasttext-language-identification](https://huggingface.co/facebook/fasttext-language-identification) | fasttext |
 | `pyfranc` | [pyfranc](https://pypi.org/project/pyfranc/) | Pure Python |
 | `AfroLID` | [UBC-NLP/afrolid_1.5](https://huggingface.co/UBC-NLP/afrolid_1.5) | Requires `[afrolid]` extra |
+| `commonlingua` | [PleIAs/CommonLingua](https://huggingface.co/PleIAs/CommonLingua) | 2.35M-param byte-level model, 334 languages; requires `[commonlingua]` extra |
 | `funlangid` | Vendored in `src/commonlid/vendor/fun_langid.py` | Simple char-4gram baseline |
 LLM models are instantiated dynamically (`DSPyLLMModel`) and not
@@ -428,6 +430,8 @@ exclude the `und` bucket by default (toggle with `include_und=True`).
 ## Adding a new model
+A guide for adding a new model can be found [here](docs/contributing/adding_a_model.md).
 <!-- readme-test: fast; id=add-model (registers into an isolated registry) -->
 ```python
 # src/commonlid/models/my_model.py

commonlid-0.2.4/docs/contributing/adding_a_model.md ADDED Viewed

@@ -0,0 +1,84 @@
+# Adding a model to the leaderboard
+The CommonLID leaderboard is available [here](https://huggingface.co/spaces/commoncrawl/commonlid).
+1. Add the [model implementation](#adding-a-model-implementation) to `commonlid`
+2. [Evaluate](#evaluate-new-model) the desired model using `commonlid` on the benchmarks
+3. Push the results to the [results repository](https://huggingface.co/datasets/commoncrawl/commonlid-results) via a PR. Once merged they will appear on the leaderboard.
+## Requesting an evaluation
+If you want a model to be evaluated but are not submitting the results yourself, open an issue instead and provide the required information.
+## Adding a model implementation
+Adding a model implementation to `commonlid` is quite straightforward. Typically, it only requires that you provide the text-to-language prediction method and add it to the [model directory](https://github.com/commoncrawl/commonlid-eval/tree/main/src/commonlid/models):
+```python
+# src/commonlid/models/my_model.py
+from collections.abc import Sequence
+from commonlid.core.lid_model import LIDModel
+from commonlid.core.registry import get_model, register_model
+@register_model
+class MyModel(LIDModel):
+    model_id = "my_model"
+    def _predict_batch(self, texts: Sequence[str]) -> list[str | None]:
+        # Return one ISO 639-3 code (or None for undetermined) per input.
+        # `texts` arrives post-OpenLID-normer cleaning by default;
+        # set `requires_preprocessing = False` to receive raw text.
+        return ["eng"] * len(texts)
+assert get_model("my_model").predict(["hi"]) == ["eng"]
+```
+Then import it from `src/commonlid/models/__init__.py` so the decorator
+fires on `import commonlid`:
+```python
+from commonlid.models import my_model as _my_model  # noqa: F401
+```
+### Adding model dependencies
+If you are adding a model that requires additional dependencies, you can add them to the `pyproject.toml` file, under optional dependencies:
+```toml
+cld3 = ["cld3-py>=3.1"]
+```
+This ensures that the implementation does not break if a package is updated.
+Because it is an optional dependency, you can't import it at the top level of the module; instead, import it inside the wrapper's scope (for example, inside the method that uses it).
+## Evaluate new model
+As soon as the model implementation is registered, you can run this command to evaluate your model on CommonLID and its nano version:
+```bash
+commonlid run \
+  --model my_model \
+  --dataset commonlid --dataset commonlid_nano \
+  --output-dir ./data/results
+```
+You may need to reinstall the `commonlid` package with your changes if it was not installed in editable mode.
+## Uploading the results data (PR-based)
+After running the evaluation locally, you can upload the results to our [HF results repository](https://huggingface.co/datasets/commoncrawl/commonlid-results) as follows:
+```bash
+hf auth login                                   # token with write access to the results dataset
+make leaderboard-upload                         # opens a Pull Request from ./data/results
+# Override the target with: make leaderboard-upload LEADERBOARD_REPO=other/repo LEADERBOARD_DIR=./elsewhere
+# Optional: pass --skip-predictions via `uv run commonlid leaderboard upload ...` directly.
+```
+The CLI always opens a Pull Request rather than pushing to the default
+branch, so the dataset owner reviews before merging.

{commonlid-0.2.2 → commonlid-0.2.4}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "commonlid"
-version = "0.2.2"
+version = "0.2.4"
 description = "Evaluate language identification models on CommonLID and other benchmarks."
 readme = "README.md"
 license = { file = "LICENSE" }
@@ -59,6 +59,11 @@ llm = [
     "botocore>=1.35",
 ]
 cld3 = ["cld3-py>=3.1"]
+commonlingua = [
+    # CommonLingua is a 2.35M-param byte-level model; needs torch but not the
+    # transformers stack that [afrolid] pulls in.
+    "torch>=2.4",
+]
 leaderboard = [
     # gradio 4.x imports HfFolder from huggingface_hub, which was removed in
     # huggingface-hub 1.0; gradio 5 dropped that import.
@@ -88,7 +93,7 @@ notebooks = [
     "nbclient>=0.10",
 ]
 all = [
-    "commonlid[afrolid,llm]",
+    "commonlid[afrolid,llm,commonlingua]",
 ]
 [project.scripts]
@@ -208,6 +213,8 @@ omit = [
     # afrolid needs the heavy `[afrolid]` extra (torch + transformers); not
     # installed in dev and so exercised only via mocked unit tests.
     "src/commonlid/models/afrolid.py",
+    # commonlingua needs the `[commonlingua]` extra (torch); same precedent.
+    "src/commonlid/models/commonlingua.py",
 ]
 [tool.coverage.report]

{commonlid-0.2.2 → commonlid-0.2.4}/src/commonlid/evaluation/evaluator.py RENAMED Viewed

@@ -159,6 +159,21 @@ class Evaluator:
         )
         n_with_gold = sum(1 for g in ytrue if g is not None)
         samples_per_second = (len(ytrue) / elapsed) if elapsed > 0 else 0.0
+        # `None` here is meaningful: it tells downstream consumers that the
+        # model's support set is undefined (e.g. LLMs), distinct from a model
+        # that declared an empty set. Errors during discovery downgrade to
+        # the same "unknown" sentinel rather than crashing the run.
+        try:
+            supported = model.discover_supported_languages()
+        except Exception as exc:
+            logger.warning(
+                "%s   discover_supported_languages() raised %s: %s -- recording as None",
+                prefix,
+                type(exc).__name__,
+                exc,
+            )
+            supported = None
+        supported_languages = sorted(supported) if supported is not None else None
         result = Result(
             model_id=model.model_id,
             dataset_id=dataset.dataset_id,
@@ -170,6 +185,7 @@ class Evaluator:
             limit=self.config.limit,
             timestamp=datetime.now(timezone.utc).isoformat(),
             commonlid_version=__version__,
+            supported_languages=supported_languages,
         )
         run_dir = self.config.output_dir / dataset.dataset_id / model.model_id

{commonlid-0.2.2 → commonlid-0.2.4}/src/commonlid/evaluation/results.py RENAMED Viewed

@@ -13,12 +13,20 @@ from typing import Any
 from commonlid.metrics.aggregate import macro_average, micro_average
 from commonlid.metrics.core import LanguageMetrics
-SCHEMA_VERSION = 2
+SCHEMA_VERSION = 3
 @dataclass(slots=True)
 class Result:
-    """Aggregate outcome of one model evaluated on one dataset."""
+    """Aggregate outcome of one model evaluated on one dataset.
+    ``supported_languages`` follows a tri-state convention shared with
+    :meth:`LIDModel.discover_supported_languages`: ``None`` means the
+    model's support set is undefined (e.g. LLM-based models that can be
+    prompted for any language), a list of ISO 639-3 codes is the closed
+    set the model declares, and an empty list is the degenerate "supports
+    zero languages" case. The leaderboard's ``(cov.)`` view consumes this.
+    """
     model_id: str
     dataset_id: str
@@ -32,6 +40,7 @@ class Result:
     commonlid_version: str = ""
     python_version: str = field(default_factory=lambda: sys.version.split()[0])
     platform: str = field(default_factory=platform.platform)
+    supported_languages: list[str] | None = None
     extra: dict[str, Any] = field(default_factory=dict)
     def summary(self) -> dict[str, Any]:
@@ -52,6 +61,7 @@ class Result:
             "macro": macro_average(self.per_language),
             "micro": micro_average(self.per_language),
             "per_language": {lang: asdict(m) for lang, m in sorted(self.per_language.items())},
+            "supported_languages": self.supported_languages,
             "extra": self.extra,
         }

{commonlid-0.2.2 → commonlid-0.2.4}/src/commonlid/leaderboard/app.py RENAMED Viewed

@@ -42,6 +42,26 @@ BLOG_URL = (
 )
 PAPER_URL = "https://arxiv.org/abs/2601.18026"
+WEBSITE_URL = "https://commonlid.org/"
+NEW_MODEL_URL = (
+    "https://github.com/commoncrawl/commonlid-eval/blob/main/docs/contributing/adding_a_model.md"
+)
+Scope = Literal["all", "cov"]
+#: Radio choices shown above each dataset's results table.
+SCOPE_CHOICES: list[tuple[str, Scope]] = [
+    ("Scores are calculated over the whole dataset.", "all"),
+    (
+        "Scores are calculated on the subset of language varieties covered by the model. (cov.)",
+        "cov",
+    ),
+]
+#: Sentinel string used when a row has no cov data (rendered as em-dash).
+_NA_DISPLAY = "—"
 #: Display columns in the headline table (in order). Macro F1 is the headline metric.
 _HEADLINE_COLUMNS: list[tuple[str, str]] = [
     ("model_id", "Model"),
@@ -51,6 +71,19 @@ _HEADLINE_COLUMNS: list[tuple[str, str]] = [
     ("n_languages", "Languages"),
     ("samples_per_second", "Samples/s"),
 ]
+#: Same columns, projected from the ``*_cov`` source fields. Display
+#: labels stay identical so the table layout doesn't shift when the
+#: scope radio is toggled.
+_HEADLINE_COLUMNS_COV: list[tuple[str, str]] = [
+    ("model_id", "Model"),
+    ("macro_f1_cov", "Macro F1"),
+    ("micro_f1_cov", "Micro F1"),
+    ("mean_fpr_cov", "Mean FPR (%)"),
+    ("n_languages_cov", "Languages"),
+    ("samples_per_second", "Samples/s"),
+]
 #: Right-aligned numeric columns get the ``number`` Gradio datatype which
 #: pushes values to the right edge of the cell.
 _GradioDtype = Literal["str", "number", "bool", "date", "markdown", "html"]
@@ -134,6 +167,46 @@ _DRILLDOWN_COLUMN_HELP: list[tuple[str, str]] = [
 ]
+#: Per-column human descriptions for the **(cov.)** view — same metrics,
+#: but restricted to the model's declared support set.
+_HEADLINE_COLUMN_HELP_COV: list[tuple[str, str]] = [
+    ("Model", "Identifier of the language identification model."),
+    (
+        "Macro F1",
+        "Unweighted mean of per-language F1 (x100) **restricted to languages the "
+        "model declares it supports** (paper `(cov.)` definition). Languages outside "
+        "the model's support set are excluded from the average — a model that covers "
+        "a small but accurate subset of the benchmark is no longer penalised for the "
+        "long tail of languages it never claimed to handle. **Higher is better.** "
+        f"Models without a declared support set show `{_NA_DISPLAY}`.",
+    ),
+    (
+        "Micro F1",
+        "Sample-weighted F1 (x100) pooled over the **model-supported subset** of "
+        "gold samples only. **Higher is better.** "
+        f"`{_NA_DISPLAY}` when no support set is declared.",
+    ),
+    (
+        "Mean FPR (%)",
+        "Mean per-language false-positive rate computed only on samples whose gold "
+        "language is in the model's support set; TN counts confusion across other "
+        "supported languages, not the long tail. **Lower is better.** "
+        f"`{_NA_DISPLAY}` when no support set is declared.",
+    ),
+    (
+        "Languages",
+        "Number of model-supported languages that have at least one gold sample in "
+        "this dataset (`|supported ∩ gold|`). This is the size of the slice every "
+        "other `(cov.)` metric is averaged over.",
+    ),
+    (
+        "Samples/s",
+        "Throughput during evaluation (samples processed per second). Unaffected by "
+        "the scope toggle — it is a model-property, not a metric.",
+    ),
+]
 def _columns_help_markdown(items: list[tuple[str, str]]) -> str:
     """Render a (column, description) list as a Markdown bullet block."""
     return "\n".join(f"- **{label}** — {desc}" for label, desc in items)
@@ -157,30 +230,55 @@ def _styled_value(table: Any, right_align_after_col: int = 0) -> dict[str, Any]:
     return {"data": data, "headers": headers, "metadata": {"styling": styling}}
-def _format_table(df: Any) -> Any:
+def _fmt(value: Any, decimals: int, *, scale: float = 1.0) -> str:
+    """Format a numeric value with ``decimals`` precision, em-dash for ``None``/``NaN``."""
+    import pandas as pd
+    if value is None or (isinstance(value, float) and pd.isna(value)):
+        return _NA_DISPLAY
+    return f"{float(value) * scale:.{decimals}f}"
+def _format_table(df: Any, scope: Scope = "all") -> Any:
     """Project + format a results DataFrame for one Gradio tab.
     Numeric columns are converted to **fixed-decimal strings** (e.g. ``0.00``
     not ``0``) so the rendered cells line up vertically; sort ordering is
-    preserved by sorting on the raw ``macro_f1`` *before* formatting.
+    preserved by sorting on the raw float *before* formatting.
     - Macro F1 / Micro F1 / Samples/s use **1 decimal**.
     - Mean FPR (%) uses **2 decimals**.
+    - In ``scope="cov"``, rows without ``supported_languages`` data render
+      em-dashes for every cov metric and sort to the bottom.
     """
     import pandas as pd
+    columns = _HEADLINE_COLUMNS_COV if scope == "cov" else _HEADLINE_COLUMNS
+    display_labels = [label for _, label in columns]
     if df.empty:
-        return pd.DataFrame(columns=[label for _, label in _HEADLINE_COLUMNS])
+        return pd.DataFrame(columns=display_labels)
     out = df.copy()
-    # Sort on the raw float so the resulting order is correct; format only
-    # afterwards (string sort would order "10" before "9").
-    out = out.sort_values("macro_f1", ascending=False, kind="stable").reset_index(drop=True)
-    out["macro_f1"] = (out["macro_f1"] * 100).map(lambda x: f"{x:.1f}")
-    out["micro_f1"] = (out["micro_f1"] * 100).map(lambda x: f"{x:.1f}")
-    out["mean_fpr"] = (out["mean_fpr"] * 100).map(lambda x: f"{x:.2f}")
-    out["samples_per_second"] = out["samples_per_second"].map(lambda x: f"{x:.1f}")
-    out = out[[k for k, _ in _HEADLINE_COLUMNS]]
-    out.columns = [label for _, label in _HEADLINE_COLUMNS]
+    source = {key: key for key, _ in columns}
+    sort_key = source["macro_f1_cov"] if scope == "cov" else source["macro_f1"]
+    # ``na_position="last"`` sinks rows without cov data to the bottom of
+    # the (cov.) view; the "all" view has no NaNs in this column.
+    out = out.sort_values(sort_key, ascending=False, kind="stable", na_position="last")
+    out = out.reset_index(drop=True)
+    macro_key = source["macro_f1_cov"] if scope == "cov" else source["macro_f1"]
+    micro_key = source["micro_f1_cov"] if scope == "cov" else source["micro_f1"]
+    fpr_key = source["mean_fpr_cov"] if scope == "cov" else source["mean_fpr"]
+    langs_key = source["n_languages_cov"] if scope == "cov" else source["n_languages"]
+    out[macro_key] = out[macro_key].map(lambda x: _fmt(x, 1, scale=100))
+    out[micro_key] = out[micro_key].map(lambda x: _fmt(x, 1, scale=100))
+    out[fpr_key] = out[fpr_key].map(lambda x: _fmt(x, 2, scale=100))
+    out[langs_key] = out[langs_key].map(lambda x: _fmt(x, 0))
+    out["samples_per_second"] = out["samples_per_second"].map(lambda x: _fmt(x, 1))
+    out = out[[k for k, _ in columns]]
+    out.columns = display_labels
     return out
@@ -314,23 +412,29 @@ def _format_license(license_name: str, license_url: str | None) -> str:
 def _make_select_handler(
     dataset_id: str,
-    table: Any,
     snapshot_root: Path,
 ) -> Any:
     """Build the row-select callback as a closure over the captured state.
+    Uses ``gr.SelectData.row_value`` (Gradio's per-click payload that
+    contains the clicked row as a 1-D list) so the drilldown picks up the
+    *current* table ordering — switching the scope radio and then clicking
+    a row resolves to the row at its post-toggle position. Passing the
+    Dataframe component as an event input would not work: Gradio 6
+    preprocesses Dataframe inputs into ``pandas.DataFrame`` objects, not
+    the ``{"data", "headers"}`` dict we feed in via ``_styled_value``.
     Gradio inspects ``__defaults__`` when registering events, and comparing a
     DataFrame default against a type annotation hits an unimplemented arrow
     dtype path. A closure keeps the state out of the function signature.
     """
     def _on_select(evt: gr.SelectData) -> tuple[str, Any]:
-        if evt.index is None:
+        if evt.index is None or not evt.row_value:
             return ("_Click a row to load per-language metrics._", None)
-        row_idx = evt.index[0] if isinstance(evt.index, list | tuple) else evt.index
         try:
-            model_id = table.iloc[row_idx]["Model"]
-        except (IndexError, KeyError):
+            model_id = evt.row_value[0]
+        except (IndexError, TypeError):
             return ("_Could not resolve clicked row._", None)
         per_lang = _per_language_drilldown(snapshot_root, dataset_id, model_id)
         return (
@@ -341,6 +445,19 @@ def _make_select_handler(
     return _on_select
+def _make_scope_handler(sub_df: Any) -> Any:
+    """Build the scope-radio change callback: swap the table data + legend in lockstep."""
+    def _on_change(scope: Scope) -> tuple[Any, str]:
+        help_items = _HEADLINE_COLUMN_HELP_COV if scope == "cov" else _HEADLINE_COLUMN_HELP
+        return (
+            _styled_value(_format_table(sub_df, scope=scope)),
+            _columns_help_markdown(help_items),
+        )
+    return _on_change
 def build_app(
     *,
     repo_id: str = DEFAULT_REPO_ID,
@@ -369,7 +486,7 @@ def build_app(
         f"Headline metric: **macro F1**. Models are ranked by macro F1 "
         f"within each tab; click a row to see per-language metrics.\n"
         f"\n"
-        f"📝 [Blog post]({BLOG_URL})  •  📄 [Paper]({PAPER_URL})"
+        f"🌐 [Website]({WEBSITE_URL})  •  📝 [Blog post]({BLOG_URL})  •  📄 [Paper]({PAPER_URL})  •  🆕 [Add a model]({NEW_MODEL_URL})"
     )
     repo_url = f"https://huggingface.co/datasets/{repo_id}"
     if revision:
@@ -384,7 +501,7 @@ def build_app(
                 with gr.Tab(label=tab_label):
                     gr.Markdown(_dataset_metadata_markdown(dataset_id))
                     sub = df[df["dataset_id"] == dataset_id]
-                    table = _format_table(sub)
+                    table = _format_table(sub, scope="all")
                     if table.empty:
                         gr.Markdown(
                             f"_No results for `{dataset_id}` in `{repo_id}` yet."
@@ -394,6 +511,12 @@ def build_app(
                         )
                         continue
+                    scope_radio = gr.Radio(
+                        choices=SCOPE_CHOICES,
+                        value="all",
+                        label="Scoring scope",
+                        interactive=True,
+                    )
                     leaderboard = gr.Dataframe(
                         value=_styled_value(table),
                         datatype=_HEADLINE_DATATYPES,
@@ -402,7 +525,7 @@ def build_app(
                         label=f"{dataset_id} — sorted by Macro F1",
                     )
                     with gr.Accordion("What do these columns mean?", open=False):
-                        gr.Markdown(_columns_help_markdown(_HEADLINE_COLUMN_HELP))
+                        legend = gr.Markdown(_columns_help_markdown(_HEADLINE_COLUMN_HELP))
                     drilldown_label = gr.Markdown("_Click a row to load per-language metrics._")
                     # Seed the drilldown grid with an empty DataFrame so the Component
                     # has stable column headers before the first row click.
@@ -415,8 +538,13 @@ def build_app(
                     with gr.Accordion("What do these per-language columns mean?", open=False):
                         gr.Markdown(_columns_help_markdown(_DRILLDOWN_COLUMN_HELP))
+                    scope_radio.change(
+                        _make_scope_handler(sub),
+                        inputs=[scope_radio],
+                        outputs=[leaderboard, legend],
+                    )
                     leaderboard.select(
-                        _make_select_handler(dataset_id, table, snapshot_root),
+                        _make_select_handler(dataset_id, snapshot_root),
                         outputs=[drilldown_label, drilldown],
                     )
         gr.Markdown(footer)

commonlid 0.2.2__tar.gz → 0.2.4__tar.gz

commonlid 0.2.2__tar.gz → 0.2.4__tar.gz