PyPI - datamaestro-text - Versions diffs - 2025.9.11__py3-none-any.whl → 2026.2.2__py3-none-any.whl - Mend

datamaestro-text 2025.9.11__py3-none-any.whl → 2026.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

datamaestro_text/__init__.py +1 -1
datamaestro_text/config/com/github/ikat.py +0 -1
datamaestro_text/config/com/oscar-corpus.py +1 -1
datamaestro_text/config/com/smashwords/bookcorpus.py +1 -1
datamaestro_text/config/edu/stanford/aclimdb.py +1 -1
datamaestro_text/config/edu/stanford/glove.py +0 -1
datamaestro_text/config/fr/granddebat.py +186 -0
datamaestro_text/config/gov/nist/ir/covid.py +1 -2
datamaestro_text/config/io/metamind/research/wikitext.py +1 -1
datamaestro_text/data/conversation/__init__.py +6 -6
datamaestro_text/data/conversation/base.py +4 -4
datamaestro_text/data/conversation/canard.py +3 -4
datamaestro_text/data/conversation/ikat.py +0 -1
datamaestro_text/data/conversation/orconvqa.py +3 -3
datamaestro_text/data/debate/__init__.py +5 -0
datamaestro_text/data/debate/granddebat.py +68 -0
datamaestro_text/data/embeddings.py +1 -0
datamaestro_text/data/ir/__init__.py +1 -1
datamaestro_text/data/ir/base.py +1 -1
datamaestro_text/data/ir/csv.py +7 -8
datamaestro_text/data/ir/data.py +1 -1
datamaestro_text/data/ir/formats.py +2 -3
datamaestro_text/data/ir/stores.py +1 -1
datamaestro_text/data/text.py +1 -0
datamaestro_text/datasets/__init__.py +1 -0
datamaestro_text/datasets/irds/data.py +14 -20
datamaestro_text/datasets/irds/datasets.py +1 -1
datamaestro_text/download/tmdb.py +0 -1
datamaestro_text/transforms/ir/__init__.py +13 -14
datamaestro_text/utils/shuffle.py +1 -1
datamaestro_text/version.py +3 -3
{datamaestro_text-2025.9.11.dist-info → datamaestro_text-2026.2.2.dist-info}/METADATA +15 -17
{datamaestro_text-2025.9.11.dist-info → datamaestro_text-2026.2.2.dist-info}/RECORD +36 -33
{datamaestro_text-2025.9.11.dist-info → datamaestro_text-2026.2.2.dist-info}/WHEEL +1 -2
datamaestro_text-2025.9.11.dist-info/top_level.txt +0 -1
{datamaestro_text-2025.9.11.dist-info → datamaestro_text-2026.2.2.dist-info}/entry_points.txt +0 -0
{datamaestro_text-2025.9.11.dist-info → datamaestro_text-2026.2.2.dist-info}/licenses/LICENSE +0 -0

datamaestro_text/__init__.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import datamaestro
-from .version import version, version_tuple
+from .version import version as version, version_tuple as version_tuple
 class Repository(datamaestro.Repository):

datamaestro_text/config/com/github/ikat.py CHANGED Viewed

@@ -1,6 +1,5 @@
 # See documentation on https://datamaestro.readthedocs.io
-import bz2
 from datamaestro.download import reference
 from datamaestro.definitions import datatasks, datatags, dataset
 from datamaestro_text.data.conversation.base import ConversationUserTopics

datamaestro_text/config/com/oscar-corpus.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from datamaestro.definitions import argument, datatasks, datatags, dataset
+from datamaestro.definitions import dataset
 from datamaestro.download.single import filedownloader
 from datamaestro_text.data.text import TextFile
 from datamaestro.utils import HashCheck

datamaestro_text/config/com/smashwords/bookcorpus.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # See documentation on https://datamaestro.readthedocs.io
-from datamaestro.definitions import argument, datatasks, datatags, dataset
+from datamaestro.definitions import datatasks, datatags, dataset
 from datamaestro_text.data.text import TextFolder
 from datamaestro.download.archive import tardownloader
 from datamaestro.utils import HashCheck

datamaestro_text/config/edu/stanford/aclimdb.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from datamaestro.data.ml import FolderBased, Supervised
-from datamaestro.definitions import datatasks, datatags, dataset
+from datamaestro.definitions import dataset
 from datamaestro.download.archive import tardownloader

datamaestro_text/config/edu/stanford/glove.py CHANGED Viewed

@@ -5,7 +5,6 @@ GloVe is an unsupervised learning algorithm for obtaining vector representations
 """
 from datamaestro.definitions import dataset
-from datamaestro.data import Base, Generic
 from datamaestro.download import reference
 from datamaestro.download.archive import zipdownloader
 from datamaestro.download.single import filedownloader

datamaestro_text/config/fr/granddebat.py ADDED Viewed

@@ -0,0 +1,186 @@
+# See documentation on https://datamaestro.readthedocs.io
+from pathlib import Path
+from datamaestro.definitions import datatags, dataset
+from datamaestro_text.data.debate import GrandDebatFile
+from datamaestro.download.single import filedownloader
+from datamaestro.utils import HashCheck
+from datamaestro.stream import Transform
+import io
+import json
+import ijson
+import os
+import threading
+class JsonToJsonl(Transform):
+    """Transforms a JSON file with an array into a JSONL file with one line per
+    array element"""
+    def __call__(self, fileobj: io.IOBase) -> io.IOBase:
+        # Stream items from the top-level array into a read-end pipe.
+        try:
+            fileobj.seek(0)
+        except Exception:
+            pass
+        r_fd, w_fd = os.pipe()
+        r_file = os.fdopen(r_fd, "rb")
+        w_file = os.fdopen(w_fd, "wb")
+        def _writer(fin, fout):
+            try:
+                for item in ijson.items(fin, "item"):
+                    line = json.dumps(item, ensure_ascii=False) + "\n"
+                    fout.write(line.encode("utf-8"))
+                fout.close()
+            except Exception:
+                try:
+                    fout.close()
+                except Exception:
+                    pass
+        t = threading.Thread(target=_writer, args=(fileobj, w_file), daemon=True)
+        t.start()
+        return r_file
+@filedownloader(
+    "la_transition_ecologique_2019_03_21.jsonl",
+    "http://opendata.auth-6f31f706db6f4a24b55f42a6a79c5086.storage.sbg.cloud.ovh.net/2019-03-21/LA_TRANSITION_ECOLOGIQUE.json",
+    checker=HashCheck("c4ed3a8b8c43d5806d1f090e03f7aa91"),
+    transforms=JsonToJsonl(),
+)
+@datatags("politics", "debate", "french")
+@dataset(
+    GrandDebatFile,
+    url="https://granddebat.fr",
+)
+def transition(la_transition_ecologique_2019_03_21: Path):
+    """Grand Débat National (transition écologique)
+    The *Grand Débat National* (GDN) is a country-wide citizen consultation held
+    in France in 2019.
+    The consultation prompted citizens to express their views across four main
+    themes: *Taxation and public spending*, *Organization of the state and
+    public services*, *Democracy and citizenship*, and *Ecological transition*.
+    A significant portion of this consultation involved online questionnaires,
+    each concluding with a critical open-ended prompt: "Do you have anything to
+    add about [theme]?".
+    """
+    return GrandDebatFile.C(path=la_transition_ecologique_2019_03_21)
+@filedownloader(
+    "fiscalité_et_dépenses_publiques_2019_03_21.jsonl",
+    "http://opendata.auth-6f31f706db6f4a24b55f42a6a79c5086.storage.sbg.cloud.ovh.net/2019-03-21/LA_FISCALITE_ET_LES_DEPENSES_PUBLIQUES.json",
+    checker=HashCheck("c4ed3a8b8c43d5806d1f090e03f7aa91"),
+    transforms=JsonToJsonl(),
+)
+@datatags("politics", "debate", "french")
+@dataset(
+    GrandDebatFile,
+    url="https://granddebat.fr",
+)
+def fiscalité(fiscalité_et_dépenses_publiques_2019_03_21: Path):
+    """Grand Débat National (fiscalité et dépenses publiques)
+    The *Grand Débat National* (GDN) is a country-wide citizen consultation held
+    in France in 2019.
+    The consultation prompted citizens to express their views across four main
+    themes: *Taxation and public spending*, *Organization of the state and
+    public services*, *Democracy and citizenship*, and *Ecological transition*.
+    A significant portion of this consultation involved online questionnaires,
+    each concluding with a critical open-ended prompt: "Do you have anything to
+    add about [theme]?".
+    """
+    return GrandDebatFile.C(path=fiscalité_et_dépenses_publiques_2019_03_21)
+@filedownloader(
+    "democratie_et_citoyennete_2019_03_21.jsonl",
+    "http://opendata.auth-6f31f706db6f4a24b55f42a6a79c5086.storage.sbg.cloud.ovh.net/2019-03-21/DEMOCRATIE_ET_CITOYENNETE.json",
+    checker=HashCheck("049aaeca7e51747f2da5d68143c760fa"),
+    transforms=JsonToJsonl(),
+)
+@datatags("politics", "debate", "french")
+@dataset(
+    GrandDebatFile,
+    url="https://granddebat.fr",
+)
+def démocratie(democratie_et_citoyennete_2019_03_21: Path):
+    """Grand Débat National (démocratie et citoyenneté)
+    The *Grand Débat National* (GDN) is a country-wide citizen consultation held
+    in France in 2019.
+    The consultation prompted citizens to express their views across four main
+    themes: *Taxation and public spending*, *Organization of the state and
+    public services*, *Democracy and citizenship*, and *Ecological transition*.
+    A significant portion of this consultation involved online questionnaires,
+    each concluding with a critical open-ended prompt: "Do you have anything to
+    add about [theme]?".
+    """
+    return GrandDebatFile.C(path=democratie_et_citoyennete_2019_03_21)
+@filedownloader(
+    "organisation_etat_services_publics_2019_03_21.jsonl",
+    "http://opendata.auth-6f31f706db6f4a24b55f42a6a79c5086.storage.sbg.cloud.ovh.net/2019-03-21/ORGANISATION_DE_LETAT_ET_DES_SERVICES_PUBLICS.json",
+    checker=HashCheck("0ccb7c401889f738b73b0caab897a68b"),
+    transforms=JsonToJsonl(),
+)
+@datatags("politics", "debate", "french")
+@dataset(
+    GrandDebatFile,
+    url="https://granddebat.fr",
+)
+def organisation(organisation_etat_services_publics_2019_03_21: Path):
+    """Grand Débat National (organisation de l'État et des services publics)
+    The *Grand Débat National* (GDN) is a country-wide citizen consultation held
+    in France in 2019.
+    The consultation prompted citizens to express their views across four main
+    themes: *Taxation and public spending*, *Organization of the state and
+    public services*, *Democracy and citizenship*, and *Ecological transition*.
+    A significant portion of this consultation involved online questionnaires,
+    each concluding with a critical open-ended prompt: "Do you have anything to
+    add about [theme]?".
+    """
+    return GrandDebatFile.C(path=organisation_etat_services_publics_2019_03_21)
+@filedownloader(
+    "les_evenements_2019_03_21.jsonl",
+    "http://opendata.auth-6f31f706db6f4a24b55f42a6a79c5086.storage.sbg.cloud.ovh.net/2019-03-21/LES_EVENEMENTS.json",
+    checker=HashCheck("c4ed3a8b8c43d5806d1f090e03f7aa91"),
+    transforms=JsonToJsonl(),
+)
+@datatags("politics", "debate", "french")
+@dataset(
+    GrandDebatFile,
+    url="https://granddebat.fr",
+)
+def evenements(les_evenements_2019_03_21: Path):
+    """Grand Débat National (événements)
+    The *Grand Débat National* (GDN) is a country-wide citizen consultation held
+    in France in 2019.
+    The consultation prompted citizens to express their views across four main
+    themes: *Taxation and public spending*, *Organization of the state and
+    public services*, *Democracy and citizenship*, and *Ecological transition*.
+    A significant portion of this consultation involved online questionnaires,
+    each concluding with a critical open-ended prompt: "Do you have anything to
+    add about [theme]?".
+    """
+    return GrandDebatFile.C(path=les_evenements_2019_03_21)

datamaestro_text/config/gov/nist/ir/covid.py CHANGED Viewed

@@ -1,5 +1,4 @@
-"""CORD-19 dataset
-"""
+"""CORD-19 dataset"""
 from datamaestro.annotations.agreement import useragreement
 from datamaestro.definitions import datatasks, dataset

datamaestro_text/config/io/metamind/research/wikitext.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from datamaestro.data import Base, File
+from datamaestro.data import File
 from datamaestro.definitions import (
     datatasks,
     datatags,

datamaestro_text/data/conversation/__init__.py CHANGED Viewed

@@ -1,8 +1,8 @@
 from .base import (
-    AnswerEntry,
-    ConversationDataset,
-    ConversationHistory,
-    ConversationHistoryItem,
-    DecontextualizedItem,
-    EntryType,
+    AnswerEntry as AnswerEntry,
+    ConversationDataset as ConversationDataset,
+    ConversationHistory as ConversationHistory,
+    ConversationHistoryItem as ConversationHistoryItem,
+    DecontextualizedItem as DecontextualizedItem,
+    EntryType as EntryType,
 )

datamaestro_text/data/conversation/base.py CHANGED Viewed

@@ -2,13 +2,13 @@ from abc import ABC, abstractmethod
 from enum import Enum
 from datamaestro_text.data.ir.base import IDItem, SimpleTextItem
 from experimaestro import Param
-from typing import Dict, Generic, Iterator, List, Optional, Sequence, Tuple
+from typing import Dict, Iterator, List, Optional, Sequence, Tuple
 from attr import define
 from datamaestro.record import record_type
 from datamaestro.data import Base
 from datamaestro.record import Record, Item
 from datamaestro_text.data.ir import TopicRecord, Topics
-from datamaestro_text.utils.iter import FactoryIterable, LazyList, RangeView
+from datamaestro_text.utils.iter import FactoryIterable, LazyList
 # ---- Basic types
@@ -267,7 +267,7 @@ class ConversationUserTopics(Topics):
         """Returns an iterator over topics"""
         # Extracts topics from conversations, Each user query is a topic (can perform retrieval on it)
         # TODO: merge with xpmir.learning.DatasetConversationBase -> same logic
         records: List[TopicRecord] = []
         for conversation in self.conversations.__iter__():
             nodes = [
@@ -279,4 +279,4 @@ class ConversationUserTopics(Topics):
                 records.append(
                     node.entry.update(ConversationHistoryItem(node.history()))
                 )
-        return iter(records)
+        return iter(records)

datamaestro_text/data/conversation/canard.py CHANGED Viewed

@@ -11,7 +11,6 @@ from datamaestro_text.data.conversation.base import (
     EntryType,
 )
 from datamaestro_text.data.ir import IDItem, SimpleTextItem
-import logging
 @define(kw_only=True)
@@ -82,9 +81,9 @@ class CanardDataset(ConversationDataset, File):
                 )
             else:
                 # The utterance before the last is the last user query
-                assert (
-                    entry.history[-2] == history[-1][SimpleTextItem].text
-                ), f"{entry.dialogue_id} {entry.history} / {history[-4:-1]}"
+                assert entry.history[-2] == history[-1][SimpleTextItem].text, (
+                    f"{entry.dialogue_id} {entry.history} / {history[-4:-1]}"
+                )
                 # The last utterance is the system side
                 history.append(

datamaestro_text/data/conversation/ikat.py CHANGED Viewed

@@ -5,7 +5,6 @@ import logging
 from datamaestro.data import File
 from datamaestro.record import Record
-from datamaestro_text.data.ir import Topics
 from datamaestro_text.data.ir.base import (
     IDItem,
     SimpleTextItem,

datamaestro_text/data/conversation/orconvqa.py CHANGED Viewed

@@ -113,9 +113,9 @@ class OrConvQADataset(ConversationDataset, File):
                 if relevance > 0:
                     relevances[rank] = (entry.answer.answer_start, None)
-            assert (
-                len(relevances) <= 1
-            ), f"Too many relevance labels ({len(relevances)}) for {entry.query_id}"
+            assert len(relevances) <= 1, (
+                f"Too many relevance labels ({len(relevances)}) for {entry.query_id}"
+            )
             history.append(
                 Record(

datamaestro_text/data/debate/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Data classes for debate datasets"""
+from .granddebat import GrandDebatEntry, GrandDebatFile, GrandDebatResponse
+__all__ = ["GrandDebatEntry", "GrandDebatFile", "GrandDebatResponse"]

datamaestro_text/data/debate/granddebat.py ADDED Viewed

@@ -0,0 +1,68 @@
+"""Data classes for the Grand Débat National dataset"""
+import json
+from dataclasses import dataclass, field
+from typing import Iterator, List, Optional
+from datamaestro.data import File
+@dataclass
+class GrandDebatResponse:
+    """A response to a question in the Grand Débat National"""
+    question_id: str
+    question_title: str
+    value: Optional[str]
+    formatted_value: Optional[str]
+@dataclass
+class GrandDebatEntry:
+    """An entry (contribution) in the Grand Débat National dataset"""
+    id: str
+    reference: str
+    title: str
+    created_at: str
+    published_at: str
+    updated_at: Optional[str]
+    trashed: bool
+    trashed_status: Optional[str]
+    author_id: str
+    author_type: str
+    author_zip_code: str
+    responses: List[GrandDebatResponse] = field(default_factory=list)
+class GrandDebatFile(File):
+    """A Grand Débat National JSONL file with iteration support"""
+    def __iter__(self) -> Iterator[GrandDebatEntry]:
+        """Iterate over entries in the JSONL file"""
+        with self.path.open("r", encoding="utf-8") as f:
+            for line in f:
+                data = json.loads(line)
+                responses = [
+                    GrandDebatResponse(
+                        question_id=r["questionId"],
+                        question_title=r["questionTitle"],
+                        value=r.get("value"),
+                        formatted_value=r.get("formattedValue"),
+                    )
+                    for r in data.get("responses", [])
+                ]
+                yield GrandDebatEntry(
+                    id=data["id"],
+                    reference=data["reference"],
+                    title=data["title"],
+                    created_at=data["createdAt"],
+                    published_at=data["publishedAt"],
+                    updated_at=data.get("updatedAt"),
+                    trashed=data["trashed"],
+                    trashed_status=data.get("trashedStatus"),
+                    author_id=data["authorId"],
+                    author_type=data["authorType"],
+                    author_zip_code=data["authorZipCode"],
+                    responses=responses,
+                )

datamaestro_text/data/embeddings.py CHANGED Viewed

@@ -20,6 +20,7 @@ class WordEmbeddings(Base):
 class WordEmbeddingsText(WordEmbeddings, File):
     """Word embeddings as a text word / values"""
     encoding: Meta[str] = "utf-8"
     def load(self):

datamaestro_text/data/ir/__init__.py CHANGED Viewed

@@ -6,7 +6,7 @@ from functools import cached_property
 import logging
 from pathlib import Path
 from attrs import define
-from typing import Callable, Dict, Iterator, List, Optional, Tuple, Type, TYPE_CHECKING
+from typing import Callable, Dict, Iterator, List, Optional, Tuple, Type
 import random
 from experimaestro import Config
 from datamaestro.definitions import datatasks, Param, Meta

datamaestro_text/data/ir/base.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from abc import ABC, abstractmethod
 from attrs import define
 from typing import List
-from datamaestro.record import Record, Item, record_type
+from datamaestro.record import Record, Item
 TopicRecord = DocumentRecord = Record

datamaestro_text/data/ir/csv.py CHANGED Viewed

@@ -1,27 +1,26 @@
 from functools import cached_property
 from pathlib import Path
-from typing import Iterator, Tuple, Type
-from experimaestro import Param, Option, Constant, Meta
-from datamaestro.definitions import argument
+from experimaestro import Param, Meta
 from datamaestro.record import Record, RecordType
 import datamaestro_text.data.ir as ir
 from datamaestro_text.data.ir.base import IDItem, SimpleTextItem
 from datamaestro_text.interfaces.plaintext import read_tsv
-@argument("path", type=Path)
-@argument("separator", type=str, default="\t", ignored=True)
 class AdhocRunWithText(ir.AdhocRun):
     "(qid, doc.id, query, passage)"
-    pass
+    path: Meta[Path]
+    separator: Meta[str] = "\t"
-@argument("path", type=Path)
-@argument("separator", type=str, default="\t", ignored=True)
 class Topics(ir.Topics):
     "Pairs of query id - query using a separator"
+    path: Meta[Path]
+    separator: Meta[str] = "\t"
     def iter(self):
         return (
             Record(IDItem(qid), SimpleTextItem(title))

datamaestro_text/data/ir/data.py CHANGED Viewed

@@ -1 +1 @@
-from .base import *
+from .base import * # noqa: F403

datamaestro_text/data/ir/formats.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from functools import cached_property
-from typing import ClassVar, Tuple, List
+from typing import Tuple, List
 from attrs import define
 from datamaestro.record import record_type
 from ir_datasets.datasets.wapo import WapoDocMedia
@@ -11,9 +11,8 @@ from ir_datasets.datasets.cord19 import Cord19FullTextSection
 class DocumentWithTitle(TextItem):
     """Web document with title and body"""
-    body: str
     title: str
+    body: str
     @cached_property
     def text(self):

datamaestro_text/data/ir/stores.py CHANGED Viewed

@@ -82,7 +82,7 @@ class IKatClueWeb22DocumentStore(LZ4DocumentStore):
                 file_checksum = hasher.hexdigest()
                 assert file_checksum == checksum, (
-                    f"Expected {checksum}, " f"got {file_checksum} for {filename}"
+                    f"Expected {checksum}, got {file_checksum} for {filename}"
                 )
             # Get the MD5 hashes of all the passages

datamaestro_text/data/text.py CHANGED Viewed

@@ -14,6 +14,7 @@ class TrainingText(Supervised):
 class TextFolder(Folder):
     "A folder composed of texts"
     pass

datamaestro_text/datasets/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+# IR datasets integration package

datamaestro_text/datasets/irds/data.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import logging
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
-from functools import partial
+from functools import cached_property, partial
 from pathlib import Path
 from typing import Dict, Iterator, List, NamedTuple, Tuple, Type
@@ -9,7 +8,6 @@ import ir_datasets
 import ir_datasets.datasets as _irds
 from datamaestro.record import RecordType, record_type
 from experimaestro import Config, Meta, Option, Param
-from experimaestro.compat import cached_property
 from ir_datasets.formats import (
     GenericDoc,
     GenericDocPair,
@@ -112,6 +110,9 @@ class Documents(ir.DocumentStore, IRDSId):
         _irds.beir.BeirCordDoc: tuple_constructor(
             formats.CordDocument, "doc_id", "text", "title", "url", "pubmed_id"
         ),
+        _irds.miracl.MiraclDoc: tuple_constructor(
+            formats.DocumentWithTitle, "doc_id", "title", "text"
+        ),
         _irds.beir.BeirTitleDoc: tuple_constructor(
             formats.TitleDocument, "doc_id", "text", "title"
         ),
@@ -202,11 +203,11 @@ class Documents(ir.DocumentStore, IRDSId):
     def iter(self) -> Iterator[ir.DocumentRecord]:
         """Returns an iterator over adhoc documents"""
-        for doc in self.dataset.docs_iter():
+        for doc in self._docs:
             yield self.converter(self.document_recordtype, doc)
     def iter_documents_from(self, start=0):
-        for doc in self.dataset.docs_iter()[start:]:
+        for doc in self._docs[start:]:
             yield self.converter(self.document_recordtype, doc)
     @property
@@ -219,19 +220,22 @@ class Documents(ir.DocumentStore, IRDSId):
         try:
             # Translate to ir datasets docstore options
             import ir_datasets.indices as ir_indices
             file_access = {
                 ir.FileAccess.MMAP: ir_indices.FileAccess.MMAP,
                 ir.FileAccess.FILE: ir_indices.FileAccess.FILE,
-                ir.FileAccess.MEMORY: ir_indices.FileAccess.MEMORY
+                ir.FileAccess.MEMORY: ir_indices.FileAccess.MEMORY,
             }[self.file_access]
             kwargs = {"options": ir_indices.DocstoreOptions(file_access=file_access)}
         except ImportError:
-            logging.warning("This version of ir-datasets cannot handle docstore options")
+            logging.warning(
+                "This version of ir-datasets cannot handle docstore options"
+            )
         return self.dataset.docs_store(**kwargs)
-    @cached_property
+    @property
     def _docs(self):
-        return self.dataset.docs_iter()
+        return iter(self.store)
     def docid_internal2external(self, ix: int):
         return self._docs[ix].doc_id
@@ -261,12 +265,6 @@ class Documents(ir.DocumentStore, IRDSId):
         return converter
-if hasattr(_irds, "miracl"):
-    Documents.CONVERTERS[_irds.miracl.MiraclDoc] = tuple_constructor(
-        formats.DocumentWithTitle, "doc_id", "text", "title"
-    )
 class LZ4DocumentStore(ir.DocumentStore, ABC):
     """A LZ4-based document store"""
@@ -614,11 +612,7 @@ class Cast2022TopicsHandler(CastTopicsHandler):
             records = []
             nodes: Dict[str, ConversationTreeNode] = {}
-            for (
-                query
-            ) in (
-                self.dataset.dataset.queries_iter()
-            ):  # type: _irds.trec_cast.Cast2022Query
+            for query in self.dataset.dataset.queries_iter():  # type: _irds.trec_cast.Cast2022Query
                 parent = nodes[query.parent_id] if query.parent_id else None
                 if query.participant == "User":

datamaestro_text/datasets/irds/datasets.py CHANGED Viewed

@@ -103,7 +103,7 @@ class TrainingTripletsDataset(Dataset):
     SUFFIX = "docpairs"
     def _prepare(self, download=False) -> Documents:
-        return TrainingTriplets(
+        return TrainingTriplets.C(
             id=self.fullid,
         )

datamaestro_text/download/tmdb.py CHANGED Viewed

@@ -10,7 +10,6 @@ from collections import namedtuple
 from datamaestro.download import Download
 from datamaestro.definitions import AbstractDataset
-from datamaestro.utils import TemporaryDirectory
 APIKEY_KEY = "org.themoviedb.apikey"

datamaestro_text/transforms/ir/__init__.py CHANGED Viewed

@@ -2,7 +2,6 @@ import logging
 import gzip
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Type
 from experimaestro import Config, Task, Param, Annotated, pathgenerator, Option, tqdm
 import numpy as np
 from datamaestro.record import RecordType
@@ -131,26 +130,26 @@ class ShuffledTrainingTripletsLines(Task):
     def __validate__(self):
         if self.topic_ids:
-            assert self.data.topic_recordtype.has(
-                ir.IDItem
-            ), f"No topic ID in the source data ({self.data.topic_recordtype})"
+            assert self.data.topic_recordtype.has(ir.IDItem), (
+                f"No topic ID in the source data ({self.data.topic_recordtype})"
+            )
         else:
-            assert self.data.topic_recordtype.has(
-                ir.TextItem
-            ), f"No topic text in the source data ({self.data.topic_recordtype})"
+            assert self.data.topic_recordtype.has(ir.TextItem), (
+                f"No topic text in the source data ({self.data.topic_recordtype})"
+            )
         if self.doc_ids:
-            assert self.data.document_recordtype.has(
-                ir.IDItem
-            ), "No doc ID in the source data"
+            assert self.data.document_recordtype.has(ir.IDItem), (
+                "No doc ID in the source data"
+            )
         else:
-            assert self.data.document_recordtype.has(
-                ir.TextItem
-            ), "No doc text in the source data"
+            assert self.data.document_recordtype.has(ir.TextItem), (
+                "No doc text in the source data"
+            )
     def task_outputs(self, dep):
         return dep(
-            ir.TrainingTripletsLines(
+            ir.TrainingTripletsLines.C(
                 id="",
                 path=self.path,
                 topic_ids=self.topic_ids,

datamaestro_text/utils/shuffle.py CHANGED Viewed

@@ -50,7 +50,7 @@ def shuffle(
     *,
     memory=MEMORY,
     random=None,
-    tmp_path: Optional[Path] = None
+    tmp_path: Optional[Path] = None,
 ):
     """Shuffle using temporary file"""
     if random is None:

datamaestro_text/version.py CHANGED Viewed

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
-__version__ = version = '2025.9.11'
-__version_tuple__ = version_tuple = (2025, 9, 11)
+__version__ = version = '2026.2.2'
+__version_tuple__ = version_tuple = (2026, 2, 2)
-__commit_id__ = commit_id = 'gadcc9bd27'
+__commit_id__ = commit_id = None

{datamaestro_text-2025.9.11.dist-info → datamaestro_text-2026.2.2.dist-info}/METADATA RENAMED Viewed

@@ -1,33 +1,31 @@
 Metadata-Version: 2.4
 Name: datamaestro-text
-Version: 2025.9.11
+Version: 2026.2.2
 Summary: Datamaestro module for text-related datasets
+Project-URL: Homepage, https://github.com/experimaestro/datamaestro_text
+Project-URL: Documentation, https://datamaestro-text.readthedocs.io/en/latest/
+Project-URL: Repository, https://github.com/experimaestro/datamaestro_text
+Project-URL: Bug Tracker, https://github.com/experimaestro/datamaestro_text/issues
 Author-email: Benjamin Piwowarski <benjamin@piwowarski.fr>
-License: GPL-3
-Project-URL: homepage, https://github.com/experimaestro/datamaestro_text
-Project-URL: documentation, https://datamaestro-text.readthedocs.io/en/latest/
-Project-URL: repository, https://github.com/experimaestro/datamaestro_text
-Keywords: dataset manager,information retrieval,experiments
+License: GPL-3.0-or-later
+License-File: LICENSE
+Keywords: dataset manager,experiments,information retrieval
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Science/Research
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.10
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: datamaestro>=1.5.0
-Requires-Dist: ir_datasets>=0.5.8
 Requires-Dist: attrs
-Provides-Extra: dev
-Requires-Dist: pytest; extra == "dev"
-Requires-Dist: docutils; extra == "dev"
-Requires-Dist: sphobjinv; extra == "dev"
-Requires-Dist: flake8; extra == "dev"
-Requires-Dist: sphinx; extra == "dev"
-Dynamic: license-file
+Requires-Dist: datamaestro>=1.6.2
+Requires-Dist: experimaestro
+Requires-Dist: ir-datasets>=0.5.8
+Description-Content-Type: text/markdown
  [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit) [![PyPI version](https://badge.fury.io/py/datamaestro-text.svg)](https://badge.fury.io/py/datamaestro-text)

{datamaestro_text-2025.9.11.dist-info → datamaestro_text-2026.2.2.dist-info}/RECORD RENAMED Viewed

@@ -1,29 +1,30 @@
-datamaestro_text/__init__.py,sha256=hU8jZpkXl3F74qIfqnJl7v4nJ9YxfoR7IpJpUREFNRI,248
-datamaestro_text/version.py,sha256=aHTcqRU_FAq8moUGgwqhCMrhMi8VBbk38TX-uMF8p20,720
+datamaestro_text/__init__.py,sha256=MP7ShYx32k5irdgml1PjnmSofzioYQh9rzUEcHs5eys,276
+datamaestro_text/version.py,sha256=PcJXzZYuv0SaBM1rOymP9IhKDJxqcLKUPHINlOD-hL0,710
 datamaestro_text/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamaestro_text/config/ai/quac.yaml,sha256=h1D7UJo2z1nZ_9MXpDHuQNJG9Ma2oayUdJV6lyumAIg,1103
-datamaestro_text/config/com/oscar-corpus.py,sha256=dDfdy_uhG6esRQQzUOOORwK8KvEq4c4hZbWt22pv-zY,733
+datamaestro_text/config/com/oscar-corpus.py,sha256=gEWz8Nxpv7VXU8X-vfRZLwPfq0KXtkGSNtsfoqfcUI0,702
 datamaestro_text/config/com/sentiment140.py,sha256=itfBEgcOniECXKOw8I2dhzyS9LOMsltMLfKK6NGRpVY,1293
 datamaestro_text/config/com/fastml/goodbooks-10k.yaml,sha256=5ZABxUnBFs2ZnCXtBH8YoBiPb3SocRRdH1DLSfVWF-Y,1172
-datamaestro_text/config/com/github/ikat.py,sha256=DCayX-t2OBeW5bOJvRxoQgIH3vy-__mYzdmVcnayAkk,4230
+datamaestro_text/config/com/github/ikat.py,sha256=nAmBre9zNlnGhx-C50EvLGvHqtoB7Ce-mZUZqM_ymO8,4219
 datamaestro_text/config/com/github/aagohary/canard.py,sha256=5fLwCLNBGM_7--naTCDayAMYLvK3yTD8auaEf-dqrb4,1768
 datamaestro_text/config/com/github/apple/ml-qrecc.py,sha256=zP3w7A9KSvJVCo44OaB1az1pDKWxE6qXS4qFm3hqg3Y,3064
 datamaestro_text/config/com/github/prdwb/orconvqa.py,sha256=MFJYaxJoqJQ6kMDHa2PIE0zoxYTA8Kyl26-vzFoMML0,3032
 datamaestro_text/config/com/github/soskek/bookcorpus.yaml,sha256=qJKs35yeEIilEMgNvU3OEqMp1TSn7mDM2T-uYyA7kTU,1607
 datamaestro_text/config/com/microsoft/wikiqa.yaml,sha256=U7rU-W2Xz1MYv2YXT4jCTj5DsHyM0VssbQPNa3EISaM,540
 datamaestro_text/config/com/microsoft/msmarco/passage.py,sha256=hN2KOdi6ToHlodozqsYAOtxaqiUGkGGtRtb3RFSgnEU,11645
-datamaestro_text/config/com/smashwords/bookcorpus.py,sha256=hCFjZg9t1AHET05qU31uatiAOD5EEzzWG9y_W90jcOE,861
+datamaestro_text/config/com/smashwords/bookcorpus.py,sha256=LgUcnR-z99kTrZj6QaCLuLrj1bG-wHMM5GlVNmbrY2k,851
 datamaestro_text/config/edu/cornell/nlvr.yaml,sha256=9Yk5VZMncSmrP7JNuGXqExksgX5nQ_Zfnlps8hWze3Q,921
 datamaestro_text/config/edu/stanford/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamaestro_text/config/edu/stanford/aclimdb.py,sha256=QtriReAVsbJlxkgfJWQCZdCeJ9LswYnOR9mFrgghL9c,647
-datamaestro_text/config/edu/stanford/glove.py,sha256=bXxwiJqT8alPs5MwwxVuY0xBO8g1QWhm2L3AKVskTlA,2391
+datamaestro_text/config/edu/stanford/aclimdb.py,sha256=gv_4IauUCURbMzMWpSMyx3qgOAXVQuBwKR-mMaKExpc,626
+datamaestro_text/config/edu/stanford/glove.py,sha256=FiVYbzQMD11CiKfklrggtm7YXBCevyTXXwhehRd65H8,2348
 datamaestro_text/config/edu/stanford/im2p.yaml,sha256=JoToNyEPpmwdyLFedCBot5ypyw7p9rzi12mGXJuZin0,2909
 datamaestro_text/config/edu/upenn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamaestro_text/config/edu/upenn/ldc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamaestro_text/config/edu/upenn/ldc/aquaint.py,sha256=YEU3kIXzv71Vjg9bUoXyQ-vSLvC-y4LlE3mJQf38XuY,1589
+datamaestro_text/config/fr/granddebat.py,sha256=JRLC3q6o-XhJECjAh40w2p40pCSRw9K3-YMDUpdNwMM,7016
 datamaestro_text/config/gov/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamaestro_text/config/gov/nist/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamaestro_text/config/gov/nist/ir/covid.py,sha256=wn2E7sQ8M6pAucVD4sKJYImyzKUKphyiDFJD0oYRCbg,4004
+datamaestro_text/config/gov/nist/ir/covid.py,sha256=i9xxZcrKeX1gezK_TE68oropMF9PKHX2ofyREEUWYPY,4003
 datamaestro_text/config/gov/nist/trec/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamaestro_text/config/gov/nist/trec/adhoc.py,sha256=vcFaLlZ-chwDt013MYI8bYZ2ug39jPaeimsiok_sqfU,11035
 datamaestro_text/config/gov/nist/trec/clueweb.yaml,sha256=sm4UbdtMzWoDVPsewtVDS7Vj2jBOdgp18Xqo1X4ysQc,792
@@ -34,7 +35,7 @@ datamaestro_text/config/gov/nist/trec/web.yaml,sha256=iNIJ-PCeLwpUaJByJesyCqfFJo
 datamaestro_text/config/io/github/rajpurkar/squad.yaml,sha256=JchkTruEhVd0uOTGgek2tOgBL4UT3G3KVASvZcNigLg,1143
 datamaestro_text/config/io/github/thunlp/fewrel.py,sha256=rg_qAnMrXYUZhQYxA12r_Npl0ggyfTLJQjdSCjU0QxM,1228
 datamaestro_text/config/io/metamind/research/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamaestro_text/config/io/metamind/research/wikitext.py,sha256=DjyBmG74JvuMt9RpMwuLAnxzOdByIWsk4VnXgkJp1NM,2307
+datamaestro_text/config/io/metamind/research/wikitext.py,sha256=jw_CbBbradIUp_mrhG-z3rfa4_0ybvIBSkDqJvGLCCI,2301
 datamaestro_text/config/net/mattmahoney/enwiki.yaml,sha256=HCUn3s0AydXX3BjJ6yUXY0vGLGWSBkOCaDhQ4PA2Adg,2452
 datamaestro_text/config/org/acm/recsys/cb2014.yaml,sha256=5SAK3Am1k0HFugSSCIQN5mLPBfr1zZZAkhLrSH5pHQc,1274
 datamaestro_text/config/org/cocodataset/index.yaml,sha256=KISJChMeKwlZbSnHmRcGMsm6jbcFGVe1aA4GhP2fzqw,474
@@ -42,47 +43,49 @@ datamaestro_text/config/org/grouplens/movielens.py,sha256=tV6OSTDdtjll1dQBCsYIls
 datamaestro_text/config/org/universaldependencies/french.py,sha256=etedb3_SC-fV5Oa2rM4_smZk6t4CPiNvU4C4keUFZHY,2214
 datamaestro_text/config/uk/ac/ucl/cs/qangaroo.yaml,sha256=IBy82CDNNLjJPNPzues1EgDXu0A5WDvUFeVNSOyrIpI,1137
 datamaestro_text/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamaestro_text/data/embeddings.py,sha256=qdeZ4m6drFNkqW_4pKxJKdJHtOnDRs0P7QVJ7AO6xFE,1127
+datamaestro_text/data/embeddings.py,sha256=YMoNLyVvaOt86bq_8X71_Fgu7EYYI71vr67xSQsi57I,1128
 datamaestro_text/data/recommendation.py,sha256=wHV_9SXSclouuXaBmvwg3ncZLOFfdKRZG3IHkkPJX9Y,279
 datamaestro_text/data/tagging.py,sha256=yWm7bNLks77cAySa1aZNCmLFxTTqhqXZ0PaoaEYU6hI,697
-datamaestro_text/data/text.py,sha256=_9J7-j06BOIZ1HsOcBsWy_asanAKkDoZIsWWMMj4tm4,496
-datamaestro_text/data/conversation/__init__.py,sha256=esOWnSImMlQs5QtfxUt559ABLd6a5wwoNpj9XtIq71Y,159
-datamaestro_text/data/conversation/base.py,sha256=BAqu5YOjlRcBOSiyvrjiTIuuc1OalM0BDtcEvJYhO2Y,7515
-datamaestro_text/data/conversation/canard.py,sha256=FI02rPNuzSsA_uXOcgcuQn31d3QahmHndzYCV1Zm8rk,3305
-datamaestro_text/data/conversation/ikat.py,sha256=djxTv0vHLXIUPzfmwpB25fyUWJNVPqthTtDbQ6CzmQo,4363
-datamaestro_text/data/conversation/orconvqa.py,sha256=RL9cpK7QK2dsk_j2e4Th8wzGFq-G3ww_EjdM1eKh-8o,3764
+datamaestro_text/data/text.py,sha256=Lln4eoegU9B27oS-2mv3eEQC6MyRBgVhoewQ2-YNxEQ,497
+datamaestro_text/data/conversation/__init__.py,sha256=Kk7FxPz_0oGO2PtIa8zH7UBqbCUsywTHfA-yKd_KO6c,284
+datamaestro_text/data/conversation/base.py,sha256=gF_-izQ1ijX7w49pKQvjfjUVzrX3VSHXxcqVIPWmAfY,7488
+datamaestro_text/data/conversation/canard.py,sha256=aYpkHzuJWGT3-myFNUjCYAtvG3gVh_d3Zc5lyiasQ04,3290
+datamaestro_text/data/conversation/ikat.py,sha256=hoGqHUWyT8BhC_ouUmnwoh93B2jGLHn8uc6npKP4Sl8,4319
+datamaestro_text/data/conversation/orconvqa.py,sha256=zNp02jyYgny0qtIFOMjmrUy7hG8VKWcELHWrg3FBCc0,3764
 datamaestro_text/data/conversation/qrecc.py,sha256=es4GmqPtE63A7O_GARe8Zy3rQvuLEhAvUA7CfN_nMeA,2562
-datamaestro_text/data/ir/__init__.py,sha256=ZRJrUeeUyD1ncMN5JINVvFJ2lDr3KsbgiiEBJkczSi0,9814
-datamaestro_text/data/ir/base.py,sha256=uwIiKn0ryK5DWUQsEegeTs6bye0uAOGl0XVm_cvV3ZU,1506
+datamaestro_text/data/debate/__init__.py,sha256=PzCV3Bd9fmonE-OQp4VtK1NglH42-iv34WAWUIU-eYk,187
+datamaestro_text/data/debate/granddebat.py,sha256=4-HMfgvF2bPru56D3hkA1E2bN3dgIUmcvX9eOIXroLA,2176
+datamaestro_text/data/ir/__init__.py,sha256=oYI7eIScg-olxPh95XBgTK-E2PunieXvqQPlrRlHU8M,9799
+datamaestro_text/data/ir/base.py,sha256=ksluGOOzOwbdZ2SPnwiDMMUhBa6P1Ti2sr6Ch5xXUgg,1493
 datamaestro_text/data/ir/cord19.py,sha256=yu1Khgy6AZjH2BPQKdnxDid0vQgQ8zvb8-FQlHH-GOU,1465
-datamaestro_text/data/ir/csv.py,sha256=tnxuqR_MZ3GQhuoXEMYyWLQw8PyD0gRqsnzIP5Gsziw,1212
-datamaestro_text/data/ir/data.py,sha256=ERmmOxz_9mUkIuccNbzUa5Y6gVLLVDdyc4cCxbCCUbY,20
-datamaestro_text/data/ir/formats.py,sha256=Tbu5rrssq9m76a448-ixgs7a-KKvNCvXYkdv5FkFqp4,3641
+datamaestro_text/data/ir/csv.py,sha256=0jnaV-wKLgslH7izR-xP_RX7l90vykQTn3bPhaCFR-c,1027
+datamaestro_text/data/ir/data.py,sha256=6ASVsyVVfiSd1m8C8QTrxVLnFVmtoW3d9c9nQ07zlbY,34
+datamaestro_text/data/ir/formats.py,sha256=rKflCuY8UBpXC3nltBqzC4waWYoxuyP91xJvG7p690Y,3630
 datamaestro_text/data/ir/huggingface.py,sha256=G71VFDN-SllZy4LFxumEbCumEJvb5-orAbLemHjWhiA,956
-datamaestro_text/data/ir/stores.py,sha256=A4Ew0L4P6iLLmDKhxqjjVkHcz797BHf4d76YguGkB1A,4412
+datamaestro_text/data/ir/stores.py,sha256=rdOwYCG_NzHSsUQpJ1aneiA2SDWrcfdi16aY-df852U,4408
 datamaestro_text/data/ir/trec.py,sha256=IOtQRMUz8zx-dYEMR2NIIM6qXEUjsV0eVOhGvKIRJK4,1974
 datamaestro_text/data/ir/utils.py,sha256=6-GhXVtgkBZGhIs2-ODZua_3DmKjSSVydStpHDqbAwE,833
+datamaestro_text/datasets/__init__.py,sha256=ORn-Q1gGibg-N5grVc7MqOYfExels3FRI51oQ4xI1QA,34
 datamaestro_text/datasets/irds/__init__.py,sha256=Tq0HN1qojnZYLBumM59BuTkz7r0gcu-5OXmDDLgPpAc,707
-datamaestro_text/datasets/irds/data.py,sha256=YlDbGFsh6_mCmk49F3bwdsLEbpHVvMv4gvc1H8KZnpo,23096
-datamaestro_text/datasets/irds/datasets.py,sha256=n5mvD2omVI4PkeuJgX13Cio6gLrgYjxq8ZUKKyqocEs,5625
+datamaestro_text/datasets/irds/data.py,sha256=sIU7_rt4I1E9rjkIGcpNfbD5mtO97vxFsUDmouRMDV4,22914
+datamaestro_text/datasets/irds/datasets.py,sha256=CJ8MA44XCwIQGZTzYIJnR-qFm890rUZZB7C3lKIwNyY,5627
 datamaestro_text/datasets/irds/helpers.py,sha256=fGE-fbuJbXdTzl1qo55247jzn9cvApY-d82GJBgfY1E,3982
 datamaestro_text/datasets/irds/utils.py,sha256=m30JLIrV_HgilN11TvY9dGTyumES6LLzWZDUAMT915M,1425
-datamaestro_text/download/tmdb.py,sha256=kU_Vz9jhznlyeKMHziVu58IHoWv8zPu6CZTHVNQvmu4,4009
+datamaestro_text/download/tmdb.py,sha256=sfnSUJwGSjBsLNVVhT30db2m0R8mrRkDZpbpBUt7GMg,3960
 datamaestro_text/interfaces/plaintext.py,sha256=cWfS_xjqZxQ0EV4Ax5BEarZ4lnhQ1I7mc_vgfBgE76w,885
 datamaestro_text/interfaces/trec.py,sha256=GrP0N_Hcj5f73KS6CSzkyN4aaI-XoBJ19oVMpHVR3QM,3579
 datamaestro_text/test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamaestro_text/test/test_datasets.py,sha256=hD1pe-CjEnOj0JNqDLasz_97ltFzMbYfyYBy1QyYdf8,202
 datamaestro_text/test/test_documented.py,sha256=Kl90fz_r_dsV0oXE1Mad34GiQyZ9dc6bfZKNtxP0O2s,453
 datamaestro_text/transforms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamaestro_text/transforms/ir/__init__.py,sha256=eWxr0ZtfK9F9J4TachW5V8Ra5AhQJueDyOGR8vXi1uA,6553
+datamaestro_text/transforms/ir/__init__.py,sha256=7D6wurKVQf-f2mu1I3tT-baQbKo7yRCxW8pOHh-MSjM,6539
 datamaestro_text/utils/__init__.py,sha256=2449YLTAtKJzkmt84Mu8sBRCCveNs5fiaqTCK_p5ha0,3340
 datamaestro_text/utils/files.py,sha256=jhcirufgTztEkx1hs2-qgScEcHnIcGF_BjCeuCSsNv0,2838
 datamaestro_text/utils/iter.py,sha256=QBajeSPLHvkeh6BCTZDSqWlOYNjwUDvgTTZ_YxJntXw,2701
 datamaestro_text/utils/randomstream.py,sha256=_-boH4IIqN8qcl3IktjpNp9vmF4TWRzHUSNVwg7WAr8,973
-datamaestro_text/utils/shuffle.py,sha256=o8JTz3mr0lYWyv0zEh91jEK12ci1etMiUnzh5GkOHCM,3490
-datamaestro_text-2025.9.11.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-datamaestro_text-2025.9.11.dist-info/METADATA,sha256=ChGV_8bnixfGl91eG_3-Qwba8tjMwe2VPCwXdGxG_xM,1848
-datamaestro_text-2025.9.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-datamaestro_text-2025.9.11.dist-info/entry_points.txt,sha256=lO1P5hE183L5qEEVHlG8d_ik0HNXnX7Eo87cQLdcl-Y,111
-datamaestro_text-2025.9.11.dist-info/top_level.txt,sha256=gYSeqViE8r7eCxSdqFJL74OwljOwKsGPaIhEcCXqc-o,17
-datamaestro_text-2025.9.11.dist-info/RECORD,,
+datamaestro_text/utils/shuffle.py,sha256=xXzgBQ8An7tKboxI0z123Tl6ywXI4S0tWf8MnfOon0c,3491
+datamaestro_text-2026.2.2.dist-info/METADATA,sha256=cHXRhpnNO6sliuE09Jg-eHJtr2kl1Z4Dy3mE1RCGELA,1886
+datamaestro_text-2026.2.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+datamaestro_text-2026.2.2.dist-info/entry_points.txt,sha256=lO1P5hE183L5qEEVHlG8d_ik0HNXnX7Eo87cQLdcl-Y,111
+datamaestro_text-2026.2.2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+datamaestro_text-2026.2.2.dist-info/RECORD,,

{datamaestro_text-2025.9.11.dist-info → datamaestro_text-2026.2.2.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,4 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.9.0)
+Generator: hatchling 1.28.0
 Root-Is-Purelib: true
 Tag: py3-none-any

datamaestro_text-2025.9.11.dist-info/top_level.txt DELETED Viewed

	@@ -1 +0,0 @@
1	- datamaestro_text

{datamaestro_text-2025.9.11.dist-info → datamaestro_text-2026.2.2.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{datamaestro_text-2025.9.11.dist-info → datamaestro_text-2026.2.2.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

datamaestro-text 2025.9.11__py3-none-any.whl → 2026.2.2__py3-none-any.whl

datamaestro-text 2025.9.11py3-none-any.whl → 2026.2.2py3-none-any.whl