PyPI - datamaestro-text - Versions diffs - 2023.12.5__tar.gz → 2023.12.12__tar.gz - Mend

datamaestro-text 2023.12.5tar.gz → 2023.12.12tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (117) hide show

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/.readthedocs.yml RENAMED Viewed

@@ -20,3 +20,4 @@ python:
     - method: pip
       path: .
     - requirements: docs/requirements.txt
+    - requirements: requirements.txt

{datamaestro-text-2023.12.5/src/datamaestro_text.egg-info → datamaestro-text-2023.12.12}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datamaestro-text
-Version: 2023.12.5
+Version: 2023.12.12
 Summary: Datamaestro module for text-related datasets
 Author-email: Benjamin Piwowarski <benjamin@piwowarski.fr>
 License: GPL-3

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/docs/source/datasets/conversation.rst RENAMED Viewed

@@ -6,3 +6,5 @@ Contextualized Query Rewriting
 ==============================
 .. dm:datasets:: com.github.aagohary.canard text
+.. dm:datasets:: com.github.prdwb.orconvqa text

datamaestro-text-2023.12.12/docs/source/datasets/irds.rst ADDED Viewed

@@ -0,0 +1,19 @@
+IR Datasets
+===========
+XPMIR provides an interface to the `IR Datasets <https://ir-datasets.com/>`_ library.
+The list below is provided as a reference, but might not be up-to-date if your
+version of `ir-datasets` is older or newer than the one used at generation time.
+Data types
+----------
+.. autoxpmconfig:: xpmir.datasets.irds.data.Topics
+.. autoxpmconfig:: xpmir.datasets.irds.data.Documents
+.. autoxpmconfig:: xpmir.datasets.irds.data.AdhocAssessments
+List of datasets
+----------------
+.. dm:repository:: irds

datamaestro-text-2023.12.12/src/datamaestro_text/config/com/github/prdwb/orconvqa.py ADDED Viewed

@@ -0,0 +1,92 @@
+# See documentation on https://datamaestro.readthedocs.io
+from collections import namedtuple
+import gzip
+import json
+from pathlib import Path
+from typing import Iterator, NamedTuple
+import attrs
+from datamaestro.definitions import datatasks, datatags, dataset
+from datamaestro.download.single import filedownloader
+from datamaestro.utils import HashCheck
+from datamaestro_text.data.conversation.orconvqa import OrConvQADataset
+from datamaestro.data.ml import Supervised
+from datamaestro_text.data.ir import DocumentStore
+from datamaestro_text.data.ir.formats import OrConvQADocument
+from datamaestro_text.data.ir.stores import OrConvQADocumentStore
+from datamaestro_text.datasets.irds.data import LZ4DocumentStore
+from datamaestro_text.datasets.irds.helpers import lz4docstore_downloader
+@datatags("conversation", "context", "query")
+@datatasks("query rewriting")
+@filedownloader(
+    "train.jsonl",
+    "https://ciir.cs.umass.edu/downloads/ORConvQA/preprocessed/train.txt",
+    checker=HashCheck("7513a9ef12d8b7a4471166dc4fef77b7"),
+)
+@filedownloader(
+    "dev.jsonl",
+    "https://ciir.cs.umass.edu/downloads/ORConvQA/preprocessed/dev.txt",
+    checker=HashCheck("7765658995cc9ffd5eb39a400d814b20"),
+)
+@filedownloader(
+    "test.jsonl",
+    "https://ciir.cs.umass.edu/downloads/ORConvQA/preprocessed/test.txt",
+    checker=HashCheck("0cf3a755f06297b9c02e7db45f8dc8be"),
+)
+@dataset(
+    Supervised,
+    url="https://github.com/prdwb/orconvqa-release",
+)
+def preprocessed(train, dev, test):
+    """Open-Retrieval Conversational Question Answering datasets
+    OrConvQA is an aggregation of three existing datasets:
+    1. the QuAC dataset that offers information-seeking conversations,
+    2. the CANARD dataset that consists of context-independent rewrites of QuAC questions, and
+    3. the Wikipedia corpus that serves as the knowledge source for answering questions.
+    Each dataset is an instance of :class:`datamaestro_text.data.conversation.OrConvQADataset`
+    """
+    # One OrConvQADataset per split; the "dev" file is exposed under the
+    # "validation" key
+    return {
+        "train": OrConvQADataset(path=train),
+        "validation": OrConvQADataset(path=dev),
+        "test": OrConvQADataset(path=test),
+    }
+def orConvQADocumentReader(source: Path) -> Iterator[OrConvQADocumentStore.NAMED_TUPLE]:
+    """Iterates over the documents of a gzip-compressed JSON-lines file
+    :param source: path of the gzip file, with one JSON object per line
+    :returns: an iterator yielding one ``OrConvQADocumentStore.NAMED_TUPLE`` per
+        non-blank line, built from the keys of the JSON object
+    """
+    # JSON is UTF-8: do not depend on the platform default encoding
+    with gzip.open(source, "rt", encoding="utf-8") as fp:
+        for line in fp:
+            # Tolerate blank lines (e.g. a trailing newline at end of file)
+            if not line.strip():
+                continue
+            yield OrConvQADocumentStore.NAMED_TUPLE(**json.loads(line))
+@lz4docstore_downloader(
+    "all_blocks",
+    "https://ciir.cs.umass.edu/downloads/ORConvQA/all_blocks.txt.gz",
+    orConvQADocumentReader,
+    OrConvQADocumentStore.NAMED_TUPLE,
+    "id",
+    checker=HashCheck("1095a3408690e7bbe4d8a87a2bae6356"),
+    size=5_086_902_800,
+    count_hint=11_377_951,
+)
+@dataset(
+    OrConvQADocumentStore,
+    url="https://github.com/prdwb/orconvqa-release",
+)
+def passages(all_blocks):
+    """OrConvQA Wikipedia passages
+    OrConvQA is an aggregation of three existing datasets:
+    1. the QuAC dataset that offers information-seeking conversations,
+    2. the CANARD dataset that consists of context-independent rewrites of QuAC questions, and
+    3. the Wikipedia corpus that serves as the knowledge source for answering questions.
+    """
+    # The count is the same value as the ``count_hint`` given to the downloader
+    return {"path": all_blocks, "count": 11_377_951}

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/data/ir/__init__.py RENAMED Viewed

@@ -95,7 +95,7 @@ class DocumentStore(Documents):
         self, randint: Optional[Callable[[int], int]]
     ) -> Iterator[Document]:
         """Sample documents from the dataset"""
-        length = self.documentcount()
+        length = self.documentcount
         randint = randint or (lambda max: random.randint(0, max - 1))
         while True:
             yield self.document_int(randint(length))

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/data/ir/formats.py RENAMED Viewed

@@ -130,6 +130,20 @@ class TweetDoc(IDHolder, Document):
         return f"{self.text}"
+@define
+class OrConvQADocument(IDHolder, Document):
+    """A document of the OrConvQA collection, with a title and a text"""
+    # Document identifier
+    id: str
+    # Title associated with the document
+    title: str
+    # Text of the document
+    text: str
+    # NOTE(review): presumably the identifier of the source article — confirm
+    aid: str
+    # NOTE(review): presumably the index of the block within the article — confirm
+    bid: int
+    # NOTE(review): presumably advertises that get_text() is usable — confirm
+    has_text: ClassVar[bool] = True
+    def get_text(self):
+        """Returns the title followed by the text, separated by a space"""
+        return f"{self.title} {self.text}"
 @define
 class TrecTopic(GenericTopic):
     text: str

datamaestro-text-2023.12.12/src/datamaestro_text/data/ir/stores.py ADDED Viewed

@@ -0,0 +1,22 @@
+from collections import namedtuple
+from typing import List
+from experimaestro import Constant
+import attrs
+from datamaestro_text.datasets.irds.data import LZ4DocumentStore
+from datamaestro_text.data.ir.formats import OrConvQADocument
+class OrConvQADocumentStore(LZ4DocumentStore):
+    """LZ4 document store whose records are converted to :class:`OrConvQADocument`"""
+    # Plain named tuple with the same field names (in the same order) as the
+    # attrs fields of OrConvQADocument
+    NAMED_TUPLE = namedtuple(
+        "OrConvQADocument", [a.name for a in attrs.fields(OrConvQADocument)]
+    )
+    # Documents are looked up by their "id" field
+    lookup_field: Constant[str] = "id"
+    # All the fields of the named tuple
+    fields: Constant[List[str]] = list(NAMED_TUPLE._fields)
+    # Only "id" is indexed
+    index_fields: Constant[List[str]] = ["id"]
+    # Record type handed to the underlying store
+    data_cls = NAMED_TUPLE
+    def converter(self, data: NAMED_TUPLE) -> OrConvQADocument:
+        """Builds an OrConvQADocument from the fields of a stored named tuple"""
+        return OrConvQADocument(**data._asdict())

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/datasets/irds/data.py RENAMED Viewed

@@ -1,7 +1,9 @@
 import logging
-from typing import Any, Iterator, Tuple, Type, List
+from pathlib import Path
+from typing import Any, Iterator, NamedTuple, Tuple, Type, List
 import attrs
 import ir_datasets
+from ir_datasets.indices import PickleLz4FullStore
 from ir_datasets.formats import (
     GenericDoc,
     GenericQuery,
@@ -10,7 +12,7 @@ from ir_datasets.formats import (
     TrecQuery,
 )
 import ir_datasets.datasets as _irds
-from experimaestro import Config
+from experimaestro import Config, Param
 from experimaestro.compat import cached_property
 from experimaestro import Option
 import datamaestro_text.data.ir as ir
@@ -208,6 +210,67 @@ if hasattr(_irds, "miracl"):
     )
+# Workaround until PR https://github.com/allenai/ir_datasets/pull/252
+# is merged.
+class DMPickleLz4FullStore(PickleLz4FullStore):
+    """PickleLz4FullStore with a replacement ``get_many`` implementation"""
+    def get_many(self, doc_ids, field=None):
+        """Returns a dictionary mapping document IDs to documents
+        :param doc_ids: the IDs of the documents to retrieve
+        :param field: if given, map each ID to the value of this field
+            instead of the whole document
+        """
+        # Resolve the field position first, so that an unknown field fails
+        # before any document is fetched
+        position = None if field is None else self._doc_cls._fields.index(field)
+        docs = self.get_many_iter(doc_ids)
+        if field is None:
+            return {getattr(doc, self._id_field): doc for doc in docs}
+        return {getattr(doc, self._id_field): doc[position] for doc in docs}
+class LZ4DocumentStore(ir.DocumentStore):
+    """A LZ4-based document store
+    Documents are read through a :class:`DMPickleLz4FullStore` opened on
+    ``path``. The record type is taken from the ``data_cls`` attribute, which
+    is not declared here (NOTE(review): presumably set by subclasses — confirm),
+    and :meth:`converter` may be overridden to change the returned document type.
+    """
+    #: Location of the LZ4 store
+    path: Param[Path]
+    #: Lookup field
+    lookup_field: Param[str]
+    # Extra indexed fields (e.g. URLs)
+    index_fields: List[str]
+    @cached_property
+    def store(self):
+        """The underlying LZ4 store (created on first access, then cached)"""
+        return DMPickleLz4FullStore(
+            self.path, None, self.data_cls, self.lookup_field, self.index_fields
+        )
+    @cached_property
+    def _docs(self):
+        # Cached store iterator, indexed by position in docid_internal2external
+        return self.store.__iter__()
+    def docid_internal2external(self, ix: int):
+        """Returns the external ID of the document at internal position `ix`"""
+        return getattr(self._docs[ix], self.store._id_field)
+    def document_ext(self, docid: str) -> Document:
+        """Returns the (converted) document given its external ID"""
+        return self.converter(self.store.get(docid))
+    def documents_ext(self, docids: List[str]) -> List[Document]:
+        """Returns documents given their external IDs (optimized for batch)
+        The result follows the order of `docids`.
+        """
+        retrieved = self.store.get_many(docids)
+        return [self.converter(retrieved[docid]) for docid in docids]
+    def converter(self, data):
+        """Converts a document from LZ4 tuples to any other format"""
+        # By default, use identity
+        return data
+    def iter(self) -> Iterator[Document]:
+        """Returns an iterator over documents"""
+        return map(self.converter, self.store.__iter__())
+    # Exposed as a property: ir.DocumentStore.iter_sample reads
+    # `self.documentcount` without calling it
+    @property
+    def documentcount(self):
+        """Number of documents: `count` when set, otherwise the store count"""
+        if self.count:
+            return self.count
+        return self.store.count()
 @attrs.define()
 class IRDSQueryWrapper(ir.Topic):
     query: Any

datamaestro-text-2023.12.12/src/datamaestro_text/datasets/irds/helpers.py ADDED Viewed

@@ -0,0 +1,71 @@
+import logging
+from typing import Optional, Type, Callable, Iterator
+from ir_datasets.indices import PickleLz4FullStore
+from datamaestro.download import Download
+from datamaestro.utils import FileChecker
+from pathlib import Path
+import urllib3
+class lz4docstore_downloader(Download):
+    """Uses ir_datasets Lz4FullStore to build a document store for a stream of documents
+    The file at `url` is downloaded, optionally checked, and the documents
+    produced by `iter_factory` are stored in a ``PickleLz4FullStore`` located
+    under the data path of the dataset definition.
+    """
+    def __init__(
+        self,
+        varname: str,
+        url: str,
+        iter_factory: Callable[[Path], Iterator],
+        doc_cls: Type,
+        lookup_field: str,
+        *,
+        count_hint: Optional[int] = None,
+        size: Optional[int] = None,
+        checker: FileChecker = None,
+    ):
+        """
+        :param varname: name passed to the base ``Download`` class
+        :param url: URL of the file containing the documents
+        :param iter_factory: called with the path of the downloaded file,
+            returns an iterator over the documents
+        :param doc_cls: type of the documents stored in the index
+        :param lookup_field: document field used for lookups (also the only
+            indexed field)
+        :param count_hint: forwarded to the store as ``count_hint``
+        :param size: forwarded to ``downloadURL`` as ``size``
+        :param checker: if set, used to check the downloaded file
+        """
+        super().__init__(varname)
+        self.iter_factory = iter_factory
+        self.url = url
+        self.doc_cls = doc_cls
+        self.size = size
+        self.lookup_field = lookup_field
+        self.count_hint = count_hint
+        self.checker = checker
+        p = urllib3.util.parse_url(self.url)
+        assert p is not None
+        # Store directory name: last component of the URL path, without its
+        # final suffix
+        self.name = Path(p.path).with_suffix("").name
+    def prepare(self):
+        """Returns the path of the directory holding the document store"""
+        return self.definition.datapath / self.name
+    def download(self, force=False):
+        """Downloads the source file and builds the document store
+        Nothing is done when the ``done`` marker file exists, unless `force`
+        is true.
+        """
+        # Creates directory if needed (including missing parents, since the
+        # dataset data path might not exist yet)
+        destination = self.definition.datapath / self.name
+        destination.mkdir(parents=True, exist_ok=True)
+        # Early exit
+        if (destination / "done").is_file() and not force:
+            return True
+        # Download (cache)
+        logging.info("Building the document index")
+        with self.context.downloadURL(self.url, size=self.size) as file:
+            # Checks the file
+            if self.checker:
+                self.checker.check(file.path)
+            # Builds the LZ4 store
+            store = PickleLz4FullStore(
+                destination,
+                lambda: self.iter_factory(Path(file.path)),
+                self.doc_cls,
+                lookup_field=self.lookup_field,
+                index_fields=[self.lookup_field],
+                key_field_prefix=None,
+                size_hint=None,
+                count_hint=self.count_hint,
+            )
+            store.build()
+            # All good! The marker is only written once the build has
+            # completed
+            (destination / "done").touch()

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/version.py RENAMED Viewed

@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '2023.12.5'
-__version_tuple__ = version_tuple = (2023, 12, 5)
+__version__ = version = '2023.12.12'
+__version_tuple__ = version_tuple = (2023, 12, 12)

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12/src/datamaestro_text.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datamaestro-text
-Version: 2023.12.5
+Version: 2023.12.12
 Summary: Datamaestro module for text-related datasets
 Author-email: Benjamin Piwowarski <benjamin@piwowarski.fr>
 License: GPL-3

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text.egg-info/SOURCES.txt RENAMED Viewed

@@ -27,6 +27,7 @@ docs/source/datasets/conversation.rst
 docs/source/datasets/embeddings.rst
 docs/source/datasets/index.rst
 docs/source/datasets/ir.rst
+docs/source/datasets/irds.rst
 docs/source/datasets/recommendation.rst
 docs/source/datasets/text.rst
 src/datamaestro_text/__init__.py
@@ -91,11 +92,13 @@ src/datamaestro_text/data/ir/csv.py
 src/datamaestro_text/data/ir/data.py
 src/datamaestro_text/data/ir/formats.py
 src/datamaestro_text/data/ir/huggingface.py
+src/datamaestro_text/data/ir/stores.py
 src/datamaestro_text/data/ir/trec.py
 src/datamaestro_text/data/ir/utils.py
 src/datamaestro_text/datasets/irds/__init__.py
 src/datamaestro_text/datasets/irds/data.py
 src/datamaestro_text/datasets/irds/datasets.py
+src/datamaestro_text/datasets/irds/helpers.py
 src/datamaestro_text/datasets/irds/utils.py
 src/datamaestro_text/download/tmdb.py
 src/datamaestro_text/interfaces/plaintext.py

datamaestro-text-2023.12.5/src/datamaestro_text/config/com/github/prdwb/orconvqa.py DELETED Viewed

@@ -1,49 +0,0 @@
-# See documentation on https://datamaestro.readthedocs.io
-from datamaestro.definitions import datatasks, datatags, dataset
-from datamaestro.download.single import filedownloader
-from datamaestro.utils import HashCheck
-from datamaestro_text.data.conversation.orconvqa import OrConvQADataset
-from datamaestro.data.ml import Supervised
-@datatags("conversation", "context", "query")
-@datatasks("query rewriting")
-@filedownloader(
-    "train.jsonl",
-    "https://ciir.cs.umass.edu/downloads/ORConvQA/preprocessed/train.txt",
-    checker=HashCheck("7513a9ef12d8b7a4471166dc4fef77b7"),
-)
-@filedownloader(
-    "dev.jsonl",
-    "https://ciir.cs.umass.edu/downloads/ORConvQA/preprocessed/dev.txt",
-    checker=HashCheck("7765658995cc9ffd5eb39a400d814b20"),
-)
-@filedownloader(
-    "test.jsonl",
-    "https://ciir.cs.umass.edu/downloads/ORConvQA/preprocessed/test.txt",
-    checker=HashCheck("0cf3a755f06297b9c02e7db45f8dc8be"),
-)
-@dataset(
-    Supervised,
-    url="https://github.com/prdwb/orconvqa-release",
-)
-def preprocessed(train, dev, test):
-    """Question-in-context rewriting
-    CANARD is a dataset for question-in-context rewriting that consists of
-    questions each given in a dialog context together with a context-independent
-    rewriting of the question. The context of each question is the dialog
-    utterances that precede the question. CANARD can be used to evaluate
-    question rewriting models that handle important linguistic phenomena such as
-    co-reference and ellipsis resolution.
-    Each dataset is an instance of :class:`datamaestro_text.data.conversation.OrConvQADataset`
-    """
-    return {
-        "train": OrConvQADataset(path=train),
-        "validation": OrConvQADataset(path=dev),
-        "test": OrConvQADataset(path=test),
-    }

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/.circleci/config.yml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/.flake8 RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/.github/workflows/pytest.yml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/.github/workflows/python-publish.yml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/.gitignore RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/.pre-commit-config.yaml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/LICENSE RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/MANIFEST.in RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/Makefile RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/README.md RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/docs/Makefile RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/docs/make.bat RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/docs/requirements.txt RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/docs/source/api/conversation.rst RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/docs/source/api/index.rst RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/docs/source/api/ir.rst RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/docs/source/api/text.rst RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/docs/source/conf.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/docs/source/datasets/embeddings.rst RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/docs/source/datasets/index.rst RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/docs/source/datasets/ir.rst RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/docs/source/datasets/recommendation.rst RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/docs/source/datasets/text.rst RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/docs/source/index.rst RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/mkdocs.yml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/pyproject.toml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/requirements-dev.txt RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/requirements.txt RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/setup.cfg RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/__init__.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/__init__.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/ai/quac.yaml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/com/fastml/goodbooks-10k.yaml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/com/github/aagohary/canard.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/com/github/soskek/bookcorpus.yaml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/com/microsoft/msmarco/passage.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/com/microsoft/wikiqa.yaml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/com/oscar-corpus.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/com/sentiment140.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/com/smashwords/bookcorpus.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/edu/cornell/nlvr.yaml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/edu/stanford/__init__.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/edu/stanford/aclimdb.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/edu/stanford/glove.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/edu/stanford/im2p.yaml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/edu/upenn/__init__.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/edu/upenn/ldc/__init__.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/edu/upenn/ldc/aquaint.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/gov/__init__.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/gov/nist/__init__.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/gov/nist/ir/covid.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/gov/nist/trec/__init__.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/gov/nist/trec/adhoc.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/gov/nist/trec/clueweb.yaml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/gov/nist/trec/deeplearning.yaml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/gov/nist/trec/index.yaml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/gov/nist/trec/tipster.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/gov/nist/trec/web.yaml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/io/github/rajpurkar/squad.yaml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/io/github/thunlp/fewrel.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/io/metamind/research/__init__.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/io/metamind/research/wikitext.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/net/mattmahoney/enwiki.yaml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/org/acm/recsys/cb2014.yaml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/org/cocodataset/index.yaml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/org/grouplens/movielens.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/org/universaldependencies/french.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/config/uk/ac/ucl/cs/qangaroo.yaml RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/data/conversation/__init__.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/data/conversation/base.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/data/conversation/canard.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/data/conversation/orconvqa.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/data/embeddings.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/data/ir/base.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/data/ir/cord19.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/data/ir/csv.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/data/ir/data.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/data/ir/huggingface.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/data/ir/trec.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/data/ir/utils.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/data/recommendation.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/data/tagging.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/data/text.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/datasets/irds/__init__.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/datasets/irds/datasets.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/datasets/irds/utils.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/download/tmdb.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/interfaces/plaintext.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/interfaces/trec.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/test/__init__.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/test/test_datasets.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/transforms/__init__.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/transforms/ir/__init__.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/utils/__init__.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/utils/files.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/utils/iter.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/utils/randomstream.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text/utils/shuffle.py RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text.egg-info/entry_points.txt RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text.egg-info/requires.txt RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/src/datamaestro_text.egg-info/top_level.txt RENAMED Viewed

File without changes

{datamaestro-text-2023.12.5 → datamaestro-text-2023.12.12}/tox.ini RENAMED Viewed

File without changes

datamaestro-text 2023.12.5__tar.gz → 2023.12.12__tar.gz

datamaestro-text 2023.12.5tar.gz → 2023.12.12tar.gz