datamaestro-text 2026.1.1__py3-none-any.whl → 2026.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. datamaestro_text/__init__.py +1 -1
  2. datamaestro_text/config/com/github/aagohary/canard.py +27 -24
  3. datamaestro_text/config/com/github/apple/ml-qrecc.py +30 -25
  4. datamaestro_text/config/com/github/ikat.py +76 -62
  5. datamaestro_text/config/com/github/prdwb/orconvqa.py +41 -37
  6. datamaestro_text/config/com/microsoft/msmarco/passage.py +278 -207
  7. datamaestro_text/config/com/oscar-corpus.py +13 -10
  8. datamaestro_text/config/com/sentiment140.py +17 -12
  9. datamaestro_text/config/com/smashwords/bookcorpus.py +13 -10
  10. datamaestro_text/config/edu/stanford/aclimdb.py +14 -9
  11. datamaestro_text/config/edu/stanford/glove.py +66 -32
  12. datamaestro_text/config/edu/upenn/ldc/aquaint.py +35 -17
  13. datamaestro_text/config/fr/granddebat.py +57 -48
  14. datamaestro_text/config/gov/nist/ir/covid.py +62 -52
  15. datamaestro_text/config/gov/nist/trec/adhoc.py +395 -255
  16. datamaestro_text/config/gov/nist/trec/tipster.py +170 -64
  17. datamaestro_text/config/io/github/thunlp/fewrel.py +20 -15
  18. datamaestro_text/config/io/metamind/research/wikitext.py +51 -33
  19. datamaestro_text/config/org/grouplens/movielens.py +28 -37
  20. datamaestro_text/config/org/universaldependencies/french.py +16 -11
  21. datamaestro_text/data/conversation/__init__.py +6 -6
  22. datamaestro_text/data/conversation/base.py +2 -2
  23. datamaestro_text/data/conversation/canard.py +3 -4
  24. datamaestro_text/data/conversation/ikat.py +0 -1
  25. datamaestro_text/data/conversation/orconvqa.py +3 -3
  26. datamaestro_text/data/embeddings.py +1 -0
  27. datamaestro_text/data/ir/__init__.py +1 -1
  28. datamaestro_text/data/ir/base.py +1 -1
  29. datamaestro_text/data/ir/data.py +1 -1
  30. datamaestro_text/data/ir/formats.py +2 -1
  31. datamaestro_text/data/ir/stores.py +1 -1
  32. datamaestro_text/data/text.py +1 -0
  33. datamaestro_text/datasets/__init__.py +1 -0
  34. datamaestro_text/datasets/irds/data.py +1 -6
  35. datamaestro_text/download/tmdb.py +0 -1
  36. datamaestro_text/test/test_documented.py +2 -2
  37. datamaestro_text/transforms/ir/__init__.py +12 -13
  38. datamaestro_text/utils/shuffle.py +1 -1
  39. datamaestro_text/version.py +2 -2
  40. {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.3.dist-info}/METADATA +2 -8
  41. {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.3.dist-info}/RECORD +44 -43
  42. {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.3.dist-info}/WHEEL +0 -0
  43. {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.3.dist-info}/entry_points.txt +0 -0
  44. {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.3.dist-info}/licenses/LICENSE +0 -0
@@ -11,7 +11,6 @@ from datamaestro_text.data.conversation.base import (
11
11
  EntryType,
12
12
  )
13
13
  from datamaestro_text.data.ir import IDItem, SimpleTextItem
14
- import logging
15
14
 
16
15
 
17
16
  @define(kw_only=True)
@@ -82,9 +81,9 @@ class CanardDataset(ConversationDataset, File):
82
81
  )
83
82
  else:
84
83
  # The utterance before the last is the last user query
85
- assert (
86
- entry.history[-2] == history[-1][SimpleTextItem].text
87
- ), f"{entry.dialogue_id} {entry.history} / {history[-4:-1]}"
84
+ assert entry.history[-2] == history[-1][SimpleTextItem].text, (
85
+ f"{entry.dialogue_id} {entry.history} / {history[-4:-1]}"
86
+ )
88
87
 
89
88
  # The last utterance is the system side
90
89
  history.append(
@@ -5,7 +5,6 @@ import logging
5
5
  from datamaestro.data import File
6
6
  from datamaestro.record import Record
7
7
 
8
- from datamaestro_text.data.ir import Topics
9
8
  from datamaestro_text.data.ir.base import (
10
9
  IDItem,
11
10
  SimpleTextItem,
@@ -113,9 +113,9 @@ class OrConvQADataset(ConversationDataset, File):
113
113
  if relevance > 0:
114
114
  relevances[rank] = (entry.answer.answer_start, None)
115
115
 
116
- assert (
117
- len(relevances) <= 1
118
- ), f"Too many relevance labels ({len(relevances)}) for {entry.query_id}"
116
+ assert len(relevances) <= 1, (
117
+ f"Too many relevance labels ({len(relevances)}) for {entry.query_id}"
118
+ )
119
119
 
120
120
  history.append(
121
121
  Record(
@@ -20,6 +20,7 @@ class WordEmbeddings(Base):
20
20
 
21
21
  class WordEmbeddingsText(WordEmbeddings, File):
22
22
  """Word embeddings as a text word / values"""
23
+
23
24
  encoding: Meta[str] = "utf-8"
24
25
 
25
26
  def load(self):
@@ -6,7 +6,7 @@ from functools import cached_property
6
6
  import logging
7
7
  from pathlib import Path
8
8
  from attrs import define
9
- from typing import Callable, Dict, Iterator, List, Optional, Tuple, Type, TYPE_CHECKING
9
+ from typing import Callable, Dict, Iterator, List, Optional, Tuple, Type
10
10
  import random
11
11
  from experimaestro import Config
12
12
  from datamaestro.definitions import datatasks, Param, Meta
@@ -1,7 +1,7 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from attrs import define
3
3
  from typing import List
4
- from datamaestro.record import Record, Item, record_type
4
+ from datamaestro.record import Record, Item
5
5
 
6
6
 
7
7
  TopicRecord = DocumentRecord = Record
@@ -1 +1 @@
1
- from .base import *
1
+ from .base import * # noqa: F403
@@ -1,5 +1,5 @@
1
1
  from functools import cached_property
2
- from typing import ClassVar, Tuple, List
2
+ from typing import Tuple, List
3
3
  from attrs import define
4
4
  from datamaestro.record import record_type
5
5
  from ir_datasets.datasets.wapo import WapoDocMedia
@@ -10,6 +10,7 @@ from ir_datasets.datasets.cord19 import Cord19FullTextSection
10
10
  @define
11
11
  class DocumentWithTitle(TextItem):
12
12
  """Web document with title and body"""
13
+
13
14
  title: str
14
15
  body: str
15
16
 
@@ -82,7 +82,7 @@ class IKatClueWeb22DocumentStore(LZ4DocumentStore):
82
82
 
83
83
  file_checksum = hasher.hexdigest()
84
84
  assert file_checksum == checksum, (
85
- f"Expected {checksum}, " f"got {file_checksum} for {filename}"
85
+ f"Expected {checksum}, got {file_checksum} for {filename}"
86
86
  )
87
87
 
88
88
  # Get the MD5 hashes of all the passages
@@ -14,6 +14,7 @@ class TrainingText(Supervised):
14
14
 
15
15
  class TextFolder(Folder):
16
16
  "A folder composed of texts"
17
+
17
18
  pass
18
19
 
19
20
 
@@ -0,0 +1 @@
1
+ # IR datasets integration package
@@ -1,6 +1,5 @@
1
1
  import logging
2
2
  from abc import ABC, abstractmethod
3
- from dataclasses import dataclass
4
3
  from functools import cached_property, partial
5
4
  from pathlib import Path
6
5
  from typing import Dict, Iterator, List, NamedTuple, Tuple, Type
@@ -613,11 +612,7 @@ class Cast2022TopicsHandler(CastTopicsHandler):
613
612
  records = []
614
613
  nodes: Dict[str, ConversationTreeNode] = {}
615
614
 
616
- for (
617
- query
618
- ) in (
619
- self.dataset.dataset.queries_iter()
620
- ): # type: _irds.trec_cast.Cast2022Query
615
+ for query in self.dataset.dataset.queries_iter(): # type: _irds.trec_cast.Cast2022Query
621
616
  parent = nodes[query.parent_id] if query.parent_id else None
622
617
 
623
618
  if query.participant == "User":
@@ -10,7 +10,6 @@ from collections import namedtuple
10
10
 
11
11
  from datamaestro.download import Download
12
12
  from datamaestro.definitions import AbstractDataset
13
- from datamaestro.utils import TemporaryDirectory
14
13
 
15
14
  APIKEY_KEY = "org.themoviedb.apikey"
16
15
 
@@ -1,11 +1,11 @@
1
1
  from pathlib import Path
2
- from experimaestro.tools.documentation import DocumentationAnalyzer
2
+ from datamaestro.test.checks import DatamaestroAnalyzer
3
3
 
4
4
 
5
5
  def test_documented():
6
6
  """Test if every configuration is documented"""
7
7
  doc_path = Path(__file__).parents[3] / "docs" / "source" / "index.rst"
8
- analyzer = DocumentationAnalyzer(
8
+ analyzer = DatamaestroAnalyzer(
9
9
  doc_path, set(["datamaestro_text"]), set(["datamaestro_text.test"])
10
10
  )
11
11
 
@@ -2,7 +2,6 @@ import logging
2
2
  import gzip
3
3
  from abc import ABC, abstractmethod
4
4
  from pathlib import Path
5
- from typing import Type
6
5
  from experimaestro import Config, Task, Param, Annotated, pathgenerator, Option, tqdm
7
6
  import numpy as np
8
7
  from datamaestro.record import RecordType
@@ -131,22 +130,22 @@ class ShuffledTrainingTripletsLines(Task):
131
130
 
132
131
  def __validate__(self):
133
132
  if self.topic_ids:
134
- assert self.data.topic_recordtype.has(
135
- ir.IDItem
136
- ), f"No topic ID in the source data ({self.data.topic_recordtype})"
133
+ assert self.data.topic_recordtype.has(ir.IDItem), (
134
+ f"No topic ID in the source data ({self.data.topic_recordtype})"
135
+ )
137
136
  else:
138
- assert self.data.topic_recordtype.has(
139
- ir.TextItem
140
- ), f"No topic text in the source data ({self.data.topic_recordtype})"
137
+ assert self.data.topic_recordtype.has(ir.TextItem), (
138
+ f"No topic text in the source data ({self.data.topic_recordtype})"
139
+ )
141
140
 
142
141
  if self.doc_ids:
143
- assert self.data.document_recordtype.has(
144
- ir.IDItem
145
- ), "No doc ID in the source data"
142
+ assert self.data.document_recordtype.has(ir.IDItem), (
143
+ "No doc ID in the source data"
144
+ )
146
145
  else:
147
- assert self.data.document_recordtype.has(
148
- ir.TextItem
149
- ), "No doc text in the source data"
146
+ assert self.data.document_recordtype.has(ir.TextItem), (
147
+ "No doc text in the source data"
148
+ )
150
149
 
151
150
  def task_outputs(self, dep):
152
151
  return dep(
@@ -50,7 +50,7 @@ def shuffle(
50
50
  *,
51
51
  memory=MEMORY,
52
52
  random=None,
53
- tmp_path: Optional[Path] = None
53
+ tmp_path: Optional[Path] = None,
54
54
  ):
55
55
  """Shuffle using temporary file"""
56
56
  if random is None:
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '2026.1.1'
32
- __version_tuple__ = version_tuple = (2026, 1, 1)
31
+ __version__ = version = '2026.2.3'
32
+ __version_tuple__ = version_tuple = (2026, 2, 3)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datamaestro-text
3
- Version: 2026.1.1
3
+ Version: 2026.2.3
4
4
  Summary: Datamaestro module for text-related datasets
5
5
  Project-URL: Homepage, https://github.com/experimaestro/datamaestro_text
6
6
  Project-URL: Documentation, https://datamaestro-text.readthedocs.io/en/latest/
@@ -22,15 +22,9 @@ Classifier: Programming Language :: Python :: 3.12
22
22
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
23
  Requires-Python: >=3.10
24
24
  Requires-Dist: attrs
25
- Requires-Dist: datamaestro>=1.6.2
25
+ Requires-Dist: datamaestro>=1.8.0
26
26
  Requires-Dist: experimaestro
27
27
  Requires-Dist: ir-datasets>=0.5.8
28
- Provides-Extra: dev
29
- Requires-Dist: docutils; extra == 'dev'
30
- Requires-Dist: flake8; extra == 'dev'
31
- Requires-Dist: pytest; extra == 'dev'
32
- Requires-Dist: sphinx<8; extra == 'dev'
33
- Requires-Dist: sphobjinv; extra == 'dev'
34
28
  Description-Content-Type: text/markdown
35
29
 
36
30
  [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit) [![PyPI version](https://badge.fury.io/py/datamaestro-text.svg)](https://badge.fury.io/py/datamaestro-text)
@@ -1,90 +1,91 @@
1
- datamaestro_text/__init__.py,sha256=hU8jZpkXl3F74qIfqnJl7v4nJ9YxfoR7IpJpUREFNRI,248
2
- datamaestro_text/version.py,sha256=KRGjQBj37k6x1t02kZiDs0px7bfHmVSimH49hjV1IAU,710
1
+ datamaestro_text/__init__.py,sha256=MP7ShYx32k5irdgml1PjnmSofzioYQh9rzUEcHs5eys,276
2
+ datamaestro_text/version.py,sha256=edJBGPQ4F2AYJsi0FzQah0-fNB-WSNTAzAc5bmjAFkU,710
3
3
  datamaestro_text/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  datamaestro_text/config/ai/quac.yaml,sha256=h1D7UJo2z1nZ_9MXpDHuQNJG9Ma2oayUdJV6lyumAIg,1103
5
- datamaestro_text/config/com/oscar-corpus.py,sha256=6F2RYOyE9_5uq_t8VrTggWxcFzefFPrmcxQXvXhfia8,723
6
- datamaestro_text/config/com/sentiment140.py,sha256=itfBEgcOniECXKOw8I2dhzyS9LOMsltMLfKK6NGRpVY,1293
5
+ datamaestro_text/config/com/oscar-corpus.py,sha256=aKnp87k0ksxqzA8gjzzC3F_DkWMgQymj0LiYH817Ph8,789
6
+ datamaestro_text/config/com/sentiment140.py,sha256=SaLXv1dbfyzdqfcrLUnRHYeHtfN3O-d_RFtpy2DthnI,1425
7
7
  datamaestro_text/config/com/fastml/goodbooks-10k.yaml,sha256=5ZABxUnBFs2ZnCXtBH8YoBiPb3SocRRdH1DLSfVWF-Y,1172
8
- datamaestro_text/config/com/github/ikat.py,sha256=DCayX-t2OBeW5bOJvRxoQgIH3vy-__mYzdmVcnayAkk,4230
9
- datamaestro_text/config/com/github/aagohary/canard.py,sha256=5fLwCLNBGM_7--naTCDayAMYLvK3yTD8auaEf-dqrb4,1768
10
- datamaestro_text/config/com/github/apple/ml-qrecc.py,sha256=zP3w7A9KSvJVCo44OaB1az1pDKWxE6qXS4qFm3hqg3Y,3064
11
- datamaestro_text/config/com/github/prdwb/orconvqa.py,sha256=MFJYaxJoqJQ6kMDHa2PIE0zoxYTA8Kyl26-vzFoMML0,3032
8
+ datamaestro_text/config/com/github/ikat.py,sha256=lZtwtymutzb09DXaOZ2SWFLCcMgD5gVw56KNM53NtVs,4657
9
+ datamaestro_text/config/com/github/aagohary/canard.py,sha256=7XOvdAX3ZFuS_tcItnBiBXSXgCsiZ-BcyyIZ8397qH4,1927
10
+ datamaestro_text/config/com/github/apple/ml-qrecc.py,sha256=9L5Mj2kS6-KKGHPh0fr3B4OnpO1mDto8_V0glR65Qhg,3039
11
+ datamaestro_text/config/com/github/prdwb/orconvqa.py,sha256=i6K49WtYd_nbofJozBpyBFRzEPePRGEzZ7rbkAmrKcU,3291
12
12
  datamaestro_text/config/com/github/soskek/bookcorpus.yaml,sha256=qJKs35yeEIilEMgNvU3OEqMp1TSn7mDM2T-uYyA7kTU,1607
13
13
  datamaestro_text/config/com/microsoft/wikiqa.yaml,sha256=U7rU-W2Xz1MYv2YXT4jCTj5DsHyM0VssbQPNa3EISaM,540
14
- datamaestro_text/config/com/microsoft/msmarco/passage.py,sha256=hN2KOdi6ToHlodozqsYAOtxaqiUGkGGtRtb3RFSgnEU,11645
15
- datamaestro_text/config/com/smashwords/bookcorpus.py,sha256=LgUcnR-z99kTrZj6QaCLuLrj1bG-wHMM5GlVNmbrY2k,851
14
+ datamaestro_text/config/com/microsoft/msmarco/passage.py,sha256=IFkIMt2DKgdj6wcvzsNocV6zQpt4bNv3l1XhOuxpY5Q,14464
15
+ datamaestro_text/config/com/smashwords/bookcorpus.py,sha256=Ahg5nlZl6fv7e398sUQ1GpJvEh9vVKrt2yyRqRNlNhQ,940
16
16
  datamaestro_text/config/edu/cornell/nlvr.yaml,sha256=9Yk5VZMncSmrP7JNuGXqExksgX5nQ_Zfnlps8hWze3Q,921
17
17
  datamaestro_text/config/edu/stanford/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- datamaestro_text/config/edu/stanford/aclimdb.py,sha256=QtriReAVsbJlxkgfJWQCZdCeJ9LswYnOR9mFrgghL9c,647
19
- datamaestro_text/config/edu/stanford/glove.py,sha256=bXxwiJqT8alPs5MwwxVuY0xBO8g1QWhm2L3AKVskTlA,2391
18
+ datamaestro_text/config/edu/stanford/aclimdb.py,sha256=Q6_1RGcTsUD7C4wYCb6YtXFz6Lz5JsPnYhPj5LKLJlg,732
19
+ datamaestro_text/config/edu/stanford/glove.py,sha256=BtrHRnelBhu6wkMzhfQ6dSyx1ePeS95PQ3LticuPLqY,3075
20
20
  datamaestro_text/config/edu/stanford/im2p.yaml,sha256=JoToNyEPpmwdyLFedCBot5ypyw7p9rzi12mGXJuZin0,2909
21
21
  datamaestro_text/config/edu/upenn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  datamaestro_text/config/edu/upenn/ldc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- datamaestro_text/config/edu/upenn/ldc/aquaint.py,sha256=YEU3kIXzv71Vjg9bUoXyQ-vSLvC-y4LlE3mJQf38XuY,1589
24
- datamaestro_text/config/fr/granddebat.py,sha256=JRLC3q6o-XhJECjAh40w2p40pCSRw9K3-YMDUpdNwMM,7016
23
+ datamaestro_text/config/edu/upenn/ldc/aquaint.py,sha256=neyOMi1SceTKTeJV6qE2cSS2nhT1oMD82LUGCasVUz8,1912
24
+ datamaestro_text/config/fr/granddebat.py,sha256=A5dhdPhSuiJZlD_WRD33GA99Dpxs5KY5A4n6SfsuE18,6995
25
25
  datamaestro_text/config/gov/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
26
  datamaestro_text/config/gov/nist/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
- datamaestro_text/config/gov/nist/ir/covid.py,sha256=wn2E7sQ8M6pAucVD4sKJYImyzKUKphyiDFJD0oYRCbg,4004
27
+ datamaestro_text/config/gov/nist/ir/covid.py,sha256=y06hJxT_L_1LbUTHIdM1R7qpp_yLO4sayvRVY-Sj2A8,4442
28
28
  datamaestro_text/config/gov/nist/trec/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
- datamaestro_text/config/gov/nist/trec/adhoc.py,sha256=vcFaLlZ-chwDt013MYI8bYZ2ug39jPaeimsiok_sqfU,11035
29
+ datamaestro_text/config/gov/nist/trec/adhoc.py,sha256=d99rU30_itO3-PM2TG8tM_1cw67m-eN2d9WrM7JLTvU,14781
30
30
  datamaestro_text/config/gov/nist/trec/clueweb.yaml,sha256=sm4UbdtMzWoDVPsewtVDS7Vj2jBOdgp18Xqo1X4ysQc,792
31
31
  datamaestro_text/config/gov/nist/trec/deeplearning.yaml,sha256=QGM7PtXLJRttNdOPE16o7-k3e5tA9HgcaM_-qFDV_5Q,2125
32
32
  datamaestro_text/config/gov/nist/trec/index.yaml,sha256=oSUhUmtukf5oXqUbJLhae8xZx7Uac5V6uZoUB-RJ7Sw,2711
33
- datamaestro_text/config/gov/nist/trec/tipster.py,sha256=DirpnHpS10e27LcL7v9ksKreKVy7EgfVhyztV49VRds,5364
33
+ datamaestro_text/config/gov/nist/trec/tipster.py,sha256=LMwTbNxOgbAOiEgb7S92KN1i-aEGh3T9HM8m44-2mfk,7542
34
34
  datamaestro_text/config/gov/nist/trec/web.yaml,sha256=iNIJ-PCeLwpUaJByJesyCqfFJolWCts_VETACha3hfQ,563
35
35
  datamaestro_text/config/io/github/rajpurkar/squad.yaml,sha256=JchkTruEhVd0uOTGgek2tOgBL4UT3G3KVASvZcNigLg,1143
36
- datamaestro_text/config/io/github/thunlp/fewrel.py,sha256=rg_qAnMrXYUZhQYxA12r_Npl0ggyfTLJQjdSCjU0QxM,1228
36
+ datamaestro_text/config/io/github/thunlp/fewrel.py,sha256=j4yVKIOoNAhZGnvzxD9-7By8nMe0gHjnc3iovpXjL68,1375
37
37
  datamaestro_text/config/io/metamind/research/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
- datamaestro_text/config/io/metamind/research/wikitext.py,sha256=DjyBmG74JvuMt9RpMwuLAnxzOdByIWsk4VnXgkJp1NM,2307
38
+ datamaestro_text/config/io/metamind/research/wikitext.py,sha256=v685QWAS6dcBqF9zU9uj54wZJ-kv-xxpiJXOA_YpNeg,2779
39
39
  datamaestro_text/config/net/mattmahoney/enwiki.yaml,sha256=HCUn3s0AydXX3BjJ6yUXY0vGLGWSBkOCaDhQ4PA2Adg,2452
40
40
  datamaestro_text/config/org/acm/recsys/cb2014.yaml,sha256=5SAK3Am1k0HFugSSCIQN5mLPBfr1zZZAkhLrSH5pHQc,1274
41
41
  datamaestro_text/config/org/cocodataset/index.yaml,sha256=KISJChMeKwlZbSnHmRcGMsm6jbcFGVe1aA4GhP2fzqw,474
42
- datamaestro_text/config/org/grouplens/movielens.py,sha256=tV6OSTDdtjll1dQBCsYIlsBbtOO-MCiLles2aj0MgDA,1840
43
- datamaestro_text/config/org/universaldependencies/french.py,sha256=etedb3_SC-fV5Oa2rM4_smZk6t4CPiNvU4C4keUFZHY,2214
42
+ datamaestro_text/config/org/grouplens/movielens.py,sha256=NWsJDjZYewYeDY7fJ5Kt5iefBvFcHRsecfUQlpDQPX4,1712
43
+ datamaestro_text/config/org/universaldependencies/french.py,sha256=nHITYOW3kHKhmDHU80xAEPUJfWlsmytDZ744iSjJL1g,2333
44
44
  datamaestro_text/config/uk/ac/ucl/cs/qangaroo.yaml,sha256=IBy82CDNNLjJPNPzues1EgDXu0A5WDvUFeVNSOyrIpI,1137
45
45
  datamaestro_text/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
- datamaestro_text/data/embeddings.py,sha256=qdeZ4m6drFNkqW_4pKxJKdJHtOnDRs0P7QVJ7AO6xFE,1127
46
+ datamaestro_text/data/embeddings.py,sha256=YMoNLyVvaOt86bq_8X71_Fgu7EYYI71vr67xSQsi57I,1128
47
47
  datamaestro_text/data/recommendation.py,sha256=wHV_9SXSclouuXaBmvwg3ncZLOFfdKRZG3IHkkPJX9Y,279
48
48
  datamaestro_text/data/tagging.py,sha256=yWm7bNLks77cAySa1aZNCmLFxTTqhqXZ0PaoaEYU6hI,697
49
- datamaestro_text/data/text.py,sha256=_9J7-j06BOIZ1HsOcBsWy_asanAKkDoZIsWWMMj4tm4,496
50
- datamaestro_text/data/conversation/__init__.py,sha256=esOWnSImMlQs5QtfxUt559ABLd6a5wwoNpj9XtIq71Y,159
51
- datamaestro_text/data/conversation/base.py,sha256=rLOP2dqvMr2E9ONKgPF6CKUCAEHcXt5WqabJyo76AEI,7508
52
- datamaestro_text/data/conversation/canard.py,sha256=FI02rPNuzSsA_uXOcgcuQn31d3QahmHndzYCV1Zm8rk,3305
53
- datamaestro_text/data/conversation/ikat.py,sha256=djxTv0vHLXIUPzfmwpB25fyUWJNVPqthTtDbQ6CzmQo,4363
54
- datamaestro_text/data/conversation/orconvqa.py,sha256=RL9cpK7QK2dsk_j2e4Th8wzGFq-G3ww_EjdM1eKh-8o,3764
49
+ datamaestro_text/data/text.py,sha256=Lln4eoegU9B27oS-2mv3eEQC6MyRBgVhoewQ2-YNxEQ,497
50
+ datamaestro_text/data/conversation/__init__.py,sha256=Kk7FxPz_0oGO2PtIa8zH7UBqbCUsywTHfA-yKd_KO6c,284
51
+ datamaestro_text/data/conversation/base.py,sha256=gF_-izQ1ijX7w49pKQvjfjUVzrX3VSHXxcqVIPWmAfY,7488
52
+ datamaestro_text/data/conversation/canard.py,sha256=aYpkHzuJWGT3-myFNUjCYAtvG3gVh_d3Zc5lyiasQ04,3290
53
+ datamaestro_text/data/conversation/ikat.py,sha256=hoGqHUWyT8BhC_ouUmnwoh93B2jGLHn8uc6npKP4Sl8,4319
54
+ datamaestro_text/data/conversation/orconvqa.py,sha256=zNp02jyYgny0qtIFOMjmrUy7hG8VKWcELHWrg3FBCc0,3764
55
55
  datamaestro_text/data/conversation/qrecc.py,sha256=es4GmqPtE63A7O_GARe8Zy3rQvuLEhAvUA7CfN_nMeA,2562
56
56
  datamaestro_text/data/debate/__init__.py,sha256=PzCV3Bd9fmonE-OQp4VtK1NglH42-iv34WAWUIU-eYk,187
57
57
  datamaestro_text/data/debate/granddebat.py,sha256=4-HMfgvF2bPru56D3hkA1E2bN3dgIUmcvX9eOIXroLA,2176
58
- datamaestro_text/data/ir/__init__.py,sha256=ZRJrUeeUyD1ncMN5JINVvFJ2lDr3KsbgiiEBJkczSi0,9814
59
- datamaestro_text/data/ir/base.py,sha256=uwIiKn0ryK5DWUQsEegeTs6bye0uAOGl0XVm_cvV3ZU,1506
58
+ datamaestro_text/data/ir/__init__.py,sha256=oYI7eIScg-olxPh95XBgTK-E2PunieXvqQPlrRlHU8M,9799
59
+ datamaestro_text/data/ir/base.py,sha256=ksluGOOzOwbdZ2SPnwiDMMUhBa6P1Ti2sr6Ch5xXUgg,1493
60
60
  datamaestro_text/data/ir/cord19.py,sha256=yu1Khgy6AZjH2BPQKdnxDid0vQgQ8zvb8-FQlHH-GOU,1465
61
61
  datamaestro_text/data/ir/csv.py,sha256=0jnaV-wKLgslH7izR-xP_RX7l90vykQTn3bPhaCFR-c,1027
62
- datamaestro_text/data/ir/data.py,sha256=ERmmOxz_9mUkIuccNbzUa5Y6gVLLVDdyc4cCxbCCUbY,20
63
- datamaestro_text/data/ir/formats.py,sha256=eyP7PJ6A4Pd1uv3nbeU1N_Q3Bee7XSYTaYsiHP1MFns,3639
62
+ datamaestro_text/data/ir/data.py,sha256=6ASVsyVVfiSd1m8C8QTrxVLnFVmtoW3d9c9nQ07zlbY,34
63
+ datamaestro_text/data/ir/formats.py,sha256=rKflCuY8UBpXC3nltBqzC4waWYoxuyP91xJvG7p690Y,3630
64
64
  datamaestro_text/data/ir/huggingface.py,sha256=G71VFDN-SllZy4LFxumEbCumEJvb5-orAbLemHjWhiA,956
65
- datamaestro_text/data/ir/stores.py,sha256=A4Ew0L4P6iLLmDKhxqjjVkHcz797BHf4d76YguGkB1A,4412
65
+ datamaestro_text/data/ir/stores.py,sha256=rdOwYCG_NzHSsUQpJ1aneiA2SDWrcfdi16aY-df852U,4408
66
66
  datamaestro_text/data/ir/trec.py,sha256=IOtQRMUz8zx-dYEMR2NIIM6qXEUjsV0eVOhGvKIRJK4,1974
67
67
  datamaestro_text/data/ir/utils.py,sha256=6-GhXVtgkBZGhIs2-ODZua_3DmKjSSVydStpHDqbAwE,833
68
+ datamaestro_text/datasets/__init__.py,sha256=ORn-Q1gGibg-N5grVc7MqOYfExels3FRI51oQ4xI1QA,34
68
69
  datamaestro_text/datasets/irds/__init__.py,sha256=Tq0HN1qojnZYLBumM59BuTkz7r0gcu-5OXmDDLgPpAc,707
69
- datamaestro_text/datasets/irds/data.py,sha256=5ZtJTEV9qtbl_Do4VR6EvYoxPTlsRkkjoBunXDLfmHI,23012
70
+ datamaestro_text/datasets/irds/data.py,sha256=sIU7_rt4I1E9rjkIGcpNfbD5mtO97vxFsUDmouRMDV4,22914
70
71
  datamaestro_text/datasets/irds/datasets.py,sha256=CJ8MA44XCwIQGZTzYIJnR-qFm890rUZZB7C3lKIwNyY,5627
71
72
  datamaestro_text/datasets/irds/helpers.py,sha256=fGE-fbuJbXdTzl1qo55247jzn9cvApY-d82GJBgfY1E,3982
72
73
  datamaestro_text/datasets/irds/utils.py,sha256=m30JLIrV_HgilN11TvY9dGTyumES6LLzWZDUAMT915M,1425
73
- datamaestro_text/download/tmdb.py,sha256=kU_Vz9jhznlyeKMHziVu58IHoWv8zPu6CZTHVNQvmu4,4009
74
+ datamaestro_text/download/tmdb.py,sha256=sfnSUJwGSjBsLNVVhT30db2m0R8mrRkDZpbpBUt7GMg,3960
74
75
  datamaestro_text/interfaces/plaintext.py,sha256=cWfS_xjqZxQ0EV4Ax5BEarZ4lnhQ1I7mc_vgfBgE76w,885
75
76
  datamaestro_text/interfaces/trec.py,sha256=GrP0N_Hcj5f73KS6CSzkyN4aaI-XoBJ19oVMpHVR3QM,3579
76
77
  datamaestro_text/test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
78
  datamaestro_text/test/test_datasets.py,sha256=hD1pe-CjEnOj0JNqDLasz_97ltFzMbYfyYBy1QyYdf8,202
78
- datamaestro_text/test/test_documented.py,sha256=Kl90fz_r_dsV0oXE1Mad34GiQyZ9dc6bfZKNtxP0O2s,453
79
+ datamaestro_text/test/test_documented.py,sha256=VaDbX8Ea5rKoZ1X0ZSmHnQ-rLHdHGBxUVyX67sRcx4I,439
79
80
  datamaestro_text/transforms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
80
- datamaestro_text/transforms/ir/__init__.py,sha256=Pb8C-jwjtCur6gU-Lv4AosSFFKw2o6jMQcJE1A_5PD8,6555
81
+ datamaestro_text/transforms/ir/__init__.py,sha256=7D6wurKVQf-f2mu1I3tT-baQbKo7yRCxW8pOHh-MSjM,6539
81
82
  datamaestro_text/utils/__init__.py,sha256=2449YLTAtKJzkmt84Mu8sBRCCveNs5fiaqTCK_p5ha0,3340
82
83
  datamaestro_text/utils/files.py,sha256=jhcirufgTztEkx1hs2-qgScEcHnIcGF_BjCeuCSsNv0,2838
83
84
  datamaestro_text/utils/iter.py,sha256=QBajeSPLHvkeh6BCTZDSqWlOYNjwUDvgTTZ_YxJntXw,2701
84
85
  datamaestro_text/utils/randomstream.py,sha256=_-boH4IIqN8qcl3IktjpNp9vmF4TWRzHUSNVwg7WAr8,973
85
- datamaestro_text/utils/shuffle.py,sha256=o8JTz3mr0lYWyv0zEh91jEK12ci1etMiUnzh5GkOHCM,3490
86
- datamaestro_text-2026.1.1.dist-info/METADATA,sha256=PnX4hQbQmrRUUBily4GEDJ53HsE-u1QqhK7gcv5nWeU,2103
87
- datamaestro_text-2026.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
88
- datamaestro_text-2026.1.1.dist-info/entry_points.txt,sha256=lO1P5hE183L5qEEVHlG8d_ik0HNXnX7Eo87cQLdcl-Y,111
89
- datamaestro_text-2026.1.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
90
- datamaestro_text-2026.1.1.dist-info/RECORD,,
86
+ datamaestro_text/utils/shuffle.py,sha256=xXzgBQ8An7tKboxI0z123Tl6ywXI4S0tWf8MnfOon0c,3491
87
+ datamaestro_text-2026.2.3.dist-info/METADATA,sha256=8r5gtB3jtwhAUDZ-W69U20Whwt-7VEMjJo8ZjeFfpiU,1886
88
+ datamaestro_text-2026.2.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
89
+ datamaestro_text-2026.2.3.dist-info/entry_points.txt,sha256=lO1P5hE183L5qEEVHlG8d_ik0HNXnX7Eo87cQLdcl-Y,111
90
+ datamaestro_text-2026.2.3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
91
+ datamaestro_text-2026.2.3.dist-info/RECORD,,