datamaestro-text 2026.1.1__py3-none-any.whl → 2026.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. datamaestro_text/__init__.py +1 -1
  2. datamaestro_text/config/com/github/ikat.py +0 -1
  3. datamaestro_text/config/com/oscar-corpus.py +1 -1
  4. datamaestro_text/config/edu/stanford/aclimdb.py +1 -1
  5. datamaestro_text/config/edu/stanford/glove.py +0 -1
  6. datamaestro_text/config/gov/nist/ir/covid.py +1 -2
  7. datamaestro_text/config/io/metamind/research/wikitext.py +1 -1
  8. datamaestro_text/data/conversation/__init__.py +6 -6
  9. datamaestro_text/data/conversation/base.py +2 -2
  10. datamaestro_text/data/conversation/canard.py +3 -4
  11. datamaestro_text/data/conversation/ikat.py +0 -1
  12. datamaestro_text/data/conversation/orconvqa.py +3 -3
  13. datamaestro_text/data/embeddings.py +1 -0
  14. datamaestro_text/data/ir/__init__.py +1 -1
  15. datamaestro_text/data/ir/base.py +1 -1
  16. datamaestro_text/data/ir/data.py +1 -1
  17. datamaestro_text/data/ir/formats.py +2 -1
  18. datamaestro_text/data/ir/stores.py +1 -1
  19. datamaestro_text/data/text.py +1 -0
  20. datamaestro_text/datasets/__init__.py +1 -0
  21. datamaestro_text/datasets/irds/data.py +1 -6
  22. datamaestro_text/download/tmdb.py +0 -1
  23. datamaestro_text/transforms/ir/__init__.py +12 -13
  24. datamaestro_text/utils/shuffle.py +1 -1
  25. datamaestro_text/version.py +2 -2
  26. {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.2.dist-info}/METADATA +1 -7
  27. {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.2.dist-info}/RECORD +30 -29
  28. {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.2.dist-info}/WHEEL +0 -0
  29. {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.2.dist-info}/entry_points.txt +0 -0
  30. {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.2.dist-info}/licenses/LICENSE +0 -0
@@ -1,6 +1,6 @@
1
1
  import datamaestro
2
2
 
3
- from .version import version, version_tuple
3
+ from .version import version as version, version_tuple as version_tuple
4
4
 
5
5
 
6
6
  class Repository(datamaestro.Repository):
@@ -1,6 +1,5 @@
1
1
  # See documentation on https://datamaestro.readthedocs.io
2
2
 
3
- import bz2
4
3
  from datamaestro.download import reference
5
4
  from datamaestro.definitions import datatasks, datatags, dataset
6
5
  from datamaestro_text.data.conversation.base import ConversationUserTopics
@@ -1,4 +1,4 @@
1
- from datamaestro.definitions import datatasks, datatags, dataset
1
+ from datamaestro.definitions import dataset
2
2
  from datamaestro.download.single import filedownloader
3
3
  from datamaestro_text.data.text import TextFile
4
4
  from datamaestro.utils import HashCheck
@@ -1,5 +1,5 @@
1
1
  from datamaestro.data.ml import FolderBased, Supervised
2
- from datamaestro.definitions import datatasks, datatags, dataset
2
+ from datamaestro.definitions import dataset
3
3
  from datamaestro.download.archive import tardownloader
4
4
 
5
5
 
@@ -5,7 +5,6 @@ GloVe is an unsupervised learning algorithm for obtaining vector representations
5
5
  """
6
6
 
7
7
  from datamaestro.definitions import dataset
8
- from datamaestro.data import Base, Generic
9
8
  from datamaestro.download import reference
10
9
  from datamaestro.download.archive import zipdownloader
11
10
  from datamaestro.download.single import filedownloader
@@ -1,5 +1,4 @@
1
- """CORD-19 dataset
2
- """
1
+ """CORD-19 dataset"""
3
2
 
4
3
  from datamaestro.annotations.agreement import useragreement
5
4
  from datamaestro.definitions import datatasks, dataset
@@ -1,4 +1,4 @@
1
- from datamaestro.data import Base, File
1
+ from datamaestro.data import File
2
2
  from datamaestro.definitions import (
3
3
  datatasks,
4
4
  datatags,
@@ -1,8 +1,8 @@
1
1
  from .base import (
2
- AnswerEntry,
3
- ConversationDataset,
4
- ConversationHistory,
5
- ConversationHistoryItem,
6
- DecontextualizedItem,
7
- EntryType,
2
+ AnswerEntry as AnswerEntry,
3
+ ConversationDataset as ConversationDataset,
4
+ ConversationHistory as ConversationHistory,
5
+ ConversationHistoryItem as ConversationHistoryItem,
6
+ DecontextualizedItem as DecontextualizedItem,
7
+ EntryType as EntryType,
8
8
  )
@@ -2,13 +2,13 @@ from abc import ABC, abstractmethod
2
2
  from enum import Enum
3
3
  from datamaestro_text.data.ir.base import IDItem, SimpleTextItem
4
4
  from experimaestro import Param
5
- from typing import Dict, Generic, Iterator, List, Optional, Sequence, Tuple
5
+ from typing import Dict, Iterator, List, Optional, Sequence, Tuple
6
6
  from attr import define
7
7
  from datamaestro.record import record_type
8
8
  from datamaestro.data import Base
9
9
  from datamaestro.record import Record, Item
10
10
  from datamaestro_text.data.ir import TopicRecord, Topics
11
- from datamaestro_text.utils.iter import FactoryIterable, LazyList, RangeView
11
+ from datamaestro_text.utils.iter import FactoryIterable, LazyList
12
12
 
13
13
  # ---- Basic types
14
14
 
@@ -11,7 +11,6 @@ from datamaestro_text.data.conversation.base import (
11
11
  EntryType,
12
12
  )
13
13
  from datamaestro_text.data.ir import IDItem, SimpleTextItem
14
- import logging
15
14
 
16
15
 
17
16
  @define(kw_only=True)
@@ -82,9 +81,9 @@ class CanardDataset(ConversationDataset, File):
82
81
  )
83
82
  else:
84
83
  # The utterance before the last is the last user query
85
- assert (
86
- entry.history[-2] == history[-1][SimpleTextItem].text
87
- ), f"{entry.dialogue_id} {entry.history} / {history[-4:-1]}"
84
+ assert entry.history[-2] == history[-1][SimpleTextItem].text, (
85
+ f"{entry.dialogue_id} {entry.history} / {history[-4:-1]}"
86
+ )
88
87
 
89
88
  # The last utterance is the system side
90
89
  history.append(
@@ -5,7 +5,6 @@ import logging
5
5
  from datamaestro.data import File
6
6
  from datamaestro.record import Record
7
7
 
8
- from datamaestro_text.data.ir import Topics
9
8
  from datamaestro_text.data.ir.base import (
10
9
  IDItem,
11
10
  SimpleTextItem,
@@ -113,9 +113,9 @@ class OrConvQADataset(ConversationDataset, File):
113
113
  if relevance > 0:
114
114
  relevances[rank] = (entry.answer.answer_start, None)
115
115
 
116
- assert (
117
- len(relevances) <= 1
118
- ), f"Too many relevance labels ({len(relevances)}) for {entry.query_id}"
116
+ assert len(relevances) <= 1, (
117
+ f"Too many relevance labels ({len(relevances)}) for {entry.query_id}"
118
+ )
119
119
 
120
120
  history.append(
121
121
  Record(
@@ -20,6 +20,7 @@ class WordEmbeddings(Base):
20
20
 
21
21
  class WordEmbeddingsText(WordEmbeddings, File):
22
22
  """Word embeddings as a text word / values"""
23
+
23
24
  encoding: Meta[str] = "utf-8"
24
25
 
25
26
  def load(self):
@@ -6,7 +6,7 @@ from functools import cached_property
6
6
  import logging
7
7
  from pathlib import Path
8
8
  from attrs import define
9
- from typing import Callable, Dict, Iterator, List, Optional, Tuple, Type, TYPE_CHECKING
9
+ from typing import Callable, Dict, Iterator, List, Optional, Tuple, Type
10
10
  import random
11
11
  from experimaestro import Config
12
12
  from datamaestro.definitions import datatasks, Param, Meta
@@ -1,7 +1,7 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from attrs import define
3
3
  from typing import List
4
- from datamaestro.record import Record, Item, record_type
4
+ from datamaestro.record import Record, Item
5
5
 
6
6
 
7
7
  TopicRecord = DocumentRecord = Record
@@ -1 +1 @@
1
- from .base import *
1
+ from .base import * # noqa: F403
@@ -1,5 +1,5 @@
1
1
  from functools import cached_property
2
- from typing import ClassVar, Tuple, List
2
+ from typing import Tuple, List
3
3
  from attrs import define
4
4
  from datamaestro.record import record_type
5
5
  from ir_datasets.datasets.wapo import WapoDocMedia
@@ -10,6 +10,7 @@ from ir_datasets.datasets.cord19 import Cord19FullTextSection
10
10
  @define
11
11
  class DocumentWithTitle(TextItem):
12
12
  """Web document with title and body"""
13
+
13
14
  title: str
14
15
  body: str
15
16
 
@@ -82,7 +82,7 @@ class IKatClueWeb22DocumentStore(LZ4DocumentStore):
82
82
 
83
83
  file_checksum = hasher.hexdigest()
84
84
  assert file_checksum == checksum, (
85
- f"Expected {checksum}, " f"got {file_checksum} for {filename}"
85
+ f"Expected {checksum}, got {file_checksum} for {filename}"
86
86
  )
87
87
 
88
88
  # Get the MD5 hashes of all the passages
@@ -14,6 +14,7 @@ class TrainingText(Supervised):
14
14
 
15
15
  class TextFolder(Folder):
16
16
  "A folder composed of texts"
17
+
17
18
  pass
18
19
 
19
20
 
@@ -0,0 +1 @@
1
+ # IR datasets integration package
@@ -1,6 +1,5 @@
1
1
  import logging
2
2
  from abc import ABC, abstractmethod
3
- from dataclasses import dataclass
4
3
  from functools import cached_property, partial
5
4
  from pathlib import Path
6
5
  from typing import Dict, Iterator, List, NamedTuple, Tuple, Type
@@ -613,11 +612,7 @@ class Cast2022TopicsHandler(CastTopicsHandler):
613
612
  records = []
614
613
  nodes: Dict[str, ConversationTreeNode] = {}
615
614
 
616
- for (
617
- query
618
- ) in (
619
- self.dataset.dataset.queries_iter()
620
- ): # type: _irds.trec_cast.Cast2022Query
615
+ for query in self.dataset.dataset.queries_iter(): # type: _irds.trec_cast.Cast2022Query
621
616
  parent = nodes[query.parent_id] if query.parent_id else None
622
617
 
623
618
  if query.participant == "User":
@@ -10,7 +10,6 @@ from collections import namedtuple
10
10
 
11
11
  from datamaestro.download import Download
12
12
  from datamaestro.definitions import AbstractDataset
13
- from datamaestro.utils import TemporaryDirectory
14
13
 
15
14
  APIKEY_KEY = "org.themoviedb.apikey"
16
15
 
@@ -2,7 +2,6 @@ import logging
2
2
  import gzip
3
3
  from abc import ABC, abstractmethod
4
4
  from pathlib import Path
5
- from typing import Type
6
5
  from experimaestro import Config, Task, Param, Annotated, pathgenerator, Option, tqdm
7
6
  import numpy as np
8
7
  from datamaestro.record import RecordType
@@ -131,22 +130,22 @@ class ShuffledTrainingTripletsLines(Task):
131
130
 
132
131
  def __validate__(self):
133
132
  if self.topic_ids:
134
- assert self.data.topic_recordtype.has(
135
- ir.IDItem
136
- ), f"No topic ID in the source data ({self.data.topic_recordtype})"
133
+ assert self.data.topic_recordtype.has(ir.IDItem), (
134
+ f"No topic ID in the source data ({self.data.topic_recordtype})"
135
+ )
137
136
  else:
138
- assert self.data.topic_recordtype.has(
139
- ir.TextItem
140
- ), f"No topic text in the source data ({self.data.topic_recordtype})"
137
+ assert self.data.topic_recordtype.has(ir.TextItem), (
138
+ f"No topic text in the source data ({self.data.topic_recordtype})"
139
+ )
141
140
 
142
141
  if self.doc_ids:
143
- assert self.data.document_recordtype.has(
144
- ir.IDItem
145
- ), "No doc ID in the source data"
142
+ assert self.data.document_recordtype.has(ir.IDItem), (
143
+ "No doc ID in the source data"
144
+ )
146
145
  else:
147
- assert self.data.document_recordtype.has(
148
- ir.TextItem
149
- ), "No doc text in the source data"
146
+ assert self.data.document_recordtype.has(ir.TextItem), (
147
+ "No doc text in the source data"
148
+ )
150
149
 
151
150
  def task_outputs(self, dep):
152
151
  return dep(
@@ -50,7 +50,7 @@ def shuffle(
50
50
  *,
51
51
  memory=MEMORY,
52
52
  random=None,
53
- tmp_path: Optional[Path] = None
53
+ tmp_path: Optional[Path] = None,
54
54
  ):
55
55
  """Shuffle using temporary file"""
56
56
  if random is None:
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '2026.1.1'
32
- __version_tuple__ = version_tuple = (2026, 1, 1)
31
+ __version__ = version = '2026.2.2'
32
+ __version_tuple__ = version_tuple = (2026, 2, 2)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datamaestro-text
3
- Version: 2026.1.1
3
+ Version: 2026.2.2
4
4
  Summary: Datamaestro module for text-related datasets
5
5
  Project-URL: Homepage, https://github.com/experimaestro/datamaestro_text
6
6
  Project-URL: Documentation, https://datamaestro-text.readthedocs.io/en/latest/
@@ -25,12 +25,6 @@ Requires-Dist: attrs
25
25
  Requires-Dist: datamaestro>=1.6.2
26
26
  Requires-Dist: experimaestro
27
27
  Requires-Dist: ir-datasets>=0.5.8
28
- Provides-Extra: dev
29
- Requires-Dist: docutils; extra == 'dev'
30
- Requires-Dist: flake8; extra == 'dev'
31
- Requires-Dist: pytest; extra == 'dev'
32
- Requires-Dist: sphinx<8; extra == 'dev'
33
- Requires-Dist: sphobjinv; extra == 'dev'
34
28
  Description-Content-Type: text/markdown
35
29
 
36
30
  [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit) [![PyPI version](https://badge.fury.io/py/datamaestro-text.svg)](https://badge.fury.io/py/datamaestro-text)
@@ -1,11 +1,11 @@
1
- datamaestro_text/__init__.py,sha256=hU8jZpkXl3F74qIfqnJl7v4nJ9YxfoR7IpJpUREFNRI,248
2
- datamaestro_text/version.py,sha256=KRGjQBj37k6x1t02kZiDs0px7bfHmVSimH49hjV1IAU,710
1
+ datamaestro_text/__init__.py,sha256=MP7ShYx32k5irdgml1PjnmSofzioYQh9rzUEcHs5eys,276
2
+ datamaestro_text/version.py,sha256=PcJXzZYuv0SaBM1rOymP9IhKDJxqcLKUPHINlOD-hL0,710
3
3
  datamaestro_text/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  datamaestro_text/config/ai/quac.yaml,sha256=h1D7UJo2z1nZ_9MXpDHuQNJG9Ma2oayUdJV6lyumAIg,1103
5
- datamaestro_text/config/com/oscar-corpus.py,sha256=6F2RYOyE9_5uq_t8VrTggWxcFzefFPrmcxQXvXhfia8,723
5
+ datamaestro_text/config/com/oscar-corpus.py,sha256=gEWz8Nxpv7VXU8X-vfRZLwPfq0KXtkGSNtsfoqfcUI0,702
6
6
  datamaestro_text/config/com/sentiment140.py,sha256=itfBEgcOniECXKOw8I2dhzyS9LOMsltMLfKK6NGRpVY,1293
7
7
  datamaestro_text/config/com/fastml/goodbooks-10k.yaml,sha256=5ZABxUnBFs2ZnCXtBH8YoBiPb3SocRRdH1DLSfVWF-Y,1172
8
- datamaestro_text/config/com/github/ikat.py,sha256=DCayX-t2OBeW5bOJvRxoQgIH3vy-__mYzdmVcnayAkk,4230
8
+ datamaestro_text/config/com/github/ikat.py,sha256=nAmBre9zNlnGhx-C50EvLGvHqtoB7Ce-mZUZqM_ymO8,4219
9
9
  datamaestro_text/config/com/github/aagohary/canard.py,sha256=5fLwCLNBGM_7--naTCDayAMYLvK3yTD8auaEf-dqrb4,1768
10
10
  datamaestro_text/config/com/github/apple/ml-qrecc.py,sha256=zP3w7A9KSvJVCo44OaB1az1pDKWxE6qXS4qFm3hqg3Y,3064
11
11
  datamaestro_text/config/com/github/prdwb/orconvqa.py,sha256=MFJYaxJoqJQ6kMDHa2PIE0zoxYTA8Kyl26-vzFoMML0,3032
@@ -15,8 +15,8 @@ datamaestro_text/config/com/microsoft/msmarco/passage.py,sha256=hN2KOdi6ToHlodoz
15
15
  datamaestro_text/config/com/smashwords/bookcorpus.py,sha256=LgUcnR-z99kTrZj6QaCLuLrj1bG-wHMM5GlVNmbrY2k,851
16
16
  datamaestro_text/config/edu/cornell/nlvr.yaml,sha256=9Yk5VZMncSmrP7JNuGXqExksgX5nQ_Zfnlps8hWze3Q,921
17
17
  datamaestro_text/config/edu/stanford/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- datamaestro_text/config/edu/stanford/aclimdb.py,sha256=QtriReAVsbJlxkgfJWQCZdCeJ9LswYnOR9mFrgghL9c,647
19
- datamaestro_text/config/edu/stanford/glove.py,sha256=bXxwiJqT8alPs5MwwxVuY0xBO8g1QWhm2L3AKVskTlA,2391
18
+ datamaestro_text/config/edu/stanford/aclimdb.py,sha256=gv_4IauUCURbMzMWpSMyx3qgOAXVQuBwKR-mMaKExpc,626
19
+ datamaestro_text/config/edu/stanford/glove.py,sha256=FiVYbzQMD11CiKfklrggtm7YXBCevyTXXwhehRd65H8,2348
20
20
  datamaestro_text/config/edu/stanford/im2p.yaml,sha256=JoToNyEPpmwdyLFedCBot5ypyw7p9rzi12mGXJuZin0,2909
21
21
  datamaestro_text/config/edu/upenn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  datamaestro_text/config/edu/upenn/ldc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -24,7 +24,7 @@ datamaestro_text/config/edu/upenn/ldc/aquaint.py,sha256=YEU3kIXzv71Vjg9bUoXyQ-vS
24
24
  datamaestro_text/config/fr/granddebat.py,sha256=JRLC3q6o-XhJECjAh40w2p40pCSRw9K3-YMDUpdNwMM,7016
25
25
  datamaestro_text/config/gov/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
26
  datamaestro_text/config/gov/nist/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
- datamaestro_text/config/gov/nist/ir/covid.py,sha256=wn2E7sQ8M6pAucVD4sKJYImyzKUKphyiDFJD0oYRCbg,4004
27
+ datamaestro_text/config/gov/nist/ir/covid.py,sha256=i9xxZcrKeX1gezK_TE68oropMF9PKHX2ofyREEUWYPY,4003
28
28
  datamaestro_text/config/gov/nist/trec/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
29
  datamaestro_text/config/gov/nist/trec/adhoc.py,sha256=vcFaLlZ-chwDt013MYI8bYZ2ug39jPaeimsiok_sqfU,11035
30
30
  datamaestro_text/config/gov/nist/trec/clueweb.yaml,sha256=sm4UbdtMzWoDVPsewtVDS7Vj2jBOdgp18Xqo1X4ysQc,792
@@ -35,7 +35,7 @@ datamaestro_text/config/gov/nist/trec/web.yaml,sha256=iNIJ-PCeLwpUaJByJesyCqfFJo
35
35
  datamaestro_text/config/io/github/rajpurkar/squad.yaml,sha256=JchkTruEhVd0uOTGgek2tOgBL4UT3G3KVASvZcNigLg,1143
36
36
  datamaestro_text/config/io/github/thunlp/fewrel.py,sha256=rg_qAnMrXYUZhQYxA12r_Npl0ggyfTLJQjdSCjU0QxM,1228
37
37
  datamaestro_text/config/io/metamind/research/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
- datamaestro_text/config/io/metamind/research/wikitext.py,sha256=DjyBmG74JvuMt9RpMwuLAnxzOdByIWsk4VnXgkJp1NM,2307
38
+ datamaestro_text/config/io/metamind/research/wikitext.py,sha256=jw_CbBbradIUp_mrhG-z3rfa4_0ybvIBSkDqJvGLCCI,2301
39
39
  datamaestro_text/config/net/mattmahoney/enwiki.yaml,sha256=HCUn3s0AydXX3BjJ6yUXY0vGLGWSBkOCaDhQ4PA2Adg,2452
40
40
  datamaestro_text/config/org/acm/recsys/cb2014.yaml,sha256=5SAK3Am1k0HFugSSCIQN5mLPBfr1zZZAkhLrSH5pHQc,1274
41
41
  datamaestro_text/config/org/cocodataset/index.yaml,sha256=KISJChMeKwlZbSnHmRcGMsm6jbcFGVe1aA4GhP2fzqw,474
@@ -43,48 +43,49 @@ datamaestro_text/config/org/grouplens/movielens.py,sha256=tV6OSTDdtjll1dQBCsYIls
43
43
  datamaestro_text/config/org/universaldependencies/french.py,sha256=etedb3_SC-fV5Oa2rM4_smZk6t4CPiNvU4C4keUFZHY,2214
44
44
  datamaestro_text/config/uk/ac/ucl/cs/qangaroo.yaml,sha256=IBy82CDNNLjJPNPzues1EgDXu0A5WDvUFeVNSOyrIpI,1137
45
45
  datamaestro_text/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
- datamaestro_text/data/embeddings.py,sha256=qdeZ4m6drFNkqW_4pKxJKdJHtOnDRs0P7QVJ7AO6xFE,1127
46
+ datamaestro_text/data/embeddings.py,sha256=YMoNLyVvaOt86bq_8X71_Fgu7EYYI71vr67xSQsi57I,1128
47
47
  datamaestro_text/data/recommendation.py,sha256=wHV_9SXSclouuXaBmvwg3ncZLOFfdKRZG3IHkkPJX9Y,279
48
48
  datamaestro_text/data/tagging.py,sha256=yWm7bNLks77cAySa1aZNCmLFxTTqhqXZ0PaoaEYU6hI,697
49
- datamaestro_text/data/text.py,sha256=_9J7-j06BOIZ1HsOcBsWy_asanAKkDoZIsWWMMj4tm4,496
50
- datamaestro_text/data/conversation/__init__.py,sha256=esOWnSImMlQs5QtfxUt559ABLd6a5wwoNpj9XtIq71Y,159
51
- datamaestro_text/data/conversation/base.py,sha256=rLOP2dqvMr2E9ONKgPF6CKUCAEHcXt5WqabJyo76AEI,7508
52
- datamaestro_text/data/conversation/canard.py,sha256=FI02rPNuzSsA_uXOcgcuQn31d3QahmHndzYCV1Zm8rk,3305
53
- datamaestro_text/data/conversation/ikat.py,sha256=djxTv0vHLXIUPzfmwpB25fyUWJNVPqthTtDbQ6CzmQo,4363
54
- datamaestro_text/data/conversation/orconvqa.py,sha256=RL9cpK7QK2dsk_j2e4Th8wzGFq-G3ww_EjdM1eKh-8o,3764
49
+ datamaestro_text/data/text.py,sha256=Lln4eoegU9B27oS-2mv3eEQC6MyRBgVhoewQ2-YNxEQ,497
50
+ datamaestro_text/data/conversation/__init__.py,sha256=Kk7FxPz_0oGO2PtIa8zH7UBqbCUsywTHfA-yKd_KO6c,284
51
+ datamaestro_text/data/conversation/base.py,sha256=gF_-izQ1ijX7w49pKQvjfjUVzrX3VSHXxcqVIPWmAfY,7488
52
+ datamaestro_text/data/conversation/canard.py,sha256=aYpkHzuJWGT3-myFNUjCYAtvG3gVh_d3Zc5lyiasQ04,3290
53
+ datamaestro_text/data/conversation/ikat.py,sha256=hoGqHUWyT8BhC_ouUmnwoh93B2jGLHn8uc6npKP4Sl8,4319
54
+ datamaestro_text/data/conversation/orconvqa.py,sha256=zNp02jyYgny0qtIFOMjmrUy7hG8VKWcELHWrg3FBCc0,3764
55
55
  datamaestro_text/data/conversation/qrecc.py,sha256=es4GmqPtE63A7O_GARe8Zy3rQvuLEhAvUA7CfN_nMeA,2562
56
56
  datamaestro_text/data/debate/__init__.py,sha256=PzCV3Bd9fmonE-OQp4VtK1NglH42-iv34WAWUIU-eYk,187
57
57
  datamaestro_text/data/debate/granddebat.py,sha256=4-HMfgvF2bPru56D3hkA1E2bN3dgIUmcvX9eOIXroLA,2176
58
- datamaestro_text/data/ir/__init__.py,sha256=ZRJrUeeUyD1ncMN5JINVvFJ2lDr3KsbgiiEBJkczSi0,9814
59
- datamaestro_text/data/ir/base.py,sha256=uwIiKn0ryK5DWUQsEegeTs6bye0uAOGl0XVm_cvV3ZU,1506
58
+ datamaestro_text/data/ir/__init__.py,sha256=oYI7eIScg-olxPh95XBgTK-E2PunieXvqQPlrRlHU8M,9799
59
+ datamaestro_text/data/ir/base.py,sha256=ksluGOOzOwbdZ2SPnwiDMMUhBa6P1Ti2sr6Ch5xXUgg,1493
60
60
  datamaestro_text/data/ir/cord19.py,sha256=yu1Khgy6AZjH2BPQKdnxDid0vQgQ8zvb8-FQlHH-GOU,1465
61
61
  datamaestro_text/data/ir/csv.py,sha256=0jnaV-wKLgslH7izR-xP_RX7l90vykQTn3bPhaCFR-c,1027
62
- datamaestro_text/data/ir/data.py,sha256=ERmmOxz_9mUkIuccNbzUa5Y6gVLLVDdyc4cCxbCCUbY,20
63
- datamaestro_text/data/ir/formats.py,sha256=eyP7PJ6A4Pd1uv3nbeU1N_Q3Bee7XSYTaYsiHP1MFns,3639
62
+ datamaestro_text/data/ir/data.py,sha256=6ASVsyVVfiSd1m8C8QTrxVLnFVmtoW3d9c9nQ07zlbY,34
63
+ datamaestro_text/data/ir/formats.py,sha256=rKflCuY8UBpXC3nltBqzC4waWYoxuyP91xJvG7p690Y,3630
64
64
  datamaestro_text/data/ir/huggingface.py,sha256=G71VFDN-SllZy4LFxumEbCumEJvb5-orAbLemHjWhiA,956
65
- datamaestro_text/data/ir/stores.py,sha256=A4Ew0L4P6iLLmDKhxqjjVkHcz797BHf4d76YguGkB1A,4412
65
+ datamaestro_text/data/ir/stores.py,sha256=rdOwYCG_NzHSsUQpJ1aneiA2SDWrcfdi16aY-df852U,4408
66
66
  datamaestro_text/data/ir/trec.py,sha256=IOtQRMUz8zx-dYEMR2NIIM6qXEUjsV0eVOhGvKIRJK4,1974
67
67
  datamaestro_text/data/ir/utils.py,sha256=6-GhXVtgkBZGhIs2-ODZua_3DmKjSSVydStpHDqbAwE,833
68
+ datamaestro_text/datasets/__init__.py,sha256=ORn-Q1gGibg-N5grVc7MqOYfExels3FRI51oQ4xI1QA,34
68
69
  datamaestro_text/datasets/irds/__init__.py,sha256=Tq0HN1qojnZYLBumM59BuTkz7r0gcu-5OXmDDLgPpAc,707
69
- datamaestro_text/datasets/irds/data.py,sha256=5ZtJTEV9qtbl_Do4VR6EvYoxPTlsRkkjoBunXDLfmHI,23012
70
+ datamaestro_text/datasets/irds/data.py,sha256=sIU7_rt4I1E9rjkIGcpNfbD5mtO97vxFsUDmouRMDV4,22914
70
71
  datamaestro_text/datasets/irds/datasets.py,sha256=CJ8MA44XCwIQGZTzYIJnR-qFm890rUZZB7C3lKIwNyY,5627
71
72
  datamaestro_text/datasets/irds/helpers.py,sha256=fGE-fbuJbXdTzl1qo55247jzn9cvApY-d82GJBgfY1E,3982
72
73
  datamaestro_text/datasets/irds/utils.py,sha256=m30JLIrV_HgilN11TvY9dGTyumES6LLzWZDUAMT915M,1425
73
- datamaestro_text/download/tmdb.py,sha256=kU_Vz9jhznlyeKMHziVu58IHoWv8zPu6CZTHVNQvmu4,4009
74
+ datamaestro_text/download/tmdb.py,sha256=sfnSUJwGSjBsLNVVhT30db2m0R8mrRkDZpbpBUt7GMg,3960
74
75
  datamaestro_text/interfaces/plaintext.py,sha256=cWfS_xjqZxQ0EV4Ax5BEarZ4lnhQ1I7mc_vgfBgE76w,885
75
76
  datamaestro_text/interfaces/trec.py,sha256=GrP0N_Hcj5f73KS6CSzkyN4aaI-XoBJ19oVMpHVR3QM,3579
76
77
  datamaestro_text/test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
78
  datamaestro_text/test/test_datasets.py,sha256=hD1pe-CjEnOj0JNqDLasz_97ltFzMbYfyYBy1QyYdf8,202
78
79
  datamaestro_text/test/test_documented.py,sha256=Kl90fz_r_dsV0oXE1Mad34GiQyZ9dc6bfZKNtxP0O2s,453
79
80
  datamaestro_text/transforms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
80
- datamaestro_text/transforms/ir/__init__.py,sha256=Pb8C-jwjtCur6gU-Lv4AosSFFKw2o6jMQcJE1A_5PD8,6555
81
+ datamaestro_text/transforms/ir/__init__.py,sha256=7D6wurKVQf-f2mu1I3tT-baQbKo7yRCxW8pOHh-MSjM,6539
81
82
  datamaestro_text/utils/__init__.py,sha256=2449YLTAtKJzkmt84Mu8sBRCCveNs5fiaqTCK_p5ha0,3340
82
83
  datamaestro_text/utils/files.py,sha256=jhcirufgTztEkx1hs2-qgScEcHnIcGF_BjCeuCSsNv0,2838
83
84
  datamaestro_text/utils/iter.py,sha256=QBajeSPLHvkeh6BCTZDSqWlOYNjwUDvgTTZ_YxJntXw,2701
84
85
  datamaestro_text/utils/randomstream.py,sha256=_-boH4IIqN8qcl3IktjpNp9vmF4TWRzHUSNVwg7WAr8,973
85
- datamaestro_text/utils/shuffle.py,sha256=o8JTz3mr0lYWyv0zEh91jEK12ci1etMiUnzh5GkOHCM,3490
86
- datamaestro_text-2026.1.1.dist-info/METADATA,sha256=PnX4hQbQmrRUUBily4GEDJ53HsE-u1QqhK7gcv5nWeU,2103
87
- datamaestro_text-2026.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
88
- datamaestro_text-2026.1.1.dist-info/entry_points.txt,sha256=lO1P5hE183L5qEEVHlG8d_ik0HNXnX7Eo87cQLdcl-Y,111
89
- datamaestro_text-2026.1.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
90
- datamaestro_text-2026.1.1.dist-info/RECORD,,
86
+ datamaestro_text/utils/shuffle.py,sha256=xXzgBQ8An7tKboxI0z123Tl6ywXI4S0tWf8MnfOon0c,3491
87
+ datamaestro_text-2026.2.2.dist-info/METADATA,sha256=cHXRhpnNO6sliuE09Jg-eHJtr2kl1Z4Dy3mE1RCGELA,1886
88
+ datamaestro_text-2026.2.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
89
+ datamaestro_text-2026.2.2.dist-info/entry_points.txt,sha256=lO1P5hE183L5qEEVHlG8d_ik0HNXnX7Eo87cQLdcl-Y,111
90
+ datamaestro_text-2026.2.2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
91
+ datamaestro_text-2026.2.2.dist-info/RECORD,,