datamaestro-text 2026.1.1__py3-none-any.whl → 2026.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamaestro_text/__init__.py +1 -1
- datamaestro_text/config/com/github/ikat.py +0 -1
- datamaestro_text/config/com/oscar-corpus.py +1 -1
- datamaestro_text/config/edu/stanford/aclimdb.py +1 -1
- datamaestro_text/config/edu/stanford/glove.py +0 -1
- datamaestro_text/config/gov/nist/ir/covid.py +1 -2
- datamaestro_text/config/io/metamind/research/wikitext.py +1 -1
- datamaestro_text/data/conversation/__init__.py +6 -6
- datamaestro_text/data/conversation/base.py +2 -2
- datamaestro_text/data/conversation/canard.py +3 -4
- datamaestro_text/data/conversation/ikat.py +0 -1
- datamaestro_text/data/conversation/orconvqa.py +3 -3
- datamaestro_text/data/embeddings.py +1 -0
- datamaestro_text/data/ir/__init__.py +1 -1
- datamaestro_text/data/ir/base.py +1 -1
- datamaestro_text/data/ir/data.py +1 -1
- datamaestro_text/data/ir/formats.py +2 -1
- datamaestro_text/data/ir/stores.py +1 -1
- datamaestro_text/data/text.py +1 -0
- datamaestro_text/datasets/__init__.py +1 -0
- datamaestro_text/datasets/irds/data.py +1 -6
- datamaestro_text/download/tmdb.py +0 -1
- datamaestro_text/transforms/ir/__init__.py +12 -13
- datamaestro_text/utils/shuffle.py +1 -1
- datamaestro_text/version.py +2 -2
- {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.2.dist-info}/METADATA +1 -7
- {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.2.dist-info}/RECORD +30 -29
- {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.2.dist-info}/WHEEL +0 -0
- {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.2.dist-info}/entry_points.txt +0 -0
- {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.2.dist-info}/licenses/LICENSE +0 -0
datamaestro_text/__init__.py
CHANGED
|
@@ -5,7 +5,6 @@ GloVe is an unsupervised learning algorithm for obtaining vector representations
|
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
from datamaestro.definitions import dataset
|
|
8
|
-
from datamaestro.data import Base, Generic
|
|
9
8
|
from datamaestro.download import reference
|
|
10
9
|
from datamaestro.download.archive import zipdownloader
|
|
11
10
|
from datamaestro.download.single import filedownloader
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
from .base import (
|
|
2
|
-
AnswerEntry,
|
|
3
|
-
ConversationDataset,
|
|
4
|
-
ConversationHistory,
|
|
5
|
-
ConversationHistoryItem,
|
|
6
|
-
DecontextualizedItem,
|
|
7
|
-
EntryType,
|
|
2
|
+
AnswerEntry as AnswerEntry,
|
|
3
|
+
ConversationDataset as ConversationDataset,
|
|
4
|
+
ConversationHistory as ConversationHistory,
|
|
5
|
+
ConversationHistoryItem as ConversationHistoryItem,
|
|
6
|
+
DecontextualizedItem as DecontextualizedItem,
|
|
7
|
+
EntryType as EntryType,
|
|
8
8
|
)
|
|
@@ -2,13 +2,13 @@ from abc import ABC, abstractmethod
|
|
|
2
2
|
from enum import Enum
|
|
3
3
|
from datamaestro_text.data.ir.base import IDItem, SimpleTextItem
|
|
4
4
|
from experimaestro import Param
|
|
5
|
-
from typing import Dict,
|
|
5
|
+
from typing import Dict, Iterator, List, Optional, Sequence, Tuple
|
|
6
6
|
from attr import define
|
|
7
7
|
from datamaestro.record import record_type
|
|
8
8
|
from datamaestro.data import Base
|
|
9
9
|
from datamaestro.record import Record, Item
|
|
10
10
|
from datamaestro_text.data.ir import TopicRecord, Topics
|
|
11
|
-
from datamaestro_text.utils.iter import FactoryIterable, LazyList
|
|
11
|
+
from datamaestro_text.utils.iter import FactoryIterable, LazyList
|
|
12
12
|
|
|
13
13
|
# ---- Basic types
|
|
14
14
|
|
|
@@ -11,7 +11,6 @@ from datamaestro_text.data.conversation.base import (
|
|
|
11
11
|
EntryType,
|
|
12
12
|
)
|
|
13
13
|
from datamaestro_text.data.ir import IDItem, SimpleTextItem
|
|
14
|
-
import logging
|
|
15
14
|
|
|
16
15
|
|
|
17
16
|
@define(kw_only=True)
|
|
@@ -82,9 +81,9 @@ class CanardDataset(ConversationDataset, File):
|
|
|
82
81
|
)
|
|
83
82
|
else:
|
|
84
83
|
# The utterance before the last is the last user query
|
|
85
|
-
assert (
|
|
86
|
-
entry.history
|
|
87
|
-
)
|
|
84
|
+
assert entry.history[-2] == history[-1][SimpleTextItem].text, (
|
|
85
|
+
f"{entry.dialogue_id} {entry.history} / {history[-4:-1]}"
|
|
86
|
+
)
|
|
88
87
|
|
|
89
88
|
# The last utterance is the system side
|
|
90
89
|
history.append(
|
|
@@ -113,9 +113,9 @@ class OrConvQADataset(ConversationDataset, File):
|
|
|
113
113
|
if relevance > 0:
|
|
114
114
|
relevances[rank] = (entry.answer.answer_start, None)
|
|
115
115
|
|
|
116
|
-
assert (
|
|
117
|
-
len(relevances)
|
|
118
|
-
)
|
|
116
|
+
assert len(relevances) <= 1, (
|
|
117
|
+
f"Too many relevance labels ({len(relevances)}) for {entry.query_id}"
|
|
118
|
+
)
|
|
119
119
|
|
|
120
120
|
history.append(
|
|
121
121
|
Record(
|
|
@@ -6,7 +6,7 @@ from functools import cached_property
|
|
|
6
6
|
import logging
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
from attrs import define
|
|
9
|
-
from typing import Callable, Dict, Iterator, List, Optional, Tuple, Type
|
|
9
|
+
from typing import Callable, Dict, Iterator, List, Optional, Tuple, Type
|
|
10
10
|
import random
|
|
11
11
|
from experimaestro import Config
|
|
12
12
|
from datamaestro.definitions import datatasks, Param, Meta
|
datamaestro_text/data/ir/base.py
CHANGED
datamaestro_text/data/ir/data.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
from .base import *
|
|
1
|
+
from .base import * # noqa: F403
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from functools import cached_property
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Tuple, List
|
|
3
3
|
from attrs import define
|
|
4
4
|
from datamaestro.record import record_type
|
|
5
5
|
from ir_datasets.datasets.wapo import WapoDocMedia
|
|
@@ -10,6 +10,7 @@ from ir_datasets.datasets.cord19 import Cord19FullTextSection
|
|
|
10
10
|
@define
|
|
11
11
|
class DocumentWithTitle(TextItem):
|
|
12
12
|
"""Web document with title and body"""
|
|
13
|
+
|
|
13
14
|
title: str
|
|
14
15
|
body: str
|
|
15
16
|
|
|
@@ -82,7 +82,7 @@ class IKatClueWeb22DocumentStore(LZ4DocumentStore):
|
|
|
82
82
|
|
|
83
83
|
file_checksum = hasher.hexdigest()
|
|
84
84
|
assert file_checksum == checksum, (
|
|
85
|
-
f"Expected {checksum},
|
|
85
|
+
f"Expected {checksum}, got {file_checksum} for {filename}"
|
|
86
86
|
)
|
|
87
87
|
|
|
88
88
|
# Get the MD5 hashes of all the passages
|
datamaestro_text/data/text.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# IR datasets integration package
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from abc import ABC, abstractmethod
|
|
3
|
-
from dataclasses import dataclass
|
|
4
3
|
from functools import cached_property, partial
|
|
5
4
|
from pathlib import Path
|
|
6
5
|
from typing import Dict, Iterator, List, NamedTuple, Tuple, Type
|
|
@@ -613,11 +612,7 @@ class Cast2022TopicsHandler(CastTopicsHandler):
|
|
|
613
612
|
records = []
|
|
614
613
|
nodes: Dict[str, ConversationTreeNode] = {}
|
|
615
614
|
|
|
616
|
-
for (
|
|
617
|
-
query
|
|
618
|
-
) in (
|
|
619
|
-
self.dataset.dataset.queries_iter()
|
|
620
|
-
): # type: _irds.trec_cast.Cast2022Query
|
|
615
|
+
for query in self.dataset.dataset.queries_iter(): # type: _irds.trec_cast.Cast2022Query
|
|
621
616
|
parent = nodes[query.parent_id] if query.parent_id else None
|
|
622
617
|
|
|
623
618
|
if query.participant == "User":
|
|
@@ -2,7 +2,6 @@ import logging
|
|
|
2
2
|
import gzip
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import Type
|
|
6
5
|
from experimaestro import Config, Task, Param, Annotated, pathgenerator, Option, tqdm
|
|
7
6
|
import numpy as np
|
|
8
7
|
from datamaestro.record import RecordType
|
|
@@ -131,22 +130,22 @@ class ShuffledTrainingTripletsLines(Task):
|
|
|
131
130
|
|
|
132
131
|
def __validate__(self):
|
|
133
132
|
if self.topic_ids:
|
|
134
|
-
assert self.data.topic_recordtype.has(
|
|
135
|
-
|
|
136
|
-
)
|
|
133
|
+
assert self.data.topic_recordtype.has(ir.IDItem), (
|
|
134
|
+
f"No topic ID in the source data ({self.data.topic_recordtype})"
|
|
135
|
+
)
|
|
137
136
|
else:
|
|
138
|
-
assert self.data.topic_recordtype.has(
|
|
139
|
-
|
|
140
|
-
)
|
|
137
|
+
assert self.data.topic_recordtype.has(ir.TextItem), (
|
|
138
|
+
f"No topic text in the source data ({self.data.topic_recordtype})"
|
|
139
|
+
)
|
|
141
140
|
|
|
142
141
|
if self.doc_ids:
|
|
143
|
-
assert self.data.document_recordtype.has(
|
|
144
|
-
|
|
145
|
-
)
|
|
142
|
+
assert self.data.document_recordtype.has(ir.IDItem), (
|
|
143
|
+
"No doc ID in the source data"
|
|
144
|
+
)
|
|
146
145
|
else:
|
|
147
|
-
assert self.data.document_recordtype.has(
|
|
148
|
-
|
|
149
|
-
)
|
|
146
|
+
assert self.data.document_recordtype.has(ir.TextItem), (
|
|
147
|
+
"No doc text in the source data"
|
|
148
|
+
)
|
|
150
149
|
|
|
151
150
|
def task_outputs(self, dep):
|
|
152
151
|
return dep(
|
datamaestro_text/version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '2026.
|
|
32
|
-
__version_tuple__ = version_tuple = (2026,
|
|
31
|
+
__version__ = version = '2026.2.2'
|
|
32
|
+
__version_tuple__ = version_tuple = (2026, 2, 2)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datamaestro-text
|
|
3
|
-
Version: 2026.
|
|
3
|
+
Version: 2026.2.2
|
|
4
4
|
Summary: Datamaestro module for text-related datasets
|
|
5
5
|
Project-URL: Homepage, https://github.com/experimaestro/datamaestro_text
|
|
6
6
|
Project-URL: Documentation, https://datamaestro-text.readthedocs.io/en/latest/
|
|
@@ -25,12 +25,6 @@ Requires-Dist: attrs
|
|
|
25
25
|
Requires-Dist: datamaestro>=1.6.2
|
|
26
26
|
Requires-Dist: experimaestro
|
|
27
27
|
Requires-Dist: ir-datasets>=0.5.8
|
|
28
|
-
Provides-Extra: dev
|
|
29
|
-
Requires-Dist: docutils; extra == 'dev'
|
|
30
|
-
Requires-Dist: flake8; extra == 'dev'
|
|
31
|
-
Requires-Dist: pytest; extra == 'dev'
|
|
32
|
-
Requires-Dist: sphinx<8; extra == 'dev'
|
|
33
|
-
Requires-Dist: sphobjinv; extra == 'dev'
|
|
34
28
|
Description-Content-Type: text/markdown
|
|
35
29
|
|
|
36
30
|
[](https://github.com/pre-commit/pre-commit) [](https://badge.fury.io/py/datamaestro-text)
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
datamaestro_text/__init__.py,sha256=
|
|
2
|
-
datamaestro_text/version.py,sha256=
|
|
1
|
+
datamaestro_text/__init__.py,sha256=MP7ShYx32k5irdgml1PjnmSofzioYQh9rzUEcHs5eys,276
|
|
2
|
+
datamaestro_text/version.py,sha256=PcJXzZYuv0SaBM1rOymP9IhKDJxqcLKUPHINlOD-hL0,710
|
|
3
3
|
datamaestro_text/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
datamaestro_text/config/ai/quac.yaml,sha256=h1D7UJo2z1nZ_9MXpDHuQNJG9Ma2oayUdJV6lyumAIg,1103
|
|
5
|
-
datamaestro_text/config/com/oscar-corpus.py,sha256=
|
|
5
|
+
datamaestro_text/config/com/oscar-corpus.py,sha256=gEWz8Nxpv7VXU8X-vfRZLwPfq0KXtkGSNtsfoqfcUI0,702
|
|
6
6
|
datamaestro_text/config/com/sentiment140.py,sha256=itfBEgcOniECXKOw8I2dhzyS9LOMsltMLfKK6NGRpVY,1293
|
|
7
7
|
datamaestro_text/config/com/fastml/goodbooks-10k.yaml,sha256=5ZABxUnBFs2ZnCXtBH8YoBiPb3SocRRdH1DLSfVWF-Y,1172
|
|
8
|
-
datamaestro_text/config/com/github/ikat.py,sha256=
|
|
8
|
+
datamaestro_text/config/com/github/ikat.py,sha256=nAmBre9zNlnGhx-C50EvLGvHqtoB7Ce-mZUZqM_ymO8,4219
|
|
9
9
|
datamaestro_text/config/com/github/aagohary/canard.py,sha256=5fLwCLNBGM_7--naTCDayAMYLvK3yTD8auaEf-dqrb4,1768
|
|
10
10
|
datamaestro_text/config/com/github/apple/ml-qrecc.py,sha256=zP3w7A9KSvJVCo44OaB1az1pDKWxE6qXS4qFm3hqg3Y,3064
|
|
11
11
|
datamaestro_text/config/com/github/prdwb/orconvqa.py,sha256=MFJYaxJoqJQ6kMDHa2PIE0zoxYTA8Kyl26-vzFoMML0,3032
|
|
@@ -15,8 +15,8 @@ datamaestro_text/config/com/microsoft/msmarco/passage.py,sha256=hN2KOdi6ToHlodoz
|
|
|
15
15
|
datamaestro_text/config/com/smashwords/bookcorpus.py,sha256=LgUcnR-z99kTrZj6QaCLuLrj1bG-wHMM5GlVNmbrY2k,851
|
|
16
16
|
datamaestro_text/config/edu/cornell/nlvr.yaml,sha256=9Yk5VZMncSmrP7JNuGXqExksgX5nQ_Zfnlps8hWze3Q,921
|
|
17
17
|
datamaestro_text/config/edu/stanford/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
|
-
datamaestro_text/config/edu/stanford/aclimdb.py,sha256=
|
|
19
|
-
datamaestro_text/config/edu/stanford/glove.py,sha256=
|
|
18
|
+
datamaestro_text/config/edu/stanford/aclimdb.py,sha256=gv_4IauUCURbMzMWpSMyx3qgOAXVQuBwKR-mMaKExpc,626
|
|
19
|
+
datamaestro_text/config/edu/stanford/glove.py,sha256=FiVYbzQMD11CiKfklrggtm7YXBCevyTXXwhehRd65H8,2348
|
|
20
20
|
datamaestro_text/config/edu/stanford/im2p.yaml,sha256=JoToNyEPpmwdyLFedCBot5ypyw7p9rzi12mGXJuZin0,2909
|
|
21
21
|
datamaestro_text/config/edu/upenn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
22
22
|
datamaestro_text/config/edu/upenn/ldc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -24,7 +24,7 @@ datamaestro_text/config/edu/upenn/ldc/aquaint.py,sha256=YEU3kIXzv71Vjg9bUoXyQ-vS
|
|
|
24
24
|
datamaestro_text/config/fr/granddebat.py,sha256=JRLC3q6o-XhJECjAh40w2p40pCSRw9K3-YMDUpdNwMM,7016
|
|
25
25
|
datamaestro_text/config/gov/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
26
|
datamaestro_text/config/gov/nist/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
|
-
datamaestro_text/config/gov/nist/ir/covid.py,sha256=
|
|
27
|
+
datamaestro_text/config/gov/nist/ir/covid.py,sha256=i9xxZcrKeX1gezK_TE68oropMF9PKHX2ofyREEUWYPY,4003
|
|
28
28
|
datamaestro_text/config/gov/nist/trec/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
29
|
datamaestro_text/config/gov/nist/trec/adhoc.py,sha256=vcFaLlZ-chwDt013MYI8bYZ2ug39jPaeimsiok_sqfU,11035
|
|
30
30
|
datamaestro_text/config/gov/nist/trec/clueweb.yaml,sha256=sm4UbdtMzWoDVPsewtVDS7Vj2jBOdgp18Xqo1X4ysQc,792
|
|
@@ -35,7 +35,7 @@ datamaestro_text/config/gov/nist/trec/web.yaml,sha256=iNIJ-PCeLwpUaJByJesyCqfFJo
|
|
|
35
35
|
datamaestro_text/config/io/github/rajpurkar/squad.yaml,sha256=JchkTruEhVd0uOTGgek2tOgBL4UT3G3KVASvZcNigLg,1143
|
|
36
36
|
datamaestro_text/config/io/github/thunlp/fewrel.py,sha256=rg_qAnMrXYUZhQYxA12r_Npl0ggyfTLJQjdSCjU0QxM,1228
|
|
37
37
|
datamaestro_text/config/io/metamind/research/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
|
-
datamaestro_text/config/io/metamind/research/wikitext.py,sha256=
|
|
38
|
+
datamaestro_text/config/io/metamind/research/wikitext.py,sha256=jw_CbBbradIUp_mrhG-z3rfa4_0ybvIBSkDqJvGLCCI,2301
|
|
39
39
|
datamaestro_text/config/net/mattmahoney/enwiki.yaml,sha256=HCUn3s0AydXX3BjJ6yUXY0vGLGWSBkOCaDhQ4PA2Adg,2452
|
|
40
40
|
datamaestro_text/config/org/acm/recsys/cb2014.yaml,sha256=5SAK3Am1k0HFugSSCIQN5mLPBfr1zZZAkhLrSH5pHQc,1274
|
|
41
41
|
datamaestro_text/config/org/cocodataset/index.yaml,sha256=KISJChMeKwlZbSnHmRcGMsm6jbcFGVe1aA4GhP2fzqw,474
|
|
@@ -43,48 +43,49 @@ datamaestro_text/config/org/grouplens/movielens.py,sha256=tV6OSTDdtjll1dQBCsYIls
|
|
|
43
43
|
datamaestro_text/config/org/universaldependencies/french.py,sha256=etedb3_SC-fV5Oa2rM4_smZk6t4CPiNvU4C4keUFZHY,2214
|
|
44
44
|
datamaestro_text/config/uk/ac/ucl/cs/qangaroo.yaml,sha256=IBy82CDNNLjJPNPzues1EgDXu0A5WDvUFeVNSOyrIpI,1137
|
|
45
45
|
datamaestro_text/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
|
-
datamaestro_text/data/embeddings.py,sha256=
|
|
46
|
+
datamaestro_text/data/embeddings.py,sha256=YMoNLyVvaOt86bq_8X71_Fgu7EYYI71vr67xSQsi57I,1128
|
|
47
47
|
datamaestro_text/data/recommendation.py,sha256=wHV_9SXSclouuXaBmvwg3ncZLOFfdKRZG3IHkkPJX9Y,279
|
|
48
48
|
datamaestro_text/data/tagging.py,sha256=yWm7bNLks77cAySa1aZNCmLFxTTqhqXZ0PaoaEYU6hI,697
|
|
49
|
-
datamaestro_text/data/text.py,sha256=
|
|
50
|
-
datamaestro_text/data/conversation/__init__.py,sha256=
|
|
51
|
-
datamaestro_text/data/conversation/base.py,sha256=
|
|
52
|
-
datamaestro_text/data/conversation/canard.py,sha256=
|
|
53
|
-
datamaestro_text/data/conversation/ikat.py,sha256=
|
|
54
|
-
datamaestro_text/data/conversation/orconvqa.py,sha256=
|
|
49
|
+
datamaestro_text/data/text.py,sha256=Lln4eoegU9B27oS-2mv3eEQC6MyRBgVhoewQ2-YNxEQ,497
|
|
50
|
+
datamaestro_text/data/conversation/__init__.py,sha256=Kk7FxPz_0oGO2PtIa8zH7UBqbCUsywTHfA-yKd_KO6c,284
|
|
51
|
+
datamaestro_text/data/conversation/base.py,sha256=gF_-izQ1ijX7w49pKQvjfjUVzrX3VSHXxcqVIPWmAfY,7488
|
|
52
|
+
datamaestro_text/data/conversation/canard.py,sha256=aYpkHzuJWGT3-myFNUjCYAtvG3gVh_d3Zc5lyiasQ04,3290
|
|
53
|
+
datamaestro_text/data/conversation/ikat.py,sha256=hoGqHUWyT8BhC_ouUmnwoh93B2jGLHn8uc6npKP4Sl8,4319
|
|
54
|
+
datamaestro_text/data/conversation/orconvqa.py,sha256=zNp02jyYgny0qtIFOMjmrUy7hG8VKWcELHWrg3FBCc0,3764
|
|
55
55
|
datamaestro_text/data/conversation/qrecc.py,sha256=es4GmqPtE63A7O_GARe8Zy3rQvuLEhAvUA7CfN_nMeA,2562
|
|
56
56
|
datamaestro_text/data/debate/__init__.py,sha256=PzCV3Bd9fmonE-OQp4VtK1NglH42-iv34WAWUIU-eYk,187
|
|
57
57
|
datamaestro_text/data/debate/granddebat.py,sha256=4-HMfgvF2bPru56D3hkA1E2bN3dgIUmcvX9eOIXroLA,2176
|
|
58
|
-
datamaestro_text/data/ir/__init__.py,sha256=
|
|
59
|
-
datamaestro_text/data/ir/base.py,sha256=
|
|
58
|
+
datamaestro_text/data/ir/__init__.py,sha256=oYI7eIScg-olxPh95XBgTK-E2PunieXvqQPlrRlHU8M,9799
|
|
59
|
+
datamaestro_text/data/ir/base.py,sha256=ksluGOOzOwbdZ2SPnwiDMMUhBa6P1Ti2sr6Ch5xXUgg,1493
|
|
60
60
|
datamaestro_text/data/ir/cord19.py,sha256=yu1Khgy6AZjH2BPQKdnxDid0vQgQ8zvb8-FQlHH-GOU,1465
|
|
61
61
|
datamaestro_text/data/ir/csv.py,sha256=0jnaV-wKLgslH7izR-xP_RX7l90vykQTn3bPhaCFR-c,1027
|
|
62
|
-
datamaestro_text/data/ir/data.py,sha256=
|
|
63
|
-
datamaestro_text/data/ir/formats.py,sha256=
|
|
62
|
+
datamaestro_text/data/ir/data.py,sha256=6ASVsyVVfiSd1m8C8QTrxVLnFVmtoW3d9c9nQ07zlbY,34
|
|
63
|
+
datamaestro_text/data/ir/formats.py,sha256=rKflCuY8UBpXC3nltBqzC4waWYoxuyP91xJvG7p690Y,3630
|
|
64
64
|
datamaestro_text/data/ir/huggingface.py,sha256=G71VFDN-SllZy4LFxumEbCumEJvb5-orAbLemHjWhiA,956
|
|
65
|
-
datamaestro_text/data/ir/stores.py,sha256=
|
|
65
|
+
datamaestro_text/data/ir/stores.py,sha256=rdOwYCG_NzHSsUQpJ1aneiA2SDWrcfdi16aY-df852U,4408
|
|
66
66
|
datamaestro_text/data/ir/trec.py,sha256=IOtQRMUz8zx-dYEMR2NIIM6qXEUjsV0eVOhGvKIRJK4,1974
|
|
67
67
|
datamaestro_text/data/ir/utils.py,sha256=6-GhXVtgkBZGhIs2-ODZua_3DmKjSSVydStpHDqbAwE,833
|
|
68
|
+
datamaestro_text/datasets/__init__.py,sha256=ORn-Q1gGibg-N5grVc7MqOYfExels3FRI51oQ4xI1QA,34
|
|
68
69
|
datamaestro_text/datasets/irds/__init__.py,sha256=Tq0HN1qojnZYLBumM59BuTkz7r0gcu-5OXmDDLgPpAc,707
|
|
69
|
-
datamaestro_text/datasets/irds/data.py,sha256=
|
|
70
|
+
datamaestro_text/datasets/irds/data.py,sha256=sIU7_rt4I1E9rjkIGcpNfbD5mtO97vxFsUDmouRMDV4,22914
|
|
70
71
|
datamaestro_text/datasets/irds/datasets.py,sha256=CJ8MA44XCwIQGZTzYIJnR-qFm890rUZZB7C3lKIwNyY,5627
|
|
71
72
|
datamaestro_text/datasets/irds/helpers.py,sha256=fGE-fbuJbXdTzl1qo55247jzn9cvApY-d82GJBgfY1E,3982
|
|
72
73
|
datamaestro_text/datasets/irds/utils.py,sha256=m30JLIrV_HgilN11TvY9dGTyumES6LLzWZDUAMT915M,1425
|
|
73
|
-
datamaestro_text/download/tmdb.py,sha256=
|
|
74
|
+
datamaestro_text/download/tmdb.py,sha256=sfnSUJwGSjBsLNVVhT30db2m0R8mrRkDZpbpBUt7GMg,3960
|
|
74
75
|
datamaestro_text/interfaces/plaintext.py,sha256=cWfS_xjqZxQ0EV4Ax5BEarZ4lnhQ1I7mc_vgfBgE76w,885
|
|
75
76
|
datamaestro_text/interfaces/trec.py,sha256=GrP0N_Hcj5f73KS6CSzkyN4aaI-XoBJ19oVMpHVR3QM,3579
|
|
76
77
|
datamaestro_text/test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
78
|
datamaestro_text/test/test_datasets.py,sha256=hD1pe-CjEnOj0JNqDLasz_97ltFzMbYfyYBy1QyYdf8,202
|
|
78
79
|
datamaestro_text/test/test_documented.py,sha256=Kl90fz_r_dsV0oXE1Mad34GiQyZ9dc6bfZKNtxP0O2s,453
|
|
79
80
|
datamaestro_text/transforms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
80
|
-
datamaestro_text/transforms/ir/__init__.py,sha256=
|
|
81
|
+
datamaestro_text/transforms/ir/__init__.py,sha256=7D6wurKVQf-f2mu1I3tT-baQbKo7yRCxW8pOHh-MSjM,6539
|
|
81
82
|
datamaestro_text/utils/__init__.py,sha256=2449YLTAtKJzkmt84Mu8sBRCCveNs5fiaqTCK_p5ha0,3340
|
|
82
83
|
datamaestro_text/utils/files.py,sha256=jhcirufgTztEkx1hs2-qgScEcHnIcGF_BjCeuCSsNv0,2838
|
|
83
84
|
datamaestro_text/utils/iter.py,sha256=QBajeSPLHvkeh6BCTZDSqWlOYNjwUDvgTTZ_YxJntXw,2701
|
|
84
85
|
datamaestro_text/utils/randomstream.py,sha256=_-boH4IIqN8qcl3IktjpNp9vmF4TWRzHUSNVwg7WAr8,973
|
|
85
|
-
datamaestro_text/utils/shuffle.py,sha256=
|
|
86
|
-
datamaestro_text-2026.
|
|
87
|
-
datamaestro_text-2026.
|
|
88
|
-
datamaestro_text-2026.
|
|
89
|
-
datamaestro_text-2026.
|
|
90
|
-
datamaestro_text-2026.
|
|
86
|
+
datamaestro_text/utils/shuffle.py,sha256=xXzgBQ8An7tKboxI0z123Tl6ywXI4S0tWf8MnfOon0c,3491
|
|
87
|
+
datamaestro_text-2026.2.2.dist-info/METADATA,sha256=cHXRhpnNO6sliuE09Jg-eHJtr2kl1Z4Dy3mE1RCGELA,1886
|
|
88
|
+
datamaestro_text-2026.2.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
89
|
+
datamaestro_text-2026.2.2.dist-info/entry_points.txt,sha256=lO1P5hE183L5qEEVHlG8d_ik0HNXnX7Eo87cQLdcl-Y,111
|
|
90
|
+
datamaestro_text-2026.2.2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
91
|
+
datamaestro_text-2026.2.2.dist-info/RECORD,,
|
|
File without changes
|
{datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.2.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.2.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|