datamaestro-text 2026.1.1__py3-none-any.whl → 2026.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamaestro_text/__init__.py +1 -1
- datamaestro_text/config/com/github/aagohary/canard.py +27 -24
- datamaestro_text/config/com/github/apple/ml-qrecc.py +30 -25
- datamaestro_text/config/com/github/ikat.py +76 -62
- datamaestro_text/config/com/github/prdwb/orconvqa.py +41 -37
- datamaestro_text/config/com/microsoft/msmarco/passage.py +278 -207
- datamaestro_text/config/com/oscar-corpus.py +13 -10
- datamaestro_text/config/com/sentiment140.py +17 -12
- datamaestro_text/config/com/smashwords/bookcorpus.py +13 -10
- datamaestro_text/config/edu/stanford/aclimdb.py +14 -9
- datamaestro_text/config/edu/stanford/glove.py +66 -32
- datamaestro_text/config/edu/upenn/ldc/aquaint.py +35 -17
- datamaestro_text/config/fr/granddebat.py +57 -48
- datamaestro_text/config/gov/nist/ir/covid.py +62 -52
- datamaestro_text/config/gov/nist/trec/adhoc.py +395 -255
- datamaestro_text/config/gov/nist/trec/tipster.py +170 -64
- datamaestro_text/config/io/github/thunlp/fewrel.py +20 -15
- datamaestro_text/config/io/metamind/research/wikitext.py +51 -33
- datamaestro_text/config/org/grouplens/movielens.py +28 -37
- datamaestro_text/config/org/universaldependencies/french.py +16 -11
- datamaestro_text/data/conversation/__init__.py +6 -6
- datamaestro_text/data/conversation/base.py +2 -2
- datamaestro_text/data/conversation/canard.py +3 -4
- datamaestro_text/data/conversation/ikat.py +0 -1
- datamaestro_text/data/conversation/orconvqa.py +3 -3
- datamaestro_text/data/embeddings.py +1 -0
- datamaestro_text/data/ir/__init__.py +1 -1
- datamaestro_text/data/ir/base.py +1 -1
- datamaestro_text/data/ir/data.py +1 -1
- datamaestro_text/data/ir/formats.py +2 -1
- datamaestro_text/data/ir/stores.py +1 -1
- datamaestro_text/data/text.py +1 -0
- datamaestro_text/datasets/__init__.py +1 -0
- datamaestro_text/datasets/irds/data.py +1 -6
- datamaestro_text/download/tmdb.py +0 -1
- datamaestro_text/test/test_documented.py +2 -2
- datamaestro_text/transforms/ir/__init__.py +12 -13
- datamaestro_text/utils/shuffle.py +1 -1
- datamaestro_text/version.py +2 -2
- {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.3.dist-info}/METADATA +2 -8
- {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.3.dist-info}/RECORD +44 -43
- {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.3.dist-info}/WHEEL +0 -0
- {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.3.dist-info}/entry_points.txt +0 -0
- {datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -11,7 +11,6 @@ from datamaestro_text.data.conversation.base import (
|
|
|
11
11
|
EntryType,
|
|
12
12
|
)
|
|
13
13
|
from datamaestro_text.data.ir import IDItem, SimpleTextItem
|
|
14
|
-
import logging
|
|
15
14
|
|
|
16
15
|
|
|
17
16
|
@define(kw_only=True)
|
|
@@ -82,9 +81,9 @@ class CanardDataset(ConversationDataset, File):
|
|
|
82
81
|
)
|
|
83
82
|
else:
|
|
84
83
|
# The utterance before the last is the last user query
|
|
85
|
-
assert (
|
|
86
|
-
entry.history
|
|
87
|
-
)
|
|
84
|
+
assert entry.history[-2] == history[-1][SimpleTextItem].text, (
|
|
85
|
+
f"{entry.dialogue_id} {entry.history} / {history[-4:-1]}"
|
|
86
|
+
)
|
|
88
87
|
|
|
89
88
|
# The last utterance is the system side
|
|
90
89
|
history.append(
|
|
@@ -113,9 +113,9 @@ class OrConvQADataset(ConversationDataset, File):
|
|
|
113
113
|
if relevance > 0:
|
|
114
114
|
relevances[rank] = (entry.answer.answer_start, None)
|
|
115
115
|
|
|
116
|
-
assert (
|
|
117
|
-
len(relevances)
|
|
118
|
-
)
|
|
116
|
+
assert len(relevances) <= 1, (
|
|
117
|
+
f"Too many relevance labels ({len(relevances)}) for {entry.query_id}"
|
|
118
|
+
)
|
|
119
119
|
|
|
120
120
|
history.append(
|
|
121
121
|
Record(
|
|
@@ -6,7 +6,7 @@ from functools import cached_property
|
|
|
6
6
|
import logging
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
from attrs import define
|
|
9
|
-
from typing import Callable, Dict, Iterator, List, Optional, Tuple, Type
|
|
9
|
+
from typing import Callable, Dict, Iterator, List, Optional, Tuple, Type
|
|
10
10
|
import random
|
|
11
11
|
from experimaestro import Config
|
|
12
12
|
from datamaestro.definitions import datatasks, Param, Meta
|
datamaestro_text/data/ir/base.py
CHANGED
datamaestro_text/data/ir/data.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
from .base import *
|
|
1
|
+
from .base import * # noqa: F403
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from functools import cached_property
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Tuple, List
|
|
3
3
|
from attrs import define
|
|
4
4
|
from datamaestro.record import record_type
|
|
5
5
|
from ir_datasets.datasets.wapo import WapoDocMedia
|
|
@@ -10,6 +10,7 @@ from ir_datasets.datasets.cord19 import Cord19FullTextSection
|
|
|
10
10
|
@define
|
|
11
11
|
class DocumentWithTitle(TextItem):
|
|
12
12
|
"""Web document with title and body"""
|
|
13
|
+
|
|
13
14
|
title: str
|
|
14
15
|
body: str
|
|
15
16
|
|
|
@@ -82,7 +82,7 @@ class IKatClueWeb22DocumentStore(LZ4DocumentStore):
|
|
|
82
82
|
|
|
83
83
|
file_checksum = hasher.hexdigest()
|
|
84
84
|
assert file_checksum == checksum, (
|
|
85
|
-
f"Expected {checksum},
|
|
85
|
+
f"Expected {checksum}, got {file_checksum} for {filename}"
|
|
86
86
|
)
|
|
87
87
|
|
|
88
88
|
# Get the MD5 hashes of all the passages
|
datamaestro_text/data/text.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# IR datasets integration package
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from abc import ABC, abstractmethod
|
|
3
|
-
from dataclasses import dataclass
|
|
4
3
|
from functools import cached_property, partial
|
|
5
4
|
from pathlib import Path
|
|
6
5
|
from typing import Dict, Iterator, List, NamedTuple, Tuple, Type
|
|
@@ -613,11 +612,7 @@ class Cast2022TopicsHandler(CastTopicsHandler):
|
|
|
613
612
|
records = []
|
|
614
613
|
nodes: Dict[str, ConversationTreeNode] = {}
|
|
615
614
|
|
|
616
|
-
for (
|
|
617
|
-
query
|
|
618
|
-
) in (
|
|
619
|
-
self.dataset.dataset.queries_iter()
|
|
620
|
-
): # type: _irds.trec_cast.Cast2022Query
|
|
615
|
+
for query in self.dataset.dataset.queries_iter(): # type: _irds.trec_cast.Cast2022Query
|
|
621
616
|
parent = nodes[query.parent_id] if query.parent_id else None
|
|
622
617
|
|
|
623
618
|
if query.participant == "User":
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
-
from
|
|
2
|
+
from datamaestro.test.checks import DatamaestroAnalyzer
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
def test_documented():
|
|
6
6
|
"""Test if every configuration is documented"""
|
|
7
7
|
doc_path = Path(__file__).parents[3] / "docs" / "source" / "index.rst"
|
|
8
|
-
analyzer =
|
|
8
|
+
analyzer = DatamaestroAnalyzer(
|
|
9
9
|
doc_path, set(["datamaestro_text"]), set(["datamaestro_text.test"])
|
|
10
10
|
)
|
|
11
11
|
|
|
@@ -2,7 +2,6 @@ import logging
|
|
|
2
2
|
import gzip
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import Type
|
|
6
5
|
from experimaestro import Config, Task, Param, Annotated, pathgenerator, Option, tqdm
|
|
7
6
|
import numpy as np
|
|
8
7
|
from datamaestro.record import RecordType
|
|
@@ -131,22 +130,22 @@ class ShuffledTrainingTripletsLines(Task):
|
|
|
131
130
|
|
|
132
131
|
def __validate__(self):
|
|
133
132
|
if self.topic_ids:
|
|
134
|
-
assert self.data.topic_recordtype.has(
|
|
135
|
-
|
|
136
|
-
)
|
|
133
|
+
assert self.data.topic_recordtype.has(ir.IDItem), (
|
|
134
|
+
f"No topic ID in the source data ({self.data.topic_recordtype})"
|
|
135
|
+
)
|
|
137
136
|
else:
|
|
138
|
-
assert self.data.topic_recordtype.has(
|
|
139
|
-
|
|
140
|
-
)
|
|
137
|
+
assert self.data.topic_recordtype.has(ir.TextItem), (
|
|
138
|
+
f"No topic text in the source data ({self.data.topic_recordtype})"
|
|
139
|
+
)
|
|
141
140
|
|
|
142
141
|
if self.doc_ids:
|
|
143
|
-
assert self.data.document_recordtype.has(
|
|
144
|
-
|
|
145
|
-
)
|
|
142
|
+
assert self.data.document_recordtype.has(ir.IDItem), (
|
|
143
|
+
"No doc ID in the source data"
|
|
144
|
+
)
|
|
146
145
|
else:
|
|
147
|
-
assert self.data.document_recordtype.has(
|
|
148
|
-
|
|
149
|
-
)
|
|
146
|
+
assert self.data.document_recordtype.has(ir.TextItem), (
|
|
147
|
+
"No doc text in the source data"
|
|
148
|
+
)
|
|
150
149
|
|
|
151
150
|
def task_outputs(self, dep):
|
|
152
151
|
return dep(
|
datamaestro_text/version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '2026.
|
|
32
|
-
__version_tuple__ = version_tuple = (2026,
|
|
31
|
+
__version__ = version = '2026.2.3'
|
|
32
|
+
__version_tuple__ = version_tuple = (2026, 2, 3)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datamaestro-text
|
|
3
|
-
Version: 2026.
|
|
3
|
+
Version: 2026.2.3
|
|
4
4
|
Summary: Datamaestro module for text-related datasets
|
|
5
5
|
Project-URL: Homepage, https://github.com/experimaestro/datamaestro_text
|
|
6
6
|
Project-URL: Documentation, https://datamaestro-text.readthedocs.io/en/latest/
|
|
@@ -22,15 +22,9 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
22
22
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
23
|
Requires-Python: >=3.10
|
|
24
24
|
Requires-Dist: attrs
|
|
25
|
-
Requires-Dist: datamaestro>=1.
|
|
25
|
+
Requires-Dist: datamaestro>=1.8.0
|
|
26
26
|
Requires-Dist: experimaestro
|
|
27
27
|
Requires-Dist: ir-datasets>=0.5.8
|
|
28
|
-
Provides-Extra: dev
|
|
29
|
-
Requires-Dist: docutils; extra == 'dev'
|
|
30
|
-
Requires-Dist: flake8; extra == 'dev'
|
|
31
|
-
Requires-Dist: pytest; extra == 'dev'
|
|
32
|
-
Requires-Dist: sphinx<8; extra == 'dev'
|
|
33
|
-
Requires-Dist: sphobjinv; extra == 'dev'
|
|
34
28
|
Description-Content-Type: text/markdown
|
|
35
29
|
|
|
36
30
|
[](https://github.com/pre-commit/pre-commit) [](https://badge.fury.io/py/datamaestro-text)
|
|
@@ -1,90 +1,91 @@
|
|
|
1
|
-
datamaestro_text/__init__.py,sha256=
|
|
2
|
-
datamaestro_text/version.py,sha256=
|
|
1
|
+
datamaestro_text/__init__.py,sha256=MP7ShYx32k5irdgml1PjnmSofzioYQh9rzUEcHs5eys,276
|
|
2
|
+
datamaestro_text/version.py,sha256=edJBGPQ4F2AYJsi0FzQah0-fNB-WSNTAzAc5bmjAFkU,710
|
|
3
3
|
datamaestro_text/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
datamaestro_text/config/ai/quac.yaml,sha256=h1D7UJo2z1nZ_9MXpDHuQNJG9Ma2oayUdJV6lyumAIg,1103
|
|
5
|
-
datamaestro_text/config/com/oscar-corpus.py,sha256=
|
|
6
|
-
datamaestro_text/config/com/sentiment140.py,sha256=
|
|
5
|
+
datamaestro_text/config/com/oscar-corpus.py,sha256=aKnp87k0ksxqzA8gjzzC3F_DkWMgQymj0LiYH817Ph8,789
|
|
6
|
+
datamaestro_text/config/com/sentiment140.py,sha256=SaLXv1dbfyzdqfcrLUnRHYeHtfN3O-d_RFtpy2DthnI,1425
|
|
7
7
|
datamaestro_text/config/com/fastml/goodbooks-10k.yaml,sha256=5ZABxUnBFs2ZnCXtBH8YoBiPb3SocRRdH1DLSfVWF-Y,1172
|
|
8
|
-
datamaestro_text/config/com/github/ikat.py,sha256=
|
|
9
|
-
datamaestro_text/config/com/github/aagohary/canard.py,sha256=
|
|
10
|
-
datamaestro_text/config/com/github/apple/ml-qrecc.py,sha256=
|
|
11
|
-
datamaestro_text/config/com/github/prdwb/orconvqa.py,sha256=
|
|
8
|
+
datamaestro_text/config/com/github/ikat.py,sha256=lZtwtymutzb09DXaOZ2SWFLCcMgD5gVw56KNM53NtVs,4657
|
|
9
|
+
datamaestro_text/config/com/github/aagohary/canard.py,sha256=7XOvdAX3ZFuS_tcItnBiBXSXgCsiZ-BcyyIZ8397qH4,1927
|
|
10
|
+
datamaestro_text/config/com/github/apple/ml-qrecc.py,sha256=9L5Mj2kS6-KKGHPh0fr3B4OnpO1mDto8_V0glR65Qhg,3039
|
|
11
|
+
datamaestro_text/config/com/github/prdwb/orconvqa.py,sha256=i6K49WtYd_nbofJozBpyBFRzEPePRGEzZ7rbkAmrKcU,3291
|
|
12
12
|
datamaestro_text/config/com/github/soskek/bookcorpus.yaml,sha256=qJKs35yeEIilEMgNvU3OEqMp1TSn7mDM2T-uYyA7kTU,1607
|
|
13
13
|
datamaestro_text/config/com/microsoft/wikiqa.yaml,sha256=U7rU-W2Xz1MYv2YXT4jCTj5DsHyM0VssbQPNa3EISaM,540
|
|
14
|
-
datamaestro_text/config/com/microsoft/msmarco/passage.py,sha256=
|
|
15
|
-
datamaestro_text/config/com/smashwords/bookcorpus.py,sha256=
|
|
14
|
+
datamaestro_text/config/com/microsoft/msmarco/passage.py,sha256=IFkIMt2DKgdj6wcvzsNocV6zQpt4bNv3l1XhOuxpY5Q,14464
|
|
15
|
+
datamaestro_text/config/com/smashwords/bookcorpus.py,sha256=Ahg5nlZl6fv7e398sUQ1GpJvEh9vVKrt2yyRqRNlNhQ,940
|
|
16
16
|
datamaestro_text/config/edu/cornell/nlvr.yaml,sha256=9Yk5VZMncSmrP7JNuGXqExksgX5nQ_Zfnlps8hWze3Q,921
|
|
17
17
|
datamaestro_text/config/edu/stanford/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
|
-
datamaestro_text/config/edu/stanford/aclimdb.py,sha256=
|
|
19
|
-
datamaestro_text/config/edu/stanford/glove.py,sha256=
|
|
18
|
+
datamaestro_text/config/edu/stanford/aclimdb.py,sha256=Q6_1RGcTsUD7C4wYCb6YtXFz6Lz5JsPnYhPj5LKLJlg,732
|
|
19
|
+
datamaestro_text/config/edu/stanford/glove.py,sha256=BtrHRnelBhu6wkMzhfQ6dSyx1ePeS95PQ3LticuPLqY,3075
|
|
20
20
|
datamaestro_text/config/edu/stanford/im2p.yaml,sha256=JoToNyEPpmwdyLFedCBot5ypyw7p9rzi12mGXJuZin0,2909
|
|
21
21
|
datamaestro_text/config/edu/upenn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
22
22
|
datamaestro_text/config/edu/upenn/ldc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
|
-
datamaestro_text/config/edu/upenn/ldc/aquaint.py,sha256=
|
|
24
|
-
datamaestro_text/config/fr/granddebat.py,sha256=
|
|
23
|
+
datamaestro_text/config/edu/upenn/ldc/aquaint.py,sha256=neyOMi1SceTKTeJV6qE2cSS2nhT1oMD82LUGCasVUz8,1912
|
|
24
|
+
datamaestro_text/config/fr/granddebat.py,sha256=A5dhdPhSuiJZlD_WRD33GA99Dpxs5KY5A4n6SfsuE18,6995
|
|
25
25
|
datamaestro_text/config/gov/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
26
|
datamaestro_text/config/gov/nist/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
|
-
datamaestro_text/config/gov/nist/ir/covid.py,sha256=
|
|
27
|
+
datamaestro_text/config/gov/nist/ir/covid.py,sha256=y06hJxT_L_1LbUTHIdM1R7qpp_yLO4sayvRVY-Sj2A8,4442
|
|
28
28
|
datamaestro_text/config/gov/nist/trec/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
|
-
datamaestro_text/config/gov/nist/trec/adhoc.py,sha256=
|
|
29
|
+
datamaestro_text/config/gov/nist/trec/adhoc.py,sha256=d99rU30_itO3-PM2TG8tM_1cw67m-eN2d9WrM7JLTvU,14781
|
|
30
30
|
datamaestro_text/config/gov/nist/trec/clueweb.yaml,sha256=sm4UbdtMzWoDVPsewtVDS7Vj2jBOdgp18Xqo1X4ysQc,792
|
|
31
31
|
datamaestro_text/config/gov/nist/trec/deeplearning.yaml,sha256=QGM7PtXLJRttNdOPE16o7-k3e5tA9HgcaM_-qFDV_5Q,2125
|
|
32
32
|
datamaestro_text/config/gov/nist/trec/index.yaml,sha256=oSUhUmtukf5oXqUbJLhae8xZx7Uac5V6uZoUB-RJ7Sw,2711
|
|
33
|
-
datamaestro_text/config/gov/nist/trec/tipster.py,sha256=
|
|
33
|
+
datamaestro_text/config/gov/nist/trec/tipster.py,sha256=LMwTbNxOgbAOiEgb7S92KN1i-aEGh3T9HM8m44-2mfk,7542
|
|
34
34
|
datamaestro_text/config/gov/nist/trec/web.yaml,sha256=iNIJ-PCeLwpUaJByJesyCqfFJolWCts_VETACha3hfQ,563
|
|
35
35
|
datamaestro_text/config/io/github/rajpurkar/squad.yaml,sha256=JchkTruEhVd0uOTGgek2tOgBL4UT3G3KVASvZcNigLg,1143
|
|
36
|
-
datamaestro_text/config/io/github/thunlp/fewrel.py,sha256=
|
|
36
|
+
datamaestro_text/config/io/github/thunlp/fewrel.py,sha256=j4yVKIOoNAhZGnvzxD9-7By8nMe0gHjnc3iovpXjL68,1375
|
|
37
37
|
datamaestro_text/config/io/metamind/research/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
|
-
datamaestro_text/config/io/metamind/research/wikitext.py,sha256=
|
|
38
|
+
datamaestro_text/config/io/metamind/research/wikitext.py,sha256=v685QWAS6dcBqF9zU9uj54wZJ-kv-xxpiJXOA_YpNeg,2779
|
|
39
39
|
datamaestro_text/config/net/mattmahoney/enwiki.yaml,sha256=HCUn3s0AydXX3BjJ6yUXY0vGLGWSBkOCaDhQ4PA2Adg,2452
|
|
40
40
|
datamaestro_text/config/org/acm/recsys/cb2014.yaml,sha256=5SAK3Am1k0HFugSSCIQN5mLPBfr1zZZAkhLrSH5pHQc,1274
|
|
41
41
|
datamaestro_text/config/org/cocodataset/index.yaml,sha256=KISJChMeKwlZbSnHmRcGMsm6jbcFGVe1aA4GhP2fzqw,474
|
|
42
|
-
datamaestro_text/config/org/grouplens/movielens.py,sha256=
|
|
43
|
-
datamaestro_text/config/org/universaldependencies/french.py,sha256=
|
|
42
|
+
datamaestro_text/config/org/grouplens/movielens.py,sha256=NWsJDjZYewYeDY7fJ5Kt5iefBvFcHRsecfUQlpDQPX4,1712
|
|
43
|
+
datamaestro_text/config/org/universaldependencies/french.py,sha256=nHITYOW3kHKhmDHU80xAEPUJfWlsmytDZ744iSjJL1g,2333
|
|
44
44
|
datamaestro_text/config/uk/ac/ucl/cs/qangaroo.yaml,sha256=IBy82CDNNLjJPNPzues1EgDXu0A5WDvUFeVNSOyrIpI,1137
|
|
45
45
|
datamaestro_text/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
|
-
datamaestro_text/data/embeddings.py,sha256=
|
|
46
|
+
datamaestro_text/data/embeddings.py,sha256=YMoNLyVvaOt86bq_8X71_Fgu7EYYI71vr67xSQsi57I,1128
|
|
47
47
|
datamaestro_text/data/recommendation.py,sha256=wHV_9SXSclouuXaBmvwg3ncZLOFfdKRZG3IHkkPJX9Y,279
|
|
48
48
|
datamaestro_text/data/tagging.py,sha256=yWm7bNLks77cAySa1aZNCmLFxTTqhqXZ0PaoaEYU6hI,697
|
|
49
|
-
datamaestro_text/data/text.py,sha256=
|
|
50
|
-
datamaestro_text/data/conversation/__init__.py,sha256=
|
|
51
|
-
datamaestro_text/data/conversation/base.py,sha256=
|
|
52
|
-
datamaestro_text/data/conversation/canard.py,sha256=
|
|
53
|
-
datamaestro_text/data/conversation/ikat.py,sha256=
|
|
54
|
-
datamaestro_text/data/conversation/orconvqa.py,sha256=
|
|
49
|
+
datamaestro_text/data/text.py,sha256=Lln4eoegU9B27oS-2mv3eEQC6MyRBgVhoewQ2-YNxEQ,497
|
|
50
|
+
datamaestro_text/data/conversation/__init__.py,sha256=Kk7FxPz_0oGO2PtIa8zH7UBqbCUsywTHfA-yKd_KO6c,284
|
|
51
|
+
datamaestro_text/data/conversation/base.py,sha256=gF_-izQ1ijX7w49pKQvjfjUVzrX3VSHXxcqVIPWmAfY,7488
|
|
52
|
+
datamaestro_text/data/conversation/canard.py,sha256=aYpkHzuJWGT3-myFNUjCYAtvG3gVh_d3Zc5lyiasQ04,3290
|
|
53
|
+
datamaestro_text/data/conversation/ikat.py,sha256=hoGqHUWyT8BhC_ouUmnwoh93B2jGLHn8uc6npKP4Sl8,4319
|
|
54
|
+
datamaestro_text/data/conversation/orconvqa.py,sha256=zNp02jyYgny0qtIFOMjmrUy7hG8VKWcELHWrg3FBCc0,3764
|
|
55
55
|
datamaestro_text/data/conversation/qrecc.py,sha256=es4GmqPtE63A7O_GARe8Zy3rQvuLEhAvUA7CfN_nMeA,2562
|
|
56
56
|
datamaestro_text/data/debate/__init__.py,sha256=PzCV3Bd9fmonE-OQp4VtK1NglH42-iv34WAWUIU-eYk,187
|
|
57
57
|
datamaestro_text/data/debate/granddebat.py,sha256=4-HMfgvF2bPru56D3hkA1E2bN3dgIUmcvX9eOIXroLA,2176
|
|
58
|
-
datamaestro_text/data/ir/__init__.py,sha256=
|
|
59
|
-
datamaestro_text/data/ir/base.py,sha256=
|
|
58
|
+
datamaestro_text/data/ir/__init__.py,sha256=oYI7eIScg-olxPh95XBgTK-E2PunieXvqQPlrRlHU8M,9799
|
|
59
|
+
datamaestro_text/data/ir/base.py,sha256=ksluGOOzOwbdZ2SPnwiDMMUhBa6P1Ti2sr6Ch5xXUgg,1493
|
|
60
60
|
datamaestro_text/data/ir/cord19.py,sha256=yu1Khgy6AZjH2BPQKdnxDid0vQgQ8zvb8-FQlHH-GOU,1465
|
|
61
61
|
datamaestro_text/data/ir/csv.py,sha256=0jnaV-wKLgslH7izR-xP_RX7l90vykQTn3bPhaCFR-c,1027
|
|
62
|
-
datamaestro_text/data/ir/data.py,sha256=
|
|
63
|
-
datamaestro_text/data/ir/formats.py,sha256=
|
|
62
|
+
datamaestro_text/data/ir/data.py,sha256=6ASVsyVVfiSd1m8C8QTrxVLnFVmtoW3d9c9nQ07zlbY,34
|
|
63
|
+
datamaestro_text/data/ir/formats.py,sha256=rKflCuY8UBpXC3nltBqzC4waWYoxuyP91xJvG7p690Y,3630
|
|
64
64
|
datamaestro_text/data/ir/huggingface.py,sha256=G71VFDN-SllZy4LFxumEbCumEJvb5-orAbLemHjWhiA,956
|
|
65
|
-
datamaestro_text/data/ir/stores.py,sha256=
|
|
65
|
+
datamaestro_text/data/ir/stores.py,sha256=rdOwYCG_NzHSsUQpJ1aneiA2SDWrcfdi16aY-df852U,4408
|
|
66
66
|
datamaestro_text/data/ir/trec.py,sha256=IOtQRMUz8zx-dYEMR2NIIM6qXEUjsV0eVOhGvKIRJK4,1974
|
|
67
67
|
datamaestro_text/data/ir/utils.py,sha256=6-GhXVtgkBZGhIs2-ODZua_3DmKjSSVydStpHDqbAwE,833
|
|
68
|
+
datamaestro_text/datasets/__init__.py,sha256=ORn-Q1gGibg-N5grVc7MqOYfExels3FRI51oQ4xI1QA,34
|
|
68
69
|
datamaestro_text/datasets/irds/__init__.py,sha256=Tq0HN1qojnZYLBumM59BuTkz7r0gcu-5OXmDDLgPpAc,707
|
|
69
|
-
datamaestro_text/datasets/irds/data.py,sha256=
|
|
70
|
+
datamaestro_text/datasets/irds/data.py,sha256=sIU7_rt4I1E9rjkIGcpNfbD5mtO97vxFsUDmouRMDV4,22914
|
|
70
71
|
datamaestro_text/datasets/irds/datasets.py,sha256=CJ8MA44XCwIQGZTzYIJnR-qFm890rUZZB7C3lKIwNyY,5627
|
|
71
72
|
datamaestro_text/datasets/irds/helpers.py,sha256=fGE-fbuJbXdTzl1qo55247jzn9cvApY-d82GJBgfY1E,3982
|
|
72
73
|
datamaestro_text/datasets/irds/utils.py,sha256=m30JLIrV_HgilN11TvY9dGTyumES6LLzWZDUAMT915M,1425
|
|
73
|
-
datamaestro_text/download/tmdb.py,sha256=
|
|
74
|
+
datamaestro_text/download/tmdb.py,sha256=sfnSUJwGSjBsLNVVhT30db2m0R8mrRkDZpbpBUt7GMg,3960
|
|
74
75
|
datamaestro_text/interfaces/plaintext.py,sha256=cWfS_xjqZxQ0EV4Ax5BEarZ4lnhQ1I7mc_vgfBgE76w,885
|
|
75
76
|
datamaestro_text/interfaces/trec.py,sha256=GrP0N_Hcj5f73KS6CSzkyN4aaI-XoBJ19oVMpHVR3QM,3579
|
|
76
77
|
datamaestro_text/test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
78
|
datamaestro_text/test/test_datasets.py,sha256=hD1pe-CjEnOj0JNqDLasz_97ltFzMbYfyYBy1QyYdf8,202
|
|
78
|
-
datamaestro_text/test/test_documented.py,sha256=
|
|
79
|
+
datamaestro_text/test/test_documented.py,sha256=VaDbX8Ea5rKoZ1X0ZSmHnQ-rLHdHGBxUVyX67sRcx4I,439
|
|
79
80
|
datamaestro_text/transforms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
80
|
-
datamaestro_text/transforms/ir/__init__.py,sha256=
|
|
81
|
+
datamaestro_text/transforms/ir/__init__.py,sha256=7D6wurKVQf-f2mu1I3tT-baQbKo7yRCxW8pOHh-MSjM,6539
|
|
81
82
|
datamaestro_text/utils/__init__.py,sha256=2449YLTAtKJzkmt84Mu8sBRCCveNs5fiaqTCK_p5ha0,3340
|
|
82
83
|
datamaestro_text/utils/files.py,sha256=jhcirufgTztEkx1hs2-qgScEcHnIcGF_BjCeuCSsNv0,2838
|
|
83
84
|
datamaestro_text/utils/iter.py,sha256=QBajeSPLHvkeh6BCTZDSqWlOYNjwUDvgTTZ_YxJntXw,2701
|
|
84
85
|
datamaestro_text/utils/randomstream.py,sha256=_-boH4IIqN8qcl3IktjpNp9vmF4TWRzHUSNVwg7WAr8,973
|
|
85
|
-
datamaestro_text/utils/shuffle.py,sha256=
|
|
86
|
-
datamaestro_text-2026.
|
|
87
|
-
datamaestro_text-2026.
|
|
88
|
-
datamaestro_text-2026.
|
|
89
|
-
datamaestro_text-2026.
|
|
90
|
-
datamaestro_text-2026.
|
|
86
|
+
datamaestro_text/utils/shuffle.py,sha256=xXzgBQ8An7tKboxI0z123Tl6ywXI4S0tWf8MnfOon0c,3491
|
|
87
|
+
datamaestro_text-2026.2.3.dist-info/METADATA,sha256=8r5gtB3jtwhAUDZ-W69U20Whwt-7VEMjJo8ZjeFfpiU,1886
|
|
88
|
+
datamaestro_text-2026.2.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
89
|
+
datamaestro_text-2026.2.3.dist-info/entry_points.txt,sha256=lO1P5hE183L5qEEVHlG8d_ik0HNXnX7Eo87cQLdcl-Y,111
|
|
90
|
+
datamaestro_text-2026.2.3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
91
|
+
datamaestro_text-2026.2.3.dist-info/RECORD,,
|
|
File without changes
|
{datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.3.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{datamaestro_text-2026.1.1.dist-info → datamaestro_text-2026.2.3.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|