datamaestro-text 2024.5.31__py3-none-any.whl → 2025.1.7__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.
- datamaestro_text/config/com/github/apple/ml-qrecc.py +50 -0
- datamaestro_text/data/conversation/base.py +2 -2
- datamaestro_text/data/ir/formats.py +20 -5
- datamaestro_text/data/ir/stores.py +12 -6
- datamaestro_text/datasets/irds/data.py +222 -204
- datamaestro_text/datasets/irds/helpers.py +58 -2
- datamaestro_text/version.py +2 -2
- {datamaestro_text-2024.5.31.dist-info → datamaestro_text-2025.1.7.dist-info}/METADATA +3 -3
- {datamaestro_text-2024.5.31.dist-info → datamaestro_text-2025.1.7.dist-info}/RECORD +13 -13
- {datamaestro_text-2024.5.31.dist-info → datamaestro_text-2025.1.7.dist-info}/WHEEL +1 -1
- {datamaestro_text-2024.5.31.dist-info → datamaestro_text-2025.1.7.dist-info}/LICENSE +0 -0
- {datamaestro_text-2024.5.31.dist-info → datamaestro_text-2025.1.7.dist-info}/entry_points.txt +0 -0
- {datamaestro_text-2024.5.31.dist-info → datamaestro_text-2025.1.7.dist-info}/top_level.txt +0 -0
datamaestro_text/config/com/github/apple/ml-qrecc.py
CHANGED

@@ -1,11 +1,20 @@
 # See documentation on https://datamaestro.readthedocs.io
 
+import re
+import json
 from pathlib import Path
 from datamaestro.definitions import datatasks, datatags, dataset
 from datamaestro.data.ml import Supervised
+from datamaestro.download import reference
 from datamaestro.download.archive import zipdownloader
+from datamaestro.download.wayback import wayback_documents
 from datamaestro.utils import HashCheck
 from datamaestro_text.data.conversation.qrecc import QReCCDataset
+from datamaestro_text.datasets.irds.data import (
+    LZ4JSONLDocumentStore,
+    SimpleJsonDocument,
+)
+from datamaestro_text.datasets.irds.helpers import lz4docstore_builder
 
 
 @datatags("conversation", "context", "query")

@@ -35,3 +44,44 @@ def main(data: Path):
         "train": QReCCDataset(path=data / "qrecc_train.json"),
         "test": QReCCDataset(path=data / "qrecc_test.json"),
     }
+
+
+@dataset(
+    url="https://github.com/apple/ml-qrecc",
+    doi="https://doi.org/10.48550/arXiv.2010.04898",
+)
+class Content(LZ4JSONLDocumentStore):
+    """QReCC mentionned URLs content"""
+
+    @staticmethod
+    def __create_dataset__(dataset, options=None):
+        ds = reference(reference=main).setup(dataset, options)
+        documents_path = wayback_documents(
+            "20191127", lambda: Content._urls(ds), name="wayback.jsonl"
+        ).setup(dataset, options)
+
+        store_path = lz4docstore_builder(
+            "store",
+            lambda: Content._documents(documents_path),
+            SimpleJsonDocument,
+            "id",
+        ).setup(dataset, options)
+
+        return LZ4JSONLDocumentStore(jsonl_path=store_path)
+
+    @staticmethod
+    def _documents(path: Path):
+        """Iterates over documents from wayback"""
+        with path.open("rt") as fp:
+            for line in fp:
+                yield SimpleJsonDocument(**json.loads(line))
+
+    @staticmethod
+    def _urls(supervised: Supervised[QReCCDataset, None, QReCCDataset]):
+        urls = set()
+        for ds in [supervised.train, supervised.test]:
+            for entry in ds.entries():
+                if entry.answer_url:
+                    url = re.sub("#.*$", "", entry.answer_url)
+                    urls.add(url)
+        return urls
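The new `Content` configuration chains three steps: the existing QReCC train/test download, a Wayback Machine crawl of the answer URLs, and an LZ4 document store built from the resulting `wayback.jsonl`. Below is a minimal, self-contained sketch of the per-line conversion that `Content._documents` performs; `SimpleJsonDocument` is re-declared locally so the snippet runs on its own, and the two-field (`id`, `text`) layout is the one shown later in this diff for `datasets/irds/data.py`.

```python
import json
from pathlib import Path
from typing import Iterator, NamedTuple


class SimpleJsonDocument(NamedTuple):
    """Local stand-in for the document type added in datasets/irds/data.py."""

    id: str
    text: str


def iter_wayback_documents(path: Path) -> Iterator[SimpleJsonDocument]:
    """Yield one document per JSON-L line, mirroring Content._documents above."""
    with path.open("rt") as fp:
        for line in fp:
            yield SimpleJsonDocument(**json.loads(line))


if __name__ == "__main__":
    # Hypothetical usage: count the documents fetched from the Wayback Machine.
    n = sum(1 for _ in iter_wayback_documents(Path("wayback.jsonl")))
    print(f"{n} documents")
```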
datamaestro_text/data/conversation/base.py
CHANGED

@@ -188,7 +188,7 @@ class SingleConversationTreeNode(ConversationNode):
     def history(self) -> Sequence[Record]:
         return self.tree.history[self.index + 1 :]
 
-    def parent(self) -> ConversationNode:
+    def parent(self) -> Optional[ConversationNode]:
         return (
             SingleConversationTreeNode(self.tree, self.index + 1)
             if self.index < len(self.tree.history) - 1

@@ -235,7 +235,7 @@ class ConversationTreeNode(ConversationNode, ConversationTree):
         for child in self.children:
             yield from child
 
-    def parent(self) -> ConversationNode:
+    def parent(self) -> Optional[ConversationNode]:
         return self.parent
 
     def children(self) -> List[ConversationNode]:
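With `parent()` now annotated as returning `Optional[ConversationNode]`, callers are expected to stop at `None` rather than assume a parent always exists. A short hedged sketch of such a caller (the `ancestors` helper is not part of the package; `ConversationNode` refers to the class patched above):

```python
from typing import List, Optional


def ancestors(node: "ConversationNode") -> List["ConversationNode"]:
    """Collect the ancestors of a node by following parent() until it returns None."""
    result: List["ConversationNode"] = []
    current: Optional["ConversationNode"] = node.parent()
    while current is not None:
        result.append(current)
        current = current.parent()
    return result
```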
datamaestro_text/data/ir/formats.py
CHANGED

@@ -1,5 +1,5 @@
 from functools import cached_property
-from typing import ClassVar, Tuple
+from typing import ClassVar, Tuple, List
 from attrs import define
 from datamaestro.record import record_type
 from ir_datasets.datasets.wapo import WapoDocMedia

@@ -117,7 +117,6 @@ class TweetDoc(TextItem):
 
 @define
 class OrConvQADocument(TextItem):
-    id: str
     title: str
     body: str
     aid: str

@@ -127,12 +126,21 @@ class OrConvQADocument(TextItem):
     def text(self):
         return f"{self.title} {self.body}"
 
+@define
+class Touche2020(TextItem):
+    text: str
+    title: str
+    stance: str
+    url: str
 
 @define
-class
+class SciDocs(TextItem):
     text: str
-
-
+    title: str
+    authors: List[str]
+    year: int
+    cited_by: List[str]
+    references: List[str]
 
 
 @define

@@ -167,6 +175,13 @@ class TrecMb14Query(TextItem):
     def get_text(self):
         return f"{self.query}"
 
+@define
+class SciDocsTopic(TextItem):
+    text: str
+    authors: List[str]
+    year: int
+    cited_by: List[str]
+    references: List[str]
 
 @define()
 class TrecTopic(SimpleTextItem):
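The new `Touche2020`, `SciDocs` and `SciDocsTopic` formats are plain attrs classes whose fields line up with the BEIR tuples registered further down in `datasets/irds/data.py`. A self-contained sketch of the same pattern follows; the `TextItem` base is omitted so it runs on its own, and the field values are made up.

```python
from typing import List

from attrs import define


@define
class SciDocs:
    """Same field layout as the SciDocs format added above (TextItem base omitted)."""

    text: str
    title: str
    authors: List[str]
    year: int
    cited_by: List[str]
    references: List[str]


doc = SciDocs(
    text="Abstract of the paper...",
    title="A sample SciDocs entry",
    authors=["A. Author"],
    year=2020,
    cited_by=[],
    references=["paper-42"],
)
print(doc.title, doc.year)
```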
datamaestro_text/data/ir/stores.py
CHANGED

@@ -1,17 +1,21 @@
 from collections import namedtuple
-from typing import List
+from typing import List, NamedTuple
 from experimaestro import Constant
 import attrs
 
 from datamaestro.record import Record
+from datamaestro_text.data.ir.base import IDItem
 from datamaestro_text.datasets.irds.data import LZ4DocumentStore
 from datamaestro_text.data.ir.formats import OrConvQADocument
 
 
 class OrConvQADocumentStore(LZ4DocumentStore):
-    NAMED_TUPLE
-
-
+    class NAMED_TUPLE(NamedTuple):
+        id: str
+        title: str
+        body: str
+        aid: str
+        bid: int
 
     lookup_field: Constant[str] = "id"
     fields: Constant[List[str]] = list(NAMED_TUPLE._fields)

@@ -19,5 +23,7 @@ class OrConvQADocumentStore(LZ4DocumentStore):
 
     data_cls = NAMED_TUPLE
 
-    def converter(self, data: NAMED_TUPLE) ->
-
+    def converter(self, data: NAMED_TUPLE) -> Record:
+        fields = data._asdict()
+        del fields["id"]
+        return Record(OrConvQADocument(**fields), IDItem(data.id))
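The rewritten converter relies on `NamedTuple._asdict()` to turn the stored tuple into keyword arguments, splitting off the `id` field (which becomes the record's `IDItem`) from the document fields. A standalone sketch of just that dictionary manipulation; the package's `Record`, `IDItem` and `OrConvQADocument` classes are replaced by prints to keep the example self-contained.

```python
from typing import NamedTuple


class NAMED_TUPLE(NamedTuple):
    id: str
    title: str
    body: str
    aid: str
    bid: int


data = NAMED_TUPLE("q1-d3", "A title", "Some body text", "a-17", 4)

# Same steps as OrConvQADocumentStore.converter above: separate the id from
# the remaining fields before building the record.
fields = data._asdict()
doc_id = fields.pop("id")
print(doc_id)  # -> q1-d3
print(fields)  # -> {'title': 'A title', 'body': 'Some body text', 'aid': 'a-17', 'bid': 4}
```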
datamaestro_text/datasets/irds/data.py
CHANGED

@@ -1,36 +1,44 @@
+import logging
 from abc import ABC, abstractmethod
+from dataclasses import dataclass
 from functools import partial
-import logging
 from pathlib import Path
-from typing import Dict, Iterator, Tuple, Type
+from typing import Dict, Iterator, List, NamedTuple, Tuple, Type
+
 import ir_datasets
-
+import ir_datasets.datasets as _irds
+from datamaestro.record import RecordType, record_type
+from experimaestro import Config, Meta, Option, Param
+from experimaestro.compat import cached_property
 from ir_datasets.formats import (
     GenericDoc,
-    GenericQuery,
     GenericDocPair,
+    GenericQuery,
     TrecParsedDoc,
     TrecQuery,
 )
-
-
-from experimaestro.compat import cached_property
-from experimaestro import Option
-from datamaestro.record import RecordType, record_type
-from datamaestro_text.data.conversation.base import AnswerEntry
+from ir_datasets.indices import PickleLz4FullStore
+
 import datamaestro_text.data.ir as ir
+import datamaestro_text.data.ir.formats as formats
+from datamaestro_text.data.conversation.base import (
+    AnswerDocumentID,
+    AnswerEntry,
+    ConversationHistoryItem,
+    ConversationTreeNode,
+    DecontextualizedDictItem,
+    EntryType,
+)
 from datamaestro_text.data.ir.base import (
-    Record,
-    TopicRecord,
-    DocumentRecord,
-    SimpleTextItem,
     AdhocAssessedTopic,
-
+    DocumentRecord,
     IDItem,
+    Record,
+    SimpleAdhocAssessment,
+    SimpleTextItem,
+    TopicRecord,
     create_record,
 )
-import datamaestro_text.data.ir.formats as formats
-
 
 # Interface between ir_datasets and datamaestro:
 # provides adapted data types

@@ -109,6 +117,12 @@ class Documents(ir.DocumentStore, IRDSId):
         _irds.beir.BeirTitleUrlDoc: tuple_constructor(
             formats.TitleUrlDocument, "doc_id", "text", "title", "url"
         ),
+        _irds.beir.BeirToucheDoc: tuple_constructor(
+            formats.Touche2020, "doc_id", "text", "title", "stance", "url"
+        ),
+        _irds.beir.BeirSciDoc: tuple_constructor(
+            formats.SciDocs, "doc_id", "text", "title", "authors", "year", "cited_by", "references"
+        ),
         _irds.msmarco_document.MsMarcoDocument: tuple_constructor(
             formats.MsMarcoDocument, "doc_id", "url", "title", "body"
         ),

@@ -216,20 +230,6 @@ if hasattr(_irds, "miracl"):
     )
 
 
-# Fix while PR https://github.com/allenai/ir_datasets/pull/252
-# is not in.
-class DMPickleLz4FullStore(PickleLz4FullStore):
-    def get_many(self, doc_ids, field=None):
-        result = {}
-        field_idx = self._doc_cls._fields.index(field) if field is not None else None
-        for doc in self.get_many_iter(doc_ids):
-            if field is not None:
-                result[getattr(doc, self._id_field)] = doc[field_idx]
-            else:
-                result[getattr(doc, self._id_field)] = doc
-        return result
-
-
 class LZ4DocumentStore(ir.DocumentStore):
     """A LZ4-based document store"""
 

@@ -243,7 +243,7 @@ class LZ4DocumentStore(ir.DocumentStore):
 
     @cached_property
     def store(self):
-        return
+        return PickleLz4FullStore(
             self.path, None, self.data_cls, self.lookup_field, self.index_fields
         )
 

@@ -262,10 +262,10 @@ class LZ4DocumentStore(ir.DocumentStore):
         retrieved = self.store.get_many(docids)
         return [self.converter(retrieved[docid]) for docid in docids]
 
+    @abstractmethod
     def converter(self, data):
-        """Converts a document from LZ4 tuples to
-
-        return data
+        """Converts a document from LZ4 tuples to a document record"""
+        ...
 
     def iter(self) -> Iterator[DocumentRecord]:
         """Returns an iterator over documents"""

@@ -278,6 +278,25 @@ class LZ4DocumentStore(ir.DocumentStore):
         return self.store.count()
 
 
+class SimpleJsonDocument(NamedTuple):
+    id: str
+    text: str
+
+
+class LZ4JSONLDocumentStore(LZ4DocumentStore):
+    jsonl_path: Meta[Path]
+    """json-l based document store
+
+    Each line is of the form
+    ```json
+    { "id": "...", "text": "..." }
+    ```
+    """
+
+    def converter(self, data):
+        return DocumentRecord(IDItem(data["id"]), SimpleTextItem(data["text"]))
+
+
 class TopicsHandler(ABC):
     @abstractmethod
     def topic_int(self, internal_topic_id: int) -> TopicRecord:

@@ -349,6 +368,12 @@ class Topics(ir.TopicsStore, IRDSId):
         TrecQuery: tuple_constructor(
             formats.TrecTopic, "query_id", "title", "description", "narrative"
         ),
+        _irds.beir.BeirToucheQuery: tuple_constructor(
+            formats.TrecTopic, "query_id", "text", "description", "narrative"
+        ),
+        _irds.beir.BeirSciQuery: tuple_constructor(
+            formats.SciDocsTopic, "query_id", "text", "authors", "year", "cited_by", "references"
+        ),
         _irds.tweets2013_ia.TrecMb13Query: tuple_constructor(
             formats.TrecMb13Query, "query_id", "query", "time", "tweet_time"
         ),

@@ -392,197 +417,190 @@ class Topics(ir.TopicsStore, IRDSId):
         return self.handler.iter()
 
 
-
-
-
-
-
-
-
-
+class CastTopicsHandler(TopicsHandler):
+    def __init__(self, dataset):
+        self.dataset = dataset
+
+    @cached_property
+    def ext2records(self):
+        return {record[IDItem].id: record for record in self.records}
+
+    def topic_int(self, internal_topic_id: int) -> TopicRecord:
+        """Returns a document given its internal ID"""
+        return self.records[internal_topic_id]
+
+    def topic_ext(self, external_topic_id: str) -> TopicRecord:
+        """Returns a document given its external ID"""
+        return self.ext2records[external_topic_id]
+
+    def iter(self) -> Iterator[ir.TopicRecord]:
+        """Returns an iterator over topics"""
+        return iter(self.records)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    @cached_property
+    def records(self):
+        try:
+            topic_number = None
+            node = None
+            conversation = []
+            records = []
+
+            for query in self.dataset.dataset.queries_iter():
+                decontextualized = DecontextualizedDictItem(
+                    "manual",
+                    {
+                        "manual": query.manual_rewritten_utterance,
+                        "auto": query.automatic_rewritten_utterance,
+                    },
+                )
+
+                is_new_conversation = topic_number != query.topic_number
+
+                topic = Record(
+                    IDItem(query.query_id),
+                    SimpleTextItem(query.raw_utterance),
+                    decontextualized,
+                    ConversationHistoryItem(
+                        [] if is_new_conversation else node.conversation(False)
+                    ),
+                    EntryType.USER_QUERY,
+                )
+
+                if is_new_conversation:
+                    conversation = []
+                    node = ConversationTreeNode(topic)
+                    topic_number = query.topic_number
+                else:
+                    node = node.add(ConversationTreeNode(topic))
+
+                records.append(topic)
+
+                conversation.append(node)
+                node = node.add(
+                    ConversationTreeNode(
+                        Record(
+                            AnswerDocumentID(self.get_canonical_result_id(query)),
+                            EntryType.SYSTEM_ANSWER,
+                        )
                     )
+                )
+                conversation.append(node)
+        except Exception:
+            logging.exception("Error while computing topic records")
+            raise
+
+        return records
+
+    @staticmethod
+    def get_canonical_result_id():
+        return None
+
 
-
+class Cast2020TopicsHandler(CastTopicsHandler):
+    @staticmethod
+    def get_canonical_result_id(query: _irds.trec_cast.Cast2020Query):
+        return query.manual_canonical_result_id
 
+
+class Cast2021TopicsHandler(CastTopicsHandler):
+    @staticmethod
+    def get_canonical_result_id(query: _irds.trec_cast.Cast2021Query):
+        return query.canonical_result_id
+
+
+class Cast2022TopicsHandler(CastTopicsHandler):
+    def __init__(self, dataset):
+        self.dataset = dataset
+
+    @cached_property
+    def records(self):
+        try:
+            records = []
+            nodes: Dict[str, ConversationTreeNode] = {}
+
+            for (
+                query
+            ) in (
+                self.dataset.dataset.queries_iter()
+            ):  # type: _irds.trec_cast.Cast2022Query
+                parent = nodes[query.parent_id] if query.parent_id else None
+
+                if query.participant == "User":
                     topic = Record(
                         IDItem(query.query_id),
                         SimpleTextItem(query.raw_utterance),
-
+                        DecontextualizedDictItem(
+                            "manual",
+                            {
+                                "manual": query.manual_rewritten_utterance,
+                            },
+                        ),
                         ConversationHistoryItem(
-
+                            parent.conversation(False) if parent else []
                         ),
                         EntryType.USER_QUERY,
                     )
-
-                    if is_new_conversation:
-                        conversation = []
-                        node = ConversationTreeNode(topic)
-                        topic_number = query.topic_number
-                    else:
-                        node = node.add(ConversationTreeNode(topic))
-
+                    node = ConversationTreeNode(topic)
                     records.append(topic)
-
-
-
-
-
-                            AnswerDocumentID(self.get_canonical_result_id(query)),
-                            EntryType.SYSTEM_ANSWER,
-                        )
+                else:
+                    node = ConversationTreeNode(
+                        Record(
+                            AnswerEntry(query.response),
+                            EntryType.SYSTEM_ANSWER,
                         )
                     )
-                    conversation.append(node)
-        except Exception:
-            logging.exception("Error while computing topic records")
-            raise
-
-        return records
-
-    @staticmethod
-    def get_canonical_result_id():
-        return None
-
-class Cast2020TopicsHandler(CastTopicsHandler):
-    @staticmethod
-    def get_canonical_result_id(query: _irds.trec_cast.Cast2020Query):
-        return query.manual_canonical_result_id
-
-class Cast2021TopicsHandler(CastTopicsHandler):
-    @staticmethod
-    def get_canonical_result_id(query: _irds.trec_cast.Cast2021Query):
-        return query.canonical_result_id
-
-class Cast2022TopicsHandler(CastTopicsHandler):
-    def __init__(self, dataset):
-        self.dataset = dataset
-
-    @cached_property
-    def records(self):
-        try:
-            records = []
-            nodes: Dict[str, ConversationTreeNode] = {}
-
-            for (
-                query
-            ) in (
-                self.dataset.dataset.queries_iter()
-            ):  # type: _irds.trec_cast.Cast2022Query
-                parent = nodes[query.parent_id] if query.parent_id else None
-
-                if query.participant == "User":
-                    topic = Record(
-                        IDItem(query.query_id),
-                        SimpleTextItem(query.raw_utterance),
-                        DecontextualizedDictItem(
-                            "manual",
-                            {
-                                "manual": query.manual_rewritten_utterance,
-                            },
-                        ),
-                        ConversationHistoryItem(
-                            parent.conversation(False) if parent else []
-                        ),
-                        EntryType.USER_QUERY,
-                    )
-                    node = ConversationTreeNode(topic)
-                    records.append(topic)
-                else:
-                    node = ConversationTreeNode(
-                        Record(
-                            AnswerEntry(query.response),
-                            EntryType.SYSTEM_ANSWER,
-                        )
-                    )
 
-
-
-
-
-
-
-
-        return records
-
-Topics.HANDLERS.update(
-    {
-        # _irds.trec_cast.Cast2019Query: Cast2019TopicsHandler,
-        _irds.trec_cast.Cast2020Query: Cast2020TopicsHandler,
-        _irds.trec_cast.Cast2021Query: Cast2021TopicsHandler,
-        _irds.trec_cast.Cast2022Query: Cast2022TopicsHandler,
-    }
-)
+                nodes[query.query_id] = node
+                if parent:
+                    parent.add(node)
+        except Exception:
+            logging.exception("Error while computing topic records")
+            raise
 
-
-    def check(self, cls):
-        assert issubclass(cls, _irds.trec_cast.CastDoc)
+        return records
 
-    @cached_property
-    def target_cls(self):
-        return formats.TitleUrlDocument
 
-
-
-
-
+Topics.HANDLERS.update(
+    {
+        # _irds.trec_cast.Cast2019Query: Cast2019TopicsHandler,
+        _irds.trec_cast.Cast2020Query: Cast2020TopicsHandler,
+        _irds.trec_cast.Cast2021Query: Cast2021TopicsHandler,
+        _irds.trec_cast.Cast2022Query: Cast2022TopicsHandler,
+    }
+)
 
-class CastPassageDocHandler:
-    def check(self, cls):
-        assert issubclass(cls, _irds.trec_cast.CastPassageDoc)
 
-
-
-
+class CastDocHandler:
+    def check(self, cls):
+        assert issubclass(cls, _irds.trec_cast.CastDoc)
+
+    @cached_property
+    def target_cls(self):
+        return formats.TitleUrlDocument
+
+    def __call__(self, _, doc: _irds.trec_cast.CastDoc):
+        return Record(
+            IDItem(doc.doc_id), formats.SimpleTextItem(" ".join(doc.passages))
+        )
+
+
+class CastPassageDocHandler:
+    def check(self, cls):
+        assert issubclass(cls, _irds.trec_cast.CastPassageDoc)
+
+    @cached_property
+    def target_cls(self):
+        return formats.TitleUrlDocument
+
+    def __call__(self, _, doc: _irds.trec_cast.CastPassageDoc):
+        return Record(
+            IDItem(doc.doc_id),
+            formats.TitleUrlDocument(doc.text, doc.title, doc.url),
+        )
 
-    def __call__(self, _, doc: _irds.trec_cast.CastPassageDoc):
-        return Record(
-            IDItem(doc.doc_id),
-            formats.TitleUrlDocument(doc.text, doc.title, doc.url),
-        )
 
-
-
+Documents.CONVERTERS[_irds.trec_cast.CastDoc] = CastDocHandler()
+Documents.CONVERTERS[_irds.trec_cast.CastPassageDoc] = CastPassageDocHandler()
 
 
 class Adhoc(ir.Adhoc, IRDSId):
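Taken together, the data.py changes make `LZ4DocumentStore.converter` abstract and add `LZ4JSONLDocumentStore`, whose converter expects one JSON object per line with `id` and `text` keys. The dependency-free sketch below mirrors that conversion contract; the real implementation returns `DocumentRecord` / `IDItem` / `SimpleTextItem` objects from datamaestro, which are stood in for here by plain tuples.

```python
import json
from typing import Iterator, Tuple

# Stand-in for datamaestro's DocumentRecord: (doc_id, text).
DocRecord = Tuple[str, str]


def convert(line: str) -> DocRecord:
    """Mimics LZ4JSONLDocumentStore.converter: one JSON object per line,
    with mandatory "id" and "text" keys."""
    data = json.loads(line)
    return (data["id"], data["text"])


def iter_records(lines: Iterator[str]) -> Iterator[DocRecord]:
    for line in lines:
        yield convert(line)


sample = ['{"id": "d1", "text": "first document"}', '{"id": "d2", "text": "second"}']
for doc_id, text in iter_records(iter(sample)):
    print(doc_id, text)
```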
datamaestro_text/datasets/irds/helpers.py
CHANGED

@@ -1,13 +1,13 @@
 import logging
 from typing import Optional, Type, Callable, Iterator
 from ir_datasets.indices import PickleLz4FullStore
-from datamaestro.download import Download
+from datamaestro.download import Resource
 from datamaestro.utils import FileChecker
 from pathlib import Path
 import urllib3
 
 
-class lz4docstore_downloader(Download):
+class lz4docstore_downloader(Resource):
     """Uses ir_datasets Lz4FullStore to build a document store for a stream of documents"""
 
     def __init__(

@@ -69,3 +69,59 @@ class lz4docstore_downloader(Download):
 
         # All good!
         (destination / "done").touch()
+
+
+class lz4docstore_builder(Resource):
+    def __init__(
+        self,
+        name: str,
+        iter_factory: Callable[[], Iterator],
+        doc_cls: Type,
+        lookup_field: str,
+        *,
+        count_hint: Optional[int] = None,
+    ):
+        """Uses ir_datasets Lz4FullStore to build a document store for a stream of documents
+
+        :param name: The name of the variable for path construction
+        :param iter_factory: Iterator over documents
+        :param doc_cls: The class of documents (must be a dataclass because of how ir-datasets works)
+        :param lookup_field: Which field to use for lookup
+        :param count_hint: Number of documents (hint), defaults to None
+        """
+        super().__init__(name)
+        self.iter_factory = iter_factory
+        self.doc_cls = doc_cls
+        self.lookup_field = lookup_field
+        self.count_hint = count_hint
+
+    def prepare(self):
+        return self.definition.datapath / self.varname
+
+    def download(self, force=False):
+        # Creates directory if needed
+        destination = self.definition.datapath / self.varname
+        destination.mkdir(exist_ok=True)
+
+        # Early exit
+        if (destination / "done").is_file() and not force:
+            return True
+
+        # Download (cache)
+        logging.info("Building the document index")
+
+        # Builds the LZ4 store
+        store = PickleLz4FullStore(
+            destination,
+            lambda: self.iter_factory(),
+            self.doc_cls,
+            lookup_field=self.lookup_field,
+            index_fields=[self.lookup_field],
+            key_field_prefix=None,
+            size_hint=None,
+            count_hint=self.count_hint,
+        )
+        store.build()
+
+        # All good!
+        (destination / "done").touch()
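`lz4docstore_builder` wraps ir_datasets' `PickleLz4FullStore.build()` around an arbitrary document iterator. The constructor signature is the one shown in the diff above; the named-tuple document type and iterator below are illustrative only, and the `dataset`/`options` arguments in the commented call come from a datamaestro dataset definition (see the QReCC `Content` dataset earlier in this diff for the real call site).

```python
from typing import Iterator, NamedTuple


# Illustrative document type; any NamedTuple-like "dataclass" that exposes the
# lookup field works, per the docstring of lz4docstore_builder above.
class Doc(NamedTuple):
    id: str
    text: str


def docs() -> Iterator[Doc]:
    """Factory returning the document stream to be stored."""
    yield Doc("d1", "first document")
    yield Doc("d2", "second document")


# Hypothetical wiring inside a dataset definition:
#
#     store_path = lz4docstore_builder(
#         "store",        # name used to build the destination path
#         docs,           # factory returning the document iterator
#         Doc,            # document class
#         "id",           # lookup field, also the only indexed field
#         count_hint=2,   # optional size hint passed to PickleLz4FullStore
#     ).setup(dataset, options)
```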
datamaestro_text/version.py
CHANGED

@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '2024.5.31'
-__version_tuple__ = version_tuple = (2024, 5, 31)
+__version__ = version = '2025.1.7'
+__version_tuple__ = version_tuple = (2025, 1, 7)
{datamaestro_text-2024.5.31.dist-info → datamaestro_text-2025.1.7.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datamaestro-text
-Version: 2024.5.31
+Version: 2025.1.7
 Summary: Datamaestro module for text-related datasets
 Author-email: Benjamin Piwowarski <benjamin@piwowarski.fr>
 License: GPL-3

@@ -18,8 +18,8 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: datamaestro
-Requires-Dist:
+Requires-Dist: datamaestro>=1.2.1
+Requires-Dist: ir_datasets>=0.5.8
 Requires-Dist: attrs
 
 [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit) [![PyPI version](https://badge.fury.io/py/datamaestro-text.svg)](https://badge.fury.io/py/datamaestro-text)
{datamaestro_text-2024.5.31.dist-info → datamaestro_text-2025.1.7.dist-info}/RECORD
CHANGED

@@ -1,12 +1,12 @@
 datamaestro_text/__init__.py,sha256=hU8jZpkXl3F74qIfqnJl7v4nJ9YxfoR7IpJpUREFNRI,248
-datamaestro_text/version.py,sha256=
+datamaestro_text/version.py,sha256=k65KHkS4PD3NjDDCJgWcTwRBE4yVcKs32B8SUxv2DvE,417
 datamaestro_text/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamaestro_text/config/ai/quac.yaml,sha256=h1D7UJo2z1nZ_9MXpDHuQNJG9Ma2oayUdJV6lyumAIg,1103
 datamaestro_text/config/com/oscar-corpus.py,sha256=dDfdy_uhG6esRQQzUOOORwK8KvEq4c4hZbWt22pv-zY,733
 datamaestro_text/config/com/sentiment140.py,sha256=iRV_rSjQcr9WNjjQ4EdznaEMGLADV_rwpeRx7ycQi0k,1383
 datamaestro_text/config/com/fastml/goodbooks-10k.yaml,sha256=5ZABxUnBFs2ZnCXtBH8YoBiPb3SocRRdH1DLSfVWF-Y,1172
 datamaestro_text/config/com/github/aagohary/canard.py,sha256=tIwb_KxMUR3st7rzQUkt6rIjolTl8uKvDq6t795b1nY,1468
-datamaestro_text/config/com/github/apple/ml-qrecc.py,sha256=
+datamaestro_text/config/com/github/apple/ml-qrecc.py,sha256=_lufJlg-4zaQyjr5Ae-X-9hXzpl2JFjfxh_RhnBbva4,3068
 datamaestro_text/config/com/github/prdwb/orconvqa.py,sha256=Tg3hbnvilxW_Lwt7fpKvGMtu-6mc9oNIHM-LX6JTR1U,3026
 datamaestro_text/config/com/github/soskek/bookcorpus.yaml,sha256=qJKs35yeEIilEMgNvU3OEqMp1TSn7mDM2T-uYyA7kTU,1607
 datamaestro_text/config/com/microsoft/wikiqa.yaml,sha256=U7rU-W2Xz1MYv2YXT4jCTj5DsHyM0VssbQPNa3EISaM,540

@@ -46,7 +46,7 @@ datamaestro_text/data/recommendation.py,sha256=MatelpJQiCMpNDuaQVBkRY809J1qiQo-8vDNvWVDGIs,
 datamaestro_text/data/tagging.py,sha256=yWm7bNLks77cAySa1aZNCmLFxTTqhqXZ0PaoaEYU6hI,697
 datamaestro_text/data/text.py,sha256=pOI8nrEd6RoQA28DVH1JufHTunr9vG3FQzwElR8YirI,499
 datamaestro_text/data/conversation/__init__.py,sha256=esOWnSImMlQs5QtfxUt559ABLd6a5wwoNpj9XtIq71Y,159
-datamaestro_text/data/conversation/base.py,sha256=
+datamaestro_text/data/conversation/base.py,sha256=zXX5sPMoVtDf4WTkLx45IvZz6lgKPgcUZFu6N4lVBlc,6457
 datamaestro_text/data/conversation/canard.py,sha256=IMxu5NfytWZLZ_cyT8UFOICbDE82HLJoNBT_6j36Faw,1808
 datamaestro_text/data/conversation/orconvqa.py,sha256=VG3GV5_IPQcvla9rrQPypDNcZYmKNMgozmr2oudeLA4,3802
 datamaestro_text/data/conversation/qrecc.py,sha256=es4GmqPtE63A7O_GARe8Zy3rQvuLEhAvUA7CfN_nMeA,2562

@@ -55,15 +55,15 @@ datamaestro_text/data/ir/base.py,sha256=Cw8egjChpx4ksUwp-vTA70B2OWxROH7FIeJylsXg
 datamaestro_text/data/ir/cord19.py,sha256=7xDIzsy63WrA9lxxyNOMu5ECRymu5x23EzYG977nS6Y,1440
 datamaestro_text/data/ir/csv.py,sha256=tnxuqR_MZ3GQhuoXEMYyWLQw8PyD0gRqsnzIP5Gsziw,1212
 datamaestro_text/data/ir/data.py,sha256=ERmmOxz_9mUkIuccNbzUa5Y6gVLLVDdyc4cCxbCCUbY,20
-datamaestro_text/data/ir/formats.py,sha256=
+datamaestro_text/data/ir/formats.py,sha256=rU9uJhdFDdlnQ2qfFowK_--1WVOH1fVgfXWOPPDF_FY,3160
 datamaestro_text/data/ir/huggingface.py,sha256=G71VFDN-SllZy4LFxumEbCumEJvb5-orAbLemHjWhiA,956
-datamaestro_text/data/ir/stores.py,sha256=
+datamaestro_text/data/ir/stores.py,sha256=rlOogoBAfsP7o01KqvHiTF1XqzK2Fp6QbRmuasabKAE,859
 datamaestro_text/data/ir/trec.py,sha256=qDOzmgUn0hMqeP9KdI190-9IKsLl6yNRqIoBz6s-cBs,1898
 datamaestro_text/data/ir/utils.py,sha256=6-GhXVtgkBZGhIs2-ODZua_3DmKjSSVydStpHDqbAwE,833
 datamaestro_text/datasets/irds/__init__.py,sha256=Tq0HN1qojnZYLBumM59BuTkz7r0gcu-5OXmDDLgPpAc,707
-datamaestro_text/datasets/irds/data.py,sha256=
+datamaestro_text/datasets/irds/data.py,sha256=BTsFZdjFJ0XdlYRh2rcshAoeknHrypf7be2BGUbWWFk,19931
 datamaestro_text/datasets/irds/datasets.py,sha256=qtN-nX2_OV9FD339aZjwVL1jFf6I4T6MbNCcuRAjmtU,5682
-datamaestro_text/datasets/irds/helpers.py,sha256=
+datamaestro_text/datasets/irds/helpers.py,sha256=fGE-fbuJbXdTzl1qo55247jzn9cvApY-d82GJBgfY1E,3982
 datamaestro_text/datasets/irds/utils.py,sha256=m30JLIrV_HgilN11TvY9dGTyumES6LLzWZDUAMT915M,1425
 datamaestro_text/download/tmdb.py,sha256=kU_Vz9jhznlyeKMHziVu58IHoWv8zPu6CZTHVNQvmu4,4009
 datamaestro_text/interfaces/plaintext.py,sha256=cWfS_xjqZxQ0EV4Ax5BEarZ4lnhQ1I7mc_vgfBgE76w,885

@@ -78,9 +78,9 @@ datamaestro_text/utils/files.py,sha256=n6ZGl5LNrZbHLcV9RFwd7cFT0vPUezit-2dsBzs1v
 datamaestro_text/utils/iter.py,sha256=2_UZ8y9Ma4k5U9ZD4w55Zfb6NGrKM1L4G40OygRm1is,2459
 datamaestro_text/utils/randomstream.py,sha256=_-boH4IIqN8qcl3IktjpNp9vmF4TWRzHUSNVwg7WAr8,973
 datamaestro_text/utils/shuffle.py,sha256=o8JTz3mr0lYWyv0zEh91jEK12ci1etMiUnzh5GkOHCM,3490
-datamaestro_text-
-datamaestro_text-
-datamaestro_text-
-datamaestro_text-
-datamaestro_text-
-datamaestro_text-
+datamaestro_text-2025.1.7.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+datamaestro_text-2025.1.7.dist-info/METADATA,sha256=-wsBcUcnEnOMiHb1ROIf43r55ZNNNjpIemuFXm5hHUY,1609
+datamaestro_text-2025.1.7.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
+datamaestro_text-2025.1.7.dist-info/entry_points.txt,sha256=lO1P5hE183L5qEEVHlG8d_ik0HNXnX7Eo87cQLdcl-Y,111
+datamaestro_text-2025.1.7.dist-info/top_level.txt,sha256=gYSeqViE8r7eCxSdqFJL74OwljOwKsGPaIhEcCXqc-o,17
+datamaestro_text-2025.1.7.dist-info/RECORD,,
{datamaestro_text-2024.5.31.dist-info → datamaestro_text-2025.1.7.dist-info}/LICENSE
RENAMED
File without changes

{datamaestro_text-2024.5.31.dist-info → datamaestro_text-2025.1.7.dist-info}/entry_points.txt
RENAMED
File without changes

{datamaestro_text-2024.5.31.dist-info → datamaestro_text-2025.1.7.dist-info}/top_level.txt
RENAMED
File without changes