nucliadb 6.3.5.post4021__py3-none-any.whl → 6.3.5.post4026__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,183 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ import asyncio
22
+ from abc import ABC, abstractmethod
23
+ from contextvars import ContextVar
24
+ from dataclasses import dataclass
25
+ from functools import cached_property
26
+ from typing import Generic, Optional, TypeVar
27
+
28
+ from lru import LRU
29
+
30
+ from nucliadb.ingest.orm.resource import Resource as ResourceORM
31
+ from nucliadb_protos.utils_pb2 import ExtractedText
32
+ from nucliadb_telemetry.metrics import Counter, Gauge
33
+
34
# Telemetry objects, one pair per cache type: a gauge that the cache adjusts
# as entries are added or cleared, and a counter of cache operations labelled
# by operation "type".

# Resource cache
cached_resources = Gauge("nucliadb_cached_resources")
resource_cache_ops = Counter(
    "nucliadb_resource_cache_ops",
    labels={"type": ""},
)

# Extracted text cache
cached_extracted_texts = Gauge("nucliadb_cached_extracted_texts")
extracted_text_cache_ops = Counter(
    "nucliadb_extracted_text_cache_ops",
    labels={"type": ""},
)


# Type of the values stored in a cache
T = TypeVar("T")
42
+
43
+
44
@dataclass
class CacheMetrics:
    """Pair of telemetry objects a cache reports to."""

    # Gauge incremented when the cache grows and decremented when it is cleared
    _cache_size: Gauge
    # Counter of cache operations, incremented with a {"type": ...} label
    ops: Counter
48
+
49
+
50
class Cache(Generic[T], ABC):
    """Low-level bounded cache with per-key async locks.

    Values live in an ``LRU`` of at most ``cache_size`` entries. Callers that
    want to serialize concurrent work on a key can take the lock returned by
    ``get_lock``; the cache itself never acquires these locks.

    Every subclass must provide ``metrics`` (a ``CacheMetrics``): the gauge is
    adjusted as the cache grows or is cleared, and the counter records
    evictions.
    """

    def __init__(self, cache_size: int) -> None:
        """Create an empty cache holding at most ``cache_size`` entries."""
        self.cache: LRU[str, T] = LRU(cache_size, callback=self._evicted_callback)
        self.locks: dict[str, asyncio.Lock] = {}

    def _evicted_callback(self, key: str, value: T):
        """LRU callback: forget the key's lock and count an ``evict`` op."""
        self.locks.pop(key, None)
        self.metrics.ops.inc({"type": "evict"})

    def get(self, key: str) -> Optional[T]:
        """Return the value cached under ``key``, or ``None`` if absent."""
        return self.cache.get(key)

    def get_lock(self, key: str) -> asyncio.Lock:
        """Return the lock associated with ``key``, creating it on first use.

        The same lock object is returned for a key until its lock is dropped
        (on eviction of the key or when the cache is cleared).
        """
        # Look the lock up first: ``setdefault(key, asyncio.Lock())`` would
        # build (and immediately discard) a new Lock on every call, even when
        # one is already registered for the key.
        lock = self.locks.get(key)
        if lock is None:
            lock = asyncio.Lock()
            self.locks[key] = lock
        return lock

    def set(self, key: str, value: T):
        """Store ``value`` under ``key`` and keep the size gauge in sync."""
        size_before = len(self.cache)

        self.cache[key] = value

        # Only report growth: when the number of entries did not increase
        # there is nothing to add to the gauge.
        growth = len(self.cache) - size_before
        if growth > 0:
            self.metrics._cache_size.inc(growth)

    def contains(self, key: str) -> bool:
        """Tell whether ``key`` currently has a cached value."""
        return key in self.cache

    def clear(self):
        """Drop every cached value and lock, lowering the gauge accordingly."""
        self.metrics._cache_size.dec(len(self.cache))
        self.cache.clear()
        self.locks.clear()

    def __del__(self):
        # We want to clear the cache before deleting the object and set the
        # metric appropriately. If __init__ failed before the containers were
        # created there is nothing to clear, and touching them would only
        # raise an AttributeError from inside the finalizer.
        if hasattr(self, "cache") and hasattr(self, "locks"):
            self.clear()

    # Subclasses in this module satisfy this with a plain class attribute.
    @abstractmethod
    @cached_property
    def metrics(self) -> CacheMetrics: ...
99
+
100
+
101
class ResourceCache(Cache[ResourceORM]):
    """Bounded cache of resource ORM objects, reporting to the resource metrics."""

    metrics = CacheMetrics(
        ops=resource_cache_ops,
        _cache_size=cached_resources,
    )

    def __init__(self, cache_size: int = 128) -> None:
        # The default size is an arbitrary number: once the metric is in place
        # and memory consumption has been analyzed, it can be adjusted with
        # more knowledge.
        super().__init__(cache_size)
111
+
112
+
113
class ExtractedTextCache(Cache[ExtractedText]):
    """
    In-memory cache of the extracted text of resource fields, used while
    search results are being hydrated.

    Matching text blocks are processed in parallel and the extracted text is
    fetched for each field where a text block is found; caching it avoids
    fetching the same extracted text multiple times.
    """

    metrics = CacheMetrics(
        ops=extracted_text_cache_ops,
        _cache_size=cached_extracted_texts,
    )

    def __init__(self, cache_size: int = 128):
        # Same default capacity as the resource cache
        super().__init__(cache_size)
130
+
131
+
132
# Global caches (per asyncio task). Both context variables default to None,
# which the accessors below report as "no cache set".

rcache: ContextVar[Optional[ResourceCache]] = ContextVar(
    "rcache",
    default=None,
)
etcache: ContextVar[Optional[ExtractedTextCache]] = ContextVar(
    "etcache",
    default=None,
)
136
+
137
+
138
+ # Cache management
139
+
140
+
141
def get_or_create_resource_cache(clear: bool = False) -> ResourceCache:
    """Return the resource cache of the current context, creating it if needed.

    The cache is specific to the current asyncio task (and all its subtasks).
    If you spawn subtasks that use this cache, make sure to create it in the
    parent task; otherwise each subtask will have its own independent cache
    instance.

    With ``clear=True`` a brand new cache replaces whatever was set before.
    """
    existing: Optional[ResourceCache] = rcache.get()
    if existing is not None and not clear:
        return existing

    fresh = ResourceCache()
    rcache.set(fresh)
    return fresh
151
+
152
+
153
def get_resource_cache() -> Optional[ResourceCache]:
    """Return the resource cache set in the current context, or ``None``."""
    current = rcache.get()
    return current
155
+
156
+
157
def set_resource_cache() -> None:
    """Install a new, empty resource cache in the current context."""
    rcache.set(ResourceCache())
160
+
161
+
162
def delete_resource_cache() -> None:
    """Unset the resource cache of the current context, if there is one.

    The context variable is reset to ``None`` and the local reference is
    dropped. The cache content itself is only cleared by the cache's finalizer
    once the object is no longer referenced.
    """
    current = rcache.get()
    if current is None:
        return

    rcache.set(None)
    del current
168
+
169
+
170
def get_extracted_text_cache() -> Optional[ExtractedTextCache]:
    """Return the extracted text cache set in the current context, or ``None``."""
    current = etcache.get()
    return current
172
+
173
+
174
def set_extracted_text_cache() -> None:
    """Install a new, empty extracted text cache in the current context."""
    etcache.set(ExtractedTextCache())
177
+
178
+
179
def delete_extracted_text_cache() -> None:
    """Unset the extracted text cache of the current context, if there is one.

    The context variable is reset to ``None`` and the local reference is
    dropped. The cache content itself is only cleared by the cache's finalizer
    once the object is no longer referenced.
    """
    current = etcache.get()
    if current is None:
        return

    etcache.set(None)
    del current
@@ -354,7 +354,7 @@ class Processor:
354
354
  rid=uuid,
355
355
  source=message.source,
356
356
  )
357
- logger.warning("This message did not modify the resource")
357
+ logger.info("This message did not modify the resource")
358
358
  except (
359
359
  asyncio.TimeoutError,
360
360
  asyncio.CancelledError,
@@ -17,14 +17,18 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
 
20
- import asyncio
21
20
  import contextlib
22
21
  import logging
23
- from contextvars import ContextVar
24
22
  from typing import Optional
25
23
 
26
- from lru import LRU
27
-
24
+ from nucliadb.common.cache import (
25
+ delete_extracted_text_cache,
26
+ delete_resource_cache,
27
+ get_extracted_text_cache,
28
+ get_resource_cache,
29
+ set_extracted_text_cache,
30
+ set_resource_cache,
31
+ )
28
32
  from nucliadb.common.ids import FieldId
29
33
  from nucliadb.common.maindb.utils import get_driver
30
34
  from nucliadb.ingest.fields.base import Field
@@ -32,51 +36,10 @@ from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
32
36
  from nucliadb.ingest.orm.resource import Resource as ResourceORM
33
37
  from nucliadb.search import SERVICE_NAME
34
38
  from nucliadb_protos.utils_pb2 import ExtractedText
35
- from nucliadb_telemetry import metrics
36
39
  from nucliadb_utils.utilities import get_storage
37
40
 
38
41
  logger = logging.getLogger(__name__)
39
42
 
40
- rcache: ContextVar[Optional[dict[str, ResourceORM]]] = ContextVar("rcache", default=None)
41
- etcache: ContextVar[Optional["ExtractedTextCache"]] = ContextVar("etcache", default=None)
42
-
43
-
44
- RESOURCE_LOCKS: dict[str, asyncio.Lock] = LRU(1000) # type: ignore
45
- RESOURCE_CACHE_OPS = metrics.Counter("nucliadb_resource_cache_ops", labels={"type": ""})
46
- EXTRACTED_CACHE_OPS = metrics.Counter("nucliadb_extracted_text_cache_ops", labels={"type": ""})
47
-
48
-
49
- def set_extracted_text_cache() -> None:
50
- value = ExtractedTextCache()
51
- etcache.set(value)
52
-
53
-
54
- def get_extracted_text_cache() -> Optional["ExtractedTextCache"]:
55
- return etcache.get()
56
-
57
-
58
- def clear_extracted_text_cache() -> None:
59
- value = etcache.get()
60
- if value is not None:
61
- value.clear()
62
- etcache.set(None)
63
-
64
-
65
- def set_resource_cache() -> None:
66
- value: dict[str, ResourceORM] = {}
67
- rcache.set(value)
68
-
69
-
70
- def get_resource_cache() -> Optional[dict[str, ResourceORM]]:
71
- return rcache.get()
72
-
73
-
74
- def clear_resource_cache() -> None:
75
- value = rcache.get()
76
- if value is not None:
77
- value.clear()
78
- rcache.set(None)
79
-
80
43
 
81
44
  async def get_resource(kbid: str, uuid: str) -> Optional[ResourceORM]:
82
45
  """
@@ -86,22 +49,18 @@ async def get_resource(kbid: str, uuid: str) -> Optional[ResourceORM]:
86
49
 
87
50
  resource_cache = get_resource_cache()
88
51
  if resource_cache is None:
89
- RESOURCE_CACHE_OPS.inc({"type": "miss"})
90
52
  logger.warning("Resource cache not set")
91
53
  return await _orm_get_resource(kbid, uuid)
92
54
 
93
- if uuid not in RESOURCE_LOCKS:
94
- RESOURCE_LOCKS[uuid] = asyncio.Lock()
95
-
96
- async with RESOURCE_LOCKS[uuid]:
97
- if uuid not in resource_cache:
98
- RESOURCE_CACHE_OPS.inc({"type": "miss"})
55
+ async with resource_cache.get_lock(uuid):
56
+ if not resource_cache.contains(uuid):
57
+ resource_cache.metrics.ops.inc({"type": "miss"})
99
58
  orm_resource = await _orm_get_resource(kbid, uuid)
100
59
  else:
101
- RESOURCE_CACHE_OPS.inc({"type": "hit"})
60
+ resource_cache.metrics.ops.inc({"type": "hit"})
102
61
 
103
62
  if orm_resource is not None:
104
- resource_cache[uuid] = orm_resource
63
+ resource_cache.set(uuid, orm_resource)
105
64
  else:
106
65
  orm_resource = resource_cache.get(uuid)
107
66
 
@@ -115,59 +74,30 @@ async def _orm_get_resource(kbid: str, uuid: str) -> Optional[ResourceORM]:
115
74
  return await kb.get(uuid)
116
75
 
117
76
 
118
- class ExtractedTextCache:
119
- """
120
- Used to cache extracted text from a resource in memory during the process
121
- of search results hydration.
122
-
123
- This is needed to avoid fetching the same extracted text multiple times,
124
- as matching text blocks are processed in parallel and the extracted text is
125
- fetched for each field where the text block is found.
126
- """
127
-
128
- def __init__(self):
129
- self.locks = {}
130
- self.values = {}
131
-
132
- def get_value(self, key: str) -> Optional[ExtractedText]:
133
- return self.values.get(key)
134
-
135
- def get_lock(self, key: str) -> asyncio.Lock:
136
- return self.locks.setdefault(key, asyncio.Lock())
137
-
138
- def set_value(self, key: str, value: ExtractedText) -> None:
139
- self.values[key] = value
140
-
141
- def clear(self):
142
- self.values.clear()
143
- self.locks.clear()
144
-
145
-
146
77
  async def get_field_extracted_text(field: Field) -> Optional[ExtractedText]:
147
78
  cache = get_extracted_text_cache()
148
79
  if cache is None:
149
80
  logger.warning("Extracted text cache not set")
150
- EXTRACTED_CACHE_OPS.inc({"type": "miss"})
151
81
  return await field.get_extracted_text()
152
82
 
153
83
  key = f"{field.kbid}/{field.uuid}/{field.id}"
154
- extracted_text = cache.get_value(key)
84
+ extracted_text = cache.get(key)
155
85
  if extracted_text is not None:
156
- EXTRACTED_CACHE_OPS.inc({"type": "hit"})
86
+ cache.metrics.ops.inc({"type": "hit"})
157
87
  return extracted_text
158
88
 
159
89
  async with cache.get_lock(key):
160
90
  # Check again in case another task already fetched it
161
- extracted_text = cache.get_value(key)
91
+ extracted_text = cache.get(key)
162
92
  if extracted_text is not None:
163
- EXTRACTED_CACHE_OPS.inc({"type": "hit"})
93
+ cache.metrics.ops.inc({"type": "hit"})
164
94
  return extracted_text
165
95
 
166
- EXTRACTED_CACHE_OPS.inc({"type": "miss"})
96
+ cache.metrics.ops.inc({"type": "miss"})
167
97
  extracted_text = await field.get_extracted_text()
168
98
  if extracted_text is not None:
169
99
  # Only cache if we actually have extracted text
170
- cache.set_value(key, extracted_text)
100
+ cache.set(key, extracted_text)
171
101
  return extracted_text
172
102
 
173
103
 
@@ -202,5 +132,5 @@ def request_caches():
202
132
  try:
203
133
  yield
204
134
  finally:
205
- clear_resource_cache()
206
- clear_extracted_text_cache()
135
+ delete_resource_cache()
136
+ delete_extracted_text_cache()
@@ -72,7 +72,9 @@ async def generate_field_streaming_payloads(
72
72
 
73
73
  for status in trainset.filter.status:
74
74
  request.filter.labels.append(f"/n/s/{status}")
75
+
75
76
  total = 0
77
+ resources = set()
76
78
 
77
79
  async for document_item in node.stream_get_fields(request):
78
80
  text_labels = []
@@ -81,6 +83,7 @@ async def generate_field_streaming_payloads(
81
83
 
82
84
  field_id = f"{document_item.uuid}{document_item.field}"
83
85
  total += 1
86
+ resources.add(document_item.uuid)
84
87
 
85
88
  field_parts = document_item.field.split("/")
86
89
  if len(field_parts) == 3:
@@ -117,6 +120,27 @@ async def generate_field_streaming_payloads(
117
120
 
118
121
  yield tl
119
122
 
123
+ if total % 1000 == 0:
124
+ logger.info(
125
+ "Field streaming in progress",
126
+ extra={
127
+ "fields": total,
128
+ "resources": len(resources),
129
+ "kbid": kbid,
130
+ "shard_replica_id": shard_replica_id,
131
+ },
132
+ )
133
+
134
+ logger.info(
135
+ "Field streaming finished",
136
+ extra={
137
+ "fields": total,
138
+ "resources": len(resources),
139
+ "kbid": kbid,
140
+ "shard_replica_id": shard_replica_id,
141
+ },
142
+ )
143
+
120
144
 
121
145
  async def get_field_text(kbid: str, rid: str, field: str, field_type: str) -> Optional[ExtractedText]:
122
146
  orm_resource = await get_resource_from_cache_or_db(kbid, rid)
@@ -18,9 +18,9 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
 
21
- from contextvars import ContextVar
22
21
  from typing import Any, AsyncGenerator, AsyncIterator, Optional, Type
23
22
 
23
+ from nucliadb.common.cache import get_or_create_resource_cache
24
24
  from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
25
25
  from nucliadb.common.maindb.utils import get_driver
26
26
  from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
@@ -29,29 +29,21 @@ from nucliadb.train import SERVICE_NAME, logger
29
29
  from nucliadb.train.types import T
30
30
  from nucliadb_utils.utilities import get_storage
31
31
 
32
- rcache: ContextVar[Optional[dict[str, ResourceORM]]] = ContextVar("rcache", default=None)
33
-
34
-
35
- def get_resource_cache(clear: bool = False) -> dict[str, ResourceORM]:
36
- value: Optional[dict[str, ResourceORM]] = rcache.get()
37
- if value is None or clear:
38
- value = {}
39
- rcache.set(value)
40
- return value
41
-
42
32
 
43
33
  async def get_resource_from_cache_or_db(kbid: str, uuid: str) -> Optional[ResourceORM]:
44
- resouce_cache = get_resource_cache()
34
+ resource_cache = get_or_create_resource_cache()
45
35
  orm_resource: Optional[ResourceORM] = None
46
- if uuid not in resouce_cache:
36
+ if not resource_cache.contains(uuid):
37
+ resource_cache.metrics.ops.inc({"type": "miss"})
47
38
  storage = await get_storage(service_name=SERVICE_NAME)
48
39
  async with get_driver().transaction(read_only=True) as transaction:
49
40
  kb = KnowledgeBoxORM(transaction, storage, kbid)
50
41
  orm_resource = await kb.get(uuid)
51
42
  if orm_resource is not None:
52
- resouce_cache[uuid] = orm_resource
43
+ resource_cache.set(uuid, orm_resource)
53
44
  else:
54
- orm_resource = resouce_cache.get(uuid)
45
+ resource_cache.metrics.ops.inc({"type": "hit"})
46
+ orm_resource = resource_cache.get(uuid)
55
47
  return orm_resource
56
48
 
57
49
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.3.5.post4021
3
+ Version: 6.3.5.post4026
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Programming Language :: Python :: 3 :: Only
21
21
  Requires-Python: <4,>=3.9
22
22
  Description-Content-Type: text/markdown
23
- Requires-Dist: nucliadb-telemetry[all]>=6.3.5.post4021
24
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.5.post4021
25
- Requires-Dist: nucliadb-protos>=6.3.5.post4021
26
- Requires-Dist: nucliadb-models>=6.3.5.post4021
27
- Requires-Dist: nidx-protos>=6.3.5.post4021
23
+ Requires-Dist: nucliadb-telemetry[all]>=6.3.5.post4026
24
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.5.post4026
25
+ Requires-Dist: nucliadb-protos>=6.3.5.post4026
26
+ Requires-Dist: nucliadb-models>=6.3.5.post4026
27
+ Requires-Dist: nidx-protos>=6.3.5.post4026
28
28
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
29
29
  Requires-Dist: nuclia-models>=0.24.2
30
30
  Requires-Dist: uvicorn[standard]
@@ -53,6 +53,7 @@ nucliadb/backups/settings.py,sha256=SyzsInj1BRbBI0atg5IXWbMbOZ_eVg4eSQ3IcnUhCxQ,
53
53
  nucliadb/backups/tasks.py,sha256=WkL1LgdYBHbV_A5ilyYv5p3zmXwxH68TDudytN5f7zk,4225
54
54
  nucliadb/backups/utils.py,sha256=_Vogjqcru5oqNZM-bZ0q7Ju79Bv1PD-LVFEa7Z-Q13I,1261
55
55
  nucliadb/common/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
56
+ nucliadb/common/cache.py,sha256=4iZEfoXgpwTj5Yh25wnllJQWJ8TqefpVOH2y3Z6wjjE,5589
56
57
  nucliadb/common/constants.py,sha256=QpigxJh_CtD85Evy0PtV5cVq6x0U_f9xfIcXz1ymkUg,869
57
58
  nucliadb/common/counters.py,sha256=8lOi3A2HeLDDlcNaS2QT1SfD3350VPBjiY3FkmHH1V8,977
58
59
  nucliadb/common/ids.py,sha256=4QjoIofes_vtKj2HsFWZf8VVIVWXxdkYtLpx1n618Us,8239
@@ -155,7 +156,7 @@ nucliadb/ingest/orm/processor/__init__.py,sha256=Aqd9wCNTvggkMkCY3WvoI8spdr94Jnq
155
156
  nucliadb/ingest/orm/processor/auditing.py,sha256=TeYhXGJRyQ7ROytbb2u8R0fIh_FYi3HgTu3S1ribY3U,4623
156
157
  nucliadb/ingest/orm/processor/data_augmentation.py,sha256=v-pj4GbBWSuO8dQyahs5UDr5ghsyfhCZDS0ftKd6ZYc,5179
157
158
  nucliadb/ingest/orm/processor/pgcatalog.py,sha256=H-OCRz0RuTUb80LZBxDowLA9V7ECv1DWiXlnzKW5XGI,3103
158
- nucliadb/ingest/orm/processor/processor.py,sha256=q2iBJJ_5SV_bxA3t5MrbV70iQhir94aFbjZjnYJzEAQ,33141
159
+ nucliadb/ingest/orm/processor/processor.py,sha256=L2iKYTC_qh64pfl4joKQ0h7YaiskGq-fBJ4QtsDRTqw,33138
159
160
  nucliadb/ingest/orm/processor/sequence_manager.py,sha256=uqEphtI1Ir_yk9jRl2gPf7BlzzXWovbARY5MNZSBI_8,1704
160
161
  nucliadb/ingest/service/__init__.py,sha256=MME_G_ERxzJR6JW_hfE2qcfXpmpH1kdG-S0a-M0qRm8,2043
161
162
  nucliadb/ingest/service/exceptions.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
@@ -227,7 +228,7 @@ nucliadb/search/api/v1/resource/utils.py,sha256=-NjZqAQtFEXKpIh8ui5S26ItnJ5rzmmG
227
228
  nucliadb/search/requesters/__init__.py,sha256=itSI7dtTwFP55YMX4iK7JzdMHS5CQVUiB1XzQu4UBh8,833
228
229
  nucliadb/search/requesters/utils.py,sha256=cQZ4-NftiMljoWQ7-Zl7nWfr6u_FY8u_wc9kTvKQcAg,6999
229
230
  nucliadb/search/search/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
230
- nucliadb/search/search/cache.py,sha256=n9vkN6Y6Xnr2RBJyoH0WzjzGTJOMfKekU9tfPTWWCPc,6810
231
+ nucliadb/search/search/cache.py,sha256=s5wfWq40I4HOLtsKxe3Q5vanL9-3J5T8e0G3yvQ7OKg,4817
231
232
  nucliadb/search/search/cut.py,sha256=ytY0_GY7ocNjfxTb4aosxEp4ZfhQNDP--JkhEMGD298,1153
232
233
  nucliadb/search/search/exceptions.py,sha256=klGLgAGGrXcSGix_W6418ZBMqDchAIGjN77ofkOScEI,1039
233
234
  nucliadb/search/search/fetch.py,sha256=XJHIFnZmXM_8Kb37lb4lg1GYG7cZ1plT-qAIb_QziX4,6184
@@ -313,14 +314,14 @@ nucliadb/train/api/v1/shards.py,sha256=GJRnQe8P-7_VTIN1oxVmxlrDA08qVN7opEZdbF4Wx
313
314
  nucliadb/train/api/v1/trainset.py,sha256=kpnpDgiMWr1FKHZJgwH7hue5kzilA8-i9X0YHlNeHuU,2113
314
315
  nucliadb/train/generators/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
315
316
  nucliadb/train/generators/field_classifier.py,sha256=yatj7U-LHRN5xTR6XsYz_3acIAUKTcpkNZcZaSY8MtE,3482
316
- nucliadb/train/generators/field_streaming.py,sha256=kjwg4VNiROVqVDN--mRd4ylLw55Sg2VYxKRDdbmpYSM,5398
317
+ nucliadb/train/generators/field_streaming.py,sha256=BeIiekrkrd2WkPf9_vlJkC9Qxe6bfVFmyu3VvTdzxVM,6034
317
318
  nucliadb/train/generators/image_classifier.py,sha256=B4P88JfpjMcAZIPzlSOYaGseq5NgfssEr_Ecvlprr3g,1859
318
319
  nucliadb/train/generators/paragraph_classifier.py,sha256=0pOZYcT1cAmG7gjSD1HIUaMM5T3Ag-96iUTXRhiV8MI,2761
319
320
  nucliadb/train/generators/paragraph_streaming.py,sha256=dsM7a5hBd2iokvFuxnZhQeko4Jad6djyP2p3tevku8A,3586
320
321
  nucliadb/train/generators/question_answer_streaming.py,sha256=P7-de4W4yW2mgEQ82fF2OZVyx6QJHXezY52qDciDcmw,5680
321
322
  nucliadb/train/generators/sentence_classifier.py,sha256=DuvXfnWvLhklYR_qFGk2LqUyl2JE7CMVFwuHaPyC9Ys,5121
322
323
  nucliadb/train/generators/token_classifier.py,sha256=0848GqoXh8ywU82cPUrkzOM53-lZ1MVCw--8yDABigY,9557
323
- nucliadb/train/generators/utils.py,sha256=1uSELmM4CpKy9jWp6j_u7_n_KR-udRNkes4UmPMOCcI,3907
324
+ nucliadb/train/generators/utils.py,sha256=bArGm3MyFvHAIImhnQ3GZ6u2mZAhmRd3t3AbfDK-Aeg,3756
324
325
  nucliadb/writer/__init__.py,sha256=S298mrZL3vr62OrBqi97mdLxgR5cReMlRJgnaQHZV7s,1304
325
326
  nucliadb/writer/app.py,sha256=ABBO8-u4pDAa61b3mCdD0TFhuHAYcxMkgpZSGgWARuE,2736
326
327
  nucliadb/writer/back_pressure.py,sha256=JaiC2JAugVA92gDHzABZFiuQexiOKZC9C-3Jn9VF-M0,17898
@@ -360,8 +361,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
360
361
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
361
362
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
362
363
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
363
- nucliadb-6.3.5.post4021.dist-info/METADATA,sha256=klG094d3HBaMOI97l0itzintcAyyrpZvd7zvJnnKP1w,4301
364
- nucliadb-6.3.5.post4021.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
365
- nucliadb-6.3.5.post4021.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
366
- nucliadb-6.3.5.post4021.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
367
- nucliadb-6.3.5.post4021.dist-info/RECORD,,
364
+ nucliadb-6.3.5.post4026.dist-info/METADATA,sha256=pxJ_dFn3jLaLxEXtBXk93R87X5pt9NFdaWNvgBsfvag,4301
365
+ nucliadb-6.3.5.post4026.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
366
+ nucliadb-6.3.5.post4026.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
367
+ nucliadb-6.3.5.post4026.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
368
+ nucliadb-6.3.5.post4026.dist-info/RECORD,,