nucliadb-utils 5.0.1.post994__py3-none-any.whl → 5.0.1.post1007__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -93,3 +93,24 @@ class AuditStorage:
93
93
  generative_answer_first_chunk_time: Optional[float] = None,
94
94
  ):
95
95
  raise NotImplementedError
96
+
97
+ def report_fields_and_paragraphs(self, kbid: str, paragraphs: int, fields: int):
98
+ raise NotImplementedError
99
+
100
+ def report_resources(
101
+ self,
102
+ *,
103
+ kbid: str,
104
+ resources: int,
105
+ ):
106
+ raise NotImplementedError
107
+
108
+ def delete_kb(self, kbid: str):
109
+ raise NotImplementedError
110
+
111
+ def suggest(
112
+ self,
113
+ kbid: str,
114
+ client_type: int,
115
+ ):
116
+ raise NotImplementedError
@@ -94,3 +94,24 @@ class BasicAuditStorage(AuditStorage):
94
94
  generative_answer_first_chunk_time: Optional[float] = None,
95
95
  ):
96
96
  logger.debug(f"CHAT {kbid} {user} {origin}")
97
+
98
+ def report_fields_and_paragraphs(self, kbid: str, paragraphs: int, fields: int):
99
+ logger.debug(f"FIELDS & PARAGRAPHS {kbid} {paragraphs} {fields}")
100
+
101
+ def report_resources(
102
+ self,
103
+ *,
104
+ kbid: str,
105
+ resources: int,
106
+ ):
107
+ logger.debug(f"RESOURCES {kbid} {resources}")
108
+
109
+ def delete_kb(self, kbid: str):
110
+ logger.debug(f"DELETE_KB {kbid}")
111
+
112
+ def suggest(
113
+ self,
114
+ kbid: str,
115
+ client_type: int,
116
+ ):
117
+ logger.debug(f"SUGGEST {kbid} {client_type}")
@@ -37,12 +37,24 @@ from nucliadb_protos.audit_pb2 import (
37
37
  AuditField,
38
38
  AuditRequest,
39
39
  ChatContext,
40
+ ClientType,
40
41
  )
41
42
  from nucliadb_protos.nodereader_pb2 import SearchRequest
42
43
  from nucliadb_protos.resources_pb2 import FieldID
43
44
  from nucliadb_utils import logger
44
45
  from nucliadb_utils.audit.audit import AuditStorage
45
46
  from nucliadb_utils.nats import get_traced_jetstream
47
+ from nucliadb_utils.nuclia_usage.protos.kb_usage_pb2 import (
48
+ ClientType as ClientTypeKbUsage,
49
+ )
50
+ from nucliadb_utils.nuclia_usage.protos.kb_usage_pb2 import (
51
+ KBSource,
52
+ Search,
53
+ SearchType,
54
+ Service,
55
+ Storage,
56
+ )
57
+ from nucliadb_utils.nuclia_usage.utils.kb_usage_report import KbUsageReportUtility
46
58
 
47
59
 
48
60
  class RequestContext:
@@ -123,6 +135,9 @@ class AuditMiddleware(BaseHTTPMiddleware):
123
135
  return response
124
136
 
125
137
 
138
+ KB_USAGE_STREAM_SUBJECT = "kb-usage.nuclia_db"
139
+
140
+
126
141
  class StreamAuditStorage(AuditStorage):
127
142
  task: Optional[asyncio.Task]
128
143
  initialized: bool
@@ -181,9 +196,16 @@ class StreamAuditStorage(AuditStorage):
181
196
  self.js = get_traced_jetstream(self.nc, self.service)
182
197
  self.task = asyncio.create_task(self.run())
183
198
 
199
+ self.kb_usage_utility = KbUsageReportUtility(
200
+ nats_stream=self.js, nats_subject=KB_USAGE_STREAM_SUBJECT
201
+ )
202
+ await self.kb_usage_utility.initialize()
203
+
184
204
  self.initialized = True
185
205
 
186
206
  async def finalize(self):
207
+ await self.kb_usage_utility.finalize()
208
+
187
209
  if self.task is not None:
188
210
  self.task.cancel()
189
211
  if self.nc:
@@ -258,6 +280,29 @@ class StreamAuditStorage(AuditStorage):
258
280
 
259
281
  self.send(auditrequest)
260
282
 
283
+ def report_fields_and_paragraphs(self, kbid: str, paragraphs: int, fields: int):
284
+ self.kb_usage_utility.send_kb_usage(
285
+ service=Service.NUCLIA_DB,
286
+ account_id=None,
287
+ kb_id=kbid,
288
+ kb_source=KBSource.HOSTED,
289
+ storage=Storage(paragraphs=paragraphs, fields=fields),
290
+ )
291
+
292
+ def report_resources(
293
+ self,
294
+ *,
295
+ kbid: str,
296
+ resources: int,
297
+ ):
298
+ self.kb_usage_utility.send_kb_usage(
299
+ service=Service.NUCLIA_DB,
300
+ account_id=None,
301
+ kb_id=kbid,
302
+ kb_source=KBSource.HOSTED,
303
+ storage=Storage(resources=resources),
304
+ )
305
+
261
306
  def visited(
262
307
  self,
263
308
  kbid: str,
@@ -277,6 +322,15 @@ class StreamAuditStorage(AuditStorage):
277
322
  auditrequest.kbid = kbid
278
323
  auditrequest.type = AuditRequest.VISITED
279
324
 
325
+ def delete_kb(self, kbid: str):
326
+ self.kb_usage_utility.send_kb_usage(
327
+ service=Service.NUCLIA_DB,
328
+ account_id=None,
329
+ kb_id=kbid,
330
+ kb_source=KBSource.HOSTED,
331
+ storage=Storage(paragraphs=0, fields=0, resources=0),
332
+ )
333
+
280
334
  def search(
281
335
  self,
282
336
  kbid: str,
@@ -302,6 +356,43 @@ class StreamAuditStorage(AuditStorage):
302
356
  auditrequest.resources = resources
303
357
  auditrequest.type = AuditRequest.SEARCH
304
358
 
359
+ self.kb_usage_utility.send_kb_usage(
360
+ service=Service.NUCLIA_DB,
361
+ account_id=None,
362
+ kb_id=kbid,
363
+ kb_source=KBSource.HOSTED,
364
+ # TODO unify AuditRequest client type and Nuclia Usage client type
365
+ searches=[
366
+ Search(
367
+ client=ClientTypeKbUsage.Value(ClientType.Name(client_type)), # type: ignore
368
+ type=SearchType.SEARCH,
369
+ tokens=2000,
370
+ num_searches=1,
371
+ )
372
+ ],
373
+ )
374
+
375
+ def suggest(
376
+ self,
377
+ kbid: str,
378
+ client_type: int,
379
+ ):
380
+ self.kb_usage_utility.send_kb_usage(
381
+ service=Service.NUCLIA_DB,
382
+ account_id=None,
383
+ kb_id=kbid,
384
+ kb_source=KBSource.HOSTED,
385
+ # TODO unify AuditRequest client type and Nuclia Usage client type
386
+ searches=[
387
+ Search(
388
+ client=ClientTypeKbUsage.Value(ClientType.Name(client_type)), # type: ignore
389
+ type=SearchType.SUGGEST,
390
+ tokens=0,
391
+ num_searches=1,
392
+ )
393
+ ],
394
+ )
395
+
305
396
  def chat(
306
397
  self,
307
398
  kbid: str,
@@ -22,9 +22,10 @@ import logging
22
22
  from collections.abc import Iterable
23
23
  from contextlib import suppress
24
24
  from datetime import datetime, timezone
25
- from typing import List, Optional
25
+ from typing import Optional
26
+
27
+ from nats.js.client import JetStreamContext
26
28
 
27
- from nucliadb_utils.nats import NatsConnectionManager
28
29
  from nucliadb_utils.nuclia_usage.protos.kb_usage_pb2 import (
29
30
  KBSource,
30
31
  KbUsage,
@@ -39,48 +40,28 @@ logger = logging.getLogger(__name__)
39
40
 
40
41
 
41
42
  class KbUsageReportUtility:
42
- task: Optional[asyncio.Task]
43
- initialized: bool
44
43
  queue: asyncio.Queue
45
- service: str
46
44
 
47
45
  def __init__(
48
46
  self,
47
+ nats_stream: JetStreamContext,
49
48
  nats_subject: str,
50
- nats_servers: List[str],
51
- nats_creds: Optional[str] = None,
52
49
  max_queue_size: int = 100,
53
- service: str = "",
54
50
  ):
55
- self.nats_connection_manager = NatsConnectionManager(
56
- service_name=service,
57
- nats_servers=nats_servers,
58
- nats_creds=nats_creds,
59
- )
51
+ self.nats_stream = nats_stream
60
52
  self.nats_subject = nats_subject
61
53
  self.queue = asyncio.Queue(max_queue_size)
62
54
  self.task = None
63
- self.initialized = False
64
55
 
65
56
  async def initialize(self):
66
- if not self.initialized and self.nats_connection_manager._nats_servers:
67
- await self.nats_connection_manager.initialize()
68
-
69
- if self.task is None:
70
- self.task = asyncio.create_task(self.run())
71
-
72
- self.initialized = True
57
+ if self.task is None:
58
+ self.task = asyncio.create_task(self.run())
73
59
 
74
60
  async def finalize(self):
75
- if self.initialized:
76
- if self.task is not None:
77
- self.task.cancel()
78
- with suppress(asyncio.CancelledError, asyncio.exceptions.TimeoutError):
79
- await asyncio.wait_for(self.task, timeout=2)
80
-
81
- await self.nats_connection_manager.finalize()
82
-
83
- self.initialized = False
61
+ if self.task is not None:
62
+ self.task.cancel()
63
+ with suppress(asyncio.CancelledError, asyncio.exceptions.TimeoutError):
64
+ await asyncio.wait_for(self.task, timeout=2)
84
65
 
85
66
  async def run(self) -> None:
86
67
  while True:
@@ -93,15 +74,13 @@ class KbUsageReportUtility:
93
74
  self.queue.task_done()
94
75
 
95
76
  def send(self, message: KbUsage):
96
- if not self.initialized:
97
- return
98
77
  try:
99
78
  self.queue.put_nowait(message)
100
79
  except asyncio.QueueFull:
101
80
  logger.warning("KbUsage utility queue is full, dropping message")
102
81
 
103
82
  async def _send(self, message: KbUsage) -> int:
104
- res = await self.nats_connection_manager.js.publish(
83
+ res = await self.nats_stream.publish(
105
84
  self.nats_subject,
106
85
  message.SerializeToString(),
107
86
  )
@@ -40,7 +40,6 @@ from nucliadb_utils.encryption.settings import settings as encryption_settings
40
40
  from nucliadb_utils.exceptions import ConfigurationError
41
41
  from nucliadb_utils.indexing import IndexingUtility
42
42
  from nucliadb_utils.nats import NatsConnectionManager
43
- from nucliadb_utils.nuclia_usage.utils.kb_usage_report import KbUsageReportUtility
44
43
  from nucliadb_utils.partition import PartitionUtility
45
44
  from nucliadb_utils.settings import (
46
45
  FileBackendConfig,
@@ -49,7 +48,6 @@ from nucliadb_utils.settings import (
49
48
  nuclia_settings,
50
49
  storage_settings,
51
50
  transaction_settings,
52
- usage_settings,
53
51
  )
54
52
  from nucliadb_utils.storages.settings import settings as extended_storage_settings
55
53
  from nucliadb_utils.store import MAIN
@@ -325,33 +323,6 @@ def get_audit() -> Optional[AuditStorage]:
325
323
  return get_utility(Utility.AUDIT)
326
324
 
327
325
 
328
- def get_usage_utility() -> Optional[KbUsageReportUtility]:
329
- return get_utility(Utility.USAGE)
330
-
331
-
332
- async def start_usage_utility(service: str):
333
- usage_utility: Optional[KbUsageReportUtility] = get_utility(Utility.USAGE)
334
- if usage_utility is not None:
335
- return
336
-
337
- usage_utility = KbUsageReportUtility(
338
- nats_subject=cast(str, usage_settings.usage_jetstream_subject),
339
- nats_servers=usage_settings.usage_jetstream_servers,
340
- nats_creds=usage_settings.usage_jetstream_auth,
341
- service=service,
342
- )
343
- logger.info(f"Configuring usage report utility {usage_settings.usage_jetstream_subject}")
344
- await usage_utility.initialize()
345
- set_utility(Utility.USAGE, usage_utility)
346
-
347
-
348
- async def stop_usage_utility():
349
- usage_utility = get_usage_utility()
350
- if usage_utility:
351
- await usage_utility.finalize()
352
- clean_utility(Utility.USAGE)
353
-
354
-
355
326
  def register_audit_utility(service: str) -> AuditStorage:
356
327
  if audit_settings.audit_driver == "basic":
357
328
  b_audit_utility: AuditStorage = BasicAuditStorage()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nucliadb_utils
3
- Version: 5.0.1.post994
3
+ Version: 5.0.1.post1007
4
4
  Home-page: https://nuclia.com
5
5
  License: BSD
6
6
  Classifier: Development Status :: 4 - Beta
@@ -23,8 +23,8 @@ Requires-Dist: PyNaCl
23
23
  Requires-Dist: pyjwt >=2.4.0
24
24
  Requires-Dist: memorylru >=1.1.2
25
25
  Requires-Dist: mrflagly
26
- Requires-Dist: nucliadb-protos >=5.0.1.post994
27
- Requires-Dist: nucliadb-telemetry >=5.0.1.post994
26
+ Requires-Dist: nucliadb-protos >=5.0.1.post1007
27
+ Requires-Dist: nucliadb-telemetry >=5.0.1.post1007
28
28
  Provides-Extra: cache
29
29
  Requires-Dist: redis >=4.3.4 ; extra == 'cache'
30
30
  Requires-Dist: orjson >=3.6.7 ; extra == 'cache'
@@ -16,15 +16,15 @@ nucliadb_utils/settings.py,sha256=QR51SX0T17-_YofMNpci-nkI77l_CUWFy4H7i8hNOHU,79
16
16
  nucliadb_utils/signals.py,sha256=JRNv2y9zLtBjOANBf7krGfDGfOc9qcoXZ6N1nKWS2FE,2674
17
17
  nucliadb_utils/store.py,sha256=kQ35HemE0v4_Qg6xVqNIJi8vSFAYQtwI3rDtMsNy62Y,890
18
18
  nucliadb_utils/transaction.py,sha256=mwcI3aIHAvU5KOGqd_Uz_d1XQzXhk_-NWY8NqU1lfb0,7307
19
- nucliadb_utils/utilities.py,sha256=trAez4j2TYYT5n1qpimyHOEAnsQqDqD2qCFgVSDi184,16747
19
+ nucliadb_utils/utilities.py,sha256=idajCm_4Sojh7b3HTkP0fTfG2Mb6PIB9xtMmcfB7Nl0,15758
20
20
  nucliadb_utils/aiopynecone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
21
21
  nucliadb_utils/aiopynecone/client.py,sha256=pl3WWGpjwoc-qzgDChuIE2LU7Mopu1aKL9iPHkr8QD0,20069
22
22
  nucliadb_utils/aiopynecone/exceptions.py,sha256=EEE0XoGs1zIB5yOJ_fy6yoG4uIb4cWIawYdJeNe4eDo,3012
23
23
  nucliadb_utils/aiopynecone/models.py,sha256=tUugQ-ACQAyT-lhsfLwKAOgb6ilLchLwZfnRV7xwFck,3047
24
24
  nucliadb_utils/audit/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
25
- nucliadb_utils/audit/audit.py,sha256=kpjiTLgWtLXA5DNgJcmeIaodel11Mz0Y-JfQt-yfhis,2641
26
- nucliadb_utils/audit/basic.py,sha256=IXrsdaXaXHVRLkBWPmq4HIO4SxWnri5VFns_h3_8v7k,3184
27
- nucliadb_utils/audit/stream.py,sha256=_RZf1G9_c5eaxfutq1OdMmnRaRM7EiaXH33Kb4mN9Gw,11525
25
+ nucliadb_utils/audit/audit.py,sha256=ictuErJy3X36S7s_f_sdNQQrQ4ackAYCT6M2-4nyenE,3086
26
+ nucliadb_utils/audit/basic.py,sha256=-Yztp0I745J6dz_zHPKdXv60OH1m9_d69NlpbhsGFZI,3717
27
+ nucliadb_utils/audit/stream.py,sha256=yPWe9iCJmTKLtB1iktSOKR8HDB0I3KF7vsWOwloljGM,14299
28
28
  nucliadb_utils/cache/__init__.py,sha256=itSI7dtTwFP55YMX4iK7JzdMHS5CQVUiB1XzQu4UBh8,833
29
29
  nucliadb_utils/cache/exceptions.py,sha256=Zu-O_-0-yctOEgoDGI92gPzWfBMRrpiAyESA62ld6MA,975
30
30
  nucliadb_utils/cache/nats.py,sha256=-AjCfkFgKVdJUlGR0hT9JDSNkPVFg4S6w9eW-ZIcXPM,7037
@@ -43,7 +43,7 @@ nucliadb_utils/nuclia_usage/protos/kb_usage_pb2.pyi,sha256=xhyc3jJBh0KZuWcgmIbwS
43
43
  nucliadb_utils/nuclia_usage/protos/kb_usage_pb2_grpc.py,sha256=dhop8WwjplPfORYPYb9HtcS9gHMzqXPJQGqXYRjV-6M,1008
44
44
  nucliadb_utils/nuclia_usage/protos/kb_usage_pb2_grpc.pyi,sha256=6RIsZ2934iodEckflpBStgLKEkFhKfNmZ72UKg2Bwb4,911
45
45
  nucliadb_utils/nuclia_usage/utils/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
46
- nucliadb_utils/nuclia_usage/utils/kb_usage_report.py,sha256=t4-QXUTOTeMBWFFO8hdqiIco1fgC2Jla4NIWuRSs6N4,4566
46
+ nucliadb_utils/nuclia_usage/utils/kb_usage_report.py,sha256=P1fKvrcYdSPNJ1ycd0ArSXOqhUcvz1BaQhxAWHo0kcc,3847
47
47
  nucliadb_utils/storages/__init__.py,sha256=5Qc8AUWiJv9_JbGCBpAn88AIJhwDlm0OPQpg2ZdRL4U,872
48
48
  nucliadb_utils/storages/azure.py,sha256=egMDwLNIGSQyVevuySt2AswzFdNAcih05BbRg3-p8IU,16015
49
49
  nucliadb_utils/storages/exceptions.py,sha256=mm_wX4YRtp7u7enkk_4pMSlX5AQQuFbq4xLmupVDt3Y,2502
@@ -64,8 +64,8 @@ nucliadb_utils/tests/indexing.py,sha256=YW2QhkhO9Q_8A4kKWJaWSvXvyQ_AiAwY1VylcfVQ
64
64
  nucliadb_utils/tests/local.py,sha256=7nuP8EFUAiA8ZH50R1iPV9EUXBySQxOanVm3Zht_e0g,1835
65
65
  nucliadb_utils/tests/nats.py,sha256=xqpww4jZjTKY9oPGlJdDJG67L3FIBQsa9qDHxILR8r8,7687
66
66
  nucliadb_utils/tests/s3.py,sha256=IdMxK_cNdSHLvO1u8BwsKFzD87Hk1MVPDZ57zx6h-rA,3656
67
- nucliadb_utils-5.0.1.post994.dist-info/METADATA,sha256=N4xswJv9a86AvuhIlJfSmCkc4Z2nHXX4dvvgAFZ4Vm0,2073
68
- nucliadb_utils-5.0.1.post994.dist-info/WHEEL,sha256=Rp8gFpivVLXx-k3U95ozHnQw8yDcPxmhOpn_Gx8d5nc,91
69
- nucliadb_utils-5.0.1.post994.dist-info/top_level.txt,sha256=fE3vJtALTfgh7bcAWcNhcfXkNPp_eVVpbKK-2IYua3E,15
70
- nucliadb_utils-5.0.1.post994.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
71
- nucliadb_utils-5.0.1.post994.dist-info/RECORD,,
67
+ nucliadb_utils-5.0.1.post1007.dist-info/METADATA,sha256=bi_1ZbQ1e8GXrrr_iS4FCzJYRxfwLtJH5pdkl6r2zoU,2076
68
+ nucliadb_utils-5.0.1.post1007.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
69
+ nucliadb_utils-5.0.1.post1007.dist-info/top_level.txt,sha256=fE3vJtALTfgh7bcAWcNhcfXkNPp_eVVpbKK-2IYua3E,15
70
+ nucliadb_utils-5.0.1.post1007.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
71
+ nucliadb_utils-5.0.1.post1007.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (72.0.0)
2
+ Generator: setuptools (72.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5