gnosisllm-knowledge 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. gnosisllm_knowledge/__init__.py +91 -39
  2. gnosisllm_knowledge/api/__init__.py +3 -2
  3. gnosisllm_knowledge/api/knowledge.py +502 -32
  4. gnosisllm_knowledge/api/memory.py +966 -0
  5. gnosisllm_knowledge/backends/__init__.py +14 -5
  6. gnosisllm_knowledge/backends/memory/indexer.py +27 -2
  7. gnosisllm_knowledge/backends/memory/searcher.py +111 -10
  8. gnosisllm_knowledge/backends/opensearch/agentic.py +355 -48
  9. gnosisllm_knowledge/backends/opensearch/config.py +49 -28
  10. gnosisllm_knowledge/backends/opensearch/indexer.py +49 -3
  11. gnosisllm_knowledge/backends/opensearch/mappings.py +14 -5
  12. gnosisllm_knowledge/backends/opensearch/memory/__init__.py +12 -0
  13. gnosisllm_knowledge/backends/opensearch/memory/client.py +1380 -0
  14. gnosisllm_knowledge/backends/opensearch/memory/config.py +127 -0
  15. gnosisllm_knowledge/backends/opensearch/memory/setup.py +322 -0
  16. gnosisllm_knowledge/backends/opensearch/queries.py +33 -33
  17. gnosisllm_knowledge/backends/opensearch/searcher.py +238 -0
  18. gnosisllm_knowledge/backends/opensearch/setup.py +308 -148
  19. gnosisllm_knowledge/cli/app.py +436 -31
  20. gnosisllm_knowledge/cli/commands/agentic.py +26 -9
  21. gnosisllm_knowledge/cli/commands/load.py +169 -19
  22. gnosisllm_knowledge/cli/commands/memory.py +733 -0
  23. gnosisllm_knowledge/cli/commands/search.py +9 -10
  24. gnosisllm_knowledge/cli/commands/setup.py +49 -23
  25. gnosisllm_knowledge/cli/display/service.py +43 -0
  26. gnosisllm_knowledge/cli/utils/config.py +62 -4
  27. gnosisllm_knowledge/core/domain/__init__.py +54 -0
  28. gnosisllm_knowledge/core/domain/discovery.py +166 -0
  29. gnosisllm_knowledge/core/domain/document.py +19 -19
  30. gnosisllm_knowledge/core/domain/memory.py +440 -0
  31. gnosisllm_knowledge/core/domain/result.py +11 -3
  32. gnosisllm_knowledge/core/domain/search.py +12 -25
  33. gnosisllm_knowledge/core/domain/source.py +11 -12
  34. gnosisllm_knowledge/core/events/__init__.py +8 -0
  35. gnosisllm_knowledge/core/events/types.py +198 -5
  36. gnosisllm_knowledge/core/exceptions.py +227 -0
  37. gnosisllm_knowledge/core/interfaces/__init__.py +17 -0
  38. gnosisllm_knowledge/core/interfaces/agentic.py +11 -3
  39. gnosisllm_knowledge/core/interfaces/indexer.py +10 -1
  40. gnosisllm_knowledge/core/interfaces/memory.py +524 -0
  41. gnosisllm_knowledge/core/interfaces/searcher.py +10 -1
  42. gnosisllm_knowledge/core/interfaces/streaming.py +133 -0
  43. gnosisllm_knowledge/core/streaming/__init__.py +36 -0
  44. gnosisllm_knowledge/core/streaming/pipeline.py +228 -0
  45. gnosisllm_knowledge/fetchers/__init__.py +8 -0
  46. gnosisllm_knowledge/fetchers/config.py +27 -0
  47. gnosisllm_knowledge/fetchers/neoreader.py +31 -3
  48. gnosisllm_knowledge/fetchers/neoreader_discovery.py +505 -0
  49. gnosisllm_knowledge/loaders/__init__.py +5 -1
  50. gnosisllm_knowledge/loaders/base.py +3 -4
  51. gnosisllm_knowledge/loaders/discovery.py +338 -0
  52. gnosisllm_knowledge/loaders/discovery_streaming.py +343 -0
  53. gnosisllm_knowledge/loaders/factory.py +46 -0
  54. gnosisllm_knowledge/loaders/sitemap.py +129 -1
  55. gnosisllm_knowledge/loaders/sitemap_streaming.py +258 -0
  56. gnosisllm_knowledge/services/indexing.py +100 -93
  57. gnosisllm_knowledge/services/search.py +84 -31
  58. gnosisllm_knowledge/services/streaming_pipeline.py +334 -0
  59. {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/METADATA +73 -10
  60. gnosisllm_knowledge-0.4.0.dist-info/RECORD +81 -0
  61. gnosisllm_knowledge-0.2.0.dist-info/RECORD +0 -64
  62. {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/WHEEL +0 -0
  63. {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/entry_points.txt +0 -0
gnosisllm_knowledge/backends/opensearch/memory/config.py
@@ -0,0 +1,127 @@
+"""Memory-specific configuration."""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+
+from gnosisllm_knowledge.core.domain.memory import MemoryStrategy
+
+
+@dataclass(frozen=True)
+class MemoryConfig:
+    """Configuration for Agentic Memory.
+
+    Example:
+        ```python
+        # From environment
+        config = MemoryConfig.from_env()
+
+        # Explicit configuration
+        config = MemoryConfig(
+            host="localhost",
+            port=9200,
+            llm_model_id="model-123",
+            embedding_model_id="model-456",
+        )
+        ```
+    """
+
+    # === OpenSearch Connection ===
+    host: str = "localhost"
+    port: int = 9200
+    username: str | None = None
+    password: str | None = None
+    use_ssl: bool = False
+    verify_certs: bool = True
+
+    # === Model IDs (Required for inference) ===
+    llm_model_id: str | None = None
+    embedding_model_id: str | None = None
+
+    # === LLM Response Parsing ===
+    # OpenAI: $.choices[0].message.content
+    # Bedrock Claude: $.output.message.content[0].text
+    llm_result_path: str = "$.choices[0].message.content"
+
+    # === Connector Configuration ===
+    # For setup: OpenAI API key
+    openai_api_key: str | None = None
+    llm_model: str = "gpt-4o"
+    embedding_model: str = "text-embedding-3-small"
+    embedding_dimension: int = 1536
+
+    # === Timeouts ===
+    connect_timeout: float = 5.0
+    inference_timeout: float = 60.0
+
+    # === Default Strategies ===
+    default_strategies: tuple[MemoryStrategy, ...] = (
+        MemoryStrategy.SEMANTIC,
+        MemoryStrategy.USER_PREFERENCE,
+    )
+
+    @property
+    def url(self) -> str:
+        """Get the full OpenSearch URL."""
+        scheme = "https" if self.use_ssl else "http"
+        return f"{scheme}://{self.host}:{self.port}"
+
+    @property
+    def auth(self) -> tuple[str, str] | None:
+        """Get auth tuple if credentials are configured."""
+        if self.username and self.password:
+            return (self.username, self.password)
+        return None
+
+    @property
+    def is_configured(self) -> bool:
+        """Check if memory is properly configured for inference."""
+        return bool(self.llm_model_id and self.embedding_model_id)
+
+    @classmethod
+    def from_env(cls) -> MemoryConfig:
+        """Create config from environment variables.
+
+        Environment Variables:
+            OPENSEARCH_HOST: OpenSearch host (default: localhost)
+            OPENSEARCH_PORT: OpenSearch port (default: 9200)
+            OPENSEARCH_USERNAME: Username
+            OPENSEARCH_PASSWORD: Password
+            OPENSEARCH_USE_SSL: Use SSL (default: false)
+            OPENSEARCH_VERIFY_CERTS: Verify certs (default: true)
+            OPENSEARCH_LLM_MODEL_ID: LLM model ID for inference
+            OPENSEARCH_EMBEDDING_MODEL_ID: Embedding model ID
+            OPENSEARCH_LLM_RESULT_PATH: JSONPath for LLM response
+            OPENAI_API_KEY: OpenAI API key (for setup)
+            MEMORY_LLM_MODEL: LLM model name (default: gpt-4o)
+            MEMORY_EMBEDDING_MODEL: Embedding model (default: text-embedding-3-small)
+            MEMORY_EMBEDDING_DIMENSION: Embedding dimension (default: 1536)
+            MEMORY_INFERENCE_TIMEOUT: Inference timeout (default: 60)
+            OPENSEARCH_CONNECT_TIMEOUT: Connect timeout (default: 5)
+        """
+        return cls(
+            # Connection
+            host=os.getenv("OPENSEARCH_HOST", "localhost"),
+            port=int(os.getenv("OPENSEARCH_PORT", "9200")),
+            username=os.getenv("OPENSEARCH_USERNAME"),
+            password=os.getenv("OPENSEARCH_PASSWORD"),
+            use_ssl=os.getenv("OPENSEARCH_USE_SSL", "false").lower() == "true",
+            verify_certs=os.getenv("OPENSEARCH_VERIFY_CERTS", "true").lower() == "true",
+            # Model IDs
+            llm_model_id=os.getenv("OPENSEARCH_LLM_MODEL_ID"),
+            embedding_model_id=os.getenv("OPENSEARCH_EMBEDDING_MODEL_ID"),
+            # LLM parsing
+            llm_result_path=os.getenv(
+                "OPENSEARCH_LLM_RESULT_PATH",
+                "$.choices[0].message.content",
+            ),
+            # Connector setup
+            openai_api_key=os.getenv("OPENAI_API_KEY"),
+            llm_model=os.getenv("MEMORY_LLM_MODEL", "gpt-4o"),
+            embedding_model=os.getenv("MEMORY_EMBEDDING_MODEL", "text-embedding-3-small"),
+            embedding_dimension=int(os.getenv("MEMORY_EMBEDDING_DIMENSION", "1536")),
+            # Timeouts
+            connect_timeout=float(os.getenv("OPENSEARCH_CONNECT_TIMEOUT", "5.0")),
+            inference_timeout=float(os.getenv("MEMORY_INFERENCE_TIMEOUT", "60.0")),
+        )
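
For reviewers: the frozen dataclass above derives `url` and `auth` and gates inference behind `is_configured`. A minimal usage sketch, assuming only the module path shown in this diff (the `httpx` ping is illustrative, not part of the package):

```python
import asyncio

import httpx

from gnosisllm_knowledge.backends.opensearch.memory.config import MemoryConfig


async def main() -> None:
    # Reads the OPENSEARCH_* / MEMORY_* variables documented in from_env()
    config = MemoryConfig.from_env()

    if not config.is_configured:
        # Missing model IDs means fact extraction / embedding calls cannot run
        raise SystemExit("Set OPENSEARCH_LLM_MODEL_ID and OPENSEARCH_EMBEDDING_MODEL_ID")

    # url/auth are derived properties; verify mirrors the verify_certs flag
    async with httpx.AsyncClient(verify=config.verify_certs) as client:
        response = await client.get(
            config.url, auth=config.auth, timeout=config.connect_timeout
        )
        response.raise_for_status()


asyncio.run(main())
```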
gnosisllm_knowledge/backends/opensearch/memory/setup.py
@@ -0,0 +1,322 @@
+"""Memory setup operations - Connector and Model creation.
+
+CRITICAL: The LLM connector MUST use both system_prompt AND user_prompt.
+If only system_prompt is used, zero facts will be extracted.
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any
+
+import httpx
+
+from gnosisllm_knowledge.core.exceptions import MemoryConfigurationError
+
+if TYPE_CHECKING:
+    from gnosisllm_knowledge.backends.opensearch.memory.config import MemoryConfig
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class SetupStatus:
+    """Result of setup verification."""
+
+    is_ready: bool
+    checks: dict[str, bool]
+
+
+class MemorySetup:
+    """Setup operations for Agentic Memory.
+
+    Creates the required OpenSearch connectors and models for memory to work.
+
+    Example:
+        ```python
+        setup = MemorySetup(config)
+
+        # Create connectors and models
+        llm_model_id = await setup.setup_llm_model()
+        embedding_model_id = await setup.setup_embedding_model()
+
+        # Verify setup
+        status = await setup.verify_setup()
+        if status.is_ready:
+            print("Memory is ready!")
+        ```
+    """
+
+    def __init__(self, config: MemoryConfig) -> None:
+        """Initialize setup.
+
+        Args:
+            config: Memory configuration.
+        """
+        self._config = config
+        self._base_url = config.url
+        self._auth = config.auth
+
+    async def setup_llm_model(self) -> str:
+        """Create OpenAI LLM connector and model for fact extraction.
+
+        CRITICAL: The connector uses BOTH system_prompt AND user_prompt.
+
+        Returns:
+            The deployed LLM model ID.
+
+        Raises:
+            MemoryConfigurationError: If OpenAI API key is not configured.
+        """
+        if not self._config.openai_api_key:
+            raise MemoryConfigurationError(
+                "OpenAI API key required for LLM setup",
+                missing_config=["openai_api_key"],
+            )
+
+        connector_id = await self._create_llm_connector()
+        model_id = await self._register_model(
+            name="OpenAI LLM for Agentic Memory",
+            connector_id=connector_id,
+            function_name="remote",
+        )
+        await self._deploy_model(model_id)
+
+        logger.info(f"LLM model deployed: {model_id}")
+        return model_id
+
+    async def _create_llm_connector(self) -> str:
+        """Create OpenAI chat connector.
+
+        CRITICAL: Uses BOTH system_prompt AND user_prompt parameters.
+        This is required for Agentic Memory fact extraction to work.
+
+        Returns:
+            The connector ID.
+        """
+        # CRITICAL: Both system_prompt AND user_prompt are required
+        request_body = (
+            '{"model": "${parameters.model}", '
+            '"messages": ['
+            '{"role": "system", "content": "${parameters.system_prompt}"}, '
+            '{"role": "user", "content": "${parameters.user_prompt}"}'
+            "]}"
+        )
+
+        connector_body: dict[str, Any] = {
+            "name": "OpenAI Chat Connector for Agentic Memory",
+            "description": "Connector for OpenAI with system_prompt AND user_prompt support",
+            "version": "1",
+            "protocol": "http",
+            "parameters": {
+                "model": self._config.llm_model,
+            },
+            "credential": {
+                "openAI_key": self._config.openai_api_key,
+            },
+            "actions": [
+                {
+                    "action_type": "predict",
+                    "method": "POST",
+                    "url": "https://api.openai.com/v1/chat/completions",
+                    "headers": {
+                        "Authorization": "Bearer ${credential.openAI_key}",
+                        "Content-Type": "application/json",
+                    },
+                    "request_body": request_body,
+                }
+            ],
+        }
+
+        async with httpx.AsyncClient(
+            verify=self._config.verify_certs,
+            timeout=self._config.connect_timeout,
+        ) as client:
+            response = await client.post(
+                f"{self._base_url}/_plugins/_ml/connectors/_create",
+                json=connector_body,
+                auth=self._auth,
+            )
+            response.raise_for_status()
+            result = response.json()
+
+        connector_id = result.get("connector_id")
+        logger.info(f"LLM connector created: {connector_id}")
+        return connector_id
+
+    async def setup_embedding_model(self) -> str:
+        """Create OpenAI embedding connector and model.
+
+        Returns:
+            The deployed embedding model ID.
+
+        Raises:
+            MemoryConfigurationError: If OpenAI API key is not configured.
+        """
+        if not self._config.openai_api_key:
+            raise MemoryConfigurationError(
+                "OpenAI API key required for embedding setup",
+                missing_config=["openai_api_key"],
+            )
+
+        connector_id = await self._create_embedding_connector()
+        model_id = await self._register_model(
+            name="OpenAI Embedding for Agentic Memory",
+            connector_id=connector_id,
+            function_name="remote",
+        )
+        await self._deploy_model(model_id)
+
+        logger.info(f"Embedding model deployed: {model_id}")
+        return model_id
+
+    async def _create_embedding_connector(self) -> str:
+        """Create OpenAI embedding connector.
+
+        Returns:
+            The connector ID.
+        """
+        connector_body: dict[str, Any] = {
+            "name": "OpenAI Embedding Connector",
+            "description": "Connector for OpenAI text-embedding models",
+            "version": "1",
+            "protocol": "http",
+            "parameters": {
+                "model": self._config.embedding_model,
+            },
+            "credential": {
+                "openAI_key": self._config.openai_api_key,
+            },
+            "actions": [
+                {
+                    "action_type": "predict",
+                    "method": "POST",
+                    "url": "https://api.openai.com/v1/embeddings",
+                    "headers": {
+                        "Authorization": "Bearer ${credential.openAI_key}",
+                        "Content-Type": "application/json",
+                    },
+                    "request_body": '{"model": "${parameters.model}", "input": ${parameters.input}}',
+                    "post_process_function": "connector.post_process.openai.embedding",
+                }
+            ],
+        }
+
+        async with httpx.AsyncClient(
+            verify=self._config.verify_certs,
+            timeout=self._config.connect_timeout,
+        ) as client:
+            response = await client.post(
+                f"{self._base_url}/_plugins/_ml/connectors/_create",
+                json=connector_body,
+                auth=self._auth,
+            )
+            response.raise_for_status()
+            result = response.json()
+
+        connector_id = result.get("connector_id")
+        logger.info(f"Embedding connector created: {connector_id}")
+        return connector_id
+
+    async def _register_model(
+        self,
+        name: str,
+        connector_id: str,
+        function_name: str = "remote",
+    ) -> str:
+        """Register a model with OpenSearch.
+
+        Args:
+            name: Model name.
+            connector_id: Connector ID to use.
+            function_name: Model function name (default: remote).
+
+        Returns:
+            The registered model ID.
+        """
+        model_body: dict[str, Any] = {
+            "name": name,
+            "function_name": function_name,
+            "connector_id": connector_id,
+        }
+
+        async with httpx.AsyncClient(
+            verify=self._config.verify_certs,
+            timeout=self._config.connect_timeout,
+        ) as client:
+            response = await client.post(
+                f"{self._base_url}/_plugins/_ml/models/_register",
+                json=model_body,
+                auth=self._auth,
+            )
+            response.raise_for_status()
+            result = response.json()
+
+        return result.get("model_id")
+
+    async def _deploy_model(self, model_id: str) -> None:
+        """Deploy a model.
+
+        Args:
+            model_id: Model ID to deploy.
+        """
+        async with httpx.AsyncClient(
+            verify=self._config.verify_certs,
+            timeout=60.0,  # Deployment can be slow
+        ) as client:
+            response = await client.post(
+                f"{self._base_url}/_plugins/_ml/models/{model_id}/_deploy",
+                auth=self._auth,
+            )
+            response.raise_for_status()
+
+    async def verify_setup(self) -> SetupStatus:
+        """Verify that memory is properly configured.
+
+        Returns:
+            SetupStatus with verification results.
+        """
+        checks: dict[str, bool] = {}
+
+        # Check LLM model
+        if self._config.llm_model_id:
+            llm_ok = await self._check_model(self._config.llm_model_id)
+            checks["llm_model"] = llm_ok
+        else:
+            checks["llm_model"] = False
+
+        # Check embedding model
+        if self._config.embedding_model_id:
+            embed_ok = await self._check_model(self._config.embedding_model_id)
+            checks["embedding_model"] = embed_ok
+        else:
+            checks["embedding_model"] = False
+
+        is_ready = all(checks.values())
+        return SetupStatus(is_ready=is_ready, checks=checks)
+
+    async def _check_model(self, model_id: str) -> bool:
+        """Check if a model is deployed and responding.
+
+        Args:
+            model_id: Model ID to check.
+
+        Returns:
+            True if model is deployed and ready.
+        """
+        try:
+            async with httpx.AsyncClient(
+                verify=self._config.verify_certs,
+                timeout=self._config.connect_timeout,
+            ) as client:
+                response = await client.get(
+                    f"{self._base_url}/_plugins/_ml/models/{model_id}",
+                    auth=self._auth,
+                )
+                if response.status_code == 200:
+                    data = response.json()
+                    return data.get("model_state") == "DEPLOYED"
+        except Exception:
+            pass
+        return False
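
The repeated CRITICAL notes exist because the connector's `request_body` templates both `${parameters.system_prompt}` and `${parameters.user_prompt}`; a caller that supplies only one leaves the other placeholder empty. A hedged smoke-test sketch against the ML Commons predict endpoint (the prompt strings are illustrative; only the endpoint shape and parameter names follow the connector defined above):

```python
import httpx


async def smoke_test_llm_connector(
    base_url: str,
    model_id: str,
    auth: tuple[str, str] | None,
    verify_certs: bool = True,
) -> dict:
    # Supply BOTH parameters: the request_body template references
    # ${parameters.system_prompt} AND ${parameters.user_prompt}, and
    # omitting user_prompt is the zero-facts failure mode noted above.
    body = {
        "parameters": {
            "system_prompt": "Extract durable facts from the user's message.",
            "user_prompt": "I live in Berlin and prefer dark mode.",
        }
    }
    async with httpx.AsyncClient(verify=verify_certs, timeout=60.0) as client:
        response = await client.post(
            f"{base_url}/_plugins/_ml/models/{model_id}/_predict",
            json=body,
            auth=auth,
        )
        response.raise_for_status()
        return response.json()  # raw ML Commons inference result
```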
gnosisllm_knowledge/backends/opensearch/queries.py
@@ -2,6 +2,10 @@
 
 Uses OpenSearch neural search - embeddings are generated automatically
 via the deployed model. No Python-side embedding generation needed.
+
+Note: This module is tenant-agnostic. Multi-tenancy should be handled
+at the API layer by using separate indices per account (e.g.,
+`knowledge-{account_id}`) rather than filtering by account_id.
 """
 
 from __future__ import annotations
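
With the module now tenant-agnostic, isolation moves from query filters to index naming. A one-line sketch of the convention the note describes (the helper name is hypothetical):

```python
def index_for_account(account_id: str) -> str:
    # Hypothetical helper: one physical index per tenant replaces the
    # removed account_id term filter.
    return f"knowledge-{account_id}"


assert index_for_account("acc123") == "knowledge-acc123"
```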
@@ -18,9 +22,13 @@ class QueryBuilder:
     model handles embedding generation automatically via ingest and
     search pipelines.
 
+    Note:
+        This builder is tenant-agnostic. Multi-tenancy should be handled
+        by using separate indices per account.
+
     Example:
         ```python
-        query = SearchQuery(text="how to configure", account_id="acc123")
+        query = SearchQuery(text="how to configure", collection_ids=["col-1"])
        builder = QueryBuilder(query, model_id="abc123")
        os_query = builder.build_hybrid_query()
        ```
@@ -204,12 +212,12 @@ class QueryBuilder:
             },
         }
 
-        # Apply filters at top level for hybrid
+        # Apply filters using post_filter for hybrid queries
+        # Hybrid queries cannot be wrapped in bool - they must be top-level
         filters = self._build_filters()
         if filters:
-            query["query"] = {
+            query["post_filter"] = {
                 "bool": {
-                    "must": [query["query"]],
                     "filter": filters,
                 }
             }
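
The move to `post_filter` reflects an OpenSearch constraint: a `hybrid` query must be the top-level query and cannot be nested under `bool`. A sketch of the search body this change produces (the `content`/`embedding` field names and sub-queries are assumptions for illustration; the `post_filter` shape comes from the diff):

```python
os_query = {
    "query": {
        # The hybrid clause stays at the top level, unwrapped
        "hybrid": {
            "queries": [
                {"match": {"content": "how to configure"}},
                {
                    "neural": {
                        "embedding": {
                            "query_text": "how to configure",
                            "model_id": "abc123",
                            "k": 10,
                        }
                    }
                },
            ]
        }
    },
    # Filters are applied to the hybrid results after scoring
    "post_filter": {
        "bool": {
            "filter": [{"terms": {"collection_id": ["col-1"]}}],
        }
    },
}
```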
@@ -270,15 +278,15 @@ class QueryBuilder:
     def _build_filters(self) -> list[dict[str, Any]]:
         """Build filter clauses from query parameters.
 
+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            at the API layer by using separate indices per account.
+
         Returns:
-            List of filter clauses.
+            List of filter clauses for collection, source, and metadata filters.
         """
         filters: list[dict[str, Any]] = []
 
-        # Multi-tenant filter (required for security)
-        if self._query.account_id:
-            filters.append({"term": {"account_id": self._query.account_id}})
-
         # Collection filter
         if self._query.collection_ids:
             filters.append({"terms": {"collection_id": self._query.collection_ids}})
@@ -357,67 +365,61 @@
     ]
 
 
-def build_delete_by_source_query(
-    source_id: str,
-    account_id: str | None = None,
-) -> dict[str, Any]:
+def build_delete_by_source_query(source_id: str) -> dict[str, Any]:
     """Build query to delete documents by source.
 
+    Note:
+        This function is tenant-agnostic. Multi-tenancy should be handled
+        at the API layer by using separate indices per account.
+
     Args:
         source_id: Source ID to delete.
-        account_id: Optional account filter for multi-tenancy.
 
     Returns:
         Delete-by-query dictionary.
     """
-    filters = [{"term": {"source_id": source_id}}]
-    if account_id:
-        filters.append({"term": {"account_id": account_id}})
-
     return {
         "query": {
             "bool": {
-                "filter": filters,
+                "filter": [{"term": {"source_id": source_id}}],
             }
         }
     }
 
 
-def build_delete_by_collection_query(
-    collection_id: str,
-    account_id: str | None = None,
-) -> dict[str, Any]:
+def build_delete_by_collection_query(collection_id: str) -> dict[str, Any]:
     """Build query to delete documents by collection.
 
+    Note:
+        This function is tenant-agnostic. Multi-tenancy should be handled
+        at the API layer by using separate indices per account.
+
     Args:
         collection_id: Collection ID to delete.
-        account_id: Optional account filter for multi-tenancy.
 
     Returns:
         Delete-by-query dictionary.
     """
-    filters = [{"term": {"collection_id": collection_id}}]
-    if account_id:
-        filters.append({"term": {"account_id": account_id}})
-
     return {
         "query": {
            "bool": {
-                "filter": filters,
+                "filter": [{"term": {"collection_id": collection_id}}],
            }
        }
    }
 
 
 def build_count_query(
-    account_id: str | None = None,
     collection_id: str | None = None,
     source_id: str | None = None,
 ) -> dict[str, Any]:
     """Build query to count documents.
 
+    Note:
+        This function is tenant-agnostic. Multi-tenancy should be handled
+        at the API layer by using separate indices per account.
+
     Args:
-        account_id: Optional account filter.
         collection_id: Optional collection filter.
         source_id: Optional source filter.
 
@@ -426,8 +428,6 @@ def build_count_query(
     """
     filters: list[dict[str, Any]] = []
 
-    if account_id:
-        filters.append({"term": {"account_id": account_id}})
     if collection_id:
         filters.append({"term": {"collection_id": collection_id}})
     if source_id:
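
Taken together, the simplified builders compose with per-account indices rather than an `account_id` filter. A hedged sketch of the new call pattern (the `opensearchpy` wiring and index convention are illustrative; only the builder signatures come from this diff):

```python
from opensearchpy import AsyncOpenSearch

from gnosisllm_knowledge.backends.opensearch.queries import (
    build_count_query,
    build_delete_by_source_query,
)


async def purge_source(client: AsyncOpenSearch, account_id: str, source_id: str) -> int:
    # Tenant isolation comes from the index name, not a query filter
    index = f"knowledge-{account_id}"
    await client.delete_by_query(
        index=index, body=build_delete_by_source_query(source_id)
    )
    # Count should approach zero once the delete is refreshed
    result = await client.count(index=index, body=build_count_query(source_id=source_id))
    return result["count"]
```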