gnosisllm-knowledge 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. gnosisllm_knowledge/__init__.py +91 -39
  2. gnosisllm_knowledge/api/__init__.py +3 -2
  3. gnosisllm_knowledge/api/knowledge.py +287 -7
  4. gnosisllm_knowledge/api/memory.py +966 -0
  5. gnosisllm_knowledge/backends/__init__.py +14 -5
  6. gnosisllm_knowledge/backends/opensearch/agentic.py +341 -39
  7. gnosisllm_knowledge/backends/opensearch/config.py +49 -28
  8. gnosisllm_knowledge/backends/opensearch/indexer.py +1 -0
  9. gnosisllm_knowledge/backends/opensearch/mappings.py +2 -1
  10. gnosisllm_knowledge/backends/opensearch/memory/__init__.py +12 -0
  11. gnosisllm_knowledge/backends/opensearch/memory/client.py +1380 -0
  12. gnosisllm_knowledge/backends/opensearch/memory/config.py +127 -0
  13. gnosisllm_knowledge/backends/opensearch/memory/setup.py +322 -0
  14. gnosisllm_knowledge/backends/opensearch/searcher.py +235 -0
  15. gnosisllm_knowledge/backends/opensearch/setup.py +308 -148
  16. gnosisllm_knowledge/cli/app.py +378 -12
  17. gnosisllm_knowledge/cli/commands/agentic.py +11 -0
  18. gnosisllm_knowledge/cli/commands/memory.py +723 -0
  19. gnosisllm_knowledge/cli/commands/setup.py +24 -22
  20. gnosisllm_knowledge/cli/display/service.py +43 -0
  21. gnosisllm_knowledge/cli/utils/config.py +58 -0
  22. gnosisllm_knowledge/core/domain/__init__.py +41 -0
  23. gnosisllm_knowledge/core/domain/document.py +5 -0
  24. gnosisllm_knowledge/core/domain/memory.py +440 -0
  25. gnosisllm_knowledge/core/domain/result.py +11 -3
  26. gnosisllm_knowledge/core/domain/search.py +2 -0
  27. gnosisllm_knowledge/core/events/types.py +76 -0
  28. gnosisllm_knowledge/core/exceptions.py +134 -0
  29. gnosisllm_knowledge/core/interfaces/__init__.py +17 -0
  30. gnosisllm_knowledge/core/interfaces/memory.py +524 -0
  31. gnosisllm_knowledge/core/interfaces/streaming.py +127 -0
  32. gnosisllm_knowledge/core/streaming/__init__.py +36 -0
  33. gnosisllm_knowledge/core/streaming/pipeline.py +228 -0
  34. gnosisllm_knowledge/loaders/base.py +3 -4
  35. gnosisllm_knowledge/loaders/sitemap.py +129 -1
  36. gnosisllm_knowledge/loaders/sitemap_streaming.py +258 -0
  37. gnosisllm_knowledge/services/indexing.py +67 -75
  38. gnosisllm_knowledge/services/search.py +47 -11
  39. gnosisllm_knowledge/services/streaming_pipeline.py +302 -0
  40. {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.3.0.dist-info}/METADATA +44 -1
  41. gnosisllm_knowledge-0.3.0.dist-info/RECORD +77 -0
  42. gnosisllm_knowledge-0.2.0.dist-info/RECORD +0 -64
  43. {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.3.0.dist-info}/WHEEL +0 -0
  44. {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.3.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,440 @@
1
+ """Memory domain models for Agentic Memory."""
2
+
3
+ from __future__ import annotations
4
+
5
from collections.abc import Iterator
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Literal
9
+
10
+
11
class MemoryStrategy(str, Enum):
    """Strategies available for extracting memories.

    Members mix in ``str``, so each one compares equal to its string value.

    Attributes:
        SEMANTIC: Extraction of general facts and knowledge.
        USER_PREFERENCE: Extraction of user preferences and choices.
        SUMMARY: Summaries of conversations.
    """

    SEMANTIC = "SEMANTIC"
    USER_PREFERENCE = "USER_PREFERENCE"
    SUMMARY = "SUMMARY"
23
+
24
+
25
class MemoryType(str, Enum):
    """Kinds of memory storage.

    Members mix in ``str``, so each one compares equal to its string value.

    Attributes:
        WORKING: Short-term storage of raw conversation messages.
        LONG_TERM: Extracted facts stored with embeddings.
        SESSIONS: Metadata about sessions.
        HISTORY: Audit trail of operations.
    """

    WORKING = "working"
    LONG_TERM = "long-term"
    SESSIONS = "sessions"
    HISTORY = "history"
39
+
40
+
41
class PayloadType(str, Enum):
    """Kinds of payload a memory can carry.

    Attributes:
        CONVERSATIONAL: Messages from a conversation.
        DATA: Structured data such as agent state or traces.
    """

    CONVERSATIONAL = "conversational"
    DATA = "data"
51
+
52
+
53
class EmbeddingModelType(str, Enum):
    """Embedding model kinds that OpenSearch supports.

    Attributes:
        TEXT_EMBEDDING: Dense vector embeddings (the default kind).
        SPARSE_ENCODING: Sparse vector encoding.
    """

    TEXT_EMBEDDING = "TEXT_EMBEDDING"
    SPARSE_ENCODING = "SPARSE_ENCODING"
63
+
64
+
65
class HistoryAction(str, Enum):
    """Actions recorded in the history audit trail.

    Attributes:
        ADD: A memory was added.
        UPDATE: A memory was updated.
        DELETE: A memory was deleted.
    """

    ADD = "ADD"
    UPDATE = "UPDATE"
    DELETE = "DELETE"
77
+
78
+
79
@dataclass
class StrategyConfig:
    """Configuration for a memory extraction strategy.

    Each strategy MUST be scoped to namespace fields.
    When storing memory, only strategies whose namespace fields are
    present in the request will run.

    Attributes:
        type: Strategy type (SEMANTIC, USER_PREFERENCE, SUMMARY).
        namespace: Fields used to scope this strategy (REQUIRED).
        llm_result_path: JSONPath to extract LLM response.
        system_prompt: Optional custom system prompt.
        llm_id: Optional strategy-specific LLM override.
    """

    type: MemoryStrategy
    namespace: list[str]  # REQUIRED - no default
    llm_result_path: str | None = None
    system_prompt: str | None = None
    llm_id: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Convert to OpenSearch API format.

        Returns:
            A dict with ``type`` (the strategy's enum value), ``namespace``
            (a copy of the scoping fields) and ``configuration`` (only the
            optional settings that are set to a truthy value).
        """
        optional_settings = {
            "llm_result_path": self.llm_result_path,
            "system_prompt": self.system_prompt,
            "llm_id": self.llm_id,
        }
        return {
            "type": self.type.value,
            # Copy so that callers editing the payload cannot mutate this
            # config's own namespace list.
            "namespace": list(self.namespace),
            "configuration": {
                key: value for key, value in optional_settings.items() if value
            },
        }
116
+
117
+
118
@dataclass
class IndexSettings:
    """Per-index settings applied to a memory container index.

    Attributes:
        number_of_shards: Shard count for the index.
        number_of_replicas: Replica count for the index.
    """

    number_of_shards: int = 1
    number_of_replicas: int = 1

    def to_dict(self) -> dict[str, Any]:
        """Render the settings in OpenSearch index-settings form.

        Returns:
            A dict with a single ``index`` key whose shard and replica
            counts are given as strings.
        """
        shards = str(self.number_of_shards)
        replicas = str(self.number_of_replicas)
        index_section = {
            "number_of_shards": shards,
            "number_of_replicas": replicas,
        }
        return {"index": index_section}
138
+
139
+
140
@dataclass
class ContainerIndexSettings:
    """Index settings for each index that backs a memory container.

    Attributes:
        session_index: Settings for the session index.
        short_term_memory_index: Settings for the working memory index.
        long_term_memory_index: Settings for the long-term memory index.
        long_term_memory_history_index: Settings for the history index.
    """

    session_index: IndexSettings | None = None
    short_term_memory_index: IndexSettings | None = None
    long_term_memory_index: IndexSettings | None = None
    long_term_memory_history_index: IndexSettings | None = None

    def to_dict(self) -> dict[str, Any]:
        """Render the configured indexes in OpenSearch API form.

        Returns:
            A dict holding one entry per index whose settings are set;
            unset indexes are left out entirely.
        """
        index_keys = (
            "session_index",
            "short_term_memory_index",
            "long_term_memory_index",
            "long_term_memory_history_index",
        )
        payload: dict[str, Any] = {}
        for key in index_keys:
            settings = getattr(self, key)
            if settings:
                payload[key] = settings.to_dict()
        return payload
170
+
171
+
172
@dataclass
class ContainerConfig:
    """Configuration used to describe a memory container.

    Only ``name`` is required; every other field has a default.

    Attributes:
        name: Name of the container.
        description: Optional free-text description.
        strategies: Extraction strategies for the container.
        embedding_model_id: ID of the OpenSearch embedding model.
        embedding_model_type: Kind of embedding model
            (TEXT_EMBEDDING or SPARSE_ENCODING).
        llm_model_id: ID of the OpenSearch LLM model used for inference.
        llm_result_path: JSONPath used to extract the LLM response.
        embedding_dimension: Dimension of the embedding vectors.
        index_prefix: Optional custom index prefix.
        use_system_index: Whether system indexes are used (default: True).
        index_settings: Optional index-level settings (shards, replicas).
    """

    name: str
    description: str | None = None
    strategies: list[StrategyConfig] = field(default_factory=list)
    embedding_model_id: str | None = None
    embedding_model_type: EmbeddingModelType = EmbeddingModelType.TEXT_EMBEDDING
    llm_model_id: str | None = None
    llm_result_path: str = "$.choices[0].message.content"
    embedding_dimension: int = 1536
    index_prefix: str | None = None
    use_system_index: bool = True
    index_settings: ContainerIndexSettings | None = None
201
+
202
+
203
@dataclass
class ContainerInfo:
    """Descriptive information about a memory container.

    Attributes:
        id: ID of the container.
        name: Name of the container.
        description: Description of the container, if any.
        strategies: Strategies configured on the container.
        embedding_model_id: ID of the embedding model.
        llm_model_id: ID of the LLM model.
        created_at: When the container was created.
        updated_at: When the container was last updated.
    """

    id: str
    name: str
    description: str | None = None
    strategies: list[MemoryStrategy] = field(default_factory=list)
    embedding_model_id: str | None = None
    llm_model_id: str | None = None
    created_at: datetime | None = None
    updated_at: datetime | None = None
226
+
227
+
228
@dataclass
class Message:
    """A single message within a conversation.

    Attributes:
        role: Who sent the message (user, assistant, system).
        content: Text of the message.
        timestamp: Optional time of the message.
    """

    role: Literal["user", "assistant", "system"]
    content: str
    timestamp: datetime | None = None

    def to_dict(self) -> dict[str, Any]:
        """Render the message in OpenSearch API form.

        Returns:
            A dict with the ``role`` and a one-element ``content`` list
            wrapping the text. ``timestamp`` is not part of the output.
        """
        text_part = {"text": self.content, "type": "text"}
        return {"role": self.role, "content": [text_part]}
248
+
249
+
250
@dataclass
class Namespace:
    """Memory namespace for partitioning and strategy scoping.

    Completely configurable key-value pairs for memory isolation.
    Common fields: user_id, session_id, agent_id, org_id.

    When creating a container, strategies are scoped to namespace fields.
    When adding memory with `infer=True`, OpenSearch automatically runs
    strategies based on which namespace fields are present.

    Supports ``ns[key]`` lookup and assignment, ``key in ns`` membership
    tests, and iteration over the namespace keys.

    Attributes:
        values: Namespace key-value pairs.
    """

    values: dict[str, str] = field(default_factory=dict)

    def __getitem__(self, key: str) -> str | None:
        """Get namespace value by key, or ``None`` when the key is absent."""
        return self.values.get(key)

    def __setitem__(self, key: str, value: str) -> None:
        """Set namespace value by key."""
        self.values[key] = value

    def __contains__(self, key: object) -> bool:
        """Return whether ``key`` is a namespace field.

        Defined explicitly because ``__getitem__`` never raises: without
        this, ``in`` would fall back to probing integer indexes forever.
        """
        return key in self.values

    def __iter__(self) -> Iterator[str]:
        """Iterate over the namespace keys.

        Defined explicitly for the same reason as ``__contains__``: the
        implicit index-based iteration would never terminate.
        """
        return iter(self.values)

    def to_dict(self) -> dict[str, str]:
        """Get namespace as a new dictionary for API calls."""
        return dict(self.values)
278
+
279
+
280
@dataclass
class StoreRequest:
    """A request to store memory.

    Every field has a default, so a request can be built incrementally.

    Attributes:
        messages: Conversation messages, used for a conversational payload.
        structured_data: Structured data, used for a data payload.
        namespace: Namespace used for partitioning and strategy scoping.
        payload_type: Kind of payload being stored.
        infer: Whether LLM inference is applied to extract facts.
        metadata: Optional custom metadata.
        tags: Optional custom tags.
    """

    messages: list[Message] | None = None
    structured_data: dict[str, Any] | None = None
    namespace: Namespace = field(default_factory=Namespace)
    payload_type: PayloadType = PayloadType.CONVERSATIONAL
    infer: bool = True
    metadata: dict[str, Any] = field(default_factory=dict)
    tags: dict[str, str] = field(default_factory=dict)
301
+
302
+
303
@dataclass
class StoreResult:
    """Outcome of a store operation.

    Attributes:
        session_id: ID of the session (for conversational payloads).
        working_memory_id: Document ID of the working memory entry.
        long_term_count: How many facts were extracted (if infer=True).
        extraction_time_ms: How long extraction took.
    """

    session_id: str | None = None
    working_memory_id: str | None = None
    long_term_count: int = 0
    extraction_time_ms: int | None = None
318
+
319
+
320
@dataclass
class MemoryEntry:
    """An entry read from long-term memory storage.

    Attributes:
        id: Document ID of the memory.
        content: Content of the memory (the extracted fact).
        strategy: Strategy that extracted the memory.
        score: Similarity score, used for search results.
        namespace: Namespace values of the memory.
        created_at: When the memory was created.
        metadata: Custom metadata.
    """

    id: str
    content: str
    strategy: MemoryStrategy | None = None
    score: float = 0.0
    namespace: dict[str, str] = field(default_factory=dict)
    created_at: datetime | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
341
+
342
+
343
@dataclass
class RecallResult:
    """Outcome of a recall (search) operation.

    Attributes:
        items: Memory entries that were found.
        total: Total count of matches.
        query: Query that was searched for.
        took_ms: Elapsed time in milliseconds.
    """

    items: list[MemoryEntry]
    total: int
    query: str
    took_ms: int = 0
358
+
359
+
360
@dataclass
class SessionInfo:
    """Information about a session.

    Attributes:
        id: ID of the session.
        container_id: ID of the parent container.
        summary: Summary text of the session.
        namespace: Namespace of the session.
        started_at: When the session started.
        ended_at: When the session ended, if it has ended.
        message_count: How many messages the session holds.
        messages: Messages of the session, if they were requested.
        metadata: Custom metadata of the session.
    """

    id: str
    container_id: str
    summary: str | None = None
    namespace: dict[str, str] = field(default_factory=dict)
    started_at: datetime | None = None
    ended_at: datetime | None = None
    message_count: int = 0
    messages: list[Message] | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
385
+
386
+
387
@dataclass
class HistoryEntry:
    """One audit-trail record of a memory operation. READ-ONLY.

    History entries are READ-ONLY: they cannot be updated or deleted.

    Attributes:
        id: ID of the history entry.
        memory_id: ID of the memory that was affected.
        container_id: ID of the parent container.
        action: Kind of operation (ADD, UPDATE, DELETE).
        owner_id: User who carried out the action.
        before: State prior to the change (for UPDATE/DELETE).
        after: State following the change.
        namespace: Namespace when the operation happened.
        tags: Tags when the operation happened.
        created_at: When the operation happened.
    """

    id: str
    memory_id: str
    container_id: str
    action: HistoryAction
    owner_id: str | None = None
    before: dict[str, Any] | None = None
    after: dict[str, Any] | None = None
    namespace: dict[str, str] = field(default_factory=dict)
    tags: dict[str, str] = field(default_factory=dict)
    created_at: datetime | None = None
416
+
417
+
418
@dataclass
class MemoryStats:
    """Usage statistics for a memory container.

    Attributes:
        container_id: ID of the container.
        container_name: Name of the container.
        working_memory_count: Number of messages in working memory.
        long_term_memory_count: Number of facts in long-term memory.
        session_count: How many sessions exist.
        strategies_breakdown: Count for each strategy.
        storage_size_bytes: Estimated size of the storage.
        last_updated: When the statistics were last updated.
    """

    container_id: str
    container_name: str
    working_memory_count: int = 0
    long_term_memory_count: int = 0
    session_count: int = 0
    strategies_breakdown: dict[MemoryStrategy, int] = field(default_factory=dict)
    storage_size_bytes: int = 0
    last_updated: datetime | None = None
@@ -3,7 +3,10 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from dataclasses import dataclass, field
6
- from typing import Any
6
+ from typing import TYPE_CHECKING, Any
7
+
8
+ if TYPE_CHECKING:
9
+ from gnosisllm_knowledge.core.domain.document import Document
7
10
 
8
11
 
9
12
  @dataclass
@@ -13,7 +16,7 @@ class LoadResult:
13
16
  Attributes:
14
17
  source: The source that was loaded (URL, file path, etc.).
15
18
  source_type: Type of source (website, sitemap, file, etc.).
16
- document_count: Number of documents loaded.
19
+ documents: List of loaded documents.
17
20
  success: Whether the operation succeeded.
18
21
  error_message: Error message if operation failed.
19
22
  duration_ms: Duration of the operation in milliseconds.
@@ -25,8 +28,8 @@ class LoadResult:
25
28
 
26
29
  source: str
27
30
  source_type: str
28
- document_count: int
29
31
  success: bool
32
+ documents: list[Document] = field(default_factory=list)
30
33
  error_message: str | None = None
31
34
  duration_ms: float = 0.0
32
35
  metadata: dict[str, Any] = field(default_factory=dict)
@@ -34,6 +37,11 @@ class LoadResult:
34
37
  urls_failed: int = 0
35
38
  bytes_loaded: int = 0
36
39
 
40
+ @property
41
+ def document_count(self) -> int:
42
+ """Return the number of loaded documents."""
43
+ return len(self.documents)
44
+
37
45
  @property
38
46
  def success_rate(self) -> float:
39
47
  """Calculate the success rate for multi-URL loads."""
@@ -276,6 +276,7 @@ class AgenticSearchResult:
276
276
  total_tokens: Total tokens consumed.
277
277
  prompt_tokens: Tokens used in prompts.
278
278
  completion_tokens: Tokens used in completions.
279
+ generated_query: The DSL query generated by QueryPlanningTool (if applicable).
279
280
  """
280
281
 
281
282
  query: str
@@ -292,6 +293,7 @@ class AgenticSearchResult:
292
293
  total_tokens: int = 0
293
294
  prompt_tokens: int = 0
294
295
  completion_tokens: int = 0
296
+ generated_query: str | None = None # DSL generated by QueryPlanningTool
295
297
 
296
298
  @property
297
299
  def has_answer(self) -> bool:
@@ -14,6 +14,7 @@ class EventType(str, Enum):
14
14
  Events are organized by category:
15
15
  - Loading events: Document and content loading
16
16
  - Indexing events: Document indexing operations
17
+ - Streaming events: Streaming pipeline progress
17
18
  - Search events: Search and retrieval operations
18
19
  - Agentic events: AI-powered operations
19
20
  - Setup events: Backend setup operations
@@ -33,6 +34,11 @@ class EventType(str, Enum):
33
34
  LOAD_FAILED = "load_failed"
34
35
  SITEMAP_DISCOVERED = "sitemap_discovered"
35
36
 
37
+ # Streaming events
38
+ STREAMING_PROGRESS = "streaming_progress"
39
+ URL_BATCH_PROCESSED = "url_batch_processed"
40
+ STREAMING_COMPLETED = "streaming_completed"
41
+
36
42
  # Indexing events
37
43
  INDEX_STARTED = "index_started"
38
44
  DOCUMENT_INDEXED = "document_indexed"
@@ -224,3 +230,73 @@ class BatchCompletedEvent(Event):
224
230
  "failure_count": self.failure_count,
225
231
  "duration_ms": self.duration_ms,
226
232
  }
233
+
234
+
235
@dataclass
class StreamingProgressEvent(Event):
    """Progress report for a streaming operation.

    Emitted periodically while the streaming pipeline runs, to give
    visibility into how far it has progressed.
    """

    urls_discovered: int = 0
    urls_processed: int = 0
    documents_indexed: int = 0
    documents_failed: int = 0
    phase: str = "unknown"
    memory_mb: float | None = None

    def __post_init__(self) -> None:
        """Set the event type and mirror this event's fields into ``data``."""
        # NOTE(review): a base-class __post_init__, if Event defines one,
        # is not invoked from here - confirm against Event.
        self.event_type = EventType.STREAMING_PROGRESS
        mirrored_fields = (
            "urls_discovered",
            "urls_processed",
            "documents_indexed",
            "documents_failed",
            "phase",
            "memory_mb",
        )
        # Replaces whatever ``data`` held before with a snapshot of the fields.
        self.data = {name: getattr(self, name) for name in mirrored_fields}
261
+
262
+
263
@dataclass
class UrlBatchProcessedEvent(Event):
    """Event emitted once a batch of URLs has been processed."""

    batch_index: int = 0
    urls_in_batch: int = 0
    documents_created: int = 0
    total_urls_processed: int = 0

    def __post_init__(self) -> None:
        """Set the event type and mirror this event's fields into ``data``."""
        # NOTE(review): a base-class __post_init__, if Event defines one,
        # is not invoked from here - confirm against Event.
        self.event_type = EventType.URL_BATCH_PROCESSED
        mirrored_fields = (
            "batch_index",
            "urls_in_batch",
            "documents_created",
            "total_urls_processed",
        )
        # Replaces whatever ``data`` held before with a snapshot of the fields.
        self.data = {name: getattr(self, name) for name in mirrored_fields}
281
+
282
+
283
@dataclass
class StreamingCompletedEvent(Event):
    """Event emitted once the streaming pipeline has completed."""

    total_urls: int = 0
    total_documents: int = 0
    indexed_count: int = 0
    failed_count: int = 0
    duration_ms: float = 0.0

    def __post_init__(self) -> None:
        """Set the event type and mirror this event's fields into ``data``."""
        # NOTE(review): a base-class __post_init__, if Event defines one,
        # is not invoked from here - confirm against Event.
        self.event_type = EventType.STREAMING_COMPLETED
        mirrored_fields = (
            "total_urls",
            "total_documents",
            "indexed_count",
            "failed_count",
            "duration_ms",
        )
        # Replaces whatever ``data`` held before with a snapshot of the fields.
        self.data = {name: getattr(self, name) for name in mirrored_fields}