sirchmunk 0.0.1.post1__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. sirchmunk/api/__init__.py +1 -0
  2. sirchmunk/api/chat.py +1123 -0
  3. sirchmunk/api/components/__init__.py +0 -0
  4. sirchmunk/api/components/history_storage.py +402 -0
  5. sirchmunk/api/components/monitor_tracker.py +518 -0
  6. sirchmunk/api/components/settings_storage.py +353 -0
  7. sirchmunk/api/history.py +254 -0
  8. sirchmunk/api/knowledge.py +411 -0
  9. sirchmunk/api/main.py +120 -0
  10. sirchmunk/api/monitor.py +219 -0
  11. sirchmunk/api/run_server.py +54 -0
  12. sirchmunk/api/search.py +230 -0
  13. sirchmunk/api/settings.py +309 -0
  14. sirchmunk/api/tools.py +315 -0
  15. sirchmunk/cli/__init__.py +11 -0
  16. sirchmunk/cli/cli.py +789 -0
  17. sirchmunk/learnings/knowledge_base.py +5 -2
  18. sirchmunk/llm/prompts.py +12 -1
  19. sirchmunk/retrieve/text_retriever.py +186 -2
  20. sirchmunk/scan/file_scanner.py +2 -2
  21. sirchmunk/schema/knowledge.py +119 -35
  22. sirchmunk/search.py +384 -26
  23. sirchmunk/storage/__init__.py +2 -2
  24. sirchmunk/storage/{knowledge_manager.py → knowledge_storage.py} +265 -60
  25. sirchmunk/utils/constants.py +7 -5
  26. sirchmunk/utils/embedding_util.py +217 -0
  27. sirchmunk/utils/tokenizer_util.py +36 -1
  28. sirchmunk/version.py +1 -1
  29. {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/METADATA +124 -9
  30. sirchmunk-0.0.2.dist-info/RECORD +69 -0
  31. {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/WHEEL +1 -1
  32. sirchmunk-0.0.2.dist-info/top_level.txt +2 -0
  33. sirchmunk_mcp/__init__.py +25 -0
  34. sirchmunk_mcp/cli.py +478 -0
  35. sirchmunk_mcp/config.py +276 -0
  36. sirchmunk_mcp/server.py +355 -0
  37. sirchmunk_mcp/service.py +327 -0
  38. sirchmunk_mcp/setup.py +15 -0
  39. sirchmunk_mcp/tools.py +410 -0
  40. sirchmunk-0.0.1.post1.dist-info/RECORD +0 -45
  41. sirchmunk-0.0.1.post1.dist-info/top_level.txt +0 -1
  42. {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/entry_points.txt +0 -0
  43. {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,518 @@
1
+ # Copyright (c) ModelScope Contributors. All rights reserved.
2
+ """
3
+ Real-time monitoring and tracking component
4
+ Provides actual system metrics and activity tracking
5
+ """
6
+
7
+ import psutil
8
+ import os
9
+ from datetime import datetime, timedelta
10
+ from typing import Dict, Any
11
+ from pathlib import Path
12
+ import threading
13
+
14
+ from sirchmunk.storage.knowledge_storage import KnowledgeStorage
15
+ from sirchmunk.api.components.history_storage import HistoryStorage
16
+ from sirchmunk.utils.constants import DEFAULT_SIRCHMUNK_WORK_PATH
17
+
18
+
19
class LLMUsageTracker:
    """
    Global tracker for LLM token usage and call statistics.

    Thread-safe singleton: instance creation is guarded by a class-level
    lock (double-checked locking), and all counter updates/reads are
    guarded by a per-instance data lock.
    """
    _instance = None            # singleton instance
    _lock = threading.Lock()    # guards singleton creation only

    def __new__(cls):
        if cls._instance is None:
            with cls._lock:
                # Re-check inside the lock: another thread may have won the race.
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
                    cls._instance._initialize()
        return cls._instance

    def _initialize(self):
        """Initialize tracking data (called exactly once, at singleton creation)."""
        self.total_calls = 0
        self.total_input_tokens = 0
        self.total_output_tokens = 0
        self.total_tokens = 0
        self.calls_by_model = {}
        self.last_call_time = None
        self.session_start = datetime.now()
        self._data_lock = threading.Lock()

    def record_usage(self, model: str, usage: Dict[str, int]):
        """
        Record token usage from an LLM call.

        Args:
            model: Model name.
            usage: Dictionary with prompt_tokens, completion_tokens,
                total_tokens. Missing token keys default to 0;
                a missing total_tokens defaults to input + output.
        """
        with self._data_lock:
            self.total_calls += 1
            self.last_call_time = datetime.now()

            input_tokens = usage.get('prompt_tokens', 0)
            output_tokens = usage.get('completion_tokens', 0)
            total = usage.get('total_tokens', input_tokens + output_tokens)

            self.total_input_tokens += input_tokens
            self.total_output_tokens += output_tokens
            self.total_tokens += total

            # Per-model breakdown; setdefault avoids the separate membership check.
            model_stats = self.calls_by_model.setdefault(model, {
                "calls": 0,
                "input_tokens": 0,
                "output_tokens": 0,
                "total_tokens": 0,
            })
            model_stats["calls"] += 1
            model_stats["input_tokens"] += input_tokens
            model_stats["output_tokens"] += output_tokens
            model_stats["total_tokens"] += total

    def get_stats(self) -> Dict[str, Any]:
        """Get current usage statistics as an independent snapshot dict."""
        with self._data_lock:
            uptime_seconds = (datetime.now() - self.session_start).total_seconds()
            calls_per_minute = (self.total_calls / uptime_seconds * 60) if uptime_seconds > 0 else 0

            return {
                "total_calls": self.total_calls,
                "total_input_tokens": self.total_input_tokens,
                "total_output_tokens": self.total_output_tokens,
                "total_tokens": self.total_tokens,
                "calls_per_minute": round(calls_per_minute, 2),
                "last_call_time": self.last_call_time.isoformat() if self.last_call_time else None,
                "session_start": self.session_start.isoformat(),
                "session_duration_minutes": round(uptime_seconds / 60, 1),
                # Copy the inner per-model dicts too: a plain .copy() would
                # hand callers live references to our mutable counters.
                "models": {name: stats.copy() for name, stats in self.calls_by_model.items()},
            }

    def reset(self):
        """Reset all statistics to zero (the session start becomes now)."""
        # Do NOT call _initialize() here: it would replace self._data_lock
        # with a fresh lock while another thread may still hold the old one.
        # Reset the counters under the existing lock instead.
        with self._data_lock:
            self.total_calls = 0
            self.total_input_tokens = 0
            self.total_output_tokens = 0
            self.total_tokens = 0
            self.calls_by_model = {}
            self.last_call_time = None
            self.session_start = datetime.now()
+
101
+
102
+ # Global LLM usage tracker instance
103
+ llm_usage_tracker = LLMUsageTracker()
104
+
105
+
106
class MonitorTracker:
    """
    Real-time system monitoring and activity tracking.

    Architecture:
    - Tracks actual chat sessions (via HistoryStorage)
    - Monitors knowledge cluster creation (via KnowledgeStorage)
    - Collects real system metrics (via psutil)
    - Provides comprehensive statistics and a derived health score
    """

    def __init__(self):
        """Initialize monitoring components, degrading gracefully when unavailable."""
        # Storage backends are optional: monitoring still works (reporting
        # empty activity) when they cannot be constructed. Catch Exception,
        # not a bare except, so KeyboardInterrupt/SystemExit propagate.
        try:
            self.history_storage = HistoryStorage()
        except Exception:
            self.history_storage = None

        try:
            self.knowledge_manager = KnowledgeStorage()
        except Exception:
            self.knowledge_manager = None

    def get_system_metrics(self) -> Dict[str, Any]:
        """
        Get real system metrics.

        Returns:
            Dictionary with CPU, memory, disk, and network metrics.
            On failure, a zeroed fallback dict with an "error" key.
        """
        try:
            # CPU usage (0.5s sampling window)
            cpu_percent = psutil.cpu_percent(interval=0.5)
            cpu_count = psutil.cpu_count()

            # Memory usage
            memory = psutil.virtual_memory()
            memory_total_gb = memory.total / (1024 ** 3)
            memory_used_gb = memory.used / (1024 ** 3)
            memory_available_gb = memory.available / (1024 ** 3)

            # Disk usage (root filesystem)
            disk = psutil.disk_usage('/')
            disk_total_gb = disk.total / (1024 ** 3)
            disk_used_gb = disk.used / (1024 ** 3)
            disk_free_gb = disk.free / (1024 ** 3)

            # Network connections; may require elevated privileges on some
            # platforms, so fall back to 0 rather than failing the whole call.
            try:
                connections = len(psutil.net_connections())
            except Exception:
                connections = 0

            # System uptime, formatted as "Xd Yh Zm"
            boot_time = psutil.boot_time()
            uptime_seconds = datetime.now().timestamp() - boot_time
            uptime_days = int(uptime_seconds // 86400)
            uptime_hours = int((uptime_seconds % 86400) // 3600)
            uptime_minutes = int((uptime_seconds % 3600) // 60)
            uptime_str = f"{uptime_days}d {uptime_hours}h {uptime_minutes}m"

            # Current process resource usage
            process = psutil.Process(os.getpid())
            process_memory_mb = process.memory_info().rss / (1024 ** 2)
            process_cpu_percent = process.cpu_percent(interval=0.1)

            return {
                "cpu": {
                    "usage_percent": round(cpu_percent, 1),
                    "count": cpu_count,
                    "process_percent": round(process_cpu_percent, 1),
                },
                "memory": {
                    "usage_percent": round(memory.percent, 1),
                    "total_gb": round(memory_total_gb, 2),
                    "used_gb": round(memory_used_gb, 2),
                    "available_gb": round(memory_available_gb, 2),
                    "process_mb": round(process_memory_mb, 1),
                },
                "disk": {
                    "usage_percent": round(disk.percent, 1),
                    "total_gb": round(disk_total_gb, 2),
                    "used_gb": round(disk_used_gb, 2),
                    "free_gb": round(disk_free_gb, 2),
                },
                "network": {
                    "active_connections": connections,
                },
                "uptime": uptime_str,
                "timestamp": datetime.now().isoformat(),
            }

        except Exception as e:
            # Minimal fallback so callers (e.g. get_health_status) always get
            # the expected shape.
            return {
                "cpu": {"usage_percent": 0, "count": 1, "process_percent": 0},
                "memory": {"usage_percent": 0, "total_gb": 0, "used_gb": 0, "available_gb": 0, "process_mb": 0},
                "disk": {"usage_percent": 0, "total_gb": 0, "used_gb": 0, "free_gb": 0},
                "network": {"active_connections": 0},
                "uptime": "0d 0h 0m",
                "timestamp": datetime.now().isoformat(),
                "error": str(e)
            }

    def get_chat_activity(self, hours: int = 24) -> Dict[str, Any]:
        """
        Get chat activity statistics.

        Args:
            hours: Time window in hours for "recent"/"active" sessions.

        Returns:
            Chat activity statistics (totals, recent sessions, LLM usage).
        """
        if not self.history_storage:
            return {
                "total_sessions": 0,
                "total_messages": 0,
                "recent_sessions": [],
                "active_sessions": 0,
            }

        try:
            # Get all sessions
            all_sessions = self.history_storage.get_all_sessions()

            # Calculate time threshold
            threshold = datetime.now() - timedelta(hours=hours)
            threshold_ts = threshold.timestamp()

            # Filter recent sessions
            recent_sessions = []
            total_messages = 0
            active_count = 0

            for session in all_sessions:
                session_time = session.get('updated_at', 0)

                # Normalize updated_at to a POSIX timestamp; it may arrive as
                # datetime, ISO string, or numeric depending on the storage layer.
                if isinstance(session_time, datetime):
                    session_time = session_time.timestamp()
                elif isinstance(session_time, str):
                    try:
                        session_time = datetime.fromisoformat(session_time.replace('Z', '+00:00')).timestamp()
                    except (ValueError, TypeError):
                        session_time = 0

                # Count messages using message_count field (messages are not
                # included in get_all_sessions)
                message_count = session.get('message_count', 0)
                total_messages += message_count

                # Check if recent
                if session_time >= threshold_ts:
                    recent_sessions.append({
                        "session_id": session.get('session_id'),
                        "title": session.get('title', 'Untitled'),
                        "message_count": message_count,
                        "created_at": session.get('created_at'),
                        "updated_at": session_time,  # Store as timestamp
                    })
                    active_count += 1

            # Sort by update time, newest first
            recent_sessions.sort(key=lambda x: x['updated_at'] if isinstance(x['updated_at'], (int, float)) else 0, reverse=True)

            # Get LLM usage stats
            llm_stats = llm_usage_tracker.get_stats()

            return {
                "total_sessions": len(all_sessions),
                "total_messages": total_messages,
                "recent_sessions": recent_sessions[:10],  # Top 10 most recent
                "active_sessions": active_count,
                "time_window_hours": hours,
                "llm_usage": llm_stats,
            }

        except Exception as e:
            print(f"[ERROR] Error getting chat activity in monitor: {e}")

            return {
                "total_sessions": 0,
                "total_messages": 0,
                "recent_sessions": [],
                "active_sessions": 0,
                "llm_usage": llm_usage_tracker.get_stats(),
                "error": str(e)
            }

    def get_knowledge_activity(self) -> Dict[str, Any]:
        """
        Get knowledge cluster activity statistics.

        Returns:
            Knowledge cluster statistics (totals, recent clusters,
            lifecycle distribution). Empty structure when the knowledge
            manager is unavailable or the query fails.
        """
        if not self.knowledge_manager:
            return {
                "total_clusters": 0,
                "recent_clusters": [],
                "lifecycle_distribution": {},
            }

        try:
            stats = self.knowledge_manager.get_stats()
            custom_stats = stats.get('custom_stats', {})

            # Get the 10 most recently modified clusters
            recent_rows = self.knowledge_manager.db.fetch_all(
                """
                SELECT id, name, lifecycle, last_modified, confidence
                FROM knowledge_clusters
                ORDER BY last_modified DESC
                LIMIT 10
                """
            )

            recent_clusters = [
                {
                    "id": row[0],
                    "name": row[1],
                    "lifecycle": row[2],
                    "last_modified": row[3],
                    "confidence": row[4],
                }
                for row in recent_rows
            ]

            return {
                "total_clusters": custom_stats.get('total_clusters', 0),
                "recent_clusters": recent_clusters,
                "lifecycle_distribution": custom_stats.get('lifecycle_distribution', {}),
                "average_confidence": custom_stats.get('average_confidence', 0),
            }

        except Exception as e:
            return {
                "total_clusters": 0,
                "recent_clusters": [],
                "lifecycle_distribution": {},
                "error": str(e)
            }

    def get_storage_info(self) -> Dict[str, Any]:
        """
        Get storage information for databases and cache.

        Returns:
            Storage information (paths, per-database sizes, total cache size).
        """
        try:
            work_path = Path(os.getenv("SIRCHMUNK_WORK_PATH", DEFAULT_SIRCHMUNK_WORK_PATH)).expanduser().resolve()
            cache_path = work_path / ".cache"

            storage_info = {
                "work_path": str(work_path),
                "cache_path": str(cache_path),
                "databases": {},
            }

            # Check history database
            history_db = cache_path / "history" / "chat_history.db"
            if history_db.exists():
                size_mb = history_db.stat().st_size / (1024 ** 2)
                storage_info["databases"]["history"] = {
                    "path": str(history_db),
                    "size_mb": round(size_mb, 2),
                    "exists": True,
                }

            # Check knowledge parquet
            knowledge_parquet = cache_path / "knowledge" / "knowledge_clusters.parquet"
            if knowledge_parquet.exists():
                size_mb = knowledge_parquet.stat().st_size / (1024 ** 2)
                storage_info["databases"]["knowledge"] = {
                    "path": str(knowledge_parquet),
                    "size_mb": round(size_mb, 2),
                    "exists": True,
                }

            # Check settings database
            settings_db = cache_path / "settings" / "settings.db"
            if settings_db.exists():
                size_mb = settings_db.stat().st_size / (1024 ** 2)
                storage_info["databases"]["settings"] = {
                    "path": str(settings_db),
                    "size_mb": round(size_mb, 2),
                    "exists": True,
                }

            # Calculate total cache size by walking every file under .cache
            total_size = 0
            if cache_path.exists():
                for file in cache_path.rglob('*'):
                    if file.is_file():
                        total_size += file.stat().st_size

            storage_info["total_cache_size_mb"] = round(total_size / (1024 ** 2), 2)

            return storage_info

        except Exception as e:
            return {
                "work_path": "",
                "cache_path": "",
                "databases": {},
                "error": str(e)
            }

    def get_health_status(self) -> Dict[str, Any]:
        """
        Get comprehensive health status.

        Returns:
            Health status for all components: a 0-100 score, issue list,
            status label/color, and per-service availability.
        """
        metrics = self.get_system_metrics()

        # Start at a perfect score and subtract penalties per resource issue.
        health_score = 100
        issues = []

        # CPU check
        cpu_usage = metrics.get('cpu', {}).get('usage_percent', 0)
        if cpu_usage > 90:
            health_score -= 30
            issues.append("High CPU usage")
        elif cpu_usage > 75:
            health_score -= 15
            issues.append("Elevated CPU usage")

        # Memory check
        memory_usage = metrics.get('memory', {}).get('usage_percent', 0)
        if memory_usage > 90:
            health_score -= 30
            issues.append("High memory usage")
        elif memory_usage > 80:
            health_score -= 15
            issues.append("Elevated memory usage")

        # Disk check (weighted heaviest: a full disk is the most disruptive)
        disk_usage = metrics.get('disk', {}).get('usage_percent', 0)
        if disk_usage > 95:
            health_score -= 40
            issues.append("Critical disk usage")
        elif disk_usage > 85:
            health_score -= 20
            issues.append("High disk usage")

        # Determine overall status
        if health_score >= 90:
            status = "excellent"
            status_color = "green"
        elif health_score >= 70:
            status = "good"
            status_color = "blue"
        elif health_score >= 50:
            status = "warning"
            status_color = "yellow"
        else:
            status = "critical"
            status_color = "red"

        # Check service availability
        services = {
            "api": {
                "status": "running",
                "healthy": True,
            },
            "history_storage": {
                "status": "connected" if self.history_storage else "unavailable",
                "healthy": bool(self.history_storage),
            },
            "knowledge_manager": {
                "status": "connected" if self.knowledge_manager else "unavailable",
                "healthy": bool(self.knowledge_manager),
            },
        }

        return {
            "overall_status": status,
            "status_color": status_color,
            "health_score": max(0, health_score),
            "issues": issues,
            "services": services,
            "timestamp": datetime.now().isoformat(),
        }

    def get_overview(self) -> Dict[str, Any]:
        """
        Get comprehensive monitoring overview.

        Returns:
            Complete monitoring data: system metrics, chat and knowledge
            activity, storage info, and health status.
        """
        return {
            "system": self.get_system_metrics(),
            "chat": self.get_chat_activity(hours=24),
            "knowledge": self.get_knowledge_activity(),
            "storage": self.get_storage_info(),
            "health": self.get_health_status(),
            "timestamp": datetime.now().isoformat(),
        }
508
+
509
+
510
# Global instance, created lazily on first use.
_monitor_tracker = None
# Guards lazy creation; the original check-then-create had a race under
# concurrent first calls (two MonitorTracker instances could be built).
_monitor_tracker_lock = threading.Lock()


def get_monitor_tracker() -> MonitorTracker:
    """Get or create the global monitor tracker instance (thread-safe)."""
    global _monitor_tracker
    if _monitor_tracker is None:
        # Double-checked locking, consistent with LLMUsageTracker above.
        with _monitor_tracker_lock:
            if _monitor_tracker is None:
                _monitor_tracker = MonitorTracker()
    return _monitor_tracker