sirchmunk 0.0.1.post1__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43):
  1. sirchmunk/api/__init__.py +1 -0
  2. sirchmunk/api/chat.py +1123 -0
  3. sirchmunk/api/components/__init__.py +0 -0
  4. sirchmunk/api/components/history_storage.py +402 -0
  5. sirchmunk/api/components/monitor_tracker.py +518 -0
  6. sirchmunk/api/components/settings_storage.py +353 -0
  7. sirchmunk/api/history.py +254 -0
  8. sirchmunk/api/knowledge.py +411 -0
  9. sirchmunk/api/main.py +120 -0
  10. sirchmunk/api/monitor.py +219 -0
  11. sirchmunk/api/run_server.py +54 -0
  12. sirchmunk/api/search.py +230 -0
  13. sirchmunk/api/settings.py +309 -0
  14. sirchmunk/api/tools.py +315 -0
  15. sirchmunk/cli/__init__.py +11 -0
  16. sirchmunk/cli/cli.py +789 -0
  17. sirchmunk/learnings/knowledge_base.py +5 -2
  18. sirchmunk/llm/prompts.py +12 -1
  19. sirchmunk/retrieve/text_retriever.py +186 -2
  20. sirchmunk/scan/file_scanner.py +2 -2
  21. sirchmunk/schema/knowledge.py +119 -35
  22. sirchmunk/search.py +384 -26
  23. sirchmunk/storage/__init__.py +2 -2
  24. sirchmunk/storage/{knowledge_manager.py → knowledge_storage.py} +265 -60
  25. sirchmunk/utils/constants.py +7 -5
  26. sirchmunk/utils/embedding_util.py +217 -0
  27. sirchmunk/utils/tokenizer_util.py +36 -1
  28. sirchmunk/version.py +1 -1
  29. {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/METADATA +124 -9
  30. sirchmunk-0.0.2.dist-info/RECORD +69 -0
  31. {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/WHEEL +1 -1
  32. sirchmunk-0.0.2.dist-info/top_level.txt +2 -0
  33. sirchmunk_mcp/__init__.py +25 -0
  34. sirchmunk_mcp/cli.py +478 -0
  35. sirchmunk_mcp/config.py +276 -0
  36. sirchmunk_mcp/server.py +355 -0
  37. sirchmunk_mcp/service.py +327 -0
  38. sirchmunk_mcp/setup.py +15 -0
  39. sirchmunk_mcp/tools.py +410 -0
  40. sirchmunk-0.0.1.post1.dist-info/RECORD +0 -45
  41. sirchmunk-0.0.1.post1.dist-info/top_level.txt +0 -1
  42. {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/entry_points.txt +0 -0
  43. {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,353 @@
1
+ # Copyright (c) ModelScope Contributors. All rights reserved.
2
+ """
3
+ Settings Storage using DuckDB
4
+ Provides persistent storage for application settings
5
+ """
6
+
7
+ import os
8
+ import json
9
+ from typing import Dict, Any, Optional
10
+ from pathlib import Path
11
+ from datetime import datetime
12
+ from loguru import logger
13
+
14
+ from sirchmunk.storage.duckdb import DuckDBManager
15
+ from sirchmunk.utils.constants import DEFAULT_SIRCHMUNK_WORK_PATH
16
+
17
+
18
class SettingsStorage:
    """
    Manages persistent storage of application settings using DuckDB

    Architecture:
    - Stores UI settings, environment variables, and configuration
    - Follows Single Responsibility Principle (SRP)
    - Provides clean interface for CRUD operations

    Two tables are maintained:
    - ``settings``: key/value pairs tagged with a category; non-string
      values are stored JSON-encoded
    - ``environment``: plain-text variables with a description and category

    The public save/get/delete methods catch exceptions, log them and signal
    failure through their return value (``False``, ``{}`` or the supplied
    default). Instances can be used as context managers and ``close()`` may
    be called more than once.
    """

    # Table names that may be interpolated into SQL by _upsert().
    _TABLES = ("settings", "environment")

    def __init__(self, work_path: Optional[str] = None):
        """
        Initialize Settings Storage

        Args:
            work_path: Base work path. If None, uses SIRCHMUNK_WORK_PATH env variable
                (falling back to DEFAULT_SIRCHMUNK_WORK_PATH)
        """
        # Get work path from env if not provided
        if work_path is None:
            work_path = os.getenv("SIRCHMUNK_WORK_PATH", DEFAULT_SIRCHMUNK_WORK_PATH)

        # Create settings storage path (expand ~ and resolve to absolute path)
        self.settings_path = Path(work_path).expanduser().resolve() / ".cache" / "settings"
        self.settings_path.mkdir(parents=True, exist_ok=True)

        # Initialize DuckDB
        self.db_path = str(self.settings_path / "settings.db")
        self.db = DuckDBManager(db_path=self.db_path)

        # Create tables if not exist
        self._initialize_tables()

        logger.info(f"Settings storage initialized at: {self.db_path}")

    def _initialize_tables(self):
        """Create database tables for settings"""

        # Settings table (key-value store)
        settings_schema = {
            "key": "VARCHAR PRIMARY KEY",
            "value": "TEXT NOT NULL",
            "category": "VARCHAR NOT NULL",
            "updated_at": "TIMESTAMP NOT NULL",
        }
        self.db.create_table("settings", settings_schema, if_not_exists=True)

        # Environment variables table
        env_schema = {
            "key": "VARCHAR PRIMARY KEY",
            "value": "TEXT",
            "description": "TEXT",
            "category": "VARCHAR",
            "updated_at": "TIMESTAMP NOT NULL",
        }
        self.db.create_table("environment", env_schema, if_not_exists=True)

    @staticmethod
    def _decode_value(raw: Any) -> Any:
        """Return ``raw`` parsed as JSON, or ``raw`` itself if it is not valid JSON."""
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            return raw

    def _upsert(self, table: str, key: str, fields: Dict[str, Any]) -> bool:
        """
        Insert or update the row identified by ``key`` in ``table``

        Args:
            table: One of the tables listed in ``_TABLES``
            key: Primary-key value of the row
            fields: Column values to write (without ``key`` and ``updated_at``)

        Returns:
            True if an existing row was updated, False if a new row was inserted

        Raises:
            ValueError: If ``table`` is not a known table
        """
        # The table name is interpolated into SQL, so only accept known names.
        if table not in self._TABLES:
            raise ValueError(f"Unknown table: {table}")

        # NOTE(review): the existence check and the write are separate
        # statements; confirm whether concurrent writers are possible.
        existing = self.db.fetch_one(
            f"SELECT key FROM {table} WHERE key = ?",
            [key]
        )

        values = {**fields, "updated_at": datetime.now().isoformat()}

        if existing:
            self.db.update_data(
                table,
                set_clause=values,
                where_clause="key = ?",
                where_params=[key]
            )
            return True

        self.db.insert_data(table, {"key": key, **values})
        return False

    def save_setting(self, key: str, value: Any, category: str = "general") -> bool:
        """
        Save or update a setting

        Args:
            key: Setting key
            value: Setting value; strings are stored as-is, anything else is
                JSON serialized
            category: Setting category (ui, llm, env, etc.)

        Returns:
            True if successful, False if an error occurred (the error is logged)

        Note:
            Because strings are stored without JSON quoting, a string that is
            itself valid JSON (e.g. ``"8080"`` or ``"true"``) is returned by
            ``get_setting`` as the parsed value, not as the original string.
        """
        try:
            value_str = value if isinstance(value, str) else json.dumps(value)

            updated = self._upsert(
                "settings",
                key,
                {"value": value_str, "category": category},
            )

            if updated:
                logger.debug(f"Updated setting: {key}")
            else:
                logger.debug(f"Created new setting: {key}")

            return True

        except Exception as e:
            logger.error(f"Failed to save setting {key}: {e}")
            return False

    def get_setting(self, key: str, default: Any = None) -> Any:
        """
        Get a setting value

        Args:
            key: Setting key
            default: Default value if not found or if the lookup fails

        Returns:
            Setting value (deserialized from JSON when possible, otherwise the
            raw stored string) or default
        """
        try:
            row = self.db.fetch_one(
                "SELECT value FROM settings WHERE key = ?",
                [key]
            )

            if row:
                return self._decode_value(row[0])

            return default

        except Exception as e:
            logger.error(f"Failed to get setting {key}: {e}")
            return default

    def get_settings_by_category(self, category: str) -> Dict[str, Any]:
        """
        Get all settings in a category

        Args:
            category: Category name

        Returns:
            Dictionary of key-value pairs; empty if the lookup fails
        """
        try:
            rows = self.db.fetch_all(
                "SELECT key, value FROM settings WHERE category = ?",
                [category]
            )

            return {key: self._decode_value(value_str) for key, value_str in rows}

        except Exception as e:
            logger.error(f"Failed to get settings for category {category}: {e}")
            return {}

    def get_all_settings(self) -> Dict[str, Dict[str, Any]]:
        """
        Get all settings grouped by category

        Returns:
            Dictionary with categories as keys and ``{key: value}`` dictionaries
            as values; empty if the lookup fails
        """
        try:
            rows = self.db.fetch_all(
                "SELECT key, value, category FROM settings"
            )

            settings_by_category: Dict[str, Dict[str, Any]] = {}
            for key, value_str, category in rows:
                bucket = settings_by_category.setdefault(category, {})
                bucket[key] = self._decode_value(value_str)

            return settings_by_category

        except Exception as e:
            logger.error(f"Failed to get all settings: {e}")
            return {}

    def save_env_variable(self, key: str, value: str, description: str = "", category: str = "general") -> bool:
        """
        Save or update an environment variable

        Args:
            key: Environment variable key
            value: Environment variable value
            description: Description of the variable
            category: Category (llm, system, etc.)

        Returns:
            True if successful, False if an error occurred (the error is logged)
        """
        try:
            updated = self._upsert(
                "environment",
                key,
                {"value": value, "description": description, "category": category},
            )

            if updated:
                logger.debug(f"Updated env var: {key}")
            else:
                logger.debug(f"Created new env var: {key}")

            return True

        except Exception as e:
            logger.error(f"Failed to save env var {key}: {e}")
            return False

    def get_env_variable(self, key: str, default: str = "") -> str:
        """
        Get an environment variable value

        Args:
            key: Environment variable key
            default: Default value if not found

        Returns:
            Environment variable value or default. A stored empty/NULL value
            is treated the same as a missing one.
        """
        try:
            row = self.db.fetch_one(
                "SELECT value FROM environment WHERE key = ?",
                [key]
            )

            return row[0] if row and row[0] else default

        except Exception as e:
            logger.error(f"Failed to get env var {key}: {e}")
            return default

    def get_all_env_variables(self) -> Dict[str, Dict[str, Dict[str, str]]]:
        """
        Get all environment variables grouped by category

        Returns:
            Dictionary with categories as keys; each category maps variable
            keys to ``{"value": ..., "description": ...}`` (NULLs become "").
            Empty if the lookup fails.
        """
        try:
            rows = self.db.fetch_all(
                "SELECT key, value, description, category FROM environment"
            )

            env_by_category: Dict[str, Dict[str, Dict[str, str]]] = {}
            for key, value, description, category in rows:
                env_by_category.setdefault(category, {})[key] = {
                    "value": value or "",
                    "description": description or ""
                }

            return env_by_category

        except Exception as e:
            logger.error(f"Failed to get all env vars: {e}")
            return {}

    def delete_setting(self, key: str) -> bool:
        """Delete a setting; returns False (and logs) if the delete fails"""
        try:
            self.db.delete_data("settings", "key = ?", [key])
            logger.info(f"Deleted setting: {key}")
            return True
        except Exception as e:
            logger.error(f"Failed to delete setting {key}: {e}")
            return False

    def delete_env_variable(self, key: str) -> bool:
        """Delete an environment variable; returns False (and logs) if the delete fails"""
        try:
            self.db.delete_data("environment", "key = ?", [key])
            logger.info(f"Deleted env var: {key}")
            return True
        except Exception as e:
            logger.error(f"Failed to delete env var {key}: {e}")
            return False

    def close(self):
        """
        Close database connection

        Safe to call repeatedly: the manager reference is cleared on the first
        call, so a later ``__exit__``/``__del__`` does not close or log again.
        """
        db = getattr(self, "db", None)
        if not db:
            return

        # Clear the reference before closing so a failing close() is not retried.
        self.db = None
        db.close()
        logger.info("Settings storage closed")

    def __enter__(self):
        """Context manager entry"""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit"""
        self.close()

    def __del__(self):
        """Destructor to ensure connection is closed"""
        # getattr guards against __init__ having failed before self.db was set.
        if getattr(self, "db", None):
            self.close()
@@ -0,0 +1,254 @@
1
+ # Copyright (c) ModelScope Contributors. All rights reserved.
2
+ """
3
+ History API endpoints integrated with persistent storage
4
+ Provides unified history tracking with DuckDB backend
5
+ """
6
+
7
+ from fastapi import APIRouter, HTTPException
8
+ from typing import Dict, Any, List, Optional
9
+ import json
10
+ from datetime import datetime, timedelta
11
+
12
+ # Import chat sessions and history storage from chat module
13
+ from .chat import chat_sessions, history_storage
14
+
15
# Primary router: chat-session and history endpoints live under /api/v1.
router = APIRouter(
    prefix="/api/v1",
    tags=["history"],
)

# Secondary router dedicated to the dashboard endpoints.
from fastapi import APIRouter as FastAPIRouter

dashboard_router = FastAPIRouter(
    prefix="/api/v1/dashboard",
    tags=["dashboard"],
)
20
+
21
+
22
@router.get("/chat/sessions")
async def get_chat_sessions(limit: int = 20, offset: int = 0):
    """
    List chat sessions from persistent storage, one page at a time.

    Args:
        limit: Maximum number of sessions to return
        offset: Number of sessions to skip before the returned page

    Returns:
        Dict with ``success``, ``data`` (one summary per session) and
        ``pagination`` (``limit``, ``offset`` and the total session count)
    """

    def _clip(text, max_len):
        # Cut over-long text and mark the cut with an ellipsis.
        if len(text) > max_len:
            return text[:max_len] + "..."
        return text

    def _as_unix(stamp):
        # ISO strings become Unix seconds; any other value is passed through.
        if isinstance(stamp, str):
            return int(datetime.fromisoformat(stamp).timestamp())
        return stamp

    page = history_storage.get_all_sessions(limit=limit, offset=offset)

    summaries = []
    for entry in page:
        # The listing row is complemented by the full session so the title
        # and preview can be derived from its messages.
        detail = history_storage.get_session(entry["session_id"])
        messages = detail.get("messages") if detail else None

        title = entry.get("title", "Chat Session")
        preview = ""
        if messages:
            # Title: the first user message, if there is one.
            opener = next((m for m in messages if m["role"] == "user"), None)
            if opener:
                title = _clip(opener["content"], 50)

            # Preview: the most recent message, whoever sent it.
            preview = _clip(messages[-1]["content"], 100)

        raw_created = entry["created_at"]
        raw_updated = entry["updated_at"]
        created_at = _as_unix(raw_created)
        updated_at = _as_unix(raw_updated)

        summaries.append({
            "session_id": entry["session_id"],
            "title": title,
            "message_count": entry.get("message_count", 0),
            "last_message": preview,
            "created_at": created_at,
            "updated_at": updated_at,
            "topics": []  # Placeholder
        })

    total_count = history_storage.get_session_count()

    return {
        "success": True,
        "data": summaries,
        "pagination": {
            "limit": limit,
            "offset": offset,
            "total": total_count
        }
    }
75
+
76
+
77
@router.get("/chat/sessions/{session_id}")
async def get_chat_session(session_id: str):
    """
    Return one chat session, including its messages.

    Persistent storage is consulted first; the in-memory ``chat_sessions``
    cache is used only when storage has no (non-empty) entry.

    Args:
        session_id: Identifier of the session to load

    Returns:
        Dict with ``success`` and ``data`` (id, title, messages, settings and
        created/updated timestamps)

    Raises:
        HTTPException: 404 if the session is found in neither place
    """

    def _as_unix(stamp):
        # ISO strings become Unix seconds; any other value is passed through.
        if isinstance(stamp, str):
            return int(datetime.fromisoformat(stamp).timestamp())
        return stamp

    record = history_storage.get_session(session_id)
    if not record and session_id in chat_sessions:
        record = chat_sessions[session_id]

    if not record:
        raise HTTPException(status_code=404, detail="Chat session not found")

    # Work on copies so the stored/cached messages are not modified.
    messages = []
    for stored_message in record.get("messages", []):
        message = stored_message.copy()
        if "timestamp" in message:
            message["timestamp"] = _as_unix(message["timestamp"])
        messages.append(message)

    raw_created = record.get("created_at")
    raw_updated = record.get("updated_at")
    created_at = _as_unix(raw_created)
    updated_at = _as_unix(raw_updated)

    payload = {
        "session_id": record["session_id"],
        "title": record.get("title", "Chat Session"),
        "messages": messages,
        "settings": record.get("settings", {}),
        "created_at": created_at,
        "updated_at": updated_at
    }
    return {"success": True, "data": payload}
119
+
120
+
121
@router.delete("/chat/sessions/{session_id}")
async def delete_chat_session(session_id: str):
    """
    Remove a chat session from persistent storage and from the in-memory cache.

    Args:
        session_id: Identifier of the session to delete

    Returns:
        Dict with ``success``, a confirmation ``message`` and the deleted id

    Raises:
        HTTPException: 404 if persistent storage reports the delete as
            unsuccessful
    """
    deleted = history_storage.delete_session(session_id)

    # The cache entry is evicted regardless of the storage result.
    # NOTE(review): a session that exists only in the cache is evicted here
    # and still answered with 404 below - confirm that this is intended.
    if session_id in chat_sessions:
        chat_sessions.pop(session_id)

    if deleted:
        return {
            "success": True,
            "message": "Chat session deleted successfully",
            "data": {
                "session_id": session_id
            }
        }

    raise HTTPException(status_code=404, detail="Chat session not found")
141
+
142
+
143
@router.get("/history/search")
async def search_history(query: str, limit: int = 20):
    """
    Search stored chat sessions for ``query``.

    Args:
        query: Text to search for
        limit: Maximum number of sessions to return

    Returns:
        Dict with ``success``, ``data`` (matching session summaries) and the
        original ``query``
    """

    def _as_unix(stamp):
        # ISO strings become Unix seconds; any other value is passed through.
        if isinstance(stamp, str):
            return int(datetime.fromisoformat(stamp).timestamp())
        return stamp

    matches = history_storage.search_sessions(query, limit=limit)

    results = []
    for hit in matches:
        raw_created = hit["created_at"]
        raw_updated = hit["updated_at"]
        created_at = _as_unix(raw_created)
        updated_at = _as_unix(raw_updated)

        results.append({
            "session_id": hit["session_id"],
            "title": hit.get("title", "Chat Session"),
            "message_count": hit.get("message_count", 0),
            "created_at": created_at,
            "updated_at": updated_at,
        })

    return {
        "success": True,
        "data": results,
        "query": query
    }
172
+
173
+
174
@router.get("/history/stats")
async def get_history_statistics():
    """
    Get history statistics from persistent storage

    Returns:
        Dict with ``success`` and ``data`` containing:
        - ``total_sessions``: session count reported by storage
        - ``total_messages``: sum of ``message_count`` over the sessions
          inspected (at most 1000)
        - ``recent_activity``: number of inspected sessions updated within
          the last 7 days and the resulting daily average

    Sessions whose ``updated_at`` is missing or cannot be parsed still count
    towards ``total_messages`` but are not counted as recent activity.
    """
    total_sessions = history_storage.get_session_count()

    # Only the first 1000 sessions are inspected for the activity figures.
    all_sessions = history_storage.get_all_sessions(limit=1000)
    window = timedelta(days=7)
    now = datetime.now()

    recent_count = 0
    total_messages = 0

    for session in all_sessions:
        # A stored None must not break the sum.
        total_messages += session.get("message_count") or 0

        updated_at = session.get("updated_at")
        if isinstance(updated_at, str):
            try:
                updated_at = datetime.fromisoformat(updated_at)
            except ValueError:
                updated_at = None

        if not isinstance(updated_at, datetime):
            # Missing or unparseable timestamp: cannot be placed in the window.
            continue

        # Naive and aware datetimes cannot be compared with each other, so
        # take "now" in the timestamp's own timezone when it has one.
        reference = now if updated_at.tzinfo is None else datetime.now(updated_at.tzinfo)
        if updated_at > reference - window:
            recent_count += 1

    return {
        "success": True,
        "data": {
            "total_sessions": total_sessions,
            "total_messages": total_messages,
            "recent_activity": {
                "last_7_days": recent_count,
                "daily_average": recent_count / 7
            }
        }
    }
207
+
208
+
209
@dashboard_router.get("/recent")
async def get_recent_activity(limit: int = 50, type: Optional[str] = None):
    """
    Get recent activity (chat sessions)

    Query params:
        limit: Maximum number of items to return
        type: Filter by type (currently only "chat" is supported); the value
            is accepted but not used for filtering here

    Returns:
        Dict with ``success``, ``data`` (one activity item per session) and
        ``count``

    Raises:
        HTTPException: 500 carrying the error text if anything fails
    """

    def _clip(text, max_len):
        # Cut over-long text and mark the cut with an ellipsis.
        if len(text) > max_len:
            return text[:max_len] + "..."
        return text

    try:
        recent = history_storage.get_all_sessions(limit=limit, offset=0)

        activities = []
        for entry in recent:
            # The full session is needed to derive a title from its messages.
            detail = history_storage.get_session(entry["session_id"])
            messages = detail.get("messages") if detail else None

            title = entry.get("title", "Chat Session")
            if messages:
                opener = next((m for m in messages if m["role"] == "user"), None)
                if opener:
                    title = _clip(opener["content"], 50)

            # ISO strings become Unix seconds; other values pass through.
            stamp = entry["created_at"]
            if isinstance(stamp, str):
                stamp = int(datetime.fromisoformat(stamp).timestamp())

            activities.append({
                "id": entry["session_id"],
                "type": "chat",
                "title": title,
                "timestamp": stamp,
                "message_count": entry.get("message_count", 0),
            })

        return {
            "success": True,
            "data": activities,
            "count": len(activities),
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))