sirchmunk 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. sirchmunk/api/__init__.py +1 -0
  2. sirchmunk/api/chat.py +1123 -0
  3. sirchmunk/api/components/__init__.py +0 -0
  4. sirchmunk/api/components/history_storage.py +402 -0
  5. sirchmunk/api/components/monitor_tracker.py +518 -0
  6. sirchmunk/api/components/settings_storage.py +353 -0
  7. sirchmunk/api/history.py +254 -0
  8. sirchmunk/api/knowledge.py +411 -0
  9. sirchmunk/api/main.py +120 -0
  10. sirchmunk/api/monitor.py +219 -0
  11. sirchmunk/api/run_server.py +54 -0
  12. sirchmunk/api/search.py +230 -0
  13. sirchmunk/api/settings.py +309 -0
  14. sirchmunk/api/tools.py +315 -0
  15. sirchmunk/cli/__init__.py +11 -0
  16. sirchmunk/cli/cli.py +789 -0
  17. sirchmunk/learnings/knowledge_base.py +5 -2
  18. sirchmunk/llm/prompts.py +12 -1
  19. sirchmunk/retrieve/text_retriever.py +186 -2
  20. sirchmunk/scan/file_scanner.py +2 -2
  21. sirchmunk/schema/knowledge.py +119 -35
  22. sirchmunk/search.py +384 -26
  23. sirchmunk/storage/__init__.py +2 -2
  24. sirchmunk/storage/{knowledge_manager.py → knowledge_storage.py} +265 -60
  25. sirchmunk/utils/constants.py +7 -5
  26. sirchmunk/utils/embedding_util.py +217 -0
  27. sirchmunk/utils/tokenizer_util.py +36 -1
  28. sirchmunk/version.py +1 -1
  29. {sirchmunk-0.0.1.dist-info → sirchmunk-0.0.2.dist-info}/METADATA +196 -14
  30. sirchmunk-0.0.2.dist-info/RECORD +69 -0
  31. {sirchmunk-0.0.1.dist-info → sirchmunk-0.0.2.dist-info}/WHEEL +1 -1
  32. sirchmunk-0.0.2.dist-info/top_level.txt +2 -0
  33. sirchmunk_mcp/__init__.py +25 -0
  34. sirchmunk_mcp/cli.py +478 -0
  35. sirchmunk_mcp/config.py +276 -0
  36. sirchmunk_mcp/server.py +355 -0
  37. sirchmunk_mcp/service.py +327 -0
  38. sirchmunk_mcp/setup.py +15 -0
  39. sirchmunk_mcp/tools.py +410 -0
  40. sirchmunk-0.0.1.dist-info/RECORD +0 -45
  41. sirchmunk-0.0.1.dist-info/top_level.txt +0 -1
  42. {sirchmunk-0.0.1.dist-info → sirchmunk-0.0.2.dist-info}/entry_points.txt +0 -0
  43. {sirchmunk-0.0.1.dist-info → sirchmunk-0.0.2.dist-info}/licenses/LICENSE +0 -0
File without changes
@@ -0,0 +1,402 @@
1
+ # Copyright (c) ModelScope Contributors. All rights reserved.
2
+ """
3
+ Chat History Storage using DuckDB
4
+ Provides persistent storage for chat sessions and messages
5
+ """
6
+
7
+ import os
8
+ import json
9
+ from typing import Dict, Any, List, Optional
10
+ from pathlib import Path
11
+ from datetime import datetime
12
+ from loguru import logger
13
+
14
+ from sirchmunk.storage.duckdb import DuckDBManager
15
+ from sirchmunk.utils.constants import DEFAULT_SIRCHMUNK_WORK_PATH
16
+
17
+
18
class HistoryStorage:
    """
    Manages persistent storage of chat history using DuckDB

    Architecture:
    - Stores chat sessions and messages in DuckDB
    - Follows Single Responsibility Principle (SRP)
    - Provides clean interface for CRUD operations
    - Decoupled from API layer (Dependency Inversion Principle)

    Most methods catch storage errors, log them, and report failure through
    their return value (False / None / []) instead of raising.
    """

    def __init__(self, work_path: Optional[str] = None):
        """
        Initialize History Storage

        Args:
            work_path: Base work path. If None, uses the SIRCHMUNK_WORK_PATH
                env variable, falling back to DEFAULT_SIRCHMUNK_WORK_PATH.
        """
        if work_path is None:
            work_path = os.getenv("SIRCHMUNK_WORK_PATH", DEFAULT_SIRCHMUNK_WORK_PATH)

        # Expand ~ and resolve to an absolute path before creating the directory
        self.history_path = Path(work_path).expanduser().resolve() / ".cache" / "history"
        self.history_path.mkdir(parents=True, exist_ok=True)

        # Initialize DuckDB
        self.db_path = str(self.history_path / "chat_history.db")
        self.db = DuckDBManager(db_path=self.db_path)

        # Create tables if not exist
        self._initialize_tables()

        logger.info(f"History storage initialized at: {self.db_path}")

    @staticmethod
    def _row_to_session(row) -> Dict[str, Any]:
        """
        Convert a session row into a dictionary.

        Expects the columns in the order: session_id, title, created_at,
        updated_at, settings, message_count.
        """
        return {
            "session_id": row[0],
            "title": row[1],
            "created_at": row[2],
            "updated_at": row[3],
            "settings": json.loads(row[4]) if row[4] else {},
            "message_count": row[5],
        }

    def _initialize_tables(self):
        """Create database tables for chat history and the message index"""
        # Must be checked BEFORE create_table: afterwards the table always
        # exists, so a later check could never detect a fresh database and
        # the index would never be created.
        messages_table_is_new = not self.db.table_exists("chat_messages")

        # Chat sessions table
        sessions_schema = {
            "session_id": "VARCHAR PRIMARY KEY",
            "title": "VARCHAR",
            "created_at": "TIMESTAMP NOT NULL",
            "updated_at": "TIMESTAMP NOT NULL",
            "settings": "JSON",
            "message_count": "INTEGER DEFAULT 0",
        }
        self.db.create_table("chat_sessions", sessions_schema, if_not_exists=True)

        # Chat messages table
        messages_schema = {
            "id": "VARCHAR PRIMARY KEY",
            "session_id": "VARCHAR NOT NULL",
            "role": "VARCHAR NOT NULL",
            "content": "TEXT NOT NULL",
            "timestamp": "TIMESTAMP NOT NULL",
            "search_logs": "JSON",
            "is_streaming": "BOOLEAN DEFAULT FALSE",
        }
        self.db.create_table("chat_messages", messages_schema, if_not_exists=True)

        # Create index for faster per-session message queries
        if messages_table_is_new:
            self.db.create_index("chat_messages", ["session_id"], "idx_messages_session")

    def save_session(self, session_data: Dict[str, Any]) -> bool:
        """
        Save or update a chat session

        Args:
            session_data: Dictionary containing session information
                - session_id: str
                - title: str (optional, defaults to "Chat Session")
                - created_at: str (ISO format; defaults to now on insert,
                  left unchanged on update when omitted)
                - updated_at: str (ISO format; defaults to now when omitted)
                - settings: dict (optional)
                - message_count: int (optional, defaults to 0)

        Returns:
            True if successful, False if the save failed (error is logged)
        """
        try:
            session_id = session_data["session_id"]

            # Check if session exists
            existing = self.db.fetch_one(
                "SELECT session_id FROM chat_sessions WHERE session_id = ?",
                [session_id]
            )

            # Both timestamp columns are NOT NULL, so never hand None to the DB
            now = datetime.now().isoformat()
            created_at = session_data.get("created_at")

            data_to_save = {
                "session_id": session_id,
                "title": session_data.get("title", "Chat Session"),
                "created_at": created_at or now,
                "updated_at": session_data.get("updated_at") or now,
                # `or {}` keeps an explicit None from being stored as JSON null
                "settings": json.dumps(session_data.get("settings") or {}),
                "message_count": session_data.get("message_count", 0),
            }

            if existing:
                # Update existing session
                set_clause = {k: v for k, v in data_to_save.items() if k != "session_id"}
                if not created_at:
                    # No creation time supplied: keep the stored one instead
                    # of overwriting it with the current time.
                    del set_clause["created_at"]
                self.db.update_data(
                    "chat_sessions",
                    set_clause=set_clause,
                    where_clause="session_id = ?",
                    where_params=[session_id]
                )
                logger.debug(f"Updated session: {session_id}")
            else:
                # Insert new session
                self.db.insert_data("chat_sessions", data_to_save)
                logger.debug(f"Created new session: {session_id}")

            return True

        except Exception as e:
            logger.error(f"Failed to save session: {e}")
            return False

    def save_message(self, session_id: str, message_data: Dict[str, Any]) -> bool:
        """
        Save a chat message and refresh the owning session's message count

        Args:
            session_id: Session ID
            message_data: Dictionary containing message information
                - role: str ("user" or "assistant")
                - content: str
                - timestamp: str (ISO format) or int/float (Unix timestamp);
                  defaults to now when missing
                - searchLogs / search_logs: list (optional)
                - isStreaming / is_streaming: bool (optional)

        Returns:
            True if successful, False if the save failed (error is logged)
        """
        try:
            # Generate message ID from the session and the insertion time
            message_id = f"{session_id}_{datetime.now().timestamp()}"

            # Handle timestamp conversion (float epochs are as common as int ones)
            timestamp = message_data.get("timestamp")
            if isinstance(timestamp, (int, float)):
                timestamp = datetime.fromtimestamp(timestamp).isoformat()
            elif not timestamp:
                timestamp = datetime.now().isoformat()

            # Accept both the camelCase keys and the documented snake_case
            # keys; camelCase wins when both are present.
            search_logs = message_data.get(
                "searchLogs", message_data.get("search_logs", [])
            )
            is_streaming = message_data.get(
                "isStreaming", message_data.get("is_streaming", False)
            )

            # Prepare data
            data_to_save = {
                "id": message_id,
                "session_id": session_id,
                "role": message_data["role"],
                "content": message_data["content"],
                "timestamp": timestamp,
                "search_logs": json.dumps(search_logs),
                "is_streaming": is_streaming,
            }

            # Insert message
            self.db.insert_data("chat_messages", data_to_save)

            # Update session message count
            self.db.execute(
                """
                UPDATE chat_sessions
                SET message_count = (
                    SELECT COUNT(*) FROM chat_messages WHERE session_id = ?
                ),
                updated_at = ?
                WHERE session_id = ?
                """,
                [session_id, datetime.now().isoformat(), session_id]
            )

            logger.debug(f"Saved message to session: {session_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to save message: {e}")
            return False

    def get_session(self, session_id: str) -> Optional[Dict[str, Any]]:
        """
        Retrieve a chat session with all messages

        Args:
            session_id: Session ID

        Returns:
            Dictionary containing session data and a "messages" list, or
            None if the session is not found or the lookup failed
        """
        try:
            # Explicit column list: the positional mapping below must not
            # depend on the physical column order of the table.
            session_row = self.db.fetch_one(
                """
                SELECT session_id, title, created_at, updated_at, settings, message_count
                FROM chat_sessions
                WHERE session_id = ?
                """,
                [session_id]
            )

            if not session_row:
                return None

            session_data = self._row_to_session(session_row)

            # Get messages; id embeds the insertion time, so it gives a
            # deterministic order to messages that share a timestamp.
            message_rows = self.db.fetch_all(
                """
                SELECT id, role, content, timestamp, search_logs, is_streaming
                FROM chat_messages
                WHERE session_id = ?
                ORDER BY timestamp ASC, id ASC
                """,
                [session_id]
            )

            session_data["messages"] = [
                {
                    "id": row[0],
                    "role": row[1],
                    "content": row[2],
                    "timestamp": row[3],
                    "searchLogs": json.loads(row[4]) if row[4] else [],
                    "isStreaming": row[5],
                }
                for row in message_rows
            ]
            return session_data

        except Exception as e:
            logger.error(f"Failed to get session {session_id}: {e}")
            return None

    def get_all_sessions(self, limit: int = 100, offset: int = 0) -> List[Dict[str, Any]]:
        """
        Retrieve all chat sessions (without messages), most recently updated first

        Args:
            limit: Maximum number of sessions to retrieve
            offset: Number of sessions to skip

        Returns:
            List of session dictionaries (empty if the query failed)
        """
        try:
            rows = self.db.fetch_all(
                """
                SELECT session_id, title, created_at, updated_at, settings, message_count
                FROM chat_sessions
                ORDER BY updated_at DESC
                LIMIT ? OFFSET ?
                """,
                [limit, offset]
            )

            return [self._row_to_session(row) for row in rows]

        except Exception as e:
            logger.error(f"Failed to get all sessions: {e}")
            return []

    def delete_session(self, session_id: str) -> bool:
        """
        Delete a chat session and all its messages

        Args:
            session_id: Session ID

        Returns:
            True if successful, False if the delete failed (error is logged)
        """
        try:
            # Delete messages first
            self.db.delete_data("chat_messages", "session_id = ?", [session_id])

            # Delete session
            self.db.delete_data("chat_sessions", "session_id = ?", [session_id])

            logger.info(f"Deleted session: {session_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to delete session {session_id}: {e}")
            return False

    def get_session_count(self) -> int:
        """Get total number of sessions"""
        return self.db.get_table_count("chat_sessions")

    def search_sessions(self, query: str, limit: int = 20) -> List[Dict[str, Any]]:
        """
        Search sessions by title or message content (case-insensitive substring)

        Args:
            query: Search query
            limit: Maximum results

        Returns:
            List of matching sessions, title matches first, without
            duplicates (empty if the search failed)
        """
        try:
            pattern = f"%{query.lower()}%"

            # Search in session titles
            title_matches = self.db.fetch_all(
                """
                SELECT DISTINCT session_id, title, created_at, updated_at, settings, message_count
                FROM chat_sessions
                WHERE LOWER(title) LIKE ?
                ORDER BY updated_at DESC
                LIMIT ?
                """,
                [pattern, limit]
            )

            # Search in message content
            content_matches = self.db.fetch_all(
                """
                SELECT DISTINCT s.session_id, s.title, s.created_at, s.updated_at, s.settings, s.message_count
                FROM chat_sessions s
                JOIN chat_messages m ON s.session_id = m.session_id
                WHERE LOWER(m.content) LIKE ?
                ORDER BY s.updated_at DESC
                LIMIT ?
                """,
                [pattern, limit]
            )

            # Combine and deduplicate results, keeping first occurrence
            seen_ids = set()
            sessions = []
            for row in title_matches + content_matches:
                if row[0] in seen_ids:
                    continue
                seen_ids.add(row[0])
                sessions.append(self._row_to_session(row))

            return sessions[:limit]

        except Exception as e:
            logger.error(f"Failed to search sessions: {e}")
            return []

    def close(self):
        """Close database connection (safe to call more than once)"""
        db = getattr(self, "db", None)
        if db:
            # Drop the reference first so __exit__ followed by __del__ (or a
            # repeated close()) never closes the same connection twice.
            self.db = None
            db.close()
            logger.info("History storage closed")

    def __enter__(self):
        """Context manager entry"""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit"""
        self.close()

    def __del__(self):
        """Destructor to ensure connection is closed"""
        # getattr guards against instances whose __init__ failed before
        # `db` was assigned.
        if getattr(self, "db", None):
            self.close()