sirchmunk 0.0.1.post1__py3-none-any.whl → 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sirchmunk/api/__init__.py +1 -0
- sirchmunk/api/chat.py +1123 -0
- sirchmunk/api/components/__init__.py +0 -0
- sirchmunk/api/components/history_storage.py +402 -0
- sirchmunk/api/components/monitor_tracker.py +518 -0
- sirchmunk/api/components/settings_storage.py +353 -0
- sirchmunk/api/history.py +254 -0
- sirchmunk/api/knowledge.py +411 -0
- sirchmunk/api/main.py +120 -0
- sirchmunk/api/monitor.py +219 -0
- sirchmunk/api/run_server.py +54 -0
- sirchmunk/api/search.py +230 -0
- sirchmunk/api/settings.py +309 -0
- sirchmunk/api/tools.py +315 -0
- sirchmunk/cli/__init__.py +11 -0
- sirchmunk/cli/cli.py +789 -0
- sirchmunk/learnings/knowledge_base.py +5 -2
- sirchmunk/llm/prompts.py +12 -1
- sirchmunk/retrieve/text_retriever.py +186 -2
- sirchmunk/scan/file_scanner.py +2 -2
- sirchmunk/schema/knowledge.py +119 -35
- sirchmunk/search.py +384 -26
- sirchmunk/storage/__init__.py +2 -2
- sirchmunk/storage/{knowledge_manager.py → knowledge_storage.py} +265 -60
- sirchmunk/utils/constants.py +7 -5
- sirchmunk/utils/embedding_util.py +217 -0
- sirchmunk/utils/tokenizer_util.py +36 -1
- sirchmunk/version.py +1 -1
- {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/METADATA +124 -9
- sirchmunk-0.0.2.dist-info/RECORD +69 -0
- {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/WHEEL +1 -1
- sirchmunk-0.0.2.dist-info/top_level.txt +2 -0
- sirchmunk_mcp/__init__.py +25 -0
- sirchmunk_mcp/cli.py +478 -0
- sirchmunk_mcp/config.py +276 -0
- sirchmunk_mcp/server.py +355 -0
- sirchmunk_mcp/service.py +327 -0
- sirchmunk_mcp/setup.py +15 -0
- sirchmunk_mcp/tools.py +410 -0
- sirchmunk-0.0.1.post1.dist-info/RECORD +0 -45
- sirchmunk-0.0.1.post1.dist-info/top_level.txt +0 -1
- {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/entry_points.txt +0 -0
- {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/licenses/LICENSE +0 -0
|
File without changes
|
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
# Copyright (c) ModelScope Contributors. All rights reserved.
|
|
2
|
+
"""
|
|
3
|
+
Chat History Storage using DuckDB
|
|
4
|
+
Provides persistent storage for chat sessions and messages
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import json
|
|
9
|
+
from typing import Dict, Any, List, Optional
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from loguru import logger
|
|
13
|
+
|
|
14
|
+
from sirchmunk.storage.duckdb import DuckDBManager
|
|
15
|
+
from sirchmunk.utils.constants import DEFAULT_SIRCHMUNK_WORK_PATH
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class HistoryStorage:
    """
    Manages persistent storage of chat history using DuckDB

    Architecture:
    - Stores chat sessions and messages in DuckDB
    - Follows Single Responsibility Principle (SRP)
    - Provides clean interface for CRUD operations
    - Decoupled from API layer (Dependency Inversion Principle)

    Error handling:
    - Read/write methods log failures and return a neutral value
      (False, None or an empty list) instead of raising.
    - get_session_count() is the exception: it lets errors propagate.
    """

    def __init__(self, work_path: Optional[str] = None):
        """
        Initialize History Storage

        Args:
            work_path: Base work path. If None, uses SIRCHMUNK_WORK_PATH env variable
                (falling back to DEFAULT_SIRCHMUNK_WORK_PATH)
        """
        # Get work path from env if not provided
        if work_path is None:
            work_path = os.getenv("SIRCHMUNK_WORK_PATH", DEFAULT_SIRCHMUNK_WORK_PATH)

        # Create history storage path (expand ~ and resolve to absolute path)
        self.history_path = Path(work_path).expanduser().resolve() / ".cache" / "history"
        self.history_path.mkdir(parents=True, exist_ok=True)

        # Initialize DuckDB
        self.db_path = str(self.history_path / "chat_history.db")
        self.db = DuckDBManager(db_path=self.db_path)

        # Create tables if not exist
        self._initialize_tables()

        logger.info(f"History storage initialized at: {self.db_path}")

    @staticmethod
    def _row_to_session(row) -> Dict[str, Any]:
        """
        Convert a session row into a dictionary

        Args:
            row: Sequence ordered as (session_id, title, created_at,
                updated_at, settings, message_count)

        Returns:
            Session dictionary; an empty/NULL settings value becomes {}
        """
        return {
            "session_id": row[0],
            "title": row[1],
            "created_at": row[2],
            "updated_at": row[3],
            "settings": json.loads(row[4]) if row[4] else {},
            "message_count": row[5],
        }

    @staticmethod
    def _escape_like(text: str) -> str:
        """
        Escape LIKE wildcards so the text is matched literally

        The backslash is escaped first so that the escapes added for
        '%' and '_' are not themselves doubled. Must be used together
        with "ESCAPE '\\'" in the SQL statement.
        """
        return (
            text.replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )

    @staticmethod
    def _normalize_timestamp(value: Any) -> Any:
        """
        Normalize a message timestamp for storage

        Args:
            value: Unix timestamp (int or float, in seconds), an ISO format
                string, or an empty value

        Returns:
            ISO format string for numeric or missing input; any other
            non-empty value is returned unchanged
        """
        if isinstance(value, (int, float)):
            return datetime.fromtimestamp(value).isoformat()
        if not value:
            return datetime.now().isoformat()
        return value

    def _initialize_tables(self):
        """Create database tables (and the message index) for chat history"""

        # Chat sessions table
        sessions_schema = {
            "session_id": "VARCHAR PRIMARY KEY",
            "title": "VARCHAR",
            "created_at": "TIMESTAMP NOT NULL",
            "updated_at": "TIMESTAMP NOT NULL",
            "settings": "JSON",
            "message_count": "INTEGER DEFAULT 0",
        }
        self.db.create_table("chat_sessions", sessions_schema, if_not_exists=True)

        # Chat messages table
        messages_schema = {
            "id": "VARCHAR PRIMARY KEY",
            "session_id": "VARCHAR NOT NULL",
            "role": "VARCHAR NOT NULL",
            "content": "TEXT NOT NULL",
            "timestamp": "TIMESTAMP NOT NULL",
            "search_logs": "JSON",
            "is_streaming": "BOOLEAN DEFAULT FALSE",
        }
        self.db.create_table("chat_messages", messages_schema, if_not_exists=True)

        # Create index for faster per-session queries.
        # The previous guard ("if not table_exists") could never be true right
        # after create_table, so the index was never built. Creation is
        # attempted on every start-up; a failure (e.g. the index already
        # exists from an earlier run) must not prevent the storage from opening.
        try:
            self.db.create_index("chat_messages", ["session_id"], "idx_messages_session")
        except Exception as e:
            logger.debug(f"Skipped creating index idx_messages_session: {e}")

    def save_session(self, session_data: Dict[str, Any]) -> bool:
        """
        Save or update a chat session

        For a new session, missing fields get defaults (title "Chat Session",
        current time for both timestamps, empty settings, message_count 0).
        For an existing session only the supplied fields are overwritten, so
        a partial update does not reset the stored values; updated_at
        defaults to the current time.

        Args:
            session_data: Dictionary containing session information
                - session_id: str
                - title: str (optional)
                - created_at: str (ISO format, optional)
                - updated_at: str (ISO format, optional)
                - settings: dict (optional)
                - message_count: int (optional)

        Returns:
            True if successful
        """
        try:
            session_id = session_data["session_id"]
            now = datetime.now().isoformat()

            # Check if session exists
            existing = self.db.fetch_one(
                "SELECT session_id FROM chat_sessions WHERE session_id = ?",
                [session_id],
            )

            if existing:
                # Update existing session: touch only what the caller provided
                changes = {"updated_at": session_data.get("updated_at") or now}
                if session_data.get("created_at"):
                    # created_at is NOT NULL, so never overwrite it with None
                    changes["created_at"] = session_data["created_at"]
                if "title" in session_data:
                    changes["title"] = session_data["title"]
                if "settings" in session_data:
                    changes["settings"] = json.dumps(session_data["settings"])
                if "message_count" in session_data:
                    changes["message_count"] = session_data["message_count"]

                self.db.update_data(
                    "chat_sessions",
                    set_clause=changes,
                    where_clause="session_id = ?",
                    where_params=[session_id],
                )
                logger.debug(f"Updated session: {session_id}")
            else:
                # Insert new session; both timestamps are NOT NULL columns
                record = {
                    "session_id": session_id,
                    "title": session_data.get("title", "Chat Session"),
                    "created_at": session_data.get("created_at") or now,
                    "updated_at": session_data.get("updated_at") or now,
                    "settings": json.dumps(session_data.get("settings", {})),
                    "message_count": session_data.get("message_count", 0),
                }
                self.db.insert_data("chat_sessions", record)
                logger.debug(f"Created new session: {session_id}")

            return True

        except Exception as e:
            logger.error(f"Failed to save session: {e}")
            return False

    def save_message(self, session_id: str, message_data: Dict[str, Any]) -> bool:
        """
        Save a chat message

        Args:
            session_id: Session ID
            message_data: Dictionary containing message information
                - role: str ("user" or "assistant")
                - content: str
                - timestamp: str (ISO format) or int/float (Unix timestamp,
                  seconds); defaults to the current time
                - searchLogs (or search_logs): list (optional)
                - isStreaming (or is_streaming): bool (optional)

        Returns:
            True if successful
        """
        try:
            # Generate message ID. The random suffix keeps the primary key
            # unique even when two messages are saved within one clock tick.
            message_id = (
                f"{session_id}_{datetime.now().timestamp()}_{os.urandom(4).hex()}"
            )

            # Handle timestamp conversion
            timestamp = self._normalize_timestamp(message_data.get("timestamp"))

            # camelCase keys mirror what get_session() returns; the snake_case
            # spellings are accepted as aliases.
            search_logs = message_data.get(
                "searchLogs", message_data.get("search_logs", [])
            )
            is_streaming = message_data.get(
                "isStreaming", message_data.get("is_streaming", False)
            )

            # Prepare data
            record = {
                "id": message_id,
                "session_id": session_id,
                "role": message_data["role"],
                "content": message_data["content"],
                "timestamp": timestamp,
                "search_logs": json.dumps(search_logs),
                "is_streaming": is_streaming,
            }

            # Insert message
            self.db.insert_data("chat_messages", record)

            # Update session message count (recounted from the messages table)
            self.db.execute(
                """
                UPDATE chat_sessions
                SET message_count = (
                    SELECT COUNT(*) FROM chat_messages WHERE session_id = ?
                ),
                updated_at = ?
                WHERE session_id = ?
                """,
                [session_id, datetime.now().isoformat(), session_id],
            )

            logger.debug(f"Saved message to session: {session_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to save message: {e}")
            return False

    def get_session(self, session_id: str) -> Optional[Dict[str, Any]]:
        """
        Retrieve a chat session with all messages

        Args:
            session_id: Session ID

        Returns:
            Dictionary containing session data and messages (ordered by
            timestamp, oldest first), or None if not found
        """
        try:
            # Get session info. Columns are named explicitly so the positional
            # mapping does not depend on the table's physical column order.
            session_row = self.db.fetch_one(
                """
                SELECT session_id, title, created_at, updated_at, settings, message_count
                FROM chat_sessions
                WHERE session_id = ?
                """,
                [session_id],
            )

            if not session_row:
                return None

            session_data = self._row_to_session(session_row)

            # Get messages
            message_rows = self.db.fetch_all(
                """
                SELECT id, role, content, timestamp, search_logs, is_streaming
                FROM chat_messages
                WHERE session_id = ?
                ORDER BY timestamp ASC
                """,
                [session_id],
            )

            session_data["messages"] = [
                {
                    "id": row[0],
                    "role": row[1],
                    "content": row[2],
                    "timestamp": row[3],
                    "searchLogs": json.loads(row[4]) if row[4] else [],
                    "isStreaming": row[5],
                }
                for row in message_rows
            ]
            return session_data

        except Exception as e:
            logger.error(f"Failed to get session {session_id}: {e}")
            return None

    def get_all_sessions(self, limit: int = 100, offset: int = 0) -> List[Dict[str, Any]]:
        """
        Retrieve all chat sessions (without messages)

        Args:
            limit: Maximum number of sessions to retrieve
            offset: Number of sessions to skip

        Returns:
            List of session dictionaries, most recently updated first
        """
        try:
            rows = self.db.fetch_all(
                """
                SELECT session_id, title, created_at, updated_at, settings, message_count
                FROM chat_sessions
                ORDER BY updated_at DESC
                LIMIT ? OFFSET ?
                """,
                [limit, offset],
            )

            return [self._row_to_session(row) for row in rows]

        except Exception as e:
            logger.error(f"Failed to get all sessions: {e}")
            return []

    def delete_session(self, session_id: str) -> bool:
        """
        Delete a chat session and all its messages

        Args:
            session_id: Session ID

        Returns:
            True if successful
        """
        try:
            # Delete messages first
            self.db.delete_data("chat_messages", "session_id = ?", [session_id])

            # Delete session
            self.db.delete_data("chat_sessions", "session_id = ?", [session_id])

            logger.info(f"Deleted session: {session_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to delete session {session_id}: {e}")
            return False

    def get_session_count(self) -> int:
        """Get total number of sessions (errors from the database propagate)"""
        return self.db.get_table_count("chat_sessions")

    def search_sessions(self, query: str, limit: int = 20) -> List[Dict[str, Any]]:
        """
        Search sessions by title or message content

        The match is a case-insensitive substring search; '%' and '_' in
        the query are treated as literal characters, not wildcards.

        Args:
            query: Search query
            limit: Maximum results

        Returns:
            List of matching sessions (title matches first, then content
            matches, without duplicates)
        """
        try:
            pattern = f"%{self._escape_like(query.lower())}%"

            # Search in session titles
            title_matches = self.db.fetch_all(
                """
                SELECT DISTINCT session_id, title, created_at, updated_at, settings, message_count
                FROM chat_sessions
                WHERE LOWER(title) LIKE ? ESCAPE '\\'
                ORDER BY updated_at DESC
                LIMIT ?
                """,
                [pattern, limit],
            )

            # Search in message content
            content_matches = self.db.fetch_all(
                """
                SELECT DISTINCT s.session_id, s.title, s.created_at, s.updated_at, s.settings, s.message_count
                FROM chat_sessions s
                JOIN chat_messages m ON s.session_id = m.session_id
                WHERE LOWER(m.content) LIKE ? ESCAPE '\\'
                ORDER BY s.updated_at DESC
                LIMIT ?
                """,
                [pattern, limit],
            )

            # Combine and deduplicate results, keeping first occurrence
            seen_ids = set()
            sessions = []

            for rows in (title_matches, content_matches):
                for row in rows:
                    if row[0] in seen_ids:
                        continue
                    seen_ids.add(row[0])
                    sessions.append(self._row_to_session(row))

            return sessions[:limit]

        except Exception as e:
            logger.error(f"Failed to search sessions: {e}")
            return []

    def close(self):
        """Close database connection (safe to call more than once)"""
        db = getattr(self, "db", None)
        if db is None:
            return

        # Drop the reference first so that a later close() -- e.g. __del__
        # running after __exit__ -- does not close the connection twice.
        self.db = None
        db.close()
        logger.info("History storage closed")

    def __enter__(self):
        """Context manager entry"""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit"""
        self.close()

    def __del__(self):
        """Destructor to ensure connection is closed"""
        try:
            self.close()
        except Exception:
            # Best effort only: a destructor may run during interpreter
            # shutdown, when the database or logger is already torn down.
            pass
|