PyPI - sirchmunk - Versions diffs - 0.0.1.post1__py3-none-any.whl → 0.0.2__py3-none-any.whl - Mend

sirchmunk 0.0.1.post1__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

sirchmunk/api/__init__.py +1 -0
sirchmunk/api/chat.py +1123 -0
sirchmunk/api/components/__init__.py +0 -0
sirchmunk/api/components/history_storage.py +402 -0
sirchmunk/api/components/monitor_tracker.py +518 -0
sirchmunk/api/components/settings_storage.py +353 -0
sirchmunk/api/history.py +254 -0
sirchmunk/api/knowledge.py +411 -0
sirchmunk/api/main.py +120 -0
sirchmunk/api/monitor.py +219 -0
sirchmunk/api/run_server.py +54 -0
sirchmunk/api/search.py +230 -0
sirchmunk/api/settings.py +309 -0
sirchmunk/api/tools.py +315 -0
sirchmunk/cli/__init__.py +11 -0
sirchmunk/cli/cli.py +789 -0
sirchmunk/learnings/knowledge_base.py +5 -2
sirchmunk/llm/prompts.py +12 -1
sirchmunk/retrieve/text_retriever.py +186 -2
sirchmunk/scan/file_scanner.py +2 -2
sirchmunk/schema/knowledge.py +119 -35
sirchmunk/search.py +384 -26
sirchmunk/storage/__init__.py +2 -2
sirchmunk/storage/{knowledge_manager.py → knowledge_storage.py} +265 -60
sirchmunk/utils/constants.py +7 -5
sirchmunk/utils/embedding_util.py +217 -0
sirchmunk/utils/tokenizer_util.py +36 -1
sirchmunk/version.py +1 -1
{sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/METADATA +124 -9
sirchmunk-0.0.2.dist-info/RECORD +69 -0
{sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/WHEEL +1 -1
sirchmunk-0.0.2.dist-info/top_level.txt +2 -0
sirchmunk_mcp/__init__.py +25 -0
sirchmunk_mcp/cli.py +478 -0
sirchmunk_mcp/config.py +276 -0
sirchmunk_mcp/server.py +355 -0
sirchmunk_mcp/service.py +327 -0
sirchmunk_mcp/setup.py +15 -0
sirchmunk_mcp/tools.py +410 -0
sirchmunk-0.0.1.post1.dist-info/RECORD +0 -45
sirchmunk-0.0.1.post1.dist-info/top_level.txt +0 -1
{sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/entry_points.txt +0 -0
{sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/licenses/LICENSE +0 -0

sirchmunk/api/knowledge.py ADDED Viewed

@@ -0,0 +1,411 @@
+# Copyright (c) ModelScope Contributors. All rights reserved.
+"""
+Knowledge Base API endpoints
+Provides CRUD and analytics for KnowledgeCluster objects
+"""
+from fastapi import APIRouter, HTTPException
+from typing import Optional
+from pydantic import BaseModel
+from sirchmunk.storage.knowledge_storage import KnowledgeStorage
+from sirchmunk.schema.knowledge import AbstractionLevel
+router = APIRouter(prefix="/api/v1/knowledge", tags=["knowledge"])
+# Module-level KnowledgeStorage instance shared by every endpoint in this module
+km = KnowledgeStorage()
+# === Request/Response Models ===
+class SearchRequest(BaseModel):  # request body for the POST /search endpoint
+    query: str  # passed as the first argument to km.find()
+    limit: int = 10  # passed to km.find() as limit=...; defaults to 10
+# === API Endpoints ===
+@router.get("/list")
+async def list_knowledge_bases_alias():
+    """Alias for /clusters endpoint (backward compatibility)"""
+    return await get_all_clusters(limit=100)
+@router.get("/clusters")
+async def get_all_clusters(
+    limit: int = 100,
+    lifecycle: Optional[str] = None,
+    abstraction_level: Optional[str] = None
+):
+    """
+    Get all knowledge clusters with optional filtering
+    Query params:
+        limit: Maximum number of clusters to return
+        lifecycle: Filter by lifecycle (STABLE, EMERGING, CONTESTED, DEPRECATED)
+        abstraction_level: Filter by abstraction level
+    """
+    try:
+        stats = km.get_stats()
+        # Get all clusters by searching with empty query (returns all)
+        all_clusters = []
+        # Fetch clusters using DuckDB directly
+        sql = "SELECT * FROM knowledge_clusters"
+        where_clauses = []
+        params = []
+        if lifecycle:
+            where_clauses.append("lifecycle = ?")
+            params.append(lifecycle.upper())
+        if abstraction_level:
+            where_clauses.append("abstraction_level = ?")
+            params.append(abstraction_level.upper())
+        if where_clauses:
+            sql += " WHERE " + " AND ".join(where_clauses)
+        sql += f" ORDER BY last_modified DESC LIMIT {limit}"
+        try:
+            rows = km.db.fetch_all(sql, params if params else None)
+        except Exception as fetch_error:
+            # If table is missing or schema is out of date, recreate and return empty list.
+            km._create_table()
+            rows = []
+        clusters = []
+        for row in rows:
+            try:
+                clusters.append(km._row_to_cluster(row))
+            except Exception:
+                # Skip malformed rows to avoid failing the whole request
+                continue
+        return {
+            "success": True,
+                "count": len(clusters),
+                "total": stats.get('custom_stats', {}).get('total_clusters', 0),
+                "data": [c.to_dict() for c in clusters]
+            }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@router.get("/clusters/{cluster_id}")
+async def get_cluster(cluster_id: str):
+    """Get a specific knowledge cluster by ID"""
+    try:
+        cluster = await km.get(cluster_id)
+        if not cluster:
+            raise HTTPException(status_code=404, detail="Cluster not found")
+        return {
+            "success": True,
+                "data": cluster.to_dict()
+            }
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@router.post("/search")
+async def search_clusters(request: SearchRequest):
+    """
+    Search knowledge clusters by query
+    Searches across: id, name, description, content, patterns
+    """
+    try:
+        results = await km.find(request.query, limit=request.limit)
+        return {
+            "success": True,
+                "query": request.query,
+                "count": len(results),
+                "data": [c.to_dict() for c in results]
+            }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@router.get("/stats")
+async def get_knowledge_stats():
+    """
+    Get comprehensive knowledge base statistics
+    Returns:
+        - Total clusters
+        - Lifecycle distribution
+        - Abstraction level distribution
+        - Average confidence
+        - Hotness distribution
+        - Top patterns
+        - Recent activity
+    """
+    try:
+        stats = km.get_stats()
+        custom_stats = stats.get('custom_stats', {})
+        # Get lifecycle distribution
+        lifecycle_dist = custom_stats.get('lifecycle_distribution', {})
+        # Get abstraction level distribution
+        abstraction_dist = {}
+        for level in AbstractionLevel:
+            count_row = km.db.fetch_one(
+                "SELECT COUNT(*) FROM knowledge_clusters WHERE abstraction_level = ?",
+                [level.name]
+            )
+            abstraction_dist[level.name] = count_row[0] if count_row else 0
+        # Get confidence statistics
+        confidence_stats_row = km.db.fetch_one(
+            """
+            SELECT
+                MIN(confidence) as min_confidence,
+                MAX(confidence) as max_confidence,
+                AVG(confidence) as avg_confidence
+            FROM knowledge_clusters
+            WHERE confidence IS NOT NULL
+            """
+        )
+        confidence_stats = {
+            "min": confidence_stats_row[0] if confidence_stats_row and confidence_stats_row[0] else 0,
+            "max": confidence_stats_row[1] if confidence_stats_row and confidence_stats_row[1] else 0,
+            "avg": round(confidence_stats_row[2], 4) if confidence_stats_row and confidence_stats_row[2] else 0,
+        }
+        # Get hotness statistics
+        hotness_stats_row = km.db.fetch_one(
+            """
+            SELECT
+                MIN(hotness) as min_hotness,
+                MAX(hotness) as max_hotness,
+                AVG(hotness) as avg_hotness
+            FROM knowledge_clusters
+            WHERE hotness IS NOT NULL
+            """
+        )
+        hotness_stats = {
+            "min": hotness_stats_row[0] if hotness_stats_row and hotness_stats_row[0] else 0,
+            "max": hotness_stats_row[1] if hotness_stats_row and hotness_stats_row[1] else 0,
+            "avg": round(hotness_stats_row[2], 4) if hotness_stats_row and hotness_stats_row[2] else 0,
+        }
+        # Get top 10 most recent clusters
+        recent_rows = km.db.fetch_all(
+            """
+            SELECT id, name, last_modified
+            FROM knowledge_clusters
+            ORDER BY last_modified DESC
+            LIMIT 10
+            """
+        )
+        recent_clusters = [
+            {
+                "id": row[0],
+                "name": row[1],
+                "last_modified": row[2]
+            }
+            for row in recent_rows
+        ]
+        # Get top 10 highest confidence clusters
+        top_confidence_rows = km.db.fetch_all(
+            """
+            SELECT id, name, confidence
+            FROM knowledge_clusters
+            WHERE confidence IS NOT NULL
+            ORDER BY confidence DESC
+            LIMIT 10
+            """
+        )
+        top_confidence = [
+            {
+                "id": row[0],
+                "name": row[1],
+                "confidence": row[2]
+            }
+            for row in top_confidence_rows
+        ]
+        # Get top 10 hottest clusters
+        top_hotness_rows = km.db.fetch_all(
+            """
+            SELECT id, name, hotness
+            FROM knowledge_clusters
+            WHERE hotness IS NOT NULL
+            ORDER BY hotness DESC
+            LIMIT 10
+            """
+        )
+        top_hotness = [
+            {
+                "id": row[0],
+                "name": row[1],
+                "hotness": row[2]
+            }
+            for row in top_hotness_rows
+        ]
+        # Timeline data (clusters created per day for last 30 days)
+        timeline_rows = km.db.fetch_all(
+            """
+            SELECT
+                CAST(create_time AS DATE) as date,
+                COUNT(*) as count
+            FROM knowledge_clusters
+            WHERE create_time >= current_date - INTERVAL '30 days'
+            GROUP BY CAST(create_time AS DATE)
+            ORDER BY date ASC
+            """
+        )
+        timeline = [
+            {
+                "date": str(row[0]),
+                "count": row[1]
+            }
+            for row in timeline_rows
+        ]
+        return {
+            "success": True,
+            "data": {
+                    "overview": {
+                        "total_clusters": custom_stats.get('total_clusters', 0),
+                        "avg_confidence": custom_stats.get('average_confidence', 0),
+                    },
+                    "lifecycle_distribution": lifecycle_dist,
+                    "abstraction_level_distribution": abstraction_dist,
+                    "confidence_stats": confidence_stats,
+                    "hotness_stats": hotness_stats,
+                    "recent_clusters": recent_clusters,
+                    "top_confidence_clusters": top_confidence,
+                    "top_hotness_clusters": top_hotness,
+                    "timeline": timeline,
+                }
+            }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@router.get("/patterns")
+async def get_top_patterns(limit: int = 20):
+    """
+    Get most common patterns across all clusters
+    Query params:
+        limit: Number of top patterns to return
+    """
+    try:
+        # Fetch all patterns and count occurrences
+        rows = km.db.fetch_all("SELECT patterns FROM knowledge_clusters WHERE patterns IS NOT NULL")
+        import json
+        from collections import Counter
+        pattern_counter = Counter()
+        for row in rows:
+            patterns_json = row[0]
+            if patterns_json:
+                patterns = json.loads(patterns_json)
+                pattern_counter.update(patterns)
+        top_patterns = [
+            {"pattern": pattern, "count": count}
+            for pattern, count in pattern_counter.most_common(limit)
+        ]
+        return {
+            "success": True,
+                "count": len(top_patterns),
+                "data": top_patterns
+            }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@router.get("/graph")
+async def get_knowledge_graph():
+    """
+    Get knowledge graph data (nodes and edges)
+    Returns clusters as nodes and related_clusters as edges
+    """
+    try:
+        # Get all clusters
+        rows = km.db.fetch_all(
+            "SELECT id, name, confidence, hotness, lifecycle, abstraction_level, related_clusters FROM knowledge_clusters"
+        )
+        import json
+        nodes = []
+        edges = []
+        for row in rows:
+            cluster_id, name, confidence, hotness, lifecycle, abstraction_level, related_clusters_json = row
+            # Add node
+            nodes.append({
+                "id": cluster_id,
+                "name": name,
+                "confidence": confidence,
+                "hotness": hotness,
+                "lifecycle": lifecycle,
+                "abstraction_level": abstraction_level,
+            })
+            # Add edges
+            if related_clusters_json:
+                related_clusters = json.loads(related_clusters_json)
+                for rc in related_clusters:
+                    edges.append({
+                        "source": cluster_id,
+                        "target": rc["target_cluster_id"],
+                        "weight": rc["weight"],
+                        "type": rc["source"]
+                    })
+        return {
+            "success": True,
+                "data": {
+                    "nodes": nodes,
+                    "edges": edges
+                }
+            }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@router.delete("/clusters/{cluster_id}")
+async def delete_cluster(cluster_id: str):
+    """Delete a knowledge cluster"""
+    try:
+        success = await km.remove(cluster_id)
+        if not success:
+            raise HTTPException(status_code=404, detail="Cluster not found")
+        return {
+            "success": True,
+                "message": f"Cluster {cluster_id} deleted successfully"
+            }
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@router.delete("/clusters")
+async def clear_all_clusters():
+    """Clear all knowledge clusters (use with caution!)"""
+    try:
+        success = await km.clear()
+        return {
+            "success": success,
+            "message": "All clusters cleared successfully"
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))

sirchmunk/api/main.py ADDED Viewed

@@ -0,0 +1,120 @@
+# Copyright (c) ModelScope Contributors. All rights reserved.
+"""
+Main FastAPI application for Sirchmunk API
+Combines all API modules and provides centralized configuration
+"""
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+import uvicorn
+# Import all API routers
+from .knowledge import router as knowledge_router
+from .settings import router as settings_router
+from .history import router as history_router, dashboard_router
+from .chat import router as chat_router
+from .monitor import router as monitor_router
+from .search import router as search_router
+# Create the FastAPI application object that the routers and handlers below attach to
+app = FastAPI(
+    title="Sirchmunk API",
+    description="APIs for Sirchmunk",
+    version="1.0.0",  # the same version string is repeated in the root() payload
+    docs_url="/docs",
+    redoc_url="/redoc"
+)
+# Configure CORS
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # NOTE(review): wildcard origins are combined with allow_credentials=True below; in production, specify actual origins
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Include all API routers (each router is imported from a sibling module above)
+app.include_router(knowledge_router)
+app.include_router(settings_router)
+app.include_router(history_router)
+app.include_router(dashboard_router)  # exported by the history module alongside history_router
+app.include_router(chat_router)
+app.include_router(monitor_router)
+app.include_router(search_router)
+@app.get("/")
+async def root():
+    """Root endpoint with API information"""
+    return {
+        "name": "Sirchmunk API",
+        "version": "1.0.0",
+        "description": "APIs for Sirchmunk",
+        "status": "running",
+        "endpoints": {
+            "search": "/api/v1/search",
+            "knowledge": "/api/v1/knowledge",
+            "settings": "/api/v1/settings",
+            "history": "/api/v1/history",
+            "chat": "/api/v1/chat",
+            "monitor": "/api/v1/monitor"
+        },
+        "documentation": {
+            "swagger": "/docs",
+            "redoc": "/redoc"
+        }
+    }
+@app.get("/health")
+async def health_check():
+    """Health check endpoint"""
+    return {
+        "status": "healthy",
+        "timestamp": "2024-01-13T17:30:00Z",
+        "services": {
+            "api": "running",
+            "database": "connected",
+            "llm": "available",
+            "embedding": "available"
+        }
+    }
+@app.exception_handler(404)
+async def not_found_handler(request, exc):
+    """Custom 404 handler"""
+    return JSONResponse(
+        status_code=404,
+        content={
+            "success": False,
+            "error": {
+                "code": "NOT_FOUND",
+                "message": "The requested resource was not found",
+                "path": str(request.url.path)
+            }
+        }
+    )
+@app.exception_handler(500)
+async def internal_error_handler(request, exc):
+    """Custom 500 handler"""
+    return JSONResponse(
+        status_code=500,
+        content={
+            "success": False,
+            "error": {
+                "code": "INTERNAL_ERROR",
+                "message": "An internal server error occurred",
+                "details": "Please try again later or contact support"
+            }
+        }
+    )
+if __name__ == "__main__":
+    uvicorn.run(
+        "main:app",
+        host="0.0.0.0",
+        port=8584,
+        reload=True,
+        log_level="info"
+    )

sirchmunk 0.0.1.post1__py3-none-any.whl → 0.0.2__py3-none-any.whl

sirchmunk 0.0.1.post1__py3-none-any.whl → 0.0.2__py3-none-any.whl