cosma_backend-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. cosma_backend/__init__.py +14 -0
  2. cosma_backend/__main__.py +4 -0
  3. cosma_backend/api/__init__.py +29 -0
  4. cosma_backend/api/files.py +154 -0
  5. cosma_backend/api/index.py +114 -0
  6. cosma_backend/api/models.py +28 -0
  7. cosma_backend/api/search.py +166 -0
  8. cosma_backend/api/status.py +28 -0
  9. cosma_backend/api/updates.py +67 -0
  10. cosma_backend/api/watch.py +156 -0
  11. cosma_backend/app.py +192 -0
  12. cosma_backend/db/__init__.py +2 -0
  13. cosma_backend/db/database.py +638 -0
  14. cosma_backend/discoverer/__init__.py +1 -0
  15. cosma_backend/discoverer/discoverer.py +34 -0
  16. cosma_backend/embedder/__init__.py +1 -0
  17. cosma_backend/embedder/embedder.py +637 -0
  18. cosma_backend/logging.py +73 -0
  19. cosma_backend/models/__init__.py +3 -0
  20. cosma_backend/models/file.py +169 -0
  21. cosma_backend/models/status.py +10 -0
  22. cosma_backend/models/update.py +202 -0
  23. cosma_backend/models/watch.py +132 -0
  24. cosma_backend/pipeline/__init__.py +2 -0
  25. cosma_backend/pipeline/pipeline.py +222 -0
  26. cosma_backend/schema.sql +319 -0
  27. cosma_backend/searcher/__init__.py +1 -0
  28. cosma_backend/searcher/searcher.py +397 -0
  29. cosma_backend/summarizer/__init__.py +44 -0
  30. cosma_backend/summarizer/summarizer.py +1075 -0
  31. cosma_backend/utils/bundled.py +24 -0
  32. cosma_backend/utils/pubsub.py +31 -0
  33. cosma_backend/utils/sse.py +92 -0
  34. cosma_backend/watcher/__init__.py +1 -0
  35. cosma_backend/watcher/awatchdog.py +80 -0
  36. cosma_backend/watcher/watcher.py +257 -0
  37. cosma_backend-0.1.0.dist-info/METADATA +23 -0
  38. cosma_backend-0.1.0.dist-info/RECORD +39 -0
  39. cosma_backend-0.1.0.dist-info/WHEEL +4 -0
cosma_backend/__init__.py
@@ -0,0 +1,14 @@
+ from .app import app as app
+ from .app import run as run
+
+ def serve():
+     import uvicorn
+
+     uvicorn.run(
+         app, host="127.0.0.1",
+         port=8080,
+         log_level="info",
+         # I can't find a way to gracefully shut down SSE connections,
+         # so this bullshit will have to do for now
+         timeout_graceful_shutdown=5
+     )
cosma_backend/__main__.py
@@ -0,0 +1,4 @@
+ from . import serve
+
+ if __name__ == "__main__":
+     serve()
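With these two modules, the wheel can be started either as a module or by driving uvicorn directly. A minimal launch sketch (assuming the wheel is installed in the active environment and the default host/port above are unchanged):

    # Run the bundled server exactly as __main__.py does:
    #   python -m cosma_backend
    #
    # Or programmatically, mirroring serve() above:
    import uvicorn
    from cosma_backend import app  # the Quart ASGI app re-exported in __init__.py

    uvicorn.run(app, host="127.0.0.1", port=8080, log_level="info")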
cosma_backend/api/__init__.py
@@ -0,0 +1,29 @@
+ """
+ API Blueprint Module
+
+ This module initializes and combines all API blueprints for the application.
+ All API routes will be prefixed with /api when registered in app.py
+ """
+
+ from quart import Blueprint
+
+ from .files import files_bp
+ from .index import index_bp
+ from .search import search_bp
+ from .watch import watch_bp
+ from .updates import updates_bp
+ from .status import status_bp
+
+ # Create the main API blueprint
+ api_blueprint = Blueprint('api', __name__)
+
+ # Register sub-blueprints
+ api_blueprint.register_blueprint(files_bp, url_prefix='/files')
+ api_blueprint.register_blueprint(index_bp, url_prefix='/index')
+ api_blueprint.register_blueprint(search_bp, url_prefix='/search')
+ api_blueprint.register_blueprint(watch_bp, url_prefix='/watch')
+ api_blueprint.register_blueprint(updates_bp, url_prefix='/updates')
+ api_blueprint.register_blueprint(status_bp, url_prefix='/status')
+
+
+ __all__ = ['api_blueprint']
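The docstring states that these routes end up under /api once the blueprint is registered in app.py; that file is part of the wheel (+192 lines) but not reproduced in this diff, so the following is only a sketch of what that registration presumably looks like:

    # Hypothetical registration sketch; cosma_backend/app.py itself is not
    # shown in this diff, only described by the docstring above.
    from quart import Quart

    from cosma_backend.api import api_blueprint

    app = Quart(__name__)
    app.register_blueprint(api_blueprint, url_prefix="/api")
    # files_bp routes then resolve under /api/files/..., index_bp under /api/index/..., etc.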
cosma_backend/api/files.py
@@ -0,0 +1,154 @@
+ """
+ Files API Blueprint
+
+ Handles endpoints related to file operations and retrieval.
+ """
+
+ from dataclasses import dataclass
+ from typing import TYPE_CHECKING
+
+ from quart import Blueprint, current_app
+ from quart_schema import validate_request, validate_response
+
+ if TYPE_CHECKING:
+     from backend.app import app as current_app
+
+ files_bp = Blueprint('files', __name__)
+
+
+ @dataclass
+ class FileResponse:
+     """Response model for a single file"""
+     id: int
+     filename: str
+     extension: str
+     created: str
+     modified: str
+     summary: str
+     keywords: list[str] | None
+
+
+ @dataclass
+ class FilesListResponse:
+     """Response model for list of files"""
+     files: list[FileResponse]
+     total: int
+     page: int
+     per_page: int
+
+
+ # @files_bp.get("/") # type: ignore[return-value]
+ # @validate_response(FilesListResponse, 200)
+ # async def list_files() -> tuple[FilesListResponse, int]:
+ #     """
+ #     Get a list of all indexed files.
+ #
+ #     GET /api/files/
+ #
+ #     Query parameters:
+ #         page: Page number (default: 1)
+ #         per_page: Items per page (default: 50)
+ #         extension: Filter by file extension
+ #
+ #     Returns:
+ #         200: List of files
+ #     """
+ #     # TODO: Implement file listing with pagination
+ #     # 1. Get query parameters
+ #     # 2. Query database with filters and pagination
+ #     # 3. Return formatted response
+ #
+ #     return FilesListResponse(
+ #         files=[],
+ #         total=0,
+ #         page=1,
+ #         per_page=50
+ #     ), 200
+
+
+ @files_bp.get("/<int:file_id>") # type: ignore[return-value]
+ @validate_response(FileResponse, 200)
+ async def get_file(file_id: int) -> tuple[FileResponse, int]:
+     """Get details of a specific file by ID"""
+     # TODO: Implement single file retrieval
+     # 1. Query database for file by ID
+     # 2. Return file details or 404
+
+     async with current_app.db.acquire() as conn:
+         file = await conn.fetchone(
+             "SELECT * FROM files WHERE id = ?;",
+             (file_id,)
+         )
+
+     if not file:
+         return {"error": "File not found"}, 404 # type: ignore
+
+     # TODO: Parse the file data properly
+     return FileResponse(
+         id=file['id'],
+         filename=file['filename'],
+         extension=file['extension'],
+         created=str(file['created']),
+         modified=str(file['modified']),
+         summary=file['summary'],
+         keywords=None # TODO: Parse keywords from database
+     ), 200
+
+
+ @dataclass
+ class DeleteFileResponse:
+     """Response for file deletion"""
+     success: bool
+     message: str
+
+
+ # @files_bp.delete("/<int:file_id>") # type: ignore[return-value]
+ # @validate_response(DeleteFileResponse, 200)
+ # async def delete_file(file_id: int) -> tuple[DeleteFileResponse, int]:
+ #     """
+ #     Delete a file from the index.
+ #
+ #     DELETE /api/files/{file_id}
+ #
+ #     Returns:
+ #         200: File deleted successfully
+ #         404: File not found
+ #     """
+ #     # TODO: Implement file deletion
+ #     # 1. Check if file exists
+ #     # 2. Delete from database
+ #     # 3. Return success/failure
+ #
+ #     return DeleteFileResponse(
+ #         success=True,
+ #         message=f"File {file_id} deleted successfully"
+ #     ), 200
+
+
+ @dataclass
+ class FileStatsResponse:
+     """Response for file statistics"""
+     total_files: int
+     total_size: int
+     file_types: dict[str, int]
+     last_indexed: str | None
+
+
+ @files_bp.get("/stats") # type: ignore[return-value]
+ @validate_response(FileStatsResponse, 200)
+ async def get_stats() -> tuple[FileStatsResponse, int]:
+     """Get statistics about indexed files"""
+     # TODO: Implement statistics gathering
+     # 1. Count total files
+     # 2. Group by extension
+     # 3. Get most recent index timestamp
+
+     async with current_app.db.acquire() as conn:
+         total = await conn.fetchone("SELECT COUNT(*) as count FROM files;")
+
+     return FileStatsResponse(
+         total_files=total['count'] if total else 0,
+         total_size=0, # TODO: Add size tracking
+         file_types={},
+         last_indexed=None
+     ), 200
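Only get_file and get_stats are active here; the list and delete handlers are commented out. Assuming the /api prefix from api/__init__.py and the default local server, exercising the two live routes could look like this (an illustrative sketch, not part of the package):

    # Hedged client sketch for GET /api/files/<id> and GET /api/files/stats.
    import asyncio

    import httpx

    async def main() -> None:
        async with httpx.AsyncClient(base_url="http://127.0.0.1:8080/api") as client:
            stats = await client.get("/files/stats")
            print(stats.json())      # e.g. {"total_files": 0, "total_size": 0, ...}

            one = await client.get("/files/1")
            print(one.status_code)   # 200 if file id 1 exists, 404 otherwise

    asyncio.run(main())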
cosma_backend/api/index.py
@@ -0,0 +1,114 @@
+ """
+ Index API Blueprint
+
+ Handles endpoints related to indexing directories and files.
+ """
+
+ import asyncio
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import TYPE_CHECKING
+
+ from quart import Blueprint, current_app
+ from quart_schema import validate_request, validate_response
+
+ from backend.models import File
+ from backend.pipeline import Pipeline
+
+ if TYPE_CHECKING:
+     from backend.app import app as current_app
+
+ index_bp = Blueprint('index', __name__)
+
+
+ @dataclass
+ class IndexDirectoryRequest:
+     """Request body for indexing a directory"""
+     directory_path: str
+
+
+ @dataclass
+ class IndexDirectoryResponse:
+     """Response for directory indexing"""
+     success: bool
+     message: str
+     files_indexed: int
+
+
+ @index_bp.post("/directory") # type: ignore[return-value]
+ @validate_request(IndexDirectoryRequest)
+ @validate_response(IndexDirectoryResponse, 201)
+ async def index_directory(data: IndexDirectoryRequest) -> tuple[IndexDirectoryResponse, int]:
+     """Index all files in a directory"""
+     current_app.submit_job(current_app.pipeline.process_directory(data.directory_path))
+
+     return IndexDirectoryResponse(
+         success=True,
+         message=f"Started indexing directory: {data.directory_path}",
+         files_indexed=0
+     ), 201
+
+
+ @dataclass
+ class IndexFileRequest:
+     """Request body for indexing a single file"""
+     file_path: str
+
+
+ @dataclass
+ class IndexFileResponse:
+     """Response for file indexing"""
+     success: bool
+     message: str
+     file_id: int | None
+
+
+ @index_bp.post("/file") # type: ignore[return-value]
+ @validate_request(IndexFileRequest)
+ @validate_response(IndexFileResponse, 201)
+ async def index_file(data: IndexFileRequest) -> tuple[IndexFileResponse, int]:
+     """Index a single file"""
+     # TODO: Implement single file indexing
+     # 1. Validate file exists
+     # 2. Parse file using backend.parser
+     # 3. Summarize file using backend.summarizer
+     # 4. Insert into database using current_app.db
+     await current_app.pipeline.process_file(File.from_path(Path(data.file_path)))
+
+     return IndexFileResponse(
+         success=True,
+         message=f"Successfully indexed file: {data.file_path}",
+         file_id=None
+     ), 201
+
+
+ @dataclass
+ class IndexStatusResponse:
+     """Response for indexing status"""
+     is_indexing: bool
+     current_file: str | None
+     files_processed: int
+     total_files: int
+
+
+ # @index_bp.get("/status") # type: ignore[return-value]
+ # @validate_response(IndexStatusResponse, 200)
+ # async def index_status() -> tuple[IndexStatusResponse, int]:
+ #     """
+ #     Get the current status of any ongoing indexing operations.
+ #
+ #     GET /api/index/status
+ #
+ #     Returns:
+ #         200: Current indexing status
+ #     """
+ #     # TODO: Implement status tracking
+ #     # This could use a global state manager or database table
+ #     # to track ongoing indexing operations
+ #
+ #     return IndexStatusResponse(
+ #         is_indexing=False,
+ #         current_file=None,
+ #         files_processed=0,
+ #         total_files=0
+ #     ), 200
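Note the asymmetry above: index_directory hands the work to current_app.submit_job and returns 201 straight away, while index_file awaits the pipeline inline. A request sketch (host and paths are illustrative):

    # Hedged client sketch for the two active indexing routes.
    import asyncio

    import httpx

    async def main() -> None:
        async with httpx.AsyncClient(base_url="http://127.0.0.1:8080/api") as client:
            # Fire-and-forget: the directory is processed as a background job.
            r = await client.post("/index/directory",
                                  json={"directory_path": "/home/user/docs"})
            print(r.status_code, r.json()["message"])

            # Synchronous from the client's point of view: the file runs
            # through the pipeline before the response is returned.
            r = await client.post("/index/file",
                                  json={"file_path": "/home/user/docs/notes.md"})
            print(r.json()["success"])

    asyncio.run(main())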
cosma_backend/api/models.py
@@ -0,0 +1,28 @@
+ from dataclasses import dataclass
+ from datetime import datetime
+
+
+ @dataclass
+ class FileResponse:
+     """Shared API response model for file metadata across endpoints"""
+     file_path: str
+     filename: str
+     extension: str
+     created: datetime
+     modified: datetime
+     accessed: datetime
+     title: str | None
+     summary: str | None
+
+
+ @dataclass
+ class JobResponse:
+     """API response model for watched directory jobs"""
+     id: int
+     path: str
+     is_active: bool
+     recursive: bool
+     file_pattern: str | None
+     last_scan: datetime | None
+     created_at: datetime | None
+     updated_at: datetime | None
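search.py below builds these via r.file_metadata.to_response(); the model that implements that method lives in cosma_backend/models/file.py, which is not shown in this diff. As a rough idea of the mapping (every name here other than FileResponse is hypothetical):

    # Hypothetical mapping sketch only; the real to_response() in
    # cosma_backend/models/file.py is not included in this diff.
    from datetime import datetime
    from pathlib import Path

    from backend.api.models import FileResponse

    def to_response(path: Path, created: datetime, modified: datetime,
                    accessed: datetime, title: str | None,
                    summary: str | None) -> FileResponse:
        return FileResponse(
            file_path=str(path),
            filename=path.name,
            extension=path.suffix,
            created=created,
            modified=modified,
            accessed=accessed,
            title=title,
            summary=summary,
        )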
cosma_backend/api/search.py
@@ -0,0 +1,166 @@
+ """
+ Search API Blueprint
+
+ Handles endpoints related to searching indexed files.
+ """
+
+ from dataclasses import dataclass
+ from typing import TYPE_CHECKING
+
+ from quart import Blueprint, current_app, request
+ from quart_schema import validate_request, validate_response
+
+ from backend.api.models import FileResponse
+
+ if TYPE_CHECKING:
+     from backend.app import app as current_app
+
+ search_bp = Blueprint('search', __name__)
+
+
+ @dataclass
+ class SearchRequest:
+     """Request body for searching files"""
+     query: str
+     filters: dict[str, str] | None = None
+     limit: int = 50
+     directory: str | None = None
+
+
+ @dataclass
+ class SearchResultItem:
+     """A single search result"""
+     file: FileResponse
+     relevance_score: float
+
+
+ @dataclass
+ class SearchResponse:
+     """Response for search queries"""
+     results: list[SearchResultItem]
+
+
+ @search_bp.post("/") # type: ignore[return-value]
+ @validate_request(SearchRequest)
+ @validate_response(SearchResponse, 200)
+ async def search(data: SearchRequest) -> tuple[SearchResponse, int]:
+     """Search for files based on query"""
+     # TODO: Implement search functionality
+     # 1. Parse query and filters
+     # 2. Search database (could use FTS if implemented)
+     # 3. Rank results by relevance
+     # 4. Return sorted results
+     results = await current_app.searcher.search(data.query, directory=data.directory)
+
+     return SearchResponse(
+         results=[
+             SearchResultItem(
+                 r.file_metadata.to_response(), r.combined_score
+             )
+             for r in results],
+     ), 200
+
+
+ @dataclass
+ class KeywordSearchRequest:
+     """Request body for keyword-based search"""
+     keywords: list[str]
+     match_all: bool = False
+
+
+ # @search_bp.post("/keywords") # type: ignore[return-value]
+ # @validate_request(KeywordSearchRequest)
+ # @validate_response(SearchResponse, 200)
+ # async def search_by_keywords(data: KeywordSearchRequest) -> tuple[SearchResponse, int]:
+ #     """
+ #     Search for files by keywords.
+ #
+ #     POST /api/search/keywords
+ #
+ #     Request body:
+ #         {
+ #             "keywords": ["python", "api", "database"],
+ #             "match_all": false
+ #         }
+ #
+ #     Returns:
+ #         200: Search results matching keywords
+ #     """
+ #     # TODO: Implement keyword search
+ #     # 1. Query files with matching keywords
+ #     # 2. If match_all=true, require all keywords
+ #     # 3. If match_all=false, match any keyword
+ #     # 4. Rank by number of matching keywords
+ #
+ #     return SearchResponse(
+ #         results=[],
+ #         total=0,
+ #         query=f"Keywords: {', '.join(data.keywords)}"
+ #     ), 200
+
+
+ @dataclass
+ class SimilarFilesResponse:
+     """Response for similar files query"""
+     files: list[SearchResultItem]
+     total: int
+
+
+ # @search_bp.get("/<int:file_id>/similar") # type: ignore[return-value]
+ # @validate_response(SimilarFilesResponse, 200)
+ # async def find_similar_files(file_id: int) -> tuple[SimilarFilesResponse, int]:
+ #     """
+ #     Find files similar to a given file.
+ #
+ #     GET /api/search/{file_id}/similar
+ #
+ #     Query parameters:
+ #         limit: Maximum number of results (default: 10)
+ #
+ #     Returns:
+ #         200: Similar files
+ #         404: Source file not found
+ #     """
+ #     # TODO: Implement similarity search
+ #     # 1. Get the source file
+ #     # 2. Compare keywords/summaries with other files
+ #     # 3. Rank by similarity
+ #     # 4. Return top N results
+ #
+ #     return SimilarFilesResponse(
+ #         files=[],
+ #         total=0
+ #     ), 200
+
+
+ @dataclass
+ class AutocompleteResponse:
+     """Response for autocomplete suggestions"""
+     suggestions: list[str]
+
+
+ # @search_bp.get("/autocomplete") # type: ignore[return-value]
+ # @validate_response(AutocompleteResponse, 200)
+ # async def autocomplete() -> tuple[AutocompleteResponse, int]:
+ #     """
+ #     Get autocomplete suggestions for search queries.
+ #
+ #     GET /api/search/autocomplete?q=py
+ #
+ #     Query parameters:
+ #         q: Partial query string
+ #         limit: Maximum suggestions (default: 10)
+ #
+ #     Returns:
+ #         200: List of suggestions
+ #     """
+ #     # TODO: Implement autocomplete
+ #     # 1. Get partial query from request args
+ #     # 2. Search for matching filenames, keywords, or common terms
+ #     # 3. Return suggestions
+ #
+ #     query = request.args.get('q', '')
+ #
+ #     return AutocompleteResponse(
+ #         suggestions=[]
+ #     ), 200
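As with files.py, only the plain POST /api/search/ route is live; keyword, similarity, and autocomplete search are stubbed out. A request sketch against the active route (host and query text are illustrative; field names follow the dataclasses above):

    # Hedged client sketch for POST /api/search/.
    import asyncio

    import httpx

    async def main() -> None:
        async with httpx.AsyncClient(base_url="http://127.0.0.1:8080/api") as client:
            r = await client.post("/search/", json={
                "query": "quarterly report",
                "directory": None,   # optional scope, as in SearchRequest
                "limit": 50,
            })
            for item in r.json()["results"]:
                print(item["relevance_score"], item["file"]["file_path"])

    asyncio.run(main())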
cosma_backend/api/status.py
@@ -0,0 +1,28 @@
+ """
+ Status API Blueprint
+
+ Handles endpoints related to app status.
+ """
+
+ import asyncio
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import TYPE_CHECKING
+
+ from quart import Blueprint, abort, current_app, make_response, request, stream_with_context
+ from quart_schema import validate_request, validate_response
+
+ from backend.utils.pubsub import subscribe
+
+ if TYPE_CHECKING:
+     from backend.app import app as current_app
+
+ status_bp = Blueprint('status', __name__)
+
+
+ @status_bp.get("/") # type: ignore[return-value]
+ async def status():
+     """Get current application status and active jobs count"""
+     return {
+         "jobs": len(current_app.jobs),
+     }
cosma_backend/api/updates.py
@@ -0,0 +1,67 @@
+ """
+ Updates API Blueprint
+
+ Handles endpoints related to streaming updates.
+ """
+
+ import asyncio
+ from dataclasses import dataclass
+ import logging
+ from pathlib import Path
+ from typing import TYPE_CHECKING
+
+ from quart import Blueprint, abort, current_app, make_response, request, stream_with_context
+ from quart_schema import validate_request, validate_response
+
+ from backend.logging import sm
+ from backend.models.update import UpdateOpcode
+ from backend.utils.pubsub import subscribe
+ from backend.utils.sse import ServerSentEvent, sse_comment
+
+ if TYPE_CHECKING:
+     from backend.app import app as current_app
+
+ updates_bp = Blueprint('updates', __name__)
+
+ logger = logging.getLogger(__name__)
+
+
+ @updates_bp.get("/") # type: ignore[return-value]
+ async def updates():
+     """Stream real-time updates via Server-Sent Events"""
+     if "text/event-stream" not in request.accept_mimetypes:
+         abort(400)
+
+     @stream_with_context
+     async def updates_generator():
+         # Keep-alive interval: send a comment if no updates for 15 seconds
+         # This prevents proxy/browser timeouts and helps detect dead connections
+         KEEPALIVE_INTERVAL = 15.0
+
+         with subscribe(current_app.updates_hub) as queue:
+             while True:
+                 try:
+                     # Wait for an update with timeout
+                     update = await asyncio.wait_for(queue.get(), timeout=KEEPALIVE_INTERVAL)
+
+                     if update.opcode is UpdateOpcode.SHUTTING_DOWN:
+                         print(update.to_sse().encode())
+                         yield update.to_sse().encode()
+                         return # close connection
+
+                     yield update.to_sse().encode()
+                 except asyncio.TimeoutError:
+                     # No updates received within the keepalive interval
+                     # Send a keep-alive comment (SSE spec: lines starting with : are comments)
+                     yield sse_comment("keepalive")
+
+     response = await make_response(
+         updates_generator(),
+         {
+             'Content-Type': 'text/event-stream',
+             'Cache-Control': 'no-cache',
+             'Transfer-Encoding': 'chunked',
+         },
+     )
+     response.timeout = None # type: ignore[assignment]
+     return response
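The handler refuses clients that do not accept text/event-stream and emits a comment line after roughly 15 seconds of silence as a keep-alive. Consuming the stream could look like this (URL assumes the /api prefix and the default local server):

    # Hedged SSE consumer sketch for GET /api/updates/.
    import httpx

    with httpx.stream(
        "GET",
        "http://127.0.0.1:8080/api/updates/",
        headers={"Accept": "text/event-stream"},
        timeout=None,  # the server holds the connection open indefinitely
    ) as response:
        for line in response.iter_lines():
            if line.startswith(":"):
                continue  # keep-alive comment, sent after ~15s of inactivity
            if line:
                print(line)  # raw SSE field, e.g. "data: {...}"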