cosma_backend-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. cosma_backend/__init__.py +14 -0
  2. cosma_backend/__main__.py +4 -0
  3. cosma_backend/api/__init__.py +29 -0
  4. cosma_backend/api/files.py +154 -0
  5. cosma_backend/api/index.py +114 -0
  6. cosma_backend/api/models.py +28 -0
  7. cosma_backend/api/search.py +166 -0
  8. cosma_backend/api/status.py +28 -0
  9. cosma_backend/api/updates.py +67 -0
  10. cosma_backend/api/watch.py +156 -0
  11. cosma_backend/app.py +192 -0
  12. cosma_backend/db/__init__.py +2 -0
  13. cosma_backend/db/database.py +638 -0
  14. cosma_backend/discoverer/__init__.py +1 -0
  15. cosma_backend/discoverer/discoverer.py +34 -0
  16. cosma_backend/embedder/__init__.py +1 -0
  17. cosma_backend/embedder/embedder.py +637 -0
  18. cosma_backend/logging.py +73 -0
  19. cosma_backend/models/__init__.py +3 -0
  20. cosma_backend/models/file.py +169 -0
  21. cosma_backend/models/status.py +10 -0
  22. cosma_backend/models/update.py +202 -0
  23. cosma_backend/models/watch.py +132 -0
  24. cosma_backend/pipeline/__init__.py +2 -0
  25. cosma_backend/pipeline/pipeline.py +222 -0
  26. cosma_backend/schema.sql +319 -0
  27. cosma_backend/searcher/__init__.py +1 -0
  28. cosma_backend/searcher/searcher.py +397 -0
  29. cosma_backend/summarizer/__init__.py +44 -0
  30. cosma_backend/summarizer/summarizer.py +1075 -0
  31. cosma_backend/utils/bundled.py +24 -0
  32. cosma_backend/utils/pubsub.py +31 -0
  33. cosma_backend/utils/sse.py +92 -0
  34. cosma_backend/watcher/__init__.py +1 -0
  35. cosma_backend/watcher/awatchdog.py +80 -0
  36. cosma_backend/watcher/watcher.py +257 -0
  37. cosma_backend-0.1.0.dist-info/METADATA +23 -0
  38. cosma_backend-0.1.0.dist-info/RECORD +39 -0
  39. cosma_backend-0.1.0.dist-info/WHEEL +4 -0
cosma_backend/__init__.py
@@ -0,0 +1,14 @@
+ from .app import app as app
+ from .app import run as run
+
+ def serve():
+     import uvicorn
+
+     uvicorn.run(
+         app, host="127.0.0.1",
+         port=8080,
+         log_level="info",
+         # I can't find a way to gracefully shut down SSE connections,
+         # so this bullshit will have to do for now
+         timeout_graceful_shutdown=5
+     )
cosma_backend/__main__.py
@@ -0,0 +1,4 @@
+ from . import serve
+
+ if __name__ == "__main__":
+     serve()
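With these two modules, the wheel can be started either as a module or by driving uvicorn directly. A minimal launch sketch (assuming the wheel is installed in the active environment and the default host/port above are unchanged):

    # Run the bundled server exactly as __main__.py does:
    #   python -m cosma_backend
    #
    # Or programmatically, mirroring serve() above:
    import uvicorn
    from cosma_backend import app  # the Quart ASGI app re-exported in __init__.py

    uvicorn.run(app, host="127.0.0.1", port=8080, log_level="info")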
cosma_backend/api/__init__.py
@@ -0,0 +1,29 @@
+ """
+ API Blueprint Module
+
+ This module initializes and combines all API blueprints for the application.
+ All API routes will be prefixed with /api when registered in app.py
+ """
+
+ from quart import Blueprint
+
+ from .files import files_bp
+ from .index import index_bp
+ from .search import search_bp
+ from .watch import watch_bp
+ from .updates import updates_bp
+ from .status import status_bp
+
+ # Create the main API blueprint
+ api_blueprint = Blueprint('api', __name__)
+
+ # Register sub-blueprints
+ api_blueprint.register_blueprint(files_bp, url_prefix='/files')
+ api_blueprint.register_blueprint(index_bp, url_prefix='/index')
+ api_blueprint.register_blueprint(search_bp, url_prefix='/search')
+ api_blueprint.register_blueprint(watch_bp, url_prefix='/watch')
+ api_blueprint.register_blueprint(updates_bp, url_prefix='/updates')
+ api_blueprint.register_blueprint(status_bp, url_prefix='/status')
+
+
+ __all__ = ['api_blueprint']
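The docstring states that these routes end up under /api once the blueprint is registered in app.py; that file is part of the wheel (+192 lines) but not reproduced in this diff, so the following is only a sketch of what that registration presumably looks like:

    # Hypothetical registration sketch; cosma_backend/app.py itself is not
    # shown in this diff, only described by the docstring above.
    from quart import Quart

    from cosma_backend.api import api_blueprint

    app = Quart(__name__)
    app.register_blueprint(api_blueprint, url_prefix="/api")
    # files_bp routes then resolve under /api/files/..., index_bp under /api/index/..., etc.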
cosma_backend/api/files.py
@@ -0,0 +1,154 @@
+ """
+ Files API Blueprint
+
+ Handles endpoints related to file operations and retrieval.
+ """
+
+ from dataclasses import dataclass
+ from typing import TYPE_CHECKING
+
+ from quart import Blueprint, current_app
+ from quart_schema import validate_request, validate_response
+
+ if TYPE_CHECKING:
+     from backend.app import app as current_app
+
+ files_bp = Blueprint('files', __name__)
+
+
+ @dataclass
+ class FileResponse:
+     """Response model for a single file"""
+     id: int
+     filename: str
+     extension: str
+     created: str
+     modified: str
+     summary: str
+     keywords: list[str] | None
+
+
+ @dataclass
+ class FilesListResponse:
+     """Response model for list of files"""
+     files: list[FileResponse]
+     total: int
+     page: int
+     per_page: int
+
+
+ # @files_bp.get("/") # type: ignore[return-value]
+ # @validate_response(FilesListResponse, 200)
+ # async def list_files() -> tuple[FilesListResponse, int]:
+ #     """
+ #     Get a list of all indexed files.
+ #
+ #     GET /api/files/
+ #
+ #     Query parameters:
+ #         page: Page number (default: 1)
+ #         per_page: Items per page (default: 50)
+ #         extension: Filter by file extension
+ #
+ #     Returns:
+ #         200: List of files
+ #     """
+ #     # TODO: Implement file listing with pagination
+ #     # 1. Get query parameters
+ #     # 2. Query database with filters and pagination
+ #     # 3. Return formatted response
+ #
+ #     return FilesListResponse(
+ #         files=[],
+ #         total=0,
+ #         page=1,
+ #         per_page=50
+ #     ), 200
+
+
+ @files_bp.get("/<int:file_id>") # type: ignore[return-value]
+ @validate_response(FileResponse, 200)
+ async def get_file(file_id: int) -> tuple[FileResponse, int]:
+     """Get details of a specific file by ID"""
+     # TODO: Implement single file retrieval
+     # 1. Query database for file by ID
+     # 2. Return file details or 404
+
+     async with current_app.db.acquire() as conn:
+         file = await conn.fetchone(
+             "SELECT * FROM files WHERE id = ?;",
+             (file_id,)
+         )
+
+     if not file:
+         return {"error": "File not found"}, 404 # type: ignore
+
+     # TODO: Parse the file data properly
+     return FileResponse(
+         id=file['id'],
+         filename=file['filename'],
+         extension=file['extension'],
+         created=str(file['created']),
+         modified=str(file['modified']),
+         summary=file['summary'],
+         keywords=None # TODO: Parse keywords from database
+     ), 200
+
+
+ @dataclass
+ class DeleteFileResponse:
+     """Response for file deletion"""
+     success: bool
+     message: str
+
+
+ # @files_bp.delete("/<int:file_id>") # type: ignore[return-value]
+ # @validate_response(DeleteFileResponse, 200)
+ # async def delete_file(file_id: int) -> tuple[DeleteFileResponse, int]:
+ #     """
+ #     Delete a file from the index.
+ #
+ #     DELETE /api/files/{file_id}
+ #
+ #     Returns:
+ #         200: File deleted successfully
+ #         404: File not found
+ #     """
+ #     # TODO: Implement file deletion
+ #     # 1. Check if file exists
+ #     # 2. Delete from database
+ #     # 3. Return success/failure
+ #
+ #     return DeleteFileResponse(
+ #         success=True,
+ #         message=f"File {file_id} deleted successfully"
+ #     ), 200
+
+
+ @dataclass
+ class FileStatsResponse:
+     """Response for file statistics"""
+     total_files: int
+     total_size: int
+     file_types: dict[str, int]
+     last_indexed: str | None
+
+
+ @files_bp.get("/stats") # type: ignore[return-value]
+ @validate_response(FileStatsResponse, 200)
+ async def get_stats() -> tuple[FileStatsResponse, int]:
+     """Get statistics about indexed files"""
+     # TODO: Implement statistics gathering
+     # 1. Count total files
+     # 2. Group by extension
+     # 3. Get most recent index timestamp
+
+     async with current_app.db.acquire() as conn:
+         total = await conn.fetchone("SELECT COUNT(*) as count FROM files;")
+
+     return FileStatsResponse(
+         total_files=total['count'] if total else 0,
+         total_size=0, # TODO: Add size tracking
+         file_types={},
+         last_indexed=None
+     ), 200
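Only get_file and get_stats are active here; the list and delete handlers are commented out. Assuming the /api prefix from api/__init__.py and the default local server, exercising the two live routes could look like this (an illustrative sketch, not part of the package):

    # Hedged client sketch for GET /api/files/<id> and GET /api/files/stats.
    import asyncio

    import httpx

    async def main() -> None:
        async with httpx.AsyncClient(base_url="http://127.0.0.1:8080/api") as client:
            stats = await client.get("/files/stats")
            print(stats.json())      # e.g. {"total_files": 0, "total_size": 0, ...}

            one = await client.get("/files/1")
            print(one.status_code)   # 200 if file id 1 exists, 404 otherwise

    asyncio.run(main())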
cosma_backend/api/index.py
@@ -0,0 +1,114 @@
+ """
+ Index API Blueprint
+
+ Handles endpoints related to indexing directories and files.
+ """
+
+ import asyncio
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import TYPE_CHECKING
+
+ from quart import Blueprint, current_app
+ from quart_schema import validate_request, validate_response
+
+ from backend.models import File
+ from backend.pipeline import Pipeline
+
+ if TYPE_CHECKING:
+     from backend.app import app as current_app
+
+ index_bp = Blueprint('index', __name__)
+
+
+ @dataclass
+ class IndexDirectoryRequest:
+     """Request body for indexing a directory"""
+     directory_path: str
+
+
+ @dataclass
+ class IndexDirectoryResponse:
+     """Response for directory indexing"""
+     success: bool
+     message: str
+     files_indexed: int
+
+
+ @index_bp.post("/directory") # type: ignore[return-value]
+ @validate_request(IndexDirectoryRequest)
+ @validate_response(IndexDirectoryResponse, 201)
+ async def index_directory(data: IndexDirectoryRequest) -> tuple[IndexDirectoryResponse, int]:
+     """Index all files in a directory"""
+     current_app.submit_job(current_app.pipeline.process_directory(data.directory_path))
+
+     return IndexDirectoryResponse(
+         success=True,
+         message=f"Started indexing directory: {data.directory_path}",
+         files_indexed=0
+     ), 201
+
+
+ @dataclass
+ class IndexFileRequest:
+     """Request body for indexing a single file"""
+     file_path: str
+
+
+ @dataclass
+ class IndexFileResponse:
+     """Response for file indexing"""
+     success: bool
+     message: str
+     file_id: int | None
+
+
+ @index_bp.post("/file") # type: ignore[return-value]
+ @validate_request(IndexFileRequest)
+ @validate_response(IndexFileResponse, 201)
+ async def index_file(data: IndexFileRequest) -> tuple[IndexFileResponse, int]:
+     """Index a single file"""
+     # TODO: Implement single file indexing
+     # 1. Validate file exists
+     # 2. Parse file using backend.parser
+     # 3. Summarize file using backend.summarizer
+     # 4. Insert into database using current_app.db
+     await current_app.pipeline.process_file(File.from_path(Path(data.file_path)))
+
+     return IndexFileResponse(
+         success=True,
+         message=f"Successfully indexed file: {data.file_path}",
+         file_id=None
+     ), 201
+
+
+ @dataclass
+ class IndexStatusResponse:
+     """Response for indexing status"""
+     is_indexing: bool
+     current_file: str | None
+     files_processed: int
+     total_files: int
+
+
+ # @index_bp.get("/status") # type: ignore[return-value]
+ # @validate_response(IndexStatusResponse, 200)
+ # async def index_status() -> tuple[IndexStatusResponse, int]:
+ #     """
+ #     Get the current status of any ongoing indexing operations.
+ #
+ #     GET /api/index/status
+ #
+ #     Returns:
+ #         200: Current indexing status
+ #     """
+ #     # TODO: Implement status tracking
+ #     # This could use a global state manager or database table
+ #     # to track ongoing indexing operations
+ #
+ #     return IndexStatusResponse(
+ #         is_indexing=False,
+ #         current_file=None,
+ #         files_processed=0,
+ #         total_files=0
+ #     ), 200
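Note the asymmetry above: index_directory hands the work to current_app.submit_job and returns 201 straight away, while index_file awaits the pipeline inline. A request sketch (host and paths are illustrative):

    # Hedged client sketch for the two active indexing routes.
    import asyncio

    import httpx

    async def main() -> None:
        async with httpx.AsyncClient(base_url="http://127.0.0.1:8080/api") as client:
            # Fire-and-forget: the directory is processed as a background job.
            r = await client.post("/index/directory",
                                  json={"directory_path": "/home/user/docs"})
            print(r.status_code, r.json()["message"])

            # Synchronous from the client's point of view: the file runs
            # through the pipeline before the response is returned.
            r = await client.post("/index/file",
                                  json={"file_path": "/home/user/docs/notes.md"})
            print(r.json()["success"])

    asyncio.run(main())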
cosma_backend/api/models.py
@@ -0,0 +1,28 @@
+ from dataclasses import dataclass
+ from datetime import datetime
+
+
+ @dataclass
+ class FileResponse:
+     """Shared API response model for file metadata across endpoints"""
+     file_path: str
+     filename: str
+     extension: str
+     created: datetime
+     modified: datetime
+     accessed: datetime
+     title: str | None
+     summary: str | None
+
+
+ @dataclass
+ class JobResponse:
+     """API response model for watched directory jobs"""
+     id: int
+     path: str
+     is_active: bool
+     recursive: bool
+     file_pattern: str | None
+     last_scan: datetime | None
+     created_at: datetime | None
+     updated_at: datetime | None
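search.py below builds these via r.file_metadata.to_response(); the model that implements that method lives in cosma_backend/models/file.py, which is not shown in this diff. As a rough idea of the mapping (every name here other than FileResponse is hypothetical):

    # Hypothetical mapping sketch only; the real to_response() in
    # cosma_backend/models/file.py is not included in this diff.
    from datetime import datetime
    from pathlib import Path

    from backend.api.models import FileResponse

    def to_response(path: Path, created: datetime, modified: datetime,
                    accessed: datetime, title: str | None,
                    summary: str | None) -> FileResponse:
        return FileResponse(
            file_path=str(path),
            filename=path.name,
            extension=path.suffix,
            created=created,
            modified=modified,
            accessed=accessed,
            title=title,
            summary=summary,
        )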
cosma_backend/api/search.py
@@ -0,0 +1,166 @@
+ """
+ Search API Blueprint
+
+ Handles endpoints related to searching indexed files.
+ """
+
+ from dataclasses import dataclass
+ from typing import TYPE_CHECKING
+
+ from quart import Blueprint, current_app, request
+ from quart_schema import validate_request, validate_response
+
+ from backend.api.models import FileResponse
+
+ if TYPE_CHECKING:
+     from backend.app import app as current_app
+
+ search_bp = Blueprint('search', __name__)
+
+
+ @dataclass
+ class SearchRequest:
+     """Request body for searching files"""
+     query: str
+     filters: dict[str, str] | None = None
+     limit: int = 50
+     directory: str | None = None
+
+
+ @dataclass
+ class SearchResultItem:
+     """A single search result"""
+     file: FileResponse
+     relevance_score: float
+
+
+ @dataclass
+ class SearchResponse:
+     """Response for search queries"""
+     results: list[SearchResultItem]
+
+
+ @search_bp.post("/") # type: ignore[return-value]
+ @validate_request(SearchRequest)
+ @validate_response(SearchResponse, 200)
+ async def search(data: SearchRequest) -> tuple[SearchResponse, int]:
+     """Search for files based on query"""
+     # TODO: Implement search functionality
+     # 1. Parse query and filters
+     # 2. Search database (could use FTS if implemented)
+     # 3. Rank results by relevance
+     # 4. Return sorted results
+     results = await current_app.searcher.search(data.query, directory=data.directory)
+
+     return SearchResponse(
+         results=[
+             SearchResultItem(
+                 r.file_metadata.to_response(), r.combined_score
+             )
+             for r in results],
+     ), 200
+
+
+ @dataclass
+ class KeywordSearchRequest:
+     """Request body for keyword-based search"""
+     keywords: list[str]
+     match_all: bool = False
+
+
+ # @search_bp.post("/keywords") # type: ignore[return-value]
+ # @validate_request(KeywordSearchRequest)
+ # @validate_response(SearchResponse, 200)
+ # async def search_by_keywords(data: KeywordSearchRequest) -> tuple[SearchResponse, int]:
+ #     """
+ #     Search for files by keywords.
+ #
+ #     POST /api/search/keywords
+ #
+ #     Request body:
+ #         {
+ #             "keywords": ["python", "api", "database"],
+ #             "match_all": false
+ #         }
+ #
+ #     Returns:
+ #         200: Search results matching keywords
+ #     """
+ #     # TODO: Implement keyword search
+ #     # 1. Query files with matching keywords
+ #     # 2. If match_all=true, require all keywords
+ #     # 3. If match_all=false, match any keyword
+ #     # 4. Rank by number of matching keywords
+ #
+ #     return SearchResponse(
+ #         results=[],
+ #         total=0,
+ #         query=f"Keywords: {', '.join(data.keywords)}"
+ #     ), 200
+
+
+ @dataclass
+ class SimilarFilesResponse:
+     """Response for similar files query"""
+     files: list[SearchResultItem]
+     total: int
+
+
+ # @search_bp.get("/<int:file_id>/similar") # type: ignore[return-value]
+ # @validate_response(SimilarFilesResponse, 200)
+ # async def find_similar_files(file_id: int) -> tuple[SimilarFilesResponse, int]:
+ #     """
+ #     Find files similar to a given file.
+ #
+ #     GET /api/search/{file_id}/similar
+ #
+ #     Query parameters:
+ #         limit: Maximum number of results (default: 10)
+ #
+ #     Returns:
+ #         200: Similar files
+ #         404: Source file not found
+ #     """
+ #     # TODO: Implement similarity search
+ #     # 1. Get the source file
+ #     # 2. Compare keywords/summaries with other files
+ #     # 3. Rank by similarity
+ #     # 4. Return top N results
+ #
+ #     return SimilarFilesResponse(
+ #         files=[],
+ #         total=0
+ #     ), 200
+
+
+ @dataclass
+ class AutocompleteResponse:
+     """Response for autocomplete suggestions"""
+     suggestions: list[str]
+
+
+ # @search_bp.get("/autocomplete") # type: ignore[return-value]
+ # @validate_response(AutocompleteResponse, 200)
+ # async def autocomplete() -> tuple[AutocompleteResponse, int]:
+ #     """
+ #     Get autocomplete suggestions for search queries.
+ #
+ #     GET /api/search/autocomplete?q=py
+ #
+ #     Query parameters:
+ #         q: Partial query string
+ #         limit: Maximum suggestions (default: 10)
+ #
+ #     Returns:
+ #         200: List of suggestions
+ #     """
+ #     # TODO: Implement autocomplete
+ #     # 1. Get partial query from request args
+ #     # 2. Search for matching filenames, keywords, or common terms
+ #     # 3. Return suggestions
+ #
+ #     query = request.args.get('q', '')
+ #
+ #     return AutocompleteResponse(
+ #         suggestions=[]
+ #     ), 200
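As with files.py, only the plain POST /api/search/ route is live; keyword, similarity, and autocomplete search are stubbed out. A request sketch against the active route (host and query text are illustrative; field names follow the dataclasses above):

    # Hedged client sketch for POST /api/search/.
    import asyncio

    import httpx

    async def main() -> None:
        async with httpx.AsyncClient(base_url="http://127.0.0.1:8080/api") as client:
            r = await client.post("/search/", json={
                "query": "quarterly report",
                "directory": None,   # optional scope, as in SearchRequest
                "limit": 50,
            })
            for item in r.json()["results"]:
                print(item["relevance_score"], item["file"]["file_path"])

    asyncio.run(main())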
cosma_backend/api/status.py
@@ -0,0 +1,28 @@
+ """
+ Status API Blueprint
+
+ Handles endpoints related to app status.
+ """
+
+ import asyncio
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import TYPE_CHECKING
+
+ from quart import Blueprint, abort, current_app, make_response, request, stream_with_context
+ from quart_schema import validate_request, validate_response
+
+ from backend.utils.pubsub import subscribe
+
+ if TYPE_CHECKING:
+     from backend.app import app as current_app
+
+ status_bp = Blueprint('status', __name__)
+
+
+ @status_bp.get("/") # type: ignore[return-value]
+ async def status():
+     """Get current application status and active jobs count"""
+     return {
+         "jobs": len(current_app.jobs),
+     }
cosma_backend/api/updates.py
@@ -0,0 +1,67 @@
+ """
+ Updates API Blueprint
+
+ Handles endpoints related to streaming updates.
+ """
+
+ import asyncio
+ from dataclasses import dataclass
+ import logging
+ from pathlib import Path
+ from typing import TYPE_CHECKING
+
+ from quart import Blueprint, abort, current_app, make_response, request, stream_with_context
+ from quart_schema import validate_request, validate_response
+
+ from backend.logging import sm
+ from backend.models.update import UpdateOpcode
+ from backend.utils.pubsub import subscribe
+ from backend.utils.sse import ServerSentEvent, sse_comment
+
+ if TYPE_CHECKING:
+     from backend.app import app as current_app
+
+ updates_bp = Blueprint('updates', __name__)
+
+ logger = logging.getLogger(__name__)
+
+
+ @updates_bp.get("/") # type: ignore[return-value]
+ async def updates():
+     """Stream real-time updates via Server-Sent Events"""
+     if "text/event-stream" not in request.accept_mimetypes:
+         abort(400)
+
+     @stream_with_context
+     async def updates_generator():
+         # Keep-alive interval: send a comment if no updates for 15 seconds
+         # This prevents proxy/browser timeouts and helps detect dead connections
+         KEEPALIVE_INTERVAL = 15.0
+
+         with subscribe(current_app.updates_hub) as queue:
+             while True:
+                 try:
+                     # Wait for an update with timeout
+                     update = await asyncio.wait_for(queue.get(), timeout=KEEPALIVE_INTERVAL)
+
+                     if update.opcode is UpdateOpcode.SHUTTING_DOWN:
+                         print(update.to_sse().encode())
+                         yield update.to_sse().encode()
+                         return # close connection
+
+                     yield update.to_sse().encode()
+                 except asyncio.TimeoutError:
+                     # No updates received within the keepalive interval
+                     # Send a keep-alive comment (SSE spec: lines starting with : are comments)
+                     yield sse_comment("keepalive")
+
+     response = await make_response(
+         updates_generator(),
+         {
+             'Content-Type': 'text/event-stream',
+             'Cache-Control': 'no-cache',
+             'Transfer-Encoding': 'chunked',
+         },
+     )
+     response.timeout = None # type: ignore[assignment]
+     return response
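The handler refuses clients that do not accept text/event-stream and emits a comment line after roughly 15 seconds of silence as a keep-alive. Consuming the stream could look like this (URL assumes the /api prefix and the default local server):

    # Hedged SSE consumer sketch for GET /api/updates/.
    import httpx

    with httpx.stream(
        "GET",
        "http://127.0.0.1:8080/api/updates/",
        headers={"Accept": "text/event-stream"},
        timeout=None,  # the server holds the connection open indefinitely
    ) as response:
        for line in response.iter_lines():
            if line.startswith(":"):
                continue  # keep-alive comment, sent after ~15s of inactivity
            if line:
                print(line)  # raw SSE field, e.g. "data: {...}"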