cosma-backend 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. cosma_backend/__init__.py +14 -0
  2. cosma_backend/__main__.py +4 -0
  3. cosma_backend/api/__init__.py +29 -0
  4. cosma_backend/api/files.py +154 -0
  5. cosma_backend/api/index.py +114 -0
  6. cosma_backend/api/models.py +28 -0
  7. cosma_backend/api/search.py +166 -0
  8. cosma_backend/api/status.py +28 -0
  9. cosma_backend/api/updates.py +67 -0
  10. cosma_backend/api/watch.py +156 -0
  11. cosma_backend/app.py +192 -0
  12. cosma_backend/db/__init__.py +2 -0
  13. cosma_backend/db/database.py +638 -0
  14. cosma_backend/discoverer/__init__.py +1 -0
  15. cosma_backend/discoverer/discoverer.py +34 -0
  16. cosma_backend/embedder/__init__.py +1 -0
  17. cosma_backend/embedder/embedder.py +637 -0
  18. cosma_backend/logging.py +73 -0
  19. cosma_backend/models/__init__.py +3 -0
  20. cosma_backend/models/file.py +169 -0
  21. cosma_backend/models/status.py +10 -0
  22. cosma_backend/models/update.py +202 -0
  23. cosma_backend/models/watch.py +132 -0
  24. cosma_backend/pipeline/__init__.py +2 -0
  25. cosma_backend/pipeline/pipeline.py +222 -0
  26. cosma_backend/schema.sql +319 -0
  27. cosma_backend/searcher/__init__.py +1 -0
  28. cosma_backend/searcher/searcher.py +397 -0
  29. cosma_backend/summarizer/__init__.py +44 -0
  30. cosma_backend/summarizer/summarizer.py +1075 -0
  31. cosma_backend/utils/bundled.py +24 -0
  32. cosma_backend/utils/pubsub.py +31 -0
  33. cosma_backend/utils/sse.py +92 -0
  34. cosma_backend/watcher/__init__.py +1 -0
  35. cosma_backend/watcher/awatchdog.py +80 -0
  36. cosma_backend/watcher/watcher.py +257 -0
  37. cosma_backend-0.1.0.dist-info/METADATA +23 -0
  38. cosma_backend-0.1.0.dist-info/RECORD +39 -0
  39. cosma_backend-0.1.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,156 @@
1
+ """
2
+ Watch API Blueprint
3
+
4
+ Handles endpoints related to watching directories.
5
+ """
6
+
7
+ from dataclasses import dataclass
8
+ from typing import TYPE_CHECKING
9
+
10
+ from quart import Blueprint, current_app
11
+ from quart_schema import validate_request, validate_response
12
+
13
+ from backend.api.models import JobResponse
14
+
15
+ if TYPE_CHECKING:
16
+ from backend.app import app as current_app
17
+
18
# Blueprint grouping all directory-watch endpoints; mounted under /api by the api package
watch_bp = Blueprint('watch', __name__)
19
+
20
+
21
@dataclass
class WatchRequest:
    """Request body for watching a directory"""
    directory_path: str  # filesystem path of the directory to start watching
25
+
26
+
27
@dataclass
class WatchResponse:
    """Response for directory watching"""
    success: bool       # False when the directory (or a watching parent) is already watched
    message: str        # human-readable outcome description
    files_indexed: int  # always 0 currently; indexing happens asynchronously in the watcher
33
+
34
+
35
@watch_bp.post("/")  # type: ignore[return-value]
@validate_request(WatchRequest)
@validate_response(WatchResponse, 201)
async def watch_directory(data: WatchRequest) -> tuple[WatchResponse, int]:
    """
    Start watching all files in a directory for changes.

    POST /api/watch/

    Returns:
        201: Watching started; file indexing is handled asynchronously.
        400: Directory is already being watched, directly or via a parent.
    """
    try:
        await current_app.watcher.start_watching(data.directory_path)
    except ValueError as e:
        # start_watching signals "already watched" conditions via ValueError
        return WatchResponse(
            success=False,
            message=str(e),
            files_indexed=0,
        ), 400

    # files_indexed stays 0 here: this endpoint only registers the watch;
    # it does not synchronously index anything.
    return WatchResponse(
        success=True,
        message=f"Started watching directory: {data.directory_path}",
        files_indexed=0,
    ), 201
62
+
63
+
64
@dataclass
class WatchStatusResponse:
    """Response for watching status"""
    # NOTE(review): only referenced by the commented-out /status endpoint below.
    is_indexing: bool
    current_file: str | None  # file currently being processed, if any
    files_processed: int
    total_files: int
71
+
72
+
73
@dataclass
class JobsListResponse:
    """Response for jobs list"""
    jobs: list[JobResponse]  # one entry per watched directory (active or not)
77
+
78
+
79
@watch_bp.get("/jobs")  # type: ignore[return-value]
@validate_response(JobsListResponse, 200)
async def get_jobs() -> tuple[JobsListResponse, int]:
    """
    List every watched-directory job.

    GET /api/watch/jobs

    Returns:
        200: All watched directories (including inactive ones),
             converted to API response models.
    """
    directories = await current_app.db.get_watched_directories(active_only=False)
    return JobsListResponse(
        jobs=[directory.to_response() for directory in directories]
    ), 200
97
+
98
+
99
@dataclass
class DeleteJobResponse:
    """Response for deleting a watched directory job"""
    success: bool  # False when no watched directory matched job_id
    message: str
    job_id: int    # echoes the requested job ID
105
+
106
+
107
@watch_bp.delete("/jobs/<int:job_id>")  # type: ignore[return-value]
@validate_response(DeleteJobResponse, 200)
async def delete_job(job_id: int) -> tuple[DeleteJobResponse, int]:
    """
    Delete a watched directory job by ID.

    DELETE /api/watch/jobs/{job_id}

    Returns:
        200: Job deleted successfully.
        404: No watched directory with that ID.
    """
    removed = await current_app.db.delete_watched_directory(job_id)

    # Guard clause: nothing matched, report 404 with the echoed ID.
    if not removed:
        return DeleteJobResponse(
            success=False,
            message=f"Watched directory with ID {job_id} not found",
            job_id=job_id,
        ), 404

    return DeleteJobResponse(
        success=True,
        message=f"Successfully deleted watched directory: {removed.path_str}",
        job_id=job_id,
    ), 200
134
+
135
+
136
+ # @watch_bp.get("/status") # type: ignore[return-value]
137
+ # @validate_response(WatchStatusResponse, 200)
138
+ # async def watch_status() -> tuple[WatchStatusResponse, int]:
139
+ # """
140
+ # Get the current status of any ongoing watch operations.
141
+ #
142
+ # GET /api/watch/status
143
+ #
144
+ # Returns:
145
+ # 200: Current watch status
146
+ # """
147
+ # # TODO: Implement status tracking
148
+ # # This could use a global state manager or database table
149
+ # # to track ongoing indexing operations
150
+ #
151
+ # return WatchStatusResponse(
152
+ # is_indexing=False,
153
+ # current_file=None,
154
+ # files_processed=0,
155
+ # total_files=0
156
+ # ), 200
cosma_backend/app.py ADDED
@@ -0,0 +1,192 @@
1
+ import asyncio
2
+ import datetime
3
+ from dataclasses import dataclass
4
+ import logging
5
+ from typing import Coroutine
6
+
7
+ from dotenv import load_dotenv
8
+ from rich.logging import RichHandler
9
+ from quart import Quart, request
10
+ from quart_schema import QuartSchema, validate_request, validate_response
11
+
12
+ from backend import db
13
+ from backend.api import api_blueprint
14
+ from backend.db.database import Database
15
+ from backend.logging import sm
16
+ from backend.models.update import Update
17
+ from backend.utils.pubsub import Hub
18
+ from backend.pipeline import Pipeline
19
+ from backend.searcher import HybridSearcher
20
+ from backend.discoverer import Discoverer
21
+ from backend.parser import FileParser
22
+ from backend.summarizer import AutoSummarizer
23
+ from backend.embedder import AutoEmbedder
24
+ from backend.watcher import Watcher
25
+
26
# Load environment variables from a local .env file before any config is read.
# NOTE(review): imports above use the `backend.*` package name while this wheel
# installs `cosma_backend/` — verify the packaging maps `backend` correctly.
load_dotenv()

# Root logging config: RichHandler renders the message; timestamps as [HH:MM:SS]
FORMAT = "%(message)s"
logging.basicConfig(
    level="INFO", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()]
)

logger = logging.getLogger(__name__)
34
+
35
class App(Quart):
    """Quart application carrying typed handles to the backend services.

    The service attributes below are populated during startup (before
    serving); until then they are only type declarations.
    """

    db: Database
    updates_hub: Hub[Update]
    jobs: set[asyncio.Task]
    pipeline: Pipeline
    searcher: HybridSearcher
    watcher: Watcher

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.updates_hub = Hub()
        # Strong references to in-flight background tasks so the event loop
        # cannot garbage-collect them while they are still running.
        self.jobs = set()

    def initialize_config(self):
        """Load config from BACKEND_-prefixed env vars and apply defaults."""
        logger.info("Loading config")
        self.config.from_prefixed_env("BACKEND")

        # add new config variable defaults here (if there should be a default)
        self.config.setdefault("DATABASE_PATH", './app.db')
        self.config.setdefault("HOST", '127.0.0.1')
        self.config.setdefault("PORT", 8080)

        logger.debug(sm("Config loaded", config=self.config))

    def submit_job(self, coro: Coroutine) -> asyncio.Task:
        """Schedule *coro* as a tracked background task and return the Task.

        The task is held in ``self.jobs`` until it completes so it is not
        garbage-collected mid-run; a done-callback drops the reference.
        """
        def remove_task_callback(task: asyncio.Task):
            # discard (not remove): tolerate the task having already been
            # dropped from the set instead of raising KeyError inside an
            # asyncio done-callback.
            self.jobs.discard(task)

        task = asyncio.create_task(coro)
        self.jobs.add(task)
        task.add_done_callback(remove_task_callback)

        return task
69
+
70
+
71
# Application singleton; config comes from BACKEND_-prefixed env vars plus defaults
app = App(__name__)
app.initialize_config()
# Enables request/response validation and OpenAPI schema generation
QuartSchema(app)

# Register API blueprints
app.register_blueprint(api_blueprint, url_prefix='/api')
77
+
78
@app.before_serving
async def initialize_services():
    """Connect the database and wire up all processing services at startup."""
    logger.info(sm("Initializing database"))
    app.db = await db.connect(app.config['DATABASE_PATH'])

    logger.info(sm("Initializing services"))

    # Stateless workers shared by the pipeline
    file_discoverer = Discoverer()
    file_parser = FileParser()
    file_summarizer = AutoSummarizer()
    file_embedder = AutoEmbedder()

    app.pipeline = Pipeline(
        db=app.db,
        updates_hub=app.updates_hub,
        parser=file_parser,
        discoverer=file_discoverer,
        summarizer=file_summarizer,
        embedder=file_embedder,
    )

    app.searcher = HybridSearcher(db=app.db, embedder=file_embedder)

    app.watcher = Watcher(db=app.db, pipeline=app.pipeline)
    # Resume watches persisted from previous runs
    await app.watcher.initialize_from_database()

    logger.info(sm("Initialized services"))
110
+
111
+
112
@app.after_serving
async def handle_shutdown():
    # Release the database connection once the server stops serving requests
    logger.info(sm("Closing DB"))
    await app.db.close()
116
+
117
+
118
@app.before_request
async def log_request():
    """Stamp the request with its arrival time and log basic metadata."""
    # start_time is read by the after_request hook to compute duration
    request.start_time = datetime.datetime.now()

    user_agent = request.headers.get('User-Agent')
    logger.info(sm(
        "Incoming request",
        method=request.method,
        path=request.path,
        remote_addr=request.remote_addr,
        user_agent=user_agent,
    ))
128
+
129
@app.after_request
async def log_response(response):
    """Log request completion with its duration, then pass the response through."""
    started = getattr(request, 'start_time', None)
    if started is not None:
        elapsed = (datetime.datetime.now() - started).total_seconds()
        logger.info(sm(
            "Request completed",
            method=request.method,
            path=request.path,
            status_code=response.status_code,
            duration_seconds=elapsed,
        ))
    return response
141
+
142
@app.post("/echo")
async def echo():
    """Echo the posted JSON back with an `extra` flag (smoke-test endpoint)."""
    payload = await request.get_json()
    return {"input": payload, "extra": True}
146
+
147
+ # ====== Sample Database Usage ======
148
+
149
@app.get("/get")
async def get():
    """Return every row of the files table as a list of dicts.

    Sample/debug endpoint: runs raw SQL directly because the database layer
    does not expose a get_files helper yet.
    """
    async with app.db.acquire() as conn:
        rows = await conn.fetchall("SELECT * FROM files;")

    return [dict(row) for row in rows]
158
+
159
+ # ====== Main Indexing Route ======
160
+ # Note: Indexing routes have been moved to backend/api/index.py
161
+ # This endpoint remains for backward compatibility but will be deprecated
162
+
163
@dataclass
class IndexIn:
    """Request body for the deprecated /index route."""
    directory_path: str  # directory the caller wants indexed
166
+
167
@dataclass
class IndexOut:
    """Response body for the deprecated /index route."""
    success: bool  # currently always True — the stub performs no work
171
@app.post("/index")  # type: ignore[return-value]
@validate_request(IndexIn)
@validate_response(IndexOut, 201)
async def index(data: IndexIn) -> tuple[IndexOut, int]:
    """Deprecated stub: reports success without indexing anything.

    Kept only for backward compatibility; use /api/index/directory instead.
    """
    # TODO: extract, summarize, and db
    # something like:
    # for file in extract_files():
    #     parsed_file = parse_file(file)
    #     summarized_file = app.summarizer.summarize_file(parsed_file)
    #     await app.db.insert_file(summarized_file)

    # Note: Use /api/index/directory instead (this route kept for compatibility)

    return IndexOut(success=True), 201
185
+
186
+
187
def run() -> None:
    """Start the Quart server on the configured host/port (reloader disabled)."""
    cfg = app.config
    app.run(host=cfg['HOST'], port=cfg['PORT'], use_reloader=False)
@@ -0,0 +1,2 @@
1
+ from .database import Database as Database
2
+ from .database import connect as connect