cosma-backend 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cosma_backend/__init__.py +14 -0
- cosma_backend/__main__.py +4 -0
- cosma_backend/api/__init__.py +29 -0
- cosma_backend/api/files.py +154 -0
- cosma_backend/api/index.py +114 -0
- cosma_backend/api/models.py +28 -0
- cosma_backend/api/search.py +166 -0
- cosma_backend/api/status.py +28 -0
- cosma_backend/api/updates.py +67 -0
- cosma_backend/api/watch.py +156 -0
- cosma_backend/app.py +192 -0
- cosma_backend/db/__init__.py +2 -0
- cosma_backend/db/database.py +638 -0
- cosma_backend/discoverer/__init__.py +1 -0
- cosma_backend/discoverer/discoverer.py +34 -0
- cosma_backend/embedder/__init__.py +1 -0
- cosma_backend/embedder/embedder.py +637 -0
- cosma_backend/logging.py +73 -0
- cosma_backend/models/__init__.py +3 -0
- cosma_backend/models/file.py +169 -0
- cosma_backend/models/status.py +10 -0
- cosma_backend/models/update.py +202 -0
- cosma_backend/models/watch.py +132 -0
- cosma_backend/pipeline/__init__.py +2 -0
- cosma_backend/pipeline/pipeline.py +222 -0
- cosma_backend/schema.sql +319 -0
- cosma_backend/searcher/__init__.py +1 -0
- cosma_backend/searcher/searcher.py +397 -0
- cosma_backend/summarizer/__init__.py +44 -0
- cosma_backend/summarizer/summarizer.py +1075 -0
- cosma_backend/utils/bundled.py +24 -0
- cosma_backend/utils/pubsub.py +31 -0
- cosma_backend/utils/sse.py +92 -0
- cosma_backend/watcher/__init__.py +1 -0
- cosma_backend/watcher/awatchdog.py +80 -0
- cosma_backend/watcher/watcher.py +257 -0
- cosma_backend-0.1.0.dist-info/METADATA +23 -0
- cosma_backend-0.1.0.dist-info/RECORD +39 -0
- cosma_backend-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Watch API Blueprint
|
|
3
|
+
|
|
4
|
+
Handles endpoints related to watching directories.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
from quart import Blueprint, current_app
|
|
11
|
+
from quart_schema import validate_request, validate_response
|
|
12
|
+
|
|
13
|
+
from backend.api.models import JobResponse
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from backend.app import app as current_app
|
|
17
|
+
|
|
18
|
+
# Blueprint grouping the directory-watch endpoints.
watch_bp = Blueprint('watch', __name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class WatchRequest:
    """Body of a watch request: the directory to start watching."""

    directory_path: str  # path of the directory to watch
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class WatchResponse:
    """Outcome reported for a directory-watch request."""

    success: bool      # whether the watch was started
    message: str       # human-readable status or error text
    files_indexed: int # files indexed as part of this request
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@watch_bp.post("/")  # type: ignore[return-value]
@validate_request(WatchRequest)
@validate_response(WatchResponse, 201)
async def watch_directory(data: WatchRequest) -> tuple[WatchResponse, int]:
    """Start watching every file in the given directory for changes.

    Returns 201 on success, or 400 when the watcher rejects the path.
    """
    # TODO: Implement indexing logic — validate the directory exists,
    # extract/parse/summarize each file, then insert via current_app.db.
    try:
        await current_app.watcher.start_watching(data.directory_path)
    except ValueError as exc:
        # Raised when the directory is already being watched, or a parent
        # directory already covers it.
        failure = WatchResponse(
            success=False,
            message=str(exc),
            files_indexed=0,
        )
        return failure, 400

    started = WatchResponse(
        success=True,
        message=f"Started watching directory: {data.directory_path}",
        files_indexed=0,
    )
    return started, 201
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@dataclass
|
|
65
|
+
class WatchStatusResponse:
|
|
66
|
+
"""Response for watching status"""
|
|
67
|
+
is_indexing: bool
|
|
68
|
+
current_file: str | None
|
|
69
|
+
files_processed: int
|
|
70
|
+
total_files: int
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@dataclass
class JobsListResponse:
    """Envelope carrying every watched-directory job."""

    jobs: list[JobResponse]
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@watch_bp.get("/jobs")  # type: ignore[return-value]
@validate_response(JobsListResponse, 200)
async def get_jobs() -> tuple[JobsListResponse, int]:
    """
    Get all watched directory jobs.

    GET /api/watch/jobs

    Returns:
        200: List of all watched directories
    """
    # Fetch every watched directory (active and inactive) and map each
    # database row onto its API response model.
    directories = await current_app.db.get_watched_directories(active_only=False)
    responses = [entry.to_response() for entry in directories]

    return JobsListResponse(jobs=responses), 200
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@dataclass
class DeleteJobResponse:
    """Outcome of deleting a watched-directory job."""

    success: bool
    message: str
    job_id: int  # ID the delete was attempted on
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
@watch_bp.delete("/jobs/<int:job_id>")  # type: ignore[return-value]
@validate_response(DeleteJobResponse, 200)
async def delete_job(job_id: int) -> tuple[DeleteJobResponse, int]:
    """
    Delete a watched directory job by ID.

    DELETE /api/watch/jobs/{job_id}

    Returns:
        200: Job deleted successfully
        404: Job not found
    """
    removed = await current_app.db.delete_watched_directory(job_id)

    # Guard: nothing was deleted, so report the missing ID.
    if not removed:
        return DeleteJobResponse(
            success=False,
            message=f"Watched directory with ID {job_id} not found",
            job_id=job_id
        ), 404

    return DeleteJobResponse(
        success=True,
        message=f"Successfully deleted watched directory: {removed.path_str}",
        job_id=job_id
    ), 200
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# @watch_bp.get("/status") # type: ignore[return-value]
|
|
137
|
+
# @validate_response(WatchStatusResponse, 200)
|
|
138
|
+
# async def watch_status() -> tuple[WatchStatusResponse, int]:
|
|
139
|
+
# """
|
|
140
|
+
# Get the current status of any ongoing watch operations.
|
|
141
|
+
#
|
|
142
|
+
# GET /api/watch/status
|
|
143
|
+
#
|
|
144
|
+
# Returns:
|
|
145
|
+
# 200: Current watch status
|
|
146
|
+
# """
|
|
147
|
+
# # TODO: Implement status tracking
|
|
148
|
+
# # This could use a global state manager or database table
|
|
149
|
+
# # to track ongoing indexing operations
|
|
150
|
+
#
|
|
151
|
+
# return WatchStatusResponse(
|
|
152
|
+
# is_indexing=False,
|
|
153
|
+
# current_file=None,
|
|
154
|
+
# files_processed=0,
|
|
155
|
+
# total_files=0
|
|
156
|
+
# ), 200
|
cosma_backend/app.py
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import datetime
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
import logging
|
|
5
|
+
from typing import Coroutine
|
|
6
|
+
|
|
7
|
+
from dotenv import load_dotenv
|
|
8
|
+
from rich.logging import RichHandler
|
|
9
|
+
from quart import Quart, request
|
|
10
|
+
from quart_schema import QuartSchema, validate_request, validate_response
|
|
11
|
+
|
|
12
|
+
from backend import db
|
|
13
|
+
from backend.api import api_blueprint
|
|
14
|
+
from backend.db.database import Database
|
|
15
|
+
from backend.logging import sm
|
|
16
|
+
from backend.models.update import Update
|
|
17
|
+
from backend.utils.pubsub import Hub
|
|
18
|
+
from backend.pipeline import Pipeline
|
|
19
|
+
from backend.searcher import HybridSearcher
|
|
20
|
+
from backend.discoverer import Discoverer
|
|
21
|
+
from backend.parser import FileParser
|
|
22
|
+
from backend.summarizer import AutoSummarizer
|
|
23
|
+
from backend.embedder import AutoEmbedder
|
|
24
|
+
from backend.watcher import Watcher
|
|
25
|
+
|
|
26
|
+
load_dotenv()
|
|
27
|
+
|
|
28
|
+
# Route stdlib logging through Rich for readable console output;
# RichHandler renders level and time itself, so the format is message-only.
FORMAT = "%(message)s"
logging.basicConfig(
    level="INFO", format=FORMAT, datefmt="[%X]", handlers=[RichHandler()]
)

# Module-level logger for this application module.
logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
class App(Quart):
    """Quart application carrying the backend's shared services.

    ``updates_hub`` and ``jobs`` are created in ``__init__``; the service
    attributes (``db``, ``pipeline``, ``searcher``, ``watcher``) are
    assigned during server startup, before the first request is served.
    """

    db: Database              # async database handle
    updates_hub: Hub[Update]  # pub/sub hub for Update events
    jobs: set[asyncio.Task]   # strong refs to in-flight background tasks
    pipeline: Pipeline
    searcher: HybridSearcher
    watcher: Watcher

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.updates_hub = Hub()
        self.jobs = set()

    def initialize_config(self):
        """Load config from BACKEND_-prefixed env vars, then apply defaults."""
        logger.info("Loading config")
        self.config.from_prefixed_env("BACKEND")

        # add new config variable defaults here (if there should be a default)
        self.config.setdefault("DATABASE_PATH", './app.db')
        self.config.setdefault("HOST", '127.0.0.1')
        self.config.setdefault("PORT", 8080)

        logger.debug(sm("Config loaded", config=self.config))

    def submit_job(self, coro: Coroutine) -> asyncio.Task:
        """Schedule *coro* as a background task and keep a reference to it.

        The strong reference in ``self.jobs`` prevents the task from being
        garbage-collected mid-flight; it is dropped when the task finishes.

        Returns:
            The created :class:`asyncio.Task`.
        """
        task = asyncio.create_task(coro)
        self.jobs.add(task)
        # discard (not remove): never raise KeyError from a done-callback
        # if the task reference was already dropped elsewhere.
        task.add_done_callback(self.jobs.discard)

        return task
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# Module-level application instance; config is loaded immediately so that
# run() and any importer can read HOST/PORT from app.config.
app = App(__name__)
app.initialize_config()
QuartSchema(app)  # enables the validate_request/validate_response decorators

# Register API blueprints
app.register_blueprint(api_blueprint, url_prefix='/api')
|
|
77
|
+
|
|
78
|
+
@app.before_serving
async def initialize_services():
    """Connect the database and wire up the backend services before the
    first request is served. Order matters: the database must exist
    before the pipeline/searcher/watcher that depend on it.
    """
    logger.info(sm("Initializing database"))
    app.db = await db.connect(app.config['DATABASE_PATH'])

    logger.info(sm("Initializing services"))
    # Component services that the pipeline and searcher are built from.
    discoverer = Discoverer()
    parser = FileParser()
    summarizer = AutoSummarizer()
    embedder = AutoEmbedder()

    # Pipeline aggregates the discover/parse/summarize/embed components
    # and publishes progress through the shared updates hub.
    app.pipeline = Pipeline(
        db=app.db,
        updates_hub=app.updates_hub,
        parser=parser,
        discoverer=discoverer,
        summarizer=summarizer,
        embedder=embedder,
    )

    # Searcher shares the same embedder instance as the pipeline.
    app.searcher = HybridSearcher(
        db=app.db,
        embedder=embedder,
    )

    app.watcher = Watcher(
        db=app.db,
        pipeline=app.pipeline,
    )
    # Restore watches persisted in the database from previous runs.
    await app.watcher.initialize_from_database()

    logger.info(sm("Initialized services"))
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@app.after_serving
async def handle_shutdown():
    """Release resources once the server stops serving requests."""
    logger.info(sm("Closing DB"))
    await app.db.close()
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@app.before_request
async def log_request():
    """Stamp the request with a start time and log its arrival."""
    # start_time is read back in log_response to compute the duration.
    request.start_time = datetime.datetime.now()
    details = sm(
        "Incoming request",
        method=request.method,
        path=request.path,
        remote_addr=request.remote_addr,
        user_agent=request.headers.get('User-Agent'),
    )
    logger.info(details)
|
|
128
|
+
|
|
129
|
+
@app.after_request
async def log_response(response):
    """Log how the request finished, including its wall-clock duration."""
    if not hasattr(request, 'start_time'):
        # Request bypassed log_request (e.g. an early abort) — nothing to time.
        return response

    elapsed = datetime.datetime.now() - request.start_time
    logger.info(sm(
        "Request completed",
        method=request.method,
        path=request.path,
        status_code=response.status_code,
        duration_seconds=elapsed.total_seconds(),
    ))
    return response
|
|
141
|
+
|
|
142
|
+
@app.post("/echo")
async def echo():
    """Echo the posted JSON body back inside a small envelope."""
    payload = await request.get_json()
    return {"input": payload, "extra": True}
|
|
146
|
+
|
|
147
|
+
# ====== Sample Database Usage ======
|
|
148
|
+
|
|
149
|
+
@app.get("/get")
async def get():
    """Return every row of the files table as a list of dicts.

    Sample endpoint: runs raw SQL directly until a proper db.get_files
    helper exists.
    """
    async with app.db.acquire() as conn:
        rows = await conn.fetchall("SELECT * FROM files;")

    return list(map(dict, rows))
|
|
158
|
+
|
|
159
|
+
# ====== Main Indexing Route ======
|
|
160
|
+
# Note: Indexing routes have been moved to backend/api/index.py
|
|
161
|
+
# This endpoint remains for backward compatibility but will be deprecated
|
|
162
|
+
|
|
163
|
+
@dataclass
class IndexIn:
    """Request body for the legacy /index endpoint."""

    directory_path: str  # directory whose files should be indexed
|
|
166
|
+
|
|
167
|
+
@dataclass
class IndexOut:
    """Response body for the legacy /index endpoint."""

    success: bool
|
|
170
|
+
|
|
171
|
+
@app.post("/index")  # type: ignore[return-value]
@validate_request(IndexIn)
@validate_response(IndexOut, 201)
async def index(data: IndexIn) -> tuple[IndexOut, int]:
    """Legacy indexing stub kept for backward compatibility.

    Prefer /api/index/directory. This endpoint currently performs no
    work and always reports success.
    """
    # TODO: extract, summarize, and persist — roughly:
    #   for file in extract_files():
    #       parsed_file = parse_file(file)
    #       summarized_file = app.summarizer.summarize_file(parsed_file)
    #       await app.db.insert_file(summarized_file)
    return IndexOut(success=True), 201
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def run() -> None:
    """Serve the app on the HOST/PORT taken from app.config.

    The auto-reloader is disabled; NOTE(review): presumably to avoid
    re-running the side-effectful startup (service wiring, watches) —
    confirm before enabling it.
    """
    app.run(
        host=app.config['HOST'],
        port=app.config['PORT'],
        use_reloader=False,
    )
|