cosma_backend-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cosma_backend/__init__.py +14 -0
- cosma_backend/__main__.py +4 -0
- cosma_backend/api/__init__.py +29 -0
- cosma_backend/api/files.py +154 -0
- cosma_backend/api/index.py +114 -0
- cosma_backend/api/models.py +28 -0
- cosma_backend/api/search.py +166 -0
- cosma_backend/api/status.py +28 -0
- cosma_backend/api/updates.py +67 -0
- cosma_backend/api/watch.py +156 -0
- cosma_backend/app.py +192 -0
- cosma_backend/db/__init__.py +2 -0
- cosma_backend/db/database.py +638 -0
- cosma_backend/discoverer/__init__.py +1 -0
- cosma_backend/discoverer/discoverer.py +34 -0
- cosma_backend/embedder/__init__.py +1 -0
- cosma_backend/embedder/embedder.py +637 -0
- cosma_backend/logging.py +73 -0
- cosma_backend/models/__init__.py +3 -0
- cosma_backend/models/file.py +169 -0
- cosma_backend/models/status.py +10 -0
- cosma_backend/models/update.py +202 -0
- cosma_backend/models/watch.py +132 -0
- cosma_backend/pipeline/__init__.py +2 -0
- cosma_backend/pipeline/pipeline.py +222 -0
- cosma_backend/schema.sql +319 -0
- cosma_backend/searcher/__init__.py +1 -0
- cosma_backend/searcher/searcher.py +397 -0
- cosma_backend/summarizer/__init__.py +44 -0
- cosma_backend/summarizer/summarizer.py +1075 -0
- cosma_backend/utils/bundled.py +24 -0
- cosma_backend/utils/pubsub.py +31 -0
- cosma_backend/utils/sse.py +92 -0
- cosma_backend/watcher/__init__.py +1 -0
- cosma_backend/watcher/awatchdog.py +80 -0
- cosma_backend/watcher/watcher.py +257 -0
- cosma_backend-0.1.0.dist-info/METADATA +23 -0
- cosma_backend-0.1.0.dist-info/RECORD +39 -0
- cosma_backend-0.1.0.dist-info/WHEEL +4 -0
+++ cosma_backend/utils/bundled.py
@@ -0,0 +1,24 @@
+import sys
+from importlib.resources import files
+from pathlib import Path
+
+def get_bundled_file(relative_path: str):
+    """Get path to bundled file, works in dev and production"""
+    if getattr(sys, 'frozen', False):
+        # Running in PyInstaller bundle
+        base_path = Path(sys._MEIPASS) # type: ignore
+        return base_path / relative_path
+    else:
+        # Running in normal Python - use importlib.resources
+        # The package is 'backend' and schema.sql is in the package root
+        return files('backend').joinpath(relative_path)
+
+def get_bundled_file_text(relative_path: str) -> str:
+    """Get text content of bundled file, works in dev and production"""
+    if getattr(sys, 'frozen', False):
+        # Running in PyInstaller bundle
+        file_path = Path(sys._MEIPASS) / relative_path # type: ignore
+        return file_path.read_text()
+    else:
+        # Running in normal Python - use importlib.resources
+        return files('backend').joinpath(relative_path).read_text()
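A minimal usage sketch of these helpers. Note that the lookup uses the import name 'backend' while the wheel installs the code as 'cosma_backend' (see the RECORD below), so this assumes the package resolves as 'backend' at runtime:

```python
# Hypothetical usage sketch; 'schema.sql' ships in the package root per RECORD.
schema_path = get_bundled_file("schema.sql")
schema_sql = get_bundled_file_text("schema.sql")
print(schema_path, len(schema_sql))
```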
+++ cosma_backend/utils/pubsub.py
@@ -0,0 +1,31 @@
+# adapted from https://gist.github.com/appeltel/fd3ddeeed6c330c7208502462639d2c9
+
+import asyncio
+from contextlib import contextmanager
+import logging
+from typing import TypeVar, Generic
+
+logger = logging.getLogger(__name__)
+
+T = TypeVar('T')
+
+
+class Hub(Generic[T]):
+    subscriptions: set[asyncio.Queue[T]]
+
+    def __init__(self):
+        self.subscriptions = set()
+
+    def publish(self, message: T):
+        for queue in self.subscriptions:
+            queue.put_nowait(message)
+
+
+@contextmanager
+def subscribe(hub: Hub[T]):
+    queue: asyncio.Queue[T] = asyncio.Queue()
+    hub.subscriptions.add(queue)
+    try:
+        yield queue
+    finally:
+        hub.subscriptions.remove(queue)
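A minimal sketch of how this hub is meant to be used: `subscribe` hands each consumer its own queue and removes it again on exit, while `publish` fans a message out to every live queue:

```python
import asyncio

async def main() -> None:
    hub: Hub[str] = Hub()

    async def listener() -> None:
        with subscribe(hub) as queue:  # queue registered here, removed on exit
            print(await queue.get())

    task = asyncio.create_task(listener())
    await asyncio.sleep(0)   # let the listener subscribe before publishing
    hub.publish("hello")     # put_nowait into every subscribed queue
    await task

asyncio.run(main())
```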
+++ cosma_backend/utils/sse.py
@@ -0,0 +1,92 @@
+"""
+ServerSentEvent helper class for Quart SSE endpoints
+"""
+import json
+from typing import Optional, Any
+
+
+def sse_comment(text: str = "keepalive") -> str:
+    """
+    Create an SSE comment for keep-alive or debugging purposes.
+
+    According to the SSE spec, lines starting with ':' are comments
+    and are ignored by clients. These are useful for:
+    - Keep-alive messages to prevent connection timeouts
+    - Debugging/logging without affecting the client
+
+    Args:
+        text: Optional comment text (default: "keepalive")
+
+    Returns:
+        Properly formatted SSE comment string with trailing double newline
+
+    Example:
+        >>> sse_comment()
+        ': keepalive\\n\\n'
+        >>> sse_comment("heartbeat")
+        ': heartbeat\\n\\n'
+    """
+    return f": {text}\n\n"
+
+
+class ServerSentEvent:
+    """
+    Helper class to format Server-Sent Events according to the SSE specification.
+
+    SSE format:
+        event: event_name
+        id: event_id
+        retry: retry_time
+        data: message_data
+
+    """
+
+    def __init__(
+        self,
+        data: Any,
+        event: Optional[str] = None,
+        id: Optional[str] = None,
+        retry: Optional[int] = None,
+    ):
+        """
+        Args:
+            data: The message data (will be JSON-encoded if not a string)
+            event: Optional event name for named events
+            id: Optional event ID (used for reconnection)
+            retry: Optional reconnection time in milliseconds
+        """
+        self.data = data
+        self.event = event
+        self.id = id
+        self.retry = retry
+
+    def encode(self) -> str:
+        """
+        Encode the event in SSE format.
+
+        Returns:
+            Properly formatted SSE string with trailing double newline
+        """
+        lines = []
+
+        if self.event:
+            lines.append(f"event: {self.event}")
+
+        if self.id:
+            lines.append(f"id: {self.id}")
+
+        if self.retry:
+            lines.append(f"retry: {self.retry}")
+
+        # Handle data - convert to JSON if not a string
+        if isinstance(self.data, str):
+            data_str = self.data
+        else:
+            data_str = json.dumps(self.data)
+
+        # Support multi-line data
+        for line in data_str.splitlines():
+            lines.append(f"data: {line}")
+
+        # SSE spec requires double newline at the end
+        return "\n".join(lines) + "\n\n"
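For example, a JSON payload encodes like this (a sketch; `encode` emits event, id, and retry fields first, then one `data:` line per payload line):

```python
msg = ServerSentEvent({"progress": 42}, event="status", id="7")
print(msg.encode())
# event: status
# id: 7
# data: {"progress": 42}
# (a blank line terminates the event)
```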
+++ cosma_backend/watcher/__init__.py
@@ -0,0 +1 @@
+from .watcher import Watcher as Watcher
+++ cosma_backend/watcher/awatchdog.py
@@ -0,0 +1,80 @@
+import asyncio
+import functools
+from pathlib import Path
+from typing import Optional
+
+from watchdog.events import FileSystemEvent, FileSystemEventHandler
+from watchdog.observers import Observer
+from watchdog.observers.api import BaseObserver
+
+
+class _EventHandler(FileSystemEventHandler):
+    def __init__(self, queue: asyncio.Queue, loop: asyncio.AbstractEventLoop,
+                 *args, **kwargs):
+        self._loop = loop
+        self._queue = queue
+        super().__init__(*args, **kwargs)
+
+    def on_created(self, event: FileSystemEvent) -> None:
+        self._loop.call_soon_threadsafe(self._queue.put_nowait, event)
+
+    def on_modified(self, event: FileSystemEvent) -> None:
+        self._loop.call_soon_threadsafe(self._queue.put_nowait, event)
+
+    def on_deleted(self, event: FileSystemEvent) -> None:
+        self._loop.call_soon_threadsafe(self._queue.put_nowait, event)
+
+    def on_moved(self, event: FileSystemEvent) -> None:
+        self._loop.call_soon_threadsafe(self._queue.put_nowait, event)
+
+
+class EventIterator:
+    def __init__(self, queue: asyncio.Queue,
+                 loop: Optional[asyncio.BaseEventLoop] = None):
+        self.queue = queue
+
+    def __aiter__(self):
+        return self
+
+    async def __anext__(self):
+        item = await self.queue.get()
+
+        if item is None:
+            raise StopAsyncIteration
+
+        return item
+
+
+def _watch(path: Path, queue: asyncio.Queue, loop: asyncio.AbstractEventLoop,
+           recursive: bool = False) -> BaseObserver:
+    """Watch a directory for changes."""
+
+    handler = _EventHandler(queue, loop)
+
+    observer = Observer()
+    observer.schedule(handler, str(path), recursive=recursive)
+    observer.start()
+    return observer
+
+
+async def watch(path: Path, queue: asyncio.Queue, recursive: bool = False) -> BaseObserver:
+    loop = asyncio.get_running_loop()
+    partial = functools.partial(_watch, path=path, queue=queue, loop=loop, recursive=recursive)
+    return await asyncio.to_thread(partial)
+
+
+# async def consume(queue: asyncio.Queue) -> None:
+#     async for event in EventIterator(queue):
+#         print("Got an event!", event)
+
+
+# if __name__ == "__main__":
+#     loop = asyncio.get_event_loop()
+#     queue = asyncio.Queue(loop=loop)
+
+#     futures = [
+#         loop.run_in_executor(None, watch, Path("."), queue, loop, False),
+#         consume(queue),
+#     ]
+
+#     loop.run_until_complete(asyncio.gather(*futures))
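The commented-out consumer above targets an older asyncio API. A working equivalent against the current `watch`/`EventIterator` interface might look like this sketch:

```python
import asyncio
from pathlib import Path

async def main() -> None:
    queue: asyncio.Queue = asyncio.Queue()
    observer = await watch(Path("."), queue, recursive=True)
    try:
        # The iterator only ends if None is enqueued; interrupt to stop the demo.
        async for event in EventIterator(queue):
            print("Got an event!", event)
    finally:
        observer.stop()
        observer.join()

asyncio.run(main())
```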
+++ cosma_backend/watcher/watcher.py
@@ -0,0 +1,257 @@
+import asyncio
+import datetime
+import logging
+from pathlib import Path
+from typing import Optional
+
+from watchdog.events import (
+    FileSystemEvent,
+    FileCreatedEvent,
+    FileModifiedEvent,
+    FileDeletedEvent,
+    FileMovedEvent,
+    DirCreatedEvent,
+    DirModifiedEvent,
+    DirDeletedEvent,
+    DirMovedEvent,
+)
+from watchdog.observers.api import BaseObserver
+
+from backend.db import Database
+from backend.logging import sm
+from backend.models import File
+from backend.models.watch import WatchedDirectory
+from backend.models.update import Update
+from backend.pipeline import Pipeline
+from backend.utils.pubsub import Hub
+from backend.watcher.awatchdog import watch
+
+logger = logging.getLogger(__name__)
+
+
+class WatcherJob:
+    pipeline: Pipeline
+    db: Database
+    watched_dir: WatchedDirectory
+    queue: asyncio.Queue[FileSystemEvent]
+    observer: Optional[BaseObserver]
+    task: Optional[asyncio.Task]
+    closed: bool
+
+    def __init__(self, watched_dir: WatchedDirectory, pipeline: Pipeline, db: Database):
+        self.watched_dir = watched_dir
+        self.pipeline = pipeline
+        self.db = db
+        self.queue = asyncio.Queue()
+        self.observer = None
+        self.closed = False
+        self.task = None
+
+    def _publish_update(self, update: Update):
+        """Publish an update through the pipeline's updates hub."""
+        if self.pipeline.updates_hub:
+            self.pipeline.updates_hub.publish(update)
+
+    async def do_initial_processing(self):
+        await self.pipeline.process_directory(self.watched_dir.path)
+
+    async def start(self):
+        logger.info(sm("Starting watchdog observer", watched_dir=self.watched_dir))
+
+        # Publish watch started update
+        from backend.models.update import UpdateOpcode
+        self._publish_update(Update.create(
+            UpdateOpcode.WATCH_STARTED,
+            path=str(self.watched_dir.path),
+            recursive=self.watched_dir.recursive,
+            file_pattern=self.watched_dir.file_pattern
+        ))
+
+        self.observer = await watch(self.watched_dir.path, self.queue, recursive=self.watched_dir.recursive)
+        self.task = asyncio.create_task(self.consumer_task())
+        asyncio.create_task(self.do_initial_processing())
+
+    async def stop(self):
+        self.closed = True
+        if self.task is not None:
+            self.task.cancel()
+        if self.observer is not None:
+            self.observer.unschedule_all()
+
+    async def consumer_task(self):
+        while not self.closed:
+            event = await self.queue.get()
+
+            # Skip directory events - we only care about files
+            if isinstance(event, (DirCreatedEvent, DirModifiedEvent, DirDeletedEvent, DirMovedEvent)):
+                logger.debug(sm("Skipping directory event", event_type=type(event).__name__, path=event.src_path))
+                continue
+
+            try:
+                # Handle different event types
+                if isinstance(event, FileDeletedEvent):
+                    logger.info(sm("File deleted", path=event.src_path))
+                    path = Path(str(event.src_path)).resolve()
+
+                    self._publish_update(Update.file_deleted(str(path)))
+                    await self.db.delete_file(str(path))
+
+                elif isinstance(event, FileMovedEvent):
+                    # Handle moved files as delete old + create new
+                    logger.info(sm("File moved", src=event.src_path, dest=event.dest_path))
+                    src_path = Path(str(event.src_path)).resolve()
+                    dest_path = Path(str(event.dest_path)).resolve()
+
+                    self._publish_update(Update.file_moved(str(src_path), str(dest_path)))
+                    await self.db.delete_file(str(src_path))
+
+                    # Check if destination file type is supported before processing
+                    dest_file = File.from_path(dest_path)
+                    if await self.pipeline.is_supported(dest_file):
+                        await self.pipeline.process_file(dest_file)
+                    else:
+                        logger.debug(sm("Skipping unsupported file type", path=str(dest_path)))
+
+                elif isinstance(event, (FileCreatedEvent, FileModifiedEvent)):
+                    # Handle created and modified files the same way - process them
+                    event_type = "created" if isinstance(event, FileCreatedEvent) else "modified"
+                    logger.info(sm(f"File {event_type}", path=event.src_path))
+                    path = Path(str(event.src_path)).resolve()
+
+                    # Publish file system event update
+                    if isinstance(event, FileCreatedEvent):
+                        self._publish_update(Update.file_created(str(path)))
+                    else:
+                        self._publish_update(Update.file_modified(str(path)))
+
+                    # Check if file type is supported before processing
+                    file = File.from_path(path)
+                    if await self.pipeline.is_supported(file):
+                        await self.pipeline.process_file(file)
+                    else:
+                        logger.debug(sm("Skipping unsupported file type", path=str(path)))
+
+                else:
+                    logger.warning(sm("Unknown event type", event_type=type(event).__name__, path=event.src_path))
+
+            except Exception as e:
+                logger.error(sm("Error processing file system event", event_type=type(event).__name__, path=event.src_path, error=e))
+
+
+class Watcher:
+    jobs: set[WatcherJob]
+
+    def __init__(
+        self,
+        db: Database,
+        pipeline: Pipeline,
+        updates_hub: Optional[Hub] = None,
+    ):
+        self.db = db
+        self.pipeline = pipeline
+        self.updates_hub = updates_hub
+
+        self.jobs = set()
+
+    async def create_job(self, watched_dir: WatchedDirectory):
+        """
+        Create and start a watcher job for a watched directory.
+
+        Args:
+            watched_dir: WatchedDirectory instance to watch
+        """
+        job = WatcherJob(watched_dir, self.pipeline, self.db)
+        self.jobs.add(job)
+        await job.start()
+
+    async def start_watching(self, path: str | Path, recursive: bool = True, file_pattern: Optional[str] = None):
+        """
+        Start watching a directory for file changes.
+
+        Args:
+            path: Path to the directory to watch
+            recursive: Whether to watch subdirectories recursively
+            file_pattern: Optional glob pattern for filtering files (e.g., "*.pdf")
+
+        Raises:
+            ValueError: If the directory is already being watched or a parent directory is being watched
+        """
+        # Create WatchedDirectory model
+        path = Path(path).resolve()
+
+        # Check if this directory or any parent is already being watched
+        watched_dirs = await self.db.get_watched_directories(active_only=True)
+
+        for existing_dir in watched_dirs:
+            # Check if exact path is already being watched
+            if existing_dir.path == path:
+                logger.warning(sm("Directory already being watched", path=str(path)))
+                raise ValueError(f"Directory '{path}' is already being watched")
+
+            # Check if a parent directory is being watched with recursive=True
+            # A parent is watching this path if:
+            # 1. The parent is recursive
+            # 2. This path starts with the parent path
+            if existing_dir.recursive:
+                try:
+                    # Use relative_to to check if path is a subdirectory of existing_dir.path
+                    path.relative_to(existing_dir.path)
+                    # If we get here, path is a subdirectory of existing_dir.path
+                    logger.warning(sm("Parent directory already being watched",
+                                      path=str(path),
+                                      parent=str(existing_dir.path)))
+                    raise ValueError(f"Parent directory '{existing_dir.path}' is already watching '{path}' recursively")
+                except ValueError:
+                    # relative_to raises ValueError if path is not relative to existing_dir.path
+                    # This means it's not a subdirectory, so continue checking
+                    pass
+
+        watched_dir = WatchedDirectory.from_path(path, recursive=recursive, file_pattern=file_pattern)
+
+        # Add directory to watched_directories table in database
+        await self.db.add_watched_directory(watched_dir)
+
+        # Create and start the watcher job
+        await self.create_job(watched_dir)
+
+    async def initialize_from_database(self):
+        """
+        Create jobs for all active watched directories from the database.
+        This should be called on startup to restore watching state.
+        """
+        logger.info(sm("Initializing watcher from database"))
+
+        # Get all active watched directories
+        watched_dirs = await self.db.get_watched_directories(active_only=True)
+
+        if not watched_dirs:
+            logger.info(sm("No watched directories found in database"))
+            return
+
+        logger.info(sm("Found watched directories", count=len(watched_dirs)))
+
+        # Create jobs for each watched directory
+        for watched_dir in watched_dirs:
+            # Check if path still exists
+            if not watched_dir.path.exists():
+                logger.warning(sm("Watched directory no longer exists", path=watched_dir.path_str, id=watched_dir.id))
+                continue
+
+            if not watched_dir.path.is_dir():
+                logger.warning(sm("Watched path is not a directory", path=watched_dir.path_str, id=watched_dir.id))
+                continue
+
+            logger.info(sm("Creating job for watched directory",
+                           path=watched_dir.path_str,
+                           id=watched_dir.id,
+                           recursive=watched_dir.recursive,
+                           file_pattern=watched_dir.file_pattern))
+            try:
+                await self.create_job(watched_dir)
+            except Exception as e:
+                logger.error(sm("Failed to create job for watched directory",
+                                path=watched_dir.path_str,
+                                id=watched_dir.id,
+                                error=str(e)))
+
+        logger.info(sm("Watcher initialization complete", active_jobs=len(self.jobs)))
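Putting it together, startup wiring presumably looks something like the following sketch; `Database` and `Pipeline` construction is outside this diff, so treat them as placeholders:

```python
# Hypothetical wiring sketch; db and pipeline are assumed to be constructed
# elsewhere in the application (see cosma_backend/app.py in the file list).
async def run(db: Database, pipeline: Pipeline) -> None:
    watcher = Watcher(db, pipeline)

    # Restore any watches persisted in the watched_directories table
    await watcher.initialize_from_database()

    # Register a new directory; exact duplicates and paths already covered
    # by a recursive parent watch are rejected with ValueError
    try:
        await watcher.start_watching("/data/docs", recursive=True, file_pattern="*.pdf")
    except ValueError as e:
        print(e)
```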
+++ cosma_backend-0.1.0.dist-info/METADATA
@@ -0,0 +1,23 @@
+Metadata-Version: 2.3
+Name: cosma-backend
+Version: 0.1.0
+Summary: Flow backend
+Requires-Dist: asqlite>=2.0.0
+Requires-Dist: litellm>=1.77.5
+Requires-Dist: llama-cpp-python
+Requires-Dist: markitdown[docx,pdf,pptx]>=0.1.3
+Requires-Dist: ollama>=0.6.0
+Requires-Dist: quart>=0.20.0
+Requires-Dist: quart-schema>=0.22.0
+Requires-Dist: rich>=14.2.0
+Requires-Dist: sentence-transformers>=5.1.1
+Requires-Dist: sqlite-vec>=0.1.6
+Requires-Dist: tiktoken>=0.11.0
+Requires-Dist: uvicorn>=0.38.0
+Requires-Dist: watchdog>=6.0.0
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+
+# backend
+
+Cosma backend
+++ cosma_backend-0.1.0.dist-info/RECORD
@@ -0,0 +1,39 @@
+cosma_backend/__init__.py,sha256=rUTO7_YoWZiLT396hMThilPjq58aDxa88d2vwrfVDS0,350
+cosma_backend/__main__.py,sha256=XCrM3GNtyOO3WoGWi9q1orJ7YZ1ZFtyIvS9uDHjvTV8,60
+cosma_backend/api/__init__.py,sha256=e7hpuncNrF5oRoMIWdQ5qz2OnV-GJzF0nUSYnYQdc9g,906
+cosma_backend/api/files.py,sha256=rgL4gn3VWfTLE2_ZeYJRGNbiBeXhCt6QxAWIRluhcVM,4041
+cosma_backend/api/index.py,sha256=I1GBhgYU_7AUAOndtgsF0mUIJ_d_cBgpIt_iSeX4kUQ,3049
+cosma_backend/api/models.py,sha256=ofqYTBdNpJ5PEbFngoF0Fp-BgBjiS_xKRWO15Ir4Y9A,616
+cosma_backend/api/search.py,sha256=AarPHBEMt4VuufBvRods40vSM9XS24SxcBkbTTY5Ic8,4421
+cosma_backend/api/status.py,sha256=zYbqdSmDaYWe0mw8ImZh9VKb3HzmKLw6acTPB7VlMf4,680
+cosma_backend/api/updates.py,sha256=NEfr-EaUlP0E_Al6mAyoz9a-w6N9D_edIR0wz_DrfqM,2305
+cosma_backend/api/watch.py,sha256=R3jic9QlNwJftBf_Wdia4KY_xyjhQxCGQUtbwBstCvk,4189
+cosma_backend/app.py,sha256=pJc1VgQizKywxS2La2rXM1veEIo2WgGzaa0f1-1WfJM,5267
+cosma_backend/db/__init__.py,sha256=qjJ-hkOUO4wu2XLs83-VOfiC7KmERJAfJTH50SOf5b4,84
+cosma_backend/db/database.py,sha256=DmifOHuNooXc38JjTLR485tnetuMl5Dhtv_nBFbHyrg,23388
+cosma_backend/discoverer/__init__.py,sha256=LJiZ0WZxuh4vTRP2q0fOLR0MY-y5NtpQnYj-UN9vHDM,49
+cosma_backend/discoverer/discoverer.py,sha256=PY7y_6E-zUY5KuDo_OmiPx5ok7m8GhuazUA_RObGcJw,900
+cosma_backend/embedder/__init__.py,sha256=wah-YAXJlU0muzMcQEdsEss6L1uxr8ul1CzDoGr6gIA,51
+cosma_backend/embedder/embedder.py,sha256=rim5_u0vDR2_UPukAgVqjFo5YOOtbp5MHX9JjkSFE4w,23894
+cosma_backend/logging.py,sha256=UdzeCANJ8tp9ewi0wsVJnsNasCXHy36sUKxJ5gJofQw,2172
+cosma_backend/models/__init__.py,sha256=0zOKcsF38_0w2R7vqa6vegltk0yE719a6zhYIRkMPho,144
+cosma_backend/models/file.py,sha256=nW2S5F-UDZMTi1aAFqzaC5qXmYro5T98oksyfGTEJRg,5655
+cosma_backend/models/status.py,sha256=wcnkM954LUfHhz3MCCb2gr5qmbFG2Wady7VbZRfZ9w4,135
+cosma_backend/models/update.py,sha256=rNrS9s75g8xAf0YMZXPTXT4ynI5fnWiHL6CABowbYyQ,7969
+cosma_backend/models/watch.py,sha256=JUlBYzrLQKmA02DhCCg7Q3k4Kjs_7hsPrfzCsIYEdME,4233
+cosma_backend/pipeline/__init__.py,sha256=0bzogjLkCVWexfdymwVW8QfOBFn02hF-quWFBR5mPQw,98
+cosma_backend/pipeline/pipeline.py,sha256=-_EiidzaGNSwOL69Uw3BOr4N3vKmjn8ZzyHyoacl8VE,8460
+cosma_backend/schema.sql,sha256=gpnJpJdhwGzq8c9ZgED0FbkecJE4Me0bjcJLfo_Lm6g,10817
+cosma_backend/searcher/__init__.py,sha256=Cb_GU3h7QjGC4I_gRkSlY7tT4eH-F4fectoj5YNTrj8,55
+cosma_backend/searcher/searcher.py,sha256=bmxzlyhLzsQqvDuYX3QuWdRZJlxmJT_Jn-vo118y_Nk,14877
+cosma_backend/summarizer/__init__.py,sha256=8Dz2YfNbfd4CpBkW-iwftprQDtXqgj71jt5o-4g62RE,993
+cosma_backend/summarizer/summarizer.py,sha256=qXWl3dpd95vgzxYthUHNnkiE3qCZm4-AkASC4W9bC3Q,45069
+cosma_backend/utils/bundled.py,sha256=hSW-RdSvVxMfBiA76E2G5N3u5h791G02bjpE3hrEMP0,1007
+cosma_backend/utils/pubsub.py,sha256=Prvx9MR0Eht7n40KqsjEL5yLthgkp88YsuBfciIGowI,694
+cosma_backend/utils/sse.py,sha256=mGrTMtNK-vD9B72drKft6NGjdzRvD1lEP11lHmY2-m4,2493
+cosma_backend/watcher/__init__.py,sha256=pXE-xQsBGV9Fp5ceUs6n-nEM3fkKLV4t0ONo0vmPlKQ,40
+cosma_backend/watcher/awatchdog.py,sha256=LT_6BzTg5aUIhDlgwNN1doSKrN6xQr76yUl4TPpHU3o,2444
+cosma_backend/watcher/watcher.py,sha256=RyqLweQ8EnUCt0kqJlezEwxN4TdJDC7qlld8eBXISDs,10873
+cosma_backend-0.1.0.dist-info/WHEEL,sha256=DpNsHFUm_gffZe1FgzmqwuqiuPC6Y-uBCzibcJcdupM,78
+cosma_backend-0.1.0.dist-info/METADATA,sha256=EgYuhbC3T-iPSTrBE-IgHjnEXVIZnkG2bY35IqjaEFg,603
+cosma_backend-0.1.0.dist-info/RECORD,,