pensiev 0.25.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memos/__init__.py +6 -0
- memos/cmds/__init__.py +0 -0
- memos/cmds/library.py +1289 -0
- memos/cmds/plugin.py +96 -0
- memos/commands.py +865 -0
- memos/config.py +225 -0
- memos/crud.py +605 -0
- memos/databases/__init__.py +0 -0
- memos/databases/initializers.py +481 -0
- memos/dataset_extractor_for_florence.py +165 -0
- memos/dataset_extractor_for_internvl2.py +192 -0
- memos/default_config.yaml +88 -0
- memos/embedding.py +129 -0
- memos/frame_extractor.py +53 -0
- memos/logging_config.py +35 -0
- memos/main.py +104 -0
- memos/migrations/alembic/README +1 -0
- memos/migrations/alembic/__pycache__/env.cpython-310.pyc +0 -0
- memos/migrations/alembic/env.py +108 -0
- memos/migrations/alembic/script.py.mako +30 -0
- memos/migrations/alembic/versions/00904ac8c6fc_add_indexes_to_entitymodel.py +63 -0
- memos/migrations/alembic/versions/04acdaf75664_add_indices_to_entitytags_and_metadata.py +86 -0
- memos/migrations/alembic/versions/12504c5b1d3c_add_extra_columns_for_embedding.py +67 -0
- memos/migrations/alembic/versions/31a1ad0e10b3_add_entity_plugin_status.py +71 -0
- memos/migrations/alembic/versions/__pycache__/00904ac8c6fc_add_indexes_to_entitymodel.cpython-310.pyc +0 -0
- memos/migrations/alembic/versions/__pycache__/04acdaf75664_add_indices_to_entitytags_and_metadata.cpython-310.pyc +0 -0
- memos/migrations/alembic/versions/__pycache__/12504c5b1d3c_add_extra_columns_for_embedding.cpython-310.pyc +0 -0
- memos/migrations/alembic/versions/__pycache__/20f5ecab014d_add_entity_plugin_status.cpython-310.pyc +0 -0
- memos/migrations/alembic/versions/__pycache__/31a1ad0e10b3_add_entity_plugin_status.cpython-310.pyc +0 -0
- memos/migrations/alembic/versions/__pycache__/4fcb062c5128_add_extra_columns_for_embedding.cpython-310.pyc +0 -0
- memos/migrations/alembic/versions/__pycache__/d10c55fbb7d2_add_index_for_entity_file_type_group_.cpython-310.pyc +0 -0
- memos/migrations/alembic/versions/__pycache__/f8f158182416_add_active_app_index.cpython-310.pyc +0 -0
- memos/migrations/alembic/versions/d10c55fbb7d2_add_index_for_entity_file_type_group_.py +44 -0
- memos/migrations/alembic/versions/f8f158182416_add_active_app_index.py +75 -0
- memos/migrations/alembic.ini +116 -0
- memos/migrations.py +19 -0
- memos/models.py +199 -0
- memos/plugins/__init__.py +0 -0
- memos/plugins/ocr/__init__.py +0 -0
- memos/plugins/ocr/main.py +251 -0
- memos/plugins/ocr/models/ch_PP-OCRv4_det_infer.onnx +0 -0
- memos/plugins/ocr/models/ch_PP-OCRv4_rec_infer.onnx +0 -0
- memos/plugins/ocr/models/ch_ppocr_mobile_v2.0_cls_train.onnx +0 -0
- memos/plugins/ocr/ppocr-gpu.yaml +43 -0
- memos/plugins/ocr/ppocr.yaml +44 -0
- memos/plugins/ocr/server.py +227 -0
- memos/plugins/ocr/temp_ppocr.yaml +42 -0
- memos/plugins/vlm/__init__.py +0 -0
- memos/plugins/vlm/main.py +251 -0
- memos/prepare_dataset.py +107 -0
- memos/process_webp.py +55 -0
- memos/read_metadata.py +32 -0
- memos/record.py +358 -0
- memos/schemas.py +289 -0
- memos/search.py +1198 -0
- memos/server.py +883 -0
- memos/shotsum.py +105 -0
- memos/shotsum_with_ocr.py +145 -0
- memos/simple_tokenizer/dict/README.md +31 -0
- memos/simple_tokenizer/dict/hmm_model.utf8 +34 -0
- memos/simple_tokenizer/dict/idf.utf8 +258826 -0
- memos/simple_tokenizer/dict/jieba.dict.utf8 +348982 -0
- memos/simple_tokenizer/dict/pos_dict/char_state_tab.utf8 +6653 -0
- memos/simple_tokenizer/dict/pos_dict/prob_emit.utf8 +166 -0
- memos/simple_tokenizer/dict/pos_dict/prob_start.utf8 +259 -0
- memos/simple_tokenizer/dict/pos_dict/prob_trans.utf8 +5222 -0
- memos/simple_tokenizer/dict/stop_words.utf8 +1534 -0
- memos/simple_tokenizer/dict/user.dict.utf8 +4 -0
- memos/simple_tokenizer/linux/libsimple.so +0 -0
- memos/simple_tokenizer/macos/libsimple.dylib +0 -0
- memos/simple_tokenizer/windows/simple.dll +0 -0
- memos/static/_app/immutable/assets/0.e250c031.css +1 -0
- memos/static/_app/immutable/assets/_layout.e7937cfe.css +1 -0
- memos/static/_app/immutable/chunks/index.5c08976b.js +1 -0
- memos/static/_app/immutable/chunks/index.60ee613b.js +4 -0
- memos/static/_app/immutable/chunks/runtime.a7926cf6.js +5 -0
- memos/static/_app/immutable/chunks/scheduler.5c1cff6e.js +1 -0
- memos/static/_app/immutable/chunks/singletons.583bdf4e.js +1 -0
- memos/static/_app/immutable/entry/app.666c1643.js +1 -0
- memos/static/_app/immutable/entry/start.aed5c701.js +3 -0
- memos/static/_app/immutable/nodes/0.5862ea38.js +7 -0
- memos/static/_app/immutable/nodes/1.35378a5e.js +1 -0
- memos/static/_app/immutable/nodes/2.1ccf9ea5.js +81 -0
- memos/static/_app/version.json +1 -0
- memos/static/app.html +36 -0
- memos/static/favicon.png +0 -0
- memos/static/logos/memos_logo_1024.png +0 -0
- memos/static/logos/memos_logo_1024@2x.png +0 -0
- memos/static/logos/memos_logo_128.png +0 -0
- memos/static/logos/memos_logo_128@2x.png +0 -0
- memos/static/logos/memos_logo_16.png +0 -0
- memos/static/logos/memos_logo_16@2x.png +0 -0
- memos/static/logos/memos_logo_256.png +0 -0
- memos/static/logos/memos_logo_256@2x.png +0 -0
- memos/static/logos/memos_logo_32.png +0 -0
- memos/static/logos/memos_logo_32@2x.png +0 -0
- memos/static/logos/memos_logo_512.png +0 -0
- memos/static/logos/memos_logo_512@2x.png +0 -0
- memos/static/logos/memos_logo_64.png +0 -0
- memos/static/logos/memos_logo_64@2x.png +0 -0
- memos/test_server.py +802 -0
- memos/utils.py +49 -0
- memos_ml_backends/florence2_server.py +176 -0
- memos_ml_backends/qwen2vl_server.py +182 -0
- memos_ml_backends/schemas.py +48 -0
- pensiev-0.25.5.dist-info/LICENSE +201 -0
- pensiev-0.25.5.dist-info/METADATA +541 -0
- pensiev-0.25.5.dist-info/RECORD +111 -0
- pensiev-0.25.5.dist-info/WHEEL +5 -0
- pensiev-0.25.5.dist-info/entry_points.txt +2 -0
- pensiev-0.25.5.dist-info/top_level.txt +2 -0
memos/server.py
ADDED
@@ -0,0 +1,883 @@
|
|
1
|
+
import os
|
2
|
+
import httpx
|
3
|
+
import uvicorn
|
4
|
+
import mimetypes
|
5
|
+
|
6
|
+
import logfire
|
7
|
+
|
8
|
+
from fastapi import FastAPI, HTTPException, Depends, status, Query, Request
|
9
|
+
from fastapi.middleware.cors import CORSMiddleware
|
10
|
+
from fastapi.staticfiles import StaticFiles
|
11
|
+
from fastapi.responses import FileResponse, JSONResponse
|
12
|
+
from fastapi.encoders import jsonable_encoder
|
13
|
+
from sqlalchemy.orm import Session
|
14
|
+
from sqlalchemy.orm import sessionmaker
|
15
|
+
from typing import List, Annotated
|
16
|
+
from pathlib import Path
|
17
|
+
import json
|
18
|
+
import cv2
|
19
|
+
from PIL import Image
|
20
|
+
import logging
|
21
|
+
|
22
|
+
from .config import settings
|
23
|
+
from memos.plugins.vlm import main as vlm_main
|
24
|
+
from memos.plugins.ocr import main as ocr_main
|
25
|
+
from . import crud
|
26
|
+
from .search import create_search_provider
|
27
|
+
from .schemas import (
|
28
|
+
Library,
|
29
|
+
Folder,
|
30
|
+
Entity,
|
31
|
+
Plugin,
|
32
|
+
NewLibraryParam,
|
33
|
+
NewFoldersParam,
|
34
|
+
NewEntityParam,
|
35
|
+
UpdateEntityParam,
|
36
|
+
NewPluginParam,
|
37
|
+
NewLibraryPluginParam,
|
38
|
+
UpdateEntityTagsParam,
|
39
|
+
UpdateEntityMetadataParam,
|
40
|
+
MetadataType,
|
41
|
+
MetadataIndexItem,
|
42
|
+
EntitySearchResult,
|
43
|
+
SearchResult,
|
44
|
+
SearchHit,
|
45
|
+
RequestParams,
|
46
|
+
EntityContext,
|
47
|
+
BatchIndexRequest,
|
48
|
+
FacetCount,
|
49
|
+
Facet,
|
50
|
+
FacetStats,
|
51
|
+
)
|
52
|
+
from .read_metadata import read_metadata
|
53
|
+
from .logging_config import LOGGING_CONFIG
|
54
|
+
from .databases.initializers import create_db_initializer
|
55
|
+
|
56
|
+
# Root logger: emit INFO and above.
logging.basicConfig(level=logging.INFO)

# Register an explicit MIME type for ".js".
# Workaround for https://github.com/python/cpython/issues/88141#issuecomment-1631735902:
# on some Windows machines .js files are otherwise served as text/plain and
# the browser will not render them correctly.
mimetypes.add_type("application/javascript", ".js")

app = FastAPI()

# Telemetry: only ship to logfire when a token is configured; "/files" is
# excluded from FastAPI instrumentation.
logfire.configure(send_to_logfire="if-token-present")
logfire.instrument_fastapi(app, excluded_urls=["/files"])

# Database engine and its initializer, both derived from the settings object.
engine, initializer = create_db_initializer(settings)

# The search provider is selected from the database URL.
search_provider = create_search_provider(settings.database_url)

# Session factory used by get_db().
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)

logfire.instrument_sqlalchemy(engine=engine)

# CORS: every origin, method and header is accepted, with credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)

# Directory containing this module; static assets are resolved against it.
current_dir = os.path.dirname(__file__)

app.mount(
    "/_app",
    StaticFiles(html=True, directory=os.path.join(current_dir, "static/_app")),
)
|
95
|
+
|
96
|
+
|
97
|
+
@app.get("/health")
async def health():
    """Health-check endpoint; always answers with a fixed ``status: ok`` body."""
    payload = {"status": "ok"}
    return payload
|
100
|
+
|
101
|
+
|
102
|
+
@app.get("/favicon.png", response_class=FileResponse)
async def favicon_png():
    """Serve the favicon bundled under ``static/`` next to this module."""
    icon_path = os.path.join(current_dir, "static/favicon.png")
    return FileResponse(icon_path)
|
105
|
+
|
106
|
+
|
107
|
+
@app.get("/favicon.ico", response_class=FileResponse)
async def favicon_ico():
    """Answer ``/favicon.ico`` requests with the same PNG used for ``/favicon.png``."""
    icon_path = os.path.join(current_dir, "static/favicon.png")
    return FileResponse(icon_path)
|
110
|
+
|
111
|
+
|
112
|
+
@app.get("/")
async def serve_spa():
    """Serve the web UI entry page (``static/app.html``) at the site root."""
    entry_page = os.path.join(current_dir, "static/app.html")
    return FileResponse(entry_page)
|
115
|
+
|
116
|
+
|
117
|
+
def get_db():
    """Yield a fresh ``SessionLocal`` session and close it afterwards.

    Written as a generator so it can be used with ``Depends``: the session
    is closed in the ``finally`` clause once the consumer is done, whether
    or not an exception was raised.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
|
123
|
+
|
124
|
+
|
125
|
+
@app.post("/libraries", response_model=Library, tags=["library"])
def new_library(library_param: NewLibraryParam, db: Session = Depends(get_db)):
    """Create a library, refusing names that are already in use.

    Folders repeated in the request (same ``path``) are collapsed to their
    first occurrence before the library is created.

    Raises:
        HTTPException: 400 when ``crud.get_library_by_name`` finds a library
            with the requested name.
    """
    # The name lookup is delegated to crud (the uniqueness rule lives there).
    if crud.get_library_by_name(library_param.name, db):
        raise HTTPException(
            detail="Library with this name already exists",
            status_code=status.HTTP_400_BAD_REQUEST,
        )

    # Keep only the first folder seen for each path; dict insertion order
    # preserves the order in which the paths first appeared.
    first_by_path = {}
    for candidate in library_param.folders:
        first_by_path.setdefault(candidate.path, candidate)
    library_param.folders = list(first_by_path.values())

    return crud.create_library(library_param, db)
|
146
|
+
|
147
|
+
|
148
|
+
@app.get("/libraries", response_model=List[Library], tags=["library"])
def list_libraries(db: Session = Depends(get_db)):
    """Return every library reported by ``crud.get_libraries``."""
    return crud.get_libraries(db)
|
152
|
+
|
153
|
+
|
154
|
+
@app.get("/libraries/{library_id}", response_model=Library, tags=["library"])
def get_library_by_id(library_id: int, db: Session = Depends(get_db)):
    """Look up one library by id.

    Returns the library, or a 404 ``JSONResponse`` with a ``detail`` message
    when no such library exists.
    """
    found = crud.get_library_by_id(library_id, db)
    if found is not None:
        return found

    return JSONResponse(
        status_code=status.HTTP_404_NOT_FOUND,
        content={"detail": "Library not found"},
    )
|
163
|
+
|
164
|
+
|
165
|
+
@app.post("/libraries/{library_id}/folders", response_model=Library, tags=["library"])
def new_folders(
    library_id: int,
    folders: NewFoldersParam,
    db: Session = Depends(get_db),
):
    """Attach additional folders to an existing library.

    Raises:
        HTTPException: 404 when the library does not exist; 400 when any
            requested folder path (compared as ``str``) is already present
            in the library.
    """
    library = crud.get_library_by_id(library_id, db)
    if library is None:
        raise HTTPException(
            detail="Library not found", status_code=status.HTTP_404_NOT_FOUND
        )

    known_paths = [attached.path for attached in library.folders]
    for requested in folders.folders:
        # The whole request is rejected as soon as one path is already known.
        if str(requested.path) in known_paths:
            raise HTTPException(
                detail="Folder already exists in the library",
                status_code=status.HTTP_400_BAD_REQUEST,
            )

    return crud.add_folders(library_id=library.id, folders=folders, db=db)
|
185
|
+
|
186
|
+
|
187
|
+
async def trigger_webhooks(
    library: Library,
    entity: Entity,
    request: Request,
    plugins: List[int] = None,
    db: Session = Depends(get_db),
):
    """Trigger webhooks for plugins that haven't processed the entity yet.

    A plugin of ``library`` is called only when it is in ``plugins`` (if a
    list was given), is reported as pending by ``crud.get_pending_plugins``
    and has a ``webhook_url``. The entity is POSTed as JSON with a
    ``Location`` header pointing at its ``get_entity_by_id`` URL. Webhook
    URLs starting with "/" are resolved against the base URL of ``request``.

    Responses with a status below 400 are recorded through
    ``crud.record_plugin_processed``; anything else, including exceptions,
    is logged and does not abort the remaining plugins.
    """
    async with httpx.AsyncClient() as client:
        pending_plugins = crud.get_pending_plugins(entity.id, library.id, db)
        queued_calls = []

        for plugin in library.plugins:
            # Honour the explicit plugin filter, when one was supplied.
            explicitly_excluded = plugins is not None and plugin.id not in plugins
            if explicitly_excluded:
                continue

            # Nothing to do for plugins that already handled this entity,
            # or that have no webhook configured.
            if plugin.id not in pending_plugins or not plugin.webhook_url:
                continue

            logging.info("Triggering plugin %d for entity %d", plugin.id, entity.id)
            location = str(request.url_for("get_entity_by_id", entity_id=entity.id))

            target_url = plugin.webhook_url
            if target_url.startswith("/"):
                # Relative webhook: prefix the base URL minus its trailing "/".
                target_url = str(request.base_url)[:-1] + target_url

            pending_call = client.post(
                target_url,
                json=entity.model_dump(mode="json"),
                headers={"Location": location},
                timeout=60.0,
            )
            queued_calls.append((plugin.id, pending_call))

        # The queued coroutines are awaited one after another.
        for plugin_id, pending_call in queued_calls:
            try:
                response = await pending_call
                if response.status_code >= 400:
                    logging.error(
                        "Error processing entity with plugin %d: %d - %s",
                        plugin_id,
                        response.status_code,
                        response.text,
                    )
                else:
                    # Record successful plugin processing
                    crud.record_plugin_processed(entity.id, plugin_id, db)
            except Exception as e:
                logging.error(
                    "Error processing entity with plugin %d: %s",
                    plugin_id,
                    str(e),
                )
|
241
|
+
|
242
|
+
|
243
|
+
@app.post("/libraries/{library_id}/entities", response_model=Entity, tags=["entity"])
async def new_entity(
    new_entity: NewEntityParam,
    library_id: int,
    request: Request,
    db: Session = Depends(get_db),
    plugins: Annotated[List[int] | None, Query()] = None,
    trigger_webhooks_flag: bool = True,
    update_index: bool = False,
):
    """Create an entity in a library, then optionally notify plugins and index it.

    Webhooks are triggered unless ``trigger_webhooks_flag`` is false
    (``plugins`` restricts which ones). The search index is refreshed only
    when ``update_index`` is true.

    Raises:
        HTTPException: 404 when the library does not exist.
    """
    owning_library = crud.get_library_by_id(library_id, db)
    if owning_library is None:
        raise HTTPException(
            detail="Library not found", status_code=status.HTTP_404_NOT_FOUND
        )

    with logfire.span("create new entity {filepath=}", filepath=new_entity.filepath):
        created = crud.create_entity(library_id, new_entity, db)

    if trigger_webhooks_flag:
        with logfire.span("trigger webhooks {entity_id=}", entity_id=created.id):
            await trigger_webhooks(owning_library, created, request, plugins, db)

    if update_index:
        with logfire.span("update entity index {entity_id=}", entity_id=created.id):
            search_provider.update_entity_index(created.id, db)

    return created
|
271
|
+
|
272
|
+
|
273
|
+
@app.get(
    "/libraries/{library_id}/folders/{folder_id}/entities",
    response_model=List[Entity],
    tags=["entity"],
)
def list_entities_in_folder(
    library_id: int,
    folder_id: int,
    limit: Annotated[int, Query(ge=1, le=400)] = 10,
    offset: int = 0,
    path_prefix: str | None = None,
    db: Session = Depends(get_db),
):
    """Page through the entities of one folder of a library.

    The JSON body is the list of entities; the total count returned by
    ``crud.get_entities_of_folder`` is exposed in the ``X-Total-Count``
    response header.

    Raises:
        HTTPException: 404 when the library does not exist or the folder is
            not one of the library's folders.
    """
    library = crud.get_library_by_id(library_id, db)
    if library is None:
        raise HTTPException(
            detail="Library not found", status_code=status.HTTP_404_NOT_FOUND
        )

    folder_belongs_to_library = any(
        folder.id == folder_id for folder in library.folders
    )
    if not folder_belongs_to_library:
        raise HTTPException(
            detail="Folder not found in the specified library",
            status_code=status.HTTP_404_NOT_FOUND,
        )

    entities, total_count = crud.get_entities_of_folder(
        library_id, folder_id, db, limit, offset, path_prefix
    )
    count_header = {"X-Total-Count": str(total_count)}
    return JSONResponse(content=jsonable_encoder(entities), headers=count_header)
|
304
|
+
|
305
|
+
|
306
|
+
@app.get(
    "/libraries/{library_id}/entities/by-filepath",
    response_model=Entity,
    tags=["entity"],
)
def get_entity_by_filepath(
    library_id: int, filepath: str, db: Session = Depends(get_db)
):
    """Find an entity by file path, scoped to one library.

    Returns the entity, or a 404 ``JSONResponse`` when nothing matches the
    path or the match belongs to a different library.
    """
    match = crud.get_entity_by_filepath(filepath, db)
    if match is not None and match.library_id == library_id:
        return match

    return JSONResponse(
        status_code=status.HTTP_404_NOT_FOUND,
        content={"detail": "Entity not found"},
    )
|
321
|
+
|
322
|
+
|
323
|
+
@app.post(
    "/libraries/{library_id}/entities/by-filepaths",
    response_model=List[Entity],
    tags=["entity"],
)
def get_entities_by_filepaths(
    library_id: int, filepaths: List[str], db: Session = Depends(get_db)
):
    """Look up several entities by file path, keeping only those of ``library_id``."""
    in_library = []
    for candidate in crud.get_entities_by_filepaths(filepaths, db):
        if candidate.library_id == library_id:
            in_library.append(candidate)
    return in_library
|
333
|
+
|
334
|
+
|
335
|
+
@app.get("/entities/{entity_id}", response_model=Entity, tags=["entity"])
def get_entity_by_id(entity_id: int, db: Session = Depends(get_db)):
    """Fetch one entity by id.

    Returns the entity, or a 404 ``JSONResponse`` when it does not exist.
    """
    found = crud.get_entity_by_id(entity_id, db)
    if found is not None:
        return found

    return JSONResponse(
        status_code=status.HTTP_404_NOT_FOUND,
        content={"detail": "Entity not found"},
    )
|
344
|
+
|
345
|
+
|
346
|
+
@app.get(
    "/libraries/{library_id}/entities/{entity_id}",
    response_model=Entity,
    tags=["entity"],
)
def get_entity_by_id_in_library(
    library_id: int, entity_id: int, db: Session = Depends(get_db)
):
    """Fetch one entity by id, but only if it belongs to ``library_id``.

    Returns the entity, or a 404 ``JSONResponse`` when it is missing or
    owned by another library.
    """
    found = crud.get_entity_by_id(entity_id, db)
    if found is not None and found.library_id == library_id:
        return found

    return JSONResponse(
        status_code=status.HTTP_404_NOT_FOUND,
        content={"detail": "Entity not found"},
    )
|
361
|
+
|
362
|
+
|
363
|
+
@app.put("/entities/{entity_id}", response_model=Entity, tags=["entity"])
async def update_entity(
    entity_id: int,
    request: Request,
    updated_entity: UpdateEntityParam = None,
    db: Session = Depends(get_db),
    trigger_webhooks_flag: bool = False,
    plugins: Annotated[List[int] | None, Query()] = None,
    update_index: bool = False,
):
    """Update an entity and optionally re-run plugins and re-index it.

    The body is optional: when ``updated_entity`` is absent the stored
    entity is left unchanged and only the optional webhook and index steps
    run.

    Raises:
        HTTPException: 404 when the entity does not exist, or when webhooks
            are requested and the entity's library cannot be found.
    """
    with logfire.span("fetch entity {entity_id=}", entity_id=entity_id):
        current = crud.get_entity_by_id(entity_id, db)
        if current is None:
            raise HTTPException(
                detail="Entity not found",
                status_code=status.HTTP_404_NOT_FOUND,
            )

    if updated_entity:
        current = crud.update_entity(entity_id, updated_entity, db)

    if trigger_webhooks_flag:
        owning_library = crud.get_library_by_id(current.library_id, db)
        if owning_library is None:
            raise HTTPException(
                detail="Library not found", status_code=status.HTTP_404_NOT_FOUND
            )
        await trigger_webhooks(owning_library, current, request, plugins, db)

    if update_index:
        search_provider.update_entity_index(current.id, db)

    return current
|
396
|
+
|
397
|
+
|
398
|
+
@app.post(
    "/entities/{entity_id}/last-scan-at",
    status_code=status.HTTP_204_NO_CONTENT,
    tags=["entity"],
)
def update_entity_last_scan_at(entity_id: int, db: Session = Depends(get_db)):
    """
    Touch an entity through ``crud.touch_entity`` (per the route, this
    refreshes its last-scan timestamp; the actual work is done in crud).

    Raises a 404 ``HTTPException`` when ``crud.touch_entity`` reports failure.
    """
    if crud.touch_entity(entity_id, db):
        return

    raise HTTPException(
        detail="Entity not found",
        status_code=status.HTTP_404_NOT_FOUND,
    )
|
413
|
+
|
414
|
+
|
415
|
+
@app.post(
    "/entities/{entity_id}/index",
    status_code=status.HTTP_204_NO_CONTENT,
    tags=["entity"],
)
def update_index(entity_id: int, db: Session = Depends(get_db)):
    """
    Refresh the search index for one entity via
    ``search_provider.update_entity_index``.

    Raises a 404 ``HTTPException`` when the entity does not exist.
    """
    target = crud.get_entity_by_id(entity_id, db)
    if target is None:
        raise HTTPException(
            detail="Entity not found",
            status_code=status.HTTP_404_NOT_FOUND,
        )

    search_provider.update_entity_index(target.id, db)
|
432
|
+
|
433
|
+
|
434
|
+
@app.post(
    "/entities/batch-index",
    status_code=status.HTTP_204_NO_CONTENT,
    tags=["entity"],
)
async def batch_update_index(request: BatchIndexRequest, db: Session = Depends(get_db)):
    """
    Refresh the search index for every id in ``request.entity_ids`` in one
    call to ``search_provider.batch_update_entity_indices``.

    A ``ValueError`` from the provider is reported to the client as a 404
    carrying the error text.
    """
    entity_ids = request.entity_ids
    try:
        search_provider.batch_update_entity_indices(entity_ids, db)
    except ValueError as e:
        raise HTTPException(detail=str(e), status_code=status.HTTP_404_NOT_FOUND)
|
447
|
+
|
448
|
+
|
449
|
+
@app.put("/entities/{entity_id}/tags", response_model=Entity, tags=["entity"])
def replace_entity_tags(
    entity_id: int, update_tags: UpdateEntityTagsParam, db: Session = Depends(get_db)
):
    """Set an entity's tags through ``crud.update_entity_tags`` (PUT semantics).

    Raises:
        HTTPException: 404 when the entity does not exist.
    """
    if crud.get_entity_by_id(entity_id, db) is None:
        raise HTTPException(
            detail="Entity not found",
            status_code=status.HTTP_404_NOT_FOUND,
        )

    return crud.update_entity_tags(entity_id, update_tags.tags, db)
|
461
|
+
|
462
|
+
|
463
|
+
@app.patch("/entities/{entity_id}/tags", response_model=Entity, tags=["entity"])
def patch_entity_tags(
    entity_id: int, update_tags: UpdateEntityTagsParam, db: Session = Depends(get_db)
):
    """Add tags to an entity through ``crud.add_new_tags`` (PATCH semantics).

    Raises:
        HTTPException: 404 when the entity does not exist.
    """
    if crud.get_entity_by_id(entity_id, db) is None:
        raise HTTPException(
            detail="Entity not found",
            status_code=status.HTTP_404_NOT_FOUND,
        )

    return crud.add_new_tags(entity_id, update_tags.tags, db)
|
475
|
+
|
476
|
+
|
477
|
+
@app.patch("/entities/{entity_id}/metadata", response_model=Entity, tags=["entity"])
def patch_entity_metadata(
    entity_id: int,
    update_metadata: UpdateEntityMetadataParam,
    db: Session = Depends(get_db),
):
    """Apply metadata entries to an entity and return the updated entity.

    Raises:
        HTTPException: 404 when the entity does not exist.
    """
    with logfire.span("fetch entity {entity_id=}", entity_id=entity_id):
        existing = crud.get_entity_by_id(entity_id, db)
        if existing is None:
            raise HTTPException(
                detail="Entity not found",
                status_code=status.HTTP_404_NOT_FOUND,
            )

    # The entries themselves are applied by the CRUD layer.
    return crud.update_entity_metadata_entries(
        entity_id, update_metadata.metadata_entries, db
    )
|
496
|
+
|
497
|
+
|
498
|
+
@app.delete(
    "/libraries/{library_id}/entities/{entity_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    tags=["entity"],
)
def remove_entity(library_id: int, entity_id: int, db: Session = Depends(get_db)):
    """Delete an entity, provided it belongs to the given library.

    Raises:
        HTTPException: 404 when the entity is missing or owned by another
            library.
    """
    doomed = crud.get_entity_by_id(entity_id, db)
    owned_by_library = doomed is not None and doomed.library_id == library_id
    if not owned_by_library:
        raise HTTPException(
            detail="Entity not found in the specified library",
            status_code=status.HTTP_404_NOT_FOUND,
        )

    crud.remove_entity(entity_id, db)
|
512
|
+
|
513
|
+
|
514
|
+
@app.post("/plugins", response_model=Plugin, tags=["plugin"])
def new_plugin(new_plugin: NewPluginParam, db: Session = Depends(get_db)):
    """Register a plugin, refusing names that are already taken.

    Raises:
        HTTPException: 400 when a plugin with the same name exists.
    """
    if crud.get_plugin_by_name(new_plugin.name, db):
        raise HTTPException(
            detail="Plugin with this name already exists",
            status_code=status.HTTP_400_BAD_REQUEST,
        )

    return crud.create_plugin(new_plugin, db)
|
524
|
+
|
525
|
+
|
526
|
+
@app.get("/plugins", response_model=List[Plugin], tags=["plugin"])
def list_plugins(db: Session = Depends(get_db)):
    """Return every plugin reported by ``crud.get_plugins``."""
    return crud.get_plugins(db)
|
530
|
+
|
531
|
+
|
532
|
+
@app.post(
    "/libraries/{library_id}/plugins",
    status_code=status.HTTP_204_NO_CONTENT,
    tags=["plugin"],
)
def add_library_plugin(
    library_id: int, new_plugin: NewLibraryPluginParam, db: Session = Depends(get_db)
):
    """Bind an existing plugin to a library.

    The plugin is identified by ``plugin_id`` when given, otherwise by
    ``plugin_name``.

    Raises:
        HTTPException: 404 when the library or the plugin cannot be found
            (including when neither identifier is supplied); 400 when the
            plugin is already bound to the library.
    """
    library = crud.get_library_by_id(library_id, db)
    if library is None:
        raise HTTPException(
            detail="Library not found", status_code=status.HTTP_404_NOT_FOUND
        )

    # The id takes precedence over the name.
    if new_plugin.plugin_id is not None:
        plugin = crud.get_plugin_by_id(new_plugin.plugin_id, db)
    elif new_plugin.plugin_name is not None:
        plugin = crud.get_plugin_by_name(new_plugin.plugin_name, db)
    else:
        plugin = None

    if plugin is None:
        raise HTTPException(
            detail="Plugin not found", status_code=status.HTTP_404_NOT_FOUND
        )

    for bound in library.plugins:
        if bound.id == plugin.id:
            raise HTTPException(
                detail="Plugin already exists in the library",
                status_code=status.HTTP_400_BAD_REQUEST,
            )

    crud.add_plugin_to_library(library_id, plugin.id, db)
|
564
|
+
|
565
|
+
|
566
|
+
@app.delete(
    "/libraries/{library_id}/plugins/{plugin_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    tags=["plugin"],
)
def delete_library_plugin(
    library_id: int, plugin_id: int, db: Session = Depends(get_db)
):
    """Unbind a plugin from a library.

    Raises:
        HTTPException: 404 when either the library or the plugin does not
            exist.
    """
    if crud.get_library_by_id(library_id, db) is None:
        raise HTTPException(
            detail="Library not found", status_code=status.HTTP_404_NOT_FOUND
        )

    if crud.get_plugin_by_id(plugin_id, db) is None:
        raise HTTPException(
            detail="Plugin not found", status_code=status.HTTP_404_NOT_FOUND
        )

    crud.remove_plugin_from_library(library_id, plugin_id, db)
|
587
|
+
|
588
|
+
|
589
|
+
def is_image(file_path: Path) -> bool:
    """Tell whether ``file_path`` has a PNG or JPEG extension (case-insensitive)."""
    extension = file_path.suffix.lower()
    return extension == ".png" or extension == ".jpg" or extension == ".jpeg"
|
591
|
+
|
592
|
+
|
593
|
+
def get_thumbnail_info(metadata: dict) -> tuple:
    """Summarise image metadata as ``(screen_name, sequence, is_thumbnail)``.

    * Empty or missing metadata -> ``(None, None, None)``.
    * Metadata without a truthy ``"sequence"`` -> ``(None, None, False)``.
    * Otherwise -> ``(metadata["screen_name"] or None, sequence, True)``.
    """
    if metadata:
        sequence = metadata.get("sequence")
        if sequence:
            return metadata.get("screen_name"), sequence, True
        return None, None, False

    return None, None, None
|
601
|
+
|
602
|
+
|
603
|
+
def extract_video_frame(video_path: Path, frame_number: int) -> Image.Image | None:
    """Decode a single frame of a video file into a PIL image.

    Args:
        video_path: Video file to open with OpenCV.
        frame_number: Frame position to seek to before reading.

    Returns:
        The frame converted from OpenCV's BGR channel order to an RGB
        ``PIL.Image``, or ``None`` when no frame could be read.
    """
    cap = cv2.VideoCapture(str(video_path))
    try:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
        ret, frame = cap.read()
    finally:
        # Always release the capture, even if seeking or reading raises
        # (e.g. on a non-numeric frame_number); otherwise the handle leaks.
        cap.release()

    if not ret:
        return None

    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    return Image.fromarray(frame_rgb)
|
614
|
+
|
615
|
+
|
616
|
+
@app.get("/files/video/{file_path:path}", tags=["files"])
async def get_video_frame(file_path: str):
    """Serve a file, substituting a frame from a sibling video for thumbnails.

    The request path is resolved against the filesystem root. Non-image
    files are returned as-is. For an image whose metadata carries both a
    ``screen_name`` and a truthy ``sequence``, the frame at that position in
    ``<screen_name>.mp4`` (same directory) is returned instead, encoded in
    the image's own format. Whenever that frame cannot be produced, the
    original image file is served.

    The frame is encoded in memory rather than written to a shared
    ``/tmp/temp_<name>`` file. This avoids leaving one temp file behind per
    distinct image name, avoids concurrent requests for same-named images
    overwriting each other's output, and removes the hard-coded POSIX temp
    directory.

    Raises:
        HTTPException: 404 when the resolved path is not a file.
    """
    # Local imports: neither name is in this module's top-level import block.
    import io

    from fastapi.responses import Response

    full_path = Path("/") / file_path.strip("/")

    if not full_path.is_file():
        raise HTTPException(status_code=404, detail="File not found")

    if not is_image(full_path):
        return FileResponse(full_path)

    metadata = read_metadata(str(full_path))
    screen, sequence, is_thumbnail = get_thumbnail_info(metadata)

    logging.debug(
        "Screen: %s, Sequence: %s, Is Thumbnail: %s", screen, sequence, is_thumbnail
    )

    if not all([screen, sequence, is_thumbnail]):
        return FileResponse(full_path)

    video_path = full_path.parent / f"{screen}.mp4"
    logging.debug("Video path: %s", video_path)
    if not video_path.is_file():
        return FileResponse(full_path)

    frame_image = extract_video_frame(video_path, sequence)
    if frame_image is None:
        return FileResponse(full_path)

    # is_image() only admits .png/.jpg/.jpeg, so anything that is not PNG is
    # JPEG. This is the format PIL would infer from the file name.
    image_format = "PNG" if full_path.suffix.lower() == ".png" else "JPEG"
    encoded = io.BytesIO()
    frame_image.save(encoded, format=image_format)

    media_type, _ = mimetypes.guess_type(full_path.name)
    return Response(
        content=encoded.getvalue(),
        media_type=media_type or "application/octet-stream",
        headers={"Content-Disposition": f"inline; filename={full_path.name}"},
    )
|
654
|
+
|
655
|
+
|
656
|
+
@app.get("/files/{file_path:path}", tags=["files"])
async def get_file(file_path: str):
    """Serve a file addressed by its path relative to the filesystem root.

    Returns the file, or a 404 ``JSONResponse`` when the path does not point
    at a regular file.
    """
    full_path = Path("/") / file_path.strip("/")

    # Anything that is not an existing regular file is reported as missing.
    if not full_path.is_file():
        return JSONResponse(
            status_code=status.HTTP_404_NOT_FOUND, content={"detail": "File not found"}
        )

    return FileResponse(full_path)
|
666
|
+
|
667
|
+
|
668
|
+
def _split_csv_param(raw):
    """Split a comma-separated query value into stripped, non-empty tokens."""
    if not raw:
        return []
    return [token.strip() for token in raw.split(",") if token.strip()]


def _entity_to_search_hit(entity):
    """Wrap one entity in a SearchHit with empty highlight/match details."""
    document = EntitySearchResult(
        id=str(entity.id),
        filepath=entity.filepath,
        filename=entity.filename,
        size=entity.size,
        file_created_at=int(entity.file_created_at.timestamp()),
        file_last_modified_at=int(entity.file_last_modified_at.timestamp()),
        file_type=entity.file_type,
        file_type_group=entity.file_type_group,
        last_scan_at=(
            int(entity.last_scan_at.timestamp()) if entity.last_scan_at else None
        ),
        library_id=entity.library_id,
        folder_id=entity.folder_id,
        tags=[tag.name for tag in entity.tags],
        metadata_entries=[
            MetadataIndexItem(
                key=entry.key,
                # JSON metadata is stored as text and decoded for the response.
                value=(
                    json.loads(entry.value)
                    if entry.data_type == MetadataType.JSON_DATA
                    else entry.value
                ),
                source=entry.source,
            )
            for entry in entity.metadata_entries
        ],
    )
    return SearchHit(
        document=document,
        highlight={},
        highlights=[],
        text_match=None,
        hybrid_search_info=None,
        text_match_info=None,
    )


def _app_name_facets(stats):
    """Build the "app_names" facet list from search stats (empty if none)."""
    if not stats or "app_name_counts" not in stats:
        return []

    counts = [
        FacetCount(value=app_name, count=count, highlighted=app_name)
        for app_name, count in stats["app_name_counts"].items()
    ]
    if not counts:
        return []

    return [
        Facet(
            field_name="app_names",
            counts=counts,
            sampled=False,
            stats=FacetStats(total_values=len(counts)),
        )
    ]


@app.get("/search", response_model=SearchResult, tags=["search"])
async def search_entities_v2(
    q: str,
    library_ids: str = Query(None, description="Comma-separated list of library IDs"),
    limit: Annotated[int, Query(ge=1, le=200)] = 48,
    start: int = None,
    end: int = None,
    app_names: str = Query(None, description="Comma-separated list of app names"),
    facet: bool = Query(None, description="Include facet in the search results"),
    db: Session = Depends(get_db),
):
    """Search entities, or list them when the query string is blank.

    Args:
        q: Search text. When blank, ``crud.list_entities`` is used instead of
            the search provider and no facets are computed.
        library_ids: Comma-separated integer library IDs; blank items are
            ignored.
        limit: Maximum number of results (1-200).
        start: Lower bound forwarded to the query layer
            (presumably a timestamp; confirm against crud/search provider).
        end: Upper bound forwarded to the query layer (see ``start``).
        app_names: Comma-separated app names; blank items are ignored.
        facet: Whether to compute facets; falls back to ``settings.facet``
            when omitted.
        db: Database session.

    Returns:
        A ``SearchResult`` whose ``found``/``out_of`` equal the number of hits
        returned.

    Raises:
        HTTPException: 400 when ``library_ids`` contains a non-integer item,
            500 when the search itself fails.
    """
    # Validate up front so malformed input is reported as a client error
    # rather than surfacing as an unhandled ValueError.
    try:
        library_id_list = [int(token) for token in _split_csv_param(library_ids)]
    except ValueError:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="library_ids must be a comma-separated list of integers",
        ) from None
    library_id_list = library_id_list or None
    app_name_list = _split_csv_param(app_names) or None

    # Use settings.facet if facet parameter is not provided
    use_facet = settings.facet if facet is None else facet

    try:
        if q.strip() == "":
            # Use list_entities when q is empty
            entities = crud.list_entities(
                db=db, limit=limit, library_ids=library_id_list, start=start, end=end
            )
            stats = {}
        else:
            # Use search provider for both search and stats
            entity_ids = search_provider.hybrid_search(
                query=q,
                db=db,
                limit=limit,
                library_ids=library_id_list,
                start=start,
                end=end,
                app_names=app_name_list,
            )
            entities = crud.find_entities_by_ids(entity_ids, db)
            stats = (
                search_provider.get_search_stats(
                    query=q,
                    db=db,
                    library_ids=library_id_list,
                    start=start,
                    end=end,
                    app_names=app_name_list,
                )
                if use_facet
                else {}
            )

        hits = [_entity_to_search_hit(entity) for entity in entities]

        return SearchResult(
            facet_counts=_app_name_facets(stats),
            found=len(hits),
            hits=hits,
            out_of=len(hits),
            page=1,
            request_params=RequestParams(
                collection_name="entities",
                first_q=q,
                per_page=limit,
                q=q,
                app_names=app_name_list,
            ),
            search_cutoff=False,
            search_time_ms=0,
        )

    except Exception as e:
        # logging.exception records the traceback alongside the message.
        logging.exception("Error searching entities: %s", e)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e),
        ) from e
|
815
|
+
|
816
|
+
|
817
|
+
@app.get(
    "/libraries/{library_id}/entities/{entity_id}/context",
    response_model=EntityContext,
    tags=["entity"],
)
def get_entity_context(
    library_id: int,
    entity_id: int,
    prev: Annotated[int | None, Query(ge=0, le=100)] = None,
    next: Annotated[int | None, Query(ge=0, le=100)] = None,
    db: Session = Depends(get_db),
):
    """
    Fetch the entities surrounding a given entity.

    Args:
        library_id: The ID of the library
        entity_id: The ID of the target entity
        prev: How many preceding entities to fetch (optional)
        next: How many following entities to fetch (optional)

    Returns:
        EntityContext holding the prev and next entity lists; both lists are
        empty when neither count was supplied.
    """
    # Nothing requested on either side: skip the database lookup entirely.
    requested = [count for count in (prev, next) if count is not None]
    if not requested:
        return EntityContext(prev=[], next=[])

    # An omitted count is passed to the crud layer as 0.
    before, after = crud.get_entity_context(
        db=db,
        library_id=library_id,
        entity_id=entity_id,
        prev=prev or 0,
        next=next or 0,
    )

    return EntityContext(prev=before, next=after)
|
860
|
+
|
861
|
+
|
862
|
+
def run_server():
    """Log the active settings, mount the plugin routers and start uvicorn.

    The VLM and OCR plugins are always initialised and mounted (under
    ``/plugins/vlm`` and ``/plugins/ocr``); there is no enabled-flag check.
    """
    logging.info("Database path: %s", settings.database_url)
    logging.info("VLM plugin enabled: %s", settings.vlm)
    logging.info("OCR plugin enabled: %s", settings.ocr)

    # Each plugin is initialised with its settings, then its router is mounted.
    # Order is VLM first, then OCR.
    plugins = (
        (vlm_main, settings.vlm, "/plugins/vlm"),
        (ocr_main, settings.ocr, "/plugins/ocr"),
    )
    for plugin_module, plugin_settings, route_prefix in plugins:
        plugin_module.init_plugin(plugin_settings)
        app.include_router(plugin_module.router, prefix=route_prefix)

    server_options = {
        "host": settings.server_host,
        "port": settings.server_port,
        "reload": False,
        "log_config": LOGGING_CONFIG,
    }
    uvicorn.run("memos.server:app", **server_options)
|