pensiev 0.25.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. memos/__init__.py +6 -0
  2. memos/cmds/__init__.py +0 -0
  3. memos/cmds/library.py +1289 -0
  4. memos/cmds/plugin.py +96 -0
  5. memos/commands.py +865 -0
  6. memos/config.py +225 -0
  7. memos/crud.py +605 -0
  8. memos/databases/__init__.py +0 -0
  9. memos/databases/initializers.py +481 -0
  10. memos/dataset_extractor_for_florence.py +165 -0
  11. memos/dataset_extractor_for_internvl2.py +192 -0
  12. memos/default_config.yaml +88 -0
  13. memos/embedding.py +129 -0
  14. memos/frame_extractor.py +53 -0
  15. memos/logging_config.py +35 -0
  16. memos/main.py +104 -0
  17. memos/migrations/alembic/README +1 -0
  18. memos/migrations/alembic/__pycache__/env.cpython-310.pyc +0 -0
  19. memos/migrations/alembic/env.py +108 -0
  20. memos/migrations/alembic/script.py.mako +30 -0
  21. memos/migrations/alembic/versions/00904ac8c6fc_add_indexes_to_entitymodel.py +63 -0
  22. memos/migrations/alembic/versions/04acdaf75664_add_indices_to_entitytags_and_metadata.py +86 -0
  23. memos/migrations/alembic/versions/12504c5b1d3c_add_extra_columns_for_embedding.py +67 -0
  24. memos/migrations/alembic/versions/31a1ad0e10b3_add_entity_plugin_status.py +71 -0
  25. memos/migrations/alembic/versions/__pycache__/00904ac8c6fc_add_indexes_to_entitymodel.cpython-310.pyc +0 -0
  26. memos/migrations/alembic/versions/__pycache__/04acdaf75664_add_indices_to_entitytags_and_metadata.cpython-310.pyc +0 -0
  27. memos/migrations/alembic/versions/__pycache__/12504c5b1d3c_add_extra_columns_for_embedding.cpython-310.pyc +0 -0
  28. memos/migrations/alembic/versions/__pycache__/20f5ecab014d_add_entity_plugin_status.cpython-310.pyc +0 -0
  29. memos/migrations/alembic/versions/__pycache__/31a1ad0e10b3_add_entity_plugin_status.cpython-310.pyc +0 -0
  30. memos/migrations/alembic/versions/__pycache__/4fcb062c5128_add_extra_columns_for_embedding.cpython-310.pyc +0 -0
  31. memos/migrations/alembic/versions/__pycache__/d10c55fbb7d2_add_index_for_entity_file_type_group_.cpython-310.pyc +0 -0
  32. memos/migrations/alembic/versions/__pycache__/f8f158182416_add_active_app_index.cpython-310.pyc +0 -0
  33. memos/migrations/alembic/versions/d10c55fbb7d2_add_index_for_entity_file_type_group_.py +44 -0
  34. memos/migrations/alembic/versions/f8f158182416_add_active_app_index.py +75 -0
  35. memos/migrations/alembic.ini +116 -0
  36. memos/migrations.py +19 -0
  37. memos/models.py +199 -0
  38. memos/plugins/__init__.py +0 -0
  39. memos/plugins/ocr/__init__.py +0 -0
  40. memos/plugins/ocr/main.py +251 -0
  41. memos/plugins/ocr/models/ch_PP-OCRv4_det_infer.onnx +0 -0
  42. memos/plugins/ocr/models/ch_PP-OCRv4_rec_infer.onnx +0 -0
  43. memos/plugins/ocr/models/ch_ppocr_mobile_v2.0_cls_train.onnx +0 -0
  44. memos/plugins/ocr/ppocr-gpu.yaml +43 -0
  45. memos/plugins/ocr/ppocr.yaml +44 -0
  46. memos/plugins/ocr/server.py +227 -0
  47. memos/plugins/ocr/temp_ppocr.yaml +42 -0
  48. memos/plugins/vlm/__init__.py +0 -0
  49. memos/plugins/vlm/main.py +251 -0
  50. memos/prepare_dataset.py +107 -0
  51. memos/process_webp.py +55 -0
  52. memos/read_metadata.py +32 -0
  53. memos/record.py +358 -0
  54. memos/schemas.py +289 -0
  55. memos/search.py +1198 -0
  56. memos/server.py +883 -0
  57. memos/shotsum.py +105 -0
  58. memos/shotsum_with_ocr.py +145 -0
  59. memos/simple_tokenizer/dict/README.md +31 -0
  60. memos/simple_tokenizer/dict/hmm_model.utf8 +34 -0
  61. memos/simple_tokenizer/dict/idf.utf8 +258826 -0
  62. memos/simple_tokenizer/dict/jieba.dict.utf8 +348982 -0
  63. memos/simple_tokenizer/dict/pos_dict/char_state_tab.utf8 +6653 -0
  64. memos/simple_tokenizer/dict/pos_dict/prob_emit.utf8 +166 -0
  65. memos/simple_tokenizer/dict/pos_dict/prob_start.utf8 +259 -0
  66. memos/simple_tokenizer/dict/pos_dict/prob_trans.utf8 +5222 -0
  67. memos/simple_tokenizer/dict/stop_words.utf8 +1534 -0
  68. memos/simple_tokenizer/dict/user.dict.utf8 +4 -0
  69. memos/simple_tokenizer/linux/libsimple.so +0 -0
  70. memos/simple_tokenizer/macos/libsimple.dylib +0 -0
  71. memos/simple_tokenizer/windows/simple.dll +0 -0
  72. memos/static/_app/immutable/assets/0.e250c031.css +1 -0
  73. memos/static/_app/immutable/assets/_layout.e7937cfe.css +1 -0
  74. memos/static/_app/immutable/chunks/index.5c08976b.js +1 -0
  75. memos/static/_app/immutable/chunks/index.60ee613b.js +4 -0
  76. memos/static/_app/immutable/chunks/runtime.a7926cf6.js +5 -0
  77. memos/static/_app/immutable/chunks/scheduler.5c1cff6e.js +1 -0
  78. memos/static/_app/immutable/chunks/singletons.583bdf4e.js +1 -0
  79. memos/static/_app/immutable/entry/app.666c1643.js +1 -0
  80. memos/static/_app/immutable/entry/start.aed5c701.js +3 -0
  81. memos/static/_app/immutable/nodes/0.5862ea38.js +7 -0
  82. memos/static/_app/immutable/nodes/1.35378a5e.js +1 -0
  83. memos/static/_app/immutable/nodes/2.1ccf9ea5.js +81 -0
  84. memos/static/_app/version.json +1 -0
  85. memos/static/app.html +36 -0
  86. memos/static/favicon.png +0 -0
  87. memos/static/logos/memos_logo_1024.png +0 -0
  88. memos/static/logos/memos_logo_1024@2x.png +0 -0
  89. memos/static/logos/memos_logo_128.png +0 -0
  90. memos/static/logos/memos_logo_128@2x.png +0 -0
  91. memos/static/logos/memos_logo_16.png +0 -0
  92. memos/static/logos/memos_logo_16@2x.png +0 -0
  93. memos/static/logos/memos_logo_256.png +0 -0
  94. memos/static/logos/memos_logo_256@2x.png +0 -0
  95. memos/static/logos/memos_logo_32.png +0 -0
  96. memos/static/logos/memos_logo_32@2x.png +0 -0
  97. memos/static/logos/memos_logo_512.png +0 -0
  98. memos/static/logos/memos_logo_512@2x.png +0 -0
  99. memos/static/logos/memos_logo_64.png +0 -0
  100. memos/static/logos/memos_logo_64@2x.png +0 -0
  101. memos/test_server.py +802 -0
  102. memos/utils.py +49 -0
  103. memos_ml_backends/florence2_server.py +176 -0
  104. memos_ml_backends/qwen2vl_server.py +182 -0
  105. memos_ml_backends/schemas.py +48 -0
  106. pensiev-0.25.5.dist-info/LICENSE +201 -0
  107. pensiev-0.25.5.dist-info/METADATA +541 -0
  108. pensiev-0.25.5.dist-info/RECORD +111 -0
  109. pensiev-0.25.5.dist-info/WHEEL +5 -0
  110. pensiev-0.25.5.dist-info/entry_points.txt +2 -0
  111. pensiev-0.25.5.dist-info/top_level.txt +2 -0
memos/server.py ADDED
@@ -0,0 +1,883 @@
1
+ import os
2
+ import httpx
3
+ import uvicorn
4
+ import mimetypes
5
+
6
+ import logfire
7
+
8
+ from fastapi import FastAPI, HTTPException, Depends, status, Query, Request
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+ from fastapi.staticfiles import StaticFiles
11
+ from fastapi.responses import FileResponse, JSONResponse
12
+ from fastapi.encoders import jsonable_encoder
13
+ from sqlalchemy.orm import Session
14
+ from sqlalchemy.orm import sessionmaker
15
+ from typing import List, Annotated
16
+ from pathlib import Path
17
+ import json
18
+ import cv2
19
+ from PIL import Image
20
+ import logging
21
+
22
+ from .config import settings
23
+ from memos.plugins.vlm import main as vlm_main
24
+ from memos.plugins.ocr import main as ocr_main
25
+ from . import crud
26
+ from .search import create_search_provider
27
+ from .schemas import (
28
+ Library,
29
+ Folder,
30
+ Entity,
31
+ Plugin,
32
+ NewLibraryParam,
33
+ NewFoldersParam,
34
+ NewEntityParam,
35
+ UpdateEntityParam,
36
+ NewPluginParam,
37
+ NewLibraryPluginParam,
38
+ UpdateEntityTagsParam,
39
+ UpdateEntityMetadataParam,
40
+ MetadataType,
41
+ MetadataIndexItem,
42
+ EntitySearchResult,
43
+ SearchResult,
44
+ SearchHit,
45
+ RequestParams,
46
+ EntityContext,
47
+ BatchIndexRequest,
48
+ FacetCount,
49
+ Facet,
50
+ FacetStats,
51
+ )
52
+ from .read_metadata import read_metadata
53
+ from .logging_config import LOGGING_CONFIG
54
+ from .databases.initializers import create_db_initializer
55
+
56
# Root logger configuration
logging.basicConfig(level=logging.INFO)

# Register the JavaScript MIME type explicitly.
# Workaround for:
# https://github.com/python/cpython/issues/88141#issuecomment-1631735902
# Otherwise .js files may be reported as text/plain and browsers on some
# Windows machines will not render them correctly.
mimetypes.add_type("application/javascript", ".js")

app = FastAPI()

# logfire only ships data when a token is present; URLs matching "/files"
# are excluded from the FastAPI instrumentation.
logfire.configure(send_to_logfire="if-token-present")
logfire.instrument_fastapi(app, excluded_urls=["/files"])

# Database engine and its initializer, both built from the settings
engine, initializer = create_db_initializer(settings)

# Search provider chosen from the database URL
search_provider = create_search_provider(settings.database_url)

SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)

logfire.instrument_sqlalchemy(engine=engine)

# CORS accepts every origin, method and header, with credentials allowed.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=["*"],
)


current_dir = os.path.dirname(__file__)

# Static front-end assets live next to this module under static/_app.
app.mount(
    "/_app",
    StaticFiles(html=True, directory=os.path.join(current_dir, "static/_app")),
)
95
+
96
+
97
@app.get("/health")
async def health():
    """Report that the server is up."""
    payload = {"status": "ok"}
    return payload
100
+
101
+
102
@app.get("/favicon.png", response_class=FileResponse)
async def favicon_png():
    """Serve the bundled favicon image."""
    icon_path = os.path.join(current_dir, "static/favicon.png")
    return FileResponse(icon_path)
105
+
106
+
107
@app.get("/favicon.ico", response_class=FileResponse)
async def favicon_ico():
    """Serve the favicon for .ico requests (the same PNG file is returned)."""
    icon_path = os.path.join(current_dir, "static/favicon.png")
    return FileResponse(icon_path)
110
+
111
+
112
@app.get("/")
async def serve_spa():
    """Serve the front-end entry page."""
    entry_page = os.path.join(current_dir, "static/app.html")
    return FileResponse(entry_page)
115
+
116
+
117
def get_db():
    """Yield a database session and close it once the consumer is done.

    Used as a FastAPI dependency; the session is closed even when the
    consumer raises.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
123
+
124
+
125
@app.post("/libraries", response_model=Library, tags=["library"])
def new_library(library_param: NewLibraryParam, db: Session = Depends(get_db)):
    """Create a library, rejecting duplicate names and de-duplicating folders."""
    # Refuse a second library under a name that is already taken
    # (the lookup is presumably case-insensitive -- confirm in crud).
    if crud.get_library_by_name(library_param.name, db):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Library with this name already exists",
        )

    # Keep only the first folder seen for each path, preserving request order.
    seen_paths = set()
    deduplicated = []
    for candidate in library_param.folders:
        if candidate.path in seen_paths:
            continue
        seen_paths.add(candidate.path)
        deduplicated.append(candidate)
    library_param.folders = deduplicated

    return crud.create_library(library_param, db)
146
+
147
+
148
@app.get("/libraries", response_model=List[Library], tags=["library"])
def list_libraries(db: Session = Depends(get_db)):
    """Return the libraries reported by the CRUD layer."""
    return crud.get_libraries(db)
152
+
153
+
154
@app.get("/libraries/{library_id}", response_model=Library, tags=["library"])
def get_library_by_id(library_id: int, db: Session = Depends(get_db)):
    """Return one library, or a 404 JSON body when it does not exist."""
    library = crud.get_library_by_id(library_id, db)
    if library is not None:
        return library

    return JSONResponse(
        status_code=status.HTTP_404_NOT_FOUND,
        content={"detail": "Library not found"},
    )
163
+
164
+
165
@app.post("/libraries/{library_id}/folders", response_model=Library, tags=["library"])
def new_folders(
    library_id: int,
    folders: NewFoldersParam,
    db: Session = Depends(get_db),
):
    """Add folders to a library; 404 if the library is missing, 400 on a duplicate path."""
    library = crud.get_library_by_id(library_id, db)
    if library is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail="Library not found"
        )

    # Reject the whole request as soon as one requested path is already present.
    known_paths = [existing.path for existing in library.folders]
    for candidate in folders.folders:
        if str(candidate.path) in known_paths:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Folder already exists in the library",
            )

    return crud.add_folders(library_id=library.id, folders=folders, db=db)
185
+
186
+
187
async def trigger_webhooks(
    library: Library,
    entity: Entity,
    request: Request,
    plugins: List[int] = None,
    db: Session = Depends(get_db),
):
    """Trigger webhooks for plugins that haven't processed the entity yet.

    Args:
        library: Library whose plugins are considered.
        entity: Entity posted (as JSON) to each plugin webhook.
        request: Current request; used to build the entity URL and to turn
            relative webhook URLs into absolute ones.
        plugins: Optional allow-list of plugin ids; ``None`` means no filter.
        db: Database session used for the pending lookup and bookkeeping.

    Failures (HTTP status >= 400 or any exception) are logged, never raised.
    """
    async with httpx.AsyncClient() as client:
        pending_requests = []
        pending_plugins = crud.get_pending_plugins(entity.id, library.id, db)

        for plugin in library.plugins:
            # Honour the explicit allow-list, then skip plugins that are not
            # pending for this entity.
            requested = plugins is None or plugin.id in plugins
            if not requested or plugin.id not in pending_plugins:
                continue

            if not plugin.webhook_url:
                continue

            logging.info("Triggering plugin %d for entity %d", plugin.id, entity.id)
            location = str(request.url_for("get_entity_by_id", entity_id=entity.id))
            target_url = plugin.webhook_url
            if target_url.startswith("/"):
                # Relative webhook: prefix the server base URL minus its trailing "/".
                target_url = str(request.base_url)[:-1] + target_url
            pending_requests.append(
                (
                    plugin.id,
                    client.post(
                        target_url,
                        json=entity.model_dump(mode="json"),
                        headers={"Location": location},
                        timeout=60.0,
                    ),
                )
            )

        # The requests are awaited one after another, in plugin order.
        for plugin_id, pending in pending_requests:
            try:
                response = await pending
                if response.status_code >= 400:
                    logging.error(
                        "Error processing entity with plugin %d: %d - %s",
                        plugin_id,
                        response.status_code,
                        response.text,
                    )
                else:
                    # Record successful plugin processing
                    crud.record_plugin_processed(entity.id, plugin_id, db)
            except Exception as e:
                logging.error(
                    "Error processing entity with plugin %d: %s",
                    plugin_id,
                    str(e),
                )
241
+
242
+
243
@app.post("/libraries/{library_id}/entities", response_model=Entity, tags=["entity"])
async def new_entity(
    new_entity: NewEntityParam,
    library_id: int,
    request: Request,
    db: Session = Depends(get_db),
    plugins: Annotated[List[int] | None, Query()] = None,
    trigger_webhooks_flag: bool = True,
    update_index: bool = False,
):
    """Create an entity in a library, then optionally run webhooks and indexing."""
    library = crud.get_library_by_id(library_id, db)
    if library is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail="Library not found"
        )

    with logfire.span("create new entity {filepath=}", filepath=new_entity.filepath):
        created = crud.create_entity(library_id, new_entity, db)

    if trigger_webhooks_flag:
        with logfire.span("trigger webhooks {entity_id=}", entity_id=created.id):
            await trigger_webhooks(library, created, request, plugins, db)

    if update_index:
        with logfire.span("update entity index {entity_id=}", entity_id=created.id):
            search_provider.update_entity_index(created.id, db)

    return created
271
+
272
+
273
@app.get(
    "/libraries/{library_id}/folders/{folder_id}/entities",
    response_model=List[Entity],
    tags=["entity"],
)
def list_entities_in_folder(
    library_id: int,
    folder_id: int,
    limit: Annotated[int, Query(ge=1, le=400)] = 10,
    offset: int = 0,
    path_prefix: str | None = None,
    db: Session = Depends(get_db),
):
    """List a page of entities in a folder; the total is sent in ``X-Total-Count``."""
    library = crud.get_library_by_id(library_id, db)
    if library is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail="Library not found"
        )

    # The folder must belong to the requested library.
    if not any(folder.id == folder_id for folder in library.folders):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Folder not found in the specified library",
        )

    page, total_count = crud.get_entities_of_folder(
        library_id, folder_id, db, limit, offset, path_prefix
    )
    count_header = {"X-Total-Count": str(total_count)}
    return JSONResponse(content=jsonable_encoder(page), headers=count_header)
304
+
305
+
306
@app.get(
    "/libraries/{library_id}/entities/by-filepath",
    response_model=Entity,
    tags=["entity"],
)
def get_entity_by_filepath(
    library_id: int, filepath: str, db: Session = Depends(get_db)
):
    """Look an entity up by file path; 404 if missing or owned by another library."""
    match = crud.get_entity_by_filepath(filepath, db)
    if match is not None and match.library_id == library_id:
        return match

    return JSONResponse(
        status_code=status.HTTP_404_NOT_FOUND,
        content={"detail": "Entity not found"},
    )
321
+
322
+
323
@app.post(
    "/libraries/{library_id}/entities/by-filepaths",
    response_model=List[Entity],
    tags=["entity"],
)
def get_entities_by_filepaths(
    library_id: int, filepaths: List[str], db: Session = Depends(get_db)
):
    """Look entities up by file path, keeping only those in the given library."""
    in_library = []
    for candidate in crud.get_entities_by_filepaths(filepaths, db):
        if candidate.library_id == library_id:
            in_library.append(candidate)
    return in_library
333
+
334
+
335
@app.get("/entities/{entity_id}", response_model=Entity, tags=["entity"])
def get_entity_by_id(entity_id: int, db: Session = Depends(get_db)):
    """Return one entity, or a 404 JSON body when it does not exist."""
    found = crud.get_entity_by_id(entity_id, db)
    if found is not None:
        return found

    return JSONResponse(
        status_code=status.HTTP_404_NOT_FOUND,
        content={"detail": "Entity not found"},
    )
344
+
345
+
346
@app.get(
    "/libraries/{library_id}/entities/{entity_id}",
    response_model=Entity,
    tags=["entity"],
)
def get_entity_by_id_in_library(
    library_id: int, entity_id: int, db: Session = Depends(get_db)
):
    """Return an entity only if it belongs to the given library; otherwise 404."""
    found = crud.get_entity_by_id(entity_id, db)
    if found is not None and found.library_id == library_id:
        return found

    return JSONResponse(
        status_code=status.HTTP_404_NOT_FOUND,
        content={"detail": "Entity not found"},
    )
361
+
362
+
363
@app.put("/entities/{entity_id}", response_model=Entity, tags=["entity"])
async def update_entity(
    entity_id: int,
    request: Request,
    updated_entity: UpdateEntityParam = None,
    db: Session = Depends(get_db),
    trigger_webhooks_flag: bool = False,
    plugins: Annotated[List[int] | None, Query()] = None,
    update_index: bool = False,
):
    """Update an entity, then optionally run its webhooks and refresh its index."""
    with logfire.span("fetch entity {entity_id=}", entity_id=entity_id):
        current = crud.get_entity_by_id(entity_id, db)
        if current is None:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Entity not found",
            )

    # The body is optional: without it the stored entity is used unchanged.
    if updated_entity:
        current = crud.update_entity(entity_id, updated_entity, db)

    if trigger_webhooks_flag:
        owning_library = crud.get_library_by_id(current.library_id, db)
        if owning_library is None:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND, detail="Library not found"
            )
        await trigger_webhooks(owning_library, current, request, plugins, db)

    if update_index:
        search_provider.update_entity_index(current.id, db)

    return current
396
+
397
+
398
@app.post(
    "/entities/{entity_id}/last-scan-at",
    status_code=status.HTTP_204_NO_CONTENT,
    tags=["entity"],
)
def update_entity_last_scan_at(entity_id: int, db: Session = Depends(get_db)):
    """
    Update the last_scan_at timestamp for an entity and trigger update for fts and vec.
    """
    # crud.touch_entity reports failure with a falsy result, surfaced as 404.
    if not crud.touch_entity(entity_id, db):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Entity not found",
        )
413
+
414
+
415
@app.post(
    "/entities/{entity_id}/index",
    status_code=status.HTTP_204_NO_CONTENT,
    tags=["entity"],
)
def update_index(entity_id: int, db: Session = Depends(get_db)):
    """
    Update the FTS and vector indexes for an entity.
    """
    target = crud.get_entity_by_id(entity_id, db)
    if target is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Entity not found",
        )

    search_provider.update_entity_index(target.id, db)
432
+
433
+
434
@app.post(
    "/entities/batch-index",
    status_code=status.HTTP_204_NO_CONTENT,
    tags=["entity"],
)
async def batch_update_index(request: BatchIndexRequest, db: Session = Depends(get_db)):
    """
    Batch update the FTS and vector indexes for multiple entities.
    """
    entity_ids = request.entity_ids
    try:
        search_provider.batch_update_entity_indices(entity_ids, db)
    except ValueError as e:
        # A ValueError from the search provider is reported to the client as 404.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e))
447
+
448
+
449
@app.put("/entities/{entity_id}/tags", response_model=Entity, tags=["entity"])
def replace_entity_tags(
    entity_id: int, update_tags: UpdateEntityTagsParam, db: Session = Depends(get_db)
):
    """Pass the submitted tags to ``crud.update_entity_tags``; 404 if the entity is missing."""
    if crud.get_entity_by_id(entity_id, db) is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Entity not found",
        )

    new_tags = update_tags.tags
    return crud.update_entity_tags(entity_id, new_tags, db)
461
+
462
+
463
@app.patch("/entities/{entity_id}/tags", response_model=Entity, tags=["entity"])
def patch_entity_tags(
    entity_id: int, update_tags: UpdateEntityTagsParam, db: Session = Depends(get_db)
):
    """Pass the submitted tags to ``crud.add_new_tags``; 404 if the entity is missing."""
    if crud.get_entity_by_id(entity_id, db) is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Entity not found",
        )

    extra_tags = update_tags.tags
    return crud.add_new_tags(entity_id, extra_tags, db)
475
+
476
+
477
@app.patch("/entities/{entity_id}/metadata", response_model=Entity, tags=["entity"])
def patch_entity_metadata(
    entity_id: int,
    update_metadata: UpdateEntityMetadataParam,
    db: Session = Depends(get_db),
):
    """Apply the submitted metadata entries to an entity; 404 if it is missing."""
    with logfire.span("fetch entity {entity_id=}", entity_id=entity_id):
        existing = crud.get_entity_by_id(entity_id, db)
        if existing is None:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Entity not found",
            )

    # The CRUD layer performs the update and returns the entity to send back.
    entries = update_metadata.metadata_entries
    return crud.update_entity_metadata_entries(entity_id, entries, db)
496
+
497
+
498
@app.delete(
    "/libraries/{library_id}/entities/{entity_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    tags=["entity"],
)
def remove_entity(library_id: int, entity_id: int, db: Session = Depends(get_db)):
    """Delete an entity, provided it exists and belongs to the given library."""
    target = crud.get_entity_by_id(entity_id, db)
    belongs_to_library = target is not None and target.library_id == library_id
    if not belongs_to_library:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Entity not found in the specified library",
        )

    crud.remove_entity(entity_id, db)
512
+
513
+
514
@app.post("/plugins", response_model=Plugin, tags=["plugin"])
def new_plugin(new_plugin: NewPluginParam, db: Session = Depends(get_db)):
    """Register a plugin; 400 when the name is already in use."""
    if crud.get_plugin_by_name(new_plugin.name, db):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Plugin with this name already exists",
        )

    return crud.create_plugin(new_plugin, db)
524
+
525
+
526
@app.get("/plugins", response_model=List[Plugin], tags=["plugin"])
def list_plugins(db: Session = Depends(get_db)):
    """Return the plugins reported by the CRUD layer."""
    return crud.get_plugins(db)
530
+
531
+
532
@app.post(
    "/libraries/{library_id}/plugins",
    status_code=status.HTTP_204_NO_CONTENT,
    tags=["plugin"],
)
def add_library_plugin(
    library_id: int, new_plugin: NewLibraryPluginParam, db: Session = Depends(get_db)
):
    """Attach a plugin (given by id or by name) to a library."""
    library = crud.get_library_by_id(library_id, db)
    if library is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail="Library not found"
        )

    # The id takes precedence over the name when both are supplied.
    if new_plugin.plugin_id is not None:
        plugin = crud.get_plugin_by_id(new_plugin.plugin_id, db)
    elif new_plugin.plugin_name is not None:
        plugin = crud.get_plugin_by_name(new_plugin.plugin_name, db)
    else:
        plugin = None

    if plugin is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail="Plugin not found"
        )

    for attached in library.plugins:
        if attached.id == plugin.id:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Plugin already exists in the library",
            )

    crud.add_plugin_to_library(library_id, plugin.id, db)
564
+
565
+
566
@app.delete(
    "/libraries/{library_id}/plugins/{plugin_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    tags=["plugin"],
)
def delete_library_plugin(
    library_id: int, plugin_id: int, db: Session = Depends(get_db)
):
    """Detach a plugin from a library; 404 when either one does not exist."""
    if crud.get_library_by_id(library_id, db) is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail="Library not found"
        )

    if crud.get_plugin_by_id(plugin_id, db) is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail="Plugin not found"
        )

    crud.remove_plugin_from_library(library_id, plugin_id, db)
587
+
588
+
589
def is_image(file_path: Path) -> bool:
    """Return True when the extension is .png, .jpg or .jpeg (case-insensitive)."""
    extension = file_path.suffix.lower()
    return extension == ".png" or extension == ".jpg" or extension == ".jpeg"
591
+
592
+
593
def get_thumbnail_info(metadata: dict) -> tuple:
    """Extract ``(screen_name, sequence, is_thumbnail)`` from image metadata.

    Returns ``(None, None, None)`` for missing/empty metadata and
    ``(None, None, False)`` when the "sequence" entry is absent or falsy.
    """
    if not metadata:
        return (None, None, None)

    sequence = metadata.get("sequence")
    if sequence:
        return (metadata.get("screen_name"), sequence, True)

    return (None, None, False)
601
+
602
+
603
def extract_video_frame(video_path: Path, frame_number: int) -> Image.Image | None:
    """Decode a single frame of a video into a PIL image.

    Args:
        video_path: Path of the video file to open.
        frame_number: Frame position passed to ``CAP_PROP_POS_FRAMES``.

    Returns:
        The frame converted from BGR to RGB as a PIL image, or ``None`` when
        no frame could be read.
    """
    cap = cv2.VideoCapture(str(video_path))
    try:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
        ret, frame = cap.read()
    finally:
        # Release the capture handle even if seeking or decoding raises.
        cap.release()

    if not ret:
        return None

    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    return Image.fromarray(frame_rgb)
614
+
615
+
616
@app.get("/files/video/{file_path:path}", tags=["files"])
async def get_video_frame(file_path: str):
    """Serve an image, substituting the matching video frame when available.

    The request path is resolved against the filesystem root. Non-image
    files, images without thumbnail metadata, images whose
    ``<screen_name>.mp4`` sibling is missing, and frames that cannot be
    decoded are all served as the file itself. Otherwise the frame is
    encoded in memory and returned inline.

    Raises:
        HTTPException: 404 when the path is not an existing file.
    """
    # Imported locally so this handler needs no new module-level imports.
    import io
    from urllib.parse import quote

    from fastapi.responses import Response

    full_path = Path("/") / file_path.strip("/")

    if not full_path.is_file():
        raise HTTPException(status_code=404, detail="File not found")

    if not is_image(full_path):
        return FileResponse(full_path)

    metadata = read_metadata(str(full_path))
    screen, sequence, is_thumbnail = get_thumbnail_info(metadata)

    logging.debug(
        "Screen: %s, Sequence: %s, Is Thumbnail: %s", screen, sequence, is_thumbnail
    )

    if not all([screen, sequence, is_thumbnail]):
        return FileResponse(full_path)

    video_path = full_path.parent / f"{screen}.mp4"
    logging.debug("Video path: %s", video_path)
    if not video_path.is_file():
        return FileResponse(full_path)

    frame_image = extract_video_frame(video_path, sequence)
    if frame_image is None:
        return FileResponse(full_path)

    # Encode in memory rather than writing to a fixed /tmp/temp_<name> path:
    # that path is not portable, collides between concurrent requests for
    # same-named files, and was never cleaned up.
    # is_image() has already limited the suffix to .png/.jpg/.jpeg.
    is_png = full_path.suffix.lower() == ".png"
    buffer = io.BytesIO()
    frame_image.save(buffer, format="PNG" if is_png else "JPEG")

    # Header values must be latin-1 encodable; use the RFC 5987 form when the
    # name needs percent-encoding.
    filename = full_path.name
    quoted = quote(filename)
    if quoted == filename:
        disposition = f"inline; filename={filename}"
    else:
        disposition = f"inline; filename*=utf-8''{quoted}"

    return Response(
        content=buffer.getvalue(),
        media_type="image/png" if is_png else "image/jpeg",
        headers={"Content-Disposition": disposition},
    )
654
+
655
+
656
@app.get("/files/{file_path:path}", tags=["files"])
async def get_file(file_path: str):
    """Serve a file addressed by its path from the filesystem root, or 404."""
    # NOTE(review): any readable file on the host can be requested this way --
    # confirm the server is only exposed to trusted clients.
    full_path = Path("/") / file_path.strip("/")

    if not full_path.is_file():
        return JSONResponse(
            status_code=status.HTTP_404_NOT_FOUND, content={"detail": "File not found"}
        )

    return FileResponse(full_path)
666
+
667
+
668
@app.get("/search", response_model=SearchResult, tags=["search"])
async def search_entities_v2(
    q: str,
    library_ids: str = Query(None, description="Comma-separated list of library IDs"),
    limit: Annotated[int, Query(ge=1, le=200)] = 48,
    start: int = None,
    end: int = None,
    app_names: str = Query(None, description="Comma-separated list of app names"),
    facet: bool = Query(None, description="Include facet in the search results"),
    db: Session = Depends(get_db),
):
    """Search entities and return them as a ``SearchResult``.

    A blank ``q`` lists entities through ``crud.list_entities``; otherwise the
    search provider's hybrid search is used and, when facets are enabled,
    per-app-name counts are attached.

    Args:
        q: Query text; blank means "list".
        library_ids: Comma-separated integer library ids, or omitted.
        limit: Maximum number of hits (1-200).
        start: Lower time bound, forwarded unchanged to the CRUD/search layer.
        end: Upper time bound, forwarded unchanged to the CRUD/search layer.
        app_names: Comma-separated app names (whitespace is stripped).
        facet: Overrides ``settings.facet`` when given.
        db: Database session.

    Raises:
        HTTPException: 400 when ``library_ids`` is not a list of integers,
            500 when listing/searching fails.
    """
    # Reject malformed ids as a client error instead of an unhandled ValueError.
    try:
        library_ids = (
            [int(library_id) for library_id in library_ids.split(",")]
            if library_ids
            else None
        )
    except ValueError:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="library_ids must be a comma-separated list of integers",
        ) from None

    app_name_list = (
        [app_name.strip() for app_name in app_names.split(",")] if app_names else None
    )

    # Use settings.facet if facet parameter is not provided
    use_facet = settings.facet if facet is None else facet

    try:
        if q.strip() == "":
            # Use list_entities when q is empty
            entities = crud.list_entities(
                db=db, limit=limit, library_ids=library_ids, start=start, end=end
            )
            stats = {}
        else:
            # Use search provider for both search and stats
            entity_ids = search_provider.hybrid_search(
                query=q,
                db=db,
                limit=limit,
                library_ids=library_ids,
                start=start,
                end=end,
                app_names=app_name_list,
            )
            entities = crud.find_entities_by_ids(entity_ids, db)
            stats = (
                search_provider.get_search_stats(
                    query=q,
                    db=db,
                    library_ids=library_ids,
                    start=start,
                    end=end,
                    app_names=app_name_list,
                )
                if use_facet
                else {}
            )

        # Convert Entity list to SearchHit list
        hits = []
        for entity in entities:
            entity_search_result = EntitySearchResult(
                id=str(entity.id),
                filepath=entity.filepath,
                filename=entity.filename,
                size=entity.size,
                file_created_at=int(entity.file_created_at.timestamp()),
                file_last_modified_at=int(entity.file_last_modified_at.timestamp()),
                file_type=entity.file_type,
                file_type_group=entity.file_type_group,
                last_scan_at=(
                    int(entity.last_scan_at.timestamp())
                    if entity.last_scan_at
                    else None
                ),
                library_id=entity.library_id,
                folder_id=entity.folder_id,
                tags=[tag.name for tag in entity.tags],
                metadata_entries=[
                    MetadataIndexItem(
                        key=metadata.key,
                        # JSON-typed metadata is decoded; other values pass through.
                        value=(
                            json.loads(metadata.value)
                            if metadata.data_type == MetadataType.JSON_DATA
                            else metadata.value
                        ),
                        source=metadata.source,
                    )
                    for metadata in entity.metadata_entries
                ],
            )

            hits.append(
                SearchHit(
                    document=entity_search_result,
                    highlight={},
                    highlights=[],
                    text_match=None,
                    hybrid_search_info=None,
                    text_match_info=None,
                )
            )

        # Convert app-name counts to the facet_counts format
        app_name_facet_counts = []
        if stats and "app_name_counts" in stats:
            app_name_facet_counts = [
                FacetCount(value=app_name, count=count, highlighted=app_name)
                for app_name, count in stats["app_name_counts"].items()
            ]

        facet_counts = (
            [
                Facet(
                    field_name="app_names",
                    counts=app_name_facet_counts,
                    sampled=False,
                    stats=FacetStats(total_values=len(app_name_facet_counts)),
                )
            ]
            if app_name_facet_counts
            else []
        )

        # Build SearchResult
        search_result = SearchResult(
            facet_counts=facet_counts,
            found=len(hits),
            hits=hits,
            out_of=len(hits),
            page=1,
            request_params=RequestParams(
                collection_name="entities",
                first_q=q,
                per_page=limit,
                q=q,
                app_names=app_name_list,
            ),
            search_cutoff=False,
            search_time_ms=0,
        )

        return search_result

    except Exception as e:
        logging.error("Error searching entities: %s", e)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e),
        )
815
+
816
+
817
@app.get(
    "/libraries/{library_id}/entities/{entity_id}/context",
    response_model=EntityContext,
    tags=["entity"],
)
def get_entity_context(
    library_id: int,
    entity_id: int,
    prev: Annotated[int | None, Query(ge=0, le=100)] = None,
    next: Annotated[int | None, Query(ge=0, le=100)] = None,
    db: Session = Depends(get_db),
):
    """
    Get the context (previous and next entities) for a given entity.

    Args:
        library_id: The ID of the library
        entity_id: The ID of the target entity
        prev: Number of previous entities to fetch (optional)
        next: Number of next entities to fetch (optional)

    Returns:
        EntityContext object containing prev and next lists of entities
    """
    # Nothing requested on either side: answer with empty lists, no lookup.
    if prev is None and next is None:
        return EntityContext(prev=[], next=[])

    # The crud function expects plain counts, so a missing side becomes 0.
    before, after = crud.get_entity_context(
        db=db,
        library_id=library_id,
        entity_id=entity_id,
        prev=0 if prev is None else prev,
        next=0 if next is None else next,
    )

    return EntityContext(prev=before, next=after)
860
+
861
+
862
def run_server():
    """Log the active settings, attach the plugin routers and start uvicorn."""
    for message, value in (
        ("Database path: %s", settings.database_url),
        ("VLM plugin enabled: %s", settings.vlm),
        ("OCR plugin enabled: %s", settings.ocr),
    ):
        logging.info(message, value)

    # Both plugin routers are always initialised and mounted; the earlier
    # settings.vlm.enabled / settings.ocr.enabled checks were removed.
    plugin_mounts = (
        (vlm_main, settings.vlm, "/plugins/vlm"),
        (ocr_main, settings.ocr, "/plugins/ocr"),
    )
    for plugin_module, plugin_settings, prefix in plugin_mounts:
        plugin_module.init_plugin(plugin_settings)
        app.include_router(plugin_module.router, prefix=prefix)

    uvicorn.run(
        "memos.server:app",
        log_config=LOGGING_CONFIG,
        reload=False,
        port=settings.server_port,
        host=settings.server_host,
    )