pensiev 0.25.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. memos/__init__.py +6 -0
  2. memos/cmds/__init__.py +0 -0
  3. memos/cmds/library.py +1289 -0
  4. memos/cmds/plugin.py +96 -0
  5. memos/commands.py +865 -0
  6. memos/config.py +225 -0
  7. memos/crud.py +605 -0
  8. memos/databases/__init__.py +0 -0
  9. memos/databases/initializers.py +481 -0
  10. memos/dataset_extractor_for_florence.py +165 -0
  11. memos/dataset_extractor_for_internvl2.py +192 -0
  12. memos/default_config.yaml +88 -0
  13. memos/embedding.py +129 -0
  14. memos/frame_extractor.py +53 -0
  15. memos/logging_config.py +35 -0
  16. memos/main.py +104 -0
  17. memos/migrations/alembic/README +1 -0
  18. memos/migrations/alembic/__pycache__/env.cpython-310.pyc +0 -0
  19. memos/migrations/alembic/env.py +108 -0
  20. memos/migrations/alembic/script.py.mako +30 -0
  21. memos/migrations/alembic/versions/00904ac8c6fc_add_indexes_to_entitymodel.py +63 -0
  22. memos/migrations/alembic/versions/04acdaf75664_add_indices_to_entitytags_and_metadata.py +86 -0
  23. memos/migrations/alembic/versions/12504c5b1d3c_add_extra_columns_for_embedding.py +67 -0
  24. memos/migrations/alembic/versions/31a1ad0e10b3_add_entity_plugin_status.py +71 -0
  25. memos/migrations/alembic/versions/__pycache__/00904ac8c6fc_add_indexes_to_entitymodel.cpython-310.pyc +0 -0
  26. memos/migrations/alembic/versions/__pycache__/04acdaf75664_add_indices_to_entitytags_and_metadata.cpython-310.pyc +0 -0
  27. memos/migrations/alembic/versions/__pycache__/12504c5b1d3c_add_extra_columns_for_embedding.cpython-310.pyc +0 -0
  28. memos/migrations/alembic/versions/__pycache__/20f5ecab014d_add_entity_plugin_status.cpython-310.pyc +0 -0
  29. memos/migrations/alembic/versions/__pycache__/31a1ad0e10b3_add_entity_plugin_status.cpython-310.pyc +0 -0
  30. memos/migrations/alembic/versions/__pycache__/4fcb062c5128_add_extra_columns_for_embedding.cpython-310.pyc +0 -0
  31. memos/migrations/alembic/versions/__pycache__/d10c55fbb7d2_add_index_for_entity_file_type_group_.cpython-310.pyc +0 -0
  32. memos/migrations/alembic/versions/__pycache__/f8f158182416_add_active_app_index.cpython-310.pyc +0 -0
  33. memos/migrations/alembic/versions/d10c55fbb7d2_add_index_for_entity_file_type_group_.py +44 -0
  34. memos/migrations/alembic/versions/f8f158182416_add_active_app_index.py +75 -0
  35. memos/migrations/alembic.ini +116 -0
  36. memos/migrations.py +19 -0
  37. memos/models.py +199 -0
  38. memos/plugins/__init__.py +0 -0
  39. memos/plugins/ocr/__init__.py +0 -0
  40. memos/plugins/ocr/main.py +251 -0
  41. memos/plugins/ocr/models/ch_PP-OCRv4_det_infer.onnx +0 -0
  42. memos/plugins/ocr/models/ch_PP-OCRv4_rec_infer.onnx +0 -0
  43. memos/plugins/ocr/models/ch_ppocr_mobile_v2.0_cls_train.onnx +0 -0
  44. memos/plugins/ocr/ppocr-gpu.yaml +43 -0
  45. memos/plugins/ocr/ppocr.yaml +44 -0
  46. memos/plugins/ocr/server.py +227 -0
  47. memos/plugins/ocr/temp_ppocr.yaml +42 -0
  48. memos/plugins/vlm/__init__.py +0 -0
  49. memos/plugins/vlm/main.py +251 -0
  50. memos/prepare_dataset.py +107 -0
  51. memos/process_webp.py +55 -0
  52. memos/read_metadata.py +32 -0
  53. memos/record.py +358 -0
  54. memos/schemas.py +289 -0
  55. memos/search.py +1198 -0
  56. memos/server.py +883 -0
  57. memos/shotsum.py +105 -0
  58. memos/shotsum_with_ocr.py +145 -0
  59. memos/simple_tokenizer/dict/README.md +31 -0
  60. memos/simple_tokenizer/dict/hmm_model.utf8 +34 -0
  61. memos/simple_tokenizer/dict/idf.utf8 +258826 -0
  62. memos/simple_tokenizer/dict/jieba.dict.utf8 +348982 -0
  63. memos/simple_tokenizer/dict/pos_dict/char_state_tab.utf8 +6653 -0
  64. memos/simple_tokenizer/dict/pos_dict/prob_emit.utf8 +166 -0
  65. memos/simple_tokenizer/dict/pos_dict/prob_start.utf8 +259 -0
  66. memos/simple_tokenizer/dict/pos_dict/prob_trans.utf8 +5222 -0
  67. memos/simple_tokenizer/dict/stop_words.utf8 +1534 -0
  68. memos/simple_tokenizer/dict/user.dict.utf8 +4 -0
  69. memos/simple_tokenizer/linux/libsimple.so +0 -0
  70. memos/simple_tokenizer/macos/libsimple.dylib +0 -0
  71. memos/simple_tokenizer/windows/simple.dll +0 -0
  72. memos/static/_app/immutable/assets/0.e250c031.css +1 -0
  73. memos/static/_app/immutable/assets/_layout.e7937cfe.css +1 -0
  74. memos/static/_app/immutable/chunks/index.5c08976b.js +1 -0
  75. memos/static/_app/immutable/chunks/index.60ee613b.js +4 -0
  76. memos/static/_app/immutable/chunks/runtime.a7926cf6.js +5 -0
  77. memos/static/_app/immutable/chunks/scheduler.5c1cff6e.js +1 -0
  78. memos/static/_app/immutable/chunks/singletons.583bdf4e.js +1 -0
  79. memos/static/_app/immutable/entry/app.666c1643.js +1 -0
  80. memos/static/_app/immutable/entry/start.aed5c701.js +3 -0
  81. memos/static/_app/immutable/nodes/0.5862ea38.js +7 -0
  82. memos/static/_app/immutable/nodes/1.35378a5e.js +1 -0
  83. memos/static/_app/immutable/nodes/2.1ccf9ea5.js +81 -0
  84. memos/static/_app/version.json +1 -0
  85. memos/static/app.html +36 -0
  86. memos/static/favicon.png +0 -0
  87. memos/static/logos/memos_logo_1024.png +0 -0
  88. memos/static/logos/memos_logo_1024@2x.png +0 -0
  89. memos/static/logos/memos_logo_128.png +0 -0
  90. memos/static/logos/memos_logo_128@2x.png +0 -0
  91. memos/static/logos/memos_logo_16.png +0 -0
  92. memos/static/logos/memos_logo_16@2x.png +0 -0
  93. memos/static/logos/memos_logo_256.png +0 -0
  94. memos/static/logos/memos_logo_256@2x.png +0 -0
  95. memos/static/logos/memos_logo_32.png +0 -0
  96. memos/static/logos/memos_logo_32@2x.png +0 -0
  97. memos/static/logos/memos_logo_512.png +0 -0
  98. memos/static/logos/memos_logo_512@2x.png +0 -0
  99. memos/static/logos/memos_logo_64.png +0 -0
  100. memos/static/logos/memos_logo_64@2x.png +0 -0
  101. memos/test_server.py +802 -0
  102. memos/utils.py +49 -0
  103. memos_ml_backends/florence2_server.py +176 -0
  104. memos_ml_backends/qwen2vl_server.py +182 -0
  105. memos_ml_backends/schemas.py +48 -0
  106. pensiev-0.25.5.dist-info/LICENSE +201 -0
  107. pensiev-0.25.5.dist-info/METADATA +541 -0
  108. pensiev-0.25.5.dist-info/RECORD +111 -0
  109. pensiev-0.25.5.dist-info/WHEEL +5 -0
  110. pensiev-0.25.5.dist-info/entry_points.txt +2 -0
  111. pensiev-0.25.5.dist-info/top_level.txt +2 -0
memos/test_server.py ADDED
@@ -0,0 +1,802 @@
1
+ import json
2
+ import os
3
+ import pytest
4
+ from datetime import datetime
5
+
6
+ from fastapi.testclient import TestClient
7
+ from sqlalchemy import create_engine, event, text
8
+ from sqlalchemy.orm import sessionmaker
9
+ from sqlalchemy.pool import StaticPool
10
+ from pathlib import Path
11
+
12
+
13
+ from memos.server import app, get_db
14
+ from memos.schemas import (
15
+ NewPluginParam,
16
+ NewLibraryParam,
17
+ NewEntityParam,
18
+ UpdateEntityParam,
19
+ NewFoldersParam,
20
+ NewFolderParam,
21
+ EntityMetadataParam,
22
+ MetadataType,
23
+ UpdateEntityTagsParam,
24
+ UpdateEntityMetadataParam,
25
+ FolderType,
26
+ )
27
+ from memos.models import Base
28
+ from memos.databases.initializers import SQLiteInitializer
29
+ from memos.config import settings
30
+
31
+
32
# Tests run against an in-memory SQLite database by default.
test_engine = create_engine(
    "sqlite:///:memory:",
    poolclass=StaticPool,
    connect_args={"check_same_thread": False},
)

# Hand the test engine to the project's SQLite initializer and set up its
# extensions once, at import time.
test_initializer = SQLiteInitializer(test_engine, settings)
test_initializer.init_extensions()

# Session factory bound to the test engine.
TestingSessionLocal = sessionmaker(bind=test_engine, autoflush=False, autocommit=False)
44
+
45
+
46
def load_fixture(filename):
    """Load a JSON fixture from the ``fixtures`` directory next to this file.

    Args:
        filename: Name of the fixture file inside ``fixtures``.

    Returns:
        The deserialized JSON content.

    Raises:
        FileNotFoundError: If the fixture file does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    fixture_path = Path(__file__).parent / "fixtures" / filename
    # Read explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(fixture_path, "r", encoding="utf-8") as file:
        return json.load(file)
49
+
50
+
51
def setup_library_with_entity(client):
    """Create a library, a folder and one indexed entity through the API.

    Each step asserts the expected status code, so a broken prerequisite
    fails here rather than in the calling test.

    Args:
        client: Test client used to issue the requests.

    Returns:
        Tuple ``(library_id, folder_id, entity_id)`` of the created records.
    """
    # Library
    library_payload = NewLibraryParam(name="Test Library for Metadata").model_dump(
        mode="json"
    )
    created_library = client.post("/libraries", json=library_payload)
    assert created_library.status_code == 200
    library_id = created_library.json()["id"]

    # Folder inside that library
    folder = NewFolderParam(
        path="/tmp", last_modified_at=datetime.now(), type=FolderType.DEFAULT
    )
    folders_payload = NewFoldersParam(folders=[folder]).model_dump(mode="json")
    created_folders = client.post(
        f"/libraries/{library_id}/folders", json=folders_payload
    )
    assert created_folders.status_code == 200
    folder_id = created_folders.json()["folders"][0]["id"]

    # Entity inside that folder
    entity_payload = NewEntityParam(
        filename="metadata_test_file.txt",
        filepath="/tmp/metadata_folder/metadata_test_file.txt",
        size=5678,
        file_created_at="2023-01-01T00:00:00",
        file_last_modified_at="2023-01-01T00:00:00",
        file_type="txt",
        file_type_group="text",
        folder_id=folder_id,
    ).model_dump(mode="json")
    created_entity = client.post(
        f"/libraries/{library_id}/entities", json=entity_payload
    )
    assert created_entity.status_code == 200
    entity_id = created_entity.json()["id"]

    # Trigger indexing for the new entity; the endpoint answers 204.
    indexed = client.post(f"/entities/{entity_id}/index")
    assert indexed.status_code == 204

    return library_id, folder_id, entity_id
96
+
97
+
98
def override_get_db():
    """Yield a database session created by ``TestingSessionLocal``.

    Used as the replacement for the application's ``get_db`` dependency
    (registered right below).

    Yields:
        A session bound to the test engine; it is closed once the consumer
        is done with it.
    """
    # Create the session before entering ``try``: if construction fails there
    # is nothing to close, and the original error is not masked by an
    # UnboundLocalError raised from ``finally``.
    db = TestingSessionLocal()
    try:
        yield db
    finally:
        db.close()


# Route the application's database dependency to the test session factory.
app.dependency_overrides[get_db] = override_get_db
107
+
108
+
109
# Setup a fixture for the FastAPI test client
@pytest.fixture
def client():
    """Provide a ``TestClient`` backed by a freshly created test schema.

    Setup creates all base tables and then the SQLite-specific FTS/vector
    tables. Teardown drops them again so each test starts from an empty
    database.

    Yields:
        A ``TestClient`` wrapping the application.
    """
    try:
        # Create all base tables
        Base.metadata.create_all(bind=test_engine)

        # Create FTS and Vec tables for SQLite
        test_initializer.init_specific_features()

        with TestClient(app) as test_client:
            yield test_client
    finally:
        # Clean up even when setup or client shutdown fails; the engine is
        # shared by all tests, so leftover tables would leak into later ones.
        Base.metadata.drop_all(bind=test_engine)
        with test_engine.connect() as conn:
            conn.execute(text("DROP TABLE IF EXISTS entities_fts"))
            conn.execute(text("DROP TABLE IF EXISTS entities_vec_v2"))
            conn.commit()
127
+
128
+
129
# Test the new_library endpoint
def test_new_library(client):
    """POST /libraries creates a library once and rejects a duplicate name."""
    payload = NewLibraryParam(name="Test Library").model_dump()

    # First creation succeeds and echoes the stored library.
    created = client.post("/libraries", json=payload)
    assert created.status_code == 200
    expected_library = {
        "id": 1,
        "name": "Test Library",
        "folders": [],
        "plugins": [],
    }
    assert created.json() == expected_library

    # Posting the same name again is rejected with 400.
    rejected = client.post("/libraries", json=payload)
    assert rejected.status_code == 400
    assert rejected.json() == {"detail": "Library with this name already exists"}
151
+
152
+
153
def test_list_libraries(client):
    """GET /libraries returns the created library together with its folder."""
    # Arrange: one library that already contains a folder.
    folder = NewFolderParam(
        path="/tmp", last_modified_at=datetime.now(), type=FolderType.DEFAULT
    )
    library = NewLibraryParam(name="Sample Library", folders=[folder])
    client.post("/libraries", json=library.model_dump(mode="json"))

    # Act
    listing = client.get("/libraries")

    # Assert
    assert listing.status_code == 200
    libraries = listing.json()

    # The timestamp comes from datetime.now(), so only its presence and type
    # are checked before it is stripped for the exact comparison below.
    for folder_data in libraries[0]["folders"]:
        assert "last_modified_at" in folder_data
        assert isinstance(folder_data["last_modified_at"], str)
        folder_data.pop("last_modified_at")

    expected_folder = {"id": 1, "path": "/tmp", "type": "DEFAULT"}
    expected_library = {
        "id": 1,
        "name": "Sample Library",
        "folders": [expected_folder],
        "plugins": [],
    }
    assert libraries == [expected_library]
193
+
194
+
195
def test_new_entity(client):
    """POST /libraries/{id}/entities stores an entity; an unknown library gives 404."""
    # Setup data: create a new library with one folder
    new_library = NewLibraryParam(
        name="Library for Entity Test",
        folders=[
            NewFolderParam(
                path="/tmp", last_modified_at=datetime.now(), type=FolderType.DEFAULT
            )
        ],
    )
    library_response = client.post(
        "/libraries", json=new_library.model_dump(mode="json")
    )
    # Fail on the status code rather than on a KeyError in the JSON below.
    assert library_response.status_code == 200
    library_data = library_response.json()
    library_id = library_data["id"]
    folder_id = library_data["folders"][0]["id"]

    # Create a new entity
    new_entity = NewEntityParam(
        filename="test_entity.txt",
        filepath="test_entity.txt",
        size=150,
        file_created_at="2023-01-01T00:00:00",
        file_last_modified_at="2023-01-01T00:00:00",
        file_type="txt",
        file_type_group="text",
        folder_id=folder_id,
    )
    entity_response = client.post(
        f"/libraries/{library_id}/entities", json=new_entity.model_dump(mode="json")
    )

    # Check that the response is successful
    assert entity_response.status_code == 200

    # Check the response data
    entity_data = entity_response.json()
    assert entity_data["filename"] == "test_entity.txt"
    assert entity_data["filepath"] == "test_entity.txt"
    assert entity_data["size"] == 150
    assert entity_data["file_created_at"] == "2023-01-01T00:00:00"
    assert entity_data["file_last_modified_at"] == "2023-01-01T00:00:00"
    assert entity_data["file_type"] == "txt"
    assert entity_data["file_type_group"] == "text"
    # Compare with the id the API actually returned instead of a literal 1.
    assert entity_data["folder_id"] == folder_id

    # Test for library not found
    invalid_entity_response = client.post(
        "/libraries/9999/entities", json=new_entity.model_dump(mode="json")
    )
    assert invalid_entity_response.status_code == 404
    assert invalid_entity_response.json() == {"detail": "Library not found"}
246
+
247
+
248
def test_update_entity(client):
    """PUT /entities/{id} updates the given fields; an unknown id gives 404."""
    _, _, entity_id = setup_library_with_entity(client)

    # Fields to change; file_last_modified_at is deliberately left out.
    payload = UpdateEntityParam(
        size=200,
        file_created_at="2023-01-02T00:00:00",
        file_type="markdown",
        file_type_group="text",
    ).model_dump(mode="json")

    updated = client.put(f"/entities/{entity_id}", json=payload)
    assert updated.status_code == 200

    body = updated.json()
    assert body["id"] == entity_id
    assert body["size"] == 200
    assert body["file_created_at"] == "2023-01-02T00:00:00"
    # Still the value set when the entity was created by the setup helper.
    assert body["file_last_modified_at"] == "2023-01-01T00:00:00"
    assert body["file_type"] == "markdown"
    assert body["file_type_group"] == "text"

    # Unknown entity id
    missing = client.put("/entities/9999", json=payload)
    assert missing.status_code == 404
    assert missing.json() == {"detail": "Entity not found"}
282
+
283
+
284
# Test for getting an entity by filepath
def test_get_entity_by_filepath(client):
    """GET /libraries/{id}/entities/by-filepath finds an entity by its path."""
    # Setup data: create a new library and entity
    new_library = NewLibraryParam(
        name="Library for Get Entity Test",
        folders=[
            NewFolderParam(
                path="/tmp", last_modified_at=datetime.now(), type=FolderType.DEFAULT
            )
        ],
    )
    library_response = client.post(
        "/libraries", json=new_library.model_dump(mode="json")
    )
    # Fail on the status code rather than on a KeyError in the JSON below.
    assert library_response.status_code == 200
    library_data = library_response.json()
    library_id = library_data["id"]
    # Use the folder id returned by the API instead of assuming it is 1.
    folder_id = library_data["folders"][0]["id"]

    new_entity = NewEntityParam(
        filename="test_get.txt",
        filepath="test_get.txt",
        size=100,
        file_created_at="2023-01-01T00:00:00",
        file_last_modified_at="2023-01-01T00:00:00",
        file_type="txt",
        file_type_group="text",
        folder_id=folder_id,
    )
    entity_response = client.post(
        f"/libraries/{library_id}/entities", json=new_entity.model_dump(mode="json")
    )
    entity_id = entity_response.json()["id"]

    get_response = client.get(
        f"/libraries/{library_id}/entities/by-filepath",
        params={"filepath": new_entity.filepath},
    )

    # Check that the response is successful
    assert get_response.status_code == 200

    # Check the response data
    entity_data = get_response.json()
    assert entity_data["id"] == entity_id
    assert entity_data["filepath"] == new_entity.filepath
    assert entity_data["filename"] == new_entity.filename
    assert entity_data["size"] == new_entity.size
    assert entity_data["file_type"] == new_entity.file_type
    assert entity_data["file_type_group"] == new_entity.file_type_group

    # Test for entity not found
    invalid_get_response = client.get(
        f"/libraries/{library_id}/entities/by-filepath",
        params={"filepath": "nonexistent.txt"},
    )
    assert invalid_get_response.status_code == 404
    assert invalid_get_response.json() == {"detail": "Entity not found"}

    # Test for library not found. The expected detail here is
    # "Entity not found", not "Library not found".
    invalid_get_response = client.get(
        "/libraries/9999/entities/by-filepath",
        params={"filepath": new_entity.filepath},
    )
    assert invalid_get_response.status_code == 404
    assert invalid_get_response.json() == {"detail": "Entity not found"}
347
+
348
+
349
def test_list_entities_in_folder(client):
    """GET /libraries/{id}/folders/{id}/entities lists a folder's entities."""
    # Library without folders
    library_payload = NewLibraryParam(
        name="Library for List Entities Test", folders=[]
    ).model_dump(mode="json")
    library_id = client.post("/libraries", json=library_payload).json()["id"]

    # Folder added afterwards through the folders endpoint
    folder = NewFolderParam(
        path="/tmp", last_modified_at=datetime.now(), type=FolderType.DEFAULT
    )
    folders_payload = NewFoldersParam(folders=[folder]).model_dump(mode="json")
    created_folders = client.post(
        f"/libraries/{library_id}/folders", json=folders_payload
    )
    folder_id = created_folders.json()["folders"][0]["id"]

    # One entity inside that folder
    new_entity = NewEntityParam(
        filename="test_list.txt",
        filepath="test_list.txt",
        size=100,
        file_created_at="2023-01-01T00:00:00",
        file_last_modified_at="2023-01-01T00:00:00",
        file_type="txt",
        file_type_group="text",
        folder_id=folder_id,
    )
    created_entity = client.post(
        f"/libraries/{library_id}/entities", json=new_entity.model_dump(mode="json")
    )
    entity_id = created_entity.json()["id"]

    # Listing returns exactly that entity.
    listing = client.get(f"/libraries/{library_id}/folders/{folder_id}/entities")
    assert listing.status_code == 200

    entities = listing.json()
    assert len(entities) == 1
    listed = entities[0]
    assert listed["id"] == entity_id
    assert listed["filepath"] == new_entity.filepath
    assert listed["filename"] == new_entity.filename
    assert listed["size"] == new_entity.size
    assert listed["file_type"] == new_entity.file_type
    assert listed["file_type_group"] == new_entity.file_type_group

    # Unknown folder in an existing library
    unknown_folder = client.get(f"/libraries/{library_id}/folders/9999/entities")
    assert unknown_folder.status_code == 404
    assert unknown_folder.json() == {
        "detail": "Folder not found in the specified library"
    }

    # Unknown library
    unknown_library = client.get(f"/libraries/9999/folders/{folder_id}/entities")
    assert unknown_library.status_code == 404
    assert unknown_library.json() == {"detail": "Library not found"}
412
+
413
+
414
def test_remove_entity(client):
    """DELETE on an entity removes it from the main, FTS and vector tables."""
    library_id, _, entity_id = setup_library_with_entity(client)

    fts_query = text("SELECT COUNT(*) FROM entities_fts WHERE id = :id")
    vec_query = text("SELECT COUNT(*) FROM entities_vec_v2 WHERE rowid = :id")
    params = {"id": entity_id}

    # After setup the entity must have one row in each auxiliary table.
    with test_engine.connect() as conn:
        rows_in_fts = conn.execute(fts_query, params).scalar()
        assert rows_in_fts == 1, "Entity was not automatically added to entities_fts table"

        rows_in_vec = conn.execute(vec_query, params).scalar()
        assert rows_in_vec == 1, "Entity was not automatically added to entities_vec_v2 table"

    # Delete through the API.
    deleted = client.delete(f"/libraries/{library_id}/entities/{entity_id}")
    assert deleted.status_code == 204

    # The entity can no longer be fetched.
    fetched = client.get(f"/libraries/{library_id}/entities/{entity_id}")
    assert fetched.status_code == 404
    assert fetched.json() == {"detail": "Entity not found"}

    # Both auxiliary tables must be empty for this id again.
    with test_engine.connect() as conn:
        rows_in_fts = conn.execute(fts_query, params).scalar()
        assert rows_in_fts == 0, "Entity was not deleted from entities_fts table"

        rows_in_vec = conn.execute(vec_query, params).scalar()
        assert rows_in_vec == 0, "Entity was not deleted from entities_vec_v2 table"

    # Deleting an id that does not exist in this library gives 404.
    missing = client.delete(f"/libraries/{library_id}/entities/9999")
    assert missing.status_code == 404
    assert missing.json() == {"detail": "Entity not found in the specified library"}
462
+
463
+
464
def test_add_folder_to_library(client):
    """POST /libraries/{id}/folders adds a folder once; duplicates and unknown libraries fail."""
    # Prepare tmp folders for the test. exist_ok avoids the check-then-create
    # race of a separate os.path.exists() call.
    tmp_folder_path = "/tmp/new_folder"
    os.makedirs(tmp_folder_path, exist_ok=True)

    # Create a new library
    new_library = NewLibraryParam(name="Test Library", folders=[])
    library_response = client.post(
        "/libraries", json=new_library.model_dump(mode="json")
    )
    library_id = library_response.json()["id"]

    # Add a new folder to the library
    new_folders = NewFoldersParam(
        folders=[
            NewFolderParam(
                path=tmp_folder_path,
                last_modified_at=datetime.now(),
                type=FolderType.DEFAULT,
            )
        ]
    )
    folder_response = client.post(
        f"/libraries/{library_id}/folders", json=new_folders.model_dump(mode="json")
    )
    assert folder_response.status_code == 200
    assert any(
        folder["path"] == tmp_folder_path
        for folder in folder_response.json()["folders"]
    )

    # Verify the folder is added
    library_response = client.get(f"/libraries/{library_id}")
    assert library_response.status_code == 200
    library_data = library_response.json()
    folder_paths = [folder["path"] for folder in library_data["folders"]]
    assert tmp_folder_path in folder_paths

    # Test for adding a folder that already exists
    duplicate_folder_response = client.post(
        f"/libraries/{library_id}/folders", json=new_folders.model_dump(mode="json")
    )
    assert duplicate_folder_response.status_code == 400
    assert duplicate_folder_response.json() == {
        "detail": "Folder already exists in the library"
    }

    # Test for adding a folder to a non-existent library
    invalid_folder_response = client.post(
        "/libraries/9999/folders", json=new_folders.model_dump(mode="json")
    )
    assert invalid_folder_response.status_code == 404
    assert invalid_folder_response.json() == {"detail": "Library not found"}
518
+
519
+
520
def test_new_plugin(client):
    """POST /plugins creates a plugin once and rejects every later duplicate."""
    payload = NewPluginParam(
        name="Test Plugin",
        description="A test plugin",
        webhook_url="http://example.com/webhook",
    ).model_dump(mode="json")

    # First creation succeeds and echoes the submitted fields.
    created = client.post("/plugins", json=payload)
    assert created.status_code == 200

    plugin = created.json()
    assert plugin["name"] == "Test Plugin"
    assert plugin["description"] == "A test plugin"
    assert plugin["webhook_url"] == "http://example.com/webhook"

    # Two further attempts with the same name must both be rejected.
    for _ in range(2):
        rejected = client.post("/plugins", json=payload)
        assert rejected.status_code == 400
        assert rejected.json() == {"detail": "Plugin with this name already exists"}
558
+
559
+
560
def test_update_entity_with_tags(client):
    """PUT /entities/{id} with a tag list attaches those tags to the entity."""
    _, _, entity_id = setup_library_with_entity(client)

    payload = UpdateEntityParam(tags=["tag1", "tag2"]).model_dump(mode="json")

    # The request goes to /entities/{entity_id}.
    updated = client.put(f"/entities/{entity_id}", json=payload)
    assert updated.status_code == 200

    body = updated.json()
    assert "tags" in body
    assert len(body["tags"]) == 2
    tag_names = [tag["name"] for tag in body["tags"]]
    assert "tag1" in tag_names
    assert "tag2" in tag_names
581
+
582
+
583
def test_patch_tags_to_entity(client):
    """PATCH /entities/{id}/tags adds tags on top of those set earlier via PUT."""
    library_id, _, entity_id = setup_library_with_entity(client)
    tags_url = f"/entities/{entity_id}/tags"

    # PUT establishes the initial tag set.
    initial_tags = ["tag1", "tag2"]
    put_response = client.put(
        tags_url,
        json=UpdateEntityTagsParam(tags=initial_tags).model_dump(mode="json"),
    )
    assert put_response.status_code == 200
    tags_after_put = put_response.json()["tags"]
    assert len(tags_after_put) == 2
    assert {tag["name"] for tag in tags_after_put} == set(initial_tags)

    # PATCH adds two more tags.
    new_tags = ["tag3", "tag4"]
    patch_response = client.patch(
        tags_url,
        json=UpdateEntityTagsParam(tags=new_tags).model_dump(mode="json"),
    )
    assert patch_response.status_code == 200

    patched = patch_response.json()
    assert "tags" in patched
    assert len(patched["tags"]) == 4
    assert {tag["name"] for tag in patched["tags"]} == set(initial_tags + new_tags)

    # A fresh GET must show the same four tags.
    fetched_response = client.get(f"/libraries/{library_id}/entities/{entity_id}")
    assert fetched_response.status_code == 200
    fetched = fetched_response.json()
    assert "tags" in fetched
    assert len(fetched["tags"]) == 4
    assert {tag["name"] for tag in fetched["tags"]} == set(initial_tags + new_tags)
634
+
635
+
636
def test_add_metadata_entry_to_entity_success(client):
    """PUT /entities/{id} with one metadata entry returns that entry."""
    _, _, entity_id = setup_library_with_entity(client)

    author_entry = EntityMetadataParam(
        key="author",
        value="John Doe",
        source="plugin_generated",
        data_type=MetadataType.TEXT_DATA,
    )
    payload = UpdateEntityParam(metadata_entries=[author_entry]).model_dump(
        mode="json"
    )

    # The request goes to /entities/{entity_id}.
    updated = client.put(f"/entities/{entity_id}", json=payload)
    assert updated.status_code == 200

    body = updated.json()
    # The fixture filename is spelt exactly as the file on disk is named.
    expected_entry = load_fixture("add_metadata_entry_to_entity_sucess_response.json")
    assert "metadata_entries" in body
    entries = body["metadata_entries"]
    assert len(entries) == 1
    assert entries[0] == expected_entry
665
+
666
+
667
def test_update_entity_tags(client):
    """PUT /entities/{id} with three tags returns exactly those tag names."""
    _, _, entity_id = setup_library_with_entity(client)

    tags = ["tag1", "tag2", "tag3"]
    payload = UpdateEntityParam(tags=tags).model_dump(mode="json")

    # The request goes to /entities/{entity_id}.
    updated = client.put(f"/entities/{entity_id}", json=payload)
    assert updated.status_code == 200

    body = updated.json()
    assert "tags" in body
    # Order is not part of the contract, so compare sorted names.
    returned_names = sorted(tag["name"] for tag in body["tags"])
    assert returned_names == sorted(tags, key=str)
689
+
690
+
691
def test_patch_entity_metadata_entries(client):
    """Set metadata via PUT, then change and extend it via PATCH .../metadata."""
    _, _, entity_id = setup_library_with_entity(client)
    text_type = MetadataType.TEXT_DATA.value

    # Step 1: PUT /entities/{entity_id} with two metadata entries.
    initial_entries = [
        EntityMetadataParam(
            key="author",
            value="Jane Smith",
            source="user_generated",
            data_type=text_type,
        ),
        EntityMetadataParam(
            key="year",
            value="2023",
            source="user_generated",
            data_type=text_type,
        ),
    ]
    put_payload = UpdateEntityParam(metadata_entries=initial_entries).model_dump(
        mode="json"
    )
    put_response = client.put(f"/entities/{entity_id}", json=put_payload)
    assert put_response.status_code == 200

    put_body = put_response.json()
    expected_body = load_fixture("patch_entity_metadata_response.json")

    # Check and remove last_scan_at: it must be present and parse as an
    # ISO-8601 string, but it is excluded from the exact comparison.
    assert "last_scan_at" in put_body
    last_scan_at = put_body["last_scan_at"]
    assert isinstance(last_scan_at, str)
    datetime.fromisoformat(last_scan_at.replace("Z", "+00:00"))

    del put_body["last_scan_at"]
    expected_body.pop("last_scan_at", None)

    assert put_body == expected_body

    # Step 2: PATCH /entities/{entity_id}/metadata changes the "author" value.
    patch_entries = [
        {
            "key": "author",
            "value": "John Doe",
            "source": "user_generated",
            "data_type": text_type,
        }
    ]
    patch_payload = UpdateEntityMetadataParam(
        metadata_entries=[EntityMetadataParam(**item) for item in patch_entries]
    ).model_dump(mode="json")
    patch_response = client.patch(
        f"/entities/{entity_id}/metadata", json=patch_payload
    )
    assert patch_response.status_code == 200

    patch_body = patch_response.json()
    assert "metadata_entries" in patch_body
    assert any(
        item["key"] == "author" and item["value"] == "John Doe"
        for item in patch_body["metadata_entries"]
    )

    # Step 3: PATCH again, now also sending a new "media_type" entry.
    patch_entries.append(
        {
            "key": "media_type",
            "value": "book",
            "source": "user_generated",
            "data_type": text_type,
        }
    )
    patch_payload = UpdateEntityMetadataParam(
        metadata_entries=[EntityMetadataParam(**item) for item in patch_entries]
    ).model_dump(mode="json")
    patch_response = client.patch(
        f"/entities/{entity_id}/metadata", json=patch_payload
    )
    assert patch_response.status_code == 200

    patch_body = patch_response.json()
    assert "metadata_entries" in patch_body
    assert any(
        item["key"] == "media_type" and item["value"] == "book"
        for item in patch_body["metadata_entries"]
    )