core-semantic-search-app 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. core_semantic_search_app-1.0.0/LICENSE.md +35 -0
  2. core_semantic_search_app-1.0.0/MANIFEST.in +5 -0
  3. core_semantic_search_app-1.0.0/PKG-INFO +36 -0
  4. core_semantic_search_app-1.0.0/README.rst +26 -0
  5. core_semantic_search_app-1.0.0/core_semantic_search_app/__init__.py +4 -0
  6. core_semantic_search_app-1.0.0/core_semantic_search_app/admin.py +15 -0
  7. core_semantic_search_app-1.0.0/core_semantic_search_app/apps.py +54 -0
  8. core_semantic_search_app-1.0.0/core_semantic_search_app/commons/__init__.py +0 -0
  9. core_semantic_search_app-1.0.0/core_semantic_search_app/commons/exceptions.py +9 -0
  10. core_semantic_search_app-1.0.0/core_semantic_search_app/components/__init__.py +0 -0
  11. core_semantic_search_app-1.0.0/core_semantic_search_app/components/data/__init__.py +0 -0
  12. core_semantic_search_app-1.0.0/core_semantic_search_app/components/data/watch.py +30 -0
  13. core_semantic_search_app-1.0.0/core_semantic_search_app/components/document/__init__.py +0 -0
  14. core_semantic_search_app-1.0.0/core_semantic_search_app/components/document/api.py +279 -0
  15. core_semantic_search_app-1.0.0/core_semantic_search_app/components/document/models.py +22 -0
  16. core_semantic_search_app-1.0.0/core_semantic_search_app/components/model_settings/__init__.py +0 -0
  17. core_semantic_search_app-1.0.0/core_semantic_search_app/components/model_settings/admin_site.py +49 -0
  18. core_semantic_search_app-1.0.0/core_semantic_search_app/components/model_settings/api.py +23 -0
  19. core_semantic_search_app-1.0.0/core_semantic_search_app/components/model_settings/forms.py +44 -0
  20. core_semantic_search_app-1.0.0/core_semantic_search_app/components/model_settings/models.py +95 -0
  21. core_semantic_search_app-1.0.0/core_semantic_search_app/menus.py +14 -0
  22. core_semantic_search_app-1.0.0/core_semantic_search_app/migrations/0001_initial.py +34 -0
  23. core_semantic_search_app-1.0.0/core_semantic_search_app/migrations/0002_modelsettings.py +75 -0
  24. core_semantic_search_app-1.0.0/core_semantic_search_app/migrations/__init__.py +0 -0
  25. core_semantic_search_app-1.0.0/core_semantic_search_app/rest/__init__.py +0 -0
  26. core_semantic_search_app-1.0.0/core_semantic_search_app/rest/urls.py +14 -0
  27. core_semantic_search_app-1.0.0/core_semantic_search_app/rest/views.py +141 -0
  28. core_semantic_search_app-1.0.0/core_semantic_search_app/settings.py +23 -0
  29. core_semantic_search_app-1.0.0/core_semantic_search_app/static/core_semantic_search_app/css/search_box.css +41 -0
  30. core_semantic_search_app-1.0.0/core_semantic_search_app/static/core_semantic_search_app/js/search_box.js +229 -0
  31. core_semantic_search_app-1.0.0/core_semantic_search_app/static/core_semantic_search_app/js/search_box.raw.js +1 -0
  32. core_semantic_search_app-1.0.0/core_semantic_search_app/tasks.py +18 -0
  33. core_semantic_search_app-1.0.0/core_semantic_search_app/templates/core_semantic_search_app/user/index.html +45 -0
  34. core_semantic_search_app-1.0.0/core_semantic_search_app/templates/core_semantic_search_app/user/modals/settings.html +53 -0
  35. core_semantic_search_app-1.0.0/core_semantic_search_app/urls.py +19 -0
  36. core_semantic_search_app-1.0.0/core_semantic_search_app/utils/__init__.py +0 -0
  37. core_semantic_search_app-1.0.0/core_semantic_search_app/utils/chunking_utils.py +201 -0
  38. core_semantic_search_app-1.0.0/core_semantic_search_app/utils/model_utils/__init__.py +0 -0
  39. core_semantic_search_app-1.0.0/core_semantic_search_app/utils/model_utils/model_api.py +112 -0
  40. core_semantic_search_app-1.0.0/core_semantic_search_app/utils/model_utils/model_client.py +60 -0
  41. core_semantic_search_app-1.0.0/core_semantic_search_app/utils/model_utils/response.py +62 -0
  42. core_semantic_search_app-1.0.0/core_semantic_search_app/views/__init__.py +0 -0
  43. core_semantic_search_app-1.0.0/core_semantic_search_app/views/user/__init__.py +0 -0
  44. core_semantic_search_app-1.0.0/core_semantic_search_app/views/user/views.py +59 -0
  45. core_semantic_search_app-1.0.0/core_semantic_search_app.egg-info/PKG-INFO +36 -0
  46. core_semantic_search_app-1.0.0/core_semantic_search_app.egg-info/SOURCES.txt +74 -0
  47. core_semantic_search_app-1.0.0/core_semantic_search_app.egg-info/dependency_links.txt +1 -0
  48. core_semantic_search_app-1.0.0/core_semantic_search_app.egg-info/requires.txt +7 -0
  49. core_semantic_search_app-1.0.0/core_semantic_search_app.egg-info/top_level.txt +2 -0
  50. core_semantic_search_app-1.0.0/pyproject.toml +7 -0
  51. core_semantic_search_app-1.0.0/requirements.core.txt +1 -0
  52. core_semantic_search_app-1.0.0/requirements.txt +3 -0
  53. core_semantic_search_app-1.0.0/setup.cfg +4 -0
  54. core_semantic_search_app-1.0.0/setup.py +71 -0
  55. core_semantic_search_app-1.0.0/tests/__init__.py +0 -0
  56. core_semantic_search_app-1.0.0/tests/components/__init__.py +0 -0
  57. core_semantic_search_app-1.0.0/tests/components/data/__init__.py +0 -0
  58. core_semantic_search_app-1.0.0/tests/components/data/tests_unit.py +44 -0
  59. core_semantic_search_app-1.0.0/tests/components/document/__init__.py +0 -0
  60. core_semantic_search_app-1.0.0/tests/components/document/tests_unit.py +564 -0
  61. core_semantic_search_app-1.0.0/tests/components/document/tests_unit_tasks.py +30 -0
  62. core_semantic_search_app-1.0.0/tests/components/model_settings/__init__.py +0 -0
  63. core_semantic_search_app-1.0.0/tests/components/model_settings/tests_unit.py +234 -0
  64. core_semantic_search_app-1.0.0/tests/rest/__init__.py +0 -0
  65. core_semantic_search_app-1.0.0/tests/rest/tests_permissions.py +77 -0
  66. core_semantic_search_app-1.0.0/tests/rest/tests_unit.py +213 -0
  67. core_semantic_search_app-1.0.0/tests/test_settings.py +69 -0
  68. core_semantic_search_app-1.0.0/tests/tests_unit_apps.py +42 -0
  69. core_semantic_search_app-1.0.0/tests/urls.py +9 -0
  70. core_semantic_search_app-1.0.0/tests/utils/__init__.py +0 -0
  71. core_semantic_search_app-1.0.0/tests/utils/tests_unit.py +532 -0
  72. core_semantic_search_app-1.0.0/tests/utils/tests_unit_model_api.py +134 -0
  73. core_semantic_search_app-1.0.0/tests/utils/tests_unit_model_client.py +92 -0
  74. core_semantic_search_app-1.0.0/tests/views/__init__.py +0 -0
  75. core_semantic_search_app-1.0.0/tests/views/user/__init__.py +0 -0
  76. core_semantic_search_app-1.0.0/tests/views/user/tests_unit.py +64 -0
@@ -0,0 +1,35 @@
1
+ # NIST Software Licensing Statement
2
+
3
+ NIST-developed software is provided by NIST as a public service.
4
+ You may use, copy, and distribute copies of the software in any
5
+ medium, provided that you keep intact this entire notice. You may
6
+ improve, modify, and create derivative works of the software or
7
+ any portion of the software, and you may copy and distribute such
8
+ modifications or works. Modified works should carry a notice
9
+ stating that you changed the software and should note the date
10
+ and nature of any such change. Please explicitly acknowledge the
11
+ National Institute of Standards and Technology as the source of
12
+ the software.
13
+
14
+ NIST-developed software is expressly provided "AS IS." NIST MAKES
15
+ NO WARRANTY OF ANY KIND, EXPRESS, IMPLIED, IN FACT, OR ARISING BY
16
+ OPERATION OF LAW, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
17
+ WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE,
18
+ NON-INFRINGEMENT, AND DATA ACCURACY. NIST NEITHER REPRESENTS NOR
19
+ WARRANTS THAT THE OPERATION OF THE SOFTWARE WILL BE UNINTERRUPTED
20
+ OR ERROR-FREE, OR THAT ANY DEFECTS WILL BE CORRECTED. NIST DOES
21
+ NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE USE OF THE
22
+ SOFTWARE OR THE RESULTS THEREOF, INCLUDING BUT NOT LIMITED TO THE
23
+ CORRECTNESS, ACCURACY, RELIABILITY, OR USEFULNESS OF THE
24
+ SOFTWARE.
25
+
26
+ You are solely responsible for determining the appropriateness of
27
+ using and distributing the software and you assume all risks
28
+ associated with its use, including but not limited to the risks
29
+ and costs of program errors, compliance with applicable laws,
30
+ damage to or loss of data, programs or equipment, and the
31
+ unavailability or interruption of operation. This software is not
32
+ intended to be used in any situation where a failure could cause
33
+ risk of injury or damage to property. The software developed by
34
+ NIST employees is not subject to copyright protection within the
35
+ United States.
@@ -0,0 +1,5 @@
1
+ include README.rst
2
+ include requirements.txt
3
+ include requirements.core.txt
4
+ recursive-include core_semantic_search_app/static *
5
+ recursive-include core_semantic_search_app/templates *
@@ -0,0 +1,36 @@
1
+ Metadata-Version: 2.1
2
+ Name: core_semantic_search_app
3
+ Version: 1.0.0
4
+ Summary: Semantic Search utils for the curator core project
5
+ Home-page: https://github.com/usnistgov/core_semantic_search_app
6
+ Author: NIST IT Lab
7
+ Author-email: itl_inquiries@nist.gov
8
+ Provides-Extra: sentence_transformers
9
+ License-File: LICENSE.md
10
+
11
+ ========================
12
+ Core Semantic Search App
13
+ ========================
14
+
15
+ Semantic search for the curator core project.
16
+
17
+ Quick start
18
+ ===========
19
+
20
+ 1. Add "core_semantic_search_app" to your INSTALLED_APPS setting
21
+ ----------------------------------------------------------------
22
+
23
+ .. code:: python
24
+
25
+     INSTALLED_APPS = [
26
+         ...
27
+         'core_semantic_search_app',
28
+     ]
29
+
30
+
31
+ 2. Include the core_semantic_search_app URLconf in your project urls.py
32
+ -----------------------------------------------------------------------
33
+
34
+ .. code:: python
35
+
36
+     re_path(r'^semantic-search/', include('core_semantic_search_app.urls')),
@@ -0,0 +1,26 @@
1
+ ========================
2
+ Core Semantic Search App
3
+ ========================
4
+
5
+ Semantic search for the curator core project.
6
+
7
+ Quick start
8
+ ===========
9
+
10
+ 1. Add "core_semantic_search_app" to your INSTALLED_APPS setting
11
+ ----------------------------------------------------------------
12
+
13
+ .. code:: python
14
+
15
+     INSTALLED_APPS = [
16
+         ...
17
+         'core_semantic_search_app',
18
+     ]
19
+
20
+
21
+ 2. Include the core_semantic_search_app URLconf in your project urls.py
22
+ -----------------------------------------------------------------------
23
+
24
+ .. code:: python
25
+
26
+     re_path(r'^semantic-search/', include('core_semantic_search_app.urls')),
@@ -0,0 +1,4 @@
1
+ """ Initialize app for core_semantic_search_app
2
+ """
3
+
4
+ default_app_config = "core_semantic_search_app.apps.SemanticSearchAppConfig"
@@ -0,0 +1,15 @@
1
+ """ Url router for the administration site
2
+ """
3
+
4
+ from django.contrib import admin
5
+
6
+ from core_semantic_search_app.components.document.models import Document
7
+ from core_semantic_search_app.components.model_settings.admin_site import (
8
+ CustomModelSettingsAdmin,
9
+ )
10
+ from core_semantic_search_app.components.model_settings.models import (
11
+ ModelSettings,
12
+ )
13
+
14
+ admin.site.register(Document)
15
+ admin.site.register(ModelSettings, CustomModelSettingsAdmin)
@@ -0,0 +1,54 @@
1
+ """ Apps file for core_semantic_search_app
2
+ """
3
+
4
+ import sys
5
+
6
+ from django.apps import AppConfig
7
+ from django.db.models.signals import post_save, post_delete
8
+
9
+ from core_main_app.utils.databases.backend import uses_postgresql_backend
10
+ from core_semantic_search_app.commons.exceptions import SemanticSearchError
11
+
12
+
13
class SemanticSearchAppConfig(AppConfig):
    """Django application configuration for the semantic search app."""

    name = "core_semantic_search_app"

    def ready(self):
        """Check the settings and connect the signals once the app is loaded.

        Nothing is done when ``migrate`` is one of the command line arguments.

        Returns:

        """
        running_migrations = "migrate" in sys.argv
        if not running_migrations:
            _check_settings()
            _init_signals()
29
+
30
+
31
def _check_settings():
    """Make sure the project runs on a PostgreSQL database backend.

    Raises:
        SemanticSearchError: when ``uses_postgresql_backend()`` is falsy.

    Returns:

    """
    if uses_postgresql_backend():
        return

    raise SemanticSearchError("PostgreSQL with Pgvector is required.")
39
+
40
+
41
def _init_signals():
    """Connect the Data watchers to the post-save and post-delete signals.

    Returns:

    """
    # Imported locally rather than at module level
    # NOTE(review): presumably because models are not importable before the
    # app registry is ready - confirm.
    from core_main_app.components.data.models import Data
    from core_semantic_search_app.components.data.watch import (
        post_save_data,
        post_delete_data,
    )

    receivers_by_signal = (
        (post_save, post_save_data),
        (post_delete, post_delete_data),
    )
    for signal, receiver in receivers_by_signal:
        signal.connect(receiver, sender=Data)
@@ -0,0 +1,9 @@
1
+ """ Semantic Search Exceptions
2
+ """
3
+
4
+
5
class SemanticSearchError(Exception):
    """Exception raised by the Semantic Search app."""

    def __init__(self, message):
        """Initialize the exception.

        Args:
            message: Description of the error; kept on ``self.message``.

        """
        # Forward the message to Exception so that ``args`` and ``str()``
        # carry it even when it is passed as a keyword argument.
        super().__init__(message)
        self.message = message
@@ -0,0 +1,30 @@
1
+ """ Watchers for the data collection
2
+ """
3
+
4
+ from core_semantic_search_app.components.document.api import (
5
+ index_documents_from_data,
6
+ delete_documents_with_data_id,
7
+ )
8
+
9
+
10
def post_save_data(sender, instance, **kwargs):
    """Signal receiver run after a Data object is saved.

    Hands the saved object over to ``index_documents_from_data``.

    Args:
        sender: Class.
        instance: Data object that was saved.
        **kwargs: Extra signal arguments (unused).

    """
    saved_data = instance
    index_documents_from_data(data=saved_data)
20
+
21
+
22
def post_delete_data(sender, instance, **kwargs):
    """Signal receiver run after a Data object is deleted.

    Passes the id of the deleted object to ``delete_documents_with_data_id``.

    Args:
        sender: Class.
        instance: Data object that was deleted.
        **kwargs: Extra signal arguments (unused).

    """
    deleted_data_id = instance.id
    delete_documents_with_data_id(deleted_data_id)
@@ -0,0 +1,279 @@
1
+ """ Document API
2
+ """
3
+
4
+ import logging
5
+ import re
6
+
7
+ from django.db.models import OuterRef, Exists, IntegerField
8
+ from django.db.models.fields.json import KeyTextTransform
9
+ from django.db.models.functions import Cast
10
+ from pgvector.django import CosineDistance, L2Distance, MaxInnerProduct
11
+
12
+ from core_main_app.access_control.api import has_perm_administration
13
+ from core_main_app.access_control.decorators import access_control
14
+ from core_main_app.commons.exceptions import ApiError
15
+ from core_main_app.components.data import api as data_api
16
+ from core_main_app.components.workspace import api as workspace_api
17
+ from core_semantic_search_app import tasks as semantic_search_tasks
18
+ from core_semantic_search_app.components.document.models import Document
19
+ from core_semantic_search_app.components.model_settings.models import (
20
+ ModelSettings,
21
+ )
22
+ from core_semantic_search_app.settings import (
23
+ INSTALLED_APPS,
24
+ )
25
+ from core_semantic_search_app.utils.chunking_utils import chunk_json_dict
26
+
27
+ if "core_linked_records_app" in INSTALLED_APPS:
28
+ from core_linked_records_app.system.pid_path import (
29
+ api as system_pid_path_api,
30
+ )
31
+ from core_linked_records_app.utils.dict import (
32
+ is_dot_notation_in_dictionary,
33
+ get_value_from_dot_notation,
34
+ )
35
+ logger = logging.getLogger(__name__)
36
+
37
+ VECTOR_FUNCTIONS = {
38
+ "cosine_similarity": CosineDistance,
39
+ "l2_distance": L2Distance,
40
+ "max_inner_product": MaxInnerProduct,
41
+ }
42
+
43
+
44
+ # NOTE: called from task
45
def generate_documents_from_data(data):
    """Convert a data into a list of documents

    The content of the data is split into text chunks, using the chunk length,
    chunk overlap and (for the "VALUES" strategy) the index fields stored in
    the model settings. One Document is built per chunk; documents are not
    saved by this function.

    Args:
        data: Data object providing ``template``, ``title``, ``id`` and
            ``get_dict_content()``.

    Returns:
        list: Document objects whose ``meta`` holds the data title, id and PID
            (the PID is None when it could not be resolved).

    """
    # Parse the content once: it is used for the PID lookup and the chunking
    data_dict = data.get_dict_content()

    data_pid = None
    if "core_linked_records_app" in INSTALLED_APPS:
        try:
            pid_path = system_pid_path_api.get_pid_path_by_template(
                data.template,
            ).path

            # The PID is optional: it stays None if the path is not in the data
            if is_dot_notation_in_dictionary(data_dict, pid_path):
                data_pid = get_value_from_dot_notation(data_dict, pid_path)
        except Exception as e:
            # Best effort: a failed PID lookup does not stop the conversion
            logger.error(
                f"Unable to get data PID during data to document conversion: {str(e)}"
            )

    model_settings = ModelSettings.get()

    # Restrict chunking to the configured fields for the "VALUES" strategy only
    target_keys = (
        model_settings.document_index_fields
        if model_settings.document_index_strategy == "VALUES"
        else None
    )

    text_chunks = chunk_json_dict(
        json_dict=data_dict,
        chunk_size=model_settings.sliding_window_chunk_length,
        chunk_overlap=model_settings.sliding_window_chunk_overlap,
        target_keys=target_keys,
    )

    return [
        Document(
            content=text_chunk,
            meta={
                "title": data.title,
                "data_id": data.id,
                "data_pid": data_pid,
            },
        )
        for text_chunk in text_chunks
    ]
96
+
97
+
98
+ # NOTE: called from data watch
99
def delete_documents_with_data_id(data_id):
    """Remove every document whose metadata references the given data id

    Args:
        data_id: Id of the data whose documents are deleted.

    Returns:

    """
    # Look up the matching documents and delete them in one go
    _get_documents_by_data_id(data_id=data_id).delete()
112
+
113
+
114
+ # NOTE: called from data watch
115
def index_documents_from_data(data):
    """Schedule the indexing of the documents extracted from a data

    Existing documents of the data are always deleted first (as long as model
    settings with at least one embedding model exist). The indexing task is
    then queued only if the template name matches the configured filter and
    the data is in a public workspace.

    Args:
        data: Data object to index.

    Returns:

    """
    settings_obj = ModelSettings.get()

    # Nothing to do without settings or without any embedding model
    if not (settings_obj and settings_obj.embedding_models.keys()):
        return

    # Drop the documents indexed for a previous version of the data
    delete_documents_with_data_id(data.id)

    template_name_filter = settings_obj.document_index_template_name_filter
    if not _check_template_name(data, template_name_filter):
        return

    # Data outside of any workspace is not indexed
    workspace = data.workspace
    if not workspace:
        return

    # Only data from a public workspace is indexed
    public_workspace_ids = (
        workspace_api.get_all_public_workspaces().values_list("id", flat=True)
    )
    if workspace.id in public_workspace_ids:
        # Indexing itself happens in a task
        semantic_search_tasks.write_documents.apply_async((data.id,))
150
+
151
+
152
@access_control(has_perm_administration)
def reindex(user):
    """Reindex the full knowledge base

    All indexed documents are deleted, then one indexing task is queued for
    every data of a public workspace whose template name matches the filter
    stored in the model settings.

    Args:
        user: User requesting the reindex; checked by the access control
            decorator and used to list the data.

    Returns:

    """
    # Delete all currently indexed documents
    Document.objects.all().delete()

    # Get model settings from database
    model_settings = ModelSettings.get()
    if not model_settings:
        # Same guard as index_documents_from_data: without settings there is
        # no template filter to apply, so nothing can be scheduled.
        logger.warning("No model settings found: reindexing skipped.")
        return

    # Find all public workspaces
    public_workspaces = workspace_api.get_all_public_workspaces().values_list(
        "id", flat=True
    )
    # Get all public data
    all_data = data_api.get_all(user).filter(workspace__in=public_workspaces)

    # The filter is the same for every record: read it once
    template_name_filter = model_settings.document_index_template_name_filter
    for data in all_data:
        # Skip data whose template name does not match the pattern
        if not _check_template_name(data, template_name_filter):
            continue
        # Index documents in tasks
        semantic_search_tasks.write_documents.apply_async((data.id,))
182
+
183
+
184
def query(
    query_embedding=None,
    top_k=10,
    threshold=0.8,
    vector_function="cosine_similarity",
    data_filters_qs=None,
):
    """Query the indexed documents

    Args:
        query_embedding: Embedding of the query. When None or empty, documents
            are neither ordered, scored nor filtered by threshold.
        top_k: Maximum number of documents returned; a falsy value disables
            the limit.
        threshold: Score limit; a falsy value disables the filter. Documents
            are kept when their score is below the threshold for
            "l2_distance", above it for the other functions.
        vector_function: One of the keys of VECTOR_FUNCTIONS.
        data_filters_qs: Optional queryset of data; only documents whose
            ``meta.data_id`` is the id of one of these data are kept.

    Raises:
        ApiError: if the vector function is not supported.

    Returns:
        QuerySet of Document, annotated with ``score`` when an embedding
        is provided.

    """
    supported_functions = list(VECTOR_FUNCTIONS.keys())
    if vector_function not in supported_functions:
        raise ApiError(
            f"Vector function should be in: {supported_functions}."
        )
    # Get all documents
    queryset = Document.objects.all()

    if data_filters_qs is not None:
        # Pull data_id from meta field
        data_id_expr = Cast(
            KeyTextTransform("data_id", "meta"),
            output_field=IntegerField(),
        )
        # Filter on extracted data_id
        queryset = queryset.annotate(_data_id=data_id_expr).filter(
            Exists(data_filters_qs.filter(id=OuterRef("_data_id")))
        )

    # Explicit emptiness check: the truth value of a numpy array is ambiguous
    # and would raise, whereas len() works for lists and arrays alike.
    has_embedding = query_embedding is not None and len(query_embedding) > 0
    if has_embedding:
        # Order documents by distance to query
        # https://github.com/pgvector/pgvector-python?tab=readme-ov-file#django
        queryset = queryset.order_by(
            VECTOR_FUNCTIONS[vector_function]("embedding", query_embedding)
        )

        # Compute the score between query and documents
        # https://github.com/pgvector/pgvector?tab=readme-ov-file#distances
        if vector_function == "cosine_similarity":
            queryset = queryset.annotate(
                score=1 - CosineDistance("embedding", query_embedding)
            )
        elif vector_function == "max_inner_product":
            queryset = queryset.annotate(
                score=-1 * MaxInnerProduct("embedding", query_embedding)
            )
        elif vector_function == "l2_distance":
            queryset = queryset.annotate(
                score=L2Distance("embedding", query_embedding)
            )

        # Filter score with provided threshold
        if threshold:
            if vector_function == "l2_distance":
                # Score is a distance: smaller is better
                queryset = queryset.filter(score__lt=threshold)
            else:
                # Score is a similarity: larger is better
                queryset = queryset.filter(score__gt=threshold)

    # Keep the top k results
    if top_k:
        queryset = queryset[:top_k]

    return queryset
255
+
256
+
257
+ def _check_template_name(data, pattern):
258
+ """Check template name matches the pattern
259
+
260
+ Args:
261
+ data:
262
+ pattern:
263
+
264
+ Returns:
265
+
266
+ """
267
+ return re.search(pattern, data.template.version_manager.title)
268
+
269
+
270
def _get_documents_by_data_id(data_id):
    """Select the documents whose metadata holds the given data id

    Args:
        data_id: Id of the data; converted with ``int`` before filtering.

    Returns:
        QuerySet of Document filtered on ``meta__data_id``.

    """
    lookup = {"meta__data_id": int(data_id)}
    return Document.objects.filter(**lookup)
@@ -0,0 +1,22 @@
1
+ """ Document models
2
+ """
3
+
4
+ from django.db import models
5
+ from django.db.models import JSONField
6
+ from pgvector.django import VectorField
7
+
8
+
9
class Document(models.Model):
    """Document Model: a piece of text with its embedding and metadata"""

    # Embedding vector of the content (pgvector field)
    embedding = VectorField()
    # Text of the document, at most 4000 characters
    content = models.CharField(unique=False, max_length=4000)
    # Free-form metadata; __str__ reads its "title" key
    meta = JSONField(default=dict)

    def __str__(self):
        """Return the title stored in the metadata

        ``__str__`` must return a string, so a missing, empty or None title
        (and a None ``meta``) falls back to "Untitled", and any other value is
        converted with ``str``.

        Returns:
            str: Title of the document, or "Untitled".

        """
        title = (self.meta or {}).get("title")
        return str(title) if title else "Untitled"
@@ -0,0 +1,49 @@
1
+ """ Custom admin site for the Model Settings model
2
+ """
3
+
4
+ from django.contrib import admin
5
+ from django.contrib import messages
6
+
7
+ from core_semantic_search_app.components.document.api import reindex
8
+ from core_semantic_search_app.components.model_settings.forms import (
9
+ ModelSettingsAdminForm,
10
+ )
11
+
12
+
13
@admin.action(description="Reindex the knowledge base")
def reindex_action(model_admin, request, queryset):
    """Admin action triggering a reindex of the knowledge base

    Only superusers may run it; other users get an error message instead.

    Args:
        model_admin: Admin object used to display the message.
        request: Current request; its user is passed to ``reindex``.
        queryset: Selected objects (unused).

    Returns:

    """
    current_user = request.user
    if current_user.is_superuser:
        reindex(current_user)
        return

    model_admin.message_user(request, "Permission denied.", messages.ERROR)
30
+
31
+
32
class CustomModelSettingsAdmin(admin.ModelAdmin):
    """Admin of the model settings, with a reindex action"""

    form = ModelSettingsAdminForm
    actions = [reindex_action]

    def has_add_permission(self, request):
        """Refuse adding an object as soon as one exists

        Args:
            request: Current request, passed on to the default check.

        Returns:
            False if an object of the model already exists, the default
            permission check otherwise.

        """
        already_exists = self.model.objects.exists()
        return False if already_exists else super().has_add_permission(request)
@@ -0,0 +1,23 @@
1
+ """ ModelSetting api
2
+ """
3
+
4
+ import os
5
+
6
+
7
def get_api_key(model_dict):
    """Get API Key from dictionary

    The key is looked up in this order:
    1. the environment variable named by ``model_dict["api_key_env"]``,
    2. ``model_dict["api_key"]``,
    3. the literal "no-key".
    Empty or None values are skipped at every step.

    Args:
        model_dict: Dictionary describing a model.

    Returns:
        str: API key, "no-key" if none is configured.

    """
    # Value from the environment has priority
    api_key_env = model_dict.get("api_key_env")
    if api_key_env:
        api_key = os.getenv(api_key_env)
        if api_key:
            return api_key
    # Value from the dict otherwise; `or` also covers an entry that is
    # present but empty/None, which `.get(key, default)` would let through.
    return model_dict.get("api_key") or "no-key"
@@ -0,0 +1,44 @@
1
+ """ Model Settings forms
2
+ """
3
+
4
+ import json
5
+
6
+ from django import forms
7
+
8
+ from core_semantic_search_app.components.model_settings.models import (
9
+ ModelSettings,
10
+ )
11
+
12
+
13
class PrettyJSONEncoder(json.JSONEncoder):
    """JSON encoder with a fixed layout: 4-space indent, keys not sorted."""

    def __init__(self, *args, indent=None, sort_keys=False, **kwargs):
        """Initialize the encoder, ignoring the requested layout.

        Args:
            *args: Positional arguments of ``json.JSONEncoder``.
            indent: Accepted and ignored (always 4).
            sort_keys: Accepted and ignored (always False).
            **kwargs: Other keyword arguments of ``json.JSONEncoder``.

        """
        # indent / sort_keys are named only to take them out of kwargs;
        # their defaults allow the encoder to be instantiated directly.
        super().__init__(*args, indent=4, sort_keys=False, **kwargs)
16
+
17
+
18
class ModelSettingsAdminForm(forms.ModelForm):
    """Admin form of the model settings

    Exposes all the fields of ModelSettings, relabels the chunking fields and
    shows the embedding models as indented JSON with an example placeholder.
    """

    class Meta:
        model = ModelSettings
        fields = "__all__"
        labels = {
            "sliding_window_chunk_length": "Chunk size (characters)",
            "sliding_window_chunk_overlap": "Chunk overlap (characters)",
        }

    def __init__(self, *args, **kwargs):
        """Initialize the form and customize the embedding models field

        Args:
            *args: Positional arguments of the parent form.
            **kwargs: Keyword arguments of the parent form.

        """
        super().__init__(*args, **kwargs)
        embedding_models_field = self.fields["embedding_models"]
        # Indented JSON is easier to edit in the admin text area
        embedding_models_field.encoder = PrettyJSONEncoder
        # Example of the expected structure, displayed while the field is empty
        example = {
            "placeholder": """{"modelName":{
"model": "modelName:version",
"base_url": "http://localhost:8080/v1",
"api_key": "",
"api_key_env": "",
"ssl_verify": true,
"proxies": {}
}
}
"""
        }
        embedding_models_field.widget.attrs.update(example)