compair-core 0.4.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compair_core/__init__.py +8 -0
- compair_core/api.py +3598 -0
- compair_core/compair/__init__.py +57 -0
- compair_core/compair/celery_app.py +31 -0
- compair_core/compair/default_groups.py +14 -0
- compair_core/compair/embeddings.py +141 -0
- compair_core/compair/feedback.py +368 -0
- compair_core/compair/logger.py +29 -0
- compair_core/compair/main.py +276 -0
- compair_core/compair/models.py +453 -0
- compair_core/compair/schema.py +146 -0
- compair_core/compair/tasks.py +106 -0
- compair_core/compair/utils.py +42 -0
- compair_core/compair_email/__init__.py +0 -0
- compair_core/compair_email/email.py +6 -0
- compair_core/compair_email/email_core.py +15 -0
- compair_core/compair_email/templates.py +6 -0
- compair_core/compair_email/templates_core.py +32 -0
- compair_core/db.py +64 -0
- compair_core/server/__init__.py +0 -0
- compair_core/server/app.py +97 -0
- compair_core/server/deps.py +77 -0
- compair_core/server/local_model/__init__.py +1 -0
- compair_core/server/local_model/app.py +87 -0
- compair_core/server/local_model/ocr.py +107 -0
- compair_core/server/providers/__init__.py +0 -0
- compair_core/server/providers/console_mailer.py +9 -0
- compair_core/server/providers/contracts.py +66 -0
- compair_core/server/providers/http_ocr.py +60 -0
- compair_core/server/providers/local_storage.py +28 -0
- compair_core/server/providers/noop_analytics.py +7 -0
- compair_core/server/providers/noop_billing.py +30 -0
- compair_core/server/providers/noop_ocr.py +10 -0
- compair_core/server/routers/__init__.py +0 -0
- compair_core/server/routers/capabilities.py +46 -0
- compair_core/server/settings.py +66 -0
- compair_core-0.4.12.dist-info/METADATA +136 -0
- compair_core-0.4.12.dist-info/RECORD +41 -0
- compair_core-0.4.12.dist-info/WHEEL +5 -0
- compair_core-0.4.12.dist-info/licenses/LICENSE +674 -0
- compair_core-0.4.12.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,453 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import binascii
|
|
4
|
+
import hashlib
|
|
5
|
+
import os
|
|
6
|
+
import secrets
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from math import sqrt
|
|
9
|
+
from typing import Sequence
|
|
10
|
+
from uuid import uuid4
|
|
11
|
+
|
|
12
|
+
try: # Optional: only required when using pgvector backend
|
|
13
|
+
from pgvector.sqlalchemy import Vector
|
|
14
|
+
except ImportError: # pragma: no cover - optional dependency in core
|
|
15
|
+
Vector = None # type: ignore[assignment]
|
|
16
|
+
|
|
17
|
+
from sqlalchemy import (
|
|
18
|
+
Boolean,
|
|
19
|
+
Column,
|
|
20
|
+
DateTime,
|
|
21
|
+
ForeignKey,
|
|
22
|
+
Identity,
|
|
23
|
+
Integer,
|
|
24
|
+
JSON,
|
|
25
|
+
String,
|
|
26
|
+
Table,
|
|
27
|
+
Text,
|
|
28
|
+
)
|
|
29
|
+
from sqlalchemy.orm import (
|
|
30
|
+
DeclarativeBase,
|
|
31
|
+
Mapped,
|
|
32
|
+
MappedAsDataclass,
|
|
33
|
+
mapped_column,
|
|
34
|
+
relationship,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
# Edition name, lower-cased; anything other than "cloud" uses the smaller default.
_EDITION = os.getenv("COMPAIR_EDITION", "core").lower()
_DEFAULT_DIM = 1536 if _EDITION == "cloud" else 384

# First non-empty variable wins; an empty string falls through to the next one.
_DIM_ENV = (
    os.getenv("COMPAIR_EMBEDDING_DIM")
    or os.getenv("COMPAIR_EMBEDDING_DIMENSION")
    or os.getenv("COMPAIR_LOCAL_EMBED_DIM")
    or str(_DEFAULT_DIM)
)

try:
    EMBEDDING_DIMENSION = int(_DIM_ENV)
    if EMBEDDING_DIMENSION <= 0:
        # A zero or negative width is as unusable as a non-numeric one, so it
        # takes the same fallback path instead of reaching the column type.
        raise ValueError(f"embedding dimension must be positive, got {_DIM_ENV!r}")
except ValueError:  # pragma: no cover - invalid configuration
    EMBEDDING_DIMENSION = _DEFAULT_DIM
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _detect_vector_backend() -> str:
    """Choose the embedding storage backend from environment variables.

    An explicit ``COMPAIR_VECTOR_BACKEND`` is returned lower-cased. Otherwise
    ``"pgvector"`` is chosen when ``DB``, ``DB_USER``, ``DB_PASSW`` and
    ``DB_URL`` are all non-empty, or when ``DATABASE_URL`` starts with a
    ``postgres://`` / ``postgresql://`` scheme. Everything else yields ``"json"``.
    """
    override = os.getenv("COMPAIR_VECTOR_BACKEND")
    if override:
        return override.lower()

    db_parts = [os.getenv(name) for name in ("DB", "DB_USER", "DB_PASSW", "DB_URL")]
    if all(db_parts):
        return "pgvector"

    url = os.getenv("DATABASE_URL", "").lower()
    uses_postgres = url.startswith(("postgres://", "postgresql://"))
    return "pgvector" if uses_postgres else "json"


# Resolved once at import time and read by _embedding_column().
VECTOR_BACKEND = _detect_vector_backend()
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _embedding_column():
    """Return a fresh nullable ``mapped_column`` for storing an embedding.

    With the ``"pgvector"`` backend the column is a ``Vector`` of
    ``EMBEDDING_DIMENSION`` width; any other backend stores a JSON array.

    Raises:
        RuntimeError: the pgvector backend is active but the ``pgvector``
            package could not be imported.
    """
    if VECTOR_BACKEND == "pgvector":
        if Vector is None:
            # The backend may have been auto-detected from the database
            # settings rather than set explicitly, so the message names both
            # ways out instead of blaming COMPAIR_VECTOR_BACKEND alone.
            raise RuntimeError(
                "The 'pgvector' package is required for the pgvector vector backend "
                "(selected via COMPAIR_VECTOR_BACKEND or auto-detected from the database "
                "settings). Install pgvector or set COMPAIR_VECTOR_BACKEND=json."
            )
        return mapped_column(
            Vector(EMBEDDING_DIMENSION),
            nullable=True,
            default=None,
        )
    # Store embeddings as JSON arrays (works across SQLite/Postgres without pgvector)
    return mapped_column(JSON, nullable=True, default=None)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def cosine_similarity(vec1: Sequence[float] | None, vec2: Sequence[float] | None) -> float | None:
    """Return the cosine similarity of two equal-length vectors.

    Args:
        vec1: First vector, or ``None``.
        vec2: Second vector, or ``None``.

    Returns:
        The similarity as a float, or ``None`` when either vector is missing
        or empty, the lengths differ, or either vector has zero magnitude.
    """
    # Explicit None/len checks instead of truthiness: array-like inputs
    # (e.g. numpy arrays) raise on ``not vec`` when they hold several elements.
    if vec1 is None or vec2 is None:
        return None
    if len(vec1) == 0 or len(vec1) != len(vec2):
        return None
    dot = sum(a * b for a, b in zip(vec1, vec2))
    norm1 = sqrt(sum(a * a for a in vec1))
    norm2 = sqrt(sum(b * b for b in vec2))
    if norm1 == 0 or norm2 == 0:
        return None
    return float(dot / (norm1 * norm2))
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class Base(DeclarativeBase, MappedAsDataclass):
    """Declarative base shared by every ORM model in this module."""
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class BaseObject(Base):
    """Abstract intermediate base; it maps no table of its own."""

    __abstract__ = True
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class User(Base):
    """ORM model for the ``user`` table, with PBKDF2 password helpers."""

    __tablename__ = "user"

    # Primary key; the default is a callable producing a UUID4 string and the
    # column is excluded from the dataclass-generated constructor.
    user_id: Mapped[str] = mapped_column(String(36), primary_key=True, init=False, default=lambda: str(uuid4()))
    username: Mapped[str] = mapped_column(String(128))
    name: Mapped[str] = mapped_column(String(256))
    role: Mapped[str | None] = mapped_column(String(128), nullable=True)
    profile_image: Mapped[str | None] = mapped_column(String, nullable=True)
    verification_token: Mapped[str | None] = mapped_column(String, nullable=True)
    reset_token: Mapped[str | None] = mapped_column(String, nullable=True)
    token_expiration: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    datetime_registered: Mapped[datetime]
    status_change_date: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    # Both stored hex-encoded; written by set_password().
    password_hash: Mapped[str]
    password_salt: Mapped[str]

    status: Mapped[str] = mapped_column(String(16), default="inactive")
    include_own_documents_in_feedback: Mapped[bool] = mapped_column(Boolean, default=False)
    default_publish: Mapped[bool] = mapped_column(Boolean, default=True)
    preferred_feedback_length: Mapped[str] = mapped_column(String(16), default="Brief")
    hide_affiliations: Mapped[bool] = mapped_column(Boolean, default=False)

    # Many-to-many membership through the user_to_group association table.
    groups = relationship("Group", secondary="user_to_group", back_populates="users")
    documents = relationship(
        "Document",
        back_populates="user",
        cascade="all, delete",
        passive_deletes=True,
    )
    notes = relationship(
        "Note",
        back_populates="author",
        cascade="all, delete",
        passive_deletes=True,
    )

    activities = relationship(
        "Activity",
        back_populates="user",
        cascade="all, delete-orphan"
    )

    def __init__(
        self,
        username: str,
        name: str,
        datetime_registered: datetime,
        verification_token: str | None,
        token_expiration: datetime | None,
    ):
        """Create an inactive user; the password is set separately via set_password()."""
        super().__init__()
        self.username = username
        self.name = name
        self.datetime_registered = datetime_registered
        self.verification_token = verification_token
        self.token_expiration = token_expiration
        # New accounts always start inactive, stamped with the current UTC time.
        self.status = "inactive"
        self.status_change_date = datetime.now(timezone.utc)

    def set_password(self, password: str) -> str:
        """Hash ``password`` with a fresh random salt and store both as hex.

        Returns:
            The hex-encoded PBKDF2-HMAC-SHA256 hash that was stored.
        """
        salt = os.urandom(64)
        self.password_salt = binascii.hexlify(salt).decode("utf-8")
        hash_bytes = hashlib.pbkdf2_hmac("sha256", password.encode("utf-8"), salt, 100000)
        self.password_hash = binascii.hexlify(hash_bytes).decode("utf-8")
        return self.password_hash

    def check_password(self, password: str) -> bool:
        """Return True when ``password`` matches the stored salt and hash.

        Returns False when no salt or hash has been stored.
        """
        if not self.password_salt or not self.password_hash:
            return False
        salt = binascii.unhexlify(self.password_salt.encode("utf-8"))
        # Must use the same algorithm and iteration count as set_password().
        hash_bytes = hashlib.pbkdf2_hmac("sha256", password.encode("utf-8"), salt, 100000)
        hash_hex = binascii.hexlify(hash_bytes).decode("utf-8")
        # Constant-time comparison of the two hex digests.
        return secrets.compare_digest(self.password_hash, hash_hex)
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
class Session(Base):
    """ORM model for the ``session`` table."""

    __tablename__ = "session"

    # Primary key supplied by the caller (init=True); no default is defined.
    id: Mapped[str] = mapped_column(String(128), primary_key=True, init=True)
    # Owning user; the foreign key is declared with ON DELETE CASCADE.
    user_id: Mapped[str] = mapped_column(ForeignKey("user.user_id", ondelete="CASCADE"), index=True)
    datetime_created: Mapped[datetime]
    datetime_valid_until: Mapped[datetime]
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class Group(BaseObject):
    """ORM model for the ``group`` table, with small derived read-only properties."""

    __tablename__ = "group"

    # Primary key; the default is a callable producing a UUID4 string and the
    # column is excluded from the generated constructor.
    group_id: Mapped[str] = mapped_column(String(36), primary_key=True, init=False, default=lambda: str(uuid4()))
    name: Mapped[str] = mapped_column(String(256))
    datetime_created: Mapped[datetime]
    group_image: Mapped[str | None] = mapped_column(String, nullable=True)
    category: Mapped[str] = mapped_column(String(256), default="Other")
    description: Mapped[str] = mapped_column(Text, default="")
    visibility: Mapped[str] = mapped_column(String(32), default="public")

    # Many-to-many links, each through its own association table.
    users = relationship("User", secondary="user_to_group", back_populates="groups")
    admins = relationship("Administrator", secondary="admin_to_group", back_populates="groups")
    documents = relationship("Document", secondary="document_to_group", back_populates="groups")
    notes = relationship("Note", secondary="note_to_group", back_populates="groups")

    activities = relationship(
        "Activity",
        back_populates="group",
        cascade="all, delete-orphan"
    )

    __mapper_args__ = {"primary_key": [group_id]}

    @property
    def document_count(self) -> int:
        """Number of documents linked to this group (reads the relationship)."""
        return len(self.documents)

    @property
    def user_count(self) -> int:
        """Number of users linked to this group (reads the relationship)."""
        return len(self.users)

    @property
    def first_three_user_profile_images(self) -> list[str | None]:
        """Profile images of at most the first three users; entries may be None."""
        return [user.profile_image for user in self.users[:3]]
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
class Administrator(Base):
    """ORM model for the ``administrator`` table, linking a user to administered groups."""

    __tablename__ = "administrator"

    # Primary key; the default is a callable producing a UUID4 string.
    admin_id: Mapped[str] = mapped_column(String(36), primary_key=True, init=False, default=lambda: str(uuid4()))
    user_id: Mapped[str] = mapped_column(ForeignKey("user.user_id", ondelete="CASCADE"), index=True)

    user = relationship("User")
    # Many-to-many through the admin_to_group association table.
    groups = relationship("Group", secondary="admin_to_group", back_populates="admins")
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
class JoinRequest(Base):
    """ORM model for the ``join_request`` table (a user's request to join a group)."""

    __tablename__ = "join_request"

    request_id: Mapped[int] = mapped_column(Identity(), primary_key=True, autoincrement=True, init=False)
    user_id: Mapped[str] = mapped_column(ForeignKey("user.user_id", ondelete="CASCADE"))
    group_id: Mapped[str] = mapped_column(ForeignKey("group.group_id", ondelete="CASCADE"))
    # Computed per row. A plain ``default=datetime.now(...)`` would be evaluated
    # once at import and stamp every request with the process start time.
    datetime_requested: Mapped[datetime] = mapped_column(
        default_factory=lambda: datetime.now(timezone.utc),
        insert_default=lambda: datetime.now(timezone.utc),
        init=False,
    )

    user = relationship("User")
    group = relationship("Group")
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
class GroupInvitation(Base):
    """ORM model for the ``group_invitation`` table."""

    __tablename__ = "group_invitation"

    invitation_id: Mapped[int] = mapped_column(Integer, Identity(), primary_key=True, autoincrement=True, init=False)
    group_id: Mapped[str] = mapped_column(ForeignKey("group.group_id", ondelete="CASCADE"))
    inviter_id: Mapped[str] = mapped_column(ForeignKey("user.user_id", ondelete="CASCADE"))
    # Unique per invitation (enforced by the column's unique constraint).
    token: Mapped[str] = mapped_column(String(64), unique=True, nullable=False)
    email: Mapped[str | None] = mapped_column(String(256), nullable=True)
    datetime_expiration: Mapped[datetime]
    # Computed per row. A plain ``default=datetime.now(...)`` would be evaluated
    # once at import and stamp every invitation with the process start time.
    datetime_created: Mapped[datetime] = mapped_column(
        default_factory=lambda: datetime.now(timezone.utc),
        insert_default=lambda: datetime.now(timezone.utc),
        init=False,
    )
    status: Mapped[str] = mapped_column(String(32), default="pending")

    group = relationship("Group")
    inviter = relationship("User")
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
class Document(BaseObject):
    """ORM model for the ``document`` table."""

    __tablename__ = "document"

    # Primary key; the default is a callable producing a UUID4 string and the
    # column is excluded from the generated constructor.
    document_id: Mapped[str] = mapped_column(String(36), primary_key=True, init=False, default=lambda: str(uuid4()))
    user_id: Mapped[str] = mapped_column(ForeignKey("user.user_id", ondelete="CASCADE"), index=True)
    # Plain string column; unlike user_id it carries no foreign key.
    author_id: Mapped[str]
    title: Mapped[str]
    content: Mapped[str] = mapped_column(Text)
    doc_type: Mapped[str]
    datetime_created: Mapped[datetime]
    datetime_modified: Mapped[datetime]
    # Column type is chosen by _embedding_column() from VECTOR_BACKEND.
    embedding: Mapped[list[float] | None] = _embedding_column()
    file_key: Mapped[str | None] = mapped_column(String, nullable=True, default=None)
    image_key: Mapped[str | None] = mapped_column(String, nullable=True, default=None)
    is_published: Mapped[bool] = mapped_column(Boolean, default=False)

    user = relationship("User", back_populates="documents")
    # Many-to-many through the document_to_group association table.
    groups = relationship("Group", secondary="document_to_group", back_populates="documents")
    chunks = relationship(
        "Chunk",
        back_populates="document",
        cascade="all, delete",
        passive_deletes=True,
    )
    references = relationship(
        "Reference",
        back_populates="document",
        cascade="all, delete",
        passive_deletes=True,
    )
    notes = relationship(
        "Note",
        back_populates="document",
        cascade="all, delete",
        passive_deletes=True,
    )
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
class Note(Base):
    """ORM model for the ``note`` table (a note attached to a document)."""

    __tablename__ = "note"

    # Primary key; the default is a callable producing a UUID4 string and the
    # column is excluded from the generated constructor.
    note_id: Mapped[str] = mapped_column(String(36), primary_key=True, init=False, default=lambda: str(uuid4()))
    document_id: Mapped[str] = mapped_column(ForeignKey("document.document_id", ondelete="CASCADE"), index=True)
    author_id: Mapped[str] = mapped_column(ForeignKey("user.user_id", ondelete="CASCADE"), index=True)
    group_id: Mapped[str | None] = mapped_column(ForeignKey("group.group_id", ondelete="CASCADE"), index=True, nullable=True)
    content: Mapped[str] = mapped_column(Text)
    # Column type is chosen by _embedding_column() from VECTOR_BACKEND.
    embedding: Mapped[list[float] | None] = _embedding_column()
    # Computed per row unless the caller passes a value. A plain
    # ``default=datetime.now(...)`` would be evaluated once at import and stamp
    # every note with the process start time.
    datetime_created: Mapped[datetime] = mapped_column(
        default_factory=lambda: datetime.now(timezone.utc),
        insert_default=lambda: datetime.now(timezone.utc),
    )

    document = relationship("Document", back_populates="notes")
    author = relationship("User", back_populates="notes")
    # NOTE(review): Group.notes goes through the note_to_group association
    # table, while this side declares no ``secondary`` and so presumably
    # resolves through group_id -- confirm the two sides are meant to differ.
    groups = relationship("Group", back_populates="notes")
    chunks = relationship(
        "Chunk",
        back_populates="note",
        cascade="all, delete",
        passive_deletes=True,
    )
    references = relationship(
        "Reference",
        back_populates="note",
        cascade="all, delete",
        passive_deletes=True,
    )
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
class Chunk(Base):
    """ORM model for the ``chunk`` table; a chunk may reference a document or a note."""

    __tablename__ = "chunk"

    # Primary key; the default is a callable producing a UUID4 string and the
    # column is excluded from the generated constructor.
    chunk_id: Mapped[str] = mapped_column(String(36), primary_key=True, init=False, default=lambda: str(uuid4()))
    # presumably a digest of ``content`` (column is 32 characters wide) -- TODO confirm
    hash: Mapped[str] = mapped_column(String(32))
    content: Mapped[str] = mapped_column(Text)
    # Both parent links are nullable.
    document_id: Mapped[str | None] = mapped_column(ForeignKey("document.document_id", ondelete="CASCADE"), index=True, nullable=True)
    note_id: Mapped[str | None] = mapped_column(ForeignKey("note.note_id", ondelete="CASCADE"), index=True, nullable=True)
    chunk_type: Mapped[str] = mapped_column(String(16), default="document")
    # Column type is chosen by _embedding_column() from VECTOR_BACKEND.
    embedding: Mapped[list[float] | None] = _embedding_column()

    document = relationship("Document", back_populates="chunks")
    note = relationship("Note", back_populates="chunks")
    references = relationship(
        "Reference",
        back_populates="chunk",
        cascade="all, delete",
        passive_deletes=True,
    )
    feedbacks = relationship(
        "Feedback",
        back_populates="chunk",
        cascade="all, delete",
        passive_deletes=True,
    )
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
class Reference(Base):
    """ORM model for the ``reference`` table: links a source chunk to a document or note."""

    __tablename__ = "reference"

    # Primary key; the default is a callable producing a UUID4 string and the
    # column is excluded from the generated constructor.
    reference_id: Mapped[str] = mapped_column(String(36), primary_key=True, init=False, default=lambda: str(uuid4()))
    source_chunk_id: Mapped[str] = mapped_column(ForeignKey("chunk.chunk_id", ondelete="CASCADE"), index=True)
    # Both target links are nullable.
    reference_document_id: Mapped[str | None] = mapped_column(ForeignKey("document.document_id", ondelete="CASCADE"), index=True, nullable=True)
    reference_note_id: Mapped[str | None] = mapped_column(ForeignKey("note.note_id", ondelete="CASCADE"), index=True, nullable=True)
    reference_type: Mapped[str] = mapped_column(String(16), default="document")

    chunk = relationship("Chunk", back_populates="references")
    document = relationship("Document", back_populates="references")
    note = relationship("Note", back_populates="references")
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
class Feedback(Base):
    """ORM model for the ``feedback`` table (feedback text attached to a chunk)."""

    __tablename__ = "feedback"

    # Primary key; the default is a callable producing a UUID4 string and the
    # column is excluded from the generated constructor.
    feedback_id: Mapped[str] = mapped_column(String(36), primary_key=True, init=False, default=lambda: str(uuid4()))
    source_chunk_id: Mapped[str] = mapped_column(ForeignKey("chunk.chunk_id", ondelete="CASCADE"), index=True)
    feedback: Mapped[str] = mapped_column(Text)
    model: Mapped[str] = mapped_column(Text)
    # Computed per row unless the caller passes a value. A plain
    # ``default=datetime.now(...)`` would be evaluated once at import and stamp
    # every feedback row with the process start time.
    timestamp: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default_factory=lambda: datetime.now(timezone.utc),
        insert_default=lambda: datetime.now(timezone.utc),
    )
    user_feedback: Mapped[str | None] = mapped_column(String(16), nullable=True, default=None)
    is_hidden: Mapped[bool] = mapped_column(Boolean, default=False)

    chunk = relationship("Chunk", back_populates="feedbacks")
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
class Activity(Base):
    """ORM model for the ``activity`` table (an action by a user within a group)."""

    __tablename__ = "activity"

    activity_id: Mapped[int] = mapped_column(Identity(), primary_key=True, init=False, autoincrement=True)
    user_id: Mapped[str] = mapped_column(ForeignKey("user.user_id", ondelete="CASCADE"), nullable=False)
    group_id: Mapped[str] = mapped_column(ForeignKey("group.group_id", ondelete="CASCADE"), nullable=False)
    action: Mapped[str] = mapped_column(String(32))
    object_id: Mapped[str] = mapped_column(String(36))
    object_name: Mapped[str] = mapped_column(Text)
    object_type: Mapped[str] = mapped_column(String(32))
    # Computed per row unless the caller passes a value. A plain
    # ``default=datetime.now(...)`` would be evaluated once at import and stamp
    # every activity with the process start time.
    timestamp: Mapped[datetime] = mapped_column(
        default_factory=lambda: datetime.now(timezone.utc),
        insert_default=lambda: datetime.now(timezone.utc),
    )

    # The user is eagerly joined whenever an activity is loaded.
    user = relationship("User", back_populates="activities", lazy="joined")
    group = relationship("Group", back_populates="activities")
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
def _cascade_pk_column(column_name: str, target: str) -> Column:
    """Build a primary-key column whose foreign key to ``target`` cascades on delete."""
    return Column(column_name, ForeignKey(target, ondelete="CASCADE"), primary_key=True)


# Association tables behind the many-to-many relationships declared on the
# models. Each row is a (member, group) pair forming a composite primary key.
user_to_group_table = Table(
    "user_to_group",
    Base.metadata,
    _cascade_pk_column("user_id", "user.user_id"),
    _cascade_pk_column("group_id", "group.group_id"),
)


admin_to_group_table = Table(
    "admin_to_group",
    Base.metadata,
    _cascade_pk_column("admin_id", "administrator.admin_id"),
    _cascade_pk_column("group_id", "group.group_id"),
)


document_to_group_table = Table(
    "document_to_group",
    Base.metadata,
    _cascade_pk_column("document_id", "document.document_id"),
    _cascade_pk_column("group_id", "group.group_id"),
)

note_to_group_table = Table(
    "note_to_group",
    Base.metadata,
    _cascade_pk_column("note_id", "note.note_id"),
    _cascade_pk_column("group_id", "group.group_id"),
)
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
# Optional hook: when the compair_cloud package is importable, hand it the
# declarative base and this module's namespace.
try:
    from compair_cloud.models import extend_models  # type: ignore
except ImportError:  # also covers ModuleNotFoundError, which subclasses it
    extend_models = None

if extend_models:
    extend_models(Base, globals())
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class GroupForm:
    # Plain (non-pydantic) holder of group fields; only ``name`` has no default.
    name: str
    user_id: str | None = None
    group_id: str | None = None
    datetime_created: datetime | None = None
    category: str | None = None
    description: str | None = None
    visibility: str | None = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Group(BaseModel):
    # Pydantic schema for a group; every field except ``name`` is optional.
    name: str
    user_id: str | None = None
    group_id: str | None = None
    datetime_created: datetime | None = None
    group_image: str | None = None
    category: str | None = None
    description: str | None = None
    visibility: str | None = None

    # Allows validation from attribute-bearing objects, not just dicts.
    model_config = {"from_attributes": True}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class User(BaseModel):
    # Pydantic schema for a user; it declares no password or token fields.
    user_id: str
    username: str
    name: str
    datetime_registered: datetime
    status: str
    groups: list[Group] | None = None
    profile_image: str | None = None
    role: str | None = None

    # Allows validation from attribute-bearing objects, not just dicts.
    model_config = {"from_attributes": True}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class UpdateUserRequest(BaseModel):
    # Request body for a user update; only ``user_id`` is required.
    user_id: str
    name: str | None = None
    role: str | None = None
    group_ids: list[str] | None = None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class Session(BaseModel):
    # Pydantic schema carrying a session id, its user, and its validity window.
    id: str
    user_id: str
    datetime_created: datetime
    datetime_valid_until: datetime
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class Document(BaseModel):
    # Pydantic schema for a document, with its groups and user nested.
    document_id: str
    user_id: str
    author_id: str
    groups: list[Group]
    user: User
    title: str
    content: str
    doc_type: str
    datetime_created: datetime
    datetime_modified: datetime
    is_published: bool
    file_key: str | None = None
    image_key: str | None = None

    # Allows validation from attribute-bearing objects, not just dicts.
    model_config = {"from_attributes": True}
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class Chunk(BaseModel):
    # Pydantic schema for a chunk; all four fields are required strings.
    chunk_id: str
    hash: str
    document_id: str
    content: str
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class Reference(BaseModel):
    # Pydantic schema for a reference, with the referenced document nested.
    reference_id: str
    source_chunk_id: str
    reference_document_id: str
    document: Document
    # presumably a display name for the document's author -- verify against callers
    document_author: str
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class Feedback(BaseModel):
    # Pydantic schema for feedback on a chunk; ``user_feedback`` is optional.
    feedback_id: str
    source_chunk_id: str
    feedback: str
    user_feedback: str | None = None
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class LoginRequest(BaseModel):
    # Request body carrying login credentials.
    username: str
    password: str
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class SignUpRequest(BaseModel):
    # Request body for account creation.
    username: str
    name: str
    password: str
    # Defaults to None like the other nullable fields in this module. Without
    # a default, pydantic v2 treats a ``... | None`` field as required, forcing
    # clients to send an explicit ``"groups": null``.
    groups: list[Group] | None = None
    referral_code: str | None = None
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class ForgotPasswordRequest(BaseModel):
    # Request body carrying the email address for a password-reset request.
    email: str
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class ResetPasswordRequest(BaseModel):
    # Request body pairing a reset token with the replacement password.
    token: str
    new_password: str
|
|
118
|
+
new_password: str
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class Note(BaseModel):
    # Pydantic schema for a note; group, author and group objects are optional.
    note_id: str
    document_id: str
    author_id: str
    group_id: str | None = None
    content: str
    datetime_created: datetime
    author: User | None = None
    group: Group | None = None
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class InviteToGroupRequest(BaseModel):
    # Request body identifying an admin, a group, and the invitee's email.
    admin_id: str
    group_id: str
    email: str
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class InviteMemberRequest(BaseModel):
    # Request body identifying an admin, a group, and the invitee's username.
    admin_id: str
    group_id: str
    username: str
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class RemoveMemberRequest(BaseModel):
    # Request body identifying the group and the user to remove from it.
    group_id: str
    user_id: str
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Mapping
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
try:
    # Prefer the Compair Cloud task implementations when that package is importable.
    from compair_cloud.tasks import (  # type: ignore
        process_document_task,
        process_text_task,
        check_trial_expirations,
        expire_group_invitations,
        send_trial_warnings,
        send_feature_announcement_task,
        send_deactivate_request_email,
        send_help_request_email,
        send_daily_usage_report,
    )
except (ImportError, ModuleNotFoundError) as exc:
    # NOTE(review): the nesting below is reconstructed from a flattened view.
    # The fallbacks are assumed to sit inside this handler, since at module
    # level they would always shadow the cloud imports -- confirm.
    logger.warning(
        "Failed to import compair_cloud.tasks; using core task implementations. (%s: %s)",
        exc.__class__.__name__,
        exc,
        exc_info=exc,
    )
    from sqlalchemy.orm import joinedload

    def _lazy_components():
        """Import and return the package-local pieces the core task needs.

        The imports run on each call rather than at module import time.
        """
        from . import Session as SessionMaker
        from .embeddings import Embedder
        from .feedback import Reviewer
        from .logger import log_event
        from .main import process_document
        from .models import Document, User

        return SessionMaker, Embedder, Reviewer, log_event, process_document, Document, User

    # Re-binds the same module logger that was created before the try block.
    logger = logging.getLogger(__name__)

    def process_document_task(
        user_id: str,
        doc_id: str,
        doc_text: str,
        generate_feedback: bool = True,
    ) -> Mapping[str, list[str]]:
        """Core implementation: store new text on a document and process it in-process.

        Args:
            user_id: Id of the user the processing runs for.
            doc_id: Id of the document to update and process.
            doc_text: New content assigned to the document before processing.
            generate_feedback: Forwarded to ``process_document``.

        Returns:
            Always ``{"chunk_task_ids": []}``, including when the user or
            document is not found (a warning is logged in those cases).
        """
        SessionMaker, Embedder, Reviewer, log_event, process_document, Document, User = _lazy_components()
        with SessionMaker() as session:
            user = session.query(User).filter(User.user_id == user_id).first()
            if not user:
                logger.warning("User not found for document processing", extra={"user_id": user_id})
                return {"chunk_task_ids": []}

            # The document's groups are loaded in the same query.
            doc = (
                session.query(Document)
                .options(joinedload(Document.groups))
                .filter(Document.document_id == doc_id)
                .first()
            )
            if not doc:
                logger.warning("Document not found for processing", extra={"document_id": doc_id})
                return {"chunk_task_ids": []}

            doc.content = doc_text
            session.add(doc)

            embedder = Embedder()
            reviewer = Reviewer()

            # NOTE(review): no commit is issued here; presumably
            # process_document commits the session -- confirm.
            process_document(user, session, embedder, reviewer, doc, generate_feedback=generate_feedback)

            log_event(
                "core_document_processed",
                user_id=user_id,
                document_id=doc_id,
                feedback_requested=generate_feedback,
            )

        return {"chunk_task_ids": []}

    # The remaining fallbacks only raise, so calling a cloud-only task without
    # the cloud package fails with an explicit message.
    def process_text_task(*args, **kwargs):  # pragma: no cover
        raise RuntimeError("process_text_task is only available in the Compair Cloud edition.")

    def check_trial_expirations():  # pragma: no cover
        raise RuntimeError("check_trial_expirations is only available in the Compair Cloud edition.")

    def expire_group_invitations():  # pragma: no cover
        raise RuntimeError("expire_group_invitations is only available in the Compair Cloud edition.")

    def send_trial_warnings():  # pragma: no cover
        raise RuntimeError("send_trial_warnings is only available in the Compair Cloud edition.")

    def send_feature_announcement_task():  # pragma: no cover
        raise RuntimeError("send_feature_announcement_task is only available in the Compair Cloud edition.")

    def send_deactivate_request_email(*args, **kwargs):  # pragma: no cover
        raise RuntimeError("send_deactivate_request_email is only available in the Compair Cloud edition.")

    def send_help_request_email(*args, **kwargs):  # pragma: no cover
        raise RuntimeError("send_help_request_email is only available in the Compair Cloud edition.")

    def send_daily_usage_report():  # pragma: no cover
        raise RuntimeError("send_daily_usage_report is only available in the Compair Cloud edition.")
|
|
104
|
+
|
|
105
|
+
def process_file_with_ocr_task(*args, **kwargs):  # pragma: no cover
    """Always raise: this implementation performs no OCR.

    Raises:
        RuntimeError: on every call, whatever the arguments.
    """
    message = "OCR processing is only available in the Compair Cloud edition."
    raise RuntimeError(message)
|