compair-core 0.3.9__tar.gz → 0.4.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. {compair_core-0.3.9 → compair_core-0.4.5}/PKG-INFO +16 -2
  2. {compair_core-0.3.9 → compair_core-0.4.5}/README.md +11 -1
  3. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/api.py +242 -36
  4. compair_core-0.4.5/compair_core/compair/__init__.py +106 -0
  5. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/compair/embeddings.py +11 -1
  6. compair_core-0.4.5/compair_core/compair/feedback.py +368 -0
  7. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/compair/main.py +53 -17
  8. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/compair/models.py +74 -4
  9. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/compair/utils.py +1 -1
  10. compair_core-0.4.5/compair_core/compair_email/templates_core.py +32 -0
  11. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/server/app.py +8 -2
  12. compair_core-0.4.5/compair_core/server/local_model/app.py +87 -0
  13. compair_core-0.4.5/compair_core/server/local_model/ocr.py +44 -0
  14. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/server/routers/capabilities.py +9 -1
  15. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/server/settings.py +5 -1
  16. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core.egg-info/PKG-INFO +16 -2
  17. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core.egg-info/SOURCES.txt +1 -0
  18. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core.egg-info/requires.txt +5 -0
  19. {compair_core-0.3.9 → compair_core-0.4.5}/pyproject.toml +6 -1
  20. compair_core-0.3.9/compair_core/compair/__init__.py +0 -88
  21. compair_core-0.3.9/compair_core/compair/feedback.py +0 -79
  22. compair_core-0.3.9/compair_core/compair_email/templates_core.py +0 -13
  23. compair_core-0.3.9/compair_core/server/local_model/app.py +0 -62
  24. {compair_core-0.3.9 → compair_core-0.4.5}/LICENSE +0 -0
  25. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/__init__.py +0 -0
  26. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/compair/celery_app.py +0 -0
  27. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/compair/default_groups.py +0 -0
  28. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/compair/logger.py +0 -0
  29. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/compair/schema.py +0 -0
  30. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/compair/tasks.py +0 -0
  31. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/compair_email/__init__.py +0 -0
  32. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/compair_email/email.py +0 -0
  33. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/compair_email/email_core.py +0 -0
  34. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/compair_email/templates.py +0 -0
  35. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/server/__init__.py +0 -0
  36. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/server/deps.py +0 -0
  37. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/server/local_model/__init__.py +0 -0
  38. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/server/providers/__init__.py +0 -0
  39. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/server/providers/console_mailer.py +0 -0
  40. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/server/providers/contracts.py +0 -0
  41. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/server/providers/local_storage.py +0 -0
  42. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/server/providers/noop_analytics.py +0 -0
  43. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/server/providers/noop_billing.py +0 -0
  44. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/server/providers/noop_ocr.py +0 -0
  45. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core/server/routers/__init__.py +0 -0
  46. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core.egg-info/dependency_links.txt +0 -0
  47. {compair_core-0.3.9 → compair_core-0.4.5}/compair_core.egg-info/top_level.txt +0 -0
  48. {compair_core-0.3.9 → compair_core-0.4.5}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compair-core
3
- Version: 0.3.9
3
+ Version: 0.4.5
4
4
  Summary: Open-source foundation of the Compair collaboration platform.
5
5
  Author: RocketResearch, Inc.
6
6
  License: MIT
@@ -23,11 +23,15 @@ Requires-Dist: redis>=5.0
23
23
  Requires-Dist: psutil>=5.9
24
24
  Requires-Dist: python-Levenshtein>=0.23
25
25
  Requires-Dist: redmail>=0.6
26
+ Requires-Dist: python-multipart>=0.0.20
26
27
  Provides-Extra: dev
27
28
  Requires-Dist: build>=1.0; extra == "dev"
28
29
  Requires-Dist: twine>=5.0; extra == "dev"
29
30
  Requires-Dist: pytest>=8.0; extra == "dev"
30
31
  Requires-Dist: ruff>=0.3; extra == "dev"
32
+ Provides-Extra: ocr
33
+ Requires-Dist: pillow>=10.0; extra == "ocr"
34
+ Requires-Dist: pytesseract>=0.3.10; extra == "ocr"
31
35
  Provides-Extra: postgres
32
36
  Requires-Dist: psycopg2-binary>=2.9; extra == "postgres"
33
37
  Dynamic: license-file
@@ -85,9 +89,19 @@ Container definitions and build pipelines live outside this public package:
85
89
  Key environment variables for the core edition:
86
90
 
87
91
  - `COMPAIR_EDITION` (`core`) – corresponds to this core local implementation.
88
- - `COMPAIR_SQLITE_DIR` / `COMPAIR_SQLITE_NAME` override the default local SQLite path (falls back to `./compair_data` if `/data` is not writable).
92
+ - `COMPAIR_DATABASE_URL` – optional explicit SQLAlchemy URL (e.g. `postgresql+psycopg2://user:pass@host/db`). When omitted, Compair falls back to a local SQLite file.
93
+ - `COMPAIR_DB_DIR` / `COMPAIR_DB_NAME` – directory and filename for the bundled SQLite database (default: `~/.compair-core/data/compair.db`). Legacy `COMPAIR_SQLITE_*` variables remain supported.
89
94
  - `COMPAIR_LOCAL_MODEL_URL` – endpoint for your local embeddings/feedback service (defaults to `http://local-model:9000`).
90
95
  - `COMPAIR_EMAIL_BACKEND` – the core mailer logs emails to stdout; cloud overrides this with transactional delivery.
96
+ - `COMPAIR_REQUIRE_AUTHENTICATION` (`true`) – set to `false` to run the API in single-user mode without login or account management. When disabled, Compair auto-provisions a local user, group, and long-lived session token so you can upload documents immediately.
97
+ - `COMPAIR_SINGLE_USER_USERNAME` / `COMPAIR_SINGLE_USER_NAME` – override the email-style username and display name that are used for the auto-provisioned local user in single-user mode.
98
+ - `COMPAIR_INCLUDE_LEGACY_ROUTES` (`false`) – opt-in to the full legacy API surface (used by the hosted product) when running the core edition. Leave unset to expose only the streamlined single-user endpoints in Swagger.
99
+ - `COMPAIR_EMBEDDING_DIM` – force the embedding vector size stored in the database (defaults to 384 for core, 1536 for cloud). Keep this in sync with whichever embedding model you configure.
100
+ - `COMPAIR_VECTOR_BACKEND` (`auto`) – set to `pgvector` when running against PostgreSQL with the pgvector extension, or `json` to store embeddings as JSON (the default for SQLite deployments).
101
+ - `COMPAIR_GENERATION_PROVIDER` (`local`) – choose how feedback is produced. Options: `local` (call the bundled FastAPI service), `openai` (use ChatGPT-compatible APIs with an API key), `http` (POST the request to a custom endpoint), or `fallback` (skip generation and surface similar references only).
102
+ - `COMPAIR_OPENAI_API_KEY` / `COMPAIR_OPENAI_MODEL` – when using the OpenAI provider, supply your API key and optional model name (defaults to `gpt-5-nano`). The fallback kicks in automatically if the key or SDK is unavailable.
103
+ - `COMPAIR_GENERATION_ENDPOINT` – HTTP endpoint invoked when `COMPAIR_GENERATION_PROVIDER=http`; the service receives a JSON payload (`document`, `references`, `length_instruction`) and should return `{"feedback": ...}`.
104
+ - `COMPAIR_OCR_ENDPOINT` – endpoint the backend calls for OCR uploads (defaults to the bundled Tesseract wrapper at `http://local-ocr:9001/ocr-file`). Provide your own service by overriding this URL.
91
105
 
92
106
  See `compair_core/server/settings.py` for the full settings surface.
93
107
 
@@ -51,9 +51,19 @@ Container definitions and build pipelines live outside this public package:
51
51
  Key environment variables for the core edition:
52
52
 
53
53
  - `COMPAIR_EDITION` (`core`) – corresponds to this core local implementation.
54
- - `COMPAIR_SQLITE_DIR` / `COMPAIR_SQLITE_NAME` override the default local SQLite path (falls back to `./compair_data` if `/data` is not writable).
54
+ - `COMPAIR_DATABASE_URL` – optional explicit SQLAlchemy URL (e.g. `postgresql+psycopg2://user:pass@host/db`). When omitted, Compair falls back to a local SQLite file.
55
+ - `COMPAIR_DB_DIR` / `COMPAIR_DB_NAME` – directory and filename for the bundled SQLite database (default: `~/.compair-core/data/compair.db`). Legacy `COMPAIR_SQLITE_*` variables remain supported.
55
56
  - `COMPAIR_LOCAL_MODEL_URL` – endpoint for your local embeddings/feedback service (defaults to `http://local-model:9000`).
56
57
  - `COMPAIR_EMAIL_BACKEND` – the core mailer logs emails to stdout; cloud overrides this with transactional delivery.
58
+ - `COMPAIR_REQUIRE_AUTHENTICATION` (`true`) – set to `false` to run the API in single-user mode without login or account management. When disabled, Compair auto-provisions a local user, group, and long-lived session token so you can upload documents immediately.
59
+ - `COMPAIR_SINGLE_USER_USERNAME` / `COMPAIR_SINGLE_USER_NAME` – override the email-style username and display name that are used for the auto-provisioned local user in single-user mode.
60
+ - `COMPAIR_INCLUDE_LEGACY_ROUTES` (`false`) – opt-in to the full legacy API surface (used by the hosted product) when running the core edition. Leave unset to expose only the streamlined single-user endpoints in Swagger.
61
+ - `COMPAIR_EMBEDDING_DIM` – force the embedding vector size stored in the database (defaults to 384 for core, 1536 for cloud). Keep this in sync with whichever embedding model you configure.
62
+ - `COMPAIR_VECTOR_BACKEND` (`auto`) – set to `pgvector` when running against PostgreSQL with the pgvector extension, or `json` to store embeddings as JSON (the default for SQLite deployments).
63
+ - `COMPAIR_GENERATION_PROVIDER` (`local`) – choose how feedback is produced. Options: `local` (call the bundled FastAPI service), `openai` (use ChatGPT-compatible APIs with an API key), `http` (POST the request to a custom endpoint), or `fallback` (skip generation and surface similar references only).
64
+ - `COMPAIR_OPENAI_API_KEY` / `COMPAIR_OPENAI_MODEL` – when using the OpenAI provider, supply your API key and optional model name (defaults to `gpt-5-nano`). The fallback kicks in automatically if the key or SDK is unavailable.
65
+ - `COMPAIR_GENERATION_ENDPOINT` – HTTP endpoint invoked when `COMPAIR_GENERATION_PROVIDER=http`; the service receives a JSON payload (`document`, `references`, `length_instruction`) and should return `{"feedback": ...}`.
66
+ - `COMPAIR_OCR_ENDPOINT` – endpoint the backend calls for OCR uploads (defaults to the bundled Tesseract wrapper at `http://local-ocr:9001/ocr-file`). Provide your own service by overriding this URL.
57
67
 
58
68
  See `compair_core/server/settings.py` for the full settings surface.
59
69
 
@@ -13,6 +13,7 @@ import psutil
13
13
  from celery.result import AsyncResult
14
14
  from fastapi import APIRouter, Body, Depends, File, Form, Header, HTTPException, Query, Request, UploadFile
15
15
  from fastapi.responses import HTMLResponse, RedirectResponse, StreamingResponse
16
+ from fastapi.routing import APIRoute
16
17
  from sqlalchemy import distinct, func, select, or_
17
18
  from sqlalchemy.orm import joinedload, Session
18
19
 
@@ -36,13 +37,17 @@ from .compair_email.templates import (
36
37
  )
37
38
  from .compair.tasks import process_document_task as process_document_celery, send_feature_announcement_task, send_deactivate_request_email, send_help_request_email
38
39
 
39
- import redis
40
+ try:
41
+ import redis # type: ignore
42
+ except ImportError: # pragma: no cover - optional dependency
43
+ redis = None
40
44
 
41
45
  redis_url = os.environ.get("REDIS_URL")
42
- redis_client = redis.Redis.from_url(redis_url)
46
+ redis_client = redis.Redis.from_url(redis_url) if (redis and redis_url) else None
43
47
  #from compair.main import process_document
44
48
 
45
49
  router = APIRouter()
50
+ core_router = APIRouter()
46
51
  WEB_URL = os.environ.get("WEB_URL")
47
52
  ADMIN_API_KEY = os.environ.get("ADMIN_API_KEY")
48
53
 
@@ -56,6 +61,112 @@ GA4_MEASUREMENT_ID = os.getenv("GA4_MEASUREMENT_ID")
56
61
  GA4_API_SECRET = os.getenv("GA4_API_SECRET")
57
62
 
58
63
  IS_CLOUD = os.getenv("COMPAIR_EDITION", "core").lower() == "cloud"
64
+ SINGLE_USER_SESSION_TTL = timedelta(days=365)
65
+
66
+
67
def _new_private_workspace(name, owner_name, created_at):
    """Build (without persisting) the private group used as a single-user workspace."""
    return models.Group(
        name=name,
        datetime_created=created_at,
        group_image=None,
        category="Private",
        description=f"Private workspace for {owner_name}",
        visibility="private",
    )


def _ensure_single_user(session: Session, settings: Settings) -> models.User:
    """Create or fetch the singleton user used when authentication is disabled.

    The user is looked up by ``settings.single_user_username``. When it does not
    exist it is created as an active account with a random password, an
    ``Administrator`` row and a private group named after the username. When it
    already exists, its status, private-group membership and administrator link
    are repaired if any of them is missing.

    Args:
        session: Open database session; committed only when something changed.
        settings: Provides ``single_user_username`` and ``single_user_name``.

    Returns:
        The user, with its ``groups`` relationship loaded.

    Raises:
        RuntimeError: If the user cannot be read back after committing changes.
    """

    def _load_local_user():
        # Eager-load groups so the returned object is usable after the session closes.
        return (
            session.query(models.User)
            .options(joinedload(models.User.groups))
            .filter(models.User.username == settings.single_user_username)
            .first()
        )

    dirty = False
    local_user = _load_local_user()
    timestamp = datetime.now(timezone.utc)

    if local_user is None:
        local_user = models.User(
            username=settings.single_user_username,
            name=settings.single_user_name,
            datetime_registered=timestamp,
            verification_token=None,
            token_expiration=None,
        )
        # Nobody logs in with this password; it only fills the credential column.
        local_user.set_password(secrets.token_urlsafe(16))
        local_user.status = "active"
        local_user.status_change_date = timestamp
        session.add(local_user)
        # Flush so user_id is assigned before it is referenced below.
        session.flush()
        admin_row = models.Administrator(user_id=local_user.user_id)
        workspace = _new_private_workspace(
            local_user.username, settings.single_user_name, timestamp
        )
        workspace.admins.append(admin_row)
        local_user.groups = [workspace]
        session.add_all([workspace, admin_row])
        dirty = True
    else:
        if local_user.status != "active":
            local_user.status = "active"
            local_user.status_change_date = timestamp
            dirty = True

        # Prefer a group the user already belongs to, then any group with the
        # same name, and only then create a fresh one.
        workspace = next(
            (g for g in local_user.groups if g.name == local_user.username), None
        )
        if workspace is None:
            workspace = (
                session.query(models.Group)
                .filter(models.Group.name == local_user.username)
                .first()
            )
            if workspace is None:
                workspace = _new_private_workspace(
                    local_user.username, local_user.name, timestamp
                )
                session.add(workspace)
                dirty = True
        if workspace not in local_user.groups:
            local_user.groups.append(workspace)
            dirty = True

        admin_row = (
            session.query(models.Administrator)
            .filter(models.Administrator.user_id == local_user.user_id)
            .first()
        )
        if admin_row is None:
            admin_row = models.Administrator(user_id=local_user.user_id)
            session.add(admin_row)
            dirty = True
        if admin_row not in workspace.admins:
            workspace.admins.append(admin_row)
            dirty = True

    if dirty:
        session.commit()
        local_user = _load_local_user()
        if local_user is None:
            raise RuntimeError("Failed to initialize the local Compair user.")
    local_user.groups  # ensure relationship is loaded before detaching
    return local_user
147
+
148
+
149
def _ensure_single_user_session(session: Session, user: models.User) -> models.Session:
    """Return a long-lived session token for the singleton user.

    Reuses the user's unexpired session with the latest expiry when one exists;
    otherwise creates, commits and returns a new session valid for
    ``SINGLE_USER_SESSION_TTL``.

    Args:
        session: Open database session; committed only when a token is created.
        user: The user the session row belongs to (matched on ``user_id``).

    Returns:
        The reused or newly created ``models.Session`` row.
    """
    issued_at = datetime.now(timezone.utc)

    unexpired = session.query(models.Session).filter(
        models.Session.user_id == user.user_id,
        models.Session.datetime_valid_until >= issued_at,
    )
    # Latest expiry first, so the longest-lived token is the one reused.
    reusable = unexpired.order_by(models.Session.datetime_valid_until.desc()).first()
    if reusable:
        return reusable

    fresh = models.Session(
        id=secrets.token_urlsafe(),
        user_id=user.user_id,
        datetime_created=issued_at,
        datetime_valid_until=issued_at + SINGLE_USER_SESSION_TTL,
    )
    session.add(fresh)
    session.commit()
    return fresh
59
170
 
60
171
 
61
172
  def require_cloud(feature: str) -> None:
@@ -80,13 +191,20 @@ HAS_ACTIVITY = hasattr(models, "Activity")
80
191
  HAS_REFERRALS = hasattr(models.User, "referral_code")
81
192
  HAS_BILLING = hasattr(models.User, "stripe_customer_id")
82
193
  HAS_TRIALS = hasattr(models.User, "trial_expiration_date")
194
+ HAS_REDIS = redis_client is not None
83
195
 
84
196
 
85
197
  def require_feature(flag: bool, feature: str) -> None:
86
198
  if not flag:
87
199
  raise HTTPException(status_code=501, detail=f"{feature} is only available in the Compair Cloud edition.")
88
200
 
89
- def get_current_user(auth_token: str = Header(...)):
201
+ def get_current_user(auth_token: str | None = Header(None)):
202
+ settings = get_settings_dependency()
203
+ if not settings.require_authentication:
204
+ with compair.Session() as session:
205
+ return _ensure_single_user(session, settings)
206
+ if not auth_token:
207
+ raise HTTPException(status_code=401, detail="Missing session token")
90
208
  with compair.Session() as session:
91
209
  user_session = session.query(models.Session).filter(models.Session.id == auth_token).first()
92
210
  if not user_session:
@@ -150,9 +268,20 @@ log_service_resource_metrics(service_name="backend") # or "frontend"
150
268
 
151
269
  @router.post("/login")
152
270
  def login(request: schema.LoginRequest) -> dict:
271
+ settings = get_settings_dependency()
153
272
  with compair.Session() as session:
273
+ if not settings.require_authentication:
274
+ user = _ensure_single_user(session, settings)
275
+ user_session = _ensure_single_user_session(session, user)
276
+ return {
277
+ "user_id": user.user_id,
278
+ "username": user.username,
279
+ "name": user.name,
280
+ "status": user.status,
281
+ "role": user.role,
282
+ "auth_token": user_session.id,
283
+ }
154
284
  user = session.query(models.User).filter(models.User.username == request.username).first()
155
- print("PW yo: {request.password}")
156
285
  if not user or not user.check_password(request.password):
157
286
  raise HTTPException(status_code=401, detail="Invalid credentials")
158
287
  if user.status == 'inactive':
@@ -526,8 +655,16 @@ def create_user(
526
655
 
527
656
 
528
657
  @router.get("/load_session")
529
- def load_session(auth_token: str) -> schema.Session | None:
658
+ def load_session(auth_token: str | None = None) -> schema.Session | None:
659
+ settings = get_settings_dependency()
660
+ if not settings.require_authentication:
661
+ with compair.Session() as session:
662
+ user = _ensure_single_user(session, settings)
663
+ session_model = _ensure_single_user_session(session, user)
664
+ return schema.Session.model_validate(session_model, from_attributes=True)
530
665
  with compair.Session() as session:
666
+ if not auth_token:
667
+ raise HTTPException(status_code=400, detail="auth_token is required when authentication is enabled.")
531
668
  user_session = session.query(models.Session).filter(models.Session.id == auth_token).first()
532
669
  if not user_session:
533
670
  raise HTTPException(status_code=404, detail="Session not found")
@@ -536,7 +673,7 @@ def load_session(auth_token: str) -> schema.Session | None:
536
673
  valid_until = valid_until.replace(tzinfo=timezone.utc)
537
674
  if valid_until < datetime.now(timezone.utc):
538
675
  raise HTTPException(status_code=401, detail="Invalid or expired session token")
539
- return user_session
676
+ return schema.Session.model_validate(user_session, from_attributes=True)
540
677
 
541
678
 
542
679
  @router.post("/update_user")
@@ -592,6 +729,9 @@ def update_session_duration(
592
729
  def delete_user(
593
730
  current_user: models.User = Depends(get_current_user)
594
731
  ):
732
+ settings = get_settings_dependency()
733
+ if not settings.require_authentication:
734
+ raise HTTPException(status_code=403, detail="Deleting the local user is not supported when authentication is disabled.")
595
735
  with compair.Session() as session:
596
736
  current_user.delete()
597
737
  session.commit()
@@ -1234,22 +1374,35 @@ def create_doc(
1234
1374
  current_user.status_change_date = datetime.now(timezone.utc)
1235
1375
  session.commit()
1236
1376
 
1237
- # Enforce document limits
1377
+ # Enforce document limits (cloud plans) – core runs are unrestricted unless explicitly configured
1238
1378
  team = _user_team(current_user)
1379
+ document_limit: int | None = None
1239
1380
  if IS_CLOUD and HAS_TEAM and team and current_user.status == "active":
1240
1381
  document_limit = team.total_documents_limit # type: ignore[union-attr]
1382
+ elif IS_CLOUD and _user_plan(current_user) == "individual" and current_user.status == "active":
1383
+ document_limit = 100
1241
1384
  else:
1242
- if IS_CLOUD and _user_plan(current_user) == 'individual' and current_user.status == "active":
1243
- document_limit = 100
1244
- else:
1245
- document_limit = int(os.getenv("COMPAIR_CORE_DOCUMENT_LIMIT", "10"))
1385
+ raw_core_limit = os.getenv("COMPAIR_CORE_DOCUMENT_LIMIT")
1386
+ if raw_core_limit:
1387
+ try:
1388
+ document_limit = int(raw_core_limit)
1389
+ except ValueError:
1390
+ document_limit = None
1391
+
1246
1392
  document_count = session.query(models.Document).filter(models.Document.user_id == current_user.user_id).count()
1247
1393
 
1248
- if document_count >= document_limit:
1249
- raise HTTPException(
1250
- status_code=403,
1251
- detail=f"Document limit reached. Individual plan users can have 100, team plans have 100 times the number of users (pooled); other plans can have 10",
1252
- )
1394
+ if document_limit is not None and document_count >= document_limit:
1395
+ if IS_CLOUD:
1396
+ detail_msg = (
1397
+ "Document limit reached. Individual plan users can have 100, team plans have 100 times "
1398
+ "the number of users (pooled); other plans can have 10"
1399
+ )
1400
+ else:
1401
+ detail_msg = (
1402
+ f"Document limit of {document_limit} reached. Adjust COMPAIR_CORE_DOCUMENT_LIMIT to raise "
1403
+ "or unset it to remove limits in core deployments."
1404
+ )
1405
+ raise HTTPException(status_code=403, detail=detail_msg)
1253
1406
 
1254
1407
  if not authorid:
1255
1408
  authorid = current_user.user_id
@@ -1264,21 +1417,24 @@ def create_doc(
1264
1417
  datetime_modified=datetime.now(timezone.utc)
1265
1418
  )
1266
1419
  print('About to assign groups!')
1267
- print(groups.split(','))
1268
- if groups is not None:
1269
- group_ids = groups.split(',')
1270
- q = select(models.Group).filter(
1271
- models.Group.group_id.in_(group_ids)
1272
- )
1273
- groups = session.execute(q).fetchall()[0]
1274
- for group in groups:
1275
- document.groups.append(group)
1420
+ target_group_ids = []
1421
+ if groups:
1422
+ target_group_ids = [gid.strip() for gid in groups.split(',') if gid.strip()]
1423
+
1424
+ if target_group_ids:
1425
+ q = select(models.Group).filter(models.Group.group_id.in_(target_group_ids))
1426
+ resolved_groups = session.execute(q).scalars().all()
1427
+ if not resolved_groups:
1428
+ raise HTTPException(status_code=404, detail="No matching groups found for provided IDs.")
1429
+ document.groups = resolved_groups
1276
1430
  else:
1277
- q = select(models.Group).filter(
1278
- models.Group.name == current_user.username
1279
- )
1280
- group = session.execute(q).fetchone()[0]
1281
- document.groups = [group]
1431
+ q = select(models.Group).filter(models.Group.name == current_user.username)
1432
+ default_group = session.execute(q).scalars().first()
1433
+ if default_group is None:
1434
+ raise HTTPException(status_code=404, detail="Default group not found for user.")
1435
+ document.groups = [default_group]
1436
+
1437
+ primary_group = document.groups[0]
1282
1438
 
1283
1439
  print(f'doc check!!! {document.content}')
1284
1440
  session.add(document)
@@ -1296,7 +1452,7 @@ def create_doc(
1296
1452
  log_activity(
1297
1453
  session=session,
1298
1454
  user_id=document.author_id,
1299
- group_id=group.group_id,
1455
+ group_id=primary_group.group_id,
1300
1456
  action="create",
1301
1457
  object_id=document.document_id,
1302
1458
  object_name=document.title,
@@ -1707,6 +1863,9 @@ def load_references(
1707
1863
 
1708
1864
  @router.get("/verify-email")
1709
1865
  def verify_email(token: str):
1866
+ settings = get_settings_dependency()
1867
+ if not settings.require_authentication:
1868
+ raise HTTPException(status_code=403, detail="Email verification is disabled when authentication is disabled.")
1710
1869
  with compair.Session() as session:
1711
1870
  print(token)
1712
1871
  user = session.query(models.User).filter(models.User.verification_token == token).first()
@@ -1765,6 +1924,9 @@ def sign_up(
1765
1924
  request: schema.SignUpRequest,
1766
1925
  analytics: Analytics = Depends(get_analytics),
1767
1926
  ) -> dict:
1927
+ settings = get_settings_dependency()
1928
+ if not settings.require_authentication:
1929
+ raise HTTPException(status_code=403, detail="Sign-up is disabled when authentication is disabled.")
1768
1930
  print('1')
1769
1931
  if not is_valid_email(request.username):
1770
1932
  raise HTTPException(status_code=400, detail="Invalid email address")
@@ -1798,6 +1960,9 @@ def sign_up(
1798
1960
 
1799
1961
  @router.post("/forgot-password")
1800
1962
  def forgot_password(request: schema.ForgotPasswordRequest) -> dict:
1963
+ settings = get_settings_dependency()
1964
+ if not settings.require_authentication:
1965
+ raise HTTPException(status_code=403, detail="Password resets are disabled when authentication is disabled.")
1801
1966
  print('1')
1802
1967
  with compair.Session() as session:
1803
1968
  print('2')
@@ -1830,6 +1995,9 @@ def forgot_password(request: schema.ForgotPasswordRequest) -> dict:
1830
1995
 
1831
1996
  @router.post("/reset-password")
1832
1997
  def reset_password(request: schema.ResetPasswordRequest) -> dict:
1998
+ settings = get_settings_dependency()
1999
+ if not settings.require_authentication:
2000
+ raise HTTPException(status_code=403, detail="Password resets are disabled when authentication is disabled.")
1833
2001
  with compair.Session() as session:
1834
2002
  print('1')
1835
2003
  print(request.token)
@@ -2216,6 +2384,8 @@ def get_activity_feed(
2216
2384
  ):
2217
2385
  """Retrieve recent activities for a user's groups."""
2218
2386
  require_feature(HAS_ACTIVITY, "Activity feed")
2387
+ if not IS_CLOUD:
2388
+ raise HTTPException(status_code=501, detail="Activity feed is only available in the Compair Cloud edition.")
2219
2389
  with compair.Session() as session:
2220
2390
  # Get user's groups
2221
2391
 
@@ -3228,11 +3398,12 @@ def generate_download_token(
3228
3398
  else:
3229
3399
  raise HTTPException(status_code=403, detail="Not authorized to download this file.")
3230
3400
 
3401
+ if not HAS_REDIS:
3402
+ raise HTTPException(status_code=501, detail="Secure download links require Redis, which is unavailable in the core edition.")
3403
+
3231
3404
  token = secrets.token_urlsafe(32)
3232
3405
  key = f"download_token:{token}"
3233
3406
  redis_client.setex(key, 300, document_id)
3234
- print('Setting redis kv')
3235
- print(key, document_id)
3236
3407
  return {"download_url": f"/documents/download/{token}"}
3237
3408
 
3238
3409
 
@@ -3241,10 +3412,12 @@ def download_document_with_token(
3241
3412
  token: str,
3242
3413
  storage: StorageProvider = Depends(get_storage),
3243
3414
  ):
3415
+ if not HAS_REDIS:
3416
+ raise HTTPException(status_code=501, detail="Secure download links require Redis, which is unavailable in the core edition.")
3417
+
3244
3418
  key = f"download_token:{token}"
3245
- print(f'Retrieving redis kv with key {key}')
3246
- document_id = redis_client.get(key).decode('utf-8') if redis_client.get(key) else None
3247
- print(f'Value {document_id}')
3419
+ value = redis_client.get(key) if redis_client else None
3420
+ document_id = value.decode('utf-8') if value else None
3248
3421
  if not document_id:
3249
3422
  raise HTTPException(status_code=403, detail="Invalid or expired token")
3250
3423
  redis_client.delete(key)
@@ -3345,6 +3518,39 @@ def submit_deactivate_request(
3345
3518
  return {"message": f"We’ve received your request and will delete your account and data shortly. If you change your mind, reach out within 24 hours at {EMAIL_USER}."}
3346
3519
 
3347
3520
 
3521
# Route paths mirrored onto ``core_router``; every other route stays on ``router`` only.
CORE_PATHS: set[str] = {
    # Session bootstrap
    "/login",
    "/load_session",
    # Groups
    "/load_groups",
    "/load_group",
    "/create_group",
    "/join_group",
    "/load_group_users",
    "/delete_group",
    # Documents
    "/load_documents",
    "/load_document",
    "/load_document_by_id",
    "/load_user_files",
    "/create_doc",
    "/update_doc",
    "/delete_doc",
    "/delete_docs",
    "/process_doc",
    "/status/{task_id}",
    # OCR
    "/upload/ocr-file",
    "/ocr-file-result/{task_id}",
    # Analysis results
    "/load_chunks",
    "/load_references",
    "/load_feedback",
    "/documents/{document_id}/feedback",
    # NOTE(review): the get_activity_feed handler in this file raises 501 when
    # IS_CLOUD is false - confirm that exposing it on the core router is intended.
    "/get_activity_feed",
}

# The same route objects are shared between both routers, not copied.
for route in router.routes:
    if not isinstance(route, APIRoute):
        continue
    if route.path in CORE_PATHS:
        core_router.routes.append(route)
3552
+
3553
+
3348
3554
  def create_fastapi_app():
3349
3555
  """Backwards-compatible app factory for running this module directly."""
3350
3556
  from fastapi import FastAPI
@@ -0,0 +1,106 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from sqlalchemy import Engine, create_engine
6
+ from sqlalchemy.orm import sessionmaker
7
+
8
+ from . import embeddings, feedback, logger, main, models, tasks, utils
9
+ from .default_groups import initialize_default_groups
10
+
11
# Edition selector: "core" unless COMPAIR_EDITION says otherwise (compared case-insensitively).
edition = os.getenv("COMPAIR_EDITION", "core").lower()

# Optional hook called by initialize_database(); only the cloud package can supply it.
initialize_database_override = None

if edition == "cloud":
    try:  # Import cloud overrides if the private package is installed
        from compair_cloud import (  # type: ignore
            bootstrap as cloud_bootstrap,
            embeddings as cloud_embeddings,
            feedback as cloud_feedback,
            logger as cloud_logger,
            main as cloud_main,
            models as cloud_models,
            tasks as cloud_tasks,
            utils as cloud_utils,
        )
    except ImportError as exc:
        # Still fall back to the bundled core modules, but no longer silently:
        # the cloud edition was explicitly requested and is not what will run.
        import logging

        logging.getLogger(__name__).warning(
            "COMPAIR_EDITION=cloud was requested but the compair_cloud package could not be "
            "imported (%s); continuing with the bundled core modules.",
            exc,
        )
    else:
        # Rebind the package-level names so importers pick up the cloud implementations.
        embeddings = cloud_embeddings
        feedback = cloud_feedback
        logger = cloud_logger
        main = cloud_main
        models = cloud_models
        tasks = cloud_tasks
        utils = cloud_utils
        initialize_database_override = getattr(cloud_bootstrap, "initialize_database", None)
38
+
39
+
40
def _handle_engine() -> Engine:
    """Build the SQLAlchemy engine from environment configuration.

    Resolution order:

    1. An explicit URL from ``COMPAIR_DATABASE_URL``, ``COMPAIR_DB_URL`` or
       ``DATABASE_URL`` (first non-empty wins).
    2. The legacy Postgres variables ``DB``, ``DB_USER``, ``DB_PASSW`` and
       ``DB_URL``, used only when all four are set.
    3. A local SQLite file named by ``COMPAIR_DB_NAME`` / ``COMPAIR_SQLITE_NAME``
       (default ``compair.db``) inside ``COMPAIR_DB_DIR`` / ``COMPAIR_SQLITE_DIR``
       (default ``~/.compair-core/data``). If that directory cannot be created,
       ``./compair_data`` under the current working directory is used instead.

    SQLite engines are created with ``check_same_thread=False``.
    """
    # 1. Preferred configuration: explicit database URL.
    url_variables = ("COMPAIR_DATABASE_URL", "COMPAIR_DB_URL", "DATABASE_URL")
    configured_url = next((value for value in map(os.getenv, url_variables) if value), None)
    if configured_url:
        extra = {}
        if configured_url.startswith("sqlite:"):
            extra["connect_args"] = {"check_same_thread": False}
        return create_engine(configured_url, **extra)

    # 2. Backwards compatibility with legacy Postgres env variables.
    db = os.getenv("DB")
    db_user = os.getenv("DB_USER")
    db_passw = os.getenv("DB_PASSW")
    db_host = os.getenv("DB_URL")
    if db and db_user and db_passw and db_host:
        legacy_url = f"postgresql+psycopg2://{db_user}:{db_passw}@{db_host}/{db}"
        return create_engine(legacy_url, pool_size=10, max_overflow=0)

    # 3. Local default: an SQLite database inside the configured directory.
    configured_dir = os.getenv("COMPAIR_DB_DIR") or os.getenv("COMPAIR_SQLITE_DIR")
    if configured_dir:
        storage_dir = Path(configured_dir).expanduser()
    else:
        storage_dir = (Path.home() / ".compair-core" / "data").expanduser()
    file_name = os.getenv("COMPAIR_DB_NAME") or os.getenv("COMPAIR_SQLITE_NAME") or "compair.db"

    try:
        storage_dir.mkdir(parents=True, exist_ok=True)
    except OSError:
        # Preferred location could not be created; use the working directory instead.
        storage_dir = Path.cwd() / "compair_data"
        storage_dir.mkdir(parents=True, exist_ok=True)

    sqlite_path = storage_dir / file_name
    return create_engine(
        f"sqlite:///{sqlite_path}",
        connect_args={"check_same_thread": False},
    )
86
+
87
+
88
def initialize_database() -> None:
    """Create all tables declared on ``models.Base`` and run the optional override hook.

    The hook is ``initialize_database_override``; when it is set it is called
    with the module-level ``engine`` after the tables have been created.
    """
    models.Base.metadata.create_all(engine)
    extra_setup = initialize_database_override
    if extra_setup:
        extra_setup(engine)
92
+
93
+
94
def _initialize_defaults() -> None:
    """Run ``initialize_default_groups`` inside a session that is closed afterwards."""
    with Session() as db_session:
        initialize_default_groups(db_session)
97
+
98
+
99
# Import-time bootstrap. Order matters: initialize_database() reads ``engine``
# and _initialize_defaults() opens a ``Session``, so both must be bound first.
engine = _handle_engine()
initialize_database()
Session = sessionmaker(bind=engine)

# Shared instances created once at import.
embedder = embeddings.Embedder()
reviewer = feedback.Reviewer()

_initialize_defaults()

__all__ = ["embeddings", "feedback", "main", "models", "utils", "Session"]
@@ -23,7 +23,17 @@ class Embedder:
23
23
 
24
24
  if self._cloud_impl is None:
25
25
  self.model = os.getenv("COMPAIR_LOCAL_EMBED_MODEL", "hash-embedding")
26
- self.dimension = int(os.getenv("COMPAIR_LOCAL_EMBED_DIM", "384"))
26
+ default_dim = 1536 if self.edition == "cloud" else 384
27
+ dim_env = (
28
+ os.getenv("COMPAIR_EMBEDDING_DIM")
29
+ or os.getenv("COMPAIR_EMBEDDING_DIMENSION")
30
+ or os.getenv("COMPAIR_LOCAL_EMBED_DIM")
31
+ or str(default_dim)
32
+ )
33
+ try:
34
+ self.dimension = int(dim_env)
35
+ except ValueError: # pragma: no cover - invalid configuration
36
+ self.dimension = default_dim
27
37
  base_url = os.getenv("COMPAIR_LOCAL_MODEL_URL", "http://local-model:9000")
28
38
  route = os.getenv("COMPAIR_LOCAL_EMBED_ROUTE", "/embed")
29
39
  self.endpoint = f"{base_url.rstrip('/')}{route}"