compair-core 0.3.13__tar.gz → 0.3.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of compair-core might be problematic. Click here for more details.

Files changed (45) hide show
  1. {compair_core-0.3.13 → compair_core-0.3.15}/PKG-INFO +5 -1
  2. {compair_core-0.3.13 → compair_core-0.3.15}/README.md +4 -0
  3. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/compair/embeddings.py +11 -1
  4. compair_core-0.3.15/compair_core/compair/feedback.py +204 -0
  5. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/compair/main.py +43 -14
  6. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/compair/models.py +74 -4
  7. compair_core-0.3.15/compair_core/server/local_model/app.py +87 -0
  8. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core.egg-info/PKG-INFO +5 -1
  9. {compair_core-0.3.13 → compair_core-0.3.15}/pyproject.toml +1 -1
  10. compair_core-0.3.13/compair_core/compair/feedback.py +0 -79
  11. compair_core-0.3.13/compair_core/server/local_model/app.py +0 -62
  12. {compair_core-0.3.13 → compair_core-0.3.15}/LICENSE +0 -0
  13. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/__init__.py +0 -0
  14. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/api.py +0 -0
  15. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/compair/__init__.py +0 -0
  16. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/compair/celery_app.py +0 -0
  17. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/compair/default_groups.py +0 -0
  18. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/compair/logger.py +0 -0
  19. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/compair/schema.py +0 -0
  20. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/compair/tasks.py +0 -0
  21. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/compair/utils.py +0 -0
  22. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/compair_email/__init__.py +0 -0
  23. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/compair_email/email.py +0 -0
  24. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/compair_email/email_core.py +0 -0
  25. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/compair_email/templates.py +0 -0
  26. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/compair_email/templates_core.py +0 -0
  27. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/server/__init__.py +0 -0
  28. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/server/app.py +0 -0
  29. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/server/deps.py +0 -0
  30. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/server/local_model/__init__.py +0 -0
  31. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/server/providers/__init__.py +0 -0
  32. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/server/providers/console_mailer.py +0 -0
  33. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/server/providers/contracts.py +0 -0
  34. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/server/providers/local_storage.py +0 -0
  35. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/server/providers/noop_analytics.py +0 -0
  36. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/server/providers/noop_billing.py +0 -0
  37. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/server/providers/noop_ocr.py +0 -0
  38. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/server/routers/__init__.py +0 -0
  39. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/server/routers/capabilities.py +0 -0
  40. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core/server/settings.py +0 -0
  41. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core.egg-info/SOURCES.txt +0 -0
  42. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core.egg-info/dependency_links.txt +0 -0
  43. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core.egg-info/requires.txt +0 -0
  44. {compair_core-0.3.13 → compair_core-0.3.15}/compair_core.egg-info/top_level.txt +0 -0
  45. {compair_core-0.3.13 → compair_core-0.3.15}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compair-core
3
- Version: 0.3.13
3
+ Version: 0.3.15
4
4
  Summary: Open-source foundation of the Compair collaboration platform.
5
5
  Author: RocketResearch, Inc.
6
6
  License: MIT
@@ -92,6 +92,10 @@ Key environment variables for the core edition:
92
92
  - `COMPAIR_REQUIRE_AUTHENTICATION` (`true`) – set to `false` to run the API in single-user mode without login or account management. When disabled, Compair auto-provisions a local user, group, and long-lived session token so you can upload documents immediately.
93
93
  - `COMPAIR_SINGLE_USER_USERNAME` / `COMPAIR_SINGLE_USER_NAME` – override the email-style username and display name that are used for the auto-provisioned local user in single-user mode.
94
94
  - `COMPAIR_INCLUDE_LEGACY_ROUTES` (`false`) – opt-in to the full legacy API surface (used by the hosted product) when running the core edition. Leave unset to expose only the streamlined single-user endpoints in Swagger.
95
+ - `COMPAIR_EMBEDDING_DIM` – force the embedding vector size stored in the database (defaults to 384 for core, 1536 for cloud). Keep this in sync with whichever embedding model you configure.
96
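+ - `COMPAIR_VECTOR_BACKEND` (`auto`) – selects how embeddings are stored. Set to `pgvector` when running against PostgreSQL with the pgvector extension, or `json` to store embeddings as JSON arrays. When unset, Compair picks `pgvector` if PostgreSQL connection settings are present (`DB`, `DB_USER`, `DB_PASSW` and `DB_URL`, or a `DATABASE_URL` starting with `postgres://` / `postgresql://`) and `json` otherwise (the default for SQLite deployments).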
97
+ - `COMPAIR_GENERATION_PROVIDER` (`local`) – choose how feedback is produced. Options: `local` (call the bundled FastAPI service), `openai` (use ChatGPT-compatible APIs with an API key), or `fallback` (skip generation and surface similar references only).
98
+ - `COMPAIR_OPENAI_API_KEY` / `COMPAIR_OPENAI_MODEL` – when using the OpenAI provider, supply your API key and optional model name (defaults to `gpt-4o-mini`). The fallback kicks in automatically if the key or SDK is unavailable.
95
99
 
96
100
  See `compair_core/server/settings.py` for the full settings surface.
97
101
 
@@ -57,6 +57,10 @@ Key environment variables for the core edition:
57
57
  - `COMPAIR_REQUIRE_AUTHENTICATION` (`true`) – set to `false` to run the API in single-user mode without login or account management. When disabled, Compair auto-provisions a local user, group, and long-lived session token so you can upload documents immediately.
58
58
  - `COMPAIR_SINGLE_USER_USERNAME` / `COMPAIR_SINGLE_USER_NAME` – override the email-style username and display name that are used for the auto-provisioned local user in single-user mode.
59
59
  - `COMPAIR_INCLUDE_LEGACY_ROUTES` (`false`) – opt-in to the full legacy API surface (used by the hosted product) when running the core edition. Leave unset to expose only the streamlined single-user endpoints in Swagger.
60
+ - `COMPAIR_EMBEDDING_DIM` – force the embedding vector size stored in the database (defaults to 384 for core, 1536 for cloud). Keep this in sync with whichever embedding model you configure.
61
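+ - `COMPAIR_VECTOR_BACKEND` (`auto`) – selects how embeddings are stored. Set to `pgvector` when running against PostgreSQL with the pgvector extension, or `json` to store embeddings as JSON arrays. When unset, Compair picks `pgvector` if PostgreSQL connection settings are present (`DB`, `DB_USER`, `DB_PASSW` and `DB_URL`, or a `DATABASE_URL` starting with `postgres://` / `postgresql://`) and `json` otherwise (the default for SQLite deployments).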
62
+ - `COMPAIR_GENERATION_PROVIDER` (`local`) – choose how feedback is produced. Options: `local` (call the bundled FastAPI service), `openai` (use ChatGPT-compatible APIs with an API key), or `fallback` (skip generation and surface similar references only).
63
+ - `COMPAIR_OPENAI_API_KEY` / `COMPAIR_OPENAI_MODEL` – when using the OpenAI provider, supply your API key and optional model name (defaults to `gpt-4o-mini`). The fallback kicks in automatically if the key or SDK is unavailable.
60
64
 
61
65
  See `compair_core/server/settings.py` for the full settings surface.
62
66
 
@@ -23,7 +23,17 @@ class Embedder:
23
23
 
24
24
  if self._cloud_impl is None:
25
25
  self.model = os.getenv("COMPAIR_LOCAL_EMBED_MODEL", "hash-embedding")
26
- self.dimension = int(os.getenv("COMPAIR_LOCAL_EMBED_DIM", "384"))
26
+ default_dim = 1536 if self.edition == "cloud" else 384
27
+ dim_env = (
28
+ os.getenv("COMPAIR_EMBEDDING_DIM")
29
+ or os.getenv("COMPAIR_EMBEDDING_DIMENSION")
30
+ or os.getenv("COMPAIR_LOCAL_EMBED_DIM")
31
+ or str(default_dim)
32
+ )
33
+ try:
34
+ self.dimension = int(dim_env)
35
+ except ValueError: # pragma: no cover - invalid configuration
36
+ self.dimension = default_dim
27
37
  base_url = os.getenv("COMPAIR_LOCAL_MODEL_URL", "http://local-model:9000")
28
38
  route = os.getenv("COMPAIR_LOCAL_EMBED_ROUTE", "/embed")
29
39
  self.endpoint = f"{base_url.rstrip('/')}{route}"
@@ -0,0 +1,204 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from typing import Any, Iterable, List
5
+
6
+ import requests
7
+
8
+ from .logger import log_event
9
+ from .models import Document, User
10
+
11
+ try:
12
+ import openai # type: ignore
13
+ except ImportError: # pragma: no cover - optional dependency
14
+ openai = None # type: ignore
15
+
16
+ try:
17
+ from compair_cloud.feedback import Reviewer as CloudReviewer # type: ignore
18
+ from compair_cloud.feedback import get_feedback as cloud_get_feedback # type: ignore
19
+ except (ImportError, ModuleNotFoundError):
20
+ CloudReviewer = None # type: ignore
21
+ cloud_get_feedback = None # type: ignore
22
+
23
+
24
class Reviewer:
    """Edition-aware wrapper that selects a feedback provider based on configuration.

    The provider is resolved once, at construction time, from environment
    variables:

    * ``COMPAIR_EDITION`` -- ``"cloud"`` delegates to ``CloudReviewer`` when
      that optional class could be imported; anything else uses the core
      providers below.
    * ``COMPAIR_GENERATION_PROVIDER`` -- ``"local"`` (default), ``"openai"``
      or ``"fallback"``.  Leading/trailing whitespace and case are ignored.
    * ``COMPAIR_OPENAI_API_KEY`` (or the SDK's standard ``OPENAI_API_KEY``)
      and ``COMPAIR_OPENAI_MODEL`` -- used by the ``"openai"`` provider.
    * ``COMPAIR_LOCAL_MODEL_URL`` / ``COMPAIR_LOCAL_GENERATION_ROUTE`` /
      ``COMPAIR_LOCAL_GENERATION_MODEL`` -- used by the ``"local"`` provider.

    Attributes:
        edition: Lower-cased edition name.
        provider: Effective provider after availability checks; one of
            ``"cloud"``, ``"openai"``, ``"local"``, ``"fallback"`` or
            whatever unrecognised value was configured.
        length_map: Maps a user's preferred feedback length to the
            instruction text sent to the generator.
        openai_model: Model name used for OpenAI requests.
        model: Local generation model name, or ``"external"`` when the local
            service is not in use.
        endpoint: URL of the local generation route, or ``None`` when the
            local service is not in use.
    """

    def __init__(self) -> None:
        self.edition = os.getenv("COMPAIR_EDITION", "core").lower()
        self.provider = os.getenv("COMPAIR_GENERATION_PROVIDER", "local").strip().lower()
        self.length_map = {
            "Brief": "1–2 short sentences",
            "Detailed": "A couple short paragraphs",
            "Verbose": "As thorough as reasonably possible without repeating information",
        }

        self._cloud_impl = None
        self._openai_client = None
        self.openai_model = os.getenv("COMPAIR_OPENAI_MODEL", "gpt-4o-mini")
        # Always defined so callers can read them whichever provider is active.
        self.model = "external"
        self.endpoint = None

        if self.edition == "cloud" and CloudReviewer is not None:
            self._cloud_impl = CloudReviewer()
            self.provider = "cloud"
            return

        if self.provider == "openai":
            # May downgrade ``self.provider`` to "fallback".
            self._configure_openai()

        if self.provider == "local":
            self.model = os.getenv("COMPAIR_LOCAL_GENERATION_MODEL", "local-feedback")
            base_url = os.getenv("COMPAIR_LOCAL_MODEL_URL", "http://local-model:9000")
            route = os.getenv("COMPAIR_LOCAL_GENERATION_ROUTE", "/generate")
            self.endpoint = f"{base_url.rstrip('/')}{route}"

    def _configure_openai(self) -> None:
        """Prepare the OpenAI SDK, or switch to the fallback provider.

        The fallback is selected (and logged) when the SDK is not installed,
        when no API key is configured, or when the installed SDK offers
        neither a usable client nor the legacy ``ChatCompletion`` interface.
        """
        if openai is None:
            self._use_fallback("openai_library_missing")
            return

        api_key = os.getenv("COMPAIR_OPENAI_API_KEY") or os.getenv("OPENAI_API_KEY")
        if not api_key:
            # Without a key every request would fail at call time; decide now.
            self._use_fallback("openai_api_key_missing")
            return

        # Support both legacy (ChatCompletion) and new SDKs
        if hasattr(openai, "api_key"):
            openai.api_key = api_key  # type: ignore[assignment]
        if hasattr(openai, "OpenAI"):
            try:  # pragma: no cover - optional runtime dependency
                self._openai_client = openai.OpenAI(api_key=api_key)  # type: ignore[attr-defined]
            except Exception:  # pragma: no cover - if instantiation fails
                self._openai_client = None
        if self._openai_client is None and not hasattr(openai, "ChatCompletion"):
            self._use_fallback("openai_library_missing")

    def _use_fallback(self, reason: str) -> None:
        """Log why OpenAI feedback is unavailable and select the fallback provider."""
        log_event("openai_feedback_unavailable", reason=reason)
        self.provider = "fallback"

    @property
    def is_cloud(self) -> bool:
        """Whether feedback is delegated to the cloud implementation."""
        return self._cloud_impl is not None
70
+
71
+
72
def _reference_snippets(references: Iterable[Any], limit: int = 3) -> List[str]:
    """Return up to ``limit`` single-line excerpts taken from ``references``.

    Each reference's ``content`` attribute (missing or ``None`` counts as
    empty) has newlines replaced by spaces and surrounding whitespace
    removed; empty results are skipped and the rest are truncated to 200
    characters.

    Args:
        references: Objects that may expose a ``content`` string.
        limit: Maximum number of excerpts to return.  Zero or a negative
            value yields an empty list.

    Returns:
        The excerpts, in the order their references were supplied.
    """
    snippets: List[str] = []
    if limit <= 0:
        # An equality test against the limit would never fire here and
        # would let every reference through.
        return snippets
    for ref in references:
        snippet = (getattr(ref, "content", "") or "").replace("\n", " ").strip()
        if not snippet:
            continue
        snippets.append(snippet[:200])
        if len(snippets) >= limit:
            break
    return snippets
82
+
83
+
84
def _fallback_feedback(text: str, references: list[Any]) -> str:
    """Build feedback from reference excerpts alone, without a generator.

    ``text`` is accepted for signature parity with the other providers and
    is not used.  Returns the literal ``"NONE"`` when no reference yields a
    non-empty excerpt.
    """
    passages = _reference_snippets(references)
    if passages:
        combined = "; ".join(passages)
        return f"Consider aligning with these reference passages: {combined}"
    return "NONE"
90
+
91
+
92
def _openai_feedback(
    reviewer: Reviewer,
    doc: Document,
    text: str,
    references: list[Any],
    user: User,
) -> str | None:
    """Ask the configured OpenAI model for feedback on ``text``.

    Three SDK shapes are tried, in order: a client exposing the Responses
    API, a client exposing only ``chat.completions``, and the legacy
    module-level ``ChatCompletion`` interface.

    Args:
        reviewer: Supplies the client, model name and length instructions.
        doc: Unused here; kept for signature parity with the cloud provider.
        text: Document text to review.
        references: Objects whose ``content`` is offered as reference excerpts
            (at most three are included in the prompt).
        user: Its ``preferred_feedback_length`` selects the length instruction.

    Returns:
        The stripped feedback text, or ``None`` when the SDK is missing, the
        request fails (the failure is logged), or the reply is empty.
    """
    if openai is None:
        return None
    instruction = reviewer.length_map.get(user.preferred_feedback_length, "1–2 short sentences")
    ref_text = "\n\n".join(_reference_snippets(references, limit=3))
    messages = [
        {
            "role": "system",
            "content": (
                "You are Compair, an assistant that delivers concise, actionable feedback on a user's document. "
                "Focus on clarity, cohesion, and usefulness."
            ),
        },
        {
            "role": "user",
            "content": (
                f"Document:\n{text}\n\nHelpful reference excerpts:\n{ref_text or 'None provided'}\n\n"
                f"Respond with {instruction} that highlights the most valuable revision to make next."
            ),
        },
    ]

    content = None
    client = reviewer._openai_client
    try:
        if client is not None and hasattr(client, "responses"):
            response = client.responses.create(  # type: ignore[union-attr]
                model=reviewer.openai_model,
                input=messages,
                max_output_tokens=256,
            )
            content = getattr(response, "output_text", None)
            if not content:
                # The SDK exposes the items as ``output``; ``outputs`` is
                # still consulted for objects that use the older name.
                items = getattr(response, "output", None) or getattr(response, "outputs", None) or []
                parts = []
                for item in items:
                    # Some items carry no content list at all.
                    parts.extend(getattr(item, "content", None) or [])
                content = " ".join(getattr(part, "text", None) or "" for part in parts)
        elif client is not None and hasattr(client, "chat"):
            # 1.x clients that predate the Responses API.
            chat_response = client.chat.completions.create(  # type: ignore[union-attr]
                model=reviewer.openai_model,
                messages=messages,
                temperature=0.3,
                max_tokens=256,
            )
            content = chat_response.choices[0].message.content
        elif hasattr(openai, "ChatCompletion"):
            chat_response = openai.ChatCompletion.create(  # type: ignore[attr-defined]
                model=reviewer.openai_model,
                messages=messages,
                temperature=0.3,
                max_tokens=256,
            )
            content = chat_response["choices"][0]["message"]["content"]  # type: ignore[index]
    except Exception as exc:  # pragma: no cover - network/API failure
        log_event("openai_feedback_failed", error=str(exc))
        content = None

    if content:
        content = content.strip()
    return content or None
154
+
155
+
156
def _local_feedback(
    reviewer: Reviewer,
    text: str,
    references: list[Any],
    user: User,
) -> str | None:
    """POST the document and its references to the local generation endpoint.

    Returns the stripped ``feedback`` (or, failing that, ``text``) field of
    the JSON reply.  Returns ``None`` when the reply carries neither, or when
    the request fails for any reason; failures are logged, never raised.
    """
    reference_texts = [getattr(ref, "content", "") for ref in references]
    length_instruction = reviewer.length_map.get(
        user.preferred_feedback_length,
        "1–2 short sentences",
    )
    request_body = {
        "document": text,
        "references": reference_texts,
        "length_instruction": length_instruction,
    }

    try:
        reply = requests.post(reviewer.endpoint, json=request_body, timeout=30)
        reply.raise_for_status()
        body = reply.json()
        generated = body.get("feedback") or body.get("text")
        if generated:
            return str(generated).strip()
    except Exception as exc:  # pragma: no cover - network failures stay graceful
        log_event("local_feedback_failed", error=str(exc))

    return None
182
+
183
+
184
def get_feedback(
    reviewer: Reviewer,
    doc: Document,
    text: str,
    references: list[Any],
    user: User,
) -> str:
    """Produce feedback for ``text`` using the reviewer's configured provider.

    Cloud reviewers delegate entirely to the cloud implementation.
    Otherwise the ``openai`` or ``local`` provider is tried, and whenever it
    yields nothing (or another provider is configured) the reference-only
    fallback text is returned.
    """
    if reviewer.is_cloud and cloud_get_feedback is not None:
        return cloud_get_feedback(reviewer._cloud_impl, doc, text, references, user)  # type: ignore[arg-type]

    generated = None
    if reviewer.provider == "openai":
        generated = _openai_feedback(reviewer, doc, text, references, user)
    elif reviewer.provider == "local" and getattr(reviewer, "endpoint", None):
        generated = _local_feedback(reviewer, text, references, user)

    return generated or _fallback_feedback(text, references)
@@ -12,7 +12,17 @@ from sqlalchemy.orm import Session as SASession
12
12
 
13
13
  from .embeddings import create_embedding, Embedder
14
14
  from .feedback import get_feedback, Reviewer
15
- from .models import Chunk, Document, Feedback, Group, Note, Reference, User
15
+ from .models import (
16
+ Chunk,
17
+ Document,
18
+ Feedback,
19
+ Group,
20
+ Note,
21
+ Reference,
22
+ User,
23
+ VECTOR_BACKEND,
24
+ cosine_similarity,
25
+ )
16
26
  from .utils import chunk_text, log_activity
17
27
 
18
28
 
@@ -159,22 +169,41 @@ def process_text(
159
169
  Chunk.note_id == note_id,
160
170
  ).first()
161
171
 
172
+ references: list[Chunk] = []
162
173
  if generate_feedback and existing_chunk:
163
174
  doc_group_ids = [g.group_id for g in doc.groups]
164
- references = (
165
- session.query(Chunk)
166
- .join(Chunk.document)
167
- .join(Document.groups)
168
- .filter(
169
- Document.is_published.is_(True),
170
- Document.document_id != doc.document_id,
171
- Chunk.chunk_type == "document",
172
- Group.group_id.in_(doc_group_ids),
175
+ target_embedding = existing_chunk.embedding
176
+
177
+ if target_embedding is not None:
178
+ base_query = (
179
+ session.query(Chunk)
180
+ .join(Chunk.document)
181
+ .join(Document.groups)
182
+ .filter(
183
+ Document.is_published.is_(True),
184
+ Document.document_id != doc.document_id,
185
+ Chunk.chunk_type == "document",
186
+ Group.group_id.in_(doc_group_ids),
187
+ )
173
188
  )
174
- .order_by(Chunk.embedding.cosine_distance(existing_chunk.embedding))
175
- .limit(3)
176
- .all()
177
- )
189
+
190
+ if VECTOR_BACKEND == "pgvector":
191
+ references = (
192
+ base_query.order_by(
193
+ Chunk.embedding.cosine_distance(existing_chunk.embedding)
194
+ )
195
+ .limit(3)
196
+ .all()
197
+ )
198
+ else:
199
+ candidates = base_query.all()
200
+ scored: list[tuple[float, Chunk]] = []
201
+ for candidate in candidates:
202
+ score = cosine_similarity(candidate.embedding, target_embedding)
203
+ if score is not None:
204
+ scored.append((score, candidate))
205
+ scored.sort(key=lambda item: item[0], reverse=True)
206
+ references = [chunk for _, chunk in scored[:3]]
178
207
 
179
208
  sql_references: list[Reference] = []
180
209
  for ref_chunk in references:
@@ -5,9 +5,15 @@ import hashlib
5
5
  import os
6
6
  import secrets
7
7
  from datetime import datetime, timezone
8
+ from math import sqrt
9
+ from typing import Sequence
8
10
  from uuid import uuid4
9
11
 
10
- from pgvector.sqlalchemy import Vector
12
+ try: # Optional: only required when using pgvector backend
13
+ from pgvector.sqlalchemy import Vector
14
+ except ImportError: # pragma: no cover - optional dependency in core
15
+ Vector = None # type: ignore[assignment]
16
+
11
17
  from sqlalchemy import (
12
18
  Boolean,
13
19
  Column,
@@ -15,6 +21,7 @@ from sqlalchemy import (
15
21
  ForeignKey,
16
22
  Identity,
17
23
  Integer,
24
+ JSON,
18
25
  String,
19
26
  Table,
20
27
  Text,
@@ -27,6 +34,69 @@ from sqlalchemy.orm import (
27
34
  relationship,
28
35
  )
29
36
 
37
# The edition selects the default vector size: 1536 for cloud, 384 otherwise.
_EDITION = os.getenv("COMPAIR_EDITION", "core").lower()
_DEFAULT_DIM = 1536 if _EDITION == "cloud" else 384
# First non-empty setting wins; the later names are alternative spellings.
_DIM_ENV = (
    os.getenv("COMPAIR_EMBEDDING_DIM")
    or os.getenv("COMPAIR_EMBEDDING_DIMENSION")
    or os.getenv("COMPAIR_LOCAL_EMBED_DIM")
    or str(_DEFAULT_DIM)
)

try:
    EMBEDDING_DIMENSION = int(_DIM_ENV)
except ValueError:  # pragma: no cover - invalid configuration
    EMBEDDING_DIMENSION = _DEFAULT_DIM
if EMBEDDING_DIMENSION <= 0:
    # A zero or negative size cannot describe a vector; treat it like any
    # other invalid value and use the edition default.
    EMBEDDING_DIMENSION = _DEFAULT_DIM
50
+
51
+
52
def _detect_vector_backend() -> str:
    """Decide how embedding vectors are stored.

    An explicit ``COMPAIR_VECTOR_BACKEND`` wins and is returned lower-cased
    with surrounding whitespace removed.  When the variable is unset, empty,
    or set to ``auto``, the backend is inferred from the database settings:
    ``"pgvector"`` if ``DB``, ``DB_USER``, ``DB_PASSW`` and ``DB_URL`` are all
    set, or if ``DATABASE_URL`` starts with ``postgres://`` /
    ``postgresql://``; ``"json"`` otherwise.

    Returns:
        The backend name.  Unrecognised explicit values are passed through
        unchanged for the caller to interpret.
    """
    explicit = (os.getenv("COMPAIR_VECTOR_BACKEND") or "").strip().lower()
    # "auto" is the documented default, so setting it explicitly must run
    # detection rather than being treated as a backend name.
    if explicit and explicit != "auto":
        return explicit

    db = os.getenv("DB")
    db_user = os.getenv("DB_USER")
    db_passw = os.getenv("DB_PASSW")
    db_url = os.getenv("DB_URL")
    database_url = os.getenv("DATABASE_URL", "")

    if all([db, db_user, db_passw, db_url]):
        return "pgvector"
    if database_url.lower().startswith(("postgres://", "postgresql://")):
        return "pgvector"
    return "json"


VECTOR_BACKEND = _detect_vector_backend()
71
+
72
+
73
def _embedding_column():
    """Return a fresh nullable embedding column for the active vector backend.

    Raises:
        RuntimeError: If the pgvector backend is selected but the
            ``pgvector`` package could not be imported.
    """
    if VECTOR_BACKEND != "pgvector":
        # Store embeddings as JSON arrays (works across SQLite/Postgres without pgvector)
        return mapped_column(JSON, nullable=True, default=None)

    if Vector is None:
        raise RuntimeError(
            "pgvector is required when COMPAIR_VECTOR_BACKEND is set to 'pgvector'."
        )
    vector_type = Vector(EMBEDDING_DIMENSION)
    return mapped_column(vector_type, nullable=True, default=None)
86
+
87
+
88
def cosine_similarity(vec1: Sequence[float] | None, vec2: Sequence[float] | None) -> float | None:
    """Return the cosine similarity of two equal-length vectors.

    Emptiness is tested with ``len`` rather than truthiness so that
    array-like inputs (for example numpy arrays, whose truth value is
    ambiguous) are handled as well as plain lists.

    Args:
        vec1: First vector, or ``None``.
        vec2: Second vector, or ``None``.

    Returns:
        A value in ``[-1.0, 1.0]`` (up to rounding), or ``None`` when either
        vector is ``None``, empty, all zeros, or the lengths differ.
    """
    if vec1 is None or vec2 is None:
        return None
    size = len(vec1)
    if size == 0 or size != len(vec2):
        return None
    dot = sum(a * b for a, b in zip(vec1, vec2))
    norm1 = sqrt(sum(a * a for a in vec1))
    norm2 = sqrt(sum(b * b for b in vec2))
    if norm1 == 0 or norm2 == 0:
        # Direction is undefined for a zero vector.
        return None
    return float(dot / (norm1 * norm2))
99
+
30
100
 
31
101
  class Base(DeclarativeBase, MappedAsDataclass):
32
102
  pass
@@ -213,10 +283,10 @@ class Document(BaseObject):
213
283
  doc_type: Mapped[str]
214
284
  datetime_created: Mapped[datetime]
215
285
  datetime_modified: Mapped[datetime]
286
+ embedding: Mapped[list[float] | None] = _embedding_column()
216
287
  file_key: Mapped[str | None] = mapped_column(String, nullable=True, default=None)
217
288
  image_key: Mapped[str | None] = mapped_column(String, nullable=True, default=None)
218
289
  is_published: Mapped[bool] = mapped_column(Boolean, default=False)
219
- embedding = mapped_column(Vector(1536))
220
290
 
221
291
  user = relationship("User", back_populates="documents")
222
292
  groups = relationship("Group", secondary="document_to_group", back_populates="documents")
@@ -249,8 +319,8 @@ class Note(Base):
249
319
  author_id: Mapped[str] = mapped_column(ForeignKey("user.user_id", ondelete="CASCADE"), index=True)
250
320
  group_id: Mapped[str | None] = mapped_column(ForeignKey("group.group_id", ondelete="CASCADE"), index=True, nullable=True)
251
321
  content: Mapped[str] = mapped_column(Text)
322
+ embedding: Mapped[list[float] | None] = _embedding_column()
252
323
  datetime_created: Mapped[datetime] = mapped_column(default=datetime.now(timezone.utc))
253
- embedding = mapped_column(Vector(1536))
254
324
 
255
325
  document = relationship("Document", back_populates="notes")
256
326
  author = relationship("User", back_populates="notes")
@@ -279,7 +349,7 @@ class Chunk(Base):
279
349
  document_id: Mapped[str | None] = mapped_column(ForeignKey("document.document_id", ondelete="CASCADE"), index=True, nullable=True)
280
350
  note_id: Mapped[str | None] = mapped_column(ForeignKey("note.note_id", ondelete="CASCADE"), index=True, nullable=True)
281
351
  chunk_type: Mapped[str] = mapped_column(String(16), default="document")
282
- embedding = mapped_column(Vector(1536))
352
+ embedding: Mapped[list[float] | None] = _embedding_column()
283
353
 
284
354
  document = relationship("Document", back_populates="chunks")
285
355
  note = relationship("Note", back_populates="chunks")
@@ -0,0 +1,87 @@
1
+ """Minimal FastAPI application serving local embedding and generation endpoints."""
2
+ from __future__ import annotations
3
+
4
+ import hashlib
5
+ import os
6
+ from typing import List
7
+
8
+ from fastapi import FastAPI
9
+ from pydantic import BaseModel
10
+
11
+ app = FastAPI(title="Compair Local Model", version="0.1.0")
12
+
13
_DEFAULT_DIM = 384
# First non-empty setting wins; the later names are alternative spellings.
_DIM_ENV = (
    os.getenv("COMPAIR_EMBEDDING_DIM")
    or os.getenv("COMPAIR_EMBEDDING_DIMENSION")
    or os.getenv("COMPAIR_LOCAL_EMBED_DIM")
    or str(_DEFAULT_DIM)
)
try:
    EMBED_DIMENSION = int(_DIM_ENV)
except ValueError:  # pragma: no cover - invalid configuration
    EMBED_DIMENSION = _DEFAULT_DIM
if EMBED_DIMENSION <= 0:
    # A non-positive size would make every embedding an empty list; treat it
    # like any other invalid value.
    EMBED_DIMENSION = _DEFAULT_DIM


def _hash_embedding(text: str, dimension: int = EMBED_DIMENSION) -> List[float]:
    """Derive a deterministic pseudo-embedding from the SHA-256 of ``text``.

    Each digest byte is mapped linearly from ``[0, 255]`` to ``[-1.0, 1.0]``.
    When more than 32 components are needed the digest is re-hashed to
    produce the next 32 bytes, and so on.

    Args:
        text: Input text; an empty string is treated as a single space.
        dimension: Number of components to produce.  Zero or a negative
            value yields an empty list.

    Returns:
        A list of ``dimension`` floats.
    """
    if not text:
        text = " "
    digest = hashlib.sha256(text.encode("utf-8", "ignore")).digest()
    vector: List[float] = []
    while len(vector) < dimension:
        for byte in digest:
            vector.append((byte / 255.0) * 2 - 1)
            if len(vector) == dimension:
                break
        # Chain the hash to extend the byte stream deterministically.
        digest = hashlib.sha256(digest).digest()
    return vector
38
+
39
+
40
+ class EmbedRequest(BaseModel):
41
+ text: str
42
+
43
+
44
+ class EmbedResponse(BaseModel):
45
+ embedding: List[float]
46
+
47
+
48
+ class GenerateRequest(BaseModel):
49
+ # Legacy format used by the CLI shim
50
+ system: str | None = None
51
+ prompt: str | None = None
52
+ verbosity: str | None = None
53
+
54
+ # Core API payload (document + references)
55
+ document: str | None = None
56
+ references: List[str] | None = None
57
+ length_instruction: str | None = None
58
+
59
+
60
+ class GenerateResponse(BaseModel):
61
+ feedback: str
62
+
63
+
64
+ @app.post("/embed", response_model=EmbedResponse)
65
+ def embed(request: EmbedRequest) -> EmbedResponse:
66
+ return EmbedResponse(embedding=_hash_embedding(request.text))
67
+
68
+
69
+ @app.post("/generate", response_model=GenerateResponse)
70
+ def generate(request: GenerateRequest) -> GenerateResponse:
71
+ # Determine the main text input (document or prompt)
72
+ text_input = request.document or request.prompt or ""
73
+ text_input = text_input.strip()
74
+
75
+ if not text_input:
76
+ return GenerateResponse(feedback="NONE")
77
+
78
+ first_sentence = text_input.split("\n", 1)[0][:200]
79
+ verbosity = request.length_instruction or request.verbosity or "brief response"
80
+ ref_snippet = ""
81
+ if request.references:
82
+ top_ref = (request.references[0] or "").strip()
83
+ if top_ref:
84
+ ref_snippet = f" Reference: {top_ref[:160]}"
85
+
86
+ feedback = f"[local-feedback] {verbosity}: {first_sentence}{ref_snippet}".strip()
87
+ return GenerateResponse(feedback=feedback or "NONE")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compair-core
3
- Version: 0.3.13
3
+ Version: 0.3.15
4
4
  Summary: Open-source foundation of the Compair collaboration platform.
5
5
  Author: RocketResearch, Inc.
6
6
  License: MIT
@@ -92,6 +92,10 @@ Key environment variables for the core edition:
92
92
  - `COMPAIR_REQUIRE_AUTHENTICATION` (`true`) – set to `false` to run the API in single-user mode without login or account management. When disabled, Compair auto-provisions a local user, group, and long-lived session token so you can upload documents immediately.
93
93
  - `COMPAIR_SINGLE_USER_USERNAME` / `COMPAIR_SINGLE_USER_NAME` – override the email-style username and display name that are used for the auto-provisioned local user in single-user mode.
94
94
  - `COMPAIR_INCLUDE_LEGACY_ROUTES` (`false`) – opt-in to the full legacy API surface (used by the hosted product) when running the core edition. Leave unset to expose only the streamlined single-user endpoints in Swagger.
95
+ - `COMPAIR_EMBEDDING_DIM` – force the embedding vector size stored in the database (defaults to 384 for core, 1536 for cloud). Keep this in sync with whichever embedding model you configure.
96
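+ - `COMPAIR_VECTOR_BACKEND` (`auto`) – selects how embeddings are stored. Set to `pgvector` when running against PostgreSQL with the pgvector extension, or `json` to store embeddings as JSON arrays. When unset, Compair picks `pgvector` if PostgreSQL connection settings are present (`DB`, `DB_USER`, `DB_PASSW` and `DB_URL`, or a `DATABASE_URL` starting with `postgres://` / `postgresql://`) and `json` otherwise (the default for SQLite deployments).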
97
+ - `COMPAIR_GENERATION_PROVIDER` (`local`) – choose how feedback is produced. Options: `local` (call the bundled FastAPI service), `openai` (use ChatGPT-compatible APIs with an API key), or `fallback` (skip generation and surface similar references only).
98
+ - `COMPAIR_OPENAI_API_KEY` / `COMPAIR_OPENAI_MODEL` – when using the OpenAI provider, supply your API key and optional model name (defaults to `gpt-4o-mini`). The fallback kicks in automatically if the key or SDK is unavailable.
95
99
 
96
100
  See `compair_core/server/settings.py` for the full settings surface.
97
101
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "compair-core"
7
- version = "0.3.13"
7
+ version = "0.3.15"
8
8
  description = "Open-source foundation of the Compair collaboration platform."
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -1,79 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- import requests
5
- from typing import Any
6
-
7
- from .logger import log_event
8
- from .models import Document, User
9
-
10
- try:
11
- from compair_cloud.feedback import Reviewer as CloudReviewer # type: ignore
12
- from compair_cloud.feedback import get_feedback as cloud_get_feedback # type: ignore
13
- except (ImportError, ModuleNotFoundError):
14
- CloudReviewer = None # type: ignore
15
- cloud_get_feedback = None # type: ignore
16
-
17
-
18
- class Reviewer:
19
- """Edition-aware wrapper that falls back to the local feedback endpoint."""
20
-
21
- def __init__(self) -> None:
22
- self.edition = os.getenv("COMPAIR_EDITION", "core").lower()
23
- self._cloud_impl = None
24
- if self.edition == "cloud" and CloudReviewer is not None:
25
- self._cloud_impl = CloudReviewer()
26
- else:
27
- self.client = None
28
- self.model = os.getenv("COMPAIR_LOCAL_GENERATION_MODEL", "local-feedback")
29
- base_url = os.getenv("COMPAIR_LOCAL_MODEL_URL", "http://local-model:9000")
30
- route = os.getenv("COMPAIR_LOCAL_GENERATION_ROUTE", "/generate")
31
- self.endpoint = f"{base_url.rstrip('/')}{route}"
32
-
33
- @property
34
- def is_cloud(self) -> bool:
35
- return self._cloud_impl is not None
36
-
37
-
38
- def _fallback_feedback(text: str, references: list[Any]) -> str:
39
- if not references:
40
- return "NONE"
41
- top_ref = references[0]
42
- snippet = getattr(top_ref, "content", "") or ""
43
- snippet = snippet.replace("\n", " ").strip()[:200]
44
- if not snippet:
45
- return "NONE"
46
- return f"Check alignment with this reference: {snippet}"
47
-
48
-
49
- def get_feedback(
50
- reviewer: Reviewer,
51
- doc: Document,
52
- text: str,
53
- references: list[Any],
54
- user: User,
55
- ) -> str:
56
- if reviewer.is_cloud and cloud_get_feedback is not None:
57
- return cloud_get_feedback(reviewer._cloud_impl, doc, text, references, user) # type: ignore[arg-type]
58
-
59
- payload = {
60
- "document": text,
61
- "references": [getattr(ref, "content", "") for ref in references],
62
- "length_instruction": {
63
- "Brief": "1–2 short sentences",
64
- "Detailed": "A couple short paragraphs",
65
- "Verbose": "As thorough as reasonably possible without repeating information",
66
- }.get(user.preferred_feedback_length, "1–2 short sentences"),
67
- }
68
-
69
- try:
70
- response = requests.post(reviewer.endpoint, json=payload, timeout=30)
71
- response.raise_for_status()
72
- data = response.json()
73
- feedback = data.get("feedback")
74
- if feedback:
75
- return feedback
76
- except Exception as exc: # pragma: no cover - network failures stay graceful
77
- log_event("local_feedback_failed", error=str(exc))
78
-
79
- return _fallback_feedback(text, references)
@@ -1,62 +0,0 @@
1
- """Minimal FastAPI application serving local embedding and generation endpoints."""
2
- from __future__ import annotations
3
-
4
- import hashlib
5
- from typing import List
6
-
7
- from fastapi import FastAPI
8
- from pydantic import BaseModel
9
-
10
- app = FastAPI(title="Compair Local Model", version="0.1.0")
11
-
12
- EMBED_DIMENSION = 384
13
-
14
-
15
- def _hash_embedding(text: str, dimension: int = EMBED_DIMENSION) -> List[float]:
16
- if not text:
17
- text = " "
18
- digest = hashlib.sha256(text.encode("utf-8", "ignore")).digest()
19
- vector: List[float] = []
20
- while len(vector) < dimension:
21
- for byte in digest:
22
- vector.append((byte / 255.0) * 2 - 1)
23
- if len(vector) == dimension:
24
- break
25
- digest = hashlib.sha256(digest).digest()
26
- return vector
27
-
28
-
29
- class EmbedRequest(BaseModel):
30
- text: str
31
-
32
-
33
- class EmbedResponse(BaseModel):
34
- embedding: List[float]
35
-
36
-
37
- class GenerateRequest(BaseModel):
38
- system: str | None = None
39
- prompt: str
40
- verbosity: str | None = None
41
-
42
-
43
- class GenerateResponse(BaseModel):
44
- text: str
45
-
46
-
47
- @app.post("/embed", response_model=EmbedResponse)
48
- def embed(request: EmbedRequest) -> EmbedResponse:
49
- return EmbedResponse(embedding=_hash_embedding(request.text))
50
-
51
-
52
- @app.post("/generate", response_model=GenerateResponse)
53
- def generate(request: GenerateRequest) -> GenerateResponse:
54
- prompt = request.prompt.strip()
55
- if not prompt:
56
- return GenerateResponse(text="NONE")
57
-
58
- first_sentence = prompt.split("\n", 1)[0][:200]
59
- verbosity = request.verbosity or "default"
60
- return GenerateResponse(
61
- text=f"[local-{verbosity}] Key takeaway: {first_sentence}"
62
- )
File without changes
File without changes