compair-core 0.3.15__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of compair-core might be problematic. Click here for more details.

Files changed (45) hide show
  1. {compair_core-0.3.15 → compair_core-0.4.1}/PKG-INFO +6 -3
  2. {compair_core-0.3.15 → compair_core-0.4.1}/README.md +5 -2
  3. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/api.py +7 -0
  4. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/compair/__init__.py +35 -10
  5. compair_core-0.4.1/compair_core/compair/feedback.py +313 -0
  6. compair_core-0.4.1/compair_core/server/local_model/ocr.py +44 -0
  7. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/server/routers/capabilities.py +4 -0
  8. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core.egg-info/PKG-INFO +6 -3
  9. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core.egg-info/SOURCES.txt +1 -0
  10. {compair_core-0.3.15 → compair_core-0.4.1}/pyproject.toml +1 -1
  11. compair_core-0.3.15/compair_core/compair/feedback.py +0 -204
  12. {compair_core-0.3.15 → compair_core-0.4.1}/LICENSE +0 -0
  13. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/__init__.py +0 -0
  14. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/compair/celery_app.py +0 -0
  15. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/compair/default_groups.py +0 -0
  16. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/compair/embeddings.py +0 -0
  17. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/compair/logger.py +0 -0
  18. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/compair/main.py +0 -0
  19. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/compair/models.py +0 -0
  20. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/compair/schema.py +0 -0
  21. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/compair/tasks.py +0 -0
  22. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/compair/utils.py +0 -0
  23. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/compair_email/__init__.py +0 -0
  24. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/compair_email/email.py +0 -0
  25. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/compair_email/email_core.py +0 -0
  26. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/compair_email/templates.py +0 -0
  27. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/compair_email/templates_core.py +0 -0
  28. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/server/__init__.py +0 -0
  29. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/server/app.py +0 -0
  30. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/server/deps.py +0 -0
  31. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/server/local_model/__init__.py +0 -0
  32. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/server/local_model/app.py +0 -0
  33. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/server/providers/__init__.py +0 -0
  34. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/server/providers/console_mailer.py +0 -0
  35. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/server/providers/contracts.py +0 -0
  36. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/server/providers/local_storage.py +0 -0
  37. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/server/providers/noop_analytics.py +0 -0
  38. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/server/providers/noop_billing.py +0 -0
  39. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/server/providers/noop_ocr.py +0 -0
  40. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/server/routers/__init__.py +0 -0
  41. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core/server/settings.py +0 -0
  42. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core.egg-info/dependency_links.txt +0 -0
  43. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core.egg-info/requires.txt +0 -0
  44. {compair_core-0.3.15 → compair_core-0.4.1}/compair_core.egg-info/top_level.txt +0 -0
  45. {compair_core-0.3.15 → compair_core-0.4.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compair-core
3
- Version: 0.3.15
3
+ Version: 0.4.1
4
4
  Summary: Open-source foundation of the Compair collaboration platform.
5
5
  Author: RocketResearch, Inc.
6
6
  License: MIT
@@ -86,7 +86,8 @@ Container definitions and build pipelines live outside this public package:
86
86
  Key environment variables for the core edition:
87
87
 
88
88
  - `COMPAIR_EDITION` (`core`) – corresponds to this core local implementation.
89
- - `COMPAIR_SQLITE_DIR` / `COMPAIR_SQLITE_NAME` override the default local SQLite path (falls back to `./compair_data` if `/data` is not writable).
89
+ - `COMPAIR_DATABASE_URL` – optional explicit SQLAlchemy URL (e.g. `postgresql+psycopg2://user:pass@host/db`). When omitted, Compair falls back to a local SQLite file.
90
+ - `COMPAIR_DB_DIR` / `COMPAIR_DB_NAME` – directory and filename for the bundled SQLite database (default: `~/.compair-core/data/compair.db`). Legacy `COMPAIR_SQLITE_*` variables remain supported.
90
91
  - `COMPAIR_LOCAL_MODEL_URL` – endpoint for your local embeddings/feedback service (defaults to `http://local-model:9000`).
91
92
  - `COMPAIR_EMAIL_BACKEND` – the core mailer logs emails to stdout; cloud overrides this with transactional delivery.
92
93
  - `COMPAIR_REQUIRE_AUTHENTICATION` (`true`) – set to `false` to run the API in single-user mode without login or account management. When disabled, Compair auto-provisions a local user, group, and long-lived session token so you can upload documents immediately.
@@ -94,8 +95,10 @@ Key environment variables for the core edition:
94
95
  - `COMPAIR_INCLUDE_LEGACY_ROUTES` (`false`) – opt-in to the full legacy API surface (used by the hosted product) when running the core edition. Leave unset to expose only the streamlined single-user endpoints in Swagger.
95
96
  - `COMPAIR_EMBEDDING_DIM` – force the embedding vector size stored in the database (defaults to 384 for core, 1536 for cloud). Keep this in sync with whichever embedding model you configure.
96
97
  - `COMPAIR_VECTOR_BACKEND` (`auto`) – set to `pgvector` when running against PostgreSQL with the pgvector extension, or `json` to store embeddings as JSON (the default for SQLite deployments).
97
- - `COMPAIR_GENERATION_PROVIDER` (`local`) – choose how feedback is produced. Options: `local` (call the bundled FastAPI service), `openai` (use ChatGPT-compatible APIs with an API key), or `fallback` (skip generation and surface similar references only).
98
+ - `COMPAIR_GENERATION_PROVIDER` (`local`) – choose how feedback is produced. Options: `local` (call the bundled FastAPI service), `openai` (use ChatGPT-compatible APIs with an API key), `http` (POST the request to a custom endpoint), or `fallback` (skip generation and surface similar references only).
98
99
  - `COMPAIR_OPENAI_API_KEY` / `COMPAIR_OPENAI_MODEL` – when using the OpenAI provider, supply your API key and optional model name (defaults to `gpt-5-nano`). The fallback kicks in automatically if the key or SDK is unavailable.
100
+ - `COMPAIR_GENERATION_ENDPOINT` – HTTP endpoint invoked when `COMPAIR_GENERATION_PROVIDER=http`; the service receives a JSON payload (`document`, `references`, `length_instruction`) and should return `{"feedback": ...}`.
101
+ - `COMPAIR_OCR_ENDPOINT` – endpoint the backend calls for OCR uploads (defaults to the bundled Tesseract wrapper at `http://local-ocr:9001/ocr-file`). Provide your own service by overriding this URL.
99
102
 
100
103
  See `compair_core/server/settings.py` for the full settings surface.
101
104
 
@@ -51,7 +51,8 @@ Container definitions and build pipelines live outside this public package:
51
51
  Key environment variables for the core edition:
52
52
 
53
53
  - `COMPAIR_EDITION` (`core`) – corresponds to this core local implementation.
54
- - `COMPAIR_SQLITE_DIR` / `COMPAIR_SQLITE_NAME` override the default local SQLite path (falls back to `./compair_data` if `/data` is not writable).
54
+ - `COMPAIR_DATABASE_URL` – optional explicit SQLAlchemy URL (e.g. `postgresql+psycopg2://user:pass@host/db`). When omitted, Compair falls back to a local SQLite file.
55
+ - `COMPAIR_DB_DIR` / `COMPAIR_DB_NAME` – directory and filename for the bundled SQLite database (default: `~/.compair-core/data/compair.db`). Legacy `COMPAIR_SQLITE_*` variables remain supported.
55
56
  - `COMPAIR_LOCAL_MODEL_URL` – endpoint for your local embeddings/feedback service (defaults to `http://local-model:9000`).
56
57
  - `COMPAIR_EMAIL_BACKEND` – the core mailer logs emails to stdout; cloud overrides this with transactional delivery.
57
58
  - `COMPAIR_REQUIRE_AUTHENTICATION` (`true`) – set to `false` to run the API in single-user mode without login or account management. When disabled, Compair auto-provisions a local user, group, and long-lived session token so you can upload documents immediately.
@@ -59,8 +60,10 @@ Key environment variables for the core edition:
59
60
  - `COMPAIR_INCLUDE_LEGACY_ROUTES` (`false`) – opt-in to the full legacy API surface (used by the hosted product) when running the core edition. Leave unset to expose only the streamlined single-user endpoints in Swagger.
60
61
  - `COMPAIR_EMBEDDING_DIM` – force the embedding vector size stored in the database (defaults to 384 for core, 1536 for cloud). Keep this in sync with whichever embedding model you configure.
61
62
  - `COMPAIR_VECTOR_BACKEND` (`auto`) – set to `pgvector` when running against PostgreSQL with the pgvector extension, or `json` to store embeddings as JSON (the default for SQLite deployments).
62
- - `COMPAIR_GENERATION_PROVIDER` (`local`) – choose how feedback is produced. Options: `local` (call the bundled FastAPI service), `openai` (use ChatGPT-compatible APIs with an API key), or `fallback` (skip generation and surface similar references only).
63
+ - `COMPAIR_GENERATION_PROVIDER` (`local`) – choose how feedback is produced. Options: `local` (call the bundled FastAPI service), `openai` (use ChatGPT-compatible APIs with an API key), `http` (POST the request to a custom endpoint), or `fallback` (skip generation and surface similar references only).
63
64
  - `COMPAIR_OPENAI_API_KEY` / `COMPAIR_OPENAI_MODEL` – when using the OpenAI provider, supply your API key and optional model name (defaults to `gpt-5-nano`). The fallback kicks in automatically if the key or SDK is unavailable.
65
+ - `COMPAIR_GENERATION_ENDPOINT` – HTTP endpoint invoked when `COMPAIR_GENERATION_PROVIDER=http`; the service receives a JSON payload (`document`, `references`, `length_instruction`) and should return `{"feedback": ...}`.
66
+ - `COMPAIR_OCR_ENDPOINT` – endpoint the backend calls for OCR uploads (defaults to the bundled Tesseract wrapper at `http://local-ocr:9001/ocr-file`). Provide your own service by overriding this URL.
64
67
 
65
68
  See `compair_core/server/settings.py` for the full settings surface.
66
69
 
@@ -2370,6 +2370,8 @@ def get_activity_feed(
2370
2370
  ):
2371
2371
  """Retrieve recent activities for a user's groups."""
2372
2372
  require_feature(HAS_ACTIVITY, "Activity feed")
2373
+ if not IS_CLOUD:
2374
+ raise HTTPException(status_code=501, detail="Activity feed is only available in the Compair Cloud edition.")
2373
2375
  with compair.Session() as session:
2374
2376
  # Get user's groups
2375
2377
 
@@ -3514,7 +3516,11 @@ CORE_PATHS: set[str] = {
3514
3516
  "/load_documents",
3515
3517
  "/load_document",
3516
3518
  "/load_document_by_id",
3519
+ "/load_user_files",
3517
3520
  "/create_doc",
3521
+ "/update_doc",
3522
+ "/delete_doc",
3523
+ "/delete_docs",
3518
3524
  "/process_doc",
3519
3525
  "/status/{task_id}",
3520
3526
  "/upload/ocr-file",
@@ -3523,6 +3529,7 @@ CORE_PATHS: set[str] = {
3523
3529
  "/load_references",
3524
3530
  "/load_feedback",
3525
3531
  "/documents/{document_id}/feedback",
3532
+ "/get_activity_feed",
3526
3533
  }
3527
3534
 
3528
3535
  for route in router.routes:
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import os
4
+ from pathlib import Path
4
5
  from sqlalchemy import Engine, create_engine
5
6
  from sqlalchemy.orm import sessionmaker
6
7
 
@@ -37,27 +38,51 @@ if edition == "cloud":
37
38
 
38
39
 
39
40
  def _handle_engine() -> Engine:
41
+ # Preferred configuration: explicit database URL
42
+ explicit_url = (
43
+ os.getenv("COMPAIR_DATABASE_URL")
44
+ or os.getenv("COMPAIR_DB_URL")
45
+ or os.getenv("DATABASE_URL")
46
+ )
47
+ if explicit_url:
48
+ if explicit_url.startswith("sqlite:"):
49
+ return create_engine(explicit_url, connect_args={"check_same_thread": False})
50
+ return create_engine(explicit_url)
51
+
52
+ # Backwards compatibility with legacy Postgres env variables
40
53
  db = os.getenv("DB")
41
54
  db_user = os.getenv("DB_USER")
42
55
  db_passw = os.getenv("DB_PASSW")
43
- db_url = os.getenv("DB_URL")
56
+ db_host = os.getenv("DB_URL")
44
57
 
45
- if all([db, db_user, db_passw, db_url]):
58
+ if all([db, db_user, db_passw, db_host]):
46
59
  return create_engine(
47
- f"postgresql+psycopg2://{db_user}:{db_passw}@{db_url}/{db}",
60
+ f"postgresql+psycopg2://{db_user}:{db_passw}@{db_host}/{db}",
48
61
  pool_size=10,
49
62
  max_overflow=0,
50
63
  )
51
64
 
52
- sqlite_dir = os.getenv("COMPAIR_SQLITE_DIR", "/data")
65
+ # Local default: place an SQLite database inside COMPAIR_DB_DIR
66
+ db_dir = (
67
+ os.getenv("COMPAIR_DB_DIR")
68
+ or os.getenv("COMPAIR_SQLITE_DIR")
69
+ or os.path.join(Path.home(), ".compair-core", "data")
70
+ )
71
+ db_name = os.getenv("COMPAIR_DB_NAME") or os.getenv("COMPAIR_SQLITE_NAME") or "compair.db"
72
+
73
+ db_path = Path(db_dir).expanduser()
53
74
  try:
54
- os.makedirs(sqlite_dir, exist_ok=True)
75
+ db_path.mkdir(parents=True, exist_ok=True)
55
76
  except OSError:
56
- fallback_dir = os.path.join(os.getcwd(), "compair_data")
57
- os.makedirs(fallback_dir, exist_ok=True)
58
- sqlite_dir = fallback_dir
59
- sqlite_path = os.path.join(sqlite_dir, os.getenv("COMPAIR_SQLITE_NAME", "compair.db"))
60
- return create_engine(f"sqlite:///{sqlite_path}", connect_args={"check_same_thread": False})
77
+ fallback_dir = Path(os.getcwd()) / "compair_data"
78
+ fallback_dir.mkdir(parents=True, exist_ok=True)
79
+ db_path = fallback_dir
80
+
81
+ sqlite_path = db_path / db_name
82
+ return create_engine(
83
+ f"sqlite:///{sqlite_path}",
84
+ connect_args={"check_same_thread": False},
85
+ )
61
86
 
62
87
 
63
88
  def initialize_database() -> None:
@@ -0,0 +1,313 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from typing import Any, Iterable, List
5
+
6
+ import requests
7
+
8
+ from .logger import log_event
9
+ from .models import Document, User
10
+
11
+ try:
12
+ import openai # type: ignore
13
+ except ImportError: # pragma: no cover - optional dependency
14
+ openai = None # type: ignore
15
+
16
+ try:
17
+ from compair_cloud.feedback import Reviewer as CloudReviewer # type: ignore
18
+ from compair_cloud.feedback import get_feedback as cloud_get_feedback # type: ignore
19
+ except (ImportError, ModuleNotFoundError):
20
+ CloudReviewer = None # type: ignore
21
+ cloud_get_feedback = None # type: ignore
22
+
23
+
24
class Reviewer:
    """Edition-aware wrapper that selects a feedback provider based on configuration.

    Configuration is read from environment variables at construction time:

    * ``COMPAIR_EDITION`` (default ``core``) and ``COMPAIR_GENERATION_PROVIDER``
      (default ``local``) pick the provider; both are lower-cased.
    * ``COMPAIR_OPENAI_API_KEY`` / ``COMPAIR_OPENAI_MODEL`` (default
      ``gpt-5-nano``) configure the ``openai`` provider.
    * ``COMPAIR_GENERATION_ENDPOINT`` is required by the ``http`` provider.
    * ``COMPAIR_LOCAL_MODEL_URL``, ``COMPAIR_LOCAL_GENERATION_ROUTE`` and
      ``COMPAIR_LOCAL_GENERATION_MODEL`` configure the ``local`` provider.

    After construction ``provider`` is one of ``cloud``, ``local``, ``openai``,
    ``http`` or ``fallback``; misconfigured or unknown providers are downgraded
    to ``fallback`` and the reason is logged.
    """

    def __init__(self) -> None:
        self.edition = os.getenv("COMPAIR_EDITION", "core").lower()
        self.provider = os.getenv("COMPAIR_GENERATION_PROVIDER", "local").lower()
        # Maps the user's preferred feedback length to a prompt instruction.
        self.length_map = {
            "Brief": "1–2 short sentences",
            "Detailed": "A couple short paragraphs",
            "Verbose": "As thorough as reasonably possible without repeating information",
        }

        self._cloud_impl = None
        self._openai_client = None
        self.openai_model = os.getenv("COMPAIR_OPENAI_MODEL", "gpt-5-nano")
        self.custom_endpoint = os.getenv("COMPAIR_GENERATION_ENDPOINT")
        # Assigned up-front so the attributes exist on every code path
        # (the cloud branch below previously left them undefined).
        self.model = "external"
        self.endpoint = None

        if self.edition == "cloud" and CloudReviewer is not None:
            self._cloud_impl = CloudReviewer()
            self.provider = "cloud"
            return

        if self.provider == "openai":
            api_key = os.getenv("COMPAIR_OPENAI_API_KEY")
            if api_key and openai is not None:
                # Support both legacy (ChatCompletion) and new SDKs
                if hasattr(openai, "api_key"):
                    openai.api_key = api_key  # type: ignore[assignment]
                if hasattr(openai, "OpenAI"):
                    try:  # pragma: no cover - optional runtime dependency
                        self._openai_client = openai.OpenAI(api_key=api_key)  # type: ignore[attr-defined]
                    except Exception:  # pragma: no cover - if instantiation fails
                        self._openai_client = None
            # No client and no legacy entry point: nothing can serve requests.
            if self._openai_client is None and not hasattr(openai, "ChatCompletion"):
                log_event("openai_feedback_unavailable", reason="openai_library_missing")
                self.provider = "fallback"

        if self.provider == "http" and not self.custom_endpoint:
            log_event("custom_feedback_unavailable", reason="missing_endpoint")
            self.provider = "fallback"

        if self.provider == "local":
            self.model = os.getenv("COMPAIR_LOCAL_GENERATION_MODEL", "local-feedback")
            base_url = os.getenv("COMPAIR_LOCAL_MODEL_URL", "http://local-model:9000")
            route = os.getenv("COMPAIR_LOCAL_GENERATION_ROUTE", "/generate")
            self.endpoint = f"{base_url.rstrip('/')}{route}"

        if self.provider not in {"local", "openai", "http", "fallback"}:
            log_event("feedback_provider_unknown", provider=self.provider)
            self.provider = "fallback"

    @property
    def is_cloud(self) -> bool:
        """Return True when a cloud reviewer implementation was instantiated."""
        return self._cloud_impl is not None
77
+
78
+
79
+ def _reference_snippets(references: Iterable[Any], limit: int = 3) -> List[str]:
80
+ snippets: List[str] = []
81
+ for ref in references:
82
+ snippet = getattr(ref, "content", "") or ""
83
+ snippet = snippet.replace("\n", " ").strip()
84
+ if snippet:
85
+ snippets.append(snippet[:200])
86
+ if len(snippets) == limit:
87
+ break
88
+ return snippets
89
+
90
+
91
def _fallback_feedback(text: str, references: list[Any]) -> str:
    """Build generation-free feedback from reference excerpts.

    Returns the literal string ``"NONE"`` when no reference yields an excerpt;
    otherwise lists the excerpts, separated by ``"; "``. ``text`` is accepted
    for signature parity with the other providers and is not used.
    """
    excerpts = _reference_snippets(references)
    if excerpts:
        passages = "; ".join(excerpts)
        return f"Consider aligning with these reference passages: {passages}"
    return "NONE"
97
+
98
+
99
+
100
+ def _local_reference_feedback(
101
+ reviewer: Reviewer,
102
+ references: list[Any],
103
+ user: User,
104
+ ) -> str | None:
105
+ if not references:
106
+ return None
107
+ summaries: list[str] = []
108
+ for ref in references[:3]:
109
+ doc = getattr(ref, "document", None)
110
+ title = getattr(doc, "title", None) or "a related document"
111
+ snippet = getattr(ref, "content", "") or getattr(ref, "text", "")
112
+ snippet = snippet.replace("\n", " ").strip()
113
+ if not snippet:
114
+ continue
115
+ summaries.append(f'"{title}" — {snippet[:200]}')
116
+ if not summaries:
117
+ return None
118
+ instruction = reviewer.length_map.get(user.preferred_feedback_length, "1–2 short sentences")
119
+ if len(summaries) == 1:
120
+ body = summaries[0]
121
+ else:
122
+ body = "; ".join(summaries)
123
+ return f"[local-feedback] {instruction}: Consider the guidance from {body}"
124
+
125
+
126
def _extract_response_text(response: Any) -> str | None:
    """Pull plain text out of a Responses-style result, or return None.

    Prefers a non-empty ``output_text`` attribute. Otherwise walks
    ``response.output`` (or ``outputs``) and joins the text of every content
    part with newlines. Items and parts may be dicts or attribute objects.
    """
    if response is None:
        return None
    text_out = getattr(response, "output_text", None)
    if isinstance(text_out, str) and text_out.strip():
        return text_out.strip()
    outputs = getattr(response, "output", None) or getattr(response, "outputs", None)
    pieces: list[str] = []
    for item in outputs or []:
        if isinstance(item, dict):
            content_field = item.get("content")
        else:
            content_field = getattr(item, "content", None)
        if not content_field:
            continue
        for part in content_field:
            if isinstance(part, dict):
                val = part.get("text") or part.get("output_text")
            else:
                # Attribute-style parts carry their text in `.text`; using
                # str(part) directly would emit the object's repr instead.
                # Anything without `.text` keeps the str() fallback.
                val = getattr(part, "text", None) or part
            if val:
                pieces.append(str(val))
    merged = "\n".join(pieces).strip()
    return merged or None


def _openai_feedback(
    reviewer: Reviewer,
    doc: Document,
    text: str,
    references: list[Any],
    user: User,
) -> str | None:
    """Generate feedback through the OpenAI SDK.

    Tries, in order: the client's ``responses`` API, the client's
    ``chat.completions`` API, then the legacy module-level ``ChatCompletion``
    API. A client is created lazily (and cached on ``reviewer``) when none
    exists yet.

    Returns:
        The stripped feedback text, or ``None`` when the SDK is not installed,
        the call fails (the error is logged), or the reply is empty. ``doc`` is
        accepted for signature parity and is not used.
    """
    if openai is None:
        return None
    instruction = reviewer.length_map.get(user.preferred_feedback_length, "1–2 short sentences")
    ref_text = "\n\n".join(_reference_snippets(references, limit=3))
    system_prompt = (
        "You are Compair, an assistant that delivers concise, actionable feedback on a user's document. "
        "Focus on clarity, cohesion, and usefulness."
    )
    user_prompt = (
        f"Document:\n{text}\n\nHelpful reference excerpts:\n{ref_text or 'None provided'}\n\n"
        f"Respond with {instruction} that highlights the most valuable revision to make next."
    )

    try:
        client = reviewer._openai_client
        if client is None and hasattr(openai, "OpenAI"):
            api_key = os.getenv("COMPAIR_OPENAI_API_KEY") or None
            try:  # pragma: no cover - optional dependency differences
                client = openai.OpenAI(api_key=api_key) if api_key else openai.OpenAI()
            except TypeError:
                client = openai.OpenAI()
            reviewer._openai_client = client

        content: str | None = None
        if client is not None and hasattr(client, "responses"):
            response = client.responses.create(
                model=reviewer.openai_model,
                instructions=system_prompt,
                input=user_prompt,
                max_output_tokens=256,
                store=False,
            )
            content = _extract_response_text(response)
        elif client is not None and hasattr(client, "chat") and hasattr(client.chat, "completions"):
            response = client.chat.completions.create(
                model=reviewer.openai_model,
                messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
                temperature=0.3,
                max_tokens=256,
            )
            choices = getattr(response, "choices", None) or []
            if choices:
                message = getattr(choices[0], "message", None)
                if message is not None:
                    content = getattr(message, "content", None)
                if not content:
                    content = getattr(choices[0], "text", None)
            if isinstance(content, str):
                content = content.strip()
        elif hasattr(openai, "ChatCompletion"):
            chat_response = openai.ChatCompletion.create(  # type: ignore[attr-defined]
                model=reviewer.openai_model,
                messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
                temperature=0.3,
                max_tokens=256,
            )
            content = chat_response["choices"][0]["message"]["content"].strip()  # type: ignore[index, assignment]
        if content:
            return content.strip()
    except Exception as exc:  # pragma: no cover - network/API failure
        # Deliberately broad: any SDK/network failure degrades to the fallback.
        log_event("openai_feedback_failed", error=str(exc))
    return None
224
+
225
+
226
def _local_feedback(
    reviewer: Reviewer,
    text: str,
    references: list[Any],
    user: User,
) -> str | None:
    """Request feedback from the local generation service at ``reviewer.endpoint``.

    Sends a JSON body with ``document``, ``references`` and
    ``length_instruction`` and reads ``feedback`` (or ``text``) from the JSON
    reply. Any failure is logged and results in ``None``.
    """
    reference_texts = [getattr(ref, "content", "") for ref in references]
    length_instruction = reviewer.length_map.get(
        user.preferred_feedback_length,
        "1–2 short sentences",
    )
    request_body = {
        "document": text,
        "references": reference_texts,
        "length_instruction": length_instruction,
    }

    try:
        reply = requests.post(reviewer.endpoint, json=request_body, timeout=30)
        reply.raise_for_status()
        parsed = reply.json()
        generated = parsed.get("feedback") or parsed.get("text")
        if not generated:
            return None
        return str(generated).strip()
    except Exception as exc:  # pragma: no cover - network failures stay graceful
        log_event("local_feedback_failed", error=str(exc))
        return None
252
+
253
+
254
+ def _http_feedback(
255
+ reviewer: Reviewer,
256
+ text: str,
257
+ references: list[Any],
258
+ user: User,
259
+ ) -> str | None:
260
+ if not reviewer.custom_endpoint:
261
+ return None
262
+ payload = {
263
+ "document": text,
264
+ "references": [getattr(ref, "content", "") for ref in references],
265
+ "length_instruction": reviewer.length_map.get(
266
+ user.preferred_feedback_length,
267
+ "1–2 short sentences",
268
+ ),
269
+ }
270
+ try:
271
+ response = requests.post(reviewer.custom_endpoint, json=payload, timeout=30)
272
+ response.raise_for_status()
273
+ data = response.json()
274
+ feedback = data.get("feedback") or data.get("text")
275
+ if isinstance(feedback, str):
276
+ feedback = feedback.strip()
277
+ if feedback:
278
+ return feedback
279
+ except Exception as exc: # pragma: no cover - network failures stay graceful
280
+ log_event("custom_feedback_failed", error=str(exc))
281
+ return None
282
+
283
+
284
def get_feedback(
    reviewer: Reviewer,
    doc: Document,
    text: str,
    references: list[Any],
    user: User,
) -> str:
    """Produce feedback for ``text`` using the provider selected on ``reviewer``.

    Cloud reviewers delegate entirely to the cloud implementation. Otherwise
    the configured provider is tried (``local`` first summarises the references
    and only then calls the local endpoint); whenever it yields nothing, the
    reference-only fallback message is returned.
    """
    if reviewer.is_cloud and cloud_get_feedback is not None:
        return cloud_get_feedback(reviewer._cloud_impl, doc, text, references, user)  # type: ignore[arg-type]

    provider = reviewer.provider
    generated = None
    if provider == "openai":
        generated = _openai_feedback(reviewer, doc, text, references, user)
    elif provider == "http":
        generated = _http_feedback(reviewer, text, references, user)
    elif provider == "local":
        generated = _local_reference_feedback(reviewer, references, user)
        if not generated and getattr(reviewer, "endpoint", None):
            generated = _local_feedback(reviewer, text, references, user)

    if generated:
        return generated
    return _fallback_feedback(text, references)
@@ -0,0 +1,44 @@
1
+ """Minimal OCR endpoint leveraging pytesseract when available."""
2
+ from __future__ import annotations
3
+
4
+ import io
5
+ import os
6
+ from typing import Any, Dict
7
+
8
+ from fastapi import FastAPI, File, HTTPException, UploadFile
9
+
10
+ app = FastAPI(title="Compair Local OCR", version="0.1.0")
11
+
12
+ try: # Optional dependency
13
+ import pytesseract # type: ignore
14
+ from PIL import Image # type: ignore
15
+ except ImportError: # pragma: no cover - optional
16
+ pytesseract = None # type: ignore
17
+ Image = None # type: ignore
18
+
19
+ _OCR_FALLBACK = os.getenv("COMPAIR_LOCAL_OCR_FALLBACK", "text") # text | none
20
+
21
+
22
def _extract_text(data: bytes) -> str:
    """Return the text recognised in ``data``, or ``""`` when nothing is available.

    When pytesseract or PIL could not be imported, the bytes are decoded as
    text (UTF-8, then Latin-1) if ``_OCR_FALLBACK`` is ``"text"``; any other
    fallback setting yields an empty string. Otherwise the bytes are opened as
    an image and passed to Tesseract.
    """
    if pytesseract is None or Image is None:
        if _OCR_FALLBACK != "text":
            return ""
        try:
            return data.decode("utf-8")
        except UnicodeDecodeError:
            return data.decode("latin-1", errors="ignore")
    try:
        # The context manager closes the image once OCR has finished.
        with Image.open(io.BytesIO(data)) as image:
            return pytesseract.image_to_string(image)
    except Exception:
        # Best-effort by design: the caller maps an empty result to an error
        # response, so unreadable images or OCR failures do not propagate.
        return ""
35
+
36
+
37
@app.post("/ocr-file")
async def ocr_file(file: UploadFile = File(...)) -> Dict[str, Any]:
    """Run OCR on an uploaded file and return ``{"extracted_text": ...}``.

    Raises:
        HTTPException: 501 when no text could be extracted, including output
            that consists only of whitespace.
    """
    payload = await file.read()
    text = _extract_text(payload)
    # Whitespace-only output (e.g. a lone form feed) carries no content, so it
    # is reported as a failure rather than as a successful extraction.
    if not text.strip():
        raise HTTPException(status_code=501, detail="OCR not available or failed to extract text.")
    return {"extracted_text": text}
44
+
@@ -36,6 +36,10 @@ def capabilities(settings: Settings = Depends(get_settings)) -> dict[str, object
36
36
  "docs": None if edition == "core" else 100,
37
37
  "feedback_per_day": None if edition == "core" else 50,
38
38
  },
39
+ "features": {
40
+ "ocr_upload": settings.ocr_enabled,
41
+ "activity_feed": edition == "cloud",
42
+ },
39
43
  "server": "Compair Cloud" if edition == "cloud" else "Compair Core",
40
44
  "version": settings.version,
41
45
  "legacy_routes": settings.include_legacy_routes,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compair-core
3
- Version: 0.3.15
3
+ Version: 0.4.1
4
4
  Summary: Open-source foundation of the Compair collaboration platform.
5
5
  Author: RocketResearch, Inc.
6
6
  License: MIT
@@ -86,7 +86,8 @@ Container definitions and build pipelines live outside this public package:
86
86
  Key environment variables for the core edition:
87
87
 
88
88
  - `COMPAIR_EDITION` (`core`) – corresponds to this core local implementation.
89
- - `COMPAIR_SQLITE_DIR` / `COMPAIR_SQLITE_NAME` override the default local SQLite path (falls back to `./compair_data` if `/data` is not writable).
89
+ - `COMPAIR_DATABASE_URL` – optional explicit SQLAlchemy URL (e.g. `postgresql+psycopg2://user:pass@host/db`). When omitted, Compair falls back to a local SQLite file.
90
+ - `COMPAIR_DB_DIR` / `COMPAIR_DB_NAME` – directory and filename for the bundled SQLite database (default: `~/.compair-core/data/compair.db`). Legacy `COMPAIR_SQLITE_*` variables remain supported.
90
91
  - `COMPAIR_LOCAL_MODEL_URL` – endpoint for your local embeddings/feedback service (defaults to `http://local-model:9000`).
91
92
  - `COMPAIR_EMAIL_BACKEND` – the core mailer logs emails to stdout; cloud overrides this with transactional delivery.
92
93
  - `COMPAIR_REQUIRE_AUTHENTICATION` (`true`) – set to `false` to run the API in single-user mode without login or account management. When disabled, Compair auto-provisions a local user, group, and long-lived session token so you can upload documents immediately.
@@ -94,8 +95,10 @@ Key environment variables for the core edition:
94
95
  - `COMPAIR_INCLUDE_LEGACY_ROUTES` (`false`) – opt-in to the full legacy API surface (used by the hosted product) when running the core edition. Leave unset to expose only the streamlined single-user endpoints in Swagger.
95
96
  - `COMPAIR_EMBEDDING_DIM` – force the embedding vector size stored in the database (defaults to 384 for core, 1536 for cloud). Keep this in sync with whichever embedding model you configure.
96
97
  - `COMPAIR_VECTOR_BACKEND` (`auto`) – set to `pgvector` when running against PostgreSQL with the pgvector extension, or `json` to store embeddings as JSON (the default for SQLite deployments).
97
- - `COMPAIR_GENERATION_PROVIDER` (`local`) – choose how feedback is produced. Options: `local` (call the bundled FastAPI service), `openai` (use ChatGPT-compatible APIs with an API key), or `fallback` (skip generation and surface similar references only).
98
+ - `COMPAIR_GENERATION_PROVIDER` (`local`) – choose how feedback is produced. Options: `local` (call the bundled FastAPI service), `openai` (use ChatGPT-compatible APIs with an API key), `http` (POST the request to a custom endpoint), or `fallback` (skip generation and surface similar references only).
98
99
  - `COMPAIR_OPENAI_API_KEY` / `COMPAIR_OPENAI_MODEL` – when using the OpenAI provider, supply your API key and optional model name (defaults to `gpt-5-nano`). The fallback kicks in automatically if the key or SDK is unavailable.
100
+ - `COMPAIR_GENERATION_ENDPOINT` – HTTP endpoint invoked when `COMPAIR_GENERATION_PROVIDER=http`; the service receives a JSON payload (`document`, `references`, `length_instruction`) and should return `{"feedback": ...}`.
101
+ - `COMPAIR_OCR_ENDPOINT` – endpoint the backend calls for OCR uploads (defaults to the bundled Tesseract wrapper at `http://local-ocr:9001/ocr-file`). Provide your own service by overriding this URL.
99
102
 
100
103
  See `compair_core/server/settings.py` for the full settings surface.
101
104
 
@@ -30,6 +30,7 @@ compair_core/server/deps.py
30
30
  compair_core/server/settings.py
31
31
  compair_core/server/local_model/__init__.py
32
32
  compair_core/server/local_model/app.py
33
+ compair_core/server/local_model/ocr.py
33
34
  compair_core/server/providers/__init__.py
34
35
  compair_core/server/providers/console_mailer.py
35
36
  compair_core/server/providers/contracts.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "compair-core"
7
- version = "0.3.15"
7
+ version = "0.4.1"
8
8
  description = "Open-source foundation of the Compair collaboration platform."
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -1,204 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- from typing import Any, Iterable, List
5
-
6
- import requests
7
-
8
- from .logger import log_event
9
- from .models import Document, User
10
-
11
- try:
12
- import openai # type: ignore
13
- except ImportError: # pragma: no cover - optional dependency
14
- openai = None # type: ignore
15
-
16
- try:
17
- from compair_cloud.feedback import Reviewer as CloudReviewer # type: ignore
18
- from compair_cloud.feedback import get_feedback as cloud_get_feedback # type: ignore
19
- except (ImportError, ModuleNotFoundError):
20
- CloudReviewer = None # type: ignore
21
- cloud_get_feedback = None # type: ignore
22
-
23
-
24
- class Reviewer:
25
- """Edition-aware wrapper that selects a feedback provider based on configuration."""
26
-
27
- def __init__(self) -> None:
28
- self.edition = os.getenv("COMPAIR_EDITION", "core").lower()
29
- self.provider = os.getenv("COMPAIR_GENERATION_PROVIDER", "local").lower()
30
- self.length_map = {
31
- "Brief": "1–2 short sentences",
32
- "Detailed": "A couple short paragraphs",
33
- "Verbose": "As thorough as reasonably possible without repeating information",
34
- }
35
-
36
- self._cloud_impl = None
37
- self._openai_client = None
38
- self.openai_model = os.getenv("COMPAIR_OPENAI_MODEL", "gpt-4o-mini")
39
-
40
- if self.edition == "cloud" and CloudReviewer is not None:
41
- self._cloud_impl = CloudReviewer()
42
- self.provider = "cloud"
43
- else:
44
- if self.provider == "openai":
45
- api_key = os.getenv("COMPAIR_OPENAI_API_KEY")
46
- if api_key and openai is not None:
47
- # Support both legacy (ChatCompletion) and new SDKs
48
- if hasattr(openai, "api_key"):
49
- openai.api_key = api_key # type: ignore[assignment]
50
- if hasattr(openai, "OpenAI"):
51
- try: # pragma: no cover - optional runtime dependency
52
- self._openai_client = openai.OpenAI(api_key=api_key) # type: ignore[attr-defined]
53
- except Exception: # pragma: no cover - if instantiation fails
54
- self._openai_client = None
55
- if self._openai_client is None and not hasattr(openai, "ChatCompletion"):
56
- log_event("openai_feedback_unavailable", reason="openai_library_missing")
57
- self.provider = "fallback"
58
- if self.provider == "local":
59
- self.model = os.getenv("COMPAIR_LOCAL_GENERATION_MODEL", "local-feedback")
60
- base_url = os.getenv("COMPAIR_LOCAL_MODEL_URL", "http://local-model:9000")
61
- route = os.getenv("COMPAIR_LOCAL_GENERATION_ROUTE", "/generate")
62
- self.endpoint = f"{base_url.rstrip('/')}{route}"
63
- else:
64
- self.model = "external"
65
- self.endpoint = None
66
-
67
- @property
68
- def is_cloud(self) -> bool:
69
- return self._cloud_impl is not None
70
-
71
-
72
- def _reference_snippets(references: Iterable[Any], limit: int = 3) -> List[str]:
73
- snippets: List[str] = []
74
- for ref in references:
75
- snippet = getattr(ref, "content", "") or ""
76
- snippet = snippet.replace("\n", " ").strip()
77
- if snippet:
78
- snippets.append(snippet[:200])
79
- if len(snippets) == limit:
80
- break
81
- return snippets
82
-
83
-
84
- def _fallback_feedback(text: str, references: list[Any]) -> str:
85
- snippets = _reference_snippets(references)
86
- if not snippets:
87
- return "NONE"
88
- joined = "; ".join(snippets)
89
- return f"Consider aligning with these reference passages: {joined}"
90
-
91
-
92
- def _openai_feedback(
93
- reviewer: Reviewer,
94
- doc: Document,
95
- text: str,
96
- references: list[Any],
97
- user: User,
98
- ) -> str | None:
99
- if openai is None:
100
- return None
101
- instruction = reviewer.length_map.get(user.preferred_feedback_length, "1–2 short sentences")
102
- ref_text = "\n\n".join(_reference_snippets(references, limit=3))
103
- messages = [
104
- {
105
- "role": "system",
106
- "content": (
107
- "You are Compair, an assistant that delivers concise, actionable feedback on a user's document. "
108
- "Focus on clarity, cohesion, and usefulness."
109
- ),
110
- },
111
- {
112
- "role": "user",
113
- "content": (
114
- f"Document:\n{text}\n\nHelpful reference excerpts:\n{ref_text or 'None provided'}\n\n"
115
- f"Respond with {instruction} that highlights the most valuable revision to make next."
116
- ),
117
- },
118
- ]
119
-
120
- try:
121
- if reviewer._openai_client is not None and hasattr(reviewer._openai_client, "responses"):
122
- response = reviewer._openai_client.responses.create( # type: ignore[union-attr]
123
- model=reviewer.openai_model,
124
- input=messages,
125
- max_output_tokens=256,
126
- )
127
- content = getattr(response, "output_text", None)
128
- if not content and hasattr(response, "outputs"):
129
- # Legacy compatibility: join content parts
130
- parts = []
131
- for item in getattr(response, "outputs", []):
132
- parts.extend(getattr(item, "content", []))
133
- content = " ".join(getattr(part, "text", "") for part in parts)
134
- elif hasattr(openai, "ChatCompletion"):
135
- chat_response = openai.ChatCompletion.create( # type: ignore[attr-defined]
136
- model=reviewer.openai_model,
137
- messages=messages,
138
- temperature=0.3,
139
- max_tokens=256,
140
- )
141
- content = (
142
- chat_response["choices"][0]["message"]["content"].strip() # type: ignore[index, assignment]
143
- )
144
- else:
145
- content = None
146
- except Exception as exc: # pragma: no cover - network/API failure
147
- log_event("openai_feedback_failed", error=str(exc))
148
- content = None
149
- if content:
150
- content = content.strip()
151
- if content:
152
- return content
153
- return None
154
-
155
-
156
- def _local_feedback(
157
- reviewer: Reviewer,
158
- text: str,
159
- references: list[Any],
160
- user: User,
161
- ) -> str | None:
162
- payload = {
163
- "document": text,
164
- "references": [getattr(ref, "content", "") for ref in references],
165
- "length_instruction": reviewer.length_map.get(
166
- user.preferred_feedback_length,
167
- "1–2 short sentences",
168
- ),
169
- }
170
-
171
- try:
172
- response = requests.post(reviewer.endpoint, json=payload, timeout=30)
173
- response.raise_for_status()
174
- data = response.json()
175
- feedback = data.get("feedback") or data.get("text")
176
- if feedback:
177
- return str(feedback).strip()
178
- except Exception as exc: # pragma: no cover - network failures stay graceful
179
- log_event("local_feedback_failed", error=str(exc))
180
-
181
- return None
182
-
183
-
184
- def get_feedback(
185
- reviewer: Reviewer,
186
- doc: Document,
187
- text: str,
188
- references: list[Any],
189
- user: User,
190
- ) -> str:
191
- if reviewer.is_cloud and cloud_get_feedback is not None:
192
- return cloud_get_feedback(reviewer._cloud_impl, doc, text, references, user) # type: ignore[arg-type]
193
-
194
- if reviewer.provider == "openai":
195
- feedback = _openai_feedback(reviewer, doc, text, references, user)
196
- if feedback:
197
- return feedback
198
-
199
- if reviewer.provider == "local" and getattr(reviewer, "endpoint", None):
200
- feedback = _local_feedback(reviewer, text, references, user)
201
- if feedback:
202
- return feedback
203
-
204
- return _fallback_feedback(text, references)
File without changes
File without changes