compair-core 0.4.12 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compair_core/__init__.py +8 -0
- compair_core/api.py +3598 -0
- compair_core/compair/__init__.py +57 -0
- compair_core/compair/celery_app.py +31 -0
- compair_core/compair/default_groups.py +14 -0
- compair_core/compair/embeddings.py +141 -0
- compair_core/compair/feedback.py +368 -0
- compair_core/compair/logger.py +29 -0
- compair_core/compair/main.py +276 -0
- compair_core/compair/models.py +453 -0
- compair_core/compair/schema.py +146 -0
- compair_core/compair/tasks.py +106 -0
- compair_core/compair/utils.py +42 -0
- compair_core/compair_email/__init__.py +0 -0
- compair_core/compair_email/email.py +6 -0
- compair_core/compair_email/email_core.py +15 -0
- compair_core/compair_email/templates.py +6 -0
- compair_core/compair_email/templates_core.py +32 -0
- compair_core/db.py +64 -0
- compair_core/server/__init__.py +0 -0
- compair_core/server/app.py +97 -0
- compair_core/server/deps.py +77 -0
- compair_core/server/local_model/__init__.py +1 -0
- compair_core/server/local_model/app.py +87 -0
- compair_core/server/local_model/ocr.py +107 -0
- compair_core/server/providers/__init__.py +0 -0
- compair_core/server/providers/console_mailer.py +9 -0
- compair_core/server/providers/contracts.py +66 -0
- compair_core/server/providers/http_ocr.py +60 -0
- compair_core/server/providers/local_storage.py +28 -0
- compair_core/server/providers/noop_analytics.py +7 -0
- compair_core/server/providers/noop_billing.py +30 -0
- compair_core/server/providers/noop_ocr.py +10 -0
- compair_core/server/routers/__init__.py +0 -0
- compair_core/server/routers/capabilities.py +46 -0
- compair_core/server/settings.py +66 -0
- compair_core-0.4.12.dist-info/METADATA +136 -0
- compair_core-0.4.12.dist-info/RECORD +41 -0
- compair_core-0.4.12.dist-info/WHEEL +5 -0
- compair_core-0.4.12.dist-info/licenses/LICENSE +674 -0
- compair_core-0.4.12.dist-info/top_level.txt +1 -0
compair_core/compair/__init__.py
@@ -0,0 +1,57 @@

```python
from __future__ import annotations

import os
import sys

from . import embeddings, feedback, logger, main, models, tasks, utils
from compair_core.db import SessionLocal as Session
from compair_core.db import engine
from .default_groups import initialize_default_groups

edition = os.getenv("COMPAIR_EDITION", "core").lower()

initialize_database_override = None

if edition == "cloud":
    try:  # Import cloud overrides if the private package is installed
        from compair_cloud import (  # type: ignore
            bootstrap as cloud_bootstrap,
            embeddings as cloud_embeddings,
            feedback as cloud_feedback,
            logger as cloud_logger,
            main as cloud_main,
            models as cloud_models,
            tasks as cloud_tasks,
            utils as cloud_utils,
        )

        embeddings = cloud_embeddings
        feedback = cloud_feedback
        logger = cloud_logger
        main = cloud_main
        models = cloud_models
        tasks = cloud_tasks
        utils = cloud_utils
        initialize_database_override = getattr(cloud_bootstrap, "initialize_database", None)
    except Exception as exc:
        print(f"[compair_core] Failed to import compair_cloud: {exc}", file=sys.stderr)
        import traceback; traceback.print_exc()


def initialize_database() -> None:
    models.Base.metadata.create_all(engine)
    if initialize_database_override:
        initialize_database_override(engine)


def _initialize_defaults() -> None:
    with Session() as session:
        initialize_default_groups(session)


initialize_database()
embedder = embeddings.Embedder()
reviewer = feedback.Reviewer()
_initialize_defaults()

__all__ = ["embeddings", "feedback", "main", "models", "utils", "Session"]
```
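Note that this package does its setup at import time. A minimal usage sketch, assuming the core edition and that the database behind `compair_core.db` (not shown in this diff) is reachable:

```python
import os

# Select the open-core paths; "cloud" would instead try to import the
# private compair_cloud overrides shown above.
os.environ["COMPAIR_EDITION"] = "core"

# Importing the package has side effects: it calls initialize_database(),
# builds the module-level Embedder and Reviewer, and seeds default groups.
import compair_core.compair as compair

print(compair.embedder.dimension)  # 384 by default in the core edition
print(compair.reviewer.provider)   # "local" unless configured otherwise
```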
compair_core/compair/celery_app.py
@@ -0,0 +1,31 @@

```python
from __future__ import annotations

import logging
from types import SimpleNamespace

logger = logging.getLogger(__name__)

try:
    from compair_cloud.celery_app import celery_app  # type: ignore
except (ImportError, ModuleNotFoundError) as exc:
    logger.warning(
        "Failed to import compair_cloud.celery_app; using no-op Celery stub. (%s: %s)",
        exc.__class__.__name__,
        exc,
        exc_info=exc,
    )

    class _NoopCelery:
        def __init__(self) -> None:
            self.conf = SimpleNamespace(beat_schedule={})

        def task(self, func=None, *args, **kwargs):
            def decorator(fn):
                return fn

            return decorator(func) if func else decorator

        def send_task(self, *args, **kwargs):
            raise RuntimeError("Celery is not available in the Compair Core edition.")

    celery_app = _NoopCelery()
```
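With `compair_cloud` absent, the stub keeps `@celery_app.task` usable as a plain pass-through while broker dispatch fails loudly. A quick illustration, under the same import-time assumptions as the sketch above:

```python
from compair_core.compair.celery_app import celery_app

@celery_app.task
def ping() -> str:
    return "pong"

# The stub's task decorator returns the function unchanged,
# so tasks run inline when called directly.
print(ping())  # "pong"

try:
    celery_app.send_task("ping")
except RuntimeError as exc:
    print(exc)  # Celery is not available in the Compair Core edition.
```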
compair_core/compair/default_groups.py
@@ -0,0 +1,14 @@

```python
from __future__ import annotations

from sqlalchemy.orm import Session

try:
    from compair_cloud.default_groups import initialize_default_groups as cloud_initialize_default_groups  # type: ignore
except (ImportError, ModuleNotFoundError):
    cloud_initialize_default_groups = None


def initialize_default_groups(session: Session) -> None:
    """Core builds do not seed any default groups by default."""
    if cloud_initialize_default_groups:
        cloud_initialize_default_groups(session)
```
compair_core/compair/embeddings.py
@@ -0,0 +1,141 @@

```python
import hashlib
import os
from typing import Any, List, Optional

import requests

from .logger import log_event

try:
    import openai  # type: ignore
except ImportError:  # pragma: no cover - optional dependency
    openai = None  # type: ignore

try:
    from compair_cloud.embeddings import Embedder as CloudEmbedder  # type: ignore
    from compair_cloud.embeddings import create_embedding as cloud_create_embedding  # type: ignore
except (ImportError, ModuleNotFoundError):
    CloudEmbedder = None
    cloud_create_embedding = None


class Embedder:
    def __init__(self) -> None:
        self.edition = os.getenv("COMPAIR_EDITION", "core").lower()
        self._cloud_impl = None
        if self.edition == "cloud" and CloudEmbedder is not None:
            self._cloud_impl = CloudEmbedder()

        if self._cloud_impl is None:
            self.provider = os.getenv("COMPAIR_EMBEDDING_PROVIDER", "local").lower()
            self.model = os.getenv("COMPAIR_LOCAL_EMBED_MODEL", "hash-embedding")
            default_dim = 1536 if self.edition == "cloud" else 384
            dim_env = (
                os.getenv("COMPAIR_EMBEDDING_DIM")
                or os.getenv("COMPAIR_EMBEDDING_DIMENSION")
                or os.getenv("COMPAIR_LOCAL_EMBED_DIM")
                or str(default_dim)
            )
            try:
                self.dimension = int(dim_env)
            except ValueError:  # pragma: no cover - invalid configuration
                self.dimension = default_dim
            base_url = os.getenv("COMPAIR_LOCAL_MODEL_URL", "http://127.0.0.1:9000")
            route = os.getenv("COMPAIR_LOCAL_EMBED_ROUTE", "/embed")
            self.endpoint = f"{base_url.rstrip('/')}{route}"
            self.openai_embed_model = os.getenv("COMPAIR_OPENAI_EMBED_MODEL", "text-embedding-3-small")
            self._openai_client: Optional[Any] = None
            if self.provider == "openai":
                if openai is None:
                    log_event("openai_embedding_unavailable", reason="openai_library_missing")
                    self.provider = "local"
                else:
                    api_key = os.getenv("COMPAIR_OPENAI_API_KEY")
                    if hasattr(openai, "api_key") and api_key:
                        openai.api_key = api_key  # type: ignore[assignment]
                    if hasattr(openai, "OpenAI"):
                        try:  # pragma: no cover - optional runtime dependency
                            self._openai_client = openai.OpenAI(api_key=api_key)  # type: ignore[attr-defined]
                        except Exception:  # pragma: no cover - if instantiation fails
                            self._openai_client = None

    @property
    def is_cloud(self) -> bool:
        return self._cloud_impl is not None


def _hash_embedding(text: str, dimension: int) -> List[float]:
    """Generate a deterministic embedding using repeated SHA-256 hashing."""
    if not text:
        text = " "
    digest = hashlib.sha256(text.encode("utf-8", "ignore")).digest()
    vector: List[float] = []
    while len(vector) < dimension:
        for byte in digest:
            vector.append((byte / 255.0) * 2 - 1)
            if len(vector) == dimension:
                break
        digest = hashlib.sha256(digest).digest()
    return vector


def create_embedding(embedder: Embedder, text: str, user=None) -> list[float]:
    if embedder.is_cloud and cloud_create_embedding is not None:
        return cloud_create_embedding(embedder._cloud_impl, text, user=user)

    provider = getattr(embedder, "provider", "local")
    if provider == "openai" and openai is not None:
        vector = _openai_embedding(embedder, text)
        if vector:
            return vector

    # Local/core path
    endpoint = getattr(embedder, "endpoint", None)
    if endpoint:
        try:
            response = requests.post(endpoint, json={"text": text}, timeout=15)
            response.raise_for_status()
            data = response.json()
            embedding = data.get("embedding") or data.get("vector")
            if embedding:
                return embedding
        except Exception as exc:
            log_event("local_embedding_failed", error=str(exc))

    return _hash_embedding(text, embedder.dimension)


def _openai_embedding(embedder: Embedder, text: str) -> list[float] | None:
    if openai is None:
        return None
    client = getattr(embedder, "_openai_client", None)
    if client is None and hasattr(openai, "OpenAI"):
        api_key = os.getenv("COMPAIR_OPENAI_API_KEY")
        try:  # pragma: no cover - optional client differences
            client = openai.OpenAI(api_key=api_key) if api_key else openai.OpenAI()  # type: ignore[attr-defined]
        except TypeError:
            client = openai.OpenAI()
        embedder._openai_client = client  # type: ignore[attr-defined]

    try:
        if client is not None and hasattr(client, "embeddings"):
            response = client.embeddings.create(
                model=embedder.openai_embed_model,
                input=text,
            )
            data = getattr(response, "data", None)
            if data:
                vector = getattr(data[0], "embedding", None)
                if isinstance(vector, list):
                    return vector
        elif hasattr(openai, "Embedding"):
            response = openai.Embedding.create(  # type: ignore[attr-defined]
                model=embedder.openai_embed_model,
                input=text,
            )
            vector = response["data"][0]["embedding"]  # type: ignore[index]
            if isinstance(vector, list):
                return vector
    except Exception as exc:  # pragma: no cover - network/API failure
        log_event("openai_embedding_failed", error=str(exc))
    return None
```
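`_hash_embedding` is the last-resort provider: it stretches a SHA-256 digest into a fixed-length vector by re-hashing until enough bytes accumulate, mapping each byte into [-1, 1]. The construction restated standalone, to show its determinism:

```python
import hashlib
from typing import List

def hash_embedding(text: str, dimension: int) -> List[float]:
    digest = hashlib.sha256((text or " ").encode("utf-8", "ignore")).digest()
    vector: List[float] = []
    while len(vector) < dimension:
        for byte in digest:
            vector.append((byte / 255.0) * 2 - 1)  # map 0..255 into -1..1
            if len(vector) == dimension:
                break
        # Re-hash the previous digest to extend the byte stream deterministically.
        digest = hashlib.sha256(digest).digest()
    return vector

v1 = hash_embedding("hello", 384)
v2 = hash_embedding("hello", 384)
assert v1 == v2 and len(v1) == 384  # same text, same fixed-dimension vector
```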
compair_core/compair/feedback.py
@@ -0,0 +1,368 @@

```python
from __future__ import annotations

import os
from typing import Any, Iterable, List

import requests

from .logger import log_event
from .models import Document, User

try:
    import openai  # type: ignore
except ImportError:  # pragma: no cover - optional dependency
    openai = None  # type: ignore

try:
    from compair_cloud.feedback import Reviewer as CloudReviewer  # type: ignore
    from compair_cloud.feedback import get_feedback as cloud_get_feedback  # type: ignore
except (ImportError, ModuleNotFoundError):
    CloudReviewer = None  # type: ignore
    cloud_get_feedback = None  # type: ignore


_REASONING_PREFIXES = ("gpt-5", "o1", "o2", "o3", "o4")


def _is_reasoning_model_name(model_name: str | None) -> bool:
    if not model_name:
        return False
    normalized = model_name.lower()
    for prefix in _REASONING_PREFIXES:
        if normalized == prefix or normalized.startswith(f"{prefix}-") or normalized.startswith(f"{prefix}."):
            return True
    return False


def _get_field(source: Any, key: str) -> Any:
    if isinstance(source, dict):
        return source.get(key)
    return getattr(source, key, None)


class Reviewer:
    """Edition-aware wrapper that selects a feedback provider based on configuration."""

    def __init__(self) -> None:
        self.edition = os.getenv("COMPAIR_EDITION", "core").lower()
        self.provider = os.getenv("COMPAIR_GENERATION_PROVIDER", "local").lower()
        self.length_map = {
            "Brief": "1–2 short sentences",
            "Detailed": "a couple short paragraphs",
            "Verbose": "as thorough as reasonably possible without repeating information",
        }

        self._cloud_impl = None
        self._openai_client = None
        self.openai_model = os.getenv("COMPAIR_OPENAI_MODEL", "gpt-5-nano")
        self.openai_reasoning_effort = os.getenv("COMPAIR_OPENAI_REASONING_EFFORT", "minimal")
        self.uses_reasoning_model = _is_reasoning_model_name(self.openai_model)
        self.custom_endpoint = os.getenv("COMPAIR_GENERATION_ENDPOINT")

        if self.edition == "cloud" and CloudReviewer is not None:
            self._cloud_impl = CloudReviewer()
            self.provider = "cloud"
        else:
            if self.provider == "openai":
                api_key = os.getenv("COMPAIR_OPENAI_API_KEY")
                if api_key and openai is not None:
                    # Support both legacy (ChatCompletion) and new SDKs
                    if hasattr(openai, "api_key"):
                        openai.api_key = api_key  # type: ignore[assignment]
                    if hasattr(openai, "OpenAI"):
                        try:  # pragma: no cover - optional runtime dependency
                            self._openai_client = openai.OpenAI(api_key=api_key)  # type: ignore[attr-defined]
                        except Exception:  # pragma: no cover - if instantiation fails
                            self._openai_client = None
                if self._openai_client is None and not hasattr(openai, "ChatCompletion"):
                    log_event("openai_feedback_unavailable", reason="openai_library_missing")
                    self.provider = "fallback"
            if self.provider == "http" and not self.custom_endpoint:
                log_event("custom_feedback_unavailable", reason="missing_endpoint")
                self.provider = "fallback"
            if self.provider == "local":
                self.model = os.getenv("COMPAIR_LOCAL_GENERATION_MODEL", "local-feedback")
                base_url = os.getenv("COMPAIR_LOCAL_MODEL_URL", "http://127.0.0.1:9000")
                route = os.getenv("COMPAIR_LOCAL_GENERATION_ROUTE", "/generate")
                self.endpoint = f"{base_url.rstrip('/')}{route}"
            else:
                self.model = "external"
                self.endpoint = None
            if self.provider not in {"local", "openai", "http", "fallback"}:
                log_event("feedback_provider_unknown", provider=self.provider)
                self.provider = "fallback"

    @property
    def is_cloud(self) -> bool:
        return self._cloud_impl is not None


def _reference_snippets(references: Iterable[Any], limit: int = 3) -> List[str]:
    snippets: List[str] = []
    for ref in references:
        snippet = getattr(ref, "content", "") or ""
        snippet = snippet.replace("\n", " ").strip()
        if snippet:
            snippets.append(snippet[:200])
            if len(snippets) == limit:
                break
    return snippets


def _fallback_feedback(text: str, references: list[Any]) -> str:
    snippets = _reference_snippets(references)
    if not snippets:
        return "NONE"
    joined = "; ".join(snippets)
    return f"Consider aligning with these reference passages: {joined}"


def _local_reference_feedback(
    reviewer: Reviewer,
    references: list[Any],
    user: User,
) -> str | None:
    if not references:
        return None
    summaries: list[str] = []
    for ref in references[:3]:
        doc = getattr(ref, "document", None)
        title = getattr(doc, "title", None) or "a related document"
        snippet = getattr(ref, "content", "") or getattr(ref, "text", "")
        snippet = snippet.replace("\n", " ").strip()
        if not snippet:
            continue
        summaries.append(f'"{title}" — {snippet[:200]}')
    if not summaries:
        return None
    instruction = reviewer.length_map.get(user.preferred_feedback_length, "1–2 short sentences")
    if len(summaries) == 1:
        body = summaries[0]
    else:
        body = "; ".join(summaries)
    return f"[local-feedback] {instruction}: Consider the guidance from {body}"


def _openai_feedback(
    reviewer: Reviewer,
    doc: Document,
    text: str,
    references: list[Any],
    user: User,
) -> str | None:
    if openai is None:
        return None
    instruction = reviewer.length_map.get(user.preferred_feedback_length, "1–2 short sentences")
    ref_text = "\n\n".join(_reference_snippets(references, limit=3))
    system_prompt = """# Identity
You are a collaborative team member on Compair, a platform designed to help teammates uncover connections, share insights, and accelerate collective learning by comparing user documents with relevant references.

# Purpose
Your goal is to quickly surface **meaningful** connections or useful contrasts between a user’s main document and shared references—especially details that could help the document author or other team members work more effectively together.

# Instructions

- **Connect the Dots:** Highlight unique insights, similarities, differences, or answers between the main document and its references. Prioritize information that is truly meaningful or helpful to the author or team.
- **Qualified Sharing:** Only point out connections that matter—avoid commenting on trivial or already-obvious overlapping details. If nothing significant stands out, respond with: **NONE**.
- **Relay Messages:** If user documents or notes are being used to communicate with teammates, relay any important updates or questions to help foster further discussion or action.
- **Be Conversational:** Respond in a friendly, direct tone—never formal or repetitive.
- **Be Constructive:** Focus on actionable insights, especially those that could inform or inspire team decisions, workflow improvements, or new ideas.

# Output Format
- If no meaningful connections or insights are present: **NONE**

# Be sure NOT to:
- Repeat the user’s content back without adding value.
- Offer generic praise or vague observations.
- Use overly technical or robotic language.
"""
    user_prompt = (
        f"Document:\n{text}\n\nRelevant reference excerpts:\n{ref_text or 'None provided'}\n\n"
        f"Respond with {instruction}."
    )

    def _extract_response_text(response: Any, reasoning_mode: bool) -> str | None:
        if response is None:
            return None
        text_out = _get_field(response, "output_text")
        if isinstance(text_out, str) and text_out.strip():
            return text_out.strip()
        outputs = _get_field(response, "output") or _get_field(response, "outputs")
        pieces: list[str] = []
        if outputs:
            for item in outputs:
                item_type = _get_field(item, "type")
                if reasoning_mode and item_type and item_type not in {"message", "assistant"}:
                    continue
                content_field = _get_field(item, "content")
                if not content_field:
                    continue
                for part in content_field:
                    part_type = _get_field(part, "type")
                    if reasoning_mode and part_type and part_type not in {"output_text", "text"}:
                        continue
                    val = _get_field(part, "text") or _get_field(part, "output_text")
                    if val:
                        pieces.append(str(val))
                    elif part and not reasoning_mode:
                        pieces.append(str(part))
        if pieces:
            merged = "\n".join(piece.strip() for piece in pieces if piece and str(piece).strip())
            return merged or None
        message = _get_field(response, "message")
        if isinstance(message, dict):
            message_content = message.get("content") or message.get("text")
            if isinstance(message_content, str) and message_content.strip():
                return message_content.strip()
        return None

    try:
        client = reviewer._openai_client
        if client is None and hasattr(openai, "OpenAI"):
            api_key = os.getenv("COMPAIR_OPENAI_API_KEY") or None
            try:  # pragma: no cover - optional dependency differences
                client = openai.OpenAI(api_key=api_key) if api_key else openai.OpenAI()
            except TypeError:
                client = openai.OpenAI()
            reviewer._openai_client = client

        content: str | None = None
        uses_reasoning = reviewer.uses_reasoning_model
        if client is not None and hasattr(client, "responses"):
            request_kwargs: dict[str, Any] = {
                "model": reviewer.openai_model,
            }
            if uses_reasoning:
                request_kwargs["input"] = [
                    {"role": "developer", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ]
                if reviewer.openai_reasoning_effort:
                    request_kwargs["reasoning"] = {"effort": reviewer.openai_reasoning_effort}
            else:
                request_kwargs["instructions"] = system_prompt
                request_kwargs["input"] = user_prompt
            response = client.responses.create(
                **request_kwargs,
            )
            content = _extract_response_text(response, reasoning_mode=uses_reasoning)
        elif client is not None and hasattr(client, "chat") and hasattr(client.chat, "completions"):
            response = client.chat.completions.create(
                model=reviewer.openai_model,
                messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
                temperature=0.3,
                max_tokens=256,
            )
            choices = getattr(response, "choices", None) or []
            if choices:
                message = getattr(choices[0], "message", None)
                if message is not None:
                    content = getattr(message, "content", None)
                if not content:
                    content = getattr(choices[0], "text", None)
            if isinstance(content, str):
                content = content.strip()
        elif hasattr(openai, "ChatCompletion"):
            chat_response = openai.ChatCompletion.create(  # type: ignore[attr-defined]
                model=reviewer.openai_model,
                messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
                temperature=0.3,
                max_tokens=256,
            )
            content = chat_response["choices"][0]["message"]["content"].strip()  # type: ignore[index, assignment]
        if content:
            return content.strip()
    except Exception as exc:  # pragma: no cover - network/API failure
        log_event("openai_feedback_failed", error=str(exc))
    return None


def _local_feedback(
    reviewer: Reviewer,
    text: str,
    references: list[Any],
    user: User,
) -> str | None:
    payload = {
        "document": text,
        "references": [getattr(ref, "content", "") for ref in references],
        "length_instruction": reviewer.length_map.get(
            user.preferred_feedback_length,
            "1–2 short sentences",
        ),
    }

    try:
        response = requests.post(reviewer.endpoint, json=payload, timeout=30)
        response.raise_for_status()
        data = response.json()
        feedback = data.get("feedback") or data.get("text")
        if feedback:
            return str(feedback).strip()
    except Exception as exc:  # pragma: no cover - network failures stay graceful
        log_event("local_feedback_failed", error=str(exc))

    return None


def _http_feedback(
    reviewer: Reviewer,
    text: str,
    references: list[Any],
    user: User,
) -> str | None:
    if not reviewer.custom_endpoint:
        return None
    payload = {
        "document": text,
        "references": [getattr(ref, "content", "") for ref in references],
        "length_instruction": reviewer.length_map.get(
            user.preferred_feedback_length,
            "1–2 short sentences",
        ),
    }
    try:
        response = requests.post(reviewer.custom_endpoint, json=payload, timeout=30)
        response.raise_for_status()
        data = response.json()
        feedback = data.get("feedback") or data.get("text")
        if isinstance(feedback, str):
            feedback = feedback.strip()
        if feedback:
            return feedback
    except Exception as exc:  # pragma: no cover - network failures stay graceful
        log_event("custom_feedback_failed", error=str(exc))
    return None


def get_feedback(
    reviewer: Reviewer,
    doc: Document,
    text: str,
    references: list[Any],
    user: User,
) -> str:
    if reviewer.is_cloud and cloud_get_feedback is not None:
        return cloud_get_feedback(reviewer._cloud_impl, doc, text, references, user)  # type: ignore[arg-type]

    if reviewer.provider == "openai":
        feedback = _openai_feedback(reviewer, doc, text, references, user)
        if feedback:
            return feedback

    if reviewer.provider == "http":
        feedback = _http_feedback(reviewer, text, references, user)
        if feedback:
            return feedback

    if reviewer.provider == "local":
        feedback = _local_reference_feedback(reviewer, references, user)
        if feedback:
            return feedback
        if getattr(reviewer, "endpoint", None):
            feedback = _local_feedback(reviewer, text, references, user)
            if feedback:
                return feedback

    return _fallback_feedback(text, references)
```
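`Reviewer` keys its OpenAI request shape off `_is_reasoning_model_name`: names matching a reasoning prefix get the Responses-style payload with a developer message and a reasoning effort, while everything else gets instructions-plus-input or chat-completions arguments. The matching rule restated standalone (the sample model names beyond the `gpt-5-nano` default are illustrative):

```python
_REASONING_PREFIXES = ("gpt-5", "o1", "o2", "o3", "o4")

def is_reasoning_model_name(model_name: str | None) -> bool:
    if not model_name:
        return False
    normalized = model_name.lower()
    # A name matches only when it equals a prefix exactly or
    # extends it with "-" or "." (e.g. "gpt-5-nano", "o3.1").
    return any(
        normalized == p or normalized.startswith(f"{p}-") or normalized.startswith(f"{p}.")
        for p in _REASONING_PREFIXES
    )

assert is_reasoning_model_name("gpt-5-nano")  # the default COMPAIR_OPENAI_MODEL
assert is_reasoning_model_name("o3")
assert not is_reasoning_model_name("gpt-4o")  # does not extend any prefix
assert not is_reasoning_model_name("o1x")     # must be exact or prefix + "-"/"."
```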
compair_core/compair/logger.py
@@ -0,0 +1,29 @@

```python
from __future__ import annotations

import json
import logging
from typing import Any, Callable

try:
    from compair_cloud.logger import log_event as cloud_log_event  # type: ignore
except (ImportError, ModuleNotFoundError):
    cloud_log_event: Callable[..., None] | None = None


if not logging.getLogger().handlers:
    logging.basicConfig(level=logging.INFO)

_LOGGER = logging.getLogger("compair.core")


def log_event(message: str, **fields: Any) -> None:
    """Emit a structured log entry for the core edition."""
    if cloud_log_event:
        cloud_log_event(message, **fields)
        return

    try:
        payload = json.dumps({"message": message, **fields}, default=str)
    except TypeError:
        payload = json.dumps({"message": message}, default=str)
    _LOGGER.info(payload)
```
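In the core edition `log_event` reduces to one JSON object per event through a named stdlib logger, which keeps fields machine-parseable without a custom formatter. The same pattern restated standalone (the event name and fields here are illustrative):

```python
import json
import logging

logging.basicConfig(level=logging.INFO)
_LOGGER = logging.getLogger("compair.core")

def log_event(message: str, **fields) -> None:
    # One JSON object per line; default=str keeps datetimes, UUIDs, etc. loggable.
    _LOGGER.info(json.dumps({"message": message, **fields}, default=str))

log_event("local_embedding_failed", error="connection refused", attempt=2)
# INFO:compair.core:{"message": "local_embedding_failed", "error": "connection refused", "attempt": 2}
```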