nornweave 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,52 @@
+ """LLM provider abstraction for thread summarization.
+
+ Factory function to create the appropriate SummaryProvider based on configuration.
+ """
+
+ import logging
+
+ from nornweave.core.config import get_settings
+ from nornweave.verdandi.llm.base import SummaryProvider, SummaryResult
+
+ __all__ = ["SummaryProvider", "SummaryResult", "get_summary_provider"]
+
+ logger = logging.getLogger(__name__)
+
+
+ def get_summary_provider() -> SummaryProvider | None:
+     """
+     Create and return the appropriate SummaryProvider based on LLM configuration.
+
+     Returns:
+         A SummaryProvider instance if LLM_PROVIDER is configured, None if disabled.
+
+     Raises:
+         ImportError: If the required provider SDK is not installed.
+         ValueError: If LLM_PROVIDER is set but LLM_API_KEY is missing.
+     """
+     settings = get_settings()
+
+     if settings.llm_provider is None:
+         return None
+
+     api_key = settings.llm_api_key
+     model = settings.llm_model
+     prompt = settings.llm_summary_prompt
+
+     if settings.llm_provider == "openai":
+         from nornweave.verdandi.llm.openai import OpenAISummaryProvider
+
+         return OpenAISummaryProvider(api_key=api_key, model=model, prompt=prompt)
+
+     if settings.llm_provider == "anthropic":
+         from nornweave.verdandi.llm.anthropic import AnthropicSummaryProvider
+
+         return AnthropicSummaryProvider(api_key=api_key, model=model, prompt=prompt)
+
+     if settings.llm_provider == "gemini":
+         from nornweave.verdandi.llm.gemini import GeminiSummaryProvider
+
+         return GeminiSummaryProvider(api_key=api_key, model=model, prompt=prompt)
+
+     msg = f"Unknown LLM provider: {settings.llm_provider}"
+     raise ValueError(msg)
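
Reviewer note: the factory returns None when summarization is disabled, so callers branch on that rather than catching an exception. A minimal usage sketch (not part of the package; the thread text is an illustrative placeholder):

    import asyncio

    from nornweave.verdandi.llm import get_summary_provider

    async def main() -> None:
        provider = get_summary_provider()
        if provider is None:
            print("LLM_PROVIDER not set; summarization disabled")
            return
        # summarize() is async on every provider, per the SummaryProvider protocol
        result = await provider.summarize("[2024-01-01 09:00] alice@example.com:\nHello!")
        print(result.summary, result.total_tokens)

    asyncio.run(main())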
@@ -0,0 +1,63 @@
+ """Anthropic summarization provider."""
+
+ import logging
+
+ from nornweave.verdandi.llm.base import SummaryResult
+
+ logger = logging.getLogger(__name__)
+
+ DEFAULT_MODEL = "claude-haiku"
+
+
+ class AnthropicSummaryProvider:
+     """Summarization provider using Anthropic Messages API."""
+
+     def __init__(self, api_key: str, model: str = "", prompt: str = "") -> None:
+         try:
+             from anthropic import AsyncAnthropic
+         except ImportError:
+             msg = (
+                 "Anthropic package is required for LLM_PROVIDER='anthropic'. "
+                 "Install with: uv add anthropic"
+             )
+             raise ImportError(msg) from None
+
+         self.client = AsyncAnthropic(api_key=api_key)
+         self.model = model or DEFAULT_MODEL
+         self.prompt = prompt
+
+     async def summarize(self, text: str) -> SummaryResult:
+         """Generate a summary using Anthropic Messages API."""
+         try:
+             from anthropic import APIError
+         except ImportError:
+             APIError = Exception  # type: ignore[assignment,misc]
+
+         try:
+             response = await self.client.messages.create(
+                 model=self.model,
+                 max_tokens=1024,
+                 system=self.prompt,
+                 messages=[
+                     {"role": "user", "content": text},
+                 ],
+             )
+         except APIError as e:
+             logger.error("Anthropic API error: %s %s", getattr(e, "status_code", None), e.message)
+             raise
+
+         summary = ""
+         for block in response.content:
+             if block.type == "text":
+                 summary += block.text
+
+         input_tokens = response.usage.input_tokens
+         output_tokens = response.usage.output_tokens
+
+         return SummaryResult(
+             summary=summary.strip(),
+             input_tokens=input_tokens,
+             output_tokens=output_tokens,
+             total_tokens=input_tokens + output_tokens,
+             model=response.model or self.model,
+         )
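
Reviewer note: a minimal direct-use sketch, assuming an Anthropic key in the environment (the variable name below is illustrative; nornweave itself reads settings.llm_api_key). With model="" the provider falls back to DEFAULT_MODEL:

    import asyncio
    import os

    from nornweave.verdandi.llm.anthropic import AnthropicSummaryProvider

    async def main() -> None:
        # Key lookup is illustrative, not how the package wires it up.
        provider = AnthropicSummaryProvider(api_key=os.environ["ANTHROPIC_API_KEY"])
        result = await provider.summarize("[2024-01-01 09:00] alice@example.com:\nHello!")
        print(result.model, result.summary)

    asyncio.run(main())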
@@ -0,0 +1,35 @@
+ """Base protocol and data types for LLM summarization providers."""
+
+ from dataclasses import dataclass
+ from typing import Protocol, runtime_checkable
+
+
+ @dataclass
+ class SummaryResult:
+     """Result of an LLM summarization call."""
+
+     summary: str
+     input_tokens: int
+     output_tokens: int
+     total_tokens: int
+     model: str
+
+
+ @runtime_checkable
+ class SummaryProvider(Protocol):
+     """Protocol for LLM summarization providers."""
+
+     async def summarize(self, text: str) -> SummaryResult:
+         """
+         Generate a summary of the given text.
+
+         Args:
+             text: The thread text to summarize (formatted conversation).
+
+         Returns:
+             SummaryResult with the generated summary and token usage.
+
+         Raises:
+             Exception: If the provider API call fails.
+         """
+         ...
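
Reviewer note: because the protocol is @runtime_checkable, any object with a matching summarize method passes isinstance checks, which is handy for test doubles. A sketch with illustrative names:

    import asyncio

    from nornweave.verdandi.llm.base import SummaryProvider, SummaryResult

    class FakeSummaryProvider:
        """Test double; never calls a real API."""

        async def summarize(self, text: str) -> SummaryResult:
            tokens = len(text) // 4  # rough estimate, mirroring the package's heuristic
            return SummaryResult(
                summary="stub summary",
                input_tokens=tokens,
                output_tokens=3,
                total_tokens=tokens + 3,
                model="fake",
            )

    assert isinstance(FakeSummaryProvider(), SummaryProvider)  # structural check only
    print(asyncio.run(FakeSummaryProvider().summarize("hello")).summary)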
@@ -0,0 +1,78 @@
+ """Google Gemini summarization provider using REST API directly.
+
+ Uses httpx instead of the google-genai SDK to avoid credential resolution
+ conflicts with other Google Cloud libraries (google-auth, google-cloud-storage).
+ """
+
+ import logging
+ from typing import Any
+
+ import httpx
+
+ from nornweave.verdandi.llm.base import SummaryResult
+
+ logger = logging.getLogger(__name__)
+
+ DEFAULT_MODEL = "gemini-2.0-flash"
+ _BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
+
+
+ class GeminiSummaryProvider:
+     """Summarization provider using Google Generative Language REST API."""
+
+     def __init__(self, api_key: str, model: str = "", prompt: str = "") -> None:
+         self.api_key = api_key
+         self.model = model or DEFAULT_MODEL
+         self.prompt = prompt
+
+     async def summarize(self, text: str) -> SummaryResult:
+         """Generate a summary using the Gemini REST API."""
+         url = f"{_BASE_URL}/models/{self.model}:generateContent"
+
+         payload: dict[str, Any] = {
+             "contents": [{"parts": [{"text": text}]}],
+         }
+         if self.prompt:
+             payload["systemInstruction"] = {"parts": [{"text": self.prompt}]}
+
+         async with httpx.AsyncClient(timeout=60.0) as client:
+             response = await client.post(
+                 url,
+                 json=payload,
+                 headers={
+                     "Content-Type": "application/json",
+                     "X-goog-api-key": self.api_key,
+                 },
+             )
+
+         if response.status_code != 200:
+             body = (
+                 response.json()
+                 if response.headers.get("content-type", "").startswith("application/json")
+                 else {}
+             )
+             error_msg = body.get("error", {}).get("message", response.text[:200])
+             logger.error("Gemini API error %d: %s", response.status_code, error_msg)
+             msg = f"Gemini API returned {response.status_code}: {error_msg}"
+             raise RuntimeError(msg)
+
+         data = response.json()
+         candidates = data.get("candidates", [])
+         summary = ""
+         if candidates:
+             parts = candidates[0].get("content", {}).get("parts", [])
+             summary = "".join(p.get("text", "") for p in parts)
+
+         # Extract token usage from usageMetadata
+         usage = data.get("usageMetadata", {})
+         input_tokens = usage.get("promptTokenCount", 0)
+         output_tokens = usage.get("candidatesTokenCount", 0)
+         total_tokens = usage.get("totalTokenCount", input_tokens + output_tokens)
+
+         return SummaryResult(
+             summary=summary.strip(),
+             input_tokens=input_tokens,
+             output_tokens=output_tokens,
+             total_tokens=total_tokens,
+             model=self.model,
+         )
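
Reviewer note: the parsing above expects a generateContent response shaped roughly like the abridged example below (values are illustrative, not captured from a live call):

    data = {
        "candidates": [
            {"content": {"parts": [{"text": "Thread about Q3 planning."}]}},
        ],
        "usageMetadata": {
            "promptTokenCount": 812,
            "candidatesTokenCount": 9,
            "totalTokenCount": 821,
        },
    }

    # Same extraction steps as summarize() above:
    parts = data["candidates"][0].get("content", {}).get("parts", [])
    print("".join(p.get("text", "") for p in parts))  # Thread about Q3 planning.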
@@ -0,0 +1,60 @@
+ """OpenAI summarization provider."""
+
+ import logging
+
+ from nornweave.verdandi.llm.base import SummaryResult
+
+ logger = logging.getLogger(__name__)
+
+ DEFAULT_MODEL = "gpt-4o-mini"
+
+
+ class OpenAISummaryProvider:
+     """Summarization provider using OpenAI Chat Completions API."""
+
+     def __init__(self, api_key: str, model: str = "", prompt: str = "") -> None:
+         try:
+             from openai import AsyncOpenAI
+         except ImportError:
+             msg = (
+                 "OpenAI package is required for LLM_PROVIDER='openai'. Install with: uv add openai"
+             )
+             raise ImportError(msg) from None
+
+         self.client = AsyncOpenAI(api_key=api_key)
+         self.model = model or DEFAULT_MODEL
+         self.prompt = prompt
+
+     async def summarize(self, text: str) -> SummaryResult:
+         """Generate a summary using OpenAI Chat Completions API."""
+         try:
+             from openai import APIError
+         except ImportError:
+             APIError = Exception  # type: ignore[assignment,misc]
+
+         try:
+             response = await self.client.chat.completions.create(
+                 model=self.model,
+                 messages=[
+                     {"role": "system", "content": self.prompt},
+                     {"role": "user", "content": text},
+                 ],
+             )
+         except APIError as e:
+             logger.error("OpenAI API error: %s %s", getattr(e, "status_code", None), e.message)
+             raise
+
+         choice = response.choices[0]
+         summary = choice.message.content or ""
+         usage = response.usage
+
+         input_tokens = usage.prompt_tokens if usage else 0
+         output_tokens = usage.completion_tokens if usage else 0
+
+         return SummaryResult(
+             summary=summary.strip(),
+             input_tokens=input_tokens,
+             output_tokens=output_tokens,
+             total_tokens=input_tokens + output_tokens,
+             model=response.model or self.model,
+         )
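
Reviewer note: a direct-use sketch mirroring the Anthropic example, this time overriding the default model and prompt. Key lookup and all values are illustrative:

    import asyncio
    import os

    from nornweave.verdandi.llm.openai import OpenAISummaryProvider

    async def main() -> None:
        provider = OpenAISummaryProvider(
            api_key=os.environ["OPENAI_API_KEY"],  # illustrative; nornweave uses settings.llm_api_key
            model="gpt-4o-mini",
            prompt="Summarize this email thread in two sentences.",
        )
        result = await provider.summarize("[2024-01-02 14:10] bob@example.com:\nRe: invoice...")
        print(f"{result.model}: {result.summary} ({result.total_tokens} tokens)")

    asyncio.run(main())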
@@ -22,4 +22,5 @@ def html_to_markdown(html: str) -> str:
      h.inline_links = True
      h.protect_links = True
 
-     return h.handle(html).strip()
+     result: str = h.handle(html).strip()
+     return result
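
Reviewer note: the new annotated local pins the return of h.handle() to str so the function's declared -> str is actually checked, a common pattern when a dependency (here, presumably html2text) ships without type information. The same pattern in isolation, as a standalone sketch rather than nornweave code:

    import html2text

    def demo(html: str) -> str:
        h = html2text.HTML2Text()
        h.inline_links = True
        # The annotation narrows the untyped return for the type checker.
        result: str = h.handle(html).strip()
        return result

    print(demo("<p>Hello <b>world</b></p>"))  # Hello **world**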
@@ -0,0 +1,231 @@
+ """Thread summarization orchestration.
+
+ Generates LLM-powered thread summaries from Talon-cleaned message content.
+ Runs as a fire-and-forget post-ingestion hook.
+ """
+
+ import logging
+ from datetime import UTC, datetime
+ from typing import TYPE_CHECKING
+
+ from nornweave.core.config import get_settings
+ from nornweave.verdandi.llm import get_summary_provider
+
+ if TYPE_CHECKING:
+     from nornweave.core.interfaces import StorageInterface
+     from nornweave.models.message import Message
+
+ logger = logging.getLogger(__name__)
+
+ # Approximate context window sizes per model family (in tokens).
+ # Truncation keeps only 80% of each window, reserving the rest for prompt and response.
+ _CONTEXT_WINDOWS: dict[str, int] = {
+     # OpenAI
+     "gpt-4o-mini": 128_000,
+     "gpt-4o": 128_000,
+     "gpt-4-turbo": 128_000,
+     "gpt-3.5-turbo": 16_385,
+     # Anthropic
+     "claude-haiku": 200_000,
+     "claude-sonnet": 200_000,
+     "claude-opus": 200_000,
+     # Gemini
+     "gemini-2.0-flash": 1_000_000,
+     "gemini-2.0-pro": 1_000_000,
+     "gemini-1.5-flash": 1_000_000,
+     "gemini-1.5-pro": 2_000_000,
+ }
+ _DEFAULT_CONTEXT_WINDOW = 128_000
+ # Approximate chars per token for truncation estimation
+ _CHARS_PER_TOKEN = 4
+
+
+ def prepare_thread_text(messages: list[Message]) -> str:
+     """
+     Prepare thread text for summarization from Talon-cleaned extracted_text.
+
+     Concatenates extracted_text for all messages in chronological order,
+     with [datetime] sender: headers. Falls back to raw text if extracted_text
+     is unavailable. Skips messages with no text content.
+
+     Args:
+         messages: List of messages ordered by timestamp ascending.
+
+     Returns:
+         Formatted conversation text ready for LLM summarization.
+     """
+     sorted_messages = sorted(messages, key=lambda m: m.timestamp or datetime.min.replace(tzinfo=UTC))
+     parts: list[str] = []
+
+     for msg in sorted_messages:
+         text = msg.extracted_text or msg.text
+         if not text or not text.strip():
+             continue
+
+         timestamp_str = (
+             msg.timestamp.strftime("%Y-%m-%d %H:%M") if msg.timestamp else "unknown date"
+         )
+         sender = msg.from_address or "unknown"
+         parts.append(f"[{timestamp_str}] {sender}:\n{text.strip()}")
+
+     return "\n\n".join(parts)
+
+
+ def truncate_to_context_window(text: str, model: str) -> str:
+     """
+     Truncate thread text to fit within the model's context window.
+
+     Keeps the most recent messages (from the end) that fit within 80% of
+     the model's context window, reserving space for the prompt and response.
+
+     Args:
+         text: The full conversation text.
+         model: The model identifier (used to look up context window size).
+
+     Returns:
+         The text, possibly truncated with a note about earlier messages.
+     """
+     # Find context window for this model (match by prefix for versioned models)
+     context_window = _DEFAULT_CONTEXT_WINDOW
+     for model_prefix, window in _CONTEXT_WINDOWS.items():
+         if model.startswith(model_prefix):
+             context_window = window
+             break
+
+     max_chars = int(context_window * 0.8 * _CHARS_PER_TOKEN)
+
+     if len(text) <= max_chars:
+         return text
+
+     # Split into message blocks and keep from the end
+     blocks = text.split("\n\n")
+     kept: list[str] = []
+     total_chars = 0
+     truncation_note = "[Earlier messages truncated — summary covers the most recent messages]\n\n"
+     available_chars = max_chars - len(truncation_note)
+
+     for block in reversed(blocks):
+         block_len = len(block) + 2  # +2 for the \n\n separator
+         if total_chars + block_len > available_chars:
+             break
+         kept.insert(0, block)
+         total_chars += block_len
+
+     if len(kept) < len(blocks):
+         return truncation_note + "\n\n".join(kept)
+
+     return text
+
+
+ async def check_token_budget(storage: StorageInterface) -> bool:
+     """
+     Check if the daily token budget allows another summarization call.
+
+     Args:
+         storage: Storage interface for reading token usage.
+
+     Returns:
+         True if summarization can proceed, False if budget is exhausted.
+     """
+     settings = get_settings()
+     limit = settings.llm_daily_token_limit
+
+     # 0 means unlimited
+     if limit == 0:
+         return True
+
+     today = datetime.now(UTC).date()
+     current_usage = await storage.get_token_usage(today)
+
+     if current_usage >= limit:
+         logger.debug(
+             "Daily token limit reached (%d/%d), skipping summarization", current_usage, limit
+         )
+         return False
+
+     return True
+
+
+ async def generate_thread_summary(storage: StorageInterface, thread_id: str) -> None:
+     """
+     Generate or update the LLM summary for a thread.
+
+     Orchestrates: check budget → load messages → prepare text → truncate →
+     call provider → update thread summary → record tokens.
+
+     This function is designed to be called fire-and-forget after message
+     ingestion. Failures are logged and never propagated.
+
+     Args:
+         storage: Storage interface for reading/writing data.
+         thread_id: ID of the thread to summarize.
+     """
+     try:
+         # Step 1: Check if LLM is enabled
+         provider = get_summary_provider()
+         if provider is None:
+             return
+
+         # Step 2: Check daily token budget
+         if not await check_token_budget(storage):
+             return
+
+         # Step 3: Load all messages for the thread
+         messages = await storage.list_messages_for_thread(thread_id, limit=1000)
+         if not messages:
+             return
+
+         # Step 4: Prepare thread text
+         text = prepare_thread_text(messages)
+         if not text.strip():
+             return
+
+         # Step 5: Truncate if needed
+         settings = get_settings()
+         model = settings.llm_model or ""
+         text = truncate_to_context_window(text, model)
+
+         # Step 6: Call the LLM provider
+         result = await provider.summarize(text)
+
+         # If provider didn't report tokens, estimate from character count
+         if result.total_tokens == 0 and (text or result.summary):
+             estimated_input = len(text) // _CHARS_PER_TOKEN
+             estimated_output = len(result.summary) // _CHARS_PER_TOKEN
+             result = type(result)(
+                 summary=result.summary,
+                 input_tokens=estimated_input,
+                 output_tokens=estimated_output,
+                 total_tokens=estimated_input + estimated_output,
+                 model=result.model,
+             )
+             logger.warning(
+                 "Provider did not report token usage for thread %s, estimated %d tokens",
+                 thread_id,
+                 result.total_tokens,
+             )
+
+         # Step 7: Update thread summary
+         thread = await storage.get_thread(thread_id)
+         if thread is None:
+             logger.warning("Thread %s not found when updating summary", thread_id)
+             return
+
+         thread.summary = result.summary
+         await storage.update_thread(thread)
+
+         # Step 8: Record token usage
+         today = datetime.now(UTC).date()
+         await storage.record_token_usage(today, result.total_tokens)
+
+         logger.info(
+             "Updated summary for thread %s (model=%s, tokens=%d)",
+             thread_id,
+             result.model,
+             result.total_tokens,
+         )
+
+     except Exception as exc:
+         # Provider errors are already logged at ERROR level by the provider itself.
+         # Log a concise warning here without the full traceback to keep logs clean.
+         logger.warning("Failed to generate summary for thread %s: %s", thread_id, exc)
@@ -52,12 +52,13 @@ def create_app() -> FastAPI:
          return {"status": "ok"}
 
      # Include API routers
-     from nornweave.yggdrasil.routes.v1 import inboxes, messages, search, threads
+     from nornweave.yggdrasil.routes.v1 import attachments, inboxes, messages, search, threads
 
      app.include_router(inboxes.router, prefix="/v1", tags=["inboxes"])
      app.include_router(threads.router, prefix="/v1", tags=["threads"])
      app.include_router(messages.router, prefix="/v1", tags=["messages"])
      app.include_router(search.router, prefix="/v1", tags=["search"])
+     app.include_router(attachments.router, prefix="/v1", tags=["attachments"])
 
      # Include webhook routers
      from nornweave.yggdrasil.routes.webhooks import mailgun, resend, sendgrid, ses
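
Reviewer note: the diff registers the new attachments router but does not show the router module itself. A purely hypothetical sketch of a module compatible with this registration; the route path and handler are invented for illustration:

    from fastapi import APIRouter

    router = APIRouter()

    @router.get("/attachments/{attachment_id}")  # hypothetical route
    async def get_attachment(attachment_id: str) -> dict[str, str]:
        # Placeholder body; the real handler presumably returns stored attachment data.
        return {"id": attachment_id}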