appkit-assistant 0.17.3__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff reflects the changes between publicly released package versions as they appear in their public registry and is provided for informational purposes only.
- appkit_assistant/backend/{models.py → database/models.py} +32 -132
- appkit_assistant/backend/{repositories.py → database/repositories.py} +93 -1
- appkit_assistant/backend/model_manager.py +5 -5
- appkit_assistant/backend/models/__init__.py +28 -0
- appkit_assistant/backend/models/anthropic.py +31 -0
- appkit_assistant/backend/models/google.py +27 -0
- appkit_assistant/backend/models/openai.py +50 -0
- appkit_assistant/backend/models/perplexity.py +56 -0
- appkit_assistant/backend/processors/__init__.py +29 -0
- appkit_assistant/backend/processors/claude_responses_processor.py +205 -387
- appkit_assistant/backend/processors/gemini_responses_processor.py +290 -352
- appkit_assistant/backend/processors/lorem_ipsum_processor.py +6 -4
- appkit_assistant/backend/processors/mcp_mixin.py +297 -0
- appkit_assistant/backend/processors/openai_base.py +11 -125
- appkit_assistant/backend/processors/openai_chat_completion_processor.py +5 -3
- appkit_assistant/backend/processors/openai_responses_processor.py +480 -402
- appkit_assistant/backend/processors/perplexity_processor.py +156 -79
- appkit_assistant/backend/{processor.py → processors/processor_base.py} +7 -2
- appkit_assistant/backend/processors/streaming_base.py +188 -0
- appkit_assistant/backend/schemas.py +138 -0
- appkit_assistant/backend/services/auth_error_detector.py +99 -0
- appkit_assistant/backend/services/chunk_factory.py +273 -0
- appkit_assistant/backend/services/citation_handler.py +292 -0
- appkit_assistant/backend/services/file_cleanup_service.py +316 -0
- appkit_assistant/backend/services/file_upload_service.py +903 -0
- appkit_assistant/backend/services/file_validation.py +138 -0
- appkit_assistant/backend/{mcp_auth_service.py → services/mcp_auth_service.py} +4 -2
- appkit_assistant/backend/services/mcp_token_service.py +61 -0
- appkit_assistant/backend/services/message_converter.py +289 -0
- appkit_assistant/backend/services/openai_client_service.py +120 -0
- appkit_assistant/backend/{response_accumulator.py → services/response_accumulator.py} +163 -1
- appkit_assistant/backend/services/system_prompt_builder.py +89 -0
- appkit_assistant/backend/services/thread_service.py +5 -3
- appkit_assistant/backend/system_prompt_cache.py +3 -3
- appkit_assistant/components/__init__.py +8 -4
- appkit_assistant/components/composer.py +59 -24
- appkit_assistant/components/file_manager.py +623 -0
- appkit_assistant/components/mcp_server_dialogs.py +12 -20
- appkit_assistant/components/mcp_server_table.py +12 -2
- appkit_assistant/components/message.py +119 -2
- appkit_assistant/components/thread.py +1 -1
- appkit_assistant/components/threadlist.py +4 -2
- appkit_assistant/components/tools_modal.py +37 -20
- appkit_assistant/configuration.py +12 -0
- appkit_assistant/state/file_manager_state.py +697 -0
- appkit_assistant/state/mcp_oauth_state.py +3 -3
- appkit_assistant/state/mcp_server_state.py +47 -2
- appkit_assistant/state/system_prompt_state.py +1 -1
- appkit_assistant/state/thread_list_state.py +99 -5
- appkit_assistant/state/thread_state.py +88 -9
- {appkit_assistant-0.17.3.dist-info → appkit_assistant-1.0.1.dist-info}/METADATA +8 -6
- appkit_assistant-1.0.1.dist-info/RECORD +58 -0
- appkit_assistant/backend/processors/claude_base.py +0 -178
- appkit_assistant/backend/processors/gemini_base.py +0 -84
- appkit_assistant-0.17.3.dist-info/RECORD +0 -39
- /appkit_assistant/backend/{file_manager.py → services/file_manager.py} +0 -0
- {appkit_assistant-0.17.3.dist-info → appkit_assistant-1.0.1.dist-info}/WHEEL +0 -0
appkit_assistant/backend/services/auth_error_detector.py
@@ -0,0 +1,99 @@
+"""Auth Error Detector for detecting authentication failures.
+
+Provides unified error detection utilities shared across all AI processors.
+"""
+
+import logging
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+
+class AuthErrorDetector:
+    """Utility class for detecting authentication errors."""
+
+    # Common authentication error indicators
+    AUTH_INDICATORS: tuple[str, ...] = (
+        "401",
+        "403",
+        "unauthorized",
+        "forbidden",
+        "authentication required",
+        "access denied",
+        "invalid token",
+        "token expired",
+        "not authenticated",
+        "auth_required",
+    )
+
+    def is_auth_error(self, error: Any) -> bool:
+        """Check if an error indicates authentication failure (401/403).
+
+        Args:
+            error: The error object or message
+
+        Returns:
+            True if the error appears to be authentication-related
+        """
+        error_str = str(error).lower()
+        return any(indicator in error_str for indicator in self.AUTH_INDICATORS)
+
+    def extract_error_text(self, error: Any) -> str:
+        """Extract readable error text from an error object.
+
+        Handles various error formats:
+        - dict with 'message' key
+        - objects with 'message' attribute
+        - plain strings or other objects
+
+        Args:
+            error: The error object
+
+        Returns:
+            Human-readable error string
+        """
+        if error is None:
+            return ""
+
+        if isinstance(error, dict):
+            return error.get("message", str(error))
+
+        if hasattr(error, "message"):
+            return getattr(error, "message", str(error))
+
+        return str(error)
+
+    def find_matching_server_in_error(
+        self,
+        error_str: str,
+        servers: list[Any],
+    ) -> Any | None:
+        """Find a server whose name appears in the error message.
+
+        Args:
+            error_str: The error message string (should be lowercase)
+            servers: List of server objects with 'name' attribute
+
+        Returns:
+            The matching server or None
+        """
+        for server in servers:
+            if hasattr(server, "name") and server.name.lower() in error_str.lower():
+                return server
+        return None
+
+
+# Singleton instance for convenience
+_auth_error_detector: AuthErrorDetector | None = None
+
+
+def get_auth_error_detector() -> AuthErrorDetector:
+    """Get or create the auth error detector singleton.
+
+    Returns:
+        The AuthErrorDetector instance
+    """
+    global _auth_error_detector
+    if _auth_error_detector is None:
+        _auth_error_detector = AuthErrorDetector()
+    return _auth_error_detector
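The detector works purely on substring matching: any error whose string form contains one of the `AUTH_INDICATORS` is treated as an authentication failure, and `find_matching_server_in_error()` relies only on a `name` attribute. A minimal usage sketch, assuming the package is installed so the module is importable at the path shown above (the `FakeServer` record and the error message are illustrative stand-ins, not part of the package):

```python
from dataclasses import dataclass

from appkit_assistant.backend.services.auth_error_detector import (
    get_auth_error_detector,
)


@dataclass
class FakeServer:
    """Illustrative stand-in for an MCP server record with a `name` attribute."""

    name: str


detector = get_auth_error_detector()
error = {"message": "401 Unauthorized: token expired for server github-mcp"}

if detector.is_auth_error(error):
    text = detector.extract_error_text(error)  # returns the dict's "message" value
    server = detector.find_matching_server_in_error(
        text.lower(), [FakeServer("github-mcp"), FakeServer("jira-mcp")]
    )
    print(f"Auth failure on {server.name if server else 'unknown server'}: {text}")
```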
appkit_assistant/backend/services/chunk_factory.py
@@ -0,0 +1,273 @@
+"""Chunk Factory for creating standardized Chunk objects.
+
+Provides unified chunk creation with consistent metadata handling
+across all AI processors.
+"""
+
+from typing import Any
+
+from appkit_assistant.backend.schemas import Chunk, ChunkType
+
+
+class ChunkFactory:
+    """Factory for creating Chunk objects with processor-specific metadata."""
+
+    def __init__(self, processor_name: str) -> None:
+        """Initialize the chunk factory.
+
+        Args:
+            processor_name: Name to include in chunk metadata (e.g., "claude_responses")
+        """
+        self._processor_name = processor_name
+
+    @property
+    def processor_name(self) -> str:
+        """Get the processor name."""
+        return self._processor_name
+
+    def create(
+        self,
+        chunk_type: ChunkType,
+        content: str,
+        extra_metadata: dict[str, Any] | None = None,
+    ) -> Chunk:
+        """Create a Chunk with standardized metadata.
+
+        Args:
+            chunk_type: The type of chunk (TEXT, THINKING, TOOL_CALL, etc.)
+            content: The text content of the chunk
+            extra_metadata: Additional metadata to include (values will be stringified)
+
+        Returns:
+            A Chunk instance with processor metadata
+        """
+        metadata: dict[str, str] = {
+            "processor": self._processor_name,
+        }
+
+        if extra_metadata:
+            for key, value in extra_metadata.items():
+                if value is not None:
+                    metadata[key] = str(value)
+
+        return Chunk(
+            type=chunk_type,
+            text=content,
+            chunk_metadata=metadata,
+        )
+
+    def text(self, content: str, delta: str | None = None) -> Chunk:
+        """Create a TEXT chunk.
+
+        Args:
+            content: The text content
+            delta: Optional delta text for streaming
+
+        Returns:
+            A TEXT Chunk
+        """
+        metadata = {"delta": delta} if delta else None
+        return self.create(ChunkType.TEXT, content, metadata)
+
+    def thinking(
+        self,
+        content: str,
+        reasoning_id: str | None = None,
+        status: str = "in_progress",
+        delta: str | None = None,
+    ) -> Chunk:
+        """Create a THINKING chunk.
+
+        Args:
+            content: The thinking content
+            reasoning_id: Optional reasoning session ID
+            status: Status of the thinking (starting, in_progress, completed)
+            delta: Optional delta text for streaming
+
+        Returns:
+            A THINKING Chunk
+        """
+        metadata: dict[str, Any] = {"status": status}
+        if reasoning_id:
+            metadata["reasoning_id"] = reasoning_id
+        if delta is not None:
+            metadata["delta"] = delta
+        return self.create(ChunkType.THINKING, content, metadata)
+
+    def thinking_result(
+        self,
+        content: str,
+        reasoning_id: str | None = None,
+    ) -> Chunk:
+        """Create a THINKING_RESULT chunk.
+
+        Args:
+            content: The result content
+            reasoning_id: Optional reasoning session ID
+
+        Returns:
+            A THINKING_RESULT Chunk
+        """
+        metadata: dict[str, Any] = {"status": "completed"}
+        if reasoning_id:
+            metadata["reasoning_id"] = reasoning_id
+        return self.create(ChunkType.THINKING_RESULT, content, metadata)
+
+    def tool_call(
+        self,
+        content: str,
+        tool_name: str,
+        tool_id: str,
+        server_label: str | None = None,
+        status: str = "starting",
+        reasoning_session: str | None = None,
+    ) -> Chunk:
+        """Create a TOOL_CALL chunk.
+
+        Args:
+            content: Description of the tool call
+            tool_name: Name of the tool being called
+            tool_id: Unique identifier for this tool call
+            server_label: Optional MCP server label
+            status: Status of the tool call
+            reasoning_session: Optional reasoning session ID
+
+        Returns:
+            A TOOL_CALL Chunk
+        """
+        metadata: dict[str, Any] = {
+            "tool_name": tool_name,
+            "tool_id": tool_id,
+            "status": status,
+        }
+        if server_label:
+            metadata["server_label"] = server_label
+        if reasoning_session:
+            metadata["reasoning_session"] = reasoning_session
+        return self.create(ChunkType.TOOL_CALL, content, metadata)
+
+    def tool_result(
+        self,
+        content: str,
+        tool_id: str,
+        status: str = "completed",
+        is_error: bool = False,
+        reasoning_session: str | None = None,
+        tool_name: str | None = None,
+        server_label: str | None = None,
+    ) -> Chunk:
+        """Create a TOOL_RESULT chunk.
+
+        Args:
+            content: The tool result content
+            tool_id: The tool call ID this result corresponds to
+            status: Status of the result
+            is_error: Whether the result is an error
+            reasoning_session: Optional reasoning session ID
+            tool_name: Optional tool name for display
+            server_label: Optional server label for MCP tools
+
+        Returns:
+            A TOOL_RESULT Chunk
+        """
+        metadata: dict[str, Any] = {
+            "tool_id": tool_id,
+            "status": status,
+            "error": is_error,
+        }
+        if reasoning_session:
+            metadata["reasoning_session"] = reasoning_session
+        if tool_name:
+            metadata["tool_name"] = tool_name
+        if server_label:
+            metadata["server_label"] = server_label
+        return self.create(ChunkType.TOOL_RESULT, content, metadata)
+
+    def lifecycle(self, stage: str, extra: dict[str, Any] | None = None) -> Chunk:
+        """Create a LIFECYCLE chunk.
+
+        Args:
+            stage: The lifecycle stage (created, in_progress, done)
+            extra: Additional metadata
+
+        Returns:
+            A LIFECYCLE Chunk
+        """
+        metadata: dict[str, Any] = {"stage": stage}
+        if extra:
+            metadata.update(extra)
+        return self.create(ChunkType.LIFECYCLE, stage, metadata)
+
+    def completion(self, status: str = "response_complete") -> Chunk:
+        """Create a COMPLETION chunk.
+
+        Args:
+            status: The completion status
+
+        Returns:
+            A COMPLETION Chunk
+        """
+        return self.create(
+            ChunkType.COMPLETION,
+            "Response generation completed",
+            {"status": status},
+        )
+
+    def error(self, message: str, error_type: str = "unknown") -> Chunk:
+        """Create an ERROR chunk.
+
+        Args:
+            message: The error message
+            error_type: The type of error
+
+        Returns:
+            An ERROR Chunk
+        """
+        return self.create(
+            ChunkType.ERROR,
+            message,
+            {"error_type": error_type},
+        )
+
+    def auth_required(
+        self,
+        server_name: str,
+        server_id: str | None = None,
+        auth_url: str = "",
+        state: str = "",
+    ) -> Chunk:
+        """Create an AUTH_REQUIRED chunk.
+
+        Args:
+            server_name: Name of the server requiring auth
+            server_id: Optional server ID
+            auth_url: The authorization URL
+            state: The OAuth state parameter
+
+        Returns:
+            An AUTH_REQUIRED Chunk
+        """
+        metadata: dict[str, Any] = {
+            "server_name": server_name,
+            "auth_url": auth_url,
+            "state": state,
+        }
+        if server_id:
+            metadata["server_id"] = server_id
+        return self.create(
+            ChunkType.AUTH_REQUIRED,
+            f"{server_name} benötigt Ihre Autorisierung",
+            metadata,
+        )
+
+    def annotation(self, text: str, annotation_data: dict[str, Any]) -> Chunk:
+        """Create an ANNOTATION chunk.
+
+        Args:
+            text: The annotation text
+            annotation_data: Annotation metadata
+
+        Returns:
+            An ANNOTATION Chunk
+        """
+        return self.create(ChunkType.ANNOTATION, text, annotation_data)
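`ChunkFactory` centralizes chunk metadata: every chunk gets a `processor` entry, extra values are stringified, and `None` values are dropped. A hedged sketch of how a processor might drive it, assuming the package is installed and that `Chunk` (defined in the new `schemas.py`) exposes `type` and `chunk_metadata` as attributes; the processor name and tool values below are made up:

```python
from appkit_assistant.backend.services.chunk_factory import ChunkFactory

# "openai_responses" is just an example label; any processor name works.
factory = ChunkFactory("openai_responses")

chunks = [
    factory.lifecycle("created"),
    factory.thinking("Evaluating the question", reasoning_id="rs_1"),
    factory.tool_call(
        "Calling search",
        tool_name="search",
        tool_id="call_1",
        server_label="docs-mcp",
    ),
    factory.tool_result("3 results found", tool_id="call_1", tool_name="search"),
    factory.text("Here is the answer.", delta="Here is the answer."),
    factory.completion(),
]

for chunk in chunks:
    # Every chunk carries {"processor": "openai_responses"} plus the
    # stringified extras passed above.
    print(chunk.type, chunk.chunk_metadata)
```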
appkit_assistant/backend/services/citation_handler.py
@@ -0,0 +1,292 @@
+"""Citation Handler Protocol and implementations.
+
+Provides a unified interface for extracting and yielding citations
+from different AI API responses.
+"""
+
+import json
+import logging
+from abc import ABC, abstractmethod
+from collections.abc import AsyncGenerator
+from dataclasses import dataclass, field
+from typing import Any, Protocol
+
+from appkit_assistant.backend.schemas import Chunk, ChunkType
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class Citation:
+    """Standardized citation data structure."""
+
+    cited_text: str = ""
+    document_title: str | None = None
+    document_index: int = 0
+    url: str | None = None
+    # Location info (varies by type)
+    start_char_index: int | None = None
+    end_char_index: int | None = None
+    start_page_number: int | None = None
+    end_page_number: int | None = None
+    start_block_index: int | None = None
+    end_block_index: int | None = None
+    # Raw data for passthrough
+    raw_data: dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        result = {
+            "cited_text": self.cited_text,
+            "document_index": self.document_index,
+        }
+        if self.document_title:
+            result["document_title"] = self.document_title
+        if self.url:
+            result["url"] = self.url
+        if self.start_char_index is not None:
+            result["start_char_index"] = self.start_char_index
+            result["end_char_index"] = self.end_char_index
+        if self.start_page_number is not None:
+            result["start_page_number"] = self.start_page_number
+            result["end_page_number"] = self.end_page_number
+        if self.start_block_index is not None:
+            result["start_block_index"] = self.start_block_index
+            result["end_block_index"] = self.end_block_index
+        return result
+
+
+class CitationHandlerProtocol(Protocol):
+    """Protocol for citation handlers."""
+
+    def extract_citations(self, delta: Any) -> list[Citation]:
+        """Extract citations from a response delta/chunk.
+
+        Args:
+            delta: The response delta object (vendor-specific)
+
+        Returns:
+            List of extracted Citation objects
+        """
+        ...
+
+    async def yield_citation_chunks(
+        self,
+        citations: list[Citation] | list[str],
+        processor_name: str,
+    ) -> AsyncGenerator[Chunk, None]:
+        """Yield citation chunks for display.
+
+        Args:
+            citations: List of Citation objects or URL strings
+            processor_name: Name of the processor for metadata
+
+        Yields:
+            Chunk objects for citation display
+        """
+        ...
+
+
+class BaseCitationHandler(ABC):
+    """Base class for citation handlers."""
+
+    @abstractmethod
+    def extract_citations(self, delta: Any) -> list[Citation]:
+        """Extract citations from a response delta."""
+
+    async def yield_citation_chunks(
+        self,
+        citations: list[Citation] | list[str],
+        processor_name: str,
+    ) -> AsyncGenerator[Chunk, None]:
+        """Default implementation yields ANNOTATION chunks."""
+        if not citations:
+            return
+
+        logger.debug("Processing %d citations", len(citations))
+
+        for citation in citations:
+            if isinstance(citation, str):
+                # URL string
+                yield Chunk(
+                    type=ChunkType.ANNOTATION,
+                    text=citation,
+                    chunk_metadata={
+                        "url": citation,
+                        "processor": processor_name,
+                    },
+                )
+            else:
+                # Citation object
+                text = citation.url or citation.document_title or citation.cited_text
+                yield Chunk(
+                    type=ChunkType.ANNOTATION,
+                    text=text or "",
+                    chunk_metadata={
+                        "citation": json.dumps(citation.to_dict()),
+                        "processor": processor_name,
+                    },
+                )
+
+
+class ClaudeCitationHandler(BaseCitationHandler):
+    """Citation handler for Claude API responses.
+
+    Claude provides citations in text delta's citations field with
+    various location types (char, page, content_block).
+    """
+
+    def extract_citations(self, delta: Any) -> list[Citation]:
+        """Extract citations from Claude text delta.
+
+        Args:
+            delta: Claude API text delta object
+
+        Returns:
+            List of Citation objects
+        """
+        citations = []
+
+        text_block_citations = getattr(delta, "citations", None)
+        if not text_block_citations:
+            return citations
+
+        for citation_obj in text_block_citations:
+            citation = Citation(
+                cited_text=getattr(citation_obj, "cited_text", ""),
+                document_index=getattr(citation_obj, "document_index", 0),
+                document_title=getattr(citation_obj, "document_title", None),
+            )
+
+            # Handle different citation location types
+            citation_type = getattr(citation_obj, "type", None)
+            if citation_type == "char_location":
+                citation.start_char_index = getattr(citation_obj, "start_char_index", 0)
+                citation.end_char_index = getattr(citation_obj, "end_char_index", 0)
+            elif citation_type == "page_location":
+                citation.start_page_number = getattr(
+                    citation_obj, "start_page_number", 0
+                )
+                citation.end_page_number = getattr(citation_obj, "end_page_number", 0)
+            elif citation_type == "content_block_location":
+                citation.start_block_index = getattr(
+                    citation_obj, "start_block_index", 0
+                )
+                citation.end_block_index = getattr(citation_obj, "end_block_index", 0)
+
+            citations.append(citation)
+
+        return citations
+
+
+class PerplexityCitationHandler(BaseCitationHandler):
+    """Citation handler for Perplexity API responses.
+
+    Perplexity provides URL-based citations that should be displayed
+    as annotation chunks after streaming completes.
+    """
+
+    def extract_citations(self, delta: Any) -> list[Citation]:
+        """Extract citations from Perplexity response.
+
+        Args:
+            delta: Perplexity response with citations attribute
+
+        Returns:
+            List of Citation objects with URLs
+        """
+        citations = []
+
+        raw_citations = getattr(delta, "citations", None)
+        if not raw_citations:
+            return citations
+
+        citations.extend(
+            Citation(url=url, document_title=url)
+            for url in raw_citations
+            if isinstance(url, str)
+        )
+
+        return citations
+
+    async def yield_citation_chunks(
+        self,
+        citations: list[Citation] | list[str],
+        processor_name: str,
+    ) -> AsyncGenerator[Chunk, None]:
+        """Yield citation chunks for Perplexity.
+
+        Perplexity yields:
+        1. A TEXT chunk with all citations in metadata (for accumulator)
+        2. Individual ANNOTATION chunks for immediate display
+        """
+        if not citations:
+            return
+
+        logger.debug("Processing %d Perplexity citations", len(citations))
+
+        # Convert to list of dicts for JSON
+        citations_data = []
+        for citation in citations:
+            if isinstance(citation, str):
+                citations_data.append({"url": citation, "document_title": citation})
+            else:
+                citations_data.append(
+                    {
+                        "url": citation.url,
+                        "document_title": citation.document_title,
+                    }
+                )
+
+        # Yield TEXT chunk with citations metadata for accumulator
+        yield Chunk(
+            type=ChunkType.TEXT,
+            text="",  # Empty text, just carries citations metadata
+            chunk_metadata={
+                "citations": json.dumps(citations_data),
+                "source": "perplexity",
+                "processor": processor_name,
+            },
+        )
+
+        # Yield individual ANNOTATION chunks for display
+        for citation in citations:
+            url = citation if isinstance(citation, str) else citation.url
+            if url:
+                yield Chunk(
+                    type=ChunkType.ANNOTATION,
+                    text=url,
+                    chunk_metadata={
+                        "url": url,
+                        "source": "perplexity",
+                        "processor": processor_name,
+                    },
+                )
+
+
+class NullCitationHandler(BaseCitationHandler):
+    """No-op citation handler for APIs without citation support.
+
+    Use this for OpenAI and Gemini processors that don't have
+    built-in citation extraction.
+    """
+
+    def extract_citations(self, delta: Any) -> list[Citation]:  # noqa: ARG002
+        """Return empty list - no citation support.
+
+        Args:
+            delta: Ignored
+
+        Returns:
+            Empty list
+        """
+        return []
+
+    async def yield_citation_chunks(
+        self,
+        citations: list[Citation] | list[str],  # noqa: ARG002
+        processor_name: str,  # noqa: ARG002
+    ) -> AsyncGenerator[Chunk, None]:
+        """Yield nothing - no citation support."""
+        return
+        yield  # Make this a generator  # noqa: B901
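All handlers share the same two-step interface: `extract_citations()` normalizes a vendor-specific delta into `Citation` objects, and `yield_citation_chunks()` turns them into display chunks (for Perplexity, one empty `TEXT` chunk carrying all citations as JSON metadata, then one `ANNOTATION` chunk per URL). A sketch of that flow, assuming the package is installed; the `FakeDelta` object stands in for a Perplexity response and is not part of the package:

```python
import asyncio

from appkit_assistant.backend.services.citation_handler import (
    PerplexityCitationHandler,
)


class FakeDelta:
    """Illustrative stand-in for a Perplexity response carrying citation URLs."""

    citations = ["https://example.com/a", "https://example.com/b"]


async def main() -> None:
    handler = PerplexityCitationHandler()
    citations = handler.extract_citations(FakeDelta())

    # First an empty TEXT chunk whose metadata bundles all citations as JSON,
    # then one ANNOTATION chunk per URL.
    async for chunk in handler.yield_citation_chunks(citations, "perplexity"):
        print(chunk.type, chunk.text, chunk.chunk_metadata)


asyncio.run(main())
```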