slide_narrator-0.2.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of slide-narrator might be problematic.

@@ -0,0 +1,507 @@
+ from typing import Dict, Optional, Literal, Any, Union, List, TypedDict
+ from datetime import datetime, UTC
+ from pydantic import BaseModel, Field, field_validator, model_validator
+ import hashlib
+ import json
+ from narrator.utils.logging import get_logger
+ import base64
+ # Direct imports
+ from narrator.models.attachment import Attachment
+ from narrator.storage.file_store import FileStore
+
+ # Get configured logger
+ logger = get_logger(__name__)
+
+ class ImageUrl(TypedDict):
+     url: str
+
+ class ImageContent(TypedDict):
+     type: Literal["image_url"]
+     image_url: ImageUrl
+
+ class TextContent(TypedDict):
+     type: Literal["text"]
+     text: str
+
+ class EntitySource(TypedDict, total=False):
+     id: str # Unique identifier for the entity
+     name: str # Human-readable name of the entity
+     type: Literal["user", "agent", "tool"] # Type of entity
+     attributes: Optional[Dict[str, Any]] # All other entity-specific attributes
+
+ class Message(BaseModel):
+     """Represents a single message in a thread"""
+     id: Optional[str] = None # Will be set in __init__ when not provided
+     role: Literal["system", "user", "assistant", "tool"]
+     sequence: Optional[int] = Field(
+         default=None,
+         description="Message sequence number within thread. System messages get lowest sequences."
+     )
+     turn: Optional[int] = Field(
+         default=None,
+         description="Turn number grouping related messages in the same conversational step."
+     )
+     content: Optional[Union[str, List[Union[TextContent, ImageContent]]]] = None
+     name: Optional[str] = None
+     tool_call_id: Optional[str] = None # Required for tool messages
+     tool_calls: Optional[list] = None # For assistant messages
+     attributes: Dict = Field(default_factory=dict)
+     timestamp: datetime = Field(default_factory=lambda: datetime.now(UTC))
+     source: Optional[EntitySource] = None # Creator information (who created this message)
+     platforms: Dict[str, Dict[str, str]] = Field(
+         default_factory=dict,
+         description="References to where this message exists on external platforms. Maps platform name to platform-specific identifiers."
+     )
+     attachments: List[Attachment] = Field(default_factory=list)
+     reactions: Dict[str, List[str]] = Field(
+         default_factory=dict,
+         description="Map of emoji to list of user IDs who reacted with that emoji"
+     )
+
+     # Simple metrics structure
+     metrics: Dict[str, Any] = Field(
+         default_factory=lambda: {
+             "model": None,
+             "timing": {
+                 "started_at": None,
+                 "ended_at": None,
+                 "latency": 0 # in milliseconds
+             },
+             "usage": {
+                 "completion_tokens": 0,
+                 "prompt_tokens": 0,
+                 "total_tokens": 0
+             },
+             "weave_call": {
+                 "id": "",
+                 "ui_url": ""
+             }
+         }
+     )
+
+     @field_validator("timestamp", mode="before")
+     def ensure_timezone(cls, value: Union[datetime, str]) -> datetime:
+         """Ensure timestamp is timezone-aware UTC"""
+         # mode="before" receives raw input, so accept ISO-8601 strings as well as datetimes
+         if isinstance(value, str):
+             value = datetime.fromisoformat(value)
+         if value.tzinfo is None:
+             return value.replace(tzinfo=UTC)
+         return value
+
+     @field_validator("role")
+     def validate_role(cls, v):
+         """Validate role field"""
+         if v not in ["system", "user", "assistant", "tool"]:
+             raise ValueError("Invalid role. Must be one of: system, user, assistant, tool")
+         return v
+
+     @model_validator(mode='after')
+     def validate_tool_message(self):
+         """Validate tool message requirements"""
+         if self.role == "tool" and not self.tool_call_id:
+             raise ValueError("tool_call_id is required for tool messages")
+         return self
+
+     @field_validator("tool_calls")
+     def validate_tool_calls(cls, v, info):
+         """Validate tool_calls field"""
+         if v is not None:
+             for tool_call in v:
+                 if not isinstance(tool_call, dict):
+                     raise ValueError("Each tool call must be a dictionary")
+                 if "id" not in tool_call or "type" not in tool_call or "function" not in tool_call:
+                     raise ValueError("Tool calls must have id, type, and function fields")
+                 if not isinstance(tool_call["function"], dict):
+                     raise ValueError("Tool call function must be a dictionary")
+                 if "name" not in tool_call["function"] or "arguments" not in tool_call["function"]:
+                     raise ValueError("Tool call function must have name and arguments fields")
+         return v
+
+     @field_validator("source")
+     def validate_source(cls, v):
+         """Validate source field structure"""
+         if v is not None:
+             # Check if type field is present and valid
+             if "type" in v and v["type"] not in ["user", "agent", "tool"]:
+                 raise ValueError("source.type must be one of: user, agent, tool")
+
+             # Ensure ID is present
+             if "id" not in v:
+                 raise ValueError("source.id is required when source is present")
+
+         return v
+
+     def __init__(self, **data):
+         # Handle file content if provided as raw bytes
+         if "file_content" in data and "filename" in data:
+             if "attachments" not in data:
+                 data["attachments"] = []
+             data["attachments"].append(Attachment(
+                 filename=data.pop("filename"),
+                 content=data.pop("file_content")
+             ))
+
+         super().__init__(**data)
+         if not self.id:
+             # Create a hash of relevant properties
+             hash_content = {
+                 "role": self.role,
+                 "sequence": self.sequence, # Include sequence in hash
+                 "turn": self.turn, # Include turn in hash
+                 "content": self.content,
+                 "timestamp": self.timestamp.isoformat()
+             }
+             # Include name for function messages
+             if self.name and self.role == "tool":
+                 hash_content["name"] = self.name
+
+             if self.source:
+                 hash_content["source"] = self.source
+
+             # Create deterministic JSON string for hashing
+             hash_str = json.dumps(hash_content, sort_keys=True)
+             self.id = hashlib.sha256(hash_str.encode()).hexdigest()
+             logger.debug(f"Generated message ID {self.id} from hash content: {hash_str}")
+
+     def _serialize_tool_calls(self, tool_calls):
+         """Helper method to serialize tool calls into a JSON-friendly format"""
+         if not tool_calls:
+             return None
+
+         serialized_calls = []
+         for call in tool_calls:
+             try:
+                 # Handle OpenAI response objects
+                 if hasattr(call, 'model_dump'):
+                     # For newer Pydantic models
+                     call_dict = call.model_dump()
+                 elif hasattr(call, 'to_dict'):
+                     # For objects with to_dict method
+                     call_dict = call.to_dict()
+                 elif hasattr(call, 'id') and hasattr(call, 'function'):
+                     # Direct access to OpenAI tool call attributes
+                     call_dict = {
+                         "id": call.id,
+                         "type": getattr(call, 'type', 'function'),
+                         "function": {
+                             "name": call.function.name,
+                             "arguments": call.function.arguments
+                         }
+                     }
+                 elif isinstance(call, dict):
+                     # If it's already a dict, ensure it has the required structure
+                     call_dict = {
+                         "id": call.get("id"),
+                         "type": call.get("type", "function"),
+                         "function": {
+                             "name": call.get("function", {}).get("name"),
+                             "arguments": call.get("function", {}).get("arguments")
+                         }
+                     }
+                 else:
+                     logger.warning(f"Unsupported tool call format: {type(call)}")
+                     continue
+
+                 # Validate the required fields are present
+                 if all(key in call_dict for key in ["id", "type", "function"]):
+                     serialized_calls.append(call_dict)
+                 else:
+                     logger.warning(f"Missing required fields in tool call: {call_dict}")
+             except Exception as e:
+                 logger.error(f"Error serializing tool call: {str(e)}")
+                 continue
+
+         return serialized_calls
+
+     def model_dump(self, mode: str = "json") -> Dict[str, Any]:
+         """Convert message to a dictionary suitable for JSON serialization
+
+         Args:
+             mode: Serialization mode, either "json" or "python".
+                 "json" converts datetimes to ISO strings (default).
+                 "python" keeps datetimes as datetime objects.
+         """
+         message_dict = {
+             "id": self.id,
+             "role": self.role,
+             "sequence": self.sequence, # Include sequence in serialization
+             "turn": self.turn, # Include turn in serialization
+             "content": self.content,
+             "timestamp": self.timestamp.isoformat() if mode == "json" else self.timestamp,
+             "source": self.source,
+             "platforms": self.platforms,
+             "metrics": self.metrics,
+             "reactions": self.reactions
+         }
+
+         if self.name:
+             message_dict["name"] = self.name
+
+         if self.tool_call_id:
+             message_dict["tool_call_id"] = self.tool_call_id
+
+         if self.tool_calls:
+             message_dict["tool_calls"] = self._serialize_tool_calls(self.tool_calls)
+
+         if self.attributes:
+             message_dict["attributes"] = self.attributes
+
+         if self.attachments:
+             message_dict["attachments"] = []
+             for attachment in self.attachments:
+                 # Ensure content is properly serialized
+                 attachment_dict = attachment.model_dump(mode=mode) if hasattr(attachment, 'model_dump') else {
+                     "filename": attachment.filename,
+                     "mime_type": attachment.mime_type,
+                     "file_id": attachment.file_id,
+                     "storage_path": attachment.storage_path,
+                     "storage_backend": attachment.storage_backend,
+                     "status": attachment.status
+                 }
+
+                 # Remove content field if present to avoid large data serialization
+                 if "content" in attachment_dict:
+                     del attachment_dict["content"]
+
+                 # Add processed content if available
+                 if attachment.attributes:
+                     attachment_dict["attributes"] = attachment.attributes
+
+                 # Add to attachments list
+                 message_dict["attachments"].append(attachment_dict)
+
+         return message_dict
+
+     def to_chat_completion_message(self, file_store: Optional[FileStore] = None) -> Dict[str, Any]:
+         """Return message in the format expected by chat completion APIs
+
+         Args:
+             file_store: Optional FileStore instance for accessing file URLs
+         """
+         base_content = self.content if isinstance(self.content, str) else ""
+
+         message_dict = {
+             "role": self.role,
+             "content": base_content,
+             "sequence": self.sequence
+         }
+
+         if self.name:
+             message_dict["name"] = self.name
+
+         if self.role == "assistant" and self.tool_calls:
+             message_dict["tool_calls"] = self.tool_calls
+
+         if self.role == "tool" and self.tool_call_id:
+             message_dict["tool_call_id"] = self.tool_call_id
+
+         # Handle attachments if we have them
+         if self.attachments:
+             # Get file references for all attachments
+             file_references = []
+             for attachment in self.attachments:
+                 if not attachment.storage_path:
+                     continue
+
+                 # Get the URL from attributes if available, otherwise construct it
+                 file_url = attachment.attributes.get("url") if attachment.attributes else None
+
+                 if not file_url and attachment.storage_path:
+                     # Construct URL from storage path
+                     file_url = FileStore.get_file_url(attachment.storage_path)
+
+                 # Simplified file reference format
+                 file_ref = f"[File: {file_url} ({attachment.mime_type})]"
+                 file_references.append(file_ref)
+
+             # Add file references to content based on message role
+             if file_references:
+                 if self.role == "user" or self.role == "tool":
+                     # For user and tool messages, add file references directly
+                     if message_dict["content"]:
+                         message_dict["content"] += "\n\n" + "\n".join(file_references)
+                     else:
+                         message_dict["content"] = "\n".join(file_references)
+                 elif self.role == "assistant":
+                     # For assistant messages, add a header
+                     if message_dict["content"]:
+                         message_dict["content"] += "\n\nGenerated Files:\n" + "\n".join(file_references)
+                     else:
+                         message_dict["content"] = "Generated Files:\n" + "\n".join(file_references)
+
+         return message_dict
+
+     def add_attachment(self, attachment: Union[Attachment, bytes], filename: Optional[str] = None) -> None:
+         """Add an attachment to the message.
+
+         Args:
+             attachment: Either an Attachment object or raw bytes
+             filename: Required if attachment is bytes, ignored if attachment is Attachment
+
+         Raises:
+             ValueError: If attachment is bytes and filename is not provided
+         """
+         if isinstance(attachment, Attachment):
+             self.attachments.append(attachment)
+         elif isinstance(attachment, bytes):
+             if not filename:
+                 raise ValueError("filename is required when adding raw bytes as attachment")
+             att = Attachment(
+                 filename=filename,
+                 content=attachment
+             )
+             self.attachments.append(att)
+         else:
+             raise ValueError("attachment must be either Attachment object or bytes")
+
+     def add_reaction(self, emoji: str, user_id: str) -> bool:
+         """Add a reaction to a message.
+
+         Args:
+             emoji: Emoji shortcode (e.g., ":thumbsup:")
+             user_id: ID of the user adding the reaction
+
+         Returns:
+             True if reaction was added, False if it already existed
+         """
+         logger.info(f"Message.add_reaction (msg_id={self.id}): Current reactions: {self.reactions}. Adding '{emoji}' for user '{user_id}'.")
+         if emoji not in self.reactions:
+             self.reactions[emoji] = []
+
+         if user_id in self.reactions[emoji]:
+             logger.warning(f"Message.add_reaction (msg_id={self.id}): User '{user_id}' already reacted with '{emoji}'.")
+             return False # Indicate that reaction was not newly added because it already existed
+
+         self.reactions[emoji].append(user_id)
+         logger.info(f"Message.add_reaction (msg_id={self.id}): Successfully added. Reactions now: {self.reactions}")
+         return True
+
+     def remove_reaction(self, emoji: str, user_id: str) -> bool:
+         """Remove a reaction from a message.
+
+         Args:
+             emoji: Emoji shortcode (e.g., ":thumbsup:")
+             user_id: ID of the user removing the reaction
+
+         Returns:
+             True if reaction was removed, False if it didn't exist
+         """
+         logger.info(f"Message.remove_reaction (msg_id={self.id}): Current reactions: {self.reactions}. Removing '{emoji}' for user '{user_id}'.")
+         if emoji not in self.reactions or user_id not in self.reactions[emoji]:
+             logger.warning(f"Message.remove_reaction (msg_id={self.id}): Emoji '{emoji}' or user '{user_id}' not found in reactions {self.reactions}.")
+             return False
+
+         self.reactions[emoji].remove(user_id)
+
+         # Clean up empty reactions
+         if not self.reactions[emoji]:
+             del self.reactions[emoji]
+
+         logger.info(f"Message.remove_reaction (msg_id={self.id}): Successfully removed. Reactions now: {self.reactions}")
+         return True
+
+     def get_reactions(self) -> Dict[str, List[str]]:
+         """Get all reactions for this message.
+
+         Returns:
+             Dictionary mapping emoji to list of user IDs
+         """
+         return self.reactions
+
+     def get_reaction_counts(self) -> Dict[str, int]:
+         """Get counts of reactions for this message.
+
+         Returns:
+             Dictionary mapping emoji to count of reactions
+         """
+         return {emoji: len(users) for emoji, users in self.reactions.items()}
+
+     model_config = {
+         "json_schema_extra": {
+             "examples": [
+                 {
+                     "id": "123e4567-e89b-12d3-a456-426614174000",
+                     "role": "user",
+                     "sequence": 1,
+                     "turn": 1,
+                     "content": "Here are some files to look at",
+                     "name": None,
+                     "tool_call_id": None,
+                     "tool_calls": None,
+                     "attributes": {},
+                     "timestamp": "2024-02-07T00:00:00+00:00",
+                     "source": {
+                         "id": "U123456",
+                         "name": "John Doe",
+                         "type": "user",
+                         "attributes": {
+                             "email": "john.doe@example.com",
+                             "user_id": "U123456"
+                         }
+                     },
+                     "platforms": {
+                         "slack": {
+                             "thread_ts": "1234567890.123456",
+                             "channel_id": "C123456",
+                             "team_id": "T123456"
+                         }
+                     },
+                     "attachments": [
+                         {
+                             "filename": "example.txt",
+                             "mime_type": "text/plain",
+                             "attributes": {
+                                 "type": "text",
+                                 "text": "Example content",
+                                 "url": "/files/example.txt"
+                             },
+                             "status": "stored"
+                         },
+                         {
+                             "filename": "example.pdf",
+                             "mime_type": "application/pdf",
+                             "attributes": {
+                                 "type": "document",
+                                 "text": "Extracted text from PDF",
+                                 "url": "/files/example.pdf"
+                             },
+                             "status": "stored"
+                         },
+                         {
+                             "filename": "example.jpg",
+                             "mime_type": "image/jpeg",
+                             "attributes": {
+                                 "type": "image",
+                                 "url": "/files/example.jpg"
+                             },
+                             "status": "stored"
+                         }
+                     ],
+                     "metrics": {
+                         "model": "gpt-4.1",
+                         "timing": {
+                             "started_at": "2024-02-07T00:00:00+00:00",
+                             "ended_at": "2024-02-07T00:00:01+00:00",
+                             "latency": 1000
+                         },
+                         "usage": {
+                             "completion_tokens": 100,
+                             "prompt_tokens": 50,
+                             "total_tokens": 150
+                         },
+                         "weave_call": {
+                             "id": "call-123",
+                             "ui_url": "https://weave.ui/call-123"
+                         }
+                     },
+                     "reactions": {
+                         ":thumbsup:": ["U123456", "U234567"],
+                         ":heart:": ["U123456"]
+                     }
+                 }
+             ]
+         },
+         "extra": "forbid",
+         "validate_assignment": True
+     }
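
The model above is easiest to evaluate with a short usage sketch. The snippet below is editorial and not part of the wheel: it assumes the module ships as narrator/models/message.py (the diff does not show the file path) and that Attachment accepts the filename/content keyword arguments used in __init__ and add_attachment above.

    from narrator.models.message import Message  # assumed import path

    # Constructing without an id lets __init__ derive one as a SHA-256 hash of
    # role, sequence, turn, content, and timestamp.
    msg = Message(
        role="user",
        content="Here are some files to look at",
        sequence=1,
        turn=1,
        source={"id": "U123456", "name": "John Doe", "type": "user"},
    )
    print(msg.id)  # 64-character hex digest

    # Raw bytes need a filename so an Attachment can be built from them.
    msg.add_attachment(b"Example content", filename="example.txt")

    # Reactions are deduplicated per user; the second call returns False.
    msg.add_reaction(":thumbsup:", "U123456")
    msg.add_reaction(":thumbsup:", "U123456")
    print(msg.get_reaction_counts())  # {':thumbsup:': 1}

    # model_dump(mode="json") converts the timestamp to an ISO string and strips
    # attachment content; to_chat_completion_message() only appends file
    # references for attachments that already have a storage_path, so none
    # appear for the freshly added attachment here.
    stored = msg.model_dump(mode="json")
    chat_msg = msg.to_chat_completion_message()
    print(chat_msg["role"], chat_msg["content"])

Because the id hash includes the timestamp, two otherwise identical messages created at different moments get different ids; callers that need a stable or externally assigned identifier should pass id explicitly.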