cognitive-modules 0.5.1__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cognitive/runner.py CHANGED
@@ -10,8 +10,13 @@ v2.2 Features:
10
10
  """
11
11
 
12
12
import json
import base64
import mimetypes
import time
from pathlib import Path
from typing import Optional, TypedDict, Union, Literal, Callable, AsyncIterator
from dataclasses import dataclass, field
from urllib.request import urlopen
from urllib.error import URLError

import jsonschema
import yaml
@@ -154,10 +159,11 @@ def repair_envelope(
154
159
  """
155
160
  Attempt to repair envelope format issues without changing semantics.
156
161
 
157
- Repairs (lossless only):
162
+ Repairs (mostly lossless, except explain truncation):
158
163
  - Missing meta fields (fill with conservative defaults)
159
- - Truncate explain if too long
160
- - Trim whitespace from string fields
164
+ - Truncate explain if too long (lossy operation, but required for v2.2 spec)
165
+ - Trim whitespace from string fields (lossless)
166
+ - Clamp confidence to [0, 1] range (lossy if out of range)
161
167
 
162
168
  Does NOT repair:
163
169
  - Invalid enum values (treated as validation failure)
@@ -575,6 +581,20 @@ def run_module(
575
581
  meta_errors = validate_data(result.get("meta", {}), meta_schema, "Meta")
576
582
  if meta_errors and enable_repair:
577
583
  result = repair_envelope(result, meta_schema, risk_rule=risk_rule)
584
+ # Re-validate meta after repair
585
+ meta_errors = validate_data(result.get("meta", {}), meta_schema, "Meta")
586
+ if meta_errors:
587
+ # Meta validation failed after repair attempt
588
+ return {
589
+ "ok": False,
590
+ "meta": {
591
+ "confidence": 0.0,
592
+ "risk": "high",
593
+ "explain": "Meta schema validation failed after repair attempt."
594
+ },
595
+ "error": {"code": "META_VALIDATION_FAILED", "message": str(meta_errors)},
596
+ "partial_data": result.get("data")
597
+ }
578
598
 
579
599
  return result
580
600
 
@@ -637,3 +657,692 @@ def should_escalate(result: EnvelopeResponseV22, confidence_threshold: float = 0
637
657
  return True
638
658
 
639
659
  return False
660
+
661
+
662
+ # =============================================================================
663
+ # v2.5 Streaming Support
664
+ # =============================================================================
665
+
666
+ import uuid
667
+ from typing import AsyncIterator, Iterator, Any, Callable
668
+ from dataclasses import dataclass, field
669
+
670
+
671
@dataclass
class StreamingSession:
    """State for one active streaming session.

    Tracks how many chunks have been emitted and accumulates partial output
    so it can be assembled at the end of the stream (or surfaced as
    partial_data when the stream errors out).
    """
    session_id: str
    module_name: str
    # Wall-clock session start (epoch seconds). Uses a plain module-level
    # `time` import instead of the previous __import__('time') hack.
    started_at: float = field(default_factory=time.time)
    chunks_sent: int = 0
    # Structured data accumulated from snapshot chunks.
    accumulated_data: dict = field(default_factory=dict)
    # Dotted field path -> text accumulated from delta chunks.
    accumulated_text: dict = field(default_factory=dict)
680
+
681
+
682
def create_session_id() -> str:
    """Return a fresh, unique streaming session identifier."""
    token = uuid.uuid4().hex
    return "sess_" + token[:12]
685
+
686
+
687
def create_meta_chunk(session_id: str, initial_risk: str = "low") -> dict:
    """Build the opening stream chunk: envelope header plus placeholder meta."""
    meta = {
        "confidence": None,
        "risk": initial_risk,
        "explain": "Processing...",
    }
    return {"ok": True, "streaming": True, "session_id": session_id, "meta": meta}
699
+
700
+
701
def create_delta_chunk(seq: int, field: str, delta: str) -> dict:
    """Wrap an incremental text fragment for *field* as a delta chunk."""
    payload = {"seq": seq, "type": "delta", "field": field, "delta": delta}
    return {"chunk": payload}
711
+
712
+
713
def create_snapshot_chunk(seq: int, field: str, data: Any) -> dict:
    """Wrap a full replacement value for *field* as a snapshot chunk."""
    payload = {"seq": seq, "type": "snapshot", "field": field, "data": data}
    return {"chunk": payload}
723
+
724
+
725
def create_progress_chunk(percent: int, stage: str = "", message: str = "") -> dict:
    """Build a progress-update chunk."""
    return {
        "progress": {"percent": percent, "stage": stage, "message": message}
    }
734
+
735
+
736
def create_final_chunk(meta: dict, data: dict, usage: dict = None) -> dict:
    """Build the terminal chunk carrying the complete meta/data payload.

    A falsy *usage* (None or an empty dict) is omitted from the chunk,
    matching the original behaviour.
    """
    result = {"final": True, "meta": meta, "data": data}
    if usage:
        result["usage"] = usage
    return result
746
+
747
+
748
def create_error_chunk(session_id: str, error_code: str, message: str,
                       recoverable: bool = False, partial_data: dict = None) -> dict:
    """Build an error chunk for stream failures.

    *partial_data* is attached only when truthy (None and empty dicts are
    both dropped), matching the original behaviour.
    """
    error = {"code": error_code, "message": message, "recoverable": recoverable}
    result = {
        "ok": False,
        "streaming": True,
        "session_id": session_id,
        "error": error,
    }
    if partial_data:
        result["partial_data"] = partial_data
    return result
764
+
765
+
766
def assemble_streamed_data(session: "StreamingSession") -> dict:
    """Assemble accumulated streaming state into the final ``data`` dict.

    Merges each dotted path in ``session.accumulated_text`` into a copy of
    ``session.accumulated_data``.

    Fixes over the previous version:
    - Deep-copies ``accumulated_data`` — the old shallow ``.copy()`` meant
      the merge mutated the session's own nested dicts.
    - Replaces a non-dict intermediate value with a dict instead of raising
      ``TypeError`` when a text path collides with a scalar.
    """
    import copy

    data = copy.deepcopy(session.accumulated_data)

    for field_path, text in session.accumulated_text.items():
        parts = field_path.split(".")
        target = data
        for part in parts[:-1]:
            # Create (or replace a colliding non-dict) intermediate container.
            if not isinstance(target.get(part), dict):
                target[part] = {}
            target = target[part]
        target[parts[-1]] = text

    return data
781
+
782
+
783
class StreamingRunner:
    """Runner with streaming support for v2.5 modules.

    Orchestrates one streaming execution per call to :meth:`execute_stream`:
    meta chunk first, then delta chunks as the provider yields text, then a
    final (or error) chunk. Active sessions are tracked in
    ``self.active_sessions`` and removed when the stream finishes.
    """

    def __init__(self, provider_callback: Callable = None):
        """
        Initialize streaming runner.

        Args:
            provider_callback: Async function that streams LLM output.
                Signature: async (prompt, images=None) -> AsyncIterator[str].
                Defaults to a stub that yields one canned JSON envelope.
        """
        self.provider_callback = provider_callback or self._default_provider
        # session_id -> StreamingSession for streams currently in flight.
        self.active_sessions: dict[str, StreamingSession] = {}

    async def _default_provider(self, prompt: str, images: list = None) -> AsyncIterator[str]:
        """Default provider - yields entire response at once (for testing)."""
        # In real implementation, this would stream from LLM
        yield '{"ok": true, "meta": {"confidence": 0.9, "risk": "low", "explain": "Test"}, "data": {"rationale": "Test response"}}'

    async def execute_stream(
        self,
        module_name: str,
        input_data: dict,
        on_chunk: Callable[[dict], None] = None
    ) -> AsyncIterator[dict]:
        """
        Execute a module with streaming output.

        Args:
            module_name: Name of the module to execute
            input_data: Input data including multimodal content
            on_chunk: Optional callback invoked with each chunk (in the
                streaming path; see NOTE below for the sync fallback)

        Yields:
            Streaming chunks (meta, delta, progress, final, or error)
        """
        session_id = create_session_id()
        session = StreamingSession(session_id=session_id, module_name=module_name)
        self.active_sessions[session_id] = session

        try:
            # Load module
            module = load_module(module_name)

            # Check if module supports streaming
            response_config = module.get("response", {})
            mode = response_config.get("mode", "sync")
            if mode not in ("streaming", "both"):
                # Fall back to sync execution.
                # NOTE(review): unlike the streaming path below, this branch
                # does not invoke on_chunk for the chunks it yields — confirm
                # whether callbacks are expected here too.
                result = await self._execute_sync(module, input_data)
                yield create_meta_chunk(session_id)
                yield create_final_chunk(result["meta"], result["data"])
                return

            # Extract images for multimodal.
            # NOTE(review): _extract_media concatenates images, audio AND
            # video into the single `images` argument passed to the provider.
            images = self._extract_media(input_data)

            # Build prompt
            prompt = self._build_prompt(module, input_data)

            # Send initial meta chunk (callback first, then yield).
            meta_chunk = create_meta_chunk(session_id)
            if on_chunk:
                on_chunk(meta_chunk)
            yield meta_chunk

            # Stream from LLM; seq numbers delta chunks starting at 1.
            seq = 1
            accumulated_response = ""

            async for text_chunk in self.provider_callback(prompt, images):
                accumulated_response += text_chunk

                # Create delta chunk for rationale field and mirror the text
                # into the session so partial output survives a failure.
                delta_chunk = create_delta_chunk(seq, "data.rationale", text_chunk)
                session.chunks_sent += 1
                session.accumulated_text.setdefault("data.rationale", "")
                session.accumulated_text["data.rationale"] += text_chunk

                if on_chunk:
                    on_chunk(delta_chunk)
                yield delta_chunk
                seq += 1

            # Parse final response; on failure emit E2001 with the partial
            # rationale accumulated so far.
            try:
                final_data = parse_llm_response(accumulated_response)
                # NOTE(review): repair_envelope is called with a single
                # argument here, while run_module passes a meta schema and
                # risk rule — confirm the remaining parameters have defaults.
                final_data = repair_envelope(final_data)
            except Exception as e:
                error_chunk = create_error_chunk(
                    session_id, "E2001", str(e),
                    recoverable=False,
                    partial_data={"rationale": session.accumulated_text.get("data.rationale", "")}
                )
                yield error_chunk
                return

            # Send final chunk
            final_chunk = create_final_chunk(
                final_data.get("meta", {}),
                final_data.get("data", {}),
                {"input_tokens": 0, "output_tokens": seq}  # Placeholder
            )
            if on_chunk:
                on_chunk(final_chunk)
            yield final_chunk

        except Exception as e:
            # Any unexpected failure (module load, provider error, ...) is
            # surfaced as a non-recoverable E2010 stream error chunk.
            error_chunk = create_error_chunk(
                session_id, "E2010", f"Stream error: {str(e)}",
                recoverable=False
            )
            yield error_chunk
        finally:
            # Session was registered unconditionally above, so this delete
            # cannot raise KeyError on any path through the try block.
            del self.active_sessions[session_id]

    async def _execute_sync(self, module: dict, input_data: dict) -> dict:
        """Execute module synchronously (fallback for non-streaming modules)."""
        # Use existing sync execution
        return run_module(module["name"], input_data)

    def _build_prompt(self, module: dict, input_data: dict) -> str:
        """Build the LLM prompt by substituting input values into the module template."""
        prompt_template = module.get("prompt", "")
        return substitute_arguments(prompt_template, input_data)

    def _extract_media(self, input_data: dict) -> list:
        """Collect all media inputs (images, audio, video) into one flat list."""
        images = input_data.get("images", [])
        audio = input_data.get("audio", [])
        video = input_data.get("video", [])
        return images + audio + video
915
+
916
+
917
+ # =============================================================================
918
+ # v2.5 Multimodal Support
919
+ # =============================================================================
920
+
921
# Media types the runtime accepts, by modality.
SUPPORTED_IMAGE_TYPES = {
    "image/jpeg", "image/png", "image/webp", "image/gif"
}

SUPPORTED_AUDIO_TYPES = {
    "audio/mpeg", "audio/wav", "audio/ogg", "audio/webm"
}

SUPPORTED_VIDEO_TYPES = {
    "video/mp4", "video/webm", "video/quicktime"
}

# Leading magic-byte signatures for media sniffing. RIFF-based formats
# (WEBP/WAV) and MP4 share ambiguous prefixes; they are disambiguated in
# detect_media_type_from_magic() rather than by prefix alone.
MEDIA_MAGIC_BYTES = {
    "image/jpeg": [b"\xff\xd8\xff"],
    "image/png": [b"\x89PNG\r\n\x1a\n"],
    "image/gif": [b"GIF87a", b"GIF89a"],
    "image/webp": [b"RIFF"],  # WEBP container tag checked at bytes 8-12
    "audio/mpeg": [b"\xff\xfb", b"\xff\xfa", b"ID3"],
    "audio/wav": [b"RIFF"],  # WAVE container tag checked at bytes 8-12
    "audio/ogg": [b"OggS"],
    "video/mp4": [b"\x00\x00\x00"],  # 'ftyp' box tag checked at bytes 4-8
    "video/webm": [b"\x1a\x45\xdf\xa3"],
    "application/pdf": [b"%PDF"],
}

# Maximum accepted payload size per media category, in bytes.
MEDIA_SIZE_LIMITS = {
    "image": 20 * 1024 * 1024,  # 20MB
    "audio": 25 * 1024 * 1024,  # 25MB
    "video": 100 * 1024 * 1024,  # 100MB
    "document": 50 * 1024 * 1024,  # 50MB
}

# Pixel-dimension bounds applied to images whose header we can parse.
MEDIA_DIMENSION_LIMITS = {
    "max_width": 8192,
    "max_height": 8192,
    "min_width": 10,
    "min_height": 10,
    "max_pixels": 67108864,  # 8192 x 8192
}

# v2.5 Error codes: E1xxx media validation, E2xxx streaming transport,
# E4xxx capability / session errors.
ERROR_CODES_V25 = {
    "UNSUPPORTED_MEDIA_TYPE": "E1010",
    "MEDIA_TOO_LARGE": "E1011",
    "MEDIA_FETCH_FAILED": "E1012",
    "MEDIA_DECODE_FAILED": "E1013",
    "MEDIA_TYPE_MISMATCH": "E1014",
    "MEDIA_DIMENSION_EXCEEDED": "E1015",
    "MEDIA_DIMENSION_TOO_SMALL": "E1016",
    "MEDIA_PIXEL_LIMIT": "E1017",
    "UPLOAD_EXPIRED": "E1018",
    "UPLOAD_NOT_FOUND": "E1019",
    "CHECKSUM_MISMATCH": "E1020",
    "STREAM_INTERRUPTED": "E2010",
    "STREAM_TIMEOUT": "E2011",
    "STREAMING_NOT_SUPPORTED": "E4010",
    "MULTIMODAL_NOT_SUPPORTED": "E4011",
    "RECOVERY_NOT_SUPPORTED": "E4012",
    "SESSION_EXPIRED": "E4013",
    "CHECKPOINT_INVALID": "E4014",
}


def detect_media_type_from_magic(data: bytes) -> Optional[str]:
    """Detect a MIME type from a payload's leading magic bytes.

    Returns the detected MIME type, or None when no known signature matches.

    Fixes over the previous version:
    - A RIFF prefix too short to carry its container tag (< 12 bytes) now
      yields None instead of a false "image/webp".
    - MP4 detection keys on the definitive 'ftyp' box tag at bytes 4-8
      rather than requiring a 3-zero-byte box-size prefix.
    """
    # RIFF is shared by WEBP and WAV: disambiguate on the tag at bytes 8-12.
    if data.startswith(b"RIFF"):
        if len(data) >= 12:
            if data[8:12] == b"WEBP":
                return "image/webp"
            if data[8:12] == b"WAVE":
                return "audio/wav"
        return None

    # MP4 files start with a 4-byte box size followed by the 'ftyp' tag.
    if len(data) >= 8 and data[4:8] == b"ftyp":
        return "video/mp4"

    for mime_type, magic_list in MEDIA_MAGIC_BYTES.items():
        if mime_type in ("image/webp", "audio/wav", "video/mp4"):
            continue  # ambiguous prefixes handled above
        for magic in magic_list:
            if data.startswith(magic):
                return mime_type
    return None
1006
+
1007
+
1008
def validate_media_magic_bytes(data: bytes, declared_type: str) -> tuple[bool, str]:
    """
    Check that the payload's magic bytes agree with the declared MIME type.

    Only the top-level category (image/audio/video/...) is compared, so a
    PNG declared as image/jpeg still passes. Content whose signature cannot
    be detected is accepted.

    Returns:
        Tuple of (is_valid, error_message)
    """
    detected_type = detect_media_type_from_magic(data)
    if detected_type is None:
        # Unknown signature: give the payload the benefit of the doubt.
        return True, ""

    declared_category = declared_type.split("/")[0]
    detected_category = detected_type.split("/")[0]
    if declared_category == detected_category:
        return True, ""
    return False, f"Media content mismatch: declared {declared_type}, detected {detected_type}"
1028
+
1029
+
1030
def validate_image_dimensions(data: bytes) -> Optional[tuple]:
    """
    Extract image dimensions from raw PNG/JPEG/GIF bytes.

    Returns:
        Tuple of (width, height) or None if cannot determine.

    Fixes over the previous version (JPEG walker):
    - recognises the full SOF marker family (C0-C3, C5-C7, C9-CB, CD-CF),
      not just C0-C2;
    - skips 0xFF fill bytes allowed between segments;
    - steps over standalone markers (TEM, RSTn, SOI/EOI) that carry no
      length field instead of misreading image data as a segment length.
    """
    try:
        if data.startswith(b"\x89PNG"):
            # IHDR width/height live at fixed big-endian offsets 16 and 20.
            return (int.from_bytes(data[16:20], "big"),
                    int.from_bytes(data[20:24], "big"))

        if data.startswith(b"\xff\xd8"):
            return _jpeg_dimensions(data)

        if data.startswith(b"GIF"):
            # GIF stores dimensions little-endian at offsets 6 and 8.
            return (int.from_bytes(data[6:8], "little"),
                    int.from_bytes(data[8:10], "little"))
    except Exception:
        # Malformed/truncated data: dimensions simply cannot be determined.
        pass

    return None


def _jpeg_dimensions(data: bytes) -> Optional[tuple]:
    """Walk JPEG marker segments until a start-of-frame header is found."""
    # All SOF markers carry the frame header; C4 (DHT), C8 (JPG) and
    # CC (DAC) are deliberately excluded — they are not frame headers.
    sof_markers = {0xc0, 0xc1, 0xc2, 0xc3, 0xc5, 0xc6, 0xc7,
                   0xc9, 0xca, 0xcb, 0xcd, 0xce, 0xcf}
    i = 2
    while i < len(data) - 8:
        if data[i] != 0xff:
            break
        # 0xFF fill bytes may pad between segments.
        while i < len(data) - 8 and data[i + 1] == 0xff:
            i += 1
        marker = data[i + 1]
        if marker in sof_markers:
            height = int.from_bytes(data[i + 5:i + 7], "big")
            width = int.from_bytes(data[i + 7:i + 9], "big")
            return (width, height)
        if marker == 0x01 or 0xd0 <= marker <= 0xd9:
            # Standalone marker (TEM/RSTn/SOI/EOI): no length field follows.
            i += 2
            continue
        length = int.from_bytes(data[i + 2:i + 4], "big")
        i += 2 + length
    return None
1068
+
1069
+
1070
def validate_media_input(media: dict, constraints: dict = None) -> tuple:
    """
    Validate a media input object with enhanced v2.5 validation.

    Dispatches on ``media["type"]`` to a per-source helper so each source's
    rules can be read in isolation (previously one ~200-line branch chain).
    All error strings and error codes are unchanged.

    Returns:
        Tuple of (is_valid, error_message, error_code); error_code is None
        when no specific v2.5 code applies.
    """
    constraints = constraints or {}
    media_type = media.get("type")

    if media_type == "url":
        return _validate_url_media(media)
    if media_type == "base64":
        return _validate_base64_media(media, constraints)
    if media_type == "file":
        return _validate_file_media(media, constraints)
    if media_type == "upload_ref":
        return _validate_upload_ref_media(media)
    return False, "Invalid media type. Must be url, base64, file, or upload_ref", None


def _validate_url_media(media: dict) -> tuple:
    """Check presence and scheme of a URL media source."""
    url = media.get("url")
    if not url:
        return False, "URL media missing 'url' field", None
    if not url.startswith(("http://", "https://")):
        return False, "URL must start with http:// or https://", None
    return True, "", None


def _validate_base64_media(media: dict, constraints: dict) -> tuple:
    """Decode and fully validate inline base64 media."""
    mime_type = media.get("media_type")
    if not mime_type:
        return False, "Base64 media missing 'media_type' field", None
    data = media.get("data")
    if not data:
        return False, "Base64 media missing 'data' field", None

    # Validate base64 and decode
    try:
        decoded = base64.b64decode(data)
    except Exception:
        return False, "Invalid base64 encoding", ERROR_CODES_V25["MEDIA_DECODE_FAILED"]

    # Size limit by category (image/audio/...), overridable via constraints.
    category = mime_type.split("/")[0]
    max_size = constraints.get("max_size_bytes", MEDIA_SIZE_LIMITS.get(category, 20 * 1024 * 1024))
    if len(decoded) > max_size:
        return False, f"Media exceeds size limit ({len(decoded)} > {max_size} bytes)", ERROR_CODES_V25["MEDIA_TOO_LARGE"]

    # Declared type must match the sniffed content category.
    is_valid, error = validate_media_magic_bytes(decoded, mime_type)
    if not is_valid:
        return False, error, ERROR_CODES_V25["MEDIA_TYPE_MISMATCH"]

    if category == "image":
        failure = _validate_image_limits(decoded)
        if failure is not None:
            return failure

    checksum = media.get("checksum")
    if checksum:
        failure = _verify_checksum(decoded, checksum)
        if failure is not None:
            return failure

    return True, "", None


def _validate_image_limits(decoded: bytes):
    """Return an error tuple when image dimensions break limits, else None."""
    dimensions = validate_image_dimensions(decoded)
    if not dimensions:
        # Undecodable header: dimension limits cannot be enforced.
        return None
    width, height = dimensions
    limits = MEDIA_DIMENSION_LIMITS

    if width > limits["max_width"] or height > limits["max_height"]:
        return False, f"Image dimensions ({width}x{height}) exceed maximum ({limits['max_width']}x{limits['max_height']})", ERROR_CODES_V25["MEDIA_DIMENSION_EXCEEDED"]
    if width < limits["min_width"] or height < limits["min_height"]:
        return False, f"Image dimensions ({width}x{height}) below minimum ({limits['min_width']}x{limits['min_height']})", ERROR_CODES_V25["MEDIA_DIMENSION_TOO_SMALL"]
    if width * height > limits["max_pixels"]:
        return False, f"Image pixel count ({width * height}) exceeds maximum ({limits['max_pixels']})", ERROR_CODES_V25["MEDIA_PIXEL_LIMIT"]
    return None


def _verify_checksum(decoded: bytes, checksum: dict):
    """Return an error tuple on checksum failure, else None."""
    import hashlib
    algorithm = checksum.get("algorithm", "sha256")
    expected = checksum.get("value", "")

    if algorithm == "sha256":
        actual = hashlib.sha256(decoded).hexdigest()
    elif algorithm == "md5":
        actual = hashlib.md5(decoded).hexdigest()
    elif algorithm == "crc32":
        import zlib
        actual = format(zlib.crc32(decoded) & 0xffffffff, '08x')
    else:
        return False, f"Unsupported checksum algorithm: {algorithm}", None

    if actual.lower() != expected.lower():
        return False, f"Checksum mismatch: expected {expected}, got {actual}", ERROR_CODES_V25["CHECKSUM_MISMATCH"]
    return None


def _validate_file_media(media: dict, constraints: dict) -> tuple:
    """Check existence and size of an on-disk media file."""
    path = media.get("path")
    if not path:
        return False, "File media missing 'path' field", None
    if not Path(path).exists():
        return False, f"File not found: {path}", None

    # Check file size against the category limit when the type is guessable.
    file_size = Path(path).stat().st_size
    mime, _ = mimetypes.guess_type(str(path))
    if mime:
        category = mime.split("/")[0]
        max_size = constraints.get("max_size_bytes", MEDIA_SIZE_LIMITS.get(category, 20 * 1024 * 1024))
        if file_size > max_size:
            return False, f"File exceeds size limit ({file_size} > {max_size} bytes)", ERROR_CODES_V25["MEDIA_TOO_LARGE"]
    return True, "", None


def _validate_upload_ref_media(media: dict) -> tuple:
    """Check an upload reference has an ID; real validation needs a backend."""
    upload_id = media.get("upload_id")
    if not upload_id:
        return False, "Upload reference missing 'upload_id' field", None
    # Note: Actual upload validation would require backend lookup
    return True, "", None
1174
+
1175
+
1176
def load_media_as_base64(media: dict) -> tuple[str, str]:
    """
    Load media from any supported source as (base64_data, media_type).

    Raises:
        ValueError: on an unknown source type, a missing file, or a failed
            URL fetch.
    """
    source = media.get("type")

    if source == "base64":
        # Already encoded: pass straight through.
        return media["data"], media["media_type"]

    if source == "url":
        url = media["url"]
        try:
            # NOTE(review): no size cap or scheme restriction on the fetch;
            # callers should validate the URL first (potential SSRF vector).
            with urlopen(url, timeout=30) as response:
                payload = response.read()
                content_type = response.headers.get("Content-Type", "application/octet-stream")
                # Extract just the mime type (remove charset etc)
                content_type = content_type.split(";")[0].strip()
            return base64.b64encode(payload).decode("utf-8"), content_type
        except URLError as e:
            raise ValueError(f"Failed to fetch media from URL: {e}")

    if source == "file":
        path = Path(media["path"])
        if not path.exists():
            raise ValueError(f"File not found: {path}")

        mime_type, _ = mimetypes.guess_type(str(path))
        payload = path.read_bytes()
        return base64.b64encode(payload).decode("utf-8"), mime_type or "application/octet-stream"

    raise ValueError(f"Unknown media type: {source}")
1214
+
1215
+
1216
def prepare_media_for_llm(media_list: list, provider: str = "openai") -> list:
    """
    Convert media inputs into a specific LLM provider's multimodal format.

    Provider formats:
    - OpenAI: {"type": "image_url", "image_url": {"url": "data:..."}}
    - Anthropic: {"type": "image", "source": {"type": "base64", ...}}
    - Google: {"inlineData": {"mimeType": "...", "data": "..."}}
    - anything else: a generic base64 dict
    """
    def _openai(mime: str, payload: str) -> dict:
        return {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{payload}"}}

    def _anthropic(mime: str, payload: str) -> dict:
        return {"type": "image", "source": {"type": "base64", "media_type": mime, "data": payload}}

    def _google(mime: str, payload: str) -> dict:
        return {"inlineData": {"mimeType": mime, "data": payload}}

    def _generic(mime: str, payload: str) -> dict:
        return {"type": "base64", "media_type": mime, "data": payload}

    builders = {"openai": _openai, "anthropic": _anthropic, "google": _google}
    build = builders.get(provider, _generic)

    prepared = []
    for media in media_list:
        payload, mime = load_media_as_base64(media)
        prepared.append(build(mime, payload))
    return prepared
1262
+
1263
+
1264
def get_modalities_config(module: dict) -> dict:
    """Return the module's modalities config, defaulting to text-only I/O."""
    text_only = {"input": ["text"], "output": ["text"]}
    return module.get("modalities", text_only)
1270
+
1271
+
1272
def supports_multimodal_input(module: dict) -> bool:
    """True when the module accepts image, audio, or video input."""
    # Inlined modalities lookup (same default as get_modalities_config).
    modalities = module.get("modalities", {"input": ["text"], "output": ["text"]})
    accepted = set(modalities.get("input", ["text"]))
    return not accepted.isdisjoint({"image", "audio", "video"})
1277
+
1278
+
1279
def supports_multimodal_output(module: dict) -> bool:
    """True when the module can emit image, audio, or video output."""
    # Inlined modalities lookup (same default as get_modalities_config).
    modalities = module.get("modalities", {"input": ["text"], "output": ["text"]})
    produced = set(modalities.get("output", ["text"]))
    return not produced.isdisjoint({"image", "audio", "video"})
1284
+
1285
+
1286
def validate_multimodal_input(input_data: dict, module: dict) -> tuple[bool, list[str]]:
    """
    Validate multimodal input against module configuration.

    Returns:
        Tuple of (is_valid, list of errors)

    NOTE(review): only images get per-item validation and a count limit;
    audio/video are merely checked for modality support — confirm whether
    that asymmetry is intentional.
    """
    errors = []
    modalities = get_modalities_config(module)
    allowed = set(modalities.get("input", ["text"]))
    constraints = modalities.get("constraints", {})

    images = input_data.get("images", [])
    if images:
        if "image" not in allowed:
            errors.append("Module does not support image input")
        else:
            limit = constraints.get("max_images", 10)
            if len(images) > limit:
                errors.append(f"Too many images ({len(images)} > {limit})")
            for i, img in enumerate(images):
                valid, err, err_code = validate_media_input(img, constraints)
                if not valid:
                    suffix = f" [{err_code}]" if err_code else ""
                    errors.append(f"Image {i}: {err}" + suffix)

    # Audio and video: modality support only (no per-item validation).
    for kind in ("audio", "video"):
        if input_data.get(kind, []) and kind not in allowed:
            errors.append(f"Module does not support {kind} input")

    return not errors, errors
1326
+
1327
+
1328
+ # =============================================================================
1329
+ # v2.5 Runtime Capabilities
1330
+ # =============================================================================
1331
+
1332
def get_runtime_capabilities() -> dict:
    """Describe this runtime's v2.5 capability surface."""
    multimodal = {
        "input": ["image"],  # Basic image support
        "output": [],        # No generation yet
    }
    capabilities = {
        "streaming": True,
        "multimodal": multimodal,
        "max_media_size_mb": 20,
        "supported_transports": ["ndjson"],  # SSE requires async server
        "conformance_level": 4,
    }
    return {
        "runtime": "cognitive-runtime-python",
        "version": "2.5.0",
        "spec_version": "2.5",
        "capabilities": capabilities,
    }