cognitive-modules 0.5.1__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognitive/migrate.py +9 -3
- cognitive/runner.py +713 -4
- cognitive_modules-0.6.1.dist-info/METADATA +615 -0
- {cognitive_modules-0.5.1.dist-info → cognitive_modules-0.6.1.dist-info}/RECORD +8 -8
- cognitive_modules-0.5.1.dist-info/METADATA +0 -445
- {cognitive_modules-0.5.1.dist-info → cognitive_modules-0.6.1.dist-info}/WHEEL +0 -0
- {cognitive_modules-0.5.1.dist-info → cognitive_modules-0.6.1.dist-info}/entry_points.txt +0 -0
- {cognitive_modules-0.5.1.dist-info → cognitive_modules-0.6.1.dist-info}/licenses/LICENSE +0 -0
- {cognitive_modules-0.5.1.dist-info → cognitive_modules-0.6.1.dist-info}/top_level.txt +0 -0
cognitive/runner.py
CHANGED
|
@@ -10,8 +10,13 @@ v2.2 Features:
|
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
12
|
import json
|
|
13
|
+
import base64
|
|
14
|
+
import mimetypes
|
|
13
15
|
from pathlib import Path
|
|
14
|
-
from typing import Optional, TypedDict, Union, Literal
|
|
16
|
+
from typing import Optional, TypedDict, Union, Literal, Callable, AsyncIterator
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from urllib.request import urlopen
|
|
19
|
+
from urllib.error import URLError
|
|
15
20
|
|
|
16
21
|
import jsonschema
|
|
17
22
|
import yaml
|
|
@@ -154,10 +159,11 @@ def repair_envelope(
|
|
|
154
159
|
"""
|
|
155
160
|
Attempt to repair envelope format issues without changing semantics.
|
|
156
161
|
|
|
157
|
-
Repairs (lossless
|
|
162
|
+
Repairs (mostly lossless, except explain truncation):
|
|
158
163
|
- Missing meta fields (fill with conservative defaults)
|
|
159
|
-
- Truncate explain if too long
|
|
160
|
-
- Trim whitespace from string fields
|
|
164
|
+
- Truncate explain if too long (lossy operation, but required for v2.2 spec)
|
|
165
|
+
- Trim whitespace from string fields (lossless)
|
|
166
|
+
- Clamp confidence to [0, 1] range (lossy if out of range)
|
|
161
167
|
|
|
162
168
|
Does NOT repair:
|
|
163
169
|
- Invalid enum values (treated as validation failure)
|
|
@@ -575,6 +581,20 @@ def run_module(
|
|
|
575
581
|
meta_errors = validate_data(result.get("meta", {}), meta_schema, "Meta")
|
|
576
582
|
if meta_errors and enable_repair:
|
|
577
583
|
result = repair_envelope(result, meta_schema, risk_rule=risk_rule)
|
|
584
|
+
# Re-validate meta after repair
|
|
585
|
+
meta_errors = validate_data(result.get("meta", {}), meta_schema, "Meta")
|
|
586
|
+
if meta_errors:
|
|
587
|
+
# Meta validation failed after repair attempt
|
|
588
|
+
return {
|
|
589
|
+
"ok": False,
|
|
590
|
+
"meta": {
|
|
591
|
+
"confidence": 0.0,
|
|
592
|
+
"risk": "high",
|
|
593
|
+
"explain": "Meta schema validation failed after repair attempt."
|
|
594
|
+
},
|
|
595
|
+
"error": {"code": "META_VALIDATION_FAILED", "message": str(meta_errors)},
|
|
596
|
+
"partial_data": result.get("data")
|
|
597
|
+
}
|
|
578
598
|
|
|
579
599
|
return result
|
|
580
600
|
|
|
@@ -637,3 +657,692 @@ def should_escalate(result: EnvelopeResponseV22, confidence_threshold: float = 0
|
|
|
637
657
|
return True
|
|
638
658
|
|
|
639
659
|
return False
|
|
660
|
+
|
|
661
|
+
|
|
662
|
+
# =============================================================================
|
|
663
|
+
# v2.5 Streaming Support
|
|
664
|
+
# =============================================================================
|
|
665
|
+
|
|
666
|
+
import uuid
|
|
667
|
+
from typing import AsyncIterator, Iterator, Any, Callable
|
|
668
|
+
from dataclasses import dataclass, field
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
def _session_start_time() -> float:
    """Return the current wall-clock time used to stamp a new session."""
    import time

    return time.time()


@dataclass
class StreamingSession:
    """State tracked for one in-flight streaming execution."""

    # Identifier of the session and the module it is running.
    session_id: str
    module_name: str
    # Creation timestamp (seconds since the epoch), set when the instance is built.
    started_at: float = field(default_factory=_session_start_time)
    # Number of chunks emitted so far.
    chunks_sent: int = 0
    # Structured data gathered during the stream.
    accumulated_data: dict = field(default_factory=dict)
    # Maps a field path to the text accumulated for it.
    accumulated_text: dict = field(default_factory=dict)
|
|
680
|
+
|
|
681
|
+
|
|
682
|
+
def create_session_id() -> str:
    """Return a fresh streaming session ID: ``sess_`` plus 12 hex characters."""
    token = uuid.uuid4().hex
    return "sess_" + token[:12]
|
|
685
|
+
|
|
686
|
+
|
|
687
|
+
def create_meta_chunk(session_id: str, initial_risk: str = "low") -> dict:
    """Build the opening chunk of a stream.

    The chunk announces the session and carries placeholder meta: no
    confidence yet, the given initial risk, and a "Processing..." explanation.
    """
    placeholder_meta = {
        "confidence": None,
        "risk": initial_risk,
        "explain": "Processing...",
    }
    return dict(ok=True, streaming=True, session_id=session_id, meta=placeholder_meta)
|
|
699
|
+
|
|
700
|
+
|
|
701
|
+
def create_delta_chunk(seq: int, field: str, delta: str) -> dict:
    """Wrap an incremental piece of text for ``field`` in a ``delta`` chunk.

    Args:
        seq: Sequence number of the chunk.
        field: Path of the field the text belongs to.
        delta: The new text fragment.
    """
    payload = {"seq": seq, "type": "delta", "field": field, "delta": delta}
    return {"chunk": payload}
|
|
711
|
+
|
|
712
|
+
|
|
713
|
+
def create_snapshot_chunk(seq: int, field: str, data: Any) -> dict:
    """Wrap a full replacement value for ``field`` in a ``snapshot`` chunk.

    Args:
        seq: Sequence number of the chunk.
        field: Path of the field being replaced.
        data: The complete new value for that field.
    """
    payload = {"seq": seq, "type": "snapshot", "field": field, "data": data}
    return {"chunk": payload}
|
|
723
|
+
|
|
724
|
+
|
|
725
|
+
def create_progress_chunk(percent: int, stage: str = "", message: str = "") -> dict:
    """Build a progress chunk reporting percent done, stage name and a message."""
    progress = {"percent": percent, "stage": stage, "message": message}
    return {"progress": progress}
|
|
734
|
+
|
|
735
|
+
|
|
736
|
+
def create_final_chunk(meta: dict, data: dict, usage: dict = None) -> dict:
    """Build the closing chunk of a stream.

    The chunk always carries ``final``, ``meta`` and ``data``; ``usage`` is
    attached only when a non-empty value is supplied.
    """
    if not usage:
        return {"final": True, "meta": meta, "data": data}
    return {"final": True, "meta": meta, "data": data, "usage": usage}
|
|
746
|
+
|
|
747
|
+
|
|
748
|
+
def create_error_chunk(session_id: str, error_code: str, message: str,
                       recoverable: bool = False, partial_data: dict = None) -> dict:
    """Build the chunk that reports a failed stream.

    Args:
        session_id: Session the failure belongs to.
        error_code: Machine-readable error code.
        message: Human-readable description.
        recoverable: Whether the failure is flagged as recoverable.
        partial_data: Data produced before the failure; attached only when
            non-empty.
    """
    error = {"code": error_code, "message": message, "recoverable": recoverable}
    extras = {"partial_data": partial_data} if partial_data else {}
    return {
        "ok": False,
        "streaming": True,
        "session_id": session_id,
        "error": error,
        **extras,
    }
|
|
764
|
+
|
|
765
|
+
|
|
766
|
+
def assemble_streamed_data(session: "StreamingSession") -> dict:
    """Assemble accumulated streaming data into its final nested form.

    Starts from a copy of ``session.accumulated_data`` and writes every entry
    of ``session.accumulated_text`` into it, treating each key as a
    dot-separated path (``"data.rationale"`` -> ``result["data"]["rationale"]``).
    Missing intermediate dicts are created.

    Args:
        session: Object exposing ``accumulated_data`` and ``accumulated_text``.

    Returns:
        A new dict; the session's own containers are left untouched.
    """
    import copy

    # A deep copy is required: with a shallow copy the nested dicts are shared,
    # so writing a text field below the top level would silently modify
    # session.accumulated_data as well.
    data = copy.deepcopy(session.accumulated_data)

    for field_path, text in session.accumulated_text.items():
        *parents, leaf = field_path.split(".")
        target = data
        for part in parents:
            target = target.setdefault(part, {})
        target[leaf] = text

    return data
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
class StreamingRunner:
    """Runner with streaming support for v2.5 modules.

    Each call to :meth:`execute_stream` registers a session in
    ``active_sessions`` for the duration of the stream and removes it when the
    stream ends, whether normally or through an error.
    """

    def __init__(self, provider_callback: Callable = None):
        """
        Initialize streaming runner.

        Args:
            provider_callback: Function to call LLM with streaming support.
                Signature: async (prompt, images=None) -> AsyncIterator[str]
                When omitted, the built-in test provider is used.
        """
        self.provider_callback = provider_callback or self._default_provider
        self.active_sessions: dict[str, "StreamingSession"] = {}

    async def _default_provider(self, prompt: str, images: list = None) -> AsyncIterator[str]:
        """Default provider - yields entire response at once (for testing)."""
        # In real implementation, this would stream from LLM
        yield '{"ok": true, "meta": {"confidence": 0.9, "risk": "low", "explain": "Test"}, "data": {"rationale": "Test response"}}'

    async def execute_stream(
        self,
        module_name: str,
        input_data: dict,
        on_chunk: Callable[[dict], None] = None
    ) -> AsyncIterator[dict]:
        """
        Execute a module with streaming output.

        Args:
            module_name: Name of the module to execute
            input_data: Input data including multimodal content
            on_chunk: Optional callback invoked with every chunk (meta, delta,
                final and error) just before that chunk is yielded

        Yields:
            Streaming chunks (meta, delta, final, or error)
        """
        session_id = create_session_id()
        session = StreamingSession(session_id=session_id, module_name=module_name)
        self.active_sessions[session_id] = session

        def emit(chunk: dict) -> dict:
            # Single place that feeds the callback, so no chunk type is skipped.
            if on_chunk:
                on_chunk(chunk)
            return chunk

        try:
            module = load_module(module_name)

            # Modules that do not declare streaming are run synchronously and
            # their result is replayed as a two-chunk stream.
            response_config = module.get("response", {})
            mode = response_config.get("mode", "sync")
            if mode not in ("streaming", "both"):
                result = await self._execute_sync(module, input_data)
                if result.get("ok") is False:
                    # A failed envelope has no "data"; report it as an error
                    # chunk instead of announcing success and then failing.
                    error = result.get("error") or {}
                    yield emit(create_error_chunk(
                        session_id,
                        error.get("code", "E2010"),
                        error.get("message", "Module execution failed"),
                        recoverable=False,
                        partial_data=result.get("partial_data"),
                    ))
                    return
                yield emit(create_meta_chunk(session_id))
                yield emit(create_final_chunk(result["meta"], result["data"]))
                return

            media = self._extract_media(input_data)
            prompt = self._build_prompt(module, input_data)

            yield emit(create_meta_chunk(session_id))

            # Relay provider output as deltas on the rationale field while
            # keeping the raw text for parsing once the stream is complete.
            seq = 1
            received = []

            async for text_chunk in self.provider_callback(prompt, media):
                received.append(text_chunk)

                delta_chunk = create_delta_chunk(seq, "data.rationale", text_chunk)
                session.chunks_sent += 1
                session.accumulated_text.setdefault("data.rationale", "")
                session.accumulated_text["data.rationale"] += text_chunk

                yield emit(delta_chunk)
                seq += 1

            accumulated_response = "".join(received)

            try:
                final_data = parse_llm_response(accumulated_response)
                final_data = repair_envelope(final_data)
            except Exception as e:
                yield emit(create_error_chunk(
                    session_id, "E2001", str(e),
                    recoverable=False,
                    partial_data={"rationale": session.accumulated_text.get("data.rationale", "")}
                ))
                return

            yield emit(create_final_chunk(
                final_data.get("meta", {}),
                final_data.get("data", {}),
                {"input_tokens": 0, "output_tokens": seq}  # Placeholder
            ))

        except Exception as e:
            error_chunk = create_error_chunk(
                session_id, "E2010", f"Stream error: {str(e)}",
                recoverable=False
            )
            if on_chunk:
                try:
                    on_chunk(error_chunk)
                except Exception:
                    # The callback may be the very thing that failed; the
                    # error chunk is still delivered through the iterator.
                    pass
            yield error_chunk
        finally:
            # pop() rather than del: cleanup must not raise if the entry was
            # already removed.
            self.active_sessions.pop(session_id, None)

    async def _execute_sync(self, module: dict, input_data: dict) -> dict:
        """Execute module synchronously (fallback) via ``run_module``."""
        return run_module(module["name"], input_data)

    def _build_prompt(self, module: dict, input_data: dict) -> str:
        """Build prompt by substituting the input into the module's template."""
        prompt_template = module.get("prompt", "")
        return substitute_arguments(prompt_template, input_data)

    def _extract_media(self, input_data: dict) -> list:
        """Collect image, audio and video inputs into one list, in that order.

        Keys that are absent or set to ``None`` contribute nothing.
        """
        media = []
        for key in ("images", "audio", "video"):
            media.extend(input_data.get(key) or [])
        return media
|
|
915
|
+
|
|
916
|
+
|
|
917
|
+
# =============================================================================
|
|
918
|
+
# v2.5 Multimodal Support
|
|
919
|
+
# =============================================================================
|
|
920
|
+
|
|
921
|
+
SUPPORTED_IMAGE_TYPES = {
    "image/jpeg", "image/png", "image/webp", "image/gif"
}

SUPPORTED_AUDIO_TYPES = {
    "audio/mpeg", "audio/wav", "audio/ogg", "audio/webm"
}

SUPPORTED_VIDEO_TYPES = {
    "video/mp4", "video/webm", "video/quicktime"
}

# Magic bytes for media type detection.
# Prefixes shared by several formats (RIFF, the MP4 size field) are only a
# first filter; detect_media_type_from_magic() inspects further bytes.
MEDIA_MAGIC_BYTES = {
    "image/jpeg": [b"\xff\xd8\xff"],
    "image/png": [b"\x89PNG\r\n\x1a\n"],
    "image/gif": [b"GIF87a", b"GIF89a"],
    "image/webp": [b"RIFF"],  # Check WEBP signature later
    "audio/mpeg": [b"\xff\xfb", b"\xff\xfa", b"ID3"],
    "audio/wav": [b"RIFF"],  # Check WAVE signature later
    "audio/ogg": [b"OggS"],
    "video/mp4": [b"\x00\x00\x00"],  # ftyp check needed
    "video/webm": [b"\x1a\x45\xdf\xa3"],
    "application/pdf": [b"%PDF"],
}

# Media size limits in bytes
MEDIA_SIZE_LIMITS = {
    "image": 20 * 1024 * 1024,  # 20MB
    "audio": 25 * 1024 * 1024,  # 25MB
    "video": 100 * 1024 * 1024,  # 100MB
    "document": 50 * 1024 * 1024,  # 50MB
}

# Media dimension limits
MEDIA_DIMENSION_LIMITS = {
    "max_width": 8192,
    "max_height": 8192,
    "min_width": 10,
    "min_height": 10,
    "max_pixels": 67108864,  # 8192 x 8192
}

# v2.5 Error codes
ERROR_CODES_V25 = {
    "UNSUPPORTED_MEDIA_TYPE": "E1010",
    "MEDIA_TOO_LARGE": "E1011",
    "MEDIA_FETCH_FAILED": "E1012",
    "MEDIA_DECODE_FAILED": "E1013",
    "MEDIA_TYPE_MISMATCH": "E1014",
    "MEDIA_DIMENSION_EXCEEDED": "E1015",
    "MEDIA_DIMENSION_TOO_SMALL": "E1016",
    "MEDIA_PIXEL_LIMIT": "E1017",
    "UPLOAD_EXPIRED": "E1018",
    "UPLOAD_NOT_FOUND": "E1019",
    "CHECKSUM_MISMATCH": "E1020",
    "STREAM_INTERRUPTED": "E2010",
    "STREAM_TIMEOUT": "E2011",
    "STREAMING_NOT_SUPPORTED": "E4010",
    "MULTIMODAL_NOT_SUPPORTED": "E4011",
    "RECOVERY_NOT_SUPPORTED": "E4012",
    "SESSION_EXPIRED": "E4013",
    "CHECKPOINT_INVALID": "E4014",
}


def detect_media_type_from_magic(data: bytes) -> Optional[str]:
    """Detect media type from magic bytes.

    Walks ``MEDIA_MAGIC_BYTES`` in order and returns the first MIME type whose
    signature matches the start of ``data``. Ambiguous prefixes get a second
    check: ``RIFF`` data must carry ``WEBP`` or ``WAVE`` at bytes 8-12, and
    MP4 data must carry ``ftyp`` at bytes 4-8.

    Args:
        data: Raw bytes from the start of the media content.

    Returns:
        The detected MIME type, or ``None`` when no signature matches. Input
        too short to hold the secondary marker is never classified.
    """
    for mime_type, magic_list in MEDIA_MAGIC_BYTES.items():
        for magic in magic_list:
            if not data.startswith(magic):
                continue

            # RIFF is a container: the form type decides the real format.
            # Slicing yields a short value for truncated data, so a bare
            # "RIFF" prefix can no longer be reported as WebP.
            if magic == b"RIFF":
                form_type = data[8:12]
                if form_type == b"WEBP":
                    return "image/webp"
                if form_type == b"WAVE":
                    return "audio/wav"
                continue

            # Three zero bytes are only the high part of a box size; require
            # the ftyp box tag before calling it MP4.
            if mime_type == "video/mp4":
                if data[4:8] == b"ftyp":
                    return "video/mp4"
                continue

            return mime_type
    return None
|
|
1006
|
+
|
|
1007
|
+
|
|
1008
|
+
def validate_media_magic_bytes(data: bytes, declared_type: str) -> tuple[bool, str]:
    """
    Check that media content agrees with its declared MIME type.

    Only the top-level category (the part before ``/``) is compared, so
    ``image/png`` content declared as ``image/jpeg`` is accepted. Content
    whose signature is not recognised is accepted as well.

    Returns:
        Tuple of (is_valid, error_message)
    """
    sniffed_type = detect_media_type_from_magic(data)

    # An unknown signature gives nothing to contradict the declaration with.
    if sniffed_type is not None:
        declared_family = declared_type.split("/")[0]
        sniffed_family = sniffed_type.split("/")[0]
        if sniffed_family != declared_family:
            return False, f"Media content mismatch: declared {declared_type}, detected {sniffed_type}"

    return True, ""
|
|
1028
|
+
|
|
1029
|
+
|
|
1030
|
+
# JPEG start-of-frame markers: 0xC0-0xCF except DHT (0xC4), JPG (0xC8) and
# DAC (0xCC). All of them carry the frame height and width.
_JPEG_SOF_MARKERS = frozenset(range(0xC0, 0xD0)) - {0xC4, 0xC8, 0xCC}


def _jpeg_dimensions(data: bytes) -> Optional[tuple]:
    """Scan JPEG segments for a start-of-frame header; return (width, height)."""
    i = 2  # skip the SOI marker
    size = len(data)
    # A frame header needs 9 bytes from its marker: FF, marker, 2 length bytes,
    # 1 precision byte, 2 height bytes, 2 width bytes.
    while i + 9 <= size:
        if data[i] != 0xFF:
            return None
        marker = data[i + 1]
        if marker == 0xFF:
            # Fill byte before a marker: advance one byte and retry.
            i += 1
            continue
        if marker in _JPEG_SOF_MARKERS:
            height = int.from_bytes(data[i + 5:i + 7], "big")
            width = int.from_bytes(data[i + 7:i + 9], "big")
            return (width, height)
        if marker == 0x01 or 0xD0 <= marker <= 0xD9:
            # Standalone markers (TEM, RSTn, SOI, EOI) have no length field.
            i += 2
            continue
        length = int.from_bytes(data[i + 2:i + 4], "big")
        if length < 2:
            # The length counts its own two bytes; anything smaller is corrupt.
            return None
        i += 2 + length
    return None


def validate_image_dimensions(data: bytes) -> Optional[tuple]:
    """
    Extract image dimensions from raw bytes.

    Supports PNG (IHDR width/height at bytes 16-24), JPEG (first
    start-of-frame segment) and GIF (logical screen size at bytes 6-10).

    Returns:
        Tuple of (width, height) or None if cannot determine, which includes
        unsupported formats, truncated headers and non-bytes input.
    """
    try:
        if data.startswith(b"\x89PNG"):
            # Without the length check a truncated header would decode empty
            # slices as 0 and report a bogus (0, 0) size.
            if len(data) < 24:
                return None
            width = int.from_bytes(data[16:20], "big")
            height = int.from_bytes(data[20:24], "big")
            return (width, height)

        if data.startswith(b"\xff\xd8"):
            return _jpeg_dimensions(data)

        if data.startswith(b"GIF"):
            if len(data) < 10:
                return None
            width = int.from_bytes(data[6:8], "little")
            height = int.from_bytes(data[8:10], "little")
            return (width, height)
    except (AttributeError, TypeError):
        # Not a bytes-like object: dimensions cannot be determined.
        return None

    return None
|
|
1068
|
+
|
|
1069
|
+
|
|
1070
|
+
def _decode_base64_payload(data) -> bytes:
    """Decode base64 strictly: whitespace is ignored, other stray characters raise."""
    # Line-wrapped payloads are legitimate, so drop whitespace first; everything
    # else outside the alphabet must fail instead of being silently discarded.
    if isinstance(data, str):
        data = "".join(data.split())
    elif isinstance(data, (bytes, bytearray)):
        data = b"".join(bytes(data).split())
    return base64.b64decode(data, validate=True)


def _validate_url_media(media: dict) -> tuple:
    """Check that URL media carries an http(s) URL."""
    url = media.get("url")
    if not url:
        return False, "URL media missing 'url' field", None
    if not url.startswith(("http://", "https://")):
        return False, "URL must start with http:// or https://", None
    return True, "", None


def _validate_checksum(decoded: bytes, checksum: dict) -> tuple:
    """Compare the payload digest with the declared checksum (sha256, md5 or crc32)."""
    import hashlib
    import zlib

    algorithm = checksum.get("algorithm", "sha256")
    expected = checksum.get("value", "")

    if algorithm == "sha256":
        actual = hashlib.sha256(decoded).hexdigest()
    elif algorithm == "md5":
        actual = hashlib.md5(decoded).hexdigest()
    elif algorithm == "crc32":
        actual = format(zlib.crc32(decoded) & 0xffffffff, '08x')
    else:
        return False, f"Unsupported checksum algorithm: {algorithm}", None

    if actual.lower() != expected.lower():
        return False, f"Checksum mismatch: expected {expected}, got {actual}", ERROR_CODES_V25["CHECKSUM_MISMATCH"]
    return True, "", None


def _validate_base64_media(media: dict, constraints: dict) -> tuple:
    """Validate inline media: encoding, size, signature, dimensions, checksum."""
    mime_type = media.get("media_type")
    if not mime_type:
        return False, "Base64 media missing 'media_type' field", None
    data = media.get("data")
    if not data:
        return False, "Base64 media missing 'data' field", None

    try:
        decoded = _decode_base64_payload(data)
    except (ValueError, TypeError):
        # binascii.Error is a ValueError; TypeError covers non-string payloads.
        return False, "Invalid base64 encoding", ERROR_CODES_V25["MEDIA_DECODE_FAILED"]

    # The size limit is chosen by the top-level MIME category.
    category = mime_type.split("/")[0]
    max_size = constraints.get("max_size_bytes", MEDIA_SIZE_LIMITS.get(category, 20 * 1024 * 1024))
    if len(decoded) > max_size:
        return False, f"Media exceeds size limit ({len(decoded)} > {max_size} bytes)", ERROR_CODES_V25["MEDIA_TOO_LARGE"]

    is_valid, error = validate_media_magic_bytes(decoded, mime_type)
    if not is_valid:
        return False, error, ERROR_CODES_V25["MEDIA_TYPE_MISMATCH"]

    if category == "image":
        dimensions = validate_image_dimensions(decoded)
        # Dimensions are checked only when they could be read from the header.
        if dimensions:
            width, height = dimensions
            limits = MEDIA_DIMENSION_LIMITS

            if width > limits["max_width"] or height > limits["max_height"]:
                return False, f"Image dimensions ({width}x{height}) exceed maximum ({limits['max_width']}x{limits['max_height']})", ERROR_CODES_V25["MEDIA_DIMENSION_EXCEEDED"]

            if width < limits["min_width"] or height < limits["min_height"]:
                return False, f"Image dimensions ({width}x{height}) below minimum ({limits['min_width']}x{limits['min_height']})", ERROR_CODES_V25["MEDIA_DIMENSION_TOO_SMALL"]

            if width * height > limits["max_pixels"]:
                return False, f"Image pixel count ({width * height}) exceeds maximum ({limits['max_pixels']})", ERROR_CODES_V25["MEDIA_PIXEL_LIMIT"]

    checksum = media.get("checksum")
    if checksum:
        return _validate_checksum(decoded, checksum)
    return True, "", None


def _validate_file_media(media: dict, constraints: dict) -> tuple:
    """Validate file media: the path must be an existing regular file within the size limit."""
    path = media.get("path")
    if not path:
        return False, "File media missing 'path' field", None
    file_path = Path(path)
    if not file_path.exists():
        return False, f"File not found: {path}", None
    if not file_path.is_file():
        # A directory exists but cannot be read as media later on.
        return False, f"Path is not a regular file: {path}", None

    file_size = file_path.stat().st_size
    mime, _ = mimetypes.guess_type(str(path))
    # Without a guessable MIME type there is no category to pick a limit from,
    # so the size check is skipped.
    if mime:
        category = mime.split("/")[0]
        max_size = constraints.get("max_size_bytes", MEDIA_SIZE_LIMITS.get(category, 20 * 1024 * 1024))
        if file_size > max_size:
            return False, f"File exceeds size limit ({file_size} > {max_size} bytes)", ERROR_CODES_V25["MEDIA_TOO_LARGE"]
    return True, "", None


def validate_media_input(media: dict, constraints: dict = None) -> tuple:
    """
    Validate a media input object with enhanced v2.5 validation.

    The checks depend on ``media["type"]``:
      - ``url``: an http:// or https:// URL must be present.
      - ``base64``: payload must decode, fit the size limit, match its declared
        MIME category, respect image dimension limits and match any checksum.
      - ``file``: path must be an existing regular file within the size limit.
      - ``upload_ref``: an ``upload_id`` must be present (not looked up here).

    Args:
        media: Media descriptor dict.
        constraints: Optional overrides; ``max_size_bytes`` replaces the
            per-category default size limit.

    Returns:
        Tuple of (is_valid, error_message, error_code); error_code is None
        when the failure has no dedicated code.
    """
    constraints = constraints or {}
    media_type = media.get("type")

    if media_type == "url":
        return _validate_url_media(media)
    if media_type == "base64":
        return _validate_base64_media(media, constraints)
    if media_type == "file":
        return _validate_file_media(media, constraints)
    if media_type == "upload_ref":
        if not media.get("upload_id"):
            return False, "Upload reference missing 'upload_id' field", None
        # Note: Actual upload validation would require backend lookup
        return True, "", None

    return False, "Invalid media type. Must be url, base64, file, or upload_ref", None
|
|
1174
|
+
|
|
1175
|
+
|
|
1176
|
+
def load_media_as_base64(media: dict) -> tuple[str, str]:
    """
    Load media from any source and return as base64.

    Args:
        media: Media descriptor whose ``type`` is ``base64`` (returned as-is),
            ``url`` (fetched over http/https with a 30 second timeout) or
            ``file`` (read from disk).

    Returns:
        Tuple of (base64_data, media_type)

    Raises:
        ValueError: If the URL scheme is not http/https, the fetch fails, the
            file does not exist, or the media type is not one of the above.
    """
    media_type = media.get("type")

    if media_type == "base64":
        return media["data"], media["media_type"]

    if media_type == "url":
        url = media["url"]
        # urlopen() also honours file:// and other schemes; restrict to the
        # schemes validate_media_input() accepts so a descriptor cannot be
        # used to read local files.
        if not isinstance(url, str) or not url.startswith(("http://", "https://")):
            raise ValueError(f"Failed to fetch media from URL: unsupported URL scheme: {url}")
        try:
            with urlopen(url, timeout=30) as response:
                data = response.read()
                content_type = response.headers.get("Content-Type", "application/octet-stream")
                # Extract just the mime type (remove charset etc)
                content_type = content_type.split(";")[0].strip()
                return base64.b64encode(data).decode("utf-8"), content_type
        except OSError as e:
            # URLError and HTTPError derive from OSError, as do socket
            # timeouts raised while reading the body.
            raise ValueError(f"Failed to fetch media from URL: {e}") from e

    if media_type == "file":
        path = Path(media["path"])
        if not path.exists():
            raise ValueError(f"File not found: {path}")

        mime_type, _ = mimetypes.guess_type(str(path))
        mime_type = mime_type or "application/octet-stream"

        return base64.b64encode(path.read_bytes()).decode("utf-8"), mime_type

    raise ValueError(f"Unknown media type: {media_type}")
|
|
1214
|
+
|
|
1215
|
+
|
|
1216
|
+
def prepare_media_for_llm(media_list: list, provider: str = "openai") -> list:
    """
    Convert media inputs into the multimodal part format of an LLM provider.

    Every item is loaded as base64 and wrapped according to ``provider``:
    - OpenAI: {"type": "image_url", "image_url": {"url": "data:..."}}
    - Anthropic: {"type": "image", "source": {"type": "base64", ...}}
    - Google: {"inlineData": {"mimeType": "...", "data": "..."}}
    - anything else: {"type": "base64", "media_type": "...", "data": "..."}
    """

    def as_provider_part(payload: str, mime_type: str) -> dict:
        if provider == "openai":
            data_url = f"data:{mime_type};base64,{payload}"
            return {"type": "image_url", "image_url": {"url": data_url}}
        if provider == "anthropic":
            source = {"type": "base64", "media_type": mime_type, "data": payload}
            return {"type": "image", "source": source}
        if provider == "google":
            return {"inlineData": {"mimeType": mime_type, "data": payload}}
        # Generic format
        return {"type": "base64", "media_type": mime_type, "data": payload}

    return [as_provider_part(*load_media_as_base64(item)) for item in media_list]
|
|
1262
|
+
|
|
1263
|
+
|
|
1264
|
+
def get_modalities_config(module: dict) -> dict:
    """Return the module's ``modalities`` section, or a text-only default."""
    text_only = {
        "input": ["text"],
        "output": ["text"]
    }
    return module.get("modalities", text_only)


def supports_multimodal_input(module: dict) -> bool:
    """Report whether the module accepts image, audio or video input."""
    accepted = get_modalities_config(module).get("input", ["text"])
    for kind in ("image", "audio", "video"):
        if kind in accepted:
            return True
    return False


def supports_multimodal_output(module: dict) -> bool:
    """Report whether the module can produce image, audio or video output."""
    produced = get_modalities_config(module).get("output", ["text"])
    for kind in ("image", "audio", "video"):
        if kind in produced:
            return True
    return False
|
|
1284
|
+
|
|
1285
|
+
|
|
1286
|
+
def validate_multimodal_input(input_data: dict, module: dict) -> tuple[bool, list[str]]:
    """
    Check the media in ``input_data`` against what the module declares.

    Images must be an accepted modality, stay within ``max_images``
    (default 10) and each pass ``validate_media_input``. Audio and video are
    only checked for being an accepted modality.

    Returns:
        Tuple of (is_valid, list of errors)
    """
    problems = []
    config = get_modalities_config(module)
    accepted = set(config.get("input", ["text"]))
    constraints = config.get("constraints", {})

    images = input_data.get("images", [])
    if images and "image" not in accepted:
        problems.append("Module does not support image input")
    elif images:
        limit = constraints.get("max_images", 10)
        count = len(images)
        if count > limit:
            problems.append(f"Too many images ({count} > {limit})")

        for index, image in enumerate(images):
            valid, err, err_code = validate_media_input(image, constraints)
            if valid:
                continue
            code_suffix = f" [{err_code}]" if err_code else ""
            problems.append(f"Image {index}: {err}" + code_suffix)

    # Audio and video: presence is only allowed when the modality is declared.
    unsupported_messages = (
        ("audio", "Module does not support audio input"),
        ("video", "Module does not support video input"),
    )
    for key, message in unsupported_messages:
        if input_data.get(key, []) and key not in accepted:
            problems.append(message)

    return len(problems) == 0, problems
|
|
1326
|
+
|
|
1327
|
+
|
|
1328
|
+
# =============================================================================
|
|
1329
|
+
# v2.5 Runtime Capabilities
|
|
1330
|
+
# =============================================================================
|
|
1331
|
+
|
|
1332
|
+
def get_runtime_capabilities() -> dict:
    """Describe this runtime: identity, spec version and supported v2.5 features."""
    multimodal = {
        "input": ["image"],  # Basic image support
        "output": [],  # No generation yet
    }
    capabilities = {
        "streaming": True,
        "multimodal": multimodal,
        "max_media_size_mb": 20,
        "supported_transports": ["ndjson"],  # SSE requires async server
        "conformance_level": 4,
    }
    return {
        "runtime": "cognitive-runtime-python",
        "version": "2.5.0",
        "spec_version": "2.5",
        "capabilities": capabilities,
    }
|