cognitive-modules 0.5.1-py3-none-any.whl → 0.6.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognitive/migrate.py +9 -3
- cognitive/runner.py +520 -4
- cognitive_modules-0.6.0.dist-info/METADATA +615 -0
- {cognitive_modules-0.5.1.dist-info → cognitive_modules-0.6.0.dist-info}/RECORD +8 -8
- cognitive_modules-0.5.1.dist-info/METADATA +0 -445
- {cognitive_modules-0.5.1.dist-info → cognitive_modules-0.6.0.dist-info}/WHEEL +0 -0
- {cognitive_modules-0.5.1.dist-info → cognitive_modules-0.6.0.dist-info}/entry_points.txt +0 -0
- {cognitive_modules-0.5.1.dist-info → cognitive_modules-0.6.0.dist-info}/licenses/LICENSE +0 -0
- {cognitive_modules-0.5.1.dist-info → cognitive_modules-0.6.0.dist-info}/top_level.txt +0 -0
cognitive/migrate.py CHANGED

@@ -265,13 +265,19 @@ def _migrate_from_v2(
         manifest_changes.append("Added schema_strictness: medium")
 
     if 'overflow' not in manifest:
+        # Determine default max_items based on schema_strictness (consistent with loader.py)
+        schema_strictness = manifest.get('schema_strictness', 'medium')
+        strictness_max_items = {'high': 0, 'medium': 5, 'low': 20}
+        default_max_items = strictness_max_items.get(schema_strictness, 5)
+        default_enabled = schema_strictness != 'high'
+
         manifest['overflow'] = {
-            'enabled':
+            'enabled': default_enabled,
             'recoverable': True,
-            'max_items':
+            'max_items': default_max_items,
             'require_suggested_mapping': True
         }
-        manifest_changes.append("Added overflow config")
+        manifest_changes.append(f"Added overflow config (max_items={default_max_items} based on schema_strictness={schema_strictness})")
 
     if 'enums' not in manifest:
         manifest['enums'] = {
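Taken in isolation, the new migration defaults behave as follows. A minimal sketch of the derivation above; the input manifest here is hypothetical, not taken from the package:

    # Hypothetical manifest missing an 'overflow' section.
    manifest = {'schema_strictness': 'low'}

    strictness = manifest.get('schema_strictness', 'medium')
    strictness_max_items = {'high': 0, 'medium': 5, 'low': 20}
    overflow = {
        'enabled': strictness != 'high',                        # disabled only for 'high'
        'recoverable': True,
        'max_items': strictness_max_items.get(strictness, 5),  # 20 for 'low'
        'require_suggested_mapping': True
    }
    assert overflow['enabled'] and overflow['max_items'] == 20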
cognitive/runner.py CHANGED

@@ -10,8 +10,13 @@ v2.2 Features:
 """
 
 import json
+import base64
+import mimetypes
 from pathlib import Path
-from typing import Optional, TypedDict, Union, Literal
+from typing import Optional, TypedDict, Union, Literal, Callable, AsyncIterator
+from dataclasses import dataclass, field
+from urllib.request import urlopen
+from urllib.error import URLError
 
 import jsonschema
 import yaml
@@ -154,10 +159,11 @@ def repair_envelope(
     """
     Attempt to repair envelope format issues without changing semantics.
 
-    Repairs (lossless
+    Repairs (mostly lossless, except explain truncation):
     - Missing meta fields (fill with conservative defaults)
-    - Truncate explain if too long
-    - Trim whitespace from string fields
+    - Truncate explain if too long (lossy operation, but required for v2.2 spec)
+    - Trim whitespace from string fields (lossless)
+    - Clamp confidence to [0, 1] range (lossy if out of range)
 
     Does NOT repair:
     - Invalid enum values (treated as validation failure)
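The confidence clamp the docstring now documents is a standard range clamp; an illustrative sketch of the rule, not necessarily the package's exact implementation:

    def clamp_confidence(value: float) -> float:
        # Map out-of-range confidence back into [0, 1], per the docstring above.
        return min(1.0, max(0.0, value))

    assert clamp_confidence(1.7) == 1.0    # lossy: input was out of range
    assert clamp_confidence(0.42) == 0.42  # in-range values pass through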
@@ -575,6 +581,20 @@ def run_module(
     meta_errors = validate_data(result.get("meta", {}), meta_schema, "Meta")
     if meta_errors and enable_repair:
         result = repair_envelope(result, meta_schema, risk_rule=risk_rule)
+        # Re-validate meta after repair
+        meta_errors = validate_data(result.get("meta", {}), meta_schema, "Meta")
+        if meta_errors:
+            # Meta validation failed after repair attempt
+            return {
+                "ok": False,
+                "meta": {
+                    "confidence": 0.0,
+                    "risk": "high",
+                    "explain": "Meta schema validation failed after repair attempt."
+                },
+                "error": {"code": "META_VALIDATION_FAILED", "message": str(meta_errors)},
+                "partial_data": result.get("data")
+            }
 
     return result
 
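Caller-side, the new failure mode is detectable from the envelope alone. A hedged sketch; the module name and input are hypothetical:

    result = run_module("summarize", {"text": "..."})  # "summarize" is hypothetical
    if not result["ok"]:
        error = result.get("error", {})
        if error.get("code") == "META_VALIDATION_FAILED":
            # Meta could not be repaired; the data payload may still be salvageable.
            salvage = result.get("partial_data")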
@@ -637,3 +657,499 @@ def should_escalate(result: EnvelopeResponseV22, confidence_threshold: float = 0
         return True
 
     return False
+
+
+# =============================================================================
+# v2.5 Streaming Support
+# =============================================================================
+
+import uuid
+from typing import AsyncIterator, Iterator, Any, Callable
+from dataclasses import dataclass, field
+
+
+@dataclass
+class StreamingSession:
+    """Represents an active streaming session."""
+    session_id: str
+    module_name: str
+    started_at: float = field(default_factory=lambda: __import__('time').time())
+    chunks_sent: int = 0
+    accumulated_data: dict = field(default_factory=dict)
+    accumulated_text: dict = field(default_factory=dict)  # field -> accumulated string
+
+
+def create_session_id() -> str:
+    """Generate a unique session ID for streaming."""
+    return f"sess_{uuid.uuid4().hex[:12]}"
+
+
+def create_meta_chunk(session_id: str, initial_risk: str = "low") -> dict:
+    """Create the initial meta chunk for streaming."""
+    return {
+        "ok": True,
+        "streaming": True,
+        "session_id": session_id,
+        "meta": {
+            "confidence": None,
+            "risk": initial_risk,
+            "explain": "Processing..."
+        }
+    }
+
+
+def create_delta_chunk(seq: int, field: str, delta: str) -> dict:
+    """Create a delta chunk for incremental content."""
+    return {
+        "chunk": {
+            "seq": seq,
+            "type": "delta",
+            "field": field,
+            "delta": delta
+        }
+    }
+
+
+def create_snapshot_chunk(seq: int, field: str, data: Any) -> dict:
+    """Create a snapshot chunk for full field replacement."""
+    return {
+        "chunk": {
+            "seq": seq,
+            "type": "snapshot",
+            "field": field,
+            "data": data
+        }
+    }
+
+
+def create_progress_chunk(percent: int, stage: str = "", message: str = "") -> dict:
+    """Create a progress update chunk."""
+    return {
+        "progress": {
+            "percent": percent,
+            "stage": stage,
+            "message": message
+        }
+    }
+
+
+def create_final_chunk(meta: dict, data: dict, usage: dict = None) -> dict:
+    """Create the final chunk with complete data."""
+    chunk = {
+        "final": True,
+        "meta": meta,
+        "data": data
+    }
+    if usage:
+        chunk["usage"] = usage
+    return chunk
+
+
+def create_error_chunk(session_id: str, error_code: str, message: str,
+                       recoverable: bool = False, partial_data: dict = None) -> dict:
+    """Create an error chunk for stream failures."""
+    chunk = {
+        "ok": False,
+        "streaming": True,
+        "session_id": session_id,
+        "error": {
+            "code": error_code,
+            "message": message,
+            "recoverable": recoverable
+        }
+    }
+    if partial_data:
+        chunk["partial_data"] = partial_data
+    return chunk
+
+
+def assemble_streamed_data(session: StreamingSession) -> dict:
+    """Assemble accumulated streaming data into final format."""
+    data = session.accumulated_data.copy()
+
+    # Merge accumulated text fields
+    for field_path, text in session.accumulated_text.items():
+        parts = field_path.split(".")
+        target = data
+        for part in parts[:-1]:
+            if part not in target:
+                target[part] = {}
+            target = target[part]
+        target[parts[-1]] = text
+
+    return data
+
+
+class StreamingRunner:
+    """Runner with streaming support for v2.5 modules."""
+
+    def __init__(self, provider_callback: Callable = None):
+        """
+        Initialize streaming runner.
+
+        Args:
+            provider_callback: Function to call LLM with streaming support.
+                Signature: async (prompt, images=None) -> AsyncIterator[str]
+        """
+        self.provider_callback = provider_callback or self._default_provider
+        self.active_sessions: dict[str, StreamingSession] = {}
+
+    async def _default_provider(self, prompt: str, images: list = None) -> AsyncIterator[str]:
+        """Default provider - yields entire response at once (for testing)."""
+        # In real implementation, this would stream from LLM
+        yield '{"ok": true, "meta": {"confidence": 0.9, "risk": "low", "explain": "Test"}, "data": {"rationale": "Test response"}}'
+
+    async def execute_stream(
+        self,
+        module_name: str,
+        input_data: dict,
+        on_chunk: Callable[[dict], None] = None
+    ) -> AsyncIterator[dict]:
+        """
+        Execute a module with streaming output.
+
+        Args:
+            module_name: Name of the module to execute
+            input_data: Input data including multimodal content
+            on_chunk: Optional callback for each chunk
+
+        Yields:
+            Streaming chunks (meta, delta, progress, final, or error)
+        """
+        session_id = create_session_id()
+        session = StreamingSession(session_id=session_id, module_name=module_name)
+        self.active_sessions[session_id] = session
+
+        try:
+            # Load module
+            module = load_module(module_name)
+
+            # Check if module supports streaming
+            response_config = module.get("response", {})
+            mode = response_config.get("mode", "sync")
+            if mode not in ("streaming", "both"):
+                # Fall back to sync execution
+                result = await self._execute_sync(module, input_data)
+                yield create_meta_chunk(session_id)
+                yield create_final_chunk(result["meta"], result["data"])
+                return
+
+            # Extract images for multimodal
+            images = self._extract_media(input_data)
+
+            # Build prompt
+            prompt = self._build_prompt(module, input_data)
+
+            # Send initial meta chunk
+            meta_chunk = create_meta_chunk(session_id)
+            if on_chunk:
+                on_chunk(meta_chunk)
+            yield meta_chunk
+
+            # Stream from LLM
+            seq = 1
+            accumulated_response = ""
+
+            async for text_chunk in self.provider_callback(prompt, images):
+                accumulated_response += text_chunk
+
+                # Create delta chunk for rationale field
+                delta_chunk = create_delta_chunk(seq, "data.rationale", text_chunk)
+                session.chunks_sent += 1
+                session.accumulated_text.setdefault("data.rationale", "")
+                session.accumulated_text["data.rationale"] += text_chunk
+
+                if on_chunk:
+                    on_chunk(delta_chunk)
+                yield delta_chunk
+                seq += 1
+
+            # Parse final response
+            try:
+                final_data = parse_llm_response(accumulated_response)
+                final_data = repair_envelope(final_data)
+            except Exception as e:
+                error_chunk = create_error_chunk(
+                    session_id, "E2001", str(e),
+                    recoverable=False,
+                    partial_data={"rationale": session.accumulated_text.get("data.rationale", "")}
+                )
+                yield error_chunk
+                return
+
+            # Send final chunk
+            final_chunk = create_final_chunk(
+                final_data.get("meta", {}),
+                final_data.get("data", {}),
+                {"input_tokens": 0, "output_tokens": seq}  # Placeholder
+            )
+            if on_chunk:
+                on_chunk(final_chunk)
+            yield final_chunk
+
+        except Exception as e:
+            error_chunk = create_error_chunk(
+                session_id, "E2010", f"Stream error: {str(e)}",
+                recoverable=False
+            )
+            yield error_chunk
+        finally:
+            del self.active_sessions[session_id]
+
+    async def _execute_sync(self, module: dict, input_data: dict) -> dict:
+        """Execute module synchronously (fallback)."""
+        # Use existing sync execution
+        return run_module(module["name"], input_data)
+
+    def _build_prompt(self, module: dict, input_data: dict) -> str:
+        """Build prompt from module and input."""
+        prompt_template = module.get("prompt", "")
+        return substitute_arguments(prompt_template, input_data)
+
+    def _extract_media(self, input_data: dict) -> list:
+        """Extract media inputs from input data."""
+        images = input_data.get("images", [])
+        audio = input_data.get("audio", [])
+        video = input_data.get("video", [])
+        return images + audio + video
+
+
+# =============================================================================
+# v2.5 Multimodal Support
+# =============================================================================
+
+SUPPORTED_IMAGE_TYPES = {
+    "image/jpeg", "image/png", "image/webp", "image/gif"
+}
+
+SUPPORTED_AUDIO_TYPES = {
+    "audio/mpeg", "audio/wav", "audio/ogg", "audio/webm"
+}
+
+SUPPORTED_VIDEO_TYPES = {
+    "video/mp4", "video/webm", "video/quicktime"
+}
+
+
+def validate_media_input(media: dict, constraints: dict = None) -> tuple[bool, str]:
+    """
+    Validate a media input object.
+
+    Returns:
+        Tuple of (is_valid, error_message)
+    """
+    constraints = constraints or {}
+
+    media_type = media.get("type")
+    if media_type not in ("url", "base64", "file"):
+        return False, "Invalid media type. Must be url, base64, or file"
+
+    if media_type == "url":
+        url = media.get("url")
+        if not url:
+            return False, "URL media missing 'url' field"
+        if not url.startswith(("http://", "https://")):
+            return False, "URL must start with http:// or https://"
+
+    elif media_type == "base64":
+        mime_type = media.get("media_type")
+        if not mime_type:
+            return False, "Base64 media missing 'media_type' field"
+        data = media.get("data")
+        if not data:
+            return False, "Base64 media missing 'data' field"
+        # Validate base64
+        try:
+            base64.b64decode(data)
+        except Exception:
+            return False, "Invalid base64 encoding"
+
+        # Check size
+        max_size = constraints.get("max_size_bytes", 20 * 1024 * 1024)  # 20MB default
+        data_size = len(data) * 3 // 4  # Approximate decoded size
+        if data_size > max_size:
+            return False, f"Media exceeds size limit ({data_size} > {max_size} bytes)"
+
+    elif media_type == "file":
+        path = media.get("path")
+        if not path:
+            return False, "File media missing 'path' field"
+        if not Path(path).exists():
+            return False, f"File not found: {path}"
+
+    return True, ""
+
+
+def load_media_as_base64(media: dict) -> tuple[str, str]:
+    """
+    Load media from any source and return as base64.
+
+    Returns:
+        Tuple of (base64_data, media_type)
+    """
+    media_type = media.get("type")
+
+    if media_type == "base64":
+        return media["data"], media["media_type"]
+
+    elif media_type == "url":
+        url = media["url"]
+        try:
+            with urlopen(url, timeout=30) as response:
+                data = response.read()
+                content_type = response.headers.get("Content-Type", "application/octet-stream")
+                # Extract just the mime type (remove charset etc)
+                content_type = content_type.split(";")[0].strip()
+                return base64.b64encode(data).decode("utf-8"), content_type
+        except URLError as e:
+            raise ValueError(f"Failed to fetch media from URL: {e}")
+
+    elif media_type == "file":
+        path = Path(media["path"])
+        if not path.exists():
+            raise ValueError(f"File not found: {path}")
+
+        mime_type, _ = mimetypes.guess_type(str(path))
+        mime_type = mime_type or "application/octet-stream"
+
+        with open(path, "rb") as f:
+            data = f.read()
+
+        return base64.b64encode(data).decode("utf-8"), mime_type
+
+    raise ValueError(f"Unknown media type: {media_type}")
+
+
+def prepare_media_for_llm(media_list: list, provider: str = "openai") -> list:
+    """
+    Prepare media inputs for specific LLM provider format.
+
+    Different providers have different multimodal input formats:
+    - OpenAI: {"type": "image_url", "image_url": {"url": "data:..."}}
+    - Anthropic: {"type": "image", "source": {"type": "base64", ...}}
+    - Google: {"inlineData": {"mimeType": "...", "data": "..."}}
+    """
+    prepared = []
+
+    for media in media_list:
+        data, mime_type = load_media_as_base64(media)
+
+        if provider == "openai":
+            prepared.append({
+                "type": "image_url",
+                "image_url": {
+                    "url": f"data:{mime_type};base64,{data}"
+                }
+            })
+        elif provider == "anthropic":
+            prepared.append({
+                "type": "image",
+                "source": {
+                    "type": "base64",
+                    "media_type": mime_type,
+                    "data": data
+                }
+            })
+        elif provider == "google":
+            prepared.append({
+                "inlineData": {
+                    "mimeType": mime_type,
+                    "data": data
+                }
+            })
+        else:
+            # Generic format
+            prepared.append({
+                "type": "base64",
+                "media_type": mime_type,
+                "data": data
+            })
+
+    return prepared
+
+
+def get_modalities_config(module: dict) -> dict:
+    """Get modalities configuration from module."""
+    return module.get("modalities", {
+        "input": ["text"],
+        "output": ["text"]
+    })
+
+
+def supports_multimodal_input(module: dict) -> bool:
+    """Check if module supports multimodal input."""
+    modalities = get_modalities_config(module)
+    input_modalities = modalities.get("input", ["text"])
+    return any(m in input_modalities for m in ["image", "audio", "video"])
+
+
+def supports_multimodal_output(module: dict) -> bool:
+    """Check if module supports multimodal output."""
+    modalities = get_modalities_config(module)
+    output_modalities = modalities.get("output", ["text"])
+    return any(m in output_modalities for m in ["image", "audio", "video"])
+
+
+def validate_multimodal_input(input_data: dict, module: dict) -> tuple[bool, list[str]]:
+    """
+    Validate multimodal input against module configuration.
+
+    Returns:
+        Tuple of (is_valid, list of errors)
+    """
+    errors = []
+    modalities = get_modalities_config(module)
+    input_modalities = set(modalities.get("input", ["text"]))
+    constraints = modalities.get("constraints", {})
+
+    # Check images
+    images = input_data.get("images", [])
+    if images:
+        if "image" not in input_modalities:
+            errors.append("Module does not support image input")
+        else:
+            max_images = constraints.get("max_images", 10)
+            if len(images) > max_images:
+                errors.append(f"Too many images ({len(images)} > {max_images})")
+
+            for i, img in enumerate(images):
+                valid, err = validate_media_input(img, constraints)
+                if not valid:
+                    errors.append(f"Image {i}: {err}")
+
+    # Check audio
+    audio = input_data.get("audio", [])
+    if audio:
+        if "audio" not in input_modalities:
+            errors.append("Module does not support audio input")
+
+    # Check video
+    video = input_data.get("video", [])
+    if video:
+        if "video" not in input_modalities:
+            errors.append("Module does not support video input")
+
+    return len(errors) == 0, errors
+
+
+# =============================================================================
+# v2.5 Runtime Capabilities
+# =============================================================================
+
+def get_runtime_capabilities() -> dict:
+    """Get runtime capabilities for v2.5."""
+    return {
+        "runtime": "cognitive-runtime-python",
+        "version": "2.5.0",
+        "spec_version": "2.5",
+        "capabilities": {
+            "streaming": True,
+            "multimodal": {
+                "input": ["image"],  # Basic image support
+                "output": []  # No generation yet
+            },
+            "max_media_size_mb": 20,
+            "supported_transports": ["ndjson"],  # SSE requires async server
+            "conformance_level": 4
+        }
+    }