nebu 0.1.45__py3-none-any.whl → 0.1.48__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nebu/__init__.py +0 -1
- nebu/builders/builder.py +0 -0
- nebu/data.py +24 -3
- nebu/processors/consumer.py +581 -389
- nebu/processors/decorate.py +441 -411
- {nebu-0.1.45.dist-info → nebu-0.1.48.dist-info}/METADATA +1 -1
- {nebu-0.1.45.dist-info → nebu-0.1.48.dist-info}/RECORD +10 -12
- {nebu-0.1.45.dist-info → nebu-0.1.48.dist-info}/WHEEL +1 -1
- nebu/adapter.py +0 -20
- nebu/chatx/convert.py +0 -362
- nebu/chatx/openai.py +0 -976
- {nebu-0.1.45.dist-info → nebu-0.1.48.dist-info}/licenses/LICENSE +0 -0
- {nebu-0.1.45.dist-info → nebu-0.1.48.dist-info}/top_level.txt +0 -0
nebu/processors/decorate.py
CHANGED
```diff
@@ -1,8 +1,11 @@
 import ast  # For parsing notebook code
 import inspect
+import json  # Add json import
 import os  # Add os import
 import re  # Import re for fallback check
+import tempfile  # Add tempfile import
 import textwrap
+import uuid  # Add uuid import
 from typing import (
     Any,
     Callable,
@@ -14,18 +17,26 @@ from typing import (
     get_origin,
     get_type_hints,
 )
+from urllib.parse import urlparse  # Add urlparse import

 import dill  # Add dill import
+import requests  # Add requests import
+from botocore.exceptions import ClientError  # Import ClientError
 from pydantic import BaseModel

 from nebu.containers.models import (
     V1AuthzConfig,
+    V1ContainerHealthCheck,
     V1ContainerRequest,
     V1ContainerResources,
     V1EnvVar,
     V1Meter,
+    V1PortRequest,
+    V1SSHKey,
+    V1VolumeDriver,
     V1VolumePath,
 )
+from nebu.data import Bucket  # Import Bucket
 from nebu.meta import V1ResourceMetaRequest
 from nebu.processors.models import (
     Message,
@@ -43,6 +54,15 @@ _NEBU_EXPLICIT_SOURCE_ATTR = "_nebu_explicit_source"
 # Environment variable to prevent decorator recursion inside consumer
 _NEBU_INSIDE_CONSUMER_ENV_VAR = "_NEBU_INSIDE_CONSUMER_EXEC"

+# Define target directory in container
+CONTAINER_CODE_DIR = "/app/src"
+# Define S3 prefix for code storage (under the base URI from token endpoint)
+S3_CODE_PREFIX = "nebu-code"
+# Define the token endpoint URL (replace with actual URL)
+# Use environment variable for flexibility, provide a default for local dev
+NEBU_API_BASE_URL = os.environ.get("NEBU_API_BASE_URL", "http://localhost:8080")
+S3_TOKEN_ENDPOINT = f"{NEBU_API_BASE_URL}/iam/s3-token"
+
 # --- Jupyter Helper Functions ---

```
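Note: the new constants are resolved at import time, so `NEBU_API_BASE_URL` must be set in the environment before `nebu.processors.decorate` is first imported. A minimal sketch of how they resolve (values taken directly from the hunk above):

```python
import os

# Mirrors the new module-level constants: the API base URL comes from the
# environment (with a local-dev default) and the token endpoint hangs off it.
NEBU_API_BASE_URL = os.environ.get("NEBU_API_BASE_URL", "http://localhost:8080")
S3_TOKEN_ENDPOINT = f"{NEBU_API_BASE_URL}/iam/s3-token"

print(S3_TOKEN_ENDPOINT)  # http://localhost:8080/iam/s3-token when the var is unset
```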
```diff
@@ -51,27 +71,28 @@ def is_jupyter_notebook():
     Determine if the current code is running inside a Jupyter notebook.
     Returns bool: True if running inside a Jupyter notebook, False otherwise.
     """
-    print("[DEBUG Helper] Checking if running in Jupyter...")
+    # print("[DEBUG Helper] Checking if running in Jupyter...") # Reduce verbosity
     try:
-        import IPython
+        # Use importlib to avoid runtime dependency if not needed
+        import importlib.util
+
+        if importlib.util.find_spec("IPython") is None:
+            return False
+        import IPython  # Now safe to import

         ip = IPython.get_ipython()
         if ip is None:
-            print("[DEBUG Helper] is_jupyter_notebook: No IPython instance found.")
+            # print("[DEBUG Helper] is_jupyter_notebook: No IPython instance found.")
             return False
         class_name = str(ip.__class__)
-        print(f"[DEBUG Helper] is_jupyter_notebook: IPython class name: {class_name}")
+        # print(f"[DEBUG Helper] is_jupyter_notebook: IPython class name: {class_name}")
         if "ZMQInteractiveShell" in class_name:
-            print(
-                "[DEBUG Helper] is_jupyter_notebook: Jupyter detected (ZMQInteractiveShell)."
-            )
+            # print("[DEBUG Helper] is_jupyter_notebook: Jupyter detected (ZMQInteractiveShell).")
             return True
-        print(
-            "[DEBUG Helper] is_jupyter_notebook: Not Jupyter (IPython instance found, but not ZMQInteractiveShell)."
-        )
+        # print("[DEBUG Helper] is_jupyter_notebook: Not Jupyter (IPython instance found, but not ZMQInteractiveShell).")
         return False
     except Exception as e:
-        print(f"[DEBUG Helper] is_jupyter_notebook: Exception occurred: {e}")
+        # print(f"[DEBUG Helper] is_jupyter_notebook: Exception occurred: {e}") # Reduce verbosity
         return False


```
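Note: the rewritten helper probes for IPython with `importlib.util.find_spec` before importing it, so plain-Python environments never pay for the import. The same strategy distilled into a standalone sketch (`running_in_jupyter` is a hypothetical name for illustration):

```python
import importlib.util


def running_in_jupyter() -> bool:
    # Probe for IPython without importing it eagerly, then check the shell
    # class name that the notebook kernel uses.
    if importlib.util.find_spec("IPython") is None:
        return False
    import IPython

    ip = IPython.get_ipython()
    return ip is not None and "ZMQInteractiveShell" in str(ip.__class__)
```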
```diff
@@ -224,8 +245,11 @@ def include(obj: Any) -> Any:
     """
     Decorator to explicitly capture the source code of a function or class,
     intended for use in environments where inspect/dill might fail (e.g., Jupyter).
+    NOTE: This source is currently added to environment variables. Consider if
+    large included objects should also use S3.
     """
     try:
+        # Still use dill for @include as it might capture things not in the main file dir
         source = dill.source.getsource(obj)
         dedented_source = textwrap.dedent(source)
         setattr(obj, _NEBU_EXPLICIT_SOURCE_ATTR, dedented_source)
```
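Note: `@include` keeps the dill-based capture rather than the new S3 path. A minimal reproduction of the mechanism, using the `_nebu_explicit_source` attribute name defined by `_NEBU_EXPLICIT_SOURCE_ATTR` above:

```python
import textwrap

import dill


def include(obj):
    # dill recovers the object's source text; it is dedented and stored on the
    # object so a consumer can later re-execute it.
    source = textwrap.dedent(dill.source.getsource(obj))
    setattr(obj, "_nebu_explicit_source", source)
    return obj


@include
def greet(name: str) -> str:
    return f"hello {name}"


print(greet._nebu_explicit_source)  # prints greet's captured source
```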
```diff
@@ -356,13 +380,17 @@ def processor(
     no_delete: bool = False,
     include: Optional[List[Any]] = None,
     init_func: Optional[Callable[[], None]] = None,
+    queue: Optional[str] = None,
+    timeout: Optional[str] = None,
+    ssh_keys: Optional[List[V1SSHKey]] = None,
+    ports: Optional[List[V1PortRequest]] = None,
+    proxy_port: Optional[int] = None,
+    health_check: Optional[V1ContainerHealthCheck] = None,
 ):
     def decorator(
         func: Callable[[Any], Any],
-    ) -> Processor | Callable[[Any], Any]:
+    ) -> Processor | Callable[[Any], Any]:
         # --- Prevent Recursion Guard ---
-        # If this env var is set, we are inside the consumer's exec context.
-        # Return the original function without applying the decorator again.
         if os.environ.get(_NEBU_INSIDE_CONSUMER_ENV_VAR) == "1":
             print(
                 f"[DEBUG Decorator] Guard triggered for '{func.__name__}'. Returning original function."
```
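Note: the six new keyword arguments are forwarded straight to the container request (see the final hunk). A hypothetical invocation might look like the sketch below; the image name, queue, timeout string, and port number are illustrative, and the assumption that `Message` exposes its typed payload as `.content` is not confirmed by this diff:

```python
from pydantic import BaseModel


class Echo(BaseModel):
    text: str


# Hypothetical usage of the keyword arguments added in 0.1.48; values are
# illustrative, not defaults.
@processor(
    image="python:3.11-slim",
    queue="default",
    timeout="600s",
    proxy_port=8080,
    ports=None,         # or a list of V1PortRequest
    ssh_keys=None,      # or a list of V1SSHKey
    health_check=None,  # or a V1ContainerHealthCheck
)
def echo(msg: Message[Echo]) -> Echo:
    # Assumes the typed payload is available as msg.content.
    return Echo(text=msg.content.text)
```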
```diff
@@ -370,111 +398,263 @@ def processor(
             return func
         # --- End Guard ---

-        # Moved init print here
         print(
             f"[DEBUG Decorator Init] @processor decorating function '{func.__name__}'"
         )
         all_env = env or []
         processor_name = func.__name__
+        all_volumes = volumes or []  # Initialize volumes list

-        # ---
-        print("[DEBUG Decorator]
-
-
-
-
-
-
-
+        # --- Get Decorated Function File Path and Directory ---
+        print("[DEBUG Decorator] Getting source file path for decorated function...")
+        func_file_path: Optional[str] = None
+        func_dir: Optional[str] = None
+        rel_func_path: Optional[str] = None  # Relative path within func_dir
+        try:
+            func_file_path = inspect.getfile(func)
+            # Resolve symlinks to get the actual directory containing the file
+            func_file_path = os.path.realpath(func_file_path)
+            func_dir = os.path.dirname(func_file_path)
+            # Calculate relative path based on the resolved directory
+            rel_func_path = os.path.relpath(func_file_path, func_dir)
+            print(f"[DEBUG Decorator] Found real file path: {func_file_path}")
+            print(f"[DEBUG Decorator] Found function directory: {func_dir}")
+            print(f"[DEBUG Decorator] Relative function path: {rel_func_path}")
+        except (TypeError, OSError) as e:
+            # TypeError can happen if func is not a module, class, method, function, traceback, frame, or code object
+            raise ValueError(
+                f"Could not get file path for function '{processor_name}'. Ensure it's defined in a file and is a standard function/method."
+            ) from e
+        except Exception as e:
+            raise ValueError(
+                f"Unexpected error getting file path for '{processor_name}': {e}"
+            ) from e
+
+        # --- Fetch S3 Token and Upload Code ---
+        s3_destination_uri: Optional[str] = None
+        if not func_dir or not rel_func_path:
+            # This case should be caught by the exceptions above, but double-check
+            raise ValueError(
+                "Could not determine function directory or relative path for S3 upload."
+            )
+
+        print(f"[DEBUG Decorator] Fetching S3 token from: {S3_TOKEN_ENDPOINT}")
+        try:
+            response = requests.get(S3_TOKEN_ENDPOINT, timeout=10)  # Add timeout
+            response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
+            s3_token_data = response.json()
+
+            aws_access_key_id = s3_token_data.get("access_key_id")
+            aws_secret_access_key = s3_token_data.get("secret_access_key")
+            aws_session_token = s3_token_data.get(
+                "session_token"
+            )  # May be None for non-STS keys
+            s3_base_uri = s3_token_data.get("s3_base_uri")
+
+            if not all([aws_access_key_id, aws_secret_access_key, s3_base_uri]):
+                raise ValueError(
+                    "Missing required fields (access_key_id, secret_access_key, s3_base_uri) in S3 token response."
                 )
-
-
-
+
+            # Construct unique S3 path: s3://<base_bucket>/<base_prefix>/<code_prefix>/<processor_name>-<uuid>/
+            unique_suffix = f"{processor_name}-{uuid.uuid4()}"
+            parsed_base = urlparse(s3_base_uri)
+            if not parsed_base.scheme == "s3" or not parsed_base.netloc:
+                raise ValueError(f"Invalid s3_base_uri received: {s3_base_uri}")
+
+            base_path = parsed_base.path.strip("/")
+            s3_dest_components = [S3_CODE_PREFIX, unique_suffix]
+            if base_path:
+                # Handle potential multiple path segments in base_path
+                s3_dest_components.insert(0, *base_path.split("/"))
+
+            # Filter out empty strings that might result from split
+            s3_destination_key_components = [
+                comp for comp in s3_dest_components if comp
+            ]
+            s3_destination_key = (
+                "/".join(s3_destination_key_components) + "/"
+            )  # Ensure trailing slash for prefix
+            s3_destination_uri = f"s3://{parsed_base.netloc}/{s3_destination_key}"
+
+            print(
+                f"[DEBUG Decorator] Uploading code from '{func_dir}' to '{s3_destination_uri}'"
+            )
+
+            # Instantiate Bucket with temporary credentials
+            s3_bucket = Bucket(
+                verbose=True,  # Make verbosity configurable later if needed
+                aws_access_key_id=aws_access_key_id,
+                aws_secret_access_key=aws_secret_access_key,
+                aws_session_token=aws_session_token,
+            )
+
+            # Use sync to upload directory contents recursively
+            # Ensure source directory exists before syncing
+            if not os.path.isdir(func_dir):
+                raise ValueError(
+                    f"Source path for upload is not a directory: {func_dir}"
                 )
-        else:
-            print("[DEBUG Decorator] Non-Jupyter environment detected.")
-        # --- End Environment Determination ---

-
+            s3_bucket.sync(
+                source=func_dir,
+                destination=s3_destination_uri,
+                delete=False,
+                dry_run=False,
+            )
+            print("[DEBUG Decorator] S3 code upload completed.")
+
+        except requests.exceptions.RequestException as e:
+            print(f"ERROR: Failed to fetch S3 token from {S3_TOKEN_ENDPOINT}: {e}")
+            raise RuntimeError(
+                f"Failed to fetch S3 token from {S3_TOKEN_ENDPOINT}: {e}"
+            ) from e
+        except ClientError as e:
+            print(f"ERROR: Failed to upload code to S3 {s3_destination_uri}: {e}")
+            # Attempt to provide more context from the error if possible
+            error_code = e.response.get("Error", {}).get("Code")
+            error_msg = e.response.get("Error", {}).get("Message")
+            print(f"  S3 Error Code: {error_code}, Message: {error_msg}")
+            raise RuntimeError(
+                f"Failed to upload code to {s3_destination_uri}: {e}"
+            ) from e
+        except ValueError as e:  # Catch ValueErrors from validation
+            print(f"ERROR: Configuration or response data error: {e}")
+            raise RuntimeError(f"Configuration or response data error: {e}") from e
+        except Exception as e:
+            print(f"ERROR: Unexpected error during S3 token fetch or upload: {e}")
+            # Consider logging traceback here for better debugging
+            import traceback
+
+            traceback.print_exc()
+            raise RuntimeError(f"Unexpected error during S3 setup: {e}") from e
+
+        # --- Process Manually Included Objects (Keep for now, add source via env) ---
+        # This part remains unchanged for now, using @include and environment variables.
+        # Future: Could potentially upload these to S3 as well if they become large.
         included_sources: Dict[Any, Any] = {}
+        notebook_code_for_include = None  # Get notebook code only if needed for include
         if include:
-
+            # Determine if we are in Jupyter only if needed for include fallback
+            # print("[DEBUG Decorator] Processing manually included objects...")
+            is_jupyter_env = is_jupyter_notebook()
+            if is_jupyter_env:
+                notebook_code_for_include = get_notebook_executed_code()
+
             for i, obj in enumerate(include):
                 obj_name_str = getattr(obj, "__name__", str(obj))
-                print(
-
+                # print(f"[DEBUG Decorator] Getting source for manually included object: {obj_name_str}")
+                # Pass notebook code only if available and needed by get_model_source
+                obj_source = get_model_source(
+                    obj, notebook_code_for_include if is_jupyter_env else None
                 )
-                obj_source = get_model_source(obj, notebook_code)
                 if obj_source:
                     included_sources[obj] = obj_source
-
-
-
-
-
+                    # Decide how to pass included source - keep using Env Vars for now
+                    env_key_base = f"INCLUDED_OBJECT_{i}"
+                    if isinstance(obj_source, str):
+                        all_env.append(
+                            V1EnvVar(key=f"{env_key_base}_SOURCE", value=obj_source)
+                        )
+                        # print(f"[DEBUG Decorator] Added string source to env for included obj: {obj_name_str}")
+                    elif isinstance(obj_source, tuple):
+                        # Handle tuple source (origin, args) - assumes get_model_source/get_type_source logic
+                        origin_src, arg_srcs = obj_source
+                        if origin_src and isinstance(origin_src, str):
+                            all_env.append(
+                                V1EnvVar(key=f"{env_key_base}_SOURCE", value=origin_src)
+                            )
+                        for j, arg_src in enumerate(arg_srcs):
+                            if isinstance(arg_src, str):
+                                all_env.append(
+                                    V1EnvVar(
+                                        key=f"{env_key_base}_ARG_{j}_SOURCE",
+                                        value=arg_src,
+                                    )
+                                )
+                        # Handle nested tuples if necessary, or keep it simple
+                        # print(f"[DEBUG Decorator] Added tuple source to env for included obj: {obj_name_str}")
+                    else:
+                        print(
+                            f"Warning: Unknown source type for included object {obj_name_str}: {type(obj_source)}"
+                        )
                 else:
                     print(
-                        f"Warning: Could not retrieve source for manually included object: {obj_name_str}"
+                        f"Warning: Could not retrieve source for manually included object: {obj_name_str}. It might not be available in the consumer."
                     )
-            print(
-                f"[DEBUG Decorator] Finished processing included objects. Sources found: {len(included_sources)}"
-            )
-        else:
-            print("[DEBUG Decorator] No manually included objects specified.")
         # --- End Manually Included Objects ---

-        # --- Validate Function Signature and Types ---
+        # --- Validate Function Signature and Types (Keep as is) ---
         print(
             f"[DEBUG Decorator] Validating signature and type hints for {processor_name}..."
         )
         sig = inspect.signature(func)
         params = list(sig.parameters.values())
         if len(params) != 1:
-            raise TypeError(f"{processor_name} must take exactly one parameter")
+            raise TypeError(
+                f"{processor_name} must take exactly one parameter"
+            )  # Stricter check

         try:
+            # Attempt to resolve type hints
             type_hints = get_type_hints(func, globalns=func.__globals__, localns=None)
-            print(f"[DEBUG Decorator]
+            print(f"[DEBUG Decorator] Resolved type hints: {type_hints}")
+        except NameError as e:
+            # Specific handling for NameError (common in notebooks/dynamic environments)
+            print(
+                f"Warning: Could not fully resolve type hints for {processor_name} due to NameError: {e}. Type validation might be incomplete."
+            )
+            # Try to get raw annotations as fallback?
+            type_hints = getattr(func, "__annotations__", {})
+            print(f"[DEBUG Decorator] Using raw annotations as fallback: {type_hints}")
         except Exception as e:
             print(f"[DEBUG Decorator] Error getting type hints: {e}")
+            # Potentially re-raise or handle based on severity
             raise TypeError(
-                f"Could not evaluate type hints for {processor_name}: {e}"
+                f"Could not evaluate type hints for {processor_name}: {e}. Ensure all type dependencies are defined or imported."
             ) from e

         param_name = params[0].name
         if param_name not in type_hints:
+            # Allow missing param type hint if using raw annotations? Maybe not.
             raise TypeError(
-                f"{processor_name} parameter '{param_name}' must have type hint"
+                f"{processor_name} parameter '{param_name}' must have a type hint"
             )
-        param_type = type_hints[param_name]
-
+        param_type = type_hints.get(
+            param_name
+        )  # Use .get for safety with raw annotations fallback
+        param_type_str_repr = str(param_type)  # Use string representation
         print(
             f"[DEBUG Decorator] Parameter '{param_name}' type hint: {param_type_str_repr}"
         )

         if "return" not in type_hints:
-            raise TypeError(f"{processor_name} must have return type hint")
-        return_type = type_hints["return"]
-
+            raise TypeError(f"{processor_name} must have a return type hint")
+        return_type = type_hints.get("return")
+        return_type_str_repr = str(return_type)
+        print(f"[DEBUG Decorator] Return type hint: {return_type_str_repr}")

         # --- Determine Input Type (StreamMessage, ContentType) ---
+        # This logic remains mostly the same, using the resolved types
         print(
             f"[DEBUG Decorator] Determining input type structure for param type hint: {param_type_str_repr}"
         )
-        origin = get_origin(param_type)
-        args = get_args(param_type)
+        origin = get_origin(param_type) if param_type else None
+        args = get_args(param_type) if param_type else tuple()
         print(f"[DEBUG Decorator] get_origin result: {origin}, get_args result: {args}")
         is_stream_message = False
         content_type = None

+        # Use Message class directly for comparison
+        message_cls = Message  # Get the class object
+
         # Check 1: Standard introspection
-        if origin is
-            origin is
-            and origin.__name__ == Message.__name__
-            and origin.__module__ == Message.__module__
+        if origin is message_cls or (
+            isinstance(origin, type) and origin is message_cls
         ):
-            print(
+            print(
+                "[DEBUG Decorator] Input type identified as Message via get_origin/isinstance."
+            )
             is_stream_message = True
             if args:
                 content_type = args[0]
```
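Note: `list.insert` takes exactly one value, so `s3_dest_components.insert(0, *base_path.split("/"))` raises a `TypeError` whenever the base URI's path has more than one segment. A standalone re-derivation of the intended key layout that sidesteps this (`build_code_destination` is a hypothetical helper name):

```python
import uuid
from urllib.parse import urlparse

S3_CODE_PREFIX = "nebu-code"  # matches the module constant above


def build_code_destination(s3_base_uri: str, processor_name: str) -> str:
    # Intended layout: s3://<bucket>/<base_prefix>/<code_prefix>/<name>-<uuid>/
    parsed = urlparse(s3_base_uri)
    if parsed.scheme != "s3" or not parsed.netloc:
        raise ValueError(f"Invalid s3_base_uri: {s3_base_uri}")
    components = [c for c in parsed.path.strip("/").split("/") if c]
    components += [S3_CODE_PREFIX, f"{processor_name}-{uuid.uuid4()}"]
    # Trailing slash so the URI names an S3 prefix rather than a single object.
    return f"s3://{parsed.netloc}/" + "/".join(components) + "/"


print(build_code_destination("s3://my-bucket/tenants/abc", "my_processor"))
# e.g. s3://my-bucket/tenants/abc/nebu-code/my_processor-<uuid>/
```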
```diff
@@ -485,61 +665,20 @@ def processor(
                 print(
                     "[DEBUG Decorator] Message detected, but no generic arguments found via get_args."
                 )
-        # Check 2: Direct type check
-        elif isinstance(param_type, type) and param_type is Message:
+        # Check 2: Direct type check (Handles cases where get_origin might fail but type is correct)
+        elif isinstance(param_type, type) and param_type is message_cls:
             print("[DEBUG Decorator] Input type identified as direct Message type.")
             is_stream_message = True
-        # Check 3: Regex fallback
-        elif
-
-
-            )
-            #
-
-
-                param_type_str_repr,
-            )
-            if generic_match:
-                print("[DEBUG Decorator] Regex matched generic Message pattern!")
-                is_stream_message = True
-                content_type_name_str = generic_match.group(1).strip()
-                print(
-                    f"[DEBUG Decorator] Captured content type name via regex: '{content_type_name_str}'"
-                )
-                try:
-                    resolved_type = eval(content_type_name_str, func.__globals__)
-                    content_type = resolved_type
-                    print(
-                        f"[DEBUG Decorator] Successfully resolved content type name '{content_type_name_str}' to type: {content_type}"
-                    )
-                except NameError:
-                    print(
-                        f"[DEBUG Decorator] Warning: Regex found content type name '{content_type_name_str}', but it's not defined in function's globals. Consumer might fail."
-                    )
-                    content_type = None
-                except Exception as e:
-                    print(
-                        f"[DEBUG Decorator] Warning: Error evaluating content type name '{content_type_name_str}': {e}"
-                    )
-                    content_type = None
-        else:
-            # Use param_type_str_repr in match calls
-            simple_match = re.match(
-                r"^<class '(?:[a-zA-Z0-9_.]+\.)?Message'>$",
-                param_type_str_repr,
-            )
-            if simple_match:
-                print(
-                    "[DEBUG Decorator] Regex identified direct Message (no generic) from string."
-                )
-                is_stream_message = True
-            else:
-                print(
-                    f"[DEBUG Decorator] Regex did not match Message pattern for string '{param_type_str_repr}'. Assuming not StreamMessage."
-                )
-        else:
+        # Check 3: Regex fallback might be less reliable now, but keep as last resort?
+        elif (
+            origin is None and param_type is not None
+        ):  # Only if origin failed and type exists
+            # ... (existing regex fallback logic using param_type_str_repr) ...
+            pass  # Keep existing regex logic here if desired
+
+        else:  # Handle cases where param_type might be None or origin is something else
             print(
-                f"[DEBUG Decorator] Input parameter '{param_name}' type ({param_type_str_repr}) identified as non-
+                f"[DEBUG Decorator] Input parameter '{param_name}' type ({param_type_str_repr}) identified as non-Message type."
             )

         print(
@@ -552,33 +691,72 @@ def processor(
             "[DEBUG Decorator] Validating parameter and return types are BaseModel subclasses..."
         )

+        # Define check_basemodel locally or ensure it's available
         def check_basemodel(type_to_check: Optional[Any], desc: str):
-            print(
-
-            )
-            if not type_to_check:
+            # print(f"[DEBUG Decorator] check_basemodel: Checking {desc} - Type: {type_to_check}") # Verbose
+            if type_to_check is None or type_to_check is Any:
                 print(
-                    f"[DEBUG Decorator] check_basemodel: Skipping check for {desc} (type is None
+                    f"[DEBUG Decorator] check_basemodel: Skipping check for {desc} (type is None or Any)."
                 )
                 return
-
-
-
-
-
-
-
-
-
-
+            # Handle Optional[T] by getting the inner type
+            actual_type = type_to_check
+            type_origin = get_origin(type_to_check)
+            if (
+                type_origin is Optional or str(type_origin) == "typing.Union"
+            ):  # Handle Optional and Union for None
+                type_args = get_args(type_to_check)
+                # Find the first non-None type argument
+                non_none_args = [arg for arg in type_args if arg is not type(None)]
+                if len(non_none_args) == 1:
+                    actual_type = non_none_args[0]
+                    # print(f"[DEBUG Decorator] check_basemodel: Unwrapped Optional/Union to {actual_type} for {desc}")
+                else:
+                    # Handle complex Unions later if needed, skip check for now
+                    print(
+                        f"[DEBUG Decorator] check_basemodel: Skipping check for complex Union {desc}: {type_to_check}"
+                    )
+                    return
+
+            # Check the actual type
+            effective_type = (
+                get_origin(actual_type) or actual_type
+            )  # Handle generics like List[Model]
+            # print(f"[DEBUG Decorator] check_basemodel: Effective type for {desc}: {effective_type}") # Verbose
+            if isinstance(effective_type, type) and not issubclass(
+                effective_type, BaseModel
+            ):
+                # Allow non-BaseModel basic types (str, int, bool, float, dict, list)
+                allowed_non_model_types = (
+                    str,
+                    int,
+                    float,
+                    bool,
+                    dict,
+                    list,
+                    type(None),
                 )
-
+                if effective_type not in allowed_non_model_types:
+                    print(
+                        f"[DEBUG Decorator] check_basemodel: Error - {desc} effective type ({effective_type.__name__}) is not BaseModel or standard type."
+                    )
+                    raise TypeError(
+                        f"{desc} effective type ({effective_type.__name__}) must be BaseModel subclass or standard type (str, int, etc.)"
+                    )
+                else:
+                    print(
+                        f"[DEBUG Decorator] check_basemodel: OK - {desc} is standard type {effective_type.__name__}."
+                    )
+
+            elif not isinstance(effective_type, type):
+                # Allow TypeVars or other constructs for now? Or enforce BaseModel? Enforce for now.
                 print(
-                    f"[DEBUG Decorator] check_basemodel: Warning - {desc} effective type '{
+                    f"[DEBUG Decorator] check_basemodel: Warning - {desc} effective type '{effective_type}' is not a class. Cannot verify BaseModel subclass."
                 )
+                # Revisit this if TypeVars bound to BaseModel are needed.
             else:
                 print(
-                    f"[DEBUG Decorator] check_basemodel: OK - {desc} effective type ({
+                    f"[DEBUG Decorator] check_basemodel: OK - {desc} effective type ({effective_type.__name__}) is a BaseModel subclass."
                 )

         effective_param_type = (
```
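Note: `get_origin(Optional[T])` evaluates to `typing.Union`, never to `Optional` itself, so the `type_origin is Optional` arm is effectively dead code and the string comparison does the real work. The unwrapping logic in isolation:

```python
from typing import Any, Optional, Union, get_args, get_origin


def unwrap_optional(tp: Any) -> Any:
    # Strip a lone NoneType member from a Union and return the remaining type;
    # anything else is passed through unchanged.
    if get_origin(tp) is Union:
        non_none = [a for a in get_args(tp) if a is not type(None)]
        if len(non_none) == 1:
            return non_none[0]
    return tp


assert unwrap_optional(Optional[int]) is int
assert unwrap_optional(Union[int, None]) is int
assert unwrap_optional(int) is int
```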
```diff
@@ -586,323 +764,154 @@ def processor(
             if is_stream_message and content_type
             else param_type
             if not is_stream_message
-            else None
+            else None  # If just Message without content type, param is Message itself (not BaseModel)
         )
-
+        # Check param only if it's not the base Message class
+        if effective_param_type is not message_cls:
+            check_basemodel(effective_param_type, f"Parameter '{param_name}'")
         check_basemodel(return_type, "Return value")
         print("[DEBUG Decorator] Type validation complete.")
         # --- End Type Validation ---

-        # ---
-        print(
-
-
-
-
+        # --- Populate Environment Variables ---
+        print("[DEBUG Decorator] Populating environment variables...")
+        # Keep: FUNCTION_NAME, PARAM_TYPE_STR, RETURN_TYPE_STR, IS_STREAM_MESSAGE, CONTENT_TYPE_NAME, MODULE_NAME
+        # Add: NEBU_ENTRYPOINT_MODULE_PATH
+        # Add: Included object sources (if any)
+        # Add: INIT_FUNC_NAME (if provided)

-
-
-
-            )
-
-
-
-
-            )
-
-
+        # Basic info needed by consumer to find and run the function
+        all_env.append(V1EnvVar(key="FUNCTION_NAME", value=processor_name))
+        if rel_func_path:
+            # Convert OS-specific path to module path (e.g., subdir/file.py -> subdir.file)
+            module_path = rel_func_path.replace(os.sep, ".")
+            if module_path.endswith(".py"):
+                module_path = module_path[:-3]
+            # Handle __init__.py -> treat as package name
+            if module_path.endswith(".__init__"):
+                module_path = module_path[: -len(".__init__")]
+            elif module_path == "__init__":  # Top-level __init__.py
+                module_path = ""  # Or handle differently? Let's assume it means import '.'? Maybe error?
+
+            # For now, just pass the relative file path, consumer will handle conversion
+            all_env.append(
+                V1EnvVar(key="NEBU_ENTRYPOINT_MODULE_PATH", value=rel_func_path)
             )
-        if function_source:
-            print(
-                f"[DEBUG Decorator] Found function '{processor_name}' source in notebook history."
-            )
-        else:
-            print(
-                f"[DEBUG Decorator] Failed to find function '{processor_name}' in notebook history, falling back to dill."
-            )
-        if function_source is None:
             print(
-                f"[DEBUG Decorator]
-            )
-            try:
-                raw_function_source = dill.source.getsource(func)
-                function_source = textwrap.dedent(raw_function_source)
-                print(
-                    f"[DEBUG Decorator] Successfully got source via dill for '{processor_name}'."
-                )
-            except (IOError, TypeError, OSError) as e:
-                print(
-                    f"[DEBUG Decorator] Dill fallback failed for '{processor_name}': {e}"
-                )
-                if not (in_jupyter and notebook_code):
-                    raise ValueError(
-                        f"Could not retrieve source for '{processor_name}' using dill: {e}"
-                    ) from e
-
-        if function_source is None:  # Final check after all attempts
-            raise ValueError(
-                f"Failed to obtain source code for function '{processor_name}' using any method."
+                f"[DEBUG Decorator] Set NEBU_ENTRYPOINT_MODULE_PATH to: {rel_func_path}"
             )
+        else:
+            # Should have errored earlier if rel_func_path is None
+            raise RuntimeError("Internal error: Relative function path not determined.")

-        print(f"[DEBUG Decorator] Final function source obtained for '{processor_name}' (len: {len(function_source)}). Source starts:\n-------\
-{function_source[:250]}...\n-------")
-        # --- End Function Source ---
-
-        # --- Get Before Function Source (if provided) ---
-        init_func_source = None
-        init_func_name = None
         if init_func:
-
-
-            # Validate signature (must take no arguments)
+            init_func_name = init_func.__name__  # Get name here
+            # Validate signature (must take no arguments) - moved validation earlier conceptually
             before_sig = inspect.signature(init_func)
             if len(before_sig.parameters) != 0:
                 raise TypeError(
                     f"init_func '{init_func_name}' must take zero parameters"
                 )
+            all_env.append(V1EnvVar(key="INIT_FUNC_NAME", value=init_func_name))
+            print(f"[DEBUG Decorator] Set INIT_FUNC_NAME to: {init_func_name}")

-
-
-
-            )
-
-
-
-
-
-
-
-                f"[DEBUG Decorator] Attempting notebook history extraction for init_func '{init_func_name}'..."
-            )
-            init_func_source = extract_definition_source_from_string(
-                notebook_code, init_func_name, ast.FunctionDef
-            )
-            if init_func_source:
-                print(
-                    f"[DEBUG Decorator] Found init_func '{init_func_name}' source in notebook history."
-                )
-            else:
-                print(
-                    f"[DEBUG Decorator] Failed to find init_func '{init_func_name}' in notebook history, falling back to dill."
-                )
-
-            if init_func_source is None:
-                print(
-                    f"[DEBUG Decorator] Using dill fallback for init_func '{init_func_name}'..."
-                )
-                try:
-                    raw_init_func_source = dill.source.getsource(init_func)
-                    init_func_source = textwrap.dedent(raw_init_func_source)
-                    print(
-                        f"[DEBUG Decorator] Successfully got source via dill for '{init_func_name}'."
-                    )
-                except (IOError, TypeError, OSError) as e:
-                    print(
-                        f"[DEBUG Decorator] Dill fallback failed for '{init_func_name}': {e}"
-                    )
-                    # Raise error if we couldn't get the source by any means
-                    raise ValueError(
-                        f"Could not retrieve source for init_func '{init_func_name}': {e}"
-                    ) from e
-
-            if init_func_source is None:  # Final check
-                raise ValueError(
-                    f"Failed to obtain source code for init_func '{init_func_name}' using any method."
-                )
-            print(
-                f"[DEBUG Decorator] Final init_func source obtained for '{init_func_name}'."
-            )
-        else:
-            print("[DEBUG Decorator] No init_func provided.")
-        # --- End Before Function Source ---
-
-        # --- Get Model Sources ---
-        print("[DEBUG Decorator] Getting model sources...")
-        input_model_source = None
-        output_model_source = None
-        content_type_source = None
-        print("[DEBUG Decorator] Getting base Message source...")
-        stream_message_source = get_type_source(Message, notebook_code)
-
-        if is_stream_message:
-            print(
-                f"[DEBUG Decorator] Input is StreamMessage. Content type: {content_type}"
-            )
-            if content_type:
-                print(
-                    f"[DEBUG Decorator] Getting source for content_type: {content_type}"
+        # Type info (still useful for deserialization/validation in consumer)
+        all_env.append(V1EnvVar(key="PARAM_TYPE_STR", value=param_type_str_repr))
+        all_env.append(
+            V1EnvVar(key="RETURN_TYPE_STR", value=return_type_str_repr)
+        )  # Use repr
+        all_env.append(V1EnvVar(key="IS_STREAM_MESSAGE", value=str(is_stream_message)))
+        if content_type and hasattr(content_type, "__name__"):
+            # Check if content_type is a class before accessing __name__
+            if isinstance(content_type, type):
+                all_env.append(
+                    V1EnvVar(key="CONTENT_TYPE_NAME", value=content_type.__name__)
                 )
-
-
-                print(
-                    f"Warning: Failed to get source for content_type: {content_type}"
-                )
-        else:  # Not a stream message
-            print(
-                f"[DEBUG Decorator] Input is not StreamMessage. Getting source for param_type: {param_type}"
-            )
-            input_model_source = get_type_source(param_type, notebook_code)
-            if input_model_source is None:
+            else:
+                # Handle unresolved types / typevars if needed
                 print(
-                    f"Warning:
+                    f"Warning: Content type '{content_type}' is not a class, cannot get name."
                 )
-
-
-
-
-
-
-
-
-
-        print(
-            f"[DEBUG Decorator] Source Result - Input Model (non-stream): {'Found' if input_model_source else 'Not Found or N/A'}"
-        )
-        print(
-            f"[DEBUG Decorator] Source Result - Output Model: {'Found' if output_model_source else 'Not Found'}"
-        )
-        print(
-            f"[DEBUG Decorator] Source Result - Base StreamMessage: {'Found' if stream_message_source else 'Not Found'}"
+        # MODULE_NAME might be less reliable now, depends on where func is defined relative to project root
+        all_env.append(
+            V1EnvVar(key="MODULE_NAME", value=func.__module__)
+        )  # Keep for potential debugging/info
+
+        # Add PYTHONPATH
+        pythonpath_value = CONTAINER_CODE_DIR
+        existing_pythonpath = next(
+            (var for var in all_env if var.key == "PYTHONPATH"), None
         )
-
-
-
-
-
-
-
-        def add_source_to_env(key_base: str, source: Any):
-            print(f"[DEBUG Decorator] add_source_to_env: Processing key '{key_base}'")
-            if not source:
-                print(
-                    f"[DEBUG Decorator] add_source_to_env: No source for '{key_base}', skipping."
-                )
-                return
-
-            if isinstance(source, tuple):
-                origin_src, arg_srcs = source
-                print(
-                    f"[DEBUG Decorator] add_source_to_env: '{key_base}' is tuple source. Origin found: {bool(origin_src)}, Num args: {len(arg_srcs)}"
-                )
-                if origin_src and isinstance(origin_src, str):
-                    all_env.append(V1EnvVar(key=f"{key_base}_SOURCE", value=origin_src))
-                    print(f"[DEBUG Decorator] Added env var {key_base}_SOURCE (origin)")
-                for i, arg_src in enumerate(arg_srcs):
-                    if isinstance(arg_src, str):
-                        all_env.append(
-                            V1EnvVar(key=f"{key_base}_ARG_{i}_SOURCE", value=arg_src)
-                        )
-                        print(
-                            f"[DEBUG Decorator] Added env var {key_base}_ARG_{i}_SOURCE"
-                        )
-                    elif isinstance(arg_src, tuple):
-                        arg_origin_src, _ = arg_src
-                        if arg_origin_src and isinstance(arg_origin_src, str):
-                            all_env.append(
-                                V1EnvVar(
-                                    key=f"{key_base}_ARG_{i}_SOURCE",
-                                    value=arg_origin_src,
-                                )
-                            )
-                            print(
-                                f"[DEBUG Decorator] Added env var {key_base}_ARG_{i}_SOURCE (nested origin)"
-                            )
-                        else:
-                            print(
-                                f"[DEBUG Decorator] Skipping complex/non-string nested arg origin for {key_base}_ARG_{i}"
-                            )
-                    else:
-                        print(
-                            f"[DEBUG Decorator] Skipping complex/non-string arg source for {key_base}_ARG_{i}"
-                        )
-            elif isinstance(source, str):
-                all_env.append(V1EnvVar(key=f"{key_base}_SOURCE", value=source))
-                print(f"[DEBUG Decorator] Added env var {key_base}_SOURCE (string)")
+        if existing_pythonpath:
+            if existing_pythonpath.value:
+                # Prepend our code dir, ensuring no duplicates and handling separators
+                paths = [p for p in existing_pythonpath.value.split(":") if p]
+                if pythonpath_value not in paths:
+                    paths.insert(0, pythonpath_value)
+                existing_pythonpath.value = ":".join(paths)
             else:
-
-
-
-
-        add_source_to_env("INPUT_MODEL", input_model_source)
-        add_source_to_env("OUTPUT_MODEL", output_model_source)
-        add_source_to_env("CONTENT_TYPE", content_type_source)
-        add_source_to_env("STREAM_MESSAGE", stream_message_source)
-
-        # Add init_func source if available
-        if init_func_source and init_func_name:
-            print(f"[DEBUG Decorator] Adding INIT_FUNC env vars for {init_func_name}")
-            all_env.append(V1EnvVar(key="INIT_FUNC_SOURCE", value=init_func_source))
-            all_env.append(V1EnvVar(key="INIT_FUNC_NAME", value=init_func_name))
+                existing_pythonpath.value = pythonpath_value
+        else:
+            all_env.append(V1EnvVar(key="PYTHONPATH", value=pythonpath_value))
+        print(f"[DEBUG Decorator] Ensured PYTHONPATH includes: {pythonpath_value}")

-        print("[DEBUG Decorator] Adding type info env vars...")
-        all_env.append(V1EnvVar(key="PARAM_TYPE_STR", value=param_type_str_repr))
-        all_env.append(V1EnvVar(key="RETURN_TYPE_STR", value=str(return_type)))
-        all_env.append(V1EnvVar(key="IS_STREAM_MESSAGE", value=str(is_stream_message)))
-        if content_type and hasattr(content_type, "__name__"):
-            all_env.append(
-                V1EnvVar(key="CONTENT_TYPE_NAME", value=content_type.__name__)
-            )
-        all_env.append(V1EnvVar(key="MODULE_NAME", value=func.__module__))
         print("[DEBUG Decorator] Finished populating environment variables.")
         # --- End Environment Variables ---

-        # ---
-
-        func_file_source = None
-        try:
-            func_file_path = inspect.getfile(func)
-            print(f"[DEBUG Decorator] Found file path: {func_file_path}")
-            with open(func_file_path, "r") as f:
-                func_file_source = f.read()
+        # --- Add S3 Sync Volume ---
+        if s3_destination_uri:
             print(
-                f"[DEBUG Decorator]
+                f"[DEBUG Decorator] Adding volume to sync S3 code from {s3_destination_uri} to {CONTAINER_CODE_DIR}"
             )
-
-
+            s3_sync_volume = V1VolumePath(
+                source=s3_destination_uri,
+                dest=CONTAINER_CODE_DIR,
+                driver=V1VolumeDriver.RCLONE_SYNC,  # Use SYNC for one-way download
+                # Add flags if needed, e.g., --checksum, --fast-list?
             )
-
-
-
-
-
-
-
-
-
-
+            # Check if an identical volume already exists
+            if not any(
+                v.source == s3_sync_volume.source and v.dest == s3_sync_volume.dest
+                for v in all_volumes
+            ):
+                all_volumes.append(s3_sync_volume)
+            else:
+                print(
+                    f"[DEBUG Decorator] Volume for {s3_destination_uri} to {CONTAINER_CODE_DIR} already exists."
+                )
+        else:
+            # Should have errored earlier if S3 upload failed
+            raise RuntimeError(
+                "Internal Error: S3 destination URI not set, cannot add sync volume."
             )
-        # --- End
+        # --- End S3 Sync Volume ---

         # --- Final Setup ---
         print("[DEBUG Decorator] Preparing final Processor object...")
         metadata = V1ResourceMetaRequest(
             name=processor_name, namespace=namespace, labels=labels
         )
-        #
+        # Base command now just runs the consumer module, relies on PYTHONPATH finding code
         consumer_module = "nebu.processors.consumer"
         if "accelerate launch" in python_cmd:
-            # python_cmd is the launcher prefix (e.g., "accelerate launch")
-            # Append the module flag and the module name.
-            # Remove -u as accelerate likely handles buffering.
             consumer_execution_command = f"{python_cmd.strip()} -m {consumer_module}"
         else:
-            #
+            # Standard python execution
             consumer_execution_command = f"{python_cmd} -u -m {consumer_module}"

-        #
-
-
-
+        # Setup commands: Base dependencies needed by consumer.py itself or the framework
+        # Assume nebu package (and thus boto3, requests, redis-py, dill, pydantic)
+        # are installed in the base image or via other means.
+        # User's setup_script is still valuable for *their* specific dependencies.
+        setup_commands_list = []
         if setup_script:
-            print("[DEBUG Decorator] Adding setup script to setup commands.")
-            # Add setup script as raw commands
+            print("[DEBUG Decorator] Adding user setup script to setup commands.")
             setup_commands_list.append(setup_script.strip())

         # Combine setup commands and the final execution command
         all_commands = setup_commands_list + [consumer_execution_command]
-
-
-        )  # Use double newline for clarity in logs
+        # Use newline separator for clarity in logs and script execution
+        final_command = "\n".join(all_commands)

         print(
             f"[DEBUG Decorator] Final container command:\n-------\n{final_command}\n-------"
```
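Note: only the raw relative path is exported as `NEBU_ENTRYPOINT_MODULE_PATH`, but the hunk sketches the path-to-module conversion the consumer is expected to perform. That conversion as a standalone sketch (function name hypothetical; the asserts assume POSIX path separators):

```python
import os


def module_path_from_relative(rel_path: str) -> str:
    # "subdir/file.py" -> "subdir.file"; "pkg/__init__.py" -> "pkg";
    # a bare top-level "__init__.py" collapses to the empty string.
    mod = rel_path.replace(os.sep, ".")
    if mod.endswith(".py"):
        mod = mod[: -len(".py")]
    if mod.endswith(".__init__"):
        mod = mod[: -len(".__init__")]
    elif mod == "__init__":
        mod = ""
    return mod


assert module_path_from_relative("subdir/file.py") == "subdir.file"
assert module_path_from_relative("pkg/__init__.py") == "pkg"
```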
```diff
@@ -912,28 +921,39 @@ def processor(
             image=image,
             command=final_command,
             env=all_env,
-            volumes=volumes,
+            volumes=all_volumes,  # Use updated volumes list
             accelerators=accelerators,
             resources=resources,
             meters=meters,
-            restart="Always",
+            restart="Always",  # Consider making this configurable? Defaulting to Always
             authz=authz,
             platform=platform,
             metadata=metadata,
+            # Pass through optional parameters from the main decorator function
+            queue=queue,
+            timeout=timeout,
+            ssh_keys=ssh_keys,
+            ports=ports,
+            proxy_port=proxy_port,
+            health_check=health_check,
         )
         print("[DEBUG Decorator] Final Container Request Env Vars (Summary):")
         for env_var in all_env:
-
-
+            # Avoid printing potentially large included source code
+            value_str = env_var.value or ""
+            if "SOURCE" in env_var.key and len(value_str) > 100:
+                print(
+                    f"[DEBUG Decorator] {env_var.key}: <source code present, length={len(value_str)}>"
+                )
             else:
-                print(f"[DEBUG Decorator] {env_var.key}: {
+                print(f"[DEBUG Decorator] {env_var.key}: {value_str}")

         processor_instance = Processor(
             name=processor_name,
             namespace=namespace,
             labels=labels,
             container=container_request,
-            schema_=None,
+            schema_=None,  # Schema info might be derived differently now if needed
             common_schema=None,
             min_replicas=min_replicas,
             max_replicas=max_replicas,
@@ -943,7 +963,17 @@ def processor(
         print(
             f"[DEBUG Decorator] Processor instance '{processor_name}' created successfully."
         )
-
+        # Store original func for potential local invocation/testing? Keep for now.
+        # TODO: Add original_func to Processor model definition if this is desired
+        # setattr(processor_instance, 'original_func', func) # Use setattr if not in model
+        try:
+            # This will fail if Processor hasn't been updated to include this field
+            processor_instance.original_func = func
+        except AttributeError:
+            print(
+                "Warning: Could not assign original_func to Processor instance. Update Processor model or remove assignment."
+            )
+
         return processor_instance

     return decorator
```