PyPI - haystack-ml-stack - Versions diffs - 0.4.4__tar.gz → 0.4.5__tar.gz - Mend

haystack-ml-stack 0.4.4 (tar.gz) → 0.4.5 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

{haystack_ml_stack-0.4.4 → haystack_ml_stack-0.4.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haystack-ml-stack
-Version: 0.4.4
+Version: 0.4.5
 Summary: Functions related to Haystack ML
 Author-email: Oscar Vega <oscar@haystack.tv>
 License: MIT

{haystack_ml_stack-0.4.4 → haystack_ml_stack-0.4.5}/pyproject.toml RENAMED Viewed

@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "haystack-ml-stack"
-version = "0.4.4"
+version = "0.4.5"
 description = "Functions related to Haystack ML"
 readme = "README.md"
 authors = [{ name = "Oscar Vega", email = "oscar@haystack.tv" }]

{haystack_ml_stack-0.4.4 → haystack_ml_stack-0.4.5}/src/haystack_ml_stack/__init__.py RENAMED Viewed

@@ -1,9 +1,9 @@
 __all__ = []
 try:
-    from .app import create_app
+    from .app import create_app, create_stream_app, create_channel_app
-    __all__ = ["create_app"]
+    __all__ = ["create_app", "create_stream_app", "create_channel_app"]
 except ImportError as e:
     pass

{haystack_ml_stack-0.4.4 → haystack_ml_stack-0.4.5}/src/haystack_ml_stack/_kafka.py RENAMED Viewed

@@ -23,6 +23,7 @@ async def send_to_kafka(
     state: dict,
     model_output: dict,
     monitoring_meta: dict,
+    query_params: dict,
     processed_at: datetime.datetime,
 ) -> None:
     if topic is None or producer is None:
@@ -43,6 +44,45 @@ async def send_to_kafka(
             "playlist_category": playlist.get("category"),
             "user_features": state.get("user_features", []),
             "stream_features": state.get("stream_features", []),
+            "query_params": query_params,
+        },
+        "processed_at": processed_at.isoformat(),
+    }
+    delivery_future = await producer.produce(
+        topic, orjson.dumps(message, default=default_serialization)
+    )
+    await delivery_future
+    return
+async def send_channels_to_kafka(
+    producer: AIOProducer,
+    topic: str,
+    user: dict,
+    channels: list[dict],
+    state: dict,
+    model_output: dict,
+    monitoring_meta: dict,
+    query_params: dict,
+    processed_at: datetime.datetime,
+) -> None:
+    if topic is None or producer is None:
+        return
+    message = {
+        "userid": user.get("userid"),
+        "client_os": user.get("clientOs"),
+        "model_input": {"user": user, "channels": channels},
+        "model_output": model_output,
+        "model_name": state["model_name"].replace(".pkl", "")
+        if state["model_name"]
+        else None,
+        "model_type": "channels",
+        "meta": {
+            "monitoring": monitoring_meta,
+            "haystack_ml_stack_version": __version__,
+            "user_features": state.get("user_features", []),
+            "global_features": state.get("global_features", []),
+            "query_params": query_params,
         },
         "processed_at": processed_at.isoformat(),
     }

haystack_ml_stack-0.4.5/src/haystack_ml_stack/_version.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "0.4.5"

{haystack_ml_stack-0.4.4 → haystack_ml_stack-0.4.5}/src/haystack_ml_stack/app.py RENAMED Viewed

@@ -18,12 +18,12 @@ from fastapi.encoders import jsonable_encoder
 import newrelic.agent
 from .cache import make_features_cache
-from .dynamo import set_all_features, FeatureRetrievalMeta
+from .dynamo import set_all_features, create_channel_candidates, FeatureRetrievalMeta
 from .model_store import download_and_load_model
 from .settings import Settings
 from . import exceptions
 from ._serializers import SerializerRegistry
-from ._kafka import send_to_kafka, initialize_kafka_producer, should_log_user
+from ._kafka import send_to_kafka, send_channels_to_kafka, initialize_kafka_producer, should_log_user
 from google.protobuf import text_format
 logging.basicConfig(
@@ -65,6 +65,7 @@ async def load_model(state, cfg: Settings) -> None:
             )
             state["stream_features"] = state["model"].get("stream_features", [])
             state["user_features"] = state["model"].get("user_features", [])
+            state["global_features"] = state["model"].get("global_features", [])
             valid_features = set(
                 (entity_type, feature_id)
                 for entity_type, feature_id, _ in SerializerRegistry.keys()
@@ -332,6 +333,7 @@ def create_app(
                     state=state,
                     model_output=model_output,
                     monitoring_meta=monitoring_meta,
+                    query_params=query_params,
                     processed_at=processed_at,
                 )
             return jsonable_encoder(model_output)
@@ -348,3 +350,241 @@ def create_app(
         }
     return app
+# create_stream_app is a second name for create_app (the same function object);
+# __init__.py exports it next to create_channel_app.
+create_stream_app = create_app
+def create_channel_app(
+    settings: Optional[Settings] = None,
+    *,
+    preloaded_model: Optional[Dict[str, Any]] = None,
+) -> FastAPI:
+    """Build the FastAPI application that scores channels for a user.
+
+    Args:
+        settings: Configuration to use; a fresh ``Settings()`` is created when
+            this is omitted (or falsy).
+        preloaded_model: Already-loaded model dict. When ``None``, the model is
+            loaded with ``load_model`` during application startup.
+
+    Returns:
+        A FastAPI app exposing ``GET /health``, ``POST /score`` and ``GET /``.
+    """
+    cfg = settings or Settings()
+    # Separate in-process caches for global and per-user features.
+    global_features_cache = make_features_cache(cfg.global_cache_maxsize)
+    user_features_cache = make_features_cache(cfg.user_cache_maxsize)
+    aws_session = aiobotocore.session.get_session()
+    # Mutable state shared by the lifespan hook and every request handler.
+    state: Dict[str, Any] = {
+        "model": preloaded_model,
+        "session": aws_session,
+        "model_name": (
+            os.path.basename(cfg.s3_model_path) if cfg.s3_model_path else None
+        ),
+    }
+    @asynccontextmanager
+    async def lifespan(app_server: FastAPI):
+        """Set up the model, Kafka producer and DynamoDB client; tear them down on exit."""
+        if state["model"] is None:
+            await load_model(state, cfg)
+        # Kafka is optional: without bootstrap servers the producer stays None.
+        kafka_producer = None
+        if cfg.kafka_bootstrap_servers is not None:
+            kafka_producer = initialize_kafka_producer(app_config=cfg)
+        state["kafka_producer"] = kafka_producer
+        async with AsyncExitStack() as stack:
+            session = state["session"]
+            # One DynamoDB client is kept for the whole application lifetime;
+            # leaving the exit stack closes it.
+            state["dynamo_client"] = await stack.enter_async_context(
+                session.create_client(
+                    "dynamodb",
+                    config=AioConfig(max_pool_connections=MAX_POOL_CONNECTIONS),
+                )
+            )
+            logger.info("DynamoDB persistent client initialized.")
+            yield
+            logger.info("Shutting down: Connection pools closed.")
+        logger.info("Shutting down: Flushing Kafka queue.")
+        if kafka_producer is not None:
+            try:
+                await kafka_producer.flush()
+            except Exception:
+                # A failed flush is logged only; the producer is still closed below.
+                logger.error(
+                    "Unknown exception while flushing kafka queue, shutting down producer.\n%s",
+                    traceback.format_exc(),
+                )
+            finally:
+                await kafka_producer.close()
+    app = FastAPI(
+        title="ML Channel Scorer",
+        description="Scores channels using a pre-trained ML model and DynamoDB features.",
+        version="1.0.0",
+        lifespan=lifespan,
+    )
+    @app.get("/health", status_code=HTTPStatus.OK)
+    async def health():
+        """Report cache sizes and feature lists; respond 503 while no model is loaded."""
+        model_ok = state["model"] is not None
+        if not model_ok:
+            raise HTTPException(
+                status_code=HTTPStatus.SERVICE_UNAVAILABLE,
+                detail="ML Model not loaded",
+            )
+        return {
+            "status": "ok",
+            "model_loaded": True,
+            "global_cache_size": len(global_features_cache),
+            "user_cache_size": len(user_features_cache),
+            "global_features": state.get("global_features", []),
+            "user_features": state.get("user_features", []),
+            "model_name": state.get("model_name"),
+        }
+    @app.post("/score", status_code=HTTPStatus.OK)
+    async def score_channels(
+        request: Request, response: Response, background_tasks: BackgroundTasks
+    ):
+        """Score channels for the user described by the JSON request body.
+
+        Responds 503 when no model is loaded or the feature store returned
+        features missing from the SerializerRegistry, 400 on malformed JSON,
+        500 on other body-parsing errors or a failed prediction, and 404 when
+        the model output is empty.
+        """
+        if state["model"] is None:
+            raise HTTPException(
+                status_code=HTTPStatus.SERVICE_UNAVAILABLE,
+                detail="ML Model not loaded",
+            )
+        try:
+            data = await request.json()
+        except json.JSONDecodeError as e:
+            body = await request.body()
+            logger.error(
+                "Received malformed json. Raw body: %s\n%s",
+                body.decode(errors="replace"),
+                traceback.format_exc(),
+            )
+            raise HTTPException(
+                status_code=HTTPStatus.BAD_REQUEST, detail="Invalid JSON payload"
+            ) from e
+        except Exception as e:
+            logger.error(
+                "Unexpected exception when parsing request.\n %s",
+                traceback.format_exc(),
+            )
+            raise HTTPException(
+                status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail="Unknown exception"
+            ) from e
+        # Single-valued query parameters are stored as a scalar, repeated ones as a list.
+        query_params = {}
+        for k in request.query_params.keys():
+            values = request.query_params.getlist(k)
+            query_params[k] = values[0] if len(values) == 1 else values
+        # The JSON body is used as the user record as-is.
+        # NOTE(review): nothing checks that the body is a JSON object; a list or
+        # scalar body would fail on the .get() calls further down - confirm.
+        user = data
+        # Passed to create_channel_candidates and then handed to the model.
+        channels: List[Dict[str, Any]] = []
+        # Default value; replaced by the create_channel_candidates result below.
+        retrieval_meta = FeatureRetrievalMeta(
+            cache_misses=0,
+            stream_cache_misses=0,
+            user_cache_misses=0,
+            retrieval_ms=0,
+            success=True,
+            cache_delay_minutes=0,
+            dynamo_ms=0,
+            parsing_ms=0,
+        )
+        try:
+            retrieval_meta = await create_channel_candidates(
+                dynamo_client=state["dynamo_client"],
+                user=user,
+                channels=channels,
+                global_features=state.get("global_features", []),
+                user_features=state.get("user_features", []),
+                global_features_cache=global_features_cache,
+                user_features_cache=user_features_cache,
+                features_table=cfg.features_table,
+                cache_sep=cfg.cache_separator,
+            )
+        except exceptions.InvalidFeaturesException as e:
+            logger.error(
+                "The following features are not present in the SerializerRegistry %s",
+                e,
+            )
+            raise HTTPException(
+                status_code=HTTPStatus.SERVICE_UNAVAILABLE,
+                detail=f"Received invalid features from feature store: {e}",
+            ) from e
+        # One random draw per request, reused for the input and the output log
+        # so both are sampled for the same requests.
+        random_number = random.random()
+        userid = user.get("userid", "")
+        if random_number < cfg.logs_fraction:
+            logger.info(
+                "User %s data: %s",
+                userid,
+                user,
+            )
+            logger.info(
+                "User %s channels: %s",
+                userid,
+                channels,
+            )
+        model = state["model"]
+        try:
+            preprocess_start = time.perf_counter_ns()
+            # NOTE(review): this writes into the model's shared params dict on
+            # every request. There is no await between this write and predict,
+            # so requests should not interleave here - re-check if
+            # preprocess/predict ever become async.
+            model["params"]["query_params"] = query_params
+            model_input = model["preprocess"](
+                user,
+                channels,
+                model["params"],
+            )
+            predict_start = time.perf_counter_ns()
+            model_output = model["predict"](model_input, model["params"])
+            predict_end = time.perf_counter_ns()
+        except Exception as e:
+            logger.error("Model prediction failed: \n%s", traceback.format_exc())
+            raise HTTPException(
+                status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+                detail="Model prediction failed",
+            ) from e
+        # create_channel_candidates reports its global-feature misses through the
+        # stream_cache_misses field, hence the field/key name mismatch below.
+        # perf_counter_ns deltas are multiplied by 1e-6 to convert ns to ms.
+        monitoring_meta = {
+            "cache_misses": retrieval_meta.cache_misses,
+            "user_cache_misses": retrieval_meta.user_cache_misses,
+            "global_cache_misses": retrieval_meta.stream_cache_misses,
+            "user_cache_size": len(user_features_cache),
+            "global_cache_size": len(global_features_cache),
+            "retrieval_success": int(retrieval_meta.success),
+            "cache_delay_minutes": retrieval_meta.cache_delay_minutes,
+            "dynamo_ms": retrieval_meta.dynamo_ms,
+            "dynamo_parse_ms": retrieval_meta.parsing_ms,
+            "retrieval_ms": retrieval_meta.retrieval_ms,
+            "preprocess_ms": (predict_start - preprocess_start) * 1e-6,
+            "predict_ms": (predict_end - predict_start) * 1e-6,
+            "total_channels": len(model_output),
+        }
+        newrelic.agent.record_custom_event(
+            "ChannelInference",
+            monitoring_meta,
+        )
+        if model_output:
+            if random_number < cfg.logs_fraction:
+                logger.info(
+                    "User %s - model output %s",
+                    userid,
+                    model_output,
+                )
+            if should_log_user(userid=userid, kafka_fraction=cfg.kafka_fraction):
+                processed_at = datetime.datetime.now(tz=datetime.UTC)
+                # Sent as a background task, i.e. after the response is returned.
+                background_tasks.add_task(
+                    send_channels_to_kafka,
+                    producer=state["kafka_producer"],
+                    topic=cfg.kafka_topic,
+                    user=user,
+                    channels=channels,
+                    state=state,
+                    model_output=model_output,
+                    monitoring_meta=monitoring_meta,
+                    query_params=query_params,
+                    processed_at=processed_at,
+                )
+            return jsonable_encoder(model_output)
+        # An empty (falsy) model output is reported as 404.
+        raise HTTPException(
+            status_code=HTTPStatus.NOT_FOUND, detail="No model output generated"
+        )
+    @app.get("/", status_code=HTTPStatus.OK)
+    async def root():
+        """Return a static liveness message together with the model name."""
+        return {
+            "message": "ML Channel Scoring Service is running.",
+            "model_name": state.get("model_name"),
+        }
+    return app

{haystack_ml_stack-0.4.4 → haystack_ml_stack-0.4.5}/src/haystack_ml_stack/dynamo.py RENAMED Viewed

@@ -1,13 +1,15 @@
-from typing import Any, Dict, List, NamedTuple, Literal
+import asyncio
+import datetime
 import logging
 import time
-import datetime
-from boto3.dynamodb.types import TypeDeserializer
+from typing import Any, Dict, List, Literal, NamedTuple
 import newrelic.agent
-import asyncio
-from ._serializers import SerializerRegistry, FeatureRegistryId
-from . import exceptions
+from boto3.dynamodb.types import TypeDeserializer
+from . import exceptions
+from ._serializers import FeatureRegistryId, SerializerRegistry
+from .utils import _complete_features_for_channels
 logger = logging.getLogger(__name__)
@@ -21,7 +23,7 @@ class FloatDeserializer(TypeDeserializer):
 _deser = FloatDeserializer()
-IdType = Literal["STREAM", "USER"]
+IdType = Literal["STREAM", "USER", "GLOBAL"]
 class FeatureRetrievalMeta(NamedTuple):
@@ -268,9 +270,295 @@ async def set_all_features(
     )
+# Client OS names (lower-cased, "debug" removed) that count as mobile.
+_MOBILE_OS = {"ios", "android", "iphone", "galaxy"}
+def _get_os_cat(client_os: str) -> str:
+    """Map a client OS name to the OS category "MOBILE" or "TV".
+
+    The name is lower-cased and every "debug" substring is removed before it is
+    looked up in ``_MOBILE_OS``. Anything else, including an empty or missing
+    name, is categorised as "TV".
+    """
+    if not client_os:
+        return "TV"
+    os_name = client_os.lower().replace("debug", "")
+    if os_name in _MOBILE_OS:
+        return "MOBILE"
+    return "TV"
+@newrelic.agent.function_trace()
+async def create_channel_candidates(
+    *,
+    user: Dict[str, Any],
+    channels: List[Dict[str, Any]],
+    global_features,
+    user_features,
+    global_features_cache,
+    user_features_cache,
+    features_table: str,
+    cache_sep: str,
+    dynamo_client,
+) -> FeatureRetrievalMeta:
+    """Resolve global and user features, then build the channel list in place.
+
+    Features are looked up in the two caches first; anything missing is fetched
+    from DynamoDB in one ``async_batch_get`` call, deserialized and cached.
+    ``_process_channels`` is then called to populate ``channels``.
+
+    Args:
+        user: User record; the ``"clientOs"`` and ``"userid"`` keys are read.
+        channels: List handed to ``_process_channels`` to be filled.
+        global_features: Names of the global features to resolve.
+        user_features: Names of the user features to resolve.
+        global_features_cache: Cache for global feature entries.
+        user_features_cache: Cache for user feature entries.
+        features_table: DynamoDB table passed to ``async_batch_get``.
+        cache_sep: Separator used inside cache keys.
+        dynamo_client: Client passed to ``async_batch_get``.
+
+    Returns:
+        Timing and cache-miss statistics. A failed DynamoDB read is logged and
+        reported as ``success=False``; in that case ``_process_channels`` is
+        not called.
+
+    Raises:
+        KeyError: If ``user_features`` is non-empty and ``user`` has no ``"userid"``.
+        ValueError: If a returned item's id type is neither GLOBAL nor USER.
+        exceptions.InvalidFeaturesException: If no serializer is registered for
+            a returned feature.
+        exceptions.DeserializationException: If deserializing a value raises
+            ``TypeError``.
+    """
+    time_start = time.perf_counter_ns()
+    os_cat = _get_os_cat(user.get("clientOs", ""))
+    # Global features have no owning record in the request, so they are
+    # collected in this local dict.
+    global_holder: Dict[str, Any] = {}
+    # Cache key -> dict that receives the fetched value (see the write-back
+    # in the item loop below).
+    cache_miss: Dict[str, Dict[str, Any]] = {}
+    all_feature_keys = [*global_features, *user_features]
+    cache_delay_obj: dict[str, float] = {f: 0 for f in all_feature_keys}
+    now = datetime.datetime.utcnow()
+    # NOTE(review): _check_cache is defined outside this hunk; presumably it
+    # fills `obj` on a hit and records the key in cache_miss otherwise - confirm.
+    for f in global_features:
+        cache_miss, cache_delay_obj = _check_cache(
+            obj=global_holder,
+            id_type="GLOBAL",
+            id_key="GLOBAL",
+            feature_key=f,
+            cache_sep=cache_sep,
+            features_cache=global_features_cache,
+            cache_miss=cache_miss,
+            cache_delay=cache_delay_obj,
+            now=now,
+        )
+    # Only global lookups have run so far, so every recorded miss is global.
+    global_cache_misses = len(cache_miss)
+    for f in user_features:
+        cache_miss, cache_delay_obj = _check_cache(
+            obj=user,
+            id_type="USER",
+            id_key=user["userid"],
+            feature_key=f,
+            cache_sep=cache_sep,
+            features_cache=user_features_cache,
+            cache_miss=cache_miss,
+            cache_delay=cache_delay_obj,
+            now=now,
+        )
+    user_cache_misses = len(cache_miss) - global_cache_misses
+    # The smallest positive delay is reported; it is divided by 60 for the
+    # *_minutes field (presumably the delays are in seconds - confirm).
+    valid_cache_delays = list(v for v in cache_delay_obj.values() if v > 0)
+    cache_delay = min(valid_cache_delays) if valid_cache_delays else 0
+    if not cache_miss:
+        # Everything came from the caches: no DynamoDB round trip needed.
+        _process_channels(
+            channels, global_features, user_features, global_holder, user, os_cat
+        )
+        return FeatureRetrievalMeta(
+            user_cache_misses=0,
+            stream_cache_misses=0,
+            cache_misses=0,
+            retrieval_ms=(time.perf_counter_ns() - time_start) * 1e-6,
+            success=True,
+            cache_delay_minutes=cache_delay / 60,
+            dynamo_ms=0,
+            parsing_ms=0,
+        )
+    cache_misses = len(cache_miss)
+    logger.info(
+        "Channel candidates cache miss for %d items (%d global, %d user)",
+        cache_misses,
+        global_cache_misses,
+        user_cache_misses,
+    )
+    # Turn each missed cache key back into a DynamoDB (pk, sk) pair. The key is
+    # split as id_type<sep>id_key<sep>sort-key; global items use the bare
+    # "GLOBAL" partition key, others "<id_type>#<id_key>".
+    keys = []
+    for k in cache_miss.keys():
+        id_type, id_key, sk = k.split(cache_sep, 2)
+        if id_type == "GLOBAL":
+            pk = "GLOBAL"
+        else:
+            pk = f"{id_type}#{id_key}"
+        keys.append({"pk": {"S": pk}, "sk": {"S": sk}})
+    dynamo_start = time.perf_counter_ns()
+    try:
+        items = await async_batch_get(dynamo_client, features_table, keys)
+    except Exception as e:
+        logger.error("DynamoDB batch_get failed for channel candidates: %s", e)
+        end_time = time.perf_counter_ns()
+        # NOTE(review): cache_misses is reported as 0 here although the per-type
+        # counts next to it are non-zero - confirm this is intended.
+        return FeatureRetrievalMeta(
+            user_cache_misses=user_cache_misses,
+            stream_cache_misses=global_cache_misses,
+            cache_misses=0,
+            retrieval_ms=(end_time - time_start) * 1e-6,
+            success=False,
+            cache_delay_minutes=cache_delay / 60,
+            dynamo_ms=(end_time - dynamo_start) * 1e-6,
+            parsing_ms=0,
+        )
+    dynamo_end = time.perf_counter_ns()
+    updated_keys = set()
+    for item in items:
+        full_id = item["pk"]["S"]
+        # "USER#<id>" style keys carry the id after '#'; a bare key such as
+        # "GLOBAL" is used as both id type and id key.
+        if "#" in full_id:
+            id_type, id_key = full_id.split("#", 1)
+        else:
+            id_type = full_id
+            id_key = full_id
+        feature_name = item["sk"]["S"]
+        if id_type == "GLOBAL":
+            cache_to_use = global_features_cache
+        elif id_type == "USER":
+            cache_to_use = user_features_cache
+        else:
+            raise ValueError(
+                f"Unexpected id type in channel candidates. "
+                f"Expected 'GLOBAL' or 'USER', received {id_type}"
+            )
+        cache_key = _build_cache_key(
+            id_type=id_type,
+            id_key=id_key,
+            feature_key=feature_name,
+            cache_sep=cache_sep,
+        )
+        parsed = parse_dynamo_item(item)
+        # Items without an explicit version are looked up as "v0".
+        feature_version = parsed.get("version", "v0")
+        feature_id = FeatureRegistryId(
+            entity_type=id_type, feature_id=feature_name, version=feature_version
+        )
+        try:
+            serializer = SerializerRegistry[feature_id]
+        except KeyError as e:
+            raise exceptions.InvalidFeaturesException(
+                f"Could not find '{feature_id}' in serializer registry"
+            ) from e
+        try:
+            # A missing or falsy stored value is kept as None (not deserialized).
+            value = (
+                serializer.deserialize(parsed.get("value"))
+                if parsed.get("value")
+                else None
+            )
+        except TypeError as e:
+            raise exceptions.DeserializationException(
+                f"Ran into an error while deserializing {feature_id}. Error: {e}"
+            ) from e
+        cache_to_use[cache_key] = {
+            "value": value,
+            "cache_ttl_in_seconds": int(parsed.get("cache_ttl_in_seconds", -1)),
+            "inserted_at": datetime.datetime.utcnow(),
+        }
+        if cache_key in cache_miss:
+            # Write the value onto the dict registered for this miss.
+            cache_miss[cache_key][feature_name] = value
+            updated_keys.add(cache_key)
+    parsing_end = time.perf_counter_ns()
+    if len(updated_keys) < len(cache_miss):
+        # Keys DynamoDB did not return are cached with a None value
+        # (5 minutes for global, 6 hours for user entries).
+        # NOTE(review): these entries have no "inserted_at", unlike the entries
+        # stored in the loop above - confirm _check_cache tolerates that.
+        missing_keys = set(cache_miss.keys()) - updated_keys
+        for k in missing_keys:
+            id_type = _get_id_type_from_partition_key(k, sep=cache_sep)
+            if id_type == "GLOBAL":
+                global_features_cache[k] = {
+                    "value": None,
+                    "cache_ttl_in_seconds": 300,
+                }
+            elif id_type == "USER":
+                user_features_cache[k] = {
+                    "value": None,
+                    "cache_ttl_in_seconds": 6 * 3600,
+                }
+    _process_channels(
+        channels, global_features, user_features, global_holder, user, os_cat
+    )
+    end_time = time.perf_counter_ns()
+    # Global misses are reported through the stream_cache_misses field.
+    return FeatureRetrievalMeta(
+        cache_misses=global_cache_misses + user_cache_misses,
+        user_cache_misses=user_cache_misses,
+        stream_cache_misses=global_cache_misses,
+        retrieval_ms=_perf_counter_ns_delta_in_ms(time_start, end_time),
+        success=True,
+        cache_delay_minutes=cache_delay / 60,
+        dynamo_ms=_perf_counter_ns_delta_in_ms(dynamo_start, dynamo_end),
+        parsing_ms=_perf_counter_ns_delta_in_ms(dynamo_end, parsing_end),
+    )
+def _process_channels(
+    channels: List[Dict[str, Any]],
+    global_features: List[str],
+    user_features: List[str],
+    global_holder: Dict[str, Any],
+    user: Dict[str, Any],
+    os_cat: str,
+) -> None:
+    """Append the candidate channels to ``channels`` and attach their features.
+
+    Channels come from three sources, in this order: the ``CHANNEL_CANDIDATES``
+    global feature (when present), every channel name found in the selected
+    playlist statistics, and a fixed list of default channels. The resulting
+    list is completed by ``_complete_features_for_channels``.
+
+    Args:
+        channels: Output list; new channel dicts are appended in place.
+        global_features: Names of the global features; only those ending in
+            ``#<os_cat>`` are used as playlist statistics.
+        user_features: Names of the user features to pick out of ``user``.
+        global_holder: Resolved global features keyed by feature name.
+        user: User record holding the resolved user features and, optionally,
+            ``"preferredChannels"``.
+        os_cat: OS category used to select the global statistics.
+    """
+    channel_candidates = global_holder.get("CHANNEL_CANDIDATES")
+    os_suffix = f"#{os_cat}"
+    os_cat_feature_names = {f for f in global_features if f.endswith(os_suffix)}
+    playlist_stats_global = {
+        "OS_CAT_" + k.replace(os_suffix, "").replace("#", "_"): v
+        for k, v in global_holder.items()
+        if k in os_cat_feature_names
+    }
+    user_feature_names = set(user_features)
+    playlist_stats_user = {
+        "USER_" + k.replace("#", "_"): v
+        for k, v in user.items()
+        if k in user_feature_names
+    }
+    all_channels = set()
+    for stat in (*playlist_stats_global.values(), *playlist_stats_user.values()):
+        # A feature may be resolved to None (empty or missing in the feature
+        # store); such a feature contributes no channel names.
+        if stat is not None:
+            all_channels.update(stat.data.keys())
+    # Get not preferred channels to be ignored
+    # The group labels are also ignored, not real channels for UI
+    favorite_groups = ("national_favorite", "local_favorite")
+    ignore_channels = set(favorite_groups)
+    inserted_channel_names = set()
+    if channel_candidates is not None:
+        preferred = set(user.get("preferredChannels", []) or [])
+        # First pass: the ignore set must be complete before anything is
+        # appended, so the candidates are walked twice.
+        for ch in channel_candidates.data:
+            if ch.category_group in favorite_groups and ch.name not in preferred:
+                ignore_channels.add(ch.name)
+        for ch in channel_candidates.data:
+            if ch.name not in ignore_channels:
+                channels.append(
+                    {
+                        "name": ch.name,
+                        "category_group": ch.category_group,
+                        "start_date": ch.start_date,
+                    }
+                )
+                inserted_channel_names.add(ch.name)
+    DEFAULT_CHANNELS = [
+        "local news",
+        "science & technology",
+        "business & finance",
+        "entertainment news",
+        "live",
+        "live_es",
+        "weather",
+        "politics",
+        "international",
+        "top videos",
+        "editor picks",
+    ]
+    # Channels without candidate metadata all get the same "now" start date.
+    fallback_start_date = int(datetime.datetime.now().timestamp())
+    # Sorted so the appended order does not depend on set iteration order.
+    for name in sorted(all_channels | set(DEFAULT_CHANNELS)):
+        if name not in ignore_channels and name not in inserted_channel_names:
+            channels.append(
+                {
+                    "name": name,
+                    "category_group": "",
+                    "start_date": fallback_start_date,
+                }
+            )
+    _complete_features_for_channels(
+        channels=channels,
+        user_features=playlist_stats_user,
+        global_features=playlist_stats_global,
+    )
 def _check_cache(
     obj: dict,
-    id_type: Literal["STREAM", "USER"],
+    id_type: IdType,
     id_key: str,
     feature_key: str,
     cache_sep: str,

{haystack_ml_stack-0.4.4 → haystack_ml_stack-0.4.5}/src/haystack_ml_stack/settings.py RENAMED Viewed

@@ -9,7 +9,7 @@ class Settings(BaseSettings):
         default=None, alias="KAFKA_BOOTSTRAP_SERVERS"
     )
     kafka_fraction: float = Field(0.01, alias="KAFKA_FRACTION")
-    kafka_topic: str = Field(default=None, alias="KAFKA_TOPIC")
+    kafka_topic: str | None = Field(default=None, alias="KAFKA_TOPIC")
     # Model (S3)
     s3_model_path: str | None = Field(default=None, alias="S3_MODEL_PATH")
@@ -21,6 +21,7 @@ class Settings(BaseSettings):
     # Cache
     stream_cache_maxsize: int = 50_000
     user_cache_maxsize: int = 80_000
+    global_cache_maxsize: int = 1_000
     cache_separator: str = "--"
     class Config:

{haystack_ml_stack-0.4.4 → haystack_ml_stack-0.4.5}/src/haystack_ml_stack/utils.py RENAMED Viewed

@@ -13,6 +13,10 @@ from .generated.v1.features_pb2 import (
     UserPersonalizingPSelect,
     UserPSelect,
     EntryContextPWatched,
+    PlaylistStatsForGlobal,
+    PlaylistStatsForUser,
+    UserPlaylistStats,
+    GlobalPlaylistStats,
 )
 from ._serializers import SerializerRegistry
 from . import exceptions
@@ -673,3 +677,54 @@ def _validate_pwatched_entry_context(entry_contexts: list[str]):
     invalid_contexts = [c for c in entry_contexts if c not in valid_contexts]
     if invalid_contexts:
         raise ValueError(f"Invalid entry contexts found: {invalid_contexts}")
+def _complete_features_for_channels(
+    channels: list[dict],
+    user_features: dict[str, UserPlaylistStats],
+    global_features: dict[str, GlobalPlaylistStats],
+) -> None:
+    """Attach playlist statistics to every channel dict, in place.
+
+    For each entry of ``global_features`` and ``user_features`` the channel
+    gets a sub-dict under that entry's key, holding a fixed set of statistic
+    fields. A field that cannot be read defaults to 0, and a ``None`` feature
+    is treated as an empty one.
+    """
+    global_stat_fields = (
+        "watched_count",
+        "not_watched_count",
+        "capped_watched_secs",
+        "capped_not_watched_secs",
+        "watched_secs",
+        "not_watched_secs",
+    )
+    user_stat_fields = (
+        "total_days",
+        "start_days",
+        "active_days",
+        "total_watched",
+        "capped_total_watched",
+    )
+    for channel in channels:
+        channel_name = channel.get("name", "")
+        group = channel.get("category_group", "")
+        for prefix, stats in global_features.items():
+            stats = stats or GlobalPlaylistStats()
+            # Some global features are at the category group level instead of
+            # the channel level, so fall back to the group when the channel
+            # name itself has no entry.
+            lookup_key = channel_name if channel_name in stats.data else group
+            entry = stats.data.get(lookup_key, PlaylistStatsForGlobal())
+            channel[prefix] = {
+                field: getattr(entry, field, 0) for field in global_stat_fields
+            }
+        for prefix, stats in user_features.items():
+            stats = stats or UserPlaylistStats()
+            entry = stats.data.get(channel_name, PlaylistStatsForUser())
+            channel[prefix] = {
+                field: getattr(entry, field, 0) for field in user_stat_fields
+            }

{haystack_ml_stack-0.4.4 → haystack_ml_stack-0.4.5}/src/haystack_ml_stack.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haystack-ml-stack
-Version: 0.4.4
+Version: 0.4.5
 Summary: Functions related to Haystack ML
 Author-email: Oscar Vega <oscar@haystack.tv>
 License: MIT