dagster-cloud 1.8.2__py3-none-any.whl → 1.12.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster_cloud/__init__.py +3 -3
- dagster_cloud/agent/__init__.py +4 -4
- dagster_cloud/agent/cli/__init__.py +56 -17
- dagster_cloud/agent/dagster_cloud_agent.py +360 -172
- dagster_cloud/agent/instrumentation/__init__.py +0 -0
- dagster_cloud/agent/instrumentation/constants.py +2 -0
- dagster_cloud/agent/instrumentation/run_launch.py +23 -0
- dagster_cloud/agent/instrumentation/schedule.py +34 -0
- dagster_cloud/agent/instrumentation/sensor.py +34 -0
- dagster_cloud/anomaly_detection/__init__.py +2 -2
- dagster_cloud/anomaly_detection/defs.py +17 -12
- dagster_cloud/anomaly_detection/types.py +3 -3
- dagster_cloud/api/dagster_cloud_api.py +209 -293
- dagster_cloud/auth/constants.py +21 -5
- dagster_cloud/batching/__init__.py +1 -0
- dagster_cloud/batching/batcher.py +210 -0
- dagster_cloud/dagster_insights/__init__.py +12 -6
- dagster_cloud/dagster_insights/bigquery/bigquery_utils.py +3 -2
- dagster_cloud/dagster_insights/bigquery/dbt_wrapper.py +39 -12
- dagster_cloud/dagster_insights/bigquery/insights_bigquery_resource.py +8 -6
- dagster_cloud/dagster_insights/insights_utils.py +18 -8
- dagster_cloud/dagster_insights/metrics_utils.py +12 -12
- dagster_cloud/dagster_insights/snowflake/dagster_snowflake_insights.py +5 -12
- dagster_cloud/dagster_insights/snowflake/dbt_wrapper.py +34 -8
- dagster_cloud/dagster_insights/snowflake/definitions.py +38 -12
- dagster_cloud/dagster_insights/snowflake/insights_snowflake_resource.py +11 -23
- dagster_cloud/definitions/__init__.py +0 -0
- dagster_cloud/definitions/job_selection.py +36 -0
- dagster_cloud/execution/cloud_run_launcher/k8s.py +1 -1
- dagster_cloud/execution/cloud_run_launcher/process.py +3 -3
- dagster_cloud/execution/monitoring/__init__.py +27 -33
- dagster_cloud/execution/utils/process.py +3 -3
- dagster_cloud/instance/__init__.py +125 -38
- dagster_cloud/instrumentation/__init__.py +32 -0
- dagster_cloud/metadata/source_code.py +13 -8
- dagster_cloud/metrics/__init__.py +0 -0
- dagster_cloud/metrics/tracer.py +59 -0
- dagster_cloud/opentelemetry/__init__.py +0 -0
- dagster_cloud/opentelemetry/config/__init__.py +73 -0
- dagster_cloud/opentelemetry/config/exporter.py +81 -0
- dagster_cloud/opentelemetry/config/log_record_processor.py +40 -0
- dagster_cloud/opentelemetry/config/logging_handler.py +14 -0
- dagster_cloud/opentelemetry/config/meter_provider.py +9 -0
- dagster_cloud/opentelemetry/config/metric_reader.py +39 -0
- dagster_cloud/opentelemetry/controller.py +319 -0
- dagster_cloud/opentelemetry/enum.py +58 -0
- dagster_cloud/opentelemetry/factories/__init__.py +1 -0
- dagster_cloud/opentelemetry/factories/logs.py +113 -0
- dagster_cloud/opentelemetry/factories/metrics.py +121 -0
- dagster_cloud/opentelemetry/metrics/__init__.py +0 -0
- dagster_cloud/opentelemetry/metrics/meter.py +140 -0
- dagster_cloud/opentelemetry/observers/__init__.py +0 -0
- dagster_cloud/opentelemetry/observers/dagster_exception_handler.py +40 -0
- dagster_cloud/opentelemetry/observers/execution_observer.py +178 -0
- dagster_cloud/pex/grpc/__generated__/multi_pex_api_pb2.pyi +175 -0
- dagster_cloud/pex/grpc/__init__.py +2 -2
- dagster_cloud/pex/grpc/client.py +4 -4
- dagster_cloud/pex/grpc/compile.py +2 -2
- dagster_cloud/pex/grpc/server/__init__.py +2 -2
- dagster_cloud/pex/grpc/server/cli/__init__.py +31 -19
- dagster_cloud/pex/grpc/server/manager.py +60 -42
- dagster_cloud/pex/grpc/server/registry.py +28 -21
- dagster_cloud/pex/grpc/server/server.py +23 -14
- dagster_cloud/pex/grpc/types.py +5 -5
- dagster_cloud/py.typed +0 -0
- dagster_cloud/secrets/__init__.py +1 -1
- dagster_cloud/secrets/loader.py +3 -3
- dagster_cloud/serverless/__init__.py +1 -1
- dagster_cloud/serverless/io_manager.py +36 -53
- dagster_cloud/storage/client.py +54 -17
- dagster_cloud/storage/compute_logs/__init__.py +3 -1
- dagster_cloud/storage/compute_logs/compute_log_manager.py +22 -17
- dagster_cloud/storage/defs_state/__init__.py +3 -0
- dagster_cloud/storage/defs_state/queries.py +15 -0
- dagster_cloud/storage/defs_state/storage.py +113 -0
- dagster_cloud/storage/event_logs/__init__.py +3 -1
- dagster_cloud/storage/event_logs/queries.py +102 -4
- dagster_cloud/storage/event_logs/storage.py +266 -73
- dagster_cloud/storage/event_logs/utils.py +88 -7
- dagster_cloud/storage/runs/__init__.py +1 -1
- dagster_cloud/storage/runs/queries.py +17 -2
- dagster_cloud/storage/runs/storage.py +88 -42
- dagster_cloud/storage/schedules/__init__.py +1 -1
- dagster_cloud/storage/schedules/storage.py +6 -8
- dagster_cloud/storage/tags.py +66 -1
- dagster_cloud/util/__init__.py +10 -12
- dagster_cloud/util/errors.py +49 -64
- dagster_cloud/version.py +1 -1
- dagster_cloud/workspace/config_schema/__init__.py +55 -13
- dagster_cloud/workspace/docker/__init__.py +76 -25
- dagster_cloud/workspace/docker/utils.py +1 -1
- dagster_cloud/workspace/ecs/__init__.py +1 -1
- dagster_cloud/workspace/ecs/client.py +51 -33
- dagster_cloud/workspace/ecs/launcher.py +76 -22
- dagster_cloud/workspace/ecs/run_launcher.py +3 -3
- dagster_cloud/workspace/ecs/utils.py +14 -5
- dagster_cloud/workspace/kubernetes/__init__.py +1 -1
- dagster_cloud/workspace/kubernetes/launcher.py +61 -29
- dagster_cloud/workspace/kubernetes/utils.py +34 -22
- dagster_cloud/workspace/user_code_launcher/__init__.py +5 -3
- dagster_cloud/workspace/user_code_launcher/process.py +16 -14
- dagster_cloud/workspace/user_code_launcher/user_code_launcher.py +552 -172
- dagster_cloud/workspace/user_code_launcher/utils.py +105 -1
- {dagster_cloud-1.8.2.dist-info → dagster_cloud-1.12.6.dist-info}/METADATA +48 -42
- dagster_cloud-1.12.6.dist-info/RECORD +134 -0
- {dagster_cloud-1.8.2.dist-info → dagster_cloud-1.12.6.dist-info}/WHEEL +1 -1
- dagster_cloud-1.8.2.dist-info/RECORD +0 -100
- {dagster_cloud-1.8.2.dist-info → dagster_cloud-1.12.6.dist-info}/top_level.txt +0 -0
dagster_cloud/auth/constants.py
CHANGED
@@ -1,3 +1,4 @@
+import uuid
 from typing import Optional
 
 from dagster._core.errors import DagsterInvariantViolationError
@@ -19,12 +20,27 @@ def get_organization_public_id_from_api_token(api_token: str) -> Optional[str]:
     return split_token[2]
 
 
-def decode_agent_token(agent_token: str) -> Optional[str]:
+def decode_region_from_uuid(regional_token: str) -> Optional[str]:
+    try:
+        regional_uuid = uuid.UUID(regional_token)
+    except ValueError:
+        # if it's not an actual uuid, we can't decode region
+        return None
+
+    # custom uuids contain region subdomains in the first 2 bytes
+    if regional_uuid.version != 8 or regional_uuid.variant != uuid.RFC_4122:
+        return None
+
+    uuid_bytes = regional_uuid.bytes
+    return uuid_bytes[:2].decode("ascii")
+
+
+def decode_agent_token(agent_token: str) -> tuple[Optional[str], Optional[str]]:
     split_token = agent_token.split(":")
 
     # Legacy agent token format - organization must be specified in dagster.yaml
     if len(split_token) == 1:
-        return None
+        return None, None
 
     token_type, *token = split_token
 
@@ -35,6 +51,6 @@ def get_organization_name_from_agent_token(agent_token: str) -> Optional[str]:
             "Generate a new agent token in Dagster Cloud."
         )
 
-
-
-    return organization
+    # token format: agent:<org>:<uuid>
+    organization, uuid_str = token
+    return organization, decode_region_from_uuid(uuid_str)
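Note: the new decode_region_from_uuid helper reads a two-character region subdomain out of the first two bytes of an RFC 4122, version-8 UUID. A minimal sketch of the encoding direction implied by that decoder; encode_region_into_uuid and the "us" region value are illustrative, not part of the package:

    import uuid

    def encode_region_into_uuid(region: str) -> uuid.UUID:
        # Hypothetical inverse of decode_region_from_uuid: put a 2-char ASCII
        # region code in the first two bytes, then set the bits the decoder checks.
        raw = bytearray(uuid.uuid4().bytes)
        raw[0:2] = region.encode("ascii")  # region subdomain in first 2 bytes
        raw[6] = (raw[6] & 0x0F) | 0x80    # version nibble -> 8
        raw[8] = (raw[8] & 0x3F) | 0x80    # variant bits -> RFC 4122
        return uuid.UUID(bytes=bytes(raw))

    token = encode_region_into_uuid("us")
    assert token.version == 8 and token.variant == uuid.RFC_4122
    assert token.bytes[:2].decode("ascii") == "us"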
dagster_cloud/batching/__init__.py
ADDED
@@ -0,0 +1 @@
+from dagster_cloud.batching.batcher import Batcher as Batcher
dagster_cloud/batching/batcher.py
ADDED
@@ -0,0 +1,210 @@
+import logging
+import os
+from collections.abc import Generator
+from concurrent.futures import Future, TimeoutError
+from contextlib import contextmanager
+from queue import Empty, Full, Queue
+from threading import Lock
+from typing import Callable, Generic, Optional, TypeVar
+
+import dagster._check as check
+
+from dagster_cloud.instrumentation import Instrumentation, NoOpInstrumentation
+
+logger = logging.getLogger(__name__)
+
+I = TypeVar("I")  # noqa: E741
+O = TypeVar("O")  # noqa: E741
+QueueItem = tuple[I, Future[O]]
+
+
+DEFAULT_MAX_WAIT_MS = 1000
+DEFAULT_MAX_BATCH_SIZE = 100
+DEFAULT_MAX_QUEUE_SIZE = 1000
+
+
+def _get_override_for_name(setting: str, name: str) -> Optional[int]:
+    env_name = f"DAGSTER_BATCHING__{name.upper().replace('-', '_')}__{setting.upper()}"
+    value = os.getenv(env_name)
+    if value is None:
+        return None
+
+    try:
+        value_int = int(value)
+        if value_int <= 0:
+            logger.warning(
+                f"Environment variable misconfiguration for {env_name} (should be positive int, got: '{value}')"
+            )
+            return None
+        return value_int
+    except ValueError:
+        logger.warning(
+            f"Environment variable misconfiguration for {env_name} (should be positive int, got: '{value}')"
+        )
+        return None
+
+
+def _get_config(
+    setting: str, name: str, passed_in_default: Optional[int], global_default: int
+) -> int:
+    override = _get_override_for_name(setting, name)
+    if override is not None:
+        return override
+
+    if passed_in_default is not None:
+        return passed_in_default
+
+    return global_default
+
+
+class Batcher(Generic[I, O]):
+    """the basic algorithm is.
+
+    1. insert (item, future) into queue
+    2. wait for future to complete, with max timeout
+    2a. if future completes, return result
+    2b. on timeout, acquire lock, then drain the queue until
+        the future completes
+
+    NOTE: if the queue is full, submit() will raise an exception
+    NOTE: the lock means that only one thread will ever be running the batcher_fn
+        at a time. the algorithm would still be correct without the lock but
+        locking leads to larger batches. HOWEVER without the lock we might try
+        to submit empty batches, which there is currently an invariant to protect
+        against
+    NOTE: the max queue size is meant to cap the number of inflight requests
+        in order to fail faster if the underlying function is taking too long
+        (database issues).
+
+    Configuration for queue size, max wait, and batch size is specified (by priority order) by:
+
+    1. an env var override (of the form DAGSTER_BATCHING__TEST__MAX_WAIT_MS -- see _get_override_for_name)
+    2. the passed in value
+    3. the default (specified in this file)
+    """
+
+    def __init__(
+        self,
+        name: str,
+        batcher_fn: Callable[[list[I]], list[O]],
+        max_queue_size: Optional[int] = None,
+        max_batch_size: Optional[int] = None,
+        max_wait_ms: Optional[int] = None,
+        instrumentation: Optional[Instrumentation] = None,
+    ) -> None:
+        check.invariant(
+            max_wait_ms is None or max_wait_ms > 0,
+            "max wait, if provided, must be set to a positive integer",
+        )
+        check.invariant(
+            max_queue_size is None or max_queue_size > 0,
+            "max queue size, if provided, must be set to a positive integer",
+        )
+        check.invariant(
+            max_batch_size is None or max_batch_size > 0,
+            "max batch size, if provided, must be set to a positive integer",
+        )
+        if max_queue_size and max_batch_size:
+            check.invariant(
+                max_batch_size <= max_queue_size,
+                "if max batch size and max queue size are provided, max batch size must be "
+                "less than or equal to max queue size",
+            )
+        self._name = name
+        self._batcher_fn = batcher_fn
+        self._max_batch_size = _get_config(
+            "max_batch_size", name, max_batch_size, DEFAULT_MAX_BATCH_SIZE
+        )
+        self._max_wait_ms: float = _get_config(
+            "max_wait_ms", name, max_wait_ms, DEFAULT_MAX_WAIT_MS
+        )
+        config_max_queue_size = _get_config(
+            "max_queue_size", name, max_queue_size, DEFAULT_MAX_QUEUE_SIZE
+        )
+        self._queue: Queue[QueueItem] = Queue(maxsize=config_max_queue_size)
+        self._drain_lock = Lock()
+        self._instrumentation = (instrumentation or NoOpInstrumentation()).tags([f"batcher:{name}"])
+
+    def _submit_batch(self, batch: list[QueueItem]) -> None:
+        check.invariant(len(batch) > 0, "should never submit an empty batch")
+        self._instrument_batch_size(len(batch))
+        try:
+            with self._time("batcher_fn"):
+                results = self._batcher_fn([i for i, _ in batch])
+        except Exception as e:
+            for _, fut in batch:
+                fut.set_exception(e)
+        else:
+            check.invariant(
+                len(results) == len(batch), "batcher returned fewer results than expected"
+            )
+            for (_, fut), result in zip(batch, results):
+                fut.set_result(result)
+
+    def _build_batch(self) -> list[QueueItem]:
+        batch = []
+        for _ in range(self._max_batch_size):
+            try:
+                batch.append(self._queue.get(block=False))
+            except Empty:
+                break
+        return batch
+
+    @contextmanager
+    def _lock(self) -> Generator[None, None, None]:
+        with self._time("lock_acquisition"):
+            self._drain_lock.acquire()
+        try:
+            yield
+        finally:
+            self._drain_lock.release()
+
+    def _drain_batch(self, fut: Future[O]) -> O:
+        with self._lock(), self._time("drain_batch"):
+            while not fut.done():
+                self._submit_batch(self._build_batch())
+        return fut.result()
+
+    def submit(self, i: I) -> O:
+        with self._time("submit"):
+            fut: Future[O] = Future()
+            try:
+                self._queue.put((i, fut), block=False)
+            except Full:
+                self._instrumentation.increment("dagster.batching.full")
+                logger.exception(f"Batching queue for batcher {self._name} is full!")
+                raise
+            else:
+                try:
+                    queue_size = self._queue.qsize()
+                    self._instrument_queue_size(queue_size)
+                    timeout = 0 if queue_size >= self._max_batch_size else self._max_wait_ms / 1000
+                    return fut.result(timeout=timeout)
+                except TimeoutError:
+                    self._instrumentation.increment("dagster.batching.timeout")
+                    self._drain_batch(fut)
+                    return fut.result()
+
+    def _instrument_queue_size(self, queue_size: int) -> None:
+        self._instrumentation.histogram("dagster.batching.queue_size", queue_size)
+        for bucket in [5, 10, 100]:
+            if queue_size >= bucket:
+                self._instrumentation.increment(f"dagster.batching.queue_size.ge_{bucket}")
+            else:
+                break
+
+    def _instrument_batch_size(self, batch_size: int) -> None:
+        self._instrumentation.histogram("dagster.batching.batch_size", batch_size)
+        for bucket in [5, 10, 100]:
+            if batch_size >= bucket:
+                self._instrumentation.increment(f"dagster.batching.batch_size.ge_{bucket}")
+            else:
+                break
+
+    @contextmanager
+    def _time(self, metric_name: str) -> Generator[None, None, None]:
+        with self._instrumentation.instrument_context(
+            f"dagster.batching.{metric_name}",
+            buckets_ms=[10, 100, 500, 1000],
+        ):
+            yield
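Note: the new Batcher coalesces concurrent submit() calls into single batcher_fn invocations, with whichever caller times out first draining the queue for everyone waiting. A minimal usage sketch against the API added above; load_rows and the key values are illustrative:

    from concurrent.futures import ThreadPoolExecutor

    from dagster_cloud.batching import Batcher

    # Hypothetical batch function: must return one result per input, in order.
    def load_rows(keys: list[str]) -> list[str]:
        return [f"row-for-{key}" for key in keys]

    batcher: Batcher[str, str] = Batcher("load-rows", load_rows, max_batch_size=50, max_wait_ms=100)

    # Each caller blocks on its own future; concurrent submissions are drained
    # together into one load_rows() call instead of 200 separate ones.
    with ThreadPoolExecutor(max_workers=8) as pool:
        results = list(pool.map(batcher.submit, [f"key-{n}" for n in range(200)]))

    assert results[0] == "row-for-key-0"

Per _get_override_for_name, the same settings could also be forced at runtime with environment variables such as DAGSTER_BATCHING__LOAD_ROWS__MAX_WAIT_MS.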
dagster_cloud/dagster_insights/__init__.py
CHANGED
@@ -1,15 +1,19 @@
 import sys
 from typing import Any
 
-from .snowflake.dbt_wrapper import dbt_with_snowflake_insights as dbt_with_snowflake_insights
-from .snowflake.definitions import (
+from dagster_cloud.dagster_insights.snowflake.dbt_wrapper import (
+    dbt_with_snowflake_insights as dbt_with_snowflake_insights,
+)
+from dagster_cloud.dagster_insights.snowflake.definitions import (
     create_snowflake_insights_asset_and_schedule as create_snowflake_insights_asset_and_schedule,
 )
-from .snowflake.snowflake_utils import meter_snowflake_query as meter_snowflake_query
+from dagster_cloud.dagster_insights.snowflake.snowflake_utils import (
+    meter_snowflake_query as meter_snowflake_query,
+)
 
 dagster_snowflake_req_imports = {"InsightsSnowflakeResource"}
 try:
-    from .snowflake.insights_snowflake_resource import (
+    from dagster_cloud.dagster_insights.snowflake.insights_snowflake_resource import (
         InsightsSnowflakeResource as InsightsSnowflakeResource,
     )
 except ImportError:
@@ -17,8 +21,10 @@ except ImportError:
 
 dagster_bigquery_req_imports = {"InsightsBigQueryResource", "dbt_with_bigquery_insights"}
 try:
-    from .bigquery.dbt_wrapper import dbt_with_bigquery_insights as dbt_with_bigquery_insights
-    from .bigquery.insights_bigquery_resource import (
+    from dagster_cloud.dagster_insights.bigquery.dbt_wrapper import (
+        dbt_with_bigquery_insights as dbt_with_bigquery_insights,
+    )
+    from dagster_cloud.dagster_insights.bigquery.insights_bigquery_resource import (
         InsightsBigQueryResource as InsightsBigQueryResource,
     )
 except ImportError:
dagster_cloud/dagster_insights/bigquery/bigquery_utils.py
CHANGED
@@ -1,4 +1,5 @@
-from typing import Any, List, Mapping, Optional
+from collections.abc import Mapping
+from typing import Any, Optional
 
 from dagster import AssetKey, JobDefinition
 
@@ -15,7 +16,7 @@ def marker_asset_key_for_job(
 
 
 def build_bigquery_cost_metadata(
-    job_ids: Optional[List[str]], bytes_billed: int, slots_ms: int
+    job_ids: Optional[list[str]], bytes_billed: int, slots_ms: int
 ) -> Mapping[str, Any]:
     metadata: Mapping[str, Any] = {
         BIGQUERY_METADATA_BYTES_BILLED: bytes_billed,
dagster_cloud/dagster_insights/bigquery/dbt_wrapper.py
CHANGED
@@ -1,9 +1,11 @@
 from collections import defaultdict
+from collections.abc import Iterable, Iterator
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Iterable, Iterator, Optional, Union
+from typing import TYPE_CHECKING, Optional, Union
 
 import yaml
 from dagster import (
+    AssetCheckEvaluation,
     AssetCheckResult,
     AssetExecutionContext,
     AssetKey,
@@ -16,8 +18,14 @@ from dagster_dbt import DbtCliInvocation
 from dagster_dbt.version import __version__ as dagster_dbt_version
 from packaging import version
 
-from ..insights_utils import extract_asset_info_from_event, handle_raise_on_error
-from .bigquery_utils import build_bigquery_cost_metadata, marker_asset_key_for_job
+from dagster_cloud.dagster_insights.bigquery.bigquery_utils import (
+    build_bigquery_cost_metadata,
+    marker_asset_key_for_job,
+)
+from dagster_cloud.dagster_insights.insights_utils import (
+    extract_asset_info_from_event,
+    handle_raise_on_error,
+)
 
 if TYPE_CHECKING:
     from dbt.adapters.base.impl import BaseAdapter
@@ -52,11 +60,21 @@ def dbt_with_bigquery_insights(
     context: Union[OpExecutionContext, AssetExecutionContext],
     dbt_cli_invocation: DbtCliInvocation,
     dagster_events: Optional[
-        Iterable[Union[Output, AssetMaterialization, AssetObservation, AssetCheckResult]]
+        Iterable[
+            Union[
+                Output,
+                AssetMaterialization,
+                AssetObservation,
+                AssetCheckResult,
+                AssetCheckEvaluation,
+            ]
+        ]
     ] = None,
     skip_config_check=False,
     record_observation_usage: bool = True,
-) -> Iterator[Union[Output, AssetMaterialization, AssetObservation, AssetCheckResult]]:
+) -> Iterator[
+    Union[Output, AssetMaterialization, AssetObservation, AssetCheckResult, AssetCheckEvaluation]
+]:
     """Wraps a dagster-dbt invocation to associate each BigQuery query with the produced
     asset materializations. This allows the cost of each query to be associated with the asset
     materialization that it produced.
@@ -67,7 +85,7 @@ def dbt_with_bigquery_insights(
     Args:
         context (AssetExecutionContext): The context of the asset that is being materialized.
         dbt_cli_invocation (DbtCliInvocation): The invocation of the dbt CLI to wrap.
-        dagster_events (Optional[Iterable[Union[Output, AssetObservation, AssetCheckResult]]]):
+        dagster_events (Optional[Iterable[Union[Output, AssetObservation, AssetCheckResult, AssetCheckEvaluation]]]):
            The events that were produced by the dbt CLI invocation. If not provided, it is assumed
            that the dbt CLI invocation has not yet been run, and it will be run and the events
            will be streamed.
@@ -116,7 +134,14 @@ def dbt_with_bigquery_insights(
     asset_info_by_unique_id = {}
     for dagster_event in dagster_events:
         if isinstance(
-            dagster_event, (AssetMaterialization, AssetObservation, Output, AssetCheckResult)
+            dagster_event,
+            (
+                AssetMaterialization,
+                AssetObservation,
+                Output,
+                AssetCheckResult,
+                AssetCheckEvaluation,
+            ),
         ):
             unique_id = dagster_event.metadata["unique_id"].value
             asset_key, partition = extract_asset_info_from_event(
@@ -133,7 +158,7 @@ def dbt_with_bigquery_insights(
     invocation_id = run_results_json["metadata"]["invocation_id"]
 
     # backcompat-proof in case the invocation does not have an instantiated adapter on it
-    adapter: Optional["BaseAdapter"] = getattr(dbt_cli_invocation, "adapter", None)
+    adapter: Optional[BaseAdapter] = getattr(dbt_cli_invocation, "adapter", None)
     if not adapter:
         if version.parse(dagster_dbt_version) < version.parse(MIN_DAGSTER_DBT_VERSION):
             upgrade_message = f" Extracting cost information requires dagster_dbt>={MIN_DAGSTER_DBT_VERSION} (found {dagster_dbt_version}). "
@@ -149,14 +174,16 @@ def dbt_with_bigquery_insights(
     cost_by_asset = defaultdict(list)
    try:
         with adapter.connection_named("dagster_insights:bigquery_cost"):
-            client: bigquery.Client = adapter.connections.get_thread_connection().handle
-            if client.location and client.project:
+            client: bigquery.Client = adapter.connections.get_thread_connection().handle  # pyright: ignore[reportAssignmentType]
+
+            if (client.location or adapter.config.credentials.location) and client.project:
                 # we should populate the location/project from the client, and use that to determine
                 # the correct INFORMATION_SCHEMA.JOBS table to query for cost information
-                location = client.location
+                # If the client doesn't have a location, fall back to the location provided
+                # in the dbt profile config
+                location = client.location or adapter.config.credentials.location
                 project = client.project
             else:
-                # try fetching the default dataset from the schema, if it exists
                 dataset = client.get_dataset(adapter.config.credentials.schema)
                 location = dataset.location if dataset else None
                 project = client.project or dataset.project
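Note: a sketch of how dbt_with_bigquery_insights is typically invoked from a @dbt_assets body, following the pattern shown in the Dagster Insights docs. The manifest path and asset function name are placeholders:

    from pathlib import Path

    from dagster import AssetExecutionContext
    from dagster_dbt import DbtCliResource, dbt_assets

    from dagster_cloud.dagster_insights import dbt_with_bigquery_insights

    @dbt_assets(manifest=Path("target", "manifest.json"))  # placeholder path
    def insights_dbt_assets(context: AssetExecutionContext, dbt: DbtCliResource):
        dbt_cli_invocation = dbt.cli(["build"], context=context)
        # Streams the usual dbt events, then attributes BigQuery bytes billed
        # and slot-ms to the assets each query materialized.
        yield from dbt_with_bigquery_insights(context, dbt_cli_invocation)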
dagster_cloud/dagster_insights/bigquery/insights_bigquery_resource.py
CHANGED
@@ -1,16 +1,18 @@
+from collections.abc import Iterator
 from contextlib import contextmanager, nullcontext
-from typing import Iterator, List
 
 from dagster import AssetObservation
-from dagster._annotations import experimental
+from dagster._annotations import beta
 from dagster_gcp import BigQueryResource
 from dagster_gcp.bigquery.utils import setup_gcp_creds
 from google.cloud import bigquery
 
+from dagster_cloud.dagster_insights.bigquery.bigquery_utils import (
+    build_bigquery_cost_metadata,
+    marker_asset_key_for_job,
+)
 from dagster_cloud.dagster_insights.insights_utils import get_current_context_and_asset_key
 
-from .bigquery_utils import build_bigquery_cost_metadata, marker_asset_key_for_job
-
 OUTPUT_NON_ASSET_SIGIL = "__bigquery_query_metadata_"
 
 
@@ -29,7 +31,7 @@ class WrappedBigQueryClient(bigquery.Client):
         return bq_job
 
     @property
-    def job_ids(self) -> List[str]:
+    def job_ids(self) -> list[str]:
         return self._job_ids
 
     @property
@@ -41,7 +43,7 @@ class WrappedBigQueryClient(bigquery.Client):
         return sum([x for x in self._query_slots_ms])
 
 
-@experimental
+@beta
 class InsightsBigQueryResource(BigQueryResource):
     """A wrapper around :py:class:`BigQueryResource` which automatically collects metadata about
     BigQuery costs which can be attributed to Dagster jobs and assets.
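Note: InsightsBigQueryResource is a drop-in replacement for dagster-gcp's BigQueryResource (now marked @beta rather than @experimental). A minimal wiring sketch; the asset name and project id are placeholders:

    from dagster import Definitions, asset

    from dagster_cloud.dagster_insights import InsightsBigQueryResource

    @asset
    def my_table(bigquery: InsightsBigQueryResource) -> None:
        # The wrapped client records job ids, bytes billed, and slot-ms so the
        # query cost can be attributed back to this asset.
        with bigquery.get_client() as client:
            client.query("SELECT 1").result()

    defs = Definitions(
        assets=[my_table],
        resources={"bigquery": InsightsBigQueryResource(project="my-gcp-project")},
    )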
dagster_cloud/dagster_insights/insights_utils.py
CHANGED
@@ -1,8 +1,9 @@
 from dataclasses import replace
-from typing import Optional, Tuple, Union
+from typing import Optional, Union
 
 import dagster._check as check
 from dagster import (
+    AssetCheckEvaluation,
     AssetCheckResult,
     AssetExecutionContext,
     AssetKey,
@@ -15,9 +16,9 @@ from dagster import (
 from dagster._core.errors import DagsterInvalidPropertyError
 
 
-def get_current_context_and_asset_key() -> (
-    Tuple[Union[OpExecutionContext, AssetExecutionContext], Optional[AssetKey]]
-):
+def get_current_context_and_asset_key() -> tuple[
+    Union[OpExecutionContext, AssetExecutionContext], Optional[AssetKey]
+]:
     asset_key = None
     try:
         context = AssetExecutionContext.get()
@@ -32,7 +33,7 @@ def get_current_context_and_asset_key() -> (
 def get_asset_key_for_output(
     context: Union[OpExecutionContext, AssetExecutionContext], output_name: str
 ) -> Optional[AssetKey]:
-    asset_key = context.job_def.asset_layer.asset_key_for_output(
+    asset_key = context.job_def.asset_layer.get_asset_key_for_node_output(
         node_handle=context.op_handle, output_name=output_name
     )
     if asset_key is None:
@@ -40,15 +41,24 @@ def get_asset_key_for_output(
     return asset_key
 
 
-def extract_asset_info_from_event(context, dagster_event, record_observation_usage):
+def extract_asset_info_from_event(
+    context,
+    dagster_event: Union[
+        Output, AssetMaterialization, AssetObservation, AssetCheckResult, AssetCheckEvaluation
+    ],
+    record_observation_usage,
+):
     if isinstance(dagster_event, AssetMaterialization):
         return dagster_event.asset_key, dagster_event.partition
 
-    if isinstance(dagster_event, (AssetCheckResult, AssetObservation)) and record_observation_usage:
+    if (
+        isinstance(dagster_event, (AssetCheckResult, AssetObservation, AssetCheckEvaluation))
+        and record_observation_usage
+    ):
         partition = dagster_event.partition if isinstance(dagster_event, AssetObservation) else None
         return dagster_event.asset_key, partition
 
-    if isinstance(dagster_event, (AssetCheckResult, AssetObservation)):
+    if isinstance(dagster_event, (AssetCheckResult, AssetObservation, AssetCheckEvaluation)):
        return None, None
 
     if isinstance(dagster_event, Output):
dagster_cloud/dagster_insights/metrics_utils.py
CHANGED
@@ -1,19 +1,19 @@
 import os
 import tempfile
-from typing import Dict, List, NamedTuple, Optional, Tuple, Union
+from typing import NamedTuple, Optional, Union
 
 import requests
 from dagster import AssetExecutionContext, DagsterInstance, OpExecutionContext
-from dagster._annotations import experimental
+from dagster._annotations import beta
 from dagster_cloud_cli.core.errors import raise_http_error
 from dagster_cloud_cli.core.headers.auth import DagsterCloudInstanceScope
 
 from dagster_cloud.instance import DagsterCloudAgentInstance
 
 
-@experimental
+@beta
 class DagsterMetric(NamedTuple):
-    """Experimental: This class gives information about a Metric.
+    """Beta: This class gives information about a Metric.
 
     Args:
         metric_name (str): name of the metric
@@ -24,16 +24,16 @@ class DagsterMetric(NamedTuple):
     metric_value: float
 
 
-def get_url_and_token_from_instance(instance: DagsterInstance) -> Tuple[str, str]:
+def get_url_and_token_from_instance(instance: DagsterInstance) -> tuple[str, str]:
     if not isinstance(instance, DagsterCloudAgentInstance):
         raise RuntimeError("This asset only functions in a running Dagster Cloud instance")
 
     return f"{instance.dagit_url}graphql", instance.dagster_cloud_agent_token
 
 
-def get_post_request_params(
+def get_insights_upload_request_params(
     instance: DagsterInstance,
-) -> Tuple[requests.Session, str, Dict[str, str], int, Optional[Dict[str, str]]]:
+) -> tuple[requests.Session, str, dict[str, str], int, Optional[dict[str, str]]]:
     if not isinstance(instance, DagsterCloudAgentInstance):
         raise RuntimeError("This asset only functions in a running Dagster Cloud instance")
 
@@ -49,7 +49,7 @@ def get_post_request_params(
 def upload_cost_information(
     context: Union[OpExecutionContext, AssetExecutionContext],
     metric_name: str,
-    cost_information: List[Tuple[str, float, str]],
+    cost_information: list[tuple[str, float, str]],
 ):
     import pyarrow as pa
     import pyarrow.parquet as pq
@@ -70,9 +70,9 @@ def upload_cost_information(
     )
 
     instance = context.instance
-    session, url, headers, timeout, proxies = get_post_request_params(instance)
+    session, url, headers, timeout, proxies = get_insights_upload_request_params(instance)
 
-    resp = session.post(url, headers=headers, timeout=timeout, proxies=proxies)
+    resp = session.get(url, headers=headers, timeout=timeout, proxies=proxies)
     raise_http_error(resp)
     resp_data = resp.json()
 
@@ -86,11 +86,11 @@ def upload_cost_information(
     )
 
 
-@experimental
+@beta
 def put_cost_information(
     context: Union[OpExecutionContext, AssetExecutionContext],
     metric_name: str,
-    cost_information: List[Tuple[str, float, str]],
+    cost_information: list[tuple[str, float, str]],
     start: float,
     end: float,
 ) -> None:
dagster_cloud/dagster_insights/snowflake/dagster_snowflake_insights.py
CHANGED
@@ -1,11 +1,12 @@
 import json
+from collections.abc import Sequence
 from dataclasses import dataclass
 from datetime import datetime, timedelta
-from typing import TYPE_CHECKING, List, Optional, Sequence, Tuple
+from typing import TYPE_CHECKING, Optional
 
 from dagster import AssetKey, AssetsDefinition, ScheduleDefinition
 
-from .snowflake_utils import OPAQUE_ID_SQL_SIGIL
+from dagster_cloud.dagster_insights.snowflake.snowflake_utils import OPAQUE_ID_SQL_SIGIL
 
 if TYPE_CHECKING:
     from dagster_snowflake import SnowflakeConnection
@@ -34,7 +35,7 @@ def get_cost_data_for_hour(
     snowflake: "SnowflakeConnection",
     start_hour: datetime,
     end_hour: datetime,
-) -> List[Tuple[str, float, str]]:
+) -> list[tuple[str, float, str]]:
     """Given a date range, queries the Snowflake query_history table for all queries that were run
     during that time period and returns a mapping from AssetMaterializationId to the cost of the
     query that produced it, as estimated by Snowflake. The cost is in Snowflake credits.
@@ -75,11 +76,7 @@ HAVING ARRAY_SIZE(opaque_ids) > 0
     assert result
     results = result.fetchall()
 
-    costs: List[Tuple[str, float, str]] = []
-
-    print(
-        f"{len(results) if results else 0} annotated queries returned from snowflake query_history"
-    )
+    costs: list[tuple[str, float, str]] = []
 
     if not results:
         return []
@@ -93,8 +90,4 @@ HAVING ARRAY_SIZE(opaque_ids) > 0
     for opaque_id in opaque_ids:
             costs.append((opaque_id, float(cost), query_id))
 
-    print(
-        f"Reported costs for {len(costs)} of {total} asset materializations found in the"
-        " query_history."
-    )
     return costs
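Note: this module backs create_snowflake_insights_asset_and_schedule, which queries Snowflake's query_history hour by hour and reports per-asset credit costs (the 1.12.x version drops the print() progress output in favor of silent operation). A wiring sketch, assuming the return value exposes .assets and .schedule as in the Insights docs; the start date and resource key are placeholders:

    from dagster import Definitions

    from dagster_cloud.dagster_insights import create_snowflake_insights_asset_and_schedule

    insights = create_snowflake_insights_asset_and_schedule(
        start_date="2024-01-01-00:00",  # placeholder backfill start hour
        snowflake_resource_key="snowflake",
    )

    defs = Definitions(
        assets=[*insights.assets],
        schedules=[insights.schedule],
    )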