dagstermill 0.16.0__tar.gz → 0.27.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dagstermill-0.16.0 → dagstermill-0.27.9}/LICENSE +1 -1
- dagstermill-0.27.9/PKG-INFO +35 -0
- dagstermill-0.27.9/README.md +4 -0
- dagstermill-0.27.9/dagstermill/__init__.py +27 -0
- dagstermill-0.27.9/dagstermill/__main__.py +3 -0
- dagstermill-0.27.9/dagstermill/asset_factory.py +227 -0
- {dagstermill-0.16.0 → dagstermill-0.27.9}/dagstermill/cli.py +15 -19
- {dagstermill-0.16.0 → dagstermill-0.27.9}/dagstermill/compat.py +4 -4
- dagstermill-0.27.9/dagstermill/context.py +191 -0
- {dagstermill-0.16.0 → dagstermill-0.27.9}/dagstermill/engine.py +13 -18
- {dagstermill-0.16.0 → dagstermill-0.27.9}/dagstermill/errors.py +1 -1
- dagstermill-0.27.9/dagstermill/examples/__init__.py +1 -0
- dagstermill-0.27.9/dagstermill/examples/repository.py +654 -0
- dagstermill-0.27.9/dagstermill/factory.py +458 -0
- dagstermill-0.27.9/dagstermill/io_managers.py +122 -0
- dagstermill-0.27.9/dagstermill/manager.py +398 -0
- {dagstermill-0.16.0 → dagstermill-0.27.9}/dagstermill/serialize.py +2 -2
- dagstermill-0.27.9/dagstermill/test_utils.py +45 -0
- {dagstermill-0.16.0 → dagstermill-0.27.9}/dagstermill/translator.py +16 -15
- dagstermill-0.27.9/dagstermill/version.py +1 -0
- dagstermill-0.27.9/dagstermill.egg-info/PKG-INFO +35 -0
- {dagstermill-0.16.0 → dagstermill-0.27.9}/dagstermill.egg-info/SOURCES.txt +2 -0
- {dagstermill-0.16.0 → dagstermill-0.27.9}/dagstermill.egg-info/entry_points.txt +0 -1
- {dagstermill-0.16.0 → dagstermill-0.27.9}/dagstermill.egg-info/requires.txt +4 -3
- dagstermill-0.27.9/setup.py +60 -0
- dagstermill-0.16.0/PKG-INFO +0 -20
- dagstermill-0.16.0/README.md +0 -4
- dagstermill-0.16.0/dagstermill/__init__.py +0 -22
- dagstermill-0.16.0/dagstermill/__main__.py +0 -3
- dagstermill-0.16.0/dagstermill/context.py +0 -163
- dagstermill-0.16.0/dagstermill/examples/__init__.py +0 -1
- dagstermill-0.16.0/dagstermill/examples/repository.py +0 -599
- dagstermill-0.16.0/dagstermill/factory.py +0 -507
- dagstermill-0.16.0/dagstermill/io_managers.py +0 -74
- dagstermill-0.16.0/dagstermill/manager.py +0 -361
- dagstermill-0.16.0/dagstermill/version.py +0 -1
- dagstermill-0.16.0/dagstermill.egg-info/PKG-INFO +0 -20
- dagstermill-0.16.0/setup.py +0 -60
- {dagstermill-0.16.0 → dagstermill-0.27.9}/MANIFEST.in +0 -0
- {dagstermill-0.16.0 → dagstermill-0.27.9}/dagstermill/py.typed +0 -0
- {dagstermill-0.16.0 → dagstermill-0.27.9}/dagstermill.egg-info/dependency_links.txt +0 -0
- {dagstermill-0.16.0 → dagstermill-0.27.9}/dagstermill.egg-info/top_level.txt +0 -0
- {dagstermill-0.16.0 → dagstermill-0.27.9}/setup.cfg +0 -0
--- dagstermill-0.16.0/LICENSE
+++ dagstermill-0.27.9/LICENSE
@@ -186,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright
+   Copyright 2025 Dagster Labs, Inc.
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
--- /dev/null
+++ dagstermill-0.27.9/PKG-INFO
@@ -0,0 +1,35 @@
+Metadata-Version: 2.4
+Name: dagstermill
+Version: 0.27.9
+Summary: run notebooks using the Dagster tools
+Author: Dagster Labs
+Author-email: hello@dagsterlabs.com
+License: Apache-2.0
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.9,<3.14
+License-File: LICENSE
+Requires-Dist: dagster==1.11.9
+Requires-Dist: ipykernel!=5.4.0,!=5.4.1,>=4.9.0
+Requires-Dist: ipython_genutils>=0.2.0
+Requires-Dist: packaging>=20.9
+Requires-Dist: papermill>=1.0.0
+Requires-Dist: scrapbook>=0.5.0
+Requires-Dist: nbconvert
+Requires-Dist: jupyter-client<8
+Provides-Extra: test
+Requires-Dist: matplotlib; extra == "test"
+Requires-Dist: scikit-learn>=0.19.0; extra == "test"
+Requires-Dist: tqdm<=4.48; extra == "test"
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: license
+Dynamic: license-file
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
--- /dev/null
+++ dagstermill-0.27.9/dagstermill/__init__.py
@@ -0,0 +1,27 @@
+from dagster_shared.libraries import DagsterLibraryRegistry
+
+import dagstermill.factory as factory  # noqa: F401
+from dagstermill.asset_factory import define_dagstermill_asset as define_dagstermill_asset
+from dagstermill.context import DagstermillExecutionContext as DagstermillExecutionContext
+from dagstermill.errors import DagstermillError as DagstermillError
+from dagstermill.factory import define_dagstermill_op as define_dagstermill_op
+from dagstermill.io_managers import (
+    ConfigurableLocalOutputNotebookIOManager as ConfigurableLocalOutputNotebookIOManager,
+    local_output_notebook_io_manager as local_output_notebook_io_manager,
+)
+from dagstermill.manager import MANAGER_FOR_NOTEBOOK_INSTANCE as _MANAGER_FOR_NOTEBOOK_INSTANCE
+from dagstermill.version import __version__ as __version__
+
+DagsterLibraryRegistry.register("dagstermill", __version__)
+
+get_context = _MANAGER_FOR_NOTEBOOK_INSTANCE.get_context
+
+yield_result = _MANAGER_FOR_NOTEBOOK_INSTANCE.yield_result
+
+yield_event = _MANAGER_FOR_NOTEBOOK_INSTANCE.yield_event
+
+_reconstitute_job_context = _MANAGER_FOR_NOTEBOOK_INSTANCE.reconstitute_job_context
+
+_teardown = _MANAGER_FOR_NOTEBOOK_INSTANCE.teardown_resources
+
+_load_input_parameter = _MANAGER_FOR_NOTEBOOK_INSTANCE.load_input_parameter
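For orientation, the bindings in this new __init__.py are the notebook-side API: notebook code imports dagstermill and calls them directly. A minimal sketch of a notebook cell, assuming the conventional "result" output name and an illustrative event payload (neither is taken from this diff):

import dagstermill
from dagster import AssetMaterialization

context = dagstermill.get_context()  # DagstermillExecutionContext from dagstermill/context.py
context.log.info("running inside the notebook")

# Stream a Dagster event back to the orchestrating op (illustrative payload).
dagstermill.yield_event(AssetMaterialization(asset_key="notebook_side_effect"))

# Hand a value back as the op's output; "result" is the assumed default output name.
dagstermill.yield_result(42, output_name="result")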
--- /dev/null
+++ dagstermill-0.27.9/dagstermill/asset_factory.py
@@ -0,0 +1,227 @@
+import pickle
+import tempfile
+from collections.abc import Iterable, Mapping
+from typing import Any, Callable, Optional, Union, cast
+
+import dagster._check as check
+from dagster import (
+    AssetIn,
+    AssetKey,
+    AssetsDefinition,
+    Failure,
+    Output,
+    PartitionsDefinition,
+    ResourceDefinition,
+    RetryPolicy,
+    RetryRequested,
+    SourceAsset,
+    asset,
+)
+from dagster._annotations import beta, beta_param
+from dagster._config.pythonic_config import Config, infer_schema_from_config_class
+from dagster._config.pythonic_config.type_check_utils import safe_is_subclass
+from dagster._core.definitions.events import CoercibleToAssetKey, CoercibleToAssetKeyPrefix
+from dagster._core.execution.context.compute import OpExecutionContext
+from dagster._core.storage.tags import COMPUTE_KIND_TAG
+from dagster._utils.tags import normalize_tags
+
+from dagstermill.factory import _clean_path_for_windows, execute_notebook
+
+
+def _make_dagstermill_asset_compute_fn(
+    name: str,
+    notebook_path: str,
+    save_notebook_on_failure: bool,
+) -> Callable:
+    def _t_fn(context: OpExecutionContext, **inputs) -> Iterable:
+        check.param_invariant(
+            isinstance(context.run_config, dict),
+            "context",
+            "StepExecutionContext must have valid run_config",
+        )
+
+        with tempfile.TemporaryDirectory() as output_notebook_dir:
+            executed_notebook_path = execute_notebook(
+                context.get_step_execution_context(),
+                name=name,
+                inputs=inputs,
+                save_notebook_on_failure=save_notebook_on_failure,
+                notebook_path=notebook_path,
+                output_notebook_dir=output_notebook_dir,
+            )
+
+            with open(executed_notebook_path, "rb") as fd:
+                yield Output(fd.read())
+
+            # deferred import for perf
+            import scrapbook
+
+            output_nb = scrapbook.read_notebook(executed_notebook_path)
+
+            for key, value in output_nb.scraps.items():
+                if key.startswith("event-"):
+                    with open(value.data, "rb") as fd:
+                        event = pickle.loads(fd.read())
+                        if isinstance(event, (Failure, RetryRequested)):
+                            raise event
+                        else:
+                            yield event
+
+    return _t_fn
+
+
+@beta
+@beta_param(param="resource_defs")
+def define_dagstermill_asset(
+    name: str,
+    notebook_path: str,
+    key_prefix: Optional[CoercibleToAssetKeyPrefix] = None,
+    ins: Optional[Mapping[str, AssetIn]] = None,
+    deps: Optional[Iterable[Union[CoercibleToAssetKey, AssetsDefinition, SourceAsset]]] = None,
+    metadata: Optional[Mapping[str, Any]] = None,
+    config_schema: Optional[Union[Any, Mapping[str, Any]]] = None,
+    required_resource_keys: Optional[set[str]] = None,
+    resource_defs: Optional[Mapping[str, ResourceDefinition]] = None,
+    description: Optional[str] = None,
+    partitions_def: Optional[PartitionsDefinition] = None,
+    op_tags: Optional[Mapping[str, Any]] = None,
+    group_name: Optional[str] = None,
+    io_manager_key: Optional[str] = None,
+    retry_policy: Optional[RetryPolicy] = None,
+    save_notebook_on_failure: bool = False,
+    non_argument_deps: Optional[Union[set[AssetKey], set[str]]] = None,
+    asset_tags: Optional[Mapping[str, Any]] = None,
+) -> AssetsDefinition:
+    """Creates a Dagster asset for a Jupyter notebook.
+
+    Arguments:
+        name (str): The name for the asset
+        notebook_path (str): Path to the backing notebook
+        key_prefix (Optional[Union[str, Sequence[str]]]): If provided, the asset's key is the
+            concatenation of the key_prefix and the asset's name, which defaults to the name of
+            the decorated function. Each item in key_prefix must be a valid name in dagster (ie only
+            contains letters, numbers, and _) and may not contain python reserved keywords.
+        ins (Optional[Mapping[str, AssetIn]]): A dictionary that maps input names to information
+            about the input.
+        deps (Optional[Sequence[Union[AssetsDefinition, SourceAsset, AssetKey, str]]]): The assets
+            that are upstream dependencies, but do not pass an input value to the notebook.
+        config_schema (Optional[ConfigSchema): The configuration schema for the asset's underlying
+            op. If set, Dagster will check that config provided for the op matches this schema and fail
+            if it does not. If not set, Dagster will accept any config provided for the op.
+        metadata (Optional[Dict[str, Any]]): A dict of metadata entries for the asset.
+        required_resource_keys (Optional[Set[str]]): Set of resource handles required by the notebook.
+        description (Optional[str]): Description of the asset to display in the Dagster UI.
+        partitions_def (Optional[PartitionsDefinition]): Defines the set of partition keys that
+            compose the asset.
+        op_tags (Optional[Dict[str, Any]]): A dictionary of tags for the op that computes the asset.
+            Frameworks may expect and require certain metadata to be attached to a op. Values that
+            are not strings will be json encoded and must meet the criteria that
+            `json.loads(json.dumps(value)) == value`.
+        group_name (Optional[str]): A string name used to organize multiple assets into groups. If not provided,
+            the name "default" is used.
+        resource_defs (Optional[Mapping[str, ResourceDefinition]]):
+            (Beta) A mapping of resource keys to resource definitions. These resources
+            will be initialized during execution, and can be accessed from the
+            context within the notebook.
+        io_manager_key (Optional[str]): A string key for the IO manager used to store the output notebook.
+            If not provided, the default key output_notebook_io_manager will be used.
+        retry_policy (Optional[RetryPolicy]): The retry policy for the op that computes the asset.
+        save_notebook_on_failure (bool): If True and the notebook fails during execution, the failed notebook will be
+            written to the Dagster storage directory. The location of the file will be printed in the Dagster logs.
+            Defaults to False.
+        asset_tags (Optional[Dict[str, Any]]): A dictionary of tags to apply to the asset.
+        non_argument_deps (Optional[Union[Set[AssetKey], Set[str]]]): Deprecated, use deps instead. Set of asset keys that are
+            upstream dependencies, but do not pass an input to the asset.
+
+    Examples:
+        .. code-block:: python
+
+            from dagstermill import define_dagstermill_asset
+            from dagster import asset, AssetIn, AssetKey
+            from sklearn import datasets
+            import pandas as pd
+            import numpy as np
+
+            @asset
+            def iris_dataset():
+                sk_iris = datasets.load_iris()
+                return pd.DataFrame(
+                    data=np.c_[sk_iris["data"], sk_iris["target"]],
+                    columns=sk_iris["feature_names"] + ["target"],
+                )
+
+            iris_kmeans_notebook = define_dagstermill_asset(
+                name="iris_kmeans_notebook",
+                notebook_path="/path/to/iris_kmeans.ipynb",
+                ins={
+                    "iris": AssetIn(key=AssetKey("iris_dataset"))
+                }
+            )
+    """
+    check.str_param(name, "name")
+    check.str_param(notebook_path, "notebook_path")
+    check.bool_param(save_notebook_on_failure, "save_notebook_on_failure")
+
+    required_resource_keys = set(
+        check.opt_set_param(required_resource_keys, "required_resource_keys", of_type=str)
+    )
+    ins = check.opt_mapping_param(ins, "ins", key_type=str, value_type=AssetIn)
+
+    if isinstance(key_prefix, str):
+        key_prefix = [key_prefix]
+
+    key_prefix = check.opt_list_param(key_prefix, "key_prefix", of_type=str)
+
+    default_description = f"This asset is backed by the notebook at {notebook_path}"
+    description = check.opt_str_param(description, "description", default=default_description)
+
+    io_mgr_key = check.opt_str_param(
+        io_manager_key, "io_manager_key", default="output_notebook_io_manager"
+    )
+
+    user_tags = normalize_tags(op_tags)
+    if op_tags is not None:
+        check.invariant(
+            "notebook_path" not in op_tags,
+            "user-defined op tags contains the `notebook_path` key, but the `notebook_path` key"
+            " is reserved for use by Dagster",
+        )
+        check.invariant(
+            COMPUTE_KIND_TAG not in op_tags,
+            f"user-defined op tags contains the `{COMPUTE_KIND_TAG}` key, but the `{COMPUTE_KIND_TAG}` key is reserved for"
+            " use by Dagster",
+        )
+
+    default_tags = {
+        "notebook_path": _clean_path_for_windows(notebook_path),
+        COMPUTE_KIND_TAG: "ipynb",
+    }
+
+    if safe_is_subclass(config_schema, Config):
+        config_schema = infer_schema_from_config_class(cast("type[Config]", config_schema))
+
+    return asset(
+        name=name,
+        key_prefix=key_prefix,
+        ins=ins,
+        deps=deps,
+        metadata=metadata,
+        description=description,
+        config_schema=config_schema,
+        required_resource_keys=required_resource_keys,
+        resource_defs=resource_defs,
+        partitions_def=partitions_def,
+        op_tags={**user_tags, **default_tags},
+        group_name=group_name,
+        output_required=False,
+        io_manager_key=io_mgr_key,
+        retry_policy=retry_policy,
+        non_argument_deps=non_argument_deps,
+        tags=asset_tags,
+    )(
+        _make_dagstermill_asset_compute_fn(
+            name=name,
+            notebook_path=notebook_path,
+            save_notebook_on_failure=save_notebook_on_failure,
+        )
+    )
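Usage note for the factory above: the returned asset yields the executed notebook through io_manager_key, which defaults to "output_notebook_io_manager". A minimal sketch of wiring that key with the ConfigurableLocalOutputNotebookIOManager exported by this package, reusing iris_dataset and iris_kmeans_notebook from the docstring example (the Definitions layout is an assumed typical setup, not code from this diff):

from dagster import Definitions
from dagstermill import ConfigurableLocalOutputNotebookIOManager

defs = Definitions(
    assets=[iris_dataset, iris_kmeans_notebook],
    resources={
        # The key must match the default io_manager_key used by define_dagstermill_asset.
        "output_notebook_io_manager": ConfigurableLocalOutputNotebookIOManager(),
    },
)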
--- dagstermill-0.16.0/dagstermill/cli.py
+++ dagstermill-0.27.9/dagstermill/cli.py
@@ -1,15 +1,15 @@
 import copy
 import os
 import subprocess
-from
+from collections.abc import Mapping
+from typing import Optional
 
 import click
-import nbformat
-from papermill.iorw import load_notebook_node, write_ipynb
-
 import dagster._check as check
-
+import nbformat
 from dagster._utils import mkdir_p, safe_isfile
+from dagster_shared.seven.json import loads
+from papermill.iorw import load_notebook_node, write_ipynb
 
 
 def get_import_cell():
@@ -35,15 +35,13 @@ def get_kernelspec(kernel: Optional[str] = None):
            )
        ) + list(kernelspecs["kernelspecs"].keys())
        kernel = preferred_kernels[0]
-        print( #
-            "No kernel specified, defaulting to '{kernel}'".format(kernel=kernel)
-        )
+        print(f"No kernel specified, defaulting to '{kernel}'")  # noqa: T201
 
     check.invariant(
         kernel in kernelspecs["kernelspecs"],
         "Could not find kernel '{kernel}': available kernels are [{kernels}]".format(
             kernel=kernel,
-            kernels=", ".join(["'{k}'"
+            kernels=", ".join([f"'{k}'" for k in kernelspecs["kernelspecs"]]),
         ),
     )
 
@@ -54,8 +52,8 @@ def get_kernelspec(kernel: Optional[str] = None):
     }
 
 
-def get_notebook_scaffolding(kernelspec:
-    check.
+def get_notebook_scaffolding(kernelspec: Mapping[str, str]):
+    check.mapping_param(kernelspec, "kernelspec", key_type=str, value_type=str)
 
     notebook = nbformat.v4.new_notebook()
 
@@ -69,7 +67,7 @@ def get_notebook_scaffolding(kernelspec: Dict[str, str]):
 
 
 @click.command(
-    name="register-notebook", help=
+    name="register-notebook", help="Scaffolds existing notebook for dagstermill compatibility"
 )
 @click.option("--notebook", "-note", type=click.Path(exists=True), help="Path to existing notebook")
 def retroactively_scaffold_notebook(notebook: str):
@@ -83,7 +81,7 @@ def execute_retroactive_scaffold(notebook_path: str):
     write_ipynb(new_nb, notebook_path)
 
 
-@click.command(name="create-notebook", help=
+@click.command(name="create-notebook", help="Creates new dagstermill notebook.")
 @click.option("--notebook", "-note", type=click.Path(), help="Name of notebook")
 @click.option(
     "--force-overwrite",
@@ -113,17 +111,15 @@ def execute_create_notebook(notebook: str, force_overwrite: bool, kernel: str):
 
     if not force_overwrite and safe_isfile(notebook_path):
         click.confirm(
-
-
-
-            "Are you sure you want to continue?"
-            ).format(notebook_path=notebook_path),
+            f"Warning, {notebook_path} already exists and continuing "
+            "will overwrite the existing notebook. "
+            "Are you sure you want to continue?",
             abort=True,
         )
 
     with open(notebook_path, "w", encoding="utf8") as f:
         f.write(get_notebook_scaffolding(get_kernelspec(kernel)))
-    click.echo("Created new dagstermill notebook at {
+    click.echo(f"Created new dagstermill notebook at {notebook_path}")
 
 
 def create_dagstermill_cli():
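To exercise these commands end to end, a minimal sketch using click's test runner; it assumes create_dagstermill_cli() returns the click group that registers create-notebook and register-notebook, and that at least one Jupyter kernelspec is installed so the kernel default shown above resolves:

from click.testing import CliRunner

from dagstermill.cli import create_dagstermill_cli

runner = CliRunner()
# Scaffold a new notebook; with no kernel specified, the first available kernelspec is
# used and "No kernel specified, defaulting to '...'" is printed, as in the hunk above.
result = runner.invoke(create_dagstermill_cli(), ["create-notebook", "--notebook", "my_notebook"])
print(result.exit_code, result.output)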
--- dagstermill-0.16.0/dagstermill/compat.py
+++ dagstermill-0.27.9/dagstermill/compat.py
@@ -1,17 +1,17 @@
 import papermill
-from packaging.version import
+from packaging.version import Version, parse
 from papermill.exceptions import PapermillExecutionError
 
 
 def is_papermill_2():
     version = parse(papermill.__version__)
-
-
+    # satisfies typechecker that might think version is a LegacyVersion
+    assert isinstance(version, Version)
     return version.major == 2
 
 
 if is_papermill_2():
-    from nbclient.exceptions import CellExecutionError
+    from nbclient.exceptions import CellExecutionError
 
     ExecutionError = (PapermillExecutionError, CellExecutionError)
 
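The point of the ExecutionError tuple is to give callers one exception type to catch across papermill 1.x and 2.x. A minimal sketch, with placeholder notebook paths:

import papermill as pm

from dagstermill.compat import ExecutionError

try:
    pm.execute_notebook("input.ipynb", "output.ipynb")
except ExecutionError as exc:
    # Covers PapermillExecutionError and, under papermill 2.x, nbclient's CellExecutionError.
    print(f"notebook execution failed: {exc}")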
--- /dev/null
+++ dagstermill-0.27.9/dagstermill/context.py
@@ -0,0 +1,191 @@
+from collections.abc import Mapping
+from typing import AbstractSet, Any, Optional, cast  # noqa: UP035
+
+from dagster import (
+    DagsterRun,
+    JobDefinition,
+    OpDefinition,
+    _check as check,
+)
+from dagster._annotations import beta, public
+from dagster._core.definitions.dependency import Node, NodeHandle
+from dagster._core.definitions.repository_definition.repository_definition import (
+    RepositoryDefinition,
+)
+from dagster._core.execution.context.op_execution_context import AbstractComputeExecutionContext
+from dagster._core.execution.context.system import PlanExecutionContext, StepExecutionContext
+from dagster._core.log_manager import DagsterLogManager
+from dagster._core.system_config.objects import ResolvedRunConfig
+
+
+@beta
+class DagstermillExecutionContext(AbstractComputeExecutionContext):
+    """Dagstermill-specific execution context.
+
+    Do not initialize directly: use :func:`dagstermill.get_context`.
+    """
+
+    def __init__(
+        self,
+        job_context: PlanExecutionContext,
+        job_def: JobDefinition,
+        resource_keys_to_init: AbstractSet[str],
+        op_name: str,
+        node_handle: NodeHandle,
+        op_config: Any = None,
+    ):
+        self._job_context = check.inst_param(job_context, "job_context", PlanExecutionContext)
+        self._job_def = check.inst_param(job_def, "job_def", JobDefinition)
+        self._resource_keys_to_init = check.set_param(
+            resource_keys_to_init, "resource_keys_to_init", of_type=str
+        )
+        self.op_name = check.str_param(op_name, "op_name")
+        self.node_handle = check.inst_param(node_handle, "node_handle", NodeHandle)
+        self._op_config = op_config
+
+    def has_tag(self, key: str) -> bool:
+        """Check if a logging tag is defined on the context.
+
+        Args:
+            key (str): The key to check.
+
+        Returns:
+            bool
+        """
+        check.str_param(key, "key")
+        return self._job_context.has_tag(key)
+
+    def get_tag(self, key: str) -> Optional[str]:
+        """Get a logging tag defined on the context.
+
+        Args:
+            key (str): The key to get.
+
+        Returns:
+            str
+        """
+        check.str_param(key, "key")
+        return self._job_context.get_tag(key)
+
+    @public
+    @property
+    def run_id(self) -> str:
+        """str: The run_id for the context."""
+        return self._job_context.run_id
+
+    @public
+    @property
+    def run_config(self) -> Mapping[str, Any]:
+        """dict: The run_config for the context."""
+        return self._job_context.run_config
+
+    @property
+    def resolved_run_config(self) -> ResolvedRunConfig:
+        """:class:`dagster.ResolvedRunConfig`: The resolved_run_config for the context."""
+        return self._job_context.resolved_run_config
+
+    @public
+    @property
+    def logging_tags(self) -> Mapping[str, str]:
+        """dict: The logging tags for the context."""
+        return self._job_context.logging_tags
+
+    @public
+    @property
+    def job_name(self) -> str:
+        """str: The name of the executing job."""
+        return self._job_context.job_name
+
+    @public
+    @property
+    def job_def(self) -> JobDefinition:
+        """:class:`dagster.JobDefinition`: The job definition for the context.
+
+        This will be a dagstermill-specific shim.
+        """
+        return self._job_def
+
+    @property
+    def repository_def(self) -> RepositoryDefinition:
+        """:class:`dagster.RepositoryDefinition`: The repository definition for the context."""
+        raise NotImplementedError
+
+    @property
+    def resources(self) -> Any:
+        """collections.namedtuple: A dynamically-created type whose properties allow access to
+        resources.
+        """
+        return self._job_context.scoped_resources_builder.build(
+            required_resource_keys=self._resource_keys_to_init,
+        )
+
+    @public
+    @property
+    def run(self) -> DagsterRun:
+        """:class:`dagster.DagsterRun`: The job run for the context."""
+        return cast("DagsterRun", self._job_context.dagster_run)
+
+    @property
+    def log(self) -> DagsterLogManager:
+        """:class:`dagster.DagsterLogManager`: The log manager for the context.
+
+        Call, e.g., ``log.info()`` to log messages through the Dagster machinery.
+        """
+        return self._job_context.log
+
+    @public
+    @property
+    def op_def(self) -> OpDefinition:
+        """:class:`dagster.OpDefinition`: The op definition for the context.
+
+        In interactive contexts, this may be a dagstermill-specific shim, depending whether an
+        op definition was passed to ``dagstermill.get_context``.
+        """
+        return cast("OpDefinition", self._job_def.node_def_named(self.op_name))
+
+    @property
+    def node(self) -> Node:
+        """:class:`dagster.Node`: The node for the context.
+
+        In interactive contexts, this may be a dagstermill-specific shim, depending whether an
+        op definition was passed to ``dagstermill.get_context``.
+        """
+        return self.job_def.get_node(self.node_handle)
+
+    @public
+    @property
+    def op_config(self) -> Any:
+        """collections.namedtuple: A dynamically-created type whose properties allow access to
+        op-specific config.
+        """
+        if self._op_config:
+            return self._op_config
+
+        op_config = self.resolved_run_config.ops.get(self.op_name)
+        return op_config.config if op_config else None
+
+
+class DagstermillRuntimeExecutionContext(DagstermillExecutionContext):
+    def __init__(
+        self,
+        job_context: PlanExecutionContext,
+        job_def: JobDefinition,
+        resource_keys_to_init: AbstractSet[str],
+        op_name: str,
+        step_context: StepExecutionContext,
+        node_handle: NodeHandle,
+        op_config: Any = None,
+    ):
+        self._step_context = check.inst_param(step_context, "step_context", StepExecutionContext)
+        super().__init__(
+            job_context,
+            job_def,
+            resource_keys_to_init,
+            op_name,
+            node_handle,
+            op_config,
+        )
+
+    @property
+    def step_context(self) -> StepExecutionContext:
+        return self._step_context