metaflow 2.12.39__py2.py3-none-any.whl → 2.13__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +1 -1
- metaflow/cli.py +111 -36
- metaflow/cli_args.py +2 -2
- metaflow/cli_components/run_cmds.py +3 -1
- metaflow/datastore/flow_datastore.py +2 -2
- metaflow/exception.py +8 -2
- metaflow/flowspec.py +48 -36
- metaflow/graph.py +28 -27
- metaflow/includefile.py +2 -2
- metaflow/lint.py +35 -20
- metaflow/metaflow_config.py +5 -0
- metaflow/parameters.py +11 -4
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +42 -0
- metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
- metaflow/plugins/cards/card_creator.py +1 -0
- metaflow/plugins/cards/card_decorator.py +46 -8
- metaflow/plugins/pypi/bootstrap.py +196 -61
- metaflow/plugins/pypi/conda_environment.py +76 -21
- metaflow/plugins/pypi/micromamba.py +42 -15
- metaflow/plugins/pypi/pip.py +8 -3
- metaflow/runner/click_api.py +175 -39
- metaflow/runner/deployer_impl.py +6 -1
- metaflow/runner/metaflow_runner.py +6 -1
- metaflow/runner/utils.py +5 -0
- metaflow/user_configs/config_options.py +87 -34
- metaflow/user_configs/config_parameters.py +44 -25
- metaflow/util.py +2 -2
- metaflow/version.py +1 -1
- {metaflow-2.12.39.dist-info → metaflow-2.13.dist-info}/METADATA +2 -2
- {metaflow-2.12.39.dist-info → metaflow-2.13.dist-info}/RECORD +34 -34
- {metaflow-2.12.39.dist-info → metaflow-2.13.dist-info}/LICENSE +0 -0
- {metaflow-2.12.39.dist-info → metaflow-2.13.dist-info}/WHEEL +0 -0
- {metaflow-2.12.39.dist-info → metaflow-2.13.dist-info}/entry_points.txt +0 -0
- {metaflow-2.12.39.dist-info → metaflow-2.13.dist-info}/top_level.txt +0 -0
metaflow/__init__.py
CHANGED
@@ -103,7 +103,7 @@ from .flowspec import FlowSpec
|
|
103
103
|
|
104
104
|
from .parameters import Parameter, JSONTypeClass, JSONType
|
105
105
|
|
106
|
-
from .user_configs.config_parameters import Config, config_expr
|
106
|
+
from .user_configs.config_parameters import Config, ConfigValue, config_expr
|
107
107
|
from .user_configs.config_decorators import CustomFlowDecorator, CustomStepDecorator
|
108
108
|
|
109
109
|
# data layer
|
metaflow/cli.py
CHANGED
@@ -10,7 +10,8 @@ from metaflow._vendor import click
|
|
10
10
|
from . import decorators, lint, metaflow_version, parameters, plugins
|
11
11
|
from .cli_args import cli_args
|
12
12
|
from .cli_components.utils import LazyGroup, LazyPluginCommandCollection
|
13
|
-
from .datastore import FlowDataStore
|
13
|
+
from .datastore import FlowDataStore, TaskDataStoreSet
|
14
|
+
from .debug import debug
|
14
15
|
from .exception import CommandException, MetaflowException
|
15
16
|
from .flowspec import _FlowState
|
16
17
|
from .graph import FlowGraph
|
@@ -35,7 +36,7 @@ from .plugins import (
|
|
35
36
|
)
|
36
37
|
from .pylint_wrapper import PyLint
|
37
38
|
from .R import metaflow_r_version, use_r
|
38
|
-
from .util import resolve_identity
|
39
|
+
from .util import get_latest_run_id, resolve_identity
|
39
40
|
from .user_configs.config_options import LocalFileInput, config_options
|
40
41
|
from .user_configs.config_parameters import ConfigValue
|
41
42
|
|
@@ -169,7 +170,8 @@ def check(obj, warnings=False):
|
|
169
170
|
@click.pass_obj
|
170
171
|
def show(obj):
|
171
172
|
echo_always("\n%s" % obj.graph.doc)
|
172
|
-
for
|
173
|
+
for node_name in obj.graph.sorted_nodes:
|
174
|
+
node = obj.graph[node_name]
|
173
175
|
echo_always("\nStep *%s*" % node.name, err=False)
|
174
176
|
echo_always(node.doc if node.doc else "?", indent=True, err=False)
|
175
177
|
if node.type != "end":
|
@@ -328,8 +330,8 @@ def start(
|
|
328
330
|
event_logger=None,
|
329
331
|
monitor=None,
|
330
332
|
local_config_file=None,
|
331
|
-
|
332
|
-
|
333
|
+
config_file=None,
|
334
|
+
config_value=None,
|
333
335
|
**deco_options
|
334
336
|
):
|
335
337
|
if quiet:
|
@@ -346,6 +348,33 @@ def start(
|
|
346
348
|
echo(" executing *%s*" % ctx.obj.flow.name, fg="magenta", nl=False)
|
347
349
|
echo(" for *%s*" % resolve_identity(), fg="magenta")
|
348
350
|
|
351
|
+
# Setup the context
|
352
|
+
cli_args._set_top_kwargs(ctx.params)
|
353
|
+
ctx.obj.echo = echo
|
354
|
+
ctx.obj.echo_always = echo_always
|
355
|
+
ctx.obj.is_quiet = quiet
|
356
|
+
ctx.obj.logger = logger
|
357
|
+
ctx.obj.pylint = pylint
|
358
|
+
ctx.obj.check = functools.partial(_check, echo)
|
359
|
+
ctx.obj.top_cli = cli
|
360
|
+
ctx.obj.package_suffixes = package_suffixes.split(",")
|
361
|
+
|
362
|
+
ctx.obj.datastore_impl = [d for d in DATASTORES if d.TYPE == datastore][0]
|
363
|
+
|
364
|
+
if datastore_root is None:
|
365
|
+
datastore_root = ctx.obj.datastore_impl.get_datastore_root_from_config(
|
366
|
+
ctx.obj.echo
|
367
|
+
)
|
368
|
+
if datastore_root is None:
|
369
|
+
raise CommandException(
|
370
|
+
"Could not find the location of the datastore -- did you correctly set the "
|
371
|
+
"METAFLOW_DATASTORE_SYSROOT_%s environment variable?" % datastore.upper()
|
372
|
+
)
|
373
|
+
|
374
|
+
ctx.obj.datastore_impl.datastore_root = datastore_root
|
375
|
+
|
376
|
+
FlowDataStore.default_storage_impl = ctx.obj.datastore_impl
|
377
|
+
|
349
378
|
# At this point, we are able to resolve the user-configuration options so we can
|
350
379
|
# process all those decorators that the user added that will modify the flow based
|
351
380
|
# on those configurations. It is important to do this as early as possible since it
|
@@ -354,19 +383,76 @@ def start(
|
|
354
383
|
# When we process the options, the first one processed will return None and the
|
355
384
|
# second one processed will return the actual options. The order of processing
|
356
385
|
# depends on what (and in what order) the user specifies on the command line.
|
357
|
-
config_options =
|
358
|
-
|
386
|
+
config_options = config_file or config_value
|
387
|
+
|
388
|
+
if (
|
389
|
+
hasattr(ctx, "saved_args")
|
390
|
+
and ctx.saved_args
|
391
|
+
and ctx.saved_args[0] == "resume"
|
392
|
+
and getattr(ctx.obj, "has_config_options", False)
|
393
|
+
):
|
394
|
+
# In the case of resume, we actually need to load the configurations
|
395
|
+
# from the resumed run to process them. This can be slightly onerous so check
|
396
|
+
# if we need to in the first place
|
397
|
+
if getattr(ctx.obj, "has_cl_config_options", False):
|
398
|
+
raise click.UsageError(
|
399
|
+
"Cannot specify --config-file or --config-value with 'resume'"
|
400
|
+
)
|
401
|
+
# We now load the config artifacts from the original run id
|
402
|
+
run_id = None
|
403
|
+
try:
|
404
|
+
idx = ctx.saved_args.index("--origin-run-id")
|
405
|
+
except ValueError:
|
406
|
+
idx = -1
|
407
|
+
if idx >= 0:
|
408
|
+
run_id = ctx.saved_args[idx + 1]
|
409
|
+
else:
|
410
|
+
run_id = get_latest_run_id(ctx.obj.echo, ctx.obj.flow.name)
|
411
|
+
if run_id is None:
|
412
|
+
raise CommandException(
|
413
|
+
"A previous run id was not found. Specify --origin-run-id."
|
414
|
+
)
|
415
|
+
# We get the name of the parameters we need to load from the datastore -- these
|
416
|
+
# are accessed using the *variable* name and not necessarily the *parameter* name
|
417
|
+
config_var_names = []
|
418
|
+
config_param_names = []
|
419
|
+
for name, param in ctx.obj.flow._get_parameters():
|
420
|
+
if not param.IS_CONFIG_PARAMETER:
|
421
|
+
continue
|
422
|
+
config_var_names.append(name)
|
423
|
+
config_param_names.append(param.name)
|
424
|
+
|
425
|
+
# We just need a task datastore that will be thrown away -- we do this so
|
426
|
+
# we don't have to create the logger, monitor, etc.
|
427
|
+
debug.userconf_exec("Loading config parameters from run %s" % run_id)
|
428
|
+
for d in TaskDataStoreSet(
|
429
|
+
FlowDataStore(ctx.obj.flow.name),
|
430
|
+
run_id,
|
431
|
+
steps=["_parameters"],
|
432
|
+
prefetch_data_artifacts=config_var_names,
|
433
|
+
):
|
434
|
+
param_ds = d
|
435
|
+
|
436
|
+
# We can now set the CONFIGS value in the flow properly. This will overwrite
|
437
|
+
# anything that may have been passed in by default and we will use exactly what
|
438
|
+
# the original flow had. Note that these are accessed through the parameter name
|
439
|
+
ctx.obj.flow._flow_state[_FlowState.CONFIGS].clear()
|
440
|
+
d = ctx.obj.flow._flow_state[_FlowState.CONFIGS]
|
441
|
+
for param_name, var_name in zip(config_param_names, config_var_names):
|
442
|
+
val = param_ds[var_name]
|
443
|
+
debug.userconf_exec("Loaded config %s as: %s" % (param_name, val))
|
444
|
+
d[param_name] = val
|
445
|
+
|
446
|
+
elif getattr(ctx.obj, "delayed_config_exception", None):
|
447
|
+
# If we are not doing a resume, any exception we had parsing configs needs to
|
448
|
+
# be raised. For resume, since we ignore those options, we ignore the error.
|
449
|
+
raise ctx.obj.delayed_config_exception
|
450
|
+
|
451
|
+
new_cls = ctx.obj.flow._process_config_decorators(config_options)
|
452
|
+
if new_cls:
|
453
|
+
ctx.obj.flow = new_cls(use_cli=False)
|
359
454
|
|
360
|
-
cli_args._set_top_kwargs(ctx.params)
|
361
|
-
ctx.obj.echo = echo
|
362
|
-
ctx.obj.echo_always = echo_always
|
363
|
-
ctx.obj.is_quiet = quiet
|
364
455
|
ctx.obj.graph = ctx.obj.flow._graph
|
365
|
-
ctx.obj.logger = logger
|
366
|
-
ctx.obj.pylint = pylint
|
367
|
-
ctx.obj.check = functools.partial(_check, echo)
|
368
|
-
ctx.obj.top_cli = cli
|
369
|
-
ctx.obj.package_suffixes = package_suffixes.split(",")
|
370
456
|
|
371
457
|
ctx.obj.environment = [
|
372
458
|
e for e in ENVIRONMENTS + [MetaflowEnvironment] if e.TYPE == environment
|
@@ -389,21 +475,6 @@ def start(
|
|
389
475
|
ctx.obj.environment, ctx.obj.flow, ctx.obj.event_logger, ctx.obj.monitor
|
390
476
|
)
|
391
477
|
|
392
|
-
ctx.obj.datastore_impl = [d for d in DATASTORES if d.TYPE == datastore][0]
|
393
|
-
|
394
|
-
if datastore_root is None:
|
395
|
-
datastore_root = ctx.obj.datastore_impl.get_datastore_root_from_config(
|
396
|
-
ctx.obj.echo
|
397
|
-
)
|
398
|
-
if datastore_root is None:
|
399
|
-
raise CommandException(
|
400
|
-
"Could not find the location of the datastore -- did you correctly set the "
|
401
|
-
"METAFLOW_DATASTORE_SYSROOT_%s environment variable?" % datastore.upper()
|
402
|
-
)
|
403
|
-
|
404
|
-
ctx.obj.datastore_impl.datastore_root = datastore_root
|
405
|
-
|
406
|
-
FlowDataStore.default_storage_impl = ctx.obj.datastore_impl
|
407
478
|
ctx.obj.flow_datastore = FlowDataStore(
|
408
479
|
ctx.obj.flow.name,
|
409
480
|
ctx.obj.environment,
|
@@ -463,7 +534,8 @@ def start(
|
|
463
534
|
decorators._attach_decorators(ctx.obj.flow, all_decospecs)
|
464
535
|
decorators._init(ctx.obj.flow)
|
465
536
|
# Regenerate graph if we attached more decorators
|
466
|
-
ctx.obj.
|
537
|
+
ctx.obj.flow.__class__._init_attrs()
|
538
|
+
ctx.obj.graph = ctx.obj.flow._graph
|
467
539
|
|
468
540
|
decorators._init_step_decorators(
|
469
541
|
ctx.obj.flow,
|
@@ -519,10 +591,13 @@ def _check(echo, graph, flow, environment, pylint=True, warnings=False, **kwargs
|
|
519
591
|
|
520
592
|
def print_metaflow_exception(ex):
|
521
593
|
echo_always(ex.headline, indent=True, nl=False, bold=True)
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
594
|
+
location = ""
|
595
|
+
if ex.source_file is not None:
|
596
|
+
location += " in file %s" % ex.source_file
|
597
|
+
if ex.line_no is not None:
|
598
|
+
location += " on line %d" % ex.line_no
|
599
|
+
location += ":"
|
600
|
+
echo_always(location, bold=True)
|
526
601
|
echo_always(ex.message, indent=True, bold=False, padding_bottom=True)
|
527
602
|
|
528
603
|
|
metaflow/cli_args.py
CHANGED
@@ -72,10 +72,10 @@ class CLIArgs(object):
|
|
72
72
|
# keyword in Python, so we call it 'decospecs' in click args
|
73
73
|
if k == "decospecs":
|
74
74
|
k = "with"
|
75
|
-
if k in ("
|
75
|
+
if k in ("config_file", "config_value"):
|
76
76
|
# Special handling here since we gather them all in one option but actually
|
77
77
|
# need to send them one at a time using --config-value <name> kv.<name>.
|
78
|
-
# Note it can be either
|
78
|
+
# Note it can be either config_file or config_value depending
|
79
79
|
# on click processing order.
|
80
80
|
for config_name in v.keys():
|
81
81
|
yield "--config-value"
|
metaflow/cli_components/run_cmds.py
CHANGED
@@ -41,7 +41,9 @@ def before_run(obj, tags, decospecs):
|
|
41
41
|
if all_decospecs:
|
42
42
|
decorators._attach_decorators(obj.flow, all_decospecs)
|
43
43
|
decorators._init(obj.flow)
|
44
|
-
|
44
|
+
# Regenerate graph if we attached more decorators
|
45
|
+
obj.flow.__class__._init_attrs()
|
46
|
+
obj.graph = obj.flow._graph
|
45
47
|
|
46
48
|
obj.check(obj.graph, obj.flow, obj.environment, pylint=obj.pylint)
|
47
49
|
# obj.environment.init_environment(obj.logger)
|
metaflow/datastore/flow_datastore.py
CHANGED
@@ -13,7 +13,7 @@ class FlowDataStore(object):
|
|
13
13
|
def __init__(
|
14
14
|
self,
|
15
15
|
flow_name,
|
16
|
-
environment,
|
16
|
+
environment=None,
|
17
17
|
metadata=None,
|
18
18
|
event_logger=None,
|
19
19
|
monitor=None,
|
@@ -31,7 +31,7 @@ class FlowDataStore(object):
|
|
31
31
|
----------
|
32
32
|
flow_name : str
|
33
33
|
The name of the flow
|
34
|
-
environment : MetaflowEnvironment
|
34
|
+
environment : MetaflowEnvironment, optional
|
35
35
|
Environment this datastore is operating in
|
36
36
|
metadata : MetadataProvider, optional
|
37
37
|
The metadata provider to use and update if needed, by default None
|
metaflow/exception.py
CHANGED
@@ -43,13 +43,19 @@ class MetaflowExceptionWrapper(Exception):
|
|
43
43
|
class MetaflowException(Exception):
|
44
44
|
headline = "Flow failed"
|
45
45
|
|
46
|
-
def __init__(self, msg="", lineno=None):
|
46
|
+
def __init__(self, msg="", lineno=None, source_file=None):
|
47
47
|
self.message = msg
|
48
48
|
self.line_no = lineno
|
49
|
+
self.source_file = source_file
|
49
50
|
super(MetaflowException, self).__init__()
|
50
51
|
|
51
52
|
def __str__(self):
|
52
|
-
prefix = "
|
53
|
+
prefix = ""
|
54
|
+
if self.source_file:
|
55
|
+
prefix = "%s:" % self.source_file
|
56
|
+
if self.line_no:
|
57
|
+
prefix = "line %d:" % self.line_no
|
58
|
+
prefix = "%s: " % prefix if prefix else ""
|
53
59
|
return "%s%s" % (prefix, self.message)
|
54
60
|
|
55
61
|
|
metaflow/flowspec.py
CHANGED
@@ -24,6 +24,7 @@ from .extension_support import extension_info
|
|
24
24
|
from .graph import FlowGraph
|
25
25
|
from .unbounded_foreach import UnboundedForeachInput
|
26
26
|
from .user_configs.config_decorators import (
|
27
|
+
ConfigValue,
|
27
28
|
CustomFlowDecorator,
|
28
29
|
CustomStepDecorator,
|
29
30
|
MutableFlow,
|
@@ -81,8 +82,10 @@ class _FlowState(Enum):
|
|
81
82
|
|
82
83
|
|
83
84
|
class FlowSpecMeta(type):
|
84
|
-
def
|
85
|
-
|
85
|
+
def __init__(cls, name, bases, attrs):
|
86
|
+
super().__init__(name, bases, attrs)
|
87
|
+
if name == "FlowSpec":
|
88
|
+
return
|
86
89
|
# We store some state in the flow class itself. This is primarily used to
|
87
90
|
# attach global state to a flow. It is *not* an actual global because of
|
88
91
|
# Runner/NBRunner. This is also created here in the meta class to avoid it being
|
@@ -90,12 +93,18 @@ class FlowSpecMeta(type):
|
|
90
93
|
|
91
94
|
# We should move _flow_decorators into this structure as well but keeping it
|
92
95
|
# out to limit the changes for now.
|
93
|
-
|
96
|
+
cls._flow_decorators = {}
|
94
97
|
|
95
98
|
# Keys are _FlowState enum values
|
96
|
-
|
99
|
+
cls._flow_state = {}
|
100
|
+
|
101
|
+
cls._init_attrs()
|
97
102
|
|
98
|
-
|
103
|
+
def _init_attrs(cls):
|
104
|
+
# Graph and steps are specific to the class -- store here so we can access
|
105
|
+
# in class method _process_config_decorators
|
106
|
+
cls._graph = FlowGraph(cls)
|
107
|
+
cls._steps = [getattr(cls, node.name) for node in cls._graph]
|
99
108
|
|
100
109
|
|
101
110
|
class FlowSpec(metaclass=FlowSpecMeta):
|
@@ -145,9 +154,6 @@ class FlowSpec(metaclass=FlowSpecMeta):
|
|
145
154
|
self._transition = None
|
146
155
|
self._cached_input = {}
|
147
156
|
|
148
|
-
self._graph = FlowGraph(self.__class__)
|
149
|
-
self._steps = [getattr(self, node.name) for node in self._graph]
|
150
|
-
|
151
157
|
if use_cli:
|
152
158
|
with parameters.flow_context(self.__class__) as _:
|
153
159
|
from . import cli
|
@@ -171,9 +177,10 @@ class FlowSpec(metaclass=FlowSpecMeta):
|
|
171
177
|
fname = fname[:-1]
|
172
178
|
return os.path.basename(fname)
|
173
179
|
|
174
|
-
|
180
|
+
@classmethod
|
181
|
+
def _check_parameters(cls, config_parameters=False):
|
175
182
|
seen = set()
|
176
|
-
for _, param in
|
183
|
+
for _, param in cls._get_parameters():
|
177
184
|
if param.IS_CONFIG_PARAMETER != config_parameters:
|
178
185
|
continue
|
179
186
|
norm = param.name.lower()
|
@@ -185,17 +192,17 @@ class FlowSpec(metaclass=FlowSpecMeta):
|
|
185
192
|
)
|
186
193
|
seen.add(norm)
|
187
194
|
|
188
|
-
|
189
|
-
|
195
|
+
@classmethod
|
196
|
+
def _process_config_decorators(cls, config_options, ignore_errors=False):
|
190
197
|
|
191
198
|
# Fast path for no user configurations
|
192
|
-
if not
|
199
|
+
if not cls._flow_state.get(_FlowState.CONFIG_DECORATORS):
|
193
200
|
# Process parameters to allow them to also use config values easily
|
194
|
-
for var, param in
|
201
|
+
for var, param in cls._get_parameters():
|
195
202
|
if param.IS_CONFIG_PARAMETER:
|
196
203
|
continue
|
197
|
-
param.init()
|
198
|
-
return
|
204
|
+
param.init(ignore_errors)
|
205
|
+
return None
|
199
206
|
|
200
207
|
debug.userconf_exec("Processing mutating step/flow decorators")
|
201
208
|
# We need to convert all the user configurations from DelayedEvaluationParameters
|
@@ -204,8 +211,8 @@ class FlowSpec(metaclass=FlowSpecMeta):
|
|
204
211
|
# We then reset them to be proper configs so they can be re-evaluated in
|
205
212
|
# _set_constants
|
206
213
|
to_reset_configs = []
|
207
|
-
|
208
|
-
for var, param in
|
214
|
+
cls._check_parameters(config_parameters=True)
|
215
|
+
for var, param in cls._get_parameters():
|
209
216
|
if not param.IS_CONFIG_PARAMETER:
|
210
217
|
continue
|
211
218
|
# Note that a config with no default and not required will be None
|
@@ -217,38 +224,36 @@ class FlowSpec(metaclass=FlowSpecMeta):
|
|
217
224
|
param._store_value(val)
|
218
225
|
to_reset_configs.append((var, param))
|
219
226
|
debug.userconf_exec("Setting config %s to %s" % (var, str(val)))
|
220
|
-
setattr(
|
227
|
+
setattr(cls, var, val)
|
221
228
|
|
222
229
|
# Run all the decorators. Step decorators are directly in the step and
|
223
230
|
# we will run those first and *then* we run all the flow level decorators
|
224
|
-
for step in
|
231
|
+
for step in cls._steps:
|
225
232
|
for deco in step.config_decorators:
|
226
233
|
if isinstance(deco, CustomStepDecorator):
|
227
234
|
debug.userconf_exec(
|
228
235
|
"Evaluating step level decorator %s for %s"
|
229
236
|
% (deco.__class__.__name__, step.name)
|
230
237
|
)
|
231
|
-
deco.evaluate(MutableStep(
|
238
|
+
deco.evaluate(MutableStep(cls, step))
|
232
239
|
else:
|
233
240
|
raise MetaflowInternalError(
|
234
241
|
"A non CustomFlowDecorator found in step custom decorators"
|
235
242
|
)
|
236
243
|
if step.config_decorators:
|
237
244
|
# We remove all mention of the custom step decorator
|
238
|
-
setattr(
|
245
|
+
setattr(cls, step.name, step)
|
239
246
|
|
240
|
-
mutable_flow = MutableFlow(
|
241
|
-
for deco in
|
247
|
+
mutable_flow = MutableFlow(cls)
|
248
|
+
for deco in cls._flow_state[_FlowState.CONFIG_DECORATORS]:
|
242
249
|
if isinstance(deco, CustomFlowDecorator):
|
243
250
|
# Sanity check to make sure we are applying the decorator to the right
|
244
251
|
# class
|
245
|
-
if not deco._flow_cls ==
|
246
|
-
current_cls, deco._flow_cls
|
247
|
-
):
|
252
|
+
if not deco._flow_cls == cls and not issubclass(cls, deco._flow_cls):
|
248
253
|
raise MetaflowInternalError(
|
249
254
|
"CustomFlowDecorator registered on the wrong flow -- "
|
250
255
|
"expected %s but got %s"
|
251
|
-
% (deco._flow_cls.__name__,
|
256
|
+
% (deco._flow_cls.__name__, cls.__name__)
|
252
257
|
)
|
253
258
|
debug.userconf_exec(
|
254
259
|
"Evaluating flow level decorator %s" % deco.__class__.__name__
|
@@ -256,15 +261,15 @@ class FlowSpec(metaclass=FlowSpecMeta):
|
|
256
261
|
deco.evaluate(mutable_flow)
|
257
262
|
# We reset cached_parameters on the very off chance that the user added
|
258
263
|
# more configurations based on the configuration
|
259
|
-
if _FlowState.CACHED_PARAMETERS in
|
260
|
-
del
|
264
|
+
if _FlowState.CACHED_PARAMETERS in cls._flow_state:
|
265
|
+
del cls._flow_state[_FlowState.CACHED_PARAMETERS]
|
261
266
|
else:
|
262
267
|
raise MetaflowInternalError(
|
263
268
|
"A non CustomFlowDecorator found in flow custom decorators"
|
264
269
|
)
|
265
270
|
|
266
271
|
# Process parameters to allow them to also use config values easily
|
267
|
-
for var, param in
|
272
|
+
for var, param in cls._get_parameters():
|
268
273
|
if param.IS_CONFIG_PARAMETER:
|
269
274
|
continue
|
270
275
|
param.init()
|
@@ -272,15 +277,18 @@ class FlowSpec(metaclass=FlowSpecMeta):
|
|
272
277
|
# TODO: This means that users can't override configs directly. Not sure if this
|
273
278
|
# is a pattern we want to support
|
274
279
|
for var, param in to_reset_configs:
|
275
|
-
setattr(
|
280
|
+
setattr(cls, var, param)
|
276
281
|
|
277
282
|
# Reset cached parameters again since we added back the config parameters
|
278
|
-
if _FlowState.CACHED_PARAMETERS in
|
279
|
-
del
|
283
|
+
if _FlowState.CACHED_PARAMETERS in cls._flow_state:
|
284
|
+
del cls._flow_state[_FlowState.CACHED_PARAMETERS]
|
280
285
|
|
281
286
|
# Set the current flow class we are in (the one we just created)
|
282
|
-
parameters.replace_flow_context(
|
283
|
-
|
287
|
+
parameters.replace_flow_context(cls)
|
288
|
+
|
289
|
+
# Re-calculate class level attributes after modifying the class
|
290
|
+
cls._init_attrs()
|
291
|
+
return cls
|
284
292
|
|
285
293
|
def _set_constants(self, graph, kwargs, config_options):
|
286
294
|
from metaflow.decorators import (
|
@@ -309,6 +317,10 @@ class FlowSpec(metaclass=FlowSpecMeta):
|
|
309
317
|
if isinstance(val, DelayedEvaluationParameter):
|
310
318
|
val = val()
|
311
319
|
val = val.split(param.separator) if val and param.separator else val
|
320
|
+
if isinstance(val, ConfigValue):
|
321
|
+
# We store config values as dict so they are accessible with older
|
322
|
+
# metaflow clients. It also makes it easier to access.
|
323
|
+
val = val.to_dict()
|
312
324
|
setattr(self, var, val)
|
313
325
|
parameters_info.append({"name": var, "type": param.__class__.__name__})
|
314
326
|
|
metaflow/graph.py
CHANGED
@@ -45,9 +45,12 @@ def deindent_docstring(doc):
|
|
45
45
|
|
46
46
|
|
47
47
|
class DAGNode(object):
|
48
|
-
def __init__(self, func_ast, decos, doc):
|
48
|
+
def __init__(self, func_ast, decos, doc, source_file, lineno):
|
49
49
|
self.name = func_ast.name
|
50
|
-
self.
|
50
|
+
self.source_file = source_file
|
51
|
+
# lineno is the start line of decorators in source_file
|
52
|
+
# func_ast.lineno is lines from decorators start to def of function
|
53
|
+
self.func_lineno = lineno + func_ast.lineno - 1
|
51
54
|
self.decorators = decos
|
52
55
|
self.doc = deindent_docstring(doc)
|
53
56
|
self.parallel_step = any(getattr(deco, "IS_PARALLEL", False) for deco in decos)
|
@@ -62,7 +65,7 @@ class DAGNode(object):
|
|
62
65
|
self.foreach_param = None
|
63
66
|
self.num_parallel = 0
|
64
67
|
self.parallel_foreach = False
|
65
|
-
self._parse(func_ast)
|
68
|
+
self._parse(func_ast, lineno)
|
66
69
|
|
67
70
|
# these attributes are populated by _traverse_graph
|
68
71
|
self.in_funcs = set()
|
@@ -74,7 +77,7 @@ class DAGNode(object):
|
|
74
77
|
def _expr_str(self, expr):
|
75
78
|
return "%s.%s" % (expr.value.id, expr.attr)
|
76
79
|
|
77
|
-
def _parse(self, func_ast):
|
80
|
+
def _parse(self, func_ast, lineno):
|
78
81
|
self.num_args = len(func_ast.args.args)
|
79
82
|
tail = func_ast.body[-1]
|
80
83
|
|
@@ -94,7 +97,7 @@ class DAGNode(object):
|
|
94
97
|
|
95
98
|
self.has_tail_next = True
|
96
99
|
self.invalid_tail_next = True
|
97
|
-
self.tail_next_lineno = tail.lineno
|
100
|
+
self.tail_next_lineno = lineno + tail.lineno - 1
|
98
101
|
self.out_funcs = [e.attr for e in tail.value.args]
|
99
102
|
|
100
103
|
keywords = dict(
|
@@ -131,7 +134,7 @@ class DAGNode(object):
|
|
131
134
|
return
|
132
135
|
|
133
136
|
def __str__(self):
|
134
|
-
return """*[{0.name} {0.type} (line {0.func_lineno})]*
|
137
|
+
return """*[{0.name} {0.type} ({0.source_file} line {0.func_lineno})]*
|
135
138
|
in_funcs={in_funcs}
|
136
139
|
out_funcs={out_funcs}
|
137
140
|
split_parents={parents}
|
@@ -156,18 +159,6 @@ class DAGNode(object):
|
|
156
159
|
)
|
157
160
|
|
158
161
|
|
159
|
-
class StepVisitor(ast.NodeVisitor):
|
160
|
-
def __init__(self, nodes, flow):
|
161
|
-
self.nodes = nodes
|
162
|
-
self.flow = flow
|
163
|
-
super(StepVisitor, self).__init__()
|
164
|
-
|
165
|
-
def visit_FunctionDef(self, node):
|
166
|
-
func = getattr(self.flow, node.name)
|
167
|
-
if hasattr(func, "is_step"):
|
168
|
-
self.nodes[node.name] = DAGNode(node, func.decorators, func.__doc__)
|
169
|
-
|
170
|
-
|
171
162
|
class FlowGraph(object):
|
172
163
|
def __init__(self, flow):
|
173
164
|
self.name = flow.__name__
|
@@ -179,13 +170,20 @@ class FlowGraph(object):
|
|
179
170
|
self._postprocess()
|
180
171
|
|
181
172
|
def _create_nodes(self, flow):
|
182
|
-
module = __import__(flow.__module__)
|
183
|
-
tree = ast.parse(inspect.getsource(module)).body
|
184
|
-
root = [n for n in tree if isinstance(n, ast.ClassDef) and n.name == self.name][
|
185
|
-
0
|
186
|
-
]
|
187
173
|
nodes = {}
|
188
|
-
|
174
|
+
for element in dir(flow):
|
175
|
+
func = getattr(flow, element)
|
176
|
+
if callable(func) and hasattr(func, "is_step"):
|
177
|
+
source_file = inspect.getsourcefile(func)
|
178
|
+
source_lines, lineno = inspect.getsourcelines(func)
|
179
|
+
# This also works for code (strips out leading whitespace based on
|
180
|
+
# first line)
|
181
|
+
source_code = deindent_docstring("".join(source_lines))
|
182
|
+
function_ast = ast.parse(source_code).body[0]
|
183
|
+
node = DAGNode(
|
184
|
+
function_ast, func.decorators, func.__doc__, source_file, lineno
|
185
|
+
)
|
186
|
+
nodes[element] = node
|
189
187
|
return nodes
|
190
188
|
|
191
189
|
def _postprocess(self):
|
@@ -201,6 +199,10 @@ class FlowGraph(object):
|
|
201
199
|
|
202
200
|
def _traverse_graph(self):
|
203
201
|
def traverse(node, seen, split_parents):
|
202
|
+
try:
|
203
|
+
self.sorted_nodes.remove(node.name)
|
204
|
+
except ValueError:
|
205
|
+
pass
|
204
206
|
self.sorted_nodes.append(node.name)
|
205
207
|
if node.type in ("split", "foreach"):
|
206
208
|
node.split_parents = split_parents
|
@@ -240,9 +242,7 @@ class FlowGraph(object):
|
|
240
242
|
return iter(self.nodes.values())
|
241
243
|
|
242
244
|
def __str__(self):
|
243
|
-
return "\n".join(
|
244
|
-
str(n) for _, n in sorted((n.func_lineno, n) for n in self.nodes.values())
|
245
|
-
)
|
245
|
+
return "\n".join(str(self[n]) for n in self.sorted_nodes)
|
246
246
|
|
247
247
|
def output_dot(self):
|
248
248
|
def edge_specs():
|
@@ -286,6 +286,7 @@ class FlowGraph(object):
|
|
286
286
|
"name": name,
|
287
287
|
"type": node_to_type(node),
|
288
288
|
"line": node.func_lineno,
|
289
|
+
"source_file": node.source_file,
|
289
290
|
"doc": node.doc,
|
290
291
|
"decorators": [
|
291
292
|
{
|
metaflow/includefile.py
CHANGED
@@ -290,8 +290,8 @@ class IncludeFile(Parameter):
|
|
290
290
|
**kwargs,
|
291
291
|
)
|
292
292
|
|
293
|
-
def init(self):
|
294
|
-
super(IncludeFile, self).init()
|
293
|
+
def init(self, ignore_errors=False):
|
294
|
+
super(IncludeFile, self).init(ignore_errors)
|
295
295
|
|
296
296
|
# This will use the values set explicitly in the args if present, else will
|
297
297
|
# use and remove from kwargs else will use True/utf-8
|