ob-metaflow 2.10.7.4__py2.py3-none-any.whl → 2.10.9.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow might be problematic. Click here for more details.

Files changed (52) hide show
  1. metaflow/cards.py +2 -0
  2. metaflow/decorators.py +1 -1
  3. metaflow/metaflow_config.py +2 -0
  4. metaflow/plugins/__init__.py +4 -0
  5. metaflow/plugins/airflow/airflow_cli.py +1 -1
  6. metaflow/plugins/argo/argo_workflows_cli.py +1 -1
  7. metaflow/plugins/aws/aws_utils.py +1 -1
  8. metaflow/plugins/aws/batch/batch.py +4 -0
  9. metaflow/plugins/aws/batch/batch_cli.py +3 -0
  10. metaflow/plugins/aws/batch/batch_client.py +40 -11
  11. metaflow/plugins/aws/batch/batch_decorator.py +1 -0
  12. metaflow/plugins/aws/step_functions/step_functions.py +1 -0
  13. metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -1
  14. metaflow/plugins/azure/azure_exceptions.py +1 -1
  15. metaflow/plugins/cards/card_cli.py +413 -28
  16. metaflow/plugins/cards/card_client.py +16 -7
  17. metaflow/plugins/cards/card_creator.py +228 -0
  18. metaflow/plugins/cards/card_datastore.py +124 -26
  19. metaflow/plugins/cards/card_decorator.py +40 -86
  20. metaflow/plugins/cards/card_modules/base.html +12 -0
  21. metaflow/plugins/cards/card_modules/basic.py +74 -8
  22. metaflow/plugins/cards/card_modules/bundle.css +1 -170
  23. metaflow/plugins/cards/card_modules/card.py +65 -0
  24. metaflow/plugins/cards/card_modules/components.py +446 -81
  25. metaflow/plugins/cards/card_modules/convert_to_native_type.py +9 -3
  26. metaflow/plugins/cards/card_modules/main.js +250 -21
  27. metaflow/plugins/cards/card_modules/test_cards.py +117 -0
  28. metaflow/plugins/cards/card_resolver.py +0 -2
  29. metaflow/plugins/cards/card_server.py +361 -0
  30. metaflow/plugins/cards/component_serializer.py +506 -42
  31. metaflow/plugins/cards/exception.py +20 -1
  32. metaflow/plugins/datastores/azure_storage.py +1 -2
  33. metaflow/plugins/datastores/gs_storage.py +1 -2
  34. metaflow/plugins/datastores/s3_storage.py +2 -1
  35. metaflow/plugins/datatools/s3/s3.py +24 -11
  36. metaflow/plugins/env_escape/client.py +2 -12
  37. metaflow/plugins/env_escape/client_modules.py +18 -14
  38. metaflow/plugins/env_escape/server.py +18 -11
  39. metaflow/plugins/env_escape/utils.py +12 -0
  40. metaflow/plugins/gcp/gs_exceptions.py +1 -1
  41. metaflow/plugins/gcp/gs_utils.py +1 -1
  42. metaflow/plugins/pypi/conda_environment.py +5 -6
  43. metaflow/plugins/pypi/pip.py +2 -2
  44. metaflow/plugins/pypi/utils.py +15 -0
  45. metaflow/task.py +1 -0
  46. metaflow/version.py +1 -1
  47. {ob_metaflow-2.10.7.4.dist-info → ob_metaflow-2.10.9.1.dist-info}/METADATA +1 -1
  48. {ob_metaflow-2.10.7.4.dist-info → ob_metaflow-2.10.9.1.dist-info}/RECORD +52 -50
  49. {ob_metaflow-2.10.7.4.dist-info → ob_metaflow-2.10.9.1.dist-info}/LICENSE +0 -0
  50. {ob_metaflow-2.10.7.4.dist-info → ob_metaflow-2.10.9.1.dist-info}/WHEEL +0 -0
  51. {ob_metaflow-2.10.7.4.dist-info → ob_metaflow-2.10.9.1.dist-info}/entry_points.txt +0 -0
  52. {ob_metaflow-2.10.7.4.dist-info → ob_metaflow-2.10.9.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,228 @@
1
+ import time
2
+ import subprocess
3
+ import tempfile
4
+ import json
5
+ import sys
6
+ import os
7
+ from metaflow import current
8
+
9
+ ASYNC_TIMEOUT = 30
10
+
11
+
12
class CardProcessManager:
    """
    Registry of the asynchronous card creation subprocesses.

    The registry is a class-level dictionary, so it is shared by every caller
    in the current Python process. Each entry is keyed by the card's uuid and
    records the process object and the time at which it was registered.
    """

    async_card_processes = {
        # "carduuid": {
        #     "proc": subprocess.Popen,
        #     "started": time.time()
        # }
    }

    @classmethod
    def _register_card_process(cls, carduuid, proc):
        """Record `proc` as the subprocess for `carduuid`, replacing any previous entry."""
        cls.async_card_processes[carduuid] = {
            "proc": proc,
            "started": time.time(),
        }

    @classmethod
    def _get_card_process(cls, carduuid):
        """
        Return `(proc, started)` for `carduuid`, or `(None, None)` when no
        process is registered for it.
        """
        proc_dict = cls.async_card_processes.get(carduuid)
        if proc_dict is None:
            return None, None
        return proc_dict["proc"], proc_dict["started"]

    @classmethod
    def _remove_card_process(cls, carduuid):
        """
        Kill the subprocess registered for `carduuid` and drop it from the registry.

        Does nothing when `carduuid` is not registered.
        """
        # Pop first so that the registry never keeps an entry for a process
        # we have decided to get rid of, even if `kill` raises.
        proc_dict = cls.async_card_processes.pop(carduuid, None)
        if proc_dict is None:
            return
        proc = proc_dict["proc"]
        proc.kill()
        # Collect the exit status of the killed child; without this the
        # terminated process lingers as a zombie until it is reaped.
        proc.wait()
44
+
45
+
46
class CardCreator:
    """
    Builds and launches the `card create` command line subprocess for a card
    of the currently running task.

    Parameters
    ----------
    top_level_options : list
        Command line arguments placed between the script path and the `card`
        sub-command of every subprocess invocation.
    """

    def __init__(self, top_level_options):
        self._top_level_options = top_level_options

    def create(
        self,
        card_uuid=None,
        user_set_card_id=None,
        runtime_card=False,
        decorator_attributes=None,
        card_options=None,
        logger=None,
        mode="render",
        final=False,
        sync=False,
    ):
        """
        Collect the card's components and data from `current.card` and hand
        them to a `card create` subprocess.

        Parameters
        ----------
        card_uuid : str
            Identifier of the card; also the key under which an asynchronous
            subprocess is tracked.
        user_set_card_id : optional
            Passed to the subprocess as `--id` when not None.
        runtime_card : bool
            When False, every mode other than "render" is ignored.
        decorator_attributes : dict
            Must provide the "type", "timeout" and "save_errors" keys.
        card_options : optional
            Passed JSON-encoded as `--options` when non-empty.
        logger : callable, optional
            Called with the subprocess output when the subprocess fails.
        mode : str
            "render" and any mode other than "refresh" serialize the card's
            components; "refresh" only ships data.
        final : bool
            Forwarded to `current.card._get_latest_data` (it affects the
            reload token set there) and forces synchronous execution.
        sync : bool
            Only forces synchronous execution of the subprocess.
        """
        if mode != "render" and not runtime_card:
            # silently ignore runtime updates for cards that don't support them
            return
        elif mode == "refresh":
            # don't serialize components, which can be a somewhat expensive
            # operation, if we are just updating data
            component_strings = []
        else:
            component_strings = current.card._serialize_components(card_uuid)
        data = current.card._get_latest_data(card_uuid, final=final, mode=mode)
        runspec = "/".join([current.run_id, current.step_name, current.task_id])
        self._run_cards_subprocess(
            card_uuid,
            user_set_card_id,
            mode,
            runspec,
            decorator_attributes,
            card_options,
            component_strings,
            logger,
            data,
            final=final,
            sync=sync,
        )

    @staticmethod
    def _write_json_tempfile(payload):
        """Dump `payload` as JSON into a closed, persistent temp file and return its path."""
        # delete=False: the file has to outlive this handle so that the
        # subprocess can read it. Leaving the `with` block flushes and closes it.
        with tempfile.NamedTemporaryFile(
            "w", suffix=".json", delete=False
        ) as handle:
            json.dump(payload, handle)
        return handle.name

    @staticmethod
    def _discard_files(paths):
        """Best-effort removal of `paths`; files that are already gone are fine."""
        for path in paths:
            try:
                os.remove(path)
            except OSError:
                # Usually the subprocess has already deleted the file.
                pass

    def _run_cards_subprocess(
        self,
        card_uuid,
        user_set_card_id,
        mode,
        runspec,
        decorator_attributes,
        card_options,
        component_strings,
        logger,
        data=None,
        final=False,
        sync=False,
    ):
        """
        Write the subprocess input files, assemble the `card create` command
        and run it, synchronously when `final` or `sync` is set.

        A failure of the subprocess is reported through `logger` (when one is
        given); it is not raised.
        """
        wait = final or sync
        input_files = []
        # True only once a new asynchronous subprocess owns the input files.
        handed_over = False
        try:
            components_path = data_path = None
            if component_strings:
                components_path = self._write_json_tempfile(component_strings)
                input_files.append(components_path)
            if data is not None:
                data_path = self._write_json_tempfile(data)
                input_files.append(data_path)

            executable = sys.executable
            cmd = [
                executable,
                sys.argv[0],
            ]
            cmd += self._top_level_options + [
                "card",
                "create",
                runspec,
                "--delete-input-files",
                "--card-uuid",
                card_uuid,
                "--mode",
                mode,
                "--type",
                decorator_attributes["type"],
                # Add the options relating to card arguments.
                # todo : add scope as a CLI arg for the create method.
            ]
            if card_options is not None and len(card_options) > 0:
                cmd += ["--options", json.dumps(card_options)]

            if decorator_attributes["timeout"] is not None:
                cmd += ["--timeout", str(decorator_attributes["timeout"])]

            if user_set_card_id is not None:
                cmd += ["--id", str(user_set_card_id)]

            if decorator_attributes["save_errors"]:
                cmd += ["--render-error-card"]

            if components_path is not None:
                cmd += ["--component-file", components_path]

            if data_path is not None:
                cmd += ["--data-file", data_path]

            previous_proc, _ = CardProcessManager._get_card_process(card_uuid)
            response, fail = self._run_command(
                cmd,
                card_uuid,
                os.environ,
                timeout=decorator_attributes["timeout"],
                wait=wait,
            )
            if not wait:
                # `_run_command` registers a new process object only when it
                # really launched one; an unchanged entry means the launch
                # was refused because an earlier run is still in flight.
                latest_proc, _ = CardProcessManager._get_card_process(card_uuid)
                handed_over = latest_proc is not previous_proc
        finally:
            # An asynchronous subprocess must delete its own input files
            # (`--delete-input-files`); removing them here would race with
            # it. In every other case (synchronous run finished, launch
            # refused, or an error above) nobody else will clean up.
            if not handed_over:
                self._discard_files(input_files)

        if fail and logger is not None:
            resp = "" if response is None else response.decode("utf-8")
            logger(
                "Card render failed with error : \n\n %s" % resp,
                timestamp=False,
                bad=True,
            )

    def _wait_for_async_processes_to_finish(self, card_uuid, async_timeout):
        """
        Block until the asynchronous subprocess registered for `card_uuid`
        has exited, killing it once it has been registered for longer than
        `async_timeout` seconds. Returns immediately when there is no
        running subprocess for the card.
        """
        proc, started = CardProcessManager._get_card_process(card_uuid)
        if proc is None or proc.poll() is not None:
            return
        remaining = max(0.0, async_timeout - (time.time() - started))
        try:
            # Sleeps inside `wait` instead of spinning on `poll()`.
            proc.wait(timeout=remaining)
        except subprocess.TimeoutExpired:
            # This means the process has crossed the timeout and we need to kill it.
            CardProcessManager._remove_card_process(card_uuid)

    def _run_command(self, cmd, card_uuid, env, wait=True, timeout=None):
        """
        Run `cmd` for the card `card_uuid`.

        With `wait=True` the command runs synchronously, after any in-flight
        asynchronous subprocess of the same card has finished or been
        killed. With `wait=False` the command is launched in the background,
        unless a previous asynchronous subprocess of the card is still
        running within its time budget, in which case nothing is launched.

        Returns
        -------
        tuple
            `(output, fail)`. `output` is the combined stdout/stderr of a
            synchronous run (it can be None when the run timed out) and
            empty bytes for the asynchronous path. `fail` is True only when
            a synchronous run exited with an error or timed out.
        """
        async_timeout = ASYNC_TIMEOUT
        timeout_args = {}
        if timeout is not None:
            # Allow the subprocess a small grace period over its own timeout.
            async_timeout = int(timeout) + 10
            timeout_args = dict(timeout=async_timeout)

        if wait:
            self._wait_for_async_processes_to_finish(card_uuid, async_timeout)
            try:
                rep = subprocess.check_output(
                    cmd, env=env, stderr=subprocess.STDOUT, **timeout_args
                )
            except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
                return e.output, True
            return rep, False

        proc, started = CardProcessManager._get_card_process(card_uuid)
        if proc is not None and proc.poll() is None:
            if time.time() - started <= async_timeout:
                # silently refuse to run an async process if a previous one
                # is still running and timeout hasn't been reached
                return b"", False
            # The old process overstayed its budget: kill it so that a new
            # one can take its place below.
            CardProcessManager._remove_card_process(card_uuid)

        CardProcessManager._register_card_process(
            card_uuid,
            subprocess.Popen(
                cmd,
                env=env,
                stderr=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
            ),
        )
        return b"", False
@@ -3,9 +3,9 @@
3
3
  """
4
4
 
5
5
  from collections import namedtuple
6
- from hashlib import sha1
7
6
  from io import BytesIO
8
7
  import os
8
+ import json
9
9
  import shutil
10
10
 
11
11
  from metaflow.plugins.datastores.local_storage import LocalStorage
@@ -28,6 +28,16 @@ NUM_SHORT_HASH_CHARS = 5
28
28
  CardInfo = namedtuple("CardInfo", ["type", "hash", "id", "filename"])
29
29
 
30
30
 
31
class CardNameSuffix:
    # Trailing part of a stored file's name; `get_card_location` appends it
    # after the final "." of the file name.
    DATA = "data.json"
    CARD = "html"
34
+
35
+
36
class CardPathSuffix:
    # Last directory component of the paths built by `_make_path`.
    DATA = "runtime"
    CARD = "cards"
39
+
40
+
31
41
  def path_spec_resolver(pathspec):
32
42
  splits = pathspec.split("/")
33
43
  splits.extend([None] * (4 - len(splits)))
@@ -85,18 +95,22 @@ class CardDatastore(object):
85
95
  self._run_id = run_id
86
96
  self._step_name = step_name
87
97
  self._pathspec = pathspec
88
- self._temp_card_save_path = self._get_write_path(base_pth=TEMP_DIR_NAME)
98
+ self._temp_card_save_path = self._get_card_write_path(base_pth=TEMP_DIR_NAME)
89
99
 
90
100
  @classmethod
91
- def get_card_location(cls, base_path, card_name, card_html, card_id=None):
92
- chash = sha1(bytes(card_html, "utf-8")).hexdigest()
101
+ def get_card_location(
102
+ cls, base_path, card_name, uuid, card_id=None, suffix=CardNameSuffix.CARD
103
+ ):
104
+ chash = uuid
93
105
  if card_id is None:
94
- card_file_name = "%s-%s.html" % (card_name, chash)
106
+ card_file_name = "%s-%s.%s" % (card_name, chash, suffix)
95
107
  else:
96
- card_file_name = "%s-%s-%s.html" % (card_name, card_id, chash)
108
+ card_file_name = "%s-%s-%s.%s" % (card_name, card_id, chash, suffix)
97
109
  return os.path.join(base_path, card_file_name)
98
110
 
99
- def _make_path(self, base_pth, pathspec=None, with_steps=False):
111
+ def _make_path(
112
+ self, base_pth, pathspec=None, with_steps=False, suffix=CardPathSuffix.CARD
113
+ ):
100
114
  sysroot = base_pth
101
115
  if pathspec is not None:
102
116
  # since most cards are at a task level there will always be 4 non-none values returned
@@ -121,7 +135,7 @@ class CardDatastore(object):
121
135
  step_name,
122
136
  "tasks",
123
137
  task_id,
124
- "cards",
138
+ suffix,
125
139
  ]
126
140
  else:
127
141
  pth_arr = [
@@ -131,20 +145,49 @@ class CardDatastore(object):
131
145
  run_id,
132
146
  "tasks",
133
147
  task_id,
134
- "cards",
148
+ suffix,
135
149
  ]
136
150
  if sysroot == "" or sysroot is None:
137
151
  pth_arr.pop(0)
138
152
  return os.path.join(*pth_arr)
139
153
 
140
- def _get_write_path(self, base_pth=""):
141
- return self._make_path(base_pth, pathspec=self._pathspec, with_steps=True)
154
def _get_data_read_path(self, base_pth=""):
    """Return the step-qualified path, ending in the data suffix, from which card data is read."""
    return self._make_path(
        base_pth,
        suffix=CardPathSuffix.DATA,
        with_steps=True,
        pathspec=self._pathspec,
    )
161
+
162
def _get_data_write_path(self, base_pth=""):
    """Return the step-qualified path, ending in the data suffix, to which card data is written."""
    return self._make_path(
        base_pth,
        suffix=CardPathSuffix.DATA,
        with_steps=True,
        pathspec=self._pathspec,
    )
169
+
170
def _get_card_write_path(self, base_pth=""):
    """Return the step-qualified path, ending in the card suffix, to which cards are written."""
    return self._make_path(
        base_pth=base_pth,
        pathspec=self._pathspec,
        with_steps=True,
        suffix=CardPathSuffix.CARD,
    )
142
180
 
143
- def _get_read_path(self, base_pth="", with_steps=False):
144
- return self._make_path(base_pth, pathspec=self._pathspec, with_steps=with_steps)
181
def _get_card_read_path(self, base_pth="", with_steps=False):
    """
    Return the path, ending in the card suffix, from which cards are read.

    `with_steps` is forwarded to `_make_path` and selects between the
    step-qualified layout and the one without the step name.
    """
    return self._make_path(
        base_pth=base_pth,
        pathspec=self._pathspec,
        with_steps=with_steps,
        suffix=CardPathSuffix.CARD,
    )
145
188
 
146
189
  @staticmethod
147
- def card_info_from_path(path):
190
+ def info_from_path(path, suffix=CardNameSuffix.CARD):
148
191
  """
149
192
  Args:
150
193
  path (str): The path to the card
@@ -160,8 +203,8 @@ class CardDatastore(object):
160
203
 
161
204
  if len(file_split) not in [2, 3]:
162
205
  raise Exception(
163
- "Invalid card file name %s. Card file names should be of form TYPE-HASH.html or TYPE-ID-HASH.html"
164
- % card_file_name
206
+ "Invalid file name %s. Card/Data file names should be of form TYPE-HASH.%s or TYPE-ID-HASH.%s"
207
+ % (card_file_name, suffix, suffix)
165
208
  )
166
209
  card_type, card_hash, card_id = None, None, None
167
210
 
@@ -170,10 +213,23 @@ class CardDatastore(object):
170
213
  else:
171
214
  card_type, card_id, card_hash = file_split
172
215
 
173
- card_hash = card_hash.split(".html")[0]
216
+ card_hash = card_hash.split("." + suffix)[0]
174
217
  return CardInfo(card_type, card_hash, card_id, card_file_name)
175
218
 
176
- def save_card(self, card_type, card_html, card_id=None, overwrite=True):
219
def save_data(self, uuid, card_type, json_data, card_id=None):
    """
    Serialize `json_data` to JSON and store it at the card's data location.

    The location is derived from the data write path, `card_type`, `uuid`
    and, when given, `card_id`. The stored object is always overwritten.
    """
    destination = self.get_card_location(
        self._get_data_write_path(),
        card_type,
        uuid,
        card_id=card_id,
        suffix=CardNameSuffix.DATA,
    )
    payload = BytesIO(json.dumps(json_data).encode("utf-8"))
    self._backend.save_bytes([(destination, payload)], overwrite=True)
231
+
232
+ def save_card(self, uuid, card_type, card_html, card_id=None, overwrite=True):
177
233
  card_file_name = card_type
178
234
  # TEMPORARY_WORKAROUND: FIXME (LATER) : Fix the duplication of below block in a few months.
179
235
  # Check file blame to understand the age of this temporary workaround.
@@ -193,7 +249,11 @@ class CardDatastore(object):
193
249
  # It will also easily end up breaking the metaflow-ui (which maybe using a client from an older version).
194
250
  # Hence, we are writing cards to both paths so that we can introduce breaking changes later in the future.
195
251
  card_path_with_steps = self.get_card_location(
196
- self._get_write_path(), card_file_name, card_html, card_id=card_id
252
+ self._get_card_write_path(),
253
+ card_file_name,
254
+ uuid,
255
+ card_id=card_id,
256
+ suffix=CardNameSuffix.CARD,
197
257
  )
198
258
  if SKIP_CARD_DUALWRITE:
199
259
  self._backend.save_bytes(
@@ -202,28 +262,29 @@ class CardDatastore(object):
202
262
  )
203
263
  else:
204
264
  card_path_without_steps = self.get_card_location(
205
- self._get_read_path(with_steps=False),
265
+ self._get_card_read_path(with_steps=False),
206
266
  card_file_name,
207
- card_html,
267
+ uuid,
208
268
  card_id=card_id,
269
+ suffix=CardNameSuffix.CARD,
209
270
  )
210
271
  for cp in [card_path_with_steps, card_path_without_steps]:
211
272
  self._backend.save_bytes(
212
273
  [(cp, BytesIO(bytes(card_html, "utf-8")))], overwrite=overwrite
213
274
  )
214
275
 
215
- return self.card_info_from_path(card_path_with_steps)
276
+ return self.info_from_path(card_path_with_steps, suffix=CardNameSuffix.CARD)
216
277
 
217
278
  def _list_card_paths(self, card_type=None, card_hash=None, card_id=None):
218
279
  # Check for new cards first
219
280
  card_paths = []
220
281
  card_paths_with_steps = self._backend.list_content(
221
- [self._get_read_path(with_steps=True)]
282
+ [self._get_card_read_path(with_steps=True)]
222
283
  )
223
284
 
224
285
  if len(card_paths_with_steps) == 0:
225
286
  card_paths_without_steps = self._backend.list_content(
226
- [self._get_read_path(with_steps=False)]
287
+ [self._get_card_read_path(with_steps=False)]
227
288
  )
228
289
  if len(card_paths_without_steps) == 0:
229
290
  # If there are no files found on the Path then raise an error of
@@ -240,7 +301,7 @@ class CardDatastore(object):
240
301
  cards_found = []
241
302
  for task_card_path in card_paths:
242
303
  card_path = task_card_path.path
243
- card_info = self.card_info_from_path(card_path)
304
+ card_info = self.info_from_path(card_path, suffix=CardNameSuffix.CARD)
244
305
  if card_type is not None and card_info.type != card_type:
245
306
  continue
246
307
  elif card_hash is not None:
@@ -254,11 +315,32 @@ class CardDatastore(object):
254
315
 
255
316
  return cards_found
256
317
 
318
def _list_card_data(self, card_type=None, card_hash=None, card_id=None):
    """
    List the paths of the stored card data files that match the filters.

    An entry is dropped when `card_type` is given and differs from the
    entry's type. Beyond that, `card_hash` takes precedence: when it is
    given, the entry's hash must start with it and `card_id` is not
    consulted; `card_id` is only compared when no `card_hash` is given.
    Only entries flagged as files are returned.
    """
    listing = self._backend.list_content([self._get_data_read_path()])
    matches = []

    for entry in listing:
        entry_path = entry.path
        info = self.info_from_path(entry_path, suffix=CardNameSuffix.DATA)
        if card_type is not None and info.type != card_type:
            continue
        if card_hash is not None:
            if not info.hash.startswith(card_hash):
                continue
        elif card_id is not None and info.id != card_id:
            continue
        if entry.is_file:
            matches.append(entry_path)

    return matches
336
+
257
337
  def create_full_path(self, card_path):
258
338
  return os.path.join(self._backend.datastore_root, card_path)
259
339
 
260
340
  def get_card_names(self, card_paths):
261
- return [self.card_info_from_path(path) for path in card_paths]
341
+ return [
342
+ self.info_from_path(path, suffix=CardNameSuffix.CARD) for path in card_paths
343
+ ]
262
344
 
263
345
  def get_card_html(self, path):
264
346
  with self._backend.load_bytes([path]) as get_results:
@@ -267,6 +349,13 @@ class CardDatastore(object):
267
349
  with open(path, "r") as f:
268
350
  return f.read()
269
351
 
352
def get_card_data(self, path):
    """
    Load `path` through the backend and return the parsed JSON content of
    the first result that has a local file. Returns None when no result
    has one.
    """
    with self._backend.load_bytes([path]) as get_results:
        for _, local_path, _ in get_results:
            if local_path is None:
                continue
            with open(local_path, "r") as fh:
                return json.load(fh)
358
+
270
359
  def cache_locally(self, path, save_path=None):
271
360
  """
272
361
  Saves the data present in the `path` the `metaflow_card_cache` directory or to the `save_path`.
@@ -292,6 +381,15 @@ class CardDatastore(object):
292
381
  shutil.copy(path, main_path)
293
382
  return main_path
294
383
 
384
def extract_data_paths(self, card_type=None, card_hash=None, card_id=None):
    """
    Return the stored card data paths matching the given filters.

    Note that `card_hash` is the unique identifier of the card;
    it is no longer an actual content hash.
    """
    filters = dict(card_type=card_type, card_hash=card_hash, card_id=card_id)
    return self._list_card_data(**filters)
392
+
295
393
  def extract_card_paths(self, card_type=None, card_hash=None, card_id=None):
296
394
  return self._list_card_paths(
297
395
  card_type=card_type, card_hash=card_hash, card_id=card_id