ob-metaflow 2.10.7.4__py2.py3-none-any.whl → 2.10.9.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow might be problematic. Click here for more details.

Files changed (52) hide show
  1. metaflow/cards.py +2 -0
  2. metaflow/decorators.py +1 -1
  3. metaflow/metaflow_config.py +2 -0
  4. metaflow/plugins/__init__.py +4 -0
  5. metaflow/plugins/airflow/airflow_cli.py +1 -1
  6. metaflow/plugins/argo/argo_workflows_cli.py +1 -1
  7. metaflow/plugins/aws/aws_utils.py +1 -1
  8. metaflow/plugins/aws/batch/batch.py +4 -0
  9. metaflow/plugins/aws/batch/batch_cli.py +3 -0
  10. metaflow/plugins/aws/batch/batch_client.py +40 -11
  11. metaflow/plugins/aws/batch/batch_decorator.py +1 -0
  12. metaflow/plugins/aws/step_functions/step_functions.py +1 -0
  13. metaflow/plugins/aws/step_functions/step_functions_cli.py +1 -1
  14. metaflow/plugins/azure/azure_exceptions.py +1 -1
  15. metaflow/plugins/cards/card_cli.py +413 -28
  16. metaflow/plugins/cards/card_client.py +16 -7
  17. metaflow/plugins/cards/card_creator.py +228 -0
  18. metaflow/plugins/cards/card_datastore.py +124 -26
  19. metaflow/plugins/cards/card_decorator.py +40 -86
  20. metaflow/plugins/cards/card_modules/base.html +12 -0
  21. metaflow/plugins/cards/card_modules/basic.py +74 -8
  22. metaflow/plugins/cards/card_modules/bundle.css +1 -170
  23. metaflow/plugins/cards/card_modules/card.py +65 -0
  24. metaflow/plugins/cards/card_modules/components.py +446 -81
  25. metaflow/plugins/cards/card_modules/convert_to_native_type.py +9 -3
  26. metaflow/plugins/cards/card_modules/main.js +250 -21
  27. metaflow/plugins/cards/card_modules/test_cards.py +117 -0
  28. metaflow/plugins/cards/card_resolver.py +0 -2
  29. metaflow/plugins/cards/card_server.py +361 -0
  30. metaflow/plugins/cards/component_serializer.py +506 -42
  31. metaflow/plugins/cards/exception.py +20 -1
  32. metaflow/plugins/datastores/azure_storage.py +1 -2
  33. metaflow/plugins/datastores/gs_storage.py +1 -2
  34. metaflow/plugins/datastores/s3_storage.py +2 -1
  35. metaflow/plugins/datatools/s3/s3.py +24 -11
  36. metaflow/plugins/env_escape/client.py +2 -12
  37. metaflow/plugins/env_escape/client_modules.py +18 -14
  38. metaflow/plugins/env_escape/server.py +18 -11
  39. metaflow/plugins/env_escape/utils.py +12 -0
  40. metaflow/plugins/gcp/gs_exceptions.py +1 -1
  41. metaflow/plugins/gcp/gs_utils.py +1 -1
  42. metaflow/plugins/pypi/conda_environment.py +5 -6
  43. metaflow/plugins/pypi/pip.py +2 -2
  44. metaflow/plugins/pypi/utils.py +15 -0
  45. metaflow/task.py +1 -0
  46. metaflow/version.py +1 -1
  47. {ob_metaflow-2.10.7.4.dist-info → ob_metaflow-2.10.9.1.dist-info}/METADATA +1 -1
  48. {ob_metaflow-2.10.7.4.dist-info → ob_metaflow-2.10.9.1.dist-info}/RECORD +52 -50
  49. {ob_metaflow-2.10.7.4.dist-info → ob_metaflow-2.10.9.1.dist-info}/LICENSE +0 -0
  50. {ob_metaflow-2.10.7.4.dist-info → ob_metaflow-2.10.9.1.dist-info}/WHEEL +0 -0
  51. {ob_metaflow-2.10.7.4.dist-info → ob_metaflow-2.10.9.1.dist-info}/entry_points.txt +0 -0
  52. {ob_metaflow-2.10.7.4.dist-info → ob_metaflow-2.10.9.1.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,20 @@
1
+ import json
1
2
  from .card import MetaflowCard, MetaflowCardComponent
3
+ from .renderer_tools import render_safely
2
4
 
3
5
 
4
6
  class TestStringComponent(MetaflowCardComponent):
7
+ REALTIME_UPDATABLE = True
8
+
5
9
  def __init__(self, text):
6
10
  self._text = text
7
11
 
8
12
  def render(self):
9
13
  return str(self._text)
10
14
 
15
+ def update(self, text):
16
+ self._text = text
17
+
11
18
 
12
19
  class TestPathSpecCard(MetaflowCard):
13
20
  type = "test_pathspec_card"
@@ -98,3 +105,113 @@ class TestTimeoutCard(MetaflowCard):
98
105
 
99
106
  time.sleep(self._timeout)
100
107
  return "%s" % task.pathspec
108
+
109
+
110
+ REFRESHABLE_HTML_TEMPLATE = """
111
+ <html>
112
+ <script>
113
+ var METAFLOW_RELOAD_TOKEN = "[METAFLOW_RELOAD_TOKEN]"
114
+
115
+ window.metaflow_card_update = function(data) {
116
+ document.querySelector("h1").innerHTML = JSON.stringify(data);
117
+ }
118
+ </script>
119
+ <h1>[PATHSPEC]</h1>
120
+ <h1>[REPLACE_CONTENT_HERE]</h1>
121
+ </html>
122
+ """
123
+
124
+
125
class TestJSONComponent(MetaflowCardComponent):
    """Test component that holds arbitrary data and renders it unchanged."""

    # Flag read by the card framework; presumably marks the component as
    # updatable while the task is running -- confirm against the base class.
    REALTIME_UPDATABLE = True

    def __init__(self, data):
        self._data = data

    @render_safely
    def render(self):
        """Return the held data as-is (the call is wrapped by `render_safely`)."""
        payload = self._data
        return payload

    def update(self, data):
        """Replace the held data; later `render` calls reflect it."""
        self._data = data
138
+
139
+
140
class TestRefreshCard(MetaflowCard):
    """
    This card takes no components and helps test the
    `current.card.refresh(data)` interface.
    """

    HTML_TEMPLATE = REFRESHABLE_HTML_TEMPLATE

    RUNTIME_UPDATABLE = True

    ALLOW_USER_COMPONENTS = True

    # The reload policy is "always", so no `reload_content_token` is
    # implemented on this card.
    RELOAD_POLICY = MetaflowCard.RELOAD_POLICY_ALWAYS

    type = "test_refresh_card"

    def render(self, task) -> str:
        """Render the final card from `self.runtime_data`.

        `runtime_data` is not assigned in this class; presumably the card
        framework sets it before calling `render` -- confirm.
        """
        return self._render_func(task, self.runtime_data)

    def _render_func(self, task, data):
        """Fill the template with the JSON of `data["user"]` and the pathspec."""
        user_json = json.dumps(data["user"])
        html = self.HTML_TEMPLATE.replace("[REPLACE_CONTENT_HERE]", user_json)
        return html.replace("[PATHSPEC]", task.pathspec)

    def render_runtime(self, task, data):
        """Render the card from the `data` passed in by the caller."""
        return self._render_func(task, data)

    def refresh(self, task, data):
        """Return the data update unchanged."""
        return data
170
+
171
+
172
+ import hashlib
173
+
174
+
175
def _component_values_to_hash(components):
    """Return the SHA-256 hex digest of the flattened component values.

    `components` is a mapping whose values are iterables; every element of
    every value is passed through `"".join`, the results are joined with
    commas, and the UTF-8 encoding of that string is hashed.
    """
    fragments = []
    for value_list in components.values():
        for element in value_list:
            fragments.append("".join(element))
    digest = hashlib.sha256(",".join(fragments).encode("utf-8"))
    return digest.hexdigest()
178
+
179
+
180
class TestRefreshComponentCard(MetaflowCard):
    """
    This card takes components and helps test the
    `current.card.components["A"].update()` interface.
    """

    HTML_TEMPLATE = REFRESHABLE_HTML_TEMPLATE

    RUNTIME_UPDATABLE = True

    ALLOW_USER_COMPONENTS = True

    # The reload policy is "on change", so `reload_content_token` below
    # supplies the token that is compared between updates.
    RELOAD_POLICY = MetaflowCard.RELOAD_POLICY_ONCHANGE

    type = "test_component_refresh_card"

    def __init__(self, options=None, components=None, graph=None):
        # `None` sentinels replace the former mutable `{}` / `[]` defaults so
        # that cards built without `components` never share one list object.
        # `options` and `graph` are accepted for interface compatibility and
        # are not used by this card.
        self._components = [] if components is None else components

    def render(self, task) -> str:
        """Fill the template with the JSON of the components and the pathspec."""
        # Calling `render`/`render_runtime` won't require the `data` object.
        return self.HTML_TEMPLATE.replace(
            "[REPLACE_CONTENT_HERE]", json.dumps(self._components)
        ).replace("[PATHSPEC]", task.pathspec)

    def render_runtime(self, task, data):
        """Render exactly like `render`; `data` is ignored."""
        return self.render(task)

    def refresh(self, task, data):
        """Return the `components` entry of the data update."""
        # Governs the information passed in the data update.
        return data["components"]

    def reload_content_token(self, task, data):
        """Return "final" once the task finished, else a hash-based token."""
        if task.finished:
            return "final"
        return "runtime-%s" % _component_values_to_hash(data["components"])
@@ -1,5 +1,3 @@
1
- from collections import namedtuple
2
-
3
1
  from .card_datastore import CardDatastore
4
2
 
5
3
 
@@ -0,0 +1,361 @@
1
+ import os
2
+ import json
3
+ from http.server import BaseHTTPRequestHandler
4
+ from threading import Thread
5
+ from multiprocessing import Pipe
6
+ from multiprocessing.connection import Connection
7
+ from urllib.parse import urlparse
8
+ import time
9
+
10
+ try:
11
+ from http.server import ThreadingHTTPServer
12
+ except ImportError:
13
+ from socketserver import ThreadingMixIn
14
+ from http.server import HTTPServer
15
+
16
+ class ThreadingHTTPServer(ThreadingMixIn, HTTPServer):
17
+ daemon_threads = True
18
+
19
+
20
+ from .card_client import CardContainer
21
+ from .exception import CardNotPresentException
22
+ from .card_resolver import resolve_paths_from_task
23
+ from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
24
+ from metaflow import namespace
25
+ from metaflow.exception import (
26
+ CommandException,
27
+ MetaflowNotFound,
28
+ MetaflowNamespaceMismatch,
29
+ )
30
+
31
+
32
# Absolute path of `card_viewer/viewer.html`, located next to this module.
VIEWER_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "card_viewer", "viewer.html"
)

# Viewer page contents, read once at import time. The context manager closes
# the file handle instead of leaving it to the garbage collector.
with open(VIEWER_PATH) as _viewer_file:
    CARD_VIEWER_HTML = _viewer_file.read()

# Not referenced anywhere in the visible part of this module.
TASK_CACHE = {}

# Replaced with `ctx_obj.echo` by `create_card_server`.
_ClickLogger = None
41
+
42
+
43
class RunWatcher(Thread):
    """
    A thread that watches for new runs and sends the run_id to the
    card server when a new run is detected. It observes the `latest_run`
    file in the `.metaflow/<flowname>` directory.
    """

    def __init__(self, flow_name, connection: Connection):
        """
        Parameters
        ----------
        flow_name : str
            Name of the flow whose `latest_run` file is watched.
        connection : Connection
            Pipe end on which changed run ids are sent.
        """
        super().__init__()

        self._watch_file = os.path.join(
            os.getcwd(), DATASTORE_LOCAL_DIR, flow_name, "latest_run"
        )
        # Remember the run id seen at construction so only changes are sent.
        self._current_run_id = self.get_run_id()
        self.daemon = True
        self._connection = connection

    def get_run_id(self):
        """Return the stripped contents of the watch file, or None if absent."""
        # Open directly instead of checking `os.path.exists` first: the file
        # may vanish between the check and the open, and an uncaught error
        # here would silently end the watcher thread.
        try:
            with open(self._watch_file, "r") as f:
                return f.read().strip()
        except (FileNotFoundError, NotADirectoryError):
            return None

    def watch(self):
        """Poll the watch file every two seconds and send run id changes."""
        while True:
            run_id = self.get_run_id()
            if run_id != self._current_run_id:
                self._current_run_id = run_id
                self._connection.send(run_id)
            time.sleep(2)

    def run(self):
        self.watch()
76
+
77
+
78
class CardServerOptions:
    """Settings and mutable run state shared with the card server handler.

    The constructor also creates a `Pipe`; its two ends are exposed as
    `parent_conn` and `child_conn`.
    """

    def __init__(
        self,
        flow_name,
        run_object,
        only_running,
        follow_resumed,
        flow_datastore,
        follow_new_runs,
        max_cards=20,
        poll_interval=5,
    ):
        # Imported here rather than at module level, as in the original code.
        from metaflow import Run

        self._parent_conn, self._child_conn = Pipe()

        self.RunClass = Run
        self.run_object = run_object

        self.flow_name = flow_name
        self.flow_datastore = flow_datastore
        self.only_running = only_running
        self.follow_resumed = follow_resumed
        self.follow_new_runs = follow_new_runs
        self.max_cards = max_cards
        self.poll_interval = poll_interval

    @property
    def parent_conn(self):
        return self._parent_conn

    @property
    def child_conn(self):
        return self._child_conn

    def refresh_run(self):
        """Switch `run_object` to a run id received on `parent_conn`.

        Returns True only when a non-None run id was received and the
        corresponding run object could be created; otherwise False.
        """
        if not self.follow_new_runs or not self.parent_conn.poll():
            return False
        new_run_id = self.parent_conn.recv()
        if new_run_id is None:
            return False
        # Reset the namespace before resolving the run.
        namespace(None)
        try:
            self.run_object = self.RunClass(f"{self.flow_name}/{new_run_id}")
        except MetaflowNotFound:
            return False
        return True
127
+
128
+
129
def cards_for_task(
    flow_datastore, task_pathspec, card_type=None, card_hash=None, card_id=None
):
    """Yield the cards stored for one task, optionally filtered.

    This is a generator: when `resolve_paths_from_task` raises
    `CardNotPresentException` it simply yields nothing.
    """
    try:
        card_paths, card_datastore = resolve_paths_from_task(
            flow_datastore,
            task_pathspec,
            type=card_type,
            hash=card_hash,
            card_id=card_id,
        )
    except CardNotPresentException:
        return
    yield from CardContainer(card_paths, card_datastore, origin_pathspec=None)
144
+
145
+
146
def cards_for_run(
    flow_datastore,
    run_object,
    only_running,
    card_type=None,
    card_hash=None,
    card_id=None,
    max_cards=20,
):
    """Yield `(task_pathspec, card)` pairs for the tasks of a run.

    Parameters
    ----------
    flow_datastore
        Datastore handed through to `cards_for_task`.
    run_object
        Object whose `steps()` yield steps, whose `tasks()` yield tasks.
    only_running : bool
        When true, tasks whose `finished` attribute is truthy are skipped.
    card_type, card_hash, card_id
        Optional filters handed through to `cards_for_task`.
    max_cards : int
        Upper bound on the number of pairs yielded.
    """
    yielded = 0
    for step in run_object.steps():
        for task in step.tasks():
            if only_running and task.finished:
                continue
            card_generator = cards_for_task(
                flow_datastore,
                task.pathspec,
                card_type=card_type,
                card_hash=card_hash,
                card_id=card_id,
            )
            # Defensive only: `cards_for_task` is a generator function, so it
            # returns an (possibly empty) iterator rather than None.
            if card_generator is None:
                continue
            for card in card_generator:
                # A plain `return` ends the generator. Raising StopIteration
                # here would surface as RuntimeError (PEP 479).
                if yielded >= max_cards:
                    return
                yielded += 1
                yield task.pathspec, card
174
+
175
+
176
class CardViewerRoutes(BaseHTTPRequestHandler):
    """
    Request handler for the card viewer server.

    GET requests are dispatched on the first path segment through `ROUTES`
    (`runinfo`, `card`, `data`). Any other path is answered with the viewer
    page read from `VIEWER_PATH`.
    """

    # Both attributes are assigned on the class by `create_card_server`.
    card_options: CardServerOptions = None

    run_watcher: RunWatcher = None

    def do_GET(self):
        """Dispatch "/<prefix>/<suffix>" to a route, else serve the viewer."""
        # Drop everything up to the first "/", then split off the first
        # segment. `suffix` is None when there is no second "/".
        _, _, path = self.path.partition("/")
        prefix, has_suffix, suffix = path.partition("/")
        handler = self.ROUTES.get(prefix)
        if handler is None:
            # The page is re-read on every request; the context manager
            # closes the handle each time.
            with open(VIEWER_PATH) as viewer_file:
                self._response(viewer_file.read().encode("utf-8"))
            return
        handler(self, suffix if has_suffix else None)

    def get_runinfo(self, suffix):
        """Respond with JSON describing the current run and its cards."""
        run_id_changed = self.card_options.refresh_run()
        if run_id_changed:
            message = (
                "RunID changed in the background to %s"
                % self.card_options.run_object.pathspec
            )
            # Pass the text as an argument: `log_message` treats its first
            # parameter as a %-format string.
            self.log_message("%s", message)
            _ClickLogger(message, fg="blue")

        if self.card_options.run_object is None:
            self._response(
                {"status": "No Run Found", "flow": self.card_options.flow_name},
                code=404,
                is_json=True,
            )
            return

        task_card_generator = cards_for_run(
            self.card_options.flow_datastore,
            self.card_options.run_object,
            self.card_options.only_running,
            max_cards=self.card_options.max_cards,
        )
        flow_name = self.card_options.run_object.parent.id
        run_id = self.card_options.run_object.id
        cards = []
        for pathspec, card in task_card_generator:
            step, task = pathspec.split("/")[-2:]
            _task = self.card_options.run_object[step][task]
            cards.append(
                dict(
                    task=pathspec,
                    label="%s/%s %s" % (step, task, card.hash),
                    card_object=dict(
                        hash=card.hash,
                        type=card.type,
                        path=card.path,
                        id=card.id,
                    ),
                    finished=bool(_task.finished),
                    card="%s/%s" % (pathspec, card.hash),
                )
            )
        resp = {
            "status": "ok",
            "flow": flow_name,
            "run_id": run_id,
            "cards": cards,
            "poll_interval": self.card_options.poll_interval,
        }
        self._response(resp, is_json=True)

    def _cards_from_request_path(self):
        """Return `(step, task_id, cards)` for the card named by the URL.

        The URL path must have exactly six segments:
        `<route>/<flow>/<run_id>/<step>/<task_id>/<card_hash>`. For any other
        shape `(None, None, [])` is returned so callers answer "not found"
        instead of failing on tuple unpacking.
        """
        parts = urlparse(self.path).path.strip("/").split("/")
        if len(parts) != 6:
            return None, None, []
        _, flow, run_id, step, task_id, card_hash = parts
        pathspec = "/".join([flow, run_id, step, task_id])
        cards = list(
            cards_for_task(
                self.card_options.flow_datastore, pathspec, card_hash=card_hash
            )
        )
        return step, task_id, cards

    def get_card(self, suffix):
        """Respond with the HTML of the requested card, or a JSON 404."""
        _, _, cards = self._cards_from_request_path()
        if not cards:
            # `is_json=True` is required: the body is a dict, and the
            # non-JSON path writes the body to the socket unchanged.
            self._response({"status": "Card Not Found"}, code=404, is_json=True)
            return
        selected_card = cards[0]
        self._response(selected_card.get().encode("utf-8"))

    def get_data(self, suffix):
        """Respond with JSON holding the data payload of the requested card."""
        step, task_id, cards = self._cards_from_request_path()
        if not cards:
            self._response(
                {
                    "status": "Card Not Found",
                },
                is_json=True,
                code=404,
            )
            return

        try:
            task_object = self.card_options.run_object[step][task_id]
        except KeyError:
            self._response(
                {"status": "Task Not Found", "is_complete": False},
                is_json=True,
                code=404,
            )
            return

        is_complete = task_object.finished
        selected_card = cards[0]
        card_data = selected_card.get_data()
        if card_data is None:
            # Kept from the original: HTTP 404 with status "ok" when the card
            # has no data. NOTE(review): the viewer page may rely on this
            # pairing -- confirm before changing it.
            self._response(
                {"status": "ok", "is_complete": is_complete},
                is_json=True,
                code=404,
            )
            return

        successful = task_object.successful
        self.log_message(
            "Task Success: %s, Task Finished: %s", successful, is_complete
        )
        status = "ok"
        if not successful and is_complete:
            status = "Task Failed"
        self._response(
            {"status": status, "payload": card_data, "is_complete": is_complete},
            is_json=True,
        )

    def _response(self, body, is_json=False, code=200):
        """Send status, Content-type header and body.

        With `is_json` the body is serialised with `json.dumps`; otherwise it
        is written unchanged and must already be bytes.
        """
        self.send_response(code)
        mime = "application/json" if is_json else "text/html"
        self.send_header("Content-type", mime)
        self.end_headers()
        if is_json:
            self.wfile.write(json.dumps(body).encode("utf-8"))
        else:
            self.wfile.write(body)

    # Maps the first URL path segment to the handler function.
    ROUTES = {"runinfo": get_runinfo, "card": get_card, "data": get_data}
331
+
332
+
333
def _is_debug_mode():
    """Return True when METAFLOW_DEBUG_CARD_SERVER is "true" or "1".

    The comparison ignores case; an unset variable means False.
    """
    flag = os.environ.get("METAFLOW_DEBUG_CARD_SERVER")
    return flag is not None and flag.lower() in ["true", "1"]
338
+
339
+
340
def create_card_server(card_options: CardServerOptions, port, ctx_obj):
    """Configure `CardViewerRoutes` and serve requests until interrupted.

    Parameters
    ----------
    card_options : CardServerOptions
        Stored on the handler class so every request can read it.
    port : int
        Port to listen on; the server binds to all interfaces ("").
    ctx_obj
        Object whose `echo` method prints messages; it is also stored in the
        module-level `_ClickLogger`.
    """
    global _ClickLogger

    CardViewerRoutes.card_options = card_options
    _ClickLogger = ctx_obj.echo
    if card_options.follow_new_runs:
        CardViewerRoutes.run_watcher = RunWatcher(
            card_options.flow_name, card_options.child_conn
        )
        CardViewerRoutes.run_watcher.start()
    server_addr = ("", port)
    ctx_obj.echo(
        "Starting card server on port %d " % (port),
        fg="green",
        bold=True,
    )
    # Disable logging if not in debug mode
    if not _is_debug_mode():
        CardViewerRoutes.log_request = lambda *args, **kwargs: None
        CardViewerRoutes.log_message = lambda *args, **kwargs: None

    # The context manager closes the listening socket when `serve_forever`
    # exits, including through an exception such as KeyboardInterrupt.
    with ThreadingHTTPServer(server_addr, CardViewerRoutes) as server:
        server.serve_forever()