metaflow 2.8.1__py2.py3-none-any.whl → 2.8.3__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/client/core.py +14 -4
- metaflow/cmd/configure_cmd.py +3 -3
- metaflow/cmd/main_cli.py +9 -14
- metaflow/current.py +15 -0
- metaflow/datastore/datastore_set.py +7 -7
- metaflow/datastore/flow_datastore.py +1 -2
- metaflow/extension_support/__init__.py +1 -0
- metaflow/extension_support/integrations.py +141 -0
- metaflow/integrations.py +29 -0
- metaflow/metaflow_config.py +21 -0
- metaflow/metaflow_environment.py +5 -4
- metaflow/package.py +1 -1
- metaflow/plugins/airflow/airflow.py +0 -1
- metaflow/plugins/argo/argo_workflows.py +2 -0
- metaflow/plugins/argo/argo_workflows_cli.py +11 -1
- metaflow/plugins/aws/aws_utils.py +6 -1
- metaflow/plugins/aws/batch/batch.py +30 -8
- metaflow/plugins/aws/batch/batch_cli.py +12 -0
- metaflow/plugins/aws/batch/batch_client.py +39 -2
- metaflow/plugins/aws/batch/batch_decorator.py +23 -0
- metaflow/plugins/aws/step_functions/step_functions.py +7 -4
- metaflow/plugins/aws/step_functions/step_functions_cli.py +11 -1
- metaflow/plugins/cards/card_modules/bundle.css +56 -56
- metaflow/plugins/cards/card_modules/convert_to_native_type.py +67 -5
- metaflow/plugins/cards/card_modules/main.js +14 -7
- metaflow/plugins/conda/conda_environment.py +2 -2
- metaflow/plugins/conda/conda_step_decorator.py +7 -1
- metaflow/plugins/datatools/s3/s3.py +2 -2
- metaflow/plugins/env_escape/communication/channel.py +1 -1
- metaflow/plugins/kubernetes/kubernetes.py +4 -0
- metaflow/plugins/kubernetes/kubernetes_decorator.py +6 -2
- metaflow/plugins/kubernetes/kubernetes_job.py +17 -2
- metaflow/plugins/metadata/service.py +3 -2
- metaflow/runtime.py +5 -3
- metaflow/tutorials/02-statistics/README.md +4 -9
- metaflow/tutorials/02-statistics/stats.py +38 -11
- metaflow/tutorials/03-playlist-redux/playlist.py +24 -16
- metaflow/tutorials/04-playlist-plus/playlist.py +14 -23
- metaflow/tutorials/05-hello-cloud/README.md +45 -0
- metaflow/tutorials/{05-helloaws/helloaws.ipynb → 05-hello-cloud/hello-cloud.ipynb} +10 -5
- metaflow/tutorials/{05-helloaws/helloaws.py → 05-hello-cloud/hello-cloud.py} +11 -13
- metaflow/tutorials/06-statistics-redux/README.md +6 -29
- metaflow/tutorials/06-statistics-redux/stats.ipynb +2 -2
- metaflow/tutorials/07-worldview/README.md +3 -11
- metaflow/tutorials/07-worldview/worldview.ipynb +3 -3
- metaflow/tutorials/08-autopilot/README.md +10 -17
- metaflow/tutorials/08-autopilot/autopilot.ipynb +12 -7
- {metaflow-2.8.1.dist-info → metaflow-2.8.3.dist-info}/METADATA +1 -6
- {metaflow-2.8.1.dist-info → metaflow-2.8.3.dist-info}/RECORD +53 -51
- metaflow/tutorials/05-helloaws/README.md +0 -27
- {metaflow-2.8.1.dist-info → metaflow-2.8.3.dist-info}/LICENSE +0 -0
- {metaflow-2.8.1.dist-info → metaflow-2.8.3.dist-info}/WHEEL +0 -0
- {metaflow-2.8.1.dist-info → metaflow-2.8.3.dist-info}/entry_points.txt +0 -0
- {metaflow-2.8.1.dist-info → metaflow-2.8.3.dist-info}/top_level.txt +0 -0
metaflow/plugins/conda/conda_environment.py
CHANGED
@@ -132,5 +132,5 @@ class CondaEnvironment(MetaflowEnvironment):
     def get_package_commands(self, code_package_url, datastore_type):
         return self.base_env.get_package_commands(code_package_url, datastore_type)
 
-    def get_environment_info(self):
-        return self.base_env.get_environment_info()
+    def get_environment_info(self, include_ext_info=False):
+        return self.base_env.get_environment_info(include_ext_info)
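The new `include_ext_info` flag is threaded straight through to the base environment. A minimal sketch of the call-site difference, assuming `env` is a `CondaEnvironment` instance:

```python
# Default keeps the 2.8.1 behavior; passing include_ext_info=True also
# returns information about installed Metaflow extensions (see the
# extension_support/ entries in the file list above).
info = env.get_environment_info()
info_with_ext = env.get_environment_info(include_ext_info=True)
```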
metaflow/plugins/conda/conda_step_decorator.py
CHANGED
@@ -285,7 +285,13 @@ class CondaStepDecorator(StepDecorator):
             mode="wt",
             encoding="utf-8",
         ) as f:
-            f.write(json.dumps(self._cur_environment.get_environment_info()))
+            f.write(
+                json.dumps(
+                    self._cur_environment.get_environment_info(
+                        include_ext_info=True
+                    )
+                )
+            )
 
         # Do the same for EXT_PKG
         try:
metaflow/plugins/datatools/s3/s3.py
CHANGED
@@ -17,6 +17,7 @@ from metaflow.metaflow_config import (
     DATATOOLS_S3ROOT,
     S3_RETRY_COUNT,
     S3_TRANSIENT_RETRY_COUNT,
+    TEMPDIR,
 )
 from metaflow.util import (
     namedtuple_with_defaults,
@@ -142,7 +143,6 @@ class S3Object(object):
         range_info: Optional[RangeInfo] = None,
         last_modified: int = None,
     ):
-
         # all fields of S3Object should return a unicode object
         prefix, url, path = map(ensure_unicode, (prefix, url, path))
 
@@ -481,7 +481,7 @@ class S3(object):
 
     def __init__(
         self,
-        tmproot: str = ".",
+        tmproot: str = TEMPDIR,
         bucket: Optional[str] = None,
         prefix: Optional[str] = None,
         run: Optional[Union[FlowSpec, "Run"]] = None,
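With `TEMPDIR` as the default `tmproot`, the directory where `S3` stages temporary files is now configurable instead of hard-coded. A hedged usage sketch (bucket, prefix, and scratch path are hypothetical):

```python
from metaflow import S3

# tmproot is where S3 stages temporary files during uploads/downloads; in
# 2.8.3 it defaults to the TEMPDIR config value but can still be overridden
# per client instance.
with S3(s3root="s3://my-bucket/some/prefix", tmproot="/mnt/scratch") as s3:
    s3.put("greeting", "hello")       # -> s3://my-bucket/some/prefix/greeting
    print(s3.get("greeting").text)    # -> hello
```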
metaflow/plugins/env_escape/communication/channel.py
CHANGED
@@ -36,7 +36,7 @@ class Channel(object):
             sz_bytes = self._stream.read(self._fmt.size, timeout)
             msg_sz = self._fmt.unpack(sz_bytes)[0]
             obj_bytes = self._stream.read(msg_sz, timeout)
-            return json.loads(obj_bytes…
+            return json.loads(obj_bytes)
         except EOFError as e:
             raise RuntimeError("Cannot receive object over streaming interface: %s" % e)
         except BaseException as e:
metaflow/plugins/kubernetes/kubernetes.py
CHANGED
@@ -15,6 +15,7 @@ from metaflow.metaflow_config import (
     DEFAULT_AWS_CLIENT_PROVIDER,
     DEFAULT_METADATA,
     KUBERNETES_SANDBOX_INIT_SCRIPT,
+    KUBERNETES_FETCH_EC2_METADATA,
     S3_ENDPOINT_URL,
     AZURE_STORAGE_BLOB_SERVICE_ENDPOINT,
     DATASTORE_SYSROOT_AZURE,
@@ -199,6 +200,9 @@ class Kubernetes(object):
             .environment_variable("METAFLOW_DEFAULT_DATASTORE", self._datastore.TYPE)
             .environment_variable("METAFLOW_DEFAULT_METADATA", DEFAULT_METADATA)
             .environment_variable("METAFLOW_KUBERNETES_WORKLOAD", 1)
+            .environment_variable(
+                "METAFLOW_KUBERNETES_FETCH_EC2_METADATA", KUBERNETES_FETCH_EC2_METADATA
+            )
             .environment_variable("METAFLOW_RUNTIME_ENVIRONMENT", "kubernetes")
             .environment_variable(
                 "METAFLOW_DEFAULT_SECRETS_BACKEND_TYPE", DEFAULT_SECRETS_BACKEND_TYPE
metaflow/plugins/kubernetes/kubernetes_decorator.py
CHANGED
@@ -17,6 +17,7 @@ from metaflow.metaflow_config import (
     KUBERNETES_TOLERATIONS,
     KUBERNETES_SERVICE_ACCOUNT,
     KUBERNETES_SECRETS,
+    KUBERNETES_FETCH_EC2_METADATA,
 )
 from metaflow.plugins.resources_decorator import ResourcesDecorator
 from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
@@ -327,8 +328,11 @@ class KubernetesDecorator(StepDecorator):
 
         # TODO (savin): Introduce equivalent support for Microsoft Azure and
        # Google Cloud Platform
-        instance_meta = get_ec2_instance_metadata()
-        meta.update(instance_meta)
+        # TODO: Introduce a way to detect Cloud Provider, so unnecessary requests (and delays)
+        # can be avoided by not having to try out all providers.
+        if KUBERNETES_FETCH_EC2_METADATA:
+            instance_meta = get_ec2_instance_metadata()
+            meta.update(instance_meta)
 
         # Unfortunately, there doesn't seem to be any straight forward way right
         # now to attach the Batch/v1 name - While we can rely on a hacky approach
metaflow/plugins/kubernetes/kubernetes_job.py
CHANGED
@@ -413,8 +413,23 @@ class RunningJob(object):
                 except:
                     # Best effort. It's likely that this API call could be
                     # blocked for the user.
-
-                    #
+                    # --------------------------------------------------------
+                    # We try patching Job parallelism anyway. Stopping any runaway
+                    # jobs (and their pods) is secondary to correctly showing
+                    # "Killed" status on the Kubernetes pod.
+                    #
+                    # This has the effect of pausing the job.
+                    try:
+                        client.BatchV1Api().patch_namespaced_job(
+                            name=self._name,
+                            namespace=self._namespace,
+                            field_manager="metaflow",
+                            body={"spec": {"parallelism": 0}},
+                        )
+                    except:
+                        # Best effort.
+                        pass
+                        # raise
             else:
                 # Case 2.
                 # This has the effect of pausing the job.
metaflow/plugins/metadata/service.py
CHANGED
@@ -137,9 +137,10 @@ class ServiceMetadataProvider(MetadataProvider):
             payload[HB_URL_KEY] = self.url_run_template.format(**data)
         else:
             raise Exception("invalid heartbeat type")
-
+        service_version = self.version()
+        payload["service_version"] = service_version
         # start sidecar
-        if LooseVersion(self.version()) < LooseVersion(
+        if service_version is None or LooseVersion(service_version) < LooseVersion(
             "2.0.4"
         ):
             # if old version of the service is running
metaflow/runtime.py
CHANGED
@@ -198,9 +198,11 @@ class NativeRuntime(object):
         self._is_cloned[task.path] = task.is_cloned
 
     def execute(self):
-        run_url = …
-        …
-        …
+        run_url = (
+            "%s/%s/%s" % (UI_URL.rstrip("/"), self._flow.name, self._run_id)
+            if UI_URL
+            else None
+        )
 
         if run_url:
             self._logger(
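The run link is only built when `UI_URL` is configured, and `rstrip("/")` tolerates a configured URL with a trailing slash without producing a double slash in the link. For example (hypothetical values):

```python
ui_url = "https://ui.example.com/"  # note the trailing slash
flow_name, run_id = "MovieStatsFlow", "1668"

print("%s/%s/%s" % (ui_url, flow_name, run_id))
# https://ui.example.com//MovieStatsFlow/1668  (double slash)
print("%s/%s/%s" % (ui_url.rstrip("/"), flow_name, run_id))
# https://ui.example.com/MovieStatsFlow/1668
```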
metaflow/tutorials/02-statistics/README.md
CHANGED
@@ -1,26 +1,21 @@
 # Episode 02-statistics: Is this Data Science?
 
-**Use metaflow to load the movie metadata CSV file into a pandas dataframe and
+**Use metaflow to load the movie metadata CSV file into a dataframe and
 compute some movie genre-specific statistics. These statistics are then used in
 later examples to improve our playlist generator. You can optionally use the
 Metaflow client to eyeball the results in a Notebook, and make some simple
 plots using the Matplotlib library.**
 
-Please note that Episode 04, a follow-on to this episode, requires Pandas version 1.3.3.
-Please make sure that you install or upgrade/downgrade to Pandas 1.3.3.
-
 #### Showcasing:
 - Fan-out over a set of parameters using Metaflow foreach.
-- Using external packages like Pandas.
 - Plotting results in a Notebook.
 
 #### Before playing this episode:
-1. ```python -m pip install pandas```
-2. ```python -m pip install notebook```
-3. ```python -m pip install matplotlib```
+1. ```python -m pip install notebook```
+2. ```python -m pip install matplotlib```
 
 #### To play this episode:
 1. ```cd metaflow-tutorials```
 2. ```python 02-statistics/stats.py show```
 3. ```python 02-statistics/stats.py run```
-4. ```jupyter-notebook 02-statistics/stats.ipynb```
+4. ```jupyter-notebook 02-statistics/stats.ipynb```
metaflow/tutorials/02-statistics/stats.py
CHANGED
@@ -19,7 +19,7 @@ class MovieStatsFlow(FlowSpec):
     A flow to generate some statistics about the movie genres.
 
     The flow performs the following steps:
-    1) Ingests a CSV into a pandas dataframe.
+    1) Ingests a CSV into a dataframe.
     2) Fan-out over genre using Metaflow foreach.
     3) Compute quartiles for each genre.
     4) Save a dictionary of genre-specific statistics.
@@ -36,16 +36,29 @@ class MovieStatsFlow(FlowSpec):
     def start(self):
         """
         The start step:
-        1) Loads the movie metadata into a pandas dataframe.
+        1) Loads the movie metadata into dataframe.
         2) Finds all the unique genres.
         3) Launches parallel statistics computation for each genre.
 
         """
-        import pandas
+        import csv
         from io import StringIO
 
-        # Load the data set into a pandas dataframe.
-        self.dataframe = pandas.read_csv(StringIO(self.movie_data))
+        # Load the data set into a dataframe structure.
+        self.dataframe = {
+            "movie_title": [],
+            "title_year": [],
+            "genres": [],
+            "gross": [],
+        }
+
+        for row in csv.reader(StringIO(self.movie_data), delimiter=","):
+            if row[0] == "movie_title":
+                continue
+            self.dataframe["movie_title"].append(row[0])
+            self.dataframe["title_year"].append(int(row[1]))
+            self.dataframe["genres"].append(row[2])
+            self.dataframe["gross"].append(int(row[3]))
 
         # The column 'genres' has a list of genres for each movie. Let's get
         # all the unique genres.
@@ -63,8 +76,8 @@ class MovieStatsFlow(FlowSpec):
     def compute_statistics(self):
         """
         Compute statistics for a single genre.
-
         """
+
         # The genre currently being processed is a class property called
         # 'input'.
         self.genre = self.input
@@ -72,13 +85,27 @@ class MovieStatsFlow(FlowSpec):
 
         # Find all the movies that have this genre and build a dataframe with
         # just those movies and just the columns of interest.
-        selector = self.dataframe["genres"]…
-        …
-        …
+        selector = [self.genre in row for row in self.dataframe["genres"]]
+
+        for col in self.dataframe.keys():
+            self.dataframe[col] = [
+                col for col, is_genre in zip(self.dataframe[col], selector) if is_genre
+            ]
+
+        # Sort by gross box office and drop unused column.
+        argsort_indices = sorted(
+            range(len(self.dataframe["gross"])), key=self.dataframe["gross"].__getitem__
+        )
+        for col in self.dataframe.keys():
+            self.dataframe[col] = [self.dataframe[col][idx] for idx in argsort_indices]
+        del self.dataframe["title_year"]
 
         # Get some statistics on the gross box office for these titles.
-        …
-        self.quartiles = …
+        n_points = len(self.dataframe["movie_title"])
+        self.quartiles = []
+        for cut in [0.25, 0.5, 0.75]:
+            idx = 0 if n_points < 2 else round(n_points * cut)
+            self.quartiles.append(self.dataframe["gross"][idx])
 
         # Join the results from other genres.
         self.next(self.join)
metaflow/tutorials/03-playlist-redux/playlist.py
CHANGED
@@ -53,19 +53,25 @@ class PlayListFlow(FlowSpec):
         This step chooses a random title for a different movie genre.
 
         """
-        import pandas
-
-        # Concatenate all the genre-specific data frames
-        …
-                data["dataframe"]
-        …
+        import random
+
+        # Concatenate all the genre-specific data frames.
+        df = {"movie_title": [], "genres": []}
+        for genre, data in self.genre_stats.items():
+            if genre != self.genre.lower():
+                for row_idx in range(len(data["dataframe"]["movie_title"])):
+                    if (
+                        self.genre.lower()
+                        not in data["dataframe"]["genres"][row_idx].lower()
+                    ):
+                        df["movie_title"].append(
+                            data["dataframe"]["movie_title"][row_idx]
+                        )
+                        df["genres"].append(data["dataframe"]["genres"][row_idx])
+
+        # Choose a random movie.
+        random_index = random.randint(0, len(df["genres"]) - 1)
+        self.bonus = (df["movie_title"][random_index], df["genres"][random_index])
 
         self.next(self.join)
 
@@ -82,12 +88,14 @@ class PlayListFlow(FlowSpec):
         genre = self.genre.lower()
         if genre not in self.genre_stats:
             self.movies = []
-
         else:
             df = self.genre_stats[genre]["dataframe"]
             quartiles = self.genre_stats[genre]["quartiles"]
-            …
-            …
+            self.movies = [
+                df["movie_title"][i]
+                for i, g in enumerate(df["gross"])
+                if g >= quartiles[-1]
+            ]
 
         # Shuffle the playlist.
         shuffle(self.movies)
metaflow/tutorials/04-playlist-plus/playlist.py
CHANGED
@@ -10,7 +10,7 @@ def get_python_version():
     """
     import platform
 
-    versions = {"2": "2.7.15", "3": "3.…
+    versions = {"2": "2.7.15", "3": "3.9.10"}
     return versions[platform.python_version_tuple()[0]]
 
 
@@ -48,17 +48,12 @@ class PlayListFlow(FlowSpec):
         default=5,
     )
 
-    @conda(libraries={"pandas": "1.3.3"})
     @step
     def start(self):
         """
         Use the Metaflow client to retrieve the latest successful run from our
         MovieStatsFlow and assign them as data artifacts in this flow.
 
-        This step uses 'conda' to isolate the environment. This step will
-        always use pandas==1.3.3 regardless of what is installed on the
-        system.
-
         """
         # Load the analysis from the MovieStatsFlow.
         from metaflow import Flow, get_metadata
@@ -80,7 +75,7 @@ class PlayListFlow(FlowSpec):
         # Compute our two recommendation types in parallel.
         self.next(self.bonus_movie, self.genre_movies)
 
-    @conda(libraries={"editdistance": "0.5.3", "pandas": "1.3.3"})
+    @conda(libraries={"editdistance": "0.5.3"})
     @step
     def bonus_movie(self):
         """
@@ -90,9 +85,7 @@ class PlayListFlow(FlowSpec):
         This step uses 'conda' to isolate the environment. Note that the
         package 'editdistance' need not be installed in your python
         environment.
-
         """
-        import pandas
         import editdistance
 
         # Define a helper function to compute the similarity between two
@@ -101,27 +94,23 @@ class PlayListFlow(FlowSpec):
             return editdistance.eval(self.hint, movie_title)
 
         # Compute the distance and take the argmin to find the closest title.
-        distance = …
-        …
+        distance = [
+            _edit_distance(movie_title) for movie_title in self.dataframe["movie_title"]
+        ]
+        index = distance.index(min(distance))
         self.bonus = (
-            self.dataframe["movie_title"]…
-            self.dataframe["genres"]…
+            self.dataframe["movie_title"][index],
+            self.dataframe["genres"][index],
         )
 
         self.next(self.join)
 
-    @conda(libraries={"pandas": "1.3.3"})
     @step
     def genre_movies(self):
         """
         Select the top performing movies from the use specified genre.
-
-        This step uses 'conda' to isolate the environment. This step will
-        always use pandas==1.3.3 regardless of what is installed on the
-        system.
-
         """
-
+
         from random import shuffle
 
         # For the genre of interest, generate a potential playlist using only
@@ -129,12 +118,14 @@ class PlayListFlow(FlowSpec):
         genre = self.genre.lower()
         if genre not in self.genre_stats:
             self.movies = []
-
         else:
             df = self.genre_stats[genre]["dataframe"]
             quartiles = self.genre_stats[genre]["quartiles"]
-            …
-            …
+            self.movies = [
+                df["movie_title"][i]
+                for i, g in enumerate(df["gross"])
+                if g >= quartiles[-1]
+            ]
 
         # Shuffle the content.
         shuffle(self.movies)
metaflow/tutorials/05-hello-cloud/README.md
ADDED
@@ -0,0 +1,45 @@
+# Episode 05-hellocloud: Look Mom, We're in the Cloud.
+
+**This flow is a simple linear workflow that verifies your Kubernetes
+configuration. The 'start' and 'end' steps will run locally, while the 'hello'
+step will run remotely on Kubernetes. After configuring Metaflow to run on the cloud,
+data and metadata about your runs will be stored remotely. This means you can
+use the client to access information about any flow from anywhere.**
+
+#### Showcasing:
+- Kubernetes decorator.
+- Accessing data artifacts generated remotely in a local notebook.
+- retry decorator.
+
+#### To play this episode:
+Open ```05-hello-cloud/hello-cloud.ipynb```
+
+# Episode 5: Hello Cloud
+
+## Look Mom, We're in the Cloud.
+
+This flow is a simple linear workflow that verifies your cloud configuration. The `start` and `end` steps will run locally, while the `hello` step will [run remotely](/scaling/remote-tasks/introduction). After [configuring Metaflow](/getting-started/infrastructure) to run in the cloud, data and metadata about your runs will be stored remotely. This means you can use the client to access information about any flow from anywhere.
+
+You can find the tutorial code on [GitHub](https://github.com/Netflix/metaflow/tree/master/metaflow/tutorials/05-hello-cloud)
+
+**Showcasing:**
+
+- [Kubernetes](https://docs.metaflow.org/scaling/remote-tasks/kubernetes) and the [`@kubernetes`](https://docs.metaflow.org/scaling/remote-tasks/introduction) decorator.
+- Using the [Client API](../../../metaflow/client) to access data artifacts generated remotely in a local notebook.
+- [`@retry`](https://docs.metaflow.org/scaling/failures#retrying-tasks-with-the-retry-decorator) decorator.
+
+**Before playing this episode:**
+
+1. `python -m pip install notebook`
+2. This tutorial requires access to compute and storage resources in the cloud, which can be configured by
+    1. Following the instructions [here](https://outerbounds.com/docs/engineering-welcome/) or
+    2. Requesting [a sandbox](https://outerbounds.com/sandbox/).
+
+**To play this episode:**
+
+1. `cd metaflow-tutorials`
+2. `python 05-hello-cloud/hello-cloud.py run`
+3. `jupyter-notebook 05-hello-cloud/hello-cloud.ipynb`
+4. Open _**hello-cloud.ipynb**_ in a notebook
+
+<TutorialsLink link="../../tutorials"/>
metaflow/tutorials/{05-helloaws/helloaws.ipynb → 05-hello-cloud/hello-cloud.ipynb}
RENAMED
@@ -4,9 +4,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Episode 05-helloaws: Look Mom, We're in the Cloud\n",
+    "# Episode 05-hellocloud: Look Mom, We're in the Cloud\n",
     "\n",
-    "### In …
+    "### In HelloCloudFlow, the 'start' and 'end' steps were run locally, while the 'hello' step was run remotely on Kubernetes. Since we are using AWS, data artifacts and metadata were stored remotely. This means you can use the client to access information about any flow from anywhere. This notebook shows you how. "
    ]
   },
   {
@@ -41,7 +41,7 @@
    "source": [
     "# Set namespace to None to search over all namespaces\n",
     "namespace(None)\n",
-    "run = Flow('HelloAWSFlow').latest_successful_run\n",
+    "run = Flow('HelloCloudFlow').latest_successful_run\n",
     "print(\"Using run: %s\" % str(run))\n",
     "print(run.data.message)"
    ]
@@ -49,7 +49,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3.10.6 64-bit ('3.10.6')",
    "language": "python",
    "name": "python3"
   },
@@ -63,7 +63,12 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.…
+   "version": "3.10.6"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "60d98827d7482d2a0f6aae287a18990d3a1d423e0f66197ec6cdef8a2e07b41f"
+   }
   }
  },
 "nbformat": 4,
metaflow/tutorials/{05-helloaws/helloaws.py → 05-hello-cloud/hello-cloud.py}
RENAMED
@@ -1,11 +1,11 @@
-from metaflow import FlowSpec, step, batch, retry
+from metaflow import FlowSpec, step, kubernetes, retry
 
 
-class HelloAWSFlow(FlowSpec):
+class HelloCloudFlow(FlowSpec):
     """
-    A flow where Metaflow prints 'Metaflow says Hi from AWS!'
+    A flow where Metaflow prints 'Metaflow says Hi from the cloud!'
 
-    Run this flow to validate your AWS configuration.
+    Run this flow to validate your Kubernetes configuration.
 
     """
 
@@ -18,30 +18,28 @@ class HelloAWSFlow(FlowSpec):
         """
        from metaflow import get_metadata
 
-        print("HelloAWS is starting.")
+        print("HelloCloud is starting.")
         print("")
         print("Using metadata provider: %s" % get_metadata())
         print("")
         print("The start step is running locally. Next, the ")
-        print("'hello' step will run remotely on AWS batch. ")
-        print("If you are running in the Netflix sandbox, ")
-        print("it may take some time to acquire a compute resource.")
+        print("'hello' step will run remotely on Kubernetes. ")
 
         self.next(self.hello)
 
-    @batch(cpu=1, memory=500)
+    @kubernetes(cpu=1, memory=500)
     @retry
     @step
     def hello(self):
         """
-        This steps runs remotely on AWS batch using 1 virtual CPU and 500Mb of
+        This steps runs remotely on Kubernetes using 1 virtual CPU and 500Mb of
         memory. Since we are now using a remote metadata service and data
         store, the flow information and artifacts are available from
         anywhere. The step also uses the retry decorator, so that if something
         goes wrong, the step will be automatically retried.
 
         """
-        self.message = "Hi from AWS!"
+        self.message = "Hi from the cloud!"
         print("Metaflow says: %s" % self.message)
         self.next(self.end)
 
@@ -52,8 +50,8 @@ class HelloAWSFlow(FlowSpec):
         which the flow is executed.
 
         """
-        print("HelloAWS is finished.")
+        print("HelloCloud is finished.")
 
 
 if __name__ == "__main__":
-    HelloAWSFlow()
+    HelloCloudFlow()
metaflow/tutorials/06-statistics-redux/README.md
CHANGED
@@ -3,42 +3,19 @@
 **This example revisits 'Episode 02-statistics: Is this Data Science?'. With
 Metaflow, you don't need to make any code changes to scale-up your flow by
 running on remote compute. In this example we re-run the 'stats.py' workflow
-adding the '--with batch' command line argument. This instructs Metaflow to run
-all your steps on AWS Batch without changing any code. You can control the
+adding the '--with kubernetes' command line argument. This instructs Metaflow to run
+all your steps on AWS Kubernetes without changing any code. You can control the
 behavior with additional arguments, like '--max-workers'. For this example,
 'max-workers' is used to limit the number of parallel genre-specific statistics
 computations.
 You can then access the data artifacts (even the local CSV file) from anywhere
-because the data is being stored in AWS S3.
-This tutorial uses `pandas` which may not be available in your environment.
-Use the 'conda' package manager with the `conda-forge` channel added to run
-this tutorial in any environment**
+because the data is being stored in AWS S3.**
 
 #### Showcasing:
-- '--with batch' command line option
+- '--with kubernetes' command line option
 - '--max-workers' command line option
 - Accessing data locally or remotely
-- Metaflow's conda based dependency management.
-
-
-#### Before playing this episode:
-1. ```python -m pip install pandas```
-2. ```python -m pip install notebook```
-3. ```python -m pip install matplotlib```
-4. This tutorial requires the 'conda' package manager to be installed with the
-   conda-forge channel added.
-   a. Download Miniconda at https://docs.conda.io/en/latest/miniconda.html
-   b. ```conda config --add channels conda-forge```
-5. This tutorial requires access to compute and storage resources on AWS, which
-   can be configured by
-   1. Following the instructions at
-      https://docs.metaflow.org/metaflow-on-aws/deploy-to-aws or
-   2. Requesting a sandbox at
-      https://docs.metaflow.org/metaflow-on-aws/metaflow-sandbox
-
 
 #### To play this episode:
-1. ```…
-2. ```…
-3. ```jupyter-notebook 06-statistics-redux/stats.ipynb```
-4. Open 'stats.ipynb' in your remote Sagemaker notebook
+1. ```python 02-statistics/stats.py run --with kubernetes --max-workers 4```
+2. Open ```06-statistics-redux/stats.ipynb```
metaflow/tutorials/06-statistics-redux/stats.ipynb
CHANGED
@@ -113,7 +113,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -127,7 +127,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-  "version": "3.…
+  "version": "3.11.0"
  }
 },
 "nbformat": 4,