runnable 0.12.3__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runnable/__init__.py +0 -11
 - runnable/catalog.py +27 -5
 - runnable/cli.py +122 -26
 - runnable/datastore.py +71 -35
 - runnable/defaults.py +0 -1
 - runnable/entrypoints.py +107 -32
 - runnable/exceptions.py +6 -2
 - runnable/executor.py +28 -9
 - runnable/graph.py +37 -12
 - runnable/integration.py +7 -2
 - runnable/nodes.py +15 -17
 - runnable/parameters.py +27 -8
 - runnable/pickler.py +1 -1
 - runnable/sdk.py +101 -33
 - runnable/secrets.py +3 -1
 - runnable/tasks.py +246 -34
 - runnable/utils.py +41 -13
 - {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info}/METADATA +25 -31
 - runnable-0.14.0.dist-info/RECORD +24 -0
 - {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info}/WHEEL +1 -1
 - runnable-0.14.0.dist-info/entry_points.txt +40 -0
 - runnable/extensions/__init__.py +0 -0
 - runnable/extensions/catalog/__init__.py +0 -21
 - runnable/extensions/catalog/file_system/__init__.py +0 -0
 - runnable/extensions/catalog/file_system/implementation.py +0 -234
 - runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
 - runnable/extensions/catalog/k8s_pvc/implementation.py +0 -16
 - runnable/extensions/catalog/k8s_pvc/integration.py +0 -59
 - runnable/extensions/executor/__init__.py +0 -649
 - runnable/extensions/executor/argo/__init__.py +0 -0
 - runnable/extensions/executor/argo/implementation.py +0 -1194
 - runnable/extensions/executor/argo/specification.yaml +0 -51
 - runnable/extensions/executor/k8s_job/__init__.py +0 -0
 - runnable/extensions/executor/k8s_job/implementation_FF.py +0 -259
 - runnable/extensions/executor/k8s_job/integration_FF.py +0 -69
 - runnable/extensions/executor/local/__init__.py +0 -0
 - runnable/extensions/executor/local/implementation.py +0 -71
 - runnable/extensions/executor/local_container/__init__.py +0 -0
 - runnable/extensions/executor/local_container/implementation.py +0 -446
 - runnable/extensions/executor/mocked/__init__.py +0 -0
 - runnable/extensions/executor/mocked/implementation.py +0 -154
 - runnable/extensions/executor/retry/__init__.py +0 -0
 - runnable/extensions/executor/retry/implementation.py +0 -168
 - runnable/extensions/nodes.py +0 -855
 - runnable/extensions/run_log_store/__init__.py +0 -0
 - runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
 - runnable/extensions/run_log_store/chunked_file_system/implementation.py +0 -111
 - runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
 - runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +0 -21
 - runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +0 -61
 - runnable/extensions/run_log_store/db/implementation_FF.py +0 -157
 - runnable/extensions/run_log_store/db/integration_FF.py +0 -0
 - runnable/extensions/run_log_store/file_system/__init__.py +0 -0
 - runnable/extensions/run_log_store/file_system/implementation.py +0 -140
 - runnable/extensions/run_log_store/generic_chunked.py +0 -557
 - runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
 - runnable/extensions/run_log_store/k8s_pvc/implementation.py +0 -21
 - runnable/extensions/run_log_store/k8s_pvc/integration.py +0 -56
 - runnable/extensions/secrets/__init__.py +0 -0
 - runnable/extensions/secrets/dotenv/__init__.py +0 -0
 - runnable/extensions/secrets/dotenv/implementation.py +0 -100
 - runnable-0.12.3.dist-info/RECORD +0 -64
 - runnable-0.12.3.dist-info/entry_points.txt +0 -41
 - {runnable-0.12.3.dist-info → runnable-0.14.0.dist-info/licenses}/LICENSE +0 -0
 
| 
         @@ -0,0 +1,24 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            runnable/__init__.py,sha256=WuJwXEBxjiz2E1jBapkOkYpIaCAPZ1Udyep0dnN4bkE,666
         
     | 
| 
      
 2 
     | 
    
         
            +
            runnable/catalog.py,sha256=5eTYwZWqfVBXIIn8WbweTMqiXZ9ccvtJBnAiIxSQ3Vk,4835
         
     | 
| 
      
 3 
     | 
    
         
            +
            runnable/cli.py,sha256=rBTvkNDetN6psHmDLa0kko8IHGvND5xMuO30hU_gcvY,9931
         
     | 
| 
      
 4 
     | 
    
         
            +
            runnable/context.py,sha256=QhiXJHRcEBfSKB1ijvL5yB9w44x0HCe7VEiwK1cUJ9U,1124
         
     | 
| 
      
 5 
     | 
    
         
            +
            runnable/datastore.py,sha256=Q_KKb4PNP2IXnUlR2bjOclDFAsAVJ_oNiCd5x0vB5jc,28127
         
     | 
| 
      
 6 
     | 
    
         
            +
            runnable/defaults.py,sha256=HYkXNI2hg0Y-SsXySjliwdc-3FUGJvJV3TnarmMIFFs,4656
         
     | 
| 
      
 7 
     | 
    
         
            +
            runnable/entrypoints.py,sha256=gMywHyoUheSAXCyqLMJ0QWK4IxiFVgEYwRDuZWsk-uI,18612
         
     | 
| 
      
 8 
     | 
    
         
            +
            runnable/exceptions.py,sha256=3gyN2bhqYvaZF_bo8hA7I09u8aQCAeh8NclBp5lCH8w,2574
         
     | 
| 
      
 9 
     | 
    
         
            +
            runnable/executor.py,sha256=Y-yCw4ZIz88nHn47QzCXvXm7VjByTIyBWzsqsaIpNP8,14653
         
     | 
| 
      
 10 
     | 
    
         
            +
            runnable/graph.py,sha256=EuH0210DcbEFlc6J-aSvfXJOb0SqORUiTpgFYyb_KPM,16602
         
     | 
| 
      
 11 
     | 
    
         
            +
            runnable/integration.py,sha256=IXBH20QKpFYW7pQwwbTI0qQvrg4kJseM0KMacQKli74,6791
         
     | 
| 
      
 12 
     | 
    
         
            +
            runnable/names.py,sha256=vn92Kv9ANROYSZX6Z4z1v_WA3WiEdIYmG6KEStBFZug,8134
         
     | 
| 
      
 13 
     | 
    
         
            +
            runnable/nodes.py,sha256=I9C65nj3kAHHXJSwn5QYximFjV7tbjBiTk0ayEgrmK4,16526
         
     | 
| 
      
 14 
     | 
    
         
            +
            runnable/parameters.py,sha256=g_bJurLjuppFDiDpfFqy6BRF36o_EY0OC5APl7HJFok,5450
         
     | 
| 
      
 15 
     | 
    
         
            +
            runnable/pickler.py,sha256=ydJ_eti_U1F4l-YacFp7BWm6g5vTn04UXye25S1HVok,2684
         
     | 
| 
      
 16 
     | 
    
         
            +
            runnable/sdk.py,sha256=tEwTwcfm1KVfnEql3G_yJpgymDWOqoIIA4q3RzKmHp0,30365
         
     | 
| 
      
 17 
     | 
    
         
            +
            runnable/secrets.py,sha256=PXcEJw-4WPzeWRLfsatcPPyr1zkqgHzdRWRcS9vvpvM,2354
         
     | 
| 
      
 18 
     | 
    
         
            +
            runnable/tasks.py,sha256=QPCgH_D7YkN2oAi7-w6Ipt9IZ397SayjhAl_PPyVto8,29822
         
     | 
| 
      
 19 
     | 
    
         
            +
            runnable/utils.py,sha256=THMHnWVrUhNKdIvUbeZdDiXnP1WEOuee9e9OB8zzW5M,20441
         
     | 
| 
      
 20 
     | 
    
         
            +
            runnable-0.14.0.dist-info/METADATA,sha256=n3mrLZadanBuHaUz4h5WgtsfuXmkjaHdtsfOCSAmEEk,9994
         
     | 
| 
      
 21 
     | 
    
         
            +
            runnable-0.14.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
         
     | 
| 
      
 22 
     | 
    
         
            +
            runnable-0.14.0.dist-info/entry_points.txt,sha256=8yBeduXOnO3SUnafZQwzXiE8rQMPXGDbqueyL7G9euM,1297
         
     | 
| 
      
 23 
     | 
    
         
            +
            runnable-0.14.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
         
     | 
| 
      
 24 
     | 
    
         
            +
            runnable-0.14.0.dist-info/RECORD,,
         
     | 
| 
         @@ -0,0 +1,40 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            [console_scripts]
         
     | 
| 
      
 2 
     | 
    
         
            +
            runnable = runnable.cli:cli
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            [catalog]
         
     | 
| 
      
 5 
     | 
    
         
            +
            do-nothing = runnable.catalog:DoNothingCatalog
         
     | 
| 
      
 6 
     | 
    
         
            +
            file-system = extensions.catalog.file_system:FileSystemCatalog
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            [executor]
         
     | 
| 
      
 9 
     | 
    
         
            +
            argo = extensions.executor.argo:ArgoExecutor
         
     | 
| 
      
 10 
     | 
    
         
            +
            local = extensions.executor.local:LocalExecutor
         
     | 
| 
      
 11 
     | 
    
         
            +
            local-container = extensions.executor.local_container:LocalContainerExecutor
         
     | 
| 
      
 12 
     | 
    
         
            +
            mocked = extensions.executor.mocked:MockedExecutor
         
     | 
| 
      
 13 
     | 
    
         
            +
            retry = extensions.executor.retry:RetryExecutor
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
            [nodes]
         
     | 
| 
      
 16 
     | 
    
         
            +
            dag = extensions.nodes.nodes:DagNode
         
     | 
| 
      
 17 
     | 
    
         
            +
            fail = extensions.nodes.nodes:FailNode
         
     | 
| 
      
 18 
     | 
    
         
            +
            map = extensions.nodes.nodes:MapNode
         
     | 
| 
      
 19 
     | 
    
         
            +
            parallel = extensions.nodes.nodes:ParallelNode
         
     | 
| 
      
 20 
     | 
    
         
            +
            stub = extensions.nodes.nodes:StubNode
         
     | 
| 
      
 21 
     | 
    
         
            +
            success = extensions.nodes.nodes:SuccessNode
         
     | 
| 
      
 22 
     | 
    
         
            +
            task = extensions.nodes.nodes:TaskNode
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
            [pickler]
         
     | 
| 
      
 25 
     | 
    
         
            +
            pickle = runnable.pickler:NativePickler
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
            [run_log_store]
         
     | 
| 
      
 28 
     | 
    
         
            +
            buffered = runnable.datastore:BufferRunLogstore
         
     | 
| 
      
 29 
     | 
    
         
            +
            chunked-fs = extensions.run_log_store.chunked_fs:ChunkedFileSystemRunLogStore
         
     | 
| 
      
 30 
     | 
    
         
            +
            file-system = extensions.run_log_store.file_system:FileSystemRunLogstore
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
            [secrets]
         
     | 
| 
      
 33 
     | 
    
         
            +
            do-nothing = runnable.secrets:DoNothingSecretManager
         
     | 
| 
      
 34 
     | 
    
         
            +
            dotenv = extensions.secrets.dotenv:DotEnvSecrets
         
     | 
| 
      
 35 
     | 
    
         
            +
            env-secrets = runnable.secrets:EnvSecretsManager
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
      
 37 
     | 
    
         
            +
            [tasks]
         
     | 
| 
      
 38 
     | 
    
         
            +
            notebook = runnable.tasks:NotebookTaskType
         
     | 
| 
      
 39 
     | 
    
         
            +
            python = runnable.tasks:PythonTaskType
         
     | 
| 
      
 40 
     | 
    
         
            +
            shell = runnable.tasks:ShellTaskType
         
     | 
    
        runnable/extensions/__init__.py
    DELETED
    
    | 
         
            File without changes
         
     | 
| 
         @@ -1,21 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            from typing import List, Optional
         
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
       3 
     | 
    
         
            -
            from runnable.datastore import DataCatalog
         
     | 
| 
       4 
     | 
    
         
            -
             
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
            def is_catalog_out_of_sync(catalog, synced_catalogs=Optional[List[DataCatalog]]) -> bool:
         
     | 
| 
       7 
     | 
    
         
            -
                """
         
     | 
| 
       8 
     | 
    
         
            -
                Check if the catalog items are out of sync from already cataloged objects.
         
     | 
| 
       9 
     | 
    
         
            -
                If they are, return False.
         
     | 
| 
       10 
     | 
    
         
            -
                If the object does not exist or synced catalog does not exist, return True
         
     | 
| 
       11 
     | 
    
         
            -
                """
         
     | 
| 
       12 
     | 
    
         
            -
                if not synced_catalogs:
         
     | 
| 
       13 
     | 
    
         
            -
                    return True  # If nothing has been synced in the past
         
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
                for synced_catalog in synced_catalogs:
         
     | 
| 
       16 
     | 
    
         
            -
                    if synced_catalog.catalog_relative_path == catalog.catalog_relative_path:
         
     | 
| 
       17 
     | 
    
         
            -
                        if synced_catalog.data_hash == catalog.data_hash:
         
     | 
| 
       18 
     | 
    
         
            -
                            return False
         
     | 
| 
       19 
     | 
    
         
            -
                        return True
         
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
                return True  # The object does not exist, sync it
         
     | 
| 
         
            File without changes
         
     | 
| 
         @@ -1,234 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            import logging
         
     | 
| 
       2 
     | 
    
         
            -
            import os
         
     | 
| 
       3 
     | 
    
         
            -
            import shutil
         
     | 
| 
       4 
     | 
    
         
            -
            from pathlib import Path
         
     | 
| 
       5 
     | 
    
         
            -
            from typing import Any, Dict, List, Optional
         
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
            from runnable import defaults, utils
         
     | 
| 
       8 
     | 
    
         
            -
            from runnable.catalog import BaseCatalog
         
     | 
| 
       9 
     | 
    
         
            -
            from runnable.datastore import DataCatalog
         
     | 
| 
       10 
     | 
    
         
            -
            from runnable.extensions.catalog import is_catalog_out_of_sync
         
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
            logger = logging.getLogger(defaults.LOGGER_NAME)
         
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
            class FileSystemCatalog(BaseCatalog):
         
     | 
| 
       16 
     | 
    
         
            -
                """
         
     | 
| 
       17 
     | 
    
         
            -
                A Catalog handler that uses the local file system for cataloging.
         
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
                Note: Do not use this if the steps of the pipeline run on different compute environments.
         
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
                Example config:
         
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
                catalog:
         
     | 
| 
       24 
     | 
    
         
            -
                  type: file-system
         
     | 
| 
       25 
     | 
    
         
            -
                  config:
         
     | 
| 
       26 
     | 
    
         
            -
                    catalog_location: The location to store the catalog.
         
     | 
| 
       27 
     | 
    
         
            -
                    compute_data_folder: The folder to source the data from.
         
     | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
       29 
     | 
    
         
            -
                """
         
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
       31 
     | 
    
         
            -
                service_name: str = "file-system"
         
     | 
| 
       32 
     | 
    
         
            -
                catalog_location: str = defaults.CATALOG_LOCATION_FOLDER
         
     | 
| 
       33 
     | 
    
         
            -
             
     | 
| 
       34 
     | 
    
         
            -
                def get_catalog_location(self):
         
     | 
| 
       35 
     | 
    
         
            -
                    return self.catalog_location
         
     | 
| 
       36 
     | 
    
         
            -
             
     | 
| 
       37 
     | 
    
         
            -
                def get_summary(self) -> Dict[str, Any]:
         
     | 
| 
       38 
     | 
    
         
            -
                    summary = {
         
     | 
| 
       39 
     | 
    
         
            -
                        "Catalog Location": self.get_catalog_location(),
         
     | 
| 
       40 
     | 
    
         
            -
                    }
         
     | 
| 
       41 
     | 
    
         
            -
             
     | 
| 
       42 
     | 
    
         
            -
                    return summary
         
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
     | 
    
         
            -
                def get(self, name: str, run_id: str, compute_data_folder: str = "", **kwargs) -> List[DataCatalog]:
         
     | 
| 
       45 
     | 
    
         
            -
                    """
         
     | 
| 
       46 
     | 
    
         
            -
                    Get the file by matching glob pattern to the name
         
     | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
       48 
     | 
    
         
            -
                    Args:
         
     | 
| 
       49 
     | 
    
         
            -
                        name ([str]): A glob matching the file name
         
     | 
| 
       50 
     | 
    
         
            -
                        run_id ([str]): The run id
         
     | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
       52 
     | 
    
         
            -
                    Raises:
         
     | 
| 
       53 
     | 
    
         
            -
                        Exception: If the catalog location does not exist
         
     | 
| 
       54 
     | 
    
         
            -
             
     | 
| 
       55 
     | 
    
         
            -
                    Returns:
         
     | 
| 
       56 
     | 
    
         
            -
                        List(object) : A list of catalog objects
         
     | 
| 
       57 
     | 
    
         
            -
                    """
         
     | 
| 
       58 
     | 
    
         
            -
                    logger.info(f"Using the {self.service_name} catalog and trying to get {name} for run_id: {run_id}")
         
     | 
| 
       59 
     | 
    
         
            -
             
     | 
| 
       60 
     | 
    
         
            -
                    copy_to = self.compute_data_folder
         
     | 
| 
       61 
     | 
    
         
            -
                    if compute_data_folder:
         
     | 
| 
       62 
     | 
    
         
            -
                        copy_to = compute_data_folder
         
     | 
| 
       63 
     | 
    
         
            -
             
     | 
| 
       64 
     | 
    
         
            -
                    copy_to = Path(copy_to)  # type: ignore
         
     | 
| 
       65 
     | 
    
         
            -
             
     | 
| 
       66 
     | 
    
         
            -
                    catalog_location = self.get_catalog_location()
         
     | 
| 
       67 
     | 
    
         
            -
                    run_catalog = Path(catalog_location) / run_id / copy_to
         
     | 
| 
       68 
     | 
    
         
            -
             
     | 
| 
       69 
     | 
    
         
            -
                    logger.debug(f"Copying objects to {copy_to} from the run catalog location of {run_catalog}")
         
     | 
| 
       70 
     | 
    
         
            -
             
     | 
| 
       71 
     | 
    
         
            -
                    if not utils.does_dir_exist(run_catalog):
         
     | 
| 
       72 
     | 
    
         
            -
                        msg = (
         
     | 
| 
       73 
     | 
    
         
            -
                            f"Expected Catalog to be present at: {run_catalog} but not found.\n"
         
     | 
| 
       74 
     | 
    
         
            -
                            "Note: Please make sure that some data was put in the catalog before trying to get from it.\n"
         
     | 
| 
       75 
     | 
    
         
            -
                        )
         
     | 
| 
       76 
     | 
    
         
            -
                        raise Exception(msg)
         
     | 
| 
       77 
     | 
    
         
            -
             
     | 
| 
       78 
     | 
    
         
            -
                    # Iterate through the contents of the run_catalog and copy the files that fit the name pattern
         
     | 
| 
       79 
     | 
    
         
            -
                    # We should also return a list of data hashes
         
     | 
| 
       80 
     | 
    
         
            -
                    glob_files = run_catalog.glob(name)
         
     | 
| 
       81 
     | 
    
         
            -
                    logger.debug(f"Glob identified {glob_files} as matches to from the catalog location: {run_catalog}")
         
     | 
| 
       82 
     | 
    
         
            -
             
     | 
| 
       83 
     | 
    
         
            -
                    data_catalogs = []
         
     | 
| 
       84 
     | 
    
         
            -
                    run_log_store = self._context.run_log_store
         
     | 
| 
       85 
     | 
    
         
            -
                    for file in glob_files:
         
     | 
| 
       86 
     | 
    
         
            -
                        if file.is_dir():
         
     | 
| 
       87 
     | 
    
         
            -
                            # Need not add a data catalog for the folder
         
     | 
| 
       88 
     | 
    
         
            -
                            continue
         
     | 
| 
       89 
     | 
    
         
            -
             
     | 
| 
       90 
     | 
    
         
            -
                        if str(file).endswith(".execution.log"):
         
     | 
| 
       91 
     | 
    
         
            -
                            continue
         
     | 
| 
       92 
     | 
    
         
            -
             
     | 
| 
       93 
     | 
    
         
            -
                        relative_file_path = file.relative_to(run_catalog)
         
     | 
| 
       94 
     | 
    
         
            -
             
     | 
| 
       95 
     | 
    
         
            -
                        data_catalog = run_log_store.create_data_catalog(str(relative_file_path))
         
     | 
| 
       96 
     | 
    
         
            -
                        data_catalog.catalog_handler_location = catalog_location
         
     | 
| 
       97 
     | 
    
         
            -
                        data_catalog.catalog_relative_path = str(relative_file_path)
         
     | 
| 
       98 
     | 
    
         
            -
                        data_catalog.data_hash = utils.get_data_hash(str(file))
         
     | 
| 
       99 
     | 
    
         
            -
                        data_catalog.stage = "get"
         
     | 
| 
       100 
     | 
    
         
            -
                        data_catalogs.append(data_catalog)
         
     | 
| 
       101 
     | 
    
         
            -
             
     | 
| 
       102 
     | 
    
         
            -
                        # Make the directory in the data folder if required
         
     | 
| 
       103 
     | 
    
         
            -
                        Path(copy_to / relative_file_path.parent).mkdir(parents=True, exist_ok=True)
         
     | 
| 
       104 
     | 
    
         
            -
                        shutil.copy(file, copy_to / relative_file_path)
         
     | 
| 
       105 
     | 
    
         
            -
             
     | 
| 
       106 
     | 
    
         
            -
                        logger.info(f"Copied {file} from {run_catalog} to {copy_to}")
         
     | 
| 
       107 
     | 
    
         
            -
             
     | 
| 
       108 
     | 
    
         
            -
                    if not data_catalogs:
         
     | 
| 
       109 
     | 
    
         
            -
                        raise Exception(f"Did not find any files matching {name} in {run_catalog}")
         
     | 
| 
       110 
     | 
    
         
            -
             
     | 
| 
       111 
     | 
    
         
            -
                    return data_catalogs
         
     | 
| 
       112 
     | 
    
         
            -
             
     | 
| 
       113 
     | 
    
         
            -
                def put(
         
     | 
| 
       114 
     | 
    
         
            -
                    self,
         
     | 
| 
       115 
     | 
    
         
            -
                    name: str,
         
     | 
| 
       116 
     | 
    
         
            -
                    run_id: str,
         
     | 
| 
       117 
     | 
    
         
            -
                    compute_data_folder: str = "",
         
     | 
| 
       118 
     | 
    
         
            -
                    synced_catalogs: Optional[List[DataCatalog]] = None,
         
     | 
| 
       119 
     | 
    
         
            -
                    **kwargs,
         
     | 
| 
       120 
     | 
    
         
            -
                ) -> List[DataCatalog]:
         
     | 
| 
       121 
     | 
    
         
            -
                    """
         
     | 
| 
       122 
     | 
    
         
            -
                    Put the files matching the glob pattern into the catalog.
         
     | 
| 
       123 
     | 
    
         
            -
             
     | 
| 
       124 
     | 
    
         
            -
                    If previously synced catalogs are provided, and no changes were observed, we do not sync them.
         
     | 
| 
       125 
     | 
    
         
            -
             
     | 
| 
       126 
     | 
    
         
            -
                    Args:
         
     | 
| 
       127 
     | 
    
         
            -
                        name (str): The glob pattern of the files to catalog
         
     | 
| 
       128 
     | 
    
         
            -
                        run_id (str): The run id of the run
         
     | 
| 
       129 
     | 
    
         
            -
                        compute_data_folder (str, optional): The compute data folder to sync from. Defaults to settings default.
         
     | 
| 
       130 
     | 
    
         
            -
                        synced_catalogs (dict, optional): dictionary of previously synced catalogs. Defaults to None.
         
     | 
| 
       131 
     | 
    
         
            -
             
     | 
| 
       132 
     | 
    
         
            -
                    Raises:
         
     | 
| 
       133 
     | 
    
         
            -
                        Exception: If the compute data folder does not exist.
         
     | 
| 
       134 
     | 
    
         
            -
             
     | 
| 
       135 
     | 
    
         
            -
                    Returns:
         
     | 
| 
       136 
     | 
    
         
            -
                        List(object) : A list of catalog objects
         
     | 
| 
       137 
     | 
    
         
            -
                    """
         
     | 
| 
       138 
     | 
    
         
            -
                    logger.info(f"Using the {self.service_name} catalog and trying to put {name} for run_id: {run_id}")
         
     | 
| 
       139 
     | 
    
         
            -
             
     | 
| 
       140 
     | 
    
         
            -
                    copy_from = self.compute_data_folder
         
     | 
| 
       141 
     | 
    
         
            -
                    if compute_data_folder:
         
     | 
| 
       142 
     | 
    
         
            -
                        copy_from = compute_data_folder
         
     | 
| 
       143 
     | 
    
         
            -
                    copy_from = Path(copy_from)  # type: ignore
         
     | 
| 
       144 
     | 
    
         
            -
             
     | 
| 
       145 
     | 
    
         
            -
                    catalog_location = self.get_catalog_location()
         
     | 
| 
       146 
     | 
    
         
            -
                    run_catalog = Path(catalog_location) / run_id
         
     | 
| 
       147 
     | 
    
         
            -
                    utils.safe_make_dir(run_catalog)
         
     | 
| 
       148 
     | 
    
         
            -
             
     | 
| 
       149 
     | 
    
         
            -
                    logger.debug(f"Copying objects from {copy_from} to the run catalog location of {run_catalog}")
         
     | 
| 
       150 
     | 
    
         
            -
             
     | 
| 
       151 
     | 
    
         
            -
                    if not utils.does_dir_exist(copy_from):
         
     | 
| 
       152 
     | 
    
         
            -
                        msg = (
         
     | 
| 
       153 
     | 
    
         
            -
                            f"Expected compute data folder to be present at: {compute_data_folder} but not found. \n"
         
     | 
| 
       154 
     | 
    
         
            -
                            "Note: runnable does not create the compute data folder for you. Please ensure that the "
         
     | 
| 
       155 
     | 
    
         
            -
                            "folder exists.\n"
         
     | 
| 
       156 
     | 
    
         
            -
                        )
         
     | 
| 
       157 
     | 
    
         
            -
                        raise Exception(msg)
         
     | 
| 
       158 
     | 
    
         
            -
             
     | 
| 
       159 
     | 
    
         
            -
                    # Iterate through the contents of copy_from and if the name matches, we move them to the run_catalog
         
     | 
| 
       160 
     | 
    
         
            -
                    # We should also return a list of datastore.DataCatalog items
         
     | 
| 
       161 
     | 
    
         
            -
             
     | 
| 
       162 
     | 
    
         
            -
                    glob_files = copy_from.glob(name)  # type: ignore
         
     | 
| 
       163 
     | 
    
         
            -
                    logger.debug(f"Glob identified {glob_files} as matches to from the compute data folder: {copy_from}")
         
     | 
| 
       164 
     | 
    
         
            -
             
     | 
| 
       165 
     | 
    
         
            -
                    data_catalogs = []
         
     | 
| 
       166 
     | 
    
         
            -
                    run_log_store = self._context.run_log_store
         
     | 
| 
       167 
     | 
    
         
            -
                    for file in glob_files:
         
     | 
| 
       168 
     | 
    
         
            -
                        if file.is_dir():
         
     | 
| 
       169 
     | 
    
         
            -
                            # Need not add a data catalog for the folder
         
     | 
| 
       170 
     | 
    
         
            -
                            continue
         
     | 
| 
       171 
     | 
    
         
            -
             
     | 
| 
       172 
     | 
    
         
            -
                        relative_file_path = file.relative_to(".")
         
     | 
| 
       173 
     | 
    
         
            -
             
     | 
| 
       174 
     | 
    
         
            -
                        data_catalog = run_log_store.create_data_catalog(str(relative_file_path))
         
     | 
| 
       175 
     | 
    
         
            -
                        data_catalog.catalog_handler_location = catalog_location
         
     | 
| 
       176 
     | 
    
         
            -
                        data_catalog.catalog_relative_path = run_id + os.sep + str(relative_file_path)
         
     | 
| 
       177 
     | 
    
         
            -
                        data_catalog.data_hash = utils.get_data_hash(str(file))
         
     | 
| 
       178 
     | 
    
         
            -
                        data_catalog.stage = "put"
         
     | 
| 
       179 
     | 
    
         
            -
                        data_catalogs.append(data_catalog)
         
     | 
| 
       180 
     | 
    
         
            -
             
     | 
| 
       181 
     | 
    
         
            -
                        if is_catalog_out_of_sync(data_catalog, synced_catalogs):
         
     | 
| 
       182 
     | 
    
         
            -
                            logger.info(f"{data_catalog.name} was found to be changed, syncing")
         
     | 
| 
       183 
     | 
    
         
            -
             
     | 
| 
       184 
     | 
    
         
            -
                            # Make the directory in the catalog if required
         
     | 
| 
       185 
     | 
    
         
            -
                            Path(run_catalog / relative_file_path.parent).mkdir(parents=True, exist_ok=True)
         
     | 
| 
       186 
     | 
    
         
            -
                            shutil.copy(file, run_catalog / relative_file_path)
         
     | 
| 
       187 
     | 
    
         
            -
                        else:
         
     | 
| 
       188 
     | 
    
         
            -
                            logger.info(f"{data_catalog.name} was found to be unchanged, ignoring syncing")
         
     | 
| 
       189 
     | 
    
         
            -
             
     | 
| 
       190 
     | 
    
         
            -
                    if not data_catalogs:
         
     | 
| 
       191 
     | 
    
         
            -
                        raise Exception(f"Did not find any files matching {name} in {copy_from}")
         
     | 
| 
       192 
     | 
    
         
            -
             
     | 
| 
       193 
     | 
    
         
            -
                    return data_catalogs
         
     | 
| 
       194 
     | 
    
         
            -
             
     | 
| 
       195 
     | 
    
         
            -
                def sync_between_runs(self, previous_run_id: str, run_id: str):
                    """
                    Copy the cataloged objects of a previous run into the catalog of the current run.

                    Every top-level entry found in the previous run's catalog folder is copied into
                    the current run's catalog folder. Entries whose path ends with "execution.log"
                    are skipped.

                    Args:
                        previous_run_id (str): The previous run id to sync the catalogs from
                        run_id (str): The run_id to which the data catalogs should be synced to.

                    Raises:
                        Exception: If the catalog folder of the previous run does not exist
                    """
                    # Both fragments must be f-strings, otherwise the run ids are logged as
                    # literal "{previous_run_id}" / "{run_id}" placeholders.
                    logger.info(
                        f"Using the {self.service_name} catalog and syncing catalogs "
                        f"between old: {previous_run_id} to new: {run_id}"
                    )

                    catalog_location = Path(self.get_catalog_location())
                    run_catalog = catalog_location / run_id
                    utils.safe_make_dir(run_catalog)

                    if not utils.does_dir_exist(catalog_location / previous_run_id):
                        msg = (
                            f"Catalogs from previous run : {previous_run_id} are not found.\n"
                            "Note: Please provision the catalog objects generated by previous run in the same catalog location"
                            " as the current run, even if the catalog handler for the previous run was different"
                        )
                        raise Exception(msg)

                    # Only the top level is globbed; sub-folders are copied recursively below.
                    cataloged_files = list((catalog_location / previous_run_id).glob("*"))

                    for cataloged_file in cataloged_files:
                        # The execution log of the previous run is not carried over.
                        if str(cataloged_file).endswith("execution.log"):
                            continue

                        if cataloged_file.is_file():
                            shutil.copy(cataloged_file, run_catalog / cataloged_file.name)
                        else:
                            # NOTE(review): copytree raises if the destination folder already
                            # exists in run_catalog — confirm a repeated sync is not expected.
                            shutil.copytree(cataloged_file, run_catalog / cataloged_file.name)
                        logger.info(f"Copied file from: {cataloged_file} to {run_catalog}")
         
     | 
| 
         
            File without changes
         
     | 
| 
         @@ -1,16 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            import logging
         
     | 
| 
       2 
     | 
    
         
            -
            from pathlib import Path
         
     | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
            from runnable import defaults
         
     | 
| 
       5 
     | 
    
         
            -
            from runnable.extensions.catalog.file_system.implementation import FileSystemCatalog
         
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
            # Module-level logger, looked up under the name held in defaults.LOGGER_NAME.
            logger = logging.getLogger(defaults.LOGGER_NAME)
         
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
     | 
    
         
            -
            class K8sPVCatalog(FileSystemCatalog):
                """
                File system catalog whose location is resolved under a mount path.

                The fields below name the persistent volume and the path it is mounted at.
                """

                service_name: str = "k8s-pvc"
                persistent_volume_name: str
                mount_path: str

                def get_catalog_location(self):
                    """
                    Return the catalog location joined onto the mount path, as a string.

                    `catalog_location` is not declared in this class; presumably it is
                    inherited from FileSystemCatalog — verify against the base class.
                    """
                    mounted_root = Path(self.mount_path)
                    return str(mounted_root / self.catalog_location)
         
     | 
| 
         @@ -1,59 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            import logging
         
     | 
| 
       2 
     | 
    
         
            -
            from typing import cast
         
     | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
            from runnable import defaults
         
     | 
| 
       5 
     | 
    
         
            -
            from runnable.integration import BaseIntegration
         
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
            # Module-level logger, looked up under the name held in defaults.NAME.
            # NOTE(review): the k8s_pvc implementation module uses defaults.LOGGER_NAME for
            # its logger — confirm that defaults.NAME is the intended logger name here.
            logger = logging.getLogger(defaults.NAME)
         
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
     | 
    
         
            -
            class LocalCompute(BaseIntegration):
                """
                Integration of the "local" executor with the "k8s-pvc" catalog.

                Validation always fails for this pairing.
                """

                executor_type = "local"
                service_type = "catalog"  # One of secret, catalog, datastore
                service_provider = "k8s-pvc"  # The actual implementation of the service

                def validate(self, **kwargs):
                    """
                    Reject the combination unconditionally.

                    Raises:
                        Exception: Always; keyword arguments are ignored.
                    """
                    raise Exception("We can't use the local compute k8s pvc store integration.")
         
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
       24 
     | 
    
         
            -
            class LocalContainerCompute(BaseIntegration):
                """
                Integration of the "local-container" executor with the "k8s-pvc" catalog.

                Validation always fails for this pairing.
                """

                executor_type = "local-container"
                service_type = "catalog"  # One of secret, catalog, datastore
                service_provider = "k8s-pvc"  # The actual implementation of the service

                def validate(self, **kwargs):
                    """
                    Reject the combination unconditionally.

                    Raises:
                        Exception: Always; keyword arguments are ignored.
                    """
                    raise Exception("We can't use the local-container compute k8s pvc store integration.")
         
     | 
| 
       36 
     | 
    
         
            -
             
     | 
| 
       37 
     | 
    
         
            -
             
     | 
| 
       38 
     | 
    
         
            -
            class ArgoCompute(BaseIntegration):
                """
                Integration of the "argo" executor with the "k8s-pvc" catalog.

                When configured for traversal, the catalog's persistent volume is added to the
                executor's list of persistent volumes.
                """

                executor_type = "argo"
                service_type = "catalog"  # One of secret, catalog, datastore
                service_provider = "k8s-pvc"  # The actual implementation of the service

                def configure_for_traversal(self, **kwargs):
                    """
                    Append a volume mount for the catalog's persistent volume to the executor.

                    The mount is built from the catalog's `persistent_volume_name` and
                    `mount_path`. Keyword arguments are ignored.
                    """
                    # Imported at call time rather than at module load.
                    from runnable.extensions.catalog.k8s_pvc.implementation import K8sPVCatalog
                    from runnable.extensions.executor.argo.implementation import ArgoExecutor, UserVolumeMounts

                    # cast() only narrows the static type; the objects themselves are unchanged.
                    self.executor = cast(ArgoExecutor, self.executor)
                    self.service = cast(K8sPVCatalog, self.service)

                    pvc_catalog = self.service
                    pvc_mount = UserVolumeMounts(
                        name=pvc_catalog.persistent_volume_name,
                        mount_path=pvc_catalog.mount_path,
                    )

                    self.executor.persistent_volumes.append(pvc_mount)
         
     |