acdc_aws_etl_pipeline 0.6.4-py3-none-any.whl → 0.6.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- acdc_aws_etl_pipeline/upload/metadata_submitter.py +388 -157
- {acdc_aws_etl_pipeline-0.6.4.dist-info → acdc_aws_etl_pipeline-0.6.6.dist-info}/METADATA +2 -1
- {acdc_aws_etl_pipeline-0.6.4.dist-info → acdc_aws_etl_pipeline-0.6.6.dist-info}/RECORD +4 -4
- {acdc_aws_etl_pipeline-0.6.4.dist-info → acdc_aws_etl_pipeline-0.6.6.dist-info}/WHEEL +0 -0
acdc_aws_etl_pipeline/upload/metadata_submitter.py

@@ -9,6 +9,12 @@ from gen3.index import Gen3Index
 from gen3.submission import Gen3Submission
 import logging
 from datetime import datetime
+import jwt
+from typing import Dict, List
+import re
+import pandas as pd
+import uuid
+from acdc_aws_etl_pipeline.validate.validate import write_parquet_to_db
 
 logger = logging.getLogger(__name__)
 
@@ -201,16 +207,17 @@ def read_metadata_json_s3(s3_uri: str, session) -> dict:
     logger.debug(f"Read {len(data) if isinstance(data, list) else 'object'} objects from {s3_uri}")
     return data
 
-def read_data_import_order_txt_s3(s3_uri: str, session) -> list:
+def read_data_import_order_txt_s3(s3_uri: str, session, exclude_nodes: list = None) -> list:
     """
-    Read a DataImportOrder.txt file from S3 and return node order as a list.
+    Read a DataImportOrder.txt file from S3 and return node order as a list, optionally excluding some nodes.
 
     Args:
         s3_uri (str): S3 URI to the DataImportOrder.txt file.
         session (boto3.Session): Boto3 session.
+        exclude_nodes (list, optional): Node names to exclude from result.
 
     Returns:
-        list: Node names (order as listed in file).
+        list: Node names (order as listed in file), optionally excluding nodes in exclude_nodes.
 
     Raises:
         ValueError: If the provided S3 URI does not point to DataImportOrder.txt.
@@ -224,7 +231,11 @@ def read_data_import_order_txt_s3(s3_uri: str, session) -> list:
     obj = s3.get_object(Bucket=s3_uri.split("/")[2], Key="/".join(s3_uri.split("/")[3:]))
     content = obj['Body'].read().decode('utf-8')
     import_order = [line.rstrip() for line in content.splitlines() if line.strip()]
-    logger.debug(f"
+    logger.debug(f"Raw import order from S3 file: {import_order}")
+    if exclude_nodes is not None:
+        import_order = [node for node in import_order if node not in exclude_nodes]
+        logger.debug(f"Import order after excluding nodes {exclude_nodes}: {import_order}")
+    logger.debug(f"Final import order from S3 file {s3_uri}: {import_order}")
     return import_order
 
 def read_data_import_order_txt(file_path: str, exclude_nodes: list) -> list:
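To illustrate the new `exclude_nodes` parameter added above, here is a minimal, hypothetical usage sketch; the bucket, key, and profile names are placeholders and not part of the package:

```python
# Hypothetical call showing the new exclude_nodes filter; the S3 URI and
# profile name below are placeholders, not values from the package.
import boto3
from acdc_aws_etl_pipeline.upload.metadata_submitter import read_data_import_order_txt_s3

session = boto3.Session(profile_name="example-profile")
order = read_data_import_order_txt_s3(
    "s3://example-bucket/DataImportOrder.txt",
    session,
    exclude_nodes=["program", "project"],
)
# Nodes named in exclude_nodes are dropped; the order of the remaining nodes is preserved.
print(order)
```

Passing `exclude_nodes=None` (the default) keeps the previous behaviour of returning every node listed in the file.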
@@ -347,7 +358,27 @@ def get_gen3_api_key_aws_secret(secret_name: str, region_name: str, session) ->
         logger.error(f"Error parsing Gen3 API key from AWS Secrets Manager: {e}")
         raise e
 
-def create_gen3_submission_class(api_key: dict, api_endpoint: str):
+
+def infer_api_endpoint_from_jwt(jwt_token: str, api_version: str = 'v0') -> str:
+    """
+    Extracts the URL from a JSON Web Token (JWT) credential.
+
+    Args:
+        jwt_string (string): The JSON Web Token (JWT) credential.
+
+    Returns:
+        str: The extracted URL.
+    """
+    logger.info("Decoding JWT to extract API URL.")
+    url = jwt.decode(jwt_token, options={"verify_signature": False}).get('iss', '')
+    if '/user' in url:
+        url = url.split('/user')[0]
+    url = f"{url}/api/{api_version}"
+    logger.info(f"Extracted API URL from JWT: {url}")
+    return url
+
+
+def create_gen3_submission_class(api_key: dict):
     """
     Create and authenticate a Gen3Submission client using a temporary file for API key.
 
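The endpoint derivation added above relies only on the unverified `iss` claim of the JWT. The snippet below reproduces that logic with PyJWT against a made-up issuer URL, purely as an illustration:

```python
# Demonstration of the 'iss'-based endpoint derivation used above; the issuer
# URL and signing key are placeholders, and the token is decoded with
# signature verification disabled, exactly as in the diffed helper.
import jwt

token = jwt.encode({"iss": "https://data.example.org/user"}, "dummy-secret", algorithm="HS256")

url = jwt.decode(token, options={"verify_signature": False}).get("iss", "")
if "/user" in url:
    url = url.split("/user")[0]
print(f"{url}/api/v0")  # -> https://data.example.org/api/v0
```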
@@ -357,185 +388,385 @@ def create_gen3_submission_class(api_key: dict, api_endpoint: str):
 
     Returns:
         Gen3Submission: An authenticated Gen3Submission object.
-
-    Notes:
-        The temporary file storing the API key is deleted after use.
     """
-
-
+    logger.debug("Extracting JWT token from API key dict.")
+    jwt_token = api_key['api_key']
+    logger.info("Inferring API endpoint from JWT token.")
+    api_endpoint = infer_api_endpoint_from_jwt(jwt_token)
+    logger.debug(f"Inferred API endpoint: {api_endpoint}")
     logger.info(f"Creating Gen3Submission class for endpoint: {api_endpoint}")
-
-    submit =
-
-    try:
-        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix=".json", dir="/tmp") as tmp_file:
-            json.dump(api_key, tmp_file)
-            tmp_api_key_path = tmp_file.name
-        auth = Gen3Auth(refresh_file=tmp_api_key_path)
-        submit = Gen3Submission(endpoint=api_endpoint, auth_provider=auth)
-        return submit
-    finally:
-        if tmp_api_key_path and os.path.exists(tmp_api_key_path):
-            try:
-                os.remove(tmp_api_key_path)
-                logger.debug(f"Temporary API key file {tmp_api_key_path} deleted.")
-            except Exception as e:
-                logger.warning(f"Failed to delete temporary API key file {tmp_api_key_path}: {e}")
+    auth = Gen3Auth(refresh_token=api_key)
+    submit = Gen3Submission(endpoint=api_endpoint, auth_provider=auth)
+    return submit
 
-def write_submission_results(results, output_path, mode='w'):
-    with open(output_path, mode) as f:
-        json.dump(results, f, indent=4)
 
-def
-
-
-
+def submit_data_chunks(
+    split_json_list: list,
+    node: str,
+    gen3_submitter,
     project_id: str,
-
-
-
-
-    max_retries: int = 5,
-    write_submission_results_path: str = None
-):
+    max_retries: int,
+    file_path: str,
+    program_id: str = "program1"
+) -> List[Dict]:
     """
-    Submit
+    Submit each chunk of data (in split_json_list) for a given node to Gen3, using retry logic and logging on failures.
 
     Args:
-
-
-
-        project_id (str):
-
-
-
-        exclude_nodes (list, optional): List of node names to skip (default: ["project", "program", "acknowledgement", "publication"]).
-        max_retries (int, optional): Maximum number of retry attempts per node chunk. Default: 5.
+        split_json_list (list): List of JSON-serializable chunked data to submit.
+        node (str): Name of the data node being submitted.
+        gen3_submitter: A Gen3Submission instance for making submissions.
+        project_id (str): The project identifier within Gen3.
+        max_retries (int): Maximum number of retry attempts per chunk on failure.
+        file_path (str): Path of the file that was submitted. Used only for data capture.
+        program_id (str, optional): The Gen3 program id (default: "program1").
 
     Returns:
-
+        List[Dict]: List of response dictionaries for each submitted chunk.
 
     Raises:
-        Exception:
+        Exception: If submission fails after all retry attempts for any chunk.
+    """
+
+    n_json_data = len(split_json_list)
+    response_results = []
+
+    for index, jsn in enumerate(split_json_list):
+        progress_str = f"{index + 1}/{n_json_data}"
+
+        submission_success = False
+        last_exception = None
 
-
-
+        attempt = 0
+        while attempt <= max_retries:
+            try:
+                if attempt == 0:
+                    log_msg = (
+                        f"[SUBMIT] | Project: {project_id:<10} | Node: {node:<12} | "
+                        f"Split: {progress_str:<5}"
+                    )
+                    logger.info(log_msg)
+                else:
+                    log_msg = (
+                        f"[RETRY] | Project: {project_id:<10} | Node: {node:<12} | "
+                        f"Split: {progress_str:<5} | Attempt: {attempt}/{max_retries}"
+                    )
+                    logger.warning(log_msg)
+
+                res = gen3_submitter.submit_record(program_id, project_id, jsn)
+                res.update({"file_path": file_path})
+                response_results.append(res)
+                submission_success = True
+                logger.info(
+                    f"\033[92m[SUCCESS]\033[0m | Project: {project_id:<10} | "
+                    f"Node: {node:<12} | Split: {progress_str:<5}"
+                )
+                break  # Success
+
+            except Exception as e:
+                last_exception = e
+                logger.error(
+                    f"Error submitting chunk {progress_str} for node '{node}': {e}"
+                )
+
+                if attempt < max_retries:
+                    import time
+                    time.sleep(0.2)
+                else:
+                    logger.critical(
+                        f"\033[91m[FAILED]\033[0m | Project: {project_id:<10} | "
+                        f"Node: {node:<12} | Split: {progress_str:<5} | Error: {e}"
+                    )
+            attempt += 1
+
+        if not submission_success:
+            # After retries, still failed
+            raise Exception(
+                f"Failed to submit chunk {progress_str} for node '{node}' after {max_retries + 1} attempts. "
+                f"Last error: {last_exception}"
+            )
+
+    logger.info(f"Finished submitting node '{node}'.")
+    return response_results
+
+
+def flatten_submission_results(submission_results: List[Dict]) -> List[Dict]:
     """
+    Flattens a list of Gen3 submission result dictionaries into a single list of entity dictionaries.
 
-
-
-
+    For each submission result, this function processes its entities (if any),
+    extracting the 'project_id' and 'submitter_id' from the 'unique_keys' field (if present)
+    into the top-level entity dictionary for easy access.
 
-
-    exclude_nodes = ["project", "program", "acknowledgement", "publication"]
+    Any submission result that does not have a code of 200 or lacks entities is skipped, and a warning is logged.
 
-
-
+    Args:
+        submission_results (List[Dict]):
+            A list of Gen3 submission result dictionaries, each containing at least a "code" and "entities" entry.
 
-
-
+    Returns:
+        List[Dict]:
+            A flat list, where each element is an entity dictionary (with keys 'project_id' and 'submitter_id' added if available).
+    """
+    flat_list_dict = []
+    total = len(submission_results)
+    logger.info(f"Flattening {total} submission result(s)...")
+
+    for idx, obj in enumerate(submission_results, 1):
+        transaction_id = obj.get("transaction_id")
+        code = obj.get("code")
+        if code != 200:
+            logger.warning(f"Skipping submission result at index {idx-1} (code={code})")
+            continue
+
+        entities = obj.get("entities")
+
+        if entities is None:
+            logger.warning(f"No entities found in submission result at index {idx-1}")
+            continue
+
+        logger.info(f"Processing submission result {idx} of {total}, {len(entities)} entities")
+
+        for entity in entities:
+            unique_keys = entity.get("unique_keys", [{}])
+            if unique_keys and isinstance(unique_keys, list):
+                keys = unique_keys[0]
+                entity["project_id"] = keys.get("project_id")
+                entity["submitter_id"] = keys.get("submitter_id")
+            entity["transaction_id"] = transaction_id
+            entity["file_path"] = obj.get("file_path", '')
+            flat_list_dict.append(entity)
+
+    # renaming cols
+    for entity in flat_list_dict:
+        entity["gen3_guid"] = entity.pop("id", None)
+        entity["node"] = entity.pop("type", None)
+
+    logger.info(f"Finished flattening. Total entities: {len(flat_list_dict)}")
+    return flat_list_dict
+
+
+def find_version_from_path(path):
+    version_pattern = re.compile(r"^v?(\d+\.\d+\.\d+)$")
+    found_versions = []
+
+    for segment in path.split('/'):
+        match = version_pattern.match(segment)
+        if match:
+            found_versions.append(match.group(1))
+
+    if not found_versions:
+        return None
+
+    if len(found_versions) > 1:
+        logger.warning("more than one match found in path for version string")
+
+    return found_versions[-1]
+
+
+def collect_versions_from_metadata_file_list(metadata_file_list):
+    versions = []
+    for file_path in metadata_file_list:
+        version = find_version_from_path(file_path)
+        if version:
+            versions.append(version)
+    versions = list(set(versions))
+    if len(versions) > 1:
+        logger.error(f"more than one version found in metadata file list: {metadata_file_list}")
+        raise
+    return versions[0]
+
+
+class MetadataSubmitter:
+    def __init__(
+        self,
+        metadata_file_list: list,
+        api_key: dict,
+        project_id: str,
+        data_import_order_path: str,
+        program_id: str = "program1",
+        max_size_kb: int = 100,
+        exclude_nodes: list = ["project", "program", "acknowledgement", "publication"],
+        max_retries: int = 3,
+        aws_profile: str = None
+    ):
+        """
+        Initialises a MetadataSubmitter for submitting a set of metadata JSON files to a Gen3 data commons endpoint, in order.
 
+        Args:
+            metadata_file_list (list): List of local file paths or S3 URIs to metadata .json files, one per node type.
+            api_key (dict): Gen3 API key as a parsed dictionary.
+            project_id (str): Gen3 project ID to submit data to.
+            data_import_order_path (str): Path or S3 URI to DataImportOrder.txt specifying node submission order.
+            program_id (str, optional): Gen3 program ID (default: "program1").
+            max_size_kb (int, optional): Maximum size per submission chunk, in KB (default: 100).
+            exclude_nodes (list, optional): List of node names to skip during submission (default: ["project", "program", "acknowledgement", "publication"]).
+            max_retries (int, optional): Maximum number of retry attempts per node chunk (default: 3).
+            aws_profile (str, optional): AWS CLI named profile to use for boto3 session (default: None).
+        """
+        self.metadata_file_list = metadata_file_list
+        self.api_key = api_key
+        self.project_id = project_id
+        self.data_import_order_path = data_import_order_path
+        self.program_id = program_id
+        self.max_size_kb = max_size_kb
+        self.exclude_nodes = exclude_nodes
+        self.max_retries = max_retries
+        self.submission_results = []
+        self.aws_profile = aws_profile
+        self.boto3_session = self._create_boto3_session()
+        logger.info("MetadataSubmitter initialised.")
+
+    def _create_gen3_submission_class(self):
+        return create_gen3_submission_class(self.api_key)
+
+    def _create_boto3_session(self):
+        return create_boto3_session(self.aws_profile)
+
+    def _read_data_import_order(self, data_import_order_path: str, exclude_nodes: list[str], boto3_session = None):
         if is_s3_uri(data_import_order_path):
-
-
-            logger.debug(f"Import order from S3: {import_order}")
+            session = boto3_session or self.boto3_session
+            return read_data_import_order_txt_s3(data_import_order_path, session, exclude_nodes)
         else:
-
-            import_order = read_data_import_order_txt(data_import_order_path, exclude_nodes)
-            logger.debug(f"Import order from file: {import_order}")
+            return read_data_import_order_txt(data_import_order_path, exclude_nodes)
 
-
+    def _prepare_json_chunks(self, metadata_file_path: str, max_size_kb: int) -> List[List[Dict]]:
+        """
+        Read JSON data from a given file path and split it into chunks,
+        each with a maximum size of ``max_size_kb`` kilobytes.
 
-
-
-
-            continue
-        file = file_map.get(node)
-        if not file:
-            logger.info(f"Skipping node '{node}' (not present in file list).")
-            continue
+        Args:
+            metadata_file_path (str): File path (local or S3 URI) to the JSON data.
+            max_size_kb (int): Maximum allowed size (in kilobytes) for each chunk.
 
-
+        Returns:
+            list: A list of chunks, where each chunk is a list of dictionaries
+                containing JSON data.
+        """
+        logger.info(f"Reading metadata json from {metadata_file_path}")
+        if is_s3_uri(metadata_file_path):
+            session = self.boto3_session
+            data = read_metadata_json_s3(metadata_file_path, session)
+        else:
+            data = read_metadata_json(metadata_file_path)
+        return split_json_objects(data, max_size_kb)
 
-
-
-
-                json_data = read_metadata_json_s3(file, boto3_session)
-            else:
-                logger.info(f"Reading JSON data for node '{node}' from local file: {file}")
-                json_data = read_metadata_json(file)
-        except Exception as e:
-            logger.error(f"Error reading JSON for node '{node}' from {file}: {e}")
-            raise Exception(f"Failed to read JSON metadata for node '{node}' from {file}: {e}")
+    def _create_file_map(self):
+        """
+        Generate a mapping from node names to metadata file paths.
 
-
-
-
-            f"--- Starting submission process for node '{node}' ({n_json_data} chunks) ---"
-        )
+        This method infers the node name for each file in `self.metadata_file_list`
+        and returns a dictionary where the keys are node names and the values
+        are the corresponding file paths.
 
-
-
-
-
-
-
-
-
-                f"[SUBMIT] | Project: {project_id:<10} | Node: {node:<12} | "
-                f"Split: {progress_str:<5}"
-                if attempt == 0 else
-                f"[RETRY] | Project: {project_id:<10} | Node: {node:<12} | "
-                f"Split: {progress_str:<5} | Attempt: {attempt}/{max_retries}"
-            )
-            logger.info(log_msg) if attempt == 0 else logger.warning(log_msg)
-
-            res = submit.submit_record("program1", project_id, jsn)
-
-            if write_submission_results_path is not None:
-                log_filename = os.path.join(
-                    log_dir, f"{project_id}_{node}_split{index + 1}_of_{n_json_data}.json"
-                )
-                abs_log_filename = os.path.abspath(log_filename)
-                with open(abs_log_filename, "a") as f:
-                    json.dump(res, f)
-                    f.write("\n")
-                logger.info(
-                    f"Wrote submission response to log file: {abs_log_filename}"
-                )
-
-            logger.info(
-                f"\033[92m[SUCCESS]\033[0m | Project: {project_id:<10} | "
-                f"Node: {node:<12} | Split: {progress_str:<5}"
-            )
-            submission_success = True
-            break  # Success
-
-        except Exception as e:
-            last_exception = e
-            logger.error(
-                f"Error submitting chunk {progress_str} for node '{node}': {e}"
-            )
-            if attempt < max_retries:
-                import time
-                time.sleep(0.2)
-            else:
-                logger.critical(
-                    f"\033[91m[FAILED]\033[0m | Project: {project_id:<10} | "
-                    f"Node: {node:<12} | Split: {progress_str:<5} | Error: {e}"
-                )
-
-        if not submission_success:
-            # After retries, still failed
-            raise Exception(
-                f"Failed to submit chunk {progress_str} for node '{node}' after {max_retries + 1} attempts. "
-                f"Last error: {last_exception}"
-            )
+        Returns:
+            dict: Dictionary mapping node names (str) to their associated metadata file paths (str).
+        """
+        file_map = {
+            get_node_from_file_path(file): file
+            for file in self.metadata_file_list
+        }
+        return file_map
 
-
+    def submit_metadata(self) -> List[Dict]:
+        """
+        Submits metadata for each node defined in the data import order, except those in the exclude list.
 
-
+        For each node, this method retrieves the corresponding metadata file, splits the JSON data
+        into size-constrained chunks, and submits each chunk to the Gen3 submission API. Responses
+        from all submissions are gathered and returned as a list.
 
-
-
-
+        Returns:
+            List[Dict]: A list of response dictionaries returned from the Gen3 metadata submissions.
+        """
+        gen3_submitter = self._create_gen3_submission_class()
+        data_import_order = self._read_data_import_order(self.data_import_order_path, self.exclude_nodes, self.boto3_session)
+        file_map = self._create_file_map()
+        output_response_list_dict = []
+
+        logger.info("Starting metadata submission.")
+        for node in data_import_order:
+
+            if node in self.exclude_nodes:
+                logger.info(f"Skipping node '{node}' (in exclude list).")
+                continue
+            file_path = file_map.get(node)
+            if not file_path:
+                logger.info(f"Skipping node '{node}' (not present in file list).")
+                continue
+
+            logger.info(f"Processing file '{file_path}' for node '{node}'.")
+            logger.info("Splitting JSON data into chunks.")
+            json_chunks = self._prepare_json_chunks(file_path, self.max_size_kb)
+
+            logger.info("Submitting chunks to Gen3.")
+            response_list = submit_data_chunks(
+                split_json_list=json_chunks,
+                node=node,
+                file_path=file_path,
+                gen3_submitter=gen3_submitter,
+                project_id=self.project_id,
+                max_retries=self.max_retries,
+                program_id=self.program_id
+            )
+            output_response_list_dict.extend(response_list)
+
+        self.submission_results = output_response_list_dict
+        return output_response_list_dict
+
+    def upload_metadata_submission_results(
+        self,
+        dataset_root: str,
+        database: str,
+        table: str,
+        partition_cols: list = ["upload_datetime"],
+    ):
+        """
+        Uploads the submission results to s3 and parquet table.
+
+        Args:
+            dataset_root (str): S3 path where the parquet files will be stored
+                (e.g., "s3://acdc-dataops-metadata/metadata_upload/").
+            database (str): Database name for storing the metadata upload
+                (e.g., "acdc_dataops_metadata_db").
+            table (str): Table name for storing the metadata upload
+                (e.g., "metadata_upload").
+            partition_cols (list, optional): List of column names to partition the parquet table by.
+                Defaults to ["upload_datetime"].
+        """
+        logger.info("Collecting version from metadata file list.")
+        version = collect_versions_from_metadata_file_list(self.metadata_file_list)
+        logger.info(f"Extracted version: {version}")
+
+        logger.info("Inferring API endpoint from JWT.")
+        api_endpoint = infer_api_endpoint_from_jwt(self.api_key['api_key'])
+        logger.info(f"Using API endpoint: {api_endpoint}")
+
+        upload_datetime = datetime.now().isoformat()
+        upload_id = str(uuid.uuid4())
+        logger.info(f"Upload datetime: {upload_datetime}")
+        logger.info(f"Generated upload ID: {upload_id}")
+
+        logger.info("Flattening submission results for upload.")
+        flattened_results = flatten_submission_results(self.submission_results)
+        logger.info(f"Flattened {len(flattened_results)} submission result entries.")
+
+        logger.info("Converting flattened results to DataFrame.")
+        flattened_results_df = pd.DataFrame(flattened_results)
+        flattened_results_df['upload_datetime'] = upload_datetime
+        flattened_results_df['upload_id'] = upload_id
+        flattened_results_df['api_endpoint'] = api_endpoint
+        flattened_results_df['version'] = version
+
+        logger.info(
+            f"Writing DataFrame to parquet and S3/table: "
+            f"dataset_root={dataset_root}, database={database}, table={table}, partition_cols={partition_cols}"
+        )
+        write_parquet_to_db(
+            df=flattened_results_df,
+            dataset_root=dataset_root,
+            database=database,
+            table=table,
+            partition_cols=partition_cols
+        )
+        logger.info("Metadata submission results upload complete.")
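Taken together, the new MetadataSubmitter class ties these helpers into a single flow. A hedged end-to-end sketch follows; all paths, IDs, and the credentials file name are assumptions for illustration, while the dataset_root, database, and table values are the examples given in the docstring above:

```python
# Putting the new pieces together; every literal below (file paths, project id,
# profile and credentials file) is illustrative, not taken from the package.
import json

from acdc_aws_etl_pipeline.upload.metadata_submitter import MetadataSubmitter

with open("credentials.json") as fh:   # a downloaded Gen3 API key file (assumed)
    api_key = json.load(fh)

submitter = MetadataSubmitter(
    metadata_file_list=[
        "s3://example-bucket/metadata/v1.2.3/subject.json",
        "s3://example-bucket/metadata/v1.2.3/sample.json",
    ],
    api_key=api_key,
    project_id="example_project",
    data_import_order_path="s3://example-bucket/metadata/v1.2.3/DataImportOrder.txt",
    aws_profile="example-profile",
)

results = submitter.submit_metadata()            # submits node by node, chunked and retried
submitter.upload_metadata_submission_results(    # records the responses as a parquet table
    dataset_root="s3://acdc-dataops-metadata/metadata_upload/",
    database="acdc_dataops_metadata_db",
    table="metadata_upload",
)
```

Note that `upload_metadata_submission_results` derives a release version from the metadata file paths, which is why the illustrative S3 keys include a `v1.2.3` segment for `collect_versions_from_metadata_file_list` to find.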
{acdc_aws_etl_pipeline-0.6.4.dist-info → acdc_aws_etl_pipeline-0.6.6.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: acdc_aws_etl_pipeline
-Version: 0.6.4
+Version: 0.6.6
 Summary: Tools for ACDC ETL pipeline
 Author: JoshuaHarris391
 Author-email: harjo391@gmail.com
@@ -17,6 +17,7 @@ Requires-Dist: dbt-core (==1.9.4)
 Requires-Dist: gen3 (>=4.27.4,<5.0.0)
 Requires-Dist: gen3_validator (>=2.0.0,<3.0.0)
 Requires-Dist: numpy (<2.0.0)
+Requires-Dist: pyjwt (>=2.10.1,<3.0.0)
 Requires-Dist: pytest
 Requires-Dist: python-dotenv
 Requires-Dist: pytz (>=2025.2,<2026.0)
{acdc_aws_etl_pipeline-0.6.4.dist-info → acdc_aws_etl_pipeline-0.6.6.dist-info}/RECORD

@@ -3,12 +3,12 @@ acdc_aws_etl_pipeline/ingest/ingest.py,sha256=5Q63PZfUVB5L1WxwElAxG6N-4GvqBuTNp6
 acdc_aws_etl_pipeline/upload/__init__.py,sha256=kRI1wozjK-b9YXMAPwzWHzm967ZiUAM6g8rRo4ONWtI,67
 acdc_aws_etl_pipeline/upload/gen3datasubmitter.py,sha256=bu5d8IOsKFIA1uvvzaxb7YIKwBZKdP-0QvBt-gZMyUc,8625
 acdc_aws_etl_pipeline/upload/metadata_deleter.py,sha256=T4q9xqSE2Beu3zluvAmKh7wJWcCFGz2AZ9h9ZcASfyA,63
-acdc_aws_etl_pipeline/upload/metadata_submitter.py,sha256=
+acdc_aws_etl_pipeline/upload/metadata_submitter.py,sha256=k5q5hRkj-dWo25z9nVZI2eNh0xnmQU8TPDffSSnQlUY,29906
 acdc_aws_etl_pipeline/upload/upload_synthdata_s3.py,sha256=Ge5TQzZkWnJNp-q0Ak-Yhv1h1eWLxg-PlWVHrd1m0B8,5155
 acdc_aws_etl_pipeline/utils/athena_utils.py,sha256=QJlBe-07Hkq-BqmcxBu6ZtAmVfZSHuSY4dijcysgPH8,29560
 acdc_aws_etl_pipeline/utils/dbt_utils.py,sha256=5XRFOwNNIeuW2sQuor3h_OZTuXGg6xv2AUYwj9bMAAM,2054
 acdc_aws_etl_pipeline/utils/release_writer.py,sha256=vsxHJ6l-UWPpzeyEPHurX5iFgeCEQ-9FbySAbPNfTTM,7555
 acdc_aws_etl_pipeline/validate/validate.py,sha256=zLqK9i92FsRAaBOGdY-G7-vb0e6tmkoUXhY6zCfbjN8,24895
-acdc_aws_etl_pipeline-0.6.
-acdc_aws_etl_pipeline-0.6.
-acdc_aws_etl_pipeline-0.6.
+acdc_aws_etl_pipeline-0.6.6.dist-info/METADATA,sha256=n2wMY9pJS49KUdUmhzd-JkPJHx7Fe4XMmMLGH4kI1eo,2926
+acdc_aws_etl_pipeline-0.6.6.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+acdc_aws_etl_pipeline-0.6.6.dist-info/RECORD,,

{acdc_aws_etl_pipeline-0.6.4.dist-info → acdc_aws_etl_pipeline-0.6.6.dist-info}/WHEEL: File without changes