acdc_aws_etl_pipeline 0.3.7__tar.gz → 0.3.9__tar.gz

This diff shows the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: acdc_aws_etl_pipeline
- Version: 0.3.7
+ Version: 0.3.9
  Summary: Tools for ACDC ETL pipeline
  Author: JoshuaHarris391
  Author-email: harjo391@gmail.com
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "acdc_aws_etl_pipeline"
- version = "0.3.7"
+ version = "0.3.9"
  description = "Tools for ACDC ETL pipeline"
  authors = ["JoshuaHarris391 <harjo391@gmail.com>"]
  readme = "README.md"
@@ -13,6 +13,8 @@ import pytz
  import base64
  import numpy as np
  from decimal import Decimal
+ from gen3_validator.dict import DataDictionary
+ from acdc_aws_etl_pipeline.validate.validate import load_schema_from_s3_uri

  logger = logging.getLogger(__name__)

@@ -688,9 +690,37 @@ def write_gold_json_to_s3(
      logger.info(f"Object created at s3://{s3_bucket}/{s3_object_key}")


+ def construct_data_import_order(s3_uri) -> list:
+     schema_dict = load_schema_from_s3_uri(s3_uri)
+     dd = DataDictionary(schema_dict)
+     dd.schema = schema_dict
+     dd.calculate_node_order()
+     return dd.node_order
+
  def write_release_jsons_to_s3(s3_bucket, release_id, study_id, table_name, json_data):
+     """
+     Write a JSON string to a specific S3 location for a given release and study.
+
+     Args:
+         s3_bucket (str): The S3 bucket where the file will be uploaded.
+         release_id (str): Release identifier used in the S3 key path.
+         study_id (str): Study identifier used in the S3 key path.
+         table_name (str): Table name (used for naming the .json file).
+         json_data (str): JSON data (as a string) to be uploaded.
+
+     Returns:
+         str: The output directory path in S3 where the file was written.
+
+     Raises:
+         Exception: Any exception raised by boto3.client('s3').put_object.
+
+     Example:
+         >>> output_dir = write_release_jsons_to_s3('my-bucket', 'release123', 'study1', 'gold_foo', '{"x":1}')
+     """
      s3 = boto3.client('s3')
-     s3_object_key = f"release_jsons/{release_id}/{study_id}/{table_name}.json"
+     output_dir = f"release_jsons/{release_id}/{study_id}"
+     s3_object_key = f"{output_dir}/{table_name}.json"
      logger.info(f"Writing JSON data to S3 bucket: {s3_bucket}, object key: {s3_object_key}")
      s3.put_object(Body=json_data, Bucket=s3_bucket, Key=s3_object_key)
-     logger.info(f"Object created at s3://{s3_bucket}/{s3_object_key}")
+     logger.info(f"Object created at s3://{s3_bucket}/{s3_object_key}")
+     return output_dir
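
For context, a minimal usage sketch of the two helpers introduced in 0.3.9 (not part of the diff). The bucket name, release and study IDs, schema URI, and the tables mapping below are hypothetical, and both functions are assumed to be importable from the module patched above, whose path is not shown in this diff:

    import json

    # Hypothetical inputs; none of these values come from the package itself.
    S3_BUCKET = "my-etl-bucket"
    RELEASE_ID = "release123"
    STUDY_ID = "study1"
    SCHEMA_URI = "s3://my-etl-bucket/schemas/gen3_schema.json"

    # Resolve the order in which nodes should be imported, based on the Gen3
    # data dictionary stored at SCHEMA_URI.
    import_order = construct_data_import_order(SCHEMA_URI)

    # `tables` is assumed to map node names to lists of records produced by
    # earlier steps of the pipeline.
    tables = {"subject": [{"submitter_id": "S1"}]}

    # Write one release JSON per node, following the computed import order.
    for node in import_order:
        if node in tables:
            output_dir = write_release_jsons_to_s3(
                S3_BUCKET, RELEASE_ID, STUDY_ID, node, json.dumps(tables[node])
            )
            print(f"Wrote {node}.json under s3://{S3_BUCKET}/{output_dir}/")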