datapipelab 0.2.9__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,42 @@
1
+ from datapipelab.app.node.tnode import TNode
2
+ from datapipelab.logger import logger
3
+
4
class GCPBucketAPINode(TNode):
    """Processor node that deletes objects from a GCS bucket, or the bucket itself.

    Values read from ``tnode_config``:
      * ``name`` - node name.
      * ``options.credentials_path`` - path to a service-account key file.
      * ``options.project_name`` - project passed to the storage client.
      * ``options.bucket_name`` - bucket to operate on.
      * ``options.subdirectory`` (optional) - object-name prefix. When it is
        a non-empty string only objects under that prefix are deleted;
        when it is missing, ``None`` or empty the WHOLE bucket is deleted.
    """

    def __init__(self, spark, tnode_config):
        super().__init__(spark=spark)
        self.node_name = tnode_config['name']
        options = tnode_config['options']
        self.credentials_path = options['credentials_path']
        self.project_name = options['project_name']
        self.bucket_name = options['bucket_name']
        # Optional subdirectory (prefix) to delete; None means "whole bucket".
        self.prefix = options.get('subdirectory')

    def __delete_gcs_folder(self, bucket_name, prefix=None):
        """Delete the objects under ``prefix``, or the entire bucket.

        Args:
            bucket_name: Name of the bucket to operate on.
            prefix: Object-name prefix. Any falsy value (``None`` or ``""``)
                selects whole-bucket deletion.

        Errors raised while deleting individual objects propagate to the
        caller. A failure to delete the whole bucket is logged and swallowed
        (best-effort), matching the previous behavior.
        """
        # Imported inside the method, so the Google client libraries are only
        # needed when this node actually runs.
        from google.cloud import storage
        from google.oauth2 import service_account

        credentials = service_account.Credentials.from_service_account_file(
            self.credentials_path
        )
        client = storage.Client(credentials=credentials, project=self.project_name)
        bucket = client.bucket(bucket_name)

        if prefix:
            # Delete only objects under the prefix (subfolder).
            # NOTE(review): list_blobs matches on the raw object-name prefix,
            # so 'data' also matches 'data_backup/...'. Confirm callers pass a
            # trailing '/' when they mean a single folder.
            deleted = False
            for blob in bucket.list_blobs(prefix=prefix):
                blob.delete()
                deleted = True
            if deleted:
                logger.info(f"Deleted all objects under prefix '{prefix}' in bucket '{bucket_name}'.")
            else:
                logger.info(f"No objects found under prefix '{prefix}' in bucket '{bucket_name}'.")
            return

        # Delete the entire bucket. force=True first removes the contained
        # objects, so the bucket does not have to be empty. Per the
        # google-cloud-storage docs this is refused (ValueError) for buckets
        # holding more than 256 objects - that case ends up in the handler
        # below.
        try:
            bucket.delete(force=True)
        except Exception as e:
            # Best-effort: do not abort the pipeline, but report the failure
            # at error level so it is not mistaken for normal progress.
            logger.error(f"Error deleting bucket '{bucket_name}': {e}")
        else:
            logger.info(f"Bucket '{bucket_name}' deleted.")

    def _process(self):
        """Run the deletion configured for this node; produces no DataFrame."""
        self.__delete_gcs_folder(self.bucket_name, self.prefix)
        return None
File without changes
File without changes
@@ -10,6 +10,7 @@ from datapipelab.app.node.sink.pandas_csv_node import PandasCSVSinkNode
10
10
  from datapipelab.app.node.sink.teams_notification_node import TeamsNotificationSinkNode
11
11
  from datapipelab.app.node.processor.bigquery_spark_node import BigQuerySparkProcessorNode
12
12
  from datapipelab.app.node.processor.bigquery_api_node import BigQueryAPIProcessorNode
13
+ from datapipelab.app.node.processor.gcp_bucket_node import GCPBucketAPINode
13
14
 
14
15
 
15
16
  class PipelineHandler:
@@ -59,6 +60,8 @@ class PipelineHandler:
59
60
  processor_df = BigQuerySparkProcessorNode(self.spark, tnode_config).run()
60
61
  if tnode_format == 'shell':
61
62
  processor_df = ShellProcessorNode(self.spark, tnode_config).run()
63
+ if tnode_format == 'gcp_bucket_api':
64
+ processor_df = GCPBucketAPINode(self.spark, tnode_config).run()
62
65
  return processor_df
63
66
 
64
67
  def write_sink_node(self, tnode_config, t_df):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datapipelab
3
- Version: 0.2.9
3
+ Version: 0.3.0
4
4
  Summary: A data pipeline library with connectors, sources, processors, and sinks.
5
5
  Requires-Dist: json5
6
6
  Requires-Dist: loguru
@@ -3,7 +3,7 @@ datapipelab/engine.py,sha256=3QRsedRYNov6xIDOZ1tukinFE-SKv39Fn3sNCnD3L6g,442
3
3
  datapipelab/logger.py,sha256=Ugv0A4TfD3JWCWXNWu0lURcnfAEyuVrK3IrvVVgcHBo,864
4
4
  datapipelab/pipeline.py,sha256=dw9D9KM_hztt9g_YzqoNgQBRyCYR92cRZwrU5duP_Pg,1464
5
5
  datapipelab/pipeline_config.py,sha256=2bFAJepViE7rT7CaRANZU07aeQpOYcZ954ISujm9pXA,3816
6
- datapipelab/pipeline_handler.py,sha256=xlDNHUr4bhlvpj6rpI6Mbfe8jsHG0-rQF4EasRUPU8Y,4783
6
+ datapipelab/pipeline_handler.py,sha256=Q1AzuPgOb9bElclfX-E8PiTGrdOKzFshQUjbNNXp5m0,4980
7
7
  datapipelab/app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  datapipelab/app/connector_node/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  datapipelab/app/node/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -13,6 +13,7 @@ datapipelab/app/node/processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
13
13
  datapipelab/app/node/processor/bigquery_api_node.py,sha256=IclDkGxo9ltGJVkBaHKFPFCSlEEyzefgalaAOLA17bE,1752
14
14
  datapipelab/app/node/processor/bigquery_spark_node.py,sha256=S9kIYW0RE5b0RjniKFFBTzA3Tx4_plFdkFQXzhl1xTY,1039
15
15
  datapipelab/app/node/processor/custom_node.py,sha256=1nqbJEhNiMP1rmN9ufpUuKO1IkuI2BEM5auW4JceGMA,933
16
+ datapipelab/app/node/processor/gcp_bucket_node.py,sha256=bzV2c89-g5S0OH5bcKKQ-9yKOGwlmOR7h7_5uO6Gnq0,1904
16
17
  datapipelab/app/node/processor/shell_node.py,sha256=s3dKgfEqbpUIEiwORERgvp7FNDE5JkFHBo7EnJYBPnA,669
17
18
  datapipelab/app/node/processor/spark_node.py,sha256=jzqdffIHUCgOfMFcoqjXdl8wFag-3gafxfNCdssKnwc,483
18
19
  datapipelab/app/node/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -26,7 +27,9 @@ datapipelab/app/node/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
26
27
  datapipelab/app/node/source/delta_node.py,sha256=gg7SfuKBAAfjk6OX2jNrot9XX61HoBe3us3D8O-dscE,529
27
28
  datapipelab/app/node/source/hive_node.py,sha256=h_AMCnnmon7uLRIGsaHAPWEReD3VaWZXnz9r0TpLGNM,478
28
29
  datapipelab/app/node/source/spark_node.py,sha256=S_x2atRFPDnXmhCUtcmaLc4BDFd2H4uQq6wnEJb7Uug,480
29
- datapipelab-0.2.9.dist-info/METADATA,sha256=nIXYk_x-oMW4E1mZ0yDS1YniCbafMy9iGNNwXxcWbZI,220
30
- datapipelab-0.2.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
31
- datapipelab-0.2.9.dist-info/top_level.txt,sha256=HgeBjHvXorKzvNqU5BNPutoI771HtiqVit9_-0Zyrb4,12
32
- datapipelab-0.2.9.dist-info/RECORD,,
30
+ datapipelab/app/wrapper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
+ datapipelab/app/wrapper/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
+ datapipelab-0.3.0.dist-info/METADATA,sha256=uXwxvNNpO_FSDUi8rC5bxPNImGtLuZuSMUV9QDNptT0,220
33
+ datapipelab-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
34
+ datapipelab-0.3.0.dist-info/top_level.txt,sha256=HgeBjHvXorKzvNqU5BNPutoI771HtiqVit9_-0Zyrb4,12
35
+ datapipelab-0.3.0.dist-info/RECORD,,