datapipelab 0.2.9__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datapipelab-0.2.9 → datapipelab-0.3.0}/PKG-INFO +1 -1
- datapipelab-0.3.0/datapipelab/app/node/processor/gcp_bucket_node.py +42 -0
- datapipelab-0.3.0/datapipelab/app/wrapper/__init__.py +0 -0
- datapipelab-0.3.0/datapipelab/app/wrapper/source/__init__.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/pipeline_handler.py +3 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab.egg-info/PKG-INFO +1 -1
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab.egg-info/SOURCES.txt +4 -1
- {datapipelab-0.2.9 → datapipelab-0.3.0}/setup.py +1 -1
- {datapipelab-0.2.9 → datapipelab-0.3.0}/MANIFEST.in +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/README.md +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/__init__.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/__init__.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/connector_node/__init__.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/__init__.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/custom_node.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/processor/__init__.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/processor/bigquery_api_node.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/processor/bigquery_spark_node.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/processor/custom_node.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/processor/shell_node.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/processor/spark_node.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/sink/__init__.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/sink/csv_node.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/sink/delta_node.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/sink/hive_node.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/sink/pandas_csv_node.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/sink/spark_node.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/sink/teams_notification_node.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/source/__init__.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/source/delta_node.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/source/hive_node.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/source/spark_node.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/app/node/tnode.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/engine.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/logger.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/pipeline.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/pipeline_config.py +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab.egg-info/dependency_links.txt +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab.egg-info/requires.txt +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab.egg-info/top_level.txt +0 -0
- {datapipelab-0.2.9 → datapipelab-0.3.0}/setup.cfg +0 -0
datapipelab-0.3.0/datapipelab/app/node/processor/gcp_bucket_node.py

@@ -0,0 +1,42 @@
+from datapipelab.app.node.tnode import TNode
+from datapipelab.logger import logger
+
+class GCPBucketAPINode(TNode):
+    def __init__(self, spark, tnode_config):
+        super().__init__(spark=spark)
+        self.node_name = tnode_config['name']
+        self.credentials_path = tnode_config['options']['credentials_path']
+        self.project_name = tnode_config['options']['project_name']
+        self.bucket_name = tnode_config['options']['bucket_name']
+        self.prefix = tnode_config['options'].get('subdirectory', None)  # Optional subdirectory (prefix) to delete
+
+    def __delete_gcs_folder(self, bucket_name, prefix=None):
+        from google.cloud import storage
+        from google.oauth2 import service_account
+        credentials = service_account.Credentials.from_service_account_file(self.credentials_path)
+        client = storage.Client(credentials=credentials, project=self.project_name)
+
+        bucket = client.bucket(bucket_name)
+
+        if prefix:
+            # Delete only objects under the prefix (subfolder)
+            blobs = bucket.list_blobs(prefix=prefix)
+            deleted = False
+            for blob in blobs:
+                blob.delete()
+                deleted = True
+            if deleted:
+                logger.info(f"Deleted all objects under prefix '{prefix}' in bucket '{bucket_name}'.")
+            else:
+                logger.info(f"No objects found under prefix '{prefix}' in bucket '{bucket_name}'.")
+        else:
+            # Delete the entire bucket (must be empty)
+            try:
+                bucket.delete(force=True)  # force=True to delete non-empty bucket
+                logger.info(f"Bucket '{bucket_name}' deleted.")
+            except Exception as e:
+                logger.info(f"Error deleting bucket '{bucket_name}': {e}")
+
+    def _process(self):
+        self.__delete_gcs_folder(self.bucket_name, self.prefix)
+        return None
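For orientation, the new node reads its settings from the node config's options. Below is a minimal, hypothetical sketch of a tnode_config it would accept; only the keys come from the code above, the names and values are illustrative and not taken from the package:

    tnode_config = {
        'name': 'cleanup_gcs_staging',  # hypothetical node name
        'options': {
            'credentials_path': '/path/to/service_account.json',  # hypothetical service-account key file
            'project_name': 'my-gcp-project',                     # hypothetical GCP project
            'bucket_name': 'my-staging-bucket',                   # hypothetical bucket
            'subdirectory': 'tmp/exports/'                        # optional; omit to delete the whole bucket
        }
    }
    # The pipeline handler (see the pipeline_handler.py hunk below) would run it as:
    # GCPBucketAPINode(spark, tnode_config).run()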
{datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab/pipeline_handler.py

@@ -10,6 +10,7 @@ from datapipelab.app.node.sink.pandas_csv_node import PandasCSVSinkNode
 from datapipelab.app.node.sink.teams_notification_node import TeamsNotificationSinkNode
 from datapipelab.app.node.processor.bigquery_spark_node import BigQuerySparkProcessorNode
 from datapipelab.app.node.processor.bigquery_api_node import BigQueryAPIProcessorNode
+from datapipelab.app.node.processor.gcp_bucket_node import GCPBucketAPINode
 
 
 class PipelineHandler:

@@ -59,6 +60,8 @@ class PipelineHandler:
             processor_df = BigQuerySparkProcessorNode(self.spark, tnode_config).run()
         if tnode_format == 'shell':
             processor_df = ShellProcessorNode(self.spark, tnode_config).run()
+        if tnode_format == 'gcp_bucket_api':
+            processor_df = GCPBucketAPINode(self.spark, tnode_config).run()
         return processor_df
 
     def write_sink_node(self, tnode_config, t_df):
{datapipelab-0.2.9 → datapipelab-0.3.0}/datapipelab.egg-info/SOURCES.txt

@@ -21,6 +21,7 @@ datapipelab/app/node/processor/__init__.py
 datapipelab/app/node/processor/bigquery_api_node.py
 datapipelab/app/node/processor/bigquery_spark_node.py
 datapipelab/app/node/processor/custom_node.py
+datapipelab/app/node/processor/gcp_bucket_node.py
 datapipelab/app/node/processor/shell_node.py
 datapipelab/app/node/processor/spark_node.py
 datapipelab/app/node/sink/__init__.py

@@ -33,4 +34,6 @@ datapipelab/app/node/sink/teams_notification_node.py
 datapipelab/app/node/source/__init__.py
 datapipelab/app/node/source/delta_node.py
 datapipelab/app/node/source/hive_node.py
-datapipelab/app/node/source/spark_node.py
+datapipelab/app/node/source/spark_node.py
+datapipelab/app/wrapper/__init__.py
+datapipelab/app/wrapper/source/__init__.py