datapipelab 0.1.7__tar.gz → 0.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datapipelab-0.1.7 → datapipelab-0.1.9}/PKG-INFO +1 -1
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/processor/bigquery_api_node.py +4 -4
- datapipelab-0.1.9/datapipelab/app/node/processor/bigquery_spark_node.py +22 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab.egg-info/PKG-INFO +1 -1
- {datapipelab-0.1.7 → datapipelab-0.1.9}/setup.py +1 -1
- datapipelab-0.1.7/datapipelab/app/node/processor/bigquery_spark_node.py +0 -30
- {datapipelab-0.1.7 → datapipelab-0.1.9}/MANIFEST.in +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/README.md +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/__init__.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/__init__.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/connector_node/__init__.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/__init__.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/custom_node.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/processor/__init__.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/processor/custom_node.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/processor/shell_node.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/processor/spark_node.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/sink/__init__.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/sink/csv_node.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/sink/delta_node.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/sink/hive_node.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/sink/pandas_csv_node.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/sink/teams_notification_node.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/source/__init__.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/source/delta_node.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/source/hive_node.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/source/spark_node.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/tnode.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/engine.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/logger.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/pipeline.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/pipeline_config.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/pipeline_handler.py +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab.egg-info/SOURCES.txt +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab.egg-info/dependency_links.txt +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab.egg-info/requires.txt +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab.egg-info/top_level.txt +0 -0
- {datapipelab-0.1.7 → datapipelab-0.1.9}/setup.cfg +0 -0
{datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/processor/bigquery_api_node.py

@@ -3,8 +3,6 @@ from datapipelab.logger import logger
 
 class BigQueryAPIProcessorNode(TNode):
     def __init__(self, spark, tnode_config):
-        from google.cloud import bigquery
-        from google.oauth2 import service_account
         super().__init__(spark=spark)
         self.sql_query = tnode_config['options']['query']
         self.node_name = tnode_config['name']
@@ -12,7 +10,9 @@ class BigQueryAPIProcessorNode(TNode):
         self.return_as_spark_df = tnode_config['options']['return_as_spark_df']
         self.project_name = tnode_config['options']['project_name']
 
-    def
+    def __sql_biqquery(self, sql_query):
+        from google.cloud import bigquery
+        from google.oauth2 import service_account
         credentials = service_account.Credentials.from_service_account_file(self.credentials_path)
         client = bigquery.Client(credentials=credentials, project=self.project_name)
 
@@ -28,6 +28,6 @@ class BigQueryAPIProcessorNode(TNode):
         logger.info(rows)
 
     def _process(self):
-        self.
+        self.__sql_biqquery(self.sql_query)
         self._createOrReplaceTempView()
         return self.node
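For context, `BigQueryAPIProcessorNode` is driven entirely by its `tnode_config` dict. Below is a minimal sketch of a config satisfying the fields visible in this diff; the option key behind `self.credentials_path` does not appear in these hunks, so `credentials_path` is an assumption, and all values are illustrative.

```python
# Hypothetical tnode_config for BigQueryAPIProcessorNode.
# 'query', 'return_as_spark_df' and 'project_name' are the option keys
# visible in the diff; 'credentials_path' is an assumed key (its
# assignment falls outside the hunks shown) and all values are made up.
tnode_config = {
    'name': 'bq_api_step',
    'options': {
        'query': 'SELECT 1 AS one',
        'return_as_spark_df': True,
        'project_name': 'my-gcp-project',
        'credentials_path': '/path/to/service-account.json',
    },
}
```

Moving the `google.cloud` imports from `__init__` into `__sql_biqquery` defers the dependency to call time, so pipelines that never run this node no longer need the BigQuery client libraries installed at import.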
datapipelab-0.1.9/datapipelab/app/node/processor/bigquery_spark_node.py
ADDED

@@ -0,0 +1,22 @@
+from datapipelab.app.node.tnode import TNode
+from datapipelab.logger import logger
+
+
+class BigQuerySparkProcessorNode(TNode):
+    def __init__(self, spark, tnode_config):
+        super().__init__(spark=spark)
+        self.sql_query = tnode_config['options']['query']
+        self.node_name = tnode_config['name']
+        self.materialization_dataset = tnode_config['options']['materialization_dataset']  # materializationDataset
+        self.parent_project = tnode_config['options']['parent_project']  # parentProject
+
+    def __sql_query(self):
+        self.node = self.spark.read.format("bigquery").option("materializationDataset",
+                                                              self.materialization_dataset).option("query",
+                                                                                                   self.sql_query).option(
+            "parentProject", self.parent_project).load()
+
+    def _process(self):
+        self.__sql_query()
+        self._createOrReplaceTempView()
+        return self.node
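The new node is a thin wrapper over the Spark BigQuery connector; stripped of the class plumbing, the read it performs amounts to the sketch below. The three connector options are the ones named in the code above; the session and option values are illustrative.

```python
# Standalone equivalent of BigQuerySparkProcessorNode.__sql_query();
# 'spark' is assumed to be an existing SparkSession with the
# spark-bigquery connector on its classpath, and values are made up.
df = (
    spark.read.format("bigquery")
    .option("materializationDataset", "tmp_dataset")  # dataset where BigQuery materializes query results
    .option("query", "SELECT 1 AS one")               # SQL pushed down to BigQuery
    .option("parentProject", "my-gcp-project")        # GCP project billed for the query
    .load()
)
```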
datapipelab-0.1.7/datapipelab/app/node/processor/bigquery_spark_node.py
DELETED

@@ -1,30 +0,0 @@
-from datapipelab.app.node.tnode import TNode
-from datapipelab.logger import logger
-
-class BigQuerySparkProcessorNode(TNode):
-    def __init__(self, spark, tnode_config):
-        super().__init__(spark=spark)
-        self.sql_query = tnode_config['options']['query']
-        self.node_name = tnode_config['name']
-        self.credentials_path = tnode_config['options']['materialization_dataset']  # materializationDataset
-        self.return_as_spark_df = tnode_config['options']['parent_project']  # parentProject
-
-    def __sql_query(self, sql_query):
-        credentials = service_account.Credentials.from_service_account_file(self.credentials_path)
-        client = bigquery.Client(credentials=credentials, project=self.project_name)
-
-        # run the job
-        query_job = client.query(sql_query)
-
-        results = query_job.result()
-        rows = [dict(row) for row in results]
-        if self.return_as_spark_df:
-            self.node = self.spark.createDataFrame(rows)
-        else:
-            self.node = None
-        logger.info(rows)
-
-    def _process(self):
-        self.__sql_query(self.sql_query)
-        self._createOrReplaceTempView()
-        return self.node
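Worth noting: the removed 0.1.7 implementation could not have run as written. Its `__sql_query` referenced `service_account`, `bigquery`, and `self.project_name` without importing or assigning them, and it stored the `materialization_dataset` and `parent_project` options under the misleading names `credentials_path` and `return_as_spark_df`. Pipelines upgrading to 0.1.9 keep the same option keys but get a node that actually uses them; a hypothetical config (keys taken from the new file above, values made up):

```python
# Hypothetical tnode_config for the 0.1.9 BigQuerySparkProcessorNode.
tnode_config = {
    'name': 'bq_spark_step',
    'options': {
        'query': 'SELECT 1 AS one',
        'materialization_dataset': 'tmp_dataset',  # connector option materializationDataset
        'parent_project': 'my-gcp-project',        # connector option parentProject
    },
}
```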