datapipelab 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datapipelab/app/node/processor/bigquery_api_node.py +2 -2
- datapipelab/app/node/processor/bigquery_spark_node.py +9 -17
- datapipelab/pipeline_handler.py +3 -2
- {datapipelab-0.1.6.dist-info → datapipelab-0.1.8.dist-info}/METADATA +1 -1
- {datapipelab-0.1.6.dist-info → datapipelab-0.1.8.dist-info}/RECORD +7 -7
- {datapipelab-0.1.6.dist-info → datapipelab-0.1.8.dist-info}/WHEEL +1 -1
- {datapipelab-0.1.6.dist-info → datapipelab-0.1.8.dist-info}/top_level.txt +0 -0
datapipelab/app/node/processor/bigquery_api_node.py
CHANGED
@@ -12,7 +12,7 @@ class BigQueryAPIProcessorNode(TNode):
|
|
12
12
|
self.return_as_spark_df = tnode_config['options']['return_as_spark_df']
|
13
13
|
self.project_name = tnode_config['options']['project_name']
|
14
14
|
|
15
|
-
def
|
15
|
+
def __sql_biqquery(self, sql_query):
|
16
16
|
credentials = service_account.Credentials.from_service_account_file(self.credentials_path)
|
17
17
|
client = bigquery.Client(credentials=credentials, project=self.project_name)
|
18
18
|
|
@@ -28,6 +28,6 @@ class BigQueryAPIProcessorNode(TNode):
|
|
28
28
|
logger.info(rows)
|
29
29
|
|
30
30
|
def _process(self):
|
31
|
-
self.
|
31
|
+
self.__sql_biqquery(self.sql_query)
|
32
32
|
self._createOrReplaceTempView()
|
33
33
|
return self.node
|
datapipelab/app/node/processor/bigquery_spark_node.py
CHANGED
@@ -1,30 +1,22 @@
|
|
1
1
|
from datapipelab.app.node.tnode import TNode
|
2
2
|
from datapipelab.logger import logger
|
3
3
|
|
4
|
+
|
4
5
|
class BigQuerySparkProcessorNode(TNode):
|
5
6
|
def __init__(self, spark, tnode_config):
|
6
7
|
super().__init__(spark=spark)
|
7
8
|
self.sql_query = tnode_config['options']['query']
|
8
9
|
self.node_name = tnode_config['name']
|
9
|
-
self.
|
10
|
-
self.
|
11
|
-
|
12
|
-
def __sql_query(self, sql_query):
|
13
|
-
credentials = service_account.Credentials.from_service_account_file(self.credentials_path)
|
14
|
-
client = bigquery.Client(credentials=credentials, project=self.project_name)
|
15
|
-
|
16
|
-
# run the job
|
17
|
-
query_job = client.query(sql_query)
|
10
|
+
self.materialization_dataset = tnode_config['options']['materialization_dataset'] # materializationDataset
|
11
|
+
self.parent_project = tnode_config['options']['parent_project'] # parentProject
|
18
12
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
self.node = None
|
25
|
-
logger.info(rows)
|
13
|
+
def __sql_query(self):
|
14
|
+
self.node = self.spark.read.format("bigquery").option("materializationDataset",
|
15
|
+
self.materialization_dataset).option("query",
|
16
|
+
self.sql_query).option(
|
17
|
+
"parentProject", self.parent_project).load()
|
26
18
|
|
27
19
|
def _process(self):
|
28
|
-
self.__sql_query(
|
20
|
+
self.__sql_query()
|
29
21
|
self._createOrReplaceTempView()
|
30
22
|
return self.node
|
datapipelab/pipeline_handler.py
CHANGED
@@ -7,6 +7,7 @@ from datapipelab.app.node.sink.csv_node import CSVSinkNode
|
|
7
7
|
from datapipelab.app.node.sink.pandas_csv_node import PandasCSVSinkNode
|
8
8
|
from datapipelab.app.node.sink.teams_notification_node import TeamsNotificationSinkNode
|
9
9
|
from datapipelab.app.node.processor.bigquery_spark_node import BigQuerySparkProcessorNode
|
10
|
+
from datapipelab.app.node.processor.bigquery_api_node import BigQueryAPIProcessorNode
|
10
11
|
|
11
12
|
|
12
13
|
class PipelineHandler:
|
@@ -46,9 +47,9 @@ class PipelineHandler:
|
|
46
47
|
if tnode_format == 'query':
|
47
48
|
processor_df = SparkProcessorNode(self.spark, tnode_config).run()
|
48
49
|
if tnode_format == 'bigquery_api':
|
49
|
-
processor_df =
|
50
|
+
processor_df = BigQueryAPIProcessorNode(self.spark, tnode_config).run()
|
50
51
|
if tnode_format == 'bigquery_spark':
|
51
|
-
processor_df =
|
52
|
+
processor_df = BigQuerySparkProcessorNode(self.spark, tnode_config).run()
|
52
53
|
return processor_df
|
53
54
|
|
54
55
|
def write_sink_node(self, tnode_config, t_df):
|
{datapipelab-0.1.6.dist-info → datapipelab-0.1.8.dist-info}/RECORD
@@ -3,15 +3,15 @@ datapipelab/engine.py,sha256=3QRsedRYNov6xIDOZ1tukinFE-SKv39Fn3sNCnD3L6g,442
|
|
3
3
|
datapipelab/logger.py,sha256=Ugv0A4TfD3JWCWXNWu0lURcnfAEyuVrK3IrvVVgcHBo,864
|
4
4
|
datapipelab/pipeline.py,sha256=dw9D9KM_hztt9g_YzqoNgQBRyCYR92cRZwrU5duP_Pg,1464
|
5
5
|
datapipelab/pipeline_config.py,sha256=2bFAJepViE7rT7CaRANZU07aeQpOYcZ954ISujm9pXA,3816
|
6
|
-
datapipelab/pipeline_handler.py,sha256=
|
6
|
+
datapipelab/pipeline_handler.py,sha256=lQv6HwwdgZDQvICgABtWiuvZQ9jG9cJjy8s_7qLZr9s,3871
|
7
7
|
datapipelab/app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
8
|
datapipelab/app/connector_node/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
9
|
datapipelab/app/node/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
10
|
datapipelab/app/node/custom_node.py,sha256=VvjwkECTobRhO_fYKUrJCd117B5MoR9P6UKYZfRLhV4,1017
|
11
11
|
datapipelab/app/node/tnode.py,sha256=npHG4fFZty5JZ3F_okO9xml-BRhu4DkrZuNE6oaLbvw,446
|
12
12
|
datapipelab/app/node/processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
-
datapipelab/app/node/processor/bigquery_api_node.py,sha256=
|
14
|
-
datapipelab/app/node/processor/bigquery_spark_node.py,sha256=
|
13
|
+
datapipelab/app/node/processor/bigquery_api_node.py,sha256=g0LSHofMH9xxb6TW_5kSb-oY495E4lY1FZspO_CEkHw,1299
|
14
|
+
datapipelab/app/node/processor/bigquery_spark_node.py,sha256=S9kIYW0RE5b0RjniKFFBTzA3Tx4_plFdkFQXzhl1xTY,1039
|
15
15
|
datapipelab/app/node/processor/custom_node.py,sha256=1nqbJEhNiMP1rmN9ufpUuKO1IkuI2BEM5auW4JceGMA,933
|
16
16
|
datapipelab/app/node/processor/shell_node.py,sha256=s3dKgfEqbpUIEiwORERgvp7FNDE5JkFHBo7EnJYBPnA,669
|
17
17
|
datapipelab/app/node/processor/spark_node.py,sha256=jzqdffIHUCgOfMFcoqjXdl8wFag-3gafxfNCdssKnwc,483
|
@@ -25,7 +25,7 @@ datapipelab/app/node/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
|
|
25
25
|
datapipelab/app/node/source/delta_node.py,sha256=gg7SfuKBAAfjk6OX2jNrot9XX61HoBe3us3D8O-dscE,529
|
26
26
|
datapipelab/app/node/source/hive_node.py,sha256=h_AMCnnmon7uLRIGsaHAPWEReD3VaWZXnz9r0TpLGNM,478
|
27
27
|
datapipelab/app/node/source/spark_node.py,sha256=S_x2atRFPDnXmhCUtcmaLc4BDFd2H4uQq6wnEJb7Uug,480
|
28
|
-
datapipelab-0.1.
|
29
|
-
datapipelab-0.1.
|
30
|
-
datapipelab-0.1.
|
31
|
-
datapipelab-0.1.
|
28
|
+
datapipelab-0.1.8.dist-info/METADATA,sha256=JhNtfMCVgxF4ZTiCcdYQNnzfFV7lq5Xkw3GEDlR5lz4,220
|
29
|
+
datapipelab-0.1.8.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
|
30
|
+
datapipelab-0.1.8.dist-info/top_level.txt,sha256=HgeBjHvXorKzvNqU5BNPutoI771HtiqVit9_-0Zyrb4,12
|
31
|
+
datapipelab-0.1.8.dist-info/RECORD,,
|
File without changes
|