datapipelab 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datapipelab/app/node/processor/bigquery_spark_node.py +2 -0
- datapipelab/app/node/processor/spark_node.py +2 -0
- datapipelab/app/node/sink/spark_api_node.py +38 -0
- datapipelab/app/node/source/spark_api_node.py +42 -0
- datapipelab/app/node/source/spark_node.py +2 -0
- datapipelab/engine.py +1 -1
- datapipelab/pipeline_handler.py +8 -0
- {datapipelab-0.3.1.dist-info → datapipelab-0.3.3.dist-info}/METADATA +1 -1
- {datapipelab-0.3.1.dist-info → datapipelab-0.3.3.dist-info}/RECORD +11 -9
- {datapipelab-0.3.1.dist-info → datapipelab-0.3.3.dist-info}/WHEEL +0 -0
- {datapipelab-0.3.1.dist-info → datapipelab-0.3.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,38 @@
|
|
1
|
+
from datapipelab.app.node.tnode import TNode
|
2
|
+
from datapipelab.logger import logger
|
3
|
+
|
4
|
+
|
5
|
+
class SparkApiSourceNode(TNode):
    """Sink node that writes a Spark DataFrame via the generic DataFrameWriter API.

    NOTE(review): despite the 'Source' in the name this class lives in the sink
    package and only writes; the name is kept because pipeline_handler refers to
    ``spark_api_node.SparkApiSourceNode`` — renaming would break that caller.
    """

    def __init__(self, spark, tnode_config, df):
        """
        Args:
            spark: active SparkSession (forwarded to TNode).
            tnode_config (dict): node configuration; writer settings are read
                from its 'options' sub-dict.
            df: the Spark DataFrame to be written.
        """
        super().__init__(spark=spark)
        self.df = df
        self.__load_options(tnode_config)

    def __load_options(self, tnode_config):
        """Extract writer settings from the node config.

        Fix: ``format`` and ``mode`` are now always assigned (defaulting to
        ``None``) instead of only when present in the config — previously a
        missing key caused an AttributeError in ``__write_df``.
        """
        self.spark_options = tnode_config.get('options', {})
        # None when absent; __write_df skips falsy values.
        self.format = self.spark_options.get('format')
        self.mode = self.spark_options.get('mode')
        self.options = {}
        if 'parent_project' in self.spark_options:
            # BigQuery connector expects camelCase option names.
            self.options['parentProject'] = self.spark_options.get('parent_project')
        if 'table' in self.spark_options:
            self.options['table'] = self.spark_options.get('table')
        if 'write_method' in self.spark_options:
            self.options['writeMethod'] = self.spark_options.get('write_method')

    def __write_df(self):
        """Build a DataFrameWriter from the configured options and save."""
        writer = self.df.write
        if self.format:
            writer = writer.format(self.format)
        for key, value in self.options.items():
            if value:
                writer = writer.option(key, value)
        if self.mode:
            writer = writer.mode(self.mode)
        writer.save()

    def _process(self):
        """Entry point invoked by the pipeline: perform the write."""
        self.__write_df()
@@ -0,0 +1,42 @@
|
|
1
|
+
from datapipelab.app.node.tnode import TNode
|
2
|
+
from datapipelab.logger import logger
|
3
|
+
|
4
|
+
|
5
|
+
class SparkApiSourceNode(TNode):
    """Source node that loads a Spark DataFrame via the generic DataFrameReader API
    and registers it as a temp view for downstream nodes.
    """

    def __init__(self, spark, tnode_config):
        """
        Args:
            spark: active SparkSession (forwarded to TNode).
            tnode_config (dict): node configuration; must contain 'name', and
                reader settings are read from its 'options' sub-dict.
        """
        super().__init__(spark=spark)
        self.node_name = tnode_config['name']
        self.__load_options(tnode_config)

    def __load_options(self, tnode_config):
        """Extract reader settings from the node config.

        Fix: ``format`` and ``query`` are now always assigned (defaulting to
        ``None``) instead of only when present in the config — previously a
        missing 'format' key caused an AttributeError in ``__load_df``.
        """
        self.spark_options = tnode_config.get('options', {})
        # None when absent; __load_df skips falsy values.
        self.format = self.spark_options.get('format')
        # NOTE(review): self.query is captured but never passed to the reader;
        # for the BigQuery connector it likely belongs in self.options as
        # 'query' — TODO confirm intended behavior before wiring it in.
        self.query = self.spark_options.get('query')
        self.options = {}
        if 'materialization_dataset' in self.spark_options:
            # BigQuery connector expects camelCase option names.
            self.options['materializationDataset'] = self.spark_options.get('materialization_dataset')
        if 'parent_project' in self.spark_options:
            self.options['parentProject'] = self.spark_options.get('parent_project')
        if 'table' in self.spark_options:
            self.options['table'] = self.spark_options.get('table')
        if 'path' in self.spark_options:
            self.options['path'] = self.spark_options.get('path')

    def __load_df(self):
        """Build a DataFrameReader from the configured options and load the DataFrame."""
        reader = self.spark.read
        if self.format:
            reader = reader.format(self.format)
        for key, value in self.options.items():
            if value:
                reader = reader.option(key, value)
        self.node = reader.load()

    def _process(self):
        """Entry point invoked by the pipeline: load, register temp view, return the DataFrame."""
        self.__load_df()
        self._createOrReplaceTempView()
        return self.node
datapipelab/engine.py
CHANGED
@@ -3,7 +3,7 @@ from datapipelab.logger import logger
|
|
3
3
|
|
4
4
|
|
5
5
|
class Engine:
|
6
|
-
def __init__(self, engine_config_path, spark, params=None):
|
6
|
+
def __init__(self, engine_config_path, spark=None, params=None):
|
7
7
|
self.engine_config_path = engine_config_path
|
8
8
|
self.params = params
|
9
9
|
self.pipeline = None
|
datapipelab/pipeline_handler.py
CHANGED
@@ -3,6 +3,7 @@ from datapipelab.app.node.processor.shell_node import ShellProcessorNode
|
|
3
3
|
from datapipelab.app.node.source.hive_node import HiveSourceNode
|
4
4
|
from datapipelab.app.node.source.spark_node import SparkSourceNode
|
5
5
|
from datapipelab.app.node.source.delta_node import DeltaSourceNode
|
6
|
+
from datapipelab.app.node.source.spark_api_node import SparkApiSourceNode
|
6
7
|
from datapipelab.app.node.processor.spark_node import SparkProcessorNode
|
7
8
|
from datapipelab.app.node.sink.delta_node import DeltaSinkNode
|
8
9
|
from datapipelab.app.node.sink.csv_node import CSVSinkNode
|
@@ -43,6 +44,9 @@ class PipelineHandler:
|
|
43
44
|
source_df = DeltaSourceNode(self.spark, tnode_config).run()
|
44
45
|
if input_type == 'custom':
|
45
46
|
source_df = CustomNode(self.spark, tnode_config).run()
|
47
|
+
if input_type == 'spark':
|
48
|
+
if input_format == 'api':
|
49
|
+
source_df = SparkApiSourceNode(self.spark, tnode_config).run()
|
46
50
|
|
47
51
|
return source_df
|
48
52
|
|
@@ -96,6 +100,10 @@ class PipelineHandler:
|
|
96
100
|
if tnode_format == 'spark':
|
97
101
|
from datapipelab.app.node.sink import spark_node
|
98
102
|
processor_df = spark_node.SparkSinkNode(self.spark, tnode_config, t_df[tnode_name_df]).run()
|
103
|
+
if tnode_type == 'spark':
|
104
|
+
if tnode_format == 'api':
|
105
|
+
from datapipelab.app.node.sink import spark_api_node
|
106
|
+
processor_df = spark_api_node.SparkApiSourceNode(self.spark, tnode_config, t_df[tnode_name_df]).run()
|
99
107
|
|
100
108
|
|
101
109
|
|
@@ -1,9 +1,9 @@
|
|
1
1
|
datapipelab/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
datapipelab/engine.py,sha256=
|
2
|
+
datapipelab/engine.py,sha256=Dt8oM7RvIMkllPhFpUr1fynJD01ZG-hr6eqt5OSRh-Y,447
|
3
3
|
datapipelab/logger.py,sha256=Ugv0A4TfD3JWCWXNWu0lURcnfAEyuVrK3IrvVVgcHBo,864
|
4
4
|
datapipelab/pipeline.py,sha256=dw9D9KM_hztt9g_YzqoNgQBRyCYR92cRZwrU5duP_Pg,1464
|
5
5
|
datapipelab/pipeline_config.py,sha256=2bFAJepViE7rT7CaRANZU07aeQpOYcZ954ISujm9pXA,3816
|
6
|
-
datapipelab/pipeline_handler.py,sha256=
|
6
|
+
datapipelab/pipeline_handler.py,sha256=LrVhYFAPf1FVLlDBACmQu-cJkVX-X8r4eIavwxJlAGo,5464
|
7
7
|
datapipelab/app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
8
|
datapipelab/app/connector_node/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
9
|
datapipelab/app/node/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -11,25 +11,27 @@ datapipelab/app/node/custom_node.py,sha256=3Se4DweMvm5VK4MTZ-pQSQ_lE_fOm6cGj-wzc
|
|
11
11
|
datapipelab/app/node/tnode.py,sha256=-2hnQkIuLwEy7xVTig54TByO7L2l7UujolXMQL0CQJA,484
|
12
12
|
datapipelab/app/node/processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
13
|
datapipelab/app/node/processor/bigquery_api_node.py,sha256=IclDkGxo9ltGJVkBaHKFPFCSlEEyzefgalaAOLA17bE,1752
|
14
|
-
datapipelab/app/node/processor/bigquery_spark_node.py,sha256=
|
14
|
+
datapipelab/app/node/processor/bigquery_spark_node.py,sha256=pklpsqYqztidCIECkl3rpjfY6LiB0p4thvE7-PzBodE,1099
|
15
15
|
datapipelab/app/node/processor/custom_node.py,sha256=1nqbJEhNiMP1rmN9ufpUuKO1IkuI2BEM5auW4JceGMA,933
|
16
16
|
datapipelab/app/node/processor/gcp_bucket_node.py,sha256=bzV2c89-g5S0OH5bcKKQ-9yKOGwlmOR7h7_5uO6Gnq0,1904
|
17
17
|
datapipelab/app/node/processor/shell_node.py,sha256=s3dKgfEqbpUIEiwORERgvp7FNDE5JkFHBo7EnJYBPnA,669
|
18
|
-
datapipelab/app/node/processor/spark_node.py,sha256=
|
18
|
+
datapipelab/app/node/processor/spark_node.py,sha256=ROSp_gpqHhtS4jog7z64jEcVPaCLFaELyIhb1A2UVe8,532
|
19
19
|
datapipelab/app/node/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
20
20
|
datapipelab/app/node/sink/csv_node.py,sha256=ZcrMZXIwJ_ln4ZZbpCAT-iMDAZIDFI9eSHNENx4wMpA,1718
|
21
21
|
datapipelab/app/node/sink/delta_node.py,sha256=4ajvMyz3cpXbd29_mZq0MW-gwpLJqdj6F9urmP8uHJw,2089
|
22
22
|
datapipelab/app/node/sink/hive_node.py,sha256=ycknOPBBwZGH3oHram_6LjHy-ygFjhuFNvVoPaNGaCU,1220
|
23
23
|
datapipelab/app/node/sink/pandas_csv_node.py,sha256=JsJFt2XRpwxGeJyt_PDUgqZafiQROf1Sk5TUhQPxh4c,870
|
24
|
+
datapipelab/app/node/sink/spark_api_node.py,sha256=Uu25EtQEXJkqzm3eBEwvpuqSfVXpYUBbWRaybC_BoQQ,1406
|
24
25
|
datapipelab/app/node/sink/spark_node.py,sha256=tP3tZae2jzQtAtfIm8C-166WWSLdZs54mqoIyZOSy58,1221
|
25
26
|
datapipelab/app/node/sink/teams_notification_node.py,sha256=6ZufdbhVvRXi3QTQafLo5uKl9kLyDnkYIE_VZFT0QNw,3581
|
26
27
|
datapipelab/app/node/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
27
28
|
datapipelab/app/node/source/delta_node.py,sha256=gg7SfuKBAAfjk6OX2jNrot9XX61HoBe3us3D8O-dscE,529
|
28
29
|
datapipelab/app/node/source/hive_node.py,sha256=h_AMCnnmon7uLRIGsaHAPWEReD3VaWZXnz9r0TpLGNM,478
|
29
|
-
datapipelab/app/node/source/
|
30
|
+
datapipelab/app/node/source/spark_api_node.py,sha256=SSJW3PnuqdHPbC57pIvBSmJXoDv34FVRXtaLLw_AGjY,1542
|
31
|
+
datapipelab/app/node/source/spark_node.py,sha256=TDfezmlk8Ts2YTGkB92-God_AyGVUslTUoevXolN7W8,532
|
30
32
|
datapipelab/app/wrapper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
31
33
|
datapipelab/app/wrapper/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
32
|
-
datapipelab-0.3.
|
33
|
-
datapipelab-0.3.
|
34
|
-
datapipelab-0.3.
|
35
|
-
datapipelab-0.3.
|
34
|
+
datapipelab-0.3.3.dist-info/METADATA,sha256=xFN7aePrkeTH2bktpfv3IATbDaRO8gIajxK7CiRiwFg,220
|
35
|
+
datapipelab-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
36
|
+
datapipelab-0.3.3.dist-info/top_level.txt,sha256=HgeBjHvXorKzvNqU5BNPutoI771HtiqVit9_-0Zyrb4,12
|
37
|
+
datapipelab-0.3.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|