datapipelab 0.1.7__tar.gz → 0.1.9__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (38)
  1. {datapipelab-0.1.7 → datapipelab-0.1.9}/PKG-INFO +1 -1
  2. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/processor/bigquery_api_node.py +4 -4
  3. datapipelab-0.1.9/datapipelab/app/node/processor/bigquery_spark_node.py +22 -0
  4. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab.egg-info/PKG-INFO +1 -1
  5. {datapipelab-0.1.7 → datapipelab-0.1.9}/setup.py +1 -1
  6. datapipelab-0.1.7/datapipelab/app/node/processor/bigquery_spark_node.py +0 -30
  7. {datapipelab-0.1.7 → datapipelab-0.1.9}/MANIFEST.in +0 -0
  8. {datapipelab-0.1.7 → datapipelab-0.1.9}/README.md +0 -0
  9. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/__init__.py +0 -0
  10. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/__init__.py +0 -0
  11. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/connector_node/__init__.py +0 -0
  12. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/__init__.py +0 -0
  13. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/custom_node.py +0 -0
  14. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/processor/__init__.py +0 -0
  15. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/processor/custom_node.py +0 -0
  16. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/processor/shell_node.py +0 -0
  17. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/processor/spark_node.py +0 -0
  18. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/sink/__init__.py +0 -0
  19. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/sink/csv_node.py +0 -0
  20. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/sink/delta_node.py +0 -0
  21. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/sink/hive_node.py +0 -0
  22. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/sink/pandas_csv_node.py +0 -0
  23. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/sink/teams_notification_node.py +0 -0
  24. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/source/__init__.py +0 -0
  25. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/source/delta_node.py +0 -0
  26. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/source/hive_node.py +0 -0
  27. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/source/spark_node.py +0 -0
  28. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/tnode.py +0 -0
  29. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/engine.py +0 -0
  30. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/logger.py +0 -0
  31. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/pipeline.py +0 -0
  32. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/pipeline_config.py +0 -0
  33. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/pipeline_handler.py +0 -0
  34. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab.egg-info/SOURCES.txt +0 -0
  35. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab.egg-info/dependency_links.txt +0 -0
  36. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab.egg-info/requires.txt +0 -0
  37. {datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab.egg-info/top_level.txt +0 -0
  38. {datapipelab-0.1.7 → datapipelab-0.1.9}/setup.cfg +0 -0
{datapipelab-0.1.7 → datapipelab-0.1.9}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datapipelab
-Version: 0.1.7
+Version: 0.1.9
 Summary: A data pipeline library with connectors, sources, processors, and sinks.
 Requires-Dist: json5
 Requires-Dist: loguru
{datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab/app/node/processor/bigquery_api_node.py
@@ -3,8 +3,6 @@ from datapipelab.logger import logger

 class BigQueryAPIProcessorNode(TNode):
     def __init__(self, spark, tnode_config):
-        from google.cloud import bigquery
-        from google.oauth2 import service_account
         super().__init__(spark=spark)
         self.sql_query = tnode_config['options']['query']
         self.node_name = tnode_config['name']
@@ -12,7 +10,9 @@ class BigQueryAPIProcessorNode(TNode):
         self.return_as_spark_df = tnode_config['options']['return_as_spark_df']
         self.project_name = tnode_config['options']['project_name']

-    def __sql_query(self, sql_query):
+    def __sql_biqquery(self, sql_query):
+        from google.cloud import bigquery
+        from google.oauth2 import service_account
         credentials = service_account.Credentials.from_service_account_file(self.credentials_path)
         client = bigquery.Client(credentials=credentials, project=self.project_name)

@@ -28,6 +28,6 @@ class BigQueryAPIProcessorNode(TNode):
             logger.info(rows)

     def _process(self):
-        self.__sql_query(self.sql_query)
+        self.__sql_biqquery(self.sql_query)
         self._createOrReplaceTempView()
         return self.node
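For context, this node runs the configured SQL through the google-cloud-bigquery client and can hand the rows back as a Spark DataFrame. A minimal usage sketch, given an existing SparkSession named spark; all values are hypothetical, and the 'credentials_path' option key is an assumption (the node reads self.credentials_path, but its assignment falls outside the hunks shown):

    from datapipelab.app.node.processor.bigquery_api_node import BigQueryAPIProcessorNode

    # Hypothetical node config; 'query', 'project_name', 'return_as_spark_df'
    # and 'name' are the keys confirmed by the diff above.
    tnode_config = {
        'name': 'bq_api_example',
        'options': {
            'query': 'SELECT 1 AS one',
            'project_name': 'my-gcp-project',         # hypothetical project id
            'credentials_path': '/path/to/sa.json',   # assumed option key
            'return_as_spark_df': True,
        },
    }

    node = BigQueryAPIProcessorNode(spark, tnode_config)
    df = node._process()  # runs the query, registers a temp view, returns the DataFrame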
datapipelab-0.1.9/datapipelab/app/node/processor/bigquery_spark_node.py
@@ -0,0 +1,22 @@
+from datapipelab.app.node.tnode import TNode
+from datapipelab.logger import logger
+
+
+class BigQuerySparkProcessorNode(TNode):
+    def __init__(self, spark, tnode_config):
+        super().__init__(spark=spark)
+        self.sql_query = tnode_config['options']['query']
+        self.node_name = tnode_config['name']
+        self.materialization_dataset = tnode_config['options']['materialization_dataset']  # materializationDataset
+        self.parent_project = tnode_config['options']['parent_project']  # parentProject
+
+    def __sql_query(self):
+        self.node = self.spark.read.format("bigquery").option("materializationDataset",
+                                                              self.materialization_dataset).option("query",
+                                                                                                   self.sql_query).option(
+            "parentProject", self.parent_project).load()
+
+    def _process(self):
+        self.__sql_query()
+        self._createOrReplaceTempView()
+        return self.node
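The new node is a thin wrapper over the spark-bigquery connector; the chained read in __sql_query is equivalent to the following stand-alone sketch, assuming an existing SparkSession named spark and hypothetical dataset/project values:

    # Equivalent direct read via the spark-bigquery connector.
    df = (spark.read.format("bigquery")
          .option("materializationDataset", "tmp_dataset")  # dataset where query results are materialized
          .option("query", "SELECT 1 AS one")
          .option("parentProject", "my-gcp-project")        # project billed for the BigQuery job
          .load())

When reading a query (rather than a table), the connector materializes the result into a temporary table, which is what materializationDataset names.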
{datapipelab-0.1.7 → datapipelab-0.1.9}/datapipelab.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datapipelab
-Version: 0.1.7
+Version: 0.1.9
 Summary: A data pipeline library with connectors, sources, processors, and sinks.
 Requires-Dist: json5
 Requires-Dist: loguru
{datapipelab-0.1.7 → datapipelab-0.1.9}/setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages

 setup(
     name='datapipelab',
-    version='0.1.7',
+    version='0.1.9',
     description='A data pipeline library with connectors, sources, processors, and sinks.',
     packages=find_packages(),
     include_package_data=True,
datapipelab-0.1.7/datapipelab/app/node/processor/bigquery_spark_node.py
@@ -1,30 +0,0 @@
-from datapipelab.app.node.tnode import TNode
-from datapipelab.logger import logger
-
-class BigQuerySparkProcessorNode(TNode):
-    def __init__(self, spark, tnode_config):
-        super().__init__(spark=spark)
-        self.sql_query = tnode_config['options']['query']
-        self.node_name = tnode_config['name']
-        self.credentials_path = tnode_config['options']['materialization_dataset']  # materializationDataset
-        self.return_as_spark_df = tnode_config['options']['parent_project']  # parentProject
-
-    def __sql_query(self, sql_query):
-        credentials = service_account.Credentials.from_service_account_file(self.credentials_path)
-        client = bigquery.Client(credentials=credentials, project=self.project_name)
-
-        # run the job
-        query_job = client.query(sql_query)
-
-        results = query_job.result()
-        rows = [dict(row) for row in results]
-        if self.return_as_spark_df:
-            self.node = self.spark.createDataFrame(rows)
-        else:
-            self.node = None
-            logger.info(rows)
-
-    def _process(self):
-        self.__sql_query(self.sql_query)
-        self._createOrReplaceTempView()
-        return self.node