mkpipe-loader-sqlserver 0.1.1__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20) hide show
  1. mkpipe_loader_sqlserver-0.4.0/MANIFEST.in +1 -0
  2. mkpipe_loader_sqlserver-0.4.0/PKG-INFO +17 -0
  3. mkpipe_loader_sqlserver-0.4.0/README.md +3 -0
  4. mkpipe_loader_sqlserver-0.4.0/mkpipe_loader_sqlserver/__init__.py +18 -0
  5. mkpipe_loader_sqlserver-0.4.0/mkpipe_loader_sqlserver.egg-info/PKG-INFO +17 -0
  6. {mkpipe_loader_sqlserver-0.1.1 → mkpipe_loader_sqlserver-0.4.0}/mkpipe_loader_sqlserver.egg-info/SOURCES.txt +1 -2
  7. mkpipe_loader_sqlserver-0.4.0/setup.py +19 -0
  8. mkpipe_loader_sqlserver-0.1.1/MANIFEST.in +0 -1
  9. mkpipe_loader_sqlserver-0.1.1/PKG-INFO +0 -39
  10. mkpipe_loader_sqlserver-0.1.1/README.md +0 -25
  11. mkpipe_loader_sqlserver-0.1.1/mkpipe_loader_sqlserver/__init__.py +0 -146
  12. mkpipe_loader_sqlserver-0.1.1/mkpipe_loader_sqlserver/jars/com.microsoft.sqlserver_mssql-jdbc-12.8.1.jre11.jar +0 -0
  13. mkpipe_loader_sqlserver-0.1.1/mkpipe_loader_sqlserver.egg-info/PKG-INFO +0 -39
  14. mkpipe_loader_sqlserver-0.1.1/setup.py +0 -25
  15. {mkpipe_loader_sqlserver-0.1.1 → mkpipe_loader_sqlserver-0.4.0}/LICENSE +0 -0
  16. {mkpipe_loader_sqlserver-0.1.1 → mkpipe_loader_sqlserver-0.4.0}/mkpipe_loader_sqlserver.egg-info/dependency_links.txt +0 -0
  17. {mkpipe_loader_sqlserver-0.1.1 → mkpipe_loader_sqlserver-0.4.0}/mkpipe_loader_sqlserver.egg-info/entry_points.txt +0 -0
  18. {mkpipe_loader_sqlserver-0.1.1 → mkpipe_loader_sqlserver-0.4.0}/mkpipe_loader_sqlserver.egg-info/requires.txt +0 -0
  19. {mkpipe_loader_sqlserver-0.1.1 → mkpipe_loader_sqlserver-0.4.0}/mkpipe_loader_sqlserver.egg-info/top_level.txt +0 -0
  20. {mkpipe_loader_sqlserver-0.1.1 → mkpipe_loader_sqlserver-0.4.0}/setup.cfg +0 -0
@@ -0,0 +1 @@
1
+ include mkpipe_loader_sqlserver/jars/*
@@ -0,0 +1,17 @@
1
+ Metadata-Version: 2.4
2
+ Name: mkpipe-loader-sqlserver
3
+ Version: 0.4.0
4
+ Summary: SQL Server loader for mkpipe.
5
+ Author: Metin Karakus
6
+ Author-email: metin_karakus@yahoo.com
7
+ License: Apache License 2.0
8
+ Requires-Python: >=3.9
9
+ License-File: LICENSE
10
+ Requires-Dist: mkpipe
11
+ Dynamic: author
12
+ Dynamic: author-email
13
+ Dynamic: license
14
+ Dynamic: license-file
15
+ Dynamic: requires-dist
16
+ Dynamic: requires-python
17
+ Dynamic: summary
@@ -0,0 +1,3 @@
1
+ # mkpipe-loader-sqlserver
2
+
3
+ SQL Server loader plugin for mkpipe.
@@ -0,0 +1,18 @@
1
+ from urllib.parse import unquote
2
+
3
+ from mkpipe.spark import JdbcLoader
4
+
5
+
6
class SqlserverLoader(JdbcLoader, variant='sqlserver'):
    """SQL Server loader plugin built on mkpipe's ``JdbcLoader``.

    The class only supplies the driver identifiers and the connection URL.
    It reads ``host``, ``port``, ``database``, ``username`` and ``password``
    from the instance; those attributes are not set here.
    # NOTE(review): presumably populated by JdbcLoader — confirm.
    """

    driver_name = 'sqlserver'
    driver_jdbc = 'com.microsoft.sqlserver.jdbc.SQLServerDriver'

    @staticmethod
    def _escape_property(value):
        """Return ``value`` brace-escaped for a SQL Server JDBC URL.

        The connection URL is a ``;``-separated list of ``name=value``
        pairs, so a raw ``;`` or ``=`` inside a value would end the value
        early or inject another property. Wrapping the value in ``{...}``
        makes the driver read it literally; a literal ``}`` inside the
        value is written as ``}}``.
        """
        return '{' + str(value).replace('}', '}}') + '}'

    def build_jdbc_url(self):
        """Build the JDBC connection URL for the configured SQL Server.

        Returns:
            str: ``jdbc:sqlserver://host:port`` followed by the
            ``databaseName``, ``user`` and ``password`` properties
            (brace-escaped) and the fixed encryption settings.
        """
        # NOTE(review): assumes self.password arrives percent-encoded;
        # confirm against JdbcLoader. If it was encoded with quote_plus,
        # '+' would need unquote_plus to become a space again.
        password = unquote(self.password)
        escape = self._escape_property
        # NOTE(review): encrypt=false sends credentials and data over an
        # unencrypted connection; kept as-is to preserve current behavior.
        return (
            f'jdbc:{self.driver_name}://{self.host}:{self.port}'
            f';databaseName={escape(self.database)}'
            f';user={escape(self.username)}'
            f';password={escape(password)}'
            f';encrypt=false;trustServerCertificate=false'
        )
@@ -0,0 +1,17 @@
1
+ Metadata-Version: 2.4
2
+ Name: mkpipe-loader-sqlserver
3
+ Version: 0.4.0
4
+ Summary: SQL Server loader for mkpipe.
5
+ Author: Metin Karakus
6
+ Author-email: metin_karakus@yahoo.com
7
+ License: Apache License 2.0
8
+ Requires-Python: >=3.9
9
+ License-File: LICENSE
10
+ Requires-Dist: mkpipe
11
+ Dynamic: author
12
+ Dynamic: author-email
13
+ Dynamic: license
14
+ Dynamic: license-file
15
+ Dynamic: requires-dist
16
+ Dynamic: requires-python
17
+ Dynamic: summary
@@ -8,5 +8,4 @@ mkpipe_loader_sqlserver.egg-info/SOURCES.txt
8
8
  mkpipe_loader_sqlserver.egg-info/dependency_links.txt
9
9
  mkpipe_loader_sqlserver.egg-info/entry_points.txt
10
10
  mkpipe_loader_sqlserver.egg-info/requires.txt
11
- mkpipe_loader_sqlserver.egg-info/top_level.txt
12
- mkpipe_loader_sqlserver/jars/com.microsoft.sqlserver_mssql-jdbc-12.8.1.jre11.jar
11
+ mkpipe_loader_sqlserver.egg-info/top_level.txt
@@ -0,0 +1,19 @@
1
+ from setuptools import setup, find_packages
2
+
3
# Packaging definition for the mkpipe SQL Server loader plugin.
setup(
    # Distribution identity.
    name='mkpipe-loader-sqlserver',
    version='0.4.0',
    description='SQL Server loader for mkpipe.',
    license='Apache License 2.0',
    # Maintainer contact.
    author='Metin Karakus',
    author_email='metin_karakus@yahoo.com',
    # Runtime requirements.
    python_requires='>=3.9',
    install_requires=['mkpipe'],
    # Package contents.
    packages=find_packages(),
    include_package_data=True,
    # Registers the loader class under the 'mkpipe.loaders' entry-point
    # group with the name 'sqlserver'.
    entry_points={
        'mkpipe.loaders': [
            'sqlserver = mkpipe_loader_sqlserver:SqlserverLoader',
        ],
    },
)
@@ -1 +0,0 @@
1
- include mkpipe_loader_sqlserver/jars/*
@@ -1,39 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: mkpipe-loader-sqlserver
3
- Version: 0.1.1
4
- Summary: SQLServer loader for mkpipe.
5
- Author: Metin Karakus
6
- Author-email: metin_karakus@yahoo.com
7
- License: Apache License 2.0
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: License :: OSI Approved :: Apache Software License
10
- Requires-Python: >=3.8
11
- Description-Content-Type: text/markdown
12
- License-File: LICENSE
13
- Requires-Dist: mkpipe
14
-
15
- # MkPipe
16
-
17
- **MkPipe** is a modular, open-source ETL (Extract, Transform, Load) tool that allows you to integrate various data sources and sinks easily. It is designed to be extensible with a plugin-based architecture that supports extractors, transformers, and loaders.
18
-
19
- ## Documentation
20
-
21
- For more detailed documentation, please visit the [GitHub repository](https://github.com/mkpipe-etl/mkpipe).
22
-
23
- ## License
24
-
25
- This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details.
26
-
27
-
28
- ## mkpipe_project.yaml Variables
29
- ```yaml
30
- ...
31
- connections:
32
- source:
33
- host: 'XXX'
34
- port: 'XXX'
35
- database: 'XXX'
36
- user: 'XXX'
37
- password: 'XXX'
38
- ...
39
- ```
@@ -1,25 +0,0 @@
1
- # MkPipe
2
-
3
- **MkPipe** is a modular, open-source ETL (Extract, Transform, Load) tool that allows you to integrate various data sources and sinks easily. It is designed to be extensible with a plugin-based architecture that supports extractors, transformers, and loaders.
4
-
5
- ## Documentation
6
-
7
- For more detailed documentation, please visit the [GitHub repository](https://github.com/mkpipe-etl/mkpipe).
8
-
9
- ## License
10
-
11
- This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details.
12
-
13
-
14
- ## mkpipe_project.yaml Variables
15
- ```yaml
16
- ...
17
- connections:
18
- source:
19
- host: 'XXX'
20
- port: 'XXX'
21
- database: 'XXX'
22
- user: 'XXX'
23
- password: 'XXX'
24
- ...
25
- ```
@@ -1,146 +0,0 @@
1
- import time
2
- from pathlib import Path
3
- from urllib.parse import quote_plus
4
- from pyspark.sql import functions as F
5
- from pyspark.sql.types import TimestampType
6
- from mkpipe.config import load_config
7
- from mkpipe.functions_db import get_db_connector
8
- from mkpipe.functions_spark import remove_partitioned_parquet, get_parser
9
- from mkpipe.utils import log_container, Logger
10
- from mkpipe.utils.base_class import PipeSettings
11
-
12
-
13
class SqlserverLoader:
    """Write staged mkpipe data to SQL Server through Spark's JDBC writer.

    Progress and failures are recorded through the backend connector's
    ``manifest_table_update``.
    """

    def __init__(self, config, settings):
        """Store connection details and create the backend connector.

        Args:
            config: Mapping with a ``'connection_params'`` entry holding
                ``host``, ``port``, ``user``, ``password`` and ``database``.
            settings: A ``PipeSettings`` instance, or a dict of keyword
                arguments used to build one.
        """
        if isinstance(settings, dict):
            self.settings = PipeSettings(**settings)
        else:
            self.settings = settings

        params = config['connection_params']
        self.connection_params = params
        self.host = params['host']
        self.port = params['port']
        self.username = params['user']
        self.password = str(params['password'])
        self.database = params['database']

        self.driver_name = 'sqlserver'
        self.driver_jdbc = 'com.microsoft.sqlserver.jdbc.SQLServerDriver'
        self.settings.driver_name = self.driver_name
        # NOTE(review): values are interpolated verbatim, so a ';' inside
        # the password or database name would alter the URL's properties.
        self.jdbc_url = (
            f'jdbc:{self.driver_name}://{self.host}:{self.port}'
            f';databaseName={self.database}'
            f';user={self.username}'
            f';password={self.password}'
            f';encrypt=false;trustServerCertificate=false'
        )

        # The backend settings come from the project configuration, not
        # from the ``config`` argument, so use a separate name for them.
        backend_params = load_config()['settings']['backend']
        db_type = backend_params['database_type']
        self.backend = get_db_connector(db_type)(backend_params)

    def add_custom_columns(self, df, elt_start_time):
        """Return ``df`` with an ``etl_time`` timestamp column.

        Any existing ``etl_time`` column is dropped first, then the column
        is re-added as ``elt_start_time`` cast to ``TimestampType``.
        """
        if 'etl_time' in df.columns:
            df = df.drop('etl_time')

        df = df.withColumn('etl_time', F.lit(elt_start_time).cast(TimestampType()))
        return df

    @log_container(__file__)
    def load(self, data, elt_start_time):
        """Load one table described by ``data`` into SQL Server.

        Args:
            data: Mapping describing the staged table. ``'table_name'`` is
                required; ``'path'`` is read after a successful write. The
                optional keys are ``write_mode``, ``file_type``,
                ``last_point_value``, ``iterate_column_type``,
                ``replication_method`` (default ``'full'``), ``fetchsize``
                (default 100000) and ``pass_on_error``.
            elt_start_time: Value stored in the ``etl_time`` column and
                included in failure messages.

        Raises:
            KeyError: If ``data`` has no ``'table_name'``.
            Exception: If loading fails and ``pass_on_error`` is falsy; the
                original error is chained as the cause.
        """
        # Everything the error handler needs is bound BEFORE the try block.
        # Binding these inside the try meant an early failure made the
        # handler itself raise UnboundLocalError and hide the real error.
        logger = Logger(__file__)
        start_time = time.time()
        name = data['table_name']

        write_mode = data.get('write_mode', None)
        file_type = data.get('file_type', None)
        last_point_value = data.get('last_point_value', None)
        iterate_column_type = data.get('iterate_column_type', None)
        replication_method = data.get('replication_method', 'full')
        # The JDBC batch size is read from the 'fetchsize' key.
        batchsize = data.get('fetchsize', 100_000)
        pass_on_error = data.get('pass_on_error', None)

        try:
            if not file_type:
                # No file type means the data was fetched before and there
                # is no new data: mark the table completed and stop.
                self.backend.manifest_table_update(
                    name=name,
                    value=None,  # Last point remains unchanged
                    value_type=None,  # Type remains unchanged
                    status='completed',  # ('completed', 'failed', 'extracting', 'loading')
                    replication_method=replication_method,  # ('incremental', 'full')
                    error_message='',
                )
                return

            self.backend.manifest_table_update(
                name=name,
                value=None,  # Last point remains unchanged
                value_type=None,  # Type remains unchanged
                status='loading',  # ('completed', 'failed', 'extracting', 'loading')
                replication_method=replication_method,  # ('incremental', 'full')
                error_message='',
            )

            df = get_parser(file_type)(data, self.settings)
            df = self.add_custom_columns(df, elt_start_time)
            message = dict(
                table_name=name,
                status='loading',
                total_partition_count=df.rdd.getNumPartitions(),
            )
            logger.info(message)

            (
                df.write.format('jdbc')
                .mode(write_mode)
                .option('url', self.jdbc_url)
                .option('dbtable', name)
                .option('driver', self.driver_jdbc)
                .option('batchsize', batchsize)
                .save()
            )

            # Update last point in the mkpipe_manifest table if applicable
            self.backend.manifest_table_update(
                name=name,
                value=last_point_value,
                value_type=iterate_column_type,
                status='completed',
                replication_method=replication_method,
                error_message='',
            )

            message = dict(table_name=name, status=write_mode)
            logger.info(message)

            # remove the parquet to reduce the storage
            remove_partitioned_parquet(data['path'])

            run_time = time.time() - start_time
            message = dict(table_name=name, status='success', run_time=run_time)
            logger.info(message)

        except Exception as e:
            # Log the error message and update the mkpipe_manifest with the error details
            message = dict(
                table_name=name,
                status='failed',
                type='loading',
                error_message=str(e),
                etl_start_time=str(elt_start_time),
            )

            self.backend.manifest_table_update(
                name=name,
                value=None,  # Last point remains unchanged
                value_type=None,  # Type remains unchanged
                status='failed',
                replication_method=replication_method,
                error_message=str(e),
            )

            if pass_on_error:
                logger.warning(message)
                return
            logger.error(message)
            raise Exception(message) from e
@@ -1,39 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: mkpipe-loader-sqlserver
3
- Version: 0.1.1
4
- Summary: SQLServer loader for mkpipe.
5
- Author: Metin Karakus
6
- Author-email: metin_karakus@yahoo.com
7
- License: Apache License 2.0
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: License :: OSI Approved :: Apache Software License
10
- Requires-Python: >=3.8
11
- Description-Content-Type: text/markdown
12
- License-File: LICENSE
13
- Requires-Dist: mkpipe
14
-
15
- # MkPipe
16
-
17
- **MkPipe** is a modular, open-source ETL (Extract, Transform, Load) tool that allows you to integrate various data sources and sinks easily. It is designed to be extensible with a plugin-based architecture that supports extractors, transformers, and loaders.
18
-
19
- ## Documentation
20
-
21
- For more detailed documentation, please visit the [GitHub repository](https://github.com/mkpipe-etl/mkpipe).
22
-
23
- ## License
24
-
25
- This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details.
26
-
27
-
28
- ## mkpipe_project.yaml Variables
29
- ```yaml
30
- ...
31
- connections:
32
- source:
33
- host: 'XXX'
34
- port: 'XXX'
35
- database: 'XXX'
36
- user: 'XXX'
37
- password: 'XXX'
38
- ...
39
- ```
@@ -1,25 +0,0 @@
1
- from setuptools import setup, find_packages
2
-
3
# Read the long description with a context manager so the file handle is
# closed deterministically, and decode it as UTF-8 instead of relying on
# the build machine's locale default.
with open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

# Packaging definition for the mkpipe SQL Server loader plugin.
setup(
    name='mkpipe-loader-sqlserver',
    version='0.1.1',
    license='Apache License 2.0',
    packages=find_packages(exclude=['tests', 'scripts', 'deploy', 'install_jars.py']),
    install_requires=['mkpipe'],
    include_package_data=True,
    # Registers the loader class under the 'mkpipe.loaders' entry-point
    # group with the name 'sqlserver'.
    entry_points={
        'mkpipe.loaders': [
            'sqlserver = mkpipe_loader_sqlserver:SqlserverLoader',
        ],
    },
    description='SQLServer loader for mkpipe.',
    long_description=long_description,
    long_description_content_type='text/markdown',
    author='Metin Karakus',
    author_email='metin_karakus@yahoo.com',
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: Apache Software License',
    ],
    python_requires='>=3.8',
)