mkpipe-loader-sqlserver 0.1.1__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mkpipe_loader_sqlserver-0.4.0/MANIFEST.in +1 -0
- mkpipe_loader_sqlserver-0.4.0/PKG-INFO +17 -0
- mkpipe_loader_sqlserver-0.4.0/README.md +3 -0
- mkpipe_loader_sqlserver-0.4.0/mkpipe_loader_sqlserver/__init__.py +18 -0
- mkpipe_loader_sqlserver-0.4.0/mkpipe_loader_sqlserver.egg-info/PKG-INFO +17 -0
- {mkpipe_loader_sqlserver-0.1.1 → mkpipe_loader_sqlserver-0.4.0}/mkpipe_loader_sqlserver.egg-info/SOURCES.txt +1 -2
- mkpipe_loader_sqlserver-0.4.0/setup.py +19 -0
- mkpipe_loader_sqlserver-0.1.1/MANIFEST.in +0 -1
- mkpipe_loader_sqlserver-0.1.1/PKG-INFO +0 -39
- mkpipe_loader_sqlserver-0.1.1/README.md +0 -25
- mkpipe_loader_sqlserver-0.1.1/mkpipe_loader_sqlserver/__init__.py +0 -146
- mkpipe_loader_sqlserver-0.1.1/mkpipe_loader_sqlserver/jars/com.microsoft.sqlserver_mssql-jdbc-12.8.1.jre11.jar +0 -0
- mkpipe_loader_sqlserver-0.1.1/mkpipe_loader_sqlserver.egg-info/PKG-INFO +0 -39
- mkpipe_loader_sqlserver-0.1.1/setup.py +0 -25
- {mkpipe_loader_sqlserver-0.1.1 → mkpipe_loader_sqlserver-0.4.0}/LICENSE +0 -0
- {mkpipe_loader_sqlserver-0.1.1 → mkpipe_loader_sqlserver-0.4.0}/mkpipe_loader_sqlserver.egg-info/dependency_links.txt +0 -0
- {mkpipe_loader_sqlserver-0.1.1 → mkpipe_loader_sqlserver-0.4.0}/mkpipe_loader_sqlserver.egg-info/entry_points.txt +0 -0
- {mkpipe_loader_sqlserver-0.1.1 → mkpipe_loader_sqlserver-0.4.0}/mkpipe_loader_sqlserver.egg-info/requires.txt +0 -0
- {mkpipe_loader_sqlserver-0.1.1 → mkpipe_loader_sqlserver-0.4.0}/mkpipe_loader_sqlserver.egg-info/top_level.txt +0 -0
- {mkpipe_loader_sqlserver-0.1.1 → mkpipe_loader_sqlserver-0.4.0}/setup.cfg +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
include mkpipe_extractor_postgres/jars/*
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mkpipe-loader-sqlserver
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: SQL Server loader for mkpipe.
|
|
5
|
+
Author: Metin Karakus
|
|
6
|
+
Author-email: metin_karakus@yahoo.com
|
|
7
|
+
License: Apache License 2.0
|
|
8
|
+
Requires-Python: >=3.9
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: mkpipe
|
|
11
|
+
Dynamic: author
|
|
12
|
+
Dynamic: author-email
|
|
13
|
+
Dynamic: license
|
|
14
|
+
Dynamic: license-file
|
|
15
|
+
Dynamic: requires-dist
|
|
16
|
+
Dynamic: requires-python
|
|
17
|
+
Dynamic: summary
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from urllib.parse import unquote
|
|
2
|
+
|
|
3
|
+
from mkpipe.spark import JdbcLoader
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class SqlserverLoader(JdbcLoader, variant='sqlserver'):
|
|
7
|
+
driver_name = 'sqlserver'
|
|
8
|
+
driver_jdbc = 'com.microsoft.sqlserver.jdbc.SQLServerDriver'
|
|
9
|
+
|
|
10
|
+
def build_jdbc_url(self):
|
|
11
|
+
password = unquote(self.password)
|
|
12
|
+
return (
|
|
13
|
+
f'jdbc:{self.driver_name}://{self.host}:{self.port}'
|
|
14
|
+
f';databaseName={self.database}'
|
|
15
|
+
f';user={self.username}'
|
|
16
|
+
f';password={password}'
|
|
17
|
+
f';encrypt=false;trustServerCertificate=false'
|
|
18
|
+
)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mkpipe-loader-sqlserver
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: SQL Server loader for mkpipe.
|
|
5
|
+
Author: Metin Karakus
|
|
6
|
+
Author-email: metin_karakus@yahoo.com
|
|
7
|
+
License: Apache License 2.0
|
|
8
|
+
Requires-Python: >=3.9
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: mkpipe
|
|
11
|
+
Dynamic: author
|
|
12
|
+
Dynamic: author-email
|
|
13
|
+
Dynamic: license
|
|
14
|
+
Dynamic: license-file
|
|
15
|
+
Dynamic: requires-dist
|
|
16
|
+
Dynamic: requires-python
|
|
17
|
+
Dynamic: summary
|
|
@@ -8,5 +8,4 @@ mkpipe_loader_sqlserver.egg-info/SOURCES.txt
|
|
|
8
8
|
mkpipe_loader_sqlserver.egg-info/dependency_links.txt
|
|
9
9
|
mkpipe_loader_sqlserver.egg-info/entry_points.txt
|
|
10
10
|
mkpipe_loader_sqlserver.egg-info/requires.txt
|
|
11
|
-
mkpipe_loader_sqlserver.egg-info/top_level.txt
|
|
12
|
-
mkpipe_loader_sqlserver/jars/com.microsoft.sqlserver_mssql-jdbc-12.8.1.jre11.jar
|
|
11
|
+
mkpipe_loader_sqlserver.egg-info/top_level.txt
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from setuptools import setup, find_packages
|
|
2
|
+
|
|
3
|
+
setup(
|
|
4
|
+
name='mkpipe-loader-sqlserver',
|
|
5
|
+
version='0.4.0',
|
|
6
|
+
license='Apache License 2.0',
|
|
7
|
+
packages=find_packages(),
|
|
8
|
+
install_requires=['mkpipe'],
|
|
9
|
+
include_package_data=True,
|
|
10
|
+
entry_points={
|
|
11
|
+
'mkpipe.loaders': [
|
|
12
|
+
'sqlserver = mkpipe_loader_sqlserver:SqlserverLoader',
|
|
13
|
+
],
|
|
14
|
+
},
|
|
15
|
+
description='SQL Server loader for mkpipe.',
|
|
16
|
+
author='Metin Karakus',
|
|
17
|
+
author_email='metin_karakus@yahoo.com',
|
|
18
|
+
python_requires='>=3.9',
|
|
19
|
+
)
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
include mkpipe_loader_sqlserver/jars/*
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: mkpipe-loader-sqlserver
|
|
3
|
-
Version: 0.1.1
|
|
4
|
-
Summary: SQLServer loader for mkpipe.
|
|
5
|
-
Author: Metin Karakus
|
|
6
|
-
Author-email: metin_karakus@yahoo.com
|
|
7
|
-
License: Apache License 2.0
|
|
8
|
-
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
|
-
Requires-Python: >=3.8
|
|
11
|
-
Description-Content-Type: text/markdown
|
|
12
|
-
License-File: LICENSE
|
|
13
|
-
Requires-Dist: mkpipe
|
|
14
|
-
|
|
15
|
-
# MkPipe
|
|
16
|
-
|
|
17
|
-
**MkPipe** is a modular, open-source ETL (Extract, Transform, Load) tool that allows you to integrate various data sources and sinks easily. It is designed to be extensible with a plugin-based architecture that supports extractors, transformers, and loaders.
|
|
18
|
-
|
|
19
|
-
## Documentation
|
|
20
|
-
|
|
21
|
-
For more detailed documentation, please visit the [GitHub repository](https://github.com/mkpipe-etl/mkpipe).
|
|
22
|
-
|
|
23
|
-
## License
|
|
24
|
-
|
|
25
|
-
This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details.
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
## mkpipe_project.yaml Variables
|
|
29
|
-
```yaml
|
|
30
|
-
...
|
|
31
|
-
connections:
|
|
32
|
-
source:
|
|
33
|
-
host: 'XXX'
|
|
34
|
-
port: 'XXX'
|
|
35
|
-
database: 'XXX'
|
|
36
|
-
user: 'XXX'
|
|
37
|
-
password: 'XXX'
|
|
38
|
-
...
|
|
39
|
-
```
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
# MkPipe
|
|
2
|
-
|
|
3
|
-
**MkPipe** is a modular, open-source ETL (Extract, Transform, Load) tool that allows you to integrate various data sources and sinks easily. It is designed to be extensible with a plugin-based architecture that supports extractors, transformers, and loaders.
|
|
4
|
-
|
|
5
|
-
## Documentation
|
|
6
|
-
|
|
7
|
-
For more detailed documentation, please visit the [GitHub repository](https://github.com/mkpipe-etl/mkpipe).
|
|
8
|
-
|
|
9
|
-
## License
|
|
10
|
-
|
|
11
|
-
This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details.
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
## mkpipe_project.yaml Variables
|
|
15
|
-
```yaml
|
|
16
|
-
...
|
|
17
|
-
connections:
|
|
18
|
-
source:
|
|
19
|
-
host: 'XXX'
|
|
20
|
-
port: 'XXX'
|
|
21
|
-
database: 'XXX'
|
|
22
|
-
user: 'XXX'
|
|
23
|
-
password: 'XXX'
|
|
24
|
-
...
|
|
25
|
-
```
|
|
@@ -1,146 +0,0 @@
|
|
|
1
|
-
import time
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
from urllib.parse import quote_plus
|
|
4
|
-
from pyspark.sql import functions as F
|
|
5
|
-
from pyspark.sql.types import TimestampType
|
|
6
|
-
from mkpipe.config import load_config
|
|
7
|
-
from mkpipe.functions_db import get_db_connector
|
|
8
|
-
from mkpipe.functions_spark import remove_partitioned_parquet, get_parser
|
|
9
|
-
from mkpipe.utils import log_container, Logger
|
|
10
|
-
from mkpipe.utils.base_class import PipeSettings
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class SqlserverLoader:
|
|
14
|
-
def __init__(self, config, settings):
|
|
15
|
-
if isinstance(settings, dict):
|
|
16
|
-
self.settings = PipeSettings(**settings)
|
|
17
|
-
else:
|
|
18
|
-
self.settings = settings
|
|
19
|
-
self.connection_params = config['connection_params']
|
|
20
|
-
self.host = self.connection_params['host']
|
|
21
|
-
self.port = self.connection_params['port']
|
|
22
|
-
self.username = self.connection_params['user']
|
|
23
|
-
# self.password = quote_plus(str(self.connection_params['password']))
|
|
24
|
-
self.password = str(self.connection_params['password'])
|
|
25
|
-
self.database = self.connection_params['database']
|
|
26
|
-
|
|
27
|
-
self.driver_name = 'sqlserver'
|
|
28
|
-
self.driver_jdbc = 'com.microsoft.sqlserver.jdbc.SQLServerDriver'
|
|
29
|
-
self.settings.driver_name = self.driver_name
|
|
30
|
-
self.jdbc_url = f'jdbc:{self.driver_name}://{self.host}:{self.port};databaseName={self.database};user={self.username};password={self.password};encrypt=false;trustServerCertificate=false'
|
|
31
|
-
|
|
32
|
-
config = load_config()
|
|
33
|
-
connection_params = config['settings']['backend']
|
|
34
|
-
db_type = connection_params['database_type']
|
|
35
|
-
self.backend = get_db_connector(db_type)(connection_params)
|
|
36
|
-
|
|
37
|
-
def add_custom_columns(self, df, elt_start_time):
|
|
38
|
-
if 'etl_time' in df.columns:
|
|
39
|
-
df = df.drop('etl_time')
|
|
40
|
-
|
|
41
|
-
df = df.withColumn('etl_time', F.lit(elt_start_time).cast(TimestampType()))
|
|
42
|
-
return df
|
|
43
|
-
|
|
44
|
-
@log_container(__file__)
|
|
45
|
-
def load(self, data, elt_start_time):
|
|
46
|
-
try:
|
|
47
|
-
logger = Logger(__file__)
|
|
48
|
-
start_time = time.time()
|
|
49
|
-
name = data['table_name']
|
|
50
|
-
|
|
51
|
-
write_mode = data.get('write_mode', None)
|
|
52
|
-
file_type = data.get('file_type', None)
|
|
53
|
-
last_point_value = data.get('last_point_value', None)
|
|
54
|
-
iterate_column_type = data.get('iterate_column_type', None)
|
|
55
|
-
replication_method = data.get('replication_method', 'full')
|
|
56
|
-
batchsize = data.get('fetchsize', 100_000)
|
|
57
|
-
pass_on_error = data.get('pass_on_error', None)
|
|
58
|
-
|
|
59
|
-
if not file_type:
|
|
60
|
-
'means that the data fetched before no new data'
|
|
61
|
-
self.backend.manifest_table_update(
|
|
62
|
-
name=name,
|
|
63
|
-
value=None, # Last point remains unchanged
|
|
64
|
-
value_type=None, # Type remains unchanged
|
|
65
|
-
status='completed', # ('completed', 'failed', 'extracting', 'loading')
|
|
66
|
-
replication_method=replication_method, # ('incremental', 'full')
|
|
67
|
-
error_message='',
|
|
68
|
-
)
|
|
69
|
-
return
|
|
70
|
-
|
|
71
|
-
self.backend.manifest_table_update(
|
|
72
|
-
name=name,
|
|
73
|
-
value=None, # Last point remains unchanged
|
|
74
|
-
value_type=None, # Type remains unchanged
|
|
75
|
-
status='loading', # ('completed', 'failed', 'extracting', 'loading')
|
|
76
|
-
replication_method=replication_method, # ('incremental', 'full')
|
|
77
|
-
error_message='',
|
|
78
|
-
)
|
|
79
|
-
|
|
80
|
-
df = get_parser(file_type)(data, self.settings)
|
|
81
|
-
df = self.add_custom_columns(df, elt_start_time)
|
|
82
|
-
message = dict(
|
|
83
|
-
table_name=name,
|
|
84
|
-
status='loading',
|
|
85
|
-
total_partition_count=df.rdd.getNumPartitions(),
|
|
86
|
-
)
|
|
87
|
-
logger.info(message)
|
|
88
|
-
|
|
89
|
-
(
|
|
90
|
-
df.write.format('jdbc')
|
|
91
|
-
.mode(
|
|
92
|
-
write_mode
|
|
93
|
-
) # Use write_mode for the first iteration, 'append' for others
|
|
94
|
-
.option('url', self.jdbc_url)
|
|
95
|
-
.option('dbtable', name)
|
|
96
|
-
.option('driver', self.driver_jdbc)
|
|
97
|
-
.option('batchsize', batchsize)
|
|
98
|
-
.save()
|
|
99
|
-
)
|
|
100
|
-
|
|
101
|
-
# Update last point in the mkpipe_manifest table if applicable
|
|
102
|
-
self.backend.manifest_table_update(
|
|
103
|
-
name=name,
|
|
104
|
-
value=last_point_value,
|
|
105
|
-
value_type=iterate_column_type,
|
|
106
|
-
status='completed',
|
|
107
|
-
replication_method=replication_method,
|
|
108
|
-
error_message='',
|
|
109
|
-
)
|
|
110
|
-
|
|
111
|
-
message = dict(table_name=name, status=write_mode)
|
|
112
|
-
logger.info(message)
|
|
113
|
-
|
|
114
|
-
# remove the parquet to reduce the storage
|
|
115
|
-
remove_partitioned_parquet(data['path'])
|
|
116
|
-
|
|
117
|
-
run_time = time.time() - start_time
|
|
118
|
-
message = dict(table_name=name, status='success', run_time=run_time)
|
|
119
|
-
logger.info(message)
|
|
120
|
-
|
|
121
|
-
except Exception as e:
|
|
122
|
-
# Log the error message and update the mkpipe_manifest with the error details
|
|
123
|
-
message = dict(
|
|
124
|
-
table_name=name,
|
|
125
|
-
status='failed',
|
|
126
|
-
type='loading',
|
|
127
|
-
error_message=str(e),
|
|
128
|
-
etl_start_time=str(elt_start_time),
|
|
129
|
-
)
|
|
130
|
-
|
|
131
|
-
self.backend.manifest_table_update(
|
|
132
|
-
name=name,
|
|
133
|
-
value=None, # Last point remains unchanged
|
|
134
|
-
value_type=None, # Type remains unchanged
|
|
135
|
-
status='failed',
|
|
136
|
-
replication_method=replication_method,
|
|
137
|
-
error_message=str(e),
|
|
138
|
-
)
|
|
139
|
-
|
|
140
|
-
if pass_on_error:
|
|
141
|
-
logger.warning(message)
|
|
142
|
-
return
|
|
143
|
-
else:
|
|
144
|
-
logger.error(message)
|
|
145
|
-
raise Exception(message) from e
|
|
146
|
-
return
|
|
Binary file
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: mkpipe-loader-sqlserver
|
|
3
|
-
Version: 0.1.1
|
|
4
|
-
Summary: SQLServer loader for mkpipe.
|
|
5
|
-
Author: Metin Karakus
|
|
6
|
-
Author-email: metin_karakus@yahoo.com
|
|
7
|
-
License: Apache License 2.0
|
|
8
|
-
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
|
-
Requires-Python: >=3.8
|
|
11
|
-
Description-Content-Type: text/markdown
|
|
12
|
-
License-File: LICENSE
|
|
13
|
-
Requires-Dist: mkpipe
|
|
14
|
-
|
|
15
|
-
# MkPipe
|
|
16
|
-
|
|
17
|
-
**MkPipe** is a modular, open-source ETL (Extract, Transform, Load) tool that allows you to integrate various data sources and sinks easily. It is designed to be extensible with a plugin-based architecture that supports extractors, transformers, and loaders.
|
|
18
|
-
|
|
19
|
-
## Documentation
|
|
20
|
-
|
|
21
|
-
For more detailed documentation, please visit the [GitHub repository](https://github.com/mkpipe-etl/mkpipe).
|
|
22
|
-
|
|
23
|
-
## License
|
|
24
|
-
|
|
25
|
-
This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details.
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
## mkpipe_project.yaml Variables
|
|
29
|
-
```yaml
|
|
30
|
-
...
|
|
31
|
-
connections:
|
|
32
|
-
source:
|
|
33
|
-
host: 'XXX'
|
|
34
|
-
port: 'XXX'
|
|
35
|
-
database: 'XXX'
|
|
36
|
-
user: 'XXX'
|
|
37
|
-
password: 'XXX'
|
|
38
|
-
...
|
|
39
|
-
```
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
from setuptools import setup, find_packages
|
|
2
|
-
|
|
3
|
-
setup(
|
|
4
|
-
name='mkpipe-loader-sqlserver',
|
|
5
|
-
version='0.1.1',
|
|
6
|
-
license='Apache License 2.0',
|
|
7
|
-
packages=find_packages(exclude=['tests', 'scripts', 'deploy', 'install_jars.py']),
|
|
8
|
-
install_requires=['mkpipe'],
|
|
9
|
-
include_package_data=True,
|
|
10
|
-
entry_points={
|
|
11
|
-
'mkpipe.loaders': [
|
|
12
|
-
'sqlserver = mkpipe_loader_sqlserver:SqlserverLoader',
|
|
13
|
-
],
|
|
14
|
-
},
|
|
15
|
-
description='SQLServer loader for mkpipe.',
|
|
16
|
-
long_description=open('README.md').read(),
|
|
17
|
-
long_description_content_type='text/markdown',
|
|
18
|
-
author='Metin Karakus',
|
|
19
|
-
author_email='metin_karakus@yahoo.com',
|
|
20
|
-
classifiers=[
|
|
21
|
-
'Programming Language :: Python :: 3',
|
|
22
|
-
'License :: OSI Approved :: Apache Software License',
|
|
23
|
-
],
|
|
24
|
-
python_requires='>=3.8',
|
|
25
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|