mkpipe-loader-sqlite 0.1.2__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mkpipe_loader_sqlite-0.1.2/mkpipe_loader_sqlite.egg-info → mkpipe_loader_sqlite-0.3.0}/PKG-INFO +12 -2
- mkpipe_loader_sqlite-0.3.0/mkpipe_loader_sqlite/__init__.py +13 -0
- {mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0/mkpipe_loader_sqlite.egg-info}/PKG-INFO +12 -2
- {mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/setup.py +1 -1
- mkpipe_loader_sqlite-0.1.2/mkpipe_loader_sqlite/__init__.py +0 -142
- {mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/LICENSE +0 -0
- {mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/MANIFEST.in +0 -0
- {mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/README.md +0 -0
- {mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/mkpipe_loader_sqlite/jars/org.xerial_sqlite-jdbc-3.47.1.0.jar +0 -0
- {mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/mkpipe_loader_sqlite.egg-info/SOURCES.txt +0 -0
- {mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/mkpipe_loader_sqlite.egg-info/dependency_links.txt +0 -0
- {mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/mkpipe_loader_sqlite.egg-info/entry_points.txt +0 -0
- {mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/mkpipe_loader_sqlite.egg-info/requires.txt +0 -0
- {mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/mkpipe_loader_sqlite.egg-info/top_level.txt +0 -0
- {mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/setup.cfg +0 -0
{mkpipe_loader_sqlite-0.1.2/mkpipe_loader_sqlite.egg-info → mkpipe_loader_sqlite-0.3.0}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: mkpipe-loader-sqlite
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: SQLite loader for mkpipe.
|
|
5
5
|
Author: Metin Karakus
|
|
6
6
|
Author-email: metin_karakus@yahoo.com
|
|
@@ -11,6 +11,16 @@ Requires-Python: >=3.8
|
|
|
11
11
|
Description-Content-Type: text/markdown
|
|
12
12
|
License-File: LICENSE
|
|
13
13
|
Requires-Dist: mkpipe
|
|
14
|
+
Dynamic: author
|
|
15
|
+
Dynamic: author-email
|
|
16
|
+
Dynamic: classifier
|
|
17
|
+
Dynamic: description
|
|
18
|
+
Dynamic: description-content-type
|
|
19
|
+
Dynamic: license
|
|
20
|
+
Dynamic: license-file
|
|
21
|
+
Dynamic: requires-dist
|
|
22
|
+
Dynamic: requires-python
|
|
23
|
+
Dynamic: summary
|
|
14
24
|
|
|
15
25
|
# MkPipe
|
|
16
26
|
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from mkpipe.spark import JdbcLoader
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class SqliteLoader(JdbcLoader, variant='sqlite'):
|
|
7
|
+
driver_name = 'sqlite'
|
|
8
|
+
driver_jdbc = 'org.sqlite.JDBC'
|
|
9
|
+
|
|
10
|
+
def build_jdbc_url(self):
|
|
11
|
+
db_path = self.connection.extra.get('db_path', self.database or 'data.db')
|
|
12
|
+
db_path = os.path.abspath(db_path)
|
|
13
|
+
return f'jdbc:sqlite:{db_path}'
|
{mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0/mkpipe_loader_sqlite.egg-info}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: mkpipe-loader-sqlite
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: SQLite loader for mkpipe.
|
|
5
5
|
Author: Metin Karakus
|
|
6
6
|
Author-email: metin_karakus@yahoo.com
|
|
@@ -11,6 +11,16 @@ Requires-Python: >=3.8
|
|
|
11
11
|
Description-Content-Type: text/markdown
|
|
12
12
|
License-File: LICENSE
|
|
13
13
|
Requires-Dist: mkpipe
|
|
14
|
+
Dynamic: author
|
|
15
|
+
Dynamic: author-email
|
|
16
|
+
Dynamic: classifier
|
|
17
|
+
Dynamic: description
|
|
18
|
+
Dynamic: description-content-type
|
|
19
|
+
Dynamic: license
|
|
20
|
+
Dynamic: license-file
|
|
21
|
+
Dynamic: requires-dist
|
|
22
|
+
Dynamic: requires-python
|
|
23
|
+
Dynamic: summary
|
|
14
24
|
|
|
15
25
|
# MkPipe
|
|
16
26
|
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name='mkpipe-loader-sqlite',
|
|
5
|
-
version='0.1.2',
|
|
5
|
+
version='0.3.0',
|
|
6
6
|
license='Apache License 2.0',
|
|
7
7
|
packages=find_packages(exclude=['tests', 'scripts', 'deploy', 'install_jars.py']),
|
|
8
8
|
install_requires=['mkpipe'],
|
|
@@ -1,142 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import time
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
from pyspark.sql import functions as F
|
|
5
|
-
from pyspark.sql.types import TimestampType
|
|
6
|
-
from mkpipe.config import load_config
|
|
7
|
-
from mkpipe.functions_db import get_db_connector
|
|
8
|
-
from mkpipe.functions_spark import remove_partitioned_parquet, get_parser
|
|
9
|
-
from mkpipe.utils import log_container, Logger
|
|
10
|
-
from mkpipe.utils.base_class import PipeSettings
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class SqliteLoader:
|
|
14
|
-
def __init__(self, config, settings):
|
|
15
|
-
if isinstance(settings, dict):
|
|
16
|
-
self.settings = PipeSettings(**settings)
|
|
17
|
-
else:
|
|
18
|
-
self.settings = settings
|
|
19
|
-
self.connection_params = config['connection_params']
|
|
20
|
-
|
|
21
|
-
self.db_path = os.path.abspath(self.connection_params['db_path'])
|
|
22
|
-
|
|
23
|
-
self.driver_name = 'sqlite'
|
|
24
|
-
self.driver_jdbc = 'org.sqlite.JDBC'
|
|
25
|
-
self.settings.driver_name = self.driver_name
|
|
26
|
-
self.jdbc_url = f'jdbc:sqlite:{self.db_path}'
|
|
27
|
-
|
|
28
|
-
config = load_config()
|
|
29
|
-
connection_params = config['settings']['backend']
|
|
30
|
-
db_type = connection_params['database_type']
|
|
31
|
-
self.backend = get_db_connector(db_type)(connection_params)
|
|
32
|
-
|
|
33
|
-
def add_custom_columns(self, df, elt_start_time):
|
|
34
|
-
if 'etl_time' in df.columns:
|
|
35
|
-
df = df.drop('etl_time')
|
|
36
|
-
|
|
37
|
-
df = df.withColumn('etl_time', F.lit(elt_start_time).cast(TimestampType()))
|
|
38
|
-
return df
|
|
39
|
-
|
|
40
|
-
@log_container(__file__)
|
|
41
|
-
def load(self, data, elt_start_time):
|
|
42
|
-
try:
|
|
43
|
-
logger = Logger(__file__)
|
|
44
|
-
start_time = time.time()
|
|
45
|
-
name = data['table_name']
|
|
46
|
-
|
|
47
|
-
write_mode = data.get('write_mode', None)
|
|
48
|
-
file_type = data.get('file_type', None)
|
|
49
|
-
last_point_value = data.get('last_point_value', None)
|
|
50
|
-
iterate_column_type = data.get('iterate_column_type', None)
|
|
51
|
-
replication_method = data.get('replication_method', 'full')
|
|
52
|
-
batchsize = data.get('fetchsize', 100_000)
|
|
53
|
-
pass_on_error = data.get('pass_on_error', None)
|
|
54
|
-
|
|
55
|
-
if not file_type:
|
|
56
|
-
'means that the data fetched before no new data'
|
|
57
|
-
self.backend.manifest_table_update(
|
|
58
|
-
name=name,
|
|
59
|
-
value=None, # Last point remains unchanged
|
|
60
|
-
value_type=None, # Type remains unchanged
|
|
61
|
-
status='completed', # ('completed', 'failed', 'extracting', 'loading')
|
|
62
|
-
replication_method=replication_method, # ('incremental', 'full')
|
|
63
|
-
error_message='',
|
|
64
|
-
)
|
|
65
|
-
return
|
|
66
|
-
|
|
67
|
-
self.backend.manifest_table_update(
|
|
68
|
-
name=name,
|
|
69
|
-
value=None, # Last point remains unchanged
|
|
70
|
-
value_type=None, # Type remains unchanged
|
|
71
|
-
status='loading', # ('completed', 'failed', 'extracting', 'loading')
|
|
72
|
-
replication_method=replication_method, # ('incremental', 'full')
|
|
73
|
-
error_message='',
|
|
74
|
-
)
|
|
75
|
-
|
|
76
|
-
df = get_parser(file_type)(data, self.settings)
|
|
77
|
-
df = self.add_custom_columns(df, elt_start_time)
|
|
78
|
-
message = dict(
|
|
79
|
-
table_name=name,
|
|
80
|
-
status='loading',
|
|
81
|
-
total_partition_count=df.rdd.getNumPartitions(),
|
|
82
|
-
)
|
|
83
|
-
logger.info(message)
|
|
84
|
-
|
|
85
|
-
(
|
|
86
|
-
df.write.format('jdbc')
|
|
87
|
-
.mode(
|
|
88
|
-
write_mode
|
|
89
|
-
) # Use write_mode for the first iteration, 'append' for others
|
|
90
|
-
.option('url', self.jdbc_url)
|
|
91
|
-
.option('dbtable', name)
|
|
92
|
-
.option('driver', self.driver_jdbc)
|
|
93
|
-
.option('batchsize', batchsize)
|
|
94
|
-
.save()
|
|
95
|
-
)
|
|
96
|
-
|
|
97
|
-
# Update last point in the mkpipe_manifest table if applicable
|
|
98
|
-
self.backend.manifest_table_update(
|
|
99
|
-
name=name,
|
|
100
|
-
value=last_point_value,
|
|
101
|
-
value_type=iterate_column_type,
|
|
102
|
-
status='completed',
|
|
103
|
-
replication_method=replication_method,
|
|
104
|
-
error_message='',
|
|
105
|
-
)
|
|
106
|
-
|
|
107
|
-
message = dict(table_name=name, status=write_mode)
|
|
108
|
-
logger.info(message)
|
|
109
|
-
|
|
110
|
-
# remove the parquet to reduce the storage
|
|
111
|
-
remove_partitioned_parquet(data['path'])
|
|
112
|
-
|
|
113
|
-
run_time = time.time() - start_time
|
|
114
|
-
message = dict(table_name=name, status='success', run_time=run_time)
|
|
115
|
-
logger.info(message)
|
|
116
|
-
|
|
117
|
-
except Exception as e:
|
|
118
|
-
# Log the error message and update the mkpipe_manifest with the error details
|
|
119
|
-
message = dict(
|
|
120
|
-
table_name=name,
|
|
121
|
-
status='failed',
|
|
122
|
-
type='loading',
|
|
123
|
-
error_message=str(e),
|
|
124
|
-
etl_start_time=str(elt_start_time),
|
|
125
|
-
)
|
|
126
|
-
|
|
127
|
-
self.backend.manifest_table_update(
|
|
128
|
-
name=name,
|
|
129
|
-
value=None, # Last point remains unchanged
|
|
130
|
-
value_type=None, # Type remains unchanged
|
|
131
|
-
status='failed',
|
|
132
|
-
replication_method=replication_method,
|
|
133
|
-
error_message=str(e),
|
|
134
|
-
)
|
|
135
|
-
|
|
136
|
-
if pass_on_error:
|
|
137
|
-
logger.warning(message)
|
|
138
|
-
return
|
|
139
|
-
else:
|
|
140
|
-
logger.error(message)
|
|
141
|
-
raise Exception(message) from e
|
|
142
|
-
return
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/mkpipe_loader_sqlite.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/mkpipe_loader_sqlite.egg-info/requires.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|