PyPI - mkpipe-loader-sqlite - Versions diffs - 0.1.2__tar.gz → 0.3.0__tar.gz - Mend

mkpipe-loader-sqlite 0.1.2.tar.gz → 0.3.0.tar.gz

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (15)

{mkpipe_loader_sqlite-0.1.2/mkpipe_loader_sqlite.egg-info → mkpipe_loader_sqlite-0.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: mkpipe-loader-sqlite
-Version: 0.1.2
+Version: 0.3.0
 Summary: SQLite loader for mkpipe.
 Author: Metin Karakus
 Author-email: metin_karakus@yahoo.com
@@ -11,6 +11,16 @@ Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: mkpipe
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: license
+Dynamic: license-file
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
 # MkPipe

mkpipe_loader_sqlite-0.3.0/mkpipe_loader_sqlite/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+import os
+from mkpipe.spark import JdbcLoader
+class SqliteLoader(JdbcLoader, variant='sqlite'):
+    driver_name = 'sqlite'
+    driver_jdbc = 'org.sqlite.JDBC'
+    def build_jdbc_url(self):
+        db_path = self.connection.extra.get('db_path', self.database or 'data.db')
+        db_path = os.path.abspath(db_path)
+        return f'jdbc:sqlite:{db_path}'

{mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0/mkpipe_loader_sqlite.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: mkpipe-loader-sqlite
-Version: 0.1.2
+Version: 0.3.0
 Summary: SQLite loader for mkpipe.
 Author: Metin Karakus
 Author-email: metin_karakus@yahoo.com
@@ -11,6 +11,16 @@ Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: mkpipe
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: license
+Dynamic: license-file
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
 # MkPipe

{mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/setup.py RENAMED Viewed

@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 setup(
     name='mkpipe-loader-sqlite',
-    version='0.1.2',
+    version='0.3.0',
     license='Apache License 2.0',
     packages=find_packages(exclude=['tests', 'scripts', 'deploy', 'install_jars.py']),
     install_requires=['mkpipe'],

mkpipe_loader_sqlite-0.1.2/mkpipe_loader_sqlite/__init__.py DELETED Viewed

@@ -1,142 +0,0 @@
-import os
-import time
-from pathlib import Path
-from pyspark.sql import functions as F
-from pyspark.sql.types import TimestampType
-from mkpipe.config import load_config
-from mkpipe.functions_db import get_db_connector
-from mkpipe.functions_spark import remove_partitioned_parquet, get_parser
-from mkpipe.utils import log_container, Logger
-from mkpipe.utils.base_class import PipeSettings
-class SqliteLoader:
-    def __init__(self, config, settings):
-        if isinstance(settings, dict):
-            self.settings = PipeSettings(**settings)
-        else:
-            self.settings = settings
-        self.connection_params = config['connection_params']
-        self.db_path = os.path.abspath(self.connection_params['db_path'])
-        self.driver_name = 'sqlite'
-        self.driver_jdbc = 'org.sqlite.JDBC'
-        self.settings.driver_name = self.driver_name
-        self.jdbc_url = f'jdbc:sqlite:{self.db_path}'
-        config = load_config()
-        connection_params = config['settings']['backend']
-        db_type = connection_params['database_type']
-        self.backend = get_db_connector(db_type)(connection_params)
-    def add_custom_columns(self, df, elt_start_time):
-        if 'etl_time' in df.columns:
-            df = df.drop('etl_time')
-        df = df.withColumn('etl_time', F.lit(elt_start_time).cast(TimestampType()))
-        return df
-    @log_container(__file__)
-    def load(self, data, elt_start_time):
-        try:
-            logger = Logger(__file__)
-            start_time = time.time()
-            name = data['table_name']
-            write_mode = data.get('write_mode', None)
-            file_type = data.get('file_type', None)
-            last_point_value = data.get('last_point_value', None)
-            iterate_column_type = data.get('iterate_column_type', None)
-            replication_method = data.get('replication_method', 'full')
-            batchsize = data.get('fetchsize', 100_000)
-            pass_on_error = data.get('pass_on_error', None)
-            if not file_type:
-                'means that the data fetched before no new data'
-                self.backend.manifest_table_update(
-                    name=name,
-                    value=None,  # Last point remains unchanged
-                    value_type=None,  # Type remains unchanged
-                    status='completed',  # ('completed', 'failed', 'extracting', 'loading')
-                    replication_method=replication_method,  # ('incremental', 'full')
-                    error_message='',
-                )
-                return
-            self.backend.manifest_table_update(
-                name=name,
-                value=None,  # Last point remains unchanged
-                value_type=None,  # Type remains unchanged
-                status='loading',  # ('completed', 'failed', 'extracting', 'loading')
-                replication_method=replication_method,  # ('incremental', 'full')
-                error_message='',
-            )
-            df = get_parser(file_type)(data, self.settings)
-            df = self.add_custom_columns(df, elt_start_time)
-            message = dict(
-                table_name=name,
-                status='loading',
-                total_partition_count=df.rdd.getNumPartitions(),
-            )
-            logger.info(message)
-            (
-                df.write.format('jdbc')
-                .mode(
-                    write_mode
-                )  # Use write_mode for the first iteration, 'append' for others
-                .option('url', self.jdbc_url)
-                .option('dbtable', name)
-                .option('driver', self.driver_jdbc)
-                .option('batchsize', batchsize)
-                .save()
-            )
-            # Update last point in the mkpipe_manifest table if applicable
-            self.backend.manifest_table_update(
-                name=name,
-                value=last_point_value,
-                value_type=iterate_column_type,
-                status='completed',
-                replication_method=replication_method,
-                error_message='',
-            )
-            message = dict(table_name=name, status=write_mode)
-            logger.info(message)
-            # remove the parquet to reduce the storage
-            remove_partitioned_parquet(data['path'])
-            run_time = time.time() - start_time
-            message = dict(table_name=name, status='success', run_time=run_time)
-            logger.info(message)
-        except Exception as e:
-            # Log the error message and update the mkpipe_manifest with the error details
-            message = dict(
-                table_name=name,
-                status='failed',
-                type='loading',
-                error_message=str(e),
-                etl_start_time=str(elt_start_time),
-            )
-            self.backend.manifest_table_update(
-                name=name,
-                value=None,  # Last point remains unchanged
-                value_type=None,  # Type remains unchanged
-                status='failed',
-                replication_method=replication_method,
-                error_message=str(e),
-            )
-            if pass_on_error:
-                logger.warning(message)
-                return
-            else:
-                logger.error(message)
-                raise Exception(message) from e
-        return

{mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/LICENSE RENAMED Viewed

File without changes

{mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/MANIFEST.in RENAMED Viewed

File without changes

{mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/README.md RENAMED Viewed

File without changes

{mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/mkpipe_loader_sqlite/jars/org.xerial_sqlite-jdbc-3.47.1.0.jar RENAMED Viewed

File without changes

{mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/mkpipe_loader_sqlite.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/mkpipe_loader_sqlite.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/mkpipe_loader_sqlite.egg-info/entry_points.txt RENAMED Viewed

File without changes

{mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/mkpipe_loader_sqlite.egg-info/requires.txt RENAMED Viewed

File without changes

{mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/mkpipe_loader_sqlite.egg-info/top_level.txt RENAMED Viewed

File without changes

{mkpipe_loader_sqlite-0.1.2 → mkpipe_loader_sqlite-0.3.0}/setup.cfg RENAMED Viewed

File without changes

mkpipe-loader-sqlite 0.1.2__tar.gz → 0.3.0__tar.gz

mkpipe-loader-sqlite 0.1.2.tar.gz → 0.3.0.tar.gz