dbhydra 2.1.0__tar.gz → 2.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dbhydra-2.1.0 → dbhydra-2.1.2}/PKG-INFO +1 -1
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/abstract_db.py +3 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/abstract_table.py +5 -5
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/tables.py +41 -23
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/xlsx_db.py +8 -6
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra.egg-info/PKG-INFO +1 -1
- {dbhydra-2.1.0 → dbhydra-2.1.2}/setup.py +1 -1
- {dbhydra-2.1.0 → dbhydra-2.1.2}/LICENSE +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/README.md +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/__init__.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/dbhydra_core.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/__init__.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/bigquery_db.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/errors/__init__.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/errors/exceptions.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/migrator.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/mongo_db.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/mysql_db.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/postgres_db.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/sqlserver_db.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/tests/__init__.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/tests/test_cases.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/tests/test_mongo.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/tests/test_sql.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra.egg-info/SOURCES.txt +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra.egg-info/dependency_links.txt +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra.egg-info/requires.txt +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra.egg-info/top_level.txt +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/setup.cfg +0 -0
dbhydra/src/abstract_db.py

@@ -1,10 +1,12 @@
 import abc
 import threading
 from contextlib import contextmanager
+from typing import Optional

 from dbhydra.src.migrator import Migrator
 from dbhydra.src.tables import AbstractTable

+
 def read_connection_details(config_file):
     def read_file(file):
         """Reads txt file -> list"""
@@ -102,6 +104,7 @@ class AbstractDb(abc.ABC):
         # self.connect_to_db()

         self.active_transactions=[]
+        self.last_table_inserted_into: Optional[str] = None

     @abc.abstractmethod
     def connect_locally(self):
dbhydra/src/abstract_table.py

@@ -241,10 +241,6 @@ class AbstractTable(AbstractJoinable, abc.ABC):
         if self.columns is not None and self.types is not None:
             assert len(self.columns) == len(self.types)

-            # Hotfix: flatten nested lists (otherwise crashes might happen)
-            # TODO: Search for the causes and implement better handling
-            for i, column in enumerate(self.columns):
-                self.columns[i] = column[0] if type(column) == list else column
             self.column_type_dict={self.columns[i]:self.types[i] for i,x in enumerate(self.columns)}
         else:
             self.column_type_dict={}
@@ -361,6 +357,8 @@ class AbstractTable(AbstractJoinable, abc.ABC):

         return df_copy

+    def extract_last_id(self) -> Any:
+        raise NotImplementedError("Method not implemented for this subclass")

     def insert_from_df(self, df, batch=1, try_mode=False, debug_mode=False, adjust_df=False, insert_id=False):
         if debug_mode:
@@ -407,7 +405,9 @@ class AbstractTable(AbstractJoinable, abc.ABC):
         # rows[i][j] = "'" + record + "'"
         #print(rows)
         rows = df.values.tolist()
-
+        result = self.insert(rows, batch=batch, try_mode=try_mode, debug_mode=False, insert_id=insert_id)
+        self.db1.last_table_inserted_into = self.name
+        return result

         #TODO: need to solve inserting in different column_order
         #check df column names, permute if needed
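Taken together, the abstract_table.py hunks change `insert_from_df` to return the result of the underlying `insert` call and to record the target table on the owning database object, while `extract_last_id` becomes an overridable hook for reading back the ID of that insert. A minimal sketch of the intended call pattern, assuming an already-constructed and connected table object; the import path is taken from the file list above, everything else here is illustrative:

```python
import pandas as pd

from dbhydra.src.tables import MysqlTable  # concrete subclass that overrides extract_last_id


def insert_and_return_id(table: MysqlTable, df: pd.DataFrame):
    """Insert a DataFrame and read back the ID of the last inserted row.

    Assumes `table` was built against a connected database object, e.g.
    MysqlTable(db, "users", ["id", "name"], ["int", "varchar(100)"]) -- names are illustrative.
    """
    result = table.insert_from_df(df)   # also sets db.last_table_inserted_into = table.name
    last_id = table.extract_last_id()   # assert passes: nothing else was inserted in between
    return result, last_id
```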
dbhydra/src/tables.py

@@ -83,20 +83,21 @@ class PostgresTable(AbstractTable):
         print("==========================================")

     def initialize_columns(self):
-
-
-
+        """
+        TODO Dominik: Check for usecases of this method. Isn't it somewhat duplicated by `get_all_columns`?
+        """
+        columns = self.get_all_columns()
         self.columns = columns

     def initialize_types(self):
         self.types = self.get_all_types()

     def get_all_columns(self):
-        information_schema_table =
-        query = "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '
-        columns = information_schema_table.select(query)
+        information_schema_table = PostgresTable(self.db1, 'INFORMATION_SCHEMA.COLUMNS')
+        query = f"SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{self.name}';"
+        columns = information_schema_table.select(query, flattening_of_results=True)

-        return
+        return columns

     def convert_types_from_mysql(self):
         inverse_dict_mysql_to_postgres = dict(zip(POSTGRES_TO_MYSQL_DATA_MAPPING.values(), POSTGRES_TO_MYSQL_DATA_MAPPING.keys()))
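The rewritten `get_all_columns` leans on the `flattening_of_results=True` argument of `select` instead of the flattening hotfix removed from `AbstractTable.__init__`. That flag's implementation is not shown in this diff, so the following is only a sketch of the presumed behaviour: a one-column result set arrives as single-element rows, and flattening reduces it to a plain list of column names.

```python
# Presumed behaviour of flattening_of_results, not verified against select():
raw_rows = [("id",), ("name",), ("created_at",)]   # typical one-column INFORMATION_SCHEMA result

flattened = [row[0] for row in raw_rows]           # -> ["id", "name", "created_at"]
assert flattened == ["id", "name", "created_at"]
```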
@@ -104,8 +105,7 @@ class PostgresTable(AbstractTable):
         self.types = postgres_types

     def get_all_types(self):
-
-        information_schema_table = Table(self.db1, 'INFORMATION_SCHEMA.COLUMNS', ['DATA_TYPE'], ['nvarchar(50)'])
+        information_schema_table = PostgresTable(self.db1, 'INFORMATION_SCHEMA.COLUMNS', ['DATA_TYPE'], ['nvarchar(50)'])
         query = "SELECT DATA_TYPE,character_maximum_length FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '" + self.name + "'"
         types = information_schema_table.select(query)
         data_types = [x[0].lower() for x in types]
@@ -484,13 +484,13 @@ class SqlServerTable(AbstractTable):
         return (cls(db1, name, columns, types))

     def get_all_columns(self):
-        information_schema_table =
+        information_schema_table = SqlServerTable(self.db1, 'INFORMATION_SCHEMA.COLUMNS')
         query = "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '" + self.name + "'"
-        columns = information_schema_table.select(query)
+        columns = information_schema_table.select(query, flattening_of_results=True)
         return (columns)

     def get_all_types(self):
-        information_schema_table =
+        information_schema_table = SqlServerTable(self.db1, 'INFORMATION_SCHEMA.COLUMNS', ['DATA_TYPE'], ['nvarchar(50)'])
         query = "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '" + self.name + "'"
         types = information_schema_table.select(query)
         return (types)
@@ -620,9 +620,10 @@ class MysqlTable(AbstractTable):
         # return cls(db1, name, columns, types, id_column_name=id_column_name)

     def initialize_columns(self):
-
-
-
+        """
+        TODO Dominik: Check for usecases of this method. Isn't it somewhat duplicated by `get_all_columns`?
+        """
+        columns = self.get_all_columns()
         self.columns = columns

     def convert_types_from_mysql(self):
@@ -632,14 +633,13 @@ class MysqlTable(AbstractTable):
         self.types = self.get_all_types()

     def get_all_columns(self):
-        information_schema_table =
-        query = f"SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{self.db1.DB_DATABASE}' AND TABLE_NAME = '
-        columns = information_schema_table.select(query)
+        information_schema_table = MysqlTable(self.db1, 'INFORMATION_SCHEMA.COLUMNS')
+        query = f"SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{self.db1.DB_DATABASE}' AND TABLE_NAME = '{self.name}';"
+        columns = information_schema_table.select(query, flattening_of_results=True)

-        return
+        return columns

     def get_all_types(self):
-
         data_types, data_lengths = self.get_data_types_and_character_lengths()
         for i in range(len(data_types)):
             if data_lengths[i] is not None:
@@ -654,7 +654,7 @@ class MysqlTable(AbstractTable):
    'varchar' in the data_types list and 2047 in the data_lengths list.
    """
    def get_data_types_and_character_lengths(self):
-        information_schema_table =
+        information_schema_table = MysqlTable(self.db1, 'INFORMATION_SCHEMA.COLUMNS', ['DATA_TYPE'], ['nvarchar(50)'])
         query = f"SELECT DATA_TYPE,character_maximum_length FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{self.db1.DB_DATABASE}' AND TABLE_NAME = '" + self.name + "'"
         types = information_schema_table.select(query)
         data_types = [x[0] for x in types]
@@ -675,8 +675,20 @@ class MysqlTable(AbstractTable):

         return python_types

+    def extract_last_id(self) -> Any:
+        """
+        Extract the last inserted ID from the DB.
+
+        LAST_INSERT_ID exists in the DB connection context, therefore is safe to use if DB session is request-scoped
+        In this case we only use global connection, but we use Lock to ensure thread-safety across different requests
+        This is a go-to mechanism for extracting the ID of the inserted record for multiple SQL DBs,
+        altho this specific query is applicable only to MySQL.
+        """
+        assert self.name == self.db1.last_table_inserted_into, "Last table inserted into is not the same as the table being queried"
+        return self.select("SELECT LAST_INSERT_ID()")[0][0]
+
     def get_nullable_columns(self):
-        information_schema_table =
+        information_schema_table = MysqlTable(self.db1, 'INFORMATION_SCHEMA.COLUMNS')
         query = f"SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS where TABLE_SCHEMA = '{self.db1.DB_DATABASE}' and TABLE_NAME = '{self.name}' and IS_NULLABLE = 'YES'"
         nullable_columns = information_schema_table.select(query)
         return (nullable_columns)
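Because `LAST_INSERT_ID()` is scoped to the connection rather than to a table, the assert on `db1.last_table_inserted_into` is what ties the returned value to the table being queried. A sketch of how the guard behaves with two tables sharing one connected database object; table and column names are illustrative:

```python
import pandas as pd

from dbhydra.src.tables import MysqlTable


def demo(users: MysqlTable, orders: MysqlTable):
    users.insert_from_df(pd.DataFrame({"name": ["alice"]}))
    user_id = users.extract_last_id()      # OK: last_table_inserted_into == users.name

    orders.insert_from_df(pd.DataFrame({"user_id": [user_id]}))
    users.extract_last_id()                # AssertionError: the last insert targeted orders
```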
@@ -1014,6 +1026,7 @@ class XlsxTable(AbstractTable):
         df.reset_index(drop=True, inplace=True)

         self._save_table(df)
+        self.last_table_inserted_into = self.name

     def replace_from_df(self, df):
         assert len(df.columns) == len(self.columns) # +1 because of id column
@@ -1113,4 +1126,9 @@ class XlsxTable(AbstractTable):
         df.drop(df[df[where_variable] == where_value].index, inplace=True)
         deleted_count = number_of_records - len(df)
         self.replace_from_df(df)
-        return deleted_count
+        return deleted_count
+
+    def extract_last_id(self) -> Any:
+        assert self.name == self.db1.last_table_inserted_into, "Last table inserted into is not the same as the table being queried"
+        df = self.select_to_df()
+        return df.iloc[-1][self.id_column_name]
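The Xlsx backend has no auto-increment counter to query, so its `extract_last_id` simply reads the ID column of the last row returned by `select_to_df`. The equivalent lookup done directly with pandas, assuming an `id` column (column name is illustrative):

```python
import pandas as pd

df = pd.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]})
last_id = df.iloc[-1]["id"]   # -> 3, mirroring XlsxTable.extract_last_id
```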
dbhydra/src/xlsx_db.py

@@ -1,10 +1,11 @@
-from dbhydra.src.abstract_db import AbstractDb
-from dbhydra.src.tables import XlsxTable
-
 import contextlib
-import threading
-import pathlib
 import os
+import pathlib
+import threading
+from typing import Optional
+
+from dbhydra.src.abstract_db import AbstractDb
+from dbhydra.src.tables import XlsxTable


 class XlsxDb(AbstractDb):
@@ -29,7 +30,8 @@ class XlsxDb(AbstractDb):
         if self.db_directory_path is None:
             self.db_directory_path = pathlib.Path(self.name)

-
+        self.last_table_inserted_into: Optional[str] = None
+
         self.python_database_type_mapping = {
             'int': "int",
             'float': "double",
The remaining 22 files listed above with +0 -0 are unchanged between 2.1.0 and 2.1.2.