dbhydra 2.1.0__tar.gz → 2.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dbhydra-2.1.0 → dbhydra-2.1.2}/PKG-INFO +1 -1
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/abstract_db.py +3 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/abstract_table.py +5 -5
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/tables.py +41 -23
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/xlsx_db.py +8 -6
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra.egg-info/PKG-INFO +1 -1
- {dbhydra-2.1.0 → dbhydra-2.1.2}/setup.py +1 -1
- {dbhydra-2.1.0 → dbhydra-2.1.2}/LICENSE +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/README.md +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/__init__.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/dbhydra_core.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/__init__.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/bigquery_db.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/errors/__init__.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/errors/exceptions.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/migrator.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/mongo_db.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/mysql_db.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/postgres_db.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/src/sqlserver_db.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/tests/__init__.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/tests/test_cases.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/tests/test_mongo.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra/tests/test_sql.py +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra.egg-info/SOURCES.txt +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra.egg-info/dependency_links.txt +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra.egg-info/requires.txt +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/dbhydra.egg-info/top_level.txt +0 -0
- {dbhydra-2.1.0 → dbhydra-2.1.2}/setup.cfg +0 -0
dbhydra/src/abstract_db.py

@@ -1,10 +1,12 @@
 import abc
 import threading
 from contextlib import contextmanager
+from typing import Optional

 from dbhydra.src.migrator import Migrator
 from dbhydra.src.tables import AbstractTable

+
 def read_connection_details(config_file):
     def read_file(file):
         """Reads txt file -> list"""
@@ -102,6 +104,7 @@ class AbstractDb(abc.ABC):
         # self.connect_to_db()

         self.active_transactions=[]
+        self.last_table_inserted_into: Optional[str] = None

     @abc.abstractmethod
     def connect_locally(self):
dbhydra/src/abstract_table.py

@@ -241,10 +241,6 @@ class AbstractTable(AbstractJoinable, abc.ABC):
         if self.columns is not None and self.types is not None:
             assert len(self.columns) == len(self.types)

-            # Hotfix: flatten nested lists (otherwise crashes might happen)
-            # TODO: Search for the causes and implement better handling
-            for i, column in enumerate(self.columns):
-                self.columns[i] = column[0] if type(column) == list else column
             self.column_type_dict={self.columns[i]:self.types[i] for i,x in enumerate(self.columns)}
         else:
             self.column_type_dict={}
@@ -361,6 +357,8 @@ class AbstractTable(AbstractJoinable, abc.ABC):

         return df_copy

+    def extract_last_id(self) -> Any:
+        raise NotImplementedError("Method not implemented for this subclass")

     def insert_from_df(self, df, batch=1, try_mode=False, debug_mode=False, adjust_df=False, insert_id=False):
         if debug_mode:
@@ -407,7 +405,9 @@ class AbstractTable(AbstractJoinable, abc.ABC):
         # rows[i][j] = "'" + record + "'"
         #print(rows)
         rows = df.values.tolist()
-
+        result = self.insert(rows, batch=batch, try_mode=try_mode, debug_mode=False, insert_id=insert_id)
+        self.db1.last_table_inserted_into = self.name
+        return result

         #TODO: need to solve inserting in different column_order
         #check df column names, permute if needed
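Taken together, the abstract_table.py hunks change `insert_from_df` to return the result of the underlying `insert` call and to record the target table on the owning database object, while `extract_last_id` becomes an overridable hook for reading back the ID of that insert. A minimal sketch of the intended call pattern, assuming an already-constructed and connected table object; the import path is taken from the file list above, everything else here is illustrative:

```python
import pandas as pd

from dbhydra.src.tables import MysqlTable  # concrete subclass that overrides extract_last_id


def insert_and_return_id(table: MysqlTable, df: pd.DataFrame):
    """Insert a DataFrame and read back the ID of the last inserted row.

    Assumes `table` was built against a connected database object, e.g.
    MysqlTable(db, "users", ["id", "name"], ["int", "varchar(100)"]) -- names are illustrative.
    """
    result = table.insert_from_df(df)   # also sets db.last_table_inserted_into = table.name
    last_id = table.extract_last_id()   # assert passes: nothing else was inserted in between
    return result, last_id
```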
dbhydra/src/tables.py

@@ -83,20 +83,21 @@ class PostgresTable(AbstractTable):
         print("==========================================")

     def initialize_columns(self):
-
-
-
+        """
+        TODO Dominik: Check for usecases of this method. Isn't it somewhat duplicated by `get_all_columns`?
+        """
+        columns = self.get_all_columns()
         self.columns = columns

     def initialize_types(self):
         self.types = self.get_all_types()

     def get_all_columns(self):
-        information_schema_table =
-        query = "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '
-        columns = information_schema_table.select(query)
+        information_schema_table = PostgresTable(self.db1, 'INFORMATION_SCHEMA.COLUMNS')
+        query = f"SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{self.name}';"
+        columns = information_schema_table.select(query, flattening_of_results=True)

-        return
+        return columns

     def convert_types_from_mysql(self):
         inverse_dict_mysql_to_postgres = dict(zip(POSTGRES_TO_MYSQL_DATA_MAPPING.values(), POSTGRES_TO_MYSQL_DATA_MAPPING.keys()))
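The rewritten `get_all_columns` leans on the `flattening_of_results=True` argument of `select` instead of the flattening hotfix removed from `AbstractTable.__init__`. That flag's implementation is not shown in this diff, so the following is only a sketch of the presumed behaviour: a one-column result set arrives as single-element rows, and flattening reduces it to a plain list of column names.

```python
# Presumed behaviour of flattening_of_results, not verified against select():
raw_rows = [("id",), ("name",), ("created_at",)]   # typical one-column INFORMATION_SCHEMA result

flattened = [row[0] for row in raw_rows]           # -> ["id", "name", "created_at"]
assert flattened == ["id", "name", "created_at"]
```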
@@ -104,8 +105,7 @@ class PostgresTable(AbstractTable):
         self.types = postgres_types

     def get_all_types(self):
-
-        information_schema_table = Table(self.db1, 'INFORMATION_SCHEMA.COLUMNS', ['DATA_TYPE'], ['nvarchar(50)'])
+        information_schema_table = PostgresTable(self.db1, 'INFORMATION_SCHEMA.COLUMNS', ['DATA_TYPE'], ['nvarchar(50)'])
         query = "SELECT DATA_TYPE,character_maximum_length FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '" + self.name + "'"
         types = information_schema_table.select(query)
         data_types = [x[0].lower() for x in types]
@@ -484,13 +484,13 @@ class SqlServerTable(AbstractTable):
         return (cls(db1, name, columns, types))

     def get_all_columns(self):
-        information_schema_table =
+        information_schema_table = SqlServerTable(self.db1, 'INFORMATION_SCHEMA.COLUMNS')
         query = "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '" + self.name + "'"
-        columns = information_schema_table.select(query)
+        columns = information_schema_table.select(query, flattening_of_results=True)
         return (columns)

     def get_all_types(self):
-        information_schema_table =
+        information_schema_table = SqlServerTable(self.db1, 'INFORMATION_SCHEMA.COLUMNS', ['DATA_TYPE'], ['nvarchar(50)'])
         query = "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '" + self.name + "'"
         types = information_schema_table.select(query)
         return (types)
@@ -620,9 +620,10 @@ class MysqlTable(AbstractTable):
         # return cls(db1, name, columns, types, id_column_name=id_column_name)

     def initialize_columns(self):
-
-
-
+        """
+        TODO Dominik: Check for usecases of this method. Isn't it somewhat duplicated by `get_all_columns`?
+        """
+        columns = self.get_all_columns()
         self.columns = columns

     def convert_types_from_mysql(self):
@@ -632,14 +633,13 @@ class MysqlTable(AbstractTable):
         self.types = self.get_all_types()

     def get_all_columns(self):
-        information_schema_table =
-        query = f"SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{self.db1.DB_DATABASE}' AND TABLE_NAME = '
-        columns = information_schema_table.select(query)
+        information_schema_table = MysqlTable(self.db1, 'INFORMATION_SCHEMA.COLUMNS')
+        query = f"SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{self.db1.DB_DATABASE}' AND TABLE_NAME = '{self.name}';"
+        columns = information_schema_table.select(query, flattening_of_results=True)

-        return
+        return columns

     def get_all_types(self):
-
         data_types, data_lengths = self.get_data_types_and_character_lengths()
         for i in range(len(data_types)):
             if data_lengths[i] is not None:
@@ -654,7 +654,7 @@ class MysqlTable(AbstractTable):
    'varchar' in the data_types list and 2047 in the data_lengths list.
    """
    def get_data_types_and_character_lengths(self):
-        information_schema_table =
+        information_schema_table = MysqlTable(self.db1, 'INFORMATION_SCHEMA.COLUMNS', ['DATA_TYPE'], ['nvarchar(50)'])
         query = f"SELECT DATA_TYPE,character_maximum_length FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{self.db1.DB_DATABASE}' AND TABLE_NAME = '" + self.name + "'"
         types = information_schema_table.select(query)
         data_types = [x[0] for x in types]
@@ -675,8 +675,20 @@ class MysqlTable(AbstractTable):

         return python_types

+    def extract_last_id(self) -> Any:
+        """
+        Extract the last inserted ID from the DB.
+
+        LAST_INSERT_ID exists in the DB connection context, therefore is safe to use if DB session is request-scoped
+        In this case we only use global connection, but we use Lock to ensure thread-safety across different requests
+        This is a go-to mechanism for extracting the ID of the inserted record for multiple SQL DBs,
+        altho this specific query is applicable only to MySQL.
+        """
+        assert self.name == self.db1.last_table_inserted_into, "Last table inserted into is not the same as the table being queried"
+        return self.select("SELECT LAST_INSERT_ID()")[0][0]
+
     def get_nullable_columns(self):
-        information_schema_table =
+        information_schema_table = MysqlTable(self.db1, 'INFORMATION_SCHEMA.COLUMNS')
         query = f"SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS where TABLE_SCHEMA = '{self.db1.DB_DATABASE}' and TABLE_NAME = '{self.name}' and IS_NULLABLE = 'YES'"
         nullable_columns = information_schema_table.select(query)
         return (nullable_columns)
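Because `LAST_INSERT_ID()` is scoped to the connection rather than to a table, the assert on `db1.last_table_inserted_into` is what ties the returned value to the table being queried. A sketch of how the guard behaves with two tables sharing one connected database object; table and column names are illustrative:

```python
import pandas as pd

from dbhydra.src.tables import MysqlTable


def demo(users: MysqlTable, orders: MysqlTable):
    users.insert_from_df(pd.DataFrame({"name": ["alice"]}))
    user_id = users.extract_last_id()      # OK: last_table_inserted_into == users.name

    orders.insert_from_df(pd.DataFrame({"user_id": [user_id]}))
    users.extract_last_id()                # AssertionError: the last insert targeted orders
```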
@@ -1014,6 +1026,7 @@ class XlsxTable(AbstractTable):
         df.reset_index(drop=True, inplace=True)

         self._save_table(df)
+        self.last_table_inserted_into = self.name

     def replace_from_df(self, df):
         assert len(df.columns) == len(self.columns) # +1 because of id column
@@ -1113,4 +1126,9 @@ class XlsxTable(AbstractTable):
         df.drop(df[df[where_variable] == where_value].index, inplace=True)
         deleted_count = number_of_records - len(df)
         self.replace_from_df(df)
-        return deleted_count
+        return deleted_count
+
+    def extract_last_id(self) -> Any:
+        assert self.name == self.db1.last_table_inserted_into, "Last table inserted into is not the same as the table being queried"
+        df = self.select_to_df()
+        return df.iloc[-1][self.id_column_name]
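The Xlsx backend has no auto-increment counter to query, so its `extract_last_id` simply reads the ID column of the last row returned by `select_to_df`. The equivalent lookup done directly with pandas, assuming an `id` column (column name is illustrative):

```python
import pandas as pd

df = pd.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]})
last_id = df.iloc[-1]["id"]   # -> 3, mirroring XlsxTable.extract_last_id
```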
dbhydra/src/xlsx_db.py

@@ -1,10 +1,11 @@
-from dbhydra.src.abstract_db import AbstractDb
-from dbhydra.src.tables import XlsxTable
-
 import contextlib
-import threading
-import pathlib
 import os
+import pathlib
+import threading
+from typing import Optional
+
+from dbhydra.src.abstract_db import AbstractDb
+from dbhydra.src.tables import XlsxTable


 class XlsxDb(AbstractDb):
@@ -29,7 +30,8 @@ class XlsxDb(AbstractDb):
         if self.db_directory_path is None:
             self.db_directory_path = pathlib.Path(self.name)

-
+        self.last_table_inserted_into: Optional[str] = None
+
         self.python_database_type_mapping = {
             'int': "int",
             'float': "double",
The remaining 22 files listed above with +0 -0 are unchanged between 2.1.0 and 2.1.2.