dbhydra 2.1.3__py3-none-any.whl → 2.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbhydra/dbhydra_core.py +2 -1
- dbhydra/src/abstract_db.py +1 -0
- dbhydra/src/abstract_table.py +6 -5
- dbhydra/src/migrator.py +321 -10
- dbhydra/src/mysql_db.py +7 -0
- dbhydra/src/tables.py +41 -34
- dbhydra/test_migrator.py +27 -0
- {dbhydra-2.1.3.dist-info → dbhydra-2.2.1.dist-info}/METADATA +1 -1
- {dbhydra-2.1.3.dist-info → dbhydra-2.2.1.dist-info}/RECORD +12 -11
- {dbhydra-2.1.3.dist-info → dbhydra-2.2.1.dist-info}/LICENSE +0 -0
- {dbhydra-2.1.3.dist-info → dbhydra-2.2.1.dist-info}/WHEEL +0 -0
- {dbhydra-2.1.3.dist-info → dbhydra-2.2.1.dist-info}/top_level.txt +0 -0
dbhydra/dbhydra_core.py
CHANGED
|
@@ -13,7 +13,8 @@ from dbhydra.src.mongo_db import MongoDb
|
|
|
13
13
|
from dbhydra.src.postgres_db import PostgresDb
|
|
14
14
|
from dbhydra.src.xlsx_db import XlsxDb, XlsxDB
|
|
15
15
|
from dbhydra.src.abstract_db import AbstractDb
|
|
16
|
-
from dbhydra.src.tables import SqlServerTable, PostgresTable, MysqlTable, XlsxTable, AbstractTable, MongoTable,
|
|
16
|
+
from dbhydra.src.tables import (SqlServerTable, PostgresTable, MysqlTable, XlsxTable, AbstractTable, MongoTable,
|
|
17
|
+
BigQueryTable, Table, AbstractSelectable, AbstractJoinable, PYTHON_TO_MYSQL_DATA_MAPPING)
|
|
17
18
|
##### Do not remove imports - they are expored in the package
|
|
18
19
|
|
|
19
20
|
|
dbhydra/src/abstract_db.py
CHANGED
dbhydra/src/abstract_table.py
CHANGED
|
@@ -132,12 +132,13 @@ class AbstractSelectable:
|
|
|
132
132
|
|
|
133
133
|
|
|
134
134
|
def select_all(self):
|
|
135
|
+
quote = self.db1.identifier_quote
|
|
135
136
|
all_cols_query = ""
|
|
136
137
|
for col in self.columns:
|
|
137
|
-
all_cols_query = all_cols_query + col + ","
|
|
138
|
+
all_cols_query = all_cols_query + quote + col + quote + ","
|
|
138
139
|
if all_cols_query[-1] == ",":
|
|
139
140
|
all_cols_query = all_cols_query[:-1]
|
|
140
|
-
list1 = self.select(f"SELECT {all_cols_query} FROM
|
|
141
|
+
list1 = self.select(f"SELECT {all_cols_query} FROM {quote}{self.name}{quote};")
|
|
141
142
|
return (list1)
|
|
142
143
|
|
|
143
144
|
def select_to_df(self):
|
|
@@ -244,7 +245,6 @@ class AbstractTable(AbstractJoinable, abc.ABC):
|
|
|
244
245
|
self.column_type_dict={self.columns[i]:self.types[i] for i,x in enumerate(self.columns)}
|
|
245
246
|
else:
|
|
246
247
|
self.column_type_dict={}
|
|
247
|
-
|
|
248
248
|
|
|
249
249
|
# Temporary disabled, please make sure this is implemented where needed, don't introduce breaking changes please
|
|
250
250
|
# @abc.abstractmethod
|
|
@@ -421,11 +421,12 @@ class AbstractTable(AbstractJoinable, abc.ABC):
|
|
|
421
421
|
|
|
422
422
|
|
|
423
423
|
def delete(self, where=None):
|
|
424
|
+
quote = self.db1.identifier_quote
|
|
424
425
|
|
|
425
426
|
if where is None:
|
|
426
|
-
query = "DELETE FROM
|
|
427
|
+
query = "DELETE FROM {quote}{self.name}{quote}"
|
|
427
428
|
else:
|
|
428
|
-
query = "DELETE FROM
|
|
429
|
+
query = f"DELETE FROM {quote}{self.name}{quote} WHERE {where}"
|
|
429
430
|
return self.execute(query)
|
|
430
431
|
|
|
431
432
|
|
dbhydra/src/migrator.py
CHANGED
|
@@ -1,15 +1,46 @@
|
|
|
1
|
-
import
|
|
1
|
+
import os
|
|
2
2
|
import math
|
|
3
3
|
import json
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from typing import Optional
|
|
7
|
+
from deepdiff import DeepDiff
|
|
8
|
+
from dataclasses import dataclass, asdict
|
|
4
9
|
|
|
10
|
+
PENDING_MIGRATION_DEFAULT_PATH = "./db/migrations/pending_migration.json"
|
|
11
|
+
MIGRATION_HISTORY_DEFAULT_PATH = "./db/migrations/migration_history.json"
|
|
12
|
+
|
|
13
|
+
# @dataclass
|
|
14
|
+
# class Migration:
|
|
15
|
+
# forward: list[dict]
|
|
16
|
+
# backward: list[dict]
|
|
5
17
|
|
|
6
18
|
class Migrator:
|
|
7
|
-
"""
|
|
19
|
+
"""
|
|
20
|
+
A class for managing database migrations.
|
|
21
|
+
|
|
22
|
+
This class provides functionality to create, manage, and execute database migrations
|
|
23
|
+
using a migration system compatible with MySQL and Postgres dialects. It allows for
|
|
24
|
+
creating forward and backward migrations, reading and writing migrations to JSON files,
|
|
25
|
+
and executing migrations based on changes detected in database structures.
|
|
26
|
+
|
|
27
|
+
Note: This class is compatible with MySQL and Postgres dialects and has been somewhat tested
|
|
28
|
+
with those databases. It may require adjustments for other database systems.
|
|
29
|
+
|
|
30
|
+
Attributes:
|
|
31
|
+
db: The database connection object used for executing migrations.
|
|
32
|
+
"""
|
|
8
33
|
|
|
9
|
-
def __init__(self, db
|
|
34
|
+
def __init__(self, db):
|
|
10
35
|
self.db = db
|
|
11
|
-
|
|
12
|
-
|
|
36
|
+
|
|
37
|
+
# Used in older implementations, TODO: decide whether to keep both approaches, unify them or pick one
|
|
38
|
+
self._migration_number = 1
|
|
39
|
+
self._migration_list = []
|
|
40
|
+
|
|
41
|
+
# Used in newer approach
|
|
42
|
+
self._pending_forward_migration_list = []#Migration(forward=[], backward=[])
|
|
43
|
+
self._pending_forward_migration_list = []#Migration(forward=[], backward=[])
|
|
13
44
|
|
|
14
45
|
def process_migration_dict(self, migration_dict):
|
|
15
46
|
matching_table_class = self.db.matching_table_class #E.g. MysqlTable
|
|
@@ -42,9 +73,10 @@ class Migrator:
|
|
|
42
73
|
table.initialize_types()
|
|
43
74
|
table.drop_column(options["column_name"])
|
|
44
75
|
|
|
76
|
+
# Old approach methods START
|
|
45
77
|
def next_migration(self):
|
|
46
|
-
self.
|
|
47
|
-
self.
|
|
78
|
+
self._migration_number += 1
|
|
79
|
+
self._migration_list = []
|
|
48
80
|
|
|
49
81
|
def migrate(self, migration_list):
|
|
50
82
|
for i, migration_dict in enumerate(migration_list):
|
|
@@ -59,21 +91,30 @@ class Migrator:
|
|
|
59
91
|
return (result)
|
|
60
92
|
|
|
61
93
|
def migration_list_to_json(self, filename=None):
|
|
62
|
-
result = json.dumps(self.
|
|
94
|
+
result = json.dumps(self._migration_list)
|
|
63
95
|
|
|
64
96
|
if filename is None or filename == "" or filename.isspace():
|
|
65
|
-
with open("migrations/migration-" + str(self.
|
|
97
|
+
with open("migrations/migration-" + str(self._migration_number) + ".json", "w+") as f:
|
|
66
98
|
f.write(result)
|
|
67
99
|
else:
|
|
68
100
|
with open(f"migrations/{filename}.json", "w+") as f:
|
|
69
101
|
f.write(result)
|
|
70
102
|
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
##### Auxilliary? #####
|
|
71
112
|
def create_migrations_from_df(self, name, dataframe):
|
|
72
113
|
|
|
73
114
|
columns, return_types = self.extract_columns_and_types_from_df(dataframe)
|
|
74
115
|
|
|
75
116
|
migration_dict = {"create": {"table_name": name, "columns": columns, "types": return_types}}
|
|
76
|
-
self.
|
|
117
|
+
self._migration_list.append(migration_dict)
|
|
77
118
|
self.migration_list_to_json()
|
|
78
119
|
# return columns, return_types
|
|
79
120
|
|
|
@@ -111,4 +152,274 @@ class Migrator:
|
|
|
111
152
|
return_types.insert(0, "int")
|
|
112
153
|
|
|
113
154
|
return columns, return_types
|
|
155
|
+
# Old approach methods END
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# def set_pending_migration(self, migration_dict: dict[str, list]):
|
|
169
|
+
# self._pending_migration = Migration(**migration_dict)
|
|
170
|
+
|
|
171
|
+
def migrate_forward(self):
|
|
172
|
+
"""
|
|
173
|
+
Applies forward migrations from the pending migration object.
|
|
174
|
+
|
|
175
|
+
Iterates through each migration dictionary in the pending migration's forward list,
|
|
176
|
+
processes the migration, saves it to migration history, and clears the pending migration.
|
|
177
|
+
|
|
178
|
+
Returns:
|
|
179
|
+
None
|
|
180
|
+
"""
|
|
181
|
+
|
|
182
|
+
for migration_dict in self._pending_forward_migration_list:
|
|
183
|
+
self.process_migration_dict(migration_dict)
|
|
184
|
+
|
|
185
|
+
#self._save_migration_to_history(migration=self._pending_migration)
|
|
186
|
+
self._clear_pending_migration()
|
|
187
|
+
|
|
188
|
+
def migrate_backward(self):
|
|
189
|
+
"""
|
|
190
|
+
Applies backward migrations from the pending migration object.
|
|
191
|
+
|
|
192
|
+
Iterates through each migration dictionary in the pending migration's backward list,
|
|
193
|
+
processes the migration, saves it to migration history, and clears the pending migration.
|
|
194
|
+
|
|
195
|
+
Returns:
|
|
196
|
+
None
|
|
197
|
+
"""
|
|
198
|
+
|
|
199
|
+
for migration_dict in self._pending_backward_migration_list:
|
|
200
|
+
self.process_migration_dict(migration_dict)
|
|
201
|
+
|
|
202
|
+
#history_migration = Migration(forward=self._pending_migration.backward, backward=self._pending_migration.forward)
|
|
203
|
+
#self._save_migration_to_history(migration=history_migration)
|
|
204
|
+
self._clear_pending_migration()
|
|
205
|
+
|
|
206
|
+
# def migrate_n_steps_back_in_history(self, n: int, migration_history_json: str = MIGRATION_HISTORY_DEFAULT_PATH):
|
|
207
|
+
# migration_history = self._read_migration_history_json(migration_history_json)
|
|
208
|
+
|
|
209
|
+
# if len(migration_history) < n:
|
|
210
|
+
# raise ValueError(f"Provided n (= {n}) is larger than migration history length (= {len(migration_history)}).")
|
|
211
|
+
|
|
212
|
+
# total_backward_migration = Migration(forward=[], backward=[])
|
|
213
|
+
# migrations = migration_history[-n:] # Take last n elements of migration history for execution
|
|
214
|
+
|
|
215
|
+
# # Loop in reversed order as we execute backward migrations in reversed order compared to forward ones
|
|
216
|
+
# for migration_dict in reversed(migrations):
|
|
217
|
+
# total_backward_migration.forward.append(migration_dict["forward"])
|
|
218
|
+
# total_backward_migration.backward.append(migration_dict["backward"])
|
|
219
|
+
|
|
220
|
+
# self.set_pending_migration(asdict(total_backward_migration))
|
|
221
|
+
# self.migrate_backward()
|
|
222
|
+
|
|
223
|
+
# def load_migration_from_json(self, json_file_path: str = PENDING_MIGRATION_DEFAULT_PATH):
|
|
224
|
+
# with open(json_file_path, "r") as file:
|
|
225
|
+
# migration_dict = json.load(file)
|
|
226
|
+
|
|
227
|
+
# self.set_pending_migration(migration_dict)
|
|
228
|
+
|
|
229
|
+
# def save_pending_migration_to_json(self, file_path: str = PENDING_MIGRATION_DEFAULT_PATH):
|
|
230
|
+
# if not file_path.endswith(".json"):
|
|
231
|
+
# raise ValueError("pending migration file must be of '.json' type.")
|
|
232
|
+
|
|
233
|
+
# self._build_folder_structure_for_file_path(file_path)
|
|
234
|
+
|
|
235
|
+
# with open(file_path, "w+") as file:
|
|
236
|
+
# json.dump(asdict(self._pending_migration), file, indent=2)
|
|
237
|
+
|
|
238
|
+
def create_table_migration(self, table_name: str, old_column_type_dict: Optional[dict], new_column_type_dict: Optional[dict]):
|
|
239
|
+
"""
|
|
240
|
+
Creates a migration for a database table based on its old and new column_type_dicts.
|
|
241
|
+
|
|
242
|
+
Args:
|
|
243
|
+
table_name (str): The name of the database table.
|
|
244
|
+
old_column_type_dict (Optional[dict]): The old column_type_dict of the table.
|
|
245
|
+
new_column_type_dict (Optional[dict]): The new column_type_dict of the table.
|
|
246
|
+
|
|
247
|
+
If old_column_type_dict is None and new_column_type_dict is not None: CREATE table
|
|
248
|
+
If old_column_type_dict is not None and new_column_type_dict is None: DROP table
|
|
249
|
+
|
|
250
|
+
Returns:
|
|
251
|
+
Migration: The generated migration object.
|
|
252
|
+
|
|
253
|
+
Raises:
|
|
254
|
+
ValueError: If the table_name argument is empty.
|
|
255
|
+
"""
|
|
256
|
+
|
|
257
|
+
def _extract_column_name_from_deepdiff_key(deepdiff_key: str) -> str:
|
|
258
|
+
"""
|
|
259
|
+
Extracts the column name from a key generated by deepdiff.
|
|
260
|
+
|
|
261
|
+
Args:
|
|
262
|
+
deepdiff_key (str): The key generated by deepdiff.
|
|
263
|
+
|
|
264
|
+
Returns:
|
|
265
|
+
str: The extracted column name.
|
|
266
|
+
|
|
267
|
+
Example:
|
|
268
|
+
>>> migrator = Migrator()
|
|
269
|
+
>>> column_name = migrator._extract_column_name_from_deepdiff_key("root['table']['column']")
|
|
270
|
+
>>> print(column_name)
|
|
271
|
+
'column'
|
|
272
|
+
"""
|
|
273
|
+
|
|
274
|
+
# Split the item_key by '[' and ']' to isolate the column name
|
|
275
|
+
# The column name is expected to be the last element after splitting
|
|
276
|
+
column_name = deepdiff_key.split('[')[-1].strip("']")
|
|
277
|
+
return column_name
|
|
278
|
+
|
|
279
|
+
def _convert_deepdiff_dict_into_migration_lists(table_name: str, deepdiff_dict: dict):
|
|
280
|
+
"""
|
|
281
|
+
Converts deepdiff dictionary from the new and old table column_type_dicts comparison into a Migration object.
|
|
282
|
+
|
|
283
|
+
Args:
|
|
284
|
+
table_name (str): A name of the examined DB table.
|
|
285
|
+
deepdiff_dict (dict): A dictionary from DeepDiff comparison of the old and new table column_type_dict.
|
|
286
|
+
|
|
287
|
+
Returns:
|
|
288
|
+
Migration: A Migration object containing forward and backward migrations for the given table.
|
|
289
|
+
|
|
290
|
+
Example:
|
|
291
|
+
>>> table_name = 'results'
|
|
292
|
+
>>> deepdiff_dict = {'dictionary_item_removed': {"root['hehexd']": 'double'}}
|
|
293
|
+
>>> migrator = Migrator()
|
|
294
|
+
>>> asdict(migrator._convert_deepdiff_dict_into_migration)
|
|
295
|
+
>>> {
|
|
296
|
+
'forward': [
|
|
297
|
+
{'drop_column': {'table_name': 'results', 'column_name': 'hehexd'}}
|
|
298
|
+
],
|
|
299
|
+
'backward': [
|
|
300
|
+
{'add_column': {'table_name': 'results', 'column_name': 'hehexd', 'column_type': 'double'}}
|
|
301
|
+
]
|
|
302
|
+
}
|
|
303
|
+
"""
|
|
304
|
+
forward_migration_list, backward_migration_list = [], []
|
|
305
|
+
|
|
306
|
+
forward_conversions = {
|
|
307
|
+
"dictionary_item_added": "add_column",
|
|
308
|
+
"dictionary_item_removed": "drop_column",
|
|
309
|
+
"values_changed": "modify_column"
|
|
310
|
+
}
|
|
311
|
+
backward_conversions = {
|
|
312
|
+
"dictionary_item_added": "drop_column",
|
|
313
|
+
"dictionary_item_removed": "add_column",
|
|
314
|
+
"values_changed": "modify_column"
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
for action_name, deepdiff_action in deepdiff_dict.items():
|
|
318
|
+
for deepdiff_key in deepdiff_action.keys():
|
|
319
|
+
column_name = _extract_column_name_from_deepdiff_key(deepdiff_key)
|
|
320
|
+
forward_action, backward_action = forward_conversions[action_name], backward_conversions[action_name]
|
|
321
|
+
|
|
322
|
+
if action_name=="dictionary_item_added":
|
|
323
|
+
column_type = deepdiff_action[deepdiff_key]
|
|
324
|
+
forward_migration_list.append({forward_action: {"table_name": table_name, "column_name": column_name, "column_type": column_type}})
|
|
325
|
+
backward_migration_list.append({backward_action: {"table_name": table_name, "column_name": column_name}})
|
|
326
|
+
elif action_name=="dictionary_item_removed":
|
|
327
|
+
column_type = deepdiff_action[deepdiff_key]
|
|
328
|
+
forward_migration_list.append({forward_action: {"table_name": table_name, "column_name": column_name}})
|
|
329
|
+
backward_migration_list.append({backward_action: {"table_name": table_name, "column_name": column_name, "column_type": column_type}})
|
|
330
|
+
elif action_name=="values_changed":
|
|
331
|
+
column_type = deepdiff_action[deepdiff_key]["old_value"]
|
|
332
|
+
column_new_type = deepdiff_action[deepdiff_key]["new_value"]
|
|
333
|
+
|
|
334
|
+
# HACK: Do not create migrations for cases such as varchar(2047) --> nvarchar(2047)
|
|
335
|
+
is_varchar_in_types = "varchar" in column_type and "varchar" in column_new_type
|
|
336
|
+
is_max_length_equal = (
|
|
337
|
+
column_type[column_type.index("("): column_type.index(")")]
|
|
338
|
+
and column_new_type[column_new_type.index("("): column_new_type.index(")")]
|
|
339
|
+
) if is_varchar_in_types else False
|
|
340
|
+
is_varchar_nvarchar_conversion = is_varchar_in_types and is_max_length_equal
|
|
341
|
+
|
|
342
|
+
if not is_varchar_nvarchar_conversion:
|
|
343
|
+
forward_migration_list.append({forward_action: {"table_name": table_name, "column_name": column_name,
|
|
344
|
+
"column_type": column_new_type}})
|
|
345
|
+
backward_migration_list.append({backward_action: {"table_name": table_name, "column_name": column_name,
|
|
346
|
+
"column_type": column_type}})
|
|
347
|
+
|
|
348
|
+
return forward_migration_list, backward_migration_list
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
if not table_name:
|
|
354
|
+
raise ValueError("The 'table_name' argument must be a non-empty string.")
|
|
355
|
+
|
|
356
|
+
if not old_column_type_dict and new_column_type_dict:
|
|
357
|
+
# non-empty initial column_type_dict --> empty new column_type_dict
|
|
358
|
+
columns, types = list(new_column_type_dict.keys()), list(new_column_type_dict.values())
|
|
359
|
+
forward_migration_list = [{"create": {"table_name": table_name, "columns": columns, "types": types}}]
|
|
360
|
+
backward_migration_list = [{"drop": {"table_name": table_name}}]
|
|
361
|
+
|
|
362
|
+
elif not new_column_type_dict:
|
|
363
|
+
# new column_type_dict is empty ==> drop the table
|
|
364
|
+
forward_migration_list = [{"drop": {"table_name": table_name}}]
|
|
365
|
+
backward_migration_list = [{"create": {"table_name": table_name, "columns": columns, "types": types}}]
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
else:
|
|
369
|
+
diff = DeepDiff(old_column_type_dict, new_column_type_dict, verbose_level=2)
|
|
370
|
+
forward_migration_list, backward_migration_list = _convert_deepdiff_dict_into_migration_lists(table_name, diff)
|
|
371
|
+
|
|
372
|
+
#migration = Migration(forward=forward_migration_list, backward=backward_migration_list)
|
|
373
|
+
|
|
374
|
+
self._append_migration_to_pending_migration(forward_migration_list, backward_migration_list)
|
|
375
|
+
|
|
376
|
+
return forward_migration_list, backward_migration_list
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def _append_migration_to_pending_migration(self, forward_migration_list, backward_migration_list):
|
|
383
|
+
self._pending_forward_migration_list += forward_migration_list
|
|
384
|
+
self._pending_backward_migration_list += backward_migration_list
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def _clear_pending_migration(self):
|
|
388
|
+
self._pending_forward_migration_list = []
|
|
389
|
+
self._pending_backward_migration_list = []
|
|
390
|
+
|
|
391
|
+
# def _read_migration_history_json(self, file_path: str = MIGRATION_HISTORY_DEFAULT_PATH):
|
|
392
|
+
# if not file_path.endswith(".json"):
|
|
393
|
+
# raise ValueError("Migration history file must be of '.json' type.")
|
|
394
|
+
|
|
395
|
+
# if not os.path.exists(file_path):
|
|
396
|
+
# raise FileNotFoundError(f"Migration history file '{file_path}' does not exist.")
|
|
397
|
+
|
|
398
|
+
# try:
|
|
399
|
+
# with open(file_path, "r") as file:
|
|
400
|
+
# migration_history = json.load(file)
|
|
401
|
+
# except json.JSONDecodeError:
|
|
402
|
+
# migration_history = []
|
|
403
|
+
|
|
404
|
+
# return migration_history
|
|
405
|
+
|
|
406
|
+
# def _save_migration_to_history(self, migration: Migration, file_path: str = MIGRATION_HISTORY_DEFAULT_PATH):
|
|
407
|
+
# try:
|
|
408
|
+
# migration_history = self._read_migration_history_json(file_path)
|
|
409
|
+
# except FileNotFoundError:
|
|
410
|
+
# self._build_folder_structure_for_file_path(file_path)
|
|
411
|
+
# migration_history = []
|
|
412
|
+
|
|
413
|
+
# migration_history.append(asdict(migration))
|
|
414
|
+
|
|
415
|
+
# with open(file_path, "w") as file:
|
|
416
|
+
# json.dump(migration_history, file, indent=2)
|
|
417
|
+
|
|
418
|
+
# def _build_folder_structure_for_file_path(self, file_path: str):
|
|
419
|
+
# folder_path = os.path.dirname(file_path)
|
|
420
|
+
# if not os.path.exists(folder_path):
|
|
421
|
+
# print(f"Folder path to the file '{file_path}' does not exist. Creating the file and the folder structure.")
|
|
422
|
+
# os.makedirs(folder_path)
|
|
423
|
+
|
|
424
|
+
|
|
114
425
|
|
dbhydra/src/mysql_db.py
CHANGED
|
@@ -22,6 +22,10 @@ class MysqlDb(AbstractDb):
|
|
|
22
22
|
'Jsonable': "json"
|
|
23
23
|
}
|
|
24
24
|
|
|
25
|
+
def __init__(self, *args, **kwargs):
|
|
26
|
+
super().__init__(*args, **kwargs)
|
|
27
|
+
self.identifier_quote = '`'
|
|
28
|
+
|
|
25
29
|
def connect_locally(self):
|
|
26
30
|
self.connection = pymysql.connect(host=self.DB_SERVER, user=self.DB_USERNAME, password=self.DB_PASSWORD,
|
|
27
31
|
database=self.DB_DATABASE)
|
|
@@ -39,6 +43,9 @@ class MysqlDb(AbstractDb):
|
|
|
39
43
|
print("DB connection established")
|
|
40
44
|
|
|
41
45
|
def create_new_db(self):
|
|
46
|
+
self.connection = pymysql.connect(host=self.DB_SERVER, port=self.DB_PORT, user=self.DB_USERNAME,
|
|
47
|
+
charset="utf8mb4", password=self.DB_PASSWORD)
|
|
48
|
+
self.cursor = self.connection.cursor()
|
|
42
49
|
create_db_command = "CREATE DATABASE " + self.DB_DATABASE
|
|
43
50
|
self.execute(create_db_command)
|
|
44
51
|
|
dbhydra/src/tables.py
CHANGED
|
@@ -2,7 +2,7 @@ import pandas as pd
|
|
|
2
2
|
import numpy as np
|
|
3
3
|
from typing import Optional, Any
|
|
4
4
|
import abc
|
|
5
|
-
|
|
5
|
+
import time
|
|
6
6
|
#xlsx imports
|
|
7
7
|
import pathlib
|
|
8
8
|
|
|
@@ -655,7 +655,7 @@ class MysqlTable(AbstractTable):
|
|
|
655
655
|
"""
|
|
656
656
|
def get_data_types_and_character_lengths(self):
|
|
657
657
|
information_schema_table = MysqlTable(self.db1, 'INFORMATION_SCHEMA.COLUMNS', ['DATA_TYPE'], ['nvarchar(50)'])
|
|
658
|
-
query = f"SELECT DATA_TYPE,character_maximum_length FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{self.db1.DB_DATABASE}' AND TABLE_NAME = '
|
|
658
|
+
query = f"SELECT DATA_TYPE,character_maximum_length FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{self.db1.DB_DATABASE}' AND TABLE_NAME = '{self.name}'"
|
|
659
659
|
types = information_schema_table.select(query)
|
|
660
660
|
data_types = [x[0] for x in types]
|
|
661
661
|
data_lengths = [x[1] for x in types]
|
|
@@ -725,12 +725,12 @@ class MysqlTable(AbstractTable):
|
|
|
725
725
|
Returns the number of records in table
|
|
726
726
|
"""
|
|
727
727
|
|
|
728
|
-
num_of_records = self.select(f"SELECT COUNT(*) FROM {self.name}
|
|
728
|
+
num_of_records = self.select(f"SELECT COUNT(*) FROM `{self.name}`;")
|
|
729
729
|
|
|
730
730
|
return num_of_records[0][0]
|
|
731
731
|
|
|
732
732
|
def drop(self):
|
|
733
|
-
query = "DROP TABLE " + self.name + "
|
|
733
|
+
query = "DROP TABLE `" + self.name + "`;"
|
|
734
734
|
print(query)
|
|
735
735
|
self.db1.execute(query)
|
|
736
736
|
|
|
@@ -742,9 +742,9 @@ class MysqlTable(AbstractTable):
|
|
|
742
742
|
|
|
743
743
|
column_type_pairs = list(zip(self.columns, self.types))[1:]
|
|
744
744
|
fields = ", ".join(
|
|
745
|
-
[f"{column} {type_.upper()}" for column, type_ in column_type_pairs]
|
|
745
|
+
[f"`{column}` {type_.upper()}" for column, type_ in column_type_pairs]
|
|
746
746
|
)
|
|
747
|
-
query = f"CREATE TABLE {self.name} ({self.id_column_name} INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, {fields})"
|
|
747
|
+
query = f"CREATE TABLE `{self.name}` ({self.id_column_name} INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, {fields})"
|
|
748
748
|
|
|
749
749
|
print(query)
|
|
750
750
|
try:
|
|
@@ -759,14 +759,11 @@ class MysqlTable(AbstractTable):
|
|
|
759
759
|
total_output=[]
|
|
760
760
|
for k in range(len(rows)):
|
|
761
761
|
if k % batch == 0:
|
|
762
|
-
query = "INSERT INTO " + self.name + " ("
|
|
762
|
+
query = "INSERT INTO `" + self.name + "` ("
|
|
763
763
|
for i in range(start_index, len(self.columns)):
|
|
764
764
|
if i < len(rows[k]) + 1:
|
|
765
|
-
# column name containing space needs to be wrapped in `...`, otherwise causes syntax error
|
|
766
|
-
|
|
767
|
-
column_name = '`' + self.columns[i] + '`'
|
|
768
|
-
else:
|
|
769
|
-
column_name = self.columns[i]
|
|
765
|
+
# column name containing space/reserved keyword needs to be wrapped in `...`, otherwise causes syntax error
|
|
766
|
+
column_name = '`' + self.columns[i] + '`'
|
|
770
767
|
query += column_name + ","
|
|
771
768
|
if len(rows) < len(self.columns):
|
|
772
769
|
print(len(self.columns) - len(rows), "columns were not specified")
|
|
@@ -846,17 +843,17 @@ class MysqlTable(AbstractTable):
|
|
|
846
843
|
def add_foreign_key(self, foreign_key):
|
|
847
844
|
parent_id = foreign_key['parent_id']
|
|
848
845
|
parent = foreign_key['parent']
|
|
849
|
-
query = "ALTER TABLE " + self.name + " MODIFY " + parent_id + " INT UNSIGNED"
|
|
846
|
+
query = "ALTER TABLE `" + self.name + "` MODIFY " + parent_id + " INT UNSIGNED"
|
|
850
847
|
print(query)
|
|
851
848
|
self.db1.execute(query)
|
|
852
|
-
query = "ALTER TABLE " + self.name + " ADD FOREIGN KEY (" + parent_id + ") REFERENCES " + parent + "("+self.id_column_name+")"
|
|
849
|
+
query = "ALTER TABLE `" + self.name + "` ADD FOREIGN KEY (" + parent_id + ") REFERENCES " + parent + "("+self.id_column_name+")"
|
|
853
850
|
print(query)
|
|
854
851
|
self.db1.execute(query)
|
|
855
852
|
|
|
856
853
|
@save_migration
|
|
857
854
|
def add_column(self, column_name, column_type):
|
|
858
855
|
assert len(column_name) > 1
|
|
859
|
-
command = "ALTER TABLE " + self.name + " ADD COLUMN " + column_name + " " + column_type
|
|
856
|
+
command = "ALTER TABLE `" + self.name + "` ADD COLUMN `" + column_name + "` " + column_type
|
|
860
857
|
try:
|
|
861
858
|
self.db1.execute(command)
|
|
862
859
|
self.columns.append(column_name)
|
|
@@ -867,7 +864,7 @@ class MysqlTable(AbstractTable):
|
|
|
867
864
|
@save_migration
|
|
868
865
|
def drop_column(self, column_name):
|
|
869
866
|
assert len(column_name) > 1
|
|
870
|
-
command = "ALTER TABLE " + self.name + " DROP COLUMN " + column_name
|
|
867
|
+
command = "ALTER TABLE `" + self.name + "` DROP COLUMN " + column_name
|
|
871
868
|
try:
|
|
872
869
|
print(command)
|
|
873
870
|
self.db1.execute(command)
|
|
@@ -881,7 +878,7 @@ class MysqlTable(AbstractTable):
|
|
|
881
878
|
@save_migration
|
|
882
879
|
def modify_column(self, column_name, new_column_type):
|
|
883
880
|
assert len(column_name) > 1
|
|
884
|
-
command = "ALTER TABLE " + self.name + " MODIFY COLUMN " + column_name + " " + new_column_type
|
|
881
|
+
command = "ALTER TABLE `" + self.name + "` MODIFY COLUMN `" + column_name + "` " + new_column_type
|
|
885
882
|
print(command)
|
|
886
883
|
try:
|
|
887
884
|
self.db1.execute(command)
|
|
@@ -894,9 +891,10 @@ class MysqlTable(AbstractTable):
|
|
|
894
891
|
############### XLSX ##################
|
|
895
892
|
|
|
896
893
|
class XlsxTable(AbstractTable):
|
|
897
|
-
def __init__(self, db1, name, columns=None, types=None, id_column_name = "id"):
|
|
894
|
+
def __init__(self, db1, name, columns=None, types=None, id_column_name = "id", number_of_retries=5):
|
|
898
895
|
super().__init__(db1, name, columns, types)
|
|
899
896
|
self.id_column_name = id_column_name
|
|
897
|
+
self.NUMBER_OF_RETRIES = number_of_retries
|
|
900
898
|
|
|
901
899
|
table_filename = f"{self.name}.csv" if self.db1.is_csv else f"{self.name}.xlsx"
|
|
902
900
|
self.table_directory_path: pathlib.Path = self.db1.db_directory_path / table_filename
|
|
@@ -960,23 +958,32 @@ class XlsxTable(AbstractTable):
|
|
|
960
958
|
column for column, type_ in self.column_type_dict.items() if type_ == "datetime"
|
|
961
959
|
]
|
|
962
960
|
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
df = pd.
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
print(f"Error while reading data into XlsxTable: {e}")
|
|
978
|
-
df = pd.DataFrame(columns=self.columns)
|
|
961
|
+
# BUG: If XlsxTable is being accessed by multiple threads, read operation
|
|
962
|
+
# might fail due to race conditions. Add retry mechanism to handle these cases.
|
|
963
|
+
for attempt in range(self.NUMBER_OF_RETRIES):
|
|
964
|
+
try:
|
|
965
|
+
df = self._select(column_type_map, date_columns)
|
|
966
|
+
except Exception:
|
|
967
|
+
# print(f"Error while reading data into XlsxTable: {e}")
|
|
968
|
+
# df = pd.DataFrame(columns=self.columns)
|
|
969
|
+
if attempt < self.NUMBER_OF_RETRIES - 1:
|
|
970
|
+
time.sleep(0.1)
|
|
971
|
+
else:
|
|
972
|
+
print(f"Failed to read data from {self.table_directory_path}, returning empty DataFrame")
|
|
973
|
+
df = pd.DataFrame(columns=self.columns)
|
|
974
|
+
return df
|
|
979
975
|
|
|
976
|
+
def _select(self, column_type_map, date_columns):
|
|
977
|
+
if self.db1.is_csv:
|
|
978
|
+
df = pd.read_csv(
|
|
979
|
+
self.table_directory_path, dtype=column_type_map, parse_dates=date_columns,
|
|
980
|
+
encoding='utf-8'
|
|
981
|
+
)
|
|
982
|
+
else:
|
|
983
|
+
df = pd.read_excel(
|
|
984
|
+
self.table_directory_path, dtype=column_type_map, parse_dates=date_columns
|
|
985
|
+
)
|
|
986
|
+
df.replace({np.nan: None}, inplace=True)
|
|
980
987
|
return df
|
|
981
988
|
|
|
982
989
|
def insert_from_df(self, df, batch=1, try_mode=False, debug_mode=False, adjust_df=False, insert_id=False):
|
dbhydra/test_migrator.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import dbhydra.dbhydra_core as dh
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
db1=dh.MysqlDb("config-mysql.ini")
|
|
5
|
+
with db1.connect_to_db():
|
|
6
|
+
|
|
7
|
+
nodes_table = dh.MysqlTable(db1, "nodes",columns=["id","name"],types=["int","int"])
|
|
8
|
+
#nodes_table.create()
|
|
9
|
+
|
|
10
|
+
db1.initialize_migrator()
|
|
11
|
+
|
|
12
|
+
print(nodes_table.column_type_dict)
|
|
13
|
+
|
|
14
|
+
new_column_type_dict={"id":"int","name":"nvarchar","age":"int"}
|
|
15
|
+
|
|
16
|
+
migration1=db1.migrator.create_table_migration("nodes", nodes_table.column_type_dict, new_column_type_dict)
|
|
17
|
+
db1.migrator.save_current_migration_to_json()
|
|
18
|
+
migration2=db1.migrator.create_table_migration("nodes", new_column_type_dict, nodes_table.column_type_dict)
|
|
19
|
+
db1.migrator.save_current_migration_to_json()
|
|
20
|
+
print(migration1)
|
|
21
|
+
print(migration2)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
|
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
dbhydra/__init__.py,sha256=DCocEeXf4QxdVlBRlNiFvuP5IZJ5aa77_DbUR-_4C14,65
|
|
2
|
-
dbhydra/dbhydra_core.py,sha256=
|
|
2
|
+
dbhydra/dbhydra_core.py,sha256=26xBOo3sl--xFa-IrnE3AmBjB3ut5CXUJ1add438ups,2470
|
|
3
|
+
dbhydra/test_migrator.py,sha256=e3Nnb2mCd3CfjhjSexNg1tXVJMjkl5cCoYcuhbfZ4pM,803
|
|
3
4
|
dbhydra/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
dbhydra/src/abstract_db.py,sha256=
|
|
5
|
-
dbhydra/src/abstract_table.py,sha256=
|
|
5
|
+
dbhydra/src/abstract_db.py,sha256=lEP24vWz0HdGjQgSoHnawNK_NvRlSLuvzVSiiawssuw,5901
|
|
6
|
+
dbhydra/src/abstract_table.py,sha256=c3pkBTgMOLGDgLH4YEfqM0x33puh-gkML9Rid8xzdFs,17081
|
|
6
7
|
dbhydra/src/bigquery_db.py,sha256=77XsgvYbANlvYaJnuVve-kz-PNBx_CHoYCL-eYnA8e4,1834
|
|
7
|
-
dbhydra/src/migrator.py,sha256=
|
|
8
|
+
dbhydra/src/migrator.py,sha256=QzaODEFfraD9_6HN_Osaidaj-nLYQryCYYWwJtUu3n8,18931
|
|
8
9
|
dbhydra/src/mongo_db.py,sha256=mP48zRjI7mXKpm45R8prroZI-Eo7JKf0KJqGX-oTy3w,1922
|
|
9
|
-
dbhydra/src/mysql_db.py,sha256=
|
|
10
|
+
dbhydra/src/mysql_db.py,sha256=xFYy1Ty7iS3GXSncFoaKve4QN1SMJiuDjGyMbb-b1bw,3152
|
|
10
11
|
dbhydra/src/postgres_db.py,sha256=L7MaBq_6ArwDSP_5LaEqK58oLxZ1X7FgIokcDOSB7wk,1805
|
|
11
12
|
dbhydra/src/sqlserver_db.py,sha256=9Xi3NAliqM79MTV8fpNQb0nWMH8Bqjl1leJSEqgyT94,3611
|
|
12
|
-
dbhydra/src/tables.py,sha256=
|
|
13
|
+
dbhydra/src/tables.py,sha256=QZK76rv_d0MpXGNnuAezouXN8dO0nPrkVMmKVsHxj68,46656
|
|
13
14
|
dbhydra/src/xlsx_db.py,sha256=z6d-IjMYMmXC591Mt5DcxIYWyluanjPRFd-sXtjjXww,3514
|
|
14
15
|
dbhydra/src/errors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
16
|
dbhydra/src/errors/exceptions.py,sha256=LVpfbTd3NHfQIM-D5TFAU6hOZwGQ3b5DwFD4B6vtf2U,149
|
|
@@ -17,8 +18,8 @@ dbhydra/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
17
18
|
dbhydra/tests/test_cases.py,sha256=eAFGaHaIaab3md3HHm2_ryb_HHfObtcXDAEzLh4qWx8,508
|
|
18
19
|
dbhydra/tests/test_mongo.py,sha256=M8TD72M0iQAk7ZcLTWwLmcmmF_zwALnYEGTWjhQlq0s,1979
|
|
19
20
|
dbhydra/tests/test_sql.py,sha256=aPFXyA0jh8o9VG3B5f9fNz7qDbuVPZ9TcE2twn5dAeQ,3126
|
|
20
|
-
dbhydra-2.1.
|
|
21
|
-
dbhydra-2.1.
|
|
22
|
-
dbhydra-2.1.
|
|
23
|
-
dbhydra-2.1.
|
|
24
|
-
dbhydra-2.1.
|
|
21
|
+
dbhydra-2.2.1.dist-info/LICENSE,sha256=k49Yga8CP889JJaHlOpGFzr_be2nqMoep2chYeIDctk,1091
|
|
22
|
+
dbhydra-2.2.1.dist-info/METADATA,sha256=fXT5IdyIT6MA0US_YolueauSs0KAtZJE5392uF7G03c,2298
|
|
23
|
+
dbhydra-2.2.1.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
24
|
+
dbhydra-2.2.1.dist-info/top_level.txt,sha256=oO4Gf1T8_txIsIlp11GI0k7PtBIMb9GRwb5ObF4MLVg,8
|
|
25
|
+
dbhydra-2.2.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|