dbhydra 2.2.0__tar.gz → 2.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. {dbhydra-2.2.0 → dbhydra-2.2.1}/PKG-INFO +1 -1
  2. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra/dbhydra_core.py +2 -1
  3. dbhydra-2.2.1/dbhydra/src/migrator.py +425 -0
  4. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra/src/mysql_db.py +3 -0
  5. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra/src/tables.py +1 -2
  6. dbhydra-2.2.1/dbhydra/test_migrator.py +27 -0
  7. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra.egg-info/PKG-INFO +1 -1
  8. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra.egg-info/SOURCES.txt +1 -0
  9. {dbhydra-2.2.0 → dbhydra-2.2.1}/setup.py +1 -1
  10. dbhydra-2.2.0/dbhydra/src/migrator.py +0 -395
  11. {dbhydra-2.2.0 → dbhydra-2.2.1}/LICENSE +0 -0
  12. {dbhydra-2.2.0 → dbhydra-2.2.1}/README.md +0 -0
  13. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra/__init__.py +0 -0
  14. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra/src/__init__.py +0 -0
  15. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra/src/abstract_db.py +0 -0
  16. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra/src/abstract_table.py +0 -0
  17. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra/src/bigquery_db.py +0 -0
  18. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra/src/errors/__init__.py +0 -0
  19. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra/src/errors/exceptions.py +0 -0
  20. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra/src/mongo_db.py +0 -0
  21. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra/src/postgres_db.py +0 -0
  22. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra/src/sqlserver_db.py +0 -0
  23. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra/src/xlsx_db.py +0 -0
  24. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra/tests/__init__.py +0 -0
  25. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra/tests/test_cases.py +0 -0
  26. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra/tests/test_mongo.py +0 -0
  27. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra/tests/test_sql.py +0 -0
  28. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra.egg-info/dependency_links.txt +0 -0
  29. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra.egg-info/requires.txt +0 -0
  30. {dbhydra-2.2.0 → dbhydra-2.2.1}/dbhydra.egg-info/top_level.txt +0 -0
  31. {dbhydra-2.2.0 → dbhydra-2.2.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dbhydra
3
- Version: 2.2.0
3
+ Version: 2.2.1
4
4
  Summary: Data science friendly ORM combining Python
5
5
  Home-page: https://github.com/DovaX/dbhydra
6
6
  Author: DovaX
@@ -13,7 +13,8 @@ from dbhydra.src.mongo_db import MongoDb
13
13
  from dbhydra.src.postgres_db import PostgresDb
14
14
  from dbhydra.src.xlsx_db import XlsxDb, XlsxDB
15
15
  from dbhydra.src.abstract_db import AbstractDb
16
- from dbhydra.src.tables import SqlServerTable, PostgresTable, MysqlTable, XlsxTable, AbstractTable, MongoTable, BigQueryTable, Table, AbstractSelectable, AbstractJoinable
16
+ from dbhydra.src.tables import (SqlServerTable, PostgresTable, MysqlTable, XlsxTable, AbstractTable, MongoTable,
17
+ BigQueryTable, Table, AbstractSelectable, AbstractJoinable, PYTHON_TO_MYSQL_DATA_MAPPING)
17
18
  ##### Do not remove imports - they are exported in the package
18
19
 
19
20
 
@@ -0,0 +1,425 @@
1
+ import os
2
+ import math
3
+ import json
4
+ import pandas as pd
5
+
6
+ from typing import Optional
7
+ from deepdiff import DeepDiff
8
+ from dataclasses import dataclass, asdict
9
+
10
+ PENDING_MIGRATION_DEFAULT_PATH = "./db/migrations/pending_migration.json"
11
+ MIGRATION_HISTORY_DEFAULT_PATH = "./db/migrations/migration_history.json"
12
+
13
+ # @dataclass
14
+ # class Migration:
15
+ # forward: list[dict]
16
+ # backward: list[dict]
17
+
18
class Migrator:
    """
    Build and execute database schema migrations.

    A migration step is a single-key dictionary of the form
    ``{operation: options}`` where ``operation`` is one of ``"create"``,
    ``"drop"``, ``"add_column"``, ``"modify_column"`` or ``"drop_column"``.
    Steps are executed through the table class exposed by
    ``db.matching_table_class``.

    Two workflows coexist in this class:

    * the older one, which accumulates steps in ``_migration_list`` and
      serialises them to ``migrations/*.json`` files;
    * the newer one, which accumulates paired forward/backward steps in
      ``_pending_forward_migration_list`` / ``_pending_backward_migration_list``
      and applies them with ``migrate_forward`` / ``migrate_backward``.

    Note: This class is compatible with MySQL and Postgres dialects and has been somewhat tested
    with those databases. It may require adjustments for other database systems.

    Attributes:
        db: The database connection object used for executing migrations.
    """

    def __init__(self, db):
        self.db = db

        # Used in older implementations, TODO: decide whether to keep both approaches, unify them or pick one
        self._migration_number = 1
        self._migration_list = []

        # Used in newer approach: forward steps and their inverse (backward) steps
        self._pending_forward_migration_list = []
        self._pending_backward_migration_list = []

    def process_migration_dict(self, migration_dict):
        """
        Execute a single migration step against the database.

        Args:
            migration_dict (dict): A dictionary with exactly one key (the operation
                name) whose value holds the options of that operation.

        Raises:
            AssertionError: If ``migration_dict`` does not contain exactly one key.

        Unknown operation names are ignored.
        """
        matching_table_class = self.db.matching_table_class  # E.g. MysqlTable

        assert len(migration_dict.keys()) == 1
        operation, options = next(iter(migration_dict.items()))

        if operation == "create":
            table = matching_table_class(self.db, options["table_name"], options["columns"], options["types"])
            table.convert_types_from_mysql()
            table.create()
        elif operation == "drop":
            table = matching_table_class(self.db, options["table_name"])
            table.drop()
        elif operation == "add_column":
            table = matching_table_class(self.db, options["table_name"])
            table.initialize_columns()
            table.initialize_types()
            table.convert_types_from_mysql()
            table.add_column(options["column_name"], options["column_type"])
        elif operation == "modify_column":
            table = matching_table_class(self.db, options["table_name"])
            table.initialize_columns()
            table.initialize_types()
            table.convert_types_from_mysql()
            table.modify_column(options["column_name"], options["column_type"])
        elif operation == "drop_column":
            table = matching_table_class(self.db, options["table_name"])
            table.initialize_columns()
            table.initialize_types()
            table.drop_column(options["column_name"])

    # Old approach methods START
    def next_migration(self):
        """Advance the migration counter and start a new, empty migration list."""
        self._migration_number += 1
        self._migration_list = []

    def migrate(self, migration_list):
        """Execute every migration step of ``migration_list`` in order."""
        for migration_dict in migration_list:
            self.process_migration_dict(migration_dict)

    def migrate_from_json(self, filename):
        """
        Execute the migration steps stored in a JSON file.

        Only the first line of the file is parsed, which matches the single-line
        output written by ``migration_list_to_json``.

        Args:
            filename (str): Path of the JSON file to read.

        Returns:
            list: The parsed list of migration steps.
        """
        with open(filename, "r") as f:
            first_line = f.readlines()[0].replace("\n", "")

        result = json.loads(first_line)
        for migration_dict in result:
            self.process_migration_dict(migration_dict)
        return result

    def migration_list_to_json(self, filename=None):
        """
        Write the current (old approach) migration list to a file in ``migrations/``.

        Args:
            filename (Optional[str]): File name without extension. When it is
                ``None``, empty or whitespace-only, ``migration-<number>`` is used.
        """
        result = json.dumps(self._migration_list)

        if filename is None or filename == "" or filename.isspace():
            file_path = "migrations/migration-" + str(self._migration_number) + ".json"
        else:
            file_path = f"migrations/{filename}.json"

        with open(file_path, "w+") as f:
            f.write(result)

    ##### Auxiliary? #####
    def create_migrations_from_df(self, name, dataframe):
        """
        Append a ``create`` step derived from a DataFrame and write the list to JSON.

        Args:
            name (str): Name of the table to create.
            dataframe (pd.DataFrame): DataFrame whose columns define the table.
        """
        columns, return_types = self.extract_columns_and_types_from_df(dataframe)

        migration_dict = {"create": {"table_name": name, "columns": columns, "types": return_types}}
        self._migration_list.append(migration_dict)
        self.migration_list_to_json()
        # return columns, return_types

    def extract_columns_and_types_from_df(self, dataframe):
        """
        Derive column names and Python type names from the first row of a DataFrame.

        An ``id`` column of type ``int`` is prepended unless the first column is
        already named ``id``.

        Args:
            dataframe (pd.DataFrame): The DataFrame to inspect.

        Returns:
            tuple[list, list]: Column names and the matching type names.
        """
        columns = list(dataframe.columns)
        return_types = []

        if columns == []:
            return ["id"], ["int"]

        none_type_name = type(None).__name__

        for column in dataframe:
            if dataframe.empty:
                # No row to inspect, so the type is unknown
                return_types.append(none_type_name)
                continue

            first_value = dataframe.loc[0, column]
            try:
                if pd.isna(first_value):
                    return_types.append(none_type_name)
                else:
                    try:
                        # numpy scalars expose .item() to get the native Python value
                        return_types.append(type(first_value.item()).__name__)
                    except Exception:
                        return_types.append(type(first_value).__name__)
            except Exception:
                # Fallback when the checks above fail: size strings by the longest
                # value plus 10 %, rounded up to the next multiple of ten.
                # length = 2**( int(dataframe[col].str.len().max()) - 1).bit_length()
                length = int(dataframe[column].str.len().max())
                length += 0.1 * length
                length = int(math.ceil(length / 10.0)) * 10
                value_type_name = type(first_value).__name__
                return_types.append(f'nvarchar({length})' if value_type_name == 'str' else value_type_name)

        if columns[0] != "id":
            columns.insert(0, "id")
            return_types.insert(0, "int")

        return columns, return_types
    # Old approach methods END

    # def set_pending_migration(self, migration_dict: dict[str, list]):
    #     self._pending_migration = Migration(**migration_dict)

    def migrate_forward(self):
        """
        Applies the pending forward migrations.

        Processes each step of the pending forward list in order and then clears
        both pending lists. Saving to migration history is currently disabled.

        Returns:
            None
        """
        for migration_dict in self._pending_forward_migration_list:
            self.process_migration_dict(migration_dict)

        #self._save_migration_to_history(migration=self._pending_migration)
        self._clear_pending_migration()

    def migrate_backward(self):
        """
        Applies the pending backward migrations.

        Processes each step of the pending backward list in order and then clears
        both pending lists. Saving to migration history is currently disabled.

        Returns:
            None
        """
        for migration_dict in self._pending_backward_migration_list:
            self.process_migration_dict(migration_dict)

        #history_migration = Migration(forward=self._pending_migration.backward, backward=self._pending_migration.forward)
        #self._save_migration_to_history(migration=history_migration)
        self._clear_pending_migration()

    # def migrate_n_steps_back_in_history(self, n: int, migration_history_json: str = MIGRATION_HISTORY_DEFAULT_PATH):
    #     migration_history = self._read_migration_history_json(migration_history_json)

    #     if len(migration_history) < n:
    #         raise ValueError(f"Provided n (= {n}) is larger than migration history length (= {len(migration_history)}).")

    #     total_backward_migration = Migration(forward=[], backward=[])
    #     migrations = migration_history[-n:] # Take last n elements of migration history for execution

    #     # Loop in reversed order as we execute backward migrations in reversed order compared to forward ones
    #     for migration_dict in reversed(migrations):
    #         total_backward_migration.forward.append(migration_dict["forward"])
    #         total_backward_migration.backward.append(migration_dict["backward"])

    #     self.set_pending_migration(asdict(total_backward_migration))
    #     self.migrate_backward()

    # def load_migration_from_json(self, json_file_path: str = PENDING_MIGRATION_DEFAULT_PATH):
    #     with open(json_file_path, "r") as file:
    #         migration_dict = json.load(file)

    #     self.set_pending_migration(migration_dict)

    # def save_pending_migration_to_json(self, file_path: str = PENDING_MIGRATION_DEFAULT_PATH):
    #     if not file_path.endswith(".json"):
    #         raise ValueError("pending migration file must be of '.json' type.")

    #     self._build_folder_structure_for_file_path(file_path)

    #     with open(file_path, "w+") as file:
    #         json.dump(asdict(self._pending_migration), file, indent=2)

    def create_table_migration(self, table_name: str, old_column_type_dict: Optional[dict], new_column_type_dict: Optional[dict]):
        """
        Creates a migration for a database table based on its old and new column_type_dicts.

        The generated steps are also appended to the pending forward/backward lists.

        Args:
            table_name (str): The name of the database table.
            old_column_type_dict (Optional[dict]): The old column_type_dict of the table.
            new_column_type_dict (Optional[dict]): The new column_type_dict of the table.

        If old_column_type_dict is empty/None and new_column_type_dict is not: CREATE table
        If old_column_type_dict is not empty/None and new_column_type_dict is: DROP table
        Otherwise the two dicts are compared and column-level steps are generated.

        Returns:
            tuple[list, list]: The forward migration list and the backward migration list.

        Raises:
            ValueError: If the table_name argument is empty, or if both
                column_type_dicts are empty (there is nothing to migrate).
        """
        if not table_name:
            raise ValueError("The 'table_name' argument must be a non-empty string.")

        if not old_column_type_dict and not new_column_type_dict:
            raise ValueError("At least one of 'old_column_type_dict' and 'new_column_type_dict' must be non-empty.")

        if not old_column_type_dict:
            # empty initial column_type_dict --> non-empty new column_type_dict ==> create the table
            columns, types = list(new_column_type_dict.keys()), list(new_column_type_dict.values())
            forward_migration_list = [{"create": {"table_name": table_name, "columns": columns, "types": types}}]
            backward_migration_list = [{"drop": {"table_name": table_name}}]
        elif not new_column_type_dict:
            # new column_type_dict is empty ==> drop the table; the backward step
            # recreates it from the old column_type_dict
            columns, types = list(old_column_type_dict.keys()), list(old_column_type_dict.values())
            forward_migration_list = [{"drop": {"table_name": table_name}}]
            backward_migration_list = [{"create": {"table_name": table_name, "columns": columns, "types": types}}]
        else:
            diff = DeepDiff(old_column_type_dict, new_column_type_dict, verbose_level=2)
            forward_migration_list, backward_migration_list = self._convert_deepdiff_dict_into_migration_lists(table_name, diff)

        #migration = Migration(forward=forward_migration_list, backward=backward_migration_list)

        self._append_migration_to_pending_migration(forward_migration_list, backward_migration_list)

        return forward_migration_list, backward_migration_list

    @staticmethod
    def _extract_column_name_from_deepdiff_key(deepdiff_key: str) -> str:
        """
        Extracts the column name from a key generated by deepdiff.

        Args:
            deepdiff_key (str): The key generated by deepdiff.

        Returns:
            str: The extracted column name.

        Example:
            >>> Migrator._extract_column_name_from_deepdiff_key("root['table']['column']")
            'column'
        """
        # The column name is the last bracketed element; strip the quote and bracket characters
        return deepdiff_key.split('[')[-1].strip("']")

    @staticmethod
    def _extract_type_length(column_type: str) -> Optional[str]:
        """
        Return the text between the first parentheses of a type, e.g. ``'2047'``
        for ``'varchar(2047)'``, or ``None`` when the type has no parentheses.
        """
        start = column_type.find("(")
        end = column_type.find(")", start + 1)
        if start == -1 or end == -1:
            return None
        return column_type[start + 1:end].strip()

    @staticmethod
    def _convert_deepdiff_dict_into_migration_lists(table_name: str, deepdiff_dict: dict):
        """
        Converts the deepdiff dictionary from the old and new column_type_dicts comparison
        into forward and backward migration lists.

        Args:
            table_name (str): A name of the examined DB table.
            deepdiff_dict (dict): A dictionary from DeepDiff comparison of the old and new table column_type_dict.

        Returns:
            tuple[list, list]: The forward migration list and the backward migration list.

        Raises:
            KeyError: If ``deepdiff_dict`` contains a change category other than
                ``dictionary_item_added``, ``dictionary_item_removed`` or ``values_changed``.

        Example:
            >>> deepdiff_dict = {'dictionary_item_removed': {"root['hehexd']": 'double'}}
            >>> Migrator._convert_deepdiff_dict_into_migration_lists('results', deepdiff_dict)
            ([{'drop_column': {'table_name': 'results', 'column_name': 'hehexd'}}], [{'add_column': {'table_name': 'results', 'column_name': 'hehexd', 'column_type': 'double'}}])
        """
        forward_migration_list, backward_migration_list = [], []

        forward_conversions = {
            "dictionary_item_added": "add_column",
            "dictionary_item_removed": "drop_column",
            "values_changed": "modify_column"
        }
        backward_conversions = {
            "dictionary_item_added": "drop_column",
            "dictionary_item_removed": "add_column",
            "values_changed": "modify_column"
        }

        for action_name, deepdiff_action in deepdiff_dict.items():
            for deepdiff_key in deepdiff_action.keys():
                column_name = Migrator._extract_column_name_from_deepdiff_key(deepdiff_key)
                forward_action, backward_action = forward_conversions[action_name], backward_conversions[action_name]

                if action_name == "dictionary_item_added":
                    column_type = deepdiff_action[deepdiff_key]
                    forward_migration_list.append({forward_action: {"table_name": table_name, "column_name": column_name, "column_type": column_type}})
                    backward_migration_list.append({backward_action: {"table_name": table_name, "column_name": column_name}})
                elif action_name == "dictionary_item_removed":
                    column_type = deepdiff_action[deepdiff_key]
                    forward_migration_list.append({forward_action: {"table_name": table_name, "column_name": column_name}})
                    backward_migration_list.append({backward_action: {"table_name": table_name, "column_name": column_name, "column_type": column_type}})
                elif action_name == "values_changed":
                    column_type = deepdiff_action[deepdiff_key]["old_value"]
                    column_new_type = deepdiff_action[deepdiff_key]["new_value"]

                    # HACK: Do not create migrations for cases such as varchar(2047) --> nvarchar(2047).
                    # The lengths must actually be equal; two types without a length also count as equal.
                    is_varchar_in_types = "varchar" in column_type and "varchar" in column_new_type
                    is_varchar_nvarchar_conversion = is_varchar_in_types and (
                        Migrator._extract_type_length(column_type) == Migrator._extract_type_length(column_new_type)
                    )

                    if not is_varchar_nvarchar_conversion:
                        forward_migration_list.append({forward_action: {"table_name": table_name, "column_name": column_name,
                                                                        "column_type": column_new_type}})
                        backward_migration_list.append({backward_action: {"table_name": table_name, "column_name": column_name,
                                                                          "column_type": column_type}})

        return forward_migration_list, backward_migration_list

    def _append_migration_to_pending_migration(self, forward_migration_list, backward_migration_list):
        """Append the given steps to the pending forward and backward lists."""
        self._pending_forward_migration_list += forward_migration_list
        self._pending_backward_migration_list += backward_migration_list

    def _clear_pending_migration(self):
        """Reset both pending migration lists to empty."""
        self._pending_forward_migration_list = []
        self._pending_backward_migration_list = []
390
+
391
+ # def _read_migration_history_json(self, file_path: str = MIGRATION_HISTORY_DEFAULT_PATH):
392
+ # if not file_path.endswith(".json"):
393
+ # raise ValueError("Migration history file must be of '.json' type.")
394
+
395
+ # if not os.path.exists(file_path):
396
+ # raise FileNotFoundError(f"Migration history file '{file_path}' does not exist.")
397
+
398
+ # try:
399
+ # with open(file_path, "r") as file:
400
+ # migration_history = json.load(file)
401
+ # except json.JSONDecodeError:
402
+ # migration_history = []
403
+
404
+ # return migration_history
405
+
406
+ # def _save_migration_to_history(self, migration: Migration, file_path: str = MIGRATION_HISTORY_DEFAULT_PATH):
407
+ # try:
408
+ # migration_history = self._read_migration_history_json(file_path)
409
+ # except FileNotFoundError:
410
+ # self._build_folder_structure_for_file_path(file_path)
411
+ # migration_history = []
412
+
413
+ # migration_history.append(asdict(migration))
414
+
415
+ # with open(file_path, "w") as file:
416
+ # json.dump(migration_history, file, indent=2)
417
+
418
+ # def _build_folder_structure_for_file_path(self, file_path: str):
419
+ # folder_path = os.path.dirname(file_path)
420
+ # if not os.path.exists(folder_path):
421
+ # print(f"Folder path to the file '{file_path}' does not exist. Creating the file and the folder structure.")
422
+ # os.makedirs(folder_path)
423
+
424
+
425
+
@@ -43,6 +43,9 @@ class MysqlDb(AbstractDb):
43
43
  print("DB connection established")
44
44
 
45
45
  def create_new_db(self):
46
+ self.connection = pymysql.connect(host=self.DB_SERVER, port=self.DB_PORT, user=self.DB_USERNAME,
47
+ charset="utf8mb4", password=self.DB_PASSWORD)
48
+ self.cursor = self.connection.cursor()
46
49
  create_db_command = "CREATE DATABASE " + self.DB_DATABASE
47
50
  self.execute(create_db_command)
48
51
 
@@ -966,9 +966,8 @@ class XlsxTable(AbstractTable):
966
966
  except Exception:
967
967
  # print(f"Error while reading data into XlsxTable: {e}")
968
968
  # df = pd.DataFrame(columns=self.columns)
969
- if attempt < self.NUMBER_OF_RETRIES:
969
+ if attempt < self.NUMBER_OF_RETRIES - 1:
970
970
  time.sleep(0.1)
971
- continue
972
971
  else:
973
972
  print(f"Failed to read data from {self.table_directory_path}, returning empty DataFrame")
974
973
  df = pd.DataFrame(columns=self.columns)
@@ -0,0 +1,27 @@
1
+ import dbhydra.dbhydra_core as dh
2
+
3
+
4
# Manual smoke script for the Migrator: builds a forward migration (adding the
# "age" column and changing the type of "name") and the reverse migration for
# the "nodes" table, then prints both. Requires a reachable MySQL server
# configured in "config-mysql.ini".
db1 = dh.MysqlDb("config-mysql.ini")
with db1.connect_to_db():

    nodes_table = dh.MysqlTable(db1, "nodes", columns=["id", "name"], types=["int", "int"])
    #nodes_table.create()

    db1.initialize_migrator()

    print(nodes_table.column_type_dict)

    new_column_type_dict = {"id": "int", "name": "nvarchar", "age": "int"}

    # create_table_migration returns (forward_migration_list, backward_migration_list)
    # and also appends the steps to the migrator's pending lists.
    # NOTE: the former save_current_migration_to_json() calls were removed because
    # the Migrator no longer provides that method (JSON persistence is disabled).
    migration1 = db1.migrator.create_table_migration("nodes", nodes_table.column_type_dict, new_column_type_dict)
    migration2 = db1.migrator.create_table_migration("nodes", new_column_type_dict, nodes_table.column_type_dict)
    print(migration1)
    print(migration2)
22
+
23
+
24
+
25
+
26
+
27
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dbhydra
3
- Version: 2.2.0
3
+ Version: 2.2.1
4
4
  Summary: Data science friendly ORM combining Python
5
5
  Home-page: https://github.com/DovaX/dbhydra
6
6
  Author: DovaX
@@ -3,6 +3,7 @@ README.md
3
3
  setup.py
4
4
  dbhydra/__init__.py
5
5
  dbhydra/dbhydra_core.py
6
+ dbhydra/test_migrator.py
6
7
  dbhydra.egg-info/PKG-INFO
7
8
  dbhydra.egg-info/SOURCES.txt
8
9
  dbhydra.egg-info/dependency_links.txt
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
5
5
 
6
6
  setuptools.setup(
7
7
  name='dbhydra',
8
- version='2.2.0',
8
+ version='2.2.1',
9
9
  author='DovaX',
10
10
  author_email='dovax.ai@gmail.com',
11
11
  description='Data science friendly ORM combining Python',
@@ -1,395 +0,0 @@
1
- import os
2
- import math
3
- import json
4
- import pandas as pd
5
-
6
- from typing import Optional
7
- from deepdiff import DeepDiff
8
- from dataclasses import dataclass, asdict
9
-
10
- CURRENT_MIGRATION_DEFAULT_PATH = "./db/migrations/current_migration.json"
11
- MIGRATION_HISTORY_DEFAULT_PATH = "./db/migrations/migration_history.json"
12
-
13
- @dataclass
14
- class Migration:
15
- forward: list[dict]
16
- backward: list[dict]
17
-
18
- class Migrator:
19
- """
20
- A class for managing database migrations.
21
-
22
- This class provides functionality to create, manage, and execute database migrations
23
- using a migration system compatible with MySQL and Postgres dialects. It allows for
24
- creating forward and backward migrations, reading and writing migrations to JSON files,
25
- and executing migrations based on changes detected in database structures.
26
-
27
- Note: This class is compatible with MySQL and Postgres dialects and has been somewhat tested
28
- with those databases. It may require adjustments for other database systems.
29
-
30
- Attributes:
31
- db: The database connection object used for executing migrations.
32
- """
33
-
34
- def __init__(self, db):
35
- self.db = db
36
-
37
- # Used in older implementations, TODO: decide whether to keep both approaches, unify them or pick one
38
- self._migration_number = 1
39
- self._migration_list = []
40
-
41
- # Used in newer approach
42
- self._current_migration = Migration(forward=[], backward=[])
43
-
44
- def process_migration_dict(self, migration_dict):
45
- matching_table_class = self.db.matching_table_class #E.g. MysqlTable
46
-
47
- assert len(migration_dict.keys()) == 1
48
- operation = list(migration_dict.keys())[0]
49
- options = migration_dict[operation]
50
- if operation == "create":
51
- table = matching_table_class(self.db, options["table_name"], options["columns"], options["types"])
52
- table.convert_types_from_mysql()
53
- table.create()
54
- elif operation == "drop":
55
- table = matching_table_class(self.db, options["table_name"])
56
- table.drop()
57
- elif operation == "add_column":
58
- table = matching_table_class(self.db, options["table_name"])
59
- table.initialize_columns()
60
- table.initialize_types()
61
- table.convert_types_from_mysql()
62
- table.add_column(options["column_name"], options["column_type"])
63
- elif operation == "modify_column":
64
- table = matching_table_class(self.db, options["table_name"])
65
- table.initialize_columns()
66
- table.initialize_types()
67
- table.convert_types_from_mysql()
68
- table.modify_column(options["column_name"], options["column_type"])
69
- elif operation == "drop_column":
70
- table = matching_table_class(self.db, options["table_name"])
71
- table.initialize_columns()
72
- table.initialize_types()
73
- table.drop_column(options["column_name"])
74
-
75
- # Old approach methods START
76
- def next_migration(self):
77
- self._migration_number += 1
78
- self._migration_list = []
79
-
80
- def migrate(self, migration_list):
81
- for i, migration_dict in enumerate(migration_list):
82
- self.process_migration_dict(migration_dict)
83
-
84
- def migrate_from_json(self, filename):
85
- with open(filename, "r") as f:
86
- rows = f.readlines()[0].replace("\n", "")
87
- result = json.loads(rows)
88
- for dict in result:
89
- self.process_migration_dict(dict)
90
- return (result)
91
-
92
- def migration_list_to_json(self, filename=None):
93
- result = json.dumps(self._migration_list)
94
-
95
- if filename is None or filename == "" or filename.isspace():
96
- with open("migrations/migration-" + str(self._migration_number) + ".json", "w+") as f:
97
- f.write(result)
98
- else:
99
- with open(f"migrations/{filename}.json", "w+") as f:
100
- f.write(result)
101
-
102
- def create_migrations_from_df(self, name, dataframe):
103
-
104
- columns, return_types = self.extract_columns_and_types_from_df(dataframe)
105
-
106
- migration_dict = {"create": {"table_name": name, "columns": columns, "types": return_types}}
107
- self._migration_list.append(migration_dict)
108
- self.migration_list_to_json()
109
- # return columns, return_types
110
-
111
- def extract_columns_and_types_from_df(self, dataframe):
112
- columns = list(dataframe.columns)
113
-
114
- return_types = []
115
-
116
- if columns == []:
117
- return ["id"], ["int"]
118
-
119
- for column in dataframe:
120
- if dataframe.empty:
121
- return_types.append(type(None).__name__)
122
- continue
123
-
124
- t = dataframe.loc[0, column]
125
- try:
126
- if pd.isna(t):
127
- return_types.append(type(None).__name__)
128
- else:
129
- try:
130
- return_types.append(type(t.item()).__name__)
131
- except:
132
- return_types.append(type(t).__name__)
133
- except:
134
- # length = 2**( int(dataframe[col].str.len().max()) - 1).bit_length()
135
- length = int(dataframe[column].str.len().max())
136
- length += 0.1 * length
137
- length = int(math.ceil(length / 10.0)) * 10
138
- return_types.append(f'nvarchar({length})' if type(t).__name__ == 'str' else type(t).__name__)
139
-
140
- if (columns[0] != "id"):
141
- columns.insert(0, "id")
142
- return_types.insert(0, "int")
143
-
144
- return columns, return_types
145
- # Old approach methods END
146
-
147
- def set_current_migration(self, migration_dict: dict[str, list]):
148
- self._current_migration = Migration(**migration_dict)
149
-
150
- def migrate_forward(self):
151
- """
152
- Applies forward migrations from the current migration object.
153
-
154
- Iterates through each migration dictionary in the current migration's forward list,
155
- processes the migration, saves it to migration history, and clears the current migration.
156
-
157
- Returns:
158
- None
159
- """
160
-
161
- for migration_dict in self._current_migration.forward:
162
- self.process_migration_dict(migration_dict)
163
-
164
- self._save_migration_to_history(migration=self._current_migration)
165
- self._clear_current_migration()
166
-
167
- def migrate_backward(self):
168
- """
169
- Applies backward migrations from the current migration object.
170
-
171
- Iterates through each migration dictionary in the current migration's backward list,
172
- processes the migration, saves it to migration history, and clears the current migration.
173
-
174
- Returns:
175
- None
176
- """
177
-
178
- for migration_dict in self._current_migration.backward:
179
- self.process_migration_dict(migration_dict)
180
-
181
- history_migration = Migration(forward=self._current_migration.backward, backward=self._current_migration.forward)
182
- self._save_migration_to_history(migration=history_migration)
183
- self._clear_current_migration()
184
-
185
- def migrate_n_steps_back_in_history(self, n: int, migration_history_json: str = MIGRATION_HISTORY_DEFAULT_PATH):
186
- migration_history = self._read_migration_history_json(migration_history_json)
187
-
188
- if len(migration_history) < n:
189
- raise ValueError(f"Provided n (= {n}) is larger than migration history length (= {len(migration_history)}).")
190
-
191
- total_backward_migration = Migration(forward=[], backward=[])
192
- migrations = migration_history[-n:] # Take last n elements of migration history for execution
193
-
194
- # Loop in reversed order as we execute backward migrations in reversed order compared to forward ones
195
- for migration_dict in reversed(migrations):
196
- total_backward_migration.forward.append(migration_dict["forward"])
197
- total_backward_migration.backward.append(migration_dict["backward"])
198
-
199
- self.set_current_migration(asdict(total_backward_migration))
200
- self.migrate_backward()
201
-
202
- def load_migration_from_json(self, json_file_path: str = CURRENT_MIGRATION_DEFAULT_PATH):
203
- with open(json_file_path, "r") as file:
204
- migration_dict = json.load(file)
205
-
206
- self.set_current_migration(migration_dict)
207
-
208
- def save_current_migration_to_json(self, file_path: str = CURRENT_MIGRATION_DEFAULT_PATH):
209
- if not file_path.endswith(".json"):
210
- raise ValueError("Current migration file must be of '.json' type.")
211
-
212
- self._build_folder_structure_for_file_path(file_path)
213
-
214
- with open(file_path, "w+") as file:
215
- json.dump(asdict(self._current_migration), file, indent=2)
216
-
217
def create_table_migration(self, table_name: str, old_structure: Optional[dict], new_structure: Optional[dict]):
    """
    Creates a migration for a database table based on its old and new structures
    and merges it into the current migration.

    Args:
        table_name (str): The name of the database table.
        old_structure (Optional[dict]): The old structure of the table (column name -> column type).
        new_structure (Optional[dict]): The new structure of the table (column name -> column type).

    If old_structure is empty/None and new_structure is not: CREATE table
    If new_structure is empty/None: DROP table (the backward step recreates it from old_structure)
    Otherwise: the two structures are compared with DeepDiff and column-level steps are generated.

    Returns:
        Migration: The generated migration object.

    Raises:
        ValueError: If the table_name argument is empty.
    """

    if not table_name:
        raise ValueError("The 'table_name' argument must be a non-empty string.")

    if not old_structure and new_structure:
        # empty initial structure --> non-empty new structure ==> create the table
        columns, types = list(new_structure.keys()), list(new_structure.values())
        forward_migration = [{"create": {"table_name": table_name, "columns": columns, "types": types}}]
        backward_migration = [{"drop": {"table_name": table_name}}]

        migration = Migration(forward=forward_migration, backward=backward_migration)
    elif not new_structure:
        # new structure is empty ==> drop the table
        # The backward step has to recreate the table, so its columns come from the OLD structure.
        # If the old structure is unknown as well, the backward 'create' carries no columns.
        previous_structure = old_structure or {}
        columns, types = list(previous_structure.keys()), list(previous_structure.values())
        forward_migration = [{"drop": {"table_name": table_name}}]
        backward_migration = [{"create": {"table_name": table_name, "columns": columns, "types": types}}]

        migration = Migration(forward=forward_migration, backward=backward_migration)
    else:
        # Both structures are present: derive column-level changes from their difference.
        diff = DeepDiff(old_structure, new_structure, verbose_level=2)
        migration = self._convert_deepdiff_dict_into_migration(table_name, diff)

    self._merge_migration_to_current_migration(migration=migration)

    return migration
259
-
260
def _convert_deepdiff_dict_into_migration(self, table_name: str, deepdiff_dict: dict) -> Migration:
    """
    Converts deepdiff dictionary from the new and old table structures comparison into a Migration object.

    Args:
        table_name (str): A name of the examined DB table.
        deepdiff_dict (dict): A dictionary from DeepDiff comparison of the old and new table structure.

    Returns:
        Migration: A Migration object containing forward and backward migrations for the given table.

    Raises:
        KeyError: If deepdiff_dict contains a change category other than 'dictionary_item_added',
            'dictionary_item_removed' or 'values_changed'.

    Example:
        >>> table_name = 'results'
        >>> deepdiff_dict = {'dictionary_item_removed': {"root['hehexd']": 'double'}}
        >>> migrator = Migrator()
        >>> asdict(migrator._convert_deepdiff_dict_into_migration(table_name, deepdiff_dict))
        {
            'forward': [
                {'drop_column': {'table_name': 'results', 'column_name': 'hehexd'}}
            ],
            'backward': [
                {'add_column': {'table_name': 'results', 'column_name': 'hehexd', 'column_type': 'double'}}
            ]
        }
    """

    def _length_spec(sql_type):
        # Text between the first '(' and the last ')' of a type, or None when the type has no length part.
        opening, closing = sql_type.find("("), sql_type.rfind(")")
        if opening == -1 or closing < opening:
            return None
        return sql_type[opening + 1:closing].strip()

    forward_migration, backward_migration = [], []

    forward_conversions = {
        "dictionary_item_added": "add_column",
        "dictionary_item_removed": "drop_column",
        "values_changed": "modify_column"
    }
    backward_conversions = {
        "dictionary_item_added": "drop_column",
        "dictionary_item_removed": "add_column",
        "values_changed": "modify_column"
    }

    for action_name, deepdiff_action in deepdiff_dict.items():
        for deepdiff_key, change in deepdiff_action.items():
            column_name = self._extract_column_name_from_deepdiff_key(deepdiff_key)
            forward_action, backward_action = forward_conversions[action_name], backward_conversions[action_name]

            if action_name == "dictionary_item_added":
                # 'change' is the type of the newly added column
                forward_migration.append({forward_action: {"table_name": table_name, "column_name": column_name, "column_type": change}})
                backward_migration.append({backward_action: {"table_name": table_name, "column_name": column_name}})
            elif action_name == "dictionary_item_removed":
                # 'change' is the type the removed column had; needed to re-add it on the way back
                forward_migration.append({forward_action: {"table_name": table_name, "column_name": column_name}})
                backward_migration.append({backward_action: {"table_name": table_name, "column_name": column_name, "column_type": change}})
            elif action_name == "values_changed":
                column_type = change["old_value"]
                column_new_type = change["new_value"]

                # HACK: Do not create migrations for cases such as varchar(2047) --> nvarchar(2047)
                # The length parts must be COMPARED: a change like varchar(10) --> varchar(20)
                # is a real modification and must not be swallowed by this exception.
                is_varchar_in_types = "varchar" in column_type and "varchar" in column_new_type
                is_varchar_nvarchar_conversion = (
                    is_varchar_in_types and _length_spec(column_type) == _length_spec(column_new_type)
                )

                if not is_varchar_nvarchar_conversion:
                    forward_migration.append({forward_action: {"table_name": table_name, "column_name": column_name,
                                                               "column_type": column_new_type}})
                    backward_migration.append({backward_action: {"table_name": table_name, "column_name": column_name,
                                                                 "column_type": column_type}})

    return Migration(forward=forward_migration, backward=backward_migration)
330
-
331
def _extract_column_name_from_deepdiff_key(self, deepdiff_key: str) -> str:
    """
    Returns the column name encoded in the last bracketed segment of a deepdiff key.

    Args:
        deepdiff_key (str): The key generated by deepdiff.

    Returns:
        str: The extracted column name.

    Example:
        >>> migrator = Migrator()
        >>> migrator._extract_column_name_from_deepdiff_key("root['table']['column']")
        'column'
    """
    # Everything after the last '[' is the final path segment, e.g. "'column']".
    _, _, last_segment = deepdiff_key.rpartition("[")
    # Peel the surrounding quote and closing-bracket characters off both ends.
    return last_segment.strip("']")
352
-
353
def _merge_migration_to_current_migration(self, migration: Migration):
    """
    Appends the forward and backward steps of `migration` to those of the current migration.

    The current migration is replaced by a new Migration object holding the concatenated steps.

    Args:
        migration (Migration): The migration whose steps are added after the existing ones.
    """
    current = self._current_migration
    combined_forward = current.forward + migration.forward
    combined_backward = current.backward + migration.backward
    self._current_migration = Migration(forward=combined_forward, backward=combined_backward)
357
-
358
def _clear_current_migration(self):
    """Replaces the current migration with a fresh one that has no forward or backward steps."""
    empty_migration = Migration(forward=[], backward=[])
    self._current_migration = empty_migration
360
-
361
def _read_migration_history_json(self, file_path: str = MIGRATION_HISTORY_DEFAULT_PATH):
    """
    Loads the migration history stored in a JSON file.

    Args:
        file_path (str): Path of the history file; must end with '.json'.
            Defaults to MIGRATION_HISTORY_DEFAULT_PATH.

    Returns:
        The parsed JSON content, or an empty list when the file cannot be decoded as JSON.

    Raises:
        ValueError: If file_path does not end with '.json'.
        FileNotFoundError: If file_path does not exist.
    """
    has_json_extension = file_path.endswith(".json")
    if not has_json_extension:
        raise ValueError("Migration history file must be of '.json' type.")

    history_file_exists = os.path.exists(file_path)
    if not history_file_exists:
        raise FileNotFoundError(f"Migration history file '{file_path}' does not exist.")

    try:
        with open(file_path, "r") as history_file:
            return json.load(history_file)
    except json.JSONDecodeError:
        # An undecodable (e.g. empty) file is treated as a history without entries.
        return []
375
-
376
def _save_migration_to_history(self, migration: Migration, file_path: str = MIGRATION_HISTORY_DEFAULT_PATH):
    """
    Appends a migration to the history file and rewrites the file (indented by 2 spaces).

    When the history file does not exist yet, its folder structure is built and the
    history starts out empty.

    Args:
        migration (Migration): The migration to record.
        file_path (str): Path of the history file. Defaults to MIGRATION_HISTORY_DEFAULT_PATH.
    """
    try:
        recorded_migrations = self._read_migration_history_json(file_path)
    except FileNotFoundError:
        # First write: prepare the parent folders and begin with no recorded migrations.
        self._build_folder_structure_for_file_path(file_path)
        recorded_migrations = []

    new_entry = asdict(migration)
    recorded_migrations.append(new_entry)

    with open(file_path, "w") as history_file:
        json.dump(recorded_migrations, history_file, indent=2)
387
-
388
def _build_folder_structure_for_file_path(self, file_path: str):
    """
    Creates the parent folder(s) of `file_path` when they do not exist yet.

    The file itself is not created here; only the directory part of the path is handled.

    Args:
        file_path (str): Path of the file whose parent folders should exist.
    """
    folder_path = os.path.dirname(file_path)
    if not folder_path:
        # A bare file name has no directory part (the file lives in the current working
        # directory), so there is nothing to create; os.makedirs("") would raise.
        return

    if not os.path.exists(folder_path):
        print(f"Folder path to the file '{file_path}' does not exist. Creating the file and the folder structure.")
        # exist_ok guards against the folder appearing between the check and the creation.
        os.makedirs(folder_path, exist_ok=True)
393
-
394
-
395
-
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes