dbhydra 2.1.2__tar.gz → 2.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {dbhydra-2.1.2 → dbhydra-2.2.0}/PKG-INFO +1 -1
  2. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/abstract_db.py +1 -0
  3. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/abstract_table.py +6 -5
  4. dbhydra-2.2.0/dbhydra/src/migrator.py +395 -0
  5. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/mysql_db.py +4 -0
  6. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/tables.py +43 -35
  7. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra.egg-info/PKG-INFO +1 -1
  8. {dbhydra-2.1.2 → dbhydra-2.2.0}/setup.py +1 -1
  9. dbhydra-2.1.2/dbhydra/src/migrator.py +0 -114
  10. {dbhydra-2.1.2 → dbhydra-2.2.0}/LICENSE +0 -0
  11. {dbhydra-2.1.2 → dbhydra-2.2.0}/README.md +0 -0
  12. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/__init__.py +0 -0
  13. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/dbhydra_core.py +0 -0
  14. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/__init__.py +0 -0
  15. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/bigquery_db.py +0 -0
  16. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/errors/__init__.py +0 -0
  17. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/errors/exceptions.py +0 -0
  18. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/mongo_db.py +0 -0
  19. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/postgres_db.py +0 -0
  20. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/sqlserver_db.py +0 -0
  21. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/src/xlsx_db.py +0 -0
  22. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/tests/__init__.py +0 -0
  23. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/tests/test_cases.py +0 -0
  24. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/tests/test_mongo.py +0 -0
  25. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra/tests/test_sql.py +0 -0
  26. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra.egg-info/SOURCES.txt +0 -0
  27. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra.egg-info/dependency_links.txt +0 -0
  28. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra.egg-info/requires.txt +0 -0
  29. {dbhydra-2.1.2 → dbhydra-2.2.0}/dbhydra.egg-info/top_level.txt +0 -0
  30. {dbhydra-2.1.2 → dbhydra-2.2.0}/setup.cfg +0 -0
--- dbhydra-2.1.2/PKG-INFO
+++ dbhydra-2.2.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dbhydra
-Version: 2.1.2
+Version: 2.2.0
 Summary: Data science friendly ORM combining Python
 Home-page: https://github.com/DovaX/dbhydra
 Author: DovaX
--- dbhydra-2.1.2/dbhydra/src/abstract_db.py
+++ dbhydra-2.2.0/dbhydra/src/abstract_db.py
@@ -105,6 +105,7 @@ class AbstractDb(abc.ABC):
 
         self.active_transactions=[]
         self.last_table_inserted_into: Optional[str] = None
+        self.identifier_quote = ''
 
     @abc.abstractmethod
     def connect_locally(self):
--- dbhydra-2.1.2/dbhydra/src/abstract_table.py
+++ dbhydra-2.2.0/dbhydra/src/abstract_table.py
@@ -132,12 +132,13 @@ class AbstractSelectable:
 
 
     def select_all(self):
+        quote = self.db1.identifier_quote
         all_cols_query = ""
         for col in self.columns:
-            all_cols_query = all_cols_query + col + ","
+            all_cols_query = all_cols_query + quote + col + quote + ","
         if all_cols_query[-1] == ",":
            all_cols_query = all_cols_query[:-1]
-        list1 = self.select(f"SELECT {all_cols_query} FROM " + self.name)
+        list1 = self.select(f"SELECT {all_cols_query} FROM {quote}{self.name}{quote};")
         return (list1)
 
     def select_to_df(self):
@@ -244,7 +245,6 @@ class AbstractTable(AbstractJoinable, abc.ABC):
             self.column_type_dict={self.columns[i]:self.types[i] for i,x in enumerate(self.columns)}
         else:
             self.column_type_dict={}
-
 
     # Temporary disabled, please make sure this is implemented where needed, don't introduce breaking changes please
     # @abc.abstractmethod
@@ -421,11 +421,12 @@ class AbstractTable(AbstractJoinable, abc.ABC):
 
 
     def delete(self, where=None):
+        quote = self.db1.identifier_quote
 
         if where is None:
-            query = "DELETE FROM " + self.name
+            query = f"DELETE FROM {quote}{self.name}{quote}"
         else:
-            query = "DELETE FROM " + self.name + " WHERE " + where
+            query = f"DELETE FROM {quote}{self.name}{quote} WHERE {where}"
         return self.execute(query)
 
 
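Taken together with the new `identifier_quote` attribute on AbstractDb (default `''`, i.e. no quoting), these hunks route identifier escaping in the shared query builders through the active dialect. A minimal sketch of the resulting strings, using a hypothetical stand-in object rather than a live connection:

    # StubDb is illustrative only; the real attribute comes from AbstractDb
    # and is overridden per dialect (MysqlDb sets it to a backtick later in this diff).
    class StubDb:
        identifier_quote = '`'

    quote = StubDb.identifier_quote
    name = "user data"  # a table name with a space, previously a syntax error
    print(f"DELETE FROM {quote}{name}{quote} WHERE id = 5")
    # DELETE FROM `user data` WHERE id = 5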
--- /dev/null
+++ dbhydra-2.2.0/dbhydra/src/migrator.py
@@ -0,0 +1,395 @@
+import os
+import math
+import json
+import pandas as pd
+
+from typing import Optional
+from deepdiff import DeepDiff
+from dataclasses import dataclass, asdict
+
+CURRENT_MIGRATION_DEFAULT_PATH = "./db/migrations/current_migration.json"
+MIGRATION_HISTORY_DEFAULT_PATH = "./db/migrations/migration_history.json"
+
+@dataclass
+class Migration:
+    forward: list[dict]
+    backward: list[dict]
+
+class Migrator:
+    """
+    A class for managing database migrations.
+
+    This class provides functionality to create, manage, and execute database migrations
+    using a migration system compatible with MySQL and Postgres dialects. It allows for
+    creating forward and backward migrations, reading and writing migrations to JSON files,
+    and executing migrations based on changes detected in database structures.
+
+    Note: This class is compatible with MySQL and Postgres dialects and has been somewhat tested
+    with those databases. It may require adjustments for other database systems.
+
+    Attributes:
+        db: The database connection object used for executing migrations.
+    """
+
+    def __init__(self, db):
+        self.db = db
+
+        # Used in older implementations, TODO: decide whether to keep both approaches, unify them or pick one
+        self._migration_number = 1
+        self._migration_list = []
+
+        # Used in newer approach
+        self._current_migration = Migration(forward=[], backward=[])
+
+    def process_migration_dict(self, migration_dict):
+        matching_table_class = self.db.matching_table_class  # E.g. MysqlTable
+
+        assert len(migration_dict.keys()) == 1
+        operation = list(migration_dict.keys())[0]
+        options = migration_dict[operation]
+        if operation == "create":
+            table = matching_table_class(self.db, options["table_name"], options["columns"], options["types"])
+            table.convert_types_from_mysql()
+            table.create()
+        elif operation == "drop":
+            table = matching_table_class(self.db, options["table_name"])
+            table.drop()
+        elif operation == "add_column":
+            table = matching_table_class(self.db, options["table_name"])
+            table.initialize_columns()
+            table.initialize_types()
+            table.convert_types_from_mysql()
+            table.add_column(options["column_name"], options["column_type"])
+        elif operation == "modify_column":
+            table = matching_table_class(self.db, options["table_name"])
+            table.initialize_columns()
+            table.initialize_types()
+            table.convert_types_from_mysql()
+            table.modify_column(options["column_name"], options["column_type"])
+        elif operation == "drop_column":
+            table = matching_table_class(self.db, options["table_name"])
+            table.initialize_columns()
+            table.initialize_types()
+            table.drop_column(options["column_name"])
+
+    # Old approach methods START
+    def next_migration(self):
+        self._migration_number += 1
+        self._migration_list = []
+
+    def migrate(self, migration_list):
+        for i, migration_dict in enumerate(migration_list):
+            self.process_migration_dict(migration_dict)
+
+    def migrate_from_json(self, filename):
+        with open(filename, "r") as f:
+            rows = f.readlines()[0].replace("\n", "")
+            result = json.loads(rows)
+            for dict in result:
+                self.process_migration_dict(dict)
+            return (result)
+
+    def migration_list_to_json(self, filename=None):
+        result = json.dumps(self._migration_list)
+
+        if filename is None or filename == "" or filename.isspace():
+            with open("migrations/migration-" + str(self._migration_number) + ".json", "w+") as f:
+                f.write(result)
+        else:
+            with open(f"migrations/{filename}.json", "w+") as f:
+                f.write(result)
+
+    def create_migrations_from_df(self, name, dataframe):
+
+        columns, return_types = self.extract_columns_and_types_from_df(dataframe)
+
+        migration_dict = {"create": {"table_name": name, "columns": columns, "types": return_types}}
+        self._migration_list.append(migration_dict)
+        self.migration_list_to_json()
+        # return columns, return_types
+
+    def extract_columns_and_types_from_df(self, dataframe):
+        columns = list(dataframe.columns)
+
+        return_types = []
+
+        if columns == []:
+            return ["id"], ["int"]
+
+        for column in dataframe:
+            if dataframe.empty:
+                return_types.append(type(None).__name__)
+                continue
+
+            t = dataframe.loc[0, column]
+            try:
+                if pd.isna(t):
+                    return_types.append(type(None).__name__)
+                else:
+                    try:
+                        return_types.append(type(t.item()).__name__)
+                    except:
+                        return_types.append(type(t).__name__)
+            except:
+                # length = 2**( int(dataframe[col].str.len().max()) - 1).bit_length()
+                length = int(dataframe[column].str.len().max())
+                length += 0.1 * length
+                length = int(math.ceil(length / 10.0)) * 10
+                return_types.append(f'nvarchar({length})' if type(t).__name__ == 'str' else type(t).__name__)
+
+        if (columns[0] != "id"):
+            columns.insert(0, "id")
+            return_types.insert(0, "int")
+
+        return columns, return_types
+    # Old approach methods END
+
+    def set_current_migration(self, migration_dict: dict[str, list]):
+        self._current_migration = Migration(**migration_dict)
+
+    def migrate_forward(self):
+        """
+        Applies forward migrations from the current migration object.
+
+        Iterates through each migration dictionary in the current migration's forward list,
+        processes the migration, saves it to migration history, and clears the current migration.
+
+        Returns:
+            None
+        """
+
+        for migration_dict in self._current_migration.forward:
+            self.process_migration_dict(migration_dict)
+
+        self._save_migration_to_history(migration=self._current_migration)
+        self._clear_current_migration()
+
+    def migrate_backward(self):
+        """
+        Applies backward migrations from the current migration object.
+
+        Iterates through each migration dictionary in the current migration's backward list,
+        processes the migration, saves it to migration history, and clears the current migration.
+
+        Returns:
+            None
+        """
+
+        for migration_dict in self._current_migration.backward:
+            self.process_migration_dict(migration_dict)
+
+        history_migration = Migration(forward=self._current_migration.backward, backward=self._current_migration.forward)
+        self._save_migration_to_history(migration=history_migration)
+        self._clear_current_migration()
+
+    def migrate_n_steps_back_in_history(self, n: int, migration_history_json: str = MIGRATION_HISTORY_DEFAULT_PATH):
+        migration_history = self._read_migration_history_json(migration_history_json)
+
+        if len(migration_history) < n:
+            raise ValueError(f"Provided n (= {n}) is larger than migration history length (= {len(migration_history)}).")
+
+        total_backward_migration = Migration(forward=[], backward=[])
+        migrations = migration_history[-n:]  # Take last n elements of migration history for execution
+
+        # Loop in reversed order as we execute backward migrations in reversed order compared to forward ones
+        for migration_dict in reversed(migrations):
+            total_backward_migration.forward.extend(migration_dict["forward"])
+            total_backward_migration.backward.extend(migration_dict["backward"])
+
+        self.set_current_migration(asdict(total_backward_migration))
+        self.migrate_backward()
+
+    def load_migration_from_json(self, json_file_path: str = CURRENT_MIGRATION_DEFAULT_PATH):
+        with open(json_file_path, "r") as file:
+            migration_dict = json.load(file)
+
+        self.set_current_migration(migration_dict)
+
+    def save_current_migration_to_json(self, file_path: str = CURRENT_MIGRATION_DEFAULT_PATH):
+        if not file_path.endswith(".json"):
+            raise ValueError("Current migration file must be of '.json' type.")
+
+        self._build_folder_structure_for_file_path(file_path)
+
+        with open(file_path, "w+") as file:
+            json.dump(asdict(self._current_migration), file, indent=2)
+
+    def create_table_migration(self, table_name: str, old_structure: Optional[dict], new_structure: Optional[dict]):
+        """
+        Creates a migration for a database table based on its old and new structures.
+
+        Args:
+            table_name (str): The name of the database table.
+            old_structure (Optional[dict]): The old structure of the table.
+            new_structure (Optional[dict]): The new structure of the table.
+
+        If old_structure is None and new_structure is not None: CREATE table
+        If old_structure is not None and new_structure is None: DROP table
+
+        Returns:
+            Migration: The generated migration object.
+
+        Raises:
+            ValueError: If the table_name argument is empty.
+        """
+
+        if not table_name:
+            raise ValueError("The 'table_name' argument must be a non-empty string.")
+
+        if not old_structure and new_structure:
+            # empty initial structure --> non-empty new structure ==> create the table
+            columns, types = list(new_structure.keys()), list(new_structure.values())
+            forward_migration = [{"create": {"table_name": table_name, "columns": columns, "types": types}}]
+            backward_migration = [{"drop": {"table_name": table_name}}]
+
+            migration = Migration(forward=forward_migration, backward=backward_migration)
+        elif not new_structure:
+            # new structure is empty ==> drop the table
+            columns, types = list(old_structure.keys()), list(old_structure.values())
+            forward_migration = [{"drop": {"table_name": table_name}}]
+            backward_migration = [{"create": {"table_name": table_name, "columns": columns, "types": types}}]
+
+            migration = Migration(forward=forward_migration, backward=backward_migration)
+        else:
+            diff = DeepDiff(old_structure, new_structure, verbose_level=2)
+            migration = self._convert_deepdiff_dict_into_migration(table_name, diff)
+
+        self._merge_migration_to_current_migration(migration=migration)
+
+        return migration
+
+    def _convert_deepdiff_dict_into_migration(self, table_name: str, deepdiff_dict: dict) -> Migration:
+        """
+        Converts deepdiff dictionary from the new and old table structures comparison into a Migration object.
+
+        Args:
+            table_name (str): A name of the examined DB table.
+            deepdiff_dict (dict): A dictionary from DeepDiff comparison of the old and new table structure.
+
+        Returns:
+            Migration: A Migration object containing forward and backward migrations for the given table.
+
+        Example:
+            >>> table_name = 'results'
+            >>> deepdiff_dict = {'dictionary_item_removed': {"root['hehexd']": 'double'}}
+            >>> migrator = Migrator(db)
+            >>> asdict(migrator._convert_deepdiff_dict_into_migration(table_name, deepdiff_dict))
+            {
+                'forward': [
+                    {'drop_column': {'table_name': 'results', 'column_name': 'hehexd'}}
+                ],
+                'backward': [
+                    {'add_column': {'table_name': 'results', 'column_name': 'hehexd', 'column_type': 'double'}}
+                ]
+            }
+        """
+        forward_migration, backward_migration = [], []
+
+        forward_conversions = {
+            "dictionary_item_added": "add_column",
+            "dictionary_item_removed": "drop_column",
+            "values_changed": "modify_column"
+        }
+        backward_conversions = {
+            "dictionary_item_added": "drop_column",
+            "dictionary_item_removed": "add_column",
+            "values_changed": "modify_column"
+        }
+
+        for action_name, deepdiff_action in deepdiff_dict.items():
+            for deepdiff_key in deepdiff_action.keys():
+                column_name = self._extract_column_name_from_deepdiff_key(deepdiff_key)
+                forward_action, backward_action = forward_conversions[action_name], backward_conversions[action_name]
+
+                if action_name == "dictionary_item_added":
+                    column_type = deepdiff_action[deepdiff_key]
+                    forward_migration.append({forward_action: {"table_name": table_name, "column_name": column_name, "column_type": column_type}})
+                    backward_migration.append({backward_action: {"table_name": table_name, "column_name": column_name}})
+                elif action_name == "dictionary_item_removed":
+                    column_type = deepdiff_action[deepdiff_key]
+                    forward_migration.append({forward_action: {"table_name": table_name, "column_name": column_name}})
+                    backward_migration.append({backward_action: {"table_name": table_name, "column_name": column_name, "column_type": column_type}})
+                elif action_name == "values_changed":
+                    column_type = deepdiff_action[deepdiff_key]["old_value"]
+                    column_new_type = deepdiff_action[deepdiff_key]["new_value"]
+
+                    # HACK: Do not create migrations for cases such as varchar(2047) --> nvarchar(2047)
+                    is_varchar_in_types = "varchar" in column_type and "varchar" in column_new_type
+                    is_max_length_equal = (
+                        column_type[column_type.index("("): column_type.index(")")]
+                        == column_new_type[column_new_type.index("("): column_new_type.index(")")]
+                    ) if is_varchar_in_types else False
+                    is_varchar_nvarchar_conversion = is_varchar_in_types and is_max_length_equal
+
+                    if not is_varchar_nvarchar_conversion:
+                        forward_migration.append({forward_action: {"table_name": table_name, "column_name": column_name,
+                                                                   "column_type": column_new_type}})
+                        backward_migration.append({backward_action: {"table_name": table_name, "column_name": column_name,
+                                                                     "column_type": column_type}})
+
+        return Migration(forward=forward_migration, backward=backward_migration)
+
+    def _extract_column_name_from_deepdiff_key(self, deepdiff_key: str) -> str:
+        """
+        Extracts the column name from a key generated by deepdiff.
+
+        Args:
+            deepdiff_key (str): The key generated by deepdiff.
+
+        Returns:
+            str: The extracted column name.
+
+        Example:
+            >>> migrator = Migrator(db)
+            >>> column_name = migrator._extract_column_name_from_deepdiff_key("root['table']['column']")
+            >>> print(column_name)
+            column
+        """
+
+        # Split the item_key by '[' and ']' to isolate the column name
+        # The column name is expected to be the last element after splitting
+        column_name = deepdiff_key.split('[')[-1].strip("']")
+        return column_name
+
+    def _merge_migration_to_current_migration(self, migration: Migration):
+        new_forward_part = self._current_migration.forward + migration.forward
+        new_backward_part = self._current_migration.backward + migration.backward
+        self._current_migration = Migration(forward=new_forward_part, backward=new_backward_part)
+
+    def _clear_current_migration(self):
+        self._current_migration = Migration(forward=[], backward=[])
+
+    def _read_migration_history_json(self, file_path: str = MIGRATION_HISTORY_DEFAULT_PATH):
+        if not file_path.endswith(".json"):
+            raise ValueError("Migration history file must be of '.json' type.")
+
+        if not os.path.exists(file_path):
+            raise FileNotFoundError(f"Migration history file '{file_path}' does not exist.")
+
+        try:
+            with open(file_path, "r") as file:
+                migration_history = json.load(file)
+        except json.JSONDecodeError:
+            migration_history = []
+
+        return migration_history
+
+    def _save_migration_to_history(self, migration: Migration, file_path: str = MIGRATION_HISTORY_DEFAULT_PATH):
+        try:
+            migration_history = self._read_migration_history_json(file_path)
+        except FileNotFoundError:
+            self._build_folder_structure_for_file_path(file_path)
+            migration_history = []
+
+        migration_history.append(asdict(migration))
+
+        with open(file_path, "w") as file:
+            json.dump(migration_history, file, indent=2)
+
+    def _build_folder_structure_for_file_path(self, file_path: str):
+        folder_path = os.path.dirname(file_path)
+        if not os.path.exists(folder_path):
+            print(f"Folder path to the file '{file_path}' does not exist. Creating the file and the folder structure.")
+            os.makedirs(folder_path)
+
+
+
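The new migrator.py above replaces the much smaller module deleted near the end of this diff. A rough usage sketch of the newer API, assuming `db` is an already-connected dbhydra database object (e.g. a MysqlDb) and that the module is importable as suggested by the file layout:

    from dbhydra.src.migrator import Migrator

    migrator = Migrator(db)  # db: a connected MysqlDb/PostgresDb instance (assumed)

    old_structure = {"id": "int", "name": "varchar(100)"}
    new_structure = {"id": "int", "name": "varchar(100)", "score": "double"}

    # DeepDiff of the two structures yields a forward add_column step for `score`
    # and a matching backward drop_column step, merged into the current migration.
    migrator.create_table_migration("results", old_structure, new_structure)

    migrator.save_current_migration_to_json()  # ./db/migrations/current_migration.json
    migrator.migrate_forward()                 # applies forward steps, appends to history

    # Undo the most recent entry recorded in migration_history.json:
    migrator.migrate_n_steps_back_in_history(1)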
--- dbhydra-2.1.2/dbhydra/src/mysql_db.py
+++ dbhydra-2.2.0/dbhydra/src/mysql_db.py
@@ -22,6 +22,10 @@ class MysqlDb(AbstractDb):
        'Jsonable': "json"
    }
 
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.identifier_quote = '`'
+
     def connect_locally(self):
         self.connection = pymysql.connect(host=self.DB_SERVER, user=self.DB_USERNAME, password=self.DB_PASSWORD,
                                           database=self.DB_DATABASE)
--- dbhydra-2.1.2/dbhydra/src/tables.py
+++ dbhydra-2.2.0/dbhydra/src/tables.py
@@ -2,7 +2,7 @@ import pandas as pd
 import numpy as np
 from typing import Optional, Any
 import abc
-
+import time
 #xlsx imports
 import pathlib
 
@@ -655,7 +655,7 @@ class MysqlTable(AbstractTable):
     """
     def get_data_types_and_character_lengths(self):
         information_schema_table = MysqlTable(self.db1, 'INFORMATION_SCHEMA.COLUMNS', ['DATA_TYPE'], ['nvarchar(50)'])
-        query = f"SELECT DATA_TYPE,character_maximum_length FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{self.db1.DB_DATABASE}' AND TABLE_NAME = '" + self.name + "'"
+        query = f"SELECT DATA_TYPE,character_maximum_length FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{self.db1.DB_DATABASE}' AND TABLE_NAME = '{self.name}'"
         types = information_schema_table.select(query)
         data_types = [x[0] for x in types]
         data_lengths = [x[1] for x in types]
@@ -725,12 +725,12 @@ class MysqlTable(AbstractTable):
        Returns the number of records in table
        """
 
-        num_of_records = self.select(f"SELECT COUNT(*) FROM {self.name};")
+        num_of_records = self.select(f"SELECT COUNT(*) FROM `{self.name}`;")
 
         return num_of_records[0][0]
 
     def drop(self):
-        query = "DROP TABLE " + self.name + ";"
+        query = "DROP TABLE `" + self.name + "`;"
         print(query)
         self.db1.execute(query)
 
@@ -742,9 +742,9 @@ class MysqlTable(AbstractTable):
 
         column_type_pairs = list(zip(self.columns, self.types))[1:]
         fields = ", ".join(
-            [f"{column} {type_.upper()}" for column, type_ in column_type_pairs]
+            [f"`{column}` {type_.upper()}" for column, type_ in column_type_pairs]
         )
-        query = f"CREATE TABLE {self.name} ({self.id_column_name} INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, {fields})"
+        query = f"CREATE TABLE `{self.name}` ({self.id_column_name} INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, {fields})"
 
         print(query)
         try:
@@ -759,14 +759,11 @@ class MysqlTable(AbstractTable):
         total_output=[]
         for k in range(len(rows)):
             if k % batch == 0:
-                query = "INSERT INTO " + self.name + " ("
+                query = "INSERT INTO `" + self.name + "` ("
                 for i in range(start_index, len(self.columns)):
                     if i < len(rows[k]) + 1:
-                        # column name containing space needs to be wrapped in `...`, otherwise causes syntax error
-                        if " " in self.columns[i]:
-                            column_name = '`' + self.columns[i] + '`'
-                        else:
-                            column_name = self.columns[i]
+                        # column name containing space/reserved keyword needs to be wrapped in `...`, otherwise causes syntax error
+                        column_name = '`' + self.columns[i] + '`'
                         query += column_name + ","
                     if len(rows) < len(self.columns):
                         print(len(self.columns) - len(rows), "columns were not specified")
@@ -846,17 +843,17 @@ class MysqlTable(AbstractTable):
     def add_foreign_key(self, foreign_key):
         parent_id = foreign_key['parent_id']
         parent = foreign_key['parent']
-        query = "ALTER TABLE " + self.name + " MODIFY " + parent_id + " INT UNSIGNED"
+        query = "ALTER TABLE `" + self.name + "` MODIFY " + parent_id + " INT UNSIGNED"
         print(query)
         self.db1.execute(query)
-        query = "ALTER TABLE " + self.name + " ADD FOREIGN KEY (" + parent_id + ") REFERENCES " + parent + "("+self.id_column_name+")"
+        query = "ALTER TABLE `" + self.name + "` ADD FOREIGN KEY (" + parent_id + ") REFERENCES " + parent + "("+self.id_column_name+")"
         print(query)
         self.db1.execute(query)
 
     @save_migration
     def add_column(self, column_name, column_type):
         assert len(column_name) > 1
-        command = "ALTER TABLE " + self.name + " ADD COLUMN " + column_name + " " + column_type
+        command = "ALTER TABLE `" + self.name + "` ADD COLUMN `" + column_name + "` " + column_type
         try:
             self.db1.execute(command)
             self.columns.append(column_name)
@@ -867,7 +864,7 @@ class MysqlTable(AbstractTable):
     @save_migration
     def drop_column(self, column_name):
         assert len(column_name) > 1
-        command = "ALTER TABLE " + self.name + " DROP COLUMN " + column_name
+        command = "ALTER TABLE `" + self.name + "` DROP COLUMN " + column_name
         try:
             print(command)
             self.db1.execute(command)
@@ -881,7 +878,7 @@ class MysqlTable(AbstractTable):
     @save_migration
     def modify_column(self, column_name, new_column_type):
         assert len(column_name) > 1
-        command = "ALTER TABLE " + self.name + " MODIFY COLUMN " + column_name + " " + new_column_type
+        command = "ALTER TABLE `" + self.name + "` MODIFY COLUMN `" + column_name + "` " + new_column_type
         print(command)
         try:
             self.db1.execute(command)
@@ -894,9 +891,10 @@ class MysqlTable(AbstractTable):
 ############### XLSX ##################
 
 class XlsxTable(AbstractTable):
-    def __init__(self, db1, name, columns=None, types=None, id_column_name = "id"):
+    def __init__(self, db1, name, columns=None, types=None, id_column_name = "id", number_of_retries=5):
         super().__init__(db1, name, columns, types)
         self.id_column_name = id_column_name
+        self.NUMBER_OF_RETRIES = number_of_retries
 
         table_filename = f"{self.name}.csv" if self.db1.is_csv else f"{self.name}.xlsx"
         self.table_directory_path: pathlib.Path = self.db1.db_directory_path / table_filename
@@ -960,23 +958,33 @@ class XlsxTable(AbstractTable):
             column for column, type_ in self.column_type_dict.items() if type_ == "datetime"
         ]
 
-        try:
-            if self.db1.is_csv:
-                df = pd.read_csv(
-                    self.table_directory_path, dtype=column_type_map,
-                    parse_dates=date_columns, encoding='utf-8'
-                )
-            else:
-                df = pd.read_excel(
-                    self.table_directory_path, dtype=column_type_map,
-                    parse_dates=date_columns
-                )
-
-            df.replace({np.nan: None}, inplace=True)
-        except Exception as e:
-            print(f"Error while reading data into XlsxTable: {e}")
-            df = pd.DataFrame(columns=self.columns)
+        # BUG: If XlsxTable is being accessed by multiple threads, read operation
+        # might fail due to race conditions. Add retry mechanism to handle these cases.
+        for attempt in range(self.NUMBER_OF_RETRIES):
+            try:
+                df = self._select(column_type_map, date_columns)
+            except Exception:
+                # print(f"Error while reading data into XlsxTable: {e}")
+                # df = pd.DataFrame(columns=self.columns)
+                if attempt < self.NUMBER_OF_RETRIES - 1:
+                    time.sleep(0.1)
+                    continue
+                else:
+                    print(f"Failed to read data from {self.table_directory_path}, returning empty DataFrame")
+                    df = pd.DataFrame(columns=self.columns)
+            return df
 
+    def _select(self, column_type_map, date_columns):
+        if self.db1.is_csv:
+            df = pd.read_csv(
+                self.table_directory_path, dtype=column_type_map, parse_dates=date_columns,
+                encoding='utf-8'
+            )
+        else:
+            df = pd.read_excel(
+                self.table_directory_path, dtype=column_type_map, parse_dates=date_columns
+            )
+        df.replace({np.nan: None}, inplace=True)
         return df
 
     def insert_from_df(self, df, batch=1, try_mode=False, debug_mode=False, adjust_df=False, insert_id=False):
@@ -1026,7 +1034,7 @@ class XlsxTable(AbstractTable):
         df.reset_index(drop=True, inplace=True)
 
         self._save_table(df)
-        self.last_table_inserted_into = self.name
+        self.db1.last_table_inserted_into = self.name
 
     def replace_from_df(self, df):
         assert len(df.columns) == len(self.columns)  # +1 because of id column
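The retry count introduced in the XlsxTable hunks above is configurable per table. A hedged sketch, assuming `db` is an xlsx/csv-backed dbhydra database object and that reads go through the retried path (e.g. via the inherited select_to_df):

    # Up to 10 read attempts, 0.1 s apart, before falling back to an empty
    # DataFrame; useful when several threads share the same backing file.
    table = XlsxTable(db, "users", columns=["id", "name"],
                      types=["int", "nvarchar(100)"], number_of_retries=10)
    df = table.select_to_df()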
--- dbhydra-2.1.2/dbhydra.egg-info/PKG-INFO
+++ dbhydra-2.2.0/dbhydra.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dbhydra
-Version: 2.1.2
+Version: 2.2.0
 Summary: Data science friendly ORM combining Python
 Home-page: https://github.com/DovaX/dbhydra
 Author: DovaX
--- dbhydra-2.1.2/setup.py
+++ dbhydra-2.2.0/setup.py
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
 
 setuptools.setup(
     name='dbhydra',
-    version='2.1.2',
+    version='2.2.0',
     author='DovaX',
     author_email='dovax.ai@gmail.com',
     description='Data science friendly ORM combining Python',
--- dbhydra-2.1.2/dbhydra/src/migrator.py
+++ /dev/null
@@ -1,114 +0,0 @@
-import pandas as pd
-import math
-import json
-
-
-class Migrator:
-    """It was somewhat tested only for MySQL and Postgres dialect"""
-
-    def __init__(self, db=None):
-        self.db = db
-        self.migration_number = 1
-        self.migration_list = []
-
-    def process_migration_dict(self, migration_dict):
-        matching_table_class = self.db.matching_table_class  # E.g. MysqlTable
-
-        assert len(migration_dict.keys()) == 1
-        operation = list(migration_dict.keys())[0]
-        options = migration_dict[operation]
-        if operation == "create":
-            table = matching_table_class(self.db, options["table_name"], options["columns"], options["types"])
-            table.convert_types_from_mysql()
-            table.create()
-        elif operation == "drop":
-            table = matching_table_class(self.db, options["table_name"])
-            table.drop()
-        elif operation == "add_column":
-            table = matching_table_class(self.db, options["table_name"])
-            table.initialize_columns()
-            table.initialize_types()
-            table.convert_types_from_mysql()
-            table.add_column(options["column_name"], options["column_type"])
-        elif operation == "modify_column":
-            table = matching_table_class(self.db, options["table_name"])
-            table.initialize_columns()
-            table.initialize_types()
-            table.convert_types_from_mysql()
-            table.modify_column(options["column_name"], options["column_type"])
-        elif operation == "drop_column":
-            table = matching_table_class(self.db, options["table_name"])
-            table.initialize_columns()
-            table.initialize_types()
-            table.drop_column(options["column_name"])
-
-    def next_migration(self):
-        self.migration_number += 1
-        self.migration_list = []
-
-    def migrate(self, migration_list):
-        for i, migration_dict in enumerate(migration_list):
-            self.process_migration_dict(migration_dict)
-
-    def migrate_from_json(self, filename):
-        with open(filename, "r") as f:
-            rows = f.readlines()[0].replace("\n", "")
-            result = json.loads(rows)
-            for dict in result:
-                self.process_migration_dict(dict)
-            return (result)
-
-    def migration_list_to_json(self, filename=None):
-        result = json.dumps(self.migration_list)
-
-        if filename is None or filename == "" or filename.isspace():
-            with open("migrations/migration-" + str(self.migration_number) + ".json", "w+") as f:
-                f.write(result)
-        else:
-            with open(f"migrations/{filename}.json", "w+") as f:
-                f.write(result)
-
-    def create_migrations_from_df(self, name, dataframe):
-
-        columns, return_types = self.extract_columns_and_types_from_df(dataframe)
-
-        migration_dict = {"create": {"table_name": name, "columns": columns, "types": return_types}}
-        self.migration_list.append(migration_dict)
-        self.migration_list_to_json()
-        # return columns, return_types
-
-    def extract_columns_and_types_from_df(self, dataframe):
-        columns = list(dataframe.columns)
-
-        return_types = []
-
-        if columns == []:
-            return ["id"], ["int"]
-
-        for column in dataframe:
-            if dataframe.empty:
-                return_types.append(type(None).__name__)
-                continue
-
-            t = dataframe.loc[0, column]
-            try:
-                if pd.isna(t):
-                    return_types.append(type(None).__name__)
-                else:
-                    try:
-                        return_types.append(type(t.item()).__name__)
-                    except:
-                        return_types.append(type(t).__name__)
-            except:
-                # length = 2**( int(dataframe[col].str.len().max()) - 1).bit_length()
-                length = int(dataframe[column].str.len().max())
-                length += 0.1 * length
-                length = int(math.ceil(length / 10.0)) * 10
-                return_types.append(f'nvarchar({length})' if type(t).__name__ == 'str' else type(t).__name__)
-
-        if (columns[0] != "id"):
-            columns.insert(0, "id")
-            return_types.insert(0, "int")
-
-        return columns, return_types
-