DataOpsHub 6.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,878 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Database Handler Module
6
+
7
+ This module provides comprehensive database operations and utilities for the DataOpsHub application.
8
+ It includes functionality for database initialization, data filtering, CRUD operations, and file uploads.
9
+ """
10
+
11
+ #----------------#
12
+ # Import modules #
13
+ #----------------#
14
+
15
+ from sqlalchemy import Column, DateTime, Integer, String, create_engine, text
16
+ from sqlalchemy.exc import SQLAlchemyError, IntegrityError
17
+ from sqlalchemy.orm import declarative_base, sessionmaker
18
+ from urllib.parse import quote_plus
19
+
20
+ #------------------------#
21
+ # Import project modules #
22
+ #------------------------#
23
+
24
+ from filewise.general.introspection_utils import get_type_str, get_func_name
25
+ from filewise.json_utils.json_obj_handler import serialise_to_json, deserialise_json
26
+ from filewise.pandas_utils.pandas_obj_handler import csv2df, excel_handler, ods_handler
27
+ from paramlib.config_params import DB_ERROR_CODE_DICT, USER_INFO_JSON_PATH
28
+ from pygenutils.strings.string_handler import find_substring_index, get_obj_specs
29
+ from pygenutils.strings.text_formatters import format_string
30
+ from pygenutils.time_handling.date_and_time_utils import parse_time_string
31
+
32
+ #------------------------#
33
+ # Import project modules #
34
+ #------------------------#
35
+
36
+ from DataOpsHub.models.user import User
37
+ from DataOpsHub.constants.error_messages import (
38
+ INVALID_DATE_FORMAT_ERROR,
39
+ INVALID_DATE_RANGE_ERROR
40
+ )
41
+
42
#-----------------#
# Declare objects #
#-----------------#

# Declarative base shared by every ORM model in this application
Base = declarative_base()

# Registry mapping table names to their model classes (populated elsewhere)
MODEL_REGISTRY = {}
51
+
52
+ #------------------#
53
+ # Define functions #
54
+ #------------------#
55
+
56
+ # %% HELPERS
57
+
58
+ # Database connections #
59
+ #----------------------#
60
+
61
def _init_engine(config, database_type="mysql"):
    """
    Build a SQLAlchemy engine from the given credentials and database type.

    Parameters
    ----------
    config : dict
        Database credentials with keys:
        - 'username' : str, the database username.
        - 'password' : str, the database password (URL-encoded internally
          with `urllib.parse.quote_plus`, so special characters are safe).
        - 'host' : str, the database host address.
        - 'port' : str, optional, the database port.
        - 'database_name' : str, optional, the name of the database.
    database_type : {'mysql', 'postgresql', 'sqlite'}, optional
        Target dialect for the SQLAlchemy URL. Default is 'mysql'.

    Returns
    -------
    sqlalchemy.engine.base.Engine
        A configured SQLAlchemy engine object for database interactions.

    Raises
    ------
    ValueError
        If `database_type` is not one of the supported dialects.
    """
    if database_type not in DATABASE_ALIAS_DICT:
        raise ValueError(f"Unsupported database type. Choose from {list(DATABASE_ALIAS_DICT.keys())}")

    dialect = DATABASE_ALIAS_DICT[database_type]
    encoded_password = quote_plus(config.get('password'))
    db_name = config.get('database_name', '')

    # SQLite is file-based: no credentials or host in the URL
    if database_type == "sqlite":
        return create_engine(f"{dialect}:///{db_name}.db")

    # Append the port only when one was supplied
    port = config.get('port', '')
    netloc = f"{config['host']}:{port}" if port else config['host']
    url = f"{dialect}://{config['username']}:{encoded_password}@{netloc}/{db_name}"
    return create_engine(url)
101
+
102
def _apply_date_range_filter(query, min_value, max_value, date_field):
    """
    Apply an inclusive date-range filter to a query.

    Parameters
    ----------
    query : sqlalchemy.orm.query.Query
        The current query object.
    min_value : str
        Start date, 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM'.
        With a date-only value the time defaults to 00:00.
    max_value : str
        End date, 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM'.
        With a date-only value the time defaults to 23:59.
    date_field : sqlalchemy.Column
        The date column to filter on.

    Returns
    -------
    sqlalchemy.orm.query.Query
        Query with both edges of the range included.

    Raises
    ------
    ValueError
        If a value matches neither format, or min exceeds max.
    """
    def _parse_edge(value, hour, minute):
        # Prefer the full date-time format; fall back to date-only and
        # substitute the edge's default time of day.
        try:
            return parse_time_string(value, '%Y-%m-%d %H:%M')
        except ValueError:
            parsed = parse_time_string(value, '%Y-%m-%d')
            return parsed.replace(hour=hour, minute=minute)

    try:
        range_start = _parse_edge(min_value, 0, 0)
        range_end = _parse_edge(max_value, 23, 59)
    except ValueError:
        raise ValueError(INVALID_DATE_FORMAT_ERROR)

    if range_start > range_end:
        raise ValueError(INVALID_DATE_RANGE_ERROR)

    # Inclusive on both edges
    return query.filter(date_field >= range_start, date_field <= range_end)
154
+
155
+ # Bulk Data Loading #
156
+ #-------------------#
157
+
158
def _load_df(df, table_name, engine, if_exists="replace", import_index=False):
    """
    Upload a DataFrame to an SQL database table.

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame to upload.
    table_name : str
        Name of the SQL table.
    engine : sqlalchemy.engine.Engine
        Database engine for connection.
    if_exists : {'replace', 'append', 'fail'}, optional
        Action if the table already exists, default is 'replace'.
    import_index : bool, optional
        Whether to include the DataFrame index, default is False.

    Returns
    -------
    str or None
        A success message naming the database and table, or None when a
        ValueError / SQLAlchemyError was caught and reported to stdout.

    Raises
    ------
    ValueError
        For other upload failures, carrying either the intuitive message
        mapped from the numeric driver error code or, when the code is
        unknown, the original error text (chained to the original exception).
    """
    try:
        df.to_sql(table_name, con=engine, if_exists=if_exists, index=import_index)
    except ValueError as ve:
        # Best-effort: report and return None rather than abort the batch
        print(f"ValueError: {ve}")
        return None
    except SQLAlchemyError as se:
        print(f"SQLAlchemyError: {se}")
        return None
    except Exception as e:
        # Map a 4-digit driver error code (e.g. MySQL 1045) to a friendlier message
        err_code = find_substring_index(str(e), "[0-9]{4}",
                                        advanced_search=True,
                                        return_match_index=False,
                                        return_match_str=True)
        intuitive_err = DB_ERROR_CODE_DICT.get(err_code)
        # Fall back to the raw error when the code is unmapped (the original
        # raised ValueError(None) here), and chain the cause for debugging
        raise ValueError(intuitive_err if intuitive_err is not None else str(e)) from e
    else:
        database_name = engine.url.database
        data_load_format_values = [database_name, table_name]
        return format_string(SUCCESS_DATA_LOAD_TEMPLATE, data_load_format_values)
208
+
209
+
210
+ # %% MAIN METHODS
211
+
212
+ # Database creation and initialization #
213
+ #-------------------------------------#
214
+
215
def create_database(config, database_name, database_type="mysql"):
    """
    Create a new database using the provided configuration and database name.

    Parameters
    ----------
    config : dict
        Credentials for accessing the database server.
        For the expected keys, refer to the docstring of `_init_engine`.
    database_name : str
        The name of the database to be created.
    database_type : {'mysql', 'postgresql', 'sqlite'}, optional
        The type of database for SQLAlchemy configuration. Default is 'mysql'.

    Returns
    -------
    str
        A status message: created, already exists, or the error encountered.
        (The original docstring said ``None``, contradicting every code path.)
    """
    # Engine for the initial, server-level connection
    engine = _init_engine(config, database_type)

    try:
        with engine.connect() as conn:
            if database_type in ("mysql", "postgresql"):
                # Parameterized existence check (no f-string needed here)
                result = conn.execute(
                    text("SELECT 1 FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = :db_name"),
                    {'db_name': database_name})
                if result.fetchone():
                    return f"Database '{database_name}' already exists, skipping creation."

                # NOTE(review): identifiers cannot be bound parameters, so the
                # name is interpolated directly into the DDL — ensure
                # `database_name` never comes from untrusted input. Also
                # confirm PostgreSQL support: CREATE DATABASE cannot run
                # inside a transaction block.
                conn.execute(text(f"CREATE DATABASE {database_name}"))
                return f"Database '{database_name}' created successfully."
            else:
                # For SQLite, the database file is created upon connection
                return f"SQLite database '{database_name}.db' created successfully."
    except SQLAlchemyError as e:
        return f"An SQLAlchemy error occurred: {e}"
    except Exception as e:
        return f"An unexpected error occurred: {e}"
253
+
254
def init_db(config, database_type="mysql"):
    """
    Initialise all tables declared on `Base` (including the User model).

    Parameters
    ----------
    config : dict
        Database connection credentials; see `_init_engine` for the keys.
    database_type : {'mysql', 'postgresql', 'sqlite'}, optional
        The type of database for SQLAlchemy configuration. Default is 'mysql'.

    Returns
    -------
    tuple
        - engine (sqlalchemy.engine.base.Engine): the database engine.
        - Session (sqlalchemy.orm.sessionmaker): a session factory bound
          to that engine.
    """
    engine = _init_engine(config, database_type)
    # Create any missing tables registered on the declarative base
    Base.metadata.create_all(engine)
    return engine, sessionmaker(bind=engine)
278
+
279
+ # Data filtering #
280
+ #----------------#
281
+
282
def filter_data(session_or_factory, request_data, filtering_fields, model_class=None):
    """
    Build a filtered query over a model from JSON request data.

    Parameters
    ----------
    session_or_factory : sqlalchemy.orm.Session or sqlalchemy.orm.sessionmaker
        Either an open session or a session factory (a factory is invoked
        to obtain a session).
    request_data : dict
        Filter criteria keyed by field name; the model's date field expects
        a {'min_value': ..., 'max_value': ...} sub-dict.
    filtering_fields : list of str
        Field names eligible for filtering.
    model_class : sqlalchemy.orm.DeclarativeMeta, optional
        Model to query; when omitted it is looked up in MODEL_REGISTRY via
        request_data['table_name'].

    Returns
    -------
    sqlalchemy.orm.query.Query
        Query with all applicable filters attached.

    Raises
    ------
    ValueError
        If no model class is given and none is found in the registry.
    """
    if isinstance(session_or_factory, sessionmaker):
        session = session_or_factory()
    else:
        session = session_or_factory

    if model_class is None:
        model_class = MODEL_REGISTRY.get(request_data.get('table_name'))
    if model_class is None:
        raise ValueError("No model class provided or found in registry")

    query = session.query(model_class)

    for field in filtering_fields:
        value = request_data.get(field)
        if value is None:
            # Absent or explicitly-null criteria are skipped
            continue
        column = getattr(model_class, field)
        if field != model_class.get_date_field():
            query = query.filter(column == value)
            continue
        # Date field: apply a range filter only when both edges are present
        min_value = value.get('min_value')
        max_value = value.get('max_value')
        if min_value is not None and max_value is not None:
            query = _apply_date_range_filter(query, min_value, max_value, column)

    return query
329
+
330
+ # CRUD operations #
331
+ #-----------------#
332
+
333
+ # User insertion #
334
+ #-#-#-#-#-#-#-#-#-
335
+
336
def create_user(Session, user_info_dict):
    """
    Insert a new user into the database.

    Parameters
    ----------
    Session : sqlalchemy.orm.sessionmaker
        The session factory for database interaction.
    user_info_dict : dict
        The user's information; must contain the keys 'email', 'first_name',
        'first_surname', 'second_surname', 'password', 'tel' and 'age'.

    Returns
    -------
    str
        A message indicating the result of the operation (Spanish,
        user-facing).
    """
    session = Session()
    try:
        new_user = User(
            email=user_info_dict['email'],
            first_name=user_info_dict['first_name'],
            first_surname=user_info_dict['first_surname'],
            second_surname=user_info_dict['second_surname'],
            password=user_info_dict['password'],
            tel=user_info_dict['tel'],
            age=user_info_dict['age']
        )
        session.add(new_user)
        session.commit()
        return SUCCESS_USER_INSERT
    except IntegrityError as e:
        session.rollback()
        constraint_msg = str(e.orig)
        if "email" in constraint_msg:
            return EMAIL_EXISTS_ERROR
        if "tel" in constraint_msg:
            return PHONE_EXISTS_ERROR
        # BUG FIX: the original fell through and returned None for any other
        # constraint violation; report it like other unexpected errors
        return f"Error al insertar el usuario: {e}"
    except Exception as e:
        session.rollback()
        return f"Error al insertar el usuario: {e}"
    finally:
        session.close()
377
+
378
def create_user_json(json_path, data_dict):
    """
    Append a new user to the JSON file, enforcing unique email and tel.

    Parameters
    ----------
    json_path : str
        Path to the JSON file.
    data_dict : dict
        The new user's information.

    Returns
    -------
    str
        A message indicating the result of the operation.
    """
    users = deserialise_json(json_path)

    # Reject the insert when the email or phone is already taken
    for existing in users:
        if existing['email'] == data_dict['email']:
            return EMAIL_EXISTS_ERROR
        if existing['tel'] == data_dict['tel']:
            return PHONE_EXISTS_ERROR

    users.append(data_dict)
    # NOTE(review): the argument order here is (data, path), while
    # update_data_json / remove_data_json call serialise_to_json(path, data)
    # — one of the two orders must be wrong; verify against the
    # serialise_to_json signature in filewise.
    serialise_to_json(users, json_path)
    return SUCCESS_USER_INSERT
407
+
408
+ # User retrieval #
409
+ #-#-#-#-#-#-#-#-#
410
+
411
def get_all_users(Session):
    """
    Retrieve every user record from the database.

    Parameters
    ----------
    Session : sqlalchemy.orm.sessionmaker
        The session factory for database interaction.

    Returns
    -------
    list or str
        All User objects, or an error message string (Spanish) on failure.
    """
    session = Session()
    try:
        return session.query(User).all()
    except Exception as err:
        session.rollback()
        return f"Error al recabar datos de usuarios: {err}"
    finally:
        session.close()
434
+
435
def get_all_users_json(json_path):
    """
    Retrieve all user records and their field names from a JSON file.

    Parameters
    ----------
    json_path : str
        Path to the JSON file containing an array of user dictionaries.

    Returns
    -------
    tuple
        - user_info_array (list of dict): all user records.
        - field_names (list of str): the field names, taken from the first
          record; [] when the file holds no users.
    """
    user_info_array = deserialise_json(json_path)
    # BUG FIX: the original indexed into a set of key tuples, which raised
    # IndexError on an empty array and picked an arbitrary (nondeterministic)
    # tuple when records had differing keys; use the first record instead.
    field_names = list(user_info_array[0].keys()) if user_info_array else []
    return user_info_array, field_names
439
+
440
+ # User data filtering #
441
+ #-#-#-#-#-#-#-#-#-#-#-#
442
+
443
def filter_data_json(json_path, form_data, field_name_list):
    """
    Filter user records from a JSON file against submitted form data.

    A record matches when, for every listed field that carries a non-empty
    form value, that value occurs as a substring of the record's value
    (missing record fields are treated as '').

    Parameters
    ----------
    json_path : str
        Path to the JSON file containing user data.
    form_data : dict
        Form data providing the filter values.
    field_name_list : list
        Field names to consider for filtering.

    Returns
    -------
    list
        The matching user dictionaries.
    """
    records = deserialise_json(json_path)

    def _matches(entry):
        # Every active criterion must be a substring of the record's value
        return all(
            form_data[field] in entry.get(field, '')
            for field in field_name_list
            if field in form_data and form_data[field]
        )

    return [entry for entry in records if _matches(entry)]
477
+
478
+ # User data modification #
479
+ #-#-#-#-#-#-#-#-#-#-#-#-#-
480
+
481
def update_data(Session, checked_box_ids, updatable_info_dict):
    """
    Update several users in the database, returning old and new data.

    Parameters
    ----------
    Session : sqlalchemy.orm.sessionmaker
        The session factory for database interaction.
    checked_box_ids : list
        IDs of the users to be updated.
    updatable_info_dict : dict
        New values to apply; only truthy values are written.

    Returns
    -------
    tuple
        On success: (original_users, updated_users), two lists of
        {column: value} dicts. On failure: (error_message, None) with a
        Spanish user-facing message.
    """
    def _row_as_dict(user):
        # Snapshot a User row as a plain {column_name: value} dict
        return {column.name: getattr(user, column.name)
                for column in User.__table__.columns}

    session = Session()
    original_users = []
    updated_users = []
    try:
        selected_users = session.query(User).filter(User.id.in_(checked_box_ids)).all()

        for user in selected_users:
            original_users.append(_row_as_dict(user))

            # Enforce uniqueness of email / tel against other users.
            # CONSISTENCY FIX: use the module-level message constants
            # (identical text) instead of repeating the literals.
            if 'email' in updatable_info_dict:
                clash = session.query(User).filter(
                    User.email == updatable_info_dict['email'],
                    User.id != user.id).first()
                if clash:
                    return EMAIL_EXISTS_ERROR, None

            if 'tel' in updatable_info_dict:
                clash = session.query(User).filter(
                    User.tel == updatable_info_dict['tel'],
                    User.id != user.id).first()
                if clash:
                    return PHONE_EXISTS_ERROR, None

            # Apply only truthy values (empty form fields leave data unchanged)
            for key, value in updatable_info_dict.items():
                if value:
                    setattr(user, key, value)

            updated_users.append(_row_as_dict(user))

        session.commit()
        return original_users, updated_users
    except IntegrityError as e:
        session.rollback()
        constraint_msg = str(e.orig)
        if "email" in constraint_msg:
            return EMAIL_EXISTS_ERROR, None
        if "tel" in constraint_msg:
            return PHONE_EXISTS_ERROR, None
        # BUG FIX: the original returned None here for any other constraint
        # violation; report it like other unexpected errors
        return f"Error al actualizar los datos del usuario: {e}", None
    except Exception as e:
        session.rollback()
        return f"Error al actualizar los datos del usuario: {e}", None
    finally:
        session.close()
546
+
547
def update_data_json(json_path, updated_info_dict, user_ids):
    """
    Update the user records in the JSON file for the given user IDs.

    Parameters
    ----------
    json_path : str
        Path to the JSON file containing user information (read and written).
    updated_info_dict : dict
        New values to apply; only truthy values are written.
    user_ids : list
        User IDs (as strings) to update.

    Returns
    -------
    tuple or str
        On success: (selected_users_obj, user_info_array) — the updated
        records and the full array. On a uniqueness clash: a Spanish
        error-message string.
    """
    user_info_array = deserialise_json(json_path)

    # Reject updates that would duplicate another user's email or tel
    for user in user_info_array:
        if str(user['ID']) in user_ids:
            continue
        if 'email' in updated_info_dict and user['email'] == updated_info_dict['email']:
            return "El correo electrónico ya existe."
        if 'tel' in updated_info_dict and user['tel'] == updated_info_dict['tel']:
            return "El teléfono ya existe."

    # Apply truthy values to the selected users (mutates user_info_array entries)
    selected_users_obj = [dct for dct in user_info_array if str(dct.get("ID")) in user_ids]
    for user in selected_users_obj:
        for key, value in updated_info_dict.items():
            if value:
                user[key] = value

    # BUG FIX: the original wrote to the hard-coded USER_INFO_JSON_PATH,
    # silently ignoring the json_path the records were read from
    serialise_to_json(json_path, user_info_array)
    return selected_users_obj, user_info_array
584
+
585
+ # User deletion #
586
+ #-#-#-#-#-#-#-#-#
587
+
588
def remove_data(Session, checked_box_ids):
    """
    Delete the users with the given IDs from the database.

    Parameters
    ----------
    Session : sqlalchemy.orm.sessionmaker
        The session factory for database interaction.
    checked_box_ids : list
        IDs of the users to be deleted.

    Returns
    -------
    None or str
        None on success; an error message string (Spanish) on failure.
    """
    session = Session()
    try:
        # Bulk delete of every matching row, keeping the session in sync
        (session.query(User)
                .filter(User.id.in_(checked_box_ids))
                .delete(synchronize_session='fetch'))
        session.commit()
    except Exception as err:
        session.rollback()
        return f"Error durante la eliminación de datos: {err}"
    finally:
        session.close()
614
+
615
def remove_data_json(json_path, checked_box_ids):
    """
    Remove the specified users from the JSON file.

    Parameters
    ----------
    json_path : str
        Path to the JSON file containing user information.
    checked_box_ids : list
        IDs (as strings) of the users to be deleted.

    Returns
    -------
    None
    """
    user_info_array = deserialise_json(json_path)

    # Single O(n) pass with a set lookup, replacing the original's
    # build-a-delete-list-then-test-membership approach, which was O(n^2)
    # in dict comparisons
    ids_to_delete = set(checked_box_ids)
    updated_info_array = [user for user in user_info_array
                          if str(user['ID']) not in ids_to_delete]

    # Persist the pruned user list
    serialise_to_json(json_path, updated_info_array)
641
+
642
+ # Operations beyond CRUD set #
643
+ #----------------------------#
644
+
645
def run_custom_query(query_str, engine, print_str=False):
    """
    Execute a custom SQL query for operations beyond standard CRUD.

    Parameters
    ----------
    query_str : str
        The SQL query string to execute.
    engine : sqlalchemy.engine.base.Engine
        The SQLAlchemy engine object for the database connection.
    print_str : bool, optional
        If True, prints the query result to the console. Default is False.

    Returns
    -------
    list of tuple or str
        The query result rows ([] when the statement returns none), or an
        error message string on failure.
    """
    try:
        with engine.connect() as conn:
            result = conn.execute(text(query_str))
            rows = result.fetchall() if result.returns_rows else []

            if print_str:
                # BUG FIX: the original `for row in rows: return row` returned
                # the first row instead of printing, so nothing was ever
                # printed and the documented list return was bypassed
                if rows:
                    for row in rows:
                        print(row)
                else:
                    print("Query executed successfully, no results to display.")

            return rows
    except SQLAlchemyError as e:
        return f"An SQLAlchemy error occurred: {e}"
    except Exception as e:
        return f"An unexpected error occurred: {e}"
682
+
683
def upload_file_data(input_file_list,
                     config,
                     database_type="mysql",
                     if_exists="replace",
                     import_index=False,
                     separator="\t",
                     sheet_name=None,
                     header=None,
                     parser_engine=None,
                     decimal=".",
                     dtype_dict=None):
    """
    Load data from various file types into SQL tables via Pandas DataFrames.

    Parameters
    ----------
    input_file_list : str or list of str
        Path(s) to input files (CSV, XLSX, ODS).
    config : dict
        Database configuration credentials (refer to `_init_engine`).
    database_type : {'mysql', 'postgresql', 'sqlite'}, optional
        Type of SQL database. Default is 'mysql'.
    if_exists : {'replace', 'append', 'fail'}, optional
        Behaviour if the table exists in the database. Default is 'replace'.
    import_index : bool, optional
        Whether to import the DataFrame index into the database table.
        Default is False.
    separator : str, optional
        Separator for CSV files. Default is the tab character.
    sheet_name : str, optional
        Sheet name for XLSX/ODS files.
    header : int or None, optional
        Row number(s) to use as column names.
        Default is None (take into account all sheets).
    parser_engine : str, optional
        Parser engine for reading the files.
    decimal : str, optional
        Character to recognise as decimal point. Default is ".".
    dtype_dict : dict or list of dict, optional
        Data type definitions for columns, one dict per input file.
        None (default) applies no casting to any file.

    Notes
    -----
    Usage
    ~~~~~
    - When processing multiple files, pass a list of dictionaries, one per
      file, in the same order as `input_file_list`.
    - Not every column needs an entry; use None or {} for files that need
      no casting.

    Data Type Options
    ~~~~~~~~~~~~~~~~~
    Mapping of Pandas data types to MySQL data types:

    | Pandas Data Type | MySQL Data Type     |
    |------------------|---------------------|
    | `int64`          | `BIGINT`            |
    | `int32`          | `INTEGER`           |
    | `float64`        | `DOUBLE`            |
    | `float32`        | `FLOAT`             |
    | `bool`           | `BOOLEAN`           |
    | `datetime64`     | `DATETIME`          |
    | `timedelta[ns]`  | `TIME`              |
    | `object`         | `TEXT` or `VARCHAR` |
    | `category`       | `VARCHAR`           |

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If a file's dtype dict contains unsupported data types.
    TypeError
        If `dtype_dict` is of the wrong type.
    """
    # Argument adequacy controls #
    ##############################

    # Retrieve argument names for error reporting
    function_arg_names = get_func_name()
    dtype_dict_arg_pos = find_substring_index(function_arg_names, "dtype_dict")

    # Accept a single path as well as a list
    if isinstance(input_file_list, str):
        input_file_list = [input_file_list]

    # Normalise 'dtype_dict'. BUG FIX: the original passed the None default
    # straight to zip(), raising TypeError whenever dtype_dict was omitted.
    if isinstance(dtype_dict, dict):
        dtype_dict = [dtype_dict]
    elif dtype_dict is None:
        dtype_dict = [None] * len(input_file_list)

    # Common parser keyword arguments #
    ###################################

    kwargs_simple = dict(
        engine=parser_engine,
        header=header,
        decimal=decimal,
    )

    kwargs_complete = dict(
        engine=parser_engine,
        sheet_name=sheet_name,
        header=header,
        decimal=decimal,
    )

    # Operations #
    ##############

    # Create the database engine
    engine = _init_engine(config, database_type=database_type)

    # Keyword arguments for the data upload helper
    kwargs_data_upload = dict(
        engine=engine,
        if_exists=if_exists,
        import_index=import_index,
    )

    # Upload each file; table names come from the file name (CSV) or the
    # sheet names (XLSX/ODS)
    for file, dtype in zip(input_file_list, dtype_dict):
        in_extension = get_obj_specs(file, "ext")

        try:
            # CSV files: no 'sheet' concept, the file is a single table
            if in_extension == SUPPORTED_EXTENSIONS[0]:
                df = csv2df(file, separator, **kwargs_simple)
                table_name = get_obj_specs(file, "name_noext")
                if dtype:
                    df = df.astype(dtype)
                _load_df(df, table_name, **kwargs_data_upload)

            # Microsoft Excel files: one table per sheet
            elif in_extension == SUPPORTED_EXTENSIONS[1]:
                item_dict = excel_handler(file, **kwargs_complete, return_type='dict')
                for sheet_name, df in item_dict.items():
                    if dtype:
                        df = df.astype(dtype)
                    _load_df(df, sheet_name, **kwargs_data_upload)

            # LibreOffice Calc files: one table per sheet
            elif in_extension == SUPPORTED_EXTENSIONS[2]:
                item_dict = ods_handler(file, **kwargs_complete, return_type='dict')
                for sheet_name, df in item_dict.items():
                    if dtype:
                        df = df.astype(dtype)
                    _load_df(df, sheet_name, **kwargs_data_upload)

        except ValueError:
            # Unsupported data types in this file's dtype dict
            raise ValueError(format_string(UNSUPPORTED_DTYPE_ERROR_TEMPLATE, file))
        except TypeError:
            # BUG FIX: the original raised TypeError(template, args) without
            # formatting, yielding a two-argument exception instead of a message
            dtype_arg_type = get_type_str(dtype_dict)
            format_args_type_error = [file, function_arg_names[dtype_dict_arg_pos], dtype_arg_type]
            raise TypeError(format_string(INCORRECT_ARG_TYPE_TEMPLATE, format_args_type_error))
843
+
844
+
845
#--------------------------#
# Parameters and constants #
#--------------------------#

# Supported input file extensions (order matters: csv, xlsx, ods)
SUPPORTED_EXTENSIONS = ["csv", "xlsx", "ods"]

# Template strings #
#------------------#

# Informative strings #
SUCCESS_DATA_LOAD_TEMPLATE = """Data successfully loaded:
Database : {}
Table : {}
"""

# Error strings #
UNSUPPORTED_DTYPE_ERROR_TEMPLATE = "File: {}\nUnsupported data type(s) provided"
INCORRECT_ARG_TYPE_TEMPLATE = "File: {}\nExpected dictionary for argument '{}', got {}"

# Database configuration #
#------------------------#

# SQLAlchemy URL dialect aliases per supported backend
DATABASE_ALIAS_DICT = {
    "mysql": "mysql+pymysql",
    "postgresql": "postgresql",
    "sqlite": "sqlite",
}

# User-facing messages, Spanish (success and uniqueness errors) #
SUCCESS_USER_INSERT = "Usuario insertado exitosamente."
EMAIL_EXISTS_ERROR = "El correo electrónico ya existe."
PHONE_EXISTS_ERROR = "El teléfono ya existe."