pjdev-sqlmodel 4.6.3__tar.gz → 4.6.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pjdev-sqlmodel
3
- Version: 4.6.3
3
+ Version: 4.6.5
4
4
  Project-URL: Documentation, https://gitlab.purplejay.net/keystone/python
5
5
  Project-URL: Issues, https://gitlab.purplejay.net/keystone/python/issues
6
6
  Project-URL: Source, https://gitlab.purplejay.net/keystone/python
@@ -1,4 +1,4 @@
1
1
  # SPDX-FileCopyrightText: 2024-present Chris O'Neill <chris@purplejay.io>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "4.6.3"
4
+ __version__ = "4.6.5"
@@ -55,13 +55,16 @@ def get_excel_columns(file_path, header_ndx: int = 1, col_range: Optional[str] =
55
55
 
56
56
  def load_csv_data(model_type: Type[T], data_files: List[Path]) -> None:
57
57
  fields = model_type.model_fields.keys()
58
+ # Check validation alias first, then alias, and fallback to field name
58
59
  cols = [
59
- f
60
- if model_type.model_fields.get(f).alias is None
61
- else model_type.model_fields.get(f).alias
60
+ model_type.model_fields[f].validation_alias
61
+ or model_type.model_fields[f].alias
62
+ or f
62
63
  for f in fields
63
64
  if f != "row_id"
64
65
  ]
66
+
67
+ # Filter only CSV files that match the required columns
65
68
  filtered_files = [
66
69
  f
67
70
  for f in data_files
@@ -80,41 +83,28 @@ def load_csv_data(model_type: Type[T], data_files: List[Path]) -> None:
80
83
  logger.info("Loaded {} rows for {} table".format(len(data), model_type.__name__))
81
84
 
82
85
 
83
- def load_excel_data(
84
- model_type: Type[T],
85
- data_files: List[Path],
86
- header_ndx: int = 0,
87
- sheet_name: str | int = 0,
88
- col_range: Optional[str] = None,
89
- ) -> None:
86
+ def load_excel_data(model_type: Type[T], data_files: List[Path], sheet_name: str | int = 0) -> None:
90
87
  fields = model_type.model_fields.keys()
88
+ # Check validation alias first, then alias, and fallback to field name
91
89
  cols = [
92
- f
93
- if model_type.model_fields.get(f).alias is None
94
- else model_type.model_fields.get(f).alias
90
+ model_type.model_fields[f].validation_alias
91
+ or model_type.model_fields[f].alias
92
+ or f
95
93
  for f in fields
96
94
  if f != "row_id"
97
95
  ]
98
96
 
97
+ # Filter only Excel files that match the required columns
99
98
  filtered_files = [
100
99
  f
101
100
  for f in data_files
102
- if f.name.endswith(".xlsx")
103
- and len(
104
- set(cols).difference(set(get_excel_columns(f, header_ndx + 1, col_range, sheet_name=sheet_name)))
105
- )
106
- == 0
101
+ if f.name.endswith((".xls", ".xlsx"))
102
+ and len(set(cols).difference(set(get_excel_columns(f, sheet_name=sheet_name)))) == 0
107
103
  ]
108
104
 
109
105
  data: List[model_type] = []
110
-
111
- if len(filtered_files) == 0:
112
- raise Exception(
113
- f"No files found that matched the schema for {model_type.__name__}"
114
- )
115
-
116
106
  for file in filtered_files:
117
- df = __read_excel(file=file, cols=cols, header_ndx=header_ndx, sheet_name=sheet_name)
107
+ df = __read_excel(file, cols, sheet_name=sheet_name)
118
108
  data.extend(__convert_to_models(file.name, df, model_type))
119
109
 
120
110
  with session_context() as session:
@@ -251,33 +241,53 @@ def export_to_sheet(InputTable: type[BaseModel], wb: Workbook, sheet_name: str,
251
241
  ws.column_dimensions[column_letter].width = adjusted_width
252
242
 
253
243
 
254
- def __read_csv(
255
- file: Path | str,
256
- cols: Optional[List[str]] = None,
257
- data_type_map: Optional[Dict[str, Type]] = None,
258
- ) -> pd.DataFrame:
259
- return pd.read_csv(
260
- file, engine="pyarrow", usecols=cols, na_filter=False, dtype=data_type_map
244
+ def __read_csv(file: Path, cols: List[str]) -> pd.DataFrame:
245
+ df = pd.read_csv(
246
+ file,
247
+ usecols=cols,
248
+ na_filter=True,
249
+ na_values=[""],
250
+ keep_default_na=False,
261
251
  )
262
252
 
263
-
264
- def __read_excel(
265
- file: Path | str,
266
- sheet_name: str | int = 0,
267
- cols: Optional[List[str]] = None,
268
- header_ndx: int = 0,
269
- data_type_map: Optional[Dict[str, Type]] = None,
270
- ) -> pd.DataFrame:
271
- return pd.read_excel(
272
- io=file,
253
+ df = df.convert_dtypes()
254
+ for col in df.select_dtypes(include="number").columns:
255
+ s = df[col]
256
+ if s.isna().all():
257
+ continue
258
+ non_na = s.dropna()
259
+ if (non_na % 1 == 0).all():
260
+ df[col] = s.astype("Int64")
261
+ else:
262
+ df[col] = s.astype("Float64")
263
+ df = df.convert_dtypes()
264
+
265
+ return df
266
+
267
+ def __read_excel(file: Path, cols: List[str], sheet_name: str | int = 0) -> pd.DataFrame:
268
+ df = pd.read_excel(
269
+ file,
273
270
  usecols=cols,
274
- sheet_name=sheet_name,
275
- na_filter=False,
276
- header=header_ndx,
277
- engine="calamine",
278
- dtype=data_type_map,
271
+ na_filter=True,
272
+ na_values=[""],
273
+ keep_default_na=False,
274
+ sheet_name = sheet_name
279
275
  )
280
276
 
277
+ df = df.convert_dtypes()
278
+ for col in df.select_dtypes(include="number").columns:
279
+ s = df[col]
280
+ if s.isna().all():
281
+ continue
282
+ non_na = s.dropna()
283
+ if (non_na % 1 == 0).all():
284
+ df[col] = s.astype("Int64")
285
+ else:
286
+ df[col] = s.astype("Float64")
287
+ df = df.convert_dtypes()
288
+
289
+ return df
290
+
281
291
 
282
292
  def convert_to_csv(
283
293
  data: List[BaseModel],
File without changes