power-grid-model-io 1.2.89__py3-none-any.whl → 1.2.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of power-grid-model-io might be problematic. Click here for more details.

@@ -167,14 +167,16 @@ class ExcelFileStore(BaseDataStore[TabularData]):
167
167
  if to_rename:
168
168
  columns = data.columns.values.copy()
169
169
  for col_idx, new_name in to_rename.items():
170
+ new_name = new_name[0] if isinstance(new_name, tuple) else new_name
171
+ full_new_name = (new_name, columns[col_idx][1])
170
172
  self._log.warning(
171
173
  "Column is renamed",
172
174
  sheet_name=sheet_name,
173
175
  col_name=columns[col_idx],
174
- new_name=new_name,
176
+ new_name=full_new_name,
175
177
  col_idx=col_idx,
176
178
  )
177
- columns[col_idx] = new_name
179
+ columns[col_idx] = full_new_name
178
180
 
179
181
  if data.columns.nlevels == 1:
180
182
  data.columns = pd.Index(columns)
@@ -218,7 +220,7 @@ class ExcelFileStore(BaseDataStore[TabularData]):
218
220
  if isinstance(col_name, tuple):
219
221
  to_rename[dup_idx] = (f"{col_name[0]}_{counter}",) + col_name[1:]
220
222
  else:
221
- to_rename[dup_idx] = f"{col_name[0]}_{counter}"
223
+ to_rename[dup_idx] = f"{col_name}_{counter}"
222
224
 
223
225
  return to_rename
224
226
 
@@ -253,7 +255,7 @@ class ExcelFileStore(BaseDataStore[TabularData]):
253
255
  grouped: Dict[Union[str, Tuple[str, ...]], Set[int]] = {}
254
256
  columns = data.columns.values
255
257
  for col_idx, col_name in enumerate(columns):
256
- if col_name not in grouped:
257
- grouped[col_name] = set()
258
- grouped[col_name].add(col_idx)
258
+ if col_name[0] not in grouped:
259
+ grouped[col_name[0]] = set()
260
+ grouped[col_name[0]].add(col_idx)
259
261
  return grouped
@@ -16,7 +16,7 @@ def exclude_empty(row: pd.Series, col: str) -> bool:
16
16
  """
17
17
  filter out empty
18
18
  """
19
- if col not in row:
19
+ if col not in row:
20
20
  raise ValueError(f"The column: '{col}' cannot be found for the filter")
21
21
  result = has_value(row[col])
22
22
  if isinstance(result, pd.Series):
@@ -0,0 +1,165 @@
1
+ # SPDX-FileCopyrightText: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org>
2
+ #
3
+ # SPDX-License-Identifier: MPL-2.0
4
+ """
5
+ This module provides the ExcelAmbiguityChecker class, which is designed to identify and report ambiguous column names
6
+ within the sheets of an Excel (.xlsx) file. It parses the Excel file, extracts the names of columns from a specified
7
+ row across all sheets, and checks for any duplicates within those names to flag them as ambiguous.
8
+
9
+ Usage:
10
+ checker = ExcelAmbiguityChecker(file_path='path/to/excel/file.xlsx', column_name_in_row=0)
11
+ has_ambiguity, ambiguous_columns = checker.check_ambiguity()
12
+ if has_ambiguity:
13
+ print("Ambiguous column names found:", ambiguous_columns)
14
+ else:
15
+ print("No ambiguous column names found.")
16
+
17
+ Requirements:
18
+ - Python 3.9 or higher (PGM library dependencies)
19
+ - xml.etree.ElementTree for parsing XML structures within the Excel file.
20
+ - zipfile to handle the Excel file as a ZIP archive for parsing.
21
+ """
22
+ import os
23
+ import xml.etree.ElementTree as ET
24
+ import zipfile
25
+ from collections import Counter
26
+ from typing import Dict, List, Optional, Tuple
27
+
28
XML_NAME_SPACE = {"": "http://schemas.openxmlformats.org/spreadsheetml/2006/main"}  # NOSONAR
# Namespace of the package relationship parts (``*.rels``) inside the archive.
RELS_NAME_SPACE = {"": "http://schemas.openxmlformats.org/package/2006/relationships"}  # NOSONAR
# Qualified name of the ``r:id`` attribute that links a <sheet> to its relationship entry.
REL_ID_ATTR = "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id"  # NOSONAR
WORK_BOOK = "xl/workbook.xml"
WORK_BOOK_RELS = "xl/_rels/workbook.xml.rels"
SHARED_STR_PATH = "xl/sharedStrings.xml"
FIND_T = ".//t"
FIND_C = ".//c"
FIND_V = ".//v"
FIND_SI = ".//si"
FIND_RELATIONSHIP = ".//Relationship"
NAME = "name"
FIND_ROW = ".//row"
FIND_SHEET = ".//sheet"
FIND_TYPE = "t"
TYPE_STR = "s"
TYPE_INLINE_STR = "inlineStr"


class ExcelAmbiguityChecker:
    """
    Detect duplicated column names within each sheet of an Excel (.xlsx) file.

    The workbook is read directly as a ZIP archive of XML parts; the cells of one
    configurable row per sheet are taken as that sheet's column names.

    Attributes:
        _file_path (str): The path to the Excel file to be checked.
        _col_name_in_row (int): Index into the list of <row> elements found in a sheet
            (in document order) that holds the column names. Default is 0.
        _valid_file (bool): True when the path ends with ".xlsx" and exists on disk.
        sheets (dict): Sheet name -> list of column names (cells without a value are omitted).
            Empty when the file is not valid.
    """

    def __init__(self, file_path, column_name_in_row=0) -> None:
        """
        Initializes the ExcelAmbiguityChecker and, if the file is usable, parses it immediately.

        Parameters:
            file_path (str or os.PathLike): The path to the Excel file.
            column_name_in_row (int): Index of the row that holds the column names. Default is 0.
        """
        # Accept pathlib.Path objects as well as plain strings.
        file_path = os.fspath(file_path)
        self._file_path = file_path
        self._col_name_in_row = column_name_in_row
        # Always defined, so list_sheets() is safe to call even for an invalid file.
        self.sheets: Dict[str, List[str]] = {}
        self._valid_file = file_path.endswith(".xlsx") and os.path.exists(file_path)
        if self._valid_file:
            self._parse_excel_file()

    @staticmethod
    def _rich_text(element) -> Optional[str]:
        """
        Concatenate the text of a string item (<si> or <is> element).

        Only the plain <t> child and the <t> children of rich-text runs (<r>) are used;
        text nested elsewhere (e.g. phonetic runs) is deliberately ignored.

        Returns:
            The concatenated text, or None when the element carries no text at all.
        """
        parts = [
            node.text
            for path in ("t", "r/t")
            for node in element.findall(path, namespaces=XML_NAME_SPACE)
            if node.text is not None
        ]
        return "".join(parts) if parts else None

    def _parse_zip(self, zip_file) -> List[Optional[str]]:
        """
        Parses the shared strings XML part within the Excel ZIP archive.

        Parameters:
            zip_file (zipfile.ZipFile): The opened Excel ZIP file.

        Returns:
            list: One entry per <si> element, in document order, so that the numeric
            references stored in cells index into it directly. Empty when the archive
            has no shared strings part.
        """
        try:
            shared_file = zip_file.open(SHARED_STR_PATH)
        except KeyError:
            # The archive has no shared strings part.
            return []
        with shared_file as f:
            tree = ET.parse(f)
        # One list entry per <si>, not per <t>: a rich-text item holds several <t> runs
        # and must still occupy exactly one index.
        return [self._rich_text(si) for si in tree.findall(FIND_SI, namespaces=XML_NAME_SPACE)]

    def _parse_workbook_rels(self, zip_file) -> Dict[str, str]:
        """
        Map relationship ids of the workbook to archive member paths.

        Parameters:
            zip_file (zipfile.ZipFile): The opened Excel ZIP file.

        Returns:
            dict: Relationship id -> member path inside the archive. Empty when the
            relationships part is missing.
        """
        try:
            rels_xml = zip_file.read(WORK_BOOK_RELS)
        except KeyError:
            return {}
        targets: Dict[str, str] = {}
        for rel in ET.fromstring(rels_xml).findall(FIND_RELATIONSHIP, namespaces=RELS_NAME_SPACE):
            rel_id = rel.get("Id")
            target = rel.get("Target")
            if rel_id is None or target is None:
                continue
            # A target starting with "/" is relative to the archive root,
            # anything else is relative to the "xl/" directory of the workbook part.
            targets[rel_id] = target[1:] if target.startswith("/") else f"xl/{target}"
        return targets

    def _get_column_names_from_row(self, row, shared_strings) -> List[Optional[str]]:
        """
        Extracts column names from a row, resolving shared and inline strings.

        Parameters:
            row (xml.etree.ElementTree.Element): The XML element representing the row.
            shared_strings (list): The shared strings extracted from the Excel file.

        Returns:
            list: One entry per cell in the row; None for cells without a value.
        """
        column_names: List[Optional[str]] = []
        for cell in row.findall(FIND_C, namespaces=XML_NAME_SPACE):
            cell_type = cell.get(FIND_TYPE)
            value = cell.find(FIND_V, namespaces=XML_NAME_SPACE)
            text = value.text if value is not None else None
            if cell_type == TYPE_STR and text is not None:
                # The stored value is an index into the shared strings table.
                column_names.append(shared_strings[int(text)])
            elif cell_type == TYPE_INLINE_STR:
                # Inline strings keep their text in an <is> child instead of <v>.
                inline = cell.find("is", namespaces=XML_NAME_SPACE)
                column_names.append(self._rich_text(inline) if inline is not None else None)
            else:
                column_names.append(text)
        return column_names

    def _parse_excel_file(self) -> None:
        """
        Parses the Excel file to extract sheet names and their corresponding column names.

        Sheets that do not contain the requested row are skipped.
        """
        with zipfile.ZipFile(self._file_path) as z:
            shared_strings = self._parse_zip(z)
            workbook_xml = z.read(WORK_BOOK)
            xml_tree = ET.fromstring(workbook_xml)
            sheets = xml_tree.findall(FIND_SHEET, namespaces=XML_NAME_SPACE)
            targets = self._parse_workbook_rels(z)
            members = set(z.namelist())

            for index, sheet in enumerate(sheets, start=1):
                sheet_name = str(sheet.get(NAME))
                # Prefer the part the workbook actually points to; fall back to the
                # positional naming scheme when the relationship cannot be resolved.
                sheet_file_path = targets.get(sheet.get(REL_ID_ATTR), "")
                if sheet_file_path not in members:
                    sheet_file_path = f"xl/worksheets/sheet{index}.xml"

                with z.open(sheet_file_path) as f:
                    rows = ET.parse(f).findall(FIND_ROW, namespaces=XML_NAME_SPACE)
                try:
                    header_row = rows[self._col_name_in_row]
                except IndexError:
                    # Empty sheet, or fewer rows than the configured header index.
                    continue
                column_names = self._get_column_names_from_row(header_row, shared_strings)
                self.sheets[sheet_name] = [name for name in column_names if name is not None]

    def list_sheets(self) -> List[str]:
        """
        Get the list of all sheet names for which column names were extracted.

        Returns:
            List[str]: list of sheet names
        """
        return list(self.sheets)

    def check_ambiguity(self) -> Tuple[bool, Dict[str, List[str]]]:
        """
        Check every sheet for column names that occur more than once within that sheet.

        Returns:
            Tuple[bool, Dict[str, List[str]]]: A tuple containing a boolean indicating if any ambiguity was found,
            and a dictionary with sheet names as keys and lists of ambiguous column names as values.
            For an invalid file the result is (False, {}).
        """
        res: Dict[str, List[str]] = {}
        if not self._valid_file:
            return False, res
        for sheet_name, column_names in self.sheets.items():
            duplicates = [name for name, count in Counter(column_names).items() if count > 1]
            if duplicates:
                res[sheet_name] = duplicates
        return bool(res), res


# Example usage
if __name__ == "__main__":
    excel_file_checker = ExcelAmbiguityChecker("excel_ambiguity_check_data.xlsx")
    excel_file_checker.check_ambiguity()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: power-grid-model-io
3
- Version: 1.2.89
3
+ Version: 1.2.90
4
4
  Summary: Power Grid Model Input/Output
5
5
  Author-email: Contributors to the Power Grid Model project <powergridmodel@lfenergy.org>
6
6
  License: MPL-2.0
@@ -15,7 +15,7 @@ power_grid_model_io/converters/vision_excel_converter.py,sha256=aGEoOh8Y-ZK1O5oS
15
15
  power_grid_model_io/data_stores/__init__.py,sha256=qwbj1j-Aa_yRB-E3j35pEVtF3mgH8CVIXAnog5mOry0,138
16
16
  power_grid_model_io/data_stores/base_data_store.py,sha256=DJfLtRwvx_tXKnpjtBdfbMqPjWc324Eo5WeKTXjWXqc,1706
17
17
  power_grid_model_io/data_stores/csv_dir_store.py,sha256=H8ICXZRLDvp9OkbjkfHnoh4y7uNSXNepHAW6W53VsIw,1877
18
- power_grid_model_io/data_stores/excel_file_store.py,sha256=Nl5GsC4N6Nf-PpC64wUcdWBgYv2xRt47Rc8CSOaeKi4,10704
18
+ power_grid_model_io/data_stores/excel_file_store.py,sha256=uyEwCzFzIt09QY5aI7D0ZtxBTpBhb81Yw4iOF8FMTv0,10868
19
19
  power_grid_model_io/data_stores/json_file_store.py,sha256=0njL2YZn_fImNcZqnIRpHp2UtIS6WGaQQ46TjIK8tyo,3954
20
20
  power_grid_model_io/data_stores/vision_excel_file_store.py,sha256=85mZXoXaWpFHy_t-Ra0w4xCIjYK9b2Tr8cFZj3hPIbM,1059
21
21
  power_grid_model_io/data_types/__init__.py,sha256=63A_PkOsQkVd3To7Kl4FTUX7lbPG9BS9MSLzXTW6Ktk,383
@@ -23,7 +23,7 @@ power_grid_model_io/data_types/_data_types.py,sha256=9xH5vBGrRVUSlPh4HXmORtKo3LF
23
23
  power_grid_model_io/data_types/tabular_data.py,sha256=sV6S4kqCEuQiNZTOdKS7CiA2M8Ny1oGXvtFoN-xkYBg,8582
24
24
  power_grid_model_io/functions/__init__.py,sha256=pamhvKX5c_5fkVMRrUp6zhHWex2R63otRJk1Sfsw6y0,495
25
25
  power_grid_model_io/functions/_functions.py,sha256=tqwwZ0G8AeDza0IiS6CSMwKB0lV1hDo2D8e9-ARHXQM,2843
26
- power_grid_model_io/functions/filters.py,sha256=JW36XTVhE2uD0kSge1FQ_kK89WCyMZ0C7D4surgw4v8,1319
26
+ power_grid_model_io/functions/filters.py,sha256=T_Gsy1FPxMx9eQn0rxuok2hXC8sJwb2O2EZsMk_Ccrw,1311
27
27
  power_grid_model_io/functions/phase_to_phase.py,sha256=zbaDXIj8S4cLO42LjkpcQoUrEW1frzBUj1OmKu-xkTg,4459
28
28
  power_grid_model_io/mappings/__init__.py,sha256=qwbj1j-Aa_yRB-E3j35pEVtF3mgH8CVIXAnog5mOry0,138
29
29
  power_grid_model_io/mappings/field_mapping.py,sha256=YfrwKolNG06kIC1sbUYnYmxuOrbNbNo1dYtnF8rNItw,1659
@@ -35,13 +35,14 @@ power_grid_model_io/utils/__init__.py,sha256=qwbj1j-Aa_yRB-E3j35pEVtF3mgH8CVIXAn
35
35
  power_grid_model_io/utils/auto_id.py,sha256=H7F00_Lc5fIjxrb0O04esgT-LnpzhPgn_IBg3c-uGZU,4122
36
36
  power_grid_model_io/utils/dict.py,sha256=CDZM8iMWpLqfzxiyZZnvIUdWihwuczJQKHpfEGFsf2s,1014
37
37
  power_grid_model_io/utils/download.py,sha256=enEoj42ByC5j5rj_iZ1Y9we7t8Nmlwre0-9Njon-Tqg,9457
38
+ power_grid_model_io/utils/excel_ambiguity_checker.py,sha256=MqqmGKTVn_TJ0rNO0KSHauXgJW2K4RmgR-v-NJASxvE,6918
38
39
  power_grid_model_io/utils/json.py,sha256=dQDRd2Vb8pfqLU2hTuWYv2cpSIBBbFhd0LOBP21YxJI,3327
39
40
  power_grid_model_io/utils/modules.py,sha256=a4IdozSL-sOZcmIQON_aQS7-cpnCyt-3p7zs40wRFkU,928
40
41
  power_grid_model_io/utils/parsing.py,sha256=XB1QSHnslIieFJBKFXZCtiydqpOqQBiX_CXDbItXgAQ,4522
41
42
  power_grid_model_io/utils/uuid_excel_cvtr.py,sha256=H1iWhW_nluJBUJ3hK-Gc0xJjGnH5e35WrBz_fA3YXZs,7626
42
43
  power_grid_model_io/utils/zip.py,sha256=VXHX4xWPPZbhOlZUAbMDy3MgQFzK6_l7sRvGXihNUY4,3875
43
- power_grid_model_io-1.2.89.dist-info/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
44
- power_grid_model_io-1.2.89.dist-info/METADATA,sha256=JMkJFUJCplvAQ2MOe81QG07AHGaaI8P49taw7QbAAnY,8038
45
- power_grid_model_io-1.2.89.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
46
- power_grid_model_io-1.2.89.dist-info/top_level.txt,sha256=7sq9VveemMm2R0RgTBa4tH8y_xF4_1hxbufmX9OjCTo,20
47
- power_grid_model_io-1.2.89.dist-info/RECORD,,
44
+ power_grid_model_io-1.2.90.dist-info/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
45
+ power_grid_model_io-1.2.90.dist-info/METADATA,sha256=9dhMaKJXZfaMqrN4F5qz3DE4pek3gl0XJRd1gWYnp68,8038
46
+ power_grid_model_io-1.2.90.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
47
+ power_grid_model_io-1.2.90.dist-info/top_level.txt,sha256=7sq9VveemMm2R0RgTBa4tH8y_xF4_1hxbufmX9OjCTo,20
48
+ power_grid_model_io-1.2.90.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (70.1.1)
2
+ Generator: setuptools (70.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5