lybic-guiagents 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lybic-guiagents might be problematic. Click here for more details.

Files changed (85) hide show
  1. desktop_env/__init__.py +1 -0
  2. desktop_env/actions.py +203 -0
  3. desktop_env/controllers/__init__.py +0 -0
  4. desktop_env/controllers/python.py +471 -0
  5. desktop_env/controllers/setup.py +882 -0
  6. desktop_env/desktop_env.py +509 -0
  7. desktop_env/evaluators/__init__.py +5 -0
  8. desktop_env/evaluators/getters/__init__.py +41 -0
  9. desktop_env/evaluators/getters/calc.py +15 -0
  10. desktop_env/evaluators/getters/chrome.py +1774 -0
  11. desktop_env/evaluators/getters/file.py +154 -0
  12. desktop_env/evaluators/getters/general.py +42 -0
  13. desktop_env/evaluators/getters/gimp.py +38 -0
  14. desktop_env/evaluators/getters/impress.py +126 -0
  15. desktop_env/evaluators/getters/info.py +24 -0
  16. desktop_env/evaluators/getters/misc.py +406 -0
  17. desktop_env/evaluators/getters/replay.py +20 -0
  18. desktop_env/evaluators/getters/vlc.py +86 -0
  19. desktop_env/evaluators/getters/vscode.py +35 -0
  20. desktop_env/evaluators/metrics/__init__.py +160 -0
  21. desktop_env/evaluators/metrics/basic_os.py +68 -0
  22. desktop_env/evaluators/metrics/chrome.py +493 -0
  23. desktop_env/evaluators/metrics/docs.py +1011 -0
  24. desktop_env/evaluators/metrics/general.py +665 -0
  25. desktop_env/evaluators/metrics/gimp.py +637 -0
  26. desktop_env/evaluators/metrics/libreoffice.py +28 -0
  27. desktop_env/evaluators/metrics/others.py +92 -0
  28. desktop_env/evaluators/metrics/pdf.py +31 -0
  29. desktop_env/evaluators/metrics/slides.py +957 -0
  30. desktop_env/evaluators/metrics/table.py +585 -0
  31. desktop_env/evaluators/metrics/thunderbird.py +176 -0
  32. desktop_env/evaluators/metrics/utils.py +719 -0
  33. desktop_env/evaluators/metrics/vlc.py +524 -0
  34. desktop_env/evaluators/metrics/vscode.py +283 -0
  35. desktop_env/providers/__init__.py +35 -0
  36. desktop_env/providers/aws/__init__.py +0 -0
  37. desktop_env/providers/aws/manager.py +278 -0
  38. desktop_env/providers/aws/provider.py +186 -0
  39. desktop_env/providers/aws/provider_with_proxy.py +315 -0
  40. desktop_env/providers/aws/proxy_pool.py +193 -0
  41. desktop_env/providers/azure/__init__.py +0 -0
  42. desktop_env/providers/azure/manager.py +87 -0
  43. desktop_env/providers/azure/provider.py +207 -0
  44. desktop_env/providers/base.py +97 -0
  45. desktop_env/providers/gcp/__init__.py +0 -0
  46. desktop_env/providers/gcp/manager.py +0 -0
  47. desktop_env/providers/gcp/provider.py +0 -0
  48. desktop_env/providers/virtualbox/__init__.py +0 -0
  49. desktop_env/providers/virtualbox/manager.py +463 -0
  50. desktop_env/providers/virtualbox/provider.py +124 -0
  51. desktop_env/providers/vmware/__init__.py +0 -0
  52. desktop_env/providers/vmware/manager.py +455 -0
  53. desktop_env/providers/vmware/provider.py +105 -0
  54. gui_agents/__init__.py +0 -0
  55. gui_agents/agents/Action.py +209 -0
  56. gui_agents/agents/__init__.py +0 -0
  57. gui_agents/agents/agent_s.py +832 -0
  58. gui_agents/agents/global_state.py +610 -0
  59. gui_agents/agents/grounding.py +651 -0
  60. gui_agents/agents/hardware_interface.py +129 -0
  61. gui_agents/agents/manager.py +568 -0
  62. gui_agents/agents/translator.py +132 -0
  63. gui_agents/agents/worker.py +355 -0
  64. gui_agents/cli_app.py +560 -0
  65. gui_agents/core/__init__.py +0 -0
  66. gui_agents/core/engine.py +1496 -0
  67. gui_agents/core/knowledge.py +449 -0
  68. gui_agents/core/mllm.py +555 -0
  69. gui_agents/tools/__init__.py +0 -0
  70. gui_agents/tools/tools.py +727 -0
  71. gui_agents/unit_test/__init__.py +0 -0
  72. gui_agents/unit_test/run_tests.py +65 -0
  73. gui_agents/unit_test/test_manager.py +330 -0
  74. gui_agents/unit_test/test_worker.py +269 -0
  75. gui_agents/utils/__init__.py +0 -0
  76. gui_agents/utils/analyze_display.py +301 -0
  77. gui_agents/utils/common_utils.py +263 -0
  78. gui_agents/utils/display_viewer.py +281 -0
  79. gui_agents/utils/embedding_manager.py +53 -0
  80. gui_agents/utils/image_axis_utils.py +27 -0
  81. lybic_guiagents-0.1.0.dist-info/METADATA +416 -0
  82. lybic_guiagents-0.1.0.dist-info/RECORD +85 -0
  83. lybic_guiagents-0.1.0.dist-info/WHEEL +5 -0
  84. lybic_guiagents-0.1.0.dist-info/licenses/LICENSE +201 -0
  85. lybic_guiagents-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,719 @@
1
+ import builtins
2
+ #import datetime
3
+ import functools
4
+ import itertools
5
+ import logging
6
+ import operator
7
+ import os
8
+ import re
9
+ import zipfile
10
+ #import pandas as pd
11
+ from typing import Any, TypeVar, Union, Iterable, Optional, Callable
12
+ from typing import Dict, List, Set, Match, Tuple, Pattern
13
+ from urllib.parse import urlparse, urlunparse
14
+
15
+ import formulas
16
+ import lxml.cssselect
17
+ import lxml.etree
18
+ import xmltodict
19
+ from lxml.etree import _Element
20
+ from openpyxl import Workbook
21
+ from openpyxl.cell.cell import Cell, MergedCell
22
+ from openpyxl.chart._chart import ChartBase
23
+ from openpyxl.formatting.formatting import ConditionalFormattingList
24
+ from openpyxl.pivot.cache import CacheSource as PivotCacheSource
25
+ from openpyxl.pivot.table import TableDefinition as PivotTableDefinition
26
+ from openpyxl.styles.differential import DifferentialStyle
27
+ from openpyxl.utils import coordinate_to_tuple, get_column_letter
28
+ from openpyxl.worksheet.cell_range import MultiCellRange, CellRange
29
+ from openpyxl.worksheet.dimensions import DimensionHolder
30
+ from openpyxl.worksheet.filters import AutoFilter, SortState
31
+ from openpyxl.worksheet.worksheet import Worksheet
32
+
33
# Generic value type used in the annotations of the rule-matching helpers.
# A TypeVar's name argument must equal the variable it is assigned to,
# otherwise static type checkers reject the declaration.
V = TypeVar("V")

logger = logging.getLogger("desktopenv.metrics.utils")

# (prefix, namespace URI) pairs for the XML namespaces queried in xlsx parts.
_xlsx_namespaces = [
    ("oo", "http://schemas.openxmlformats.org/spreadsheetml/2006/main"),
    ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"),
    ("xm", "http://schemas.microsoft.com/office/excel/2006/main"),
]
# prefix -> URI; handed to the CSS selectors.
_xlsx_ns_mapping = dict(_xlsx_namespaces)
# URI -> prefix; handed to xmltodict. The main spreadsheet namespace is mapped
# to None, and the parsing code in this module reads its elements without a
# prefix (e.g. "c", "v").
_xlsx_ns_imapping = {uri: prefix for prefix, uri in _xlsx_namespaces}
_xlsx_ns_imapping["http://schemas.openxmlformats.org/spreadsheetml/2006/main"] = None
# Selects the <sheet> entries of xl/workbook.xml.
_sheet_name_selector = lxml.cssselect.CSSSelector("oo|sheets>oo|sheet", namespaces=_xlsx_ns_mapping)
# Selects every x14 <sparkline> element of a worksheet part.
_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
47
+
48
+
49
def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]:
    # function load_sparklines {{{ #
    """
    Read the sparkline definitions of one worksheet directly from the xlsx
    archive.

    Args:
        xlsx_file (str): path to xlsx
        sheet_name (str): sheet name

    Returns:
        Dict[str, str]: sparkline definitions in form of
          {
            "F3": "Sheet1!C3:E3"
          }
          An empty dict is returned when the file is not a valid zip archive
          or when the sheet (or one of the required archive members) cannot
          be found.
    """

    # read xlsx
    try:
        with zipfile.ZipFile(xlsx_file, "r") as z_f:
            with z_f.open("xl/workbook.xml") as f:
                workbook_database: _Element = lxml.etree.fromstring(f.read())
                sheets: List[_Element] = _sheet_name_selector(workbook_database)
                sheet_names: Dict[str, str] = {sh.get("name"): sh.get("sheetId") for sh in sheets}
            # The worksheet part is located through the sheetId recorded in
            # workbook.xml.
            with z_f.open("xl/worksheets/sheet{:}.xml".format(sheet_names[sheet_name])) as f:
                sheet: _Element = lxml.etree.fromstring(f.read())
                sparklines: List[_Element] = _sparklines_selector(sheet)
    except zipfile.BadZipFile:
        return {}
    except KeyError:
        # Unknown sheet name or missing archive member: behave like the other
        # loaders in this module and report "nothing found".
        logger.debug("Sheet %s not found in %s", sheet_name, xlsx_file)
        return {}

    sparklines_dict: Dict[str, str] = {}
    for sp_l in sparklines:
        sparkline_xml: str = lxml.etree.tostring(sp_l, encoding="unicode")
        sparkline: Dict[str, Dict[str, str]] = xmltodict.parse(
            sparkline_xml,
            process_namespaces=True,
            namespaces=_xlsx_ns_imapping,
        )
        # key: cell holding the sparkline (sqref); value: its data formula (f)
        sparklines_dict[sparkline["x14:sparkline"]["xm:sqref"]] = sparkline["x14:sparkline"]["xm:f"]
    return sparklines_dict
    # }}} function load_sparklines #
86
+
87
+
88
+ # Available Chart Properties:
89
+ # title: str
90
+ # anchor: ["oneCell" | "twoCell" | "absolute", col0, row0, col1, row1]
91
+ # legend: "b" | "tr" | "l" | "r" | "t"
92
+ # width: number
93
+ # height: number
94
+ # type: "scatterChart" | "lineChart" | "barChart"
95
+ # direction: "bar" (hori) | "col" (vert)
96
+ # xtitle, ytitle, ztitle: str
97
def _chart_ref_formula(data_source: Any) -> str:
    """Return the formula of the numeric or string reference of a series axis, or ""."""
    for ref_name in ("numRef", "strRef"):
        ref = getattr(data_source, ref_name, None)
        if hasattr(ref, "f"):
            return ref.f
    return ""


def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
    # function load_charts {{{ #
    """
    Collect the requested properties of every chart on a worksheet.

    Args:
        xlsx_file (Workbook): concerned excel book
        sheet_name (str): sheet name
        options (Dict[str, List[str]]): dict like {"chart_props": list of str}
          giving the concerned chart properties; supported names are "title",
          "legend", "anchor", "width", "height", "type", "direction",
          "xtitle", "ytitle" and "ztitle"

    Returns:
        Dict[str, Any]: information of charts, dict like
          {
            <str representing data source>: {
                <str as property>: anything
            }
          }
          The key is the ";"-joined list of "<value formula>,<category formula>"
          pairs of the chart's series. An empty dict is returned if the sheet
          does not exist.
    """

    # workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
    try:
        worksheet: Worksheet = xlsx_file[sheet_name]
    except KeyError:
        return {}
    charts: List[ChartBase] = worksheet._charts

    chart_set: Dict[str, Any] = {}
    chart_props: Set[str] = set(options["chart_props"]) if "chart_props" in options else set()
    for ch in charts:
        series_key: str = ";".join(
            "{:},{:}".format(_chart_ref_formula(ser.val), _chart_ref_formula(ser.cat))
            for ser in ch.series
        )

        # TODO: maybe more aspects, like chart type
        info: Dict[str, Any] = {}

        if "title" in chart_props:
            try:
                info["title"] = ch.title.tx.rich.p[0].r[0].t
            except Exception:
                # no title, or the title is not stored as rich text
                info["title"] = None
        if "legend" in chart_props:
            info["legend"] = ch.legend.position if ch.legend is not None else None
        if "anchor" in chart_props:
            # [editAs, col0, row0, col1, row1]; row0 comes from the `_from`
            # marker, like col0.
            # NOTE(review): an anchor without a `to` marker would raise here;
            # confirm only two-cell anchors are expected.
            info["anchor"] = [
                ch.anchor.editAs,
                ch.anchor._from.col, ch.anchor._from.row,
                ch.anchor.to.col, ch.anchor.to.row,
            ]
        if "width" in chart_props:
            info["width"] = ch.width
        if "height" in chart_props:
            info["height"] = ch.height
        if "type" in chart_props:
            info["type"] = ch.tagname
        if "direction" in chart_props:
            info["direction"] = ch.barDir

        for prop, axis_name in (("xtitle", "x_axis"), ("ytitle", "y_axis"), ("ztitle", "z_axis")):
            if prop in chart_props:
                try:
                    info[prop] = getattr(ch, axis_name).title.tx.rich.p[0].r[0].t
                except Exception:
                    # axis missing or untitled
                    info[prop] = None
        chart_set[series_key] = info
    logger.debug(".[%s].charts: %s", sheet_name, repr(chart_set))
    return chart_set
    # }}} function load_charts #
185
+
186
+
187
+ # Available Pivot Properties:
188
+ # name: str
189
+ # show_total, show_empty_row, show_empty_col, show_headers: bool
190
+ # location: str
191
+ # selection: if the concrete item selection should be checked, a list of set of tuple like (bool, index) will be returned; list will be returned instead of set if "ordered" is specified
192
+ # filter: if the filter fields should be checked; fields indices will be return in `filter_fields` item
193
+ # col_fields: indices
194
+ # row_fields: indices
195
+ # data_fields: list of str representations. the str representation is like "index;name;subtotal_type;show_data_as"; name is optional and is only returned when `data_fields_name` is specified in `pivot_props`
196
def load_pivot_tables(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
    # function load_pivot_tables {{{ #
    """
    Collect the requested properties of every pivot table on a worksheet.

    Args:
        xlsx_file (Workbook): concerned excel book
        sheet_name (str): sheet name
        options (Dict[str, List[str]]): dict like {"pivot_props": list of str}
          giving the concerned pivot properties

    Returns:
        Dict[str, Any]: information of pivot tables, dict like
          {
            <str representing data source>: {
                <str as property>: anything
            }
          }
          The key is "<cache source type>;<top-left>:<bottom-right>;<sheet>",
          where the two corners are (row, column) tuples whose columns are
          shifted inwards by the number of leading/trailing pivot fields
          without items. An empty dict is returned if the sheet does not
          exist.
    """

    try:
        worksheet: Worksheet = xlsx_file[sheet_name]
    except KeyError:
        return {}
    tables: List[PivotTableDefinition] = worksheet._pivots

    requested: Set[str] = set(options.get("pivot_props", []))
    # (property name, attribute of the pivot table definition) copied verbatim
    plain_attributes = (
        ("name", "name"),
        ("show_total", "visualTotals"),
        ("show_empty_row", "showEmptyRow"),
        ("show_empty_col", "showEmptyCol"),
        ("show_headers", "showHeaders"),
        ("location", "location"),
    )

    result: Dict[str, Any] = {}
    for table in tables:
        # One list of (hidden flag, item index) per pivot field.
        per_field = [
            [(item.h, item.x) for item in field.items if item.x is not None]
            for field in table.pivotFields
        ]
        # Strip the fields without items from the left, then from the right,
        # remembering how many were removed on each side.
        left_trimmed = list(itertools.dropwhile(lambda entry: not entry, per_field))
        left_bias = len(per_field) - len(left_trimmed)
        selection = list(itertools.dropwhile(lambda entry: not entry, reversed(left_trimmed)))
        selection.reverse()
        right_bias = len(left_trimmed) - len(selection)

        cache_source: PivotCacheSource = table.cache.cacheSource
        first_ref, last_ref = cache_source.worksheetSource.ref.split(":")
        first_row, first_col = coordinate_to_tuple(first_ref)
        last_row, last_col = coordinate_to_tuple(last_ref)
        top_left: Tuple[int, int] = (first_row, first_col + left_bias)
        bottom_right: Tuple[int, int] = (last_row, last_col - right_bias)
        source: str = "{:};{:}:{:};{:}".format(
            cache_source.type, top_left, bottom_right, cache_source.worksheetSource.sheet
        )

        info: Dict[str, Any] = {}
        for prop, attribute in plain_attributes:
            if prop in requested:
                info[prop] = getattr(table, attribute)

        if "filter" in requested or "selection" in requested:
            if "ordered" in requested:
                info["selection"] = selection
            else:
                info["selection"] = [set(entry) for entry in selection]
        if "filter" in requested:
            info["filter_fields"] = {page.fld for page in table.pageFields}
        if "col_fields" in requested:
            info["col_fields"] = [col.x - left_bias for col in table.colFields]
        if "row_fields" in requested:
            info["row_fields"] = [row.x - left_bias for row in table.rowFields]
        if "data_fields" in requested:
            with_name = "data_fields_name" in requested
            info["data_fields"] = [
                "{:d};{:};{:};{:}".format(
                    data.fld - left_bias,
                    data.name if with_name else "",
                    data.subtotal,
                    data.showDataAs,
                )
                for data in table.dataFields
            ]

        result[source] = info
    logger.debug(".[%s].pivots: %s", sheet_name, repr(result))
    return result
    # }}} function load_pivot_tables #
279
+
280
+
281
# Selects each shared-string item (<si>) directly under <sst>.
_shared_str_selector = lxml.cssselect.CSSSelector(
    "oo|sst>oo|si",
    namespaces=_xlsx_ns_mapping,
)
# Selects the text runs (<t>) below a selected element.
_shared_str_value_selector = lxml.cssselect.CSSSelector(
    "oo|t",
    namespaces=_xlsx_ns_mapping,
)
283
+
284
+
285
def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
    # read_cell_value {{{ #
    """
    Read the stored value of a single cell directly from the xlsx archive.

    Args:
        xlsx_file (str): path to xlsx
        sheet_name (str): sheet name
        coordinate (str): cell coordinate like "A1"

    Returns:
        Any: a float for numeric cells (or cells without a type attribute),
          a str for shared-string, formula-string and inline-string cells,
          and None when the file, sheet or cell cannot be read, or when the
          cell type is not one of those handled here.
    """
    logger.debug(f"Reading cell value from {xlsx_file}, sheet: {sheet_name}, coordinate: {coordinate}")

    # Check if file exists
    if not os.path.exists(xlsx_file):
        logger.error(f"Excel file not found: {xlsx_file}")
        return None

    try:
        with zipfile.ZipFile(xlsx_file, "r") as z_f:
            try:
                with z_f.open("xl/sharedStrings.xml") as f:
                    shared_str_xml: _Element = lxml.etree.fromstring(f.read())
                    str_elements: List[_Element] = _shared_str_selector(shared_str_xml)
                    # A <t> element without text has text None; treat it as an
                    # empty run so it does not invalidate the whole table.
                    shared_strs: List[str] = [
                        "".join((t.text or "") for t in _shared_str_value_selector(elm))
                        for elm in str_elements
                    ]
            except Exception:
                # Best effort: continue with an empty shared-string table
                # when the part is missing or unreadable.
                #logger.exception("Read shared strings error: %s", xlsx_file)
                logger.debug("Read shared strings error: %s", xlsx_file)
                shared_strs: List[str] = []

            with z_f.open("xl/workbook.xml") as f:
                workbook_database: _Element = lxml.etree.fromstring(f.read())
                sheets: List[_Element] = _sheet_name_selector(workbook_database)
                sheet_names: Dict[str, str] = {sh.get("name"): sh.get("sheetId") for sh in sheets}

            with z_f.open("xl/worksheets/sheet{:}.xml".format(sheet_names[sheet_name])) as f:
                sheet: _Element = lxml.etree.fromstring(f.read())
                cells: List[_Element] = lxml.cssselect.CSSSelector(
                    'oo|row>oo|c[r="{:}"]'.format(coordinate),
                    namespaces=_xlsx_ns_mapping,
                )(sheet)
                if len(cells) == 0:
                    logger.debug(f"Cell {coordinate} not found in sheet {sheet_name}")
                    return None
                cell: _Element = cells[0]
    except zipfile.BadZipFile as e:
        logger.error(f"Bad zip file {xlsx_file}: {e}")
        return None
    except KeyError as e:
        logger.error(f"Sheet {sheet_name} not found in {xlsx_file}: {e}")
        return None
    except Exception as e:
        logger.error(f"Error reading {xlsx_file}: {e}")
        return None

    cell: Dict[str, str] = xmltodict.parse(
        lxml.etree.tostring(cell, encoding="unicode"),
        process_namespaces=True,
        namespaces=_xlsx_ns_imapping,
    )
    logger.debug("%s.shared_strings: %s", xlsx_file, repr(shared_strs))
    logger.debug("%s.%s[%s]: %s", xlsx_file, sheet_name, coordinate, repr(cell))
    try:
        if "@t" not in cell["c"] or cell["c"]["@t"] == "n":
            return float(cell["c"]["v"])
        if cell["c"]["@t"] == "s":
            # "v" holds an index into the shared-string table
            return shared_strs[int(cell["c"]["v"])]
        if cell["c"]["@t"] == "str":
            return cell["c"]["v"]
        if cell["c"]["@t"] == "inlineStr":
            return cell["c"]["is"]["t"]
    except (KeyError, ValueError, IndexError, TypeError):
        # missing/empty value, or a shared-string index outside the table
        return None
    # any other cell type is not decoded
    return None
    # }}} read_cell_value #
351
+
352
+
353
+ # Supported Styles:
354
+ # number_format
355
+ # font_name - str
356
+ # font_family - float
357
+ # font_color - in aRGB, e.g., FF000000 is black
358
+ # font_bold - bool
359
+ # font_italic - bool
360
+ # font_underline - "single" | "double" | "singleAccounting" | "doubleAccounting"
361
+ # font_size - float
362
+ # fill_type - "patternFill" | "gradientFill"
363
+ # bgcolor - in aRGB, e.g., FFFF0000 is red; This property seems to be ambiguous with fgcolor in xlsx, strange
364
+ # fgcolor - in aRGB, e.g., FF00FFFF is yellow # Deprecated
365
+ # hyperlink - str
366
+ # merge - bool, if the cell is in a merged range and is not the first cell in the merged range
367
def _read_cell_style(style_name: str, cell: Union[Cell, MergedCell], diff_style: Optional[DifferentialStyle] = None) -> Any:
    """
    Read one style property of a cell.

    Args:
        style_name (str): one of "number_format", "font_name", "font_family",
          "font_color", "font_bold", "font_italic", "font_underline",
          "font_size", "fill_type", "bgcolor", "fgcolor", "hyperlink", "merge"
        cell (Union[Cell, MergedCell]): the cell to inspect
        diff_style (Optional[DifferentialStyle]): if given, font/fill/number
          format are read from this differential style instead of the cell

    Returns:
        Any: the property value. Font properties, "number_format" and
          "hyperlink" are None for cells without a value; "number_format" is
          also None for non-numeric cells; fill properties are None when they
          cannot be read.

    Raises:
        NotImplementedError: if `style_name` is not supported
    """
    # style name -> attribute of the font object
    font_attributes = {
        "font_name": "name",
        "font_family": "family",
        "font_bold": "bold",
        "font_italic": "italic",
        "font_underline": "underline",
        "font_size": "size",
    }

    if style_name == "number_format":
        if cell.value is None or cell.data_type != "n":
            return None
        return cell.number_format if diff_style is None else diff_style.numFmt.formatCode
    if style_name in font_attributes:
        if cell.value is None:
            return None
        return getattr((diff_style or cell).font, font_attributes[style_name])
    if style_name == "font_color":
        return (diff_style or cell).font.color.rgb if cell.value is not None else None
    if style_name == "fill_type":
        try:
            return (diff_style or cell).fill.tagname
        except Exception:
            # best effort: no readable fill
            return None
    if style_name == "bgcolor" or style_name == "fgcolor":
        # Both names report the same colour: the background colour of a
        # differential style, or the foreground colour of the cell's own fill.
        try:
            if diff_style is not None:
                return diff_style.fill.bgColor.rgb
            return cell.fill.fgColor.rgb
        except Exception:
            # best effort: no readable fill colour
            return None
    if style_name == "hyperlink":
        return (cell.hyperlink or "") if cell.value is not None else None
    if style_name == "merge":
        return isinstance(cell, MergedCell)
    raise NotImplementedError("Unsupported Style: {:}".format(style_name))
410
+
411
+
412
+ _absolute_range_pattern: Pattern[str] = re.compile(r"""\$(?P<col1>[A-Z]{1,3})\$(?P<row1>\d+) # coord1
413
+ (?::
414
+ \$(?P<col2>[A-Z]{1,3})\$(?P<row2>\d+) # coord2
415
+ )?
416
+ """
417
+ , re.X
418
+ )
419
+
420
+
421
def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **options) -> Dict[str, List[Any]]:
    # function load_xlsx_styles {{{ #
    """
    Collect the requested style properties of every cell of a worksheet,
    then overwrite them for cells activated by a conditional-formatting rule.

    Args:
        xlsx_file (Workbook): concerned excel book
        sheet_name (str): sheet name
        book_name (str): book name; passed to `read_cell_value` as the path
          of the xlsx file
        options (Dict[str, List[str]]): dict like {"props": list of str} giving
          the concerned styles

    Returns:
        Dict[str, List[Any]]: dict like
          {
            <str as cell coordinates>: list of anything indicating concerned
              property values
          }
          An empty dict is returned if the sheet does not exist.

    Raises:
        NotImplementedError: for conditional-formatting rules whose type is
          not "expression"
    """

    try:
        worksheet: Worksheet = xlsx_file[sheet_name]
    except KeyError:
        return {}

    style_dict: Dict[str, List[Any]] = {}
    concerned_styles: List[str] = options.get("props", [])

    # Handles Cell Styles
    for col in worksheet.iter_cols():
        for c in col:
            style_dict[c.coordinate] = [_read_cell_style(st, c) for st in concerned_styles]

    # Handles Conditional Formatting
    conditional_formattings: ConditionalFormattingList = worksheet.conditional_formatting
    formula_parser = formulas.Parser()
    for fmt in conditional_formattings:
        for r in fmt.rules:
            active_cells: List[Cell] = []
            if r.type == "expression":
                condition: Callable[..., bool] = formula_parser.ast("=" + r.formula[0])[1].compile()
                logger.debug("Expression condition: %s", r.formula[0])

                # Absolute references in the formula are resolved once and
                # passed as extra arguments after the tested cell's value.
                arguments: List[Any] = []
                absolute_range_match: List[Tuple[str, str, str, str]] = _absolute_range_pattern.findall(r.formula[0])
                for m in absolute_range_match:
                    logger.debug("Absolute ranges: %s", repr(m))
                    # findall() yields "" (not None) for the groups of the
                    # optional second coordinate, so test for emptiness.
                    if not m[2] and not m[3]:
                        arguments.append(
                            read_cell_value(book_name, sheet_name, coordinate="{:}{:}".format(m[0], m[1]))
                        )
                    else:
                        arguments.append(
                            [
                                read_cell_value(
                                    book_name,
                                    sheet_name,
                                    coordinate="{:}{:}".format(get_column_letter(rc[1]), rc[0]),
                                )
                                for rc in CellRange("{:}{:}:{:}{:}".format(m[0], m[1], m[2], m[3])).cells
                            ]
                        )
                logger.debug("Absolute range arguments: %s", repr(arguments))

                # Counts the empty cells seen so far for this rule; it is
                # shared by all ranges and never reset. Once it exceeds 50
                # the scan of the current range stops.
                nb_contiguous_nothings = 0
                for rge in fmt.cells:
                    for rc in rge.cells:
                        cell: Cell = worksheet.cell(row=rc[0], column=rc[1])
                        cell_value = read_cell_value(
                            book_name,
                            sheet_name,
                            coordinate="{:}{:d}".format(get_column_letter(rc[1]), rc[0]),
                        )
                        if cell_value is None:
                            nb_contiguous_nothings += 1
                            if nb_contiguous_nothings > 50:
                                break
                            continue
                        elif condition(cell_value, *arguments):
                            logger.debug("Active Cell %s(%s) for %s", repr(cell), str(cell_value), r.formula[0])
                            active_cells.append(cell)
            else:
                raise NotImplementedError("Not Implemented Condition Type: {:}".format(r.type))

            # The rule's differential style overrides the plain cell style.
            for active in active_cells:
                style_dict[active.coordinate] = [_read_cell_style(st, active, r.dxf) for st in concerned_styles]

    logger.debug(".[%s].styles: %s", sheet_name, repr(style_dict))
    return style_dict
    # }}} function load_xlsx_styles #
508
+
509
+
510
+ # Available Row Properties:
511
+ # hidden
512
+ # collapsed
513
+ # height
514
+ #
515
+ # Available Column Properties:
516
+ # width
517
+ # auto_size
518
+ # hidden
519
+ # collapsed
520
+ # min
521
+ # max
522
def load_rows_or_cols(xlsx_file: Workbook, sheet_name: str, **options) \
        -> Dict[Union[int, str], Dict[str, Any]]:
    # function load_rows_or_cols {{{ #
    """
    Read the requested properties of every row or column dimension of a
    worksheet.

    Args:
        xlsx_file (Workbook): concerned excel book
        sheet_name (str): sheet name
        options (Dict[str, List[str]]): dict like
          {"obj": "row" | "column", "props": list of str} giving the concerned
          row/column properties; "obj" is mandatory

    Returns:
        Dict[Union[int, str], Dict[str, Any]]: row/column information, one
          {property: value} dict per key of the sheet's `row_dimensions` or
          `column_dimensions`; empty if the sheet does not exist
    """

    try:
        worksheet: Worksheet = xlsx_file[sheet_name]
    except KeyError:
        return {}

    dimensions: DimensionHolder = getattr(worksheet, "{:}_dimensions".format(options["obj"]))
    wanted: Set[str] = set(options.get("props", []))
    return {
        key: {prop: getattr(dimension, prop) for prop in wanted}
        for key, dimension in dimensions.items()
    }
    # }}} function load_rows_or_cols #
552
+
553
+
554
def load_filters(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
    # function load_filters {{{ #
    """
    Summarise the auto filter of a worksheet.

    Args:
        xlsx_file (Workbook): concerned excel book
        sheet_name (str): sheet name
        options: accepted for signature parity with the other loaders; not
          read by this function

    Returns:
        Dict[str, Any]: dict with
          "ref": the filter's ref,
          "filter_column": list of per-column dicts sorted by "col_id",
          "sort_state": present only if the filter has a sort state.
          An empty dict is returned if the sheet does not exist.
    """
    try:
        worksheet: Worksheet = xlsx_file[sheet_name]
    except KeyError:
        return {}

    auto_filter: AutoFilter = worksheet.auto_filter
    summary: Dict[str, Any] = {"ref": auto_filter.ref}

    # filterColumn
    columns: List[Dict[str, Any]] = []
    for column in auto_filter.filterColumn:
        entry: Dict[str, Any] = {
            "col_id": column.colId,
            "hidden_button": column.hiddenButton,
            "show_button": column.showButton,
        }
        if column.filters is not None:
            entry["filters_blank"] = column.filters.blank
            entry["filters"] = set(column.filters.filter)
        if column.customFilters is not None:
            entry["custom_filters_op"] = column.customFilters._and
            entry["custom_filters"] = {
                (flt.operator, flt.val) for flt in column.customFilters.customFilter
            }
        columns.append(entry)
    columns.sort(key=operator.itemgetter("col_id"))
    summary["filter_column"] = columns

    # sortState
    sort_state: Optional[SortState] = auto_filter.sortState
    if sort_state is not None:
        summary["sort_state"] = {
            "sort": sort_state.columnSort,
            "case": sort_state.caseSensitive,
            "method": sort_state.sortMethod,
            "ref": sort_state.ref,
            "condition": [
                {
                    "descending": condition.descending,
                    "key": condition.sortBy,
                    "ref": condition.ref,
                    "custom_list": condition.customList,
                    "dxf_id": condition.dxfId,
                    "icon": condition.iconSet,
                    "iconid": condition.iconId,
                }
                for condition in sort_state.sortCondition
            ],
        }

    return summary
    # }}} function load_filters #
611
+
612
+
613
+ def _match_record(pattern: Dict[str, Any], item: Dict[str, Any]) -> bool:
614
+ return all(k in item and item[k] == val for k, val in pattern.items())
615
+
616
+
617
def _multicellrange_containsby(subset_candidate: MultiCellRange, superset_candidate: MultiCellRange) -> bool:
    """Return True if every range of `subset_candidate` is `in` `superset_candidate`."""
    for cell_range in subset_candidate:
        if cell_range not in superset_candidate:
            return False
    return True
619
+
620
+
621
def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool:
    """
    Check a value against a matching rule.

    Args:
        value (V): value to match
        rule (Dict[str, Union[str, V]]): rule dict like
          {
            "method": str
            "ref": V as ref value
          }
          Supported methods:
            "re" / "re.FLAG[.FLAG...]": regex search of "ref" in the value
            "eq", "ne", "le", "lt", "ge", "gt": operator comparison with "ref"
            "approx:THRESHOLD": |float(value) - ref| <= THRESHOLD
            "spreadsheet_range": value lies between two multi-cell ranges
            "range.XY": ref[0] <bound> value <bound> ref[1], X and Y being
              "t" (strict) or "e" (inclusive)
            "str_list_eq", "str_set_eq": comma-separated value equals "ref"
              as a list / set

    Returns:
        bool

    Raises:
        NotImplementedError: if the method is not one of the above
    """
    method: str = rule["method"]
    reference = rule["ref"]

    if method.startswith("re"):  # re.FLAGs
        flag_names: List[str] = method.split(".")[1:]
        flag: re.RegexFlag = functools.reduce(
            operator.or_,
            (getattr(re, name) for name in flag_names),
            re.RegexFlag(0),
        )
        logger.debug("REFLAG: %s", repr(flag))

        found: Optional[Match[str]] = re.search(reference, value, flag)
        return found is not None

    if method in {"eq", "ne", "le", "lt", "ge", "gt"}:
        compare = getattr(operator, method)
        return compare(value, reference)

    if method.startswith("approx"):  # approx:THRESHOLD
        threshold: float = float(method.split(":")[1])
        logger.debug("Approx: TH%f, REF%f, VAL%s", threshold, reference, repr(value))
        try:
            number = float(value)
        except (ValueError, TypeError):
            return False
        return abs(number - reference) <= threshold

    if method == "spreadsheet_range":
        # ref[0] must be contained by the value, the value by ref[1]
        lower_bound = MultiCellRange(reference[0])
        upper_bound = MultiCellRange(reference[1])
        if not _multicellrange_containsby(lower_bound, value):
            return False
        return _multicellrange_containsby(value, upper_bound)

    if method.startswith("range."):  # e.g., range.te [0, 2] -> 0 < x <= 2
        # "t" -> operator.lt, "e" -> operator.le
        left_compare = getattr(operator, "l" + method[6])
        right_compare = getattr(operator, "l" + method[7])
        if not left_compare(reference[0], value):
            return False
        return right_compare(value, reference[1])

    if method in {"str_list_eq", "str_set_eq"}:
        # "str_list_eq" -> list, "str_set_eq" -> set
        container_type = getattr(builtins, method[4:-3])
        actual = container_type(value.strip("\"'").split(","))
        expected = container_type(reference)
        return actual == expected

    raise NotImplementedError()
675
+
676
+
677
def are_lists_equal(list1, list2, comparison_func):
    """
    Tell whether two lists hold pairwise-equal items, ignoring order.

    Each item of `list1` is paired with a distinct, not yet used item of
    `list2`, so duplicates must occur the same number of times on both sides.

    Args:
        list1: first list
        list2: second list
        comparison_func: callable taking (item of list1, item of list2) and
          returning a truthy value when they are considered equal.
          NOTE(review): the greedy pairing is exact only if the callable
          behaves like an equivalence relation; confirm for custom callers.

    Returns:
        bool: True if the lists have the same length and every item could be
          paired, False otherwise
    """
    # First check if both lists have the same length
    if len(list1) != len(list2):
        return False

    # Pair each item with a still-unmatched item of the other list
    unmatched = list(list2)
    for item1 in list1:
        for index, item2 in enumerate(unmatched):
            if comparison_func(item1, item2):
                del unmatched[index]
                break
        else:
            # no partner left for item1
            return False

    # If all items match, the lists are equal
    return True
690
+
691
+
692
def compare_urls(url1, url2):
    """
    Compare two URLs after normalisation.

    Normalisation lower-cases the scheme and host, assumes "http" when no
    scheme is present, drops a leading "www." from the host, and treats a
    path of "/" as an empty path.

    Args:
        url1: first URL, or None
        url2: second URL, or None

    Returns:
        bool: True if the normalised URLs are identical. If either argument
          is None, the result is `url1 == url2`.
    """
    if url1 is None or url2 is None:
        return url1 == url2

    def normalize_url(url):
        # Parse the URL
        parsed_url = urlparse(url)

        # If no scheme is present, assume 'http'
        scheme = parsed_url.scheme if parsed_url.scheme else 'http'

        # Lowercase the netloc and remove only a leading 'www.'; a "www."
        # occurring elsewhere in the host is part of the name.
        normalized_netloc = parsed_url.netloc.lower()
        if normalized_netloc.startswith("www."):
            normalized_netloc = normalized_netloc[len("www."):]
        # A bare trailing slash is equivalent to no path
        normalized_path = parsed_url.path if parsed_url.path != '/' else ''

        # Reassemble the URL with normalized components
        normalized_parsed_url = parsed_url._replace(scheme=scheme.lower(), netloc=normalized_netloc,
                                                    path=normalized_path)
        return urlunparse(normalized_parsed_url)

    # Compare the normalized URLs
    return normalize_url(url1) == normalize_url(url2)