lybic-guiagents 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lybic-guiagents might be problematic. Click here for more details.
- desktop_env/__init__.py +1 -0
- desktop_env/actions.py +203 -0
- desktop_env/controllers/__init__.py +0 -0
- desktop_env/controllers/python.py +471 -0
- desktop_env/controllers/setup.py +882 -0
- desktop_env/desktop_env.py +509 -0
- desktop_env/evaluators/__init__.py +5 -0
- desktop_env/evaluators/getters/__init__.py +41 -0
- desktop_env/evaluators/getters/calc.py +15 -0
- desktop_env/evaluators/getters/chrome.py +1774 -0
- desktop_env/evaluators/getters/file.py +154 -0
- desktop_env/evaluators/getters/general.py +42 -0
- desktop_env/evaluators/getters/gimp.py +38 -0
- desktop_env/evaluators/getters/impress.py +126 -0
- desktop_env/evaluators/getters/info.py +24 -0
- desktop_env/evaluators/getters/misc.py +406 -0
- desktop_env/evaluators/getters/replay.py +20 -0
- desktop_env/evaluators/getters/vlc.py +86 -0
- desktop_env/evaluators/getters/vscode.py +35 -0
- desktop_env/evaluators/metrics/__init__.py +160 -0
- desktop_env/evaluators/metrics/basic_os.py +68 -0
- desktop_env/evaluators/metrics/chrome.py +493 -0
- desktop_env/evaluators/metrics/docs.py +1011 -0
- desktop_env/evaluators/metrics/general.py +665 -0
- desktop_env/evaluators/metrics/gimp.py +637 -0
- desktop_env/evaluators/metrics/libreoffice.py +28 -0
- desktop_env/evaluators/metrics/others.py +92 -0
- desktop_env/evaluators/metrics/pdf.py +31 -0
- desktop_env/evaluators/metrics/slides.py +957 -0
- desktop_env/evaluators/metrics/table.py +585 -0
- desktop_env/evaluators/metrics/thunderbird.py +176 -0
- desktop_env/evaluators/metrics/utils.py +719 -0
- desktop_env/evaluators/metrics/vlc.py +524 -0
- desktop_env/evaluators/metrics/vscode.py +283 -0
- desktop_env/providers/__init__.py +35 -0
- desktop_env/providers/aws/__init__.py +0 -0
- desktop_env/providers/aws/manager.py +278 -0
- desktop_env/providers/aws/provider.py +186 -0
- desktop_env/providers/aws/provider_with_proxy.py +315 -0
- desktop_env/providers/aws/proxy_pool.py +193 -0
- desktop_env/providers/azure/__init__.py +0 -0
- desktop_env/providers/azure/manager.py +87 -0
- desktop_env/providers/azure/provider.py +207 -0
- desktop_env/providers/base.py +97 -0
- desktop_env/providers/gcp/__init__.py +0 -0
- desktop_env/providers/gcp/manager.py +0 -0
- desktop_env/providers/gcp/provider.py +0 -0
- desktop_env/providers/virtualbox/__init__.py +0 -0
- desktop_env/providers/virtualbox/manager.py +463 -0
- desktop_env/providers/virtualbox/provider.py +124 -0
- desktop_env/providers/vmware/__init__.py +0 -0
- desktop_env/providers/vmware/manager.py +455 -0
- desktop_env/providers/vmware/provider.py +105 -0
- gui_agents/__init__.py +0 -0
- gui_agents/agents/Action.py +209 -0
- gui_agents/agents/__init__.py +0 -0
- gui_agents/agents/agent_s.py +832 -0
- gui_agents/agents/global_state.py +610 -0
- gui_agents/agents/grounding.py +651 -0
- gui_agents/agents/hardware_interface.py +129 -0
- gui_agents/agents/manager.py +568 -0
- gui_agents/agents/translator.py +132 -0
- gui_agents/agents/worker.py +355 -0
- gui_agents/cli_app.py +560 -0
- gui_agents/core/__init__.py +0 -0
- gui_agents/core/engine.py +1496 -0
- gui_agents/core/knowledge.py +449 -0
- gui_agents/core/mllm.py +555 -0
- gui_agents/tools/__init__.py +0 -0
- gui_agents/tools/tools.py +727 -0
- gui_agents/unit_test/__init__.py +0 -0
- gui_agents/unit_test/run_tests.py +65 -0
- gui_agents/unit_test/test_manager.py +330 -0
- gui_agents/unit_test/test_worker.py +269 -0
- gui_agents/utils/__init__.py +0 -0
- gui_agents/utils/analyze_display.py +301 -0
- gui_agents/utils/common_utils.py +263 -0
- gui_agents/utils/display_viewer.py +281 -0
- gui_agents/utils/embedding_manager.py +53 -0
- gui_agents/utils/image_axis_utils.py +27 -0
- lybic_guiagents-0.1.0.dist-info/METADATA +416 -0
- lybic_guiagents-0.1.0.dist-info/RECORD +85 -0
- lybic_guiagents-0.1.0.dist-info/WHEEL +5 -0
- lybic_guiagents-0.1.0.dist-info/licenses/LICENSE +201 -0
- lybic_guiagents-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,719 @@
|
|
|
1
|
+
import builtins
|
|
2
|
+
#import datetime
|
|
3
|
+
import functools
|
|
4
|
+
import itertools
|
|
5
|
+
import logging
|
|
6
|
+
import operator
|
|
7
|
+
import os
|
|
8
|
+
import re
|
|
9
|
+
import zipfile
|
|
10
|
+
#import pandas as pd
|
|
11
|
+
from typing import Any, TypeVar, Union, Iterable, Optional, Callable
|
|
12
|
+
from typing import Dict, List, Set, Match, Tuple, Pattern
|
|
13
|
+
from urllib.parse import urlparse, urlunparse
|
|
14
|
+
|
|
15
|
+
import formulas
|
|
16
|
+
import lxml.cssselect
|
|
17
|
+
import lxml.etree
|
|
18
|
+
import xmltodict
|
|
19
|
+
from lxml.etree import _Element
|
|
20
|
+
from openpyxl import Workbook
|
|
21
|
+
from openpyxl.cell.cell import Cell, MergedCell
|
|
22
|
+
from openpyxl.chart._chart import ChartBase
|
|
23
|
+
from openpyxl.formatting.formatting import ConditionalFormattingList
|
|
24
|
+
from openpyxl.pivot.cache import CacheSource as PivotCacheSource
|
|
25
|
+
from openpyxl.pivot.table import TableDefinition as PivotTableDefinition
|
|
26
|
+
from openpyxl.styles.differential import DifferentialStyle
|
|
27
|
+
from openpyxl.utils import coordinate_to_tuple, get_column_letter
|
|
28
|
+
from openpyxl.worksheet.cell_range import MultiCellRange, CellRange
|
|
29
|
+
from openpyxl.worksheet.dimensions import DimensionHolder
|
|
30
|
+
from openpyxl.worksheet.filters import AutoFilter, SortState
|
|
31
|
+
from openpyxl.worksheet.worksheet import Worksheet
|
|
32
|
+
|
|
33
|
+
# Generic value type used by the rule-matching helpers in this module.
V = TypeVar("Value")

logger = logging.getLogger("desktopenv.metrics.utils")

# XML namespaces appearing inside xlsx (OOXML) archives, as (prefix, uri) pairs.
_xlsx_namespaces = [
    ("oo", "http://schemas.openxmlformats.org/spreadsheetml/2006/main"),
    ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"),
    ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
]
# prefix -> uri, for lxml CSS selectors.
_xlsx_ns_mapping = dict(_xlsx_namespaces)
# uri -> prefix, for xmltodict namespace collapsing; the main spreadsheet
# namespace is remapped to None so its elements keep unprefixed names.
_xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
_xlsx_ns_imapping["http://schemas.openxmlformats.org/spreadsheetml/2006/main"] = None
# Selects the <sheet> entries under <sheets> in xl/workbook.xml.
_sheet_name_selector = lxml.cssselect.CSSSelector("oo|sheets>oo|sheet", namespaces=_xlsx_ns_mapping)
# Selects x14 sparkline definitions inside a worksheet's XML.
_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]:
    # function load_sparklines {{{ #
    """
    Reads sparkline definitions from a worksheet by parsing the raw xlsx
    (zip) archive, since openpyxl does not expose sparklines.

    Args:
        xlsx_file (str): path to xlsx
        sheet_name (str): sheet name

    Returns:
        Dict[str, str]: sparkline definitions in form of
          {
            "F3": "Sheet1!C3:E3"
          }
          i.e. anchor cell -> data source range; empty on a broken archive
          or an unknown sheet name.
    """

    # read xlsx
    try:
        with zipfile.ZipFile(xlsx_file, "r") as z_f:
            with z_f.open("xl/workbook.xml") as f:
                workbook_database: _Element = lxml.etree.fromstring(f.read())
            sheets: List[_Element] = _sheet_name_selector(workbook_database)
            sheet_names: Dict[str, str] = {sh.get("name"): sh.get("sheetId") for sh in sheets}
            # NOTE(review): worksheet files are looked up by sheetId; this
            # assumes sheetN.xml naming matches the ids, which holds for the
            # workbooks this project produces — confirm for external files.
            with z_f.open("xl/worksheets/sheet{:}.xml".format(sheet_names[sheet_name])) as f:
                sheet: _Element = lxml.etree.fromstring(f.read())
            sparklines: List[_Element] = _sparklines_selector(sheet)
    except (zipfile.BadZipFile, KeyError):
        # BadZipFile: corrupt or non-zip file; KeyError: unknown sheet name
        # or missing archive member.  Both degrade to "no sparklines", which
        # is consistent with the other loaders in this module (the original
        # let the KeyError escape to the caller).
        return {}

    sparklines_dict: Dict[str, str] = {}
    for sp_l in sparklines:
        sparkline_xml: str = lxml.etree.tostring(sp_l, encoding="unicode")
        sparkline: Dict[str, Dict[str, str]] = xmltodict.parse(sparkline_xml
                                                               , process_namespaces=True
                                                               , namespaces=_xlsx_ns_imapping
                                                               )
        sparklines_dict[sparkline["x14:sparkline"]["xm:sqref"]] = sparkline["x14:sparkline"]["xm:f"]
    return sparklines_dict
    # }}} function load_sparklines #
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# Available Chart Properties:
|
|
89
|
+
# title: str
|
|
90
|
+
# anchor: ["oneCell" | "twoCell" | "absolute", col0, row0, col1, row1]
|
|
91
|
+
# legend: "b" | "tr" | "l" | "r" | "t"
|
|
92
|
+
# width: number
|
|
93
|
+
# height: number
|
|
94
|
+
# type: "scatterChart" | "lineChart" | "barChart"
|
|
95
|
+
# direction: "bar" (hori) | "col" (vert)
|
|
96
|
+
# xtitle, ytitle, ztitle: str
|
|
97
|
+
def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
    # function load_charts {{{ #
    """
    Args:
        xlsx_file (Workbook): concerned excel book
        sheet_name (str): sheet name
        options (Dict[str, List[str]]): dict like {"chart_props": list of str}
          giving the concerned chart properties

    Returns:
        Dict[str, Any]: information of charts, dict like
          {
            <str representing data source>: {
                <str as property>: anything
            }
          }
          Keys concatenate each series as "<values>,<categories>" joined by
          ";".  Returns {} when the sheet does not exist.
    """

    try:
        worksheet: Worksheet = xlsx_file[sheet_name]
    except KeyError:
        return {}
    charts: List[ChartBase] = worksheet._charts

    chart_set: Dict[str, Any] = {}
    chart_props: Set[str] = set(options["chart_props"]) if "chart_props" in options else set()
    for ch in charts:
        # Build a stable key for the chart from its series value/category
        # references; a series may reference numbers (numRef) or strings
        # (strRef), or have no reference at all.
        series: List[str] = []
        for ser in ch.series:
            if hasattr(ser.val, "numRef") and hasattr(ser.val.numRef, "f"):
                value_str: str = ser.val.numRef.f
            elif hasattr(ser.val, "strRef") and hasattr(ser.val.strRef, "f"):
                value_str: str = ser.val.strRef.f
            else:
                value_str: str = ""
            if hasattr(ser.cat, "numRef") and hasattr(ser.cat.numRef, "f"):
                categ_str: str = ser.cat.numRef.f
            elif hasattr(ser.cat, "strRef") and hasattr(ser.cat.strRef, "f"):
                categ_str: str = ser.cat.strRef.f
            else:
                categ_str: str = ""
            series.append("{:},{:}".format(value_str, categ_str))
        series: str = ";".join(series)

        # TODO: maybe more aspects, like chart type
        info: Dict[str, Any] = {}

        if "title" in chart_props:
            try:
                info["title"] = ch.title.tx.rich.p[0].r[0].t
            except Exception:  # title may be absent or not rich text
                info["title"] = None
        if "legend" in chart_props:
            info["legend"] = ch.legend.position if ch.legend is not None else None
        if "anchor" in chart_props:
            # Layout is [editAs, col0, row0, col1, row1].  The original code
            # read `ch.anchor.to.row` for row0; fixed to `_from.row` so the
            # anchor's start cell is reported correctly.
            info["anchor"] = [ch.anchor.editAs
                , ch.anchor._from.col, ch.anchor._from.row
                , ch.anchor.to.col, ch.anchor.to.row
                ]
        if "width" in chart_props:
            info["width"] = ch.width
        if "height" in chart_props:
            info["height"] = ch.height
        if "type" in chart_props:
            info["type"] = ch.tagname
        if "direction" in chart_props:
            info["direction"] = ch.barDir

        if "xtitle" in chart_props:
            try:
                info["xtitle"] = ch.x_axis.title.tx.rich.p[0].r[0].t
            except Exception:
                info["xtitle"] = None
        if "ytitle" in chart_props:
            try:
                info["ytitle"] = ch.y_axis.title.tx.rich.p[0].r[0].t
            except Exception:
                info["ytitle"] = None
        if "ztitle" in chart_props:
            try:
                info["ztitle"] = ch.z_axis.title.tx.rich.p[0].r[0].t
            except Exception:
                info["ztitle"] = None
        chart_set[series] = info
    logger.debug(".[%s].charts: %s", sheet_name, repr(chart_set))
    return chart_set
    # }}} function load_charts #
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
# Available Pivot Properties:
|
|
188
|
+
# name: str
|
|
189
|
+
# show_total, show_empty_row, show_empty_col, show_headers: bool
|
|
190
|
+
# location: str
|
|
191
|
+
# selection: if the concrete item selection should be checked, a list of set of tuple like (bool, index) will be returned; list will be returned instead of set if "ordered" is specified
|
|
192
|
+
# filter: if the filter fields should be checked; fields indices will be return in `filter_fields` item
|
|
193
|
+
# col_fields: indices
|
|
194
|
+
# row_fields: indices
|
|
195
|
+
# data_fields: list of str representations. the str representation is like "index;name;subtotal_type;show_data_as"; name is optional and is only returned when `data_fields_name` is specified in `pivot_props`
|
|
196
|
+
def load_pivot_tables(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
    # function load_pivot_tables {{{ #
    """
    Args:
        xlsx_file (Workbook): concerned excel book
        sheet_name (str): sheet name
        options (Dict[str, List[str]]): dict like {"pivot_props": list of str}
          giving the concerned pivot properties

    Returns:
        Dict[str, Any]: information of pivot tables, dict like
          {
            <str representing data source>: {
                <str as property>: anything
            }
          }
          Returns {} when the sheet does not exist.
    """

    try:
        worksheet: Worksheet = xlsx_file[sheet_name]
    except KeyError:
        return {}
    pivots: List[PivotTableDefinition] = worksheet._pivots

    pivot_set: Dict[str, Any] = {}
    pivot_props: Set[str] = set(options.get("pivot_props", []))
    for pvt in pivots:
        # Per-field item selections as (hidden?, index) tuples; fields with
        # no indexed items yield empty lists.
        raw_selection: List[List[tuple[Optional[bool], int]]] = \
            [[(itm.h, itm.x) for itm in f.items if itm.x is not None] \
                 for f in pvt.pivotFields
             ]
        # Trim empty selections from the left edge...
        raw__selection: List[List[tuple[Optional[bool], int]]] = list(
            itertools.dropwhile(lambda r: len(r) == 0, raw_selection))
        left_bias = len(raw_selection) - len(raw__selection)
        # ...and from the right edge.  The biases re-align field indices with
        # the trimmed cache-source range computed below.
        selection: List[List[tuple[Optional[bool], int]]] = list(
            (itertools.dropwhile(lambda r: len(r) == 0, reversed(raw__selection))))[::-1]
        right_bias = len(raw__selection) - len(selection)
        cache_source: PivotCacheSource = pvt.cache.cacheSource
        cell_range1: str
        cell_range2: str
        cell_range1, cell_range2 = cache_source.worksheetSource.ref.split(":")
        # Shrink the source range by the number of trimmed fields on each side.
        cell_range1: Tuple[int, int] = coordinate_to_tuple(cell_range1)
        cell_range1 = (cell_range1[0], cell_range1[1] + left_bias)
        cell_range2: Tuple[int, int] = coordinate_to_tuple(cell_range2)
        cell_range2 = (cell_range2[0], cell_range2[1] - right_bias)
        # Key identifying the pivot table by its data source.
        source: str = "{:};{:}:{:};{:}".format(cache_source.type, cell_range1, cell_range2,
                                               cache_source.worksheetSource.sheet)

        info: Dict[str, Any] = {}
        if "name" in pivot_props:
            info["name"] = pvt.name

        if "show_total" in pivot_props:
            info["show_total"] = pvt.visualTotals
        if "show_empty_row" in pivot_props:
            info["show_empty_row"] = pvt.showEmptyRow
        if "show_empty_col" in pivot_props:
            info["show_empty_col"] = pvt.showEmptyCol
        if "show_headers" in pivot_props:
            info["show_headers"] = pvt.showHeaders

        if "location" in pivot_props:
            info["location"] = pvt.location
        if "filter" in pivot_props or "selection" in pivot_props:
            # "ordered" keeps per-field item order; otherwise the comparison
            # is made order-insensitive with sets.
            info["selection"] = selection if "ordered" in pivot_props else list(set(r) for r in selection)
        if "filter" in pivot_props:
            info["filter_fields"] = set(f.fld for f in pvt.pageFields)
        if "col_fields" in pivot_props:
            info["col_fields"] = [f.x - left_bias for f in pvt.colFields]
        if "row_fields" in pivot_props:
            info["row_fields"] = [f.x - left_bias for f in pvt.rowFields]
        if "data_fields" in pivot_props:
            # "index;name;subtotal_type;show_data_as"; name only included when
            # "data_fields_name" is requested.
            info["data_fields"] = [
                "{:d};{:};{:};{:}".format(f.fld - left_bias, f.name if "data_fields_name" in pivot_props else ""
                                          , f.subtotal, f.showDataAs
                                          ) \
                for f in pvt.dataFields
            ]

        pivot_set[source] = info
    logger.debug(".[%s].pivots: %s", sheet_name, repr(pivot_set))
    return pivot_set
    # }}} function load_pivot_tables #
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
# Selects shared-string items (<si>) from xl/sharedStrings.xml.
_shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si", namespaces=_xlsx_ns_mapping)
# Selects the text runs (<t>) inside one shared-string item.
_shared_str_value_selector = lxml.cssselect.CSSSelector("oo|t", namespaces=_xlsx_ns_mapping)
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
    # read_cell_value {{{ #
    """
    Reads the raw value of one cell by parsing the xlsx (zip) archive
    directly, resolving shared strings and inline strings.

    Args:
        xlsx_file (str): path to the xlsx file on disk
        sheet_name (str): sheet name
        coordinate (str): cell coordinate such as "A1"

    Returns:
        Any: float for numeric cells, str for string cells, or None when the
          file, sheet, or cell is missing or unreadable
    """
    logger.debug(f"Reading cell value from {xlsx_file}, sheet: {sheet_name}, coordinate: {coordinate}")

    # Check if file exists
    if not os.path.exists(xlsx_file):
        logger.error(f"Excel file not found: {xlsx_file}")
        return None

    try:
        with zipfile.ZipFile(xlsx_file, "r") as z_f:
            try:
                # Workbooks with no string cells lack xl/sharedStrings.xml;
                # treat any failure here as "no shared strings".
                with z_f.open("xl/sharedStrings.xml") as f:
                    shared_str_xml: _Element = lxml.etree.fromstring(f.read())
                    str_elements: List[_Element] = _shared_str_selector(shared_str_xml)
                    # Each <si> may hold several <t> runs; join them.
                    shared_strs: List[str] = [ "".join(t.text for t in _shared_str_value_selector(elm))\
                                                   for elm in str_elements
                                               ]
            except:
                #logger.exception("Read shared strings error: %s", xlsx_file)
                logger.debug("Read shared strings error: %s", xlsx_file)
                shared_strs: List[str] = []

            # Map sheet names to sheet ids from the workbook manifest.
            with z_f.open("xl/workbook.xml") as f:
                workbook_database: _Element = lxml.etree.fromstring(f.read())
                sheets: List[_Element] = _sheet_name_selector(workbook_database)
                sheet_names: Dict[str, str] = {sh.get("name"): sh.get("sheetId") for sh in sheets}

            with z_f.open("xl/worksheets/sheet{:}.xml".format(sheet_names[sheet_name])) as f:
                sheet: _Element = lxml.etree.fromstring(f.read())
                # Select the single <c> element carrying the requested
                # coordinate in its r="" attribute.
                cells: List[_Element] = \
                    lxml.cssselect.CSSSelector('oo|row>oo|c[r="{:}"]'.format(coordinate)
                                               , namespaces=_xlsx_ns_mapping
                                               )(sheet)
                if len(cells) == 0:
                    logger.debug(f"Cell {coordinate} not found in sheet {sheet_name}")
                    return None
                cell: _Element = cells[0]
    except zipfile.BadZipFile as e:
        logger.error(f"Bad zip file {xlsx_file}: {e}")
        return None
    except KeyError as e:
        logger.error(f"Sheet {sheet_name} not found in {xlsx_file}: {e}")
        return None
    except Exception as e:
        logger.error(f"Error reading {xlsx_file}: {e}")
        return None

    cell: Dict[str, str] = xmltodict.parse(lxml.etree.tostring(cell, encoding="unicode")
                                           , process_namespaces=True
                                           , namespaces=_xlsx_ns_imapping
                                           )
    logger.debug("%s.shared_strings: %s", xlsx_file, repr(shared_strs))
    logger.debug("%s.%s[%s]: %s", xlsx_file, sheet_name, coordinate, repr(cell))
    try:
        # "@t" encodes the cell type: n = number (the default), s = shared
        # string, str = formula string, inlineStr = inline string.
        if "@t" not in cell["c"] or cell["c"]["@t"] == "n":
            return float(cell["c"]["v"])
        if cell["c"]["@t"] == "s":
            return shared_strs[int(cell["c"]["v"])]
        if cell["c"]["@t"] == "str":
            return cell["c"]["v"]
        if cell["c"]["@t"] == "inlineStr":
            return cell["c"]["is"]["t"]
    except (KeyError, ValueError):
        # Missing <v> (empty cell) or an unparsable number.
        return None
    # }}} read_cell_value #
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
# Supported Styles:
|
|
354
|
+
# number_format
|
|
355
|
+
# font_name - str
|
|
356
|
+
# font_family - float
|
|
357
|
+
# font_color - in aRGB, e.g., FF000000 is black
|
|
358
|
+
# font_bold - bool
|
|
359
|
+
# font_italic - bool
|
|
360
|
+
# font_underline - "single" | "double" | "singleAccounting" | "doubleAccounting"
|
|
361
|
+
# font_size - float
|
|
362
|
+
# fill_type - "patternFill" | "gradientFill"
|
|
363
|
+
# bgcolor - in aRGB, e.g., FFFF0000 is red; This property seems to be ambiguous with fgcolor in xlsx, strange
|
|
364
|
+
# fgcolor - in aRGB, e.g., FF00FFFF is yellow # Deprecated
|
|
365
|
+
# hyperlink - str
|
|
366
|
+
# merge - bool, if the cell is in a merged range and is not the first cell in the merged range
|
|
367
|
+
def _read_cell_style(style_name: str, cell: Union[Cell, MergedCell], diff_style: Optional[DifferentialStyle] = None) -> Any:
    """
    Reads one style property of a cell, optionally overridden by a
    conditional-formatting differential style.

    Args:
        style_name (str): one of the supported style names documented above
        cell (Union[Cell, MergedCell]): concerned cell
        diff_style (Optional[DifferentialStyle]): differential style from an
          active conditional-formatting rule; when given it takes precedence
          over the cell's own style

    Returns:
        Any: the property value; None for empty cells or unreadable styles

    Raises:
        NotImplementedError: for an unsupported style name
    """
    if style_name == "number_format":
        # Only meaningful on non-empty numeric cells.
        return (cell.number_format if diff_style is None else diff_style.numFmt.formatCode) \
            if cell.value is not None and cell.data_type == "n" else None
    elif style_name == "font_name":
        return (diff_style or cell).font.name if cell.value is not None else None
    elif style_name == "font_family":
        return (diff_style or cell).font.family if cell.value is not None else None
    elif style_name == "font_color":
        return (diff_style or cell).font.color.rgb if cell.value is not None else None
    elif style_name == "font_bold":
        return (diff_style or cell).font.bold if cell.value is not None else None
    elif style_name == "font_italic":
        return (diff_style or cell).font.italic if cell.value is not None else None
    elif style_name == "font_underline":
        return (diff_style or cell).font.underline if cell.value is not None else None
    elif style_name == "font_size":
        return (diff_style or cell).font.size if cell.value is not None else None
    elif style_name == "fill_type":
        # Was a bare `except:`; narrowed so e.g. KeyboardInterrupt escapes.
        try:
            return (diff_style or cell).fill.tagname
        except Exception:
            return None
    elif style_name == "bgcolor" or style_name == "fgcolor":
        # xlsx is ambiguous here: differential styles carry the colour in
        # bgColor while plain cell fills carry it in fgColor.
        try:
            if diff_style is not None:
                return diff_style.fill.bgColor.rgb
            else:
                return cell.fill.fgColor.rgb
        except Exception:
            return None
    elif style_name == "hyperlink":
        return cell.hyperlink or "" if cell.value is not None else None
    elif style_name == "merge":
        # True for any cell that is part of a merged range other than its
        # top-left anchor.
        return isinstance(cell, MergedCell)
    else:
        raise NotImplementedError("Unsupported Style: {:}".format(style_name))
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
# Matches absolute cell references like "$A$1" and absolute ranges like
# "$A$1:$B$2".  NOTE: with re.findall, the optional col2/row2 groups come
# back as "" (empty strings), not None, when the second coordinate is absent.
_absolute_range_pattern: Pattern[str] = re.compile(r"""\$(?P<col1>[A-Z]{1,3})\$(?P<row1>\d+) # coord1
(?::
\$(?P<col2>[A-Z]{1,3})\$(?P<row2>\d+) # coord2
)?
"""
                                                   , re.X
                                                   )
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **options) -> Dict[str, List[Any]]:
    # function load_xlsx_styles {{{ #
    """
    Args:
        xlsx_file (Workbook): concerned excel book
        sheet_name (str): sheet name
        book_name (str): path to the xlsx file on disk, used to re-read raw
          cell values when evaluating conditional-formatting expressions
        options (Dict[str, List[str]]): dict like {"props": list of str}
          giving the concerned styles

    Returns:
        Dict[str, List[Any]]: dict like
          {
            <str as cell coordinates>: list of anything indicating concerned
              property values
          }
          Returns {} when the sheet does not exist.
    """

    try:
        worksheet: Worksheet = xlsx_file[sheet_name]
    except KeyError:
        return {}

    style_dict: Dict[str, List[Any]] = {}
    concerned_styles: List[str] = options.get("props", [])

    # Handles Cell Styles
    for col in worksheet.iter_cols():
        for c in col:
            style_list: List[Any] = []
            for st in concerned_styles:
                style_list.append(_read_cell_style(st, c))
            style_dict[c.coordinate] = style_list

    # Handles Conditional Formatting
    conditional_formattings: ConditionalFormattingList = worksheet.conditional_formatting
    formula_parser = formulas.Parser()
    for fmt in conditional_formattings:
        for r in fmt.rules:
            active_cells: List[Cell] = []
            if r.type == "expression":
                # Compile the rule's formula once; it is then evaluated per
                # cell in the formatted range.
                condition: Callable[[str], bool] = formula_parser.ast("=" + r.formula[0])[1].compile()
                logger.debug("Expression condition: %s", r.formula[0])

                # Resolve absolute references ($A$1 / $A$1:$B$2) appearing in
                # the formula to concrete values, passed as extra arguments.
                arguments: List[Any] = []
                absolute_range_match: List[Tuple[str, str, str, str]] = _absolute_range_pattern.findall(r.formula[0])
                for m in absolute_range_match:
                    logger.debug("Absolute ranges: %s", repr(m))
                    # re.findall yields "" (never None) for unmatched optional
                    # groups; the original `m[2] is None and m[3] is None`
                    # test could therefore never be true, so single-cell
                    # references built the invalid range "A1:" below.  Test
                    # for emptiness instead.
                    if not m[2] and not m[3]:
                        arguments.append(read_cell_value(book_name, sheet_name, coordinate="{:}{:}".format(m[0], m[1])))
                    else:
                        arguments.append([read_cell_value(book_name, sheet_name
                                                          , coordinate="{:}{:}".format(get_column_letter(c[1])
                                                                                       , c[0]
                                                                                       )
                                                          ) \
                                          for c in CellRange("{:}{:}:{:}{:}".format(m[0], m[1], m[2], m[3])).cells \
                                          ])
                logger.debug("Absolute range arguments: %s", repr(arguments))

                nb_contiguous_nothings = 0
                for rge in fmt.cells:
                    for c in rge.cells:
                        cell: Cell = worksheet.cell(row=c[0], column=c[1])
                        cell_value = read_cell_value(book_name, sheet_name
                                                     , coordinate="{:}{:d}".format(get_column_letter(c[1])
                                                                                   , c[0]
                                                                                   )
                                                     )
                        if cell_value is None:
                            # Stop scanning a range after too many empty cells
                            # to avoid walking huge blank areas.
                            # NOTE(review): the counter is never reset on a
                            # non-empty cell, so this counts empties per range
                            # rather than consecutive ones — kept as-is.
                            nb_contiguous_nothings += 1
                            if nb_contiguous_nothings > 50:
                                break
                            continue
                        elif condition(cell_value, *arguments):
                            logger.debug("Active Cell %s(%s) for %s", repr(cell), str(cell_value), r.formula[0])
                            active_cells.append(cell)
            else:
                raise NotImplementedError("Not Implemented Condition Type: {:}".format(r.type))

            # Cells matched by the rule take the rule's differential style in
            # place of their own.
            for c in active_cells:
                style_dict[c.coordinate] = [_read_cell_style(st, c, r.dxf) for st in concerned_styles]

    logger.debug(".[%s].styles: %s", sheet_name, repr(style_dict))
    return style_dict
    # }}} function load_xlsx_styles #
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
# Available Row Properties:
|
|
511
|
+
# hidden
|
|
512
|
+
# collapsed
|
|
513
|
+
# height
|
|
514
|
+
#
|
|
515
|
+
# Available Column Properties:
|
|
516
|
+
# width
|
|
517
|
+
# auto_size
|
|
518
|
+
# hidden
|
|
519
|
+
# collapsed
|
|
520
|
+
# min
|
|
521
|
+
# max
|
|
522
|
+
def load_rows_or_cols(xlsx_file: Workbook, sheet_name: str, **options) \
        -> Dict[Union[int, str], Dict[str, Any]]:
    # function load_rows_or_cols {{{ #
    """
    Collects the requested dimension properties for every explicitly-sized
    row or column of a worksheet.

    Args:
        xlsx_file (Workbook): concerned excel book
        sheet_name (str): sheet name
        options (Dict[str, List[str]]): dict like
          {"obj": "row" | "column", "props": list of str} giving the concerned
          row/column properties

    Returns:
        Dict[Union[int, str], Dict[str, Any]]: row/column information; empty
          when the sheet does not exist
    """

    try:
        sheet: Worksheet = xlsx_file[sheet_name]
    except KeyError:
        return {}
    # "row" -> sheet.row_dimensions, "column" -> sheet.column_dimensions
    dimensions: DimensionHolder = getattr(sheet, "{:}_dimensions".format(options["obj"]))

    wanted_props: Set[str] = set(options.get("props", []))
    return {
        index: {prop: getattr(dims, prop) for prop in wanted_props}
        for index, dims in dimensions.items()
    }
    # }}} function load_rows_or_cols #
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
def load_filters(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
    # function load_filters {{{ #
    """
    Reads the auto-filter configuration of a worksheet.

    Args:
        xlsx_file (Workbook): concerned excel book
        sheet_name (str): sheet name
        options: currently unused; kept for signature parity with the other
          loaders in this module

    Returns:
        Dict[str, Any]: dict with "ref" (the filtered range), "filter_column"
          (per-column filter settings sorted by column id) and, when present,
          "sort_state"; empty when the sheet does not exist
    """
    try:
        worksheet: Worksheet = xlsx_file[sheet_name]
    except KeyError:
        return {}

    filters: AutoFilter = worksheet.auto_filter
    filter_dict: Dict[str, Any] = {}
    filter_dict["ref"] = filters.ref

    # filterColumn
    filter_column_set: List[Dict[str, Any]] = []
    for flt_clm in filters.filterColumn:
        filter_column: Dict[str, Any] = {}
        filter_column["col_id"] = flt_clm.colId
        filter_column["hidden_button"] = flt_clm.hiddenButton
        filter_column["show_button"] = flt_clm.showButton
        # Plain value filters (a set of accepted values, plus blanks flag).
        if flt_clm.filters is not None:
            filter_column["filters_blank"] = flt_clm.filters.blank
            filter_column["filters"] = set(flt_clm.filters.filter)
        # Custom (operator-based) filters, combined with AND/OR.
        if flt_clm.customFilters is not None:
            filter_column["custom_filters_op"] = flt_clm.customFilters._and
            filter_column["custom_filters"] = set((flt.operator
                                                   , flt.val
                                                   ) \
                                                  for flt in flt_clm.customFilters.customFilter
                                                  )
        filter_column_set.append(filter_column)
    # Sort by column id so comparisons are order-independent.
    filter_column_set = list(sorted(filter_column_set
                                    , key=(lambda d: d["col_id"])
                                    )
                             )
    filter_dict["filter_column"] = filter_column_set

    # sortState
    sort_state: Optional[SortState] = filters.sortState
    if sort_state is not None:
        sort_state_dict: Dict[str, Any] = {}
        sort_state_dict["sort"] = sort_state.columnSort
        sort_state_dict["case"] = sort_state.caseSensitive
        sort_state_dict["method"] = sort_state.sortMethod
        sort_state_dict["ref"] = sort_state.ref
        sort_state_dict["condition"] = list({"descending": cdt.descending
                                             , "key": cdt.sortBy
                                             , "ref": cdt.ref
                                             , "custom_list": cdt.customList
                                             , "dxf_id": cdt.dxfId
                                             , "icon": cdt.iconSet
                                             , "iconid": cdt.iconId
                                             } \
                                            for cdt in sort_state.sortCondition
                                            )
        filter_dict["sort_state"] = sort_state_dict

    return filter_dict
    # }}} function load_filters #
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
def _match_record(pattern: Dict[str, Any], item: Dict[str, Any]) -> bool:
|
|
614
|
+
return all(k in item and item[k] == val for k, val in pattern.items())
|
|
615
|
+
|
|
616
|
+
|
|
617
|
+
def _multicellrange_containsby(subset_candidate: MultiCellRange, superset_candidate: MultiCellRange) -> bool:
    """Checks that every range of `subset_candidate` lies within `superset_candidate`."""
    for rng in subset_candidate:
        if rng not in superset_candidate:
            return False
    return True
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool:
    """Check a single value against one matching rule.

    Args:
        value (V): value to match
        rule (Dict[str, Union[str, V]]): rule dict like
          {
            "method": str
            "ref": V as ref value
          }

    Supported methods (checked in this order):
      * "re[.FLAG...]"        -- regex search, optional re flags joined by dots
      * "eq"/"ne"/"le"/"lt"/"ge"/"gt" -- operator-module comparison with ref
      * "approx:THRESHOLD"    -- |float(value) - ref| <= THRESHOLD
      * "spreadsheet_range"   -- ref = [subset_limit, superset_limit] cell ranges
      * "range.XY"            -- X/Y in {t, e}: e.g. "range.te" means ref[0] < value <= ref[1]
      * "str_list_eq"/"str_set_eq" -- split value on "," and compare as list/set

    Returns:
        bool

    Raises:
        NotImplementedError: for an unrecognized method string.
    """

    method: str = rule["method"]
    ref = rule["ref"]

    if method.startswith("re"):  # re.FLAGs
        # Fold every named flag ("re.I.M" -> re.I | re.M) into one RegexFlag.
        combined: re.RegexFlag = re.RegexFlag(0)
        for flag_name in method.split(".")[1:]:
            combined |= getattr(re, flag_name)
        logger.debug("REFLAG: %s", repr(combined))

        return re.search(ref, value, combined) is not None

    if method in {"eq", "ne", "le", "lt", "ge", "gt"}:
        # Dispatch directly to the same-named operator-module function.
        return getattr(operator, method)(value, ref)

    if method.startswith("approx"):  # approx:THRESHOLD
        threshold: float = float(method.split(":")[1])
        logger.debug("Approx: TH%f, REF%f, VAL%s", threshold, ref, repr(value))
        try:
            numeric = float(value)
        except (ValueError, TypeError):
            # Non-numeric values can never be approximately equal.
            return False
        return abs(numeric - ref) <= threshold

    if method == "spreadsheet_range":
        # value must contain ref[0] and be contained in ref[1].
        lower_limit = MultiCellRange(ref[0])
        upper_limit = MultiCellRange(ref[1])
        return (_multicellrange_containsby(lower_limit, value)
                and _multicellrange_containsby(value, upper_limit))

    if method.startswith("range."):  # e.g., range.te [0, 2] -> 0 < x <= 2
        # Characters 6 and 7 select the two bound operators: 't' -> lt, 'e' -> le.
        lower_ok = getattr(operator, "l" + method[6])(ref[0], value)
        upper_ok = getattr(operator, "l" + method[7])(value, ref[1])
        return lower_ok and upper_ok

    if method in {"str_list_eq", "str_set_eq"}:
        # "str_list_eq"[4:-3] == "list", "str_set_eq"[4:-3] == "set".
        container_type = getattr(builtins, method[4:-3])
        parsed = container_type(value.strip("\"'").split(","))
        return parsed == container_type(ref)

    raise NotImplementedError()
|
|
675
|
+
|
|
676
|
+
|
|
677
|
+
def are_lists_equal(list1, list2, comparison_func):
    """Compare two lists without regard to element order.

    Args:
        list1: first list.
        list2: second list.
        comparison_func: binary predicate deciding whether two items match.

    Returns:
        bool: True when the lists have equal length and every element of
        `list1` matches at least one element of `list2` under
        `comparison_func`.

    NOTE(review): matching is only checked in one direction, so with
    duplicate elements this is weaker than multiset equality — preserved
    here as-is.
    """
    # Unequal lengths can never be equal.
    if len(list1) != len(list2):
        return False

    # Every item of list1 must have a counterpart somewhere in list2.
    return all(
        any(comparison_func(left, right) for right in list2)
        for left in list1
    )
|
|
690
|
+
|
|
691
|
+
|
|
692
|
+
def compare_urls(url1, url2):
    """Compare two URLs for equality after normalization.

    Normalization: assume 'http' when no scheme is given, lowercase the
    scheme and host, drop a leading "www." from the host, and treat a
    bare "/" path as empty.

    Args:
        url1: first URL string, or None.
        url2: second URL string, or None.

    Returns:
        bool: True when both normalized URLs are identical. If either
        argument is None, True only when both are None.
    """
    if url1 is None or url2 is None:
        return url1 == url2

    def normalize_url(url):
        # Parse the URL
        parsed_url = urlparse(url)

        # If no scheme is present, assume 'http'
        scheme = parsed_url.scheme if parsed_url.scheme else 'http'

        # Lowercase the netloc and strip a leading "www." only.
        # BUGFIX: the previous str.replace("www.", "") removed EVERY
        # occurrence, corrupting hosts such as "sub.www.example.com".
        normalized_netloc = parsed_url.netloc.lower()
        if normalized_netloc.startswith("www."):
            normalized_netloc = normalized_netloc[len("www."):]

        # A bare "/" path is equivalent to no path at all.
        normalized_path = parsed_url.path if parsed_url.path != '/' else ''

        # Reassemble the URL with normalized components
        normalized_parsed_url = parsed_url._replace(scheme=scheme.lower(), netloc=normalized_netloc,
                                                    path=normalized_path)
        return urlunparse(normalized_parsed_url)

    # Compare the normalized forms.
    return normalize_url(url1) == normalize_url(url2)
|