pyegeria 5.4.8__py3-none-any.whl → 5.4.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. md_processing/__init__.py +2 -1
  2. md_processing/data/commands.json +59579 -52198
  3. md_processing/dr_egeria.py +7 -5
  4. md_processing/md_commands/solution_architect_commands.py +54 -10
  5. md_processing/md_processing_utils/common_md_proc_utils.py +8 -4
  6. md_processing/md_processing_utils/common_md_utils.py +41 -2
  7. pyegeria/_base_client.py +1 -17
  8. pyegeria/_client_new.py +1008 -323
  9. pyegeria/_client_new_backup.py +5359 -0
  10. pyegeria/_exceptions_new.py +6 -1
  11. pyegeria/base_report_formats.py +31 -2
  12. pyegeria/classification_manager.py +1430 -357
  13. pyegeria/collection_manager.py +52 -54
  14. pyegeria/config.py +1 -0
  15. pyegeria/data_designer.py +41 -41
  16. pyegeria/external_links.py +26 -26
  17. pyegeria/feedback_manager_omvs.py +13 -31
  18. pyegeria/glossary_manager.py +32 -35
  19. pyegeria/governance_officer.py +31 -31
  20. pyegeria/output_formatter.py +36 -11
  21. pyegeria/output_formatter_with_machine_keys.py +1127 -0
  22. pyegeria/project_manager.py +21 -21
  23. pyegeria/reference_data.py +2 -2
  24. pyegeria/solution_architect.py +112 -91
  25. {pyegeria-5.4.8.dist-info → pyegeria-5.4.8.3.dist-info}/METADATA +6 -5
  26. {pyegeria-5.4.8.dist-info → pyegeria-5.4.8.3.dist-info}/RECORD +30 -29
  27. pyegeria/md_processing_utils_orig.py +0 -1103
  28. {pyegeria-5.4.8.dist-info → pyegeria-5.4.8.3.dist-info}/WHEEL +0 -0
  29. {pyegeria-5.4.8.dist-info → pyegeria-5.4.8.3.dist-info}/entry_points.txt +0 -0
  30. {pyegeria-5.4.8.dist-info → pyegeria-5.4.8.3.dist-info}/licenses/LICENSE +0 -0
  31. {pyegeria-5.4.8.dist-info → pyegeria-5.4.8.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1127 @@
1
+ from datetime import datetime
2
+ import re
3
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
4
+ from pyegeria.utils import (camel_to_title_case)
5
+ from markdown_it import MarkdownIt
6
+ from rich.console import Console
7
+ from loguru import logger
8
+ from pyegeria.config import settings
9
+
10
+ from pyegeria.mermaid_utilities import construct_mermaid_web
11
+ from pyegeria.base_report_formats import select_report_format, MD_SEPARATOR, get_report_spec_match
12
+ from pyegeria.models import to_camel_case
13
+
14
+ """
15
+ Note on select_report_spec function:
16
+
17
+ This function and related data structures have been moved back to _output_formats.py.
18
+ Please import select_report_spec from pyegeria._output_formats instead of from this module.
19
+ """
20
+
21
# Module-level Rich console; its width is read from settings.Environment.console_width.
console = Console(width=settings.Environment.console_width)
22
+
23
+
24
+ def _extract_referenceable_properties(element: dict[str, Any]) -> dict[str, Any]:
25
+ # Get general header attributes
26
+ guid = element.get('elementHeader', {}).get("guid", None)
27
+ if guid is None:
28
+ return {}
29
+ metadata_collection_id = element['elementHeader']['origin'].get("homeMetadataCollectionId", None)
30
+ metadata_collection_name = element['elementHeader']['origin'].get("homeMetadataCollectionName", None)
31
+ origin_category = element['elementHeader'].get("origin_category", None)
32
+ created_by = element['elementHeader']["versions"].get("createdBy", None)
33
+ create_time = element['elementHeader']["versions"].get("createTime", None)
34
+ updated_by = element['elementHeader']["versions"].get("updatedBy", None)
35
+ version = element['elementHeader']["versions"].get("version", None)
36
+ type_name = element['elementHeader']["type"].get("typeName", None)
37
+ classifications = element['elementHeader'].get("classifications", [])
38
+ effective_from = element['elementHeader'].get("effectiveFrom", None)
39
+ effective_to = element['elementHeader'].get("effectiveTo", None)
40
+
41
+ # Get attributes from properties
42
+ # properties = element['properties']
43
+ # display_name = properties.get("name", "") or ""
44
+ # if display_name == "":
45
+ # display_name = properties.get("displayName","")
46
+ # description = properties.get("description", "") or ""
47
+ # qualified_name = properties.get("qualifiedName", "") or ""
48
+ # category = properties.get("category", "") or ""
49
+ # version_identifier = properties.get("versionIdentifier", "") or ""
50
+ # additional_properties = properties.get("additionalProperties", {}) or {}
51
+ # extended_properties = properties.get("extendedProperties", {}) or {}
52
+ #
53
+ return {
54
+ "GUID": guid,
55
+ "metadata_collection_id": metadata_collection_id,
56
+ "metadata_collection_name": metadata_collection_name,
57
+ "origin_category": origin_category,
58
+ "created_by": created_by,
59
+ "create_time": create_time,
60
+ "updated_by": updated_by,
61
+ "version": version,
62
+ "type_name": type_name,
63
+ "classifications": classifications,
64
+
65
+ # "display_name": display_name,
66
+ # "description": description,
67
+ # "qualified_name": qualified_name,
68
+ # "category": category,
69
+ # "version_identifier": version_identifier,
70
+ # "additional_properties": additional_properties,
71
+ # "extended_properties": extended_properties,
72
+ "effective_from": effective_from,
73
+ "effective_to": effective_to,
74
+ }
75
+
76
+
77
+
78
+
79
+
80
+
81
def markdown_to_html(markdown_text: str) -> str:
    """
    Convert markdown text to a complete HTML page, with special handling for mermaid code blocks.

    Each fenced ``mermaid`` block is swapped for a text placeholder before the markdown is
    rendered, and the placeholder is then replaced by the output of construct_mermaid_web
    for that block.

    Args:
        markdown_text: The markdown text to convert

    Returns:
        HTML string (a full document with a basic embedded stylesheet)
    """
    # Initialize markdown-it
    md = MarkdownIt()

    # Find all mermaid code blocks
    mermaid_blocks = re.findall(r'```mermaid\n(.*?)\n```', markdown_text, re.DOTALL)

    # Replace each mermaid block with a placeholder
    placeholders = []
    for i, block in enumerate(mermaid_blocks):
        placeholder = f"MERMAID_PLACEHOLDER_{i}"
        markdown_text = markdown_text.replace(f"```mermaid\n{block}\n```", placeholder)
        placeholders.append((placeholder, block))

    # Convert markdown to HTML
    html_text = md.render(markdown_text)

    # Replace placeholders with rendered mermaid HTML.
    # Go from the highest index down: "MERMAID_PLACEHOLDER_1" is a prefix of
    # "MERMAID_PLACEHOLDER_10", so an ascending pass would corrupt the later placeholders
    # as soon as a document holds more than ten mermaid blocks.
    for placeholder, mermaid_block in reversed(placeholders):
        mermaid_html = construct_mermaid_web(mermaid_block)
        html_text = html_text.replace(placeholder, mermaid_html)

    # Add basic HTML structure
    html_text = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Egeria Report</title>
        <style>
            body {{ font-family: Arial, sans-serif; line-height: 1.6; padding: 20px; }}
            h1 {{ color: #2c3e50; }}
            h2 {{ color: #3498db; }}
            pre {{ background-color: #f8f8f8; padding: 10px; border-radius: 5px; overflow-x: auto; }}
            table {{ border-collapse: collapse; width: 100%; margin-bottom: 20px; }}
            th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
            th {{ background-color: #f2f2f2; }}
            tr:nth-child(even) {{ background-color: #f9f9f9; }}
        </style>
    </head>
    <body>
        {html_text}
    </body>
    </html>
    """

    return html_text
138
+
139
def make_preamble(obj_type: str, search_string: str, output_format: str = 'MD') -> Tuple[str, Optional[str]]:
    """
    Creates a preamble string and an elements action based on the given object type, search string,
    and output format.

    Args:
        obj_type: The type of object being updated or reported on (e.g., "Product", "Category").
        search_string: The search string used to filter objects. When empty or None, the
            "found from the search string" line is omitted from the preamble.
        output_format: A format identifier determining the output structure.
            JSON - output standard json
            MD - output standard markdown with no preamble
            FORM - output markdown with a preamble for a form
            REPORT - output markdown with a preamble for a report

    Returns:
        tuple: A tuple containing:
            - A string representing the formatted update or report preamble
              ("\\n" for formats other than FORM and REPORT).
            - The action description for the elements ("Update <obj_type>"), or None for REPORT.
    """
    elements_action = "Update " + obj_type

    if output_format == "FORM":
        preamble = f"\n# Update {obj_type} Form - created at {datetime.now().strftime('%Y-%m-%d %H:%M')}\n"
        if search_string:
            preamble += f"\t {obj_type} found from the search string: `{search_string}`\n\n"
        return preamble, elements_action

    if output_format == "REPORT":
        preamble = f"# {obj_type} Report - created at {datetime.now().strftime('%Y-%m-%d %H:%M')}\n"
        if search_string:
            preamble += f"\t{obj_type} found from the search string: `{search_string}`\n\n"
        else:
            # Same guard as the FORM branch: never print "`None`"; keep a blank line after the title.
            preamble += "\n"
        return preamble, None

    return "\n", elements_action
174
+
175
def make_md_attribute(attribute_name: str, attribute_value: str, output_type: str) -> Optional[str]:
    """
    Create a markdown attribute section for a given attribute name and value.

    Args:
        attribute_name: The name of the attribute. "GUID" (any case) is titled "GUID"; other
            names are titled with camel_to_title_case. An empty or None name gives an empty title.
        attribute_value: The value of the attribute. Strings are stripped; non-empty lists are
            joined with ",\\n" (items are converted with str()).
        output_type: The output format (FORM, MD, REPORT, MERMAID)

    Returns:
        str: Formatted markdown for the attribute. For FORM/MD, a bare newline is returned for
        the suppressed names (mermaid, solutionBlueprintMermaidGraph, links, implemented by,
        sub_components - matched case-insensitively). For REPORT/MERMAID, empty values yield ""
        unless the title is one of the mermaid titles. Any other output_type yields "".
    """
    # Stored lower-cased because the name is lower-cased before the membership test.
    suppressed_in_forms = ("mermaid", "solutionblueprintmermaidgraph", "links",
                           "implemented by", "sub_components")

    if isinstance(attribute_value, str):
        attribute_value = attribute_value.strip()
    elif isinstance(attribute_value, list) and len(attribute_value) > 0:
        # str() each item so lists holding non-string values do not break the join.
        attribute_value = ",\n".join(str(item) for item in attribute_value)

    # Decide on suppression before computing a title; a missing name is never suppressed.
    if output_type in ("FORM", "MD") and attribute_name and attribute_name.lower() in suppressed_in_forms:
        return '\n'

    if not attribute_name:
        attribute_title = ""
    elif attribute_name.upper() == "GUID":
        attribute_title = attribute_name.upper()
    else:
        attribute_title = camel_to_title_case(attribute_name)

    if output_type in ("FORM", "MD"):
        return f"## {attribute_title}\n{attribute_value}\n\n"

    if output_type in ("REPORT", "MERMAID"):
        if attribute_title in ('Mermaid Graph', 'Mermaid', 'Solution Blueprint Mermaid Graph'):
            return f"## {attribute_title}\n\n```mermaid\n{attribute_value}\n```\n"
        if attribute_value:
            return f"## {attribute_title}\n{attribute_value}\n\n"

    return ""
212
+
213
def format_for_markdown_table(text: str, guid: Optional[str] = None) -> str:
    """
    Format a value for markdown tables by replacing newlines with spaces and escaping pipe characters.
    No truncation is applied to allow full-length text display regardless of console width.

    Args:
        text: The value to format. Lists are joined item by item; any other non-string
            value is converted with str() so numbers and dicts do not raise.
        guid: Optional GUID. When given and the formatted text contains '::', the text is
            wrapped in a markdown link pointing at the ``#<guid>`` anchor.

    Returns:
        str: Formatted text safe for markdown tables ("" for any falsy input).
    """
    if not text:
        return ""
    if isinstance(text, list):
        text = "\n".join(str(item) for item in text)
    elif not isinstance(text, str):
        text = str(text)
    # Replace newlines with spaces and escape pipe characters
    cell = text.replace("\n", " ").replace("|", "\\|")
    if '::' in cell and guid:
        cell = f" [{cell}](#{guid}) "
    return cell
233
+
234
+
235
def populate_columns_from_properties(element: dict, columns_struct: dict) -> dict:
    """
    Copy matching values from an element's 'properties' dict into a columns structure.

    The column list is read from columns_struct['formats']['attributes']. For every column
    dict that has a 'key', the key is converted with to_camel_case and, when that name is
    present in element['properties'], the property value is stored under the column's 'value'.
    Columns that raise while being processed are logged at debug level and skipped.

    Args:
        element: The element whose 'properties' dict supplies the values (may be None).
        columns_struct: The structure holding the column list under formats/attributes.

    Returns:
        The same columns_struct object; it is modified in place. It is returned untouched when
        it is not a dict, when 'properties' is not a dict, or when no attribute list is present.
    """
    if not isinstance(columns_struct, dict):
        return columns_struct

    properties = (element or {}).get('properties') or {}
    if not isinstance(properties, dict):
        # Nothing usable to copy from
        return columns_struct

    format_spec = columns_struct.get('formats') or {}
    attribute_columns = format_spec.get('attributes') if isinstance(format_spec, dict) else None
    if not isinstance(attribute_columns, list):
        return columns_struct

    for column in attribute_columns:
        try:
            snake_key = column.get('key') if isinstance(column, dict) else None
            if snake_key:
                # Property names are looked up under the converted form of the column key
                camel_key = to_camel_case(snake_key)
                if camel_key in properties:
                    column['value'] = properties.get(camel_key)
        except Exception as e:
            # Best effort: record the problem and move on to the next column
            logger.debug(f"populate_columns_from_properties: skipping column due to error: {e}")

    return columns_struct
282
+
283
+
284
def get_required_relationships(element: dict, columns_struct: dict) -> dict:
    """
    Fill empty column values from top-level keys of the element.

    The column list is read from columns_struct['formats']['attributes']. For each column dict
    with a 'key' and no value yet (None or ""), the key is converted with to_camel_case and
    looked up as a top-level key of the element. When found, the value is reduced to a string:
    a list becomes the ", "-joined names of its items, a dict becomes its name, None becomes ""
    and anything else is passed through str(). A "name" is taken from
    item['relatedElement']['properties'] (or 'related_element') or from the item itself, trying
    qualifiedName, displayName and name in that order; string items are used as-is.
    Columns that raise while being processed are logged at debug level and skipped.

    Args:
        element: The element dictionary whose top-level keys are inspected.
        columns_struct: The columns structure to augment with derived 'value's.

    Returns:
        The same columns_struct object (modified in place and returned).
    """
    if not isinstance(columns_struct, dict):
        return columns_struct

    format_spec = columns_struct.get('formats') or {}
    attribute_columns = format_spec.get('attributes') if isinstance(format_spec, dict) else None
    if not isinstance(attribute_columns, list):
        return columns_struct

    def _extract_name_from_item(item: Any) -> Optional[str]:
        """Return the first usable name found for a relationship item, or None."""
        try:
            if isinstance(item, str):
                return item
            if isinstance(item, dict):
                # Names nested under the related element take precedence
                related = item.get('relatedElement') or item.get('related_element')
                if isinstance(related, dict):
                    related_props = related.get('properties') or {}
                    nested_name = (
                        related_props.get('qualifiedName')
                        or related_props.get('displayName')
                        or related_props.get('name')
                    )
                    if nested_name:
                        return nested_name
                # Otherwise look for the same names directly on the item
                own_name = item.get('qualifiedName') or item.get('displayName') or item.get('name')
                if own_name:
                    return own_name
        except Exception as e:
            logger.debug(f"get_required_relationships: error extracting name from item: {e}")
        return None

    for column in attribute_columns:
        try:
            if not isinstance(column, dict):
                continue
            snake_key = column.get('key')
            # Skip columns without a key and columns that already carry a value
            if not snake_key or column.get('value') not in (None, ""):
                continue

            camel_key = to_camel_case(snake_key)
            if camel_key not in element:
                continue

            raw_value = element.get(camel_key)
            if isinstance(raw_value, list):
                derived = ", ".join(
                    found for found in map(_extract_name_from_item, raw_value) if found
                )
            elif isinstance(raw_value, dict):
                derived = _extract_name_from_item(raw_value) or ""
            elif raw_value is None:
                derived = ""
            else:
                # Primitive or unexpected type
                derived = str(raw_value)

            column['value'] = derived
        except Exception as e:
            logger.debug(f"get_required_relationships: skipping column due to error: {e}")

    return columns_struct
380
+
381
+
382
def generate_entity_md(elements: List[Dict],
                       elements_action: str,
                       output_format: str,
                       entity_type: str,
                       extract_properties_func: Callable,
                       get_additional_props_func: Optional[Callable] = None,
                       columns_struct: Optional[dict] = None) -> str:
    """
    Generic method to generate markdown for entities.

    Args:
        elements (list): List of entity elements; None entries are skipped
        elements_action (str): Action description for elements (heading used for FORM/MD)
        output_format (str): Output format
        entity_type (str): Type of entity (Glossary, Term, Category, etc.)
        extract_properties_func: Function to extract properties from an element. It is first
            called as ``f(element, columns_struct)`` (when columns_struct is given) and is
            expected to return a columns structure with values; if that raises TypeError or
            returns None it is called as ``f(element)`` and expected to return a props dict.
        get_additional_props_func: Optional function ``f(element, guid, output_format)``
            returning extra values keyed like the columns
        columns_struct (dict): Optional columns structure; the column list is read from
            ``columns_struct['formats']['attributes']``. May be None (all props are dumped).

    Returns:
        str: Markdown representation
    """
    # columns_struct is optional (default None), so only read the heading when it was supplied.
    heading = columns_struct.get("heading") if columns_struct else None
    if heading == "Default Base Attributes":
        elements_md = "## Reporting on Default Base Attributes - Perhaps couldn't find a valid combination of report_spec and output_format?\n\n"
    else:
        elements_md = ""
    base_columns = columns_struct['formats'].get('attributes') if columns_struct else None

    # Separators are placed by position; comparing elements by value would drop the separator
    # after any earlier element that happens to equal the last one.
    last_index = len(elements) - 1
    for index, element in enumerate(elements):
        if element is None:
            continue
        guid = element.get('elementHeader', {}).get('guid')

        # Prefer new behavior: extractor returns an updated columns_struct with values
        returned_struct = None
        if columns_struct is not None:
            try:
                returned_struct = extract_properties_func(element, columns_struct)
            except TypeError:
                # Fallback for legacy extractors without columns_struct parameter
                returned_struct = None

        # Legacy fallback: get props dict if no columns_struct provided/returned
        props = {}
        if returned_struct is None:
            props = extract_properties_func(element) if callable(extract_properties_func) else {}

        # Get additional properties if function is provided
        additional_props = {}
        if get_additional_props_func:
            # Use guid if available, else try to get from props
            guid_for_fmt = guid or props.get('GUID')
            additional_props = get_additional_props_func(element, guid_for_fmt, output_format)

        # Determine display name
        display_name = None
        if returned_struct is not None:
            # First non-empty value of a 'display_name' or 'title' column wins
            for col in returned_struct.get('formats', {}).get('attributes', []):
                if col.get('key') in ('display_name', 'title'):
                    display_name = col.get('value')
                    if display_name:
                        break
        else:
            display_name = props.get('display_name') or props.get('title')

        if display_name is None:
            display_name = "NO DISPLAY NAME"

        # Format header based on output format
        if output_format in ['FORM', 'MD']:
            elements_md += f"# {elements_action}\n\n"
            elements_md += f"## {entity_type} Name \n\n{display_name}\n\n"
        elif output_format == 'REPORT':
            elements_md += f'<a id="{(guid or props.get("GUID") or "No GUID" )}"></a>\n# {entity_type} Name: {display_name}\n\n'
        else:
            elements_md += f"## {entity_type} Name \n\n{display_name}\n\n"

        # Add attributes based on column spec if available, otherwise, add all (legacy)
        if returned_struct is not None:
            for column in returned_struct.get('formats', {}).get('attributes', []):
                name = column.get('name')
                key = column.get('key')
                value = column.get('value')
                if value in (None, "") and key in additional_props:
                    value = additional_props[key]
                if column.get('format'):
                    value = format_for_markdown_table(value, guid)
                elements_md += make_md_attribute(name, value, output_format)
            if wk := returned_struct.get("annotations", {}).get("wikilinks", None):
                elements_md += ", ".join(wk)
        elif base_columns:
            # If we have columns but extractor didn't return struct, use legacy props lookup
            for column in base_columns:
                key = column['key']
                name = column['name']
                value = ""
                if key in props:
                    value = props[key]
                elif key in additional_props:
                    value = additional_props[key]
                if column.get('format'):
                    value = format_for_markdown_table(value, guid or props.get('GUID'))
                elements_md += make_md_attribute(name, value, output_format)
            if wk := columns_struct.get("annotations", {}).get("wikilinks", None):
                elements_md += ", ".join(wk)
        else:
            # Legacy path without columns: dump all props
            for key, value in props.items():
                if output_format in ['FORM', 'MD', 'DICT'] and key == 'mermaid':
                    continue
                if key not in ['properties', 'display_name']:
                    if key == 'mermaid' and value == '':
                        continue
                    elements_md += make_md_attribute(key.replace('_', ' '), value, output_format)
            for key, value in additional_props.items():
                elements_md += make_md_attribute(key.replace('_', ' '), value, output_format)

        if index != last_index:
            elements_md += MD_SEPARATOR

    return elements_md
507
+
508
def generate_entity_md_table(elements: List[Dict],
                             search_string: str,
                             entity_type: str,
                             extract_properties_func: Callable,
                             columns_struct: dict,
                             get_additional_props_func: Optional[Callable] = None,
                             output_format: str = 'LIST') -> str:
    """
    Generic method to generate a markdown table for entities.

    Args:
        elements (list): List of entity elements; None entries are skipped
        search_string (str): The search string used
        entity_type (str): Type of entity (Glossary, Term, Category, etc.); used for the title
            unless columns_struct supplies a 'target_type'
        extract_properties_func: Function to extract properties from an element. Called as
            ``f(element, columns_struct)``; if that raises TypeError it is called as
            ``f(element)`` and expected to return a props dict.
        columns_struct: Columns structure; the column definitions (each with 'name', 'key' and
            optionally 'format') are read from ``columns_struct['formats']['attributes']``
        get_additional_props_func: Optional function to get additional properties
        output_format (str): Output format (FORM, REPORT, LIST, etc.)

    Returns:
        str: Markdown table
    """
    target_type = columns_struct.get('target_type', entity_type)

    # Pluralize for the title: a trailing 'y' becomes 'ies', otherwise append 's'
    entity_type_plural = f"{target_type[:-1]}ies" if target_type.endswith('y') else f"{target_type}s"
    columns = columns_struct['formats'].get('attributes', [])
    heading = columns_struct.get("heading")
    if heading == "Default Base Attributes":
        elements_md = "## Reporting on Default Base Attributes - Perhaps couldn't find a valid combination of report_spec and output_format?\n\n"
    else:
        elements_md = ""

    if output_format == "LIST":
        # Append rather than assign so the fallback warning above is not thrown away
        elements_md += f"# {entity_type_plural} Table\n\n"
        elements_md += f"{entity_type_plural} found from the search string: `{search_string}`\n\n"

    # Add column headers
    header_row = "| "
    separator_row = "|"
    for column in columns:
        header_row += f"{column['name']} | "
        separator_row += "-------------|"

    elements_md += header_row + "\n"
    elements_md += separator_row + "\n"

    # Add rows
    for element in elements:
        if element is None:
            # Same tolerance for missing entries as the other generators in this module
            continue
        guid = element.get('elementHeader', {}).get('guid', None)

        # Extractor returns columns_struct with values when possible
        try:
            returned_struct = extract_properties_func(element, columns_struct)
        except TypeError:
            returned_struct = None

        # For help mode, use the column definitions as they stand
        if output_format == "help":
            returned_struct = {"formats": {"attributes": columns}}

        # Additional props (if any)
        additional_props = {}
        if get_additional_props_func:
            additional_props = get_additional_props_func(element, guid, output_format)

        # Build row
        row = "| "
        if returned_struct is not None:
            for column in returned_struct.get('formats', {}).get('attributes', []):
                key = column.get('key')
                value = column.get('value')
                if (value in (None, "")) and key in additional_props:
                    value = additional_props[key]
                if column.get('format'):
                    value = format_for_markdown_table(value, guid)
                row += f"{value} | "
        else:
            # Legacy fallback: read from props dict
            props = extract_properties_func(element)
            for column in columns:
                key = column['key']
                value = ""
                if key in props:
                    value = props[key]
                elif key in additional_props:
                    value = additional_props[key]
                if column.get('format'):
                    value = format_for_markdown_table(value, guid or props.get('GUID'))
                row += f"{value} | "

        elements_md += row + "\n"
    return elements_md
608
+
609
def generate_entity_dict(elements: List[Dict],
                         extract_properties_func: Callable,
                         get_additional_props_func: Optional[Callable] = None,
                         include_keys: Optional[List[str]] = None,
                         exclude_keys: Optional[List[str]] = None,
                         columns_struct: Optional[dict] = None,
                         output_format: str = 'DICT') -> List[Dict]:
    """
    Generic method to generate a dictionary representation of entities.

    Args:
        elements (list): List of entity elements; None entries are skipped
        extract_properties_func: Function to extract properties from an element. With a
            columns_struct it is called as ``f(element, columns_struct)`` and expected to
            return a columns structure with values; legacy extractors taking only the element
            are supported through a TypeError fallback.
        get_additional_props_func: Optional function ``f(element, guid, output_format)`` to get
            additional properties
        include_keys: Optional list of keys to include in the result (if None, include all).
            Only applied when no column list is in use.
        exclude_keys: Optional list of keys to exclude from the result (if None, exclude none).
            Only applied when no column list is in use.
        columns_struct: Optional dict of columns to include (if None, include all); the column
            list is read from ``columns_struct['formats']['attributes']``
        output_format (str): Output format (FORM, REPORT, DICT, etc.)

    Returns:
        list: List of entity dictionaries, keyed by the columns' machine keys
    """

    def _pick_key(entity: dict, key: Any, name: Any) -> Any:
        """Return the dict key for a column, suffixing _1, _2, ... when it would overwrite."""
        # Use machine key for DICT output to avoid name collisions (e.g., multiple 'GUID' display names)
        dict_key = key or name
        if dict_key in entity and name and name != dict_key:
            suffix_idx = 1
            while f"{dict_key}_{suffix_idx}" in entity:
                suffix_idx += 1
            dict_key = f"{dict_key}_{suffix_idx}"
        return dict_key

    def _wanted(key: Any) -> bool:
        """Apply the include/exclude filters to a key."""
        return (include_keys is None or key in include_keys) and (
            exclude_keys is None or key not in exclude_keys)

    result = []

    # Add attributes based on column spec if available, otherwise, add all
    for element in elements:
        if element is None:
            continue

        guid = element.get('elementHeader', {}).get('guid')

        returned_struct = None
        if columns_struct is not None:
            try:
                returned_struct = extract_properties_func(element, columns_struct)
            except TypeError as e:
                logger.info(f"Error - didn't find extractor?: {e}")
                returned_struct = None

        # Get additional properties if function is provided
        additional_props = {}
        if get_additional_props_func:
            additional_props = get_additional_props_func(element, guid, output_format)

        # Create entity dictionary
        entity_dict = {}

        columns = columns_struct['formats'].get('attributes', None) if columns_struct else None
        if returned_struct is not None:
            for column in returned_struct.get('formats', {}).get('attributes', []):
                key = column.get('key')
                name = column.get('name')
                value = column.get('value')
                if (value in (None, "")) and key in additional_props:
                    value = additional_props[key]
                if column.get('format'):
                    value = format_for_markdown_table(value, guid)
                entity_dict[_pick_key(entity_dict, key, name)] = value
        elif columns:
            # Legacy extractor: fetch the props once per element, not once per column
            props = extract_properties_func(element)
            for column in columns:
                key = column['key']
                name = column['name']
                value = ""
                if key in props:
                    value = props[key]
                elif key in additional_props:
                    value = additional_props[key]
                if column.get('format', None):
                    value = format_for_markdown_table(value, guid or props.get('GUID'))
                entity_dict[_pick_key(entity_dict, key, name)] = value
        else:
            try:
                props = extract_properties_func(element, columns_struct)
            except TypeError:
                # Legacy extractors accept only the element
                props = extract_properties_func(element)
            # Add properties based on include/exclude lists
            for key, value in (props or {}).items():
                if key not in ['properties', 'mermaid'] and _wanted(key):  # Skip the raw properties object
                    entity_dict[key] = value

            # Add additional properties
            for key, value in additional_props.items():
                if _wanted(key):
                    entity_dict[key] = value

        result.append(entity_dict)

    return result
744
+
745
def resolve_output_formats(entity_type: str,
                           output_format: str,
                           report_spec: Optional[Union[str, dict]] = None,
                           default_label: Optional[str] = None,
                           **kwargs) -> Optional[dict]:
    """
    Pick the report format structure to use for an entity type and output format
    (e.g., DICT, LIST, MD, REPORT, FORM).

    When report_spec is None, the keyword arguments 'report_spec', 'report_format' and
    'output_format_spec' are checked in that order and the first one present is used in its place.

    Selection order:
      - report_spec is a str: select_report_format(report_spec, output_format).
      - report_spec is a dict: get_report_spec_match(report_spec, output_format).
      - otherwise: select_report_format using default_label, or entity_type when no
        default_label is given.
      - if that yields None: select_report_format("Default", output_format).
    """
    from pyegeria.base_report_formats import get_report_spec_match

    if report_spec is None and isinstance(kwargs, dict):
        # First legacy keyword present wins, even if its value is None
        for legacy_name in ('report_spec', 'report_format', 'output_format_spec'):
            if legacy_name in kwargs:
                report_spec = kwargs.get(legacy_name)
                break

    if isinstance(report_spec, str):
        return select_report_format(report_spec, output_format)
    if isinstance(report_spec, dict):
        return get_report_spec_match(report_spec, output_format)

    resolved = select_report_format(default_label or entity_type, output_format)
    if resolved is not None:
        return resolved
    return select_report_format("Default", output_format)
783
+
784
+
785
def overlay_additional_values(columns_struct: dict, extra: Optional[dict]) -> dict:
    """
    Fill empty column values in columns_struct from the extra mapping.

    Only columns found under columns_struct['formats']['attributes'] are considered. A column is
    updated when it is a dict with a truthy 'key', its current 'value' is None, "" or absent, and
    that key is present in extra. Columns that already hold a value are left alone.

    Args:
        columns_struct: Report format structure; mutated in place.
        extra: Mapping of column key to replacement value. Falsy values (None, {}) are a no-op.

    Returns:
        The same columns_struct object that was passed in.
    """
    if not isinstance(columns_struct, dict) or not extra:
        return columns_struct

    attributes = columns_struct.get('formats', {}).get('attributes')
    if isinstance(attributes, list):
        for column in attributes:
            if not isinstance(column, dict):
                continue
            column_key = column.get('key')
            # Emptiness is tested before membership, matching the original evaluation order.
            if column_key and column.get('value') in (None, "") and column_key in extra:
                column['value'] = extra[column_key]
    return columns_struct
804
+
805
+
806
def populate_common_columns(
        element: dict,
        columns_struct: dict,
        *,
        include_header: bool = True,
        include_relationships: bool = True,
        include_subject_area: bool = True,
        mermaid_source_key: str = 'mermaidGraph',
        mermaid_dest_key: str = 'mermaid'
) -> dict:
    """
    Run the shared column-population pipeline for one element.

    The stages run in this order, and none of them overwrites a column whose 'value' is already
    something other than None or "":

    1. Base values: delegate to populate_columns_from_properties(element, columns_struct).
    2. Header overlay (include_header): fill empty columns from the mapping returned by
       _extract_referenceable_properties(element); a column whose key is any casing of "guid"
       falls back to that mapping's 'GUID' entry.
    3. Relationships (include_relationships): delegate to get_required_relationships.
    4. Subject area (include_subject_area): fill empty 'subject_area' columns from
       element.elementHeader.subjectArea.classificationProperties.subjectAreaName.
    5. Mermaid: fill the first empty column keyed mermaid_dest_key from element[mermaid_source_key].

    Stages 4 and 5 are best-effort: any exception is logged at debug level and swallowed.

    Returns:
        The column structure produced by stage 1 (or by stage 3 when relationships are included).
    """
    # 1) Base properties
    col_data = populate_columns_from_properties(element, columns_struct)
    columns_list = col_data.get('formats', {}).get('attributes', [])

    # 2) Header overlay
    if include_header:
        header_props = _extract_referenceable_properties(element)
        guid = header_props.get('GUID')
        for column in columns_list:
            if not isinstance(column, dict):
                continue
            key = column.get('key')
            if not key or column.get('value') not in (None, ""):
                continue
            if key in header_props:
                column['value'] = header_props.get(key)
            elif isinstance(key, str) and key.lower() == 'guid':
                column['value'] = guid

    # 3) Relationships
    if include_relationships:
        col_data = get_required_relationships(element, col_data)

    # 4) Subject area
    if include_subject_area:
        try:
            subject_area = element.get('elementHeader', {}).get('subjectArea') or ""
            subj_val = (
                subject_area.get('classificationProperties', {}).get('subjectAreaName', '')
                if isinstance(subject_area, dict)
                else ""
            )
            for column in columns_list:
                if column.get('key') != 'subject_area':
                    continue
                if column.get('value') in (None, ""):
                    column['value'] = subj_val
        except Exception as e:
            logger.debug(f"populate_common_columns: subject_area handling error: {e}")

    # 5) Mermaid
    try:
        mermaid_val = element.get(mermaid_source_key, '') or ''
        # Only the first empty destination column receives the graph.
        target = next(
            (column for column in columns_list
             if column.get('key') == mermaid_dest_key and column.get('value') in (None, "")),
            None,
        )
        if target is not None:
            target['value'] = mermaid_val
    except Exception as e:
        logger.debug(f"populate_common_columns: mermaid handling error: {e}")

    return col_data
876
+
877
+
878
def extract_mermaid_only(elements: Union[Dict, List[Dict]]) -> Union[str, List[str]]:
    """
    Extract mermaid graph data from elements.

    Args:
        elements: Dictionary or list of dictionaries containing element data. The graph is read
            from each element's 'mermaidGraph' entry.

    Returns:
        For a single dictionary: a fenced mermaid block (prefixed with one newline), or "---" when
        the element has no graph.
        For a list: one entry per non-None element, each either a fenced mermaid block (prefixed
        with two newlines) or "---" when that element has no graph. None elements are skipped,
        as the sibling extractors do.
    """
    if isinstance(elements, dict):
        graph = elements.get('mermaidGraph')
        return f"\n```mermaid\n{graph}\n```" if graph else "---"

    rendered = []
    for element in elements:
        if element is None:
            continue
        # Default to None (not "---"): a truthy default would be wrapped in a mermaid fence
        # instead of producing the plain "---" placeholder.
        graph = element.get('mermaidGraph')
        rendered.append(f"\n\n```mermaid\n{graph}\n```" if graph else "---")
    return rendered
902
+
903
def extract_basic_dict(elements: Union[Dict, List[Dict]]) -> Union[Dict, List[Dict]]:
    """
    Flatten element(s) into basic dictionaries.

    Each result starts with 'guid' (from elementHeader.guid), followed by every entry of the
    element's 'properties'. When the header carries classifications, their names are added as a
    single bracketed, comma-separated string such as "[A, B]".

    Args:
        elements: Dictionary or list of dictionaries containing element data

    Returns:
        A dictionary for a single element, or a list of dictionaries for a list (None entries
        in the list are skipped).
    """

    def _flatten(element: Dict, classifications_key: str) -> Dict:
        # Build one flat record; classifications_key names the entry holding classification names.
        header = element['elementHeader']
        body = {'guid': header['guid']}
        properties = element['properties']
        for prop_name in properties:
            body[prop_name] = properties[prop_name]

        classifications = header.get('classifications', [])
        if classifications:
            joined = ""
            for entry in classifications:
                label = f"{entry['classificationName']}"
                joined = f"{joined}, {label}" if joined else label
            body[classifications_key] = f"[{joined}]"
        return body

    # NOTE(review): the single-element form stores the names under 'classification_names' while
    # the list form uses 'classifications'. Preserved as-is; confirm which key callers expect.
    if isinstance(elements, dict):
        return _flatten(elements, 'classification_names')

    return [_flatten(element, 'classifications') for element in elements if element is not None]
950
+
951
def _extract_default_properties(self, element: dict, columns_struct: dict) -> dict:
    """
    Default property extractor: populate columns_struct for one element.

    Args:
        self: Not used by this function; kept so the existing signature is unchanged.
        element: The element whose values should be placed into the columns.
        columns_struct: The report format structure to populate.

    Returns:
        The structure returned by populate_common_columns(element, columns_struct).
    """
    # Common population pipeline
    return populate_common_columns(element, columns_struct)
962
+
963
+
964
def _generate_default_output(self, elements: dict | list[dict], search_string: str,
                             element_type_name: str | None,
                             output_format: str = 'DICT',
                             report_format: dict | str | None = None,
                             **kwargs) -> str | list[dict]:
    """
    Generate output for elements using the generic 'Referenceable' report format.

    Args:
        self: Forwarded to _extract_default_properties, which requires it positionally.
        elements: A single element or a list of elements.
        search_string: The search string used to find the elements.
        element_type_name: Not used by this function; output is always generated for the
            'Referenceable' entity type.
        output_format: The desired output format.
        report_format: A format label (str) or a dict of format sets. When falsy, the format
            registered for 'Referenceable' is used.
        **kwargs: Backward compatibility: a legacy 'report_spec' keyword is used when
            report_format is None.

    Returns:
        The result of generate_output for the resolved format. Whenever the resolved format is
        None, the "Default" format is selected instead.
    """
    entity_type = 'Referenceable'
    # Backward compatibility: accept legacy kwarg
    if report_format is None:
        report_format = kwargs.get('report_spec')

    if report_format:
        if isinstance(report_format, str):
            output_formats = select_report_format(report_format, output_format)
        elif isinstance(report_format, dict):
            # Imported locally, mirroring resolve_output_formats, so this branch does not depend
            # on a module-level import being present.
            from pyegeria.base_report_formats import get_report_spec_match
            output_formats = get_report_spec_match(report_format, output_format)
        else:
            output_formats = None
    else:
        output_formats = select_report_format(entity_type, output_format)
    if output_formats is None:
        output_formats = select_report_format('Default', output_format)

    return generate_output(
        elements=elements,
        search_string=search_string,
        entity_type=entity_type,
        output_format=output_format,
        # The extractor is invoked without `self` (e.g. extract_properties_func(element,
        # columns_struct)), so bind it here; passing the bare function would leave it one
        # positional argument short.
        extract_properties_func=lambda *args, **kw: _extract_default_properties(self, *args, **kw),
        get_additional_props_func=None,
        columns_struct=output_formats,
    )
993
+
994
+
995
def generate_output(elements: Union[Dict, List[Dict]],
                    search_string: str,
                    entity_type: str,
                    output_format: str,
                    extract_properties_func: Callable,
                    get_additional_props_func: Optional[Callable] = None,
                    columns_struct: Optional[dict] = None) -> Union[str, list[dict]]:
    """
    Generate output in the specified format for the given elements.

    Args:
        elements: Dictionary or list of dictionaries containing element data
        search_string: The search string used to find the elements; None or '' is shown as "All"
        entity_type: The type of entity (e.g., "Glossary", "Term", "Category")
        output_format: The desired output format (MD, FORM, REPORT, LIST, DICT, TABLE, MERMAID, HTML).
            TABLE is handled as DICT; HTML is the REPORT output converted with markdown_to_html.
        extract_properties_func: Function to extract properties from an element
        get_additional_props_func: Optional function to get additional properties
        columns_struct: Optional report format structure; its column definitions are read from
            columns_struct['formats']['attributes']. When it is missing or defines no columns,
            the "Default" format for output_format is selected instead.

    Returns:
        Formatted output as string or list of dictionaries

    Raises:
        ValueError: If output_format is LIST and no column definitions could be resolved.
    """
    # Tolerate a spec without a 'formats' entry, the same way the fallback lookup below does.
    columns = (columns_struct.get('formats') or {}).get('attributes') if columns_struct else None
    if not columns:
        columns_struct = select_report_format("Default", output_format)
        if columns_struct:
            columns = (columns_struct.get('formats') or {}).get('attributes')

    # The spec may redirect headings to a different type name than the caller's entity_type.
    target_type = columns_struct.get('target_type', entity_type) if columns_struct else entity_type
    if target_type is None:
        target_type = entity_type

    # Ensure elements is a list
    if isinstance(elements, dict):
        elements = [elements]

    # Handle empty search string
    if search_string is None or search_string == '':
        search_string = "All"

    # Set the output format to DICT to return values to table display
    if output_format == "TABLE":
        output_format = "DICT"

    # Generate output based on format
    if output_format == 'MERMAID':
        return extract_mermaid_only(elements)

    if output_format == 'HTML':
        # First generate the REPORT format output
        report_output = generate_output(
            elements=elements,
            search_string=search_string,
            entity_type=entity_type,
            output_format="REPORT",
            extract_properties_func=extract_properties_func,
            get_additional_props_func=get_additional_props_func,
            columns_struct=columns_struct
        )

        # Convert the markdown to HTML
        return markdown_to_html(report_output)

    if output_format == 'DICT':
        return generate_entity_dict(
            elements=elements,
            extract_properties_func=extract_properties_func,
            get_additional_props_func=get_additional_props_func,
            exclude_keys=['properties'],
            columns_struct=columns_struct,
            output_format=output_format
        )

    if output_format == 'LIST':
        if columns is None:
            raise ValueError("Columns must be provided for LIST output format")

        return generate_entity_md_table(
            elements=elements,
            search_string=search_string,
            entity_type=entity_type,
            extract_properties_func=extract_properties_func,
            columns_struct=columns_struct,
            get_additional_props_func=get_additional_props_func,
            output_format=output_format
        )

    # MD, FORM, REPORT
    elements_md, elements_action = make_preamble(
        obj_type=target_type,
        search_string=search_string,
        output_format=output_format
    )

    elements_md += generate_entity_md(
        elements=elements,
        elements_action=elements_action,
        output_format=output_format,
        entity_type=target_type,
        extract_properties_func=extract_properties_func,
        get_additional_props_func=get_additional_props_func,
        columns_struct=columns_struct
    )

    return elements_md
1100
+
1101
+ # Machine keys explanation
1102
+ # -------------------------
1103
+ # In pyegeria’s reporting system, each output column is defined with two identifiers:
1104
+ # - name: the human-readable display label (e.g., "GUID", "Display Name") shown in tables and reports.
1105
+ # - key: the stable, programmatic identifier (usually snake_case, e.g., guid, display_name) used internally.
1106
+ #
1107
+ # We refer to the key as the machine key. It is intended to be:
1108
+ # - Unique within a given row/spec: so downstream code can unambiguously reference values.
1109
+ # - Stable over time: it should not change with presentation tweaks or localization.
1110
+ # - Program-friendly: lowercase with underscores, matching the data model or derivation (e.g., header vs properties).
1111
+ #
1112
+ # Why DICT output uses machine keys
1113
+ # - Display names are for humans and may repeat across different sources of data in one row. For example,
1114
+ # Collections may include a GUID from the element header (key: guid, display name: "GUID") and a related member GUID
1115
+ # (key: GUID, display name: "GUID"). If the DICT used display names, one would overwrite the other.
1116
+ # - By using the machine key (column['key']) as the dictionary key, DICT output avoids these collisions and remains
1117
+ # deterministic for programmatic consumers.
1118
+ #
1119
+ # Collision handling
1120
+ # - If, within an unusual spec, two columns share the same key, DICT generation adds a numeric suffix (e.g., guid_1)
1121
+ # to preserve both values. This is a safeguard and such collisions should be avoided in specs.
1122
+ #
1123
+ # Guidance for spec authors
1124
+ # - Choose clear, stable machine keys in snake_case that map to actual data fields or well-defined derivations.
1125
+ # - Reserve display names for presentation; do not rely on them for programmatic access.
1126
+ # - When you need two visually identical labels (e.g., both "GUID"), ensure their keys differ (e.g., guid vs GUID) to
1127
+ # reflect different sources/semantics.