pyDiffTools 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,620 @@
+ from pathlib import Path
+ from typing import Dict, Any, List, Optional, Tuple
+ from datetime import date, datetime
+
+ import textwrap
+ import re
+ import yaml
+ from dateutil.parser import parse as parse_due_string
+ from yaml.emitter import ScalarAnalysis
+
+
+ class IndentDumper(yaml.SafeDumper):
+     """YAML dumper that always indents nested lists."""
+
+     def increase_indent(self, flow: bool = False, indentless: bool = False):
+         return super().increase_indent(flow, False)
+
+     def analyze_scalar(self, scalar: str) -> ScalarAnalysis:
+         analysis = super().analyze_scalar(scalar)
+         if "\n" in scalar and not analysis.allow_block:
+             analysis = ScalarAnalysis(
+                 scalar=analysis.scalar,
+                 empty=analysis.empty,
+                 multiline=analysis.multiline,
+                 allow_flow_plain=analysis.allow_flow_plain,
+                 allow_block_plain=analysis.allow_block_plain,
+                 allow_single_quoted=analysis.allow_single_quoted,
+                 allow_double_quoted=analysis.allow_double_quoted,
+                 allow_block=True,
+             )
+         return analysis
+
+
+ def _str_presenter(dumper, data: str):
+     if "\n" in data:
+         return dumper.represent_scalar(
+             "tag:yaml.org,2002:str",
+             data,
+             style="|",
+         )
+     return dumper.represent_scalar("tag:yaml.org,2002:str", data)
+
+
+ def _register_block_str_presenter() -> None:
+     """Register the multiline string presenter on all dumpers we use."""
+
+     for dumper in (yaml.Dumper, yaml.SafeDumper, IndentDumper):
+         if getattr(dumper, "yaml_representers", None) is not None:
+             dumper.add_representer(str, _str_presenter)
+
+
+ _register_block_str_presenter()
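+
+ # Illustrative sketch of what the dumper configuration above provides (the
+ # file name "example.yml" is hypothetical, not part of the package):
+ #
+ #     with open("example.yml", "w") as f:
+ #         yaml.dump(
+ #             {"nodes": {"a": {"children": ["b"], "text": "one\ntwo"}}},
+ #             f,
+ #             Dumper=IndentDumper,
+ #             default_flow_style=False,
+ #         )
+ #
+ # IndentDumper indents the ``children`` list under its key, and the
+ # registered presenter emits the multiline ``text`` value as a ``|`` block
+ # scalar instead of a quoted string with ``\n`` escapes.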
+
+
+ def load_graph_yaml(
+     path: str, old_data: Optional[Dict[str, Any]] = None
+ ) -> Dict[str, Any]:
+     """Load graph description from YAML and synchronize parent/child links.
+
+     If ``old_data`` is provided, relationships removed or added in the new
+     YAML are propagated to the corresponding nodes so that editing only one
+     side of a link keeps the structure symmetric.
+     """
+     with open(path) as f:
+         data = yaml.safe_load(f)
+     nodes = data.setdefault("nodes", {})
+     nodes.pop("node", None)
+
+     defined_nodes = set(nodes.keys())
+
+     for name, node in list(nodes.items()):
+         node.setdefault("children", [])
+         node.setdefault("parents", [])
+         node["children"] = list(dict.fromkeys(node["children"]))
+         node["parents"] = list(dict.fromkeys(node["parents"]))
+         if "subgraph" in node and "style" not in node:
+             node["style"] = node.pop("subgraph")
+         for child in node["children"]:
+             nodes.setdefault(child, {}).setdefault("children", [])
+
+     if old_data is None:
+         # Rebuild parent lists solely from children
+         for node in nodes.values():
+             node["parents"] = []
+         for parent, node in nodes.items():
+             for child in node.get("children", []):
+                 nodes[child]["parents"].append(parent)
+         return data
+
+     old_nodes = old_data.get("nodes", {})
+
+     removed_nodes = set(old_nodes) - defined_nodes
+     if removed_nodes:
+         for removed in removed_nodes:
+             for node in nodes.values():
+                 if removed in node.get("children", []):
+                     node["children"].remove(removed)
+                 if removed in node.get("parents", []):
+                     node["parents"].remove(removed)
+             nodes.pop(removed, None)
+
+     for name, node in nodes.items():
+         old_node = old_nodes.get(name, {})
+         old_children = set(old_node.get("children", []))
+         new_children = set(node.get("children", []))
+         old_parents = set(old_node.get("parents", []))
+         new_parents = set(node.get("parents", []))
+
+         # Children added or removed on this node
+         for child in new_children - old_children:
+             nodes.setdefault(child, {}).setdefault("parents", [])
+             if name not in nodes[child]["parents"]:
+                 nodes[child]["parents"].append(name)
+         for child in old_children - new_children:
+             if child in removed_nodes:
+                 continue
+             nodes.setdefault(child, {}).setdefault("parents", [])
+             if name in nodes[child]["parents"]:
+                 nodes[child]["parents"].remove(name)
+
+         # Parents added or removed on this node
+         for parent in new_parents - old_parents:
+             nodes.setdefault(parent, {}).setdefault("children", [])
+             if name not in nodes[parent]["children"]:
+                 nodes[parent]["children"].append(name)
+         for parent in old_parents - new_parents:
+             if parent in removed_nodes:
+                 continue
+             nodes.setdefault(parent, {}).setdefault("children", [])
+             if name in nodes[parent]["children"]:
+                 nodes[parent]["children"].remove(name)
+
+     # Deduplicate lists
+     for node in nodes.values():
+         node["children"] = list(dict.fromkeys(node.get("children", [])))
+         node["parents"] = list(dict.fromkeys(node.get("parents", [])))
+
+     return data
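+
+ # Illustrative usage (a sketch; "plan.yml" and the node names are made up).
+ # With no ``old_data`` the parents lists are rebuilt from the children lists:
+ #
+ #     data = load_graph_yaml("plan.yml")
+ #     # a YAML body of
+ #     #   nodes:
+ #     #     write_draft:
+ #     #       children: [review_draft]
+ #     #     review_draft: {}
+ #     # comes back with data["nodes"]["review_draft"]["parents"] ==
+ #     # ["write_draft"], so only one side of each link has to be edited.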
+
+
+ def _format_label(text: str, wrap_width: int = 55) -> str:
+     """Return an HTML-like label with wrapped lines and bullets.
+
+     Single newlines inside paragraphs or list items are treated as spaces
+     so that manual line breaks in the YAML do not force breaks in the final
+     label. Blank lines delimit paragraphs, and lines starting with ``*`` or a
+     numbered prefix begin list items.
+     """
+
+     lines_out: List[str] = []
+     TAG_PLACEHOLDER = "\uf000"
+     CODE_START = "\uf001"
+     CODE_END = "\uf002"
+     text = text.replace(TAG_PLACEHOLDER, " ")
+     text = text.replace("<obs>", '<font color="blue">→')
+     text = text.replace("</obs>", "</font>")
+     code_re = re.compile(r"`([^`]+)`")
+     text = code_re.sub(lambda m: CODE_START + m.group(1) + CODE_END, text)
+     lines = text.splitlines()
+     i = 0
+     para_buf: List[str] = []
+
+     # ``textwrap`` may split HTML tags like ``<font color="blue">`` into
+     # multiple pieces and their character count should not contribute to the
+     # wrapping width. Replace tags with a placeholder character before
+     # wrapping and restore them afterwards.
+     tag_re = re.compile(r"<[^>]*>")
+     tag_list: List[str] = []
+
+     def _wrap_preserving_tags(
+         s: str, tag_list: List[str]
+     ) -> Tuple[List[str], List[str]]:
+         """Wrap *s* without counting HTML tags toward line width."""
+         s = s.replace(TAG_PLACEHOLDER, " ")
+
+         def repl(m: re.Match[str]) -> str:
+             tag_list.append(m.group(0))
+             return TAG_PLACEHOLDER
+
+         protected = tag_re.sub(repl, s)
+         wrapped = textwrap.wrap(
+             protected,
+             width=wrap_width,
+             break_long_words=False,
+             break_on_hyphens=False,
+         )
+         return (wrapped or [""]), tag_list
+
+     def flush_para() -> None:
+         """Wrap and emit any buffered paragraph text."""
+         nonlocal para_buf, tag_list
+         if para_buf:
+             para_text = " ".join(s.strip() for s in para_buf)
+             wrapped, tag_list = _wrap_preserving_tags(para_text, tag_list)
+             for seg in wrapped:
+                 lines_out.append(seg)
+                 lines_out.append('<br align="left"/>')
+             para_buf = []
+
+     while i < len(lines):
+         raw = lines[i].rstrip()
+         if not raw:
+             flush_para()  # end current paragraph on blank line
+             if not lines_out or lines_out[-1] != '<br align="left"/>':
+                 lines_out.append('<br align="left"/>')
+             i += 1
+             continue
+         if raw.startswith("<font") and raw.endswith(">") or raw == "</font>":
+             flush_para()  # close paragraph before explicit font tag line
+             lines_out.append(raw)
+             i += 1
+             continue
+         bullet = False
+         number = None
+         content = raw
+         if raw.startswith("*"):
+             bullet = True
+             content = raw[1:].lstrip()
+         else:
+             m = re.match(r"(\d+)[.)]\s*(.*)", raw)
+             if m:
+                 number = m.group(1)
+                 content = m.group(2)
+         if bullet or number is not None:
+             flush_para()  # end paragraph before list item
+             item_lines = [content]
+             i += 1
+             while i < len(lines):
+                 nxt = lines[i].rstrip()
+                 if not nxt:
+                     break
+                 if (
+                     nxt.startswith("*")
+                     or re.match(r"\d+[.)]\s*", nxt)
+                     or (nxt.startswith("<font") and nxt.endswith(">"))
+                     or nxt == "</font>"
+                 ):
+                     break
+                 item_lines.append(nxt.lstrip())
+                 i += 1
+             text_item = " ".join(item_lines)
+             if lines_out and lines_out[-1] != '<br align="left"/>':
+                 lines_out.append('<br align="left"/>')
+             wrapped, tag_list = _wrap_preserving_tags(text_item, tag_list)
+             for j, seg in enumerate(wrapped):
+                 if j == 0:
+                     prefix = "• " if bullet else f"{number}. "
+                 else:
+                     prefix = " "
+                 lines_out.append(f"{prefix}{seg}")
+                 lines_out.append('<br align="left"/>')
+             continue
+         else:
+             para_buf.append(raw)
+             i += 1
+
+     flush_para()  # emit trailing buffered paragraph
+
+     if lines_out:
+         if lines_out[-1] == "</font>":
+             if len(lines_out) < 2 or lines_out[-2] != '<br align="left"/>':
+                 lines_out.insert(-1, '<br align="left"/>')
+         elif lines_out[-1] != '<br align="left"/>':
+             lines_out.append('<br align="left"/>')
+
+     body = "\n".join(lines_out)
+     body = body.replace(CODE_START, '<font face="Courier">')
+     body = body.replace(CODE_END, "</font>")
+     for tag in tag_list:
+         body = body.replace(TAG_PLACEHOLDER, tag, 1)
+
+     return "<" + body + ">"
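+
+ # Illustrative sketch of the label formatting (the input text is made up):
+ #
+ #     _format_label("Ship the build.\n\n* unit tests\n* docs", wrap_width=30)
+ #
+ # returns a Graphviz HTML-like label of the form ``<...>`` in which the
+ # paragraph and each ``*`` item become left-aligned lines, bullets are
+ # rendered as ``•``, and any `backtick` span is wrapped in a Courier
+ # ``<font>`` tag.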
+
+
+ def _node_text_with_due(node):
+     """Return node text with due date appended when present."""
+     if "due" not in node or node["due"] is None:
+         if "text" in node:
+             return node["text"]
+         return None
+
+     due_text = str(node["due"]).strip()
+     if not due_text:
+         if "text" in node:
+             return node["text"]
+         return None
+
+     # ``parse_due_string`` accepts numerous human readable date formats so
+     # writers can use whatever is most convenient in the YAML file.
+     due_date = parse_due_string(due_text).date()
+     today_date = date.today()
+
+     # Render the due date in a status color, optionally showing an original
+     # date that slipped. The original value is italicized so it stands out
+     # while remaining inside the colored tag for continuity.
+     def date_formatter(thedate):
+         return f"{thedate.month}/{thedate.day}/{thedate.strftime('%y')}"
+
+     # Completed tasks should always show their calendar date so the original
+     # deadline remains visible even if it was today or overdue when finished.
+     is_completed = "style" in node and node["style"] == "completed"
+     # Replace the actual date with high-visibility notices when the deadline
+     # is today or overdue. These are rendered in a bold 12 pt font so they are
+     # immediately noticeable in the diagram. Completed tasks skip these
+     # notices and keep the real date.
+     if not is_completed and due_date == today_date:
+         formatted = '<font point-size="12"><b>TODAY</b></font>'
+     elif not is_completed and due_date < today_date:
+         days_overdue = (today_date - due_date).days
+         unit = "DAY" if days_overdue == 1 else "DAYS"
+         formatted = (
+             f'<font point-size="12"><b>{days_overdue} {unit}'
+             + " OVERDUE</b></font>"
+         )
+     else:
+         formatted = date_formatter(due_date)
+         if "orig_due" in node and node["orig_due"] is not None:
+             orig_str = date_formatter(
+                 parse_due_string(str(node["orig_due"]).strip())
+             )
+             formatted = f"<i>{orig_str}</i>→{formatted}"
+     # Completed tasks show a green due date so the status is obvious at a
+     # glance. Pending deadlines within the next week are red for extra
+     # emphasis; dates further out are orange.
+     if is_completed:
+         due_color = "green"
+     elif due_date > today_date and (due_date - today_date).days <= 7:
+         due_color = "red"
+     else:
+         due_color = "orange"
+     formatted = f'<font color="{due_color}">{formatted}</font>'
+
+     if "text" in node and node["text"]:
+         if node["text"].endswith("\n"):
+             return node["text"] + formatted
+         return node["text"] + "\n" + formatted
+
+     return formatted
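+
+ # Illustrative sketch (the node contents are made up): for a node such as
+ #
+ #     {"text": "Submit report", "due": "06/30/25", "style": "completed"}
+ #
+ # the function returns the text plus a trailing ``<font color="green">``
+ # date line; pending nodes get orange (or red inside a week), and pending
+ # nodes due today or in the past get a bold TODAY / N DAYS OVERDUE notice
+ # instead of the calendar date.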
+
+
+ def _node_label(text, wrap_width=55):
+     if text is None:
+         return ""
+     return _format_label(text, wrap_width)
+
+
+ def _normalize_graph_dates(data):
+     # Normalize due dates to mm/dd/yy so the YAML is consistent across years.
+     if "nodes" not in data:
+         return
+     default_date = datetime(date.today().year, 1, 1)
+     for name in data["nodes"]:
+         if (
+             "due" in data["nodes"][name]
+             and data["nodes"][name]["due"] is not None
+         ):
+             if str(data["nodes"][name]["due"]).strip():
+                 parsed = parse_due_string(
+                     str(data["nodes"][name]["due"]).strip(),
+                     default=default_date,
+                 )
+                 data["nodes"][name]["due"] = parsed.date().strftime("%m/%d/%y")
+         if (
+             "orig_due" in data["nodes"][name]
+             and data["nodes"][name]["orig_due"] is not None
+         ):
+             if str(data["nodes"][name]["orig_due"]).strip():
+                 parsed = parse_due_string(
+                     str(data["nodes"][name]["orig_due"]).strip(),
+                     default=default_date,
+                 )
+                 data["nodes"][name]["orig_due"] = parsed.date().strftime(
+                     "%m/%d/%y"
+                 )
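+
+ # Illustrative sketch (the node name and date are made up): after
+ #
+ #     data = {"nodes": {"a": {"due": "March 5"}}}
+ #     _normalize_graph_dates(data)
+ #
+ # the value is rewritten as ``"03/05/YY"``, where the year defaults to the
+ # current year via ``default_date`` above.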
+
+
+ def _append_node(
+     lines, indent, node_name, data, wrap_width, order_by_date, sort_order
+ ):
+     # Add a node line with an optional sort hint so Graphviz keeps date order.
+     if node_name in data["nodes"]:
+         label = _node_label(
+             _node_text_with_due(data["nodes"][node_name]), wrap_width
+         )
+     else:
+         label = ""
+     if label:
+         if order_by_date:
+             lines.append(
+                 f"{indent}{node_name} [label={label},"
+                 f" sortv={sort_order[node_name]}];"
+             )
+         else:
+             lines.append(f"{indent}{node_name} [label={label}];")
+     else:
+         if order_by_date:
+             lines.append(
+                 f"{indent}{node_name} [sortv={sort_order[node_name]}];"
+             )
+         else:
+             lines.append(f"{indent}{node_name};")
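+
+ # Illustrative sketch (names are made up): with ``order_by_date`` disabled,
+ #
+ #     _append_node(lines, " ", "task_a", data, 55, False, None)
+ #
+ # appends either ``task_a [label=<...>];`` when the node has text or due
+ # content, or a bare ``task_a;`` otherwise; with ``order_by_date`` enabled a
+ # ``sortv=N`` attribute taken from ``sort_order`` is added so Graphviz
+ # preserves the calendar ordering.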
+
+
+ def yaml_to_dot(data, wrap_width=55, order_by_date=False):
+     lines = [
+         "digraph G {",
+         " graph [",
+         " rankdir=LR,",
+         ' size="10.6,8.1!",  // ~11x8.5 minus margins; ! gives exact size',
+         " margin=0.20,",
+         " ratio=fill,",
+         " splines=true,",
+         " concentrate=true,",
+         " center=true,",
+         " nodesep=0.25,",
+         " ranksep=0.35",
+         " ];",
+         " node [shape=box,width=0.5];",
+     ]
+     if "nodes" not in data:
+         data["nodes"] = {}
+     if "styles" not in data:
+         data["styles"] = {}
+     ordered_names = None
+     sort_order = None
+     ordered_set = None
+     if order_by_date:
+         # Order nodes by due date so the graph renders boxes in calendar order.
+         order_pairs = []
+         for name in data["nodes"]:
+             # Exclude nodes without a due date from date-ordered display.
+             if (
+                 "due" in data["nodes"][name]
+                 and data["nodes"][name]["due"] is not None
+             ):
+                 if str(data["nodes"][name]["due"]).strip():
+                     due_date = parse_due_string(
+                         str(data["nodes"][name]["due"]).strip()
+                     ).date()
+                     order_pairs.append((due_date, name))
+         # Capture a stable order and use sort values so Graphviz keeps it.
+         ordered_names = [
+             name
+             for due_date, name in sorted(
+                 order_pairs, key=lambda item: (item[0], item[1])
+             )
+         ]
+         sort_order = {name: index for index, name in enumerate(ordered_names)}
+         ordered_set = set(ordered_names)
+     handled = set()
+     # Group nodes by their declared style so they share subgraph attributes.
+     style_members = {}
+     for name in data["nodes"]:
+         if order_by_date and name not in ordered_set:
+             continue
+         if "style" in data["nodes"][name] and data["nodes"][name]["style"]:
+             style_members.setdefault(data["nodes"][name]["style"], []).append(
+                 name
+             )
+
+     for style_name in data["styles"]:
+         if style_name not in style_members:
+             continue
+         if not style_members[style_name]:
+             continue
+         lines.append(f" subgraph {style_name} {{")
+         if (
+             "attrs" in data["styles"][style_name]
+             and "node" in data["styles"][style_name]["attrs"]
+         ):
+             if isinstance(data["styles"][style_name]["attrs"]["node"], list):
+                 attr_str = ", ".join(
+                     f"{k}={v}"
+                     for k, v in data["styles"][style_name]["attrs"]["node"][
+                         0
+                     ].items()
+                 )
+             else:
+                 attr_str = ", ".join(
+                     f"{k}={v}"
+                     for k, v in data["styles"][style_name]["attrs"][
+                         "node"
+                     ].items()
+                 )
+             lines.append(f" node [{attr_str}];")
+         for node_name in style_members[style_name]:
+             _append_node(
+                 lines,
+                 " ",
+                 node_name,
+                 data,
+                 wrap_width,
+                 order_by_date,
+                 sort_order,
+             )
+             handled.add(node_name)
+         lines.append(" };")
+
+     if ordered_names is None:
+         ordered_names = list(data["nodes"].keys())
+     for name in ordered_names:
+         if name in handled:
+             continue
+         _append_node(
+             lines,
+             " ",
+             name,
+             data,
+             wrap_width,
+             order_by_date,
+             sort_order,
+         )
+     if order_by_date:
+         # Arrange nodes in a grid while preserving style subgraphs.
+         column_count = 5
+         for index in range(0, len(ordered_names), column_count):
+             lines.append(
+                 " { rank=same; "
+                 + "; ".join(ordered_names[index : index + column_count])
+                 + "; }"
+             )
+             row_nodes = ordered_names[index : index + column_count]
+             for row_index in range(len(row_nodes) - 1):
+                 lines.append(
+                     f" {row_nodes[row_index]} ->"
+                     f" {row_nodes[row_index + 1]} [style=invis];"
+                 )
+         for index in range(0, len(ordered_names) - column_count, column_count):
+             lines.append(
+                 f" {ordered_names[index]} ->"
+                 f" {ordered_names[index + column_count]} [style=invis];"
+             )
+     else:
+         # Edges come from the children lists; in date-ordered mode the boxes
+         # stand alone and no edges are drawn.
+         for name in data["nodes"]:
+             if "children" in data["nodes"][name]:
+                 for child in data["nodes"][name]["children"]:
+                     lines.append(f" {name} -> {child};")
+     lines.append("}")
+     return "\n".join(lines)
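+
+ # Illustrative usage (a sketch; assumes ``data`` came from load_graph_yaml):
+ #
+ #     dot_text = yaml_to_dot(data, wrap_width=40)
+ #     # "digraph G {" ... one box per node, parent -> child edges ... "}"
+ #
+ #     dot_text = yaml_to_dot(data, order_by_date=True)
+ #     # only nodes with a due date, laid out in a 5-column grid held in
+ #     # place by rank=same groups and invisible edges, with no real edges
+ #     # drawn.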
+
+
+ def save_graph_yaml(path, data):
+     # Ensure stored dates are normalized before writing.
+     _normalize_graph_dates(data)
+     with open(path, "w") as f:
+         yaml.dump(
+             data,
+             f,
+             Dumper=IndentDumper,
+             default_flow_style=False,
+             sort_keys=True,
+             allow_unicode=True,
+             indent=2,
+         )
+
+
+ def write_dot_from_yaml(
+     yaml_path,
+     dot_path,
+     update_yaml=True,
+     wrap_width=55,
+     order_by_date=False,
+     old_data=None,
+     validate_due_dates=False,
+ ):
+     data = load_graph_yaml(str(yaml_path), old_data=old_data)
+     _normalize_graph_dates(data)
+     if validate_due_dates:
+         # Enforce that no node's due date is earlier than any ancestor due
+         # date so dependency timelines remain coherent.
+         due_dates = {}
+         for name in data["nodes"]:
+             if (
+                 "due" in data["nodes"][name]
+                 and data["nodes"][name]["due"] is not None
+             ):
+                 due_text = str(data["nodes"][name]["due"]).strip()
+                 if due_text:
+                     due_dates[name] = parse_due_string(due_text).date()
+         for name in due_dates:
+             parents_to_check = [
+                 (parent, [parent]) for parent in data["nodes"][name]["parents"]
+             ]
+             seen_parents = set()
+             while parents_to_check:
+                 parent, path = parents_to_check.pop()
+                 if parent in seen_parents:
+                     continue
+                 seen_parents.add(parent)
+                 if parent in due_dates and due_dates[name] < due_dates[parent]:
+                     path_str = " -> ".join(path)
+                     raise ValueError(
+                         "Refusing to render watch_graph because node "
+                         f"'{name}' has due date {due_dates[name].isoformat()},"
+                         " which is earlier than its ancestor "
+                         f"'{parent}' due date {due_dates[parent].isoformat()}."
+                         " Parent chain checked: "
+                         f"{name} -> {path_str}. "
+                         "Update the node's due date or adjust the parent "
+                         "relationship so child due dates are not earlier than "
+                         "any ancestor."
+                     )
+                 if (
+                     parent in data["nodes"]
+                     and data["nodes"][parent]["parents"]
+                 ):
+                     for grandparent in data["nodes"][parent]["parents"]:
+                         parents_to_check.append(
+                             (grandparent, path + [grandparent])
+                         )
+     dot_str = yaml_to_dot(
+         data, wrap_width=wrap_width, order_by_date=order_by_date
+     )
+     Path(dot_path).write_text(dot_str)
+     if update_yaml:
+         save_graph_yaml(str(yaml_path), data)
+     return data
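+
+ # End-to-end sketch (the paths are hypothetical):
+ #
+ #     write_dot_from_yaml(
+ #         "plan.yml",
+ #         "plan.dot",
+ #         order_by_date=False,
+ #         validate_due_dates=True,
+ #     )
+ #
+ # loads and re-links the graph, normalizes the stored dates, raises
+ # ValueError if any node is due before one of its ancestors, writes the DOT
+ # file, and (because ``update_yaml`` defaults to True) saves the cleaned-up
+ # YAML back to "plan.yml".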