biblealignlib 0.3.1__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/PKG-INFO +6 -2
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/README.md +5 -1
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/autoalign/mapper.py +2 -2
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/burrito/AlignmentGroup.py +8 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/burrito/BaseToken.py +1 -1
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/burrito/VerseData.py +12 -2
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/burrito/alignments.py +17 -3
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/burrito/manager.py +4 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/burrito/source.py +3 -4
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/burrito/target.py +12 -1
- biblealignlib-0.3.2/biblealignlib/util/DiffAlignments.py +168 -0
- biblealignlib-0.3.2/biblealignlib/util/DiffTargets.py +778 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/pyproject.toml +1 -1
- biblealignlib-0.3.1/biblealignlib/util/DiffTargets.py +0 -402
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/LICENSE +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/LICENSE.md +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/__init__.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/autoalign/Score.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/autoalign/__init__.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/autoalign/corpusmapping.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/autoalign/eflomal.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/autoalign/reader.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/autoalign/runeflomal.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/autoalign/scorer.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/autoalign/writer.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/burrito/AlignmentSet.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/burrito/AlignmentType.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/burrito/BadRecord.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/burrito/DiffRecord.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/burrito/__init__.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/burrito/util.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/coverage/Coverage.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/coverage/__init__.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/coverage/analyzer.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/coverage/exporter.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/coverage/filters.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/interlinear/__init__.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/interlinear/reverse.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/interlinear/token.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/strongs.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/util/Transfer.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/util/__init__.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/util/canonsplit.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/util/merger.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/util/tokens_to_chars.py +0 -0
- {biblealignlib-0.3.1 → biblealignlib-0.3.2}/biblealignlib/util/vocab.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: biblealignlib
|
|
3
|
-
Version: 0.3.1
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: Code for managing Word-level alignments for Bibles, including both automatic alignments and manually corrected alignments.
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -36,9 +36,13 @@ Description-Content-Type: text/markdown
|
|
|
36
36
|
|
|
37
37
|
# biblealignlib
|
|
38
38
|
|
|
39
|
-
Biblica's code for working with Bible alignment data from
|
|
39
|
+
Biblica's Python code for working with Bible alignment data from
|
|
40
40
|
https://github.com/Clear-Bible/Alignments .
|
|
41
41
|
|
|
42
|
+
This code is ©2024-2026 by [Biblica, Inc](http://biblica.com) and is
|
|
43
|
+
licensed under [CC BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/).
|
|
44
|
+
|
|
45
|
+
|
|
42
46
|
## Installing extra dependencies
|
|
43
47
|
|
|
44
48
|
### eflomal
|
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
# biblealignlib
|
|
2
2
|
|
|
3
|
-
Biblica's code for working with Bible alignment data from
|
|
3
|
+
Biblica's Python code for working with Bible alignment data from
|
|
4
4
|
https://github.com/Clear-Bible/Alignments .
|
|
5
5
|
|
|
6
|
+
This code is ©2024-2026 by [Biblica, Inc](http://biblica.com) and is
|
|
7
|
+
licensed under [CC BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/).
|
|
8
|
+
|
|
9
|
+
|
|
6
10
|
## Installing extra dependencies
|
|
7
11
|
|
|
8
12
|
### eflomal
|
|
@@ -18,9 +18,9 @@ commonly used by automated alignment algorithms.
|
|
|
18
18
|
>>> pm.bcv["mappings"]["41004003"]
|
|
19
19
|
<CorpusMapping: 41004003>
|
|
20
20
|
>>> pm.bcv["mappings"]["41004003"].source_pairs
|
|
21
|
-
[(<Source: n41004003001
|
|
21
|
+
[(<Source: n41004003001|Ἀκούετε>, 0), (<Source: n41004003002|ἰδοὺ>, 1), (<Source: n41004003003|ἐξῆλθεν>, 2), ...
|
|
22
22
|
>>> pm.bcv["mappings"]["41004003"].target_pairs
|
|
23
|
-
[(<Target: 410040030011>, 0), (<Target: 410040030021>, 1), (<Target: 410040030031>, 2), ...
|
|
23
|
+
[(<Target: 410040030011|Listen>, 0), (<Target: 410040030021|A>, 1), (<Target: 410040030031|sower>, 2), ...
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
"""
|
|
@@ -283,6 +283,14 @@ class AlignmentRecord:
|
|
|
283
283
|
"""True if any selectors in references are incomplete."""
|
|
284
284
|
return any(ref.incomplete for ref in self.references.values())
|
|
285
285
|
|
|
286
|
+
def update_target_selectors(self, selectors: list[str]) -> None:
|
|
287
|
+
"""Replace the target selectors for this record.
|
|
288
|
+
|
|
289
|
+
Selectors are sorted, matching the behaviour of
|
|
290
|
+
AlignmentReference.__post_init__.
|
|
291
|
+
"""
|
|
292
|
+
self.references["target"].selectors = sorted(selectors)
|
|
293
|
+
|
|
286
294
|
def asdict(
|
|
287
295
|
self, positional: bool = False, withmeta: bool = True, withmaculaprefix: bool = False
|
|
288
296
|
) -> dict[str, Any]:
|
|
@@ -156,6 +156,16 @@ class VerseData:
|
|
|
156
156
|
for trg in targets:
|
|
157
157
|
print(f"Target: {trg._display}")
|
|
158
158
|
|
|
159
|
+
def display_record(self, alrec: AlignmentRecord) -> None:
|
|
160
|
+
"""Display an alignment record from this instance."""
|
|
161
|
+
source_tokenstring: str = ", ".join(
|
|
162
|
+
[self.sourceitems[sel].tokenstr for sel in alrec.source_selectors]
|
|
163
|
+
)
|
|
164
|
+
target_tokenstring: str = ", ".join(
|
|
165
|
+
[self.targetitems[sel].tokenstr for sel in alrec.target_selectors]
|
|
166
|
+
)
|
|
167
|
+
print(f"{alrec.meta.id}: {source_tokenstring} --- {target_tokenstring}")
|
|
168
|
+
|
|
159
169
|
def unaligned(self, typeattr: str = "targets", keepexcluded: bool = False) -> None:
|
|
160
170
|
"""Display tokens from typeattr that are _not_ aligned."""
|
|
161
171
|
assert typeattr in self._typeattrs, f"typeattr should be one of {self._typeattrs}"
|
|
@@ -172,13 +182,13 @@ class VerseData:
|
|
|
172
182
|
if aligned:
|
|
173
183
|
for sources, targets in self.alignments:
|
|
174
184
|
print(
|
|
175
|
-
f"{str([src.
|
|
185
|
+
f"{str([src.tokenstr for src in sources]):{srcwidth}}\t\t{[trg.tokenstr for trg in targets]}"
|
|
176
186
|
)
|
|
177
187
|
else:
|
|
178
188
|
# show all sources with their (possibly empty) target alignments
|
|
179
189
|
for source in self.sources:
|
|
180
190
|
print(
|
|
181
|
-
f"{str(source.
|
|
191
|
+
f"{str(source.tokenstr):{srcwidth}}\t\t{[trg.tokenstr for trg in self.get_source_alignments(source)]}"
|
|
182
192
|
)
|
|
183
193
|
|
|
184
194
|
def get_texts(
|
|
@@ -301,6 +301,7 @@ def write_alignment_group(group: AlignmentGroup, f: TextIO, hoist: bool = True)
|
|
|
301
301
|
"""Write JSON data for an arbitrary group in Scripture Burrito format.
|
|
302
302
|
|
|
303
303
|
Writes some of the JSON by hand to get records on the same line.
|
|
304
|
+
Record meta.id values are assigned sequentially per BCV, e.g. "40001001.01".
|
|
304
305
|
"""
|
|
305
306
|
|
|
306
307
|
def _write_documents(out: TextIO, documents: tuple[Document, Document]) -> None:
|
|
@@ -311,19 +312,32 @@ def write_alignment_group(group: AlignmentGroup, f: TextIO, hoist: bool = True)
|
|
|
311
312
|
out.write(" ],\n")
|
|
312
313
|
|
|
313
314
|
def _write_meta(out: TextIO, meta: Metadata) -> None:
|
|
314
|
-
"""Write
|
|
315
|
+
"""Write metadata to out."""
|
|
315
316
|
metarow = '"meta": ' + json.dumps(meta.asdict())
|
|
316
317
|
f.write(f" {metarow},\n")
|
|
317
318
|
|
|
319
|
+
def _record_dict(arec: AlignmentRecord, bcv_counters: dict[str, int]) -> dict[str, Any]:
|
|
320
|
+
"""Return the serialized dict for arec with a sequential BCV-based id.
|
|
321
|
+
|
|
322
|
+
This converts the ClearAligner opaque IDs to something
|
|
323
|
+
meaningful, attempting to make files more diff-able.
|
|
324
|
+
"""
|
|
325
|
+
bcv = arec.source_bcv
|
|
326
|
+
bcv_counters[bcv] = bcv_counters.get(bcv, 0) + 1
|
|
327
|
+
recdict = arec.asdict()
|
|
328
|
+
recdict["meta"]["id"] = f"{bcv}.{bcv_counters[bcv]:02}"
|
|
329
|
+
return recdict
|
|
330
|
+
|
|
318
331
|
f.write("{\n")
|
|
319
332
|
_write_documents(f, group.documents)
|
|
320
333
|
_write_meta(f, group.meta)
|
|
321
334
|
f.write(f' "roles": {json.dumps(group.roles)},\n')
|
|
322
335
|
f.write(f' "type": "{group._type}",\n "records": [\n ')
|
|
323
336
|
# should sort the records: NIV11 doesn't appear to be sorted
|
|
337
|
+
bcv_counters: dict[str, int] = {}
|
|
324
338
|
for arec in group.records[:-1]:
|
|
325
|
-
json.dump(arec
|
|
339
|
+
json.dump(_record_dict(arec, bcv_counters), f)
|
|
326
340
|
f.write(",\n ")
|
|
327
341
|
# now the last one without a comma, because JSON
|
|
328
|
-
json.dump(group.records[-1]
|
|
342
|
+
json.dump(_record_dict(group.records[-1], bcv_counters), f)
|
|
329
343
|
f.write("\n ]}")
|
|
@@ -114,6 +114,10 @@ class Manager(UserDict):
|
|
|
114
114
|
keepbadrecords=self.keepbadrecords,
|
|
115
115
|
)
|
|
116
116
|
self.alignmentsreader.clean_alignments(self.sourceitems, self.targetitems)
|
|
117
|
+
# TODO: upgrade the selectors to use tokenstr. This requires
|
|
118
|
+
# knowing the source and targetitems, but alignmentsreader
|
|
119
|
+
# doesn't have that data
|
|
120
|
+
# self.add_tokenstr_to_records(self)
|
|
117
121
|
# group records by BCV
|
|
118
122
|
self.bcv["records"] = groupby_bcv(
|
|
119
123
|
list(self.alignmentsreader.alignmentgroup.records), lambda r: r.source_bcv
|
|
@@ -17,12 +17,11 @@ called from burrito.manager.Manager().
|
|
|
17
17
|
5468
|
|
18
18
|
# dict: token ID -> Source() instance
|
|
19
19
|
>>> src["n41004003001"]
|
|
20
|
-
|
|
21
|
-
<Source: n41004003001>
|
|
20
|
+
<Source: n41004003001|Ἀκούετε>
|
|
22
21
|
>>> src["n41004003001"].display()
|
|
23
22
|
n41004003001: Ἀκούετε (Listen, ἀκούω, verb)
|
|
24
|
-
>>> src["n41004003001"].
|
|
25
|
-
|
|
23
|
+
>>> src["n41004003001"].tokenstr
|
|
24
|
+
'n41004003001|Ἀκούετε'
|
|
26
25
|
>>> src["n41004003001"].asdict()
|
|
27
26
|
{'identifier': 'n41004003001',
|
|
28
27
|
'altId': 'Ἀκούετε-1',
|
|
@@ -1,4 +1,14 @@
|
|
|
1
|
-
"""Manage the target/translation data for
|
|
1
|
+
"""Manage the target/translation data for alignment data.
|
|
2
|
+
|
|
3
|
+
This typically reads the output of kathairo.
|
|
4
|
+
|
|
5
|
+
Limitations:
|
|
6
|
+
|
|
7
|
+
- Each token is assigned to the relevant source verse, which may be
|
|
8
|
+
different than the verse assignments in the target text. This is
|
|
9
|
+
version-specific, not necessarily a versification issue. So verse
|
|
10
|
+
identifiers may need mapping.
|
|
11
|
+
- Example: SBLGNT for 3JN has v. 15, but all these tokens are in v. 14 in the NIV11.
|
|
2
12
|
|
|
3
13
|
>>> from biblealignlib.burrito import target
|
|
4
14
|
# Reading is normally done by Manager
|
|
@@ -15,6 +25,7 @@
|
|
|
15
25
|
# write the tokens out
|
|
16
26
|
>>> LANGDATAPATH = CLEARROOT / "alignments-eng/data"
|
|
17
27
|
>>> tr.write_tsv(tokenlist=tr.data.values(), outpath=(LANGDATAPATH / "targets/BSB/new-nt_BSB.tsv"))
|
|
28
|
+
|
|
18
29
|
"""
|
|
19
30
|
|
|
20
31
|
from collections import UserDict, defaultdict
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""Compare two alignment groups record by record.
|
|
2
|
+
|
|
3
|
+
Both groups must share the same sourceid, targetid, and
|
|
4
|
+
targetlanguage. This is most useful for checking minor changes to
|
|
5
|
+
ensure you haven't introduced errors.
|
|
6
|
+
|
|
7
|
+
Comparison ignores meta.id (which is assigned on write) but reports
|
|
8
|
+
differences in targets and all other meta fields (status, origin, creator, note).
|
|
9
|
+
|
|
10
|
+
>>> from biblealignlib.burrito import CLEARROOT, AlignmentSet
|
|
11
|
+
>>> from biblealignlib.util.DiffAlignments import DiffAlignments
|
|
12
|
+
>>> LANGDATAPATH = CLEARROOT / "alignments-eng/data"
|
|
13
|
+
>>> alset1 = AlignmentSet(sourceid="SBLGNT", targetid="BSB",
|
|
14
|
+
... targetlanguage="eng", langdatapath=LANGDATAPATH,
|
|
15
|
+
... alternateid="manual")
|
|
16
|
+
>>> alset2 = AlignmentSet(sourceid="SBLGNT", targetid="BSB",
|
|
17
|
+
... targetlanguage="eng", langdatapath=LANGDATAPATH,
|
|
18
|
+
... alternateid="updated")
|
|
19
|
+
>>> da = DiffAlignments(alset1, alset2)
|
|
20
|
+
>>> da.show()
|
|
21
|
+
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from dataclasses import dataclass, field
|
|
25
|
+
|
|
26
|
+
from ..burrito.AlignmentGroup import AlignmentGroup, AlignmentRecord
|
|
27
|
+
from ..burrito.AlignmentSet import AlignmentSet
|
|
28
|
+
from ..burrito.alignments import AlignmentsReader
|
|
29
|
+
|
|
30
|
+
# Meta fields compared between records (id is intentionally excluded)
|
|
31
|
+
_COMPARED_META_FIELDS = ("creator", "note", "origin", "status")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _record_key(rec: AlignmentRecord) -> tuple[str, ...]:
|
|
35
|
+
"""Return a stable key for matching records across groups.
|
|
36
|
+
|
|
37
|
+
Keyed by sorted source selectors, since records are matched on the
|
|
38
|
+
source side and targets may differ.
|
|
39
|
+
"""
|
|
40
|
+
return tuple(sorted(rec.source_selectors))
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _meta_diffs(rec1: AlignmentRecord, rec2: AlignmentRecord) -> dict[str, tuple[str, str]]:
|
|
44
|
+
"""Return a dict of differing meta fields (excluding id).
|
|
45
|
+
|
|
46
|
+
Keys are field names; values are (val_in_rec1, val_in_rec2).
|
|
47
|
+
"""
|
|
48
|
+
diffs: dict[str, tuple[str, str]] = {}
|
|
49
|
+
for field_name in _COMPARED_META_FIELDS:
|
|
50
|
+
v1 = getattr(rec1.meta, field_name, "")
|
|
51
|
+
v2 = getattr(rec2.meta, field_name, "")
|
|
52
|
+
if v1 != v2:
|
|
53
|
+
diffs[field_name] = (str(v1), str(v2))
|
|
54
|
+
return diffs
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
|
|
58
|
+
class RecordDiff:
|
|
59
|
+
"""Captures differences between two matched alignment records."""
|
|
60
|
+
|
|
61
|
+
source_selectors: tuple[str, ...]
|
|
62
|
+
# non-empty when targets differ
|
|
63
|
+
targets1: list[str] = field(default_factory=list)
|
|
64
|
+
targets2: list[str] = field(default_factory=list)
|
|
65
|
+
# non-empty when meta fields (excluding id) differ
|
|
66
|
+
meta_diffs: dict[str, tuple[str, str]] = field(default_factory=dict)
|
|
67
|
+
|
|
68
|
+
@property
|
|
69
|
+
def targets_differ(self) -> bool:
|
|
70
|
+
"""True if target selectors differ between the two records."""
|
|
71
|
+
return self.targets1 != self.targets2
|
|
72
|
+
|
|
73
|
+
def __repr__(self) -> str:
|
|
74
|
+
src = ", ".join(self.source_selectors)
|
|
75
|
+
parts = [f"<RecordDiff src=[{src}]"]
|
|
76
|
+
if self.targets_differ:
|
|
77
|
+
parts.append(f" targets: {self.targets1} -> {self.targets2}")
|
|
78
|
+
for fname, (v1, v2) in self.meta_diffs.items():
|
|
79
|
+
parts.append(f" {fname}: {v1!r} -> {v2!r}")
|
|
80
|
+
parts.append(">")
|
|
81
|
+
return "".join(parts)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class DiffAlignments:
|
|
85
|
+
"""Compare two alignment groups from the same source/target pair.
|
|
86
|
+
|
|
87
|
+
Records are matched by their source selectors. Differences in
|
|
88
|
+
target selectors and metadata (excluding id) are reported.
|
|
89
|
+
"""
|
|
90
|
+
|
|
91
|
+
def __init__(self, alset1: AlignmentSet, alset2: AlignmentSet) -> None:
|
|
92
|
+
"""Initialize and compute differences."""
|
|
93
|
+
for attr in ("sourceid", "targetid", "targetlanguage"):
|
|
94
|
+
v1 = getattr(alset1, attr)
|
|
95
|
+
v2 = getattr(alset2, attr)
|
|
96
|
+
if v1 != v2:
|
|
97
|
+
raise ValueError(f"AlignmentSets differ on {attr!r}: {v1!r} vs {v2!r}")
|
|
98
|
+
self.alset1 = alset1
|
|
99
|
+
self.alset2 = alset2
|
|
100
|
+
self.group1: AlignmentGroup = AlignmentsReader(alset1).alignmentgroup
|
|
101
|
+
self.group2: AlignmentGroup = AlignmentsReader(alset2).alignmentgroup
|
|
102
|
+
|
|
103
|
+
# index each group's records by source-selector key
|
|
104
|
+
self._recs1: dict[tuple[str, ...], AlignmentRecord] = {
|
|
105
|
+
_record_key(r): r for r in self.group1.records
|
|
106
|
+
}
|
|
107
|
+
self._recs2: dict[tuple[str, ...], AlignmentRecord] = {
|
|
108
|
+
_record_key(r): r for r in self.group2.records
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
keys1 = set(self._recs1)
|
|
112
|
+
keys2 = set(self._recs2)
|
|
113
|
+
|
|
114
|
+
# records present only in one group
|
|
115
|
+
self.only_in_1: list[AlignmentRecord] = [self._recs1[k] for k in sorted(keys1 - keys2)]
|
|
116
|
+
self.only_in_2: list[AlignmentRecord] = [self._recs2[k] for k in sorted(keys2 - keys1)]
|
|
117
|
+
|
|
118
|
+
# records present in both; compare targets and meta
|
|
119
|
+
self.record_diffs: list[RecordDiff] = []
|
|
120
|
+
for key in sorted(keys1 & keys2):
|
|
121
|
+
r1, r2 = self._recs1[key], self._recs2[key]
|
|
122
|
+
t1, t2 = sorted(r1.target_selectors), sorted(r2.target_selectors)
|
|
123
|
+
mdiffs = _meta_diffs(r1, r2)
|
|
124
|
+
if t1 != t2 or mdiffs:
|
|
125
|
+
self.record_diffs.append(
|
|
126
|
+
RecordDiff(source_selectors=key, targets1=t1, targets2=t2, meta_diffs=mdiffs)
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
@property
|
|
130
|
+
def has_diffs(self) -> bool:
|
|
131
|
+
"""True if any differences were found."""
|
|
132
|
+
return bool(self.only_in_1 or self.only_in_2 or self.record_diffs)
|
|
133
|
+
|
|
134
|
+
def show(self) -> None:
|
|
135
|
+
"""Print a human-readable summary of all differences."""
|
|
136
|
+
label1 = self.alset1.identifier
|
|
137
|
+
label2 = self.alset2.identifier
|
|
138
|
+
print(f"Comparing {label1!r} vs {label2!r}")
|
|
139
|
+
print(
|
|
140
|
+
f" {len(self.group1.records)} records in {label1}, "
|
|
141
|
+
f"{len(self.group2.records)} records in {label2}"
|
|
142
|
+
)
|
|
143
|
+
if not self.has_diffs:
|
|
144
|
+
print(" No differences found.")
|
|
145
|
+
return
|
|
146
|
+
|
|
147
|
+
if self.only_in_1:
|
|
148
|
+
print(f"\n Records only in {label1} ({len(self.only_in_1)}):")
|
|
149
|
+
for rec in self.only_in_1:
|
|
150
|
+
src = ", ".join(rec.source_selectors)
|
|
151
|
+
print(f" - src=[{src}] tgt={rec.target_selectors}")
|
|
152
|
+
|
|
153
|
+
if self.only_in_2:
|
|
154
|
+
print(f"\n Records only in {label2} ({len(self.only_in_2)}):")
|
|
155
|
+
for rec in self.only_in_2:
|
|
156
|
+
src = ", ".join(rec.source_selectors)
|
|
157
|
+
print(f" + src=[{src}] tgt={rec.target_selectors}")
|
|
158
|
+
|
|
159
|
+
if self.record_diffs:
|
|
160
|
+
print(f"\n Records with differences ({len(self.record_diffs)}):")
|
|
161
|
+
for diff in self.record_diffs:
|
|
162
|
+
src = ", ".join(diff.source_selectors)
|
|
163
|
+
print(f" src=[{src}]")
|
|
164
|
+
if diff.targets_differ:
|
|
165
|
+
print(f" targets: {diff.targets1}")
|
|
166
|
+
print(f" -> {diff.targets2}")
|
|
167
|
+
for fname, (v1, v2) in diff.meta_diffs.items():
|
|
168
|
+
print(f" {fname}: {v1!r} -> {v2!r}")
|