@usejunior/docx-core 0.0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +86 -28
  3. package/dist/.tsbuildinfo +1 -0
  4. package/dist/atomizer.d.ts +218 -0
  5. package/dist/atomizer.d.ts.map +1 -0
  6. package/dist/atomizer.js +856 -0
  7. package/dist/atomizer.js.map +1 -0
  8. package/dist/baselines/atomizer/atomLcs.d.ts +96 -0
  9. package/dist/baselines/atomizer/atomLcs.d.ts.map +1 -0
  10. package/dist/baselines/atomizer/atomLcs.js +347 -0
  11. package/dist/baselines/atomizer/atomLcs.js.map +1 -0
  12. package/dist/baselines/atomizer/debug.d.ts +41 -0
  13. package/dist/baselines/atomizer/debug.d.ts.map +1 -0
  14. package/dist/baselines/atomizer/debug.js +85 -0
  15. package/dist/baselines/atomizer/debug.js.map +1 -0
  16. package/dist/baselines/atomizer/documentReconstructor.d.ts +64 -0
  17. package/dist/baselines/atomizer/documentReconstructor.d.ts.map +1 -0
  18. package/dist/baselines/atomizer/documentReconstructor.js +939 -0
  19. package/dist/baselines/atomizer/documentReconstructor.js.map +1 -0
  20. package/dist/baselines/atomizer/hierarchicalLcs.d.ts +111 -0
  21. package/dist/baselines/atomizer/hierarchicalLcs.d.ts.map +1 -0
  22. package/dist/baselines/atomizer/hierarchicalLcs.js +469 -0
  23. package/dist/baselines/atomizer/hierarchicalLcs.js.map +1 -0
  24. package/dist/baselines/atomizer/inPlaceModifier.d.ts +183 -0
  25. package/dist/baselines/atomizer/inPlaceModifier.d.ts.map +1 -0
  26. package/dist/baselines/atomizer/inPlaceModifier.js +1600 -0
  27. package/dist/baselines/atomizer/inPlaceModifier.js.map +1 -0
  28. package/dist/baselines/atomizer/numberingIntegration.d.ts +59 -0
  29. package/dist/baselines/atomizer/numberingIntegration.d.ts.map +1 -0
  30. package/dist/baselines/atomizer/numberingIntegration.js +209 -0
  31. package/dist/baselines/atomizer/numberingIntegration.js.map +1 -0
  32. package/dist/baselines/atomizer/pipeline.d.ts +65 -0
  33. package/dist/baselines/atomizer/pipeline.d.ts.map +1 -0
  34. package/dist/baselines/atomizer/pipeline.js +510 -0
  35. package/dist/baselines/atomizer/pipeline.js.map +1 -0
  36. package/dist/baselines/atomizer/premergeRuns.d.ts +26 -0
  37. package/dist/baselines/atomizer/premergeRuns.d.ts.map +1 -0
  38. package/dist/baselines/atomizer/premergeRuns.js +150 -0
  39. package/dist/baselines/atomizer/premergeRuns.js.map +1 -0
  40. package/dist/baselines/atomizer/trackChangesAcceptor.d.ts +63 -0
  41. package/dist/baselines/atomizer/trackChangesAcceptor.d.ts.map +1 -0
  42. package/dist/baselines/atomizer/trackChangesAcceptor.js +254 -0
  43. package/dist/baselines/atomizer/trackChangesAcceptor.js.map +1 -0
  44. package/dist/baselines/atomizer/trackChangesAcceptorAst.d.ts +64 -0
  45. package/dist/baselines/atomizer/trackChangesAcceptorAst.d.ts.map +1 -0
  46. package/dist/baselines/atomizer/trackChangesAcceptorAst.js +586 -0
  47. package/dist/baselines/atomizer/trackChangesAcceptorAst.js.map +1 -0
  48. package/dist/baselines/atomizer/xmlToWmlElement.d.ts +65 -0
  49. package/dist/baselines/atomizer/xmlToWmlElement.d.ts.map +1 -0
  50. package/dist/baselines/atomizer/xmlToWmlElement.js +95 -0
  51. package/dist/baselines/atomizer/xmlToWmlElement.js.map +1 -0
  52. package/dist/baselines/diffmatch/documentBuilder.d.ts +44 -0
  53. package/dist/baselines/diffmatch/documentBuilder.d.ts.map +1 -0
  54. package/dist/baselines/diffmatch/documentBuilder.js +227 -0
  55. package/dist/baselines/diffmatch/documentBuilder.js.map +1 -0
  56. package/dist/baselines/diffmatch/paragraphAlignment.d.ts +75 -0
  57. package/dist/baselines/diffmatch/paragraphAlignment.d.ts.map +1 -0
  58. package/dist/baselines/diffmatch/paragraphAlignment.js +206 -0
  59. package/dist/baselines/diffmatch/paragraphAlignment.js.map +1 -0
  60. package/dist/baselines/diffmatch/pipeline.d.ts +33 -0
  61. package/dist/baselines/diffmatch/pipeline.d.ts.map +1 -0
  62. package/dist/baselines/diffmatch/pipeline.js +84 -0
  63. package/dist/baselines/diffmatch/pipeline.js.map +1 -0
  64. package/dist/baselines/diffmatch/runDiff.d.ts +53 -0
  65. package/dist/baselines/diffmatch/runDiff.d.ts.map +1 -0
  66. package/dist/baselines/diffmatch/runDiff.js +253 -0
  67. package/dist/baselines/diffmatch/runDiff.js.map +1 -0
  68. package/dist/baselines/diffmatch/trackChangesRenderer.d.ts +64 -0
  69. package/dist/baselines/diffmatch/trackChangesRenderer.d.ts.map +1 -0
  70. package/dist/baselines/diffmatch/trackChangesRenderer.js +178 -0
  71. package/dist/baselines/diffmatch/trackChangesRenderer.js.map +1 -0
  72. package/dist/baselines/diffmatch/xmlParser.d.ts +45 -0
  73. package/dist/baselines/diffmatch/xmlParser.d.ts.map +1 -0
  74. package/dist/baselines/diffmatch/xmlParser.js +344 -0
  75. package/dist/baselines/diffmatch/xmlParser.js.map +1 -0
  76. package/dist/baselines/wmlcomparer/DocxodusWasm.d.ts +51 -0
  77. package/dist/baselines/wmlcomparer/DocxodusWasm.d.ts.map +1 -0
  78. package/dist/baselines/wmlcomparer/DocxodusWasm.js +83 -0
  79. package/dist/baselines/wmlcomparer/DocxodusWasm.js.map +1 -0
  80. package/dist/baselines/wmlcomparer/DotnetCli.d.ts +40 -0
  81. package/dist/baselines/wmlcomparer/DotnetCli.d.ts.map +1 -0
  82. package/dist/baselines/wmlcomparer/DotnetCli.js +135 -0
  83. package/dist/baselines/wmlcomparer/DotnetCli.js.map +1 -0
  84. package/dist/benchmark/metrics.d.ts +72 -0
  85. package/dist/benchmark/metrics.d.ts.map +1 -0
  86. package/dist/benchmark/metrics.js +45 -0
  87. package/dist/benchmark/metrics.js.map +1 -0
  88. package/dist/benchmark/reporter.d.ts +23 -0
  89. package/dist/benchmark/reporter.d.ts.map +1 -0
  90. package/dist/benchmark/reporter.js +147 -0
  91. package/dist/benchmark/reporter.js.map +1 -0
  92. package/dist/benchmark/runner.d.ts +30 -0
  93. package/dist/benchmark/runner.d.ts.map +1 -0
  94. package/dist/benchmark/runner.js +233 -0
  95. package/dist/benchmark/runner.js.map +1 -0
  96. package/dist/cli/compare-two.d.ts +28 -0
  97. package/dist/cli/compare-two.d.ts.map +1 -0
  98. package/dist/cli/compare-two.js +110 -0
  99. package/dist/cli/compare-two.js.map +1 -0
  100. package/dist/cli/index.d.ts +3 -0
  101. package/dist/cli/index.d.ts.map +1 -0
  102. package/dist/cli/index.js +21 -0
  103. package/dist/cli/index.js.map +1 -0
  104. package/dist/core-types.d.ts +296 -0
  105. package/dist/core-types.d.ts.map +1 -0
  106. package/dist/core-types.js +122 -0
  107. package/dist/core-types.js.map +1 -0
  108. package/dist/footnotes.d.ts +144 -0
  109. package/dist/footnotes.d.ts.map +1 -0
  110. package/dist/footnotes.js +291 -0
  111. package/dist/footnotes.js.map +1 -0
  112. package/dist/format-detection.d.ts +120 -0
  113. package/dist/format-detection.d.ts.map +1 -0
  114. package/dist/format-detection.js +338 -0
  115. package/dist/format-detection.js.map +1 -0
  116. package/dist/index.d.ts +177 -0
  117. package/dist/index.d.ts.map +1 -0
  118. package/dist/index.js +55 -0
  119. package/dist/index.js.map +1 -0
  120. package/dist/integration/output-artifacts.d.ts +6 -0
  121. package/dist/integration/output-artifacts.d.ts.map +1 -0
  122. package/dist/integration/output-artifacts.js +30 -0
  123. package/dist/integration/output-artifacts.js.map +1 -0
  124. package/dist/move-detection.d.ts +211 -0
  125. package/dist/move-detection.d.ts.map +1 -0
  126. package/dist/move-detection.js +391 -0
  127. package/dist/move-detection.js.map +1 -0
  128. package/dist/numbering.d.ts +136 -0
  129. package/dist/numbering.d.ts.map +1 -0
  130. package/dist/numbering.js +446 -0
  131. package/dist/numbering.js.map +1 -0
  132. package/dist/primitives/accept_changes.d.ts +30 -0
  133. package/dist/primitives/accept_changes.d.ts.map +1 -0
  134. package/dist/primitives/accept_changes.js +241 -0
  135. package/dist/primitives/accept_changes.js.map +1 -0
  136. package/dist/primitives/bookmarks.d.ts +12 -0
  137. package/dist/primitives/bookmarks.d.ts.map +1 -0
  138. package/dist/primitives/bookmarks.js +248 -0
  139. package/dist/primitives/bookmarks.js.map +1 -0
  140. package/dist/primitives/comments.d.ts +88 -0
  141. package/dist/primitives/comments.d.ts.map +1 -0
  142. package/dist/primitives/comments.js +703 -0
  143. package/dist/primitives/comments.js.map +1 -0
  144. package/dist/primitives/document.d.ts +168 -0
  145. package/dist/primitives/document.d.ts.map +1 -0
  146. package/dist/primitives/document.js +532 -0
  147. package/dist/primitives/document.js.map +1 -0
  148. package/dist/primitives/document_view.d.ts +93 -0
  149. package/dist/primitives/document_view.d.ts.map +1 -0
  150. package/dist/primitives/document_view.js +722 -0
  151. package/dist/primitives/document_view.js.map +1 -0
  152. package/dist/primitives/dom-helpers.d.ts +94 -0
  153. package/dist/primitives/dom-helpers.d.ts.map +1 -0
  154. package/dist/primitives/dom-helpers.js +219 -0
  155. package/dist/primitives/dom-helpers.js.map +1 -0
  156. package/dist/primitives/errors.d.ts +7 -0
  157. package/dist/primitives/errors.d.ts.map +1 -0
  158. package/dist/primitives/errors.js +10 -0
  159. package/dist/primitives/errors.js.map +1 -0
  160. package/dist/primitives/extract_revisions.d.ts +50 -0
  161. package/dist/primitives/extract_revisions.d.ts.map +1 -0
  162. package/dist/primitives/extract_revisions.js +340 -0
  163. package/dist/primitives/extract_revisions.js.map +1 -0
  164. package/dist/primitives/footnotes.d.ts +37 -0
  165. package/dist/primitives/footnotes.d.ts.map +1 -0
  166. package/dist/primitives/footnotes.js +552 -0
  167. package/dist/primitives/footnotes.js.map +1 -0
  168. package/dist/primitives/formatting_tags.d.ts +30 -0
  169. package/dist/primitives/formatting_tags.d.ts.map +1 -0
  170. package/dist/primitives/formatting_tags.js +217 -0
  171. package/dist/primitives/formatting_tags.js.map +1 -0
  172. package/dist/primitives/index.d.ts +26 -0
  173. package/dist/primitives/index.d.ts.map +1 -0
  174. package/dist/primitives/index.js +26 -0
  175. package/dist/primitives/index.js.map +1 -0
  176. package/dist/primitives/layout.d.ts +53 -0
  177. package/dist/primitives/layout.d.ts.map +1 -0
  178. package/dist/primitives/layout.js +178 -0
  179. package/dist/primitives/layout.js.map +1 -0
  180. package/dist/primitives/list_labels.d.ts +19 -0
  181. package/dist/primitives/list_labels.d.ts.map +1 -0
  182. package/dist/primitives/list_labels.js +57 -0
  183. package/dist/primitives/list_labels.js.map +1 -0
  184. package/dist/primitives/matching.d.ts +17 -0
  185. package/dist/primitives/matching.d.ts.map +1 -0
  186. package/dist/primitives/matching.js +144 -0
  187. package/dist/primitives/matching.js.map +1 -0
  188. package/dist/primitives/merge_runs.d.ts +23 -0
  189. package/dist/primitives/merge_runs.d.ts.map +1 -0
  190. package/dist/primitives/merge_runs.js +195 -0
  191. package/dist/primitives/merge_runs.js.map +1 -0
  192. package/dist/primitives/namespaces.d.ts +90 -0
  193. package/dist/primitives/namespaces.d.ts.map +1 -0
  194. package/dist/primitives/namespaces.js +107 -0
  195. package/dist/primitives/namespaces.js.map +1 -0
  196. package/dist/primitives/numbering.d.ts +27 -0
  197. package/dist/primitives/numbering.d.ts.map +1 -0
  198. package/dist/primitives/numbering.js +182 -0
  199. package/dist/primitives/numbering.js.map +1 -0
  200. package/dist/primitives/prevent_double_elevation.d.ts +18 -0
  201. package/dist/primitives/prevent_double_elevation.d.ts.map +1 -0
  202. package/dist/primitives/prevent_double_elevation.js +190 -0
  203. package/dist/primitives/prevent_double_elevation.js.map +1 -0
  204. package/dist/primitives/reject_changes.d.ts +27 -0
  205. package/dist/primitives/reject_changes.d.ts.map +1 -0
  206. package/dist/primitives/reject_changes.js +371 -0
  207. package/dist/primitives/reject_changes.js.map +1 -0
  208. package/dist/primitives/relationships.d.ts +7 -0
  209. package/dist/primitives/relationships.d.ts.map +1 -0
  210. package/dist/primitives/relationships.js +24 -0
  211. package/dist/primitives/relationships.js.map +1 -0
  212. package/dist/primitives/semantic_tags.d.ts +32 -0
  213. package/dist/primitives/semantic_tags.d.ts.map +1 -0
  214. package/dist/primitives/semantic_tags.js +139 -0
  215. package/dist/primitives/semantic_tags.js.map +1 -0
  216. package/dist/primitives/simplify_redlines.d.ts +19 -0
  217. package/dist/primitives/simplify_redlines.d.ts.map +1 -0
  218. package/dist/primitives/simplify_redlines.js +94 -0
  219. package/dist/primitives/simplify_redlines.js.map +1 -0
  220. package/dist/primitives/styles.d.ts +36 -0
  221. package/dist/primitives/styles.d.ts.map +1 -0
  222. package/dist/primitives/styles.js +190 -0
  223. package/dist/primitives/styles.js.map +1 -0
  224. package/dist/primitives/text.d.ts +27 -0
  225. package/dist/primitives/text.d.ts.map +1 -0
  226. package/dist/primitives/text.js +416 -0
  227. package/dist/primitives/text.js.map +1 -0
  228. package/dist/primitives/validate_document.d.ts +24 -0
  229. package/dist/primitives/validate_document.d.ts.map +1 -0
  230. package/dist/primitives/validate_document.js +147 -0
  231. package/dist/primitives/validate_document.js.map +1 -0
  232. package/dist/primitives/xml.d.ts +5 -0
  233. package/dist/primitives/xml.d.ts.map +1 -0
  234. package/dist/primitives/xml.js +19 -0
  235. package/dist/primitives/xml.js.map +1 -0
  236. package/dist/primitives/zip.d.ts +25 -0
  237. package/dist/primitives/zip.d.ts.map +1 -0
  238. package/dist/primitives/zip.js +78 -0
  239. package/dist/primitives/zip.js.map +1 -0
  240. package/dist/shared/docx/DocxArchive.d.ts +94 -0
  241. package/dist/shared/docx/DocxArchive.d.ts.map +1 -0
  242. package/dist/shared/docx/DocxArchive.js +169 -0
  243. package/dist/shared/docx/DocxArchive.js.map +1 -0
  244. package/dist/shared/ooxml/namespaces.d.ts +149 -0
  245. package/dist/shared/ooxml/namespaces.d.ts.map +1 -0
  246. package/dist/shared/ooxml/namespaces.js +224 -0
  247. package/dist/shared/ooxml/namespaces.js.map +1 -0
  248. package/dist/shared/ooxml/types.d.ts +136 -0
  249. package/dist/shared/ooxml/types.d.ts.map +1 -0
  250. package/dist/shared/ooxml/types.js +7 -0
  251. package/dist/shared/ooxml/types.js.map +1 -0
  252. package/package.json +63 -6
@@ -0,0 +1 @@
1
+ {"version":3,"file":"xmlToWmlElement.d.ts","sourceRoot":"","sources":["../../../src/baselines/atomizer/xmlToWmlElement.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH;;;;;GAKG;AACH,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAGrD;AAED;;;;;GAKG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,OAAO,GAAG,OAAO,GAAG,SAAS,CAG3D;AAED;;;;;GAKG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,OAAO,GAAG,OAAO,GAAG,SAAS,CAI/D;AAED;;;;;;GAMG;AACH,wBAAgB,WAAW,CACzB,IAAI,EAAE,OAAO,EACb,OAAO,EAAE,MAAM,GACd,OAAO,GAAG,SAAS,CAIrB;AAED;;;;;;GAMG;AACH,wBAAgB,eAAe,CAC7B,IAAI,EAAE,OAAO,EACb,OAAO,EAAE,MAAM,GACd,OAAO,EAAE,CAOX;AAED;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,OAAO,EAAE,OAAO,GAAG,QAAQ,GAAG,MAAM,CAElE;AAED;;;;;GAKG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAEtD;AAED;;;;;GAKG;AACH,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,OAAO,EACd,OAAO,CAAC,EAAE,OAAO,GAChB,IAAI,CAEN"}
@@ -0,0 +1,95 @@
1
+ /**
2
+ * XML Parsing and Serialization
3
+ *
4
+ * Parses document.xml into a DOM tree using @xmldom/xmldom.
5
+ * Replaces the former fast-xml-parser + WmlElement POJO approach.
6
+ */
7
+ import { DOMParser, XMLSerializer } from '@xmldom/xmldom';
8
/**
 * Turn the raw text of document.xml into a DOM tree.
 *
 * @param xml - The raw document.xml content
 * @returns The documentElement of the parsed Document
 */
export function parseDocumentXml(xml) {
    const parser = new DOMParser();
    const parsed = parser.parseFromString(xml, 'application/xml');
    return parsed.documentElement;
}
18
/**
 * Find the w:body element in the document tree.
 *
 * @param root - The element to search from (the document root, or the body itself)
 * @returns The w:body element, or undefined if not found
 */
export function findBody(root) {
    // getElementsByTagName only inspects descendants, so a caller that already
    // holds the body element would otherwise get undefined. findDocument and
    // findElement perform the same self-check.
    if (root.tagName === 'w:body')
        return root;
    const bodies = root.getElementsByTagName('w:body');
    return bodies.length > 0 ? bodies[0] : undefined;
}
28
/**
 * Locate the w:document element, starting at (and including) the given element.
 *
 * @param root - The document root element
 * @returns `root` itself when it is w:document, otherwise the first w:document
 *   descendant, or undefined if there is none
 */
export function findDocument(root) {
    const wanted = 'w:document';
    if (root.tagName === wanted) {
        return root;
    }
    const candidates = root.getElementsByTagName(wanted);
    if (candidates.length > 0) {
        return candidates[0];
    }
    return undefined;
}
40
/**
 * Locate the first element with the given tag name, starting at (and
 * including) the given node.
 *
 * @param node - The node to search from
 * @param tagName - The tag name to find
 * @returns `node` itself when its tag name matches, otherwise the first
 *   matching descendant, or undefined
 */
export function findElement(node, tagName) {
    if (node.tagName === tagName) {
        return node;
    }
    const matches = node.getElementsByTagName(tagName);
    if (matches.length > 0) {
        return matches[0];
    }
    return undefined;
}
53
/**
 * Collect every descendant element with the given tag name into a plain array.
 *
 * @param node - The node to search from
 * @param tagName - The tag name to find
 * @returns Array of matching elements, in the order the node list reports them
 */
export function findAllElements(node, tagName) {
    const matches = node.getElementsByTagName(tagName);
    const collected = [];
    let index = 0;
    // Index-based copy: only `length` and indexed access are used on the list.
    while (index < matches.length) {
        collected.push(matches[index]);
        index += 1;
    }
    return collected;
}
68
/**
 * Convert a DOM node back into its XML text form.
 *
 * @param element - The element to serialize
 * @returns XML string produced by XMLSerializer
 */
export function serializeToXml(element) {
    const serializer = new XMLSerializer();
    return serializer.serializeToString(element);
}
77
/**
 * Produce a deep copy of a DOM Element (the element and its whole subtree).
 *
 * @param element - The element to clone
 * @returns The result of `element.cloneNode(true)`
 */
export function cloneElement(element) {
    const deep = true;
    return element.cloneNode(deep);
}
86
/**
 * Migration shim that intentionally does nothing.
 *
 * DOM Elements already expose parentNode/parentElement, so there is nothing
 * to backfill. The function is kept only so existing call sites keep working;
 * callers should drop their calls over time.
 */
export function backfillParentReferences(_node, _parent) {
    // Deliberately empty: parent links are provided natively by the DOM.
    return undefined;
}
95
+ //# sourceMappingURL=xmlToWmlElement.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"xmlToWmlElement.js","sourceRoot":"","sources":["../../../src/baselines/atomizer/xmlToWmlElement.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAE1D;;;;;GAKG;AACH,MAAM,UAAU,gBAAgB,CAAC,GAAW;IAC1C,MAAM,GAAG,GAAG,IAAI,SAAS,EAAE,CAAC,eAAe,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;IACpE,OAAO,GAAG,CAAC,eAAe,CAAC;AAC7B,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,QAAQ,CAAC,IAAa;IACpC,MAAM,MAAM,GAAG,IAAI,CAAC,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IACnD,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAE,MAAM,CAAC,CAAC,CAAa,CAAC,CAAC,CAAC,SAAS,CAAC;AAChE,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,YAAY,CAAC,IAAa;IACxC,IAAI,IAAI,CAAC,OAAO,KAAK,YAAY;QAAE,OAAO,IAAI,CAAC;IAC/C,MAAM,IAAI,GAAG,IAAI,CAAC,oBAAoB,CAAC,YAAY,CAAC,CAAC;IACrD,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAE,IAAI,CAAC,CAAC,CAAa,CAAC,CAAC,CAAC,SAAS,CAAC;AAC5D,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,WAAW,CACzB,IAAa,EACb,OAAe;IAEf,IAAI,IAAI,CAAC,OAAO,KAAK,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1C,MAAM,OAAO,GAAG,IAAI,CAAC,oBAAoB,CAAC,OAAO,CAAC,CAAC;IACnD,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAE,OAAO,CAAC,CAAC,CAAa,CAAC,CAAC,CAAC,SAAS,CAAC;AAClE,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,eAAe,CAC7B,IAAa,EACb,OAAe;IAEf,MAAM,QAAQ,GAAG,IAAI,CAAC,oBAAoB,CAAC,OAAO,CAAC,CAAC;IACpD,MAAM,MAAM,GAAc,EAAE,CAAC;IAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAY,CAAC,CAAC;IACtC,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAAC,OAA2B;IACxD,OAAO,IAAI,aAAa,EAAE,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;AACxD,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,YAAY,CAAC,OAAgB;IAC3C,OAAO,OAAO,CAAC,SAAS,CAAC,IAAI,CAAY,CAAC;AAC5C,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,wBAAwB,CACtC,KAAc,EACd,OAAiB;IAEjB,6CAA6C;AAC/C,CAAC"}
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Document builder for reconstructing DOCX with track changes.
3
+ *
4
+ * Takes alignment results and produces a new document.xml with
5
+ * insertions and deletions marked using OOXML track changes.
6
+ */
7
+ import type { AlignmentResult } from '../../shared/ooxml/types.js';
8
+ import { type ExtendedParagraphInfo } from './xmlParser.js';
9
+ /**
10
+ * Build options for document generation: the author/date pair that generateParagraphOperations hands to the track-change renderers.
11
+ */
12
+ export interface BuildOptions {
13
+ author: string; // passed as the `author` argument when rendering deleted, inserted and modified paragraphs
14
+ date: Date; // passed as the `date` argument alongside `author` (presumably the revision timestamp; confirm in trackChangesRenderer)
15
+ }
16
+ /**
17
+ * Represents a paragraph operation in the output document: one generated paragraph plus how it relates to the original and revised documents.
18
+ */
19
+ export interface ParagraphOperation {
20
+ /** Type of operation */
21
+ type: 'unchanged' | 'modified' | 'deleted' | 'inserted';
22
+ /** Generated XML for this paragraph */
23
+ xml: string;
24
+ /** Index in original document (for ordering); generateParagraphOperations leaves it unset on 'inserted' operations */
25
+ originalIndex?: number;
26
+ /** Index in revised document (for ordering); generateParagraphOperations leaves it unset on 'deleted' operations */
27
+ revisedIndex?: number;
28
+ }
29
+ /**
30
+ * Generate paragraph operations from alignment result.
31
+ *
32
+ * This converts the alignment result into a list of XML paragraph operations
33
+ * that can be used to build the output document.
34
+ */
35
+ export declare function generateParagraphOperations(alignment: AlignmentResult, originalParagraphs: ExtendedParagraphInfo[], revisedParagraphs: ExtendedParagraphInfo[], options: BuildOptions): ParagraphOperation[];
36
+ /**
37
+ * Build a new document.xml with track changes applied.
38
+ *
39
+ * @param originalXml - The original document.xml content
40
+ * @param operations - Ordered list of paragraph operations
41
+ * @returns The new document.xml content
42
+ */
43
+ export declare function buildDocumentWithTrackChanges(originalXml: string, operations: ParagraphOperation[]): string;
44
+ //# sourceMappingURL=documentBuilder.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"documentBuilder.d.ts","sourceRoot":"","sources":["../../../src/baselines/diffmatch/documentBuilder.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAW,MAAM,6BAA6B,CAAC;AAQ5E,OAAO,EAAiB,KAAK,qBAAqB,EAAE,MAAM,gBAAgB,CAAC;AAE3E;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,IAAI,CAAC;CACZ;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,wBAAwB;IACxB,IAAI,EAAE,WAAW,GAAG,UAAU,GAAG,SAAS,GAAG,UAAU,CAAC;IACxD,uCAAuC;IACvC,GAAG,EAAE,MAAM,CAAC;IACZ,gDAAgD;IAChD,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,+CAA+C;IAC/C,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED;;;;;GAKG;AACH,wBAAgB,2BAA2B,CACzC,SAAS,EAAE,eAAe,EAC1B,kBAAkB,EAAE,qBAAqB,EAAE,EAC3C,iBAAiB,EAAE,qBAAqB,EAAE,EAC1C,OAAO,EAAE,YAAY,GACpB,kBAAkB,EAAE,CAkItB;AAED;;;;;;GAMG;AACH,wBAAgB,6BAA6B,CAC3C,WAAW,EAAE,MAAM,EACnB,UAAU,EAAE,kBAAkB,EAAE,GAC/B,MAAM,CAkBR"}
@@ -0,0 +1,227 @@
1
+ /**
2
+ * Document builder for reconstructing DOCX with track changes.
3
+ *
4
+ * Takes alignment results and produces a new document.xml with
5
+ * insertions and deletions marked using OOXML track changes.
6
+ */
7
+ import { diffRuns } from './runDiff.js';
8
+ import { renderTrackChanges, generateDeletedParagraph, generateInsertedParagraph, wrapInParagraph, } from './trackChangesRenderer.js';
9
+ import { extractSectPr } from './xmlParser.js';
10
/**
 * Generate paragraph operations from alignment result.
 *
 * Walks the revised paragraphs in order. Each revised paragraph yields one
 * operation: 'unchanged' or 'modified' when it has an entry in
 * `alignment.matched`, otherwise 'inserted'. Before a matched paragraph, the
 * not-yet-emitted unmatched original paragraphs that precede its original
 * counterpart are emitted as 'deleted'; unmatched originals still pending
 * after the walk are appended as 'deleted' at the end.
 *
 * @param alignment - Alignment result; only `alignment.matched` is read. Each
 *   entry supplies `original`, `revised` (both carrying `originalIndex`) and
 *   `similarity`.
 * @param originalParagraphs - Paragraphs of the original document
 * @param revisedParagraphs - Paragraphs of the revised document
 * @param options - `author` and `date`, passed through to the renderers
 * @returns Paragraph operations in output order
 */
export function generateParagraphOperations(alignment, originalParagraphs, revisedParagraphs, options) {
    const operations = [];
    const { author, date } = options;
    // Original paragraphs keyed by their own index, used to render deletions.
    const originalByIndex = new Map();
    for (const p of originalParagraphs) {
        originalByIndex.set(p.originalIndex, p);
    }
    const matchedOriginalIndices = new Set();
    // Revised index -> matched original paragraph.
    const revisedToOriginal = new Map();
    // Revised index -> similarity, so the main loop does not rescan
    // alignment.matched for every paragraph.
    const similarityByRevisedIndex = new Map();
    for (const match of alignment.matched) {
        const origPara = match.original;
        const revPara = match.revised;
        matchedOriginalIndices.add(origPara.originalIndex);
        revisedToOriginal.set(revPara.originalIndex, origPara);
        // Keep the first entry per revised index (what a linear find() returns).
        if (!similarityByRevisedIndex.has(revPara.originalIndex)) {
            similarityByRevisedIndex.set(revPara.originalIndex, match.similarity);
        }
    }
    // First original index whose deletion status has not been decided yet.
    // It only ever moves forward, so an out-of-order match can never cause an
    // unmatched original paragraph to be emitted as deleted twice.
    let nextOriginalIndex = 0;
    const emitDeletionsBefore = (endExclusive) => {
        for (let i = nextOriginalIndex; i < endExclusive; i++) {
            if (matchedOriginalIndices.has(i))
                continue;
            const deletedPara = originalByIndex.get(i);
            if (!deletedPara)
                continue;
            operations.push({
                type: 'deleted',
                xml: generateDeletedParagraph(deletedPara.runs, author, date, extractPPrContent(deletedPara.pPrXml)),
                originalIndex: i,
            });
        }
        nextOriginalIndex = Math.max(nextOriginalIndex, endExclusive);
    };
    // NOTE(review): lookups below use the array position `revIdx`, while the
    // maps are keyed by `revised.originalIndex`; this assumes
    // revisedParagraphs[i].originalIndex === i - confirm against the parser.
    for (let revIdx = 0; revIdx < revisedParagraphs.length; revIdx++) {
        const revPara = revisedParagraphs[revIdx];
        if (!revisedToOriginal.has(revIdx)) {
            // No counterpart in the original: the whole paragraph is an insertion.
            operations.push({
                type: 'inserted',
                xml: generateInsertedParagraph(revPara.runs, author, date, extractPPrContent(revPara.pPrXml)),
                revisedIndex: revIdx,
            });
            continue;
        }
        const origPara = revisedToOriginal.get(revIdx);
        // Deleted originals that sit between the previous match and this one.
        emitDeletionsBefore(origPara.originalIndex);
        const similarity = similarityByRevisedIndex.get(revIdx) ?? 0;
        if (similarity >= 0.9999) {
            // Unchanged paragraph - use revised content as-is
            operations.push({
                type: 'unchanged',
                xml: generateParagraphXml(revPara),
                originalIndex: origPara.originalIndex,
                revisedIndex: revIdx,
            });
        }
        else {
            // Modified paragraph - diff the runs
            const diffResult = diffRuns(origPara.runs, revPara.runs);
            const trackChangesContent = renderTrackChanges(diffResult.mergedRuns, {
                author,
                date,
            });
            operations.push({
                type: 'modified',
                xml: wrapInParagraph(trackChangesContent, extractPPrContent(revPara.pPrXml)),
                originalIndex: origPara.originalIndex,
                revisedIndex: revIdx,
            });
        }
    }
    // Output any remaining deleted paragraphs at the end
    emitDeletionsBefore(originalParagraphs.length);
    return operations;
}
115
/**
 * Assemble a new document.xml from the original's wrapper markup and the
 * generated paragraph operations.
 *
 * The text before and after the body content is taken from the original; the
 * body content becomes the operations' XML joined by newlines, followed by
 * the original body's sectPr when one was extracted.
 *
 * @param originalXml - The original document.xml content
 * @param operations - Ordered list of paragraph operations
 * @returns The new document.xml content
 */
export function buildDocumentWithTrackChanges(originalXml, operations) {
    const parts = getDocumentParts(originalXml);
    // sectPr has to stay at the end of the body, so pull it out of the old body.
    const { sectPr } = extractSectPr(parts.bodyContent);
    const joinedParagraphs = operations.map((operation) => operation.xml).join('\n');
    const rebuiltBody = sectPr ? joinedParagraphs + '\n' + sectPr : joinedParagraphs;
    return parts.beforeBody + rebuiltBody + parts.afterBody;
}
137
/**
 * Split document.xml into the text up to and including the opening w:body
 * tag, the body's inner content, and the text from the closing w:body tag on.
 *
 * @param documentXml - The document.xml content
 * @returns `{ beforeBody, bodyContent, afterBody }`. When no well-formed
 *   w:body open/close pair is found, the whole input is returned as
 *   `beforeBody` and the other two parts are empty strings.
 */
function getDocumentParts(documentXml) {
    const noBody = { beforeBody: documentXml, bodyContent: '', afterBody: '' };
    // The lookahead requires the tag name to end right after "w:body", so a
    // longer name that merely starts with it (e.g. <w:bodyDiv>) is not
    // mistaken for the body element.
    const bodyOpenMatch = /<w:body(?=[\s>])[^>]*>/.exec(documentXml);
    const bodyCloseStart = documentXml.lastIndexOf('</w:body>');
    if (!bodyOpenMatch || bodyCloseStart === -1) {
        return noBody;
    }
    // Use the match position directly instead of searching the string again.
    const bodyOpenEnd = bodyOpenMatch.index + bodyOpenMatch[0].length;
    if (bodyCloseStart < bodyOpenEnd) {
        // Closing tag precedes the opening tag: slicing would overlap.
        return noBody;
    }
    return {
        beforeBody: documentXml.slice(0, bodyOpenEnd),
        bodyContent: documentXml.slice(bodyOpenEnd, bodyCloseStart),
        afterBody: documentXml.slice(bodyCloseStart),
    };
}
159
/**
 * Render an unchanged paragraph: its pPr XML (if any) followed by each of its
 * runs, wrapped in a w:p element.
 */
function generateParagraphXml(para) {
    // Concatenate the XML of every run in order.
    const runsXml = para.runs.reduce((xml, run) => xml + generateRunXml(run), '');
    const pPr = para.pPrXml || '';
    return ['<w:p>', pPr, runsXml, '</w:p>'].join('');
}
168
/**
 * Generate XML for a single run: optional run properties followed by one w:t
 * element holding the escaped text.
 *
 * `xml:space="preserve"` is added whenever the text contains any whitespace
 * character (space, tab, newline, ...), so leading or trailing whitespace of
 * any kind is kept. A missing `run.text` is treated as an empty string.
 *
 * @param run - Run with `text` and optional `properties`
 * @returns The w:r element as an XML string
 */
function generateRunXml(run) {
    const rPr = generateRunPropertiesXml(run.properties);
    const text = run.text ?? '';
    // Handle whitespace preservation
    const needsSpace = /\s/.test(text);
    const spaceAttr = needsSpace ? ' xml:space="preserve"' : '';
    return `<w:r>${rPr}<w:t${spaceAttr}>${escapeXml(text)}</w:t></w:r>`;
}
178
/**
 * Render a run's formatting as a w:rPr element.
 *
 * Returns an empty string when there are no properties or none of the
 * recognised ones are set. Child elements are emitted in a fixed order:
 * bold, italic, underline, strikethrough, highlight, color, font size,
 * font family.
 */
function generateRunPropertiesXml(props) {
    if (!props) {
        return '';
    }
    // [enabled, renderer] pairs in output order; a renderer runs only when enabled.
    const candidates = [
        [Boolean(props.bold), () => '<w:b/>'],
        [Boolean(props.italic), () => '<w:i/>'],
        [Boolean(props.underline), () => `<w:u w:val="${escapeXml(props.underline)}"/>`],
        [Boolean(props.strikethrough), () => '<w:strike/>'],
        [Boolean(props.highlight), () => `<w:highlight w:val="${escapeXml(props.highlight)}"/>`],
        [Boolean(props.color), () => `<w:color w:val="${escapeXml(props.color)}"/>`],
        // Font size is checked against undefined so that 0 is still emitted.
        [props.fontSize !== undefined, () => `<w:sz w:val="${props.fontSize}"/>`],
        [Boolean(props.fontFamily), () => `<w:rFonts w:ascii="${escapeXml(props.fontFamily)}" w:hAnsi="${escapeXml(props.fontFamily)}"/>`],
    ];
    const parts = candidates
        .filter(([enabled]) => enabled)
        .map(([, render]) => render());
    return parts.length === 0 ? '' : `<w:rPr>${parts.join('')}</w:rPr>`;
}
205
/**
 * Return what lies between the outer <w:pPr ...> and the last </w:pPr> of a
 * full pPr XML string.
 * Yields undefined for empty input or when no such open/close pair exists.
 */
function extractPPrContent(pPrXml) {
    // Strip the outer tags; the capture group holds the inner content.
    const found = pPrXml ? pPrXml.match(/<w:pPr[^>]*>([\s\S]*)<\/w:pPr>/) : null;
    if (found === null) {
        return undefined;
    }
    return found[1];
}
216
/**
 * Replace the five XML special characters (& < > " ') with their entity
 * references.
 */
function escapeXml(text) {
    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&apos;',
    };
    // Single pass over the input: every special character maps to its entity.
    return text.replace(/[&<>"']/g, (character) => entities[character]);
}
227
+ //# sourceMappingURL=documentBuilder.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"documentBuilder.js","sourceRoot":"","sources":["../../../src/baselines/diffmatch/documentBuilder.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,EAAE,QAAQ,EAAE,MAAM,cAAc,CAAC;AACxC,OAAO,EACL,kBAAkB,EAClB,wBAAwB,EACxB,yBAAyB,EACzB,eAAe,GAChB,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAE,aAAa,EAA8B,MAAM,gBAAgB,CAAC;AAwB3E;;;;;GAKG;AACH,MAAM,UAAU,2BAA2B,CACzC,SAA0B,EAC1B,kBAA2C,EAC3C,iBAA0C,EAC1C,OAAqB;IAErB,MAAM,UAAU,GAAyB,EAAE,CAAC;IAC5C,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,OAAO,CAAC;IAEjC,qDAAqD;IACrD,MAAM,eAAe,GAAG,IAAI,GAAG,EAAiC,CAAC;IACjE,KAAK,MAAM,CAAC,IAAI,kBAAkB,EAAE,CAAC;QACnC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;IAC1C,CAAC;IAED,oDAAoD;IACpD,MAAM,cAAc,GAAG,IAAI,GAAG,EAAiC,CAAC;IAChE,KAAK,MAAM,CAAC,IAAI,iBAAiB,EAAE,CAAC;QAClC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;IACzC,CAAC;IAED,oDAAoD;IACpD,MAAM,sBAAsB,GAAG,IAAI,GAAG,EAAU,CAAC;IACjD,MAAM,qBAAqB,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhD,oEAAoE;IACpE,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAiC,CAAC;IAEnE,KAAK,MAAM,KAAK,IAAI,SAAS,CAAC,OAAO,EAAE,CAAC;QACtC,MAAM,QAAQ,GAAG,KAAK,CAAC,QAAiC,CAAC;QACzD,MAAM,OAAO,GAAG,KAAK,CAAC,OAAgC,CAAC;QAEvD,sBAAsB,CAAC,GAAG,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QACnD,qBAAqB,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;QACjD,iBAAiB,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;IACzD,CAAC;IAED,yCAAyC;IACzC,wDAAwD;IACxD,mEAAmE;IACnE,oCAAoC;IAEpC,IAAI,iBAAiB,GAAG,CAAC,CAAC,CAAC;IAE3B,KAAK,IAAI,MAAM,GAAG,CAAC,EAAE,MAAM,GAAG,iBAAiB,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,CAAC;QACjE,MAAM,OAAO,GAAG,iBAAiB,CAAC,MAAM,CAAE,CAAC;QAE3C,IAAI,qBAAqB,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;YACtC,8BAA8B;YAC9B,MAAM,QAAQ,GAAG,iBAAiB,CAAC,GAAG,CAAC,MAAM,CAAE,CAAC;YAEhD,6DAA6D;YAC7D,KAAK,IAAI,CAAC,GAAG,iBAAiB,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,aAAa,EAAE,CAAC,EAAE,EAAE,CAAC;gBACpE,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;oBACnC,MAAM,WAAW,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;oBAC3C,IAAI,WAAW,EAAE,CAAC;wBAChB,UAAU,CAAC,IAAI,CAAC;4BACd,IAAI,EAAE,SAAS;4BACf,GAAG,EAAE,wBAAwB,CAC3B,WAAW,CAAC,I
AAI,EAChB,MAAM,EACN,IAAI,EACJ,iBAAiB,CAAC,WAAW,CAAC,MAAM,CAAC,CACtC;4BACD,aAAa,EAAE,CAAC;yBACjB,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;YACH,CAAC;YAED,gDAAgD;YAChD,MAAM,UAAU,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CACvC,CAAC,CAAC,EAAE,CAAE,CAAC,CAAC,OAAiC,CAAC,aAAa,KAAK,MAAM,CACnE,EAAE,UAAU,IAAI,CAAC,CAAC;YAEnB,IAAI,UAAU,IAAI,MAAM,EAAE,CAAC;gBACzB,kDAAkD;gBAClD,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,WAAW;oBACjB,GAAG,EAAE,oBAAoB,CAAC,OAAO,CAAC;oBAClC,aAAa,EAAE,QAAQ,CAAC,aAAa;oBACrC,YAAY,EAAE,MAAM;iBACrB,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,qCAAqC;gBACrC,MAAM,UAAU,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;gBACzD,MAAM,mBAAmB,GAAG,kBAAkB,CAAC,UAAU,CAAC,UAAU,EAAE;oBACpE,MAAM;oBACN,IAAI;iBACL,CAAC,CAAC;gBAEH,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,UAAU;oBAChB,GAAG,EAAE,eAAe,CAAC,mBAAmB,EAAE,iBAAiB,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;oBAC5E,aAAa,EAAE,QAAQ,CAAC,aAAa;oBACrC,YAAY,EAAE,MAAM;iBACrB,CAAC,CAAC;YACL,CAAC;YAED,iBAAiB,GAAG,QAAQ,CAAC,aAAa,CAAC;QAC7C,CAAC;aAAM,CAAC;YACN,gCAAgC;YAChC,UAAU,CAAC,IAAI,CAAC;gBACd,IAAI,EAAE,UAAU;gBAChB,GAAG,EAAE,yBAAyB,CAC5B,OAAO,CAAC,IAAI,EACZ,MAAM,EACN,IAAI,EACJ,iBAAiB,CAAC,OAAO,CAAC,MAAM,CAAC,CAClC;gBACD,YAAY,EAAE,MAAM;aACrB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,qDAAqD;IACrD,KAAK,IAAI,CAAC,GAAG,iBAAiB,GAAG,CAAC,EAAE,CAAC,GAAG,kBAAkB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvE,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;YACnC,MAAM,WAAW,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YAC3C,IAAI,WAAW,EAAE,CAAC;gBAChB,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,SAAS;oBACf,GAAG,EAAE,wBAAwB,CAC3B,WAAW,CAAC,IAAI,EAChB,MAAM,EACN,IAAI,EACJ,iBAAiB,CAAC,WAAW,CAAC,MAAM,CAAC,CACtC;oBACD,aAAa,EAAE,CAAC;iBACjB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,6BAA6B,CAC3C,WAAmB,EACnB,UAAgC;IAEhC,6BAA6B;IAC7B,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,SAAS,EAAE,GAAG,gBAAgB,CAAC,WAAW,CAAC,CAAC;IAE7E,sDAAsD;IACtD,MAAM,EAAE,MAAM,EAAE,GAAG,aAAa,CAAC,WAAW,CAAC,CAAC;IAE9C,yCAAyC;IACzC,MAAM,aAAa,GAAG,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC
,GAAG,CAAC,CAAC;IAEnD,sDAAsD;IACtD,IAAI,cAAc,GAAG,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC9C,IAAI,MAAM,EAAE,CAAC;QACX,cAAc,IAAI,IAAI,GAAG,MAAM,CAAC;IAClC,CAAC;IAED,uBAAuB;IACvB,OAAO,UAAU,GAAG,cAAc,GAAG,SAAS,CAAC;AACjD,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,WAAmB;IAK3C,kCAAkC;IAClC,MAAM,aAAa,GAAG,WAAW,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;IACzD,MAAM,cAAc,GAAG,WAAW,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;IAEvD,IAAI,CAAC,aAAa,IAAI,CAAC,cAAc,EAAE,CAAC;QACtC,OAAO;YACL,UAAU,EAAE,WAAW;YACvB,WAAW,EAAE,EAAE;YACf,SAAS,EAAE,EAAE;SACd,CAAC;IACJ,CAAC;IAED,MAAM,WAAW,GAAG,WAAW,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IACpF,MAAM,cAAc,GAAG,WAAW,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;IAE5D,OAAO;QACL,UAAU,EAAE,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC;QAC7C,WAAW,EAAE,WAAW,CAAC,KAAK,CAAC,WAAW,EAAE,cAAc,CAAC;QAC3D,SAAS,EAAE,WAAW,CAAC,KAAK,CAAC,cAAc,CAAC;KAC7C,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,IAA2B;IACvD,4BAA4B;IAC5B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACnE,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IAE3C,OAAO,QAAQ,GAAG,GAAG,OAAO,QAAQ,CAAC;AACvC,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,GAAY;IAClC,MAAM,GAAG,GAAG,wBAAwB,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IAErD,iCAAiC;IACjC,MAAM,UAAU,GAAG,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IACjG,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,uBAAuB,CAAC,CAAC,CAAC,EAAE,CAAC;IAE5D,OAAO,QAAQ,GAAG,OAAO,SAAS,IAAI,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC;AAC1E,CAAC;AAED;;GAEG;AACH,SAAS,wBAAwB,CAAC,KAA2D;IAC3F,IAAI,CAAC,KAAK;QAAE,OAAO,EAAE,CAAC;IAEtB,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,IAAI,KAAK,CAAC,IAAI;QAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACrC,IAAI,KAAK,CAAC,MAAM;QAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACvC,IAAI,KAAK,CAAC,SAAS;QAAE,KAAK,CAAC,IAAI,CAAC,eAAe,SAAS,CAAC,KAAK,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;IAChF,IAAI,KAAK,CAAC,aAAa;QAAE,KAAK,CAA
C,IAAI,CAAC,aAAa,CAAC,CAAC;IACnD,IAAI,KAAK,CAAC,SAAS;QAAE,KAAK,CAAC,IAAI,CAAC,uBAAuB,SAAS,CAAC,KAAK,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;IACxF,IAAI,KAAK,CAAC,KAAK;QAAE,KAAK,CAAC,IAAI,CAAC,mBAAmB,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAC5E,IAAI,KAAK,CAAC,QAAQ,KAAK,SAAS;QAAE,KAAK,CAAC,IAAI,CAAC,gBAAgB,KAAK,CAAC,QAAQ,KAAK,CAAC,CAAC;IAClF,IAAI,KAAK,CAAC,UAAU;QAAE,KAAK,CAAC,IAAI,CAAC,sBAAsB,SAAS,CAAC,KAAK,CAAC,UAAU,CAAC,cAAc,SAAS,CAAC,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;IAElI,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAElC,OAAO,UAAU,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC;AAC5C,CAAC;AAED;;;GAGG;AACH,SAAS,iBAAiB,CAAC,MAAe;IACxC,IAAI,CAAC,MAAM;QAAE,OAAO,SAAS,CAAC;IAE9B,oBAAoB;IACpB,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;IAC7D,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;AACtC,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,IAAY;IAC7B,OAAO,IAAI;SACR,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC;SACtB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC;SACvB,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;AAC7B,CAAC"}
@@ -0,0 +1,75 @@
1
+ /**
2
+ * Baseline B: Paragraph alignment using LCS (Longest Common Subsequence).
3
+ *
4
+ * Aligns paragraphs between original and revised documents to identify:
5
+ * - Matched paragraphs (same or similar content)
6
+ * - Inserted paragraphs (only in revised)
7
+ * - Deleted paragraphs (only in original)
8
+ * - Modified paragraphs (matched but with differences)
9
+ */
10
+ import type { ParagraphInfo, AlignmentResult } from '../../shared/ooxml/types.js';
11
+ /**
12
+ * Compute the SHA-1 hex digest of a paragraph's normalized text.
13
+ *
14
+ * The digest is the equality key paragraphs are compared by during LCS.
15
+ */
16
+ export declare function hashParagraph(text: string): string;
17
+ /**
18
+ * Normalize paragraph text for comparison.
19
+ *
20
+ * - Trim leading and trailing whitespace
21
+ * - Collapse every run of whitespace (tabs and newlines included) into one space
22
+ * - Lowercase for case-insensitive comparison
23
+ */
24
+ export declare function normalizeParagraphText(text: string): string;
25
+ /**
26
+ * Compute similarity between two strings as the Jaccard index of their normalized word sets.
27
+ * Word order and repeated words are ignored; two wordless strings score 1, a single wordless one scores 0.
28
+ * @returns Value between 0 (no shared words) and 1 (same set of words)
29
+ */
30
+ export declare function computeSimilarity(a: string, b: string): number;
31
+ /**
32
+ * Compute the Longest Common Subsequence of two arrays using dynamic programming.
33
+ *
34
+ * @param a - First array
35
+ * @param b - Second array
36
+ * @param keyFn - Extracts the comparison key; two elements match when their keys are strictly equal
37
+ * @returns Array of [indexA, indexB] pairs, in ascending index order, representing the LCS
38
+ */
39
+ export declare function lcs<T>(a: T[], b: T[], keyFn: (item: T) => string): Array<[number, number]>;
40
+ /**
41
+ * Align paragraphs between original and revised documents.
42
+ *
43
+ * Uses hash-based LCS to pair paragraphs whose normalized text is equal, then greedily pairs the
44
+ * leftovers whose similarity reaches the threshold; what remains is reported as deleted or inserted.
45
+ *
46
+ * @param original - Paragraphs from original document
47
+ * @param revised - Paragraphs from revised document
48
+ * @param similarityThreshold - Minimum similarity for pairing paragraphs the LCS left unmatched (default: 0.5)
49
+ */
50
+ export declare function alignParagraphs(original: ParagraphInfo[], revised: ParagraphInfo[], similarityThreshold?: number): AlignmentResult;
51
+ /**
52
+ * Alignment result split into identical, modified, deleted and inserted paragraphs.
53
+ */
54
+ export interface ParagraphClassification {
55
+ /** Matched pairs that classifyAlignment treats as identical (similarity >= 0.9999) */
56
+ identical: Array<{
57
+ original: ParagraphInfo;
58
+ revised: ParagraphInfo;
59
+ }>;
60
+ /** All other matched pairs (similarity below 0.9999), with their similarity score */
61
+ modified: Array<{
62
+ original: ParagraphInfo;
63
+ revised: ParagraphInfo;
64
+ similarity: number;
65
+ }>;
66
+ /** Deleted paragraphs, passed through from the alignment result */
67
+ deleted: ParagraphInfo[];
68
+ /** Inserted paragraphs, passed through from the alignment result */
69
+ inserted: ParagraphInfo[];
70
+ }
71
+ /**
72
+ * Split an alignment's matches into identical (similarity >= 0.9999) and modified pairs; deleted and inserted pass through.
73
+ */
74
+ export declare function classifyAlignment(alignment: AlignmentResult): ParagraphClassification;
75
+ //# sourceMappingURL=paragraphAlignment.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"paragraphAlignment.d.ts","sourceRoot":"","sources":["../../../src/baselines/diffmatch/paragraphAlignment.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAGH,OAAO,KAAK,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAElF;;;;GAIG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAGlD;AAED;;;;;;GAMG;AACH,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAK3D;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,CAe9D;AAED;;;;;;;GAOG;AACH,wBAAgB,GAAG,CAAC,CAAC,EACnB,CAAC,EAAE,CAAC,EAAE,EACN,CAAC,EAAE,CAAC,EAAE,EACN,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,MAAM,GACzB,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAqCzB;AAED;;;;;;;;;GASG;AACH,wBAAgB,eAAe,CAC7B,QAAQ,EAAE,aAAa,EAAE,EACzB,OAAO,EAAE,aAAa,EAAE,EACxB,mBAAmB,SAAM,GACxB,eAAe,CA6FjB;AAED;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACtC,8CAA8C;IAC9C,SAAS,EAAE,KAAK,CAAC;QAAE,QAAQ,EAAE,aAAa,CAAC;QAAC,OAAO,EAAE,aAAa,CAAA;KAAE,CAAC,CAAC;IACtE,iDAAiD;IACjD,QAAQ,EAAE,KAAK,CAAC;QACd,QAAQ,EAAE,aAAa,CAAC;QACxB,OAAO,EAAE,aAAa,CAAC;QACvB,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC,CAAC;IACH,yBAAyB;IACzB,OAAO,EAAE,aAAa,EAAE,CAAC;IACzB,0BAA0B;IAC1B,QAAQ,EAAE,aAAa,EAAE,CAAC;CAC3B;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,eAAe,GAAG,uBAAuB,CAoBrF"}
@@ -0,0 +1,206 @@
1
+ /**
2
+ * Baseline B: Paragraph alignment using LCS (Longest Common Subsequence).
3
+ *
4
+ * Aligns paragraphs between original and revised documents to identify:
5
+ * - Matched paragraphs (same or similar content)
6
+ * - Inserted paragraphs (only in revised)
7
+ * - Deleted paragraphs (only in original)
8
+ * - Modified paragraphs (matched but with differences)
9
+ */
10
+ import { createHash } from 'crypto';
11
/**
 * Compute the SHA-1 hex digest of a paragraph's normalized text.
 *
 * Paragraphs whose normalized text is equal get the same digest, which makes
 * the digest a cheap equality key during LCS.
 */
export function hashParagraph(text) {
    const digest = createHash('sha1');
    digest.update(normalizeParagraphText(text));
    return digest.digest('hex');
}
20
/**
 * Normalize paragraph text for comparison.
 *
 * - Strip leading and trailing whitespace
 * - Collapse every run of whitespace into a single space
 * - Lowercase for case-insensitive comparison
 */
export function normalizeParagraphText(text) {
    const trimmed = text.trim();
    const singleSpaced = trimmed.replace(/\s+/g, ' ');
    return singleSpaced.toLowerCase();
}
33
/**
 * Compute similarity between two strings as the Jaccard index of their
 * normalized word sets (word order and repetitions are ignored).
 *
 * @returns Value between 0 (no shared words) and 1 (same set of words)
 */
export function computeSimilarity(a, b) {
    const toWordSet = (value) => new Set(normalizeParagraphText(value).split(' ').filter((word) => word.length > 0));
    const left = toWordSet(a);
    const right = toWordSet(b);
    if (left.size === 0 || right.size === 0) {
        // Two wordless inputs count as identical; a single wordless one shares nothing.
        return left.size === right.size ? 1 : 0;
    }
    let shared = 0;
    for (const word of left) {
        if (right.has(word)) {
            shared += 1;
        }
    }
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    return shared / (left.size + right.size - shared);
}
51
/**
 * Compute the Longest Common Subsequence of two arrays using dynamic programming.
 *
 * Comparison keys are extracted once per element up front, so `keyFn` runs
 * `a.length + b.length` times instead of once (or more) per table cell.
 *
 * @param a - First array
 * @param b - Second array
 * @param keyFn - Extracts the comparison key; two elements match when their keys are strictly equal
 * @returns Array of [indexA, indexB] pairs, in ascending index order, representing the LCS
 */
export function lcs(a, b, keyFn) {
    // Wrap keyFn so it only ever receives the element, as before.
    const keysA = Array.from(a, (item) => keyFn(item));
    const keysB = Array.from(b, (item) => keyFn(item));
    const m = keysA.length;
    const n = keysB.length;
    // dp[i][j] = LCS length of the first i keys of a and the first j keys of b.
    const dp = Array.from({ length: m + 1 }, () => new Array(n + 1).fill(0));
    for (let i = 1; i <= m; i++) {
        for (let j = 1; j <= n; j++) {
            dp[i][j] = keysA[i - 1] === keysB[j - 1]
                ? dp[i - 1][j - 1] + 1
                : Math.max(dp[i - 1][j], dp[i][j - 1]);
        }
    }
    // Backtrack from the bottom-right corner. On a tie the walk steps back in
    // b (j--), which decides which of several equally long subsequences wins.
    const pairs = [];
    let i = m;
    let j = n;
    while (i > 0 && j > 0) {
        if (keysA[i - 1] === keysB[j - 1]) {
            pairs.push([i - 1, j - 1]);
            i--;
            j--;
        }
        else if (dp[i - 1][j] > dp[i][j - 1]) {
            i--;
        }
        else {
            j--;
        }
    }
    return pairs.reverse();
}
93
/**
 * Align paragraphs between original and revised documents.
 *
 * Pass 1 pairs paragraphs whose normalized text hashes are equal, using LCS
 * so that document order is respected. Pass 2 walks the originals left over
 * and greedily pairs each with the most similar leftover revised paragraph,
 * provided the similarity reaches the threshold. Whatever is still unpaired
 * is reported as deleted (original side) or inserted (revised side).
 *
 * @param original - Paragraphs from original document
 * @param revised - Paragraphs from revised document
 * @param similarityThreshold - Minimum similarity for a pass-2 pairing (default: 0.5)
 */
export function alignParagraphs(original, revised, similarityThreshold = 0.5) {
    const withHash = (para) => ({ para, hash: hashParagraph(para.text) });
    // Pass 1: exact matches on normalized text.
    const exactPairs = lcs(original.map(withHash), revised.map(withHash), (entry) => entry.hash);
    const pairedOriginal = new Set();
    const pairedRevised = new Set();
    const matched = [];
    for (const [origIdx, revIdx] of exactPairs) {
        pairedOriginal.add(origIdx);
        pairedRevised.add(revIdx);
        const origPara = original[origIdx];
        const revPara = revised[revIdx];
        matched.push({
            original: origPara,
            revised: revPara,
            similarity: computeSimilarity(origPara.text, revPara.text),
        });
    }
    const leftoverOriginal = original.filter((_, idx) => !pairedOriginal.has(idx));
    // Shrinks as pass 2 claims entries; the remainder becomes `inserted`.
    const leftoverRevised = revised.filter((_, idx) => !pairedRevised.has(idx));
    // Pass 2: near-matches for paragraphs that were modified significantly.
    const deleted = [];
    for (const origPara of leftoverOriginal) {
        let bestIdx = -1;
        let bestSimilarity = 0;
        for (let idx = 0; idx < leftoverRevised.length; idx++) {
            const similarity = computeSimilarity(origPara.text, leftoverRevised[idx].text);
            if (similarity < similarityThreshold || Number.isNaN(similarity) || Number.isNaN(similarityThreshold)) {
                continue;
            }
            // Strict comparison keeps the earliest candidate on ties.
            if (bestIdx === -1 || similarity > bestSimilarity) {
                bestIdx = idx;
                bestSimilarity = similarity;
            }
        }
        if (bestIdx === -1) {
            deleted.push(origPara);
            continue;
        }
        const [claimed] = leftoverRevised.splice(bestIdx, 1);
        matched.push({
            original: origPara,
            revised: claimed,
            similarity: bestSimilarity,
        });
    }
    return {
        matched,
        deleted,
        inserted: leftoverRevised,
    };
}
183
/**
 * Split an alignment's matched pairs into identical and modified ones.
 *
 * A pair counts as identical when its similarity is at least 0.9999; every
 * other pair is kept, unchanged, as modified. The deleted and inserted lists
 * are passed through as-is.
 */
export function classifyAlignment(alignment) {
    const identical = [];
    const modified = [];
    alignment.matched.forEach((match) => {
        const { original, revised, similarity } = match;
        if (similarity >= 0.9999) {
            identical.push({ original, revised });
        }
        else {
            modified.push(match);
        }
    });
    return {
        identical,
        modified,
        deleted: alignment.deleted,
        inserted: alignment.inserted,
    };
}
206
+ //# sourceMappingURL=paragraphAlignment.js.map