reddocx 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reddocx-0.0.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ayush Kumar
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
reddocx-0.0.1/PKG-INFO ADDED
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.4
2
+ Name: reddocx
3
+ Version: 0.0.1
4
+ Summary: Open source implementation for track changes engine for MS Word (.docx)
5
+ Author: Ayush Kumar
6
+ Project-URL: Homepage, https://github.com/yourusername/reddocx
7
+ Keywords: docx,word,track-changes,ooxml
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: OS Independent
10
+ Requires-Python: >=3.9
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: lxml>=6.0.2
14
+ Requires-Dist: lxml-stubs>=0.5.1
15
+ Dynamic: license-file
16
+
17
+ # red-docx
18
+ Open source repository for tracking changes in Word documents.
@@ -0,0 +1,2 @@
1
+ # red-docx
2
+ Open source repository for tracking changes in Word documents.
@@ -0,0 +1,32 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "reddocx"
7
+ version = "0.0.1"
8
+ description = "Open source implementation for track changes engine for MS Word (.docx)"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ authors = [
12
+ { name = "Ayush Kumar" }
13
+ ]
14
+ dependencies = [
15
+ "lxml>=6.0.2",
16
+ "lxml-stubs>=0.5.1"
17
+ ]
18
+
19
+ keywords = ["docx", "word", "track-changes", "ooxml"]
20
+ classifiers = [
21
+ "Programming Language :: Python :: 3",
22
+ "Operating System :: OS Independent"
23
+ ]
24
+
25
+ [tool.setuptools.packages.find]
26
+ where = ["src"]
27
+
28
+ [project.urls]
29
+ Homepage = "https://github.com/yourusername/reddocx"
30
+
31
+ [tool.setuptools]
32
+ package-dir = {"" = "src"}
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ from .core.document import DocxDocument
2
+
3
+ __all__ = ["DocxDocument"]
File without changes
@@ -0,0 +1,148 @@
1
+ import zipfile
2
+ from datetime import datetime
3
+ from io import BytesIO
4
+ from typing import List, Optional, Union, cast
5
+
6
+ from lxml import etree
7
+
8
+ from ..xml.namespace import NS
9
+ from .package import DocxPackage
10
+
11
+
12
class DocxDocument:
    """A .docx package whose main document can be edited with tracked changes.

    The constructor parses ``word/document.xml`` (and ``word/settings.xml``
    when the package has one) into lxml trees.  ``track_replace_words``
    rewrites matching text as ``w:del`` / ``w:ins`` revision pairs, and
    ``save`` serialises the package back to bytes.
    """

    def __init__(self, source: Union[str, bytes, BytesIO]):
        """Load the package from *source*.

        Args:
            source: Anything ``DocxPackage`` accepts (path, bytes or BytesIO).

        Raises:
            KeyError: If the package has no ``word/document.xml`` part.
        """
        self.pkg = DocxPackage(source)
        self.document = self.pkg.read_xml("word/document.xml")
        # The settings part is treated as optional so that a package without
        # it can still be opened; zipfile reports a missing member as KeyError.
        try:
            self.settings: Optional[etree._Element] = self.pkg.read_xml(
                "word/settings.xml"
            )
        except KeyError:
            self.settings = None

    def paragraphs(self) -> List[etree._Element]:
        """Return every ``w:p`` element below the document root, in order."""
        return cast(
            List[etree._Element],
            self.document.xpath(".//w:p", namespaces=NS),
        )

    def runs(self) -> List[etree._Element]:
        """Return every ``w:r`` element below the document root, in order."""
        return cast(
            List[etree._Element],
            self.document.xpath(".//w:r", namespaces=NS),
        )

    def get_paragraph_text(
        self,
        paragraph: etree._Element,
    ) -> str:
        """Concatenate the text of all ``w:t`` descendants of *paragraph*.

        Deleted text lives in ``w:delText`` elements and is therefore not
        part of the result.
        """
        texts = cast(List, paragraph.xpath(".//w:t", namespaces=NS))
        return "".join(t.text for t in texts if t.text)

    def track_replace_words(
        self, replacements: dict[str, str], author: Optional[str] = "John DOE"
    ) -> dict[str, list[int]]:
        """Replace words throughout the document as tracked revisions.

        Args:
            replacements: Mapping of text to find -> text to insert.  Empty
                keys are ignored.  An empty replacement records a deletion
                only.
            author: Name stored on every revision; ``None`` falls back to
                the default name.

        Returns:
            For every search text that was actually revised, the indexes
            (into ``paragraphs()``) of the paragraphs that were changed.
            A word that only appears split across several runs is not
            revised and therefore not reported.
        """
        # Local import: the module-level import only brings in `datetime`.
        from datetime import timezone

        if author is None:
            author = "John DOE"

        results: dict[str, list[int]] = {}
        revision_id = self._next_revision_id()
        # Timezone-aware replacement for the deprecated datetime.utcnow(),
        # formatted with whole seconds.
        now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

        for idx, p in enumerate(self.paragraphs()):
            # Cheap pre-filter only; _apply_revision does the real matching.
            text = self.get_paragraph_text(p)

            for old, new in replacements.items():
                if not old or old not in text:
                    continue

                next_id = self._apply_revision(
                    paragraph=p,
                    old_word=old,
                    new_word=new,
                    rev_id=revision_id,
                    date=now,
                    author=author,
                )
                # The id only advances when at least one revision was written.
                if next_id != revision_id:
                    results.setdefault(old, []).append(idx)
                    revision_id = next_id

        return results

    def _next_revision_id(self) -> int:
        """Return an integer one above the largest numeric ``w:id`` in use."""
        highest = 0
        for value in self.document.xpath("//@w:id", namespaces=NS):
            try:
                highest = max(highest, int(value))
            except ValueError:
                # Non-numeric identifiers cannot clash with the ones we emit.
                continue
        return highest + 1

    def _apply_revision(
        self,
        paragraph,
        old_word,
        new_word,
        rev_id,
        date,
        author: Optional[str] = "John Doe",
    ):
        """Revise every occurrence of *old_word* inside single runs of *paragraph*.

        Runs that already sit inside a ``w:ins`` or ``w:del`` are skipped so
        that revisions are never nested and freshly inserted text is not
        replaced again by a later search term.

        Args:
            paragraph: Element whose descendant runs are examined.
            old_word: Text to mark as deleted; an empty value is a no-op.
            new_word: Text to mark as inserted.
            rev_id: First revision id that may be used.
            date: Timestamp string written to every revision.
            author: Revision author; ``None`` falls back to the default name.

        Returns:
            The next unused revision id (equal to *rev_id* if nothing changed).
        """
        if not old_word:
            return rev_id
        if author is None:
            author = "John Doe"

        ns = NS["w"]
        t_tag = f"{{{ns}}}t"

        candidates = paragraph.xpath(
            ".//w:r[not(ancestor::w:ins) and not(ancestor::w:del)]",
            namespaces=NS,
        )
        for run in candidates:
            if run.getparent() is None:
                continue
            # Only direct w:t children: text of nested runs (e.g. text boxes)
            # is handled when the loop reaches those runs themselves.
            has_match = any(
                child.tag == t_tag and child.text and old_word in child.text
                for child in run
            )
            if has_match:
                rev_id = self._replace_in_run(
                    run, old_word, new_word, rev_id, date, str(author)
                )

        return rev_id

    def _replace_in_run(self, run, old_word, new_word, rev_id, date, author):
        """Split *run* around each *old_word* and splice in del/ins revisions.

        Unchanged text and non-text children stay in plain runs; every new
        run receives a copy of the original run's attributes and ``w:rPr``
        so formatting is preserved.  Returns the next unused revision id.
        """
        # Local import: `copy` is not imported at module level.
        from copy import deepcopy

        ns = NS["w"]
        r_tag = f"{{{ns}}}r"
        t_tag = f"{{{ns}}}t"
        del_text_tag = f"{{{ns}}}delText"
        rpr_tag = f"{{{ns}}}rPr"
        space_attr = "{http://www.w3.org/XML/1998/namespace}space"
        rpr = run.find(rpr_tag)

        def empty_run():
            fresh = etree.Element(r_tag, attrib=dict(run.attrib), nsmap=NS)
            if rpr is not None:
                props = deepcopy(rpr)
                props.tail = None
                fresh.append(props)
            return fresh

        def text_node(tag, value):
            node = etree.Element(tag, nsmap=NS)
            node.text = value
            # Mark edge whitespace as significant so a consumer does not
            # trim it; splitting usually leaves it at the cut.
            if value != value.strip():
                node.set(space_attr, "preserve")
            return node

        def revision(kind, text_tag, value, number):
            wrapper = etree.Element(f"{{{ns}}}{kind}", nsmap=NS)
            wrapper.set(f"{{{ns}}}id", str(number))
            wrapper.set(f"{{{ns}}}author", author)
            wrapper.set(f"{{{ns}}}date", date)
            inner = empty_run()
            inner.append(text_node(text_tag, value))
            wrapper.append(inner)
            return wrapper

        replacement = []  # elements that will stand in for `run`, in order
        plain = None  # plain run currently collecting unchanged content

        def plain_run():
            nonlocal plain
            if plain is None:
                plain = empty_run()
                replacement.append(plain)
            return plain

        for child in list(run):
            if child.tag == rpr_tag:
                continue  # re-created for every new run by empty_run()

            text = child.text if child.tag == t_tag else None
            if not text or old_word not in text:
                plain_run().append(child)  # moves the child unchanged
                continue

            for position, segment in enumerate(text.split(old_word)):
                if position:
                    # One occurrence of old_word precedes every segment
                    # except the first.
                    replacement.append(
                        revision("del", del_text_tag, old_word, rev_id)
                    )
                    rev_id += 1
                    if new_word:
                        replacement.append(
                            revision("ins", t_tag, new_word, rev_id)
                        )
                        rev_id += 1
                    plain = None  # following content starts a new run
                if segment:
                    plain_run().append(text_node(t_tag, segment))

        parent = run.getparent()
        replacement[-1].tail = run.tail
        anchor = parent.index(run)
        parent.replace(run, replacement[0])
        for offset, element in enumerate(replacement[1:], start=1):
            parent.insert(anchor + offset, element)

        return rev_id

    def save(self) -> bytes:
        """Serialise the package, with the edited XML parts, to bytes.

        Every member of the original archive is copied in its original
        order; ``word/document.xml`` and ``word/settings.xml`` are replaced
        by the current state of the parsed trees.
        """
        edited = {
            "word/document.xml": self.document,
            "word/settings.xml": self.settings,
        }
        buffer = BytesIO()

        with zipfile.ZipFile(BytesIO(self.pkg._raw), "r") as r_file:
            with zipfile.ZipFile(
                buffer, "w", compression=zipfile.ZIP_DEFLATED
            ) as o_file:
                for item in r_file.infolist():
                    tree = edited.get(item.filename)
                    if tree is not None:
                        payload = etree.tostring(
                            tree,
                            xml_declaration=True,
                            encoding="UTF-8",
                        )
                    else:
                        payload = r_file.read(item.filename)
                    o_file.writestr(item, payload)

        return buffer.getvalue()
@@ -0,0 +1,30 @@
1
+ import zipfile
2
+ from io import BytesIO
3
+ from typing import Union
4
+
5
+ from lxml import etree
6
+
7
+
8
class DocxPackage:
    """Read-only access to the parts of a .docx (ZIP) package.

    The whole archive is held in memory as ``_raw`` and exposed through a
    ``zipfile.ZipFile`` in ``zip``.  Instances can be used as context
    managers, or closed explicitly with ``close()``.
    """

    def __init__(self, input_type: Union[str, bytes, BytesIO]):
        """Load the package.

        Args:
            input_type: A filesystem path (``str`` or any ``os.PathLike``),
                the raw archive as a bytes-like object, or a ``BytesIO``
                holding it.

        Raises:
            TypeError: If *input_type* is none of the supported kinds.
            zipfile.BadZipFile: If the data is not a ZIP archive.
        """
        # Local import: `os` is not imported at module level.
        import os

        if isinstance(input_type, (str, os.PathLike)):
            with open(input_type, "rb") as f:
                self._raw = f.read()

        elif isinstance(input_type, (bytes, bytearray, memoryview)):
            # Normalise to immutable bytes so later mutation of the caller's
            # buffer cannot change the package contents.
            self._raw = bytes(input_type)

        elif isinstance(input_type, BytesIO):
            self._raw = input_type.getvalue()

        else:
            raise TypeError("Input Type must be path | bytes | BytesIO")

        self.zip = zipfile.ZipFile(BytesIO(self._raw), "r")

    def __enter__(self) -> "DocxPackage":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    def close(self) -> None:
        """Close the underlying ZIP handle; safe to call more than once."""
        self.zip.close()

    def read_xml(self, part: str) -> "etree._Element":
        """Parse the package member *part* and return its root element.

        Raises:
            KeyError: If the archive has no member named *part*.
        """
        xml_bytes = self.zip.read(part)
        return etree.fromstring(xml_bytes)

    def list_parts(self):
        """Return the names of all archive members, in archive order."""
        return self.zip.namelist()
File without changes
@@ -0,0 +1,5 @@
1
# Namespace URI of the WordprocessingML main-document vocabulary.
W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"

# Prefix -> URI mapping, in the form passed as `namespaces=` to XPath calls.
NS = dict(w=W_NS)
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.4
2
+ Name: reddocx
3
+ Version: 0.0.1
4
+ Summary: Open source implementation for track changes engine for MS Word (.docx)
5
+ Author: Ayush Kumar
6
+ Project-URL: Homepage, https://github.com/yourusername/reddocx
7
+ Keywords: docx,word,track-changes,ooxml
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: OS Independent
10
+ Requires-Python: >=3.9
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: lxml>=6.0.2
14
+ Requires-Dist: lxml-stubs>=0.5.1
15
+ Dynamic: license-file
16
+
17
+ # red-docx
18
+ Open source repository for tracking changes in Word documents.
@@ -0,0 +1,15 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ src/reddocx/__init__.py
5
+ src/reddocx.egg-info/PKG-INFO
6
+ src/reddocx.egg-info/SOURCES.txt
7
+ src/reddocx.egg-info/dependency_links.txt
8
+ src/reddocx.egg-info/requires.txt
9
+ src/reddocx.egg-info/top_level.txt
10
+ src/reddocx/core/__init__.py
11
+ src/reddocx/core/document.py
12
+ src/reddocx/core/package.py
13
+ src/reddocx/xml/__init_.py
14
+ src/reddocx/xml/namespace.py
15
+ tests/test_loader.py
@@ -0,0 +1,2 @@
1
+ lxml>=6.0.2
2
+ lxml-stubs>=0.5.1
@@ -0,0 +1 @@
1
+ reddocx
@@ -0,0 +1,12 @@
1
"""Manual round-trip check: load a document, track two replacements, save it."""

from reddocx.core.document import DocxDocument


def main(input_path: str = "sample.docx", output_path: str = "roundtrip.docx") -> None:
    """Run the round trip from *input_path* to *output_path*.

    Prints the text of the first paragraph (when there is one) and the
    report returned by ``track_replace_words``, then writes the saved
    package to *output_path*.
    """
    doc = DocxDocument(input_path)

    paragraphs = doc.paragraphs()
    if paragraphs:  # a document without paragraphs must not raise IndexError
        print(doc.get_paragraph_text(paragraphs[0]))

    report = doc.track_replace_words({"Rippling": "hi", "team": "earth"})
    print(report)

    data = doc.save()
    with open(output_path, "wb") as f:
        f.write(data)


# Guarded so that importing this module (e.g. during pytest collection of
# test_*.py files) does not touch the filesystem.
if __name__ == "__main__":
    main()