reddocx 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reddocx-0.0.1/LICENSE +21 -0
- reddocx-0.0.1/PKG-INFO +18 -0
- reddocx-0.0.1/README.md +2 -0
- reddocx-0.0.1/pyproject.toml +32 -0
- reddocx-0.0.1/setup.cfg +4 -0
- reddocx-0.0.1/src/reddocx/__init__.py +3 -0
- reddocx-0.0.1/src/reddocx/core/__init__.py +0 -0
- reddocx-0.0.1/src/reddocx/core/document.py +148 -0
- reddocx-0.0.1/src/reddocx/core/package.py +30 -0
- reddocx-0.0.1/src/reddocx/xml/__init_.py +0 -0
- reddocx-0.0.1/src/reddocx/xml/namespace.py +5 -0
- reddocx-0.0.1/src/reddocx.egg-info/PKG-INFO +18 -0
- reddocx-0.0.1/src/reddocx.egg-info/SOURCES.txt +15 -0
- reddocx-0.0.1/src/reddocx.egg-info/dependency_links.txt +1 -0
- reddocx-0.0.1/src/reddocx.egg-info/requires.txt +2 -0
- reddocx-0.0.1/src/reddocx.egg-info/top_level.txt +1 -0
- reddocx-0.0.1/tests/test_loader.py +12 -0
reddocx-0.0.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ayush Kumar
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
reddocx-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: reddocx
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Open source implementation for track changes engine for MS Word (.docx)
|
|
5
|
+
Author: Ayush Kumar
|
|
6
|
+
Project-URL: Homepage, https://github.com/yourusername/reddocx
|
|
7
|
+
Keywords: docx,word,track-changes,ooxml
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.9
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: lxml>=6.0.2
|
|
14
|
+
Requires-Dist: lxml-stubs>=0.5.1
|
|
15
|
+
Dynamic: license-file
|
|
16
|
+
|
|
17
|
+
# red-docx
|
|
18
|
+
Open source repository for tracking changes in Word documents.
|
reddocx-0.0.1/README.md
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "reddocx"
|
|
7
|
+
version = "0.0.1"
|
|
8
|
+
description = "Open source implementation for track changes engine for MS Word (.docx)"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
authors = [
|
|
12
|
+
{ name = "Ayush Kumar" }
|
|
13
|
+
]
|
|
14
|
+
dependencies = [
|
|
15
|
+
"lxml>=6.0.2",
|
|
16
|
+
"lxml-stubs>=0.5.1"
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
keywords = ["docx", "word", "track-changes", "ooxml"]
|
|
20
|
+
classifiers = [
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Operating System :: OS Independent"
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[tool.setuptools.packages.find]
|
|
26
|
+
where = ["src"]
|
|
27
|
+
|
|
28
|
+
[project.urls]
|
|
29
|
+
Homepage = "https://github.com/yourusername/reddocx"
|
|
30
|
+
|
|
31
|
+
[tool.setuptools]
|
|
32
|
+
package-dir = {"" = "src"}
|
reddocx-0.0.1/setup.cfg
ADDED
|
File without changes
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import zipfile
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from io import BytesIO
|
|
4
|
+
from typing import List, Optional, Union, cast
|
|
5
|
+
|
|
6
|
+
from lxml import etree
|
|
7
|
+
|
|
8
|
+
from ..xml.namespace import NS
|
|
9
|
+
from .package import DocxPackage
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DocxDocument:
    """Editable view of the main parts of a .docx package.

    The constructor loads ``word/document.xml`` (and ``word/settings.xml``
    when the package has one) as lxml trees.  ``track_replace_words`` rewrites
    matching text as tracked deletions/insertions and ``save`` serialises the
    modified trees back into a new ZIP archive.
    """

    # Clark-notation name of the ``xml:space`` attribute.
    _XML_SPACE = "{http://www.w3.org/XML/1998/namespace}space"

    def __init__(self, source: Union[str, bytes, BytesIO]):
        """Open *source* (a path, raw bytes or ``BytesIO``) and parse its parts."""
        self.pkg = DocxPackage(source)
        self.document = self.pkg.read_xml("word/document.xml")
        # Do not fail on packages that ship without a settings part; in that
        # case ``settings`` is None and ``save`` has nothing to rewrite for it.
        settings_part = "word/settings.xml"
        self.settings = (
            self.pkg.read_xml(settings_part)
            if settings_part in self.pkg.list_parts()
            else None
        )

    def paragraphs(self) -> List[etree._Element]:
        """Return every ``w:p`` element below the document root."""
        return cast(
            List[etree._Element],
            self.document.xpath(".//w:p", namespaces=NS),
        )

    def runs(self) -> List[etree._Element]:
        """Return every ``w:r`` element below the document root."""
        return cast(
            List[etree._Element],
            self.document.xpath(".//w:r", namespaces=NS),
        )

    def get_paragraph_text(
        self,
        paragraph: etree._Element,
    ) -> str:
        """Return the concatenated text of all ``w:t`` descendants of *paragraph*."""
        texts = cast(List, paragraph.xpath(".//w:t", namespaces=NS))
        return "".join(t.text for t in texts if t.text)

    def track_replace_words(
        self, replacements: dict[str, str], author: Optional[str] = "John DOE"
    ) -> dict[str, List[int]]:
        """Replace words throughout the document as tracked changes.

        Args:
            replacements: Mapping of text to find -> text to insert.  Empty
                search keys are ignored.
            author: Name stored on the revision markup; ``None`` falls back
                to the default author.

        Returns:
            Mapping of each search key to the indices (into ``paragraphs()``)
            of the paragraphs whose text contained it.
        """
        # Local import: the file only imports ``datetime`` at module level.
        from datetime import timezone

        if author is None:
            author = "John DOE"

        results: dict[str, List[int]] = {}
        revision_id = self._first_free_revision_id()
        # ``datetime.utcnow()`` is deprecated; build an aware UTC timestamp and
        # format it with second precision and a trailing "Z".
        now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

        for idx, p in enumerate(self.paragraphs()):
            text = self.get_paragraph_text(p)

            for old, new in replacements.items():
                # "" is contained in every string; skipping it avoids
                # rewriting every run of every paragraph.
                if not old or old not in text:
                    continue

                results.setdefault(old, []).append(idx)

                # ``_apply_revision`` hands back the next unused id so every
                # w:del / w:ins element receives its own identifier.
                revision_id = self._apply_revision(
                    paragraph=p,
                    old_word=old,
                    new_word=new,
                    rev_id=revision_id,
                    date=now,
                    author=author,
                )

        return results

    @staticmethod
    def _qn(local: str) -> str:
        """Return *local* as a Clark-notation name in the ``w`` namespace."""
        return f"{{{NS['w']}}}{local}"

    def _first_free_revision_id(self) -> int:
        """Return one more than the largest numeric ``w:id`` in the document."""
        used = []
        for value in self.document.xpath("//@w:id", namespaces=NS):
            try:
                used.append(int(value))
            except ValueError:
                continue
        return max(used, default=0) + 1

    def _clone_run_shell(self, run):
        """Return a new, text-less run with *run*'s attributes and a copy of its ``w:rPr``."""
        from copy import deepcopy

        shell = etree.Element(run.tag, attrib=dict(run.attrib), nsmap=run.nsmap)
        props = run.find(self._qn("rPr"))
        if props is not None:
            shell.append(deepcopy(props))
        return shell

    def _build_revision(self, kind, text_tag, text, template, rev_id, author, date):
        """Build a ``w:<kind>`` revision element wrapping one run that holds *text*."""
        marker = etree.Element(self._qn(kind), nsmap={"w": NS["w"]})
        marker.set(self._qn("id"), str(rev_id))
        marker.set(self._qn("author"), author)
        marker.set(self._qn("date"), date)

        run = self._clone_run_shell(template)
        node = etree.SubElement(run, self._qn(text_tag))
        # Without xml:space="preserve" leading/trailing blanks are dropped.
        node.set(self._XML_SPACE, "preserve")
        node.text = text

        marker.append(run)
        return marker

    def _apply_revision(
        self,
        paragraph,
        old_word,
        new_word,
        rev_id,
        date,
        author: Optional[str] = "John Doe",
    ) -> int:
        """Turn every occurrence of *old_word* inside single ``w:t`` nodes of
        *paragraph* into a tracked deletion followed by a tracked insertion.

        The affected run is split in place: text before the match stays in the
        original run, text (and any sibling nodes) after it moves to a new run
        with the same formatting, and the ``w:del`` / ``w:ins`` elements are
        inserted between the two.  Matches that span several runs are not
        handled.

        Args:
            paragraph: The ``w:p`` element to edit.
            old_word: Text to mark as deleted; an empty value is a no-op.
            new_word: Text to mark as inserted; when empty only the deletion
                is recorded.
            rev_id: First revision id to use.
            date: Timestamp string stored on the revision elements.
            author: Revision author; ``None`` falls back to the default.

        Returns:
            The next unused revision id.
        """
        next_id = int(rev_id)
        if not old_word:
            return next_id

        who = "John Doe" if author is None else str(author)
        text_tag = self._qn("t")
        props_tag = self._qn("rPr")

        # Snapshot of the runs: elements created below are never revisited,
        # so a replacement that contains the search text cannot loop forever.
        for run in paragraph.xpath(".//w:r", namespaces=NS):
            current = run

            while current is not None:
                parent = current.getparent()
                if parent is None:
                    break

                hit = next(
                    (
                        t
                        for t in current.findall(text_tag)
                        if t.text and old_word in t.text
                    ),
                    None,
                )
                if hit is None:
                    break

                before, _, after = hit.text.partition(old_word)
                trailing = list(hit.itersiblings())

                pieces = [
                    self._build_revision(
                        "del", "delText", old_word, current, next_id, who, date
                    )
                ]
                next_id += 1
                if new_word:
                    pieces.append(
                        self._build_revision(
                            "ins", "t", new_word, current, next_id, who, date
                        )
                    )
                    next_id += 1

                # Everything that followed the match moves into a new run so
                # the original reading order is preserved.
                remainder = None
                if after or trailing:
                    remainder = self._clone_run_shell(current)
                    if after:
                        rest = etree.SubElement(remainder, text_tag)
                        rest.set(self._XML_SPACE, "preserve")
                        rest.text = after
                    for node in trailing:
                        remainder.append(node)
                    pieces.append(remainder)

                if before:
                    hit.text = before
                    hit.set(self._XML_SPACE, "preserve")
                else:
                    current.remove(hit)

                position = parent.index(current) + 1
                for offset, piece in enumerate(pieces):
                    parent.insert(position + offset, piece)

                # A run left with nothing but formatting carries no content.
                if all(child.tag == props_tag for child in current):
                    parent.remove(current)

                # Keep scanning the tail for further occurrences.
                current = remainder

        return next_id

    def save(self) -> bytes:
        """Return the package as bytes with the edited XML parts written back.

        Every entry of the original archive is copied in order; the document
        part (and the settings part, when it was loaded) is replaced by the
        serialised in-memory tree.
        """
        overrides = {"word/document.xml": self.document}
        if self.settings is not None:
            overrides["word/settings.xml"] = self.settings

        buffer = BytesIO()

        with zipfile.ZipFile(BytesIO(self.pkg._raw), "r") as r_file:
            with zipfile.ZipFile(
                buffer, "w", compression=zipfile.ZIP_DEFLATED
            ) as o_file:
                for item in r_file.infolist():
                    tree = overrides.get(item.filename)
                    # Compare against None: lxml element truthiness reflects
                    # whether the element has children.
                    if tree is None:
                        payload = r_file.read(item.filename)
                    else:
                        payload = etree.tostring(
                            tree,
                            xml_declaration=True,
                            encoding="UTF-8",
                        )
                    o_file.writestr(item, payload)

        return buffer.getvalue()
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import zipfile
|
|
2
|
+
from io import BytesIO
|
|
3
|
+
from typing import Union
|
|
4
|
+
|
|
5
|
+
from lxml import etree
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class DocxPackage:
    """Read-only, in-memory access to the parts of a .docx (ZIP) package.

    The raw archive bytes are kept in ``_raw`` and an open ``zipfile.ZipFile``
    over them in ``zip``.  The object can be used as a context manager to
    close the archive handle deterministically.
    """

    def __init__(self, input_type: Union[str, bytes, BytesIO]):
        """Load the package.

        Args:
            input_type: A filesystem path (``str`` or ``os.PathLike``), the
                archive contents as a bytes-like object, or a ``BytesIO``.

        Raises:
            TypeError: If *input_type* is none of the supported kinds.
        """
        # Local import: ``os`` is not among the file's module-level imports.
        import os

        if isinstance(input_type, (str, os.PathLike)):
            with open(input_type, "rb") as f:
                self._raw = f.read()

        elif isinstance(input_type, (bytes, bytearray, memoryview)):
            # Normalise to immutable bytes so later edits to a caller-owned
            # buffer cannot change the stored archive.
            self._raw = bytes(input_type)

        elif isinstance(input_type, BytesIO):
            self._raw = input_type.getvalue()

        else:
            raise TypeError("Input Type must be path | bytes | BytesIO")

        self.zip = zipfile.ZipFile(BytesIO(self._raw), "r")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()

    def close(self) -> None:
        """Close the underlying ZIP handle; ``_raw`` stays available."""
        self.zip.close()

    def read_xml(self, part: str) -> etree._Element:
        """Parse the archive member *part* and return its root element.

        Raises:
            KeyError: If the archive has no member named *part*.
        """
        xml_bytes = self.zip.read(part)
        # Documents come from outside; do not expand entities while parsing.
        parser = etree.XMLParser(resolve_entities=False)
        return etree.fromstring(xml_bytes, parser)

    def list_parts(self):
        """Return the names of all members of the archive."""
        return self.zip.namelist()
|
|
File without changes
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: reddocx
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Open source implementation for track changes engine for MS Word (.docx)
|
|
5
|
+
Author: Ayush Kumar
|
|
6
|
+
Project-URL: Homepage, https://github.com/yourusername/reddocx
|
|
7
|
+
Keywords: docx,word,track-changes,ooxml
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.9
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: lxml>=6.0.2
|
|
14
|
+
Requires-Dist: lxml-stubs>=0.5.1
|
|
15
|
+
Dynamic: license-file
|
|
16
|
+
|
|
17
|
+
# red-docx
|
|
18
|
+
Open source repository for tracking changes in Word documents.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/reddocx/__init__.py
|
|
5
|
+
src/reddocx.egg-info/PKG-INFO
|
|
6
|
+
src/reddocx.egg-info/SOURCES.txt
|
|
7
|
+
src/reddocx.egg-info/dependency_links.txt
|
|
8
|
+
src/reddocx.egg-info/requires.txt
|
|
9
|
+
src/reddocx.egg-info/top_level.txt
|
|
10
|
+
src/reddocx/core/__init__.py
|
|
11
|
+
src/reddocx/core/document.py
|
|
12
|
+
src/reddocx/core/package.py
|
|
13
|
+
src/reddocx/xml/__init_.py
|
|
14
|
+
src/reddocx/xml/namespace.py
|
|
15
|
+
tests/test_loader.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
reddocx
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Manual round-trip check: load a .docx, apply tracked replacements, save it.

Run as a script.  The work lives in ``main`` behind a ``__main__`` guard so
that merely importing this module (for example during test collection) does
not touch the filesystem.
"""
from reddocx.core.document import DocxDocument


def main(input_path="sample.docx", output_path="roundtrip.docx"):
    """Apply the sample replacements to *input_path* and write *output_path*."""
    doc = DocxDocument(input_path)

    # A document may contain no paragraphs at all; only print when one exists.
    paragraphs = doc.paragraphs()
    if paragraphs:
        print(doc.get_paragraph_text(paragraphs[0]))

    report = doc.track_replace_words({"Rippling": "hi", "team": "earth"})
    print(report)

    data = doc.save()
    with open(output_path, "wb") as f:
        f.write(data)


if __name__ == "__main__":
    main()
|