open-document-lib 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- odf_lib/__init__.py +108 -0
- odf_lib/citation_mapping.py +240 -0
- odf_lib/odf_common.py +1625 -0
- odf_lib/py.typed +0 -0
- open_document_lib-1.0.0.dist-info/METADATA +194 -0
- open_document_lib-1.0.0.dist-info/RECORD +9 -0
- open_document_lib-1.0.0.dist-info/WHEEL +5 -0
- open_document_lib-1.0.0.dist-info/licenses/LICENSE +21 -0
- open_document_lib-1.0.0.dist-info/top_level.txt +1 -0
odf_lib/__init__.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""odf_lib — the shared OpenDocument Format library.
|
|
2
|
+
|
|
3
|
+
This package is the substance behind the four ODF agent skills (ODT, ODP,
|
|
4
|
+
ODS, ODG). It provides format-agnostic helpers for reading, editing, and
|
|
5
|
+
writing OpenDocument ZIP packages and flat (single-XML) ODF files, with
|
|
6
|
+
nothing but the Python standard library at its core.
|
|
7
|
+
|
|
8
|
+
The names re-exported here are the **public API** and follow semantic
|
|
9
|
+
versioning from 1.0 onward. Anything in ``odf_lib.odf_common`` that is not
|
|
10
|
+
re-exported here (notably ``_``-prefixed helpers) is internal and may
|
|
11
|
+
change without notice.
|
|
12
|
+
|
|
13
|
+
from odf_lib import pack_flat_odf, replace_text_in_element, xml_bytes
|
|
14
|
+
|
|
15
|
+
Note: ``q()`` is intentionally not exported — it needs a format-specific
|
|
16
|
+
namespace dict and lives in each skill's ``*_common.py`` wrapper.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from odf_lib.odf_common import (
|
|
22
|
+
FLAT_EXTENSIONS,
|
|
23
|
+
ODF_NAMESPACES,
|
|
24
|
+
VERSION,
|
|
25
|
+
apply_strict_schema_check,
|
|
26
|
+
clear_children,
|
|
27
|
+
copy_into_package,
|
|
28
|
+
copy_with_multiple_members,
|
|
29
|
+
embed_pictures,
|
|
30
|
+
ensure_manifest_entry,
|
|
31
|
+
ensure_schema,
|
|
32
|
+
ensure_sequence_declarations,
|
|
33
|
+
find_pandoc,
|
|
34
|
+
find_soffice,
|
|
35
|
+
find_text_position_in_element,
|
|
36
|
+
inject_styles_from_file,
|
|
37
|
+
insert_after_text_in_element,
|
|
38
|
+
insert_in_paragraph,
|
|
39
|
+
latex_to_mathml,
|
|
40
|
+
local_name,
|
|
41
|
+
media_type_for,
|
|
42
|
+
pack_dir_as_odf,
|
|
43
|
+
pack_flat_odf,
|
|
44
|
+
parse_xml_from_zip,
|
|
45
|
+
replace_pattern_with_element_in_element,
|
|
46
|
+
replace_text_in_element,
|
|
47
|
+
sniff_image_mime,
|
|
48
|
+
unique_object_name,
|
|
49
|
+
unique_picture_name,
|
|
50
|
+
unpack_flat_odf,
|
|
51
|
+
unpack_to_temp,
|
|
52
|
+
update_meta_for_edit,
|
|
53
|
+
validate_against_schema,
|
|
54
|
+
wrap_text_across_elements,
|
|
55
|
+
wrap_text_with_pair_in_element,
|
|
56
|
+
write_odf_with_replacements,
|
|
57
|
+
xml_bytes,
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
# Expose the library version under the conventional ``__version__`` name too.
__version__ = VERSION

# Names exported by ``from odf_lib import *``. Every entry is one of the names
# imported from ``odf_lib.odf_common`` above, grouped by area.
__all__ = [
    # Constants
    "VERSION",
    "ODF_NAMESPACES",
    "FLAT_EXTENSIONS",
    # ZIP / XML core
    "parse_xml_from_zip",
    "xml_bytes",
    "write_odf_with_replacements",
    "pack_dir_as_odf",
    "copy_into_package",
    "copy_with_multiple_members",
    "unpack_to_temp",
    # Manifest / media
    "ensure_manifest_entry",
    "media_type_for",
    "sniff_image_mime",
    "unique_picture_name",
    "unique_object_name",
    # Metadata
    "update_meta_for_edit",
    # Flat ODF
    "pack_flat_odf",
    "unpack_flat_odf",
    # Text walker / locator / insertion
    "replace_text_in_element",
    "replace_pattern_with_element_in_element",
    "find_text_position_in_element",
    "insert_after_text_in_element",
    "insert_in_paragraph",
    "wrap_text_with_pair_in_element",
    "wrap_text_across_elements",
    "ensure_sequence_declarations",
    "clear_children",
    "local_name",
    # Styles / pictures
    "inject_styles_from_file",
    "embed_pictures",
    # Schema validation
    "ensure_schema",
    "validate_against_schema",
    "apply_strict_schema_check",
    # External tooling
    "find_soffice",
    "find_pandoc",
    "latex_to_mathml",
]
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
"""Field mappings between CSL-JSON, BibTeX, and ODF text:bibliography-mark.
|
|
2
|
+
|
|
3
|
+
Keep this module dependency-light so it imports without pulling bibtexparser.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
# Values accepted by the ODF ``text:bibliography-type`` attribute
# (per OASIS ODF 1.3 spec, §6.1.10).
ODF_BIBLIOGRAPHY_TYPES: set[str] = {
    # BibTeX-style entry kinds
    "article", "book", "booklet", "conference",
    "inbook", "incollection", "inproceedings",
    "manual", "mastersthesis", "misc", "phdthesis",
    "proceedings", "techreport", "unpublished",
    # Kinds without a same-named BibTeX entry type
    "email", "journal", "www",
    # User-defined slots
    "custom1", "custom2", "custom3", "custom4", "custom5",
}
|
|
35
|
+
|
|
36
|
+
# Attribute names (without the ``text:`` prefix) that ODF accepts on
# text:bibliography-mark.
ODF_BIBLIOGRAPHY_FIELDS: set[str] = {
    # Entry classification
    "bibliography-type", "identifier", "report-type",
    # People and organisations
    "author", "editor", "institution", "organizations", "publisher", "school",
    # Titles and containers
    "title", "booktitle", "journal", "series", "chapter", "edition",
    # Location within / of the publication
    "address", "howpublished", "month", "number", "pages", "volume", "year",
    # Identifiers and links
    "isbn", "issn", "url",
    # Free-form text
    "annote", "note",
    # User-defined slots
    "custom1", "custom2", "custom3", "custom4", "custom5",
}
|
|
71
|
+
|
|
72
|
+
# CSL item type → ODF bibliography-type, written as one row per ODF type
# listing the CSL types that collapse onto it.
CSL_TYPE_TO_ODF: dict[str, str] = {
    csl_name: odf_name
    for odf_name, csl_names in (
        (
            "article",
            ("article-journal", "article-magazine", "article-newspaper", "article"),
        ),
        ("book", ("book",)),
        ("incollection", ("chapter",)),
        ("inproceedings", ("paper-conference",)),
        ("phdthesis", ("thesis",)),
        ("techreport", ("report",)),
        ("unpublished", ("manuscript",)),
        ("www", ("webpage", "post", "post-weblog")),
        ("email", ("personal_communication",)),
    )
    for csl_name in csl_names
}
|
|
89
|
+
|
|
90
|
+
# BibTeX entry type → ODF bibliography-type. Most BibTeX types share their
# name with the ODF type; only the web-resource aliases need translating.
BIBTEX_TYPE_TO_ODF: dict[str, str] = {
    **{
        same_name: same_name
        for same_name in (
            "article", "book", "booklet", "conference",
            "inbook", "incollection", "inproceedings",
            "manual", "mastersthesis", "misc", "phdthesis",
            "proceedings", "techreport", "unpublished",
        )
    },
    "online": "www",
    "electronic": "www",
}
|
|
109
|
+
|
|
110
|
+
# BibTeX field name → ODF field name. Every field keeps its name except
# ``organization`` and ``type``, which ODF spells differently.
BIBTEX_FIELD_TO_ODF: dict[str, str] = {
    bib_name: {"organization": "organizations", "type": "report-type"}.get(
        bib_name, bib_name
    )
    for bib_name in (
        "address", "annote", "author", "booktitle", "chapter",
        "edition", "editor", "howpublished", "institution",
        "isbn", "issn", "journal", "month", "note", "number",
        "organization", "pages", "publisher", "school", "series",
        "title", "type", "url", "volume", "year",
    )
}
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def csl_authors_to_string(authors: list[dict[str, Any]]) -> str:
    """Render a CSL name list as ``'Family, Given; Family, Given'``.

    Args:
        authors: CSL-JSON name objects (dicts). ``None`` is treated as an
            empty list; items that are not dicts are ignored.

    Returns:
        The names joined with ``"; "``. For each name, a truthy ``literal``
        is used verbatim; otherwise ``family`` and ``given`` are combined as
        ``"Family, Given"``, falling back to whichever of the two is present.
        Names with none of these keys contribute nothing, so the result may
        be the empty string.
    """
    parts: list[str] = []
    for author in authors or ():
        if not isinstance(author, dict):
            continue
        literal = author.get("literal")
        if literal:
            parts.append(str(literal))
            continue
        # Coerce to text up front: a non-string name part (e.g. a number
        # from loosely typed JSON) would otherwise make "; ".join() raise.
        family = str(author.get("family") or "")
        given = str(author.get("given") or "")
        name = ", ".join(piece for piece in (family, given) if piece)
        if name:
            parts.append(name)
    return "; ".join(parts)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def csl_date_to_year(date_field: Any) -> str | None:
    """Extract the year from a CSL-JSON date object such as ``issued``.

    Only the structured ``date-parts`` form is read: the year is the first
    component of the first date in ``date-parts``.

    Args:
        date_field: The raw value of a CSL date variable; anything that is
            not a dict yields ``None``.

    Returns:
        The year as a string, or ``None`` when ``date-parts`` is missing,
        empty, malformed, or its year component is null or blank.
    """
    if not isinstance(date_field, dict):
        return None
    parts = date_field.get("date-parts")
    if not isinstance(parts, (list, tuple)) or not parts:
        return None
    first = parts[0]
    if not isinstance(first, (list, tuple)) or not first:
        return None
    year = first[0]
    # A null year must not be stringified: "None" is truthy and would be
    # treated as a real year by callers.
    if year is None:
        return None
    text = str(year).strip()
    return text or None
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def csl_entry_to_odf_fields(entry: dict[str, Any]) -> dict[str, str]:
    """Map a CSL-JSON entry to ODF text:bibliography-mark attribute values.

    Args:
        entry: One CSL-JSON item (a dict keyed by CSL variable names).

    Returns:
        A dict of ODF field name → string value. ``bibliography-type`` is
        always present (``"misc"`` when the CSL type is missing, unknown, or
        not a string). Every other field is emitted only when the entry
        supplies a usable value: null and empty values are skipped rather
        than stringified to ``"None"`` or ``""``.

        ``container-title`` is stored as ``booktitle`` for ``incollection``
        and ``inproceedings`` entries (the field those types are expected to
        carry) and as ``journal`` for every other type. For a list-valued
        ``ISSN`` the first element is used.
    """
    result: dict[str, str] = {}

    def put(odf_field: str, value: Any) -> None:
        """Store *value* as text unless it is null or renders empty."""
        if value is None:
            return
        text = str(value)
        if text:
            result[odf_field] = text

    csl_type = entry.get("type", "misc")
    # Guard the lookup: a non-string type cannot match any key and an
    # unhashable one (e.g. a list) would make dict.get() raise.
    odf_type = (
        CSL_TYPE_TO_ODF.get(csl_type, "misc") if isinstance(csl_type, str) else "misc"
    )
    result["bibliography-type"] = odf_type

    put("title", entry.get("title"))

    # For parts of a larger work the container is the book or proceedings
    # title, not a journal.
    in_larger_work = odf_type in ("incollection", "inproceedings")
    put("booktitle" if in_larger_work else "journal", entry.get("container-title"))

    for role in ("author", "editor"):
        people = entry.get(role)
        if isinstance(people, list) and people:
            put(role, csl_authors_to_string(people))

    put("year", csl_date_to_year(entry.get("issued")))

    for csl_key, odf_field in (
        ("publisher", "publisher"),
        ("publisher-place", "address"),
        ("page", "pages"),
        ("volume", "volume"),
        ("issue", "number"),
        ("edition", "edition"),
        ("URL", "url"),
        ("ISBN", "isbn"),
    ):
        put(odf_field, entry.get(csl_key))

    issn = entry.get("ISSN")
    if isinstance(issn, list):
        # Use the first ISSN; an empty list means "no ISSN", not "[]".
        issn = issn[0] if issn else None
    put("issn", issn)

    put("note", entry.get("note"))
    return result
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _strip_outer_braces(text: str) -> str:
    """Remove brace pairs that enclose the whole of *text*; keep inner braces."""
    text = text.strip()
    while len(text) >= 2 and text[0] == "{" and text[-1] == "}":
        depth = 0
        closes_at = -1
        for index, char in enumerate(text):
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
                if depth == 0:
                    closes_at = index
                    break
        # Only unwrap when the first "{" is matched by the final "}".
        # In "{NASA} and {ESA}" it closes early, so the braces are content.
        if closes_at != len(text) - 1:
            break
        text = text[1:-1].strip()
    return text


def bibtex_entry_to_odf_fields(entry: dict[str, Any]) -> dict[str, str]:
    """Map a parsed BibTeX entry (dict with ENTRYTYPE + fields) to ODF fields.

    Args:
        entry: A BibTeX entry as a flat dict. The entry type is read from
            ``ENTRYTYPE``, falling back to ``type`` and then ``"misc"``, and
            is matched case-insensitively.

    Returns:
        A dict of ODF field name → string value. ``bibliography-type`` is
        always present (``"misc"`` for unknown types). Each BibTeX field
        listed in ``BIBTEX_FIELD_TO_ODF`` is copied when it has a truthy
        value. Surrounding whitespace and brace pairs that wrap the entire
        value are removed; braces inside the value are left untouched, and
        values that end up empty are skipped.
    """
    result: dict[str, str] = {}
    entry_type = str(entry.get("ENTRYTYPE", entry.get("type", "misc"))).lower()
    result["bibliography-type"] = BIBTEX_TYPE_TO_ODF.get(entry_type, "misc")
    for bib_field, odf_field in BIBTEX_FIELD_TO_ODF.items():
        raw = entry.get(bib_field)
        if not raw:
            continue
        # str.strip("{}") would strip each end independently and corrupt
        # values such as "{NASA} and {ESA}"; unwrap matched pairs instead.
        value = _strip_outer_braces(str(raw))
        if value:
            result[odf_field] = value
    return result
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
# Fields each ODF bibliography-type must carry to count as complete. This is
# a pragmatic subset for validation, not an exhaustive rule set. Each row is
# (type, space-separated field names); every type gets its own set object.
REQUIRED_FIELDS: dict[str, set[str]] = {
    odf_type: set(field_names.split())
    for odf_type, field_names in (
        ("article", "author title journal year"),
        ("book", "author title year"),
        ("incollection", "author title booktitle year"),
        ("inproceedings", "author title booktitle year"),
        ("techreport", "author title institution year"),
        ("mastersthesis", "author title school year"),
        ("phdthesis", "author title school year"),
        ("manual", "title year"),
        ("misc", ""),
        ("www", "url"),
    )
}
|