docxray 0.0.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. docxray/__init__.py +6 -0
  2. docxray/api.py +28 -0
  3. docxray/enum/__init__.py +0 -0
  4. docxray/enum/lxml.py +12 -0
  5. docxray/exceptions.py +10 -0
  6. docxray/lxml.py +70 -0
  7. docxray/numeral/bcp47.py +61 -0
  8. docxray/numeral/charset.py +730 -0
  9. docxray/numeral/numeral.py +769 -0
  10. docxray/opc/__init__.py +3 -0
  11. docxray/opc/constants.py +224 -0
  12. docxray/opc/exceptions.py +9 -0
  13. docxray/opc/ns.py +42 -0
  14. docxray/opc/oxml.py +134 -0
  15. docxray/opc/package.py +188 -0
  16. docxray/opc/packuri.py +110 -0
  17. docxray/opc/part.py +200 -0
  18. docxray/opc/phys_pkg.py +114 -0
  19. docxray/opc/pkgreader.py +289 -0
  20. docxray/opc/rel.py +105 -0
  21. docxray/opc/shared.py +29 -0
  22. docxray/oxml/__init__.py +47 -0
  23. docxray/oxml/strict/__init__.py +0 -0
  24. docxray/oxml/trans/__init__.py +354 -0
  25. docxray/oxml/trans/background.py +6 -0
  26. docxray/oxml/trans/document.py +35 -0
  27. docxray/oxml/trans/drawing.py +184 -0
  28. docxray/oxml/trans/enums.py +203 -0
  29. docxray/oxml/trans/exceptions.py +10 -0
  30. docxray/oxml/trans/h2d/border.py +66 -0
  31. docxray/oxml/trans/h2d/cell_h2d.py +768 -0
  32. docxray/oxml/trans/h2d/colorize.py +71 -0
  33. docxray/oxml/trans/h2d/exceptions.py +6 -0
  34. docxray/oxml/trans/h2d/how2display.py +340 -0
  35. docxray/oxml/trans/h2d/list_view.py +487 -0
  36. docxray/oxml/trans/h2d/numeral_rules.py +99 -0
  37. docxray/oxml/trans/h2d/paragraph_h2d.py +643 -0
  38. docxray/oxml/trans/h2d/row_h2d.py +147 -0
  39. docxray/oxml/trans/h2d/run_h2d.py +164 -0
  40. docxray/oxml/trans/h2d/table_h2d.py +97 -0
  41. docxray/oxml/trans/ns.py +346 -0
  42. docxray/oxml/trans/numbering.py +239 -0
  43. docxray/oxml/trans/package.py +22 -0
  44. docxray/oxml/trans/parser.py +39 -0
  45. docxray/oxml/trans/part.py +40 -0
  46. docxray/oxml/trans/parts/__init__.py +0 -0
  47. docxray/oxml/trans/parts/document.py +67 -0
  48. docxray/oxml/trans/parts/image.py +13 -0
  49. docxray/oxml/trans/parts/numbering.py +26 -0
  50. docxray/oxml/trans/parts/settings.py +12 -0
  51. docxray/oxml/trans/parts/story.py +41 -0
  52. docxray/oxml/trans/parts/styles.py +29 -0
  53. docxray/oxml/trans/proxy/blkcntnr.py +68 -0
  54. docxray/oxml/trans/proxy/compute.py +135 -0
  55. docxray/oxml/trans/proxy/document.py +49 -0
  56. docxray/oxml/trans/proxy/drawing.py +61 -0
  57. docxray/oxml/trans/proxy/image/__init__.py +0 -0
  58. docxray/oxml/trans/proxy/image/picture.py +140 -0
  59. docxray/oxml/trans/proxy/image/wmf.py +53 -0
  60. docxray/oxml/trans/proxy/numbering/numbering.py +285 -0
  61. docxray/oxml/trans/proxy/settings.py +7 -0
  62. docxray/oxml/trans/proxy/shared.py +244 -0
  63. docxray/oxml/trans/proxy/styles/__init__.py +0 -0
  64. docxray/oxml/trans/proxy/styles/doc_dflts.py +34 -0
  65. docxray/oxml/trans/proxy/styles/style.py +187 -0
  66. docxray/oxml/trans/proxy/styles/styles.py +83 -0
  67. docxray/oxml/trans/proxy/table.py +467 -0
  68. docxray/oxml/trans/proxy/text/__init__.py +0 -0
  69. docxray/oxml/trans/proxy/text/font.py +27 -0
  70. docxray/oxml/trans/proxy/text/hyperlink.py +25 -0
  71. docxray/oxml/trans/proxy/text/paragraph.py +188 -0
  72. docxray/oxml/trans/proxy/text/run.py +117 -0
  73. docxray/oxml/trans/proxy/types.py +33 -0
  74. docxray/oxml/trans/settings.py +12 -0
  75. docxray/oxml/trans/shared.py +278 -0
  76. docxray/oxml/trans/st/dml_main.py +12 -0
  77. docxray/oxml/trans/st/dml_wordprocessing_drawing.py +7 -0
  78. docxray/oxml/trans/st/enums.py +464 -0
  79. docxray/oxml/trans/st/shared_common.py +67 -0
  80. docxray/oxml/trans/st/wml.py +228 -0
  81. docxray/oxml/trans/styles.py +200 -0
  82. docxray/oxml/trans/table/cell_props.py +164 -0
  83. docxray/oxml/trans/table/row_props.py +79 -0
  84. docxray/oxml/trans/table/table.py +55 -0
  85. docxray/oxml/trans/table/table_props.py +225 -0
  86. docxray/oxml/trans/text/__init__.py +0 -0
  87. docxray/oxml/trans/text/hyperlink.py +13 -0
  88. docxray/oxml/trans/text/num_props.py +33 -0
  89. docxray/oxml/trans/text/omath.py +10 -0
  90. docxray/oxml/trans/text/paragraph.py +50 -0
  91. docxray/oxml/trans/text/paragraph_props.py +303 -0
  92. docxray/oxml/trans/text/range.py +14 -0
  93. docxray/oxml/trans/text/run.py +97 -0
  94. docxray/oxml/trans/text/run_props.py +244 -0
  95. docxray/oxml/trans/types.py +8 -0
  96. docxray/oxml/trans/xmlchemy.py +260 -0
  97. docxray/shared.py +13 -0
  98. docxray/transform/_lxml.py +42 -0
  99. docxray/transform/builders.py +742 -0
  100. docxray/transform/ruleset.py +54 -0
  101. docxray/transform/transformers.py +77 -0
  102. docxray/transform/utils/char_graph.py +243 -0
  103. docxray/types.py +6 -0
  104. docxray/xsd/exceptions.py +26 -0
  105. docxray/xsd/facets.py +60 -0
  106. docxray/xsd/primitives.py +188 -0
  107. docxray/xsd/xsd.py +150 -0
  108. docxray-0.0.0a0.dist-info/METADATA +15 -0
  109. docxray-0.0.0a0.dist-info/RECORD +111 -0
  110. docxray-0.0.0a0.dist-info/WHEEL +5 -0
  111. docxray-0.0.0a0.dist-info/top_level.txt +1 -0
docxray/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ from .api import Document  # loader function re-exported as the package's entry point
2
+
3
+ __version__ = "0.0.0.alpha+build.1"  # NOTE(review): the wheel is published as 0.0.0a0 - confirm this string is meant to differ
4
+
5
+
6
+ __all__ = ["Document"]  # `from docxray import *` exposes only `Document`
docxray/api.py ADDED
@@ -0,0 +1,28 @@
1
+ """Main class for loading DOCX (Word documents)."""
2
+
3
+ from .opc.constants import CONTENT_TYPE as CT
4
+ from .oxml import TransitionalPartFactory
5
+ from .oxml.trans.package import TransitionalPackage
6
+ from .oxml.trans.proxy.document import Document as DocumentObject
7
+ from .types import PkgFile
8
+
9
+
10
def Document(docx: PkgFile) -> DocumentObject:
    """Open a `.docx` (OOXML) package for reading and return its document proxy.

    Args:
        docx (PkgFile): Package source handed to `TransitionalPackage.open`:
            a string path, a `Path` instance or a byte stream.

    Raises:
        ValueError: If the content type of the main document part is not
            `CT.WML_DOCUMENT_MAIN`, i.e. the package is not a Word file.
            NOTE(review): opening/parsing the package may raise as well;
            confirm the exception types against `TransitionalPackage.open`.

    Returns:
        DocumentObject: The `document` attribute of the main document part.
    """
    package = TransitionalPackage.open(docx, TransitionalPartFactory)
    main_part = package.main_document_part
    found_type = main_part.content_type
    # Reject packages whose main part is not a WordprocessingML document
    # before handing anything back to the caller.
    if found_type != CT.WML_DOCUMENT_MAIN:
        tmpl = "file '%s' is not a Word file, content type is '%s'"
        raise ValueError(tmpl % (docx, found_type))
    return main_part.document
File without changes
docxray/enum/lxml.py ADDED
@@ -0,0 +1,12 @@
1
+ """Enumerations for lxml (XML)."""
2
+
3
+ from enum import IntEnum
4
+
5
+
6
class POS(IntEnum):
    """Where an element sits relative to its siblings.

    Members are plain integers (`IntEnum`), so they compare equal to the
    values 0-3 listed below.
    """

    START = 0
    MIDDLE = 1
    END = 2
    # NOTE(review): presumably used when the element has no siblings - confirm.
    ONE_ITEM = 3
docxray/exceptions.py ADDED
@@ -0,0 +1,10 @@
1
+ """Module with exceptions used in `docxray`."""
2
+
3
+
4
class DocxrayError(Exception):
    """Generic `docxray` error.

    Serves as the base class for the more specific exceptions of the package.
    """
6
+
7
+
8
class InvalidXmlError(DocxrayError):
    """Signal that invalid XML was encountered.

    A typical trigger is an attempt to access a required child element that
    is missing.
    """
docxray/lxml.py ADDED
@@ -0,0 +1,70 @@
1
+ """Base XML element class and class-lookup helper used across the project."""
2
+
3
+ from collections.abc import Iterator, Mapping
4
+ from typing import Any, TypeVar
5
+
6
+ from lxml import etree
7
+
8
+ BASE_ELM_T = TypeVar("BASE_ELM_T", bound="BaseOxmlElement")
9
+
10
+
11
class BaseOxmlElement(etree.ElementBase):
    """Project-wide base XML element (an lxml `ElementBase`) with typed lookups.

    Several overrides take an extra `elm_hint` class argument. It is never
    read at runtime; it only binds `BASE_ELM_T` so that static type checkers
    can infer the element type a call returns.
    """

    def xpath(self, xpath: str, ns: dict[str, str] | None = None) -> Any:  # type: ignore[override]
        """Evaluate `xpath` on this element, passing `ns` as the namespaces mapping."""
        return super().xpath(xpath, namespaces=ns)

    def getparent(self, elm_hint: type[BASE_ELM_T]) -> BASE_ELM_T | None:  # type: ignore[override]
        """Return the parent element, looking it up once and reusing the result.

        The first answer is stored on this object as `_parent` (the original
        intent: cache the parent for style resolution).

        NOTE(review): the stored value is never invalidated, so it stays as
        first observed even if the element is moved afterwards.
        """
        try:
            return self._parent  # type: ignore[return-value]
        except AttributeError:
            # First call on this object: ask lxml and remember the answer.
            self._parent = super().getparent()
            return self._parent  # type: ignore[return-value]

    def iterfind(  # type: ignore[override]
        self,
        elm_qn: str,
        elm_hint: type[BASE_ELM_T],
        namespaces: Mapping[str, str] | None = None,
    ) -> Iterator[BASE_ELM_T]:
        """Iterate over the elements matching `elm_qn`; `elm_hint` is typing-only."""
        return super().iterfind(elm_qn, namespaces=namespaces)  # type: ignore[return-value]

    def find(  # type: ignore[override]
        self,
        elm_qn: str,
        elm_hint: type[BASE_ELM_T],
        namespaces: Mapping[str, str] | None = None,
    ) -> BASE_ELM_T | None:
        """Return the first element matching `elm_qn`, or `None` when nothing matches."""
        return super().find(elm_qn, namespaces=namespaces)  # type: ignore[return-value]

    def findall(  # type: ignore[override]
        self,
        elm_qn: str,
        elm_hint: type[BASE_ELM_T],
        namespaces: Mapping[str, str] | None = None,
    ) -> list[BASE_ELM_T]:
        """Return every element matching `elm_qn` as a list."""
        return super().findall(elm_qn, namespaces=namespaces)  # type: ignore[return-value]

    def __repr__(self) -> str:
        """Representation of an XML element for debugging.

        Takes the inherited representation and swaps the first occurrence of
        "Element" for the concrete class name.
        """
        default_repr = super().__repr__()
        return default_repr.replace("Element", type(self).__name__, 1)
52
+
53
+
54
def elm_ns_cls_lookup(
    fallback_cls: type[BaseOxmlElement] = BaseOxmlElement,
) -> etree.ElementNamespaceClassLookup:
    """Build a namespace class lookup that falls back to `fallback_cls`.

    Args:
        fallback_cls (type[BaseOxmlElement], optional): Element class given to
            `etree.ElementDefaultClassLookup`, which is installed as the
            fallback of the returned lookup. Defaults to BaseOxmlElement.

    Returns:
        etree.ElementNamespaceClassLookup: New lookup with the fallback set.
    """
    default_lookup = etree.ElementDefaultClassLookup(element=fallback_cls)
    # Passing the fallback to the constructor is equivalent to calling
    # `set_fallback()` on a freshly created lookup.
    return etree.ElementNamespaceClassLookup(fallback=default_lookup)
@@ -0,0 +1,61 @@
1
+ """Module for getting the script subtag defined by BCP 47 (Best Current Practice 47, RFC 5646/RFC 4647)
2
+ in a format like `Cyrl` (Cyrillic-based chars), `Latn` (Latin-based chars), etc. for locales like `en-US`.
3
+ """
4
+
5
+ import csv
6
+ from functools import lru_cache
7
+ from pathlib import Path
8
+
9
# Both lookup tables are tab-separated files shipped next to this module.
_DIR_ = Path(__file__).parent
_DFLT_SCRIPT_PATH_ = _DIR_ / "iso639-default-script.tsv"
_ICIDS_PATH_ = _DIR_ / "iso639-lcids.tsv"

# Maps the `tag3` column to the `script` column (the default script used when
# a locale row carries no script of its own).
# `newline=""` leaves newline handling to the csv module, as its
# documentation requires for file objects passed to a reader.
with open(_DFLT_SCRIPT_PATH_, "r", encoding="utf-8", newline="") as f:
    reader = csv.DictReader(f, delimiter="\t")
    _ISO639_DEFAULT_SCRIPT_ = {row["tag3"]: row["script"] for row in reader}

# Maps `(tag1, region)` to `(tag3, script)`; `script()` looks locales up here.
with open(_ICIDS_PATH_, "r", encoding="utf-8", newline="") as f:
    reader = csv.DictReader(f, delimiter="\t")
    _ISO639_ICIDS_ = {
        (row["tag1"], row["region"]): (row["tag3"], row["script"])
        for row in reader
    }
27
+
28
+
29
@lru_cache
def script(locale: str) -> str:
    """Return the script tag for `locale`.

    Examples:
        For `en-US` it will return `Latn`.

    Args:
        locale (str): Either a dash-separated `language-region` pair such as
            `en-US` or a single language tag such as `ru`.

    Raises:
        ValueError: If `locale` has more than two dash-separated parts, or if
            no script is known for it.

    Returns:
        str: Script tag like `Latn`.
    """
    parts = locale.split("-")
    # `split` always yields at least one part, so only "too many" is invalid.
    if len(parts) > 2:
        raise ValueError(f"Wrong locale `{locale}`")

    # A lone language tag is looked up with an empty region.
    language = parts[0]
    region = parts[1] if len(parts) == 2 else ""

    entry = _ISO639_ICIDS_.get((language, region))
    if entry is None:
        raise ValueError(f"No such script for locale `{locale}`")

    tag3, explicit_script = entry
    if explicit_script:
        return explicit_script

    # No script recorded for the locale itself: use the default for `tag3`.
    default_script = _ISO639_DEFAULT_SCRIPT_.get(tag3)
    if default_script is None:
        raise ValueError(f"No such script for locale `{locale}`")
    return default_script