iamraw 4.91.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iamraw-4.91.1/LICENSE +21 -0
- iamraw-4.91.1/PKG-INFO +32 -0
- iamraw-4.91.1/README +12 -0
- iamraw-4.91.1/iamraw/__init__.py +353 -0
- iamraw-4.91.1/iamraw/__mixin__.py +33 -0
- iamraw-4.91.1/iamraw/abbreviation.py +102 -0
- iamraw-4.91.1/iamraw/annotation.py +52 -0
- iamraw-4.91.1/iamraw/bibliography.py +79 -0
- iamraw-4.91.1/iamraw/border.py +36 -0
- iamraw-4.91.1/iamraw/bounding.py +210 -0
- iamraw-4.91.1/iamraw/boxes.py +66 -0
- iamraw-4.91.1/iamraw/caption.py +82 -0
- iamraw-4.91.1/iamraw/code.py +40 -0
- iamraw-4.91.1/iamraw/content.py +27 -0
- iamraw-4.91.1/iamraw/distance.py +22 -0
- iamraw-4.91.1/iamraw/docinfo.py +103 -0
- iamraw-4.91.1/iamraw/docref.py +49 -0
- iamraw-4.91.1/iamraw/document.py +276 -0
- iamraw-4.91.1/iamraw/figure.py +30 -0
- iamraw-4.91.1/iamraw/findings.py +345 -0
- iamraw-4.91.1/iamraw/fonts.py +106 -0
- iamraw-4.91.1/iamraw/fontstore.py +198 -0
- iamraw-4.91.1/iamraw/footnotes.py +16 -0
- iamraw-4.91.1/iamraw/formula.py +100 -0
- iamraw-4.91.1/iamraw/headerfooter.py +277 -0
- iamraw-4.91.1/iamraw/headlines.py +115 -0
- iamraw-4.91.1/iamraw/hits.py +17 -0
- iamraw-4.91.1/iamraw/href.py +21 -0
- iamraw-4.91.1/iamraw/images.py +40 -0
- iamraw-4.91.1/iamraw/index.py +89 -0
- iamraw-4.91.1/iamraw/lang.py +63 -0
- iamraw-4.91.1/iamraw/layout.py +32 -0
- iamraw-4.91.1/iamraw/likelihood.py +20 -0
- iamraw-4.91.1/iamraw/line.py +31 -0
- iamraw-4.91.1/iamraw/list.py +65 -0
- iamraw-4.91.1/iamraw/magic.py +30 -0
- iamraw-4.91.1/iamraw/page.py +13 -0
- iamraw-4.91.1/iamraw/pagecontent.py +13 -0
- iamraw-4.91.1/iamraw/pagenumbers.py +42 -0
- iamraw-4.91.1/iamraw/path.py +280 -0
- iamraw-4.91.1/iamraw/pdfinfo.py +38 -0
- iamraw-4.91.1/iamraw/person.py +31 -0
- iamraw-4.91.1/iamraw/quote.py +23 -0
- iamraw-4.91.1/iamraw/sections.py +259 -0
- iamraw-4.91.1/iamraw/solution.py +52 -0
- iamraw-4.91.1/iamraw/spacestation.py +30 -0
- iamraw-4.91.1/iamraw/style.py +128 -0
- iamraw-4.91.1/iamraw/table.py +58 -0
- iamraw-4.91.1/iamraw/text.py +93 -0
- iamraw-4.91.1/iamraw/textposition.py +16 -0
- iamraw-4.91.1/iamraw/title.py +54 -0
- iamraw-4.91.1/iamraw/titlepage.py +109 -0
- iamraw-4.91.1/iamraw/toc.py +271 -0
- iamraw-4.91.1/iamraw/webconfig.py +16 -0
- iamraw-4.91.1/iamraw/whitepage.py +25 -0
- iamraw-4.91.1/iamraw.egg-info/PKG-INFO +32 -0
- iamraw-4.91.1/iamraw.egg-info/SOURCES.txt +137 -0
- iamraw-4.91.1/iamraw.egg-info/dependency_links.txt +1 -0
- iamraw-4.91.1/iamraw.egg-info/requires.txt +6 -0
- iamraw-4.91.1/iamraw.egg-info/top_level.txt +3 -0
- iamraw-4.91.1/pyproject.toml +82 -0
- iamraw-4.91.1/serializeraw/__init__.py +228 -0
- iamraw-4.91.1/serializeraw/__patch__.py +37 -0
- iamraw-4.91.1/serializeraw/abbreviation.py +129 -0
- iamraw-4.91.1/serializeraw/annotation.py +95 -0
- iamraw-4.91.1/serializeraw/bibliography.py +107 -0
- iamraw-4.91.1/serializeraw/border.py +126 -0
- iamraw-4.91.1/serializeraw/bounding.py +64 -0
- iamraw-4.91.1/serializeraw/boxedcontent.py +110 -0
- iamraw-4.91.1/serializeraw/boxes.py +50 -0
- iamraw-4.91.1/serializeraw/caption.py +33 -0
- iamraw-4.91.1/serializeraw/chapter.py +30 -0
- iamraw-4.91.1/serializeraw/code.py +82 -0
- iamraw-4.91.1/serializeraw/color.py +52 -0
- iamraw-4.91.1/serializeraw/content.py +34 -0
- iamraw-4.91.1/serializeraw/distance.py +61 -0
- iamraw-4.91.1/serializeraw/docinfo.py +43 -0
- iamraw-4.91.1/serializeraw/docref.py +86 -0
- iamraw-4.91.1/serializeraw/document.py +335 -0
- iamraw-4.91.1/serializeraw/figure.py +110 -0
- iamraw-4.91.1/serializeraw/findings.py +256 -0
- iamraw-4.91.1/serializeraw/fonts.py +182 -0
- iamraw-4.91.1/serializeraw/fontstore.py +59 -0
- iamraw-4.91.1/serializeraw/footnotes.py +47 -0
- iamraw-4.91.1/serializeraw/formula.py +36 -0
- iamraw-4.91.1/serializeraw/formularaw.py +110 -0
- iamraw-4.91.1/serializeraw/headerfooter.py +335 -0
- iamraw-4.91.1/serializeraw/headlines.py +126 -0
- iamraw-4.91.1/serializeraw/hits.py +61 -0
- iamraw-4.91.1/serializeraw/href.py +36 -0
- iamraw-4.91.1/serializeraw/images.py +118 -0
- iamraw-4.91.1/serializeraw/index.py +28 -0
- iamraw-4.91.1/serializeraw/likelihood.py +81 -0
- iamraw-4.91.1/serializeraw/line.py +122 -0
- iamraw-4.91.1/serializeraw/list.py +121 -0
- iamraw-4.91.1/serializeraw/magic.py +69 -0
- iamraw-4.91.1/serializeraw/pagecontent.py +43 -0
- iamraw-4.91.1/serializeraw/pagenumbers.py +106 -0
- iamraw-4.91.1/serializeraw/pdfinfo.py +100 -0
- iamraw-4.91.1/serializeraw/quote.py +62 -0
- iamraw-4.91.1/serializeraw/sections.py +155 -0
- iamraw-4.91.1/serializeraw/sentence.py +77 -0
- iamraw-4.91.1/serializeraw/spacestation.py +115 -0
- iamraw-4.91.1/serializeraw/style.py +30 -0
- iamraw-4.91.1/serializeraw/table.py +68 -0
- iamraw-4.91.1/serializeraw/text.py +111 -0
- iamraw-4.91.1/serializeraw/textposition.py +84 -0
- iamraw-4.91.1/serializeraw/titlepage.py +29 -0
- iamraw-4.91.1/serializeraw/tnav/__init__.py +8 -0
- iamraw-4.91.1/serializeraw/tnav/create.py +253 -0
- iamraw-4.91.1/serializeraw/tnav/dump.py +128 -0
- iamraw-4.91.1/serializeraw/tnav/filter.py +21 -0
- iamraw-4.91.1/serializeraw/tnav/highnote.py +52 -0
- iamraw-4.91.1/serializeraw/tnav/translation.py +30 -0
- iamraw-4.91.1/serializeraw/toc.py +121 -0
- iamraw-4.91.1/serializeraw/utils.py +33 -0
- iamraw-4.91.1/serializeraw/webconfig.py +37 -0
- iamraw-4.91.1/serializeraw/whitepage.py +54 -0
- iamraw-4.91.1/serializeraw/wordspaces.py +51 -0
- iamraw-4.91.1/serializeraw/yamlpages.py +191 -0
- iamraw-4.91.1/setup.cfg +4 -0
- iamraw-4.91.1/texmex/__init__.py +128 -0
- iamraw-4.91.1/texmex/alignment.py +51 -0
- iamraw-4.91.1/texmex/character.py +21 -0
- iamraw-4.91.1/texmex/group/__init__.py +8 -0
- iamraw-4.91.1/texmex/group/fonts.py +209 -0
- iamraw-4.91.1/texmex/group/ml/__init__.py +463 -0
- iamraw-4.91.1/texmex/group/ml/complex.py +104 -0
- iamraw-4.91.1/texmex/iter.py +138 -0
- iamraw-4.91.1/texmex/nav/__init__.py +525 -0
- iamraw-4.91.1/texmex/nav/create.py +277 -0
- iamraw-4.91.1/texmex/navigator.py +13 -0
- iamraw-4.91.1/texmex/regex.py +47 -0
- iamraw-4.91.1/texmex/search.py +55 -0
- iamraw-4.91.1/texmex/sentences.py +145 -0
- iamraw-4.91.1/texmex/style.py +424 -0
- iamraw-4.91.1/texmex/text.py +94 -0
- iamraw-4.91.1/texmex/translation.py +50 -0
- iamraw-4.91.1/texmex/utils.py +26 -0
iamraw-4.91.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Helmut Konrad Schewe
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
iamraw-4.91.1/PKG-INFO
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: iamraw
|
|
3
|
+
Version: 4.91.1
|
|
4
|
+
Author-email: Helmut Konrad Schewe <helmutus@outlook.com>
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/anaticulae/iamraw
|
|
7
|
+
Project-URL: Repository, https://github.com/anaticulae/iamraw
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
11
|
+
Requires-Python: >=3.12
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Requires-Dist: utilo<3.0.0,>=2.107.4
|
|
15
|
+
Requires-Dist: configos<2.0.0,>=1.0.4
|
|
16
|
+
Requires-Dist: PyYAML<7.0.0,>=6.0.0
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: utilotest<2.0.0,>=1.0.1; extra == "dev"
|
|
19
|
+
Dynamic: license-file
|
|
20
|
+
|
|
21
|
+
# iamraw
|
|
22
|
+
|
|
23
|
+
Basic `datastructure` definitions that support loading and dumping data.
|
|
24
|
+
|
|
25
|
+
Features:
|
|
26
|
+
|
|
27
|
+
* border
|
|
28
|
+
* BoundingBox
|
|
29
|
+
* chapter
|
|
30
|
+
* document
|
|
31
|
+
* fonts
|
|
32
|
+
* toc
|
iamraw-4.91.1/README
ADDED
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# C O P Y R I G H T
|
|
3
|
+
# -----------------------------------------------------------------------------
|
|
4
|
+
# Copyright (c) 2019-2023 by Helmut Konrad Schewe. All rights reserved.
|
|
5
|
+
# This file is property of Helmut Konrad Schewe. Any unauthorized copy,
|
|
6
|
+
# use or distribution is an offensive act against international law and may
|
|
7
|
+
# be prosecuted under federal law. Its content is company confidential.
|
|
8
|
+
# =============================================================================
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
|
|
12
|
+
# path
|
|
13
|
+
from iamraw import path
|
|
14
|
+
# mixin
|
|
15
|
+
from iamraw.__mixin__ import extracted
|
|
16
|
+
from iamraw.__mixin__ import hasstrategy
|
|
17
|
+
# abbreviation
|
|
18
|
+
from iamraw.abbreviation import Abbreviation
|
|
19
|
+
from iamraw.abbreviation import AbbreviationList
|
|
20
|
+
from iamraw.abbreviation import AbbreviationListLookup
|
|
21
|
+
from iamraw.abbreviation import AbbreviationLists
|
|
22
|
+
from iamraw.abbreviation import AbbreviationPosition
|
|
23
|
+
from iamraw.abbreviation import AbbreviationResult
|
|
24
|
+
from iamraw.abbreviation import Abbreviations
|
|
25
|
+
from iamraw.abbreviation import ExtractedTextAbbreviation
|
|
26
|
+
from iamraw.abbreviation import ExtractedTextAbbreviations
|
|
27
|
+
# annotation
|
|
28
|
+
from iamraw.annotation import Annotation
|
|
29
|
+
from iamraw.annotation import HyperLink
|
|
30
|
+
from iamraw.annotation import Link
|
|
31
|
+
from iamraw.annotation import PageAnnotation
|
|
32
|
+
from iamraw.annotation import PageAnnotations
|
|
33
|
+
from iamraw.annotation import PageLink
|
|
34
|
+
from iamraw.annotation import hyperlink_annotations
|
|
35
|
+
from iamraw.annotation import pagelink_annotations
|
|
36
|
+
# bib
|
|
37
|
+
from iamraw.bibliography import BibliographyReference
|
|
38
|
+
from iamraw.bibliography import BibliographyReferences
|
|
39
|
+
from iamraw.bibliography import BibliographyTable
|
|
40
|
+
# border
|
|
41
|
+
from iamraw.border import Border
|
|
42
|
+
from iamraw.border import Borders
|
|
43
|
+
# bounding
|
|
44
|
+
from iamraw.bounding import BoundingBox
|
|
45
|
+
from iamraw.bounding import BoundingBoxes
|
|
46
|
+
from iamraw.bounding import PageBoundings
|
|
47
|
+
from iamraw.bounding import PageBoundingsList
|
|
48
|
+
from iamraw.bounding import between
|
|
49
|
+
from iamraw.bounding import split_x
|
|
50
|
+
from iamraw.bounding import split_y
|
|
51
|
+
# boxes
|
|
52
|
+
from iamraw.boxes import Box
|
|
53
|
+
from iamraw.boxes import HorizontalLine
|
|
54
|
+
from iamraw.boxes import PageContentBoxes
|
|
55
|
+
from iamraw.boxes import PageContentHorizontals
|
|
56
|
+
from iamraw.boxes import PagesWithBoxList
|
|
57
|
+
from iamraw.boxes import PagesWithHorizontalList
|
|
58
|
+
# captions
|
|
59
|
+
from iamraw.caption import Caption
|
|
60
|
+
from iamraw.caption import Captions
|
|
61
|
+
from iamraw.caption import CaptionType
|
|
62
|
+
from iamraw.caption import PageContentCaption
|
|
63
|
+
from iamraw.caption import PageContentCaptions
|
|
64
|
+
# code
|
|
65
|
+
from iamraw.code import PageContentCode
|
|
66
|
+
from iamraw.code import PageContentCodes
|
|
67
|
+
from iamraw.code import PeaceOfCode
|
|
68
|
+
from iamraw.code import PeaceOfCodes
|
|
69
|
+
# content
|
|
70
|
+
from iamraw.content import ContentBoundingBox
|
|
71
|
+
from iamraw.content import ContentBoundingBoxes
|
|
72
|
+
# distance
|
|
73
|
+
from iamraw.distance import AreaDistance
|
|
74
|
+
from iamraw.distance import AreaDistances
|
|
75
|
+
from iamraw.distance import PageContentAreaDistance
|
|
76
|
+
from iamraw.distance import PageContentAreaDistances
|
|
77
|
+
# docinfo
|
|
78
|
+
from iamraw.docinfo import DocContentType
|
|
79
|
+
from iamraw.docinfo import DocInfo
|
|
80
|
+
from iamraw.docinfo import DocumentType
|
|
81
|
+
from iamraw.docinfo import Generator
|
|
82
|
+
from iamraw.docinfo import SectionLookup
|
|
83
|
+
# docref
|
|
84
|
+
from iamraw.docref import DocRef
|
|
85
|
+
from iamraw.docref import DocRefs
|
|
86
|
+
from iamraw.docref import TextAdvice
|
|
87
|
+
from iamraw.docref import TextAdviceDelete
|
|
88
|
+
from iamraw.docref import TextAdviceReplacement
|
|
89
|
+
from iamraw.docref import TextAdvices
|
|
90
|
+
# document
|
|
91
|
+
from iamraw.document import Boxed
|
|
92
|
+
from iamraw.document import Char
|
|
93
|
+
from iamraw.document import Chars
|
|
94
|
+
from iamraw.document import Document
|
|
95
|
+
from iamraw.document import Line
|
|
96
|
+
from iamraw.document import Lines
|
|
97
|
+
from iamraw.document import Page
|
|
98
|
+
from iamraw.document import PageObject
|
|
99
|
+
from iamraw.document import Pages
|
|
100
|
+
from iamraw.document import PageSize
|
|
101
|
+
from iamraw.document import PageSizes
|
|
102
|
+
from iamraw.document import TextContainer
|
|
103
|
+
from iamraw.document import TextContainers
|
|
104
|
+
from iamraw.document import UnicodeChar
|
|
105
|
+
from iamraw.document import VerticalTextContainer
|
|
106
|
+
from iamraw.document import VerticalTextContainers
|
|
107
|
+
from iamraw.document import VirtualChar
|
|
108
|
+
# figures
|
|
109
|
+
from iamraw.figure import Figure
|
|
110
|
+
from iamraw.figure import Figures
|
|
111
|
+
# findings
|
|
112
|
+
from iamraw.findings import BoundingLocation
|
|
113
|
+
from iamraw.findings import Finding
|
|
114
|
+
from iamraw.findings import FindingLevel
|
|
115
|
+
from iamraw.findings import Findings
|
|
116
|
+
from iamraw.findings import Location
|
|
117
|
+
from iamraw.findings import PageFinding
|
|
118
|
+
from iamraw.findings import PageFindings
|
|
119
|
+
from iamraw.findings import RangedLocation
|
|
120
|
+
from iamraw.findings import select_findings
|
|
121
|
+
# fonts
|
|
122
|
+
from iamraw.fonts import DEFAULT_STRETCH
|
|
123
|
+
from iamraw.fonts import DEFAULT_STYLE
|
|
124
|
+
from iamraw.fonts import DEFAULT_WEIGHT
|
|
125
|
+
from iamraw.fonts import Font
|
|
126
|
+
from iamraw.fonts import FontFlag
|
|
127
|
+
from iamraw.fonts import FontFlags
|
|
128
|
+
from iamraw.fonts import PageFontContent
|
|
129
|
+
from iamraw.fonts import PageFontContents
|
|
130
|
+
from iamraw.fonts import Stretch
|
|
131
|
+
from iamraw.fonts import Style
|
|
132
|
+
from iamraw.fonts import Weight
|
|
133
|
+
from iamraw.fontstore import NO_FONT
|
|
134
|
+
from iamraw.fontstore import FontChunk
|
|
135
|
+
from iamraw.fontstore import FontChunks
|
|
136
|
+
from iamraw.fontstore import FontContentStore
|
|
137
|
+
from iamraw.fontstore import FontStore
|
|
138
|
+
# footnotes
|
|
139
|
+
from iamraw.footnotes import PageContentFootnote
|
|
140
|
+
from iamraw.footnotes import PageContentFootnotes
|
|
141
|
+
# formula
|
|
142
|
+
from iamraw.formula import Formula
|
|
143
|
+
from iamraw.formula import FormulaRaw
|
|
144
|
+
from iamraw.formula import Formulas
|
|
145
|
+
from iamraw.formula import FormulasRaw
|
|
146
|
+
from iamraw.formula import MathChar
|
|
147
|
+
from iamraw.formula import MathChars
|
|
148
|
+
from iamraw.formula import PageContentFormula
|
|
149
|
+
from iamraw.formula import PageContentFormulas
|
|
150
|
+
from iamraw.formula import PageContentRawFormula
|
|
151
|
+
from iamraw.formula import PageContentRawFormulas
|
|
152
|
+
# headerfooter
|
|
153
|
+
from iamraw.headerfooter import FixedFooterInfo
|
|
154
|
+
from iamraw.headerfooter import FixedHeaderInfo
|
|
155
|
+
from iamraw.headerfooter import FooterInfo
|
|
156
|
+
from iamraw.headerfooter import FootJudgedNote
|
|
157
|
+
from iamraw.headerfooter import FootNote
|
|
158
|
+
from iamraw.headerfooter import FootNoteMerged
|
|
159
|
+
from iamraw.headerfooter import FootNoteRaw
|
|
160
|
+
from iamraw.headerfooter import FootNotes
|
|
161
|
+
from iamraw.headerfooter import HeaderImages
|
|
162
|
+
from iamraw.headerfooter import HeaderInfo
|
|
163
|
+
from iamraw.headerfooter import HeaderTitle
|
|
164
|
+
from iamraw.headerfooter import MovingFooterInfo
|
|
165
|
+
from iamraw.headerfooter import PageContentFooterHeader
|
|
166
|
+
from iamraw.headerfooter import PageContentFooterHeaders
|
|
167
|
+
from iamraw.headerfooter import PageInformation
|
|
168
|
+
from iamraw.headerfooter import PagesFooterInfo
|
|
169
|
+
from iamraw.headerfooter import RawText
|
|
170
|
+
# headlines
|
|
171
|
+
from iamraw.headlines import Headline
|
|
172
|
+
from iamraw.headlines import HeadlineGroup
|
|
173
|
+
from iamraw.headlines import HeadlineGroups
|
|
174
|
+
from iamraw.headlines import HeadlineResult
|
|
175
|
+
from iamraw.headlines import Headlines
|
|
176
|
+
from iamraw.headlines import PagesHeadlineList
|
|
177
|
+
from iamraw.headlines import headlines_totoc
|
|
178
|
+
# hits
|
|
179
|
+
from iamraw.hits import PageContentHit
|
|
180
|
+
from iamraw.hits import PageContentHits
|
|
181
|
+
# href
|
|
182
|
+
from iamraw.href import ExtractedHyperLink
|
|
183
|
+
from iamraw.href import ExtractedHyperLinks
|
|
184
|
+
# images
|
|
185
|
+
from iamraw.images import ImageInformation
|
|
186
|
+
from iamraw.images import ImageInformations
|
|
187
|
+
from iamraw.images import PageContentImageInfo
|
|
188
|
+
from iamraw.images import PageContentImageInfos
|
|
189
|
+
# index
|
|
190
|
+
from iamraw.index import DocumentIndex
|
|
191
|
+
from iamraw.index import DocumentIndexElement
|
|
192
|
+
# lang
|
|
193
|
+
from iamraw.lang import Language
|
|
194
|
+
from iamraw.lang import simplelang
|
|
195
|
+
# layout
|
|
196
|
+
from iamraw.layout import Layout
|
|
197
|
+
from iamraw.layout import Layouts
|
|
198
|
+
# likelihood
|
|
199
|
+
from iamraw.likelihood import Likelihood
|
|
200
|
+
from iamraw.likelihood import PageContentLikelihood
|
|
201
|
+
from iamraw.likelihood import PageContentLikelihoods
|
|
202
|
+
# lines
|
|
203
|
+
from iamraw.line import PageContentLine
|
|
204
|
+
from iamraw.line import PageContentLines
|
|
205
|
+
# list
|
|
206
|
+
from iamraw.list import ListType
|
|
207
|
+
from iamraw.list import PageContentList
|
|
208
|
+
from iamraw.list import PageContentLists
|
|
209
|
+
from iamraw.list import PageList
|
|
210
|
+
# magic
|
|
211
|
+
from iamraw.magic import PageContentContentType
|
|
212
|
+
from iamraw.magic import PageContentContentTypes
|
|
213
|
+
from iamraw.magic import PageContentType
|
|
214
|
+
# pages
|
|
215
|
+
from iamraw.page import PageSizeBorder
|
|
216
|
+
from iamraw.page import PageSizeBorderList
|
|
217
|
+
# content
|
|
218
|
+
from iamraw.pagecontent import PageContent
|
|
219
|
+
from iamraw.pagecontent import PageContents
|
|
220
|
+
# pagenumber
|
|
221
|
+
from iamraw.pagenumbers import PageNumber
|
|
222
|
+
from iamraw.pagenumbers import PageNumberOrientation
|
|
223
|
+
# pdf
|
|
224
|
+
from iamraw.pdfinfo import InvalidPDF
|
|
225
|
+
from iamraw.pdfinfo import PDFDate
|
|
226
|
+
from iamraw.pdfinfo import PDFInfo
|
|
227
|
+
from iamraw.pdfinfo import PDFVersion
|
|
228
|
+
# person
|
|
229
|
+
from iamraw.person import NoPerson
|
|
230
|
+
from iamraw.person import Person
|
|
231
|
+
from iamraw.person import Persons
|
|
232
|
+
# quote
|
|
233
|
+
from iamraw.quote import ExtractedQuotation
|
|
234
|
+
from iamraw.quote import ExtractedQuotations
|
|
235
|
+
from iamraw.quote import PageContentBlockQuotes
|
|
236
|
+
from iamraw.quote import PageContentBlockQuotesList
|
|
237
|
+
# sections
|
|
238
|
+
from iamraw.sections import AbbreviationTable
|
|
239
|
+
from iamraw.sections import Abstract
|
|
240
|
+
from iamraw.sections import Acknowledgments
|
|
241
|
+
from iamraw.sections import Appendix
|
|
242
|
+
from iamraw.sections import AreaItem
|
|
243
|
+
from iamraw.sections import AreaItems
|
|
244
|
+
from iamraw.sections import Bibliography
|
|
245
|
+
from iamraw.sections import Chapter
|
|
246
|
+
from iamraw.sections import CiteContent
|
|
247
|
+
from iamraw.sections import CitePart
|
|
248
|
+
from iamraw.sections import CodeTable
|
|
249
|
+
from iamraw.sections import DocumentSection
|
|
250
|
+
from iamraw.sections import FigureTable
|
|
251
|
+
from iamraw.sections import Glossary
|
|
252
|
+
from iamraw.sections import Index
|
|
253
|
+
from iamraw.sections import LegalInformation
|
|
254
|
+
from iamraw.sections import MainPart
|
|
255
|
+
from iamraw.sections import MultipleSection
|
|
256
|
+
from iamraw.sections import NotImplementedItem
|
|
257
|
+
from iamraw.sections import PartOfDocMixin
|
|
258
|
+
from iamraw.sections import PartsOfDoc
|
|
259
|
+
from iamraw.sections import Publication
|
|
260
|
+
from iamraw.sections import SectionMixin
|
|
261
|
+
from iamraw.sections import Sections
|
|
262
|
+
from iamraw.sections import SectionsList
|
|
263
|
+
from iamraw.sections import SymbolTable
|
|
264
|
+
from iamraw.sections import TableOfContent
|
|
265
|
+
from iamraw.sections import TableTable
|
|
266
|
+
from iamraw.sections import TitlePageSection
|
|
267
|
+
from iamraw.sections import Unknown
|
|
268
|
+
# solution
|
|
269
|
+
from iamraw.solution import Doctails
|
|
270
|
+
from iamraw.solution import ProblemStatus
|
|
271
|
+
from iamraw.solution import Solution
|
|
272
|
+
from iamraw.solution import Solutions
|
|
273
|
+
from iamraw.solution import Text
|
|
274
|
+
from iamraw.solution import Web
|
|
275
|
+
# spacestation
|
|
276
|
+
from iamraw.spacestation import DocumentCharDist
|
|
277
|
+
from iamraw.spacestation import DocumentWordDist
|
|
278
|
+
# style
|
|
279
|
+
from iamraw.style import DocTextStyle
|
|
280
|
+
from iamraw.style import PageTextProperties
|
|
281
|
+
from iamraw.style import PageTextPropertiesList
|
|
282
|
+
from iamraw.style import TextProperties
|
|
283
|
+
from iamraw.style import TextProperty
|
|
284
|
+
# table
|
|
285
|
+
from iamraw.table import PageContentTableBounding
|
|
286
|
+
from iamraw.table import PageContentTableBoundings
|
|
287
|
+
from iamraw.table import TableBounding
|
|
288
|
+
from iamraw.table import TableBoundings
|
|
289
|
+
# text
|
|
290
|
+
from iamraw.text import ChapterText
|
|
291
|
+
from iamraw.text import ChapterTextList
|
|
292
|
+
from iamraw.text import ContentType
|
|
293
|
+
from iamraw.text import DFormula
|
|
294
|
+
from iamraw.text import DocumentContent
|
|
295
|
+
from iamraw.text import HeadlineWithContent
|
|
296
|
+
from iamraw.text import PageContentText
|
|
297
|
+
from iamraw.text import PageContentTexts
|
|
298
|
+
from iamraw.text import Paragraph
|
|
299
|
+
from iamraw.text import Paragraphs
|
|
300
|
+
from iamraw.text import TextSection
|
|
301
|
+
from iamraw.text import TextSections
|
|
302
|
+
from iamraw.text import Undefined
|
|
303
|
+
# textposition
|
|
304
|
+
from iamraw.textposition import PageContentTextPosition
|
|
305
|
+
from iamraw.textposition import PageContentTextPositions
|
|
306
|
+
from iamraw.textposition import TextPosition
|
|
307
|
+
from iamraw.textposition import TextPositions
|
|
308
|
+
# title
|
|
309
|
+
from iamraw.title import PROF_DR
|
|
310
|
+
from iamraw.title import AcademicTitle
|
|
311
|
+
# title
|
|
312
|
+
from iamraw.titlepage import THESIS
|
|
313
|
+
from iamraw.titlepage import Institution
|
|
314
|
+
from iamraw.titlepage import Matrikel
|
|
315
|
+
from iamraw.titlepage import TitleDate
|
|
316
|
+
from iamraw.titlepage import TitlePage
|
|
317
|
+
from iamraw.titlepage import TitlePages
|
|
318
|
+
from iamraw.titlepage import TitleThesisType
|
|
319
|
+
# table of content
|
|
320
|
+
from iamraw.toc import AppendixLevel
|
|
321
|
+
from iamraw.toc import Level
|
|
322
|
+
from iamraw.toc import RomanLevel
|
|
323
|
+
from iamraw.toc import Section
|
|
324
|
+
from iamraw.toc import SectionList
|
|
325
|
+
from iamraw.toc import SectionRaw
|
|
326
|
+
from iamraw.toc import StepLevel
|
|
327
|
+
from iamraw.toc import Toc
|
|
328
|
+
from iamraw.toc import TocLinkMixin
|
|
329
|
+
from iamraw.toc import TocLinkMixins
|
|
330
|
+
from iamraw.toc import TocStyle
|
|
331
|
+
from iamraw.toc import create_toc
|
|
332
|
+
from iamraw.toc import merge_toc
|
|
333
|
+
from iamraw.toc import tosection
|
|
334
|
+
from iamraw.toc import tosectionraw
|
|
335
|
+
# webconfig
|
|
336
|
+
from iamraw.webconfig import WebConfig
|
|
337
|
+
# whitepage
|
|
338
|
+
from iamraw.whitepage import PageContentWhitepage
|
|
339
|
+
from iamraw.whitepage import PageContentWhitepages
|
|
340
|
+
from iamraw.whitepage import WhitePage
|
|
341
|
+
|
|
342
|
+
# Legacy aliases: each name on the left is just another binding for the class
# imported above under its current name (presumably so that code written
# against the old names keeps working - confirm before deleting).
# TODO: REMOVE LATER
|
|
343
|
+
FootRawNote = FootNoteRaw
|
|
344
|
+
FixedFooterInformation = FixedFooterInfo
|
|
345
|
+
FixedHeaderInformation = FixedHeaderInfo
|
|
346
|
+
HeaderInformation = HeaderInfo
|
|
347
|
+
FooterInformation = FooterInfo
|
|
348
|
+
MovingFooterInformation = MovingFooterInfo
|
|
349
|
+
PagesFooterInformation = PagesFooterInfo
|
|
350
|
+
|
|
351
|
+
# Must match the version of the released distribution (4.91.1); the previous
# value '4.91.0' lagged one patch release behind the package metadata.
__version__ = '4.91.1'

# Absolute path of the directory that contains the `iamraw` package directory.
ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# C O P Y R I G H T
|
|
3
|
+
# -----------------------------------------------------------------------------
|
|
4
|
+
# Copyright (c) 2022-2023 by Helmut Konrad Schewe. All rights reserved.
|
|
5
|
+
# This file is property of Helmut Konrad Schewe. Any unauthorized copy,
|
|
6
|
+
# use or distribution is an offensive act against international law and may
|
|
7
|
+
# be prosecuted under federal law. Its content is company confidential.
|
|
8
|
+
# =============================================================================
|
|
9
|
+
"""\
|
|
10
|
+
>>> @extracted
|
|
11
|
+
... class Helmut:
|
|
12
|
+
... pass
|
|
13
|
+
>>> first = Helmut()
|
|
14
|
+
>>> first.__strategy__ = 'hello'
|
|
15
|
+
>>> first.__strategy__
|
|
16
|
+
'hello'
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def extracted(item):
    """Attach the strategy bookkeeping attributes to `item` and return it.

    `__strategy__` and `__strategy_location__` are set to None and
    `__strategy_raw__` is bound to a function which raises
    NotImplementedError. Because `item` itself is returned, the function
    can be used as a class decorator.
    """

    def raw(self) -> str:
        """Element in document which is converted to current result."""
        raise NotImplementedError

    item.__strategy__ = None
    item.__strategy_location__ = None
    item.__strategy_raw__ = raw
    return item
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def hasstrategy(item) -> bool:
    """Return True if `item` exposes a `__strategy__` attribute."""
    try:
        getattr(item, '__strategy__')
    except AttributeError:
        return False
    return True
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# C O P Y R I G H T
|
|
3
|
+
# -----------------------------------------------------------------------------
|
|
4
|
+
# Copyright (c) 2020-2023 by Helmut Konrad Schewe. All rights reserved.
|
|
5
|
+
# This file is property of Helmut Konrad Schewe. Any unauthorized copy,
|
|
6
|
+
# use or distribution is an offensive act against international law and may
|
|
7
|
+
# be prosecuted under federal law. Its content is company confidential.
|
|
8
|
+
# =============================================================================
|
|
9
|
+
|
|
10
|
+
import collections
|
|
11
|
+
import dataclasses
|
|
12
|
+
|
|
13
|
+
import utilo
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclasses.dataclass
|
|
17
|
+
class AbbreviationPosition:
|
|
18
|
+
page: int = None
|
|
19
|
+
sentence: int = None
|
|
20
|
+
word: int = None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclasses.dataclass
class Abbreviation:
    """Abbreviation record: `short`, `description` and `position`.

    All fields are optional and default to None. Instances order by the
    `utilo.alphabetically` key of their `short` field.
    """

    short: str | None = None
    description: str | None = None
    position: AbbreviationPosition | None = None

    def __lt__(self, item):
        """Return True if this abbreviation sorts strictly before `item`.

        The comparison is strict: the former `<=` made `a < a` true, which
        breaks the ordering contract that `sorted()`/`list.sort()` rely on
        and made the relative order of equal keys unstable.
        """
        own_key = utilo.alphabetically(self.short)
        other_key = utilo.alphabetically(item.short)
        return own_key < other_key


Abbreviations = list[Abbreviation]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclasses.dataclass
class AbbreviationResult:
    """Holds an `abbreviations` list together with a `pdfpages` list.

    `append`, indexing and `len()` all operate on `abbreviations`.
    """

    abbreviations: Abbreviations = dataclasses.field(default_factory=list)
    pdfpages: list = dataclasses.field(default_factory=list)

    def append(self, item):
        """Add `item` to the end of `abbreviations`."""
        self.abbreviations.append(item)  # pylint:disable=E1101

    def __getitem__(self, index):
        """Return the entry of `abbreviations` selected by `index`."""
        return self.abbreviations[index]  # pylint:disable=E1136

    def __len__(self):
        """Return the number of stored abbreviations."""
        return len(self.abbreviations)

    def short_inside(self, abbrev: str) -> bool:
        """Tell whether a stored abbreviation's lower-cased `short` equals
        `abbrev`.

        Only the stored value is lower-cased; `abbrev` is compared as
        given.
        NOTE(review): callers presumably pass a lower-case string - confirm.

        >>> AbbreviationResult().short_inside('')
        False
        """
        for entry in self.abbreviations:
            if entry.short.lower() == abbrev:
                return True
        return False
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# Named tuple with the two fields `page` and `content`.
ExtractedTextAbbreviation = collections.namedtuple(
    typename='ExtractedTextAbbreviation',
    field_names=('page', 'content'),
)
ExtractedTextAbbreviations = list[ExtractedTextAbbreviation]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@dataclasses.dataclass
class AbbreviationList:
    """Set-backed collection: `append` adds to `data`, `in` tests membership."""

    data: set = dataclasses.field(default_factory=set)

    def append(self, item):
        """Add `item` to the underlying set (duplicates collapse)."""
        self.data.add(item)  # pylint:disable=E1101

    def __contains__(self, item):
        """Return True if `item` is stored in `data`."""
        return item in self.data  # pylint:disable=unsupported-membership-test


AbbreviationLists = list[AbbreviationList]


@dataclasses.dataclass
class AbbreviationListLookup:
    """Membership lookup across a primary `table` and further `other` lists.

    `item in lookup` is True if `item` is contained in `table` or in any
    entry of `other`.
    """

    # A fresh AbbreviationList per instance. The former
    # `default=AbbreviationList` stored the class object itself, so a
    # default-constructed lookup raised TypeError on the first `in` test.
    table: AbbreviationList = dataclasses.field(
        default_factory=AbbreviationList)
    other: AbbreviationLists = dataclasses.field(default_factory=list)

    def __contains__(self, item):
        """Return True if `item` is in `table` or in one of `other`."""
        if item in self.table:  # pylint:disable=E1135
            return True
        # `other` may be None when built through `fromparsed(parsed=...)`.
        return any(item in table for table in (self.other or ()))

    @classmethod
    def fromparsed(cls, parsed=None, other=None):
        """Build a lookup from a parsed table and/or additional tables.

        Args:
            parsed: primary table; an empty AbbreviationList is used when
                None.
            other: additional tables, passed through unchanged (may be
                None).

        Raises:
            AssertionError: if both `parsed` and `other` are falsy. The
                check is an `assert`, so it is skipped under `python -O`.
        """
        assert parsed or other, 'empty input'
        if parsed is None:
            parsed = AbbreviationList()
        return cls(table=parsed, other=other)
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# C O P Y R I G H T
|
|
3
|
+
# -----------------------------------------------------------------------------
|
|
4
|
+
# Copyright (c) 2019-2023 by Helmut Konrad Schewe. All rights reserved.
|
|
5
|
+
# This file is property of Helmut Konrad Schewe. Any unauthorized copy,
|
|
6
|
+
# use or distribution is an offensive act against international law and may
|
|
7
|
+
# be prosecuted under federal law. Its content is company confidential.
|
|
8
|
+
# =============================================================================
|
|
9
|
+
|
|
10
|
+
import collections
|
|
11
|
+
import dataclasses
|
|
12
|
+
import enum
|
|
13
|
+
|
|
14
|
+
import iamraw.bounding
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Link(enum.Enum):
    """Kind of link carried by an annotation."""

    # Default kind of the base ``Annotation`` class.
    UNDEFINED = -1
    # Default kind of ``PageLink``.
    INTERNAL = 0
    # Default kind of ``HyperLink``.
    HYPERLINK = 1
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclasses.dataclass
class Annotation:
    """Base record for an annotation: a goal, its bounds and a link kind."""

    # NOTE(review): presumably the link destination (URL or in-document
    # destination) -- confirm against the code that creates annotations.
    goal: str
    # Bounding box associated with the annotation; the coordinate system is
    # not visible from this module.
    bounds: iamraw.bounding.BoundingBox
    # Link kind; subclasses override this default.
    typ: Link = Link.UNDEFINED
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclasses.dataclass
class HyperLink(Annotation):
    """Annotation whose ``typ`` defaults to ``Link.HYPERLINK``."""

    typ: Link = Link.HYPERLINK
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclasses.dataclass
class PageLink(Annotation):
    """Annotation whose ``typ`` defaults to ``Link.INTERNAL``."""

    typ: Link = Link.INTERNAL
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Immutable record with the fields ``pagelinks``, ``hyperlinks`` and
# ``page`` (in that positional order).
PageAnnotation = collections.namedtuple(
    typename='PageAnnotation',
    field_names='pagelinks hyperlinks page',
)
# Alias for a list of PageAnnotation records.
PageAnnotations = list[PageAnnotation]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def pagelink_annotations(annos: PageAnnotations) -> list[PageLink]:
    """Collect the ``pagelinks`` field of every entry in ``annos``.

    The result has one element per entry, in input order.

    NOTE(review): the return annotation says ``list[PageLink]``, but each
    element is whatever a record's ``pagelinks`` field holds; if that field
    is itself a list the result is nested -- confirm with the callers.
    """
    return [page_annotation.pagelinks for page_annotation in annos]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def hyperlink_annotations(annos: PageAnnotations) -> list[HyperLink]:
    """Collect the ``hyperlinks`` field of every entry in ``annos``.

    The result has one element per entry, in input order.

    NOTE(review): the return annotation says ``list[HyperLink]``, but each
    element is whatever a record's ``hyperlinks`` field holds; if that field
    is itself a list the result is nested -- confirm with the callers.
    """
    return [page_annotation.hyperlinks for page_annotation in annos]
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# C O P Y R I G H T
|
|
3
|
+
# -----------------------------------------------------------------------------
|
|
4
|
+
# Copyright (c) 2020-2023 by Helmut Konrad Schewe. All rights reserved.
|
|
5
|
+
# This file is property of Helmut Konrad Schewe. Any unauthorized copy,
|
|
6
|
+
# use or distribution is an offensive act against international law and may
|
|
7
|
+
# be prosecuted under federal law. Its content is company confidential.
|
|
8
|
+
# =============================================================================
|
|
9
|
+
|
|
10
|
+
import contextlib
|
|
11
|
+
import dataclasses
|
|
12
|
+
|
|
13
|
+
import iamraw
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclasses.dataclass(unsafe_hash=True)
class BibliographyReference:  # pylint:disable=R0902
    """One bibliography entry; every field is optional.

    Instances are hashable. ``authors`` and ``raw`` do not contribute to the
    hash, and ``raw`` is additionally ignored by ``==``.
    """

    title: str = None
    reference: str = None

    data: str = None

    page: int = None
    pageend: int = None

    # None, an int, or the literal string 'no year' (see __post_init__).
    year: int = None
    yearend: int = None

    hyperlink: str = None
    accessed: str = None

    # a,b,c... to differentiate item in the same year
    number: str = None
    # Kept out of the generated ``__hash__``: a list is unhashable, so
    # hashing it made ``hash(reference)`` raise TypeError for every
    # instance. Equality still compares the authors.
    # NOTE(review): ``create`` stores ``iamraw.Person`` objects here although
    # the annotation says ``str``.
    authors: list[str] = dataclasses.field(default_factory=list, hash=False)

    publisher: str = None

    # Excluded from comparison (and therefore from the hash).
    raw: str = dataclasses.field(default=None, compare=False)
    raw_pdfpage: int = None

    @classmethod
    def create(cls, author: str, title: str = '', year: int = 2000):
        """Build a reference with a single author.

        ``author`` is split at the first space: the first token becomes the
        ``name`` and the remainder the ``firstname`` of an ``iamraw.Person``.

        Args:
            author: author string containing at least one space.
            title: title of the reference.
            year: value passed through ``int()``; when that raises TypeError
                (for example for None) the value is kept unchanged.

        Raises:
            IndexError: if ``author`` contains no space.
            ValueError: if ``year`` is a string ``int()`` cannot parse.
        """
        parts = author.split(' ', maxsplit=1)
        person = iamraw.Person(name=parts[0], firstname=parts[1])
        # Only TypeError is suppressed, so a non-numeric string still fails.
        with contextlib.suppress(TypeError):
            year = int(year)
        return cls(authors=[person], title=title, year=year)

    @property
    def author(self) -> str:
        """Return family of first author.

        Returns None when there is no author or the first entry has no
        ``name`` attribute.
        """
        with contextlib.suppress(IndexError, AttributeError):
            # IndexError: No author parsed
            # AttributeError: NoPerson parsed
            return self.authors[0].name  # pylint:disable=E1136
        return None

    def __post_init__(self):
        """Check that ``year`` is None, an int or the string 'no year'."""
        # TODO: MAY REMOVE NO YEAR LATER
        assert (
            self.year is None
            or isinstance(self.year, int)
            or self.year == 'no year'
        ), str(self)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# Alias for a list of BibliographyReference instances.
BibliographyReferences = list[BibliographyReference]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@iamraw.extracted
@dataclasses.dataclass
class BibliographyTable:
    """Container for bibliography references with an optional headline.

    Indexing and ``len()`` are forwarded to ``references``.
    """

    headline: str = None
    references: BibliographyReferences = dataclasses.field(default_factory=list)
    # NOTE(review): presumably the PDF pages covered by the table -- confirm.
    pdfpages: tuple = None

    def __getitem__(self, index):
        """Return ``self.references[index]``."""
        return self.references[index]

    def __len__(self) -> int:
        """Return the number of stored references."""
        return len(self.references)
|