iamraw 4.91.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. iamraw/__init__.py +353 -0
  2. iamraw/__mixin__.py +33 -0
  3. iamraw/abbreviation.py +102 -0
  4. iamraw/annotation.py +52 -0
  5. iamraw/bibliography.py +79 -0
  6. iamraw/border.py +36 -0
  7. iamraw/bounding.py +210 -0
  8. iamraw/boxes.py +66 -0
  9. iamraw/caption.py +82 -0
  10. iamraw/code.py +40 -0
  11. iamraw/content.py +27 -0
  12. iamraw/distance.py +22 -0
  13. iamraw/docinfo.py +103 -0
  14. iamraw/docref.py +49 -0
  15. iamraw/document.py +276 -0
  16. iamraw/figure.py +30 -0
  17. iamraw/findings.py +345 -0
  18. iamraw/fonts.py +106 -0
  19. iamraw/fontstore.py +198 -0
  20. iamraw/footnotes.py +16 -0
  21. iamraw/formula.py +100 -0
  22. iamraw/headerfooter.py +277 -0
  23. iamraw/headlines.py +115 -0
  24. iamraw/hits.py +17 -0
  25. iamraw/href.py +21 -0
  26. iamraw/images.py +40 -0
  27. iamraw/index.py +89 -0
  28. iamraw/lang.py +63 -0
  29. iamraw/layout.py +32 -0
  30. iamraw/likelihood.py +20 -0
  31. iamraw/line.py +31 -0
  32. iamraw/list.py +65 -0
  33. iamraw/magic.py +30 -0
  34. iamraw/page.py +13 -0
  35. iamraw/pagecontent.py +13 -0
  36. iamraw/pagenumbers.py +42 -0
  37. iamraw/path.py +280 -0
  38. iamraw/pdfinfo.py +38 -0
  39. iamraw/person.py +31 -0
  40. iamraw/quote.py +23 -0
  41. iamraw/sections.py +259 -0
  42. iamraw/solution.py +52 -0
  43. iamraw/spacestation.py +30 -0
  44. iamraw/style.py +128 -0
  45. iamraw/table.py +58 -0
  46. iamraw/text.py +93 -0
  47. iamraw/textposition.py +16 -0
  48. iamraw/title.py +54 -0
  49. iamraw/titlepage.py +109 -0
  50. iamraw/toc.py +271 -0
  51. iamraw/webconfig.py +16 -0
  52. iamraw/whitepage.py +25 -0
  53. iamraw-4.91.1.dist-info/METADATA +32 -0
  54. iamraw-4.91.1.dist-info/RECORD +134 -0
  55. iamraw-4.91.1.dist-info/WHEEL +5 -0
  56. iamraw-4.91.1.dist-info/licenses/LICENSE +21 -0
  57. iamraw-4.91.1.dist-info/top_level.txt +3 -0
  58. serializeraw/__init__.py +228 -0
  59. serializeraw/__patch__.py +37 -0
  60. serializeraw/abbreviation.py +129 -0
  61. serializeraw/annotation.py +95 -0
  62. serializeraw/bibliography.py +107 -0
  63. serializeraw/border.py +126 -0
  64. serializeraw/bounding.py +64 -0
  65. serializeraw/boxedcontent.py +110 -0
  66. serializeraw/boxes.py +50 -0
  67. serializeraw/caption.py +33 -0
  68. serializeraw/chapter.py +30 -0
  69. serializeraw/code.py +82 -0
  70. serializeraw/color.py +52 -0
  71. serializeraw/content.py +34 -0
  72. serializeraw/distance.py +61 -0
  73. serializeraw/docinfo.py +43 -0
  74. serializeraw/docref.py +86 -0
  75. serializeraw/document.py +335 -0
  76. serializeraw/figure.py +110 -0
  77. serializeraw/findings.py +256 -0
  78. serializeraw/fonts.py +182 -0
  79. serializeraw/fontstore.py +59 -0
  80. serializeraw/footnotes.py +47 -0
  81. serializeraw/formula.py +36 -0
  82. serializeraw/formularaw.py +110 -0
  83. serializeraw/headerfooter.py +335 -0
  84. serializeraw/headlines.py +126 -0
  85. serializeraw/hits.py +61 -0
  86. serializeraw/href.py +36 -0
  87. serializeraw/images.py +118 -0
  88. serializeraw/index.py +28 -0
  89. serializeraw/likelihood.py +81 -0
  90. serializeraw/line.py +122 -0
  91. serializeraw/list.py +121 -0
  92. serializeraw/magic.py +69 -0
  93. serializeraw/pagecontent.py +43 -0
  94. serializeraw/pagenumbers.py +106 -0
  95. serializeraw/pdfinfo.py +100 -0
  96. serializeraw/quote.py +62 -0
  97. serializeraw/sections.py +155 -0
  98. serializeraw/sentence.py +77 -0
  99. serializeraw/spacestation.py +115 -0
  100. serializeraw/style.py +30 -0
  101. serializeraw/table.py +68 -0
  102. serializeraw/text.py +111 -0
  103. serializeraw/textposition.py +84 -0
  104. serializeraw/titlepage.py +29 -0
  105. serializeraw/tnav/__init__.py +8 -0
  106. serializeraw/tnav/create.py +253 -0
  107. serializeraw/tnav/dump.py +128 -0
  108. serializeraw/tnav/filter.py +21 -0
  109. serializeraw/tnav/highnote.py +52 -0
  110. serializeraw/tnav/translation.py +30 -0
  111. serializeraw/toc.py +121 -0
  112. serializeraw/utils.py +33 -0
  113. serializeraw/webconfig.py +37 -0
  114. serializeraw/whitepage.py +54 -0
  115. serializeraw/wordspaces.py +51 -0
  116. serializeraw/yamlpages.py +191 -0
  117. texmex/__init__.py +128 -0
  118. texmex/alignment.py +51 -0
  119. texmex/character.py +21 -0
  120. texmex/group/__init__.py +8 -0
  121. texmex/group/fonts.py +209 -0
  122. texmex/group/ml/__init__.py +463 -0
  123. texmex/group/ml/complex.py +104 -0
  124. texmex/iter.py +138 -0
  125. texmex/nav/__init__.py +525 -0
  126. texmex/nav/create.py +277 -0
  127. texmex/navigator.py +13 -0
  128. texmex/regex.py +47 -0
  129. texmex/search.py +55 -0
  130. texmex/sentences.py +145 -0
  131. texmex/style.py +424 -0
  132. texmex/text.py +94 -0
  133. texmex/translation.py +50 -0
  134. texmex/utils.py +26 -0
iamraw/__init__.py ADDED
@@ -0,0 +1,353 @@
1
+ # =============================================================================
2
+ # C O P Y R I G H T
3
+ # -----------------------------------------------------------------------------
4
+ # Copyright (c) 2019-2023 by Helmut Konrad Schewe. All rights reserved.
5
+ # This file is property of Helmut Konrad Schewe. Any unauthorized copy,
6
+ # use or distribution is an offensive act against international law and may
7
+ # be prosecuted under federal law. Its content is company confidential.
8
+ # =============================================================================
9
+
10
+ import os
11
+
12
+ # path
13
+ from iamraw import path
14
+ # mixin
15
+ from iamraw.__mixin__ import extracted
16
+ from iamraw.__mixin__ import hasstrategy
17
+ # abbreviation
18
+ from iamraw.abbreviation import Abbreviation
19
+ from iamraw.abbreviation import AbbreviationList
20
+ from iamraw.abbreviation import AbbreviationListLookup
21
+ from iamraw.abbreviation import AbbreviationLists
22
+ from iamraw.abbreviation import AbbreviationPosition
23
+ from iamraw.abbreviation import AbbreviationResult
24
+ from iamraw.abbreviation import Abbreviations
25
+ from iamraw.abbreviation import ExtractedTextAbbreviation
26
+ from iamraw.abbreviation import ExtractedTextAbbreviations
27
+ # annotation
28
+ from iamraw.annotation import Annotation
29
+ from iamraw.annotation import HyperLink
30
+ from iamraw.annotation import Link
31
+ from iamraw.annotation import PageAnnotation
32
+ from iamraw.annotation import PageAnnotations
33
+ from iamraw.annotation import PageLink
34
+ from iamraw.annotation import hyperlink_annotations
35
+ from iamraw.annotation import pagelink_annotations
36
+ # bib
37
+ from iamraw.bibliography import BibliographyReference
38
+ from iamraw.bibliography import BibliographyReferences
39
+ from iamraw.bibliography import BibliographyTable
40
+ # border
41
+ from iamraw.border import Border
42
+ from iamraw.border import Borders
43
+ # bounding
44
+ from iamraw.bounding import BoundingBox
45
+ from iamraw.bounding import BoundingBoxes
46
+ from iamraw.bounding import PageBoundings
47
+ from iamraw.bounding import PageBoundingsList
48
+ from iamraw.bounding import between
49
+ from iamraw.bounding import split_x
50
+ from iamraw.bounding import split_y
51
+ # boxes
52
+ from iamraw.boxes import Box
53
+ from iamraw.boxes import HorizontalLine
54
+ from iamraw.boxes import PageContentBoxes
55
+ from iamraw.boxes import PageContentHorizontals
56
+ from iamraw.boxes import PagesWithBoxList
57
+ from iamraw.boxes import PagesWithHorizontalList
58
+ # captions
59
+ from iamraw.caption import Caption
60
+ from iamraw.caption import Captions
61
+ from iamraw.caption import CaptionType
62
+ from iamraw.caption import PageContentCaption
63
+ from iamraw.caption import PageContentCaptions
64
+ # code
65
+ from iamraw.code import PageContentCode
66
+ from iamraw.code import PageContentCodes
67
+ from iamraw.code import PeaceOfCode
68
+ from iamraw.code import PeaceOfCodes
69
+ # content
70
+ from iamraw.content import ContentBoundingBox
71
+ from iamraw.content import ContentBoundingBoxes
72
+ # distance
73
+ from iamraw.distance import AreaDistance
74
+ from iamraw.distance import AreaDistances
75
+ from iamraw.distance import PageContentAreaDistance
76
+ from iamraw.distance import PageContentAreaDistances
77
+ # docinfo
78
+ from iamraw.docinfo import DocContentType
79
+ from iamraw.docinfo import DocInfo
80
+ from iamraw.docinfo import DocumentType
81
+ from iamraw.docinfo import Generator
82
+ from iamraw.docinfo import SectionLookup
83
+ # docref
84
+ from iamraw.docref import DocRef
85
+ from iamraw.docref import DocRefs
86
+ from iamraw.docref import TextAdvice
87
+ from iamraw.docref import TextAdviceDelete
88
+ from iamraw.docref import TextAdviceReplacement
89
+ from iamraw.docref import TextAdvices
90
+ # document
91
+ from iamraw.document import Boxed
92
+ from iamraw.document import Char
93
+ from iamraw.document import Chars
94
+ from iamraw.document import Document
95
+ from iamraw.document import Line
96
+ from iamraw.document import Lines
97
+ from iamraw.document import Page
98
+ from iamraw.document import PageObject
99
+ from iamraw.document import Pages
100
+ from iamraw.document import PageSize
101
+ from iamraw.document import PageSizes
102
+ from iamraw.document import TextContainer
103
+ from iamraw.document import TextContainers
104
+ from iamraw.document import UnicodeChar
105
+ from iamraw.document import VerticalTextContainer
106
+ from iamraw.document import VerticalTextContainers
107
+ from iamraw.document import VirtualChar
108
+ # figures
109
+ from iamraw.figure import Figure
110
+ from iamraw.figure import Figures
111
+ # findings
112
+ from iamraw.findings import BoundingLocation
113
+ from iamraw.findings import Finding
114
+ from iamraw.findings import FindingLevel
115
+ from iamraw.findings import Findings
116
+ from iamraw.findings import Location
117
+ from iamraw.findings import PageFinding
118
+ from iamraw.findings import PageFindings
119
+ from iamraw.findings import RangedLocation
120
+ from iamraw.findings import select_findings
121
+ # fonts
122
+ from iamraw.fonts import DEFAULT_STRETCH
123
+ from iamraw.fonts import DEFAULT_STYLE
124
+ from iamraw.fonts import DEFAULT_WEIGHT
125
+ from iamraw.fonts import Font
126
+ from iamraw.fonts import FontFlag
127
+ from iamraw.fonts import FontFlags
128
+ from iamraw.fonts import PageFontContent
129
+ from iamraw.fonts import PageFontContents
130
+ from iamraw.fonts import Stretch
131
+ from iamraw.fonts import Style
132
+ from iamraw.fonts import Weight
133
+ from iamraw.fontstore import NO_FONT
134
+ from iamraw.fontstore import FontChunk
135
+ from iamraw.fontstore import FontChunks
136
+ from iamraw.fontstore import FontContentStore
137
+ from iamraw.fontstore import FontStore
138
+ # footnotes
139
+ from iamraw.footnotes import PageContentFootnote
140
+ from iamraw.footnotes import PageContentFootnotes
141
+ # formula
142
+ from iamraw.formula import Formula
143
+ from iamraw.formula import FormulaRaw
144
+ from iamraw.formula import Formulas
145
+ from iamraw.formula import FormulasRaw
146
+ from iamraw.formula import MathChar
147
+ from iamraw.formula import MathChars
148
+ from iamraw.formula import PageContentFormula
149
+ from iamraw.formula import PageContentFormulas
150
+ from iamraw.formula import PageContentRawFormula
151
+ from iamraw.formula import PageContentRawFormulas
152
+ # headerfooter
153
+ from iamraw.headerfooter import FixedFooterInfo
154
+ from iamraw.headerfooter import FixedHeaderInfo
155
+ from iamraw.headerfooter import FooterInfo
156
+ from iamraw.headerfooter import FootJudgedNote
157
+ from iamraw.headerfooter import FootNote
158
+ from iamraw.headerfooter import FootNoteMerged
159
+ from iamraw.headerfooter import FootNoteRaw
160
+ from iamraw.headerfooter import FootNotes
161
+ from iamraw.headerfooter import HeaderImages
162
+ from iamraw.headerfooter import HeaderInfo
163
+ from iamraw.headerfooter import HeaderTitle
164
+ from iamraw.headerfooter import MovingFooterInfo
165
+ from iamraw.headerfooter import PageContentFooterHeader
166
+ from iamraw.headerfooter import PageContentFooterHeaders
167
+ from iamraw.headerfooter import PageInformation
168
+ from iamraw.headerfooter import PagesFooterInfo
169
+ from iamraw.headerfooter import RawText
170
+ # headlines
171
+ from iamraw.headlines import Headline
172
+ from iamraw.headlines import HeadlineGroup
173
+ from iamraw.headlines import HeadlineGroups
174
+ from iamraw.headlines import HeadlineResult
175
+ from iamraw.headlines import Headlines
176
+ from iamraw.headlines import PagesHeadlineList
177
+ from iamraw.headlines import headlines_totoc
178
+ # hits
179
+ from iamraw.hits import PageContentHit
180
+ from iamraw.hits import PageContentHits
181
+ # href
182
+ from iamraw.href import ExtractedHyperLink
183
+ from iamraw.href import ExtractedHyperLinks
184
+ # images
185
+ from iamraw.images import ImageInformation
186
+ from iamraw.images import ImageInformations
187
+ from iamraw.images import PageContentImageInfo
188
+ from iamraw.images import PageContentImageInfos
189
+ # index
190
+ from iamraw.index import DocumentIndex
191
+ from iamraw.index import DocumentIndexElement
192
+ # lang
193
+ from iamraw.lang import Language
194
+ from iamraw.lang import simplelang
195
+ # layout
196
+ from iamraw.layout import Layout
197
+ from iamraw.layout import Layouts
198
+ # likelihood
199
+ from iamraw.likelihood import Likelihood
200
+ from iamraw.likelihood import PageContentLikelihood
201
+ from iamraw.likelihood import PageContentLikelihoods
202
+ # lines
203
+ from iamraw.line import PageContentLine
204
+ from iamraw.line import PageContentLines
205
+ # list
206
+ from iamraw.list import ListType
207
+ from iamraw.list import PageContentList
208
+ from iamraw.list import PageContentLists
209
+ from iamraw.list import PageList
210
+ # magic
211
+ from iamraw.magic import PageContentContentType
212
+ from iamraw.magic import PageContentContentTypes
213
+ from iamraw.magic import PageContentType
214
+ # pages
215
+ from iamraw.page import PageSizeBorder
216
+ from iamraw.page import PageSizeBorderList
217
+ # content
218
+ from iamraw.pagecontent import PageContent
219
+ from iamraw.pagecontent import PageContents
220
+ # pagenumber
221
+ from iamraw.pagenumbers import PageNumber
222
+ from iamraw.pagenumbers import PageNumberOrientation
223
+ # pdf
224
+ from iamraw.pdfinfo import InvalidPDF
225
+ from iamraw.pdfinfo import PDFDate
226
+ from iamraw.pdfinfo import PDFInfo
227
+ from iamraw.pdfinfo import PDFVersion
228
+ # person
229
+ from iamraw.person import NoPerson
230
+ from iamraw.person import Person
231
+ from iamraw.person import Persons
232
+ # quote
233
+ from iamraw.quote import ExtractedQuotation
234
+ from iamraw.quote import ExtractedQuotations
235
+ from iamraw.quote import PageContentBlockQuotes
236
+ from iamraw.quote import PageContentBlockQuotesList
237
+ # sections
238
+ from iamraw.sections import AbbreviationTable
239
+ from iamraw.sections import Abstract
240
+ from iamraw.sections import Acknowledgments
241
+ from iamraw.sections import Appendix
242
+ from iamraw.sections import AreaItem
243
+ from iamraw.sections import AreaItems
244
+ from iamraw.sections import Bibliography
245
+ from iamraw.sections import Chapter
246
+ from iamraw.sections import CiteContent
247
+ from iamraw.sections import CitePart
248
+ from iamraw.sections import CodeTable
249
+ from iamraw.sections import DocumentSection
250
+ from iamraw.sections import FigureTable
251
+ from iamraw.sections import Glossary
252
+ from iamraw.sections import Index
253
+ from iamraw.sections import LegalInformation
254
+ from iamraw.sections import MainPart
255
+ from iamraw.sections import MultipleSection
256
+ from iamraw.sections import NotImplementedItem
257
+ from iamraw.sections import PartOfDocMixin
258
+ from iamraw.sections import PartsOfDoc
259
+ from iamraw.sections import Publication
260
+ from iamraw.sections import SectionMixin
261
+ from iamraw.sections import Sections
262
+ from iamraw.sections import SectionsList
263
+ from iamraw.sections import SymbolTable
264
+ from iamraw.sections import TableOfContent
265
+ from iamraw.sections import TableTable
266
+ from iamraw.sections import TitlePageSection
267
+ from iamraw.sections import Unknown
268
+ # solution
269
+ from iamraw.solution import Doctails
270
+ from iamraw.solution import ProblemStatus
271
+ from iamraw.solution import Solution
272
+ from iamraw.solution import Solutions
273
+ from iamraw.solution import Text
274
+ from iamraw.solution import Web
275
+ # spacestation
276
+ from iamraw.spacestation import DocumentCharDist
277
+ from iamraw.spacestation import DocumentWordDist
278
+ # style
279
+ from iamraw.style import DocTextStyle
280
+ from iamraw.style import PageTextProperties
281
+ from iamraw.style import PageTextPropertiesList
282
+ from iamraw.style import TextProperties
283
+ from iamraw.style import TextProperty
284
+ # table
285
+ from iamraw.table import PageContentTableBounding
286
+ from iamraw.table import PageContentTableBoundings
287
+ from iamraw.table import TableBounding
288
+ from iamraw.table import TableBoundings
289
+ # text
290
+ from iamraw.text import ChapterText
291
+ from iamraw.text import ChapterTextList
292
+ from iamraw.text import ContentType
293
+ from iamraw.text import DFormula
294
+ from iamraw.text import DocumentContent
295
+ from iamraw.text import HeadlineWithContent
296
+ from iamraw.text import PageContentText
297
+ from iamraw.text import PageContentTexts
298
+ from iamraw.text import Paragraph
299
+ from iamraw.text import Paragraphs
300
+ from iamraw.text import TextSection
301
+ from iamraw.text import TextSections
302
+ from iamraw.text import Undefined
303
+ # textposition
304
+ from iamraw.textposition import PageContentTextPosition
305
+ from iamraw.textposition import PageContentTextPositions
306
+ from iamraw.textposition import TextPosition
307
+ from iamraw.textposition import TextPositions
308
+ # title
309
+ from iamraw.title import PROF_DR
310
+ from iamraw.title import AcademicTitle
311
+ # title
312
+ from iamraw.titlepage import THESIS
313
+ from iamraw.titlepage import Institution
314
+ from iamraw.titlepage import Matrikel
315
+ from iamraw.titlepage import TitleDate
316
+ from iamraw.titlepage import TitlePage
317
+ from iamraw.titlepage import TitlePages
318
+ from iamraw.titlepage import TitleThesisType
319
+ # table of content
320
+ from iamraw.toc import AppendixLevel
321
+ from iamraw.toc import Level
322
+ from iamraw.toc import RomanLevel
323
+ from iamraw.toc import Section
324
+ from iamraw.toc import SectionList
325
+ from iamraw.toc import SectionRaw
326
+ from iamraw.toc import StepLevel
327
+ from iamraw.toc import Toc
328
+ from iamraw.toc import TocLinkMixin
329
+ from iamraw.toc import TocLinkMixins
330
+ from iamraw.toc import TocStyle
331
+ from iamraw.toc import create_toc
332
+ from iamraw.toc import merge_toc
333
+ from iamraw.toc import tosection
334
+ from iamraw.toc import tosectionraw
335
+ # webconfig
336
+ from iamraw.webconfig import WebConfig
337
+ # whitepage
338
+ from iamraw.whitepage import PageContentWhitepage
339
+ from iamraw.whitepage import PageContentWhitepages
340
+ from iamraw.whitepage import WhitePage
341
+
342
# TODO: REMOVE LATER
# Old spellings that are scheduled for removal. Each one is a plain alias of
# the class imported above, so both names refer to the very same object.
FootRawNote = FootNoteRaw
FixedFooterInformation = FixedFooterInfo
FixedHeaderInformation = FixedHeaderInfo
HeaderInformation = HeaderInfo
FooterInformation = FooterInfo
MovingFooterInformation = MovingFooterInfo
PagesFooterInformation = PagesFooterInfo

# Has to match the version of the published distribution (iamraw-4.91.1);
# the previous value '4.91.0' lagged one patch release behind.
__version__ = '4.91.1'

# Parent directory of the directory that contains this ``__init__.py``.
ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
iamraw/__mixin__.py ADDED
@@ -0,0 +1,33 @@
1
+ # =============================================================================
2
+ # C O P Y R I G H T
3
+ # -----------------------------------------------------------------------------
4
+ # Copyright (c) 2022-2023 by Helmut Konrad Schewe. All rights reserved.
5
+ # This file is property of Helmut Konrad Schewe. Any unauthorized copy,
6
+ # use or distribution is an offensive act against international law and may
7
+ # be prosecuted under federal law. Its content is company confidential.
8
+ # =============================================================================
9
+ """\
10
+ >>> @extracted
11
+ ... class Helmut:
12
+ ... pass
13
+ >>> first = Helmut()
14
+ >>> first.__strategy__ = 'hello'
15
+ >>> first.__strategy__
16
+ 'hello'
17
+ """
18
+
19
+
20
def extracted(item):
    """Decorator which equips ``item`` with the strategy attributes.

    ``__strategy__`` and ``__strategy_location__`` are initialised with
    ``None``. ``__strategy_raw__`` is installed as a method whose default
    implementation raises ``NotImplementedError``.

    Args:
        item: object to mark, typically the decorated class
    Returns:
        the very same ``item``, which makes the function usable as decorator
    """

    def raw(self) -> str:
        """Element in document which is converted to current result."""
        raise NotImplementedError

    item.__strategy__ = None
    item.__strategy_location__ = None
    item.__strategy_raw__ = raw
    return item
30
+
31
+
32
def hasstrategy(item) -> bool:
    """Tell whether ``item`` carries a ``__strategy__`` attribute.

    Only the presence of the attribute counts; its value may be ``None``.
    """
    try:
        item.__strategy__
    except AttributeError:
        return False
    return True
iamraw/abbreviation.py ADDED
@@ -0,0 +1,102 @@
1
+ # =============================================================================
2
+ # C O P Y R I G H T
3
+ # -----------------------------------------------------------------------------
4
+ # Copyright (c) 2020-2023 by Helmut Konrad Schewe. All rights reserved.
5
+ # This file is property of Helmut Konrad Schewe. Any unauthorized copy,
6
+ # use or distribution is an offensive act against international law and may
7
+ # be prosecuted under federal law. Its content is company confidential.
8
+ # =============================================================================
9
+
10
+ import collections
11
+ import dataclasses
12
+
13
+ import utilo
14
+
15
+
16
@dataclasses.dataclass
class AbbreviationPosition:
    """Position of an abbreviation given as page, sentence and word.

    All three fields are optional and default to ``None``.
    NOTE(review): whether the numbers are zero- or one-based is not visible
    here - confirm with the producing code.
    """
    page: int = None
    sentence: int = None
    word: int = None
21
+
22
+
23
@dataclasses.dataclass
class Abbreviation:
    """Short form together with its description and optional position.

    Instances can be sorted: ``<`` compares the keys which
    ``utilo.alphabetically`` derives from ``short``.
    """
    short: str = None
    description: str = None
    position: AbbreviationPosition = None

    def __lt__(self, item):
        """Return True if ``self`` sorts strictly before ``item``.

        The comparison is strict: two abbreviations with the same key are
        not "less than" each other. The former ``<=`` made ``a < a`` true,
        which breaks the contract sorting helpers rely on.
        """
        own = utilo.alphabetically(self.short)
        other = utilo.alphabetically(item.short)
        return own < other


Abbreviations = list[Abbreviation]
36
+
37
+
38
@dataclasses.dataclass
class AbbreviationResult:
    """Container for collected abbreviations.

    ``len`` and indexing are forwarded to ``abbreviations``; new items are
    added with :meth:`append`.
    """

    abbreviations: Abbreviations = dataclasses.field(default_factory=list)
    # NOTE(review): presumably the pdf pages the result was taken from -
    # confirm with the producing code
    pdfpages: list = dataclasses.field(default_factory=list)

    def append(self, item):
        """Add ``item`` to the end of ``abbreviations``."""
        self.abbreviations.append(item)  # pylint:disable=E1101

    def __getitem__(self, index):
        """Return the abbreviation(s) selected by ``index``."""
        return self.abbreviations[index]  # pylint:disable=E1136

    def __len__(self):
        """Return the number of collected abbreviations."""
        return len(self.abbreviations)

    def short_inside(self, abbrev: str) -> bool:
        """\
        Check case-insensitively if a short form is already collected.

        Both sides are lower-cased before comparing; formerly only the
        stored short form was, so a query containing an upper-case letter
        could never match. Items without a short form are skipped.

        >>> AbbreviationResult().short_inside('')
        False
        """
        wanted = abbrev.lower() if abbrev is not None else None
        return any(
            item.short is not None and item.short.lower() == wanted
            for item in self.abbreviations
        )
59
+
60
+
61
# Record with the two fields ``page`` and ``content``.
ExtractedTextAbbreviation = collections.namedtuple(
    'ExtractedTextAbbreviation',
    ['page', 'content'],
)
ExtractedTextAbbreviations = list[ExtractedTextAbbreviation]
66
+
67
+
68
@dataclasses.dataclass
class AbbreviationList:
    """Set of abbreviations which supports ``append`` and ``in``."""
    data: set = dataclasses.field(default_factory=set)

    def append(self, item):
        """Add ``item`` to the set; adding it twice has no further effect."""
        self.data.add(item)  # pylint:disable=E1101

    def __contains__(self, item):
        """Return True if ``item`` was added before."""
        return item in self.data  # pylint:disable=unsupported-membership-test


AbbreviationLists = list[AbbreviationList]


@dataclasses.dataclass
class AbbreviationListLookup:
    """Membership lookup across one main table and further lists.

    ``item in lookup`` is True when ``item`` is part of ``table`` or of any
    list in ``other``.
    """
    # ``default_factory`` creates a fresh, empty list per instance. The former
    # ``default=AbbreviationList`` stored the class itself, so every ``in``
    # test on a default-constructed lookup raised ``TypeError``.
    table: AbbreviationList = dataclasses.field(
        default_factory=AbbreviationList)
    other: AbbreviationLists = dataclasses.field(default_factory=list)

    def __contains__(self, item):
        """Return True if ``item`` is known to ``table`` or any ``other``."""
        if item in self.table:  # pylint:disable=E1135
            return True
        # ``other`` may be None when created via ``fromparsed``
        return any(item in table for table in self.other or ())

    @classmethod
    def fromparsed(cls, parsed=None, other=None):
        """Create a lookup out of a parsed table and/or further lists.

        Args:
            parsed: main table; an empty ``AbbreviationList`` is used if None
            other: further lists; passed on as given (may stay None)
        Returns:
            the new lookup
        Raises:
            AssertionError: if both ``parsed`` and ``other`` are empty
        """
        assert parsed or other, 'empty input'
        if parsed is None:
            parsed = AbbreviationList()
        return cls(table=parsed, other=other)
iamraw/annotation.py ADDED
@@ -0,0 +1,52 @@
1
+ # =============================================================================
2
+ # C O P Y R I G H T
3
+ # -----------------------------------------------------------------------------
4
+ # Copyright (c) 2019-2023 by Helmut Konrad Schewe. All rights reserved.
5
+ # This file is property of Helmut Konrad Schewe. Any unauthorized copy,
6
+ # use or distribution is an offensive act against international law and may
7
+ # be prosecuted under federal law. Its content is company confidential.
8
+ # =============================================================================
9
+
10
+ import collections
11
+ import dataclasses
12
+ import enum
13
+
14
+ import iamraw.bounding
15
+
16
+
17
class Link(enum.Enum):
    """Kind of link an annotation represents."""
    # default of the ``Annotation`` base class
    UNDEFINED = -1
    # default of ``PageLink``
    INTERNAL = 0
    # default of ``HyperLink``
    HYPERLINK = 1
21
+
22
+
23
@dataclasses.dataclass
class Annotation:
    """Base class of link annotations.

    ``goal`` and ``bounds`` are required, ``typ`` defaults to
    ``Link.UNDEFINED`` and is overridden by the subclasses.
    """
    # NOTE(review): presumably the target the link points to - confirm
    goal: str
    # bounding box belonging to the annotation
    bounds: iamraw.bounding.BoundingBox
    typ: Link = Link.UNDEFINED
28
+
29
+
30
@dataclasses.dataclass
class HyperLink(Annotation):
    """Annotation whose ``typ`` defaults to ``Link.HYPERLINK``."""
    typ: Link = Link.HYPERLINK
33
+
34
+
35
@dataclasses.dataclass
class PageLink(Annotation):
    """Annotation whose ``typ`` defaults to ``Link.INTERNAL``."""
    typ: Link = Link.INTERNAL
38
+
39
+
40
# Annotations of one page: its ``pagelinks``, its ``hyperlinks`` and the
# ``page`` they belong to.
PageAnnotation = collections.namedtuple(
    'PageAnnotation',
    ['pagelinks', 'hyperlinks', 'page'],
)
PageAnnotations = list[PageAnnotation]
45
+
46
+
47
def pagelink_annotations(annos: PageAnnotations) -> list[PageLink]:
    """Collect the ``pagelinks`` entry of every page annotation.

    Args:
        annos: page annotations to read from
    Returns:
        list with one entry per element of ``annos``, in the same order
    """
    collected = []
    for page in annos:
        collected.append(page.pagelinks)
    return collected
49
+
50
+
51
def hyperlink_annotations(annos: PageAnnotations) -> list[HyperLink]:
    """Collect the ``hyperlinks`` entry of every page annotation.

    Args:
        annos: page annotations to read from
    Returns:
        list with one entry per element of ``annos``, in the same order
    """
    collected = []
    for page in annos:
        collected.append(page.hyperlinks)
    return collected
iamraw/bibliography.py ADDED
@@ -0,0 +1,79 @@
1
+ # =============================================================================
2
+ # C O P Y R I G H T
3
+ # -----------------------------------------------------------------------------
4
+ # Copyright (c) 2020-2023 by Helmut Konrad Schewe. All rights reserved.
5
+ # This file is property of Helmut Konrad Schewe. Any unauthorized copy,
6
+ # use or distribution is an offensive act against international law and may
7
+ # be prosecuted under federal law. Its content is company confidential.
8
+ # =============================================================================
9
+
10
+ import contextlib
11
+ import dataclasses
12
+
13
+ import iamraw
14
+
15
+
16
@dataclasses.dataclass(unsafe_hash=True)
class BibliographyReference:  # pylint:disable=R0902
    """Single entry of a bibliography.

    Instances are hashable. The hash covers the compared fields except
    ``authors``; ``raw`` takes part in neither comparison nor hash.
    """

    title: str = None
    reference: str = None

    data: str = None

    page: int = None
    pageend: int = None

    year: int = None
    yearend: int = None

    hyperlink: str = None
    accessed: str = None

    # a,b,c... to differentiate item in the same year
    number: str = None
    # Still compared by ``==`` but excluded from ``__hash__``: a list is
    # unhashable, so including it made every ``hash(reference)`` call fail
    # with ``TypeError``.
    authors: list[str] = dataclasses.field(default_factory=list, hash=False)

    publisher: str = None

    raw: str = dataclasses.field(default=None, compare=False)
    raw_pdfpage: int = None

    @classmethod
    def create(cls, author: str, title: str = '', year: int = 2000):
        """Build a reference which has exactly one author.

        Args:
            author: two parts separated by a blank; the text before the
                first blank becomes ``Person.name``, the remainder
                ``Person.firstname``
            title: title of the reference
            year: converted with ``int``; a ``TypeError`` (e.g. for
                ``None``) leaves the value as passed
        Returns:
            the new reference
        Raises:
            IndexError: if ``author`` contains no blank
        """
        author = author.split(' ', maxsplit=1)
        author = iamraw.Person(name=author[0], firstname=author[1])
        with contextlib.suppress(TypeError):
            year = int(year)
        return cls(authors=[author], title=title, year=year)

    @property
    def author(self) -> str:
        """Return family of first author."""
        with contextlib.suppress(IndexError, AttributeError):
            # IndexError: No author parsed
            # AttributeError: NoPerson parsed
            return self.authors[0].name  # pylint:disable=E1136
        return None

    def __post_init__(self):
        """Verify that ``year`` is None, an int or the text 'no year'."""
        # TODO: MAY REMOVE NO YEAR LATER
        valid = (
            self.year is None,
            isinstance(self.year, int),
            self.year == 'no year',
        )
        assert any(valid), str(self)


BibliographyReferences = list[BibliographyReference]
66
+
67
+
68
@iamraw.extracted
@dataclasses.dataclass
class BibliographyTable:
    """Bibliography made of an optional headline and its references.

    ``len`` and indexing are forwarded to ``references``.
    """
    headline: str = None
    references: BibliographyReferences = dataclasses.field(default_factory=list)
    # NOTE(review): presumably the pdf pages covered by the table - confirm
    pdfpages: tuple = None

    def __getitem__(self, index):
        """Return the reference(s) selected by ``index``."""
        return self.references[index]

    def __len__(self):
        """Return the number of references."""
        return len(self.references)
iamraw/border.py ADDED
@@ -0,0 +1,36 @@
1
+ # =============================================================================
2
+ # C O P Y R I G H T
3
+ # -----------------------------------------------------------------------------
4
+ # Copyright (c) 2019-2023 by Helmut Konrad Schewe. All rights reserved.
5
+ # This file is property of Helmut Konrad Schewe. Any unauthorized copy,
6
+ # use or distribution is an offensive act against international law and may
7
+ # be prosecuted under federal law. Its content is company confidential.
8
+ # =============================================================================
9
+
10
+ import collections
11
+
12
+ import utilo
13
+
14
# Border given by its four sides, in the order left, right, top, bottom.
Border = collections.namedtuple(
    'Border',
    ['left', 'right', 'top', 'bottom'],
)
Borders = list[Border]
16
+
17
+
18
def validate(items) -> bool:
    """Iterate through the elements and check that no field is negative.

    Every negative field is reported via ``utilo.error``; the scan continues
    after a hit, so all offending fields are reported.

    Args:
        items(List[Border/PageSize]): list of items or a single item; any
            value which is not a ``list`` is treated as one single item
    Returns:
        True if no field is negative (``None`` fields are skipped), False
        if at least one field is below zero
    """
    entries = items if isinstance(items, list) else [items]
    valid = True
    for index, item in enumerate(entries):
        for itemindex, check in enumerate(item):
            if check is None:
                continue
            if not check < 0:
                continue
            msg = 'invalid field(%d, %d): %r' % (index, itemindex, check)
            utilo.error(msg)
            valid = False
    return valid