fmtr.tools 1.0.33.tar.gz → 1.0.35.tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fmtr.tools might be problematic. Click here for more details.
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/PKG-INFO +1 -1
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/pdf_tools.py +36 -14
- fmtr.tools-1.0.35/fmtr/tools/version +1 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr.tools.egg-info/PKG-INFO +1 -1
- fmtr.tools-1.0.33/fmtr/tools/version +0 -1
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/LICENSE +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/README.md +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/__init__.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/ai_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/api_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/async_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/augmentation_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/caching_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/config.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/config_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/console_script_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/data_modelling_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/dataclass_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/datatype_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/docker_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/environment_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/function_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/google_api_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/hash_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/hfh_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/html_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/import_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/inspection_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/interface_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/iterator_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/json_fix_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/json_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/logging_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/merging_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/metric_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/name_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/netrc_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/openai_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/parallel_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/path_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/platform_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/process_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/profiling_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/random_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/semantic_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/spaces_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/string_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tests/__init__.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tests/conftest.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tests/helpers.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tests/test_datatype.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tests/test_environment.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tests/test_json.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tests/test_path.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tests/test_yaml.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tokenization_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/unicode_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/version_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/yaml_tools.py +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr.tools.egg-info/SOURCES.txt +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr.tools.egg-info/dependency_links.txt +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr.tools.egg-info/entry_points.txt +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr.tools.egg-info/requires.txt +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr.tools.egg-info/top_level.txt +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/setup.cfg +0 -0
- {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/setup.py +0 -0
|
@@ -97,7 +97,7 @@ class Block(data_modelling_tools.Base):
|
|
|
97
97
|
Simple text representation
|
|
98
98
|
|
|
99
99
|
"""
|
|
100
|
-
return ' '.join([
|
|
100
|
+
return ' '.join([line.text for line in self.lines])
|
|
101
101
|
|
|
102
102
|
@classmethod
|
|
103
103
|
def from_dict(cls, data: Dict) -> Self:
|
|
@@ -120,6 +120,32 @@ class Block(data_modelling_tools.Base):
|
|
|
120
120
|
return self.bbox.rect
|
|
121
121
|
|
|
122
122
|
|
|
123
|
+
class Page(data_modelling_tools.Base):
|
|
124
|
+
number: int
|
|
125
|
+
width: float
|
|
126
|
+
height: float
|
|
127
|
+
blocks: List[Block]
|
|
128
|
+
|
|
129
|
+
@property
|
|
130
|
+
def text(self) -> str:
|
|
131
|
+
"""
|
|
132
|
+
|
|
133
|
+
Simple text representation
|
|
134
|
+
|
|
135
|
+
"""
|
|
136
|
+
return ' '.join([block.text for block in self.blocks])
|
|
137
|
+
|
|
138
|
+
@classmethod
|
|
139
|
+
def from_dict(cls, data: Dict) -> Self:
|
|
140
|
+
"""
|
|
141
|
+
|
|
142
|
+
Instantiate from PyMuPDF dictionary data
|
|
143
|
+
|
|
144
|
+
"""
|
|
145
|
+
|
|
146
|
+
data['blocks'] = [Block.from_dict(block) for block in data['blocks']]
|
|
147
|
+
return cls(**data)
|
|
148
|
+
|
|
123
149
|
class Document(pm.Document):
|
|
124
150
|
"""
|
|
125
151
|
|
|
@@ -128,21 +154,22 @@ class Document(pm.Document):
|
|
|
128
154
|
"""
|
|
129
155
|
|
|
130
156
|
@property
|
|
131
|
-
def data(self) -> List[
|
|
157
|
+
def data(self) -> List[Page]:
|
|
132
158
|
"""
|
|
133
159
|
|
|
134
160
|
Get representation of Document elements as Python objects.
|
|
135
161
|
|
|
136
162
|
"""
|
|
137
163
|
|
|
138
|
-
|
|
164
|
+
pages = []
|
|
139
165
|
|
|
140
|
-
for
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
166
|
+
for page_pm in self:
|
|
167
|
+
data = page_pm.get_text("dict", flags=pm.TEXTFLAGS_TEXT | pm.TEXT_ACCURATE_BBOXES)
|
|
168
|
+
data['number'] = page_pm.number
|
|
169
|
+
page = Page.from_dict(data)
|
|
170
|
+
pages.append(page)
|
|
144
171
|
|
|
145
|
-
return
|
|
172
|
+
return pages
|
|
146
173
|
|
|
147
174
|
def to_markdown(self, **kwargs) -> str:
|
|
148
175
|
"""
|
|
@@ -162,11 +189,6 @@ if __name__ == '__main__':
|
|
|
162
189
|
assert PATH_PDF.exists()
|
|
163
190
|
|
|
164
191
|
doc = Document(PATH_PDF)
|
|
165
|
-
doc.data
|
|
166
|
-
|
|
167
|
-
for page in doc:
|
|
168
|
-
print(page.get_text('dict'))
|
|
169
|
-
print(page.get_text('html'))
|
|
170
|
-
|
|
192
|
+
data = doc.data
|
|
171
193
|
md = doc.to_markdown()
|
|
172
194
|
md
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
1.0.35
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
1.0.33
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|