fmtr.tools 1.0.34__py3-none-any.whl → 1.0.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fmtr.tools might be problematic. Click here for more details.

fmtr/tools/pdf_tools.py CHANGED
@@ -120,6 +120,32 @@ class Block(data_modelling_tools.Base):
120
120
  return self.bbox.rect
121
121
 
122
122
 
123
+ class Page(data_modelling_tools.Base):
124
+ number: int
125
+ width: float
126
+ height: float
127
+ blocks: List[Block]
128
+
129
+ @property
130
+ def text(self) -> str:
131
+ """
132
+
133
+ Simple text representation
134
+
135
+ """
136
+ return ' '.join([block.text for block in self.blocks])
137
+
138
+ @classmethod
139
+ def from_dict(cls, data: Dict) -> Self:
140
+ """
141
+
142
+ Instantiate from PyMuPDF dictionary data
143
+
144
+ """
145
+
146
+ data['blocks'] = [Block.from_dict(block) for block in data['blocks']]
147
+ return cls(**data)
148
+
123
149
  class Document(pm.Document):
124
150
  """
125
151
 
@@ -128,21 +154,22 @@ class Document(pm.Document):
128
154
  """
129
155
 
130
156
  @property
131
- def data(self) -> List[Block]:
157
+ def data(self) -> List[Page]:
132
158
  """
133
159
 
134
160
  Get representation of Document elements as Python objects.
135
161
 
136
162
  """
137
163
 
138
- blocks = []
164
+ pages = []
139
165
 
140
- for page in self:
141
- for block in page.get_text("dict", flags=pm.TEXTFLAGS_TEXT | pm.TEXT_ACCURATE_BBOXES)["blocks"]:
142
- obj = Block.from_dict(block)
143
- blocks.append(obj)
166
+ for page_pm in self:
167
+ data = page_pm.get_text("dict", flags=pm.TEXTFLAGS_TEXT | pm.TEXT_ACCURATE_BBOXES)
168
+ data['number'] = page_pm.number
169
+ page = Page.from_dict(data)
170
+ pages.append(page)
144
171
 
145
- return blocks
172
+ return pages
146
173
 
147
174
  def to_markdown(self, **kwargs) -> str:
148
175
  """
fmtr/tools/version CHANGED
@@ -1 +1 @@
1
- 1.0.34
1
+ 1.0.35
fmtr.tools-1.0.35.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: fmtr.tools
3
- Version: 1.0.34
3
+ Version: 1.0.35
4
4
  Summary: Collection of high-level tools to simplify everyday development tasks, with a focus on AI/ML
5
5
  Home-page: https://github.com/fmtr/fmtr.tools
6
6
  Author: Frontmatter
fmtr.tools-1.0.35.dist-info/RECORD CHANGED
@@ -31,7 +31,7 @@ fmtr/tools/netrc_tools.py,sha256=PpNpz_mWlQi6VHGromKwFfTyLpHUXsd4LY6-OKLCbeI,376
31
31
  fmtr/tools/openai_tools.py,sha256=6SUgejgzUzmlKKct2_ePXntvMegu3FJgfk9x7aqtqYc,742
32
32
  fmtr/tools/parallel_tools.py,sha256=G__ZbLRRx4cP5OyqY1hKwnE-VI3m5prYABB0tnZHnes,3132
33
33
  fmtr/tools/path_tools.py,sha256=1GeWXdhV5rH99IfLI5ZFEnOJfs4Q4mYTT2R-rA791iQ,4273
34
- fmtr/tools/pdf_tools.py,sha256=mCLPJJlN2izIvSIjhJkEUZKT0GYQGmr96dPq9oIRsuo,3524
34
+ fmtr/tools/pdf_tools.py,sha256=xvv9B84uAF81rFJRnXhSsxYuP42vY9ZdPVFrSMVe8G8,4069
35
35
  fmtr/tools/platform_tools.py,sha256=7p69CmAHe_sF68Fx9uVhns1k5EewTHTWgUYzkl6ZQKA,308
36
36
  fmtr/tools/process_tools.py,sha256=Ysh5Dk2QFBhXQerArjKdt7xZd3JrN5Ho02AaOjH0Nnw,1425
37
37
  fmtr/tools/profiling_tools.py,sha256=jpXVjaNKPydTasEQVNXvxzGtMhXPit08AnJddkU8uIc,46
@@ -42,7 +42,7 @@ fmtr/tools/string_tools.py,sha256=U2EptMWR6KDOP22ZQ4ReUHV4i25SP7xwCmZScI1sy4M,32
42
42
  fmtr/tools/tokenization_tools.py,sha256=9FP5vgPufWv0XA961eVKObFll0d_2mM0W3ut3rtZyeo,4329
43
43
  fmtr/tools/tools.py,sha256=xnfUrOnrT4OxFYez6vV5tAhydzCICJFiGVnviiZDEQo,796
44
44
  fmtr/tools/unicode_tools.py,sha256=yS_9wpu8ogNoiIL7s1G_8bETFFO_YQlo4LNPv1NLDeY,52
45
- fmtr/tools/version,sha256=95PJL7GqZ_Ze5aKBt6zAx5dswp7AauJVBg2AJ6DPaFU,6
45
+ fmtr/tools/version,sha256=dl9h4ALGWOf6-smyLVw__eLZbZ3cQt7ygNYjZ5fNJZo,6
46
46
  fmtr/tools/version_tools.py,sha256=axzzHBS9V1n6YuSacsDKG3VfAvRqR8qr6aENCibR8vs,1248
47
47
  fmtr/tools/yaml_tools.py,sha256=Ol43ZwbnSXGnn1K98Uxx61KPGSqfC4axE-X2q1LKMwk,349
48
48
  fmtr/tools/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -53,9 +53,9 @@ fmtr/tools/tests/test_environment.py,sha256=iHaiMQfECYZPkPKwfuIZV9uHuWe3aE-p_dN_
53
53
  fmtr/tools/tests/test_json.py,sha256=IeSP4ziPvRcmS8kq7k9tHonC9rN5YYq9GSNT2ul6Msk,287
54
54
  fmtr/tools/tests/test_path.py,sha256=AkZQa6_8BQ-VaCyL_J-iKmdf2ZaM-xFYR37Kun3k4_g,2188
55
55
  fmtr/tools/tests/test_yaml.py,sha256=jc0TwwKu9eC0LvFGNMERdgBue591xwLxYXFbtsRwXVM,287
56
- fmtr.tools-1.0.34.dist-info/LICENSE,sha256=FW9aa6vVN5IjRQWLT43hs4_koYSmpcbIovlKeAJ0_cI,10757
57
- fmtr.tools-1.0.34.dist-info/METADATA,sha256=n17dBm5yzzI_ktSH_1hqUxN7cXJtCSRa_Fh4Vc_vqYQ,13438
58
- fmtr.tools-1.0.34.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
59
- fmtr.tools-1.0.34.dist-info/entry_points.txt,sha256=CEStVkwJ1mTFvhN1WV5RdW83SkNW1d5Syj-KZ6A19ng,72
60
- fmtr.tools-1.0.34.dist-info/top_level.txt,sha256=t5341a8ii3n4RFizwTeXGmcq_pf4GqL1h9ylE5LIWRk,12
61
- fmtr.tools-1.0.34.dist-info/RECORD,,
56
+ fmtr.tools-1.0.35.dist-info/LICENSE,sha256=FW9aa6vVN5IjRQWLT43hs4_koYSmpcbIovlKeAJ0_cI,10757
57
+ fmtr.tools-1.0.35.dist-info/METADATA,sha256=oWxo_lXLyOF_Osu8JvqFeUTWS5lgh-aTRgGKYIzePkU,13438
58
+ fmtr.tools-1.0.35.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
59
+ fmtr.tools-1.0.35.dist-info/entry_points.txt,sha256=CEStVkwJ1mTFvhN1WV5RdW83SkNW1d5Syj-KZ6A19ng,72
60
+ fmtr.tools-1.0.35.dist-info/top_level.txt,sha256=t5341a8ii3n4RFizwTeXGmcq_pf4GqL1h9ylE5LIWRk,12
61
+ fmtr.tools-1.0.35.dist-info/RECORD,,