fmtr.tools 1.0.33__tar.gz → 1.0.35__tar.gz

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fmtr.tools might be problematic. Click here for more details.

Files changed (67)
  1. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/PKG-INFO +1 -1
  2. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/pdf_tools.py +36 -14
  3. fmtr.tools-1.0.35/fmtr/tools/version +1 -0
  4. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr.tools.egg-info/PKG-INFO +1 -1
  5. fmtr.tools-1.0.33/fmtr/tools/version +0 -1
  6. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/LICENSE +0 -0
  7. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/README.md +0 -0
  8. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/__init__.py +0 -0
  9. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/ai_tools.py +0 -0
  10. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/api_tools.py +0 -0
  11. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/async_tools.py +0 -0
  12. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/augmentation_tools.py +0 -0
  13. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/caching_tools.py +0 -0
  14. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/config.py +0 -0
  15. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/config_tools.py +0 -0
  16. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/console_script_tools.py +0 -0
  17. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/data_modelling_tools.py +0 -0
  18. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/dataclass_tools.py +0 -0
  19. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/datatype_tools.py +0 -0
  20. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/docker_tools.py +0 -0
  21. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/environment_tools.py +0 -0
  22. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/function_tools.py +0 -0
  23. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/google_api_tools.py +0 -0
  24. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/hash_tools.py +0 -0
  25. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/hfh_tools.py +0 -0
  26. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/html_tools.py +0 -0
  27. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/import_tools.py +0 -0
  28. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/inspection_tools.py +0 -0
  29. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/interface_tools.py +0 -0
  30. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/iterator_tools.py +0 -0
  31. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/json_fix_tools.py +0 -0
  32. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/json_tools.py +0 -0
  33. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/logging_tools.py +0 -0
  34. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/merging_tools.py +0 -0
  35. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/metric_tools.py +0 -0
  36. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/name_tools.py +0 -0
  37. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/netrc_tools.py +0 -0
  38. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/openai_tools.py +0 -0
  39. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/parallel_tools.py +0 -0
  40. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/path_tools.py +0 -0
  41. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/platform_tools.py +0 -0
  42. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/process_tools.py +0 -0
  43. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/profiling_tools.py +0 -0
  44. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/random_tools.py +0 -0
  45. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/semantic_tools.py +0 -0
  46. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/spaces_tools.py +0 -0
  47. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/string_tools.py +0 -0
  48. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tests/__init__.py +0 -0
  49. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tests/conftest.py +0 -0
  50. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tests/helpers.py +0 -0
  51. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tests/test_datatype.py +0 -0
  52. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tests/test_environment.py +0 -0
  53. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tests/test_json.py +0 -0
  54. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tests/test_path.py +0 -0
  55. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tests/test_yaml.py +0 -0
  56. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tokenization_tools.py +0 -0
  57. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/tools.py +0 -0
  58. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/unicode_tools.py +0 -0
  59. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/version_tools.py +0 -0
  60. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr/tools/yaml_tools.py +0 -0
  61. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr.tools.egg-info/SOURCES.txt +0 -0
  62. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr.tools.egg-info/dependency_links.txt +0 -0
  63. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr.tools.egg-info/entry_points.txt +0 -0
  64. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr.tools.egg-info/requires.txt +0 -0
  65. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/fmtr.tools.egg-info/top_level.txt +0 -0
  66. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/setup.cfg +0 -0
  67. {fmtr.tools-1.0.33 → fmtr.tools-1.0.35}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: fmtr.tools
3
- Version: 1.0.33
3
+ Version: 1.0.35
4
4
  Summary: Collection of high-level tools to simplify everyday development tasks, with a focus on AI/ML
5
5
  Home-page: https://github.com/fmtr/fmtr.tools
6
6
  Author: Frontmatter
@@ -97,7 +97,7 @@ class Block(data_modelling_tools.Base):
97
97
  Simple text representation
98
98
 
99
99
  """
100
- return ' '.join([span.text for span in self.spans])
100
+ return ' '.join([line.text for line in self.lines])
101
101
 
102
102
  @classmethod
103
103
  def from_dict(cls, data: Dict) -> Self:
@@ -120,6 +120,32 @@ class Block(data_modelling_tools.Base):
120
120
  return self.bbox.rect
121
121
 
122
122
 
123
+ class Page(data_modelling_tools.Base):
124
+ number: int
125
+ width: float
126
+ height: float
127
+ blocks: List[Block]
128
+
129
+ @property
130
+ def text(self) -> str:
131
+ """
132
+
133
+ Simple text representation
134
+
135
+ """
136
+ return ' '.join([block.text for block in self.blocks])
137
+
138
+ @classmethod
139
+ def from_dict(cls, data: Dict) -> Self:
140
+ """
141
+
142
+ Instantiate from PyMuPDF dictionary data
143
+
144
+ """
145
+
146
+ data['blocks'] = [Block.from_dict(block) for block in data['blocks']]
147
+ return cls(**data)
148
+
123
149
  class Document(pm.Document):
124
150
  """
125
151
 
@@ -128,21 +154,22 @@ class Document(pm.Document):
128
154
  """
129
155
 
130
156
  @property
131
- def data(self) -> List[Block]:
157
+ def data(self) -> List[Page]:
132
158
  """
133
159
 
134
160
  Get representation of Document elements as Python objects.
135
161
 
136
162
  """
137
163
 
138
- blocks = []
164
+ pages = []
139
165
 
140
- for page in self:
141
- for block in page.get_text("dict")["blocks"]:
142
- obj = Block.from_dict(block)
143
- blocks.append(obj)
166
+ for page_pm in self:
167
+ data = page_pm.get_text("dict", flags=pm.TEXTFLAGS_TEXT | pm.TEXT_ACCURATE_BBOXES)
168
+ data['number'] = page_pm.number
169
+ page = Page.from_dict(data)
170
+ pages.append(page)
144
171
 
145
- return blocks
172
+ return pages
146
173
 
147
174
  def to_markdown(self, **kwargs) -> str:
148
175
  """
@@ -162,11 +189,6 @@ if __name__ == '__main__':
162
189
  assert PATH_PDF.exists()
163
190
 
164
191
  doc = Document(PATH_PDF)
165
- doc.data
166
-
167
- for page in doc:
168
- print(page.get_text('dict'))
169
- print(page.get_text('html'))
170
-
192
+ data = doc.data
171
193
  md = doc.to_markdown()
172
194
  md
@@ -0,0 +1 @@
1
+ 1.0.35
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: fmtr.tools
3
- Version: 1.0.33
3
+ Version: 1.0.35
4
4
  Summary: Collection of high-level tools to simplify everyday development tasks, with a focus on AI/ML
5
5
  Home-page: https://github.com/fmtr/fmtr.tools
6
6
  Author: Frontmatter
@@ -1 +0,0 @@
1
- 1.0.33
File without changes
File without changes
File without changes
File without changes