markdownbridge 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ *.egg
6
+ dist/
7
+ build/
8
+ .venv/
9
+ .env
10
+ *.so
11
+ .mypy_cache/
12
+ .pytest_cache/
13
+ .ruff_cache/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 MarkdownBridge
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,252 @@
1
+ Metadata-Version: 2.4
2
+ Name: markdownbridge
3
+ Version: 0.1.0
4
+ Summary: Python SDK for the MarkdownBridge OCR API — convert documents and images to Markdown
5
+ Project-URL: Homepage, https://www.markdownbridge.com
6
+ Project-URL: Documentation, https://www.markdownbridge.com/docs
7
+ Project-URL: Repository, https://github.com/markdownbridge/markdownbridge-python
8
+ Project-URL: Issues, https://github.com/markdownbridge/markdownbridge-python/issues
9
+ Author-email: MarkdownBridge <support@markdownbridge.com>
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ Keywords: api,document,markdown,ocr,pdf,sdk
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: >=3.9
24
+ Requires-Dist: httpx>=0.24.0
25
+ Description-Content-Type: text/markdown
26
+
27
+ # markdownbridge
28
+
29
+ Python SDK for the [MarkdownBridge](https://www.markdownbridge.com) OCR API — convert documents and images to Markdown.
30
+
31
+ ## Installation
32
+
33
+ ```bash
34
+ pip install markdownbridge
35
+ ```
36
+
37
+ ## Quick Start
38
+
39
+ ```python
40
+ from markdownbridge import MarkdownBridge
41
+
42
+ client = MarkdownBridge(api_key="ocrb_prd_xxx")
43
+
44
+ # One-liner: URL → Markdown
45
+ result = client.ocr("https://example.com/invoice.pdf")
46
+ print(result.markdown)
47
+
48
+ # One-liner: local file → Markdown
49
+ result = client.ocr("./receipt.png")
50
+ print(result.markdown)
51
+ ```
52
+
53
+ ## Authentication
54
+
55
+ Pass your API key directly or set the `MARKDOWNBRIDGE_API_KEY` environment variable:
56
+
57
+ ```bash
58
+ export MARKDOWNBRIDGE_API_KEY="ocrb_prd_xxx"
59
+ ```
60
+
61
+ ```python
62
+ client = MarkdownBridge() # reads from env
63
+ ```
64
+
65
+ ## Client Options
66
+
67
+ ```python
68
+ client = MarkdownBridge(
69
+ api_key="ocrb_prd_xxx", # or env MARKDOWNBRIDGE_API_KEY
70
+ base_url="https://api.markdownbridge.com", # default
71
+ timeout=30.0, # request timeout in seconds
72
+ max_retries=3, # retry 5xx errors with backoff
73
+ )
74
+ ```
75
+
76
+ ## API Reference
77
+
78
+ ### `client.ocr(source, **opts)`
79
+
80
+ The convenience method — give it a URL or file path, get back a `ProcessingResult`.
81
+
82
+ ```python
83
+ result = client.ocr(
84
+ "https://example.com/doc.pdf",
85
+ language="en",
86
+ output_format="markdown",
87
+ enhance_quality=True,
88
+ poll_interval=2.0, # seconds between status checks
89
+ poll_timeout=300.0, # max wait time
90
+ )
91
+ print(result.markdown)
92
+ print(result.page_count)
93
+ ```
94
+
95
+ ### `client.process_url(file_url, **opts)`
96
+
97
+ Submit a URL for processing without waiting for completion.
98
+
99
+ ```python
100
+ proc = client.process_url("https://example.com/doc.pdf")
101
+ print(proc.process_id) # use with get_status() / wait_for_completion()
102
+ ```
103
+
104
+ ### `client.process_file(file_path, **opts)`
105
+
106
+ Upload a local file and submit it for processing.
107
+
108
+ ```python
109
+ proc = client.process_file("./invoice.pdf")
110
+ print(proc.process_id)
111
+ ```
112
+
113
+ ### `client.upload_file(file_path)`
114
+
115
+ Upload a file without processing it.
116
+
117
+ ```python
118
+ upload = client.upload_file("./photo.png")
119
+ print(upload.document_id)
120
+ ```
121
+
122
+ ### `client.get_status(process_id)`
123
+
124
+ Check the current status of a processing job.
125
+
126
+ ```python
127
+ status = client.get_status("uuid-here")
128
+ print(status.status) # queued | processing | completed | failed
129
+ print(status.progress) # 0–100
130
+ print(status.stage) # queued | download | ocr | llm_improvement | completed | failed
131
+ ```
132
+
133
+ ### `client.wait_for_completion(process_id, **opts)`
134
+
135
+ Poll until the job completes or fails.
136
+
137
+ ```python
138
+ result = client.wait_for_completion(
139
+ "uuid-here",
140
+ poll_interval=2.0,
141
+ poll_timeout=300.0,
142
+ on_status_change=lambda s: print(f"Status: {s.status} ({s.stage})"),
143
+ )
144
+ ```
145
+
146
+ ### `client.list_results(**filters)`
147
+
148
+ Fetch paginated results.
149
+
150
+ ```python
151
+ page = client.list_results(limit=20, offset=0, status="completed")
152
+ for item in page.data:
153
+ print(item.file_name, item.status)
154
+ print(f"Total: {page.pagination.total}")
155
+ ```
156
+
157
+ ### `client.iter_results(**filters)`
158
+
159
+ Auto-paginating iterator over all results.
160
+
161
+ ```python
162
+ for item in client.iter_results(status="completed"):
163
+ print(item.file_name)
164
+ ```
165
+
166
+ ### `client.get_result(result_id)`
167
+
168
+ Fetch a specific result by ID.
169
+
170
+ ```python
171
+ result = client.get_result("uuid-here")
172
+ print(result.result.markdown)
173
+ ```
174
+
175
+ ### `client.info()`
176
+
177
+ Get API version and status.
178
+
179
+ ```python
180
+ info = client.info()
181
+ print(info.version, info.status)
182
+ ```
183
+
184
+ ## Async Usage
185
+
186
+ Every method has an async equivalent via `AsyncMarkdownBridge`:
187
+
188
+ ```python
189
+ import asyncio
190
+ from markdownbridge import AsyncMarkdownBridge
191
+
192
+ async def main():
193
+ async with AsyncMarkdownBridge(api_key="ocrb_prd_xxx") as client:
194
+ result = await client.ocr("https://example.com/invoice.pdf")
195
+ print(result.markdown)
196
+
197
+ # Auto-paginating async iteration
198
+ async for item in client.iter_results():
199
+ print(item.file_name)
200
+
201
+ asyncio.run(main())
202
+ ```
203
+
204
+ ## Error Handling
205
+
206
+ All exceptions inherit from `MarkdownBridgeError` and include `status_code`, `error_code`, and `correlation_id`:
207
+
208
+ ```python
209
+ from markdownbridge import MarkdownBridge, MarkdownBridgeError, RateLimitError, AuthenticationError
210
+
211
+ client = MarkdownBridge(api_key="ocrb_prd_xxx")
212
+
213
+ try:
214
+ result = client.ocr("https://example.com/doc.pdf")
215
+ except AuthenticationError:
216
+ print("Invalid API key")
217
+ except RateLimitError as e:
218
+ print(f"Rate limited — retry after {e.retry_after}s")
219
+ except MarkdownBridgeError as e:
220
+ print(f"API error {e.status_code}: {e}")
221
+ ```
222
+
223
+ ### Exception Hierarchy
224
+
225
+ | Exception | HTTP Status | When |
226
+ |-----------|-------------|------|
227
+ | `AuthenticationError` | 401 | Invalid or missing API key |
228
+ | `ValidationError` | 400/422 | Invalid request parameters |
229
+ | `NotFoundError` | 404 | Resource not found |
230
+ | `RateLimitError` | 429 | Too many requests |
231
+ | `InsufficientCreditsError` | 402 | Account has no credits |
232
+ | `ServerError` | 5xx | Server-side failure |
233
+ | `ProcessingError` | — | OCR job failed |
234
+ | `FileUploadError` | — | Upload failed |
235
+ | `TimeoutError` | — | Polling exceeded timeout |
236
+
237
+ ## Data Types
238
+
239
+ All response types are frozen dataclasses:
240
+
241
+ - `ProcessResponse` — process_id, status, file_id, stage
242
+ - `ProcessingStatus` — process_id, status, progress, stage, result, error
243
+ - `ProcessingResult` — text, markdown, json, page_count, processing_time
244
+ - `UploadResponse` — file_key, public_url, document_id
245
+ - `ResultItem` — id, process_id, file_name, status, result
246
+ - `ResultsPage` — data, pagination
247
+ - `Pagination` — total, limit, offset, has_more, next_offset
248
+ - `ApiInfo` — version, status, endpoints
249
+
250
+ ## License
251
+
252
+ MIT
@@ -0,0 +1,226 @@
1
+ # markdownbridge
2
+
3
+ Python SDK for the [MarkdownBridge](https://www.markdownbridge.com) OCR API — convert documents and images to Markdown.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install markdownbridge
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ```python
14
+ from markdownbridge import MarkdownBridge
15
+
16
+ client = MarkdownBridge(api_key="ocrb_prd_xxx")
17
+
18
+ # One-liner: URL → Markdown
19
+ result = client.ocr("https://example.com/invoice.pdf")
20
+ print(result.markdown)
21
+
22
+ # One-liner: local file → Markdown
23
+ result = client.ocr("./receipt.png")
24
+ print(result.markdown)
25
+ ```
26
+
27
+ ## Authentication
28
+
29
+ Pass your API key directly or set the `MARKDOWNBRIDGE_API_KEY` environment variable:
30
+
31
+ ```bash
32
+ export MARKDOWNBRIDGE_API_KEY="ocrb_prd_xxx"
33
+ ```
34
+
35
+ ```python
36
+ client = MarkdownBridge() # reads from env
37
+ ```
38
+
39
+ ## Client Options
40
+
41
+ ```python
42
+ client = MarkdownBridge(
43
+ api_key="ocrb_prd_xxx", # or env MARKDOWNBRIDGE_API_KEY
44
+ base_url="https://api.markdownbridge.com", # default
45
+ timeout=30.0, # request timeout in seconds
46
+ max_retries=3, # retry 5xx errors with backoff
47
+ )
48
+ ```
49
+
50
+ ## API Reference
51
+
52
+ ### `client.ocr(source, **opts)`
53
+
54
+ The convenience method — give it a URL or file path, get back a `ProcessingResult`.
55
+
56
+ ```python
57
+ result = client.ocr(
58
+ "https://example.com/doc.pdf",
59
+ language="en",
60
+ output_format="markdown",
61
+ enhance_quality=True,
62
+ poll_interval=2.0, # seconds between status checks
63
+ poll_timeout=300.0, # max wait time
64
+ )
65
+ print(result.markdown)
66
+ print(result.page_count)
67
+ ```
68
+
69
+ ### `client.process_url(file_url, **opts)`
70
+
71
+ Submit a URL for processing without waiting for completion.
72
+
73
+ ```python
74
+ proc = client.process_url("https://example.com/doc.pdf")
75
+ print(proc.process_id) # use with get_status() / wait_for_completion()
76
+ ```
77
+
78
+ ### `client.process_file(file_path, **opts)`
79
+
80
+ Upload a local file and submit it for processing.
81
+
82
+ ```python
83
+ proc = client.process_file("./invoice.pdf")
84
+ print(proc.process_id)
85
+ ```
86
+
87
+ ### `client.upload_file(file_path)`
88
+
89
+ Upload a file without processing it.
90
+
91
+ ```python
92
+ upload = client.upload_file("./photo.png")
93
+ print(upload.document_id)
94
+ ```
95
+
96
+ ### `client.get_status(process_id)`
97
+
98
+ Check the current status of a processing job.
99
+
100
+ ```python
101
+ status = client.get_status("uuid-here")
102
+ print(status.status) # queued | processing | completed | failed
103
+ print(status.progress) # 0–100
104
+ print(status.stage) # queued | download | ocr | llm_improvement | completed | failed
105
+ ```
106
+
107
+ ### `client.wait_for_completion(process_id, **opts)`
108
+
109
+ Poll until the job completes or fails.
110
+
111
+ ```python
112
+ result = client.wait_for_completion(
113
+ "uuid-here",
114
+ poll_interval=2.0,
115
+ poll_timeout=300.0,
116
+ on_status_change=lambda s: print(f"Status: {s.status} ({s.stage})"),
117
+ )
118
+ ```
119
+
120
+ ### `client.list_results(**filters)`
121
+
122
+ Fetch paginated results.
123
+
124
+ ```python
125
+ page = client.list_results(limit=20, offset=0, status="completed")
126
+ for item in page.data:
127
+ print(item.file_name, item.status)
128
+ print(f"Total: {page.pagination.total}")
129
+ ```
130
+
131
+ ### `client.iter_results(**filters)`
132
+
133
+ Auto-paginating iterator over all results.
134
+
135
+ ```python
136
+ for item in client.iter_results(status="completed"):
137
+ print(item.file_name)
138
+ ```
139
+
140
+ ### `client.get_result(result_id)`
141
+
142
+ Fetch a specific result by ID.
143
+
144
+ ```python
145
+ result = client.get_result("uuid-here")
146
+ print(result.result.markdown)
147
+ ```
148
+
149
+ ### `client.info()`
150
+
151
+ Get API version and status.
152
+
153
+ ```python
154
+ info = client.info()
155
+ print(info.version, info.status)
156
+ ```
157
+
158
+ ## Async Usage
159
+
160
+ Every method has an async equivalent via `AsyncMarkdownBridge`:
161
+
162
+ ```python
163
+ import asyncio
164
+ from markdownbridge import AsyncMarkdownBridge
165
+
166
+ async def main():
167
+ async with AsyncMarkdownBridge(api_key="ocrb_prd_xxx") as client:
168
+ result = await client.ocr("https://example.com/invoice.pdf")
169
+ print(result.markdown)
170
+
171
+ # Auto-paginating async iteration
172
+ async for item in client.iter_results():
173
+ print(item.file_name)
174
+
175
+ asyncio.run(main())
176
+ ```
177
+
178
+ ## Error Handling
179
+
180
+ All exceptions inherit from `MarkdownBridgeError` and include `status_code`, `error_code`, and `correlation_id`:
181
+
182
+ ```python
183
+ from markdownbridge import MarkdownBridge, MarkdownBridgeError, RateLimitError, AuthenticationError
184
+
185
+ client = MarkdownBridge(api_key="ocrb_prd_xxx")
186
+
187
+ try:
188
+ result = client.ocr("https://example.com/doc.pdf")
189
+ except AuthenticationError:
190
+ print("Invalid API key")
191
+ except RateLimitError as e:
192
+ print(f"Rate limited — retry after {e.retry_after}s")
193
+ except MarkdownBridgeError as e:
194
+ print(f"API error {e.status_code}: {e}")
195
+ ```
196
+
197
+ ### Exception Hierarchy
198
+
199
+ | Exception | HTTP Status | When |
200
+ |-----------|-------------|------|
201
+ | `AuthenticationError` | 401 | Invalid or missing API key |
202
+ | `ValidationError` | 400/422 | Invalid request parameters |
203
+ | `NotFoundError` | 404 | Resource not found |
204
+ | `RateLimitError` | 429 | Too many requests |
205
+ | `InsufficientCreditsError` | 402 | Account has no credits |
206
+ | `ServerError` | 5xx | Server-side failure |
207
+ | `ProcessingError` | — | OCR job failed |
208
+ | `FileUploadError` | — | Upload failed |
209
+ | `TimeoutError` | — | Polling exceeded timeout |
210
+
211
+ ## Data Types
212
+
213
+ All response types are frozen dataclasses:
214
+
215
+ - `ProcessResponse` — process_id, status, file_id, stage
216
+ - `ProcessingStatus` — process_id, status, progress, stage, result, error
217
+ - `ProcessingResult` — text, markdown, json, page_count, processing_time
218
+ - `UploadResponse` — file_key, public_url, document_id
219
+ - `ResultItem` — id, process_id, file_name, status, result
220
+ - `ResultsPage` — data, pagination
221
+ - `Pagination` — total, limit, offset, has_more, next_offset
222
+ - `ApiInfo` — version, status, endpoints
223
+
224
+ ## License
225
+
226
+ MIT
@@ -0,0 +1,42 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "markdownbridge"
7
+ dynamic = ["version"]
8
+ description = "Python SDK for the MarkdownBridge OCR API — convert documents and images to Markdown"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.9"
12
+ authors = [
13
+ { name = "MarkdownBridge", email = "support@markdownbridge.com" },
14
+ ]
15
+ keywords = ["ocr", "markdown", "pdf", "document", "api", "sdk"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Programming Language :: Python :: 3.13",
26
+ "Typing :: Typed",
27
+ ]
28
+ dependencies = [
29
+ "httpx>=0.24.0",
30
+ ]
31
+
32
+ [project.urls]
33
+ Homepage = "https://www.markdownbridge.com"
34
+ Documentation = "https://www.markdownbridge.com/docs"
35
+ Repository = "https://github.com/markdownbridge/markdownbridge-python"
36
+ Issues = "https://github.com/markdownbridge/markdownbridge-python/issues"
37
+
38
+ [tool.hatch.version]
39
+ path = "src/markdownbridge/_version.py"
40
+
41
+ [tool.hatch.build.targets.wheel]
42
+ packages = ["src/markdownbridge"]
@@ -0,0 +1,55 @@
1
+ """MarkdownBridge Python SDK — convert documents to Markdown via the MarkdownBridge API."""
2
+
3
+ from ._async_client import AsyncMarkdownBridge
4
+ from ._exceptions import (
5
+ AuthenticationError,
6
+ FileUploadError,
7
+ InsufficientCreditsError,
8
+ MarkdownBridgeError,
9
+ NotFoundError,
10
+ ProcessingError,
11
+ RateLimitError,
12
+ ServerError,
13
+ TimeoutError,
14
+ ValidationError,
15
+ )
16
+ from ._sync_client import MarkdownBridge
17
+ from ._types import (
18
+ ApiInfo,
19
+ Pagination,
20
+ ProcessingResult,
21
+ ProcessingStatus,
22
+ ProcessResponse,
23
+ ResultItem,
24
+ ResultsPage,
25
+ UploadResponse,
26
+ )
27
+ from ._version import __version__
28
+
29
+ __all__ = [
30
+ # Clients
31
+ "MarkdownBridge",
32
+ "AsyncMarkdownBridge",
33
+ # Exceptions
34
+ "MarkdownBridgeError",
35
+ "AuthenticationError",
36
+ "ValidationError",
37
+ "NotFoundError",
38
+ "RateLimitError",
39
+ "InsufficientCreditsError",
40
+ "ServerError",
41
+ "ProcessingError",
42
+ "FileUploadError",
43
+ "TimeoutError",
44
+ # Types
45
+ "ProcessResponse",
46
+ "ProcessingStatus",
47
+ "ProcessingResult",
48
+ "UploadResponse",
49
+ "ResultItem",
50
+ "ResultsPage",
51
+ "Pagination",
52
+ "ApiInfo",
53
+ # Version
54
+ "__version__",
55
+ ]