byteit 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- byteit-0.1.2/CHANGELOG.md +45 -0
- {byteit-0.1.0 → byteit-0.1.2}/MANIFEST.in +1 -0
- byteit-0.1.2/PKG-INFO +275 -0
- byteit-0.1.2/README.md +237 -0
- {byteit-0.1.0 → byteit-0.1.2}/byteit/ByteITClient.py +56 -28
- {byteit-0.1.0 → byteit-0.1.2}/byteit/__init__.py +11 -1
- byteit-0.1.2/byteit/progress.py +167 -0
- byteit-0.1.2/byteit.egg-info/PKG-INFO +275 -0
- {byteit-0.1.0 → byteit-0.1.2}/byteit.egg-info/SOURCES.txt +4 -1
- {byteit-0.1.0 → byteit-0.1.2}/byteit.egg-info/requires.txt +1 -0
- {byteit-0.1.0 → byteit-0.1.2}/pyproject.toml +5 -3
- {byteit-0.1.0 → byteit-0.1.2}/tests/test_client.py +134 -5
- byteit-0.1.2/tests/test_progress.py +133 -0
- byteit-0.1.0/PKG-INFO +0 -424
- byteit-0.1.0/README.md +0 -387
- byteit-0.1.0/byteit.egg-info/PKG-INFO +0 -424
- {byteit-0.1.0 → byteit-0.1.2}/LICENSE +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/byteit/connectors/LocalFileInputConnector.py +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/byteit/connectors/LocalFileOutputConnector.py +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/byteit/connectors/S3InputConnector.py +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/byteit/connectors/S3OutputConnector.py +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/byteit/connectors/__init__.py +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/byteit/connectors/base.py +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/byteit/exceptions.py +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/byteit/models/DocumentMetadata.py +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/byteit/models/Job.py +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/byteit/models/JobList.py +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/byteit/models/OutputFormat.py +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/byteit/models/ProcessingOptions.py +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/byteit/validations.py +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/byteit.egg-info/dependency_links.txt +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/byteit.egg-info/top_level.txt +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/setup.cfg +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/setup.py +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/tests/__init__.py +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/tests/test_connectors.py +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/tests/test_exceptions.py +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/tests/test_integration.py +0 -0
- {byteit-0.1.0 → byteit-0.1.2}/tests/test_models.py +0 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
## [0.1.2] - 2026-01-31
|
|
10
|
+
|
|
11
|
+
### Changed
|
|
12
|
+
- Processing migrated to upgraded, higher-performance production servers, reducing processing time by ~50%.
|
|
13
|
+
|
|
14
|
+
- Adaptive polling improved for smoother operation.
|
|
15
|
+
|
|
16
|
+
- Progress bar and output display enhanced for notebooks.
|
|
17
|
+
|
|
18
|
+
- Increased `DEFAULT_TIMEOUT` from 10 minutes to 30 minutes for large documents.
|
|
19
|
+
|
|
20
|
+
## [0.1.1] - 2026-01-24
|
|
21
|
+
|
|
22
|
+
### Changed
|
|
23
|
+
- Increased `DEFAULT_TIMEOUT` from 30 seconds to 10 minutes for large documents
|
|
24
|
+
- Adaptive polling in `_wait_for_completion`: starts at 2s, increases to max 10s
|
|
25
|
+
|
|
26
|
+
### Added
|
|
27
|
+
- Progress logging during document parsing
|
|
28
|
+
|
|
29
|
+
## [0.1.0] - 2026-01-18
|
|
30
|
+
|
|
31
|
+
### Added
|
|
32
|
+
- Initial release of the ByteIT Python SDK
|
|
33
|
+
- `ByteITClient` for AI-powered document parsing
|
|
34
|
+
- Multiple output formats: text, JSON, Markdown, HTML
|
|
35
|
+
- Input connectors:
|
|
36
|
+
- `LocalFileInputConnector`
|
|
37
|
+
- `S3InputConnector`
|
|
38
|
+
- Output connector:
|
|
39
|
+
- `LocalFileOutputConnector`
|
|
40
|
+
- Job management (list jobs, check status, download results)
|
|
41
|
+
- Support for PDF, Word, Excel, and other common document formats
|
|
42
|
+
- Batch processing support
|
|
43
|
+
- Environment variable configuration
|
|
44
|
+
- Custom base URL support (testing & staging)
|
|
45
|
+
- Python 3.8+ support
|
byteit-0.1.2/PKG-INFO
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: byteit
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: AI-powered document intelligence platform - Turn your data into structured data with a single line of code.
|
|
5
|
+
Author-email: ByteIT GmbH <support@byteit.ai>
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://byteit.ai
|
|
8
|
+
Project-URL: Repository, https://github.com/byteit-ai/byteit-api
|
|
9
|
+
Project-URL: Pricing, https://byteit.ai/pricing
|
|
10
|
+
Project-URL: Support, https://byteit.ai/support
|
|
11
|
+
Project-URL: Contact, https://byteit.ai/contact
|
|
12
|
+
Keywords: document-processing,ai,document-intelligence,pdf,data-extraction,machine-learning,ocr
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
25
|
+
Classifier: Topic :: Office/Business
|
|
26
|
+
Requires-Python: >=3.8
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
License-File: LICENSE
|
|
29
|
+
Requires-Dist: requests>=2.28.0
|
|
30
|
+
Requires-Dist: tqdm>=4.65.0
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
33
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
34
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
35
|
+
Requires-Dist: isort>=5.0.0; extra == "dev"
|
|
36
|
+
Requires-Dist: flake8>=6.0.0; extra == "dev"
|
|
37
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
38
|
+
|
|
39
|
+
# ByteIT Python SDK
|
|
40
|
+
|
|
41
|
+
ByteIT's Python library for extracting structured data from documents.
|
|
42
|
+
It is designed for backend services and ETL pipelines that require reliable, consistent document parsing at scale through a simple API.
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## Installation
|
|
47
|
+
|
|
48
|
+
Install from PyPI:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install byteit
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Python 3.8 or newer is required.
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Quick Start
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from byteit import ByteITClient
|
|
62
|
+
|
|
63
|
+
client = ByteITClient(api_key="your_api_key")
|
|
64
|
+
|
|
65
|
+
result = client.parse("document.pdf")
|
|
66
|
+
print(result.decode())
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
The returned value is raw bytes containing the parsed document content.
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
## Supported Input File Types
|
|
74
|
+
|
|
75
|
+
ByteIT supports the following file types as input:
|
|
76
|
+
|
|
77
|
+
* PDF (`.pdf`)
|
|
78
|
+
* Word (`.docx`)
|
|
79
|
+
* PowerPoint (`.pptx`)
|
|
80
|
+
* HTML (`.html`)
|
|
81
|
+
* Markdown (`.md`)
|
|
82
|
+
* Plain text (`.txt`)
|
|
83
|
+
* JSON (`.json`)
|
|
84
|
+
* XML (`.xml`)
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## Basic Usage
|
|
89
|
+
|
|
90
|
+
### Parse a Local File
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
result = client.parse("invoice.pdf")
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
By default, the output format is **Markdown (`md`)**.
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## Output Formats
|
|
101
|
+
|
|
102
|
+
You can choose the output format depending on your pipeline needs:
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
txt = client.parse("doc.pdf", output_format="txt")
|
|
106
|
+
json = client.parse("doc.pdf", output_format="json")
|
|
107
|
+
md = client.parse("doc.pdf", output_format="md")
|
|
108
|
+
html = client.parse("doc.pdf", output_format="html")
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Supported output formats:
|
|
112
|
+
|
|
113
|
+
* Plain text (`txt`)
|
|
114
|
+
* JSON (`json`)
|
|
115
|
+
* Markdown (`md`) *(default)*
|
|
116
|
+
* HTML (`html`)
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## Save Output to File
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
client.parse(
|
|
124
|
+
"doc.pdf",
|
|
125
|
+
output_format="md",
|
|
126
|
+
output="result.md"
|
|
127
|
+
)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
When `output` is provided, the parsed result is written directly to disk.
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
## Notebook Integration
|
|
135
|
+
|
|
136
|
+
When used in Jupyter notebooks, ByteIT automatically displays results in a readable format:
|
|
137
|
+
|
|
138
|
+
* **JSON**: Interactive, expandable/collapsible tree view
|
|
139
|
+
* **Markdown**: Rendered with formatting (headers, lists, etc.)
|
|
140
|
+
* **HTML**: Rendered as HTML
|
|
141
|
+
* **Text**: Code block with syntax highlighting
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
# In a Jupyter notebook - automatically displays formatted result
|
|
145
|
+
result = client.parse("document.pdf", output_format="json")
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
To disable auto-display, save to a file instead:
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
# Saves to file, no auto-display
|
|
152
|
+
result = client.parse("doc.pdf", output_format="json", output="output.json")
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## Typical Use Cases
|
|
158
|
+
|
|
159
|
+
* Extracting structured data from documents in ETL pipelines
|
|
160
|
+
* Preprocessing documents before indexing or downstream processing
|
|
161
|
+
* Automating ingestion of invoices, contracts, or reports
|
|
162
|
+
* Interactive document exploration in Jupyter notebooks
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## API Reference
|
|
167
|
+
|
|
168
|
+
### `ByteITClient`
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
ByteITClient(api_key: str)
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
Creates a new ByteIT client.
|
|
175
|
+
|
|
176
|
+
#### Parameters
|
|
177
|
+
|
|
178
|
+
* `api_key` (`str`): Your ByteIT API key
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
182
|
+
### `parse(...)`
|
|
183
|
+
|
|
184
|
+
```python
|
|
185
|
+
parse(
|
|
186
|
+
input,
|
|
187
|
+
output_format: str = "md",
|
|
188
|
+
output = None
|
|
189
|
+
)
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
Parse a document and return the extracted content.
|
|
193
|
+
|
|
194
|
+
#### Parameters
|
|
195
|
+
|
|
196
|
+
* `input` (`str | Path`): Path to a local document
|
|
197
|
+
* `output_format` (`str`): Output format (`txt`, `json`, `md`, `html`)
|
|
198
|
+
* `output` (`str | Path | None`): Optional path to save the result
|
|
199
|
+
|
|
200
|
+
#### Returns
|
|
201
|
+
|
|
202
|
+
* `bytes`: Parsed document content
|
|
203
|
+
|
|
204
|
+
---
|
|
205
|
+
|
|
206
|
+
## Error Handling
|
|
207
|
+
|
|
208
|
+
The SDK exposes specific exceptions for common error cases:
|
|
209
|
+
|
|
210
|
+
```python
|
|
211
|
+
from byteit.exceptions import (
|
|
212
|
+
ByteITError,
|
|
213
|
+
ValidationError,
|
|
214
|
+
AuthenticationError,
|
|
215
|
+
RateLimitError,
|
|
216
|
+
ServerError,
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
try:
|
|
220
|
+
result = client.parse("document.pdf")
|
|
221
|
+
except ValidationError as e:
|
|
222
|
+
print("Invalid input:", e.message)
|
|
223
|
+
except AuthenticationError:
|
|
224
|
+
print("Invalid API key")
|
|
225
|
+
except RateLimitError:
|
|
226
|
+
print("Rate limit exceeded")
|
|
227
|
+
except ByteITError as e:
|
|
228
|
+
print("ByteIT error:", e.message)
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
All exceptions inherit from `ByteITError`.
|
|
232
|
+
|
|
233
|
+
---
|
|
234
|
+
|
|
235
|
+
## Configuration
|
|
236
|
+
|
|
237
|
+
### Environment Variable
|
|
238
|
+
|
|
239
|
+
You can provide the API key via environment variable:
|
|
240
|
+
|
|
241
|
+
```bash
|
|
242
|
+
export BYTEIT_API_KEY="your_api_key"
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
```python
|
|
246
|
+
import os
|
|
247
|
+
from byteit import ByteITClient
|
|
248
|
+
|
|
249
|
+
client = ByteITClient(api_key=os.getenv("BYTEIT_API_KEY"))
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
---
|
|
253
|
+
|
|
254
|
+
## Requirements
|
|
255
|
+
|
|
256
|
+
* Python 3.8+
|
|
257
|
+
* `requests` and `tqdm`
|
|
258
|
+
|
|
259
|
+
---
|
|
260
|
+
|
|
261
|
+
## About ByteIT
|
|
262
|
+
|
|
263
|
+
ByteIT provides document parsing and data extraction APIs designed for backend systems and automation workflows.
|
|
264
|
+
|
|
265
|
+
Website: [https://byteit.ai](https://byteit.ai)
|
|
266
|
+
|
|
267
|
+
---
|
|
268
|
+
|
|
269
|
+
## License
|
|
270
|
+
|
|
271
|
+
This project is licensed under the terms specified in the [LICENSE](LICENSE) file.
|
|
272
|
+
|
|
273
|
+
© 2026 ByteIT GmbH
|
|
274
|
+
|
|
275
|
+
---
|
byteit-0.1.2/README.md
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
# ByteIT Python SDK
|
|
2
|
+
|
|
3
|
+
ByteIT's Python library for extracting structured data from documents.
|
|
4
|
+
It is designed for backend services and ETL pipelines that require reliable, consistent document parsing at scale through a simple API.
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Installation
|
|
9
|
+
|
|
10
|
+
Install from PyPI:
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
pip install byteit
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
Python 3.8 or newer is required.
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Quick Start
|
|
21
|
+
|
|
22
|
+
```python
|
|
23
|
+
from byteit import ByteITClient
|
|
24
|
+
|
|
25
|
+
client = ByteITClient(api_key="your_api_key")
|
|
26
|
+
|
|
27
|
+
result = client.parse("document.pdf")
|
|
28
|
+
print(result.decode())
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
The returned value is raw bytes containing the parsed document content.
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## Supported Input File Types
|
|
36
|
+
|
|
37
|
+
ByteIT supports the following file types as input:
|
|
38
|
+
|
|
39
|
+
* PDF (`.pdf`)
|
|
40
|
+
* Word (`.docx`)
|
|
41
|
+
* PowerPoint (`.pptx`)
|
|
42
|
+
* HTML (`.html`)
|
|
43
|
+
* Markdown (`.md`)
|
|
44
|
+
* Plain text (`.txt`)
|
|
45
|
+
* JSON (`.json`)
|
|
46
|
+
* XML (`.xml`)
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Basic Usage
|
|
51
|
+
|
|
52
|
+
### Parse a Local File
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
result = client.parse("invoice.pdf")
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
By default, the output format is **Markdown (`md`)**.
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Output Formats
|
|
63
|
+
|
|
64
|
+
You can choose the output format depending on your pipeline needs:
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
txt = client.parse("doc.pdf", output_format="txt")
|
|
68
|
+
json = client.parse("doc.pdf", output_format="json")
|
|
69
|
+
md = client.parse("doc.pdf", output_format="md")
|
|
70
|
+
html = client.parse("doc.pdf", output_format="html")
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Supported output formats:
|
|
74
|
+
|
|
75
|
+
* Plain text (`txt`)
|
|
76
|
+
* JSON (`json`)
|
|
77
|
+
* Markdown (`md`) *(default)*
|
|
78
|
+
* HTML (`html`)
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## Save Output to File
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
client.parse(
|
|
86
|
+
"doc.pdf",
|
|
87
|
+
output_format="md",
|
|
88
|
+
output="result.md"
|
|
89
|
+
)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
When `output` is provided, the parsed result is written directly to disk.
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## Notebook Integration
|
|
97
|
+
|
|
98
|
+
When used in Jupyter notebooks, ByteIT automatically displays results in a readable format:
|
|
99
|
+
|
|
100
|
+
* **JSON**: Interactive, expandable/collapsible tree view
|
|
101
|
+
* **Markdown**: Rendered with formatting (headers, lists, etc.)
|
|
102
|
+
* **HTML**: Rendered as HTML
|
|
103
|
+
* **Text**: Code block with syntax highlighting
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
# In a Jupyter notebook - automatically displays formatted result
|
|
107
|
+
result = client.parse("document.pdf", output_format="json")
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
To disable auto-display, save to a file instead:
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
# Saves to file, no auto-display
|
|
114
|
+
result = client.parse("doc.pdf", output_format="json", output="output.json")
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## Typical Use Cases
|
|
120
|
+
|
|
121
|
+
* Extracting structured data from documents in ETL pipelines
|
|
122
|
+
* Preprocessing documents before indexing or downstream processing
|
|
123
|
+
* Automating ingestion of invoices, contracts, or reports
|
|
124
|
+
* Interactive document exploration in Jupyter notebooks
|
|
125
|
+
|
|
126
|
+
---
|
|
127
|
+
|
|
128
|
+
## API Reference
|
|
129
|
+
|
|
130
|
+
### `ByteITClient`
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
ByteITClient(api_key: str)
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Creates a new ByteIT client.
|
|
137
|
+
|
|
138
|
+
#### Parameters
|
|
139
|
+
|
|
140
|
+
* `api_key` (`str`): Your ByteIT API key
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
### `parse(...)`
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
parse(
|
|
148
|
+
input,
|
|
149
|
+
output_format: str = "md",
|
|
150
|
+
output = None
|
|
151
|
+
)
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
Parse a document and return the extracted content.
|
|
155
|
+
|
|
156
|
+
#### Parameters
|
|
157
|
+
|
|
158
|
+
* `input` (`str | Path`): Path to a local document
|
|
159
|
+
* `output_format` (`str`): Output format (`txt`, `json`, `md`, `html`)
|
|
160
|
+
* `output` (`str | Path | None`): Optional path to save the result
|
|
161
|
+
|
|
162
|
+
#### Returns
|
|
163
|
+
|
|
164
|
+
* `bytes`: Parsed document content
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
## Error Handling
|
|
169
|
+
|
|
170
|
+
The SDK exposes specific exceptions for common error cases:
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
from byteit.exceptions import (
|
|
174
|
+
ByteITError,
|
|
175
|
+
ValidationError,
|
|
176
|
+
AuthenticationError,
|
|
177
|
+
RateLimitError,
|
|
178
|
+
ServerError,
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
try:
|
|
182
|
+
result = client.parse("document.pdf")
|
|
183
|
+
except ValidationError as e:
|
|
184
|
+
print("Invalid input:", e.message)
|
|
185
|
+
except AuthenticationError:
|
|
186
|
+
print("Invalid API key")
|
|
187
|
+
except RateLimitError:
|
|
188
|
+
print("Rate limit exceeded")
|
|
189
|
+
except ByteITError as e:
|
|
190
|
+
print("ByteIT error:", e.message)
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
All exceptions inherit from `ByteITError`.
|
|
194
|
+
|
|
195
|
+
---
|
|
196
|
+
|
|
197
|
+
## Configuration
|
|
198
|
+
|
|
199
|
+
### Environment Variable
|
|
200
|
+
|
|
201
|
+
You can provide the API key via environment variable:
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
export BYTEIT_API_KEY="your_api_key"
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
import os
|
|
209
|
+
from byteit import ByteITClient
|
|
210
|
+
|
|
211
|
+
client = ByteITClient(api_key=os.getenv("BYTEIT_API_KEY"))
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
---
|
|
215
|
+
|
|
216
|
+
## Requirements
|
|
217
|
+
|
|
218
|
+
* Python 3.8+
|
|
219
|
+
* `requests` and `tqdm`
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
## About ByteIT
|
|
224
|
+
|
|
225
|
+
ByteIT provides document parsing and data extraction APIs designed for backend systems and automation workflows.
|
|
226
|
+
|
|
227
|
+
Website: [https://byteit.ai](https://byteit.ai)
|
|
228
|
+
|
|
229
|
+
---
|
|
230
|
+
|
|
231
|
+
## License
|
|
232
|
+
|
|
233
|
+
This project is licensed under the terms specified in the [LICENSE](LICENSE) file.
|
|
234
|
+
|
|
235
|
+
© 2026 ByteIT GmbH
|
|
236
|
+
|
|
237
|
+
---
|