knowhere-python-sdk 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. knowhere_python_sdk-0.1.0/.github/workflows/ci.yml +36 -0
  2. knowhere_python_sdk-0.1.0/.github/workflows/publish-pypi.yml +24 -0
  3. knowhere_python_sdk-0.1.0/.github/workflows/publish.yml +42 -0
  4. knowhere_python_sdk-0.1.0/.gitignore +34 -0
  5. knowhere_python_sdk-0.1.0/.release-please-manifest.json +3 -0
  6. knowhere_python_sdk-0.1.0/CHANGELOG.md +8 -0
  7. knowhere_python_sdk-0.1.0/PKG-INFO +314 -0
  8. knowhere_python_sdk-0.1.0/README.md +282 -0
  9. knowhere_python_sdk-0.1.0/examples/async_usage.py +72 -0
  10. knowhere_python_sdk-0.1.0/examples/error_handling.py +112 -0
  11. knowhere_python_sdk-0.1.0/examples/parse_file.py +79 -0
  12. knowhere_python_sdk-0.1.0/examples/parse_url.py +53 -0
  13. knowhere_python_sdk-0.1.0/examples/step_by_step.py +80 -0
  14. knowhere_python_sdk-0.1.0/pyproject.toml +62 -0
  15. knowhere_python_sdk-0.1.0/python-sdk-plan.md +1522 -0
  16. knowhere_python_sdk-0.1.0/release-please-config.json +27 -0
  17. knowhere_python_sdk-0.1.0/src/knowhere/__init__.py +101 -0
  18. knowhere_python_sdk-0.1.0/src/knowhere/_base_client.py +443 -0
  19. knowhere_python_sdk-0.1.0/src/knowhere/_client.py +234 -0
  20. knowhere_python_sdk-0.1.0/src/knowhere/_constants.py +31 -0
  21. knowhere_python_sdk-0.1.0/src/knowhere/_exceptions.py +324 -0
  22. knowhere_python_sdk-0.1.0/src/knowhere/_logging.py +48 -0
  23. knowhere_python_sdk-0.1.0/src/knowhere/_response.py +52 -0
  24. knowhere_python_sdk-0.1.0/src/knowhere/_types.py +56 -0
  25. knowhere_python_sdk-0.1.0/src/knowhere/_version.py +1 -0
  26. knowhere_python_sdk-0.1.0/src/knowhere/lib/__init__.py +1 -0
  27. knowhere_python_sdk-0.1.0/src/knowhere/lib/polling.py +146 -0
  28. knowhere_python_sdk-0.1.0/src/knowhere/lib/result_parser.py +206 -0
  29. knowhere_python_sdk-0.1.0/src/knowhere/lib/upload.py +147 -0
  30. knowhere_python_sdk-0.1.0/src/knowhere/py.typed +0 -0
  31. knowhere_python_sdk-0.1.0/src/knowhere/resources/__init__.py +7 -0
  32. knowhere_python_sdk-0.1.0/src/knowhere/resources/_base.py +71 -0
  33. knowhere_python_sdk-0.1.0/src/knowhere/resources/jobs.py +248 -0
  34. knowhere_python_sdk-0.1.0/src/knowhere/types/__init__.py +43 -0
  35. knowhere_python_sdk-0.1.0/src/knowhere/types/job.py +83 -0
  36. knowhere_python_sdk-0.1.0/src/knowhere/types/params.py +26 -0
  37. knowhere_python_sdk-0.1.0/src/knowhere/types/result.py +315 -0
  38. knowhere_python_sdk-0.1.0/src/knowhere/types/shared.py +3 -0
  39. knowhere_python_sdk-0.1.0/tests/__init__.py +0 -0
  40. knowhere_python_sdk-0.1.0/tests/conftest.py +238 -0
  41. knowhere_python_sdk-0.1.0/tests/fixtures/real_result.zip +0 -0
  42. knowhere_python_sdk-0.1.0/tests/test_client.py +200 -0
  43. knowhere_python_sdk-0.1.0/tests/test_exceptions.py +367 -0
  44. knowhere_python_sdk-0.1.0/tests/test_jobs.py +286 -0
  45. knowhere_python_sdk-0.1.0/tests/test_logging.py +85 -0
  46. knowhere_python_sdk-0.1.0/tests/test_models.py +652 -0
  47. knowhere_python_sdk-0.1.0/tests/test_parse.py +232 -0
  48. knowhere_python_sdk-0.1.0/tests/test_polling.py +239 -0
  49. knowhere_python_sdk-0.1.0/tests/test_result_parser.py +427 -0
  50. knowhere_python_sdk-0.1.0/tests/test_retry.py +250 -0
  51. knowhere_python_sdk-0.1.0/tests/test_upload.py +193 -0
@@ -0,0 +1,36 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - uses: astral-sh/setup-uv@v4
19
+ with:
20
+ version: "latest"
21
+
22
+ - uses: actions/setup-python@v5
23
+ with:
24
+ python-version: ${{ matrix.python-version }}
25
+
26
+ - name: Install dependencies
27
+ run: uv sync --all-extras
28
+
29
+ - name: Lint
30
+ run: uv run ruff check src/
31
+
32
+ - name: Type check
33
+ run: uv run mypy src/knowhere/
34
+
35
+ - name: Test
36
+ run: uv run pytest tests/ -v
@@ -0,0 +1,24 @@
1
+ name: Publish PyPI (Manual)
2
+
3
+ on:
4
+ workflow_dispatch:
5
+
6
+ jobs:
7
+ publish:
8
+ name: publish
9
+ runs-on: ubuntu-latest
10
+ environment: production
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+
14
+ - uses: astral-sh/setup-uv@v4
15
+ with:
16
+ version: "latest"
17
+
18
+ - name: Build
19
+ run: uv build
20
+
21
+ - name: Publish to PyPI
22
+ run: uv publish
23
+ env:
24
+ UV_PUBLISH_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
@@ -0,0 +1,42 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ permissions:
9
+ contents: write
10
+ pull-requests: write
11
+
12
+ jobs:
13
+ release-please:
14
+ runs-on: ubuntu-latest
15
+ outputs:
16
+ release_created: ${{ steps.release.outputs.release_created }}
17
+ tag_name: ${{ steps.release.outputs.tag_name }}
18
+ steps:
19
+ - uses: googleapis/release-please-action@v4
20
+ id: release
21
+ with:
22
+ token: ${{ secrets.GITHUB_TOKEN }}
23
+
24
+ publish:
25
+ needs: release-please
26
+ if: ${{ needs.release-please.outputs.release_created }}
27
+ runs-on: ubuntu-latest
28
+ environment: production
29
+ steps:
30
+ - uses: actions/checkout@v4
31
+
32
+ - uses: astral-sh/setup-uv@v4
33
+ with:
34
+ version: "latest"
35
+
36
+ - name: Build
37
+ run: uv build
38
+
39
+ - name: Publish to PyPI
40
+ run: uv publish
41
+ env:
42
+ UV_PUBLISH_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
@@ -0,0 +1,34 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ *.egg-info/
7
+ *.egg
8
+ dist/
9
+ build/
10
+ .eggs/
11
+
12
+ # Virtual environments
13
+ .venv/
14
+ venv/
15
+ ENV/
16
+
17
+ # IDE
18
+ .idea/
19
+ .vscode/
20
+ *.swp
21
+ *.swo
22
+
23
+ # Testing
24
+ .pytest_cache/
25
+ .coverage
26
+ htmlcov/
27
+ .mypy_cache/
28
+
29
+ # OS
30
+ .DS_Store
31
+ Thumbs.db
32
+
33
+ # uv
34
+ uv.lock
@@ -0,0 +1,3 @@
1
+ {
2
+ ".": "0.1.0"
3
+ }
@@ -0,0 +1,8 @@
1
+ # Changelog
2
+
3
+ ## 0.1.0 (2026-02-11)
4
+
5
+
6
+ ### Features
7
+
8
+ * knowhere python SDK ([6363b60](https://github.com/Ontos-AI/knowhere-python-sdk/commit/6363b603372e9bb431e0386daf0f6fb0b5fc999b))
@@ -0,0 +1,314 @@
1
+ Metadata-Version: 2.4
2
+ Name: knowhere-python-sdk
3
+ Version: 0.1.0
4
+ Summary: Official Python SDK for the Knowhere document parsing API
5
+ Project-URL: Homepage, https://knowhereto.ai
6
+ Project-URL: Documentation, https://docs.knowhereto.ai
7
+ Project-URL: Repository, https://github.com/Ontos-AI/knowhere-python-sdk
8
+ Author-email: Knowhere Team <team@knowhereto.ai>
9
+ License-Expression: MIT
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Typing :: Typed
20
+ Requires-Python: >=3.9
21
+ Requires-Dist: httpx<1.0,>=0.25.0
22
+ Requires-Dist: pydantic<3.0,>=2.0.0
23
+ Requires-Dist: typing-extensions>=4.7.0
24
+ Provides-Extra: dev
25
+ Requires-Dist: coverage>=7.0.0; extra == 'dev'
26
+ Requires-Dist: mypy>=1.0.0; extra == 'dev'
27
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
28
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
29
+ Requires-Dist: respx>=0.21.0; extra == 'dev'
30
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
31
+ Description-Content-Type: text/markdown
32
+
33
+ # Knowhere Python SDK
34
+
35
+ Official Python SDK for the [Knowhere](https://knowhereto.ai) document parsing API.
36
+
37
+ ## Installation
38
+
39
+ ```bash
40
+ pip install knowhere-python-sdk
41
+ ```
42
+
43
+ Or with [uv](https://docs.astral.sh/uv/):
44
+
45
+ ```bash
46
+ uv add knowhere-python-sdk
47
+ ```
48
+
49
+ ## Quick Start
50
+
51
+ ```python
52
+ import knowhere
53
+
54
+ client = knowhere.Knowhere(api_key="sk_...")
55
+
56
+ # Parse a document from URL
57
+ result = client.parse(url="https://example.com/report.pdf")
58
+
59
+ print(result.statistics.total_chunks) # 152
60
+ print(result.full_markdown[:200]) # First 200 chars of full markdown
61
+
62
+ for chunk in result.text_chunks:
63
+ print(chunk.content[:80])
64
+ ```
65
+
66
+ ### Parse a Local File
67
+
68
+ ```python
69
+ from pathlib import Path
70
+
71
+ result = client.parse(
72
+ file=Path("report.pdf"),
73
+ parsing_params={"model": "advanced", "ocr_enabled": True},
74
+ )
75
+
76
+ print(result.manifest.source_file_name) # "report.pdf"
77
+ print(len(result.chunks)) # 152
78
+ ```
79
+
80
+ ### Access Different Chunk Types
81
+
82
+ ```python
83
+ result = client.parse(url="https://example.com/report.pdf")
84
+
85
+ # Text chunks
86
+ for chunk in result.text_chunks:
87
+ print(chunk.keywords)
88
+ print(chunk.summary)
89
+
90
+ # Image chunks (raw bytes loaded from ZIP)
91
+ for chunk in result.image_chunks:
92
+ print(chunk.file_path)
93
+ print(len(chunk.data)) # bytes
94
+ chunk.save("./output/") # writes image to disk
95
+
96
+ # Table chunks (HTML loaded from ZIP)
97
+ for chunk in result.table_chunks:
98
+ print(chunk.file_path)
99
+ print(chunk.html[:100])
100
+ ```
101
+
102
+ ### Save All Results to Disk
103
+
104
+ ```python
105
+ result = client.parse(file=Path("report.pdf"))
106
+ result.save("./output/report/")
107
+ ```
108
+
109
+ ## Async Usage
110
+
111
+ ```python
112
+ import asyncio
113
+ import knowhere
114
+
115
+ async def main():
116
+ async with knowhere.AsyncKnowhere(api_key="sk_...") as client:
117
+ result = await client.parse(url="https://example.com/report.pdf")
118
+ print(result.statistics.total_chunks)
119
+
120
+ for chunk in result.text_chunks:
121
+ print(chunk.summary)
122
+
123
+ asyncio.run(main())
124
+ ```
125
+
126
+ ## Step-by-Step Control
127
+
128
+ For granular control over the parsing workflow, use the `jobs` resource directly:
129
+
130
+ ```python
131
+ from pathlib import Path
132
+
133
+ # Step 1: Create a parsing job
134
+ job = client.jobs.create(
135
+ source_type="file",
136
+ file_name="report.pdf",
137
+ parsing_params={"model": "advanced", "ocr_enabled": True},
138
+ )
139
+
140
+ # Step 2: Upload file to presigned URL
141
+ client.jobs.upload(job, file=Path("report.pdf"))
142
+
143
+ # Step 3: Poll until done (adaptive backoff)
144
+ job_result = client.jobs.wait(job.job_id, poll_interval=10.0, poll_timeout=1800.0)
145
+
146
+ # Step 4: Download and parse results
147
+ result = client.jobs.load(job_result)
148
+ print(result.statistics)
149
+ ```
150
+
151
+ ## Configuration
152
+
153
+ The SDK resolves configuration in this priority order: constructor arguments, then environment variables, then built-in defaults. The supported environment variables are:
154
+
155
+ | Variable | Description | Default |
156
+ |----------|-------------|---------|
157
+ | `KNOWHERE_API_KEY` | API key (required) | — |
158
+ | `KNOWHERE_BASE_URL` | API base URL | `https://api.knowhereto.ai` |
159
+ | `KNOWHERE_LOG_LEVEL` | Log level | `WARNING` |
160
+
161
+ ```python
162
+ # Uses environment variables automatically
163
+ client = knowhere.Knowhere()
164
+
165
+ # Or configure explicitly
166
+ client = knowhere.Knowhere(
167
+ api_key="sk_...",
168
+ base_url="https://api.knowhereto.ai",
169
+ timeout=30.0, # HTTP request timeout (default: 60s)
170
+ upload_timeout=300.0, # File upload timeout (default: 600s)
171
+ max_retries=3, # Max retry attempts (default: 5)
172
+ )
173
+ ```
174
+
175
+ ### Context Manager
176
+
177
+ ```python
178
+ # Sync — ensures httpx.Client is properly closed
179
+ with knowhere.Knowhere(api_key="sk_...") as client:
180
+ result = client.parse(url="https://example.com/report.pdf")
181
+
182
+ # Async (must run inside a coroutine) — ensures httpx.AsyncClient is properly closed
183
+ async with knowhere.AsyncKnowhere(api_key="sk_...") as client:
184
+ result = await client.parse(url="https://example.com/report.pdf")
185
+ ```
186
+
187
+ ## Error Handling
188
+
189
+ ```python
190
+ from knowhere import (
191
+ Knowhere,
192
+ AuthenticationError,
193
+ NotFoundError,
194
+ RateLimitError,
195
+ BadRequestError,
196
+ APIStatusError,
197
+ PollingTimeoutError,
198
+ )
199
+
200
+ try:
201
+ result = client.parse(url="https://example.com/report.pdf")
202
+ except BadRequestError as e:
203
+ print(e.status_code) # 400
204
+ print(e.code) # "INVALID_ARGUMENT"
205
+ print(e.message) # "Unsupported file format"
206
+ print(e.request_id) # "req_abc123"
207
+ except NotFoundError as e:
208
+ print(e.message) # "Job not found"
209
+ except RateLimitError as e:
210
+ print(e.retry_after) # seconds to wait
211
+ except AuthenticationError:
212
+ print("Invalid API key")
213
+ except PollingTimeoutError:
214
+ print("Job did not complete within timeout")
215
+ except APIStatusError as e:
216
+ print(f"API error {e.status_code}: {e.message}")
217
+ ```
218
+
219
+ ## Requirements
220
+
221
+ - Python 3.9+
222
+ - [httpx](https://www.python-httpx.org/) `>=0.25.0,<1.0`
223
+ - [pydantic](https://docs.pydantic.dev/) `>=2.0.0,<3.0`
224
+ - [typing-extensions](https://pypi.org/project/typing-extensions/) `>=4.7.0`
225
+
226
+ ## Building from Source
227
+
228
+ ### Prerequisites
229
+
230
+ - Python 3.9 or later
231
+ - [uv](https://docs.astral.sh/uv/) (recommended) or pip
232
+
233
+ ### Build
234
+
235
+ ```bash
236
+ git clone https://github.com/Ontos-AI/knowhere-python-sdk.git
237
+ cd knowhere-python-sdk
238
+
239
+ # Install uv if you don't have it
240
+ curl -LsSf https://astral.sh/uv/install.sh | sh
241
+
242
+ # Build sdist + wheel
243
+ uv build
244
+
245
+ # Install the built wheel
246
+ pip install dist/knowhere_python_sdk-*.whl
247
+ ```
248
+
249
+ ## Development
250
+
251
+ ### Setup
252
+
253
+ ```bash
254
+ git clone https://github.com/Ontos-AI/knowhere-python-sdk.git
255
+ cd knowhere-python-sdk
256
+
257
+ # Create venv and install all dependencies (including dev)
258
+ uv sync --all-extras
259
+ ```
260
+
261
+ ### Running Tests
262
+
263
+ ```bash
264
+ # Run all unit tests
265
+ uv run pytest tests/ -v
266
+
267
+ # Run with coverage
268
+ uv run coverage run -m pytest tests/ -v
269
+ uv run coverage report -m
270
+ ```
271
+
272
+ ### Linting and Type Checking
273
+
274
+ ```bash
275
+ # Lint
276
+ uv run ruff check src/
277
+
278
+ # Type check
279
+ uv run mypy src/knowhere/
280
+ ```
281
+
282
+ ### Project Structure
283
+
284
+ ```
285
+ knowhere-python-sdk/
286
+ ├── src/knowhere/
287
+ │ ├── __init__.py # Public API surface
288
+ │ ├── _client.py # Knowhere + AsyncKnowhere clients
289
+ │ ├── _base_client.py # HTTP logic, retry, error parsing
290
+ │ ├── _exceptions.py # Exception hierarchy
291
+ │ ├── _constants.py # Default URLs, timeouts, env var names
292
+ │ ├── _types.py # Sentinel types, callback type aliases
293
+ │ ├── _logging.py # Logger setup, header redaction
294
+ │ ├── _response.py # APIResponse wrapper
295
+ │ ├── _version.py # __version__
296
+ │ ├── py.typed # PEP 561 marker
297
+ │ ├── types/
298
+ │ │ ├── job.py # Job, JobResult, JobError
299
+ │ │ ├── result.py # ParseResult, Manifest, Chunk types
300
+ │ │ └── params.py # ParsingParams, WebhookConfig
301
+ │ ├── resources/
302
+ │ │ └── jobs.py # Jobs + AsyncJobs resource
303
+ │ └── lib/
304
+ │ ├── polling.py # Adaptive polling loop
305
+ │ ├── upload.py # Streaming file upload
306
+ │ └── result_parser.py # ZIP parsing, checksum verification
307
+ ├── tests/ # Unit tests (respx-mocked HTTP)
308
+ ├── examples/ # Usage examples
309
+ └── pyproject.toml
310
+ ```
311
+
312
+ ## License
313
+
314
+ MIT