knowhere-python-sdk 0.3.0__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. knowhere_python_sdk-0.3.2/.github/ISSUE_TEMPLATE/bug-report.yml +45 -0
  2. knowhere_python_sdk-0.3.2/.github/ISSUE_TEMPLATE/config.yml +8 -0
  3. knowhere_python_sdk-0.3.2/.github/ISSUE_TEMPLATE/feature-request.yml +25 -0
  4. knowhere_python_sdk-0.3.2/.github/pull_request_template.md +15 -0
  5. knowhere_python_sdk-0.3.2/.release-please-manifest.json +3 -0
  6. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/CHANGELOG.md +16 -0
  7. knowhere_python_sdk-0.3.2/CODE_OF_CONDUCT.md +29 -0
  8. knowhere_python_sdk-0.3.2/CONTRIBUTING.md +44 -0
  9. knowhere_python_sdk-0.3.2/LICENSE +21 -0
  10. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/PKG-INFO +28 -9
  11. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/README.md +26 -8
  12. knowhere_python_sdk-0.3.2/SECURITY.md +24 -0
  13. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/pyproject.toml +1 -1
  14. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/__init__.py +6 -0
  15. knowhere_python_sdk-0.3.2/src/knowhere/_version.py +1 -0
  16. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/resources/jobs.py +16 -2
  17. knowhere_python_sdk-0.3.2/src/knowhere/resources/retrieval.py +123 -0
  18. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/types/__init__.py +6 -0
  19. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/types/job.py +0 -1
  20. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/types/result.py +6 -0
  21. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/types/retrieval.py +13 -1
  22. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/conftest.py +0 -1
  23. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_jobs.py +5 -3
  24. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_logging.py +1 -1
  25. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_models.py +6 -1
  26. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_parse.py +4 -0
  27. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_retrieval.py +19 -0
  28. knowhere_python_sdk-0.3.0/.release-please-manifest.json +0 -3
  29. knowhere_python_sdk-0.3.0/src/knowhere/_version.py +0 -1
  30. knowhere_python_sdk-0.3.0/src/knowhere/resources/retrieval.py +0 -70
  31. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/.github/workflows/ci.yml +0 -0
  32. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/.github/workflows/publish-pypi.yml +0 -0
  33. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/.github/workflows/publish.yml +0 -0
  34. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/.gitignore +0 -0
  35. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/docs/usage.md +0 -0
  36. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/examples/async_usage.py +0 -0
  37. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/examples/error_handling.py +0 -0
  38. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/examples/parse_file.py +0 -0
  39. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/examples/parse_url.py +0 -0
  40. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/examples/step_by_step.py +0 -0
  41. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/release-please-config.json +0 -0
  42. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/_base_client.py +0 -0
  43. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/_client.py +0 -0
  44. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/_constants.py +0 -0
  45. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/_exceptions.py +0 -0
  46. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/_logging.py +0 -0
  47. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/_response.py +0 -0
  48. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/_types.py +0 -0
  49. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/lib/__init__.py +0 -0
  50. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/lib/polling.py +0 -0
  51. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/lib/result_parser.py +0 -0
  52. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/lib/upload.py +0 -0
  53. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/py.typed +0 -0
  54. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/resources/__init__.py +0 -0
  55. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/resources/_base.py +0 -0
  56. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/resources/documents.py +0 -0
  57. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/types/document.py +0 -0
  58. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/types/params.py +0 -0
  59. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/types/shared.py +0 -0
  60. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/__init__.py +0 -0
  61. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/fixtures/real_result.zip +0 -0
  62. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_client.py +0 -0
  63. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_documents.py +0 -0
  64. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_exceptions.py +0 -0
  65. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_polling.py +0 -0
  66. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_result_parser.py +0 -0
  67. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_retry.py +0 -0
  68. {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_upload.py +0 -0
@@ -0,0 +1,45 @@
1
+ name: Bug report
2
+ description: Report a reproducible problem in the Python SDK.
3
+ title: "[Bug]: "
4
+ labels:
5
+ - bug
6
+ body:
7
+ - type: textarea
8
+ id: summary
9
+ attributes:
10
+ label: Summary
11
+ description: What happened, and what did you expect instead?
12
+ validations:
13
+ required: true
14
+ - type: input
15
+ id: sdk-version
16
+ attributes:
17
+ label: SDK version
18
+ placeholder: 0.3.1
19
+ validations:
20
+ required: true
21
+ - type: input
22
+ id: python-version
23
+ attributes:
24
+ label: Python version
25
+ placeholder: 3.11.9
26
+ validations:
27
+ required: true
28
+ - type: input
29
+ id: os
30
+ attributes:
31
+ label: Operating system
32
+ placeholder: macOS 15.4 / Ubuntu 24.04
33
+ - type: textarea
34
+ id: reproduction
35
+ attributes:
36
+ label: Reproduction
37
+ description: Minimal code or steps to reproduce the issue.
38
+ render: python
39
+ validations:
40
+ required: true
41
+ - type: textarea
42
+ id: logs
43
+ attributes:
44
+ label: Relevant logs or tracebacks
45
+ render: text
@@ -0,0 +1,8 @@
1
+ blank_issues_enabled: false
2
+ contact_links:
3
+ - name: Knowhere documentation
4
+ url: https://docs.knowhereto.ai
5
+ about: Check the public docs before opening a support issue.
6
+ - name: Security report
7
+ url: mailto:team@knowhereto.ai?subject=Security%20report%20for%20knowhere-python-sdk
8
+ about: Report vulnerabilities privately by email.
@@ -0,0 +1,25 @@
1
+ name: Feature request
2
+ description: Propose an improvement for the Python SDK.
3
+ title: "[Feature]: "
4
+ labels:
5
+ - enhancement
6
+ body:
7
+ - type: textarea
8
+ id: problem
9
+ attributes:
10
+ label: Problem statement
11
+ description: What developer problem are you trying to solve?
12
+ validations:
13
+ required: true
14
+ - type: textarea
15
+ id: proposal
16
+ attributes:
17
+ label: Proposed solution
18
+ description: Describe the API or behavior you want to add or improve.
19
+ validations:
20
+ required: true
21
+ - type: textarea
22
+ id: alternatives
23
+ attributes:
24
+ label: Alternatives considered
25
+ description: Describe any workarounds or alternative designs you considered.
@@ -0,0 +1,15 @@
1
+ ## Summary
2
+
3
+ - describe the change
4
+ - describe any public API impact
5
+
6
+ ## Verification
7
+
8
+ - list the commands you ran
9
+ - list any manual checks you performed
10
+
11
+ ## Checklist
12
+
13
+ - [ ] Tests were added or updated when behavior changed
14
+ - [ ] Public docs or examples were updated when needed
15
+ - [ ] The pull request description explains any breaking or user-visible change
@@ -0,0 +1,3 @@
1
+ {
2
+ ".": "0.3.2"
3
+ }
@@ -1,5 +1,21 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.3.2](https://github.com/Ontos-AI/knowhere-python-sdk/compare/v0.3.1...v0.3.2) (2026-04-23)
4
+
5
+
6
+ ### Chores
7
+
8
+ * harden python sdk OSS surface ([e7d9779](https://github.com/Ontos-AI/knowhere-python-sdk/commit/e7d9779502327d2bd9e4f27e666244d34f8fafb7))
9
+ * harden Python SDK OSS surface ([a9396cd](https://github.com/Ontos-AI/knowhere-python-sdk/commit/a9396cda70eabcba66172884e38045caefc85a01))
10
+
11
+ ## [0.3.1](https://github.com/Ontos-AI/knowhere-python-sdk/compare/v0.3.0...v0.3.1) (2026-04-22)
12
+
13
+
14
+ ### Documentation
15
+
16
+ * clarify ParseResult document scope ([861084e](https://github.com/Ontos-AI/knowhere-python-sdk/commit/861084e34144987994fa618ac0db262ce681b5a8))
17
+ * clarify ParseResult document scope ([bb14ad4](https://github.com/Ontos-AI/knowhere-python-sdk/commit/bb14ad4077c41cbe74a5dd155995d6f9937962b8))
18
+
3
19
  ## [0.3.0](https://github.com/Ontos-AI/knowhere-python-sdk/compare/v0.2.1...v0.3.0) (2026-04-21)
4
20
 
5
21
 
@@ -0,0 +1,29 @@
1
+ # Code of Conduct
2
+
3
+ We want the Knowhere Python SDK community to be respectful, constructive, and
4
+ welcoming.
5
+
6
+ ## Expected Behavior
7
+
8
+ - Be respectful in discussions and code review.
9
+ - Assume good intent and give actionable feedback.
10
+ - Focus on technical substance instead of personal attacks.
11
+ - Help keep the project useful for a broad developer audience.
12
+
13
+ ## Unacceptable Behavior
14
+
15
+ - Harassment, discrimination, or hateful conduct
16
+ - Threats, intimidation, or doxxing
17
+ - Spam, trolling, or intentionally disruptive behavior
18
+ - Sharing private information without permission
19
+
20
+ ## Enforcement
21
+
22
+ Maintainers may edit or remove content, close discussions, or restrict access
23
+ when behavior harms the project or its contributors.
24
+
25
+ To report a problem, email `team@knowhereto.ai` with:
26
+
27
+ - the repository name
28
+ - a link or screenshot if available
29
+ - a short description of what happened
@@ -0,0 +1,44 @@
1
+ # Contributing
2
+
3
+ Thanks for contributing to the Knowhere Python SDK.
4
+
5
+ ## Development Setup
6
+
7
+ Requirements:
8
+
9
+ - Python 3.9+
10
+ - `uv`
11
+
12
+ Clone the repository and install the full development environment:
13
+
14
+ ```bash
15
+ uv sync --all-extras
16
+ ```
17
+
18
+ ## Local Checks
19
+
20
+ Run these commands before opening a pull request:
21
+
22
+ ```bash
23
+ uv run ruff check src/
24
+ uv run mypy src/knowhere
25
+ uv run pytest -q
26
+ ```
27
+
28
+ If you change public behavior, also update the relevant documentation in:
29
+
30
+ - `README.md`
31
+ - `docs/usage.md`
32
+ - `examples/`
33
+
34
+ ## Pull Requests
35
+
36
+ Please keep pull requests focused and easy to review.
37
+
38
+ Recommended checklist:
39
+
40
+ 1. Add or update tests for behavior changes.
41
+ 2. Keep public types and examples in sync with the implementation.
42
+ 3. Document any breaking or user-visible changes in the pull request description.
43
+
44
+ Maintainers handle versioning and release automation through GitHub Actions.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Knowhere Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -1,12 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: knowhere-python-sdk
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: Official Python SDK for the Knowhere document parsing API
5
5
  Project-URL: Homepage, https://knowhereto.ai
6
6
  Project-URL: Documentation, https://docs.knowhereto.ai
7
7
  Project-URL: Repository, https://github.com/Ontos-AI/knowhere-python-sdk
8
8
  Author-email: Knowhere Team <team@knowhereto.ai>
9
9
  License-Expression: MIT
10
+ License-File: LICENSE
10
11
  Classifier: Development Status :: 3 - Alpha
11
12
  Classifier: Intended Audience :: Developers
12
13
  Classifier: License :: OSI Approved :: MIT License
@@ -67,8 +68,9 @@ for chunk in result.text_chunks:
67
68
  ## Retrieval and document lifecycle
68
69
 
69
70
  New documents are published into a retrieval namespace. The server returns a
70
- stable `document_id` when you create a job; persist that value if you need to
71
- update or archive the same document later.
71
+ stable `document_id` after the job is published. `client.jobs.create(...)`
72
+ does not return a usable `document_id`; persist `job_result.document_id` if you
73
+ need to update or archive the same document later.
72
74
 
73
75
  ```python
74
76
  job = client.jobs.create(
@@ -77,7 +79,11 @@ job = client.jobs.create(
77
79
  namespace="support-center",
78
80
  )
79
81
 
80
- print(job.document_id) # "doc_..."
82
+ job_result = client.jobs.wait(job.job_id)
83
+ document_id = job_result.document_id
84
+
85
+ if document_id is None:
86
+ raise RuntimeError("Expected document_id after successful publication.")
81
87
  ```
82
88
 
83
89
  After the job is done and published, query the canonical document content:
@@ -87,8 +93,13 @@ response = client.retrieval.query(
87
93
  namespace="support-center",
88
94
  query="How do I reset Bluetooth pairing?",
89
95
  top_k=5,
96
+ channels=["path", "term"],
97
+ filter_mode="keep",
98
+ signal_paths=["Bluetooth", "Pairing"],
90
99
  )
91
100
 
101
+ print(response.router_used)
102
+
92
103
  for result in response.results:
93
104
  print(result.content)
94
105
  print(result.score)
@@ -101,13 +112,13 @@ Use `document_id` to update or archive a document:
101
112
  update_job = client.jobs.create(
102
113
  source_type="url",
103
114
  source_url="https://example.com/manual-v2.pdf",
104
- document_id=job.document_id,
115
+ document_id=document_id,
105
116
  )
106
117
 
107
- document = client.documents.get(job.document_id)
118
+ document = client.documents.get(document_id)
108
119
  print(document.status)
109
120
 
110
- client.documents.archive(job.document_id)
121
+ client.documents.archive(document_id)
111
122
  ```
112
123
 
113
124
  You can also list documents in a namespace:
@@ -146,6 +157,8 @@ result = client.parse(
146
157
 
147
158
  print(result.manifest.source_file_name) # "report.pdf"
148
159
  print(len(result.chunks)) # 152
160
+ print(result.namespace) # "default" or your explicit namespace
161
+ print(result.document_id) # Published canonical document id
149
162
  ```
150
163
 
151
164
  ### Access different chunk types
@@ -209,14 +222,14 @@ job = client.jobs.create(
209
222
  parsing_params={"model": "advanced", "ocr_enabled": True},
210
223
  )
211
224
 
212
- print(job.document_id) # Persist this to update/archive the document later.
213
-
214
225
  # Step 2: Upload file to presigned URL
215
226
  client.jobs.upload(job, file=Path("report.pdf"))
216
227
 
217
228
  # Step 3: Poll until done (adaptive backoff)
218
229
  job_result = client.jobs.wait(job.job_id, poll_interval=10.0, poll_timeout=1800.0)
219
230
 
231
+ print(job_result.document_id) # Persist this to update/archive the document later.
232
+
220
233
  # Step 4: Download and parse results
221
234
  result = client.jobs.load(job_result)
222
235
  print(result.statistics)
@@ -293,6 +306,12 @@ We publish stable releases to [PyPI](https://pypi.org/project/knowhere-python-sd
293
306
  - [pydantic](https://docs.pydantic.dev/) `>=2.0.0,<3.0`
294
307
  - [typing-extensions](https://pypi.org/project/typing-extensions/) `>=4.7.0`
295
308
 
309
+ ## Community
310
+
311
+ - Contributing guide: [CONTRIBUTING.md](./CONTRIBUTING.md)
312
+ - Security policy: [SECURITY.md](./SECURITY.md)
313
+ - Code of conduct: [CODE_OF_CONDUCT.md](./CODE_OF_CONDUCT.md)
314
+
296
315
  ## License
297
316
 
298
317
  MIT
@@ -35,8 +35,9 @@ for chunk in result.text_chunks:
35
35
  ## Retrieval and document lifecycle
36
36
 
37
37
  New documents are published into a retrieval namespace. The server returns a
38
- stable `document_id` when you create a job; persist that value if you need to
39
- update or archive the same document later.
38
+ stable `document_id` after the job is published. `client.jobs.create(...)`
39
+ does not return a usable `document_id`; persist `job_result.document_id` if you
40
+ need to update or archive the same document later.
40
41
 
41
42
  ```python
42
43
  job = client.jobs.create(
@@ -45,7 +46,11 @@ job = client.jobs.create(
45
46
  namespace="support-center",
46
47
  )
47
48
 
48
- print(job.document_id) # "doc_..."
49
+ job_result = client.jobs.wait(job.job_id)
50
+ document_id = job_result.document_id
51
+
52
+ if document_id is None:
53
+ raise RuntimeError("Expected document_id after successful publication.")
49
54
  ```
50
55
 
51
56
  After the job is done and published, query the canonical document content:
@@ -55,8 +60,13 @@ response = client.retrieval.query(
55
60
  namespace="support-center",
56
61
  query="How do I reset Bluetooth pairing?",
57
62
  top_k=5,
63
+ channels=["path", "term"],
64
+ filter_mode="keep",
65
+ signal_paths=["Bluetooth", "Pairing"],
58
66
  )
59
67
 
68
+ print(response.router_used)
69
+
60
70
  for result in response.results:
61
71
  print(result.content)
62
72
  print(result.score)
@@ -69,13 +79,13 @@ Use `document_id` to update or archive a document:
69
79
  update_job = client.jobs.create(
70
80
  source_type="url",
71
81
  source_url="https://example.com/manual-v2.pdf",
72
- document_id=job.document_id,
82
+ document_id=document_id,
73
83
  )
74
84
 
75
- document = client.documents.get(job.document_id)
85
+ document = client.documents.get(document_id)
76
86
  print(document.status)
77
87
 
78
- client.documents.archive(job.document_id)
88
+ client.documents.archive(document_id)
79
89
  ```
80
90
 
81
91
  You can also list documents in a namespace:
@@ -114,6 +124,8 @@ result = client.parse(
114
124
 
115
125
  print(result.manifest.source_file_name) # "report.pdf"
116
126
  print(len(result.chunks)) # 152
127
+ print(result.namespace) # "default" or your explicit namespace
128
+ print(result.document_id) # Published canonical document id
117
129
  ```
118
130
 
119
131
  ### Access different chunk types
@@ -177,14 +189,14 @@ job = client.jobs.create(
177
189
  parsing_params={"model": "advanced", "ocr_enabled": True},
178
190
  )
179
191
 
180
- print(job.document_id) # Persist this to update/archive the document later.
181
-
182
192
  # Step 2: Upload file to presigned URL
183
193
  client.jobs.upload(job, file=Path("report.pdf"))
184
194
 
185
195
  # Step 3: Poll until done (adaptive backoff)
186
196
  job_result = client.jobs.wait(job.job_id, poll_interval=10.0, poll_timeout=1800.0)
187
197
 
198
+ print(job_result.document_id) # Persist this to update/archive the document later.
199
+
188
200
  # Step 4: Download and parse results
189
201
  result = client.jobs.load(job_result)
190
202
  print(result.statistics)
@@ -261,6 +273,12 @@ We publish stable releases to [PyPI](https://pypi.org/project/knowhere-python-sd
261
273
  - [pydantic](https://docs.pydantic.dev/) `>=2.0.0,<3.0`
262
274
  - [typing-extensions](https://pypi.org/project/typing-extensions/) `>=4.7.0`
263
275
 
276
+ ## Community
277
+
278
+ - Contributing guide: [CONTRIBUTING.md](./CONTRIBUTING.md)
279
+ - Security policy: [SECURITY.md](./SECURITY.md)
280
+ - Code of conduct: [CODE_OF_CONDUCT.md](./CODE_OF_CONDUCT.md)
281
+
264
282
  ## License
265
283
 
266
284
  MIT
@@ -0,0 +1,24 @@
1
+ # Security Policy
2
+
3
+ ## Supported Versions
4
+
5
+ Only the latest published release line is supported for security fixes.
6
+
7
+ | Version | Supported |
8
+ | --- | --- |
9
+ | Latest release | Yes |
10
+ | Older releases | No |
11
+
12
+ ## Reporting a Vulnerability
13
+
14
+ Please do not open public GitHub issues for suspected vulnerabilities.
15
+
16
+ Instead, email `team@knowhereto.ai` with:
17
+
18
+ - the repository name
19
+ - a clear description of the issue
20
+ - reproduction steps or a proof of concept
21
+ - impact assessment if known
22
+
23
+ We will acknowledge the report, validate it, and coordinate remediation before
24
+ public disclosure.
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "knowhere-python-sdk"
7
- version = "0.3.0"
7
+ version = "0.3.2"
8
8
  description = "Official Python SDK for the Knowhere document parsing API"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -39,6 +39,9 @@ from knowhere.types.document import Document, DocumentListResponse
39
39
  from knowhere.types.job import Job, JobError, JobProgress, JobResult
40
40
  from knowhere.types.params import ParsingParams, WebhookConfig
41
41
  from knowhere.types.retrieval import (
42
+ RetrievalChannel,
43
+ RetrievalFilterMode,
44
+ RetrievalSectionExclusion,
42
45
  RetrievalSource,
43
46
  RetrievalQueryResponse,
44
47
  RetrievalResult,
@@ -97,6 +100,9 @@ __all__: list[str] = [
97
100
  "Document",
98
101
  "DocumentListResponse",
99
102
  # Retrieval types
103
+ "RetrievalChannel",
104
+ "RetrievalFilterMode",
105
+ "RetrievalSectionExclusion",
100
106
  "RetrievalSource",
101
107
  "RetrievalQueryResponse",
102
108
  "RetrievalResult",
@@ -0,0 +1 @@
1
+ __version__ = "0.3.2" # x-release-please-version
@@ -145,8 +145,12 @@ class Jobs(SyncAPIResource):
145
145
  if not job_result.result_url:
146
146
  raise InvalidStateError("JobResult does not have a result_url.")
147
147
  result_url: str = job_result.result_url
148
+ namespace: Optional[str] = job_result.namespace
149
+ document_id: Optional[str] = job_result.document_id
148
150
  else:
149
151
  result_url = job_result
152
+ namespace = None
153
+ document_id = None
150
154
 
151
155
  response: httpx.Response = self._client._client.get(
152
156
  result_url, timeout=self._client.upload_timeout
@@ -154,7 +158,10 @@ class Jobs(SyncAPIResource):
154
158
  response.raise_for_status()
155
159
  zip_bytes: bytes = response.content
156
160
 
157
- return parseResultZip(zip_bytes, verify_checksum=verify_checksum)
161
+ parsed_result = parseResultZip(zip_bytes, verify_checksum=verify_checksum)
162
+ parsed_result.namespace = namespace
163
+ parsed_result.document_id = document_id
164
+ return parsed_result
158
165
 
159
166
 
160
167
  class AsyncJobs(AsyncAPIResource):
@@ -251,8 +258,12 @@ class AsyncJobs(AsyncAPIResource):
251
258
  if not job_result.result_url:
252
259
  raise InvalidStateError("JobResult does not have a result_url.")
253
260
  result_url: str = job_result.result_url
261
+ namespace: Optional[str] = job_result.namespace
262
+ document_id: Optional[str] = job_result.document_id
254
263
  else:
255
264
  result_url = job_result
265
+ namespace = None
266
+ document_id = None
256
267
 
257
268
  response: httpx.Response = await self._client._client.get(
258
269
  result_url, timeout=self._client.upload_timeout
@@ -260,4 +271,7 @@ class AsyncJobs(AsyncAPIResource):
260
271
  response.raise_for_status()
261
272
  zip_bytes: bytes = response.content
262
273
 
263
- return parseResultZip(zip_bytes, verify_checksum=verify_checksum)
274
+ parsed_result = parseResultZip(zip_bytes, verify_checksum=verify_checksum)
275
+ parsed_result.namespace = namespace
276
+ parsed_result.document_id = document_id
277
+ return parsed_result
@@ -0,0 +1,123 @@
1
+ """Retrieval resource for querying published documents."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, Optional
6
+
7
+ from knowhere.resources._base import AsyncAPIResource, SyncAPIResource
8
+ from knowhere.types.retrieval import (
9
+ RetrievalChannel,
10
+ RetrievalFilterMode,
11
+ RetrievalQueryResponse,
12
+ RetrievalSectionExclusion,
13
+ )
14
+
15
+
16
+ class Retrieval(SyncAPIResource):
17
+ """Synchronous interface for ``/v1/retrieval`` endpoints."""
18
+
19
+ def query(
20
+ self,
21
+ *,
22
+ query: str,
23
+ namespace: Optional[str] = None,
24
+ top_k: Optional[int] = None,
25
+ data_type: Optional[int] = None,
26
+ signal_paths: Optional[list[str]] = None,
27
+ filter_mode: Optional[RetrievalFilterMode] = None,
28
+ channels: Optional[list[RetrievalChannel]] = None,
29
+ channel_weights: Optional[dict[RetrievalChannel, float]] = None,
30
+ rerank: Optional[bool] = None,
31
+ threshold: Optional[float] = None,
32
+ internal_recall_k: Optional[int] = None,
33
+ exclude_document_ids: Optional[list[str]] = None,
34
+ exclude_sections: Optional[list[RetrievalSectionExclusion]] = None,
35
+ ) -> RetrievalQueryResponse:
36
+ """Query published documents in a namespace."""
37
+ body: Dict[str, Any] = {"query": query}
38
+ if namespace is not None:
39
+ body["namespace"] = namespace
40
+ if top_k is not None:
41
+ body["top_k"] = top_k
42
+ if data_type is not None:
43
+ body["data_type"] = data_type
44
+ if signal_paths is not None:
45
+ body["signal_paths"] = signal_paths
46
+ if filter_mode is not None:
47
+ body["filter_mode"] = filter_mode
48
+ if channels is not None:
49
+ body["channels"] = channels
50
+ if channel_weights is not None:
51
+ body["channel_weights"] = channel_weights
52
+ if rerank is not None:
53
+ body["rerank"] = rerank
54
+ if threshold is not None:
55
+ body["threshold"] = threshold
56
+ if internal_recall_k is not None:
57
+ body["internal_recall_k"] = internal_recall_k
58
+ if exclude_document_ids is not None:
59
+ body["exclude_document_ids"] = exclude_document_ids
60
+ if exclude_sections is not None:
61
+ body["exclude_sections"] = exclude_sections
62
+
63
+ return self._request(
64
+ "POST",
65
+ "v1/retrieval/query",
66
+ body=body,
67
+ cast_to=RetrievalQueryResponse,
68
+ )
69
+
70
+
71
+ class AsyncRetrieval(AsyncAPIResource):
72
+ """Asynchronous interface for ``/v1/retrieval`` endpoints."""
73
+
74
+ async def query(
75
+ self,
76
+ *,
77
+ query: str,
78
+ namespace: Optional[str] = None,
79
+ top_k: Optional[int] = None,
80
+ data_type: Optional[int] = None,
81
+ signal_paths: Optional[list[str]] = None,
82
+ filter_mode: Optional[RetrievalFilterMode] = None,
83
+ channels: Optional[list[RetrievalChannel]] = None,
84
+ channel_weights: Optional[dict[RetrievalChannel, float]] = None,
85
+ rerank: Optional[bool] = None,
86
+ threshold: Optional[float] = None,
87
+ internal_recall_k: Optional[int] = None,
88
+ exclude_document_ids: Optional[list[str]] = None,
89
+ exclude_sections: Optional[list[RetrievalSectionExclusion]] = None,
90
+ ) -> RetrievalQueryResponse:
91
+ """Query published documents in a namespace."""
92
+ body: Dict[str, Any] = {"query": query}
93
+ if namespace is not None:
94
+ body["namespace"] = namespace
95
+ if top_k is not None:
96
+ body["top_k"] = top_k
97
+ if data_type is not None:
98
+ body["data_type"] = data_type
99
+ if signal_paths is not None:
100
+ body["signal_paths"] = signal_paths
101
+ if filter_mode is not None:
102
+ body["filter_mode"] = filter_mode
103
+ if channels is not None:
104
+ body["channels"] = channels
105
+ if channel_weights is not None:
106
+ body["channel_weights"] = channel_weights
107
+ if rerank is not None:
108
+ body["rerank"] = rerank
109
+ if threshold is not None:
110
+ body["threshold"] = threshold
111
+ if internal_recall_k is not None:
112
+ body["internal_recall_k"] = internal_recall_k
113
+ if exclude_document_ids is not None:
114
+ body["exclude_document_ids"] = exclude_document_ids
115
+ if exclude_sections is not None:
116
+ body["exclude_sections"] = exclude_sections
117
+
118
+ return await self._request(
119
+ "POST",
120
+ "v1/retrieval/query",
121
+ body=body,
122
+ cast_to=RetrievalQueryResponse,
123
+ )
@@ -6,6 +6,9 @@ from knowhere.types.document import Document, DocumentListResponse
6
6
  from knowhere.types.job import Job, JobError, JobResult
7
7
  from knowhere.types.params import ParsingParams, WebhookConfig
8
8
  from knowhere.types.retrieval import (
9
+ RetrievalChannel,
10
+ RetrievalFilterMode,
11
+ RetrievalSectionExclusion,
9
12
  RetrievalSource,
10
13
  RetrievalQueryResponse,
11
14
  RetrievalResult,
@@ -38,6 +41,9 @@ __all__: list[str] = [
38
41
  "Document",
39
42
  "DocumentListResponse",
40
43
  # retrieval
44
+ "RetrievalChannel",
45
+ "RetrievalFilterMode",
46
+ "RetrievalSectionExclusion",
41
47
  "RetrievalSource",
42
48
  "RetrievalQueryResponse",
43
49
  "RetrievalResult",
@@ -41,7 +41,6 @@ class Job(BaseModel):
41
41
  status: str
42
42
  source_type: str
43
43
  namespace: Optional[str] = None
44
- document_id: Optional[str] = None
45
44
  data_id: Optional[str] = None
46
45
  created_at: Optional[datetime] = None
47
46
  upload_url: Optional[str] = None
@@ -272,6 +272,8 @@ class ParseResult:
272
272
  kb_csv: Optional[str]
273
273
  hierarchy_view_html: Optional[str]
274
274
  raw_zip: bytes
275
+ namespace: Optional[str]
276
+ document_id: Optional[str]
275
277
 
276
278
  def __init__(
277
279
  self,
@@ -285,6 +287,8 @@ class ParseResult:
285
287
  kb_csv: Optional[str],
286
288
  hierarchy_view_html: Optional[str],
287
289
  raw_zip: bytes,
290
+ namespace: Optional[str] = None,
291
+ document_id: Optional[str] = None,
288
292
  ) -> None:
289
293
  self.manifest = manifest
290
294
  self.chunks = chunks
@@ -295,6 +299,8 @@ class ParseResult:
295
299
  self.kb_csv = kb_csv
296
300
  self.hierarchy_view_html = hierarchy_view_html
297
301
  self.raw_zip = raw_zip
302
+ self.namespace = namespace
303
+ self.document_id = document_id
298
304
 
299
305
  # -- convenience properties --
300
306
 
@@ -2,11 +2,22 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from typing import Optional
5
+ from typing import Literal, Optional, TypedDict
6
6
 
7
7
  from pydantic import BaseModel
8
8
 
9
9
 
10
+ RetrievalChannel = Literal["path", "content", "term"]
11
+ RetrievalFilterMode = Literal["delete", "keep"]
12
+
13
+
14
+ class RetrievalSectionExclusion(TypedDict):
15
+ """Section exclusion for follow-up retrieval queries."""
16
+
17
+ document_id: str
18
+ section_path: str
19
+
20
+
10
21
  class RetrievalSource(BaseModel):
11
22
  """Caller-facing source reference attached to a retrieval result."""
12
23
 
@@ -30,4 +41,5 @@ class RetrievalQueryResponse(BaseModel):
30
41
 
31
42
  namespace: str
32
43
  query: str
44
+ router_used: Optional[str] = None
33
45
  results: list[RetrievalResult]
@@ -72,7 +72,6 @@ def mock_job_response() -> Dict[str, Any]:
72
72
  "status": "waiting-file",
73
73
  "source_type": "file",
74
74
  "namespace": "default",
75
- "document_id": "doc_test123",
76
75
  "data_id": None,
77
76
  "created_at": "2025-01-01T00:00:00Z",
78
77
  "upload_url": "https://storage.example.com/upload?token=abc",
@@ -36,7 +36,6 @@ class TestJobsCreate:
36
36
  "status": "pending",
37
37
  "source_type": "url",
38
38
  "namespace": "support-center",
39
- "document_id": "doc_123",
40
39
  }
41
40
 
42
41
  route = respx.post(JOBS_URL).mock(
@@ -53,7 +52,7 @@ class TestJobsCreate:
53
52
  assert job.source_type == "url"
54
53
  assert job.status == "pending"
55
54
  assert job.namespace == "support-center"
56
- assert job.document_id == "doc_123"
55
+ assert not hasattr(job, "document_id")
57
56
 
58
57
  @respx.mock
59
58
  def test_create_with_file_source(
@@ -87,7 +86,6 @@ class TestJobsCreate:
87
86
  "status": "pending",
88
87
  "source_type": "url",
89
88
  "namespace": "support-center",
90
- "document_id": "doc_123",
91
89
  }
92
90
 
93
91
  route = respx.post(JOBS_URL).mock(
@@ -284,6 +282,8 @@ class TestJobsLoad:
284
282
  job_id="job_load",
285
283
  status="done",
286
284
  source_type="url",
285
+ namespace="support-center",
286
+ document_id="doc_123",
287
287
  result_url=result_url,
288
288
  )
289
289
 
@@ -293,3 +293,5 @@ class TestJobsLoad:
293
293
 
294
294
  assert route.called
295
295
  assert parse_result.manifest is not None
296
+ assert parse_result.namespace == "support-center"
297
+ assert parse_result.document_id == "doc_123"
@@ -18,7 +18,7 @@ class TestRedactSensitiveHeaders:
18
18
 
19
19
  def test_redacts_authorization_bearer(self) -> None:
20
20
  headers: Dict[str, str] = {
21
- "Authorization": "Bearer sk_live_abc123xyz",
21
+ "Authorization": "Bearer sk_example_redacted_token",
22
22
  "Content-Type": "application/json",
23
23
  }
24
24
  redacted: Dict[str, str] = redactSensitiveHeaders(headers)
@@ -55,7 +55,7 @@ class TestJobModel:
55
55
  }
56
56
  job: Job = Job(**data)
57
57
  assert job.namespace == "support-center"
58
- assert job.document_id == "doc_123"
58
+ assert "document_id" not in job.model_dump()
59
59
 
60
60
  def test_from_dict_with_upload(self) -> None:
61
61
  data: Dict[str, Any] = {
@@ -717,6 +717,11 @@ class TestParseResult:
717
717
  assert stats.total_chunks == 3
718
718
  assert stats.text_chunks == 1
719
719
 
720
+ def test_document_scope_defaults_to_none(self) -> None:
721
+ result: ParseResult = _build_parse_result()
722
+ assert result.namespace is None
723
+ assert result.document_id is None
724
+
720
725
  def test_raw_zip_accessible(self) -> None:
721
726
  result: ParseResult = _build_parse_result()
722
727
  assert result.raw_zip == b"fake zip bytes"
@@ -42,6 +42,8 @@ def _make_done_response(job_id: str, result_url: str) -> Dict[str, Any]:
42
42
  "job_id": job_id,
43
43
  "status": "done",
44
44
  "source_type": "url",
45
+ "namespace": "support-center",
46
+ "document_id": "doc_123",
45
47
  "result_url": result_url,
46
48
  }
47
49
 
@@ -96,6 +98,8 @@ class TestParseWithUrl:
96
98
 
97
99
  assert parse_result.manifest is not None
98
100
  assert parse_result.manifest.job_id == "job_test123"
101
+ assert parse_result.namespace == "support-center"
102
+ assert parse_result.document_id == "doc_123"
99
103
 
100
104
 
101
105
  # ---------------------------------------------------------------------------
@@ -19,6 +19,7 @@ def _make_retrieval_response() -> Dict[str, Any]:
19
19
  return {
20
20
  "namespace": "support-center",
21
21
  "query": "refund policy",
22
+ "router_used": "discovery+agent",
22
23
  "results": [
23
24
  {
24
25
  "chunk_type": "text",
@@ -47,6 +48,14 @@ class TestRetrievalQuery:
47
48
  query="refund policy",
48
49
  namespace="support-center",
49
50
  top_k=5,
51
+ data_type=6,
52
+ signal_paths=["Billing", "Refunds"],
53
+ filter_mode="keep",
54
+ channels=["path", "term"],
55
+ channel_weights={"path": 2.0, "term": 0.5},
56
+ rerank=True,
57
+ threshold=0.2,
58
+ internal_recall_k=25,
50
59
  exclude_document_ids=["doc_old"],
51
60
  exclude_sections=[
52
61
  {
@@ -62,6 +71,14 @@ class TestRetrievalQuery:
62
71
  "query": "refund policy",
63
72
  "namespace": "support-center",
64
73
  "top_k": 5,
74
+ "data_type": 6,
75
+ "signal_paths": ["Billing", "Refunds"],
76
+ "filter_mode": "keep",
77
+ "channels": ["path", "term"],
78
+ "channel_weights": {"path": 2.0, "term": 0.5},
79
+ "rerank": True,
80
+ "threshold": 0.2,
81
+ "internal_recall_k": 25,
65
82
  "exclude_document_ids": ["doc_old"],
66
83
  "exclude_sections": [
67
84
  {
@@ -71,6 +88,7 @@ class TestRetrievalQuery:
71
88
  ],
72
89
  }
73
90
  assert response.namespace == "support-center"
91
+ assert response.router_used == "discovery+agent"
74
92
  assert response.results[0].content == "Annual plans may be refunded within 30 days."
75
93
  assert response.results[0].source.document_id == "doc_123"
76
94
  assert response.results[0].source.source_file_name == "refund-policy.md"
@@ -107,4 +125,5 @@ class TestRetrievalQuery:
107
125
  )
108
126
 
109
127
  assert route.called
128
+ assert response.router_used == "discovery+agent"
110
129
  assert response.results[0].source.document_id == "doc_123"
@@ -1,3 +0,0 @@
1
- {
2
- ".": "0.3.0"
3
- }
@@ -1 +0,0 @@
1
- __version__ = "0.3.0" # x-release-please-version
@@ -1,70 +0,0 @@
1
- """Retrieval resource for querying published documents."""
2
-
3
- from __future__ import annotations
4
-
5
- from typing import Any, Dict, Optional
6
-
7
- from knowhere.resources._base import AsyncAPIResource, SyncAPIResource
8
- from knowhere.types.retrieval import RetrievalQueryResponse
9
-
10
-
11
- class Retrieval(SyncAPIResource):
12
- """Synchronous interface for ``/v1/retrieval`` endpoints."""
13
-
14
- def query(
15
- self,
16
- *,
17
- query: str,
18
- namespace: Optional[str] = None,
19
- top_k: Optional[int] = None,
20
- exclude_document_ids: Optional[list[str]] = None,
21
- exclude_sections: Optional[list[dict[str, str]]] = None,
22
- ) -> RetrievalQueryResponse:
23
- """Query published documents in a namespace."""
24
- body: Dict[str, Any] = {"query": query}
25
- if namespace is not None:
26
- body["namespace"] = namespace
27
- if top_k is not None:
28
- body["top_k"] = top_k
29
- if exclude_document_ids is not None:
30
- body["exclude_document_ids"] = exclude_document_ids
31
- if exclude_sections is not None:
32
- body["exclude_sections"] = exclude_sections
33
-
34
- return self._request(
35
- "POST",
36
- "v1/retrieval/query",
37
- body=body,
38
- cast_to=RetrievalQueryResponse,
39
- )
40
-
41
-
42
- class AsyncRetrieval(AsyncAPIResource):
43
- """Asynchronous interface for ``/v1/retrieval`` endpoints."""
44
-
45
- async def query(
46
- self,
47
- *,
48
- query: str,
49
- namespace: Optional[str] = None,
50
- top_k: Optional[int] = None,
51
- exclude_document_ids: Optional[list[str]] = None,
52
- exclude_sections: Optional[list[dict[str, str]]] = None,
53
- ) -> RetrievalQueryResponse:
54
- """Query published documents in a namespace."""
55
- body: Dict[str, Any] = {"query": query}
56
- if namespace is not None:
57
- body["namespace"] = namespace
58
- if top_k is not None:
59
- body["top_k"] = top_k
60
- if exclude_document_ids is not None:
61
- body["exclude_document_ids"] = exclude_document_ids
62
- if exclude_sections is not None:
63
- body["exclude_sections"] = exclude_sections
64
-
65
- return await self._request(
66
- "POST",
67
- "v1/retrieval/query",
68
- body=body,
69
- cast_to=RetrievalQueryResponse,
70
- )