knowhere-python-sdk 0.3.0__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- knowhere_python_sdk-0.3.2/.github/ISSUE_TEMPLATE/bug-report.yml +45 -0
- knowhere_python_sdk-0.3.2/.github/ISSUE_TEMPLATE/config.yml +8 -0
- knowhere_python_sdk-0.3.2/.github/ISSUE_TEMPLATE/feature-request.yml +25 -0
- knowhere_python_sdk-0.3.2/.github/pull_request_template.md +15 -0
- knowhere_python_sdk-0.3.2/.release-please-manifest.json +3 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/CHANGELOG.md +16 -0
- knowhere_python_sdk-0.3.2/CODE_OF_CONDUCT.md +29 -0
- knowhere_python_sdk-0.3.2/CONTRIBUTING.md +44 -0
- knowhere_python_sdk-0.3.2/LICENSE +21 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/PKG-INFO +28 -9
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/README.md +26 -8
- knowhere_python_sdk-0.3.2/SECURITY.md +24 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/pyproject.toml +1 -1
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/__init__.py +6 -0
- knowhere_python_sdk-0.3.2/src/knowhere/_version.py +1 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/resources/jobs.py +16 -2
- knowhere_python_sdk-0.3.2/src/knowhere/resources/retrieval.py +123 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/types/__init__.py +6 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/types/job.py +0 -1
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/types/result.py +6 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/types/retrieval.py +13 -1
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/conftest.py +0 -1
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_jobs.py +5 -3
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_logging.py +1 -1
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_models.py +6 -1
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_parse.py +4 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_retrieval.py +19 -0
- knowhere_python_sdk-0.3.0/.release-please-manifest.json +0 -3
- knowhere_python_sdk-0.3.0/src/knowhere/_version.py +0 -1
- knowhere_python_sdk-0.3.0/src/knowhere/resources/retrieval.py +0 -70
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/.github/workflows/ci.yml +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/.github/workflows/publish-pypi.yml +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/.github/workflows/publish.yml +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/.gitignore +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/docs/usage.md +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/examples/async_usage.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/examples/error_handling.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/examples/parse_file.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/examples/parse_url.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/examples/step_by_step.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/release-please-config.json +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/_base_client.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/_client.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/_constants.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/_exceptions.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/_logging.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/_response.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/_types.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/lib/__init__.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/lib/polling.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/lib/result_parser.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/lib/upload.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/py.typed +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/resources/__init__.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/resources/_base.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/resources/documents.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/types/document.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/types/params.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/src/knowhere/types/shared.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/__init__.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/fixtures/real_result.zip +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_client.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_documents.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_exceptions.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_polling.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_result_parser.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_retry.py +0 -0
- {knowhere_python_sdk-0.3.0 → knowhere_python_sdk-0.3.2}/tests/test_upload.py +0 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
name: Bug report
|
|
2
|
+
description: Report a reproducible problem in the Python SDK.
|
|
3
|
+
title: "[Bug]: "
|
|
4
|
+
labels:
|
|
5
|
+
- bug
|
|
6
|
+
body:
|
|
7
|
+
- type: textarea
|
|
8
|
+
id: summary
|
|
9
|
+
attributes:
|
|
10
|
+
label: Summary
|
|
11
|
+
description: What happened, and what did you expect instead?
|
|
12
|
+
validations:
|
|
13
|
+
required: true
|
|
14
|
+
- type: input
|
|
15
|
+
id: sdk-version
|
|
16
|
+
attributes:
|
|
17
|
+
label: SDK version
|
|
18
|
+
placeholder: 0.3.1
|
|
19
|
+
validations:
|
|
20
|
+
required: true
|
|
21
|
+
- type: input
|
|
22
|
+
id: python-version
|
|
23
|
+
attributes:
|
|
24
|
+
label: Python version
|
|
25
|
+
placeholder: 3.11.9
|
|
26
|
+
validations:
|
|
27
|
+
required: true
|
|
28
|
+
- type: input
|
|
29
|
+
id: os
|
|
30
|
+
attributes:
|
|
31
|
+
label: Operating system
|
|
32
|
+
placeholder: macOS 15.4 / Ubuntu 24.04
|
|
33
|
+
- type: textarea
|
|
34
|
+
id: reproduction
|
|
35
|
+
attributes:
|
|
36
|
+
label: Reproduction
|
|
37
|
+
description: Minimal code or steps to reproduce the issue.
|
|
38
|
+
render: python
|
|
39
|
+
validations:
|
|
40
|
+
required: true
|
|
41
|
+
- type: textarea
|
|
42
|
+
id: logs
|
|
43
|
+
attributes:
|
|
44
|
+
label: Relevant logs or tracebacks
|
|
45
|
+
render: text
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
blank_issues_enabled: false
|
|
2
|
+
contact_links:
|
|
3
|
+
- name: Knowhere documentation
|
|
4
|
+
url: https://docs.knowhereto.ai
|
|
5
|
+
about: Check the public docs before opening a support issue.
|
|
6
|
+
- name: Security report
|
|
7
|
+
url: mailto:team@knowhereto.ai?subject=Security%20report%20for%20knowhere-python-sdk
|
|
8
|
+
about: Report vulnerabilities privately by email.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
name: Feature request
|
|
2
|
+
description: Propose an improvement for the Python SDK.
|
|
3
|
+
title: "[Feature]: "
|
|
4
|
+
labels:
|
|
5
|
+
- enhancement
|
|
6
|
+
body:
|
|
7
|
+
- type: textarea
|
|
8
|
+
id: problem
|
|
9
|
+
attributes:
|
|
10
|
+
label: Problem statement
|
|
11
|
+
description: What developer problem are you trying to solve?
|
|
12
|
+
validations:
|
|
13
|
+
required: true
|
|
14
|
+
- type: textarea
|
|
15
|
+
id: proposal
|
|
16
|
+
attributes:
|
|
17
|
+
label: Proposed solution
|
|
18
|
+
description: Describe the API or behavior you want to add or improve.
|
|
19
|
+
validations:
|
|
20
|
+
required: true
|
|
21
|
+
- type: textarea
|
|
22
|
+
id: alternatives
|
|
23
|
+
attributes:
|
|
24
|
+
label: Alternatives considered
|
|
25
|
+
description: Describe any workarounds or alternative designs you considered.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
## Summary
|
|
2
|
+
|
|
3
|
+
- describe the change
|
|
4
|
+
- describe any public API impact
|
|
5
|
+
|
|
6
|
+
## Verification
|
|
7
|
+
|
|
8
|
+
- list the commands you ran
|
|
9
|
+
- list any manual checks you performed
|
|
10
|
+
|
|
11
|
+
## Checklist
|
|
12
|
+
|
|
13
|
+
- [ ] Tests were added or updated when behavior changed
|
|
14
|
+
- [ ] Public docs or examples were updated when needed
|
|
15
|
+
- [ ] The pull request description explains any breaking or user-visible change
|
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.3.2](https://github.com/Ontos-AI/knowhere-python-sdk/compare/v0.3.1...v0.3.2) (2026-04-23)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Chores
|
|
7
|
+
|
|
8
|
+
* harden python sdk OSS surface ([e7d9779](https://github.com/Ontos-AI/knowhere-python-sdk/commit/e7d9779502327d2bd9e4f27e666244d34f8fafb7))
|
|
9
|
+
* harden Python SDK OSS surface ([a9396cd](https://github.com/Ontos-AI/knowhere-python-sdk/commit/a9396cda70eabcba66172884e38045caefc85a01))
|
|
10
|
+
|
|
11
|
+
## [0.3.1](https://github.com/Ontos-AI/knowhere-python-sdk/compare/v0.3.0...v0.3.1) (2026-04-22)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
### Documentation
|
|
15
|
+
|
|
16
|
+
* clarify ParseResult document scope ([861084e](https://github.com/Ontos-AI/knowhere-python-sdk/commit/861084e34144987994fa618ac0db262ce681b5a8))
|
|
17
|
+
* clarify ParseResult document scope ([bb14ad4](https://github.com/Ontos-AI/knowhere-python-sdk/commit/bb14ad4077c41cbe74a5dd155995d6f9937962b8))
|
|
18
|
+
|
|
3
19
|
## [0.3.0](https://github.com/Ontos-AI/knowhere-python-sdk/compare/v0.2.1...v0.3.0) (2026-04-21)
|
|
4
20
|
|
|
5
21
|
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Code of Conduct
|
|
2
|
+
|
|
3
|
+
We want the Knowhere Python SDK community to be respectful, constructive, and
|
|
4
|
+
welcoming.
|
|
5
|
+
|
|
6
|
+
## Expected Behavior
|
|
7
|
+
|
|
8
|
+
- Be respectful in discussions and code review.
|
|
9
|
+
- Assume good intent and give actionable feedback.
|
|
10
|
+
- Focus on technical substance instead of personal attacks.
|
|
11
|
+
- Help keep the project useful for a broad developer audience.
|
|
12
|
+
|
|
13
|
+
## Unacceptable Behavior
|
|
14
|
+
|
|
15
|
+
- Harassment, discrimination, or hateful conduct
|
|
16
|
+
- Threats, intimidation, or doxxing
|
|
17
|
+
- Spam, trolling, or intentionally disruptive behavior
|
|
18
|
+
- Sharing private information without permission
|
|
19
|
+
|
|
20
|
+
## Enforcement
|
|
21
|
+
|
|
22
|
+
Maintainers may edit or remove content, close discussions, or restrict access
|
|
23
|
+
when behavior harms the project or its contributors.
|
|
24
|
+
|
|
25
|
+
To report a problem, email `team@knowhereto.ai` with:
|
|
26
|
+
|
|
27
|
+
- the repository name
|
|
28
|
+
- a link or screenshot if available
|
|
29
|
+
- a short description of what happened
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Contributing
|
|
2
|
+
|
|
3
|
+
Thanks for contributing to the Knowhere Python SDK.
|
|
4
|
+
|
|
5
|
+
## Development Setup
|
|
6
|
+
|
|
7
|
+
Requirements:
|
|
8
|
+
|
|
9
|
+
- Python 3.9+
|
|
10
|
+
- `uv`
|
|
11
|
+
|
|
12
|
+
Clone the repository and install the full development environment:
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
uv sync --all-extras
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## Local Checks
|
|
19
|
+
|
|
20
|
+
Run these commands before opening a pull request:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
uv run ruff check src/
|
|
24
|
+
uv run mypy src/knowhere
|
|
25
|
+
uv run pytest -q
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
If you change public behavior, also update the relevant documentation in:
|
|
29
|
+
|
|
30
|
+
- `README.md`
|
|
31
|
+
- `docs/usage.md`
|
|
32
|
+
- `examples/`
|
|
33
|
+
|
|
34
|
+
## Pull Requests
|
|
35
|
+
|
|
36
|
+
Please keep pull requests focused and easy to review.
|
|
37
|
+
|
|
38
|
+
Recommended checklist:
|
|
39
|
+
|
|
40
|
+
1. Add or update tests for behavior changes.
|
|
41
|
+
2. Keep public types and examples in sync with the implementation.
|
|
42
|
+
3. Document any breaking or user-visible changes in the pull request description.
|
|
43
|
+
|
|
44
|
+
Maintainers handle versioning and release automation through GitHub Actions.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Knowhere Team
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: knowhere-python-sdk
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: Official Python SDK for the Knowhere document parsing API
|
|
5
5
|
Project-URL: Homepage, https://knowhereto.ai
|
|
6
6
|
Project-URL: Documentation, https://docs.knowhereto.ai
|
|
7
7
|
Project-URL: Repository, https://github.com/Ontos-AI/knowhere-python-sdk
|
|
8
8
|
Author-email: Knowhere Team <team@knowhereto.ai>
|
|
9
9
|
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
10
11
|
Classifier: Development Status :: 3 - Alpha
|
|
11
12
|
Classifier: Intended Audience :: Developers
|
|
12
13
|
Classifier: License :: OSI Approved :: MIT License
|
|
@@ -67,8 +68,9 @@ for chunk in result.text_chunks:
|
|
|
67
68
|
## Retrieval and document lifecycle
|
|
68
69
|
|
|
69
70
|
New documents are published into a retrieval namespace. The server returns a
|
|
70
|
-
stable `document_id`
|
|
71
|
-
|
|
71
|
+
stable `document_id` after the job is published. `client.jobs.create(...)`
|
|
72
|
+
does not return a usable `document_id`; persist `job_result.document_id` if you
|
|
73
|
+
need to update or archive the same document later.
|
|
72
74
|
|
|
73
75
|
```python
|
|
74
76
|
job = client.jobs.create(
|
|
@@ -77,7 +79,11 @@ job = client.jobs.create(
|
|
|
77
79
|
namespace="support-center",
|
|
78
80
|
)
|
|
79
81
|
|
|
80
|
-
|
|
82
|
+
job_result = client.jobs.wait(job.job_id)
|
|
83
|
+
document_id = job_result.document_id
|
|
84
|
+
|
|
85
|
+
if document_id is None:
|
|
86
|
+
raise RuntimeError("Expected document_id after successful publication.")
|
|
81
87
|
```
|
|
82
88
|
|
|
83
89
|
After the job is done and published, query the canonical document content:
|
|
@@ -87,8 +93,13 @@ response = client.retrieval.query(
|
|
|
87
93
|
namespace="support-center",
|
|
88
94
|
query="How do I reset Bluetooth pairing?",
|
|
89
95
|
top_k=5,
|
|
96
|
+
channels=["path", "term"],
|
|
97
|
+
filter_mode="keep",
|
|
98
|
+
signal_paths=["Bluetooth", "Pairing"],
|
|
90
99
|
)
|
|
91
100
|
|
|
101
|
+
print(response.router_used)
|
|
102
|
+
|
|
92
103
|
for result in response.results:
|
|
93
104
|
print(result.content)
|
|
94
105
|
print(result.score)
|
|
@@ -101,13 +112,13 @@ Use `document_id` to update or archive a document:
|
|
|
101
112
|
update_job = client.jobs.create(
|
|
102
113
|
source_type="url",
|
|
103
114
|
source_url="https://example.com/manual-v2.pdf",
|
|
104
|
-
document_id=
|
|
115
|
+
document_id=document_id,
|
|
105
116
|
)
|
|
106
117
|
|
|
107
|
-
document = client.documents.get(
|
|
118
|
+
document = client.documents.get(document_id)
|
|
108
119
|
print(document.status)
|
|
109
120
|
|
|
110
|
-
client.documents.archive(
|
|
121
|
+
client.documents.archive(document_id)
|
|
111
122
|
```
|
|
112
123
|
|
|
113
124
|
You can also list documents in a namespace:
|
|
@@ -146,6 +157,8 @@ result = client.parse(
|
|
|
146
157
|
|
|
147
158
|
print(result.manifest.source_file_name) # "report.pdf"
|
|
148
159
|
print(len(result.chunks)) # 152
|
|
160
|
+
print(result.namespace) # "default" or your explicit namespace
|
|
161
|
+
print(result.document_id) # Published canonical document id
|
|
149
162
|
```
|
|
150
163
|
|
|
151
164
|
### Access different chunk types
|
|
@@ -209,14 +222,14 @@ job = client.jobs.create(
|
|
|
209
222
|
parsing_params={"model": "advanced", "ocr_enabled": True},
|
|
210
223
|
)
|
|
211
224
|
|
|
212
|
-
print(job.document_id) # Persist this to update/archive the document later.
|
|
213
|
-
|
|
214
225
|
# Step 2: Upload file to presigned URL
|
|
215
226
|
client.jobs.upload(job, file=Path("report.pdf"))
|
|
216
227
|
|
|
217
228
|
# Step 3: Poll until done (adaptive backoff)
|
|
218
229
|
job_result = client.jobs.wait(job.job_id, poll_interval=10.0, poll_timeout=1800.0)
|
|
219
230
|
|
|
231
|
+
print(job_result.document_id) # Persist this to update/archive the document later.
|
|
232
|
+
|
|
220
233
|
# Step 4: Download and parse results
|
|
221
234
|
result = client.jobs.load(job_result)
|
|
222
235
|
print(result.statistics)
|
|
@@ -293,6 +306,12 @@ We publish stable releases to [PyPI](https://pypi.org/project/knowhere-python-sd
|
|
|
293
306
|
- [pydantic](https://docs.pydantic.dev/) `>=2.0.0,<3.0`
|
|
294
307
|
- [typing-extensions](https://pypi.org/project/typing-extensions/) `>=4.7.0`
|
|
295
308
|
|
|
309
|
+
## Community
|
|
310
|
+
|
|
311
|
+
- Contributing guide: [CONTRIBUTING.md](./CONTRIBUTING.md)
|
|
312
|
+
- Security policy: [SECURITY.md](./SECURITY.md)
|
|
313
|
+
- Code of conduct: [CODE_OF_CONDUCT.md](./CODE_OF_CONDUCT.md)
|
|
314
|
+
|
|
296
315
|
## License
|
|
297
316
|
|
|
298
317
|
MIT
|
|
@@ -35,8 +35,9 @@ for chunk in result.text_chunks:
|
|
|
35
35
|
## Retrieval and document lifecycle
|
|
36
36
|
|
|
37
37
|
New documents are published into a retrieval namespace. The server returns a
|
|
38
|
-
stable `document_id`
|
|
39
|
-
|
|
38
|
+
stable `document_id` after the job is published. `client.jobs.create(...)`
|
|
39
|
+
does not return a usable `document_id`; persist `job_result.document_id` if you
|
|
40
|
+
need to update or archive the same document later.
|
|
40
41
|
|
|
41
42
|
```python
|
|
42
43
|
job = client.jobs.create(
|
|
@@ -45,7 +46,11 @@ job = client.jobs.create(
|
|
|
45
46
|
namespace="support-center",
|
|
46
47
|
)
|
|
47
48
|
|
|
48
|
-
|
|
49
|
+
job_result = client.jobs.wait(job.job_id)
|
|
50
|
+
document_id = job_result.document_id
|
|
51
|
+
|
|
52
|
+
if document_id is None:
|
|
53
|
+
raise RuntimeError("Expected document_id after successful publication.")
|
|
49
54
|
```
|
|
50
55
|
|
|
51
56
|
After the job is done and published, query the canonical document content:
|
|
@@ -55,8 +60,13 @@ response = client.retrieval.query(
|
|
|
55
60
|
namespace="support-center",
|
|
56
61
|
query="How do I reset Bluetooth pairing?",
|
|
57
62
|
top_k=5,
|
|
63
|
+
channels=["path", "term"],
|
|
64
|
+
filter_mode="keep",
|
|
65
|
+
signal_paths=["Bluetooth", "Pairing"],
|
|
58
66
|
)
|
|
59
67
|
|
|
68
|
+
print(response.router_used)
|
|
69
|
+
|
|
60
70
|
for result in response.results:
|
|
61
71
|
print(result.content)
|
|
62
72
|
print(result.score)
|
|
@@ -69,13 +79,13 @@ Use `document_id` to update or archive a document:
|
|
|
69
79
|
update_job = client.jobs.create(
|
|
70
80
|
source_type="url",
|
|
71
81
|
source_url="https://example.com/manual-v2.pdf",
|
|
72
|
-
document_id=
|
|
82
|
+
document_id=document_id,
|
|
73
83
|
)
|
|
74
84
|
|
|
75
|
-
document = client.documents.get(
|
|
85
|
+
document = client.documents.get(document_id)
|
|
76
86
|
print(document.status)
|
|
77
87
|
|
|
78
|
-
client.documents.archive(
|
|
88
|
+
client.documents.archive(document_id)
|
|
79
89
|
```
|
|
80
90
|
|
|
81
91
|
You can also list documents in a namespace:
|
|
@@ -114,6 +124,8 @@ result = client.parse(
|
|
|
114
124
|
|
|
115
125
|
print(result.manifest.source_file_name) # "report.pdf"
|
|
116
126
|
print(len(result.chunks)) # 152
|
|
127
|
+
print(result.namespace) # "default" or your explicit namespace
|
|
128
|
+
print(result.document_id) # Published canonical document id
|
|
117
129
|
```
|
|
118
130
|
|
|
119
131
|
### Access different chunk types
|
|
@@ -177,14 +189,14 @@ job = client.jobs.create(
|
|
|
177
189
|
parsing_params={"model": "advanced", "ocr_enabled": True},
|
|
178
190
|
)
|
|
179
191
|
|
|
180
|
-
print(job.document_id) # Persist this to update/archive the document later.
|
|
181
|
-
|
|
182
192
|
# Step 2: Upload file to presigned URL
|
|
183
193
|
client.jobs.upload(job, file=Path("report.pdf"))
|
|
184
194
|
|
|
185
195
|
# Step 3: Poll until done (adaptive backoff)
|
|
186
196
|
job_result = client.jobs.wait(job.job_id, poll_interval=10.0, poll_timeout=1800.0)
|
|
187
197
|
|
|
198
|
+
print(job_result.document_id) # Persist this to update/archive the document later.
|
|
199
|
+
|
|
188
200
|
# Step 4: Download and parse results
|
|
189
201
|
result = client.jobs.load(job_result)
|
|
190
202
|
print(result.statistics)
|
|
@@ -261,6 +273,12 @@ We publish stable releases to [PyPI](https://pypi.org/project/knowhere-python-sd
|
|
|
261
273
|
- [pydantic](https://docs.pydantic.dev/) `>=2.0.0,<3.0`
|
|
262
274
|
- [typing-extensions](https://pypi.org/project/typing-extensions/) `>=4.7.0`
|
|
263
275
|
|
|
276
|
+
## Community
|
|
277
|
+
|
|
278
|
+
- Contributing guide: [CONTRIBUTING.md](./CONTRIBUTING.md)
|
|
279
|
+
- Security policy: [SECURITY.md](./SECURITY.md)
|
|
280
|
+
- Code of conduct: [CODE_OF_CONDUCT.md](./CODE_OF_CONDUCT.md)
|
|
281
|
+
|
|
264
282
|
## License
|
|
265
283
|
|
|
266
284
|
MIT
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Security Policy
|
|
2
|
+
|
|
3
|
+
## Supported Versions
|
|
4
|
+
|
|
5
|
+
Only the latest published release line is supported for security fixes.
|
|
6
|
+
|
|
7
|
+
| Version | Supported |
|
|
8
|
+
| --- | --- |
|
|
9
|
+
| Latest release | Yes |
|
|
10
|
+
| Older releases | No |
|
|
11
|
+
|
|
12
|
+
## Reporting a Vulnerability
|
|
13
|
+
|
|
14
|
+
Please do not open public GitHub issues for suspected vulnerabilities.
|
|
15
|
+
|
|
16
|
+
Instead, email `team@knowhereto.ai` with:
|
|
17
|
+
|
|
18
|
+
- the repository name
|
|
19
|
+
- a clear description of the issue
|
|
20
|
+
- reproduction steps or a proof of concept
|
|
21
|
+
- impact assessment if known
|
|
22
|
+
|
|
23
|
+
We will acknowledge the report, validate it, and coordinate remediation before
|
|
24
|
+
public disclosure.
|
|
@@ -39,6 +39,9 @@ from knowhere.types.document import Document, DocumentListResponse
|
|
|
39
39
|
from knowhere.types.job import Job, JobError, JobProgress, JobResult
|
|
40
40
|
from knowhere.types.params import ParsingParams, WebhookConfig
|
|
41
41
|
from knowhere.types.retrieval import (
|
|
42
|
+
RetrievalChannel,
|
|
43
|
+
RetrievalFilterMode,
|
|
44
|
+
RetrievalSectionExclusion,
|
|
42
45
|
RetrievalSource,
|
|
43
46
|
RetrievalQueryResponse,
|
|
44
47
|
RetrievalResult,
|
|
@@ -97,6 +100,9 @@ __all__: list[str] = [
|
|
|
97
100
|
"Document",
|
|
98
101
|
"DocumentListResponse",
|
|
99
102
|
# Retrieval types
|
|
103
|
+
"RetrievalChannel",
|
|
104
|
+
"RetrievalFilterMode",
|
|
105
|
+
"RetrievalSectionExclusion",
|
|
100
106
|
"RetrievalSource",
|
|
101
107
|
"RetrievalQueryResponse",
|
|
102
108
|
"RetrievalResult",
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.3.2" # x-release-please-version
|
|
@@ -145,8 +145,12 @@ class Jobs(SyncAPIResource):
|
|
|
145
145
|
if not job_result.result_url:
|
|
146
146
|
raise InvalidStateError("JobResult does not have a result_url.")
|
|
147
147
|
result_url: str = job_result.result_url
|
|
148
|
+
namespace: Optional[str] = job_result.namespace
|
|
149
|
+
document_id: Optional[str] = job_result.document_id
|
|
148
150
|
else:
|
|
149
151
|
result_url = job_result
|
|
152
|
+
namespace = None
|
|
153
|
+
document_id = None
|
|
150
154
|
|
|
151
155
|
response: httpx.Response = self._client._client.get(
|
|
152
156
|
result_url, timeout=self._client.upload_timeout
|
|
@@ -154,7 +158,10 @@ class Jobs(SyncAPIResource):
|
|
|
154
158
|
response.raise_for_status()
|
|
155
159
|
zip_bytes: bytes = response.content
|
|
156
160
|
|
|
157
|
-
|
|
161
|
+
parsed_result = parseResultZip(zip_bytes, verify_checksum=verify_checksum)
|
|
162
|
+
parsed_result.namespace = namespace
|
|
163
|
+
parsed_result.document_id = document_id
|
|
164
|
+
return parsed_result
|
|
158
165
|
|
|
159
166
|
|
|
160
167
|
class AsyncJobs(AsyncAPIResource):
|
|
@@ -251,8 +258,12 @@ class AsyncJobs(AsyncAPIResource):
|
|
|
251
258
|
if not job_result.result_url:
|
|
252
259
|
raise InvalidStateError("JobResult does not have a result_url.")
|
|
253
260
|
result_url: str = job_result.result_url
|
|
261
|
+
namespace: Optional[str] = job_result.namespace
|
|
262
|
+
document_id: Optional[str] = job_result.document_id
|
|
254
263
|
else:
|
|
255
264
|
result_url = job_result
|
|
265
|
+
namespace = None
|
|
266
|
+
document_id = None
|
|
256
267
|
|
|
257
268
|
response: httpx.Response = await self._client._client.get(
|
|
258
269
|
result_url, timeout=self._client.upload_timeout
|
|
@@ -260,4 +271,7 @@ class AsyncJobs(AsyncAPIResource):
|
|
|
260
271
|
response.raise_for_status()
|
|
261
272
|
zip_bytes: bytes = response.content
|
|
262
273
|
|
|
263
|
-
|
|
274
|
+
parsed_result = parseResultZip(zip_bytes, verify_checksum=verify_checksum)
|
|
275
|
+
parsed_result.namespace = namespace
|
|
276
|
+
parsed_result.document_id = document_id
|
|
277
|
+
return parsed_result
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""Retrieval resource for querying published documents."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Dict, Optional
|
|
6
|
+
|
|
7
|
+
from knowhere.resources._base import AsyncAPIResource, SyncAPIResource
|
|
8
|
+
from knowhere.types.retrieval import (
|
|
9
|
+
RetrievalChannel,
|
|
10
|
+
RetrievalFilterMode,
|
|
11
|
+
RetrievalQueryResponse,
|
|
12
|
+
RetrievalSectionExclusion,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Retrieval(SyncAPIResource):
|
|
17
|
+
"""Synchronous interface for ``/v1/retrieval`` endpoints."""
|
|
18
|
+
|
|
19
|
+
def query(
|
|
20
|
+
self,
|
|
21
|
+
*,
|
|
22
|
+
query: str,
|
|
23
|
+
namespace: Optional[str] = None,
|
|
24
|
+
top_k: Optional[int] = None,
|
|
25
|
+
data_type: Optional[int] = None,
|
|
26
|
+
signal_paths: Optional[list[str]] = None,
|
|
27
|
+
filter_mode: Optional[RetrievalFilterMode] = None,
|
|
28
|
+
channels: Optional[list[RetrievalChannel]] = None,
|
|
29
|
+
channel_weights: Optional[dict[RetrievalChannel, float]] = None,
|
|
30
|
+
rerank: Optional[bool] = None,
|
|
31
|
+
threshold: Optional[float] = None,
|
|
32
|
+
internal_recall_k: Optional[int] = None,
|
|
33
|
+
exclude_document_ids: Optional[list[str]] = None,
|
|
34
|
+
exclude_sections: Optional[list[RetrievalSectionExclusion]] = None,
|
|
35
|
+
) -> RetrievalQueryResponse:
|
|
36
|
+
"""Query published documents in a namespace."""
|
|
37
|
+
body: Dict[str, Any] = {"query": query}
|
|
38
|
+
if namespace is not None:
|
|
39
|
+
body["namespace"] = namespace
|
|
40
|
+
if top_k is not None:
|
|
41
|
+
body["top_k"] = top_k
|
|
42
|
+
if data_type is not None:
|
|
43
|
+
body["data_type"] = data_type
|
|
44
|
+
if signal_paths is not None:
|
|
45
|
+
body["signal_paths"] = signal_paths
|
|
46
|
+
if filter_mode is not None:
|
|
47
|
+
body["filter_mode"] = filter_mode
|
|
48
|
+
if channels is not None:
|
|
49
|
+
body["channels"] = channels
|
|
50
|
+
if channel_weights is not None:
|
|
51
|
+
body["channel_weights"] = channel_weights
|
|
52
|
+
if rerank is not None:
|
|
53
|
+
body["rerank"] = rerank
|
|
54
|
+
if threshold is not None:
|
|
55
|
+
body["threshold"] = threshold
|
|
56
|
+
if internal_recall_k is not None:
|
|
57
|
+
body["internal_recall_k"] = internal_recall_k
|
|
58
|
+
if exclude_document_ids is not None:
|
|
59
|
+
body["exclude_document_ids"] = exclude_document_ids
|
|
60
|
+
if exclude_sections is not None:
|
|
61
|
+
body["exclude_sections"] = exclude_sections
|
|
62
|
+
|
|
63
|
+
return self._request(
|
|
64
|
+
"POST",
|
|
65
|
+
"v1/retrieval/query",
|
|
66
|
+
body=body,
|
|
67
|
+
cast_to=RetrievalQueryResponse,
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class AsyncRetrieval(AsyncAPIResource):
|
|
72
|
+
"""Asynchronous interface for ``/v1/retrieval`` endpoints."""
|
|
73
|
+
|
|
74
|
+
async def query(
|
|
75
|
+
self,
|
|
76
|
+
*,
|
|
77
|
+
query: str,
|
|
78
|
+
namespace: Optional[str] = None,
|
|
79
|
+
top_k: Optional[int] = None,
|
|
80
|
+
data_type: Optional[int] = None,
|
|
81
|
+
signal_paths: Optional[list[str]] = None,
|
|
82
|
+
filter_mode: Optional[RetrievalFilterMode] = None,
|
|
83
|
+
channels: Optional[list[RetrievalChannel]] = None,
|
|
84
|
+
channel_weights: Optional[dict[RetrievalChannel, float]] = None,
|
|
85
|
+
rerank: Optional[bool] = None,
|
|
86
|
+
threshold: Optional[float] = None,
|
|
87
|
+
internal_recall_k: Optional[int] = None,
|
|
88
|
+
exclude_document_ids: Optional[list[str]] = None,
|
|
89
|
+
exclude_sections: Optional[list[RetrievalSectionExclusion]] = None,
|
|
90
|
+
) -> RetrievalQueryResponse:
|
|
91
|
+
"""Query published documents in a namespace."""
|
|
92
|
+
body: Dict[str, Any] = {"query": query}
|
|
93
|
+
if namespace is not None:
|
|
94
|
+
body["namespace"] = namespace
|
|
95
|
+
if top_k is not None:
|
|
96
|
+
body["top_k"] = top_k
|
|
97
|
+
if data_type is not None:
|
|
98
|
+
body["data_type"] = data_type
|
|
99
|
+
if signal_paths is not None:
|
|
100
|
+
body["signal_paths"] = signal_paths
|
|
101
|
+
if filter_mode is not None:
|
|
102
|
+
body["filter_mode"] = filter_mode
|
|
103
|
+
if channels is not None:
|
|
104
|
+
body["channels"] = channels
|
|
105
|
+
if channel_weights is not None:
|
|
106
|
+
body["channel_weights"] = channel_weights
|
|
107
|
+
if rerank is not None:
|
|
108
|
+
body["rerank"] = rerank
|
|
109
|
+
if threshold is not None:
|
|
110
|
+
body["threshold"] = threshold
|
|
111
|
+
if internal_recall_k is not None:
|
|
112
|
+
body["internal_recall_k"] = internal_recall_k
|
|
113
|
+
if exclude_document_ids is not None:
|
|
114
|
+
body["exclude_document_ids"] = exclude_document_ids
|
|
115
|
+
if exclude_sections is not None:
|
|
116
|
+
body["exclude_sections"] = exclude_sections
|
|
117
|
+
|
|
118
|
+
return await self._request(
|
|
119
|
+
"POST",
|
|
120
|
+
"v1/retrieval/query",
|
|
121
|
+
body=body,
|
|
122
|
+
cast_to=RetrievalQueryResponse,
|
|
123
|
+
)
|
|
@@ -6,6 +6,9 @@ from knowhere.types.document import Document, DocumentListResponse
|
|
|
6
6
|
from knowhere.types.job import Job, JobError, JobResult
|
|
7
7
|
from knowhere.types.params import ParsingParams, WebhookConfig
|
|
8
8
|
from knowhere.types.retrieval import (
|
|
9
|
+
RetrievalChannel,
|
|
10
|
+
RetrievalFilterMode,
|
|
11
|
+
RetrievalSectionExclusion,
|
|
9
12
|
RetrievalSource,
|
|
10
13
|
RetrievalQueryResponse,
|
|
11
14
|
RetrievalResult,
|
|
@@ -38,6 +41,9 @@ __all__: list[str] = [
|
|
|
38
41
|
"Document",
|
|
39
42
|
"DocumentListResponse",
|
|
40
43
|
# retrieval
|
|
44
|
+
"RetrievalChannel",
|
|
45
|
+
"RetrievalFilterMode",
|
|
46
|
+
"RetrievalSectionExclusion",
|
|
41
47
|
"RetrievalSource",
|
|
42
48
|
"RetrievalQueryResponse",
|
|
43
49
|
"RetrievalResult",
|
|
@@ -272,6 +272,8 @@ class ParseResult:
|
|
|
272
272
|
kb_csv: Optional[str]
|
|
273
273
|
hierarchy_view_html: Optional[str]
|
|
274
274
|
raw_zip: bytes
|
|
275
|
+
namespace: Optional[str]
|
|
276
|
+
document_id: Optional[str]
|
|
275
277
|
|
|
276
278
|
def __init__(
|
|
277
279
|
self,
|
|
@@ -285,6 +287,8 @@ class ParseResult:
|
|
|
285
287
|
kb_csv: Optional[str],
|
|
286
288
|
hierarchy_view_html: Optional[str],
|
|
287
289
|
raw_zip: bytes,
|
|
290
|
+
namespace: Optional[str] = None,
|
|
291
|
+
document_id: Optional[str] = None,
|
|
288
292
|
) -> None:
|
|
289
293
|
self.manifest = manifest
|
|
290
294
|
self.chunks = chunks
|
|
@@ -295,6 +299,8 @@ class ParseResult:
|
|
|
295
299
|
self.kb_csv = kb_csv
|
|
296
300
|
self.hierarchy_view_html = hierarchy_view_html
|
|
297
301
|
self.raw_zip = raw_zip
|
|
302
|
+
self.namespace = namespace
|
|
303
|
+
self.document_id = document_id
|
|
298
304
|
|
|
299
305
|
# -- convenience properties --
|
|
300
306
|
|
|
@@ -2,11 +2,22 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from typing import Optional
|
|
5
|
+
from typing import Literal, Optional, TypedDict
|
|
6
6
|
|
|
7
7
|
from pydantic import BaseModel
|
|
8
8
|
|
|
9
9
|
|
|
10
|
+
RetrievalChannel = Literal["path", "content", "term"]
|
|
11
|
+
RetrievalFilterMode = Literal["delete", "keep"]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class RetrievalSectionExclusion(TypedDict):
|
|
15
|
+
"""Section exclusion for follow-up retrieval queries."""
|
|
16
|
+
|
|
17
|
+
document_id: str
|
|
18
|
+
section_path: str
|
|
19
|
+
|
|
20
|
+
|
|
10
21
|
class RetrievalSource(BaseModel):
|
|
11
22
|
"""Caller-facing source reference attached to a retrieval result."""
|
|
12
23
|
|
|
@@ -30,4 +41,5 @@ class RetrievalQueryResponse(BaseModel):
|
|
|
30
41
|
|
|
31
42
|
namespace: str
|
|
32
43
|
query: str
|
|
44
|
+
router_used: Optional[str] = None
|
|
33
45
|
results: list[RetrievalResult]
|
|
@@ -72,7 +72,6 @@ def mock_job_response() -> Dict[str, Any]:
|
|
|
72
72
|
"status": "waiting-file",
|
|
73
73
|
"source_type": "file",
|
|
74
74
|
"namespace": "default",
|
|
75
|
-
"document_id": "doc_test123",
|
|
76
75
|
"data_id": None,
|
|
77
76
|
"created_at": "2025-01-01T00:00:00Z",
|
|
78
77
|
"upload_url": "https://storage.example.com/upload?token=abc",
|
|
@@ -36,7 +36,6 @@ class TestJobsCreate:
|
|
|
36
36
|
"status": "pending",
|
|
37
37
|
"source_type": "url",
|
|
38
38
|
"namespace": "support-center",
|
|
39
|
-
"document_id": "doc_123",
|
|
40
39
|
}
|
|
41
40
|
|
|
42
41
|
route = respx.post(JOBS_URL).mock(
|
|
@@ -53,7 +52,7 @@ class TestJobsCreate:
|
|
|
53
52
|
assert job.source_type == "url"
|
|
54
53
|
assert job.status == "pending"
|
|
55
54
|
assert job.namespace == "support-center"
|
|
56
|
-
assert job
|
|
55
|
+
assert not hasattr(job, "document_id")
|
|
57
56
|
|
|
58
57
|
@respx.mock
|
|
59
58
|
def test_create_with_file_source(
|
|
@@ -87,7 +86,6 @@ class TestJobsCreate:
|
|
|
87
86
|
"status": "pending",
|
|
88
87
|
"source_type": "url",
|
|
89
88
|
"namespace": "support-center",
|
|
90
|
-
"document_id": "doc_123",
|
|
91
89
|
}
|
|
92
90
|
|
|
93
91
|
route = respx.post(JOBS_URL).mock(
|
|
@@ -284,6 +282,8 @@ class TestJobsLoad:
|
|
|
284
282
|
job_id="job_load",
|
|
285
283
|
status="done",
|
|
286
284
|
source_type="url",
|
|
285
|
+
namespace="support-center",
|
|
286
|
+
document_id="doc_123",
|
|
287
287
|
result_url=result_url,
|
|
288
288
|
)
|
|
289
289
|
|
|
@@ -293,3 +293,5 @@ class TestJobsLoad:
|
|
|
293
293
|
|
|
294
294
|
assert route.called
|
|
295
295
|
assert parse_result.manifest is not None
|
|
296
|
+
assert parse_result.namespace == "support-center"
|
|
297
|
+
assert parse_result.document_id == "doc_123"
|
|
@@ -18,7 +18,7 @@ class TestRedactSensitiveHeaders:
|
|
|
18
18
|
|
|
19
19
|
def test_redacts_authorization_bearer(self) -> None:
|
|
20
20
|
headers: Dict[str, str] = {
|
|
21
|
-
"Authorization": "Bearer
|
|
21
|
+
"Authorization": "Bearer sk_example_redacted_token",
|
|
22
22
|
"Content-Type": "application/json",
|
|
23
23
|
}
|
|
24
24
|
redacted: Dict[str, str] = redactSensitiveHeaders(headers)
|
|
@@ -55,7 +55,7 @@ class TestJobModel:
|
|
|
55
55
|
}
|
|
56
56
|
job: Job = Job(**data)
|
|
57
57
|
assert job.namespace == "support-center"
|
|
58
|
-
assert
|
|
58
|
+
assert "document_id" not in job.model_dump()
|
|
59
59
|
|
|
60
60
|
def test_from_dict_with_upload(self) -> None:
|
|
61
61
|
data: Dict[str, Any] = {
|
|
@@ -717,6 +717,11 @@ class TestParseResult:
|
|
|
717
717
|
assert stats.total_chunks == 3
|
|
718
718
|
assert stats.text_chunks == 1
|
|
719
719
|
|
|
720
|
+
def test_document_scope_defaults_to_none(self) -> None:
|
|
721
|
+
result: ParseResult = _build_parse_result()
|
|
722
|
+
assert result.namespace is None
|
|
723
|
+
assert result.document_id is None
|
|
724
|
+
|
|
720
725
|
def test_raw_zip_accessible(self) -> None:
|
|
721
726
|
result: ParseResult = _build_parse_result()
|
|
722
727
|
assert result.raw_zip == b"fake zip bytes"
|
|
@@ -42,6 +42,8 @@ def _make_done_response(job_id: str, result_url: str) -> Dict[str, Any]:
|
|
|
42
42
|
"job_id": job_id,
|
|
43
43
|
"status": "done",
|
|
44
44
|
"source_type": "url",
|
|
45
|
+
"namespace": "support-center",
|
|
46
|
+
"document_id": "doc_123",
|
|
45
47
|
"result_url": result_url,
|
|
46
48
|
}
|
|
47
49
|
|
|
@@ -96,6 +98,8 @@ class TestParseWithUrl:
|
|
|
96
98
|
|
|
97
99
|
assert parse_result.manifest is not None
|
|
98
100
|
assert parse_result.manifest.job_id == "job_test123"
|
|
101
|
+
assert parse_result.namespace == "support-center"
|
|
102
|
+
assert parse_result.document_id == "doc_123"
|
|
99
103
|
|
|
100
104
|
|
|
101
105
|
# ---------------------------------------------------------------------------
|
|
@@ -19,6 +19,7 @@ def _make_retrieval_response() -> Dict[str, Any]:
|
|
|
19
19
|
return {
|
|
20
20
|
"namespace": "support-center",
|
|
21
21
|
"query": "refund policy",
|
|
22
|
+
"router_used": "discovery+agent",
|
|
22
23
|
"results": [
|
|
23
24
|
{
|
|
24
25
|
"chunk_type": "text",
|
|
@@ -47,6 +48,14 @@ class TestRetrievalQuery:
|
|
|
47
48
|
query="refund policy",
|
|
48
49
|
namespace="support-center",
|
|
49
50
|
top_k=5,
|
|
51
|
+
data_type=6,
|
|
52
|
+
signal_paths=["Billing", "Refunds"],
|
|
53
|
+
filter_mode="keep",
|
|
54
|
+
channels=["path", "term"],
|
|
55
|
+
channel_weights={"path": 2.0, "term": 0.5},
|
|
56
|
+
rerank=True,
|
|
57
|
+
threshold=0.2,
|
|
58
|
+
internal_recall_k=25,
|
|
50
59
|
exclude_document_ids=["doc_old"],
|
|
51
60
|
exclude_sections=[
|
|
52
61
|
{
|
|
@@ -62,6 +71,14 @@ class TestRetrievalQuery:
|
|
|
62
71
|
"query": "refund policy",
|
|
63
72
|
"namespace": "support-center",
|
|
64
73
|
"top_k": 5,
|
|
74
|
+
"data_type": 6,
|
|
75
|
+
"signal_paths": ["Billing", "Refunds"],
|
|
76
|
+
"filter_mode": "keep",
|
|
77
|
+
"channels": ["path", "term"],
|
|
78
|
+
"channel_weights": {"path": 2.0, "term": 0.5},
|
|
79
|
+
"rerank": True,
|
|
80
|
+
"threshold": 0.2,
|
|
81
|
+
"internal_recall_k": 25,
|
|
65
82
|
"exclude_document_ids": ["doc_old"],
|
|
66
83
|
"exclude_sections": [
|
|
67
84
|
{
|
|
@@ -71,6 +88,7 @@ class TestRetrievalQuery:
|
|
|
71
88
|
],
|
|
72
89
|
}
|
|
73
90
|
assert response.namespace == "support-center"
|
|
91
|
+
assert response.router_used == "discovery+agent"
|
|
74
92
|
assert response.results[0].content == "Annual plans may be refunded within 30 days."
|
|
75
93
|
assert response.results[0].source.document_id == "doc_123"
|
|
76
94
|
assert response.results[0].source.source_file_name == "refund-policy.md"
|
|
@@ -107,4 +125,5 @@ class TestRetrievalQuery:
|
|
|
107
125
|
)
|
|
108
126
|
|
|
109
127
|
assert route.called
|
|
128
|
+
assert response.router_used == "discovery+agent"
|
|
110
129
|
assert response.results[0].source.document_id == "doc_123"
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.3.0" # x-release-please-version
|
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
"""Retrieval resource for querying published documents."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from typing import Any, Dict, Optional
|
|
6
|
-
|
|
7
|
-
from knowhere.resources._base import AsyncAPIResource, SyncAPIResource
|
|
8
|
-
from knowhere.types.retrieval import RetrievalQueryResponse
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class Retrieval(SyncAPIResource):
|
|
12
|
-
"""Synchronous interface for ``/v1/retrieval`` endpoints."""
|
|
13
|
-
|
|
14
|
-
def query(
|
|
15
|
-
self,
|
|
16
|
-
*,
|
|
17
|
-
query: str,
|
|
18
|
-
namespace: Optional[str] = None,
|
|
19
|
-
top_k: Optional[int] = None,
|
|
20
|
-
exclude_document_ids: Optional[list[str]] = None,
|
|
21
|
-
exclude_sections: Optional[list[dict[str, str]]] = None,
|
|
22
|
-
) -> RetrievalQueryResponse:
|
|
23
|
-
"""Query published documents in a namespace."""
|
|
24
|
-
body: Dict[str, Any] = {"query": query}
|
|
25
|
-
if namespace is not None:
|
|
26
|
-
body["namespace"] = namespace
|
|
27
|
-
if top_k is not None:
|
|
28
|
-
body["top_k"] = top_k
|
|
29
|
-
if exclude_document_ids is not None:
|
|
30
|
-
body["exclude_document_ids"] = exclude_document_ids
|
|
31
|
-
if exclude_sections is not None:
|
|
32
|
-
body["exclude_sections"] = exclude_sections
|
|
33
|
-
|
|
34
|
-
return self._request(
|
|
35
|
-
"POST",
|
|
36
|
-
"v1/retrieval/query",
|
|
37
|
-
body=body,
|
|
38
|
-
cast_to=RetrievalQueryResponse,
|
|
39
|
-
)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
class AsyncRetrieval(AsyncAPIResource):
|
|
43
|
-
"""Asynchronous interface for ``/v1/retrieval`` endpoints."""
|
|
44
|
-
|
|
45
|
-
async def query(
|
|
46
|
-
self,
|
|
47
|
-
*,
|
|
48
|
-
query: str,
|
|
49
|
-
namespace: Optional[str] = None,
|
|
50
|
-
top_k: Optional[int] = None,
|
|
51
|
-
exclude_document_ids: Optional[list[str]] = None,
|
|
52
|
-
exclude_sections: Optional[list[dict[str, str]]] = None,
|
|
53
|
-
) -> RetrievalQueryResponse:
|
|
54
|
-
"""Query published documents in a namespace."""
|
|
55
|
-
body: Dict[str, Any] = {"query": query}
|
|
56
|
-
if namespace is not None:
|
|
57
|
-
body["namespace"] = namespace
|
|
58
|
-
if top_k is not None:
|
|
59
|
-
body["top_k"] = top_k
|
|
60
|
-
if exclude_document_ids is not None:
|
|
61
|
-
body["exclude_document_ids"] = exclude_document_ids
|
|
62
|
-
if exclude_sections is not None:
|
|
63
|
-
body["exclude_sections"] = exclude_sections
|
|
64
|
-
|
|
65
|
-
return await self._request(
|
|
66
|
-
"POST",
|
|
67
|
-
"v1/retrieval/query",
|
|
68
|
-
body=body,
|
|
69
|
-
cast_to=RetrievalQueryResponse,
|
|
70
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|