knowledgesdk 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- knowledgesdk-0.2.0/.github/workflows/publish.yml +58 -0
- knowledgesdk-0.2.0/.gitignore +9 -0
- knowledgesdk-0.2.0/PKG-INFO +266 -0
- knowledgesdk-0.2.0/README.md +242 -0
- knowledgesdk-0.2.0/knowledgesdk/__init__.py +86 -0
- knowledgesdk-0.2.0/knowledgesdk/api/__init__.py +0 -0
- knowledgesdk-0.2.0/knowledgesdk/api/classify.py +54 -0
- knowledgesdk-0.2.0/knowledgesdk/api/extract.py +88 -0
- knowledgesdk-0.2.0/knowledgesdk/api/jobs.py +99 -0
- knowledgesdk-0.2.0/knowledgesdk/api/scrape.py +48 -0
- knowledgesdk-0.2.0/knowledgesdk/api/screenshot.py +45 -0
- knowledgesdk-0.2.0/knowledgesdk/api/search.py +47 -0
- knowledgesdk-0.2.0/knowledgesdk/api/sitemap.py +47 -0
- knowledgesdk-0.2.0/knowledgesdk/api/webhooks.py +129 -0
- knowledgesdk-0.2.0/knowledgesdk/client.py +172 -0
- knowledgesdk-0.2.0/knowledgesdk/errors.py +73 -0
- knowledgesdk-0.2.0/knowledgesdk/interfaces/__init__.py +0 -0
- knowledgesdk-0.2.0/knowledgesdk/interfaces/types.py +145 -0
- knowledgesdk-0.2.0/knowledgesdk/utils/__init__.py +0 -0
- knowledgesdk-0.2.0/knowledgesdk/utils/http_client.py +327 -0
- knowledgesdk-0.2.0/pyproject.toml +60 -0
- knowledgesdk-0.2.0/tests/__init__.py +0 -0
- knowledgesdk-0.2.0/tests/test_client.py +62 -0
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
name: Publish
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
workflow_dispatch:
|
|
5
|
+
inputs:
|
|
6
|
+
version_bump:
|
|
7
|
+
description: Version bump type
|
|
8
|
+
required: true
|
|
9
|
+
default: patch
|
|
10
|
+
type: choice
|
|
11
|
+
options:
|
|
12
|
+
- patch
|
|
13
|
+
- minor
|
|
14
|
+
- major
|
|
15
|
+
push:
|
|
16
|
+
tags:
|
|
17
|
+
- "v*"
|
|
18
|
+
|
|
19
|
+
jobs:
|
|
20
|
+
publish:
|
|
21
|
+
runs-on: ubuntu-latest
|
|
22
|
+
permissions:
|
|
23
|
+
contents: write
|
|
24
|
+
id-token: write
|
|
25
|
+
environment:
|
|
26
|
+
name: pypi
|
|
27
|
+
url: https://pypi.org/p/knowledgesdk
|
|
28
|
+
steps:
|
|
29
|
+
- uses: actions/checkout@v4
|
|
30
|
+
|
|
31
|
+
- uses: actions/setup-python@v5
|
|
32
|
+
with:
|
|
33
|
+
python-version: "3.11"
|
|
34
|
+
|
|
35
|
+
- name: Bump version
  if: github.event_name == 'workflow_dispatch'
  env:
    # Hand the workflow input to the script through the environment rather
    # than interpolating it into the script text, so its value is never
    # parsed as shell code.
    VERSION_BUMP: ${{ github.event.inputs.version_bump }}
  run: |
    # Read the current version from the first `version = "..."` entry in pyproject.toml.
    CURRENT=$(python -c "import re; print(re.search(r'version = \"(.+?)\"', open('pyproject.toml').read()).group(1))")
    IFS='.' read -r MAJOR MINOR PATCH <<< "$CURRENT"
    case "$VERSION_BUMP" in
      major) MAJOR=$((MAJOR + 1)); MINOR=0; PATCH=0 ;;
      minor) MINOR=$((MINOR + 1)); PATCH=0 ;;
      patch) PATCH=$((PATCH + 1)) ;;
      # Fail loudly instead of committing an unchanged version.
      *) echo "Unknown version bump type: ${VERSION_BUMP}" >&2; exit 1 ;;
    esac
    NEW_VERSION="${MAJOR}.${MINOR}.${PATCH}"
    sed -i "s/version = \"${CURRENT}\"/version = \"${NEW_VERSION}\"/" pyproject.toml
    # Keep the runtime __version__ string in step with the package metadata;
    # matches whatever value is currently there so earlier drift is repaired too.
    sed -i -E "s/^__version__ = \".*\"/__version__ = \"${NEW_VERSION}\"/" knowledgesdk/__init__.py
    git config user.name "github-actions[bot]"
    git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
    git add pyproject.toml knowledgesdk/__init__.py
    git commit -m "chore: bump version to ${NEW_VERSION}"
    git tag "v${NEW_VERSION}"
    git push && git push --tags
|
|
53
|
+
|
|
54
|
+
- run: pip install build
|
|
55
|
+
|
|
56
|
+
- run: python -m build
|
|
57
|
+
|
|
58
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: knowledgesdk
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: KnowledgeSDK Python SDK — Extract, classify and search web knowledge
|
|
5
|
+
Project-URL: Homepage, https://knowledgesdk.com
|
|
6
|
+
Project-URL: Repository, https://github.com/knowledgesdk/knowledgesdk-python
|
|
7
|
+
Project-URL: Issues, https://github.com/knowledgesdk/knowledgesdk-python/issues
|
|
8
|
+
Author: KnowledgeSDK
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Requires-Python: >=3.8
|
|
14
|
+
Requires-Dist: pydantic>=2.0.0
|
|
15
|
+
Requires-Dist: requests>=2.31.0
|
|
16
|
+
Provides-Extra: dev
|
|
17
|
+
Requires-Dist: black>=23.0.0; extra == 'dev'
|
|
18
|
+
Requires-Dist: flake8>=6.0.0; extra == 'dev'
|
|
19
|
+
Requires-Dist: isort>=5.0.0; extra == 'dev'
|
|
20
|
+
Requires-Dist: mypy>=1.0.0; extra == 'dev'
|
|
21
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
22
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# KnowledgeSDK Python SDK
|
|
26
|
+
|
|
27
|
+
Official Python client for the [KnowledgeSDK](https://knowledgesdk.com) API — extract, classify, scrape, screenshot, and search web knowledge programmatically.
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install knowledgesdk
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Quick Start
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from knowledgesdk import KnowledgeSDK
|
|
39
|
+
|
|
40
|
+
ks = KnowledgeSDK("sk_ks_your_key_here")
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Usage
|
|
44
|
+
|
|
45
|
+
### Extract
|
|
46
|
+
|
|
47
|
+
Run a full knowledge extraction on a website (synchronous):
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
result = ks.extract.run("https://stripe.com")
|
|
51
|
+
|
|
52
|
+
print(result.business.business_name)
|
|
53
|
+
print(result.business.industry_sector)
|
|
54
|
+
print(result.pages_scraped)
|
|
55
|
+
|
|
56
|
+
for item in result.knowledge_items:
|
|
57
|
+
print(item.title, item.content)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Run an asynchronous extraction with a callback:
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
job = ks.extract.run_async(
|
|
64
|
+
"https://stripe.com",
|
|
65
|
+
max_pages=20,
|
|
66
|
+
callback_url="https://myapp.com/webhook"
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
print(job.job_id) # e.g. "job_abc123"
|
|
70
|
+
print(job.status) # e.g. "PENDING"
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Scrape
|
|
74
|
+
|
|
75
|
+
Scrape a single web page and get its Markdown content:
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
page = ks.scrape.run("https://docs.stripe.com/get-started")
|
|
79
|
+
|
|
80
|
+
print(page.title)
|
|
81
|
+
print(page.markdown)
|
|
82
|
+
print(page.links)
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Classify
|
|
86
|
+
|
|
87
|
+
Classify a business from its website:
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
biz = ks.classify.run("https://stripe.com")
|
|
91
|
+
|
|
92
|
+
print(biz.business_name)
|
|
93
|
+
print(biz.business_type)
|
|
94
|
+
print(biz.industry_sector)
|
|
95
|
+
print(biz.target_audience)
|
|
96
|
+
print(biz.confidence_score)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Screenshot
|
|
100
|
+
|
|
101
|
+
Capture a screenshot of a web page:
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
shot = ks.screenshot.run("https://stripe.com")
|
|
105
|
+
|
|
106
|
+
# shot.screenshot is a base64-encoded PNG string
|
|
107
|
+
import base64
|
|
108
|
+
image_bytes = base64.b64decode(shot.screenshot)
|
|
109
|
+
with open("screenshot.png", "wb") as f:
|
|
110
|
+
f.write(image_bytes)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### Sitemap
|
|
114
|
+
|
|
115
|
+
Fetch the sitemap for a website:
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
site_map = ks.sitemap.run("https://stripe.com")
|
|
119
|
+
|
|
120
|
+
print(site_map.count)
|
|
121
|
+
for url in site_map.urls:
|
|
122
|
+
print(url)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Search
|
|
126
|
+
|
|
127
|
+
Search the extracted knowledge base:
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
results = ks.search.run("pricing plans", limit=5)
|
|
131
|
+
|
|
132
|
+
print(f"Found {results.total} results")
|
|
133
|
+
for hit in results.hits:
|
|
134
|
+
print(hit.title, hit.score)
|
|
135
|
+
print(hit.content)
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Webhooks
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
# Create a webhook
|
|
142
|
+
wh = ks.webhooks.create(
|
|
143
|
+
url="https://myapp.com/hook",
|
|
144
|
+
events=["EXTRACTION_COMPLETED", "JOB_FAILED"],
|
|
145
|
+
display_name="My App Webhook"
|
|
146
|
+
)
|
|
147
|
+
print(wh.id) # e.g. "weh_xxx"
|
|
148
|
+
print(wh.token) # signing token
|
|
149
|
+
|
|
150
|
+
# List all webhooks
|
|
151
|
+
all_webhooks = ks.webhooks.list()
|
|
152
|
+
for w in all_webhooks:
|
|
153
|
+
print(w.id, w.url, w.status)
|
|
154
|
+
|
|
155
|
+
# Send a test event to a webhook
|
|
156
|
+
ks.webhooks.test("weh_xxx")
|
|
157
|
+
|
|
158
|
+
# Delete a webhook
|
|
159
|
+
ks.webhooks.delete("weh_xxx")
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### Jobs
|
|
163
|
+
|
|
164
|
+
Retrieve a job by ID:
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
job = ks.jobs.get("job_xxx")
|
|
168
|
+
print(job.status) # PENDING | RUNNING | COMPLETED | FAILED
|
|
169
|
+
print(job.progress) # 0–100
|
|
170
|
+
print(job.result)
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
Poll until a job completes (blocking):
|
|
174
|
+
|
|
175
|
+
```python
|
|
176
|
+
completed = ks.jobs.poll("job_xxx", interval_sec=5, timeout_sec=300)
|
|
177
|
+
print(completed.result)
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
## Configuration
|
|
181
|
+
|
|
182
|
+
| Parameter | Default | Description |
|
|
183
|
+
|---|---|---|
|
|
184
|
+
| `api_key` | required | API key starting with `sk_ks_` |
|
|
185
|
+
| `base_url` | `https://api.knowledgesdk.com` | Override via `KNOWLEDGESDK_BASE_URL` env var |
|
|
186
|
+
| `timeout` | `30000` | Request timeout in milliseconds |
|
|
187
|
+
| `max_retries` | `5` | Max retries with exponential backoff |
|
|
188
|
+
| `debug` | `False` | Enable request/response logging |
|
|
189
|
+
|
|
190
|
+
### Environment Variables
|
|
191
|
+
|
|
192
|
+
```bash
|
|
193
|
+
export KNOWLEDGESDK_BASE_URL="https://api.knowledgesdk.com"
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Debug Mode
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
ks = KnowledgeSDK("sk_ks_your_key", debug=True)
|
|
200
|
+
|
|
201
|
+
# Or toggle at runtime
|
|
202
|
+
ks.set_debug_mode(True)
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### Custom Headers
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
ks.set_header("X-Custom-Header", "value")
|
|
209
|
+
ks.set_headers({"X-Header-A": "a", "X-Header-B": "b"})
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
## Error Handling
|
|
213
|
+
|
|
214
|
+
```python
|
|
215
|
+
from knowledgesdk import (
|
|
216
|
+
KnowledgeSDK,
|
|
217
|
+
AuthenticationError,
|
|
218
|
+
APIError,
|
|
219
|
+
RateLimitError,
|
|
220
|
+
NetworkError,
|
|
221
|
+
TimeoutError,
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
ks = KnowledgeSDK("sk_ks_your_key")
|
|
225
|
+
|
|
226
|
+
try:
|
|
227
|
+
result = ks.extract.run("https://stripe.com")
|
|
228
|
+
except AuthenticationError as e:
|
|
229
|
+
print(f"Auth error: {e.message}")
|
|
230
|
+
except RateLimitError as e:
|
|
231
|
+
print(f"Rate limited: {e.message}")
|
|
232
|
+
except APIError as e:
|
|
233
|
+
print(f"API error {e.status_code}: {e.message}")
|
|
234
|
+
except NetworkError as e:
|
|
235
|
+
print(f"Network error: {e.message}")
|
|
236
|
+
except TimeoutError as e:
|
|
237
|
+
print(f"Request timed out: {e.message}")
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
## Type Reference
|
|
241
|
+
|
|
242
|
+
All response objects are Pydantic models and are fully typed.
|
|
243
|
+
|
|
244
|
+
| Type | Description |
|
|
245
|
+
|---|---|
|
|
246
|
+
| `ExtractResult` | Full extraction with business and knowledge items |
|
|
247
|
+
| `BusinessClassification` | Business name, type, industry, audience, etc. |
|
|
248
|
+
| `KnowledgeItem` | A single knowledge article extracted from a page |
|
|
249
|
+
| `ScrapeResult` | Markdown content, title, description, links |
|
|
250
|
+
| `ScreenshotResult` | Base64 PNG screenshot |
|
|
251
|
+
| `SitemapResult` | List of URLs from the site's sitemap |
|
|
252
|
+
| `SearchResult` | Search hits, total count, query |
|
|
253
|
+
| `SearchHit` | Individual search result with score |
|
|
254
|
+
| `AsyncJobRef` | Job ID and initial status for async operations |
|
|
255
|
+
| `JobResult` | Full job status, progress, result, and error |
|
|
256
|
+
| `WebhookFull` | Webhook ID, URL, events, status, token |
|
|
257
|
+
|
|
258
|
+
## Requirements
|
|
259
|
+
|
|
260
|
+
- Python >= 3.8
|
|
261
|
+
- `requests >= 2.31.0`
|
|
262
|
+
- `pydantic >= 2.0.0`
|
|
263
|
+
|
|
264
|
+
## License
|
|
265
|
+
|
|
266
|
+
MIT
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
# KnowledgeSDK Python SDK
|
|
2
|
+
|
|
3
|
+
Official Python client for the [KnowledgeSDK](https://knowledgesdk.com) API — extract, classify, scrape, screenshot, and search web knowledge programmatically.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install knowledgesdk
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from knowledgesdk import KnowledgeSDK
|
|
15
|
+
|
|
16
|
+
ks = KnowledgeSDK("sk_ks_your_key_here")
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Usage
|
|
20
|
+
|
|
21
|
+
### Extract
|
|
22
|
+
|
|
23
|
+
Run a full knowledge extraction on a website (synchronous):
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
result = ks.extract.run("https://stripe.com")
|
|
27
|
+
|
|
28
|
+
print(result.business.business_name)
|
|
29
|
+
print(result.business.industry_sector)
|
|
30
|
+
print(result.pages_scraped)
|
|
31
|
+
|
|
32
|
+
for item in result.knowledge_items:
|
|
33
|
+
print(item.title, item.content)
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Run an asynchronous extraction with a callback:
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
job = ks.extract.run_async(
|
|
40
|
+
"https://stripe.com",
|
|
41
|
+
max_pages=20,
|
|
42
|
+
callback_url="https://myapp.com/webhook"
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
print(job.job_id) # e.g. "job_abc123"
|
|
46
|
+
print(job.status) # e.g. "PENDING"
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### Scrape
|
|
50
|
+
|
|
51
|
+
Scrape a single web page and get its Markdown content:
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
page = ks.scrape.run("https://docs.stripe.com/get-started")
|
|
55
|
+
|
|
56
|
+
print(page.title)
|
|
57
|
+
print(page.markdown)
|
|
58
|
+
print(page.links)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Classify
|
|
62
|
+
|
|
63
|
+
Classify a business from its website:
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
biz = ks.classify.run("https://stripe.com")
|
|
67
|
+
|
|
68
|
+
print(biz.business_name)
|
|
69
|
+
print(biz.business_type)
|
|
70
|
+
print(biz.industry_sector)
|
|
71
|
+
print(biz.target_audience)
|
|
72
|
+
print(biz.confidence_score)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Screenshot
|
|
76
|
+
|
|
77
|
+
Capture a screenshot of a web page:
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
shot = ks.screenshot.run("https://stripe.com")
|
|
81
|
+
|
|
82
|
+
# shot.screenshot is a base64-encoded PNG string
|
|
83
|
+
import base64
|
|
84
|
+
image_bytes = base64.b64decode(shot.screenshot)
|
|
85
|
+
with open("screenshot.png", "wb") as f:
|
|
86
|
+
f.write(image_bytes)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Sitemap
|
|
90
|
+
|
|
91
|
+
Fetch the sitemap for a website:
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
site_map = ks.sitemap.run("https://stripe.com")
|
|
95
|
+
|
|
96
|
+
print(site_map.count)
|
|
97
|
+
for url in site_map.urls:
|
|
98
|
+
print(url)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### Search
|
|
102
|
+
|
|
103
|
+
Search the extracted knowledge base:
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
results = ks.search.run("pricing plans", limit=5)
|
|
107
|
+
|
|
108
|
+
print(f"Found {results.total} results")
|
|
109
|
+
for hit in results.hits:
|
|
110
|
+
print(hit.title, hit.score)
|
|
111
|
+
print(hit.content)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Webhooks
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
# Create a webhook
|
|
118
|
+
wh = ks.webhooks.create(
|
|
119
|
+
url="https://myapp.com/hook",
|
|
120
|
+
events=["EXTRACTION_COMPLETED", "JOB_FAILED"],
|
|
121
|
+
display_name="My App Webhook"
|
|
122
|
+
)
|
|
123
|
+
print(wh.id) # e.g. "weh_xxx"
|
|
124
|
+
print(wh.token) # signing token
|
|
125
|
+
|
|
126
|
+
# List all webhooks
|
|
127
|
+
all_webhooks = ks.webhooks.list()
|
|
128
|
+
for w in all_webhooks:
|
|
129
|
+
print(w.id, w.url, w.status)
|
|
130
|
+
|
|
131
|
+
# Send a test event to a webhook
|
|
132
|
+
ks.webhooks.test("weh_xxx")
|
|
133
|
+
|
|
134
|
+
# Delete a webhook
|
|
135
|
+
ks.webhooks.delete("weh_xxx")
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Jobs
|
|
139
|
+
|
|
140
|
+
Retrieve a job by ID:
|
|
141
|
+
|
|
142
|
+
```python
|
|
143
|
+
job = ks.jobs.get("job_xxx")
|
|
144
|
+
print(job.status) # PENDING | RUNNING | COMPLETED | FAILED
|
|
145
|
+
print(job.progress) # 0–100
|
|
146
|
+
print(job.result)
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Poll until a job completes (blocking):
|
|
150
|
+
|
|
151
|
+
```python
|
|
152
|
+
completed = ks.jobs.poll("job_xxx", interval_sec=5, timeout_sec=300)
|
|
153
|
+
print(completed.result)
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## Configuration
|
|
157
|
+
|
|
158
|
+
| Parameter | Default | Description |
|
|
159
|
+
|---|---|---|
|
|
160
|
+
| `api_key` | required | API key starting with `sk_ks_` |
|
|
161
|
+
| `base_url` | `https://api.knowledgesdk.com` | Override via `KNOWLEDGESDK_BASE_URL` env var |
|
|
162
|
+
| `timeout` | `30000` | Request timeout in milliseconds |
|
|
163
|
+
| `max_retries` | `5` | Max retries with exponential backoff |
|
|
164
|
+
| `debug` | `False` | Enable request/response logging |
|
|
165
|
+
|
|
166
|
+
### Environment Variables
|
|
167
|
+
|
|
168
|
+
```bash
|
|
169
|
+
export KNOWLEDGESDK_BASE_URL="https://api.knowledgesdk.com"
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
### Debug Mode
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
ks = KnowledgeSDK("sk_ks_your_key", debug=True)
|
|
176
|
+
|
|
177
|
+
# Or toggle at runtime
|
|
178
|
+
ks.set_debug_mode(True)
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### Custom Headers
|
|
182
|
+
|
|
183
|
+
```python
|
|
184
|
+
ks.set_header("X-Custom-Header", "value")
|
|
185
|
+
ks.set_headers({"X-Header-A": "a", "X-Header-B": "b"})
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
## Error Handling
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
from knowledgesdk import (
|
|
192
|
+
KnowledgeSDK,
|
|
193
|
+
AuthenticationError,
|
|
194
|
+
APIError,
|
|
195
|
+
RateLimitError,
|
|
196
|
+
NetworkError,
|
|
197
|
+
TimeoutError,
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
ks = KnowledgeSDK("sk_ks_your_key")
|
|
201
|
+
|
|
202
|
+
try:
|
|
203
|
+
result = ks.extract.run("https://stripe.com")
|
|
204
|
+
except AuthenticationError as e:
|
|
205
|
+
print(f"Auth error: {e.message}")
|
|
206
|
+
except RateLimitError as e:
|
|
207
|
+
print(f"Rate limited: {e.message}")
|
|
208
|
+
except APIError as e:
|
|
209
|
+
print(f"API error {e.status_code}: {e.message}")
|
|
210
|
+
except NetworkError as e:
|
|
211
|
+
print(f"Network error: {e.message}")
|
|
212
|
+
except TimeoutError as e:
|
|
213
|
+
print(f"Request timed out: {e.message}")
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
## Type Reference
|
|
217
|
+
|
|
218
|
+
All response objects are Pydantic models and are fully typed.
|
|
219
|
+
|
|
220
|
+
| Type | Description |
|
|
221
|
+
|---|---|
|
|
222
|
+
| `ExtractResult` | Full extraction with business and knowledge items |
|
|
223
|
+
| `BusinessClassification` | Business name, type, industry, audience, etc. |
|
|
224
|
+
| `KnowledgeItem` | A single knowledge article extracted from a page |
|
|
225
|
+
| `ScrapeResult` | Markdown content, title, description, links |
|
|
226
|
+
| `ScreenshotResult` | Base64 PNG screenshot |
|
|
227
|
+
| `SitemapResult` | List of URLs from the site's sitemap |
|
|
228
|
+
| `SearchResult` | Search hits, total count, query |
|
|
229
|
+
| `SearchHit` | Individual search result with score |
|
|
230
|
+
| `AsyncJobRef` | Job ID and initial status for async operations |
|
|
231
|
+
| `JobResult` | Full job status, progress, result, and error |
|
|
232
|
+
| `WebhookFull` | Webhook ID, URL, events, status, token |
|
|
233
|
+
|
|
234
|
+
## Requirements
|
|
235
|
+
|
|
236
|
+
- Python >= 3.8
|
|
237
|
+
- `requests >= 2.31.0`
|
|
238
|
+
- `pydantic >= 2.0.0`
|
|
239
|
+
|
|
240
|
+
## License
|
|
241
|
+
|
|
242
|
+
MIT
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""
|
|
2
|
+
KnowledgeSDK Python SDK
|
|
3
|
+
Official Python SDK for the KnowledgeSDK API
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from .client import KnowledgeSDK
|
|
7
|
+
from .errors import (
|
|
8
|
+
KnowledgeSDKError,
|
|
9
|
+
APIError,
|
|
10
|
+
AuthenticationError,
|
|
11
|
+
NetworkError,
|
|
12
|
+
RateLimitError,
|
|
13
|
+
TimeoutError,
|
|
14
|
+
)
|
|
15
|
+
from .interfaces.types import (
|
|
16
|
+
# Client Options
|
|
17
|
+
KnowledgeSDKClientOptions,
|
|
18
|
+
|
|
19
|
+
# Extraction Types
|
|
20
|
+
KnowledgeItem,
|
|
21
|
+
BusinessClassification,
|
|
22
|
+
ExtractResult,
|
|
23
|
+
|
|
24
|
+
# Scrape Types
|
|
25
|
+
ScrapeResult,
|
|
26
|
+
|
|
27
|
+
# Screenshot Types
|
|
28
|
+
ScreenshotResult,
|
|
29
|
+
|
|
30
|
+
# Sitemap Types
|
|
31
|
+
SitemapResult,
|
|
32
|
+
|
|
33
|
+
# Search Types
|
|
34
|
+
SearchHit,
|
|
35
|
+
SearchResult,
|
|
36
|
+
|
|
37
|
+
# Job Types
|
|
38
|
+
AsyncJobRef,
|
|
39
|
+
JobResult,
|
|
40
|
+
|
|
41
|
+
# Webhook Types
|
|
42
|
+
WebhookFull,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
__version__ = "0.1.0"
|
|
46
|
+
|
|
47
|
+
__all__ = [
|
|
48
|
+
# Main Client
|
|
49
|
+
"KnowledgeSDK",
|
|
50
|
+
|
|
51
|
+
# Errors
|
|
52
|
+
"KnowledgeSDKError",
|
|
53
|
+
"APIError",
|
|
54
|
+
"AuthenticationError",
|
|
55
|
+
"NetworkError",
|
|
56
|
+
"RateLimitError",
|
|
57
|
+
"TimeoutError",
|
|
58
|
+
|
|
59
|
+
# Client Options
|
|
60
|
+
"KnowledgeSDKClientOptions",
|
|
61
|
+
|
|
62
|
+
# Extraction Types
|
|
63
|
+
"KnowledgeItem",
|
|
64
|
+
"BusinessClassification",
|
|
65
|
+
"ExtractResult",
|
|
66
|
+
|
|
67
|
+
# Scrape Types
|
|
68
|
+
"ScrapeResult",
|
|
69
|
+
|
|
70
|
+
# Screenshot Types
|
|
71
|
+
"ScreenshotResult",
|
|
72
|
+
|
|
73
|
+
# Sitemap Types
|
|
74
|
+
"SitemapResult",
|
|
75
|
+
|
|
76
|
+
# Search Types
|
|
77
|
+
"SearchHit",
|
|
78
|
+
"SearchResult",
|
|
79
|
+
|
|
80
|
+
# Job Types
|
|
81
|
+
"AsyncJobRef",
|
|
82
|
+
"JobResult",
|
|
83
|
+
|
|
84
|
+
# Webhook Types
|
|
85
|
+
"WebhookFull",
|
|
86
|
+
]
|
|
File without changes
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""
|
|
2
|
+
KnowledgeSDK Classify API
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from ..interfaces.types import BusinessClassification
|
|
6
|
+
from ..utils.http_client import HttpClient
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Classify:
    """
    Classify API resource

    Classifies a business from its website URL.
    """

    def __init__(self, http_client: HttpClient):
        """
        Store the HTTP client used to issue requests.

        Args:
            http_client: Client whose ``request`` method performs the API call
        """
        self._http_client = http_client

    @staticmethod
    def _pick(response, camel_key, snake_key):
        """
        Return ``response[camel_key]``, falling back to ``response[snake_key]``.

        The fallback is used only when the camelCase value is missing or
        ``None``. Legitimate falsy values (``0``, ``""``, ``[]``) are kept,
        which a plain ``a or b`` expression would silently discard.
        """
        value = response.get(camel_key)
        if value is None:
            return response.get(snake_key)
        return value

    def run(self, url: str) -> BusinessClassification:
        """
        Classify the business at the given URL.

        Sends ``POST /classify`` with ``{"url": url}`` and maps the response
        onto a ``BusinessClassification``.

        Args:
            url: The website URL to classify

        Returns:
            BusinessClassification with business name, type, industry, audience, etc.

        Raises:
            APIError: If the API returns an error response
            NetworkError: If there's a network error
            TimeoutError: If the request times out
            AuthenticationError: If there's an authentication error
        """
        response = self._http_client.request(
            method="POST",
            path="/classify",
            data={"url": url},
        )

        # Support both camelCase and snake_case response keys
        pick = self._pick
        return BusinessClassification(
            business_name=pick(response, "businessName", "business_name"),
            business_type=pick(response, "businessType", "business_type"),
            industry_sector=pick(response, "industrySector", "industry_sector"),
            target_audience=pick(response, "targetAudience", "target_audience"),
            description=response.get("description"),
            value_proposition=pick(response, "valueProposition", "value_proposition"),
            pain_points=pick(response, "painPoints", "pain_points"),
            unique_selling_points=pick(response, "uniqueSellingPoints", "unique_selling_points"),
            key_insights=pick(response, "keyInsights", "key_insights"),
            confidence_score=pick(response, "confidenceScore", "confidence_score"),
        )
|