knowledgesdk 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,58 @@
1
# Release workflow: builds the package and publishes it to PyPI.
#
# Two entry points:
#   * workflow_dispatch - bumps the version in pyproject.toml, commits, tags
#     and pushes, then builds and publishes.
#   * push of a "v*" tag - builds and publishes the tagged commit as-is.
name: Publish

on:
  workflow_dispatch:
    inputs:
      version_bump:
        description: Version bump type
        required: true
        default: patch
        type: choice
        options:
          - patch
          - minor
          - major
  push:
    tags:
      - "v*"

jobs:
  publish:
    runs-on: ubuntu-latest
    permissions:
      # contents: write is needed to push the bump commit and tag.
      contents: write
      # id-token: write is requested for the PyPI publish action below,
      # which is given no explicit credentials in this workflow.
      id-token: write
    environment:
      name: pypi
      url: https://pypi.org/p/knowledgesdk
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Bump version
        if: github.event_name == 'workflow_dispatch'
        env:
          # Pass the input through the environment instead of expanding
          # ${{ ... }} inside the script, so the value is never parsed as
          # shell source.
          VERSION_BUMP: ${{ github.event.inputs.version_bump }}
        run: |
          # Anchor on the start of the line so keys such as `minversion = "..."`
          # or `target-version = "..."` can never be picked up by mistake.
          CURRENT=$(python -c "import re; print(re.search(r'^version = \"(.+?)\"', open('pyproject.toml').read(), re.M).group(1))")
          IFS='.' read -r MAJOR MINOR PATCH <<< "$CURRENT"
          case "$VERSION_BUMP" in
            major) MAJOR=$((MAJOR + 1)); MINOR=0; PATCH=0 ;;
            minor) MINOR=$((MINOR + 1)); PATCH=0 ;;
            patch) PATCH=$((PATCH + 1)) ;;
            # Fail loudly instead of falling through to an empty commit.
            *) echo "Unknown version bump type: ${VERSION_BUMP}" >&2; exit 1 ;;
          esac
          NEW_VERSION="${MAJOR}.${MINOR}.${PATCH}"
          sed -i "s/^version = \"${CURRENT}\"/version = \"${NEW_VERSION}\"/" pyproject.toml
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add pyproject.toml
          git commit -m "chore: bump version to ${NEW_VERSION}"
          git tag "v${NEW_VERSION}"
          git push && git push --tags

      - run: pip install build

      - run: python -m build

      - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,9 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .env
7
+ .DS_Store
8
+ venv/
9
+ .venv/
@@ -0,0 +1,266 @@
1
+ Metadata-Version: 2.4
2
+ Name: knowledgesdk
3
+ Version: 0.2.0
4
+ Summary: KnowledgeSDK Python SDK — Extract, classify and search web knowledge
5
+ Project-URL: Homepage, https://knowledgesdk.com
6
+ Project-URL: Repository, https://github.com/knowledgesdk/knowledgesdk-python
7
+ Project-URL: Issues, https://github.com/knowledgesdk/knowledgesdk-python/issues
8
+ Author: KnowledgeSDK
9
+ License-Expression: MIT
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Requires-Python: >=3.8
14
+ Requires-Dist: pydantic>=2.0.0
15
+ Requires-Dist: requests>=2.31.0
16
+ Provides-Extra: dev
17
+ Requires-Dist: black>=23.0.0; extra == 'dev'
18
+ Requires-Dist: flake8>=6.0.0; extra == 'dev'
19
+ Requires-Dist: isort>=5.0.0; extra == 'dev'
20
+ Requires-Dist: mypy>=1.0.0; extra == 'dev'
21
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
22
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
23
+ Description-Content-Type: text/markdown
24
+
25
+ # KnowledgeSDK Python SDK
26
+
27
+ Official Python client for the [KnowledgeSDK](https://knowledgesdk.com) API — extract, classify, scrape, screenshot, and search web knowledge programmatically.
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ pip install knowledgesdk
33
+ ```
34
+
35
+ ## Quick Start
36
+
37
+ ```python
38
+ from knowledgesdk import KnowledgeSDK
39
+
40
+ ks = KnowledgeSDK("sk_ks_your_key_here")
41
+ ```
42
+
43
+ ## Usage
44
+
45
+ ### Extract
46
+
47
+ Run a full knowledge extraction on a website (synchronous):
48
+
49
+ ```python
50
+ result = ks.extract.run("https://stripe.com")
51
+
52
+ print(result.business.business_name)
53
+ print(result.business.industry_sector)
54
+ print(result.pages_scraped)
55
+
56
+ for item in result.knowledge_items:
57
+ print(item.title, item.content)
58
+ ```
59
+
60
+ Run an asynchronous extraction with a callback:
61
+
62
+ ```python
63
+ job = ks.extract.run_async(
64
+ "https://stripe.com",
65
+ max_pages=20,
66
+ callback_url="https://myapp.com/webhook"
67
+ )
68
+
69
+ print(job.job_id) # e.g. "job_abc123"
70
+ print(job.status) # e.g. "PENDING"
71
+ ```
72
+
73
+ ### Scrape
74
+
75
+ Scrape a single web page and get its Markdown content:
76
+
77
+ ```python
78
+ page = ks.scrape.run("https://docs.stripe.com/get-started")
79
+
80
+ print(page.title)
81
+ print(page.markdown)
82
+ print(page.links)
83
+ ```
84
+
85
+ ### Classify
86
+
87
+ Classify a business from its website:
88
+
89
+ ```python
90
+ biz = ks.classify.run("https://stripe.com")
91
+
92
+ print(biz.business_name)
93
+ print(biz.business_type)
94
+ print(biz.industry_sector)
95
+ print(biz.target_audience)
96
+ print(biz.confidence_score)
97
+ ```
98
+
99
+ ### Screenshot
100
+
101
+ Capture a screenshot of a web page:
102
+
103
+ ```python
104
+ shot = ks.screenshot.run("https://stripe.com")
105
+
106
+ # shot.screenshot is a base64-encoded PNG string
107
+ import base64
108
+ image_bytes = base64.b64decode(shot.screenshot)
109
+ with open("screenshot.png", "wb") as f:
110
+ f.write(image_bytes)
111
+ ```
112
+
113
+ ### Sitemap
114
+
115
+ Fetch the sitemap for a website:
116
+
117
+ ```python
118
+ site_map = ks.sitemap.run("https://stripe.com")
119
+
120
+ print(site_map.count)
121
+ for url in site_map.urls:
122
+ print(url)
123
+ ```
124
+
125
+ ### Search
126
+
127
+ Search the extracted knowledge base:
128
+
129
+ ```python
130
+ results = ks.search.run("pricing plans", limit=5)
131
+
132
+ print(f"Found {results.total} results")
133
+ for hit in results.hits:
134
+ print(hit.title, hit.score)
135
+ print(hit.content)
136
+ ```
137
+
138
+ ### Webhooks
139
+
140
+ ```python
141
+ # Create a webhook
142
+ wh = ks.webhooks.create(
143
+ url="https://myapp.com/hook",
144
+ events=["EXTRACTION_COMPLETED", "JOB_FAILED"],
145
+ display_name="My App Webhook"
146
+ )
147
+ print(wh.id) # e.g. "weh_xxx"
148
+ print(wh.token) # signing token
149
+
150
+ # List all webhooks
151
+ all_webhooks = ks.webhooks.list()
152
+ for w in all_webhooks:
153
+ print(w.id, w.url, w.status)
154
+
155
+ # Send a test event to a webhook
156
+ ks.webhooks.test("weh_xxx")
157
+
158
+ # Delete a webhook
159
+ ks.webhooks.delete("weh_xxx")
160
+ ```
161
+
162
+ ### Jobs
163
+
164
+ Retrieve a job by ID:
165
+
166
+ ```python
167
+ job = ks.jobs.get("job_xxx")
168
+ print(job.status) # PENDING | RUNNING | COMPLETED | FAILED
169
+ print(job.progress) # 0–100
170
+ print(job.result)
171
+ ```
172
+
173
+ Poll until a job completes (blocking):
174
+
175
+ ```python
176
+ completed = ks.jobs.poll("job_xxx", interval_sec=5, timeout_sec=300)
177
+ print(completed.result)
178
+ ```
179
+
180
+ ## Configuration
181
+
182
+ | Parameter | Default | Description |
183
+ |---|---|---|
184
+ | `api_key` | required | API key starting with `sk_ks_` |
185
+ | `base_url` | `https://api.knowledgesdk.com` | Override via `KNOWLEDGESDK_BASE_URL` env var |
186
+ | `timeout` | `30000` | Request timeout in milliseconds |
187
+ | `max_retries` | `5` | Max retries with exponential backoff |
188
+ | `debug` | `False` | Enable request/response logging |
189
+
190
+ ### Environment Variables
191
+
192
+ ```bash
193
+ export KNOWLEDGESDK_BASE_URL="https://api.knowledgesdk.com"
194
+ ```
195
+
196
+ ### Debug Mode
197
+
198
+ ```python
199
+ ks = KnowledgeSDK("sk_ks_your_key", debug=True)
200
+
201
+ # Or toggle at runtime
202
+ ks.set_debug_mode(True)
203
+ ```
204
+
205
+ ### Custom Headers
206
+
207
+ ```python
208
+ ks.set_header("X-Custom-Header", "value")
209
+ ks.set_headers({"X-Header-A": "a", "X-Header-B": "b"})
210
+ ```
211
+
212
+ ## Error Handling
213
+
214
+ ```python
215
+ from knowledgesdk import (
216
+ KnowledgeSDK,
217
+ AuthenticationError,
218
+ APIError,
219
+ RateLimitError,
220
+ NetworkError,
221
+ TimeoutError,
222
+ )
223
+
224
+ ks = KnowledgeSDK("sk_ks_your_key")
225
+
226
+ try:
227
+ result = ks.extract.run("https://stripe.com")
228
+ except AuthenticationError as e:
229
+ print(f"Auth error: {e.message}")
230
+ except RateLimitError as e:
231
+ print(f"Rate limited: {e.message}")
232
+ except APIError as e:
233
+ print(f"API error {e.status_code}: {e.message}")
234
+ except NetworkError as e:
235
+ print(f"Network error: {e.message}")
236
+ except TimeoutError as e:
237
+ print(f"Request timed out: {e.message}")
238
+ ```
239
+
240
+ ## Type Reference
241
+
242
+ All response objects are Pydantic models and are fully typed.
243
+
244
+ | Type | Description |
245
+ |---|---|
246
+ | `ExtractResult` | Full extraction with business and knowledge items |
247
+ | `BusinessClassification` | Business name, type, industry, audience, etc. |
248
+ | `KnowledgeItem` | A single knowledge article extracted from a page |
249
+ | `ScrapeResult` | Markdown content, title, description, links |
250
+ | `ScreenshotResult` | Base64 PNG screenshot |
251
+ | `SitemapResult` | List of URLs from the site's sitemap |
252
+ | `SearchResult` | Search hits, total count, query |
253
+ | `SearchHit` | Individual search result with score |
254
+ | `AsyncJobRef` | Job ID and initial status for async operations |
255
+ | `JobResult` | Full job status, progress, result, and error |
256
+ | `WebhookFull` | Webhook ID, URL, events, status, token |
257
+
258
+ ## Requirements
259
+
260
+ - Python >= 3.8
261
+ - `requests >= 2.31.0`
262
+ - `pydantic >= 2.0.0`
263
+
264
+ ## License
265
+
266
+ MIT
@@ -0,0 +1,242 @@
1
+ # KnowledgeSDK Python SDK
2
+
3
+ Official Python client for the [KnowledgeSDK](https://knowledgesdk.com) API — extract, classify, scrape, screenshot, and search web knowledge programmatically.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install knowledgesdk
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ```python
14
+ from knowledgesdk import KnowledgeSDK
15
+
16
+ ks = KnowledgeSDK("sk_ks_your_key_here")
17
+ ```
18
+
19
+ ## Usage
20
+
21
+ ### Extract
22
+
23
+ Run a full knowledge extraction on a website (synchronous):
24
+
25
+ ```python
26
+ result = ks.extract.run("https://stripe.com")
27
+
28
+ print(result.business.business_name)
29
+ print(result.business.industry_sector)
30
+ print(result.pages_scraped)
31
+
32
+ for item in result.knowledge_items:
33
+ print(item.title, item.content)
34
+ ```
35
+
36
+ Run an asynchronous extraction with a callback:
37
+
38
+ ```python
39
+ job = ks.extract.run_async(
40
+ "https://stripe.com",
41
+ max_pages=20,
42
+ callback_url="https://myapp.com/webhook"
43
+ )
44
+
45
+ print(job.job_id) # e.g. "job_abc123"
46
+ print(job.status) # e.g. "PENDING"
47
+ ```
48
+
49
+ ### Scrape
50
+
51
+ Scrape a single web page and get its Markdown content:
52
+
53
+ ```python
54
+ page = ks.scrape.run("https://docs.stripe.com/get-started")
55
+
56
+ print(page.title)
57
+ print(page.markdown)
58
+ print(page.links)
59
+ ```
60
+
61
+ ### Classify
62
+
63
+ Classify a business from its website:
64
+
65
+ ```python
66
+ biz = ks.classify.run("https://stripe.com")
67
+
68
+ print(biz.business_name)
69
+ print(biz.business_type)
70
+ print(biz.industry_sector)
71
+ print(biz.target_audience)
72
+ print(biz.confidence_score)
73
+ ```
74
+
75
+ ### Screenshot
76
+
77
+ Capture a screenshot of a web page:
78
+
79
+ ```python
80
+ shot = ks.screenshot.run("https://stripe.com")
81
+
82
+ # shot.screenshot is a base64-encoded PNG string
83
+ import base64
84
+ image_bytes = base64.b64decode(shot.screenshot)
85
+ with open("screenshot.png", "wb") as f:
86
+ f.write(image_bytes)
87
+ ```
88
+
89
+ ### Sitemap
90
+
91
+ Fetch the sitemap for a website:
92
+
93
+ ```python
94
+ site_map = ks.sitemap.run("https://stripe.com")
95
+
96
+ print(site_map.count)
97
+ for url in site_map.urls:
98
+ print(url)
99
+ ```
100
+
101
+ ### Search
102
+
103
+ Search the extracted knowledge base:
104
+
105
+ ```python
106
+ results = ks.search.run("pricing plans", limit=5)
107
+
108
+ print(f"Found {results.total} results")
109
+ for hit in results.hits:
110
+ print(hit.title, hit.score)
111
+ print(hit.content)
112
+ ```
113
+
114
+ ### Webhooks
115
+
116
+ ```python
117
+ # Create a webhook
118
+ wh = ks.webhooks.create(
119
+ url="https://myapp.com/hook",
120
+ events=["EXTRACTION_COMPLETED", "JOB_FAILED"],
121
+ display_name="My App Webhook"
122
+ )
123
+ print(wh.id) # e.g. "weh_xxx"
124
+ print(wh.token) # signing token
125
+
126
+ # List all webhooks
127
+ all_webhooks = ks.webhooks.list()
128
+ for w in all_webhooks:
129
+ print(w.id, w.url, w.status)
130
+
131
+ # Send a test event to a webhook
132
+ ks.webhooks.test("weh_xxx")
133
+
134
+ # Delete a webhook
135
+ ks.webhooks.delete("weh_xxx")
136
+ ```
137
+
138
+ ### Jobs
139
+
140
+ Retrieve a job by ID:
141
+
142
+ ```python
143
+ job = ks.jobs.get("job_xxx")
144
+ print(job.status) # PENDING | RUNNING | COMPLETED | FAILED
145
+ print(job.progress) # 0–100
146
+ print(job.result)
147
+ ```
148
+
149
+ Poll until a job completes (blocking):
150
+
151
+ ```python
152
+ completed = ks.jobs.poll("job_xxx", interval_sec=5, timeout_sec=300)
153
+ print(completed.result)
154
+ ```
155
+
156
+ ## Configuration
157
+
158
+ | Parameter | Default | Description |
159
+ |---|---|---|
160
+ | `api_key` | required | API key starting with `sk_ks_` |
161
+ | `base_url` | `https://api.knowledgesdk.com` | Override via `KNOWLEDGESDK_BASE_URL` env var |
162
+ | `timeout` | `30000` | Request timeout in milliseconds |
163
+ | `max_retries` | `5` | Max retries with exponential backoff |
164
+ | `debug` | `False` | Enable request/response logging |
165
+
166
+ ### Environment Variables
167
+
168
+ ```bash
169
+ export KNOWLEDGESDK_BASE_URL="https://api.knowledgesdk.com"
170
+ ```
171
+
172
+ ### Debug Mode
173
+
174
+ ```python
175
+ ks = KnowledgeSDK("sk_ks_your_key", debug=True)
176
+
177
+ # Or toggle at runtime
178
+ ks.set_debug_mode(True)
179
+ ```
180
+
181
+ ### Custom Headers
182
+
183
+ ```python
184
+ ks.set_header("X-Custom-Header", "value")
185
+ ks.set_headers({"X-Header-A": "a", "X-Header-B": "b"})
186
+ ```
187
+
188
+ ## Error Handling
189
+
190
+ ```python
191
+ from knowledgesdk import (
192
+ KnowledgeSDK,
193
+ AuthenticationError,
194
+ APIError,
195
+ RateLimitError,
196
+ NetworkError,
197
+ TimeoutError,
198
+ )
199
+
200
+ ks = KnowledgeSDK("sk_ks_your_key")
201
+
202
+ try:
203
+ result = ks.extract.run("https://stripe.com")
204
+ except AuthenticationError as e:
205
+ print(f"Auth error: {e.message}")
206
+ except RateLimitError as e:
207
+ print(f"Rate limited: {e.message}")
208
+ except APIError as e:
209
+ print(f"API error {e.status_code}: {e.message}")
210
+ except NetworkError as e:
211
+ print(f"Network error: {e.message}")
212
+ except TimeoutError as e:
213
+ print(f"Request timed out: {e.message}")
214
+ ```
215
+
216
+ ## Type Reference
217
+
218
+ All response objects are Pydantic models and are fully typed.
219
+
220
+ | Type | Description |
221
+ |---|---|
222
+ | `ExtractResult` | Full extraction with business and knowledge items |
223
+ | `BusinessClassification` | Business name, type, industry, audience, etc. |
224
+ | `KnowledgeItem` | A single knowledge article extracted from a page |
225
+ | `ScrapeResult` | Markdown content, title, description, links |
226
+ | `ScreenshotResult` | Base64 PNG screenshot |
227
+ | `SitemapResult` | List of URLs from the site's sitemap |
228
+ | `SearchResult` | Search hits, total count, query |
229
+ | `SearchHit` | Individual search result with score |
230
+ | `AsyncJobRef` | Job ID and initial status for async operations |
231
+ | `JobResult` | Full job status, progress, result, and error |
232
+ | `WebhookFull` | Webhook ID, URL, events, status, token |
233
+
234
+ ## Requirements
235
+
236
+ - Python >= 3.8
237
+ - `requests >= 2.31.0`
238
+ - `pydantic >= 2.0.0`
239
+
240
+ ## License
241
+
242
+ MIT
@@ -0,0 +1,86 @@
1
+ """
2
+ KnowledgeSDK Python SDK
3
+ Official Python SDK for the KnowledgeSDK API
4
+ """
5
+
6
+ from .client import KnowledgeSDK
7
+ from .errors import (
8
+ KnowledgeSDKError,
9
+ APIError,
10
+ AuthenticationError,
11
+ NetworkError,
12
+ RateLimitError,
13
+ TimeoutError,
14
+ )
15
+ from .interfaces.types import (
16
+ # Client Options
17
+ KnowledgeSDKClientOptions,
18
+
19
+ # Extraction Types
20
+ KnowledgeItem,
21
+ BusinessClassification,
22
+ ExtractResult,
23
+
24
+ # Scrape Types
25
+ ScrapeResult,
26
+
27
+ # Screenshot Types
28
+ ScreenshotResult,
29
+
30
+ # Sitemap Types
31
+ SitemapResult,
32
+
33
+ # Search Types
34
+ SearchHit,
35
+ SearchResult,
36
+
37
+ # Job Types
38
+ AsyncJobRef,
39
+ JobResult,
40
+
41
+ # Webhook Types
42
+ WebhookFull,
43
+ )
44
+
45
# Read the version from the installed distribution metadata so that it cannot
# drift from the version declared in pyproject.toml. The literal below is only
# a fallback for source trees that were never installed.
import importlib.metadata as _metadata

try:
    __version__ = _metadata.version("knowledgesdk")
except _metadata.PackageNotFoundError:
    __version__ = "0.2.0"
46
+
47
# Names re-exported by the package, grouped by category.
__all__ = [
    # Main client
    "KnowledgeSDK",
    # Errors
    "KnowledgeSDKError", "APIError", "AuthenticationError",
    "NetworkError", "RateLimitError", "TimeoutError",
    # Client options
    "KnowledgeSDKClientOptions",
    # Extraction
    "KnowledgeItem", "BusinessClassification", "ExtractResult",
    # Scrape / screenshot / sitemap
    "ScrapeResult", "ScreenshotResult", "SitemapResult",
    # Search
    "SearchHit", "SearchResult",
    # Jobs
    "AsyncJobRef", "JobResult",
    # Webhooks
    "WebhookFull",
]
File without changes
@@ -0,0 +1,54 @@
1
+ """
2
+ KnowledgeSDK Classify API
3
+ """
4
+
5
+ from ..interfaces.types import BusinessClassification
6
+ from ..utils.http_client import HttpClient
7
+
8
+
9
class Classify:
    """
    Classify API resource

    Classifies a business from its website URL.
    """

    # BusinessClassification field -> response keys that may carry it, in
    # lookup order (camelCase first, then snake_case).
    _FIELD_KEYS = (
        ("business_name", ("businessName", "business_name")),
        ("business_type", ("businessType", "business_type")),
        ("industry_sector", ("industrySector", "industry_sector")),
        ("target_audience", ("targetAudience", "target_audience")),
        ("description", ("description",)),
        ("value_proposition", ("valueProposition", "value_proposition")),
        ("pain_points", ("painPoints", "pain_points")),
        ("unique_selling_points", ("uniqueSellingPoints", "unique_selling_points")),
        ("key_insights", ("keyInsights", "key_insights")),
        ("confidence_score", ("confidenceScore", "confidence_score")),
    )

    def __init__(self, http_client: "HttpClient"):
        self._http_client = http_client

    @staticmethod
    def _first_present(response, *keys):
        """
        Return the value of the first key in ``keys`` whose value is not None.

        Unlike ``a or b``, this keeps legitimate falsy values such as ``0``,
        ``""`` or ``[]`` instead of discarding them.

        Args:
            response: Mapping-like object exposing ``get``
            *keys: Candidate keys, in lookup order

        Returns:
            The first non-None value, or None if no key yields one
        """
        for key in keys:
            value = response.get(key)
            if value is not None:
                return value
        return None

    @classmethod
    def _build_fields(cls, response):
        """
        Translate an API response into BusinessClassification keyword arguments.

        Args:
            response: Mapping-like object exposing ``get``

        Returns:
            Dict keyed by BusinessClassification field name
        """
        return {
            field: cls._first_present(response, *keys)
            for field, keys in cls._FIELD_KEYS
        }

    def run(self, url: str) -> "BusinessClassification":
        """
        Classify the business at the given URL.

        Args:
            url: The website URL to classify

        Returns:
            BusinessClassification with business name, type, industry, audience, etc.

        Raises:
            APIError: If the API returns an error response
            NetworkError: If there's a network error
            TimeoutError: If the request times out
            AuthenticationError: If there's an authentication error
        """
        response = self._http_client.request(
            method="POST",
            path="/classify",
            data={"url": url},
        )

        # Support both camelCase and snake_case response keys
        return BusinessClassification(**self._build_fields(response))