geonode-scraper-sdk 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geonode_scraper_sdk-0.1.0/CHANGELOG.md +9 -0
- geonode_scraper_sdk-0.1.0/LICENSE +21 -0
- geonode_scraper_sdk-0.1.0/MANIFEST.in +22 -0
- geonode_scraper_sdk-0.1.0/PKG-INFO +192 -0
- geonode_scraper_sdk-0.1.0/README.md +165 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/__init__.py +101 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/api/__init__.py +7 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/api/extraction_api.py +1018 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/api/statistics_api.py +343 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/api/system_api.py +280 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/api_client.py +808 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/api_response.py +21 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/configuration.py +626 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/exceptions.py +218 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/__init__.py +41 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/error_response.py +99 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/extract_async_response.py +95 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/extract_request.py +112 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/extract_sync_response.py +100 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/extraction_data.py +100 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/extraction_error.py +100 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/extraction_error_code.py +47 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/extraction_error_response.py +94 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/extraction_metadata.py +142 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/health_response.py +96 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/health_status.py +38 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/http_validation_error.py +96 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/job_collection_response.py +102 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/job_list_item_response.py +108 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/job_status.py +40 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/job_status_response.py +141 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/location_inner.py +138 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/output_format.py +37 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/processing_mode.py +37 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/proxy_settings.py +107 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/proxy_type.py +38 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/request_count_response.py +93 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/statistics_response.py +116 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/tokens_usage_response.py +91 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/models/validation_error.py +100 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/py.typed +0 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk/rest.py +263 -0
- geonode_scraper_sdk-0.1.0/geonode_scraper_sdk.egg-info/SOURCES.txt +43 -0
- geonode_scraper_sdk-0.1.0/pyproject.toml +120 -0
- geonode_scraper_sdk-0.1.0/setup.cfg +4 -0
- geonode_scraper_sdk-0.1.0/setup.py +8 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Geonode Team
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
include LICENSE
|
|
2
|
+
include CHANGELOG.md
|
|
3
|
+
include README.md
|
|
4
|
+
include pyproject.toml
|
|
5
|
+
include setup.py
|
|
6
|
+
|
|
7
|
+
graft geonode_scraper_sdk
|
|
8
|
+
|
|
9
|
+
prune .github
|
|
10
|
+
prune .openapi-generator
|
|
11
|
+
prune docs
|
|
12
|
+
prune geonode_scraper_sdk.egg-info
|
|
13
|
+
prune scripts
|
|
14
|
+
prune test
|
|
15
|
+
prune tests
|
|
16
|
+
|
|
17
|
+
exclude .gitignore
|
|
18
|
+
exclude .gitlab-ci.yml
|
|
19
|
+
exclude .openapi-generator-ignore
|
|
20
|
+
|
|
21
|
+
global-exclude __pycache__
|
|
22
|
+
global-exclude *.py[cod]
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: geonode-scraper-sdk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python SDK for the Geonode Scraper API
|
|
5
|
+
Author: Geonode Team
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Keywords: OpenAPI,OpenAPI-Generator,Scraper API
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
12
|
+
Classifier: Typing :: Typed
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Requires-Dist: urllib3<3.0.0,>=2.1.0
|
|
17
|
+
Requires-Dist: python-dateutil>=2.8.2
|
|
18
|
+
Requires-Dist: pydantic>=2.11
|
|
19
|
+
Requires-Dist: typing-extensions>=4.7.1
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: pytest>=7.2.1; extra == "dev"
|
|
22
|
+
Requires-Dist: pytest-cov>=2.8.1; extra == "dev"
|
|
23
|
+
Requires-Dist: mypy>=1.5; extra == "dev"
|
|
24
|
+
Requires-Dist: types-python-dateutil>=2.8.19.14; extra == "dev"
|
|
25
|
+
Requires-Dist: ruff>=0.12.11; extra == "dev"
|
|
26
|
+
Dynamic: license-file
|
|
27
|
+
|
|
28
|
+
# Geonode Scraper SDK
|
|
29
|
+
|
|
30
|
+
Python SDK for the Geonode Scraper API. It supports synchronous and asynchronous
|
|
31
|
+
content extraction, job polling, usage statistics, and service health checks.
|
|
32
|
+
|
|
33
|
+
## Requirements
|
|
34
|
+
|
|
35
|
+
- Python 3.10+
|
|
36
|
+
|
|
37
|
+
## Installation
|
|
38
|
+
|
|
39
|
+
```sh
|
|
40
|
+
pip install geonode-scraper-sdk
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Configuration And Authentication
|
|
44
|
+
|
|
45
|
+
Create a client configuration with your API base URL and API key.
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from geonode_scraper_sdk import Configuration
|
|
49
|
+
|
|
50
|
+
configuration = Configuration(
|
|
51
|
+
host="https://api.example.com",
|
|
52
|
+
api_key={"APIKeyHeader": "your-api-key"},
|
|
53
|
+
)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
If you do not set `host`, the generated client defaults to `http://localhost`.
|
|
57
|
+
You normally do not need `api_key_prefix` for this API.
|
|
58
|
+
|
|
59
|
+
## Quick Start
|
|
60
|
+
|
|
61
|
+
This example performs a synchronous extraction and prints the markdown result.
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
from geonode_scraper_sdk import (
|
|
65
|
+
ApiClient,
|
|
66
|
+
ApiException,
|
|
67
|
+
Configuration,
|
|
68
|
+
ExtractRequest,
|
|
69
|
+
ExtractionApi,
|
|
70
|
+
OutputFormat,
|
|
71
|
+
ProcessingMode,
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
configuration = Configuration(
|
|
75
|
+
host="https://api.example.com",
|
|
76
|
+
api_key={"APIKeyHeader": "your-api-key"},
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
with ApiClient(configuration) as api_client:
|
|
80
|
+
api = ExtractionApi(api_client)
|
|
81
|
+
|
|
82
|
+
try:
|
|
83
|
+
response = api.extract_v1_extract_post(
|
|
84
|
+
ExtractRequest(
|
|
85
|
+
url="https://example.com",
|
|
86
|
+
formats=[OutputFormat.MARKDOWN],
|
|
87
|
+
processing_mode=ProcessingMode.SYNC,
|
|
88
|
+
)
|
|
89
|
+
)
|
|
90
|
+
print(response.data.markdown)
|
|
91
|
+
print(response.tokens_charged)
|
|
92
|
+
except ApiException as exc:
|
|
93
|
+
print(exc.status)
|
|
94
|
+
print(exc.body)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Async Workflow
|
|
98
|
+
|
|
99
|
+
When `processing_mode=ProcessingMode.ASYNC`, the extract call returns an async
|
|
100
|
+
job response with a job ID and status URL.
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
from geonode_scraper_sdk import ApiClient, Configuration, ExtractRequest, ExtractionApi, ProcessingMode
|
|
104
|
+
|
|
105
|
+
configuration = Configuration(
|
|
106
|
+
host="https://api.example.com",
|
|
107
|
+
api_key={"APIKeyHeader": "your-api-key"},
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
with ApiClient(configuration) as api_client:
|
|
111
|
+
api = ExtractionApi(api_client)
|
|
112
|
+
|
|
113
|
+
submit = api.extract_v1_extract_post(
|
|
114
|
+
ExtractRequest(
|
|
115
|
+
url="https://example.com",
|
|
116
|
+
processing_mode=ProcessingMode.ASYNC,
|
|
117
|
+
)
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
job = api.get_job_result_v1_extract_job_id_get(submit.job_id)
|
|
121
|
+
print(job.status)
|
|
122
|
+
if job.data and job.data.markdown:
|
|
123
|
+
print(job.data.markdown)
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
The example above checks the job only once, so the result may not be ready yet. Call `get_job_result_v1_extract_job_id_get(job_id)` repeatedly to poll a single job, or
|
|
127
|
+
`list_jobs_v1_extract_jobs_get(...)` to inspect and filter job history.
|
|
128
|
+
|
|
129
|
+
## Error Handling
|
|
130
|
+
|
|
131
|
+
Non-2xx responses raise `ApiException` or one of its subclasses.
|
|
132
|
+
The exception includes the HTTP status, response body, and any deserialized
|
|
133
|
+
error model in `exc.data`.
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
from geonode_scraper_sdk import ApiClient, ApiException, Configuration, ExtractionApi, ExtractRequest
|
|
137
|
+
|
|
138
|
+
configuration = Configuration(
|
|
139
|
+
host="https://api.example.com",
|
|
140
|
+
api_key={"APIKeyHeader": "your-api-key"},
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
with ApiClient(configuration) as api_client:
|
|
144
|
+
api = ExtractionApi(api_client)
|
|
145
|
+
|
|
146
|
+
try:
|
|
147
|
+
api.extract_v1_extract_post(ExtractRequest(url="https://example.com"))
|
|
148
|
+
except ApiException as exc:
|
|
149
|
+
print(exc.status)
|
|
150
|
+
print(exc.body)
|
|
151
|
+
print(exc.data)
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
## Request Options
|
|
155
|
+
|
|
156
|
+
`ExtractRequest` supports the main extraction controls:
|
|
157
|
+
|
|
158
|
+
- `formats`: output formats to return; defaults to `[OutputFormat.HTML]`
|
|
159
|
+
- `render_js`: use a headless browser for JavaScript-rendered pages; defaults to `False`
|
|
160
|
+
- `processing_mode`: `ProcessingMode.SYNC` or `ProcessingMode.ASYNC`; defaults to sync
|
|
161
|
+
- `proxy`: optional `ProxySettings` for country and proxy type selection
|
|
162
|
+
- `headers`: optional request headers dictionary
|
|
163
|
+
|
|
164
|
+
Example with additional options:
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
from geonode_scraper_sdk import ExtractRequest, OutputFormat, ProcessingMode, ProxySettings, ProxyType
|
|
168
|
+
|
|
169
|
+
request = ExtractRequest(
|
|
170
|
+
url="https://example.com",
|
|
171
|
+
formats=[OutputFormat.HTML, OutputFormat.MARKDOWN],
|
|
172
|
+
render_js=True,
|
|
173
|
+
processing_mode=ProcessingMode.SYNC,
|
|
174
|
+
proxy=ProxySettings(country="US", type=ProxyType.RESIDENTIAL),
|
|
175
|
+
headers={"User-Agent": "geonode-scraper-sdk-demo"},
|
|
176
|
+
)
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
## API Reference
|
|
180
|
+
|
|
181
|
+
- `ExtractionApi.extract_v1_extract_post(extract_request)`
|
|
182
|
+
- `ExtractionApi.get_job_result_v1_extract_job_id_get(job_id)`
|
|
183
|
+
- `ExtractionApi.list_jobs_v1_extract_jobs_get(job_id=None, url=None, status=None, output=None, start_date=None, end_date=None, page=None, page_size=None)`
|
|
184
|
+
- `StatisticsApi.get_statistics_v1_statistics_get(start_date=None, end_date=None)`
|
|
185
|
+
- `SystemApi.health_check_health_get()`
|
|
186
|
+
|
|
187
|
+
## Advanced Usage
|
|
188
|
+
|
|
189
|
+
Each generated API method also exposes:
|
|
190
|
+
|
|
191
|
+
- `*_with_http_info()` to get the deserialized payload together with status and headers
|
|
192
|
+
- `*_without_preload_content()` to work with the raw HTTP response directly
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# Geonode Scraper SDK
|
|
2
|
+
|
|
3
|
+
Python SDK for the Geonode Scraper API. It supports synchronous and asynchronous
|
|
4
|
+
content extraction, job polling, usage statistics, and service health checks.
|
|
5
|
+
|
|
6
|
+
## Requirements
|
|
7
|
+
|
|
8
|
+
- Python 3.10+
|
|
9
|
+
|
|
10
|
+
## Installation
|
|
11
|
+
|
|
12
|
+
```sh
|
|
13
|
+
pip install geonode-scraper-sdk
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Configuration And Authentication
|
|
17
|
+
|
|
18
|
+
Create a client configuration with your API base URL and API key.
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
from geonode_scraper_sdk import Configuration
|
|
22
|
+
|
|
23
|
+
configuration = Configuration(
|
|
24
|
+
host="https://api.example.com",
|
|
25
|
+
api_key={"APIKeyHeader": "your-api-key"},
|
|
26
|
+
)
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
If you do not set `host`, the generated client defaults to `http://localhost`.
|
|
30
|
+
You normally do not need `api_key_prefix` for this API.
|
|
31
|
+
|
|
32
|
+
## Quick Start
|
|
33
|
+
|
|
34
|
+
This example performs a synchronous extraction and prints the markdown result.
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
from geonode_scraper_sdk import (
|
|
38
|
+
ApiClient,
|
|
39
|
+
ApiException,
|
|
40
|
+
Configuration,
|
|
41
|
+
ExtractRequest,
|
|
42
|
+
ExtractionApi,
|
|
43
|
+
OutputFormat,
|
|
44
|
+
ProcessingMode,
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
configuration = Configuration(
|
|
48
|
+
host="https://api.example.com",
|
|
49
|
+
api_key={"APIKeyHeader": "your-api-key"},
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
with ApiClient(configuration) as api_client:
|
|
53
|
+
api = ExtractionApi(api_client)
|
|
54
|
+
|
|
55
|
+
try:
|
|
56
|
+
response = api.extract_v1_extract_post(
|
|
57
|
+
ExtractRequest(
|
|
58
|
+
url="https://example.com",
|
|
59
|
+
formats=[OutputFormat.MARKDOWN],
|
|
60
|
+
processing_mode=ProcessingMode.SYNC,
|
|
61
|
+
)
|
|
62
|
+
)
|
|
63
|
+
print(response.data.markdown)
|
|
64
|
+
print(response.tokens_charged)
|
|
65
|
+
except ApiException as exc:
|
|
66
|
+
print(exc.status)
|
|
67
|
+
print(exc.body)
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Async Workflow
|
|
71
|
+
|
|
72
|
+
When `processing_mode=ProcessingMode.ASYNC`, the extract call returns an async
|
|
73
|
+
job response with a job ID and status URL.
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
from geonode_scraper_sdk import ApiClient, Configuration, ExtractRequest, ExtractionApi, ProcessingMode
|
|
77
|
+
|
|
78
|
+
configuration = Configuration(
|
|
79
|
+
host="https://api.example.com",
|
|
80
|
+
api_key={"APIKeyHeader": "your-api-key"},
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
with ApiClient(configuration) as api_client:
|
|
84
|
+
api = ExtractionApi(api_client)
|
|
85
|
+
|
|
86
|
+
submit = api.extract_v1_extract_post(
|
|
87
|
+
ExtractRequest(
|
|
88
|
+
url="https://example.com",
|
|
89
|
+
processing_mode=ProcessingMode.ASYNC,
|
|
90
|
+
)
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
job = api.get_job_result_v1_extract_job_id_get(submit.job_id)
|
|
94
|
+
print(job.status)
|
|
95
|
+
if job.data and job.data.markdown:
|
|
96
|
+
print(job.data.markdown)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
The example above checks the job only once, so the result may not be ready yet. Call `get_job_result_v1_extract_job_id_get(job_id)` repeatedly to poll a single job, or
|
|
100
|
+
`list_jobs_v1_extract_jobs_get(...)` to inspect and filter job history.
|
|
101
|
+
|
|
102
|
+
## Error Handling
|
|
103
|
+
|
|
104
|
+
Non-2xx responses raise `ApiException` or one of its subclasses.
|
|
105
|
+
The exception includes the HTTP status, response body, and any deserialized
|
|
106
|
+
error model in `exc.data`.
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
from geonode_scraper_sdk import ApiClient, ApiException, Configuration, ExtractionApi, ExtractRequest
|
|
110
|
+
|
|
111
|
+
configuration = Configuration(
|
|
112
|
+
host="https://api.example.com",
|
|
113
|
+
api_key={"APIKeyHeader": "your-api-key"},
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
with ApiClient(configuration) as api_client:
|
|
117
|
+
api = ExtractionApi(api_client)
|
|
118
|
+
|
|
119
|
+
try:
|
|
120
|
+
api.extract_v1_extract_post(ExtractRequest(url="https://example.com"))
|
|
121
|
+
except ApiException as exc:
|
|
122
|
+
print(exc.status)
|
|
123
|
+
print(exc.body)
|
|
124
|
+
print(exc.data)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## Request Options
|
|
128
|
+
|
|
129
|
+
`ExtractRequest` supports the main extraction controls:
|
|
130
|
+
|
|
131
|
+
- `formats`: output formats to return; defaults to `[OutputFormat.HTML]`
|
|
132
|
+
- `render_js`: use a headless browser for JavaScript-rendered pages; defaults to `False`
|
|
133
|
+
- `processing_mode`: `ProcessingMode.SYNC` or `ProcessingMode.ASYNC`; defaults to sync
|
|
134
|
+
- `proxy`: optional `ProxySettings` for country and proxy type selection
|
|
135
|
+
- `headers`: optional request headers dictionary
|
|
136
|
+
|
|
137
|
+
Example with additional options:
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
from geonode_scraper_sdk import ExtractRequest, OutputFormat, ProcessingMode, ProxySettings, ProxyType
|
|
141
|
+
|
|
142
|
+
request = ExtractRequest(
|
|
143
|
+
url="https://example.com",
|
|
144
|
+
formats=[OutputFormat.HTML, OutputFormat.MARKDOWN],
|
|
145
|
+
render_js=True,
|
|
146
|
+
processing_mode=ProcessingMode.SYNC,
|
|
147
|
+
proxy=ProxySettings(country="US", type=ProxyType.RESIDENTIAL),
|
|
148
|
+
headers={"User-Agent": "geonode-scraper-sdk-demo"},
|
|
149
|
+
)
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## API Reference
|
|
153
|
+
|
|
154
|
+
- `ExtractionApi.extract_v1_extract_post(extract_request)`
|
|
155
|
+
- `ExtractionApi.get_job_result_v1_extract_job_id_get(job_id)`
|
|
156
|
+
- `ExtractionApi.list_jobs_v1_extract_jobs_get(job_id=None, url=None, status=None, output=None, start_date=None, end_date=None, page=None, page_size=None)`
|
|
157
|
+
- `StatisticsApi.get_statistics_v1_statistics_get(start_date=None, end_date=None)`
|
|
158
|
+
- `SystemApi.health_check_health_get()`
|
|
159
|
+
|
|
160
|
+
## Advanced Usage
|
|
161
|
+
|
|
162
|
+
Each generated API method also exposes:
|
|
163
|
+
|
|
164
|
+
- `*_with_http_info()` to get the deserialized payload together with status and headers
|
|
165
|
+
- `*_without_preload_content()` to work with the raw HTTP response directly
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
|
|
3
|
+
# flake8: noqa
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Scraper API
|
|
7
|
+
|
|
8
|
+
No description provided (generated by OpenAPI Generator https://github.com/openapitools/openapi-generator)
|
|
9
|
+
|
|
10
|
+
The version of the OpenAPI document: 0.1.0
|
|
11
|
+
Generated by OpenAPI Generator (https://openapi-generator.tech)
|
|
12
|
+
|
|
13
|
+
Do not edit the class manually.
|
|
14
|
+
""" # noqa: E501
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
__version__ = "0.1.0"
|
|
18
|
+
|
|
19
|
+
# Define package exports
|
|
20
|
+
__all__ = [
|
|
21
|
+
"ExtractionApi",
|
|
22
|
+
"StatisticsApi",
|
|
23
|
+
"SystemApi",
|
|
24
|
+
"ApiResponse",
|
|
25
|
+
"ApiClient",
|
|
26
|
+
"Configuration",
|
|
27
|
+
"OpenApiException",
|
|
28
|
+
"ApiTypeError",
|
|
29
|
+
"ApiValueError",
|
|
30
|
+
"ApiKeyError",
|
|
31
|
+
"ApiAttributeError",
|
|
32
|
+
"ApiException",
|
|
33
|
+
"ErrorResponse",
|
|
34
|
+
"ExtractAsyncResponse",
|
|
35
|
+
"ExtractRequest",
|
|
36
|
+
"ExtractSyncResponse",
|
|
37
|
+
"ExtractionData",
|
|
38
|
+
"ExtractionError",
|
|
39
|
+
"ExtractionErrorCode",
|
|
40
|
+
"ExtractionErrorResponse",
|
|
41
|
+
"ExtractionMetadata",
|
|
42
|
+
"HTTPValidationError",
|
|
43
|
+
"HealthResponse",
|
|
44
|
+
"HealthStatus",
|
|
45
|
+
"JobCollectionResponse",
|
|
46
|
+
"JobListItemResponse",
|
|
47
|
+
"JobStatus",
|
|
48
|
+
"JobStatusResponse",
|
|
49
|
+
"LocationInner",
|
|
50
|
+
"OutputFormat",
|
|
51
|
+
"ProcessingMode",
|
|
52
|
+
"ProxySettings",
|
|
53
|
+
"ProxyType",
|
|
54
|
+
"RequestCountResponse",
|
|
55
|
+
"StatisticsResponse",
|
|
56
|
+
"TokensUsageResponse",
|
|
57
|
+
"ValidationError",
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
# import apis into sdk package
|
|
61
|
+
from geonode_scraper_sdk.api.extraction_api import ExtractionApi as ExtractionApi
|
|
62
|
+
from geonode_scraper_sdk.api.statistics_api import StatisticsApi as StatisticsApi
|
|
63
|
+
from geonode_scraper_sdk.api.system_api import SystemApi as SystemApi
|
|
64
|
+
|
|
65
|
+
# import ApiClient, ApiResponse, Configuration, and exception classes
|
|
66
|
+
from geonode_scraper_sdk.api_response import ApiResponse as ApiResponse
|
|
67
|
+
from geonode_scraper_sdk.api_client import ApiClient as ApiClient
|
|
68
|
+
from geonode_scraper_sdk.configuration import Configuration as Configuration
|
|
69
|
+
from geonode_scraper_sdk.exceptions import OpenApiException as OpenApiException
|
|
70
|
+
from geonode_scraper_sdk.exceptions import ApiTypeError as ApiTypeError
|
|
71
|
+
from geonode_scraper_sdk.exceptions import ApiValueError as ApiValueError
|
|
72
|
+
from geonode_scraper_sdk.exceptions import ApiKeyError as ApiKeyError
|
|
73
|
+
from geonode_scraper_sdk.exceptions import ApiAttributeError as ApiAttributeError
|
|
74
|
+
from geonode_scraper_sdk.exceptions import ApiException as ApiException
|
|
75
|
+
|
|
76
|
+
# import models into sdk package
|
|
77
|
+
from geonode_scraper_sdk.models.error_response import ErrorResponse as ErrorResponse
|
|
78
|
+
from geonode_scraper_sdk.models.extract_async_response import ExtractAsyncResponse as ExtractAsyncResponse
|
|
79
|
+
from geonode_scraper_sdk.models.extract_request import ExtractRequest as ExtractRequest
|
|
80
|
+
from geonode_scraper_sdk.models.extract_sync_response import ExtractSyncResponse as ExtractSyncResponse
|
|
81
|
+
from geonode_scraper_sdk.models.extraction_data import ExtractionData as ExtractionData
|
|
82
|
+
from geonode_scraper_sdk.models.extraction_error import ExtractionError as ExtractionError
|
|
83
|
+
from geonode_scraper_sdk.models.extraction_error_code import ExtractionErrorCode as ExtractionErrorCode
|
|
84
|
+
from geonode_scraper_sdk.models.extraction_error_response import ExtractionErrorResponse as ExtractionErrorResponse
|
|
85
|
+
from geonode_scraper_sdk.models.extraction_metadata import ExtractionMetadata as ExtractionMetadata
|
|
86
|
+
from geonode_scraper_sdk.models.http_validation_error import HTTPValidationError as HTTPValidationError
|
|
87
|
+
from geonode_scraper_sdk.models.health_response import HealthResponse as HealthResponse
|
|
88
|
+
from geonode_scraper_sdk.models.health_status import HealthStatus as HealthStatus
|
|
89
|
+
from geonode_scraper_sdk.models.job_collection_response import JobCollectionResponse as JobCollectionResponse
|
|
90
|
+
from geonode_scraper_sdk.models.job_list_item_response import JobListItemResponse as JobListItemResponse
|
|
91
|
+
from geonode_scraper_sdk.models.job_status import JobStatus as JobStatus
|
|
92
|
+
from geonode_scraper_sdk.models.job_status_response import JobStatusResponse as JobStatusResponse
|
|
93
|
+
from geonode_scraper_sdk.models.location_inner import LocationInner as LocationInner
|
|
94
|
+
from geonode_scraper_sdk.models.output_format import OutputFormat as OutputFormat
|
|
95
|
+
from geonode_scraper_sdk.models.processing_mode import ProcessingMode as ProcessingMode
|
|
96
|
+
from geonode_scraper_sdk.models.proxy_settings import ProxySettings as ProxySettings
|
|
97
|
+
from geonode_scraper_sdk.models.proxy_type import ProxyType as ProxyType
|
|
98
|
+
from geonode_scraper_sdk.models.request_count_response import RequestCountResponse as RequestCountResponse
|
|
99
|
+
from geonode_scraper_sdk.models.statistics_response import StatisticsResponse as StatisticsResponse
|
|
100
|
+
from geonode_scraper_sdk.models.tokens_usage_response import TokensUsageResponse as TokensUsageResponse
|
|
101
|
+
from geonode_scraper_sdk.models.validation_error import ValidationError as ValidationError
|