scrapio-py 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scrapio/__init__.py +21 -0
- scrapio/_http.py +159 -0
- scrapio/client.py +81 -0
- scrapio/errors.py +30 -0
- scrapio/resources/__init__.py +0 -0
- scrapio/resources/amazon.py +53 -0
- scrapio/resources/fetch.py +29 -0
- scrapio/resources/google.py +27 -0
- scrapio/resources/jobs.py +73 -0
- scrapio/resources/walmart.py +53 -0
- scrapio/resources/youtube.py +76 -0
- scrapio/types.py +136 -0
- scrapio_py-1.0.0.dist-info/METADATA +173 -0
- scrapio_py-1.0.0.dist-info/RECORD +15 -0
- scrapio_py-1.0.0.dist-info/WHEEL +4 -0
scrapio/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from .client import ApiClient, AsyncApiClient
|
|
2
|
+
from .errors import ApiError, AuthError, RateLimitError, CreditsExhaustedError
|
|
3
|
+
from .types import (
|
|
4
|
+
FetchRequest, FetchResponse,
|
|
5
|
+
CreateJobRequest, Job, JobResult,
|
|
6
|
+
GoogleSearchParams, GoogleSearchResponse,
|
|
7
|
+
AmazonProductResponse, AmazonSearchResponse,
|
|
8
|
+
WalmartProductResponse, WalmartSearchResponse,
|
|
9
|
+
YouTubeVideoResponse, YouTubeSearchResponse, YouTubeSubtitleResponse,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"ApiClient", "AsyncApiClient",
|
|
14
|
+
"ApiError", "AuthError", "RateLimitError", "CreditsExhaustedError",
|
|
15
|
+
"FetchRequest", "FetchResponse",
|
|
16
|
+
"CreateJobRequest", "Job", "JobResult",
|
|
17
|
+
"GoogleSearchParams", "GoogleSearchResponse",
|
|
18
|
+
"AmazonProductResponse", "AmazonSearchResponse",
|
|
19
|
+
"WalmartProductResponse", "WalmartSearchResponse",
|
|
20
|
+
"YouTubeVideoResponse", "YouTubeSearchResponse", "YouTubeSubtitleResponse",
|
|
21
|
+
]
|
scrapio/_http.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import time
|
|
5
|
+
from typing import Any, Optional, Type, TypeVar
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
from pydantic import BaseModel
|
|
9
|
+
|
|
10
|
+
from .errors import ApiError, AuthError, CreditsExhaustedError, RateLimitError
|
|
11
|
+
|
|
12
|
+
T = TypeVar("T", bound=BaseModel)
|
|
13
|
+
|
|
14
|
+
RETRYABLE_STATUS = {429, 503}
|
|
15
|
+
DEFAULT_TIMEOUT = 30.0
|
|
16
|
+
DEFAULT_MAX_RETRIES = 3
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _raise_for_status(status_code: int, body: dict[str, Any]) -> None:
    """Raise the SDK exception that corresponds to a failed API response.

    Args:
        status_code: HTTP status code of the failed response.
        body: Decoded error payload. The error code is read from
            ``body["error"]["code"]`` when that path exists.

    Raises:
        AuthError: If ``status_code`` is 401.
        RateLimitError: If ``status_code`` is 429.
        CreditsExhaustedError: If ``status_code`` is 402 or the error code
            is ``"credits_exhausted"``.
        ApiError: For every other status code.
    """
    # A decoded JSON body is not guaranteed to be an object, and "error" is
    # not guaranteed to be an object either (it may be null or a bare string).
    # Normalise both so the lookups below and the exception constructors can
    # safely call ``.get`` instead of failing with AttributeError.
    if not isinstance(body, dict):
        body = {}
    error = body.get("error")
    if not isinstance(error, dict):
        error = {} if error is None else {"message": str(error)}
        body = {**body, "error": error}

    code = error.get("code", "")
    if status_code == 401:
        raise AuthError(body)
    if status_code == 429:
        raise RateLimitError(body)
    if status_code == 402 or code == "credits_exhausted":
        raise CreditsExhaustedError(body)
    raise ApiError(status_code, body)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class SyncHttpClient:
    """Blocking transport built on ``httpx.Client``.

    Sends every request with an ``Authorization: Bearer <api_key>`` header,
    retries responses whose status is in ``RETRYABLE_STATUS`` and validates
    successful payloads into the requested pydantic model.
    """

    def __init__(
        self,
        base_url: str,
        api_key: str,
        timeout: float = DEFAULT_TIMEOUT,
        max_retries: int = DEFAULT_MAX_RETRIES,
    ) -> None:
        """Create the underlying ``httpx.Client``.

        Args:
            base_url: API root; trailing slashes are stripped.
            api_key: Token sent as a bearer credential.
            timeout: Default per-request timeout passed to httpx.
            max_retries: Number of retries after the first attempt.
        """
        self._base_url = base_url.rstrip("/")
        self._headers = {"Authorization": f"Bearer {api_key}"}
        self._timeout = timeout
        self._max_retries = max_retries
        self._client = httpx.Client(
            base_url=self._base_url,
            headers=self._headers,
            timeout=self._timeout,
        )

    def close(self) -> None:
        """Close the underlying ``httpx.Client``."""
        self._client.close()

    def __enter__(self) -> "SyncHttpClient":
        return self

    def __exit__(self, *args: Any) -> None:
        self.close()

    @staticmethod
    def _error_body(res: httpx.Response) -> dict[str, Any]:
        """Return the error payload of *res* as a dict with a dict ``error``."""
        try:
            body = res.json()
        except ValueError:
            # JSON and unicode decode failures are both ValueError subclasses.
            body = None
        # Valid JSON is not necessarily an object (a proxy may answer with a
        # string or list); downstream code calls ``.get`` on the body and on
        # its "error" entry, so anything else falls back to the raw text.
        if isinstance(body, dict) and isinstance(body.get("error", {}), dict):
            return body
        return {"request_id": "", "error": {"code": "unknown", "message": res.text}}

    def request(
        self,
        method: str,
        path: str,
        *,
        params: Optional[dict[str, Any]] = None,
        json: Optional[Any] = None,
        response_model: Type[T],
        timeout: Optional[float] = None,
    ) -> T:
        """Send one request and return the validated response model.

        Args:
            method: HTTP method name.
            path: Request path, resolved against the client's base URL.
            params: Query parameters; entries whose value is ``None`` are
                dropped before sending.
            json: JSON-serialisable request body.
            response_model: Pydantic model used to validate a successful body.
            timeout: Per-request override of the default timeout.

        Returns:
            ``response_model`` validated from the response JSON.

        Raises:
            ApiError: (or a subclass) when the response is not successful and
                no retry remains or the status is not retryable.
        """
        clean_params = {k: v for k, v in (params or {}).items() if v is not None}

        for attempt in range(self._max_retries + 1):
            res = self._client.request(
                method,
                path,
                params=clean_params or None,
                json=json,
                timeout=timeout or self._timeout,
            )
            if res.is_success:
                return response_model.model_validate(res.json())

            body = self._error_body(res)

            if res.status_code in RETRYABLE_STATUS and attempt < self._max_retries:
                # Exponential backoff: 1s, 2s, 4s, ... capped at 8s.
                backoff = min(1.0 * (2**attempt), 8.0)
                time.sleep(backoff)
                continue

            _raise_for_status(res.status_code, body)

        raise RuntimeError("Unexpected end of retry loop")  # unreachable
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class AsyncHttpClient:
    """Asynchronous transport built on ``httpx.AsyncClient``.

    Sends every request with an ``Authorization: Bearer <api_key>`` header,
    retries responses whose status is in ``RETRYABLE_STATUS`` and validates
    successful payloads into the requested pydantic model.
    """

    def __init__(
        self,
        base_url: str,
        api_key: str,
        timeout: float = DEFAULT_TIMEOUT,
        max_retries: int = DEFAULT_MAX_RETRIES,
    ) -> None:
        """Create the underlying ``httpx.AsyncClient``.

        Args:
            base_url: API root; trailing slashes are stripped.
            api_key: Token sent as a bearer credential.
            timeout: Default per-request timeout passed to httpx.
            max_retries: Number of retries after the first attempt.
        """
        self._base_url = base_url.rstrip("/")
        self._headers = {"Authorization": f"Bearer {api_key}"}
        self._timeout = timeout
        self._max_retries = max_retries
        self._client = httpx.AsyncClient(
            base_url=self._base_url,
            headers=self._headers,
            timeout=self._timeout,
        )

    async def aclose(self) -> None:
        """Close the underlying ``httpx.AsyncClient``."""
        await self._client.aclose()

    async def __aenter__(self) -> "AsyncHttpClient":
        return self

    async def __aexit__(self, *args: Any) -> None:
        await self.aclose()

    @staticmethod
    def _error_body(res: httpx.Response) -> dict[str, Any]:
        """Return the error payload of *res* as a dict with a dict ``error``."""
        try:
            body = res.json()
        except ValueError:
            # JSON and unicode decode failures are both ValueError subclasses.
            body = None
        # Valid JSON is not necessarily an object (a proxy may answer with a
        # string or list); downstream code calls ``.get`` on the body and on
        # its "error" entry, so anything else falls back to the raw text.
        if isinstance(body, dict) and isinstance(body.get("error", {}), dict):
            return body
        return {"request_id": "", "error": {"code": "unknown", "message": res.text}}

    async def request(
        self,
        method: str,
        path: str,
        *,
        params: Optional[dict[str, Any]] = None,
        json: Optional[Any] = None,
        response_model: Type[T],
        timeout: Optional[float] = None,
    ) -> T:
        """Send one request and return the validated response model.

        Args:
            method: HTTP method name.
            path: Request path, resolved against the client's base URL.
            params: Query parameters; entries whose value is ``None`` are
                dropped before sending.
            json: JSON-serialisable request body.
            response_model: Pydantic model used to validate a successful body.
            timeout: Per-request override of the default timeout.

        Returns:
            ``response_model`` validated from the response JSON.

        Raises:
            ApiError: (or a subclass) when the response is not successful and
                no retry remains or the status is not retryable.
        """
        clean_params = {k: v for k, v in (params or {}).items() if v is not None}

        for attempt in range(self._max_retries + 1):
            res = await self._client.request(
                method,
                path,
                params=clean_params or None,
                json=json,
                timeout=timeout or self._timeout,
            )
            if res.is_success:
                return response_model.model_validate(res.json())

            body = self._error_body(res)

            if res.status_code in RETRYABLE_STATUS and attempt < self._max_retries:
                # Exponential backoff: 1s, 2s, 4s, ... capped at 8s.
                backoff = min(1.0 * (2**attempt), 8.0)
                await asyncio.sleep(backoff)
                continue

            _raise_for_status(res.status_code, body)

        raise RuntimeError("Unexpected end of retry loop")  # unreachable
|
scrapio/client.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional
|
|
3
|
+
from ._http import SyncHttpClient, AsyncHttpClient, DEFAULT_TIMEOUT, DEFAULT_MAX_RETRIES
|
|
4
|
+
from .resources.fetch import FetchResource, AsyncFetchResource
|
|
5
|
+
from .resources.jobs import JobsResource, AsyncJobsResource
|
|
6
|
+
from .resources.google import GoogleResource, AsyncGoogleResource
|
|
7
|
+
from .resources.amazon import AmazonResource, AsyncAmazonResource
|
|
8
|
+
from .resources.walmart import WalmartResource, AsyncWalmartResource
|
|
9
|
+
from .resources.youtube import YouTubeResource, AsyncYouTubeResource
|
|
10
|
+
|
|
11
|
+
DEFAULT_BASE_URL = "https://api.webdataapi.com"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ApiClient:
    """Synchronous SDK entry point.

    Builds one ``SyncHttpClient`` and hands it to every resource object, so
    all resources share the same connection settings and credentials.
    """

    fetch: FetchResource
    jobs: JobsResource
    google: GoogleResource
    amazon: AmazonResource
    walmart: WalmartResource
    youtube: YouTubeResource

    def __init__(
        self,
        api_key: str,
        *,
        base_url: str = DEFAULT_BASE_URL,
        timeout: float = DEFAULT_TIMEOUT,
        max_retries: int = DEFAULT_MAX_RETRIES,
    ) -> None:
        """Create the shared transport and attach the resource objects.

        Args:
            api_key: Credential forwarded to the transport.
            base_url: API root URL.
            timeout: Default request timeout forwarded to the transport.
            max_retries: Retry budget forwarded to the transport.
        """
        transport = SyncHttpClient(
            base_url,
            api_key,
            timeout=timeout,
            max_retries=max_retries,
        )
        self.fetch = FetchResource(transport)
        self.jobs = JobsResource(transport)
        self.google = GoogleResource(transport)
        self.amazon = AmazonResource(transport)
        self.walmart = WalmartResource(transport)
        self.youtube = YouTubeResource(transport)
        # Kept so close() can release the connection pool.
        self._http = transport

    def close(self) -> None:
        """Close the shared transport."""
        self._http.close()

    def __enter__(self) -> "ApiClient":
        return self

    def __exit__(self, *args: object) -> None:
        self.close()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class AsyncApiClient:
    """Asynchronous SDK entry point.

    Builds one ``AsyncHttpClient`` and hands it to every async resource
    object, so all resources share the same connection settings.
    """

    fetch: AsyncFetchResource
    jobs: AsyncJobsResource
    google: AsyncGoogleResource
    amazon: AsyncAmazonResource
    walmart: AsyncWalmartResource
    youtube: AsyncYouTubeResource

    def __init__(
        self,
        api_key: str,
        *,
        base_url: str = DEFAULT_BASE_URL,
        timeout: float = DEFAULT_TIMEOUT,
        max_retries: int = DEFAULT_MAX_RETRIES,
    ) -> None:
        """Create the shared transport and attach the resource objects.

        Args:
            api_key: Credential forwarded to the transport.
            base_url: API root URL.
            timeout: Default request timeout forwarded to the transport.
            max_retries: Retry budget forwarded to the transport.
        """
        transport = AsyncHttpClient(
            base_url,
            api_key,
            timeout=timeout,
            max_retries=max_retries,
        )
        self.fetch = AsyncFetchResource(transport)
        self.jobs = AsyncJobsResource(transport)
        self.google = AsyncGoogleResource(transport)
        self.amazon = AsyncAmazonResource(transport)
        self.walmart = AsyncWalmartResource(transport)
        self.youtube = AsyncYouTubeResource(transport)
        # Kept so aclose() can release the connection pool.
        self._http = transport

    async def aclose(self) -> None:
        """Close the shared transport."""
        await self._http.aclose()

    async def __aenter__(self) -> "AsyncApiClient":
        return self

    async def __aexit__(self, *args: object) -> None:
        await self.aclose()
|
scrapio/errors.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class ApiError(Exception):
    """Base exception for a failed API response.

    The exception message is the payload's ``error.message`` (or
    ``"Unknown error"``); ``status_code``, ``request_id`` and ``code`` are
    exposed as attributes.
    """

    status_code: int
    request_id: str
    code: str

    def __init__(self, status_code: int, body: dict[str, Any]) -> None:
        """Build the exception from a status code and decoded error payload.

        Args:
            status_code: HTTP status code of the failed response.
            body: Decoded payload; expected shape is
                ``{"request_id": ..., "error": {"code": ..., "message": ...}}``.
                Missing or malformed parts fall back to defaults instead of
                raising while the error itself is being constructed.
        """
        payload = body if isinstance(body, dict) else {}
        error = payload.get("error")
        if isinstance(error, str):
            # Some gateways send {"error": "text"}; keep the text as message.
            error = {"message": error}
        elif not isinstance(error, dict):
            # Covers a missing key as well as null / other non-object values.
            error = {}
        super().__init__(error.get("message", "Unknown error"))
        self.status_code = status_code
        self.request_id = payload.get("request_id", "")
        self.code = error.get("code", "unknown")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class AuthError(ApiError):
    """``ApiError`` whose status code is fixed at 401."""

    def __init__(self, body: dict[str, Any]) -> None:
        """Build the error from the decoded payload *body*."""
        super().__init__(status_code=401, body=body)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class RateLimitError(ApiError):
    """``ApiError`` whose status code is fixed at 429."""

    def __init__(self, body: dict[str, Any]) -> None:
        """Build the error from the decoded payload *body*."""
        super().__init__(status_code=429, body=body)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class CreditsExhaustedError(ApiError):
    """``ApiError`` whose status code is fixed at 402."""

    def __init__(self, body: dict[str, Any]) -> None:
        """Build the error from the decoded payload *body*."""
        super().__init__(status_code=402, body=body)
|
|
File without changes
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional
|
|
3
|
+
from .._http import SyncHttpClient, AsyncHttpClient
|
|
4
|
+
from ..types import AmazonProductResponse, AmazonSearchResponse
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class AmazonResource:
    """Synchronous wrappers for the ``/v1/amazon/*`` endpoints."""

    def __init__(self, http: SyncHttpClient) -> None:
        self._http = http

    def get_product(self, asin: str, *, country: Optional[str] = None) -> AmazonProductResponse:
        """GET ``/v1/amazon/product`` for *asin*, optionally scoped to *country*."""
        lookup = {"asin": asin, "country": country}
        return self._http.request(
            "GET",
            "/v1/amazon/product",
            params=lookup,
            response_model=AmazonProductResponse,
        )

    def search(self, query: str, *, country: Optional[str] = None, page: Optional[int] = None) -> AmazonSearchResponse:
        """GET ``/v1/amazon/search`` for *query* with optional country and page."""
        filters = {"query": query, "country": country, "page": page}
        return self._http.request(
            "GET",
            "/v1/amazon/search",
            params=filters,
            response_model=AmazonSearchResponse,
        )

    def queue_search_crawl(self, query: str, *, country: Optional[str] = None) -> dict:
        """GET ``/v1/amazon/search/crawl`` and return the raw JSON object."""
        from typing import Any
        from pydantic import RootModel

        # The transport requires a pydantic model; a RootModel over a plain
        # dict lets the response pass through without a declared schema.
        class _R(RootModel[dict[str, Any]]):
            pass

        envelope = self._http.request(
            "GET",
            "/v1/amazon/search/crawl",
            params={"query": query, "country": country},
            response_model=_R,
        )
        return envelope.root
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class AsyncAmazonResource:
    """Asynchronous wrappers for the ``/v1/amazon/*`` endpoints."""

    def __init__(self, http: AsyncHttpClient) -> None:
        self._http = http

    async def get_product(self, asin: str, *, country: Optional[str] = None) -> AmazonProductResponse:
        """GET ``/v1/amazon/product`` for *asin*, optionally scoped to *country*."""
        return await self._http.request(
            "GET", "/v1/amazon/product",
            params={"asin": asin, "country": country},
            response_model=AmazonProductResponse,
        )

    async def search(self, query: str, *, country: Optional[str] = None, page: Optional[int] = None) -> AmazonSearchResponse:
        """GET ``/v1/amazon/search`` for *query* with optional country and page."""
        return await self._http.request(
            "GET", "/v1/amazon/search",
            params={"query": query, "country": country, "page": page},
            response_model=AmazonSearchResponse,
        )

    async def queue_search_crawl(self, query: str, *, country: Optional[str] = None) -> dict:
        """GET ``/v1/amazon/search/crawl`` and return the raw JSON object.

        Mirrors the synchronous ``AmazonResource.queue_search_crawl`` (same
        method, path and parameters) so both clients expose the same surface.
        """
        from pydantic import RootModel
        from typing import Any

        # The transport requires a pydantic model; a RootModel over a plain
        # dict lets the response pass through without a declared schema.
        class _R(RootModel[dict[str, Any]]):
            pass

        result = await self._http.request(
            "GET", "/v1/amazon/search/crawl",
            params={"query": query, "country": country},
            response_model=_R,
        )
        return result.root
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from .._http import SyncHttpClient, AsyncHttpClient
|
|
3
|
+
from ..types import FetchRequest, FetchResponse
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class FetchResource:
    """Synchronous wrapper for the ``/v1/fetch`` endpoint."""

    def __init__(self, http: SyncHttpClient) -> None:
        self._http = http

    def fetch(self, request: FetchRequest, *, timeout: float | None = None) -> FetchResponse:
        """POST *request* to ``/v1/fetch`` and return the validated response.

        Fields of *request* that are ``None`` are left out of the JSON body;
        *timeout* is forwarded to the transport as a per-request override.
        """
        payload = request.model_dump(exclude_none=True)
        return self._http.request(
            "POST",
            "/v1/fetch",
            json=payload,
            response_model=FetchResponse,
            timeout=timeout,
        )
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class AsyncFetchResource:
    """Asynchronous wrapper for the ``/v1/fetch`` endpoint."""

    def __init__(self, http: AsyncHttpClient) -> None:
        self._http = http

    async def fetch(self, request: FetchRequest, *, timeout: float | None = None) -> FetchResponse:
        """POST *request* to ``/v1/fetch`` and return the validated response.

        Fields of *request* that are ``None`` are left out of the JSON body;
        *timeout* is forwarded to the transport as a per-request override.
        """
        payload = request.model_dump(exclude_none=True)
        response = await self._http.request(
            "POST",
            "/v1/fetch",
            json=payload,
            response_model=FetchResponse,
            timeout=timeout,
        )
        return response
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from .._http import SyncHttpClient, AsyncHttpClient
|
|
3
|
+
from ..types import GoogleSearchParams, GoogleSearchResponse
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class GoogleResource:
    """Synchronous wrapper for the ``/v1/google/search`` endpoint."""

    def __init__(self, http: SyncHttpClient) -> None:
        self._http = http

    def search(self, params: GoogleSearchParams) -> GoogleSearchResponse:
        """GET ``/v1/google/search`` using the non-``None`` fields of *params* as the query string."""
        query_string = params.model_dump(exclude_none=True)
        return self._http.request(
            "GET",
            "/v1/google/search",
            params=query_string,
            response_model=GoogleSearchResponse,
        )
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class AsyncGoogleResource:
    """Asynchronous wrapper for the ``/v1/google/search`` endpoint."""

    def __init__(self, http: AsyncHttpClient) -> None:
        self._http = http

    async def search(self, params: GoogleSearchParams) -> GoogleSearchResponse:
        """GET ``/v1/google/search`` using the non-``None`` fields of *params* as the query string."""
        query_string = params.model_dump(exclude_none=True)
        response = await self._http.request(
            "GET",
            "/v1/google/search",
            params=query_string,
            response_model=GoogleSearchResponse,
        )
        return response
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import asyncio
|
|
3
|
+
import time
|
|
4
|
+
from .._http import SyncHttpClient, AsyncHttpClient
|
|
5
|
+
from ..types import CreateJobRequest, Job, JobResult
|
|
6
|
+
|
|
7
|
+
TERMINAL = {"completed", "partial", "failed", "cancelled"}
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class JobsResource:
    """Synchronous wrappers for the ``/v1/jobs`` endpoints."""

    def __init__(self, http: SyncHttpClient) -> None:
        self._http = http

    def create(self, request: CreateJobRequest) -> Job:
        """POST *request* (``None`` fields omitted) to ``/v1/jobs``."""
        return self._http.request(
            "POST", "/v1/jobs",
            json=request.model_dump(exclude_none=True),
            response_model=Job,
        )

    def get(self, job_id: str) -> Job:
        """GET ``/v1/jobs/{job_id}``."""
        return self._http.request("GET", f"/v1/jobs/{job_id}", response_model=Job)

    def get_result(self, job_id: str) -> JobResult:
        """GET ``/v1/jobs/{job_id}/result``."""
        return self._http.request("GET", f"/v1/jobs/{job_id}/result", response_model=JobResult)

    def wait_for_completion(
        self,
        job_id: str,
        *,
        poll_interval: float = 2.0,
        timeout: float = 300.0,
    ) -> JobResult:
        """Poll a job until its status is terminal, then return its result.

        Args:
            job_id: Identifier of the job to watch.
            poll_interval: Seconds to wait between status checks.
            timeout: Maximum seconds to wait overall.

        Returns:
            The job result fetched once the status is in ``TERMINAL``.

        Raises:
            TimeoutError: If the job is still not terminal at the deadline.
        """
        deadline = time.monotonic() + timeout
        while True:
            # Check first so the job is polled at least once, even when the
            # timeout is zero, and once more right at the deadline.
            if self.get(job_id).status in TERMINAL:
                return self.get_result(job_id)
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise TimeoutError(f"Job {job_id} did not complete within {timeout}s")
            # Never sleep past the deadline; a full poll_interval could
            # otherwise overshoot the requested timeout.
            time.sleep(min(poll_interval, remaining))
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class AsyncJobsResource:
    """Asynchronous wrappers for the ``/v1/jobs`` endpoints."""

    def __init__(self, http: AsyncHttpClient) -> None:
        self._http = http

    async def create(self, request: CreateJobRequest) -> Job:
        """POST *request* (``None`` fields omitted) to ``/v1/jobs``."""
        return await self._http.request(
            "POST", "/v1/jobs",
            json=request.model_dump(exclude_none=True),
            response_model=Job,
        )

    async def get(self, job_id: str) -> Job:
        """GET ``/v1/jobs/{job_id}``."""
        return await self._http.request("GET", f"/v1/jobs/{job_id}", response_model=Job)

    async def get_result(self, job_id: str) -> JobResult:
        """GET ``/v1/jobs/{job_id}/result``."""
        return await self._http.request("GET", f"/v1/jobs/{job_id}/result", response_model=JobResult)

    async def wait_for_completion(
        self,
        job_id: str,
        *,
        poll_interval: float = 2.0,
        timeout: float = 300.0,
    ) -> JobResult:
        """Poll a job until its status is terminal, then return its result.

        Args:
            job_id: Identifier of the job to watch.
            poll_interval: Seconds to wait between status checks.
            timeout: Maximum seconds to wait overall.

        Returns:
            The job result fetched once the status is in ``TERMINAL``.

        Raises:
            TimeoutError: If the job is still not terminal at the deadline.
        """
        # get_running_loop() is the supported way to reach the loop from a
        # coroutine; get_event_loop() has policy-dependent behaviour.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout
        while True:
            # Check first so the job is polled at least once, even when the
            # timeout is zero, and once more right at the deadline.
            job = await self.get(job_id)
            if job.status in TERMINAL:
                return await self.get_result(job_id)
            remaining = deadline - loop.time()
            if remaining <= 0:
                raise TimeoutError(f"Job {job_id} did not complete within {timeout}s")
            # Never sleep past the deadline; a full poll_interval could
            # otherwise overshoot the requested timeout.
            await asyncio.sleep(min(poll_interval, remaining))
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional
|
|
3
|
+
from .._http import SyncHttpClient, AsyncHttpClient
|
|
4
|
+
from ..types import WalmartProductResponse, WalmartSearchResponse
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class WalmartResource:
    """Synchronous wrappers for the ``/v1/walmart/*`` endpoints."""

    def __init__(self, http: SyncHttpClient) -> None:
        self._http = http

    def get_product(self, product_id: str, *, country: Optional[str] = None) -> WalmartProductResponse:
        """GET ``/v1/walmart/product`` for *product_id*, optionally scoped to *country*."""
        lookup = {"product_id": product_id, "country": country}
        return self._http.request(
            "GET",
            "/v1/walmart/product",
            params=lookup,
            response_model=WalmartProductResponse,
        )

    def search(self, query: str, *, country: Optional[str] = None, page: Optional[int] = None) -> WalmartSearchResponse:
        """GET ``/v1/walmart/search`` for *query* with optional country and page."""
        filters = {"query": query, "country": country, "page": page}
        return self._http.request(
            "GET",
            "/v1/walmart/search",
            params=filters,
            response_model=WalmartSearchResponse,
        )

    def queue_search_crawl(self, query: str, *, country: Optional[str] = None) -> dict:
        """GET ``/v1/walmart/search/crawl`` and return the raw JSON object."""
        from typing import Any
        from pydantic import RootModel

        # The transport requires a pydantic model; a RootModel over a plain
        # dict lets the response pass through without a declared schema.
        class _R(RootModel[dict[str, Any]]):
            pass

        envelope = self._http.request(
            "GET",
            "/v1/walmart/search/crawl",
            params={"query": query, "country": country},
            response_model=_R,
        )
        return envelope.root
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class AsyncWalmartResource:
    """Asynchronous wrappers for the ``/v1/walmart/*`` endpoints."""

    def __init__(self, http: AsyncHttpClient) -> None:
        self._http = http

    async def get_product(self, product_id: str, *, country: Optional[str] = None) -> WalmartProductResponse:
        """GET ``/v1/walmart/product`` for *product_id*, optionally scoped to *country*."""
        return await self._http.request(
            "GET", "/v1/walmart/product",
            params={"product_id": product_id, "country": country},
            response_model=WalmartProductResponse,
        )

    async def search(self, query: str, *, country: Optional[str] = None, page: Optional[int] = None) -> WalmartSearchResponse:
        """GET ``/v1/walmart/search`` for *query* with optional country and page."""
        return await self._http.request(
            "GET", "/v1/walmart/search",
            params={"query": query, "country": country, "page": page},
            response_model=WalmartSearchResponse,
        )

    async def queue_search_crawl(self, query: str, *, country: Optional[str] = None) -> dict:
        """GET ``/v1/walmart/search/crawl`` and return the raw JSON object.

        Mirrors the synchronous ``WalmartResource.queue_search_crawl`` (same
        method, path and parameters) so both clients expose the same surface.
        """
        from pydantic import RootModel
        from typing import Any

        # The transport requires a pydantic model; a RootModel over a plain
        # dict lets the response pass through without a declared schema.
        class _R(RootModel[dict[str, Any]]):
            pass

        result = await self._http.request(
            "GET", "/v1/walmart/search/crawl",
            params={"query": query, "country": country},
            response_model=_R,
        )
        return result.root
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional
|
|
3
|
+
from .._http import SyncHttpClient, AsyncHttpClient
|
|
4
|
+
from ..types import YouTubeVideoResponse, YouTubeSearchResponse, YouTubeSubtitleResponse
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class YouTubeResource:
    """Synchronous wrappers for the ``/v1/youtube/*`` endpoints."""

    def __init__(self, http: SyncHttpClient) -> None:
        self._http = http

    def search(self, query: str, *, page: Optional[int] = None, country: Optional[str] = None, language: Optional[str] = None) -> YouTubeSearchResponse:
        """GET ``/v1/youtube/search`` for *query* with optional page, country and language."""
        filters = {"query": query, "page": page, "country": country, "language": language}
        return self._http.request(
            "GET",
            "/v1/youtube/search",
            params=filters,
            response_model=YouTubeSearchResponse,
        )

    def get_video(self, video_id: str) -> YouTubeVideoResponse:
        """GET ``/v1/youtube/videos/{video_id}``."""
        endpoint = f"/v1/youtube/videos/{video_id}"
        return self._http.request("GET", endpoint, response_model=YouTubeVideoResponse)

    def get_subtitles(self, video_id: str, *, language: Optional[str] = None) -> YouTubeSubtitleResponse:
        """GET ``/v1/youtube/subtitles`` for *video_id*, optionally in *language*."""
        selection = {"video_id": video_id, "language": language}
        return self._http.request(
            "GET",
            "/v1/youtube/subtitles",
            params=selection,
            response_model=YouTubeSubtitleResponse,
        )

    def queue_search_crawl(self, query: str, *, page: Optional[int] = None) -> dict:
        """POST to ``/v1/youtube/search/crawl`` and return the raw JSON object."""
        from typing import Any
        from pydantic import RootModel

        # The transport requires a pydantic model; a RootModel over a plain
        # dict lets the response pass through without a declared schema.
        class _R(RootModel[dict[str, Any]]):
            pass

        envelope = self._http.request(
            "POST",
            "/v1/youtube/search/crawl",
            json={"query": query, "page": page},
            response_model=_R,
        )
        return envelope.root
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class AsyncYouTubeResource:
    """Asynchronous wrappers for the ``/v1/youtube/*`` endpoints."""

    def __init__(self, http: AsyncHttpClient) -> None:
        self._http = http

    async def search(self, query: str, *, page: Optional[int] = None, country: Optional[str] = None, language: Optional[str] = None) -> YouTubeSearchResponse:
        """GET ``/v1/youtube/search`` for *query* with optional page, country and language."""
        filters = {"query": query, "page": page, "country": country, "language": language}
        return await self._http.request(
            "GET",
            "/v1/youtube/search",
            params=filters,
            response_model=YouTubeSearchResponse,
        )

    async def get_video(self, video_id: str) -> YouTubeVideoResponse:
        """GET ``/v1/youtube/videos/{video_id}``."""
        endpoint = f"/v1/youtube/videos/{video_id}"
        return await self._http.request("GET", endpoint, response_model=YouTubeVideoResponse)

    async def get_subtitles(self, video_id: str, *, language: Optional[str] = None) -> YouTubeSubtitleResponse:
        """GET ``/v1/youtube/subtitles`` for *video_id*, optionally in *language*."""
        selection = {"video_id": video_id, "language": language}
        return await self._http.request(
            "GET",
            "/v1/youtube/subtitles",
            params=selection,
            response_model=YouTubeSubtitleResponse,
        )

    async def queue_search_crawl(self, query: str, *, page: Optional[int] = None) -> dict:
        """POST to ``/v1/youtube/search/crawl`` and return the raw JSON object."""
        from typing import Any
        from pydantic import RootModel

        # The transport requires a pydantic model; a RootModel over a plain
        # dict lets the response pass through without a declared schema.
        class _R(RootModel[dict[str, Any]]):
            pass

        envelope = await self._http.request(
            "POST",
            "/v1/youtube/search/crawl",
            json={"query": query, "page": page},
            response_model=_R,
        )
        return envelope.root
|
scrapio/types.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Any, Literal, Optional, Union
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# ---- Fetch ----
|
|
7
|
+
|
|
8
|
+
class FetchSession(BaseModel):
    """Session reference embedded in ``FetchRequest.session``."""

    id: str

class FetchRequest(BaseModel):
    """Request body for the fetch endpoint.

    Only ``url`` is required; every other field defaults to ``None``.
    """

    url: str
    render_js: Optional[bool] = None
    device: Optional[Literal["desktop", "mobile", "tablet"]] = None
    session: Optional[FetchSession] = None
    output: Optional[list[str]] = None
    extract: Optional[dict[str, Any]] = None
    actions: Optional[list[Any]] = None
    # NOTE(review): units are not stated here — confirm against the API docs.
    timeout: Optional[int] = None
    proxy: Optional[str] = None
    country: Optional[str] = None

class FetchResponse(BaseModel):
    """Validated response of the fetch endpoint."""

    request_id: str
    url: str
    status_code: int
    # Untyped mapping; presumably keyed by the requested output names — verify.
    outputs: dict[str, Any]
    diagnostics: Optional[dict[str, Any]] = None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# ---- Jobs ----
|
|
32
|
+
|
|
33
|
+
# Closed set of status strings.
# NOTE(review): Job.status below is typed as plain `str`, not JobStatus.
JobStatus = Literal["queued", "running", "completed", "partial", "failed", "cancelled"]

class CreateJobRequest(BaseModel):
    """Request body for creating a job; ``webhook_url`` is optional."""

    job_type: str
    payload: dict[str, Any]
    webhook_url: Optional[str] = None

class Job(BaseModel):
    """Job metadata as returned by the jobs endpoints."""

    request_id: str
    job_id: str
    job_type: str
    status: str
    # Timestamps are kept as the raw strings sent by the API (no parsing).
    created_at: str
    updated_at: Optional[str] = None
    webhook_url: Optional[str] = None

class JobError(BaseModel):
    """Error detail attached to a ``JobResult``."""

    code: str
    message: str

class JobResult(Job):
    """``Job`` extended with an optional ``result`` and optional ``error``."""

    result: Optional[Any] = None
    error: Optional[JobError] = None
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# ---- Google ----
|
|
59
|
+
|
|
60
|
+
# Allowed literal values for the corresponding GoogleSearchParams fields.
GoogleSearchType = Literal["classic", "news", "maps", "images", "lens", "shopping", "ai_mode", "ads"]
GoogleDevice = Literal["desktop", "mobile"]
GoogleDateRange = Literal["past_hour", "past_day", "past_week", "past_month", "past_year"]
GoogleSortBy = Literal["relevance", "reviews", "price_asc", "price_desc"]

class GoogleSearchParams(BaseModel):
    """Query parameters for a Google search.

    Only ``search`` is required; every other field defaults to ``None``.
    """

    search: str
    search_type: Optional[GoogleSearchType] = None
    country_code: Optional[str] = None
    language: Optional[str] = None
    device: Optional[GoogleDevice] = None
    # Numeric fields below also accept strings.
    page: Optional[Union[int, str]] = None
    date_range: Optional[GoogleDateRange] = None
    latitude: Optional[Union[float, str]] = None
    longitude: Optional[Union[float, str]] = None
    # NOTE(review): radius units are not stated here — confirm against the API docs.
    radius: Optional[Union[float, str]] = None
    sort_by: Optional[GoogleSortBy] = None

class GoogleSearchResponse(BaseModel):
    """Validated response of a Google search; result items are untyped."""

    request_id: str
    results: list[Any]
    pagination: Optional[dict[str, Any]] = None
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# ---- Amazon ----
|
|
85
|
+
|
|
86
|
+
class AmazonProductResponse(BaseModel):
    """Amazon product record.

    ``provider``, ``asin``, ``title`` and ``url`` are required; the remaining
    declared fields are optional.
    """

    provider: str
    asin: str
    title: str
    brand: Optional[str] = None
    price: Optional[float] = None
    currency: Optional[str] = None
    availability: Optional[str] = None
    rating: Optional[float] = None
    review_count: Optional[int] = None
    images: Optional[list[str]] = None
    bullet_points: Optional[list[str]] = None
    url: str
    # Pydantic config: accept fields that are not declared above.
    model_config = {"extra": "allow"}

class AmazonSearchResponse(BaseModel):
    """Amazon search response: a request id plus a list of product records."""

    request_id: str
    results: list[AmazonProductResponse]
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# ---- Walmart ----
|
|
107
|
+
|
|
108
|
+
class WalmartProductResponse(BaseModel):
    """Walmart product record.

    ``provider``, ``product_id``, ``title`` and ``url`` are required; the
    remaining declared fields are optional.
    """

    provider: str
    product_id: str
    title: str
    brand: Optional[str] = None
    price: Optional[float] = None
    availability: Optional[str] = None
    url: str
    # Pydantic config: accept fields that are not declared above.
    model_config = {"extra": "allow"}

class WalmartSearchResponse(BaseModel):
    """Walmart search response: a request id plus a list of product records."""

    request_id: str
    results: list[WalmartProductResponse]
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# ---- YouTube ----
|
|
124
|
+
|
|
125
|
+
class YouTubeVideoResponse(BaseModel):
    """YouTube video response; the video payload is an untyped mapping."""

    request_id: str
    video: dict[str, Any]

class YouTubeSearchResponse(BaseModel):
    """YouTube search response; result items are untyped."""

    request_id: str
    results: list[Any]

class YouTubeSubtitleResponse(BaseModel):
    """YouTube subtitle response for one video; subtitle items are untyped."""

    request_id: str
    video_id: str
    subtitles: list[Any]
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: scrapio-py
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Official Python SDK for the Scrapio API
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.9
|
|
7
|
+
Requires-Dist: httpx>=0.25.0
|
|
8
|
+
Requires-Dist: pydantic>=2.0.0
|
|
9
|
+
Provides-Extra: dev
|
|
10
|
+
Requires-Dist: anyio[trio]; extra == 'dev'
|
|
11
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == 'dev'
|
|
12
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
|
|
15
|
+
# scrapio
|
|
16
|
+
|
|
17
|
+
Official Python SDK for [Scrapio](https://scrapio.dev) — fetch, crawl, search, and extract structured data from any URL.
|
|
18
|
+
|
|
19
|
+
## Install
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pip install scrapio-py
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Requires Python 3.9 or later.
|
|
26
|
+
|
|
27
|
+
## Quickstart
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
from scrapio import ApiClient, FetchRequest
|
|
31
|
+
|
|
32
|
+
client = ApiClient(api_key="YOUR_API_KEY")
|
|
33
|
+
|
|
34
|
+
result = client.fetch.fetch(FetchRequest(
|
|
35
|
+
url="https://example.com",
|
|
36
|
+
output=["markdown"],
|
|
37
|
+
))
|
|
38
|
+
|
|
39
|
+
print(result.outputs["markdown"])
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Usage
|
|
43
|
+
|
|
44
|
+
### Fetch a page
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
result = client.fetch.fetch(FetchRequest(
|
|
48
|
+
url="https://news.ycombinator.com",
|
|
49
|
+
render_js=True,
|
|
50
|
+
output=["markdown"],
|
|
51
|
+
))
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Google Search
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from scrapio import GoogleSearchParams
|
|
58
|
+
|
|
59
|
+
results = client.google.search(GoogleSearchParams(
|
|
60
|
+
search="best web scraping API 2025",
|
|
61
|
+
country_code="us",
|
|
62
|
+
))
|
|
63
|
+
print(results.organic_results)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Amazon product
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
product = client.amazon.get_product("B08N5WRWNW")
|
|
70
|
+
print(product.title, product.price)
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Walmart search
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
items = client.walmart.search("headphones")
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### YouTube video
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
video = client.youtube.get_video("dQw4w9WgXcQ")
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Browser automation
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
result = client.interact.interact({
|
|
89
|
+
"url": "https://example.com",
|
|
90
|
+
"actions": [
|
|
91
|
+
{"type": "click", "selector": "#login"},
|
|
92
|
+
{"type": "type", "selector": "#email", "text": "user@example.com"},
|
|
93
|
+
],
|
|
94
|
+
})
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### Crawl a site
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
result = client.crawl.crawl({
|
|
101
|
+
"seeds": ["https://docs.example.com"],
|
|
102
|
+
"max_pages": 50,
|
|
103
|
+
})
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### Async jobs
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
from scrapio import CreateJobRequest
|
|
110
|
+
|
|
111
|
+
job = client.jobs.create(CreateJobRequest(
|
|
112
|
+
job_type="fetch",
|
|
113
|
+
payload={"url": "https://example.com", "output": ["markdown"]},
|
|
114
|
+
))
|
|
115
|
+
result = client.jobs.wait_for_completion(job.job_id, poll_interval=2.0, timeout=120.0)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Async client
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
import asyncio
|
|
122
|
+
from scrapio import AsyncApiClient, FetchRequest
|
|
123
|
+
|
|
124
|
+
async def main():
|
|
125
|
+
async with AsyncApiClient(api_key="YOUR_API_KEY") as client:
|
|
126
|
+
result = await client.fetch.fetch(FetchRequest(
|
|
127
|
+
url="https://example.com",
|
|
128
|
+
output=["markdown"],
|
|
129
|
+
))
|
|
130
|
+
print(result.outputs["markdown"])
|
|
131
|
+
|
|
132
|
+
asyncio.run(main())
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Configuration
|
|
136
|
+
|
|
137
|
+
| Option | Type | Default | Description |
|
|
138
|
+
|--------|------|---------|-------------|
|
|
139
|
+
| `api_key` | `str` | required | Your API key |
|
|
140
|
+
| `base_url` | `str` | `https://api.scrapio.dev` | Override for local/staging |
|
|
141
|
+
| `timeout` | `float` | `30.0` | Per-request timeout (seconds) |
|
|
142
|
+
| `max_retries` | `int` | `3` | Max retries on 429/503 |
|
|
143
|
+
|
|
144
|
+
## Error handling
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
from scrapio import (
|
|
148
|
+
ApiClient, FetchRequest,
|
|
149
|
+
AuthError, RateLimitError, CreditsExhaustedError, ApiError,
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
try:
|
|
153
|
+
client.fetch.fetch(FetchRequest(url="https://example.com"))
|
|
154
|
+
except AuthError:
|
|
155
|
+
print("Invalid API key")
|
|
156
|
+
except CreditsExhaustedError:
|
|
157
|
+
print("No credits remaining")
|
|
158
|
+
except RateLimitError:
|
|
159
|
+
print("Rate limited — back off and retry")
|
|
160
|
+
except ApiError as e:
|
|
161
|
+
print(f"API error {e.status_code}: {e}")
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## Links
|
|
165
|
+
|
|
166
|
+
- [Documentation](https://scrapio.dev/docs)
|
|
167
|
+
- [API Reference](https://scrapio.dev/docs/api-reference/fetch)
|
|
168
|
+
- [Dashboard](https://app.scrapio.dev)
|
|
169
|
+
- [Get an API key](https://scrapio.dev#pricing)
|
|
170
|
+
|
|
171
|
+
## License
|
|
172
|
+
|
|
173
|
+
MIT
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
scrapio/__init__.py,sha256=-iCqn0QoLJbAP_kOp6fnOhno9r1eZHC8YTscelSM8wc,880
|
|
2
|
+
scrapio/_http.py,sha256=3-TE7q5rNEqstKonl9qpRb9dauXzW4y4kQjBFQrYDHM,4757
|
|
3
|
+
scrapio/client.py,sha256=2huKExDSASsRMylWJoipGN9bIw0-FJkhxxpR-HvK-q8,2604
|
|
4
|
+
scrapio/errors.py,sha256=UFYyZZPA8uyZpLs-DnD06JuxgzHsHe6Uqmg9zCgyUTg,832
|
|
5
|
+
scrapio/types.py,sha256=ullzSFMNhPF-t9G5fdvXNqPt4XQhysXGTdljOoVj8Mc,3528
|
|
6
|
+
scrapio/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
scrapio/resources/amazon.py,sha256=pjPvRJlmY-cV9TB3lntQQQcSaOd0KzmO5lfhqXIe9Ys,2064
|
|
8
|
+
scrapio/resources/fetch.py,sha256=ZsGY31HtrmLoXB7bUEkK0b-RMvdfl6de8ADi-dCQgzU,959
|
|
9
|
+
scrapio/resources/google.py,sha256=AJ97IjU9bQOokUqhvmy33NqIUmIQrepR_Jz4bsbt02U,906
|
|
10
|
+
scrapio/resources/jobs.py,sha256=mTzYqfUvrEqe7qNgUknPj-MZJtKnl9Fe6HUlqP1yxRE,2500
|
|
11
|
+
scrapio/resources/walmart.py,sha256=1UV0_9z-5RwhXWelUve-a59HMTcAVdX1rLz4iBd1-QA,2117
|
|
12
|
+
scrapio/resources/youtube.py,sha256=8VI7RpgtrDxzesWt3tsnjiT_M4iPrSDLBU1SBX8fyBs,3100
|
|
13
|
+
scrapio_py-1.0.0.dist-info/METADATA,sha256=lONsXnzZXmbXDWyXOTz6Do3kNNGIYyg9NpKLBcSLn8g,3658
|
|
14
|
+
scrapio_py-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
15
|
+
scrapio_py-1.0.0.dist-info/RECORD,,
|