pyaterochka-api 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyaterochka_api/__init__.py +2 -1
- pyaterochka_api/api.py +47 -146
- pyaterochka_api/enums.py +14 -0
- pyaterochka_api/manager.py +42 -40
- pyaterochka_api/tools.py +121 -0
- {pyaterochka_api-0.1.7.dist-info → pyaterochka_api-0.1.9.dist-info}/METADATA +10 -3
- pyaterochka_api-0.1.9.dist-info/RECORD +13 -0
- {pyaterochka_api-0.1.7.dist-info → pyaterochka_api-0.1.9.dist-info}/WHEEL +1 -1
- tests/{base_tests.py → api_tests.py} +16 -25
- tests/tools_tests.py +30 -0
- pyaterochka_api-0.1.7.dist-info/RECORD +0 -12
- tests/snapshots/__init__.py +0 -0
- tests/snapshots/snap_base_tests.py +0 -813
- {pyaterochka_api-0.1.7.dist-info → pyaterochka_api-0.1.9.dist-info}/licenses/LICENSE +0 -0
- {pyaterochka_api-0.1.7.dist-info → pyaterochka_api-0.1.9.dist-info}/top_level.txt +0 -0
pyaterochka_api/__init__.py
CHANGED
pyaterochka_api/api.py
CHANGED
@@ -1,10 +1,8 @@
|
|
1
1
|
import aiohttp
|
2
2
|
from fake_useragent import UserAgent
|
3
|
-
from enum import Enum
|
4
|
-
import re
|
5
|
-
from tqdm.asyncio import tqdm
|
6
3
|
from camoufox import AsyncCamoufox
|
7
|
-
import
|
4
|
+
import logging
|
5
|
+
from .tools import parse_proxy, parse_js, get_env_proxy
|
8
6
|
|
9
7
|
|
10
8
|
class PyaterochkaAPI:
|
@@ -12,14 +10,13 @@ class PyaterochkaAPI:
|
|
12
10
|
Класс для загрузки JSON/image и парсинга JavaScript-конфигураций из удаленного источника.
|
13
11
|
"""
|
14
12
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
def __init__(self, debug: bool = False, proxy: str = None, autoclose_browser: bool = False, trust_env: bool = False, timeout: int = 10):
|
13
|
+
def __init__(self,
|
14
|
+
debug: bool = False,
|
15
|
+
proxy: str | None = None,
|
16
|
+
autoclose_browser: bool = False,
|
17
|
+
trust_env: bool = False,
|
18
|
+
timeout: float = 10.0
|
19
|
+
):
|
23
20
|
self._debug = debug
|
24
21
|
self._proxy = proxy
|
25
22
|
self._session = None
|
@@ -29,14 +26,13 @@ class PyaterochkaAPI:
|
|
29
26
|
self._trust_env = trust_env
|
30
27
|
self._timeout = timeout
|
31
28
|
|
32
|
-
|
33
|
-
|
34
|
-
|
29
|
+
self._logger = logging.getLogger(self.__class__.__name__)
|
30
|
+
handler = logging.StreamHandler()
|
31
|
+
formatter = logging.Formatter('[%(asctime)s] %(levelname)s %(name)s: %(message)s')
|
32
|
+
handler.setFormatter(formatter)
|
33
|
+
if not self._logger.hasHandlers():
|
34
|
+
self._logger.addHandler(handler)
|
35
35
|
|
36
|
-
@proxy.setter
|
37
|
-
def proxy(self, value: str | None) -> None:
|
38
|
-
self._proxy = value
|
39
|
-
|
40
36
|
async def fetch(self, url: str) -> tuple[bool, dict | None | str, str]:
|
41
37
|
"""
|
42
38
|
Выполняет HTTP-запрос к указанному URL и возвращает результат.
|
@@ -46,12 +42,10 @@ class PyaterochkaAPI:
|
|
46
42
|
args = {'url': url, 'timeout': aiohttp.ClientTimeout(total=self._timeout)}
|
47
43
|
if self._proxy: args["proxy"] = self._proxy
|
48
44
|
|
49
|
-
if self.
|
50
|
-
print(f"Requesting \"{url}\" with proxy \"{args.get('proxy')}\", timeout {self._timeout}...", flush=True)
|
45
|
+
self._logger.info(f'Requesting "{url}" with proxy: "{args.get("proxy") or ("SYSTEM_PROXY" if get_env_proxy() else "WITHOUT")}", timeout: {self._timeout}...')
|
51
46
|
|
52
47
|
async with self._session.get(**args) as response:
|
53
|
-
|
54
|
-
print(f"Response status: {response.status}", flush=True)
|
48
|
+
self._logger.info(f'Response status: {response.status}')
|
55
49
|
|
56
50
|
if response.status == 200:
|
57
51
|
if response.headers['content-type'] == 'application/json':
|
@@ -63,79 +57,12 @@ class PyaterochkaAPI:
|
|
63
57
|
|
64
58
|
return True, await output_response, response.headers['content-type']
|
65
59
|
elif response.status == 403:
|
66
|
-
|
67
|
-
print("Anti-bot protection. Use Russia IP address and try again.", flush=True)
|
60
|
+
self._logger.warning('Anti-bot protection. Use Russia IP address and try again.')
|
68
61
|
return False, None, ''
|
69
62
|
else:
|
70
|
-
|
71
|
-
print(f"Unexpected error: {response.status}", flush=True)
|
63
|
+
self._logger.error(f'Unexpected error: {response.status}')
|
72
64
|
raise Exception(f"Response status: {response.status} (unknown error/status code)")
|
73
65
|
|
74
|
-
async def _parse_js(self, js_code: str) -> dict | None:
|
75
|
-
"""
|
76
|
-
Парсит JavaScript-код и извлекает данные из переменной "n".
|
77
|
-
|
78
|
-
:param js_code: JS-код в виде строки.
|
79
|
-
:return: Распарсенные данные в виде словаря или None.
|
80
|
-
"""
|
81
|
-
matches = re.finditer(self.Patterns.JS.value, js_code)
|
82
|
-
match_list = list(matches)
|
83
|
-
|
84
|
-
if self._debug:
|
85
|
-
print(f"Found matches {len(match_list)}")
|
86
|
-
progress_bar = tqdm(total=33, desc="Parsing JS", position=0)
|
87
|
-
|
88
|
-
async def parse_match(match: str) -> dict:
|
89
|
-
result = {}
|
90
|
-
|
91
|
-
if self._debug:
|
92
|
-
progress_bar.set_description("Parsing strings")
|
93
|
-
|
94
|
-
# Парсинг строк
|
95
|
-
string_matches = re.finditer(self.Patterns.STR.value, match)
|
96
|
-
for m in string_matches:
|
97
|
-
key, value = m.group(1), m.group(2)
|
98
|
-
result[key] = value.replace('\"', '"').replace('\\', '\\')
|
99
|
-
|
100
|
-
if self._debug:
|
101
|
-
progress_bar.update(1)
|
102
|
-
progress_bar.set_description("Parsing dictionaries")
|
103
|
-
|
104
|
-
# Парсинг словарей
|
105
|
-
dict_matches = re.finditer(self.Patterns.DICT.value, match)
|
106
|
-
for m in dict_matches:
|
107
|
-
key, value = m.group(1), m.group(2)
|
108
|
-
if not re.search(self.Patterns.STR.value, value):
|
109
|
-
result[key] = await parse_match(value)
|
110
|
-
|
111
|
-
if self._debug:
|
112
|
-
progress_bar.update(1)
|
113
|
-
progress_bar.set_description("Parsing lists")
|
114
|
-
|
115
|
-
# Парсинг списков
|
116
|
-
list_matches = re.finditer(self.Patterns.LIST.value, match)
|
117
|
-
for m in list_matches:
|
118
|
-
key, value = m.group(1), m.group(2)
|
119
|
-
if not re.search(self.Patterns.STR.value, value):
|
120
|
-
result[key] = [await parse_match(item.group(0)) for item in re.finditer(self.Patterns.FIND.value, value)]
|
121
|
-
|
122
|
-
if self._debug:
|
123
|
-
progress_bar.update(1)
|
124
|
-
|
125
|
-
return result
|
126
|
-
|
127
|
-
if match_list and len(match_list) >= 1:
|
128
|
-
if self._debug:
|
129
|
-
print("Starting to parse match")
|
130
|
-
result = await parse_match(match_list[1].group(0))
|
131
|
-
if self._debug:
|
132
|
-
progress_bar.close()
|
133
|
-
return result
|
134
|
-
else:
|
135
|
-
if self._debug:
|
136
|
-
progress_bar.close()
|
137
|
-
raise Exception("N variable in JS code not found")
|
138
|
-
|
139
66
|
async def download_config(self, config_url: str) -> dict | None:
|
140
67
|
"""
|
141
68
|
Загружает и парсит JavaScript-конфигурацию с указанного URL.
|
@@ -147,17 +74,17 @@ class PyaterochkaAPI:
|
|
147
74
|
|
148
75
|
if not is_success:
|
149
76
|
if self._debug:
|
150
|
-
|
77
|
+
self._logger.error('Failed to fetch JS code')
|
151
78
|
return None
|
152
79
|
elif self._debug:
|
153
|
-
|
80
|
+
self._logger.debug('JS code fetched successfully')
|
154
81
|
|
155
|
-
return await
|
82
|
+
return await parse_js(js_code=js_code, debug=self._debug, logger=self._logger)
|
156
83
|
|
157
84
|
|
158
|
-
async def
|
85
|
+
async def browser_fetch(self, url: str, selector: str, state: str = 'attached') -> dict:
|
159
86
|
if self._browser is None or self._bcontext is None:
|
160
|
-
await self.
|
87
|
+
await self.new_session(include_aiohttp=False, include_browser=True)
|
161
88
|
|
162
89
|
page = await self._bcontext.new_page()
|
163
90
|
await page.goto(url, wait_until='commit', timeout=self._timeout * 1000)
|
@@ -170,39 +97,7 @@ class PyaterochkaAPI:
|
|
170
97
|
await self.close(include_aiohttp=False, include_browser=True)
|
171
98
|
return content
|
172
99
|
|
173
|
-
def
|
174
|
-
if not proxy_str:
|
175
|
-
if self._trust_env:
|
176
|
-
proxy_str = os.environ.get("HTTPS_PROXY") or os.environ.get("https_proxy")
|
177
|
-
|
178
|
-
if not proxy_str:
|
179
|
-
return None
|
180
|
-
|
181
|
-
# Example: user:pass@host:port or just host:port
|
182
|
-
match = re.match(
|
183
|
-
r'^(?:(?P<scheme>https?:\/\/))?(?:(?P<username>[^:@]+):(?P<password>[^@]+)@)?(?P<host>[^:]+):(?P<port>\d+)$',
|
184
|
-
proxy_str,
|
185
|
-
)
|
186
|
-
|
187
|
-
proxy_dict = {}
|
188
|
-
if not match:
|
189
|
-
proxy_dict['server'] = proxy_str
|
190
|
-
|
191
|
-
if not proxy_str.startswith('http://') and not proxy_str.startswith('https://'):
|
192
|
-
proxy_dict['server'] = f"http://{proxy_str}"
|
193
|
-
|
194
|
-
return proxy_dict
|
195
|
-
else:
|
196
|
-
match_dict = match.groupdict()
|
197
|
-
proxy_dict['server'] = f"{match_dict['scheme'] or 'http://'}{match_dict['host']}:{match_dict['port']}"
|
198
|
-
|
199
|
-
for key in ['username', 'password']:
|
200
|
-
if match_dict[key]:
|
201
|
-
proxy_dict[key] = match_dict[key]
|
202
|
-
|
203
|
-
return proxy_dict
|
204
|
-
|
205
|
-
async def _new_session(self, include_aiohttp: bool = True, include_browser: bool = False) -> None:
|
100
|
+
async def new_session(self, include_aiohttp: bool = True, include_browser: bool = False) -> None:
|
206
101
|
await self.close(include_aiohttp=include_aiohttp, include_browser=include_browser)
|
207
102
|
|
208
103
|
if include_aiohttp:
|
@@ -225,16 +120,14 @@ class PyaterochkaAPI:
|
|
225
120
|
"trust_env": self._trust_env,
|
226
121
|
}
|
227
122
|
self._session = aiohttp.ClientSession(**args)
|
228
|
-
|
229
|
-
if self._debug: print(f"A new connection aiohttp has been opened. trust_env: {args.get('trust_env')}")
|
123
|
+
self._logger.info(f"A new aiohttp connection has been opened. trust_env: {args.get('trust_env')}")
|
230
124
|
|
231
125
|
if include_browser:
|
232
|
-
prox = self.
|
126
|
+
prox = parse_proxy(self._proxy, self._trust_env, self._logger)
|
127
|
+
self._logger.info(f"Opening new browser connection with proxy: {'SYSTEM_PROXY' if prox and not self._proxy else prox}")
|
233
128
|
self._browser = await AsyncCamoufox(headless=not self._debug, proxy=prox, geoip=True).__aenter__()
|
234
129
|
self._bcontext = await self._browser.new_context()
|
235
|
-
|
236
|
-
toprint = "SYSTEM_PROXY" if prox and not self.proxy else prox
|
237
|
-
if self._debug: print(f"A new connection browser has been opened. Proxy used: {toprint}")
|
130
|
+
self._logger.info(f"A new browser context has been opened.")
|
238
131
|
|
239
132
|
async def close(
|
240
133
|
self,
|
@@ -253,8 +146,11 @@ class PyaterochkaAPI:
|
|
253
146
|
to_close.append("bcontext")
|
254
147
|
to_close.append("browser")
|
255
148
|
|
149
|
+
self._logger.info(f"Preparing to close: {to_close if to_close else 'nothing'}")
|
150
|
+
|
256
151
|
if not to_close:
|
257
|
-
|
152
|
+
self._logger.warning("No connections to close")
|
153
|
+
return
|
258
154
|
|
259
155
|
checks = {
|
260
156
|
"session": lambda a: a is not None and not a.closed,
|
@@ -265,15 +161,20 @@ class PyaterochkaAPI:
|
|
265
161
|
for name in to_close:
|
266
162
|
attr = getattr(self, f"_{name}", None)
|
267
163
|
if checks[name](attr):
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
164
|
+
self._logger.info(f"Closing {name} connection...")
|
165
|
+
try:
|
166
|
+
if name == "browser":
|
167
|
+
await attr.__aexit__(None, None, None)
|
168
|
+
elif name in ["bcontext", "session"]:
|
169
|
+
await attr.close()
|
170
|
+
else:
|
171
|
+
raise ValueError(f"Unknown connection type: {name}")
|
172
|
+
|
173
|
+
setattr(self, f"_{name}", None)
|
174
|
+
self._logger.info(f"The {name} connection was closed")
|
175
|
+
except Exception as e:
|
176
|
+
self._logger.error(f"Error closing {name}: {e}")
|
275
177
|
else:
|
276
|
-
|
277
|
-
print(f"The {name} connection was not open")
|
178
|
+
self._logger.warning(f"The {name} connection was not open")
|
278
179
|
|
279
180
|
|
pyaterochka_api/enums.py
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
from enum import Enum
|
2
|
+
|
3
|
+
class Patterns(Enum):
    """Regular expressions used by ``tools.py``.

    ``JS`` is used by ``parse_js``; ``STR``, ``DICT``, ``LIST`` and ``FIND``
    by ``_parse_match``; ``PROXY`` by ``parse_proxy``. Members hold raw
    pattern strings (not compiled patterns), read through ``.value``.
    """

    # A `let n = {...};` statement; group 1 is the object literal (greedy).
    JS = r'\s*let\s+n\s*=\s*({.*});\s*'             # let n = {...};
    # A key followed by a double-quoted value; backslash-escaped characters
    # are allowed inside the quotes. Group 1 = key, group 2 = raw value.
    STR = r'(\w+)\s*:\s*"([^"\\]*(?:\\.[^"\\]*)*)"'  # key: "value"
    # A key followed by a braced body, matched non-greedily (up to the
    # first closing brace). Group 1 = key, group 2 = body.
    DICT = r'(\w+)\s*:\s*{(.*?)}'                    # key: {...}
    # A key followed by a bracketed body that may contain trailing
    # `[...]` groups. Group 1 = key, group 2 = body.
    LIST = r'(\w+)\s*:\s*\[([^\[\]]*(?:\[.*?\])*)\]'  # key: [value]
    # Any single `{...}` or `[...]` item, matched non-greedily.
    FIND = r'\{.*?\}|\[.*?\]'                        # {} or []
    # http(s)://user:pass@host:port
    # Scheme, credentials and port are each optional; named groups are
    # scheme, username, password, host and port.
    PROXY = r'^(?:(?P<scheme>https?:\/\/))?(?:(?P<username>[^:@]+):(?P<password>[^@]+)@)?(?P<host>[^:\/]+)(?::(?P<port>\d+))?$'
|
11
|
+
|
12
|
+
class PurchaseMode(Enum):
    """Purchase mode accepted by the ``mode`` parameter of ``Pyaterochka`` methods.

    NOTE(review): presumably in-store purchase vs. home delivery; confirm
    against the remote API, which is not visible here.
    """

    STORE = "store"
    DELIVERY = "delivery"
|
pyaterochka_api/manager.py
CHANGED
@@ -4,23 +4,24 @@ import re
|
|
4
4
|
import json
|
5
5
|
from io import BytesIO
|
6
6
|
from beartype import beartype
|
7
|
+
from .enums import PurchaseMode
|
7
8
|
|
8
9
|
|
9
10
|
class Pyaterochka:
|
10
|
-
BASE_URL
|
11
|
-
API_URL
|
11
|
+
BASE_URL = "https://5ka.ru"
|
12
|
+
API_URL = "https://5d.5ka.ru/api"
|
12
13
|
HARDCODE_JS_CONFIG = "https://prod-cdn.5ka.ru/scripts/main.a0c039ea81eb8cf69492.js" # TODO сделать не хардкодным имя файла
|
13
|
-
DEFAULT_STORE_ID
|
14
|
-
|
15
|
-
class PurchaseMode(Enum):
|
16
|
-
STORE = "store"
|
17
|
-
DELIVERY = "delivery"
|
14
|
+
DEFAULT_STORE_ID = "Y232"
|
18
15
|
|
19
16
|
@beartype
|
20
|
-
def __init__(self, debug: bool = False, proxy: str = None, autoclose_browser: bool = False, trust_env: bool = False, timeout:
|
21
|
-
self.
|
22
|
-
|
23
|
-
self.
|
17
|
+
def __init__(self, debug: bool = False, proxy: str | None = None, autoclose_browser: bool = False, trust_env: bool = False, timeout: float = 10.0):
|
18
|
+
self.api = PyaterochkaAPI()
|
19
|
+
|
20
|
+
self.debug = debug
|
21
|
+
self.proxy = proxy
|
22
|
+
self.autoclose_browser = autoclose_browser
|
23
|
+
self.trust_env = trust_env
|
24
|
+
self.timeout = timeout
|
24
25
|
|
25
26
|
@beartype
|
26
27
|
def __enter__(self):
|
@@ -47,7 +48,7 @@ class Pyaterochka:
|
|
47
48
|
session (bool, optional): Whether to create a new session (for all, except product_info). Defaults to True.
|
48
49
|
browser (bool, optional): Whether to create a new browser instance (for product_info). Defaults to False.
|
49
50
|
"""
|
50
|
-
await self.api.
|
51
|
+
await self.api.new_session(session, browser)
|
51
52
|
|
52
53
|
@beartype
|
53
54
|
async def close(self, session: bool = True, browser: bool = True) -> None:
|
@@ -63,25 +64,23 @@ class Pyaterochka:
|
|
63
64
|
@beartype
|
64
65
|
def debug(self) -> bool:
|
65
66
|
"""If True, it will print debug messages and disable headless in browser."""
|
66
|
-
return self._debug
|
67
|
+
return self.api._debug
|
67
68
|
|
68
69
|
@debug.setter
|
69
70
|
@beartype
|
70
71
|
def debug(self, value: bool):
|
71
|
-
self._debug = value
|
72
|
-
self.api.debug = value
|
72
|
+
self.api._debug = value
|
73
73
|
|
74
74
|
@property
|
75
75
|
@beartype
|
76
|
-
def proxy(self) -> str:
|
76
|
+
def proxy(self) -> str | None:
|
77
77
|
"""Proxy for requests. If None, it will be used without proxy."""
|
78
|
-
return self._proxy
|
78
|
+
return self.api._proxy
|
79
79
|
|
80
80
|
@proxy.setter
|
81
81
|
@beartype
|
82
|
-
def proxy(self, value: str):
|
83
|
-
self._proxy = value
|
84
|
-
self.api.proxy = value
|
82
|
+
def proxy(self, value: str | None):
|
83
|
+
self.api._proxy = value
|
85
84
|
|
86
85
|
@property
|
87
86
|
@beartype
|
@@ -108,13 +107,13 @@ class Pyaterochka:
|
|
108
107
|
|
109
108
|
@property
|
110
109
|
@beartype
|
111
|
-
def timeout(self) ->
|
110
|
+
def timeout(self) -> float:
|
112
111
|
"""Timeout value for the API requests."""
|
113
112
|
return self.api._timeout
|
114
113
|
|
115
|
-
@
|
114
|
+
@timeout.setter
|
116
115
|
@beartype
|
117
|
-
def timeout(self, value:
|
116
|
+
def timeout(self, value: float):
|
118
117
|
if value <= 0:
|
119
118
|
raise ValueError("Timeout must be greater than 0")
|
120
119
|
|
@@ -127,7 +126,7 @@ class Pyaterochka:
|
|
127
126
|
include_restrict: bool = True,
|
128
127
|
mode: PurchaseMode = PurchaseMode.STORE,
|
129
128
|
sap_code_store_id: str = DEFAULT_STORE_ID
|
130
|
-
) -> list[dict]
|
129
|
+
) -> list[dict]:
|
131
130
|
f"""
|
132
131
|
Asynchronously retrieves a list of categories from the Pyaterochka API.
|
133
132
|
|
@@ -138,7 +137,7 @@ class Pyaterochka:
|
|
138
137
|
sap_code_store_id (str, optional): The store ID (official name in API is "sap_code") to use. Defaults to "{self.DEFAULT_STORE_ID}". This lib not support search ID stores.
|
139
138
|
|
140
139
|
Returns:
|
141
|
-
dict
|
140
|
+
list[dict]: A dictionary representing the categories list if the request is successful, error otherwise.
|
142
141
|
|
143
142
|
Raises:
|
144
143
|
Exception: If the response status is not 200 (OK) or 403 (Forbidden / Anti-bot).
|
@@ -155,7 +154,7 @@ class Pyaterochka:
|
|
155
154
|
mode: PurchaseMode = PurchaseMode.STORE,
|
156
155
|
sap_code_store_id: str = DEFAULT_STORE_ID,
|
157
156
|
limit: int = 30
|
158
|
-
) -> dict
|
157
|
+
) -> dict:
|
159
158
|
f"""
|
160
159
|
Asynchronously retrieves a list of products from the Pyaterochka API for a given category.
|
161
160
|
|
@@ -166,7 +165,7 @@ class Pyaterochka:
|
|
166
165
|
limit (int, optional): The maximum number of products to retrieve. Defaults to 30. Must be between 1 and 499.
|
167
166
|
|
168
167
|
Returns:
|
169
|
-
dict
|
168
|
+
dict: A dictionary representing the products list if the request is successful, error otherwise.
|
170
169
|
|
171
170
|
Raises:
|
172
171
|
ValueError: If the limit is not between 1 and 499.
|
@@ -184,6 +183,7 @@ class Pyaterochka:
|
|
184
183
|
async def product_info(self, plu_id: int) -> dict:
|
185
184
|
"""
|
186
185
|
Asynchronously retrieves product information from the Pyaterochka API for a given PLU ID. Average time processing 2 seconds (first start 6 seconds).
|
186
|
+
|
187
187
|
Args:
|
188
188
|
plu_id (int): The PLU ID of the product.
|
189
189
|
Returns:
|
@@ -193,7 +193,7 @@ class Pyaterochka:
|
|
193
193
|
"""
|
194
194
|
|
195
195
|
url = f"{self.BASE_URL}/product/{plu_id}/"
|
196
|
-
response = await self.api.
|
196
|
+
response = await self.api.browser_fetch(url=url, selector='script#__NEXT_DATA__[type="application/json"]')
|
197
197
|
|
198
198
|
match = re.search(
|
199
199
|
r'<script\s+id="__NEXT_DATA__"\s+type="application/json">(.+?)</script>',
|
@@ -211,7 +211,7 @@ class Pyaterochka:
|
|
211
211
|
return data
|
212
212
|
|
213
213
|
@beartype
|
214
|
-
async def get_news(self, limit: int = None) -> dict
|
214
|
+
async def get_news(self, limit: int | None = None) -> dict:
|
215
215
|
"""
|
216
216
|
Asynchronously retrieves news from the Pyaterochka API.
|
217
217
|
|
@@ -219,7 +219,7 @@ class Pyaterochka:
|
|
219
219
|
limit (int, optional): The maximum number of news items to retrieve. Defaults to None.
|
220
220
|
|
221
221
|
Returns:
|
222
|
-
dict
|
222
|
+
dict: A dictionary representing the news if the request is successful, error otherwise.
|
223
223
|
"""
|
224
224
|
url = f"{self.BASE_URL}/api/public/v1/news/"
|
225
225
|
if limit and limit > 0:
|
@@ -230,7 +230,7 @@ class Pyaterochka:
|
|
230
230
|
return response
|
231
231
|
|
232
232
|
@beartype
|
233
|
-
async def find_store(self, longitude: float, latitude: float) -> dict
|
233
|
+
async def find_store(self, longitude: float, latitude: float) -> dict:
|
234
234
|
"""
|
235
235
|
Asynchronously finds the store associated with the given coordinates.
|
236
236
|
|
@@ -239,7 +239,7 @@ class Pyaterochka:
|
|
239
239
|
latitude (float): The latitude of the location.
|
240
240
|
|
241
241
|
Returns:
|
242
|
-
dict
|
242
|
+
dict: A dictionary representing the store information if the request is successful, error otherwise.
|
243
243
|
"""
|
244
244
|
|
245
245
|
request_url = f"{self.API_URL}/orders/v1/orders/stores/?lon={longitude}&lat={latitude}"
|
@@ -247,15 +247,17 @@ class Pyaterochka:
|
|
247
247
|
return response
|
248
248
|
|
249
249
|
@beartype
|
250
|
-
async def download_image(self, url: str) -> BytesIO
|
250
|
+
async def download_image(self, url: str) -> BytesIO:
|
251
251
|
is_success, image_data, response_type = await self.api.fetch(url=url)
|
252
252
|
|
253
253
|
if not is_success:
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
254
|
+
self.api._logger.error("Failed to fetch image")
|
255
|
+
return
|
256
|
+
elif not isinstance(image_data, (bytes, bytearray)):
|
257
|
+
self.api._logger.error("Image data is not bytes")
|
258
|
+
return
|
259
|
+
|
260
|
+
self.api._logger.debug("Image fetched successfully")
|
259
261
|
|
260
262
|
image = BytesIO(image_data)
|
261
263
|
image.name = f'{url.split("/")[-1]}.{response_type.split("/")[-1]}'
|
@@ -263,7 +265,7 @@ class Pyaterochka:
|
|
263
265
|
return image
|
264
266
|
|
265
267
|
@beartype
|
266
|
-
async def get_config(self) -> dict
|
268
|
+
async def get_config(self) -> dict:
|
267
269
|
"""
|
268
270
|
Asynchronously retrieves the configuration from the hardcoded JavaScript file.
|
269
271
|
|
@@ -271,7 +273,7 @@ class Pyaterochka:
|
|
271
273
|
debug (bool, optional): Whether to print debug information. Defaults to False.
|
272
274
|
|
273
275
|
Returns:
|
274
|
-
dict
|
276
|
+
dict: A dictionary representing the configuration if the request is successful, error otherwise.
|
275
277
|
"""
|
276
278
|
|
277
279
|
return await self.api.download_config(config_url=self.HARDCODE_JS_CONFIG)
|
pyaterochka_api/tools.py
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
from .enums import Patterns
|
2
|
+
import os
|
3
|
+
import re
|
4
|
+
from tqdm import tqdm
|
5
|
+
|
6
|
+
def get_env_proxy() -> str | None:
|
7
|
+
"""
|
8
|
+
Получает прокси из переменных окружения.
|
9
|
+
:return: Прокси-строка или None.
|
10
|
+
"""
|
11
|
+
proxy = os.environ.get("HTTPS_PROXY") or os.environ.get("https_proxy") or os.environ.get("HTTP_PROXY") or os.environ.get("http_proxy")
|
12
|
+
return proxy if proxy else None
|
13
|
+
|
14
|
+
def parse_proxy(proxy_str: str | None, trust_env: bool, logger) -> dict | None:
|
15
|
+
logger.debug(f"Parsing proxy string: {proxy_str}")
|
16
|
+
|
17
|
+
if not proxy_str:
|
18
|
+
if trust_env:
|
19
|
+
logger.debug("Proxy string not provided, checking environment variables for HTTP(S)_PROXY")
|
20
|
+
proxy_str = get_env_proxy()
|
21
|
+
|
22
|
+
if not proxy_str:
|
23
|
+
logger.info("No proxy string found, returning None")
|
24
|
+
return None
|
25
|
+
else:
|
26
|
+
logger.info(f"Proxy string found in environment variables")
|
27
|
+
|
28
|
+
# Example: user:pass@host:port or just host:port
|
29
|
+
match = re.match(Patterns.PROXY.value, proxy_str)
|
30
|
+
|
31
|
+
proxy_dict = {}
|
32
|
+
if not match:
|
33
|
+
logger.warning(f"Proxy string did not match expected pattern, using basic formating")
|
34
|
+
proxy_dict['server'] = proxy_str
|
35
|
+
|
36
|
+
if not proxy_str.startswith('http://') and not proxy_str.startswith('https://'):
|
37
|
+
logger.warning("Proxy string missing protocol, prepending 'http://'")
|
38
|
+
proxy_dict['server'] = f"http://{proxy_str}"
|
39
|
+
|
40
|
+
logger.info(f"Proxy parsed as basic")
|
41
|
+
return proxy_dict
|
42
|
+
else:
|
43
|
+
match_dict = match.groupdict()
|
44
|
+
proxy_dict['server'] = f"{match_dict['scheme'] or 'http://'}{match_dict['host']}"
|
45
|
+
if match_dict['port']:
|
46
|
+
proxy_dict['server'] += f":{match_dict['port']}"
|
47
|
+
|
48
|
+
for key in ['username', 'password']:
|
49
|
+
if match_dict[key]:
|
50
|
+
proxy_dict[key] = match_dict[key]
|
51
|
+
|
52
|
+
logger.info(f"Proxy WITH{'OUT' if 'username' not in proxy_dict else ''} credentials")
|
53
|
+
|
54
|
+
logger.info(f"Proxy parsed as regex")
|
55
|
+
return proxy_dict
|
56
|
+
|
57
|
+
async def _parse_match(match: str, progress_bar: tqdm | None = None) -> dict:
    """Recursively convert a fragment of a JS object literal into a dict.

    Three passes run over ``match``: quoted string values, nested ``{...}``
    objects, then ``[...]`` lists. Later passes overwrite keys set by
    earlier ones when the same key matches more than once.

    :param match: Text of the JS fragment to parse.
    :param progress_bar: Optional tqdm bar, advanced once per pass.
    :return: Dict of the keys that were recognised.
    """
    result = {}
    # Compare against None explicitly: a tqdm bar's truthiness depends on its
    # total/iterable and is not a reliable "was a bar supplied" test.
    has_bar = progress_bar is not None

    if has_bar:
        progress_bar.set_description("Parsing strings")

    # String values
    for m in re.finditer(Patterns.STR.value, match):
        key, value = m.group(1), m.group(2)
        # Unescape \" and \\ in a single pass. The previous chained
        # str.replace calls replaced each string with itself and so did nothing.
        result[key] = re.sub(r'\\(["\\])', r'\1', value)

    if has_bar:
        progress_bar.update(1)
        progress_bar.set_description("Parsing dictionaries")

    # Nested objects: only bodies without any quoted string are recursed into
    for m in re.finditer(Patterns.DICT.value, match):
        key, value = m.group(1), m.group(2)
        if not re.search(Patterns.STR.value, value):
            result[key] = await _parse_match(value, progress_bar)

    if has_bar:
        progress_bar.update(1)
        progress_bar.set_description("Parsing lists")

    # Lists: each {...} or [...] item is parsed as its own fragment
    for m in re.finditer(Patterns.LIST.value, match):
        key, value = m.group(1), m.group(2)
        if not re.search(Patterns.STR.value, value):
            result[key] = [
                await _parse_match(item.group(0), progress_bar)
                for item in re.finditer(Patterns.FIND.value, value)
            ]

    if has_bar:
        progress_bar.update(1)

    return result
|
95
|
+
|
96
|
+
async def parse_js(js_code: str, debug: bool, logger) -> dict | None:
    """Parse JavaScript code and extract the data assigned to the variable ``n``.

    The parser uses the SECOND ``let n = {...};`` statement found in the code.

    :param js_code: JS code as a string.
    :param debug: When true, show a tqdm progress bar while parsing.
    :param logger: Logger-like object providing ``debug``/``info``.
    :return: Parsed data as a dict.
    :raises Exception: If fewer than two ``let n = {...};`` statements are found.
    """
    match_list = list(re.finditer(Patterns.JS.value, js_code))

    logger.debug(f'Found matches {len(match_list)}')

    # Index 1 is read below, so two matches are required. The previous guard
    # (>= 1) let a single match through and failed with IndexError instead
    # of the intended error.
    if len(match_list) < 2:
        raise Exception("N variable in JS code not found")

    progress_bar = tqdm(total=33, desc="Parsing JS", position=0) if debug else None

    try:
        logger.info('Starting to parse match')
        result = await _parse_match(match_list[1].group(0), progress_bar)
    finally:
        # Close the bar even when parsing raises
        if progress_bar is not None:
            progress_bar.close()

    logger.info('Complited parsing match')
    return result
|