pyaterochka-api 0.1.3__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyaterochka_api-0.1.6/PKG-INFO +141 -0
- pyaterochka_api-0.1.6/README.md +100 -0
- pyaterochka_api-0.1.6/pyaterochka_api/__init__.py +3 -0
- pyaterochka_api-0.1.6/pyaterochka_api/api.py +251 -0
- pyaterochka_api-0.1.6/pyaterochka_api/manager.py +232 -0
- pyaterochka_api-0.1.6/pyaterochka_api.egg-info/PKG-INFO +141 -0
- {pyaterochka_api-0.1.3 → pyaterochka_api-0.1.6}/pyaterochka_api.egg-info/SOURCES.txt +6 -1
- pyaterochka_api-0.1.6/pyaterochka_api.egg-info/requires.txt +9 -0
- {pyaterochka_api-0.1.3 → pyaterochka_api-0.1.6}/pyaterochka_api.egg-info/top_level.txt +1 -0
- pyaterochka_api-0.1.6/pyproject.toml +7 -0
- pyaterochka_api-0.1.6/setup.py +41 -0
- pyaterochka_api-0.1.6/tests/__init__.py +0 -0
- pyaterochka_api-0.1.6/tests/base_tests.py +66 -0
- pyaterochka_api-0.1.6/tests/snapshots/__init__.py +0 -0
- pyaterochka_api-0.1.6/tests/snapshots/snap_base_tests.py +813 -0
- pyaterochka_api-0.1.3/PKG-INFO +0 -63
- pyaterochka_api-0.1.3/README.md +0 -46
- pyaterochka_api-0.1.3/pyaterochka_api/__init__.py +0 -3
- pyaterochka_api-0.1.3/pyaterochka_api/api.py +0 -146
- pyaterochka_api-0.1.3/pyaterochka_api/manager.py +0 -103
- pyaterochka_api-0.1.3/pyaterochka_api.egg-info/PKG-INFO +0 -63
- pyaterochka_api-0.1.3/pyaterochka_api.egg-info/requires.txt +0 -3
- pyaterochka_api-0.1.3/setup.py +0 -23
- {pyaterochka_api-0.1.3 → pyaterochka_api-0.1.6}/LICENSE +0 -0
- {pyaterochka_api-0.1.3 → pyaterochka_api-0.1.6}/pyaterochka_api.egg-info/dependency_links.txt +0 -0
- {pyaterochka_api-0.1.3 → pyaterochka_api-0.1.6}/setup.cfg +0 -0
@@ -0,0 +1,141 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: pyaterochka_api
|
3
|
+
Version: 0.1.6
|
4
|
+
Summary: A Python API client for Pyaterochka store catalog
|
5
|
+
Home-page: https://github.com/Open-Inflation/pyaterochka_api
|
6
|
+
Author: Miskler
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
8
|
+
Classifier: Programming Language :: Python :: 3.10
|
9
|
+
Classifier: Programming Language :: Python :: 3.11
|
10
|
+
Classifier: Programming Language :: Python :: 3.12
|
11
|
+
Classifier: Programming Language :: Python :: 3.13
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
13
|
+
Classifier: Operating System :: Microsoft :: Windows
|
14
|
+
Classifier: Operating System :: POSIX :: Linux
|
15
|
+
Classifier: Intended Audience :: Developers
|
16
|
+
Classifier: Intended Audience :: Information Technology
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
18
|
+
Classifier: Topic :: Internet
|
19
|
+
Classifier: Topic :: Utilities
|
20
|
+
Requires-Python: >=3.10
|
21
|
+
Description-Content-Type: text/markdown
|
22
|
+
License-File: LICENSE
|
23
|
+
Requires-Dist: aiohttp
|
24
|
+
Requires-Dist: camoufox[geoip]
|
25
|
+
Requires-Dist: fake-useragent
|
26
|
+
Requires-Dist: tqdm
|
27
|
+
Provides-Extra: tests
|
28
|
+
Requires-Dist: pytest; extra == "tests"
|
29
|
+
Requires-Dist: pytest-asyncio; extra == "tests"
|
30
|
+
Requires-Dist: snapshottest~=1.0.0a1; extra == "tests"
|
31
|
+
Dynamic: author
|
32
|
+
Dynamic: classifier
|
33
|
+
Dynamic: description
|
34
|
+
Dynamic: description-content-type
|
35
|
+
Dynamic: home-page
|
36
|
+
Dynamic: license-file
|
37
|
+
Dynamic: provides-extra
|
38
|
+
Dynamic: requires-dist
|
39
|
+
Dynamic: requires-python
|
40
|
+
Dynamic: summary
|
41
|
+
|
42
|
+
# Pyaterochka API *(not official / не официальный)*
|
43
|
+
|
44
|
+
Pyaterochka (Пятёрочка) - https://5ka.ru/
|
45
|
+
|
46
|
+

|
47
|
+

|
48
|
+
[](https://pypi.org/project/pyaterochka-api/)
|
49
|
+
[](https://discord.gg/UnJnGHNbBp)
|
50
|
+
[](https://t.me/miskler_dev)
|
51
|
+
|
52
|
+
|
53
|
+
|
54
|
+
## Installation / Установка:
|
55
|
+
1. Install package / Установка пакета:
|
56
|
+
```bash
|
57
|
+
pip install pyaterochka_api
|
58
|
+
```
|
59
|
+
2. ***Debian/Ubuntu Linux***: Install dependencies / Установка зависимостей:
|
60
|
+
```bash
|
61
|
+
sudo apt update && sudo apt install -y libgtk-3-0 libx11-xcb1
|
62
|
+
```
|
63
|
+
3. Install browser / Установка браузера:
|
64
|
+
```bash
|
65
|
+
camoufox fetch
|
66
|
+
```
|
67
|
+
|
68
|
+
### Usage / Использование:
|
69
|
+
```py
|
70
|
+
from pyaterochka_api import Pyaterochka
|
71
|
+
import asyncio
|
72
|
+
|
73
|
+
|
74
|
+
async def main():
|
75
|
+
async with Pyaterochka(proxy="user:password@host:port", debug=False, autoclose_browser=False) as API:
|
76
|
+
# RUS: Вводим геоточку (самого магазина или рядом с ним) и получаем инфу о магазине
|
77
|
+
# ENG: Enter a geolocation (of the store or near it) and get info about the store
|
78
|
+
find_store = await API.find_store(longitude=37.63156, latitude=55.73768)
|
79
|
+
print(f"Store info output: {find_store!s:.100s}...\n")
|
80
|
+
|
81
|
+
# RUS: Выводит список всех категорий на сайте
|
82
|
+
# ENG: Outputs a list of all categories on the site
|
83
|
+
catalog = await API.categories_list(subcategories=True, mode=API.PurchaseMode.DELIVERY)
|
84
|
+
print(f"Categories list output: {catalog!s:.100s}...\n")
|
85
|
+
|
86
|
+
# RUS: Выводит список всех товаров выбранной категории (ограничение 100 элементов, если превышает - запрашивайте через дополнительные страницы)
|
87
|
+
# ENG: Outputs a list of all items in the selected category (limiting to 100 elements, if exceeds - request through additional pages)
|
88
|
+
# Страниц не существует, использовать желаемый лимит (до 499) / Pages do not exist, use the desired limit (up to 499)
|
89
|
+
items = await API.products_list(catalog[0]['id'], limit=5)
|
90
|
+
print(f"Items list output: {items!s:.100s}...\n")
|
91
|
+
|
92
|
+
# RUS: Выводит информацию о товаре (по его plu - id товара).
|
93
|
+
# Функция в первый раз достаточно долгая, порядка 5-9 секунды, последующие запросы около 2 секунд (если браузер не был закрыт)
|
94
|
+
# ENG: Outputs information about the product (by its plu - product id).
|
95
|
+
# The function is quite long the first time, about 5-9 seconds, subsequent requests take about 2 seconds (if the browser was not closed)
|
96
|
+
info = await API.product_info(43347)
|
97
|
+
print(f"Product output: {info['props']['pageProps']['props']['productStore']!s:.100s}...\n")
|
98
|
+
|
99
|
+
# RUS: Влияет исключительно на функцию выше (product_info), если включено, то после отработки запроса браузер закроется и кеши очищаются.
|
100
|
+
# Не рекомендую включать, если вам все же нужно освободить память, лучше использовать API.close(session=False, browser=True)
|
101
|
+
# ENG: Affects only the function above (product_info), if enabled, the browser will close after the request is processed and caches are cleared.
|
102
|
+
# I do not recommend enabling it, if you still need to free up memory, it is better to use API.close(session=False, browser=True)
|
103
|
+
API.autoclose_browser = True
|
104
|
+
|
105
|
+
# RUS: Выводит список последних промо-акций/новостей (можно поставить ограничитель по количеству, опционально)
|
106
|
+
# ENG: Outputs a list of the latest promotions/news (you can set a limit on the number, optionally)
|
107
|
+
news = await API.get_news(limit=5)
|
108
|
+
print(f"News output: {news!s:.100s}...\n")
|
109
|
+
|
110
|
+
# RUS: Выводит основной конфиг сайта (очень долгая функция, рекомендую сохранять в файл и переиспользовать)
|
111
|
+
# ENG: Outputs the main config of the site (a very slow function; it is recommended to save the result to a file and reuse it)
|
112
|
+
print(f"Main config: {await API.get_config()!s:.100s}...\n")
|
113
|
+
|
114
|
+
# RUS: Если требуется, можно настроить вывод логов в консоль
|
115
|
+
# ENG: If required, you can configure the output of logs in the console
|
116
|
+
API.debug = True
|
117
|
+
|
118
|
+
# RUS: Скачивает картинку товара (возвращает BytesIO или None)
|
119
|
+
# ENG: Downloads the product image (returns BytesIO or None)
|
120
|
+
image = await API.download_image(url=items['products'][0]['image_links']['normal'][0])
|
121
|
+
with open(image.name, 'wb') as f:
|
122
|
+
f.write(image.getbuffer())
|
123
|
+
|
124
|
+
# RUS: Так же как и debug, в рантайме можно переназначить прокси
|
125
|
+
# ENG: As with debug, you can reassign the proxy in runtime
|
126
|
+
API.proxy = "user:password@host:port"
|
127
|
+
# RUS: Чтобы применить изменения, нужно пересоздать подключение (session - aiohttp отвечающее за все, кроме product_info, за него browser)
|
128
|
+
# ENG: To apply the changes, you need to rebuild the connection (session - aiohttp, responsible for everything except product_info; the browser is responsible for product_info)
|
129
|
+
await API.rebuild_connection()
|
130
|
+
await API.categories_list()
|
131
|
+
|
132
|
+
|
133
|
+
if __name__ == '__main__':
|
134
|
+
asyncio.run(main())
|
135
|
+
```
|
136
|
+
|
137
|
+
### Report / Обратная связь
|
138
|
+
|
139
|
+
If you have any problems using it or any suggestions, do not hesitate to write to the [project's GitHub](https://github.com/Open-Inflation/pyaterochka_api/issues)!
|
140
|
+
|
141
|
+
Если у вас возникнут проблемы в использовании / пожелания, не стесняйтесь писать на [GitHub проекта](https://github.com/Open-Inflation/pyaterochka_api/issues)!
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# Pyaterochka API *(not official / не официальный)*
|
2
|
+
|
3
|
+
Pyaterochka (Пятёрочка) - https://5ka.ru/
|
4
|
+
|
5
|
+

|
6
|
+

|
7
|
+
[](https://pypi.org/project/pyaterochka-api/)
|
8
|
+
[](https://discord.gg/UnJnGHNbBp)
|
9
|
+
[](https://t.me/miskler_dev)
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
## Installation / Установка:
|
14
|
+
1. Install package / Установка пакета:
|
15
|
+
```bash
|
16
|
+
pip install pyaterochka_api
|
17
|
+
```
|
18
|
+
2. ***Debian/Ubuntu Linux***: Install dependencies / Установка зависимостей:
|
19
|
+
```bash
|
20
|
+
sudo apt update && sudo apt install -y libgtk-3-0 libx11-xcb1
|
21
|
+
```
|
22
|
+
3. Install browser / Установка браузера:
|
23
|
+
```bash
|
24
|
+
camoufox fetch
|
25
|
+
```
|
26
|
+
|
27
|
+
### Usage / Использование:
|
28
|
+
```py
|
29
|
+
from pyaterochka_api import Pyaterochka
|
30
|
+
import asyncio
|
31
|
+
|
32
|
+
|
33
|
+
async def main():
|
34
|
+
async with Pyaterochka(proxy="user:password@host:port", debug=False, autoclose_browser=False) as API:
|
35
|
+
# RUS: Вводим геоточку (самого магазина или рядом с ним) и получаем инфу о магазине
|
36
|
+
# ENG: Enter a geolocation (of the store or near it) and get info about the store
|
37
|
+
find_store = await API.find_store(longitude=37.63156, latitude=55.73768)
|
38
|
+
print(f"Store info output: {find_store!s:.100s}...\n")
|
39
|
+
|
40
|
+
# RUS: Выводит список всех категорий на сайте
|
41
|
+
# ENG: Outputs a list of all categories on the site
|
42
|
+
catalog = await API.categories_list(subcategories=True, mode=API.PurchaseMode.DELIVERY)
|
43
|
+
print(f"Categories list output: {catalog!s:.100s}...\n")
|
44
|
+
|
45
|
+
# RUS: Выводит список всех товаров выбранной категории (ограничение 100 элементов, если превышает - запрашивайте через дополнительные страницы)
|
46
|
+
# ENG: Outputs a list of all items in the selected category (limiting to 100 elements, if exceeds - request through additional pages)
|
47
|
+
# Страниц не существует, использовать желаемый лимит (до 499) / Pages do not exist, use the desired limit (up to 499)
|
48
|
+
items = await API.products_list(catalog[0]['id'], limit=5)
|
49
|
+
print(f"Items list output: {items!s:.100s}...\n")
|
50
|
+
|
51
|
+
# RUS: Выводит информацию о товаре (по его plu - id товара).
|
52
|
+
# Функция в первый раз достаточно долгая, порядка 5-9 секунды, последующие запросы около 2 секунд (если браузер не был закрыт)
|
53
|
+
# ENG: Outputs information about the product (by its plu - product id).
|
54
|
+
# The function is quite long the first time, about 5-9 seconds, subsequent requests take about 2 seconds (if the browser was not closed)
|
55
|
+
info = await API.product_info(43347)
|
56
|
+
print(f"Product output: {info['props']['pageProps']['props']['productStore']!s:.100s}...\n")
|
57
|
+
|
58
|
+
# RUS: Влияет исключительно на функцию выше (product_info), если включено, то после отработки запроса браузер закроется и кеши очищаются.
|
59
|
+
# Не рекомендую включать, если вам все же нужно освободить память, лучше использовать API.close(session=False, browser=True)
|
60
|
+
# ENG: Affects only the function above (product_info), if enabled, the browser will close after the request is processed and caches are cleared.
|
61
|
+
# I do not recommend enabling it, if you still need to free up memory, it is better to use API.close(session=False, browser=True)
|
62
|
+
API.autoclose_browser = True
|
63
|
+
|
64
|
+
# RUS: Выводит список последних промо-акций/новостей (можно поставить ограничитель по количеству, опционально)
|
65
|
+
# ENG: Outputs a list of the latest promotions/news (you can set a limit on the number, optionally)
|
66
|
+
news = await API.get_news(limit=5)
|
67
|
+
print(f"News output: {news!s:.100s}...\n")
|
68
|
+
|
69
|
+
# RUS: Выводит основной конфиг сайта (очень долгая функция, рекомендую сохранять в файл и переиспользовать)
|
70
|
+
# ENG: Outputs the main config of the site (a very slow function; it is recommended to save the result to a file and reuse it)
|
71
|
+
print(f"Main config: {await API.get_config()!s:.100s}...\n")
|
72
|
+
|
73
|
+
# RUS: Если требуется, можно настроить вывод логов в консоль
|
74
|
+
# ENG: If required, you can configure the output of logs in the console
|
75
|
+
API.debug = True
|
76
|
+
|
77
|
+
# RUS: Скачивает картинку товара (возвращает BytesIO или None)
|
78
|
+
# ENG: Downloads the product image (returns BytesIO or None)
|
79
|
+
image = await API.download_image(url=items['products'][0]['image_links']['normal'][0])
|
80
|
+
with open(image.name, 'wb') as f:
|
81
|
+
f.write(image.getbuffer())
|
82
|
+
|
83
|
+
# RUS: Так же как и debug, в рантайме можно переназначить прокси
|
84
|
+
# ENG: As with debug, you can reassign the proxy in runtime
|
85
|
+
API.proxy = "user:password@host:port"
|
86
|
+
# RUS: Чтобы применить изменения, нужно пересоздать подключение (session - aiohttp отвечающее за все, кроме product_info, за него browser)
|
87
|
+
# ENG: To apply the changes, you need to rebuild the connection (session - aiohttp, responsible for everything except product_info; the browser is responsible for product_info)
|
88
|
+
await API.rebuild_connection()
|
89
|
+
await API.categories_list()
|
90
|
+
|
91
|
+
|
92
|
+
if __name__ == '__main__':
|
93
|
+
asyncio.run(main())
|
94
|
+
```
|
95
|
+
|
96
|
+
### Report / Обратная связь
|
97
|
+
|
98
|
+
If you have any problems using it or any suggestions, do not hesitate to write to the [project's GitHub](https://github.com/Open-Inflation/pyaterochka_api/issues)!
|
99
|
+
|
100
|
+
Если у вас возникнут проблемы в использовании / пожелания, не стесняйтесь писать на [GitHub проекта](https://github.com/Open-Inflation/pyaterochka_api/issues)!
|
@@ -0,0 +1,251 @@
|
|
1
|
+
import aiohttp
|
2
|
+
from fake_useragent import UserAgent
|
3
|
+
from enum import Enum
|
4
|
+
import re
|
5
|
+
from tqdm.asyncio import tqdm
|
6
|
+
from camoufox import AsyncCamoufox
|
7
|
+
|
8
|
+
|
9
|
+
class PyaterochkaAPI:
|
10
|
+
"""
|
11
|
+
Класс для загрузки JSON/image и парсинга JavaScript-конфигураций из удаленного источника.
|
12
|
+
"""
|
13
|
+
|
14
|
+
class Patterns(Enum):
|
15
|
+
JS = r'\s*let\s+n\s*=\s*({.*});\s*' # let n = {...};
|
16
|
+
STR = r'(\w+)\s*:\s*"([^"\\]*(?:\\.[^"\\]*)*)"' # key: "value"
|
17
|
+
DICT = r'(\w+)\s*:\s*{(.*?)}' # key: {...}
|
18
|
+
LIST = r'(\w+)\s*:\s*\[([^\[\]]*(?:\[.*?\])*)\]' # key: [value]
|
19
|
+
FIND = r'\{.*?\}|\[.*?\]' # {} or []
|
20
|
+
|
21
|
+
def __init__(self, debug: bool = False, proxy: str = None, autoclose_browser: bool = False):
|
22
|
+
self._debug = debug
|
23
|
+
self._proxy = proxy
|
24
|
+
self._session = None
|
25
|
+
self._autoclose_browser = autoclose_browser
|
26
|
+
self._browser = None
|
27
|
+
self._bcontext = None
|
28
|
+
|
29
|
+
@property
|
30
|
+
def proxy(self) -> str | None:
|
31
|
+
return self._proxy if hasattr(self, '_proxy') else None
|
32
|
+
|
33
|
+
@proxy.setter
|
34
|
+
def proxy(self, value: str | None) -> None:
|
35
|
+
self._proxy = value
|
36
|
+
|
37
|
+
async def fetch(self, url: str) -> tuple[bool, dict | None | str, str]:
|
38
|
+
"""
|
39
|
+
Выполняет HTTP-запрос к указанному URL и возвращает результат.
|
40
|
+
|
41
|
+
:return: Кортеж (успех, данные или None).
|
42
|
+
"""
|
43
|
+
if self._debug:
|
44
|
+
print(f"Requesting \"{url}\"...", flush=True)
|
45
|
+
|
46
|
+
async with self._session.get(url=url) as response:
|
47
|
+
if self._debug:
|
48
|
+
print(f"Response status: {response.status}", flush=True)
|
49
|
+
|
50
|
+
if response.status == 200:
|
51
|
+
if response.headers['content-type'] == 'application/json':
|
52
|
+
output_response = response.json()
|
53
|
+
elif response.headers['content-type'] == 'image/jpeg':
|
54
|
+
output_response = response.read()
|
55
|
+
else:
|
56
|
+
output_response = response.text()
|
57
|
+
|
58
|
+
return True, await output_response, response.headers['content-type']
|
59
|
+
elif response.status == 403:
|
60
|
+
if self._debug:
|
61
|
+
print("Anti-bot protection. Use Russia IP address and try again.", flush=True)
|
62
|
+
return False, None, ''
|
63
|
+
else:
|
64
|
+
if self._debug:
|
65
|
+
print(f"Unexpected error: {response.status}", flush=True)
|
66
|
+
raise Exception(f"Response status: {response.status} (unknown error/status code)")
|
67
|
+
|
68
|
+
async def _parse_js(self, js_code: str) -> dict | None:
|
69
|
+
"""
|
70
|
+
Парсит JavaScript-код и извлекает данные из переменной "n".
|
71
|
+
|
72
|
+
:param js_code: JS-код в виде строки.
|
73
|
+
:return: Распарсенные данные в виде словаря или None.
|
74
|
+
"""
|
75
|
+
matches = re.finditer(self.Patterns.JS.value, js_code)
|
76
|
+
match_list = list(matches)
|
77
|
+
|
78
|
+
if self._debug:
|
79
|
+
print(f"Found matches {len(match_list)}")
|
80
|
+
progress_bar = tqdm(total=33, desc="Parsing JS", position=0)
|
81
|
+
|
82
|
+
async def parse_match(match: str) -> dict:
|
83
|
+
result = {}
|
84
|
+
|
85
|
+
if self._debug:
|
86
|
+
progress_bar.set_description("Parsing strings")
|
87
|
+
|
88
|
+
# Парсинг строк
|
89
|
+
string_matches = re.finditer(self.Patterns.STR.value, match)
|
90
|
+
for m in string_matches:
|
91
|
+
key, value = m.group(1), m.group(2)
|
92
|
+
result[key] = value.replace('\"', '"').replace('\\', '\\')
|
93
|
+
|
94
|
+
if self._debug:
|
95
|
+
progress_bar.update(1)
|
96
|
+
progress_bar.set_description("Parsing dictionaries")
|
97
|
+
|
98
|
+
# Парсинг словарей
|
99
|
+
dict_matches = re.finditer(self.Patterns.DICT.value, match)
|
100
|
+
for m in dict_matches:
|
101
|
+
key, value = m.group(1), m.group(2)
|
102
|
+
if not re.search(self.Patterns.STR.value, value):
|
103
|
+
result[key] = await parse_match(value)
|
104
|
+
|
105
|
+
if self._debug:
|
106
|
+
progress_bar.update(1)
|
107
|
+
progress_bar.set_description("Parsing lists")
|
108
|
+
|
109
|
+
# Парсинг списков
|
110
|
+
list_matches = re.finditer(self.Patterns.LIST.value, match)
|
111
|
+
for m in list_matches:
|
112
|
+
key, value = m.group(1), m.group(2)
|
113
|
+
if not re.search(self.Patterns.STR.value, value):
|
114
|
+
result[key] = [await parse_match(item.group(0)) for item in re.finditer(self.Patterns.FIND.value, value)]
|
115
|
+
|
116
|
+
if self._debug:
|
117
|
+
progress_bar.update(1)
|
118
|
+
|
119
|
+
return result
|
120
|
+
|
121
|
+
if match_list and len(match_list) >= 1:
|
122
|
+
if self._debug:
|
123
|
+
print("Starting to parse match")
|
124
|
+
result = await parse_match(match_list[1].group(0))
|
125
|
+
if self._debug:
|
126
|
+
progress_bar.close()
|
127
|
+
return result
|
128
|
+
else:
|
129
|
+
if self._debug:
|
130
|
+
progress_bar.close()
|
131
|
+
raise Exception("N variable in JS code not found")
|
132
|
+
|
133
|
+
async def download_config(self, config_url: str) -> dict | None:
|
134
|
+
"""
|
135
|
+
Загружает и парсит JavaScript-конфигурацию с указанного URL.
|
136
|
+
|
137
|
+
:param config_url: URL для загрузки конфигурации.
|
138
|
+
:return: Распарсенные данные в виде словаря или None.
|
139
|
+
"""
|
140
|
+
is_success, js_code, _response_type = await self.fetch(url=config_url)
|
141
|
+
|
142
|
+
if not is_success:
|
143
|
+
if self._debug:
|
144
|
+
print("Failed to fetch JS code")
|
145
|
+
return None
|
146
|
+
elif self._debug:
|
147
|
+
print("JS code fetched successfully")
|
148
|
+
|
149
|
+
return await self._parse_js(js_code=js_code)
|
150
|
+
|
151
|
+
|
152
|
+
async def _browser_fetch(self, url: str, selector: str, state: str = 'attached') -> dict:
|
153
|
+
if self._browser is None or self._bcontext is None:
|
154
|
+
await self._new_session(include_aiohttp=False, include_browser=True)
|
155
|
+
|
156
|
+
page = await self._bcontext.new_page()
|
157
|
+
await page.goto(url, wait_until='commit')
|
158
|
+
# Wait until the selector script tag appears
|
159
|
+
await page.wait_for_selector(selector=selector, state=state)
|
160
|
+
content = await page.content()
|
161
|
+
await page.close()
|
162
|
+
|
163
|
+
if self._autoclose_browser:
|
164
|
+
await self.close(include_aiohttp=False, include_browser=True)
|
165
|
+
return content
|
166
|
+
|
167
|
+
def _parse_proxy(self, proxy_str: str | None) -> dict | None:
|
168
|
+
if not proxy_str:
|
169
|
+
return None
|
170
|
+
|
171
|
+
# Example: user:pass@host:port or just host:port
|
172
|
+
match = re.match(
|
173
|
+
r'^(?:(?P<scheme>https?:\/\/))?(?:(?P<username>[^:@]+):(?P<password>[^@]+)@)?(?P<host>[^:]+):(?P<port>\d+)$',
|
174
|
+
proxy_str,
|
175
|
+
)
|
176
|
+
|
177
|
+
proxy_dict = {}
|
178
|
+
if not match:
|
179
|
+
proxy_dict['server'] = proxy_str
|
180
|
+
|
181
|
+
if not proxy_str.startswith('http://') and not proxy_str.startswith('https://'):
|
182
|
+
proxy_dict['server'] = f"http://{proxy_str}"
|
183
|
+
|
184
|
+
return proxy_dict
|
185
|
+
else:
|
186
|
+
match_dict = match.groupdict()
|
187
|
+
proxy_dict['server'] = f"{match_dict['scheme'] or 'http://'}{match_dict['host']}:{match_dict['port']}"
|
188
|
+
|
189
|
+
for key in ['username', 'password']:
|
190
|
+
if match_dict[key]:
|
191
|
+
proxy_dict[key] = match_dict[key]
|
192
|
+
|
193
|
+
return proxy_dict
|
194
|
+
|
195
|
+
async def _new_session(self, include_aiohttp: bool = True, include_browser: bool = False) -> None:
|
196
|
+
await self.close(include_aiohttp=include_aiohttp, include_browser=include_browser)
|
197
|
+
|
198
|
+
if include_aiohttp:
|
199
|
+
args = {"headers": {"User-Agent": UserAgent().random}}
|
200
|
+
if self._proxy: args["proxy"] = self._proxy
|
201
|
+
self._session = aiohttp.ClientSession(**args)
|
202
|
+
|
203
|
+
if self._debug: print(f"A new connection aiohttp has been opened. Proxy used: {args.get('proxy')}")
|
204
|
+
|
205
|
+
if include_browser:
|
206
|
+
self._browser = await AsyncCamoufox(headless=not self._debug, proxy=self._parse_proxy(self.proxy), geoip=True).__aenter__()
|
207
|
+
self._bcontext = await self._browser.new_context()
|
208
|
+
|
209
|
+
if self._debug: print(f"A new connection browser has been opened. Proxy used: {self.proxy}")
|
210
|
+
|
211
|
+
async def close(
|
212
|
+
self,
|
213
|
+
include_aiohttp: bool = True,
|
214
|
+
include_browser: bool = False
|
215
|
+
) -> None:
|
216
|
+
"""
|
217
|
+
Close the aiohttp session and/or Camoufox browser if they are open.
|
218
|
+
:param include_aiohttp: close aiohttp session if True
|
219
|
+
:param include_browser: close browser if True
|
220
|
+
"""
|
221
|
+
to_close = []
|
222
|
+
if include_aiohttp:
|
223
|
+
to_close.append("session")
|
224
|
+
if include_browser:
|
225
|
+
to_close.append("bcontext")
|
226
|
+
to_close.append("browser")
|
227
|
+
|
228
|
+
if not to_close:
|
229
|
+
raise ValueError("No connections to close")
|
230
|
+
|
231
|
+
checks = {
|
232
|
+
"session": lambda a: a is not None and not a.closed,
|
233
|
+
"browser": lambda a: a is not None,
|
234
|
+
"bcontext": lambda a: a is not None
|
235
|
+
}
|
236
|
+
|
237
|
+
for name in to_close:
|
238
|
+
attr = getattr(self, f"_{name}", None)
|
239
|
+
if checks[name](attr):
|
240
|
+
if "browser" in name:
|
241
|
+
await attr.__aexit__(None, None, None)
|
242
|
+
else:
|
243
|
+
await attr.close()
|
244
|
+
setattr(self, f"_{name}", None)
|
245
|
+
if self._debug:
|
246
|
+
print(f"The {name} connection was closed")
|
247
|
+
else:
|
248
|
+
if self._debug:
|
249
|
+
print(f"The {name} connection was not open")
|
250
|
+
|
251
|
+
|