mangagraph 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mangagraph-0.0.1/PKG-INFO +15 -0
- mangagraph-0.0.1/mangagraph/__init__.py +15 -0
- mangagraph-0.0.1/mangagraph/cli.py +41 -0
- mangagraph-0.0.1/mangagraph/exceptions.py +14 -0
- mangagraph-0.0.1/mangagraph/models.py +20 -0
- mangagraph-0.0.1/mangagraph/parser.py +358 -0
- mangagraph-0.0.1/mangagraph/utils.py +50 -0
- mangagraph-0.0.1/mangagraph.egg-info/PKG-INFO +15 -0
- mangagraph-0.0.1/mangagraph.egg-info/SOURCES.txt +15 -0
- mangagraph-0.0.1/mangagraph.egg-info/dependency_links.txt +1 -0
- mangagraph-0.0.1/mangagraph.egg-info/entry_points.txt +2 -0
- mangagraph-0.0.1/mangagraph.egg-info/requires.txt +4 -0
- mangagraph-0.0.1/mangagraph.egg-info/top_level.txt +2 -0
- mangagraph-0.0.1/pyproject.toml +6 -0
- mangagraph-0.0.1/setup.cfg +4 -0
- mangagraph-0.0.1/setup.py +40 -0
- mangagraph-0.0.1/tests/__init__.py +0 -0
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: mangagraph
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Async manga parser-converter from mangalib to telegraph pages
|
|
5
|
+
Home-page: https://github.com/damirTAG/mangagraph
|
|
6
|
+
Author: damirTAG
|
|
7
|
+
Author-email: damirtagilbayev17@gmail.com
|
|
8
|
+
Keywords: mangalib,mangalib-parser,manga,telegraph
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.7
|
|
12
|
+
Requires-Dist: aiohttp
|
|
13
|
+
Requires-Dist: sqlalchemy
|
|
14
|
+
Requires-Dist: telegraph
|
|
15
|
+
Requires-Dist: asyncio
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from .parser import Mangagraph
|
|
2
|
+
from .models import Chapter
|
|
3
|
+
from .exceptions import (
|
|
4
|
+
MangagraphError,
|
|
5
|
+
InvalidURLException,
|
|
6
|
+
RequestFailedException
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
'Mangagraph',
|
|
11
|
+
'Chapter',
|
|
12
|
+
'MangagraphError',
|
|
13
|
+
'InvalidURLException',
|
|
14
|
+
'RequestFailedException'
|
|
15
|
+
]
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import asyncio
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
from .parser import Mangagraph
|
|
6
|
+
from .exceptions import MangagraphError
|
|
7
|
+
|
|
8
|
+
def main():
    """Command-line entry point: convert one manga into Telegraph pages.

    Parses the command line, configures console logging, runs
    ``Mangagraph.process_manga`` for the given URL and logs the resulting
    table-of-contents links.

    Returns:
        int: ``0`` when a table of contents was created, ``1`` otherwise.
        The value becomes the process exit status when the function is
        invoked through the ``console_scripts`` wrapper.
    """
    parser = argparse.ArgumentParser(description="Mangagraph")
    parser.add_argument('url', type=str, help='URL of the manga to process')
    parser.add_argument('--db', type=str, default='manga.db', help='Database file name')
    parser.add_argument('--mirror', action='store_true', help='Use graph.org as mirror')

    args = parser.parse_args()

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.StreamHandler()
        ]
    )
    logger = logging.getLogger(__name__)

    try:
        # The constructor's keyword is ``db_name``; the previous ``db_path``
        # keyword raised TypeError on every invocation.
        parser_instance = Mangagraph(
            db_name=args.db,
            use_mirror=args.mirror
        )
        result = asyncio.run(parser_instance.process_manga(args.url))
    except MangagraphError as e:
        logger.error(f"Parser error: {e}")
        return 1
    except Exception as e:
        # Top-level boundary: report anything unexpected instead of a traceback.
        logger.error(f"Unexpected error: {e}")
        return 1

    # process_manga yields nothing when no chapter could be processed (or the
    # run was interrupted); unpacking that result would raise TypeError.
    if not result:
        logger.error("No chapters were processed; table of contents was not created.")
        return 1

    toc_url, mirror_toc_url = result
    logger.info("База данных создана!")
    logger.info(f"Оглавление: {toc_url}")
    logger.info(f"Зеркало оглавления: {mirror_toc_url}")
    return 0
|
|
39
|
+
|
|
40
|
+
if __name__ == "__main__":
|
|
41
|
+
main()
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
class MangagraphError(Exception):
    """Root of the package's exception hierarchy.

    Catching this type catches every error that mangagraph raises on purpose.
    """
|
|
3
|
+
|
|
4
|
+
class InvalidURLException(MangagraphError):
    """Raised when a URL cannot be used by the parser.

    Attributes:
        url: The offending URL as given by the caller.
        message: Human-readable reason; the URL is appended to it to form
            the exception text.
    """

    def __init__(self, url, message="Invalid URL provided."):
        # Keep both parts available to handlers that want to format their own text.
        self.url, self.message = url, message
        super().__init__(f"{self.message} URL: {self.url}")
|
|
9
|
+
|
|
10
|
+
class RequestFailedException(MangagraphError):
    """Raised when an HTTP request to a URL fails.

    Attributes:
        url: The URL that was requested.
        message: Human-readable reason; the URL is appended to it to form
            the exception text.
    """

    def __init__(self, url, message="Request to the URL failed."):
        # Keep both parts available to handlers that want to format their own text.
        self.url, self.message = url, message
        super().__init__(f"{self.message} URL: {self.url}")
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from sqlalchemy import Column, Integer, String, Text
|
|
2
|
+
from sqlalchemy.ext.declarative import declarative_base
|
|
3
|
+
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
|
|
6
|
+
Base = declarative_base()
|
|
7
|
+
|
|
8
|
+
class Chapter(Base):
    """ORM mapping for the ``chapters`` table: one row per stored chapter."""

    __tablename__ = 'chapters'

    # Auto-assigned primary key.
    id = Column(Integer, primary_key=True)

    # Position of the chapter inside the manga.
    volume = Column(Integer)
    chapter = Column(Integer)

    # Title the page was stored under.
    title = Column(String)

    # Link to the page, plus an alternative link for when telegra.ph is not accessible.
    url = Column(Text)
    mirror_url = Column(Text)

    # ISO-8601 text of the local time at which the row was created.
    created_at = Column(String, default=lambda: datetime.now().isoformat())

    def __repr__(self):
        """Return a short debugging representation of the row."""
        return f"<Chapter(volume={self.volume}, chapter={self.chapter}, title={self.title})>"
|
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Author: https://github.com/damirtag | https://t.me/damirtag
|
|
3
|
+
GH Repo: https://github.com/damirtag/mangagraph
|
|
4
|
+
|
|
5
|
+
MIT License
|
|
6
|
+
|
|
7
|
+
Copyright (c) 2025 Tagilbayev Damir
|
|
8
|
+
|
|
9
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
10
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
11
|
+
in the Software without restriction, including without limitation the rights
|
|
12
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
13
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
14
|
+
furnished to do so, subject to the following conditions:
|
|
15
|
+
|
|
16
|
+
The above copyright notice and this permission notice shall be included in all
|
|
17
|
+
copies or substantial portions of the Software.
|
|
18
|
+
|
|
19
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
20
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
21
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
22
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
23
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
24
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
25
|
+
SOFTWARE.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
import asyncio
|
|
29
|
+
import aiohttp
|
|
30
|
+
import logging
|
|
31
|
+
|
|
32
|
+
from typing import List, Dict, Any, Tuple
|
|
33
|
+
|
|
34
|
+
from sqlalchemy import create_engine
|
|
35
|
+
from sqlalchemy.orm import sessionmaker
|
|
36
|
+
|
|
37
|
+
from .models import Base, Chapter
|
|
38
|
+
from .exceptions import (
|
|
39
|
+
MangagraphError,
|
|
40
|
+
RequestFailedException
|
|
41
|
+
)
|
|
42
|
+
from .utils import (
|
|
43
|
+
MangaLibUrl,
|
|
44
|
+
estimate_remaining_time,
|
|
45
|
+
extract_slug
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
from telegraph.aio import Telegraph
|
|
49
|
+
|
|
50
|
+
class Mangagraph:
    """
    Convert the chapters of a MangaLib title into Telegraph pages.

    Every created page is recorded in a SQLite database so that a later run
    skips chapters that were already converted.

    Author: https://github.com/damirtag

    Parameters:
        db_name (str): Name of the database file that stores volume, chapter,
            chapter title, Telegraph link, mirror link and creation date.
            ``.db`` is appended when the name does not end with it.
        use_mirror (bool): Use the graph.org mirror as the Telegraph base
            domain. Defaults to False.
    """
    MAX_CONCURRENT = 3
    # 12 chapters are processed per minute, i.e. 12 Telegraph pages per
    # minute, given that one chapter is requested every 5 seconds.
    CHAPTERS_PER_MINUTE = 12

    def __init__(
        self,
        db_name: str = 'manga.db',
        use_mirror: bool = False
    ):
        self.db_name = db_name if db_name.endswith('.db') else db_name + '.db'
        self.logger = self._setup_logger()
        self.engine = create_engine(f'sqlite:///{self.db_name}')
        Base.metadata.create_all(self.engine)
        self.Session = sessionmaker(bind=self.engine)

        self.domain = 'telegra.ph' if not use_mirror else 'graph.org'
        self.telegraph = Telegraph(domain=self.domain)

        self.base_img_url = "https://img33.imgslib.link"
        # Caps the number of MangaLib API requests in flight at once.
        self.semaphore = asyncio.Semaphore(self.MAX_CONCURRENT)

        self.processed_count = 0
        self.total_chapters = 0
        self.flood_wait_count = 0

        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

    def _setup_logger(self):
        """Return the shared ``mangagraph`` logger, attaching a console handler once."""
        logger = logging.getLogger('mangagraph')
        # Only add a handler the first time, otherwise every new instance
        # would duplicate each log line.
        if not logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter('[mangagraph]: %(levelname)s - %(message)s')
            handler.setFormatter(formatter)
            logger.addHandler(handler)
        logger.setLevel(logging.INFO)
        return logger

    async def _make_request(self, session: aiohttp.ClientSession, url: str, params: Dict = None) -> Dict:
        """GET ``url`` and return the decoded JSON body.

        The request is attempted up to three times with 1 s and 2 s pauses
        between attempts.

        Raises:
            RequestFailedException: When the third attempt fails as well; the
                original error is chained as ``__cause__``.
        """
        max_attempts = 3
        async with self.semaphore:
            for attempt in range(max_attempts):
                try:
                    async with session.get(url, params=params, headers=self.headers) as response:
                        response.raise_for_status()
                        return await response.json()
                except Exception as e:
                    if attempt == max_attempts - 1:
                        raise RequestFailedException(url, str(e)) from e
                    await asyncio.sleep(2 ** attempt)

    async def _get_manga_name(self, session: aiohttp.ClientSession, slug: str) -> str:
        """Return the Russian name of the manga, or its default name when the Russian one is empty."""
        url = f"https://api2.mangalib.me/api/manga/{slug}"
        data = await self._make_request(session, url)
        manga = data['data']
        return manga['rus_name'] or manga['name']

    async def get_chapters_info(self, session: aiohttp.ClientSession, slug: str) -> List[Dict[str, Any]]:
        """Return the ``data`` list of the chapters endpoint for ``slug``."""
        url = f"https://api2.mangalib.me/api/manga/{slug}/chapters"
        data = await self._make_request(session, url)
        return data['data']

    async def get_chapter_pages(
        self,
        session: aiohttp.ClientSession,
        slug: str,
        volume: int,
        chapter: int
    ) -> List[str]:
        """Return absolute image URLs for every page of one chapter."""
        url = f"https://api2.mangalib.me/api/manga/{slug}/chapter"
        params = {'number': chapter, 'volume': volume}
        data = await self._make_request(session, url, params)
        return [f"{self.base_img_url}{page['url']}" for page in data['data']['pages']]

    @staticmethod
    def _flood_wait_seconds(error_text: str, default: int = 7) -> int:
        """Return the number that follows ``FLOOD_WAIT_`` in ``error_text``, or ``default``."""
        _, marker, tail = error_text.partition("FLOOD_WAIT_")
        if not marker:
            return default
        try:
            return int(tail)
        except ValueError:
            return default

    async def _create_telegraph_page(
        self,
        title: str,
        image_urls: List[str],
        retry_count: int = 3
    ) -> Tuple[str, str]:
        """Create one Telegraph page that contains the given images.

        When the error text contains ``FLOOD_WAIT`` the method waits,
        registers a fresh account and tries again, up to ``retry_count``
        attempts in total.

        Returns:
            Tuple of the telegra.ph URL and the graph.org URL of the page.

        Raises:
            MangagraphError: On any other error, or when every attempt ended
                in a flood wait.
        """
        html_content = "".join(f'<img src="{url}"/>\n' for url in image_urls)
        last_error = None

        for attempt in range(retry_count):
            try:
                response = await self.telegraph.create_page(
                    title=title,
                    html_content=html_content,
                    author_name='Auto-Generated by MGLParser',
                    author_url='https://t.me/damirtag'
                )
            except Exception as e:
                error_text = str(e)
                if "FLOOD_WAIT" not in error_text:
                    raise MangagraphError(
                        f"Не удалось создать telegraph страницу: {error_text}"
                    ) from e

                last_error = e
                self.flood_wait_count += 1
                wait_time = self._flood_wait_seconds(error_text)

                self.logger.warning(
                    f'Flood wait #{self.flood_wait_count} detected, '
                    f'waiting {wait_time} seconds. '
                    f'Total floods: {self.flood_wait_count}'
                )

                # No point in waiting when there is no attempt left.
                if attempt == retry_count - 1:
                    break

                await asyncio.sleep(wait_time)
                await self.telegraph.create_account(
                    short_name='Damir',
                    author_name='Auto-Generated by MGLParser',
                    author_url='https://t.me/damirtag'
                )
                continue

            return (
                f"https://telegra.ph/{response['path']}",
                f"https://graph.org/{response['path']}"
            )

        # Falling out of the loop used to return None implicitly, which made
        # the caller fail later while unpacking the result.
        raise MangagraphError(f"Не удалось создать telegraph страницу: {last_error}")

    async def _construct_chapters_list(
        self,
        title: str,
        chapters: List[Tuple[int, int, str, str, str]]
    ) -> str:
        """Create a table-of-contents page in Telegraph and return its URL.

        Each entry of ``chapters`` is ``(volume, chapter number, chapter
        title, url, mirror url)``; entries link to the mirror URL.

        Raises:
            MangagraphError: When Telegraph rejects the page.
        """
        content = [
            {
                "tag": "p",
                "children": ["Создано Mangagraph, developer - ", {
                    "tag": "a",
                    "attrs": {"href": "https://t.me/damirtag"},
                    "children": ["@damirtag"]}
                ]
            }
        ]

        content.extend(
            {
                "tag": "p",
                "children": [
                    {
                        "tag": "a",
                        "attrs": {"href": mirror_url},
                        "children": [f"Volume {volume} Chapter {chapter_num}: {chapter_title}"]
                    }
                ]
            }
            for volume, chapter_num, chapter_title, url, mirror_url in chapters
        )

        try:
            response = await self.telegraph.create_page(
                title=title,
                author_name='Damir',
                author_url='https://github.com/damirtag/mangagraph',
                content=content
            )
        except Exception as e:
            raise MangagraphError(f'Telegraph says: {str(e)}') from e

        return response['url']

    @staticmethod
    def _chapter_title(manga_name: str, chapter_info: Dict[str, Any]) -> str:
        """Return the page title for a chapter, falling back to volume/number when it has no name."""
        chapter_name = chapter_info.get('name')
        if chapter_name:
            return f"{manga_name} | {chapter_name}"
        return (
            f"{manga_name} | Том {chapter_info.get('volume')}, "
            f"Глава {chapter_info.get('number')}"
        )

    async def process_manga(self, manga_url: MangaLibUrl):
        """
        Convert every chapter of a manga and build a table of contents.

        Chapters already present in the database are reused. Processing stops
        at the first chapter that fails; the table of contents then covers
        the chapters handled up to that point.

        Parameters:
            manga_url (str | MangaLibUrl): URL of the manga to process.

        Returns:
            Tuple of two strings:

            - URL of the table of contents in Telegraph.

            - URL of the table-of-contents mirror on graph.org.

            ``None`` when no chapter was processed or the run was interrupted.

        Raises:
            MangagraphError: When the table-of-contents page cannot be created.
            InvalidURLException: When the manga URL is invalid.
            RequestFailedException: When a request for the manga name or the
                chapter list fails.
        """
        # Validate the URL first so that a bad URL costs no network traffic.
        # str() lets callers pass either a plain string or a MangaLibUrl.
        slug = extract_slug(str(manga_url))

        await self.telegraph.create_account(
            short_name='Damir',
            author_name='Создано mangagraph by @damirtag',
            author_url='https://github.com/damirtag/mangagraph'
        )

        db_session = self.Session()
        try:
            async with aiohttp.ClientSession() as session:
                manga_name = await self._get_manga_name(session, slug)
                chapters = await self.get_chapters_info(session, slug)

                self.total_chapters = len(chapters)
                self.processed_count = 0
                processed_chapters = []

                for chapter_info in chapters:
                    volume = chapter_info.get('volume')
                    chapter_num = chapter_info.get('number')
                    title = self._chapter_title(manga_name, chapter_info)

                    # NOTE(review): the lookup is keyed by volume and chapter
                    # only, so one database file is expected to hold a single
                    # manga.
                    existing_chapter = db_session.query(Chapter).filter_by(
                        volume=volume,
                        chapter=chapter_num
                    ).first()

                    if existing_chapter:
                        self.processed_count += 1
                        processed_chapters.append(
                            (volume, chapter_num, title, existing_chapter.url, existing_chapter.mirror_url)
                        )
                        remaining = self.total_chapters - self.processed_count
                        est_time = estimate_remaining_time(remaining, self.CHAPTERS_PER_MINUTE)
                        self.logger.info(
                            f"Глава {chapter_num} уже существует, пропускаем... "
                            f"[{self.processed_count}/{self.total_chapters}] "
                            f"Примерное время: {est_time}"
                        )
                        continue

                    try:
                        pages = await self.get_chapter_pages(
                            session,
                            slug,
                            volume,
                            chapter_num
                        )

                        url, mirror_url = await self._create_telegraph_page(
                            title=title,
                            image_urls=pages
                        )

                        new_chapter = Chapter(
                            volume=volume,
                            chapter=chapter_num,
                            title=title,
                            url=url,
                            mirror_url=mirror_url
                        )
                        db_session.add(new_chapter)
                        db_session.commit()

                        self.processed_count += 1
                        processed_chapters.append((volume, chapter_num, title, url, mirror_url))

                        remaining = self.total_chapters - self.processed_count
                        est_time = estimate_remaining_time(remaining, self.CHAPTERS_PER_MINUTE)
                        self.logger.info(
                            f"Processed chapter: {title} "
                            f"[{self.processed_count}/{self.total_chapters}] "
                            f"Remaining time: {est_time}"
                        )

                        # Pause between chapters to stay under Telegraph's rate limit.
                        await asyncio.sleep(5)

                    except Exception as e:
                        self.logger.error(
                            f"Ошибка обработки главы {chapter_num}: {str(e)}\n"
                            f"Всего обработано: {self.processed_count}/{self.total_chapters}"
                        )
                        db_session.rollback()
                        break

                if processed_chapters:
                    toc_url = await self._construct_chapters_list(
                        manga_name,
                        processed_chapters
                    )
                    mirror_toc_url = toc_url.replace("telegra.ph", "graph.org")

                    self.logger.info(f"Создано оглавление: {toc_url}")
                    self.logger.info(f"Зеркало: {mirror_toc_url}")
                    self.logger.info(f"Всего обработано: {self.processed_count}/{self.total_chapters}")

                    return toc_url, mirror_toc_url

        except (KeyboardInterrupt, SystemExit):
            self.logger.info('Sayonara!')

        finally:
            db_session.close()

        return None
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
|
|
2
|
+
import re
|
|
3
|
+
from urllib.parse import urlparse
|
|
4
|
+
from .exceptions import InvalidURLException
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class MangaLibUrl:
    """A MangaLib URL that is validated on construction.

    Two shapes are accepted: a manga page
    (``https://mangalib.me/ru/manga/{slug_url}``) and a reader page
    (``https://mangalib.me/ru/{slug_url}/read/v{volume}/c{chapter}``),
    each optionally followed by a query string.

    Raises:
        ValueError: When the URL is not an ``https://mangalib.me`` address or
            matches neither accepted shape.
    """

    MANGALIB_URL_PATTERNS = [
        r"^https://mangalib\.me/ru/manga/[\w\-]+(\?.+)?$",
        r"^https://mangalib\.me/ru/\d+--[\w\-]+/read/v\d+/c\d+(\?.+)?$"
    ]

    def __init__(self, url: str):
        self.url = url
        self._validate_url()

    def _validate_url(self):
        """Check scheme and host first, then the overall shape of the URL."""
        parts = urlparse(self.url)
        if parts.scheme != "https" or parts.netloc != "mangalib.me":
            raise ValueError("Неверный адресс. Только 'https://mangalib.me' доступен.")

        for pattern in self.MANGALIB_URL_PATTERNS:
            if re.match(pattern, self.url):
                return

        raise ValueError(
            "Неверный URL. Ссылка должна быть одного из типов:\n"
            "- 'https://mangalib.me/ru/manga/{slug_url}'\n"
            "- 'https://mangalib.me/ru/{slug_url}/read/v{volume}/c{chapter}'"
        )

    def __str__(self):
        return self.url
|
|
31
|
+
|
|
32
|
+
def estimate_remaining_time(remaining_chapters: int, chapters_per_minute: int = 12) -> str:
    """Return a human-readable (Russian) estimate of the time still needed.

    The raw estimate ``remaining_chapters / chapters_per_minute`` is padded
    by 10 %. Anything under one minute is reported as "less than a minute";
    otherwise whole hours (when non-zero) and whole minutes are reported.
    """
    estimated_minutes = (remaining_chapters / chapters_per_minute) * 1.1
    if estimated_minutes < 1:
        return "меньше минуты"

    # Split into whole hours and the minutes left over, dropping fractions.
    whole_hours, leftover_minutes = divmod(estimated_minutes, 60)
    hours, minutes = int(whole_hours), int(leftover_minutes)

    if hours <= 0:
        return f"{minutes} мин"
    return f"{hours} ч {minutes} мин"
|
|
44
|
+
|
|
45
|
+
def extract_slug(url: str) -> str:
    """Extract the ``{id}--{name}`` slug from a MangaLib URL.

    Both URL shapes used by this package are understood: the manga page
    (``.../manga/{slug}``) and the reader page (``.../{slug}/read/...``).
    The slug ends at the next ``/``, ``?`` or ``#`` so that a trailing slash,
    query string or fragment never leaks into it.

    Parameters:
        url: The URL as a string, or any object whose ``str()`` is the URL.

    Returns:
        The slug, e.g. ``"7965--chainsaw-man"``.

    Raises:
        InvalidURLException: When no slug can be found in the URL.
    """
    text = str(url)
    # The manga-page form is tried first so its result is never shadowed by
    # the broader reader-page form.
    for pattern in (r'/manga/(\d+--[^/?#]+)', r'/(\d+--[^/?#]+)/read/'):
        match = re.search(pattern, text)
        if match:
            return match.group(1)
    raise InvalidURLException(url, "Invalid MangaLib URL format.")
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: mangagraph
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Async manga parser-converter from mangalib to telegraph pages
|
|
5
|
+
Home-page: https://github.com/damirTAG/mangagraph
|
|
6
|
+
Author: damirTAG
|
|
7
|
+
Author-email: damirtagilbayev17@gmail.com
|
|
8
|
+
Keywords: mangalib,mangalib-parser,manga,telegraph
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.7
|
|
12
|
+
Requires-Dist: aiohttp
|
|
13
|
+
Requires-Dist: sqlalchemy
|
|
14
|
+
Requires-Dist: telegraph
|
|
15
|
+
Requires-Dist: asyncio
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
pyproject.toml
|
|
2
|
+
setup.py
|
|
3
|
+
mangagraph/__init__.py
|
|
4
|
+
mangagraph/cli.py
|
|
5
|
+
mangagraph/exceptions.py
|
|
6
|
+
mangagraph/models.py
|
|
7
|
+
mangagraph/parser.py
|
|
8
|
+
mangagraph/utils.py
|
|
9
|
+
mangagraph.egg-info/PKG-INFO
|
|
10
|
+
mangagraph.egg-info/SOURCES.txt
|
|
11
|
+
mangagraph.egg-info/dependency_links.txt
|
|
12
|
+
mangagraph.egg-info/entry_points.txt
|
|
13
|
+
mangagraph.egg-info/requires.txt
|
|
14
|
+
mangagraph.egg-info/top_level.txt
|
|
15
|
+
tests/__init__.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from setuptools import setup, find_packages
|
|
2
|
+
|
|
3
|
+
import pathlib
|
|
4
|
+
|
|
5
|
+
# Distribution name; also the name of the importable package.
LIB_NAME = 'mangagraph'

__version__ = '0.0.1'

setup(
    name=LIB_NAME,
    version=__version__,
    description='Async manga parser-converter from mangalib to telegraph pages',
    url='https://github.com/damirTAG/mangagraph',
    author='damirTAG',
    author_email='damirtagilbayev17@gmail.com',
    packages=find_packages(),
    # asyncio is deliberately absent: it is part of the standard library on
    # every supported Python (>=3.7), and listing it here would install the
    # unrelated, obsolete PyPI distribution of the same name.
    install_requires=[
        'aiohttp',
        'sqlalchemy',
        'telegraph',
    ],
    entry_points={
        'console_scripts': [
            'mangagraph= mangagraph.cli:main'
        ]
    },
    classifiers=[
        'Programming Language :: Python :: 3',
        'Operating System :: OS Independent',
    ],
    keywords=[
        'mangalib',
        'mangalib-parser',
        'manga',
        'telegraph'
    ],
    python_requires='>=3.7',
    include_package_data=False
)
|
|
File without changes
|