mangagraph 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mangagraph/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ from .parser import Mangagraph
2
+ from .models import Chapter
3
+ from .exceptions import (
4
+ MangagraphError,
5
+ InvalidURLException,
6
+ RequestFailedException
7
+ )
8
+
9
+ __all__ = [
10
+ 'Mangagraph',
11
+ 'Chapter',
12
+ 'MangagraphError',
13
+ 'InvalidURLException',
14
+ 'RequestFailedException'
15
+ ]
mangagraph/cli.py ADDED
@@ -0,0 +1,41 @@
1
+ import argparse
2
+ import asyncio
3
+ import logging
4
+
5
+ from .parser import Mangagraph
6
+ from .exceptions import MangagraphError
7
+
8
def main():
    """Command-line entry point: publish a MangaLib title as Telegraph pages.

    Parses the command line, configures root logging, runs
    ``Mangagraph.process_manga`` for the given URL and logs the links to the
    generated table of contents.

    Returns:
        int: ``0`` on success, ``1`` when processing failed or no table of
        contents was produced. The ``console_scripts`` wrapper passes this
        value to ``sys.exit``, so failures become a non-zero exit status.
    """
    parser = argparse.ArgumentParser(description="Mangagraph")
    parser.add_argument('url', type=str, help='URL of the manga to process')
    parser.add_argument('--db', type=str, default='manga.db', help='Database file name')
    parser.add_argument('--mirror', action='store_true', help='Use graph.org as mirror')

    args = parser.parse_args()

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.StreamHandler()
        ]
    )
    logger = logging.getLogger(__name__)

    try:
        # The constructor's keyword is ``db_name``; ``db_path`` raised
        # TypeError on every invocation.
        parser_instance = Mangagraph(
            db_name=args.db,
            use_mirror=args.mirror
        )
        result = asyncio.run(parser_instance.process_manga(args.url))
    except MangagraphError as e:
        logger.error("Parser error: %s", e)
        return 1
    except Exception as e:
        # Top-level boundary: keep the traceback so the failure is debuggable.
        logger.exception("Unexpected error: %s", e)
        return 1

    # process_manga may finish without a result (nothing processed or the
    # run was interrupted); unpacking it blindly would raise TypeError.
    if not result:
        logger.error("No table of contents was created: no chapters were processed.")
        return 1

    toc_url, mirror_toc_url = result
    logger.info("База данных создана!")
    logger.info("Оглавление: %s", toc_url)
    logger.info("Зеркало оглавления: %s", mirror_toc_url)
    return 0
39
+
40
+ if __name__ == "__main__":
41
+ main()
@@ -0,0 +1,14 @@
1
class MangagraphError(Exception):
    """Base class for the mangagraph-specific exceptions."""
3
+
4
class InvalidURLException(MangagraphError):
    """Error for a URL that is not in a supported format.

    Attributes:
        url: The rejected URL.
        message: Explanation text; the exception string is
            ``"<message> URL: <url>"``.
    """

    def __init__(self, url, message="Invalid URL provided."):
        self.url, self.message = url, message
        super().__init__("{} URL: {}".format(message, url))
9
+
10
class RequestFailedException(MangagraphError):
    """Error for a request to a URL that did not succeed.

    Attributes:
        url: The URL that was requested.
        message: Explanation text; the exception string is
            ``"<message> URL: <url>"``.
    """

    def __init__(self, url, message="Request to the URL failed."):
        self.url, self.message = url, message
        super().__init__("{} URL: {}".format(message, url))
mangagraph/models.py ADDED
@@ -0,0 +1,20 @@
1
+ from sqlalchemy import Column, Integer, String, Text
2
+ from sqlalchemy.ext.declarative import declarative_base
3
+
4
+ from datetime import datetime
5
+
6
+ Base = declarative_base()
7
+
8
class Chapter(Base):
    """ORM model for one stored chapter, mapped to the ``chapters`` table."""

    __tablename__ = 'chapters'

    # Surrogate primary key.
    id = Column(Integer, primary_key=True)

    # Position of the chapter inside the title.
    volume = Column(Integer)
    chapter = Column(Integer)

    # Page title and the link to the created page.
    title = Column(String)
    url = Column(Text)

    # Alternative URL if telegra.ph is not accessible.
    mirror_url = Column(Text)

    # ISO-8601 string built from the naive local ``datetime.now()``.
    created_at = Column(String, default=lambda: datetime.now().isoformat())

    def __repr__(self):
        return "<Chapter(volume={}, chapter={}, title={})>".format(
            self.volume, self.chapter, self.title
        )
mangagraph/parser.py ADDED
@@ -0,0 +1,358 @@
1
+ """
2
+ Author: https://github.com/damirtag | https://t.me/damirtag
3
+ GH Repo: https://github.com/damirtag/mangagraph
4
+
5
+ MIT License
6
+
7
+ Copyright (c) 2025 Tagilbayev Damir
8
+
9
+ Permission is hereby granted, free of charge, to any person obtaining a copy
10
+ of this software and associated documentation files (the "Software"), to deal
11
+ in the Software without restriction, including without limitation the rights
12
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ copies of the Software, and to permit persons to whom the Software is
14
+ furnished to do so, subject to the following conditions:
15
+
16
+ The above copyright notice and this permission notice shall be included in all
17
+ copies or substantial portions of the Software.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
+ SOFTWARE.
26
+ """
27
+
28
+ import asyncio
29
+ import aiohttp
30
+ import logging
31
+
32
+ from typing import List, Dict, Any, Tuple
33
+
34
+ from sqlalchemy import create_engine
35
+ from sqlalchemy.orm import sessionmaker
36
+
37
+ from .models import Base, Chapter
38
+ from .exceptions import (
39
+ MangagraphError,
40
+ RequestFailedException
41
+ )
42
+ from .utils import (
43
+ MangaLibUrl,
44
+ estimate_remaining_time,
45
+ extract_slug
46
+ )
47
+
48
+ from telegraph.aio import Telegraph
49
+
50
class Mangagraph:
    """Publish the chapters of a MangaLib title as Telegraph pages.

    Every created page is recorded in a SQLite database (volume, chapter
    number, title, Telegraph URL, mirror URL, creation time) so that a later
    run skips chapters that are already stored.

    Author: https://github.com/damirtag

    Parameters:
        db_name (str): SQLite database file name; ``.db`` is appended when
            the name does not already end with it.
        use_mirror (bool): Use ``graph.org`` instead of ``telegra.ph`` as the
            Telegraph API domain. Defaults to ``False``.
    """
    MAX_CONCURRENT = 3
    # About 12 chapters are processed per minute, given the 5 second pause
    # that follows every created page.
    CHAPTERS_PER_MINUTE = 12

    def __init__(
        self,
        db_name: str = 'manga.db',
        use_mirror: bool = False
    ):
        self.db_name = db_name if db_name.endswith('.db') else db_name + '.db'
        self.logger = self._setup_logger()
        self.engine = create_engine(f'sqlite:///{self.db_name}')
        Base.metadata.create_all(self.engine)
        self.Session = sessionmaker(bind=self.engine)

        self.domain = 'telegra.ph' if not use_mirror else 'graph.org'
        self.telegraph = Telegraph(domain=self.domain)

        self.base_img_url = "https://img33.imgslib.link"
        self.semaphore = asyncio.Semaphore(self.MAX_CONCURRENT)

        # Progress counters, reset at the start of every process_manga call.
        self.processed_count = 0
        self.total_chapters = 0
        self.flood_wait_count = 0

        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

    def _setup_logger(self):
        """Return the shared ``mangagraph`` logger, adding a handler only once."""
        logger = logging.getLogger('mangagraph')
        if not logger.handlers:
            handler = logging.StreamHandler()
            handler.setFormatter(
                logging.Formatter('[mangagraph]: %(levelname)s - %(message)s')
            )
            logger.addHandler(handler)
        logger.setLevel(logging.INFO)
        return logger

    async def _make_request(self, session: aiohttp.ClientSession, url: str, params: Dict = None) -> Dict:
        """GET ``url`` and return the decoded JSON body.

        Up to three attempts are made, sleeping 1 and then 2 seconds between
        them.

        Raises:
            RequestFailedException: When the last attempt fails; the original
                error is attached as ``__cause__``.
        """
        attempts = 3
        async with self.semaphore:
            for attempt in range(attempts):
                try:
                    async with session.get(url, params=params, headers=self.headers) as response:
                        response.raise_for_status()
                        return await response.json()
                except Exception as e:
                    if attempt == attempts - 1:
                        raise RequestFailedException(url, str(e)) from e
                    await asyncio.sleep(2 ** attempt)

    async def _get_manga_name(self, session: aiohttp.ClientSession, slug: str) -> str:
        """Return the Russian title of the manga, or its ``name`` when empty."""
        url = f"https://api2.mangalib.me/api/manga/{slug}"
        data = await self._make_request(session, url)
        info = data['data']
        return info.get('rus_name') or info['name']

    async def get_chapters_info(self, session: aiohttp.ClientSession, slug: str) -> List[Dict[str, Any]]:
        """Return the ``data`` list of the chapters endpoint for ``slug``."""
        url = f"https://api2.mangalib.me/api/manga/{slug}/chapters"
        data = await self._make_request(session, url)
        return data['data']

    async def get_chapter_pages(
        self,
        session: aiohttp.ClientSession,
        slug: str,
        volume: int,
        chapter: int
    ) -> List[str]:
        """Return the absolute image URLs of every page of one chapter."""
        url = f"https://api2.mangalib.me/api/manga/{slug}/chapter"
        params = {'number': chapter, 'volume': volume}
        data = await self._make_request(session, url, params)
        return [f"{self.base_img_url}{page['url']}" for page in data['data']['pages']]

    async def _create_telegraph_page(
        self,
        title: str,
        image_urls: List[str],
        retry_count: int = 3
    ) -> Tuple[str, str]:
        """Create a Telegraph page made of the given images.

        A ``FLOOD_WAIT`` error is retried after waiting and creating a fresh
        account; any other error aborts immediately.

        Returns:
            ``(telegra.ph URL, graph.org URL)`` of the created page.

        Raises:
            MangagraphError: On a non-flood error, or when every one of the
                ``retry_count`` attempts hit a flood wait.
        """
        html_content = "".join(f'<img src="{url}"/>\n' for url in image_urls)

        for attempt in range(retry_count):
            try:
                response = await self.telegraph.create_page(
                    title=title,
                    html_content=html_content,
                    author_name='Auto-Generated by MGLParser',
                    author_url='https://t.me/damirtag'
                )
            except Exception as e:
                error_text = str(e)
                if "FLOOD_WAIT" not in error_text:
                    raise MangagraphError(
                        f"Не удалось создать telegraph страницу: {error_text}"
                    ) from e

                self.flood_wait_count += 1
                if attempt == retry_count - 1:
                    # No attempt left: waiting again would achieve nothing.
                    break

                wait_time = 7
                if "FLOOD_WAIT_" in error_text:
                    try:
                        wait_time = int(error_text.split("FLOOD_WAIT_")[1])
                    except (IndexError, ValueError):
                        # Unparseable delay: keep the default wait.
                        pass

                self.logger.warning(
                    f'Flood wait #{self.flood_wait_count} detected, '
                    f'waiting {wait_time} seconds. '
                    f'Total floods: {self.flood_wait_count}'
                )

                await asyncio.sleep(wait_time)
                await self.telegraph.create_account(
                    short_name='Damir',
                    author_name='Auto-Generated by MGLParser',
                    author_url='https://t.me/damirtag'
                )
            else:
                return (
                    f"https://telegra.ph/{response['path']}",
                    f"https://graph.org/{response['path']}"
                )

        # Falling out of the loop used to return None implicitly, which made
        # the caller fail while unpacking the result.
        raise MangagraphError(
            f"Не удалось создать telegraph страницу: FLOOD_WAIT после {retry_count} попыток"
        )

    async def _construct_chapters_list(
        self,
        title: str,
        chapters: List[Tuple[int, int, str, str, str]]
    ) -> str:
        """Create a table-of-contents page in Telegraph and return its URL.

        ``chapters`` holds ``(volume, chapter, title, url, mirror_url)``
        tuples; each entry links to the mirror URL.

        Raises:
            MangagraphError: When Telegraph rejects the page.
        """
        content = [
            {
                "tag": "p",
                "children": ["Создано Mangagraph, developer - ", {
                    "tag": "a",
                    "attrs": {"href": "https://t.me/damirtag"},
                    "children": ["@damirtag"]}
                ]
            }
        ]

        for volume, chapter_num, chapter_title, url, mirror_url in chapters:
            content.append({
                "tag": "p",
                "children": [
                    {
                        "tag": "a",
                        "attrs": {"href": mirror_url},
                        "children": [f"Volume {volume} Chapter {chapter_num}: {chapter_title}"]
                    }
                ]
            })

        try:
            response = await self.telegraph.create_page(
                title=title,
                author_name='Damir',
                author_url='https://github.com/damirtag/mangagraph',
                content=content
            )
        except Exception as e:
            raise MangagraphError(f'Telegraph says: {str(e)}') from e

        return response['url']

    @staticmethod
    def _chapter_title(manga_name: str, chapter_info: Dict[str, Any]) -> str:
        """Build a page title, falling back to volume/chapter when unnamed."""
        name = chapter_info.get('name')
        if name:
            return f"{manga_name} | {name}"
        return (
            f"{manga_name} | Том {chapter_info.get('volume')}, "
            f"Глава {chapter_info.get('number')}"
        )

    def _remaining_time(self) -> str:
        """Return the estimated time for the chapters not yet processed."""
        return estimate_remaining_time(
            self.total_chapters - self.processed_count,
            self.CHAPTERS_PER_MINUTE
        )

    async def _publish_chapter(
        self,
        session: aiohttp.ClientSession,
        db_session,
        slug: str,
        volume: int,
        chapter_num: int,
        title: str
    ) -> Tuple[str, str]:
        """Fetch one chapter, publish it, store the row and return its URLs."""
        pages = await self.get_chapter_pages(session, slug, volume, chapter_num)
        url, mirror_url = await self._create_telegraph_page(
            title=title,
            image_urls=pages
        )
        db_session.add(Chapter(
            volume=volume,
            chapter=chapter_num,
            title=title,
            url=url,
            mirror_url=mirror_url
        ))
        db_session.commit()
        return url, mirror_url

    async def process_manga(self, manga_url: MangaLibUrl):
        """Publish every chapter of a manga and build a table of contents.

        Chapters already present in the database (matched by volume and
        chapter number) are reused. Processing stops at the first chapter
        that fails; the table of contents then covers the chapters handled
        so far.

        Parameters:
            manga_url (str | MangaLibUrl): URL of the manga to process.

        Returns:
            A tuple ``(toc_url, mirror_toc_url)`` with the table-of-contents
            URL and its ``graph.org`` counterpart, or ``None`` when no
            chapter could be processed or the run was interrupted.

        Raises:
            MangagraphError: On a Telegraph failure or when the title has no
                chapters.
            InvalidURLException: When the manga URL is not valid.
            RequestFailedException: When a request to the API fails.
        """
        # Accept both plain strings and MangaLibUrl objects, and validate
        # before any network call or database session is opened.
        slug = extract_slug(str(manga_url))

        await self.telegraph.create_account(
            short_name='Damir',
            author_name='Создано mangagraph by @damirtag',
            author_url='https://github.com/damirtag/mangagraph'
        )

        db_session = self.Session()
        try:
            async with aiohttp.ClientSession() as session:
                manga_name = await self._get_manga_name(session, slug)
                chapters = await self.get_chapters_info(session, slug)
                if not chapters:
                    raise MangagraphError(f"No chapters found for '{slug}'")

                self.total_chapters = len(chapters)
                self.processed_count = 0
                processed_chapters = []

                for chapter_info in chapters:
                    volume = chapter_info.get('volume')
                    chapter_num = chapter_info.get('number')
                    title = self._chapter_title(manga_name, chapter_info)

                    # NOTE: rows are matched by volume/chapter only, so one
                    # database file should be used for a single title.
                    existing_chapter = db_session.query(Chapter).filter_by(
                        volume=volume,
                        chapter=chapter_num
                    ).first()

                    if existing_chapter:
                        self.processed_count += 1
                        processed_chapters.append(
                            (volume, chapter_num, title, existing_chapter.url, existing_chapter.mirror_url)
                        )
                        est_time = self._remaining_time()
                        self.logger.info(
                            f"Глава {chapter_num} уже существует, пропускаем... "
                            f"[{self.processed_count}/{self.total_chapters}] "
                            f"Примерное время: {est_time}"
                        )
                        continue

                    try:
                        url, mirror_url = await self._publish_chapter(
                            session, db_session, slug, volume, chapter_num, title
                        )

                        self.processed_count += 1
                        processed_chapters.append((volume, chapter_num, title, url, mirror_url))

                        est_time = self._remaining_time()
                        self.logger.info(
                            f"Processed chapter: {title} "
                            f"[{self.processed_count}/{self.total_chapters}] "
                            f"Remaining time: {est_time}"
                        )

                        # Pause between pages to stay under the flood limit.
                        await asyncio.sleep(5)

                    except Exception as e:
                        self.logger.error(
                            f"Ошибка обработки главы {chapter_num}: {str(e)}\n"
                            f"Всего обработано: {self.processed_count}/{self.total_chapters}"
                        )
                        db_session.rollback()
                        break

                if not processed_chapters:
                    return None

                toc_url = await self._construct_chapters_list(
                    manga_name,
                    processed_chapters
                )
                mirror_toc_url = toc_url.replace("telegra.ph", "graph.org")

                self.logger.info(f"Создано оглавление: {toc_url}")
                self.logger.info(f"Зеркало: {mirror_toc_url}")
                self.logger.info(f"Всего обработано: {self.processed_count}/{self.total_chapters}")

                return toc_url, mirror_toc_url

        except (KeyboardInterrupt, SystemExit):
            self.logger.info('Sayonara!')
            return None

        finally:
            db_session.close()
mangagraph/utils.py ADDED
@@ -0,0 +1,50 @@
1
+
2
+ import re
3
+ from urllib.parse import urlparse
4
+ from .exceptions import InvalidURLException
5
+
6
+
7
class MangaLibUrl:
    """A MangaLib URL that is validated on construction.

    Two shapes are accepted: the title page
    (``https://mangalib.me/ru/manga/{slug_url}``) and the reader page
    (``https://mangalib.me/ru/{slug_url}/read/v{volume}/c{chapter}``), each
    optionally followed by a query string.

    Raises:
        ValueError: When the scheme or host is wrong, or the URL matches
            neither accepted shape.
    """

    MANGALIB_URL_PATTERNS = [
        r"^https://mangalib\.me/ru/manga/[\w\-]+(\?.+)?$",
        r"^https://mangalib\.me/ru/\d+--[\w\-]+/read/v\d+/c\d+(\?.+)?$"
    ]

    def __init__(self, url: str):
        self.url = url
        self._validate_url()

    def _validate_url(self):
        """Check scheme and host first, then the accepted URL shapes."""
        parts = urlparse(self.url)
        if parts.scheme != "https" or parts.netloc != "mangalib.me":
            raise ValueError("Неверный адресс. Только 'https://mangalib.me' доступен.")

        for pattern in self.MANGALIB_URL_PATTERNS:
            if re.match(pattern, self.url):
                return

        raise ValueError(
            "Неверный URL. Ссылка должна быть одного из типов:\n"
            "- 'https://mangalib.me/ru/manga/{slug_url}'\n"
            "- 'https://mangalib.me/ru/{slug_url}/read/v{volume}/c{chapter}'"
        )

    def __str__(self):
        return self.url
31
+
32
def estimate_remaining_time(remaining_chapters: int, chapters_per_minute: int = 12) -> str:
    """Format a rough time estimate for the chapters still to be processed.

    The raw estimate (``remaining_chapters / chapters_per_minute`` minutes)
    is padded by 10%. The result is a Russian phrase: "меньше минуты" below
    one minute, otherwise minutes, with hours prepended from 60 minutes up.
    """
    total_minutes = (remaining_chapters / chapters_per_minute) * 1.1
    if total_minutes < 1:
        return "меньше минуты"

    whole_hours, leftover = divmod(total_minutes, 60)
    hours, minutes = int(whole_hours), int(leftover)

    if hours <= 0:
        return f"{minutes} мин"
    return f"{hours} ч {minutes} мин"
44
+
45
def extract_slug(url: str) -> str:
    """Return the ``<id>--<name>`` slug contained in a MangaLib URL.

    Both URL shapes accepted by ``MangaLibUrl`` are supported: the title
    page (``.../manga/<slug>``) and the reader page
    (``.../<slug>/read/v<volume>/c<chapter>``). The slug ends at the next
    ``/``, ``?`` or ``#``, so trailing slashes, extra path segments, query
    strings and fragments never leak into it.

    Raises:
        InvalidURLException: When no slug can be found in ``url``.
    """
    patterns = (
        r'/manga/(\d+--[^/?#]+)',
        r'/(\d+--[^/?#]+)/read/',
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    raise InvalidURLException(url, "Invalid MangaLib URL format.")
@@ -0,0 +1,16 @@
1
+ Metadata-Version: 2.1
2
+ Name: mangagraph
3
+ Version: 0.0.1
4
+ Summary: Async manga parser-converter from mangalib to telegraph pages
5
+ Home-page: https://github.com/damirTAG/mangagraph
6
+ Author: damirTAG
7
+ Author-email: damirtagilbayev17@gmail.com
8
+ Keywords: mangalib,mangalib-parser,manga,telegraph
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.7
12
+ Requires-Dist: aiohttp
13
+ Requires-Dist: sqlalchemy
14
+ Requires-Dist: telegraph
15
+ Requires-Dist: asyncio
16
+
@@ -0,0 +1,12 @@
1
+ mangagraph/__init__.py,sha256=MNtsNZdl-EL-WBoaTKEVBMUs0GeLwRle7s5nS0nFQPo,300
2
+ mangagraph/cli.py,sha256=GJURu_E7n29wORRuj0puIh8tToBlMuylEC3jX9eabsY,1439
3
+ mangagraph/exceptions.py,sha256=8_G83I2U-VddIo5DAtvvQkYy6ZrK76cWQur41iM8bSs,505
4
+ mangagraph/models.py,sha256=68ZitVtc796M0V3uz_IKRLbQ_iO6_iqzxo6TbJ6YUL4,671
5
+ mangagraph/parser.py,sha256=Iai5e5PaaolVO3gL0W6tEBcB1prhIWAI8GPn2Px9FQw,14838
6
+ mangagraph/utils.py,sha256=77AsE2BO8sDoLvjwKziuOMPSU9jglYbB06MWHVafYUc,1797
7
+ tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ mangagraph-0.0.1.dist-info/METADATA,sha256=gOpgYxAqAOEpjJkMnAOnSPIu-JvUy9NVbZYiCTlVIj0,517
9
+ mangagraph-0.0.1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
10
+ mangagraph-0.0.1.dist-info/entry_points.txt,sha256=eQSK112FJYlpZktP6h-vafO2xBV7bA8-4FK8s_CCrwg,51
11
+ mangagraph-0.0.1.dist-info/top_level.txt,sha256=SQBWdE_2ty2EvRzW3XNZ_chfnTWZcicoqq3EKajSfvM,17
12
+ mangagraph-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: bdist_wheel (0.45.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ mangagraph = mangagraph.cli:main
@@ -0,0 +1,2 @@
1
+ mangagraph
2
+ tests
tests/__init__.py ADDED
File without changes