kabigon 0.3.1__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kabigon/cli.py CHANGED
@@ -1,7 +1,6 @@
1
1
  import click
2
2
  from rich import print
3
3
 
4
- from .cloudscraper import CloudscraperLoader
5
4
  from .compose import Compose
6
5
  from .httpx import HttpxLoader
7
6
  from .pdf import PDFLoader
@@ -20,7 +19,6 @@ def main(url: str) -> None:
20
19
  ReelLoader(),
21
20
  YtdlpLoader(),
22
21
  PDFLoader(),
23
- CloudscraperLoader(),
24
22
  HttpxLoader(),
25
23
  SinglefileLoader(),
26
24
  ]
kabigon/compose.py CHANGED
@@ -37,14 +37,33 @@ class Compose(Loader):
37
37
 
38
38
  for loader in self.loaders:
39
39
  try:
40
- loaded_content = loader.load(url)
40
+ content = loader.load(url)
41
41
 
42
- if not loaded_content:
42
+ if not content:
43
43
  logger.info("[{}] Failed to load URL: {}, got empty result", loader.__class__.__name__, url)
44
44
  continue
45
45
 
46
46
  logger.info("[{}] Successfully loaded URL: {}", loader.__class__.__name__, url)
47
- return loaded_content
47
+ return content
48
+
49
+ except Exception as e:
50
+ logger.info("[{}] Failed to load URL: {}, got error: {}", loader.__class__.__name__, url, e)
51
+
52
+ raise LoaderError(f"Failed to load URL: {url}")
53
+
54
+ async def async_load(self, url: str) -> str:
55
+ url = replace_domain(url)
56
+
57
+ for loader in self.loaders:
58
+ try:
59
+ content = await loader.async_load(url)
60
+
61
+ if not content:
62
+ logger.info("[{}] Failed to load URL: {}, got empty result", loader.__class__.__name__, url)
63
+ continue
64
+
65
+ logger.info("[{}] Successfully loaded URL: {}", loader.__class__.__name__, url)
66
+ return content
48
67
 
49
68
  except Exception as e:
50
69
  logger.info("[{}] Failed to load URL: {}, got error: {}", loader.__class__.__name__, url, e)
kabigon/httpx.py CHANGED
@@ -17,3 +17,9 @@ class HttpxLoader(Loader):
17
17
  response = httpx.get(url, headers=DEFAULT_HEADERS, follow_redirects=True)
18
18
  response.raise_for_status()
19
19
  return html_to_markdown(response.content)
20
+
21
async def async_load(self, url: str) -> str:
    """Download ``url`` with an async httpx client and convert it to markdown.

    Args:
        url: Address to fetch. Redirects are followed and the module's
            ``DEFAULT_HEADERS`` are sent with the request.

    Returns:
        The result of ``html_to_markdown`` applied to the response body.

    Raises:
        httpx.HTTPStatusError: Raised by ``raise_for_status`` when the
            response carries an error status.
    """
    async with httpx.AsyncClient() as session:
        reply = await session.get(url, headers=DEFAULT_HEADERS, follow_redirects=True)
        # Fail before conversion so an error page is never returned as content.
        reply.raise_for_status()
        body = reply.content
    return html_to_markdown(body)
kabigon/loader.py CHANGED
@@ -1,3 +1,7 @@
1
+ import asyncio
2
+ import concurrent.futures
3
+
4
+
1
5
  class Loader:
2
6
  def __call__(self, url: str) -> str:
3
7
  return self.load(url)
@@ -5,6 +9,12 @@ class Loader:
5
9
  def load(self, url: str) -> str:
6
10
  raise NotImplementedError
7
11
 
12
async def async_load(self, url: str) -> str:
    """Run the blocking ``load`` in a worker process and await its result.

    This is the default asynchronous entry point: it hands ``self.load``
    and ``url`` to a process pool so the running event loop is not blocked
    while the synchronous loader works.

    Args:
        url: Address to load; passed unchanged to ``self.load``.

    Returns:
        Whatever ``self.load(url)`` returns in the worker process.

    Note:
        The bound method and its argument are sent to another process, so
        the loader instance must be picklable.
    """
    loop = asyncio.get_running_loop()
    # Exactly one task is submitted per call, so a single worker is enough.
    # The default pool size is one worker per CPU, and depending on the
    # multiprocessing start method those workers may all be started up
    # front, which is wasted work for a one-shot pool.
    with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor:
        return await loop.run_in_executor(executor, self.load, url)
17
+
8
18
 
9
19
  class LoaderError(Exception):
10
20
  pass
kabigon/playwright.py CHANGED
@@ -1,7 +1,6 @@
1
1
  from typing import Literal
2
2
 
3
- from loguru import logger
4
- from playwright.sync_api import TimeoutError
3
+ from playwright.async_api import async_playwright
5
4
  from playwright.sync_api import sync_playwright
6
5
 
7
6
  from .loader import Loader
@@ -24,13 +23,21 @@ class PlaywrightLoader(Loader):
24
23
  browser = p.chromium.launch(headless=self.browser_headless)
25
24
  page = browser.new_page()
26
25
 
27
- try:
28
- page.goto(url, timeout=self.timeout, wait_until=self.wait_until)
29
- except TimeoutError as e:
30
- logger.error("TimeoutError: {}", e)
31
- page.goto(url)
26
+ page.goto(url, timeout=self.timeout, wait_until=self.wait_until)
32
27
 
33
28
  content = page.content()
34
29
  browser.close()
35
30
 
36
31
  return html_to_markdown(content)
32
+
33
async def async_load(self, url: str) -> str:
    """Render ``url`` in headless-capable Chromium and return it as markdown.

    Uses the async Playwright API with the same settings as the loader's
    other attributes: ``browser_headless``, ``timeout`` and ``wait_until``.

    Args:
        url: Address to navigate to.

    Returns:
        The result of ``html_to_markdown`` applied to the page's HTML.
    """
    async with async_playwright() as pw:
        chromium = await pw.chromium.launch(headless=self.browser_headless)
        tab = await chromium.new_page()

        # A navigation failure (including a timeout) propagates to the caller.
        await tab.goto(url, timeout=self.timeout, wait_until=self.wait_until)

        html = await tab.content()
        await chromium.close()

        return html_to_markdown(html)
kabigon/reel.py CHANGED
@@ -29,3 +29,12 @@ class ReelLoader(Loader):
29
29
  html_content = self.httpx_loader.load(url)
30
30
 
31
31
  return f"{audio_content}\n\n{html_content}"
32
+
33
async def async_load(self, url: str):
    """Load a reel asynchronously as audio-derived text plus page text.

    Args:
        url: Address to load; must satisfy ``is_reel_url``.

    Returns:
        The ytdlp loader's result and the httpx loader's result for the
        same URL, separated by a blank line.

    Raises:
        NotReelURLError: If ``url`` is not recognised as a reel URL.
    """
    if is_reel_url(url):
        # The two loaders are awaited one after the other, ytdlp first.
        transcript = await self.ytdlp_loader.async_load(url)
        page_text = await self.httpx_loader.async_load(url)
        return f"{transcript}\n\n{page_text}"

    raise NotReelURLError(url)
kabigon/youtube.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from urllib.parse import parse_qs
2
2
  from urllib.parse import urlparse
3
3
 
4
+ import aioytt
4
5
  import timeout_decorator
5
6
  from youtube_transcript_api import YouTubeTranscriptApi
6
7
 
@@ -90,3 +91,12 @@ class YoutubeLoader(Loader):
90
91
  if text:
91
92
  lines.append(text)
92
93
  return "\n".join(lines)
94
+
95
async def async_load(self, url: str) -> str:
    """Fetch a video transcript asynchronously and return it as plain text.

    Args:
        url: Video address, passed unchanged to
            ``aioytt.get_transcript_from_url``.

    Returns:
        The transcript pieces with surrounding whitespace stripped and
        empty pieces dropped, joined with newlines (one piece per line).
    """
    transcript = await aioytt.get_transcript_from_url(url)
    # Each piece must stay a single element. The earlier ``lines += text``
    # extended the list with the individual characters of the string, so
    # the joined output had one character per line.
    stripped = (piece.text.strip() for piece in transcript)
    return "\n".join(text for text in stripped if text)
@@ -1,9 +1,10 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kabigon
3
- Version: 0.3.1
3
+ Version: 0.4.1
4
4
  Author-email: narumi <toucans-cutouts0f@icloud.com>
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.10
7
+ Requires-Dist: aioytt>=0.2.4
7
8
  Requires-Dist: click>=8.1.8
8
9
  Requires-Dist: cloudscraper>=1.2.71
9
10
  Requires-Dist: httpx>=0.28.1
@@ -19,3 +20,35 @@ Requires-Dist: yt-dlp>=2025.1.26
19
20
  Description-Content-Type: text/markdown
20
21
 
21
22
  # kabigon
23
+
24
+ ## Installation
25
+
26
+ ```shell
27
+ pip install kabigon
28
+ ```
29
+
30
+ ## Usage
31
+
32
+ ```shell
33
+ kabigon <url>
34
+ ```
35
+
36
+ or
37
+
38
+ ```python
39
+ import kabigon
40
+
41
+ url = "https://www.google.com.tw"
42
+
43
+ content = kabigon.Compose(
44
+ [
45
+ kabigon.YoutubeLoader(),
46
+ kabigon.ReelLoader(),
47
+ kabigon.YtdlpLoader(),
48
+ kabigon.PDFLoader(),
49
+ kabigon.HttpxLoader(),
50
+ kabigon.SinglefileLoader(),
51
+ ]
52
+ ).load(url)
53
+ print(content)
54
+ ```
@@ -0,0 +1,19 @@
1
+ kabigon/__init__.py,sha256=9RgyhhwjqrW2iQy9RBN2j7VZNhwA9xGo_atC7FKnZA4,545
2
+ kabigon/cli.py,sha256=z3u2Msvi1SWf1fd9nCTzJULeO-rRb5oDKJfPxhUeYQ0,611
3
+ kabigon/cloudscraper.py,sha256=0jzrXVXSZopExyxrDRbcI_2wsbHAg_dqOk4D3Re0jvk,404
4
+ kabigon/compose.py,sha256=DO0hOJgEAX7ZLOS53dcE6V9zi7Tr9oGNW8koPHsx9eM,2110
5
+ kabigon/httpx.py,sha256=B8_26rufJMbKSXINBEqyCIpaRueO_3Gk_PtEQmlOxQ4,955
6
+ kabigon/loader.py,sha256=D5xUPJb3uAygmBaN_sX56ZpGcGsVz-ueHOXC7gSGaxM,493
7
+ kabigon/pdf.py,sha256=oM5pwZJ2GCcHyQXg98-Mda-MHxarYVZQge30KdS_aHY,1549
8
+ kabigon/playwright.py,sha256=ciNUlpMbwd47utCLT454wFSirXFmt3eCXN2Q-nAsiu8,1356
9
+ kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
10
+ kabigon/reel.py,sha256=1JTcn7qVH7FcD0Oj-Rz-pnjI-xS1UtkoJcuClGb8ExQ,1124
11
+ kabigon/singlefile.py,sha256=CeTT2WPYm0vb1xWPNdyBN4uHRw9hRqfZm68D-nEcUA8,1800
12
+ kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
13
+ kabigon/youtube.py,sha256=M1v7t4VS72ItqaNJTwlv59bafqORqldGjNzptZOeybA,2915
14
+ kabigon/ytdlp.py,sha256=kG1fXqU650otOWespjOSkGK_-jk1wO-sWiR60_UPJxY,3125
15
+ kabigon-0.4.1.dist-info/METADATA,sha256=IfnrNBC17ac0E4aI9Y-VOWhaiOnMN_RZxhoS-_EvhzA,1049
16
+ kabigon-0.4.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
+ kabigon-0.4.1.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
18
+ kabigon-0.4.1.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
19
+ kabigon-0.4.1.dist-info/RECORD,,
@@ -1,19 +0,0 @@
1
- kabigon/__init__.py,sha256=9RgyhhwjqrW2iQy9RBN2j7VZNhwA9xGo_atC7FKnZA4,545
2
- kabigon/cli.py,sha256=7qHklIhYUZ4E78C9MIB16AoVIYt2xLJS0Pz8sr51YPk,690
3
- kabigon/cloudscraper.py,sha256=0jzrXVXSZopExyxrDRbcI_2wsbHAg_dqOk4D3Re0jvk,404
4
- kabigon/compose.py,sha256=IqNm-Cxl6e8u7X9v5SoG7cjxOWCMIcSZG1lDVNNAfo8,1433
5
- kabigon/httpx.py,sha256=ECNYGLOZ1uPz15ziABuziz8mQR3x1nF3cGcEOAsSmjo,668
6
- kabigon/loader.py,sha256=cV9ZqcWaNtS2WTDpgyNYK2kX5Cu1ZC-Sq-qS3PpPnJQ,198
7
- kabigon/pdf.py,sha256=oM5pwZJ2GCcHyQXg98-Mda-MHxarYVZQge30KdS_aHY,1549
8
- kabigon/playwright.py,sha256=g4cI3sIC_tVsk__x-1m-YA2LQs2_pWX8588rsVAmEOA,1095
9
- kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
10
- kabigon/reel.py,sha256=dkWXG2nBhIt0DpGJzevkIrRKLqJh_03-yrg_rjf6vnY,828
11
- kabigon/singlefile.py,sha256=CeTT2WPYm0vb1xWPNdyBN4uHRw9hRqfZm68D-nEcUA8,1800
12
- kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
13
- kabigon/youtube.py,sha256=_wdKvRRAMrYnv3rUhkd_6JuOGCuQClYpj1UlVeYeojc,2615
14
- kabigon/ytdlp.py,sha256=kG1fXqU650otOWespjOSkGK_-jk1wO-sWiR60_UPJxY,3125
15
- kabigon-0.3.1.dist-info/METADATA,sha256=KWad9d5t-7NMX2xXhZWsQHpXlavwvEAkzDxNUO1mk8U,605
16
- kabigon-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
- kabigon-0.3.1.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
18
- kabigon-0.3.1.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
19
- kabigon-0.3.1.dist-info/RECORD,,