kabigon 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kabigon/cli.py CHANGED
@@ -1,7 +1,6 @@
1
1
  import click
2
2
  from rich import print
3
3
 
4
- from .cloudscraper import CloudscraperLoader
5
4
  from .compose import Compose
6
5
  from .httpx import HttpxLoader
7
6
  from .pdf import PDFLoader
@@ -20,7 +19,6 @@ def main(url: str) -> None:
20
19
  ReelLoader(),
21
20
  YtdlpLoader(),
22
21
  PDFLoader(),
23
- CloudscraperLoader(),
24
22
  HttpxLoader(),
25
23
  SinglefileLoader(),
26
24
  ]
kabigon/compose.py CHANGED
@@ -37,14 +37,33 @@ class Compose(Loader):
37
37
 
38
38
  for loader in self.loaders:
39
39
  try:
40
- loaded_content = loader.load(url)
40
+ content = loader.load(url)
41
41
 
42
- if not loaded_content:
42
+ if not content:
43
43
  logger.info("[{}] Failed to load URL: {}, got empty result", loader.__class__.__name__, url)
44
44
  continue
45
45
 
46
46
  logger.info("[{}] Successfully loaded URL: {}", loader.__class__.__name__, url)
47
- return loaded_content
47
+ return content
48
+
49
+ except Exception as e:
50
+ logger.info("[{}] Failed to load URL: {}, got error: {}", loader.__class__.__name__, url, e)
51
+
52
+ raise LoaderError(f"Failed to load URL: {url}")
53
+
54
+ async def async_load(self, url: str) -> str:
55
+ url = replace_domain(url)
56
+
57
+ for loader in self.loaders:
58
+ try:
59
+ content = await loader.async_load(url)
60
+
61
+ if not content:
62
+ logger.info("[{}] Failed to load URL: {}, got empty result", loader.__class__.__name__, url)
63
+ continue
64
+
65
+ logger.info("[{}] Successfully loaded URL: {}", loader.__class__.__name__, url)
66
+ return content
48
67
 
49
68
  except Exception as e:
50
69
  logger.info("[{}] Failed to load URL: {}, got error: {}", loader.__class__.__name__, url, e)
kabigon/httpx.py CHANGED
@@ -17,3 +17,9 @@ class HttpxLoader(Loader):
17
17
  response = httpx.get(url, headers=DEFAULT_HEADERS, follow_redirects=True)
18
18
  response.raise_for_status()
19
19
  return html_to_markdown(response.content)
20
+
21
+ async def async_load(self, url: str) -> str:
22
+ async with httpx.AsyncClient() as client:
23
+ response = await client.get(url, headers=DEFAULT_HEADERS, follow_redirects=True)
24
+ response.raise_for_status()
25
+ return html_to_markdown(response.content)
kabigon/loader.py CHANGED
@@ -1,3 +1,7 @@
1
+ import asyncio
2
+ import concurrent.futures
3
+
4
+
1
5
  class Loader:
2
6
  def __call__(self, url: str) -> str:
3
7
  return self.load(url)
@@ -5,6 +9,12 @@ class Loader:
5
9
  def load(self, url: str) -> str:
6
10
  raise NotImplementedError
7
11
 
12
+ async def async_load(self, url: str):
13
+ loop = asyncio.get_running_loop()
14
+ with concurrent.futures.ProcessPoolExecutor() as executor:
15
+ result = await loop.run_in_executor(executor, self.load, url)
16
+ return result
17
+
8
18
 
9
19
  class LoaderError(Exception):
10
20
  pass
kabigon/playwright.py CHANGED
@@ -1,7 +1,6 @@
1
1
  from typing import Literal
2
2
 
3
- from loguru import logger
4
- from playwright.sync_api import TimeoutError
3
+ from playwright.async_api import async_playwright
5
4
  from playwright.sync_api import sync_playwright
6
5
 
7
6
  from .loader import Loader
@@ -24,13 +23,21 @@ class PlaywrightLoader(Loader):
24
23
  browser = p.chromium.launch(headless=self.browser_headless)
25
24
  page = browser.new_page()
26
25
 
27
- try:
28
- page.goto(url, timeout=self.timeout, wait_until=self.wait_until)
29
- except TimeoutError as e:
30
- logger.error("TimeoutError: {}", e)
31
- page.goto(url)
26
+ page.goto(url, timeout=self.timeout, wait_until=self.wait_until)
32
27
 
33
28
  content = page.content()
34
29
  browser.close()
35
30
 
36
31
  return html_to_markdown(content)
32
+
33
+ async def async_load(self, url: str) -> str:
34
+ async with async_playwright() as p:
35
+ browser = await p.chromium.launch(headless=self.browser_headless)
36
+ page = await browser.new_page()
37
+
38
+ await page.goto(url, timeout=self.timeout, wait_until=self.wait_until)
39
+
40
+ content = await page.content()
41
+ await browser.close()
42
+
43
+ return html_to_markdown(content)
kabigon/reel.py CHANGED
@@ -29,3 +29,12 @@ class ReelLoader(Loader):
29
29
  html_content = self.httpx_loader.load(url)
30
30
 
31
31
  return f"{audio_content}\n\n{html_content}"
32
+
33
+ async def async_load(self, url: str):
34
+ if not is_reel_url(url):
35
+ raise NotReelURLError(url)
36
+
37
+ audio_content = await self.ytdlp_loader.async_load(url)
38
+ html_content = await self.httpx_loader.async_load(url)
39
+
40
+ return f"{audio_content}\n\n{html_content}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kabigon
3
- Version: 0.3.1
3
+ Version: 0.4.0
4
4
  Author-email: narumi <toucans-cutouts0f@icloud.com>
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.10
@@ -19,3 +19,35 @@ Requires-Dist: yt-dlp>=2025.1.26
19
19
  Description-Content-Type: text/markdown
20
20
 
21
21
  # kabigon
22
+
23
+ ## Installation
24
+
25
+ ```shell
26
+ pip install kabigon
27
+ ```
28
+
29
+ ## Usage
30
+
31
+ ```shell
32
+ kabigon <url>
33
+ ```
34
+
35
+ or
36
+
37
+ ```python
38
+ import kabigon
39
+
40
+ url = "https://www.google.com.tw"
41
+
42
+ content = kabigon.Compose(
43
+ [
44
+ kabigon.YoutubeLoader(),
45
+ kabigon.ReelLoader(),
46
+ kabigon.YtdlpLoader(),
47
+ kabigon.PDFLoader(),
48
+ kabigon.HttpxLoader(),
49
+ kabigon.SinglefileLoader(),
50
+ ]
51
+ ).load(url)
52
+ print(content)
53
+ ```
@@ -0,0 +1,19 @@
1
+ kabigon/__init__.py,sha256=9RgyhhwjqrW2iQy9RBN2j7VZNhwA9xGo_atC7FKnZA4,545
2
+ kabigon/cli.py,sha256=z3u2Msvi1SWf1fd9nCTzJULeO-rRb5oDKJfPxhUeYQ0,611
3
+ kabigon/cloudscraper.py,sha256=0jzrXVXSZopExyxrDRbcI_2wsbHAg_dqOk4D3Re0jvk,404
4
+ kabigon/compose.py,sha256=DO0hOJgEAX7ZLOS53dcE6V9zi7Tr9oGNW8koPHsx9eM,2110
5
+ kabigon/httpx.py,sha256=B8_26rufJMbKSXINBEqyCIpaRueO_3Gk_PtEQmlOxQ4,955
6
+ kabigon/loader.py,sha256=D5xUPJb3uAygmBaN_sX56ZpGcGsVz-ueHOXC7gSGaxM,493
7
+ kabigon/pdf.py,sha256=oM5pwZJ2GCcHyQXg98-Mda-MHxarYVZQge30KdS_aHY,1549
8
+ kabigon/playwright.py,sha256=ciNUlpMbwd47utCLT454wFSirXFmt3eCXN2Q-nAsiu8,1356
9
+ kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
10
+ kabigon/reel.py,sha256=1JTcn7qVH7FcD0Oj-Rz-pnjI-xS1UtkoJcuClGb8ExQ,1124
11
+ kabigon/singlefile.py,sha256=CeTT2WPYm0vb1xWPNdyBN4uHRw9hRqfZm68D-nEcUA8,1800
12
+ kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
13
+ kabigon/youtube.py,sha256=_wdKvRRAMrYnv3rUhkd_6JuOGCuQClYpj1UlVeYeojc,2615
14
+ kabigon/ytdlp.py,sha256=kG1fXqU650otOWespjOSkGK_-jk1wO-sWiR60_UPJxY,3125
15
+ kabigon-0.4.0.dist-info/METADATA,sha256=NRqd2kpi19xYz7TJx5TBXDm-uJcVcpu17P0sw3N-SiA,1020
16
+ kabigon-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
+ kabigon-0.4.0.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
18
+ kabigon-0.4.0.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
19
+ kabigon-0.4.0.dist-info/RECORD,,
@@ -1,19 +0,0 @@
1
- kabigon/__init__.py,sha256=9RgyhhwjqrW2iQy9RBN2j7VZNhwA9xGo_atC7FKnZA4,545
2
- kabigon/cli.py,sha256=7qHklIhYUZ4E78C9MIB16AoVIYt2xLJS0Pz8sr51YPk,690
3
- kabigon/cloudscraper.py,sha256=0jzrXVXSZopExyxrDRbcI_2wsbHAg_dqOk4D3Re0jvk,404
4
- kabigon/compose.py,sha256=IqNm-Cxl6e8u7X9v5SoG7cjxOWCMIcSZG1lDVNNAfo8,1433
5
- kabigon/httpx.py,sha256=ECNYGLOZ1uPz15ziABuziz8mQR3x1nF3cGcEOAsSmjo,668
6
- kabigon/loader.py,sha256=cV9ZqcWaNtS2WTDpgyNYK2kX5Cu1ZC-Sq-qS3PpPnJQ,198
7
- kabigon/pdf.py,sha256=oM5pwZJ2GCcHyQXg98-Mda-MHxarYVZQge30KdS_aHY,1549
8
- kabigon/playwright.py,sha256=g4cI3sIC_tVsk__x-1m-YA2LQs2_pWX8588rsVAmEOA,1095
9
- kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
10
- kabigon/reel.py,sha256=dkWXG2nBhIt0DpGJzevkIrRKLqJh_03-yrg_rjf6vnY,828
11
- kabigon/singlefile.py,sha256=CeTT2WPYm0vb1xWPNdyBN4uHRw9hRqfZm68D-nEcUA8,1800
12
- kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
13
- kabigon/youtube.py,sha256=_wdKvRRAMrYnv3rUhkd_6JuOGCuQClYpj1UlVeYeojc,2615
14
- kabigon/ytdlp.py,sha256=kG1fXqU650otOWespjOSkGK_-jk1wO-sWiR60_UPJxY,3125
15
- kabigon-0.3.1.dist-info/METADATA,sha256=KWad9d5t-7NMX2xXhZWsQHpXlavwvEAkzDxNUO1mk8U,605
16
- kabigon-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
- kabigon-0.3.1.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
18
- kabigon-0.3.1.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
19
- kabigon-0.3.1.dist-info/RECORD,,