kabigon 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kabigon/cli.py CHANGED
@@ -1,7 +1,6 @@
1
1
  import click
2
2
  from rich import print
3
3
 
4
- from .cloudscraper import CloudscraperLoader
5
4
  from .compose import Compose
6
5
  from .httpx import HttpxLoader
7
6
  from .pdf import PDFLoader
@@ -20,7 +19,6 @@ def main(url: str) -> None:
20
19
  ReelLoader(),
21
20
  YtdlpLoader(),
22
21
  PDFLoader(),
23
- CloudscraperLoader(),
24
22
  HttpxLoader(),
25
23
  SinglefileLoader(),
26
24
  ]
kabigon/cloudscraper.py CHANGED
@@ -6,7 +6,7 @@ from .utils import html_to_markdown
6
6
 
7
7
 
8
8
  class CloudscraperLoader(Loader):
9
- @timeout_decorator.timeout(5)
9
+ @timeout_decorator.timeout(10)
10
10
  def load(self, url: str) -> str:
11
11
  client = cloudscraper.create_scraper()
12
12
  response = client.get(url, allow_redirects=True)
kabigon/compose.py CHANGED
@@ -37,14 +37,33 @@ class Compose(Loader):
37
37
 
38
38
  for loader in self.loaders:
39
39
  try:
40
- loaded_content = loader.load(url)
40
+ content = loader.load(url)
41
41
 
42
- if not loaded_content:
42
+ if not content:
43
43
  logger.info("[{}] Failed to load URL: {}, got empty result", loader.__class__.__name__, url)
44
44
  continue
45
45
 
46
46
  logger.info("[{}] Successfully loaded URL: {}", loader.__class__.__name__, url)
47
- return loaded_content
47
+ return content
48
+
49
+ except Exception as e:
50
+ logger.info("[{}] Failed to load URL: {}, got error: {}", loader.__class__.__name__, url, e)
51
+
52
+ raise LoaderError(f"Failed to load URL: {url}")
53
+
54
+ async def async_load(self, url: str) -> str:
55
+ url = replace_domain(url)
56
+
57
+ for loader in self.loaders:
58
+ try:
59
+ content = await loader.async_load(url)
60
+
61
+ if not content:
62
+ logger.info("[{}] Failed to load URL: {}, got empty result", loader.__class__.__name__, url)
63
+ continue
64
+
65
+ logger.info("[{}] Successfully loaded URL: {}", loader.__class__.__name__, url)
66
+ return content
48
67
 
49
68
  except Exception as e:
50
69
  logger.info("[{}] Failed to load URL: {}, got error: {}", loader.__class__.__name__, url, e)
kabigon/httpx.py CHANGED
@@ -12,8 +12,14 @@ DEFAULT_HEADERS = {
12
12
 
13
13
 
14
14
  class HttpxLoader(Loader):
15
- @timeout_decorator.timeout(5)
15
+ @timeout_decorator.timeout(10)
16
16
  def load(self, url: str) -> str:
17
17
  response = httpx.get(url, headers=DEFAULT_HEADERS, follow_redirects=True)
18
18
  response.raise_for_status()
19
19
  return html_to_markdown(response.content)
20
+
21
+ async def async_load(self, url: str) -> str:
22
+ async with httpx.AsyncClient() as client:
23
+ response = await client.get(url, headers=DEFAULT_HEADERS, follow_redirects=True)
24
+ response.raise_for_status()
25
+ return html_to_markdown(response.content)
kabigon/loader.py CHANGED
@@ -1,3 +1,7 @@
1
+ import asyncio
2
+ import concurrent.futures
3
+
4
+
1
5
  class Loader:
2
6
  def __call__(self, url: str) -> str:
3
7
  return self.load(url)
@@ -5,6 +9,12 @@ class Loader:
5
9
  def load(self, url: str) -> str:
6
10
  raise NotImplementedError
7
11
 
12
+ async def async_load(self, url: str):
13
+ loop = asyncio.get_running_loop()
14
+ with concurrent.futures.ProcessPoolExecutor() as executor:
15
+ result = await loop.run_in_executor(executor, self.load, url)
16
+ return result
17
+
8
18
 
9
19
  class LoaderError(Exception):
10
20
  pass
kabigon/pdf.py CHANGED
@@ -19,7 +19,7 @@ class NotPDFError(LoaderError):
19
19
 
20
20
 
21
21
  class PDFLoader(Loader):
22
- @timeout_decorator.timeout(5)
22
+ @timeout_decorator.timeout(10)
23
23
  def load(self, url_or_file: str) -> str:
24
24
  if url_or_file.startswith("http"):
25
25
  url_or_file = download_pdf_from_url(url_or_file)
kabigon/playwright.py CHANGED
@@ -1,8 +1,6 @@
1
1
  from typing import Literal
2
2
 
3
- import timeout_decorator
4
- from loguru import logger
5
- from playwright.sync_api import TimeoutError
3
+ from playwright.async_api import async_playwright
6
4
  from playwright.sync_api import sync_playwright
7
5
 
8
6
  from .loader import Loader
@@ -20,19 +18,26 @@ class PlaywrightLoader(Loader):
20
18
  self.wait_until = wait_until
21
19
  self.browser_headless = browser_headless
22
20
 
23
- @timeout_decorator.timeout(5)
24
21
  def load(self, url: str) -> str:
25
22
  with sync_playwright() as p:
26
23
  browser = p.chromium.launch(headless=self.browser_headless)
27
24
  page = browser.new_page()
28
25
 
29
- try:
30
- page.goto(url, timeout=self.timeout, wait_until=self.wait_until)
31
- except TimeoutError as e:
32
- logger.error("TimeoutError: {}", e)
33
- page.goto(url)
26
+ page.goto(url, timeout=self.timeout, wait_until=self.wait_until)
34
27
 
35
28
  content = page.content()
36
29
  browser.close()
37
30
 
38
31
  return html_to_markdown(content)
32
+
33
+ async def async_load(self, url: str) -> str:
34
+ async with async_playwright() as p:
35
+ browser = await p.chromium.launch(headless=self.browser_headless)
36
+ page = await browser.new_page()
37
+
38
+ await page.goto(url, timeout=self.timeout, wait_until=self.wait_until)
39
+
40
+ content = await page.content()
41
+ await browser.close()
42
+
43
+ return html_to_markdown(content)
kabigon/reel.py CHANGED
@@ -29,3 +29,12 @@ class ReelLoader(Loader):
29
29
  html_content = self.httpx_loader.load(url)
30
30
 
31
31
  return f"{audio_content}\n\n{html_content}"
32
+
33
+ async def async_load(self, url: str):
34
+ if not is_reel_url(url):
35
+ raise NotReelURLError(url)
36
+
37
+ audio_content = await self.ytdlp_loader.async_load(url)
38
+ html_content = await self.httpx_loader.async_load(url)
39
+
40
+ return f"{audio_content}\n\n{html_content}"
kabigon/singlefile.py CHANGED
@@ -6,7 +6,6 @@ from pathlib import Path
6
6
  from typing import Final
7
7
 
8
8
  import charset_normalizer
9
- import timeout_decorator
10
9
  from loguru import logger
11
10
 
12
11
  from .loader import Loader
@@ -29,7 +28,6 @@ class SinglefileLoader(Loader):
29
28
  self.cookies_file = cookies_file
30
29
  self.browser_headless = browser_headless
31
30
 
32
- @timeout_decorator.timeout(20)
33
31
  def load(self, url: str) -> str:
34
32
  filename = self.download(url)
35
33
  content = str(charset_normalizer.from_path(filename).best())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kabigon
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Author-email: narumi <toucans-cutouts0f@icloud.com>
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.10
@@ -9,7 +9,6 @@ Requires-Dist: cloudscraper>=1.2.71
9
9
  Requires-Dist: httpx>=0.28.1
10
10
  Requires-Dist: loguru>=0.7.3
11
11
  Requires-Dist: markdownify>=0.14.1
12
- Requires-Dist: numpy>=2.1.3
13
12
  Requires-Dist: openai-whisper>=20240930
14
13
  Requires-Dist: playwright>=1.50.0
15
14
  Requires-Dist: pypdf>=5.3.0
@@ -20,3 +19,35 @@ Requires-Dist: yt-dlp>=2025.1.26
20
19
  Description-Content-Type: text/markdown
21
20
 
22
21
  # kabigon
22
+
23
+ ## Installation
24
+
25
+ ```shell
26
+ pip install kabigon
27
+ ```
28
+
29
+ ## Usage
30
+
31
+ ```shell
32
+ kabigon <url>
33
+ ```
34
+
35
+ or
36
+
37
+ ```python
38
+ import kabigon
39
+
40
+ url = "https://www.google.com.tw"
41
+
42
+ content = kabigon.Compose(
43
+ [
44
+ kabigon.YoutubeLoader(),
45
+ kabigon.ReelLoader(),
46
+ kabigon.YtdlpLoader(),
47
+ kabigon.PDFLoader(),
48
+ kabigon.HttpxLoader(),
49
+ kabigon.SinglefileLoader(),
50
+ ]
51
+ ).load(url)
52
+ print(content)
53
+ ```
@@ -0,0 +1,19 @@
1
+ kabigon/__init__.py,sha256=9RgyhhwjqrW2iQy9RBN2j7VZNhwA9xGo_atC7FKnZA4,545
2
+ kabigon/cli.py,sha256=z3u2Msvi1SWf1fd9nCTzJULeO-rRb5oDKJfPxhUeYQ0,611
3
+ kabigon/cloudscraper.py,sha256=0jzrXVXSZopExyxrDRbcI_2wsbHAg_dqOk4D3Re0jvk,404
4
+ kabigon/compose.py,sha256=DO0hOJgEAX7ZLOS53dcE6V9zi7Tr9oGNW8koPHsx9eM,2110
5
+ kabigon/httpx.py,sha256=B8_26rufJMbKSXINBEqyCIpaRueO_3Gk_PtEQmlOxQ4,955
6
+ kabigon/loader.py,sha256=D5xUPJb3uAygmBaN_sX56ZpGcGsVz-ueHOXC7gSGaxM,493
7
+ kabigon/pdf.py,sha256=oM5pwZJ2GCcHyQXg98-Mda-MHxarYVZQge30KdS_aHY,1549
8
+ kabigon/playwright.py,sha256=ciNUlpMbwd47utCLT454wFSirXFmt3eCXN2Q-nAsiu8,1356
9
+ kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
10
+ kabigon/reel.py,sha256=1JTcn7qVH7FcD0Oj-Rz-pnjI-xS1UtkoJcuClGb8ExQ,1124
11
+ kabigon/singlefile.py,sha256=CeTT2WPYm0vb1xWPNdyBN4uHRw9hRqfZm68D-nEcUA8,1800
12
+ kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
13
+ kabigon/youtube.py,sha256=_wdKvRRAMrYnv3rUhkd_6JuOGCuQClYpj1UlVeYeojc,2615
14
+ kabigon/ytdlp.py,sha256=kG1fXqU650otOWespjOSkGK_-jk1wO-sWiR60_UPJxY,3125
15
+ kabigon-0.4.0.dist-info/METADATA,sha256=NRqd2kpi19xYz7TJx5TBXDm-uJcVcpu17P0sw3N-SiA,1020
16
+ kabigon-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
+ kabigon-0.4.0.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
18
+ kabigon-0.4.0.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
19
+ kabigon-0.4.0.dist-info/RECORD,,
@@ -1,19 +0,0 @@
1
- kabigon/__init__.py,sha256=9RgyhhwjqrW2iQy9RBN2j7VZNhwA9xGo_atC7FKnZA4,545
2
- kabigon/cli.py,sha256=7qHklIhYUZ4E78C9MIB16AoVIYt2xLJS0Pz8sr51YPk,690
3
- kabigon/cloudscraper.py,sha256=viaIWATsS8nD9HN0RfBiveUHuL012OjuaKlKwLEteGw,403
4
- kabigon/compose.py,sha256=IqNm-Cxl6e8u7X9v5SoG7cjxOWCMIcSZG1lDVNNAfo8,1433
5
- kabigon/httpx.py,sha256=SfhaJXNKlFOwWs_Eeadiegi5wNvZV0RX7lqQwR8nYGo,667
6
- kabigon/loader.py,sha256=cV9ZqcWaNtS2WTDpgyNYK2kX5Cu1ZC-Sq-qS3PpPnJQ,198
7
- kabigon/pdf.py,sha256=9Oi_ZP7D0LLWs2D8KO3omRld1nYyhEbQMForZUw9YZg,1548
8
- kabigon/playwright.py,sha256=0CkSDY90i7PHKxrJ6Zwad_NDCy5TPyCjeFvLrQuZXFU,1154
9
- kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
10
- kabigon/reel.py,sha256=dkWXG2nBhIt0DpGJzevkIrRKLqJh_03-yrg_rjf6vnY,828
11
- kabigon/singlefile.py,sha256=2nTCTFgW5Gp3l0ExaVh2foUDVSgLaAssDp3tBoQ1MhY,1860
12
- kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
13
- kabigon/youtube.py,sha256=_wdKvRRAMrYnv3rUhkd_6JuOGCuQClYpj1UlVeYeojc,2615
14
- kabigon/ytdlp.py,sha256=kG1fXqU650otOWespjOSkGK_-jk1wO-sWiR60_UPJxY,3125
15
- kabigon-0.3.0.dist-info/METADATA,sha256=w5bE9Wv-p4REXAC_7hPNIVilsxHK7tIZzg2EFz-vh_I,633
16
- kabigon-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
- kabigon-0.3.0.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
18
- kabigon-0.3.0.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
19
- kabigon-0.3.0.dist-info/RECORD,,