kabigon 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kabigon/cli.py +0 -2
- kabigon/compose.py +22 -3
- kabigon/httpx.py +6 -0
- kabigon/loader.py +10 -0
- kabigon/playwright.py +14 -7
- kabigon/reel.py +9 -0
- {kabigon-0.3.1.dist-info → kabigon-0.4.0.dist-info}/METADATA +33 -1
- kabigon-0.4.0.dist-info/RECORD +19 -0
- kabigon-0.3.1.dist-info/RECORD +0 -19
- {kabigon-0.3.1.dist-info → kabigon-0.4.0.dist-info}/WHEEL +0 -0
- {kabigon-0.3.1.dist-info → kabigon-0.4.0.dist-info}/entry_points.txt +0 -0
- {kabigon-0.3.1.dist-info → kabigon-0.4.0.dist-info}/licenses/LICENSE +0 -0
kabigon/cli.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
import click
|
2
2
|
from rich import print
|
3
3
|
|
4
|
-
from .cloudscraper import CloudscraperLoader
|
5
4
|
from .compose import Compose
|
6
5
|
from .httpx import HttpxLoader
|
7
6
|
from .pdf import PDFLoader
|
@@ -20,7 +19,6 @@ def main(url: str) -> None:
|
|
20
19
|
ReelLoader(),
|
21
20
|
YtdlpLoader(),
|
22
21
|
PDFLoader(),
|
23
|
-
CloudscraperLoader(),
|
24
22
|
HttpxLoader(),
|
25
23
|
SinglefileLoader(),
|
26
24
|
]
|
kabigon/compose.py
CHANGED
@@ -37,14 +37,33 @@ class Compose(Loader):
|
|
37
37
|
|
38
38
|
for loader in self.loaders:
|
39
39
|
try:
|
40
|
-
|
40
|
+
content = loader.load(url)
|
41
41
|
|
42
|
-
if not
|
42
|
+
if not content:
|
43
43
|
logger.info("[{}] Failed to load URL: {}, got empty result", loader.__class__.__name__, url)
|
44
44
|
continue
|
45
45
|
|
46
46
|
logger.info("[{}] Successfully loaded URL: {}", loader.__class__.__name__, url)
|
47
|
-
return
|
47
|
+
return content
|
48
|
+
|
49
|
+
except Exception as e:
|
50
|
+
logger.info("[{}] Failed to load URL: {}, got error: {}", loader.__class__.__name__, url, e)
|
51
|
+
|
52
|
+
raise LoaderError(f"Failed to load URL: {url}")
|
53
|
+
|
54
|
+
async def async_load(self, url: str) -> str:
|
55
|
+
url = replace_domain(url)
|
56
|
+
|
57
|
+
for loader in self.loaders:
|
58
|
+
try:
|
59
|
+
content = await loader.async_load(url)
|
60
|
+
|
61
|
+
if not content:
|
62
|
+
logger.info("[{}] Failed to load URL: {}, got empty result", loader.__class__.__name__, url)
|
63
|
+
continue
|
64
|
+
|
65
|
+
logger.info("[{}] Successfully loaded URL: {}", loader.__class__.__name__, url)
|
66
|
+
return content
|
48
67
|
|
49
68
|
except Exception as e:
|
50
69
|
logger.info("[{}] Failed to load URL: {}, got error: {}", loader.__class__.__name__, url, e)
|
kabigon/httpx.py
CHANGED
@@ -17,3 +17,9 @@ class HttpxLoader(Loader):
|
|
17
17
|
response = httpx.get(url, headers=DEFAULT_HEADERS, follow_redirects=True)
|
18
18
|
response.raise_for_status()
|
19
19
|
return html_to_markdown(response.content)
|
20
|
+
|
21
|
+
async def async_load(self, url: str) -> str:
|
22
|
+
async with httpx.AsyncClient() as client:
|
23
|
+
response = await client.get(url, headers=DEFAULT_HEADERS, follow_redirects=True)
|
24
|
+
response.raise_for_status()
|
25
|
+
return html_to_markdown(response.content)
|
kabigon/loader.py
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
import asyncio
|
2
|
+
import concurrent.futures
|
3
|
+
|
4
|
+
|
1
5
|
class Loader:
|
2
6
|
def __call__(self, url: str) -> str:
|
3
7
|
return self.load(url)
|
@@ -5,6 +9,12 @@ class Loader:
|
|
5
9
|
def load(self, url: str) -> str:
|
6
10
|
raise NotImplementedError
|
7
11
|
|
12
|
+
async def async_load(self, url: str):
|
13
|
+
loop = asyncio.get_running_loop()
|
14
|
+
with concurrent.futures.ProcessPoolExecutor() as executor:
|
15
|
+
result = await loop.run_in_executor(executor, self.load, url)
|
16
|
+
return result
|
17
|
+
|
8
18
|
|
9
19
|
class LoaderError(Exception):
|
10
20
|
pass
|
kabigon/playwright.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
from typing import Literal
|
2
2
|
|
3
|
-
from loguru import logger
|
4
|
-
from playwright.sync_api import TimeoutError
|
3
|
+
from playwright.async_api import async_playwright
|
5
4
|
from playwright.sync_api import sync_playwright
|
6
5
|
|
7
6
|
from .loader import Loader
|
@@ -24,13 +23,21 @@ class PlaywrightLoader(Loader):
|
|
24
23
|
browser = p.chromium.launch(headless=self.browser_headless)
|
25
24
|
page = browser.new_page()
|
26
25
|
|
27
|
-
try:
|
28
|
-
page.goto(url, timeout=self.timeout, wait_until=self.wait_until)
|
29
|
-
except TimeoutError as e:
|
30
|
-
logger.error("TimeoutError: {}", e)
|
31
|
-
page.goto(url)
|
26
|
+
page.goto(url, timeout=self.timeout, wait_until=self.wait_until)
|
32
27
|
|
33
28
|
content = page.content()
|
34
29
|
browser.close()
|
35
30
|
|
36
31
|
return html_to_markdown(content)
|
32
|
+
|
33
|
+
async def async_load(self, url: str) -> str:
|
34
|
+
async with async_playwright() as p:
|
35
|
+
browser = await p.chromium.launch(headless=self.browser_headless)
|
36
|
+
page = await browser.new_page()
|
37
|
+
|
38
|
+
await page.goto(url, timeout=self.timeout, wait_until=self.wait_until)
|
39
|
+
|
40
|
+
content = await page.content()
|
41
|
+
await browser.close()
|
42
|
+
|
43
|
+
return html_to_markdown(content)
|
kabigon/reel.py
CHANGED
@@ -29,3 +29,12 @@ class ReelLoader(Loader):
|
|
29
29
|
html_content = self.httpx_loader.load(url)
|
30
30
|
|
31
31
|
return f"{audio_content}\n\n{html_content}"
|
32
|
+
|
33
|
+
async def async_load(self, url: str):
|
34
|
+
if not is_reel_url(url):
|
35
|
+
raise NotReelURLError(url)
|
36
|
+
|
37
|
+
audio_content = await self.ytdlp_loader.async_load(url)
|
38
|
+
html_content = await self.httpx_loader.async_load(url)
|
39
|
+
|
40
|
+
return f"{audio_content}\n\n{html_content}"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: kabigon
|
3
|
-
Version: 0.3.1
|
3
|
+
Version: 0.4.0
|
4
4
|
Author-email: narumi <toucans-cutouts0f@icloud.com>
|
5
5
|
License-File: LICENSE
|
6
6
|
Requires-Python: >=3.10
|
@@ -19,3 +19,35 @@ Requires-Dist: yt-dlp>=2025.1.26
|
|
19
19
|
Description-Content-Type: text/markdown
|
20
20
|
|
21
21
|
# kabigon
|
22
|
+
|
23
|
+
## Installation
|
24
|
+
|
25
|
+
```shell
|
26
|
+
pip install kabigon
|
27
|
+
```
|
28
|
+
|
29
|
+
## Usage
|
30
|
+
|
31
|
+
```shell
|
32
|
+
kabigon <url>
|
33
|
+
```
|
34
|
+
|
35
|
+
or
|
36
|
+
|
37
|
+
```python
|
38
|
+
import kabigon
|
39
|
+
|
40
|
+
url = "https://www.google.com.tw"
|
41
|
+
|
42
|
+
content = kabigon.Compose(
|
43
|
+
[
|
44
|
+
kabigon.YoutubeLoader(),
|
45
|
+
kabigon.ReelLoader(),
|
46
|
+
kabigon.YtdlpLoader(),
|
47
|
+
kabigon.PDFLoader(),
|
48
|
+
kabigon.HttpxLoader(),
|
49
|
+
kabigon.SinglefileLoader(),
|
50
|
+
]
|
51
|
+
).load(url)
|
52
|
+
print(content)
|
53
|
+
```
|
@@ -0,0 +1,19 @@
|
|
1
|
+
kabigon/__init__.py,sha256=9RgyhhwjqrW2iQy9RBN2j7VZNhwA9xGo_atC7FKnZA4,545
|
2
|
+
kabigon/cli.py,sha256=z3u2Msvi1SWf1fd9nCTzJULeO-rRb5oDKJfPxhUeYQ0,611
|
3
|
+
kabigon/cloudscraper.py,sha256=0jzrXVXSZopExyxrDRbcI_2wsbHAg_dqOk4D3Re0jvk,404
|
4
|
+
kabigon/compose.py,sha256=DO0hOJgEAX7ZLOS53dcE6V9zi7Tr9oGNW8koPHsx9eM,2110
|
5
|
+
kabigon/httpx.py,sha256=B8_26rufJMbKSXINBEqyCIpaRueO_3Gk_PtEQmlOxQ4,955
|
6
|
+
kabigon/loader.py,sha256=D5xUPJb3uAygmBaN_sX56ZpGcGsVz-ueHOXC7gSGaxM,493
|
7
|
+
kabigon/pdf.py,sha256=oM5pwZJ2GCcHyQXg98-Mda-MHxarYVZQge30KdS_aHY,1549
|
8
|
+
kabigon/playwright.py,sha256=ciNUlpMbwd47utCLT454wFSirXFmt3eCXN2Q-nAsiu8,1356
|
9
|
+
kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
10
|
+
kabigon/reel.py,sha256=1JTcn7qVH7FcD0Oj-Rz-pnjI-xS1UtkoJcuClGb8ExQ,1124
|
11
|
+
kabigon/singlefile.py,sha256=CeTT2WPYm0vb1xWPNdyBN4uHRw9hRqfZm68D-nEcUA8,1800
|
12
|
+
kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
|
13
|
+
kabigon/youtube.py,sha256=_wdKvRRAMrYnv3rUhkd_6JuOGCuQClYpj1UlVeYeojc,2615
|
14
|
+
kabigon/ytdlp.py,sha256=kG1fXqU650otOWespjOSkGK_-jk1wO-sWiR60_UPJxY,3125
|
15
|
+
kabigon-0.4.0.dist-info/METADATA,sha256=NRqd2kpi19xYz7TJx5TBXDm-uJcVcpu17P0sw3N-SiA,1020
|
16
|
+
kabigon-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
17
|
+
kabigon-0.4.0.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
|
18
|
+
kabigon-0.4.0.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
|
19
|
+
kabigon-0.4.0.dist-info/RECORD,,
|
kabigon-0.3.1.dist-info/RECORD
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
kabigon/__init__.py,sha256=9RgyhhwjqrW2iQy9RBN2j7VZNhwA9xGo_atC7FKnZA4,545
|
2
|
-
kabigon/cli.py,sha256=7qHklIhYUZ4E78C9MIB16AoVIYt2xLJS0Pz8sr51YPk,690
|
3
|
-
kabigon/cloudscraper.py,sha256=0jzrXVXSZopExyxrDRbcI_2wsbHAg_dqOk4D3Re0jvk,404
|
4
|
-
kabigon/compose.py,sha256=IqNm-Cxl6e8u7X9v5SoG7cjxOWCMIcSZG1lDVNNAfo8,1433
|
5
|
-
kabigon/httpx.py,sha256=ECNYGLOZ1uPz15ziABuziz8mQR3x1nF3cGcEOAsSmjo,668
|
6
|
-
kabigon/loader.py,sha256=cV9ZqcWaNtS2WTDpgyNYK2kX5Cu1ZC-Sq-qS3PpPnJQ,198
|
7
|
-
kabigon/pdf.py,sha256=oM5pwZJ2GCcHyQXg98-Mda-MHxarYVZQge30KdS_aHY,1549
|
8
|
-
kabigon/playwright.py,sha256=g4cI3sIC_tVsk__x-1m-YA2LQs2_pWX8588rsVAmEOA,1095
|
9
|
-
kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
10
|
-
kabigon/reel.py,sha256=dkWXG2nBhIt0DpGJzevkIrRKLqJh_03-yrg_rjf6vnY,828
|
11
|
-
kabigon/singlefile.py,sha256=CeTT2WPYm0vb1xWPNdyBN4uHRw9hRqfZm68D-nEcUA8,1800
|
12
|
-
kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
|
13
|
-
kabigon/youtube.py,sha256=_wdKvRRAMrYnv3rUhkd_6JuOGCuQClYpj1UlVeYeojc,2615
|
14
|
-
kabigon/ytdlp.py,sha256=kG1fXqU650otOWespjOSkGK_-jk1wO-sWiR60_UPJxY,3125
|
15
|
-
kabigon-0.3.1.dist-info/METADATA,sha256=KWad9d5t-7NMX2xXhZWsQHpXlavwvEAkzDxNUO1mk8U,605
|
16
|
-
kabigon-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
17
|
-
kabigon-0.3.1.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
|
18
|
-
kabigon-0.3.1.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
|
19
|
-
kabigon-0.3.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|