kabigon 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kabigon/compose.py +2 -3
- kabigon/firecrawl.py +2 -4
- kabigon/pdf.py +5 -1
- kabigon/ptt.py +5 -7
- kabigon/reel.py +5 -7
- kabigon/twitter.py +5 -7
- {kabigon-0.8.0.dist-info → kabigon-0.8.1.dist-info}/METADATA +1 -1
- kabigon-0.8.1.dist-info/RECORD +20 -0
- kabigon/errors.py +0 -28
- kabigon-0.8.0.dist-info/RECORD +0 -21
- {kabigon-0.8.0.dist-info → kabigon-0.8.1.dist-info}/WHEEL +0 -0
- {kabigon-0.8.0.dist-info → kabigon-0.8.1.dist-info}/entry_points.txt +0 -0
- {kabigon-0.8.0.dist-info → kabigon-0.8.1.dist-info}/licenses/LICENSE +0 -0
kabigon/compose.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
from loguru import logger
|
2
2
|
|
3
|
-
from .errors import KabigonError
|
4
3
|
from .loader import Loader
|
5
4
|
|
6
5
|
|
@@ -23,7 +22,7 @@ class Compose(Loader):
|
|
23
22
|
except Exception as e:
|
24
23
|
logger.info("[{}] Failed to load URL: {}, got error: {}", loader.__class__.__name__, url, e)
|
25
24
|
|
26
|
-
raise
|
25
|
+
raise Exception(f"Failed to load URL: {url}")
|
27
26
|
|
28
27
|
async def async_load(self, url: str) -> str:
|
29
28
|
for loader in self.loaders:
|
@@ -40,4 +39,4 @@ class Compose(Loader):
|
|
40
39
|
except Exception as e:
|
41
40
|
logger.info("[{}] Failed to load URL: {}, got error: {}", loader.__class__.__name__, url, e)
|
42
41
|
|
43
|
-
raise
|
42
|
+
raise Exception(f"Failed to load URL: {url}")
|
kabigon/firecrawl.py
CHANGED
@@ -2,8 +2,6 @@ import os
|
|
2
2
|
|
3
3
|
from firecrawl import FirecrawlApp
|
4
4
|
|
5
|
-
from .errors import FirecrawlError
|
6
|
-
from .errors import FirecrawlKeyError
|
7
5
|
from .loader import Loader
|
8
6
|
|
9
7
|
|
@@ -13,7 +11,7 @@ class FirecrawlLoader(Loader):
|
|
13
11
|
|
14
12
|
api_key = os.getenv("FIRECRAWL_API_KEY")
|
15
13
|
if not api_key:
|
16
|
-
raise
|
14
|
+
raise ValueError("FIRECRAWL_API_KEY is not set.")
|
17
15
|
|
18
16
|
self.app = FirecrawlApp(api_key=api_key)
|
19
17
|
|
@@ -25,7 +23,7 @@ class FirecrawlLoader(Loader):
|
|
25
23
|
)
|
26
24
|
|
27
25
|
if not result.success:
|
28
|
-
raise
|
26
|
+
raise Exception(f"Failed to load URL: {url}, got: {result.error}")
|
29
27
|
|
30
28
|
return result.markdown
|
31
29
|
|
kabigon/pdf.py
CHANGED
@@ -6,7 +6,6 @@ from typing import Any
|
|
6
6
|
import httpx
|
7
7
|
from pypdf import PdfReader
|
8
8
|
|
9
|
-
from .errors import NotPDFError
|
10
9
|
from .loader import Loader
|
11
10
|
|
12
11
|
DEFAULT_HEADERS = {
|
@@ -15,6 +14,11 @@ DEFAULT_HEADERS = {
|
|
15
14
|
}
|
16
15
|
|
17
16
|
|
17
|
+
class NotPDFError(Exception):
|
18
|
+
def __init__(self, url: str) -> None:
|
19
|
+
super().__init__(f"URL is not a PDF: {url}")
|
20
|
+
|
21
|
+
|
18
22
|
class PDFLoader(Loader):
|
19
23
|
def load(self, url_or_file: str) -> str:
|
20
24
|
if not url_or_file.startswith("http"):
|
kabigon/ptt.py
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
from urllib.parse import urlparse
|
2
2
|
|
3
|
-
from .errors import NotTwitterURLError
|
4
3
|
from .httpx import HttpxLoader
|
5
4
|
from .loader import Loader
|
6
5
|
|
7
6
|
|
8
|
-
def
|
9
|
-
|
7
|
+
def check_ptt_url(url: str) -> None:
|
8
|
+
if urlparse(url).netloc != "www.ptt.cc":
|
9
|
+
raise ValueError(f"URL must be from ptt.cc, got {url}")
|
10
10
|
|
11
11
|
|
12
12
|
class PttLoader(Loader):
|
@@ -20,13 +20,11 @@ class PttLoader(Loader):
|
|
20
20
|
)
|
21
21
|
|
22
22
|
def load(self, url: str) -> str:
|
23
|
-
|
24
|
-
raise NotTwitterURLError(url)
|
23
|
+
check_ptt_url(url)
|
25
24
|
|
26
25
|
return self.httpx_loader.load(url)
|
27
26
|
|
28
27
|
async def async_load(self, url: str):
|
29
|
-
|
30
|
-
raise NotTwitterURLError(url)
|
28
|
+
check_ptt_url(url)
|
31
29
|
|
32
30
|
return await self.httpx_loader.async_load(url)
|
kabigon/reel.py
CHANGED
@@ -1,11 +1,11 @@
|
|
1
|
-
from .errors import NotReelURLError
|
2
1
|
from .httpx import HttpxLoader
|
3
2
|
from .loader import Loader
|
4
3
|
from .ytdlp import YtdlpLoader
|
5
4
|
|
6
5
|
|
7
|
-
def
|
8
|
-
|
6
|
+
def check_reel_url(url: str) -> None:
|
7
|
+
if not url.startswith("https://www.instagram.com/reel"):
|
8
|
+
raise ValueError(f"URL is not an Instagram Reel: {url}")
|
9
9
|
|
10
10
|
|
11
11
|
class ReelLoader(Loader):
|
@@ -14,8 +14,7 @@ class ReelLoader(Loader):
|
|
14
14
|
self.ytdlp_loader = YtdlpLoader()
|
15
15
|
|
16
16
|
def load(self, url: str) -> str:
|
17
|
-
|
18
|
-
raise NotReelURLError(url)
|
17
|
+
check_reel_url(url)
|
19
18
|
|
20
19
|
audio_content = self.ytdlp_loader.load(url)
|
21
20
|
html_content = self.httpx_loader.load(url)
|
@@ -23,8 +22,7 @@ class ReelLoader(Loader):
|
|
23
22
|
return f"{audio_content}\n\n{html_content}"
|
24
23
|
|
25
24
|
async def async_load(self, url: str):
|
26
|
-
|
27
|
-
raise NotReelURLError(url)
|
25
|
+
check_reel_url(url)
|
28
26
|
|
29
27
|
audio_content = await self.ytdlp_loader.async_load(url)
|
30
28
|
html_content = await self.httpx_loader.async_load(url)
|
kabigon/twitter.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
from urllib.parse import urlparse
|
2
2
|
from urllib.parse import urlunparse
|
3
3
|
|
4
|
-
from .errors import NotTwitterURLError
|
5
4
|
from .loader import Loader
|
6
5
|
from .playwright import PlaywrightLoader
|
7
6
|
|
@@ -21,8 +20,9 @@ def replace_domain(url: str, new_domain: str = "x.com") -> str:
|
|
21
20
|
return urlunparse(urlparse(url)._replace(netloc=new_domain))
|
22
21
|
|
23
22
|
|
24
|
-
def
|
25
|
-
|
23
|
+
def check_x_url(url: str) -> None:
|
24
|
+
if urlparse(url).netloc not in TWITTER_DOMAINS:
|
25
|
+
raise ValueError(f"URL is not a Twitter URL: {url}")
|
26
26
|
|
27
27
|
|
28
28
|
class TwitterLoader(Loader):
|
@@ -30,16 +30,14 @@ class TwitterLoader(Loader):
|
|
30
30
|
self.playwright_loader = PlaywrightLoader(wait_until="networkidle")
|
31
31
|
|
32
32
|
def load(self, url: str) -> str:
|
33
|
-
|
34
|
-
raise NotTwitterURLError(url)
|
33
|
+
check_x_url(url)
|
35
34
|
|
36
35
|
url = replace_domain(url)
|
37
36
|
|
38
37
|
return self.playwright_loader.load(url)
|
39
38
|
|
40
39
|
async def async_load(self, url: str):
|
41
|
-
|
42
|
-
raise NotTwitterURLError(url)
|
40
|
+
check_x_url(url)
|
43
41
|
|
44
42
|
url = replace_domain(url)
|
45
43
|
|
@@ -0,0 +1,20 @@
|
|
1
|
+
kabigon/__init__.py,sha256=MUfTFUe5ezA249L2yuU5_2FiewLu86H3VsIpJSne2vQ,560
|
2
|
+
kabigon/cli.py,sha256=PJ0wnwp_AgHA54YxGr1jNJ_q3ls7fEymgTJaJxCVU7M,650
|
3
|
+
kabigon/compose.py,sha256=Kb6_-SNeh08QELMF-r3mWxasDTxJBuJJQamFPLcwQ1I,1463
|
4
|
+
kabigon/firecrawl.py,sha256=-5AI9tla_684dtpubY_BRudqLgw28158WdwA1RjJvAA,778
|
5
|
+
kabigon/httpx.py,sha256=Zup9DURyWLqoWzaxBbCYAaV-5LSlHUuAcNyyUsZTVag,696
|
6
|
+
kabigon/loader.py,sha256=KhOJvlzLDM0o05o4VqMRgiyRVjofkzuGEcihQagj-8M,452
|
7
|
+
kabigon/pdf.py,sha256=PNOx-Dz_VpN-EVnVar_wJJZdxWrDZrAFE-gSuUR9q7o,1870
|
8
|
+
kabigon/playwright.py,sha256=MZ-r0Ej2wWAOJkDLwYRvO77wcDvh38KXz2wgDsCTgm0,1358
|
9
|
+
kabigon/ptt.py,sha256=Gyp2nJrjptkjbwZJ9VEQHX0DEgKBe5QRQOmGVHUUgNA,896
|
10
|
+
kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
11
|
+
kabigon/reel.py,sha256=qOwWCvcp7xNKg0JDunq_Bsl8yqqMzrnAOI9k5mSqrOU,874
|
12
|
+
kabigon/twitter.py,sha256=aRqAiFxIwln6lteWdoF6SmvbzO62yBTQRzcB7UcVJwk,1046
|
13
|
+
kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
|
14
|
+
kabigon/youtube.py,sha256=HoiFNq0ookPL7_rO_wloBaY8yTIX6xP8A77F7y02q64,1166
|
15
|
+
kabigon/ytdlp.py,sha256=_QRcyFx9s7NnI1MvcWdKKxlX-hHLnqtduCSL5_UH6dU,3140
|
16
|
+
kabigon-0.8.1.dist-info/METADATA,sha256=8nZX3ukADj-qndGHMMuZluG_HdI8-wqNu8QJBoplPRc,1079
|
17
|
+
kabigon-0.8.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
18
|
+
kabigon-0.8.1.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
|
19
|
+
kabigon-0.8.1.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
|
20
|
+
kabigon-0.8.1.dist-info/RECORD,,
|
kabigon/errors.py
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
class KabigonError(Exception):
|
2
|
-
pass
|
3
|
-
|
4
|
-
|
5
|
-
class FirecrawlKeyError(KabigonError):
|
6
|
-
def __init__(self) -> None:
|
7
|
-
super().__init__("FIRECRAWL_API_KEY is not set.")
|
8
|
-
|
9
|
-
|
10
|
-
class FirecrawlError(KabigonError):
|
11
|
-
def __init__(self, url: str, error: str) -> None:
|
12
|
-
msg = f"Failed to load URL: {url}, got: {error}"
|
13
|
-
super().__init__(msg)
|
14
|
-
|
15
|
-
|
16
|
-
class NotPDFError(KabigonError):
|
17
|
-
def __init__(self, url: str) -> None:
|
18
|
-
super().__init__(f"URL is not a PDF: {url}")
|
19
|
-
|
20
|
-
|
21
|
-
class NotReelURLError(KabigonError):
|
22
|
-
def __init__(self, url: str):
|
23
|
-
super().__init__(f"URL is not an Instagram Reel: {url}")
|
24
|
-
|
25
|
-
|
26
|
-
class NotTwitterURLError(KabigonError):
|
27
|
-
def __init__(self, url: str):
|
28
|
-
super().__init__(f"URL is not a Twitter URL: {url}")
|
kabigon-0.8.0.dist-info/RECORD
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
kabigon/__init__.py,sha256=MUfTFUe5ezA249L2yuU5_2FiewLu86H3VsIpJSne2vQ,560
|
2
|
-
kabigon/cli.py,sha256=PJ0wnwp_AgHA54YxGr1jNJ_q3ls7fEymgTJaJxCVU7M,650
|
3
|
-
kabigon/compose.py,sha256=l2D5OK91VcN2a6DbjMdwBk3YSqzVV7fOVX0TqNm2gJo,1502
|
4
|
-
kabigon/errors.py,sha256=iri_YS71UsOHwaVtfy5IA6iUfq30DCsptZsChmZaZic,755
|
5
|
-
kabigon/firecrawl.py,sha256=Xnrlhco_R58x5kwGy9ZCKTnVqS4Pp-D3G0u-qnuGEsU,800
|
6
|
-
kabigon/httpx.py,sha256=Zup9DURyWLqoWzaxBbCYAaV-5LSlHUuAcNyyUsZTVag,696
|
7
|
-
kabigon/loader.py,sha256=KhOJvlzLDM0o05o4VqMRgiyRVjofkzuGEcihQagj-8M,452
|
8
|
-
kabigon/pdf.py,sha256=yJcgkdMMF52baFsFxJp9Jn89KsTKBboLTLwD3gs6U5U,1775
|
9
|
-
kabigon/playwright.py,sha256=MZ-r0Ej2wWAOJkDLwYRvO77wcDvh38KXz2wgDsCTgm0,1358
|
10
|
-
kabigon/ptt.py,sha256=S2d6SeFGxM4E4kg-n5blN2BX56CWj_eOhapJxzUnxu8,965
|
11
|
-
kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
12
|
-
kabigon/reel.py,sha256=J2QOxGMYi_HaEscQPIipPEoHGN_iksGxR6pV_XvryME,929
|
13
|
-
kabigon/twitter.py,sha256=U07pa8xA0nHAaaDPeUelQRvXR5ZnUvYJZW35xRAvHA8,1114
|
14
|
-
kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
|
15
|
-
kabigon/youtube.py,sha256=HoiFNq0ookPL7_rO_wloBaY8yTIX6xP8A77F7y02q64,1166
|
16
|
-
kabigon/ytdlp.py,sha256=_QRcyFx9s7NnI1MvcWdKKxlX-hHLnqtduCSL5_UH6dU,3140
|
17
|
-
kabigon-0.8.0.dist-info/METADATA,sha256=SAazEEnqklluyiH2psGQwUI40ee400GELlcxv0OyvuU,1079
|
18
|
-
kabigon-0.8.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
19
|
-
kabigon-0.8.0.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
|
20
|
-
kabigon-0.8.0.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
|
21
|
-
kabigon-0.8.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|