langroid 0.50.6__py3-none-any.whl → 0.50.7__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- langroid/parsing/web_search.py +9 -2
- {langroid-0.50.6.dist-info → langroid-0.50.7.dist-info}/METADATA +1 -1
- {langroid-0.50.6.dist-info → langroid-0.50.7.dist-info}/RECORD +5 -5
- {langroid-0.50.6.dist-info → langroid-0.50.7.dist-info}/WHEEL +0 -0
- {langroid-0.50.6.dist-info → langroid-0.50.7.dist-info}/licenses/LICENSE +0 -0
langroid/parsing/web_search.py
CHANGED
@@ -55,8 +55,7 @@ class WebSearchResult:
|
|
55
55
|
try:
|
56
56
|
# First check headers only to get content length and type
|
57
57
|
head_response: Response = requests.head(self.link, timeout=5)
|
58
|
-
|
59
|
-
return f"Error: HTTP {head_response.status_code} for {self.link}"
|
58
|
+
content_type = head_response.headers.get("content-type", "").lower()
|
60
59
|
|
61
60
|
# Skip large files
|
62
61
|
content_length = int(head_response.headers.get("content-length", 0))
|
@@ -64,8 +63,16 @@ class WebSearchResult:
|
|
64
63
|
return (
|
65
64
|
f"Error: Content too large ({content_length} bytes) for {self.link}"
|
66
65
|
)
|
66
|
+
# Skip non-HTML content types
|
67
|
+
if content_type and not any(
|
68
|
+
html_type in content_type
|
69
|
+
for html_type in ["text/html", "application/xhtml", "text/plain"]
|
70
|
+
):
|
71
|
+
return f"Skipping Content type '{content_type}' " f"in {self.link}"
|
67
72
|
|
68
73
|
response: Response = requests.get(self.link, timeout=10)
|
74
|
+
if response.status_code != 200:
|
75
|
+
return f"Error: HTTP {response.status_code} for {self.link}"
|
69
76
|
|
70
77
|
import warnings
|
71
78
|
|
@@ -96,7 +96,7 @@ langroid/parsing/table_loader.py,sha256=qNM4obT_0Y4tjrxNBCNUYjKQ9oETCZ7FbolKBTcz
|
|
96
96
|
langroid/parsing/url_loader.py,sha256=NQuCxa-hTOuxLZDq4xKLvPfGVB4IWFzh2ItqWq297DI,15675
|
97
97
|
langroid/parsing/urls.py,sha256=Tjzr64YsCusiYkY0LEGB5-rSuX8T2P_4DVoOFKAeKuI,8081
|
98
98
|
langroid/parsing/utils.py,sha256=WwqzOhbQRlorbVvddDIZKv9b1KqZCBDm955lgIHDXRw,12828
|
99
|
-
langroid/parsing/web_search.py,sha256=
|
99
|
+
langroid/parsing/web_search.py,sha256=atk8wIpOfiGTvW8yL_26TvjvyY2zD24xIHIi0QjEklI,8599
|
100
100
|
langroid/prompts/__init__.py,sha256=RW11vK6jiLPuaUh4GpeFvstti73gkm8_rDMtrbo2YsU,142
|
101
101
|
langroid/prompts/dialog.py,sha256=SpfiSyofSgy2pwD1YboHR_yHO3LEEMbv6j2sm874jKo,331
|
102
102
|
langroid/prompts/prompts_config.py,sha256=p_lp9nbMuQwhhMwAZsOxveRw9C0ZFZvql7pdIfgVZYo,143
|
@@ -129,7 +129,7 @@ langroid/vector_store/pineconedb.py,sha256=otxXZNaBKb9f_H75HTaU3lMHiaR2NUp5MqwLZ
|
|
129
129
|
langroid/vector_store/postgres.py,sha256=wHPtIi2qM4fhO4pMQr95pz1ZCe7dTb2hxl4VYspGZoA,16104
|
130
130
|
langroid/vector_store/qdrantdb.py,sha256=O6dSBoDZ0jzfeVBd7LLvsXu083xs2fxXtPa9gGX3JX4,18443
|
131
131
|
langroid/vector_store/weaviatedb.py,sha256=Yn8pg139gOy3zkaPfoTbMXEEBCiLiYa1MU5d_3UA1K4,11847
|
132
|
-
langroid-0.50.
|
133
|
-
langroid-0.50.
|
134
|
-
langroid-0.50.
|
135
|
-
langroid-0.50.
|
132
|
+
langroid-0.50.7.dist-info/METADATA,sha256=Am8GortMmvBBtL6aMTS88xmzYCa3ywMFEgraPvNYsTQ,63641
|
133
|
+
langroid-0.50.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
134
|
+
langroid-0.50.7.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
|
135
|
+
langroid-0.50.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|