langroid 0.48.1__py3-none-any.whl → 0.48.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/mytypes.py +10 -3
- langroid/parsing/url_loader.py +4 -4
- {langroid-0.48.1.dist-info → langroid-0.48.2.dist-info}/METADATA +1 -1
- {langroid-0.48.1.dist-info → langroid-0.48.2.dist-info}/RECORD +6 -6
- {langroid-0.48.1.dist-info → langroid-0.48.2.dist-info}/WHEEL +0 -0
- {langroid-0.48.1.dist-info → langroid-0.48.2.dist-info}/licenses/LICENSE +0 -0
langroid/mytypes.py
CHANGED
@@ -3,7 +3,7 @@ from textwrap import dedent
|
|
3
3
|
from typing import Any, Callable, Dict, List, Union
|
4
4
|
from uuid import uuid4
|
5
5
|
|
6
|
-
from langroid.pydantic_v1 import BaseModel, Extra, Field
|
6
|
+
from langroid.pydantic_v1 import BaseModel, Extra, Field, validator
|
7
7
|
|
8
8
|
Number = Union[int, float]
|
9
9
|
Embedding = List[Number]
|
@@ -45,12 +45,19 @@ class DocMetaData(BaseModel):
|
|
45
45
|
|
46
46
|
source: str = "context" # just reference
|
47
47
|
source_content: str = "context" # reference and content
|
48
|
-
title: str = ""
|
49
|
-
published_date: str = ""
|
48
|
+
title: str = "Unknown Title"
|
49
|
+
published_date: str = "Unknown Date"
|
50
50
|
is_chunk: bool = False # if it is a chunk, don't split
|
51
51
|
id: str = Field(default_factory=lambda: str(uuid4()))
|
52
52
|
window_ids: List[str] = [] # for RAG: ids of chunks around this one
|
53
53
|
|
54
|
+
@validator("source", "source_content", "id", "title", "published_date")
|
55
|
+
def ensure_not_empty(cls, v: str) -> str:
|
56
|
+
"""Ensure required string fields are not empty."""
|
57
|
+
if not v:
|
58
|
+
raise ValueError("Field cannot be empty")
|
59
|
+
return v
|
60
|
+
|
54
61
|
def dict_bool_int(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
|
55
62
|
"""
|
56
63
|
Special dict method to convert bool fields to int, to appease some
|
langroid/parsing/url_loader.py
CHANGED
@@ -262,7 +262,7 @@ class FirecrawlCrawler(BaseCrawler):
|
|
262
262
|
content=content,
|
263
263
|
metadata=DocMetaData(
|
264
264
|
source=url,
|
265
|
-
title=page["metadata"].get("title", ""),
|
265
|
+
title=page["metadata"].get("title", "Unknown Title"),
|
266
266
|
),
|
267
267
|
)
|
268
268
|
)
|
@@ -308,7 +308,7 @@ class FirecrawlCrawler(BaseCrawler):
|
|
308
308
|
content=result["markdown"],
|
309
309
|
metadata=DocMetaData(
|
310
310
|
source=url,
|
311
|
-
title=metadata.get("title", ""),
|
311
|
+
title=metadata.get("title", "Unknown Title"),
|
312
312
|
),
|
313
313
|
)
|
314
314
|
)
|
@@ -388,9 +388,9 @@ class ExaCrawler(BaseCrawler):
|
|
388
388
|
content=result.text,
|
389
389
|
metadata=DocMetaData(
|
390
390
|
source=url,
|
391
|
-
title=getattr(result, "title", ""),
|
391
|
+
title=getattr(result, "title", "Unknown Title"),
|
392
392
|
published_date=getattr(
|
393
|
-
result, "published_date", ""
|
393
|
+
result, "published_date", "Unknown Date"
|
394
394
|
),
|
395
395
|
),
|
396
396
|
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
langroid/__init__.py,sha256=z_fCOLQJPOw3LLRPBlFB5-2HyCjpPgQa4m4iY5Fvb8Y,1800
|
2
2
|
langroid/exceptions.py,sha256=OPjece_8cwg94DLPcOGA1ddzy5bGh65pxzcHMnssTz8,2995
|
3
|
-
langroid/mytypes.py,sha256=
|
3
|
+
langroid/mytypes.py,sha256=yzsPpDQqfndMP8ZX9zuQY_oLuUTkW2VJ_iLPARmKoLE,3268
|
4
4
|
langroid/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
langroid/agent/__init__.py,sha256=ll0Cubd2DZ-fsCMl7e10hf9ZjFGKzphfBco396IKITY,786
|
6
6
|
langroid/agent/base.py,sha256=U-UjdpxIFqkzRIB5-LYwHrhMSNI3sDbfnNRqIhrtsyI,79568
|
@@ -91,7 +91,7 @@ langroid/parsing/routing.py,sha256=-FcnlqldzL4ZoxuDwXjQPNHgBe9F9-F4R6q7b_z9CvI,1
|
|
91
91
|
langroid/parsing/search.py,sha256=0NJ5-Rou_BbrHAD7O9b20bKjZJnbadjObvGm4Zq8Kis,9818
|
92
92
|
langroid/parsing/spider.py,sha256=hAVM6wxh1pQ0EN4tI5wMBtAjIk0T-xnpi-ZUzWybhos,3258
|
93
93
|
langroid/parsing/table_loader.py,sha256=qNM4obT_0Y4tjrxNBCNUYjKQ9oETCZ7FbolKBTcz-GM,3410
|
94
|
-
langroid/parsing/url_loader.py,sha256=
|
94
|
+
langroid/parsing/url_loader.py,sha256=DvgkdCZ3gDlAajH0dIUjea4YyXkziK-g36WnaE1J_WI,14884
|
95
95
|
langroid/parsing/urls.py,sha256=Tjzr64YsCusiYkY0LEGB5-rSuX8T2P_4DVoOFKAeKuI,8081
|
96
96
|
langroid/parsing/utils.py,sha256=WwqzOhbQRlorbVvddDIZKv9b1KqZCBDm955lgIHDXRw,12828
|
97
97
|
langroid/parsing/web_search.py,sha256=sARV1Tku4wiInhuCz0kRaMHcoF6Ok6CLu7vapLS8hjs,8222
|
@@ -127,7 +127,7 @@ langroid/vector_store/pineconedb.py,sha256=otxXZNaBKb9f_H75HTaU3lMHiaR2NUp5MqwLZ
|
|
127
127
|
langroid/vector_store/postgres.py,sha256=wHPtIi2qM4fhO4pMQr95pz1ZCe7dTb2hxl4VYspGZoA,16104
|
128
128
|
langroid/vector_store/qdrantdb.py,sha256=O6dSBoDZ0jzfeVBd7LLvsXu083xs2fxXtPa9gGX3JX4,18443
|
129
129
|
langroid/vector_store/weaviatedb.py,sha256=Yn8pg139gOy3zkaPfoTbMXEEBCiLiYa1MU5d_3UA1K4,11847
|
130
|
-
langroid-0.48.
|
131
|
-
langroid-0.48.
|
132
|
-
langroid-0.48.
|
133
|
-
langroid-0.48.
|
130
|
+
langroid-0.48.2.dist-info/METADATA,sha256=kCjeNq2-TNlc0DM8DRitNPJsHUhubpKGNJ2q-Mp6rY4,63606
|
131
|
+
langroid-0.48.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
132
|
+
langroid-0.48.2.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
|
133
|
+
langroid-0.48.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|