langroid 0.1.158__py3-none-any.whl → 0.1.160__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -169,7 +169,7 @@ class OpenAIGPTConfig(LLMConfig):
169
169
  local_model = "api_base" in kwargs and kwargs["api_base"] is not None
170
170
 
171
171
  chat_model = kwargs.get("chat_model", "")
172
- if chat_model.startswith("litellm") or chat_model.startswith("local"):
172
+ if chat_model.startswith("litellm/") or chat_model.startswith("local/"):
173
173
  local_model = True
174
174
 
175
175
  warn_gpt_3_5 = (
@@ -62,6 +62,8 @@ class DocumentParser(Parser):
62
62
  elif DocumentParser._document_type(source) == DocumentType.DOCX:
63
63
  if config.docx.library == "unstructured":
64
64
  return UnstructuredDocxParser(source, config)
65
+ elif config.docx.library == "python-docx":
66
+ return PythonDocxParser(source, config)
65
67
  else:
66
68
  raise ValueError(
67
69
  f"Unsupported DOCX library specified: {config.docx.library}"
@@ -436,3 +438,34 @@ class UnstructuredDocxParser(DocumentParser):
436
438
  """
437
439
  text = " ".join(el.text for el in page)
438
440
  return self.fix_text(text)
441
+
442
+
443
+ class PythonDocxParser(DocumentParser):
444
+ """
445
+ Parser for processing DOCX files using the `python-docx` library.
446
+ """
447
+
448
+ def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]:
449
+ """
450
+ Simulate iterating through pages.
451
+ In a DOCX file, pages are not explicitly defined,
452
+ so we consider each paragraph as a separate 'page' for simplicity.
453
+ """
454
+ import docx
455
+
456
+ doc = docx.Document(self.doc_bytes)
457
+ for i, para in enumerate(doc.paragraphs, start=1):
458
+ yield i, [para]
459
+
460
+ def extract_text_from_page(self, page: Any) -> str:
461
+ """
462
+ Extract text from a given 'page', which in this case is a single paragraph.
463
+
464
+ Args:
465
+ page (list): A list containing a single Paragraph object.
466
+
467
+ Returns:
468
+ str: Extracted text from the paragraph.
469
+ """
470
+ paragraph = page[0]
471
+ return self.fix_text(paragraph.text)
@@ -1,6 +1,6 @@
1
1
  import logging
2
2
  from enum import Enum
3
- from typing import Dict, List
3
+ from typing import Dict, List, Literal
4
4
 
5
5
  import tiktoken
6
6
  from pydantic import BaseSettings
@@ -19,11 +19,13 @@ class Splitter(str, Enum):
19
19
 
20
20
 
21
21
  class PdfParsingConfig(BaseSettings):
22
- library: str = "pdfplumber"
22
+ library: Literal[
23
+ "fitz", "pdfplumber", "pypdf", "unstructured", "haystack"
24
+ ] = "pdfplumber"
23
25
 
24
26
 
25
27
  class DocxParsingConfig(BaseSettings):
26
- library: str = "unstructured"
28
+ library: Literal["python-docx", "unstructured"] = "unstructured"
27
29
 
28
30
 
29
31
  class ParsingConfig(BaseSettings):
langroid/utils/logging.py CHANGED
@@ -72,12 +72,9 @@ def setup_file_logger(
72
72
  propagate: bool = False,
73
73
  ) -> logging.Logger:
74
74
  os.makedirs(os.path.dirname(filename), exist_ok=True)
75
- if not append:
76
- if os.path.exists(filename):
77
- os.remove(filename)
78
-
75
+ file_mode = "a" if append else "w"
79
76
  logger = setup_logger(name)
80
- handler = logging.FileHandler(filename)
77
+ handler = logging.FileHandler(filename, mode=file_mode)
81
78
  handler.setLevel(logging.INFO)
82
79
  if log_format:
83
80
  formatter = logging.Formatter(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: langroid
3
- Version: 0.1.158
3
+ Version: 0.1.160
4
4
  Summary: Harness LLMs with Multi-Agent Programming
5
5
  License: MIT
6
6
  Author: Prasad Chalasani
@@ -64,6 +64,7 @@ Requires-Dist: pytest-asyncio (>=0.21.1,<0.22.0)
64
64
  Requires-Dist: pytest-mysql (>=2.4.2,<3.0.0) ; extra == "mysql"
65
65
  Requires-Dist: pytest-postgresql (>=5.0.0,<6.0.0) ; extra == "postgres"
66
66
  Requires-Dist: pytest-redis (>=3.0.2,<4.0.0)
67
+ Requires-Dist: python-docx (>=1.1.0,<2.0.0)
67
68
  Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
68
69
  Requires-Dist: qdrant-client (>=1.7.0,<2.0.0)
69
70
  Requires-Dist: rank-bm25 (>=0.2.2,<0.3.0)
@@ -48,7 +48,7 @@ langroid/language_models/azure_openai.py,sha256=_OOEoZOziI3NDOH_8t3qmh8IDWoHESQe
48
48
  langroid/language_models/base.py,sha256=jUEUqDWJBVxIxmG6U4Ysg2QKGOnP_CLmRuEMicsSwUw,20596
49
49
  langroid/language_models/config.py,sha256=PXcmEUq52GCDj2sekt8F9E1flWyyNjP2S0LTRs7T6Kg,269
50
50
  langroid/language_models/openai_assistants.py,sha256=9K-DEAL2aSWHeXj2hwCo2RAlK9_1oCPtqX2u1wISCj8,36
51
- langroid/language_models/openai_gpt.py,sha256=T_PkzK0zSbyh1hA_Pvao2RGbeVhJsaJpjTMJKyLdVK8,42074
51
+ langroid/language_models/openai_gpt.py,sha256=EF10UqGuAj-fZC6eAnssWGwLtA_pgW4e4ih64EP_LLA,42076
52
52
  langroid/language_models/prompt_formatter/__init__.py,sha256=wj2e6j7R9d3m63HCbSDY1vosjFuhHLQVlgBrq8iqF38,197
53
53
  langroid/language_models/prompt_formatter/base.py,sha256=2y_GcwhstvB5ih3haS7l5Fv79jVnFJ_vEw1jqWJzB9k,1247
54
54
  langroid/language_models/prompt_formatter/llama2_formatter.py,sha256=YdcO88qyBeuMENVIVvVqSYuEpvYSTndUe_jd6hVTko4,2899
@@ -59,10 +59,10 @@ langroid/parsing/agent_chats.py,sha256=sbZRV9ujdM5QXvvuHVjIi2ysYSYlap-uqfMMUKulr
59
59
  langroid/parsing/code-parsing.md,sha256=--cyyNiSZSDlIwcjAV4-shKrSiRe2ytF3AdSoS_hD2g,3294
60
60
  langroid/parsing/code_parser.py,sha256=BbDAzp35wkYQ9U1dpf1ARL0lVyi0tfqEc6_eox2C090,3727
61
61
  langroid/parsing/config.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
62
- langroid/parsing/document_parser.py,sha256=YC3IXQ9ErpBGBZh6Be9gfJWHcTwGTSMfNQMT5ARrj5g,14615
62
+ langroid/parsing/document_parser.py,sha256=Msv8acFzVDex-nKPNxyGOvTw4eNKswrSQluYOa1qfAE,15670
63
63
  langroid/parsing/json.py,sha256=KfIIma_6IurQ09WTUyBn3mbSK67QeXZ8eHGDxGlOsv0,2551
64
64
  langroid/parsing/para_sentence_split.py,sha256=AJBzZojP3zpB-_IMiiHismhqcvkrVBQ3ZINoQyx_bE4,2000
65
- langroid/parsing/parser.py,sha256=BwVJboobG71N08w5LC7Tu36LI4pEJoSgAdiBSLChWGY,10251
65
+ langroid/parsing/parser.py,sha256=0QDDfRrcO9jUwEj9WQiWi8ayVZ19MRC1xjwTLDrCKwg,10372
66
66
  langroid/parsing/repo_loader.py,sha256=hhMfQBBSo-HvsZDQEcgmk_idKQQAeDQ_MMPd38x2ACU,29338
67
67
  langroid/parsing/search.py,sha256=xmQdAdTIwZ0REEUeQVFlGZlqf7k8Poah7-ALuyW7Ov0,8440
68
68
  langroid/parsing/spider.py,sha256=w_mHR1B4KOmxsBLoVI8kMkMTEbwTzeK3ath9fOMJrTk,3043
@@ -86,7 +86,7 @@ langroid/utils/docker.py,sha256=kJQOLTgM0x9j9pgIIqp0dZNZCTvoUDhp6i8tYBq1Jr0,1105
86
86
  langroid/utils/globals.py,sha256=VkTHhlqSz86oOPq65sjul0XU8I52UNaFC5vwybMQ74w,1343
87
87
  langroid/utils/llms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
88
88
  langroid/utils/llms/strings.py,sha256=CSAX9Z6FQOLXOzbLMe_Opqtc3ruDAKTTk7cPqc6Blh0,263
89
- langroid/utils/logging.py,sha256=xXpohbvK74_reomdkIWTeyDjGG8GT1fuU7zcLL3Ngt8,3951
89
+ langroid/utils/logging.py,sha256=R8TN-FqVpwZ4Ajgls9TDMthLvPpQd0QVNXK-PJDj1Z8,3917
90
90
  langroid/utils/output/__init__.py,sha256=IpfqnCkfXa4HaOx39EMUhXuA7GPZFd7N_QMm1n43C_I,174
91
91
  langroid/utils/output/printing.py,sha256=5EsYB1O4qKhocW19aebOUzK82RD9U5nygbY21yo8gfg,2872
92
92
  langroid/utils/pandas_utils.py,sha256=nSA1tIgOUTkRDn-IKq7HP8XGJcL6bA110LcPfRF7h8I,707
@@ -103,7 +103,7 @@ langroid/vector_store/meilisearch.py,sha256=d2huA9P-NoYRuAQ9ZeXJmMKr7ry8u90RUSR2
103
103
  langroid/vector_store/momento.py,sha256=j6Eo6oIDN2fe7lsBOlCXJn3uvvERHHTFL5QJfeREeOM,10044
104
104
  langroid/vector_store/qdrant_cloud.py,sha256=3im4Mip0QXLkR6wiqVsjV1QvhSElfxdFSuDKddBDQ-4,188
105
105
  langroid/vector_store/qdrantdb.py,sha256=qt7Dye6rcgoe0551WzmOxRGIlJfL87D4MX7HdqxuEok,13393
106
- langroid-0.1.158.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
107
- langroid-0.1.158.dist-info/METADATA,sha256=dEmvRye20e3XICFZz1dze_GzYttGiGUZNuzUYNjQS0w,42701
108
- langroid-0.1.158.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
109
- langroid-0.1.158.dist-info/RECORD,,
106
+ langroid-0.1.160.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
107
+ langroid-0.1.160.dist-info/METADATA,sha256=C2tjQ0HKr3e3x0iWWa6vrbraLd9qgHZuslb1nrN5ERA,42745
108
+ langroid-0.1.160.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
109
+ langroid-0.1.160.dist-info/RECORD,,