langroid 0.36.0__py3-none-any.whl → 0.36.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,6 +15,7 @@ pip install "langroid[hf-embeddings]"
15
15
  """
16
16
 
17
17
  import logging
18
+ import textwrap
18
19
  from collections import OrderedDict
19
20
  from functools import cache
20
21
  from typing import Any, Callable, Dict, List, Optional, Set, Tuple, no_type_check
@@ -81,7 +82,7 @@ You will be given various passages from these documents, and asked to answer que
81
82
  about them, or summarize them into coherent answers.
82
83
  """
83
84
 
84
- CHUNK_ENRICHMENT_DELIMITER = "<##-##-##>"
85
+ CHUNK_ENRICHMENT_DELIMITER = "\n<##-##-##>"
85
86
 
86
87
  has_sentence_transformers = False
87
88
  try:
@@ -810,9 +811,11 @@ class DocChatAgent(ChatAgent):
810
811
  return "\n".join(
811
812
  [
812
813
  f"""
813
- [{i+1}]
814
+ -----[EXTRACT #{i+1}]----------
814
815
  {content}
815
816
  {source}
817
+ -----END OF EXTRACT------------
818
+
816
819
  """
817
820
  for i, (content, source) in enumerate(zip(contents, sources))
818
821
  ]
@@ -949,12 +952,13 @@ class DocChatAgent(ChatAgent):
949
952
  continue
950
953
 
951
954
  # Combine original content with questions in a structured way
952
- combined_content = f"""
953
- {doc.content}
954
-
955
+ combined_content = textwrap.dedent(
956
+ f"""\
957
+ {doc.content}
955
958
  {enrichment_config.delimiter}
956
959
  {enrichment}
957
- """.strip()
960
+ """
961
+ )
958
962
 
959
963
  new_doc = doc.copy(
960
964
  update={
@@ -1440,7 +1444,7 @@ class DocChatAgent(ChatAgent):
1440
1444
  delimiter = self.config.chunk_enrichment_config.delimiter
1441
1445
  return [
1442
1446
  (
1443
- doc.copy(update={"content": doc.content.split(delimiter)[0].strip()})
1447
+ doc.copy(update={"content": doc.content.split(delimiter)[0]})
1444
1448
  if doc.content and getattr(doc.metadata, "has_enrichment", False)
1445
1449
  else doc
1446
1450
  )
@@ -267,9 +267,11 @@ class Parser:
267
267
  # Truncate the chunk text at the punctuation mark
268
268
  chunk_text = chunk_text[: last_punctuation + 1]
269
269
 
270
- # Remove any newline characters and strip any leading or
271
- # trailing whitespace
272
- chunk_text_to_append = re.sub(r"\n{2,}", "\n", chunk_text).strip()
270
+ # Replace redundant (3 or more) newlines with 2 newlines to preserve
271
+ # paragraph separation!
272
+ # But do NOT strip leading/trailing whitespace, to preserve formatting
273
+ # (e.g. code blocks, or in case we want to stitch chunks back together)
274
+ chunk_text_to_append = re.sub(r"\n{3,}", "\n\n", chunk_text)
273
275
 
274
276
  if len(chunk_text_to_append) > self.config.discard_chunk_chars:
275
277
  # Append the chunk text to the list of chunks
langroid/parsing/utils.py CHANGED
@@ -310,9 +310,9 @@ def extract_numbered_segments(s: str, specs: str) -> str:
310
310
  ]
311
311
 
312
312
  # If we extracted any segments from this paragraph,
313
- # join them and append to results
313
+ # join them with ellipsis (...) and append to results.
314
314
  if extracted_segments:
315
- extracted_paragraphs.append(" ".join(extracted_segments))
315
+ extracted_paragraphs.append("...".join(extracted_segments))
316
316
 
317
317
  return "\n\n".join(extracted_paragraphs)
318
318
 
@@ -17,25 +17,45 @@ def extract_markdown_references(md_string: str) -> list[int]:
17
17
  return sorted(set(int(match) for match in matches))
18
18
 
19
19
 
20
- def format_footnote_text(content: str, width: int = 80) -> str:
20
+ def format_footnote_text(content: str, width: int = 0) -> str:
21
21
  """
22
- Formats the content part of a footnote (i.e. not the first line that
23
- appears right after the reference [^4])
24
- It wraps the text so that no line is longer than the specified width and indents
25
- lines as necessary for markdown footnotes.
22
+ Formats the content so that each original line is individually processed.
23
+ - If width=0, no wrapping is done (lines remain as is).
24
+ - If width>0, lines are wrapped to that width.
25
+ - Blank lines remain blank (with indentation).
26
+ - Everything is indented by 4 spaces (for markdown footnotes).
26
27
 
27
28
  Args:
28
29
  content (str): The text of the footnote to be formatted.
29
- width (int): Maximum width of the text lines.
30
+ width (int): Maximum width of the text lines. If 0, lines are not wrapped.
30
31
 
31
32
  Returns:
32
33
  str: Properly formatted markdown footnote text.
33
34
  """
34
35
  import textwrap
35
36
 
36
- # Wrap the text to the specified width
37
- wrapped_lines = textwrap.wrap(content, width)
38
- if len(wrapped_lines) == 0:
39
- return ""
40
- indent = " " # Indentation for markdown footnotes
41
- return indent + ("\n" + indent).join(wrapped_lines)
37
+ indent = " " # 4 spaces for markdown footnotes
38
+ lines = content.split("\n") # keep original line structure
39
+
40
+ output_lines = []
41
+ for line in lines:
42
+ # If the line is empty (or just spaces), keep it blank (but indented)
43
+ if not line.strip():
44
+ output_lines.append(indent)
45
+ continue
46
+
47
+ if width > 0:
48
+ # Wrap each non-empty line to the specified width
49
+ wrapped = textwrap.wrap(line, width=width)
50
+ if not wrapped:
51
+ # If textwrap gives nothing, add a blank (indented) line
52
+ output_lines.append(indent)
53
+ else:
54
+ for subline in wrapped:
55
+ output_lines.append(indent + subline)
56
+ else:
57
+ # No wrapping: just indent the original line
58
+ output_lines.append(indent + line)
59
+
60
+ # Join them with newline so we preserve the paragraph/blank line structure
61
+ return "\n".join(output_lines)
@@ -264,7 +264,7 @@ class VectorStore(ABC):
264
264
  metadata = copy.deepcopy(id2metadata[w[0]])
265
265
  metadata.window_ids = w
266
266
  document = Document(
267
- content=" ".join([d.content for d in self.get_documents_by_ids(w)]),
267
+ content="".join([d.content for d in self.get_documents_by_ids(w)]),
268
268
  metadata=metadata,
269
269
  )
270
270
  # make a fresh id since content is in general different
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: langroid
3
- Version: 0.36.0
3
+ Version: 0.36.1
4
4
  Summary: Harness LLMs with Multi-Agent Programming
5
5
  Author-email: Prasad Chalasani <pchalasani@gmail.com>
6
6
  License: MIT
@@ -292,20 +292,28 @@ teacher_task.run()
292
292
  <summary> <b>Click to expand</b></summary>
293
293
 
294
294
  - **Jan 2025:**
295
- - [0.33.0](https://github.com/langroid/langroid/releases/tag/0.33.3) Move from Poetry to uv!
295
+ - [0.36.0](https://github.com/langroid/langroid/releases/tag/0.36.0): Weaviate vector-db support (thanks @abab-dev).
296
+ - [0.35.0](https://github.com/langroid/langroid/releases/tag/0.35.0): Capture/Stream reasoning content from
297
+ Reasoning LLMs (e.g. DeepSeek, OpenAI o1) in addition to final answer.
298
+ - [0.34.0](https://github.com/langroid/langroid/releases/tag/0.34.0): DocChatAgent
299
+ chunk enrichment to improve retrieval. (collaboration with @dfm88).
300
+ - [0.33.0](https://github.com/langroid/langroid/releases/tag/0.33.3) Move from Poetry to uv! (thanks @abab-dev).
296
301
  - [0.32.0](https://github.com/langroid/langroid/releases/tag/0.32.0) DeepSeek v3 support.
297
302
  - **Dec 2024:**
298
303
  - [0.31.0](https://github.com/langroid/langroid/releases/tag/0.31.0) Azure OpenAI Embeddings
299
- - [0.30.0](https://github.com/langroid/langroid/releases/tag/0.30.0) Llama-cpp embeddings.
300
- - [0.29.0](https://github.com/langroid/langroid/releases/tag/0.29.0) Custom Azure OpenAI Client
304
+ - [0.30.0](https://github.com/langroid/langroid/releases/tag/0.30.0) Llama-cpp embeddings (thanks @Kwigg).
305
+ - [0.29.0](https://github.com/langroid/langroid/releases/tag/0.29.0) Custom Azure OpenAI Client (thanks
306
+ @johannestang).
301
307
  - [0.28.0](https://github.com/langroid/langroid/releases/tag/0.28.0) `ToolMessage`: `_handler` field to override
302
- default handler method name in `request` field.
308
+ default handler method name in `request` field (thanks @alexagr).
303
309
  - [0.27.0](https://github.com/langroid/langroid/releases/tag/0.27.0) OpenRouter Support.
304
310
  - [0.26.0](https://github.com/langroid/langroid/releases/tag/0.26.0) Update to latest Chainlit.
305
- - [0.25.0](https://github.com/langroid/langroid/releases/tag/0.25.0) True Async Methods for agent and user-response.
311
+ - [0.25.0](https://github.com/langroid/langroid/releases/tag/0.25.0) True Async Methods for agent and
312
+ user-response (thanks @alexagr).
306
313
  - **Nov 2024:**
307
314
  - **[0.24.0](https://langroid.github.io/langroid/notes/structured-output/)**:
308
315
  Enables support for `Agent`s with strict JSON schema output format on compatible LLMs and strict mode for the OpenAI tools API.
316
+ (thanks @nilspalumbo).
309
317
  - **[0.23.0](https://langroid.github.io/langroid/tutorials/local-llm-setup/#local-llms-hosted-on-glhfchat)**:
310
318
  support for LLMs (e.g. `Qwen2.5-Coder-32b-Instruct`) hosted on glhf.chat
311
319
  - **[0.22.0](https://langroid.github.io/langroid/notes/large-tool-results/)**:
@@ -14,7 +14,7 @@ langroid/agent/xml_tool_message.py,sha256=6SshYZJKIfi4mkE-gIoSwjkEYekQ8GwcSiCv7a
14
14
  langroid/agent/callbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  langroid/agent/callbacks/chainlit.py,sha256=RH8qUXaZE5o2WQz3WJQ1SdFtASGlxWCA6_HYz_3meDQ,20822
16
16
  langroid/agent/special/__init__.py,sha256=gik_Xtm_zV7U9s30Mn8UX3Gyuy4jTjQe9zjiE3HWmEo,1273
17
- langroid/agent/special/doc_chat_agent.py,sha256=8OYJ7IRepdQ9GpoTX_dhw0NkENmTe9iQsJAN6JI-09c,64670
17
+ langroid/agent/special/doc_chat_agent.py,sha256=PCpdaVocIWt6ftO5OfmI1l20abVbKUSZWdMcu9hJDD0,64816
18
18
  langroid/agent/special/lance_doc_chat_agent.py,sha256=s8xoRs0gGaFtDYFUSIRchsgDVbS5Q3C2b2mr3V1Fd-Q,10419
19
19
  langroid/agent/special/lance_tools.py,sha256=qS8x4wi8mrqfbYV2ztFzrcxyhHQ0ZWOc-zkYiH7awj0,2105
20
20
  langroid/agent/special/relevance_extractor_agent.py,sha256=zIx8GUdVo1aGW6ASla0NPQjYYIpmriK_TYMijqAx3F8,4796
@@ -81,7 +81,7 @@ langroid/parsing/code_parser.py,sha256=AOxb3xbYpTBPP3goOm5dKfJdh5hS_2BhLVCEkifWZ
81
81
  langroid/parsing/document_parser.py,sha256=9xUOyrVNBAS9cpCvCptr2XK4Kq47W574i8zzGEoXc3c,24933
82
82
  langroid/parsing/para_sentence_split.py,sha256=AJBzZojP3zpB-_IMiiHismhqcvkrVBQ3ZINoQyx_bE4,2000
83
83
  langroid/parsing/parse_json.py,sha256=aADo38bAHQhC8on4aWZZzVzSDy-dK35vRLZsFI2ewh8,4756
84
- langroid/parsing/parser.py,sha256=N0jr1Zl_f_rx-8YMmSQftPHquqSQfec-3s7JAhhEe6I,12032
84
+ langroid/parsing/parser.py,sha256=Wq204V1wqLdXS8kZ5J81dU2jE8fjoRY9zzNFbzLFDIs,12205
85
85
  langroid/parsing/repo_loader.py,sha256=3GjvPJS6Vf5L6gV2zOU8s-Tf1oq_fZm-IB_RL_7CTsY,29373
86
86
  langroid/parsing/routing.py,sha256=-FcnlqldzL4ZoxuDwXjQPNHgBe9F9-F4R6q7b_z9CvI,1232
87
87
  langroid/parsing/search.py,sha256=0i_r0ESb5HEQfagA2g7_uMQyxYPADWVbdcN9ixZhS4E,8992
@@ -89,7 +89,7 @@ langroid/parsing/spider.py,sha256=hAVM6wxh1pQ0EN4tI5wMBtAjIk0T-xnpi-ZUzWybhos,32
89
89
  langroid/parsing/table_loader.py,sha256=qNM4obT_0Y4tjrxNBCNUYjKQ9oETCZ7FbolKBTcz-GM,3410
90
90
  langroid/parsing/url_loader.py,sha256=JK48KktLRDBfjrt4nsUfy92M6yGdEeicAqOum2MdULM,4656
91
91
  langroid/parsing/urls.py,sha256=XjpaV5onG7gKQ5iQeFTzHSw5P08Aqw0g-rMUu61lR6s,7988
92
- langroid/parsing/utils.py,sha256=kb9DlHaG1iQB-6JagH1C26SdCNNf8U-2XaXia4_dWCw,12726
92
+ langroid/parsing/utils.py,sha256=YrV2GNL4EOBGknA4AClPGdJ4S5B31radrt-Ou8OAKoU,12749
93
93
  langroid/parsing/web_search.py,sha256=8rW8EI3tyHITaB2l9MT_6yLMeQfo8y-Ih-8N2v2uMpk,4931
94
94
  langroid/prompts/__init__.py,sha256=RW11vK6jiLPuaUh4GpeFvstti73gkm8_rDMtrbo2YsU,142
95
95
  langroid/prompts/dialog.py,sha256=SpfiSyofSgy2pwD1YboHR_yHO3LEEMbv6j2sm874jKo,331
@@ -111,18 +111,18 @@ langroid/utils/types.py,sha256=4GrOnU3HLWh-UwaUPp7LlB3V413q3K5OSzc0ggDoQ6A,2510
111
111
  langroid/utils/algorithms/__init__.py,sha256=WylYoZymA0fnzpB4vrsH_0n7WsoLhmuZq8qxsOCjUpM,41
112
112
  langroid/utils/algorithms/graph.py,sha256=JbdpPnUOhw4-D6O7ou101JLA3xPCD0Lr3qaPoFCaRfo,2866
113
113
  langroid/utils/output/__init__.py,sha256=7P0f--4IZneNsTxXY5fd6d6iW-CeVe-KSsl-87sbBPc,340
114
- langroid/utils/output/citations.py,sha256=PSY2cpti8W-ZGFMAgj1lYoEIZy0lsniLpCliMsVkXtc,1425
114
+ langroid/utils/output/citations.py,sha256=mQhRXVN-uhmKd2z32UZQBE0adZGEaQJ7cVXLfkrcZJI,2221
115
115
  langroid/utils/output/printing.py,sha256=yzPJZN-8_jyOJmI9N_oLwEDfjMwVgk3IDiwnZ4eK_AE,2962
116
116
  langroid/utils/output/status.py,sha256=rzbE7mDJcgNNvdtylCseQcPGCGghtJvVq3lB-OPJ49E,1049
117
117
  langroid/vector_store/__init__.py,sha256=BcoOm1tG3y0EqjkIGmMOHkY9iTUhDHgyruknWDKgqIg,1214
118
- langroid/vector_store/base.py,sha256=c9slwOcSWCG0SFGDuPLAQF9vBLDb4Eg8uaUol27Jf9c,14209
118
+ langroid/vector_store/base.py,sha256=suBanIt0iKEgnMnGdQOyWS58guG20Jyy-GK4DMMuYL0,14208
119
119
  langroid/vector_store/chromadb.py,sha256=9WXW9IoSnhOmGEtMruVhEtVWL_VO6NXnPIz-nzh0gIQ,8235
120
120
  langroid/vector_store/lancedb.py,sha256=b3_vWkTjG8mweZ7ZNlUD-NjmQP_rLBZfyKWcxt2vosA,14855
121
121
  langroid/vector_store/meilisearch.py,sha256=6frB7GFWeWmeKzRfLZIvzRjllniZ1cYj3HmhHQICXLs,11663
122
122
  langroid/vector_store/momento.py,sha256=UNHGT6jXuQtqY9f6MdqGU14bVnS0zHgIJUa30ULpUJo,10474
123
123
  langroid/vector_store/qdrantdb.py,sha256=HRLCt-FG8y4718omwpFaQZnWeYxPj0XCwS4tjokI1sU,18116
124
124
  langroid/vector_store/weaviatedb.py,sha256=Jxe-cp2PyZdQ9NQVNZJ-CnsYsNxgUBdfAOoLZQEN650,10602
125
- langroid-0.36.0.dist-info/METADATA,sha256=aDLzYdeo80UbUZB0oEv9Rc1WMgWvG_jQtrBZnI2y5Bg,59508
126
- langroid-0.36.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
127
- langroid-0.36.0.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
128
- langroid-0.36.0.dist-info/RECORD,,
125
+ langroid-0.36.1.dist-info/METADATA,sha256=OzErGoPlFwxWia7jrFUx4M9FolTjexpJbgpTfhwT9Nk,60103
126
+ langroid-0.36.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
127
+ langroid-0.36.1.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
128
+ langroid-0.36.1.dist-info/RECORD,,