langroid 0.1.161__tar.gz → 0.1.163__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. {langroid-0.1.161 → langroid-0.1.163}/PKG-INFO +5 -3
  2. {langroid-0.1.161 → langroid-0.1.163}/README.md +1 -1
  3. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/special/doc_chat_agent.py +7 -2
  4. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/special/relevance_extractor_agent.py +2 -1
  5. langroid-0.1.163/langroid/agent/tools/sciphi_search_rag_tool.py +76 -0
  6. {langroid-0.1.161 → langroid-0.1.163}/langroid/language_models/openai_gpt.py +16 -6
  7. {langroid-0.1.161 → langroid-0.1.163}/langroid/parsing/utils.py +51 -7
  8. {langroid-0.1.161 → langroid-0.1.163}/langroid/vector_store/lancedb.py +1 -4
  9. {langroid-0.1.161 → langroid-0.1.163}/pyproject.toml +4 -2
  10. {langroid-0.1.161 → langroid-0.1.163}/LICENSE +0 -0
  11. {langroid-0.1.161 → langroid-0.1.163}/langroid/__init__.py +0 -0
  12. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/__init__.py +0 -0
  13. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/base.py +0 -0
  14. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/batch.py +0 -0
  15. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/chat_agent.py +0 -0
  16. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/chat_document.py +0 -0
  17. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/helpers.py +0 -0
  18. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/junk +0 -0
  19. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/openai_assistant.py +0 -0
  20. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/special/__init__.py +0 -0
  21. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
  22. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/special/lance_rag/__init__.py +0 -0
  23. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
  24. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
  25. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/special/lance_rag/lance_tools.py +0 -0
  26. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
  27. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/special/retriever_agent.py +0 -0
  28. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/special/sql/__init__.py +0 -0
  29. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
  30. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/special/sql/utils/__init__.py +0 -0
  31. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
  32. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
  33. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/special/sql/utils/system_message.py +0 -0
  34. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/special/sql/utils/tools.py +0 -0
  35. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/special/table_chat_agent.py +0 -0
  36. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/task.py +0 -0
  37. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/tool_message.py +0 -0
  38. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/tools/__init__.py +0 -0
  39. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/tools/extract_tool.py +0 -0
  40. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/tools/generator_tool.py +0 -0
  41. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/tools/google_search_tool.py +0 -0
  42. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/tools/recipient_tool.py +0 -0
  43. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/tools/run_python_code.py +0 -0
  44. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent/tools/segment_extract_tool.py +0 -0
  45. {langroid-0.1.161 → langroid-0.1.163}/langroid/agent_config.py +0 -0
  46. {langroid-0.1.161 → langroid-0.1.163}/langroid/cachedb/__init__.py +0 -0
  47. {langroid-0.1.161 → langroid-0.1.163}/langroid/cachedb/base.py +0 -0
  48. {langroid-0.1.161 → langroid-0.1.163}/langroid/cachedb/momento_cachedb.py +0 -0
  49. {langroid-0.1.161 → langroid-0.1.163}/langroid/cachedb/redis_cachedb.py +0 -0
  50. {langroid-0.1.161 → langroid-0.1.163}/langroid/embedding_models/__init__.py +0 -0
  51. {langroid-0.1.161 → langroid-0.1.163}/langroid/embedding_models/base.py +0 -0
  52. {langroid-0.1.161 → langroid-0.1.163}/langroid/embedding_models/clustering.py +0 -0
  53. {langroid-0.1.161 → langroid-0.1.163}/langroid/embedding_models/models.py +0 -0
  54. {langroid-0.1.161 → langroid-0.1.163}/langroid/language_models/__init__.py +0 -0
  55. {langroid-0.1.161 → langroid-0.1.163}/langroid/language_models/azure_openai.py +0 -0
  56. {langroid-0.1.161 → langroid-0.1.163}/langroid/language_models/base.py +0 -0
  57. {langroid-0.1.161 → langroid-0.1.163}/langroid/language_models/config.py +0 -0
  58. {langroid-0.1.161 → langroid-0.1.163}/langroid/language_models/openai_assistants.py +0 -0
  59. {langroid-0.1.161 → langroid-0.1.163}/langroid/language_models/prompt_formatter/__init__.py +0 -0
  60. {langroid-0.1.161 → langroid-0.1.163}/langroid/language_models/prompt_formatter/base.py +0 -0
  61. {langroid-0.1.161 → langroid-0.1.163}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
  62. {langroid-0.1.161 → langroid-0.1.163}/langroid/language_models/utils.py +0 -0
  63. {langroid-0.1.161 → langroid-0.1.163}/langroid/mytypes.py +0 -0
  64. {langroid-0.1.161 → langroid-0.1.163}/langroid/parsing/__init__.py +0 -0
  65. {langroid-0.1.161 → langroid-0.1.163}/langroid/parsing/agent_chats.py +0 -0
  66. {langroid-0.1.161 → langroid-0.1.163}/langroid/parsing/code-parsing.md +0 -0
  67. {langroid-0.1.161 → langroid-0.1.163}/langroid/parsing/code_parser.py +0 -0
  68. {langroid-0.1.161 → langroid-0.1.163}/langroid/parsing/config.py +0 -0
  69. {langroid-0.1.161 → langroid-0.1.163}/langroid/parsing/document_parser.py +0 -0
  70. {langroid-0.1.161 → langroid-0.1.163}/langroid/parsing/json.py +0 -0
  71. {langroid-0.1.161 → langroid-0.1.163}/langroid/parsing/para_sentence_split.py +0 -0
  72. {langroid-0.1.161 → langroid-0.1.163}/langroid/parsing/parser.py +0 -0
  73. {langroid-0.1.161 → langroid-0.1.163}/langroid/parsing/repo_loader.py +0 -0
  74. {langroid-0.1.161 → langroid-0.1.163}/langroid/parsing/search.py +0 -0
  75. {langroid-0.1.161 → langroid-0.1.163}/langroid/parsing/spider.py +0 -0
  76. {langroid-0.1.161 → langroid-0.1.163}/langroid/parsing/table_loader.py +0 -0
  77. {langroid-0.1.161 → langroid-0.1.163}/langroid/parsing/url_loader.py +0 -0
  78. {langroid-0.1.161 → langroid-0.1.163}/langroid/parsing/url_loader_cookies.py +0 -0
  79. {langroid-0.1.161 → langroid-0.1.163}/langroid/parsing/urls.py +0 -0
  80. {langroid-0.1.161 → langroid-0.1.163}/langroid/parsing/web_search.py +0 -0
  81. {langroid-0.1.161 → langroid-0.1.163}/langroid/prompts/__init__.py +0 -0
  82. {langroid-0.1.161 → langroid-0.1.163}/langroid/prompts/dialog.py +0 -0
  83. {langroid-0.1.161 → langroid-0.1.163}/langroid/prompts/prompts_config.py +0 -0
  84. {langroid-0.1.161 → langroid-0.1.163}/langroid/prompts/templates.py +0 -0
  85. {langroid-0.1.161 → langroid-0.1.163}/langroid/prompts/transforms.py +0 -0
  86. {langroid-0.1.161 → langroid-0.1.163}/langroid/utils/__init__.py +0 -0
  87. {langroid-0.1.161 → langroid-0.1.163}/langroid/utils/algorithms/__init__.py +0 -0
  88. {langroid-0.1.161 → langroid-0.1.163}/langroid/utils/algorithms/graph.py +0 -0
  89. {langroid-0.1.161 → langroid-0.1.163}/langroid/utils/configuration.py +0 -0
  90. {langroid-0.1.161 → langroid-0.1.163}/langroid/utils/constants.py +0 -0
  91. {langroid-0.1.161 → langroid-0.1.163}/langroid/utils/docker.py +0 -0
  92. {langroid-0.1.161 → langroid-0.1.163}/langroid/utils/globals.py +0 -0
  93. {langroid-0.1.161 → langroid-0.1.163}/langroid/utils/llms/__init__.py +0 -0
  94. {langroid-0.1.161 → langroid-0.1.163}/langroid/utils/llms/strings.py +0 -0
  95. {langroid-0.1.161 → langroid-0.1.163}/langroid/utils/logging.py +0 -0
  96. {langroid-0.1.161 → langroid-0.1.163}/langroid/utils/output/__init__.py +0 -0
  97. {langroid-0.1.161 → langroid-0.1.163}/langroid/utils/output/printing.py +0 -0
  98. {langroid-0.1.161 → langroid-0.1.163}/langroid/utils/pandas_utils.py +0 -0
  99. {langroid-0.1.161 → langroid-0.1.163}/langroid/utils/pydantic_utils.py +0 -0
  100. {langroid-0.1.161 → langroid-0.1.163}/langroid/utils/system.py +0 -0
  101. {langroid-0.1.161 → langroid-0.1.163}/langroid/utils/web/__init__.py +0 -0
  102. {langroid-0.1.161 → langroid-0.1.163}/langroid/utils/web/login.py +0 -0
  103. {langroid-0.1.161 → langroid-0.1.163}/langroid/utils/web/selenium_login.py +0 -0
  104. {langroid-0.1.161 → langroid-0.1.163}/langroid/vector_store/__init__.py +0 -0
  105. {langroid-0.1.161 → langroid-0.1.163}/langroid/vector_store/base.py +0 -0
  106. {langroid-0.1.161 → langroid-0.1.163}/langroid/vector_store/chromadb.py +0 -0
  107. {langroid-0.1.161 → langroid-0.1.163}/langroid/vector_store/meilisearch.py +0 -0
  108. {langroid-0.1.161 → langroid-0.1.163}/langroid/vector_store/momento.py +0 -0
  109. {langroid-0.1.161 → langroid-0.1.163}/langroid/vector_store/qdrant_cloud.py +0 -0
  110. {langroid-0.1.161 → langroid-0.1.163}/langroid/vector_store/qdrantdb.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: langroid
- Version: 0.1.161
+ Version: 0.1.163
  Summary: Harness LLMs with Multi-Agent Programming
  License: MIT
  Author: Prasad Chalasani
@@ -14,6 +14,8 @@ Provides-Extra: hf-embeddings
  Provides-Extra: litellm
  Provides-Extra: mysql
  Provides-Extra: postgres
+ Provides-Extra: sciphi
+ Requires-Dist: agent-search (>=0.0.7,<0.0.8) ; extra == "sciphi"
  Requires-Dist: async-generator (>=1.10,<2.0)
  Requires-Dist: autopep8 (>=2.0.2,<3.0.0)
  Requires-Dist: black[jupyter] (>=23.3.0,<24.0.0)
@@ -53,7 +55,7 @@ Requires-Dist: pdfplumber (>=0.10.2,<0.11.0)
  Requires-Dist: pre-commit (>=3.3.2,<4.0.0)
  Requires-Dist: prettytable (>=3.8.0,<4.0.0)
  Requires-Dist: psycopg2 (>=2.9.7,<3.0.0) ; extra == "postgres"
- Requires-Dist: pydantic (==1.10.11)
+ Requires-Dist: pydantic (==1.10.13)
  Requires-Dist: pygithub (>=1.58.1,<2.0.0)
  Requires-Dist: pygments (>=2.15.1,<3.0.0)
  Requires-Dist: pymupdf (>=1.23.3,<2.0.0)
@@ -164,7 +166,7 @@ import langroid.language_models as lm
  # set up LLM
  llm_cfg = lm.OpenAIGPTConfig(  # or OpenAIAssistant to use Assistant API
      # any model served via an OpenAI-compatible API
-     chat_model=lm.OpenAIChatModel.GPT4_TURBO,  # or, e.g., "local/ollama/mistral"
+     chat_model=lm.OpenAIChatModel.GPT4_TURBO,  # or, e.g., "litellm/ollama/mistral"
  )
  # use LLM directly
  mdl = lm.OpenAIGPT(llm_cfg)
@@ -74,7 +74,7 @@ import langroid.language_models as lm
  # set up LLM
  llm_cfg = lm.OpenAIGPTConfig(  # or OpenAIAssistant to use Assistant API
      # any model served via an OpenAI-compatible API
-     chat_model=lm.OpenAIChatModel.GPT4_TURBO,  # or, e.g., "local/ollama/mistral"
+     chat_model=lm.OpenAIChatModel.GPT4_TURBO,  # or, e.g., "litellm/ollama/mistral"
  )
  # use LLM directly
  mdl = lm.OpenAIGPT(llm_cfg)
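
For illustration, a minimal sketch of how the corrected "litellm/..." prefix is used (assumes the `litellm` extra is installed and a local ollama server is serving `mistral`; the model name is illustrative):

```
import langroid.language_models as lm

# Non-OpenAI models are addressed via a "litellm/<provider>/<model>" string,
# which is why the comment was corrected from "local/..." to "litellm/...".
llm_cfg = lm.OpenAIGPTConfig(chat_model="litellm/ollama/mistral")
mdl = lm.OpenAIGPT(llm_cfg)
# response = mdl.chat("What is the capital of Ontario?", max_tokens=20)
```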
@@ -107,7 +107,9 @@ class DocChatAgentConfig(ChatAgentConfig):
      debug: bool = False
      stream: bool = True  # allow streaming where needed
      relevance_extractor_config: None | RelevanceExtractorAgentConfig = (
-         RelevanceExtractorAgentConfig()
+         RelevanceExtractorAgentConfig(
+             llm=None  # use the parent's llm unless explicitly set here
+         )
      )
      doc_paths: List[str] = []
      default_paths: List[str] = [
@@ -999,7 +1001,10 @@ class DocChatAgent(ChatAgent):
          if agent_cfg is None:
              # no relevance extraction: simply return passages
              return passages
-
+         if agent_cfg.llm is None:
+             # Use main DocChatAgent's LLM if not provided explicitly:
+             # this reduces setup burden on the user
+             agent_cfg.llm = self.config.llm
          agent_cfg.query = query
          agent_cfg.segment_length = self.config.extraction_granularity
          agent_cfg.llm.stream = False  # disable streaming for concurrent calls
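
A minimal sketch of what the `llm=None` fallback enables (the document path is hypothetical): the relevance extractor no longer needs its own LLM config, since it now inherits the parent DocChatAgent's LLM.

```
import langroid.language_models as lm
from langroid.agent.special.doc_chat_agent import DocChatAgent, DocChatAgentConfig

# One LLM config on the parent agent; the default relevance_extractor_config
# has llm=None, so the extractor reuses this same config at query time.
cfg = DocChatAgentConfig(
    llm=lm.OpenAIGPTConfig(chat_model=lm.OpenAIChatModel.GPT4_TURBO),
    doc_paths=["docs/notes.md"],  # hypothetical path
)
agent = DocChatAgent(cfg)
```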
@@ -11,6 +11,7 @@ from rich.console import Console
  from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig
  from langroid.agent.chat_document import ChatDocument
  from langroid.agent.tools.segment_extract_tool import SegmentExtractTool
+ from langroid.language_models.base import LLMConfig
  from langroid.language_models.openai_gpt import OpenAIGPTConfig
  from langroid.mytypes import Entity
  from langroid.parsing.utils import extract_numbered_segments, number_segments
@@ -21,7 +22,7 @@ logger = logging.getLogger(__name__)


  class RelevanceExtractorAgentConfig(ChatAgentConfig):
-     llm: OpenAIGPTConfig = OpenAIGPTConfig()
+     llm: LLMConfig | None = OpenAIGPTConfig()
      segment_length: int = 1  # number of sentences per segment
      query: str = ""  # query for relevance extraction
      system_message = """
@@ -0,0 +1,76 @@
+ """
+ A tool which returns a Search RAG response from the SciPhi API, with result
+ titles, links, and summaries. Since the tool is stateless (i.e. does not need
+ access to agent state), it can be enabled for any agent, without having to define a
+ special method inside the agent: `agent.enable_message(SciPhiSearchRAGTool)`
+
+ Example output appears as follows:
+
+ <-- Query -->
+ ```
+ Find 3 results on the internet about the LK-99 superconducting material.
+ ```
+
+ <-- Response (compressed for this example) -->
+ ```
+ [ result1 ]
+
+ [ result2 ]
+
+ [ result3 ]
+
+ ```
+
+ NOTE: Using this tool requires getting an API key from sciphi.ai.
+ Setup is as simple as shown below:
+ # Get a free API key at https://www.sciphi.ai/account
+ # export SCIPHI_API_KEY=$MY_SCIPHI_API_KEY before running the agent
+ # OR add SCIPHI_API_KEY=$MY_SCIPHI_API_KEY to your .env file
+
+ This tool requires installing langroid with the `sciphi` extra, e.g.
+ `pip install langroid[sciphi]` or `poetry add langroid[sciphi]`
+ (it installs the `agent-search` package from pypi).
+
+ For more information, please refer to the official docs:
+ https://agent-search.readthedocs.io/en/latest/
+ """
+
+ try:
+     from agent_search import SciPhi
+ except ImportError:
+     raise ImportError(
+         "You are attempting to use the `agent-search` library; "
+         "to use it, please install langroid with the `sciphi` extra, e.g. "
+         "`pip install langroid[sciphi]` or `poetry add langroid[sciphi]` "
+         "(it installs the `agent-search` package from pypi)."
+     )
+
+ from langroid.agent.tool_message import ToolMessage
+
+
+ class SciPhiSearchRAGTool(ToolMessage):
+     request: str = "web_search_rag"
+     purpose: str = """
+         To search the web with provider <search_provider> and return a response
+         summary with llm model <llm_model> for the given <query>.
+         """
+     query: str
+     search_provider: str = "bing"  # bing or agent-search
+     include_related_queries: bool = True
+     llm_model: str = "SciPhi/Sensei-7B-V1"
+     recursive_mode: bool = True
+
+     def handle(self) -> str:
+         rag_response = SciPhi().get_search_rag_response(
+             query=self.query,
+             search_provider=self.search_provider,
+             llm_model=self.llm_model,
+         )
+         result = rag_response["response"]
+         if self.include_related_queries:
+             result = (
+                 f"### RAG Response:\n{result}\n\n"
+                 + "### Related Queries:\n"
+                 + "\n".join(rag_response["related_queries"])
+             )
+         return result  # type: ignore
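
A usage sketch for the new tool (assumes `pip install langroid[sciphi]`, a `SCIPHI_API_KEY` in the environment, and an OpenAI key for the agent's own LLM):

```
import langroid as lr
from langroid.agent.tools.sciphi_search_rag_tool import SciPhiSearchRAGTool

agent = lr.ChatAgent(lr.ChatAgentConfig(name="Searcher"))
agent.enable_message(SciPhiSearchRAGTool)  # stateless tool: no agent method needed
task = lr.Task(agent, interactive=False)
# task.run("Find 3 results on the internet about the LK-99 superconducting material.")
```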
@@ -19,6 +19,7 @@ from typing import (
      no_type_check,
  )

+ import openai
  from httpx import Timeout
  from openai import AsyncOpenAI, OpenAI
  from pydantic import BaseModel
@@ -95,7 +96,19 @@ openAICompletionModelPreferenceList = [


  if "OPENAI_API_KEY" in os.environ:
-     availableModels = set(map(lambda m: m.id, OpenAI().models.list()))
+     try:
+         availableModels = set(map(lambda m: m.id, OpenAI().models.list()))
+     except openai.AuthenticationError as e:
+         if settings.debug:
+             logging.warning(
+                 f"""
+                 OpenAI Authentication Error: {e}.
+                 ---
+                 If you intended to use an OpenAI Model, you should fix this,
+                 otherwise you can ignore this warning.
+                 """
+             )
+         availableModels = set()
  else:
      availableModels = set()

@@ -306,7 +319,8 @@ class OpenAIGPT(LanguageModel):
          # an explicit `export OPENAI_API_KEY=xxx` or `setenv OPENAI_API_KEY xxx`
          # Pydantic's BaseSettings will automatically pick it up from the
          # .env file
-         self.api_key = config.api_key or "xxx"
+         # The config.api_key is ignored when not using an OpenAI model
+         self.api_key = config.api_key if self.is_openai_chat_model() else "xxx"
          self.client = OpenAI(
              api_key=self.api_key,
              base_url=self.api_base,
@@ -352,10 +366,6 @@ class OpenAIGPT(LanguageModel):
          openai_chat_models = [e.value for e in OpenAIChatModel]
          return self.config.chat_model in openai_chat_models

-     def _is_openai_completion_model(self) -> bool:
-         openai_completion_models = [e.value for e in OpenAICompletionModel]
-         return self.config.completion_model in openai_completion_models
-
      def chat_context_length(self) -> int:
          """
          Context-length for chat-completion models/endpoints
@@ -101,14 +101,33 @@ def split_paragraphs(text: str) -> List[str]:
      return [para.strip() for para in paras if para.strip()]


- def number_segments(s: str, len: int = 1) -> str:
+ def split_newlines(text: str) -> List[str]:
+     """
+     Split the input text into lines using "\n" as the delimiter.
+
+     Args:
+         text (str): The input text.
+
+     Returns:
+         list: A list of lines.
+     """
+     lines = re.split(r"\n", text)
+     return [line.strip() for line in lines if line.strip()]
+
+
+ def number_segments(s: str, granularity: int = 1) -> str:
      """
      Number the segments in a given text, preserving paragraph structure.
-     A segment is a sequence of `len` consecutive sentences.
+     A segment is a sequence of `granularity` consecutive "sentences", where a
+     "sentence" is either a normal sentence, or, if there isn't enough punctuation
+     to properly identify sentences, a pseudo-sentence found via heuristics (split
+     by newline, or failing that, just split every 40 words). The goal here is
+     simply to number segments at a reasonable granularity so the LLM can identify
+     relevant segments in the RelevanceExtractorAgent.

      Args:
          s (str): The input text.
-         len (int): The number of sentences in a segment.
+         granularity (int): The number of sentences in a segment.
              If this is -1, then the entire text is treated as a single segment,
              and is numbered as <#1#>.

@@ -119,7 +138,7 @@ def number_segments(s: str, len: int = 1) -> str:
      >>> number_segments("Hello world! How are you? Have a good day.")
      '<#1#> Hello world! <#2#> How are you? <#3#> Have a good day.'
      """
-     if len < 0:
+     if granularity < 0:
          return "<#1#> " + s
      numbered_text = []
      count = 0
@@ -127,9 +146,34 @@ def number_segments(s: str, len: int = 1) -> str:
      paragraphs = split_paragraphs(s)
      for paragraph in paragraphs:
          sentences = nltk.sent_tokenize(paragraph)
+         # Some docs are problematic (e.g. resumes) and have no (or too few) periods,
+         # so we can't split usefully into sentences.
+         # We try a series of heuristics to split into sentences,
+         # until the avg num words per sentence is less than 40.
+         avg_words_per_sentence = sum(
+             len(nltk.word_tokenize(sentence)) for sentence in sentences
+         ) / len(sentences)
+         if avg_words_per_sentence > 40:
+             sentences = split_newlines(paragraph)
+             avg_words_per_sentence = sum(
+                 len(nltk.word_tokenize(sentence)) for sentence in sentences
+             ) / len(sentences)
+             if avg_words_per_sentence > 40:
+                 # Still too long, just split on every 40 words
+                 sentences = []
+                 for sentence in nltk.sent_tokenize(paragraph):
+                     words = nltk.word_tokenize(sentence)
+                     for i in range(0, len(words), 40):
+                         # if there are less than 20 words left after this,
+                         # just add them to the last sentence and break
+                         if len(words) - i < 20:
+                             sentences.append(" ".join(words[i:]))
+                             break
+                         else:
+                             sentences.append(" ".join(words[i : i + 40]))
          for i, sentence in enumerate(sentences):
-             num = count // len + 1
-             number_prefix = f"<#{num}#>" if count % len == 0 else ""
+             num = count // granularity + 1
+             number_prefix = f"<#{num}#>" if count % granularity == 0 else ""
              sentence = f"{number_prefix} {sentence}"
              count += 1
              sentences[i] = sentence
@@ -140,7 +184,7 @@ def number_segments(s: str, len: int = 1) -> str:


  def number_sentences(s: str) -> str:
-     return number_segments(s, len=1)
+     return number_segments(s, granularity=1)


  def parse_number_range_list(specs: str) -> List[int]:
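
To illustrate the `len` to `granularity` rename, a small sketch (the exact whitespace in the output may differ):

```
from langroid.parsing.utils import number_segments

# granularity=2: every 2 consecutive sentences share one segment number
text = "Hello world! How are you? Have a good day."
print(number_segments(text, granularity=2))
# roughly: '<#1#> Hello world!  How are you? <#2#> Have a good day.'
```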
@@ -210,13 +210,10 @@ class LanceDB(VectorStore):
              return
          else:
              logger.warning("Recreating fresh collection")
-         tbl = self.client.create_table(
-             collection_name, schema=self.schema, mode="overwrite"
-         )
+         self.client.create_table(collection_name, schema=self.schema, mode="overwrite")
          if settings.debug:
              level = logger.getEffectiveLevel()
              logger.setLevel(logging.INFO)
-             logger.info(tbl.schema)
              logger.setLevel(level)

      def add_documents(self, documents: Sequence[Document]) -> None:
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "langroid"
- version = "0.1.161"
+ version = "0.1.163"
  description = "Harness LLMs with Multi-Agent Programming"
  authors = ["Prasad Chalasani <pchalasani@gmail.com>"]
  readme = "README.md"
@@ -47,7 +47,7 @@ types-requests = "^2.31.0.1"
  pyparsing = "^3.0.9"
  nltk = "^3.8.1"
  qdrant-client = "^1.7.0"
- pydantic = "1.10.11"
+ pydantic = "1.10.13"
  pypdf = "^3.12.2"
  momento = "^1.10.2"
  pandas = "^2.0.3"
@@ -81,6 +81,7 @@ scrapy = "^2.11.0"
  async-generator = "^1.10"
  lancedb = "^0.4.1"
  pytest-redis = "^3.0.2"
+ agent-search = {version = "^0.0.7", optional = true}
  python-docx = "^1.1.0"

  [tool.poetry.extras]
@@ -89,6 +90,7 @@ hf-embeddings = ["sentence-transformers", "torch"]
  postgres = ["psycopg2", "pytest-postgresql"]
  mysql = ["pymysql", "pytest-mysql"]
  litellm = ["litellm"]
+ sciphi = ["agent-search"]


  [tool.poetry.group.dev.dependencies]