langroid 0.1.134__tar.gz → 0.1.135__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. {langroid-0.1.134 → langroid-0.1.135}/PKG-INFO +1 -1
  2. {langroid-0.1.134 → langroid-0.1.135}/langroid/parsing/parser.py +31 -29
  3. {langroid-0.1.134 → langroid-0.1.135}/pyproject.toml +1 -1
  4. {langroid-0.1.134 → langroid-0.1.135}/LICENSE +0 -0
  5. {langroid-0.1.134 → langroid-0.1.135}/README.md +0 -0
  6. {langroid-0.1.134 → langroid-0.1.135}/langroid/__init__.py +0 -0
  7. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/__init__.py +0 -0
  8. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/base.py +0 -0
  9. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/batch.py +0 -0
  10. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/chat_agent.py +0 -0
  11. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/chat_document.py +0 -0
  12. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/helpers.py +0 -0
  13. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/junk +0 -0
  14. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/openai_assistant.py +0 -0
  15. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/special/__init__.py +0 -0
  16. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/special/doc_chat_agent.py +0 -0
  17. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/special/recipient_validator_agent.py +0 -0
  18. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/special/relevance_extractor_agent.py +0 -0
  19. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/special/retriever_agent.py +0 -0
  20. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/special/sql/__init__.py +0 -0
  21. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
  22. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/special/sql/utils/__init__.py +0 -0
  23. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
  24. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
  25. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/special/sql/utils/system_message.py +0 -0
  26. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/special/sql/utils/tools.py +0 -0
  27. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/special/table_chat_agent.py +0 -0
  28. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/task.py +0 -0
  29. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/tool_message.py +0 -0
  30. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/tools/__init__.py +0 -0
  31. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/tools/extract_tool.py +0 -0
  32. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/tools/generator_tool.py +0 -0
  33. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/tools/google_search_tool.py +0 -0
  34. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/tools/recipient_tool.py +0 -0
  35. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/tools/run_python_code.py +0 -0
  36. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent/tools/segment_extract_tool.py +0 -0
  37. {langroid-0.1.134 → langroid-0.1.135}/langroid/agent_config.py +0 -0
  38. {langroid-0.1.134 → langroid-0.1.135}/langroid/cachedb/__init__.py +0 -0
  39. {langroid-0.1.134 → langroid-0.1.135}/langroid/cachedb/base.py +0 -0
  40. {langroid-0.1.134 → langroid-0.1.135}/langroid/cachedb/momento_cachedb.py +0 -0
  41. {langroid-0.1.134 → langroid-0.1.135}/langroid/cachedb/redis_cachedb.py +0 -0
  42. {langroid-0.1.134 → langroid-0.1.135}/langroid/embedding_models/__init__.py +0 -0
  43. {langroid-0.1.134 → langroid-0.1.135}/langroid/embedding_models/base.py +0 -0
  44. {langroid-0.1.134 → langroid-0.1.135}/langroid/embedding_models/clustering.py +0 -0
  45. {langroid-0.1.134 → langroid-0.1.135}/langroid/embedding_models/models.py +0 -0
  46. {langroid-0.1.134 → langroid-0.1.135}/langroid/language_models/__init__.py +0 -0
  47. {langroid-0.1.134 → langroid-0.1.135}/langroid/language_models/azure_openai.py +0 -0
  48. {langroid-0.1.134 → langroid-0.1.135}/langroid/language_models/base.py +0 -0
  49. {langroid-0.1.134 → langroid-0.1.135}/langroid/language_models/config.py +0 -0
  50. {langroid-0.1.134 → langroid-0.1.135}/langroid/language_models/openai_assistants.py +0 -0
  51. {langroid-0.1.134 → langroid-0.1.135}/langroid/language_models/openai_gpt.py +0 -0
  52. {langroid-0.1.134 → langroid-0.1.135}/langroid/language_models/prompt_formatter/__init__.py +0 -0
  53. {langroid-0.1.134 → langroid-0.1.135}/langroid/language_models/prompt_formatter/base.py +0 -0
  54. {langroid-0.1.134 → langroid-0.1.135}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
  55. {langroid-0.1.134 → langroid-0.1.135}/langroid/language_models/utils.py +0 -0
  56. {langroid-0.1.134 → langroid-0.1.135}/langroid/mytypes.py +0 -0
  57. {langroid-0.1.134 → langroid-0.1.135}/langroid/parsing/__init__.py +0 -0
  58. {langroid-0.1.134 → langroid-0.1.135}/langroid/parsing/agent_chats.py +0 -0
  59. {langroid-0.1.134 → langroid-0.1.135}/langroid/parsing/code-parsing.md +0 -0
  60. {langroid-0.1.134 → langroid-0.1.135}/langroid/parsing/code_parser.py +0 -0
  61. {langroid-0.1.134 → langroid-0.1.135}/langroid/parsing/config.py +0 -0
  62. {langroid-0.1.134 → langroid-0.1.135}/langroid/parsing/document_parser.py +0 -0
  63. {langroid-0.1.134 → langroid-0.1.135}/langroid/parsing/json.py +0 -0
  64. {langroid-0.1.134 → langroid-0.1.135}/langroid/parsing/para_sentence_split.py +0 -0
  65. {langroid-0.1.134 → langroid-0.1.135}/langroid/parsing/repo_loader.py +0 -0
  66. {langroid-0.1.134 → langroid-0.1.135}/langroid/parsing/search.py +0 -0
  67. {langroid-0.1.134 → langroid-0.1.135}/langroid/parsing/spider.py +0 -0
  68. {langroid-0.1.134 → langroid-0.1.135}/langroid/parsing/table_loader.py +0 -0
  69. {langroid-0.1.134 → langroid-0.1.135}/langroid/parsing/url_loader.py +0 -0
  70. {langroid-0.1.134 → langroid-0.1.135}/langroid/parsing/url_loader_cookies.py +0 -0
  71. {langroid-0.1.134 → langroid-0.1.135}/langroid/parsing/urls.py +0 -0
  72. {langroid-0.1.134 → langroid-0.1.135}/langroid/parsing/utils.py +0 -0
  73. {langroid-0.1.134 → langroid-0.1.135}/langroid/parsing/web_search.py +0 -0
  74. {langroid-0.1.134 → langroid-0.1.135}/langroid/prompts/__init__.py +0 -0
  75. {langroid-0.1.134 → langroid-0.1.135}/langroid/prompts/dialog.py +0 -0
  76. {langroid-0.1.134 → langroid-0.1.135}/langroid/prompts/prompts_config.py +0 -0
  77. {langroid-0.1.134 → langroid-0.1.135}/langroid/prompts/templates.py +0 -0
  78. {langroid-0.1.134 → langroid-0.1.135}/langroid/prompts/transforms.py +0 -0
  79. {langroid-0.1.134 → langroid-0.1.135}/langroid/utils/__init__.py +0 -0
  80. {langroid-0.1.134 → langroid-0.1.135}/langroid/utils/algorithms/__init__.py +0 -0
  81. {langroid-0.1.134 → langroid-0.1.135}/langroid/utils/algorithms/graph.py +0 -0
  82. {langroid-0.1.134 → langroid-0.1.135}/langroid/utils/configuration.py +0 -0
  83. {langroid-0.1.134 → langroid-0.1.135}/langroid/utils/constants.py +0 -0
  84. {langroid-0.1.134 → langroid-0.1.135}/langroid/utils/docker.py +0 -0
  85. {langroid-0.1.134 → langroid-0.1.135}/langroid/utils/globals.py +0 -0
  86. {langroid-0.1.134 → langroid-0.1.135}/langroid/utils/llms/__init__.py +0 -0
  87. {langroid-0.1.134 → langroid-0.1.135}/langroid/utils/llms/strings.py +0 -0
  88. {langroid-0.1.134 → langroid-0.1.135}/langroid/utils/logging.py +0 -0
  89. {langroid-0.1.134 → langroid-0.1.135}/langroid/utils/output/__init__.py +0 -0
  90. {langroid-0.1.134 → langroid-0.1.135}/langroid/utils/output/printing.py +0 -0
  91. {langroid-0.1.134 → langroid-0.1.135}/langroid/utils/pydantic_utils.py +0 -0
  92. {langroid-0.1.134 → langroid-0.1.135}/langroid/utils/system.py +0 -0
  93. {langroid-0.1.134 → langroid-0.1.135}/langroid/utils/web/__init__.py +0 -0
  94. {langroid-0.1.134 → langroid-0.1.135}/langroid/utils/web/login.py +0 -0
  95. {langroid-0.1.134 → langroid-0.1.135}/langroid/utils/web/selenium_login.py +0 -0
  96. {langroid-0.1.134 → langroid-0.1.135}/langroid/vector_store/__init__.py +0 -0
  97. {langroid-0.1.134 → langroid-0.1.135}/langroid/vector_store/base.py +0 -0
  98. {langroid-0.1.134 → langroid-0.1.135}/langroid/vector_store/chromadb.py +0 -0
  99. {langroid-0.1.134 → langroid-0.1.135}/langroid/vector_store/lancedb.py +0 -0
  100. {langroid-0.1.134 → langroid-0.1.135}/langroid/vector_store/meilisearch.py +0 -0
  101. {langroid-0.1.134 → langroid-0.1.135}/langroid/vector_store/momento.py +0 -0
  102. {langroid-0.1.134 → langroid-0.1.135}/langroid/vector_store/qdrant_cloud.py +0 -0
  103. {langroid-0.1.134 → langroid-0.1.135}/langroid/vector_store/qdrantdb.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: langroid
3
- Version: 0.1.134
3
+ Version: 0.1.135
4
4
  Summary: Harness LLMs with Multi-Agent Programming
5
5
  License: MIT
6
6
  Author: Prasad Chalasani
@@ -52,13 +52,14 @@ class Parser:
52
52
  return len(tokens)
53
53
 
54
54
  def add_window_ids(self, chunks: List[Document]) -> None:
55
- """Chunks are consecutive parts of a single original document.
56
- Add window_ids in metadata"""
55
+ """Chunks may belong to multiple docs, but for each doc,
56
+ they appear consecutively. Add window_ids in metadata"""
57
57
 
58
58
  # The original metadata.id (if any) is ignored since it will be same for all
59
59
  # chunks and is useless. We want a distinct id for each chunk.
60
60
  orig_ids = [c.metadata.id for c in chunks]
61
61
  ids = [Document.hash_id(str(c)) for c in chunks]
62
+ id2chunk = {id: c for id, c in zip(ids, chunks)}
62
63
 
63
64
  # group the ids by orig_id
64
65
  orig_id_to_ids: Dict[str, List[str]] = {}
@@ -71,9 +72,11 @@ class Parser:
71
72
 
72
73
  k = self.config.n_neighbor_ids
73
74
  for orig, ids in orig_id_to_ids.items():
75
+ # ids are consecutive chunks in a single doc
74
76
  n = len(ids)
75
77
  window_ids = [ids[max(0, i - k) : min(n, i + k + 1)] for i in range(n)]
76
- for i, c in enumerate(chunks):
78
+ for i, _ in enumerate(ids):
79
+ c = id2chunk[ids[i]]
77
80
  if c.content.strip() == "":
78
81
  continue
79
82
  c.metadata.window_ids = window_ids[i]
@@ -101,35 +104,35 @@ class Parser:
101
104
  return final_docs
102
105
 
103
106
  def split_para_sentence(self, docs: List[Document]) -> List[Document]:
104
- final_chunks = []
105
107
  chunks = docs
106
108
  while True:
107
- long_chunks = [
108
- p
109
- for p in chunks
110
- if self.num_tokens(p.content) > 1.3 * self.config.chunk_size
111
- ]
112
- if len(long_chunks) == 0:
113
- break
114
- short_chunks = [
115
- p
116
- for p in chunks
117
- if self.num_tokens(p.content) <= 1.3 * self.config.chunk_size
118
- ]
119
- final_chunks += short_chunks
120
- chunks = self._split_para_sentence_once(long_chunks)
121
- if len(chunks) == len(long_chunks):
122
- max_len = max([self.num_tokens(p.content) for p in long_chunks])
123
- logger.warning(
124
- f"""
125
- Unable to split {len(long_chunks)} long chunks
126
- using chunk_size = {self.config.chunk_size}.
127
- Max chunk size is {max_len} tokens.
128
- """
129
- )
109
+ un_splittables = 0
110
+ split_chunks = []
111
+ for c in chunks:
112
+ if c.content.strip() == "":
113
+ continue
114
+ if self.num_tokens(c.content) <= 1.3 * self.config.chunk_size:
115
+ # small chunk: no need to split
116
+ split_chunks.append(c)
117
+ continue
118
+ splits = self._split_para_sentence_once([c])
119
+ un_splittables += len(splits) == 1
120
+ split_chunks += splits
121
+ if len(split_chunks) == len(chunks):
122
+ if un_splittables > 0:
123
+ max_len = max([self.num_tokens(p.content) for p in chunks])
124
+ logger.warning(
125
+ f"""
126
+ Unable to split {un_splittables} chunks
127
+ using chunk_size = {self.config.chunk_size}.
128
+ Max chunk size is {max_len} tokens.
129
+ """
130
+ )
130
131
  break # we won't be able to shorten them with current settings
132
+ chunks = split_chunks.copy()
131
133
 
132
- return final_chunks + chunks
134
+ self.add_window_ids(chunks)
135
+ return chunks
133
136
 
134
137
  def _split_para_sentence_once(self, docs: List[Document]) -> List[Document]:
135
138
  final_chunks = []
@@ -144,7 +147,6 @@ class Parser:
144
147
  for c in chunks
145
148
  if c.strip() != ""
146
149
  ]
147
- self.add_window_ids(chunk_docs)
148
150
  final_chunks += chunk_docs
149
151
 
150
152
  return final_chunks
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "langroid"
3
- version = "0.1.134"
3
+ version = "0.1.135"
4
4
  description = "Harness LLMs with Multi-Agent Programming"
5
5
  authors = ["Prasad Chalasani <pchalasani@gmail.com>"]
6
6
  readme = "README.md"
File without changes
File without changes