langroid 0.32.2__py3-none-any.whl → 0.33.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129):
  1. {langroid-0.32.2.dist-info → langroid-0.33.4.dist-info}/METADATA +149 -123
  2. langroid-0.33.4.dist-info/RECORD +7 -0
  3. {langroid-0.32.2.dist-info → langroid-0.33.4.dist-info}/WHEEL +1 -1
  4. langroid-0.33.4.dist-info/entry_points.txt +4 -0
  5. pyproject.toml +317 -212
  6. langroid/__init__.py +0 -106
  7. langroid/agent/__init__.py +0 -41
  8. langroid/agent/base.py +0 -1983
  9. langroid/agent/batch.py +0 -398
  10. langroid/agent/callbacks/__init__.py +0 -0
  11. langroid/agent/callbacks/chainlit.py +0 -598
  12. langroid/agent/chat_agent.py +0 -1899
  13. langroid/agent/chat_document.py +0 -454
  14. langroid/agent/openai_assistant.py +0 -882
  15. langroid/agent/special/__init__.py +0 -59
  16. langroid/agent/special/arangodb/__init__.py +0 -0
  17. langroid/agent/special/arangodb/arangodb_agent.py +0 -656
  18. langroid/agent/special/arangodb/system_messages.py +0 -186
  19. langroid/agent/special/arangodb/tools.py +0 -107
  20. langroid/agent/special/arangodb/utils.py +0 -36
  21. langroid/agent/special/doc_chat_agent.py +0 -1466
  22. langroid/agent/special/lance_doc_chat_agent.py +0 -262
  23. langroid/agent/special/lance_rag/__init__.py +0 -9
  24. langroid/agent/special/lance_rag/critic_agent.py +0 -198
  25. langroid/agent/special/lance_rag/lance_rag_task.py +0 -82
  26. langroid/agent/special/lance_rag/query_planner_agent.py +0 -260
  27. langroid/agent/special/lance_tools.py +0 -61
  28. langroid/agent/special/neo4j/__init__.py +0 -0
  29. langroid/agent/special/neo4j/csv_kg_chat.py +0 -174
  30. langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -433
  31. langroid/agent/special/neo4j/system_messages.py +0 -120
  32. langroid/agent/special/neo4j/tools.py +0 -32
  33. langroid/agent/special/relevance_extractor_agent.py +0 -127
  34. langroid/agent/special/retriever_agent.py +0 -56
  35. langroid/agent/special/sql/__init__.py +0 -17
  36. langroid/agent/special/sql/sql_chat_agent.py +0 -654
  37. langroid/agent/special/sql/utils/__init__.py +0 -21
  38. langroid/agent/special/sql/utils/description_extractors.py +0 -190
  39. langroid/agent/special/sql/utils/populate_metadata.py +0 -85
  40. langroid/agent/special/sql/utils/system_message.py +0 -35
  41. langroid/agent/special/sql/utils/tools.py +0 -64
  42. langroid/agent/special/table_chat_agent.py +0 -263
  43. langroid/agent/task.py +0 -2095
  44. langroid/agent/tool_message.py +0 -393
  45. langroid/agent/tools/__init__.py +0 -38
  46. langroid/agent/tools/duckduckgo_search_tool.py +0 -50
  47. langroid/agent/tools/file_tools.py +0 -234
  48. langroid/agent/tools/google_search_tool.py +0 -39
  49. langroid/agent/tools/metaphor_search_tool.py +0 -67
  50. langroid/agent/tools/orchestration.py +0 -303
  51. langroid/agent/tools/recipient_tool.py +0 -235
  52. langroid/agent/tools/retrieval_tool.py +0 -32
  53. langroid/agent/tools/rewind_tool.py +0 -137
  54. langroid/agent/tools/segment_extract_tool.py +0 -41
  55. langroid/agent/xml_tool_message.py +0 -382
  56. langroid/cachedb/__init__.py +0 -17
  57. langroid/cachedb/base.py +0 -58
  58. langroid/cachedb/momento_cachedb.py +0 -108
  59. langroid/cachedb/redis_cachedb.py +0 -153
  60. langroid/embedding_models/__init__.py +0 -39
  61. langroid/embedding_models/base.py +0 -74
  62. langroid/embedding_models/models.py +0 -461
  63. langroid/embedding_models/protoc/__init__.py +0 -0
  64. langroid/embedding_models/protoc/embeddings.proto +0 -19
  65. langroid/embedding_models/protoc/embeddings_pb2.py +0 -33
  66. langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -50
  67. langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -79
  68. langroid/embedding_models/remote_embeds.py +0 -153
  69. langroid/exceptions.py +0 -65
  70. langroid/language_models/__init__.py +0 -53
  71. langroid/language_models/azure_openai.py +0 -153
  72. langroid/language_models/base.py +0 -678
  73. langroid/language_models/config.py +0 -18
  74. langroid/language_models/mock_lm.py +0 -124
  75. langroid/language_models/openai_gpt.py +0 -1964
  76. langroid/language_models/prompt_formatter/__init__.py +0 -16
  77. langroid/language_models/prompt_formatter/base.py +0 -40
  78. langroid/language_models/prompt_formatter/hf_formatter.py +0 -132
  79. langroid/language_models/prompt_formatter/llama2_formatter.py +0 -75
  80. langroid/language_models/utils.py +0 -151
  81. langroid/mytypes.py +0 -84
  82. langroid/parsing/__init__.py +0 -52
  83. langroid/parsing/agent_chats.py +0 -38
  84. langroid/parsing/code_parser.py +0 -121
  85. langroid/parsing/document_parser.py +0 -718
  86. langroid/parsing/para_sentence_split.py +0 -62
  87. langroid/parsing/parse_json.py +0 -155
  88. langroid/parsing/parser.py +0 -313
  89. langroid/parsing/repo_loader.py +0 -790
  90. langroid/parsing/routing.py +0 -36
  91. langroid/parsing/search.py +0 -275
  92. langroid/parsing/spider.py +0 -102
  93. langroid/parsing/table_loader.py +0 -94
  94. langroid/parsing/url_loader.py +0 -111
  95. langroid/parsing/urls.py +0 -273
  96. langroid/parsing/utils.py +0 -373
  97. langroid/parsing/web_search.py +0 -155
  98. langroid/prompts/__init__.py +0 -9
  99. langroid/prompts/dialog.py +0 -17
  100. langroid/prompts/prompts_config.py +0 -5
  101. langroid/prompts/templates.py +0 -141
  102. langroid/pydantic_v1/__init__.py +0 -10
  103. langroid/pydantic_v1/main.py +0 -4
  104. langroid/utils/__init__.py +0 -19
  105. langroid/utils/algorithms/__init__.py +0 -3
  106. langroid/utils/algorithms/graph.py +0 -103
  107. langroid/utils/configuration.py +0 -98
  108. langroid/utils/constants.py +0 -30
  109. langroid/utils/git_utils.py +0 -252
  110. langroid/utils/globals.py +0 -49
  111. langroid/utils/logging.py +0 -135
  112. langroid/utils/object_registry.py +0 -66
  113. langroid/utils/output/__init__.py +0 -20
  114. langroid/utils/output/citations.py +0 -41
  115. langroid/utils/output/printing.py +0 -99
  116. langroid/utils/output/status.py +0 -40
  117. langroid/utils/pandas_utils.py +0 -30
  118. langroid/utils/pydantic_utils.py +0 -602
  119. langroid/utils/system.py +0 -286
  120. langroid/utils/types.py +0 -93
  121. langroid/vector_store/__init__.py +0 -50
  122. langroid/vector_store/base.py +0 -357
  123. langroid/vector_store/chromadb.py +0 -214
  124. langroid/vector_store/lancedb.py +0 -401
  125. langroid/vector_store/meilisearch.py +0 -299
  126. langroid/vector_store/momento.py +0 -278
  127. langroid/vector_store/qdrantdb.py +0 -468
  128. langroid-0.32.2.dist-info/RECORD +0 -128
  129. {langroid-0.32.2.dist-info → langroid-0.33.4.dist-info/licenses}/LICENSE +0 -0
@@ -1,121 +0,0 @@
1
- from functools import reduce
2
- from typing import Callable, List
3
-
4
- import tiktoken
5
- from pygments import lex
6
- from pygments.lexers import get_lexer_by_name
7
- from pygments.token import Token
8
-
9
- from langroid.mytypes import Document
10
- from langroid.pydantic_v1 import BaseSettings
11
-
12
-
13
def chunk_code(
    code: str, language: str, max_tokens: int, len_fn: Callable[[str], int]
) -> List[str]:
    """
    Chunk code into smaller pieces, so that we don't exceed the maximum
    number of tokens allowed by the embedding model.

    The code is lexed with Pygments, and chunk boundaries are placed only
    between lexer tokens, so individual lexical tokens are never split.
    Whitespace tokens are always appended to the current chunk (so a chunk
    may exceed ``max_tokens`` by trailing whitespace), and a single
    non-whitespace token longer than ``max_tokens`` becomes its own
    oversized chunk.

    Args:
        code: string of code
        language: str as a file extension, e.g. "py", "yml"
        max_tokens: max tokens per chunk
        len_fn: function to get the length of a string in token units

    Returns:
        List of code chunks, in original order.
    """
    lexer = get_lexer_by_name(language)

    chunks: List[str] = []
    current_chunk = ""
    for token_type, token_value in lex(code, lexer):
        if token_type in Token.Text.Whitespace:
            # Keep whitespace attached to the preceding code so that
            # indentation and newlines stay with their context.
            current_chunk += token_value
        elif len_fn(current_chunk) + len_fn(token_value) <= max_tokens:
            current_chunk += token_value
        else:
            # Fix: previously an empty string was appended as a chunk when
            # the very first non-whitespace token already exceeded
            # max_tokens; only flush non-empty chunks.
            if current_chunk:
                chunks.append(current_chunk)
            current_chunk = token_value

    if current_chunk:
        chunks.append(current_chunk)

    return chunks
47
-
48
-
49
class CodeParsingConfig(BaseSettings):
    """Settings that control how source-code documents are chunked."""

    # File extensions (without the leading dot) treated as code.
    extensions: List[str] = [
        "py", "java", "c", "cpp", "h", "hpp",
        "yml", "yaml", "toml",
        "cfg",  # e.g. setup.cfg
        "ini", "json", "rst",
        "sh", "bash",
    ]
    # Target chunk size, in tokens.
    chunk_size: int = 500
    # Model whose tokenizer defines the token counts.
    token_encoding_model: str = "text-embedding-ada-002"
    # Presumably the number of similar docs retrieved at query time —
    # not used within this module; verify against callers.
    n_similar_docs: int = 4
70
-
71
-
72
class CodeParser:
    """Splits code ``Document``s into token-bounded chunks.

    Uses a tiktoken tokenizer (chosen via ``config.token_encoding_model``)
    to measure chunk sizes, and :func:`chunk_code` to do the actual
    lexer-aware splitting.
    """

    def __init__(self, config: CodeParsingConfig):
        self.config = config
        self.tokenizer = tiktoken.encoding_for_model(config.token_encoding_model)

    def num_tokens(self, text: str) -> int:
        """
        How many tokens are in the text, according to the tokenizer.
        This needs to be accurate, otherwise we may exceed the maximum
        number of tokens allowed by the model.

        Args:
            text: string to tokenize

        Returns:
            number of tokens in the text
        """
        return len(self.tokenizer.encode(text))

    def split(self, docs: List[Document]) -> List[Document]:
        """
        Split the documents into chunks, according to the config.
        Only documents whose ``metadata.language`` is in
        ``config.extensions`` are split; all other documents are dropped.

        Args:
            docs: list of documents to split

        Returns:
            list of documents, where each document is a chunk; each chunk
            carries the metadata of its original document, so that when we
            retrieve a chunk we immediately know info about the original.
        """
        # NOTE(review): d.metadata is passed by reference, not copied —
        # unless Document's constructor copies it, all chunks of a document
        # share one metadata object; confirm before mutating chunk metadata.
        #
        # A single flat comprehension replaces the previous
        # reduce(lambda x, y: x + y, ...) flattening, which was quadratic
        # in the number of chunks and needed a special case for empty input
        # (reduce raises TypeError on an empty sequence).
        return [
            Document(content=chunk, metadata=d.metadata)
            for d in docs
            if d.metadata.language in self.config.extensions  # type: ignore
            for chunk in chunk_code(
                d.content,
                d.metadata.language,  # type: ignore
                self.config.chunk_size,
                self.num_tokens,
            )
            if chunk.strip() != ""
        ]