langroid 0.1.85__py3-none-any.whl → 0.1.219__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. langroid/__init__.py +95 -0
  2. langroid/agent/__init__.py +40 -0
  3. langroid/agent/base.py +222 -91
  4. langroid/agent/batch.py +264 -0
  5. langroid/agent/callbacks/chainlit.py +608 -0
  6. langroid/agent/chat_agent.py +247 -101
  7. langroid/agent/chat_document.py +41 -4
  8. langroid/agent/openai_assistant.py +842 -0
  9. langroid/agent/special/__init__.py +50 -0
  10. langroid/agent/special/doc_chat_agent.py +837 -141
  11. langroid/agent/special/lance_doc_chat_agent.py +258 -0
  12. langroid/agent/special/lance_rag/__init__.py +9 -0
  13. langroid/agent/special/lance_rag/critic_agent.py +136 -0
  14. langroid/agent/special/lance_rag/lance_rag_task.py +80 -0
  15. langroid/agent/special/lance_rag/query_planner_agent.py +180 -0
  16. langroid/agent/special/lance_tools.py +44 -0
  17. langroid/agent/special/neo4j/__init__.py +0 -0
  18. langroid/agent/special/neo4j/csv_kg_chat.py +174 -0
  19. langroid/agent/special/neo4j/neo4j_chat_agent.py +370 -0
  20. langroid/agent/special/neo4j/utils/__init__.py +0 -0
  21. langroid/agent/special/neo4j/utils/system_message.py +46 -0
  22. langroid/agent/special/relevance_extractor_agent.py +127 -0
  23. langroid/agent/special/retriever_agent.py +32 -198
  24. langroid/agent/special/sql/__init__.py +11 -0
  25. langroid/agent/special/sql/sql_chat_agent.py +47 -23
  26. langroid/agent/special/sql/utils/__init__.py +22 -0
  27. langroid/agent/special/sql/utils/description_extractors.py +95 -46
  28. langroid/agent/special/sql/utils/populate_metadata.py +28 -21
  29. langroid/agent/special/table_chat_agent.py +43 -9
  30. langroid/agent/task.py +475 -122
  31. langroid/agent/tool_message.py +75 -13
  32. langroid/agent/tools/__init__.py +13 -0
  33. langroid/agent/tools/duckduckgo_search_tool.py +66 -0
  34. langroid/agent/tools/google_search_tool.py +11 -0
  35. langroid/agent/tools/metaphor_search_tool.py +67 -0
  36. langroid/agent/tools/recipient_tool.py +16 -29
  37. langroid/agent/tools/run_python_code.py +60 -0
  38. langroid/agent/tools/sciphi_search_rag_tool.py +79 -0
  39. langroid/agent/tools/segment_extract_tool.py +36 -0
  40. langroid/cachedb/__init__.py +9 -0
  41. langroid/cachedb/base.py +22 -2
  42. langroid/cachedb/momento_cachedb.py +26 -2
  43. langroid/cachedb/redis_cachedb.py +78 -11
  44. langroid/embedding_models/__init__.py +34 -0
  45. langroid/embedding_models/base.py +21 -2
  46. langroid/embedding_models/models.py +120 -18
  47. langroid/embedding_models/protoc/embeddings.proto +19 -0
  48. langroid/embedding_models/protoc/embeddings_pb2.py +33 -0
  49. langroid/embedding_models/protoc/embeddings_pb2.pyi +50 -0
  50. langroid/embedding_models/protoc/embeddings_pb2_grpc.py +79 -0
  51. langroid/embedding_models/remote_embeds.py +153 -0
  52. langroid/language_models/__init__.py +45 -0
  53. langroid/language_models/azure_openai.py +80 -27
  54. langroid/language_models/base.py +117 -12
  55. langroid/language_models/config.py +5 -0
  56. langroid/language_models/openai_assistants.py +3 -0
  57. langroid/language_models/openai_gpt.py +558 -174
  58. langroid/language_models/prompt_formatter/__init__.py +15 -0
  59. langroid/language_models/prompt_formatter/base.py +4 -6
  60. langroid/language_models/prompt_formatter/hf_formatter.py +135 -0
  61. langroid/language_models/utils.py +18 -21
  62. langroid/mytypes.py +25 -8
  63. langroid/parsing/__init__.py +46 -0
  64. langroid/parsing/document_parser.py +260 -63
  65. langroid/parsing/image_text.py +32 -0
  66. langroid/parsing/parse_json.py +143 -0
  67. langroid/parsing/parser.py +122 -59
  68. langroid/parsing/repo_loader.py +114 -52
  69. langroid/parsing/search.py +68 -63
  70. langroid/parsing/spider.py +3 -2
  71. langroid/parsing/table_loader.py +44 -0
  72. langroid/parsing/url_loader.py +59 -11
  73. langroid/parsing/urls.py +85 -37
  74. langroid/parsing/utils.py +298 -4
  75. langroid/parsing/web_search.py +73 -0
  76. langroid/prompts/__init__.py +11 -0
  77. langroid/prompts/chat-gpt4-system-prompt.md +68 -0
  78. langroid/prompts/prompts_config.py +1 -1
  79. langroid/utils/__init__.py +17 -0
  80. langroid/utils/algorithms/__init__.py +3 -0
  81. langroid/utils/algorithms/graph.py +103 -0
  82. langroid/utils/configuration.py +36 -5
  83. langroid/utils/constants.py +4 -0
  84. langroid/utils/globals.py +2 -2
  85. langroid/utils/logging.py +2 -5
  86. langroid/utils/output/__init__.py +21 -0
  87. langroid/utils/output/printing.py +47 -1
  88. langroid/utils/output/status.py +33 -0
  89. langroid/utils/pandas_utils.py +30 -0
  90. langroid/utils/pydantic_utils.py +616 -2
  91. langroid/utils/system.py +98 -0
  92. langroid/vector_store/__init__.py +40 -0
  93. langroid/vector_store/base.py +203 -6
  94. langroid/vector_store/chromadb.py +59 -32
  95. langroid/vector_store/lancedb.py +463 -0
  96. langroid/vector_store/meilisearch.py +10 -7
  97. langroid/vector_store/momento.py +262 -0
  98. langroid/vector_store/qdrantdb.py +104 -22
  99. {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/METADATA +329 -149
  100. langroid-0.1.219.dist-info/RECORD +127 -0
  101. {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/WHEEL +1 -1
  102. langroid/agent/special/recipient_validator_agent.py +0 -157
  103. langroid/parsing/json.py +0 -64
  104. langroid/utils/web/selenium_login.py +0 -36
  105. langroid-0.1.85.dist-info/RECORD +0 -94
  106. /langroid/{scripts → agent/callbacks}/__init__.py +0 -0
  107. {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/LICENSE +0 -0
langroid/parsing/utils.py CHANGED
@@ -1,19 +1,40 @@
  import difflib
+ import logging
  import random
+ import re
+ from functools import cache
  from itertools import islice
- from typing import Any, Iterable, List
+ from typing import Iterable, List, Sequence, TypeVar

  import nltk
  from faker import Faker

- nltk.download("punkt", quiet=True)
- nltk.download("gutenberg", quiet=True)
+ from langroid.mytypes import Document
+ from langroid.parsing.document_parser import DocumentType
+ from langroid.parsing.parser import Parser, ParsingConfig
+ from langroid.parsing.repo_loader import RepoLoader
+ from langroid.parsing.url_loader import URLLoader
+ from langroid.parsing.urls import get_urls_paths_bytes_indices

  Faker.seed(23)
  random.seed(43)

+ logger = logging.getLogger(__name__)

- def batched(iterable: Iterable[Any], n: int) -> Iterable[Any]:
+
+ # Ensures the NLTK resource is available
+ @cache
+ def download_nltk_resource(resource: str) -> None:
+     try:
+         nltk.data.find(resource)
+     except LookupError:
+         nltk.download(resource, quiet=True)
+
+
+ T = TypeVar("T")
+
+
+ def batched(iterable: Iterable[T], n: int) -> Iterable[Sequence[T]]:
      """Batch data into tuples of length n. The last batch may be shorter."""
      # batched('ABCDEFG', 3) --> ABC DEF G
      if n < 1:
@@ -25,6 +46,8 @@ def batched(iterable: Iterable[Any], n: int) -> Iterable[Any]:

  def generate_random_sentences(k: int) -> str:
      # Load the sample text
+     download_nltk_resource("gutenberg")
+
      from nltk.corpus import gutenberg

      text = gutenberg.raw("austen-emma.txt")
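The hunk above cuts off `batched` mid-body. For reference, here is a minimal self-contained sketch of the standard `islice`-based recipe that the signature and imports suggest; this is an illustration, not the exact released body:

```python
from itertools import islice
from typing import Iterable, Iterator, Sequence, TypeVar

T = TypeVar("T")

def batched(iterable: Iterable[T], n: int) -> Iterator[Sequence[T]]:
    """Batch data into tuples of length n; the last batch may be shorter."""
    if n < 1:
        raise ValueError("n must be at least 1")
    it = iter(iterable)
    # Take n items at a time until the iterator is exhausted
    while batch := tuple(islice(it, n)):
        yield batch

print(list(batched("ABCDEFG", 3)))  # [('A', 'B', 'C'), ('D', 'E', 'F'), ('G',)]
```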
@@ -74,3 +97,274 @@ def closest_string(query: str, string_list: List[str]) -> str:
      )

      return original_closest_match
+
+
+ def split_paragraphs(text: str) -> List[str]:
+     """
+     Split the input text into paragraphs using "\n\n" as the delimiter.
+
+     Args:
+         text (str): The input text.
+
+     Returns:
+         list: A list of paragraphs.
+     """
+     # Split based on a newline, followed by spaces/tabs, then another newline.
+     paras = re.split(r"\n[ \t]*\n", text)
+     return [para.strip() for para in paras if para.strip()]
+
+
+ def split_newlines(text: str) -> List[str]:
+     """
+     Split the input text into lines using "\n" as the delimiter.
+
+     Args:
+         text (str): The input text.
+
+     Returns:
+         list: A list of lines.
+     """
+     lines = re.split(r"\n", text)
+     return [line.strip() for line in lines if line.strip()]
+
+
+ def number_segments(s: str, granularity: int = 1) -> str:
+     """
+     Number the segments in a given text, preserving paragraph structure.
+     A segment is a sequence of `granularity` consecutive "sentences", where a
+     "sentence" is either a normal sentence, or, if there isn't enough punctuation
+     to properly identify sentences, a pseudo-sentence obtained via heuristics
+     (split by newline, or failing that, split every 40 words). The goal here is
+     simply to number segments at a reasonable granularity so the LLM can identify
+     relevant segments, in the RelevanceExtractorAgent.
+
+     Args:
+         s (str): The input text.
+         granularity (int): The number of sentences in a segment.
+             If this is -1, then the entire text is treated as a single segment,
+             and is numbered as <#1#>.
+
+     Returns:
+         str: The text with segments numbered in the style <#1#>, <#2#> etc.
+
+     Example:
+         >>> number_segments("Hello world! How are you? Have a good day.")
+         '<#1#> Hello world! <#2#> How are you? <#3#> Have a good day.'
+     """
+     if granularity < 0:
+         return "<#1#> " + s
+     numbered_text = []
+     count = 0
+
+     paragraphs = split_paragraphs(s)
+     for paragraph in paragraphs:
+         sentences = nltk.sent_tokenize(paragraph)
+         # Some docs are problematic (e.g. resumes) and have no (or too few)
+         # periods, so we can't split usefully into sentences.
+         # We try a series of heuristics to split into sentences,
+         # until the avg num words per sentence is less than 40.
+         avg_words_per_sentence = sum(
+             len(nltk.word_tokenize(sentence)) for sentence in sentences
+         ) / len(sentences)
+         if avg_words_per_sentence > 40:
+             sentences = split_newlines(paragraph)
+             avg_words_per_sentence = sum(
+                 len(nltk.word_tokenize(sentence)) for sentence in sentences
+             ) / len(sentences)
+             if avg_words_per_sentence > 40:
+                 # Still too long, just split on every 40 words
+                 sentences = []
+                 for sentence in nltk.sent_tokenize(paragraph):
+                     words = nltk.word_tokenize(sentence)
+                     for i in range(0, len(words), 40):
+                         # if there are less than 20 words left after this,
+                         # just add them to the last sentence and break
+                         if len(words) - i < 20:
+                             sentences.append(" ".join(words[i:]))
+                             break
+                         else:
+                             sentences.append(" ".join(words[i : i + 40]))
+         for i, sentence in enumerate(sentences):
+             num = count // granularity + 1
+             number_prefix = f"<#{num}#>" if count % granularity == 0 else ""
+             sentence = f"{number_prefix} {sentence}"
+             count += 1
+             sentences[i] = sentence
+         numbered_paragraph = " ".join(sentences)
+         numbered_text.append(numbered_paragraph)
+
+     return " \n\n ".join(numbered_text)
+
+
+ def number_sentences(s: str) -> str:
+     return number_segments(s, granularity=1)
+
+
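To make the numbering behavior concrete, a small usage sketch (this assumes NLTK's `punkt` tokenizer is available; exact whitespace in the output may differ slightly):

```python
text = "Hello world! How are you? Have a good day. See you soon."

# granularity=2 starts a new numbered segment every 2 sentences
print(number_segments(text, granularity=2))
# -> '<#1#> Hello world! How are you? <#2#> Have a good day. See you soon.'

# granularity=-1 treats the whole text as one segment
print(number_segments(text, granularity=-1))
# -> '<#1#> Hello world! How are you? Have a good day. See you soon.'
```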
+ def parse_number_range_list(specs: str) -> List[int]:
+     """
+     Parse a specs string like "3,5,7-10" into a list of integers.
+
+     Args:
+         specs (str): A string containing segment numbers and/or ranges
+             (e.g., "3,5,7-10").
+
+     Returns:
+         List[int]: List of segment numbers.
+
+     Example:
+         >>> parse_number_range_list("3,5,7-10")
+         [3, 5, 7, 8, 9, 10]
+     """
+     spec_indices = set()  # type: ignore
+     for part in specs.split(","):
+         # some weak LLMs may generate <#1#> instead of 1, so extract just the
+         # digits or the "-"
+         part = "".join(char for char in part if char.isdigit() or char == "-")
+         if "-" in part:
+             start, end = map(int, part.split("-"))
+             spec_indices.update(range(start, end + 1))
+         else:
+             spec_indices.add(int(part))
+
+     return sorted(list(spec_indices))
+
+
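The digit-filtering line is what makes the parser tolerant of marker-styled answers; a quick illustration, following directly from the code above:

```python
# A well-formed spec, and a "weak LLM" spec with stray segment markers
print(parse_number_range_list("3,5,7-10"))      # [3, 5, 7, 8, 9, 10]
print(parse_number_range_list("<#3#>, <#5#>"))  # [3, 5]
```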
+ def strip_k(s: str, k: int = 2) -> str:
+     """
+     Strip leading and trailing whitespace from the input text beyond length k.
+     This is useful for removing leading/trailing whitespace from a text while
+     preserving paragraph structure.
+
+     Args:
+         s (str): The input text.
+         k (int): The number of leading and trailing whitespace characters
+             to retain.
+
+     Returns:
+         str: The text with leading and trailing whitespace removed beyond
+             length k.
+     """
+
+     # Count leading and trailing whitespace characters
+     leading_count = len(s) - len(s.lstrip())
+     trailing_count = len(s) - len(s.rstrip())
+
+     # Determine how much whitespace to retain
+     leading_keep = min(leading_count, k)
+     trailing_keep = min(trailing_count, k)
+
+     # Use slicing to get the desired output
+     return s[leading_count - leading_keep : len(s) - (trailing_count - trailing_keep)]
+
+
+ def clean_whitespace(text: str) -> str:
+     """Remove extra whitespace from the input text, while preserving
+     paragraph structure.
+     """
+     paragraphs = split_paragraphs(text)
+     cleaned_paragraphs = [" ".join(p.split()) for p in paragraphs if p]
+     return "\n\n".join(cleaned_paragraphs)  # Join the cleaned paragraphs.
+
+
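Worked through the slicing logic above, `strip_k` with the default k=2 behaves like this:

```python
s = "\n\n\n  hello  \n\n\n"
# 5 leading and 5 trailing whitespace chars; keep at most 2 on each side
print(repr(strip_k(s, k=2)))  # '  hello  '
```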
+ def extract_numbered_segments(s: str, specs: str) -> str:
+     """
+     Extract specified segments from a numbered text, preserving paragraph
+     structure.
+
+     Args:
+         s (str): The input text containing numbered segments.
+         specs (str): A string containing segment numbers and/or ranges
+             (e.g., "3,5,7-10").
+
+     Returns:
+         str: Extracted segments, keeping original paragraph structures.
+
+     Example:
+         >>> text = "<#1#> Hello world! <#2#> How are you? <#3#> Have a good day."
+         >>> extract_numbered_segments(text, "1,3")
+         'Hello world! Have a good day.'
+     """
+     # Use the helper function to get the list of indices from specs
+     if specs.strip() == "":
+         return ""
+     spec_indices = parse_number_range_list(specs)
+
+     # Regular expression to identify numbered segments like
+     # <#1#> Hello world! This is me. <#2#> How are you? <#3#> Have a good day.
+     # Note we match any character between segment markers, including newlines.
+     segment_pattern = re.compile(r"<#(\d+)#>([\s\S]*?)(?=<#\d+#>|$)")
+
+     # Split the text into paragraphs while preserving their boundaries
+     paragraphs = split_paragraphs(s)
+
+     extracted_paragraphs = []
+
+     for paragraph in paragraphs:
+         segments_with_numbers = segment_pattern.findall(paragraph)
+
+         # Extract the desired segments from this paragraph
+         extracted_segments = [
+             segment
+             for num, segment in segments_with_numbers
+             if int(num) in spec_indices
+         ]
+
+         # If we extracted any segments from this paragraph,
+         # join them and append to results
+         if extracted_segments:
+             extracted_paragraphs.append(" ".join(extracted_segments))
+
+     return "\n\n".join(extracted_paragraphs)
+
+
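Taken together, `number_segments`, `parse_number_range_list`, and `extract_numbered_segments` form the round trip used by the RelevanceExtractorAgent. A minimal sketch, again assuming `punkt` is available and with illustrative text:

```python
passage = "Paris is in France. Berlin is in Germany. Rome is in Italy."
numbered = number_segments(passage, granularity=1)
# e.g. '<#1#> Paris is in France. <#2#> Berlin is in Germany. <#3#> Rome is in Italy.'

specs = "1,3"  # in practice, the LLM returns the relevant segment numbers
print(extract_numbered_segments(numbered, specs))
# -> 'Paris is in France. Rome is in Italy.' (modulo spacing)
```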
+ def extract_content_from_path(
+     path: bytes | str | List[bytes | str],
+     parsing: ParsingConfig,
+     doc_type: str | DocumentType | None = None,
+ ) -> str | List[str]:
+     """
+     Extract the content from a file path or URL, or a list of file paths or URLs.
+
+     Args:
+         path (bytes | str | List[bytes | str]): The file path or URL, or a list
+             of file paths or URLs, or bytes content. The bytes option is meant
+             to support cases where upstream code may have already loaded the
+             content (e.g., from a database or API) and we want to avoid having
+             to copy the content to a temporary file.
+         parsing (ParsingConfig): The parsing configuration.
+         doc_type (str | DocumentType | None): The document type if known.
+             If multiple paths are given, this MUST apply to ALL docs.
+
+     Returns:
+         str | List[str]: The extracted content if a single file path or URL is
+             provided, or a list of extracted contents if a list of file paths
+             or URLs is provided.
+     """
+     if isinstance(path, str) or isinstance(path, bytes):
+         paths = [path]
+     elif isinstance(path, list) and len(path) == 0:
+         return ""
+     else:
+         paths = path
+
+     url_idxs, path_idxs, byte_idxs = get_urls_paths_bytes_indices(paths)
+     urls = [paths[i] for i in url_idxs]
+     path_list = [paths[i] for i in path_idxs]
+     byte_list = [paths[i] for i in byte_idxs]
+     path_list.extend(byte_list)
+     parser = Parser(parsing)
+     docs: List[Document] = []
+     try:
+         if len(urls) > 0:
+             loader = URLLoader(urls=urls, parser=parser)  # type: ignore
+             docs = loader.load()
+         if len(path_list) > 0:
+             for p in path_list:
+                 path_docs = RepoLoader.get_documents(
+                     p, parser=parser, doc_type=doc_type
+                 )
+                 docs.extend(path_docs)
+     except Exception as e:
+         logger.warning(f"Error loading path {paths}: {e}")
+         return ""
+     if len(docs) == 1:
+         return docs[0].content
+     else:
+         return [d.content for d in docs]
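A usage sketch for this new top-level helper; the file name and URL here are hypothetical:

```python
from langroid.parsing.parser import ParsingConfig
from langroid.parsing.utils import extract_content_from_path

# A single path returns str; a list of paths/URLs/bytes returns List[str]
content = extract_content_from_path("notes/report.pdf", ParsingConfig())
contents = extract_content_from_path(
    ["notes/report.pdf", "https://example.com/page.html"],
    ParsingConfig(),
)
```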
langroid/parsing/web_search.py CHANGED
@@ -12,6 +12,7 @@ from typing import Dict, List
  import requests
  from bs4 import BeautifulSoup
  from dotenv import load_dotenv
+ from duckduckgo_search import DDGS
  from googleapiclient.discovery import Resource, build
  from requests.models import Response

@@ -77,3 +78,75 @@ def google_search(query: str, num_results: int = 5) -> List[WebSearchResult]:
      WebSearchResult(result["title"], result["link"], 3500, 300)
      for result in raw_results
  ]
+
+
+ def metaphor_search(query: str, num_results: int = 5) -> List[WebSearchResult]:
+     """
+     Make an API call via the Metaphor client to fetch the top `num_results`
+     links matching the query. Returns a list of WebSearchResult objects.
+
+     Args:
+         query (str): The query the user wants to make.
+         num_results (int): Number of top matching results to fetch.
+     """
+
+     load_dotenv()
+
+     api_key = os.getenv("METAPHOR_API_KEY") or os.getenv("EXA_API_KEY")
+     if not api_key:
+         raise ValueError(
+             """
+             Neither METAPHOR_API_KEY nor EXA_API_KEY environment variables are set.
+             Please set one of them to your API key, and try again.
+             """
+         )
+
+     try:
+         from metaphor_python import Metaphor
+     except ImportError:
+         raise ImportError(
+             "You are attempting to use the `metaphor_python` library; "
+             "to use it, please install langroid with the `metaphor` extra, e.g. "
+             "`pip install langroid[metaphor]` or `poetry add langroid[metaphor]` "
+             "(it installs the `metaphor_python` package from pypi)."
+         )
+
+     client = Metaphor(api_key=api_key)
+
+     response = client.search(
+         query=query,
+         num_results=num_results,
+     )
+     raw_results = response.results
+
+     return [
+         WebSearchResult(result.title, result.url, 3500, 300) for result in raw_results
+     ]
+
+
+ def duckduckgo_search(query: str, num_results: int = 5) -> List[WebSearchResult]:
+     """
+     Make an API call via the DuckDuckGo client to fetch the top `num_results`
+     links matching the query. Returns a list of WebSearchResult objects.
+
+     Args:
+         query (str): The query the user wants to make.
+         num_results (int): Number of top matching results to fetch.
+     """
+
+     with DDGS() as ddgs:
+         search_results = [r for r in ddgs.text(query, max_results=num_results)]
+
+     return [
+         WebSearchResult(
+             title=result["title"],
+             link=result["href"],
+             max_content_length=3500,
+             max_summary_length=300,
+         )
+         for result in search_results
+     ]
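A minimal usage sketch for the new DuckDuckGo helper (no API key needed; the `WebSearchResult` attribute names here are assumed from the constructor calls above):

```python
from langroid.parsing.web_search import duckduckgo_search

for result in duckduckgo_search("langroid LLM agents", num_results=3):
    print(result.title, result.link)
```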
langroid/prompts/__init__.py ADDED
@@ -0,0 +1,11 @@
+ from . import dialog
+ from . import prompts_config
+ from . import templates
+ from . import transforms
+
+ __all__ = [
+     "dialog",
+     "prompts_config",
+     "templates",
+     "transforms",
+ ]
langroid/prompts/chat-gpt4-system-prompt.md ADDED
@@ -0,0 +1,68 @@
+ Image input capabilities: Enabled
+
+ Tools
+ python
+ When you send a message containing Python code to python, it will be executed in a
+ stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 60.0
+ seconds. The drive at '/mnt/data' can be used to save and persist user files. Internet access for this session is disabled. Do not make external web requests or API calls as they will fail.
+
+ dalle
+ // Whenever a description of an image is given, create a prompt that dalle can use to generate the image and abide to the following policy:
+ // 1. The prompt must be in English. Translate to English if needed.
+ // 2. DO NOT ask for permission to generate the image, just do it!
+ // 3. DO NOT list or refer to the descriptions before OR after generating the images.
+ // 4. Do not create more than 1 image, even if the user requests more.
+ // 5. Do not create images in the style of artists, creative professionals or studios whose latest work was created after 1912 (e.g. Picasso, Kahlo).
+ // - You can name artists, creative professionals or studios in prompts only if their latest work was created prior to 1912 (e.g. Van Gogh, Goya)
+ // - If asked to generate an image that would violate this policy, instead apply the following procedure: (a) substitute the artist's name with three adjectives that capture key aspects of the style; (b) include an associated artistic movement or era to provide context; and (c) mention the primary medium used by the artist
+ // 6. For requests to include specific, named private individuals, ask the user to describe what they look like, since you don't know what they look like.
+ // 7. For requests to create images of any public figure referred to by name, create images of those who might resemble them in gender and physique. But they shouldn't look like them. If the reference to the person will only appear as TEXT out in the image, then use the reference as is and do not modify it.
+ // 8. Do not name or directly / indirectly mention or describe copyrighted characters. Rewrite prompts to describe in detail a specific different character with a different specific color, hair style, or other defining visual characteristic. Do not discuss copyright policies in responses.
+ // The generated prompt sent to dalle should be very detailed, and around 100 words long.
+ // Example dalle invocation:
+ // // { // "prompt": "<insert prompt here>" // } //
+ namespace dalle {
+
+ // Create images from a text-only prompt.
+ type text2im = (_: {
+ // The size of the requested image. Use 1024x1024 (square) as the default, 1792x1024 if the user requests a wide image, and 1024x1792 for full-body portraits. Always include this parameter in the request.
+ size?: "1792x1024" | "1024x1024" | "1024x1792",
+ // The number of images to generate. If the user does not specify a number, generate 1 image.
+ n?: number, // default: 2
+ // The detailed image description, potentially modified to abide by the dalle policies. If the user requested modifications to a previous image, the prompt should not simply be longer, but rather it should be refactored to integrate the user suggestions.
+ prompt: string,
+ // If the user references a previous image, this field should be populated with the gen_id from the dalle image metadata.
+ referenced_image_ids?: string[],
+ }) => any;
+
+ } // namespace dalle
+
+ voice_mode
+ // Voice mode functions are not available in text conversations.
+ namespace voice_mode {
+
+ } // namespace voice_mode
+
+ browser
+ You have the tool browser. Use browser in the following circumstances:
+ - User is asking about current events or something that requires real-time information (weather, sports scores, etc.)
+ - User is asking about some term you are totally unfamiliar with (it might be new)
+ - User explicitly asks you to browse or provide links to references
+
+ Given a query that requires retrieval, your turn will consist of three steps:
+
+ Call the search function to get a list of results.
+ Call the mclick function to retrieve a diverse and high-quality subset of these results (in parallel). Remember to SELECT AT LEAST 3 sources when using mclick.
+ Write a response to the user based on these results. In your response, cite sources using the citation format below.
+ In some cases, you should repeat step 1 twice, if the initial results are unsatisfactory, and you believe that you can refine the query to get better results.
+
+ You can also open a url directly if one is provided by the user. Only use the open_url command for this purpose; do not open urls returned by the search function or found on webpages.
+
+ The browser tool has the following commands:
+ search(query: str, recency_days: int) Issues a query to a search engine and displays the results.
+ mclick(ids: list[str]). Retrieves the contents of the webpages with provided IDs (indices). You should ALWAYS SELECT AT LEAST 3 and at most 10 pages. Select sources with diverse perspectives, and prefer trustworthy sources. Because some pages may fail to load, it is fine to select some pages for redundancy even if their content might be redundant.
+ open_url(url: str) Opens the given URL and displays it.
+
+ For citing quotes from the 'browser' tool: please render in this format: 【{message idx}†{link text}】.
+ For long citations: please render in this format: [link text](message idx).
+ Otherwise do not render links.
langroid/prompts/prompts_config.py CHANGED
@@ -2,4 +2,4 @@ from pydantic import BaseSettings


  class PromptsConfig(BaseSettings):
-     max_tokens: int = 1000
+     max_tokens: int = 1000  # for output; NOT USED ANYWHERE
langroid/utils/__init__.py ADDED
@@ -0,0 +1,17 @@
+ from . import configuration
+ from . import globals
+ from . import constants
+ from . import logging
+ from . import pydantic_utils
+ from . import system
+ from . import output
+
+ __all__ = [
+     "configuration",
+     "globals",
+     "constants",
+     "logging",
+     "pydantic_utils",
+     "system",
+     "output",
+ ]
langroid/utils/algorithms/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from . import graph
+
+ __all__ = ["graph"]
langroid/utils/algorithms/graph.py ADDED
@@ -0,0 +1,103 @@
+ """
+ Graph algos.
+ """
+
+ from typing import Dict, List, no_type_check
+
+ import numpy as np
+
+
+ @no_type_check
+ def topological_sort(order: np.array) -> List[int]:
+     """
+     Given a directed adjacency matrix, return a topological sort of the nodes.
+     order[i,j] = -1 means there is an edge from i to j.
+     order[i,j] = 0 means there is no edge from i to j.
+     order[i,j] = 1 means there is an edge from j to i.
+
+     Args:
+         order (np.array): The adjacency matrix.
+
+     Returns:
+         List[int]: The topological sort of the nodes.
+
+     """
+     n = order.shape[0]
+
+     # Calculate the in-degrees
+     in_degree = [0] * n
+     for i in range(n):
+         for j in range(n):
+             if order[i, j] == -1:
+                 in_degree[j] += 1
+
+     # Initialize the queue with nodes of in-degree 0
+     queue = [i for i in range(n) if in_degree[i] == 0]
+     result = []
+
+     while queue:
+         node = queue.pop(0)
+         result.append(node)
+
+         for i in range(n):
+             if order[node, i] == -1:
+                 in_degree[i] -= 1
+                 if in_degree[i] == 0:
+                     queue.append(i)
+
+     assert len(result) == n, "Cycle detected"
+     return result
+
+
+ @no_type_check
+ def components(order: np.ndarray) -> List[List[int]]:
+     """
+     Find the connected components in an undirected graph represented by a matrix.
+
+     Args:
+         order (np.ndarray): A matrix with values 0 or 1 indicating
+             undirected graph edges. `order[i][j] = 1` means an edge between `i`
+             and `j`, and `0` means no edge.
+
+     Returns:
+         List[List[int]]: A list of Lists, where each List contains the indices
+             of nodes in the same connected component.
+
+     Example:
+         order = np.array([
+             [1, 1, 0, 0],
+             [1, 1, 1, 0],
+             [0, 1, 1, 0],
+             [0, 0, 0, 1]
+         ])
+         components(order)
+         # [[0, 1, 2], [3]]
+     """
+
+     i2g: Dict[int, int] = {}  # index to group mapping
+     next_group = 0
+     n = order.shape[0]
+     for i in range(n):
+         connected_groups = {i2g[j] for j in np.nonzero(order[i, :])[0] if j in i2g}
+
+         # If the node is not part of any group
+         # and is not connected to any groups, assign a new group
+         if not connected_groups:
+             i2g[i] = next_group
+             next_group += 1
+         else:
+             # If the node is connected to multiple groups, we merge them
+             main_group = min(connected_groups)
+             for j in np.nonzero(order[i, :])[0]:
+                 if i2g.get(j) in connected_groups:
+                     i2g[j] = main_group
+             i2g[i] = main_group
+
+     # Convert i2g to a list of Lists
+     groups: Dict[int, List[int]] = {}
+     for index, group in i2g.items():
+         if group not in groups:
+             groups[group] = []
+         groups[group].append(index)
+
+     return list(groups.values())
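A small sketch exercising both helpers under the adjacency conventions documented above (the matrices are illustrative):

```python
import numpy as np
from langroid.utils.algorithms.graph import components, topological_sort

# Directed: order[i, j] = -1 means an edge i -> j (1 is the mirrored entry)
directed = np.array([
    [ 0, -1, -1],
    [ 1,  0, -1],
    [ 1,  1,  0],
])
print(topological_sort(directed))  # [0, 1, 2]

# Undirected: 1 means an edge (self-loops on the diagonal are harmless)
undirected = np.array([
    [1, 1, 0, 0],
    [1, 1, 1, 0],
    [0, 1, 1, 0],
    [0, 0, 0, 1],
])
print(components(undirected))  # [[0, 1, 2], [3]]
```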