letta-nightly 0.1.7.dev20240924104148__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of letta-nightly might be problematic. Click here for more details.

Files changed (189) hide show
  1. letta/__init__.py +24 -0
  2. letta/__main__.py +3 -0
  3. letta/agent.py +1427 -0
  4. letta/agent_store/chroma.py +295 -0
  5. letta/agent_store/db.py +546 -0
  6. letta/agent_store/lancedb.py +177 -0
  7. letta/agent_store/milvus.py +198 -0
  8. letta/agent_store/qdrant.py +201 -0
  9. letta/agent_store/storage.py +188 -0
  10. letta/benchmark/benchmark.py +96 -0
  11. letta/benchmark/constants.py +14 -0
  12. letta/cli/cli.py +689 -0
  13. letta/cli/cli_config.py +1282 -0
  14. letta/cli/cli_load.py +166 -0
  15. letta/client/__init__.py +0 -0
  16. letta/client/admin.py +171 -0
  17. letta/client/client.py +2360 -0
  18. letta/client/streaming.py +90 -0
  19. letta/client/utils.py +61 -0
  20. letta/config.py +484 -0
  21. letta/configs/anthropic.json +13 -0
  22. letta/configs/letta_hosted.json +11 -0
  23. letta/configs/openai.json +12 -0
  24. letta/constants.py +134 -0
  25. letta/credentials.py +140 -0
  26. letta/data_sources/connectors.py +247 -0
  27. letta/embeddings.py +218 -0
  28. letta/errors.py +26 -0
  29. letta/functions/__init__.py +0 -0
  30. letta/functions/function_sets/base.py +174 -0
  31. letta/functions/function_sets/extras.py +132 -0
  32. letta/functions/functions.py +105 -0
  33. letta/functions/schema_generator.py +205 -0
  34. letta/humans/__init__.py +0 -0
  35. letta/humans/examples/basic.txt +1 -0
  36. letta/humans/examples/cs_phd.txt +9 -0
  37. letta/interface.py +314 -0
  38. letta/llm_api/__init__.py +0 -0
  39. letta/llm_api/anthropic.py +383 -0
  40. letta/llm_api/azure_openai.py +155 -0
  41. letta/llm_api/cohere.py +396 -0
  42. letta/llm_api/google_ai.py +468 -0
  43. letta/llm_api/llm_api_tools.py +485 -0
  44. letta/llm_api/openai.py +470 -0
  45. letta/local_llm/README.md +3 -0
  46. letta/local_llm/__init__.py +0 -0
  47. letta/local_llm/chat_completion_proxy.py +279 -0
  48. letta/local_llm/constants.py +31 -0
  49. letta/local_llm/function_parser.py +68 -0
  50. letta/local_llm/grammars/__init__.py +0 -0
  51. letta/local_llm/grammars/gbnf_grammar_generator.py +1324 -0
  52. letta/local_llm/grammars/json.gbnf +26 -0
  53. letta/local_llm/grammars/json_func_calls_with_inner_thoughts.gbnf +32 -0
  54. letta/local_llm/groq/api.py +97 -0
  55. letta/local_llm/json_parser.py +202 -0
  56. letta/local_llm/koboldcpp/api.py +62 -0
  57. letta/local_llm/koboldcpp/settings.py +23 -0
  58. letta/local_llm/llamacpp/api.py +58 -0
  59. letta/local_llm/llamacpp/settings.py +22 -0
  60. letta/local_llm/llm_chat_completion_wrappers/__init__.py +0 -0
  61. letta/local_llm/llm_chat_completion_wrappers/airoboros.py +452 -0
  62. letta/local_llm/llm_chat_completion_wrappers/chatml.py +470 -0
  63. letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +387 -0
  64. letta/local_llm/llm_chat_completion_wrappers/dolphin.py +246 -0
  65. letta/local_llm/llm_chat_completion_wrappers/llama3.py +345 -0
  66. letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +156 -0
  67. letta/local_llm/llm_chat_completion_wrappers/wrapper_base.py +11 -0
  68. letta/local_llm/llm_chat_completion_wrappers/zephyr.py +345 -0
  69. letta/local_llm/lmstudio/api.py +100 -0
  70. letta/local_llm/lmstudio/settings.py +29 -0
  71. letta/local_llm/ollama/api.py +88 -0
  72. letta/local_llm/ollama/settings.py +32 -0
  73. letta/local_llm/settings/__init__.py +0 -0
  74. letta/local_llm/settings/deterministic_mirostat.py +45 -0
  75. letta/local_llm/settings/settings.py +72 -0
  76. letta/local_llm/settings/simple.py +28 -0
  77. letta/local_llm/utils.py +265 -0
  78. letta/local_llm/vllm/api.py +63 -0
  79. letta/local_llm/webui/api.py +60 -0
  80. letta/local_llm/webui/legacy_api.py +58 -0
  81. letta/local_llm/webui/legacy_settings.py +23 -0
  82. letta/local_llm/webui/settings.py +24 -0
  83. letta/log.py +76 -0
  84. letta/main.py +437 -0
  85. letta/memory.py +440 -0
  86. letta/metadata.py +884 -0
  87. letta/openai_backcompat/__init__.py +0 -0
  88. letta/openai_backcompat/openai_object.py +437 -0
  89. letta/persistence_manager.py +148 -0
  90. letta/personas/__init__.py +0 -0
  91. letta/personas/examples/anna_pa.txt +13 -0
  92. letta/personas/examples/google_search_persona.txt +15 -0
  93. letta/personas/examples/memgpt_doc.txt +6 -0
  94. letta/personas/examples/memgpt_starter.txt +4 -0
  95. letta/personas/examples/sam.txt +14 -0
  96. letta/personas/examples/sam_pov.txt +14 -0
  97. letta/personas/examples/sam_simple_pov_gpt35.txt +13 -0
  98. letta/personas/examples/sqldb/test.db +0 -0
  99. letta/prompts/__init__.py +0 -0
  100. letta/prompts/gpt_summarize.py +14 -0
  101. letta/prompts/gpt_system.py +26 -0
  102. letta/prompts/system/memgpt_base.txt +49 -0
  103. letta/prompts/system/memgpt_chat.txt +58 -0
  104. letta/prompts/system/memgpt_chat_compressed.txt +13 -0
  105. letta/prompts/system/memgpt_chat_fstring.txt +51 -0
  106. letta/prompts/system/memgpt_doc.txt +50 -0
  107. letta/prompts/system/memgpt_gpt35_extralong.txt +53 -0
  108. letta/prompts/system/memgpt_intuitive_knowledge.txt +31 -0
  109. letta/prompts/system/memgpt_modified_chat.txt +23 -0
  110. letta/pytest.ini +0 -0
  111. letta/schemas/agent.py +117 -0
  112. letta/schemas/api_key.py +21 -0
  113. letta/schemas/block.py +135 -0
  114. letta/schemas/document.py +21 -0
  115. letta/schemas/embedding_config.py +54 -0
  116. letta/schemas/enums.py +35 -0
  117. letta/schemas/job.py +38 -0
  118. letta/schemas/letta_base.py +80 -0
  119. letta/schemas/letta_message.py +175 -0
  120. letta/schemas/letta_request.py +23 -0
  121. letta/schemas/letta_response.py +28 -0
  122. letta/schemas/llm_config.py +54 -0
  123. letta/schemas/memory.py +224 -0
  124. letta/schemas/message.py +727 -0
  125. letta/schemas/openai/chat_completion_request.py +123 -0
  126. letta/schemas/openai/chat_completion_response.py +136 -0
  127. letta/schemas/openai/chat_completions.py +123 -0
  128. letta/schemas/openai/embedding_response.py +11 -0
  129. letta/schemas/openai/openai.py +157 -0
  130. letta/schemas/organization.py +20 -0
  131. letta/schemas/passage.py +80 -0
  132. letta/schemas/source.py +62 -0
  133. letta/schemas/tool.py +143 -0
  134. letta/schemas/usage.py +18 -0
  135. letta/schemas/user.py +33 -0
  136. letta/server/__init__.py +0 -0
  137. letta/server/constants.py +6 -0
  138. letta/server/rest_api/__init__.py +0 -0
  139. letta/server/rest_api/admin/__init__.py +0 -0
  140. letta/server/rest_api/admin/agents.py +21 -0
  141. letta/server/rest_api/admin/tools.py +83 -0
  142. letta/server/rest_api/admin/users.py +98 -0
  143. letta/server/rest_api/app.py +193 -0
  144. letta/server/rest_api/auth/__init__.py +0 -0
  145. letta/server/rest_api/auth/index.py +43 -0
  146. letta/server/rest_api/auth_token.py +22 -0
  147. letta/server/rest_api/interface.py +726 -0
  148. letta/server/rest_api/routers/__init__.py +0 -0
  149. letta/server/rest_api/routers/openai/__init__.py +0 -0
  150. letta/server/rest_api/routers/openai/assistants/__init__.py +0 -0
  151. letta/server/rest_api/routers/openai/assistants/assistants.py +115 -0
  152. letta/server/rest_api/routers/openai/assistants/schemas.py +121 -0
  153. letta/server/rest_api/routers/openai/assistants/threads.py +336 -0
  154. letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
  155. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +131 -0
  156. letta/server/rest_api/routers/v1/__init__.py +15 -0
  157. letta/server/rest_api/routers/v1/agents.py +543 -0
  158. letta/server/rest_api/routers/v1/blocks.py +73 -0
  159. letta/server/rest_api/routers/v1/jobs.py +46 -0
  160. letta/server/rest_api/routers/v1/llms.py +28 -0
  161. letta/server/rest_api/routers/v1/organizations.py +61 -0
  162. letta/server/rest_api/routers/v1/sources.py +199 -0
  163. letta/server/rest_api/routers/v1/tools.py +103 -0
  164. letta/server/rest_api/routers/v1/users.py +109 -0
  165. letta/server/rest_api/static_files.py +74 -0
  166. letta/server/rest_api/utils.py +69 -0
  167. letta/server/server.py +1995 -0
  168. letta/server/startup.sh +8 -0
  169. letta/server/static_files/assets/index-0cbf7ad5.js +274 -0
  170. letta/server/static_files/assets/index-156816da.css +1 -0
  171. letta/server/static_files/assets/index-486e3228.js +274 -0
  172. letta/server/static_files/favicon.ico +0 -0
  173. letta/server/static_files/index.html +39 -0
  174. letta/server/static_files/memgpt_logo_transparent.png +0 -0
  175. letta/server/utils.py +46 -0
  176. letta/server/ws_api/__init__.py +0 -0
  177. letta/server/ws_api/example_client.py +104 -0
  178. letta/server/ws_api/interface.py +108 -0
  179. letta/server/ws_api/protocol.py +100 -0
  180. letta/server/ws_api/server.py +145 -0
  181. letta/settings.py +165 -0
  182. letta/streaming_interface.py +396 -0
  183. letta/system.py +207 -0
  184. letta/utils.py +1065 -0
  185. letta_nightly-0.1.7.dev20240924104148.dist-info/LICENSE +190 -0
  186. letta_nightly-0.1.7.dev20240924104148.dist-info/METADATA +98 -0
  187. letta_nightly-0.1.7.dev20240924104148.dist-info/RECORD +189 -0
  188. letta_nightly-0.1.7.dev20240924104148.dist-info/WHEEL +4 -0
  189. letta_nightly-0.1.7.dev20240924104148.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,1324 @@
1
+ import inspect
2
+ import json
3
+ import re
4
+ from copy import copy
5
+ from enum import Enum
6
+ from inspect import getdoc, isclass
7
+ from types import NoneType
8
+ from typing import (
9
+ Any,
10
+ Callable,
11
+ List,
12
+ Optional,
13
+ Tuple,
14
+ Type,
15
+ Union,
16
+ _GenericAlias,
17
+ get_args,
18
+ get_origin,
19
+ )
20
+
21
+ from docstring_parser import parse
22
+ from pydantic import BaseModel, create_model
23
+
24
+ from letta.utils import json_dumps
25
+
26
+
27
class PydanticDataType(Enum):
    """
    Data types known to the grammar generator.

    The value of each member is the lowercase string form of that type.

    Attributes:
        STRING (str): A string.
        TRIPLE_QUOTED_STRING (str): A string written as a triple-quoted block.
        MARKDOWN_CODE_BLOCK (str): A string written as a markdown code block.
        BOOLEAN (str): A boolean.
        INTEGER (str): An integer.
        FLOAT (str): A float.
        OBJECT (str): An object.
        ARRAY (str): An array.
        ENUM (str): An enum.
        ANY (str): Any value.
        NULL (str): The null value.
        CUSTOM_CLASS (str): A custom (non-Pydantic) class.
        CUSTOM_DICT (str): A dictionary with typed keys and values.
        SET (str): A set.
    """

    # Scalar string flavours
    STRING = "string"
    TRIPLE_QUOTED_STRING = "triple_quoted_string"
    MARKDOWN_CODE_BLOCK = "markdown_code_block"

    # Other scalars
    BOOLEAN = "boolean"
    INTEGER = "integer"
    FLOAT = "float"

    # Structured and special values
    OBJECT = "object"
    ARRAY = "array"
    ENUM = "enum"
    ANY = "any"
    NULL = "null"

    # Prefixes used for user-defined containers
    CUSTOM_CLASS = "custom-class"
    CUSTOM_DICT = "custom-dict"
    SET = "set"
56
+
57
+
58
def map_pydantic_type_to_gbnf(pydantic_type: Type[Any]) -> str:
    """
    Map a Python / Pydantic type annotation to the name of its GBNF rule.

    Parameterized generics are handled recursively:
    ``List[X]`` -> ``"<x>-list"``, ``Set[X]`` -> ``"<x>-set"``,
    ``Union[A, B]`` -> ``"union-<a>-or-<b>"`` (``Optional[X]`` is a ``Union``
    with ``NoneType``) and ``Dict[K, V]`` ->
    ``"custom-dict-key-type-<k>-value-type-<v>"``.

    :param pydantic_type: The annotation to translate.
    :return: The rule name, or ``"unknown"`` when the annotation is neither a
        class nor one of the supported generics.
    """
    origin = get_origin(pydantic_type)

    # Generic aliases are resolved first so that they are never handed to
    # issubclass(), which only accepts real classes.
    if origin is not None:
        if origin is list:
            return f"{map_pydantic_type_to_gbnf(get_args(pydantic_type)[0])}-list"
        if origin is set:
            return f"{map_pydantic_type_to_gbnf(get_args(pydantic_type)[0])}-set"
        if origin is Union:
            # get_origin() reports Optional[X] as Union, so this branch covers
            # optional annotations as well; no separate Optional check exists.
            member_rules = [map_pydantic_type_to_gbnf(member) for member in get_args(pydantic_type)]
            return f"union-{'-or-'.join(member_rules)}"
        if origin is dict:
            key_type, value_type = get_args(pydantic_type)
            key_rule = format_model_and_field_name(map_pydantic_type_to_gbnf(key_type))
            value_rule = format_model_and_field_name(map_pydantic_type_to_gbnf(value_type))
            return f"custom-dict-key-type-{key_rule}-value-type-{value_rule}"
        return "unknown"

    if not isclass(pydantic_type):
        return "unknown"

    # Order matters: bool is a subclass of int, so it has to be tested first.
    if issubclass(pydantic_type, str):
        return PydanticDataType.STRING.value
    if issubclass(pydantic_type, bool):
        return PydanticDataType.BOOLEAN.value
    if issubclass(pydantic_type, int):
        return PydanticDataType.INTEGER.value
    if issubclass(pydantic_type, float):
        return PydanticDataType.FLOAT.value
    if issubclass(pydantic_type, Enum):
        return PydanticDataType.ENUM.value
    if issubclass(pydantic_type, BaseModel):
        return format_model_and_field_name(pydantic_type.__name__)

    # Any other class is treated as a user-defined custom class.
    return f"{PydanticDataType.CUSTOM_CLASS.value}-{format_model_and_field_name(pydantic_type.__name__)}"
92
+
93
+
94
def format_model_and_field_name(model_name: str) -> str:
    """
    Convert a model or field name to a lowercase, hyphen-separated rule name.

    The name is split in front of every uppercase letter, each piece is
    lowercased, underscores become hyphens, and the pieces are joined with
    ``-`` (``"UserModel"`` -> ``"user-model"``,
    ``"inner_thoughts"`` -> ``"inner-thoughts"``).

    A leading run of non-uppercase characters is kept as its own piece, so
    ``"myField"`` becomes ``"my-field"`` instead of losing the ``"my"`` prefix.

    :param model_name: The original class or field name.
    :return: The formatted name; an empty string yields an empty string.
    """
    # First alternative: an uppercase letter plus everything up to the next one.
    # Second alternative: a prefix that contains no uppercase letter at all.
    parts = re.findall(r"[A-Z][^A-Z]*|[^A-Z]+", model_name)
    return "-".join(part.lower().replace("_", "-") for part in parts)
99
+
100
+
101
def generate_list_rule(element_type):
    """
    Build the GBNF rule describing a list whose items all have one type.

    :param element_type: The type of the list items (e.g. ``str``).
    :return: The rule text, of the form
        ``<element>-list ::= "[" <element> ("," <element>)* "]"``.
    """
    item_rule = map_pydantic_type_to_gbnf(element_type)
    list_rule_name = f"{item_rule}-list"
    return rf'{list_rule_name} ::= "[" {item_rule} ("," {item_rule})* "]"'
112
+
113
+
114
+ def get_members_structure(cls, rule_name):
115
+ if issubclass(cls, Enum):
116
+ # Handle Enum types
117
+ members = [f'"\\"{member.value}\\""' for name, member in cls.__members__.items()]
118
+ return f"{cls.__name__.lower()} ::= " + " | ".join(members)
119
+ if cls.__annotations__ and cls.__annotations__ != {}:
120
+ result = f'{rule_name} ::= "{{"'
121
+ type_list_rules = []
122
+ # Modify this comprehension
123
+ members = [
124
+ f' "\\"{name}\\"" ":" {map_pydantic_type_to_gbnf(param_type)}'
125
+ for name, param_type in cls.__annotations__.items()
126
+ if name != "self"
127
+ ]
128
+
129
+ result += '"," '.join(members)
130
+ result += ' "}"'
131
+ return result, type_list_rules
132
+ elif rule_name == "custom-class-any":
133
+ result = f"{rule_name} ::= "
134
+ result += "value"
135
+ type_list_rules = []
136
+ return result, type_list_rules
137
+ else:
138
+ init_signature = inspect.signature(cls.__init__)
139
+ parameters = init_signature.parameters
140
+ result = f'{rule_name} ::= "{{"'
141
+ type_list_rules = []
142
+ # Modify this comprehension too
143
+ members = [
144
+ f' "\\"{name}\\"" ":" {map_pydantic_type_to_gbnf(param.annotation)}'
145
+ for name, param in parameters.items()
146
+ if name != "self" and param.annotation != inspect.Parameter.empty
147
+ ]
148
+
149
+ result += '", "'.join(members)
150
+ result += ' "}"'
151
+ return result, type_list_rules
152
+
153
+
154
def regex_to_gbnf(regex_pattern: str) -> str:
    """
    Translate a simple regular expression into GBNF syntax.

    Only two shorthand classes are rewritten: ``\\d`` becomes ``[0-9]`` and
    ``\\s`` becomes a character class holding a space, a tab character and a
    newline character. Everything else (quantifiers, character classes, ...)
    is passed through unchanged.

    :param regex_pattern: The regular expression source text.
    :return: The pattern with the shorthand classes replaced.
    """
    shorthand_classes = (
        ("\\d", "[0-9]"),
        ("\\s", "[ \t\n]"),
    )

    gbnf_rule = regex_pattern
    for shorthand, replacement in shorthand_classes:
        gbnf_rule = gbnf_rule.replace(shorthand, replacement)
    return gbnf_rule
169
+
170
+
171
def generate_gbnf_integer_rules(max_digit=None, min_digit=None):
    """
    Generate the GBNF rule for an integer with a bounded number of digits.

    The rule is named ``integer-part`` followed by ``-max<N>`` and/or
    ``-min<N>`` for each bound that is given. Its body is ``min_digit``
    mandatory ``[0-9]`` terms followed by ``max_digit - min_digit`` optional
    ``[0-9]?`` terms.

    Parameters:
        max_digit (int): The maximum number of digits for the integer. Default is None.
        min_digit (int): The minimum number of digits for the integer. Default is None.

    Returns:
        integer_rule (str): The name of the integer rule.
        additional_rules (list): The rule definition, or an empty list when no
            bound is given or the resulting body would be empty.
    """
    name_suffixes = []
    if max_digit is not None:
        name_suffixes.append(f"-max{max_digit}")
    if min_digit is not None:
        name_suffixes.append(f"-min{min_digit}")
    integer_rule = "integer-part" + "".join(name_suffixes)

    # Without any bound there is nothing to define here.
    if max_digit is None and min_digit is None:
        return integer_rule, []

    mandatory_count = 0 if min_digit is None else min_digit
    digit_terms = ["[0-9]"] * mandatory_count
    if max_digit is not None:
        digit_terms += ["[0-9]?"] * (max_digit - mandatory_count)

    rule_body = " ".join(digit_terms)
    additional_rules = [f"{integer_rule} ::= {rule_body}"] if rule_body else []
    return integer_rule, additional_rules
216
+
217
+
218
def generate_gbnf_float_rules(max_digit=None, min_digit=None, max_precision=None, min_precision=None):
    """
    Generate GBNF float rules based on the given constraints.

    :param max_digit: Maximum number of digits in the integer part (default: None)
    :param min_digit: Minimum number of digits in the integer part (default: None)
    :param max_precision: Maximum number of digits in the fractional part (default: None)
    :param min_precision: Minimum number of digits in the fractional part (default: None)
    :return: A tuple containing the float rule name and the additional rules as a list.
        The list holds, in order: the fractional-part rule (only when a
        precision bound is given), the float rule itself, and the integer-part
        rule (only when a digit bound is given).

    Example Usage:
        generate_gbnf_float_rules(max_digit=3, min_digit=1, max_precision=2, min_precision=1)

    Output:
        ('float-3-1-2-1', [
            'fractional-part-max2-min1 ::= [0-9] [0-9]?',
            'float-3-1-2-1 ::= integer-part-max3-min1 "." fractional-part-max2-min1',
            'integer-part-max3-min1 ::= [0-9] [0-9]? [0-9]?',
        ])
    """

    def _bounds_suffix(upper, lower):
        # "-max<N>" / "-min<N>" for whichever bounds are present.
        suffix = f"-max{upper}" if upper is not None else ""
        if lower is not None:
            suffix += f"-min{lower}"
        return suffix

    additional_rules = []
    has_digit_bounds = max_digit is not None or min_digit is not None
    has_precision_bounds = max_precision is not None or min_precision is not None

    integer_part_rule = "integer-part" + _bounds_suffix(max_digit, min_digit)

    # Fractional part: only defined here when a precision bound was given.
    fractional_part_rule = "fractional-part"
    if has_precision_bounds:
        fractional_part_rule += _bounds_suffix(max_precision, min_precision)
        mandatory_decimals = 1 if min_precision is None else min_precision
        fractional_body = "[0-9]" * mandatory_decimals
        if max_precision is not None:
            fractional_body += " [0-9]?" * (max_precision - mandatory_decimals)
        additional_rules.append(f"{fractional_part_rule} ::= {fractional_body}")

    # The float rule name encodes all four bounds, with "X" for a missing one.
    bound_tags = [
        f"{bound}" if bound is not None else "X"
        for bound in (max_digit, min_digit, max_precision, min_precision)
    ]
    float_rule = "float-" + "-".join(bound_tags)
    additional_rules.append(f'{float_rule} ::= {integer_part_rule} "." {fractional_part_rule}')

    # Integer part: always at least one digit, defined only when bounded.
    if has_digit_bounds:
        integer_body = "[0-9]"
        if min_digit is not None and min_digit > 1:
            integer_body += " [0-9]" * (min_digit - 1)
        if max_digit is not None:
            mandatory_digits = 1 if min_digit is None else min_digit
            integer_body += " [0-9]?" * (max_digit - mandatory_digits)
        additional_rules.append(f"{integer_part_rule} ::= {integer_body.strip()}")

    return float_rule, additional_rules
278
+
279
+
280
def generate_gbnf_rule_for_type(
    model_name, field_name, field_type, is_optional, processed_models, created_rules, field_info=None
) -> Tuple[str, list]:
    """
    Generate GBNF rule for a given field type.

    :param model_name: Name of the model.
    :param field_name: Name of the field.
    :param field_type: Type of the field.
    :param is_optional: Whether the field is optional. Only forwarded to the
        recursive calls made for container element / key / value types.
    :param processed_models: Set of already processed models.
    :param created_rules: Dict of already created rules (forwarded to nested calls).
    :param field_info: Additional information about the field (optional).

    :return: Tuple containing the GBNF type and a list of additional rules.
    :rtype: Tuple[str, list]
    """
    rules = []

    field_name = format_model_and_field_name(field_name)
    gbnf_type = map_pydantic_type_to_gbnf(field_type)
    origin = get_origin(field_type)
    # Extra schema data attached to the field, or None when there is none.
    schema_extra = getattr(field_info, "json_schema_extra", None) if field_info else None

    if isclass(field_type) and issubclass(field_type, BaseModel):
        nested_result = generate_gbnf_grammar(field_type, processed_models, created_rules)
        # A model that was already processed may yield an empty result instead
        # of a (rules, flag) pair; unpacking it blindly would raise ValueError.
        if nested_result:
            rules.extend(nested_result[0])
        gbnf_type = format_model_and_field_name(field_type.__name__)

    elif isclass(field_type) and issubclass(field_type, Enum):
        enum_values = [f'"\\"{e.value}\\""' for e in field_type]  # Adding escaped quotes
        gbnf_type = f"{model_name}-{field_name}"
        rules.append(f"{gbnf_type} ::= {' | '.join(enum_values)}")

    elif origin == list or origin == set or field_type == set:
        # Lists and sets are both emitted as a bracketed, comma-separated array.
        element_type = get_args(field_type)[0]
        element_rule_name, element_rules = generate_gbnf_rule_for_type(
            model_name, f"{field_name}-element", element_type, is_optional, processed_models, created_rules
        )
        rules.extend(element_rules)
        array_rule = f"""{model_name}-{field_name} ::= "[" ws {element_rule_name} ("," ws {element_rule_name})* "]" """
        rules.append(array_rule)
        gbnf_type = f"{model_name}-{field_name}"

    elif gbnf_type.startswith("custom-class-"):
        class_rule, _ = get_members_structure(field_type, gbnf_type)
        rules.append(class_rule)

    elif gbnf_type.startswith("custom-dict-"):
        key_type, value_type = get_args(field_type)

        additional_key_type, additional_key_rules = generate_gbnf_rule_for_type(
            model_name, f"{field_name}-key-type", key_type, is_optional, processed_models, created_rules
        )
        additional_value_type, additional_value_rules = generate_gbnf_rule_for_type(
            model_name, f"{field_name}-value-type", value_type, is_optional, processed_models, created_rules
        )
        # NOTE(review): unlike the other branches, the returned "type" here is
        # a complete rule definition rather than a rule name -- confirm intent.
        gbnf_type = rf'{gbnf_type} ::= "{{" ( {additional_key_type} ": " {additional_value_type} ("," "\n" ws {additional_key_type} ":" {additional_value_type})* )? "}}" '

        rules.extend(additional_key_rules)
        rules.extend(additional_value_rules)

    elif gbnf_type.startswith("union-"):
        union_rules = []
        for union_type in get_args(field_type):
            # NoneType members are skipped; generic aliases are never passed
            # to issubclass() thanks to the short-circuit.
            if isinstance(union_type, _GenericAlias) or not issubclass(union_type, NoneType):
                union_gbnf_type, union_rules_list = generate_gbnf_rule_for_type(
                    model_name, field_name, union_type, False, processed_models, created_rules
                )
                union_rules.append(union_gbnf_type)
                rules.extend(union_rules_list)

        # A single remaining alternative is emitted as "<type> | null".
        if len(union_rules) == 1:
            gbnf_type = f"{model_name}-{field_name}-optional"
            rules.append(f"{gbnf_type} ::= {' | '.join(union_rules)} | null")
        else:
            gbnf_type = f"{model_name}-{field_name}-union"
            rules.append(f"{gbnf_type} ::= {' | '.join(union_rules)}")

    elif isclass(field_type) and issubclass(field_type, str):
        if schema_extra is not None:
            triple_quoted_string = schema_extra.get("triple_quoted_string", False)
            markdown_string = schema_extra.get("markdown_code_block", False)

            # The markdown flag wins when both flags are set.
            if markdown_string:
                gbnf_type = PydanticDataType.MARKDOWN_CODE_BLOCK.value
            elif triple_quoted_string:
                gbnf_type = PydanticDataType.TRIPLE_QUOTED_STRING.value
            else:
                gbnf_type = PydanticDataType.STRING.value

        elif field_info and hasattr(field_info, "pattern"):
            # Convert regex pattern to grammar rule
            # NOTE(review): the guard checks `pattern` but the value is read
            # from `regex.pattern` -- confirm which attribute field_info has.
            regex_pattern = field_info.regex.pattern
            gbnf_type = f"pattern-{field_name} ::= {regex_to_gbnf(regex_pattern)}"
        else:
            gbnf_type = PydanticDataType.STRING.value

    elif isclass(field_type) and issubclass(field_type, float) and schema_extra is not None:
        # Digit / precision limits for floats come from the extra schema data.
        gbnf_type, rules = generate_gbnf_float_rules(
            max_digit=schema_extra.get("max_digit"),
            min_digit=schema_extra.get("min_digit"),
            max_precision=schema_extra.get("max_precision"),
            min_precision=schema_extra.get("min_precision"),
        )

    elif isclass(field_type) and issubclass(field_type, int) and schema_extra is not None:
        # Digit limits for integers come from the extra schema data.
        gbnf_type, rules = generate_gbnf_integer_rules(
            max_digit=schema_extra.get("max_digit"), min_digit=schema_extra.get("min_digit")
        )

    # Anything else keeps the name produced by map_pydantic_type_to_gbnf and
    # contributes no additional rules.
    return gbnf_type, rules
436
+
437
+
438
def generate_gbnf_grammar(model: Type[BaseModel], processed_models: set, created_rules: dict) -> Tuple[list, bool]:
    """
    Generate the GBNF grammar rules for a given model.

    :param model: The model class to generate the grammar for. Pydantic models are
        read through their annotations and ``model_fields``; other classes through
        their annotations or, failing that, the parameters of ``__init__``.
    :param processed_models: A set of already processed models to prevent infinite recursion.
        The model is added to it.
    :param created_rules: A dict containing already created rules to prevent duplicates.
        Newly created rules are recorded in it under their rule name.
    :return: A tuple ``(rules, has_special_string)``: the list of GBNF rules in string
        format (the model rule first, then nested rules) and a boolean telling whether
        the model ends with a markdown code block or triple quoted string. A model that
        was already processed yields ``([], False)``.

    Example Usage:
    ```
    model = MyModel
    processed_models = set()
    created_rules = dict()

    rules, has_special_string = generate_gbnf_grammar(model, processed_models, created_rules)
    ```
    """
    if model in processed_models:
        # Callers unpack two values, so the "nothing new" result must keep the
        # same (rules, flag) shape as the normal return below.
        return [], False

    processed_models.add(model)
    model_name = format_model_and_field_name(model.__name__)
    is_pydantic_model = issubclass(model, BaseModel)

    if is_pydantic_model:
        # For Pydantic models the annotations give the field types; the
        # FieldInfo objects are looked up per field further down.
        model_fields = model.__annotations__
    elif hasattr(model, "__annotations__") and model.__annotations__:
        # Plain class with annotations: every field is marked required (...).
        model_fields = {name: (typ, ...) for name, typ in model.__annotations__.items()}
    else:
        # Plain class without annotations: fall back to the __init__ signature.
        init_parameters = inspect.signature(model.__init__).parameters
        model_fields = {
            name: (param.annotation, param.default) for name, param in init_parameters.items() if name != "self"
        }

    model_rule_parts = []
    nested_rules = []
    has_markdown_code_block = False
    has_triple_quoted_string = False

    for field_name, field_entry in model_fields.items():
        if is_pydantic_model:
            field_type = field_entry
            field_info = model.model_fields[field_name]
            # NOTE(review): `is_required` is compared as an attribute and
            # get_origin() is compared with `Optional`; confirm this
            # expression can ever evaluate to True.
            is_optional = field_info.is_required is False and get_origin(field_type) is Optional
        else:
            field_info = field_entry
            field_type, default_value = field_entry
            # Check if the field is optional (not required)
            is_optional = (default_value is not inspect.Parameter.empty) and (default_value is not Ellipsis)

        rule_name, additional_rules = generate_gbnf_rule_for_type(
            model_name, format_model_and_field_name(field_name), field_type, is_optional, processed_models, created_rules, field_info
        )

        look_for_markdown_code_block = rule_name == "markdown_code_block"
        look_for_triple_quoted_string = rule_name == "triple_quoted_string"

        if look_for_markdown_code_block or look_for_triple_quoted_string:
            # Special strings are not part of the JSON object; they are
            # appended after it once the object rule has been assembled.
            has_triple_quoted_string = look_for_triple_quoted_string
            has_markdown_code_block = look_for_markdown_code_block
            continue

        if rule_name not in created_rules:
            created_rules[rule_name] = additional_rules
        model_rule_parts.append(f' ws "\\"{field_name}\\"" ":" ws {rule_name}')  # Adding escaped quotes
        nested_rules.extend(additional_rules)

    fields_joined = r' "," "\n" '.join(model_rule_parts)
    model_rule = rf'{model_name} ::= "{{" "\n" {fields_joined} "\n" ws "}}"'

    has_special_string = False
    if has_triple_quoted_string:
        model_rule += '"\\n" ws "}"'
        model_rule += '"\\n" triple-quoted-string'
        has_special_string = True
    if has_markdown_code_block:
        model_rule += '"\\n" ws "}"'
        model_rule += '"\\n" markdown-code-block'
        has_special_string = True

    return [model_rule] + nested_rules, has_special_string
519
+
520
+
521
def generate_gbnf_grammar_from_pydantic_models(
    models: List[Type[BaseModel]],
    outer_object_name: str = None,
    outer_object_content: str = None,
    list_of_outputs: bool = False,
    add_inner_thoughts: bool = False,
    allow_only_inner_thoughts: bool = False,
) -> str:
    """
    Build a GBNF grammar string for a list of Pydantic models.

    Two layouts are produced:

    * Without ``outer_object_name`` the ``root`` rule accepts one of the model
      rules directly (or a bracketed, comma separated list of them when
      ``list_of_outputs`` is set).
    * With ``outer_object_name`` the ``root`` rule accepts an outer object whose
      ``outer_object_name`` key carries the quoted model name and whose
      ``outer_object_content`` key carries the model body. An optional leading
      ``inner_thoughts`` string can be required in front of it.

    Args:
        models (List[Type[BaseModel]]): Models to generate the grammar from.
        outer_object_name (str): Key naming the outer object, e.g. "function".
            If None, no outer object is generated.
        outer_object_content (str): Key holding the model body inside the outer
            object, e.g. "params".
        list_of_outputs (bool): Allow a list of output objects.
        add_inner_thoughts (bool): Add an ``inner_thoughts`` string on the top
            level (only used together with ``outer_object_name``).
        allow_only_inner_thoughts (bool): Make the part after ``inner_thoughts``
            optional, i.e. allow inner thoughts without a function call.

    Returns:
        str: The generated GBNF grammar (without the primitive rules).
    """
    processed_models = set()
    created_rules = {}
    rules = []

    # Layout 1: the models themselves are the top-level alternatives.
    if outer_object_name is None:
        for pyd_model in models:
            model_rules, _ = generate_gbnf_grammar(pyd_model, processed_models, created_rules)
            rules.extend(model_rules)

        if list_of_outputs:
            root_line = r'root ::= (" "| "\n") "[" ws grammar-models ("," ws grammar-models)* ws "]"'
        else:
            root_line = r'root ::= (" "| "\n") grammar-models'
        alternatives = " | ".join(format_model_and_field_name(pyd_model.__name__) for pyd_model in models)
        rules.insert(0, root_line + "\n" + "grammar-models ::= " + alternatives)
        return "\n".join(rules)

    # Layout 2: every model is wrapped in an outer object.
    outer_rule = format_model_and_field_name(outer_object_name)
    if list_of_outputs:
        root_rule = rf'root ::= (" "| "\n") "[" ws {outer_rule} ("," ws {outer_rule})* ws "]"' + "\n"
    else:
        root_rule = f"root ::= {outer_rule}\n"

    # The outer rule is assembled from shared fragments; the closing brace is
    # appended to the individual model rules further down.
    object_open = rf'{outer_rule} ::= (" "| "\n") "{{" ws '
    outer_member = rf'"\"{outer_object_name}\"" ":" ws grammar-models'
    thoughts_member = r'"\"inner_thoughts\"" ":" ws string '
    if not add_inner_thoughts:
        model_rule = object_open + outer_member
    elif allow_only_inner_thoughts:
        model_rule = object_open + thoughts_member + r'("," "\n" ws ' + outer_member + ")?"
    else:
        model_rule = object_open + thoughts_member + r'"," "\n" ws ' + outer_member

    # One "<model>-grammar-model" wrapper rule per model, listed as alternatives.
    wrapped = [(pyd_model.__name__, format_model_and_field_name(pyd_model.__name__)) for pyd_model in models]
    alternatives = " | ".join(f"{rule_name}-grammar-model" for _, rule_name in wrapped)
    wrapper_rules = "\n".join(
        rf'{rule_name}-grammar-model ::= "\"{raw_name}\"" "," ws "\"{outer_object_content}\"" ":" ws {rule_name}' + "\n"
        for raw_name, rule_name in wrapped
    )
    grammar_model_rules = f"\ngrammar-models ::= {alternatives}" + "\n" + wrapper_rules

    for pyd_model in models:
        model_rules, has_special_string = generate_gbnf_grammar(pyd_model, processed_models, created_rules)
        if not has_special_string:
            # Close the outer object opened by the outer rule.
            model_rules[0] += r'"\n" ws "}"'
        rules.extend(model_rules)

    rules.insert(0, root_rule + model_rule + grammar_model_rules)
    return "\n".join(rules)
607
+
608
+
609
def get_primitive_grammar(grammar):
    """
    Return the primitive GBNF rules needed by a given GBNF grammar string.

    The result always contains the boolean/null/string/ws/float/integer rules.
    Further rule groups are added only when the grammar text mentions them:
    list rules for "string-list", "boolean-list", "integer-list" and
    "float-list", the generic JSON value rules for "custom-class-any", and the
    "markdown-code-block" / "triple-quoted-string" rule groups.

    Args:
        grammar (str): The string containing the GBNF grammar.

    Returns:
        str: GBNF primitive grammar string.
    """
    # Order matters: list rules are emitted in this order.
    list_markers = (("string-list", str), ("boolean-list", bool), ("integer-list", int), ("float-list", float))
    type_list = [element_type for marker, element_type in list_markers if marker in grammar]
    additional_grammar = [generate_list_rule(t) for t in type_list]

    primitive_grammar = r"""
boolean ::= "true" | "false"
null ::= "null"
string ::= "\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\""
ws ::= ([ \t\n] ws)?
float ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws

integer ::= [0-9]+"""

    any_block = ""
    if "custom-class-any" in grammar:
        any_block = """
value ::= object | array | string | number | boolean | null

object ::=
"{" ws (
string ":" ws value
("," ws string ":" ws value)*
)? "}"

array ::=
"[" ws (
value
("," ws value)*
)? "]"

number ::= integer | float"""

    # Both special string forms may be referenced by the same grammar, so the
    # rule groups are accumulated instead of one replacing the other.
    special_string_grammar = ""
    if "markdown-code-block" in grammar:
        special_string_grammar += r'''
markdown-code-block ::= opening-triple-ticks markdown-code-block-content closing-triple-ticks
markdown-code-block-content ::= ( [^`] | "`" [^`] | "`" "`" [^`] )*
opening-triple-ticks ::= "```" "python" "\n" | "```" "c" "\n" | "```" "cpp" "\n" | "```" "txt" "\n" | "```" "text" "\n" | "```" "json" "\n" | "```" "javascript" "\n" | "```" "css" "\n" | "```" "html" "\n" | "```" "markdown" "\n"
closing-triple-ticks ::= "```" "\n"'''

    if "triple-quoted-string" in grammar:
        special_string_grammar += r"""
triple-quoted-string ::= triple-quotes triple-quoted-string-content triple-quotes
triple-quoted-string-content ::= ( [^'] | "'" [^'] | "'" "'" [^'] )*
triple-quotes ::= "'''" """

    return "\n" + "\n".join(additional_grammar) + any_block + primitive_grammar + special_string_grammar
674
+
675
+
676
def generate_markdown_documentation(
    pydantic_models: List[Type[BaseModel]], model_prefix="Model", fields_prefix="Fields", documentation_with_field_description=True
) -> str:
    """
    Generate markdown documentation for a list of Pydantic models.

    The given models are documented with ``model_prefix`` / ``fields_prefix``
    headings. Pydantic models found in field annotations (directly, as list
    element, or as union member) are queued and documented afterwards with
    "class" / "attributes" headings.

    Args:
        pydantic_models (List[Type[BaseModel]]): List of Pydantic model classes.
        model_prefix (str): Prefix for the model section.
        fields_prefix (str): Prefix for the fields section.
        documentation_with_field_description (bool): Include field descriptions in the documentation.

    Returns:
        str: Generated text documentation.
    """
    sections = []
    # Work queue of (model, is_top_level); nested models are appended while it is being walked.
    queue = [(top_level_model, True) for top_level_model in pydantic_models]
    base_model_doc = getdoc(BaseModel)

    for model, is_top_level in queue:
        heading = model_prefix if is_top_level else "class"
        sections.append(f"{heading}: {model.__name__}\n")

        # Only a docstring that differs from BaseModel's own is a real description.
        model_doc = getdoc(model)
        if model_doc and model_doc != base_model_doc:
            sections.append(format_multiline_description("description: " + model_doc, 1) + "\n")

        fields_heading = fields_prefix if is_top_level else "attributes"
        sections.append(f" {fields_heading}:\n")

        if isclass(model) and issubclass(model, BaseModel):
            for name, field_type in model.__annotations__.items():
                # Collect the types that may be nested models: the field type
                # itself, a list's element type, or the members of a union.
                origin = get_origin(field_type)
                candidates = [field_type]
                if origin == list:
                    candidates.append(get_args(field_type)[0])
                if origin == Union:
                    candidates.extend(get_args(field_type))
                for candidate in candidates:
                    if isclass(candidate) and issubclass(candidate, BaseModel):
                        queue.append((candidate, False))

                sections.append(
                    generate_field_markdown(
                        name, field_type, model, documentation_with_field_description=documentation_with_field_description
                    )
                )
            sections.append("\n")

        has_example = (
            hasattr(model, "Config") and hasattr(model.Config, "json_schema_extra") and "example" in model.Config.json_schema_extra
        )
        if has_example:
            sections.append(f" Expected Example Output for {format_model_and_field_name(model.__name__)}:\n")
            json_example = json_dumps(model.Config.json_schema_extra["example"])
            sections.append(format_multiline_description(json_example, 2) + "\n")

    return "".join(sections)
738
+
739
+
740
def generate_field_markdown(
    field_name: str, field_type: Type[Any], model: Type[BaseModel], depth=1, documentation_with_field_description=True
) -> str:
    """
    Generate markdown documentation for a Pydantic model field.

    The field is rendered as ``<indent><name> (<type>)``. Lists are shown as
    "<list type> of <element type>", unions as their members joined by " or ",
    and enums as their quoted values joined by " or ". When descriptions are
    enabled, the field description, a field example taken from
    ``model.Config.json_schema_extra["example"]`` and, for nested Pydantic
    models, the nested fields are appended.

    Args:
        field_name (str): Name of the field.
        field_type (Type[Any]): Type of the field.
        model (Type[BaseModel]): Pydantic model class.
        depth (int): Indentation depth in the documentation.
        documentation_with_field_description (bool): Include field descriptions in the documentation.

    Returns:
        str: Generated text documentation for the field.
    """
    indent = " " * depth

    def type_label(type_) -> str:
        # Not every typing construct has __name__; fall back to its repr.
        return getattr(type_, "__name__", str(type_))

    field_info = model.model_fields.get(field_name)
    field_description = field_info.description if field_info and field_info.description else ""

    origin = get_origin(field_type)
    if origin == list:
        element_args = get_args(field_type)
        # A bare ``List`` carries no element type.
        element_label = type_label(element_args[0]) if element_args else "Any"
        type_text = f"{type_label(field_type)} of {element_label}"
    elif origin == Union:
        type_text = " or ".join(type_label(element_type) for element_type in get_args(field_type))
    elif isclass(field_type) and issubclass(field_type, Enum):
        # isclass guard: issubclass raises TypeError for non-class annotations
        # such as Dict[str, int].
        type_text = " or ".join(f"'{str(member.value)}'" for member in field_type)
    else:
        type_text = type_label(field_type)

    field_text = f"{indent}{field_name} ({type_text})"

    if not documentation_with_field_description:
        # Always terminate the line so consecutive fields do not run together.
        return field_text + "\n"

    if field_description != "":
        field_text += ": " + field_description + "\n"
    else:
        field_text += "\n"

    # Check for and include field-specific examples if available
    if hasattr(model, "Config") and hasattr(model.Config, "json_schema_extra") and "example" in model.Config.json_schema_extra:
        field_example = model.Config.json_schema_extra["example"].get(field_name)
        if field_example is not None:
            example_text = f"'{field_example}'" if isinstance(field_example, str) else field_example
            field_text += f"{indent} Example: {example_text}\n"

    # Nested models are expanded in place, two levels deeper.
    if isclass(field_type) and issubclass(field_type, BaseModel):
        field_text += f"{indent} details:\n"
        for name, type_ in field_type.__annotations__.items():
            field_text += generate_field_markdown(name, type_, field_type, depth + 2)

    return field_text
812
+
813
+
814
def format_json_example(example: dict, depth: int) -> str:
    """
    Render a JSON example dictionary as a brace-delimited, indented string.

    Each entry becomes one ``key: value`` line; string values are wrapped in
    single quotes, other values are inserted as they are.

    Args:
        example (dict): JSON example to be formatted.
        depth (int): Indentation depth.

    Returns:
        str: Formatted JSON example string.
    """
    indent = " " * depth
    entries = []
    for key, value in example.items():
        rendered = f"'{value}'" if isinstance(value, str) else value
        entries.append(f"{indent}{key}: {rendered},\n")
    # Drop the trailing separator/newline before closing the brace.
    body = ("{\n" + "".join(entries)).rstrip(",\n")
    return f"{body}\n{indent}}}"
832
+
833
+
834
def generate_text_documentation(
    pydantic_models: List[Type[BaseModel]], model_prefix="Model", fields_prefix="Fields", documentation_with_field_description=True
) -> str:
    """
    Generate plain-text documentation for a list of Pydantic models.

    The given models are documented with ``model_prefix`` / ``fields_prefix``
    headings. Pydantic models found as list elements or union members of a
    field annotation are queued and documented afterwards with "Model" /
    "Fields" headings.

    Args:
        pydantic_models (List[Type[BaseModel]]): List of Pydantic model classes.
        model_prefix (str): Prefix for the model section.
        fields_prefix (str): Prefix for the fields section.
        documentation_with_field_description (bool): Include field descriptions in the documentation.

    Returns:
        str: Generated text documentation.
    """
    chunks = []
    # Work queue of (model, is_top_level); nested models are appended while it is being walked.
    queue = [(top_level_model, True) for top_level_model in pydantic_models]
    base_model_doc = getdoc(BaseModel)

    for model, is_top_level in queue:
        heading = model_prefix if is_top_level else "Model"
        chunks.append(f"{heading}: {model.__name__}\n")

        # Only a docstring that differs from BaseModel's own is a real description.
        model_doc = getdoc(model)
        if model_doc and model_doc != base_model_doc:
            chunks.append(" Description: " + "\n" + format_multiline_description(model_doc, 2) + "\n")

        if isclass(model) and issubclass(model, BaseModel):
            field_chunks = []
            for name, field_type in model.__annotations__.items():
                origin = get_origin(field_type)
                nested = []
                if origin == list:
                    nested.append(get_args(field_type)[0])
                if origin == Union:
                    nested.extend(get_args(field_type))
                for candidate in nested:
                    if isclass(candidate) and issubclass(candidate, BaseModel):
                        queue.append((candidate, False))

                field_chunks.append(
                    generate_field_text(
                        name, field_type, model, documentation_with_field_description=documentation_with_field_description
                    )
                )

            fields_text = "".join(field_chunks)
            if fields_text != "":
                fields_heading = fields_prefix if is_top_level else "Fields"
                chunks.append(f" {fields_heading}:\n{fields_text}")
            chunks.append("\n")

        has_example = (
            hasattr(model, "Config") and hasattr(model.Config, "json_schema_extra") and "example" in model.Config.json_schema_extra
        )
        if has_example:
            chunks.append(f" Expected Example Output for {format_model_and_field_name(model.__name__)}:\n")
            json_example = json.dumps(model.Config.json_schema_extra["example"])
            chunks.append(format_multiline_description(json_example, 2) + "\n")

    return "".join(chunks)
896
+
897
+
898
def generate_field_text(
    field_name: str, field_type: Type[Any], model: Type[BaseModel], depth=1, documentation_with_field_description=True
) -> str:
    """
    Generate plain-text documentation for a Pydantic model field.

    The field is rendered as ``<indent><name> (<type>)`` using formatted type
    names: lists as "<list type> of <element type>", unions as their members
    joined by " or ". When descriptions are enabled, a "Description" line, an
    "Example" line taken from ``model.Config.json_schema_extra["example"]``
    and, for nested Pydantic models, a "Details" section are appended.

    Args:
        field_name (str): Name of the field.
        field_type (Type[Any]): Type of the field.
        model (Type[BaseModel]): Pydantic model class.
        depth (int): Indentation depth in the documentation.
        documentation_with_field_description (bool): Include field descriptions in the documentation.

    Returns:
        str: Generated text documentation for the field.
    """
    indent = " " * depth

    field_info = model.model_fields.get(field_name)
    field_description = field_info.description if field_info and field_info.description else ""

    origin = get_origin(field_type)
    if origin == list:
        element_type = get_args(field_type)[0]
        container_label = format_model_and_field_name(field_type.__name__)
        element_label = format_model_and_field_name(element_type.__name__)
        type_text = f"{container_label} of {element_label}"
    elif origin == Union:
        member_labels = [format_model_and_field_name(element_type.__name__) for element_type in get_args(field_type)]
        type_text = " or ".join(member_labels)
    else:
        type_text = format_model_and_field_name(field_type.__name__)

    # A described field gets a trailing colon on its header line.
    header_end = ":\n" if field_description != "" else "\n"
    field_text = f"{indent}{field_name} ({type_text}){header_end}"

    if not documentation_with_field_description:
        return field_text

    if field_description != "":
        field_text += f"{indent} Description: " + field_description + "\n"

    # Check for and include field-specific examples if available
    if hasattr(model, "Config") and hasattr(model.Config, "json_schema_extra") and "example" in model.Config.json_schema_extra:
        field_example = model.Config.json_schema_extra["example"].get(field_name)
        if field_example is not None:
            example_text = f"'{field_example}'" if isinstance(field_example, str) else field_example
            field_text += f"{indent} Example: {example_text}\n"

    # Nested models are expanded in place, two levels deeper.
    if isclass(field_type) and issubclass(field_type, BaseModel):
        field_text += f"{indent} Details:\n"
        for name, type_ in field_type.__annotations__.items():
            field_text += generate_field_text(name, type_, field_type, depth + 2)

    return field_text
962
+
963
+
964
def format_multiline_description(description: str, indent_level: int) -> str:
    """
    Indent every line of a (possibly multiline) description.

    Args:
        description (str): Multiline description.
        indent_level (int): Indentation level.

    Returns:
        str: The description with the indentation prepended to each
        newline-separated line.
    """
    prefix = " " * indent_level
    return prefix + ("\n" + prefix).join(description.split("\n"))
977
+
978
+
979
def save_gbnf_grammar_and_documentation(
    grammar, documentation, grammar_file_path="./grammar.gbnf", documentation_file_path="./grammar_documentation.md"
):
    """
    Write a GBNF grammar and its documentation to two files.

    The grammar file receives the grammar followed by the primitive rules it
    needs. Each file is written independently; an IOError is reported on stdout
    and does not prevent the other file from being written.

    Args:
        grammar (str): GBNF grammar string.
        documentation (str): Documentation string.
        grammar_file_path (str): File path to save the GBNF grammar.
        documentation_file_path (str): File path to save the documentation.

    Returns:
        None
    """
    # (label, destination, lazily rendered content) - content is produced only
    # after the destination has been opened.
    outputs = (
        ("Grammar", grammar_file_path, lambda: grammar + get_primitive_grammar(grammar)),
        ("Documentation", documentation_file_path, lambda: documentation),
    )
    for label, destination, render in outputs:
        try:
            with open(destination, "w", encoding="utf-8") as handle:
                handle.write(render())
            print(f"{label} successfully saved to {destination}")
        except IOError as error:
            print(f"An error occurred while saving the {label.lower()} file: {error}")
1007
+
1008
+
1009
def remove_empty_lines(string):
    """
    Drop all lines that are empty or contain only whitespace.

    Args:
        string (str): Input string.

    Returns:
        str: The remaining lines joined with newlines.
    """
    return "\n".join(line for line in string.splitlines() if line.strip())
1023
+
1024
+
1025
def generate_and_save_gbnf_grammar_and_documentation(
    pydantic_model_list,
    grammar_file_path="./generated_grammar.gbnf",
    documentation_file_path="./generated_grammar_documentation.md",
    outer_object_name: str = None,
    outer_object_content: str = None,
    model_prefix: str = "Output Model",
    fields_prefix: str = "Output Fields",
    list_of_outputs: bool = False,
    documentation_with_field_description=True,
):
    """
    Generate the GBNF grammar and markdown documentation for the given models
    and write both to files.

    Args:
        pydantic_model_list: List of Pydantic model classes.
        grammar_file_path (str): File path to save the generated GBNF grammar.
        documentation_file_path (str): File path to save the generated documentation.
        outer_object_name (str): Outer object name for the GBNF grammar. If None, no outer object will be generated. Eg. "function" for function calling.
        outer_object_content (str): Content for the outer rule in the GBNF grammar. Eg. "function_parameters" or "params" for function calling.
        model_prefix (str): Prefix for the model section in the documentation.
        fields_prefix (str): Prefix for the fields section in the documentation.
        list_of_outputs (bool): Whether the output is a list of items.
        documentation_with_field_description (bool): Include field descriptions in the documentation.

    Returns:
        None
    """
    documentation = generate_markdown_documentation(
        pydantic_model_list,
        model_prefix,
        fields_prefix,
        documentation_with_field_description=documentation_with_field_description,
    )
    # The primitive rules are appended by the save helper, not here.
    model_grammar = generate_gbnf_grammar_from_pydantic_models(
        models=pydantic_model_list,
        outer_object_name=outer_object_name,
        outer_object_content=outer_object_content,
        list_of_outputs=list_of_outputs,
    )
    save_gbnf_grammar_and_documentation(
        remove_empty_lines(model_grammar), documentation, grammar_file_path, documentation_file_path
    )
1059
+
1060
+
1061
def generate_gbnf_grammar_and_documentation(
    pydantic_model_list,
    outer_object_name: str = None,
    outer_object_content: str = None,
    model_prefix: str = "Output Model",
    fields_prefix: str = "Output Fields",
    list_of_outputs: bool = False,
    add_inner_thoughts: bool = False,
    allow_only_inner_thoughts: bool = False,
    documentation_with_field_description=True,
):
    """
    Generate the complete GBNF grammar (model rules plus primitive rules, empty
    lines removed) and the markdown documentation for a list of Pydantic models.

    Args:
        pydantic_model_list: List of Pydantic model classes.
        outer_object_name (str): Outer object name for the GBNF grammar. If None, no outer object will be generated. Eg. "function" for function calling.
        outer_object_content (str): Content for the outer rule in the GBNF grammar. Eg. "function_parameters" or "params" for function calling.
        model_prefix (str): Prefix for the model section in the documentation.
        fields_prefix (str): Prefix for the fields section in the documentation.
        list_of_outputs (bool): Whether the output is a list of items.
        add_inner_thoughts (bool): Add inner thoughts field on the top level.
        allow_only_inner_thoughts (bool): Allow inner thoughts without a function call.
        documentation_with_field_description (bool): Include field descriptions in the documentation.

    Returns:
        tuple: GBNF grammar string, documentation string.
    """
    documentation = generate_markdown_documentation(
        copy(pydantic_model_list),
        model_prefix,
        fields_prefix,
        documentation_with_field_description=documentation_with_field_description,
    )
    model_grammar = generate_gbnf_grammar_from_pydantic_models(
        models=pydantic_model_list,
        outer_object_name=outer_object_name,
        outer_object_content=outer_object_content,
        list_of_outputs=list_of_outputs,
        add_inner_thoughts=add_inner_thoughts,
        allow_only_inner_thoughts=allow_only_inner_thoughts,
    )
    full_grammar = remove_empty_lines(model_grammar + get_primitive_grammar(model_grammar))
    return full_grammar, documentation
1097
+
1098
+
1099
def generate_gbnf_grammar_and_documentation_from_dictionaries(
    dictionaries: List[dict],
    outer_object_name: str = None,
    outer_object_content: str = None,
    model_prefix: str = "Output Model",
    fields_prefix: str = "Output Fields",
    list_of_outputs: bool = False,
    documentation_with_field_description=True,
):
    """
    Convert dictionaries into dynamic Pydantic models, then generate the
    complete GBNF grammar (model rules plus primitive rules, empty lines
    removed) and the markdown documentation for them.

    Args:
        dictionaries (List[dict]): List of dictionaries representing Pydantic models.
        outer_object_name (str): Outer object name for the GBNF grammar. If None, no outer object will be generated. Eg. "function" for function calling.
        outer_object_content (str): Content for the outer rule in the GBNF grammar. Eg. "function_parameters" or "params" for function calling.
        model_prefix (str): Prefix for the model section in the documentation.
        fields_prefix (str): Prefix for the fields section in the documentation.
        list_of_outputs (bool): Whether the output is a list of items.
        documentation_with_field_description (bool): Include field descriptions in the documentation.

    Returns:
        tuple: GBNF grammar string, documentation string.
    """
    dynamic_models = create_dynamic_models_from_dictionaries(dictionaries)
    documentation = generate_markdown_documentation(
        copy(dynamic_models),
        model_prefix,
        fields_prefix,
        documentation_with_field_description=documentation_with_field_description,
    )
    model_grammar = generate_gbnf_grammar_from_pydantic_models(
        models=dynamic_models,
        outer_object_name=outer_object_name,
        outer_object_content=outer_object_content,
        list_of_outputs=list_of_outputs,
    )
    full_grammar = remove_empty_lines(model_grammar + get_primitive_grammar(model_grammar))
    return full_grammar, documentation
1130
+
1131
+
1132
def create_dynamic_model_from_function(func: Callable, add_inner_thoughts: bool = False):
    """
    Create a dynamic Pydantic model from a function's signature and docstring,
    and attach the function to it as a ``run`` method.

    Every parameter except ``self`` becomes a model field typed by its
    annotation and described by the matching docstring entry; parameters
    without a default become required fields. The model is named after the
    function and documented with the docstring's short description.

    Args:
        func (Callable): A function with type hints from which to create the model.
        add_inner_thoughts: Add an optional ``inner_thoughts`` string field to
            the model. It is not forwarded to ``func`` by ``run`` unless ``func``
            itself declares a parameter of that name.

    Returns:
        A dynamic Pydantic model class with the provided function as a 'run' method.

    Raises:
        TypeError: If a parameter lacks a type annotation.
        ValueError: If a parameter lacks a description in the docstring.
    """
    # Get the signature of the function
    sig = inspect.signature(func)

    # Parse the docstring
    docstring = parse(func.__doc__)

    # Index the documented parameters by name; the first entry for a name wins.
    documented = {}
    for entry in docstring.params:
        documented.setdefault(entry.arg_name, entry)

    dynamic_fields = {}
    if add_inner_thoughts:
        dynamic_fields["inner_thoughts"] = (str, None)

    # Names that func actually accepts; only these are forwarded by run().
    call_arg_names = []
    for param in sig.parameters.values():
        # Exclude 'self' parameter
        if param.name == "self":
            continue

        if param.annotation is inspect.Parameter.empty:
            raise TypeError(f"Parameter '{param.name}' in function '{func.__name__}' lacks a type annotation")

        param_doc = documented.get(param.name)
        if not param_doc or not param_doc.description:
            raise ValueError(f"Parameter '{param.name}' in function '{func.__name__}' lacks a description in the docstring")

        # Ellipsis marks a required field.
        default_value = ... if param.default is inspect.Parameter.empty else param.default
        dynamic_fields[param.name] = (param.annotation, default_value)
        call_arg_names.append(param.name)

    # Creating the dynamic model
    dynamic_model = create_model(f"{func.__name__}", **dynamic_fields)
    if add_inner_thoughts:
        dynamic_model.model_fields["inner_thoughts"].description = "Deep inner monologue private to you only."
    for name in call_arg_names:
        dynamic_model.model_fields[name].description = documented[name].description

    dynamic_model.__doc__ = docstring.short_description

    def run_method_wrapper(self):
        # Forward only the function's own parameters; the synthetic
        # inner_thoughts field would otherwise be an unexpected keyword.
        func_args = {name: getattr(self, name) for name in call_arg_names}
        return func(**func_args)

    # Adding the wrapped function as a 'run' method
    setattr(dynamic_model, "run", run_method_wrapper)
    return dynamic_model
1195
+
1196
+
1197
def add_run_method_to_dynamic_model(model: Type[BaseModel], func: Callable):
    """
    Attach ``func`` to a dynamic Pydantic model as its ``run`` method.

    The attached method calls ``func`` with one keyword argument per entry in
    ``model.model_fields``, taking the values from the model instance.

    Args:
        model (Type[BaseModel]): Dynamic Pydantic model class.
        func (Callable): Function to be added as a 'run' method to the model.

    Returns:
        Type[BaseModel]: Pydantic model class with the added 'run' method.
    """

    def run_method_wrapper(self):
        keyword_args = {}
        for field_name in model.model_fields:
            keyword_args[field_name] = getattr(self, field_name)
        return func(**keyword_args)

    # The model class is modified in place and handed back for convenience.
    model.run = run_method_wrapper
    return model
1217
+
1218
+
1219
def create_dynamic_models_from_dictionaries(dictionaries: List[dict]):
    """
    Create one dynamic Pydantic model class per dictionary.

    Each model is named after the formatted value of the dictionary's "name"
    key (an empty string when the key is missing).

    Args:
        dictionaries (List[dict]): List of dictionaries representing model structures.

    Returns:
        List[Type[BaseModel]]: List of generated dynamic Pydantic model classes.
    """
    return [
        convert_dictionary_to_pydantic_model(spec, format_model_and_field_name(spec.get("name", "")))
        for spec in dictionaries
    ]
1235
+
1236
+
1237
def map_grammar_names_to_pydantic_model_class(pydantic_model_list):
    """
    Map each model's formatted class name to the model class itself.

    Args:
        pydantic_model_list: Iterable of model classes.

    Returns:
        dict: Formatted model name -> model class. A later model replaces an
        earlier one that formats to the same name.
    """
    return {format_model_and_field_name(model.__name__): model for model in pydantic_model_list}
1243
+
1244
+
1245
+ from enum import Enum
1246
+
1247
+
1248
def json_schema_to_python_types(schema):
    """
    Translate a JSON schema type name into the corresponding Python type.

    Supported names are "any", "string", "number", "integer", "boolean" and
    "array"; any other name raises KeyError.

    Args:
        schema (str): JSON schema type name.

    Returns:
        The Python type for the given name.
    """
    python_types = dict(
        any=Any,
        string=str,
        number=float,
        integer=int,
        boolean=bool,
        array=list,
    )
    return python_types[schema]
1258
+
1259
+
1260
def list_to_enum(enum_name, values):
    """
    Create an Enum class whose member names and member values are both taken
    from ``values``.

    Args:
        enum_name (str): Name of the generated Enum class.
        values: Iterable of values to turn into enum members.

    Returns:
        The generated Enum class.
    """
    members = dict((value, value) for value in values)
    return Enum(enum_name, members)
1262
+
1263
+
1264
def _mark_non_required_optional(fields: dict, required) -> None:
    """Wrap the type of every field that is not listed in ``required`` in ``Optional``."""
    for key, definition in fields.items():
        # "__doc__" holds the model docstring, not a (type, default) pair.
        if key == "__doc__" or key in required:
            continue
        # Only existing keys are reassigned, so mutating during iteration is safe.
        fields[key] = (Optional[definition[0]], ...)


def _array_item_type(items: dict, item_model_name: str):
    """Resolve the Python type of a single array element from its ``items`` schema."""
    item_enum = items.get("enum")
    item_type = items.get("type")
    if isinstance(item_enum, list) and item_enum:
        return list_to_enum(item_model_name, item_enum)
    if isinstance(item_type, str):
        if item_type == "object":
            return convert_dictionary_to_pydantic_model(items, item_model_name)
        if item_type == "array":
            nested_items = items.get("items", {})
            if nested_items:
                return List[_array_item_type(nested_items, f"{item_model_name}_items")]
            return list
        return json_schema_to_python_types(item_type)
    # Legacy shape: ``items`` is itself a mapping of property name -> property
    # schema, which is how this converter originally interpreted it.
    return convert_dictionary_to_pydantic_model({"properties": items}, item_model_name)


def convert_dictionary_to_pydantic_model(dictionary: dict, model_name: str = "CustomModel") -> Type[BaseModel]:
    """
    Convert a dictionary to a Pydantic model class.

    Three dictionary shapes are understood:

    * a schema with ``"properties"`` (and optionally ``"required"``);
    * a wrapper with a ``"function"`` entry holding ``name``, ``description``
      and ``parameters``;
    * a flat function description with a top-level ``"parameters"`` entry,
      which is treated like the ``"function"`` wrapper.

    Every generated field is declared with ``...`` as its default; fields not
    named in ``"required"`` have their type wrapped in ``Optional``.

    Args:
        dictionary (dict): Dictionary representing the model structure.
        model_name (str): Name of the generated Pydantic model. A ``name``
            found inside a ``"function"`` entry overrides it.

    Returns:
        Type[BaseModel]: Generated Pydantic model class.

    Raises:
        ValueError: If a property is not described by a dictionary.
        KeyError: If a property uses a type name unknown to
            ``json_schema_to_python_types``.
    """
    fields = {}

    if "properties" in dictionary:
        for field_name, field_data in dictionary.get("properties", {}).items():
            if not isinstance(field_data, dict):
                # The previous code either recursed forever (value "object") or
                # failed with an opaque AttributeError; report the real problem.
                raise ValueError(
                    f"Property {field_name!r} of {model_name!r} must be described by a dict, "
                    f"got {type(field_data).__name__}"
                )

            # A property without an explicit type is treated as a string
            # ("string" is the JSON Schema spelling the type map understands).
            field_type = field_data.get("type", "string")
            enum_values = field_data.get("enum", [])

            if enum_values:
                fields[field_name] = (list_to_enum(field_name, enum_values), ...)
            elif field_type == "array":
                items = field_data.get("items", {})
                if items:
                    item_type = _array_item_type(items, f"{model_name}_{field_name}_items")
                    fields[field_name] = (List[item_type], ...)
                else:
                    fields[field_name] = (list, ...)
            elif field_type == "object":
                submodel = convert_dictionary_to_pydantic_model(field_data, f"{model_name}_{field_name}")
                fields[field_name] = (submodel, ...)
            elif field_type == "required":
                # Legacy branch kept as-is: a pseudo-property typed "required"
                # relaxes the fields collected so far and adds no field itself.
                _mark_non_required_optional(fields, field_data.get("enum") or [])
            else:
                fields[field_name] = (json_schema_to_python_types(field_type), ...)

    if "function" in dictionary:
        function_spec = dictionary.get("function", {})
        # Read the entries explicitly so the result does not depend on key order
        # (previously a "name" listed after "parameters" was ignored).
        if "name" in function_spec:
            model_name = function_spec["name"]
        if "description" in function_spec:
            fields["__doc__"] = function_spec["description"]
        if "parameters" in function_spec:
            # NOTE(review): as before, the description is not carried over to the
            # model built from "parameters".
            return convert_dictionary_to_pydantic_model(function_spec["parameters"], f"{model_name}")

    if "parameters" in dictionary:
        return convert_dictionary_to_pydantic_model({"function": dictionary}, f"{model_name}")

    if "required" in dictionary:
        _mark_non_required_optional(fields, dictionary.get("required") or [])

    return create_model(model_name, **fields)