llama-stack 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. llama_stack/core/library_client.py +80 -3
  2. llama_stack/core/routing_tables/common.py +11 -0
  3. llama_stack/core/routing_tables/vector_stores.py +4 -0
  4. llama_stack/core/stack.py +16 -1
  5. llama_stack/core/storage/kvstore/kvstore.py +11 -0
  6. llama_stack/core/storage/kvstore/mongodb/mongodb.py +5 -0
  7. llama_stack/core/storage/kvstore/postgres/postgres.py +8 -0
  8. llama_stack/core/storage/kvstore/redis/redis.py +5 -0
  9. llama_stack/core/storage/sqlstore/sqlalchemy_sqlstore.py +8 -0
  10. llama_stack/core/storage/sqlstore/sqlstore.py +8 -0
  11. llama_stack/distributions/dell/doc_template.md +209 -0
  12. llama_stack/distributions/meta-reference-gpu/doc_template.md +119 -0
  13. llama_stack/distributions/nvidia/doc_template.md +170 -0
  14. llama_stack/distributions/oci/doc_template.md +140 -0
  15. llama_stack/models/llama/llama3/dog.jpg +0 -0
  16. llama_stack/models/llama/llama3/pasta.jpeg +0 -0
  17. llama_stack/models/llama/resources/dog.jpg +0 -0
  18. llama_stack/models/llama/resources/pasta.jpeg +0 -0
  19. llama_stack/models/llama/resources/small_dog.jpg +0 -0
  20. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +184 -33
  21. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +4 -0
  22. llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +9 -1
  23. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
  24. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
  25. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
  26. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
  27. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
  28. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
  29. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  30. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
  31. llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
  32. llama_stack/providers/remote/eval/nvidia/README.md +134 -0
  33. llama_stack/providers/remote/files/s3/README.md +266 -0
  34. llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
  35. llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
  36. llama_stack/providers/remote/safety/nvidia/README.md +78 -0
  37. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +13 -1
  38. llama_stack/providers/utils/inference/embedding_mixin.py +20 -16
  39. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +33 -0
  40. llama_stack/providers/utils/responses/responses_store.py +34 -0
  41. llama_stack/providers/utils/tools/mcp.py +258 -16
  42. {llama_stack-0.4.2.dist-info → llama_stack-0.4.4.dist-info}/METADATA +2 -2
  43. {llama_stack-0.4.2.dist-info → llama_stack-0.4.4.dist-info}/RECORD +47 -158
  44. {llama_stack-0.4.2.dist-info → llama_stack-0.4.4.dist-info}/WHEEL +1 -1
  45. llama_stack-0.4.4.dist-info/top_level.txt +1 -0
  46. llama_stack-0.4.2.dist-info/top_level.txt +0 -2
  47. llama_stack_api/__init__.py +0 -945
  48. llama_stack_api/admin/__init__.py +0 -45
  49. llama_stack_api/admin/api.py +0 -72
  50. llama_stack_api/admin/fastapi_routes.py +0 -117
  51. llama_stack_api/admin/models.py +0 -113
  52. llama_stack_api/agents.py +0 -173
  53. llama_stack_api/batches/__init__.py +0 -40
  54. llama_stack_api/batches/api.py +0 -53
  55. llama_stack_api/batches/fastapi_routes.py +0 -113
  56. llama_stack_api/batches/models.py +0 -78
  57. llama_stack_api/benchmarks/__init__.py +0 -43
  58. llama_stack_api/benchmarks/api.py +0 -39
  59. llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  60. llama_stack_api/benchmarks/models.py +0 -109
  61. llama_stack_api/common/__init__.py +0 -5
  62. llama_stack_api/common/content_types.py +0 -101
  63. llama_stack_api/common/errors.py +0 -95
  64. llama_stack_api/common/job_types.py +0 -38
  65. llama_stack_api/common/responses.py +0 -77
  66. llama_stack_api/common/training_types.py +0 -47
  67. llama_stack_api/common/type_system.py +0 -146
  68. llama_stack_api/connectors.py +0 -146
  69. llama_stack_api/conversations.py +0 -270
  70. llama_stack_api/datasetio.py +0 -55
  71. llama_stack_api/datasets/__init__.py +0 -61
  72. llama_stack_api/datasets/api.py +0 -35
  73. llama_stack_api/datasets/fastapi_routes.py +0 -104
  74. llama_stack_api/datasets/models.py +0 -152
  75. llama_stack_api/datatypes.py +0 -373
  76. llama_stack_api/eval.py +0 -137
  77. llama_stack_api/file_processors/__init__.py +0 -27
  78. llama_stack_api/file_processors/api.py +0 -64
  79. llama_stack_api/file_processors/fastapi_routes.py +0 -78
  80. llama_stack_api/file_processors/models.py +0 -42
  81. llama_stack_api/files/__init__.py +0 -35
  82. llama_stack_api/files/api.py +0 -51
  83. llama_stack_api/files/fastapi_routes.py +0 -124
  84. llama_stack_api/files/models.py +0 -107
  85. llama_stack_api/inference.py +0 -1169
  86. llama_stack_api/inspect_api/__init__.py +0 -37
  87. llama_stack_api/inspect_api/api.py +0 -25
  88. llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  89. llama_stack_api/inspect_api/models.py +0 -28
  90. llama_stack_api/internal/__init__.py +0 -9
  91. llama_stack_api/internal/kvstore.py +0 -26
  92. llama_stack_api/internal/sqlstore.py +0 -79
  93. llama_stack_api/llama_stack_api/__init__.py +0 -945
  94. llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
  95. llama_stack_api/llama_stack_api/admin/api.py +0 -72
  96. llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
  97. llama_stack_api/llama_stack_api/admin/models.py +0 -113
  98. llama_stack_api/llama_stack_api/agents.py +0 -173
  99. llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
  100. llama_stack_api/llama_stack_api/batches/api.py +0 -53
  101. llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
  102. llama_stack_api/llama_stack_api/batches/models.py +0 -78
  103. llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
  104. llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
  105. llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  106. llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
  107. llama_stack_api/llama_stack_api/common/__init__.py +0 -5
  108. llama_stack_api/llama_stack_api/common/content_types.py +0 -101
  109. llama_stack_api/llama_stack_api/common/errors.py +0 -95
  110. llama_stack_api/llama_stack_api/common/job_types.py +0 -38
  111. llama_stack_api/llama_stack_api/common/responses.py +0 -77
  112. llama_stack_api/llama_stack_api/common/training_types.py +0 -47
  113. llama_stack_api/llama_stack_api/common/type_system.py +0 -146
  114. llama_stack_api/llama_stack_api/connectors.py +0 -146
  115. llama_stack_api/llama_stack_api/conversations.py +0 -270
  116. llama_stack_api/llama_stack_api/datasetio.py +0 -55
  117. llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
  118. llama_stack_api/llama_stack_api/datasets/api.py +0 -35
  119. llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
  120. llama_stack_api/llama_stack_api/datasets/models.py +0 -152
  121. llama_stack_api/llama_stack_api/datatypes.py +0 -373
  122. llama_stack_api/llama_stack_api/eval.py +0 -137
  123. llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
  124. llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
  125. llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
  126. llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
  127. llama_stack_api/llama_stack_api/files/__init__.py +0 -35
  128. llama_stack_api/llama_stack_api/files/api.py +0 -51
  129. llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
  130. llama_stack_api/llama_stack_api/files/models.py +0 -107
  131. llama_stack_api/llama_stack_api/inference.py +0 -1169
  132. llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
  133. llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
  134. llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  135. llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
  136. llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
  137. llama_stack_api/llama_stack_api/internal/kvstore.py +0 -26
  138. llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -79
  139. llama_stack_api/llama_stack_api/models.py +0 -171
  140. llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
  141. llama_stack_api/llama_stack_api/post_training.py +0 -370
  142. llama_stack_api/llama_stack_api/prompts.py +0 -203
  143. llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
  144. llama_stack_api/llama_stack_api/providers/api.py +0 -16
  145. llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
  146. llama_stack_api/llama_stack_api/providers/models.py +0 -24
  147. llama_stack_api/llama_stack_api/py.typed +0 -0
  148. llama_stack_api/llama_stack_api/rag_tool.py +0 -168
  149. llama_stack_api/llama_stack_api/resource.py +0 -37
  150. llama_stack_api/llama_stack_api/router_utils.py +0 -160
  151. llama_stack_api/llama_stack_api/safety.py +0 -132
  152. llama_stack_api/llama_stack_api/schema_utils.py +0 -208
  153. llama_stack_api/llama_stack_api/scoring.py +0 -93
  154. llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
  155. llama_stack_api/llama_stack_api/shields.py +0 -93
  156. llama_stack_api/llama_stack_api/tools.py +0 -226
  157. llama_stack_api/llama_stack_api/vector_io.py +0 -941
  158. llama_stack_api/llama_stack_api/vector_stores.py +0 -51
  159. llama_stack_api/llama_stack_api/version.py +0 -9
  160. llama_stack_api/models.py +0 -171
  161. llama_stack_api/openai_responses.py +0 -1468
  162. llama_stack_api/post_training.py +0 -370
  163. llama_stack_api/prompts.py +0 -203
  164. llama_stack_api/providers/__init__.py +0 -33
  165. llama_stack_api/providers/api.py +0 -16
  166. llama_stack_api/providers/fastapi_routes.py +0 -57
  167. llama_stack_api/providers/models.py +0 -24
  168. llama_stack_api/py.typed +0 -0
  169. llama_stack_api/rag_tool.py +0 -168
  170. llama_stack_api/resource.py +0 -37
  171. llama_stack_api/router_utils.py +0 -160
  172. llama_stack_api/safety.py +0 -132
  173. llama_stack_api/schema_utils.py +0 -208
  174. llama_stack_api/scoring.py +0 -93
  175. llama_stack_api/scoring_functions.py +0 -211
  176. llama_stack_api/shields.py +0 -93
  177. llama_stack_api/tools.py +0 -226
  178. llama_stack_api/vector_io.py +0 -941
  179. llama_stack_api/vector_stores.py +0 -51
  180. llama_stack_api/version.py +0 -9
  181. {llama_stack-0.4.2.dist-info → llama_stack-0.4.4.dist-info}/entry_points.txt +0 -0
  182. {llama_stack-0.4.2.dist-info → llama_stack-0.4.4.dist-info}/licenses/LICENSE +0 -0
llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift
@@ -0,0 +1,189 @@
+ import Foundation
+
+ import LLaMARunner
+ import LlamaStackClient
+
+ class RunnerHolder: ObservableObject {
+   var runner: Runner?
+ }
+
+ public class LocalInference: Inference {
+   private var runnerHolder = RunnerHolder()
+   private let runnerQueue: DispatchQueue
+
+   public init (queue: DispatchQueue) {
+     runnerQueue = queue
+   }
+
+   public func loadModel(modelPath: String, tokenizerPath: String, completion: @escaping (Result<Void, Error>) -> Void) {
+     runnerHolder.runner = runnerHolder.runner ?? Runner(
+       modelPath: modelPath,
+       tokenizerPath: tokenizerPath
+     )
+
+
+     runnerQueue.async {
+       let runner = self.runnerHolder.runner
+       do {
+         try runner!.load()
+         completion(.success(()))
+       } catch let loadError {
+         print("error: " + loadError.localizedDescription)
+         completion(.failure(loadError))
+       }
+     }
+   }
+
+   public func stop() {
+     runnerHolder.runner?.stop()
+   }
+
+   public func chatCompletion(request: Components.Schemas.ChatCompletionRequest) -> AsyncStream<Components.Schemas.ChatCompletionResponseStreamChunk> {
+     return AsyncStream { continuation in
+       let workItem = DispatchWorkItem {
+         do {
+           var tokens: [String] = []
+
+           let prompt = try encodeDialogPrompt(messages: prepareMessages(request: request))
+           var stopReason: Components.Schemas.CompletionMessage.stop_reasonPayload? = nil
+           var buffer = ""
+           var ipython = false
+           var echoDropped = false
+
+           try self.runnerHolder.runner?.generate(prompt, sequenceLength: 4096) { token in
+             buffer += token
+
+             // HACK: Workaround until LlamaRunner exposes echo param
+             if (!echoDropped) {
+               if (buffer.hasPrefix(prompt)) {
+                 buffer = String(buffer.dropFirst(prompt.count))
+                 echoDropped = true
+               }
+               return
+             }
+
+             tokens.append(token)
+
+             if !ipython && (buffer.starts(with: "<|python_tag|>") || buffer.starts(with: "[")) {
+               ipython = true
+               continuation.yield(
+                 Components.Schemas.ChatCompletionResponseStreamChunk(
+                   event: Components.Schemas.ChatCompletionResponseEvent(
+                     event_type: .progress,
+                     delta: .tool_call(Components.Schemas.ToolCallDelta(
+                       _type: Components.Schemas.ToolCallDelta._typePayload.tool_call,
+                       tool_call: .case1(""),
+                       parse_status: Components.Schemas.ToolCallDelta.parse_statusPayload.started
+                     )
+                     )
+                   )
+                 )
+               )
+
+               if (buffer.starts(with: "<|python_tag|>")) {
+                 buffer = String(buffer.dropFirst("<|python_tag|>".count))
+               }
+             }
+
+             // TODO: Non-streaming logprobs
+
+             var text = ""
+             if token == "<|eot_id|>" {
+               stopReason = Components.Schemas.CompletionMessage.stop_reasonPayload.end_of_turn
+             } else if token == "<|eom_id|>" {
+               stopReason = Components.Schemas.CompletionMessage.stop_reasonPayload.end_of_message
+             } else {
+               text = token
+             }
+
+             var delta: Components.Schemas.ContentDelta
+             if ipython {
+               delta = .tool_call(Components.Schemas.ToolCallDelta(
+                 _type: .tool_call,
+                 tool_call: .case1(text),
+                 parse_status: .in_progress
+               ))
+             } else {
+               delta = .text(Components.Schemas.TextDelta(
+                 _type: Components.Schemas.TextDelta._typePayload.text,
+                 text: text
+               )
+               )
+             }
+
+             if stopReason == nil {
+               continuation.yield(
+                 Components.Schemas.ChatCompletionResponseStreamChunk(
+                   event: Components.Schemas.ChatCompletionResponseEvent(
+                     event_type: .progress,
+                     delta: delta
+                   )
+                 )
+               )
+             }
+           }
+
+           if stopReason == nil {
+             stopReason = Components.Schemas.CompletionMessage.stop_reasonPayload.out_of_tokens
+           }
+
+           let message = decodeAssistantMessage(tokens: tokens.joined(), stopReason: stopReason!)
+           // TODO: non-streaming support
+
+           let didParseToolCalls = message.tool_calls?.count ?? 0 > 0
+           if ipython && !didParseToolCalls {
+             continuation.yield(
+               Components.Schemas.ChatCompletionResponseStreamChunk(
+                 event: Components.Schemas.ChatCompletionResponseEvent(
+                   event_type: .progress,
+                   delta: .tool_call(Components.Schemas.ToolCallDelta(
+                     _type: Components.Schemas.ToolCallDelta._typePayload.tool_call,
+                     tool_call: .case1(""),
+                     parse_status: Components.Schemas.ToolCallDelta.parse_statusPayload.failed
+                   )
+                   )
+                 )
+                 // TODO: stopReason
+               )
+             )
+           }
+
+           for toolCall in message.tool_calls! {
+             continuation.yield(
+               Components.Schemas.ChatCompletionResponseStreamChunk(
+                 event: Components.Schemas.ChatCompletionResponseEvent(
+                   event_type: .progress,
+                   delta: .tool_call(Components.Schemas.ToolCallDelta(
+                     _type: Components.Schemas.ToolCallDelta._typePayload.tool_call,
+                     tool_call: Components.Schemas.ToolCallDelta.tool_callPayload.ToolCall(toolCall),
+                     parse_status: Components.Schemas.ToolCallDelta.parse_statusPayload.succeeded
+                   )
+                   )
+                 )
+                 // TODO: stopReason
+               )
+             )
+           }
+
+           continuation.yield(
+             Components.Schemas.ChatCompletionResponseStreamChunk(
+               event: Components.Schemas.ChatCompletionResponseEvent(
+                 event_type: .complete,
+                 delta: .text(Components.Schemas.TextDelta(
+                   _type: Components.Schemas.TextDelta._typePayload.text,
+                   text: ""
+                 )
+                 )
+               )
+               // TODO: stopReason
+             )
+           )
+         }
+         catch (let error) {
+           print("Inference error: " + error.localizedDescription)
+         }
+       }
+       runnerQueue.async(execute: workItem)
+     }
+   }
+ }
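
For orientation, a minimal sketch of driving the new LocalInference API from an app. The queue label and file paths are placeholders, and constructing a full ChatCompletionRequest is omitted since its initializer lives in LlamaStackClient, not in this diff:

import Foundation
import LlamaStackClient

let inference = LocalInference(queue: DispatchQueue(label: "inference"))

// Placeholder paths; a real app would resolve the exported model and
// tokenizer from its bundle or documents directory.
inference.loadModel(
  modelPath: "/path/to/model.pte",
  tokenizerPath: "/path/to/tokenizer.model"
) { result in
  switch result {
  case .success:
    print("model loaded")
  case .failure(let error):
    print("load failed: \(error.localizedDescription)")
  }
}

func streamChat(request: Components.Schemas.ChatCompletionRequest) async {
  for await chunk in inference.chatCompletion(request: request) {
    // Each chunk carries a .progress or .complete event whose delta is
    // either plain text or an accumulating tool call.
    switch chunk.event.delta {
    case .text(let t):
      print(t.text, terminator: "")
    case .tool_call(let tc):
      print("tool call delta: \(tc)")
    default:
      break  // ignore any other delta variants
    }
  }
}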
llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift
@@ -0,0 +1,238 @@
+ import Foundation
+
+ import LlamaStackClient
+
+ func encodeHeader(role: String) -> String {
+   return "<|start_header_id|>\(role)<|end_header_id|>\n\n"
+ }
+
+ func encodeDialogPrompt(messages: [Components.Schemas.Message]) -> String {
+   var prompt = ""
+
+   prompt.append("<|begin_of_text|>")
+   for message in messages {
+     let msg = encodeMessage(message: message)
+     prompt += msg
+   }
+
+   prompt.append(encodeHeader(role: "assistant"))
+
+   return prompt
+ }
+
+ func getRole(message: Components.Schemas.Message) -> String {
+   switch (message) {
+   case .user(let m):
+     return m.role.rawValue
+   case .system(let m):
+     return m.role.rawValue
+   case .tool(let m):
+     return m.role.rawValue
+   case .assistant(let m):
+     return m.role.rawValue
+   }
+ }
+
+ func encodeMessage(message: Components.Schemas.Message) -> String {
+   var prompt = encodeHeader(role: getRole(message: message))
+
+   switch (message) {
+   case .assistant(let m):
+     if (m.tool_calls?.count ?? 0 > 0) {
+       prompt += "<|python_tag|>"
+     }
+   default:
+     break
+   }
+
+   func _processContent(_ content: Any) -> String {
+     func _process(_ c: Any) {
+       if let str = c as? String {
+         prompt += str
+       }
+     }
+
+     if let str = content as? String {
+       _process(str)
+     } else if let list = content as? [Any] {
+       for c in list {
+         _process(c)
+       }
+     }
+
+     return ""
+   }
+
+   switch (message) {
+   case .user(let m):
+     prompt += _processContent(m.content)
+   case .system(let m):
+     prompt += _processContent(m.content)
+   case .tool(let m):
+     prompt += _processContent(m.content)
+   case .assistant(let m):
+     prompt += _processContent(m.content)
+   }
+
+   var eom = false
+
+   switch (message) {
+   case .user(let m):
+     switch (m.content) {
+     case .case1(let c):
+       prompt += _processContent(c)
+     case .InterleavedContentItem(let c):
+       prompt += _processContent(c)
+     case .case3(let c):
+       prompt += _processContent(c)
+     }
+   case .assistant(let m):
+     // TODO: Support encoding past tool call history
+     // for t in m.tool_calls {
+     //   _processContent(t.)
+     // }
+     eom = m.stop_reason == Components.Schemas.CompletionMessage.stop_reasonPayload.end_of_message
+   case .system(_):
+     break
+   case .tool(_):
+     break
+   }
+
+   if (eom) {
+     prompt += "<|eom_id|>"
+   } else {
+     prompt += "<|eot_id|>"
+   }
+
+   return prompt
+ }
+
+ func prepareMessages(request: Components.Schemas.ChatCompletionRequest) throws -> [Components.Schemas.Message] {
+   var existingMessages = request.messages
+   var existingSystemMessage: Components.Schemas.Message?
+   // TODO: Existing system message
+
+   var messages: [Components.Schemas.Message] = []
+
+   let defaultGen = SystemDefaultGenerator()
+   let defaultTemplate = defaultGen.gen()
+
+   var sysContent = ""
+
+   // TODO: Built-in tools
+
+   sysContent += try defaultTemplate.render()
+
+   messages.append(.system(Components.Schemas.SystemMessage(
+     role: .system,
+     content: .case1(sysContent)
+   ))
+   )
+
+   if request.tools?.isEmpty == false {
+     // TODO: Separate built-ins and custom tools (right now everything treated as custom)
+     let toolGen = FunctionTagCustomToolGenerator()
+     let toolTemplate = try toolGen.gen(customTools: request.tools!)
+     let tools = try toolTemplate.render()
+     messages.append(.user(Components.Schemas.UserMessage(
+       role: .user,
+       content: .case1(tools))
+     ))
+   }
+
+   messages.append(contentsOf: existingMessages)
+
+   return messages
+ }
+
+ struct FunctionCall {
+   let name: String
+   let params: [String: Any]
+ }
+
+ public func maybeExtractCustomToolCalls(input: String) -> [Components.Schemas.ToolCall] {
+   guard input.hasPrefix("[") && input.hasSuffix("]") else {
+     return []
+   }
+
+   do {
+     let trimmed = input.trimmingCharacters(in: CharacterSet(charactersIn: "[]"))
+     let calls = trimmed.components(separatedBy: "),").map { $0.hasSuffix(")") ? $0 : $0 + ")" }
+
+     var result: [Components.Schemas.ToolCall] = []
+
+     for call in calls {
+       guard let nameEndIndex = call.firstIndex(of: "("),
+             let paramsStartIndex = call.firstIndex(of: "{"),
+             let paramsEndIndex = call.lastIndex(of: "}") else {
+         return []
+       }
+
+       let name = String(call[..<nameEndIndex]).trimmingCharacters(in: .whitespacesAndNewlines)
+       let paramsString = String(call[paramsStartIndex...paramsEndIndex])
+
+       guard let data = paramsString.data(using: .utf8),
+             let params = try? JSONSerialization.jsonObject(with: data, options: []) as? [String: Any] else {
+         return []
+       }
+
+       var props: [String : Components.Schemas.ToolCall.argumentsPayload.additionalPropertiesPayload] = [:]
+       for (param_name, param) in params {
+         switch (param) {
+         case let value as String:
+           props[param_name] = .case1(value)
+         case let value as Int:
+           props[param_name] = .case2(value)
+         case let value as Double:
+           props[param_name] = .case3(value)
+         case let value as Bool:
+           props[param_name] = .case4(value)
+         default:
+           return []
+         }
+       }
+
+       result.append(
+         Components.Schemas.ToolCall(
+           call_id: UUID().uuidString,
+           tool_name: .case2(name), // custom_tool
+           arguments: .init(additionalProperties: props)
+         )
+       )
+     }
+
+     return result.isEmpty ? [] : result
+   } catch {
+     return []
+   }
+ }
+
+ func decodeAssistantMessage(tokens: String, stopReason: Components.Schemas.CompletionMessage.stop_reasonPayload) -> Components.Schemas.CompletionMessage {
+   var content = tokens
+
+   let roles = ["user", "system", "assistant"]
+   for role in roles {
+     let headerStr = encodeHeader(role: role)
+     if content.hasPrefix(headerStr) {
+       content = String(content.dropFirst(encodeHeader(role: role).count))
+     }
+   }
+
+   if content.hasPrefix("<|python_tag|>") {
+     content = String(content.dropFirst("<|python_tag|>".count))
+   }
+
+
+   if content.hasSuffix("<|eot_id|>") {
+     content = String(content.dropLast("<|eot_id|>".count))
+   } else {
+     content = String(content.dropLast("<|eom_id|>".count))
+   }
+
+   return Components.Schemas.CompletionMessage(
+     role: .assistant,
+     content: .case1(content),
+     stop_reason: stopReason,
+     tool_calls: maybeExtractCustomToolCalls(input: content)
+   )
+ }
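
A quick sketch of the bracketed call format maybeExtractCustomToolCalls accepts; the tool name and arguments here are invented for illustration:

let raw = "[get_weather({\"city\": \"Paris\", \"days\": 3})]"
let toolCalls = maybeExtractCustomToolCalls(input: raw)

for call in toolCalls {
  // call_id is a fresh UUID; "get_weather" arrives as a custom tool name,
  // and each JSON parameter is mapped onto the typed arguments payload.
  print(call.call_id, call.tool_name, call.arguments.additionalProperties)
}

// Any input that fails to parse (no brackets, non-JSON params, or an
// unsupported parameter type) yields [], which the streaming layer above
// treats as "no tool calls".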
llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift
@@ -0,0 +1,12 @@
+ import Foundation
+ import Stencil
+
+ public struct PromptTemplate {
+   let template: String
+   let data: [String: Any]
+
+   public func render() throws -> String {
+     let template = Template(templateString: self.template)
+     return try template.render(self.data)
+   }
+ }
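
PromptTemplate is a thin wrapper over Stencil, so rendering is a one-liner; this template string is invented for illustration:

let template = PromptTemplate(
  template: "Hello {{ name }}, today is {{ today }}.",
  data: ["name": "Llama", "today": "01 January 2025"]
)

do {
  print(try template.render())  // Hello Llama, today is 01 January 2025.
} catch {
  print("render failed: \(error)")
}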
llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift
@@ -0,0 +1,89 @@
+ import Foundation
+
+ import LlamaStackClient
+
+ func convertToNativeSwiftType(_ value: Any) -> Any {
+   switch value {
+   case let number as NSNumber:
+     if CFGetTypeID(number) == CFBooleanGetTypeID() {
+       return number.boolValue
+     }
+     if floor(number.doubleValue) == number.doubleValue {
+       return number.intValue
+     }
+     return number.doubleValue
+   case let string as String:
+     return string
+   case let array as [Any]:
+     return array.map(convertToNativeSwiftType)
+   case let dict as [String: Any]:
+     return dict.mapValues(convertToNativeSwiftType)
+   case is NSNull:
+     return NSNull()
+   default:
+     return value
+   }
+ }
+
+ public class SystemDefaultGenerator {
+   public init() {}
+
+   public func gen() -> PromptTemplate {
+     let templateStr = """
+       Cutting Knowledge Date: December 2023
+       Today Date: {{ today }}
+       """
+
+     let dateFormatter = DateFormatter()
+     dateFormatter.dateFormat = "dd MMMM yyyy"
+
+     return PromptTemplate(
+       template: templateStr,
+       data: ["today": dateFormatter.string(from: Date())]
+     )
+   }
+ }
+
+
+ public class FunctionTagCustomToolGenerator {
+   public init() {}
+
+   public func gen(customTools: [Components.Schemas.ToolDefinition]) throws -> PromptTemplate {
+     // TODO: required params
+     // TODO: {{#unless @last}},{{/unless}}
+
+     let templateStr = """
+       You are an expert in composing functions. You are given a question and a set of possible functions.
+       Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
+       If none of the function can be used, point it out. If the given question lacks the parameters required by the function,
+       also point it out. You should only return the function call in tools call sections.
+
+       If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]
+       You SHOULD NOT include any other text in the response.
+
+       Here is a list of functions in JSON format that you can invoke.
+
+       [
+       {% for t in custom_tools %}
+       {
+         "name": "{{t.tool_name}}",
+         "description": "{{t.description}}",
+         "input_schema": { {{t.input_schema}} }
+       }
+
+       {{/let}}
+       {% endfor -%}
+       ]
+       """
+
+     let encoder = JSONEncoder()
+     return PromptTemplate(
+       template: templateStr,
+       data: ["custom_tools": try customTools.map {
+         let data = try encoder.encode($0)
+         let obj = try JSONSerialization.jsonObject(with: data)
+         return convertToNativeSwiftType(obj)
+       }]
+     )
+   }
+ }
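
As a quick check of the generators above, a minimal sketch; the rendered date comes from the device clock, so the output shown is illustrative:

let systemTemplate = SystemDefaultGenerator().gen()

do {
  print(try systemTemplate.render())
  // Cutting Knowledge Date: December 2023
  // Today Date: 05 March 2025   (for example)
} catch {
  print("render failed: \(error)")
}

FunctionTagCustomToolGenerator works the same way but needs an array of Components.Schemas.ToolDefinition values, which prepareMessages supplies from the incoming request's tools.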