lex-llm 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/CHANGELOG.md +12 -1
  4. data/Gemfile +1 -19
  5. data/README.md +25 -26
  6. data/lex-llm.gemspec +2 -2
  7. data/lib/legion/extensions/llm/agent.rb +366 -0
  8. data/lib/legion/extensions/llm/aliases.rb +42 -0
  9. data/lib/legion/extensions/llm/attachment.rb +229 -0
  10. data/lib/legion/extensions/llm/chat.rb +355 -0
  11. data/lib/legion/extensions/llm/chunk.rb +10 -0
  12. data/lib/legion/extensions/llm/configuration.rb +82 -0
  13. data/lib/legion/extensions/llm/connection.rb +134 -0
  14. data/lib/legion/extensions/llm/content.rb +81 -0
  15. data/lib/legion/extensions/llm/context.rb +33 -0
  16. data/lib/legion/extensions/llm/embedding.rb +33 -0
  17. data/lib/legion/extensions/llm/error.rb +116 -0
  18. data/lib/legion/extensions/llm/image.rb +109 -0
  19. data/lib/legion/extensions/llm/message.rb +111 -0
  20. data/lib/legion/extensions/llm/mime_type.rb +75 -0
  21. data/lib/legion/extensions/llm/model/info.rb +117 -0
  22. data/lib/legion/extensions/llm/model/modalities.rb +26 -0
  23. data/lib/legion/extensions/llm/model/pricing.rb +52 -0
  24. data/lib/legion/extensions/llm/model/pricing_category.rb +50 -0
  25. data/lib/legion/extensions/llm/model/pricing_tier.rb +37 -0
  26. data/lib/legion/extensions/llm/model.rb +11 -0
  27. data/lib/legion/extensions/llm/models.rb +514 -0
  28. data/lib/{lex_llm → legion/extensions/llm}/models_schema.json +1 -1
  29. data/lib/legion/extensions/llm/moderation.rb +60 -0
  30. data/lib/legion/extensions/llm/provider/open_ai_compatible.rb +276 -0
  31. data/lib/legion/extensions/llm/provider.rb +337 -0
  32. data/lib/legion/extensions/llm/routing/lane_key.rb +57 -0
  33. data/lib/legion/extensions/llm/routing/model_offering.rb +173 -0
  34. data/lib/legion/extensions/llm/routing.rb +11 -0
  35. data/lib/legion/extensions/llm/stream_accumulator.rb +209 -0
  36. data/lib/legion/extensions/llm/streaming.rb +181 -0
  37. data/lib/legion/extensions/llm/thinking.rb +53 -0
  38. data/lib/legion/extensions/llm/tokens.rb +51 -0
  39. data/lib/legion/extensions/llm/tool.rb +258 -0
  40. data/lib/legion/extensions/llm/tool_call.rb +29 -0
  41. data/lib/legion/extensions/llm/transcription.rb +39 -0
  42. data/lib/legion/extensions/llm/utils.rb +95 -0
  43. data/lib/legion/extensions/llm/version.rb +9 -0
  44. data/lib/legion/extensions/llm.rb +85 -6
  45. metadata +40 -122
  46. data/lib/generators/lex_llm/agent/agent_generator.rb +0 -36
  47. data/lib/generators/lex_llm/agent/templates/agent.rb.tt +0 -6
  48. data/lib/generators/lex_llm/agent/templates/instructions.txt.erb.tt +0 -0
  49. data/lib/generators/lex_llm/chat_ui/chat_ui_generator.rb +0 -256
  50. data/lib/generators/lex_llm/chat_ui/templates/controllers/chats_controller.rb.tt +0 -38
  51. data/lib/generators/lex_llm/chat_ui/templates/controllers/messages_controller.rb.tt +0 -21
  52. data/lib/generators/lex_llm/chat_ui/templates/controllers/models_controller.rb.tt +0 -14
  53. data/lib/generators/lex_llm/chat_ui/templates/helpers/messages_helper.rb.tt +0 -25
  54. data/lib/generators/lex_llm/chat_ui/templates/jobs/chat_response_job.rb.tt +0 -12
  55. data/lib/generators/lex_llm/chat_ui/templates/tailwind/views/chats/_chat.html.erb.tt +0 -16
  56. data/lib/generators/lex_llm/chat_ui/templates/tailwind/views/chats/_form.html.erb.tt +0 -31
  57. data/lib/generators/lex_llm/chat_ui/templates/tailwind/views/chats/index.html.erb.tt +0 -31
  58. data/lib/generators/lex_llm/chat_ui/templates/tailwind/views/chats/new.html.erb.tt +0 -9
  59. data/lib/generators/lex_llm/chat_ui/templates/tailwind/views/chats/show.html.erb.tt +0 -27
  60. data/lib/generators/lex_llm/chat_ui/templates/tailwind/views/messages/_assistant.html.erb.tt +0 -14
  61. data/lib/generators/lex_llm/chat_ui/templates/tailwind/views/messages/_content.html.erb.tt +0 -1
  62. data/lib/generators/lex_llm/chat_ui/templates/tailwind/views/messages/_error.html.erb.tt +0 -13
  63. data/lib/generators/lex_llm/chat_ui/templates/tailwind/views/messages/_form.html.erb.tt +0 -23
  64. data/lib/generators/lex_llm/chat_ui/templates/tailwind/views/messages/_system.html.erb.tt +0 -10
  65. data/lib/generators/lex_llm/chat_ui/templates/tailwind/views/messages/_tool.html.erb.tt +0 -2
  66. data/lib/generators/lex_llm/chat_ui/templates/tailwind/views/messages/_tool_calls.html.erb.tt +0 -4
  67. data/lib/generators/lex_llm/chat_ui/templates/tailwind/views/messages/_user.html.erb.tt +0 -14
  68. data/lib/generators/lex_llm/chat_ui/templates/tailwind/views/messages/tool_calls/_default.html.erb.tt +0 -13
  69. data/lib/generators/lex_llm/chat_ui/templates/tailwind/views/messages/tool_results/_default.html.erb.tt +0 -21
  70. data/lib/generators/lex_llm/chat_ui/templates/tailwind/views/models/_model.html.erb.tt +0 -17
  71. data/lib/generators/lex_llm/chat_ui/templates/tailwind/views/models/index.html.erb.tt +0 -40
  72. data/lib/generators/lex_llm/chat_ui/templates/tailwind/views/models/show.html.erb.tt +0 -27
  73. data/lib/generators/lex_llm/chat_ui/templates/views/chats/_chat.html.erb.tt +0 -16
  74. data/lib/generators/lex_llm/chat_ui/templates/views/chats/_form.html.erb.tt +0 -29
  75. data/lib/generators/lex_llm/chat_ui/templates/views/chats/index.html.erb.tt +0 -28
  76. data/lib/generators/lex_llm/chat_ui/templates/views/chats/new.html.erb.tt +0 -11
  77. data/lib/generators/lex_llm/chat_ui/templates/views/chats/show.html.erb.tt +0 -25
  78. data/lib/generators/lex_llm/chat_ui/templates/views/messages/_assistant.html.erb.tt +0 -9
  79. data/lib/generators/lex_llm/chat_ui/templates/views/messages/_content.html.erb.tt +0 -1
  80. data/lib/generators/lex_llm/chat_ui/templates/views/messages/_error.html.erb.tt +0 -8
  81. data/lib/generators/lex_llm/chat_ui/templates/views/messages/_form.html.erb.tt +0 -21
  82. data/lib/generators/lex_llm/chat_ui/templates/views/messages/_system.html.erb.tt +0 -6
  83. data/lib/generators/lex_llm/chat_ui/templates/views/messages/_tool.html.erb.tt +0 -2
  84. data/lib/generators/lex_llm/chat_ui/templates/views/messages/_tool_calls.html.erb.tt +0 -4
  85. data/lib/generators/lex_llm/chat_ui/templates/views/messages/_user.html.erb.tt +0 -9
  86. data/lib/generators/lex_llm/chat_ui/templates/views/messages/create.turbo_stream.erb.tt +0 -7
  87. data/lib/generators/lex_llm/chat_ui/templates/views/messages/tool_calls/_default.html.erb.tt +0 -8
  88. data/lib/generators/lex_llm/chat_ui/templates/views/messages/tool_results/_default.html.erb.tt +0 -16
  89. data/lib/generators/lex_llm/chat_ui/templates/views/models/_model.html.erb.tt +0 -15
  90. data/lib/generators/lex_llm/chat_ui/templates/views/models/index.html.erb.tt +0 -38
  91. data/lib/generators/lex_llm/chat_ui/templates/views/models/show.html.erb.tt +0 -17
  92. data/lib/generators/lex_llm/generator_helpers.rb +0 -214
  93. data/lib/generators/lex_llm/install/install_generator.rb +0 -109
  94. data/lib/generators/lex_llm/install/templates/add_references_to_chats_tool_calls_and_messages_migration.rb.tt +0 -9
  95. data/lib/generators/lex_llm/install/templates/chat_model.rb.tt +0 -3
  96. data/lib/generators/lex_llm/install/templates/create_chats_migration.rb.tt +0 -7
  97. data/lib/generators/lex_llm/install/templates/create_messages_migration.rb.tt +0 -19
  98. data/lib/generators/lex_llm/install/templates/create_models_migration.rb.tt +0 -39
  99. data/lib/generators/lex_llm/install/templates/create_tool_calls_migration.rb.tt +0 -21
  100. data/lib/generators/lex_llm/install/templates/initializer.rb.tt +0 -20
  101. data/lib/generators/lex_llm/install/templates/message_model.rb.tt +0 -4
  102. data/lib/generators/lex_llm/install/templates/model_model.rb.tt +0 -3
  103. data/lib/generators/lex_llm/install/templates/tool_call_model.rb.tt +0 -3
  104. data/lib/generators/lex_llm/schema/schema_generator.rb +0 -26
  105. data/lib/generators/lex_llm/schema/templates/schema.rb.tt +0 -2
  106. data/lib/generators/lex_llm/tool/templates/tool.rb.tt +0 -9
  107. data/lib/generators/lex_llm/tool/templates/tool_call.html.erb.tt +0 -13
  108. data/lib/generators/lex_llm/tool/templates/tool_result.html.erb.tt +0 -13
  109. data/lib/generators/lex_llm/tool/tool_generator.rb +0 -96
  110. data/lib/generators/lex_llm/upgrade_to_v1_10/templates/add_v1_10_message_columns.rb.tt +0 -19
  111. data/lib/generators/lex_llm/upgrade_to_v1_10/upgrade_to_v1_10_generator.rb +0 -50
  112. data/lib/generators/lex_llm/upgrade_to_v1_14/templates/add_v1_14_tool_call_columns.rb.tt +0 -7
  113. data/lib/generators/lex_llm/upgrade_to_v1_14/upgrade_to_v1_14_generator.rb +0 -49
  114. data/lib/generators/lex_llm/upgrade_to_v1_7/templates/migration.rb.tt +0 -145
  115. data/lib/generators/lex_llm/upgrade_to_v1_7/upgrade_to_v1_7_generator.rb +0 -122
  116. data/lib/generators/lex_llm/upgrade_to_v1_9/templates/add_v1_9_message_columns.rb.tt +0 -15
  117. data/lib/generators/lex_llm/upgrade_to_v1_9/upgrade_to_v1_9_generator.rb +0 -49
  118. data/lib/lex_llm/active_record/acts_as.rb +0 -180
  119. data/lib/lex_llm/active_record/acts_as_legacy.rb +0 -503
  120. data/lib/lex_llm/active_record/chat_methods.rb +0 -468
  121. data/lib/lex_llm/active_record/message_methods.rb +0 -131
  122. data/lib/lex_llm/active_record/model_methods.rb +0 -76
  123. data/lib/lex_llm/active_record/payload_helpers.rb +0 -26
  124. data/lib/lex_llm/active_record/tool_call_methods.rb +0 -15
  125. data/lib/lex_llm/agent.rb +0 -365
  126. data/lib/lex_llm/aliases.rb +0 -38
  127. data/lib/lex_llm/attachment.rb +0 -223
  128. data/lib/lex_llm/chat.rb +0 -351
  129. data/lib/lex_llm/chunk.rb +0 -6
  130. data/lib/lex_llm/configuration.rb +0 -81
  131. data/lib/lex_llm/connection.rb +0 -130
  132. data/lib/lex_llm/content.rb +0 -77
  133. data/lib/lex_llm/context.rb +0 -29
  134. data/lib/lex_llm/embedding.rb +0 -29
  135. data/lib/lex_llm/error.rb +0 -112
  136. data/lib/lex_llm/image.rb +0 -105
  137. data/lib/lex_llm/message.rb +0 -107
  138. data/lib/lex_llm/mime_type.rb +0 -71
  139. data/lib/lex_llm/model/info.rb +0 -113
  140. data/lib/lex_llm/model/modalities.rb +0 -22
  141. data/lib/lex_llm/model/pricing.rb +0 -48
  142. data/lib/lex_llm/model/pricing_category.rb +0 -46
  143. data/lib/lex_llm/model/pricing_tier.rb +0 -33
  144. data/lib/lex_llm/model.rb +0 -7
  145. data/lib/lex_llm/models.rb +0 -506
  146. data/lib/lex_llm/moderation.rb +0 -56
  147. data/lib/lex_llm/provider/open_ai_compatible.rb +0 -219
  148. data/lib/lex_llm/provider.rb +0 -278
  149. data/lib/lex_llm/railtie.rb +0 -35
  150. data/lib/lex_llm/routing/lane_key.rb +0 -51
  151. data/lib/lex_llm/routing/model_offering.rb +0 -169
  152. data/lib/lex_llm/routing.rb +0 -7
  153. data/lib/lex_llm/stream_accumulator.rb +0 -203
  154. data/lib/lex_llm/streaming.rb +0 -175
  155. data/lib/lex_llm/thinking.rb +0 -49
  156. data/lib/lex_llm/tokens.rb +0 -47
  157. data/lib/lex_llm/tool.rb +0 -254
  158. data/lib/lex_llm/tool_call.rb +0 -25
  159. data/lib/lex_llm/transcription.rb +0 -35
  160. data/lib/lex_llm/utils.rb +0 -91
  161. data/lib/lex_llm/version.rb +0 -5
  162. data/lib/lex_llm.rb +0 -96
  163. data/lib/tasks/lex_llm.rake +0 -23
  164. /data/lib/{lex_llm → legion/extensions/llm}/aliases.json +0 -0
  165. /data/lib/{lex_llm → legion/extensions/llm}/models.json +0 -0
@@ -0,0 +1,173 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Llm
6
+ module Routing
7
# Describes one concrete model made available by one provider instance.
#
# An offering is built from a plain hash whose keys may be strings or
# symbols (string keys win when both are present). Values are normalized
# into symbols, symbol arrays and symbol-keyed hashes so that routing code
# can compare them directly.
class ModelOffering
  attr_reader :provider_family, :instance_id, :transport, :tier, :model, :usage_type, :capabilities, :limits,
              :credentials, :health, :cost, :policy_tags, :metadata

  # @param data [Hash] offering attributes; anything that does not respond
  #   to +key?+ is treated as an empty hash and every attribute falls back
  #   to its default.
  def initialize(data)
    @provider_family = normalize_symbol(fetch_value(data, :provider_family, fetch_value(data, :provider)))
    @instance_id = normalize_symbol(fetch_value(data, :instance_id, @provider_family))
    @transport = normalize_symbol(fetch_value(data, :transport, :http))
    # default_tier reads @transport, so the transport must be assigned first.
    @tier = normalize_symbol(fetch_value(data, :tier, default_tier))
    @model = fetch_value(data, :model).to_s
    @usage_type = normalize_usage_type(resolve_usage_type(data))
    @capabilities = normalize_array(fetch_value(data, :capabilities))
    @limits = normalize_hash(fetch_value(data, :limits))
    @credentials = fetch_value(data, :credentials)
    @health = normalize_hash(fetch_value(data, :health))
    @cost = normalize_hash(fetch_value(data, :cost))
    @policy_tags = normalize_array(fetch_value(data, :policy_tags))
    @metadata = normalize_hash(fetch_value(data, :metadata))
  end

  # @return [Boolean] false only when metadata explicitly sets +enabled+ to false
  def enabled?
    metadata[:enabled] != false
  end

  # @return [Boolean] true when the normalized usage type is +:embedding+
  def embedding?
    usage_type == :embedding
  end

  # @return [Boolean] true when the usage type is one of chat/inference/completion
  def inference?
    %i[chat inference completion].include?(usage_type)
  end

  # @return [Integer, nil] the +context_window+ limit, else +max_input_tokens+,
  #   else nil when neither is a usable integer
  def context_window
    integer_limit(:context_window) || integer_limit(:max_input_tokens)
  end

  # @return [Integer, nil] the +max_output_tokens+ limit, or nil when absent/unparseable
  def max_output_tokens
    integer_limit(:max_output_tokens)
  end

  # @param capability [#to_sym]
  # @return [Boolean] whether the offering lists the capability
  def supports?(capability)
    capabilities.include?(capability.to_sym)
  end

  # Checks the offering against a set of routing requirements.
  #
  # @param usage_type [#to_sym, nil] required usage type (aliases are normalized); nil matches any
  # @param required_capabilities [Array<#to_sym>] capabilities that must all be supported
  # @param min_context_window [#to_i, nil] minimum context window; nil disables the check
  # @param policy_tags [Array<#to_sym>] policy tags that must all be present
  # @return [Boolean]
  def eligible_for?(usage_type: nil, required_capabilities: [], min_context_window: nil, policy_tags: [])
    return false unless enabled?
    return false unless usage_type_matches?(usage_type)
    return false unless capabilities_match?(required_capabilities)
    return false unless context_window_matches?(min_context_window)
    return false unless policy_tags_match?(policy_tags)

    true
  end

  # Delegates to LaneKey.for with this offering and the given options.
  def lane_key(prefix: 'llm.fleet', include_context: true, include_fingerprint: false)
    LaneKey.for(self, prefix:, include_context:, include_fingerprint:)
  end

  # Delegates to LaneKey.eligibility_fingerprint with this offering.
  def eligibility_fingerprint
    LaneKey.eligibility_fingerprint(self)
  end

  # @return [Hash] every attribute keyed by its reader name (credentials included)
  def to_h
    {
      provider_family: provider_family,
      instance_id: instance_id,
      transport: transport,
      tier: tier,
      model: model,
      usage_type: usage_type,
      capabilities: capabilities,
      limits: limits,
      credentials: credentials,
      health: health,
      cost: cost,
      policy_tags: policy_tags,
      metadata: metadata
    }
  end

  private

  # Tier implied by the transport when the data does not name one.
  def default_tier
    case @transport
    when :local
      :local
    when :rabbitmq
      :fleet
    else
      :private
    end
  end

  # Picks the raw usage type: +usage_type+, then +type+, then +kind+, then a
  # guess from capabilities. A key that is present but nil is treated like a
  # missing key, so an explicit +usage_type: nil+ no longer reaches
  # +nil.to_sym+ in normalize_usage_type.
  def resolve_usage_type(data)
    fetch_value(data, :usage_type) ||
      fetch_value(data, :type) ||
      fetch_value(data, :kind) ||
      infer_usage_type(data)
  end

  # Guesses the usage type from the raw capabilities list.
  def infer_usage_type(data)
    capabilities = normalize_array(fetch_value(data, :capabilities))
    return :embedding if capabilities.include?(:embedding) || capabilities.include?(:embed)

    :inference
  end

  # Collapses usage type aliases onto +:embedding+ / +:inference+; any other
  # value is passed through as a symbol.
  def normalize_usage_type(value)
    case value.to_sym
    when :embed, :embeddings
      :embedding
    when :completion, :text, :chat
      :inference
    else
      value.to_sym
    end
  end

  def normalize_symbol(value)
    return nil if value.nil?

    value.to_sym
  end

  def normalize_array(value)
    Array(value).compact.map(&:to_sym)
  end

  def normalize_hash(value)
    (value || {}).to_h.transform_keys(&:to_sym)
  end

  # Reads +key+ from +hash+, preferring the string form of the key over the
  # key as given. Returns +default+ when neither is present or when +hash+
  # does not respond to +key?+.
  def fetch_value(hash, key, default = nil)
    return default unless hash.respond_to?(:key?)

    string_key = key.to_s
    return hash[string_key] if hash.key?(string_key)

    hash.key?(key) ? hash[key] : default
  end

  def usage_type_matches?(expected)
    expected.nil? || normalize_usage_type(expected) == usage_type
  end

  def capabilities_match?(required)
    Array(required).all? { |capability| supports?(capability) }
  end

  # An offering with no known context window never satisfies a minimum.
  def context_window_matches?(minimum)
    return true if minimum.nil?

    window = context_window
    !window.nil? && window >= minimum.to_i
  end

  def policy_tags_match?(required)
    Array(required).all? { |tag| policy_tags.include?(tag.to_sym) }
  end

  # Integer value of a limit, or nil when it is missing or not convertible.
  def integer_limit(key)
    value = limits[key]
    return nil if value.nil?

    Integer(value)
  rescue ArgumentError, TypeError
    nil
  end
end
170
+ end
171
+ end
172
+ end
173
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Llm
6
# Namespace for the provider-neutral routing metadata that Legion LLM
# provider gems use.
module Routing; end
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,209 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Llm
6
# Assembles streaming responses from LLMs into complete messages.
#
# Chunks are fed in one at a time through #add; #to_message then builds the
# final assistant Message from the accumulated text, thinking output, token
# counts and tool calls.
class StreamAccumulator
  attr_reader :content, :model_id, :tool_calls

  def initialize
    @content = +''
    @thinking_text = +''
    @thinking_signature = nil
    @tool_calls = {}
    @input_tokens = nil
    @output_tokens = nil
    @cached_tokens = nil
    @cache_creation_tokens = nil
    @thinking_tokens = nil
    # State for <think>…</think> tags that may be split across chunks.
    @inside_think_tag = false
    @pending_think_tag = +''
    @latest_tool_call_id = nil
  end

  # Folds one streamed chunk into the accumulated state.
  #
  # @param chunk [#model_id, #content, #thinking, #tool_calls, #tool_call?]
  #   also read for the token counters used by #count_tokens
  def add(chunk)
    Legion::Extensions::Llm.logger.debug { chunk.inspect } if Legion::Extensions::Llm.config.log_stream_debug
    # The first chunk that carries a model id wins.
    @model_id ||= chunk.model_id

    handle_chunk_content(chunk)
    append_thinking_from_chunk(chunk)
    count_tokens chunk
    Legion::Extensions::Llm.logger.debug { inspect } if Legion::Extensions::Llm.config.log_stream_debug
  end

  # Builds the final assistant message from everything accumulated so far.
  #
  # @param response [Object] stored on the message as +raw+
  # @return [Message]
  def to_message(response)
    Message.new(
      role: :assistant,
      content: content.empty? ? nil : content,
      thinking: Thinking.build(
        text: @thinking_text.empty? ? nil : @thinking_text,
        signature: @thinking_signature
      ),
      tokens: Tokens.build(
        input: @input_tokens,
        output: @output_tokens,
        cached: @cached_tokens,
        cache_creation: @cache_creation_tokens,
        thinking: @thinking_tokens
      ),
      model_id: model_id,
      tool_calls: tool_calls_from_stream,
      raw: response
    )
  end

  private

  # Converts accumulated tool calls into final ToolCall objects, parsing
  # string arguments as JSON. An empty argument string becomes an empty hash.
  def tool_calls_from_stream
    tool_calls.transform_values do |tc|
      arguments = if tc.arguments.is_a?(String) && !tc.arguments.empty?
                    Legion::JSON.parse(tc.arguments, symbolize_names: false)
                  elsif tc.arguments.is_a?(String)
                    {}
                  else
                    tc.arguments
                  end

      ToolCall.new(
        id: tc.id,
        name: tc.name,
        arguments: arguments,
        thought_signature: tc.thought_signature
      )
    end
  end

  # A tool call with an id starts a new entry; one without an id is treated
  # as an argument fragment for the most recently started entry.
  def accumulate_tool_calls(new_tool_calls) # rubocop:disable Metrics/PerceivedComplexity
    if Legion::Extensions::Llm.config.log_stream_debug
      Legion::Extensions::Llm.logger.debug { "Accumulating tool calls: #{new_tool_calls}" }
    end
    new_tool_calls.each_value do |tool_call|
      if tool_call.id
        tool_call_id = tool_call.id.empty? ? SecureRandom.uuid : tool_call.id
        tool_call_arguments = tool_call.arguments
        if tool_call_arguments.nil? || (tool_call_arguments.respond_to?(:empty?) && tool_call_arguments.empty?)
          # Start from a mutable string so later fragments can be appended.
          tool_call_arguments = +''
        end
        # NOTE(review): the entry is keyed by the incoming id (possibly ''),
        # while the stored ToolCall may carry a generated uuid.
        @tool_calls[tool_call.id] = ToolCall.new(
          id: tool_call_id,
          name: tool_call.name,
          arguments: tool_call_arguments,
          thought_signature: tool_call.thought_signature
        )
        @latest_tool_call_id = tool_call.id
      else
        existing = @tool_calls[@latest_tool_call_id]
        if existing
          fragment = tool_call.arguments
          fragment = '' if fragment.nil?
          existing.arguments << fragment
          if tool_call.thought_signature && existing.thought_signature.nil?
            existing.thought_signature = tool_call.thought_signature
          end
        end
      end
    end
  end

  # Looks up an accumulated tool call. A nil id means "the most recent one";
  # a non-nil id also becomes the new most-recent id.
  def find_tool_call(tool_call_id)
    if tool_call_id.nil?
      # Was @latest_tool_call, an ivar that is never assigned, so this
      # branch always returned nil.
      @tool_calls[@latest_tool_call_id]
    else
      @latest_tool_call_id = tool_call_id
      @tool_calls[tool_call_id]
    end
  end

  # Keeps the latest non-nil value reported for each token counter.
  def count_tokens(chunk)
    @input_tokens = chunk.input_tokens if chunk.input_tokens
    @output_tokens = chunk.output_tokens if chunk.output_tokens
    @cached_tokens = chunk.cached_tokens if chunk.cached_tokens
    @cache_creation_tokens = chunk.cache_creation_tokens if chunk.cache_creation_tokens
    @thinking_tokens = chunk.thinking_tokens if chunk.thinking_tokens
  end

  # Tool-call chunks only update tool calls; other chunks append content.
  def handle_chunk_content(chunk)
    return accumulate_tool_calls(chunk.tool_calls) if chunk.tool_call?

    content_text = chunk.content || ''
    if content_text.is_a?(String)
      append_text_with_thinking(content_text)
    else
      @content << content_text.to_s
    end
  end

  # Splits inline <think> sections out of the text before appending.
  def append_text_with_thinking(text)
    content_chunk, thinking_chunk = extract_think_tags(text)
    @content << content_chunk
    @thinking_text << thinking_chunk if thinking_chunk
  end

  # Appends structured thinking carried on the chunk itself; the first
  # non-nil signature is kept.
  def append_thinking_from_chunk(chunk)
    thinking = chunk.thinking
    return unless thinking

    @thinking_text << thinking.text.to_s if thinking.text
    @thinking_signature ||= thinking.signature # rubocop:disable Naming/MemoizedInstanceVariableName
  end

  # Separates visible text from text inside <think>…</think>.
  #
  # @return [Array(String, String|nil)] visible text and thinking text
  #   (nil when this chunk contributed none)
  def extract_think_tags(text)
    start_tag = '<think>'
    end_tag = '</think>'
    # Re-attach a partial tag held back from the previous chunk.
    remaining = @pending_think_tag + text
    @pending_think_tag = +''

    output = +''
    thinking = +''

    until remaining.empty?
      remaining = if @inside_think_tag
                    consume_think_content(remaining, end_tag, thinking)
                  else
                    consume_non_think_content(remaining, start_tag, output)
                  end
    end

    [output, thinking.empty? ? nil : thinking]
  end

  # Consumes text while inside a think block and returns what is left after
  # the closing tag. Without a closing tag, a trailing partial tag is held
  # back in @pending_think_tag and nothing is left.
  def consume_think_content(remaining, end_tag, thinking)
    end_index = remaining.index(end_tag)
    if end_index
      thinking << remaining.slice(0, end_index)
      @inside_think_tag = false
      remaining.slice((end_index + end_tag.length)..) || +''
    else
      suffix_len = longest_suffix_prefix(remaining, end_tag)
      thinking << remaining.slice(0, remaining.length - suffix_len)
      @pending_think_tag = remaining.slice(-suffix_len, suffix_len)
      +''
    end
  end

  # Mirror of consume_think_content for text outside a think block.
  def consume_non_think_content(remaining, start_tag, output)
    start_index = remaining.index(start_tag)
    if start_index
      output << remaining.slice(0, start_index)
      @inside_think_tag = true
      remaining.slice((start_index + start_tag.length)..) || +''
    else
      suffix_len = longest_suffix_prefix(remaining, start_tag)
      output << remaining.slice(0, remaining.length - suffix_len)
      @pending_think_tag = remaining.slice(-suffix_len, suffix_len)
      +''
    end
  end

  # Length of the longest proper prefix of +tag+ that +text+ ends with
  # (0 when there is none).
  def longest_suffix_prefix(text, tag)
    max = [text.length, tag.length - 1].min
    max.downto(1) do |len|
      return len if text.end_with?(tag[0, len])
    end
    0
  end
end
207
+ end
208
+ end
209
+ end
@@ -0,0 +1,181 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Llm
6
# Handles streaming responses from AI providers.
#
# NOTE(review): the methods here call +stream_url+ and +build_chunk+, which
# are not defined in this module; presumably the including provider supplies
# them. Confirm against the provider classes.
module Streaming
  module_function

  # Posts +payload+ to the stream URL, feeding every parsed chunk to the
  # accumulator and to +block+.
  #
  # @param connection [#post] connection used to issue the request
  # @param payload [Object] request body
  # @param additional_headers [Hash] merged underneath the request's own headers
  # @yield [chunk] each chunk produced by +build_chunk+
  # @return [Message] the message assembled from all chunks
  def stream_response(connection, payload, additional_headers = {}, &block)
    accumulator = StreamAccumulator.new

    response = connection.post stream_url, payload do |req|
      req.headers = additional_headers.merge(req.headers) unless additional_headers.empty?
      # The two branches differ only in how the request options are written.
      if faraday_1?
        req.options[:on_data] = handle_stream do |chunk|
          accumulator.add chunk
          block.call chunk
        end
      else
        req.options.on_data = handle_stream do |chunk|
          accumulator.add chunk
          block.call chunk
        end
      end
    end

    message = accumulator.to_message(response)
    Legion::Extensions::Llm.logger.debug { "Stream completed: #{message.content}" }
    message
  end

  # Builds the on_data handler; only Hash payloads are turned into chunks.
  def handle_stream(&block)
    build_on_data_handler do |data|
      block.call(build_chunk(data)) if data.is_a?(Hash)
    end
  end

  private

  def faraday_1?
    Faraday::VERSION.start_with?('1')
  end

  def build_on_data_handler(&block)
    # Buffer shared across calls so a failed response body can be reassembled.
    buffer = +''
    parser = EventStreamParser::Parser.new

    FaradayHandlers.build(
      faraday_v1: faraday_1?,
      on_chunk: ->(chunk, env) { process_stream_chunk(chunk, parser, env, &block) },
      on_failed_response: ->(chunk, env) { handle_failed_response(chunk, buffer, env) }
    )
  end

  # Routes a raw chunk to error handling or to the SSE parser.
  def process_stream_chunk(chunk, parser, env, &block)
    if Legion::Extensions::Llm.config.log_stream_debug
      Legion::Extensions::Llm.logger.debug { "Received chunk: #{chunk}" }
    end

    if error_chunk?(chunk)
      handle_error_chunk(chunk, env)
    elsif json_error_payload?(chunk)
      handle_json_error_chunk(chunk, env)
    else
      # The return value of handle_sse is yielded as well, as in the original.
      yield handle_sse(chunk, parser, env, &block)
    end
  end

  def error_chunk?(chunk)
    chunk.start_with?('event: error')
  end

  def json_error_payload?(chunk)
    chunk.lstrip.start_with?('{') && chunk.include?('"error"')
  end

  def handle_json_error_chunk(chunk, env)
    parse_error_from_json(chunk, env, 'Failed to parse JSON error chunk')
  end

  # Parses the second line of an +event: error+ chunk as the error payload.
  # A chunk with no second line is logged and skipped rather than raising
  # NoMethodError on nil.
  def handle_error_chunk(chunk, env)
    data_line = chunk.split("\n")[1]
    if data_line.nil?
      Legion::Extensions::Llm.logger.debug { "Error chunk has no data line: #{chunk}" }
      return
    end

    parse_error_from_json(data_line.delete_prefix('data: '), env, 'Failed to parse error chunk')
  end

  # Accumulates a non-200 response body until it parses as JSON.
  def handle_failed_response(chunk, buffer, env)
    buffer << chunk
    error_data = Legion::JSON.parse(buffer, symbolize_names: false)
    handle_parsed_error(error_data, env)
  rescue Legion::JSON::ParseError
    Legion::Extensions::Llm.logger.debug { "Accumulating error chunk: #{chunk}" }
  end

  # Feeds the chunk to the SSE parser and yields each parsed data event.
  def handle_sse(chunk, parser, env)
    parser.feed(chunk) do |type, data|
      case type.to_sym
      when :error
        handle_error_event(data, env)
      else
        yield handle_data(data, env) unless data == '[DONE]'
      end
    end
  end

  # Parses one data event; payloads carrying an 'error' key go to error handling.
  def handle_data(data, env)
    parsed = Legion::JSON.parse(data, symbolize_names: false)
    return parsed unless parsed.is_a?(Hash) && parsed.key?('error')

    handle_parsed_error(parsed, env)
  rescue Legion::JSON::ParseError => e
    Legion::Extensions::Llm.logger.debug { "Failed to parse data chunk: #{e.message}" }
  end

  def handle_error_event(data, env)
    parse_error_from_json(data, env, 'Failed to parse error event')
  end

  # @return [Array(Integer, String)] always status 500 plus a message
  def parse_streaming_error(data)
    error_data = Legion::JSON.parse(data, symbolize_names: false)
    [500, error_data['message'] || 'Unknown streaming error']
  rescue Legion::JSON::ParseError => e
    Legion::Extensions::Llm.logger.debug { "Failed to parse streaming error: #{e.message}" }
    [500, "Failed to parse error: #{data}"]
  end

  # Wraps the parsed error in a response-like object and passes it to
  # ErrorMiddleware.parse_error.
  def handle_parsed_error(parsed_data, env)
    status, _message = parse_streaming_error(parsed_data.to_json)
    error_response = build_stream_error_response(parsed_data, env, status)
    ErrorMiddleware.parse_error(provider: self, response: error_response)
  end

  def parse_error_from_json(data, env, error_message)
    parsed_data = Legion::JSON.parse(data, symbolize_names: false)
    handle_parsed_error(parsed_data, env)
  rescue Legion::JSON::ParseError => e
    Legion::Extensions::Llm.logger.debug { "#{error_message}: #{e.message}" }
  end

  def build_stream_error_response(parsed_data, env, status)
    error_status = status || env&.status || 500

    if faraday_1?
      # The Faraday 1 handler passes a nil env, so build a stand-in object.
      Struct.new(:body, :status).new(parsed_data, error_status)
    else
      env.merge(body: parsed_data, status: error_status)
    end
  end

  # Builds Faraday on_data handlers for different major versions.
  module FaradayHandlers
    module_function

    # @param faraday_v1 [Boolean] selects the handler variant
    # @param on_chunk [#call] called with (chunk, env)
    # @param on_failed_response [#call] called with (chunk, env) when the status is not 200
    # @return [Proc]
    def build(faraday_v1:, on_chunk:, on_failed_response:)
      if faraday_v1
        v1_on_data(on_chunk)
      else
        v2_on_data(on_chunk, on_failed_response)
      end
    end

    def v1_on_data(on_chunk)
      proc do |chunk, _size|
        on_chunk.call(chunk, nil)
      end
    end

    def v2_on_data(on_chunk, on_failed_response)
      proc do |chunk, _bytes, env|
        if env&.status == 200
          on_chunk.call(chunk, env)
        else
          on_failed_response.call(chunk, env)
        end
      end
    end
  end
end
+ end
180
+ end
181
+ end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Llm
6
# Thinking output returned by a provider: the text and an optional signature.
class Thinking
  # Normalized config for thinking across providers.
  class Config
    attr_reader :effort, :budget

    # A Symbol effort is stored as its String form; anything else is kept as given.
    def initialize(effort: nil, budget: nil)
      @effort = case effort
                when Symbol then effort.to_s
                else effort
                end
      @budget = budget
    end

    # Enabled as soon as either an effort or a budget is set.
    def enabled?
      !(effort.nil? && budget.nil?)
    end
  end

  attr_reader :text, :signature

  # Returns a Thinking, or nil when neither text nor signature carries a
  # value. Empty strings count as no value.
  def self.build(text: nil, signature: nil)
    blank_to_nil = ->(value) { value.is_a?(String) && value.empty? ? nil : value }
    cleaned_text = blank_to_nil.call(text)
    cleaned_signature = blank_to_nil.call(signature)
    return nil if [cleaned_text, cleaned_signature].all?(&:nil?)

    new(text: cleaned_text, signature: cleaned_signature)
  end

  def initialize(text: nil, signature: nil)
    @text = text
    @signature = signature
  end

  # Pretty-printer hook; a present signature is shown as '[REDACTED]'.
  def pretty_print(printer)
    shown_signature = signature ? '[REDACTED]' : nil

    printer.object_group(self) do
      printer.breakable
      printer.text 'text='
      printer.pp text
      printer.comma_breakable
      printer.text 'signature='
      printer.pp(shown_signature)
    end
  end
end
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Llm
6
# Token usage reported for a single response.
class Tokens
  attr_reader :input, :output, :cached, :cache_creation, :thinking

  # Returns a Tokens, or nil when every count is nil.
  # rubocop:disable Metrics/ParameterLists
  def self.build(input: nil, output: nil, cached: nil, cache_creation: nil, thinking: nil, reasoning: nil)
    counts = {
      input: input,
      output: output,
      cached: cached,
      cache_creation: cache_creation,
      thinking: thinking,
      reasoning: reasoning
    }
    return nil if counts.values.compact.empty?

    new(**counts)
  end
  # rubocop:enable Metrics/ParameterLists

  # +reasoning+ is accepted as an alternative name for +thinking+; it is only
  # used when +thinking+ is not given.
  # rubocop:disable Metrics/ParameterLists
  def initialize(input: nil, output: nil, cached: nil, cache_creation: nil, thinking: nil, reasoning: nil)
    @input = input
    @output = output
    @cached = cached
    @cache_creation = cache_creation
    @thinking = thinking || reasoning
  end
  # rubocop:enable Metrics/ParameterLists

  # Same value as #thinking.
  def reasoning
    thinking
  end

  # Hash of the counts under their +*_tokens+ names, omitting nil entries.
  def to_h
    named_counts = {
      input_tokens: input,
      output_tokens: output,
      cached_tokens: cached,
      cache_creation_tokens: cache_creation,
      thinking_tokens: thinking
    }
    named_counts.reject { |_name, count| count.nil? }
  end
end
+ end
50
+ end
51
+ end