fastworkflow 2.15.8__py3-none-any.whl → 2.15.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fastworkflow might be problematic. Click here for more details.
- fastworkflow/_workflows/command_metadata_extraction/_commands/wildcard.py +10 -637
- fastworkflow/_workflows/command_metadata_extraction/intent_detection.py +360 -0
- fastworkflow/_workflows/command_metadata_extraction/parameter_extraction.py +327 -0
- fastworkflow/chat_session.py +8 -1
- fastworkflow/command_executor.py +13 -4
- fastworkflow/utils/signatures.py +243 -53
- fastworkflow/workflow.py +1 -5
- {fastworkflow-2.15.8.dist-info → fastworkflow-2.15.10.dist-info}/METADATA +3 -3
- {fastworkflow-2.15.8.dist-info → fastworkflow-2.15.10.dist-info}/RECORD +12 -10
- {fastworkflow-2.15.8.dist-info → fastworkflow-2.15.10.dist-info}/LICENSE +0 -0
- {fastworkflow-2.15.8.dist-info → fastworkflow-2.15.10.dist-info}/WHEEL +0 -0
- {fastworkflow-2.15.8.dist-info → fastworkflow-2.15.10.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
import os
|
|
3
|
+
from collections import Counter
|
|
4
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel
|
|
7
|
+
from speedict import Rdict
|
|
8
|
+
|
|
9
|
+
import fastworkflow
|
|
10
|
+
from fastworkflow.utils.logging import logger
|
|
11
|
+
from fastworkflow import NLUPipelineStage
|
|
12
|
+
from fastworkflow.cache_matching import cache_match, store_utterance_cache
|
|
13
|
+
from fastworkflow.model_pipeline_training import (
|
|
14
|
+
CommandRouter
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
from fastworkflow.utils.fuzzy_match import find_best_matches
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class CommandNamePrediction:
    """Resolve a user utterance to a fully qualified command name.

    Resolution is attempted in this order: an exact match of the first word
    against the known short command names, a fuzzy match of the whole
    utterance, and (in the INTENT_DETECTION stage only) the utterance cache
    followed by the trained command router.
    """

    class Output(BaseModel):
        # Fully qualified name of the resolved command; None when nothing
        # was resolved or the match was "wildcard".
        command_name: Optional[str] = None
        # Set instead of command_name when the router returned anything other
        # than exactly one prediction.
        error_msg: Optional[str] = None
        # True when the resolved command is listed under the CME workflow's
        # 'IntentDetection' or 'ErrorCorrection' contexts.
        is_cme_command: bool = False

    def __init__(self, cme_workflow: fastworkflow.Workflow):
        """Capture the CME workflow and derive the cache locations.

        Args:
            cme_workflow: Workflow whose context holds the application
                workflow under the "app_workflow" key.
        """
        self.cme_workflow = cme_workflow
        self.app_workflow = cme_workflow.context["app_workflow"]
        self.app_workflow_folderpath = self.app_workflow.folderpath
        self.app_workflow_id = self.app_workflow.id

        self.convo_path = os.path.join(self.app_workflow_folderpath, "___convo_info")
        self.cache_path = self._get_cache_path(self.app_workflow_id, self.convo_path)
        self.path = self._get_cache_path_cache(self.convo_path)

    def predict(self, command_context_name: str, command: str, nlu_pipeline_stage: NLUPipelineStage) -> "CommandNamePrediction.Output":
        """Predict which command the utterance refers to.

        Args:
            command_context_name: Context whose commands are candidates.
            command: Raw user utterance.
            nlu_pipeline_stage: Current stage; it decides which command names
                are candidates and whether the model is consulted.

        Returns:
            An Output carrying either the fully qualified command name (None
            when nothing matched or the match was "wildcard") or, for an
            ambiguous model prediction, an error message.
        """
        # sourcery skip: extract-duplicate-method

        model_artifact_path = f"{self.app_workflow_folderpath}/___command_info/{command_context_name}"
        command_router = CommandRouter(model_artifact_path)

        # Re-use the already-built ModelPipeline attached to the router
        # instead of instantiating a fresh one. This avoids reloading HF
        # checkpoints and transferring tensors each time we see a new
        # message for the same context.
        modelpipeline = command_router.modelpipeline

        crd = fastworkflow.RoutingRegistry.get_definition(
            self.cme_workflow.folderpath)
        cme_command_names = crd.get_command_names('IntentDetection')

        in_clarification_stage = nlu_pipeline_stage in (
            NLUPipelineStage.INTENT_AMBIGUITY_CLARIFICATION,
            NLUPipelineStage.INTENT_MISUNDERSTANDING_CLARIFICATION,
        )

        valid_command_names = set()
        if nlu_pipeline_stage == NLUPipelineStage.INTENT_AMBIGUITY_CLARIFICATION:
            # Only the commands suggested for the ambiguous utterance qualify.
            valid_command_names = self._get_suggested_commands(self.path)
        elif nlu_pipeline_stage in (
                NLUPipelineStage.INTENT_DETECTION,
                NLUPipelineStage.INTENT_MISUNDERSTANDING_CLARIFICATION):
            app_crd = fastworkflow.RoutingRegistry.get_definition(
                self.app_workflow_folderpath)
            valid_command_names = (
                set(cme_command_names) |
                set(app_crd.get_command_names(command_context_name))
            )

        # Short name (last path segment) -> fully qualified name.
        command_name_dict = {
            fully_qualified_command_name.split('/')[-1]: fully_qualified_command_name
            for fully_qualified_command_name in valid_command_names
        }

        # The special commands below are never predicted by the model; their
        # plain utterances are added so they can be matched exactly or fuzzily.
        if nlu_pipeline_stage == NLUPipelineStage.INTENT_AMBIGUITY_CLARIFICATION:
            command_name_dict |= self._plain_utterance_map(
                crd, 'IntentDetection/what_can_i_do')

        if nlu_pipeline_stage != NLUPipelineStage.INTENT_DETECTION:
            command_name_dict |= self._plain_utterance_map(
                crd, 'ErrorCorrection/abort')

        if nlu_pipeline_stage != NLUPipelineStage.INTENT_MISUNDERSTANDING_CLARIFICATION:
            command_name_dict |= self._plain_utterance_map(
                crd, 'ErrorCorrection/you_misunderstood')

        # See if the command starts with a command name followed by a space
        tentative_command_name = command.split(" ", 1)[0]
        normalized_command_name = tentative_command_name.lower()
        command_name = None
        if normalized_command_name in command_name_dict:
            command_name = normalized_command_name
            # Drop only the leading command token. A plain str.replace would
            # also delete the name wherever it recurs inside the parameters.
            command = command[len(tentative_command_name):].strip()
        else:
            # Fuzzy-match the whole utterance against the known names
            best_matched_commands, _ = find_best_matches(
                command.replace(" ", "_"),
                command_name_dict.keys(),
                threshold=0.3  # Adjust threshold as needed
            )
            if best_matched_commands:
                command_name = best_matched_commands[0]

        if nlu_pipeline_stage == NLUPipelineStage.INTENT_DETECTION:
            if not command_name:
                if cache_result := cache_match(self.path, command, modelpipeline, 0.85):
                    command_name = cache_result
                else:
                    predictions = command_router.predict(command)
                    # predictions = majority_vote_predictions(command_router, command)

                    if len(predictions) == 1:
                        command_name = predictions[0].split('/')[-1]
                    else:
                        # Anything but a single prediction is treated as an
                        # ambiguous command (type 1)
                        error_msg = self._formulate_ambiguous_command_error_message(
                            predictions, "run_as_agent" in self.app_workflow.context)

                        # Store suggested commands
                        self._store_suggested_commands(self.path, predictions, 1)
                        return CommandNamePrediction.Output(error_msg=error_msg)

        elif in_clarification_stage and not command_name:
            command_name = "what_can_i_do"
            # The fallback must resolve even when the short name is not a key
            # (in the ambiguity stage the keys are suggested names and plain
            # utterances); an existing entry is left untouched.
            command_name_dict.setdefault(
                command_name, 'IntentDetection/what_can_i_do')

        if not command_name or command_name == "wildcard":
            fully_qualified_command_name = None
            is_cme_command = False
        else:
            fully_qualified_command_name = command_name_dict[command_name]
            is_cme_command = (
                fully_qualified_command_name in cme_command_names or
                fully_qualified_command_name in crd.get_command_names('ErrorCorrection')
            )

        if (
            in_clarification_stage
            # A "wildcard" match leaves no command to cache; without this
            # guard the endswith call would fail on None.
            and fully_qualified_command_name is not None
            and not fully_qualified_command_name.endswith(
                ('abort', 'what_can_i_do', 'you_misunderstood'))
        ):
            # Cache the utterance held in the CME workflow context against
            # the command the user just chose.
            command = self.cme_workflow.context["command"]
            store_utterance_cache(self.path, command, command_name, modelpipeline)

        return CommandNamePrediction.Output(
            command_name=fully_qualified_command_name,
            is_cme_command=is_cme_command
        )

    @staticmethod
    def _plain_utterance_map(crd, fully_qualified_command_name: str) -> dict:
        """Map every plain utterance of a command to that command's name."""
        utterance_metadata = crd.command_directory.map_command_2_utterance_metadata[
            fully_qualified_command_name
        ]
        return {
            plain_utterance: fully_qualified_command_name
            for plain_utterance in utterance_metadata.plain_utterances
        }

    @staticmethod
    def _get_cache_path(workflow_id, convo_path):
        """
        Return "<convo_path>/<workflow_id>.db", creating convo_path if needed
        """
        base_dir = convo_path
        # Create directory if it doesn't exist
        os.makedirs(base_dir, exist_ok=True)
        return os.path.join(base_dir, f"{workflow_id}.db")

    @staticmethod
    def _get_cache_path_cache(convo_path):
        """
        Return "<convo_path>/cache.db", creating convo_path if needed
        """
        base_dir = convo_path
        # Create directory if it doesn't exist
        os.makedirs(base_dir, exist_ok=True)
        return os.path.join(base_dir, "cache.db")

    # Store the suggested commands with the flag type
    @staticmethod
    def _store_suggested_commands(cache_path, command_list, flag_type):
        """
        Store the list of suggested commands for the constrained selection

        Args:
            cache_path: Path to the cache database
            command_list: List of suggested commands
            flag_type: Type of constraint (1=ambiguous, 2=misclassified)
        """
        db = Rdict(cache_path)
        try:
            db["suggested_commands"] = command_list
            db["flag_type"] = flag_type
        finally:
            db.close()

    # Get the suggested commands
    @staticmethod
    def _get_suggested_commands(cache_path):
        """
        Get the list of suggested commands for the constrained selection
        (an empty list when none were stored)
        """
        db = Rdict(cache_path)
        try:
            return db.get("suggested_commands", [])
        finally:
            db.close()

    @staticmethod
    def _get_count(cache_path):
        """Return the stored "utterance_count" value, 0 when absent."""
        db = Rdict(cache_path)
        try:
            return db.get("utterance_count", 0)  # Default to 0 if key doesn't exist
        finally:
            db.close()

    @staticmethod
    def _print_db_contents(cache_path):
        """Print every key/value pair of the database (debugging aid)."""
        db = Rdict(cache_path)
        try:
            print("All keys in database:", list(db.keys()))
            for key in db.keys():
                print(f"Key: {key}, Value: {db[key]}")
        finally:
            db.close()

    @staticmethod
    def _store_utterance(cache_path, utterance, label):
        """
        Store utterance in existing or new database
        Returns: The utterance count used
        """
        # Open the database (creates if doesn't exist)
        db = Rdict(cache_path)

        try:
            # Get existing counter or initialize to 0
            utterance_count = db.get("utterance_count", 0)

            # Create and store the utterance entry
            utterance_data = {
                "utterance": utterance,
                "label": label
            }

            db[utterance_count] = utterance_data

            # Increment and store the counter
            utterance_count += 1
            db["utterance_count"] = utterance_count

            return utterance_count - 1  # Return the count used for this utterance

        finally:
            # Always close the database
            db.close()

    # Function to read from database
    @staticmethod
    def _read_utterance(cache_path, utterance_id):
        """
        Read a specific utterance from the database

        NOTE(review): the result of db.get is subscripted directly, so an
        unknown id presumably fails on a None result - confirm against Rdict.
        """
        db = Rdict(cache_path)
        try:
            return db.get(utterance_id)['utterance']
        finally:
            db.close()

    @staticmethod
    def _formulate_ambiguous_command_error_message(
            route_choice_list: list[str], run_as_agent: bool) -> str:
        """Build the message listing the candidate commands to choose from.

        Args:
            route_choice_list: Candidate command names; only the last
                '/'-separated segment of each is shown, lower-cased.
            run_as_agent: Selects the agent wording and appends the 'abort'
                hint.

        Returns:
            The message text, one candidate per line. Candidates whose short
            name is 'wildcard' are omitted.
        """
        # Filter on the short name: it is what gets displayed, and a
        # context-qualified ".../wildcard" must be hidden just like the bare name.
        short_names = (
            route_choice.split('/')[-1] for route_choice in route_choice_list
        )
        command_list = "\n".join(
            short_name.lower()
            for short_name in short_names
            if short_name != 'wildcard'
        )

        return (
            "The command is ambiguous. "
            + (
                "Choose the correct command name from these possible options and update your command:\n"
                if run_as_agent
                else "Please choose a command name from these possible options:\n"
            )
            + f"{command_list}\n\nor type 'what can i do' to see all commands\n"
            + ("or type 'abort' to cancel" if run_as_agent else '')
        )
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
# TODO - generation is deterministic. They all return the same answer
|
|
303
|
+
# TODO - Need 'temperature' for intent detection pipeline
|
|
304
|
+
def majority_vote_predictions(command_router, command: str, n_predictions: int = 5) -> list[str]:
    """
    Generate N prediction sets in parallel and return the set that wins the majority vote.

    Each prediction set is compared order-insensitively (it is sorted before
    counting), and the most common set is returned as a sorted list.

    Args:
        command_router: Object exposing ``predict(command)`` that returns a
            list of command names
        command: The input command string
        n_predictions: Number of parallel predictions to generate (default: 5).
            Values below 1 skip the voting and return a single prediction.

    Returns:
        The prediction set that received the most votes. Falls back to a single
        direct prediction if all parallel predictions fail.

    Note:
        Uses ThreadPoolExecutor with max_workers limited to min(n_predictions, 10)
        to avoid overwhelming the system with too many concurrent threads.
        Ties are broken by whichever tied set was collected first, which
        depends on thread completion order.
    """
    if n_predictions < 1:
        # ThreadPoolExecutor rejects max_workers <= 0, and there is nothing
        # to vote on, so answer with one direct prediction.
        return command_router.predict(command)

    # Generate N predictions in parallel
    prediction_sets = []
    with ThreadPoolExecutor(max_workers=min(n_predictions, 10)) as executor:
        # Submit all prediction tasks
        futures = [
            executor.submit(command_router.predict, command)
            for _ in range(n_predictions)
        ]

        # Collect results as they complete
        for future in as_completed(futures):
            try:
                prediction_sets.append(future.result())
            except Exception as e:
                # Continue with other predictions even if one fails
                logger.warning(f"Prediction failed: {e}")

    if not prediction_sets:
        # Fallback to single prediction if all parallel predictions failed
        logger.warning("All parallel predictions failed, falling back to single prediction")
        return command_router.predict(command)

    # Sorted tuples are hashable and make the comparison order-insensitive
    vote_counts = Counter(tuple(sorted(pred_set)) for pred_set in prediction_sets)

    # Get the prediction set with the most votes
    winning_tuple, _ = vote_counts.most_common(1)[0]

    # Convert back to list and return
    return list(winning_tuple)
|
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
import contextlib
|
|
2
|
+
import sys
|
|
3
|
+
from typing import Dict, List, Optional
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
import fastworkflow
|
|
8
|
+
from fastworkflow.utils.logging import logger
|
|
9
|
+
from fastworkflow import ModuleType
|
|
10
|
+
|
|
11
|
+
from fastworkflow.utils.signatures import InputForParamExtraction
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Numeric sentinels: a field holding one of these is treated as not yet
# filled when parameters are displayed or merged.
INVALID_INT_VALUE = -sys.maxsize
INVALID_FLOAT_VALUE = -sys.float_info.max

# Marker texts read from the environment; they are searched for in
# validation error messages to locate the lists of missing / invalid fields.
MISSING_INFORMATION_ERRMSG = fastworkflow.get_env_var("MISSING_INFORMATION_ERRMSG")
INVALID_INFORMATION_ERRMSG = fastworkflow.get_env_var("INVALID_INFORMATION_ERRMSG")

# NOT_FOUND is the sentinel stored in fields that are missing or invalid.
NOT_FOUND = fastworkflow.get_env_var("NOT_FOUND")
# NOTE(review): INVALID is presumably the matching marker for invalid string
# values - confirm against the signatures module.
INVALID = fastworkflow.get_env_var("INVALID")
# Loaded lazily from the environment on first use rather than at import time.
PARAMETER_EXTRACTION_ERROR_MSG = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ParameterExtraction:
    """Extract and validate the parameters of a command from an utterance.

    Parameters that fail validation are kept in the CME workflow context
    under "stored_parameters" so that a follow-up utterance can supply just
    the values that are still missing.
    """

    class Output(BaseModel):
        # True when no parameters are needed or validation succeeded.
        parameters_are_valid: bool
        # The parameter model instance (may contain sentinel values when invalid).
        cmd_parameters: Optional[BaseModel] = None
        # Message describing what is missing/invalid; None when valid.
        error_msg: Optional[str] = None
        # Suggestions returned by validation, passed through unchanged.
        suggestions: Optional[Dict[str, List[str]]] = None

    def __init__(self, cme_workflow: fastworkflow.Workflow, app_workflow: fastworkflow.Workflow, command_name: str, command: str):
        """Remember the workflows, the resolved command name and the utterance."""
        self.cme_workflow = cme_workflow
        self.app_workflow = app_workflow
        self.command_name = command_name
        self.command = command

    def extract(self) -> "ParameterExtraction.Output":
        """Extract, validate and (on failure) stash the command parameters.

        Returns:
            An Output with parameters_are_valid=True when the command takes no
            parameters or validation passed; otherwise an Output carrying the
            partially filled parameters, an error message and suggestions.
        """
        app_workflow_folderpath = self.app_workflow.folderpath
        app_command_routing_definition = fastworkflow.RoutingRegistry.get_definition(app_workflow_folderpath)

        command_parameters_class = (
            app_command_routing_definition.get_command_class(
                self.command_name, ModuleType.COMMAND_PARAMETERS_CLASS
            )
        )
        if not command_parameters_class:
            # The command declares no parameters class: nothing to extract.
            return self.Output(parameters_are_valid=True)

        stored_params = self._get_stored_parameters(self.cme_workflow)

        # NOTE(review): this removes every occurrence of the command name
        # from the utterance, not only a leading one - confirm that is intended.
        self.command = self.command.replace(self.command_name, "").strip()

        input_for_param_extraction = InputForParamExtraction.create(
            self.app_workflow, self.command_name,
            self.command)

        # If we have missing fields (in parameter extraction error state), try to apply the command directly
        if stored_params:
            new_params = self._extract_and_merge_missing_parameters(stored_params, self.command)
        else:
            # Otherwise use the LLM-based extraction
            new_params = input_for_param_extraction.extract_parameters(
                command_parameters_class,
                self.command_name,
                app_workflow_folderpath)

        is_valid, error_msg, suggestions, missing_invalid_fields = \
            input_for_param_extraction.validate_parameters(
                self.app_workflow, self.command_name, new_params
            )

        # Reset all the missing and invalid fields to the NOT_FOUND sentinel
        # before storing
        current_values = self._snapshot_fields(new_params)
        for field_name in missing_invalid_fields:
            if field_name in current_values:
                current_values[field_name] = NOT_FOUND
        # Reconstruct the model instance without validation
        new_params = new_params.__class__.model_construct(**current_values)

        self._store_parameters(self.cme_workflow, new_params)

        if not is_valid:
            if params_str := self._format_parameters_for_display(new_params):
                error_msg = f"Extracted parameters so far:\n{params_str}\n\n{error_msg}"

            if "run_as_agent" not in self.app_workflow.context:
                error_msg += "\nEnter 'abort' to get out of this error state and/or execute a different command."
                error_msg += "\nEnter 'you misunderstood' if the wrong command was executed."
            else:
                error_msg += "\nCheck your command name if the wrong command was executed."
            return self.Output(
                parameters_are_valid=False,
                error_msg=error_msg,
                cmd_parameters=new_params,
                suggestions=suggestions)

        self._clear_parameters(self.cme_workflow)
        return self.Output(
            parameters_are_valid=True,
            cmd_parameters=new_params)

    @staticmethod
    def _get_stored_parameters(cme_workflow: fastworkflow.Workflow):
        """Return the parameters stashed in the workflow context, or None."""
        return cme_workflow.context.get("stored_parameters")

    @staticmethod
    def _store_parameters(cme_workflow: fastworkflow.Workflow, parameters):
        """Stash the parameters in the workflow context."""
        cme_workflow.context["stored_parameters"] = parameters

    @staticmethod
    def _clear_parameters(cme_workflow: fastworkflow.Workflow):
        """Remove any stashed parameters from the workflow context."""
        if "stored_parameters" in cme_workflow.context:
            del cme_workflow.context["stored_parameters"]

    @staticmethod
    def _snapshot_fields(params) -> dict:
        """Return {field name: current value} for every declared model field."""
        return {
            field_name: getattr(params, field_name, None)
            for field_name in list(type(params).model_fields.keys())
        }

    @staticmethod
    def _is_unfilled(value) -> bool:
        """Tell whether a field value is None or one of the sentinel values."""
        return value in [
            NOT_FOUND,
            None,
            INVALID_INT_VALUE,
            INVALID_FLOAT_VALUE,
        ]

    @staticmethod
    def _assign_command_parts(params_data: dict, fields: list, command: str) -> None:
        """Write values parsed from the command into params_data in place.

        When the command has commas and the number of comma-separated parts
        equals the number of fields, parts are assigned pairwise. On a count
        mismatch only the first part goes to the first field. Without commas
        the whole stripped command goes to the first field. Values stay
        strings; unknown field names are ignored.
        """
        if not fields:
            return

        if "," not in command:
            assignments = [(fields[0], command.strip())]
        else:
            parts = [part.strip() for part in command.split(",")]
            if len(parts) == len(fields):
                assignments = list(zip(fields, parts))
            else:
                assignments = [(fields[0], parts[0])]

        for field, value in assignments:
            if field in params_data:
                params_data[field] = value

    @staticmethod
    def _extract_missing_fields(input_for_param_extraction, sws, command_name, stored_params):
        """Return the names of missing/invalid fields by parsing the validation error.

        The field lists are taken from the first line following the
        MISSING_INFORMATION_ERRMSG / INVALID_INFORMATION_ERRMSG markers in the
        validation error message.
        """
        stored_missing_fields = []
        # validate_parameters returns more than three values (extract unpacks
        # four); star-unpack so the extras do not break this helper.
        is_valid, error_msg, *_ = input_for_param_extraction.validate_parameters(
            sws, command_name, stored_params
        )

        if not is_valid:
            if MISSING_INFORMATION_ERRMSG in error_msg:
                missing_fields_str = error_msg.split(f"{MISSING_INFORMATION_ERRMSG}")[1].split("\n")[0]
                stored_missing_fields = [f.strip() for f in missing_fields_str.split(",")]
            if INVALID_INFORMATION_ERRMSG in error_msg:
                invalid_section = error_msg.split(f"{INVALID_INFORMATION_ERRMSG}")[1]
                if "\n" in invalid_section:
                    invalid_fields_str = invalid_section.split("\n")[0]
                    # Each entry is cut at " '" so only the field name remains.
                    stored_missing_fields.extend(
                        invalid_field.split(" '")[0].strip()
                        for invalid_field in invalid_fields_str.split(", ")
                    )
        return stored_missing_fields

    @staticmethod
    def _merge_parameters(old_params, new_params, missing_fields):
        """
        Merge new parameters into old ones: every field named in
        missing_fields takes its value from new_params, all others keep the
        value from old_params. The result is built without validation.
        """
        merged_data = ParameterExtraction._snapshot_fields(old_params)

        for field_name in missing_fields or []:
            merged_data[field_name] = getattr(new_params, field_name)

        # Construct the model instance without validation
        return old_params.__class__.model_construct(**merged_data)

    @staticmethod
    def _format_parameters_for_display(params):
        """
        Format parameters for display in the error message: one
        "Display Name: value" line per field, skipping fields that are None
        or hold a sentinel value.
        """
        if not params:
            return ""

        lines = []
        for field_name, value in ParameterExtraction._snapshot_fields(params).items():
            if ParameterExtraction._is_unfilled(value):
                continue

            display_name = " ".join(word.capitalize() for word in field_name.split('_'))

            # Objects exposing `.value` (enum members) show that value;
            # everything else, and every bool, is shown as-is.
            if isinstance(value, bool) or not hasattr(value, 'value'):
                lines.append(f"{display_name}: {value}")
            else:  # Handle enum types
                lines.append(f"{display_name}: {value.value}")
        return "\n".join(lines)

    @staticmethod
    def _apply_missing_fields(command: str, default_params: BaseModel, missing_fields: list):
        """Return a copy of default_params with missing_fields filled from the command.

        The copy is constructed without validation and the parsed values are
        left as strings.
        """
        global PARAMETER_EXTRACTION_ERROR_MSG
        if not PARAMETER_EXTRACTION_ERROR_MSG:
            PARAMETER_EXTRACTION_ERROR_MSG = fastworkflow.get_env_var("PARAMETER_EXTRACTION_ERROR_MSG")

        # Work on plain dict to avoid validation during assignment
        params_data = ParameterExtraction._snapshot_fields(default_params)
        ParameterExtraction._assign_command_parts(params_data, missing_fields, command)

        # Construct model without validation
        return default_params.__class__.model_construct(**params_data)

    @staticmethod
    def _extract_and_merge_missing_parameters(stored_params: BaseModel, command: str):
        """
        Identify fields to fill by scanning for sentinel values and merge values
        parsed from the command string into a new params instance. This preserves
        existing behavior for token/field count mismatches and leaves values as
        strings (no type coercion). When no field needs filling, stored_params
        itself is returned.
        """
        # Initialize with existing values to avoid triggering validation
        params_data = ParameterExtraction._snapshot_fields(stored_params)

        # Determine which fields still need user-provided input based on sentinels
        fields_to_fill = [
            field_name
            for field_name, value in params_data.items()
            if ParameterExtraction._is_unfilled(value)
        ]

        if not fields_to_fill:
            return stored_params

        ParameterExtraction._assign_command_parts(params_data, fields_to_fill, command)

        # Return a new instance without validation
        return stored_params.__class__.model_construct(**params_data)
|