dynamic-skill-compiler 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dynamic_skill_compiler/__init__.py +66 -0
- dynamic_skill_compiler/cli.py +141 -0
- dynamic_skill_compiler/decompose.py +207 -0
- dynamic_skill_compiler/fragments.py +205 -0
- dynamic_skill_compiler/graph.py +2036 -0
- dynamic_skill_compiler/grounding.py +55 -0
- dynamic_skill_compiler/models.py +175 -0
- dynamic_skill_compiler/pipeline.py +558 -0
- dynamic_skill_compiler/py.typed +1 -0
- dynamic_skill_compiler/query.py +254 -0
- dynamic_skill_compiler/retriever.py +280 -0
- dynamic_skill_compiler/semantic.py +280 -0
- dynamic_skill_compiler-0.1.0.dist-info/METADATA +155 -0
- dynamic_skill_compiler-0.1.0.dist-info/RECORD +18 -0
- dynamic_skill_compiler-0.1.0.dist-info/WHEEL +5 -0
- dynamic_skill_compiler-0.1.0.dist-info/entry_points.txt +2 -0
- dynamic_skill_compiler-0.1.0.dist-info/licenses/LICENSE +21 -0
- dynamic_skill_compiler-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from dynamic_skill_compiler.models import (
|
|
2
|
+
CompilationMetrics,
|
|
3
|
+
CompiledSkill,
|
|
4
|
+
CompiledSkillPackage,
|
|
5
|
+
CompilerPassTrace,
|
|
6
|
+
LocalEnvironment,
|
|
7
|
+
QueryPlan,
|
|
8
|
+
SkillAsset,
|
|
9
|
+
SkillFragment,
|
|
10
|
+
SkillGraph,
|
|
11
|
+
SkillRelation,
|
|
12
|
+
Subgoal,
|
|
13
|
+
)
|
|
14
|
+
from dynamic_skill_compiler.decompose import TaskDecomposer
|
|
15
|
+
from dynamic_skill_compiler.fragments import FragmentMatcher, SkillFragmentExtractor
|
|
16
|
+
from dynamic_skill_compiler.graph import (
|
|
17
|
+
DEFAULT_GRAPH_PASSES,
|
|
18
|
+
GRAPH_PASS_PRESETS,
|
|
19
|
+
LEGACY_DEFAULT_GRAPH_PASSES,
|
|
20
|
+
SLIM_GRAPH_PASSES,
|
|
21
|
+
SUPPORTED_GRAPH_PASSES,
|
|
22
|
+
)
|
|
23
|
+
from dynamic_skill_compiler.grounding import EnvironmentGrounder
|
|
24
|
+
from dynamic_skill_compiler.pipeline import CompilerConfig, DynamicSkillCompiler
|
|
25
|
+
from dynamic_skill_compiler.semantic import SemanticSoftMatcher
|
|
26
|
+
from dynamic_skill_compiler.query import QueryOptimizer
|
|
27
|
+
from dynamic_skill_compiler.retriever import (
|
|
28
|
+
CompositeSkillRetriever,
|
|
29
|
+
InMemorySkillRetriever,
|
|
30
|
+
LocalSkillLibraryRetriever,
|
|
31
|
+
SkillRetriever,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
__version__ = "0.1.0"
|
|
35
|
+
|
|
36
|
+
__all__ = [
|
|
37
|
+
"CompilationMetrics",
|
|
38
|
+
"CompiledSkill",
|
|
39
|
+
"CompiledSkillPackage",
|
|
40
|
+
"CompilerPassTrace",
|
|
41
|
+
"CompilerConfig",
|
|
42
|
+
"CompositeSkillRetriever",
|
|
43
|
+
"DEFAULT_GRAPH_PASSES",
|
|
44
|
+
"DynamicSkillCompiler",
|
|
45
|
+
"EnvironmentGrounder",
|
|
46
|
+
"FragmentMatcher",
|
|
47
|
+
"GRAPH_PASS_PRESETS",
|
|
48
|
+
"InMemorySkillRetriever",
|
|
49
|
+
"LEGACY_DEFAULT_GRAPH_PASSES",
|
|
50
|
+
"LocalSkillLibraryRetriever",
|
|
51
|
+
"LocalEnvironment",
|
|
52
|
+
"QueryOptimizer",
|
|
53
|
+
"QueryPlan",
|
|
54
|
+
"SkillAsset",
|
|
55
|
+
"SkillFragment",
|
|
56
|
+
"SkillFragmentExtractor",
|
|
57
|
+
"SkillGraph",
|
|
58
|
+
"SkillRelation",
|
|
59
|
+
"SkillRetriever",
|
|
60
|
+
"SLIM_GRAPH_PASSES",
|
|
61
|
+
"Subgoal",
|
|
62
|
+
"SemanticSoftMatcher",
|
|
63
|
+
"SUPPORTED_GRAPH_PASSES",
|
|
64
|
+
"TaskDecomposer",
|
|
65
|
+
"__version__",
|
|
66
|
+
]
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import json
|
|
5
|
+
import sys
|
|
6
|
+
from dataclasses import asdict, is_dataclass
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from dynamic_skill_compiler.models import LocalEnvironment
|
|
10
|
+
from dynamic_skill_compiler.pipeline import CompilerConfig, DynamicSkillCompiler
|
|
11
|
+
from dynamic_skill_compiler.retriever import LocalSkillLibraryRetriever
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the ``dsc`` command-line tool.

    The parser takes an optional positional task query, the required
    ``--skills-dir`` option, four string options used for localization,
    three numeric selection options whose defaults are read from
    ``CompilerConfig``, and two boolean output flags.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    cli = argparse.ArgumentParser(
        description="Compile a local skill library into a task-specific DSC package.",
        prog="dsc",
    )
    add = cli.add_argument

    add(
        "query",
        help="Task query to compile. If omitted, the query is read from stdin.",
        nargs="?",
    )
    add(
        "--skills-dir",
        help="Directory containing local skill folders with SKILL.md files.",
        required=True,
    )

    # Plain string options describing the local execution environment.
    string_options = (
        (
            "--benchmark",
            "generic",
            "Optional environment/benchmark label used by adaptive compiler profiles.",
        ),
        ("--cwd", ".", "Execution working directory used for localization."),
        ("--workspace-root", ".", "Workspace root used for localization."),
        (
            "--python-bin",
            sys.executable or "python",
            "Python executable name/path used for localization.",
        ),
    )
    for flag, fallback, text in string_options:
        add(flag, default=fallback, help=text)

    # Selection tuning knobs; defaults mirror the CompilerConfig class attributes.
    numeric_options = (
        (
            "--min-relevance",
            float,
            CompilerConfig.min_relevance,
            "Minimum utility score required for skill selection.",
        ),
        (
            "--preserve-top-k",
            int,
            CompilerConfig.preserve_top_k,
            "Always preserve at least this many top-scored skills.",
        ),
        (
            "--max-selected-skills",
            int,
            CompilerConfig.max_selected_skills,
            "Hard cap for selected skills. Use 0 for no explicit cap.",
        ),
    )
    for flag, converter, fallback, text in numeric_options:
        add(flag, type=converter, default=fallback, help=text)

    # Output-shaping switches.
    switches = (
        ("--pretty", "Pretty-print JSON output."),
        (
            "--include-instructions",
            "Include localized selected instructions in the JSON output.",
        ),
    )
    for flag, text in switches:
        add(flag, action="store_true", help=text)

    return cli
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def compile_from_args(args: argparse.Namespace) -> dict[str, Any]:
    """Compile the task query described by parsed CLI arguments.

    The query is taken from ``args.query`` or, when that is ``None``, read
    from stdin. A ``DynamicSkillCompiler`` backed by
    ``LocalSkillLibraryRetriever(args.skills_dir)`` compiles it for a
    ``LocalEnvironment`` built from the localization options.

    Args:
        args: Parsed command-line namespace carrying the attributes defined
            by the ``dsc`` argument parser.

    Returns:
        The dict returned by ``compiler.summarize()``, extended with a
        ``"metrics"`` entry and, when ``args.include_instructions`` is set,
        a ``"compiled_skills"`` list describing each selected skill.

    Raises:
        SystemExit: If the query is empty or contains only whitespace.
    """
    # Strip both sources: previously only stdin input was stripped, so a
    # whitespace-only positional argument slipped past the emptiness check.
    raw_query = args.query if args.query is not None else sys.stdin.read()
    query = raw_query.strip()
    if not query:
        raise SystemExit("A task query is required, either as an argument or on stdin.")

    compiler = DynamicSkillCompiler(
        retriever=LocalSkillLibraryRetriever(args.skills_dir),
        config=CompilerConfig(
            min_relevance=args.min_relevance,
            preserve_top_k=args.preserve_top_k,
            max_selected_skills=args.max_selected_skills,
        ),
    )
    compiled = compiler.compile(
        query,
        environment=LocalEnvironment(
            cwd=args.cwd,
            workspace_root=args.workspace_root,
            python_bin=args.python_bin,
            benchmark=args.benchmark,
        ),
    )

    summary = compiler.summarize(compiled)
    summary["metrics"] = _to_jsonable(compiled.metrics)
    if args.include_instructions:
        summary["compiled_skills"] = [
            {
                "name": item.asset.name,
                "skill_id": item.asset.skill_id,
                "selected_reason": item.selected_reason,
                "utility_score": item.utility_score,
                "localized_instructions": item.localized_instructions,
                "selected_fragments": [
                    _to_jsonable(fragment) for fragment in item.selected_fragments
                ],
            }
            for item in compiled.compiled_skills
        ]
    return summary
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def main(argv: list[str] | None = None) -> int:
    """Run the ``dsc`` command line and print the compilation summary as JSON.

    Args:
        argv: Argument list to parse; ``None`` lets ``argparse`` read
            ``sys.argv``.

    Returns:
        ``0`` after the summary has been printed.
    """
    namespace = build_parser().parse_args(argv)
    payload = compile_from_args(namespace)
    if namespace.pretty:
        rendered = json.dumps(payload, ensure_ascii=False, indent=2)
    else:
        rendered = json.dumps(payload, ensure_ascii=False, indent=None)
    print(rendered)
    return 0
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _to_jsonable(value: Any) -> Any:
|
|
129
|
+
if is_dataclass(value):
|
|
130
|
+
return _to_jsonable(asdict(value))
|
|
131
|
+
if isinstance(value, dict):
|
|
132
|
+
return {str(key): _to_jsonable(item) for key, item in value.items()}
|
|
133
|
+
if isinstance(value, (list, tuple)):
|
|
134
|
+
return [_to_jsonable(item) for item in value]
|
|
135
|
+
if isinstance(value, set):
|
|
136
|
+
return sorted(_to_jsonable(item) for item in value)
|
|
137
|
+
return value
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import List, Set
|
|
6
|
+
|
|
7
|
+
from dynamic_skill_compiler.models import QueryPlan, Subgoal
|
|
8
|
+
from dynamic_skill_compiler.query import QueryOptimizer
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Words that end the current clause when a task query is split into clauses.
CONNECTORS = {
    "after",
    "and",
    "before",
    "else",
    "finally",
    "otherwise",
    "then",
}

# Words that open a new phase of the task and therefore also end the current clause.
LEADING_PHASE_MARKERS = {
    # ordering words
    "first", "next", "then", "after", "before", "finally",
    # conditional words
    "if", "when", "while", "until",
}
|
|
24
|
+
|
|
25
|
+
@dataclass
|
|
26
|
+
class TaskDecomposer:
|
|
27
|
+
optimizer: QueryOptimizer = field(default_factory=QueryOptimizer)
|
|
28
|
+
|
|
29
|
+
def decompose(self, query_plan: QueryPlan) -> List[Subgoal]:
    """Break a query plan into a chain of subgoals, one per clause.

    The raw query (or the normalized query when the raw one is empty) is
    normalized with delimiters preserved and split into clauses. Each clause
    becomes a ``Subgoal`` named ``sg_<n>`` that depends on the subgoal
    created just before it.

    Args:
        query_plan: Plan supplying the query text and the capability
            collections used to classify clause terms.

    Returns:
        The subgoals in clause order.
    """
    source_text = query_plan.raw_query or query_plan.normalized_query
    cleaned = self.optimizer.normalize_text(source_text, preserve_delimiters=True)
    # Fall back to the whole normalized query when nothing could be split out.
    pieces = self._split_into_clauses(cleaned) or [query_plan.normalized_query]

    plan: List[Subgoal] = []
    last_id = None
    for position, piece in enumerate(pieces):
        terms = self.optimizer.extract_content_terms(piece)
        term_set = set(terms)

        # Terms the plan already lists as capabilities are required for this clause.
        must_have = set()
        for term in terms:
            if term in query_plan.required_capabilities or term in query_plan.optional_capabilities:
                must_have.add(term)
        if not must_have:
            # Nothing recognized: use the first three terms (or raw words).
            must_have = set(terms[:3]) if terms else set(piece.split()[:3])
        nice_to_have = set(terms[3:])

        must_have |= self._augment_required_capabilities(piece, term_set)
        nice_to_have |= self._augment_optional_capabilities(piece, term_set)

        current_id = f"sg_{position + 1}"
        domain_hints = self._infer_environment_hints(piece, must_have | nice_to_have)
        plan.append(
            Subgoal(
                subgoal_id=current_id,
                description=piece,
                required_capabilities=must_have,
                optional_capabilities=nice_to_have,
                depends_on=[last_id] if last_id else [],
                priority=position,
                environment_hints=domain_hints,
            )
        )
        last_id = current_id
    return plan
|
|
68
|
+
|
|
69
|
+
def _split_into_clauses(self, text: str) -> List[str]:
|
|
70
|
+
normalized = re.sub(r"\s+", " ", text).strip()
|
|
71
|
+
if not normalized:
|
|
72
|
+
return []
|
|
73
|
+
|
|
74
|
+
for marker in ("first", "next", "then", "finally", "otherwise", "else", "if", "when", "while", "until"):
|
|
75
|
+
normalized = re.sub(rf"\b{marker}\b", f". {marker}", normalized)
|
|
76
|
+
|
|
77
|
+
sentence_candidates = []
|
|
78
|
+
for segment in re.split(r"[.;!?]", normalized):
|
|
79
|
+
segment = segment.strip()
|
|
80
|
+
if segment:
|
|
81
|
+
sentence_candidates.append(segment)
|
|
82
|
+
|
|
83
|
+
clauses: List[str] = []
|
|
84
|
+
for sentence in sentence_candidates or [normalized]:
|
|
85
|
+
tokens = sentence.split()
|
|
86
|
+
current: List[str] = []
|
|
87
|
+
for token in tokens:
|
|
88
|
+
lowered = token.lower().strip(",")
|
|
89
|
+
if lowered in CONNECTORS or lowered in LEADING_PHASE_MARKERS:
|
|
90
|
+
if current:
|
|
91
|
+
clauses.append(" ".join(current).strip())
|
|
92
|
+
current = []
|
|
93
|
+
# Keep conditional markers so the clause still carries its branch meaning.
|
|
94
|
+
if lowered in {"if", "when", "while", "until"}:
|
|
95
|
+
current.append(lowered)
|
|
96
|
+
continue
|
|
97
|
+
current.append(token)
|
|
98
|
+
if current:
|
|
99
|
+
clauses.append(" ".join(current).strip())
|
|
100
|
+
return [clause for clause in clauses if clause]
|
|
101
|
+
|
|
102
|
+
def _infer_environment_hints(self, clause: str, capabilities: Set[str]) -> dict:
|
|
103
|
+
lowered = clause.lower()
|
|
104
|
+
caps = {cap.lower() for cap in capabilities}
|
|
105
|
+
|
|
106
|
+
scienceworld_terms = {
|
|
107
|
+
"conductive",
|
|
108
|
+
"conductivity",
|
|
109
|
+
"battery",
|
|
110
|
+
"wire",
|
|
111
|
+
"thermometer",
|
|
112
|
+
"workshop",
|
|
113
|
+
"focus",
|
|
114
|
+
"box",
|
|
115
|
+
"boil",
|
|
116
|
+
"melt",
|
|
117
|
+
"freeze",
|
|
118
|
+
"combust",
|
|
119
|
+
"temperature",
|
|
120
|
+
"substance",
|
|
121
|
+
}
|
|
122
|
+
strong_scienceworld_terms = {
|
|
123
|
+
"conductive",
|
|
124
|
+
"conductivity",
|
|
125
|
+
"battery",
|
|
126
|
+
"wire",
|
|
127
|
+
"thermometer",
|
|
128
|
+
"workshop",
|
|
129
|
+
"boil",
|
|
130
|
+
"melt",
|
|
131
|
+
"freeze",
|
|
132
|
+
"combust",
|
|
133
|
+
}
|
|
134
|
+
scienceworld_phrases = (
|
|
135
|
+
"state of matter",
|
|
136
|
+
"degrees celsius",
|
|
137
|
+
"electrically conductive",
|
|
138
|
+
)
|
|
139
|
+
alfworld_terms = {
|
|
140
|
+
"stoveburner",
|
|
141
|
+
"microwave",
|
|
142
|
+
"fridge",
|
|
143
|
+
"sinkbasin",
|
|
144
|
+
"cabinet",
|
|
145
|
+
"drawer",
|
|
146
|
+
"countertop",
|
|
147
|
+
"cool",
|
|
148
|
+
"heat",
|
|
149
|
+
"clean",
|
|
150
|
+
"put",
|
|
151
|
+
"move",
|
|
152
|
+
"take",
|
|
153
|
+
}
|
|
154
|
+
webshop_terms = {
|
|
155
|
+
"search",
|
|
156
|
+
"buy",
|
|
157
|
+
"price",
|
|
158
|
+
"size",
|
|
159
|
+
"color",
|
|
160
|
+
"click",
|
|
161
|
+
"product",
|
|
162
|
+
"results",
|
|
163
|
+
"listing",
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
token_text = " ".join(sorted(caps)) + " " + lowered
|
|
167
|
+
|
|
168
|
+
if any(term in token_text for term in webshop_terms):
|
|
169
|
+
return {"domain": "webshop"}
|
|
170
|
+
if any(phrase in lowered for phrase in scienceworld_phrases):
|
|
171
|
+
return {"domain": "scienceworld"}
|
|
172
|
+
if any(term in lowered for term in {"stoveburner", "microwave", "fridge", "sinkbasin"}):
|
|
173
|
+
return {"domain": "alfworld"}
|
|
174
|
+
if (
|
|
175
|
+
caps & {"cool", "heat", "clean", "wash", "slice"}
|
|
176
|
+
and not any(term in token_text for term in strong_scienceworld_terms)
|
|
177
|
+
):
|
|
178
|
+
return {"domain": "alfworld"}
|
|
179
|
+
if any(term in token_text for term in scienceworld_terms):
|
|
180
|
+
return {"domain": "scienceworld"}
|
|
181
|
+
if any(term in token_text for term in alfworld_terms):
|
|
182
|
+
return {"domain": "alfworld"}
|
|
183
|
+
return {"domain": "generic"} if capabilities else {}
|
|
184
|
+
|
|
185
|
+
def _augment_required_capabilities(self, clause: str, tokens: Set[str]) -> Set[str]:
|
|
186
|
+
augmented: Set[str] = set()
|
|
187
|
+
if "focus" in tokens:
|
|
188
|
+
augmented.add("focus")
|
|
189
|
+
if self._is_phase_change_clause(clause, tokens):
|
|
190
|
+
augmented |= {"heat", "contain", "apparatu"}
|
|
191
|
+
if tokens & {"measure", "temperature"}:
|
|
192
|
+
augmented |= {"measure", "monit"}
|
|
193
|
+
return augmented
|
|
194
|
+
|
|
195
|
+
def _augment_optional_capabilities(self, clause: str, tokens: Set[str]) -> Set[str]:
    """Return extra optional capabilities implied by a clause.

    Starts from ``self.optimizer.infer_structural_capabilities(tokens)`` and
    adds monitoring/state terms for phase-change clauses and
    ``heat``/``fallback`` when *tokens* contains ``combust``.

    Args:
        clause: Clause text, used for phase-change detection.
        tokens: Content terms of the clause.

    Returns:
        A new set of optional capability names.
    """
    extras: Set[str] = set(self.optimizer.infer_structural_capabilities(tokens))
    phase_change = self._is_phase_change_clause(clause, tokens)
    if phase_change:
        extras.update(("monit", "wait", "state", "chang", "substance"))
    if "combust" in tokens:
        extras.update(("heat", "fallback"))
    return extras
|
|
202
|
+
|
|
203
|
+
def _is_phase_change_clause(self, clause: str, tokens: Set[str]) -> bool:
|
|
204
|
+
if tokens & {"boil", "melt", "freeze", "heat", "cool", "combust"}:
|
|
205
|
+
return True
|
|
206
|
+
lowered = clause.lower()
|
|
207
|
+
return "state of matter" in lowered or ("change" in lowered and "state" in lowered)
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Dict, Iterable, List
|
|
6
|
+
|
|
7
|
+
from dynamic_skill_compiler.models import SkillAsset, SkillFragment, Subgoal
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Cuts instruction text at periods and newlines.
LINE_SPLIT = re.compile(r"[.\n]")
# Captures the text between a pair of backticks.
ACTION_QUOTED = re.compile(r"`([^`]+)`")

# Lower-case phrases that mark the start of an executable action.
# The order is significant: consumers take the first prefix that matches.
ACTION_PREFIXES = (
    # movement and object handling
    "teleport to ", "pick up ", "go to ", "take ", "move ",
    "open ", "close ", "use ",
    # inspection and device control
    "look at ", "examine ", "turn on ", "turn off ", "deactivate ",
    # waiting
    "wait1", "wait",
    # object state changes
    "clean ", "heat ", "cool ", "focus on ", "activate ",
    "pour ", "mix ", "measure ",
    # shopping interface
    "search ", "click ", "select ", "buy now", "add to cart",
    "filter ", "sort ",
)

# Lower-case line openers that mark descriptive, low-value lines.
LOW_SIGNAL_PREFIXES = (
    "purpose", "when to use", "notes",
    "key parameters", "key considerations",
    "integration with other skills",
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass
|
|
55
|
+
class SkillFragmentExtractor:
|
|
56
|
+
max_fragments_per_skill: int = 6
|
|
57
|
+
|
|
58
|
+
def extract(self, skills: Iterable[SkillAsset]) -> Dict[str, List[SkillFragment]]:
    """Extract fragments for every skill.

    Args:
        skills: Skills to process.

    Returns:
        A dict mapping each ``skill_id`` to that skill's fragments. If two
        skills share an id, the later one replaces the earlier.
    """
    return {skill.skill_id: self._extract_for_skill(skill) for skill in skills}
|
|
64
|
+
|
|
65
|
+
def _extract_for_skill(self, skill: SkillAsset) -> List[SkillFragment]:
    """Turn one skill's instructions into ``SkillFragment`` objects.

    Each instruction is cut at periods and newlines and stripped of bullet
    characters. When no non-empty line results, the skill description is
    used as the single line. The lines are then reduced by
    ``_select_fragment_lines`` and each remaining line becomes a fragment.

    Args:
        skill: Skill whose instructions are fragmented.

    Returns:
        The fragments in line order, numbered from 1.
    """
    pieces: List[str] = []
    for instruction in skill.instructions:
        for raw in LINE_SPLIT.split(instruction):
            text = raw.strip(" -*\t")
            if text:
                pieces.append(text)
    if not pieces:
        pieces = [skill.description]

    chosen = self._select_fragment_lines(list(enumerate(pieces)))

    result: List[SkillFragment] = []
    for number, text in enumerate(chosen, start=1):
        # Capability terms: words longer than two characters once surrounding
        # punctuation is removed, lower-cased.
        terms = set()
        for word in text.split():
            bare = word.strip(".,:;()[]{}")
            if len(bare) > 2:
                terms.add(bare.lower())

        actions = self._extract_actions(text)
        before = self._extract_preconditions(text)
        after = self._extract_postconditions(text)
        schema = self._extract_action_schema(text, actions)
        result.append(
            SkillFragment(
                fragment_id=f"{skill.skill_id}::frag_{number}",
                skill_id=skill.skill_id,
                title=f"{skill.name} fragment {number}",
                content=text,
                # Lines with no usable term inherit the skill's own capabilities.
                capabilities=terms or skill.normalized_capabilities(),
                action_schema=schema,
                preconditions=before,
                postconditions=after,
                example_actions=actions,
                token_cost=max(1.0, len(text.split()) / 20.0),
                metadata={"source_skill": skill.name},
            )
        )
    return result
|
|
106
|
+
|
|
107
|
+
def _select_fragment_lines(self, indexed_lines: List[tuple[int, str]]) -> List[str]:
|
|
108
|
+
if len(indexed_lines) <= self.max_fragments_per_skill:
|
|
109
|
+
return [line for _, line in indexed_lines]
|
|
110
|
+
|
|
111
|
+
ranked = sorted(
|
|
112
|
+
indexed_lines,
|
|
113
|
+
key=lambda item: (
|
|
114
|
+
self._line_priority(item[1]),
|
|
115
|
+
-item[0],
|
|
116
|
+
),
|
|
117
|
+
reverse=True,
|
|
118
|
+
)
|
|
119
|
+
chosen = sorted(
|
|
120
|
+
ranked[: self.max_fragments_per_skill],
|
|
121
|
+
key=lambda item: item[0],
|
|
122
|
+
)
|
|
123
|
+
return [line for _, line in chosen]
|
|
124
|
+
|
|
125
|
+
def _line_priority(self, line: str) -> int:
    """Score how useful a line is as a fragment.

    Adds 8 when the line yields actions, 3 for preconditions, 2 for
    postconditions, 2 when it contains an execution keyword, and subtracts 2
    when it starts with a low-signal prefix.

    Args:
        line: Instruction line to score.

    Returns:
        The integer priority; higher is better.
    """
    text = line.lower()
    score = 0

    weighted_findings = (
        (self._extract_actions(line), 8),
        (self._extract_preconditions(line), 3),
        (self._extract_postconditions(line), 2),
    )
    for found, weight in weighted_findings:
        if found:
            score += weight

    keywords = ("command", "action pattern", "execute", "retry", "verify")
    if any(keyword in text for keyword in keywords):
        score += 2
    if text.startswith(LOW_SIGNAL_PREFIXES):
        score -= 2
    return score
|
|
139
|
+
|
|
140
|
+
def _extract_actions(self, line: str) -> List[str]:
    """Extract example actions from an instruction line.

    Sources are tried in order:

    1. Every backtick-quoted span in the line.
    2. The text after the first ``action:``, ``command:``,
       ``action pattern:`` or ``example:`` marker that has text after it.
    3. The rest of the line starting at the first ``ACTION_PREFIXES`` entry
       (in tuple order) that begins a word.

    Args:
        line: Instruction line to inspect.

    Returns:
        The extracted actions, or ``[]`` when none is found. An empty string
        is never returned as an action.
    """
    quoted = ACTION_QUOTED.findall(line)
    if quoted:
        return quoted

    lowered = line.lower()
    for marker in ("action:", "command:", "action pattern:", "example:"):
        position = lowered.find(marker)
        if position == -1:
            continue
        candidate = line[position + len(marker):].strip()
        if candidate:
            return [candidate]
    # A marker with nothing after it (e.g. a bare "Action:") names no action.
    # The former fallback returned [""] here, a truthy list that counted as
    # an extracted action.

    for prefix in ACTION_PREFIXES:
        # Require the prefix to start a word so that "remove the lid" is not
        # read as "move the lid" and "because ..." is not read as "use ...".
        hit = re.search(r"(?<!\w)" + re.escape(prefix), lowered)
        if hit:
            return [line[hit.start():].strip()]
    return []
|
|
157
|
+
|
|
158
|
+
def _extract_preconditions(self, line: str) -> List[str]:
|
|
159
|
+
lowered = line.lower()
|
|
160
|
+
if lowered.startswith(("if ", "ensure ", "verify ", "when ", "requires ")):
|
|
161
|
+
return [line]
|
|
162
|
+
if "before " in lowered or "must " in lowered:
|
|
163
|
+
return [line]
|
|
164
|
+
return []
|
|
165
|
+
|
|
166
|
+
def _extract_postconditions(self, line: str) -> List[str]:
|
|
167
|
+
lowered = line.lower()
|
|
168
|
+
if lowered.startswith(("output", "result", "return", "the agent receives", "the specified object is")):
|
|
169
|
+
return [line]
|
|
170
|
+
if "output" in lowered or "confirm" in lowered or "transferred" in lowered:
|
|
171
|
+
return [line]
|
|
172
|
+
return []
|
|
173
|
+
|
|
174
|
+
def _extract_action_schema(self, line: str, example_actions: List[str]) -> str | None:
|
|
175
|
+
if example_actions:
|
|
176
|
+
return example_actions[0]
|
|
177
|
+
lowered = line.lower()
|
|
178
|
+
for prefix in ACTION_PREFIXES:
|
|
179
|
+
if prefix in lowered:
|
|
180
|
+
start = lowered.find(prefix)
|
|
181
|
+
return line[start:].strip()
|
|
182
|
+
return None
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
@dataclass
class FragmentMatcher:
    """Rank skill fragments by how well their capabilities cover a subgoal."""

    # Weight of the fraction of required capabilities a fragment covers.
    subgoal_weight: float = 0.75
    # Weight of the fraction of optional capabilities a fragment covers.
    optional_weight: float = 0.25

    def match(self, subgoal: Subgoal, fragments: List[SkillFragment]) -> List[SkillFragment]:
        """Return the fragments with a positive score, best first.

        Args:
            subgoal: Subgoal the fragments are matched against.
            fragments: Candidate fragments.

        Returns:
            Fragments scoring above zero in descending score order;
            fragments with equal scores keep their input order.
        """
        # Score each fragment once; the scores drive both ordering and filtering.
        scored = [(self.score(subgoal, fragment), fragment) for fragment in fragments]
        # Sort on the score alone so fragments themselves are never compared.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [fragment for value, fragment in scored if value > 0]

    def score(self, subgoal: Subgoal, fragment: SkillFragment) -> float:
        """Compute the match score of one fragment for a subgoal.

        Args:
            subgoal: Supplies ``required_capabilities`` and
                ``optional_capabilities``.
            fragment: Supplies ``capabilities`` and ``example_actions``.

        Returns:
            ``subgoal_weight`` times the covered fraction of required
            capabilities, plus ``optional_weight`` times the covered fraction
            of optional capabilities, plus 0.15 when the fragment has example
            actions.
        """
        required = subgoal.required_capabilities
        optional = subgoal.optional_capabilities
        frag_caps = fragment.capabilities
        # max(..., 1) avoids dividing by zero for an empty required set.
        required_hit = len(required & frag_caps) / max(len(required), 1)
        optional_hit = len(optional & frag_caps) / len(optional) if optional else 0.0
        action_bonus = 0.15 if fragment.example_actions else 0.0
        return self.subgoal_weight * required_hit + self.optional_weight * optional_hit + action_bonus
|