openclacky 0.9.31 → 0.9.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/lib/clacky/agent/llm_caller.rb +5 -5
- data/lib/clacky/agent/memory_updater.rb +1 -1
- data/lib/clacky/agent/skill_auto_creator.rb +119 -0
- data/lib/clacky/agent/skill_evolution.rb +46 -0
- data/lib/clacky/agent/skill_manager.rb +8 -0
- data/lib/clacky/agent/skill_reflector.rb +97 -0
- data/lib/clacky/agent.rb +32 -19
- data/lib/clacky/agent_config.rb +10 -1
- data/lib/clacky/cli.rb +1 -1
- data/lib/clacky/default_skills/personal-website/publish.rb +1 -1
- data/lib/clacky/default_skills/skill-creator/SKILL.md +46 -0
- data/lib/clacky/json_ui_controller.rb +0 -4
- data/lib/clacky/message_history.rb +0 -12
- data/lib/clacky/plain_ui_controller.rb +0 -1
- data/lib/clacky/platform_http_client.rb +2 -4
- data/lib/clacky/server/channel/channel_ui_controller.rb +0 -2
- data/lib/clacky/server/web_ui_controller.rb +15 -13
- data/lib/clacky/tools/shell.rb +71 -169
- data/lib/clacky/ui2/ui_controller.rb +63 -93
- data/lib/clacky/ui_interface.rb +0 -1
- data/lib/clacky/utils/arguments_parser.rb +0 -2
- data/lib/clacky/utils/limit_stack.rb +81 -13
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/web/app.js +6 -3
- data/lib/clacky/web/sessions.js +195 -58
- metadata +4 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a4dd6332b6e7425bea0dd817603ad5af83e4d23b5742b79f5ca97f2d0fc18a0c
|
|
4
|
+
data.tar.gz: 640788854a81c8760999e866dce8364c6e2547603098f24582f6b6b51837797b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 96423895e7df89b17c5eb7196aca0f829e1f5544c14def222d888faddb35a39c78f3df1bdea40d3ce884c9d3edf7b3a4ac8f8447e5ab7394e569ed9d9ffd8038
|
|
7
|
+
data.tar.gz: 2f85e85244ddfa8720a9c2cf6fc053d68671e051ce0d2ccafbbb01581b3fc460e1c7cd10c4a05dc93a88614c3f6505e61aaa4510f821a283a39974069f2694a6
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.9.32] - 2026-04-20
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- **Skill auto-evolution system**: after completing a complex task (12+ iterations) without an existing skill, the agent automatically analyzes whether the workflow is worth capturing as a reusable skill and creates one via `skill-creator` if it meets the criteria
|
|
14
|
+
- **Skill reflection**: after executing a skill via slash command, the agent reflects on whether the skill's instructions could be improved and updates it automatically if concrete improvements are found
|
|
15
|
+
|
|
16
|
+
### Improved
|
|
17
|
+
- **Shell tool output management**: shell tool now uses `LimitStack` for output buffering — per-line character limits, total character budgets, and rolling-window line caps are all enforced in a single, well-tested utility, replacing ad-hoc truncation logic
|
|
18
|
+
- **Progress display**: cleaned up progress/spinner lifecycle — all UIs (Web, CLI, UI2, channel) now use a unified `show_progress(phase: "done")` call instead of a separate `clear_progress` call, eliminating stale spinners
|
|
19
|
+
|
|
20
|
+
### Fixed
|
|
21
|
+
- **Thinking state bleed across sessions**: in the Web UI, a "thinking" indicator from a previous session no longer bleeds into a freshly opened session
|
|
22
|
+
- **Token truncation reliability**: improved agent token-limit handling — context trimming now uses a cleaner single-pass approach and removes the dependency on per-message character counting in `MessageHistory`
|
|
23
|
+
- **Skill auto-creation crash**: fixed `nil.to_a` error in `SkillAutoCreator` when conversation history contained messages without tool calls
|
|
24
|
+
|
|
25
|
+
### More
|
|
26
|
+
- Updated platform HTTP client fallback host for improved connectivity reliability
|
|
27
|
+
|
|
10
28
|
## [0.9.31] - 2026-04-18
|
|
11
29
|
|
|
12
30
|
### Added
|
|
@@ -68,7 +68,7 @@ module Clacky
|
|
|
68
68
|
handle_probe_success if @config.probing?
|
|
69
69
|
|
|
70
70
|
rescue Faraday::ConnectionFailed, Faraday::TimeoutError, Faraday::SSLError, Errno::ECONNREFUSED, Errno::ETIMEDOUT => e
|
|
71
|
-
@ui&.
|
|
71
|
+
@ui&.show_progress(phase: "done")
|
|
72
72
|
retries += 1
|
|
73
73
|
|
|
74
74
|
# Probing failure: primary still down — renew cooling-off and retry with fallback.
|
|
@@ -90,13 +90,13 @@ module Clacky
|
|
|
90
90
|
sleep retry_delay
|
|
91
91
|
retry
|
|
92
92
|
else
|
|
93
|
-
@ui&.show_progress(
|
|
93
|
+
@ui&.show_progress(phase: "done")
|
|
94
94
|
@ui&.show_error("Network failed after #{max_retries} retries: #{e.message}")
|
|
95
95
|
raise AgentError, "Network connection failed after #{max_retries} retries: #{e.message}"
|
|
96
96
|
end
|
|
97
97
|
|
|
98
98
|
rescue RetryableError => e
|
|
99
|
-
@ui&.
|
|
99
|
+
@ui&.show_progress(phase: "done")
|
|
100
100
|
retries += 1
|
|
101
101
|
|
|
102
102
|
# Probing failure: primary still down — renew cooling-off and retry with fallback.
|
|
@@ -127,13 +127,13 @@ module Clacky
|
|
|
127
127
|
sleep retry_delay
|
|
128
128
|
retry
|
|
129
129
|
else
|
|
130
|
-
@ui&.show_progress(
|
|
130
|
+
@ui&.show_progress(phase: "done")
|
|
131
131
|
@ui&.show_error("LLM service unavailable after #{current_max} retries. Please try again later.")
|
|
132
132
|
raise AgentError, "LLM service unavailable after #{current_max} retries"
|
|
133
133
|
end
|
|
134
134
|
|
|
135
135
|
ensure
|
|
136
|
-
@ui&.
|
|
136
|
+
@ui&.show_progress(phase: "done")
|
|
137
137
|
end
|
|
138
138
|
|
|
139
139
|
# Track cost and collect token usage data.
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Clacky
|
|
4
|
+
class Agent
|
|
5
|
+
# Scenario 1: Auto-create new skills from complex task patterns.
|
|
6
|
+
#
|
|
7
|
+
# After completing a complex task (high iteration count, no existing skill used),
|
|
8
|
+
# inject a system prompt asking the LLM to analyze if the workflow is reusable
|
|
9
|
+
# and worth capturing as a new skill.
|
|
10
|
+
#
|
|
11
|
+
# If the LLM determines it's valuable, it can invoke skill-creator in "quick mode"
|
|
12
|
+
# to generate a new skill automatically.
|
|
13
|
+
module SkillAutoCreator
|
|
14
|
+
# Default minimum iterations to consider auto-creating a skill
|
|
15
|
+
DEFAULT_AUTO_CREATE_THRESHOLD = 12
|
|
16
|
+
|
|
17
|
+
# Check if we should prompt the LLM to consider creating a new skill
|
|
18
|
+
# Called from SkillEvolution#run_skill_evolution_hooks
|
|
19
|
+
def maybe_create_skill_from_task
|
|
20
|
+
return unless should_auto_create_skill?
|
|
21
|
+
|
|
22
|
+
inject_skill_creation_prompt
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Determine if this task is a candidate for skill auto-creation
|
|
26
|
+
# @return [Boolean]
|
|
27
|
+
private def should_auto_create_skill?
|
|
28
|
+
threshold = skill_evolution_config[:auto_create_threshold] || DEFAULT_AUTO_CREATE_THRESHOLD
|
|
29
|
+
|
|
30
|
+
# Conditions (ALL must be true):
|
|
31
|
+
# 1. Task was complex enough (high iteration count)
|
|
32
|
+
# 2. No skill was explicitly invoked (not a skill refinement session)
|
|
33
|
+
# 3. No invoke_skill tool call appears in the history (task success is not checked here)
|
|
34
|
+
|
|
35
|
+
@iterations >= threshold &&
|
|
36
|
+
!@skill_execution_context &&
|
|
37
|
+
!skill_invoked_in_history?
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# Check if any skill was invoked during this task
|
|
41
|
+
# Looks for invoke_skill tool calls in the conversation history
|
|
42
|
+
# @return [Boolean]
|
|
43
|
+
private def skill_invoked_in_history?
|
|
44
|
+
@history.to_a.any? { |msg|
|
|
45
|
+
msg[:role] == "assistant" &&
|
|
46
|
+
msg[:tool_calls]&.any? { |tc| tc[:name] == "invoke_skill" }
|
|
47
|
+
}
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# Inject the skill creation prompt into history as a user-role message flagged system_injected
|
|
51
|
+
# The LLM will analyze and decide whether to create a new skill
|
|
52
|
+
private def inject_skill_creation_prompt
|
|
53
|
+
@history.append({
|
|
54
|
+
role: "user",
|
|
55
|
+
content: build_skill_creation_prompt,
|
|
56
|
+
system_injected: true,
|
|
57
|
+
skill_auto_create: true
|
|
58
|
+
})
|
|
59
|
+
|
|
60
|
+
@ui&.show_info("Analyzing task for skill creation opportunity...")
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
# Build the skill auto-creation prompt content
|
|
64
|
+
# @return [String]
|
|
65
|
+
private def build_skill_creation_prompt
|
|
66
|
+
<<~PROMPT
|
|
67
|
+
═══════════════════════════════════════════════════════════════
|
|
68
|
+
SKILL AUTO-CREATION MODE
|
|
69
|
+
═══════════════════════════════════════════════════════════════
|
|
70
|
+
You just completed a complex task (#{@iterations} iterations) without using any existing skill.
|
|
71
|
+
|
|
72
|
+
## Analysis
|
|
73
|
+
|
|
74
|
+
Review the conversation history and determine:
|
|
75
|
+
- Is this workflow likely to be reused in similar future tasks?
|
|
76
|
+
- Does it have a clear input → process → output pattern?
|
|
77
|
+
- Would it save significant time if automated as a skill?
|
|
78
|
+
|
|
79
|
+
## Decision Criteria (ALL must be true)
|
|
80
|
+
|
|
81
|
+
1. **Reusable**: The workflow could apply to similar tasks in the future
|
|
82
|
+
(not a one-off, project-specific task)
|
|
83
|
+
2. **Well-defined**: Clear steps with consistent logic, not just exploratory conversation
|
|
84
|
+
3. **Valuable**: Would save more than 5 minutes of work if reused
|
|
85
|
+
4. **Generalizable**: Can be parameterized for different inputs/contexts
|
|
86
|
+
|
|
87
|
+
## Action
|
|
88
|
+
|
|
89
|
+
If **ALL** criteria are met:
|
|
90
|
+
→ Call invoke_skill with:
|
|
91
|
+
- skill_name: "skill-creator"
|
|
92
|
+
- task: A clear description of what to automate and how (be specific)
|
|
93
|
+
- mode: "quick" (enables fast auto-creation without user interviews)
|
|
94
|
+
- suggested_name: A descriptive identifier (lowercase, hyphens OK)
|
|
95
|
+
|
|
96
|
+
Example invocation:
|
|
97
|
+
```
|
|
98
|
+
invoke_skill(
|
|
99
|
+
skill_name: "skill-creator",
|
|
100
|
+
task: "Create a skill to extract and summarize content from URLs. The skill should: 1) fetch the URL content, 2) parse the main text, 3) generate a concise summary. Expected input: URL. Expected output: markdown summary.",
|
|
101
|
+
mode: "quick",
|
|
102
|
+
suggested_name: "url-summarizer"
|
|
103
|
+
)
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
If **NOT all** criteria are met:
|
|
107
|
+
→ Respond briefly: "This task doesn't warrant a new skill." (no tool calls)
|
|
108
|
+
|
|
109
|
+
## Constraints
|
|
110
|
+
|
|
111
|
+
- Be selective: Don't create skills for one-off tasks or project-specific workflows
|
|
112
|
+
- Be specific: When creating a skill, clearly describe the workflow steps
|
|
113
|
+
- Keep it simple: Focus on the core happy path, edge cases can be added later
|
|
114
|
+
- Prefer generalization: The skill should work across different contexts
|
|
115
|
+
PROMPT
|
|
116
|
+
end
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Clacky
|
|
4
|
+
class Agent
|
|
5
|
+
# Unified entry point for skill self-evolution system.
|
|
6
|
+
# Coordinates two scenarios:
|
|
7
|
+
# 1. Auto-create new skills from complex task patterns
|
|
8
|
+
# 2. Reflect on executed skills and suggest improvements
|
|
9
|
+
#
|
|
10
|
+
# Triggered at the end of Agent#run (post-run hooks), only for main agents.
|
|
11
|
+
module SkillEvolution
|
|
12
|
+
# Main entry point - runs all skill evolution checks
|
|
13
|
+
# Called from Agent#run after the main loop completes
|
|
14
|
+
def run_skill_evolution_hooks
|
|
15
|
+
return unless skill_evolution_enabled?
|
|
16
|
+
return if @is_subagent
|
|
17
|
+
|
|
18
|
+
# Scenario 2: Reflect on executed skill (if one just ran)
|
|
19
|
+
maybe_reflect_on_skill if @skill_execution_context
|
|
20
|
+
|
|
21
|
+
# Scenario 1: Auto-create new skill from complex task
|
|
22
|
+
maybe_create_skill_from_task
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Check if skill evolution is enabled in config
|
|
26
|
+
# @return [Boolean]
|
|
27
|
+
private def skill_evolution_enabled?
|
|
28
|
+
# Default to true if not explicitly disabled
|
|
29
|
+
return true unless @config.respond_to?(:skill_evolution)
|
|
30
|
+
|
|
31
|
+
config = @config.skill_evolution
|
|
32
|
+
return true if config.nil?
|
|
33
|
+
|
|
34
|
+
config[:enabled] != false
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Get skill evolution configuration hash
|
|
38
|
+
# @return [Hash]
|
|
39
|
+
private def skill_evolution_config
|
|
40
|
+
return {} unless @config.respond_to?(:skill_evolution)
|
|
41
|
+
|
|
42
|
+
@config.skill_evolution || {}
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
@@ -200,6 +200,14 @@ module Clacky
|
|
|
200
200
|
# @param task_id [Integer] Current task ID (for message tagging)
|
|
201
201
|
# @return [void]
|
|
202
202
|
def inject_skill_as_assistant_message(skill, arguments, task_id, slash_command: false)
|
|
203
|
+
# Track skill execution context for self-evolution system
|
|
204
|
+
@skill_execution_context = {
|
|
205
|
+
skill_name: skill.identifier,
|
|
206
|
+
start_iteration: @iterations,
|
|
207
|
+
arguments: arguments,
|
|
208
|
+
slash_command: slash_command
|
|
209
|
+
}
|
|
210
|
+
|
|
203
211
|
# For encrypted brand skills with supporting scripts: decrypt to a tmpdir so the
|
|
204
212
|
# LLM receives the real paths it can execute. The tmpdir is registered on the agent
|
|
205
213
|
# and shredded when agent.run completes (see Agent#shred_script_tmpdirs).
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Clacky
|
|
4
|
+
class Agent
|
|
5
|
+
# Scenario 2: Reflect on skill execution and suggest improvements.
|
|
6
|
+
#
|
|
7
|
+
# After a skill completes, inject a system prompt asking the LLM to analyze:
|
|
8
|
+
# - Were instructions clear enough?
|
|
9
|
+
# - Any missing edge cases?
|
|
10
|
+
# - Any improvements needed?
|
|
11
|
+
#
|
|
12
|
+
# If the LLM identifies concrete improvements, it can invoke skill-creator
|
|
13
|
+
# to update the skill.
|
|
14
|
+
module SkillReflector
|
|
15
|
+
# Minimum iterations for a skill execution to warrant reflection.
|
|
16
|
+
# Raised to 5 to filter out lightweight skill invocations (e.g. platform
|
|
17
|
+
# management skills like cron-task-creator that the user triggered incidentally).
|
|
18
|
+
MIN_SKILL_ITERATIONS = 5
|
|
19
|
+
|
|
20
|
+
# Check if we should reflect on the skill that just executed
|
|
21
|
+
# Called from SkillEvolution#run_skill_evolution_hooks
|
|
22
|
+
def maybe_reflect_on_skill
|
|
23
|
+
return unless @skill_execution_context
|
|
24
|
+
|
|
25
|
+
# Only reflect on skills that the user explicitly invoked via slash command.
|
|
26
|
+
# Skills triggered by the LLM itself (e.g. as part of a broader task) or
|
|
27
|
+
# platform-management skills invoked incidentally should not be reflected on.
|
|
28
|
+
return unless @skill_execution_context[:slash_command]
|
|
29
|
+
|
|
30
|
+
skill_name = @skill_execution_context[:skill_name]
|
|
31
|
+
start_iteration = @skill_execution_context[:start_iteration]
|
|
32
|
+
iterations = @iterations - start_iteration
|
|
33
|
+
|
|
34
|
+
# Only reflect if the skill actually ran for a meaningful number of iterations
|
|
35
|
+
return if iterations < MIN_SKILL_ITERATIONS
|
|
36
|
+
|
|
37
|
+
inject_skill_reflection_prompt(skill_name, iterations)
|
|
38
|
+
|
|
39
|
+
# Clear the context so we don't reflect again
|
|
40
|
+
@skill_execution_context = nil
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Inject the reflection prompt into history as a user-role message flagged system_injected
|
|
44
|
+
# The LLM will respond in the next user interaction (non-blocking)
|
|
45
|
+
#
|
|
46
|
+
# @param skill_name [String] Identifier of the skill that was executed
|
|
47
|
+
# @param iterations [Integer] Number of iterations the skill ran for
|
|
48
|
+
private def inject_skill_reflection_prompt(skill_name, iterations)
|
|
49
|
+
@history.append({
|
|
50
|
+
role: "user",
|
|
51
|
+
content: build_skill_reflection_prompt(skill_name, iterations),
|
|
52
|
+
system_injected: true,
|
|
53
|
+
skill_reflection: true
|
|
54
|
+
})
|
|
55
|
+
|
|
56
|
+
@ui&.show_info("Reflecting on skill execution: #{skill_name}")
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# Build the reflection prompt content
|
|
60
|
+
# @param skill_name [String]
|
|
61
|
+
# @param iterations [Integer]
|
|
62
|
+
# @return [String]
|
|
63
|
+
private def build_skill_reflection_prompt(skill_name, iterations)
|
|
64
|
+
<<~PROMPT
|
|
65
|
+
═══════════════════════════════════════════════════════════════
|
|
66
|
+
SKILL REFLECTION MODE
|
|
67
|
+
═══════════════════════════════════════════════════════════════
|
|
68
|
+
You just executed the skill "#{skill_name}" over #{iterations} iterations.
|
|
69
|
+
|
|
70
|
+
## Quick Analysis
|
|
71
|
+
|
|
72
|
+
Reflect on whether the skill could be improved:
|
|
73
|
+
- Were the instructions clear enough?
|
|
74
|
+
- Did you encounter any edge cases not covered?
|
|
75
|
+
- Were there any steps that could be streamlined?
|
|
76
|
+
- Is there missing context that would make it easier next time?
|
|
77
|
+
- Did the skill produce the expected results?
|
|
78
|
+
|
|
79
|
+
## Decision
|
|
80
|
+
|
|
81
|
+
If you identified **concrete, actionable improvements**:
|
|
82
|
+
→ Call invoke_skill("skill-creator", task: "Improve skill #{skill_name}: [describe specific improvements needed]")
|
|
83
|
+
|
|
84
|
+
If the skill worked well as-is:
|
|
85
|
+
→ Respond briefly: "Skill #{skill_name} worked well, no improvements needed."
|
|
86
|
+
|
|
87
|
+
## Constraints
|
|
88
|
+
|
|
89
|
+
- DO NOT spend more than 30 seconds on this reflection
|
|
90
|
+
- Be specific and actionable in your improvement suggestions
|
|
91
|
+
- Only suggest improvements that would make a meaningful difference
|
|
92
|
+
- If you're unsure, err on the side of "no improvements needed"
|
|
93
|
+
PROMPT
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
end
|
data/lib/clacky/agent.rb
CHANGED
|
@@ -20,6 +20,9 @@ require_relative "agent/system_prompt_builder"
|
|
|
20
20
|
require_relative "agent/llm_caller"
|
|
21
21
|
require_relative "agent/time_machine"
|
|
22
22
|
require_relative "agent/memory_updater"
|
|
23
|
+
require_relative "agent/skill_evolution"
|
|
24
|
+
require_relative "agent/skill_reflector"
|
|
25
|
+
require_relative "agent/skill_auto_creator"
|
|
23
26
|
|
|
24
27
|
module Clacky
|
|
25
28
|
class Agent
|
|
@@ -33,6 +36,9 @@ module Clacky
|
|
|
33
36
|
include LlmCaller
|
|
34
37
|
include TimeMachine
|
|
35
38
|
include MemoryUpdater
|
|
39
|
+
include SkillEvolution
|
|
40
|
+
include SkillReflector
|
|
41
|
+
include SkillAutoCreator
|
|
36
42
|
|
|
37
43
|
attr_reader :session_id, :name, :history, :iterations, :total_cost, :working_dir, :created_at, :total_tasks, :todos,
|
|
38
44
|
:cache_stats, :cost_source, :ui, :skill_loader, :agent_profile,
|
|
@@ -174,6 +180,7 @@ module Clacky
|
|
|
174
180
|
task_id = start_new_task
|
|
175
181
|
|
|
176
182
|
@start_time = Time.now
|
|
183
|
+
@task_truncation_count = 0 # Reset truncation counter for each task
|
|
177
184
|
@task_cost_source = :estimated # Reset for new task
|
|
178
185
|
# Note: Do NOT reset @previous_total_tokens here - it should maintain the value from the last iteration
|
|
179
186
|
# across tasks to correctly calculate delta tokens in each iteration
|
|
@@ -408,6 +415,11 @@ module Clacky
|
|
|
408
415
|
save_modified_files_snapshot(@modified_files_in_task)
|
|
409
416
|
@modified_files_in_task = [] # Reset for next task
|
|
410
417
|
end
|
|
418
|
+
|
|
419
|
+
# Run skill evolution hooks after main loop completes
|
|
420
|
+
# Only for main agent (not subagents) to avoid recursive evolution
|
|
421
|
+
run_skill_evolution_hooks unless @is_subagent
|
|
422
|
+
|
|
411
423
|
if @is_subagent
|
|
412
424
|
# Parent agent (skill_manager) prints the completion summary; skip here.
|
|
413
425
|
else
|
|
@@ -499,9 +511,9 @@ module Clacky
|
|
|
499
511
|
# Handle truncated responses (when max_tokens limit is reached)
|
|
500
512
|
if response[:finish_reason] == "length"
|
|
501
513
|
# Count truncations within the current task to prevent infinite retry loops
|
|
502
|
-
|
|
514
|
+
@task_truncation_count = (@task_truncation_count || 0) + 1
|
|
503
515
|
|
|
504
|
-
if
|
|
516
|
+
if @task_truncation_count >= 3
|
|
505
517
|
# Too many truncations - task is too complex
|
|
506
518
|
@ui&.show_error("Response truncated multiple times. Task is too complex.")
|
|
507
519
|
|
|
@@ -525,18 +537,30 @@ module Clacky
|
|
|
525
537
|
return error_response
|
|
526
538
|
end
|
|
527
539
|
|
|
540
|
+
# Preserve the truncated assistant message (text only, drop incomplete tool_calls)
|
|
541
|
+
# so the LLM sees what it attempted before. This also maintains the required
|
|
542
|
+
# user/assistant alternation for Bedrock Converse API.
|
|
543
|
+
truncated_text = response[:content] || ""
|
|
544
|
+
truncated_text = "..." if truncated_text.strip.empty?
|
|
545
|
+
@history.append({
|
|
546
|
+
role: "assistant",
|
|
547
|
+
content: truncated_text,
|
|
548
|
+
task_id: @current_task_id
|
|
549
|
+
})
|
|
550
|
+
|
|
528
551
|
# Insert system message to guide LLM to retry with smaller steps
|
|
529
552
|
@history.append({
|
|
530
553
|
role: "user",
|
|
531
|
-
content: "[SYSTEM] Your response was truncated
|
|
532
|
-
"
|
|
533
|
-
"-
|
|
534
|
-
"-
|
|
554
|
+
content: "[SYSTEM] Your previous response was truncated because it exceeded the output token limit (max_tokens=#{@config.max_tokens}). " \
|
|
555
|
+
"The incomplete tool call has been discarded. Please retry with a different approach:\n" \
|
|
556
|
+
"- For long file content: create the file with a basic structure first, then use edit() to add content section by section\n" \
|
|
557
|
+
"- Break down large tasks into multiple smaller tool calls\n" \
|
|
558
|
+
"- Keep each tool call argument under 2000 characters\n" \
|
|
535
559
|
"- Use multiple tool calls instead of one large call",
|
|
536
560
|
truncated: true
|
|
537
561
|
})
|
|
538
562
|
|
|
539
|
-
@ui&.show_warning("Response truncated. Retrying with smaller steps...")
|
|
563
|
+
@ui&.show_warning("Response truncated (#{@task_truncation_count}/3). Retrying with smaller steps...")
|
|
540
564
|
|
|
541
565
|
# Recursively retry
|
|
542
566
|
return think
|
|
@@ -684,7 +708,7 @@ module Clacky
|
|
|
684
708
|
progress_timer.kill
|
|
685
709
|
progress_timer.join
|
|
686
710
|
end
|
|
687
|
-
@ui&.
|
|
711
|
+
@ui&.show_progress(phase: "done") if progress_shown
|
|
688
712
|
end
|
|
689
713
|
|
|
690
714
|
# Track modified files for Time Machine snapshots
|
|
@@ -736,17 +760,6 @@ module Clacky
|
|
|
736
760
|
}
|
|
737
761
|
Clacky::Logger.error("tool_execution_error", tool: call[:name], error: e)
|
|
738
762
|
|
|
739
|
-
# If arguments were malformed/truncated (e.g. Bedrock streaming truncation),
|
|
740
|
-
# retract the bad assistant message from history so the next LLM call gets a
|
|
741
|
-
# fresh context rather than re-reading a cached broken tool call.
|
|
742
|
-
# Also skip adding a tool_result — without the assistant message there is no
|
|
743
|
-
# tool_call to pair with, and sending an orphan tool_result breaks the API.
|
|
744
|
-
if e.is_a?(Utils::BadArgumentsError)
|
|
745
|
-
size_before = @history.size
|
|
746
|
-
@history.pop_while { |m| m[:role] == "assistant" && m[:tool_calls]&.any? { |tc| tc[:id] == call[:id] } }
|
|
747
|
-
next if @history.size < size_before # message was retracted, skip tool_result
|
|
748
|
-
end
|
|
749
|
-
|
|
750
763
|
@hooks.trigger(:on_tool_error, call, e)
|
|
751
764
|
@ui&.show_tool_error(e)
|
|
752
765
|
# Use build_denied_result with system_injected=true so LLM knows it can retry
|
data/lib/clacky/agent_config.rb
CHANGED
|
@@ -152,7 +152,8 @@ module Clacky
|
|
|
152
152
|
|
|
153
153
|
attr_accessor :permission_mode, :max_tokens, :verbose,
|
|
154
154
|
:enable_compression, :enable_prompt_caching,
|
|
155
|
-
:models, :current_model_index
|
|
155
|
+
:models, :current_model_index,
|
|
156
|
+
:memory_update_enabled, :skill_evolution
|
|
156
157
|
|
|
157
158
|
def initialize(options = {})
|
|
158
159
|
@permission_mode = validate_permission_mode(options[:permission_mode])
|
|
@@ -165,6 +166,14 @@ module Clacky
|
|
|
165
166
|
# Models configuration
|
|
166
167
|
@models = options[:models] || []
|
|
167
168
|
@current_model_index = options[:current_model_index] || 0
|
|
169
|
+
|
|
170
|
+
# Memory and skill evolution configuration
|
|
171
|
+
@memory_update_enabled = options[:memory_update_enabled].nil? ? true : options[:memory_update_enabled]
|
|
172
|
+
@skill_evolution = options[:skill_evolution] || {
|
|
173
|
+
enabled: true,
|
|
174
|
+
auto_create_threshold: 12,
|
|
175
|
+
reflection_mode: "llm_analysis"
|
|
176
|
+
}
|
|
168
177
|
end
|
|
169
178
|
|
|
170
179
|
# Load configuration from file
|
data/lib/clacky/cli.rb
CHANGED
|
@@ -471,7 +471,7 @@ module Clacky
|
|
|
471
471
|
|
|
472
472
|
# Handle agent error/interrupt with cleanup
|
|
473
473
|
def handle_agent_exception(ui_controller, agent, session_manager, exception)
|
|
474
|
-
ui_controller.
|
|
474
|
+
ui_controller.show_progress(phase: "done")
|
|
475
475
|
ui_controller.set_idle_status
|
|
476
476
|
|
|
477
477
|
if exception.is_a?(Clacky::AgentInterrupted)
|
|
@@ -25,7 +25,7 @@ require "fileutils"
|
|
|
25
25
|
# Primary CDN-accelerated endpoint.
|
|
26
26
|
# Fallback bypasses EdgeOne and is used when the primary times out or errors.
|
|
27
27
|
PRIMARY_HOST = ENV.fetch("CLACKY_LICENSE_SERVER", "https://www.openclacky.com")
|
|
28
|
-
FALLBACK_HOST = "https://openclacky
|
|
28
|
+
FALLBACK_HOST = "https://openclacky.up.railway.app"
|
|
29
29
|
# When the env override is set we use only that host (dev/test mode).
|
|
30
30
|
API_HOSTS = ENV["CLACKY_LICENSE_SERVER"] ? [PRIMARY_HOST] : [PRIMARY_HOST, FALLBACK_HOST]
|
|
31
31
|
|
|
@@ -7,6 +7,15 @@ description: Create new skills, modify and improve existing skills, and measure
|
|
|
7
7
|
|
|
8
8
|
A skill for creating new skills and iteratively improving them.
|
|
9
9
|
|
|
10
|
+
## Usage Modes
|
|
11
|
+
|
|
12
|
+
This skill supports two modes:
|
|
13
|
+
|
|
14
|
+
### 1. Interactive Mode (default)
|
|
15
|
+
|
|
16
|
+
The full workflow with user interviews, test cases, and iteration cycles.
|
|
17
|
+
Use when creating or refining skills manually.
|
|
18
|
+
|
|
10
19
|
At a high level, the process of creating a skill goes like this:
|
|
11
20
|
|
|
12
21
|
- Decide what you want the skill to do and roughly how it should do it
|
|
@@ -22,6 +31,43 @@ Your job is to figure out where the user is in this process and jump in to help
|
|
|
22
31
|
|
|
23
32
|
Always be flexible. If the user says "skip the evals, just vibe with me", do that instead.
|
|
24
33
|
|
|
34
|
+
### 2. Quick Mode (for agent self-evolution)
|
|
35
|
+
|
|
36
|
+
**Trigger**: When invoked with `mode: "quick"` in the task arguments.
|
|
37
|
+
|
|
38
|
+
Fast, opinionated skill creation without user interaction. This mode is used by the agent's self-evolution system to automatically create or improve skills.
|
|
39
|
+
|
|
40
|
+
**Behavior**:
|
|
41
|
+
- Skip user interviews and detailed requirements gathering
|
|
42
|
+
- Extract workflow pattern from provided context
|
|
43
|
+
- Write a minimal but functional SKILL.md
|
|
44
|
+
- Save to `~/.clacky/skills/auto-<name>-<timestamp>/` (or improve existing skill in place)
|
|
45
|
+
- Skip test cases and evals (user can refine later if needed)
|
|
46
|
+
- Always validate frontmatter with the validator script after creation
|
|
47
|
+
- Focus on the happy path; edge cases can be added later
|
|
48
|
+
|
|
49
|
+
**Expected arguments when using quick mode**:
|
|
50
|
+
- `task`: Clear description of what to automate and how (be specific about workflow steps)
|
|
51
|
+
- `mode`: Must be set to `"quick"`
|
|
52
|
+
- `suggested_name`: (optional) Proposed skill identifier (lowercase, hyphens OK)
|
|
53
|
+
|
|
54
|
+
**Quick mode principles**:
|
|
55
|
+
- **Be opinionated**: Make reasonable assumptions without asking
|
|
56
|
+
- **Be concise**: Keep instructions simple and focused
|
|
57
|
+
- **Be practical**: Focus on the core workflow that will save the most time
|
|
58
|
+
- **Be correct**: Always set `disable-model-invocation: false` and `user-invocable: true`
|
|
59
|
+
- **Be validating**: Run the frontmatter validator immediately after creation
|
|
60
|
+
|
|
61
|
+
**Example invocation from the agent's self-evolution system**:
|
|
62
|
+
```
|
|
63
|
+
invoke_skill(
|
|
64
|
+
skill_name: "skill-creator",
|
|
65
|
+
task: "Create a skill to extract and summarize content from URLs. The skill should: 1) fetch the URL using safe_shell with curl, 2) parse the HTML to extract main text content, 3) generate a concise markdown summary. Expected input: URL string. Expected output: markdown summary with title and key points.",
|
|
66
|
+
mode: "quick",
|
|
67
|
+
suggested_name: "url-summarizer"
|
|
68
|
+
)
|
|
69
|
+
```
|
|
70
|
+
|
|
25
71
|
---
|
|
26
72
|
|
|
27
73
|
## Platform Context: Clacky
|
|
@@ -132,10 +132,6 @@ module Clacky
|
|
|
132
132
|
@progress_start_time = nil if phase == "done"
|
|
133
133
|
end
|
|
134
134
|
|
|
135
|
-
def clear_progress
|
|
136
|
-
show_progress(progress_type: "thinking", phase: "done")
|
|
137
|
-
end
|
|
138
|
-
|
|
139
135
|
# === State updates ===
|
|
140
136
|
|
|
141
137
|
def update_sessionbar(tasks: nil, cost: nil, status: nil)
|
|
@@ -62,12 +62,6 @@ module Clacky
|
|
|
62
62
|
@messages.pop
|
|
63
63
|
end
|
|
64
64
|
|
|
65
|
-
# Remove messages from the end while the block is truthy.
|
|
66
|
-
def pop_while(&block)
|
|
67
|
-
@messages.pop while !@messages.empty? && block.call(@messages.last)
|
|
68
|
-
self
|
|
69
|
-
end
|
|
70
|
-
|
|
71
65
|
# Remove all messages matching the block in-place
|
|
72
66
|
# (e.g. cleanup_memory_messages uses reject! { m[:memory_update] }).
|
|
73
67
|
def delete_where(&block)
|
|
@@ -152,12 +146,6 @@ module Clacky
|
|
|
152
146
|
@messages.select { |m| !m[:task_id] || m[:task_id] <= task_id }
|
|
153
147
|
end
|
|
154
148
|
|
|
155
|
-
# Count how many of the last N messages have :truncated set.
|
|
156
|
-
# Used by think() to guard against infinite truncation retry loops.
|
|
157
|
-
def recent_truncation_count(n)
|
|
158
|
-
@messages.last(n).count { |m| m[:truncated] }
|
|
159
|
-
end
|
|
160
|
-
|
|
161
149
|
# ─────────────────────────────────────────────
|
|
162
150
|
# Size helpers
|
|
163
151
|
# ─────────────────────────────────────────────
|
|
@@ -126,7 +126,6 @@ module Clacky
|
|
|
126
126
|
# === Progress (no-ops — no spinner in plain mode) ===
|
|
127
127
|
|
|
128
128
|
def show_progress(message = nil, prefix_newline: true, progress_type: "thinking", phase: "active", metadata: {}); end
|
|
129
|
-
def clear_progress; end
|
|
130
129
|
|
|
131
130
|
# === State updates (no-ops) ===
|
|
132
131
|
|
|
@@ -9,8 +9,6 @@ module Clacky
|
|
|
9
9
|
# OpenClacky platform API (www.openclacky.com and its fallback domain).
|
|
10
10
|
#
|
|
11
11
|
# Features:
|
|
12
|
-
# - Primary domain: https://www.openclacky.com (EdgeOne CDN-accelerated)
|
|
13
|
-
# - Fallback domain: https://openclacky-platform.clackyai.app (direct, no CDN)
|
|
14
12
|
# - Automatic retry with exponential back-off on transient failures
|
|
15
13
|
# - Transparent domain failover: if the primary domain times out or returns a
|
|
16
14
|
# 5xx error, the request is automatically retried against the fallback domain
|
|
@@ -23,9 +21,9 @@ module Clacky
|
|
|
23
21
|
# # or { success: false, error: "...", data: {} }
|
|
24
22
|
class PlatformHttpClient
|
|
25
23
|
# Primary CDN-accelerated endpoint
|
|
26
|
-
PRIMARY_HOST
|
|
24
|
+
PRIMARY_HOST = "https://www.openclacky.com"
|
|
27
25
|
# Direct fallback — bypasses EdgeOne, used when the primary times out
|
|
28
|
-
FALLBACK_HOST
|
|
26
|
+
FALLBACK_HOST = "https://openclacky.up.railway.app"
|
|
29
27
|
|
|
30
28
|
# Number of attempts per domain (1 = no retry within the same domain)
|
|
31
29
|
ATTEMPTS_PER_HOST = 2
|