kward 0.68.0 → 0.69.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/pages.yml +48 -0
- data/.yardopts +1 -0
- data/CHANGELOG.md +40 -0
- data/Gemfile.lock +8 -2
- data/README.md +32 -25
- data/Rakefile +14 -1
- data/doc/authentication.md +74 -56
- data/doc/code-search.md +55 -28
- data/doc/configuration.md +18 -0
- data/doc/extensibility.md +89 -128
- data/doc/getting-started.md +52 -54
- data/doc/memory.md +51 -118
- data/doc/personas.md +417 -0
- data/doc/plugins.md +55 -97
- data/doc/releasing.md +3 -1
- data/doc/rpc.md +1 -1
- data/doc/usage.md +125 -144
- data/doc/web-search.md +80 -14
- data/exe/kward +2 -0
- data/lib/kward/agent.rb +1 -1
- data/lib/kward/cli/commands.rb +10 -3
- data/lib/kward/cli/compaction.rb +3 -3
- data/lib/kward/cli/interactive_turn.rb +3 -1
- data/lib/kward/cli/memory_commands.rb +16 -16
- data/lib/kward/cli/plugins.rb +3 -3
- data/lib/kward/cli/prompt_interface.rb +15 -13
- data/lib/kward/cli/rendering.rb +35 -46
- data/lib/kward/cli/runtime_helpers.rb +13 -2
- data/lib/kward/cli/sessions.rb +21 -21
- data/lib/kward/cli/settings.rb +49 -43
- data/lib/kward/cli/slash_commands.rb +6 -4
- data/lib/kward/cli/stats.rb +2 -2
- data/lib/kward/cli/sysprompt.rb +57 -0
- data/lib/kward/cli/tool_summaries.rb +5 -1
- data/lib/kward/cli.rb +14 -2
- data/lib/kward/cli_transcript_formatter.rb +36 -5
- data/lib/kward/compactor.rb +2 -2
- data/lib/kward/config_files.rb +45 -10
- data/lib/kward/conversation.rb +41 -9
- data/lib/kward/memory/manager.rb +131 -14
- data/lib/kward/message_access.rb +6 -0
- data/lib/kward/model/context_usage.rb +11 -10
- data/lib/kward/model/model_info.rb +18 -1
- data/lib/kward/model/payloads.rb +89 -10
- data/lib/kward/model/stream_parser.rb +258 -25
- data/lib/kward/prompt_interface/question_prompt.rb +1 -1
- data/lib/kward/prompt_interface/transcript_renderer.rb +20 -11
- data/lib/kward/prompts.rb +61 -7
- data/lib/kward/rpc/server.rb +7 -2
- data/lib/kward/rpc/session_manager.rb +18 -2
- data/lib/kward/rpc/session_metrics.rb +2 -2
- data/lib/kward/rpc/session_tree_rows.rb +54 -13
- data/lib/kward/rpc/transcript_normalizer.rb +47 -0
- data/lib/kward/session_store.rb +45 -2
- data/lib/kward/session_tree_renderer.rb +54 -13
- data/lib/kward/starter_pack_installer.rb +2 -2
- data/lib/kward/tools/fetch_content.rb +41 -0
- data/lib/kward/tools/fetch_raw.rb +40 -0
- data/lib/kward/tools/registry.rb +9 -2
- data/lib/kward/tools/search/web.rb +3 -3
- data/lib/kward/tools/search/web_fetch.rb +202 -0
- data/lib/kward/tools/tool_call.rb +2 -0
- data/lib/kward/version.rb +1 -1
- data/templates/default/fulldoc/html/css/kward.css +1501 -0
- data/templates/default/fulldoc/html/images/kward_logo.png +0 -0
- data/templates/default/fulldoc/html/js/kward.js +296 -0
- data/templates/default/fulldoc/html/setup.rb +8 -0
- data/templates/default/layout/html/breadcrumb.erb +11 -0
- data/templates/default/layout/html/layout.erb +141 -0
- data/templates/default/layout/html/setup.rb +139 -0
- metadata +14 -1
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
require "nokogiri"
|
|
2
|
+
require "uri"
|
|
3
|
+
require_relative "web"
|
|
4
|
+
|
|
5
|
+
# Namespace for the Kward CLI agent runtime.
|
|
6
|
+
module Kward
|
|
7
|
+
# Fetches specific web resources for agent research workflows.
#
# Both public entry points take a Hash of tool arguments (String or Symbol
# keys) and always return a String: either a small Markdown report or a
# message starting with "Error:". Failures are reported as strings rather than
# raised.
class WebFetch
  DEFAULT_MAX_BYTES = 16 * 1024
  MAX_MAX_BYTES = 128 * 1024
  MAX_REDIRECTS = 5
  # NOTE(review): not referenced inside this class; kept because other code
  # may read it.
  HTTP_TIMEOUT_SECONDS = 10

  DEFAULT_ACCEPT = "text/html,application/xhtml+xml,text/plain;q=0.9,*/*;q=0.8"
  REDIRECT_CODES = [301, 302, 303, 307, 308].freeze
  EXTRACT_MODES = %w[auto text markdown].freeze
  NOISE_SELECTOR = "script, style, noscript, svg, nav, footer, form"
  CONTENT_TAGS = %w[h1 h2 h3 h4 h5 h6 p li pre code blockquote].freeze
  CONTENT_SELECTOR = CONTENT_TAGS.join(", ").freeze

  # Creates a fetcher for web content and raw resources.
  #
  # @param http_client [#get] object responding to `get(url, headers:)` and
  #   returning a response with `code` and `body` (and optionally a Hash
  #   `headers`)
  def initialize(http_client: WebSearch::NetHttpClient.new)
    @http_client = http_client
  end

  # Fetches a URL and extracts readable text for human-facing pages.
  #
  # @param args [Hash] "url" (required), "max_bytes"/"maxBytes" (optional),
  #   "extract" (optional: auto, text or markdown; defaults to auto)
  # @return [String] a report with URL, content type, byte count and the
  #   extracted text, or an "Error: ..." message
  def fetch_content(args)
    url = args_value(args, "url").to_s.strip
    return "Error: url is required" if url.empty?

    max_bytes = bounded_max_bytes(args_value(args, "max_bytes") || args_value(args, "maxBytes"))
    extract = normalize_extract(args_value(args, "extract") || "auto")
    return "Error: extract must be one of: auto, text, markdown" unless extract

    response = fetch_url(url, max_bytes: max_bytes)
    return response if response.is_a?(String)

    content_type = header_value(response[:headers], "content-type")
    text = extract_readable_text(response[:body].to_s, content_type: content_type, mode: extract)
    # The notice is added after extraction so it can never end up inside the
    # parsed HTML; an empty extraction keeps its dedicated placeholder.
    text = truncate_bytes(text, max_bytes, force_notice: response[:truncated] && !text.empty?)

    [
      "# Fetched content",
      "- URL: #{response[:url]}",
      "- Content type: #{content_type.empty? ? "unknown" : content_type}",
      "- Bytes returned: #{text.bytesize}",
      "",
      text.empty? ? "(No readable text extracted.)" : text
    ].join("\n")
  rescue StandardError => e
    "Error: fetch_content failed: #{e.message}"
  end

  # Fetches a URL and returns bounded raw response content.
  #
  # @param args [Hash] "url" (required), "max_bytes"/"maxBytes" (optional),
  #   "accept" (optional Accept header; defaults to "*/*")
  # @return [String] a report with URL, content type, byte count and the raw
  #   body, or an "Error: ..." message
  def fetch_raw(args)
    url = args_value(args, "url").to_s.strip
    return "Error: url is required" if url.empty?

    max_bytes = bounded_max_bytes(args_value(args, "max_bytes") || args_value(args, "maxBytes"))
    accept = args_value(args, "accept").to_s.strip
    response = fetch_url(url, max_bytes: max_bytes, accept: accept.empty? ? "*/*" : accept)
    return response if response.is_a?(String)

    body = truncate_bytes(response[:body].to_s, max_bytes, force_notice: response[:truncated])
    content_type = header_value(response[:headers], "content-type")
    [
      "# Fetched raw content",
      "- URL: #{response[:url]}",
      "- Content type: #{content_type.empty? ? "unknown" : content_type}",
      "- Bytes returned: #{body.bytesize}",
      "",
      body
    ].join("\n")
  rescue StandardError => e
    "Error: fetch_raw failed: #{e.message}"
  end

  private

  # Performs the GET, following up to MAX_REDIRECTS redirects.
  #
  # @return [Hash, String] on success a Hash with :url (final URL), :headers
  #   (lowercased keys), :body (at most max_bytes bytes, without any notice)
  #   and :truncated (whether the body was cut); otherwise an "Error: ..."
  #   String
  # @raise [RuntimeError] when the URL or a redirect target is not a valid
  #   http(s) URL
  def fetch_url(url, max_bytes:, accept: DEFAULT_ACCEPT)
    current_url = normalize_url(url)
    redirects = 0

    loop do
      response = @http_client.get(current_url, headers: browser_headers(accept))
      code = response.code.to_i
      headers = response_headers(response)

      if redirect?(code)
        return "Error: too many redirects" if redirects >= MAX_REDIRECTS

        location = header_value(headers, "location")
        return "Error: redirect missing Location header" if location.empty?

        # Location may be relative; resolve it against the URL just requested.
        current_url = normalize_url(URI.join(current_url, location).to_s)
        redirects += 1
        next
      end

      return "Error: fetch failed with HTTP #{response.code}" unless code.between?(200, 299)

      body = response.body.to_s
      truncated = body.bytesize > max_bytes
      body = body.byteslice(0, max_bytes).to_s if truncated
      return { url: current_url, headers: headers, body: body, truncated: truncated }
    end
  end

  # Validates that the value is an absolute http(s) URL and returns it as a
  # String.
  #
  # @raise [RuntimeError] for unparsable URLs, other schemes or a missing host
  def normalize_url(value)
    uri = URI.parse(value.to_s.strip)
    raise "url must use http or https" unless %w[http https].include?(uri.scheme)
    raise "url host is required" if uri.host.to_s.empty?

    uri.to_s
  rescue URI::InvalidURIError
    raise "invalid url"
  end

  # Returns the response headers with lowercased String keys, or an empty Hash
  # when the response does not expose a Hash through `headers`.
  def response_headers(response)
    return {} unless response.respond_to?(:headers) && response.headers.is_a?(Hash)

    response.headers.transform_keys { |key| key.to_s.downcase }
  end

  # Looks up a header by name in a Hash with lowercased keys.
  # Array values are reduced to their first entry; a missing header yields "".
  def header_value(headers, key)
    Array(headers[key.to_s.downcase]).first.to_s
  end

  def redirect?(code)
    REDIRECT_CODES.include?(code)
  end

  # Coerces the requested byte limit: non-positive or non-numeric values fall
  # back to DEFAULT_MAX_BYTES, and the result never exceeds MAX_MAX_BYTES.
  def bounded_max_bytes(value)
    number = value.to_i
    number = DEFAULT_MAX_BYTES if number <= 0
    [number, MAX_MAX_BYTES].min
  end

  # Returns the normalized extract mode, or nil when it is not supported.
  def normalize_extract(value)
    normalized = value.to_s.strip.downcase
    EXTRACT_MODES.include?(normalized) ? normalized : nil
  end

  # Turns a response body into readable text.
  #
  # Non-HTML bodies are only whitespace-normalized. HTML bodies are parsed,
  # stripped of scripts/navigation/forms and reduced to their headings,
  # paragraphs, list items, code and quotes. In "text" mode the pieces are
  # plain text; otherwise they carry Markdown markers.
  def extract_readable_text(body, content_type:, mode:)
    return clean_text(body) unless html_content?(content_type, body)

    document = Nokogiri::HTML(body)
    document.css(NOISE_SELECTOR).remove
    title = clean_text(document.at_css("title")&.text)
    root = document.at_css("article") || document.at_css("main") || document.at_css("body") || document

    parts = []
    parts << (mode == "text" ? title : "# #{title}") unless title.empty?
    root.css(CONTENT_SELECTOR).each do |node|
      next if nested_code?(node)

      text = clean_text(node.text)
      next if text.empty?

      parts << format_html_node(node, text, mode: mode)
    end
    parts.uniq.join("\n\n")
  end

  # True for <code> elements that sit inside another extracted element; their
  # text is already part of that ancestor's text, so emitting them again would
  # duplicate it.
  def nested_code?(node)
    node.name == "code" && node.ancestors.any? { |ancestor| CONTENT_TAGS.include?(ancestor.name) }
  end

  # Detects HTML from the Content-Type header or, failing that, from the start
  # of the body. Both checks ignore letter case.
  def html_content?(content_type, body)
    return true if content_type.to_s.downcase.include?("html")

    head = body.to_s.lstrip.byteslice(0, 32).to_s.scrub.downcase
    head.start_with?("<!doctype html", "<html")
  end

  # Formats one extracted element; plain text in "text" mode, Markdown
  # otherwise.
  def format_html_node(node, text, mode:)
    return text if mode == "text"

    case node.name
    when /\Ah([1-6])\z/
      "#{"#" * Regexp.last_match(1).to_i} #{text}"
    when "li"
      "- #{text}"
    when "pre", "code"
      "```\n#{text}\n```"
    when "blockquote"
      "> #{text}"
    else
      text
    end
  end

  # Collapses every run of whitespace into a single space and trims the ends.
  def clean_text(text)
    text.to_s.gsub(/\s+/, " ").strip
  end

  # Limits text to max_bytes and appends a truncation notice.
  #
  # @param force_notice [Boolean] append the notice even when text fits,
  #   because the data it was derived from had already been cut
  # @return [String] text unchanged when it fits and no notice is forced
  def truncate_bytes(text, max_bytes, force_notice: false)
    oversized = text.bytesize > max_bytes
    return text unless oversized || force_notice

    kept = oversized ? text.byteslice(0, max_bytes).to_s : text
    # scrub repairs a multibyte character that was cut in half.
    "#{kept.scrub}\n... truncated to #{max_bytes} bytes"
  end

  # Request headers sent with every fetch.
  def browser_headers(accept)
    {
      "Accept" => accept,
      "Accept-Language" => "en-US,en;q=0.9",
      "User-Agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
    }
  end

  # Reads a tool argument by String key, falling back to the Symbol key.
  # Returns nil when args is not a Hash or the key is absent.
  def args_value(args, key)
    return nil unless args.is_a?(Hash)
    return args[key] if args.key?(key)
    return args[key.to_sym] if args.key?(key.to_sym)

    nil
  end
end
|
|
202
|
+
end
|
|
@@ -18,6 +18,8 @@ module Kward
|
|
|
18
18
|
"list_directory" => "list_directory",
|
|
19
19
|
"code_search" => "code_search",
|
|
20
20
|
"web_search" => "web_search",
|
|
21
|
+
"fetch_content" => "fetch_content",
|
|
22
|
+
"fetch_raw" => "fetch_raw",
|
|
21
23
|
"read_skill" => "read_skill",
|
|
22
24
|
"ask_user_question" => "ask_user_question"
|
|
23
25
|
}.freeze
|
data/lib/kward/version.rb
CHANGED