kward 0.68.0 → 0.69.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/pages.yml +48 -0
- data/.yardopts +1 -0
- data/CHANGELOG.md +34 -0
- data/Gemfile.lock +8 -2
- data/README.md +32 -25
- data/Rakefile +14 -1
- data/doc/authentication.md +74 -56
- data/doc/code-search.md +55 -28
- data/doc/configuration.md +18 -0
- data/doc/extensibility.md +89 -128
- data/doc/getting-started.md +52 -54
- data/doc/memory.md +51 -118
- data/doc/personas.md +417 -0
- data/doc/plugins.md +55 -97
- data/doc/releasing.md +3 -1
- data/doc/rpc.md +1 -1
- data/doc/usage.md +125 -144
- data/doc/web-search.md +80 -14
- data/exe/kward +2 -0
- data/lib/kward/agent.rb +1 -1
- data/lib/kward/cli/commands.rb +10 -3
- data/lib/kward/cli/compaction.rb +3 -3
- data/lib/kward/cli/interactive_turn.rb +3 -1
- data/lib/kward/cli/memory_commands.rb +16 -16
- data/lib/kward/cli/plugins.rb +3 -3
- data/lib/kward/cli/prompt_interface.rb +15 -13
- data/lib/kward/cli/rendering.rb +35 -46
- data/lib/kward/cli/runtime_helpers.rb +13 -2
- data/lib/kward/cli/sessions.rb +21 -21
- data/lib/kward/cli/settings.rb +49 -43
- data/lib/kward/cli/slash_commands.rb +6 -4
- data/lib/kward/cli/stats.rb +2 -2
- data/lib/kward/cli/sysprompt.rb +57 -0
- data/lib/kward/cli/tool_summaries.rb +5 -1
- data/lib/kward/cli.rb +14 -2
- data/lib/kward/cli_transcript_formatter.rb +36 -5
- data/lib/kward/compactor.rb +2 -2
- data/lib/kward/config_files.rb +45 -10
- data/lib/kward/conversation.rb +41 -9
- data/lib/kward/memory/manager.rb +131 -14
- data/lib/kward/message_access.rb +6 -0
- data/lib/kward/model/context_usage.rb +11 -10
- data/lib/kward/model/model_info.rb +18 -1
- data/lib/kward/model/payloads.rb +89 -10
- data/lib/kward/model/stream_parser.rb +258 -25
- data/lib/kward/prompt_interface/question_prompt.rb +1 -1
- data/lib/kward/prompt_interface/transcript_renderer.rb +20 -11
- data/lib/kward/prompts.rb +61 -7
- data/lib/kward/rpc/server.rb +7 -2
- data/lib/kward/rpc/session_manager.rb +18 -2
- data/lib/kward/rpc/session_metrics.rb +2 -2
- data/lib/kward/rpc/transcript_normalizer.rb +47 -0
- data/lib/kward/session_store.rb +40 -1
- data/lib/kward/starter_pack_installer.rb +2 -2
- data/lib/kward/tools/fetch_content.rb +41 -0
- data/lib/kward/tools/fetch_raw.rb +40 -0
- data/lib/kward/tools/registry.rb +9 -2
- data/lib/kward/tools/search/web.rb +3 -3
- data/lib/kward/tools/search/web_fetch.rb +202 -0
- data/lib/kward/tools/tool_call.rb +2 -0
- data/lib/kward/version.rb +1 -1
- data/templates/default/fulldoc/html/css/kward.css +1501 -0
- data/templates/default/fulldoc/html/images/kward_logo.png +0 -0
- data/templates/default/fulldoc/html/js/kward.js +296 -0
- data/templates/default/fulldoc/html/setup.rb +8 -0
- data/templates/default/layout/html/breadcrumb.erb +11 -0
- data/templates/default/layout/html/layout.erb +141 -0
- data/templates/default/layout/html/setup.rb +139 -0
- metadata +14 -1
data/lib/kward/session_store.rb
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
require "fileutils"
|
|
2
|
+
require "digest"
|
|
2
3
|
require "json"
|
|
3
4
|
require "securerandom"
|
|
4
5
|
require "time"
|
|
@@ -28,7 +29,7 @@ module Kward
|
|
|
28
29
|
VERSION = 2
|
|
29
30
|
LAST_SESSION_FILENAME = "last_session.json"
|
|
30
31
|
|
|
31
|
-
SessionInfo = Struct.new(:id, :path, :cwd, :created_at, :modified_at, :name, :first_message, :message_count, :parent_id, :parent_path, :depth, :is_last, :ancestor_continues, keyword_init: true)
|
|
32
|
+
SessionInfo = Struct.new(:id, :path, :cwd, :created_at, :modified_at, :name, :first_message, :message_count, :provider, :model, :reasoning_effort, :parent_id, :parent_path, :depth, :is_last, :ancestor_continues, keyword_init: true)
|
|
32
33
|
|
|
33
34
|
# Live handle that attaches persistence callbacks to a conversation.
|
|
34
35
|
#
|
|
@@ -77,6 +78,8 @@ module Kward
|
|
|
77
78
|
conversation.on_compact = lambda { |message| compact(message) }
|
|
78
79
|
conversation.on_tool_execution = lambda { |tool_call, content| append_tool_execution(tool_call, content) }
|
|
79
80
|
conversation.on_runtime_update = lambda { |provider:, model:, reasoning_effort:| update_runtime(provider: provider, model: model, reasoning_effort: reasoning_effort) }
|
|
81
|
+
conversation.on_system_message_change = lambda { |system_message| append_system_prompt_snapshot(system_message, reason: "changed") }
|
|
82
|
+
append_system_prompt_snapshot(conversation.system_message, reason: "attach")
|
|
80
83
|
self
|
|
81
84
|
end
|
|
82
85
|
|
|
@@ -99,6 +102,11 @@ module Kward
|
|
|
99
102
|
@store.append_record(@path, RPC::ToolEventNormalizer.new(tool_call, content: content).execution_record)
|
|
100
103
|
end
|
|
101
104
|
|
|
105
|
+
# Records the given system prompt for this session as audit metadata.
#
# Hands the message to the backing store together with this handle's session
# path; the store decides whether a new snapshot record is actually written.
#
# @param system_message [Object] system message whose content is recorded
# @param reason [String] label stored with the snapshot (for example
#   "attach" or "changed")
def append_system_prompt_snapshot(system_message, reason: "changed")
  @store.append_system_prompt_snapshot(@path, system_message, reason: reason)
end
|
|
109
|
+
|
|
102
110
|
# Persists the session memory snapshot used when the session is restored.
|
|
103
111
|
def update_memory_state(session_memories:, last_retrieval: nil)
|
|
104
112
|
@store.append_record(@path, {
|
|
@@ -427,12 +435,39 @@ module Kward
|
|
|
427
435
|
end
|
|
428
436
|
end
|
|
429
437
|
|
|
438
|
+
# Appends a "system_prompt" record to the session file at +path+ unless the
# prompt is empty or identical to the most recently recorded one.
#
# @param path [String] session file to append to
# @param system_message [Object] message whose content is read through
#   MessageAccess.content
# @param reason [#to_s] label stored in the record's "reason" field
# @return [Object, nil] nil when nothing is written, otherwise whatever
#   append_record returns
def append_system_prompt_snapshot(path, system_message, reason: "changed")
  prompt_text = MessageAccess.content(system_message).to_s
  return if prompt_text.empty?

  # Compare against the newest stored digest so an unchanged prompt is not
  # recorded again.
  digest = system_prompt_hash(prompt_text)
  return if latest_system_prompt_hash(records_from_file(path)) == digest

  snapshot = {
    type: "system_prompt",
    timestamp: Time.now.utc.iso8601(3),
    reason: reason.to_s,
    hash: digest,
    content: prompt_text
  }
  append_record(path, snapshot)
end
|
|
451
|
+
|
|
430
452
|
# Turns a working directory into a single filesystem-safe name.
#
# The path is expanded, one leading slash or backslash is dropped, every
# remaining slash, backslash, or colon becomes "-", and the result is wrapped
# in "--" markers.
#
# @param cwd [String] directory path
# @return [String] flattened directory name
def self.safe_cwd(cwd)
  absolute = File.expand_path(cwd)
  without_root = absolute.sub(%r{\A[/\\]}, "")
  flattened = without_root.gsub(%r{[/\\:]}, "-")
  "--#{flattened}--"
end
|
|
433
455
|
|
|
434
456
|
private
|
|
435
457
|
|
|
458
|
+
# Finds the hash of the newest "system_prompt" record that carries one.
#
# Records are scanned from last to first; system prompt records whose hash is
# missing or empty are passed over in favour of older ones.
#
# @param records [Array<Hash>] parsed session records with string keys
# @return [String, nil] the stored hash, or nil when no record has one
def latest_system_prompt_hash(records)
  newest = records.reverse_each.find do |entry|
    entry["type"] == "system_prompt" && !entry["hash"].to_s.empty?
  end
  newest && newest["hash"].to_s
end
|
|
466
|
+
|
|
467
|
+
# Builds the identifier used to compare system prompt snapshots.
#
# @param content [#to_s] prompt text
# @return [String] "sha256:" followed by the hex SHA-256 digest of the text
def system_prompt_hash(content)
  hex_digest = Digest::SHA256.hexdigest(content.to_s)
  "sha256:#{hex_digest}"
end
|
|
470
|
+
|
|
436
471
|
def resolve_session_path(path)
|
|
437
472
|
expanded = path.to_s.start_with?("~/") ? File.join(Dir.home, path.to_s[2..]) : path.to_s
|
|
438
473
|
resolved = File.expand_path(expanded, @cwd)
|
|
@@ -722,6 +757,7 @@ module Kward
|
|
|
722
757
|
|
|
723
758
|
messages = restored_messages(records)
|
|
724
759
|
name = session_name(records)
|
|
760
|
+
runtime = session_runtime(records, header)
|
|
725
761
|
first_message = messages.find { |message| ["user", "compactionSummary"].include?(message_role(message)) }
|
|
726
762
|
stats = File.stat(path)
|
|
727
763
|
|
|
@@ -734,6 +770,9 @@ module Kward
|
|
|
734
770
|
name: name,
|
|
735
771
|
first_message: first_message ? message_text(first_message) : "",
|
|
736
772
|
message_count: messages.count { |message| ["user", "assistant", "tool", "toolResult", "compactionSummary"].include?(message_role(message)) },
|
|
773
|
+
provider: runtime["provider"],
|
|
774
|
+
model: runtime["model"],
|
|
775
|
+
reasoning_effort: runtime["reasoningEffort"],
|
|
737
776
|
parent_id: header["parentId"],
|
|
738
777
|
parent_path: header["parentPath"],
|
|
739
778
|
depth: 0,
|
|
@@ -11,9 +11,9 @@ require_relative "config_files"
|
|
|
11
11
|
module Kward
|
|
12
12
|
# Installs Kward's starter prompt/instruction files into the user config dir.
|
|
13
13
|
class StarterPackInstaller
|
|
14
|
-
VERSION = "v1.0.
|
|
14
|
+
VERSION = "v1.0.1"
|
|
15
15
|
ARCHIVE_URL = "https://codeload.github.com/kaiwood/kward-starter-pack/tar.gz/refs/tags/#{VERSION}".freeze
|
|
16
|
-
ALLOWED_FILES = ["
|
|
16
|
+
ALLOWED_FILES = ["PRINCIPLES.md"].freeze
|
|
17
17
|
ALLOWED_PREFIXES = ["prompts/", "skills/"].freeze
|
|
18
18
|
Result = Struct.new(:installed, :skipped, keyword_init: true)
|
|
19
19
|
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
require_relative "base"
|
|
2
|
+
require_relative "search/web_fetch"
|
|
3
|
+
|
|
4
|
+
# Namespace for the Kward CLI agent runtime.
module Kward
  # Model-callable tool wrappers and their argument schemas.
  module Tools
    # Tool wrapper named "fetch_content": hands the model's arguments to a
    # fetcher that retrieves one URL and extracts readable page content.
    class FetchContent < Base
      # Stores the fetcher and registers the tool name, description, and
      # argument schema with the base class.
      #
      # @param web_fetch [#fetch_content] object that performs the fetch
      def initialize(web_fetch:)
        @web_fetch = web_fetch

        url_property = {
          type: "string",
          description: "HTTP or HTTPS URL to fetch."
        }
        max_bytes_property = {
          type: "integer",
          description: "Maximum returned content bytes; default 16384, max 131072."
        }
        extract_property = {
          type: "string",
          enum: %w[auto text markdown],
          description: "Extraction mode; default auto."
        }

        super(
          "fetch_content",
          "Fetch a specific URL and extract readable bounded content.",
          properties: { url: url_property, max_bytes: max_bytes_property, extract: extract_property },
          required: ["url"]
        )
      end

      # Runs the tool. The conversation and cancellation arguments are accepted
      # but not used here.
      #
      # @param args [Hash] tool-call arguments
      # @return [Object] whatever the fetcher's fetch_content returns
      def call(args, _conversation, cancellation: nil)
        @web_fetch.fetch_content(args)
      end
    end
  end
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
require_relative "base"
|
|
2
|
+
require_relative "search/web_fetch"
|
|
3
|
+
|
|
4
|
+
# Namespace for the Kward CLI agent runtime.
module Kward
  # Model-callable tool wrappers and their argument schemas.
  module Tools
    # Tool wrapper named "fetch_raw": hands the model's arguments to a fetcher
    # that returns bounded raw content from one URL.
    class FetchRaw < Base
      # Stores the fetcher and registers the tool name, description, and
      # argument schema with the base class.
      #
      # @param web_fetch [#fetch_raw] object that performs the fetch
      def initialize(web_fetch:)
        @web_fetch = web_fetch

        url_property = {
          type: "string",
          description: "HTTP or HTTPS URL to fetch."
        }
        max_bytes_property = {
          type: "integer",
          description: "Maximum returned content bytes; default 16384, max 131072."
        }
        accept_property = {
          type: "string",
          description: "Optional HTTP Accept header."
        }

        super(
          "fetch_raw",
          "Fetch bounded raw content from a specific URL.",
          properties: { url: url_property, max_bytes: max_bytes_property, accept: accept_property },
          required: ["url"]
        )
      end

      # Runs the tool. The conversation and cancellation arguments are accepted
      # but not used here.
      #
      # @param args [Hash] tool-call arguments
      # @return [Object] whatever the fetcher's fetch_raw returns
      def call(args, _conversation, cancellation: nil)
        @web_fetch.fetch_raw(args)
      end
    end
  end
end
|
data/lib/kward/tools/registry.rb
CHANGED
|
@@ -2,6 +2,8 @@ require_relative "../config_files"
|
|
|
2
2
|
require_relative "ask_user_question"
|
|
3
3
|
require_relative "code_search"
|
|
4
4
|
require_relative "edit_file"
|
|
5
|
+
require_relative "fetch_content"
|
|
6
|
+
require_relative "fetch_raw"
|
|
5
7
|
require_relative "list_directory"
|
|
6
8
|
require_relative "read_file"
|
|
7
9
|
require_relative "read_skill"
|
|
@@ -10,6 +12,7 @@ require_relative "web_search"
|
|
|
10
12
|
require_relative "write_file"
|
|
11
13
|
require_relative "search/code"
|
|
12
14
|
require_relative "search/web"
|
|
15
|
+
require_relative "search/web_fetch"
|
|
13
16
|
require_relative "tool_call"
|
|
14
17
|
require_relative "../workspace"
|
|
15
18
|
|
|
@@ -45,14 +48,16 @@ module Kward
|
|
|
45
48
|
# @param prompt [Object, nil] interactive prompt bridge; must implement
|
|
46
49
|
# `ask_user_question` before that tool is advertised
|
|
47
50
|
# @param web_search [WebSearch] live web search implementation
|
|
51
|
+
# @param web_fetch [WebFetch] specific URL fetch implementation
|
|
48
52
|
# @param code_search [CodeSearch] public source/package search implementation
|
|
49
53
|
# @param web_search_enabled [Boolean, nil] override for web search exposure
|
|
50
54
|
# @param skills [Array<ConfigFiles::Skill>, nil] override discovered skills
|
|
51
55
|
# @param ask_user_question_enabled [Boolean, nil] override question exposure
|
|
52
|
-
def initialize(workspace: Workspace.new, prompt: nil, web_search: WebSearch.new, code_search: CodeSearch.new, web_search_enabled: nil, skills: nil, ask_user_question_enabled: nil)
|
|
56
|
+
def initialize(workspace: Workspace.new, prompt: nil, web_search: WebSearch.new, web_fetch: WebFetch.new, code_search: CodeSearch.new, web_search_enabled: nil, skills: nil, ask_user_question_enabled: nil)
|
|
53
57
|
@workspace = workspace
|
|
54
58
|
@prompt = prompt
|
|
55
59
|
@web_search = web_search
|
|
60
|
+
@web_fetch = web_fetch
|
|
56
61
|
@code_search = code_search
|
|
57
62
|
@skills = skills
|
|
58
63
|
@web_search_enabled = web_search_enabled
|
|
@@ -103,7 +108,7 @@ module Kward
|
|
|
103
108
|
tools = @tools.values_at(
|
|
104
109
|
"list_directory", "read_file", "write_file", "edit_file", "run_shell_command", "code_search"
|
|
105
110
|
)
|
|
106
|
-
tools
|
|
111
|
+
tools.concat(@tools.values_at("web_search", "fetch_content", "fetch_raw")) if web_search_available?
|
|
107
112
|
tools << @tools["read_skill"] if skills_available?
|
|
108
113
|
tools << @tools["ask_user_question"] if ask_user_question_available?
|
|
109
114
|
tools
|
|
@@ -112,6 +117,8 @@ module Kward
|
|
|
112
117
|
def all_tools
|
|
113
118
|
core_tools + [
|
|
114
119
|
Tools::WebSearch.new(web_search: @web_search),
|
|
120
|
+
Tools::FetchContent.new(web_fetch: @web_fetch),
|
|
121
|
+
Tools::FetchRaw.new(web_fetch: @web_fetch),
|
|
115
122
|
Tools::ReadSkill.new,
|
|
116
123
|
Tools::AskUserQuestion.new(prompt: @prompt)
|
|
117
124
|
]
|
|
@@ -64,7 +64,7 @@ module Kward
|
|
|
64
64
|
provider: provider
|
|
65
65
|
}
|
|
66
66
|
|
|
67
|
-
sections = ["# Web search"]
|
|
67
|
+
sections = ["# Web search", "Use fetch_content with a result URL to verify human-readable pages, or fetch_raw for specs, JSON, YAML, XML, and other machine-readable resources."]
|
|
68
68
|
failures = []
|
|
69
69
|
any_results = false
|
|
70
70
|
|
|
@@ -715,7 +715,7 @@ module Kward
|
|
|
715
715
|
|
|
716
716
|
# HTTP adapter used by web-search providers and fallbacks.
|
|
717
717
|
class NetHttpClient
|
|
718
|
-
Response = Struct.new(:code, :body, keyword_init: true)
|
|
718
|
+
Response = Struct.new(:code, :body, :headers, keyword_init: true)
|
|
719
719
|
|
|
720
720
|
def get(url, headers: {})
|
|
721
721
|
request(url, Net::HTTP::Get, headers: headers)
|
|
@@ -742,7 +742,7 @@ module Kward
|
|
|
742
742
|
headers.each { |key, value| http_request[key] = value }
|
|
743
743
|
yield http_request if block_given?
|
|
744
744
|
response = http.request(http_request)
|
|
745
|
-
Response.new(code: response.code, body: response.body)
|
|
745
|
+
Response.new(code: response.code, body: response.body, headers: response.each_header.to_h)
|
|
746
746
|
end
|
|
747
747
|
end
|
|
748
748
|
end
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
require "nokogiri"
|
|
2
|
+
require "uri"
|
|
3
|
+
require_relative "web"
|
|
4
|
+
|
|
5
|
+
# Namespace for the Kward CLI agent runtime.
module Kward
  # Fetches specific web resources for agent research workflows.
  #
  # Both public methods take the tool-call argument hash (string or symbol
  # keys) and return model-facing text: a short Markdown report on success, or
  # a string starting with "Error: " on failure. StandardError is rescued and
  # reported as text instead of being raised.
  #
  # NOTE(review): URLs are only checked for an http/https scheme and a
  # non-empty host, initially and after each redirect. Nothing here rejects
  # loopback or private-network hosts; confirm whether callers need that.
  class WebFetch
    DEFAULT_MAX_BYTES = 16 * 1024
    MAX_MAX_BYTES = 128 * 1024
    MAX_REDIRECTS = 5
    HTTP_TIMEOUT_SECONDS = 10

    # Largest slice of a response body handed to readable-text extraction.
    # Deliberately much larger than MAX_MAX_BYTES: markup is far bigger than
    # the text it yields, and the caller's max_bytes limit is applied to the
    # extracted text afterwards.
    MAX_EXTRACT_INPUT_BYTES = 2 * 1024 * 1024
    private_constant :MAX_EXTRACT_INPUT_BYTES

    # Creates a fetcher for web content and raw resources.
    #
    # @param http_client [#get] object responding to `get(url, headers:)` whose
    #   result exposes `code`, `body`, and (optionally) a `headers` Hash
    def initialize(http_client: WebSearch::NetHttpClient.new)
      @http_client = http_client
    end

    # Fetches a URL and extracts readable text for human-facing pages.
    #
    # Recognised arguments: "url" (required), "max_bytes"/"maxBytes", and
    # "extract" ("auto", "text", or "markdown"; default "auto").
    #
    # @param args [Hash] tool-call arguments
    # @return [String] Markdown report or an "Error: ..." message
    def fetch_content(args)
      url = args_value(args, "url").to_s.strip
      return "Error: url is required" if url.empty?

      max_bytes = bounded_max_bytes(args_value(args, "max_bytes") || args_value(args, "maxBytes"))
      extract = normalize_extract(args_value(args, "extract") || "auto")
      return "Error: extract must be one of: auto, text, markdown" unless extract

      response = fetch_url(url)
      return response if response.is_a?(String)

      content_type = header_value(response[:headers], "content-type")
      # Extract first, truncate second: max_bytes limits the returned text.
      # Cutting the raw markup at max_bytes (and appending the truncation
      # marker to it) would feed broken input to the extractor and can drop
      # readable content that would have fit within the limit.
      source = extraction_input(response[:body].to_s)
      text = extract_readable_text(source, content_type: content_type, mode: extract)
      text = truncate_bytes(text, max_bytes)

      [
        "# Fetched content",
        "- URL: #{response[:url]}",
        "- Content type: #{content_type.empty? ? "unknown" : content_type}",
        "- Bytes returned: #{text.bytesize}",
        "",
        text.empty? ? "(No readable text extracted.)" : text
      ].join("\n")
    rescue StandardError => e
      "Error: fetch_content failed: #{e.message}"
    end

    # Fetches a URL and returns bounded raw response content.
    #
    # Recognised arguments: "url" (required), "max_bytes"/"maxBytes", and
    # "accept" (sent as the Accept header; "*/*" when blank).
    #
    # @param args [Hash] tool-call arguments
    # @return [String] Markdown report or an "Error: ..." message
    def fetch_raw(args)
      url = args_value(args, "url").to_s.strip
      return "Error: url is required" if url.empty?

      max_bytes = bounded_max_bytes(args_value(args, "max_bytes") || args_value(args, "maxBytes"))
      accept = args_value(args, "accept").to_s.strip
      response = fetch_url(url, accept: accept.empty? ? "*/*" : accept)
      return response if response.is_a?(String)

      body = truncate_bytes(response[:body].to_s, max_bytes)
      content_type = header_value(response[:headers], "content-type")
      [
        "# Fetched raw content",
        "- URL: #{response[:url]}",
        "- Content type: #{content_type.empty? ? "unknown" : content_type}",
        "- Bytes returned: #{body.bytesize}",
        "",
        body
      ].join("\n")
    rescue StandardError => e
      "Error: fetch_raw failed: #{e.message}"
    end

    private

    # Issues GET requests, following up to MAX_REDIRECTS redirects.
    #
    # Returns the complete response body; callers apply their own size limits.
    #
    # @return [Hash, String] `{ url:, headers:, body: }` for a 2xx response, or
    #   an "Error: ..." string for redirect problems and non-2xx statuses
    # @raise [RuntimeError] when the URL or a redirect target is not a valid
    #   http/https URL (rescued by the public methods)
    def fetch_url(url, accept: "text/html,application/xhtml+xml,text/plain;q=0.9,*/*;q=0.8")
      current_url = normalize_url(url)
      redirects = 0

      loop do
        response = @http_client.get(current_url, headers: browser_headers(accept))
        code = response.code.to_i
        headers = response_headers(response)

        if redirect?(code)
          return "Error: too many redirects" if redirects >= MAX_REDIRECTS

          location = header_value(headers, "location")
          return "Error: redirect missing Location header" if location.empty?

          # Location may be relative; resolve it against the current URL and
          # re-validate the scheme and host of the target.
          current_url = normalize_url(URI.join(current_url, location).to_s)
          redirects += 1
          next
        end

        return "Error: fetch failed with HTTP #{response.code}" unless code.between?(200, 299)

        return { url: current_url, headers: headers, body: response.body.to_s }
      end
    end

    # Validates that the value parses as an http/https URL with a host and
    # returns its normalised string form.
    def normalize_url(value)
      uri = URI.parse(value.to_s.strip)
      raise "url must use http or https" unless %w[http https].include?(uri.scheme)
      raise "url host is required" if uri.host.to_s.empty?

      uri.to_s
    rescue URI::InvalidURIError
      raise "invalid url"
    end

    # Returns the response headers with lower-cased string keys, or an empty
    # hash when the client's response does not expose a headers Hash.
    def response_headers(response)
      return {} unless response.respond_to?(:headers) && response.headers.is_a?(Hash)

      response.headers.transform_keys { |key| key.to_s.downcase }
    end

    # Looks up a header in a lower-cased header hash; "" when absent.
    def header_value(headers, key)
      headers[key.to_s.downcase].to_s
    end

    def redirect?(code)
      [301, 302, 303, 307, 308].include?(code)
    end

    # Coerces the requested limit to an integer, falling back to
    # DEFAULT_MAX_BYTES for missing or non-positive values and capping it at
    # MAX_MAX_BYTES.
    def bounded_max_bytes(value)
      number = value.to_i
      number = DEFAULT_MAX_BYTES if number <= 0
      [number, MAX_MAX_BYTES].min
    end

    # Returns the normalised extraction mode, or nil when it is not supported.
    def normalize_extract(value)
      normalized = value.to_s.strip.downcase
      %w[auto text markdown].include?(normalized) ? normalized : nil
    end

    # Caps the body passed to extraction at MAX_EXTRACT_INPUT_BYTES. No marker
    # is appended because this text is parsed, not shown to the model.
    def extraction_input(body)
      return body if body.bytesize <= MAX_EXTRACT_INPUT_BYTES

      body.byteslice(0, MAX_EXTRACT_INPUT_BYTES).to_s.scrub
    end

    # Converts a response body into readable text.
    #
    # Non-HTML bodies, and any body in "text" mode, are returned with
    # whitespace collapsed. HTML bodies are parsed and reduced to their title,
    # headings, paragraphs, list items, code, and block quotes.
    def extract_readable_text(body, content_type:, mode:)
      return clean_text(body) if mode == "text" || !html_content?(content_type, body)

      document = Nokogiri::HTML(body)
      # Drop elements that carry no article text before collecting content.
      document.css("script, style, noscript, svg, nav, footer, form").remove
      title = document.at_css("title")&.text.to_s.strip
      # Prefer the most specific content container the page offers.
      root = document.at_css("article") || document.at_css("main") || document.at_css("body") || document
      parts = []
      parts << "# #{clean_text(title)}" unless title.empty?
      root.css("h1, h2, h3, h4, h5, h6, p, li, pre, code, blockquote").each do |node|
        text = clean_text(node.text)
        next if text.empty?

        parts << format_html_node(node, text, mode: mode)
      end
      # Nested matches (for example <pre><code>) can format to the same text;
      # keep only the first occurrence of each block.
      parts.uniq.join("\n\n")
    end

    # Decides whether a body should be parsed as HTML, from the Content-Type
    # header or, failing that, from how the body starts.
    #
    # Both checks are case-insensitive: media types are case-insensitive, and
    # the lowercase "<!doctype html>" form is at least as common as the
    # uppercase one.
    def html_content?(content_type, body)
      return true if content_type.to_s.downcase.include?("html")

      # Inspect only a small binary copy of the prefix so bodies with invalid
      # byte sequences cannot make the case folding raise.
      prefix = body.to_s.byteslice(0, 1024).to_s.b.lstrip.downcase
      prefix.start_with?("<!doctype html", "<html")
    end

    # Formats one extracted node as Markdown, or returns the plain text when
    # the mode is "text".
    def format_html_node(node, text, mode:)
      return text if mode == "text"

      case node.name
      when /\Ah([1-6])\z/
        "#{"#" * Regexp.last_match(1).to_i} #{text}"
      when "li"
        "- #{text}"
      when "pre", "code"
        "```\n#{text}\n```"
      when "blockquote"
        "> #{text}"
      else
        text
      end
    end

    # Collapses every run of whitespace to one space and trims the ends.
    def clean_text(text)
      text.to_s.gsub(/\s+/, " ").strip
    end

    # Cuts text to at most max_bytes bytes of content and appends a marker
    # line saying so; shorter text is returned unchanged. A multibyte
    # character split by the cut is replaced by scrub.
    def truncate_bytes(text, max_bytes)
      return text if text.bytesize <= max_bytes

      "#{text.byteslice(0, max_bytes).to_s.scrub}\n... truncated to #{max_bytes} bytes"
    end

    # Request headers sent with every fetch.
    def browser_headers(accept)
      {
        "Accept" => accept,
        "Accept-Language" => "en-US,en;q=0.9",
        "User-Agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
      }
    end

    # Reads an argument by string key, falling back to the symbol key; nil
    # when args is not a Hash or the key is absent.
    def args_value(args, key)
      return nil unless args.is_a?(Hash)
      return args[key] if args.key?(key)
      return args[key.to_sym] if args.key?(key.to_sym)

      nil
    end
  end
end
|
|
@@ -18,6 +18,8 @@ module Kward
|
|
|
18
18
|
"list_directory" => "list_directory",
|
|
19
19
|
"code_search" => "code_search",
|
|
20
20
|
"web_search" => "web_search",
|
|
21
|
+
"fetch_content" => "fetch_content",
|
|
22
|
+
"fetch_raw" => "fetch_raw",
|
|
21
23
|
"read_skill" => "read_skill",
|
|
22
24
|
"ask_user_question" => "ask_user_question"
|
|
23
25
|
}.freeze
|
data/lib/kward/version.rb
CHANGED