asky-cli 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of asky-cli might be problematic. Click here for more details.
- asky/__init__.py +7 -0
- asky/__main__.py +6 -0
- asky/banner.py +123 -0
- asky/cli.py +506 -0
- asky/config.py +270 -0
- asky/config.toml +226 -0
- asky/html.py +62 -0
- asky/llm.py +378 -0
- asky/storage.py +157 -0
- asky/tools.py +314 -0
- asky_cli-0.1.6.dist-info/METADATA +290 -0
- asky_cli-0.1.6.dist-info/RECORD +14 -0
- asky_cli-0.1.6.dist-info/WHEEL +4 -0
- asky_cli-0.1.6.dist-info/entry_points.txt +3 -0
asky/config.py
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
"""Configuration constants and static declarations for asky."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import tomllib
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import copy
|
|
7
|
+
from typing import Dict, Any
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
import shutil
|
|
11
|
+
from importlib import resources
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _get_config_dir() -> Path:
|
|
15
|
+
"""Return the configuration directory path."""
|
|
16
|
+
return Path.home() / ".config" / "asky"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _hydrate_models(config: Dict[str, Any]) -> Dict[str, Any]:
|
|
20
|
+
"""Hydrate model definitions with API details."""
|
|
21
|
+
api_defs = config.get("api", {})
|
|
22
|
+
models = config.get("models", {})
|
|
23
|
+
|
|
24
|
+
for alias, model_data in models.items():
|
|
25
|
+
model_data["alias"] = alias
|
|
26
|
+
api_ref = model_data.get("api")
|
|
27
|
+
if api_ref and api_ref in api_defs:
|
|
28
|
+
api_config = api_defs[api_ref]
|
|
29
|
+
|
|
30
|
+
if "url" in api_config and "base_url" not in model_data:
|
|
31
|
+
model_data["base_url"] = api_config["url"]
|
|
32
|
+
|
|
33
|
+
if "api_key" in api_config and "api_key" not in model_data:
|
|
34
|
+
model_data["api_key"] = api_config["api_key"]
|
|
35
|
+
|
|
36
|
+
if "api_key_env" in api_config and "api_key_env" not in model_data:
|
|
37
|
+
model_data["api_key_env"] = api_config["api_key_env"]
|
|
38
|
+
|
|
39
|
+
return config
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def load_config() -> Dict[str, Any]:
    """Load configuration from TOML file, falling back to defaults.

    Resolution steps:
      1. Read the bundled ``asky/config.toml`` as the default configuration
         (falling back to a minimal hard-coded dict if that read fails).
      2. On first run, copy the bundled file to ``~/.config/asky/config.toml``
         so the user has something to edit.
      3. Deep-merge the user's config file (if present) over the defaults.
      4. Hydrate model entries with their referenced API details.

    Returns:
        The merged configuration dictionary.
    """
    config_dir = _get_config_dir()
    config_path = config_dir / "config.toml"

    # Ensure config directory exists
    config_dir.mkdir(parents=True, exist_ok=True)

    # Traversable handle into the installed package; works for zip installs too.
    bundled_config_path_traversable = resources.files("asky").joinpath("config.toml")

    # Read default config from package
    try:
        with bundled_config_path_traversable.open("rb") as f:
            default_config = tomllib.load(f)
    except Exception as e:
        print(f"Error loading bundled config: {e}")
        # Build a minimal fallback if bundled config fails
        default_config = {
            "general": {
                "query_summary_max_chars": 40,
                "continue_query_threshold": 160,
                "answer_summary_max_chars": 200,
                "searxng_url": "http://localhost:8888",
                "max_turns": 20,
                "default_model": "gf",
                "summarization_model": "lfm",
                "request_timeout": 60,
                "default_context_size": 4096,
            },
            "api": {},
            "models": {
                "gf": {
                    "id": "gemini-flash-latest",
                    "api": "gemini",
                    "max_chars": 1000000,
                    "context_size": 1000000,
                }
            },
            "prompts": {
                "system_prefix": "",
                "force_search": "",
                "system_suffix": "",
                "deep_research": "",
                "deep_dive": "",
            },
            "user_prompts": {},
            "tool": {},
        }

    # Initialize from defaults
    final_config = copy.deepcopy(default_config)

    # Copy bundled config to user config dir if it doesn't exist
    if not config_path.exists():
        try:
            # as_file() materializes the resource on disk when needed (zip installs).
            with resources.as_file(bundled_config_path_traversable) as source_path:
                shutil.copy(source_path, config_path)
            print(f"Created default configuration at {config_path}")
        except Exception as e:
            # Best-effort: failing to seed the user config is non-fatal.
            print(f"Warning: Failed to create default config at {config_path}: {e}")

    # Load user config if it exists and merge
    if config_path.exists():
        try:
            with open(config_path, "rb") as f:
                user_config = tomllib.load(f)

            # Recursive merge with default config: nested dicts are merged
            # key-by-key; any other value in the user config replaces the default.
            def merge(base: Dict[str, Any], update: Dict[str, Any]) -> None:
                for k, v in update.items():
                    if k in base and isinstance(base[k], dict) and isinstance(v, dict):
                        merge(base[k], v)
                    else:
                        base[k] = v

            merge(final_config, user_config)

        except Exception as e:
            # A broken user config falls back to pure defaults rather than crashing.
            print(f"Warning: Failed to load config from {config_path}: {e}")

    return _hydrate_models(final_config)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
# --- Initialize Configuration ---
_CONFIG = load_config()

# --- Expose Constants ---

# General settings from the [general] section of config.toml.
_gen = _CONFIG["general"]
QUERY_SUMMARY_MAX_CHARS = _gen["query_summary_max_chars"]
CONTINUE_QUERY_THRESHOLD = _gen.get("continue_query_threshold", 160)
# NOTE: this assignment appeared twice in the original; kept once.
ANSWER_SUMMARY_MAX_CHARS = _gen["answer_summary_max_chars"]
SEARXNG_URL = _gen["searxng_url"]
MAX_TURNS = _gen["max_turns"]
DEFAULT_MODEL = _gen["default_model"]
SUMMARIZATION_MODEL = _gen["summarization_model"]
SEARCH_PROVIDER = _gen.get("search_provider", "searxng")
SERPER_API_URL = _gen.get("serper_api_url", "https://google.serper.dev/search")
SERPER_API_KEY_ENV = _gen.get("serper_api_key_env", "SERPER_API_KEY")
# Browser-like default UA for page fetching (some sites reject obvious bots).
USER_AGENT = _gen.get(
    "user_agent",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
)
# Separate UA for LLM API calls; defaults to the scraping UA.
LLM_USER_AGENT = _gen.get("llm_user_agent", USER_AGENT)
REQUEST_TIMEOUT = _gen.get("request_timeout", 60)
DEFAULT_CONTEXT_SIZE = _gen.get("default_context_size", 4096)
|
|
150
|
+
|
|
151
|
+
# Database location.
# Resolution order:
#   1. Environment variable (its name is configurable via 'db_path_env_var',
#      e.g. SEARXNG_HISTORY_DB_PATH)
#   2. Explicit 'db_path' setting in [general]
#   3. Default: ~/.config/asky/history.db

_env_override = os.environ.get(
    _gen.get("db_path_env_var", "SEARXNG_HISTORY_DB_PATH")
)

if _env_override:
    DB_PATH = Path(_env_override)
elif _gen.get("db_path"):
    DB_PATH = Path(_gen["db_path"]).expanduser()
else:
    DB_PATH = _get_config_dir() / "history.db"
|
|
166
|
+
|
|
167
|
+
# Models: alias -> hydrated model definition (see _hydrate_models).
MODELS = _CONFIG["models"]

# Prompts from the [prompts] section; placeholders are filled at runtime.
_prompts = _CONFIG["prompts"]
SYSTEM_PROMPT = _prompts["system_prefix"]
FORCE_SEARCH_PROMPT = _prompts["force_search"]
SYSTEM_PROMPT_SUFFIX = _prompts["system_suffix"]
DEEP_RESEARCH_PROMPT_TEMPLATE = _prompts["deep_research"]
DEEP_DIVE_PROMPT_TEMPLATE = _prompts["deep_dive"]
# Summarization templates have hard-coded fallbacks, unlike the ones above.
SUMMARIZE_QUERY_PROMPT_TEMPLATE = _prompts.get(
    "summarize_query",
    "Summarize the following query into a single short sentence (max {QUERY_SUMMARY_MAX_CHARS} chars).",
)
SUMMARIZE_ANSWER_PROMPT_TEMPLATE = _prompts.get(
    "summarize_answer",
    "Summarize the following answer into a short paragraph (max {ANSWER_SUMMARY_MAX_CHARS} chars).",
)
# User shortcuts from [user_prompts]: '/key' expands to the stored prompt text.
USER_PROMPTS = _CONFIG.get("user_prompts", {})

# --- Custom Tools ---
# These are loaded from [tool.NAME] sections in config.toml
CUSTOM_TOOLS = _CONFIG.get("tool", {})
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
# --- Tool Definitions ---
# Tools are code-coupled schemas, keeping them here as constants.
# Each entry follows the OpenAI-style function-calling schema
# ({"type": "function", "function": {...}}).
TOOLS = [
    # Web search: single query string plus optional result count.
    {
        "type": "function",
        "function": {
            "name": "web_search",
            "description": "Search the web and return top results.",
            "parameters": {
                "type": "object",
                "properties": {
                    "q": {"type": "string"},
                    "count": {"type": "integer", "default": 5},
                },
                "required": ["q"],
            },
        },
    },
    # Page fetcher: accepts either 'urls' (preferred) or legacy single 'url';
    # nothing is required so the model may pass either form.
    {
        "type": "function",
        "function": {
            "name": "get_url_content",
            "description": "Fetch the content of one or more URLs and return their text content (HTML stripped).",
            "parameters": {
                "type": "object",
                "properties": {
                    "urls": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "List of URLs to fetch content from.",
                    },
                    "url": {
                        "type": "string",
                        "description": "Single URL (deprecated, use 'urls' instead).",
                    },
                    "summarize": {
                        "type": "boolean",
                        "description": "If true, summarize the content of the page using an LLM.",
                    },
                },
                "required": [],
            },
        },
    },
    # Content + link extraction for deep-dive mode's initial page.
    {
        "type": "function",
        "function": {
            "name": "get_url_details",
            "description": "Fetch content and extract links from a URL. Use this in deep dive mode.",
            "parameters": {
                "type": "object",
                "properties": {"url": {"type": "string"}},
                "required": ["url"],
            },
        },
    },
    # Zero-argument clock tool.
    {
        "type": "function",
        "function": {
            "name": "get_date_time",
            "description": "Return the current date and time.",
            "parameters": {"type": "object", "properties": {}},
        },
    },
]
|
|
257
|
+
|
|
258
|
+
# Append custom tools from config.toml so user-defined CLI commands are
# exposed to the LLM alongside the built-in tools.  Each [tool.NAME] section
# becomes a function schema; missing fields get sensible defaults.
for tool_name, tool_data in CUSTOM_TOOLS.items():
    TOOLS.append(
        {
            "type": "function",
            "function": {
                "name": tool_name,
                "description": tool_data.get(
                    "description", f"Custom tool: {tool_name}"
                ),
                "parameters": tool_data.get(
                    "parameters", {"type": "object", "properties": {}}
                ),
            },
        }
    )
|
asky/config.toml
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
# asky Configuration File
|
|
2
|
+
# This file defines general settings, API endpoints, user shortcuts, and model configurations.
|
|
3
|
+
|
|
4
|
+
# --- General Settings ---
|
|
5
|
+
[general]
|
|
6
|
+
# Name of the environment variable that stores the path to the SQLite history database.
|
|
7
|
+
# Default if not set: SEARXNG_HISTORY_DB_PATH
|
|
8
|
+
db_path_env_var = "SEARXNG_HISTORY_DB_PATH"
|
|
9
|
+
|
|
10
|
+
# Maximum length of the query and answer summaries shown in 'asky -H'.
|
|
11
|
+
query_summary_max_chars = 40
|
|
12
|
+
answer_summary_max_chars = 200
|
|
13
|
+
|
|
14
|
+
# Threshold for using full query vs summary in --continue-chat mode.
|
|
15
|
+
# If query length is below this, it's used as is.
|
|
16
|
+
continue_query_threshold = 160
|
|
17
|
+
|
|
18
|
+
# URL of your SearXNG instance.
|
|
19
|
+
searxng_url = "http://localhost:8888"
|
|
20
|
+
|
|
21
|
+
# Search provider to be used: "searxng" or "serper"
|
|
22
|
+
search_provider = "searxng"
|
|
23
|
+
|
|
24
|
+
# URL of the Serper API.
|
|
25
|
+
serper_api_url = "https://google.serper.dev/search"
|
|
26
|
+
|
|
27
|
+
# Name of the environment variable that stores the Serper API key.
|
|
28
|
+
serper_api_key_env = "SERPER_API_KEY"
|
|
29
|
+
|
|
30
|
+
# Maximum number of turns (tool calls) allowed in a single conversation loop.
|
|
31
|
+
max_turns = 20
|
|
32
|
+
|
|
33
|
+
# Timeout for API requests in seconds
|
|
34
|
+
request_timeout = 60
|
|
35
|
+
|
|
36
|
+
# Default context size (if not specified in the model configuration)
|
|
37
|
+
default_context_size = 4096
|
|
38
|
+
|
|
39
|
+
# Default model used when no model is specified via CLI (-m).
|
|
40
|
+
default_model = "gf"
|
|
41
|
+
|
|
42
|
+
# Model used specifically for internal text summarization tasks.
|
|
43
|
+
summarization_model = "lfm"
|
|
44
|
+
|
|
45
|
+
# User-Agent string to be used for search and content retrieval requests.
|
|
46
|
+
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
|
|
47
|
+
|
|
48
|
+
# Specific User-Agent string for LLM API requests.
|
|
49
|
+
# Some providers only accept specific user agents for certain subscriptions types.
|
|
50
|
+
llm_user_agent = "asky/1.0.0"
|
|
51
|
+
|
|
52
|
+
# --- API Definitions ---
|
|
53
|
+
# Define reusable API endpoints and authentication details here.
|
|
54
|
+
# For each section [api.NAME]:
|
|
55
|
+
# url: The base URL for the chat completions endpoint.
|
|
56
|
+
# api_key_env: (Recommended) Name of the environment variable containing the API key.
|
|
57
|
+
# api_key: (Optional) The API key directly. Takes precedence over api_key_env if both are set.
|
|
58
|
+
|
|
59
|
+
[api.gemini]
|
|
60
|
+
url = "https://generativelanguage.googleapis.com/v1beta/chat/completions"
|
|
61
|
+
api_key_env = "GOOGLE_API_KEY"
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
[api.anthropic]
|
|
65
|
+
url = "https://api.anthropic.com/v1/messages"
|
|
66
|
+
api_key_env = "ANTHROPIC_API_KEY"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
[api.openai]
|
|
70
|
+
url = "https://api.openai.com/v1/chat/completions"
|
|
71
|
+
api_key_env = "OPENAI_API_KEY"
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
[api.openrouter]
|
|
75
|
+
url = "https://openrouter.ai/api/v1/chat/completions"
|
|
76
|
+
api_key_env = "OPENROUTER_API_KEY"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
[api.lmstudio]
|
|
80
|
+
url = "http://localhost:1234/v1/chat/completions"
|
|
81
|
+
api_key = "lm-studio"
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
[api.zai]
|
|
85
|
+
url = "https://api.z.ai/api/paas/v4/chat/completions"
|
|
86
|
+
api_key_env = "ZAI_API_KEY"
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# --- User Prompts (Shortcuts) ---
|
|
90
|
+
# Define shortcuts that can be used via '/key' in the CLI.
|
|
91
|
+
# Example: 'ask /gn' will be expanded to the prompt below.
|
|
92
|
+
[user_prompts]
|
|
93
|
+
gn = "Give me latest news from The Guardian, use https://www.theguardian.com/europe"
|
|
94
|
+
wh = "how is weather in "
|
|
95
|
+
ex = "Explain this: /cp"
|
|
96
|
+
|
|
97
|
+
# --- User defined tools ---
|
|
98
|
+
# These tools allow you to expose CLI commands to the LLM.
|
|
99
|
+
# Use {parameter_name} in the command to inject arguments, or they will be appended.
|
|
100
|
+
[tool.list_dir]
|
|
101
|
+
command = "ls {flags} {path}"
|
|
102
|
+
description = "List the contents of a directory."
|
|
103
|
+
parameter_type = "object"
|
|
104
|
+
[tool.list_dir.parameters]
|
|
105
|
+
type = "object"
|
|
106
|
+
required = ["path"]
|
|
107
|
+
|
|
108
|
+
[tool.list_dir.parameters.properties.path]
|
|
109
|
+
type = "string"
|
|
110
|
+
default = "."
|
|
111
|
+
|
|
112
|
+
[tool.list_dir.parameters.properties.flags]
|
|
113
|
+
type = "string"
|
|
114
|
+
default = "-la"
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# [tool.grep_search]
|
|
118
|
+
# command = "grep -r --exclude-dir={.venv,node_modules} {pattern} {path}"
|
|
119
|
+
# description = "Search for a pattern in files recursively."
|
|
120
|
+
|
|
121
|
+
# [tool.grep_search.parameters]
|
|
122
|
+
# type = "object"
|
|
123
|
+
# required = ["pattern"]
|
|
124
|
+
|
|
125
|
+
# [tool.grep_search.parameters.properties.pattern]
|
|
126
|
+
# type = "string"
|
|
127
|
+
# description = "The regex pattern to search for."
|
|
128
|
+
|
|
129
|
+
# [tool.grep_search.parameters.properties.path]
|
|
130
|
+
# type = "string"
|
|
131
|
+
# description = "The directory path to search in."
|
|
132
|
+
# default = "."
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# --- Internal Prompt Templates ---
|
|
137
|
+
# Templates used to construct system prompts for different modes.
|
|
138
|
+
# Placeholders like {MAX_TURNS} and {n} are filled at runtime.
|
|
139
|
+
[prompts]
|
|
140
|
+
# Global system prompt prefix.
|
|
141
|
+
system_prefix = """You are a helpful assistant with web search and URL retrieval capabilities. Use get_date_time for current date/time if needed (e.g., for 'today' or 'recently'). """
|
|
142
|
+
|
|
143
|
+
# Prompt appended when force_search is enabled (e.g., --force-search).
|
|
144
|
+
force_search = """Unless you are asked to use a specific URL, always use web_search, never try to answer without using web_search. """
|
|
145
|
+
|
|
146
|
+
# Global system prompt suffix.
|
|
147
|
+
system_suffix = """Then use get_url_content for details of the search results. You can pass a list of URLs to get_url_content to fetch multiple pages efficiently at once. Use tools, don't say you can't. You have {MAX_TURNS} turns to complete your task; if you reach the limit, the process will be terminated. You should finish your task before reaching 100% of your token limit."""
|
|
148
|
+
|
|
149
|
+
# Prompt for summarizing the user query.
|
|
150
|
+
summarize_query = "Summarize the following query into a single short sentence (max {QUERY_SUMMARY_MAX_CHARS} chars)."
|
|
151
|
+
|
|
152
|
+
# Prompt for summarizing the final answer.
|
|
153
|
+
summarize_answer = """Summarize the following answer into a short paragraph (max {ANSWER_SUMMARY_MAX_CHARS} chars).
|
|
154
|
+
Be sure to include all numerical values and dates """
|
|
155
|
+
|
|
156
|
+
# Template for DEEP RESEARCH mode (-d). {n} is the number of searches.
|
|
157
|
+
deep_research = """
|
|
158
|
+
You are in DEEP RESEARCH mode. You MUST perform at least {n} distinct web searches, or make {n} get_url_content calls to gather comprehensive information before providing a final answer. If you need to get links from a URL, use get_url_details. If you just need to get content from a URL, use get_url_content."""
|
|
159
|
+
|
|
160
|
+
# Template for DEEP DIVE mode (-dd).
|
|
161
|
+
deep_dive = """
|
|
162
|
+
You are in DEEP DIVE mode. Follow these instructions:
|
|
163
|
+
1. Use 'get_url_details' for the INITIAL page to retrieve content and links.
|
|
164
|
+
2. Follow up to 25 relevant links within the same domain to gather comprehensive information.
|
|
165
|
+
3. IMPORTANT: Use 'get_url_details' ONLY for the first page. Use 'get_url_content' for all subsequent links.
|
|
166
|
+
4. Do not rely on your internal knowledge; base your answer strictly on the retrieved content.
5. Do not use web_search in deep dive mode."""
|
|
167
|
+
|
|
168
|
+
# --- Model Definitions ---
|
|
169
|
+
# Each section [models.NAME] configures a specific model:
|
|
170
|
+
# id: The exact model ID used by the API provider.
|
|
171
|
+
# api: Reference to a name defined in the [api] section.
|
|
172
|
+
# max_chars: Character limit for content fetched via get_url_content. More than this will be truncated.
|
|
173
|
+
# context_size: Total context window size (tokens/chars approximation) for trimming history.
|
|
174
|
+
|
|
175
|
+
# Note: the max_chars and context_size values of the following models are arbitrarily set for my own use.
|
|
176
|
+
# Check model provider's documentation for the actual context size of the models.
|
|
177
|
+
# Experiment with max_chars to find the optimal value for your use case (depending on your needs, and model/hardware capabilities)
|
|
178
|
+
|
|
179
|
+
[models.gf]
|
|
180
|
+
id = "gemini-flash-latest"
|
|
181
|
+
api = "gemini"
|
|
182
|
+
max_chars = 10000
|
|
183
|
+
context_size = 1000000
|
|
184
|
+
|
|
185
|
+
[models.glmair]
|
|
186
|
+
id = "glm-4.5-air"
|
|
187
|
+
api = "zai"
|
|
188
|
+
max_chars = 4000
|
|
189
|
+
context_size = 100000
|
|
190
|
+
|
|
191
|
+
[models.glmflash]
|
|
192
|
+
id = "glm-4.7-flash"
|
|
193
|
+
api = "zai"
|
|
194
|
+
max_chars = 4000
|
|
195
|
+
context_size = 100000
|
|
196
|
+
|
|
197
|
+
[models.q34t]
|
|
198
|
+
id = "qwen/qwen3-4b-thinking-2507"
|
|
199
|
+
api = "lmstudio"
|
|
200
|
+
max_chars = 4000
|
|
201
|
+
context_size = 32000
|
|
202
|
+
|
|
203
|
+
[models.q34]
|
|
204
|
+
id = "qwen/qwen3-4b-2507"
|
|
205
|
+
api = "lmstudio"
|
|
206
|
+
max_chars = 4000
|
|
207
|
+
context_size = 32000
|
|
208
|
+
|
|
209
|
+
[models.lfm]
|
|
210
|
+
id = "liquid/lfm2.5-1.2b"
|
|
211
|
+
api = "lmstudio"
|
|
212
|
+
max_chars = 100000
|
|
213
|
+
context_size = 32000
|
|
214
|
+
|
|
215
|
+
[models.q8]
|
|
216
|
+
id = "qwen/qwen3-8b"
|
|
217
|
+
api = "lmstudio"
|
|
218
|
+
max_chars = 4000
|
|
219
|
+
context_size = 32000
|
|
220
|
+
|
|
221
|
+
[models.q30]
|
|
222
|
+
id = "qwen/qwen3-30b-a3b-2507"
|
|
223
|
+
api = "lmstudio"
|
|
224
|
+
max_chars = 3000
|
|
225
|
+
context_size = 32000
|
|
226
|
+
|
asky/html.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""HTML parsing utilities."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from html.parser import HTMLParser
|
|
5
|
+
from typing import Any, Dict, List, Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class HTMLStripper(HTMLParser):
|
|
9
|
+
"""Parse HTML and extract text content and links."""
|
|
10
|
+
|
|
11
|
+
def __init__(self) -> None:
|
|
12
|
+
super().__init__()
|
|
13
|
+
self.reset()
|
|
14
|
+
self.strict = False
|
|
15
|
+
self.convert_charrefs = True
|
|
16
|
+
self.text: List[str] = []
|
|
17
|
+
self.links: List[Dict[str, str]] = []
|
|
18
|
+
self.ignore = False
|
|
19
|
+
self.current_href: Optional[str] = None
|
|
20
|
+
|
|
21
|
+
def handle_starttag(self, tag: str, attrs: List[Any]) -> None:
|
|
22
|
+
if tag in ("script", "style"):
|
|
23
|
+
self.ignore = True
|
|
24
|
+
elif tag == "a":
|
|
25
|
+
for k, v in attrs:
|
|
26
|
+
if k == "href":
|
|
27
|
+
self.current_href = v
|
|
28
|
+
break
|
|
29
|
+
|
|
30
|
+
def handle_endtag(self, tag: str) -> None:
|
|
31
|
+
if tag in ("script", "style"):
|
|
32
|
+
self.ignore = False
|
|
33
|
+
elif tag == "a":
|
|
34
|
+
self.current_href = None
|
|
35
|
+
|
|
36
|
+
def handle_data(self, data: str) -> None:
|
|
37
|
+
if not self.ignore:
|
|
38
|
+
text = data.strip()
|
|
39
|
+
if text:
|
|
40
|
+
self.text.append(data)
|
|
41
|
+
if self.current_href:
|
|
42
|
+
self.links.append({"text": text, "href": self.current_href})
|
|
43
|
+
|
|
44
|
+
def get_data(self) -> str:
|
|
45
|
+
return "".join(self.text).strip()
|
|
46
|
+
|
|
47
|
+
def get_links(self) -> List[Dict[str, str]]:
|
|
48
|
+
return self.links
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def strip_tags(html: str) -> str:
    """Strip HTML tags from text and return plain text content."""
    parser = HTMLStripper()
    parser.feed(html)
    return parser.get_data()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def strip_think_tags(text: str) -> str:
    """Remove <think>...</think> reasoning blocks from LLM output.

    Falsy input (None, "") yields "".  Matching is non-greedy and spans
    newlines, so each block is removed independently.
    """
    if not text:
        return ""
    cleaned = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
    return cleaned.strip()
|