openclacky 0.5.5 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.clackyrules +4 -0
- data/CHANGELOG.md +43 -0
- data/README.md +1 -1
- data/docs/ui2-architecture.md +124 -0
- data/lib/clacky/agent.rb +354 -296
- data/lib/clacky/agent_config.rb +1 -7
- data/lib/clacky/cli.rb +157 -330
- data/lib/clacky/client.rb +68 -36
- data/lib/clacky/gitignore_parser.rb +26 -12
- data/lib/clacky/model_pricing.rb +6 -2
- data/lib/clacky/progress_indicator.rb +1 -1
- data/lib/clacky/session_manager.rb +6 -2
- data/lib/clacky/tools/file_reader.rb +73 -10
- data/lib/clacky/tools/glob.rb +65 -9
- data/lib/clacky/tools/grep.rb +44 -116
- data/lib/clacky/tools/run_project.rb +5 -0
- data/lib/clacky/tools/safe_shell.rb +49 -13
- data/lib/clacky/tools/shell.rb +1 -49
- data/lib/clacky/tools/web_fetch.rb +2 -2
- data/lib/clacky/tools/web_search.rb +38 -26
- data/lib/clacky/ui2/README.md +214 -0
- data/lib/clacky/ui2/components/base_component.rb +163 -0
- data/lib/clacky/ui2/components/common_component.rb +89 -0
- data/lib/clacky/ui2/components/inline_input.rb +187 -0
- data/lib/clacky/ui2/components/input_area.rb +1029 -0
- data/lib/clacky/ui2/components/message_component.rb +76 -0
- data/lib/clacky/ui2/components/output_area.rb +112 -0
- data/lib/clacky/ui2/components/todo_area.rb +137 -0
- data/lib/clacky/ui2/components/tool_component.rb +106 -0
- data/lib/clacky/ui2/components/welcome_banner.rb +93 -0
- data/lib/clacky/ui2/layout_manager.rb +331 -0
- data/lib/clacky/ui2/line_editor.rb +201 -0
- data/lib/clacky/ui2/screen_buffer.rb +238 -0
- data/lib/clacky/ui2/theme_manager.rb +68 -0
- data/lib/clacky/ui2/themes/base_theme.rb +99 -0
- data/lib/clacky/ui2/themes/hacker_theme.rb +56 -0
- data/lib/clacky/ui2/themes/minimal_theme.rb +50 -0
- data/lib/clacky/ui2/ui_controller.rb +720 -0
- data/lib/clacky/ui2/view_renderer.rb +160 -0
- data/lib/clacky/ui2.rb +37 -0
- data/lib/clacky/utils/file_ignore_helper.rb +126 -0
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky.rb +1 -6
- metadata +38 -6
- data/lib/clacky/ui/banner.rb +0 -155
- data/lib/clacky/ui/enhanced_prompt.rb +0 -540
- data/lib/clacky/ui/formatter.rb +0 -209
- data/lib/clacky/ui/statusbar.rb +0 -96
data/lib/clacky/tools/grep.rb
CHANGED
|
@@ -3,36 +3,6 @@
|
|
|
3
3
|
module Clacky
|
|
4
4
|
module Tools
|
|
5
5
|
class Grep < Base
|
|
6
|
-
# Default patterns to ignore when .gitignore is not available
|
|
7
|
-
DEFAULT_IGNORED_PATTERNS = [
|
|
8
|
-
'node_modules',
|
|
9
|
-
'vendor/bundle',
|
|
10
|
-
'.git',
|
|
11
|
-
'.svn',
|
|
12
|
-
'tmp',
|
|
13
|
-
'log',
|
|
14
|
-
'coverage',
|
|
15
|
-
'dist',
|
|
16
|
-
'build',
|
|
17
|
-
'.bundle',
|
|
18
|
-
'.sass-cache',
|
|
19
|
-
'.DS_Store',
|
|
20
|
-
'*.log'
|
|
21
|
-
].freeze
|
|
22
|
-
|
|
23
|
-
# Config file patterns that should always be searchable
|
|
24
|
-
CONFIG_FILE_PATTERNS = [
|
|
25
|
-
/\.env/,
|
|
26
|
-
/\.ya?ml$/,
|
|
27
|
-
/\.json$/,
|
|
28
|
-
/\.toml$/,
|
|
29
|
-
/\.ini$/,
|
|
30
|
-
/\.conf$/,
|
|
31
|
-
/\.config$/,
|
|
32
|
-
/config\//,
|
|
33
|
-
/\.config\//
|
|
34
|
-
].freeze
|
|
35
|
-
|
|
36
6
|
# Maximum file size to search (1MB)
|
|
37
7
|
MAX_FILE_SIZE = 1_048_576
|
|
38
8
|
|
|
@@ -135,7 +105,7 @@ module Clacky
|
|
|
135
105
|
regex = Regexp.new(pattern, regex_options)
|
|
136
106
|
|
|
137
107
|
# Initialize gitignore parser
|
|
138
|
-
gitignore_path = find_gitignore(expanded_path)
|
|
108
|
+
gitignore_path = Clacky::Utils::FileIgnoreHelper.find_gitignore(expanded_path)
|
|
139
109
|
gitignore = gitignore_path ? Clacky::GitignoreParser.new(gitignore_path) : nil
|
|
140
110
|
|
|
141
111
|
results = []
|
|
@@ -165,13 +135,14 @@ module Clacky
|
|
|
165
135
|
end
|
|
166
136
|
|
|
167
137
|
# Skip if file should be ignored (unless it's a config file)
|
|
168
|
-
if should_ignore_file?(file, expanded_path, gitignore) &&
|
|
138
|
+
if Clacky::Utils::FileIgnoreHelper.should_ignore_file?(file, expanded_path, gitignore) &&
|
|
139
|
+
!Clacky::Utils::FileIgnoreHelper.is_config_file?(file)
|
|
169
140
|
skipped[:ignored] += 1
|
|
170
141
|
next
|
|
171
142
|
end
|
|
172
143
|
|
|
173
144
|
# Skip binary files
|
|
174
|
-
if binary_file?(file)
|
|
145
|
+
if Clacky::Utils::FileIgnoreHelper.binary_file?(file)
|
|
175
146
|
skipped[:binary] += 1
|
|
176
147
|
next
|
|
177
148
|
end
|
|
@@ -256,84 +227,52 @@ module Clacky
|
|
|
256
227
|
end
|
|
257
228
|
end
|
|
258
229
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
# Stop if we've reached the search limit or root
|
|
285
|
-
break if current == search_limit || current == root
|
|
286
|
-
current = File.dirname(current)
|
|
287
|
-
end
|
|
288
|
-
|
|
289
|
-
nil
|
|
290
|
-
end
|
|
230
|
+
# Format result for LLM consumption - return a compact version to save tokens
|
|
231
|
+
def format_result_for_llm(result)
|
|
232
|
+
# If there's an error, return it as-is
|
|
233
|
+
return result if result[:error]
|
|
234
|
+
|
|
235
|
+
# Build a compact summary with file list and sample matches
|
|
236
|
+
compact = {
|
|
237
|
+
summary: {
|
|
238
|
+
total_matches: result[:total_matches],
|
|
239
|
+
files_with_matches: result[:files_with_matches],
|
|
240
|
+
files_searched: result[:files_searched],
|
|
241
|
+
truncated: result[:truncated],
|
|
242
|
+
truncation_reason: result[:truncation_reason]
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
# Include list of files with match counts
|
|
247
|
+
if result[:results] && !result[:results].empty?
|
|
248
|
+
compact[:files] = result[:results].map do |file_result|
|
|
249
|
+
{
|
|
250
|
+
file: file_result[:file],
|
|
251
|
+
match_count: file_result[:matches].length
|
|
252
|
+
}
|
|
253
|
+
end
|
|
291
254
|
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
else
|
|
306
|
-
# File is outside base path - use just the filename
|
|
307
|
-
relative_path = File.basename(expanded_file)
|
|
308
|
-
end
|
|
309
|
-
|
|
310
|
-
# Clean up relative path
|
|
311
|
-
relative_path = relative_path.sub(/^\.\//, '') if relative_path
|
|
312
|
-
|
|
313
|
-
if gitignore
|
|
314
|
-
# Use .gitignore rules
|
|
315
|
-
gitignore.ignored?(relative_path)
|
|
316
|
-
else
|
|
317
|
-
# Use default ignore patterns - only match against relative path components
|
|
318
|
-
DEFAULT_IGNORED_PATTERNS.any? do |pattern|
|
|
319
|
-
if pattern.include?('*')
|
|
320
|
-
File.fnmatch(pattern, relative_path, File::FNM_PATHNAME | File::FNM_DOTMATCH)
|
|
321
|
-
else
|
|
322
|
-
# Match pattern as a path component (not substring of absolute path)
|
|
323
|
-
relative_path.start_with?("#{pattern}/") ||
|
|
324
|
-
relative_path.include?("/#{pattern}/") ||
|
|
325
|
-
relative_path == pattern ||
|
|
326
|
-
File.basename(relative_path) == pattern
|
|
327
|
-
end
|
|
255
|
+
# Include sample matches (first 2 matches from first 3 files) for context
|
|
256
|
+
sample_results = result[:results].take(3)
|
|
257
|
+
compact[:sample_matches] = sample_results.map do |file_result|
|
|
258
|
+
{
|
|
259
|
+
file: file_result[:file],
|
|
260
|
+
matches: file_result[:matches].take(2).map do |match|
|
|
261
|
+
{
|
|
262
|
+
line_number: match[:line_number],
|
|
263
|
+
line: match[:line]
|
|
264
|
+
# Omit context to save space - it's rarely needed by LLM
|
|
265
|
+
}
|
|
266
|
+
end
|
|
267
|
+
}
|
|
328
268
|
end
|
|
329
269
|
end
|
|
330
|
-
end
|
|
331
270
|
|
|
332
|
-
|
|
333
|
-
def is_config_file?(file)
|
|
334
|
-
CONFIG_FILE_PATTERNS.any? { |pattern| file.match?(pattern) }
|
|
271
|
+
compact
|
|
335
272
|
end
|
|
336
273
|
|
|
274
|
+
private
|
|
275
|
+
|
|
337
276
|
def search_file(file, regex, context_lines, max_matches)
|
|
338
277
|
matches = []
|
|
339
278
|
|
|
@@ -391,17 +330,6 @@ module Clacky
|
|
|
391
330
|
rescue StandardError
|
|
392
331
|
nil
|
|
393
332
|
end
|
|
394
|
-
|
|
395
|
-
def binary_file?(file)
|
|
396
|
-
# Simple heuristic: check if file contains null bytes in first 8KB
|
|
397
|
-
return false unless File.exist?(file)
|
|
398
|
-
return false if File.size(file).zero?
|
|
399
|
-
|
|
400
|
-
sample = File.read(file, 8192, encoding: "ASCII-8BIT")
|
|
401
|
-
sample.include?("\x00")
|
|
402
|
-
rescue StandardError
|
|
403
|
-
true
|
|
404
|
-
end
|
|
405
333
|
end
|
|
406
334
|
end
|
|
407
335
|
end
|
|
data/lib/clacky/tools/run_project.rb
CHANGED
|
@@ -255,6 +255,11 @@ module Clacky
|
|
|
255
255
|
ready[0].each do |io|
|
|
256
256
|
begin
|
|
257
257
|
data = io.read_nonblock(4096)
|
|
258
|
+
# Force UTF-8 encoding to avoid incompatible encoding errors
|
|
259
|
+
data.force_encoding('UTF-8')
|
|
260
|
+
# Replace invalid UTF-8 sequences with replacement character
|
|
261
|
+
data = data.scrub('?') unless data.valid_encoding?
|
|
262
|
+
|
|
258
263
|
if io == stdout
|
|
259
264
|
stdout_buf.push_lines(data)
|
|
260
265
|
else
|
|
data/lib/clacky/tools/safe_shell.rb
CHANGED
|
@@ -19,13 +19,9 @@ module Clacky
|
|
|
19
19
|
type: "string",
|
|
20
20
|
description: "Shell command to execute"
|
|
21
21
|
},
|
|
22
|
-
|
|
22
|
+
timeout: {
|
|
23
23
|
type: "integer",
|
|
24
|
-
description: "
|
|
25
|
-
},
|
|
26
|
-
hard_timeout: {
|
|
27
|
-
type: "integer",
|
|
28
|
-
description: "Hard timeout in seconds (force kill)"
|
|
24
|
+
description: "Command timeout in seconds (auto-detected if not specified: 60s for normal commands, 180s for build/install commands)"
|
|
29
25
|
},
|
|
30
26
|
max_output_lines: {
|
|
31
27
|
type: "integer",
|
|
@@ -36,19 +32,29 @@ module Clacky
|
|
|
36
32
|
required: ["command"]
|
|
37
33
|
}
|
|
38
34
|
|
|
39
|
-
def execute(command:,
|
|
35
|
+
def execute(command:, timeout: nil, max_output_lines: 1000)
|
|
40
36
|
# Get project root directory
|
|
41
37
|
project_root = Dir.pwd
|
|
42
38
|
|
|
43
39
|
begin
|
|
44
|
-
# 1.
|
|
40
|
+
# 1. Extract timeout from command if it starts with "timeout N"
|
|
41
|
+
command, extracted_timeout = extract_timeout_from_command(command)
|
|
42
|
+
|
|
43
|
+
# Use extracted timeout if not explicitly provided
|
|
44
|
+
timeout ||= extracted_timeout
|
|
45
|
+
|
|
46
|
+
# 2. Use safety replacer to process command
|
|
45
47
|
safety_replacer = CommandSafetyReplacer.new(project_root)
|
|
46
48
|
safe_command = safety_replacer.make_command_safe(command)
|
|
47
49
|
|
|
48
|
-
#
|
|
50
|
+
# 3. Calculate timeouts: soft_timeout is fixed at 5s, hard_timeout from timeout parameter
|
|
51
|
+
soft_timeout = 5
|
|
52
|
+
hard_timeout = calculate_hard_timeout(command, timeout)
|
|
53
|
+
|
|
54
|
+
# 4. Call parent class execution method
|
|
49
55
|
result = super(command: safe_command, soft_timeout: soft_timeout, hard_timeout: hard_timeout, max_output_lines: max_output_lines)
|
|
50
56
|
|
|
51
|
-
#
|
|
57
|
+
# 5. Enhance result information
|
|
52
58
|
enhance_result(result, command, safe_command)
|
|
53
59
|
|
|
54
60
|
rescue SecurityError => e
|
|
@@ -64,6 +70,30 @@ module Clacky
|
|
|
64
70
|
end
|
|
65
71
|
end
|
|
66
72
|
|
|
73
|
+
private def extract_timeout_from_command(command)
|
|
74
|
+
# Match patterns: "timeout 30 ...", "timeout 30s ...", etc.
|
|
75
|
+
# Supports: timeout N command, timeout Ns command, timeout -s SIGNAL N command
|
|
76
|
+
match = command.match(/^timeout\s+(?:-s\s+\w+\s+)?(\d+)s?\s+(.+)$/i)
|
|
77
|
+
|
|
78
|
+
if match
|
|
79
|
+
timeout_value = match[1].to_i
|
|
80
|
+
actual_command = match[2]
|
|
81
|
+
return [actual_command, timeout_value]
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
# No timeout prefix found, return original command
|
|
85
|
+
[command, nil]
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
private def calculate_hard_timeout(command, timeout)
|
|
89
|
+
# If timeout is provided, use it directly
|
|
90
|
+
return timeout if timeout
|
|
91
|
+
|
|
92
|
+
# Otherwise, auto-detect based on command type
|
|
93
|
+
is_slow = SLOW_COMMANDS.any? { |slow_cmd| command.include?(slow_cmd) }
|
|
94
|
+
is_slow ? 180 : 60
|
|
95
|
+
end
|
|
96
|
+
|
|
67
97
|
# Safe read-only commands that don't modify system state
|
|
68
98
|
SAFE_READONLY_COMMANDS = %w[
|
|
69
99
|
ls pwd cat less more head tail
|
|
@@ -267,12 +297,18 @@ module Clacky
|
|
|
267
297
|
|
|
268
298
|
def validate_general_command(command)
|
|
269
299
|
# Check general command security
|
|
300
|
+
# Note: We need to be careful not to match patterns inside quoted strings
|
|
301
|
+
|
|
302
|
+
# First, remove quoted strings to avoid false positives
|
|
303
|
+
# This is a simplified approach - removes both single and double quoted content
|
|
304
|
+
cmd_without_quotes = command.gsub(/'[^']*'|"[^"]*"/, '')
|
|
305
|
+
|
|
270
306
|
dangerous_patterns = [
|
|
271
307
|
/eval\s*\(/,
|
|
272
308
|
/exec\s*\(/,
|
|
273
309
|
/system\s*\(/,
|
|
274
|
-
|
|
275
|
-
/\$\(
|
|
310
|
+
/`[^`]+`/, # Command substitution with backticks (but only if not in quotes)
|
|
311
|
+
/\$\([^)]+\)/, # Command substitution with $() (but only if not in quotes)
|
|
276
312
|
/\|\s*sh\s*$/,
|
|
277
313
|
/\|\s*bash\s*$/,
|
|
278
314
|
/>\s*\/etc\//,
|
|
@@ -281,7 +317,7 @@ module Clacky
|
|
|
281
317
|
]
|
|
282
318
|
|
|
283
319
|
dangerous_patterns.each do |pattern|
|
|
284
|
-
if
|
|
320
|
+
if cmd_without_quotes.match?(pattern)
|
|
285
321
|
raise SecurityError, "Dangerous command pattern detected: #{pattern.source}"
|
|
286
322
|
end
|
|
287
323
|
end
|
data/lib/clacky/tools/shell.rb
CHANGED
|
@@ -95,7 +95,7 @@ module Clacky
|
|
|
95
95
|
if elapsed > soft_timeout && !soft_timeout_triggered
|
|
96
96
|
soft_timeout_triggered = true
|
|
97
97
|
|
|
98
|
-
# L1:
|
|
98
|
+
# L1: Check for interaction patterns
|
|
99
99
|
interaction = detect_interaction(stdout_buffer.string)
|
|
100
100
|
if interaction
|
|
101
101
|
Process.kill('TERM', wait_thr.pid) rescue nil
|
|
@@ -107,24 +107,6 @@ module Clacky
|
|
|
107
107
|
max_output_lines
|
|
108
108
|
)
|
|
109
109
|
end
|
|
110
|
-
|
|
111
|
-
# L2:
|
|
112
|
-
last_size = stdout_buffer.size
|
|
113
|
-
stdin.puts("\n") rescue nil
|
|
114
|
-
sleep 2
|
|
115
|
-
|
|
116
|
-
if stdout_buffer.size > last_size
|
|
117
|
-
next
|
|
118
|
-
else
|
|
119
|
-
Process.kill('TERM', wait_thr.pid) rescue nil
|
|
120
|
-
return format_stuck_result(
|
|
121
|
-
command,
|
|
122
|
-
stdout_buffer.string,
|
|
123
|
-
stderr_buffer.string,
|
|
124
|
-
elapsed,
|
|
125
|
-
max_output_lines
|
|
126
|
-
)
|
|
127
|
-
end
|
|
128
110
|
end
|
|
129
111
|
|
|
130
112
|
break unless wait_thr.alive?
|
|
@@ -255,36 +237,6 @@ module Clacky
|
|
|
255
237
|
MSG
|
|
256
238
|
end
|
|
257
239
|
|
|
258
|
-
def format_stuck_result(command, stdout, stderr, elapsed, max_output_lines)
|
|
259
|
-
{
|
|
260
|
-
command: command,
|
|
261
|
-
stdout: truncate_output(stdout, max_output_lines),
|
|
262
|
-
stderr: truncate_output(stderr, max_output_lines),
|
|
263
|
-
exit_code: -3,
|
|
264
|
-
success: false,
|
|
265
|
-
state: 'STUCK',
|
|
266
|
-
elapsed: elapsed,
|
|
267
|
-
message: format_stuck_message(truncate_output(stdout, max_output_lines), elapsed),
|
|
268
|
-
output_truncated: output_truncated?(stdout, stderr, max_output_lines)
|
|
269
|
-
}
|
|
270
|
-
end
|
|
271
|
-
|
|
272
|
-
def format_stuck_message(output, elapsed)
|
|
273
|
-
<<~MSG
|
|
274
|
-
#{output}
|
|
275
|
-
|
|
276
|
-
#{'=' * 60}
|
|
277
|
-
[Terminal State: STUCK]
|
|
278
|
-
#{'=' * 60}
|
|
279
|
-
|
|
280
|
-
The terminal is not responding after #{elapsed.round(1)}s.
|
|
281
|
-
|
|
282
|
-
Suggested actions:
|
|
283
|
-
• Try interrupting with Ctrl+C
|
|
284
|
-
• Check if command is frozen
|
|
285
|
-
MSG
|
|
286
|
-
end
|
|
287
|
-
|
|
288
240
|
def format_timeout_result(command, stdout, stderr, elapsed, type, timeout, max_output_lines)
|
|
289
241
|
{
|
|
290
242
|
command: command,
|
|
data/lib/clacky/tools/web_fetch.rb
CHANGED
|
@@ -40,8 +40,8 @@ module Clacky
|
|
|
40
40
|
# Fetch the web page
|
|
41
41
|
response = fetch_url(uri)
|
|
42
42
|
|
|
43
|
-
# Extract content
|
|
44
|
-
content = response.body
|
|
43
|
+
# Extract content and force UTF-8 encoding at the source
|
|
44
|
+
content = response.body.force_encoding('UTF-8').scrub('?')
|
|
45
45
|
content_type = response["content-type"] || ""
|
|
46
46
|
|
|
47
47
|
# Parse HTML if it's an HTML page
|
|
data/lib/clacky/tools/web_search.rb
CHANGED
|
@@ -48,14 +48,14 @@ module Clacky
|
|
|
48
48
|
end
|
|
49
49
|
end
|
|
50
50
|
|
|
51
|
-
def search_duckduckgo(query, max_results)
|
|
51
|
+
private def search_duckduckgo(query, max_results)
|
|
52
52
|
# DuckDuckGo HTML search endpoint
|
|
53
53
|
encoded_query = CGI.escape(query)
|
|
54
54
|
url = URI("https://html.duckduckgo.com/html/?q=#{encoded_query}")
|
|
55
55
|
|
|
56
56
|
# Make request with user agent
|
|
57
57
|
request = Net::HTTP::Get.new(url)
|
|
58
|
-
request["User-Agent"] = "Mozilla/5.0 (
|
|
58
|
+
request["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
|
|
59
59
|
|
|
60
60
|
response = Net::HTTP.start(url.hostname, url.port, use_ssl: true, read_timeout: 10) do |http|
|
|
61
61
|
http.request(request)
|
|
@@ -78,45 +78,57 @@ module Clacky
|
|
|
78
78
|
]
|
|
79
79
|
end
|
|
80
80
|
|
|
81
|
-
def parse_duckduckgo_html(html, max_results)
|
|
81
|
+
private def parse_duckduckgo_html(html, max_results)
|
|
82
82
|
results = []
|
|
83
83
|
|
|
84
|
-
#
|
|
85
|
-
|
|
86
|
-
|
|
84
|
+
# Ensure HTML is UTF-8 encoded
|
|
85
|
+
html = html.force_encoding('UTF-8') unless html.encoding == Encoding::UTF_8
|
|
86
|
+
|
|
87
|
+
# Extract all result links and snippets
|
|
88
|
+
# Pattern: <a class="result__a" href="//duckduckgo.com/l/?uddg=ENCODED_URL...">TITLE</a>
|
|
89
|
+
links = html.scan(%r{<a[^>]*class="result__a"[^>]*href="//duckduckgo\.com/l/\?uddg=([^"&]+)[^"]*"[^>]*>(.*?)</a>}m)
|
|
90
|
+
|
|
91
|
+
# Pattern: <a class="result__snippet">SNIPPET</a>
|
|
92
|
+
snippets = html.scan(%r{<a[^>]*class="result__snippet"[^>]*>(.*?)</a>}m)
|
|
93
|
+
|
|
94
|
+
# Combine links and snippets
|
|
95
|
+
links.each_with_index do |link_data, index|
|
|
87
96
|
break if results.length >= max_results
|
|
88
97
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
snippet = ""
|
|
96
|
-
|
|
97
|
-
snippet = $1.gsub(/<[^>]+>/, "").strip
|
|
98
|
-
end
|
|
99
|
-
|
|
100
|
-
results << {
|
|
101
|
-
title: title,
|
|
102
|
-
url: url,
|
|
103
|
-
snippet: snippet
|
|
104
|
-
}
|
|
98
|
+
url = CGI.unescape(link_data[0]).force_encoding('UTF-8')
|
|
99
|
+
title = link_data[1].gsub(/<[^>]+>/, "").strip
|
|
100
|
+
title = CGI.unescapeHTML(title) if title.include?("&")
|
|
101
|
+
|
|
102
|
+
snippet = ""
|
|
103
|
+
if snippets[index]
|
|
104
|
+
snippet = snippets[index][0].gsub(/<[^>]+>/, "").strip
|
|
105
|
+
snippet = CGI.unescapeHTML(snippet) if snippet.include?("&")
|
|
105
106
|
end
|
|
107
|
+
|
|
108
|
+
results << {
|
|
109
|
+
title: title,
|
|
110
|
+
url: url,
|
|
111
|
+
snippet: snippet
|
|
112
|
+
}
|
|
106
113
|
end
|
|
107
114
|
|
|
108
115
|
# If parsing failed, provide a fallback
|
|
109
116
|
if results.empty?
|
|
110
117
|
results << {
|
|
111
118
|
title: "Web search results",
|
|
112
|
-
url: "https://duckduckgo.com
|
|
113
|
-
snippet: "Could not parse search results.
|
|
119
|
+
url: "https://duckduckgo.com/",
|
|
120
|
+
snippet: "Could not parse search results. Please try again."
|
|
114
121
|
}
|
|
115
122
|
end
|
|
116
123
|
|
|
117
124
|
results
|
|
118
|
-
rescue StandardError
|
|
119
|
-
|
|
125
|
+
rescue StandardError => e
|
|
126
|
+
# Return fallback on error
|
|
127
|
+
[{
|
|
128
|
+
title: "Web search error",
|
|
129
|
+
url: "https://duckduckgo.com/",
|
|
130
|
+
snippet: "Error parsing results: #{e.message}"
|
|
131
|
+
}]
|
|
120
132
|
end
|
|
121
133
|
|
|
122
134
|
def format_call(args)
|