@apmantza/greedysearch-pi 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +219 -208
- package/cdp.mjs +16 -16
- package/extractors/bing-copilot.mjs +12 -21
- package/extractors/consent.mjs +10 -3
- package/extractors/gemini.mjs +12 -53
- package/extractors/google-ai.mjs +7 -10
- package/extractors/perplexity.mjs +28 -31
- package/extractors/selectors.mjs +52 -52
- package/index.ts +623 -623
- package/launch.mjs +33 -33
- package/newfeaturesideas.md +105 -0
- package/package.json +1 -1
- package/skills/greedy-search/SKILL.md +145 -145
- package/test.sh +298 -298
package/test.sh
CHANGED
|
@@ -1,298 +1,298 @@
|
|
|
1
|
-
#!/usr/bin/env bash
|
|
2
|
-
# test.sh — GreedySearch test suite
|
|
3
|
-
#
|
|
4
|
-
# Usage:
|
|
5
|
-
# ./test.sh # run all tests
|
|
6
|
-
# ./test.sh parallel # run only parallel test
|
|
7
|
-
# ./test.sh quick # skip slow tests (parallel + stress)
|
|
8
|
-
#
|
|
9
|
-
# Tests verify:
|
|
10
|
-
# - No crashes/errors from extractors
|
|
11
|
-
# - All engines complete in "all" mode
|
|
12
|
-
# - Correct queries in results (not mixed up)
|
|
13
|
-
# - Parallel searches don't race on shared tabs
|
|
14
|
-
|
|
15
|
-
set -e
|
|
16
|
-
|
|
17
|
-
cd "$(dirname "$0")"
|
|
18
|
-
RESULTS_DIR="results/test_$(date +%Y%m%d_%H%M%S)"
|
|
19
|
-
mkdir -p "$RESULTS_DIR"
|
|
20
|
-
|
|
21
|
-
RED='\033[0;31m'
|
|
22
|
-
GREEN='\033[0;32m'
|
|
23
|
-
YELLOW='\033[1;33m'
|
|
24
|
-
NC='\033[0m'
|
|
25
|
-
|
|
26
|
-
PASS=0
|
|
27
|
-
FAIL=0
|
|
28
|
-
FAILURES=() # Array to store failure details for report
|
|
29
|
-
|
|
30
|
-
pass() { PASS=$((PASS+1)); echo -e " ${GREEN}✓${NC} $1"; }
|
|
31
|
-
fail() {
|
|
32
|
-
FAIL=$((FAIL+1));
|
|
33
|
-
echo -e " ${RED}✗${NC} $1"
|
|
34
|
-
FAILURES+=("$1")
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
check_no_errors() {
|
|
38
|
-
local file="$1"
|
|
39
|
-
local errors=$(node -e "
|
|
40
|
-
const d = JSON.parse(require('fs').readFileSync('$file','utf8'));
|
|
41
|
-
const errs = [];
|
|
42
|
-
if (d.perplexity?.error) errs.push('perplexity: ' + d.perplexity.error);
|
|
43
|
-
if (d.bing?.error) errs.push('bing: ' + d.bing.error);
|
|
44
|
-
if (d.google?.error) errs.push('google: ' + d.google.error);
|
|
45
|
-
console.log(errs.join('; ') || '');
|
|
46
|
-
" 2>/dev/null)
|
|
47
|
-
echo "$errors"
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
check_correct_queries() {
|
|
51
|
-
local file="$1"
|
|
52
|
-
local expected="$2"
|
|
53
|
-
local result=$(node -e "
|
|
54
|
-
const d = JSON.parse(require('fs').readFileSync('$file','utf8'));
|
|
55
|
-
const queries = [d.perplexity?.query, d.bing?.query, d.google?.query].filter(Boolean);
|
|
56
|
-
const allMatch = queries.every(q => q === '$expected');
|
|
57
|
-
console.log(allMatch ? 'ok' : 'queries: ' + queries.join(', '));
|
|
58
|
-
" 2>/dev/null)
|
|
59
|
-
echo "$result"
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
check_all_engines_completed() {
|
|
63
|
-
local file="$1"
|
|
64
|
-
local result=$(node -e "
|
|
65
|
-
const d = JSON.parse(require('fs').readFileSync('$file','utf8'));
|
|
66
|
-
const hasAnswer = (e) => d[e]?.answer && d[e].answer.length > 10;
|
|
67
|
-
const engines = ['perplexity', 'bing', 'google'];
|
|
68
|
-
const ok = engines.every(hasAnswer);
|
|
69
|
-
console.log(ok ? 'ok' : 'missing: ' + engines.filter(e => !hasAnswer(e)).join(', '));
|
|
70
|
-
" 2>/dev/null)
|
|
71
|
-
echo "$result"
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
# ─────────────────────────────────────────────────────────
|
|
75
|
-
echo -e "\n${YELLOW}═══ GreedySearch Test Suite ═══${NC}\n"
|
|
76
|
-
|
|
77
|
-
# ── Test 1: Single engine mode ──────────────────────────
|
|
78
|
-
if [[ "$1" != "parallel" ]]; then
|
|
79
|
-
echo "Test 1: Single engine mode"
|
|
80
|
-
|
|
81
|
-
for engine in perplexity bing google gemini; do
|
|
82
|
-
outfile="$RESULTS_DIR/single_${engine}.json"
|
|
83
|
-
node search.mjs "$engine" "explain $engine attention mechanism" --out "$outfile" 2>/dev/null
|
|
84
|
-
if [[ $? -eq 0 && -f "$outfile" ]]; then
|
|
85
|
-
errors=$(check_no_errors "$outfile")
|
|
86
|
-
if [[ -z "$errors" ]]; then
|
|
87
|
-
pass "$engine completed without errors"
|
|
88
|
-
else
|
|
89
|
-
fail "$engine errors: $errors"
|
|
90
|
-
fi
|
|
91
|
-
else
|
|
92
|
-
fail "$engine failed to run"
|
|
93
|
-
fi
|
|
94
|
-
done
|
|
95
|
-
fi
|
|
96
|
-
|
|
97
|
-
# ── Test 2: Sequential "all" mode ───────────────────────
|
|
98
|
-
if [[ "$1" != "parallel" ]]; then
|
|
99
|
-
echo -e "\nTest 2: Sequential 'all' mode (3 runs)"
|
|
100
|
-
|
|
101
|
-
for i in 1 2 3; do
|
|
102
|
-
outfile="$RESULTS_DIR/seq_${i}.json"
|
|
103
|
-
query="LLM inference optimization techniques $i"
|
|
104
|
-
node search.mjs all "$query" --out "$outfile" 2>/dev/null
|
|
105
|
-
|
|
106
|
-
if [[ $? -eq 0 && -f "$outfile" ]]; then
|
|
107
|
-
errors=$(check_no_errors "$outfile")
|
|
108
|
-
if [[ -z "$errors" ]]; then
|
|
109
|
-
pass "Run $i: no errors"
|
|
110
|
-
else
|
|
111
|
-
fail "Run $i errors: $errors"
|
|
112
|
-
fi
|
|
113
|
-
|
|
114
|
-
correct=$(check_correct_queries "$outfile" "$query")
|
|
115
|
-
if [[ "$correct" == "ok" ]]; then
|
|
116
|
-
pass "Run $i: correct queries"
|
|
117
|
-
else
|
|
118
|
-
fail "Run $i: $correct"
|
|
119
|
-
fi
|
|
120
|
-
else
|
|
121
|
-
fail "Run $i: failed to run"
|
|
122
|
-
fi
|
|
123
|
-
done
|
|
124
|
-
fi
|
|
125
|
-
|
|
126
|
-
# ── Test 3: Parallel "all" mode (race condition test) ───
|
|
127
|
-
if [[ "$1" != "quick" && "$1" != "sequential" ]]; then
|
|
128
|
-
echo -e "\nTest 3: Parallel 'all' mode (5 concurrent searches)"
|
|
129
|
-
|
|
130
|
-
PARALLEL_QUERIES=(
|
|
131
|
-
"what are transformer architectures in LLMs"
|
|
132
|
-
"explain RLHF fine-tuning process"
|
|
133
|
-
"difference between GPT and BERT models"
|
|
134
|
-
"how does chain of thought prompting work"
|
|
135
|
-
"what is retrieval augmented generation"
|
|
136
|
-
)
|
|
137
|
-
|
|
138
|
-
PIDS=()
|
|
139
|
-
for i in "${!PARALLEL_QUERIES[@]}"; do
|
|
140
|
-
outfile="$RESULTS_DIR/parallel_${i}.json"
|
|
141
|
-
query="${PARALLEL_QUERIES[$i]}"
|
|
142
|
-
node search.mjs all "$query" --out "$outfile" 2>/dev/null &
|
|
143
|
-
PIDS+=($!)
|
|
144
|
-
done
|
|
145
|
-
|
|
146
|
-
# Wait for all to complete
|
|
147
|
-
FAILED=0
|
|
148
|
-
for i in "${!PIDS[@]}"; do
|
|
149
|
-
if ! wait "${PIDS[$i]}"; then
|
|
150
|
-
fail "Parallel $i: process exited with error"
|
|
151
|
-
((FAILED++))
|
|
152
|
-
fi
|
|
153
|
-
done
|
|
154
|
-
|
|
155
|
-
if [[ $FAILED -eq 0 ]]; then
|
|
156
|
-
# Check results
|
|
157
|
-
for i in "${!PARALLEL_QUERIES[@]}"; do
|
|
158
|
-
outfile="$RESULTS_DIR/parallel_${i}.json"
|
|
159
|
-
query="${PARALLEL_QUERIES[$i]}"
|
|
160
|
-
|
|
161
|
-
if [[ -f "$outfile" ]]; then
|
|
162
|
-
errors=$(check_no_errors "$outfile")
|
|
163
|
-
if [[ -z "$errors" ]]; then
|
|
164
|
-
pass "Parallel $i: no errors"
|
|
165
|
-
else
|
|
166
|
-
fail "Parallel $i: $errors"
|
|
167
|
-
fi
|
|
168
|
-
|
|
169
|
-
correct=$(check_correct_queries "$outfile" "$query")
|
|
170
|
-
if [[ "$correct" == "ok" ]]; then
|
|
171
|
-
pass "Parallel $i: correct query"
|
|
172
|
-
else
|
|
173
|
-
fail "Parallel $i: $correct (TAB RACE DETECTED)"
|
|
174
|
-
fi
|
|
175
|
-
|
|
176
|
-
all_done=$(check_all_engines_completed "$outfile")
|
|
177
|
-
if [[ "$all_done" == "ok" ]]; then
|
|
178
|
-
pass "Parallel $i: all engines answered"
|
|
179
|
-
else
|
|
180
|
-
fail "Parallel $i: $all_done"
|
|
181
|
-
fi
|
|
182
|
-
else
|
|
183
|
-
fail "Parallel $i: no result file"
|
|
184
|
-
fi
|
|
185
|
-
done
|
|
186
|
-
fi
|
|
187
|
-
fi
|
|
188
|
-
|
|
189
|
-
# ── Test 4: Synthesis mode ──────────────────────────────
|
|
190
|
-
if [[ "$1" != "parallel" && "$1" != "quick" ]]; then
|
|
191
|
-
echo -e "\nTest 4: Synthesis mode"
|
|
192
|
-
|
|
193
|
-
outfile="$RESULTS_DIR/synthesis.json"
|
|
194
|
-
node search.mjs all "what is Mixture of Experts in neural networks" --synthesize --out "$outfile" 2>/dev/null
|
|
195
|
-
|
|
196
|
-
if [[ $? -eq 0 && -f "$outfile" ]]; then
|
|
197
|
-
has_synthesis=$(node -e "
|
|
198
|
-
const d = JSON.parse(require('fs').readFileSync('$outfile','utf8'));
|
|
199
|
-
console.log(d._synthesis?.answer ? 'ok' : 'missing');
|
|
200
|
-
" 2>/dev/null)
|
|
201
|
-
|
|
202
|
-
if [[ "$has_synthesis" == "ok" ]]; then
|
|
203
|
-
pass "Synthesis completed"
|
|
204
|
-
else
|
|
205
|
-
fail "Synthesis missing"
|
|
206
|
-
fi
|
|
207
|
-
|
|
208
|
-
errors=$(check_no_errors "$outfile")
|
|
209
|
-
if [[ -z "$errors" ]]; then
|
|
210
|
-
pass "Synthesis: no engine errors"
|
|
211
|
-
else
|
|
212
|
-
fail "Synthesis: $errors"
|
|
213
|
-
fi
|
|
214
|
-
else
|
|
215
|
-
fail "Synthesis failed to run"
|
|
216
|
-
fi
|
|
217
|
-
fi
|
|
218
|
-
|
|
219
|
-
# ─────────────────────────────────────────────────────────
|
|
220
|
-
# Generate test report
|
|
221
|
-
REPORT_FILE="$RESULTS_DIR/REPORT.md"
|
|
222
|
-
|
|
223
|
-
cat > "$REPORT_FILE" << EOF
|
|
224
|
-
# GreedySearch Test Report
|
|
225
|
-
|
|
226
|
-
**Date:** $(date)
|
|
227
|
-
**Test run:** $RESULTS_DIR
|
|
228
|
-
|
|
229
|
-
## Summary
|
|
230
|
-
|
|
231
|
-
| Result | Count |
|
|
232
|
-
|--------|-------|
|
|
233
|
-
| ✅ Passed | $PASS |
|
|
234
|
-
| ❌ Failed | $FAIL |
|
|
235
|
-
| Total | $((PASS + FAIL)) |
|
|
236
|
-
|
|
237
|
-
## Failures
|
|
238
|
-
|
|
239
|
-
EOF
|
|
240
|
-
|
|
241
|
-
if [[ ${#FAILURES[@]} -eq 0 ]]; then
|
|
242
|
-
echo "No failures — all tests passed! 🎉" >> "$REPORT_FILE"
|
|
243
|
-
else
|
|
244
|
-
for i in "${!FAILURES[@]}"; do
|
|
245
|
-
echo "$((i+1)). ${FAILURES[$i]}" >> "$REPORT_FILE"
|
|
246
|
-
done
|
|
247
|
-
|
|
248
|
-
cat >> "$REPORT_FILE" << 'EOF'
|
|
249
|
-
|
|
250
|
-
## Common Issues
|
|
251
|
-
|
|
252
|
-
### Bing Copilot "copy button did not appear"
|
|
253
|
-
This usually means:
|
|
254
|
-
- **Verification challenge appeared** — Cloudflare Turnstile or Microsoft auth
|
|
255
|
-
- **Page didn't load** — network issue or Copilot slow to respond
|
|
256
|
-
- **UI changed** — selector no longer matches Copilot's DOM
|
|
257
|
-
|
|
258
|
-
To debug: check the result JSON file for the full error message.
|
|
259
|
-
|
|
260
|
-
### Google "verification required"
|
|
261
|
-
Google sometimes shows CAPTCHAs that can't be auto-solved.
|
|
262
|
-
Manual intervention required in the Chrome window.
|
|
263
|
-
|
|
264
|
-
### Perplexity "Clipboard interceptor returned empty text"
|
|
265
|
-
Perplexity's UI may have changed. Check if the copy button selector still works.
|
|
266
|
-
|
|
267
|
-
EOF
|
|
268
|
-
fi
|
|
269
|
-
|
|
270
|
-
cat >> "$REPORT_FILE" << EOF
|
|
271
|
-
|
|
272
|
-
## Result Files
|
|
273
|
-
|
|
274
|
-
\`\`\`
|
|
275
|
-
$(ls -la "$RESULTS_DIR"/*.json 2>/dev/null | awk '{print $NF}' | xargs -I{} basename {})
|
|
276
|
-
\`\`\`
|
|
277
|
-
|
|
278
|
-
---
|
|
279
|
-
*Generated by test.sh*
|
|
280
|
-
EOF
|
|
281
|
-
|
|
282
|
-
echo -e "\n${YELLOW}═══ Results ═══${NC}"
|
|
283
|
-
echo -e " ${GREEN}Passed: $PASS${NC}"
|
|
284
|
-
[[ $FAIL -gt 0 ]] && echo -e " ${RED}Failed: $FAIL${NC}" || echo " Failed: 0"
|
|
285
|
-
echo " Results in: $RESULTS_DIR"
|
|
286
|
-
echo " Report: $REPORT_FILE"
|
|
287
|
-
echo ""
|
|
288
|
-
|
|
289
|
-
# Print failure details to console too
|
|
290
|
-
if [[ ${#FAILURES[@]} -gt 0 ]]; then
|
|
291
|
-
echo -e "${RED}Failures:${NC}"
|
|
292
|
-
for f in "${FAILURES[@]}"; do
|
|
293
|
-
echo -e " ${RED}•${NC} $f"
|
|
294
|
-
done
|
|
295
|
-
echo ""
|
|
296
|
-
fi
|
|
297
|
-
|
|
298
|
-
[[ $FAIL -eq 0 ]] && exit 0 || exit 1
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test.sh — GreedySearch test suite
|
|
3
|
+
#
|
|
4
|
+
# Usage:
|
|
5
|
+
# ./test.sh # run all tests
|
|
6
|
+
# ./test.sh parallel # run only parallel test
|
|
7
|
+
# ./test.sh quick # skip slow tests (parallel + synthesis)
# ./test.sh sequential # skip only the parallel test
|
|
8
|
+
#
|
|
9
|
+
# Tests verify:
|
|
10
|
+
# - No crashes/errors from extractors
|
|
11
|
+
# - All engines complete in "all" mode
|
|
12
|
+
# - Correct queries in results (not mixed up)
|
|
13
|
+
# - Parallel searches don't race on shared tabs
|
|
14
|
+
|
|
15
|
+
# Stop at the first command that exits non-zero.
set -e

# Run from the directory that holds this script; search.mjs is invoked
# by a relative path further down.
cd "$(dirname "$0")"

# Every run writes into its own timestamped directory under results/.
RESULTS_DIR="results/test_$(date +%Y%m%d_%H%M%S)"
mkdir -p "$RESULTS_DIR"

# Running tallies, updated by pass() and fail().
PASS=0
FAIL=0
FAILURES=() # Array to store failure details for report

# ANSI colour sequences; they are expanded by `echo -e` at print time.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
|
|
29
|
+
|
|
30
|
+
# pass LABEL
# Counts one more passing check and prints LABEL after a green tick.
pass() {
  local label="$1"
  PASS=$(( PASS + 1 ))
  echo -e " ${GREEN}✓${NC} ${label}"
}
|
|
31
|
+
# fail MESSAGE
# Counts one more failing check, prints MESSAGE after a red cross, and
# keeps MESSAGE in FAILURES so it can be listed again in the report.
fail() {
  local message="$1"
  FAIL=$(( FAIL + 1 ))
  echo -e " ${RED}✗${NC} ${message}"
  FAILURES+=("${message}")
}
|
|
36
|
+
|
|
37
|
+
# check_no_errors FILE
#
# Prints a "; "-separated list of "<engine>: <message>" entries, one for each
# of perplexity, bing and google whose object in the JSON result FILE has a
# truthy `error` field. Prints an empty line when none do, so callers can
# test the captured output with `-z`.
#
# The path reaches node through the environment rather than being spliced
# into the script text, so a quote or backslash in the path cannot change
# the JavaScript. A file that cannot be read or parsed is reported as an
# error; before, that case produced empty output and looked like a clean run.
#
# NOTE(review): gemini is exercised by Test 1 but is not inspected here —
# confirm whether its results can carry an `error` field.
check_no_errors() {
  local file="$1"
  local errors
  errors=$(RESULT_FILE="$file" node -e '
    try {
      const d = JSON.parse(require("fs").readFileSync(process.env.RESULT_FILE, "utf8"));
      const errs = [];
      if (d.perplexity?.error) errs.push("perplexity: " + d.perplexity.error);
      if (d.bing?.error) errs.push("bing: " + d.bing.error);
      if (d.google?.error) errs.push("google: " + d.google.error);
      console.log(errs.join("; "));
    } catch (err) {
      console.log("unreadable result file: " + err.message);
    }
  ' 2>/dev/null) || errors="could not inspect $file (node exited with an error)"
  echo "$errors"
}
|
|
49
|
+
|
|
50
|
+
# check_correct_queries FILE EXPECTED
#
# Prints "ok" when every `query` recorded under perplexity, bing and google
# in the JSON result FILE equals EXPECTED. Engines with no `query` field are
# ignored. Otherwise prints "queries: " followed by the recorded queries,
# comma-separated.
#
# FILE and EXPECTED reach node through the environment rather than being
# spliced into the script text, so a query containing a quote or backslash
# is compared literally instead of breaking the JavaScript. A file that
# cannot be read or parsed yields an explanatory message, never "ok".
check_correct_queries() {
  local file="$1"
  local expected="$2"
  local result
  result=$(RESULT_FILE="$file" EXPECTED_QUERY="$expected" node -e '
    try {
      const d = JSON.parse(require("fs").readFileSync(process.env.RESULT_FILE, "utf8"));
      const queries = [d.perplexity?.query, d.bing?.query, d.google?.query].filter(Boolean);
      const allMatch = queries.every((q) => q === process.env.EXPECTED_QUERY);
      console.log(allMatch ? "ok" : "queries: " + queries.join(", "));
    } catch (err) {
      console.log("unreadable result file: " + err.message);
    }
  ' 2>/dev/null) || result="could not inspect $file (node exited with an error)"
  echo "$result"
}
|
|
61
|
+
|
|
62
|
+
# check_all_engines_completed FILE
#
# Prints "ok" when perplexity, bing and google each have an `answer` longer
# than 10 characters in the JSON result FILE. Otherwise prints "missing: "
# followed by the engines that do not, comma-separated.
#
# The path reaches node through the environment rather than being spliced
# into the script text. A file that cannot be read or parsed yields an
# explanatory message instead of an empty result.
check_all_engines_completed() {
  local file="$1"
  local result
  result=$(RESULT_FILE="$file" node -e '
    try {
      const d = JSON.parse(require("fs").readFileSync(process.env.RESULT_FILE, "utf8"));
      const hasAnswer = (e) => d[e]?.answer && d[e].answer.length > 10;
      const engines = ["perplexity", "bing", "google"];
      const ok = engines.every(hasAnswer);
      console.log(ok ? "ok" : "missing: " + engines.filter((e) => !hasAnswer(e)).join(", "));
    } catch (err) {
      console.log("unreadable result file: " + err.message);
    }
  ' 2>/dev/null) || result="could not inspect $file (node exited with an error)"
  echo "$result"
}
|
|
73
|
+
|
|
74
|
+
# ─────────────────────────────────────────────────────────
|
|
75
|
+
echo -e "\n${YELLOW}═══ GreedySearch Test Suite ═══${NC}\n"

# ── Test 1: Single engine mode ──────────────────────────
# Runs each engine on its own and checks that the run succeeds, writes its
# result file, and that the file reports no engine errors.
# Skipped when the first argument is "parallel".
if [[ "$1" != "parallel" ]]; then
  echo "Test 1: Single engine mode"

  for engine in perplexity bing google gemini; do
    outfile="$RESULTS_DIR/single_${engine}.json"
    # node runs as the `if` condition on purpose: the script uses `set -e`,
    # so as a plain statement a non-zero exit would abort the whole suite
    # before a separate `$?` check could record the failure.
    if node search.mjs "$engine" "explain $engine attention mechanism" --out "$outfile" 2>/dev/null \
        && [[ -f "$outfile" ]]; then
      errors=$(check_no_errors "$outfile")
      if [[ -z "$errors" ]]; then
        pass "$engine completed without errors"
      else
        fail "$engine errors: $errors"
      fi
    else
      fail "$engine failed to run"
    fi
  done
fi
|
|
96
|
+
|
|
97
|
+
# ── Test 2: Sequential "all" mode ───────────────────────
# Runs three "all" searches one after another, each with a distinct query,
# and checks every result for engine errors and for the expected query.
# Skipped when the first argument is "parallel".
if [[ "$1" != "parallel" ]]; then
  echo -e "\nTest 2: Sequential 'all' mode (3 runs)"

  for i in 1 2 3; do
    outfile="$RESULTS_DIR/seq_${i}.json"
    query="LLM inference optimization techniques $i"

    # node runs as the `if` condition on purpose: the script uses `set -e`,
    # so as a plain statement a non-zero exit would abort the whole suite
    # before a separate `$?` check could record the failure.
    if node search.mjs all "$query" --out "$outfile" 2>/dev/null \
        && [[ -f "$outfile" ]]; then
      errors=$(check_no_errors "$outfile")
      if [[ -z "$errors" ]]; then
        pass "Run $i: no errors"
      else
        fail "Run $i errors: $errors"
      fi

      correct=$(check_correct_queries "$outfile" "$query")
      if [[ "$correct" == "ok" ]]; then
        pass "Run $i: correct queries"
      else
        fail "Run $i: $correct"
      fi
    else
      fail "Run $i: failed to run"
    fi
  done
fi
|
|
125
|
+
|
|
126
|
+
# ── Test 3: Parallel "all" mode (race condition test) ───
# Starts five "all" searches at once, waits for all of them, then checks
# each result for engine errors, for the query it was started with, and for
# an answer from every engine. A result carrying a different query is
# reported as a tab race.
# Skipped when the first argument is "quick" or "sequential".
if [[ "$1" != "quick" && "$1" != "sequential" ]]; then
  echo -e "\nTest 3: Parallel 'all' mode (5 concurrent searches)"

  PARALLEL_QUERIES=(
    "what are transformer architectures in LLMs"
    "explain RLHF fine-tuning process"
    "difference between GPT and BERT models"
    "how does chain of thought prompting work"
    "what is retrieval augmented generation"
  )

  PIDS=()
  for i in "${!PARALLEL_QUERIES[@]}"; do
    outfile="$RESULTS_DIR/parallel_${i}.json"
    query="${PARALLEL_QUERIES[$i]}"
    node search.mjs all "$query" --out "$outfile" 2>/dev/null &
    PIDS+=("$!")
  done

  # Wait for all to complete
  FAILED=0
  for i in "${!PIDS[@]}"; do
    if ! wait "${PIDS[$i]}"; then
      fail "Parallel $i: process exited with error"
      # Plain assignment rather than ((FAILED++)): the post-increment yields
      # the old value, so the first increment evaluates to 0, returns exit
      # status 1, and `set -e` would end the script right here.
      FAILED=$((FAILED + 1))
    fi
  done

  # Results are inspected only when every process exited cleanly.
  if [[ $FAILED -eq 0 ]]; then
    # Check results
    for i in "${!PARALLEL_QUERIES[@]}"; do
      outfile="$RESULTS_DIR/parallel_${i}.json"
      query="${PARALLEL_QUERIES[$i]}"

      if [[ -f "$outfile" ]]; then
        errors=$(check_no_errors "$outfile")
        if [[ -z "$errors" ]]; then
          pass "Parallel $i: no errors"
        else
          fail "Parallel $i: $errors"
        fi

        correct=$(check_correct_queries "$outfile" "$query")
        if [[ "$correct" == "ok" ]]; then
          pass "Parallel $i: correct query"
        else
          fail "Parallel $i: $correct (TAB RACE DETECTED)"
        fi

        all_done=$(check_all_engines_completed "$outfile")
        if [[ "$all_done" == "ok" ]]; then
          pass "Parallel $i: all engines answered"
        else
          fail "Parallel $i: $all_done"
        fi
      else
        fail "Parallel $i: no result file"
      fi
    done
  fi
fi
|
|
188
|
+
|
|
189
|
+
# ── Test 4: Synthesis mode ──────────────────────────────
# Runs one "all" search with --synthesize and checks that the result has a
# `_synthesis.answer` and reports no engine errors.
# Skipped when the first argument is "parallel" or "quick".
if [[ "$1" != "parallel" && "$1" != "quick" ]]; then
  echo -e "\nTest 4: Synthesis mode"

  outfile="$RESULTS_DIR/synthesis.json"

  # node runs as the `if` condition on purpose: the script uses `set -e`,
  # so as a plain statement a non-zero exit would abort the whole suite
  # before a separate `$?` check could record the failure.
  if node search.mjs all "what is Mixture of Experts in neural networks" --synthesize --out "$outfile" 2>/dev/null \
      && [[ -f "$outfile" ]]; then
    # The path is passed through the environment, not spliced into the
    # script text. `|| true` keeps an unparseable file from ending the
    # script under `set -e`; it then counts as a missing synthesis.
    has_synthesis=$(RESULT_FILE="$outfile" node -e '
      const d = JSON.parse(require("fs").readFileSync(process.env.RESULT_FILE, "utf8"));
      console.log(d._synthesis?.answer ? "ok" : "missing");
    ' 2>/dev/null) || true

    if [[ "$has_synthesis" == "ok" ]]; then
      pass "Synthesis completed"
    else
      fail "Synthesis missing"
    fi

    errors=$(check_no_errors "$outfile")
    if [[ -z "$errors" ]]; then
      pass "Synthesis: no engine errors"
    else
      fail "Synthesis: $errors"
    fi
  else
    fail "Synthesis failed to run"
  fi
fi
|
|
218
|
+
|
|
219
|
+
# ─────────────────────────────────────────────────────────
# Generate test report
# Writes REPORT.md into the results directory: a pass/fail summary table,
# the numbered failure messages (followed by troubleshooting notes when
# there are any), and the names of the JSON result files from this run.
REPORT_FILE="$RESULTS_DIR/REPORT.md"

cat > "$REPORT_FILE" << EOF
# GreedySearch Test Report

**Date:** $(date)
**Test run:** $RESULTS_DIR

## Summary

| Result | Count |
|--------|-------|
| ✅ Passed | $PASS |
| ❌ Failed | $FAIL |
| Total | $((PASS + FAIL)) |

## Failures

EOF

if [[ ${#FAILURES[@]} -eq 0 ]]; then
  echo "No failures — all tests passed! 🎉" >> "$REPORT_FILE"
else
  for i in "${!FAILURES[@]}"; do
    echo "$((i+1)). ${FAILURES[$i]}" >> "$REPORT_FILE"
  done

  # Quoted delimiter: the troubleshooting text is written verbatim.
  cat >> "$REPORT_FILE" << 'EOF'

## Common Issues

### Bing Copilot "copy button did not appear"
This usually means:
- **Verification challenge appeared** — Cloudflare Turnstile or Microsoft auth
- **Page didn't load** — network issue or Copilot slow to respond
- **UI changed** — selector no longer matches Copilot's DOM

To debug: check the result JSON file for the full error message.

### Google "verification required"
Google sometimes shows CAPTCHAs that can't be auto-solved.
Manual intervention required in the Chrome window.

### Perplexity "Clipboard interceptor returned empty text"
Perplexity's UI may have changed. Check if the copy button selector still works.

EOF
fi

# Collect the result file names with a glob rather than by parsing `ls -la`
# output, which depends on the listing's column layout. The -e test skips
# the unexpanded pattern when no JSON file exists.
result_files=""
for json_file in "$RESULTS_DIR"/*.json; do
  [[ -e "$json_file" ]] || continue
  result_files+="${json_file##*/}"$'\n'
done
result_files="${result_files%$'\n'}"

cat >> "$REPORT_FILE" << EOF

## Result Files

\`\`\`
$result_files
\`\`\`

---
*Generated by test.sh*
EOF
|
|
281
|
+
|
|
282
|
+
# Console summary: totals first, then where the artefacts were written.
echo -e "\n${YELLOW}═══ Results ═══${NC}"
echo -e " ${GREEN}Passed: $PASS${NC}"
if (( FAIL > 0 )); then
  echo -e " ${RED}Failed: $FAIL${NC}"
else
  echo " Failed: 0"
fi
echo " Results in: $RESULTS_DIR"
echo " Report: $REPORT_FILE"
echo ""

# Print failure details to console too
if (( ${#FAILURES[@]} > 0 )); then
  echo -e "${RED}Failures:${NC}"
  for f in "${FAILURES[@]}"; do
    echo -e " ${RED}•${NC} $f"
  done
  echo ""
fi

# Exit status mirrors the outcome: 0 when nothing failed, 1 otherwise.
if (( FAIL == 0 )); then
  exit 0
fi
exit 1
|