@gajae-code/coding-agent 0.7.0 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/dist/types/cli/notify-cli.d.ts +2 -0
- package/dist/types/config/settings-schema.d.ts +39 -2
- package/dist/types/extensibility/shared-events.d.ts +1 -0
- package/dist/types/gjc-runtime/launch-tmux.d.ts +1 -0
- package/dist/types/gjc-runtime/ralplan-runtime.d.ts +1 -1
- package/dist/types/gjc-runtime/tmux-common.d.ts +3 -0
- package/dist/types/gjc-runtime/tmux-sessions.d.ts +2 -0
- package/dist/types/lsp/types.d.ts +2 -0
- package/dist/types/notifications/attachment-registry.d.ts +17 -0
- package/dist/types/notifications/chat-adapters.d.ts +9 -0
- package/dist/types/notifications/config.d.ts +9 -1
- package/dist/types/notifications/engine.d.ts +59 -0
- package/dist/types/notifications/managed-daemon.d.ts +48 -0
- package/dist/types/notifications/telegram-daemon.d.ts +19 -0
- package/dist/types/notifications/threaded-inbound.d.ts +19 -0
- package/dist/types/notifications/threaded-render.d.ts +6 -1
- package/dist/types/session/agent-session.d.ts +2 -0
- package/dist/types/tools/fetch.d.ts +23 -0
- package/dist/types/tools/index.d.ts +1 -0
- package/dist/types/tools/telegram-send.d.ts +32 -0
- package/dist/types/web/insane/bridge.d.ts +103 -0
- package/dist/types/web/insane/url-guard.d.ts +22 -0
- package/dist/types/web/search/provider.d.ts +18 -1
- package/dist/types/web/search/providers/insane.d.ts +53 -0
- package/dist/types/web/search/providers/text-citations.d.ts +23 -0
- package/dist/types/web/search/types.d.ts +12 -4
- package/package.json +10 -8
- package/scripts/verify-insane-vendor.ts +132 -0
- package/src/cli/args.ts +1 -1
- package/src/cli/fast-help.ts +1 -1
- package/src/cli/notify-cli.ts +152 -5
- package/src/cli.ts +1 -3
- package/src/commands/team.ts +1 -1
- package/src/config/settings-schema.ts +30 -1
- package/src/defaults/gjc/skills/ralplan/SKILL.md +11 -4
- package/src/edit/modes/replace.ts +1 -1
- package/src/extensibility/shared-events.ts +1 -0
- package/src/gjc-runtime/launch-tmux.ts +27 -5
- package/src/gjc-runtime/ledger-event-renderer.ts +1 -0
- package/src/gjc-runtime/ralplan-runtime.ts +2 -2
- package/src/gjc-runtime/tmux-common.ts +8 -0
- package/src/gjc-runtime/tmux-sessions.ts +8 -1
- package/src/gjc-runtime/workflow-manifest.generated.json +29 -0
- package/src/gjc-runtime/workflow-manifest.ts +7 -2
- package/src/hashline/hash.ts +1 -1
- package/src/internal-urls/docs-index.generated.ts +9 -8
- package/src/lsp/config.ts +16 -3
- package/src/lsp/defaults.json +7 -0
- package/src/lsp/types.ts +2 -0
- package/src/modes/controllers/event-controller.ts +15 -0
- package/src/modes/interactive-mode.ts +46 -2
- package/src/modes/utils/context-usage.ts +2 -2
- package/src/notifications/attachment-registry.ts +23 -0
- package/src/notifications/chat-adapters.ts +147 -0
- package/src/notifications/config.ts +23 -2
- package/src/notifications/engine.ts +100 -0
- package/src/notifications/index.ts +224 -45
- package/src/notifications/managed-daemon.ts +163 -0
- package/src/notifications/telegram-daemon.ts +235 -14
- package/src/notifications/threaded-inbound.ts +60 -4
- package/src/notifications/threaded-render.ts +20 -2
- package/src/session/agent-session.ts +82 -51
- package/src/tools/ask.ts +3 -2
- package/src/tools/fetch.ts +78 -1
- package/src/tools/index.ts +3 -0
- package/src/tools/telegram-send.ts +137 -0
- package/src/web/insane/bridge.ts +350 -0
- package/src/web/insane/url-guard.ts +155 -0
- package/src/web/search/provider.ts +77 -18
- package/src/web/search/providers/anthropic.ts +70 -3
- package/src/web/search/providers/codex.ts +1 -119
- package/src/web/search/providers/gemini.ts +99 -0
- package/src/web/search/providers/insane.ts +551 -0
- package/src/web/search/providers/openai-compatible.ts +66 -32
- package/src/web/search/providers/text-citations.ts +111 -0
- package/src/web/search/types.ts +13 -2
- package/vendor/insane-search/LICENSE +21 -0
- package/vendor/insane-search/MANIFEST.json +24 -0
- package/vendor/insane-search/engine/__init__.py +23 -0
- package/vendor/insane-search/engine/__main__.py +128 -0
- package/vendor/insane-search/engine/bias_check.py +183 -0
- package/vendor/insane-search/engine/executor.py +254 -0
- package/vendor/insane-search/engine/fetch_chain.py +725 -0
- package/vendor/insane-search/engine/learning.py +175 -0
- package/vendor/insane-search/engine/phase0.py +214 -0
- package/vendor/insane-search/engine/safety.py +91 -0
- package/vendor/insane-search/engine/templates/package.json +11 -0
- package/vendor/insane-search/engine/templates/playwright_mobile_chrome.js +188 -0
- package/vendor/insane-search/engine/templates/playwright_real_chrome.js +243 -0
- package/vendor/insane-search/engine/tests/test_hardening.py +57 -0
- package/vendor/insane-search/engine/tests/test_smoke.py +152 -0
- package/vendor/insane-search/engine/tests/test_u1.py +200 -0
- package/vendor/insane-search/engine/tests/test_u4.py +131 -0
- package/vendor/insane-search/engine/tests/test_u5.py +163 -0
- package/vendor/insane-search/engine/tests/test_u7.py +124 -0
- package/vendor/insane-search/engine/transport.py +211 -0
- package/vendor/insane-search/engine/url_transforms.py +98 -0
- package/vendor/insane-search/engine/validators.py +331 -0
- package/vendor/insane-search/engine/waf_detector.py +214 -0
- package/vendor/insane-search/engine/waf_profiles.yaml +162 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""U4 tests — SessionPool, root warmup, browser→curl cookie bridge.
|
|
3
|
+
|
|
4
|
+
Offline unit tests + a couple of benign online checks (example.com). Run:
|
|
5
|
+
python3 engine/tests/test_u4.py
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
import sys
|
|
11
|
+
|
|
12
|
+
HERE = os.path.dirname(os.path.abspath(__file__))
|
|
13
|
+
ROOT = os.path.abspath(os.path.join(HERE, "..", ""))
|
|
14
|
+
sys.path.insert(0, os.path.abspath(os.path.join(HERE, "..", "..")))
|
|
15
|
+
|
|
16
|
+
from engine.transport import SessionPool, _host_of, _root_of # noqa: E402
|
|
17
|
+
from engine.executor import _parse_envelope # noqa: E402
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def t_host_and_root_helpers():
    """Check that the URL helpers extract the host and the site root."""
    sample = "https://www.x.com/a/b?q=1"
    host = _host_of(sample)
    root = _root_of(sample)
    assert host == "www.x.com"
    assert root == "https://www.x.com/"
    print(" ✓ host/root helpers")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def t_session_reuse_same_key():
    """Same (host, impersonate) must reuse one entry; another impersonate gets its own."""
    pool = SessionPool()
    safari_first = pool.get("www.x.com", "safari")
    safari_again = pool.get("www.x.com", "safari")
    chrome = pool.get("www.x.com", "chrome")
    if safari_first is None:
        # get() hands back None when no session could be created.
        print(" ⚠ curl_cffi unavailable — skipped reuse check")
        return
    assert safari_first is safari_again, "same (host,impersonate) must reuse entry"
    assert safari_first is not chrome, "different impersonate must be separate session"
    assert pool.stats()["sessions"] == 2, pool.stats()
    print(f" ✓ session reuse (same key→same, diff impersonate→new): {pool.stats()}")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def t_inject_cookies_then_present():
    """Injected browser cookies and User-Agent must show up on the pooled session."""
    pool = SessionPool()
    harvested = [{"name": "cf_clearance", "value": "abc", "domain": "www.x.com"}]
    injected = pool.inject_cookies("www.x.com", "chrome", harvested, user_agent="UA/1.0")
    entry = pool.get("www.x.com", "chrome")
    if entry is None:
        print(" ⚠ curl_cffi unavailable — skipped cookie inject check")
        return
    assert injected, "inject should report success"
    assert entry.injected_ua == "UA/1.0"
    names = set()
    for cookie in entry.session.cookies.jar:
        names.add(cookie.name)
    assert "cf_clearance" in names, names
    print(f" ✓ injected cookies present on session: {sorted(names)}")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def t_parse_envelope_json():
    """A JSON envelope must be unpacked into html / final URL / status / cookies / UA."""
    envelope = (
        '{"html":"<h1>hi</h1>","finalUrl":"https://x/p","status":200,'
        '"cookies":[{"name":"a","value":"b"}],"userAgent":"UA"}'
    )
    parsed = _parse_envelope(envelope, "https://x/q")
    html, final, status, cookies, ua, automation = parsed
    assert html == "<h1>hi</h1>"
    assert final == "https://x/p"
    assert status == 200
    assert cookies
    assert cookies[0]["name"] == "a"
    assert ua == "UA"
    print(" ✓ envelope JSON parsed")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def t_parse_envelope_raw_html_fallback():
    """Non-JSON stdout must be treated as raw HTML for the requested URL."""
    raw = "<html>raw</html>"
    requested = "https://x/q"
    parsed = _parse_envelope(raw, requested)
    html, final, status, cookies, ua, automation = parsed
    assert html == "<html>raw</html>"
    assert final == "https://x/q"
    assert status == 200
    assert cookies == []
    assert ua is None
    print(" ✓ raw-HTML fallback (non-JSON stdout)")
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def t_warmup_once_guard_online():
    """A second warmup of the same (host, impersonate) must be a no-op."""
    pool = SessionPool()
    # Two back-to-back warmups against the same key, in order.
    outcomes = [
        pool.warmup("example.com", "safari", "https://example.com/", timeout=15)
        for _ in range(2)
    ]
    first, second = outcomes
    entry = pool.get("example.com", "safari")
    if entry is None:
        print(" ⚠ curl_cffi unavailable — skipped warmup check")
        return
    # first may be True (network) or False (offline); second must be False (guard).
    assert second is False, "warmup must be idempotent"
    assert entry.warmed is True
    print(f" ✓ warmup once-guard (first={first}, second={second})")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def t_fetch_many_reuses_pool_online():
    """Fetching two URLs on one host must not create a session per URL."""
    from engine import transport
    from engine.fetch_chain import fetch_many

    transport.POOL.reset()
    urls = ["https://example.com/", "https://example.com/index.html"]
    results = fetch_many(
        urls,
        success_selectors=["h1", "p"],
        timeout=15,
        max_attempts=2,
        enable_playwright=False,
    )
    st = transport.POOL.stats()
    assert len(results) == 2
    # Same host → should not spawn a separate session per URL per identity.
    assert st["sessions"] <= 2, st
    oks = len([r for r in results if r.ok])
    print(f" ✓ fetch_many reused pool: stats={st}, ok={oks}/2")
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# Registry of (label, callable) pairs consumed by main(); each label is the
# function name without its "t_" prefix.
ALL = [
    (test_fn.__name__[2:], test_fn)
    for test_fn in (
        t_host_and_root_helpers,
        t_session_reuse_same_key,
        t_inject_cookies_then_present,
        t_parse_envelope_json,
        t_parse_envelope_raw_html_fallback,
        t_warmup_once_guard_online,
        t_fetch_many_reuses_pool_online,
    )
]
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def main() -> int:
    """Run every test registered in ALL and report the outcome.

    Returns 0 when no test failed or errored, 1 otherwise.
    """
    passed = 0
    failed = 0
    for label, test_fn in ALL:
        try:
            print(f"[{label}]")
            test_fn()
        except AssertionError as exc:
            failed += 1
            print(f" ✗ FAIL: {exc}")
        except Exception as exc:
            # Anything other than an assertion is reported as an error but
            # still counted as a failure so the run continues.
            failed += 1
            print(f" ✗ ERROR: {type(exc).__name__}: {exc}")
        else:
            passed += 1
    print(f"\n{passed} passed, {failed} failed")
    return 1 if failed else 0
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""U5 self-learning store — unit coverage (no network).
|
|
2
|
+
|
|
3
|
+
Run: python3 -m engine.tests.test_u5
|
|
4
|
+
Covers: round-trip, win counting, failure striking + eviction at 2,
|
|
5
|
+
transient vs real-failure classification, TTL prune, LRU cap, key scoping,
|
|
6
|
+
grid priority reordering, and winning-route extraction from a trace."""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
import tempfile
|
|
11
|
+
from datetime import datetime, timezone, timedelta
|
|
12
|
+
|
|
13
|
+
from engine import learning
|
|
14
|
+
from engine.fetch_chain import _build_plan, _winning_route, _load_profiles, FetchResult, Attempt
|
|
15
|
+
from engine.validators import Verdict
|
|
16
|
+
|
|
17
|
+
_passed = 0
|
|
18
|
+
_failed = 0
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def check(name: str, cond: bool, detail: str = ""):
    """Record one check result in the module counters and print it.

    A truthy *cond* increments ``_passed``; otherwise ``_failed`` is
    incremented. *detail* is appended to the printed line ("ok" is shown for
    a pass with no detail).
    """
    global _passed, _failed
    if not cond:
        _failed += 1
        print(f"[{name}]\n ✗ FAIL {detail}")
        return
    _passed += 1
    print(f"[{name}]\n ✓ {detail or 'ok'}")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _tmp() -> str:
    """Return a fresh temp-file path ending in ``_learned.json`` that does not exist yet."""
    handle, location = tempfile.mkstemp(suffix="_learned.json")
    os.close(handle)
    # Remove the file mkstemp created so the store starts empty.
    os.remove(location)
    return location
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Shared fixtures: one URL and two distinct routes.
U = "https://example.com/some/page"
ROUTE_A = dict(transform="original", impersonate="chrome", referer="self_root", phase="grid")
ROUTE_B = dict(transform="mobile_subdomain", impersonate="safari_ios", referer="none", phase="grid")


# 1) round-trip + win counting
store = _tmp()
learning.record_success(U, "desktop", ROUTE_A, path=store)
check(
    "roundtrip_lookup",
    learning.lookup(U, "desktop", path=store) == ROUTE_A,
    f"learned route returned: {learning.lookup(U, 'desktop', path=store)}",
)
learning.record_success(U, "desktop", ROUTE_A, path=store)
snapshot = learning.load(store)
check(
    "wins_increment_same_route",
    snapshot[learning.key_for(U, "desktop")]["wins"] == 2,
    f"wins={snapshot[learning.key_for(U, 'desktop')]['wins']}",
)
learning.record_success(U, "desktop", ROUTE_B, path=store)
snapshot = learning.load(store)
check(
    "wins_reset_on_new_route",
    snapshot[learning.key_for(U, "desktop")]["wins"] == 1
    and learning.lookup(U, "desktop", path=store) == ROUTE_B,
    "new route replaces, wins=1",
)

# 2) transient failure does NOT strike; refreshes last_used
store = _tmp()
learning.record_success(U, "desktop", ROUTE_A, path=store)
learning.record_failure(U, "desktop", penalize=False, path=store)
snapshot = learning.load(store)
desktop_key = learning.key_for(U, "desktop")  # reused by sections 3 and 3b
check(
    "transient_no_strike",
    desktop_key in snapshot and snapshot[desktop_key]["consecutive_fails"] == 0,
    "entry kept, consecutive_fails stays 0 on transient",
)

# 3) real failure strikes; evicts after 2
store = _tmp()
learning.record_success(U, "desktop", ROUTE_A, path=store)
learning.record_failure(U, "desktop", penalize=True, path=store)
snapshot = learning.load(store)
check(
    "real_failure_strike_1",
    snapshot[desktop_key]["consecutive_fails"] == 1,
    "1st strike kept, fails=1",
)
learning.record_failure(U, "desktop", penalize=True, path=store)
check(
    "evict_after_2_strikes",
    learning.lookup(U, "desktop", path=store) is None,
    "evicted after 2nd consecutive real failure",
)

# 3b) success resets the strike counter
store = _tmp()
learning.record_success(U, "desktop", ROUTE_A, path=store)
learning.record_failure(U, "desktop", penalize=True, path=store)
learning.record_success(U, "desktop", ROUTE_A, path=store)
snapshot = learning.load(store)
check(
    "success_resets_strikes",
    snapshot[desktop_key]["consecutive_fails"] == 0,
    "strike reset to 0 after a win",
)

# 4) is_real_failure classification
striking = ("exhausted", "challenge", "blocked")
non_striking = ("rate_limited", "unknown", "budget", "auth_required", "not_found", "success", "")
real = all(learning.is_real_failure(r) for r in striking)
nonreal = not any(learning.is_real_failure(r) for r in non_striking)
check(
    "classify_real_failures",
    real and nonreal,
    "exhausted/challenge/blocked strike; 429/unknown/budget/auth/404 do not",
)

# 5) TTL prune on load (TTL pinned to 30 days for the duration of the check)
store = _tmp()
old_ttl = learning.TTL_DAYS
learning.TTL_DAYS = 30
stale_ts = (datetime.now(timezone.utc) - timedelta(days=31)).isoformat()
fresh_ts = datetime.now(timezone.utc).isoformat()
learning.save(
    {
        "stale.com::desktop": {
            "route": ROUTE_A, "wins": 1, "consecutive_fails": 0,
            "last_used": stale_ts, "last_success": stale_ts,
        },
        "fresh.com::desktop": {
            "route": ROUTE_B, "wins": 1, "consecutive_fails": 0,
            "last_used": fresh_ts, "last_success": fresh_ts,
        },
    },
    path=store,
)
snapshot = learning.load(store)
check(
    "ttl_prunes_stale",
    "stale.com::desktop" not in snapshot and "fresh.com::desktop" in snapshot,
    f"31-day-old dropped, fresh kept (kept={list(snapshot)})",
)
learning.TTL_DAYS = old_ttl

# 6) LRU cap (cap pinned to 5 entries for the duration of the check)
store = _tmp()
old_max = learning.MAX_ENTRIES
learning.MAX_ENTRIES = 5
now = datetime.now(timezone.utc)
# Twelve entries, one minute apart; index 0 is the newest.
stamps = [(now - timedelta(minutes=i)).isoformat() for i in range(12)]
big = {
    f"h{i}.com::desktop": {
        "route": ROUTE_A, "wins": 1, "consecutive_fails": 0,
        "last_used": ts, "last_success": ts,
    }
    for i, ts in enumerate(stamps)
}
learning.save(big, path=store)
snapshot = learning.load(store)
kept_newest = all(f"h{i}.com::desktop" in snapshot for i in range(5))
check(
    "lru_cap",
    len(snapshot) == 5 and kept_newest,
    f"capped to 5, kept 5 most-recent (n={len(snapshot)})",
)
learning.MAX_ENTRIES = old_max

# 7) key scoping: desktop vs mobile distinct; auto == desktop
check(
    "key_scoping",
    learning.key_for(U, "mobile") != learning.key_for(U, "desktop")
    and learning.key_for(U, "auto") == learning.key_for(U, "desktop"),
    "mobile/desktop separate; auto folds into desktop",
)

# 8) grid priority reordering (no network)
profiles = _load_profiles()
# Minimal stand-in for a WAF hit: only profile_id and confidence are set.
hits = [type("H", (), {"profile_id": "unknown_challenge", "confidence": 0.5})()]
plan = _build_plan(U, hits, profiles, "desktop", "safari", "self_root")
target = plan[min(3, len(plan) - 1)]
prio = {
    "transform": target.transform,
    "impersonate": target.impersonate,
    "referer": target.referer,
}
plan2 = _build_plan(U, hits, profiles, "desktop", "safari", "self_root", priority=prio)
check(
    "priority_moves_to_front",
    plan2[0].transform == target.transform and plan2[0].impersonate == target.impersonate
    and plan2[0].referer == target.referer and len(plan2) == len(plan),
    "learned candidate promoted to plan[0], no items lost",
)

# 9) winning-route extraction from trace
r_ok = FetchResult(
    ok=True,
    trace=[
        Attempt(phase="probe", executor="curl_cffi", url=U, url_transform="original",
                impersonate="safari", referer="self_root", verdict=Verdict.CHALLENGE.value),
        Attempt(phase="grid", executor="curl_cffi", url=U, url_transform="mobile_subdomain",
                impersonate="chrome", referer="none", verdict=Verdict.STRONG_OK.value),
    ],
)
check(
    "winning_route_from_grid",
    _winning_route(r_ok) == {"transform": "mobile_subdomain", "impersonate": "chrome",
                             "referer": "none", "phase": "grid"},
    f"extracted: {_winning_route(r_ok)}",
)
r_browser = FetchResult(
    ok=True,
    trace=[
        Attempt(phase="fallback", executor="playwright_real_chrome", url=U,
                url_transform="original", impersonate=None, referer="",
                verdict=Verdict.STRONG_OK.value),
    ],
)
check(
    "winning_route_skips_browser",
    _winning_route(r_browser) is None,
    "browser-only win is not learnable (None)",
)

# Summary line, then exit non-zero if any check failed.
print(f"\n{_passed} passed, {_failed} failed")
import sys
sys.exit(1 if _failed else 0)
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""U7 tests — SSRF / redirect guard. Offline & deterministic.
|
|
3
|
+
|
|
4
|
+
Run: python3 engine/tests/test_u7.py
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import sys
|
|
10
|
+
|
|
11
|
+
HERE = os.path.dirname(os.path.abspath(__file__))
|
|
12
|
+
sys.path.insert(0, os.path.abspath(os.path.join(HERE, "..", "..")))
|
|
13
|
+
|
|
14
|
+
from engine.safety import classify_url # noqa: E402
|
|
15
|
+
from engine.transport import SessionPool # noqa: E402
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def t_classify_blocks_internal():
    """Internal, metadata and non-HTTP targets must all be rejected."""
    targets = (
        "http://127.0.0.1/",
        "http://169.254.169.254/latest/meta-data/",  # cloud metadata
        "http://10.0.0.1/",
        "http://192.168.1.1/admin",
        "http://172.16.0.1/",
        "http://[::1]/",
        "http://0.0.0.0/",
        "ftp://example.com/",   # scheme
        "file:///etc/passwd",   # scheme
        "http://localhost/",    # resolves to loopback
    )
    for target in targets:
        verdict = classify_url(target, allow_private=False)
        ok, reason = verdict
        assert not ok, f"should block {target} (got ok, reason={reason})"
    print(f" ✓ blocks {len(targets)} internal/metadata/scheme targets")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def t_classify_allows_public():
    """Public IP literals must pass the guard."""
    # public IP literals (no DNS)
    public_targets = ("https://1.1.1.1/", "http://8.8.8.8/")
    for target in public_targets:
        verdict = classify_url(target, allow_private=False)
        ok, reason = verdict
        assert ok, f"should allow public {target} ({reason})"
    print(" ✓ allows public IP literals")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def t_allow_private_optin():
    """With allow_private=True a loopback URL must be accepted."""
    verdict = classify_url("http://127.0.0.1:8080/", allow_private=True)
    permitted = verdict[0]
    assert permitted, "allow_private=True must permit loopback"
    print(" ✓ allow_private=True opt-in permits loopback (local testing)")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def t_request_blocks_localhost_by_default():
    """SessionPool.request must refuse a loopback URL before fetching it."""
    pool = SessionPool()
    # no fetch happens
    outcome = pool.request("http://127.0.0.1:9/", impersonate="chrome")
    resp, err = outcome
    assert resp is None and err and err.startswith("ssrf_blocked"), (resp, err)
    print(f" ✓ POOL.request blocks loopback pre-fetch: {err}")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class _FakeResp:
    """Minimal response stand-in exposing status_code, headers and text."""

    def __init__(self, status, headers=None):
        self.status_code = status
        # A missing or empty headers argument becomes a fresh empty dict.
        self.headers = headers if headers else {}
        self.text = "ok"
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def t_redirect_to_metadata_blocked():
    """A redirect hop that lands on the metadata IP must abort the fetch."""

    def do_get(u):
        if "evil" not in u:
            return _FakeResp(200)
        return _FakeResp(302, {"Location": "http://169.254.169.254/latest/meta-data/"})

    outcome = SessionPool._fetch_following(do_get, "https://evil.test/", False, 5, None)
    resp, err = outcome
    assert resp is None and err and err.startswith("ssrf_redirect_blocked"), (resp, err)
    print(f" ✓ redirect into metadata IP blocked: {err}")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def t_safe_redirect_followed():
    """A redirect to a public IP must be followed through to the final 200."""
    calls = {"n": 0}

    def do_get(u):
        calls["n"] += 1
        if "start" not in u:
            return _FakeResp(200)
        return _FakeResp(302, {"Location": "http://1.1.1.1/landing"})  # public

    outcome = SessionPool._fetch_following(do_get, "https://start.test/", False, 5, None)
    resp, err = outcome
    assert err is None and resp is not None and resp.status_code == 200, (resp, err)
    assert calls["n"] == 2, calls
    print(f" ✓ safe redirect to public IP followed ({calls['n']} hops → 200)")
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def t_too_many_redirects():
    """An endless redirect loop must stop with ``too_many_redirects``."""

    def do_get(u):
        # Always redirect back to the same public URL.
        return _FakeResp(302, {"Location": "http://1.1.1.1/loop"})

    outcome = SessionPool._fetch_following(do_get, "http://1.1.1.1/loop", False, 3, None)
    resp, err = outcome
    assert resp is None and err == "too_many_redirects", (resp, err)
    print(" ✓ redirect loop capped (too_many_redirects)")
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# Registry of (label, callable) pairs consumed by main(); each label is the
# function name without its "t_" prefix.
ALL = [
    (test_fn.__name__[2:], test_fn)
    for test_fn in (
        t_classify_blocks_internal,
        t_classify_allows_public,
        t_allow_private_optin,
        t_request_blocks_localhost_by_default,
        t_redirect_to_metadata_blocked,
        t_safe_redirect_followed,
        t_too_many_redirects,
    )
]
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def main() -> int:
    """Run every test registered in ALL and report the outcome.

    Returns 0 when no test failed or errored, 1 otherwise.
    """
    passed = 0
    failed = 0
    for label, test_fn in ALL:
        try:
            print(f"[{label}]")
            test_fn()
        except AssertionError as exc:
            failed += 1
            print(f" ✗ FAIL: {exc}")
        except Exception as exc:
            # Anything other than an assertion is reported as an error but
            # still counted as a failure so the run continues.
            failed += 1
            print(f" ✗ ERROR: {type(exc).__name__}: {exc}")
        else:
            passed += 1
    print(f"\n{passed} passed, {failed} failed")
    return 1 if failed else 0
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"""Per-host curl_cffi Session pool + root warmup + browser→curl cookie bridge.
|
|
2
|
+
|
|
3
|
+
Why (multi-AI review 2026-06-21):
|
|
4
|
+
* v1 issued a brand-new `curl_cffi.requests.get()` per attempt, so cookies
|
|
5
|
+
set by a WAF (e.g. an Akamai `_abck` sensor or a CF `cf_clearance`) and
|
|
6
|
+
the warm TLS/connection were thrown away between attempts and between
|
|
7
|
+
pages of the same host. That caps both success rate (sensor cookies never
|
|
8
|
+
mature) and throughput (handshake per request).
|
|
9
|
+
* A browser fallback that punches through a JS challenge produces exactly the
|
|
10
|
+
cookies + User-Agent a plain HTTP client needs — but v1 discarded them
|
|
11
|
+
(`_FakeResp` kept only HTML). The bridge here lets one expensive browser
|
|
12
|
+
pass convert into cheap curl_cffi throughput (the FlareSolverr pattern).
|
|
13
|
+
|
|
14
|
+
No-Site-Name Rule: keys are hashed hosts; no site names are stored or branched.
|
|
15
|
+
"""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import os
|
|
19
|
+
import threading
|
|
20
|
+
from dataclasses import dataclass, field
|
|
21
|
+
from typing import Any, Optional
|
|
22
|
+
from urllib.parse import urlsplit
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _host_of(url: str) -> str:
    """Return the lower-cased hostname of *url*, or ``"unknown"`` when it has none."""
    hostname = urlsplit(url).hostname
    if not hostname:
        return "unknown"
    return hostname.lower()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _root_of(url: str) -> str:
    """Return ``scheme://netloc/`` for *url* (path, query and fragment dropped)."""
    parts = urlsplit(url)
    return "".join((parts.scheme, "://", parts.netloc, "/"))
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class _Entry:
    """One pooled session plus the bookkeeping SessionPool keeps for it."""
    # The session object stored by the pool.
    session: Any
    # Set once a root warmup has been attempted for this entry.
    warmed: bool = field(default=False)
    # User-Agent supplied together with injected cookies, if any.
    injected_ua: Optional[str] = field(default=None)
    # Number of GETs issued through this entry.
    requests_made: int = field(default=0)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
class SessionPool:
    """Thread-safe pool of curl_cffi Sessions keyed by (host, impersonate).

    Entries are created lazily by :meth:`get`. The lock guards the entry
    table in :meth:`get`, :meth:`stats` and :meth:`reset`; per-entry fields
    (``warmed``, ``requests_made``, ``injected_ua``) are updated without it.
    """
    _entries: dict = field(default_factory=dict)
    _lock: Any = field(default_factory=threading.Lock)

    def _key(self, host: str, impersonate: str) -> tuple:
        """Return the dictionary key for a (host, impersonate) pair."""
        return (host, impersonate)

    def get(self, host: str, impersonate: str) -> Optional["_Entry"]:
        """Return (creating if needed) the pool entry, or None if curl_cffi
        is unavailable or the session could not be constructed."""
        key = self._key(host, impersonate)
        with self._lock:
            ent = self._entries.get(key)
            if ent is not None:
                return ent
            try:
                from curl_cffi import requests as cffi_requests
            except ImportError:
                return None
            try:
                sess = cffi_requests.Session(impersonate=impersonate)
            except Exception:
                # Some impersonate names need a newer curl_cffi; let caller
                # fall back to a one-shot get by returning None.
                return None
            ent = _Entry(session=sess)
            self._entries[key] = ent
            return ent

    def warmup(self, host: str, impersonate: str, root_url: str, timeout: int = 15) -> bool:
        """Hit the site root once per (host, impersonate) so a WAF sensor can
        set a resolved session cookie before the real (deep) request. Idempotent.

        Returns True only when this call performed the warmup fetch and it
        completed without error; False when there is no session, the entry
        was already warmed, the root URL is blocked by the SSRF guard, or the
        fetch failed.
        """
        ent = self.get(host, impersonate)
        if ent is None or ent.warmed:
            return False
        from . import safety
        # Resolve the policy once so the initial check and every redirect hop
        # are validated against the same setting.
        allow_private = safety.allow_private_default()
        ok, _reason = safety.classify_url(root_url, allow_private)
        if not ok:
            ent.warmed = True  # don't retry a blocked root
            return False
        ent.warmed = True  # mark first to avoid duplicate warmups under race

        def _do_get(u):
            return ent.session.get(u, timeout=timeout, allow_redirects=False)

        resp, err = self._fetch_following(
            _do_get, root_url, allow_private, safety.DEFAULT_MAX_REDIRECTS, ent)
        return resp is not None and err is None

    def inject_cookies(self, host: str, impersonate: str,
                       cookies: list[dict], user_agent: Optional[str] = None) -> bool:
        """Seed a session with cookies harvested by a real browser. Subsequent
        requests on this (host, impersonate) reuse the browser-cleared state.

        Returns True when at least one cookie was stored, False when no
        session is available or nothing could be set. Cookies without a name
        are skipped. A truthy *user_agent* is remembered on the entry.
        """
        ent = self.get(host, impersonate)
        if ent is None:
            return False
        ok = False
        for c in cookies or []:
            name = c.get("name")
            value = c.get("value")
            if not name:
                continue
            try:
                ent.session.cookies.set(name, value, domain=c.get("domain") or host)
                ok = True
            except Exception:
                # Retry without a domain before giving up on this cookie.
                try:
                    ent.session.cookies.set(name, value)
                    ok = True
                except Exception:
                    continue
        if user_agent:
            ent.injected_ua = user_agent
        return ok

    def request(self, url: str, *, impersonate: str, referer: str = "",
                timeout: int = 25, extra_headers: Optional[dict] = None,
                allow_private: Optional[bool] = None,
                max_redirects: Optional[int] = None) -> tuple[Any, Optional[str]]:
        """GET via the pooled session (cookie + connection reuse), with an SSRF
        guard: the initial URL and EVERY redirect hop are validated against the
        private/loopback/link-local/metadata block-list before being fetched.
        Falls back to a one-shot get if no session could be created.

        Returns ``(response, None)`` on success or ``(None, error_string)``
        on failure. *allow_private* and *max_redirects* default to the values
        provided by the ``safety`` module when left as None.
        """
        from . import safety
        if allow_private is None:
            allow_private = safety.allow_private_default()
        if max_redirects is None:
            max_redirects = safety.DEFAULT_MAX_REDIRECTS

        ok, reason = safety.classify_url(url, allow_private)
        if not ok:
            return None, f"ssrf_blocked:{reason}"

        host = _host_of(url)
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        }
        if referer:
            headers["Referer"] = referer
        if extra_headers:
            headers.update(extra_headers)

        ent = self.get(host, impersonate)
        if ent is None:
            # No pooled session: issue one-shot requests instead.
            try:
                from curl_cffi import requests as cffi_requests
            except ImportError:
                return None, "curl_cffi not installed"

            def _do_get(u):
                return cffi_requests.get(u, impersonate=impersonate, headers=headers,
                                         timeout=timeout, allow_redirects=False)

            return self._fetch_following(_do_get, url, allow_private, max_redirects, None)

        if ent.injected_ua:
            # A caller-supplied User-Agent in extra_headers takes precedence.
            headers.setdefault("User-Agent", ent.injected_ua)

        def _do_get(u):
            return ent.session.get(u, headers=headers, timeout=timeout, allow_redirects=False)

        return self._fetch_following(_do_get, url, allow_private, max_redirects, ent)

    @staticmethod
    def _fetch_following(do_get, url: str, allow_private: bool, max_redirects: int,
                         ent) -> tuple[Any, Optional[str]]:
        """Manually follow redirects so each hop is SSRF-validated (curl_cffi's
        own allow_redirects=True would skip the per-hop check).

        *do_get* is called with the URL of each hop. The starting *url* itself
        is not re-validated here; callers check it first. When *ent* is given,
        its ``requests_made`` counter is incremented once per completed GET.
        Returns ``(response, None)`` or ``(None, error_string)``.
        """
        from . import safety
        cur = url
        # One initial request plus at most max_redirects follow-ups.
        for _ in range(max_redirects + 1):
            try:
                resp = do_get(cur)
            except Exception as e:
                return None, f"{type(e).__name__}:{str(e)[:200]}"
            if ent is not None:
                ent.requests_made += 1
            if safety.is_redirect(resp):
                loc = safety.location_of(resp)
                if not loc:
                    return resp, None  # redirect w/o Location → return as-is
                nxt = safety.resolve_redirect(cur, loc)
                ok, reason = safety.classify_url(nxt, allow_private)
                if not ok:
                    return None, f"ssrf_redirect_blocked:{reason}"
                cur = nxt
                continue
            return resp, None
        return None, "too_many_redirects"

    def stats(self) -> dict:
        """Return counts of sessions, warmed entries and total requests made."""
        with self._lock:
            return {
                "sessions": len(self._entries),
                "warmed": sum(1 for e in self._entries.values() if e.warmed),
                "requests": sum(e.requests_made for e in self._entries.values()),
            }

    def reset(self) -> None:
        """Close every pooled session (best effort) and empty the pool."""
        with self._lock:
            for e in self._entries.values():
                try:
                    e.session.close()
                except Exception:
                    # Closing is best effort; a failing close must not stop
                    # the remaining sessions from being released.
                    pass
            self._entries.clear()
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
# Process-wide pool. Set INSANE_NO_SESSION_POOL to 1, true, or yes to disable it (one-shot mode); see pool_enabled().
|
|
207
|
+
POOL = SessionPool()
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def pool_enabled() -> bool:
    """Return False when INSANE_NO_SESSION_POOL is exactly "1", "true" or "yes"; True otherwise."""
    opt_out = os.environ.get("INSANE_NO_SESSION_POOL", "")
    for disabling_value in ("1", "true", "yes"):
        if opt_out == disabling_value:
            return False
    return True
|