@gajae-code/coding-agent 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. package/CHANGELOG.md +28 -0
  2. package/dist/types/cli/notify-cli.d.ts +2 -0
  3. package/dist/types/config/settings-schema.d.ts +39 -2
  4. package/dist/types/extensibility/shared-events.d.ts +1 -0
  5. package/dist/types/gjc-runtime/launch-tmux.d.ts +1 -0
  6. package/dist/types/gjc-runtime/ralplan-runtime.d.ts +1 -1
  7. package/dist/types/gjc-runtime/tmux-common.d.ts +3 -0
  8. package/dist/types/gjc-runtime/tmux-sessions.d.ts +2 -0
  9. package/dist/types/lsp/types.d.ts +2 -0
  10. package/dist/types/notifications/attachment-registry.d.ts +17 -0
  11. package/dist/types/notifications/chat-adapters.d.ts +9 -0
  12. package/dist/types/notifications/config.d.ts +9 -1
  13. package/dist/types/notifications/engine.d.ts +59 -0
  14. package/dist/types/notifications/managed-daemon.d.ts +48 -0
  15. package/dist/types/notifications/telegram-daemon.d.ts +19 -0
  16. package/dist/types/notifications/threaded-inbound.d.ts +19 -0
  17. package/dist/types/notifications/threaded-render.d.ts +6 -1
  18. package/dist/types/session/agent-session.d.ts +2 -0
  19. package/dist/types/tools/fetch.d.ts +23 -0
  20. package/dist/types/tools/index.d.ts +1 -0
  21. package/dist/types/tools/telegram-send.d.ts +32 -0
  22. package/dist/types/web/insane/bridge.d.ts +103 -0
  23. package/dist/types/web/insane/url-guard.d.ts +22 -0
  24. package/dist/types/web/search/provider.d.ts +18 -1
  25. package/dist/types/web/search/providers/insane.d.ts +53 -0
  26. package/dist/types/web/search/providers/text-citations.d.ts +23 -0
  27. package/dist/types/web/search/types.d.ts +12 -4
  28. package/package.json +10 -8
  29. package/scripts/verify-insane-vendor.ts +132 -0
  30. package/src/cli/args.ts +1 -1
  31. package/src/cli/fast-help.ts +1 -1
  32. package/src/cli/notify-cli.ts +152 -5
  33. package/src/cli.ts +1 -3
  34. package/src/commands/team.ts +1 -1
  35. package/src/config/settings-schema.ts +30 -1
  36. package/src/defaults/gjc/skills/ralplan/SKILL.md +11 -4
  37. package/src/edit/modes/replace.ts +1 -1
  38. package/src/extensibility/shared-events.ts +1 -0
  39. package/src/gjc-runtime/launch-tmux.ts +27 -5
  40. package/src/gjc-runtime/ledger-event-renderer.ts +1 -0
  41. package/src/gjc-runtime/ralplan-runtime.ts +2 -2
  42. package/src/gjc-runtime/tmux-common.ts +8 -0
  43. package/src/gjc-runtime/tmux-sessions.ts +8 -1
  44. package/src/gjc-runtime/workflow-manifest.generated.json +29 -0
  45. package/src/gjc-runtime/workflow-manifest.ts +7 -2
  46. package/src/hashline/hash.ts +1 -1
  47. package/src/internal-urls/docs-index.generated.ts +9 -8
  48. package/src/lsp/config.ts +16 -3
  49. package/src/lsp/defaults.json +7 -0
  50. package/src/lsp/types.ts +2 -0
  51. package/src/modes/controllers/event-controller.ts +15 -0
  52. package/src/modes/interactive-mode.ts +46 -2
  53. package/src/modes/utils/context-usage.ts +2 -2
  54. package/src/notifications/attachment-registry.ts +23 -0
  55. package/src/notifications/chat-adapters.ts +147 -0
  56. package/src/notifications/config.ts +23 -2
  57. package/src/notifications/engine.ts +100 -0
  58. package/src/notifications/index.ts +224 -45
  59. package/src/notifications/managed-daemon.ts +163 -0
  60. package/src/notifications/telegram-daemon.ts +235 -14
  61. package/src/notifications/threaded-inbound.ts +60 -4
  62. package/src/notifications/threaded-render.ts +20 -2
  63. package/src/session/agent-session.ts +82 -51
  64. package/src/tools/ask.ts +3 -2
  65. package/src/tools/fetch.ts +78 -1
  66. package/src/tools/index.ts +3 -0
  67. package/src/tools/telegram-send.ts +137 -0
  68. package/src/web/insane/bridge.ts +350 -0
  69. package/src/web/insane/url-guard.ts +155 -0
  70. package/src/web/search/provider.ts +77 -18
  71. package/src/web/search/providers/anthropic.ts +70 -3
  72. package/src/web/search/providers/codex.ts +1 -119
  73. package/src/web/search/providers/gemini.ts +99 -0
  74. package/src/web/search/providers/insane.ts +551 -0
  75. package/src/web/search/providers/openai-compatible.ts +66 -32
  76. package/src/web/search/providers/text-citations.ts +111 -0
  77. package/src/web/search/types.ts +13 -2
  78. package/vendor/insane-search/LICENSE +21 -0
  79. package/vendor/insane-search/MANIFEST.json +24 -0
  80. package/vendor/insane-search/engine/__init__.py +23 -0
  81. package/vendor/insane-search/engine/__main__.py +128 -0
  82. package/vendor/insane-search/engine/bias_check.py +183 -0
  83. package/vendor/insane-search/engine/executor.py +254 -0
  84. package/vendor/insane-search/engine/fetch_chain.py +725 -0
  85. package/vendor/insane-search/engine/learning.py +175 -0
  86. package/vendor/insane-search/engine/phase0.py +214 -0
  87. package/vendor/insane-search/engine/safety.py +91 -0
  88. package/vendor/insane-search/engine/templates/package.json +11 -0
  89. package/vendor/insane-search/engine/templates/playwright_mobile_chrome.js +188 -0
  90. package/vendor/insane-search/engine/templates/playwright_real_chrome.js +243 -0
  91. package/vendor/insane-search/engine/tests/test_hardening.py +57 -0
  92. package/vendor/insane-search/engine/tests/test_smoke.py +152 -0
  93. package/vendor/insane-search/engine/tests/test_u1.py +200 -0
  94. package/vendor/insane-search/engine/tests/test_u4.py +131 -0
  95. package/vendor/insane-search/engine/tests/test_u5.py +163 -0
  96. package/vendor/insane-search/engine/tests/test_u7.py +124 -0
  97. package/vendor/insane-search/engine/transport.py +211 -0
  98. package/vendor/insane-search/engine/url_transforms.py +98 -0
  99. package/vendor/insane-search/engine/validators.py +331 -0
  100. package/vendor/insane-search/engine/waf_detector.py +214 -0
  101. package/vendor/insane-search/engine/waf_profiles.yaml +162 -0
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/env python3
2
+ """U4 tests — SessionPool, root warmup, browser→curl cookie bridge.
3
+
4
+ Offline unit tests + a couple of benign online checks (example.com). Run:
5
+ python3 engine/tests/test_u4.py
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ import sys
11
+
12
+ HERE = os.path.dirname(os.path.abspath(__file__))
13
+ ROOT = os.path.abspath(os.path.join(HERE, "..", ""))
14
+ sys.path.insert(0, os.path.abspath(os.path.join(HERE, "..", "..")))
15
+
16
+ from engine.transport import SessionPool, _host_of, _root_of # noqa: E402
17
+ from engine.executor import _parse_envelope # noqa: E402
18
+
19
+
20
def t_host_and_root_helpers():
    """Check that the URL helpers extract the hostname and the site root."""
    sample = "https://www.x.com/a/b?q=1"
    host = _host_of(sample)
    root = _root_of(sample)
    assert host == "www.x.com"
    assert root == "https://www.x.com/"
    print(" ✓ host/root helpers")
24
+
25
+
26
def t_session_reuse_same_key():
    """Check that the pool hands back one entry per (host, impersonate) key."""
    pool = SessionPool()
    safari_first = pool.get("www.x.com", "safari")
    safari_again = pool.get("www.x.com", "safari")
    chrome_entry = pool.get("www.x.com", "chrome")
    if safari_first is None:
        # get() returns None when no curl_cffi session could be created.
        print(" ⚠ curl_cffi unavailable — skipped reuse check")
        return
    assert safari_first is safari_again, "same (host,impersonate) must reuse entry"
    assert safari_first is not chrome_entry, "different impersonate must be separate session"
    assert pool.stats()["sessions"] == 2, pool.stats()
    print(f" ✓ session reuse (same key→same, diff impersonate→new): {pool.stats()}")
38
+
39
+
40
def t_inject_cookies_then_present():
    """Check that injected browser cookies and User-Agent land on the pooled session."""
    pool = SessionPool()
    browser_cookies = [{"name": "cf_clearance", "value": "abc", "domain": "www.x.com"}]
    ok = pool.inject_cookies("www.x.com", "chrome", browser_cookies, user_agent="UA/1.0")
    ent = pool.get("www.x.com", "chrome")
    if ent is None:
        print(" ⚠ curl_cffi unavailable — skipped cookie inject check")
        return
    assert ok, "inject should report success"
    assert ent.injected_ua == "UA/1.0"
    names = set()
    for cookie in ent.session.cookies.jar:
        names.add(cookie.name)
    assert "cf_clearance" in names, names
    print(f" ✓ injected cookies present on session: {sorted(names)}")
54
+
55
+
56
def t_parse_envelope_json():
    """Check that a JSON envelope is unpacked into its individual fields."""
    envelope = (
        '{"html":"<h1>hi</h1>","finalUrl":"https://x/p","status":200,'
        '"cookies":[{"name":"a","value":"b"}],"userAgent":"UA"}'
    )
    parsed = _parse_envelope(envelope, "https://x/q")
    html, final, status, cookies, ua, automation = parsed
    assert html == "<h1>hi</h1>" and final == "https://x/p" and status == 200
    assert cookies and cookies[0]["name"] == "a" and ua == "UA"
    print(" ✓ envelope JSON parsed")
63
+
64
+
65
def t_parse_envelope_raw_html_fallback():
    """Check that non-JSON stdout is treated as raw HTML for the requested URL."""
    raw = "<html>raw</html>"
    requested = "https://x/q"
    html, final, status, cookies, ua, automation = _parse_envelope(raw, requested)
    assert html == "<html>raw</html>" and final == "https://x/q" and status == 200
    assert cookies == [] and ua is None
    print(" ✓ raw-HTML fallback (non-JSON stdout)")
70
+
71
+
72
def t_warmup_once_guard_online():
    """Check that a second warmup of the same (host, impersonate) is a no-op."""
    pool = SessionPool()
    root = "https://example.com/"
    first = pool.warmup("example.com", "safari", root, timeout=15)
    second = pool.warmup("example.com", "safari", root, timeout=15)
    ent = pool.get("example.com", "safari")
    if ent is None:
        print(" ⚠ curl_cffi unavailable — skipped warmup check")
        return
    # first may be True (network) or False (offline); second must be False (guard).
    assert second is False, "warmup must be idempotent"
    assert ent.warmed is True
    print(f" ✓ warmup once-guard (first={first}, second={second})")
84
+
85
+
86
def t_fetch_many_reuses_pool_online():
    """Check that fetching two URLs on one host does not multiply pooled sessions."""
    from engine import transport
    from engine.fetch_chain import fetch_many

    transport.POOL.reset()
    urls = ["https://example.com/", "https://example.com/index.html"]
    results = fetch_many(
        urls,
        success_selectors=["h1", "p"],
        timeout=15,
        max_attempts=2,
        enable_playwright=False,
    )
    st = transport.POOL.stats()
    assert len(results) == 2
    # Same host → should not spawn a separate session per URL per identity.
    assert st["sessions"] <= 2, st
    oks = len([r for r in results if r.ok])
    print(f" ✓ fetch_many reused pool: stats={st}, ok={oks}/2")
100
+
101
+
102
# Registry of (label, test function) pairs in execution order; each label is
# the function name without its "t_" prefix.
ALL = [
    (fn.__name__[2:], fn)
    for fn in (
        t_host_and_root_helpers,
        t_session_reuse_same_key,
        t_inject_cookies_then_present,
        t_parse_envelope_json,
        t_parse_envelope_raw_html_fallback,
        t_warmup_once_guard_online,
        t_fetch_many_reuses_pool_online,
    )
]
111
+
112
+
113
def main() -> int:
    """Run every test in ALL, print a tally, and return a process exit code.

    Returns 0 when no test failed or errored, 1 otherwise.
    """
    passed = 0
    failed = 0
    for name, fn in ALL:
        try:
            print(f"[{name}]")
            fn()
        except AssertionError as e:
            failed += 1
            print(f" ✗ FAIL: {e}")
        except Exception as e:
            # Any non-assertion exception is reported as an error, not a crash.
            failed += 1
            print(f" ✗ ERROR: {type(e).__name__}: {e}")
        else:
            passed += 1
    print(f"\n{passed} passed, {failed} failed")
    if failed == 0:
        return 0
    return 1


if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,163 @@
1
+ """U5 self-learning store — unit coverage (no network).
2
+
3
+ Run: python3 -m engine.tests.test_u5
4
+ Covers: round-trip, win counting, failure striking + eviction at 2,
5
+ transient vs real-failure classification, TTL prune, LRU cap, key scoping,
6
+ grid priority reordering, and winning-route extraction from a trace."""
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ import tempfile
11
+ from datetime import datetime, timezone, timedelta
12
+
13
+ from engine import learning
14
+ from engine.fetch_chain import _build_plan, _winning_route, _load_profiles, FetchResult, Attempt
15
+ from engine.validators import Verdict
16
+
17
# Module-wide tallies updated by check().
_passed = 0
_failed = 0


def check(name: str, cond: bool, detail: str = ""):
    """Record one test outcome and print a labelled pass/fail line.

    name:   label printed in square brackets.
    cond:   truthy counts as a pass, falsy as a failure.
    detail: text printed after the mark; a pass with no detail prints 'ok'.
    """
    global _passed, _failed
    if not cond:
        _failed += 1
        print(f"[{name}]\n ✗ FAIL {detail}")
        return
    _passed += 1
    print(f"[{name}]\n ✓ {detail or 'ok'}")
29
+
30
+
31
def _tmp() -> str:
    """Return a fresh temp path ending in "_learned.json" with no file at it."""
    handle, tmp_path = tempfile.mkstemp(suffix="_learned.json")
    os.close(handle)
    os.remove(tmp_path)  # start empty
    return tmp_path
36
+
37
+
38
U = "https://example.com/some/page"
ROUTE_A = dict(transform="original", impersonate="chrome", referer="self_root", phase="grid")
ROUTE_B = dict(transform="mobile_subdomain", impersonate="safari_ios", referer="none", phase="grid")


def _record(route, stamp):
    """Build one stored entry whose two timestamps are both *stamp*."""
    return {"route": route, "wins": 1, "consecutive_fails": 0,
            "last_used": stamp, "last_success": stamp}


# 1) round-trip + win counting
store_path = _tmp()
learning.record_success(U, "desktop", ROUTE_A, path=store_path)
check(
    "roundtrip_lookup",
    learning.lookup(U, "desktop", path=store_path) == ROUTE_A,
    f"learned route returned: {learning.lookup(U, 'desktop', path=store_path)}",
)
learning.record_success(U, "desktop", ROUTE_A, path=store_path)
store = learning.load(store_path)
check(
    "wins_increment_same_route",
    store[learning.key_for(U, "desktop")]["wins"] == 2,
    f"wins={store[learning.key_for(U, 'desktop')]['wins']}",
)
learning.record_success(U, "desktop", ROUTE_B, path=store_path)
store = learning.load(store_path)
check(
    "wins_reset_on_new_route",
    store[learning.key_for(U, "desktop")]["wins"] == 1
    and learning.lookup(U, "desktop", path=store_path) == ROUTE_B,
    "new route replaces, wins=1",
)

# 2) transient failure does NOT strike; refreshes last_used
store_path = _tmp()
learning.record_success(U, "desktop", ROUTE_A, path=store_path)
learning.record_failure(U, "desktop", penalize=False, path=store_path)
store = learning.load(store_path)
desktop_key = learning.key_for(U, "desktop")
check(
    "transient_no_strike",
    desktop_key in store and store[desktop_key]["consecutive_fails"] == 0,
    "entry kept, consecutive_fails stays 0 on transient",
)

# 3) real failure strikes; evicts after 2
store_path = _tmp()
learning.record_success(U, "desktop", ROUTE_A, path=store_path)
learning.record_failure(U, "desktop", penalize=True, path=store_path)
store = learning.load(store_path)
check("real_failure_strike_1", store[desktop_key]["consecutive_fails"] == 1,
      "1st strike kept, fails=1")
learning.record_failure(U, "desktop", penalize=True, path=store_path)
check(
    "evict_after_2_strikes",
    learning.lookup(U, "desktop", path=store_path) is None,
    "evicted after 2nd consecutive real failure",
)

# 3b) success resets the strike counter
store_path = _tmp()
learning.record_success(U, "desktop", ROUTE_A, path=store_path)
learning.record_failure(U, "desktop", penalize=True, path=store_path)
learning.record_success(U, "desktop", ROUTE_A, path=store_path)
store = learning.load(store_path)
check("success_resets_strikes", store[desktop_key]["consecutive_fails"] == 0,
      "strike reset to 0 after a win")

# 4) is_real_failure classification
striking = ("exhausted", "challenge", "blocked")
non_striking = ("rate_limited", "unknown", "budget", "auth_required", "not_found", "success", "")
real = all(learning.is_real_failure(r) for r in striking)
nonreal = not any(learning.is_real_failure(r) for r in non_striking)
check("classify_real_failures", real and nonreal,
      "exhausted/challenge/blocked strike; 429/unknown/budget/auth/404 do not")

# 5) TTL prune on load (TTL_DAYS pinned to 30 for the duration of the check)
store_path = _tmp()
old_ttl = learning.TTL_DAYS
learning.TTL_DAYS = 30
stale_ts = (datetime.now(timezone.utc) - timedelta(days=31)).isoformat()
fresh_ts = datetime.now(timezone.utc).isoformat()
learning.save(
    {
        "stale.com::desktop": _record(ROUTE_A, stale_ts),
        "fresh.com::desktop": _record(ROUTE_B, fresh_ts),
    },
    path=store_path,
)
store = learning.load(store_path)
check(
    "ttl_prunes_stale",
    "stale.com::desktop" not in store and "fresh.com::desktop" in store,
    f"31-day-old dropped, fresh kept (kept={list(store)})",
)
learning.TTL_DAYS = old_ttl

# 6) LRU cap (MAX_ENTRIES pinned to 5 for the duration of the check)
store_path = _tmp()
old_max = learning.MAX_ENTRIES
learning.MAX_ENTRIES = 5
now = datetime.now(timezone.utc)
big = {}
for age in range(12):
    # age=0 is the newest entry; each later one is a minute older.
    big[f"h{age}.com::desktop"] = _record(ROUTE_A, (now - timedelta(minutes=age)).isoformat())
learning.save(big, path=store_path)
store = learning.load(store_path)
kept_newest = all(f"h{age}.com::desktop" in store for age in range(5))
check("lru_cap", len(store) == 5 and kept_newest,
      f"capped to 5, kept 5 most-recent (n={len(store)})")
learning.MAX_ENTRIES = old_max

# 7) key scoping: desktop vs mobile distinct; auto == desktop
check(
    "key_scoping",
    learning.key_for(U, "mobile") != learning.key_for(U, "desktop")
    and learning.key_for(U, "auto") == learning.key_for(U, "desktop"),
    "mobile/desktop separate; auto folds into desktop",
)

# 8) grid priority reordering (no network)
profiles = _load_profiles()
hits = [type("H", (), {"profile_id": "unknown_challenge", "confidence": 0.5})()]
plan = _build_plan(U, hits, profiles, "desktop", "safari", "self_root")
target = plan[min(3, len(plan) - 1)]
prio = {"transform": target.transform, "impersonate": target.impersonate, "referer": target.referer}
plan2 = _build_plan(U, hits, profiles, "desktop", "safari", "self_root", priority=prio)
check(
    "priority_moves_to_front",
    plan2[0].transform == target.transform
    and plan2[0].impersonate == target.impersonate
    and plan2[0].referer == target.referer
    and len(plan2) == len(plan),
    "learned candidate promoted to plan[0], no items lost",
)

# 9) winning-route extraction from trace
probe_attempt = Attempt(phase="probe", executor="curl_cffi", url=U, url_transform="original",
                        impersonate="safari", referer="self_root",
                        verdict=Verdict.CHALLENGE.value)
grid_attempt = Attempt(phase="grid", executor="curl_cffi", url=U,
                       url_transform="mobile_subdomain", impersonate="chrome",
                       referer="none", verdict=Verdict.STRONG_OK.value)
r_ok = FetchResult(ok=True, trace=[probe_attempt, grid_attempt])
check(
    "winning_route_from_grid",
    _winning_route(r_ok) == {"transform": "mobile_subdomain", "impersonate": "chrome",
                             "referer": "none", "phase": "grid"},
    f"extracted: {_winning_route(r_ok)}",
)
browser_attempt = Attempt(phase="fallback", executor="playwright_real_chrome", url=U,
                          url_transform="original", impersonate=None, referer="",
                          verdict=Verdict.STRONG_OK.value)
r_browser = FetchResult(ok=True, trace=[browser_attempt])
check("winning_route_skips_browser", _winning_route(r_browser) is None,
      "browser-only win is not learnable (None)")

print(f"\n{_passed} passed, {_failed} failed")
import sys
sys.exit(1 if _failed else 0)
@@ -0,0 +1,124 @@
1
+ #!/usr/bin/env python3
2
+ """U7 tests — SSRF / redirect guard. Offline & deterministic.
3
+
4
+ Run: python3 engine/tests/test_u7.py
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import os
9
+ import sys
10
+
11
+ HERE = os.path.dirname(os.path.abspath(__file__))
12
+ sys.path.insert(0, os.path.abspath(os.path.join(HERE, "..", "..")))
13
+
14
+ from engine.safety import classify_url # noqa: E402
15
+ from engine.transport import SessionPool # noqa: E402
16
+
17
+
18
def t_classify_blocks_internal():
    """Check that classify_url rejects internal, metadata and non-HTTP targets."""
    blocked = [
        "http://127.0.0.1/",
        "http://169.254.169.254/latest/meta-data/",  # cloud metadata
        "http://10.0.0.1/",
        "http://192.168.1.1/admin",
        "http://172.16.0.1/",
        "http://[::1]/",
        "http://0.0.0.0/",
        "ftp://example.com/",  # scheme
        "file:///etc/passwd",  # scheme
        "http://localhost/",  # resolves to loopback
    ]
    for u in blocked:
        verdict = classify_url(u, allow_private=False)
        ok, reason = verdict
        assert not ok, f"should block {u} (got ok, reason={reason})"
    print(f" ✓ blocks {len(blocked)} internal/metadata/scheme targets")
35
+
36
+
37
def t_classify_allows_public():
    """Check that classify_url accepts public IP literals."""
    public_literals = ("https://1.1.1.1/", "http://8.8.8.8/")  # public IP literals (no DNS)
    for u in public_literals:
        ok, reason = classify_url(u, allow_private=False)
        assert ok, f"should allow public {u} ({reason})"
    print(" ✓ allows public IP literals")
42
+
43
+
44
def t_allow_private_optin():
    """Check that allow_private=True lets a loopback URL through."""
    verdict = classify_url("http://127.0.0.1:8080/", allow_private=True)
    ok = verdict[0]
    assert ok, "allow_private=True must permit loopback"
    print(" ✓ allow_private=True opt-in permits loopback (local testing)")
48
+
49
+
50
def t_request_blocks_localhost_by_default():
    """Check that SessionPool.request refuses a loopback URL with an ssrf_blocked error."""
    pool = SessionPool()
    # Expected to be rejected by the guard, so no fetch happens.
    resp, err = pool.request("http://127.0.0.1:9/", impersonate="chrome")
    assert resp is None and err and err.startswith("ssrf_blocked"), (resp, err)
    print(f" ✓ POOL.request blocks loopback pre-fetch: {err}")
55
+
56
+
57
class _FakeResp:
    """Minimal stand-in for an HTTP response used by the redirect tests.

    Carries a status code, a headers mapping (an empty dict when none is
    given) and a fixed "ok" body.
    """

    def __init__(self, status, headers=None):
        self.text = "ok"
        self.headers = headers if headers else {}
        self.status_code = status
62
+
63
+
64
def t_redirect_to_metadata_blocked():
    """Check that a redirect pointing at the metadata IP is refused."""
    metadata_url = "http://169.254.169.254/latest/meta-data/"

    def do_get(u):
        # Only the "evil" origin redirects; anything else answers 200.
        if "evil" not in u:
            return _FakeResp(200)
        return _FakeResp(302, {"Location": metadata_url})

    resp, err = SessionPool._fetch_following(do_get, "https://evil.test/", False, 5, None)
    assert resp is None and err and err.startswith("ssrf_redirect_blocked"), (resp, err)
    print(f" ✓ redirect into metadata IP blocked: {err}")
72
+
73
+
74
def t_safe_redirect_followed():
    """Check that a redirect to a public IP is followed to the final 200."""
    hops = {"n": 0}

    def do_get(u):
        hops["n"] = hops["n"] + 1
        if "start" not in u:
            return _FakeResp(200)
        return _FakeResp(302, {"Location": "http://1.1.1.1/landing"})  # public

    resp, err = SessionPool._fetch_following(do_get, "https://start.test/", False, 5, None)
    assert err is None and resp is not None and resp.status_code == 200, (resp, err)
    assert hops["n"] == 2, hops
    print(f" ✓ safe redirect to public IP followed ({hops['n']} hops → 200)")
85
+
86
+
87
def t_too_many_redirects():
    """Check that an endless redirect loop stops with too_many_redirects."""
    loop_url = "http://1.1.1.1/loop"

    def do_get(u):
        # Every request redirects back to the same URL.
        return _FakeResp(302, {"Location": "http://1.1.1.1/loop"})

    resp, err = SessionPool._fetch_following(do_get, loop_url, False, 3, None)
    assert resp is None and err == "too_many_redirects", (resp, err)
    print(" ✓ redirect loop capped (too_many_redirects)")
93
+
94
+
95
# Registry of (label, test function) pairs in execution order; each label is
# the function name without its "t_" prefix.
ALL = [
    (fn.__name__[2:], fn)
    for fn in (
        t_classify_blocks_internal,
        t_classify_allows_public,
        t_allow_private_optin,
        t_request_blocks_localhost_by_default,
        t_redirect_to_metadata_blocked,
        t_safe_redirect_followed,
        t_too_many_redirects,
    )
]
104
+
105
+
106
def main() -> int:
    """Run every test in ALL, print a tally, and return a process exit code.

    Returns 0 when no test failed or errored, 1 otherwise.
    """
    passed = 0
    failed = 0
    for name, fn in ALL:
        try:
            print(f"[{name}]")
            fn()
        except AssertionError as e:
            failed += 1
            print(f" ✗ FAIL: {e}")
        except Exception as e:
            # Any non-assertion exception is reported as an error, not a crash.
            failed += 1
            print(f" ✗ ERROR: {type(e).__name__}: {e}")
        else:
            passed += 1
    print(f"\n{passed} passed, {failed} failed")
    if failed == 0:
        return 0
    return 1


if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,211 @@
1
+ """Per-host curl_cffi Session pool + root warmup + browser→curl cookie bridge.
2
+
3
+ Why (multi-AI review 2026-06-21):
4
+ * v1 issued a brand-new `curl_cffi.requests.get()` per attempt, so cookies
5
+ set by a WAF (e.g. an Akamai `_abck` sensor or a CF `cf_clearance`) and
6
+ the warm TLS/connection were thrown away between attempts and between
7
+ pages of the same host. That caps both success rate (sensor cookies never
8
+ mature) and throughput (handshake per request).
9
+ * A browser fallback that punches through a JS challenge produces exactly the
10
+ cookies + User-Agent a plain HTTP client needs — but v1 discarded them
11
+ (`_FakeResp` kept only HTML). The bridge here lets one expensive browser
12
+ pass convert into cheap curl_cffi throughput (the FlareSolverr pattern).
13
+
14
+ No-Site-Name Rule: keys are hashed hosts; no site names are stored or branched.
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import os
19
+ import threading
20
+ from dataclasses import dataclass, field
21
+ from typing import Any, Optional
22
+ from urllib.parse import urlsplit
23
+
24
+
25
def _host_of(url: str) -> str:
    """Return the lower-cased hostname of *url*, or "unknown" if it has none."""
    hostname = urlsplit(url).hostname
    if not hostname:
        return "unknown"
    return hostname.lower()
27
+
28
+
29
def _root_of(url: str) -> str:
    """Return the "scheme://netloc/" root of *url* (path, query, fragment dropped)."""
    parts = urlsplit(url)
    return "".join((parts.scheme, "://", parts.netloc, "/"))
32
+
33
+
34
@dataclass
class _Entry:
    """One pooled session plus the bookkeeping the pool keeps for it."""

    # The underlying session object (created by the pool via curl_cffi).
    session: Any
    # True once a root warmup has been claimed for this entry.
    warmed: bool = field(default=False)
    # User-Agent handed over together with injected browser cookies, if any.
    injected_ua: Optional[str] = field(default=None)
    # Number of GETs issued through this entry.
    requests_made: int = field(default=0)
40
+
41
+
42
@dataclass
class SessionPool:
    """Pool of curl_cffi Sessions keyed by (host, impersonate).

    The entry table is guarded by a lock. Each entry keeps its own session,
    so cookies set on it are reused by later requests made through the same
    key.
    """
    _entries: dict = field(default_factory=dict)
    _lock: Any = field(default_factory=threading.Lock)

    def _key(self, host: str, impersonate: str) -> tuple:
        """Return the dictionary key for a (host, impersonate) pair."""
        return (host, impersonate)

    def get(self, host: str, impersonate: str) -> Optional[_Entry]:
        """Return (creating if needed) the pool entry, or None if curl_cffi
        is unavailable or cannot build a session for *impersonate*."""
        key = self._key(host, impersonate)
        with self._lock:
            ent = self._entries.get(key)
            if ent is not None:
                return ent
            try:
                from curl_cffi import requests as cffi_requests
            except ImportError:
                return None
            try:
                sess = cffi_requests.Session(impersonate=impersonate)
            except Exception:
                # Some impersonate names need a newer curl_cffi; let caller
                # fall back to a one-shot get by returning None.
                return None
            ent = _Entry(session=sess)
            self._entries[key] = ent
            return ent

    def warmup(self, host: str, impersonate: str, root_url: str, timeout: int = 15) -> bool:
        """Hit the site root once per (host, impersonate) so a WAF sensor can
        set a resolved session cookie before the real (deep) request. Idempotent.

        Returns True only when this call performed the warmup fetch and it
        succeeded. Returns False when no session is available, the entry was
        already warmed, the root URL is rejected by the safety check, or the
        fetch failed.
        """
        ent = self.get(host, impersonate)
        if ent is None or ent.warmed:
            return False
        from . import safety
        # Resolve the same defaults request() uses. These were previously
        # referenced as bare (undefined) names, which raised NameError on the
        # first real warmup.
        allow_private = safety.allow_private_default()
        max_redirects = safety.DEFAULT_MAX_REDIRECTS
        # Claim the warmup atomically so concurrent callers cannot both fetch.
        with self._lock:
            if ent.warmed:
                return False
            ent.warmed = True
        ok, _reason = safety.classify_url(root_url, allow_private)
        if not ok:
            # Entry stays marked as warmed: don't retry a blocked root.
            return False

        def _do_get(u):
            return ent.session.get(u, timeout=timeout, allow_redirects=False)

        resp, err = self._fetch_following(_do_get, root_url, allow_private, max_redirects, ent)
        return resp is not None and err is None

    def inject_cookies(self, host: str, impersonate: str,
                       cookies: list[dict], user_agent: Optional[str] = None) -> bool:
        """Seed a session with cookies harvested by a real browser. Subsequent
        requests on this (host, impersonate) reuse the browser-cleared state.

        Cookies without a name are skipped. Returns True when at least one
        cookie was stored, False otherwise (including when no session is
        available). *user_agent*, when given, is remembered on the entry.
        """
        ent = self.get(host, impersonate)
        if ent is None:
            return False
        ok = False
        for c in cookies or []:
            name = c.get("name")
            value = c.get("value")
            if not name:
                continue
            try:
                ent.session.cookies.set(name, value, domain=c.get("domain") or host)
                ok = True
            except Exception:
                # Best effort: retry without a domain before giving up on
                # this cookie.
                try:
                    ent.session.cookies.set(name, value)
                    ok = True
                except Exception:
                    continue
        if user_agent:
            ent.injected_ua = user_agent
        return ok

    def request(self, url: str, *, impersonate: str, referer: str = "",
                timeout: int = 25, extra_headers: Optional[dict] = None,
                allow_private: Optional[bool] = None,
                max_redirects: Optional[int] = None) -> tuple[Any, Optional[str]]:
        """GET via the pooled session (cookie + connection reuse), with an SSRF
        guard: the initial URL and EVERY redirect hop are validated against the
        private/loopback/link-local/metadata block-list before being fetched.
        Falls back to a one-shot get if no session could be created.

        Returns (response, None) on success or (None, error_string) on failure.
        *allow_private* and *max_redirects* default to the values supplied by
        the safety module when left as None.
        """
        from . import safety
        if allow_private is None:
            allow_private = safety.allow_private_default()
        if max_redirects is None:
            max_redirects = safety.DEFAULT_MAX_REDIRECTS

        ok, reason = safety.classify_url(url, allow_private)
        if not ok:
            return None, f"ssrf_blocked:{reason}"

        host = _host_of(url)
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        }
        if referer:
            headers["Referer"] = referer
        if extra_headers:
            headers.update(extra_headers)

        ent = self.get(host, impersonate)
        if ent is None:
            # No pooled session: issue one-shot requests instead.
            try:
                from curl_cffi import requests as cffi_requests
            except ImportError:
                return None, "curl_cffi not installed"

            def _do_get(u):
                return cffi_requests.get(u, impersonate=impersonate, headers=headers,
                                         timeout=timeout, allow_redirects=False)

            return self._fetch_following(_do_get, url, allow_private, max_redirects, None)

        if ent.injected_ua:
            # A caller-supplied User-Agent in extra_headers takes precedence.
            headers.setdefault("User-Agent", ent.injected_ua)

        def _do_get(u):
            return ent.session.get(u, headers=headers, timeout=timeout, allow_redirects=False)

        return self._fetch_following(_do_get, url, allow_private, max_redirects, ent)

    @staticmethod
    def _fetch_following(do_get, url: str, allow_private: bool, max_redirects: int,
                         ent) -> tuple[Any, Optional[str]]:
        """Manually follow redirects so each hop is SSRF-validated (curl_cffi's
        own allow_redirects=True would skip the per-hop check).

        *do_get* is called with the URL of each hop. *ent*, when not None, has
        its request counter bumped per hop. Returns (response, None), or
        (None, error_string) when a hop raises, a redirect target is blocked,
        or more than *max_redirects* redirects are seen.
        """
        from . import safety
        cur = url
        for _ in range(max_redirects + 1):
            try:
                resp = do_get(cur)
            except Exception as e:
                return None, f"{type(e).__name__}:{str(e)[:200]}"
            if ent is not None:
                ent.requests_made += 1
            if safety.is_redirect(resp):
                loc = safety.location_of(resp)
                if not loc:
                    return resp, None  # redirect w/o Location → return as-is
                nxt = safety.resolve_redirect(cur, loc)
                ok, reason = safety.classify_url(nxt, allow_private)
                if not ok:
                    return None, f"ssrf_redirect_blocked:{reason}"
                cur = nxt
                continue
            return resp, None
        return None, "too_many_redirects"

    def stats(self) -> dict:
        """Return counts of pooled sessions, warmed entries and requests made."""
        with self._lock:
            return {
                "sessions": len(self._entries),
                "warmed": sum(1 for e in self._entries.values() if e.warmed),
                "requests": sum(e.requests_made for e in self._entries.values()),
            }

    def reset(self) -> None:
        """Close every pooled session (ignoring close errors) and empty the pool."""
        with self._lock:
            for e in self._entries.values():
                try:
                    e.session.close()
                except Exception:
                    # Best effort: a failing close must not stop the reset.
                    pass
            self._entries.clear()
204
+
205
+
206
+ # Process-wide pool. Disable via INSANE_NO_SESSION_POOL=1 (one-shot mode).
207
+ POOL = SessionPool()
208
+
209
+
210
def pool_enabled() -> bool:
    """Return False when INSANE_NO_SESSION_POOL is "1", "true" or "yes"; True otherwise."""
    flag = os.environ.get("INSANE_NO_SESSION_POOL", "")
    disabled = flag in ("1", "true", "yes")
    return not disabled