@safebrowse/daemon 0.1.2-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +31 -0
  3. package/dist/cli.d.ts +8 -0
  4. package/dist/cli.d.ts.map +1 -0
  5. package/dist/cli.js +93 -0
  6. package/dist/cli.js.map +1 -0
  7. package/dist/index.d.ts +4 -0
  8. package/dist/index.d.ts.map +1 -0
  9. package/dist/index.js +21 -0
  10. package/dist/index.js.map +1 -0
  11. package/dist/loaders.d.ts +23 -0
  12. package/dist/loaders.d.ts.map +1 -0
  13. package/dist/loaders.js +181 -0
  14. package/dist/loaders.js.map +1 -0
  15. package/dist/runtime/config/adapter-registry.json +65 -0
  16. package/dist/runtime/config/adapter-registry.json.sig +1 -0
  17. package/dist/runtime/config/v2-compromised-fixtures.json +34 -0
  18. package/dist/runtime/knowledge_base/safebrowse_vf_action_integrity_patterns.json +1411 -0
  19. package/dist/runtime/knowledge_base/safebrowse_vf_artifact_surface_patterns.json +891 -0
  20. package/dist/runtime/knowledge_base/safebrowse_vf_evaluation_scenarios.json +217 -0
  21. package/dist/runtime/knowledge_base/safebrowse_vf_incident_response_playbooks.json +209 -0
  22. package/dist/runtime/knowledge_base/safebrowse_vf_knowledge_base_index.json +143 -0
  23. package/dist/runtime/knowledge_base/safebrowse_vf_knowledge_base_index.json.sig +1 -0
  24. package/dist/runtime/knowledge_base/safebrowse_vf_knowledge_bases.zip +0 -0
  25. package/dist/runtime/knowledge_base/safebrowse_vf_knowledge_bases.zip.sig +1 -0
  26. package/dist/runtime/knowledge_base/safebrowse_vf_memory_context_poisoning_patterns.json +803 -0
  27. package/dist/runtime/knowledge_base/safebrowse_vf_policy_controls_catalog.json +686 -0
  28. package/dist/runtime/knowledge_base/safebrowse_vf_prompt_injection_patterns.json +9930 -0
  29. package/dist/runtime/knowledge_base/safebrowse_vf_source_registry.json +345 -0
  30. package/dist/runtime/knowledge_base/safebrowse_vf_tool_protocol_supply_chain_patterns.json +879 -0
  31. package/dist/runtime/knowledge_base/safebrowse_vf_trust_signals_provenance.json +480 -0
  32. package/dist/runtime/knowledge_base/signing/safebrowse_vf_ed25519_public.pem +3 -0
  33. package/dist/runtime/policies/base/research.yaml +56 -0
  34. package/dist/runtime/policies/emergency/default.yaml +14 -0
  35. package/dist/runtime/policies/project/default.yaml +13 -0
  36. package/dist/runtime/policies/tenant/default.yaml +12 -0
  37. package/dist/server.d.ts +14 -0
  38. package/dist/server.d.ts.map +1 -0
  39. package/dist/server.js +195 -0
  40. package/dist/server.js.map +1 -0
  41. package/package.json +53 -0
@@ -0,0 +1,345 @@
1
+ {
2
+ "kb_meta": {
3
+ "name": "SafeBrowse vf source registry",
4
+ "version": "vf-final",
5
+ "generated_on": "2026-03-28",
6
+ "entry_count": 30,
7
+ "quality_scale": {
8
+ "high": "Official documentation/guidance, government guidance, or peer-reviewed/benchmark source with direct relevance.",
9
+ "medium": "Credible technical documentation or preprint used with caution and corroborated by other sources.",
10
+ "medium_high": "Well-maintained official project documentation, benchmarks, or corroborated preprints; stronger than medium but short of formal peer review or government guidance.",
+ "low": "Not used in this bundle."
11
+ }
12
+ },
13
+ "sources": [
14
+ {
15
+ "source_id": "SRC_OPENAI_PROMPT_INJECTION_2026",
16
+ "title": "Designing AI agents to resist prompt injection",
17
+ "publisher": "OpenAI",
18
+ "url": "https://openai.com/index/designing-agents-to-resist-prompt-injection/",
19
+ "published_date": "2026-03-11",
20
+ "evidence_type": "official_engineering_guidance",
21
+ "credibility": "high",
22
+ "why_it_matters": "Current operational guidance on prompt injection as social engineering, source-sink analysis, and action confirmation/blocking patterns.",
23
+ "web_ref": "turn325573view0"
24
+ },
25
+ {
26
+ "source_id": "SRC_NCSC_PROMPT_INJECTION_2025",
27
+ "title": "Prompt injection is not SQL injection (it may be worse)",
28
+ "publisher": "UK National Cyber Security Centre",
29
+ "url": "https://www.ncsc.gov.uk/blog-post/prompt-injection-is-not-sql-injection",
30
+ "published_date": "2025-12-17",
31
+ "evidence_type": "government_security_guidance",
32
+ "credibility": "high",
33
+ "why_it_matters": "Strong framing that LLMs do not enforce a security boundary between instructions and data; logging and threat-modeling guidance.",
34
+ "web_ref": "turn325573view1"
35
+ },
36
+ {
37
+ "source_id": "SRC_GOOGLE_CHROME_AGENTIC_2025",
38
+ "title": "Architecting Security for Agentic Capabilities in Chrome",
39
+ "publisher": "Google Online Security Blog",
40
+ "url": "https://security.googleblog.com/2025/12/architecting-security-for-agentic.html",
41
+ "published_date": "2025-12-08",
42
+ "evidence_type": "official_engineering_guidance",
43
+ "credibility": "high",
44
+ "why_it_matters": "Documents User Alignment Critic, read-only/read-write origin sets, and deterministic URL/origin checks for agentic browsing.",
45
+ "web_ref": "turn325573view2"
46
+ },
47
+ {
48
+ "source_id": "SRC_ANTHROPIC_BROWSER_USE_2025",
49
+ "title": "Mitigating the risk of prompt injections in browser use",
50
+ "publisher": "Anthropic",
51
+ "url": "https://www.anthropic.com/research/prompt-injection-defenses",
52
+ "published_date": "2025-11-24",
53
+ "evidence_type": "official_research_or_engineering",
54
+ "credibility": "high",
55
+ "why_it_matters": "Directly addresses browser-agent threat surfaces including webpages, embedded documents, ads, and dynamic scripts.",
56
+ "web_ref": "turn325573view3"
57
+ },
58
+ {
59
+ "source_id": "SRC_MICROSOFT_INDIRECT_PI_2025",
60
+ "title": "How Microsoft defends against indirect prompt injection attacks",
61
+ "publisher": "Microsoft Security Response Center",
62
+ "url": "https://www.microsoft.com/en-us/msrc/blog/2025/07/how-microsoft-defends-against-indirect-prompt-injection-attacks/",
63
+ "published_date": "2025-07-03",
64
+ "evidence_type": "official_engineering_guidance",
65
+ "credibility": "high",
66
+ "why_it_matters": "Production defense guidance including Spotlighting and layered mitigations for indirect prompt injection.",
67
+ "web_ref": "turn325573view4"
68
+ },
69
+ {
70
+ "source_id": "SRC_OWASP_PI_CHEATSHEET_2026",
71
+ "title": "LLM Prompt Injection Prevention Cheat Sheet",
72
+ "publisher": "OWASP Cheat Sheet Series",
73
+ "url": "https://cheatsheetseries.owasp.org/cheatsheets/LLM_Prompt_Injection_Prevention_Cheat_Sheet.html",
74
+ "published_date": "2026-01-01",
75
+ "evidence_type": "community_best_practice",
76
+ "credibility": "high",
77
+ "why_it_matters": "Comprehensive taxonomy of prompt injection patterns and baseline mitigations.",
78
+ "web_ref": "turn325573view5"
79
+ },
80
+ {
81
+ "source_id": "SRC_NIST_HIJACK_EVAL_2025",
82
+ "title": "Technical Blog: Strengthening AI Agent Hijacking Evaluations",
83
+ "publisher": "NIST",
84
+ "url": "https://www.nist.gov/news-events/news/2025/01/technical-blog-strengthening-ai-agent-hijacking-evaluations",
85
+ "published_date": "2025-01-17",
86
+ "evidence_type": "government_research_guidance",
87
+ "credibility": "high",
88
+ "why_it_matters": "Evaluation guidance: adaptive attacks, task-specific analysis, and multi-attempt testing are necessary for realistic risk estimates.",
89
+ "web_ref": "turn325573view6"
90
+ },
91
+ {
92
+ "source_id": "SRC_NIST_CAISI_RFI_2026",
93
+ "title": "CAISI Issues Request for Information About Securing AI Agent Systems",
94
+ "publisher": "NIST",
95
+ "url": "https://www.nist.gov/news-events/news/2026/01/caisi-issues-request-information-about-securing-ai-agent-systems",
96
+ "published_date": "2026-01-12",
97
+ "evidence_type": "government_security_guidance",
98
+ "credibility": "high",
99
+ "why_it_matters": "Distinguishes key agent risks including indirect prompt injection, data/model poisoning, and harmful actions absent adversarial input.",
100
+ "web_ref": "turn325573view7"
101
+ },
102
+ {
103
+ "source_id": "SRC_MCP_SECURITY_BEST_PRACTICES_2025",
104
+ "title": "Security Best Practices",
105
+ "publisher": "Model Context Protocol",
106
+ "url": "https://modelcontextprotocol.io/docs/tutorials/security/security_best_practices",
107
+ "published_date": "2025-06-18",
108
+ "evidence_type": "official_specification_guidance",
109
+ "credibility": "high",
110
+ "why_it_matters": "Official MCP security guidance covering token passthrough, SSRF, session hijacking, local server compromise, and scope minimization.",
111
+ "web_ref": "turn325573view8"
112
+ },
113
+ {
114
+ "source_id": "SRC_OWASP_SECURE_MCP_GUIDE_2026",
115
+ "title": "A Practical Guide for Secure MCP Server Development",
116
+ "publisher": "OWASP Gen AI Security Project",
117
+ "url": "https://genai.owasp.org/resource/a-practical-guide-for-secure-mcp-server-development/",
118
+ "published_date": "2026-02-16",
119
+ "evidence_type": "community_best_practice",
120
+ "credibility": "high",
121
+ "why_it_matters": "Operational guidance for MCP server architecture, authentication, authorization, validation, session isolation, and hardened deployment.",
122
+ "web_ref": "turn325573view10"
123
+ },
124
+ {
125
+ "source_id": "SRC_OWASP_AGENTIC_TOP10_2026",
126
+ "title": "OWASP Top 10 for Agentic Applications for 2026",
127
+ "publisher": "OWASP Gen AI Security Project",
128
+ "url": "https://genai.owasp.org/resource/owasp-top-10-for-agentic-applications-for-2026/",
129
+ "published_date": "2025-12-09",
130
+ "evidence_type": "community_consensus_framework",
131
+ "credibility": "high",
132
+ "why_it_matters": "Peer-reviewed, community-driven taxonomy covering goal hijack, tool misuse, supply chain, code execution, and memory/context poisoning.",
133
+ "web_ref": "turn325573view11"
134
+ },
135
+ {
136
+ "source_id": "SRC_OWASP_AGENT_MEMORY_GUARD_2026",
137
+ "title": "OWASP Agent Memory Guard",
138
+ "publisher": "OWASP Foundation",
139
+ "url": "https://owasp.org/www-project-agent-memory-guard/",
140
+ "published_date": "2026-01-01",
141
+ "evidence_type": "open_source_project_guidance",
142
+ "credibility": "medium_high",
143
+ "why_it_matters": "Concrete open-source design for defending memory poisoning with integrity checks, policy enforcement, snapshots, and rollback.",
144
+ "web_ref": "turn325573view22"
145
+ },
146
+ {
147
+ "source_id": "SRC_NEMO_GUARDRAILS_DOCS_2026",
148
+ "title": "NVIDIA NeMo Guardrails Library Developer Guide",
149
+ "publisher": "NVIDIA",
150
+ "url": "https://docs.nvidia.com/nemo/guardrails/latest/",
151
+ "published_date": "2026-01-01",
152
+ "evidence_type": "official_product_documentation",
153
+ "credibility": "high",
154
+ "why_it_matters": "Documents programmable guardrails, including input, retrieval, dialog, execution, and output rails.",
155
+ "web_ref": "turn325573view12"
156
+ },
157
+ {
158
+ "source_id": "SRC_GUARDRAILS_AI_VALIDATORS_2026",
159
+ "title": "Validators",
160
+ "publisher": "Guardrails AI",
161
+ "url": "https://guardrailsai.com/guardrails/docs/concepts/validators",
162
+ "published_date": "2026-01-01",
163
+ "evidence_type": "official_product_documentation",
164
+ "credibility": "medium_high",
165
+ "why_it_matters": "Shows an output/input validation model focused on validators and metadata-driven checks.",
166
+ "web_ref": "turn573368view0"
167
+ },
168
+ {
169
+ "source_id": "SRC_LLAMAFIREWALL_DOCS_2026",
170
+ "title": "About LlamaFirewall",
171
+ "publisher": "Meta / LlamaFirewall",
172
+ "url": "https://meta-llama.github.io/PurpleLlama/LlamaFirewall/docs/documentation/about-llamafirewall",
173
+ "published_date": "2026-01-01",
174
+ "evidence_type": "official_project_documentation",
175
+ "credibility": "medium_high",
176
+ "why_it_matters": "Documents scanner-based, layered defenses across user input, agent reasoning, and generated code.",
177
+ "web_ref": "turn325573view13"
178
+ },
179
+ {
180
+ "source_id": "SRC_OPENGUARDRAILS_DOCS_2026",
181
+ "title": "OpenGuardrails Documentation",
182
+ "publisher": "OpenGuardrails",
183
+ "url": "https://openguardrails.com/docs",
184
+ "published_date": "2026-01-01",
185
+ "evidence_type": "vendor_product_documentation",
186
+ "credibility": "medium",
187
+ "why_it_matters": "Shows gateway/API-mode security posture, data leak prevention, and knowledge-base response patterns.",
188
+ "web_ref": "turn325573view14"
189
+ },
190
+ {
191
+ "source_id": "SRC_BROWSERGYM_GITHUB_2026",
192
+ "title": "BrowserGym",
193
+ "publisher": "ServiceNow / GitHub",
194
+ "url": "https://github.com/ServiceNow/BrowserGym",
195
+ "published_date": "2026-01-20",
196
+ "evidence_type": "official_open_source_repository",
197
+ "credibility": "medium_high",
198
+ "why_it_matters": "A maintained benchmark ecosystem for web task automation that is useful for regression and adversarial evaluation.",
199
+ "web_ref": "turn325573view15"
200
+ },
201
+ {
202
+ "source_id": "SRC_AGENTDOJO_BENCHMARK_2026",
203
+ "title": "AgentDojo Benchmark",
204
+ "publisher": "AgentDojo",
205
+ "url": "https://agentdojo.spylab.ai/api/benchmark/",
206
+ "published_date": "2026-01-01",
207
+ "evidence_type": "official_open_source_documentation",
208
+ "credibility": "medium_high",
209
+ "why_it_matters": "Provides benchmark APIs with utility and security results for injection-aware agent evaluation.",
210
+ "web_ref": "turn325573view16"
211
+ },
212
+ {
213
+ "source_id": "SRC_CAMEL_ARXIV_2025",
214
+ "title": "Defeating Prompt Injections by Design",
215
+ "publisher": "arXiv",
216
+ "url": "https://arxiv.org/abs/2503.18813",
217
+ "published_date": "2025-03-24",
218
+ "evidence_type": "preprint_research",
219
+ "credibility": "medium_high",
220
+ "why_it_matters": "Introduces CaMeL: explicit control/data-flow separation and capability enforcement for prompt-injection-resistant agents.",
221
+ "web_ref": "turn303329view0"
222
+ },
223
+ {
224
+ "source_id": "SRC_ACE_NDSS_2026",
225
+ "title": "ACE: A Security Architecture for LLM-Integrated App Systems",
226
+ "publisher": "NDSS Symposium",
227
+ "url": "https://www.ndss-symposium.org/ndss-paper/ace-a-security-architecture-for-llm-integrated-app-systems/",
228
+ "published_date": "2026-02-01",
229
+ "evidence_type": "peer_reviewed_conference",
230
+ "credibility": "high",
231
+ "why_it_matters": "System-security architecture with abstract planning on trusted info, static analysis, and data/capability barriers.",
232
+ "web_ref": "turn325573view17"
233
+ },
234
+ {
235
+ "source_id": "SRC_TOOLHIJACKER_NDSS_2026",
236
+ "title": "Prompt Injection Attack to Tool Selection in LLM Agents",
237
+ "publisher": "NDSS Symposium",
238
+ "url": "https://www.ndss-symposium.org/ndss-paper/prompt-injection-attack-to-tool-selection-in-llm-agents/",
239
+ "published_date": "2026-02-01",
240
+ "evidence_type": "peer_reviewed_conference",
241
+ "credibility": "high",
242
+ "why_it_matters": "Shows malicious tool documents can subvert tool selection and that common defenses can be insufficient.",
243
+ "web_ref": "turn325573view18"
244
+ },
245
+ {
246
+ "source_id": "SRC_OBLIINJECTION_NDSS_2026",
247
+ "title": "ObliInjection: Order-Oblivious Prompt Injection Attack to LLM Agents with Multi-source Data",
248
+ "publisher": "NDSS Symposium",
249
+ "url": "https://www.ndss-symposium.org/ndss-paper/obliinjection-order-oblivious-prompt-injection-attack-to-llm-agents-with-multi-source-data/",
250
+ "published_date": "2026-02-01",
251
+ "evidence_type": "peer_reviewed_conference",
252
+ "credibility": "high",
253
+ "why_it_matters": "Demonstrates successful multi-source injection when only a small portion of segments are attacker-controlled.",
254
+ "web_ref": "turn325573view19"
255
+ },
256
+ {
257
+ "source_id": "SRC_TOPICATTACK_EMNLP_2025",
258
+ "title": "TopicAttack: An Indirect Prompt Injection Attack via Topic Transition",
259
+ "publisher": "ACL Anthology / EMNLP 2025",
260
+ "url": "https://aclanthology.org/2025.emnlp-main.372/",
261
+ "published_date": "2025-11-01",
262
+ "evidence_type": "peer_reviewed_conference",
263
+ "credibility": "high",
264
+ "why_it_matters": "Documents a social-engineering-style topic-transition attack that maintains plausibility while increasing success.",
265
+ "web_ref": "turn281925search0"
266
+ },
267
+ {
268
+ "source_id": "SRC_IPI_DETECT_REMOVE_ACL_2025",
269
+ "title": "Can Indirect Prompt Injection Attacks Be Detected and Removed?",
270
+ "publisher": "ACL Anthology / ACL 2025",
271
+ "url": "https://aclanthology.org/2025.acl-long.890/",
272
+ "published_date": "2025-07-01",
273
+ "evidence_type": "peer_reviewed_conference",
274
+ "credibility": "high",
275
+ "why_it_matters": "Shows detection and removal are possible but should be treated as one layer, not a complete guarantee.",
276
+ "web_ref": "turn129181search5"
277
+ },
278
+ {
279
+ "source_id": "SRC_INSTRUCTDETECTOR_EMNLP_2025",
280
+ "title": "Defending against Indirect Prompt Injection by Instruction Detection",
281
+ "publisher": "ACL Anthology / EMNLP 2025 Findings",
282
+ "url": "https://aclanthology.org/2025.findings-emnlp.1060/",
283
+ "published_date": "2025-11-01",
284
+ "evidence_type": "peer_reviewed_conference",
285
+ "credibility": "high",
286
+ "why_it_matters": "Intermediate-state instruction detection offers a promising detector layer for indirect prompt injection.",
287
+ "web_ref": "turn129181search6"
288
+ },
289
+ {
290
+ "source_id": "SRC_ONE_SHOT_DOMINANCE_EMNLP_2025",
291
+ "title": "One Shot Dominance: Knowledge Poisoning Attack on Retrieval-Augmented Generation Systems",
292
+ "publisher": "ACL Anthology / EMNLP 2025 Findings",
293
+ "url": "https://aclanthology.org/2025.findings-emnlp.1023/",
294
+ "published_date": "2025-11-01",
295
+ "evidence_type": "peer_reviewed_conference",
296
+ "credibility": "high",
297
+ "why_it_matters": "Shows single-document knowledge poisoning can be effective in RAG systems.",
298
+ "web_ref": "turn325573view20"
299
+ },
300
+ {
301
+ "source_id": "SRC_REVPRAG_EMNLP_2025",
302
+ "title": "RevPRAG: Revealing Poisoning Attacks in Retrieval-Augmented Generation through LLM Activation Analysis",
303
+ "publisher": "ACL Anthology / EMNLP 2025 Findings",
304
+ "url": "https://aclanthology.org/2025.findings-emnlp.698/",
305
+ "published_date": "2025-11-01",
306
+ "evidence_type": "peer_reviewed_conference",
307
+ "credibility": "high",
308
+ "why_it_matters": "Demonstrates a promising activation-based detector for poisoned RAG responses.",
309
+ "web_ref": "turn325573view21"
310
+ },
311
+ {
312
+ "source_id": "SRC_ADAPTIVE_ATTACKS_ACL_2025",
313
+ "title": "Adaptive Attacks Break Defenses Against Indirect Prompt Injection Attacks on LLM Agents",
314
+ "publisher": "ACL Anthology / Findings of NAACL 2025",
315
+ "url": "https://aclanthology.org/2025.findings-naacl.395/",
316
+ "published_date": "2025-04-01",
317
+ "evidence_type": "peer_reviewed_conference",
318
+ "credibility": "high",
319
+ "why_it_matters": "Supports the need for adaptive testing because fixed defenses can break under tailored attacks.",
320
+ "web_ref": "turn129181search4"
321
+ },
322
+ {
323
+ "source_id": "SRC_OWASP_LLM02_OUTPUT_2026",
324
+ "title": "LLM02: Insecure Output Handling",
325
+ "publisher": "OWASP Gen AI Security Project",
326
+ "url": "https://genai.owasp.org/llmrisk2023-24/llm02-insecure-output-handling/",
327
+ "published_date": "2026-01-01",
328
+ "evidence_type": "community_best_practice",
329
+ "credibility": "high",
330
+ "why_it_matters": "Grounds output validation, downstream sink controls, and zero-trust handling of model outputs.",
331
+ "web_ref": "turn647671search1"
332
+ },
333
+ {
334
+ "source_id": "SRC_NIST_AGENTIC_EMERGING_2026",
335
+ "title": "Agentic AI: Emerging Threats, Mitigations, and Challenges",
336
+ "publisher": "NIST CSRC Presentation",
337
+ "url": "https://csrc.nist.gov/Presentations/2026/agentic-ai-emerging-threats-mitigations-and-cha",
338
+ "published_date": "2026-01-21",
339
+ "evidence_type": "government_presentation",
340
+ "credibility": "medium_high",
341
+ "why_it_matters": "Useful supporting taxonomy for memory and context poisoning in agentic systems.",
342
+ "web_ref": "turn491225search4"
343
+ }
344
+ ]
345
+ }