@clawos-dev/clawd 0.2.47-beta.70.6ec7522 → 0.2.47-beta.72.f1d7f9e
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.cjs +217 -115
- package/dist/persona-defaults/persona-knowledge-base/.claude/skills/karpathy-llm-wiki/SKILL.md +187 -0
- package/dist/persona-defaults/persona-knowledge-base/.claude/skills/karpathy-llm-wiki/references/archive-template.md +21 -0
- package/dist/persona-defaults/persona-knowledge-base/.claude/skills/karpathy-llm-wiki/references/article-template.md +20 -0
- package/dist/persona-defaults/persona-knowledge-base/.claude/skills/karpathy-llm-wiki/references/index-template.md +18 -0
- package/dist/persona-defaults/persona-knowledge-base/.claude/skills/karpathy-llm-wiki/references/raw-template.md +7 -0
- package/dist/persona-defaults/persona-knowledge-base/CLAUDE.md +105 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/README.md +119 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/SKILL.md +108 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/reference/continuation.md +167 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/reference/html-generation.md +103 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/reference/methodology.md +421 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/reference/quality-gates.md +192 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/reference/report-assembly.md +130 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/reference/weasyprint_guidelines.md +324 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/requirements.txt +14 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/schemas/claim.schema.json +49 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/schemas/evidence.schema.json +43 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/schemas/run_manifest.schema.json +97 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/schemas/source.schema.json +49 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/scripts/citation_manager.py +300 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/scripts/evidence_store.py +205 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/scripts/extract_claims.py +358 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/scripts/md_to_html.py +330 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/scripts/research_engine.py +584 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/scripts/source_evaluator.py +292 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/scripts/validate_report.py +354 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/scripts/verify_citations.py +426 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/scripts/verify_claim_support.py +344 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/scripts/verify_html.py +220 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/templates/mckinsey_report_template.html +443 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/templates/report_template.md +414 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/tests/fixtures/invalid_report.md +27 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/tests/fixtures/valid_report.md +114 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/tests/test_citation_manager.py +195 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/tests/test_evidence_store.py +166 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/tests/test_extract_claims.py +213 -0
- package/dist/persona-defaults/persona-researcher/.claude/skills/deep-research/tests/test_verify_claim_support.py +230 -0
- package/dist/persona-defaults/persona-researcher/CLAUDE.md +30 -0
- package/dist/persona-defaults/persona-researcher/skills-lock.json +11 -0
- package/package.json +2 -2
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Tests for verify_claim_support.py CLI."""
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import shutil
|
|
7
|
+
import subprocess
|
|
8
|
+
import sys
|
|
9
|
+
import tempfile
|
|
10
|
+
import unittest
|
|
11
|
+
|
|
12
|
+
# Location of the CLI under test: ../scripts/verify_claim_support.py,
# resolved relative to the directory containing this test file.
SCRIPT = os.path.join(
    os.path.dirname(__file__),
    '..',
    'scripts',
    'verify_claim_support.py',
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def run_vcs(*args: str, expect_fail: bool = False) -> dict | str:
    """Invoke the verify_claim_support.py CLI and return what it printed.

    Args:
        *args: Command-line arguments forwarded to the script unchanged.
        expect_fail: When true, a non-zero exit status is tolerated
            instead of raising.

    Returns:
        The parsed JSON value when the stripped stdout begins with ``{``;
        otherwise the stripped stdout text itself.

    Raises:
        RuntimeError: If the script exits non-zero and ``expect_fail`` is
            false. The message carries the exit code, stderr and stdout.
    """
    command = [sys.executable, SCRIPT]
    command.extend(args)
    proc = subprocess.run(command, capture_output=True, text=True)

    exited_nonzero = proc.returncode != 0
    if exited_nonzero and not expect_fail:
        raise RuntimeError(f'Exit {proc.returncode}: {proc.stderr}\n{proc.stdout}')

    printed = proc.stdout.strip()
    # Only output that looks like a JSON object is decoded; anything else
    # is handed back verbatim.
    return json.loads(printed) if printed.startswith('{') else printed
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def write_jsonl(path: str, rows: list[dict]) -> None:
    """Write ``rows`` to ``path`` as JSON Lines, one object per line.

    Any existing file at ``path`` is overwritten. An empty ``rows`` list
    produces an empty file.

    Args:
        path: Destination file path.
        rows: Objects to serialize with ``json.dumps``, in order.
    """
    # Explicit UTF-8 so the fixture file does not depend on the platform's
    # locale-derived default encoding.
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(json.dumps(row) + '\n' for row in rows)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class TestVerifySupported(unittest.TestCase):
    """A factual claim backed by closely matching evidence ends up supported."""

    def setUp(self):
        # Each test gets its own scratch directory holding the three JSONL inputs.
        self.tmpdir = tempfile.mkdtemp()
        sources_path = os.path.join(self.tmpdir, 'sources.jsonl')
        evidence_path = os.path.join(self.tmpdir, 'evidence.jsonl')
        claims_path = os.path.join(self.tmpdir, 'claims.jsonl')

        source = {'source_id': 'src_quantum_001', 'title': 'Quantum Computing 2024'}
        write_jsonl(sources_path, [source])

        # The evidence quote shares most of its wording with the claim text.
        evidence = {
            'evidence_id': 'ev_shor_001',
            'source_id': 'src_quantum_001',
            'quote': "Shor's algorithm can factor large integers exponentially faster than any known classical algorithm, threatening RSA-2048 encryption.",
            'evidence_type': 'direct_quote',
        }
        write_jsonl(evidence_path, [evidence])

        # The claim cites the source and evidence above and starts unverified.
        claim = {
            'claim_id': 'clm_factor_001',
            'section_id': 'finding_1',
            'text': "Shor's algorithm can factor large numbers exponentially faster than classical methods, threatening RSA-2048.",
            'claim_type': 'factual',
            'cited_source_ids': ['src_quantum_001'],
            'evidence_ids': ['ev_shor_001'],
            'support_status': 'unverified',
        }
        write_jsonl(claims_path, [claim])

    def tearDown(self):
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_supported_claim(self):
        report = run_vcs('verify', '--dir', self.tmpdir)
        self.assertEqual(report['status'], 'pass')
        self.assertEqual(report['factual_unsupported'], 0)

        # Re-read claims.jsonl: the status is expected to have moved from
        # 'unverified' to 'supported' after the run.
        with open(os.path.join(self.tmpdir, 'claims.jsonl')) as handle:
            updated = [json.loads(row) for row in handle]
        self.assertEqual(updated[0]['support_status'], 'supported')
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class TestVerifyUnsupported(unittest.TestCase):
    """A factual claim with no evidence at all is counted as unsupported."""

    def setUp(self):
        self.tmpdir = tempfile.mkdtemp()

        # The sources and evidence files exist but contain no rows.
        for empty_name in ('sources.jsonl', 'evidence.jsonl'):
            write_jsonl(os.path.join(self.tmpdir, empty_name), [])

        orphan_claim = {
            'claim_id': 'clm_no_ev_001',
            'section_id': 'finding_1',
            'text': 'The population of Mars is 500 million as of 2025.',
            'claim_type': 'factual',
            'cited_source_ids': [],
            'evidence_ids': [],
            'support_status': 'unverified',
        }
        write_jsonl(os.path.join(self.tmpdir, 'claims.jsonl'), [orphan_claim])

    def tearDown(self):
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_unsupported_no_evidence(self):
        report = run_vcs('verify', '--dir', self.tmpdir)
        self.assertEqual(report['factual_unsupported'], 1)
        # Without --strict the overall run is still reported as passing.
        self.assertEqual(report['status'], 'pass')

    def test_strict_fails(self):
        strict_args = ('verify', '--dir', self.tmpdir, '--strict')
        # expect_fail tolerates a non-zero exit from the CLI for this run.
        report = run_vcs(*strict_args, expect_fail=True)
        self.assertEqual(report['status'], 'fail')
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class TestVerifyMixed(unittest.TestCase):
    """Non-factual claim types (speculation, recommendation) without evidence.

    NOTE(review): the fixture contains both a speculation and a
    recommendation claim, but only the speculation claim is asserted on.
    """

    def setUp(self):
        self.tmpdir = tempfile.mkdtemp()
        write_jsonl(os.path.join(self.tmpdir, 'sources.jsonl'), [])
        write_jsonl(os.path.join(self.tmpdir, 'evidence.jsonl'), [])
        write_jsonl(os.path.join(self.tmpdir, 'claims.jsonl'), [
            {
                'claim_id': 'clm_spec_001',
                'section_id': 'finding_1',
                'text': 'Quantum computers might eventually solve protein folding in real time.',
                'claim_type': 'speculation',
                'cited_source_ids': [],
                'evidence_ids': [],
                'support_status': 'unverified',
            },
            {
                'claim_id': 'clm_rec_001',
                'section_id': 'recommendations',
                'text': 'Organizations should begin PQC migration planning immediately.',
                'claim_type': 'recommendation',
                'cited_source_ids': [],
                'evidence_ids': [],
                'support_status': 'unverified',
            },
        ])

    def tearDown(self):
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_speculation_passes(self):
        # The CLI's printed report is not inspected here; the run is only
        # needed for its effect on claims.jsonl (run_vcs raises on failure).
        run_vcs('verify', '--dir', self.tmpdir)

        # Speculation doesn't need evidence
        with open(os.path.join(self.tmpdir, 'claims.jsonl')) as f:
            claims = [json.loads(line) for line in f]
        spec = [c for c in claims if c['claim_type'] == 'speculation'][0]
        self.assertEqual(spec['support_status'], 'supported')
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class TestVerifyPartial(unittest.TestCase):
    """A claim whose evidence only partly overlaps its text.

    The claim adds details (a count, a year, signatures) that the single
    evidence quote does not mention.
    """

    def setUp(self):
        self.tmpdir = tempfile.mkdtemp()
        write_jsonl(os.path.join(self.tmpdir, 'sources.jsonl'), [
            {'source_id': 'src_nist_001', 'title': 'NIST PQC Standards'},
        ])
        write_jsonl(os.path.join(self.tmpdir, 'evidence.jsonl'), [
            {
                'evidence_id': 'ev_nist_001',
                'source_id': 'src_nist_001',
                'quote': 'NIST announced the standardization of CRYSTALS-Kyber for key encapsulation.',
                'evidence_type': 'direct_quote',
            },
        ])
        # Claim mentions NIST but adds unverified detail about timeline
        write_jsonl(os.path.join(self.tmpdir, 'claims.jsonl'), [
            {
                'claim_id': 'clm_nist_time',
                'section_id': 'finding_2',
                'text': 'NIST standardized four lattice-based algorithms in 2024, covering both encryption and signatures.',
                'claim_type': 'factual',
                'cited_source_ids': ['src_nist_001'],
                'evidence_ids': ['ev_nist_001'],
                'support_status': 'unverified',
            },
        ])

    def tearDown(self):
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_partial_support(self):
        # The printed report is not inspected; the run is only needed for
        # its effect on claims.jsonl (run_vcs raises on failure).
        run_vcs('verify', '--dir', self.tmpdir)

        with open(os.path.join(self.tmpdir, 'claims.jsonl')) as f:
            claims = [json.loads(line) for line in f]

        # Ideally 'partial' or 'needs_review' because of the number/detail
        # mismatch.
        # NOTE(review): 'supported' is also accepted below, so this test does
        # not enforce "not fully supported"; it only rules out the remaining
        # statuses. Tighten once the scorer's result for this input is
        # confirmed.
        self.assertIn(claims[0]['support_status'], ('partial', 'needs_review', 'supported'))
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
class TestSupportScore(unittest.TestCase):
    """Direct unit tests for ``compute_support_score``."""

    @classmethod
    def setUpClass(cls):
        # Put the sibling scripts directory on sys.path, then import the
        # function under test from it.
        scripts_dir = os.path.join(os.path.dirname(__file__), '..', 'scripts')
        sys.path.insert(0, scripts_dir)
        from verify_claim_support import compute_support_score

        # staticmethod keeps the function from being bound to the test
        # instance when accessed as self.score.
        cls.score = staticmethod(compute_support_score)

    def test_identical_text(self):
        sentence = 'RSA-2048 uses 2048-bit keys for encryption.'
        verdict, value, _ = self.score(sentence, [sentence])
        self.assertEqual(verdict, 'supported')
        self.assertGreater(value, 0.8)

    def test_no_evidence(self):
        verdict, value, _ = self.score('Any claim text.', [])
        self.assertEqual(verdict, 'unsupported')
        self.assertEqual(value, 0.0)

    def test_unrelated_evidence(self):
        claim_text = 'The moon landing occurred in 1969.'
        quotes = ['Bananas are a good source of potassium and fiber.']
        verdict, value, _ = self.score(claim_text, quotes)
        self.assertIn(verdict, ('needs_review', 'unsupported'))
        self.assertLess(value, 0.35)
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
# Run the suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
你是一个专业的调研员(researcher),你的职责是帮助老板做高质量、可核查的深度调研。
|
|
2
|
+
|
|
3
|
+
## 核心工作方式
|
|
4
|
+
|
|
5
|
+
- **优先调用 `deep-research` skill**:老板提出任何"调研 / 研究 / 分析 / 综述 / 竞品 / 市场 / 技术选型"类需求时,先评估是否适用 `deep-research` skill(这个 persona 出厂自带),是就用,不要凭感觉口述
|
|
6
|
+
- **不能猜**:拿不准的事实,主动并行抓多个源核验,给出来源链接
|
|
7
|
+
- **产出带引用**:所有调研报告末尾必须给 bibliography(参考文献列表),每条带可点击链接和访问日期
|
|
8
|
+
- **抗幻觉**:发现引用不存在 / 链接打不开 / 数据来源单一时,明确标红,不要硬凑结论
|
|
9
|
+
|
|
10
|
+
## 深度档位(由 deep-research skill 提供)
|
|
11
|
+
|
|
12
|
+
按老板需求选档:
|
|
13
|
+
- **quick**:5 分钟内、3-5 个源、快速摸底
|
|
14
|
+
- **standard**:常规调研、10+ 源、带 bibliography
|
|
15
|
+
- **deep**:深度分析、多角度交叉验证
|
|
16
|
+
- **ultradeep**:穷尽式、长报告、含异见观点
|
|
17
|
+
|
|
18
|
+
不确定时默认 standard,并告诉老板可以升降档。
|
|
19
|
+
|
|
20
|
+
## 报告输出
|
|
21
|
+
|
|
22
|
+
- 默认 markdown
|
|
23
|
+
- 长报告可同时产出 HTML / PDF
|
|
24
|
+
- 关键结论顶格写"老板,TL;DR:...",再展开
|
|
25
|
+
|
|
26
|
+
## 红线
|
|
27
|
+
|
|
28
|
+
- **绝不**编造来源、链接、数据、引文
|
|
29
|
+
- **绝不**只用一个源就下结论(除非该源本身是权威一手出处,如官方文档)
|
|
30
|
+
- **绝不**省略 bibliography
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": 1,
|
|
3
|
+
"skills": {
|
|
4
|
+
"deep-research": {
|
|
5
|
+
"source": "199-biotechnologies/claude-deep-research-skill",
|
|
6
|
+
"sourceType": "github",
|
|
7
|
+
"skillPath": "SKILL.md",
|
|
8
|
+
"computedHash": "685efecf2091dccf219506641671acc3c0c31ecb83f54b8473b9d399ba0337b8"
|
|
9
|
+
}
|
|
10
|
+
}
|
|
11
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@clawos-dev/clawd",
|
|
3
|
-
"version": "0.2.47-beta.70.6ec7522",
|
|
3
|
+
"version": "0.2.47-beta.72.f1d7f9e",
|
|
4
4
|
"description": "Standalone clawd daemon — Claude Code (and future Codex) session server over WebSocket",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
"README.md"
|
|
17
17
|
],
|
|
18
18
|
"scripts": {
|
|
19
|
-
"build": "tsup",
|
|
19
|
+
"build": "tsup && node scripts/copy-defaults.mjs",
|
|
20
20
|
"typecheck": "tsc -p tsconfig.json --noEmit",
|
|
21
21
|
"dev": "tsx src/cli.ts",
|
|
22
22
|
"dev:watch": "tsx watch --clear-screen=false src/cli.ts",
|