@3-/aiapi 0.1.76 → 0.1.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/check/GEN.js +41 -0
- package/check/SEARCH.js +57 -0
- package/fmt.js +7 -2
- package/fmtJson.js +4 -9
- package/fmtJsonMd.js +5 -3
- package/gemini.js +4 -1
- package/package.json +1 -1
- package/refmt.js +2 -0
- package/researchFmt.js +49 -0
- package/seg.js +7 -6
- package/check/GEN_QA.js +0 -41
- package/check/STEP_2_RETRIEVAL_ROUTER.js +0 -39
- package/check/STEP_3_AGENT_PLANNING.js +0 -57
- package/check/STEP_4_EVIDENCE_CHECK.js +0 -54
- package/check/STEP_5_ADVERSARIAL_REVIEW.js +0 -59
- package/factCheck.js +0 -189
- package/fmtWithFactCheck.js +0 -32
package/check/GEN.js
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import TYPE from '../TYPE.js';
|
|
2
|
+
|
|
3
|
+
export default {
|
|
4
|
+
type: TYPE.ARRAY,
|
|
5
|
+
description: '基于对话分析生成的、可进行外部搜索验证的投资洞察列表(请把公司提及的个案数据,转为对行业搜索研究的命题)',
|
|
6
|
+
minItems: 0,
|
|
7
|
+
items: {
|
|
8
|
+
type: TYPE.OBJECT,
|
|
9
|
+
properties: {
|
|
10
|
+
title: {
|
|
11
|
+
type: TYPE.STRING,
|
|
12
|
+
description: '简短的标题,概述研究内容'
|
|
13
|
+
},
|
|
14
|
+
question: {
|
|
15
|
+
type: TYPE.STRING,
|
|
16
|
+
description: '从对话中提炼出的、具有明确事实属性且可通过外部搜索验证的关键行业洞察,是可以被证明或者证伪的命题。请避免无法验证的主观形容词。'
|
|
17
|
+
},
|
|
18
|
+
zh: {
|
|
19
|
+
type: TYPE.ARRAY,
|
|
20
|
+
description: '用于验证该洞察真伪的搜索关键词列表(中文)',
|
|
21
|
+
minItems: 0,
|
|
22
|
+
items: {
|
|
23
|
+
type: TYPE.STRING
|
|
24
|
+
}
|
|
25
|
+
},
|
|
26
|
+
en: {
|
|
27
|
+
type: TYPE.ARRAY,
|
|
28
|
+
description: '用于验证该洞察真伪的搜索关键词列表(英文)',
|
|
29
|
+
minItems: 0,
|
|
30
|
+
items: {
|
|
31
|
+
type: TYPE.STRING
|
|
32
|
+
}
|
|
33
|
+
},
|
|
34
|
+
reason: {
|
|
35
|
+
type: TYPE.STRING,
|
|
36
|
+
description: '分析为何搜索研究该洞察对判断决策至关重要'
|
|
37
|
+
}
|
|
38
|
+
},
|
|
39
|
+
required: ['title', 'question', 'zh', 'en', 'reason']
|
|
40
|
+
}
|
|
41
|
+
};
|
package/check/SEARCH.js
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import TYPE from '../TYPE.js';
|
|
2
|
+
|
|
3
|
+
export default {
|
|
4
|
+
type: TYPE.OBJECT,
|
|
5
|
+
properties: {
|
|
6
|
+
研究: {
|
|
7
|
+
type: TYPE.ARRAY,
|
|
8
|
+
minItems: 1,
|
|
9
|
+
description: `请执行以下结构化的研究报告撰写任务:
|
|
10
|
+
1. 章节拆分规划:首先,请根据研究主题的内在逻辑,将报告内容拆分为3-5个主要章节,每个章节需有明确的主题和研究范围。
|
|
11
|
+
2. 研究过程汇报:其次,详细记录并汇报研究实施的完整过程,包括但不限于:研究方法选择、数据收集途径、分析工具使用、关键节点时间线等。
|
|
12
|
+
3. 资料汇总整理:然后,在最后一章中,基于汇总的资料进行严谨的逻辑推理,分析数据间的关联性和规律,最终得出明确、有依据的研究结论。`,
|
|
13
|
+
items: {
|
|
14
|
+
type: TYPE.OBJECT,
|
|
15
|
+
required: ['title', 'md'],
|
|
16
|
+
properties: {
|
|
17
|
+
title: {
|
|
18
|
+
type: TYPE.STRING,
|
|
19
|
+
description: '章节标题'
|
|
20
|
+
},
|
|
21
|
+
md: {
|
|
22
|
+
type: TYPE.STRING,
|
|
23
|
+
description: '章节正文,markdown格式(禁止包含标题)'
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
结论: {
|
|
29
|
+
type: TYPE.STRING,
|
|
30
|
+
description: '一句话概述研究结论(支持还是不支持命题)'
|
|
31
|
+
},
|
|
32
|
+
文献: {
|
|
33
|
+
type: TYPE.ARRAY,
|
|
34
|
+
description: '参考资料',
|
|
35
|
+
minItems: 1,
|
|
36
|
+
items: {
|
|
37
|
+
type: TYPE.OBJECT,
|
|
38
|
+
required: ['title', 'brief', 'url'],
|
|
39
|
+
properties: {
|
|
40
|
+
title: {
|
|
41
|
+
type: TYPE.STRING,
|
|
42
|
+
description: '标题'
|
|
43
|
+
},
|
|
44
|
+
brief: {
|
|
45
|
+
type: TYPE.STRING,
|
|
46
|
+
description: '概述文章内容'
|
|
47
|
+
},
|
|
48
|
+
url: {
|
|
49
|
+
type: TYPE.STRING,
|
|
50
|
+
description: '链接'
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
},
|
|
56
|
+
required: ['结论', '研究', "文献"]
|
|
57
|
+
};
|
package/fmt.js
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
#!/usr/bin/env -S node --trace-uncaught --expose-gc --unhandled-rejections=strict --experimental-wasm-modules
|
|
2
2
|
import fmtJson from './fmtJson.js';
|
|
3
3
|
|
|
4
|
-
import
|
|
4
|
+
import partition from './partition.js';
|
|
5
5
|
|
|
6
6
|
export default async(chat, txt) => {
|
|
7
|
-
|
|
7
|
+
var pli;
|
|
8
|
+
pli = (await partition(chat, txt));
|
|
9
|
+
if (!pli.length) {
|
|
10
|
+
return '';
|
|
11
|
+
}
|
|
12
|
+
return fmtJson(chat, pli);
|
|
8
13
|
};
|
package/fmtJson.js
CHANGED
|
@@ -1,16 +1,11 @@
|
|
|
1
1
|
#!/usr/bin/env -S node --trace-uncaught --expose-gc --unhandled-rejections=strict --experimental-wasm-modules
|
|
2
2
|
import fmtSeg from './fmtSeg.js';
|
|
3
3
|
|
|
4
|
-
import
|
|
4
|
+
import fmtJsonMd from './fmtJsonMd.js';
|
|
5
5
|
|
|
6
|
-
export default async(chat,
|
|
7
|
-
|
|
8
|
-
pli = (await partition(chat, txt));
|
|
9
|
-
if (!pli.length) {
|
|
10
|
-
return [];
|
|
11
|
-
}
|
|
12
|
-
return Promise.all(pli.map(async([title, li]) => {
|
|
6
|
+
export default async(chat, pli) => {
|
|
7
|
+
return fmtJsonMd((await Promise.all(pli.map(async([title, li]) => {
|
|
13
8
|
console.log('\n---\n→ ' + title + '\n' + li.join('\n') + '\n---\n');
|
|
14
9
|
return [title, (await fmtSeg(chat, li.join('\n')))];
|
|
15
|
-
}));
|
|
10
|
+
}))));
|
|
16
11
|
};
|
package/fmtJsonMd.js
CHANGED
|
@@ -3,6 +3,8 @@ var txtFmt;
|
|
|
3
3
|
|
|
4
4
|
import TxtLi from '@3-/txt_li';
|
|
5
5
|
|
|
6
|
+
import refmt from './refmt.js';
|
|
7
|
+
|
|
6
8
|
txtFmt = (txt) => {
|
|
7
9
|
return TxtLi(txt).join('\n\n');
|
|
8
10
|
};
|
|
@@ -12,15 +14,15 @@ export default (title_json_li) => {
|
|
|
12
14
|
md_li = [];
|
|
13
15
|
for (x of title_json_li) {
|
|
14
16
|
[title, li] = x;
|
|
15
|
-
md_li.push('
|
|
17
|
+
md_li.push('## ' + title);
|
|
16
18
|
for (y of li) {
|
|
17
19
|
({题, 问, 答} = y);
|
|
18
20
|
答 = txtFmt(答).trim();
|
|
19
21
|
if ((答.startsWith('1. ')) || 答.endsWith(':') || 答.endsWith(':')) {
|
|
20
22
|
答 = '\n' + 答;
|
|
21
23
|
}
|
|
22
|
-
md_li.push('
|
|
24
|
+
md_li.push('### ' + 题 + '\n问: ' + txtFmt(问).trimEnd() + '\n\n答: ' + 答 + '\n');
|
|
23
25
|
}
|
|
24
26
|
}
|
|
25
|
-
return md_li.join('\n');
|
|
27
|
+
return md_li.map(refmt).join('\n');
|
|
26
28
|
};
|
package/gemini.js
CHANGED
|
@@ -34,7 +34,10 @@ export default (token_li, model = 'gemini-3-flash-preview') => { // model='gemi
|
|
|
34
34
|
if (schema) {
|
|
35
35
|
body.generationConfig = {
|
|
36
36
|
responseMimeType: 'application/json',
|
|
37
|
-
responseJsonSchema: schema
|
|
37
|
+
responseJsonSchema: schema,
|
|
38
|
+
thinkingConfig: {
|
|
39
|
+
thinkingLevel: 'high'
|
|
40
|
+
}
|
|
38
41
|
};
|
|
39
42
|
}
|
|
40
43
|
if (option) {
|
package/package.json
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"name":"@3-/aiapi","version":"0.1.
|
|
1
|
+
{"name":"@3-/aiapi","version":"0.1.78","repository":{"type":"git","url":"git+https://atomgit.com/i18n/lib.git"},"homepage":"https://atomgit.com/i18n/lib/tree/dev/aiapi","author":"i18n.site@gmail.com","license":"MulanPSL-2.0","exports":{".":"./lib.js","./*":"./*"},"files":["./*"],"devDependencies":{"@3-/read":"^0.1.4"},"scripts":{},"type":"module","dependencies":{"@3-/retry":"^0.0.2","@3-/rm_cn_space":"^0.1.1","@3-/sleep":"^0.0.4","@3-/txt_li":"^0.1.5","@3-/utf8":"^0.0.4","lodash-es":"^4.17.21"}}
|
package/refmt.js
ADDED
package/researchFmt.js
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
#!/usr/bin/env -S node --trace-uncaught --expose-gc --unhandled-rejections=strict --experimental-wasm-modules
|
|
2
|
+
import fmtJson from './fmtJson.js';
|
|
3
|
+
|
|
4
|
+
import partition from './partition.js';
|
|
5
|
+
|
|
6
|
+
import GEN from './check/GEN.js';
|
|
7
|
+
|
|
8
|
+
import SEARCH from './check/SEARCH.js';
|
|
9
|
+
|
|
10
|
+
import refmt from './refmt.js';
|
|
11
|
+
|
|
12
|
+
export default async(chat, txt) => {
|
|
13
|
+
var fmt, gen_txt, pli, search;
|
|
14
|
+
pli = (await partition(chat, txt));
|
|
15
|
+
if (!pli.length) {
|
|
16
|
+
return [];
|
|
17
|
+
}
|
|
18
|
+
fmt = fmtJson(chat, pli);
|
|
19
|
+
gen_txt = pli.map(([title, li]) => {
|
|
20
|
+
return '# ' + title + '\n' + li.join('\n');
|
|
21
|
+
}).join('\n');
|
|
22
|
+
search = Promise.all(((await chat(gen_txt, GEN, "你是专业的风险投资人"))).map(async(i) => {
|
|
23
|
+
var body, en, question, reason, title, zh, 文献, 研究, 结论;
|
|
24
|
+
// console.log i
|
|
25
|
+
({title, question, zh, en, reason} = i);
|
|
26
|
+
body = title + '\n研究命题: ' + question + '\n中文搜索词: ' + zh + '\n英文搜索词: ' + en;
|
|
27
|
+
console.log('\n# ' + body);
|
|
28
|
+
({结论, 研究, 文献} = (await chat('请基于搜索,对以下命题深度研究:\n' + body, SEARCH, '利用搜索工具进行客观理性的研究分析', {
|
|
29
|
+
tools: [
|
|
30
|
+
{
|
|
31
|
+
google_search: {}
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
url_context: {}
|
|
35
|
+
}
|
|
36
|
+
]
|
|
37
|
+
})));
|
|
38
|
+
return '## ' + title.trim() + '\n\n**<u>问题:</u>** ' + question.trim() + '\n\n**<u>结论</u>:** ' + 结论.trim() + '\n\n' + 研究.map(({title, md}) => {
|
|
39
|
+
return '### ' + title + '\n\n' + md;
|
|
40
|
+
}).join('\n') + '\n\n### 参考资料\n\n' + 文献.map(({title, brief, url}, pos) => {
|
|
41
|
+
return (pos + 1) + '. [' + title + '](' + url + ') : ' + brief;
|
|
42
|
+
}).join('\n');
|
|
43
|
+
}));
|
|
44
|
+
[fmt, search] = (await Promise.all([fmt, search]));
|
|
45
|
+
if (search.length) {
|
|
46
|
+
fmt += '# 机器投研\n\n' + search.map(refmt).join('\n');
|
|
47
|
+
}
|
|
48
|
+
return fmt;
|
|
49
|
+
};
|
package/seg.js
CHANGED
|
@@ -4,29 +4,30 @@ import TYPE from './TYPE.js';
|
|
|
4
4
|
export default async(chat, txt_li) => {
|
|
5
5
|
var split_li, 提示词;
|
|
6
6
|
提示词 = `下文第1列为行号,第2列为对话内容,以tab分隔。
|
|
7
|
-
|
|
8
|
-
|
|
7
|
+
请划分章节,每章不要太长也不要太短,千字左右一章。
|
|
8
|
+
输出章节标题、每章最后一句号的行号。
|
|
9
|
+
输出格式为json数组:\n` + txt_li.map((i, pos) => {
|
|
9
10
|
return (pos + 1) + '\t' + i.trim();
|
|
10
11
|
}).join('\n');
|
|
11
12
|
split_li = (await chat(提示词, {
|
|
12
13
|
type: TYPE.ARRAY,
|
|
13
|
-
description: '
|
|
14
|
+
description: '拆分文章为多个章节,每个章节包含一系列问答对',
|
|
14
15
|
minItems: 1,
|
|
15
16
|
items: {
|
|
16
17
|
type: TYPE.OBJECT,
|
|
17
18
|
properties: {
|
|
18
19
|
题: {
|
|
19
|
-
description: '
|
|
20
|
+
description: '章节标题(只写标题,不写注释)',
|
|
20
21
|
type: TYPE.STRING
|
|
21
22
|
},
|
|
22
23
|
行: {
|
|
23
|
-
description: '
|
|
24
|
+
description: '该章最后一句的行号',
|
|
24
25
|
type: TYPE.INTEGER
|
|
25
26
|
}
|
|
26
27
|
},
|
|
27
28
|
required: ['题', '行']
|
|
28
29
|
}
|
|
29
|
-
}, '
|
|
30
|
+
}, '你是专业资深的杂志编辑'));
|
|
30
31
|
return split_li.map((i) => {
|
|
31
32
|
return [i.题, i.行];
|
|
32
33
|
});
|
package/check/GEN_QA.js
DELETED
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
import TYPE from '../TYPE.js';
|
|
2
|
-
|
|
3
|
-
export default {
|
|
4
|
-
type: TYPE.ARRAY,
|
|
5
|
-
description: '事实提取与命题化。将自然语言主张拆解为可独立验证的布尔命题元组,为后续网络搜索查证做准备(如果觉得不太可被搜索验证,不要列出来),如果事实不重要,不要列出来',
|
|
6
|
-
minItems: 0,
|
|
7
|
-
items: {
|
|
8
|
-
type: TYPE.OBJECT,
|
|
9
|
-
properties: {
|
|
10
|
-
原句: {
|
|
11
|
-
type: TYPE.STRING,
|
|
12
|
-
description: '来源的原始文本片段(去除语气助词、优化语言转录的可读性)'
|
|
13
|
-
},
|
|
14
|
-
实体: {
|
|
15
|
-
type: TYPE.STRING,
|
|
16
|
-
description: '命题的主体,必须是一个可以被明确搜索的关键词'
|
|
17
|
-
},
|
|
18
|
-
宾语: {
|
|
19
|
-
type: TYPE.STRING,
|
|
20
|
-
description: '核心核查点,具体的金额、具体的份额等'
|
|
21
|
-
},
|
|
22
|
-
命题: {
|
|
23
|
-
type: TYPE.STRING,
|
|
24
|
-
description: '完整的陈述句,必须是一个可以独立判定真伪的布尔命题'
|
|
25
|
-
},
|
|
26
|
-
如何搜索: {
|
|
27
|
-
type: TYPE.STRING,
|
|
28
|
-
description: '如何通过谷歌搜索查证此命题的真伪'
|
|
29
|
-
},
|
|
30
|
-
重要度: {
|
|
31
|
-
type: TYPE.INTEGER,
|
|
32
|
-
description: '0-10分,命题对于决策判断重要程度,分数越高越重要。有具体数字的内容,可以被搜索到的内容,会比较重要。竞争对手是谁创办的这种不重要。'
|
|
33
|
-
},
|
|
34
|
-
可查性: {
|
|
35
|
-
type: TYPE.INTEGER,
|
|
36
|
-
description: '0-10分,命题是否可以通过互联网搜索验证,分数越高代表容易被搜索查证。0表示是公司内部数据,无法搜索被验证'
|
|
37
|
-
}
|
|
38
|
-
},
|
|
39
|
-
required: ['实体', '宾语', '命题', '如何搜索', '原句', '可查性']
|
|
40
|
-
}
|
|
41
|
-
};
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
import TYPE from '../TYPE.js';
|
|
2
|
-
|
|
3
|
-
export default {
|
|
4
|
-
type: TYPE.ARRAY,
|
|
5
|
-
description: '第二阶段:检索可行性判断 (Self-RAG 滤网)。对每个命题进行检索必要性打分和隐私判定。',
|
|
6
|
-
minItems: 1,
|
|
7
|
-
items: {
|
|
8
|
-
type: TYPE.OBJECT,
|
|
9
|
-
properties: {
|
|
10
|
-
对应命题ID: {
|
|
11
|
-
type: TYPE.STRING,
|
|
12
|
-
description: '关联的第一阶段命题ID'
|
|
13
|
-
},
|
|
14
|
-
检索必要性得分: {
|
|
15
|
-
type: TYPE.NUMBER,
|
|
16
|
-
description: '0-1之间的分数,分数越高代表越需要检索'
|
|
17
|
-
},
|
|
18
|
-
隐私边界: {
|
|
19
|
-
type: TYPE.STRING,
|
|
20
|
-
description: '判定类别:可公开检索/半公开推论/内部隐私',
|
|
21
|
-
enum: ['可公开检索', '半公开推论', '内部隐私']
|
|
22
|
-
},
|
|
23
|
-
数据类型: {
|
|
24
|
-
type: TYPE.STRING,
|
|
25
|
-
description: '数据性质:硬数据(法律/备案等)或 软数据(侧面印证/舆情)',
|
|
26
|
-
enum: ['硬数据', '软数据']
|
|
27
|
-
},
|
|
28
|
-
是否需要尽调: {
|
|
29
|
-
type: TYPE.BOOLEAN,
|
|
30
|
-
description: '对于内部隐私或不可验证的数据,标记为True'
|
|
31
|
-
},
|
|
32
|
-
推断逻辑: {
|
|
33
|
-
type: TYPE.STRING,
|
|
34
|
-
description: '简述判定的理由'
|
|
35
|
-
}
|
|
36
|
-
},
|
|
37
|
-
required: ['对应命题ID', '检索必要性得分', '隐私边界', '数据类型']
|
|
38
|
-
}
|
|
39
|
-
};
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
import TYPE from '../TYPE.js';
|
|
2
|
-
|
|
3
|
-
export default {
|
|
4
|
-
type: TYPE.ARRAY,
|
|
5
|
-
description: '第三阶段:规划与工具调用 (Agentic Workflow)。将自然语言主张转化为可执行的验证程序步骤。',
|
|
6
|
-
minItems: 1,
|
|
7
|
-
items: {
|
|
8
|
-
type: TYPE.OBJECT,
|
|
9
|
-
properties: {
|
|
10
|
-
场景类型: {
|
|
11
|
-
type: TYPE.STRING,
|
|
12
|
-
description: '验证场景分类',
|
|
13
|
-
enum: ['数值验证', '逻辑验证', '负面排除', '其他']
|
|
14
|
-
},
|
|
15
|
-
目标命题ID: {
|
|
16
|
-
type: TYPE.ARRAY,
|
|
17
|
-
description: '该计划覆盖的命题ID列表',
|
|
18
|
-
items: {
|
|
19
|
-
type: TYPE.STRING
|
|
20
|
-
}
|
|
21
|
-
},
|
|
22
|
-
SOTA策略: {
|
|
23
|
-
type: TYPE.STRING,
|
|
24
|
-
description: '使用的策略名称,如 HyDE, ReAct, Query Expansion'
|
|
25
|
-
},
|
|
26
|
-
执行步骤: {
|
|
27
|
-
type: TYPE.ARRAY,
|
|
28
|
-
description: '具体的每一步执行动作',
|
|
29
|
-
items: {
|
|
30
|
-
type: TYPE.OBJECT,
|
|
31
|
-
properties: {
|
|
32
|
-
步骤序号: {
|
|
33
|
-
type: TYPE.NUMBER
|
|
34
|
-
},
|
|
35
|
-
步骤描述: {
|
|
36
|
-
type: TYPE.STRING,
|
|
37
|
-
description: '自然语言描述该步骤要做什么'
|
|
38
|
-
},
|
|
39
|
-
搜索关键词: {
|
|
40
|
-
type: TYPE.ARRAY,
|
|
41
|
-
description: '生成的搜索Query列表',
|
|
42
|
-
items: {
|
|
43
|
-
type: TYPE.STRING
|
|
44
|
-
}
|
|
45
|
-
},
|
|
46
|
-
预期动作: {
|
|
47
|
-
type: TYPE.STRING,
|
|
48
|
-
description: '执行的具体操作,如:提取Top5、对比数据、向量匹配'
|
|
49
|
-
}
|
|
50
|
-
},
|
|
51
|
-
required: ['步骤序号', '步骤描述', '搜索关键词']
|
|
52
|
-
}
|
|
53
|
-
}
|
|
54
|
-
},
|
|
55
|
-
required: ['场景类型', 'SOTA策略', '执行步骤']
|
|
56
|
-
}
|
|
57
|
-
};
|
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
import TYPE from '../TYPE.js';
|
|
2
|
-
|
|
3
|
-
export default {
|
|
4
|
-
type: TYPE.ARRAY,
|
|
5
|
-
description: '第四阶段:多源证据推理 (Multi-Hop Reasoning)。进行证据一致性校验与矛盾检测。',
|
|
6
|
-
minItems: 1,
|
|
7
|
-
items: {
|
|
8
|
-
type: TYPE.OBJECT,
|
|
9
|
-
properties: {
|
|
10
|
-
对应命题ID: {
|
|
11
|
-
type: TYPE.STRING
|
|
12
|
-
},
|
|
13
|
-
证据三角化状态: {
|
|
14
|
-
type: TYPE.STRING,
|
|
15
|
-
description: '证据的支持情况',
|
|
16
|
-
enum: ['直接支持', '直接反驳', '时间线矛盾', '数量级偏差', '证据缺失']
|
|
17
|
-
},
|
|
18
|
-
证据来源: {
|
|
19
|
-
type: TYPE.ARRAY,
|
|
20
|
-
description: '引用的具体证据源',
|
|
21
|
-
items: {
|
|
22
|
-
type: TYPE.OBJECT,
|
|
23
|
-
properties: {
|
|
24
|
-
来源名称: {
|
|
25
|
-
type: TYPE.STRING,
|
|
26
|
-
description: '来源标题或网站名'
|
|
27
|
-
},
|
|
28
|
-
链接: {
|
|
29
|
-
type: TYPE.STRING,
|
|
30
|
-
description: 'URL链接'
|
|
31
|
-
},
|
|
32
|
-
引文片段: {
|
|
33
|
-
type: TYPE.STRING,
|
|
34
|
-
description: '相关的原文片段'
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
}
|
|
38
|
-
},
|
|
39
|
-
判定详情: {
|
|
40
|
-
type: TYPE.STRING,
|
|
41
|
-
description: '具体的判定理由,解释为何支持或反驳'
|
|
42
|
-
},
|
|
43
|
-
时间线矛盾: {
|
|
44
|
-
type: TYPE.BOOLEAN,
|
|
45
|
-
description: '是否存在时间逻辑上的矛盾(如:成立前就拿订单)'
|
|
46
|
-
},
|
|
47
|
-
数量级偏差: {
|
|
48
|
-
type: TYPE.BOOLEAN,
|
|
49
|
-
description: '是否存在数量级或定义的营销性夸大'
|
|
50
|
-
}
|
|
51
|
-
},
|
|
52
|
-
required: ['对应命题ID', '证据三角化状态', '判定详情']
|
|
53
|
-
}
|
|
54
|
-
};
|
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
import TYPE from '../TYPE.js';
|
|
2
|
-
|
|
3
|
-
export default {
|
|
4
|
-
type: TYPE.ARRAY,
|
|
5
|
-
description: '第五阶段:合成与对抗 (Adversarial Review)。引入红蓝军对抗,输出最终结论。',
|
|
6
|
-
minItems: 1,
|
|
7
|
-
items: {
|
|
8
|
-
type: TYPE.OBJECT,
|
|
9
|
-
properties: {
|
|
10
|
-
对抗辩论: {
|
|
11
|
-
type: TYPE.OBJECT,
|
|
12
|
-
description: '红蓝军自我博弈记录',
|
|
13
|
-
properties: {
|
|
14
|
-
律师陈述: {
|
|
15
|
-
type: TYPE.STRING,
|
|
16
|
-
description: 'Agent A (律师) 辩护词:尽力为创作者说法辩护,寻找合理解释'
|
|
17
|
-
},
|
|
18
|
-
检察官陈述: {
|
|
19
|
-
type: TYPE.STRING,
|
|
20
|
-
description: 'Agent B (检察官) 控诉词:尽力寻找漏洞,攻击其真实性'
|
|
21
|
-
}
|
|
22
|
-
},
|
|
23
|
-
required: ['律师陈述', '检察官陈述']
|
|
24
|
-
},
|
|
25
|
-
法官判决: {
|
|
26
|
-
type: TYPE.OBJECT,
|
|
27
|
-
description: 'Agent C (法官) 的最终裁决',
|
|
28
|
-
properties: {
|
|
29
|
-
可信度评分: {
|
|
30
|
-
type: TYPE.NUMBER,
|
|
31
|
-
description: '0-100分,分数越高越可信'
|
|
32
|
-
},
|
|
33
|
-
风险警示灯: {
|
|
34
|
-
type: TYPE.STRING,
|
|
35
|
-
description: '直观的风险等级',
|
|
36
|
-
enum: ['绿色-可靠', '黄色-警示', '红色-造假风险']
|
|
37
|
-
},
|
|
38
|
-
风险提示报告: {
|
|
39
|
-
type: TYPE.STRING,
|
|
40
|
-
description: '详细的风险点分析'
|
|
41
|
-
},
|
|
42
|
-
最终结论: {
|
|
43
|
-
type: TYPE.STRING,
|
|
44
|
-
description: '综合结论摘要'
|
|
45
|
-
},
|
|
46
|
-
建议追问: {
|
|
47
|
-
type: TYPE.ARRAY,
|
|
48
|
-
description: '建议投资经理进一步核实的问题清单',
|
|
49
|
-
items: {
|
|
50
|
-
type: TYPE.STRING
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
},
|
|
54
|
-
required: ['可信度评分', '风险警示灯', '最终结论']
|
|
55
|
-
}
|
|
56
|
-
},
|
|
57
|
-
required: ['对抗辩论', '法官判决']
|
|
58
|
-
}
|
|
59
|
-
};
|
package/factCheck.js
DELETED
|
@@ -1,189 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env -S node --trace-uncaught --expose-gc --unhandled-rejections=strict --experimental-wasm-modules
|
|
2
|
-
var PREFIX, SHOW_KIND, addUrl, circle, realUrl, txtLi, urlLi;
|
|
3
|
-
|
|
4
|
-
import retry from '@3-/retry';
|
|
5
|
-
|
|
6
|
-
import utf8d from '@3-/utf8/utf8d.js';
|
|
7
|
-
|
|
8
|
-
import utf8e from '@3-/utf8/utf8e.js';
|
|
9
|
-
|
|
10
|
-
// @3-/read
|
|
11
|
-
// @3-/write
|
|
12
|
-
SHOW_KIND = ['严重失实', '不正确'];
|
|
13
|
-
|
|
14
|
-
PREFIX = `对下文问答中提及的事实做核查(不核查观点)。
|
|
15
|
-
数据偏差不超过30%都当做基本一致,找不到信息当无法核实。
|
|
16
|
-
必须搜索到明确的证伪证据并才算失实。
|
|
17
|
-
只输出不正确/严重失实的核查。
|
|
18
|
-
输出文本必须是简体中文,格式如下:
|
|
19
|
-
- id:
|
|
20
|
-
标题: 要简短,几个词即可
|
|
21
|
-
观点:
|
|
22
|
-
事实: 须标注搜索到的事实网页, 完整描述观点错误所在(不引用其他核查)
|
|
23
|
-
失实度: ${SHOW_KIND.join(' / ')} / 基本一致 / 无法核实
|
|
24
|
-
---\n`;
|
|
25
|
-
|
|
26
|
-
realUrl = async(url) => {
|
|
27
|
-
var err, r, status;
|
|
28
|
-
if (url.startsWith('http')) {
|
|
29
|
-
try {
|
|
30
|
-
r = (await fetch(url, {
|
|
31
|
-
method: 'HEAD',
|
|
32
|
-
redirect: 'follow'
|
|
33
|
-
}));
|
|
34
|
-
({status} = r);
|
|
35
|
-
if (status === 200) {
|
|
36
|
-
({url} = r);
|
|
37
|
-
} else {
|
|
38
|
-
console.error(status + ' :' + url);
|
|
39
|
-
}
|
|
40
|
-
} catch (error) {
|
|
41
|
-
err = error;
|
|
42
|
-
console.error(err);
|
|
43
|
-
}
|
|
44
|
-
}
|
|
45
|
-
return url;
|
|
46
|
-
};
|
|
47
|
-
|
|
48
|
-
circle = (num) => {
|
|
49
|
-
switch (false) {
|
|
50
|
-
case !(num > 0 && num <= 10):
|
|
51
|
-
return String.fromCodePoint(0x245F + num);
|
|
52
|
-
default:
|
|
53
|
-
return `[${num}]`;
|
|
54
|
-
}
|
|
55
|
-
};
|
|
56
|
-
|
|
57
|
-
urlLi = (id_li, url_li) => {
|
|
58
|
-
var i, r;
|
|
59
|
-
r = [];
|
|
60
|
-
for (i of id_li) {
|
|
61
|
-
r.push('[' + circle(i + 1) + '](' + url_li[i] + ')');
|
|
62
|
-
}
|
|
63
|
-
return r.join('');
|
|
64
|
-
};
|
|
65
|
-
|
|
66
|
-
addUrl = (text, items) => {
|
|
67
|
-
var bin, chunks, end, entry, j, len, output, pos, push, sorted, start;
|
|
68
|
-
output = '';
|
|
69
|
-
push = (bin) => {
|
|
70
|
-
output += utf8d(bin);
|
|
71
|
-
};
|
|
72
|
-
pos = 0;
|
|
73
|
-
sorted = items.sort(function(a, b) {
|
|
74
|
-
return a.segment.startIndex - b.segment.startIndex;
|
|
75
|
-
});
|
|
76
|
-
bin = utf8e(text);
|
|
77
|
-
for (j = 0, len = sorted.length; j < len; j++) {
|
|
78
|
-
entry = sorted[j];
|
|
79
|
-
({
|
|
80
|
-
segment: {
|
|
81
|
-
startIndex: start,
|
|
82
|
-
endIndex: end
|
|
83
|
-
},
|
|
84
|
-
groundingChunkIndices: chunks
|
|
85
|
-
} = entry);
|
|
86
|
-
if (start > pos) {
|
|
87
|
-
push(bin.slice(pos, start));
|
|
88
|
-
}
|
|
89
|
-
push(bin.slice(start, end));
|
|
90
|
-
output += chunks;
|
|
91
|
-
pos = end;
|
|
92
|
-
}
|
|
93
|
-
if (pos < bin.length) {
|
|
94
|
-
push(bin.slice(pos));
|
|
95
|
-
}
|
|
96
|
-
return output;
|
|
97
|
-
};
|
|
98
|
-
|
|
99
|
-
txtLi = (txt) => {
|
|
100
|
-
var all_tag, i, id, pre, r, reset, t, tag, trim, txt_li;
|
|
101
|
-
r = [];
|
|
102
|
-
reset = () => {
|
|
103
|
-
all_tag = new Set(['标题', '观点', '事实', '失实度']);
|
|
104
|
-
if (t) {
|
|
105
|
-
r.push(t);
|
|
106
|
-
}
|
|
107
|
-
pre = t = void 0;
|
|
108
|
-
};
|
|
109
|
-
txt_li = txt.replaceAll('**', '').split('\n');
|
|
110
|
-
out: //;
|
|
111
|
-
for (i of txt_li) {
|
|
112
|
-
if (i.startsWith('- id:')) {
|
|
113
|
-
reset();
|
|
114
|
-
id = parseInt(i.slice(5).trim());
|
|
115
|
-
if (id) {
|
|
116
|
-
t = {id};
|
|
117
|
-
}
|
|
118
|
-
} else if (t) {
|
|
119
|
-
trim = i.replace('- ', '').trimStart();
|
|
120
|
-
for (tag of all_tag) {
|
|
121
|
-
if (trim.startsWith(tag + ':')) {
|
|
122
|
-
all_tag.delete(tag);
|
|
123
|
-
pre = tag;
|
|
124
|
-
t[tag] = trim.slice(tag.length + 1).trim();
|
|
125
|
-
if (tag === '失实度' && all_tag.size === 0) {
|
|
126
|
-
reset();
|
|
127
|
-
}
|
|
128
|
-
continue out;
|
|
129
|
-
}
|
|
130
|
-
}
|
|
131
|
-
if (pre && trim) {
|
|
132
|
-
t[pre] += '\n' + trim;
|
|
133
|
-
}
|
|
134
|
-
}
|
|
135
|
-
}
|
|
136
|
-
if (t) {
|
|
137
|
-
r.push(t);
|
|
138
|
-
}
|
|
139
|
-
return r;
|
|
140
|
-
};
|
|
141
|
-
|
|
142
|
-
export default retry(async(chat, li) => {
|
|
143
|
-
var content, groundingChunks, groundingSupports, i, map, msg, out, ref, ref1, uri, url_li, x;
|
|
144
|
-
msg = PREFIX + li.map((i, pos) => {
|
|
145
|
-
return 'id:' + (pos + 1) + '\n' + i;
|
|
146
|
-
}).join('\n---\n');
|
|
147
|
-
out = (await chat(msg, 0, 0, {
|
|
148
|
-
tools: {
|
|
149
|
-
google_search: {}
|
|
150
|
-
}
|
|
151
|
-
}));
|
|
152
|
-
({
|
|
153
|
-
// write '/tmp/fact.json',JSON.stringify out
|
|
154
|
-
// out = JSON.parse read '/tmp/fact.json'
|
|
155
|
-
content,
|
|
156
|
-
groundingMetadata: {groundingChunks, groundingSupports}
|
|
157
|
-
} = out);
|
|
158
|
-
if (!groundingChunks) {
|
|
159
|
-
return new Map();
|
|
160
|
-
}
|
|
161
|
-
content = content.parts[0].text;
|
|
162
|
-
console.log(content);
|
|
163
|
-
url_li = [];
|
|
164
|
-
for (x of groundingChunks) {
|
|
165
|
-
({
|
|
166
|
-
web: {uri}
|
|
167
|
-
} = x);
|
|
168
|
-
url_li.push(realUrl(uri));
|
|
169
|
-
}
|
|
170
|
-
url_li = (await Promise.all(url_li));
|
|
171
|
-
for (i of groundingSupports) {
|
|
172
|
-
i.groundingChunkIndices = urlLi(i.groundingChunkIndices, url_li);
|
|
173
|
-
}
|
|
174
|
-
map = new Map();
|
|
175
|
-
ref = txtLi(addUrl(content, groundingSupports));
|
|
176
|
-
for (i of ref) {
|
|
177
|
-
console.log(i);
|
|
178
|
-
if (!SHOW_KIND.includes((ref1 = i.失实度) != null ? ref1.split('[')[0] : void 0)) {
|
|
179
|
-
continue;
|
|
180
|
-
}
|
|
181
|
-
li = map.get(i.id);
|
|
182
|
-
if (!li) {
|
|
183
|
-
li = [];
|
|
184
|
-
map.set(i.id, li);
|
|
185
|
-
}
|
|
186
|
-
li.push(i);
|
|
187
|
-
}
|
|
188
|
-
return map;
|
|
189
|
-
});
|
package/fmtWithFactCheck.js
DELETED
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env -S node --trace-uncaught --expose-gc --unhandled-rejections=strict --experimental-wasm-modules
|
|
2
|
-
import fmtSeg from './fmtSeg.js';
|
|
3
|
-
|
|
4
|
-
import partition from './partition.js';
|
|
5
|
-
|
|
6
|
-
import GEN_QA from './check/GEN_QA.js';
|
|
7
|
-
|
|
8
|
-
export default async(chat, txt) => {
|
|
9
|
-
var pli;
|
|
10
|
-
pli = (await partition(chat, txt));
|
|
11
|
-
if (!pli.length) {
|
|
12
|
-
return [];
|
|
13
|
-
}
|
|
14
|
-
return (await Promise.all(pli.map(async([title, li]) => {
|
|
15
|
-
var i, ref, t;
|
|
16
|
-
t = li.join('\n');
|
|
17
|
-
console.log('\n---\n→ ' + title + '\n' + t + '\n---\n');
|
|
18
|
-
ref = (await chat(t, GEN_QA, "请帮忙筛选出可以被搜索验证、影响决策的核心问题。如果是公司内部的数据、资料,不要列成搜索问题。如果搜索主体的名字不明确,不要列成问题。"));
|
|
19
|
-
for (i of ref) {
|
|
20
|
-
if (i.可查性 > 6 && i.重要度 > 6) {
|
|
21
|
-
console.log(i);
|
|
22
|
-
}
|
|
23
|
-
}
|
|
24
|
-
})));
|
|
25
|
-
};
|
|
26
|
-
|
|
27
|
-
// return Promise.all
|
|
28
|
-
// console.log '\n---\n→ '+title+'\n'+li.join('\n')+'\n---\n'
|
|
29
|
-
// [
|
|
30
|
-
// title
|
|
31
|
-
// await fmtSeg(chat, li.join('\n'))
|
|
32
|
-
// ]
|