@3-/aiapi 0.1.10 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/factCheck.js +177 -0
- package/fmtSeg.js +3 -1
- package/fmtWithFactCheck.js +55 -0
- package/gemini.js +21 -14
- package/package.json +4 -2
package/factCheck.js
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
#!/usr/bin/env -S node --trace-uncaught --expose-gc --unhandled-rejections=strict --experimental-wasm-modules
|
|
2
|
+
var PREFIX, SHOW_KIND, addUrl, circle, realUrl, txtLi, urlLi;
|
|
3
|
+
|
|
4
|
+
import retry from '@3-/retry';
|
|
5
|
+
|
|
6
|
+
import utf8d from '@3-/utf8/utf8d.js';
|
|
7
|
+
|
|
8
|
+
import utf8e from '@3-/utf8/utf8e.js';
|
|
9
|
+
|
|
10
|
+
// @3-/read
|
|
11
|
+
// @3-/write
|
|
12
|
+
// Verdict labels whose findings are kept in the result. They are also offered
// to the model as the leading choices of the verdict field in PREFIX.
SHOW_KIND = ['严重失实'];
|
|
13
|
+
|
|
14
|
+
// Prompt header placed in front of the numbered Q&A texts. In English: fact-check
// the Q&A pairs below and output only the claims that contradict the facts; a
// claim counts as false only when clear contrary evidence is found by search,
// ambiguous cases count as "cannot verify"; the output must be Simplified Chinese
// in the list format shown (id, short title, claim, fact with the source page,
// degree of inaccuracy chosen from the listed verdicts).
PREFIX = `对下文问答对做事实核查,只输出与事实不符的观点。
|
|
15
|
+
必须搜索到明确的反面事实才算失实,模棱两可算无法核实。
|
|
16
|
+
输出文本必须是简体中文,格式如下:
|
|
17
|
+
- id:
|
|
18
|
+
标题: 要简短,一两个词即可
|
|
19
|
+
观点:
|
|
20
|
+
事实: 须标注搜索到的事实网页
|
|
21
|
+
失实度: ${SHOW_KIND.join(' / ')} / 略有出入 / 基本一致 / 无法核实
|
|
22
|
+
---\n`;
|
|
23
|
+
|
|
24
|
+
/**
 * Resolve a URL to the address it finally redirects to.
 *
 * Sends a HEAD request with `redirect: 'follow'`. When the final response
 * status is exactly 200, the response's own `url` is returned; any other
 * status is logged and the input is returned unchanged. Network errors are
 * logged and also fall back to the input, so this function never rejects.
 *
 * @param {string} url - Address to resolve. Only strings starting with
 *   "http" are fetched; anything else is returned as-is.
 * @returns {Promise<*>} The post-redirect URL, or the input unchanged.
 */
realUrl = async(url) => {
  // A non-string value (for example a missing uri) used to throw on
  // `startsWith` and reject the caller's Promise.all; pass it through instead.
  if (typeof url !== 'string' || !url.startsWith('http')) {
    return url;
  }
  try {
    const res = await fetch(url, {
      method: 'HEAD',
      redirect: 'follow'
    });
    if (res.status === 200) {
      return res.url;
    }
    console.error(res.status + ' :' + url);
  } catch (err) {
    // Best effort: report the failure and keep the unresolved address.
    console.error(err);
  }
  return url;
};
|
|
45
|
+
|
|
46
|
+
/**
 * Format a reference number as a compact marker.
 *
 * Integers 1 through 10 become the Unicode circled digits (U+2460 .. U+2469);
 * every other value falls back to the bracketed form `[num]`.
 *
 * @param {number} num - Reference number to format.
 * @returns {string} A circled digit, or `[num]` when no circled form is used.
 */
circle = (num) => {
  // The integer check matters: a fractional value inside the range would reach
  // String.fromCodePoint with a non-integer code point and throw RangeError.
  if (Number.isInteger(num) && num >= 1 && num <= 10) {
    return String.fromCodePoint(0x245F + num);
  }
  return `[${num}]`;
};
|
|
54
|
+
|
|
55
|
+
/**
 * Render a list of source indices as a run of Markdown reference links.
 *
 * Each index `i` becomes `[marker](url_li[i])`, where the marker is
 * `circle(i + 1)`; the links are concatenated without a separator.
 *
 * @param {Iterable<number>} [id_li] - Zero-based indices into `url_li`.
 *   A missing list yields an empty string.
 * @param {string[]} url_li - URL for each index.
 * @returns {string} Concatenated Markdown links.
 */
urlLi = (id_li, url_li) => {
  const links = [];
  for (const idx of id_li ?? []) {
    const url = url_li[idx];
    // An index without a URL used to produce a broken "(undefined)" link;
    // leave such references out instead.
    if (!url) {
      continue;
    }
    links.push('[' + circle(idx + 1) + '](' + url + ')');
  }
  return links.join('');
};
|
|
63
|
+
|
|
64
|
+
/**
 * Insert each item's reference text right after the segment it supports.
 *
 * Segment offsets are applied to the result of `utf8e(text)` (presumably the
 * UTF-8 bytes of the text - confirm against @3-/utf8), and the pieces are
 * turned back into strings with `utf8d`.
 *
 * @param {string} text - Text the segments refer to.
 * @param {Array<{segment: {startIndex?: number, endIndex: number},
 *   groundingChunkIndices: *}>} items - Segments with the value to append
 *   after each one. The array is not modified.
 * @returns {string} The text with every item's `groundingChunkIndices`
 *   appended after its segment end.
 */
addUrl = (text, items) => {
  const bin = utf8e(text);
  // A segment starting at offset 0 may arrive without `startIndex`; treating
  // it as 0 keeps the comparator from returning NaN.
  const startOf = (item) => item.segment.startIndex ?? 0;
  // Sort a copy so the caller's array keeps its order.
  const ordered = [...items].sort((a, b) => startOf(a) - startOf(b));
  let output = '';
  let pos = 0;
  for (const item of ordered) {
    const end = item.segment.endIndex;
    // Emit only bytes not written yet, so overlapping or nested segments do
    // not duplicate text and `pos` never moves backwards.
    if (end > pos) {
      output += utf8d(bin.slice(pos, end));
      pos = end;
    }
    output += item.groundingChunkIndices;
  }
  if (pos < bin.length) {
    output += utf8d(bin.slice(pos));
  }
  return output;
};
|
|
96
|
+
|
|
97
|
+
/**
 * Parse the model's list-formatted answer into one object per finding.
 *
 * A line starting with `- id:` opens a record; the following indented
 * `key: value` lines become its properties (a leading `- ` on the key is
 * removed, and all `**` bold markers are stripped from the text first).
 * A non-blank line without leading whitespace closes the current record.
 * Blank lines are ignored, so a spacer line inside a record does not drop its
 * remaining fields.
 *
 * @param {string} txt - Raw model output.
 * @returns {Array<Object>} Records in order of appearance, each with a
 *   numeric `id` plus the parsed string fields.
 */
txtLi = (txt) => {
  const records = [];
  let current = null;
  const flush = () => {
    if (current) {
      records.push(current);
      current = null;
    }
  };
  for (const line of txt.replaceAll('**', '').split('\n')) {
    if (line.startsWith('- id:')) {
      flush();
      const id = Number.parseInt(line.slice(5).trim(), 10);
      // An unparsable (or zero) id opens no record. Previously the record that
      // had just been stored stayed active, was stored a second time and
      // collected the fields of the malformed entry.
      if (id) {
        current = {id};
      }
      continue;
    }
    if (!current) {
      continue;
    }
    const body = line.trimStart();
    if (body === '') {
      continue;
    }
    // The original compared `trim.length` with the line itself (number vs
    // string), so this branch could never run; compare the lengths instead.
    if (body.length === line.length) {
      flush();
      continue;
    }
    const colon = body.indexOf(':');
    if (colon > 0) {
      const tag = body.slice(0, colon).replace('- ', '').trim();
      current[tag] = body.slice(colon + 1).trim();
    }
  }
  flush();
  return records;
};
|
|
130
|
+
|
|
131
|
+
/**
 * Fact-check a list of Q&A texts with one search-grounded model call.
 *
 * The texts are numbered from 1, prefixed with PREFIX and sent through
 * `chat` with the `google_search` tool enabled and no schema or system
 * prompt. Source URLs from the grounding metadata are resolved with
 * `realUrl`, inserted into the answer text as reference links, and the
 * annotated text is parsed with `txtLi`. Only findings whose verdict is
 * listed in SHOW_KIND are returned.
 *
 * The whole function is wrapped in `retry` from @3-/retry (presumably it
 * re-runs the call when it rejects - confirm against that package).
 *
 * @param {Function} chat - Called as `chat(text, schema, system, option)`;
 *   expected to resolve to an object with `content` and, when search was
 *   used, `groundingMetadata`.
 * @param {string[]} li - Q&A texts; the item at position `p` gets id `p + 1`.
 * @returns {Promise<Map<number, Object[]>>} Findings grouped by id. Empty
 *   when the response carries no grounding chunks.
 */
export default retry(async(chat, li) => {
  const msg = PREFIX + li.map((text, pos) => {
    return 'id:' + (pos + 1) + '\n' + text;
  }).join('\n---\n');
  const out = await chat(msg, 0, 0, {
    tools: {
      google_search: {}
    }
  });
  // `groundingMetadata` may be absent altogether; destructuring it directly
  // used to throw before the "no grounding" guard below could run.
  const {groundingChunks, groundingSupports} = out.groundingMetadata ?? {};
  const result = new Map();
  if (!groundingChunks) {
    return result;
  }
  const text = out.content.parts[0].text;
  // Chunks without a web uri stay unresolved rather than failing the batch.
  const url_li = await Promise.all(groundingChunks.map((chunk) => {
    const uri = chunk.web?.uri;
    return uri ? realUrl(uri) : undefined;
  }));
  // Build annotated copies instead of rewriting the response objects, and
  // keep only indices that have a usable URL.
  const supports = (groundingSupports ?? []).map((support) => {
    const indices = (support.groundingChunkIndices ?? []).filter((idx) => url_li[idx]);
    return {
      ...support,
      groundingChunkIndices: urlLi(indices, url_li)
    };
  });
  for (const item of txtLi(addUrl(text, supports))) {
    // Trace of every parsed finding, kept from the original implementation.
    console.log(item);
    // The verdict may be followed by reference links; compare only the part
    // before the first "[" and ignore surrounding whitespace.
    const kind = item.失实度?.split('[')[0].trim();
    if (!SHOW_KIND.includes(kind)) {
      continue;
    }
    const bucket = result.get(item.id);
    if (bucket) {
      bucket.push(item);
    } else {
      result.set(item.id, [item]);
    }
  }
  return result;
});
|
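package/fmtSeg.js
CHANGED
[the hunk for package/fmtSeg.js (+3 -1) is missing from this capture]
package/fmtWithFactCheck.js
ADDED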
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
#!/usr/bin/env -S node --trace-uncaught --expose-gc --unhandled-rejections=strict --experimental-wasm-modules
|
|
2
|
+
var check;
|
|
3
|
+
|
|
4
|
+
import fmtJson from './fmtJson.js';
|
|
5
|
+
|
|
6
|
+
import fmtJsonMd from './fmtJsonMd.js';
|
|
7
|
+
|
|
8
|
+
import factCheck from './factCheck.js';
|
|
9
|
+
|
|
10
|
+
import txtFmt from '@3-/txt_li/txtFmt.js';
|
|
11
|
+
|
|
12
|
+
// @3-/read
|
|
13
|
+
// @3-/write
|
|
14
|
+
/**
 * Fact-check every Q&A pair of one document and append the findings to the
 * affected answers in place.
 *
 * All pairs are collected in document order, formatted with `txtFmt` and
 * sent to `factCheck` in one call. The returned map is keyed by the 1-based
 * position of each pair; every finding for a pair is appended to its `答`
 * (answer) field as a Markdown heading followed by a quoted claim and fact.
 *
 * @param {Function} chat - Chat function forwarded to `factCheck`.
 * @param {Iterable<{问答: Iterable<{问: string, 答: string}>}>} json -
 *   Sections holding the Q&A pairs; the pair objects are mutated.
 * @returns {Promise<void>}
 */
check = async(chat, json) => {
  const pairs = [];
  for (const section of json) {
    for (const pair of section.问答) {
      pairs.push(pair);
    }
  }
  const texts = pairs.map(({问, 答}) => `问:${txtFmt(问)}\n答:${txtFmt(答)}`);
  // Keep the result in a local. The original assigned it to `check`, which
  // replaced this very function with a Map and broke every later call.
  const found = await factCheck(chat, texts);
  pairs.forEach((pair, idx) => {
    const findings = found.get(idx + 1);
    if (!findings) {
      return;
    }
    for (const k of findings) {
      pair.答 += `\n#### ${k.失实度} : ${k.标题}

> 观点: ${k.观点}
>
> 事实:
>
> ${k.事实}`;
    }
  });
};
|
|
44
|
+
|
|
45
|
+
/**
 * Build the Q&A documents for `txt`, fact-check each of them, and hand the
 * annotated documents to `fmtJsonMd`.
 *
 * `fmtJson(chat, txt)` supplies the list of documents. Every document is
 * passed to `check`, all checks running concurrently, and `check` annotates
 * them in place before the final formatting step.
 *
 * @param {Function} chat - Chat function forwarded to `fmtJson` and `check`.
 * @param {string} txt - Source text.
 * @returns {Promise<*>} Whatever `fmtJsonMd` returns for the checked documents.
 */
export default async(chat, txt) => {
  const docs = await fmtJson(chat, txt);
  const pending = docs.map((doc) => check(chat, doc));
  await Promise.all(pending);
  return fmtJsonMd(docs);
};
|
package/gemini.js
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
#!/usr/bin/env -S node --trace-uncaught --expose-gc --unhandled-rejections=strict --experimental-wasm-modules
|
|
2
2
|
import sleep from '@3-/sleep';
|
|
3
3
|
|
|
4
|
+
import merge from 'lodash-es/merge.js';
|
|
5
|
+
|
|
4
6
|
export default (token_li) => {
|
|
5
7
|
var _NEXT_TOKEN, _nextToken, nextToken;
|
|
6
8
|
token_li.sort(() => {
|
|
@@ -18,21 +20,23 @@ export default (token_li) => {
|
|
|
18
20
|
nextToken = () => {
|
|
19
21
|
return _NEXT_TOKEN.next().value;
|
|
20
22
|
};
|
|
21
|
-
return async(text, schema, system,
|
|
23
|
+
return async(text, schema, system, option) => {
|
|
22
24
|
var body, err, error, message, r, status;
|
|
23
25
|
body = {
|
|
24
|
-
generationConfig: {
|
|
25
|
-
responseMimeType: 'application/json',
|
|
26
|
-
responseJsonSchema: schema
|
|
27
|
-
},
|
|
28
26
|
contents: [
|
|
29
27
|
{
|
|
30
28
|
parts: [{text}]
|
|
31
29
|
}
|
|
32
30
|
]
|
|
33
31
|
};
|
|
34
|
-
if (
|
|
35
|
-
|
|
32
|
+
if (schema) {
|
|
33
|
+
body.generationConfig = {
|
|
34
|
+
responseMimeType: 'application/json',
|
|
35
|
+
responseJsonSchema: schema
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
if (option) {
|
|
39
|
+
merge(body, option);
|
|
36
40
|
}
|
|
37
41
|
if (system) {
|
|
38
42
|
body.system_instruction = {
|
|
@@ -87,13 +91,16 @@ export default (token_li) => {
|
|
|
87
91
|
}
|
|
88
92
|
throw new Error(text);
|
|
89
93
|
}
|
|
90
|
-
r = ((await r.json())).candidates[0]
|
|
91
|
-
|
|
92
|
-
r =
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
94
|
+
r = ((await r.json())).candidates[0];
|
|
95
|
+
if (schema) {
|
|
96
|
+
r = r.content.parts[0].text;
|
|
97
|
+
try {
|
|
98
|
+
r = JSON.parse(r);
|
|
99
|
+
} catch (error1) {
|
|
100
|
+
err = error1;
|
|
101
|
+
console.error(r);
|
|
102
|
+
throw err;
|
|
103
|
+
}
|
|
97
104
|
}
|
|
98
105
|
return r;
|
|
99
106
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@3-/aiapi",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.12",
|
|
4
4
|
"repository": {
|
|
5
5
|
"type": "git",
|
|
6
6
|
"url": "git+https://atomgit.com/i18n/lib.git"
|
|
@@ -22,7 +22,9 @@
|
|
|
22
22
|
"dependencies": {
|
|
23
23
|
"@3-/retry": "^0.0.2",
|
|
24
24
|
"@3-/sleep": "^0.0.4",
|
|
25
|
-
"@3-/txt_li": "^0.1.4"
|
|
25
|
+
"@3-/txt_li": "^0.1.4",
|
|
26
|
+
"@3-/utf8": "^0.0.4",
|
|
27
|
+
"lodash-es": "^4.17.21"
|
|
26
28
|
},
|
|
27
29
|
"scripts": {}
|
|
28
30
|
}
|