halbot 1995.1.58 → 1995.1.61

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.mjs CHANGED
@@ -11,11 +11,27 @@ const init = async (options = {}) => {
11
11
  const info = bot.lines([
12
12
  `[${bot.BOT} ${pkg.title}](${pkg.homepage})`, pkg.description
13
13
  ]);
14
- // use google's search if google is enabled
15
- options.googleApiKey && options.googleCx && await web.initSearch({
16
- provider: 'GOOGLE', apiKey: options.googleApiKey, cx: options.googleCx,
17
- });
18
- // use openrouter's AI models, embedding if OpenRouter is enabled
14
+ // use google rerank
15
+ if (options?.googleCredentials && options.googleProjectId) {
16
+ opts = {
17
+ provider: 'GOOGLE', credentials: options.googleCredentials,
18
+ projectId: options.googleProjectId,
19
+ };
20
+ if (!_rerank) {
21
+ await rag.initReranker(opts);
22
+ _rerank = rag.rerank;
23
+ }
24
+ }
25
+ // use jina search and reranker
26
+ if (options.jinaApiKey) {
27
+ opts = { provider: 'JINA', apiKey: options.jinaApiKey };
28
+ await web.initSearch(opts);
29
+ if (!_rerank) {
30
+ await rag.initReranker(opts);
31
+ _rerank = rag.rerank;
32
+ }
33
+ }
34
+ // use openrouter's AI models
19
35
  if (options.openrouterApiKey) {
20
36
  opts = { provider: 'OPENROUTER', apiKey: options.openrouterApiKey };
21
37
  await alan.init({
@@ -27,17 +43,8 @@ const init = async (options = {}) => {
27
43
  await rag.initEmbedding(opts);
28
44
  _embed = rag.embed;
29
45
  }
30
- }
31
- // use google's imagen, veo, tts if google is enabled
32
- if (options.googleApiKey) {
33
- opts = { provider: 'GOOGLE', apiKey: options.googleApiKey };
34
- await alan.init({
35
- ...opts, model: options.googleModel || '*',
36
- priority: options.googlePriority, ...options,
37
- });
38
- }
39
- // use openai's embedding, tts if openai is enabled, and google is not
40
- if (options.openaiApiKey) {
46
+ // use openai models, embedding if openrouter is disabled
47
+ } else if (options.openaiApiKey && options.openaiModels) {
41
48
  opts = { provider: 'OPENAI', apiKey: options.openaiApiKey };
42
49
  await alan.init({
43
50
  ...opts, model: options.openaiModel || '*',
@@ -48,31 +55,15 @@ const init = async (options = {}) => {
48
55
  _embed = rag.embed;
49
56
  }
50
57
  }
51
- // use google rerank if google is enabled
52
- if (options?.googleCredentials && options.googleProjectId) {
53
- opts = {
54
- provider: 'GOOGLE', credentials: options.googleCredentials,
55
- projectId: options.googleProjectId,
56
- };
57
- if (!_rerank) {
58
- await rag.initReranker(opts);
59
- _rerank = rag.rerank;
60
- }
61
- }
62
- // init other ai providers
63
- options.siliconflowApiKey && await alan.init({
64
- provider: 'SILICONFLOW', apiKey: options.siliconflowApiKey,
65
- model: options.siliconflowModel || '*',
66
- priority: options.siliconflowPriority, ...options,
67
- });
68
- if (options.jinaApiKey) {
69
- opts = { provider: 'JINA', apiKey: options.jinaApiKey };
70
- await web.initSearch(opts);
71
- if (!_rerank) {
72
- await rag.initReranker(opts);
73
- _rerank = rag.rerank;
74
- }
58
+ // use google's veo
59
+ if (options.googleApiKey) {
60
+ opts = { provider: 'GOOGLE', apiKey: options.googleApiKey };
61
+ await alan.init({
62
+ ...opts, model: options.googleModel || '*',
63
+ priority: options.googlePriority, ...options,
64
+ });
75
65
  }
66
+ // use ollama
76
67
  if (options?.ollamaEnabled || options?.ollamaEndpoint) {
77
68
  await alan.init({
78
69
  provider: 'OLLAMA', model: options?.ollamaModel || '*',
package/lib/hal.mjs CHANGED
@@ -20,6 +20,9 @@ const { __filename } = utilitas.__(import.meta.url);
20
20
  const workdir = path.dirname(__filename);
21
21
  const getPath = (subPath) => { return path.join(workdir, subPath || ''); };
22
22
  const table = 'utilitas_hal_events';
23
+ const vectorIndex = `${table}_distilled_vector_index`;
24
+ const vectorIndexLists = 1000;
25
+ const vectorIndexProbes = 20;
23
26
  const log = (c, o) => utilitas.log(c, import.meta.url, { time: 1, ...o || {} });
24
27
  const [end] = [bot.end];
25
28
  const uList = arr => bot.lines(arr.map(x => `- ${x}`));
@@ -65,6 +68,8 @@ const initSql = {
65
68
  ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4`), [table],
66
69
  ]],
67
70
  [dbio.POSTGRESQL]: [[
71
+ 'CREATE EXTENSION IF NOT EXISTS vchord CASCADE',
72
+ ], [
68
73
  dbio.cleanSql(`CREATE TABLE IF NOT EXISTS ${table} (
69
74
  id SERIAL PRIMARY KEY,
70
75
  bot_id BIGINT NOT NULL,
@@ -77,7 +82,7 @@ const initSql = {
77
82
  response_text TEXT NOT NULL,
78
83
  collected TEXT NOT NULL,
79
84
  distilled TEXT NOT NULL,
80
- distilled_vector VECTOR(768) NOT NULL,
85
+ distilled_vector RABITQ8(768) NOT NULL,
81
86
  token VARCHAR(255) NOT NULL DEFAULT '',
82
87
  created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
83
88
  updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
@@ -102,8 +107,6 @@ const initSql = {
102
107
  `CREATE INDEX IF NOT EXISTS ${table}_collected_index ON ${table} USING GIN(to_tsvector('english', collected))`,
103
108
  ], [
104
109
  `CREATE INDEX IF NOT EXISTS ${table}_distilled_index ON ${table} USING GIN(to_tsvector('english', distilled))`,
105
- ], [
106
- `CREATE INDEX IF NOT EXISTS ${table}_distilled_vector_index ON ${table} USING hnsw(distilled_vector vector_cosine_ops)`,
107
110
  ], [
108
111
  `CREATE INDEX IF NOT EXISTS ${table}_token_index ON ${table} (token)`,
109
112
  ], [
@@ -115,6 +118,24 @@ const initSql = {
115
118
 
116
119
  let _;
117
120
 
121
+ const ensurePostgresqlRagIndex = async (client) => {
122
+ const index = await client.query(
123
+ `SELECT indexdef FROM pg_indexes
124
+ WHERE tablename = $1 AND indexname = $2`,
125
+ [table, vectorIndex]
126
+ );
127
+ if (index?.length) { return; }
128
+ await client.query(`CREATE INDEX IF NOT EXISTS ${vectorIndex}
129
+ ON ${table} USING vchordrq (distilled_vector rabitq8_cosine_ops)
130
+ WITH (options = $$
131
+ [build.internal]
132
+ lists = [${vectorIndexLists}]
133
+ spherical_centroids = true
134
+ build_threads = 8
135
+ $$, probes = '${vectorIndexProbes}')`
136
+ );
137
+ };
138
+
118
139
  const newCommand = (command, description) => ({
119
140
  command: utilitas.ensureString(command, { case: 'SNAKE' }).slice(0, COMMAND_LENGTH),
120
141
  description: utilitas.trim(description).slice(0, COMMAND_DESCRIPTION_LENGTH),
@@ -219,6 +240,9 @@ const init = async (options) => {
219
240
  for (const act of initSql[_.storage?.provider]) {
220
241
  dbResult.push(await _.storage.client.query(...act));
221
242
  }
243
+ if (_.storage?.provider === dbio.POSTGRESQL) {
244
+ await ensurePostgresqlRagIndex(_.storage.client);
245
+ }
222
246
  } catch (e) { console.error(e); }
223
247
  }
224
248
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "halbot",
3
3
  "description": "Just another AI powered Telegram bot, which is simple design, easy to use, extendable and fun.",
4
- "version": "1995.1.58",
4
+ "version": "1995.1.61",
5
5
  "private": false,
6
6
  "homepage": "https://github.com/Leask/halbot",
7
7
  "type": "module",
@@ -31,27 +31,27 @@
31
31
  "dependencies": {
32
32
  "@ffmpeg-installer/ffmpeg": "^1.1.0",
33
33
  "@ffprobe-installer/ffprobe": "^2.1.2",
34
- "@google-cloud/discoveryengine": "^2.5.2",
35
- "@google/genai": "^1.43.0",
34
+ "@google-cloud/discoveryengine": "^2.6.0",
35
+ "@google/genai": "^1.51.0",
36
36
  "@mozilla/readability": "^0.6.0",
37
37
  "@resvg/resvg-js": "^2.6.2",
38
38
  "fluent-ffmpeg": "^2.1.3",
39
39
  "google-gax": "^5.0.6",
40
- "ioredis": "^5.9.3",
41
- "jsdom": "^28.1.0",
40
+ "ioredis": "^5.10.1",
41
+ "jsdom": "^29.1.1",
42
42
  "lorem-ipsum": "^2.0.8",
43
43
  "mime": "^4.1.0",
44
- "mysql2": "^3.18.1",
44
+ "mysql2": "^3.22.3",
45
45
  "office-text-extractor": "^4.0.0",
46
- "openai": "^6.25.0",
46
+ "openai": "^6.35.0",
47
47
  "parse-numeric-range": "^1.3.0",
48
- "pg": "^8.19.0",
48
+ "pg": "^8.20.0",
49
49
  "pgvector": "^0.2.1",
50
- "satori": "^0.19.2",
50
+ "satori": "^0.26.0",
51
51
  "telegraf": "^4.16.3",
52
52
  "tellegram": "^1.1.18",
53
53
  "tesseract.js": "^7.0.0",
54
- "webjam": "^1995.3.10",
55
- "youtube-transcript": "^1.2.1"
54
+ "webjam": "^1995.3.21",
55
+ "youtube-transcript": "^1.3.1"
56
56
  }
57
57
  }
@@ -1,5 +1,6 @@
1
- import { bot, hal, uoid, utilitas } from '../index.mjs';
1
+ import { bot, hal, storage, uoid, utilitas } from '../index.mjs';
2
2
  import { convert, paginate } from 'tellegram';
3
+ import { v4 as uuidv4 } from 'uuid';
3
4
 
4
5
  const _name = 'Broca';
5
6
  const [PRIVATE_LIMIT, GROUP_LIMIT] = [60 / 60, 60 / 20].map(x => x * 1000);
@@ -44,6 +45,24 @@ const getExtra = (ctx, options) => {
44
45
  return resp;
45
46
  };
46
47
 
48
+ const getMediaName = func => func.replace(/^replyWith/, '') || 'File';
49
+
50
+ const packInputFile = async (func, media) => {
51
+ if (media.filename || media.url || !Buffer.isBuffer(media.source)) {
52
+ return media;
53
+ }
54
+ const { extension } = await storage.getMime(media.source);
55
+ return { ...media, filename: `${getMediaName(func)}_${uuidv4()}.${extension}` };
56
+ };
57
+
58
+ const getMedia = async (func, src) => {
59
+ if (src?.source || src?.url) { return await packInputFile(func, src); }
60
+ if (Buffer.isBuffer(src)) {
61
+ return await packInputFile(func, { source: src });
62
+ }
63
+ return { [getKey(src)]: src };
64
+ };
65
+
47
66
  const resp = async (ctx, text, extra) => {
48
67
  // if (ctx._.type === 'inline_query') {
49
68
  // return await ctx.answerInlineQuery([{}, {}]);
@@ -72,7 +91,7 @@ const resp = async (ctx, text, extra) => {
72
91
  const replyWith = async (ctx, func, src, options) => ctx._.done.push(
73
92
  await ctx[func](Array.isArray(src) ? src.map(x => ({
74
93
  type: x.type || 'photo', media: { [getKey(x.src)]: x.src },
75
- })) : { [getKey(src)]: src }, getExtra(ctx, options))
94
+ })) : await getMedia(func, src), getExtra(ctx, options))
76
95
  );
77
96
 
78
97
  const edit = async (ctx, lastMsgId, text, extra) => {
@@ -3,12 +3,31 @@ import { bot, dbio, hal, utilitas } from '../index.mjs';
3
3
  const [RELEVANCE, SEARCH_LIMIT, SUB_LIMIT] = [0.2, 10, 200]; // Google Rerank limit
4
4
  const compact = (str, op) => utilitas.ensureString(str, { ...op || {}, compact: true });
5
5
  const compactLimit = (str, op) => compact(str, { ...op || {}, limit: 140 });
6
+ const quote = key => `"${key}"`;
6
7
 
7
8
  const packMessage = (messages) => messages.map(x => ({
8
9
  message_id: x.message_id, score: x.score, created_at: x.created_at,
9
10
  request: x.received_text, response: x.response_text,
10
11
  }));
11
12
 
13
+ const upsertPostgresqlEvent = async event => {
14
+ event = {
15
+ ...event,
16
+ distilled_vector: await dbio.encodeVector(event.distilled_vector),
17
+ };
18
+ const fields = Object.keys(event);
19
+ const values = fields.map(key => event[key]);
20
+ const placeholders = fields.map((key, i) => key === 'distilled_vector'
21
+ ? `quantize_to_rabitq8($${i + 1}::vector)` : `$${i + 1}`);
22
+ return await hal._.storage?.client?.query?.(
23
+ `INSERT INTO ${hal.table} (${fields.map(quote).join(', ')}) `
24
+ + `VALUES (${placeholders.join(', ')}) `
25
+ + `ON CONFLICT ("id") DO UPDATE SET `
26
+ + fields.map(key => `${quote(key)} = EXCLUDED.${quote(key)}`).join(', '),
27
+ values
28
+ );
29
+ };
30
+
12
31
  const recall = async (sessionId, keyword, offset = 0, limit = SEARCH_LIMIT, options = {}) => {
13
32
  assert(sessionId, 'Session ID is required.');
14
33
  let [result, _limit, exclude] = [
@@ -35,13 +54,14 @@ const recall = async (sessionId, keyword, offset = 0, limit = SEARCH_LIMIT, opti
35
54
  // globalThis.debug = 2;
36
55
  const vector = await dbio.encodeVector(await hal._.embed(keyword));
37
56
  result = await hal._.storage?.client?.query?.(
38
- `SELECT *, (1 - (distilled_vector <=> $1)) as relevance `
57
+ `SELECT *, (1 - (distilled_vector <=> `
58
+ + `quantize_to_rabitq8($1::vector))) as relevance `
39
59
  + `FROM ${hal.table} WHERE bot_id = $2 AND chat_id = $3 `
40
60
  + `AND received_text != '' `
41
61
  + `AND received_text NOT LIKE '/%' `
42
62
  + `AND response_text != '' `
43
63
  + (exclude.length ? `AND message_id NOT IN (${exclude.join(',')}) ` : '')
44
- + `ORDER BY (distilled_vector <=> $1) ASC `
64
+ + `ORDER BY (distilled_vector <=> quantize_to_rabitq8($1::vector)) ASC `
45
65
  + `LIMIT ${_limit} OFFSET $4`,
46
66
  [vector, hal._.bot.botInfo.id, sessionId, offset]
47
67
  );
@@ -127,6 +147,8 @@ const memorize = async (ctx) => {
127
147
  case dbio.MYSQL:
128
148
  event.distilled_vector = JSON.stringify(event.distilled_vector);
129
149
  break;
150
+ case dbio.POSTGRESQL:
151
+ return await upsertPostgresqlEvent(event);
130
152
  }
131
153
  await hal._.storage?.client?.upsert?.(hal.table, event, { skipEcho: true });
132
154
  }, hal.logOptions);
@@ -1,4 +1,4 @@
1
- import { callosum, alan } from '../index.mjs';
1
+ import { callosum, alan, utilitas } from '../index.mjs';
2
2
  import { token } from 'webjam';
3
3
 
4
4
  const _name = 'Chat';