memex-mvp 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/HELP.md +308 -0
- package/LICENSE +21 -0
- package/README.md +542 -0
- package/bot/README.md +169 -0
- package/bot/config.js +66 -0
- package/bot/inbox.js +153 -0
- package/bot/index.js +294 -0
- package/bot/nexara.js +61 -0
- package/bot/poll.js +304 -0
- package/bot/search.js +155 -0
- package/bot/telegram.js +96 -0
- package/ingest.js +1473 -0
- package/lib/config.js +179 -0
- package/lib/parse-cursor.js +172 -0
- package/lib/parse-obsidian.js +256 -0
- package/lib/parse.js +175 -0
- package/lib/render-markdown.js +0 -0
- package/package.json +70 -0
- package/server.js +2530 -0
package/bot/README.md
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# memex-bot — Telegram capture surface for memex
|
|
2
|
+
|
|
3
|
+
A Telegram bot that lets you capture thoughts, voice notes, and forwarded
|
|
4
|
+
messages on the go. Everything you send (or forward) flows into the same
|
|
5
|
+
FTS5 corpus as the rest of your memex memory.
|
|
6
|
+
|
|
7
|
+
## How it works
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
You message @your_memex_bot
|
|
11
|
+
↓
|
|
12
|
+
bot process (long-poll getUpdates)
|
|
13
|
+
↓
|
|
14
|
+
builds Telegram-export-format JSON snippet
|
|
15
|
+
↓
|
|
16
|
+
~/.memex/inbox/bot-<ts>-<msg_id>.json
|
|
17
|
+
↓
|
|
18
|
+
[memex MCP server's existing inbox watcher imports → memex.db]
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
The bot writes JSON files in the same shape as a Telegram Desktop export.
|
|
22
|
+
The existing `importTelegram` parser in `server.js` ingests them. **Zero new
|
|
23
|
+
ingest code path** — voice transcripts, forwards, and direct messages all
|
|
24
|
+
land in the dedicated `tg-memex-bot-<your_user_id>` conversation. The
|
|
25
|
+
synthetic `memex-bot-` prefix keeps the bot thread separate from any real
|
|
26
|
+
Telegram chat (including Saved Messages, which Telegram Desktop exports
|
|
27
|
+
with `chat.id == your own user_id`).
|
|
28
|
+
|
|
29
|
+
To search only what you captured via the bot, scope by chat title:
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
memex_search({ query: "...", chat: "Memex Bot" })
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## What's captured
|
|
36
|
+
|
|
37
|
+
| Input | Stored as |
|
|
38
|
+
|-----------------------|----------------------------------------------------------------|
|
|
39
|
+
| Text message | `text` field, `from = "me"` |
|
|
40
|
+
| Forwarded message | `↪ Forwarded from <X>:\n\n<text>` (so it's searchable via FTS5)|
|
|
41
|
+
| Voice note (v0.2+) | Nexara transcript prefixed with `🎙`; original OGG kept on disk|
|
|
42
|
+
| Photos / documents | _Not yet — acknowledged with a warning so you know._ |
|
|
43
|
+
|
|
44
|
+
Idempotency is automatic: each message uses Telegram's stable `msg.id` and
|
|
45
|
+
the `messages` table has `UNIQUE(source, conversation_id, msg_id)`. Restarts
|
|
46
|
+
and re-emits are safe.
|
|
47
|
+
|
|
48
|
+
## Setup
|
|
49
|
+
|
|
50
|
+
### 1. Create a bot
|
|
51
|
+
|
|
52
|
+
Talk to [@BotFather](https://t.me/BotFather) on Telegram:
|
|
53
|
+
|
|
54
|
+
1. `/newbot`
|
|
55
|
+
2. Give it a name (shown to you only) and a username (must end in `bot`).
|
|
56
|
+
3. Save the **HTTP API token** BotFather gives you.
|
|
57
|
+
|
|
58
|
+
### 2. Find your numeric Telegram user_id
|
|
59
|
+
|
|
60
|
+
Talk to [@userinfobot](https://t.me/userinfobot) — it replies with your
|
|
61
|
+
numeric `id`. Save it.
|
|
62
|
+
|
|
63
|
+
### 3. Write `~/.memex/bot.config.json`
|
|
64
|
+
|
|
65
|
+
```json
|
|
66
|
+
{
|
|
67
|
+
"telegram_bot_token": "1234567890:ABCdef...",
|
|
68
|
+
"allowlist_user_ids": [123456789],
|
|
69
|
+
"voice_enabled": true,
|
|
70
|
+
"nexara_api_key": "nx-..."
|
|
71
|
+
}
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
- `telegram_bot_token` — required.
|
|
75
|
+
- `allowlist_user_ids` — required. Only these Telegram user_ids can talk to
|
|
76
|
+
the bot; everyone else gets a polite reject. The bot is single-user.
|
|
77
|
+
- `nexara_api_key` — optional; only needed for voice transcription.
|
|
78
|
+
- `voice_enabled` — defaults to `true` if `nexara_api_key` is set,
|
|
79
|
+
otherwise off. Set explicitly to `false` to disable even with a key.
|
|
80
|
+
|
|
81
|
+
### 4. Run it
|
|
82
|
+
|
|
83
|
+
Foreground (debug):
|
|
84
|
+
```sh
|
|
85
|
+
npx memex-bot
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Or as a launchd autostart agent (macOS):
|
|
89
|
+
```sh
|
|
90
|
+
npx memex-bot install
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Then send a test message to your bot. Within a few seconds:
|
|
94
|
+
1. A JSON file appears in `~/.memex/inbox/`.
|
|
95
|
+
2. The memex MCP server's inbox watcher imports it.
|
|
96
|
+
3. It's searchable via `memex_search` and visible in `memex_recent`.
|
|
97
|
+
|
|
98
|
+
### CLI
|
|
99
|
+
|
|
100
|
+
```
|
|
101
|
+
memex-bot run foreground
|
|
102
|
+
memex-bot install register macOS LaunchAgent
|
|
103
|
+
memex-bot uninstall remove LaunchAgent (config preserved)
|
|
104
|
+
memex-bot restart reload after a config edit
|
|
105
|
+
memex-bot status show installed/running/offset/last-activity
|
|
106
|
+
memex-bot logs tail -f the bot log
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## Bot commands
|
|
110
|
+
|
|
111
|
+
| Command | What it does |
|
|
112
|
+
|--------------------|---------------------------------------------------------|
|
|
113
|
+
| `/help` or `/start`| Print usage |
|
|
114
|
+
| `/search <query>` | Top-3 FTS5 matches across all memex sources |
|
|
115
|
+
| `/recent` | 5 most recent captured messages, any source |
|
|
116
|
+
|
|
117
|
+
Anything else is captured as text. Unknown slash-commands are too — so a
|
|
118
|
+
typo never silently swallows your thought.
|
|
119
|
+
|
|
120
|
+
## Offline behavior
|
|
121
|
+
|
|
122
|
+
**The bot is local-only — no cloud relay.** When your laptop is asleep or
|
|
123
|
+
offline:
|
|
124
|
+
|
|
125
|
+
- Telegram buffers updates ~24 hours server-side. The bot picks them up on
|
|
126
|
+
next poll.
|
|
127
|
+
- For longer gaps, **export the bot chat from Telegram Desktop** and drop
|
|
128
|
+
the resulting `result.json` into `~/.memex/inbox/`. The `UNIQUE`
|
|
129
|
+
constraint dedupes against anything already captured, so it's safe to
|
|
130
|
+
re-import overlapping ranges.
|
|
131
|
+
|
|
132
|
+
This is a deliberate design choice: cloud relays would break memex's
|
|
133
|
+
local-first wedge for what amounts to a convenience feature. If you need
|
|
134
|
+
guaranteed delivery for a 3-week trip, plan for the manual export step.
|
|
135
|
+
|
|
136
|
+
## Files & paths
|
|
137
|
+
|
|
138
|
+
| Thing | Path |
|
|
139
|
+
|------------------------|-----------------------------------------------------------------|
|
|
140
|
+
| Config | `~/.memex/bot.config.json` |
|
|
141
|
+
| Inbox JSONs | `~/.memex/inbox/bot-<ts>-<msg_id>.json` → archived after import |
|
|
142
|
+
| Voice OGGs | `~/.memex/data/conversations/telegram/media/<msg_id>.oga` |
|
|
143
|
+
| Update offset state | `~/.memex/data/bot-state.json` |
|
|
144
|
+
| Log | `~/.memex/data/bot.log` |
|
|
145
|
+
| LaunchAgent plist (mac)| `~/Library/LaunchAgents/com.parallelclaw.memex.bot.plist` |
|
|
146
|
+
|
|
147
|
+
## Architecture notes
|
|
148
|
+
|
|
149
|
+
- **No webhook** — long-polling via `getUpdates` only. No public URL, no
|
|
150
|
+
HTTPS, no port forwarding. Works behind any NAT.
|
|
151
|
+
- **No new ingest code path** — bot writes files; the existing inbox
|
|
152
|
+
watcher in `server.js` parses them.
|
|
153
|
+
- **Read-only DB access** for `/search` and `/recent`. The bot opens
|
|
154
|
+
`memex.db` in `readonly` mode; WAL allows concurrent readers alongside
|
|
155
|
+
the running MCP server.
|
|
156
|
+
- **No new dependencies.** Native `fetch` and `FormData` (Node 18+, which
|
|
157
|
+
memex already requires) handle Telegram and Nexara HTTP. `better-sqlite3`
|
|
158
|
+
was already in the parent `package.json`.
|
|
159
|
+
|
|
160
|
+
## Out of scope (for now)
|
|
161
|
+
|
|
162
|
+
- Webhook mode
|
|
163
|
+
- Cloud relay / always-on infrastructure
|
|
164
|
+
- Multi-user (bot serves exactly one TG user_id)
|
|
165
|
+
- Rich UI inside bot (button-based memory browser)
|
|
166
|
+
- LLM auto-summarization (would break memex's verbatim principle)
|
|
167
|
+
- Tag/category system (FTS5 + recency boost is enough for discovery)
|
|
168
|
+
- URL fetching (paste link → ingest cleaned HTML)
|
|
169
|
+
- Periodic reminders ("you haven't captured anything in 7 days")
|
package/bot/config.js
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bot config loader. Reads ~/.memex/bot.config.json (or $MEMEX_DIR/bot.config.json).
|
|
3
|
+
*
|
|
4
|
+
* Required: telegram_bot_token, allowlist_user_ids[]
|
|
5
|
+
* Optional: nexara_api_key, voice_enabled, inbox_path, media_path, db_path, state_path
|
|
6
|
+
*
|
|
7
|
+
* Voice support is auto-disabled if no nexara_api_key is set, regardless of
|
|
8
|
+
* the voice_enabled flag.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
12
|
+
import { homedir } from 'node:os';
|
|
13
|
+
import { join } from 'node:path';
|
|
14
|
+
|
|
15
|
+
// Root of the memex data tree: $MEMEX_DIR when set (and non-empty), else ~/.memex.
const HOME = homedir();
const MEMEX_DIR = process.env.MEMEX_DIR ? process.env.MEMEX_DIR : join(HOME, '.memex');

/** Location of the bot's JSON config file inside the memex directory. */
export const BOT_CONFIG_PATH = join(MEMEX_DIR, 'bot.config.json');

// Sample config embedded in the "config not found" error message.
const EXAMPLE = [
  '{',
  '"telegram_bot_token": "<get from @BotFather>",',
  '"allowlist_user_ids": [<your numeric Telegram user_id — get from @userinfobot>],',
  '"voice_enabled": true,',
  '"nexara_api_key": "nx-..."',
  '}',
].join('\n');
|
|
25
|
+
|
|
26
|
+
/**
 * Read, validate and normalize the bot config stored at BOT_CONFIG_PATH.
 *
 * @returns {{
 *   telegram_bot_token: string,
 *   allowlist_user_ids: number[],
 *   nexara_api_key: (string|null),
 *   voice_enabled: boolean,
 *   inbox_path: string,
 *   media_path: string,
 *   db_path: string,
 *   state_path: string
 * }} normalized config; path fields fall back to locations under MEMEX_DIR.
 * @throws {Error} with `code === 'NO_CONFIG'` when the file does not exist;
 *   a plain Error when the file is not valid JSON, is not a JSON object, or
 *   a required field is missing/invalid.
 */
export function loadBotConfig() {
  if (!existsSync(BOT_CONFIG_PATH)) {
    const err = new Error(
      `bot config not found at ${BOT_CONFIG_PATH}\n\n` +
      `Create it with:\n ${BOT_CONFIG_PATH}\n\n` +
      `Example contents:\n${EXAMPLE}`
    );
    // Callers branch on this code to pick a distinct exit status.
    err.code = 'NO_CONFIG';
    throw err;
  }

  let parsed;
  try {
    parsed = JSON.parse(readFileSync(BOT_CONFIG_PATH, 'utf-8'));
  } catch (e) {
    throw new Error(`failed to parse ${BOT_CONFIG_PATH}: ${e.message}`, { cause: e });
  }

  // JSON.parse also accepts `null`, arrays and scalars; none of them is a
  // usable config, and `null` would otherwise crash with a raw TypeError on
  // the first property access below.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new Error('bot.config.json: top-level value must be a JSON object');
  }

  if (!parsed.telegram_bot_token || typeof parsed.telegram_bot_token !== 'string') {
    throw new Error('bot.config.json: telegram_bot_token (string) is required');
  }
  if (!Array.isArray(parsed.allowlist_user_ids) || parsed.allowlist_user_ids.length === 0) {
    throw new Error('bot.config.json: allowlist_user_ids[] (numeric TG user id(s)) is required');
  }

  // Accept numbers and non-blank numeric strings only. A bare Number(x)
  // would turn null / "" / [] into 0 and `true` into 1, letting junk
  // entries through as if they were user ids.
  const allowlist = parsed.allowlist_user_ids.map((x) => {
    const isNumericString = typeof x === 'string' && x.trim() !== '';
    return typeof x === 'number' || isNumericString ? Number(x) : Number.NaN;
  });
  if (allowlist.some((x) => !Number.isSafeInteger(x))) {
    throw new Error('bot.config.json: allowlist_user_ids must be numbers');
  }

  const nexaraKey = parsed.nexara_api_key || null;
  return {
    telegram_bot_token: parsed.telegram_bot_token,
    allowlist_user_ids: allowlist,
    nexara_api_key: nexaraKey,
    // Voice needs a Nexara key; an explicit `false` disables it even with one.
    voice_enabled: parsed.voice_enabled !== false && !!nexaraKey,
    inbox_path: parsed.inbox_path || join(MEMEX_DIR, 'inbox'),
    media_path: parsed.media_path || join(MEMEX_DIR, 'data', 'conversations', 'telegram', 'media'),
    db_path: parsed.db_path || join(MEMEX_DIR, 'data', 'memex.db'),
    state_path: parsed.state_path || join(MEMEX_DIR, 'data', 'bot-state.json'),
  };
}
|
package/bot/inbox.js
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Build a minimal Telegram-Desktop-export JSON snippet for one captured
|
|
3
|
+
* message and write it atomically to the memex inbox.
|
|
4
|
+
*
|
|
5
|
+
* The shape mirrors what `importTelegram` (server.js) expects:
|
|
6
|
+
* {
|
|
7
|
+
* personal_information: { user_id: "<me>" },
|
|
8
|
+
* chats: { list: [{ id, name, type, messages: [oneMsg] }] }
|
|
9
|
+
* }
|
|
10
|
+
*
|
|
11
|
+
* One file per message. The conversation_id derived by the parser is
|
|
12
|
+
* `tg-<chat.id>`. We set chat.id to the string `memex-bot-<userId>` (NOT
|
|
13
|
+
* the bare Telegram user_id) so the resulting conversation_id is
|
|
14
|
+
* `tg-memex-bot-<userId>` — distinct from any real Telegram chat.id and,
|
|
15
|
+
* critically, distinct from "Saved Messages" exports (which Telegram
|
|
16
|
+
* Desktop emits with chat.id == your own user_id, so a synthetic prefix
|
|
17
|
+
* here is the only thing keeping the two streams from merging).
|
|
18
|
+
*
|
|
19
|
+
* Every captured message — typed, forwarded, voice-transcribed — lands
|
|
20
|
+
* in this same dedicated thread.
|
|
21
|
+
*
|
|
22
|
+
* Idempotency is automatic: each message uses Telegram's stable msg.id,
|
|
23
|
+
* and `messages` has UNIQUE(source, conversation_id, msg_id). Reprocessing
|
|
24
|
+
* the same update is a no-op at the DB layer.
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import { mkdirSync, writeFileSync, renameSync, unlinkSync } from 'node:fs';
|
|
28
|
+
import { join } from 'node:path';
|
|
29
|
+
|
|
30
|
+
// Display name of the synthetic chat every bot-captured message is filed under.
const CHAT_NAME = 'Memex Bot';

/**
 * Persist one captured message as a Telegram-export-shaped JSON file in the
 * inbox directory.
 *
 * The payload is written to `<target>.tmp` first and then renamed onto the
 * final name, so a reader never observes a half-written `.json` file.
 *
 * @param {object} opts
 * @param {string} opts.inboxPath - directory to write into (created if missing)
 * @param {number|string} opts.userId - bot owner's Telegram user_id; stored as
 *   `personal_information.user_id` and embedded in the synthetic chat id
 *   `memex-bot-<userId>`
 * @param {object} opts.message - export-format message object; must carry a truthy `id`
 * @returns {string} path of the written file (`<inboxPath>/bot-<ts>-<id>.json`)
 * @throws {Error} when a required option is missing, or when writing/renaming fails
 */
export function writeInboxMessage({ inboxPath, userId, message }) {
  if (!inboxPath) {
    throw new Error('writeInboxMessage: inboxPath required');
  }
  if (userId == null) {
    throw new Error('writeInboxMessage: userId required');
  }
  if (!message?.id) {
    throw new Error('writeInboxMessage: message.id required');
  }

  mkdirSync(inboxPath, { recursive: true });

  // Synthetic chat id: keeps this thread apart from real Telegram chats,
  // including Saved Messages, which uses the bare user_id.
  const botChat = {
    id: `memex-bot-${userId}`,
    name: CHAT_NAME,
    type: 'personal_chat',
    messages: [message],
  };
  const serialized = JSON.stringify(
    {
      personal_information: { user_id: String(userId) },
      chats: { list: [botChat] },
    },
    null,
    2
  );

  // Prefer the message's own timestamp for the file name; fall back to "now".
  const stamp = parseInt(message.date_unixtime, 10) || Math.floor(Date.now() / 1000);
  const finalPath = join(inboxPath, `bot-${stamp}-${message.id}.json`);
  const stagingPath = `${finalPath}.tmp`;

  try {
    writeFileSync(stagingPath, serialized);
    renameSync(stagingPath, finalPath);
  } catch (writeErr) {
    // Best-effort cleanup of the staging file; the original failure is what matters.
    try {
      unlinkSync(stagingPath);
    } catch (_) {}
    throw writeErr;
  }
  return finalPath;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Convert a Telegram getUpdates `update.message` into the export-format
 * message object expected by the parser.
 *
 * Forwards: `↪ Forwarded from <name>:` is prepended to the text so the
 * origin is part of the stored text, and the name is also kept in
 * `forwarded_from`. The origin is read from the legacy `forward_from`,
 * `forward_from_chat` and `forward_sender_name` fields first, then from the
 * newer `forward_origin` object as a fallback.
 *
 * Voice: caller passes `textOverride` (the transcription) and `mediaPath`
 * (where the audio was saved).
 *
 * @param {object} opts
 * @param {object} opts.tgMessage - raw Telegram message
 * @param {number|string} opts.userId - bot owner's user_id (used in from_id)
 * @param {string} [opts.textOverride] - replaces text/caption when it is a string
 * @param {string} [opts.mediaPath] - path to saved media; also sets
 *   `media_type` to `'voice_message'`
 * @returns {object} message with id, type, date, date_unixtime, from, from_id,
 *   text, and optionally forwarded_from, media_path, media_type,
 *   reply_to_message_id
 */
export function tgUpdateToExportMessage({ tgMessage, userId, textOverride, mediaPath }) {
  const ts = tgMessage.date || Math.floor(Date.now() / 1000);
  // ISO timestamp (UTC) with the milliseconds and trailing "Z" stripped.
  const isoDate = new Date(ts * 1000).toISOString().replace(/\.\d{3}Z$/, '');

  const forwardedName = resolveForwardedName(tgMessage);

  // Resolve text payload from possible sources, most specific first.
  let text;
  if (typeof textOverride === 'string') {
    text = textOverride;
  } else if (typeof tgMessage.text === 'string') {
    text = tgMessage.text;
  } else if (typeof tgMessage.caption === 'string') {
    text = tgMessage.caption;
  } else {
    text = '';
  }

  if (forwardedName) {
    text = text
      ? `↪ Forwarded from ${forwardedName}:\n\n${text}`
      : `↪ Forwarded from ${forwardedName}: (no text)`;
  }

  const msg = {
    id: tgMessage.message_id,
    type: 'message',
    date: isoDate,
    date_unixtime: String(ts),
    from: 'me',
    from_id: `user${userId}`,
    text,
  };

  if (forwardedName) {
    msg.forwarded_from = forwardedName;
  }
  if (mediaPath) {
    msg.media_path = mediaPath;
    msg.media_type = 'voice_message';
  }
  if (tgMessage.reply_to_message?.message_id) {
    msg.reply_to_message_id = tgMessage.reply_to_message.message_id;
  }
  return msg;
}

/** Display name for a Telegram user object: full name, else username, else `user<id>`. */
function forwardUserName(user) {
  return [user.first_name, user.last_name].filter(Boolean).join(' ') || user.username || `user${user.id}`;
}

/** Display name for a Telegram chat object: title, else username, else `chat<id>`. */
function forwardChatName(chat) {
  return chat.title || chat.username || `chat${chat.id}`;
}

/**
 * Work out who a message was forwarded from, or null when it is not a forward.
 * Legacy fields win so existing behavior is unchanged; `forward_origin` is
 * consulted only when none of them is present.
 */
function resolveForwardedName(tgMessage) {
  if (tgMessage.forward_from) return forwardUserName(tgMessage.forward_from);
  if (tgMessage.forward_from_chat) return forwardChatName(tgMessage.forward_from_chat);
  if (tgMessage.forward_sender_name) return tgMessage.forward_sender_name;

  const origin = tgMessage.forward_origin;
  if (!origin) return null;
  switch (origin.type) {
    case 'user':
      return origin.sender_user ? forwardUserName(origin.sender_user) : null;
    case 'hidden_user':
      return origin.sender_user_name || null;
    case 'chat':
      return origin.sender_chat ? forwardChatName(origin.sender_chat) : null;
    case 'channel':
      return origin.chat ? forwardChatName(origin.chat) : null;
    default:
      return null;
  }
}
|
package/bot/index.js
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* memex-bot — Telegram capture bot for memex.
|
|
4
|
+
*
|
|
5
|
+
* The bot is a separate process. It writes Telegram-Desktop-export-format
|
|
6
|
+
* JSON snippets into ~/.memex/inbox/, where memex's existing inbox watcher
|
|
7
|
+
* picks them up. Zero new ingest code path.
|
|
8
|
+
*
|
|
9
|
+
* CLI usage:
|
|
10
|
+
* memex-bot # run in foreground (debug / launchctl ProgramArguments)
|
|
11
|
+
* memex-bot serve # explicit foreground (alias)
|
|
12
|
+
* memex-bot install # register macOS LaunchAgent (autostart on login)
|
|
13
|
+
* memex-bot uninstall # unload + remove LaunchAgent (config preserved)
|
|
14
|
+
* memex-bot status # show daemon state, last activity, offset
|
|
15
|
+
* memex-bot logs # tail -f the daemon log
|
|
16
|
+
* memex-bot restart # reload LaunchAgent after config edit
|
|
17
|
+
*
|
|
18
|
+
* Config:
|
|
19
|
+
* ~/.memex/bot.config.json — token, allowlist, optional Nexara key
|
|
20
|
+
*
|
|
21
|
+
* Offline behavior:
|
|
22
|
+
* When the laptop is off, Telegram buffers updates ~24h server-side and
|
|
23
|
+
* the bot catches up on next poll. Beyond that, export the bot chat from
|
|
24
|
+
* Telegram Desktop and drop result.json into ~/.memex/inbox/ — UNIQUE
|
|
25
|
+
* constraint dedupes safely.
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
import { homedir, platform } from 'node:os';
|
|
29
|
+
import { join, resolve } from 'node:path';
|
|
30
|
+
import {
|
|
31
|
+
existsSync, readFileSync, writeFileSync, mkdirSync, unlinkSync,
|
|
32
|
+
appendFileSync, statSync,
|
|
33
|
+
} from 'node:fs';
|
|
34
|
+
import { execSync, spawn } from 'node:child_process';
|
|
35
|
+
import { fileURLToPath } from 'node:url';
|
|
36
|
+
import { loadBotConfig, BOT_CONFIG_PATH } from './config.js';
|
|
37
|
+
import { BotRunner } from './poll.js';
|
|
38
|
+
|
|
39
|
+
// Filesystem layout: everything lives under $MEMEX_DIR (default ~/.memex).
const HOME = homedir();
const MEMEX_DIR = process.env.MEMEX_DIR ? process.env.MEMEX_DIR : join(HOME, '.memex');
const DATA = join(MEMEX_DIR, 'data');
const LOG_PATH = join(DATA, 'bot.log');

// launchd job label and the per-user LaunchAgent plist derived from it.
const LAUNCH_LABEL = 'com.parallelclaw.memex.bot';
const PLIST_PATH = join(HOME, 'Library', 'LaunchAgents', LAUNCH_LABEL + '.plist');

// Make sure the data directory exists before anything tries to write into it.
mkdirSync(DATA, { recursive: true });
|
|
48
|
+
|
|
49
|
+
/**
 * Emit one timestamped log line to stderr and append it to LOG_PATH.
 * Arguments are stringified and joined with single spaces. A failure to
 * append to the log file is deliberately ignored.
 *
 * @param {...*} args - values to log
 */
function log(...args) {
  const stamp = new Date().toISOString();
  const body = args.map((part) => String(part)).join(' ');
  const line = `[${stamp}] ${body}\n`;

  process.stderr.write(line);
  try {
    appendFileSync(LOG_PATH, line);
  } catch (_) {
    // Best-effort: stderr already carries the line.
  }
}
|
|
54
|
+
|
|
55
|
+
// First CLI argument selects the mode; no argument means "run in foreground".
const subcommand = process.argv[2];

if (subcommand === '--help' || subcommand === '-h') {
  console.log(`memex-bot — Telegram capture bot for memex memory

run modes:
memex-bot run in foreground (default; same as 'serve')
memex-bot serve explicit foreground
memex-bot install register macOS LaunchAgent (autostart on login)
memex-bot uninstall unload and remove LaunchAgent (config preserved)
memex-bot restart restart the LaunchAgent (after config changes)
memex-bot status show daemon health, offset, last activity
memex-bot logs tail the daemon log

config:
${BOT_CONFIG_PATH}

paths:
log: ${LOG_PATH}
plist: ${PLIST_PATH}`);
  process.exit(0);
}

// Subcommand name → handler. The handlers are function declarations further
// down the file, so they are hoisted and already callable here.
const handlers = {
  install: cmdInstall,
  uninstall: cmdUninstall,
  status: cmdStatus,
  logs: cmdLogs,
  restart: cmdRestart,
  serve: cmdServe,
};

if (subcommand && !subcommand.startsWith('-')) {
  // Own-property check: a plain `handlers[subcommand]` lookup would also find
  // inherited members such as `toString` or `constructor`, and silently
  // invoke them instead of reporting an unknown command.
  if (!Object.hasOwn(handlers, subcommand)) {
    console.error(`unknown command: ${subcommand}`);
    console.error(`usage: memex-bot [serve|install|uninstall|restart|status|logs]`);
    process.exit(2);
  }
  handlers[subcommand]();
} else {
  // No subcommand (or an unrecognized flag): run the bot in the foreground.
  cmdServe();
}
|
|
98
|
+
|
|
99
|
+
// -------------------- CLI handlers --------------------
|
|
100
|
+
|
|
101
|
+
/**
 * Run the bot in the foreground.
 *
 * Loads the config (exit code 2 when the config file is missing, 1 for any
 * other config error), starts a BotRunner, and installs SIGINT/SIGTERM
 * handlers that stop the runner and exit 1.5 s later. A rejection from
 * `runner.start()` is logged and exits with code 1.
 */
function cmdServe() {
  let config;
  try {
    config = loadBotConfig();
  } catch (err) {
    console.error(err.message);
    process.exit(err.code === 'NO_CONFIG' ? 2 : 1);
  }

  const runner = new BotRunner({ config, log });

  // Guard so a second signal during shutdown is ignored.
  let stopping = false;
  const shutdown = (signalName) => {
    if (stopping) {
      return;
    }
    stopping = true;
    log(`received ${signalName}, shutting down`);
    runner.stop();
    // Give the runner a moment to wind down before exiting.
    setTimeout(() => process.exit(0), 1500);
  };
  for (const signalName of ['SIGINT', 'SIGTERM']) {
    process.on(signalName, () => shutdown(signalName));
  }

  log('memex-bot starting');
  runner.start().catch((err) => {
    log(`fatal: ${err.stack || err.message}`);
    process.exit(1);
  });
}
|
|
127
|
+
|
|
128
|
+
/**
 * Register the bot as a macOS LaunchAgent and start it.
 *
 * Exits 1 on non-macOS platforms or when `launchctl load` fails, 2 when the
 * bot config cannot be loaded, 0 on success. Any previously loaded agent for
 * the same plist is unloaded first (errors from that step are ignored).
 */
function cmdInstall() {
  if (platform() !== 'darwin') {
    console.error('install: macOS-only for now (LaunchAgent). Linux: nohup memex-bot &');
    process.exit(1);
  }

  // Validate config exists before installing — otherwise the daemon would
  // crashloop the moment launchd starts it.
  try {
    loadBotConfig();
  } catch (e) {
    console.error(`Cannot install — config not ready:\n\n${e.message}`);
    process.exit(2);
  }

  // Paths are interpolated into XML below; `&`, `<` and `>` in a path would
  // otherwise produce a malformed plist.
  const xmlEscape = (value) =>
    String(value).replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

  const nodePath = process.execPath;
  const scriptPath = resolve(fileURLToPath(import.meta.url));
  // Two levels up from this script file.
  const workingDir = resolve(scriptPath, '..', '..');

  const plist = `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Label</key>
<string>${xmlEscape(LAUNCH_LABEL)}</string>
<key>ProgramArguments</key>
<array>
<string>${xmlEscape(nodePath)}</string>
<string>${xmlEscape(scriptPath)}</string>
</array>
<key>RunAtLoad</key><true/>
<key>KeepAlive</key><true/>
<key>ProcessType</key><string>Background</string>
<key>LowPriorityIO</key><true/>
<key>Nice</key><integer>5</integer>
<key>StandardOutPath</key><string>${xmlEscape(join(DATA, 'bot.launchd.out.log'))}</string>
<key>StandardErrorPath</key><string>${xmlEscape(join(DATA, 'bot.launchd.err.log'))}</string>
<key>WorkingDirectory</key><string>${xmlEscape(workingDir)}</string>
</dict>
</plist>
`;

  mkdirSync(join(HOME, 'Library', 'LaunchAgents'), { recursive: true });
  // Unload any previous instance first; failing here just means none was loaded.
  try {
    execSync(`launchctl unload ${JSON.stringify(PLIST_PATH)}`, { stdio: 'ignore' });
  } catch (_) {}
  writeFileSync(PLIST_PATH, plist);
  try {
    execSync(`launchctl load ${JSON.stringify(PLIST_PATH)}`, { stdio: 'inherit' });
  } catch (e) {
    console.error(`launchctl load failed: ${e.message}`);
    process.exit(1);
  }

  console.log(`✓ memex-bot installed and running`);
  console.log(` plist: ${PLIST_PATH}`);
  console.log(` log: ${LOG_PATH}`);
  console.log(` config: ${BOT_CONFIG_PATH}`);
  console.log('');
  console.log(`Send a test message to your bot — it should appear in ~/.memex/inbox/`);
  console.log(`within ~5s, then in memex.db within another ~2s.`);
  console.log('');
  console.log(`status: npx memex-bot status`);
  console.log(`logs: npx memex-bot logs`);
  process.exit(0);
}
|
|
190
|
+
|
|
191
|
+
/**
 * Unload the LaunchAgent and delete its plist; the bot config is left alone.
 * Exits 1 on non-macOS platforms, 0 otherwise (including when nothing was
 * installed). Failures while unloading or deleting are ignored.
 */
function cmdUninstall() {
  const onMac = platform() === 'darwin';
  if (!onMac) {
    console.error('uninstall: macOS-only for now.');
    process.exit(1);
  }

  const plistPresent = existsSync(PLIST_PATH);
  if (!plistPresent) {
    console.log(`memex-bot was not installed (nothing to remove).`);
    process.exit(0);
  }

  const unloadCommand = `launchctl unload ${JSON.stringify(PLIST_PATH)}`;
  try {
    execSync(unloadCommand, { stdio: 'ignore' });
  } catch (_) {
    // Ignored: removal proceeds even if unloading fails.
  }
  try {
    unlinkSync(PLIST_PATH);
  } catch (_) {
    // Ignored: best-effort removal.
  }

  console.log(`✓ memex-bot uninstalled`);
  console.log(`\nConfig preserved at ${BOT_CONFIG_PATH}.`);
  console.log(`To reinstall: npx memex-bot install`);
  process.exit(0);
}
|
|
207
|
+
|
|
208
|
+
/**
 * Reload the LaunchAgent (unload, then load) so a config edit takes effect.
 * Exits 1 on non-macOS platforms, when no plist is installed, or when
 * `launchctl load` fails; exits 0 after a successful reload.
 */
function cmdRestart() {
  const onMac = platform() === 'darwin';
  if (!onMac) {
    console.error('restart: macOS-only for now.');
    process.exit(1);
  }

  const plistPresent = existsSync(PLIST_PATH);
  if (!plistPresent) {
    console.error('memex-bot is not installed (no LaunchAgent plist found).');
    console.error('Run: npx memex-bot install');
    process.exit(1);
  }

  const quotedPlist = JSON.stringify(PLIST_PATH);
  try {
    execSync(`launchctl unload ${quotedPlist}`, { stdio: 'ignore' });
  } catch (_) {
    // Ignored: the load below is attempted either way.
  }
  try {
    execSync(`launchctl load ${quotedPlist}`, { stdio: 'ignore' });
  } catch (loadErr) {
    console.error('launchctl load failed:', loadErr.message);
    process.exit(1);
  }

  console.log(`✓ memex-bot restarted`);
  process.exit(0);
}
|
|
228
|
+
|
|
229
|
+
/**
 * Print a status report: LaunchAgent installed or not, running PID, config
 * health, the last stored update offset and how long ago the state file was
 * modified. Always exits 0.
 */
function cmdStatus() {
  const installed = existsSync(PLIST_PATH);
  let runningPid = null;
  if (installed) {
    try {
      // Parse `launchctl list` here instead of piping through grep: grep
      // treats the dots in the label as wildcards and matches any label that
      // merely contains ours. Rows are parsed as PID / status / label columns;
      // a non-numeric PID column (e.g. "-") is reported as "not running".
      const listing = execSync('launchctl list', { stdio: ['ignore', 'pipe', 'ignore'] }).toString();
      for (const row of listing.split('\n')) {
        const [pid, , label] = row.trim().split(/\s+/);
        if (label !== LAUNCH_LABEL) continue;
        if (/^\d+$/.test(pid)) runningPid = parseInt(pid, 10);
        break;
      }
    } catch (_) {
      // launchctl unavailable or failed: report "not running".
    }
  }

  console.log('memex-bot status\n');

  if (installed) {
    console.log(` daemon: installed (${PLIST_PATH})`);
  } else {
    console.log(` daemon: NOT installed`);
    console.log(` enable autostart with: memex-bot install`);
  }
  console.log(` process: ${runningPid ? `running (PID ${runningPid})` : 'not running'}`);

  // Config probe — loaded once and reused for the offset lookup below.
  let cfg = null;
  if (existsSync(BOT_CONFIG_PATH)) {
    try {
      cfg = loadBotConfig();
      console.log(` config: ✓ ${BOT_CONFIG_PATH}`);
      console.log(` allowlist: ${cfg.allowlist_user_ids.join(', ')}`);
      console.log(` voice: ${cfg.voice_enabled ? 'enabled' : 'disabled'}`);
      console.log(` inbox: ${cfg.inbox_path}`);
    } catch (e) {
      // Only the first line: the full message can span several lines.
      console.log(` config: ✗ ${BOT_CONFIG_PATH} — ${e.message.split('\n')[0]}`);
    }
  } else {
    console.log(` config: ✗ missing at ${BOT_CONFIG_PATH}`);
  }

  // Offset state
  let offsetInfo = '(none)';
  let lastSeen = null;
  try {
    if (cfg && existsSync(cfg.state_path)) {
      const s = JSON.parse(readFileSync(cfg.state_path, 'utf-8'));
      offsetInfo = `last update_id: ${s.lastUpdateId}`;
      // The state file's mtime is used as the "last activity" time.
      const ageMs = Date.now() - statSync(cfg.state_path).mtimeMs;
      const min = Math.floor(ageMs / 60000);
      lastSeen = min < 1 ? 'just now' : (min < 60 ? `${min} min ago` : `${Math.floor(min / 60)}h ${min % 60}m ago`);
    }
  } catch (_) {
    // Unreadable or corrupt state file: keep whatever was determined so far.
  }
  console.log(` state: ${offsetInfo}`);
  if (lastSeen) console.log(` last activity: ${lastSeen}`);

  console.log('');
  console.log(` log: ${LOG_PATH}`);
  process.exit(0);
}
|
|
285
|
+
|
|
286
|
+
/**
 * Follow the bot log with `tail -n 50 -f`, mirroring tail's exit code.
 * Exits 1 when the log file does not exist or `tail` cannot be started.
 */
function cmdLogs() {
  if (!existsSync(LOG_PATH)) {
    console.error(`no log file at ${LOG_PATH} — bot never started?`);
    process.exit(1);
  }
  const tail = spawn('tail', ['-n', '50', '-f', LOG_PATH], { stdio: 'inherit' });
  // Without an 'error' listener a failed spawn (e.g. no `tail` binary on this
  // system) surfaces as an unhandled 'error' event and a raw stack trace.
  tail.on('error', (err) => {
    console.error(`could not run tail: ${err.message}`);
    process.exit(1);
  });
  // Ctrl-C: pass the signal on to tail, then exit cleanly ourselves.
  process.on('SIGINT', () => { tail.kill('SIGINT'); process.exit(0); });
  tail.on('exit', (code) => process.exit(code || 0));
}
|