kabi-discord-cli 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kabi_discord_cli-0.1.0/.gitignore +8 -0
- kabi_discord_cli-0.1.0/PKG-INFO +17 -0
- kabi_discord_cli-0.1.0/pyproject.toml +30 -0
- kabi_discord_cli-0.1.0/src/discord_cli/__init__.py +3 -0
- kabi_discord_cli-0.1.0/src/discord_cli/auth.py +116 -0
- kabi_discord_cli-0.1.0/src/discord_cli/cli/__init__.py +0 -0
- kabi_discord_cli-0.1.0/src/discord_cli/cli/data.py +84 -0
- kabi_discord_cli-0.1.0/src/discord_cli/cli/discord_cmds.py +267 -0
- kabi_discord_cli-0.1.0/src/discord_cli/cli/main.py +95 -0
- kabi_discord_cli-0.1.0/src/discord_cli/cli/query.py +120 -0
- kabi_discord_cli-0.1.0/src/discord_cli/client.py +202 -0
- kabi_discord_cli-0.1.0/src/discord_cli/config.py +80 -0
- kabi_discord_cli-0.1.0/src/discord_cli/db.py +237 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kabi-discord-cli
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Discord CLI — fetch chat history, search messages, daily sync
|
|
5
|
+
License-Expression: Apache-2.0
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: click>=8.0
|
|
8
|
+
Requires-Dist: httpx>=0.27
|
|
9
|
+
Requires-Dist: python-dotenv>=1.0
|
|
10
|
+
Requires-Dist: rich>=13.0
|
|
11
|
+
Provides-Extra: ai
|
|
12
|
+
Requires-Dist: anthropic>=0.40; extra == 'ai'
|
|
13
|
+
Provides-Extra: all
|
|
14
|
+
Requires-Dist: anthropic>=0.40; extra == 'all'
|
|
15
|
+
Provides-Extra: dev
|
|
16
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
|
|
17
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "kabi-discord-cli"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Discord CLI — fetch chat history, search messages, daily sync"
|
|
5
|
+
license = "Apache-2.0"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
dependencies = [
|
|
8
|
+
"httpx>=0.27",
|
|
9
|
+
"click>=8.0",
|
|
10
|
+
"rich>=13.0",
|
|
11
|
+
"python-dotenv>=1.0",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
[project.optional-dependencies]
|
|
15
|
+
ai = ["anthropic>=0.40"]
|
|
16
|
+
dev = ["pytest>=8.0", "pytest-asyncio>=0.24"]
|
|
17
|
+
all = ["kabi-discord-cli[ai]"]
|
|
18
|
+
|
|
19
|
+
[project.scripts]
|
|
20
|
+
discord = "discord_cli.cli.main:cli"
|
|
21
|
+
|
|
22
|
+
[build-system]
|
|
23
|
+
requires = ["hatchling"]
|
|
24
|
+
build-backend = "hatchling.build"
|
|
25
|
+
|
|
26
|
+
[tool.hatch.build.targets.wheel]
|
|
27
|
+
packages = ["src/discord_cli"]
|
|
28
|
+
|
|
29
|
+
[tool.hatch.build.targets.sdist]
|
|
30
|
+
include = ["src/discord_cli"]
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""Discord token extraction from local browser and Discord client data."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Discord token regex patterns
|
|
12
|
+
# Tokens can be:
|
|
13
|
+
# - Regular user token: base64(user_id).timestamp.hmac
|
|
14
|
+
# - MFA token: mfa.base64_encoded_string
|
|
15
|
+
_TOKEN_PATTERNS = [
|
|
16
|
+
re.compile(r'[\w-]{24,}\.[\w-]{6}\.[\w-]{27,}'),
|
|
17
|
+
re.compile(r'mfa\.[\w-]{84}'),
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
# Encrypted token pattern (Discord client stores dQw4w9WgXcQ: encrypted)
|
|
21
|
+
_ENCRYPTED_TOKEN_PATTERN = re.compile(r'dQw4w9WgXcQ:([^\s"]+)')
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _get_search_paths() -> list[tuple[str, Path]]:
|
|
25
|
+
"""Return list of (source_name, leveldb_path) to search for tokens."""
|
|
26
|
+
home = Path.home()
|
|
27
|
+
|
|
28
|
+
if sys.platform == "darwin":
|
|
29
|
+
paths = [
|
|
30
|
+
("Discord App", home / "Library/Application Support/discord/Local Storage/leveldb"),
|
|
31
|
+
("Discord PTB", home / "Library/Application Support/discordptb/Local Storage/leveldb"),
|
|
32
|
+
("Discord Canary", home / "Library/Application Support/discordcanary/Local Storage/leveldb"),
|
|
33
|
+
("Chrome", home / "Library/Application Support/Google/Chrome/Default/Local Storage/leveldb"),
|
|
34
|
+
("Brave", home / "Library/Application Support/BraveSoftware/Brave-Browser/Default/Local Storage/leveldb"),
|
|
35
|
+
("Edge", home / "Library/Application Support/Microsoft Edge/Default/Local Storage/leveldb"),
|
|
36
|
+
("Firefox", home / "Library/Application Support/Firefox/Profiles"),
|
|
37
|
+
]
|
|
38
|
+
elif os.name == "nt":
|
|
39
|
+
appdata = Path(os.environ.get("APPDATA", ""))
|
|
40
|
+
local_appdata = Path(os.environ.get("LOCALAPPDATA", ""))
|
|
41
|
+
paths = [
|
|
42
|
+
("Discord App", appdata / "discord/Local Storage/leveldb"),
|
|
43
|
+
("Discord PTB", appdata / "discordptb/Local Storage/leveldb"),
|
|
44
|
+
("Discord Canary", appdata / "discordcanary/Local Storage/leveldb"),
|
|
45
|
+
("Chrome", local_appdata / "Google/Chrome/User Data/Default/Local Storage/leveldb"),
|
|
46
|
+
("Brave", local_appdata / "BraveSoftware/Brave-Browser/User Data/Default/Local Storage/leveldb"),
|
|
47
|
+
("Edge", local_appdata / "Microsoft/Edge/User Data/Default/Local Storage/leveldb"),
|
|
48
|
+
]
|
|
49
|
+
else: # Linux
|
|
50
|
+
config = Path(os.environ.get("XDG_CONFIG_HOME", home / ".config"))
|
|
51
|
+
paths = [
|
|
52
|
+
("Discord App", config / "discord/Local Storage/leveldb"),
|
|
53
|
+
("Discord PTB", config / "discordptb/Local Storage/leveldb"),
|
|
54
|
+
("Discord Canary", config / "discordcanary/Local Storage/leveldb"),
|
|
55
|
+
("Chrome", config / "google-chrome/Default/Local Storage/leveldb"),
|
|
56
|
+
("Brave", config / "BraveSoftware/Brave-Browser/Default/Local Storage/leveldb"),
|
|
57
|
+
]
|
|
58
|
+
|
|
59
|
+
return [(name, p) for name, p in paths if p.exists()]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _extract_tokens_from_file(filepath: Path) -> list[str]:
    """Return every substring of *filepath* that matches a token pattern.

    The file is read as bytes and decoded as UTF-8 with undecodable bytes
    dropped. A file that cannot be read yields an empty list.
    """
    try:
        raw = filepath.read_bytes()
    except OSError:  # also covers PermissionError, which is an OSError subclass
        return []

    text = raw.decode("utf-8", errors="ignore")
    hits: list[str] = []
    for rx in _TOKEN_PATTERNS:
        hits += rx.findall(text)
    return hits
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def find_tokens() -> list[dict]:
    """Scan the known storage directories for token-shaped strings.

    Returns:
        One ``{"source": ..., "token": ...}`` dict per distinct token, where
        ``source`` names the first location in which the token was seen.
    """
    first_seen: dict[str, str] = {}  # token -> source label

    for label, folder in _get_search_paths():
        if not folder.is_dir():
            continue
        # Same scan order as before: all *.ldb files, then all *.log files.
        for glob_pattern in ("*.ldb", "*.log"):
            for candidate in folder.glob(glob_pattern):
                for tok in _extract_tokens_from_file(candidate):
                    first_seen.setdefault(tok, label)

    return [{"source": label, "token": tok} for tok, label in first_seen.items()]
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def save_token_to_env(token: str, env_path: Path | None = None) -> Path:
    """Store ``DISCORD_TOKEN=<token>`` in a dotenv file.

    Every existing line that starts with ``DISCORD_TOKEN=`` is replaced; if
    there is none, the entry is appended. All other lines are preserved.

    Args:
        token: The token value to store.
        env_path: Target file; defaults to ``.env`` in the current directory.

    Returns:
        The path of the file that was written.
    """
    if env_path is None:
        env_path = Path.cwd() / ".env"

    entry = f"DISCORD_TOKEN={token}"
    lines: list[str] = []
    replaced = False

    if env_path.exists():
        # Explicit UTF-8 so the result does not depend on the locale encoding.
        for line in env_path.read_text(encoding="utf-8").splitlines():
            if line.startswith("DISCORD_TOKEN="):
                lines.append(entry)
                replaced = True
            else:
                lines.append(line)

    if not replaced:
        lines.append(entry)

    env_path.write_text("\n".join(lines) + "\n", encoding="utf-8")

    # The file now holds a credential: restrict it to the owner. This is
    # best-effort because some filesystems do not support permission changes.
    try:
        os.chmod(env_path, 0o600)
    except OSError:
        pass

    return env_path
|
|
File without changes
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Data commands — export, purge."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
|
|
8
|
+
from ..db import MessageDB
|
|
9
|
+
|
|
10
|
+
console = Console()
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@click.group("data", invoke_without_command=True)
def data_group():
    """Data management commands (registered at top-level)."""
    # Intentionally empty: the group only serves as a container for the
    # commands declared on it below.
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@data_group.command("export")
@click.argument("channel")
@click.option("-f", "--format", "fmt", type=click.Choice(["text", "json"]), default="text")
@click.option("-o", "--output", "output_file", help="Output file path")
@click.option("--hours", type=int, help="Only export last N hours")
def export(channel: str, fmt: str, output_file: str | None, hours: int | None):
    """Export messages from CHANNEL to text or JSON."""
    # channel:     value passed to MessageDB.resolve_channel_id
    # fmt:         "text" (one "[timestamp] sender: text" line per message) or "json"
    # output_file: destination path; when omitted the export goes to stdout
    # hours:       only export the last N hours; 0 or omitted means no time filter
    db = MessageDB()
    try:
        channel_id = db.resolve_channel_id(channel)
        if channel_id is None:
            console.print(f"[red]Channel '{channel}' not found in database.[/red]")
            return
        # `hours or None` keeps the previous handling of 0 (no time filter).
        msgs = db.get_recent(channel_id=channel_id, hours=hours or None, limit=100000)
    finally:
        # Close the database even when a lookup raises.
        db.close()

    if not msgs:
        console.print(f"[yellow]No messages found for '{channel}'.[/yellow]")
        return

    if fmt == "json":
        content = json.dumps(msgs, ensure_ascii=False, indent=2, default=str)
    else:
        content = "\n".join(
            f"[{(msg.get('timestamp') or '')[:19]}] "
            f"{msg.get('sender_name') or 'Unknown'}: {msg.get('content') or ''}"
            for msg in msgs
        )

    if output_file:
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(content)
        console.print(f"[green]✓[/green] Exported {len(msgs)} messages to {output_file}")
    else:
        # Emit the payload verbatim. Rich's console.print would interpret
        # "[...]" sequences in message text as markup and re-wrap long lines,
        # which corrupts both the JSON and the text export.
        click.echo(content)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@data_group.command("purge")
@click.argument("channel")
@click.option("-y", "--yes", is_flag=True, help="Skip confirmation")
def purge(channel: str, yes: bool):
    """Delete all stored messages for CHANNEL."""
    # channel: value passed to MessageDB.resolve_channel_id
    # yes:     when set, delete without asking for confirmation
    db = MessageDB()
    try:
        channel_id = db.resolve_channel_id(channel)
        if channel_id is None:
            console.print(f"[red]Channel '{channel}' not found in database.[/red]")
            return

        if not yes:
            count = db.count(channel_id)
            if not click.confirm(f"Delete {count} messages from channel {channel_id}?"):
                return

        deleted = db.delete_channel(channel_id)
    finally:
        # One close for every exit path, including an aborted prompt or a
        # failing database call.
        db.close()

    console.print(f"[green]✓[/green] Deleted {deleted} messages")
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
"""Discord subcommands — guilds, channels, history, sync, sync-all."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from rich.progress import Progress, SpinnerColumn, TextColumn
|
|
8
|
+
from rich.table import Table
|
|
9
|
+
|
|
10
|
+
from ..client import datetime_to_snowflake, fetch_messages, get_client, get_guild_info, list_channels, list_guilds
|
|
11
|
+
from ..db import MessageDB
|
|
12
|
+
|
|
13
|
+
console = Console()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@click.group("dc")
def discord_group():
    """Discord operations — list servers, fetch history, sync."""
    # Intentionally empty: the group only serves as a container for the
    # commands declared on it below.
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@discord_group.command("guilds")
def dc_guilds():
    """List joined Discord servers."""

    async def _fetch():
        # Open an authenticated client just long enough to list the guilds.
        async with get_client() as api:
            return await list_guilds(api)

    servers = asyncio.run(_fetch())

    listing = Table(title="Discord Servers")
    for header, options in (
        ("ID", {"style": "dim"}),
        ("Name", {"style": "bold"}),
        ("Owner", {"justify": "center"}),
    ):
        listing.add_column(header, **options)

    for server in servers:
        owner_mark = "✓" if server["owner"] else ""
        listing.add_row(server["id"], server["name"], owner_mark)

    console.print(listing)
    console.print(f"\nTotal: {len(servers)} servers")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@discord_group.command("channels")
@click.argument("guild")
def dc_channels(guild: str):
    """List text channels in a GUILD (server ID or name)."""

    async def _lookup():
        async with get_client() as api:
            target_id = guild
            if not guild.isdigit():
                # Not numeric: treat the argument as a name and take the first
                # joined guild whose name contains it (case-insensitive).
                needle = guild.lower()
                hit = None
                for candidate in await list_guilds(api):
                    if needle in candidate["name"].lower():
                        hit = candidate
                        break
                if not hit:
                    console.print(f"[red]Guild '{guild}' not found.[/red]")
                    return []
                target_id = hit["id"]
                console.print(f"[dim]Resolved to: {hit['name']} ({target_id})[/dim]")

            return await list_channels(api, target_id)

    found = asyncio.run(_lookup())
    if not found:
        return

    listing = Table(title="Text Channels")
    listing.add_column("ID", style="dim")
    listing.add_column("Name", style="bold")
    listing.add_column("Topic", max_width=50)

    for entry in found:
        topic = (entry.get("topic") or "")[:50]
        listing.add_row(entry["id"], f"#{entry['name']}", topic)

    console.print(listing)
    console.print(f"\nTotal: {len(found)} text channels")
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@discord_group.command("history")
@click.argument("channel")
@click.option("-n", "--limit", default=1000, help="Max messages to fetch")
@click.option("--guild-name", help="Guild name to store with messages")
@click.option("--channel-name", help="Channel name to store with messages")
def dc_history(channel: str, limit: int, guild_name: str | None, channel_name: str | None):
    """Fetch historical messages from CHANNEL (channel ID)."""

    async def _download():
        store = MessageDB()
        try:
            async with get_client() as api:
                # Labels saved with every message; values given on the
                # command line take precedence over API lookups.
                channel_label = channel_name
                guild_label = guild_name

                if not channel_label:
                    # Best effort only: any failure leaves the labels as-is.
                    try:
                        reply = await api.get(f"/channels/{channel}")
                        if reply.status_code == 200:
                            meta = reply.json()
                            channel_label = meta.get("name", channel)
                            if not guild_label and meta.get("guild_id"):
                                guild_meta = await get_guild_info(api, meta["guild_id"])
                                if guild_meta:
                                    guild_label = guild_meta["name"]
                    except Exception:
                        pass

                spinner = Progress(
                    SpinnerColumn(),
                    TextColumn("[progress.description]{task.description}"),
                    console=console,
                )
                with spinner as progress:
                    job = progress.add_task(
                        f"Fetching messages from {channel_label or channel}...", total=None
                    )
                    fetched = await fetch_messages(api, channel, limit=limit)
                    progress.update(job, description=f"Fetched {len(fetched)} messages")

                # Attach the guild/channel labels before persisting.
                for row in fetched:
                    row["guild_name"] = guild_label
                    row["channel_name"] = channel_label

                stored = store.insert_batch(fetched)
                return len(fetched), stored
        finally:
            store.close()

    total, inserted = asyncio.run(_download())
    console.print(f"\n[green]✓[/green] Fetched {total} messages, stored {inserted} new")
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@discord_group.command("sync")
@click.argument("channel")
@click.option("-n", "--limit", default=5000, help="Max messages per sync")
def dc_sync(channel: str, limit: int):
    """Incremental sync — fetch only new messages from CHANNEL."""
    db = MessageDB()
    # Resume point: the last message id already stored for this channel.
    last_id = db.get_last_msg_id(channel)
    if last_id:
        console.print(f"Syncing from msg_id > {last_id}...")

    async def _pull():
        try:
            async with get_client() as api:
                channel_label = None
                guild_label = None
                # Best-effort lookup of display names; failures are ignored.
                try:
                    reply = await api.get(f"/channels/{channel}")
                    if reply.status_code == 200:
                        meta = reply.json()
                        channel_label = meta.get("name")
                        if meta.get("guild_id"):
                            guild_meta = await get_guild_info(api, meta["guild_id"])
                            if guild_meta:
                                guild_label = guild_meta["name"]
                except Exception:
                    pass

                spinner = Progress(
                    SpinnerColumn(),
                    TextColumn("[progress.description]{task.description}"),
                    console=console,
                )
                with spinner as progress:
                    job = progress.add_task(f"Syncing {channel_label or channel}...", total=None)
                    fetched = await fetch_messages(api, channel, limit=limit, after=last_id)
                    progress.update(job, description=f"Fetched {len(fetched)} new messages")

                for row in fetched:
                    row["guild_name"] = guild_label
                    row["channel_name"] = channel_label

                stored = db.insert_batch(fetched)
                return len(fetched), stored
        finally:
            db.close()

    total, inserted = asyncio.run(_pull())
    console.print(f"\n[green]✓[/green] Synced {total} messages, stored {inserted} new")
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
@discord_group.command("sync-all")
@click.option("-n", "--limit", default=5000, help="Max messages per channel")
def dc_sync_all(limit: int):
    """Sync ALL channels in the database."""
    db = MessageDB()
    channels = db.get_channels()
    if not channels:
        console.print("[yellow]No channels in database. Run 'discord dc history' first.[/yellow]")
        db.close()
        return

    console.print(f"Syncing {len(channels)} channels...")

    async def _run():
        try:
            async with get_client() as client:
                # Keyed by channel id, not display name: different channels
                # can share a name (e.g. "general" in several guilds), and
                # keying by name made them overwrite each other, so both the
                # message total and the channel count were undercounted.
                results: dict[str, int] = {}

                for ch in channels:
                    ch_id = ch["channel_id"]
                    ch_name = ch.get("channel_name") or ch_id

                    last_id = db.get_last_msg_id(ch_id)
                    try:
                        messages = await fetch_messages(
                            client, ch_id, limit=limit, after=last_id
                        )

                        # Carry over the names already stored for this channel.
                        for msg in messages:
                            msg["guild_name"] = ch.get("guild_name")
                            msg["channel_name"] = ch.get("channel_name")

                        inserted = db.insert_batch(messages)
                        results[ch_id] = inserted

                        if inserted > 0:
                            console.print(f"  [green]✓[/green] {ch_name}: +{inserted}")
                        else:
                            console.print(f"  [dim]✓ {ch_name}: no new messages[/dim]")

                    except Exception as e:
                        # One failing channel must not abort the whole run.
                        console.print(f"  [red]✗ {ch_name}: {e}[/red]")
                        results[ch_id] = 0

                return results
        finally:
            db.close()

    results = asyncio.run(_run())
    total_new = sum(results.values())
    console.print(f"\n[green]✓[/green] Synced {total_new} new messages across {len(results)} channels")
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
@discord_group.command("info")
@click.argument("guild")
def dc_info(guild: str):
    """Show detailed info about a GUILD (server)."""

    async def _fetch():
        async with get_client() as api:
            return await get_guild_info(api, guild)

    details = asyncio.run(_fetch())
    if not details:
        console.print(f"[red]Could not find guild: {guild}[/red]")
        return

    # Two-column field/value listing without a header row.
    sheet = Table(title="Guild Info", show_header=False)
    sheet.add_column("Field", style="bold")
    sheet.add_column("Value")

    for field, value in details.items():
        shown = "—" if value is None else str(value)
        sheet.add_row(field, shown)

    console.print(sheet)
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""discord-cli — CLI entry point."""
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
from rich.table import Table
|
|
6
|
+
|
|
7
|
+
from .data import data_group
|
|
8
|
+
from .discord_cmds import discord_group
|
|
9
|
+
from .query import query_group
|
|
10
|
+
|
|
11
|
+
console = Console()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@click.group()
# package_name must be the installed distribution name declared in
# pyproject.toml ("kabi-discord-cli"); the previous value "discord-cli" does
# not match it, so `--version` could not look up the version.
@click.version_option(package_name="kabi-discord-cli")
def cli():
    """discord — CLI for fetching Discord chat history and searching messages."""
    pass
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@cli.command("auth")
@click.option("--save", is_flag=True, help="Save found token to .env automatically")
def auth(save: bool):
    """Extract Discord token from local browser/Discord client."""
    import httpx

    from ..auth import find_tokens, save_token_to_env

    console.print("[dim]Scanning for Discord tokens...[/dim]")
    results = find_tokens()

    if not results:
        console.print("[red]No tokens found.[/red]")
        console.print(
            "[dim]Make sure Discord desktop app or browser is logged in.[/dim]"
        )
        return

    console.print(f"[dim]Found {len(results)} candidate token(s), validating...[/dim]")

    # Validate each candidate against the API; the first one accepted wins.
    valid_token = None
    valid_source = None
    user_info = None
    failed_requests = 0  # candidates for which no usable response was obtained

    for r in results:
        token = r["token"]
        try:
            resp = httpx.get(
                "https://discord.com/api/v10/users/@me",
                headers={"Authorization": token},
                timeout=10.0,
            )
            if resp.status_code == 200:
                user_info = resp.json()
                valid_token = token
                valid_source = r["source"]
                break
        except Exception:
            # Deliberately broad: candidates come from a regex scan and may be
            # garbage that cannot even be sent. Count the failure so it is not
            # later reported as an API rejection.
            failed_requests += 1
            continue

    if not valid_token or not user_info:
        if failed_requests:
            console.print(
                f"[red]No valid token found. {failed_requests} of {len(results)} "
                "validation request(s) failed without a usable response.[/red]"
            )
            console.print("[dim]Check your network connection and retry.[/dim]")
        else:
            console.print("[red]No valid token found. All tokens returned 401.[/red]")
            console.print("[dim]Try logging into Discord in your browser and retry.[/dim]")
        return

    # Only the first and last 8 characters of the token are displayed.
    masked = f"{valid_token[:8]}...{valid_token[-8:]}"
    username = user_info.get("username", "?")
    global_name = user_info.get("global_name") or username
    console.print(
        f"[green]✓[/green] Valid token from [cyan]{valid_source}[/cyan]: {masked}"
    )
    console.print(
        f"  Logged in as: [bold]{global_name}[/bold] (@{username})"
    )

    if save:
        env_path = save_token_to_env(valid_token)
        console.print(f"[green]✓[/green] Saved to {env_path}")
    else:
        console.print(
            "\n[dim]Run with --save to auto-save to .env[/dim]"
        )
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# Mount the Discord group on the root CLI under the name "dc".
cli.add_command(discord_group, name="dc")

# Expose the query commands, then the data commands, directly on the root CLI
# under their own names.
for name, cmd in [*query_group.commands.items(), *data_group.commands.items()]:
    cli.add_command(cmd, name)
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""Query commands — search, stats, today."""
|
|
2
|
+
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from rich.table import Table
|
|
8
|
+
|
|
9
|
+
from ..db import MessageDB
|
|
10
|
+
|
|
11
|
+
console = Console()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@click.group("query", invoke_without_command=True)
def query_group():
    """Query and analysis commands (registered at top-level)."""
    # Intentionally empty: the group only serves as a container for the
    # commands declared on it below.
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@query_group.command("search")
@click.argument("keyword")
@click.option("-c", "--channel", help="Filter by channel name")
@click.option("-n", "--limit", default=50, help="Max results")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
def search(keyword: str, channel: str | None, limit: int, as_json: bool):
    """Search messages by KEYWORD."""
    import json

    from rich.markup import escape

    db = MessageDB()
    try:
        channel_id = db.resolve_channel_id(channel) if channel else None
        if channel and channel_id is None:
            # A filter that matches no channel must not silently widen the
            # search to every channel.
            console.print(f"[red]Channel '{escape(channel)}' not found in database.[/red]")
            return
        results = db.search(keyword, channel_id=channel_id, limit=limit)
    finally:
        # Close the database even when a lookup raises.
        db.close()

    if not results:
        console.print("[yellow]No messages found.[/yellow]")
        return

    if as_json:
        # Emit JSON verbatim; Rich would interpret markup and re-wrap lines.
        click.echo(json.dumps(results, ensure_ascii=False, indent=2, default=str))
        return

    for msg in results:
        ts = (msg.get("timestamp") or "")[:19]
        # Stored text is untrusted: escape it so "[...]" sequences in names or
        # message bodies are shown literally instead of parsed as Rich markup.
        sender = escape(msg.get("sender_name") or "Unknown")
        ch_name = escape(msg.get("channel_name") or "")
        content = escape((msg.get("content") or "")[:200])
        console.print(
            f"[dim]{ts}[/dim] [cyan]#{ch_name}[/cyan] | "
            f"[bold]{sender}[/bold]: {content}"
        )

    console.print(f"\n[dim]Found {len(results)} messages[/dim]")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@query_group.command("stats")
def stats():
    """Show message statistics per channel."""
    db = MessageDB()
    per_channel = db.get_channels()
    grand_total = db.count()
    db.close()

    report = Table(title=f"Message Stats (Total: {grand_total})")
    for header, options in (
        ("Channel ID", {"style": "dim"}),
        ("Channel", {"style": "bold"}),
        ("Guild", {"style": "cyan"}),
        ("Messages", {"justify": "right"}),
        ("First", {"style": "dim"}),
        ("Last", {"style": "dim"}),
    ):
        report.add_column(header, **options)

    for row in per_channel:
        # Only the last six characters of the id are shown, followed by "…".
        short_id = str(row["channel_id"])[-6:] + "…"
        label = f"#{row['channel_name']}" if row["channel_name"] else "—"
        guild = row.get("guild_name") or "—"
        # First ten characters of each timestamp.
        first_day = (row["first_msg"] or "")[:10]
        last_day = (row["last_msg"] or "")[:10]
        report.add_row(short_id, label, guild, str(row["msg_count"]), first_day, last_day)

    console.print(report)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@query_group.command("today")
@click.option("-c", "--channel", help="Filter by channel name")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
def today(channel: str | None, as_json: bool):
    """Show today's messages, grouped by channel."""
    import json

    from rich.markup import escape

    db = MessageDB()
    try:
        channel_id = db.resolve_channel_id(channel) if channel else None
        if channel and channel_id is None:
            # A filter that matches no channel must not silently fall back to
            # showing every channel.
            console.print(f"[red]Channel '{escape(channel)}' not found in database.[/red]")
            return
        msgs = db.get_today(channel_id=channel_id)
    finally:
        # Close the database even when a lookup raises.
        db.close()

    if not msgs:
        console.print("[yellow]No messages today.[/yellow]")
        return

    if as_json:
        # Emit JSON verbatim; Rich would interpret markup and re-wrap lines.
        click.echo(json.dumps(msgs, ensure_ascii=False, indent=2, default=str))
        return

    # Group by "guild > #channel" (or just "#channel" when no guild is stored).
    grouped: dict[str, list[dict]] = defaultdict(list)
    for m in msgs:
        key = f"#{m.get('channel_name') or 'unknown'}"
        if m.get("guild_name"):
            key = f"{m['guild_name']} > {key}"
        grouped[key].append(m)

    # Busiest channels first.
    for ch_label, ch_msgs in sorted(grouped.items(), key=lambda x: -len(x[1])):
        # Stored names and message text are untrusted: escape them so "[...]"
        # sequences are shown literally instead of parsed as Rich markup.
        console.print(
            f"\n[bold cyan]═══ {escape(ch_label)} ({len(ch_msgs)} msgs) ═══[/bold cyan]"
        )
        for m in ch_msgs:
            ts = (m.get("timestamp") or "")[11:19]
            sender = escape((m.get("sender_name") or "Unknown")[:15])
            content = escape((m.get("content") or "")[:200].replace("\n", " "))
            console.print(f"  [dim]{ts}[/dim] [bold]{sender}[/bold]: {content}")

    console.print(f"\n[green]Total: {len(msgs)} messages today[/green]")
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
"""Discord REST API v10 client using httpx."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from contextlib import asynccontextmanager
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from typing import Any, AsyncGenerator
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
|
|
12
|
+
from .config import API_BASE, get_token
|
|
13
|
+
|
|
14
|
+
# Unix time of 2015-01-01T00:00:00Z in milliseconds (the Discord epoch).
DISCORD_EPOCH = 1_420_070_400_000


def snowflake_to_datetime(snowflake: int | str) -> datetime:
    """Return the UTC creation time encoded in a snowflake ID.

    The bits above the low 22 hold milliseconds since ``DISCORD_EPOCH``.
    """
    elapsed_ms = int(snowflake) >> 22
    unix_seconds = (elapsed_ms + DISCORD_EPOCH) / 1000
    return datetime.fromtimestamp(unix_seconds, tz=timezone.utc)


def datetime_to_snowflake(dt: datetime) -> int:
    """Return the smallest snowflake ID for the millisecond of *dt*.

    Intended for use as the ``after`` parameter; the low 22 bits are zero.
    """
    unix_ms = int(dt.timestamp() * 1000)
    return (unix_ms - DISCORD_EPOCH) << 22
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@asynccontextmanager
async def get_client() -> AsyncGenerator[httpx.AsyncClient, None]:
    """Yield an ``httpx.AsyncClient`` preconfigured for the API.

    The client uses ``API_BASE`` as base URL, sends the token returned by
    ``get_token()`` in the ``Authorization`` header, and has a 30 second
    timeout. It is closed when the context exits.
    """
    default_headers = {
        "Authorization": get_token(),
        "Content-Type": "application/json",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
    }
    session = httpx.AsyncClient(base_url=API_BASE, headers=default_headers, timeout=30.0)
    async with session:
        yield session
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
async def _handle_rate_limit(response: httpx.Response) -> None:
|
|
47
|
+
"""Sleep if we hit a rate limit."""
|
|
48
|
+
if response.status_code == 429:
|
|
49
|
+
data = response.json()
|
|
50
|
+
retry_after = data.get("retry_after", 1.0)
|
|
51
|
+
await asyncio.sleep(retry_after)
|
|
52
|
+
elif remaining := response.headers.get("X-RateLimit-Remaining"):
|
|
53
|
+
if int(remaining) == 0:
|
|
54
|
+
reset_after = float(response.headers.get("X-RateLimit-Reset-After", "1.0"))
|
|
55
|
+
await asyncio.sleep(reset_after)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
async def _get(client: httpx.AsyncClient, path: str, **params: Any) -> Any:
    """Issue a GET and return the decoded JSON body, retrying on HTTP 429.

    Up to three requests are made. After every response the rate-limit
    handler runs (and may sleep). A non-429 error status raises via
    ``raise_for_status``.

    Raises:
        RuntimeError: if all three attempts were answered with 429.
    """
    attempts_left = 3
    while attempts_left:
        attempts_left -= 1
        reply = await client.get(path, params=params)
        # Honour rate-limit information on every response, not only on 429.
        await _handle_rate_limit(reply)
        if reply.status_code == 429:
            continue
        reply.raise_for_status()
        return reply.json()
    raise RuntimeError(f"Rate limited after 3 retries: {path}")
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
async def list_guilds(client: httpx.AsyncClient) -> list[dict]:
    """Return a summary dict for every guild in ``/users/@me/guilds``.

    Each summary holds ``id``, ``name``, ``icon`` (``None`` when absent) and
    ``owner`` (``False`` when absent).
    """
    guilds = await _get(client, "/users/@me/guilds")
    summaries: list[dict] = []
    for guild in guilds:
        summary = {
            "id": guild["id"],
            "name": guild["name"],
            "icon": guild.get("icon"),
            "owner": guild.get("owner", False),
        }
        summaries.append(summary)
    return summaries
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
async def list_channels(client: httpx.AsyncClient, guild_id: str) -> list[dict]:
    """Return the text-like channels of a guild, ordered by ``position``.

    Only channel types 0 (text), 5 (announcement) and 15 (forum) are kept.
    Each entry holds ``id``, ``name``, ``type``, ``position``, ``parent_id``
    and ``topic``.
    """
    raw_channels = await _get(client, f"/guilds/{guild_id}/channels")
    wanted_types = {0, 5, 15}  # text, announcement, forum
    channels = [
        {
            "id": entry["id"],
            "name": entry["name"],
            "type": entry.get("type", 0),
            "position": entry.get("position", 0),
            "parent_id": entry.get("parent_id"),
            "topic": entry.get("topic"),
        }
        for entry in raw_channels
        if entry.get("type") in wanted_types
    ]
    channels.sort(key=lambda channel: channel["position"])
    return channels
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
async def fetch_messages(
    client: httpx.AsyncClient,
    channel_id: str,
    *,
    limit: int = 1000,
    after: str | None = None,
    before: str | None = None,
) -> list[dict]:
    """Fetch up to *limit* messages from a channel, following pagination.

    Requests are issued in pages of at most 100 messages.

    Args:
        client: Authenticated HTTP client.
        channel_id: Channel to read from.
        limit: Maximum number of messages to return; ``<= 0`` returns ``[]``.
        after: If given, page *forward* from this message ID (exclusive).
            Takes precedence over *before*, which is then not sent.
        before: If given (and *after* is not), page *backward* starting just
            below this message ID. With neither cursor, paging starts at the
            newest message and walks backward.

    Returns:
        Parsed messages (see ``_parse_message``) sorted by ascending numeric
        message ID.
    """
    all_messages: list[dict] = []
    remaining = limit
    forward = bool(after)

    while remaining > 0:
        batch_limit = min(remaining, 100)
        params: dict[str, Any] = {"limit": batch_limit}
        if forward:
            params["after"] = after
        elif before:
            # Without this cursor every request would return the same newest
            # page again and the result would be full of duplicates.
            params["before"] = before

        data = await _get(client, f"/channels/{channel_id}/messages", **params)
        if not data:
            break

        all_messages.extend(_parse_message(msg, channel_id) for msg in data)
        remaining -= len(data)

        # A short page means there is nothing further in this direction.
        if len(data) < batch_limit:
            break

        # Advance the cursor from the IDs themselves rather than relying on
        # the order in which the page happens to be returned.
        page_ids = [int(msg["id"]) for msg in data]
        if forward:
            after = str(max(page_ids))
        else:
            before = str(min(page_ids))

        # Small delay between pages to be gentle on the API.
        await asyncio.sleep(0.5)

    # IDs are numeric strings of varying length, so compare them as integers;
    # a plain string sort would put "999" after "1000".
    all_messages.sort(key=lambda m: int(m["msg_id"]))
    return all_messages
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _parse_message(msg: dict, channel_id: str) -> dict:
    """Convert a raw message payload into the flat dict used by this package.

    The ``content`` field is the message text followed by one
    ``[attachment: <filename>]`` line per attachment and one
    ``[embed: <title>]`` line per embed that has a title. The timestamp is
    parsed from the ISO string (current UTC time when missing) and is given
    a UTC tzinfo when it has none.
    """
    raw_ts = msg.get("timestamp", "")
    if raw_ts:
        sent_at = datetime.fromisoformat(raw_ts)
    else:
        sent_at = datetime.now(timezone.utc)
    if sent_at.tzinfo is None:
        sent_at = sent_at.replace(tzinfo=timezone.utc)

    text = msg.get("content")
    pieces = [text] if text else []
    pieces.extend(
        f"[attachment: {att.get('filename', 'file')}]"
        for att in msg.get("attachments", [])
    )
    pieces.extend(
        f"[embed: {title}]"
        for embed in msg.get("embeds", [])
        if (title := embed.get("title"))
    )

    sender = msg.get("author", {})
    display_name = sender.get("global_name") or sender.get("username") or "Unknown"

    return {
        "msg_id": msg["id"],
        "channel_id": channel_id,
        "sender_id": sender.get("id"),
        "sender_name": display_name,
        "content": "\n".join(pieces),
        "timestamp": sent_at,
    }
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
async def get_guild_info(client: httpx.AsyncClient, guild_id: str) -> dict | None:
    """Fetch summary details for one guild, or ``None`` if anything fails.

    The request asks for ``with_counts=true``; the returned dict contains
    ``id``, ``name``, ``description``, ``member_count`` and ``online_count``
    (the last three may be ``None``).
    """
    try:
        guild = await _get(client, f"/guilds/{guild_id}", with_counts="true")
        info = {
            "id": guild["id"],
            "name": guild["name"],
            "description": guild.get("description"),
            "member_count": guild.get("approximate_member_count"),
            "online_count": guild.get("approximate_presence_count"),
        }
    except Exception:
        # Best-effort lookup: every failure is reported as "no info".
        return None
    return info
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""Configuration management - loads from .env or environment variables."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from dotenv import load_dotenv
|
|
10
|
+
|
|
11
|
+
_PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _load_env() -> None:
    """Load the first existing ``.env`` file: cwd first, then the project root.

    At most one file is loaded; when neither exists nothing happens.
    """
    candidates = (Path.cwd() / ".env", _PROJECT_ROOT / ".env")
    env_file = next((path for path in candidates if path.is_file()), None)
    if env_file is not None:
        load_dotenv(env_file)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _default_data_home() -> Path:
    """Pick the base directory under which application data is stored.

    A non-empty ``XDG_DATA_HOME`` wins on every platform. Otherwise:
    macOS uses ``~/Library/Application Support``; Windows uses
    ``LOCALAPPDATA`` (or ``~/AppData/Local`` when unset); everything else
    uses ``~/.local/share``.
    """
    xdg_home = os.environ.get("XDG_DATA_HOME", "")
    if xdg_home:
        return Path(xdg_home).expanduser()

    user_home = Path.home()
    if sys.platform == "darwin":
        return user_home / "Library" / "Application Support"
    if os.name != "nt":
        return user_home / ".local" / "share"

    local_appdata = os.environ.get("LOCALAPPDATA", "")
    if local_appdata:
        return Path(local_appdata).expanduser()
    return user_home / "AppData" / "Local"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _resolve_env_path(raw: str) -> Path:
    """Expand ``~`` in *raw* and anchor relative paths at the current directory."""
    expanded = Path(raw).expanduser()
    return expanded if expanded.is_absolute() else Path.cwd() / expanded
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
_load_env()
|
|
47
|
+
|
|
48
|
+
APP_NAME = "discord-cli"
|
|
49
|
+
API_BASE = "https://discord.com/api/v10"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def get_token() -> str:
    """Return the token stored in the ``DISCORD_TOKEN`` environment variable.

    Raises:
        RuntimeError: if the variable is unset or empty.
    """
    token = os.environ.get("DISCORD_TOKEN", "")
    if token:
        return token
    raise RuntimeError(
        "DISCORD_TOKEN not set. Get it from browser DevTools → "
        "Network tab → any Discord request → Authorization header."
    )
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def get_data_dir() -> Path:
    """Return the data directory, creating it (and parents) when missing.

    A non-empty ``DATA_DIR`` environment variable overrides the default of
    ``_default_data_home() / APP_NAME``.
    """
    override = os.environ.get("DATA_DIR", "")
    data_dir = _resolve_env_path(override) if override else _default_data_home() / APP_NAME
    data_dir.mkdir(parents=True, exist_ok=True)
    return data_dir
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def get_db_path() -> Path:
    """Return the SQLite database path, creating its parent directory.

    A non-empty ``DB_PATH`` environment variable overrides the default of
    ``messages.db`` inside ``get_data_dir()``.
    """
    override = os.environ.get("DB_PATH", "")
    db_file = _resolve_env_path(override) if override else get_data_dir() / "messages.db"
    db_file.parent.mkdir(parents=True, exist_ok=True)
    return db_file
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
"""SQLite database for storing Discord chat messages."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import sqlite3
|
|
7
|
+
from datetime import datetime, timedelta, timezone
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from .config import get_db_path
|
|
12
|
+
|
|
13
|
+
_CREATE_TABLE = """
|
|
14
|
+
CREATE TABLE IF NOT EXISTS messages (
|
|
15
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
16
|
+
platform TEXT NOT NULL DEFAULT 'discord',
|
|
17
|
+
guild_id TEXT,
|
|
18
|
+
guild_name TEXT,
|
|
19
|
+
channel_id TEXT NOT NULL,
|
|
20
|
+
channel_name TEXT,
|
|
21
|
+
msg_id TEXT NOT NULL,
|
|
22
|
+
sender_id TEXT,
|
|
23
|
+
sender_name TEXT,
|
|
24
|
+
content TEXT,
|
|
25
|
+
timestamp TEXT NOT NULL,
|
|
26
|
+
raw_json TEXT,
|
|
27
|
+
UNIQUE(platform, channel_id, msg_id)
|
|
28
|
+
);
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
_CREATE_INDEX = """
|
|
32
|
+
CREATE INDEX IF NOT EXISTS idx_messages_channel_ts ON messages(channel_id, timestamp);
|
|
33
|
+
CREATE INDEX IF NOT EXISTS idx_messages_content ON messages(content);
|
|
34
|
+
CREATE INDEX IF NOT EXISTS idx_messages_sender ON messages(sender_name);
|
|
35
|
+
CREATE INDEX IF NOT EXISTS idx_messages_guild ON messages(guild_id);
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class MessageDB:
    """SQLite message store with context manager support.

    All rows live in the ``messages`` table. ``(platform, channel_id,
    msg_id)`` is unique, so re-inserting a stored message is a no-op.
    Timestamps are stored as ISO-8601 text and compared as text, which is
    why aware datetimes are normalised to UTC on insert.
    """

    def __init__(self, db_path: Path | str | None = None):
        """Open the database (creating file and schema when missing).

        Args:
            db_path: Database file location; ``None`` uses ``get_db_path()``.
        """
        if db_path is None:
            self.db_path = get_db_path()
        else:
            self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self.conn = sqlite3.connect(str(self.db_path))
        self.conn.row_factory = sqlite3.Row
        self.conn.execute("PRAGMA journal_mode=WAL")
        self.conn.executescript(_CREATE_TABLE + _CREATE_INDEX)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
        return False  # never suppress exceptions from the with-body

    @staticmethod
    def _timestamp_text(value: Any) -> Any:
        """Return the text stored for a message timestamp.

        Aware datetimes are converted to UTC first so that the text
        comparisons used by the query methods order rows chronologically.
        Naive datetimes and non-datetime values are stored unchanged.
        """
        if isinstance(value, datetime):
            if value.utcoffset() is not None:
                value = value.astimezone(timezone.utc)
            return value.isoformat()
        return value

    def insert_batch(self, messages: list[dict], platform: str = "discord") -> int:
        """Insert *messages* in one transaction, skipping duplicates.

        Each message needs ``channel_id``, ``msg_id`` and ``timestamp``; the
        other columns are optional.

        Returns:
            Number of rows actually inserted. ``0`` is also returned when
            SQLite reports an error, in which case the whole batch is rolled
            back.
        """
        if not messages:
            return 0
        rows = [
            (
                platform,
                m.get("guild_id"),
                m.get("guild_name"),
                m["channel_id"],
                m.get("channel_name"),
                m["msg_id"],
                m.get("sender_id"),
                m.get("sender_name"),
                m.get("content"),
                self._timestamp_text(m["timestamp"]),
                json.dumps(m["raw_json"], ensure_ascii=False) if m.get("raw_json") else None,
            )
            for m in messages
        ]
        before = self.conn.total_changes
        try:
            self.conn.executemany(
                """INSERT OR IGNORE INTO messages
                   (platform, guild_id, guild_name, channel_id, channel_name,
                    msg_id, sender_id, sender_name, content, timestamp, raw_json)
                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                rows,
            )
            self.conn.commit()
        except sqlite3.Error:
            # Discard the partial batch so a later commit cannot persist it.
            self.conn.rollback()
            return 0
        return self.conn.total_changes - before

    def resolve_channel_id(self, channel_str: str) -> str | None:
        """Resolve a channel name or ID to a stored ``channel_id``.

        Matching order: exact channel name (case-insensitive), exact channel
        ID, then the first channel whose name contains *channel_str*. When
        nothing matches, *channel_str* is returned unchanged because it may
        still be a valid ID that has no stored messages yet.
        """
        channels = self.get_channels()
        needle = channel_str.lower()
        named = [c for c in channels if c["channel_name"]]

        # Exact matches first so "general" is not shadowed by "general-chat".
        for c in named:
            if c["channel_name"].lower() == needle:
                return c["channel_id"]
        for c in channels:
            if c["channel_id"] == channel_str:
                return c["channel_id"]

        # Fall back to a substring match on the name.
        for c in named:
            if needle in c["channel_name"].lower():
                return c["channel_id"]

        return channel_str  # Return as-is, might be a valid ID

    def search(
        self,
        keyword: str,
        channel_id: str | None = None,
        limit: int = 50,
    ) -> list[dict]:
        """Return messages whose content contains *keyword*, newest first.

        *keyword* is matched literally: ``%`` and ``_`` are escaped so they
        do not act as LIKE wildcards.
        """
        escaped = (
            keyword.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        )
        query = "SELECT * FROM messages WHERE content LIKE ? ESCAPE '\\'"
        params: list[Any] = [f"%{escaped}%"]
        if channel_id:
            query += " AND channel_id = ?"
            params.append(channel_id)
        query += " ORDER BY timestamp DESC LIMIT ?"
        params.append(limit)
        rows = self.conn.execute(query, params).fetchall()
        return [dict(r) for r in rows]

    def get_recent(
        self,
        channel_id: str | None = None,
        hours: int | None = 24,
        limit: int = 500,
    ) -> list[dict]:
        """Return the newest *limit* messages, in ascending time order.

        Args:
            channel_id: Restrict to one channel when given.
            hours: Only messages from the last *hours* hours; ``None``
                disables the time filter.
            limit: Maximum number of rows. When more rows qualify, the most
                recent ones are kept.
        """
        conditions: list[str] = []
        params: list[Any] = []
        if hours is not None:
            cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
            conditions.append("timestamp >= ?")
            params.append(cutoff)
        if channel_id:
            conditions.append("channel_id = ?")
            params.append(channel_id)
        where = f" WHERE {' AND '.join(conditions)}" if conditions else ""

        # The inner query keeps the newest rows; the outer one restores
        # chronological order for display.
        query = (
            "SELECT * FROM ("
            f"SELECT * FROM messages{where} ORDER BY timestamp DESC, id DESC LIMIT ?"
            ") ORDER BY timestamp ASC, id ASC"
        )
        params.append(limit)
        rows = self.conn.execute(query, params).fetchall()
        return [dict(r) for r in rows]

    def get_today(
        self,
        channel_id: str | None = None,
        tz_offset_hours: int = 8,
        limit: int = 5000,
    ) -> list[dict]:
        """Return messages sent since local midnight.

        Args:
            channel_id: Restrict to one channel when given.
            tz_offset_hours: Fixed UTC offset that defines "local" time.
            limit: Maximum number of rows.

        Rows are ordered by channel name, then ascending timestamp.
        """
        now_utc = datetime.now(timezone.utc)
        local_tz = timezone(timedelta(hours=tz_offset_hours))
        today_local = now_utc.astimezone(local_tz).replace(hour=0, minute=0, second=0, microsecond=0)
        cutoff_utc = today_local.astimezone(timezone.utc).isoformat()

        query = "SELECT * FROM messages WHERE timestamp >= ?"
        params: list[Any] = [cutoff_utc]
        if channel_id:
            query += " AND channel_id = ?"
            params.append(channel_id)
        query += " ORDER BY channel_name, timestamp ASC LIMIT ?"
        params.append(limit)
        rows = self.conn.execute(query, params).fetchall()
        return [dict(r) for r in rows]

    def get_channels(self) -> list[dict]:
        """Return one row per stored channel, busiest first.

        Each row has the channel and guild identifiers/names plus
        ``msg_count``, ``first_msg`` and ``last_msg``.
        """
        rows = self.conn.execute(
            """SELECT channel_id, channel_name, guild_id, guild_name,
                      COUNT(*) as msg_count,
                      MIN(timestamp) as first_msg, MAX(timestamp) as last_msg
               FROM messages
               GROUP BY channel_id
               ORDER BY msg_count DESC"""
        ).fetchall()
        return [dict(r) for r in rows]

    def get_last_msg_id(self, channel_id: str) -> str | None:
        """Return the highest ``msg_id`` stored for a channel, or ``None``.

        Used as the cursor for incremental sync. IDs are stored as text, so
        they are compared numerically; a plain text ``MAX`` would rank
        "999" above "1000". The text comparison remains as tie-breaker for
        IDs that are not numeric.
        """
        row = self.conn.execute(
            "SELECT msg_id FROM messages WHERE channel_id = ? "
            "ORDER BY CAST(msg_id AS INTEGER) DESC, msg_id DESC LIMIT 1",
            (channel_id,),
        ).fetchone()
        return row[0] if row is not None else None

    def count(self, channel_id: str | None = None) -> int:
        """Return the number of stored messages, optionally for one channel."""
        if channel_id:
            row = self.conn.execute(
                "SELECT COUNT(*) FROM messages WHERE channel_id = ?", (channel_id,)
            ).fetchone()
        else:
            row = self.conn.execute("SELECT COUNT(*) FROM messages").fetchone()
        return row[0]

    def delete_channel(self, channel_id: str) -> int:
        """Delete all messages for a channel. Returns number of deleted rows."""
        cursor = self.conn.execute(
            "DELETE FROM messages WHERE channel_id = ?", (channel_id,)
        )
        self.conn.commit()
        return cursor.rowcount

    def top_senders(
        self,
        channel_id: str | None = None,
        hours: int | None = None,
        limit: int = 20,
    ) -> list[dict]:
        """Return the most active senders, grouped by ``sender_name``.

        Args:
            channel_id: Restrict to one channel when given.
            hours: Only count messages from the last *hours* hours; a falsy
                value (``None`` or ``0``) disables the time filter.
            limit: Maximum number of senders.
        """
        conditions = ["sender_name IS NOT NULL"]
        params: list[Any] = []
        if channel_id:
            conditions.append("channel_id = ?")
            params.append(channel_id)
        if hours:
            cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
            conditions.append("timestamp >= ?")
            params.append(cutoff)

        # Only fixed fragments are interpolated; all values are bound.
        where = " AND ".join(conditions)
        rows = self.conn.execute(
            f"""SELECT sender_name, sender_id, COUNT(*) as msg_count,
                       MIN(timestamp) as first_msg, MAX(timestamp) as last_msg
                FROM messages WHERE {where}
                GROUP BY sender_name
                ORDER BY msg_count DESC
                LIMIT ?""",
            params + [limit],
        ).fetchall()
        return [dict(r) for r in rows]

    def close(self):
        """Close the underlying SQLite connection."""
        self.conn.close()
|