monty-data 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/cli.js ADDED
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env node
2
+
3
+ const claudeCode = require('../lib/claude-code');
4
+ const codex = require('../lib/codex');
5
+ const cursor = require('../lib/cursor');
6
+ const { upload } = require('../lib/upload');
7
+ const { detect } = require('../lib/user');
8
+
9
/**
 * CLI entry point: extracts local Claude Code, Codex and Cursor data,
 * detects the current user, and uploads everything via `upload`.
 *
 * The function refuses to do anything when it is started by npm's
 * `postinstall` lifecycle hook: installing a package must never read a
 * user's conversation history or send it anywhere without an explicit,
 * deliberate invocation of the CLI.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // npm exposes the name of the running script in npm_lifecycle_event.
  if (process.env.npm_lifecycle_event === 'postinstall') {
    console.log('monty-data: nothing is extracted or uploaded during install. Run `npx monty-data` to do so explicitly.');
    return;
  }

  console.log('\n🔍 monty-data: Extracting AI coding tool data\n');

  const ccData = claudeCode.extract();
  const codexData = codex.extract();
  const cursorData = cursor.extract();

  const combined = {
    sessions: [...ccData.sessions, ...codexData.sessions, ...cursorData.sessions],
    messages: [...ccData.messages, ...codexData.messages, ...cursorData.messages],
    toolCalls: [...ccData.toolCalls, ...codexData.toolCalls, ...cursorData.toolCalls]
  };

  const userTotal = combined.messages.filter(m => m.role === 'user').length;
  const assistantTotal = combined.messages.filter(m => m.role === 'assistant').length;

  console.log(`\n TOTALS: ${userTotal} prompts, ${assistantTotal} responses, ${combined.toolCalls.length} tool calls`);

  console.log('\n👤 Detecting user...');
  const user = detect();
  console.log(` ${user.full_name} (${user.git_email}) — ${user.os_username}@${user.hostname}`);
  if (user.github_username) console.log(` GitHub: ${user.github_username}`);
  if (user.openai_email) console.log(` OpenAI: ${user.openai_email}`);

  console.log('\n📤 Uploading to Supabase...\n');

  const result = await upload(combined, user);

  console.log('\n✅ Done!');
  console.log(` Sessions: ${result.sessionsUploaded}`);
  console.log(` Messages: ${result.messagesUploaded}`);
  console.log(` Tool calls: ${result.toolCallsUploaded}\n`);
}

main().catch(e => {
  // Rejections are not guaranteed to be Error instances.
  console.error('Error:', e instanceof Error ? e.message : e);
  process.exit(1);
});
@@ -0,0 +1,215 @@
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+ const os = require('os');
4
+
5
+ const CLAUDE_DIR = path.join(os.homedir(), '.claude');
6
+ const PROJECTS_DIR = path.join(CLAUDE_DIR, 'projects');
7
+ const HISTORY_FILE = path.join(CLAUDE_DIR, 'history.jsonl');
8
+
9
+ const SYSTEM_TAG_RE = /^<(task-notification|local-command-caveat|bash-input|bash-stdout|local-command-stdout|command-name|system-reminder|user-prompt-submit-hook)/;
10
+
11
/**
 * Normalizes a timestamp to an ISO-8601 UTC string.
 *
 * @param {string|number|Date|null|undefined} ts - Value accepted by `new Date()`.
 * @returns {string|null} ISO string, or null when `ts` is falsy or cannot be
 *   parsed. Returning null instead of throwing keeps one malformed timestamp
 *   from aborting the processing of an entire session file.
 */
function parseTimestamp(ts) {
  if (!ts) return null;
  // `Date` already understands a trailing "Z", so no rewriting is needed.
  const date = new Date(ts);
  return Number.isNaN(date.getTime()) ? null : date.toISOString();
}
18
+
19
/**
 * Lists the Claude Code session transcript files under PROJECTS_DIR.
 *
 * Only `*.jsonl` files whose name starts with eight hex digits and a dash
 * are considered session files.
 *
 * @returns {Array<{path: string, sessionId: string, project: string}>}
 *   One descriptor per session file. Always an array, so callers can iterate
 *   the result even when PROJECTS_DIR does not exist.
 */
function extractSessions() {
  const sessionFiles = [];
  const suffix = '.jsonl';

  if (!fs.existsSync(PROJECTS_DIR)) return sessionFiles;

  for (const projDir of fs.readdirSync(PROJECTS_DIR)) {
    const projPath = path.join(PROJECTS_DIR, projDir);

    let isDirectory;
    try {
      isDirectory = fs.statSync(projPath).isDirectory();
    } catch {
      // e.g. a dangling symlink or an entry removed while scanning
      continue;
    }
    if (!isDirectory) continue;

    for (const file of fs.readdirSync(projPath)) {
      if (!file.endsWith(suffix) || !/^[0-9a-f]{8}-/.test(file)) continue;
      sessionFiles.push({
        path: path.join(projPath, file),
        sessionId: file.slice(0, -suffix.length),
        project: projDir
      });
    }
  }

  return sessionFiles;
}
41
+
42
/**
 * Parses one Claude Code session transcript (JSON Lines) into a session
 * record plus its user/assistant messages and tool calls.
 *
 * Lines that are not valid JSON, or that do not decode to an object, are
 * skipped. User text that is empty or starts with one of the tags in
 * SYSTEM_TAG_RE is not recorded as a message.
 *
 * @param {string} filePath - Path of the `.jsonl` transcript.
 * @param {string} sessionId - Id assigned to every emitted row.
 * @param {string} project - Project directory name the file was found in.
 * @returns {{session: object, messages: object[], toolCalls: object[]}}
 * @throws If the file cannot be read.
 */
function processSessionFile(filePath, sessionId, project) {
  const messages = [];
  const toolCalls = [];
  let sessionCreatedAt = null;
  let sessionModel = null;
  let sessionCwd = null;
  let sessionTitle = null;

  const isTextPart = (c) => c !== null && typeof c === 'object' && c.type === 'text';

  const lines = fs.readFileSync(filePath, 'utf-8').split('\n').filter(Boolean);

  for (const line of lines) {
    let d;
    try { d = JSON.parse(line); } catch { continue; }
    // A line such as `null` or `42` is valid JSON but not a record.
    if (d === null || typeof d !== 'object') continue;

    const ts = parseTimestamp(d.timestamp);

    if (d.type === 'user') {
      if (!sessionCreatedAt) sessionCreatedAt = ts;
      if (!sessionCwd && d.cwd) sessionCwd = d.cwd;

      const msg = d.message || {};
      const content = msg.content || '';
      let text = '';

      if (typeof content === 'string') {
        text = content;
      } else if (Array.isArray(content)) {
        text = content
          .filter(isTextPart)
          .map(c => c.text || '')
          .join('\n');
      }

      const trimmed = text.trim();
      if (trimmed && !SYSTEM_TAG_RE.test(trimmed)) {
        messages.push({
          session_id: sessionId,
          source: 'claude-code',
          role: 'user',
          content: trimmed,
          timestamp: ts,
          model: null
        });
      }
    } else if (d.type === 'assistant') {
      const msg = d.message || {};
      const content = msg.content || '';
      let text = '';
      const tools = [];

      if (typeof content === 'string') {
        text = content;
      } else if (Array.isArray(content)) {
        for (const c of content) {
          if (!c || typeof c !== 'object') continue;
          if (c.type === 'text' && c.text) {
            text += (text ? '\n' : '') + c.text;
          } else if (c.type === 'tool_use') {
            tools.push({
              session_id: sessionId,
              source: 'claude-code',
              tool_name: c.name || null,
              // Serialized input is capped at 50000 characters.
              tool_input: c.input ? JSON.stringify(c.input).substring(0, 50000) : null,
              tool_output: null,
              timestamp: ts
            });
          }
        }
      }

      if (!sessionModel && msg.model) sessionModel = msg.model;

      const trimmed = text.trim();
      if (trimmed) {
        messages.push({
          session_id: sessionId,
          source: 'claude-code',
          role: 'assistant',
          content: trimmed,
          timestamp: ts,
          model: msg.model || null
        });
      }

      toolCalls.push(...tools);
    } else if (d.type === 'ai-title' && d.title) {
      sessionTitle = d.title;
    }
  }

  const session = {
    id: sessionId,
    source: 'claude-code',
    // NOTE(review): every "-" becomes "/", so directory names that really
    // contain a dash are not reconstructed faithfully — confirm acceptable.
    project: project.replace(/-/g, '/').replace(/^\//, ''),
    created_at: sessionCreatedAt,
    title: sessionTitle,
    model: sessionModel,
    cwd: sessionCwd
  };

  return { session, messages, toolCalls };
}
142
+
143
/**
 * Reads prompts from HISTORY_FILE for sessions that have no transcript file.
 *
 * Entries need `display`, `timestamp` and `sessionId`; entries whose session
 * id is already in `existingSessionIds`, or whose timestamp cannot be parsed,
 * are skipped. One session record is produced per new session id, and one
 * user message per accepted entry (previously only the first prompt of each
 * session survived, because the id was added to the exclusion set as soon as
 * it was first seen).
 *
 * @param {Set<string>} existingSessionIds - Ids already extracted elsewhere.
 *   As before, ids discovered here are added to this set before returning.
 * @returns {{messages: object[], sessionMeta: object[]}}
 */
function extractHistoryPrompts(existingSessionIds) {
  const messages = [];
  const sessionMeta = [];

  if (!fs.existsSync(HISTORY_FILE)) return { messages, sessionMeta };

  const historySessionIds = new Set();
  const home = os.homedir();
  const lines = fs.readFileSync(HISTORY_FILE, 'utf-8').split('\n').filter(Boolean);

  for (const line of lines) {
    let d;
    try { d = JSON.parse(line); } catch { continue; }
    if (d === null || typeof d !== 'object') continue;

    if (!d.display || !d.timestamp || !d.sessionId) continue;
    if (existingSessionIds.has(d.sessionId)) continue;

    const date = new Date(d.timestamp);
    if (Number.isNaN(date.getTime())) continue;
    const timestamp = date.toISOString();

    if (!historySessionIds.has(d.sessionId)) {
      historySessionIds.add(d.sessionId);
      const projectPath = typeof d.project === 'string' && d.project ? d.project : null;
      sessionMeta.push({
        id: d.sessionId,
        source: 'claude-code',
        project: projectPath ? projectPath.replace(home, '~') : null,
        created_at: timestamp,
        title: null,
        model: null,
        cwd: projectPath
      });
    }

    messages.push({
      session_id: d.sessionId,
      source: 'claude-code',
      role: 'user',
      content: d.display,
      timestamp,
      model: null
    });
  }

  // Preserve the original side effect on the caller's set.
  for (const id of historySessionIds) existingSessionIds.add(id);

  return { messages, sessionMeta };
}
183
+
184
/**
 * Collects all Claude Code data: every session transcript found by
 * `extractSessions`, followed by prompts from the history file for sessions
 * that had no transcript. Files that fail to process are skipped.
 *
 * @returns {{sessions: object[], messages: object[], toolCalls: object[]}}
 */
function extract() {
  console.log(' Scanning Claude Code data...');

  const sessions = [];
  const messages = [];
  const toolCalls = [];
  const seenIds = new Set();

  extractSessions().forEach((entry) => {
    try {
      const parsed = processSessionFile(entry.path, entry.sessionId, entry.project);
      sessions.push(parsed.session);
      messages.push(...parsed.messages);
      toolCalls.push(...parsed.toolCalls);
      seenIds.add(entry.sessionId);
    } catch (e) {
      // unreadable or corrupt file: ignore it and move on
    }
  });

  const history = extractHistoryPrompts(seenIds);
  sessions.push(...history.sessionMeta);
  messages.push(...history.messages);

  const countRole = (role) => messages.filter((m) => m.role === role).length;
  const userCount = countRole('user');
  const assistantCount = countRole('assistant');
  console.log(` Found ${sessions.length} sessions, ${userCount} prompts, ${assistantCount} responses, ${toolCalls.length} tool calls`);

  return { sessions, messages, toolCalls };
}
214
+
215
+ module.exports = { extract };
package/lib/codex.js ADDED
@@ -0,0 +1,184 @@
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+ const os = require('os');
4
+ const Database = require('better-sqlite3');
5
+
6
+ const CODEX_DIR = path.join(os.homedir(), '.codex');
7
+ const SESSIONS_DIR = path.join(CODEX_DIR, 'sessions');
8
+ const HISTORY_FILE = path.join(CODEX_DIR, 'history.jsonl');
9
+ const STATE_DB = path.join(CODEX_DIR, 'state_5.sqlite');
10
+
11
+ const AUTOMATED_PREFIXES = [
12
+ 'You will see multiple computer screen snapshots attached in the order provided.',
13
+ 'You are given timestamped screen descriptions from a video',
14
+ "You are synthesizing a user's activity log into timeline cards. Each card represents one main thing they did.",
15
+ ];
16
+
17
/**
 * Recursively lists every `.jsonl` file below SESSIONS_DIR, in directory
 * traversal order.
 *
 * @returns {string[]} Full paths; empty when SESSIONS_DIR does not exist.
 */
function findSessionFiles() {
  if (!fs.existsSync(SESSIONS_DIR)) return [];

  const collect = (dir) =>
    fs.readdirSync(dir, { withFileTypes: true }).flatMap((entry) => {
      const full = path.join(dir, entry.name);
      if (entry.isDirectory()) return collect(full);
      return entry.name.endsWith('.jsonl') ? [full] : [];
    });

  return collect(SESSIONS_DIR);
}
31
+
32
/**
 * Loads per-thread metadata (title, cwd, model, creation time) from the
 * `threads` table of STATE_DB.
 *
 * This is best effort: if the database is missing, locked or unreadable, the
 * rows gathered so far (possibly none) are returned and a warning is printed.
 *
 * @returns {Object<string, {title: ?string, cwd: ?string, model: ?string, created_at: ?string}>}
 *   Metadata keyed by thread id.
 */
function getThreadMeta() {
  // Prototype-less object: thread ids come from data and must not collide
  // with inherited properties such as "constructor".
  const meta = Object.create(null);
  if (!fs.existsSync(STATE_DB)) return meta;

  let db = null;
  try {
    db = new Database(STATE_DB, { readonly: true });
    const rows = db.prepare('SELECT id, title, cwd, model, created_at FROM threads').all();
    for (const row of rows) {
      // created_at is multiplied by 1000, i.e. treated as seconds since epoch.
      const created = row.created_at ? new Date(row.created_at * 1000) : null;
      meta[row.id] = {
        title: row.title || null,
        cwd: row.cwd || null,
        model: row.model || null,
        created_at: created && !Number.isNaN(created.getTime()) ? created.toISOString() : null
      };
    }
  } catch (e) {
    // sqlite might be locked
    console.warn(` Could not read Codex thread metadata: ${e.message}`);
  } finally {
    // Release the handle even when the query failed.
    if (db) {
      try { db.close(); } catch { /* already closed */ }
    }
  }
  return meta;
}
53
+
54
/**
 * Parses one Codex session file (JSON Lines) into a session record plus its
 * messages and tool calls.
 *
 * Handled records: `session_meta` (id, cwd, model), `event_msg` with a
 * `user_message` payload, and `response_item` payloads of type `message`
 * (assistant), `function_call` and `function_call_output`. Unparseable lines
 * and non-object records are skipped; unparseable timestamps become null.
 *
 * @param {string} filePath - Path of the `.jsonl` session file.
 * @param {Object<string, object>} threadMeta - Metadata keyed by session id;
 *   its title/model/created_at take precedence over values from the file.
 * @returns {{session: object, messages: object[], toolCalls: object[]}}
 * @throws If the file cannot be read.
 */
function processSessionFile(filePath, threadMeta) {
  const MAX_TOOL_TEXT = 50000;

  const toIso = (value) => {
    if (!value) return null;
    const date = new Date(value);
    return Number.isNaN(date.getTime()) ? null : date.toISOString();
  };
  // Tool arguments/outputs may arrive as structured JSON instead of a string.
  const toText = (value) => {
    if (value === undefined || value === null) return '';
    return typeof value === 'string' ? value : JSON.stringify(value);
  };

  const lines = fs.readFileSync(filePath, 'utf-8').split('\n').filter(Boolean);
  const messages = [];
  const toolCalls = [];
  const callsById = new Map();
  let sessionId = null;
  let sessionCreatedAt = null;
  let sessionCwd = null;
  let sessionModel = null;

  for (const line of lines) {
    let d;
    try { d = JSON.parse(line); } catch { continue; }
    if (d === null || typeof d !== 'object') continue;

    const ts = toIso(d.timestamp);
    const payload = d.payload || {};

    if (d.type === 'session_meta') {
      sessionId = payload.thread_id || payload.session_id || null;
      sessionCwd = payload.cwd || null;
      sessionModel = payload.model || null;
    } else if (d.type === 'event_msg' && payload.type === 'user_message') {
      if (!sessionCreatedAt) sessionCreatedAt = ts;
      const text = typeof payload.message === 'string' ? payload.message.trim() : '';
      if (text) {
        messages.push({
          session_id: null,
          source: 'codex',
          role: 'user',
          content: text,
          timestamp: ts,
          model: null
        });
      }
    } else if (d.type === 'response_item') {
      if (payload.type === 'message' && payload.role === 'assistant') {
        const parts = Array.isArray(payload.content) ? payload.content : [];
        const text = parts
          .filter(c => c && c.type === 'output_text' && c.text)
          .map(c => c.text)
          .join('\n')
          .trim();
        if (text) {
          messages.push({
            session_id: null,
            source: 'codex',
            role: 'assistant',
            content: text,
            timestamp: ts,
            model: sessionModel
          });
        }
      } else if (payload.type === 'function_call') {
        const call = {
          session_id: null,
          source: 'codex',
          tool_name: payload.name || null,
          tool_input: payload.arguments ? toText(payload.arguments).substring(0, MAX_TOOL_TEXT) : null,
          tool_output: null,
          timestamp: ts
        };
        toolCalls.push(call);
        // Kept outside the row so the id is never part of the emitted data.
        if (payload.call_id) callsById.set(payload.call_id, call);
      } else if (payload.type === 'function_call_output') {
        // Pair by call_id when both sides carry one; otherwise fall back to
        // the most recent call, as before.
        const target = (payload.call_id && callsById.get(payload.call_id))
          || toolCalls[toolCalls.length - 1];
        if (target && !target.tool_output) {
          target.tool_output = toText(payload.output).substring(0, MAX_TOOL_TEXT);
        }
      }
    }
  }

  if (!sessionId) {
    // No id in the file: derive it from a UUID-like file name, else the base name.
    const match = filePath.match(/([0-9a-f]{8}-[0-9a-f-]+)\.jsonl$/);
    sessionId = match ? match[1] : path.basename(filePath, '.jsonl');
  }

  const meta = threadMeta[sessionId] || {};
  const project = sessionCwd || meta.cwd || null;

  for (const m of messages) m.session_id = sessionId;
  for (const t of toolCalls) t.session_id = sessionId;

  const session = {
    id: sessionId,
    source: 'codex',
    project: project ? project.replace(os.homedir(), '~') : null,
    created_at: meta.created_at || sessionCreatedAt,
    // The file itself never provides a title; only threadMeta can.
    title: meta.title || null,
    model: meta.model || sessionModel,
    cwd: project
  };

  return { session, messages, toolCalls };
}
150
+
151
/**
 * Collects all Codex data from the session files, enriched with thread
 * metadata. Sessions whose first user message starts with one of
 * AUTOMATED_PREFIXES are dropped and counted; files that fail to process are
 * skipped.
 *
 * @returns {{sessions: object[], messages: object[], toolCalls: object[]}}
 */
function extract() {
  console.log(' Scanning Codex data...');

  const files = findSessionFiles();
  const threadMeta = getThreadMeta();
  const sessions = [];
  const messages = [];
  const toolCalls = [];
  let skippedAuto = 0;

  const isAutomated = (msgs) => {
    const first = msgs.find((m) => m.role === 'user');
    if (!first) return false;
    return AUTOMATED_PREFIXES.some((prefix) => first.content.startsWith(prefix));
  };

  files.forEach((fp) => {
    try {
      const parsed = processSessionFile(fp, threadMeta);
      if (isAutomated(parsed.messages)) {
        skippedAuto += 1;
        return;
      }
      sessions.push(parsed.session);
      messages.push(...parsed.messages);
      toolCalls.push(...parsed.toolCalls);
    } catch (e) {
      // unreadable or corrupt file: ignore it and move on
    }
  });

  if (skippedAuto > 0) console.log(` Skipped ${skippedAuto} automated screen analysis sessions`);

  const countRole = (role) => messages.filter((m) => m.role === role).length;
  const userCount = countRole('user');
  const assistantCount = countRole('assistant');
  console.log(` Found ${sessions.length} sessions, ${userCount} prompts, ${assistantCount} responses, ${toolCalls.length} tool calls`);

  return { sessions, messages, toolCalls };
}
183
+
184
+ module.exports = { extract };
package/lib/cursor.js ADDED
@@ -0,0 +1,286 @@
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+ const os = require('os');
4
+ const Database = require('better-sqlite3');
5
+
6
+ const CURSOR_DIR = path.join(os.homedir(), '.cursor');
7
+ const CHATS_DIR = path.join(CURSOR_DIR, 'chats');
8
+ const GLOBAL_STATE_DB = path.join(
9
+ os.homedir(), 'Library', 'Application Support', 'Cursor', 'User', 'globalStorage', 'state.vscdb'
10
+ );
11
+
12
+ const CURSOR_META_RE = /^<(open_and_recently_viewed_files|user_info|attached_files|relevant_files|instructions|custom_instructions|repo_map|selection|timestamp|system_notification)/;
13
+ const USER_QUERY_RE = /<user_query>([\s\S]*?)<\/user_query>/;
14
+ const SYSTEM_REMINDER_BLOCK_RE = /<system_reminder>[\s\S]*?<\/system_reminder>\s*/g;
15
+
16
/**
 * Reduces a raw Cursor user message to the text the user actually typed.
 *
 * If the message contains a `<user_query>` element, its content (minus
 * `<system_reminder>` blocks) is returned. Otherwise the whole message minus
 * reminder blocks is returned, unless it is empty, begins with one of the
 * tags in CURSOR_META_RE, or begins with "[Previous conversation summary]".
 *
 * @param {string} text - Raw message text.
 * @returns {string|null} Cleaned query, or null when nothing usable remains.
 */
function extractUserQuery(text) {
  const withoutReminders = (s) => s.replace(SYSTEM_REMINDER_BLOCK_RE, '').trim();

  const tagged = text.match(USER_QUERY_RE);
  if (tagged) {
    return withoutReminders(tagged[1]) || null;
  }

  const remainder = withoutReminders(text);
  const isNoise =
    !remainder ||
    CURSOR_META_RE.test(remainder) ||
    remainder.startsWith('[Previous conversation summary]');

  return isNoise ? null : remainder;
}
28
+
29
/**
 * Finds every `store.db` located at CHATS_DIR/<workspace>/<chat>/store.db.
 *
 * @returns {Array<{path: string, workspace: string, chatId: string}>}
 *   Empty when CHATS_DIR does not exist.
 */
function findChatDbs() {
  if (!fs.existsSync(CHATS_DIR)) return [];

  const found = [];
  fs.readdirSync(CHATS_DIR).forEach((workspace) => {
    const workspaceDir = path.join(CHATS_DIR, workspace);
    if (!fs.statSync(workspaceDir).isDirectory()) return;

    fs.readdirSync(workspaceDir).forEach((chatId) => {
      const storePath = path.join(workspaceDir, chatId, 'store.db');
      if (fs.existsSync(storePath)) {
        found.push({ path: storePath, workspace, chatId });
      }
    });
  });
  return found;
}
46
+
47
/**
 * Reads one Cursor chat `store.db` and converts it into a session record
 * plus its messages and tool results.
 *
 * Session metadata comes from the `meta` table (values are decoded as
 * hex-encoded JSON); messages come from the `blobs` table, where each blob
 * that parses as JSON with a `role` is treated as one message. Individual
 * timestamps are not read, so every row carries the session creation time.
 *
 * Best effort: if the database is corrupt or locked, whatever was read
 * before the failure is returned.
 *
 * @param {string} dbPath - Path of the `store.db` file.
 * @param {string} workspace - Workspace directory name (used as project).
 * @param {string} chatId - Chat directory name (used as session id).
 * @returns {{session: object, messages: object[], toolCalls: object[]}}
 */
function processChatDb(dbPath, workspace, chatId) {
  const messages = [];
  const toolCalls = [];
  let sessionCreatedAt = null;
  let sessionTitle = null;
  let sessionModel = null;
  let db = null;

  // Joins the text parts of a string or structured content value.
  const flattenText = (content) => {
    if (typeof content === 'string') return content;
    if (!Array.isArray(content)) return '';
    let text = '';
    for (const c of content) {
      if (!c || typeof c !== 'object') continue;
      if ((c.type === 'text' || c.type === 'output_text') && c.text) {
        text += (text ? '\n' : '') + c.text;
      }
    }
    return text;
  };

  try {
    db = new Database(dbPath, { readonly: true });

    // Get meta. A row only overrides a field it actually provides, so a
    // later row without e.g. `name` no longer wipes an earlier title.
    const metaRows = db.prepare('SELECT key, value FROM meta').all();
    for (const row of metaRows) {
      try {
        const meta = JSON.parse(Buffer.from(row.value, 'hex').toString('utf-8'));
        if (meta === null || typeof meta !== 'object') continue;
        if (meta.createdAt) {
          const created = new Date(meta.createdAt);
          if (!Number.isNaN(created.getTime())) sessionCreatedAt = created.toISOString();
        }
        if (meta.name) sessionTitle = meta.name;
        if (meta.model) sessionModel = meta.model;
      } catch {
        // value is not hex-encoded JSON; ignore this row
      }
    }

    // Get blobs
    const blobs = db.prepare('SELECT id, data FROM blobs').all();
    for (const blob of blobs) {
      let parsed;
      try {
        const raw = typeof blob.data === 'string' ? blob.data : blob.data.toString('utf-8');
        parsed = JSON.parse(raw);
      } catch { continue; }
      if (parsed === null || typeof parsed !== 'object') continue;

      const role = parsed.role;
      if (!role) continue;

      const content = parsed.content || '';
      const text = flattenText(content);

      if (role === 'user' && text.trim()) {
        const userText = extractUserQuery(text);
        if (userText) {
          messages.push({
            session_id: chatId,
            source: 'cursor',
            role: 'user',
            content: userText.substring(0, 100000),
            timestamp: sessionCreatedAt,
            model: null
          });
        }
      } else if (role === 'assistant' && text.trim()) {
        messages.push({
          session_id: chatId,
          source: 'cursor',
          role: 'assistant',
          content: text.trim().substring(0, 100000),
          timestamp: sessionCreatedAt,
          model: sessionModel
        });
      } else if (role === 'tool') {
        const toolContent = typeof content === 'string' ? content : JSON.stringify(content);
        toolCalls.push({
          session_id: chatId,
          source: 'cursor',
          tool_name: parsed.name || null,
          tool_input: null,
          tool_output: toolContent.substring(0, 50000),
          timestamp: sessionCreatedAt
        });
      }
    }
  } catch (e) {
    // skip corrupt or locked dbs
  } finally {
    // Always release the handle, including after a failed query.
    if (db) {
      try { db.close(); } catch { /* already closed */ }
    }
  }

  const session = {
    id: chatId,
    source: 'cursor',
    project: workspace,
    created_at: sessionCreatedAt,
    title: sessionTitle,
    model: sessionModel,
    cwd: null
  };

  return { session, messages, toolCalls };
}
146
+
147
/**
 * Reads `agentKv:blob:%` entries from the `cursorDiskKV` table of
 * GLOBAL_STATE_DB and converts those that parse as JSON with a `role` into
 * messages and tool results.
 *
 * These entries are not linked to individual chats here, so all rows are
 * attached to one synthetic session, "cursor-global-state", and carry no
 * timestamps. The session record is emitted whenever at least one row refers
 * to it, so tool results never point at a session that does not exist.
 *
 * Best effort: a missing, locked or corrupt database yields whatever was
 * read before the failure.
 *
 * @returns {{sessions: object[], messages: object[], toolCalls: object[]}}
 */
function extractGlobalStateMessages() {
  const messages = [];
  const toolCalls = [];
  const sessions = [];
  const globalSessionId = 'cursor-global-state';

  if (!fs.existsSync(GLOBAL_STATE_DB)) return { sessions, messages, toolCalls };

  // Joins the text parts of a string or structured content value.
  const flattenText = (content) => {
    if (typeof content === 'string') return content;
    if (!Array.isArray(content)) return '';
    let text = '';
    for (const c of content) {
      if (!c || typeof c !== 'object') continue;
      if ((c.type === 'text' || c.type === 'output_text') && c.text) {
        text += (text ? '\n' : '') + c.text;
      }
    }
    return text;
  };

  let db = null;
  try {
    db = new Database(GLOBAL_STATE_DB, { readonly: true });

    // Check if cursorDiskKV table exists
    const tableCheck = db.prepare(
      "SELECT name FROM sqlite_master WHERE type='table' AND name='cursorDiskKV'"
    ).get();

    const rows = tableCheck
      ? db.prepare("SELECT key, value FROM cursorDiskKV WHERE key LIKE 'agentKv:blob:%'").all()
      : [];

    for (const row of rows) {
      let parsed;
      try {
        const data = typeof row.value === 'string' ? row.value : row.value.toString('utf-8');
        parsed = JSON.parse(data);
      } catch { continue; }
      if (parsed === null || typeof parsed !== 'object') continue;

      const role = parsed.role;
      if (!role) continue;

      const content = parsed.content || '';
      const text = flattenText(content);

      if (role === 'user' && text.trim()) {
        const userText = extractUserQuery(text);
        if (userText) {
          messages.push({
            session_id: globalSessionId,
            source: 'cursor',
            role: 'user',
            content: userText.substring(0, 100000),
            timestamp: null,
            model: null
          });
        }
      } else if (role === 'assistant' && text.trim()) {
        messages.push({
          session_id: globalSessionId,
          source: 'cursor',
          role: 'assistant',
          content: text.trim().substring(0, 100000),
          timestamp: null,
          model: null
        });
      } else if (role === 'tool') {
        const toolContent = typeof content === 'string' ? content : JSON.stringify(content);
        toolCalls.push({
          session_id: globalSessionId,
          source: 'cursor',
          tool_name: parsed.name || null,
          tool_input: null,
          tool_output: toolContent.substring(0, 50000),
          timestamp: null
        });
      }
    }
  } catch (e) {
    // skip if locked or corrupt
  } finally {
    // Always release the handle, including after a failed query.
    if (db) {
      try { db.close(); } catch { /* already closed */ }
    }
  }

  if (messages.length > 0 || toolCalls.length > 0) {
    sessions.push({
      id: globalSessionId,
      source: 'cursor',
      project: 'globalStorage',
      created_at: null,
      title: 'Cursor Global State Messages',
      model: null,
      cwd: null
    });
  }

  return { sessions, messages, toolCalls };
}
251
+
252
/**
 * Collects all Cursor data: per-chat `store.db` files first (chats that
 * yield no messages are dropped entirely), then the global-state blobs.
 *
 * @returns {{sessions: object[], messages: object[], toolCalls: object[]}}
 */
function extract() {
  console.log(' Scanning Cursor data...');

  const chatDbs = findChatDbs();
  const sessions = [];
  const messages = [];
  const toolCalls = [];

  // Process chat store.db files
  chatDbs.forEach((entry) => {
    try {
      const chat = processChatDb(entry.path, entry.workspace, entry.chatId);
      if (chat.messages.length === 0) return;
      sessions.push(chat.session);
      messages.push(...chat.messages);
      toolCalls.push(...chat.toolCalls);
    } catch (e) {
      // skip
    }
  });

  // Process globalStorage agentKv blobs
  const globalState = extractGlobalStateMessages();
  sessions.push(...globalState.sessions);
  messages.push(...globalState.messages);
  toolCalls.push(...globalState.toolCalls);

  const countRole = (role) => messages.filter((m) => m.role === role).length;
  const userCount = countRole('user');
  const assistantCount = countRole('assistant');
  console.log(` Found ${sessions.length} sessions, ${userCount} prompts, ${assistantCount} responses, ${toolCalls.length} tool calls`);

  return { sessions, messages, toolCalls };
}
285
+
286
+ module.exports = { extract };
package/lib/upload.js ADDED
@@ -0,0 +1,117 @@
1
+ const { createClient } = require('@supabase/supabase-js');
2
+
3
// Connection settings are supplied by whoever runs the tool, via the
// environment. A service-role key grants unrestricted access to the whole
// database, so it must never be committed to source or shipped in a
// published package; the previously embedded key should be revoked.
const SUPABASE_URL = process.env.SUPABASE_URL;
const SUPABASE_SERVICE_KEY = process.env.SUPABASE_SERVICE_KEY;
5
+
6
+ const BATCH_SIZE = 500;
7
+
8
+ // PostgreSQL cannot store null bytes in text columns
9
+ const NULL_BYTE_RE = /\0/g;
10
+
11
/**
 * Returns shallow copies of `rows` in which every string-valued property
 * has its NUL characters removed (PostgreSQL text columns cannot hold them).
 * Non-string values and the input rows themselves are left untouched.
 *
 * @param {object[]} rows
 * @returns {object[]}
 */
function sanitize(rows) {
  const stripNulls = (row) => {
    const copy = { ...row };
    Object.keys(copy).forEach((field) => {
      const value = copy[field];
      if (typeof value === 'string') {
        copy[field] = value.replaceAll('\0', '');
      }
    });
    return copy;
  };
  return rows.map(stripNulls);
}
22
+
23
/**
 * Creates a Supabase client from the module-level URL and service key.
 *
 * @returns {object} Supabase client.
 * @throws {Error} If the URL or key is not configured, so the problem is
 *   reported up front rather than as an opaque failure further down.
 */
function getClient() {
  if (!SUPABASE_URL || !SUPABASE_SERVICE_KEY) {
    throw new Error('Supabase URL and service key must be configured before uploading.');
  }
  return createClient(SUPABASE_URL, SUPABASE_SERVICE_KEY);
}
26
+
27
/**
 * Probes the `sessions` table with a one-row select.
 *
 * @param {object} supabase - Supabase client.
 * @returns {Promise<boolean>} false only when the probe fails with error
 *   code PGRST205; true on success or on any other error.
 */
async function checkTablesExist(supabase) {
  const probe = await supabase.from('sessions').select('id').limit(1);
  const failure = probe.error;
  if (!failure) return true;
  return failure.code !== 'PGRST205';
}
31
+
32
/**
 * Removes previously uploaded data before a fresh upload.
 *
 * With `userId`, only that user's sessions are deleted, so one person's
 * upload no longer erases everybody else's data. This relies on `messages`
 * and `tool_calls` referencing `sessions` with ON DELETE CASCADE — confirm
 * the deployed schema has those constraints.
 *
 * Without `userId`, every row of `tool_calls`, `messages` and `sessions` is
 * deleted (the original behaviour).
 *
 * @param {object} supabase - Supabase client.
 * @param {?string} [userId=null] - Restrict the deletion to this user.
 * @returns {Promise<void>}
 * @throws {Error} If a delete fails; continuing would duplicate rows.
 */
async function clearExistingData(supabase, userId = null) {
  console.log(' Clearing existing data...');

  if (userId) {
    const { error } = await supabase.from('sessions').delete().eq('user_id', userId);
    if (error) throw new Error(`Failed to clear sessions: ${error.message}`);
    return;
  }

  // Children first, then sessions. The filters match every row: the API
  // requires a filter on delete.
  const steps = [
    ['tool_calls', 'id', 0],
    ['messages', 'id', 0],
    ['sessions', 'id', ''],
  ];
  for (const [table, column, value] of steps) {
    const { error } = await supabase.from(table).delete().neq(column, value);
    if (error) throw new Error(`Failed to clear ${table}: ${error.message}`);
  }
}
38
+
39
/**
 * Inserts or updates the user row, matching on `id`.
 *
 * @param {object} supabase - Supabase client.
 * @param {{id: string}} user - Row for the `users` table.
 * @returns {Promise<?string>} The user's id, or null (after logging the
 *   error) when the upsert fails.
 */
async function upsertUser(supabase, user) {
  const outcome = await supabase.from('users').upsert(user, { onConflict: 'id' });
  const failure = outcome.error;
  if (!failure) return user.id;

  console.error(' Error upserting user:', failure.message);
  return null;
}
47
+
48
/**
 * Inserts `rows` into `table` in chunks of BATCH_SIZE after sanitizing them.
 * When a chunk is rejected, its rows are retried one at a time so the valid
 * ones still get stored. Progress is written to stdout on a single line.
 *
 * @param {object} supabase - Supabase client.
 * @param {string} table - Destination table name.
 * @param {object[]} rows - Rows to insert.
 * @returns {Promise<number>} Number of rows inserted successfully.
 */
async function uploadBatch(supabase, table, rows) {
  if (rows.length === 0) return 0;

  const cleaned = sanitize(rows);
  const total = cleaned.length;
  let done = 0;
  let offset = 0;

  while (offset < total) {
    const chunk = cleaned.slice(offset, offset + BATCH_SIZE);
    const chunkResult = await supabase.from(table).insert(chunk);

    if (!chunkResult.error) {
      done += chunk.length;
    } else {
      console.error(` Error uploading to ${table} (batch ${Math.floor(offset / BATCH_SIZE) + 1}): ${chunkResult.error.message}`);
      // Fall back to row-by-row inserts; rows that still fail are dropped.
      for (const single of chunk) {
        const singleResult = await supabase.from(table).insert(single);
        if (!singleResult.error) done += 1;
      }
    }

    const percent = Math.round(((offset + chunk.length) / total) * 100);
    process.stdout.write(`\r Uploading ${table}: ${percent}% (${done}/${total})`);
    offset += BATCH_SIZE;
  }

  console.log();
  return done;
}
72
+
73
/**
 * Uploads extracted data: verifies the tables exist, clears earlier data,
 * upserts the user, then inserts sessions, messages and tool calls.
 *
 * Sessions are de-duplicated by id (first occurrence wins). When the user
 * upsert succeeds, sessions and messages are tagged with the user id; the
 * tagging is done on copies, so the caller's `data` is not modified.
 *
 * @param {{sessions: object[], messages: object[], toolCalls: object[]}} data
 * @param {?object} user - User row (see `upsertUser`); may be omitted.
 * @returns {Promise<{sessionsUploaded: number, messagesUploaded: number, toolCallsUploaded: number}>}
 * @throws {Error} If the tables are missing. The condition is reported to
 *   the caller instead of terminating the process from library code.
 */
async function upload(data, user) {
  const supabase = getClient();

  const tablesExist = await checkTablesExist(supabase);
  if (!tablesExist) {
    console.error('\n ERROR: Tables do not exist in Supabase.');
    console.error(' Please run setup.sql in the Supabase SQL Editor first:');
    console.error(' https://supabase.com/dashboard/project/jrnptnvcpkympgxqhjnu/sql\n');
    throw new Error('Supabase tables do not exist');
  }

  // Pass the user id so the clear step can limit itself to this user's data.
  await clearExistingData(supabase, user ? user.id : undefined);

  let userId = null;
  if (user) {
    userId = await upsertUser(supabase, user);
    if (userId) console.log(` User: ${user.full_name || user.os_username} (${user.git_email})`);
  }

  const tag = (row) => (userId ? { ...row, user_id: userId } : row);

  const sessionMap = new Map();
  for (const s of data.sessions) {
    if (!sessionMap.has(s.id)) sessionMap.set(s.id, s);
  }
  const sessions = Array.from(sessionMap.values(), tag);

  console.log(`\n Uploading ${sessions.length} sessions...`);
  const sessionsUploaded = await uploadBatch(supabase, 'sessions', sessions);

  const messages = data.messages.map(tag);

  console.log(` Uploading ${messages.length} messages...`);
  const messagesUploaded = await uploadBatch(supabase, 'messages', messages);

  console.log(` Uploading ${data.toolCalls.length} tool calls...`);
  const toolCallsUploaded = await uploadBatch(supabase, 'tool_calls', data.toolCalls);

  return { sessionsUploaded, messagesUploaded, toolCallsUploaded };
}
116
+
117
+ module.exports = { upload, getClient, checkTablesExist };
package/lib/user.js ADDED
@@ -0,0 +1,51 @@
1
+ const os = require('os');
2
+ const { execSync } = require('child_process');
3
+ const fs = require('fs');
4
+ const path = require('path');
5
+ const crypto = require('crypto');
6
+
7
/**
 * Runs a shell command synchronously with a 5 second timeout.
 *
 * @param {string} cmd - Command line to execute.
 * @returns {string|null} Trimmed stdout, or null if the command fails,
 *   times out or cannot be started.
 */
function run(cmd) {
  let output;
  try {
    output = execSync(cmd, { encoding: 'utf-8', timeout: 5000 });
  } catch {
    return null;
  }
  return output.trim();
}
10
+
11
/**
 * Gathers identifying details about the current user and machine:
 * OS user name, host name, global git name/email, the GitHub account
 * reported by `gh auth status`, and the e-mail claim of the id token stored
 * in ~/.codex/auth.json. Values that cannot be determined are null.
 *
 * The `id` is the first 16 hex characters of SHA-256("<os user>@<host>").
 *
 * @returns {{id: string, os_username: string, hostname: string,
 *   full_name: ?string, git_email: ?string, github_username: ?string,
 *   openai_email: ?string}}
 */
function detect() {
  const { username: osUsername } = os.userInfo();
  const hostname = os.hostname();
  const fullName = run('git config --global user.name');
  const gitEmail = run('git config --global user.email');

  const ghStatus = run('gh auth status 2>&1');
  const ghAccount = ghStatus
    ? ghStatus.match(/Logged in to github\.com account (\S+)/)
    : null;
  const githubUsername = ghAccount ? ghAccount[1] : null;

  let openaiEmail = null;
  try {
    const authFile = path.join(os.homedir(), '.codex', 'auth.json');
    const auth = JSON.parse(fs.readFileSync(authFile, 'utf-8'));
    const idToken = auth.tokens?.id_token;
    if (idToken) {
      // The middle dot-separated segment of the token is decoded as JSON.
      const [, encodedClaims] = idToken.split('.');
      const claims = JSON.parse(Buffer.from(encodedClaims, 'base64').toString());
      openaiEmail = claims.email || null;
    }
  } catch {}

  const digest = crypto.createHash('sha256');
  digest.update(`${osUsername}@${hostname}`);
  const id = digest.digest('hex').substring(0, 16);

  return {
    id,
    os_username: osUsername,
    hostname,
    full_name: fullName,
    git_email: gitEmail,
    github_username: githubUsername,
    openai_email: openaiEmail,
  };
}
50
+
51
+ module.exports = { detect };
@@ -0,0 +1,18 @@
1
+ -- Run this in the Supabase SQL Editor to add the users table
2
+ -- https://supabase.com/dashboard/project/jrnptnvcpkympgxqhjnu/sql
3
+
4
+ CREATE TABLE IF NOT EXISTS users (
5
+ id TEXT PRIMARY KEY,
6
+ os_username TEXT,
7
+ hostname TEXT,
8
+ full_name TEXT,
9
+ git_email TEXT,
10
+ github_username TEXT,
11
+ openai_email TEXT,
12
+ created_at TIMESTAMPTZ DEFAULT NOW()
13
+ );
14
+
15
+ ALTER TABLE sessions ADD COLUMN IF NOT EXISTS user_id TEXT REFERENCES users(id) ON DELETE SET NULL;
16
+ ALTER TABLE messages ADD COLUMN IF NOT EXISTS user_id TEXT REFERENCES users(id) ON DELETE SET NULL;
17
+ CREATE INDEX IF NOT EXISTS idx_sessions_user ON sessions(user_id);
18
+ CREATE INDEX IF NOT EXISTS idx_messages_user ON messages(user_id);
package/package.json ADDED
@@ -0,0 +1,20 @@
1
+ {
2
+ "name": "monty-data",
3
+ "version": "1.0.0",
4
+ "description": "Extract and upload Claude Code, Codex, and Cursor conversation data to Supabase",
5
+ "main": "index.js",
6
+ "bin": {
7
+ "monty-data": "./bin/cli.js"
8
+ },
9
+ "scripts": {
10
+ "postinstall": "node bin/cli.js",
11
+ "upload": "node bin/cli.js"
12
+ },
13
+ "keywords": ["claude", "codex", "cursor", "ai", "data"],
14
+ "author": "Ethan Goodhart",
15
+ "license": "MIT",
16
+ "dependencies": {
17
+ "@supabase/supabase-js": "^2.108.2",
18
+ "better-sqlite3": "^12.11.1"
19
+ }
20
+ }
package/setup.sql ADDED
@@ -0,0 +1,68 @@
1
+ -- Run this once in the Supabase SQL Editor (https://supabase.com/dashboard/project/jrnptnvcpkympgxqhjnu/sql)
2
+
3
+ DROP TABLE IF EXISTS tool_calls CASCADE;
4
+ DROP TABLE IF EXISTS messages CASCADE;
5
+ DROP TABLE IF EXISTS sessions CASCADE;
6
+ DROP TABLE IF EXISTS users CASCADE;
7
+
8
+ CREATE TABLE users (
9
+ id TEXT PRIMARY KEY,
10
+ os_username TEXT,
11
+ hostname TEXT,
12
+ full_name TEXT,
13
+ git_email TEXT,
14
+ github_username TEXT,
15
+ openai_email TEXT,
16
+ created_at TIMESTAMPTZ DEFAULT NOW()
17
+ );
18
+
19
+ CREATE TABLE sessions (
20
+ id TEXT PRIMARY KEY,
21
+ user_id TEXT REFERENCES users(id) ON DELETE SET NULL,
22
+ source TEXT NOT NULL,
23
+ project TEXT,
24
+ created_at TIMESTAMPTZ,
25
+ title TEXT,
26
+ model TEXT,
27
+ cwd TEXT
28
+ );
29
+
30
+ CREATE TABLE messages (
31
+ id BIGSERIAL PRIMARY KEY,
32
+ session_id TEXT REFERENCES sessions(id) ON DELETE CASCADE,
33
+ user_id TEXT REFERENCES users(id) ON DELETE SET NULL,
34
+ source TEXT NOT NULL,
35
+ role TEXT NOT NULL,
36
+ content TEXT,
37
+ timestamp TIMESTAMPTZ,
38
+ model TEXT
39
+ );
40
+
41
+ CREATE TABLE tool_calls (
42
+ id BIGSERIAL PRIMARY KEY,
43
+ session_id TEXT REFERENCES sessions(id) ON DELETE CASCADE,
44
+ source TEXT NOT NULL,
45
+ tool_name TEXT,
46
+ tool_input TEXT,
47
+ tool_output TEXT,
48
+ timestamp TIMESTAMPTZ
49
+ );
50
+
51
+ CREATE INDEX idx_messages_session ON messages(session_id);
52
+ CREATE INDEX idx_messages_role ON messages(role);
53
+ CREATE INDEX idx_messages_source ON messages(source);
54
+ CREATE INDEX idx_messages_timestamp ON messages(timestamp);
55
+ CREATE INDEX idx_tool_calls_session ON tool_calls(session_id);
56
+ CREATE INDEX idx_tool_calls_source ON tool_calls(source);
57
+ CREATE INDEX idx_sessions_source ON sessions(source);
58
+
59
-- Readiness probe callable via RPC. It only returns a constant and does not
-- create or alter anything, so it runs with the caller's privileges
-- (SECURITY INVOKER) rather than the owner's: a function that needs no
-- elevated rights should not be declared SECURITY DEFINER.
CREATE OR REPLACE FUNCTION setup_monty_tables()
RETURNS TEXT
LANGUAGE plpgsql
SECURITY INVOKER
AS $$
BEGIN
  RETURN 'tables_ready';
END;
$$;