lumencode 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,316 +1,360 @@
1
- import { readFileSync, readdirSync, statSync, existsSync } from 'fs';
2
- import { join, basename, dirname } from 'path';
3
- import { BaseParser } from './base.js';
4
- import { createUsageRecord } from '../models/usage-record.js';
5
-
6
- export class CodexParser extends BaseParser {
7
- getInfo() {
8
- return {
9
- name: 'codex',
10
- displayName: 'OpenAI Codex',
11
- defaultDir: '~/.codex',
12
- envVar: 'CODEX_HOME',
13
- };
14
- }
15
-
16
- async detect(config) {
17
- const dir = this.getDataDir(config);
18
- if (!dir) return false;
19
- try {
20
- const sessionsDir = join(dir, 'sessions');
21
- return statSync(sessionsDir).isDirectory();
22
- } catch {
23
- try {
24
- const archivedDir = join(dir, 'archived_sessions');
25
- return statSync(archivedDir).isDirectory();
26
- } catch {
27
- return false;
28
- }
29
- }
30
- }
31
-
32
- async parse(config, options = {}) {
33
- const dir = this.getDataDir(config);
34
- const records = [];
35
- if (!dir) return records;
36
-
37
- const files = this._collectJsonlFiles(dir);
38
- const parsedSessionIds = new Set();
39
-
40
- for (const filePath of files) {
41
- try {
42
- const fileRecords = this._parseFile(filePath);
43
- for (const r of fileRecords) {
44
- if (r.sessionId) parsedSessionIds.add(r.sessionId);
45
- records.push(r);
46
- }
47
- } catch (err) {
48
- console.warn(`Codex 解析文件失败: ${filePath}`, err.message);
49
- }
50
- }
51
-
52
- // state DB 的 threads 表补充缺失的会话(JSONL 被清理/归档的场景)
53
- const fallbackRecords = await this._parseStateDb(dir, parsedSessionIds);
54
- records.push(...fallbackRecords);
55
-
56
- return records;
57
- }
58
-
59
- _collectJsonlFiles(dir) {
60
- const files = [];
61
- const sessionsDir = join(dir, 'sessions');
62
- try {
63
- if (statSync(sessionsDir).isDirectory()) {
64
- files.push(...this._walkDir(sessionsDir));
65
- }
66
- } catch {}
67
-
68
- const archivedDir = join(dir, 'archived_sessions');
69
- try {
70
- if (statSync(archivedDir).isDirectory()) {
71
- const archived = readdirSync(archivedDir).filter(f => f.endsWith('.jsonl'));
72
- for (const f of archived) {
73
- files.push(join(archivedDir, f));
74
- }
75
- }
76
- } catch {}
77
-
78
- return files;
79
- }
80
-
81
- _walkDir(dir) {
82
- const results = [];
83
- try {
84
- const entries = readdirSync(dir);
85
- for (const entry of entries) {
86
- const fullPath = join(dir, entry);
87
- try {
88
- const stat = statSync(fullPath);
89
- if (stat.isDirectory()) {
90
- results.push(...this._walkDir(fullPath));
91
- } else if (entry.endsWith('.jsonl')) {
92
- results.push(fullPath);
93
- }
94
- } catch {}
95
- }
96
- } catch {}
97
- return results;
98
- }
99
-
100
- _looksLikeNonProject(name) {
101
- if (!name || name.length < 2) return true;
102
- // 日期格式:2026-05-20, 20260520
103
- if (/^\d{4}-\d{2}-\d{2}$/.test(name)) return true;
104
- if (/^\d{8}$/.test(name)) return true;
105
- // 纯数字(如 20)
106
- if (/^\d+$/.test(name)) return true;
107
- // Hash:16-64 位十六进制
108
- if (/^[0-9a-f]{16,64}$/i.test(name)) return true;
109
- return false;
110
- }
111
-
112
- _inferProject(filePath) {
113
- let dir = dirname(filePath);
114
- const root = dirname(dirname(filePath));
115
- while (dir !== root && dir !== dirname(dir)) {
116
- const dirName = basename(dir);
117
- if (dirName === 'sessions' || dirName === 'archived_sessions') {
118
- return '';
119
- }
120
- if (!this._looksLikeNonProject(dirName)) {
121
- return dirName;
122
- }
123
- dir = dirname(dir);
124
- }
125
- return '';
126
- }
127
-
128
- _parseFile(filePath) {
129
- const content = readFileSync(filePath, 'utf-8');
130
- const lines = content.split('\n').filter(l => l.trim());
131
-
132
- let sessionId = '';
133
- let currentModel = '';
134
- let lastTokenUsage = null;
135
- let project = '';
136
- const records = [];
137
- const pendingToolCalls = [];
138
- const userTexts = [];
139
-
140
- for (const line of lines) {
141
- try {
142
- const event = JSON.parse(line);
143
-
144
- if (event.type === 'session_meta' && event.payload) {
145
- if (event.payload.id) sessionId = event.payload.id;
146
- if (event.payload.project) {
147
- project = event.payload.project;
148
- } else if (event.payload.cwd) {
149
- project = event.payload.cwd.replace(/\\/g, '/');
150
- }
151
- }
152
-
153
- if (event.type === 'turn_context' && event.payload?.model) {
154
- currentModel = event.payload.model;
155
- }
156
-
157
- // 提取用户消息文本(用于场景分类)
158
- if (event.type === 'response_item' && event.payload?.role === 'user') {
159
- const text = this._extractText(event.payload.content);
160
- if (text && !text.startsWith('<system-reminder') && !text.startsWith('# AGENTS.md')) {
161
- userTexts.push(text);
162
- records.push(createUsageRecord({
163
- timestamp: event.timestamp || new Date().toISOString(),
164
- tool: 'codex',
165
- sessionId: sessionId || basename(filePath, '.jsonl'),
166
- model: '',
167
- inputTokens: 0,
168
- outputTokens: 0,
169
- project: project || this._inferProject(filePath),
170
- metadata: { type: 'user', text },
171
- }));
172
- }
173
- }
174
-
175
- // 收集工具调用
176
- if (event.type === 'response_item' && event.payload?.type === 'function_call') {
177
- pendingToolCalls.push({ name: event.payload.name || 'unknown' });
178
- }
179
-
180
- if (event.type === 'event_msg' && event.payload?.type === 'token_count') {
181
- const info = event.payload.info;
182
- if (!info || !info.total_token_usage) continue;
183
-
184
- const total = info.total_token_usage;
185
- const current = {
186
- input: total.input_tokens || 0,
187
- output: total.output_tokens || 0,
188
- cachedInput: total.cached_input_tokens || 0,
189
- cacheCreation: total.cache_creation_input_tokens || 0,
190
- reasoningOutput: total.reasoning_output_tokens || 0,
191
- };
192
-
193
- let delta = { ...current };
194
- if (lastTokenUsage) {
195
- delta.input = Math.max(0, current.input - lastTokenUsage.input);
196
- delta.output = Math.max(0, current.output - lastTokenUsage.output);
197
- delta.cachedInput = Math.max(0, current.cachedInput - lastTokenUsage.cachedInput);
198
- delta.cacheCreation = Math.max(0, current.cacheCreation - lastTokenUsage.cacheCreation);
199
- }
200
- lastTokenUsage = current;
201
-
202
- if (delta.input > 0 || delta.output > 0) {
203
- records.push(createUsageRecord({
204
- timestamp: event.timestamp || new Date().toISOString(),
205
- tool: 'codex',
206
- sessionId: sessionId || basename(filePath, '.jsonl'),
207
- model: currentModel || 'gpt-5',
208
- inputTokens: delta.input,
209
- outputTokens: delta.output,
210
- cacheReadTokens: delta.cachedInput,
211
- cacheWriteTokens: delta.cacheCreation,
212
- costUSD: null,
213
- project: project || this._inferProject(filePath),
214
- metadata: {
215
- type: 'assistant',
216
- toolCalls: pendingToolCalls.splice(0),
217
- reasoningOutputTokens: delta.reasoningOutput,
218
- isFallback: !currentModel,
219
- },
220
- }));
221
- }
222
- }
223
- } catch {}
224
- }
225
-
226
- return records;
227
- }
228
-
229
- _extractText(content) {
230
- if (!content) return '';
231
- if (typeof content === 'string') return content.trim();
232
- if (Array.isArray(content)) {
233
- return content
234
- .filter(c => c && (c.type === 'input_text' || c.type === 'text'))
235
- .map(c => c.text || '')
236
- .join(' ')
237
- .trim();
238
- }
239
- return '';
240
- }
241
-
242
- // 从 state_*.sqlite 的 threads 表提取 JSONL 已丢失的会话元数据
243
- async _parseStateDb(dir, alreadyParsed) {
244
- const records = [];
245
- try {
246
- // 查找最新版本的 state DB
247
- const entries = readdirSync(dir);
248
- const stateDbs = entries
249
- .filter(f => /^state_\d+\.sqlite$/.test(f))
250
- .sort()
251
- .reverse();
252
- if (stateDbs.length === 0) return records;
253
-
254
- const dbPath = join(dir, stateDbs[0]);
255
- if (!existsSync(dbPath)) return records;
256
-
257
- const initSqlJs = (await import('sql.js')).default;
258
- const SQL = await initSqlJs();
259
- const dbBuf = readFileSync(dbPath);
260
- const db = new SQL.Database(dbBuf);
261
-
262
- const rows = db.exec(
263
- `SELECT id, cwd, tokens_used, title, git_branch, model,
264
- created_at_ms, updated_at_ms, first_user_message, archived
265
- FROM threads`
266
- );
267
-
268
- if (rows[0]) {
269
- for (const [sid, cwd, tokens, title, gitBranch, model, createdMs, updatedMs, firstMsg, archived] of rows[0].values) {
270
- if (!sid || alreadyParsed.has(sid)) continue;
271
- const project = (cwd || '').replace(/\\/g, '/');
272
- const ts = createdMs ? new Date(createdMs).toISOString() : '';
273
- const tsEnd = updatedMs ? new Date(updatedMs).toISOString() : ts;
274
-
275
- // User record
276
- records.push(createUsageRecord({
277
- timestamp: ts,
278
- tool: 'codex',
279
- sessionId: sid,
280
- model: '',
281
- inputTokens: 0,
282
- outputTokens: 0,
283
- project,
284
- metadata: { type: 'user', text: firstMsg || title || '', _fromStateDb: true },
285
- }));
286
- // Assistant record with total tokens
287
- if (tokens > 0) {
288
- records.push(createUsageRecord({
289
- timestamp: tsEnd,
290
- tool: 'codex',
291
- sessionId: sid,
292
- model: model || '',
293
- inputTokens: tokens,
294
- outputTokens: 0,
295
- project,
296
- metadata: { type: 'assistant', _fromStateDb: true, gitBranch },
297
- }));
298
- }
299
- }
300
- }
301
- db.close();
302
- } catch {}
303
- return records;
304
- }
305
-
306
- async getVersion(config) {
307
- const dir = this.getDataDir(config);
308
- if (!dir) return null;
309
- try {
310
- const data = JSON.parse(readFileSync(join(dir, 'version.json'), 'utf8'));
311
- return data.latest_version || null;
312
- } catch {
313
- return null;
314
- }
315
- }
316
- }
1
+ import { readFileSync, readdirSync, statSync, existsSync } from 'fs';
2
+ import { join, basename, dirname } from 'path';
3
+ import { BaseParser } from './base.js';
4
+ import { createUsageRecord } from '../models/usage-record.js';
5
+
6
// File-level parse cache: maps a file path to { mtime, records }.
// An entry is reused only while the file's mtime is unchanged.
const _codexFileCache = new Map();
// Upper bound on cached files; the oldest-inserted entries are evicted first.
const CODEX_CACHE_MAX = 200;

/**
 * Returns the parsed records for `filePath`, reusing the cached result when
 * the file's modification time has not changed since it was last parsed.
 *
 * @param {string} filePath - Path of the file to parse; also the cache key.
 * @param {(filePath: string) => any} parseFn - Parser invoked on a cache miss.
 *   It is called at most once per invocation of this function.
 * @returns {any} Whatever `parseFn` returned (possibly from the cache). The
 *   cached value is returned by reference, not copied.
 * @throws Whatever `parseFn` throws; parse failures are not cached.
 */
function getCachedCodexParse(filePath, parseFn) {
  let mtimeMs;
  try {
    ({ mtimeMs } = statSync(filePath));
  } catch {
    // The file cannot be stat'ed, so there is no mtime to validate a cache
    // entry against: bypass the cache and let parseFn report the problem.
    return parseFn(filePath);
  }

  const cached = _codexFileCache.get(filePath);
  if (cached && cached.mtime === mtimeMs) return cached.records;

  // Deliberately outside any try/catch: a parser error must propagate after a
  // single attempt instead of triggering a second, identical parse.
  const records = parseFn(filePath);

  // Map iteration follows insertion order and set() on an existing key keeps
  // its old position, so delete first to move the refreshed entry to the back.
  _codexFileCache.delete(filePath);
  _codexFileCache.set(filePath, { mtime: mtimeMs, records });
  while (_codexFileCache.size > CODEX_CACHE_MAX) {
    const oldest = _codexFileCache.keys().next().value;
    _codexFileCache.delete(oldest);
  }
  return records;
}
26
+
27
// Cache of the raw `threads` rows read from the Codex state DB.
// The rows are reused while both the DB path and its mtime are unchanged.
const _codexStateCache = {
  dbPath: '',
  mtimeMs: 0,
  // Row arrays from the last successful read, or null when nothing is cached.
  // Named `rows` because that is the property CodexParser._parseStateDb reads
  // and writes.
  rows: null,
};
33
+
34
/**
 * Usage parser for the OpenAI Codex data directory.
 *
 * Reads `*.jsonl` session logs under `sessions/` and `archived_sessions/`,
 * and supplements them with thread metadata from the newest
 * `state_<n>.sqlite` database for sessions that have no JSONL log.
 */
export class CodexParser extends BaseParser {
  /**
   * @returns {{name: string, displayName: string, defaultDir: string, envVar: string}}
   *   Static descriptor of this parser.
   */
  getInfo() {
    return {
      name: 'codex',
      displayName: 'OpenAI Codex',
      defaultDir: '~/.codex',
      envVar: 'CODEX_HOME',
    };
  }

  /**
   * @param {string} path - Path to test.
   * @returns {boolean} True when `path` exists and is a directory; false on
   *   any stat error.
   */
  _isDirectory(path) {
    try {
      return statSync(path).isDirectory();
    } catch {
      return false;
    }
  }

  /**
   * Reports whether a Codex data directory is present.
   *
   * @param {object} config - Passed through to `getDataDir`.
   * @returns {Promise<boolean>} True when either `sessions/` or
   *   `archived_sessions/` is a directory inside the data dir.
   */
  async detect(config) {
    const dir = this.getDataDir(config);
    if (!dir) return false;
    // Check both locations independently so a non-directory `sessions` entry
    // does not hide a valid `archived_sessions` directory.
    return (
      this._isDirectory(join(dir, 'sessions')) ||
      this._isDirectory(join(dir, 'archived_sessions'))
    );
  }

  /**
   * Parses every session log plus the state DB fallback.
   *
   * @param {object} config - Passed through to `getDataDir`.
   * @param {object} [options] - Accepted for interface compatibility; not
   *   read by this implementation.
   * @returns {Promise<Array>} All usage records; empty when no data dir is
   *   configured.
   */
  async parse(config, options = {}) {
    const dir = this.getDataDir(config);
    const records = [];
    if (!dir) return records;

    const files = this._collectJsonlFiles(dir);
    const parsedSessionIds = new Set();

    // File parsing is synchronous (readFileSync), so a plain loop does the
    // same work as wrapping each call in a promise, in the same order.
    for (const filePath of files) {
      let fileRecords;
      try {
        fileRecords = getCachedCodexParse(filePath, (fp) => this._parseFile(fp));
      } catch (err) {
        console.warn(`Codex 解析文件失败: ${filePath}`, err.message);
        continue;
      }
      for (const record of fileRecords) {
        if (record.sessionId) parsedSessionIds.add(record.sessionId);
        records.push(record);
      }
    }

    // The state DB `threads` table fills in sessions whose JSONL log is
    // missing (cleaned up or archived elsewhere).
    const fallbackRecords = await this._parseStateDb(dir, parsedSessionIds);
    for (const record of fallbackRecords) records.push(record);

    return records;
  }

  /**
   * Lists the JSONL logs to parse: all `*.jsonl` files anywhere under
   * `sessions/`, plus `*.jsonl` files directly inside `archived_sessions/`.
   *
   * @param {string} dir - Codex data directory.
   * @returns {string[]} File paths; unreadable directories contribute nothing.
   */
  _collectJsonlFiles(dir) {
    const files = [];

    const sessionsDir = join(dir, 'sessions');
    if (this._isDirectory(sessionsDir)) {
      for (const file of this._walkDir(sessionsDir)) files.push(file);
    }

    const archivedDir = join(dir, 'archived_sessions');
    if (this._isDirectory(archivedDir)) {
      try {
        for (const entry of readdirSync(archivedDir)) {
          if (entry.endsWith('.jsonl')) files.push(join(archivedDir, entry));
        }
      } catch {
        // Best effort: an unreadable archive directory is skipped.
      }
    }

    return files;
  }

  /**
   * Recursively collects `*.jsonl` files below `dir`.
   *
   * @param {string} dir - Directory to walk.
   * @returns {string[]} Paths of matching files; entries that cannot be
   *   read or stat'ed are skipped silently.
   */
  _walkDir(dir) {
    const results = [];
    let entries;
    try {
      entries = readdirSync(dir);
    } catch {
      return results;
    }
    for (const entry of entries) {
      const fullPath = join(dir, entry);
      try {
        if (statSync(fullPath).isDirectory()) {
          for (const nested of this._walkDir(fullPath)) results.push(nested);
        } else if (entry.endsWith('.jsonl')) {
          results.push(fullPath);
        }
      } catch {
        // Best effort: skip entries that disappear or cannot be stat'ed.
      }
    }
    return results;
  }

  /**
   * Heuristic: does a directory name look like a date, number or hash
   * rather than a project name?
   *
   * @param {string} name - Directory base name.
   * @returns {boolean} True for empty/one-character names, `YYYY-MM-DD`,
   *   all-digit names, and 16-64 character hex strings.
   */
  _looksLikeNonProject(name) {
    if (!name || name.length < 2) return true;
    // Date formats: 2026-05-20, 20260520
    if (/^\d{4}-\d{2}-\d{2}$/.test(name)) return true;
    if (/^\d{8}$/.test(name)) return true;
    // Digits only (e.g. 20)
    if (/^\d+$/.test(name)) return true;
    // Hash: 16-64 hexadecimal characters
    if (/^[0-9a-f]{16,64}$/i.test(name)) return true;
    return false;
  }

  /**
   * Derives a project name from the directory containing a log file.
   *
   * @param {string} filePath - Path of the JSONL file.
   * @returns {string} The containing directory's name when it looks like a
   *   project name, otherwise ''.
   */
  _inferProject(filePath) {
    let dir = dirname(filePath);
    // NOTE(review): `root` is the grandparent of the file, so this loop runs
    // at most once and only the immediate parent directory is ever examined.
    // The loop shape suggests walking further up may have been intended;
    // behavior is kept as-is pending confirmation.
    const root = dirname(dirname(filePath));
    while (dir !== root && dir !== dirname(dir)) {
      const dirName = basename(dir);
      if (dirName === 'sessions' || dirName === 'archived_sessions') {
        return '';
      }
      if (!this._looksLikeNonProject(dirName)) {
        return dirName;
      }
      dir = dirname(dir);
    }
    return '';
  }

  /**
   * Parses one JSONL session log into usage records.
   *
   * Emits a zero-token `user` record for each user message and an
   * `assistant` record for each `token_count` event whose cumulative totals
   * grew in input or output tokens.
   *
   * @param {string} filePath - Path of the JSONL file.
   * @returns {Array} Records in file order.
   * @throws If the file cannot be read. Malformed lines are skipped.
   */
  _parseFile(filePath) {
    const content = readFileSync(filePath, 'utf-8');
    const lines = content.split('\n').filter((l) => l.trim());

    let sessionId = '';
    let currentModel = '';
    let lastTokenUsage = null;
    let project = '';
    const records = [];
    const pendingToolCalls = [];

    for (const line of lines) {
      try {
        const event = JSON.parse(line);

        if (event.type === 'session_meta' && event.payload) {
          if (event.payload.id) sessionId = event.payload.id;
          if (event.payload.project) {
            project = event.payload.project;
          } else if (event.payload.cwd) {
            // Normalize Windows separators so project paths compare equal.
            project = event.payload.cwd.replace(/\\/g, '/');
          }
        }

        if (event.type === 'turn_context' && event.payload?.model) {
          currentModel = event.payload.model;
        }

        // Capture user message text (used for scenario classification).
        if (event.type === 'response_item' && event.payload?.role === 'user') {
          const text = this._extractText(event.payload.content);
          if (text && !text.startsWith('<system-reminder') && !text.startsWith('# AGENTS.md')) {
            records.push(createUsageRecord({
              timestamp: event.timestamp || new Date().toISOString(),
              tool: 'codex',
              sessionId: sessionId || basename(filePath, '.jsonl'),
              model: '',
              inputTokens: 0,
              outputTokens: 0,
              project: project || this._inferProject(filePath),
              metadata: { type: 'user', text },
            }));
          }
        }

        // Collect tool calls; they are attached to the next assistant record.
        if (event.type === 'response_item' && event.payload?.type === 'function_call') {
          pendingToolCalls.push({ name: event.payload.name || 'unknown' });
        }

        if (event.type === 'event_msg' && event.payload?.type === 'token_count') {
          const info = event.payload.info;
          if (!info || !info.total_token_usage) continue;

          const total = info.total_token_usage;
          const current = {
            input: total.input_tokens || 0,
            output: total.output_tokens || 0,
            cachedInput: total.cached_input_tokens || 0,
            cacheCreation: total.cache_creation_input_tokens || 0,
            reasoningOutput: total.reasoning_output_tokens || 0,
          };

          // The totals are read from `total_token_usage`, so each record
          // carries the growth since the previous token_count event. Every
          // counter is diffed, including reasoningOutput, so that summing
          // records does not over-count.
          const delta = { ...current };
          if (lastTokenUsage) {
            delta.input = Math.max(0, current.input - lastTokenUsage.input);
            delta.output = Math.max(0, current.output - lastTokenUsage.output);
            delta.cachedInput = Math.max(0, current.cachedInput - lastTokenUsage.cachedInput);
            delta.cacheCreation = Math.max(0, current.cacheCreation - lastTokenUsage.cacheCreation);
            delta.reasoningOutput = Math.max(0, current.reasoningOutput - lastTokenUsage.reasoningOutput);
          }
          lastTokenUsage = current;

          if (delta.input > 0 || delta.output > 0) {
            records.push(createUsageRecord({
              timestamp: event.timestamp || new Date().toISOString(),
              tool: 'codex',
              sessionId: sessionId || basename(filePath, '.jsonl'),
              model: currentModel || 'gpt-5',
              inputTokens: delta.input,
              outputTokens: delta.output,
              cacheReadTokens: delta.cachedInput,
              cacheWriteTokens: delta.cacheCreation,
              costUSD: null,
              project: project || this._inferProject(filePath),
              metadata: {
                type: 'assistant',
                // splice(0) hands over the pending calls and empties the queue.
                toolCalls: pendingToolCalls.splice(0),
                reasoningOutputTokens: delta.reasoningOutput,
                // True when no turn_context supplied a model and 'gpt-5' was assumed.
                isFallback: !currentModel,
              },
            }));
          }
        }
      } catch {
        // Best effort: a malformed or unexpected line is skipped so the rest
        // of the file can still be parsed.
      }
    }

    return records;
  }

  /**
   * Flattens message content into plain text.
   *
   * @param {string|Array|*} content - A string, or an array of parts with
   *   `type` and `text` fields.
   * @returns {string} Trimmed text; for arrays, the `input_text`/`text`
   *   parts joined by a space. '' for anything else.
   */
  _extractText(content) {
    if (!content) return '';
    if (typeof content === 'string') return content.trim();
    if (Array.isArray(content)) {
      return content
        .filter((c) => c && (c.type === 'input_text' || c.type === 'text'))
        .map((c) => c.text || '')
        .join(' ')
        .trim();
    }
    return '';
  }

  /**
   * Builds records from the `threads` table of the newest
   * `state_<n>.sqlite` file, for sessions not already parsed from JSONL.
   *
   * The raw rows are kept in the module-level `_codexStateCache` and reused
   * while the DB path and mtime are unchanged.
   *
   * @param {string} dir - Codex data directory.
   * @param {Set<string>} alreadyParsed - Session ids to skip.
   * @returns {Promise<Array>} Fallback records; empty (or partial) when the
   *   DB is missing, unreadable, or `sql.js` cannot be loaded.
   */
  async _parseStateDb(dir, alreadyParsed) {
    const records = [];
    try {
      // Pick the highest-numbered state DB. Compare the numeric suffix, since
      // a plain string sort would rank state_9 above state_10.
      const stateDbPattern = /^state_(\d+)\.sqlite$/;
      const versionOf = (name) => Number(stateDbPattern.exec(name)[1]);
      const stateDbs = readdirSync(dir)
        .filter((f) => stateDbPattern.test(f))
        .sort((a, b) => versionOf(b) - versionOf(a));
      if (stateDbs.length === 0) return records;

      const dbPath = join(dir, stateDbs[0]);

      // Reuse the cached rows when the same file is unchanged. statSync
      // throws if the file vanished, which the outer catch turns into [].
      const { mtimeMs } = statSync(dbPath);
      const useCache =
        _codexStateCache.dbPath === dbPath &&
        _codexStateCache.mtimeMs === mtimeMs &&
        _codexStateCache.rows;
      let rows = useCache ? _codexStateCache.rows : null;

      if (!rows) {
        const initSqlJs = (await import('sql.js')).default;
        const SQL = await initSqlJs();
        const db = new SQL.Database(readFileSync(dbPath));
        try {
          const result = db.exec(
            `SELECT id, cwd, tokens_used, title, git_branch, model,
                    created_at_ms, updated_at_ms, first_user_message, archived
             FROM threads`
          );
          rows = result[0]?.values || [];
        } finally {
          // Always close the database, even when the query fails.
          db.close();
        }
        _codexStateCache.dbPath = dbPath;
        _codexStateCache.mtimeMs = mtimeMs;
        _codexStateCache.rows = rows;
      }

      for (const [sid, cwd, tokens, title, gitBranch, model, createdMs, updatedMs, firstMsg] of rows) {
        if (!sid || alreadyParsed.has(sid)) continue;
        const project = (cwd || '').replace(/\\/g, '/');
        const ts = createdMs ? new Date(createdMs).toISOString() : '';
        const tsEnd = updatedMs ? new Date(updatedMs).toISOString() : ts;

        // User record
        records.push(createUsageRecord({
          timestamp: ts,
          tool: 'codex',
          sessionId: sid,
          model: '',
          inputTokens: 0,
          outputTokens: 0,
          project,
          metadata: { type: 'user', text: firstMsg || title || '', _fromStateDb: true },
        }));
        // Assistant record: the single `tokens_used` total is reported as
        // input tokens, with output left at 0.
        if (tokens > 0) {
          records.push(createUsageRecord({
            timestamp: tsEnd,
            tool: 'codex',
            sessionId: sid,
            model: model || '',
            inputTokens: tokens,
            outputTokens: 0,
            project,
            metadata: { type: 'assistant', _fromStateDb: true, gitBranch },
          }));
        }
      }
    } catch {
      // Best effort: the state DB is only a supplement, so any failure
      // (sql.js not loadable, schema mismatch, unreadable file) yields
      // whatever records were built so far.
    }
    return records;
  }

  /**
   * Reads the version recorded in `version.json` inside the data dir.
   *
   * @param {object} config - Passed through to `getDataDir`.
   * @returns {Promise<string|null>} The `latest_version` field, or null when
   *   the file is missing, unparsable, or has no such field.
   */
  async getVersion(config) {
    const dir = this.getDataDir(config);
    if (!dir) return null;
    try {
      const data = JSON.parse(readFileSync(join(dir, 'version.json'), 'utf8'));
      return data.latest_version || null;
    } catch {
      return null;
    }
  }
}