hiperf_txt_parser 1.0.8 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  export { parsePerfData, filterByTgid } from "./parser.js";
2
2
  export { formatPerfDataToText, formatPerfDataToJson } from "./serializer.js";
3
3
  export { toBackTraceStack, toBackTraceStacks } from "./backtrace.js";
4
- export { parseTraceFormat, parseCommonFieldsFromRaw, parseAllFieldsFromRaw, rawHexLinesToBuffer, buildTraceParserRegistry, decodeRawByRegistry, decodePerfRawData, } from "./traceFormat.js";
5
- export type { ParsedTraceFormat, TraceFormatField, TraceParserRegistry, DecodedRawSample, } from "./traceFormat.js";
4
+ export { parseTraceFormat, parseCommonFieldsFromRaw, parseAllFieldsFromRaw, rawHexLinesToBuffer, bufferToRawHexLines, buildTraceParserRegistry, decodeRawByRegistry, decodePerfRawData, } from "./traceFormat.js";
5
+ export type { ParsedTraceFormat, TraceFormatField, TraceParserRegistry, DecodedRawSample, DecodePerfRawDataOptions, Endian, } from "./traceFormat.js";
6
6
  export type { PerfData, RecordSample } from "./types.js";
7
7
  export type { RecordSampleJsonExportItem } from "./serializer.js";
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
1
1
  export { parsePerfData, filterByTgid } from "./parser.js";
2
2
  export { formatPerfDataToText, formatPerfDataToJson } from "./serializer.js";
3
3
  export { toBackTraceStack, toBackTraceStacks } from "./backtrace.js";
4
- export { parseTraceFormat, parseCommonFieldsFromRaw, parseAllFieldsFromRaw, rawHexLinesToBuffer, buildTraceParserRegistry, decodeRawByRegistry, decodePerfRawData, } from "./traceFormat.js";
4
+ export { parseTraceFormat, parseCommonFieldsFromRaw, parseAllFieldsFromRaw, rawHexLinesToBuffer, bufferToRawHexLines, buildTraceParserRegistry, decodeRawByRegistry, decodePerfRawData, } from "./traceFormat.js";
@@ -30,6 +30,11 @@ export interface DecodedRawSample {
30
30
  renderedText?: string;
31
31
  skipped: boolean;
32
32
  }
33
+ export interface DecodePerfRawDataOptions {
34
+ /** 是否在替换后的 raw 内容中保留 common_* 信息 */
35
+ keepCommonFields?: boolean;
36
+ }
37
+ export type Endian = "le" | "be";
33
38
  /**
34
39
  * 解析 sample/trace_format 风格的文本,得到字段列表
35
40
  */
@@ -41,13 +46,23 @@ export declare function parseCommonFieldsFromRaw(raw: Uint8Array, format: Parsed
41
46
  /**
42
47
  * 解析 format 中全部字段(含数组),数组字段名会去掉 [],如 args[6] => args: [...]
43
48
  */
44
- export declare function parseAllFieldsFromRaw(raw: Uint8Array, format: ParsedTraceFormat): Record<string, number | bigint | Array<number | bigint>>;
49
+ export declare function parseAllFieldsFromRaw(raw: Uint8Array, format: ParsedTraceFormat): Record<string, number | bigint | string | Array<number | bigint>>;
45
50
  /**
46
- * 将 perf 文本里 raw 段的 hex 行拼成连续字节(小端:每行一个数值,宽度由 hex 位数决定,4→2 字节,8→4 字节,16→8 字节)
51
+ * 将 perf 文本里 raw 段的 hex 行拼成连续字节(小端)。
52
+ *
53
+ * 规则:每行视为一个整数 token(如 0x12345678),按该 token 的字节宽度转成 LE 字节序。
54
+ * 例如:0x12345678 => [0x78, 0x56, 0x34, 0x12]
47
55
  */
48
56
  export declare function rawHexLinesToBuffer(lines: Array<{
49
57
  hex: string;
50
- }>): Uint8Array;
58
+ }>, endian?: Endian): Uint8Array;
59
+ /**
60
+ * 将字节缓冲按小端编码为 raw hex 行。
61
+ * 默认每行 4 字节(即 8 hex digits)。
62
+ */
63
+ export declare function bufferToRawHexLines(raw: Uint8Array, bytesPerLine?: number, endian?: Endian): Array<{
64
+ hex: string;
65
+ }>;
51
66
  /**
52
67
  * 从多个 trace_format 文本构建解析器集合(按 event ID 索引)
53
68
  */
@@ -67,4 +82,4 @@ export declare function decodeRawByRegistry(raw: Uint8Array, registry: TracePars
67
82
  * - 若找到 common_type 对应解析器:将 print fmt 渲染结果写入 `sample.raw.lines`(一行)。
68
83
  * - 若找不到解析器:放弃解析,并将 `common_type` 写入 `sample.raw.lines`(一行)。
69
84
  */
70
- export declare function decodePerfRawData(perfData: PerfData, registry: TraceParserRegistry): PerfData;
85
+ export declare function decodePerfRawData(perfData: PerfData, registry: TraceParserRegistry, options?: DecodePerfRawDataOptions): PerfData;
@@ -163,9 +163,38 @@ export function parseCommonFieldsFromRaw(raw, format) {
163
163
  export function parseAllFieldsFromRaw(raw, format) {
164
164
  const view = new DataView(raw.buffer, raw.byteOffset, raw.byteLength);
165
165
  const out = {};
166
+ const decoder = new TextDecoder("utf-8", { fatal: false });
166
167
  for (const field of format.fields) {
167
168
  const arr = parseArrayName(field.name);
168
169
  const key = arr ? arr.baseName : field.name;
170
+ const t = normalizeType(field.typeName);
171
+ // __data_loc char[] reason: 实际存一个 u32,低 16 位为 offset,高 16 位为 length
172
+ if (t.includes("__data_loc") && t.includes("char")) {
173
+ const locField = {
174
+ ...field,
175
+ typeName: "unsigned int",
176
+ signed: false,
177
+ size: 4,
178
+ };
179
+ const loc = readFieldScalar(view, locField);
180
+ if (typeof loc === "number") {
181
+ const locKey = `__data_loc_${field.name}`;
182
+ out[locKey] = loc;
183
+ const offset = loc & 0xffff;
184
+ const len = (loc >>> 16) & 0xffff;
185
+ if (offset >= 0 && len > 0 && offset + len <= raw.length) {
186
+ const bytes = raw.slice(offset, offset + len);
187
+ // 去掉末尾 \0
188
+ const nul = bytes.indexOf(0);
189
+ const slice = nul >= 0 ? bytes.slice(0, nul) : bytes;
190
+ out[field.name] = decoder.decode(slice);
191
+ }
192
+ else {
193
+ out[field.name] = "";
194
+ }
195
+ }
196
+ continue;
197
+ }
169
198
  const v = readFieldValue(view, field);
170
199
  if (v !== undefined) {
171
200
  out[key] = v;
@@ -174,25 +203,60 @@ export function parseAllFieldsFromRaw(raw, format) {
174
203
  return out;
175
204
  }
176
205
  /**
177
- * 将 perf 文本里 raw 段的 hex 行拼成连续字节(小端:每行一个数值,宽度由 hex 位数决定,4→2 字节,8→4 字节,16→8 字节)
206
+ * 将 perf 文本里 raw 段的 hex 行拼成连续字节(小端)。
207
+ *
208
+ * 规则:每行视为一个整数 token(如 0x12345678),按该 token 的字节宽度转成 LE 字节序。
209
+ * 例如:0x12345678 => [0x78, 0x56, 0x34, 0x12]
178
210
  */
179
- export function rawHexLinesToBuffer(lines) {
211
+ export function rawHexLinesToBuffer(lines, endian = "le") {
180
212
  const chunks = [];
181
213
  for (const { hex } of lines) {
182
- const s = hex.replace(/^0x/i, "").trim();
214
+ let s = hex.replace(/^0x/i, "").trim();
183
215
  if (!s)
184
216
  continue;
185
- const byteLen = Math.ceil(s.length / 2);
186
- const width = byteLen <= 2 ? 2 : byteLen <= 4 ? 4 : 8;
217
+ if (s.length % 2 === 1)
218
+ s = "0" + s;
219
+ const byteLen = s.length / 2;
187
220
  let value = BigInt("0x" + s);
188
- const mask = (1n << BigInt(width * 8)) - 1n;
189
- value &= mask;
190
- for (let i = 0; i < width; i++) {
191
- chunks.push(Number((value >> BigInt(8 * i)) & 0xffn));
221
+ if (endian === "le") {
222
+ for (let i = 0; i < byteLen; i++) {
223
+ chunks.push(Number((value >> BigInt(8 * i)) & 0xffn));
224
+ }
225
+ }
226
+ else {
227
+ for (let i = byteLen - 1; i >= 0; i--) {
228
+ chunks.push(Number((value >> BigInt(8 * i)) & 0xffn));
229
+ }
192
230
  }
193
231
  }
194
232
  return Uint8Array.from(chunks);
195
233
  }
234
+ /**
235
+ * 将字节缓冲按小端编码为 raw hex 行。
236
+ * 默认每行 4 字节(即 8 hex digits)。
237
+ */
238
+ export function bufferToRawHexLines(raw, bytesPerLine = 4, endian = "le") {
239
+ if (bytesPerLine <= 0)
240
+ return [];
241
+ const out = [];
242
+ for (let i = 0; i < raw.length; i += bytesPerLine) {
243
+ const end = Math.min(i + bytesPerLine, raw.length);
244
+ let value = 0n;
245
+ if (endian === "le") {
246
+ for (let j = 0; j < end - i; j++) {
247
+ value |= BigInt(raw[i + j]) << BigInt(8 * j);
248
+ }
249
+ }
250
+ else {
251
+ for (let j = 0; j < end - i; j++) {
252
+ value = (value << 8n) | BigInt(raw[i + j]);
253
+ }
254
+ }
255
+ const width = (end - i) * 2;
256
+ out.push({ hex: "0x" + value.toString(16).padStart(width, "0") });
257
+ }
258
+ return out;
259
+ }
196
260
  /**
197
261
  * 从多个 trace_format 文本构建解析器集合(按 event ID 索引)
198
262
  */
@@ -211,6 +275,9 @@ export function buildTraceParserRegistry(formatTexts) {
211
275
  return { byEventId, commonFormat };
212
276
  }
213
277
  function formatArgBySpecifier(value, spec) {
278
+ if (spec.toLowerCase() === "s") {
279
+ return typeof value === "string" ? value : String(value);
280
+ }
214
281
  const isBig = typeof value === "bigint";
215
282
  const lower = spec.toLowerCase();
216
283
  if (lower === "x") {
@@ -225,36 +292,51 @@ function formatArgBySpecifier(value, spec) {
225
292
  }
226
293
  function renderPrintFmt(printFmt, printArgs, fieldMap) {
227
294
  const values = [];
228
- for (const expr of printArgs ?? []) {
229
- const em = expr.match(/^REC->(\w+)(?:\[(\d+)])?$/);
230
- if (!em) {
231
- values.push(0);
232
- continue;
295
+ function evalExpr(exprRaw) {
296
+ // 允许表达式带括号与空格
297
+ let expr = exprRaw.trim();
298
+ // 去掉外层括号(可能不止一层,也可能只有一侧被 match 捕获到)
299
+ while (expr.startsWith("("))
300
+ expr = expr.slice(1).trim();
301
+ while (expr.endsWith(")"))
302
+ expr = expr.slice(0, -1).trim();
303
+ expr = expr.replace(/\s+/g, "");
304
+ // (REC->__data_loc_reason&0xffff) / (REC->__data_loc_reason>>16)
305
+ let m = expr.match(/^REC->(\w+)&0xffff$/);
306
+ if (m) {
307
+ const v = fieldMap[m[1]];
308
+ const n = typeof v === "number" ? v : 0;
309
+ return n & 0xffff;
233
310
  }
234
- const name = em[1];
235
- const idxRaw = em[2];
311
+ m = expr.match(/^REC->(\w+)>>16$/);
312
+ if (m) {
313
+ const v = fieldMap[m[1]];
314
+ const n = typeof v === "number" ? v : 0;
315
+ return (n >>> 16) & 0xffff;
316
+ }
317
+ // REC->field 或 REC->field[idx]
318
+ m = expr.match(/^REC->(\w+)(?:\[(\d+)])?$/);
319
+ if (!m)
320
+ return 0;
321
+ const name = m[1];
322
+ const idxRaw = m[2];
236
323
  const v = fieldMap[name];
237
324
  if (idxRaw !== undefined) {
238
325
  const idx = parseInt(idxRaw, 10);
239
326
  if (Array.isArray(v) && idx >= 0 && idx < v.length) {
240
- values.push(v[idx]);
241
- }
242
- else {
243
- values.push(0);
327
+ return v[idx];
244
328
  }
329
+ return 0;
245
330
  }
246
- else if (Array.isArray(v)) {
247
- values.push(v[0] ?? 0);
248
- }
249
- else if (v !== undefined) {
250
- values.push(v);
251
- }
252
- else {
253
- values.push(0);
254
- }
331
+ if (Array.isArray(v))
332
+ return v[0] ?? 0;
333
+ return v ?? 0;
334
+ }
335
+ for (const expr of printArgs ?? []) {
336
+ values.push(evalExpr(expr));
255
337
  }
256
338
  let valueIdx = 0;
257
- return printFmt.replace(/%[0-9]*[lh]*([duxX])/g, (_all, spec) => {
339
+ return printFmt.replace(/%[0-9]*[lh]*([duxXsS])/g, (_all, spec) => {
258
340
  const v = values[valueIdx++] ?? 0;
259
341
  return formatArgBySpecifier(v, spec);
260
342
  });
@@ -305,22 +387,29 @@ export function decodeRawByRegistry(raw, registry) {
305
387
  * - 若找到 common_type 对应解析器:将 print fmt 渲染结果写入 `sample.raw.lines`(一行)。
306
388
  * - 若找不到解析器:放弃解析,并将 `common_type` 写入 `sample.raw.lines`(一行)。
307
389
  */
308
- export function decodePerfRawData(perfData, registry) {
390
+ export function decodePerfRawData(perfData, registry, options = {}) {
309
391
  return {
310
392
  recordSamples: perfData.recordSamples.map((sample) => {
311
393
  if (!sample.raw || sample.raw.lines.length === 0)
312
394
  return sample;
313
395
  const raw = rawHexLinesToBuffer(sample.raw.lines);
314
396
  const decoded = decodeRawByRegistry(raw, registry);
315
- const replacement = decoded.renderedText ??
397
+ const base = decoded.renderedText ??
316
398
  (decoded.commonType !== undefined ? String(decoded.commonType) : "");
317
- if (!replacement)
399
+ if (!base)
318
400
  return sample;
401
+ const commonLine = options.keepCommonFields
402
+ ? Object.entries(decoded.commonFields)
403
+ .map(([k, v]) => `${k}:${typeof v === "bigint" ? v.toString(10) : v}`)
404
+ .join(" ")
405
+ : "";
319
406
  return {
320
407
  ...sample,
321
408
  raw: {
322
409
  ...sample.raw,
323
- lines: [{ hex: replacement }],
410
+ lines: options.keepCommonFields && commonLine.length > 0
411
+ ? [{ hex: base }, { hex: commonLine }]
412
+ : [{ hex: base }],
324
413
  },
325
414
  };
326
415
  }),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "hiperf_txt_parser",
3
- "version": "1.0.8",
3
+ "version": "1.0.10",
4
4
  "description": "Parse perf data.txt and output structured TypeScript data",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",