kakaotalk-chat-analyzer 0.2.11 → 0.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/aggregator.d.ts +41 -0
- package/dist/src/aggregator.js +628 -0
- package/dist/src/aggregator.js.map +1 -0
- package/dist/src/analysis-labels.d.ts +3 -0
- package/dist/src/analysis-labels.js +37 -0
- package/dist/src/analysis-labels.js.map +1 -0
- package/dist/src/analysis.d.ts +7 -9
- package/dist/src/analysis.js +34 -669
- package/dist/src/analysis.js.map +1 -1
- package/dist/src/analyze-pool.d.ts +9 -0
- package/dist/src/analyze-pool.js +47 -0
- package/dist/src/analyze-pool.js.map +1 -0
- package/dist/src/analyze-worker.d.ts +1 -0
- package/dist/src/analyze-worker.js +14 -0
- package/dist/src/analyze-worker.js.map +1 -0
- package/dist/src/cli.js +33 -11
- package/dist/src/cli.js.map +1 -1
- package/dist/src/encoding.d.ts +6 -0
- package/dist/src/encoding.js +23 -8
- package/dist/src/encoding.js.map +1 -1
- package/dist/src/gap-stats.d.ts +18 -0
- package/dist/src/gap-stats.js +75 -0
- package/dist/src/gap-stats.js.map +1 -0
- package/dist/src/kakao-line.d.ts +4 -0
- package/dist/src/kakao-line.js +134 -0
- package/dist/src/kakao-line.js.map +1 -0
- package/dist/src/keyword-counter.d.ts +11 -0
- package/dist/src/keyword-counter.js +42 -0
- package/dist/src/keyword-counter.js.map +1 -0
- package/dist/src/parser.js +2 -134
- package/dist/src/parser.js.map +1 -1
- package/dist/src/report.js +0 -9
- package/dist/src/report.js.map +1 -1
- package/dist/src/stream-parser.d.ts +20 -0
- package/dist/src/stream-parser.js +109 -0
- package/dist/src/stream-parser.js.map +1 -0
- package/dist/src/version.d.ts +2 -2
- package/dist/src/version.js +1 -1
- package/package.json +2 -1
package/dist/src/analysis.js
CHANGED
|
@@ -1,684 +1,49 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
"파일",
|
|
7
|
-
"이모티콘",
|
|
8
|
-
"지도",
|
|
9
|
-
"연락처",
|
|
10
|
-
"투표",
|
|
11
|
-
"공유",
|
|
12
|
-
"음성메시지",
|
|
13
|
-
"삭제된 메시지",
|
|
14
|
-
];
|
|
15
|
-
/** 카톡 CSV 시스템 문구 — 키워드(본문 단어) 집계에서 제외, 첨부 유형으로만 집계 */
|
|
16
|
-
const KEYWORD_EXCLUDE = new Set(ATTACHMENT_MARKERS);
|
|
17
|
-
const WEEKDAY_LABELS_KO = ["일", "월", "화", "수", "목", "금", "토"];
|
|
18
|
-
const URL_RE = /\bhttps?:\/\/[^\s<>"']+|www\.[^\s<>"']+/gi;
|
|
19
|
-
const EMAIL_RE = /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/gi;
|
|
20
|
-
const PHONE_RE = /\b(?:\+?\d[\d\s().-]{7,}\d)\b/g;
|
|
21
|
-
const TOKEN_RE = /[가-힣A-Za-z][가-힣A-Za-z0-9_+-]{1,}/g;
|
|
22
|
-
const STOPWORDS = new Set([
|
|
23
|
-
"그리고",
|
|
24
|
-
"그냥",
|
|
25
|
-
"근데",
|
|
26
|
-
"그래서",
|
|
27
|
-
"저는",
|
|
28
|
-
"제가",
|
|
29
|
-
"우리",
|
|
30
|
-
"오늘",
|
|
31
|
-
"내일",
|
|
32
|
-
"어제",
|
|
33
|
-
"이거",
|
|
34
|
-
"저거",
|
|
35
|
-
"그거",
|
|
36
|
-
"수정",
|
|
37
|
-
"확인",
|
|
38
|
-
"가능",
|
|
39
|
-
"입니다",
|
|
40
|
-
"합니다",
|
|
41
|
-
"있습니다",
|
|
42
|
-
"없는",
|
|
43
|
-
"있는",
|
|
44
|
-
"the",
|
|
45
|
-
"and",
|
|
46
|
-
"for",
|
|
47
|
-
"with",
|
|
48
|
-
"this",
|
|
49
|
-
"that",
|
|
50
|
-
"from",
|
|
51
|
-
"http",
|
|
52
|
-
"https",
|
|
53
|
-
]);
|
|
54
|
-
const NIGHT_HOURS = new Set([23, 0, 1, 2, 3, 4, 5]);
|
|
55
|
-
const MAX_GAP_MS = 7 * 24 * 60 * 60 * 1000;
|
|
1
|
+
import { ReportAggregator } from "./aggregator.js";
|
|
2
|
+
export { maskPartialDisplayName, parseChatRoomNameFromExportPath, safeInputName } from "./analysis-labels.js";
|
|
3
|
+
import { runAnalyzeWorker, shouldUseAnalyzeWorker } from "./analyze-pool.js";
|
|
4
|
+
import { streamKakaoExport } from "./stream-parser.js";
|
|
5
|
+
const DEFAULT_TOP = 30;
|
|
56
6
|
export function buildReportData(result, options) {
|
|
57
|
-
const top = options?.top ?? 30;
|
|
58
7
|
const privacy = options?.privacy ?? "public-masked";
|
|
59
|
-
const
|
|
60
|
-
const
|
|
61
|
-
const daily = new Map();
|
|
62
|
-
const monthly = new Map();
|
|
63
|
-
const hourly = Array.from({ length: 24 }, () => 0);
|
|
64
|
-
const weekdays = Array.from({ length: 7 }, () => 0);
|
|
65
|
-
const attachments = new Map();
|
|
66
|
-
const domains = new Map();
|
|
67
|
-
const keywords = new Map();
|
|
68
|
-
const senderNames = new Set(result.records.map((record) => normalizeToken(record.sender)));
|
|
69
|
-
let totalCharacters = 0;
|
|
70
|
-
let messagesWithLinks = 0;
|
|
71
|
-
let messagesWithAttachments = 0;
|
|
72
|
-
let nightMessages = 0;
|
|
73
|
-
let emojiMessages = 0;
|
|
74
|
-
let weekendMessages = 0;
|
|
75
|
-
let questionMessages = 0;
|
|
76
|
-
let speakerSwitches = 0;
|
|
77
|
-
let monologueMessages = 0;
|
|
78
|
-
const gapsMs = [];
|
|
79
|
-
let prevMs = null;
|
|
80
|
-
let prevSender = null;
|
|
81
|
-
let runSender = null;
|
|
82
|
-
let runLen = 0;
|
|
8
|
+
const top = options?.top ?? DEFAULT_TOP;
|
|
9
|
+
const agg = new ReportAggregator(result.filePath, privacy, top);
|
|
83
10
|
for (const record of result.records) {
|
|
84
|
-
|
|
85
|
-
speakerSwitches += 1;
|
|
86
|
-
}
|
|
87
|
-
const alias = aliases.get(record.sender) ?? "???";
|
|
88
|
-
const stat = getParticipantStat(senderStats, alias);
|
|
89
|
-
const messageLength = record.message.length;
|
|
90
|
-
const foundAttachments = getAttachmentMarkers(record.message);
|
|
91
|
-
const foundDomains = getDomains(record.message);
|
|
92
|
-
const ms = partsToUtcMs(record.date);
|
|
93
|
-
if (/\p{Extended_Pictographic}/u.test(record.message)) {
|
|
94
|
-
emojiMessages += 1;
|
|
95
|
-
}
|
|
96
|
-
if (/\?|？/.test(record.message)) {
|
|
97
|
-
questionMessages += 1;
|
|
98
|
-
}
|
|
99
|
-
const wi = weekdayIndex(record.date);
|
|
100
|
-
if (wi === 0 || wi === 6) {
|
|
101
|
-
weekendMessages += 1;
|
|
102
|
-
}
|
|
103
|
-
if (NIGHT_HOURS.has(record.date.hour)) {
|
|
104
|
-
nightMessages += 1;
|
|
105
|
-
stat.nightMessages += 1;
|
|
106
|
-
}
|
|
107
|
-
if (prevMs !== null) {
|
|
108
|
-
const delta = ms - prevMs;
|
|
109
|
-
if (delta > 0 && delta <= MAX_GAP_MS)
|
|
110
|
-
gapsMs.push(delta);
|
|
111
|
-
}
|
|
112
|
-
prevMs = ms;
|
|
113
|
-
if (record.sender === prevSender) {
|
|
114
|
-
runLen += 1;
|
|
115
|
-
if (runLen >= 3) {
|
|
116
|
-
monologueMessages += 1;
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
else {
|
|
120
|
-
if (prevSender !== null && runSender !== null) {
|
|
121
|
-
const prevAlias = aliases.get(prevSender) ?? "???";
|
|
122
|
-
const prevStat = getParticipantStat(senderStats, prevAlias);
|
|
123
|
-
prevStat.maxConsecutive = Math.max(prevStat.maxConsecutive, runLen);
|
|
124
|
-
}
|
|
125
|
-
runSender = record.sender;
|
|
126
|
-
runLen = 1;
|
|
127
|
-
}
|
|
128
|
-
prevSender = record.sender;
|
|
129
|
-
stat.messages += 1;
|
|
130
|
-
stat.characters += messageLength;
|
|
131
|
-
totalCharacters += messageLength;
|
|
132
|
-
if (foundAttachments.length > 0) {
|
|
133
|
-
stat.attachmentMessages += 1;
|
|
134
|
-
messagesWithAttachments += 1;
|
|
135
|
-
for (const marker of foundAttachments)
|
|
136
|
-
increment(attachments, marker);
|
|
137
|
-
}
|
|
138
|
-
if (foundDomains.length > 0) {
|
|
139
|
-
stat.linkMessages += 1;
|
|
140
|
-
messagesWithLinks += 1;
|
|
141
|
-
for (const domain of foundDomains)
|
|
142
|
-
increment(domains, domain);
|
|
143
|
-
}
|
|
144
|
-
for (const keyword of extractKeywords(record.message, senderNames)) {
|
|
145
|
-
increment(keywords, keyword);
|
|
146
|
-
}
|
|
147
|
-
const dayKey = formatDate(record.date);
|
|
148
|
-
increment(daily, dayKey);
|
|
149
|
-
increment(monthly, `${record.date.year}-${pad2(record.date.month)}`);
|
|
150
|
-
hourly[record.date.hour] = (hourly[record.date.hour] ?? 0) + 1;
|
|
151
|
-
weekdays[wi] = (weekdays[wi] ?? 0) + 1;
|
|
152
|
-
}
|
|
153
|
-
if (prevSender !== null && runSender !== null) {
|
|
154
|
-
const prevAlias = aliases.get(prevSender) ?? "???";
|
|
155
|
-
const prevStat = getParticipantStat(senderStats, prevAlias);
|
|
156
|
-
prevStat.maxConsecutive = Math.max(prevStat.maxConsecutive, runLen);
|
|
157
|
-
}
|
|
158
|
-
const total = result.records.length;
|
|
159
|
-
const participantStats = [...senderStats.values()]
|
|
160
|
-
.map((stat) => {
|
|
161
|
-
const sharePercent = total > 0 ? round((stat.messages / total) * 100, 1) : 0;
|
|
162
|
-
return {
|
|
163
|
-
alias: stat.alias,
|
|
164
|
-
messages: stat.messages,
|
|
165
|
-
characters: stat.characters,
|
|
166
|
-
averageLength: round(stat.characters / Math.max(stat.messages, 1), 1),
|
|
167
|
-
attachmentMessages: stat.attachmentMessages,
|
|
168
|
-
linkMessages: stat.linkMessages,
|
|
169
|
-
sharePercent,
|
|
170
|
-
nightMessages: stat.nightMessages,
|
|
171
|
-
maxConsecutive: stat.maxConsecutive,
|
|
172
|
-
};
|
|
173
|
-
})
|
|
174
|
-
.sort((a, b) => b.messages - a.messages)
|
|
175
|
-
.slice(0, top);
|
|
176
|
-
const sortedDays = [...daily.keys()].sort();
|
|
177
|
-
const longestStreak = longestDateStreak(sortedDays);
|
|
178
|
-
let peakHour = null;
|
|
179
|
-
let peakCount = -1;
|
|
180
|
-
for (let h = 0; h < 24; h += 1) {
|
|
181
|
-
const c = hourly[h] ?? 0;
|
|
182
|
-
if (c > peakCount) {
|
|
183
|
-
peakCount = c;
|
|
184
|
-
peakHour = h;
|
|
185
|
-
}
|
|
186
|
-
}
|
|
187
|
-
if (peakCount <= 0)
|
|
188
|
-
peakHour = null;
|
|
189
|
-
let busiestIdx = -1;
|
|
190
|
-
let busiestCount = -1;
|
|
191
|
-
for (let i = 0; i < 7; i += 1) {
|
|
192
|
-
const c = weekdays[i] ?? 0;
|
|
193
|
-
if (c > busiestCount) {
|
|
194
|
-
busiestCount = c;
|
|
195
|
-
busiestIdx = i;
|
|
196
|
-
}
|
|
197
|
-
}
|
|
198
|
-
const busiestWeekdayLabel = busiestIdx >= 0 && busiestCount > 0 ? `${WEEKDAY_LABELS_KO[busiestIdx] ?? ""}요일` : null;
|
|
199
|
-
const medianReplyGapMinutes = gapsMs.length > 0 ? round(medianSorted([...gapsMs].sort((a, b) => a - b)) / 60_000, 1) : null;
|
|
200
|
-
const nightSharePercent = total > 0 ? round((nightMessages / total) * 100, 1) : 0;
|
|
201
|
-
const activeDays = daily.size;
|
|
202
|
-
const messagesPerActiveDay = activeDays > 0 ? round(total / activeDays, 1) : 0;
|
|
203
|
-
const allMessageCounts = [...senderStats.values()].map((s) => s.messages).sort((a, b) => a - b);
|
|
204
|
-
const participantGini = computeGini(allMessageCounts);
|
|
205
|
-
const gapsSorted = gapsMs.length > 0 ? [...gapsMs].sort((a, b) => a - b) : [];
|
|
206
|
-
const replyGapP90Minutes = gapsSorted.length > 0 ? round(quantileSorted(gapsSorted, 0.9) / 60_000, 1) : null;
|
|
207
|
-
const maxSilenceBetweenActiveDays = maxSilenceGapDays(sortedDays);
|
|
208
|
-
const top3ParticipantSharePercent = computeTop3Share(senderStats, total);
|
|
209
|
-
const linkDomainEntropyBits = domainEntropyBits(domains);
|
|
210
|
-
const densityMessagesPerCalendarDay = computeDensityPerCalendarDay(result.records, total);
|
|
211
|
-
const weekendSharePercent = total > 0 ? round((weekendMessages / total) * 100, 1) : 0;
|
|
212
|
-
const questionLikeMessagesPer100 = total > 0 ? round((questionMessages / total) * 100, 2) : 0;
|
|
213
|
-
const speakerSwitchRatePer100 = total > 0 ? round((speakerSwitches / total) * 100, 2) : 0;
|
|
214
|
-
const daypartPercents = computeDaypartPercents(hourly, total);
|
|
215
|
-
const rhythmScore = computeRhythmScore({
|
|
216
|
-
gini: participantGini,
|
|
217
|
-
longestStreak,
|
|
218
|
-
density: densityMessagesPerCalendarDay,
|
|
219
|
-
});
|
|
220
|
-
const linksPer100 = total > 0 ? round((messagesWithLinks / total) * 100, 2) : 0;
|
|
221
|
-
const attachmentsPer100 = total > 0 ? round((messagesWithAttachments / total) * 100, 2) : 0;
|
|
222
|
-
const perParticipantMsgs = [...senderStats.values()].map((s) => s.messages);
|
|
223
|
-
const medianMessagesPerParticipant = perParticipantMsgs.length > 0
|
|
224
|
-
? round(medianSorted([...perParticipantMsgs].sort((a, b) => a - b)), 2)
|
|
225
|
-
: null;
|
|
226
|
-
let burstUnder1m = 0;
|
|
227
|
-
let gapOver60m = 0;
|
|
228
|
-
for (const g of gapsMs) {
|
|
229
|
-
if (g < 60_000)
|
|
230
|
-
burstUnder1m += 1;
|
|
231
|
-
if (g > 3_600_000)
|
|
232
|
-
gapOver60m += 1;
|
|
11
|
+
agg.consume(record);
|
|
233
12
|
}
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
activeHoursCount += 1;
|
|
240
|
-
}
|
|
241
|
-
let keywordTokenSum = 0;
|
|
242
|
-
let keywordTopCount = 0;
|
|
243
|
-
for (const c of keywords.values()) {
|
|
244
|
-
keywordTokenSum += c;
|
|
245
|
-
keywordTopCount = Math.max(keywordTopCount, c);
|
|
246
|
-
}
|
|
247
|
-
const keywordTop1SharePercent = keywordTokenSum > 0 ? round((keywordTopCount / keywordTokenSum) * 100, 1) : null;
|
|
248
|
-
let attachmentMarkerSum = 0;
|
|
249
|
-
for (const c of attachments.values())
|
|
250
|
-
attachmentMarkerSum += c;
|
|
251
|
-
const photoMarkerCount = attachments.get("사진") ?? 0;
|
|
252
|
-
const photoShareOfAllAttachmentMarkers = attachmentMarkerSum > 0 ? round((photoMarkerCount / attachmentMarkerSum) * 100, 1) : null;
|
|
253
|
-
let maxDayMessages = 0;
|
|
254
|
-
for (const c of daily.values())
|
|
255
|
-
maxDayMessages = Math.max(maxDayMessages, c);
|
|
256
|
-
const peakDaySharePercent = total > 0 ? round((maxDayMessages / total) * 100, 1) : 0;
|
|
257
|
-
const uniqueDomainCount = domains.size;
|
|
258
|
-
const replyGapCoeffVariation = gapCoeffVariation(gapsMs);
|
|
259
|
-
const monologueMessagesPercent = total > 0 ? round((monologueMessages / total) * 100, 1) : 0;
|
|
260
|
-
const insights = {
|
|
261
|
-
weekendSharePercent,
|
|
262
|
-
participantGini,
|
|
263
|
-
replyGapP90Minutes,
|
|
264
|
-
maxSilenceBetweenActiveDays,
|
|
265
|
-
top3ParticipantSharePercent,
|
|
266
|
-
linkDomainEntropyBits,
|
|
267
|
-
densityMessagesPerCalendarDay,
|
|
268
|
-
questionLikeMessagesPer100,
|
|
269
|
-
speakerSwitchRatePer100,
|
|
270
|
-
rhythmScore,
|
|
271
|
-
daypartPercents,
|
|
272
|
-
linksPer100,
|
|
273
|
-
attachmentsPer100,
|
|
274
|
-
medianMessagesPerParticipant,
|
|
275
|
-
burstGapUnder1mPercent,
|
|
276
|
-
gapOver60mPercent,
|
|
277
|
-
activeHoursCount,
|
|
278
|
-
keywordTop1SharePercent,
|
|
279
|
-
photoShareOfAllAttachmentMarkers,
|
|
280
|
-
monologueMessagesPercent,
|
|
281
|
-
peakDaySharePercent,
|
|
282
|
-
uniqueDomainCount,
|
|
283
|
-
replyGapCoeffVariation,
|
|
284
|
-
};
|
|
285
|
-
const highlights = buildHighlights({
|
|
286
|
-
total,
|
|
287
|
-
topAlias: participantStats[0]?.alias ?? null,
|
|
288
|
-
topShare: participantStats[0]?.sharePercent ?? null,
|
|
289
|
-
busiestWeekdayLabel,
|
|
290
|
-
peakHour,
|
|
291
|
-
medianReplyGapMinutes,
|
|
292
|
-
nightSharePercent,
|
|
293
|
-
longestStreak,
|
|
294
|
-
emojiMessages,
|
|
295
|
-
messagesWithAttachments,
|
|
296
|
-
weekendSharePercent,
|
|
297
|
-
participantGini,
|
|
298
|
-
replyGapP90Minutes,
|
|
299
|
-
maxSilenceBetweenActiveDays,
|
|
300
|
-
rhythmScore,
|
|
301
|
-
burstGapUnder1mPercent,
|
|
302
|
-
monologueMessagesPercent,
|
|
13
|
+
return agg.finalize({
|
|
14
|
+
filePath: result.filePath,
|
|
15
|
+
encoding: result.encoding,
|
|
16
|
+
physicalLines: result.physicalLines,
|
|
17
|
+
warningCount: result.warnings.length,
|
|
303
18
|
});
|
|
304
|
-
return {
|
|
305
|
-
generatedAt: new Date().toISOString(),
|
|
306
|
-
privacy,
|
|
307
|
-
source: {
|
|
308
|
-
fileName: safeInputName(result.filePath),
|
|
309
|
-
chatRoomName: parseChatRoomNameFromExportPath(result.filePath),
|
|
310
|
-
encoding: result.encoding,
|
|
311
|
-
physicalLines: result.physicalLines,
|
|
312
|
-
warnings: result.warnings.length,
|
|
313
|
-
},
|
|
314
|
-
summary: {
|
|
315
|
-
totalMessages: total,
|
|
316
|
-
participants: aliases.size,
|
|
317
|
-
activeDays,
|
|
318
|
-
firstMessage: result.records[0] ? formatDateTime(result.records[0].date) : null,
|
|
319
|
-
lastMessage: result.records.at(-1) ? formatDateTime(result.records.at(-1).date) : null,
|
|
320
|
-
averageMessageLength: round(totalCharacters / Math.max(total, 1), 1),
|
|
321
|
-
messagesWithLinks,
|
|
322
|
-
messagesWithAttachments,
|
|
323
|
-
messagesPerActiveDay,
|
|
324
|
-
longestActiveStreakDays: longestStreak,
|
|
325
|
-
peakHour,
|
|
326
|
-
busiestWeekdayLabel,
|
|
327
|
-
medianReplyGapMinutes,
|
|
328
|
-
nightSharePercent,
|
|
329
|
-
emojiMessages,
|
|
330
|
-
},
|
|
331
|
-
insights,
|
|
332
|
-
participants: participantStats,
|
|
333
|
-
daily: [...daily.entries()].map(([date, count]) => ({ date, count })).sort((a, b) => a.date.localeCompare(b.date)),
|
|
334
|
-
hourly,
|
|
335
|
-
weekdays: weekdays.map((count, index) => ({ label: `${WEEKDAY_LABELS_KO[index] ?? index}요일`, count })),
|
|
336
|
-
monthly: [...monthly.entries()].map(([date, count]) => ({ date, count })).sort((a, b) => a.date.localeCompare(b.date)),
|
|
337
|
-
attachments: topCounts(attachments, top),
|
|
338
|
-
domains: topCounts(domains, top),
|
|
339
|
-
keywords: topCounts(keywords, top),
|
|
340
|
-
highlights,
|
|
341
|
-
};
|
|
342
|
-
}
|
|
343
|
-
export function safeInputName(filePath) {
|
|
344
|
-
const name = basename(filePath);
|
|
345
|
-
return name.length > 80 ? `${name.slice(0, 77)}...` : name;
|
|
346
|
-
}
|
|
347
|
-
const KAKAO_CHAT_PREFIX_RE = /^KakaoTalk_Chat_/i;
|
|
348
|
-
const KAKAO_PREFIX_RE = /^KakaoTalk_/i;
|
|
349
|
-
/** 카카오 CSV 보내기 파일명 끝의 보내기 시각: _2026-05-16-15-03-41 */
|
|
350
|
-
const EXPORT_TIMESTAMP_SUFFIX_RE = /_\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2}$/;
|
|
351
|
-
export function parseChatRoomNameFromExportPath(filePath) {
|
|
352
|
-
const base = basename(filePath).replace(/\.(csv|txt)$/i, "");
|
|
353
|
-
let room = base;
|
|
354
|
-
if (KAKAO_CHAT_PREFIX_RE.test(room)) {
|
|
355
|
-
room = room.replace(KAKAO_CHAT_PREFIX_RE, "");
|
|
356
|
-
}
|
|
357
|
-
else if (KAKAO_PREFIX_RE.test(room)) {
|
|
358
|
-
room = room.replace(KAKAO_PREFIX_RE, "");
|
|
359
|
-
}
|
|
360
|
-
room = room.replace(EXPORT_TIMESTAMP_SUFFIX_RE, "").trim();
|
|
361
|
-
if (room.length > 0)
|
|
362
|
-
return room.length > 120 ? `${room.slice(0, 117)}...` : room;
|
|
363
|
-
const fallback = base.replace(EXPORT_TIMESTAMP_SUFFIX_RE, "").trim();
|
|
364
|
-
return fallback.length > 0 ? fallback : "채팅방";
|
|
365
|
-
}
|
|
366
|
-
function getParticipantStat(stats, alias) {
|
|
367
|
-
const existing = stats.get(alias);
|
|
368
|
-
if (existing)
|
|
369
|
-
return existing;
|
|
370
|
-
const created = {
|
|
371
|
-
alias,
|
|
372
|
-
messages: 0,
|
|
373
|
-
characters: 0,
|
|
374
|
-
attachmentMessages: 0,
|
|
375
|
-
linkMessages: 0,
|
|
376
|
-
nightMessages: 0,
|
|
377
|
-
maxConsecutive: 0,
|
|
378
|
-
};
|
|
379
|
-
stats.set(alias, created);
|
|
380
|
-
return created;
|
|
381
19
|
}
|
|
382
|
-
function
|
|
383
|
-
const
|
|
384
|
-
const
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
}
|
|
391
|
-
if (privacy === "public-anonymous") {
|
|
392
|
-
const map = new Map();
|
|
393
|
-
unique.forEach((sender, i) => map.set(sender, `User ${String(i + 1).padStart(3, "0")}`));
|
|
394
|
-
return map;
|
|
395
|
-
}
|
|
396
|
-
const map = new Map();
|
|
397
|
-
const used = new Map();
|
|
398
|
-
for (const raw of unique) {
|
|
399
|
-
let base = maskPartialDisplayName(raw);
|
|
400
|
-
const n = (used.get(base) ?? 0) + 1;
|
|
401
|
-
used.set(base, n);
|
|
402
|
-
if (n > 1)
|
|
403
|
-
base = `${base}·${n}`;
|
|
404
|
-
map.set(raw, base);
|
|
405
|
-
}
|
|
406
|
-
return map;
|
|
407
|
-
}
|
|
408
|
-
/** 참여자 실명 대신 앞·뒤 일부만 남기고 가운데는 마스킹합니다. */
|
|
409
|
-
export function maskPartialDisplayName(raw) {
|
|
410
|
-
const s = raw.trim();
|
|
411
|
-
if (!s)
|
|
412
|
-
return "?";
|
|
413
|
-
const chars = [...s];
|
|
414
|
-
if (chars.length === 1)
|
|
415
|
-
return `${chars[0]}*`;
|
|
416
|
-
if (chars.length === 2)
|
|
417
|
-
return `${chars[0]}*`;
|
|
418
|
-
const midLen = Math.min(chars.length - 2, 6);
|
|
419
|
-
const middle = "*".repeat(Math.max(1, midLen));
|
|
420
|
-
return `${chars[0]}${middle}${chars[chars.length - 1]}`;
|
|
421
|
-
}
|
|
422
|
-
function getAttachmentMarkers(message) {
|
|
423
|
-
return ATTACHMENT_MARKERS.filter((marker) => message.includes(marker));
|
|
424
|
-
}
|
|
425
|
-
function getDomains(message) {
|
|
426
|
-
const matches = message.match(URL_RE) ?? [];
|
|
427
|
-
const domains = [];
|
|
428
|
-
for (const match of matches) {
|
|
429
|
-
const urlText = match.startsWith("http") ? match : `https://${match}`;
|
|
430
|
-
try {
|
|
431
|
-
const url = new URL(urlText);
|
|
432
|
-
domains.push(url.hostname.toLowerCase().replace(/^www\./, ""));
|
|
433
|
-
}
|
|
434
|
-
catch {
|
|
435
|
-
continue;
|
|
436
|
-
}
|
|
437
|
-
}
|
|
438
|
-
return domains;
|
|
439
|
-
}
|
|
440
|
-
function extractKeywords(message, senderNames) {
|
|
441
|
-
const withoutSensitivePatterns = message
|
|
442
|
-
.replace(URL_RE, " ")
|
|
443
|
-
.replace(EMAIL_RE, " ")
|
|
444
|
-
.replace(PHONE_RE, " ");
|
|
445
|
-
const tokens = withoutSensitivePatterns.match(TOKEN_RE) ?? [];
|
|
446
|
-
const keywords = [];
|
|
447
|
-
for (const token of tokens) {
|
|
448
|
-
const normalized = normalizeToken(token);
|
|
449
|
-
if (!normalized)
|
|
450
|
-
continue;
|
|
451
|
-
if (normalized.length < 2 || normalized.length > 30)
|
|
452
|
-
continue;
|
|
453
|
-
if (STOPWORDS.has(normalized))
|
|
454
|
-
continue;
|
|
455
|
-
if (senderNames.has(normalized))
|
|
456
|
-
continue;
|
|
457
|
-
if (KEYWORD_EXCLUDE.has(normalized))
|
|
458
|
-
continue;
|
|
459
|
-
if (/^\d+$/.test(normalized))
|
|
460
|
-
continue;
|
|
461
|
-
keywords.push(normalized);
|
|
462
|
-
}
|
|
463
|
-
return keywords;
|
|
464
|
-
}
|
|
465
|
-
function normalizeToken(token) {
|
|
466
|
-
return /^[A-Za-z0-9_+-]+$/.test(token) ? token.toLowerCase() : token.trim();
|
|
467
|
-
}
|
|
468
|
-
function increment(map, key, amount = 1) {
|
|
469
|
-
map.set(key, (map.get(key) ?? 0) + amount);
|
|
470
|
-
}
|
|
471
|
-
function topCounts(map, limit) {
|
|
472
|
-
return [...map.entries()]
|
|
473
|
-
.map(([label, count]) => ({ label, count }))
|
|
474
|
-
.sort((a, b) => b.count - a.count || a.label.localeCompare(b.label))
|
|
475
|
-
.slice(0, limit);
|
|
476
|
-
}
|
|
477
|
-
function round(value, decimals) {
|
|
478
|
-
const factor = 10 ** decimals;
|
|
479
|
-
return Math.round(value * factor) / factor;
|
|
480
|
-
}
|
|
481
|
-
function pad2(value) {
|
|
482
|
-
return value.toString().padStart(2, "0");
|
|
483
|
-
}
|
|
484
|
-
function medianSorted(sorted) {
|
|
485
|
-
if (sorted.length === 0)
|
|
486
|
-
return 0;
|
|
487
|
-
const mid = Math.floor(sorted.length / 2);
|
|
488
|
-
return sorted.length % 2 === 1 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
|
|
489
|
-
}
|
|
490
|
-
function longestDateStreak(sortedYmd) {
|
|
491
|
-
if (sortedYmd.length === 0)
|
|
492
|
-
return 0;
|
|
493
|
-
let best = 1;
|
|
494
|
-
let cur = 1;
|
|
495
|
-
for (let i = 1; i < sortedYmd.length; i += 1) {
|
|
496
|
-
const a = new Date(`${sortedYmd[i - 1]}T12:00:00Z`).getTime();
|
|
497
|
-
const b = new Date(`${sortedYmd[i]}T12:00:00Z`).getTime();
|
|
498
|
-
const diffDays = Math.round((b - a) / 86_400_000);
|
|
499
|
-
if (diffDays === 1) {
|
|
500
|
-
cur += 1;
|
|
501
|
-
best = Math.max(best, cur);
|
|
20
|
+
export async function buildReportFromExportSync(filePath, options) {
|
|
21
|
+
const privacy = options?.privacy ?? "public-masked";
|
|
22
|
+
const top = options?.top ?? DEFAULT_TOP;
|
|
23
|
+
const agg = new ReportAggregator(filePath, privacy, top);
|
|
24
|
+
let meta = null;
|
|
25
|
+
for await (const event of streamKakaoExport(filePath)) {
|
|
26
|
+
if (event.type === "record") {
|
|
27
|
+
agg.consume(event.record);
|
|
502
28
|
}
|
|
503
29
|
else {
|
|
504
|
-
|
|
30
|
+
meta = {
|
|
31
|
+
filePath: event.meta.filePath,
|
|
32
|
+
encoding: event.meta.encoding,
|
|
33
|
+
physicalLines: event.meta.physicalLines,
|
|
34
|
+
warningCount: event.meta.warnings.length,
|
|
35
|
+
};
|
|
505
36
|
}
|
|
506
37
|
}
|
|
507
|
-
|
|
508
|
-
}
|
|
509
|
-
function computeGini(counts) {
|
|
510
|
-
if (counts.length === 0)
|
|
511
|
-
return null;
|
|
512
|
-
const sorted = [...counts].sort((a, b) => a - b);
|
|
513
|
-
const n = sorted.length;
|
|
514
|
-
let sum = 0;
|
|
515
|
-
for (const x of sorted)
|
|
516
|
-
sum += x;
|
|
517
|
-
if (sum === 0)
|
|
518
|
-
return null;
|
|
519
|
-
let num = 0;
|
|
520
|
-
for (let i = 0; i < n; i += 1) {
|
|
521
|
-
num += (2 * i - n + 1) * sorted[i];
|
|
38
|
+
if (!meta) {
|
|
39
|
+
throw new Error(`No messages parsed from export: ${filePath}`);
|
|
522
40
|
}
|
|
523
|
-
return
|
|
524
|
-
}
|
|
525
|
-
function quantileSorted(sortedAsc, p) {
|
|
526
|
-
if (sortedAsc.length === 0)
|
|
527
|
-
return 0;
|
|
528
|
-
const pos = (sortedAsc.length - 1) * p;
|
|
529
|
-
const lo = Math.floor(pos);
|
|
530
|
-
const hi = Math.ceil(pos);
|
|
531
|
-
if (lo === hi)
|
|
532
|
-
return sortedAsc[lo];
|
|
533
|
-
const w = pos - lo;
|
|
534
|
-
return sortedAsc[lo] * (1 - w) + sortedAsc[hi] * w;
|
|
41
|
+
return agg.finalize(meta);
|
|
535
42
|
}
|
|
536
|
-
function
|
|
537
|
-
if (
|
|
538
|
-
return
|
|
539
|
-
let sum = 0;
|
|
540
|
-
for (const g of gaps)
|
|
541
|
-
sum += g;
|
|
542
|
-
const mean = sum / gaps.length;
|
|
543
|
-
if (mean <= 0)
|
|
544
|
-
return null;
|
|
545
|
-
let varAcc = 0;
|
|
546
|
-
for (const g of gaps) {
|
|
547
|
-
const d = g - mean;
|
|
548
|
-
varAcc += d * d;
|
|
549
|
-
}
|
|
550
|
-
const variance = varAcc / gaps.length;
|
|
551
|
-
const sd = Math.sqrt(variance);
|
|
552
|
-
return round(sd / mean, 2);
|
|
553
|
-
}
|
|
554
|
-
function maxSilenceGapDays(sortedYmd) {
|
|
555
|
-
if (sortedYmd.length < 2)
|
|
556
|
-
return null;
|
|
557
|
-
let best = 0;
|
|
558
|
-
for (let i = 1; i < sortedYmd.length; i += 1) {
|
|
559
|
-
const a = new Date(`${sortedYmd[i - 1]}T12:00:00Z`).getTime();
|
|
560
|
-
const b = new Date(`${sortedYmd[i]}T12:00:00Z`).getTime();
|
|
561
|
-
const diffDays = Math.round((b - a) / 86_400_000);
|
|
562
|
-
best = Math.max(best, Math.max(0, diffDays - 1));
|
|
563
|
-
}
|
|
564
|
-
return best;
|
|
565
|
-
}
|
|
566
|
-
function computeTop3Share(stats, total) {
|
|
567
|
-
if (total === 0)
|
|
568
|
-
return 0;
|
|
569
|
-
const top3 = [...stats.values()]
|
|
570
|
-
.map((s) => s.messages)
|
|
571
|
-
.sort((a, b) => b - a)
|
|
572
|
-
.slice(0, 3)
|
|
573
|
-
.reduce((a, c) => a + c, 0);
|
|
574
|
-
return round((top3 / total) * 100, 1);
|
|
575
|
-
}
|
|
576
|
-
function domainEntropyBits(domains) {
|
|
577
|
-
let sum = 0;
|
|
578
|
-
for (const c of domains.values())
|
|
579
|
-
sum += c;
|
|
580
|
-
if (sum === 0)
|
|
581
|
-
return null;
|
|
582
|
-
let h = 0;
|
|
583
|
-
for (const c of domains.values()) {
|
|
584
|
-
if (c <= 0)
|
|
585
|
-
continue;
|
|
586
|
-
const p = c / sum;
|
|
587
|
-
h -= p * Math.log2(p);
|
|
588
|
-
}
|
|
589
|
-
return round(h, 2);
|
|
590
|
-
}
|
|
591
|
-
function computeDensityPerCalendarDay(records, total) {
|
|
592
|
-
if (total === 0 || records.length === 0)
|
|
593
|
-
return null;
|
|
594
|
-
const first = records[0].date;
|
|
595
|
-
const last = records[records.length - 1].date;
|
|
596
|
-
const spanDays = Math.max(1, Math.floor((partsToUtcMs(last) - partsToUtcMs(first)) / 86_400_000) + 1);
|
|
597
|
-
return round(total / spanDays, 2);
|
|
598
|
-
}
|
|
599
|
-
function computeDaypartPercents(hourly, total) {
|
|
600
|
-
const bands = [
|
|
601
|
-
{ key: "dawn", label: "새벽(0~5시)", lo: 0, hi: 5 },
|
|
602
|
-
{ key: "morning", label: "오전(6~11시)", lo: 6, hi: 11 },
|
|
603
|
-
{ key: "afternoon", label: "오후(12~17시)", lo: 12, hi: 17 },
|
|
604
|
-
{ key: "evening", label: "저녁(18~23시)", lo: 18, hi: 23 },
|
|
605
|
-
];
|
|
606
|
-
if (total === 0) {
|
|
607
|
-
return bands.map((b) => ({ key: b.key, label: b.label, percent: 0 }));
|
|
608
|
-
}
|
|
609
|
-
const raw = bands.map((b) => {
|
|
610
|
-
let c = 0;
|
|
611
|
-
for (let h = b.lo; h <= b.hi; h += 1)
|
|
612
|
-
c += hourly[h] ?? 0;
|
|
613
|
-
return { key: b.key, label: b.label, count: c };
|
|
614
|
-
});
|
|
615
|
-
const sum = raw.reduce((a, x) => a + x.count, 0) || 1;
|
|
616
|
-
let rounded = raw.map((x) => ({
|
|
617
|
-
key: x.key,
|
|
618
|
-
label: x.label,
|
|
619
|
-
percent: round((x.count / sum) * 100, 1),
|
|
620
|
-
}));
|
|
621
|
-
const drift = 100 - rounded.reduce((a, x) => a + x.percent, 0);
|
|
622
|
-
if (Math.abs(drift) >= 0.05 && rounded.length > 0) {
|
|
623
|
-
const idx = rounded.reduce((best, x, i, arr) => (x.percent >= arr[best].percent ? i : best), 0);
|
|
624
|
-
rounded = rounded.map((x, i) => i === idx ? { ...x, percent: round(x.percent + drift, 1) } : x);
|
|
625
|
-
}
|
|
626
|
-
return rounded;
|
|
627
|
-
}
|
|
628
|
-
function computeRhythmScore(input) {
|
|
629
|
-
const g = input.gini ?? 0.45;
|
|
630
|
-
const streakN = Math.min(1, input.longestStreak / 28);
|
|
631
|
-
const densityN = input.density != null ? Math.min(1, input.density / 40) : 0.25;
|
|
632
|
-
const score = 48 * (1 - Math.min(0.95, g)) + 32 * streakN + 20 * densityN;
|
|
633
|
-
return Math.max(0, Math.min(100, Math.round(score)));
|
|
634
|
-
}
|
|
635
|
-
function buildHighlights(input) {
|
|
636
|
-
const out = [];
|
|
637
|
-
if (input.topAlias && input.topShare !== null && input.total > 0) {
|
|
638
|
-
out.push(`가장 말이 많았던 분은 **${input.topAlias}** (전체의 **${input.topShare}%**).`);
|
|
639
|
-
}
|
|
640
|
-
if (input.busiestWeekdayLabel) {
|
|
641
|
-
out.push(`요일별로는 **${input.busiestWeekdayLabel}**에 활동이 가장 활발했어요.`);
|
|
642
|
-
}
|
|
643
|
-
if (input.peakHour !== null) {
|
|
644
|
-
out.push(`시간대는 **${input.peakHour}시**대에 메시지가 가장 몰렸습니다.`);
|
|
645
|
-
}
|
|
646
|
-
if (input.medianReplyGapMinutes !== null) {
|
|
647
|
-
out.push(`연속 메시지 사이 간격의 중앙값은 약 **${input.medianReplyGapMinutes}분**이에요.`);
|
|
648
|
-
}
|
|
649
|
-
if (input.nightSharePercent > 0) {
|
|
650
|
-
out.push(`심야(23~05시) 메시지 비중은 **${input.nightSharePercent}%**입니다.`);
|
|
651
|
-
}
|
|
652
|
-
if (input.longestStreak > 1) {
|
|
653
|
-
out.push(`하루도 빠짐없이 이어진 최장 **${input.longestStreak}일** 연속 활동 기록이 있어요.`);
|
|
654
|
-
}
|
|
655
|
-
if (input.emojiMessages > 0) {
|
|
656
|
-
out.push(`이모지·스티커 느낌의 메시지는 **${input.emojiMessages}**건 정도 감지됐어요.`);
|
|
657
|
-
}
|
|
658
|
-
if (input.messagesWithAttachments > 0) {
|
|
659
|
-
out.push(`사진·파일·동영상 등 첨부가 들어간 메시지는 **${input.messagesWithAttachments}**건입니다.`);
|
|
660
|
-
}
|
|
661
|
-
if (input.total > 0 && input.weekendSharePercent > 0) {
|
|
662
|
-
out.push(`주말(토·일) 메시지 비중은 **${input.weekendSharePercent}%**예요.`);
|
|
663
|
-
}
|
|
664
|
-
if (input.participantGini !== null && input.participantGini >= 0.35) {
|
|
665
|
-
out.push(`참여도는 소수에게 조금 몰린 편이에요(Gini **${input.participantGini}** 근처).`);
|
|
666
|
-
}
|
|
667
|
-
if (input.replyGapP90Minutes !== null && input.replyGapP90Minutes >= 30) {
|
|
668
|
-
out.push(`가끔 긴 침묵도 있어요 — 응답 간격 **상위 10%**가 약 **${input.replyGapP90Minutes}분** 이상입니다.`);
|
|
669
|
-
}
|
|
670
|
-
if (input.maxSilenceBetweenActiveDays !== null && input.maxSilenceBetweenActiveDays >= 7) {
|
|
671
|
-
out.push(`활동일 사이 최대 **${input.maxSilenceBetweenActiveDays}일** 동안은 메시지가 끊긴 구간이 있었어요.`);
|
|
672
|
-
}
|
|
673
|
-
if (input.rhythmScore >= 65) {
|
|
674
|
-
out.push(`종합 **리듬 점수**는 **${input.rhythmScore}/100** — 꾸준하고 균형 잡힌 페이스에 가깝습니다.`);
|
|
675
|
-
}
|
|
676
|
-
if (input.burstGapUnder1mPercent !== null && input.burstGapUnder1mPercent >= 40) {
|
|
677
|
-
out.push(`응답 간격의 **${input.burstGapUnder1mPercent}%**가 1분 이내로, 실시간 대화 톤이 강해요.`);
|
|
678
|
-
}
|
|
679
|
-
if (input.monologueMessagesPercent >= 25) {
|
|
680
|
-
out.push(`같은 사람 **3연속 이상** 메시지가 전체의 **${input.monologueMessagesPercent}%** — 긴 설명·정리 구간이 잦을 수 있어요.`);
|
|
43
|
+
export async function buildReportFromExport(filePath, options) {
|
|
44
|
+
if (await shouldUseAnalyzeWorker(filePath, options)) {
|
|
45
|
+
return runAnalyzeWorker(filePath, options);
|
|
681
46
|
}
|
|
682
|
-
return
|
|
47
|
+
return buildReportFromExportSync(filePath, options);
|
|
683
48
|
}
|
|
684
49
|
//# sourceMappingURL=analysis.js.map
|