kakaotalk-chat-analyzer 0.2.11 → 0.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/aggregator.d.ts +41 -0
- package/dist/src/aggregator.js +628 -0
- package/dist/src/aggregator.js.map +1 -0
- package/dist/src/analysis-labels.d.ts +3 -0
- package/dist/src/analysis-labels.js +37 -0
- package/dist/src/analysis-labels.js.map +1 -0
- package/dist/src/analysis.d.ts +7 -9
- package/dist/src/analysis.js +34 -669
- package/dist/src/analysis.js.map +1 -1
- package/dist/src/analyze-pool.d.ts +9 -0
- package/dist/src/analyze-pool.js +47 -0
- package/dist/src/analyze-pool.js.map +1 -0
- package/dist/src/analyze-worker.d.ts +1 -0
- package/dist/src/analyze-worker.js +14 -0
- package/dist/src/analyze-worker.js.map +1 -0
- package/dist/src/cli.js +33 -11
- package/dist/src/cli.js.map +1 -1
- package/dist/src/encoding.d.ts +6 -0
- package/dist/src/encoding.js +23 -8
- package/dist/src/encoding.js.map +1 -1
- package/dist/src/gap-stats.d.ts +18 -0
- package/dist/src/gap-stats.js +75 -0
- package/dist/src/gap-stats.js.map +1 -0
- package/dist/src/kakao-line.d.ts +4 -0
- package/dist/src/kakao-line.js +134 -0
- package/dist/src/kakao-line.js.map +1 -0
- package/dist/src/keyword-counter.d.ts +11 -0
- package/dist/src/keyword-counter.js +42 -0
- package/dist/src/keyword-counter.js.map +1 -0
- package/dist/src/parser.js +2 -134
- package/dist/src/parser.js.map +1 -1
- package/dist/src/report.js +0 -9
- package/dist/src/report.js.map +1 -1
- package/dist/src/stream-parser.d.ts +20 -0
- package/dist/src/stream-parser.js +109 -0
- package/dist/src/stream-parser.js.map +1 -0
- package/dist/src/version.d.ts +2 -2
- package/dist/src/version.js +1 -1
- package/package.json +2 -1
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import type { ChatRecord, EncodingName, PrivacyMode, ReportData } from "./types.js";
|
|
2
|
+
/**
 * Facts about the analysed input file that the aggregator does not track
 * itself; they are handed to `ReportAggregator.finalize` and copied into the
 * `source` section of the report.
 */
export interface FinalizeSourceMeta {
    /** Path of the input file; the report's file name and chat-room name are derived from it. */
    filePath: string;

    /** Encoding label reported for the input; copied into the report as-is. */
    encoding: EncodingName;

    /** Physical line count of the input (presumably counted by the reader — confirm against caller). */
    physicalLines: number;

    /** Warning count; surfaced in the report as `source.warnings`. */
    warningCount: number;
}
|
|
8
|
+
/**
 * Incremental report builder: feed it chat records one at a time with
 * `consume`, then call `finalize` to obtain the report data.
 *
 * All state is private; the private members are listed without types because
 * this is a generated declaration file.
 */
export declare class ReportAggregator {
    // --- constructor arguments -------------------------------------------
    private readonly filePath;
    private readonly privacy;
    private readonly top;

    // --- per-key tallies ---------------------------------------------------
    private readonly senderStats;
    private readonly senderNamesNormalized;
    private readonly daily;
    private readonly monthly;
    private readonly hourly;
    private readonly weekdays;
    private readonly attachments;
    private readonly domains;
    private readonly keywordCounter;
    private readonly gapStats;

    // --- running counters --------------------------------------------------
    private total;
    private totalCharacters;
    private messagesWithLinks;
    private messagesWithAttachments;
    private nightMessages;
    private emojiMessages;
    private weekendMessages;
    private questionMessages;
    private speakerSwitches;
    private monologueMessages;

    // --- state carried from the previous record ----------------------------
    private prevMs;
    private prevSender;
    private runSender;
    private runLen;
    private firstDate;
    private lastDate;

    /**
     * @param filePath Path of the chat export being analysed.
     * @param privacy Privacy mode used when labelling participants.
     * @param top Maximum number of rows kept in ranked lists.
     */
    constructor(filePath: string, privacy: PrivacyMode, top: number);

    /** Folds one chat record into the running statistics. */
    consume(record: ChatRecord): void;

    /**
     * Builds the report from everything consumed so far.
     *
     * @param meta Facts about the source file to embed in the report.
     * @returns The assembled report data.
     */
    finalize(meta: FinalizeSourceMeta): ReportData;
}
|
|
@@ -0,0 +1,628 @@
|
|
|
1
|
+
import { formatDate, formatDateTime, partsToUtcMs, weekdayIndex } from "./date.js";
|
|
2
|
+
import { maskPartialDisplayName, parseChatRoomNameFromExportPath, safeInputName } from "./analysis-labels.js";
|
|
3
|
+
import { GapStreamStats } from "./gap-stats.js";
|
|
4
|
+
import { KeywordCounter } from "./keyword-counter.js";
|
|
5
|
+
/**
 * Substrings that mark a message as containing an attachment placeholder
 * (photo, video, file, emoticon, map, contact, poll, share, voice message,
 * deleted message). Matching is by plain substring containment.
 */
const ATTACHMENT_MARKERS = [
    "사진",
    "동영상",
    "파일",
    "이모티콘",
    "지도",
    "연락처",
    "투표",
    "공유",
    "음성메시지",
    "삭제된 메시지",
];

/** Attachment markers are never reported as keywords. */
const KEYWORD_EXCLUDE = new Set(ATTACHMENT_MARKERS);

/** Korean weekday initials, indexed by weekday index (0 and 6 are treated as the weekend by the aggregator). */
const WEEKDAY_LABELS_KO = ["일", "월", "화", "수", "목", "금", "토"];

/** http(s) URLs and bare `www.` hosts. Global + case-insensitive: use with `match`/`replace`, not repeated `test`. */
const URL_RE = /\bhttps?:\/\/[^\s<>"']+|www\.[^\s<>"']+/gi;

/** E-mail addresses; stripped from messages before keyword extraction. */
const EMAIL_RE = /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/gi;

/** Phone-number-like digit runs; stripped from messages before keyword extraction. */
const PHONE_RE = /\b(?:\+?\d[\d\s().-]{7,}\d)\b/g;

/** Keyword candidates: a Hangul or Latin letter followed by at least one more word character. */
const TOKEN_RE = /[가-힣A-Za-z][가-힣A-Za-z0-9_+-]{1,}/g;

/** Words too common to be useful as keywords (Korean and English). */
const STOPWORDS = new Set([
    "그리고",
    "그냥",
    "근데",
    "그래서",
    "저는",
    "제가",
    "우리",
    "오늘",
    "내일",
    "어제",
    "이거",
    "저거",
    "그거",
    "수정",
    "확인",
    "가능",
    "입니다",
    "합니다",
    "있습니다",
    "없는",
    "있는",
    "the",
    "and",
    "for",
    "with",
    "this",
    "that",
    "from",
    "http",
    "https",
]);

/** Hours of the day counted as "night" (23:00 through 05:59). */
const NIGHT_HOURS = new Set([23, 0, 1, 2, 3, 4, 5]);

/** True when a message contains at least one pictographic (emoji) code point. */
const EMOJI_RE = /\p{Extended_Pictographic}/u;

/**
 * True when a message contains an ASCII question mark or the fullwidth
 * question mark (U+FF1F). The fullwidth form is written as an escape so that
 * Unicode normalisation of this file cannot collapse it into `?`, which would
 * turn the pattern into the invalid regex `/\?|?/`.
 */
const QUESTION_RE = /[?\uFF1F]/;
|
|
58
|
+
/**
 * Streaming accumulator for chat statistics.
 *
 * Records are fed one at a time through `consume`; `finalize` then derives the
 * report (summary, insights, rankings, highlights) from the running tallies.
 * Several statistics (speaker switches, gaps, runs of consecutive messages)
 * compare each record with the one consumed immediately before it, so the
 * order in which records are consumed matters.
 */
export class ReportAggregator {
    filePath;
    privacy;
    top;
    senderStats = new Map();
    senderNamesNormalized = new Set();
    daily = new Map();
    monthly = new Map();
    hourly = Array.from({ length: 24 }, () => 0);
    weekdays = Array.from({ length: 7 }, () => 0);
    attachments = new Map();
    domains = new Map();
    keywordCounter = new KeywordCounter();
    gapStats = new GapStreamStats();
    total = 0;
    totalCharacters = 0;
    messagesWithLinks = 0;
    messagesWithAttachments = 0;
    nightMessages = 0;
    emojiMessages = 0;
    weekendMessages = 0;
    questionMessages = 0;
    speakerSwitches = 0;
    monologueMessages = 0;
    prevMs = null;
    prevSender = null;
    runSender = null;
    runLen = 0;
    firstDate = null;
    lastDate = null;
    /**
     * @param filePath Path of the export being analysed (stored; `finalize` reads the path from its `meta` argument instead).
     * @param privacy Privacy mode; decides how participant labels are built.
     * @param top Maximum number of rows in ranked lists (participants, attachments, domains, keywords).
     */
    constructor(filePath, privacy, top) {
        this.filePath = filePath;
        this.privacy = privacy;
        this.top = top;
    }
    /**
     * Folds one record into the running statistics.
     *
     * Reads `record.sender`, `record.message` and `record.date` (including its
     * `year`, `month` and `hour` fields).
     *
     * @param record The chat record to account for.
     */
    consume(record) {
        // A switch is counted whenever the sender differs from the previous record's sender.
        if (this.prevSender !== null && record.sender !== this.prevSender) {
            this.speakerSwitches += 1;
        }
        const stat = getParticipantStat(this.senderStats, record.sender);
        // Registered before keyword extraction so the sender's own name is filtered from this message too.
        this.senderNamesNormalized.add(normalizeToken(record.sender));
        const messageLength = record.message.length;
        const foundAttachments = getAttachmentMarkers(record.message);
        const foundDomains = getDomains(record.message);
        const ms = partsToUtcMs(record.date);
        if (this.firstDate === null)
            this.firstDate = record.date;
        this.lastDate = record.date;
        this.total += 1;
        if (EMOJI_RE.test(record.message)) {
            this.emojiMessages += 1;
        }
        if (QUESTION_RE.test(record.message)) {
            this.questionMessages += 1;
        }
        const wi = weekdayIndex(record.date);
        if (wi === 0 || wi === 6) {
            this.weekendMessages += 1;
        }
        if (NIGHT_HOURS.has(record.date.hour)) {
            this.nightMessages += 1;
            stat.nightMessages += 1;
        }
        // Gap between this record and the previous one, whoever sent them.
        if (this.prevMs !== null) {
            const delta = ms - this.prevMs;
            this.gapStats.add(delta);
        }
        this.prevMs = ms;
        if (record.sender === this.prevSender) {
            this.runLen += 1;
            // Only the third and later messages of a run are counted as monologue messages.
            if (this.runLen >= 3) {
                this.monologueMessages += 1;
            }
        }
        else {
            // The previous sender's run just ended: record its length before starting a new run.
            if (this.prevSender !== null && this.runSender !== null) {
                const prevStat = getParticipantStat(this.senderStats, this.prevSender);
                prevStat.maxConsecutive = Math.max(prevStat.maxConsecutive, this.runLen);
            }
            this.runSender = record.sender;
            this.runLen = 1;
        }
        this.prevSender = record.sender;
        stat.messages += 1;
        stat.characters += messageLength;
        this.totalCharacters += messageLength;
        if (foundAttachments.length > 0) {
            stat.attachmentMessages += 1;
            this.messagesWithAttachments += 1;
            for (const marker of foundAttachments)
                increment(this.attachments, marker);
        }
        if (foundDomains.length > 0) {
            stat.linkMessages += 1;
            this.messagesWithLinks += 1;
            for (const domain of foundDomains)
                increment(this.domains, domain);
        }
        if (shouldExtractKeywords(record.message, foundAttachments)) {
            for (const keyword of extractKeywords(record.message, this.senderNamesNormalized)) {
                this.keywordCounter.add(keyword);
            }
        }
        const dayKey = formatDate(record.date);
        increment(this.daily, dayKey);
        increment(this.monthly, `${record.date.year}-${pad2(record.date.month)}`);
        this.hourly[record.date.hour] = (this.hourly[record.date.hour] ?? 0) + 1;
        this.weekdays[wi] = (this.weekdays[wi] ?? 0) + 1;
    }
    /**
     * Derives the report from everything consumed so far.
     *
     * The returned object does not share arrays with the aggregator: `hourly`
     * is copied so that records consumed after this call cannot alter a report
     * that was already handed out.
     *
     * @param meta Source-file facts (`filePath`, `encoding`, `physicalLines`, `warningCount`) embedded in the report.
     * @returns The report data object.
     */
    finalize(meta) {
        // Close the run that is still open at the end of the stream.
        if (this.prevSender !== null && this.runSender !== null) {
            const prevStat = getParticipantStat(this.senderStats, this.prevSender);
            prevStat.maxConsecutive = Math.max(prevStat.maxConsecutive, this.runLen);
        }
        const total = this.total;
        const aliases = buildSenderLabels([...this.senderStats.keys()], this.privacy);
        const participantStats = [...this.senderStats.entries()]
            .map(([raw, stat]) => {
            const sharePercent = total > 0 ? round((stat.messages / total) * 100, 1) : 0;
            return {
                alias: aliases.get(raw) ?? "???",
                messages: stat.messages,
                characters: stat.characters,
                averageLength: round(stat.characters / Math.max(stat.messages, 1), 1),
                attachmentMessages: stat.attachmentMessages,
                linkMessages: stat.linkMessages,
                sharePercent,
                nightMessages: stat.nightMessages,
                maxConsecutive: stat.maxConsecutive,
            };
        })
            .sort((a, b) => b.messages - a.messages)
            .slice(0, this.top);
        const sortedDays = [...this.daily.keys()].sort();
        const longestStreak = longestDateStreak(sortedDays);
        // Peak hour: earliest hour with the highest count, or null when nothing was counted.
        let peakHour = null;
        let peakCount = -1;
        for (let h = 0; h < 24; h += 1) {
            const c = this.hourly[h] ?? 0;
            if (c > peakCount) {
                peakCount = c;
                peakHour = h;
            }
        }
        if (peakCount <= 0)
            peakHour = null;
        // Busiest weekday: first weekday index with the highest count.
        let busiestIdx = -1;
        let busiestCount = -1;
        for (let i = 0; i < 7; i += 1) {
            const c = this.weekdays[i] ?? 0;
            if (c > busiestCount) {
                busiestCount = c;
                busiestIdx = i;
            }
        }
        const busiestWeekdayLabel = busiestIdx >= 0 && busiestCount > 0 ? `${WEEKDAY_LABELS_KO[busiestIdx] ?? ""}요일` : null;
        const medianMs = this.gapStats.medianMs();
        const medianReplyGapMinutes = medianMs !== null ? round(medianMs / 60_000, 1) : null;
        const nightSharePercent = total > 0 ? round((this.nightMessages / total) * 100, 1) : 0;
        const activeDays = this.daily.size;
        const messagesPerActiveDay = activeDays > 0 ? round(total / activeDays, 1) : 0;
        const allMessageCounts = [...this.senderStats.values()].map((s) => s.messages).sort((a, b) => a - b);
        const participantGini = computeGini(allMessageCounts);
        const p90Ms = this.gapStats.p90Ms();
        const replyGapP90Minutes = p90Ms !== null ? round(p90Ms / 60_000, 1) : null;
        const maxSilenceBetweenActiveDays = maxSilenceGapDays(sortedDays);
        const top3ParticipantSharePercent = computeTop3Share(this.senderStats, total);
        const linkDomainEntropyBits = domainEntropyBits(this.domains);
        const densityMessagesPerCalendarDay = computeDensityFromSpan(this.firstDate, this.lastDate, total);
        const weekendSharePercent = total > 0 ? round((this.weekendMessages / total) * 100, 1) : 0;
        const questionLikeMessagesPer100 = total > 0 ? round((this.questionMessages / total) * 100, 2) : 0;
        const speakerSwitchRatePer100 = total > 0 ? round((this.speakerSwitches / total) * 100, 2) : 0;
        const daypartPercents = computeDaypartPercents(this.hourly, total);
        const rhythmScore = computeRhythmScore({
            gini: participantGini,
            longestStreak,
            density: densityMessagesPerCalendarDay,
        });
        const linksPer100 = total > 0 ? round((this.messagesWithLinks / total) * 100, 2) : 0;
        const attachmentsPer100 = total > 0 ? round((this.messagesWithAttachments / total) * 100, 2) : 0;
        const perParticipantMsgs = [...this.senderStats.values()].map((s) => s.messages);
        const medianMessagesPerParticipant = perParticipantMsgs.length > 0
            ? round(medianSorted([...perParticipantMsgs].sort((a, b) => a - b)), 2)
            : null;
        const burstGapUnder1mPercent = this.gapStats.burstUnder1mPercent();
        const gapOver60mPercent = this.gapStats.gapOver60mPercent();
        let activeHoursCount = 0;
        for (let h = 0; h < 24; h += 1) {
            if ((this.hourly[h] ?? 0) > 0)
                activeHoursCount += 1;
        }
        const keywordTop1SharePercent = this.keywordCounter.top1SharePercent();
        let attachmentMarkerSum = 0;
        for (const c of this.attachments.values())
            attachmentMarkerSum += c;
        const photoMarkerCount = this.attachments.get("사진") ?? 0;
        const photoShareOfAllAttachmentMarkers = attachmentMarkerSum > 0 ? round((photoMarkerCount / attachmentMarkerSum) * 100, 1) : null;
        let maxDayMessages = 0;
        for (const c of this.daily.values())
            maxDayMessages = Math.max(maxDayMessages, c);
        const peakDaySharePercent = total > 0 ? round((maxDayMessages / total) * 100, 1) : 0;
        const uniqueDomainCount = this.domains.size;
        const replyGapCoeffVariation = this.gapStats.coeffVariation();
        const monologueMessagesPercent = total > 0 ? round((this.monologueMessages / total) * 100, 1) : 0;
        const insights = {
            weekendSharePercent,
            participantGini,
            replyGapP90Minutes,
            maxSilenceBetweenActiveDays,
            top3ParticipantSharePercent,
            linkDomainEntropyBits,
            densityMessagesPerCalendarDay,
            questionLikeMessagesPer100,
            speakerSwitchRatePer100,
            rhythmScore,
            daypartPercents,
            linksPer100,
            attachmentsPer100,
            medianMessagesPerParticipant,
            burstGapUnder1mPercent,
            gapOver60mPercent,
            activeHoursCount,
            keywordTop1SharePercent,
            photoShareOfAllAttachmentMarkers,
            monologueMessagesPercent,
            peakDaySharePercent,
            uniqueDomainCount,
            replyGapCoeffVariation,
        };
        const highlights = buildHighlights({
            total,
            topAlias: participantStats[0]?.alias ?? null,
            topShare: participantStats[0]?.sharePercent ?? null,
            busiestWeekdayLabel,
            peakHour,
            medianReplyGapMinutes,
            nightSharePercent,
            longestStreak,
            emojiMessages: this.emojiMessages,
            messagesWithAttachments: this.messagesWithAttachments,
            weekendSharePercent,
            participantGini,
            replyGapP90Minutes,
            maxSilenceBetweenActiveDays,
            rhythmScore,
            burstGapUnder1mPercent,
            monologueMessagesPercent,
        });
        return {
            generatedAt: new Date().toISOString(),
            privacy: this.privacy,
            source: {
                fileName: safeInputName(meta.filePath),
                chatRoomName: parseChatRoomNameFromExportPath(meta.filePath),
                encoding: meta.encoding,
                physicalLines: meta.physicalLines,
                warnings: meta.warningCount,
            },
            summary: {
                totalMessages: total,
                // Counts every sender seen, not just the top-N rows listed under `participants`.
                participants: aliases.size,
                activeDays,
                firstMessage: this.firstDate ? formatDateTime(this.firstDate) : null,
                lastMessage: this.lastDate ? formatDateTime(this.lastDate) : null,
                averageMessageLength: round(this.totalCharacters / Math.max(total, 1), 1),
                messagesWithLinks: this.messagesWithLinks,
                messagesWithAttachments: this.messagesWithAttachments,
                messagesPerActiveDay,
                longestActiveStreakDays: longestStreak,
                peakHour,
                busiestWeekdayLabel,
                medianReplyGapMinutes,
                nightSharePercent,
                emojiMessages: this.emojiMessages,
            },
            insights,
            participants: participantStats,
            daily: [...this.daily.entries()].map(([date, count]) => ({ date, count })).sort((a, b) => a.date.localeCompare(b.date)),
            // Copy: handing out the live array would let later consume() calls rewrite this report.
            hourly: [...this.hourly],
            weekdays: this.weekdays.map((count, index) => ({
                label: `${WEEKDAY_LABELS_KO[index] ?? index}요일`,
                count,
            })),
            monthly: [...this.monthly.entries()].map(([date, count]) => ({ date, count })).sort((a, b) => a.date.localeCompare(b.date)),
            attachments: topCounts(this.attachments, this.top),
            domains: topCounts(this.domains, this.top),
            keywords: this.keywordCounter.topCounts(this.top),
            highlights,
        };
    }
}
|
|
349
|
+
/**
 * Maps each distinct raw sender name to the label shown in the report.
 *
 * In "public-anonymous" mode the labels are `User 001`, `User 002`, ... in
 * first-seen order. In every other mode the label is the masked display name;
 * when several senders mask to the same text, the second and later ones get a
 * `·N` suffix.
 *
 * @param senders Raw sender names (duplicates are ignored).
 * @param privacy Privacy mode string.
 * @returns Map from raw sender name to display label.
 */
function buildSenderLabels(senders, privacy) {
    const distinctSenders = Array.from(new Set(senders));
    const labels = new Map();
    if (privacy === "public-anonymous") {
        let ordinal = 0;
        for (const sender of distinctSenders) {
            ordinal += 1;
            labels.set(sender, `User ${String(ordinal).padStart(3, "0")}`);
        }
        return labels;
    }
    // How many senders have produced each masked name so far.
    const maskedSeen = new Map();
    for (const sender of distinctSenders) {
        const masked = maskPartialDisplayName(sender);
        const occurrence = (maskedSeen.get(masked) ?? 0) + 1;
        maskedSeen.set(masked, occurrence);
        labels.set(sender, occurrence > 1 ? `${masked}·${occurrence}` : masked);
    }
    return labels;
}
|
|
368
|
+
/**
 * Returns the statistics record for a sender, creating and registering a
 * zeroed record the first time the sender is seen.
 *
 * @param stats Map from sender name to statistics record (mutated on first sight).
 * @param sender Raw sender name.
 * @returns The sender's (possibly new) statistics record.
 */
function getParticipantStat(stats, sender) {
    let entry = stats.get(sender);
    if (!entry) {
        entry = {
            messages: 0,
            characters: 0,
            attachmentMessages: 0,
            linkMessages: 0,
            nightMessages: 0,
            maxConsecutive: 0,
        };
        stats.set(sender, entry);
    }
    return entry;
}
|
|
383
|
+
/**
 * Decides whether a message is worth scanning for keywords.
 *
 * Rejected: blank messages, messages that are exactly one attachment marker,
 * and short messages (at most 16 characters after trimming) that contain all
 * of the given markers but no run of three or more Hangul/Latin letters once
 * everything except letters and digits is removed.
 *
 * @param message Raw message text.
 * @param attachmentMarkers Attachment markers found in the message.
 * @returns True when keywords should be extracted.
 */
function shouldExtractKeywords(message, attachmentMarkers) {
    const text = message.trim();
    if (text === "")
        return false;
    const markerCount = attachmentMarkers.length;
    if (markerCount === 1 && attachmentMarkers[0] === text)
        return false;
    // Long messages and messages without markers always qualify.
    if (markerCount === 0 || text.length > 16)
        return true;
    const everyMarkerPresent = attachmentMarkers.every((marker) => text.includes(marker));
    if (!everyMarkerPresent)
        return true;
    const lettersAndDigits = text.replace(/[^\p{L}\p{N}]/gu, "");
    return /[가-힣A-Za-z]{3,}/.test(lettersAndDigits);
}
|
|
397
|
+
/**
 * Lists the attachment markers that occur anywhere in the message, in the
 * order they appear in `ATTACHMENT_MARKERS`.
 *
 * @param message Raw message text.
 * @returns Markers contained in the message (empty when none).
 */
function getAttachmentMarkers(message) {
    const present = [];
    for (const marker of ATTACHMENT_MARKERS) {
        if (message.includes(marker)) {
            present.push(marker);
        }
    }
    return present;
}
|
|
400
|
+
/**
 * Extracts the host names of all URLs found in a message.
 *
 * Each match of `URL_RE` is parsed with the `URL` constructor; matches without
 * an explicit http(s) scheme (bare `www.` hosts) get `https://` prepended
 * first. Host names are lower-cased and a leading `www.` is removed. Matches
 * that fail to parse are skipped. A host appears once per matching URL, so the
 * result may contain duplicates.
 *
 * @param message Raw message text.
 * @returns Host names, one per parsable URL, in order of appearance.
 */
function getDomains(message) {
    const matches = message.match(URL_RE) ?? [];
    // URL_RE is case-insensitive, so the scheme test must be too: otherwise
    // "HTTP://example.com" would get a second scheme prepended and parse to host "http".
    const hasHttpScheme = /^https?:\/\//i;
    const domains = [];
    for (const match of matches) {
        const urlText = hasHttpScheme.test(match) ? match : `https://${match}`;
        try {
            const url = new URL(urlText);
            domains.push(url.hostname.toLowerCase().replace(/^www\./, ""));
        }
        catch {
            // Best effort: text that merely looks like a URL is ignored.
            continue;
        }
    }
    return domains;
}
|
|
415
|
+
/**
 * Pulls keyword candidates out of a message.
 *
 * URLs, e-mail addresses and phone-like numbers are blanked out first; the
 * remaining text is tokenised with `TOKEN_RE` and each token is normalised.
 * Tokens are dropped when they are empty, shorter than 2 or longer than 30
 * characters, a stop word, a known sender name, an attachment marker, or
 * purely numeric.
 *
 * @param message Raw message text.
 * @param senderNames Set of normalised sender names to exclude.
 * @returns Normalised keywords in order of appearance (duplicates kept).
 */
function extractKeywords(message, senderNames) {
    let scrubbed = message;
    for (const sensitivePattern of [URL_RE, EMAIL_RE, PHONE_RE]) {
        scrubbed = scrubbed.replace(sensitivePattern, " ");
    }
    const candidates = scrubbed.match(TOKEN_RE);
    if (candidates === null) {
        return [];
    }
    const isRejected = (word) => !word ||
        word.length < 2 ||
        word.length > 30 ||
        STOPWORDS.has(word) ||
        senderNames.has(word) ||
        KEYWORD_EXCLUDE.has(word) ||
        /^\d+$/.test(word);
    return candidates
        .map((candidate) => normalizeToken(candidate))
        .filter((word) => !isRejected(word));
}
|
|
440
|
+
/**
 * Canonical form of a token: tokens made only of ASCII letters, digits,
 * `_`, `+` and `-` are lower-cased; anything else is just trimmed.
 *
 * @param token Token or name to normalise.
 * @returns The normalised text.
 */
function normalizeToken(token) {
    const asciiWord = /^[A-Za-z0-9_+-]+$/;
    if (asciiWord.test(token)) {
        return token.toLowerCase();
    }
    return token.trim();
}
|
|
443
|
+
/**
 * Adds `amount` to the counter stored under `key`, treating a missing entry
 * as zero.
 *
 * @param map Counter map (mutated).
 * @param key Counter key.
 * @param amount Amount to add; defaults to 1.
 */
function increment(map, key, amount = 1) {
    const current = map.get(key);
    const next = (current ?? 0) + amount;
    map.set(key, next);
}
|
|
446
|
+
/**
 * Ranks the entries of a counter map.
 *
 * Rows are ordered by count, highest first; equal counts are ordered by
 * `localeCompare` on the label. At most `limit` rows are returned.
 *
 * @param map Map from label to count.
 * @param limit Maximum number of rows.
 * @returns Array of `{ label, count }` rows.
 */
function topCounts(map, limit) {
    const rows = [];
    for (const [label, count] of map.entries()) {
        rows.push({ label, count });
    }
    rows.sort((left, right) => {
        const byCount = right.count - left.count;
        return byCount ? byCount : left.label.localeCompare(right.label);
    });
    return rows.slice(0, limit);
}
|
|
452
|
+
/**
 * Rounds a number to a fixed count of decimal places by scaling, applying
 * `Math.round`, and scaling back.
 *
 * @param value Number to round.
 * @param decimals Number of decimal places to keep.
 * @returns The rounded number.
 */
function round(value, decimals) {
    const scale = 10 ** decimals;
    const scaled = Math.round(value * scale);
    return scaled / scale;
}
|
|
456
|
+
/**
 * Renders a value as text, left-padded with zeros to at least two characters.
 *
 * @param value Value to render (its `toString()` is used).
 * @returns The zero-padded text.
 */
function pad2(value) {
    const digits = value.toString();
    return digits.padStart(2, "0");
}
|
|
459
|
+
/**
 * Median of an array that is already sorted ascending.
 *
 * @param sorted Sorted numbers.
 * @returns The middle value, the mean of the two middle values for an even
 *   length, or 0 for an empty array.
 */
function medianSorted(sorted) {
    const size = sorted.length;
    if (size === 0) {
        return 0;
    }
    const upper = Math.floor(size / 2);
    if (size % 2 === 1) {
        return sorted[upper];
    }
    return (sorted[upper - 1] + sorted[upper]) / 2;
}
|
|
465
|
+
/**
 * Length of the longest run of consecutive calendar days.
 *
 * Each entry is parsed as `<entry>T12:00:00Z`; two neighbouring entries extend
 * the run when they are exactly one day apart, anything else restarts it.
 *
 * @param sortedYmd Date strings in ascending order (expected to be ISO `YYYY-MM-DD`).
 * @returns Longest run in days; 0 for an empty list.
 */
function longestDateStreak(sortedYmd) {
    if (sortedYmd.length === 0) {
        return 0;
    }
    const noonUtcMs = (ymd) => new Date(`${ymd}T12:00:00Z`).getTime();
    let longest = 1;
    let running = 1;
    let previousMs = noonUtcMs(sortedYmd[0]);
    for (const ymd of sortedYmd.slice(1)) {
        const currentMs = noonUtcMs(ymd);
        const dayStep = Math.round((currentMs - previousMs) / 86_400_000);
        running = dayStep === 1 ? running + 1 : 1;
        if (running > longest) {
            longest = running;
        }
        previousMs = currentMs;
    }
    return longest;
}
|
|
484
|
+
/**
 * Gini coefficient of a list of counts, rounded to three decimals.
 *
 * Computed from the ascending-sorted values as
 * `sum((2*i - n + 1) * x[i]) / (n * sum(x))` with zero-based `i`.
 *
 * @param counts Per-participant counts (not mutated).
 * @returns The coefficient, or null when the list is empty or sums to zero.
 */
function computeGini(counts) {
    const size = counts.length;
    if (size === 0) {
        return null;
    }
    const ascending = Array.from(counts).sort((x, y) => x - y);
    const totalMass = ascending.reduce((acc, value) => acc + value, 0);
    if (totalMass === 0) {
        return null;
    }
    const weighted = ascending.reduce((acc, value, rank) => acc + (2 * rank - size + 1) * value, 0);
    return round(weighted / (size * totalMass), 3);
}
|
|
500
|
+
/**
 * Largest number of whole days without activity between two neighbouring
 * active days (adjacent days count as a gap of 0).
 *
 * @param sortedYmd Date strings in ascending order (expected to be ISO `YYYY-MM-DD`).
 * @returns The longest gap in days, or null when fewer than two dates are given.
 */
function maxSilenceGapDays(sortedYmd) {
    if (sortedYmd.length < 2) {
        return null;
    }
    const noonUtcMs = (ymd) => new Date(`${ymd}T12:00:00Z`).getTime();
    let widest = 0;
    sortedYmd.forEach((ymd, position) => {
        if (position === 0) {
            return;
        }
        const dayStep = Math.round((noonUtcMs(ymd) - noonUtcMs(sortedYmd[position - 1])) / 86_400_000);
        const silentDays = Math.max(0, dayStep - 1);
        widest = Math.max(widest, silentDays);
    });
    return widest;
}
|
|
512
|
+
/**
 * Percentage of all messages written by the three most active senders,
 * rounded to one decimal.
 *
 * @param stats Map from sender to statistics record (only `messages` is read).
 * @param total Total message count.
 * @returns The share in percent; 0 when `total` is 0.
 */
function computeTop3Share(stats, total) {
    if (total === 0) {
        return 0;
    }
    const messageCounts = Array.from(stats.values(), (stat) => stat.messages);
    messageCounts.sort((x, y) => y - x);
    let leadingSum = 0;
    for (const count of messageCounts.slice(0, 3)) {
        leadingSum += count;
    }
    return round((leadingSum / total) * 100, 1);
}
|
|
522
|
+
/**
 * Shannon entropy, in bits, of the distribution of link counts across
 * domains, rounded to two decimals.
 *
 * @param domains Map from domain to link count.
 * @returns The entropy, or null when the counts sum to zero.
 */
function domainEntropyBits(domains) {
    const counts = [...domains.values()];
    const totalLinks = counts.reduce((acc, count) => acc + count, 0);
    if (totalLinks === 0) {
        return null;
    }
    const entropy = counts
        // Zero or negative counts contribute nothing.
        .filter((count) => !(count <= 0))
        .reduce((acc, count) => {
        const probability = count / totalLinks;
        return acc - probability * Math.log2(probability);
    }, 0);
    return round(entropy, 2);
}
|
|
537
|
+
/**
 * Average number of messages per day over the span from the first to the
 * last message, rounded to two decimals.
 *
 * The span is the elapsed time between the two timestamps in whole days
 * (rounded down) plus one, and never less than one day.
 * NOTE(review): because whole elapsed days are used, a span whose last message
 * falls earlier in the day than the first may count one calendar day fewer
 * than the dates suggest — confirm whether that is intended.
 *
 * @param first Date parts of the first message, or null.
 * @param last Date parts of the last message, or null.
 * @param total Total message count.
 * @returns Messages per day, or null when there are no messages or a date is missing.
 */
function computeDensityFromSpan(first, last, total) {
    const hasSpan = total !== 0 && Boolean(first) && Boolean(last);
    if (!hasSpan) {
        return null;
    }
    const elapsedMs = partsToUtcMs(last) - partsToUtcMs(first);
    const wholeDays = Math.floor(elapsedMs / 86_400_000) + 1;
    return round(total / Math.max(1, wholeDays), 2);
}
|
|
543
|
+
/**
 * Splits the hourly histogram into four day parts (dawn 0-5, morning 6-11,
 * afternoon 12-17, evening 18-23) and expresses each as a percentage rounded
 * to one decimal.
 *
 * When rounding makes the percentages miss 100 by 0.05 or more, the
 * difference is added to the largest part (the last one among equals).
 *
 * @param hourly Message counts per hour, index 0-23.
 * @param total Total message count; 0 yields all-zero percentages.
 * @returns Four `{ key, label, percent }` rows in day order.
 */
function computeDaypartPercents(hourly, total) {
    const dayparts = [
        ["dawn", "새벽(0~5시)", 0, 5],
        ["morning", "오전(6~11시)", 6, 11],
        ["afternoon", "오후(12~17시)", 12, 17],
        ["evening", "저녁(18~23시)", 18, 23],
    ];
    if (total === 0) {
        return dayparts.map(([key, label]) => ({ key, label, percent: 0 }));
    }
    const counts = dayparts.map(([, , firstHour, lastHour]) => {
        let messages = 0;
        for (let hour = firstHour; hour <= lastHour; hour += 1) {
            messages += hourly[hour] ?? 0;
        }
        return messages;
    });
    // Guard against dividing by zero when the histogram is empty.
    const denominator = counts.reduce((acc, count) => acc + count, 0) || 1;
    const result = dayparts.map(([key, label], position) => ({
        key,
        label,
        percent: round((counts[position] / denominator) * 100, 1),
    }));
    const drift = 100 - result.reduce((acc, row) => acc + row.percent, 0);
    if (Math.abs(drift) >= 0.05 && result.length > 0) {
        let largest = 0;
        result.forEach((row, position) => {
            if (row.percent >= result[largest].percent) {
                largest = position;
            }
        });
        const adjusted = result[largest];
        result[largest] = { ...adjusted, percent: round(adjusted.percent + drift, 1) };
    }
    return result;
}
|
|
572
|
+
/**
 * Combines participation balance, streak length and message density into a
 * 0-100 score.
 *
 * Weights: up to 48 points for a low Gini coefficient (capped at 0.95), up to
 * 32 for the longest streak (saturating at 28 days) and up to 20 for density
 * (saturating at 40 messages per day). A missing Gini counts as 0.45 and a
 * missing density as a quarter of its weight.
 *
 * @param input Object with `gini` (number or null), `longestStreak` (number)
 *   and `density` (number, null or undefined).
 * @returns Integer score between 0 and 100.
 */
function computeRhythmScore(input) {
    const gini = input.gini ?? 0.45;
    const balancePoints = 48 * (1 - Math.min(0.95, gini));
    const streakPoints = 32 * Math.min(1, input.longestStreak / 28);
    let densityFraction = 0.25;
    if (input.density != null) {
        densityFraction = Math.min(1, input.density / 40);
    }
    const densityPoints = 20 * densityFraction;
    const rawScore = balancePoints + streakPoints + densityPoints;
    const rounded = Math.round(rawScore);
    return Math.max(0, Math.min(100, rounded));
}
|
|
579
|
+
/**
 * Turns the computed metrics into short Korean highlight sentences
 * (Markdown bold is used for the figures).
 *
 * Each sentence is included only when its metric is present and passes its
 * threshold; sentences keep a fixed order and at most 12 are returned.
 *
 * @param input Metrics object (total, top participant alias and share, busiest
 *   weekday, peak hour, gap statistics, shares and scores).
 * @returns The highlight sentences.
 */
function buildHighlights(input) {
    // Each entry pairs the inclusion condition with the sentence it guards.
    const candidates = [
        [
            input.topAlias && input.topShare !== null && input.total > 0,
            `가장 말이 많았던 분은 **${input.topAlias}** (전체의 **${input.topShare}%**).`,
        ],
        [
            input.busiestWeekdayLabel,
            `요일별로는 **${input.busiestWeekdayLabel}**에 활동이 가장 활발했어요.`,
        ],
        [
            input.peakHour !== null,
            `시간대는 **${input.peakHour}시**대에 메시지가 가장 몰렸습니다.`,
        ],
        [
            input.medianReplyGapMinutes !== null,
            `연속 메시지 사이 간격의 중앙값은 약 **${input.medianReplyGapMinutes}분**이에요.`,
        ],
        [
            input.nightSharePercent > 0,
            `심야(23~05시) 메시지 비중은 **${input.nightSharePercent}%**입니다.`,
        ],
        [
            input.longestStreak > 1,
            `하루도 빠짐없이 이어진 최장 **${input.longestStreak}일** 연속 활동 기록이 있어요.`,
        ],
        [
            input.emojiMessages > 0,
            `이모지·스티커 느낌의 메시지는 **${input.emojiMessages}**건 정도 감지됐어요.`,
        ],
        [
            input.messagesWithAttachments > 0,
            `사진·파일·동영상 등 첨부가 들어간 메시지는 **${input.messagesWithAttachments}**건입니다.`,
        ],
        [
            input.total > 0 && input.weekendSharePercent > 0,
            `주말(토·일) 메시지 비중은 **${input.weekendSharePercent}%**예요.`,
        ],
        [
            input.participantGini !== null && input.participantGini >= 0.35,
            `참여도는 소수에게 조금 몰린 편이에요(Gini **${input.participantGini}** 근처).`,
        ],
        [
            input.replyGapP90Minutes !== null && input.replyGapP90Minutes >= 30,
            `가끔 긴 침묵도 있어요 — 응답 간격 **상위 10%**가 약 **${input.replyGapP90Minutes}분** 이상입니다.`,
        ],
        [
            input.maxSilenceBetweenActiveDays !== null && input.maxSilenceBetweenActiveDays >= 7,
            `활동일 사이 최대 **${input.maxSilenceBetweenActiveDays}일** 동안은 메시지가 끊긴 구간이 있었어요.`,
        ],
        [
            input.rhythmScore >= 65,
            `종합 **리듬 점수**는 **${input.rhythmScore}/100** — 꾸준하고 균형 잡힌 페이스에 가깝습니다.`,
        ],
        [
            input.burstGapUnder1mPercent !== null && input.burstGapUnder1mPercent >= 40,
            `응답 간격의 **${input.burstGapUnder1mPercent}%**가 1분 이내로, 실시간 대화 톤이 강해요.`,
        ],
        [
            input.monologueMessagesPercent >= 25,
            `같은 사람 **3연속 이상** 메시지가 전체의 **${input.monologueMessagesPercent}%** — 긴 설명·정리 구간이 잦을 수 있어요.`,
        ],
    ];
    const sentences = [];
    for (const [included, sentence] of candidates) {
        if (included) {
            sentences.push(sentence);
        }
    }
    return sentences.slice(0, 12);
}
|
|
628
|
+
//# sourceMappingURL=aggregator.js.map
|