patina-cli 3.11.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.patina.default.yaml +29 -29
- package/CHANGELOG.md +53 -0
- package/NOTICE +21 -0
- package/README.md +117 -224
- package/README_JA.md +134 -77
- package/README_KR.md +132 -74
- package/README_ZH.md +137 -80
- package/SKILL.md +11 -20
- package/artifacts/rebaseline-2025/README.md +147 -0
- package/artifacts/rebaseline-2025/human-controls.public.jsonl +250 -0
- package/artifacts/rebaseline-2025/intake.example.jsonl +2 -0
- package/artifacts/rebaseline-2025/intake.local.example.jsonl +25 -0
- package/artifacts/rebaseline-2025/prompts.template.jsonl +7 -0
- package/artifacts/rebaseline-2025/sources.ko-public.jsonl +39 -0
- package/assets/brand/patina-badge.svg +18 -0
- package/assets/brand/patina-mark.svg +8 -0
- package/assets/demo/README.md +79 -0
- package/core/scoring.md +12 -12
- package/core/standalone-prompt.md +3 -1
- package/core/stylometry.md +93 -22
- package/docs/API.md +1554 -0
- package/docs/AUTHENTICATION.md +50 -26
- package/docs/AUTHENTICATION_KR.md +54 -29
- package/docs/BRANDING.md +9 -8
- package/docs/CLI.md +55 -14
- package/docs/COOKBOOK.md +8 -21
- package/docs/DEMO.md +32 -5
- package/docs/EXIT-CODES.md +2 -3
- package/docs/FALSE-POSITIVES.md +63 -0
- package/docs/FAQ.md +9 -1
- package/docs/FAQ_KR.md +3 -1
- package/docs/FLAG-PARITY.md +33 -47
- package/docs/ISSUE-WAVES.md +57 -0
- package/docs/PATTERNS-EN.md +67 -3
- package/docs/PATTERNS-JA.md +68 -2
- package/docs/PATTERNS-KO.md +70 -7
- package/docs/PATTERNS-ZH.md +67 -3
- package/docs/PATTERNS.md +5 -5
- package/docs/RESEARCH-DOCS-PLATFORM.md +54 -0
- package/docs/ROADMAP.md +46 -66
- package/docs/TRANSLATIONESE-KO.md +51 -0
- package/docs/audits/2026-05-deep-research.md +3 -1
- package/docs/benchmarks/README.md +51 -0
- package/docs/benchmarks/detector-comparison.json +69 -9
- package/docs/benchmarks/detector-comparison.md +10 -5
- package/docs/benchmarks/katfish-ko-latest.json +657 -0
- package/docs/benchmarks/katfish-ko-latest.md +77 -0
- package/docs/benchmarks/latest.json +1183 -108
- package/docs/benchmarks/latest.md +84 -60
- package/docs/benchmarks/lexicon-freshness-en-2026-05-22.json +1121 -0
- package/docs/benchmarks/lexicon-freshness-en-2026-05-22.md +136 -0
- package/docs/benchmarks/rebaseline-latest.json +381 -0
- package/docs/benchmarks/rebaseline-latest.md +121 -0
- package/docs/benchmarks/register-stratified-latest.json +164 -0
- package/docs/benchmarks/register-stratified-latest.md +99 -0
- package/docs/benchmarks/register-stratified.md +43 -0
- package/docs/integrations/github-action.md +44 -11
- package/docs/integrations/playground.md +58 -0
- package/docs/integrations/pre-commit.md +5 -5
- package/docs/integrations/release.md +5 -3
- package/docs/integrations/static-sites.md +83 -0
- package/docs/research/2025-rebaseline-plan.md +71 -2
- package/docs/research/2026-rebaseline.md +102 -0
- package/docs/research/adversarial-mps.md +41 -0
- package/docs/research/ai-human-metrics.md +35 -23
- package/docs/research/human-eval-panel.md +42 -0
- package/docs/research/judge-agreement.md +24 -0
- package/docs/research/ko-2025-corpus-sources.md +135 -0
- package/docs/research/lexicon-freshness-audit.md +64 -0
- package/docs/research/zh-ja-lexicon-calibration.md +60 -0
- package/docs/social/patina-launch-copy.md +173 -100
- package/docs/social/patina-launch-execution.md +94 -0
- package/docs/social/patina-launch-korean-first.md +83 -0
- package/docs/social/signs-of-ai-writing.md +26 -0
- package/docs/social/signs-of-ai-writing_KR.md +26 -0
- package/lexicon/ai-en.md +21 -24
- package/lexicon/ai-ja.md +158 -0
- package/lexicon/ai-ko.md +9 -9
- package/lexicon/ai-zh.md +158 -0
- package/lexicon/provenance/ai-en.json +970 -0
- package/lexicon/provenance/ai-ja.json +542 -0
- package/lexicon/provenance/ai-ko.json +866 -0
- package/lexicon/provenance/ai-zh.json +542 -0
- package/package.json +49 -8
- package/patterns/en-communication.md +5 -0
- package/patterns/en-content.md +5 -0
- package/patterns/en-filler.md +5 -0
- package/patterns/en-language.md +29 -1
- package/patterns/en-structure.md +5 -0
- package/patterns/en-style.md +5 -0
- package/patterns/en-viral-hook.md +42 -2
- package/patterns/ja-communication.md +5 -0
- package/patterns/ja-content.md +5 -0
- package/patterns/ja-filler.md +5 -0
- package/patterns/ja-language.md +33 -1
- package/patterns/ja-structure.md +12 -0
- package/patterns/ja-style.md +5 -0
- package/patterns/ja-viral-hook.md +41 -2
- package/patterns/ko-communication.md +5 -0
- package/patterns/ko-content.md +5 -0
- package/patterns/ko-filler.md +5 -0
- package/patterns/ko-language.md +33 -1
- package/patterns/ko-structure.md +25 -6
- package/patterns/ko-style.md +5 -0
- package/patterns/ko-viral-hook.md +38 -2
- package/patterns/zh-communication.md +5 -0
- package/patterns/zh-content.md +5 -0
- package/patterns/zh-filler.md +5 -0
- package/patterns/zh-language.md +37 -1
- package/patterns/zh-structure.md +12 -0
- package/patterns/zh-style.md +5 -0
- package/patterns/zh-viral-hook.md +38 -2
- package/playground/README.md +55 -0
- package/playground/analytics.js +4 -0
- package/playground/analyzer.js +883 -0
- package/playground/app.js +157 -0
- package/playground/data/lexicons.js +343 -0
- package/playground/index.html +138 -0
- package/playground/styles.css +267 -0
- package/profiles/namuwiki.md +111 -0
- package/scripts/adversarial-mps-report.mjs +201 -0
- package/scripts/badge-json.mjs +79 -0
- package/scripts/benchmark-report.mjs +56 -9
- package/scripts/check-release-metadata.mjs +0 -2
- package/scripts/detector-comparison.mjs +7 -7
- package/scripts/generate-playground-data.mjs +77 -0
- package/scripts/katfish-calibration.mjs +464 -0
- package/scripts/lexicon-freshness.mjs +485 -0
- package/scripts/lint.mjs +1 -1
- package/scripts/precommit-score.mjs +4 -3
- package/scripts/prose-score.mjs +81 -5
- package/scripts/rebaseline-intake.mjs +242 -0
- package/scripts/rebaseline-score.mjs +268 -0
- package/scripts/rebaseline-summary.mjs +773 -0
- package/scripts/rebaseline-web-collect.mjs +410 -0
- package/scripts/update-benchmark-ranges.mjs +1 -0
- package/src/api.js +69 -105
- package/src/auth.js +50 -2
- package/src/backends/claude-cli.js +19 -4
- package/src/backends/codex-cli.js +19 -3
- package/src/backends/contract.js +230 -1
- package/src/backends/gemini-cli.js +18 -5
- package/src/backends/index.js +87 -12
- package/src/backends/kimi-cli.js +161 -0
- package/src/cli.js +577 -567
- package/src/commands/doctor.js +2 -2
- package/src/config.js +29 -0
- package/src/errors.js +53 -1
- package/src/features/discourse-tells.js +68 -0
- package/src/features/index.js +82 -8
- package/src/features/lexicon.js +40 -6
- package/src/features/markup-leakage.js +69 -0
- package/src/features/segment.js +41 -0
- package/src/features/signal-strength.js +81 -0
- package/src/features/stylometry.js +231 -1
- package/src/features/translationese.js +127 -0
- package/src/loader.js +76 -0
- package/src/logger.js +22 -23
- package/src/model-defaults.js +55 -0
- package/src/ouroboros.js +31 -0
- package/src/output.js +102 -90
- package/src/prompt-builder.js +103 -68
- package/src/providers.js +51 -4
- package/src/scoring.js +210 -2
- package/src/security.js +75 -0
- package/tests/fixtures/live-quality/en/public-docs-01.md +26 -0
- package/tests/fixtures/live-quality/ko/public-docs-01.md +26 -0
- package/tests/fixtures/suspect-zones/expected-ranges.json +207 -16
- package/tests/fixtures/suspect-zones/ja/ai/ja-ai-04-lexicon.md +11 -0
- package/tests/fixtures/suspect-zones/ja/natural/ja-nat-04-lexicon-cold.md +11 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-02.md +4 -5
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-07-ko-diagnostic.md +11 -0
- package/tests/fixtures/suspect-zones/zh/ai/zh-ai-04-lexicon.md +11 -0
- package/tests/fixtures/suspect-zones/zh/natural/zh-nat-04-lexicon-cold.md +11 -0
- package/tests/quality/README.md +188 -11
- package/tests/quality/adversarial-mps/fixtures.jsonl +10 -0
- package/tests/quality/benchmark.mjs +39 -1
- package/tests/quality/dogfood.mjs +5 -3
- package/tests/quality/live-fixtures.jsonl +2 -0
- package/tests/quality/live-quality.mjs +596 -0
- package/tests/quality/ranking-metrics.mjs +136 -0
- package/tests/quality/rebaseline-manifest.example.jsonl +5 -0
- package/vercel.json +53 -0
- package/SKILL-MAX.md +0 -455
- package/docs/internal/HARNESS.md +0 -14
- package/docs/internal/README.md +0 -14
- package/docs/internal/WARP.md +0 -23
- package/patina-max/SKILL.md +0 -523
- package/patina-max/composite.py +0 -457
- package/src/cache.js +0 -106
- package/src/commands/init.js +0 -208
- package/src/manifest.js +0 -162
- package/src/max-mode.js +0 -207
|
@@ -1,9 +1,89 @@
|
|
|
1
|
-
// Burstiness CV
|
|
1
|
+
// Burstiness CV, MATTR, and dependency-free KO diagnostics per core/stylometry.md.
|
|
2
2
|
// Pure functions over token arrays; no I/O.
|
|
3
3
|
|
|
4
4
|
export const DEFAULT_BURSTINESS_BANDS = { low: 0.30, high: 0.50 };
|
|
5
5
|
export const DEFAULT_MATTR_BANDS = { low: 0.55, high: 0.70 };
|
|
6
6
|
export const DEFAULT_MATTR_WINDOW = 50;
|
|
7
|
+
export const DEFAULT_MIN_BURSTINESS_SENTENCES = 3;
|
|
8
|
+
export const DEFAULT_KO_DIAGNOSTIC_BANDS = {
|
|
9
|
+
minSentences: 4,
|
|
10
|
+
minEojeols: 20,
|
|
11
|
+
spacing: {
|
|
12
|
+
maxEojeolLengthCV: 0.38,
|
|
13
|
+
},
|
|
14
|
+
comma: {
|
|
15
|
+
maxPerSentence: 1,
|
|
16
|
+
},
|
|
17
|
+
posProxy: {
|
|
18
|
+
minMatchedCount: 10,
|
|
19
|
+
maxClassDiversity: 0.26,
|
|
20
|
+
},
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
const HANGUL_RE = /[\u3131-\u318e\uac00-\ud7a3]/u;
|
|
24
|
+
const COMMA_RE = /[,,、]/gu;
|
|
25
|
+
|
|
26
|
+
const KO_SUFFIX_GROUPS = [
|
|
27
|
+
{ className: 'quote', suffixes: ['라고', '이라고'] },
|
|
28
|
+
{ className: 'source', suffixes: ['에게서', '한테서', '으로부터', '로부터'] },
|
|
29
|
+
{ className: 'instrument', suffixes: ['으로써', '로써'] },
|
|
30
|
+
{ className: 'standard', suffixes: ['으로서', '로서'] },
|
|
31
|
+
{ className: 'topic', suffixes: ['은', '는'] },
|
|
32
|
+
{ className: 'subject', suffixes: ['이', '가', '께서'] },
|
|
33
|
+
{ className: 'object', suffixes: ['을', '를'] },
|
|
34
|
+
{ className: 'genitive', suffixes: ['의'] },
|
|
35
|
+
{ className: 'location', suffixes: ['에서', '에게', '한테', '께', '에'] },
|
|
36
|
+
{ className: 'direction', suffixes: ['으로', '로'] },
|
|
37
|
+
{ className: 'conjunction', suffixes: ['와', '과', '하고', '랑'] },
|
|
38
|
+
{ className: 'additive', suffixes: ['도', '또한'] },
|
|
39
|
+
{ className: 'delimiter', suffixes: ['만', '까지', '부터', '마다'] },
|
|
40
|
+
{ className: 'comparison', suffixes: ['보다', '처럼'] },
|
|
41
|
+
{ className: 'formal_ending', suffixes: ['습니다', '습니까', '합니다', '합니까', '입니다'] },
|
|
42
|
+
{ className: 'polite_ending', suffixes: ['어요', '아요', '예요', '이에요', '네요', '군요', '지요'] },
|
|
43
|
+
{ className: 'casual_ending', suffixes: ['죠', '네', '군'] },
|
|
44
|
+
{ className: 'declarative_ending', suffixes: ['한다', '된다', '했다', '였다', '이다', '있다', '없다'] },
|
|
45
|
+
];
|
|
46
|
+
|
|
47
|
+
// Flat lookup table derived from KO_SUFFIX_GROUPS: one entry per suffix,
// carrying its class name and code-point length. Entries are ordered
// longest-suffix-first so a first-match scan prefers the most specific suffix.
const KO_SUFFIX_MATCHERS = (() => {
  const matchers = [];
  for (const { className, suffixes } of KO_SUFFIX_GROUPS) {
    for (const suffix of suffixes) {
      matchers.push({ className, suffix, length: [...suffix].length });
    }
  }
  return matchers.sort((left, right) => right.length - left.length);
})();
|
|
56
|
+
|
|
57
|
+
/**
 * Arithmetic mean of an array of numbers.
 *
 * @param {number[]} values Values to average.
 * @returns {number|null} The mean, or null when `values` is not an array or is empty.
 */
function mean(values) {
  if (!Array.isArray(values)) return null;
  const size = values.length;
  if (size === 0) return null;
  let total = 0;
  values.forEach((value) => {
    total += value;
  });
  return total / size;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Coefficient of variation (population standard deviation divided by mean).
 *
 * @param {number[]} values Sample values.
 * @returns {number|null} The CV, or null when `values` is not an array, has
 *   fewer than two entries, or its mean is zero / not a number.
 */
function coefficientOfVariation(values) {
  if (!Array.isArray(values) || values.length < 2) return null;

  const size = values.length;
  const avg = values.reduce((sum, value) => sum + value, 0) / size;
  // A zero (or NaN) mean would make the ratio meaningless.
  if (!avg) return null;

  let squaredDeviation = 0;
  values.forEach((value) => {
    squaredDeviation += (value - avg) ** 2;
  });
  const stdDev = Math.sqrt(squaredDeviation / size);
  return stdDev / avg;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Normalize a whitespace-delimited chunk to NFC and strip any leading or
 * trailing run of characters that are neither letters nor digits.
 *
 * @param {string} chunk Raw chunk.
 * @returns {string} The trimmed chunk (possibly empty).
 */
function cleanKoreanEojeol(chunk) {
  const composed = chunk.normalize('NFC');
  return composed.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, '');
}
|
|
75
|
+
|
|
76
|
+
/**
 * Split a paragraph on whitespace and keep the cleaned chunks that contain
 * at least one Hangul character.
 *
 * @param {string} paragraph Paragraph text; a falsy value yields an empty list.
 * @returns {string[]} Cleaned eojeols, in document order.
 */
function koreanEojeols(paragraph) {
  if (!paragraph) return [];

  const eojeols = [];
  for (const chunk of paragraph.split(/\s+/u)) {
    const token = cleanKoreanEojeol(chunk);
    if (HANGUL_RE.test(token)) eojeols.push(token);
  }
  return eojeols;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Count the Hangul characters (compatibility jamo and precomposed syllables)
 * in a token, ignoring everything else.
 *
 * @param {string} token Token to measure.
 * @returns {number} Number of Hangul characters.
 */
function koreanLength(token) {
  const hangulOnly = token.replace(/[^\u3131-\u318e\uac00-\ud7a3]/gu, '');
  return [...hangulOnly].length;
}
|
|
7
87
|
|
|
8
88
|
// Coefficient of variation of sentence token counts.
|
|
9
89
|
// Returns null when the paragraph has fewer than 2 sentences or mean is 0.
|
|
@@ -35,6 +115,116 @@ export function mattr(tokens, window = DEFAULT_MATTR_WINDOW) {
|
|
|
35
115
|
return sum / count;
|
|
36
116
|
}
|
|
37
117
|
|
|
118
|
+
/**
 * Summarize eojeol (space-delimited word unit) lengths for a paragraph.
 *
 * Lengths are measured in Hangul characters; eojeols with no Hangul length
 * are excluded from every statistic.
 *
 * @param {string} paragraph Paragraph text.
 * @returns {{eojeolCount: number, meanEojeolLength: number|null, eojeolLengthCV: number|null, singleSyllableRatio: number|null, longEojeolRatio: number|null}}
 *   Counts and ratios; ratio fields are null when no eojeol was measured.
 */
export function koreanSpacingFeatures(paragraph) {
  const lengths = [];
  for (const eojeol of koreanEojeols(paragraph)) {
    const length = koreanLength(eojeol);
    if (length > 0) lengths.push(length);
  }

  const eojeolCount = lengths.length;
  // Share of measured eojeols whose length satisfies the predicate.
  const shareWhere = (predicate) =>
    eojeolCount > 0 ? lengths.filter(predicate).length / eojeolCount : null;

  return {
    eojeolCount,
    meanEojeolLength: mean(lengths),
    eojeolLengthCV: coefficientOfVariation(lengths),
    singleSyllableRatio: shareWhere((length) => length === 1),
    longEojeolRatio: shareWhere((length) => length >= 7),
  };
}
|
|
133
|
+
|
|
134
|
+
/**
 * Count commas in a paragraph and normalize the count per sentence and per
 * 100 non-whitespace characters.
 *
 * @param {string} paragraph Paragraph text. A non-string value (null,
 *   undefined, ...) is treated as empty text instead of throwing, matching
 *   how the eojeol helpers tolerate a missing paragraph.
 * @param {number|null} [sentenceCount=null] Sentence count used for the
 *   per-sentence rate.
 * @returns {{count: number, perSentence: number|null, per100Chars: number|null}}
 *   `perSentence` is null unless `sentenceCount > 0`; `per100Chars` is null
 *   when the text has no non-whitespace characters.
 */
export function commaDensity(paragraph, sentenceCount = null) {
  const text = typeof paragraph === 'string' ? paragraph : '';
  const commaCount = (text.match(COMMA_RE) ?? []).length;
  // Code-point count of the text with all whitespace removed.
  const charCount = Array.from(text.replace(/\s+/gu, '')).length;

  return {
    count: commaCount,
    perSentence: sentenceCount > 0 ? commaCount / sentenceCount : null,
    per100Chars: charCount > 0 ? (commaCount / charCount) * 100 : null,
  };
}
|
|
144
|
+
|
|
145
|
+
/**
 * Suffix-based stand-in for part-of-speech diversity.
 *
 * Each eojeol is matched against KO_SUFFIX_MATCHERS; the first matcher whose
 * suffix ends the eojeol (and is strictly shorter than it) wins. The result
 * reports how many eojeols matched and how varied the matched classes and
 * suffixes are.
 *
 * @param {string} paragraph Paragraph text.
 * @returns {{proxy: string, eojeolCount: number, matchedCount: number, coverage: number|null, distinctClassCount: number, classDiversity: number|null, distinctSuffixCount: number, suffixDiversity: number|null, classes: string[]}}
 *   Ratios are null when their denominator is zero; `classes` is sorted.
 */
export function koreanPosDiversityProxy(paragraph) {
  const eojeols = koreanEojeols(paragraph);
  const seenClasses = new Set();
  const seenSuffixes = new Set();
  let matchedCount = 0;

  for (const token of eojeols) {
    const hit = KO_SUFFIX_MATCHERS.find(
      ({ suffix }) => token.length > suffix.length && token.endsWith(suffix)
    );
    if (!hit) continue;
    matchedCount += 1;
    seenClasses.add(hit.className);
    seenSuffixes.add(hit.suffix);
  }

  const classes = [...seenClasses].sort();
  const totalEojeols = eojeols.length;
  const perMatch = (distinct) => (matchedCount > 0 ? distinct / matchedCount : null);

  return {
    proxy: 'suffix',
    eojeolCount: totalEojeols,
    matchedCount,
    coverage: totalEojeols > 0 ? matchedCount / totalEojeols : null,
    distinctClassCount: classes.length,
    classDiversity: perMatch(classes.length),
    distinctSuffixCount: seenSuffixes.size,
    suffixDiversity: perMatch(seenSuffixes.size),
    classes,
  };
}
|
|
174
|
+
|
|
175
|
+
/**
 * Combine the three Korean diagnostics (eojeol-length regularity, comma
 * density, suffix-class diversity) into a single verdict.
 *
 * The verdict is `hot` only when the text is long enough and ALL three
 * components fall below their thresholds; its strength is then the weakest
 * component strength. Otherwise strength is 0 and no reasons are reported.
 *
 * @param {{ sentenceCount?: number, spacing?: object, comma?: object, posDiversity?: object }} [features]
 * @param {object} [bands]
 * @returns {{hot: boolean, strength: number, reasons: string[], thresholds: object}}
 */
export function classifyKoreanDiagnostics(
  { sentenceCount = 0, spacing, comma, posDiversity } = {},
  bands = DEFAULT_KO_DIAGNOSTIC_BANDS
) {
  const thresholds = mergeKoreanDiagnosticBands(bands);

  // Too little text: the ratios below are not trustworthy.
  const eojeolCount = spacing?.eojeolCount ?? 0;
  const longEnough =
    sentenceCount >= thresholds.minSentences && eojeolCount >= thresholds.minEojeols;
  if (!longEnough) {
    return { hot: false, strength: 0, reasons: [], thresholds };
  }

  // The suffix proxy only counts once enough eojeols matched a suffix.
  const matchedCount = posDiversity?.matchedCount ?? 0;
  let posStrength = 0;
  if (matchedCount >= thresholds.posProxy.minMatchedCount) {
    posStrength = lowThresholdStrength(
      posDiversity?.classDiversity,
      thresholds.posProxy.maxClassDiversity
    );
  }

  const components = [
    {
      reason: 'regular-eojeol-length',
      strength: lowThresholdStrength(
        spacing?.eojeolLengthCV,
        thresholds.spacing.maxEojeolLengthCV
      ),
    },
    {
      reason: 'low-comma-density',
      strength: lowThresholdStrength(comma?.perSentence, thresholds.comma.maxPerSentence),
    },
    { reason: 'low-suffix-class-diversity', strength: posStrength },
  ];

  const hot = components.every(({ strength }) => strength > 0);
  if (!hot) {
    return { hot, strength: 0, reasons: [], thresholds };
  }

  return {
    hot,
    strength: Math.min(...components.map(({ strength }) => strength)),
    reasons: components.map(({ reason }) => reason),
    thresholds,
  };
}
|
|
227
|
+
|
|
38
228
|
export function classifyBurstiness(cv, bands = DEFAULT_BURSTINESS_BANDS) {
|
|
39
229
|
if (cv == null) return null;
|
|
40
230
|
if (cv < bands.low) return 'low';
|
|
@@ -48,3 +238,43 @@ export function classifyMattr(value, bands = DEFAULT_MATTR_BANDS) {
|
|
|
48
238
|
if (value > bands.high) return 'high';
|
|
49
239
|
return 'mid';
|
|
50
240
|
}
|
|
241
|
+
|
|
242
|
+
/**
 * Overlay caller-supplied Korean diagnostic bands on the defaults.
 *
 * Each threshold is taken from `bands` only when it is a finite number;
 * anything else falls back to DEFAULT_KO_DIAGNOSTIC_BANDS.
 *
 * @param {object|null} [bands={}] Partial overrides. `null` is accepted and
 *   means "no overrides" (a default parameter only covers `undefined`, so it
 *   is normalized explicitly here).
 * @returns {{minSentences: number, minEojeols: number, spacing: {maxEojeolLengthCV: number}, comma: {maxPerSentence: number}, posProxy: {minMatchedCount: number, maxClassDiversity: number}}}
 *   A fresh, fully populated thresholds object.
 */
function mergeKoreanDiagnosticBands(bands = {}) {
  const overrides = bands ?? {};
  const defaults = DEFAULT_KO_DIAGNOSTIC_BANDS;

  return {
    minSentences: resolveNumber(overrides.minSentences, defaults.minSentences),
    minEojeols: resolveNumber(overrides.minEojeols, defaults.minEojeols),
    spacing: {
      maxEojeolLengthCV: resolveNumber(
        overrides.spacing?.maxEojeolLengthCV,
        defaults.spacing.maxEojeolLengthCV
      ),
    },
    comma: {
      maxPerSentence: resolveNumber(
        overrides.comma?.maxPerSentence,
        defaults.comma.maxPerSentence
      ),
    },
    posProxy: {
      minMatchedCount: resolveNumber(
        overrides.posProxy?.minMatchedCount,
        defaults.posProxy.minMatchedCount
      ),
      maxClassDiversity: resolveNumber(
        overrides.posProxy?.maxClassDiversity,
        defaults.posProxy.maxClassDiversity
      ),
    },
  };
}
|
|
270
|
+
|
|
271
|
+
/**
 * Return `value` when it is a finite number primitive, otherwise `fallback`.
 *
 * @param {*} value Candidate value.
 * @param {number} fallback Value used when the candidate is rejected.
 * @returns {number}
 */
function resolveNumber(value, fallback) {
  // Number.isFinite never coerces, so strings, null and NaN are all rejected.
  if (Number.isFinite(value)) return value;
  return fallback;
}
|
|
274
|
+
|
|
275
|
+
/**
 * Score how far `value` sits below `threshold`, on a 0-100 scale.
 *
 * A value at or above the threshold scores 0; a value of 0 scores 100.
 * A zero threshold is a special case: only values <= 0 score (100).
 *
 * @param {*} value Measured value; anything that is not a finite number scores 0.
 * @param {number} threshold Upper bound; a negative or otherwise falsy
 *   non-zero threshold scores 0.
 * @returns {number} Strength, clamped to the range 0-100.
 */
function lowThresholdStrength(value, threshold) {
  if (!Number.isFinite(value)) return 0;

  if (threshold === 0) {
    return value <= 0 ? 100 : 0;
  }

  const unusable = !threshold || threshold < 0 || value > threshold;
  if (unusable) return 0;

  const percentBelow = (1 - value / threshold) * 100;
  return Math.max(0, Math.min(100, percentBelow));
}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
// Korean translationese (번역투 / calque) detector. The stylometry + lexicon
|
|
2
|
+
// signals catch STRUCTURE (sentence rhythm, AI lexicon); they do NOT catch
|
|
3
|
+
// lexical calques — phrasings that are grammatical Korean but read as
|
|
4
|
+
// translated-from-English ("커맨드 기둥" for "command pillars", "~에 의해" passives,
|
|
5
|
+
// "당신" for "you"). This deterministic, auditable detector fills that gap.
|
|
6
|
+
//
|
|
7
|
+
// IMPORTANT — precision first. Most of these constructions ALSO appear in good
|
|
8
|
+
// native Korean (formal/technical prose especially). So this is a DENSITY-GATED
|
|
9
|
+
// SUSPICION signal, not proof: a single "~에 의해" means nothing. It is surfaced
|
|
10
|
+
// as its own `translationese` signal and does NOT flip the document `hot`
|
|
11
|
+
// verdict (so it cannot regress benchmark false positives); the SKILL / callers
|
|
12
|
+
// decide what to do with it. Each rule ships a before→after example.
|
|
13
|
+
//
|
|
14
|
+
// ko-only for now (calques are language-specific).
|
|
15
|
+
import { splitProseSentences } from './segment.js';
|
|
16
|
+
|
|
17
|
+
// strong: rarer in good Korean, weighted higher. weak: common, advisory only.
|
|
18
|
+
// Each rule: { id, label, strong, re() -> fresh global RegExp, example:{before,after} }
|
|
19
|
+
const RULES = [
|
|
20
|
+
{
|
|
21
|
+
id: 'noun-calque',
|
|
22
|
+
label: '직역 명사구 (pillar/layer 류 calque)',
|
|
23
|
+
strong: true,
|
|
24
|
+
re: () => /커맨드 기둥|명령(?:어)? 기둥|기둥 커맨드|[가-힣]+ 레이어로서/g,
|
|
25
|
+
example: { before: '세 가지 커맨드 기둥을 설치합니다.', after: '핵심 커맨드 세 가지를 설치합니다.' },
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
id: 'dummy-subject',
|
|
29
|
+
label: '가주어 "그것은/이것은" (English "it is")',
|
|
30
|
+
strong: true,
|
|
31
|
+
re: () => /(?:^|[.!?。]\s+|\n)\s*(?:그것은|이것은|그것이|이것이)\s/g,
|
|
32
|
+
example: { before: '그것은 매우 중요하다.', after: '매우 중요하다.' },
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
id: 'direct-address-you',
|
|
36
|
+
label: '"당신" 직접 호칭 (English "you")',
|
|
37
|
+
strong: true,
|
|
38
|
+
re: () => /당신(?:은|이|의|에게|을|를|께서|께)?/g,
|
|
39
|
+
example: { before: '당신은 이것을 설정할 수 있습니다.', after: '이건 설정할 수 있다.' },
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
id: 'passive-e-uihae',
|
|
43
|
+
label: '"~에 의해" 피동 (English by-passive)',
|
|
44
|
+
strong: false,
|
|
45
|
+
re: () => /에 의해/g,
|
|
46
|
+
example: { before: '작업은 에이전트에 의해 처리됩니다.', after: '에이전트가 작업을 처리합니다.' },
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
id: 'have-overuse',
|
|
50
|
+
label: '"~을 가지고 있다" (English "have")',
|
|
51
|
+
strong: false,
|
|
52
|
+
re: () => /(?:을|를)\s*가지(?:고 있|고 있습니다|고 있다)/g,
|
|
53
|
+
example: { before: '이 도구는 유연성을 가지고 있습니다.', after: '이 도구는 유연합니다.' },
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
id: 'one-of',
|
|
57
|
+
label: '"~중 하나" (English "one of the")',
|
|
58
|
+
strong: false,
|
|
59
|
+
re: () => /중\s*하나(?:이다|입니다|인|로|다|예요)?/g,
|
|
60
|
+
example: { before: '가장 빠른 도구 중 하나입니다.', after: '손꼽히게 빠릅니다.' },
|
|
61
|
+
},
|
|
62
|
+
{
|
|
63
|
+
id: 'provides',
|
|
64
|
+
label: '"~을 제공합니다" (English "provides")',
|
|
65
|
+
strong: false,
|
|
66
|
+
re: () => /(?:을|를)\s*제공(?:합니다|한다|해 줍니다|해준다)/g,
|
|
67
|
+
example: { before: '다양한 기능을 제공합니다.', after: '여러 기능을 쓸 수 있다.' },
|
|
68
|
+
},
|
|
69
|
+
{
|
|
70
|
+
id: 'as-follows',
|
|
71
|
+
label: '"다음과 같습니다" (English "as follows")',
|
|
72
|
+
strong: false,
|
|
73
|
+
re: () => /다음과\s*같(?:습니다|다|은|이)/g,
|
|
74
|
+
example: { before: '사용법은 다음과 같습니다.', after: '사용법은 이렇다.' },
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
id: 'make-easy',
|
|
78
|
+
label: '"~하게 만들어 준다" (English "make it ~")',
|
|
79
|
+
strong: false,
|
|
80
|
+
re: () => /(?:쉽게|가능하게|간단하게|편하게)\s*(?:만들어\s*(?:줍니다|준다|줘)|만듭니다|만든다)/g,
|
|
81
|
+
example: { before: '설치를 쉽게 만들어 줍니다.', after: '설치가 쉬워진다.' },
|
|
82
|
+
},
|
|
83
|
+
];
|
|
84
|
+
|
|
85
|
+
const ABS_MIN = 4; // need at least this many total calque hits, and
|
|
86
|
+
const DENSITY_MIN = 0.5; // at least this many hits per prose sentence, to call it hot.
|
|
87
|
+
|
|
88
|
+
/**
 * Scan Korean text for translationese (calque) markers and apply a density gate.
 *
 * Every entry in RULES is matched against the whole text with a fresh RegExp.
 * The result is `hot` only when the total hit count reaches ABS_MIN AND the
 * hits-per-prose-sentence ratio reaches DENSITY_MIN.
 *
 * @param {string} text Text to scan; non-string input is treated as empty.
 * @param {{lang?: string}} [opts] Only `lang: 'ko'` (the default) is scanned;
 *   any other language returns an all-zero result.
 * @returns {{count:number, density:number, sentences:number, byRule:Array, hits:string[], hot:boolean, thresholds:{count:number,density:number}}}
 *   `byRule` is ordered by descending hit count; `hits` holds at most eight
 *   distinct matched snippets; `density` is rounded to three decimals.
 */
export function detectTranslationese(text, opts = {}) {
  const language = opts.lang ?? 'ko';
  const source = typeof text === 'string' ? text : '';
  const thresholds = { count: ABS_MIN, density: DENSITY_MIN };

  if (language !== 'ko' || !source) {
    return { count: 0, density: 0, sentences: 0, byRule: [], hits: [], hot: false, thresholds };
  }

  const perRule = [];
  const distinctHits = new Set();
  let total = 0;

  for (const rule of RULES) {
    const found = source.match(rule.re()) ?? [];
    if (found.length === 0) continue;

    total += found.length;
    perRule.push({
      id: rule.id,
      label: rule.label,
      strong: rule.strong,
      count: found.length,
      example: rule.example,
    });
    for (const raw of found) {
      const snippet = raw.trim();
      if (snippet) distinctHits.add(snippet);
    }
  }

  // Never divide by zero: text with no detected sentences counts as one.
  const sentences = Math.max(1, splitProseSentences(source).length);
  const density = total / sentences;

  // Conservative gate: both the absolute floor and the per-sentence density
  // must be met, so long documents with a few calques do not trip it.
  const hot = total >= ABS_MIN && density >= DENSITY_MIN;

  perRule.sort((a, b) => b.count - a.count);

  return {
    count: total,
    density: Number(density.toFixed(3)),
    sentences,
    byRule: perRule,
    hits: [...distinctHits].slice(0, 8),
    hot,
    thresholds,
  };
}
|
|
126
|
+
|
|
127
|
+
export { RULES as TRANSLATIONESE_RULES, ABS_MIN, DENSITY_MIN };
|
package/src/loader.js
CHANGED
|
@@ -3,10 +3,28 @@ import { resolve, sep } from 'node:path';
|
|
|
3
3
|
import yaml from 'js-yaml';
|
|
4
4
|
import { validateProfileName } from './security.js';
|
|
5
5
|
|
|
6
|
+
/**
 * Synchronously read a file and decode it as UTF-8 text.
 *
 * @param {string} path Location of the file to read.
 * @returns {string} The decoded file contents.
 * @throws {Error} Whatever `readFileSync` raises when the file cannot be read.
 * @example
 * const markdown = loadFile('README.md');
 */
export function loadFile(path) {
  const contents = readFileSync(path, 'utf8');
  return contents;
}
|
|
9
18
|
|
|
19
|
+
/**
|
|
20
|
+
* Split Markdown-style YAML frontmatter from a document body.
|
|
21
|
+
*
|
|
22
|
+
* @param {string} content File contents.
|
|
23
|
+
* @returns {{frontmatter: object|null, body: string}} Parsed frontmatter and trimmed body.
|
|
24
|
+
* @throws {Error} When YAML frontmatter is invalid.
|
|
25
|
+
* @example
|
|
26
|
+
* const { frontmatter, body } = splitFrontmatter('---\ntitle: x\n---\nBody');
|
|
27
|
+
*/
|
|
10
28
|
export function splitFrontmatter(content) {
|
|
11
29
|
const match = content.match(/^---\s*\n([\s\S]*?)\n---\s*\n([\s\S]*)$/);
|
|
12
30
|
if (!match) {
|
|
@@ -18,6 +36,17 @@ export function splitFrontmatter(content) {
|
|
|
18
36
|
};
|
|
19
37
|
}
|
|
20
38
|
|
|
39
|
+
/**
|
|
40
|
+
* Load language-specific pattern packs from patterns/{lang}-*.md.
|
|
41
|
+
*
|
|
42
|
+
* @param {string} repoRoot Repository root path.
|
|
43
|
+
* @param {string} lang Language code, such as ko, en, zh, or ja.
|
|
44
|
+
* @param {string[]} [skipPatterns=[]] Pack names to omit, without .md.
|
|
45
|
+
* @returns {Array<{file: string, frontmatter: object|null, body: string, isStructure: boolean, isScoreOnly: boolean}>} Pattern packs.
|
|
46
|
+
* @throws {Error} When the patterns directory or a pattern file cannot be read.
|
|
47
|
+
* @example
|
|
48
|
+
* const patterns = loadPatterns(getRepoRoot(), 'en');
|
|
49
|
+
*/
|
|
21
50
|
export function loadPatterns(repoRoot, lang, skipPatterns = []) {
|
|
22
51
|
const patternsDir = resolve(repoRoot, 'patterns');
|
|
23
52
|
const files = readdirSync(patternsDir)
|
|
@@ -43,6 +72,16 @@ export function loadPatterns(repoRoot, lang, skipPatterns = []) {
|
|
|
43
72
|
return packs;
|
|
44
73
|
}
|
|
45
74
|
|
|
75
|
+
/**
|
|
76
|
+
* Load a named profile from profiles/{profileName}.md after path validation.
|
|
77
|
+
*
|
|
78
|
+
* @param {string} repoRoot Repository root path.
|
|
79
|
+
* @param {string} profileName Profile file stem.
|
|
80
|
+
* @returns {{frontmatter: object|null, body: string}} Parsed profile document.
|
|
81
|
+
* @throws {Error} When the profile name is invalid or the file cannot be read.
|
|
82
|
+
* @example
|
|
83
|
+
* const profile = loadProfile(getRepoRoot(), 'default');
|
|
84
|
+
*/
|
|
46
85
|
export function loadProfile(repoRoot, profileName) {
|
|
47
86
|
validateProfileName(profileName);
|
|
48
87
|
const profilesDir = resolve(repoRoot, 'profiles');
|
|
@@ -54,16 +93,44 @@ export function loadProfile(repoRoot, profileName) {
|
|
|
54
93
|
return splitFrontmatter(content);
|
|
55
94
|
}
|
|
56
95
|
|
|
96
|
+
/**
 * Read `core/<filename>` under the repository root and split off its
 * frontmatter.
 *
 * @param {string} repoRoot Repository root path.
 * @param {string} filename File name inside core/, such as scoring.md.
 * @returns {{frontmatter: object|null, body: string}} Result of splitFrontmatter for the file.
 * @throws {Error} When the file cannot be read or frontmatter is invalid.
 * @example
 * const scoring = loadCoreFile(getRepoRoot(), 'scoring.md');
 */
export function loadCoreFile(repoRoot, filename) {
  const corePath = resolve(repoRoot, 'core', filename);
  return splitFrontmatter(loadFile(corePath));
}
|
|
62
111
|
|
|
112
|
+
/**
 * Synchronously read the user's input document as UTF-8 text.
 *
 * @param {string} path Location of the input file.
 * @returns {string} The decoded input text.
 * @throws {Error} Whatever `readFileSync` raises when the file cannot be read.
 * @example
 * const text = loadInputText('draft.md');
 */
export function loadInputText(path) {
  const inputText = readFileSync(path, 'utf8');
  return inputText;
}
|
|
66
124
|
|
|
125
|
+
/**
|
|
126
|
+
* Load up to three non-empty paragraphs from a voice sample file.
|
|
127
|
+
*
|
|
128
|
+
* @param {string} path Voice sample file path.
|
|
129
|
+
* @returns {{path: string, paragraphs: string[], body: string, truncated: boolean}} Voice sample payload.
|
|
130
|
+
* @throws {Error} When the file is unreadable or has no non-empty paragraphs.
|
|
131
|
+
* @example
|
|
132
|
+
* const sample = loadVoiceSample('voice.md');
|
|
133
|
+
*/
|
|
67
134
|
export function loadVoiceSample(path) {
|
|
68
135
|
const content = loadFile(path);
|
|
69
136
|
const paragraphs = content
|
|
@@ -98,6 +165,15 @@ const TONE_BACKBONE = {
|
|
|
98
165
|
instructional: 'instructional',
|
|
99
166
|
};
|
|
100
167
|
|
|
168
|
+
/**
 * Map a resolved named tone to its primary backbone profile.
 *
 * This is a pure table lookup in TONE_BACKBONE; it performs no I/O and does
 * not throw. Only the table's own keys are consulted, so names inherited from
 * Object.prototype (for example 'constructor' or 'toString') resolve to null
 * instead of leaking a built-in function to the caller.
 *
 * @param {string} tone Tone name.
 * @returns {string|null} Profile name, or null when no mapping exists.
 * @example
 * const profile = toneToBackboneProfile('instructional'); // 'instructional'
 */
export function toneToBackboneProfile(tone) {
  if (!Object.prototype.hasOwnProperty.call(TONE_BACKBONE, tone)) return null;
  return TONE_BACKBONE[tone] || null;
}
|
package/src/logger.js
CHANGED
|
@@ -6,10 +6,22 @@ const LEVELS = {
|
|
|
6
6
|
silent: Infinity,
|
|
7
7
|
};
|
|
8
8
|
|
|
9
|
+
/**
|
|
10
|
+
* Create a small stderr logger with text and progress modes.
|
|
11
|
+
*
|
|
12
|
+
* @param {object} [options] Logger options.
|
|
13
|
+
* @param {string} [options.level=info] Minimum log level.
|
|
14
|
+
* @param {boolean} [options.quiet=false] Suppress all log output.
|
|
15
|
+
* @param {NodeJS.WritableStream} [options.stream=process.stderr] Progress stream.
|
|
16
|
+
* @returns {{debug: Function, info: Function, warn: Function, error: Function, progress: Function, closeProgress: Function, child: Function}} Logger facade.
|
|
17
|
+
* @throws {Error} Propagates stream write errors from the configured output stream.
|
|
18
|
+
* @example
|
|
19
|
+
* const logger = createLogger();
|
|
20
|
+
* logger.info('event', { message: 'ready' });
|
|
21
|
+
*/
|
|
9
22
|
export function createLogger({
|
|
10
23
|
level = process.env.PATINA_LOG_LEVEL || 'info',
|
|
11
24
|
quiet = false,
|
|
12
|
-
json = false,
|
|
13
25
|
stream = process.stderr,
|
|
14
26
|
} = {}) {
|
|
15
27
|
const threshold = quiet ? LEVELS.silent : (LEVELS[String(level).toLowerCase()] ?? LEVELS.info);
|
|
@@ -18,19 +30,11 @@ export function createLogger({
|
|
|
18
30
|
const emit = (levelName, event, fields = {}) => {
|
|
19
31
|
if (LEVELS[levelName] < threshold) return;
|
|
20
32
|
closeProgress();
|
|
21
|
-
if (json) {
|
|
22
|
-
console.error(JSON.stringify(record(levelName, event, fields)));
|
|
23
|
-
return;
|
|
24
|
-
}
|
|
25
33
|
if (fields.message) console.error(fields.message);
|
|
26
34
|
};
|
|
27
35
|
|
|
28
|
-
const progress = (
|
|
36
|
+
const progress = (_event, fields = {}) => {
|
|
29
37
|
if (LEVELS.info < threshold) return;
|
|
30
|
-
if (json) {
|
|
31
|
-
console.error(JSON.stringify(record('info', event, fields)));
|
|
32
|
-
return;
|
|
33
|
-
}
|
|
34
38
|
if (!fields.message || !stream?.write) return;
|
|
35
39
|
stream.write(`\r${fields.message}`);
|
|
36
40
|
progressOpen = true;
|
|
@@ -49,22 +53,17 @@ export function createLogger({
|
|
|
49
53
|
progress,
|
|
50
54
|
closeProgress,
|
|
51
55
|
child(extra = {}) {
|
|
52
|
-
return createLogger({ level, quiet,
|
|
56
|
+
return createLogger({ level, quiet, stream, ...extra });
|
|
53
57
|
},
|
|
54
58
|
};
|
|
55
59
|
}
|
|
56
60
|
|
|
57
|
-
function record(level, event, fields = {}) {
|
|
58
|
-
const { message, model = null, latency_ms = null, ...rest } = fields;
|
|
59
|
-
return {
|
|
60
|
-
ts: new Date().toISOString(),
|
|
61
|
-
level,
|
|
62
|
-
event,
|
|
63
|
-
model,
|
|
64
|
-
latency_ms,
|
|
65
|
-
...(message ? { message } : {}),
|
|
66
|
-
...rest,
|
|
67
|
-
};
|
|
68
|
-
}
|
|
69
61
|
|
|
62
|
+
/**
|
|
63
|
+
* Default stderr logger used by simple callers.
|
|
64
|
+
*
|
|
65
|
+
* @type {ReturnType<typeof createLogger>}
|
|
66
|
+
* @example
|
|
67
|
+
* defaultLogger.info('patina.ready', { message: 'ready' });
|
|
68
|
+
*/
|
|
70
69
|
export const defaultLogger = createLogger();
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
// @ts-check
|
|
2
|
+
|
|
3
|
+
/**
 * Default to the strongest stable model ids that patina documents for each
 * backend family. These values are intentionally centralized so releases can
 * refresh "latest best" defaults without touching backend process plumbing.
 *
 * The table is frozen, so entries cannot be added, removed, or reassigned at
 * runtime. Keys are the camelCase family names that BACKEND_MODEL_KEYS maps
 * local CLI backend names onto.
 */
export const DEFAULT_BEST_MODELS = Object.freeze({
  // NOTE(review): no local CLI backend in this module maps to `openai`;
  // presumably consumed by the HTTP provider path — confirm against callers.
  openai: 'gpt-5.5',
  // Default for the 'codex-cli' backend.
  codexCli: 'gpt-5.5',
  // Default for the 'claude-cli' backend.
  claudeCli: 'claude-sonnet-4-6',
  // Default for the 'gemini-cli' backend.
  geminiCli: 'gemini-2.5-pro',
  // Default for the 'kimi-cli' backend.
  kimiCli: 'kimi-code/kimi-for-coding',
});
|
|
15
|
+
|
|
16
|
+
/**
 * Local CLI backend name -> key into DEFAULT_BEST_MODELS.
 *
 * Built on a null prototype so that bracket lookups with arbitrary backend
 * names (for example `constructor` or `__proto__`) yield `undefined` instead
 * of an inherited `Object.prototype` member. Frozen to keep it read-only.
 */
const BACKEND_MODEL_KEYS = Object.freeze(
  Object.assign(Object.create(null), {
    'codex-cli': 'codexCli',
    'claude-cli': 'claudeCli',
    'gemini-cli': 'geminiCli',
    'kimi-cli': 'kimiCli',
  }),
);
|
|
22
|
+
|
|
23
|
+
/**
 * Local CLI backend name -> bare selector alias (lowercase).
 *
 * A `--model` value equal to the alias selects the backend itself rather than
 * naming a concrete model id, so the resolver swaps it for the family default.
 *
 * Built on a null prototype so lookups with arbitrary backend names never
 * return an inherited `Object.prototype` member. Frozen to keep it read-only.
 */
const BACKEND_SELECTOR_ALIASES = Object.freeze(
  Object.assign(Object.create(null), {
    'codex-cli': 'codex',
    'claude-cli': 'claude',
    'gemini-cli': 'gemini',
    'kimi-cli': 'kimi',
  }),
);
|
|
29
|
+
|
|
30
|
+
/**
 * Resolve the model id a local CLI backend should receive.
 *
 * `resolveProviderConfig` always supplies an HTTP default model when the user
 * did not choose one. Local CLIs need their own family-specific defaults, so
 * `modelSource: "default"` is treated as unset. Exact selector aliases such as
 * `--model codex` still route to the backend without becoming invalid model ids.
 *
 * Backend names are matched against the lookup tables' own keys only, so a
 * name that a plain object merely inherits (`constructor`, `__proto__`, ...)
 * is handled like any other unknown backend.
 *
 * @param {object} options
 * @param {string} options.backendName Local backend name.
 * @param {string|null|undefined} [options.model] Resolved model value.
 * @param {string|null|undefined} [options.modelSource] Source label from provider resolution.
 * @returns {string|null} Effective local CLI model id. For an unrecognised
 *   backend this is `model` unchanged, or null when `model` is empty.
 * @example
 * resolveLocalCliModel({ backendName: 'codex-cli', model: 'codex' });
 * // => DEFAULT_BEST_MODELS.codexCli
 */
export function resolveLocalCliModel({ backendName, model, modelSource }) {
  // Own-property lookup: never let Object.prototype members pose as table entries.
  const ownValue = (table, name) =>
    Object.prototype.hasOwnProperty.call(table, name) ? table[name] : undefined;

  const key = ownValue(BACKEND_MODEL_KEYS, backendName);
  // Unknown backend: pass the caller's model through, normalising empty to null.
  if (!key) return model || null;

  const defaultModel = DEFAULT_BEST_MODELS[key];
  // A provider-level default is an HTTP model id, not a choice made for this CLI.
  if (!model || modelSource === 'default') return defaultModel;

  // A bare selector alias (case-insensitive) picks the backend, not a model id.
  const alias = ownValue(BACKEND_SELECTOR_ALIASES, backendName);
  if (alias && String(model).toLowerCase() === alias) return defaultModel;

  return model;
}
|