patina-cli 3.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.patina.default.yaml +211 -0
- package/CHANGELOG.md +265 -0
- package/LICENSE +21 -0
- package/README.md +319 -0
- package/README_JA.md +254 -0
- package/README_KR.md +253 -0
- package/README_ZH.md +254 -0
- package/SKILL-MAX.md +455 -0
- package/SKILL.md +730 -0
- package/assets/brand/patina-icon.svg +9 -0
- package/assets/brand/patina-logo.svg +17 -0
- package/assets/social/patina-before-after.svg +46 -0
- package/assets/social/patina-og.svg +31 -0
- package/bin/patina.js +9 -0
- package/core/scoring.md +657 -0
- package/core/standalone-prompt.md +364 -0
- package/core/stylometry.md +754 -0
- package/core/voice.md +163 -0
- package/docs/AUTHENTICATION.md +105 -0
- package/docs/AUTHENTICATION_KR.md +105 -0
- package/docs/BRANDING.md +37 -0
- package/docs/CLI.md +80 -0
- package/docs/COMPARISON.md +38 -0
- package/docs/COOKBOOK.md +173 -0
- package/docs/DEMO.md +40 -0
- package/docs/ETHICS.md +27 -0
- package/docs/EXAMPLES.md +130 -0
- package/docs/EXAMPLES_KR.md +130 -0
- package/docs/EXIT-CODES.md +25 -0
- package/docs/FAQ.md +67 -0
- package/docs/FAQ_KR.md +65 -0
- package/docs/FLAG-PARITY.md +53 -0
- package/docs/GLOSSARY.md +123 -0
- package/docs/PATTERNS-EN.md +718 -0
- package/docs/PATTERNS-JA.md +706 -0
- package/docs/PATTERNS-KO.md +707 -0
- package/docs/PATTERNS-ZH.md +706 -0
- package/docs/PATTERNS.md +22 -0
- package/docs/ROADMAP.md +315 -0
- package/docs/audits/2026-05-deep-research.md +290 -0
- package/docs/benchmarks/detector-comparison.json +442 -0
- package/docs/benchmarks/detector-comparison.md +65 -0
- package/docs/benchmarks/latest.json +988 -0
- package/docs/benchmarks/latest.md +112 -0
- package/docs/integrations/docker.md +19 -0
- package/docs/integrations/github-action.md +59 -0
- package/docs/integrations/pre-commit.md +77 -0
- package/docs/integrations/release.md +43 -0
- package/docs/internal/HARNESS.md +14 -0
- package/docs/internal/README.md +14 -0
- package/docs/internal/WARP.md +23 -0
- package/docs/research/2025-rebaseline-plan.md +89 -0
- package/docs/research/ai-human-metrics.md +380 -0
- package/docs/social/gstack-cardnews.html +236 -0
- package/docs/social/gstack-cardnews.md +88 -0
- package/docs/social/gstack-thread.md +106 -0
- package/docs/social/patina-launch-copy.md +227 -0
- package/docs/superpowers/specs/2026-04-03-meaning-preservation-design.md +299 -0
- package/lexicon/ai-en.md +162 -0
- package/lexicon/ai-ko.md +159 -0
- package/package.json +100 -0
- package/patina-max/SKILL.md +523 -0
- package/patina-max/composite.py +457 -0
- package/patterns/en-communication.md +89 -0
- package/patterns/en-content.md +133 -0
- package/patterns/en-filler.md +113 -0
- package/patterns/en-language.md +163 -0
- package/patterns/en-structure.md +173 -0
- package/patterns/en-style.md +139 -0
- package/patterns/en-viral-hook.md +211 -0
- package/patterns/ja-communication.md +101 -0
- package/patterns/ja-content.md +153 -0
- package/patterns/ja-filler.md +123 -0
- package/patterns/ja-language.md +190 -0
- package/patterns/ja-structure.md +142 -0
- package/patterns/ja-style.md +147 -0
- package/patterns/ja-viral-hook.md +216 -0
- package/patterns/ko-communication.md +98 -0
- package/patterns/ko-content.md +154 -0
- package/patterns/ko-filler.md +105 -0
- package/patterns/ko-language.md +182 -0
- package/patterns/ko-structure.md +147 -0
- package/patterns/ko-style.md +146 -0
- package/patterns/ko-viral-hook.md +211 -0
- package/patterns/zh-communication.md +101 -0
- package/patterns/zh-content.md +153 -0
- package/patterns/zh-filler.md +118 -0
- package/patterns/zh-language.md +173 -0
- package/patterns/zh-structure.md +145 -0
- package/patterns/zh-style.md +159 -0
- package/patterns/zh-viral-hook.md +216 -0
- package/profiles/academic.md +53 -0
- package/profiles/blog.md +81 -0
- package/profiles/casual-conversation.md +105 -0
- package/profiles/code-comment.md +104 -0
- package/profiles/commit-message.md +99 -0
- package/profiles/default.md +62 -0
- package/profiles/email.md +52 -0
- package/profiles/formal.md +98 -0
- package/profiles/instructional.md +80 -0
- package/profiles/legal.md +57 -0
- package/profiles/marketing.md +56 -0
- package/profiles/medical.md +53 -0
- package/profiles/narrative.md +79 -0
- package/profiles/release-notes.md +98 -0
- package/profiles/social.md +56 -0
- package/profiles/technical.md +53 -0
- package/scripts/benchmark-report.mjs +252 -0
- package/scripts/check-release-metadata.mjs +48 -0
- package/scripts/detector-comparison.mjs +267 -0
- package/scripts/lint.mjs +40 -0
- package/scripts/precommit-score.mjs +31 -0
- package/scripts/prose-score.mjs +186 -0
- package/scripts/update-benchmark-ranges.mjs +108 -0
- package/src/api.js +330 -0
- package/src/auth.js +105 -0
- package/src/backends/claude-cli.js +112 -0
- package/src/backends/codex-cli.js +121 -0
- package/src/backends/contract.js +21 -0
- package/src/backends/gemini-cli.js +135 -0
- package/src/backends/index.js +159 -0
- package/src/cache.js +106 -0
- package/src/cli.js +1280 -0
- package/src/commands/doctor.js +229 -0
- package/src/commands/init.js +208 -0
- package/src/config.js +126 -0
- package/src/errors.js +53 -0
- package/src/features/index.js +96 -0
- package/src/features/lexicon.js +90 -0
- package/src/features/segment.js +49 -0
- package/src/features/stylometry.js +50 -0
- package/src/loader.js +103 -0
- package/src/logger.js +70 -0
- package/src/manifest.js +162 -0
- package/src/max-mode.js +207 -0
- package/src/ouroboros.js +233 -0
- package/src/output.js +480 -0
- package/src/prompt-builder.js +409 -0
- package/src/providers.js +100 -0
- package/src/scoring.js +531 -0
- package/src/security.js +133 -0
- package/tests/fixtures/suspect-zones/en/ai/en-ai-01.md +16 -0
- package/tests/fixtures/suspect-zones/en/ai/en-ai-02.md +16 -0
- package/tests/fixtures/suspect-zones/en/ai/en-ai-03.md +17 -0
- package/tests/fixtures/suspect-zones/en/ai/en-ai-04.md +15 -0
- package/tests/fixtures/suspect-zones/en/ai/en-ai-05.md +16 -0
- package/tests/fixtures/suspect-zones/en/ai/en-ai-06-chat-register.md +16 -0
- package/tests/fixtures/suspect-zones/en/natural/en-nat-01.md +15 -0
- package/tests/fixtures/suspect-zones/en/natural/en-nat-02.md +15 -0
- package/tests/fixtures/suspect-zones/en/natural/en-nat-03.md +15 -0
- package/tests/fixtures/suspect-zones/en/natural/en-nat-04.md +15 -0
- package/tests/fixtures/suspect-zones/en/natural/en-nat-05.md +15 -0
- package/tests/fixtures/suspect-zones/expected-ranges.json +939 -0
- package/tests/fixtures/suspect-zones/ja/ai/ja-ai-01.md +11 -0
- package/tests/fixtures/suspect-zones/ja/ai/ja-ai-02.md +11 -0
- package/tests/fixtures/suspect-zones/ja/ai/ja-ai-03.md +11 -0
- package/tests/fixtures/suspect-zones/ja/natural/ja-nat-01.md +11 -0
- package/tests/fixtures/suspect-zones/ja/natural/ja-nat-02.md +11 -0
- package/tests/fixtures/suspect-zones/ja/natural/ja-nat-03.md +11 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-01.md +14 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-02.md +16 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-03.md +15 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-04.md +15 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-05.md +16 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-06-chat-register.md +16 -0
- package/tests/fixtures/suspect-zones/ko/natural/ko-nat-01.md +15 -0
- package/tests/fixtures/suspect-zones/ko/natural/ko-nat-02.md +15 -0
- package/tests/fixtures/suspect-zones/ko/natural/ko-nat-03.md +15 -0
- package/tests/fixtures/suspect-zones/ko/natural/ko-nat-04.md +14 -0
- package/tests/fixtures/suspect-zones/ko/natural/ko-nat-05.md +15 -0
- package/tests/fixtures/suspect-zones/zh/ai/zh-ai-01.md +11 -0
- package/tests/fixtures/suspect-zones/zh/ai/zh-ai-02.md +11 -0
- package/tests/fixtures/suspect-zones/zh/ai/zh-ai-03.md +11 -0
- package/tests/fixtures/suspect-zones/zh/natural/zh-nat-01.md +11 -0
- package/tests/fixtures/suspect-zones/zh/natural/zh-nat-02.md +11 -0
- package/tests/fixtures/suspect-zones/zh/natural/zh-nat-03.md +11 -0
- package/tests/quality/README.md +121 -0
- package/tests/quality/benchmark.mjs +306 -0
- package/tests/quality/detectors.manual.example.json +31 -0
- package/tests/quality/dogfood.mjs +44 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
// Top-level analyzer: runs the deterministic stylometry + lexicon signals
|
|
2
|
+
// described in core/stylometry.md and returns a per-paragraph result. This
|
|
3
|
+
// is the in-tree port of the algorithm previously delegated to the LLM via
|
|
4
|
+
// SKILL.md Step 4.6/4.7. It does not call any LLM.
|
|
5
|
+
|
|
6
|
+
import { splitParagraphs, splitSentences, tokenize } from './segment.js';
|
|
7
|
+
import {
|
|
8
|
+
burstinessCV,
|
|
9
|
+
mattr,
|
|
10
|
+
classifyBurstiness,
|
|
11
|
+
classifyMattr,
|
|
12
|
+
DEFAULT_BURSTINESS_BANDS,
|
|
13
|
+
DEFAULT_MATTR_BANDS,
|
|
14
|
+
DEFAULT_MATTR_WINDOW,
|
|
15
|
+
} from './stylometry.js';
|
|
16
|
+
import { loadLexicon, computeDensity, DEFAULT_LEXICON_DENSITY_THRESHOLD } from './lexicon.js';
|
|
17
|
+
|
|
18
|
+
/**
 * Runs the deterministic stylometry + lexicon signals over `text` and
 * returns one result per paragraph. No LLM is involved.
 *
 * @param {string} text - Raw input text; falsy input is treated as empty.
 * @param {object} [opts]
 * @param {string} [opts.lang='en'] - Language code forwarded to the tokenizer
 *   and used to pick the lexicon file.
 * @param {string} [opts.repoRoot] - When set (and no lexicon is provided),
 *   the lexicon is loaded from this root via `loadLexicon`.
 * @param {object} [opts.burstinessBands] - Band thresholds for burstiness CV.
 * @param {object} [opts.mattrBands] - Band thresholds for MATTR.
 * @param {number} [opts.mattrWindow] - MATTR window size.
 * @param {number} [opts.lexiconDensityThreshold] - A paragraph is
 *   lexicon-hot when its density is strictly above this value.
 * @param {object} [opts.lexicon] - Pre-built lexicon; takes precedence over
 *   `repoRoot`.
 * @returns {{lang: string, skipped: boolean, skipReason: (string|null),
 *   paragraphs: object[], hot: boolean}}
 */
export function analyzeText(text, opts = {}) {
  const {
    lang = 'en',
    repoRoot,
    burstinessBands = DEFAULT_BURSTINESS_BANDS,
    mattrBands = DEFAULT_MATTR_BANDS,
    mattrWindow = DEFAULT_MATTR_WINDOW,
    lexiconDensityThreshold = DEFAULT_LEXICON_DENSITY_THRESHOLD,
    lexicon: providedLexicon,
  } = opts;

  // Normalize to NFC at the boundary so downstream tokenization and lexicon
  // comparison see canonical form. Mixed NFC/NFD inputs (e.g. "café" composed
  // vs decomposed) would otherwise yield different MATTR/lexicon hits.
  const normalized = text ? text.normalize('NFC') : '';
  const paragraphs = splitParagraphs(normalized);

  const baseLexicon =
    providedLexicon ??
    (repoRoot ? loadLexicon(lang, repoRoot) : { strict: [], phrases: [] });
  // computeDensity picks its matching strategy from `lexicon.lang` and
  // iterates both lists, so fill in whatever a caller-supplied lexicon left
  // out instead of silently matching as non-Korean or throwing on a missing
  // list. Values the lexicon does carry always win.
  const lexicon = {
    ...baseLexicon,
    lang: baseLexicon.lang ?? lang,
    strict: baseLexicon.strict ?? [],
    phrases: baseLexicon.phrases ?? [],
  };

  const analyzed = paragraphs.map((paragraph, idx) => {
    const sentences = splitSentences(paragraph);
    const sentenceTokens = sentences.map((sentence) => tokenize(sentence, { lang }));
    const sentenceTokenCounts = sentenceTokens.map((t) => t.length);
    const allTokens = sentenceTokens.flat();

    const cv = burstinessCV(sentenceTokenCounts);
    const cvBand = classifyBurstiness(cv, burstinessBands);
    const mattrValue = mattr(allTokens, mattrWindow);
    const mattrBand = classifyMattr(mattrValue, mattrBands);
    const lex = computeDensity(paragraph, allTokens, lexicon);

    const lexiconHot = lex.density > lexiconDensityThreshold;
    const hot = cvBand === 'low' || mattrBand === 'low' || lexiconHot;

    return {
      id: `P${idx + 1}`,
      sentenceCount: sentences.length,
      tokenCount: allTokens.length,
      burstiness: { cv, band: cvBand },
      mattr: { value: mattrValue, band: mattrBand },
      lexicon: { ...lex, hot: lexiconHot },
      hot,
    };
  });

  // §8 skip conditions are advisory only — production callers (SKILL.md 4.6/4.7)
  // can suppress meta-block emission, but the benchmark wants raw signals on
  // single-paragraph fixtures so they are computed unconditionally above.
  // The sentence total reuses the per-paragraph counts instead of splitting
  // every paragraph a second time.
  const totalSentences = analyzed.reduce((n, p) => n + p.sentenceCount, 0);
  const skipReason =
    paragraphs.length <= 2 ? 'paragraphs<=2' :
    totalSentences <= 2 ? 'sentences<=2' :
    null;

  return {
    lang,
    skipped: Boolean(skipReason),
    skipReason,
    paragraphs: analyzed,
    hot: analyzed.some((p) => p.hot),
  };
}
|
|
85
|
+
|
|
86
|
+
export {
|
|
87
|
+
splitParagraphs,
|
|
88
|
+
splitSentences,
|
|
89
|
+
tokenize,
|
|
90
|
+
burstinessCV,
|
|
91
|
+
mattr,
|
|
92
|
+
classifyBurstiness,
|
|
93
|
+
classifyMattr,
|
|
94
|
+
loadLexicon,
|
|
95
|
+
computeDensity,
|
|
96
|
+
};
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
// AI-lexicon loading and density per core/stylometry.md §16.
|
|
2
|
+
// Loads the markdown lexicon at lexicon/ai-{lang}.md and computes
|
|
3
|
+
// hits-per-1000-tokens density. Custom lexicons under custom/lexicon/
|
|
4
|
+
// take precedence when present.
|
|
5
|
+
|
|
6
|
+
import { readFileSync, existsSync } from 'node:fs';
|
|
7
|
+
import { resolve } from 'node:path';
|
|
8
|
+
|
|
9
|
+
export const DEFAULT_LEXICON_DENSITY_THRESHOLD = 2.0;
|
|
10
|
+
|
|
11
|
+
// Parses the two well-known sections out of a lexicon markdown file.
|
|
12
|
+
// Returns { strict: string[], phrases: string[] }.
|
|
13
|
+
/**
 * Extracts the bullet entries of the two recognised `## ` sections from a
 * lexicon markdown body.
 *
 * A heading containing "strict matches" opens the strict list, one containing
 * "multi-word phrases" opens the phrase list, and any other `## ` heading
 * closes the current section. Only lines starting with `- ` inside an open
 * section are collected.
 *
 * @param {string} body - Markdown text (frontmatter already removed).
 * @returns {{strict: string[], phrases: string[]}}
 */
function parseLexiconBody(body) {
  const buckets = { strict: [], phrases: [] };
  let current = null;

  body.split('\n').forEach((rawLine) => {
    const text = rawLine.trim();

    if (text.startsWith('## ')) {
      const title = text.toLowerCase();
      if (title.includes('strict matches')) {
        current = 'strict';
      } else if (title.includes('multi-word phrases')) {
        current = 'phrases';
      } else {
        current = null;
      }
      return;
    }

    if (current === null || !text.startsWith('- ')) return;

    // NFC keeps visually identical entries comparable with tokens that
    // arrive in a different normalization form.
    const item = text.slice(2).trim().normalize('NFC');
    if (item.length > 0) buckets[current].push(item);
  });

  return { strict: buckets.strict, phrases: buckets.phrases };
}
|
|
35
|
+
|
|
36
|
+
/**
 * Loads the AI lexicon for `lang`, preferring `custom/lexicon/ai-{lang}.md`
 * over the bundled `lexicon/ai-{lang}.md` under `repoRoot`.
 *
 * `lang` becomes part of a file name, so a value containing a path separator
 * is refused: it could otherwise resolve to a markdown file outside the
 * lexicon directories. Such a value yields the same empty lexicon as a
 * language with no lexicon file.
 *
 * @param {string} lang - Language code used in the file name.
 * @param {string} repoRoot - Directory containing `lexicon/` (and optionally
 *   `custom/lexicon/`).
 * @returns {{lang: string, path: (string|null), strict: string[], phrases: string[]}}
 *   `path` is null when no lexicon file was found.
 */
export function loadLexicon(lang, repoRoot) {
  const empty = { lang, path: null, strict: [], phrases: [] };
  if (/[\\/]/.test(String(lang))) return empty;

  const candidates = [
    resolve(repoRoot, 'custom', 'lexicon', `ai-${lang}.md`),
    resolve(repoRoot, 'lexicon', `ai-${lang}.md`),
  ];
  for (const path of candidates) {
    if (existsSync(path)) {
      const raw = readFileSync(path, 'utf8');
      // Drop a leading `---` … `---` frontmatter fence before parsing.
      const body = raw.replace(/^---[\s\S]*?---\s*/, '');
      return { lang, path, ...parseLexiconBody(body) };
    }
  }
  return empty;
}
|
|
50
|
+
|
|
51
|
+
// Phrases may include `~` as a wildcard standing in for up to 40 chars.
|
|
52
|
+
/**
 * Compiles a lexicon phrase into a case-folded regular expression.
 *
 * Regex metacharacters in the phrase are escaped; each `~` becomes a wildcard
 * for up to 40 characters. The wildcard uses `[\s\S]` rather than `.` so it
 * can span a line break: paragraphs are split on blank lines only, so a
 * hard-wrapped paragraph still contains single newlines.
 *
 * @param {string} phrase - Lexicon phrase, optionally containing `~`.
 * @returns {RegExp} Pattern meant to be tested against lower-cased text.
 */
function phraseToRegex(phrase) {
  const escaped = phrase
    .toLowerCase()
    .replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const withWildcard = escaped.replace(/~/g, '[\\s\\S]{0,40}');
  return new RegExp(withWildcard);
}
|
|
59
|
+
|
|
60
|
+
// Counts paragraph-level lexicon hits. Strict entries match whole-word
|
|
61
|
+
// (Unicode-aware boundaries via \p{L}\p{N}); phrases match as substrings
|
|
62
|
+
// with `~` wildcard support. Each entry counts at most once per paragraph.
|
|
63
|
+
/**
 * Counts lexicon hits in one paragraph and converts them to a
 * hits-per-1000-tokens density.
 *
 * A strict entry hits when it equals one of the (lower-cased) tokens. If it
 * does not, a substring match against the lower-cased paragraph is accepted
 * only when the lexicon language is `ko` or the entry contains a character
 * that is neither a letter nor a number. Phrases are matched through
 * `phraseToRegex`. Every entry contributes at most one hit.
 *
 * @param {string} paragraphText - The paragraph as written.
 * @param {string[]} tokens - Tokens of that paragraph.
 * @param {{lang?: string, strict: Iterable<string>, phrases: Iterable<string>}} lexicon
 * @returns {{matches: number, density: number, hits: string[]}}
 *   `density` is 0 when there are no tokens.
 */
export function computeDensity(paragraphText, tokens, lexicon) {
  const haystack = paragraphText.toLowerCase();
  const seenTokens = new Set(tokens.map((t) => t.toLowerCase()));

  // §16: Korean 어절 inflection means a strict entry should also hit its
  // inflected forms, so Korean falls back to substring matching. Entries with
  // punctuation or spaces need the same fallback because tokenization strips
  // edge punctuation.
  const substringForAll = lexicon.lang === 'ko';

  const strictHits = [...lexicon.strict].filter((entry) => {
    const needle = entry.toLowerCase();
    if (seenTokens.has(needle)) return true;
    const substringAllowed = substringForAll || /[^\p{L}\p{N}]/u.test(needle);
    return substringAllowed && haystack.includes(needle);
  });

  const phraseHits = [...lexicon.phrases].filter((phrase) =>
    phraseToRegex(phrase).test(haystack)
  );

  const hits = [...strictHits, ...phraseHits];
  const density = tokens.length === 0 ? 0 : (hits.length / tokens.length) * 1000;
  return { matches: hits.length, density, hits };
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
// Paragraph / sentence / token splitting per core/stylometry.md §2 §3.
|
|
2
|
+
//
|
|
3
|
+
// Tokenization is intentionally simple: whitespace split + edge-punctuation
|
|
4
|
+
// strip, no morphological analysis. For Chinese/Japanese prose, where normal
|
|
5
|
+
// text often has no whitespace, use a deterministic character-token fallback
|
|
6
|
+
// so sentence-length and lexical-diversity signals are not collapsed to
|
|
7
|
+
// "one token per sentence." Sentence splitting is regex-only and accepts known
|
|
8
|
+
// false splits on abbreviations / decimals (documented limit).
|
|
9
|
+
|
|
10
|
+
const SENTENCE_SPLIT_RE = /[.!?]+\s+|(?<=[。!?…])|\n+/u;
|
|
11
|
+
const PARAGRAPH_SPLIT_RE = /\n\s*\n/;
|
|
12
|
+
// Unicode-aware stand-in for \W (anything that is not a letter or number). Strips edge punctuation but keeps internal
|
|
13
|
+
// hyphens / apostrophes (e.g. "don't", "좋은-도구") as a single token.
|
|
14
|
+
const EDGE_PUNCT_RE = /^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu;
|
|
15
|
+
const CJK_TOKEN_RE = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\u30FC]|[A-Za-z0-9]+/gu;
|
|
16
|
+
|
|
17
|
+
/**
 * Splits text into paragraphs on `PARAGRAPH_SPLIT_RE`, trimming each piece
 * and dropping the ones that end up empty.
 *
 * @param {string} text
 * @returns {string[]} Trimmed, non-empty paragraphs; `[]` for falsy input.
 */
export function splitParagraphs(text) {
  if (!text) return [];
  const paragraphs = [];
  for (const chunk of text.split(PARAGRAPH_SPLIT_RE)) {
    const trimmed = chunk.trim();
    if (trimmed.length > 0) paragraphs.push(trimmed);
  }
  return paragraphs;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Splits a paragraph into sentences on `SENTENCE_SPLIT_RE`, then trims each
 * piece and removes any trailing run of sentence-final punctuation. Pieces
 * that are empty after that are dropped.
 *
 * @param {string} paragraph
 * @returns {string[]} Sentences without terminal punctuation; `[]` for falsy
 *   input.
 */
export function splitSentences(paragraph) {
  if (!paragraph) return [];
  const sentences = [];
  for (const piece of paragraph.split(SENTENCE_SPLIT_RE)) {
    const cleaned = piece.trim().replace(/[.!?。!?…]+$/u, '');
    if (cleaned.length > 0) sentences.push(cleaned);
  }
  return sentences;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Character-level tokenizer used for Chinese/Japanese text: every match of
 * `CJK_TOKEN_RE` becomes a token once edge punctuation is stripped, and
 * matches that are empty after stripping are discarded.
 *
 * @param {string} text
 * @returns {string[]}
 */
function tokenizeCjk(text) {
  return Array.from(text.matchAll(CJK_TOKEN_RE), (found) =>
    found[0].replace(EDGE_PUNCT_RE, '')
  ).filter((piece) => piece !== '');
}
|
|
41
|
+
|
|
42
|
+
/**
 * Tokenizes text. For `opts.lang` of `zh` or `ja` the work is delegated to
 * `tokenizeCjk`; otherwise the text is split on whitespace and each chunk has
 * its edge punctuation stripped, with empty results dropped.
 *
 * @param {string} text
 * @param {{lang?: string}} [opts]
 * @returns {string[]} Tokens; `[]` for falsy input.
 */
export function tokenize(text, opts = {}) {
  if (!text) return [];

  const useCjk = opts.lang === 'zh' || opts.lang === 'ja';
  if (useCjk) return tokenizeCjk(text);

  const tokens = [];
  for (const chunk of text.split(/\s+/)) {
    const stripped = chunk.replace(EDGE_PUNCT_RE, '');
    if (stripped.length > 0) tokens.push(stripped);
  }
  return tokens;
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
// Burstiness CV + MATTR per core/stylometry.md §4 §5.
|
|
2
|
+
// Pure functions over token arrays; no I/O.
|
|
3
|
+
|
|
4
|
+
export const DEFAULT_BURSTINESS_BANDS = { low: 0.30, high: 0.50 };
|
|
5
|
+
export const DEFAULT_MATTR_BANDS = { low: 0.55, high: 0.70 };
|
|
6
|
+
export const DEFAULT_MATTR_WINDOW = 50;
|
|
7
|
+
|
|
8
|
+
// Coefficient of variation of sentence token counts.
|
|
9
|
+
// Returns null when the paragraph has fewer than 2 sentences or mean is 0.
|
|
10
|
+
/**
 * Coefficient of variation (population standard deviation divided by mean)
 * of per-sentence token counts.
 *
 * @param {number[]} sentenceTokenCounts
 * @returns {(number|null)} null when the input is not an array, has fewer
 *   than two entries, or has a mean of 0.
 */
export function burstinessCV(sentenceTokenCounts) {
  const usable = Array.isArray(sentenceTokenCounts) && sentenceTokenCounts.length >= 2;
  if (!usable) return null;

  const count = sentenceTokenCounts.length;

  let total = 0;
  sentenceTokenCounts.forEach((value) => {
    total += value;
  });
  const mean = total / count;
  if (mean === 0) return null;

  let squaredDeviation = 0;
  sentenceTokenCounts.forEach((value) => {
    squaredDeviation += (value - mean) ** 2;
  });

  return Math.sqrt(squaredDeviation / count) / mean;
}
|
|
19
|
+
|
|
20
|
+
// Moving Average Type-Token Ratio (window default 50).
|
|
21
|
+
// Falls back to simple TTR when token count < window.
|
|
22
|
+
/**
 * Moving-Average Type-Token Ratio over lower-cased tokens.
 *
 * Every full window of `window` consecutive tokens contributes
 * `distinct / window`; the result is the mean over all windows. When there
 * are fewer tokens than the window — or the window is not a positive
 * integer, which previously produced NaN or meaningless ratios — the plain
 * type-token ratio of the whole text is returned instead.
 *
 * Distinct counts are maintained incrementally while the window slides, so
 * each token is handled a constant number of times.
 *
 * @param {string[]} tokens
 * @param {number} [window=DEFAULT_MATTR_WINDOW] - Window size in tokens.
 * @returns {(number|null)} null when `tokens` is not a non-empty array.
 */
export function mattr(tokens, window = DEFAULT_MATTR_WINDOW) {
  if (!Array.isArray(tokens) || tokens.length === 0) return null;
  const lower = tokens.map((t) => t.toLowerCase());

  const usableWindow = Number.isInteger(window) && window >= 1;
  if (!usableWindow || lower.length < window) {
    return new Set(lower).size / lower.length;
  }

  // Occurrence count per type inside the current window; `counts.size` is
  // the number of distinct types.
  const counts = new Map();
  for (let i = 0; i < window; i++) {
    counts.set(lower[i], (counts.get(lower[i]) ?? 0) + 1);
  }

  let sum = counts.size / window;
  let windows = 1;
  for (let end = window; end < lower.length; end++) {
    const leaving = lower[end - window];
    const remaining = counts.get(leaving) - 1;
    if (remaining === 0) {
      counts.delete(leaving);
    } else {
      counts.set(leaving, remaining);
    }

    const entering = lower[end];
    counts.set(entering, (counts.get(entering) ?? 0) + 1);

    sum += counts.size / window;
    windows++;
  }
  return sum / windows;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Maps a burstiness CV onto a band name.
 *
 * @param {(number|null|undefined)} cv
 * @param {{low: number, high: number}} [bands=DEFAULT_BURSTINESS_BANDS]
 * @returns {('low'|'mid'|'high'|null)} 'low' below `bands.low`, 'high' above
 *   `bands.high`, 'mid' otherwise; null when `cv` is null or undefined.
 */
export function classifyBurstiness(cv, bands = DEFAULT_BURSTINESS_BANDS) {
  if (cv === null || cv === undefined) return null;
  const { low, high } = bands;
  return cv < low ? 'low' : cv > high ? 'high' : 'mid';
}
|
|
44
|
+
|
|
45
|
+
/**
 * Maps a MATTR value onto a band name.
 *
 * @param {(number|null|undefined)} value
 * @param {{low: number, high: number}} [bands=DEFAULT_MATTR_BANDS]
 * @returns {('low'|'mid'|'high'|null)} 'low' below `bands.low`, 'high' above
 *   `bands.high`, 'mid' otherwise; null when `value` is null or undefined.
 */
export function classifyMattr(value, bands = DEFAULT_MATTR_BANDS) {
  if (value === null || value === undefined) return null;
  const { low, high } = bands;
  return value < low ? 'low' : value > high ? 'high' : 'mid';
}
|
package/src/loader.js
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import { readFileSync, readdirSync } from 'node:fs';
|
|
2
|
+
import { resolve, sep } from 'node:path';
|
|
3
|
+
import yaml from 'js-yaml';
|
|
4
|
+
import { validateProfileName } from './security.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Reads a file synchronously as UTF-8 text.
 *
 * @param {string} path
 * @returns {string} File contents.
 */
export function loadFile(path) {
  const contents = readFileSync(path, 'utf8');
  return contents;
}
|
|
9
|
+
|
|
10
|
+
/**
 * Separates a leading `---` … `---` YAML frontmatter fence from the rest of
 * a document.
 *
 * @param {string} content
 * @returns {{frontmatter: *, body: string}} When a fence is found,
 *   `frontmatter` is the result of `yaml.load` on its contents and `body` is
 *   the trimmed remainder. Otherwise `frontmatter` is null and `body` is the
 *   input unchanged.
 */
export function splitFrontmatter(content) {
  const parts = content.match(/^---\s*\n([\s\S]*?)\n---\s*\n([\s\S]*)$/);
  if (parts === null) {
    return { frontmatter: null, body: content };
  }
  const [, rawYaml, remainder] = parts;
  const frontmatter = yaml.load(rawYaml);
  return { frontmatter, body: remainder.trim() };
}
|
|
20
|
+
|
|
21
|
+
/**
 * Loads every pattern pack for `lang` from `<repoRoot>/patterns`.
 *
 * A pack is a file named `{lang}-*.md`. Its pack name is the file name
 * without the trailing `.md`; packs whose name appears in `skipPatterns` are
 * left out. Only the final extension is removed — the previous
 * `replace('.md', '')` dropped the first `.md` occurrence, which mangles a
 * name such as `en-foo.mdx.md`.
 *
 * @param {string} repoRoot
 * @param {string} lang - Language prefix of the pack files.
 * @param {string[]} [skipPatterns=[]] - Pack names to exclude.
 * @returns {Array<{file: string, frontmatter: *, body: string,
 *   isStructure: boolean, isScoreOnly: boolean}>} Packs in sorted file order.
 */
export function loadPatterns(repoRoot, lang, skipPatterns = []) {
  const patternsDir = resolve(repoRoot, 'patterns');
  const prefix = `${lang}-`;
  const extension = '.md';

  const files = readdirSync(patternsDir)
    .filter((f) => f.startsWith(prefix) && f.endsWith(extension))
    .filter((f) => !skipPatterns.includes(f.slice(0, -extension.length)))
    .sort();

  return files.map((file) => {
    const content = loadFile(resolve(patternsDir, file));
    const { frontmatter, body } = splitFrontmatter(content);
    return {
      file,
      frontmatter,
      body,
      isStructure: frontmatter?.phase === 'structure',
      isScoreOnly: frontmatter?.score_only === true,
    };
  });
}
|
|
45
|
+
|
|
46
|
+
/**
 * Loads `<repoRoot>/profiles/<profileName>.md` and splits its frontmatter.
 *
 * The name is checked by `validateProfileName` first; the resolved path must
 * then still sit inside the profiles directory.
 *
 * @param {string} repoRoot
 * @param {string} profileName
 * @returns {{frontmatter: *, body: string}}
 * @throws {Error} When the resolved path is not inside `profiles/`.
 */
export function loadProfile(repoRoot, profileName) {
  validateProfileName(profileName);

  const root = resolve(repoRoot, 'profiles');
  const target = resolve(root, `${profileName}.md`);

  const contained = target.startsWith(root + sep);
  if (!contained) {
    throw new Error(`Profile path escaped profiles/: ${target}`);
  }

  return splitFrontmatter(loadFile(target));
}
|
|
56
|
+
|
|
57
|
+
/**
 * Loads `<repoRoot>/core/<filename>` and splits its frontmatter.
 *
 * @param {string} repoRoot
 * @param {string} filename - File name inside `core/`.
 * @returns {{frontmatter: *, body: string}}
 */
export function loadCoreFile(repoRoot, filename) {
  const location = resolve(repoRoot, 'core', filename);
  return splitFrontmatter(loadFile(location));
}
|
|
62
|
+
|
|
63
|
+
/**
 * Reads the input document synchronously as UTF-8 text.
 *
 * @param {string} path
 * @returns {string} File contents.
 */
export function loadInputText(path) {
  const text = readFileSync(path, 'utf8');
  return text;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Loads a voice sample file and keeps at most its first three paragraphs.
 *
 * Paragraphs are separated by blank lines, trimmed, and empty ones dropped.
 *
 * @param {string} path
 * @returns {{path: string, paragraphs: string[], body: string, truncated: boolean}}
 *   `body` joins the kept paragraphs with a blank line; `truncated` is true
 *   when the file had more paragraphs than were kept.
 * @throws {Error} When the file contains no non-empty paragraph.
 */
export function loadVoiceSample(path) {
  const MAX_PARAGRAPHS = 3;

  const allParagraphs = [];
  for (const block of loadFile(path).split(/\n\s*\n/)) {
    const trimmed = block.trim();
    if (trimmed) allParagraphs.push(trimmed);
  }

  if (allParagraphs.length === 0) {
    throw new Error(`Voice sample is empty: ${path}`);
  }

  const kept = allParagraphs.slice(0, MAX_PARAGRAPHS);
  return {
    path,
    paragraphs: kept,
    body: kept.join('\n\n'),
    truncated: allParagraphs.length > kept.length,
  };
}
|
|
86
|
+
|
|
87
|
+
// Tone → backbone profile mapping (v3.10, mirrors SKILL.md Phase 1).
|
|
88
|
+
// Returns the *primary* backbone profile name for a resolved tone.
|
|
89
|
+
// Multi-profile tones (e.g. professional → email + formal + legal + medical)
|
|
90
|
+
// expose only the primary here; secondary profiles are documented in SKILL.md
|
|
91
|
+
// and respected via legal/medical fidelity-floor enforcement at Phase 5b.
|
|
92
|
+
const TONE_BACKBONE = {
  casual: 'blog', // primary; social is a secondary backbone
  professional: 'email', // primary; formal/legal/medical secondary
  academic: 'academic', // primary; technical secondary
  narrative: 'narrative',
  marketing: 'marketing',
  instructional: 'instructional',
};

/**
 * Returns the primary backbone profile name for a resolved tone.
 *
 * Only the table's own keys count: a plain property read would also find
 * inherited members, so a tone such as 'constructor' or 'toString' used to
 * return a function instead of null.
 *
 * @param {string} tone
 * @returns {(string|null)} Profile name, or null for an unknown tone.
 */
export function toneToBackboneProfile(tone) {
  return Object.prototype.hasOwnProperty.call(TONE_BACKBONE, tone)
    ? TONE_BACKBONE[tone]
    : null;
}
|
package/src/logger.js
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
const LEVELS = {
|
|
2
|
+
debug: 10,
|
|
3
|
+
info: 20,
|
|
4
|
+
warn: 30,
|
|
5
|
+
error: 40,
|
|
6
|
+
silent: Infinity,
|
|
7
|
+
};
|
|
8
|
+
|
|
9
|
+
/**
 * Creates a leveled logger.
 *
 * Leveled messages (`debug`/`info`/`warn`/`error`) are written with
 * `console.error`: as one JSON record per call in `json` mode, otherwise as
 * `fields.message` when present. `progress` rewrites a single line on
 * `stream` using a leading carriage return (or emits an info-level JSON
 * record in `json` mode); the next leveled message — or an explicit
 * `closeProgress()` — terminates that line with a newline first.
 *
 * @param {object} [options]
 * @param {string} [options.level] - Minimum level; defaults to
 *   `PATINA_LOG_LEVEL` or 'info'. Unknown names fall back to 'info'.
 * @param {boolean} [options.quiet=false] - Suppresses all output.
 * @param {boolean} [options.json=false] - Emit JSON records.
 * @param {{write: Function}} [options.stream=process.stderr] - Target for
 *   progress lines.
 * @returns {{debug: Function, info: Function, warn: Function, error: Function,
 *   progress: Function, closeProgress: Function, child: Function}}
 */
export function createLogger({
  level = process.env.PATINA_LOG_LEVEL || 'info',
  quiet = false,
  json = false,
  stream = process.stderr,
} = {}) {
  let threshold;
  if (quiet) {
    threshold = LEVELS.silent;
  } else {
    threshold = LEVELS[String(level).toLowerCase()] ?? LEVELS.info;
  }

  // True while a progress line is on screen without a trailing newline.
  let progressOpen = false;

  const closeProgress = () => {
    if (progressOpen && stream?.write) stream.write('\n');
    progressOpen = false;
  };

  const writeJson = (levelName, event, fields) => {
    console.error(JSON.stringify(record(levelName, event, fields)));
  };

  const emit = (levelName, event, fields = {}) => {
    if (LEVELS[levelName] < threshold) return;
    closeProgress();
    if (json) {
      writeJson(levelName, event, fields);
    } else if (fields.message) {
      console.error(fields.message);
    }
  };

  const progress = (event, fields = {}) => {
    if (LEVELS.info < threshold) return;
    if (json) {
      writeJson('info', event, fields);
      return;
    }
    const canWrite = fields.message && stream?.write;
    if (!canWrite) return;
    stream.write(`\r${fields.message}`);
    progressOpen = true;
  };

  const leveled = (levelName) => (event, fields) => emit(levelName, event, fields);

  return {
    debug: leveled('debug'),
    info: leveled('info'),
    warn: leveled('warn'),
    error: leveled('error'),
    progress,
    closeProgress,
    child(extra = {}) {
      return createLogger({ level, quiet, json, stream, ...extra });
    },
  };
}
|
|
56
|
+
|
|
57
|
+
/**
 * Builds one structured log record.
 *
 * Key order is fixed: `ts`, `level`, `event`, `model`, `latency_ms`, then
 * `message` (only when truthy), then any remaining fields.
 *
 * @param {string} level
 * @param {string} event
 * @param {object} [fields] - May carry `message`, `model`, `latency_ms` and
 *   arbitrary extra keys; `model` and `latency_ms` default to null.
 * @returns {object}
 */
function record(level, event, fields = {}) {
  const { message, model = null, latency_ms = null, ...rest } = fields;
  const head = {
    ts: new Date().toISOString(),
    level,
    event,
    model,
    latency_ms,
  };
  const messagePart = message ? { message } : {};
  return { ...head, ...messagePart, ...rest };
}
|
|
69
|
+
|
|
70
|
+
export const defaultLogger = createLogger();
|
package/src/manifest.js
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
// Reproducibility manifest writer — captures enough metadata about a run
|
|
2
|
+
// to reproduce it later (config hash, prompt hash, selected patterns,
|
|
3
|
+
// provider/model, package version, results). Schema is versioned so
|
|
4
|
+
// callers and tooling can detect breaking shape changes.
|
|
5
|
+
|
|
6
|
+
import { createHash } from 'node:crypto';
|
|
7
|
+
import { readFileSync, writeFileSync, mkdirSync } from 'node:fs';
|
|
8
|
+
import { resolve } from 'node:path';
|
|
9
|
+
|
|
10
|
+
export const MANIFEST_SCHEMA_VERSION = '2';
|
|
11
|
+
|
|
12
|
+
/**
 * Returns a `sha256:<hex>` digest of the input.
 *
 * Strings are hashed as-is; any other value is hashed through its
 * `JSON.stringify` form, so the digest of an object depends on its key order.
 *
 * @param {*} input
 * @returns {(string|null)} null when `input` is null or undefined.
 */
export function hashSha256(input) {
  if (input === null || input === undefined) return null;

  let payload = input;
  if (typeof input !== 'string') {
    payload = JSON.stringify(input);
  }

  const digest = createHash('sha256').update(payload).digest('hex');
  return `sha256:${digest}`;
}
|
|
17
|
+
|
|
18
|
+
// Build the manifest body. Caller passes the already-resolved run state;
|
|
19
|
+
// this function is pure (no I/O) so it's easy to unit-test.
|
|
20
|
+
/**
 * Assembles the manifest body from already-resolved run state. Performs no
 * file I/O; `finishedAt` defaults to the current time.
 *
 * `provider`, `backend`, `model` and `configPath` become null when absent,
 * `config` is recorded only as a hash, and each pattern is recorded as its
 * frontmatter `pack` value, falling back to its file name.
 *
 * @param {object} run - Resolved run state (see the destructured fields).
 * @returns {object} Manifest with `manifestVersion` set to the current
 *   schema version.
 */
export function buildManifest({
  patinaVersion,
  mode,
  lang,
  profile,
  provider,
  backend,
  model,
  configPath,
  config,
  patterns,
  results,
  startedAt,
  finishedAt = new Date().toISOString(),
  temperature = null,
  seed = null,
}) {
  const configHash = hashSha256(config);
  const packNames = (patterns ?? []).map((pack) => pack.frontmatter?.pack || pack.file);

  const identity = {
    manifestVersion: MANIFEST_SCHEMA_VERSION,
    patina: patinaVersion,
    startedAt,
    finishedAt,
  };
  const settings = {
    mode,
    lang,
    profile,
    provider: provider ?? null,
    backend: backend ?? null,
    model: model ?? null,
    temperature,
    seed,
    configPath: configPath ?? null,
    configHash,
  };

  return {
    ...identity,
    ...settings,
    patterns: packNames,
    results: results ?? [],
  };
}
|
|
56
|
+
|
|
57
|
+
// Add one input/output pair's hash + ref to the running results array.
|
|
58
|
+
// Mutates the input array for convenience.
|
|
59
|
+
/**
 * Appends one input/output record to `results` and returns that same array
 * (the array is mutated).
 *
 * The prompt and response are stored only as hashes. `scores`,
 * `iterationLog` and `calls` are included only when truthy.
 *
 * @param {object[]} results - Running results array.
 * @param {object} result - Data for the new entry (see the destructured
 *   fields); numeric metadata defaults to null.
 * @returns {object[]} `results`.
 */
export function appendResult(
  results,
  {
    inputPath,
    prompt,
    outputRef,
    response,
    tokensIn = null,
    tokensOut = null,
    temperature = null,
    seed = null,
    cost = null,
    scores,
    iterationLog,
    calls,
  }
) {
  const promptHash = hashSha256(prompt);
  const responseHash = hashSha256(response);

  results.push({
    input: inputPath,
    promptHash,
    responseHash,
    output: outputRef,
    tokensIn,
    tokensOut,
    temperature,
    seed,
    cost,
    ...(scores ? { scores } : {}),
    ...(iterationLog ? { iterationLog } : {}),
    ...(calls ? { calls } : {}),
  });
  return results;
}
|
|
93
|
+
|
|
94
|
+
/**
 * Reads a manifest JSON file and returns it in normalized form.
 *
 * @param {string} path
 * @returns {object} Result of `normalizeManifest` on the parsed file.
 */
export function readManifest(path) {
  const raw = readFileSync(path, 'utf8');
  const parsed = JSON.parse(raw);
  return normalizeManifest(parsed);
}
|
|
97
|
+
|
|
98
|
+
/**
 * Normalizes a parsed manifest so callers see a consistent shape for every
 * supported schema version.
 *
 * A manifest without `manifestVersion` is treated as version '1'. Version-1
 * manifests get `temperature`/`seed` defaulted to null; in both versions the
 * `results` entries are normalized by the matching helper.
 *
 * @param {object} manifest - Parsed manifest JSON.
 * @returns {object} A new manifest object.
 * @throws {Error} When `manifest` is not a plain JSON object (null,
 *   primitives and arrays are rejected — arrays used to slip through the
 *   `typeof` check and be treated as a version-1 manifest), or when its
 *   schema version is not supported.
 */
export function normalizeManifest(manifest) {
  if (!manifest || typeof manifest !== 'object' || Array.isArray(manifest)) {
    throw new Error('Manifest must be a JSON object');
  }

  const version = String(manifest.manifestVersion ?? '1');
  if (version === MANIFEST_SCHEMA_VERSION) {
    return {
      ...manifest,
      results: normalizeV2Results(manifest.results),
    };
  }

  if (version === '1') {
    return {
      ...manifest,
      manifestVersion: '1',
      temperature: manifest.temperature ?? null,
      seed: manifest.seed ?? null,
      results: normalizeV1Results(manifest.results),
    };
  }

  throw new Error(`Unsupported manifest schema version: ${version}`);
}
|
|
123
|
+
|
|
124
|
+
/**
 * Normalizes version-1 result entries: every known field is present (null
 * when missing), unknown fields are dropped, and `scores`, `iterationLog`
 * and `calls` are carried over only when truthy.
 *
 * @param {(object[]|null|undefined)} results
 * @returns {object[]} New entries; `[]` when `results` is null or undefined.
 */
function normalizeV1Results(results) {
  const requiredKeys = [
    'input',
    'promptHash',
    'responseHash',
    'output',
    'tokensIn',
    'tokensOut',
    'temperature',
    'seed',
    'cost',
  ];
  const optionalKeys = ['scores', 'iterationLog', 'calls'];

  const list = results ?? [];
  return list.map((entry) => {
    const normalized = {};
    for (const key of requiredKeys) {
      normalized[key] = entry[key] ?? null;
    }
    for (const key of optionalKeys) {
      if (entry[key]) normalized[key] = entry[key];
    }
    return normalized;
  });
}
|
|
140
|
+
|
|
141
|
+
/**
 * Normalizes version-2 result entries: all existing fields are kept, the
 * listed metadata fields default to null, and `calls` defaults to an empty
 * array.
 *
 * @param {(object[]|null|undefined)} results
 * @returns {object[]} New entries; `[]` when `results` is null or undefined.
 */
function normalizeV2Results(results) {
  const nullableKeys = [
    'responseHash',
    'tokensIn',
    'tokensOut',
    'temperature',
    'seed',
    'cost',
  ];

  const list = results ?? [];
  return list.map((entry) => {
    const normalized = { ...entry };
    for (const key of nullableKeys) {
      normalized[key] = entry[key] ?? null;
    }
    normalized.calls = entry.calls ?? [];
    return normalized;
  });
}
|
|
153
|
+
|
|
154
|
+
/**
 * Writes `manifest.json` (pretty-printed, with a trailing newline) and any
 * extra output files into `dir`, creating the directory when needed.
 *
 * @param {string} dir - Target directory.
 * @param {object} manifest - Manifest to serialize.
 * @param {Iterable<{name: string, content: (string|Uint8Array)}>} [outputs=[]]
 *   Additional files, each resolved relative to `dir`.
 * @returns {string} Absolute path of the written `manifest.json`.
 */
export function writeManifest(dir, manifest, outputs = []) {
  mkdirSync(dir, { recursive: true });

  const manifestPath = resolve(dir, 'manifest.json');
  const serialized = `${JSON.stringify(manifest, null, 2)}\n`;
  writeFileSync(manifestPath, serialized);

  for (const output of outputs) {
    const { name, content } = output;
    writeFileSync(resolve(dir, name), content);
  }

  return manifestPath;
}
|