@tracecart/cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +131 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +149 -0
- package/dist/cli.js.map +1 -0
- package/dist/commands/delta.d.ts +1 -0
- package/dist/commands/delta.js +19 -0
- package/dist/commands/delta.js.map +1 -0
- package/dist/commands/extract-prompt.d.ts +2 -0
- package/dist/commands/extract-prompt.js +35 -0
- package/dist/commands/extract-prompt.js.map +1 -0
- package/dist/commands/extract-validate.d.ts +2 -0
- package/dist/commands/extract-validate.js +21 -0
- package/dist/commands/extract-validate.js.map +1 -0
- package/dist/commands/finalize.d.ts +1 -0
- package/dist/commands/finalize.js +64 -0
- package/dist/commands/finalize.js.map +1 -0
- package/dist/commands/init.d.ts +1 -0
- package/dist/commands/init.js +33 -0
- package/dist/commands/init.js.map +1 -0
- package/dist/commands/match-prompt.d.ts +2 -0
- package/dist/commands/match-prompt.js +51 -0
- package/dist/commands/match-prompt.js.map +1 -0
- package/dist/commands/match-validate.d.ts +2 -0
- package/dist/commands/match-validate.js +15 -0
- package/dist/commands/match-validate.js.map +1 -0
- package/dist/commands/presets.d.ts +1 -0
- package/dist/commands/presets.js +14 -0
- package/dist/commands/presets.js.map +1 -0
- package/dist/commands/remainder.d.ts +1 -0
- package/dist/commands/remainder.js +19 -0
- package/dist/commands/remainder.js.map +1 -0
- package/dist/commands/reverse-extract-validate.d.ts +1 -0
- package/dist/commands/reverse-extract-validate.js +20 -0
- package/dist/commands/reverse-extract-validate.js.map +1 -0
- package/dist/commands/reverse-match-prompt.d.ts +2 -0
- package/dist/commands/reverse-match-prompt.js +50 -0
- package/dist/commands/reverse-match-prompt.js.map +1 -0
- package/dist/commands/reverse-match-validate.d.ts +1 -0
- package/dist/commands/reverse-match-validate.js +14 -0
- package/dist/commands/reverse-match-validate.js.map +1 -0
- package/dist/commands/split.d.ts +1 -0
- package/dist/commands/split.js +26 -0
- package/dist/commands/split.js.map +1 -0
- package/dist/commands/status.d.ts +1 -0
- package/dist/commands/status.js +34 -0
- package/dist/commands/status.js.map +1 -0
- package/dist/commands/update.d.ts +1 -0
- package/dist/commands/update.js +5 -0
- package/dist/commands/update.js.map +1 -0
- package/dist/extract/extract.d.ts +37 -0
- package/dist/extract/extract.js +158 -0
- package/dist/extract/extract.js.map +1 -0
- package/dist/extract/remainder.d.ts +16 -0
- package/dist/extract/remainder.js +34 -0
- package/dist/extract/remainder.js.map +1 -0
- package/dist/match/coverage.d.ts +64 -0
- package/dist/match/coverage.js +375 -0
- package/dist/match/coverage.js.map +1 -0
- package/dist/output/delta.d.ts +46 -0
- package/dist/output/delta.js +155 -0
- package/dist/output/delta.js.map +1 -0
- package/dist/output/trace-map.d.ts +89 -0
- package/dist/output/trace-map.js +135 -0
- package/dist/output/trace-map.js.map +1 -0
- package/dist/parse/clause-split.d.ts +20 -0
- package/dist/parse/clause-split.js +185 -0
- package/dist/parse/clause-split.js.map +1 -0
- package/dist/parse/discover-inputs.d.ts +16 -0
- package/dist/parse/discover-inputs.js +97 -0
- package/dist/parse/discover-inputs.js.map +1 -0
- package/dist/parse/parse-document.d.ts +28 -0
- package/dist/parse/parse-document.js +141 -0
- package/dist/parse/parse-document.js.map +1 -0
- package/dist/preset.d.ts +18 -0
- package/dist/preset.js +85 -0
- package/dist/preset.js.map +1 -0
- package/package.json +58 -0
- package/presets/spec-coverage.json +15 -0
- package/prompts/coverage_check.txt +38 -0
- package/prompts/extract.txt +40 -0
- package/prompts/reverse_check.txt +36 -0
- package/templates/claude/commands/tracecart.md +217 -0
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/** Schema version number that `buildTraceMap` writes into the `version` field of every trace map. */
export declare const TRACE_MAP_VERSION = 1;
|
|
2
|
+
/**
 * Where a trace was found in its originating document.
 * All fields are optional; the annotation builders substitute 0 for a missing
 * `line`, `col_start` or `col_end` and skip traces whose `file` is missing.
 */
interface TraceSource {
|
|
3
|
+
file?: string;
|
|
4
|
+
line?: number;
|
|
5
|
+
col_start?: number;
|
|
6
|
+
col_end?: number;
|
|
7
|
+
}
|
|
8
|
+
/**
 * A reference from a trace to a location in a target document.
 * `buildTargetAnnotations` emits one annotation per ref that has a `file`,
 * defaulting `line` to 0 and `section` to ''.
 */
interface TraceRef {
|
|
9
|
+
file?: string;
|
|
10
|
+
section?: string;
|
|
11
|
+
line?: number;
|
|
12
|
+
}
|
|
13
|
+
/**
 * One trace record as consumed by the trace-map builders.
 * Every named field is optional and arbitrary extra keys are allowed.
 * `status` values counted by `computeSummary` are COVERED, PARTIAL, MISSING,
 * DEFERRED, SUPERSEDED and N/A for forward traces, and TRACED, PARTIAL_SOURCE
 * and UNTRACED_IN_SOURCE for reverse traces; a missing status counts as UNKNOWN.
 * `nearest_source_trace` and `similarity_note` are only read for reverse traces.
 */
interface TraceInput {
|
|
14
|
+
id?: string;
|
|
15
|
+
text?: string;
|
|
16
|
+
type?: string;
|
|
17
|
+
source?: TraceSource;
|
|
18
|
+
status?: string;
|
|
19
|
+
refs?: TraceRef[];
|
|
20
|
+
topics?: string[];
|
|
21
|
+
nearest_source_trace?: string;
|
|
22
|
+
similarity_note?: string;
|
|
23
|
+
[key: string]: unknown;
|
|
24
|
+
}
|
|
25
|
+
/**
 * A clause listed under `untraced_clauses` in a trace map.
 * `buildTraceMap` fills both fields with '' when the input omits them.
 */
interface UntracedClause {
|
|
26
|
+
clause_id: string;
|
|
27
|
+
text: string;
|
|
28
|
+
}
|
|
29
|
+
/**
 * Start/end position of a source annotation.
 * `buildSourceAnnotations` always sets `start.line` and `end.line` to the same
 * value, so ranges it produces stay on a single line.
 */
interface AnnotationRange {
|
|
30
|
+
start: {
|
|
31
|
+
line: number;
|
|
32
|
+
col: number;
|
|
33
|
+
};
|
|
34
|
+
end: {
|
|
35
|
+
line: number;
|
|
36
|
+
col: number;
|
|
37
|
+
};
|
|
38
|
+
}
|
|
39
|
+
/**
 * Annotation attached to a source file by `buildSourceAnnotations`.
 * `status` is 'UNKNOWN' and `trace_text` is '' when the trace lacks them;
 * `trace_id` is passed through unchanged and may be undefined;
 * `refs` is the trace's own ref list (or an empty array).
 */
export interface SourceAnnotation {
|
|
40
|
+
range: AnnotationRange;
|
|
41
|
+
status: string;
|
|
42
|
+
trace_id: string | undefined;
|
|
43
|
+
trace_text: string;
|
|
44
|
+
refs: unknown[];
|
|
45
|
+
}
|
|
46
|
+
/**
 * Annotation attached to a target file by `buildTargetAnnotations`.
 * Annotations derived from a forward trace's refs carry `source`;
 * annotations derived from reverse traces carry `nearest_source_trace` and
 * `similarity_note` instead and always have `section` set to ''.
 */
export interface TargetAnnotation {
|
|
47
|
+
line: number;
|
|
48
|
+
section: string;
|
|
49
|
+
trace_id: string | undefined;
|
|
50
|
+
trace_text: string;
|
|
51
|
+
status: string;
|
|
52
|
+
source?: TraceSource | Record<string, unknown>;
|
|
53
|
+
nearest_source_trace?: string;
|
|
54
|
+
similarity_note?: string;
|
|
55
|
+
}
|
|
56
|
+
/**
 * Aggregate counts produced by `computeSummary`.
 * `total` is the number of forward traces; the per-status fields count traces
 * with that exact status. `coverage_score_pct` is
 * (covered + 0.5 * partial) / (covered + partial + missing) * 100, rounded to
 * one decimal, or 0 when that denominator is 0.
 * The `reverse_*` fields are present only when reverse traces were supplied and
 * follow the same formula over TRACED / PARTIAL_SOURCE / UNTRACED_IN_SOURCE.
 */
export interface Summary {
|
|
57
|
+
total: number;
|
|
58
|
+
covered: number;
|
|
59
|
+
partial: number;
|
|
60
|
+
missing: number;
|
|
61
|
+
deferred: number;
|
|
62
|
+
superseded: number;
|
|
63
|
+
na: number;
|
|
64
|
+
coverage_score_pct: number;
|
|
65
|
+
reverse_total?: number;
|
|
66
|
+
reverse_traced?: number;
|
|
67
|
+
reverse_partial_source?: number;
|
|
68
|
+
reverse_untraced?: number;
|
|
69
|
+
reverse_coverage_pct?: number;
|
|
70
|
+
}
|
|
71
|
+
/**
 * The document returned by `buildTraceMap`.
 * `version` is TRACE_MAP_VERSION and `generated` is an ISO-8601 timestamp taken
 * when the map is built. `reverse_traces` is set only when reverse traces were
 * passed (non-null); `metadata` is set only when a truthy metadata object was passed.
 */
export interface TraceMap {
|
|
72
|
+
version: number;
|
|
73
|
+
generated: string;
|
|
74
|
+
source_files: string[];
|
|
75
|
+
target_files: string[];
|
|
76
|
+
traces: TraceInput[];
|
|
77
|
+
untraced_clauses: UntracedClause[];
|
|
78
|
+
summary: Summary;
|
|
79
|
+
reverse_traces?: TraceInput[];
|
|
80
|
+
metadata?: Record<string, unknown>;
|
|
81
|
+
}
|
|
82
|
+
/**
 * Assembles a trace map from forward traces and the file lists they cover.
 *
 * @param traces - Forward traces, stored as given and summarised.
 * @param sourceFiles - Stored as `source_files`.
 * @param targetFiles - Stored as `target_files`.
 * @param untracedClauses - Optional clauses; missing `clause_id`/`text` become ''. Null or omitted yields an empty list.
 * @param metadata - Attached as `metadata` only when truthy.
 * @param reverseTraces - When not null/omitted, stored as `reverse_traces` and included in the summary.
 * @returns The assembled trace map, including a freshly computed summary.
 */
export declare function buildTraceMap(traces: TraceInput[], sourceFiles: string[], targetFiles: string[], untracedClauses?: Array<{
|
|
83
|
+
clause_id?: string;
|
|
84
|
+
text?: string;
|
|
85
|
+
}> | null, metadata?: Record<string, unknown> | null, reverseTraces?: TraceInput[] | null): TraceMap;
|
|
86
|
+
/**
 * Counts traces per status and derives the coverage percentage.
 * Reverse-trace figures are added only when `reverseTraces` is not null/omitted.
 */
export declare function computeSummary(traces: TraceInput[], reverseTraces?: TraceInput[] | null): Summary;
|
|
87
|
+
/**
 * Groups traces by `source.file` into per-file annotation lists.
 * Traces without a source file are skipped.
 */
export declare function buildSourceAnnotations(traces: TraceInput[]): Record<string, SourceAnnotation[]>;
|
|
88
|
+
/**
 * Groups annotations by target file: one per trace ref that names a file, plus
 * one per reverse trace whose status is UNTRACED_IN_SOURCE or PARTIAL_SOURCE
 * (keyed by that reverse trace's `source.file`).
 */
export declare function buildTargetAnnotations(traces: TraceInput[], reverseTraces?: TraceInput[] | null): Record<string, TargetAnnotation[]>;
|
|
89
|
+
export {};
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
export const TRACE_MAP_VERSION = 1;
|
|
2
|
+
/**
 * Assemble the trace-map document for a set of forward traces.
 *
 * @param traces Forward traces; stored as given and fed into the summary.
 * @param sourceFiles Stored under `source_files`.
 * @param targetFiles Stored under `target_files`.
 * @param untracedClauses Optional clause list; missing `clause_id`/`text` become ''.
 * @param metadata Attached as `metadata` only when truthy.
 * @param reverseTraces Attached as `reverse_traces` whenever it is not null.
 * @returns The trace map, stamped with the schema version and the current time.
 */
export function buildTraceMap(traces, sourceFiles, targetFiles, untracedClauses = null, metadata = null, reverseTraces = null) {
    // The summary is computed first, before the timestamp is taken.
    const summary = computeSummary(traces, reverseTraces);
    const generated = new Date().toISOString();
    const normalizeClause = (clause) => ({
        clause_id: clause.clause_id ?? '',
        text: clause.text ?? '',
    });
    return {
        version: TRACE_MAP_VERSION,
        generated,
        source_files: sourceFiles,
        target_files: targetFiles,
        traces,
        untraced_clauses: (untracedClauses ?? []).map(normalizeClause),
        summary,
        // Optional keys are appended last so the serialized key order is stable.
        ...(reverseTraces !== null ? { reverse_traces: reverseTraces } : {}),
        ...(metadata ? { metadata } : {}),
    };
}
|
|
24
|
+
/**
 * Count traces per status and derive coverage percentages.
 *
 * Forward score: (COVERED + 0.5 * PARTIAL) / (COVERED + PARTIAL + MISSING) * 100,
 * rounded to one decimal; 0 when no trace has one of those three statuses.
 * The reverse score uses the same formula over TRACED / PARTIAL_SOURCE /
 * UNTRACED_IN_SOURCE and is added only when `reverseTraces` is not null.
 *
 * @param traces Forward traces; a missing `status` is tallied as 'UNKNOWN'.
 * @param reverseTraces Optional reverse traces.
 * @returns Summary object with counts and percentages.
 */
export function computeSummary(traces, reverseTraces = null) {
    // Tally items by status string.
    const tally = (items) => {
        const counts = {};
        for (const item of items) {
            const key = item.status ?? 'UNKNOWN';
            counts[key] = (counts[key] ?? 0) + 1;
        }
        return counts;
    };
    // Weighted percentage rounded to one decimal place.
    const percentage = (full, half, none) => {
        const checkable = full + half + none;
        const raw = checkable > 0
            ? (full + half * 0.5) / checkable * 100
            : 0;
        return Math.round(raw * 10) / 10;
    };
    const forward = tally(traces);
    const covered = forward['COVERED'] ?? 0;
    const partial = forward['PARTIAL'] ?? 0;
    const missing = forward['MISSING'] ?? 0;
    const summary = {
        total: traces.length,
        covered,
        partial,
        missing,
        deferred: forward['DEFERRED'] ?? 0,
        superseded: forward['SUPERSEDED'] ?? 0,
        na: forward['N/A'] ?? 0,
        coverage_score_pct: percentage(covered, partial, missing),
    };
    if (reverseTraces === null) {
        return summary;
    }
    const reverse = tally(reverseTraces);
    const traced = reverse['TRACED'] ?? 0;
    const partialSource = reverse['PARTIAL_SOURCE'] ?? 0;
    const untraced = reverse['UNTRACED_IN_SOURCE'] ?? 0;
    return Object.assign(summary, {
        reverse_total: reverseTraces.length,
        reverse_traced: traced,
        reverse_partial_source: partialSource,
        reverse_untraced: untraced,
        reverse_coverage_pct: percentage(traced, partialSource, untraced),
    });
}
|
|
69
|
+
/**
 * Group traces by the file they originate from.
 *
 * Traces without `source.file` are skipped. Missing `line`, `col_start` and
 * `col_end` default to 0, a missing status to 'UNKNOWN', missing text to ''
 * and missing refs to an empty array.
 *
 * @param traces Forward traces.
 * @returns Plain object mapping each source file path to its annotations,
 *          in first-seen order of the paths.
 */
export function buildSourceAnnotations(traces) {
    // Group in a Map rather than a plain object: with `{}` a path such as
    // "constructor" or "toString" resolves to an inherited Object.prototype
    // member, is mistaken for an existing bucket, and `.push` then throws.
    const byFile = new Map();
    for (const trace of traces) {
        const source = trace.source;
        const filepath = source?.file;
        if (!filepath)
            continue;
        let bucket = byFile.get(filepath);
        if (bucket === undefined) {
            bucket = [];
            byFile.set(filepath, bucket);
        }
        // Start and end share the same line: a trace never spans lines here.
        const line = source.line ?? 0;
        bucket.push({
            range: {
                start: { line, col: source.col_start ?? 0 },
                end: { line, col: source.col_end ?? 0 },
            },
            status: trace.status ?? 'UNKNOWN',
            trace_id: trace.id,
            trace_text: trace.text ?? '',
            refs: trace.refs ?? [],
        });
    }
    // Object.fromEntries defines own data properties, so even "__proto__"
    // becomes an ordinary key instead of touching the prototype chain.
    return Object.fromEntries(byFile);
}
|
|
92
|
+
/**
 * Group annotations by the target file they point at.
 *
 * Two kinds of annotation are produced:
 *  - one per forward-trace ref that names a file (carries the trace's `source`);
 *  - one per reverse trace whose status is 'UNTRACED_IN_SOURCE' or
 *    'PARTIAL_SOURCE', keyed by that reverse trace's own `source.file`
 *    (carries `nearest_source_trace` and `similarity_note`).
 *
 * @param traces Forward traces.
 * @param reverseTraces Optional reverse traces; null or omitted adds nothing.
 * @returns Plain object mapping each file path to its annotations, forward
 *          annotations first, in first-seen order of the paths.
 */
export function buildTargetAnnotations(traces, reverseTraces = null) {
    // Group in a Map rather than a plain object: with `{}` a path such as
    // "constructor" or "toString" resolves to an inherited Object.prototype
    // member, is mistaken for an existing bucket, and `.push` then throws.
    const byFile = new Map();
    const add = (filepath, annotation) => {
        const bucket = byFile.get(filepath);
        if (bucket === undefined) {
            byFile.set(filepath, [annotation]);
        }
        else {
            bucket.push(annotation);
        }
    };
    for (const trace of traces) {
        for (const ref of (trace.refs ?? [])) {
            const filepath = ref.file;
            if (!filepath)
                continue;
            add(filepath, {
                line: ref.line ?? 0,
                section: ref.section ?? '',
                trace_id: trace.id,
                trace_text: trace.text ?? '',
                status: trace.status ?? 'UNKNOWN',
                source: trace.source ?? {},
            });
        }
    }
    for (const rt of (reverseTraces ?? [])) {
        // Only reverse traces that are not fully traced get annotated.
        if (rt.status !== 'UNTRACED_IN_SOURCE' && rt.status !== 'PARTIAL_SOURCE') {
            continue;
        }
        const source = rt.source;
        const filepath = source?.file;
        if (!filepath)
            continue;
        add(filepath, {
            line: source.line ?? 0,
            section: '',
            trace_id: rt.id,
            trace_text: rt.text ?? '',
            status: rt.status,
            nearest_source_trace: rt.nearest_source_trace,
            similarity_note: rt.similarity_note ?? '',
        });
    }
    // Object.fromEntries defines own data properties, so even "__proto__"
    // becomes an ordinary key instead of touching the prototype chain.
    return Object.fromEntries(byFile);
}
|
|
135
|
+
//# sourceMappingURL=trace-map.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"trace-map.js","sourceRoot":"","sources":["../../src/output/trace-map.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAAC,CAAC;AAqFnC,MAAM,UAAU,aAAa,CAC3B,MAAoB,EACpB,WAAqB,EACrB,WAAqB,EACrB,kBAAuE,IAAI,EAC3E,WAA2C,IAAI,EAC/C,gBAAqC,IAAI;IAEzC,MAAM,OAAO,GAAG,cAAc,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;IAEtD,MAAM,QAAQ,GAAa;QACzB,OAAO,EAAE,iBAAiB;QAC1B,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,YAAY,EAAE,WAAW;QACzB,YAAY,EAAE,WAAW;QACzB,MAAM;QACN,gBAAgB,EAAE,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YAClD,SAAS,EAAE,CAAC,CAAC,SAAS,IAAI,EAAE;YAC5B,IAAI,EAAE,CAAC,CAAC,IAAI,IAAI,EAAE;SACnB,CAAC,CAAC;QACH,OAAO;KACR,CAAC;IAEF,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;QAC3B,QAAQ,CAAC,cAAc,GAAG,aAAa,CAAC;IAC1C,CAAC;IAED,IAAI,QAAQ,EAAE,CAAC;QACb,QAAQ,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAC/B,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,cAAc,CAC5B,MAAoB,EACpB,gBAAqC,IAAI;IAEzC,MAAM,YAAY,GAA2B,EAAE,CAAC;IAChD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,IAAI,SAAS,CAAC;QACzC,YAAY,CAAC,MAAM,CAAC,GAAG,CAAC,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACzD,CAAC;IAED,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC;IAC5B,MAAM,OAAO,GAAG,YAAY,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IAC7C,MAAM,OAAO,GAAG,YAAY,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IAC7C,MAAM,OAAO,GAAG,YAAY,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IAE7C,MAAM,SAAS,GAAG,OAAO,GAAG,OAAO,GAAG,OAAO,CAAC;IAC9C,MAAM,KAAK,GAAG,SAAS,GAAG,CAAC;QACzB,CAAC,CAAC,CAAC,OAAO,GAAG,OAAO,GAAG,GAAG,CAAC,GAAG,SAAS,GAAG,GAAG;QAC7C,CAAC,CAAC,CAAC,CAAC;IAEN,MAAM,OAAO,GAAY;QACvB,KAAK;QACL,OAAO;QACP,OAAO;QACP,OAAO;QACP,QAAQ,EAAE,YAAY,CAAC,UAAU,CAAC,IAAI,CAAC;QACvC,UAAU,EAAE,YAAY,CAAC,YAAY,CAAC,IAAI,CAAC;QAC3C,EAAE,EAAE,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC;QAC5B,kBAAkB,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,EAAE,CAAC,GAAG,EAAE;KAChD,CAAC;IAEF,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;QAC3B,MAAM,SAAS,GAA2B,EAAE,CAAC;QAC7C,KAAK,MAAM,EAAE,IAAI,aAAa,EAAE,CAAC;YAC/B,MAAM,EAAE,GAAG,EAAE,CAAC,MAAM,IAAI,SAAS,CAAC;YAClC,SAAS,CAAC,EAAE,CAAC,GAAG,CAAC,SAAS,CAAC,E
AAE,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QAC3C,CAAC;QAED,MAAM,SAAS,GAAG,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QAC3C,MAAM,UAAU,GAAG,SAAS,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC;QACpD,MAAM,WAAW,GAAG,SAAS,CAAC,oBAAoB,CAAC,IAAI,CAAC,CAAC;QACzD,MAAM,YAAY,GAAG,SAAS,GAAG,UAAU,GAAG,WAAW,CAAC;QAE1D,MAAM,QAAQ,GAAG,YAAY,GAAG,CAAC;YAC/B,CAAC,CAAC,CAAC,SAAS,GAAG,UAAU,GAAG,GAAG,CAAC,GAAG,YAAY,GAAG,GAAG;YACrD,CAAC,CAAC,CAAC,CAAC;QAEN,OAAO,CAAC,aAAa,GAAG,aAAa,CAAC,MAAM,CAAC;QAC7C,OAAO,CAAC,cAAc,GAAG,SAAS,CAAC;QACnC,OAAO,CAAC,sBAAsB,GAAG,UAAU,CAAC;QAC5C,OAAO,CAAC,gBAAgB,GAAG,WAAW,CAAC;QACvC,OAAO,CAAC,oBAAoB,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,EAAE,CAAC,GAAG,EAAE,CAAC;IAChE,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,MAAoB;IACzD,MAAM,WAAW,GAAuC,EAAE,CAAC;IAE3D,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;QAC5B,MAAM,QAAQ,GAAG,MAAM,EAAE,IAAI,CAAC;QAC9B,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC3B,WAAW,CAAC,QAAQ,CAAC,GAAG,EAAE,CAAC;QAC7B,CAAC;QAED,WAAW,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC;YACzB,KAAK,EAAE;gBACL,KAAK,EAAE,EAAE,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,CAAC,EAAE,GAAG,EAAE,MAAM,CAAC,SAAS,IAAI,CAAC,EAAE;gBAC7D,GAAG,EAAE,EAAE,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,CAAC,EAAE,GAAG,EAAE,MAAM,CAAC,OAAO,IAAI,CAAC,EAAE;aAC1D;YACD,MAAM,EAAE,KAAK,CAAC,MAAM,IAAI,SAAS;YACjC,QAAQ,EAAE,KAAK,CAAC,EAAE;YAClB,UAAU,EAAE,KAAK,CAAC,IAAI,IAAI,EAAE;YAC5B,IAAI,EAAE,KAAK,CAAC,IAAI,IAAI,EAAE;SACvB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC;AAED,MAAM,UAAU,sBAAsB,CACpC,MAAoB,EACpB,gBAAqC,IAAI;IAEzC,MAAM,WAAW,GAAuC,EAAE,CAAC;IAE3D,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,KAAK,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,IAAI,EAAE,CAAC,EAAE,CAAC;YACrC,MAAM,QAAQ,GAAG,GAAG,CAAC,IAAI,CAAC;YAC1B,IAAI,CAAC,QAAQ;gBAAE,SAAS;YAExB,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC3B,WAAW,CAAC,QAAQ,CAAC,GAAG,EAAE,CAAC;YAC7B,CAAC;YAED,WAAW,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC;gBACzB,IAAI,EAAE,GAAG,CAAC,IAAI,IAAI,CAAC;gBACnB,OAAO,EAAE,GAAG,CAAC,OAAO,IAAI,EAAE;gBAC1B,QAAQ,EAAE,KAAK,CAAC,EA
AE;gBAClB,UAAU,EAAE,KAAK,CAAC,IAAI,IAAI,EAAE;gBAC5B,MAAM,EAAE,KAAK,CAAC,MAAM,IAAI,SAAS;gBACjC,MAAM,EAAE,KAAK,CAAC,MAAM,IAAI,EAAE;aAC3B,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,KAAK,MAAM,EAAE,IAAI,CAAC,aAAa,IAAI,EAAE,CAAC,EAAE,CAAC;QACvC,IAAI,EAAE,CAAC,MAAM,KAAK,oBAAoB,IAAI,EAAE,CAAC,MAAM,KAAK,gBAAgB,EAAE,CAAC;YACzE,SAAS;QACX,CAAC;QAED,MAAM,MAAM,GAAG,EAAE,CAAC,MAAM,CAAC;QACzB,MAAM,QAAQ,GAAG,MAAM,EAAE,IAAI,CAAC;QAC9B,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC3B,WAAW,CAAC,QAAQ,CAAC,GAAG,EAAE,CAAC;QAC7B,CAAC;QAED,WAAW,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC;YACzB,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,CAAC;YACtB,OAAO,EAAE,EAAE;YACX,QAAQ,EAAE,EAAE,CAAC,EAAE;YACf,UAAU,EAAE,EAAE,CAAC,IAAI,IAAI,EAAE;YACzB,MAAM,EAAE,EAAE,CAAC,MAAO;YAClB,oBAAoB,EAAE,EAAE,CAAC,oBAAoB;YAC7C,eAAe,EAAE,EAAE,CAAC,eAAe,IAAI,EAAE;SAC1C,CAAC,CAAC;IACL,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
 * One clause emitted by `processDocument`.
 * `clause_id` is `L<line>` for an unsplit line and `L<line>-C<n>` (n starting
 * at 1) for the pieces of a split line; `clause_index` is the 0-based piece
 * index. `line_number` is 1-based. `original_line` is the trimmed input line.
 * `is_meta` is set (to true) only on lines matching the meta patterns.
 */
export interface Clause {
|
|
2
|
+
clause_id: string;
|
|
3
|
+
text: string;
|
|
4
|
+
line_number: number;
|
|
5
|
+
clause_index: number;
|
|
6
|
+
original_line: string;
|
|
7
|
+
is_header: boolean;
|
|
8
|
+
is_meta?: boolean;
|
|
9
|
+
}
|
|
10
|
+
/**
 * JSON report printed by the clause-split command-line entry point.
 * `total_lines` counts the pieces of the input split on "\n";
 * `content_clauses` counts clauses that are not headers;
 * `compound_lines_split` counts distinct lines that produced more than one clause.
 */
export interface DocumentResult {
|
|
11
|
+
total_lines: number;
|
|
12
|
+
content_clauses: number;
|
|
13
|
+
compound_lines_split: number;
|
|
14
|
+
clauses: Clause[];
|
|
15
|
+
}
|
|
16
|
+
/**
 * Splits text at whitespace that follows '.', '!' or '?' and precedes an
 * uppercase letter; pieces are trimmed and empty pieces dropped.
 */
export declare function splitSentences(text: string): string[];
|
|
17
|
+
/**
 * Recursively splits text on the first coordinating-conjunction pattern that
 * matches; returns `[text]` unchanged when none match.
 */
export declare function splitCoordinated(text: string): string[];
|
|
18
|
+
/**
 * Expands an enumeration following a "kell vennie"-style trigger phrase into
 * one clause per item, each repeating the text before the list; returns
 * `[text]` when no trigger yields at least two items.
 */
export declare function splitEnumerations(text: string): string[];
|
|
19
|
+
/**
 * Runs sentence, coordination and enumeration splitting on one line and
 * returns the trimmed, non-empty clauses.
 */
export declare function processLine(text: string): string[];
|
|
20
|
+
/**
 * Converts a document into clauses, line by line. Blank lines are skipped;
 * header and meta lines are kept whole and flagged; other lines are split.
 */
export declare function processDocument(text: string): Clause[];
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import * as fs from "node:fs";
import { fileURLToPath } from "node:url";
|
|
2
|
+
// Sentence boundary: whitespace preceded by '.', '!' or '?' and followed by an
// uppercase letter (ASCII or one of the listed accented capitals).
const SENTENCE_SPLIT = /(?<=[.!?])\s+(?=[A-ZÁÉÍÓÖŐÚÜŰ])/;
// Regex sources tried in order by splitCoordinated. The comma patterns consume
// the conjunction itself, so it is removed from the resulting clauses.
const COORD_CONJUNCTIONS = [
    ",\\s+de\\s+",
    ",\\s+illetve\\s+",
    ",\\s+valamint\\s+",
    ",\\s+viszont\\s+",
    ",\\s+azonban\\s+",
    // "Nem" (Hungarian "not"/"no") is matched with a lookahead so the split
    // happens at the period but the word stays in the following clause.
    // Consuming it would silently drop the negation and invert the clause.
    "\\.\\s+(?=Nem\\s+)",
];
// Lines matching any of these are flagged `is_meta` by processDocument:
// "lásd"/"see" section cross-references, parenthesised "lásd" remarks, and
// dotted-leader lines (presumably table-of-contents entries).
const META_PATTERNS = [
    /^[Ll]ásd:?\s+§/i,
    /^[Ss]ee:?\s+§/i,
    /^\(.{0,5}lásd/i,
    /^\d+\.\s+\.{3,}/,
    /^\.\.\.\s+\d+$/,
];
// NOTE(review): ENUM_PATTERN and LIST_ENUM are not referenced by any function
// in this module; kept as-is.
const ENUM_PATTERN = /(?:kell vennie |figyelembe kell vennie |figyelembevétele |alapján történő ).*?(a [^,]+(?:,\s*a [^,]+)*(?:\s+és\s+a [^,]+))/;
const LIST_ENUM = /,\s+a\s+|,\s+az\s+|\s+és\s+a\s+|\s+és\s+az\s+/;
|
|
20
|
+
/**
 * Break text into sentences at the module's sentence-boundary pattern.
 *
 * @param text Input text.
 * @returns Trimmed sentences, with empty pieces removed.
 */
export function splitSentences(text) {
    const sentences = [];
    for (const piece of text.split(SENTENCE_SPLIT)) {
        const cleaned = piece.trim();
        if (cleaned.length > 0) {
            sentences.push(cleaned);
        }
    }
    return sentences;
}
|
|
24
|
+
/**
 * Split text on coordinating-conjunction patterns.
 *
 * Patterns are tried in list order; the first one that actually splits the
 * text wins, and every resulting piece is split again recursively.
 *
 * @param text Input text.
 * @returns The pieces, or `[text]` when no pattern matches.
 */
export function splitCoordinated(text) {
    for (const source of COORD_CONJUNCTIONS) {
        const pieces = text.split(new RegExp(source));
        if (pieces.length <= 1) {
            continue;
        }
        return pieces.flatMap((piece) => splitCoordinated(piece));
    }
    return [text];
}
|
|
38
|
+
/**
 * Expand an enumeration that follows a trigger phrase into separate clauses.
 *
 * For the first trigger that matches and yields at least two list items, each
 * item becomes "<text before trigger> <trigger phrase><item>". Items are
 * separated by ", " before an "a"/"az" article or by " és "; a trailing
 * period is stripped from each item.
 *
 * @param text Input clause.
 * @returns One clause per enumerated item, or `[text]` when nothing expands.
 */
export function splitEnumerations(text) {
    // [regex source, replacement]; only the regex source is used below.
    const triggers = [
        ["(figyelembe kell vennie )(a .+)", "$1"],
        ["(figyelembe kell venni )(a .+)", "$1"],
        ["(kell vennie )(a .+)", "$1"],
    ];
    const itemSeparator = /,\s+(?=a[z]?\s)|(?:\s+és\s+)/;
    for (const trigger of triggers) {
        const match = text.match(new RegExp(trigger[0]));
        if (match === null) {
            continue;
        }
        const lead = match[1];
        const members = [];
        for (const raw of match[2].split(itemSeparator)) {
            const member = raw.trim().replace(/\.$/, "");
            if (member.length > 0) {
                members.push(member);
            }
        }
        // A single item is not an enumeration; try the next trigger.
        if (members.length < 2) {
            continue;
        }
        const head = text.slice(0, match.index).trim();
        return members.map((member) => {
            const tail = `${lead}${member}`;
            return (head ? `${head} ${tail}` : tail).trim();
        });
    }
    return [text];
}
|
|
69
|
+
/**
 * Split one line into clauses: sentences first, then coordinated parts,
 * then enumerations.
 *
 * @param text A single line of text.
 * @returns Trimmed, non-empty clauses in reading order.
 */
export function processLine(text) {
    return splitSentences(text)
        .flatMap((sentence) => splitCoordinated(sentence))
        .flatMap((part) => splitEnumerations(part))
        .map((clause) => clause.trim())
        .filter((clause) => clause.length > 0);
}
|
|
81
|
+
/**
 * Turn a document into a flat list of clauses.
 *
 * Each non-blank line (after trimming) is handled as one of:
 *  - header: starts with digits, then '.' or '\', optional digits and
 *    whitespace, or starts with one or more '#' and whitespace; kept whole
 *    with `is_header: true`;
 *  - meta: matches one of the meta patterns; kept whole with `is_meta: true`;
 *  - content: split with processLine. One clause or fewer keeps the whole
 *    line under id `L<n>`; otherwise each piece gets id `L<n>-C<k>`.
 *
 * @param text Full document text; lines are separated by "\n".
 * @returns Clauses in document order, with 1-based line numbers.
 */
export function processDocument(text) {
    const numberedHeader = /^\d+[\\.]\d*\s/;
    const markdownHeader = /^#+\s/;
    const collected = [];
    text.split("\n").forEach((rawLine, idx) => {
        const line = rawLine.trim();
        if (line === "") {
            return;
        }
        const lineNum = idx + 1;
        // Clause covering the whole line; `flags` supplies is_header / is_meta.
        const wholeLine = (flags) => ({
            clause_id: `L${lineNum}`,
            text: line,
            line_number: lineNum,
            clause_index: 0,
            original_line: line,
            ...flags,
        });
        if (numberedHeader.test(line) || markdownHeader.test(line)) {
            collected.push(wholeLine({ is_header: true }));
            return;
        }
        if (META_PATTERNS.some((pattern) => pattern.test(line))) {
            collected.push(wholeLine({ is_header: false, is_meta: true }));
            return;
        }
        const parts = processLine(line);
        if (parts.length <= 1) {
            collected.push(wholeLine({ is_header: false }));
            return;
        }
        parts.forEach((part, partIdx) => {
            collected.push({
                clause_id: `L${lineNum}-C${partIdx + 1}`,
                text: part,
                line_number: lineNum,
                clause_index: partIdx,
                original_line: line,
                is_header: false,
            });
        });
    });
    return collected;
}
|
|
138
|
+
/**
 * Command-line entry point.
 *
 * Reads the document named by the first argument ("-" means stdin), splits it
 * into clauses and writes a JSON report to stdout. With `--stats`, a short
 * human-readable summary is also written to stderr. With no arguments, prints
 * usage to stderr and exits with status 1.
 */
function main() {
    const argv = process.argv.slice(2);
    if (argv.length === 0) {
        process.stderr.write("Usage: clause-split.ts <input_file_or_-> [--stats]\n");
        process.stderr.write(" Reads from file or stdin (-). Outputs JSON.\n");
        process.exit(1);
    }
    const wantStats = argv.includes("--stats");
    // File descriptor 0 is stdin.
    const input = argv[0] === "-"
        ? fs.readFileSync(0, "utf-8")
        : fs.readFileSync(argv[0], "utf-8");
    const clauses = processDocument(input);
    const body = clauses.filter((clause) => !clause.is_header);
    // Lines that produced a second (or later) clause were split.
    const splitLines = new Set(body
        .filter((clause) => clause.clause_index > 0)
        .map((clause) => clause.line_number));
    const report = {
        total_lines: input.split("\n").length,
        content_clauses: body.length,
        compound_lines_split: splitLines.size,
        clauses,
    };
    if (wantStats) {
        process.stderr.write(`Total lines: ${report.total_lines}\n`);
        process.stderr.write(`Content clauses: ${report.content_clauses}\n`);
        process.stderr.write(`Compound lines split: ${report.compound_lines_split}\n`);
        for (const clause of clauses) {
            if (clause.is_header) {
                continue;
            }
            const marker = clause.clause_index > 0 ? " *" : "";
            process.stderr.write(` [${clause.clause_id}] ${clause.text.slice(0, 80)}${marker}\n`);
        }
    }
    process.stdout.write(JSON.stringify(report, null, 2));
}
|
|
180
|
+
/**
 * Report whether this module is the script Node was started with.
 *
 * The module URL is converted with fileURLToPath rather than by stripping
 * "file://": file URLs are percent-encoded (a space becomes "%20") and on
 * Windows carry a leading slash before the drive letter, so the stripped
 * string is not a usable path and realpathSync would throw while the module
 * is merely being imported.
 *
 * @returns true only when process.argv[1] and this module resolve to the same
 *          real path; false when there is no script argument or either path
 *          cannot be resolved.
 */
function isMainModule() {
    const entry = process.argv[1];
    if (!entry) {
        return false;
    }
    try {
        return fs.realpathSync(entry) === fs.realpathSync(fileURLToPath(import.meta.url));
    }
    catch {
        // An unresolvable path means we cannot be the entry script; importing
        // this module must never fail because of this check.
        return false;
    }
}
const isMain = isMainModule();
if (isMain) {
    main();
}
|
|
185
|
+
//# sourceMappingURL=clause-split.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"clause-split.js","sourceRoot":"","sources":["../../src/parse/clause-split.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAE9B,MAAM,cAAc,GAAG,iCAAiC,CAAC;AAEzD,MAAM,kBAAkB,GAAa;IACnC,aAAa;IACb,kBAAkB;IAClB,mBAAmB;IACnB,kBAAkB;IAClB,kBAAkB;IAClB,gBAAgB;CACjB,CAAC;AAEF,MAAM,aAAa,GAAa;IAC9B,iBAAiB;IACjB,gBAAgB;IAChB,gBAAgB;IAChB,iBAAiB;IACjB,gBAAgB;CACjB,CAAC;AAEF,MAAM,YAAY,GAChB,4HAA4H,CAAC;AAE/H,MAAM,SAAS,GAAG,+CAA+C,CAAC;AAmBlE,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;IACzC,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AAChE,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,IAAY;IAC3C,KAAK,MAAM,OAAO,IAAI,kBAAkB,EAAE,CAAC;QACzC,MAAM,EAAE,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAC7B,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrB,MAAM,MAAM,GAAa,EAAE,CAAC;YAC5B,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;gBACtB,MAAM,CAAC,IAAI,CAAC,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAC,CAAC;YACtC,CAAC;YACD,OAAO,MAAM,CAAC;QAChB,CAAC;IACH,CAAC;IACD,OAAO,CAAC,IAAI,CAAC,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,IAAY;IAC5C,MAAM,QAAQ,GAAuB;QACnC,CAAC,iCAAiC,EAAE,IAAI,CAAC;QACzC,CAAC,gCAAgC,EAAE,IAAI,CAAC;QACxC,CAAC,sBAAsB,EAAE,IAAI,CAAC;KAC/B,CAAC;IAEF,KAAK,MAAM,CAAC,cAAc,CAAC,IAAI,QAAQ,EAAE,CAAC;QACxC,MAAM,CAAC,GAAG,IAAI,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChD,IAAI,CAAC,EAAE,CAAC;YACN,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YACpB,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YACtB,MAAM,KAAK,GAAG,QAAQ;iBACnB,KAAK,CAAC,8BAA8B,CAAC;iBACrC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;iBACvC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YAC/B,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrB,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;gBAC7C,MAAM,OAAO,GAAa,EAAE,CAAC;gBAC7B,KAAK,MAA
M,IAAI,IAAI,KAAK,EAAE,CAAC;oBACzB,IAAI,MAAM,GAAG,GAAG,MAAM,GAAG,IAAI,EAAE,CAAC;oBAChC,IAAI,MAAM,EAAE,CAAC;wBACX,MAAM,GAAG,GAAG,MAAM,IAAI,MAAM,EAAE,CAAC;oBACjC,CAAC;oBACD,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;gBAC9B,CAAC;gBACD,OAAO,OAAO,CAAC;YACjB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAI,CAAC,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,IAAY;IACtC,MAAM,SAAS,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;IAEvC,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC7B,MAAM,WAAW,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;QAC3C,KAAK,MAAM,KAAK,IAAI,WAAW,EAAE,CAAC;YAChC,MAAM,UAAU,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;YAC5C,OAAO,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;QAC9B,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AAClE,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,OAAO,GAAa,EAAE,CAAC;IAE7B,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,GAAG,KAAK,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE,CAAC;QACxD,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;QACvC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,CAAC;QAE5B,IAAI,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC9D,OAAO,CAAC,IAAI,CAAC;gBACX,SAAS,EAAE,IAAI,OAAO,EAAE;gBACxB,IAAI,EAAE,QAAQ;gBACd,WAAW,EAAE,OAAO;gBACpB,YAAY,EAAE,CAAC;gBACf,aAAa,EAAE,QAAQ;gBACvB,SAAS,EAAE,IAAI;aAChB,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,IAAI,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;YAChD,OAAO,CAAC,IAAI,CAAC;gBACX,SAAS,EAAE,IAAI,OAAO,EAAE;gBACxB,IAAI,EAAE,QAAQ;gBACd,WAAW,EAAE,OAAO;gBACpB,YAAY,EAAE,CAAC;gBACf,aAAa,EAAE,QAAQ;gBACvB,SAAS,EAAE,KAAK;gBAChB,OAAO,EAAE,IAAI;aACd,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,MAAM,UAAU,GAAG,WAAW,CAAC,QAAQ,CAAC,CAAC;QAEzC,IAAI,UAAU,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YAC3B,OAAO,CAAC,IAAI,CAAC;gBACX,SAAS,EAAE,IAAI,OAAO,EAAE;gBACxB,IAAI,EAAE,QAAQ;gBACd,WAAW,EAAE,OAAO;gBACpB,YAAY,EAAE,CA
AC;gBACf,aAAa,EAAE,QAAQ;gBACvB,SAAS,EAAE,KAAK;aACjB,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,KAAK,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,UAAU,CAAC,MAAM,EAAE,EAAE,EAAE,EAAE,CAAC;gBAC9C,OAAO,CAAC,IAAI,CAAC;oBACX,SAAS,EAAE,IAAI,OAAO,KAAK,EAAE,GAAG,CAAC,EAAE;oBACnC,IAAI,EAAE,UAAU,CAAC,EAAE,CAAC;oBACpB,WAAW,EAAE,OAAO;oBACpB,YAAY,EAAE,EAAE;oBAChB,aAAa,EAAE,QAAQ;oBACvB,SAAS,EAAE,KAAK;iBACjB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,IAAI;IACX,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAEnC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,sDAAsD,CACvD,CAAC;QACF,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,iDAAiD,CAClD,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;IAE3C,IAAI,IAAY,CAAC;IACjB,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;QACpB,IAAI,GAAG,EAAE,CAAC,YAAY,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;IACrC,CAAC;SAAM,CAAC;QACN,IAAI,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;IAC3C,CAAC;IAED,MAAM,OAAO,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IAEtC,MAAM,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;IAC3D,MAAM,aAAa,GAAG,IAAI,GAAG,EAAU,CAAC;IACxC,KAAK,MAAM,CAAC,IAAI,cAAc,EAAE,CAAC;QAC/B,IAAI,CAAC,CAAC,YAAY,GAAG,CAAC,EAAE,CAAC;YACvB,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;QACnC,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GAAmB;QAC7B,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM;QACpC,eAAe,EAAE,cAAc,CAAC,MAAM;QACtC,oBAAoB,EAAE,aAAa,CAAC,IAAI;QACxC,OAAO;KACR,CAAC;IAEF,IAAI,SAAS,EAAE,CAAC;QACd,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,gBAAgB,MAAM,CAAC,WAAW,IAAI,CAAC,CAAC;QAC7D,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,oBAAoB,MAAM,CAAC,eAAe,IAAI,CAAC,CAAC;QACrE,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,yBAAyB,MAAM,CAAC,oBAAoB,IAAI,CACzD,CAAC;QACF,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,IAAI,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC;gBACjB,MAAM,MAAM,GAAG,CAAC,CAAC,YAAY,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC/C,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,MAAM,CAAC,
CAAC,SAAS,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,MAAM,IAAI,CACvD,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;AACxD,CAAC;AAED,MAAM,MAAM,GACV,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;IACf,EAAE,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,CAAC;AAC/F,IAAI,MAAM,EAAE,CAAC;IACX,IAAI,EAAE,CAAC;AACT,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
 * A discovered input document, as returned by `discover`.
 * NOTE(review): presumably `type` is a label from `classifyDocument` and
 * `date` comes from `extractDate` — confirm against the `discover` implementation.
 */
export interface Document {
|
|
2
|
+
path: string;
|
|
3
|
+
filename: string;
|
|
4
|
+
type: string;
|
|
5
|
+
date: string;
|
|
6
|
+
}
|
|
7
|
+
/**
 * Summary of a discovery run: the base directory, the documents found, and
 * counts keyed by a string.
 * NOTE(review): `by_type` presumably counts documents per `Document.type` and
 * `total` presumably equals `documents.length` — confirm against the producer.
 */
export interface DiscoverResult {
|
|
8
|
+
base_dir: string;
|
|
9
|
+
total: number;
|
|
10
|
+
by_type: Record<string, number>;
|
|
11
|
+
documents: Document[];
|
|
12
|
+
}
|
|
13
|
+
/**
 * Returns the first `dddd-dd-dd` (four, two, two digits) substring of the
 * filename, or null when there is none. The digits are not validated as a
 * real calendar date.
 */
export declare function extractDate(filename: string): string | null;
|
|
14
|
+
/**
 * Returns `true` when the lower-cased `filename` contains one of the skip
 * fragments ("teszt-", "calendar-invite-", "happening-now-",
 * "invitation-for-").
 */
export declare function shouldSkip(filename: string): boolean;
|
|
15
|
+
/**
 * Classifies a document by the path segments of `filePath` (split on the
 * platform separator). The first match wins, checked in this order:
 * "meetings" -> "meeting", "emails" -> "email", "discord" -> "discord",
 * "discord-voice" -> "discord-voice", "client-spec" -> "client-spec";
 * anything else is "other".
 */
export declare function classifyDocument(filePath: string): string;
|
|
16
|
+
/**
 * Recursively collects the `.md` files under `<baseDir>/docs/converted`,
 * drops those rejected by `shouldSkip`, and returns one `Document` per
 * remaining file, sorted by `date` and then by `path`.
 *
 * When `<baseDir>/docs/converted` does not exist, an error line is written to
 * stderr and the process exits with status 1.
 */
export declare function discover(baseDir: string): Document[];
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import * as fs from "node:fs";
import * as path from "node:path";
import { fileURLToPath } from "node:url";
|
|
3
|
+
/** Lower-case filename fragments; shouldSkip excludes any file whose name contains one of them. */
const SKIP_PATTERNS = ["teszt-", "calendar-invite-", "happening-now-", "invitation-for-"];
/** Captures the first `YYYY-MM-DD`-shaped run in a string (shape only; the date itself is not validated). */
const DATE_PATTERN = /(\d{4}-\d{2}-\d{2})/;
|
|
10
|
+
/**
 * Pulls the first `YYYY-MM-DD`-shaped substring out of a filename.
 *
 * @param {string} filename - Name to search; matched against DATE_PATTERN.
 * @returns {string | null} The matched date text, or `null` when nothing matches.
 */
export function extractDate(filename) {
    const found = filename.match(DATE_PATTERN);
    return found === null ? null : found[1];
}
|
|
14
|
+
/**
 * Reports whether a filename contains one of the SKIP_PATTERNS fragments.
 *
 * The comparison is case-insensitive: the filename is lower-cased before the
 * (lower-case) fragments are searched for in it.
 *
 * @param {string} filename - Base name of the file to test.
 * @returns {boolean} `true` when any fragment occurs anywhere in the name.
 */
export function shouldSkip(filename) {
    const lower = filename.toLowerCase();
    // Plain substring search: the fragments are literals, so building a RegExp
    // from each one on every call is wasted work and would misread any future
    // fragment that contains a regex metacharacter (".", "+", "(", ...).
    return SKIP_PATTERNS.some((fragment) => lower.includes(fragment));
}
|
|
18
|
+
/**
 * Derives a document type from the path segments of a file path.
 *
 * The path is split on the platform separator and each segment is compared
 * exactly (so "meetings-archive" does not count as "meetings").
 *
 * @param {string} filePath - Path of the document.
 * @returns {string} "meeting", "email", "discord", "discord-voice",
 *   "client-spec", or "other" when no known segment is present.
 */
export function classifyDocument(filePath) {
    const segments = new Set(filePath.split(path.sep));
    // Checked top to bottom; the first folder name present decides the type.
    const typeByFolder = [
        ["meetings", "meeting"],
        ["emails", "email"],
        ["discord", "discord"],
        ["discord-voice", "discord-voice"],
        ["client-spec", "client-spec"],
    ];
    for (const [folder, docType] of typeByFolder) {
        if (segments.has(folder)) {
            return docType;
        }
    }
    return "other";
}
|
|
32
|
+
/**
 * Collects every regular file whose name ends in ".md" beneath a directory.
 *
 * @param {string} dir - Directory to search, including all subdirectories.
 * @returns {string[]} Paths (each built by joining `dir` with the relative
 *   location) in default string sort order; empty when `dir` does not exist.
 */
function walkMdFiles(dir) {
    const found = [];
    // Explicit work list instead of recursion + `push(...sub)`: spreading an
    // arbitrarily large array into call arguments can throw a RangeError, and
    // the recursive form re-sorted the accumulated list at every level.
    const pending = [dir];
    while (pending.length > 0) {
        const current = pending.pop();
        // A directory can be missing (initial argument) or vanish between
        // being listed and being visited; either way it contributes nothing.
        if (!fs.existsSync(current)) {
            continue;
        }
        for (const entry of fs.readdirSync(current, { withFileTypes: true })) {
            const fullPath = path.join(current, entry.name);
            if (entry.isDirectory()) {
                pending.push(fullPath);
            }
            else if (entry.isFile() && entry.name.endsWith(".md")) {
                found.push(fullPath);
            }
        }
    }
    // One sort over the complete list gives the same order as before.
    return found.sort();
}
|
|
48
|
+
/**
 * Builds the sorted list of documents found under `<baseDir>/docs/converted`.
 *
 * Every `.md` file below that directory is considered; files rejected by
 * shouldSkip are dropped. Each remaining file becomes a record with its path
 * relative to `baseDir`, its base name, its classifyDocument type, and the
 * date from extractDate ("0000-00-00" when the name carries no date).
 *
 * If the converted directory does not exist, an error line is written to
 * stderr and the process exits with status 1.
 *
 * @param {string} baseDir - Project root that contains `docs/converted`.
 * @returns {{path: string, filename: string, type: string, date: string}[]}
 *   Records ordered by date, then by path (plain string comparison).
 */
export function discover(baseDir) {
    const convertedDir = path.join(baseDir, "docs", "converted");
    if (!fs.existsSync(convertedDir)) {
        process.stderr.write(`Error: ${convertedDir} not found\n`);
        process.exit(1);
    }
    const documents = walkMdFiles(convertedDir)
        .map((file) => ({ file, filename: path.basename(file) }))
        .filter(({ filename }) => !shouldSkip(filename))
        .map(({ file, filename }) => ({
            path: path.relative(baseDir, file),
            filename,
            type: classifyDocument(file),
            date: extractDate(filename) ?? "0000-00-00",
        }));
    // Undated files carry "0000-00-00" and therefore sort first.
    const byDateThenPath = (left, right) => {
        if (left.date < right.date)
            return -1;
        if (left.date > right.date)
            return 1;
        if (left.path < right.path)
            return -1;
        if (left.path > right.path)
            return 1;
        return 0;
    };
    documents.sort(byDateThenPath);
    return documents;
}
|
|
77
|
+
/**
 * Command-line entry point: runs discover on the directory given as the first
 * CLI argument (default ".") and writes a pretty-printed JSON summary to
 * stdout. The summary holds the base directory, the document count, a
 * per-type count, and the documents themselves. No trailing newline is written.
 */
function main() {
    const baseDir = process.argv[2] ?? ".";
    const documents = discover(baseDir);
    // Tally documents per type; keys appear in order of first occurrence.
    const countsByType = documents.reduce((tally, { type }) => {
        tally[type] = (tally[type] ?? 0) + 1;
        return tally;
    }, {});
    const report = {
        base_dir: baseDir,
        total: documents.length,
        by_type: countsByType,
        documents,
    };
    process.stdout.write(JSON.stringify(report, null, 2));
}
|
|
92
|
+
// Run main() only when this file is the script Node was started with, not
// when it is imported as a module. Both sides go through realpathSync so a
// symlinked entry script still compares equal to this file.
//
// fileURLToPath is used instead of stripping "file://" by hand: it decodes
// percent-escapes (a space in the install path shows up as %20 in
// import.meta.url) and produces a proper drive-letter path on Windows.
// Without it realpathSync is handed a path that does not exist and throws
// while the module is being loaded.
const isMain = process.argv[1] &&
    fs.realpathSync(process.argv[1]) === fs.realpathSync(fileURLToPath(import.meta.url));
if (isMain) {
    main();
}
|
|
97
|
+
//# sourceMappingURL=discover-inputs.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"discover-inputs.js","sourceRoot":"","sources":["../../src/parse/discover-inputs.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,MAAM,aAAa,GAAa;IAC9B,QAAQ;IACR,kBAAkB;IAClB,gBAAgB;IAChB,iBAAiB;CAClB,CAAC;AAEF,MAAM,YAAY,GAAG,qBAAqB,CAAC;AAgB3C,MAAM,UAAU,WAAW,CAAC,QAAgB;IAC1C,MAAM,CAAC,GAAG,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACtC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AACzB,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,QAAgB;IACzC,MAAM,KAAK,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC;IACrC,OAAO,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;AAC9D,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,QAAgB;IAC/C,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACvC,IAAI,KAAK,CAAC,QAAQ,CAAC,UAAU,CAAC;QAAE,OAAO,SAAS,CAAC;IACjD,IAAI,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAAE,OAAO,OAAO,CAAC;IAC7C,IAAI,KAAK,CAAC,QAAQ,CAAC,SAAS,CAAC;QAAE,OAAO,SAAS,CAAC;IAChD,IAAI,KAAK,CAAC,QAAQ,CAAC,eAAe,CAAC;QAAE,OAAO,eAAe,CAAC;IAC5D,IAAI,KAAK,CAAC,QAAQ,CAAC,aAAa,CAAC;QAAE,OAAO,aAAa,CAAC;IACxD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,WAAW,CAAC,GAAW;IAC9B,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC;QAAE,OAAO,OAAO,CAAC;IAExC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;IAC7D,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;QAC5C,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACxB,OAAO,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,QAAQ,CAAC,CAAC,CAAC;QACzC,CAAC;aAAM,IAAI,KAAK,CAAC,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;YACxD,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACzB,CAAC;IACH,CAAC;IACD,OAAO,OAAO,CAAC,IAAI,EAAE,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,QAAQ,CAAC,OAAe;IACtC,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,WAAW,CAAC,CAAC;IAC7D,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;QACjC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,UAAU,YAAY,cAAc,CAAC,CAAC;QAC3D,OAAO,CAAC,I
AAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,OAAO,GAAG,WAAW,CAAC,YAAY,CAAC,CAAC;IAC1C,MAAM,OAAO,GAAe,EAAE,CAAC;IAE/B,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAEvC,IAAI,UAAU,CAAC,QAAQ,CAAC;YAAE,SAAS;QAEnC,MAAM,OAAO,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC;QACzC,MAAM,IAAI,GAAG,WAAW,CAAC,QAAQ,CAAC,CAAC;QACnC,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAE/C,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,OAAO;YACb,QAAQ;YACR,IAAI,EAAE,OAAO;YACb,IAAI,EAAE,IAAI,IAAI,YAAY;SAC3B,CAAC,CAAC;IACL,CAAC;IAED,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACpB,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,IAAI;YAAE,OAAO,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACvD,OAAO,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACxD,CAAC,CAAC,CAAC;IAEH,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,IAAI;IACX,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC;IAEvC,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC;IAEpC,MAAM,OAAO,GAA2B,EAAE,CAAC;IAC3C,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;QAC5B,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACnD,CAAC;IAED,MAAM,MAAM,GAAmB;QAC7B,QAAQ,EAAE,OAAO;QACjB,KAAK,EAAE,SAAS,CAAC,MAAM;QACvB,OAAO,EAAE,OAAO;QAChB,SAAS;KACV,CAAC;IAEF,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;AACxD,CAAC;AAED,MAAM,MAAM,GACV,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;IACf,EAAE,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,CAAC;AAC/F,IAAI,MAAM,EAAE,CAAC;IACX,IAAI,EAAE,CAAC;AACT,CAAC"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
 * A chapter heading: numeric chapter `number`, `title` text, and a `line`
 * position.
 *
 * NOTE(review): the implementation is not part of this declaration file.
 * `line` is presumably the index of the heading within the parsed lines;
 * confirm there whether it is 0- or 1-based.
 */
export interface ChapterHeader {
|
|
2
|
+
number: number;
|
|
3
|
+
title: string;
|
|
4
|
+
line: number;
|
|
5
|
+
}
|
|
6
|
+
/**
 * A chapter's `number` and `title` together with the line span
 * (`start_line` to `end_line`) it covers. This is the element type returned
 * by `findChapterBoundaries` and the `chapter` argument of
 * `extractChapterText`.
 *
 * NOTE(review): whether the span is 0- or 1-based and whether `end_line` is
 * inclusive cannot be told from the declaration; confirm in the
 * implementation.
 */
export interface ChapterBoundary {
|
|
7
|
+
number: number;
|
|
8
|
+
title: string;
|
|
9
|
+
start_line: number;
|
|
10
|
+
end_line: number;
|
|
11
|
+
}
|
|
12
|
+
/**
 * A section entry, the element type returned by `parseSections`.
 *
 * `number` is a string here, unlike the numeric chapter number; presumably
 * it holds a dotted identifier such as "3.2" (TODO confirm against the
 * implementation).
 */
export interface Section {
|
|
13
|
+
number: string;
|
|
14
|
+
title: string;
|
|
15
|
+
}
|
|
16
|
+
/**
 * Full record for one chapter: the four `ChapterBoundary` fields (`number`,
 * `title`, `start_line`, `end_line`) plus its `sections`, a `content_lines`
 * count and the chapter `text`.
 *
 * NOTE(review): presumably `sections`, `content_lines` and `text` come from
 * `parseSections`, `countContentLines` and `extractChapterText` respectively;
 * the code that assembles this record is not visible here, so confirm.
 */
export interface ChapterResult {
|
|
17
|
+
number: number;
|
|
18
|
+
title: string;
|
|
19
|
+
start_line: number;
|
|
20
|
+
end_line: number;
|
|
21
|
+
sections: Section[];
|
|
22
|
+
content_lines: number;
|
|
23
|
+
text: string;
|
|
24
|
+
}
|
|
25
|
+
/**
 * Takes a document as an array of lines and returns one `ChapterBoundary`
 * per chapter.
 *
 * NOTE(review): how chapter headings are recognised and how `end_line` is
 * derived is defined in the implementation, which is not visible here.
 */
export declare function findChapterBoundaries(lines: string[]): ChapterBoundary[];
|
|
26
|
+
/**
 * Returns text for `chapter` taken from `lines`.
 *
 * NOTE(review): presumably the lines from `chapter.start_line` to
 * `chapter.end_line` joined into one string; confirm the indexing and the
 * join character in the implementation.
 */
export declare function extractChapterText(lines: string[], chapter: ChapterBoundary): string;
|
|
27
|
+
/**
 * Returns a line count for `text`.
 *
 * NOTE(review): which lines count as "content" (for example whether blank
 * lines or headings are excluded) is decided by the implementation; confirm
 * there.
 */
export declare function countContentLines(text: string): number;
|
|
28
|
+
/**
 * Returns the `Section` entries found in a chapter's `text`.
 *
 * NOTE(review): `chapterNum` is presumably used to recognise section numbers
 * belonging to that chapter (e.g. "3.x" for chapter 3); confirm in the
 * implementation.
 */
export declare function parseSections(text: string, chapterNum: number): Section[];
|