@nforma.ai/nforma 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +1024 -0
- package/agents/qgsd-codebase-mapper.md +764 -0
- package/agents/qgsd-debugger.md +1201 -0
- package/agents/qgsd-executor.md +472 -0
- package/agents/qgsd-integration-checker.md +443 -0
- package/agents/qgsd-phase-researcher.md +502 -0
- package/agents/qgsd-plan-checker.md +643 -0
- package/agents/qgsd-planner.md +1182 -0
- package/agents/qgsd-project-researcher.md +621 -0
- package/agents/qgsd-quorum-orchestrator.md +628 -0
- package/agents/qgsd-quorum-slot-worker.md +41 -0
- package/agents/qgsd-quorum-synthesizer.md +133 -0
- package/agents/qgsd-quorum-test-worker.md +37 -0
- package/agents/qgsd-quorum-worker.md +161 -0
- package/agents/qgsd-research-synthesizer.md +239 -0
- package/agents/qgsd-roadmapper.md +660 -0
- package/agents/qgsd-verifier.md +628 -0
- package/bin/accept-debug-invariant.cjs +165 -0
- package/bin/account-manager.cjs +719 -0
- package/bin/aggregate-requirements.cjs +466 -0
- package/bin/analyze-assumptions.cjs +757 -0
- package/bin/analyze-state-space.cjs +921 -0
- package/bin/attribute-trace-divergence.cjs +150 -0
- package/bin/auth-drivers/gh-cli.cjs +93 -0
- package/bin/auth-drivers/index.cjs +46 -0
- package/bin/auth-drivers/pool.cjs +67 -0
- package/bin/auth-drivers/simple.cjs +95 -0
- package/bin/autoClosePtoF.cjs +110 -0
- package/bin/blessed-terminal.cjs +350 -0
- package/bin/build-phase-index.cjs +472 -0
- package/bin/call-quorum-slot.cjs +541 -0
- package/bin/ccr-secure-config.cjs +99 -0
- package/bin/ccr-secure-start.cjs +83 -0
- package/bin/check-bundled-sdks.cjs +177 -0
- package/bin/check-coverage-guard.cjs +112 -0
- package/bin/check-liveness-fairness.cjs +95 -0
- package/bin/check-mcp-health.cjs +123 -0
- package/bin/check-provider-health.cjs +395 -0
- package/bin/check-results-exit.cjs +24 -0
- package/bin/check-spec-sync.cjs +360 -0
- package/bin/check-trace-redaction.cjs +271 -0
- package/bin/check-trace-schema-drift.cjs +99 -0
- package/bin/compareDrift.cjs +21 -0
- package/bin/conformance-schema.cjs +12 -0
- package/bin/count-scenarios.cjs +420 -0
- package/bin/debt-dedup.cjs +144 -0
- package/bin/debt-ledger.cjs +61 -0
- package/bin/debt-retention.cjs +76 -0
- package/bin/debt-state-machine.cjs +80 -0
- package/bin/detect-coverage-gaps.cjs +204 -0
- package/bin/detect-project-intent.cjs +362 -0
- package/bin/export-prism-constants.cjs +164 -0
- package/bin/extract-annotations.cjs +633 -0
- package/bin/extractFormalExpected.cjs +104 -0
- package/bin/fingerprint-drift.cjs +24 -0
- package/bin/fingerprint-issue.cjs +46 -0
- package/bin/formal-core.cjs +519 -0
- package/bin/formal-ref-linker.cjs +141 -0
- package/bin/formal-test-sync.cjs +788 -0
- package/bin/generate-formal-specs.cjs +588 -0
- package/bin/generate-petri-net.cjs +397 -0
- package/bin/generate-phase-spec.cjs +249 -0
- package/bin/generate-proposed-changes.cjs +194 -0
- package/bin/generate-tla-cfg.cjs +122 -0
- package/bin/generate-traceability-matrix.cjs +701 -0
- package/bin/generate-triage-bundle.cjs +300 -0
- package/bin/gh-account-rotate.cjs +34 -0
- package/bin/initialize-model-registry.cjs +105 -0
- package/bin/install-formal-tools.cjs +382 -0
- package/bin/install.js +2424 -0
- package/bin/isNumericThreshold.cjs +34 -0
- package/bin/issue-classifier.cjs +151 -0
- package/bin/levenshtein.cjs +74 -0
- package/bin/lint-formal-models.cjs +580 -0
- package/bin/load-baseline-requirements.cjs +275 -0
- package/bin/manage-agents-core.cjs +815 -0
- package/bin/migrate-formal-dir.cjs +172 -0
- package/bin/migrate-planning.cjs +206 -0
- package/bin/migrate-to-slots.cjs +255 -0
- package/bin/nForma.cjs +2726 -0
- package/bin/observe-config.cjs +353 -0
- package/bin/observe-debt-writer.cjs +140 -0
- package/bin/observe-handler-grafana.cjs +128 -0
- package/bin/observe-handler-internal.cjs +301 -0
- package/bin/observe-handler-logstash.cjs +153 -0
- package/bin/observe-handler-prometheus.cjs +185 -0
- package/bin/observe-handlers.cjs +436 -0
- package/bin/observe-registry.cjs +131 -0
- package/bin/observe-render.cjs +168 -0
- package/bin/planning-paths.cjs +167 -0
- package/bin/polyrepo.cjs +560 -0
- package/bin/prism-priority.cjs +153 -0
- package/bin/probe-quorum-slots.cjs +167 -0
- package/bin/promote-model.cjs +225 -0
- package/bin/propose-debug-invariants.cjs +165 -0
- package/bin/providers.json +392 -0
- package/bin/pty-proxy.py +129 -0
- package/bin/qgsd-solve.cjs +2477 -0
- package/bin/quorum-consensus-gate.cjs +238 -0
- package/bin/quorum-formal-context.cjs +183 -0
- package/bin/quorum-slot-dispatch.cjs +934 -0
- package/bin/read-policy.cjs +60 -0
- package/bin/requirement-map.cjs +63 -0
- package/bin/requirements-core.cjs +247 -0
- package/bin/resolve-cli.cjs +101 -0
- package/bin/review-mcp-logs.cjs +294 -0
- package/bin/run-account-manager-tlc.cjs +188 -0
- package/bin/run-account-pool-alloy.cjs +158 -0
- package/bin/run-alloy.cjs +153 -0
- package/bin/run-audit-alloy.cjs +187 -0
- package/bin/run-breaker-tlc.cjs +181 -0
- package/bin/run-formal-check.cjs +395 -0
- package/bin/run-formal-verify.cjs +701 -0
- package/bin/run-installer-alloy.cjs +188 -0
- package/bin/run-oauth-rotation-prism.cjs +132 -0
- package/bin/run-oscillation-tlc.cjs +202 -0
- package/bin/run-phase-tlc.cjs +228 -0
- package/bin/run-prism.cjs +446 -0
- package/bin/run-protocol-tlc.cjs +201 -0
- package/bin/run-quorum-composition-alloy.cjs +155 -0
- package/bin/run-sensitivity-sweep.cjs +231 -0
- package/bin/run-stop-hook-tlc.cjs +188 -0
- package/bin/run-tlc.cjs +467 -0
- package/bin/run-transcript-alloy.cjs +173 -0
- package/bin/run-uppaal.cjs +264 -0
- package/bin/secrets.cjs +134 -0
- package/bin/sensitivity-report.cjs +219 -0
- package/bin/sensitivity-sweep-feedback.cjs +194 -0
- package/bin/set-secret.cjs +29 -0
- package/bin/setup-telemetry-cron.sh +36 -0
- package/bin/sweepPtoF.cjs +63 -0
- package/bin/sync-baseline-requirements.cjs +290 -0
- package/bin/task-envelope.cjs +360 -0
- package/bin/telemetry-collector.cjs +229 -0
- package/bin/unified-mcp-server.mjs +735 -0
- package/bin/update-agents.cjs +369 -0
- package/bin/update-scoreboard.cjs +1134 -0
- package/bin/validate-debt-entry.cjs +207 -0
- package/bin/validate-invariant.cjs +419 -0
- package/bin/validate-memory.cjs +389 -0
- package/bin/validate-requirements-haiku.cjs +435 -0
- package/bin/validate-traces.cjs +438 -0
- package/bin/verify-formal-results.cjs +124 -0
- package/bin/verify-quorum-health.cjs +273 -0
- package/bin/write-check-result.cjs +106 -0
- package/bin/xstate-to-tla.cjs +483 -0
- package/bin/xstate-trace-walker.cjs +205 -0
- package/commands/qgsd/add-phase.md +43 -0
- package/commands/qgsd/add-requirement.md +24 -0
- package/commands/qgsd/add-todo.md +47 -0
- package/commands/qgsd/audit-milestone.md +37 -0
- package/commands/qgsd/check-todos.md +45 -0
- package/commands/qgsd/cleanup.md +18 -0
- package/commands/qgsd/close-formal-gaps.md +33 -0
- package/commands/qgsd/complete-milestone.md +136 -0
- package/commands/qgsd/debug.md +166 -0
- package/commands/qgsd/discuss-phase.md +83 -0
- package/commands/qgsd/execute-phase.md +117 -0
- package/commands/qgsd/fix-tests.md +27 -0
- package/commands/qgsd/formal-test-sync.md +32 -0
- package/commands/qgsd/health.md +22 -0
- package/commands/qgsd/help.md +22 -0
- package/commands/qgsd/insert-phase.md +32 -0
- package/commands/qgsd/join-discord.md +18 -0
- package/commands/qgsd/list-phase-assumptions.md +46 -0
- package/commands/qgsd/map-codebase.md +71 -0
- package/commands/qgsd/map-requirements.md +20 -0
- package/commands/qgsd/mcp-restart.md +176 -0
- package/commands/qgsd/mcp-set-model.md +134 -0
- package/commands/qgsd/mcp-setup.md +1371 -0
- package/commands/qgsd/mcp-status.md +274 -0
- package/commands/qgsd/mcp-update.md +238 -0
- package/commands/qgsd/new-milestone.md +44 -0
- package/commands/qgsd/new-project.md +42 -0
- package/commands/qgsd/observe.md +260 -0
- package/commands/qgsd/pause-work.md +38 -0
- package/commands/qgsd/plan-milestone-gaps.md +34 -0
- package/commands/qgsd/plan-phase.md +44 -0
- package/commands/qgsd/polyrepo.md +50 -0
- package/commands/qgsd/progress.md +24 -0
- package/commands/qgsd/queue.md +54 -0
- package/commands/qgsd/quick.md +133 -0
- package/commands/qgsd/quorum-test.md +275 -0
- package/commands/qgsd/quorum.md +707 -0
- package/commands/qgsd/reapply-patches.md +110 -0
- package/commands/qgsd/remove-phase.md +31 -0
- package/commands/qgsd/research-phase.md +189 -0
- package/commands/qgsd/resume-work.md +40 -0
- package/commands/qgsd/set-profile.md +34 -0
- package/commands/qgsd/settings.md +39 -0
- package/commands/qgsd/solve.md +565 -0
- package/commands/qgsd/sync-baselines.md +119 -0
- package/commands/qgsd/triage.md +233 -0
- package/commands/qgsd/update.md +37 -0
- package/commands/qgsd/verify-work.md +38 -0
- package/hooks/dist/config-loader.js +297 -0
- package/hooks/dist/conformance-schema.cjs +12 -0
- package/hooks/dist/gsd-context-monitor.js +64 -0
- package/hooks/dist/qgsd-check-update.js +62 -0
- package/hooks/dist/qgsd-circuit-breaker.js +682 -0
- package/hooks/dist/qgsd-precompact.js +156 -0
- package/hooks/dist/qgsd-prompt.js +653 -0
- package/hooks/dist/qgsd-session-start.js +122 -0
- package/hooks/dist/qgsd-slot-correlator.js +58 -0
- package/hooks/dist/qgsd-spec-regen.js +86 -0
- package/hooks/dist/qgsd-statusline.js +91 -0
- package/hooks/dist/qgsd-stop.js +553 -0
- package/hooks/dist/qgsd-token-collector.js +133 -0
- package/hooks/dist/unified-mcp-server.mjs +669 -0
- package/package.json +95 -0
- package/scripts/build-hooks.js +46 -0
- package/scripts/postinstall.js +48 -0
- package/scripts/secret-audit.sh +45 -0
- package/templates/qgsd.json +49 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
// bin/check-trace-schema-drift.cjs
|
|
4
|
+
// Detects when .planning/formal/trace/trace.schema.json is modified without co-modifying
|
|
5
|
+
// bin/validate-traces.cjs and at least one trace emitter file in the same commit.
|
|
6
|
+
//
|
|
7
|
+
// Exit code 0: no schema change, or schema changed atomically
|
|
8
|
+
// Exit code 1: schema drift detected (validator or emitter not updated)
|
|
9
|
+
|
|
10
|
+
const { execFileSync } = require('child_process');
|
|
11
|
+
const { writeCheckResult } = require('./write-check-result.cjs');
|
|
12
|
+
const { getRequirementIds } = require('./requirement-map.cjs');
|
|
13
|
+
|
|
14
|
+
// Repo-relative path of the trace schema whose changes are being policed.
const SCHEMA_FILE = '.planning/formal/trace/trace.schema.json';
// Repo-relative path of the validator that must change together with the schema.
const VALIDATOR_FILE = 'bin/validate-traces.cjs';

// Files treated as trace emitters. The validator is listed here too, but
// checkSchemaDrift() filters it out before looking for an updated emitter.
const KNOWN_EMITTERS = [
  'bin/validate-traces.cjs',
  'hooks/qgsd-stop.js',
  'hooks/qgsd-prompt.js',
  'hooks/dist/qgsd-stop.js',
  'hooks/dist/qgsd-prompt.js',
];

/**
 * Decide whether a set of changed files constitutes trace-schema drift.
 *
 * A schema change is "atomic" (and passes) only when the same change set also
 * touches the validator AND at least one emitter other than the validator.
 * Changing the validator alone never satisfies the emitter requirement.
 *
 * Path matching is substring-based, so prefixed paths also match.
 *
 * @param {string[]} changedFiles  File paths changed in the commit under inspection.
 * @returns {{ status: 'pass'|'fail', reason: string, [key: string]: any }}
 *   `no-schema-change` or `schema-change-atomic` on pass; on fail,
 *   `schema-drift-detected` plus which side was (not) updated and the file list.
 */
function checkSchemaDrift(changedFiles) {
  const touchesSchema = (file) =>
    file === SCHEMA_FILE ||
    file.endsWith('/' + SCHEMA_FILE) ||
    file.includes('trace.schema.json');

  if (!changedFiles.some(touchesSchema)) {
    return { status: 'pass', reason: 'no-schema-change' };
  }

  // Exact match or substring match against a known path.
  const mentions = (file, target) => file === target || file.includes(target);

  // The validator cannot vouch for itself as an emitter.
  const hookEmitters = KNOWN_EMITTERS.filter((candidate) => candidate !== VALIDATOR_FILE);

  const validatorUpdated = changedFiles.some((file) => mentions(file, VALIDATOR_FILE));
  const emitterUpdated = changedFiles.some((file) =>
    hookEmitters.some((emitter) => mentions(file, emitter))
  );

  if (!(validatorUpdated && emitterUpdated)) {
    return {
      status: 'fail',
      reason: 'schema-drift-detected',
      schema_changed: true,
      validator_updated: validatorUpdated,
      emitter_updated: emitterUpdated,
      changed_files: changedFiles,
    };
  }

  return { status: 'pass', reason: 'schema-change-atomic', files: changedFiles.length };
}
|
|
66
|
+
|
|
67
|
+
// CLI entry point: only runs when this file is executed directly.
if (require.main === module) {
  const CHECK_ID = 'ci:trace-schema-drift';
  const startedAt = Date.now();

  // Builds and writes one check-result record. Both the normal path and the
  // error path use the same record shape; only status and metadata differ.
  const record = (status, runtimeMs, metadata) =>
    writeCheckResult({
      tool: 'check-trace-schema-drift',
      formalism: 'trace',
      result: status,
      check_id: CHECK_ID,
      surface: 'ci',
      property: 'Trace schema drift — no non-atomic conformance schema changes between commits',
      runtime_ms: runtimeMs,
      summary: (status === 'pass' ? 'pass' : 'fail') + ': ci:trace-schema-drift in ' + runtimeMs + 'ms',
      triage_tags: [],
      requirement_ids: getRequirementIds(CHECK_ID),
      metadata,
    });

  try {
    // File names that differ from the parent of HEAD.
    const diffOutput = execFileSync('git', ['diff', '--name-only', 'HEAD~1'], { encoding: 'utf8' });
    const changedFiles = diffOutput.split('\n').filter((line) => line.trim().length > 0);
    const verdict = checkSchemaDrift(changedFiles);
    const elapsedMs = Date.now() - startedAt;

    // Failing to persist the result is only a warning; the exit code still
    // reflects the drift verdict.
    try {
      record(verdict.status, elapsedMs, verdict);
    } catch (writeErr) {
      process.stderr.write(`[check-trace-schema-drift] Warning: failed to write check result: ${writeErr.message}\n`);
    }

    process.exit(verdict.status === 'pass' ? 0 : 1);
  } catch (err) {
    // Anything thrown above (typically the git invocation) is reported as a
    // failed check with reason "git-error".
    const elapsedMs = Date.now() - startedAt;
    try {
      record('fail', elapsedMs, { reason: 'git-error', error: err.message });
    } catch (_) {
      /* best effort: the process is already exiting with failure */
    }
    process.stderr.write(`[check-trace-schema-drift] git error: ${err.message}\n`);
    process.exit(1);
  }
}
|
|
98
|
+
|
|
99
|
+
module.exports = { checkSchemaDrift, KNOWN_EMITTERS };
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Drift comparison helper for P->F residual layer
|
|
3
|
+
* Compares a debt entry's production measurement against a formal expected value
|
|
4
|
+
* Fail-open: returns false when either value is null/missing
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
'use strict';
|
|
8
|
+
|
|
9
|
+
/**
 * Report whether a debt entry's measured value diverges from the formal model.
 *
 * Fail-open: if either the measurement or the expected value is null or
 * undefined, the entry is reported as NOT divergent.
 *
 * @param {object} entry - Debt entry; the measurement is read from `entry.meta.measured_value`.
 * @param {*} formalExpected - Expected value taken from the formal model.
 * @returns {boolean} true when both values are present and not strictly equal; false otherwise.
 */
function compareDrift(entry, formalExpected) {
  const observed = entry?.meta?.measured_value;
  const bothPresent = observed != null && formalExpected != null;
  return bothPresent ? observed !== formalExpected : false;
}
|
|
20
|
+
|
|
21
|
+
module.exports = { compareDrift };
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
// bin/conformance-schema.cjs
|
|
3
|
+
// Single source of truth for conformance event field enumerations.
|
|
4
|
+
// Imported by hooks (qgsd-stop.js, qgsd-prompt.js, qgsd-circuit-breaker.js) and validate-traces.cjs.
|
|
5
|
+
// NEVER add external require() calls — hooks have zero runtime dependencies.
|
|
6
|
+
|
|
7
|
+
// Allowed values for a conformance event's action.
const VALID_ACTIONS = [
  'quorum_start',
  'quorum_complete',
  'quorum_block',
  'deliberation_round',
  'circuit_break',
];

// Allowed values for a conformance event's phase.
const VALID_PHASES = [
  'IDLE',
  'COLLECTING_VOTES',
  'DELIBERATING',
  'DECIDED',
];

// Allowed values for a conformance event's outcome.
const VALID_OUTCOMES = [
  'APPROVE',
  'BLOCK',
  'UNAVAILABLE',
  'DELIBERATE',
];

// Version tag of this enumeration set (kept as a string).
const schema_version = '1';
|
|
11
|
+
|
|
12
|
+
module.exports = { VALID_ACTIONS, VALID_PHASES, VALID_OUTCOMES, schema_version };
|
|
@@ -0,0 +1,420 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
// bin/count-scenarios.cjs
|
|
4
|
+
// Counts scenarios (instance upper-bound) for every formal model across all frameworks.
|
|
5
|
+
// Outputs a sorted table: biggest models first, so you know what to split.
|
|
6
|
+
//
|
|
7
|
+
// Frameworks: Alloy (.als), TLA+ (.tla via state-space-report.json), UPPAAL (.xml)
|
|
8
|
+
//
|
|
9
|
+
// Usage:
|
|
10
|
+
// node bin/count-scenarios.cjs # table to stdout
|
|
11
|
+
// node bin/count-scenarios.cjs --json # JSON to stdout
|
|
12
|
+
|
|
13
|
+
const fs = require('fs');
|
|
14
|
+
const path = require('path');
|
|
15
|
+
|
|
16
|
+
const ROOT = process.cwd();
|
|
17
|
+
const ALLOY_DIR = path.join(ROOT, '.planning', 'formal', 'alloy');
|
|
18
|
+
const TLA_REPORT = path.join(ROOT, '.planning', 'formal', 'state-space-report.json');
|
|
19
|
+
const UPPAAL_DIR = path.join(ROOT, '.planning', 'formal', 'uppaal');
|
|
20
|
+
|
|
21
|
+
const jsonMode = process.argv.includes('--json');
|
|
22
|
+
|
|
23
|
+
// ── Alloy parser ─────────────────────────────────────────────────────────────
|
|
24
|
+
|
|
25
|
+
/**
 * Extract signature declarations from Alloy source text.
 *
 * A declaration such as `abstract sig A, B extends P { ... }` yields one entry
 * per declared name, each carrying the shared modifiers and its own parsed
 * copy of the field list.
 *
 * @param {string} content  Alloy (.als) source text.
 * @returns {Array<{name: string, isAbstract: boolean, mult: string, parent: (string|null), fields: Array}>}
 *   `mult` is 'one', 'lone' or '' (no multiplicity keyword).
 */
function parseAlloySigs(content) {
  const SIG_DECL = /\b(abstract\s+)?(one\s+|lone\s+)?sig\s+(\w+(?:\s*,\s*\w+)*)\s*(?:extends\s+(\w+)\s*)?\{([^}]*)\}/g;
  const sigs = [];

  for (const [, abstractKw, multKw, nameList, parentName, body] of String(content).matchAll(SIG_DECL)) {
    const isAbstract = Boolean(abstractKw);
    const mult = (multKw || '').trim();
    const parent = parentName || null;

    nameList
      .split(',')
      .map((raw) => raw.trim())
      .filter(Boolean)
      .forEach((name) => {
        // Parse the body once per name so entries never share a fields array.
        sigs.push({ name, isAbstract, mult, parent, fields: parseFields(body) });
      });
  }

  return sigs;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Parse the field declarations found inside a sig body.
 *
 * Line comments (`--` and `//`) are removed from every line BEFORE the body is
 * split on commas. Stripping after the split (as was done previously) left a
 * comment that follows a comma glued to the front of the next declaration,
 * which then failed to match and was silently dropped; commas inside comments
 * also produced bogus segments.
 *
 * Known limitation: a shared declaration such as `a, b: one X` only yields `b`.
 *
 * @param {string} body  Text between the braces of a sig declaration.
 * @returns {Array<{name: string, mult: string, type: string}>}
 *   One entry per `name: [mult] Type` declaration; `mult` defaults to 'one'.
 */
function parseFields(body) {
  // `m` makes `$` match at each line end; `g` strips every comment, not just one.
  const uncommented = body.replace(/(?:--|\/\/).*$/gm, '');
  const fields = [];

  for (const segment of uncommented.split(',')) {
    const declaration = segment.trim();
    if (!declaration) continue;

    const fieldMatch = declaration.match(/^(\w+)\s*:\s*(one\s+|set\s+|lone\s+|seq\s+)?(.+)$/);
    if (fieldMatch) {
      fields.push({
        name: fieldMatch[1],
        mult: (fieldMatch[2] || 'one').trim(),
        type: fieldMatch[3].trim(),
      });
    }
  }

  return fields;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Find `run` / `check` commands that carry a `for ...` scope clause.
 *
 * @param {string} content  Alloy (.als) source text.
 * @returns {Array<{type: string, name: string, scopeStr: string, scope: object}>}
 *   `name` is '(anonymous)' when the command names no predicate or assertion;
 *   `scopeStr` is the rest of the line after `for`; `scope` is its parsed form.
 */
function parseAlloyCommands(content) {
  const COMMAND = /\b(run|check)\s+(?:(\w+)\s*)?(?:\{[^}]*\}\s*)?for\s+(.+)/g;

  return Array.from(String(content).matchAll(COMMAND), ([, type, label, rawScope]) => {
    const scopeStr = rawScope.trim();
    return {
      type,
      name: label || '(anonymous)',
      scopeStr,
      scope: parseScopeStr(scopeStr),
    };
  });
}
|
|
76
|
+
|
|
77
|
+
/**
 * Parse the text following `for` in an Alloy command into per-sig bounds.
 *
 * Supported forms:
 *   `5`                       -> { _default: 5 }
 *   `5 but 3 Foo, 4 int`      -> { Foo: 3, int: 4, _default: 5 }
 *   `exactly 2 Foo, 6 Int`    -> { Foo: 2, Int: 6 }
 *   `5 expect 0`              -> { _default: 5 }
 *
 * `exactly N Sig` is recorded as the bound N. A trailing `expect N` clause and
 * trailing `--` / `//` comments are ignored. Previously `exactly` bounds and
 * any scope followed by `expect` failed to match and were silently discarded.
 *
 * @param {string} scopeStr  Scope clause text (single line, without the `for`).
 * @returns {Object<string, number>}  Map of sig name to bound; `_default` holds
 *   the overall bound when one was given.
 */
function parseScopeStr(scopeStr) {
  const scope = {};
  let defaultScope = null;

  const clean = scopeStr
    .replace(/(?:--|\/\/).*$/, '')       // trailing line comment
    .replace(/\s+expect\s+\d+\s*$/, '')  // expected-outcome annotation is not a bound
    .trim();

  const butMatch = clean.match(/^(\d+)\s+but\s+(.+)$/);
  const entries = butMatch ? butMatch[2] : clean;
  if (butMatch) {
    defaultScope = Number.parseInt(butMatch[1], 10);
  }

  for (const part of entries.split(',')) {
    const trimmed = part.trim();

    const typed = trimmed.match(/^(?:exactly\s+)?(\d+)\s+(\w+)$/);
    if (typed) {
      scope[typed[2]] = Number.parseInt(typed[1], 10);
      continue;
    }

    // A bare number is the overall bound, unless `N but ...` already set one.
    const bareNum = trimmed.match(/^(\d+)$/);
    if (bareNum && defaultScope === null) {
      defaultScope = Number.parseInt(bareNum[1], 10);
    }
  }

  if (defaultScope !== null) {
    scope._default = defaultScope;
  }

  return scope;
}
|
|
108
|
+
|
|
109
|
+
/**
 * Resolve the atom bound for one signature under a command scope.
 *
 * Precedence: explicit per-sig bound, then 1 for `one`/`lone` sigs, then the
 * command's overall bound, then 3.
 *
 * @param {string} sigName  Signature to look up.
 * @param {Object<string, number>} scope  Parsed scope (see parseScopeStr).
 * @param {Array<{name: string, mult: string}>} sigs  All parsed signatures.
 * @returns {number} Upper bound on the number of atoms of `sigName`.
 */
function getSigScope(sigName, scope, sigs) {
  const explicitBound = scope[sigName];
  if (explicitBound !== undefined) return explicitBound;

  const declaration = sigs.find((candidate) => candidate.name === sigName);
  const isSingleton =
    Boolean(declaration) && (declaration.mult === 'one' || declaration.mult === 'lone');
  if (isSingleton) return 1;

  return scope._default !== undefined ? scope._default : 3;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Integer bit-width for a command scope.
 *
 * Uses the `int` entry, then the `Int` entry, skipping any that is missing or
 * falsy (including 0); falls back to 4.
 *
 * @param {Object<string, number>} scope  Parsed scope (see parseScopeStr).
 * @returns {number} Bit-width used to size the Int range.
 */
function getIntBits(scope) {
  const lowerCaseBits = scope.int;
  if (lowerCaseBits) return lowerCaseBits;

  const upperCaseBits = scope.Int;
  if (upperCaseBits) return upperCaseBits;

  return 4;
}
|
|
123
|
+
|
|
124
|
+
/**
 * Estimate an upper bound on the number of instances for one Alloy command.
 *
 * The estimate is the product, over every field of every populated sig, of the
 * number of ways that field can be assigned:
 *   one / unspecified : target ^ atoms
 *   lone              : (target + 1) ^ atoms
 *   set               : (2 ^ target) ^ atoms
 *   seq               : target ^ (3 * atoms)
 * where `atoms` is the owning sig's atom count and `target` is the size of the
 * field's type.
 *
 * @param {Array<{name: string, isAbstract: boolean, mult: string, parent: (string|null), fields: Array<{mult: string, type: string}>}>} sigs
 * @param {{scope: Object<string, number>}} cmd  Parsed command (see parseAlloyCommands).
 * @returns {bigint} Estimated scenario count (1n when there are no fields).
 */
function estimateAlloyScenarios(sigs, cmd) {
  const { scope } = cmd;
  const intRange = 2 ** getIntBits(scope);

  // Abstract sigs with no multiplicity own no atoms; they get their children's.
  const isPureAbstract = (sig) => sig.isAbstract && !sig.mult;

  // Pass 1: bound for every concrete sig; abstract ones start at 0.
  const atomsBySig = {};
  for (const sig of sigs) {
    atomsBySig[sig.name] = isPureAbstract(sig) ? 0 : getSigScope(sig.name, scope, sigs);
  }

  // Pass 2: an abstract sig's count is the sum over its direct children, using
  // whatever counts are recorded at this point (declaration order matters).
  for (const sig of sigs) {
    if (!isPureAbstract(sig)) continue;
    atomsBySig[sig.name] = sigs
      .filter((child) => child.parent === sig.name)
      .reduce((sum, child) => sum + (atomsBySig[child.name] || 0), 0);
  }

  // Size of a field's target type; never below 1 so the product cannot hit 0.
  const targetSizeOf = (type) => {
    let size;
    if (type === 'Int' || type === 'int') {
      size = intRange;
    } else if (type === 'Bool' || type === 'BOOLEAN') {
      size = 2;
    } else if (atomsBySig[type] !== undefined) {
      size = atomsBySig[type];
    } else {
      size = scope._default || 3;
    }
    return size <= 0 ? 1 : size;
  };

  let total = 1n;
  for (const sig of sigs) {
    const atoms = atomsBySig[sig.name] || 0;
    if (atoms === 0) continue;

    for (const field of sig.fields) {
      const size = targetSizeOf(field.type);
      switch (field.mult) {
        case 'lone':
          total *= BigInt(size + 1) ** BigInt(atoms);
          break;
        case 'set':
          total *= (2n ** BigInt(size)) ** BigInt(atoms);
          break;
        case 'seq':
          total *= BigInt(size) ** BigInt(atoms * 3);
          break;
        default: // 'one' and anything unrecognised
          total *= BigInt(size) ** BigInt(atoms);
      }
    }
  }

  return total;
}
|
|
194
|
+
|
|
195
|
+
/**
 * Analyse one Alloy model file: parse its sigs and commands and estimate the
 * scenario count of every command.
 *
 * @param {string} filePath  Path to a .als file (read synchronously as UTF-8).
 * @returns {{commands: Array<{command: string, scope: string, scenarios: bigint}>,
 *            sigCount: number, fieldCount: number, maxScenarios: bigint,
 *            maxCommand: (string|null), maxScope: (string|null)}}
 *   `max*` describe the command with the largest estimate (the first one on
 *   ties); with no commands they are 0n / null / null.
 */
function analyzeAlloyModel(filePath) {
  const source = fs.readFileSync(filePath, 'utf8');
  const sigs = parseAlloySigs(source);

  const perCommand = parseAlloyCommands(source).map((cmd) => ({
    command: cmd.type + ' ' + cmd.name,
    scope: cmd.scopeStr,
    scenarios: estimateAlloyScenarios(sigs, cmd),
  }));

  // Strict ">" keeps the earliest command when several share the maximum.
  let largest = null;
  for (const entry of perCommand) {
    const currentMax = largest === null ? 0n : largest.scenarios;
    if (entry.scenarios > currentMax) {
      largest = entry;
    }
  }

  let fieldCount = 0;
  for (const sig of sigs) {
    fieldCount += sig.fields.length;
  }

  return {
    commands: perCommand,
    sigCount: sigs.length,
    fieldCount,
    maxScenarios: largest === null ? 0n : largest.scenarios,
    maxCommand: largest === null ? null : largest.command,
    maxScope: largest === null ? null : largest.scope,
  };
}
|
|
228
|
+
|
|
229
|
+
// ── Main ─────────────────────────────────────────────────────────────────────
|
|
230
|
+
|
|
231
|
+
/**
 * Render a scenario count compactly for the table.
 *
 * Values of up to six characters are printed in full; longer ones become
 * `d.dde<exp>` using the first three digits (truncated, not rounded).
 *
 * @param {(bigint|number|null|undefined)} n  Count to render.
 * @returns {string} '?' for null/undefined, otherwise the formatted value.
 */
function formatBigInt(n) {
  if (n == null) return '?';

  const digits = n.toString();
  if (digits.length <= 6) return digits;

  const exponent = digits.length - 1;
  return `${digits[0]}.${digits.substring(1, 3)}e${exponent}`;
}
|
|
238
|
+
|
|
239
|
+
/**
 * Bucket a scenario count into a risk label.
 *
 *   <= 1,000       MINIMAL
 *   <= 100,000     LOW
 *   <= 10,000,000  MODERATE
 *   otherwise      HIGH
 *
 * @param {(bigint|number|null|undefined)} n  Scenario count.
 * @returns {string} 'UNKNOWN' for null/undefined, otherwise one of the labels above.
 */
function riskLevel(n) {
  if (n == null) return 'UNKNOWN';

  const tiers = [
    [1000n, 'MINIMAL'],
    [100000n, 'LOW'],
    [10000000n, 'MODERATE'],
  ];
  const tier = tiers.find(([ceiling]) => n <= ceiling);
  return tier ? tier[1] : 'HIGH';
}
|
|
246
|
+
|
|
247
|
+
// Sort/grouping rank per risk label (lower sorts first). Unlisted labels rank 2.
const RISK_ORDER = { HIGH: 0, UNKNOWN: 1, ERROR: 1, TIMED: 1, MODERATE: 2, LOW: 3, MINIMAL: 4 };

/** Rank of a risk label for sorting and for group separators. */
function riskRank(risk) {
  return RISK_ORDER[risk] !== undefined ? RISK_ORDER[risk] : 2;
}

/** One row per .als file in ALLOY_DIR; a file that fails analysis becomes an ERROR row. */
function collectAlloyRows() {
  if (!fs.existsSync(ALLOY_DIR)) return [];

  const files = fs.readdirSync(ALLOY_DIR).filter((f) => f.endsWith('.als')).sort();
  return files.map((file) => {
    // basename() strips the extension only; replace('.als', '') removed the
    // first occurrence anywhere in the name.
    const model = path.basename(file, '.als');
    try {
      const analysis = analyzeAlloyModel(path.join(ALLOY_DIR, file));
      return {
        framework: 'Alloy',
        model,
        scenarios: analysis.maxScenarios,
        risk: riskLevel(analysis.maxScenarios),
        sigs: analysis.sigCount,
        fields: analysis.fieldCount,
        commands: analysis.commands.length,
        detail: analysis.maxScope || '',
      };
    } catch (err) {
      return {
        framework: 'Alloy',
        model,
        scenarios: null,
        risk: 'ERROR',
        sigs: 0,
        fields: 0,
        commands: 0,
        detail: err.message,
      };
    }
  });
}

/** One row per model in the TLA+ state-space report (trace-explorer modules skipped). */
function collectTlaRows() {
  if (!fs.existsSync(TLA_REPORT)) return [];

  const report = JSON.parse(fs.readFileSync(TLA_REPORT, 'utf8'));
  const rows = [];
  for (const [modelPath, data] of Object.entries(report.models || {})) {
    const name = data.module_name || path.basename(modelPath, '.tla');
    if (name.includes('_TTrace_')) continue;

    const states = data.estimated_states;
    // `!= null` also covers a missing key: BigInt(undefined) throws and would
    // abort the whole report.
    const hasEstimate = states != null;
    const scenarios = hasEstimate ? BigInt(states) : null;

    let risk;
    if (hasEstimate) {
      risk = riskLevel(scenarios);
    } else {
      risk = data.has_unbounded ? 'HIGH' : 'UNKNOWN';
    }

    let detail;
    if (data.has_unbounded) {
      detail = 'UNBOUNDED';
    } else {
      detail = hasEstimate ? states + ' states' : 'unresolvable';
    }

    rows.push({
      framework: 'TLA+',
      model: name,
      scenarios,
      risk,
      sigs: data.variables ? data.variables.length : 0,
      fields: 0,
      commands: (data.invariant_count || 0) + (data.property_count || 0),
      detail,
    });
  }
  return rows;
}

/** One row per .xml file in UPPAAL_DIR, with simple textual counts (no scenario estimate). */
function collectUppaalRows() {
  if (!fs.existsSync(UPPAAL_DIR)) return [];

  const files = fs.readdirSync(UPPAAL_DIR).filter((f) => f.endsWith('.xml'));
  return files.map((file) => {
    const content = fs.readFileSync(path.join(UPPAAL_DIR, file), 'utf8');
    const templateCount = (content.match(/<template>/g) || []).length;
    const locationCount = (content.match(/<location /g) || []).length;
    const clockCount = (content.match(/clock\s+\w/g) || []).length;
    return {
      framework: 'UPPAAL',
      model: path.basename(file, '.xml'),
      scenarios: null,
      risk: 'TIMED',
      sigs: templateCount,
      fields: locationCount,
      commands: clockCount,
      detail: templateCount + ' automata, ' + locationCount + ' locations, ' + clockCount + ' clocks',
    };
  });
}

/** Comparator: by risk rank first, then larger scenario counts first. */
function compareRows(a, b) {
  const rankDiff = riskRank(a.risk) - riskRank(b.risk);
  if (rankDiff !== 0) return rankDiff;

  const sa = a.scenarios || 0n;
  const sb = b.scenarios || 0n;
  if (sa > sb) return -1;
  if (sa < sb) return 1;
  return 0;
}

/** Write rows as JSON to stdout; BigInt counts are emitted as decimal strings. */
function printJson(rows) {
  const jsonRows = rows.map((r) => ({
    ...r,
    scenarios: r.scenarios !== null ? r.scenarios.toString() : null,
  }));
  process.stdout.write(JSON.stringify({ models: jsonRows, total: rows.length }, null, 2) + '\n');
}

/** Print the aligned table, with a separator line between risk groups. */
function printTable(rows) {
  const header = ['Risk', 'Framework', 'Model', 'Scenarios', 'Sigs', 'Fields', 'Cmds', 'Detail'];
  const cellsOf = (r) => [
    r.risk,
    r.framework,
    r.model,
    formatBigInt(r.scenarios),
    String(r.sigs),
    String(r.fields),
    String(r.commands),
    r.detail,
  ];
  const body = rows.map(cellsOf);

  const colWidths = header.map((title, idx) =>
    body.reduce((widest, cells) => Math.max(widest, String(cells[idx]).length), title.length)
  );
  colWidths[7] = Math.min(colWidths[7], 45); // keep the free-text column bounded

  const sep = colWidths.map((w) => '-'.repeat(w + 2)).join('+');

  // Each cell is " <text> " padded to width + 2. Truncate to the column width
  // itself so an over-long value cannot overflow its cell.
  const formatRow = (vals) =>
    vals
      .map((v, i) => (' ' + String(v).substring(0, colWidths[i])).padEnd(colWidths[i] + 2))
      .join('|');

  console.log(formatRow(header));
  console.log(sep);

  let prevRisk = null;
  rows.forEach((r, idx) => {
    if (prevRisk !== null && riskRank(r.risk) !== riskRank(prevRisk)) {
      console.log(sep);
    }
    prevRisk = r.risk;
    console.log(formatRow(body[idx]));
  });

  console.log(sep);
}

/** Print totals by risk and framework, plus the list of split candidates. */
function printSummary(rows) {
  const byRisk = {};
  const byFramework = {};
  for (const r of rows) {
    byRisk[r.risk] = (byRisk[r.risk] || 0) + 1;
    byFramework[r.framework] = (byFramework[r.framework] || 0) + 1;
  }
  const tally = (counts) => Object.entries(counts).map(([key, count]) => key + '=' + count).join('  ');

  console.log('\nTotal: ' + rows.length + ' models');
  console.log('By risk: ' + tally(byRisk));
  console.log('By framework: ' + tally(byFramework));

  const splitCandidates = rows.filter(
    (r) => r.risk === 'HIGH' && r.scenarios !== null && r.scenarios > 10000000n
  );
  if (splitCandidates.length > 0) {
    console.log('\nSplit candidates (HIGH risk, >10M scenarios):');
    for (const r of splitCandidates) {
      console.log('  ' + r.framework + '/' + r.model + ': ' + formatBigInt(r.scenarios) + ' scenarios');
    }
  }
}

/**
 * Entry point: gather rows from the Alloy, TLA+ and UPPAAL sources, sort them
 * (highest risk, then biggest first) and print either JSON (`--json`) or a
 * table followed by a summary.
 */
function main() {
  const rows = [...collectAlloyRows(), ...collectTlaRows(), ...collectUppaalRows()];
  rows.sort(compareRows);

  if (jsonMode) {
    printJson(rows);
    return;
  }

  printTable(rows);
  printSummary(rows);
}
|
|
419
|
+
|
|
420
|
+
// Run the entry point as soon as this script is loaded.
main();
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Dedup engine for debt entries
|
|
3
|
+
* Phase 1: Fingerprint exact-match (O(n) via hash map)
|
|
4
|
+
* Phase 2: Levenshtein near-duplicate detection (O(n^2) on remaining unmatched)
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
const { levenshteinSimilarity } = require('./levenshtein.cjs');
|
|
8
|
+
|
|
9
|
+
// Status ordering: more advanced state wins during merge
const STATUS_ORDER = { open: 0, acknowledged: 1, resolving: 2, resolved: 3 };

/**
 * Merge two debt entries, preserving the one with higher occurrences as primary.
 *
 * The primary entry supplies `id`, `fingerprint` and `title`. Occurrences are
 * summed (a missing or zero count is treated as 1), `source_entries` are
 * concatenated primary-first, environments are unioned, and timestamps use
 * min (`first_seen`) / max (`last_seen`). A timestamp present on only one of
 * the two entries is kept rather than being replaced by `undefined`.
 *
 * Neither input object is mutated; a new object is returned.
 *
 * @param {object} entryA - First debt entry (wins ties on occurrences)
 * @param {object} entryB - Second debt entry
 * @returns {object} Merged debt entry
 */
function mergeDebtEntries(entryA, entryB) {
  // Determine primary: entry with higher occurrences (if equal, entryA is primary)
  const bIsPrimary = (entryB.occurrences || 1) > (entryA.occurrences || 1);
  const primary = bIsPrimary ? entryB : entryA;
  const secondary = bIsPrimary ? entryA : entryB;

  // More advanced status wins; unknown or missing statuses rank as 0 ("open").
  const primaryRank = STATUS_ORDER[primary.status] ?? 0;
  const secondaryRank = STATUS_ORDER[secondary.status] ?? 0;
  // If the primary has no status at all, fall back to the secondary's so a
  // known status is not lost on a rank tie.
  const mergedStatus = primaryRank >= secondaryRank
    ? (primary.status ?? secondary.status)
    : secondary.status;

  // Relational comparison against undefined/null is always false, so a bare
  // `a < b ? a : b` would discard the only timestamp available. Pick the
  // defined side first, then compare.
  // NOTE(review): assumes timestamps are ISO-8601 strings (or numbers) so that
  // `<` / `>` ordering is chronological - confirm against the debt schema.
  const pickEarlier = (a, b) => {
    if (a == null) return b;
    if (b == null) return a;
    return a < b ? a : b;
  };
  const pickLater = (a, b) => {
    if (a == null) return b;
    if (b == null) return a;
    return a > b ? a : b;
  };

  const resolvedAt = primary.resolved_at || secondary.resolved_at;

  return {
    id: primary.id,
    fingerprint: primary.fingerprint,
    title: primary.title,
    occurrences: (primary.occurrences || 1) + (secondary.occurrences || 1),
    first_seen: pickEarlier(primary.first_seen, secondary.first_seen),
    last_seen: pickLater(primary.last_seen, secondary.last_seen),
    environments: [...new Set([...(primary.environments || []), ...(secondary.environments || [])])],
    status: mergedStatus,
    formal_ref: primary.formal_ref != null ? primary.formal_ref : (secondary.formal_ref || null),
    formal_ref_source: primary.formal_ref_source != null
      ? primary.formal_ref_source
      : (secondary.formal_ref_source || null),
    source_entries: [...(primary.source_entries || []), ...(secondary.source_entries || [])],
    // Only emit resolved_at when at least one side carries it.
    ...(resolvedAt ? { resolved_at: resolvedAt } : {})
  };
}
|
|
53
|
+
|
|
54
|
+
/**
 * Deduplicate debt entries using two-phase strategy:
 * 1. Fingerprint exact-match (fast, O(n))
 * 2. Levenshtein near-duplicate on remaining (O(n^2) on smaller set)
 *
 * Entries without a usable fingerprint (undefined, null or empty string) are
 * never grouped with each other in phase 1; they can still be merged in
 * phase 2 if their titles are similar enough. Entries whose `title` is not a
 * string are skipped by the phase 2 comparison instead of throwing.
 *
 * Output order follows the first appearance of each surviving entry in the
 * input. The input array is not mutated.
 *
 * @param {object[]} entries - Array of debt entries
 * @param {object} [options] - Options
 * @param {number} [options.threshold=0.85] - Levenshtein similarity threshold for near-duplicate merge
 * @returns {{ entries: object[], mergeCount: number, mergeLog: object[] }}
 */
function deduplicateEntries(entries, options = {}) {
  // `options?.` tolerates an explicit null, which the default parameter does not cover.
  const threshold = options?.threshold ?? 0.85;
  const mergeLog = [];
  let mergeCount = 0;

  if (entries.length <= 1) {
    return { entries: [...entries], mergeCount: 0, mergeLog: [] };
  }

  // Phase 1: Fingerprint exact-match
  // Map preserves insertion order, so groups come out in first-seen order.
  const fpGroups = new Map();
  for (const entry of entries) {
    const fp = entry.fingerprint;
    // A missing fingerprint is not evidence that two entries are identical:
    // give each such entry its own unique key so it forms a singleton group.
    const key = fp == null || fp === '' ? Symbol('no-fingerprint') : fp;
    const group = fpGroups.get(key);
    if (group) {
      group.push(entry);
    } else {
      fpGroups.set(key, [entry]);
    }
  }

  // Merge fingerprint groups
  const afterPhase1 = [];
  for (const group of fpGroups.values()) {
    let combined = group[0];
    for (let i = 1; i < group.length; i++) {
      // NOTE: primary_id is the id of the accumulated entry *before* the
      // merge; mergeDebtEntries may promote the other entry to primary when
      // it has more occurrences.
      mergeLog.push({
        primary_id: combined.id,
        secondary_id: group[i].id,
        merge_type: 'fingerprint'
      });
      combined = mergeDebtEntries(combined, group[i]);
      mergeCount++;
    }
    afterPhase1.push(combined);
  }

  // Phase 2: Levenshtein near-duplicate on remaining
  if (afterPhase1.length <= 1) {
    return { entries: afterPhase1, mergeCount, mergeLog };
  }

  // Lower-cased title for comparison, or null when the entry has no string title.
  const comparableTitle = (entry) =>
    (typeof entry.title === 'string' ? entry.title.toLowerCase() : null);

  const absorbed = new Set(); // indices that have been merged into another
  const result = [];

  for (let i = 0; i < afterPhase1.length; i++) {
    if (absorbed.has(i)) continue;

    let current = afterPhase1[i];

    for (let j = i + 1; j < afterPhase1.length; j++) {
      if (absorbed.has(j)) continue;

      const candidate = afterPhase1[j];
      // Recomputed every iteration: a merge may replace `current` (and thus
      // its title) with the candidate's when the candidate has more occurrences.
      const currentTitle = comparableTitle(current);
      const candidateTitle = comparableTitle(candidate);
      if (currentTitle === null || candidateTitle === null) continue;

      const sim = levenshteinSimilarity(currentTitle, candidateTitle);

      if (sim >= threshold) {
        mergeLog.push({
          primary_id: current.id,
          secondary_id: candidate.id,
          merge_type: 'levenshtein',
          similarity: sim
        });
        current = mergeDebtEntries(current, candidate);
        absorbed.add(j);
        mergeCount++;
      }
    }

    result.push(current);
  }

  return { entries: result, mergeCount, mergeLog };
}
|
|
143
|
+
|
|
144
|
+
// Public API: the two-phase dedup entry point and the pairwise merge it uses.
module.exports = { deduplicateEntries, mergeDebtEntries };
|