@kontourai/flow-agents 0.4.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/kit-gates-demo.yml +171 -0
- package/CHANGELOG.md +43 -0
- package/CONTEXT.md +1 -1
- package/README.md +13 -2
- package/build/src/cli/flow-kit.js +175 -6
- package/build/src/cli/validate-source-tree.js +19 -2
- package/build/src/flow-kit/validate.js +98 -0
- package/build/src/runtime-adapters.js +1 -1
- package/build/src/tools/validate-source-tree.js +3 -2
- package/context/scripts/hooks/config-protection.js +217 -15
- package/docs/fixture-ownership.md +2 -1
- package/docs/index.md +9 -1
- package/docs/kit-authoring-guide.md +126 -0
- package/docs/knowledge-kit.md +69 -0
- package/docs/vision.md +22 -0
- package/evals/fixtures/kit-conformance-levels/k0-flows-only/flows/review.flow.json +26 -0
- package/evals/fixtures/kit-conformance-levels/k0-flows-only/kit.json +13 -0
- package/evals/fixtures/kit-conformance-levels/k1-agent-extension/docs/README.md +3 -0
- package/evals/fixtures/kit-conformance-levels/k1-agent-extension/flows/build.flow.json +26 -0
- package/evals/fixtures/kit-conformance-levels/k1-agent-extension/kit.json +20 -0
- package/evals/fixtures/kit-conformance-levels/k2-with-evals/docs/README.md +3 -0
- package/evals/fixtures/kit-conformance-levels/k2-with-evals/eval-suites/contract-suite/suite.test.js +1 -0
- package/evals/fixtures/kit-conformance-levels/k2-with-evals/flows/synthesize.flow.json +26 -0
- package/evals/fixtures/kit-conformance-levels/k2-with-evals/kit.json +27 -0
- package/evals/fixtures/kit-conformance-levels/third-party-extension/flows/review.flow.json +26 -0
- package/evals/fixtures/kit-conformance-levels/third-party-extension/kit.json +19 -0
- package/evals/integration/test_activate_npx_context.sh +134 -0
- package/evals/integration/test_fixture_retirement_audit.sh +2 -2
- package/evals/integration/test_flow_kit_install_git.sh +163 -0
- package/evals/integration/test_hook_category_behaviors.sh +51 -0
- package/evals/integration/test_kit_conformance_levels.sh +209 -0
- package/evals/run.sh +2 -0
- package/kits/catalog.json +6 -0
- package/kits/knowledge/adapters/default-store/index.js +2 -2
- package/kits/knowledge/adapters/flow-runner/entity-extractor.js +194 -0
- package/kits/knowledge/adapters/flow-runner/index.js +349 -0
- package/kits/knowledge/adapters/obsidian-store/README.md +141 -0
- package/kits/knowledge/adapters/obsidian-store/demo.js +181 -0
- package/kits/knowledge/adapters/obsidian-store/index.js +868 -0
- package/kits/knowledge/adapters/shared/codec.js +325 -0
- package/kits/knowledge/docs/store-contract.md +72 -0
- package/kits/knowledge/evals/entities/demo-acme.js +125 -0
- package/kits/knowledge/evals/entities/suite.test.js +722 -0
- package/kits/knowledge/kit.json +10 -0
- package/kits/release-evidence/fixtures/claims/README.md +14 -0
- package/kits/release-evidence/fixtures/claims/fail-rejected-release.trust.json +22 -0
- package/kits/release-evidence/fixtures/claims/pass-trusted-release.trust.json +22 -0
- package/kits/release-evidence/flows/release-evidence.flow.json +38 -0
- package/kits/release-evidence/kit.json +13 -0
- package/package.json +1 -1
- package/packaging/conformance/fixtures/config-protection--allow-no-verify-in-string.json +20 -0
- package/packaging/conformance/fixtures/config-protection--block-git-no-verify.json +23 -0
- package/scripts/hooks/config-protection.js +217 -15
- package/src/cli/flow-kit.ts +162 -5
- package/src/cli/validate-source-tree.ts +7 -1
- package/src/flow-kit/validate.ts +127 -0
- package/src/runtime-adapters.ts +1 -1
- package/src/tools/validate-source-tree.ts +3 -2
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test_kit_conformance_levels.sh — K-level derivation and degradation invariant tests.
|
|
3
|
+
#
|
|
4
|
+
# Tests three behaviors from issue #52:
|
|
5
|
+
# 1. Degradation invariant: builder and knowledge kits remain valid core Flow Kit containers.
|
|
6
|
+
# 2. Consumer-target derivation: K0 (flows-only) → flow; K1 (+agent assets) → flow-agents;
|
|
7
|
+
# K2 (+evals) → flow-agents with k2=true; third-party extensions → listed verbatim.
|
|
8
|
+
# 3. inspect subcommand outputs stable JSON.
|
|
9
|
+
set -uo pipefail
|
|
10
|
+
|
|
11
|
+
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
|
12
|
+
source "$ROOT/evals/lib/node.sh"
|
|
13
|
+
|
|
14
|
+
errors=0
|
|
15
|
+
TMP_DIR="$(mktemp -d)"
|
|
16
|
+
trap 'rm -rf "$TMP_DIR"' EXIT
|
|
17
|
+
|
|
18
|
+
pass() { echo " ✓ $1"; }
|
|
19
|
+
fail() { echo " ✗ $1"; errors=$((errors + 1)); }
|
|
20
|
+
|
|
21
|
+
# run_inspect KIT_DIR OUTPUT_FILE
#
# Runs `flow-kit inspect` on KIT_DIR through the main CLI and writes its
# stdout to OUTPUT_FILE. Returns the CLI's exit status.
#
# stderr is captured separately (OUTPUT_FILE.err) so that warnings printed by
# node or the CLI cannot corrupt the JSON that callers parse from OUTPUT_FILE.
# When the CLI fails, the captured stderr is appended to OUTPUT_FILE so that a
# caller's `cat "$out"` still shows the error.
run_inspect() {
  local kit_dir="$1"
  local output="$2"
  local status=0
  # Route through the main CLI to avoid import.meta.url path-resolution issues.
  # NOTE(review): build output and exit status are discarded here; a broken
  # build only surfaces as a failing inspect call below.
  flow_agents_build_ts 2>/dev/null
  node "$FLOW_AGENTS_EVAL_ROOT/build/src/cli.js" flow-kit inspect "$kit_dir" >"$output" 2>"$output.err" || status=$?
  if [[ "$status" -ne 0 && -s "$output.err" ]]; then
    cat "$output.err" >>"$output"
  fi
  return "$status"
}
|
|
28
|
+
|
|
29
|
+
# ===================================================================
|
|
30
|
+
echo "=== 1. Degradation Invariant: built-in kits pass core container ==="
|
|
31
|
+
# ===================================================================
|
|
32
|
+
|
|
33
|
+
for kit_name in builder knowledge; do
|
|
34
|
+
kit_dir="$ROOT/kits/$kit_name"
|
|
35
|
+
out="$TMP_DIR/degrade-${kit_name}.out"
|
|
36
|
+
if run_inspect "$kit_dir" "$out"; then
|
|
37
|
+
k0=$(node -e "const d=require('fs').readFileSync('$out','utf8'); console.log(JSON.parse(d).conformance.k0)" 2>/dev/null)
|
|
38
|
+
if [[ "$k0" == "true" ]]; then
|
|
39
|
+
pass "$kit_name kit degradation invariant: k0=true (valid core container)"
|
|
40
|
+
else
|
|
41
|
+
fail "$kit_name kit degradation invariant: k0 should be true"
|
|
42
|
+
cat "$out"
|
|
43
|
+
fi
|
|
44
|
+
else
|
|
45
|
+
fail "$kit_name kit inspect failed"
|
|
46
|
+
cat "$out"
|
|
47
|
+
fi
|
|
48
|
+
done
|
|
49
|
+
|
|
50
|
+
# Report the builder kit's agent-extension level. Either K0-only or K1+ is
# accepted, but the value must actually be readable from the inspect output:
# an empty or non-boolean conformance.k1 (inspect crashed, output was not
# JSON) is a failure rather than being reported as "K1+".
out="$TMP_DIR/builder-k1.out"
run_inspect "$ROOT/kits/builder" "$out" || true
k1=$(node -e "const d=require('fs').readFileSync('$out','utf8'); console.log(JSON.parse(d).conformance.k1)" 2>/dev/null)
case "$k1" in
  false)
    pass "builder kit is K0 only (no agent extension assets declared in kit.json)"
    ;;
  true)
    pass "builder kit is K1+ (agent extension assets present)"
    ;;
  *)
    fail "builder kit: conformance.k1 missing or unreadable in inspect output (got '$k1')"
    cat "$out"
    ;;
esac
|
|
60
|
+
|
|
61
|
+
# Verify knowledge kit is K2 (has evals)
|
|
62
|
+
out="$TMP_DIR/knowledge-k2.out"
|
|
63
|
+
run_inspect "$ROOT/kits/knowledge" "$out" || true
|
|
64
|
+
k2=$(node -e "const d=require('fs').readFileSync('$out','utf8'); console.log(JSON.parse(d).conformance.k2)" 2>/dev/null)
|
|
65
|
+
if [[ "$k2" == "true" ]]; then
|
|
66
|
+
pass "knowledge kit is K2 (evals present)"
|
|
67
|
+
else
|
|
68
|
+
fail "knowledge kit should be K2 (has evals in kit.json)"
|
|
69
|
+
cat "$out"
|
|
70
|
+
fi
|
|
71
|
+
|
|
72
|
+
# ===================================================================
|
|
73
|
+
echo ""
|
|
74
|
+
echo "=== 2. K0 fixture: flows-only → target=flow only ==="
|
|
75
|
+
# ===================================================================
|
|
76
|
+
|
|
77
|
+
k0_fixture="$ROOT/evals/fixtures/kit-conformance-levels/k0-flows-only"
|
|
78
|
+
out="$TMP_DIR/k0.out"
|
|
79
|
+
if run_inspect "$k0_fixture" "$out"; then
|
|
80
|
+
k0=$(node -e "const d=require('fs').readFileSync('$out','utf8'); console.log(JSON.parse(d).conformance.k0)" 2>/dev/null)
|
|
81
|
+
k1=$(node -e "const d=require('fs').readFileSync('$out','utf8'); console.log(JSON.parse(d).conformance.k1)" 2>/dev/null)
|
|
82
|
+
targets=$(node -e "const d=require('fs').readFileSync('$out','utf8'); console.log(JSON.parse(d).targets.join(','))" 2>/dev/null)
|
|
83
|
+
[[ "$k0" == "true" ]] && pass "K0 fixture: k0=true" || { fail "K0 fixture: expected k0=true, got $k0"; cat "$out"; }
|
|
84
|
+
[[ "$k1" == "false" ]] && pass "K0 fixture: k1=false (no agent extension)" || { fail "K0 fixture: expected k1=false, got $k1"; cat "$out"; }
|
|
85
|
+
[[ "$targets" == "flow" ]] && pass "K0 fixture: targets=['flow'] only" || { fail "K0 fixture: expected targets=['flow'], got '$targets'"; cat "$out"; }
|
|
86
|
+
else
|
|
87
|
+
fail "K0 fixture inspect failed"
|
|
88
|
+
cat "$out"
|
|
89
|
+
fi
|
|
90
|
+
|
|
91
|
+
# ===================================================================
|
|
92
|
+
echo ""
|
|
93
|
+
echo "=== 3. K1 fixture: flows+docs → targets=[flow,flow-agents] ==="
|
|
94
|
+
# ===================================================================
|
|
95
|
+
|
|
96
|
+
k1_fixture="$ROOT/evals/fixtures/kit-conformance-levels/k1-agent-extension"
|
|
97
|
+
out="$TMP_DIR/k1.out"
|
|
98
|
+
if run_inspect "$k1_fixture" "$out"; then
|
|
99
|
+
k0=$(node -e "const d=require('fs').readFileSync('$out','utf8'); console.log(JSON.parse(d).conformance.k0)" 2>/dev/null)
|
|
100
|
+
k1=$(node -e "const d=require('fs').readFileSync('$out','utf8'); console.log(JSON.parse(d).conformance.k1)" 2>/dev/null)
|
|
101
|
+
k2=$(node -e "const d=require('fs').readFileSync('$out','utf8'); console.log(JSON.parse(d).conformance.k2)" 2>/dev/null)
|
|
102
|
+
targets=$(node -e "const d=require('fs').readFileSync('$out','utf8'); console.log(JSON.parse(d).targets.join(','))" 2>/dev/null)
|
|
103
|
+
[[ "$k0" == "true" ]] && pass "K1 fixture: k0=true" || { fail "K1 fixture: expected k0=true, got $k0"; cat "$out"; }
|
|
104
|
+
[[ "$k1" == "true" ]] && pass "K1 fixture: k1=true (agent extension present)" || { fail "K1 fixture: expected k1=true, got $k1"; cat "$out"; }
|
|
105
|
+
[[ "$k2" == "false" ]] && pass "K1 fixture: k2=false (no evals)" || { fail "K1 fixture: expected k2=false, got $k2"; cat "$out"; }
|
|
106
|
+
[[ "$targets" == "flow,flow-agents" ]] && pass "K1 fixture: targets=[flow,flow-agents]" || { fail "K1 fixture: expected targets=[flow,flow-agents], got '$targets'"; cat "$out"; }
|
|
107
|
+
else
|
|
108
|
+
fail "K1 fixture inspect failed"
|
|
109
|
+
cat "$out"
|
|
110
|
+
fi
|
|
111
|
+
|
|
112
|
+
# ===================================================================
|
|
113
|
+
echo ""
|
|
114
|
+
echo "=== 4. K2 fixture: flows+docs+evals → targets=[flow,flow-agents] k2=true ==="
|
|
115
|
+
# ===================================================================
|
|
116
|
+
|
|
117
|
+
k2_fixture="$ROOT/evals/fixtures/kit-conformance-levels/k2-with-evals"
|
|
118
|
+
out="$TMP_DIR/k2.out"
|
|
119
|
+
if run_inspect "$k2_fixture" "$out"; then
|
|
120
|
+
k2=$(node -e "const d=require('fs').readFileSync('$out','utf8'); console.log(JSON.parse(d).conformance.k2)" 2>/dev/null)
|
|
121
|
+
targets=$(node -e "const d=require('fs').readFileSync('$out','utf8'); console.log(JSON.parse(d).targets.join(','))" 2>/dev/null)
|
|
122
|
+
[[ "$k2" == "true" ]] && pass "K2 fixture: k2=true (evals present)" || { fail "K2 fixture: expected k2=true, got $k2"; cat "$out"; }
|
|
123
|
+
[[ "$targets" == "flow,flow-agents" ]] && pass "K2 fixture: targets=[flow,flow-agents]" || { fail "K2 fixture: expected targets=[flow,flow-agents], got '$targets'"; cat "$out"; }
|
|
124
|
+
else
|
|
125
|
+
fail "K2 fixture inspect failed"
|
|
126
|
+
cat "$out"
|
|
127
|
+
fi
|
|
128
|
+
|
|
129
|
+
# ===================================================================
|
|
130
|
+
echo ""
|
|
131
|
+
echo "=== 5. Third-party extension fixture → third-party ns in targets ==="
|
|
132
|
+
# ===================================================================
|
|
133
|
+
|
|
134
|
+
tp_fixture="$ROOT/evals/fixtures/kit-conformance-levels/third-party-extension"
|
|
135
|
+
out="$TMP_DIR/third-party.out"
|
|
136
|
+
# third-party extension fixture has an unknown top-level key; inspect still exits 0 (K0 valid)
|
|
137
|
+
if run_inspect "$tp_fixture" "$out"; then
|
|
138
|
+
third_party=$(node -e "const d=require('fs').readFileSync('$out','utf8'); console.log(JSON.parse(d).third_party_extensions.join(','))" 2>/dev/null)
|
|
139
|
+
targets=$(node -e "const d=require('fs').readFileSync('$out','utf8'); console.log(JSON.parse(d).targets.join(','))" 2>/dev/null)
|
|
140
|
+
if echo "$third_party" | grep -q "my-platform.widgets"; then
|
|
141
|
+
pass "third-party extension fixture: unknown namespace listed in third_party_extensions"
|
|
142
|
+
else
|
|
143
|
+
fail "third-party extension fixture: expected my-platform.widgets in third_party_extensions, got '$third_party'"
|
|
144
|
+
cat "$out"
|
|
145
|
+
fi
|
|
146
|
+
if echo "$targets" | grep -q "my-platform.widgets"; then
|
|
147
|
+
pass "third-party extension fixture: unknown namespace listed in targets"
|
|
148
|
+
else
|
|
149
|
+
fail "third-party extension fixture: expected my-platform.widgets in targets, got '$targets'"
|
|
150
|
+
cat "$out"
|
|
151
|
+
fi
|
|
152
|
+
else
|
|
153
|
+
fail "third-party extension fixture inspect failed (k0 should still be valid)"
|
|
154
|
+
cat "$out"
|
|
155
|
+
fi
|
|
156
|
+
|
|
157
|
+
# ===================================================================
|
|
158
|
+
echo ""
|
|
159
|
+
echo "=== 6. Inspect JSON schema shape ==="
|
|
160
|
+
# ===================================================================
|
|
161
|
+
|
|
162
|
+
out="$TMP_DIR/schema-check.out"
|
|
163
|
+
run_inspect "$ROOT/kits/builder" "$out" || true
|
|
164
|
+
if node -e "
|
|
165
|
+
const d = require('fs').readFileSync('$out', 'utf8');
|
|
166
|
+
const r = JSON.parse(d);
|
|
167
|
+
const required = ['kit_id','kit_name','conformance','targets','third_party_extensions'];
|
|
168
|
+
for (const k of required) {
|
|
169
|
+
if (!(k in r)) throw new Error('missing key: ' + k);
|
|
170
|
+
}
|
|
171
|
+
const conf = ['k0','k1','k2'];
|
|
172
|
+
for (const k of conf) {
|
|
173
|
+
if (typeof r.conformance[k] !== 'boolean') throw new Error('conformance.' + k + ' must be boolean');
|
|
174
|
+
}
|
|
175
|
+
if (!Array.isArray(r.targets)) throw new Error('targets must be array');
|
|
176
|
+
if (!Array.isArray(r.third_party_extensions)) throw new Error('third_party_extensions must be array');
|
|
177
|
+
" 2>/dev/null; then
|
|
178
|
+
pass "inspect JSON output has required schema shape"
|
|
179
|
+
else
|
|
180
|
+
fail "inspect JSON output is missing required fields"
|
|
181
|
+
cat "$out"
|
|
182
|
+
fi
|
|
183
|
+
|
|
184
|
+
# ===================================================================
|
|
185
|
+
echo ""
|
|
186
|
+
echo "=== 7. Degradation invariant: core container strip test ==="
|
|
187
|
+
# ===================================================================
|
|
188
|
+
|
|
189
|
+
# Verify that validateCoreContainer (via inspect) ignores agent extension fields
|
|
190
|
+
# by checking that knowledge kit (which has agent extension asset fields present)
|
|
191
|
+
# still passes core validation
|
|
192
|
+
out="$TMP_DIR/knowledge-core.out"
|
|
193
|
+
run_inspect "$ROOT/kits/knowledge" "$out" || true
|
|
194
|
+
k0=$(node -e "const d=require('fs').readFileSync('$out','utf8'); console.log(JSON.parse(d).conformance.k0)" 2>/dev/null)
|
|
195
|
+
if [[ "$k0" == "true" ]]; then
|
|
196
|
+
pass "knowledge kit: agent extension fields stripped, core container valid (degradation invariant)"
|
|
197
|
+
else
|
|
198
|
+
fail "knowledge kit: degradation invariant violated — k0 should be true"
|
|
199
|
+
cat "$out"
|
|
200
|
+
fi
|
|
201
|
+
|
|
202
|
+
# ===================================================================
|
|
203
|
+
echo ""
|
|
204
|
+
if [[ "$errors" -eq 0 ]]; then
|
|
205
|
+
echo "Kit conformance level checks passed."
|
|
206
|
+
exit 0
|
|
207
|
+
fi
|
|
208
|
+
echo "Kit conformance level checks failed: $errors issue(s)."
|
|
209
|
+
exit 1
|
package/evals/run.sh
CHANGED
|
@@ -192,6 +192,8 @@ run_integration() {
|
|
|
192
192
|
bash "$EVAL_DIR/integration/test_bundle_install.sh" || result=1
|
|
193
193
|
echo ""
|
|
194
194
|
bash "$EVAL_DIR/integration/test_bundle_lifecycle.sh" || result=1
|
|
195
|
+
echo ""
|
|
196
|
+
bash "$EVAL_DIR/integration/test_kit_conformance_levels.sh" || result=1
|
|
195
197
|
return $result
|
|
196
198
|
}
|
|
197
199
|
|
package/kits/catalog.json
CHANGED
|
@@ -12,6 +12,12 @@
|
|
|
12
12
|
"name": "Knowledge Kit",
|
|
13
13
|
"path": "kits/knowledge",
|
|
14
14
|
"description": "Store contract with record types (raw/compiled/concept), mutation operations with required provenance, default markdown+frontmatter+wikilink+graph-index adapter, and a parameterized contract test suite."
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"id": "release-evidence",
|
|
18
|
+
"name": "Release Evidence Kit",
|
|
19
|
+
"path": "kits/release-evidence",
|
|
20
|
+
"description": "Minimal flows-only kit for proving agentless gate evaluation over surface claims in CI. One gate expects a trusted release.evidence claim."
|
|
15
21
|
}
|
|
16
22
|
]
|
|
17
23
|
}
|
|
@@ -301,7 +301,7 @@ function removeLinksFromGraph(graph, sourceId) {
|
|
|
301
301
|
// Validation helpers
|
|
302
302
|
// ---------------------------------------------------------------------------
|
|
303
303
|
|
|
304
|
-
const VALID_TYPES = new Set(["raw", "compiled", "concept", "snapshot"]);
|
|
304
|
+
const VALID_TYPES = new Set(["raw", "compiled", "concept", "snapshot", "person"]);
|
|
305
305
|
const VALID_STATUSES = new Set(["active", "implemented", "retired"]);
|
|
306
306
|
const CATEGORY_SEGMENT_RE = /^[a-z0-9_-]+$/;
|
|
307
307
|
|
|
@@ -367,7 +367,7 @@ export class DefaultKnowledgeStore {
|
|
|
367
367
|
// Required field enforcement
|
|
368
368
|
if (!input.type) throw missingEvidenceError("create: missing required field: type");
|
|
369
369
|
if (!VALID_TYPES.has(input.type))
|
|
370
|
-
throw missingEvidenceError(`create: type must be raw, compiled, concept,
|
|
370
|
+
throw missingEvidenceError(`create: type must be one of raw, compiled, concept, snapshot, person; got: ${input.type}`);
|
|
371
371
|
if (!input.title || !input.title.trim())
|
|
372
372
|
throw missingEvidenceError("create: missing required field: title");
|
|
373
373
|
if (!input.body && input.body !== "")
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge Kit — Entity Extractor
|
|
3
|
+
*
|
|
4
|
+
* Pluggable interface for extracting person entities from raw/compiled records.
|
|
5
|
+
* Pattern mirrors SimilarityDetector (see flow-runner/index.js R3 pattern):
|
|
6
|
+
* an EntityExtractor is a function:
|
|
7
|
+
*
|
|
8
|
+
* async (record: Record) => PersonMention[]
|
|
9
|
+
*
|
|
10
|
+
* where PersonMention = { name: string, role?: string, org?: string }
|
|
11
|
+
*
|
|
12
|
+
* Default implementation: AttendeeLineExtractor
|
|
13
|
+
* - Parses "Attendees:" lines: entries separated by top-level commas (commas
|
|
14
|
+
* inside parentheticals are NOT treated as entry separators).
|
|
15
|
+
* - Each entry may carry an optional parenthetical role/org:
|
|
16
|
+
* "Dana Smith (Acme VP Eng), Lee Wong (Acme, procurement)."
|
|
17
|
+
* - Trailing sentence punctuation after the last ')' is stripped so that
|
|
18
|
+
* end-of-line entries like 'Lee Wong (Acme procurement).' parse correctly
|
|
19
|
+
* (fix for issue #48 — trailing period folded role into name).
|
|
20
|
+
* - Also extracts explicit [[wikilinks]] from the body (name = link target)
|
|
21
|
+
* - NO freeform NLP — conservative by design (R2)
|
|
22
|
+
*
|
|
23
|
+
* @module adapters/flow-runner/entity-extractor
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
// Attendee line parser
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
const ATTENDEES_LINE_RE = /^Attendees:\s*(.+)$/im;
|
|
31
|
+
const ENTRY_WITH_ROLE_RE = /^([^(]+?)\s*\(([^)]+)\)\s*$/;
|
|
32
|
+
const WIKILINK_RE = /\[\[([^\]|]+)(?:\|[^\]]+)?\]\]/g;
|
|
33
|
+
|
|
34
|
+
/**
 * Split an attendee list into entries on top-level commas. Commas that sit
 * inside a parenthetical are part of the entry and do NOT separate entries.
 *
 *   "Dana Smith (Acme VP Eng), Lee Wong (Acme, procurement)." →
 *     ["Dana Smith (Acme VP Eng)", "Lee Wong (Acme, procurement)."]
 *
 * Entries are trimmed and empty entries (e.g. from ",,") are dropped.
 * An unmatched ")" is ignored for nesting purposes, so a stray closing
 * parenthesis cannot stop later commas from separating entries.
 *
 * @param {string} text - Text following "Attendees:" on the line.
 * @returns {string[]} Trimmed, non-empty entries in input order.
 */
function splitAttendeeEntries(text) {
  const entries = [];
  let depth = 0;
  let start = 0;
  for (let i = 0; i < text.length; i++) {
    const ch = text[i];
    if (ch === "(") {
      depth++;
    } else if (ch === ")") {
      // Clamp at zero: without this, a stray ")" leaves depth negative and
      // the `depth === 0` test below never succeeds again.
      if (depth > 0) depth--;
    } else if (ch === "," && depth === 0) {
      const entry = text.slice(start, i).trim();
      if (entry) entries.push(entry);
      start = i + 1;
    }
  }
  const last = text.slice(start).trim();
  if (last) entries.push(last);
  return entries;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Turn one attendee entry into a mention object.
 *
 *   "Dana Smith (Acme VP Eng)" → { name: "Dana Smith", role: "Acme VP Eng" }
 *   "Lee Wong"                 → { name: "Lee Wong" }
 *
 * Sentence punctuation at the very end is removed only when it directly
 * follows a closing ')', so 'Lee Wong (Acme procurement).' parses cleanly
 * (issue #48) while an abbreviated name such as 'Dana S.' keeps its period.
 *
 * The whole parenthetical is returned as `role`; it is not split into
 * organisation and title, and `org` is never populated here.
 *
 * @param {string} entry - One entry from the attendee list.
 * @returns {{name: string, role?: string}}
 */
function parseAttendeeEntry(entry) {
  const text = entry.trim();

  // Punctuation is only dropped when it trails a ')'.
  const endsAfterParen = /\)\s*[.,;:!?]+\s*$/.test(text);
  let candidate = text;
  if (endsAfterParen) {
    candidate = text.replace(/[.,;:!?]+$/, "");
  }

  const parts = ENTRY_WITH_ROLE_RE.exec(candidate);
  if (parts === null) {
    // No "(...)" suffix: the entire entry is the name.
    return { name: candidate };
  }

  const [, rawName, rawRole] = parts;
  return { name: rawName.trim(), role: rawRole.trim() };
}
|
|
90
|
+
|
|
91
|
+
/**
 * Default entity extractor: collects person mentions from the first
 * "Attendees:" line of the record body and from explicit [[wikilinks]].
 *
 * EntityExtractor interface:
 *   async (record: Record) => PersonMention[]
 *
 * PersonMention: { name: string, role?: string, org?: string }
 *
 * Mentions are de-duplicated by exact name; the first occurrence wins, so an
 * attendee entry (which may carry a role) takes precedence over a later
 * wikilink to the same name. Mentions with an empty name are never emitted,
 * whether they come from the attendee line or from a blank wikilink target
 * such as "[[ ]]".
 *
 * @param {object} record - Record whose `body` string is scanned; a missing
 *   or empty body yields no mentions.
 * @returns {Promise<Array<{name: string, role?: string, org?: string}>>}
 */
export async function defaultEntityExtractor(record) {
  const body = record.body || "";
  const mentions = new Map(); // name → mention (deduplicated)

  // 1. Parse the "Attendees:" line.
  const attendeesMatch = body.match(ATTENDEES_LINE_RE);
  if (attendeesMatch) {
    for (const entry of splitAttendeeEntries(attendeesMatch[1])) {
      const mention = parseAttendeeEntry(entry);
      if (mention.name && !mentions.has(mention.name)) {
        mentions.set(mention.name, mention);
      }
    }
  }

  // 2. Extract explicit [[wikilinks]] — the link target is the person name.
  for (const match of body.matchAll(WIKILINK_RE)) {
    const name = match[1].trim();
    // Same empty-name guard as the attendee path: a whitespace-only target
    // must not become a nameless mention.
    if (name && !mentions.has(name)) {
      mentions.set(name, { name });
    }
  }

  return [...mentions.values()];
}
|
|
129
|
+
|
|
130
|
+
// ---------------------------------------------------------------------------
|
|
131
|
+
// Name normalisation helpers
|
|
132
|
+
// ---------------------------------------------------------------------------
|
|
133
|
+
|
|
134
|
+
/**
 * Canonical form of a person name for comparisons: lower-cased, with leading
 * and trailing whitespace removed and every internal whitespace run reduced
 * to a single space.
 *
 * @param {string} name
 * @returns {string}
 */
export function normalizeName(name) {
  const lowered = name.toLowerCase();
  const words = lowered.trim().split(/\s+/);
  return words.join(" ");
}
|
|
141
|
+
|
|
142
|
+
/**
 * Whether two names are identical once both are passed through
 * normalizeName (case and whitespace differences are ignored).
 *
 * @param {string} a
 * @param {string} b
 * @returns {boolean}
 */
export function isExactMatch(a, b) {
  const left = normalizeName(a);
  const right = normalizeName(b);
  return left === right;
}
|
|
148
|
+
|
|
149
|
+
/**
 * Decide whether `candidate` might refer to the same person as `existing`
 * without being the same name. Only the first and last tokens of each
 * normalised name are compared:
 *
 *   - "Dana S."  ~ "Dana Smith"  first names agree, one surname is the
 *                                initial of the other
 *   - "D. Smith" ~ "Dana Smith"  one first name is the initial of the other,
 *                                surnames are equal
 *
 * Returns false for exact matches (those are real matches, not ambiguous
 * ones) and for any name with fewer than two tokens. Never merges anything;
 * it only reports ambiguity.
 *
 * @param {string} candidate
 * @param {string} existing
 * @returns {boolean}
 */
export function isPossibleDuplicate(candidate, existing) {
  const candidateTokens = normalizeName(candidate).split(" ");
  const existingTokens = normalizeName(existing).split(" ");

  // Identical names are a definite match, not a "possible" one.
  if (isExactMatch(candidate, existing)) return false;

  // Single-token names carry too little information to compare.
  if (candidateTokens.length < 2 || existingTokens.length < 2) return false;

  const [cFirst] = candidateTokens;
  const [eFirst] = existingTokens;
  const cLast = candidateTokens[candidateTokens.length - 1];
  const eLast = existingTokens[existingTokens.length - 1];

  // True when `short` (optionally ending in ".") is a single letter that
  // starts `long`: "s." → "smith", "d." → "dana".
  const abbreviates = (short, long) => {
    const letter = short.replace(/\.$/, "");
    return letter.length === 1 && long.startsWith(letter);
  };
  const eitherAbbreviates = (x, y) => abbreviates(x, y) || abbreviates(y, x);

  const firstIsInitial = eitherAbbreviates(cFirst, eFirst);
  const lastIsInitial = eitherAbbreviates(cLast, eLast);

  // Case A: "Dana S." ~ "Dana Smith"
  const firstsAgree = cFirst === eFirst || firstIsInitial;
  if (firstsAgree && lastIsInitial) return true;

  // Case B: "D. Smith" ~ "Dana Smith"
  return firstIsInitial && cLast === eLast;
}
|
|
193
|
+
|
|
194
|
+
export default defaultEntityExtractor;
|