@onlooker-community/ecosystem 0.19.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +26 -0
- package/.claude-plugin/plugin.json +1 -1
- package/.release-please-manifest.json +4 -2
- package/CHANGELOG.md +14 -0
- package/docs/memory-architecture.md +102 -0
- package/package.json +3 -3
- package/plugins/curator/.claude-plugin/plugin.json +14 -0
- package/plugins/curator/CHANGELOG.md +10 -0
- package/plugins/curator/README.md +55 -0
- package/plugins/curator/config.json +41 -0
- package/plugins/curator/docs/adr/001-staleness-tiers.md +100 -0
- package/plugins/curator/docs/design.md +311 -0
- package/plugins/curator/hooks/hooks.json +15 -0
- package/plugins/curator/scripts/hooks/curator-session-start.sh +343 -0
- package/plugins/curator/scripts/lib/curator-checks.sh +155 -0
- package/plugins/curator/scripts/lib/curator-config.sh +67 -0
- package/plugins/curator/scripts/lib/curator-emit.sh +61 -0
- package/plugins/curator/scripts/lib/curator-memory-reader.sh +225 -0
- package/plugins/curator/scripts/lib/curator-project-key.sh +82 -0
- package/plugins/curator/scripts/lib/curator-storage.sh +176 -0
- package/plugins/curator/scripts/lib/curator-ulid.sh +43 -0
- package/plugins/historian/docs/adr/001-local-embeddings-only.md +96 -0
- package/plugins/historian/docs/design.md +317 -0
- package/plugins/librarian/.claude-plugin/plugin.json +14 -0
- package/plugins/librarian/CHANGELOG.md +10 -0
- package/plugins/librarian/README.md +51 -0
- package/plugins/librarian/config.json +52 -0
- package/plugins/librarian/docs/adr/001-propose-dont-auto-write.md +87 -0
- package/plugins/librarian/docs/design.md +301 -0
- package/plugins/librarian/hooks/hooks.json +26 -0
- package/plugins/librarian/scripts/hooks/librarian-session-end.sh +312 -0
- package/plugins/librarian/scripts/hooks/librarian-session-start.sh +103 -0
- package/plugins/librarian/scripts/lib/librarian-archivist-reader.sh +67 -0
- package/plugins/librarian/scripts/lib/librarian-classifier.sh +139 -0
- package/plugins/librarian/scripts/lib/librarian-config.sh +74 -0
- package/plugins/librarian/scripts/lib/librarian-durability.sh +77 -0
- package/plugins/librarian/scripts/lib/librarian-emit.sh +72 -0
- package/plugins/librarian/scripts/lib/librarian-project-key.sh +83 -0
- package/plugins/librarian/scripts/lib/librarian-storage.sh +222 -0
- package/plugins/librarian/scripts/lib/librarian-ulid.sh +50 -0
- package/release-please-config.json +32 -0
- package/test/bats/curator-session-start.bats +316 -0
- package/test/bats/librarian-session-end.bats +182 -0
- package/test/bats/librarian-session-start.bats +136 -0
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Cheap-tier checks for Curator.
|
|
3
|
+
#
|
|
4
|
+
# Pure data transforms over the memory record array produced by
|
|
5
|
+
# curator-memory-reader.sh. Each function returns a JSON array of findings
|
|
6
|
+
# of a single kind. Callers attach the project key and persist via
|
|
7
|
+
# curator-storage.sh.
|
|
8
|
+
#
|
|
9
|
+
# All checks are intentionally cheap — string scans and file-exists
|
|
10
|
+
# probes only. The LLM contradiction sweep lives in its own module.
|
|
11
|
+
|
|
12
|
+
# Date check. Scans memory bodies for ISO-8601 dates (YYYY-MM-DD) and
# flags any that are more than <grace_period_days> in the past, on the
# theory that those are most likely decayed deadlines or stale "by date"
# references the body never updated.
#
# Usage:  curator_check_dates <memories_json> <grace_period_days>
#   memories_json      JSON array of memory records (default: []). Only
#                      records with a truthy `exists` and a non-empty
#                      `body` are scanned.
#   grace_period_days  Integer number of days (default: 14). Anything that
#                      is not a plain integer falls back to 14.
# Output: JSON array of date_decayed finding payload candidates, each
#         shaped { memory_file, matched_phrase, days_past }.
#         (Caller assigns finding_id and deduped_hash.)
curator_check_dates() {
  local memories="${1:-[]}"
  local grace="${2:-14}"

  # The Python step calls int() on this value; a non-numeric setting would
  # otherwise end in a traceback and no JSON on stdout.
  [[ "$grace" =~ ^[+-]?[0-9]+$ ]] || grace=14

  local today
  today=$(date -u +"%Y-%m-%d")

  # Extract every YYYY-MM-DD substring per memory body via jq, then hand
  # the candidate list to python for precise date math and grace-period
  # filtering. The lookarounds keep the pattern from matching inside a
  # longer digit run (e.g. "1002023-11-05"). Python gets the JSON as an
  # argv (not stdin) because the heredoc-on-stdin pattern collides with
  # piped input — see SC2259.
  local candidates
  candidates=$(printf '%s' "$memories" | jq -c '
    [ .[] | select(.exists and .body != null and .body != "")
      | .filename as $fname
      | (.body | [scan("(?<![0-9])[0-9]{4}-[0-9]{2}-[0-9]{2}(?![0-9])")])
      | .[]
      | { memory_file: $fname, matched_phrase: . }
    ]
  ')

  python3 - "$today" "$grace" "$candidates" <<'PY'
import json, sys, datetime

today_str = sys.argv[1]
grace = int(sys.argv[2])
# An empty argv (jq failed upstream) is treated as "no candidates".
data = json.loads(sys.argv[3] or "[]")
today = datetime.datetime.strptime(today_str, "%Y-%m-%d").date()
out = []
for entry in data:
    try:
        d = datetime.datetime.strptime(entry["matched_phrase"], "%Y-%m-%d").date()
    except (ValueError, KeyError):
        # Date-shaped but not a real calendar date (e.g. 2024-13-45).
        continue
    days_past = (today - d).days
    if days_past > grace:
        out.append({
            "memory_file": entry["memory_file"],
            "matched_phrase": entry["matched_phrase"],
            "days_past": days_past,
        })
print(json.dumps(out))
PY
}
|
|
64
|
+
|
|
65
|
+
# Path reference check. For each memory, scans the body for path-shaped
# strings ("scripts/foo.py", "src/lib/bar.ts", etc.) and emits a finding
# when the path doesn't resolve under the given repo root.
#
# Path heuristic: at least one `/`, contains an extension (`.ext`), and
# only matches the conservative character class `[A-Za-z0-9._/-]+`. A
# negative lookbehind rejects candidates preceded by another path
# character, `/` or `:`, so:
#   - URL substrings ("https://example.com/foo.py") don't match — the
#     host segment is preceded by `/`, and the scheme is followed by `:`.
#   - Absolute paths ("/usr/bin/python3.11") don't match — the first
#     segment is preceded by `/`.
# That keeps the target on in-repo relative paths such as
# scripts/legacy_ingest.py.
#
# A candidate that climbs out of the repo root ("../other/x.py") is
# reported even if the file exists, because it does not resolve *under*
# the root.
#
# Usage:  curator_check_paths <memories_json> <repo_root>
# Output: JSON array of { memory_file, broken_path }. Prints `[]` when
#         repo_root is empty, not a directory, or cannot be entered.
curator_check_paths() {
  local memories="${1:-[]}"
  local repo_root="${2:-}"

  [[ -z "$repo_root" || ! -d "$repo_root" ]] && { echo '[]'; return 0; }

  local abs_root
  abs_root=$(cd "$repo_root" 2>/dev/null && pwd -P) || { echo '[]'; return 0; }

  # Extract candidate paths per memory body. The jq scan regex returns
  # every match in the body; `unique` collapses repeats within one memory.
  # The output is already compact, so it goes to python unchanged.
  local candidates
  candidates=$(printf '%s' "$memories" | jq -c '
    [ .[] | select(.exists and .body != null and .body != "")
      | .filename as $fname
      | (.body | [scan("(?<![A-Za-z0-9._/:-])[A-Za-z0-9._-]+(?:/[A-Za-z0-9._-]+)+\\.[A-Za-z0-9]+")])
      | unique
      | .[]
      | { memory_file: $fname, candidate: . }
    ]
  ')

  # Walk each candidate, drop ones that resolve. JSON goes via argv to
  # avoid the SC2259 stdin clobber pattern.
  python3 - "$abs_root" "$candidates" <<'PY'
import json, os, sys

repo_root = sys.argv[1]
data = json.loads(sys.argv[2] or "[]")
root_prefix = repo_root.rstrip(os.sep) + os.sep
out = []
for entry in data:
    # The scan regex never yields a leading "/", so every candidate is
    # relative and can be joined onto the root directly.
    candidate = entry["candidate"]
    joined = os.path.join(repo_root, candidate)
    # Lexical containment check: ".." segments must not climb out of the
    # root. normpath (not realpath) so in-repo symlinks still count.
    lexical = os.path.normpath(joined)
    inside = lexical == repo_root or lexical.startswith(root_prefix)
    if inside and os.path.exists(joined):
        continue
    out.append({
        "memory_file": entry["memory_file"],
        "broken_path": candidate,
    })
print(json.dumps(out))
PY
}
|
|
130
|
+
|
|
131
|
+
# Broken-index check. Selects the records flagged both `referenced: true`
# and `exists: false` — i.e. entries MEMORY.md points at that are not on
# disk — and reshapes each into a finding payload.
#
# Usage:  curator_check_broken_index <memories_json>
# Output: compact JSON array of { referenced_file }.
curator_check_broken_index() {
  local records="${1:-[]}"
  local shape_broken='map(
    select(.referenced == true and .exists == false)
    | { referenced_file: .filename }
  )'

  jq -c "$shape_broken" <<<"$records"
}
|
|
144
|
+
|
|
145
|
+
# Orphaned memory check. Selects the records flagged `referenced: false`
# and `exists: true` — files in the memory dir that MEMORY.md does not
# link to — and reshapes each into a finding payload.
#
# Usage:  curator_check_orphaned <memories_json>
# Output: compact JSON array of { memory_file }.
curator_check_orphaned() {
  local records="${1:-[]}"
  local shape_orphans='map(
    select(.referenced == false and .exists == true)
    | { memory_file: .filename }
  )'

  jq -c "$shape_orphans" <<<"$records"
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Config resolution for Curator.
|
|
3
|
+
#
|
|
4
|
+
# Reads three layers, latest wins:
|
|
5
|
+
# 1. plugins/curator/config.json (defaults shipped with the plugin)
|
|
6
|
+
# 2. ~/.claude/settings.json
|
|
7
|
+
# 3. <repo>/.claude/settings.json
|
|
8
|
+
#
|
|
9
|
+
# Exposes:
|
|
10
|
+
# curator_config_load <repo_root> # populates _CURATOR_CONFIG (JSON)
|
|
11
|
+
# curator_config_get <jq-path> # echoes string value (empty if unset)
|
|
12
|
+
# curator_config_enabled # 0 if curator.enabled is true
|
|
13
|
+
#
|
|
14
|
+
# Settings overlay only touches the `curator.*` subtree of settings.json.
|
|
15
|
+
|
|
16
|
+
# Effective Curator configuration as a JSON document. Starts out as an
# empty object; curator_config_load overwrites it with the merged layers
# and curator_config_get reads from it.
_CURATOR_CONFIG="{}"
|
|
17
|
+
|
|
18
|
+
# Build the effective configuration and store it in _CURATOR_CONFIG.
#
# Layers, later wins:
#   1. ${CLAUDE_PLUGIN_ROOT}/config.json      (plugin defaults, whole file)
#   2. ${HOME}/.claude/settings.json          (only the `curator` subtree)
#   3. <repo_root>/.claude/settings.json      (only the `curator` subtree)
#
# A layer is skipped when its base directory is empty/unset, the file is
# missing, the file is not valid JSON, or the relevant value is not a JSON
# object. A skipped layer never disturbs what earlier layers produced.
#
# Usage:   curator_config_load <repo_root>
# Globals: reads CLAUDE_PLUGIN_ROOT and HOME; writes _CURATOR_CONFIG.
curator_config_load() {
  local repo_root="${1:-}"
  local plugin_root="${CLAUDE_PLUGIN_ROOT:-}"
  local home_dir="${HOME:-}"

  local merged="{}"
  local file next

  # Layer 1: plugin defaults. Guard on a non-empty root so an unset
  # CLAUDE_PLUGIN_ROOT does not make us read /config.json.
  if [[ -n "$plugin_root" ]]; then
    file="${plugin_root}/config.json"
    if [[ -f "$file" ]]; then
      next=$(jq 'if type == "object" then . else empty end' "$file" 2>/dev/null) \
        || next=""
      # An empty file parses to "no output"; keep the {} baseline then.
      [[ -n "$next" ]] && merged="$next"
    fi
  fi

  # Layers 2 and 3: settings overlays, user-level first, then repo-level.
  local base overlay
  for base in "$home_dir" "$repo_root"; do
    [[ -n "$base" ]] || continue
    file="${base}/.claude/settings.json"
    [[ -f "$file" ]] || continue

    # Only the `curator` subtree participates; anything else in
    # settings.json is ignored. A non-object `curator` counts as absent.
    overlay=$(jq '{ curator: (if (.curator | type) == "object" then .curator else {} end) }' \
      "$file" 2>/dev/null) || continue
    [[ -z "$overlay" ]] && continue

    # Merge into a scratch variable so a failed merge cannot clobber the
    # layers accumulated so far.
    next=$(jq -n --argjson a "$merged" --argjson b "$overlay" '
      def deepmerge($a; $b):
        if ($a|type) == "object" and ($b|type) == "object" then
          reduce (($a|keys) + ($b|keys) | unique)[] as $k
            ({}; .[$k] = deepmerge($a[$k]; $b[$k]))
        elif $b == null then $a
        else $b end;
      deepmerge($a; $b)
    ' 2>/dev/null) || continue
    [[ -n "$next" ]] && merged="$next"
  done

  _CURATOR_CONFIG="$merged"
}
|
|
52
|
+
|
|
53
|
+
# Print the value at a jq path inside _CURATOR_CONFIG, or nothing when the
# value is null/absent.
#
# Usage:  curator_config_get <jq-path>      e.g. '.curator.enabled'
# Output: the raw (-r) value on stdout; jq diagnostics are discarded.
curator_config_get() {
  local jq_path="$1"

  # Deliberately not `// empty`: that operator also swallows `false`, which
  # would make "explicitly disabled" indistinguishable from "unset". Only
  # a real null is mapped to no output, so booleans round-trip.
  local filter="${jq_path} | if . == null then empty else . end"

  jq -r "$filter" 2>/dev/null <<<"$_CURATOR_CONFIG"
}
|
|
62
|
+
|
|
63
|
+
# Succeeds (status 0) only when `.curator.enabled` in the loaded config is
# exactly `true`; any other value, or no value at all, yields status 1.
curator_config_enabled() {
  local flag
  flag="$(curator_config_get '.curator.enabled')"

  if [[ "$flag" == "true" ]]; then
    return 0
  fi
  return 1
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Event emission helpers for Curator.
|
|
3
|
+
#
|
|
4
|
+
# Thin wrapper around onlooker-event.mjs `emit` mode for curator.* events.
|
|
5
|
+
# Fail-soft: returns 0 on success or when the substrate is unavailable.
|
|
6
|
+
|
|
7
|
+
# Print the path to onlooker-event.mjs, or nothing when it can't be found.
#
# Resolution:
#   1. If ONLOOKER_ECOSYSTEM_ROOT is set, use it as the ecosystem root
#      without probing the filesystem.
#   2. Otherwise walk up from this file's directory — two levels to the
#      plugin root, two more to the candidate ecosystem root — and accept
#      the candidate only if scripts/lib/onlooker-event.mjs exists there.
#
# Always returns 0.
_curator_resolve_event_js() {
  local ecosystem_root="${ONLOOKER_ECOSYSTEM_ROOT:-}"

  if [[ -z "$ecosystem_root" ]]; then
    local script_dir plugin_root="" candidate=""

    script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" 2>/dev/null && pwd)" \
      || script_dir=""
    if [[ -n "$script_dir" ]]; then
      plugin_root="$(cd "${script_dir}/../.." 2>/dev/null && pwd)" \
        || plugin_root=""
    fi
    if [[ -n "$plugin_root" ]]; then
      candidate="$(cd "${plugin_root}/../.." 2>/dev/null && pwd)" \
        || candidate=""
    fi

    # An empty candidate must not be probed: "${candidate}/scripts/..."
    # would then test a path at the filesystem root.
    if [[ -n "$candidate" && -f "${candidate}/scripts/lib/onlooker-event.mjs" ]]; then
      ecosystem_root="$candidate"
    fi
  fi

  if [[ -n "$ecosystem_root" ]]; then
    printf '%s/scripts/lib/onlooker-event.mjs' "$ecosystem_root"
  fi
  return 0
}
|
|
24
|
+
|
|
25
|
+
# Path to onlooker-event.mjs. A non-empty value already present in the
# environment is kept as-is; otherwise it is resolved when this file is
# sourced. May end up empty, in which case curator_emit is a no-op.
_CURATOR_EVENT_JS="${_CURATOR_EVENT_JS:-$(_curator_resolve_event_js)}"
|
|
26
|
+
|
|
27
|
+
# Emit a curator.* event. Fail-soft: returns 0 on success AND on every
# failure path (missing args, no emitter script, no node, no log target,
# bad payload, emitter error, unwritable log).
#
# Usage:   curator_emit <event_type> <session_id> <payload_json>
#   event_type    Event name; empty → no-op.
#   session_id    Session identifier; empty → no-op.
#   payload_json  JSON value for the event payload (default: {}).
# Globals: reads _CURATOR_EVENT_JS, ONLOOKER_EVENTS_LOG, ONLOOKER_DIR, HOME.
# Effect:  pipes the event parameters to `node <emitter> emit` and appends
#          whatever the emitter prints as one line to $ONLOOKER_EVENTS_LOG.
curator_emit() {
  local event_type="${1:-}"
  local session_id="${2:-}"
  local payload="${3:-}"
  [[ -n "$payload" ]] || payload='{}'

  [[ -z "$event_type" || -z "$session_id" ]] && return 0
  [[ -z "${_CURATOR_EVENT_JS:-}" || ! -f "$_CURATOR_EVENT_JS" ]] && return 0
  command -v node >/dev/null 2>&1 || return 0
  [[ -z "${ONLOOKER_EVENTS_LOG:-}" ]] && return 0

  local params event_json
  params=$(jq -cn \
    --arg plugin "curator" \
    --arg session_id "$session_id" \
    --arg event_type "$event_type" \
    --argjson payload "$payload" \
    '{
      plugin: $plugin,
      session_id: $session_id,
      event_type: $event_type,
      payload: $payload
    }') || return 0

  # The environment assignments must prefix `node`, the command that reads
  # them. Placed in front of `printf` they would apply only to that stage
  # of the pipeline and never reach the emitter.
  event_json=$(
    printf '%s' "$params" \
      | ONLOOKER_DIR="${ONLOOKER_DIR:-${HOME:-}/.onlooker}" \
        ONLOOKER_PLUGIN_NAME="curator" \
        node "$_CURATOR_EVENT_JS" emit 2>/dev/null
  ) || return 0
  [[ -z "$event_json" ]] && return 0

  mkdir -p "$(dirname "$ONLOOKER_EVENTS_LOG")" 2>/dev/null || true
  # Braces so a failed redirection is silenced too, and an explicit
  # fallback so an unwritable log cannot leak a non-zero status.
  { printf '%s\n' "$event_json" >> "$ONLOOKER_EVENTS_LOG"; } 2>/dev/null || return 0
  return 0
}
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Memory store reader for Curator.
|
|
3
|
+
#
|
|
4
|
+
# Parses ~/.claude/projects/<encoded-project>/memory/MEMORY.md and the
|
|
5
|
+
# referenced *.md files. Returns a JSON array of memory records:
|
|
6
|
+
#
|
|
7
|
+
# [
|
|
8
|
+
# {
|
|
9
|
+
# "filename": "feedback_no_trailing_summaries.md",
|
|
10
|
+
# "title": "...", # from frontmatter `name` or MEMORY.md link
|
|
11
|
+
# "type": "feedback", # from frontmatter `type`
|
|
12
|
+
# "body": "...", # everything after the frontmatter
|
|
13
|
+
# "exists": true, # false when MEMORY.md points at a missing file
|
|
14
|
+
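#       "description": "...",               # from frontmatter `description` (parsed files)
#       "referenced": true,                 # false for files not linked from MEMORY.md
#       "frontmatter_parsed": true|false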
|
|
15
|
+
# },
|
|
16
|
+
# ...
|
|
17
|
+
# ]
|
|
18
|
+
#
|
|
19
|
+
# Orphans (files present in the memory dir but not referenced from MEMORY.md)
|
|
20
|
+
# get their own record with `referenced: false`. Broken index entries
|
|
21
|
+
# (referenced by MEMORY.md but missing on disk) get `exists: false`.
|
|
22
|
+
|
|
23
|
+
# Decide whether a MEMORY.md link target is unsafe to join onto the
# memory dir.
#
# Returns 0 (unsafe) when the name is empty, is absolute, contains a `/`
# or `\`, contains `..`, starts with a dot, contains a newline or carriage
# return, does not end in `.md`, or contains any character outside
# [A-Za-z0-9._-]. Returns 1 (safe) only for a plain `<name>.md` filename.
_curator_memory_unsafe_filename() {
  local candidate="$1"
  local plain_md='^[A-Za-z0-9._-]+\.md$'

  # Nothing to join.
  if [[ -z "$candidate" ]]; then
    return 0
  fi

  # Absolute paths and anything carrying a separator.
  if [[ "$candidate" == /* || "$candidate" == */* || "$candidate" == *\\* ]]; then
    return 0
  fi

  # Parent-dir escapes and dotfiles.
  if [[ "$candidate" == *..* || "$candidate" == .* ]]; then
    return 0
  fi

  # Embedded line breaks.
  if [[ "$candidate" == *$'\n'* || "$candidate" == *$'\r'* ]]; then
    return 0
  fi

  # Must end in .md and look like a plain filename.
  if [[ "$candidate" != *.md ]]; then
    return 0
  fi
  if [[ ! "$candidate" =~ $plain_md ]]; then
    return 0
  fi

  return 1
}
|
|
39
|
+
|
|
40
|
+
# Resolve the memory store path from a template.
#
# The template may contain the literal placeholders ${HOME} and
# ${CLAUDE_PROJECT_ENCODED}; each is replaced with the value of the
# corresponding environment variable.
#
# Usage:  curator_memory_resolve_path <memory_store_path_template>
# Output: the substituted path, or nothing when the template is empty or
#         when it needs ${CLAUDE_PROJECT_ENCODED} and that variable is
#         unset/empty. The caller treats empty as "skip the audit".
# Always returns 0.
curator_memory_resolve_path() {
  local template="$1"
  [[ -z "$template" ]] && return 0
  local encoded="${CLAUDE_PROJECT_ENCODED:-}"

  # This has to be checked BEFORE substituting: replacing the placeholder
  # with an empty value would erase it and leave a path like
  # ".../projects//memory" that looks resolved but points nowhere useful.
  # shellcheck disable=SC2016  # the placeholder is matched literally
  if [[ -z "$encoded" && "$template" == *'${CLAUDE_PROJECT_ENCODED}'* ]]; then
    return 0
  fi

  local resolved
  resolved="${template//\$\{HOME\}/${HOME:-}}"
  resolved="${resolved//\$\{CLAUDE_PROJECT_ENCODED\}/${encoded}}"
  printf '%s' "$resolved"
}
|
|
61
|
+
|
|
62
|
+
# Parse a single memory file. Prints one compact JSON object on stdout.
#
# Usage:  curator_memory_parse_file <abs_path> <referenced_bool>
#   abs_path         File to parse; empty → no output.
#   referenced_bool  JSON boolean literal (`true`/`false`, default `true`)
#                    copied into the record's `referenced` field.
# Output record keys: filename, title, description, type, body, exists,
#   referenced, frontmatter_parsed.
#   - Missing file: exists=false, body="", the three text fields null.
#   - File with a closed `---` … `---` frontmatter block: title /
#     description / type come from the first `name:` / `description:` /
#     `type:` line in the block (null when absent or empty); body is
#     everything after the closing line; frontmatter_parsed=true.
#   - Anything else, including a file that starts with `---` but never
#     closes the block: the whole file is the body, frontmatter_parsed=false.
curator_memory_parse_file() {
  local path="$1"
  local referenced="${2:-true}"
  [[ -z "$path" ]] && return 0

  local filename
  filename="$(basename "$path")"

  if [[ ! -f "$path" ]]; then
    # Same key set as a parsed record so consumers see one schema.
    jq -cn \
      --arg filename "$filename" \
      --argjson referenced "$referenced" \
      '{
        filename: $filename,
        title: null, description: null, type: null, body: "",
        exists: false, referenced: $referenced,
        frontmatter_parsed: false
      }'
    return 0
  fi

  local raw
  raw=$(cat "$path" 2>/dev/null || true)

  # Defaults describe the "no frontmatter" case: whole file is content.
  local name="" desc="" type="" body="$raw" fm_parsed="false"
  local fm_block

  # YAML frontmatter: simple `key: value` lines between an opening `---`
  # on line 1 and the next line starting with `---`. Fancier YAML (nested,
  # lists) isn't expected in the auto-memory format. awk exits non-zero
  # when no closing line is found, so an unterminated block (e.g. a file
  # that merely opens with a horizontal rule) is not mistaken for
  # frontmatter and its content is kept as the body.
  if [[ "$raw" == "---"* ]] && fm_block=$(
    printf '%s' "$raw" | awk '
      NR == 1 && /^---/ { in_fm = 1; next }
      in_fm && /^---/   { in_fm = 0; closed = 1; next }
      in_fm             { print }
      END               { exit (closed ? 0 : 1) }
    '
  ); then
    name=$(printf '%s' "$fm_block" | sed -nE 's/^name:[[:space:]]*(.*)$/\1/p' | head -1)
    desc=$(printf '%s' "$fm_block" | sed -nE 's/^description:[[:space:]]*(.*)$/\1/p' | head -1)
    type=$(printf '%s' "$fm_block" | sed -nE 's/^type:[[:space:]]*(.*)$/\1/p' | head -1)
    body=$(printf '%s' "$raw" | awk '
      BEGIN { in_fm = 0; seen_close = 0 }
      NR == 1 && /^---/ { in_fm = 1; next }
      in_fm && /^---/ { in_fm = 0; seen_close = 1; next }
      seen_close { print }
    ')
    fm_parsed="true"
  fi

  jq -cn \
    --arg filename "$filename" \
    --arg name "$name" \
    --arg desc "$desc" \
    --arg type "$type" \
    --arg body "$body" \
    --argjson referenced "$referenced" \
    --argjson fm_parsed "$fm_parsed" \
    '{
      filename: $filename,
      title: (if $name == "" then null else $name end),
      description: (if $desc == "" then null else $desc end),
      type: (if $type == "" then null else $type end),
      body: $body,
      exists: true,
      referenced: $referenced,
      frontmatter_parsed: $fm_parsed
    }'
}
|
|
140
|
+
|
|
141
|
+
# Load every memory file referenced by MEMORY.md plus every *.md file in
# the directory that MEMORY.md does not reference.
#
# Usage:  curator_memory_load_all <memory_dir_abs>
# Output: JSON array of memory records. Referenced entries come first, in
#         MEMORY.md order, each link target appearing once even if it is
#         linked several times; unreferenced files follow with
#         `referenced: false`. Prints `[]` when the directory is empty,
#         missing, or not a directory.
#
# Link targets are taken from markdown links `[Title](target)` in
# MEMORY.md. A target rejected by _curator_memory_unsafe_filename is
# recorded as `exists: false, unsafe: true` and is NEVER handed to the
# parser, so an entry like `[X](../../etc/passwd)` cannot cause a read
# outside the memory dir.
curator_memory_load_all() {
  local mem_dir="$1"
  [[ -z "$mem_dir" || ! -d "$mem_dir" ]] && { echo '[]'; return 0; }

  # 1. Collect link targets from MEMORY.md, in document order.
  local index_path="${mem_dir}/MEMORY.md"
  local referenced_list=()
  local line
  if [[ -f "$index_path" ]]; then
    # Match the standard line format: `- [Title](file.md) — hook`
    while IFS= read -r line; do
      referenced_list+=("$line")
    done < <(grep -oE '\[[^]]+\]\([^)]+\)' "$index_path" 2>/dev/null \
      | sed -E 's/.*\(([^)]+)\)/\1/')
  fi

  # 2. Build one record per distinct target. Records are collected in an
  #    array and assembled by a single jq call at the end. `seen` is a
  #    newline-delimited set; targets come from single lines, so a newline
  #    is a safe delimiter.
  local records=()
  local seen=$'\n'
  local fname rec
  local i
  for ((i = 0; i < ${#referenced_list[@]}; i++)); do
    fname="${referenced_list[i]}"
    [[ -z "$fname" ]] && continue
    # Skip MEMORY.md itself if it self-references.
    [[ "$fname" == "MEMORY.md" ]] && continue
    # A target linked twice must not produce two records.
    [[ "$seen" == *$'\n'"$fname"$'\n'* ]] && continue

    if _curator_memory_unsafe_filename "$fname"; then
      # Surface as a broken index entry without touching the filesystem.
      rec=$(jq -cn \
        --arg filename "$fname" \
        '{
          filename: $filename,
          title: null, description: null, type: null, body: "",
          exists: false, referenced: true,
          frontmatter_parsed: false, unsafe: true
        }')
    else
      rec=$(curator_memory_parse_file "${mem_dir}/${fname}" true)
    fi
    [[ -z "$rec" ]] && continue

    records+=("$rec")
    seen+="${fname}"$'\n'
  done

  # 3. Then any orphans (files on disk not referenced from MEMORY.md).
  local file
  for file in "$mem_dir"/*.md; do
    # Also covers the unmatched-glob case, where the literal pattern stays.
    [[ -f "$file" ]] || continue
    fname="$(basename "$file")"
    [[ "$fname" == "MEMORY.md" ]] && continue
    [[ "$seen" == *$'\n'"$fname"$'\n'* ]] && continue
    rec=$(curator_memory_parse_file "$file" false)
    [[ -z "$rec" ]] && continue
    records+=("$rec")
  done

  if (( ${#records[@]} == 0 )); then
    printf '%s' '[]'
    return 0
  fi

  local all
  all=$(printf '%s\n' "${records[@]}" | jq -s '.')
  printf '%s' "$all"
}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Project key derivation for Curator.
|
|
3
|
+
#
|
|
4
|
+
# Curator stores findings under the ecosystem-wide 12-char hex project key so
|
|
5
|
+
# state survives clone path changes and is shared across worktrees / clones of
|
|
6
|
+
# the same repo. (The typed memory store Curator audits lives at a different
|
|
7
|
+
# path keyed by the Claude Code per-checkout encoding; that path is resolved
|
|
8
|
+
# separately by curator-memory-reader.sh.)
|
|
9
|
+
#
|
|
10
|
+
# Resolution order:
|
|
11
|
+
# 1. SHA256(`git remote get-url origin`) — preferred, machine-portable
|
|
12
|
+
# 2. SHA256(realpath of `git rev-parse --show-toplevel`) — fallback for
|
|
13
|
+
# repos without an origin remote
|
|
14
|
+
#
|
|
15
|
+
# Returns the first 12 hex chars. Empty when not in a git repo at all.
|
|
16
|
+
|
|
17
|
+
_curator_sha256_first12() {
|
|
18
|
+
local input="$1"
|
|
19
|
+
if command -v shasum >/dev/null 2>&1; then
|
|
20
|
+
printf '%s' "$input" | shasum -a 256 2>/dev/null | cut -c1-12
|
|
21
|
+
elif command -v sha256sum >/dev/null 2>&1; then
|
|
22
|
+
printf '%s' "$input" | sha256sum 2>/dev/null | cut -c1-12
|
|
23
|
+
else
|
|
24
|
+
return 1
|
|
25
|
+
fi
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
# Print the URL of the `origin` remote for the repository containing the
# given directory. Prints nothing when the argument is empty, is not a
# directory, or git cannot report an origin URL. Always returns 0.
#
# Usage: curator_project_remote_url <cwd>
curator_project_remote_url() {
  local dir="${1:-}"

  if [[ -n "$dir" && -d "$dir" ]]; then
    git -C "$dir" remote get-url origin 2>/dev/null || true
  fi
}
|
|
33
|
+
|
|
34
|
+
# Print the physical (symlink-resolved) root directory of the repository
# containing the given directory, or nothing when the argument is empty,
# is not a directory, or is not inside a git work tree. Always returns 0.
#
# Resolution:
#   1. If `git rev-parse --git-common-dir` names a directory called
#      `.git`, use its parent. For a linked worktree the common dir is the
#      main checkout's `.git`, so all worktrees map to the same root.
#   2. Otherwise fall back to `git rev-parse --show-toplevel`. This covers
#      layouts where the common dir does not sit directly inside the work
#      tree (submodules, --separate-git-dir), for which "parent of the
#      common dir" is not the repository root.
#
# Usage: curator_project_repo_root <cwd>
curator_project_repo_root() {
  local cwd="${1:-}"
  [[ -z "$cwd" || ! -d "$cwd" ]] && return 0

  if ! git -C "$cwd" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
    return 0
  fi

  # toplevel starts empty so the later -z test is safe under `set -u`.
  local common_dir toplevel=""
  common_dir=$(git -C "$cwd" rev-parse --git-common-dir 2>/dev/null) || return 0

  # git may answer relative to cwd; make it absolute and physical.
  if [[ -n "$common_dir" && "$common_dir" != /* ]]; then
    common_dir="$(cd "$cwd" && cd "$common_dir" 2>/dev/null && pwd -P)" || common_dir=""
  fi

  if [[ -n "$common_dir" && -d "$common_dir" && "${common_dir##*/}" == ".git" ]]; then
    toplevel="$(cd "$common_dir/.." 2>/dev/null && pwd -P)" || toplevel=""
  fi

  if [[ -z "$toplevel" ]]; then
    toplevel=$(git -C "$cwd" rev-parse --show-toplevel 2>/dev/null || true)
    if [[ -n "$toplevel" ]]; then
      toplevel="$(cd "$toplevel" 2>/dev/null && pwd -P)" || toplevel=""
    fi
  fi

  printf '%s' "$toplevel"
}
|
|
60
|
+
|
|
61
|
+
# Compute the project key for a directory and print it (or print nothing
# when neither an origin URL nor a repo root can be determined).
#
# The key is the 12-char hash prefix of "remote:<origin url>" when an
# origin remote is available, otherwise of "root:<repo root>".
#
# Usage: key=$(curator_project_key "$CWD")    # defaults to $(pwd)
curator_project_key() {
  local cwd="${1:-}"
  if [[ -z "$cwd" ]]; then
    cwd="$(pwd)"
  fi

  local origin_url
  origin_url=$(curator_project_remote_url "$cwd")

  if [[ -z "$origin_url" ]]; then
    # No origin remote: fall back to the repository root path.
    local top
    top=$(curator_project_repo_root "$cwd")
    if [[ -n "$top" ]]; then
      _curator_sha256_first12 "root:$top"
    fi
    return 0
  fi

  _curator_sha256_first12 "remote:$origin_url"
  return 0
}
|